libstdc++
regex.tcc
Go to the documentation of this file.
00001 // class template regex -*- C++ -*-
00002 
00003 // Copyright (C) 2013-2018 Free Software Foundation, Inc.
00004 //
00005 // This file is part of the GNU ISO C++ Library.  This library is free
00006 // software; you can redistribute it and/or modify it under the
00007 // terms of the GNU General Public License as published by the
00008 // Free Software Foundation; either version 3, or (at your option)
00009 // any later version.
00010 
00011 // This library is distributed in the hope that it will be useful,
00012 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00013 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014 // GNU General Public License for more details.
00015 
00016 // Under Section 7 of GPL version 3, you are granted additional
00017 // permissions described in the GCC Runtime Library Exception, version
00018 // 3.1, as published by the Free Software Foundation.
00019 
00020 // You should have received a copy of the GNU General Public License and
00021 // a copy of the GCC Runtime Library Exception along with this program;
00022 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
00023 // <http://www.gnu.org/licenses/>.
00024 
00025 /**
00026  *  @file bits/regex.tcc
00027  *  This is an internal header file, included by other library headers.
00028  *  Do not attempt to use it directly. @headername{regex}
00029  */
00030 
00031 namespace std _GLIBCXX_VISIBILITY(default)
00032 {
00033 _GLIBCXX_BEGIN_NAMESPACE_VERSION
00034 
00035 namespace __detail
00036 {
00037   // Result of merging regex_match and regex_search.
00038   //
00039   // __policy now can be _S_auto (auto dispatch) and _S_alternate (use
00040   // the other one if possible, for test purpose).
00041   //
00042   // That __match_mode is true means regex_match, else regex_search.
00043   template<typename _BiIter, typename _Alloc,
00044            typename _CharT, typename _TraitsT,
00045            _RegexExecutorPolicy __policy,
00046            bool __match_mode>
00047     bool
00048     __regex_algo_impl(_BiIter                              __s,
00049                       _BiIter                              __e,
00050                       match_results<_BiIter, _Alloc>&      __m,
00051                       const basic_regex<_CharT, _TraitsT>& __re,
00052                       regex_constants::match_flag_type     __flags)
00053     {
00054       if (__re._M_automaton == nullptr)
00055         return false;
00056 
00057       typename match_results<_BiIter, _Alloc>::_Base_type& __res = __m;
00058       __m._M_begin = __s;
00059       __m._M_resize(__re._M_automaton->_M_sub_count());
00060       for (auto& __it : __res)
00061         __it.matched = false;
00062 
00063       bool __ret;
00064       if ((__re.flags() & regex_constants::__polynomial)
00065           || (__policy == _RegexExecutorPolicy::_S_alternate
00066               && !__re._M_automaton->_M_has_backref))
00067         {
00068           _Executor<_BiIter, _Alloc, _TraitsT, false>
00069             __executor(__s, __e, __m, __re, __flags);
00070           if (__match_mode)
00071             __ret = __executor._M_match();
00072           else
00073             __ret = __executor._M_search();
00074         }
00075       else
00076         {
00077           _Executor<_BiIter, _Alloc, _TraitsT, true>
00078             __executor(__s, __e, __m, __re, __flags);
00079           if (__match_mode)
00080             __ret = __executor._M_match();
00081           else
00082             __ret = __executor._M_search();
00083         }
00084       if (__ret)
00085         {
00086           for (auto& __it : __res)
00087             if (!__it.matched)
00088               __it.first = __it.second = __e;
00089           auto& __pre = __m._M_prefix();
00090           auto& __suf = __m._M_suffix();
00091           if (__match_mode)
00092             {
00093               __pre.matched = false;
00094               __pre.first = __s;
00095               __pre.second = __s;
00096               __suf.matched = false;
00097               __suf.first = __e;
00098               __suf.second = __e;
00099             }
00100           else
00101             {
00102               __pre.first = __s;
00103               __pre.second = __res[0].first;
00104               __pre.matched = (__pre.first != __pre.second);
00105               __suf.first = __res[0].second;
00106               __suf.second = __e;
00107               __suf.matched = (__suf.first != __suf.second);
00108             }
00109         }
00110       else
00111         {
00112           __m._M_resize(0);
00113           for (auto& __it : __res)
00114             {
00115               __it.matched = false;
00116               __it.first = __it.second = __e;
00117             }
00118         }
00119       return __ret;
00120     }
00121 }
00122 
00123   template<typename _Ch_type>
00124   template<typename _Fwd_iter>
00125     typename regex_traits<_Ch_type>::string_type
00126     regex_traits<_Ch_type>::
00127     lookup_collatename(_Fwd_iter __first, _Fwd_iter __last) const
00128     {
00129       typedef std::ctype<char_type> __ctype_type;
00130       const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
00131 
00132       static const char* __collatenames[] =
00133         {
00134           "NUL",
00135           "SOH",
00136           "STX",
00137           "ETX",
00138           "EOT",
00139           "ENQ",
00140           "ACK",
00141           "alert",
00142           "backspace",
00143           "tab",
00144           "newline",
00145           "vertical-tab",
00146           "form-feed",
00147           "carriage-return",
00148           "SO",
00149           "SI",
00150           "DLE",
00151           "DC1",
00152           "DC2",
00153           "DC3",
00154           "DC4",
00155           "NAK",
00156           "SYN",
00157           "ETB",
00158           "CAN",
00159           "EM",
00160           "SUB",
00161           "ESC",
00162           "IS4",
00163           "IS3",
00164           "IS2",
00165           "IS1",
00166           "space",
00167           "exclamation-mark",
00168           "quotation-mark",
00169           "number-sign",
00170           "dollar-sign",
00171           "percent-sign",
00172           "ampersand",
00173           "apostrophe",
00174           "left-parenthesis",
00175           "right-parenthesis",
00176           "asterisk",
00177           "plus-sign",
00178           "comma",
00179           "hyphen",
00180           "period",
00181           "slash",
00182           "zero",
00183           "one",
00184           "two",
00185           "three",
00186           "four",
00187           "five",
00188           "six",
00189           "seven",
00190           "eight",
00191           "nine",
00192           "colon",
00193           "semicolon",
00194           "less-than-sign",
00195           "equals-sign",
00196           "greater-than-sign",
00197           "question-mark",
00198           "commercial-at",
00199           "A",
00200           "B",
00201           "C",
00202           "D",
00203           "E",
00204           "F",
00205           "G",
00206           "H",
00207           "I",
00208           "J",
00209           "K",
00210           "L",
00211           "M",
00212           "N",
00213           "O",
00214           "P",
00215           "Q",
00216           "R",
00217           "S",
00218           "T",
00219           "U",
00220           "V",
00221           "W",
00222           "X",
00223           "Y",
00224           "Z",
00225           "left-square-bracket",
00226           "backslash",
00227           "right-square-bracket",
00228           "circumflex",
00229           "underscore",
00230           "grave-accent",
00231           "a",
00232           "b",
00233           "c",
00234           "d",
00235           "e",
00236           "f",
00237           "g",
00238           "h",
00239           "i",
00240           "j",
00241           "k",
00242           "l",
00243           "m",
00244           "n",
00245           "o",
00246           "p",
00247           "q",
00248           "r",
00249           "s",
00250           "t",
00251           "u",
00252           "v",
00253           "w",
00254           "x",
00255           "y",
00256           "z",
00257           "left-curly-bracket",
00258           "vertical-line",
00259           "right-curly-bracket",
00260           "tilde",
00261           "DEL",
00262         };
00263 
00264       string __s;
00265       for (; __first != __last; ++__first)
00266         __s += __fctyp.narrow(*__first, 0);
00267 
00268       for (const auto& __it : __collatenames)
00269         if (__s == __it)
00270           return string_type(1, __fctyp.widen(
00271             static_cast<char>(&__it - __collatenames)));
00272 
00273       // TODO Add digraph support:
00274       // http://boost.sourceforge.net/libs/regex/doc/collating_names.html
00275 
00276       return string_type();
00277     }
00278 
00279   template<typename _Ch_type>
00280   template<typename _Fwd_iter>
00281     typename regex_traits<_Ch_type>::char_class_type
00282     regex_traits<_Ch_type>::
00283     lookup_classname(_Fwd_iter __first, _Fwd_iter __last, bool __icase) const
00284     {
00285       typedef std::ctype<char_type> __ctype_type;
00286       const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
00287 
00288       // Mappings from class name to class mask.
00289       static const pair<const char*, char_class_type> __classnames[] =
00290       {
00291         {"d", ctype_base::digit},
00292         {"w", {ctype_base::alnum, _RegexMask::_S_under}},
00293         {"s", ctype_base::space},
00294         {"alnum", ctype_base::alnum},
00295         {"alpha", ctype_base::alpha},
00296         {"blank", ctype_base::blank},
00297         {"cntrl", ctype_base::cntrl},
00298         {"digit", ctype_base::digit},
00299         {"graph", ctype_base::graph},
00300         {"lower", ctype_base::lower},
00301         {"print", ctype_base::print},
00302         {"punct", ctype_base::punct},
00303         {"space", ctype_base::space},
00304         {"upper", ctype_base::upper},
00305         {"xdigit", ctype_base::xdigit},
00306       };
00307 
00308       string __s;
00309       for (; __first != __last; ++__first)
00310         __s += __fctyp.narrow(__fctyp.tolower(*__first), 0);
00311 
00312       for (const auto& __it : __classnames)
00313         if (__s == __it.first)
00314           {
00315             if (__icase
00316                 && ((__it.second
00317                      & (ctype_base::lower | ctype_base::upper)) != 0))
00318               return ctype_base::alpha;
00319             return __it.second;
00320           }
00321       return 0;
00322     }
00323 
00324   template<typename _Ch_type>
00325     bool
00326     regex_traits<_Ch_type>::
00327     isctype(_Ch_type __c, char_class_type __f) const
00328     {
00329       typedef std::ctype<char_type> __ctype_type;
00330       const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
00331 
00332       return __fctyp.is(__f._M_base, __c)
00333         // [[:w:]]
00334         || ((__f._M_extended & _RegexMask::_S_under)
00335             && __c == __fctyp.widen('_'));
00336     }
00337 
00338   template<typename _Ch_type>
00339     int
00340     regex_traits<_Ch_type>::
00341     value(_Ch_type __ch, int __radix) const
00342     {
00343       std::basic_istringstream<char_type> __is(string_type(1, __ch));
00344       long __v;
00345       if (__radix == 8)
00346         __is >> std::oct;
00347       else if (__radix == 16)
00348         __is >> std::hex;
00349       __is >> __v;
00350       return __is.fail() ? -1 : __v;
00351     }
00352 
00353   template<typename _Bi_iter, typename _Alloc>
00354   template<typename _Out_iter>
00355     _Out_iter match_results<_Bi_iter, _Alloc>::
00356     format(_Out_iter __out,
00357            const match_results<_Bi_iter, _Alloc>::char_type* __fmt_first,
00358            const match_results<_Bi_iter, _Alloc>::char_type* __fmt_last,
00359            match_flag_type __flags) const
00360     {
00361       __glibcxx_assert( ready() );
00362       regex_traits<char_type> __traits;
00363       typedef std::ctype<char_type> __ctype_type;
00364       const __ctype_type&
00365         __fctyp(use_facet<__ctype_type>(__traits.getloc()));
00366 
00367       auto __output = [&](size_t __idx)
00368         {
00369           auto& __sub = (*this)[__idx];
00370           if (__sub.matched)
00371             __out = std::copy(__sub.first, __sub.second, __out);
00372         };
00373 
00374       if (__flags & regex_constants::format_sed)
00375         {
00376           bool __escaping = false;
00377           for (; __fmt_first != __fmt_last; __fmt_first++)
00378             {
00379               if (__escaping)
00380                 {
00381                   __escaping = false;
00382                   if (__fctyp.is(__ctype_type::digit, *__fmt_first))
00383                     __output(__traits.value(*__fmt_first, 10));
00384                   else
00385                     *__out++ = *__fmt_first;
00386                   continue;
00387                 }
00388               if (*__fmt_first == '\\')
00389                 {
00390                   __escaping = true;
00391                   continue;
00392                 }
00393               if (*__fmt_first == '&')
00394                 {
00395                   __output(0);
00396                   continue;
00397                 }
00398               *__out++ = *__fmt_first;
00399             }
00400           if (__escaping)
00401             *__out++ = '\\';
00402         }
00403       else
00404         {
00405           while (1)
00406             {
00407               auto __next = std::find(__fmt_first, __fmt_last, '$');
00408               if (__next == __fmt_last)
00409                 break;
00410 
00411               __out = std::copy(__fmt_first, __next, __out);
00412 
00413               auto __eat = [&](char __ch) -> bool
00414                 {
00415                   if (*__next == __ch)
00416                     {
00417                       ++__next;
00418                       return true;
00419                     }
00420                   return false;
00421                 };
00422 
00423               if (++__next == __fmt_last)
00424                 *__out++ = '$';
00425               else if (__eat('$'))
00426                 *__out++ = '$';
00427               else if (__eat('&'))
00428                 __output(0);
00429               else if (__eat('`'))
00430                 {
00431                   auto& __sub = _M_prefix();
00432                   if (__sub.matched)
00433                     __out = std::copy(__sub.first, __sub.second, __out);
00434                 }
00435               else if (__eat('\''))
00436                 {
00437                   auto& __sub = _M_suffix();
00438                   if (__sub.matched)
00439                     __out = std::copy(__sub.first, __sub.second, __out);
00440                 }
00441               else if (__fctyp.is(__ctype_type::digit, *__next))
00442                 {
00443                   long __num = __traits.value(*__next, 10);
00444                   if (++__next != __fmt_last
00445                       && __fctyp.is(__ctype_type::digit, *__next))
00446                     {
00447                       __num *= 10;
00448                       __num += __traits.value(*__next++, 10);
00449                     }
00450                   if (0 <= __num && __num < this->size())
00451                     __output(__num);
00452                 }
00453               else
00454                 *__out++ = '$';
00455               __fmt_first = __next;
00456             }
00457           __out = std::copy(__fmt_first, __fmt_last, __out);
00458         }
00459       return __out;
00460     }
00461 
00462   template<typename _Out_iter, typename _Bi_iter,
00463            typename _Rx_traits, typename _Ch_type>
00464     _Out_iter
00465     regex_replace(_Out_iter __out, _Bi_iter __first, _Bi_iter __last,
00466                   const basic_regex<_Ch_type, _Rx_traits>& __e,
00467                   const _Ch_type* __fmt,
00468                   regex_constants::match_flag_type __flags)
00469     {
00470       typedef regex_iterator<_Bi_iter, _Ch_type, _Rx_traits> _IterT;
00471       _IterT __i(__first, __last, __e, __flags);
00472       _IterT __end;
00473       if (__i == __end)
00474         {
00475           if (!(__flags & regex_constants::format_no_copy))
00476             __out = std::copy(__first, __last, __out);
00477         }
00478       else
00479         {
00480           sub_match<_Bi_iter> __last;
00481           auto __len = char_traits<_Ch_type>::length(__fmt);
00482           for (; __i != __end; ++__i)
00483             {
00484               if (!(__flags & regex_constants::format_no_copy))
00485                 __out = std::copy(__i->prefix().first, __i->prefix().second,
00486                                   __out);
00487               __out = __i->format(__out, __fmt, __fmt + __len, __flags);
00488               __last = __i->suffix();
00489               if (__flags & regex_constants::format_first_only)
00490                 break;
00491             }
00492           if (!(__flags & regex_constants::format_no_copy))
00493             __out = std::copy(__last.first, __last.second, __out);
00494         }
00495       return __out;
00496     }
00497 
00498   template<typename _Bi_iter,
00499            typename _Ch_type,
00500            typename _Rx_traits>
00501     bool
00502     regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
00503     operator==(const regex_iterator& __rhs) const
00504     {
00505       if (_M_pregex == nullptr && __rhs._M_pregex == nullptr)
00506         return true;
00507       return _M_pregex == __rhs._M_pregex
00508           && _M_begin == __rhs._M_begin
00509           && _M_end == __rhs._M_end
00510           && _M_flags == __rhs._M_flags
00511           && _M_match[0] == __rhs._M_match[0];
00512     }
00513 
00514   template<typename _Bi_iter,
00515            typename _Ch_type,
00516            typename _Rx_traits>
00517     regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
00518     regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
00519     operator++()
00520     {
00521       // In all cases in which the call to regex_search returns true,
00522       // match.prefix().first shall be equal to the previous value of
00523       // match[0].second, and for each index i in the half-open range
00524       // [0, match.size()) for which match[i].matched is true,
00525       // match[i].position() shall return distance(begin, match[i].first).
00526       // [28.12.1.4.5]
00527       if (_M_match[0].matched)
00528         {
00529           auto __start = _M_match[0].second;
00530           auto __prefix_first = _M_match[0].second;
00531           if (_M_match[0].first == _M_match[0].second)
00532             {
00533               if (__start == _M_end)
00534                 {
00535                   _M_pregex = nullptr;
00536                   return *this;
00537                 }
00538               else
00539                 {
00540                   if (regex_search(__start, _M_end, _M_match, *_M_pregex,
00541                                    _M_flags
00542                                    | regex_constants::match_not_null
00543                                    | regex_constants::match_continuous))
00544                     {
00545                       __glibcxx_assert(_M_match[0].matched);
00546                       auto& __prefix = _M_match._M_prefix();
00547                       __prefix.first = __prefix_first;
00548                       __prefix.matched = __prefix.first != __prefix.second;
00549                       // [28.12.1.4.5]
00550                       _M_match._M_begin = _M_begin;
00551                       return *this;
00552                     }
00553                   else
00554                     ++__start;
00555                 }
00556             }
00557           _M_flags |= regex_constants::match_prev_avail;
00558           if (regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags))
00559             {
00560               __glibcxx_assert(_M_match[0].matched);
00561               auto& __prefix = _M_match._M_prefix();
00562               __prefix.first = __prefix_first;
00563               __prefix.matched = __prefix.first != __prefix.second;
00564               // [28.12.1.4.5]
00565               _M_match._M_begin = _M_begin;
00566             }
00567           else
00568             _M_pregex = nullptr;
00569         }
00570       return *this;
00571     }
00572 
00573   template<typename _Bi_iter,
00574            typename _Ch_type,
00575            typename _Rx_traits>
00576     regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
00577     regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
00578     operator=(const regex_token_iterator& __rhs)
00579     {
00580       _M_position = __rhs._M_position;
00581       _M_subs = __rhs._M_subs;
00582       _M_n = __rhs._M_n;
00583       _M_suffix = __rhs._M_suffix;
00584       _M_has_m1 = __rhs._M_has_m1;
00585       _M_normalize_result();
00586       return *this;
00587     }
00588 
00589   template<typename _Bi_iter,
00590            typename _Ch_type,
00591            typename _Rx_traits>
00592     bool
00593     regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
00594     operator==(const regex_token_iterator& __rhs) const
00595     {
00596       if (_M_end_of_seq() && __rhs._M_end_of_seq())
00597         return true;
00598       if (_M_suffix.matched && __rhs._M_suffix.matched
00599           && _M_suffix == __rhs._M_suffix)
00600         return true;
00601       if (_M_end_of_seq() || _M_suffix.matched
00602           || __rhs._M_end_of_seq() || __rhs._M_suffix.matched)
00603         return false;
00604       return _M_position == __rhs._M_position
00605         && _M_n == __rhs._M_n
00606         && _M_subs == __rhs._M_subs;
00607     }
00608 
00609   template<typename _Bi_iter,
00610            typename _Ch_type,
00611            typename _Rx_traits>
00612     regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
00613     regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
00614     operator++()
00615     {
00616       _Position __prev = _M_position;
00617       if (_M_suffix.matched)
00618         *this = regex_token_iterator();
00619       else if (_M_n + 1 < _M_subs.size())
00620         {
00621           _M_n++;
00622           _M_result = &_M_current_match();
00623         }
00624       else
00625         {
00626           _M_n = 0;
00627           ++_M_position;
00628           if (_M_position != _Position())
00629             _M_result = &_M_current_match();
00630           else if (_M_has_m1 && __prev->suffix().length() != 0)
00631             {
00632               _M_suffix.matched = true;
00633               _M_suffix.first = __prev->suffix().first;
00634               _M_suffix.second = __prev->suffix().second;
00635               _M_result = &_M_suffix;
00636             }
00637           else
00638             *this = regex_token_iterator();
00639         }
00640       return *this;
00641     }
00642 
00643   template<typename _Bi_iter,
00644            typename _Ch_type,
00645            typename _Rx_traits>
00646     void
00647     regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
00648     _M_init(_Bi_iter __a, _Bi_iter __b)
00649     {
00650       _M_has_m1 = false;
00651       for (auto __it : _M_subs)
00652         if (__it == -1)
00653           {
00654             _M_has_m1 = true;
00655             break;
00656           }
00657       if (_M_position != _Position())
00658         _M_result = &_M_current_match();
00659       else if (_M_has_m1)
00660         {
00661           _M_suffix.matched = true;
00662           _M_suffix.first = __a;
00663           _M_suffix.second = __b;
00664           _M_result = &_M_suffix;
00665         }
00666       else
00667         _M_result = nullptr;
00668     }
00669 
00670 _GLIBCXX_END_NAMESPACE_VERSION
00671 } // namespace