libstdc++
|
00001 // class template regex -*- C++ -*- 00002 00003 // Copyright (C) 2013-2017 Free Software Foundation, Inc. 00004 // 00005 // This file is part of the GNU ISO C++ Library. This library is free 00006 // software; you can redistribute it and/or modify it under the 00007 // terms of the GNU General Public License as published by the 00008 // Free Software Foundation; either version 3, or (at your option) 00009 // any later version. 00010 00011 // This library is distributed in the hope that it will be useful, 00012 // but WITHOUT ANY WARRANTY; without even the implied warranty of 00013 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00014 // GNU General Public License for more details. 00015 00016 // Under Section 7 of GPL version 3, you are granted additional 00017 // permissions described in the GCC Runtime Library Exception, version 00018 // 3.1, as published by the Free Software Foundation. 00019 00020 // You should have received a copy of the GNU General Public License and 00021 // a copy of the GCC Runtime Library Exception along with this program; 00022 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 00023 // <http://www.gnu.org/licenses/>. 00024 00025 /** 00026 * @file bits/regex.tcc 00027 * This is an internal header file, included by other library headers. 00028 * Do not attempt to use it directly. @headername{regex} 00029 */ 00030 00031 namespace std _GLIBCXX_VISIBILITY(default) 00032 { 00033 namespace __detail 00034 { 00035 _GLIBCXX_BEGIN_NAMESPACE_VERSION 00036 00037 // Result of merging regex_match and regex_search. 00038 // 00039 // __policy now can be _S_auto (auto dispatch) and _S_alternate (use 00040 // the other one if possible, for test purpose). 00041 // 00042 // That __match_mode is true means regex_match, else regex_search. 00043 template<typename _BiIter, typename _Alloc, 00044 typename _CharT, typename _TraitsT, 00045 _RegexExecutorPolicy __policy, 00046 bool __match_mode> 00047 bool 00048 __regex_algo_impl(_BiIter __s, 00049 _BiIter __e, 00050 match_results<_BiIter, _Alloc>& __m, 00051 const basic_regex<_CharT, _TraitsT>& __re, 00052 regex_constants::match_flag_type __flags) 00053 { 00054 if (__re._M_automaton == nullptr) 00055 return false; 00056 00057 typename match_results<_BiIter, _Alloc>::_Base_type& __res = __m; 00058 __m._M_begin = __s; 00059 __m._M_resize(__re._M_automaton->_M_sub_count()); 00060 for (auto& __it : __res) 00061 __it.matched = false; 00062 00063 bool __ret; 00064 if ((__re.flags() & regex_constants::__polynomial) 00065 || (__policy == _RegexExecutorPolicy::_S_alternate 00066 && !__re._M_automaton->_M_has_backref)) 00067 { 00068 _Executor<_BiIter, _Alloc, _TraitsT, false> 00069 __executor(__s, __e, __m, __re, __flags); 00070 if (__match_mode) 00071 __ret = __executor._M_match(); 00072 else 00073 __ret = __executor._M_search(); 00074 } 00075 else 00076 { 00077 _Executor<_BiIter, _Alloc, _TraitsT, true> 00078 __executor(__s, __e, __m, __re, __flags); 00079 if (__match_mode) 00080 __ret = __executor._M_match(); 00081 else 00082 __ret = __executor._M_search(); 00083 } 00084 if (__ret) 00085 { 00086 for (auto& __it : __res) 00087 if (!__it.matched) 00088 __it.first = __it.second = __e; 00089 auto& __pre = __m._M_prefix(); 00090 auto& __suf = __m._M_suffix(); 00091 if (__match_mode) 00092 { 00093 __pre.matched = false; 00094 __pre.first = __s; 00095 __pre.second = __s; 00096 __suf.matched = false; 00097 __suf.first = __e; 00098 __suf.second = __e; 00099 } 00100 else 00101 { 00102 __pre.first = __s; 00103 __pre.second = __res[0].first; 00104 __pre.matched = (__pre.first != __pre.second); 00105 __suf.first = __res[0].second; 00106 __suf.second = __e; 00107 __suf.matched = (__suf.first != __suf.second); 00108 } 00109 } 00110 else 00111 { 00112 __m._M_resize(0); 00113 for (auto& __it : __res) 00114 { 00115 __it.matched = false; 00116 __it.first = __it.second = __e; 00117 } 00118 } 00119 return __ret; 00120 } 00121 00122 _GLIBCXX_END_NAMESPACE_VERSION 00123 } 00124 00125 _GLIBCXX_BEGIN_NAMESPACE_VERSION 00126 00127 template<typename _Ch_type> 00128 template<typename _Fwd_iter> 00129 typename regex_traits<_Ch_type>::string_type 00130 regex_traits<_Ch_type>:: 00131 lookup_collatename(_Fwd_iter __first, _Fwd_iter __last) const 00132 { 00133 typedef std::ctype<char_type> __ctype_type; 00134 const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale)); 00135 00136 static const char* __collatenames[] = 00137 { 00138 "NUL", 00139 "SOH", 00140 "STX", 00141 "ETX", 00142 "EOT", 00143 "ENQ", 00144 "ACK", 00145 "alert", 00146 "backspace", 00147 "tab", 00148 "newline", 00149 "vertical-tab", 00150 "form-feed", 00151 "carriage-return", 00152 "SO", 00153 "SI", 00154 "DLE", 00155 "DC1", 00156 "DC2", 00157 "DC3", 00158 "DC4", 00159 "NAK", 00160 "SYN", 00161 "ETB", 00162 "CAN", 00163 "EM", 00164 "SUB", 00165 "ESC", 00166 "IS4", 00167 "IS3", 00168 "IS2", 00169 "IS1", 00170 "space", 00171 "exclamation-mark", 00172 "quotation-mark", 00173 "number-sign", 00174 "dollar-sign", 00175 "percent-sign", 00176 "ampersand", 00177 "apostrophe", 00178 "left-parenthesis", 00179 "right-parenthesis", 00180 "asterisk", 00181 "plus-sign", 00182 "comma", 00183 "hyphen", 00184 "period", 00185 "slash", 00186 "zero", 00187 "one", 00188 "two", 00189 "three", 00190 "four", 00191 "five", 00192 "six", 00193 "seven", 00194 "eight", 00195 "nine", 00196 "colon", 00197 "semicolon", 00198 "less-than-sign", 00199 "equals-sign", 00200 "greater-than-sign", 00201 "question-mark", 00202 "commercial-at", 00203 "A", 00204 "B", 00205 "C", 00206 "D", 00207 "E", 00208 "F", 00209 "G", 00210 "H", 00211 "I", 00212 "J", 00213 "K", 00214 "L", 00215 "M", 00216 "N", 00217 "O", 00218 "P", 00219 "Q", 00220 "R", 00221 "S", 00222 "T", 00223 "U", 00224 "V", 00225 "W", 00226 "X", 00227 "Y", 00228 "Z", 00229 "left-square-bracket", 00230 "backslash", 00231 "right-square-bracket", 00232 "circumflex", 00233 "underscore", 00234 "grave-accent", 00235 "a", 00236 "b", 00237 "c", 00238 "d", 00239 "e", 00240 "f", 00241 "g", 00242 "h", 00243 "i", 00244 "j", 00245 "k", 00246 "l", 00247 "m", 00248 "n", 00249 "o", 00250 "p", 00251 "q", 00252 "r", 00253 "s", 00254 "t", 00255 "u", 00256 "v", 00257 "w", 00258 "x", 00259 "y", 00260 "z", 00261 "left-curly-bracket", 00262 "vertical-line", 00263 "right-curly-bracket", 00264 "tilde", 00265 "DEL", 00266 }; 00267 00268 string __s; 00269 for (; __first != __last; ++__first) 00270 __s += __fctyp.narrow(*__first, 0); 00271 00272 for (const auto& __it : __collatenames) 00273 if (__s == __it) 00274 return string_type(1, __fctyp.widen( 00275 static_cast<char>(&__it - __collatenames))); 00276 00277 // TODO Add digraph support: 00278 // http://boost.sourceforge.net/libs/regex/doc/collating_names.html 00279 00280 return string_type(); 00281 } 00282 00283 template<typename _Ch_type> 00284 template<typename _Fwd_iter> 00285 typename regex_traits<_Ch_type>::char_class_type 00286 regex_traits<_Ch_type>:: 00287 lookup_classname(_Fwd_iter __first, _Fwd_iter __last, bool __icase) const 00288 { 00289 typedef std::ctype<char_type> __ctype_type; 00290 const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale)); 00291 00292 // Mappings from class name to class mask. 00293 static const pair<const char*, char_class_type> __classnames[] = 00294 { 00295 {"d", ctype_base::digit}, 00296 {"w", {ctype_base::alnum, _RegexMask::_S_under}}, 00297 {"s", ctype_base::space}, 00298 {"alnum", ctype_base::alnum}, 00299 {"alpha", ctype_base::alpha}, 00300 {"blank", ctype_base::blank}, 00301 {"cntrl", ctype_base::cntrl}, 00302 {"digit", ctype_base::digit}, 00303 {"graph", ctype_base::graph}, 00304 {"lower", ctype_base::lower}, 00305 {"print", ctype_base::print}, 00306 {"punct", ctype_base::punct}, 00307 {"space", ctype_base::space}, 00308 {"upper", ctype_base::upper}, 00309 {"xdigit", ctype_base::xdigit}, 00310 }; 00311 00312 string __s; 00313 for (; __first != __last; ++__first) 00314 __s += __fctyp.narrow(__fctyp.tolower(*__first), 0); 00315 00316 for (const auto& __it : __classnames) 00317 if (__s == __it.first) 00318 { 00319 if (__icase 00320 && ((__it.second 00321 & (ctype_base::lower | ctype_base::upper)) != 0)) 00322 return ctype_base::alpha; 00323 return __it.second; 00324 } 00325 return 0; 00326 } 00327 00328 template<typename _Ch_type> 00329 bool 00330 regex_traits<_Ch_type>:: 00331 isctype(_Ch_type __c, char_class_type __f) const 00332 { 00333 typedef std::ctype<char_type> __ctype_type; 00334 const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale)); 00335 00336 return __fctyp.is(__f._M_base, __c) 00337 // [[:w:]] 00338 || ((__f._M_extended & _RegexMask::_S_under) 00339 && __c == __fctyp.widen('_')); 00340 } 00341 00342 template<typename _Ch_type> 00343 int 00344 regex_traits<_Ch_type>:: 00345 value(_Ch_type __ch, int __radix) const 00346 { 00347 std::basic_istringstream<char_type> __is(string_type(1, __ch)); 00348 long __v; 00349 if (__radix == 8) 00350 __is >> std::oct; 00351 else if (__radix == 16) 00352 __is >> std::hex; 00353 __is >> __v; 00354 return __is.fail() ? -1 : __v; 00355 } 00356 00357 template<typename _Bi_iter, typename _Alloc> 00358 template<typename _Out_iter> 00359 _Out_iter match_results<_Bi_iter, _Alloc>:: 00360 format(_Out_iter __out, 00361 const match_results<_Bi_iter, _Alloc>::char_type* __fmt_first, 00362 const match_results<_Bi_iter, _Alloc>::char_type* __fmt_last, 00363 match_flag_type __flags) const 00364 { 00365 __glibcxx_assert( ready() ); 00366 regex_traits<char_type> __traits; 00367 typedef std::ctype<char_type> __ctype_type; 00368 const __ctype_type& 00369 __fctyp(use_facet<__ctype_type>(__traits.getloc())); 00370 00371 auto __output = [&](size_t __idx) 00372 { 00373 auto& __sub = (*this)[__idx]; 00374 if (__sub.matched) 00375 __out = std::copy(__sub.first, __sub.second, __out); 00376 }; 00377 00378 if (__flags & regex_constants::format_sed) 00379 { 00380 for (; __fmt_first != __fmt_last;) 00381 if (*__fmt_first == '&') 00382 { 00383 __output(0); 00384 ++__fmt_first; 00385 } 00386 else if (*__fmt_first == '\\') 00387 { 00388 if (++__fmt_first != __fmt_last 00389 && __fctyp.is(__ctype_type::digit, *__fmt_first)) 00390 __output(__traits.value(*__fmt_first++, 10)); 00391 else 00392 *__out++ = '\\'; 00393 } 00394 else 00395 *__out++ = *__fmt_first++; 00396 } 00397 else 00398 { 00399 while (1) 00400 { 00401 auto __next = std::find(__fmt_first, __fmt_last, '$'); 00402 if (__next == __fmt_last) 00403 break; 00404 00405 __out = std::copy(__fmt_first, __next, __out); 00406 00407 auto __eat = [&](char __ch) -> bool 00408 { 00409 if (*__next == __ch) 00410 { 00411 ++__next; 00412 return true; 00413 } 00414 return false; 00415 }; 00416 00417 if (++__next == __fmt_last) 00418 *__out++ = '$'; 00419 else if (__eat('$')) 00420 *__out++ = '$'; 00421 else if (__eat('&')) 00422 __output(0); 00423 else if (__eat('`')) 00424 { 00425 auto& __sub = _M_prefix(); 00426 if (__sub.matched) 00427 __out = std::copy(__sub.first, __sub.second, __out); 00428 } 00429 else if (__eat('\'')) 00430 { 00431 auto& __sub = _M_suffix(); 00432 if (__sub.matched) 00433 __out = std::copy(__sub.first, __sub.second, __out); 00434 } 00435 else if (__fctyp.is(__ctype_type::digit, *__next)) 00436 { 00437 long __num = __traits.value(*__next, 10); 00438 if (++__next != __fmt_last 00439 && __fctyp.is(__ctype_type::digit, *__next)) 00440 { 00441 __num *= 10; 00442 __num += __traits.value(*__next++, 10); 00443 } 00444 if (0 <= __num && __num < this->size()) 00445 __output(__num); 00446 } 00447 else 00448 *__out++ = '$'; 00449 __fmt_first = __next; 00450 } 00451 __out = std::copy(__fmt_first, __fmt_last, __out); 00452 } 00453 return __out; 00454 } 00455 00456 template<typename _Out_iter, typename _Bi_iter, 00457 typename _Rx_traits, typename _Ch_type> 00458 _Out_iter 00459 regex_replace(_Out_iter __out, _Bi_iter __first, _Bi_iter __last, 00460 const basic_regex<_Ch_type, _Rx_traits>& __e, 00461 const _Ch_type* __fmt, 00462 regex_constants::match_flag_type __flags) 00463 { 00464 typedef regex_iterator<_Bi_iter, _Ch_type, _Rx_traits> _IterT; 00465 _IterT __i(__first, __last, __e, __flags); 00466 _IterT __end; 00467 if (__i == __end) 00468 { 00469 if (!(__flags & regex_constants::format_no_copy)) 00470 __out = std::copy(__first, __last, __out); 00471 } 00472 else 00473 { 00474 sub_match<_Bi_iter> __last; 00475 auto __len = char_traits<_Ch_type>::length(__fmt); 00476 for (; __i != __end; ++__i) 00477 { 00478 if (!(__flags & regex_constants::format_no_copy)) 00479 __out = std::copy(__i->prefix().first, __i->prefix().second, 00480 __out); 00481 __out = __i->format(__out, __fmt, __fmt + __len, __flags); 00482 __last = __i->suffix(); 00483 if (__flags & regex_constants::format_first_only) 00484 break; 00485 } 00486 if (!(__flags & regex_constants::format_no_copy)) 00487 __out = std::copy(__last.first, __last.second, __out); 00488 } 00489 return __out; 00490 } 00491 00492 template<typename _Bi_iter, 00493 typename _Ch_type, 00494 typename _Rx_traits> 00495 bool 00496 regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>:: 00497 operator==(const regex_iterator& __rhs) const 00498 { 00499 if (_M_pregex == nullptr && __rhs._M_pregex == nullptr) 00500 return true; 00501 return _M_pregex == __rhs._M_pregex 00502 && _M_begin == __rhs._M_begin 00503 && _M_end == __rhs._M_end 00504 && _M_flags == __rhs._M_flags 00505 && _M_match[0] == __rhs._M_match[0]; 00506 } 00507 00508 template<typename _Bi_iter, 00509 typename _Ch_type, 00510 typename _Rx_traits> 00511 regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>& 00512 regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>:: 00513 operator++() 00514 { 00515 // In all cases in which the call to regex_search returns true, 00516 // match.prefix().first shall be equal to the previous value of 00517 // match[0].second, and for each index i in the half-open range 00518 // [0, match.size()) for which match[i].matched is true, 00519 // match[i].position() shall return distance(begin, match[i].first). 00520 // [28.12.1.4.5] 00521 if (_M_match[0].matched) 00522 { 00523 auto __start = _M_match[0].second; 00524 auto __prefix_first = _M_match[0].second; 00525 if (_M_match[0].first == _M_match[0].second) 00526 { 00527 if (__start == _M_end) 00528 { 00529 _M_pregex = nullptr; 00530 return *this; 00531 } 00532 else 00533 { 00534 if (regex_search(__start, _M_end, _M_match, *_M_pregex, 00535 _M_flags 00536 | regex_constants::match_not_null 00537 | regex_constants::match_continuous)) 00538 { 00539 __glibcxx_assert(_M_match[0].matched); 00540 auto& __prefix = _M_match._M_prefix(); 00541 __prefix.first = __prefix_first; 00542 __prefix.matched = __prefix.first != __prefix.second; 00543 // [28.12.1.4.5] 00544 _M_match._M_begin = _M_begin; 00545 return *this; 00546 } 00547 else 00548 ++__start; 00549 } 00550 } 00551 _M_flags |= regex_constants::match_prev_avail; 00552 if (regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags)) 00553 { 00554 __glibcxx_assert(_M_match[0].matched); 00555 auto& __prefix = _M_match._M_prefix(); 00556 __prefix.first = __prefix_first; 00557 __prefix.matched = __prefix.first != __prefix.second; 00558 // [28.12.1.4.5] 00559 _M_match._M_begin = _M_begin; 00560 } 00561 else 00562 _M_pregex = nullptr; 00563 } 00564 return *this; 00565 } 00566 00567 template<typename _Bi_iter, 00568 typename _Ch_type, 00569 typename _Rx_traits> 00570 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>& 00571 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>:: 00572 operator=(const regex_token_iterator& __rhs) 00573 { 00574 _M_position = __rhs._M_position; 00575 _M_subs = __rhs._M_subs; 00576 _M_n = __rhs._M_n; 00577 _M_suffix = __rhs._M_suffix; 00578 _M_has_m1 = __rhs._M_has_m1; 00579 _M_normalize_result(); 00580 return *this; 00581 } 00582 00583 template<typename _Bi_iter, 00584 typename _Ch_type, 00585 typename _Rx_traits> 00586 bool 00587 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>:: 00588 operator==(const regex_token_iterator& __rhs) const 00589 { 00590 if (_M_end_of_seq() && __rhs._M_end_of_seq()) 00591 return true; 00592 if (_M_suffix.matched && __rhs._M_suffix.matched 00593 && _M_suffix == __rhs._M_suffix) 00594 return true; 00595 if (_M_end_of_seq() || _M_suffix.matched 00596 || __rhs._M_end_of_seq() || __rhs._M_suffix.matched) 00597 return false; 00598 return _M_position == __rhs._M_position 00599 && _M_n == __rhs._M_n 00600 && _M_subs == __rhs._M_subs; 00601 } 00602 00603 template<typename _Bi_iter, 00604 typename _Ch_type, 00605 typename _Rx_traits> 00606 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>& 00607 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>:: 00608 operator++() 00609 { 00610 _Position __prev = _M_position; 00611 if (_M_suffix.matched) 00612 *this = regex_token_iterator(); 00613 else if (_M_n + 1 < _M_subs.size()) 00614 { 00615 _M_n++; 00616 _M_result = &_M_current_match(); 00617 } 00618 else 00619 { 00620 _M_n = 0; 00621 ++_M_position; 00622 if (_M_position != _Position()) 00623 _M_result = &_M_current_match(); 00624 else if (_M_has_m1 && __prev->suffix().length() != 0) 00625 { 00626 _M_suffix.matched = true; 00627 _M_suffix.first = __prev->suffix().first; 00628 _M_suffix.second = __prev->suffix().second; 00629 _M_result = &_M_suffix; 00630 } 00631 else 00632 *this = regex_token_iterator(); 00633 } 00634 return *this; 00635 } 00636 00637 template<typename _Bi_iter, 00638 typename _Ch_type, 00639 typename _Rx_traits> 00640 void 00641 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>:: 00642 _M_init(_Bi_iter __a, _Bi_iter __b) 00643 { 00644 _M_has_m1 = false; 00645 for (auto __it : _M_subs) 00646 if (__it == -1) 00647 { 00648 _M_has_m1 = true; 00649 break; 00650 } 00651 if (_M_position != _Position()) 00652 _M_result = &_M_current_match(); 00653 else if (_M_has_m1) 00654 { 00655 _M_suffix.matched = true; 00656 _M_suffix.first = __a; 00657 _M_suffix.second = __b; 00658 _M_result = &_M_suffix; 00659 } 00660 else 00661 _M_result = nullptr; 00662 } 00663 00664 _GLIBCXX_END_NAMESPACE_VERSION 00665 } // namespace 00666