00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031 namespace std _GLIBCXX_VISIBILITY(default)
00032 {
00033 _GLIBCXX_BEGIN_NAMESPACE_VERSION
00034
00035 namespace __regex
00036 {
00037 struct _Scanner_base
00038 {
00039
00040 typedef unsigned int _StateT;
00041
00042 static const _StateT _S_state_at_start = 1 << 0;
00043 static const _StateT _S_state_in_brace = 1 << 2;
00044 static const _StateT _S_state_in_bracket = 1 << 3;
00045 };
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057 template<typename _InputIterator>
00058 class _Scanner: public _Scanner_base
00059 {
00060 public:
00061 typedef _InputIterator _IteratorT;
00062 typedef typename std::iterator_traits<_IteratorT>::value_type _CharT;
00063 typedef std::basic_string<_CharT> _StringT;
00064 typedef regex_constants::syntax_option_type _FlagT;
00065 typedef const std::ctype<_CharT> _CtypeT;
00066
00067
00068 enum _TokenT
00069 {
00070 _S_token_anychar,
00071 _S_token_backref,
00072 _S_token_bracket_begin,
00073 _S_token_bracket_end,
00074 _S_token_inverse_class,
00075 _S_token_char_class_name,
00076 _S_token_closure0,
00077 _S_token_closure1,
00078 _S_token_collelem_multi,
00079 _S_token_collelem_single,
00080 _S_token_collsymbol,
00081 _S_token_comma,
00082 _S_token_dash,
00083 _S_token_dup_count,
00084 _S_token_eof,
00085 _S_token_equiv_class_name,
00086 _S_token_interval_begin,
00087 _S_token_interval_end,
00088 _S_token_line_begin,
00089 _S_token_line_end,
00090 _S_token_opt,
00091 _S_token_or,
00092 _S_token_ord_char,
00093 _S_token_quoted_char,
00094 _S_token_subexpr_begin,
00095 _S_token_subexpr_end,
00096 _S_token_word_begin,
00097 _S_token_word_end,
00098 _S_token_unknown
00099 };
00100
00101 public:
00102 _Scanner(_IteratorT __begin, _IteratorT __end, _FlagT __flags,
00103 std::locale __loc)
00104 : _M_current(__begin) , _M_end(__end) , _M_flags(__flags),
00105 _M_ctype(std::use_facet<_CtypeT>(__loc)), _M_state(_S_state_at_start)
00106 { _M_advance(); }
00107
00108 void
00109 _M_advance();
00110
00111 _TokenT
00112 _M_token() const
00113 { return _M_curToken; }
00114
00115 const _StringT&
00116 _M_value() const
00117 { return _M_curValue; }
00118
00119 #ifdef _GLIBCXX_DEBUG
00120 std::ostream&
00121 _M_print(std::ostream&);
00122 #endif
00123
00124 private:
00125 void
00126 _M_eat_escape();
00127
00128 void
00129 _M_scan_in_brace();
00130
00131 void
00132 _M_scan_in_bracket();
00133
00134 void
00135 _M_eat_charclass();
00136
00137 void
00138 _M_eat_equivclass();
00139
00140 void
00141 _M_eat_collsymbol();
00142
00143 private:
00144 _IteratorT _M_current;
00145 _IteratorT _M_end;
00146 _FlagT _M_flags;
00147 _CtypeT& _M_ctype;
00148 _TokenT _M_curToken;
00149 _StringT _M_curValue;
00150 _StateT _M_state;
00151 };
00152
00153 template<typename _InputIterator>
00154 void
00155 _Scanner<_InputIterator>::
00156 _M_advance()
00157 {
00158 if (_M_current == _M_end)
00159 {
00160 _M_curToken = _S_token_eof;
00161 return;
00162 }
00163
00164 _CharT __c = *_M_current;
00165 if (_M_state & _S_state_in_bracket)
00166 {
00167 _M_scan_in_bracket();
00168 return;
00169 }
00170 if (_M_state & _S_state_in_brace)
00171 {
00172 _M_scan_in_brace();
00173 return;
00174 }
00175 else if (_M_state & _S_state_at_start && __c == _M_ctype.widen('^'))
00176 {
00177 _M_curToken = _S_token_line_begin;
00178 ++_M_current;
00179 return;
00180 }
00181 else if (__c == _M_ctype.widen('$'))
00182 {
00183 _M_curToken = _S_token_line_end;
00184 ++_M_current;
00185 return;
00186 }
00187 else if (__c == _M_ctype.widen('.'))
00188 {
00189 _M_curToken = _S_token_anychar;
00190 ++_M_current;
00191 return;
00192 }
00193 else if (__c == _M_ctype.widen('*'))
00194 {
00195 _M_curToken = _S_token_closure0;
00196 ++_M_current;
00197 return;
00198 }
00199 else if (__c == _M_ctype.widen('+'))
00200 {
00201 _M_curToken = _S_token_closure1;
00202 ++_M_current;
00203 return;
00204 }
00205 else if (__c == _M_ctype.widen('|'))
00206 {
00207 _M_curToken = _S_token_or;
00208 ++_M_current;
00209 return;
00210 }
00211 else if (__c == _M_ctype.widen('['))
00212 {
00213 _M_curToken = _S_token_bracket_begin;
00214 _M_state |= (_S_state_in_bracket | _S_state_at_start);
00215 ++_M_current;
00216 return;
00217 }
00218 else if (__c == _M_ctype.widen('\\'))
00219 {
00220 _M_eat_escape();
00221 return;
00222 }
00223 else if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
00224 {
00225 if (__c == _M_ctype.widen('('))
00226 {
00227 _M_curToken = _S_token_subexpr_begin;
00228 ++_M_current;
00229 return;
00230 }
00231 else if (__c == _M_ctype.widen(')'))
00232 {
00233 _M_curToken = _S_token_subexpr_end;
00234 ++_M_current;
00235 return;
00236 }
00237 else if (__c == _M_ctype.widen('{'))
00238 {
00239 _M_curToken = _S_token_interval_begin;
00240 _M_state |= _S_state_in_brace;
00241 ++_M_current;
00242 return;
00243 }
00244 }
00245
00246 _M_curToken = _S_token_ord_char;
00247 _M_curValue.assign(1, __c);
00248 ++_M_current;
00249 }
00250
00251
00252 template<typename _InputIterator>
00253 void
00254 _Scanner<_InputIterator>::
00255 _M_scan_in_brace()
00256 {
00257 if (_M_ctype.is(_CtypeT::digit, *_M_current))
00258 {
00259 _M_curToken = _S_token_dup_count;
00260 _M_curValue.assign(1, *_M_current);
00261 ++_M_current;
00262 while (_M_current != _M_end
00263 && _M_ctype.is(_CtypeT::digit, *_M_current))
00264 {
00265 _M_curValue += *_M_current;
00266 ++_M_current;
00267 }
00268 return;
00269 }
00270 else if (*_M_current == _M_ctype.widen(','))
00271 {
00272 _M_curToken = _S_token_comma;
00273 ++_M_current;
00274 return;
00275 }
00276 if (_M_flags & (regex_constants::basic | regex_constants::grep))
00277 {
00278 if (*_M_current == _M_ctype.widen('\\'))
00279 _M_eat_escape();
00280 }
00281 else
00282 {
00283 if (*_M_current == _M_ctype.widen('}'))
00284 {
00285 _M_curToken = _S_token_interval_end;
00286 _M_state &= ~_S_state_in_brace;
00287 ++_M_current;
00288 return;
00289 }
00290 }
00291 }
00292
00293 template<typename _InputIterator>
00294 void
00295 _Scanner<_InputIterator>::
00296 _M_scan_in_bracket()
00297 {
00298 if (_M_state & _S_state_at_start && *_M_current == _M_ctype.widen('^'))
00299 {
00300 _M_curToken = _S_token_inverse_class;
00301 _M_state &= ~_S_state_at_start;
00302 ++_M_current;
00303 return;
00304 }
00305 else if (*_M_current == _M_ctype.widen('['))
00306 {
00307 ++_M_current;
00308 if (_M_current == _M_end)
00309 {
00310 _M_curToken = _S_token_eof;
00311 return;
00312 }
00313
00314 if (*_M_current == _M_ctype.widen('.'))
00315 {
00316 _M_curToken = _S_token_collsymbol;
00317 _M_eat_collsymbol();
00318 return;
00319 }
00320 else if (*_M_current == _M_ctype.widen(':'))
00321 {
00322 _M_curToken = _S_token_char_class_name;
00323 _M_eat_charclass();
00324 return;
00325 }
00326 else if (*_M_current == _M_ctype.widen('='))
00327 {
00328 _M_curToken = _S_token_equiv_class_name;
00329 _M_eat_equivclass();
00330 return;
00331 }
00332 }
00333 else if (*_M_current == _M_ctype.widen('-'))
00334 {
00335 _M_curToken = _S_token_dash;
00336 ++_M_current;
00337 return;
00338 }
00339 else if (*_M_current == _M_ctype.widen(']'))
00340 {
00341 if (!(_M_flags & regex_constants::ECMAScript)
00342 || !(_M_state & _S_state_at_start))
00343 {
00344
00345
00346 _M_curToken = _S_token_bracket_end;
00347 ++_M_current;
00348 return;
00349 }
00350 }
00351 _M_curToken = _S_token_collelem_single;
00352 _M_curValue.assign(1, *_M_current);
00353 ++_M_current;
00354 }
00355
00356 template<typename _InputIterator>
00357 void
00358 _Scanner<_InputIterator>::
00359 _M_eat_escape()
00360 {
00361 ++_M_current;
00362 if (_M_current == _M_end)
00363 {
00364 _M_curToken = _S_token_eof;
00365 return;
00366 }
00367 _CharT __c = *_M_current;
00368 ++_M_current;
00369
00370 if (__c == _M_ctype.widen('('))
00371 {
00372 if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
00373 {
00374 _M_curToken = _S_token_ord_char;
00375 _M_curValue.assign(1, __c);
00376 }
00377 else
00378 _M_curToken = _S_token_subexpr_begin;
00379 }
00380 else if (__c == _M_ctype.widen(')'))
00381 {
00382 if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
00383 {
00384 _M_curToken = _S_token_ord_char;
00385 _M_curValue.assign(1, __c);
00386 }
00387 else
00388 _M_curToken = _S_token_subexpr_end;
00389 }
00390 else if (__c == _M_ctype.widen('{'))
00391 {
00392 if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
00393 {
00394 _M_curToken = _S_token_ord_char;
00395 _M_curValue.assign(1, __c);
00396 }
00397 else
00398 {
00399 _M_curToken = _S_token_interval_begin;
00400 _M_state |= _S_state_in_brace;
00401 }
00402 }
00403 else if (__c == _M_ctype.widen('}'))
00404 {
00405 if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
00406 {
00407 _M_curToken = _S_token_ord_char;
00408 _M_curValue.assign(1, __c);
00409 }
00410 else
00411 {
00412 if (!(_M_state && _S_state_in_brace))
00413 __throw_regex_error(regex_constants::error_badbrace);
00414 _M_state &= ~_S_state_in_brace;
00415 _M_curToken = _S_token_interval_end;
00416 }
00417 }
00418 else if (__c == _M_ctype.widen('x'))
00419 {
00420 ++_M_current;
00421 if (_M_current == _M_end)
00422 {
00423 _M_curToken = _S_token_eof;
00424 return;
00425 }
00426 if (_M_ctype.is(_CtypeT::digit, *_M_current))
00427 {
00428 _M_curValue.assign(1, *_M_current);
00429 ++_M_current;
00430 if (_M_current == _M_end)
00431 {
00432 _M_curToken = _S_token_eof;
00433 return;
00434 }
00435 if (_M_ctype.is(_CtypeT::digit, *_M_current))
00436 {
00437 _M_curValue += *_M_current;
00438 ++_M_current;
00439 return;
00440 }
00441 }
00442 }
00443 else if (__c == _M_ctype.widen('^')
00444 || __c == _M_ctype.widen('.')
00445 || __c == _M_ctype.widen('*')
00446 || __c == _M_ctype.widen('$')
00447 || __c == _M_ctype.widen('\\'))
00448 {
00449 _M_curToken = _S_token_ord_char;
00450 _M_curValue.assign(1, __c);
00451 }
00452 else if (_M_ctype.is(_CtypeT::digit, __c))
00453 {
00454 _M_curToken = _S_token_backref;
00455 _M_curValue.assign(1, __c);
00456 }
00457 else
00458 __throw_regex_error(regex_constants::error_escape);
00459 }
00460
00461
00462
00463
00464 template<typename _InputIterator>
00465 void
00466 _Scanner<_InputIterator>::
00467 _M_eat_charclass()
00468 {
00469 ++_M_current;
00470 if (_M_current == _M_end)
00471 __throw_regex_error(regex_constants::error_ctype);
00472 for (_M_curValue.clear();
00473 _M_current != _M_end && *_M_current != _M_ctype.widen(':');
00474 ++_M_current)
00475 _M_curValue += *_M_current;
00476 if (_M_current == _M_end)
00477 __throw_regex_error(regex_constants::error_ctype);
00478 ++_M_current;
00479 if (*_M_current != _M_ctype.widen(']'))
00480 __throw_regex_error(regex_constants::error_ctype);
00481 ++_M_current;
00482 }
00483
00484
00485 template<typename _InputIterator>
00486 void
00487 _Scanner<_InputIterator>::
00488 _M_eat_equivclass()
00489 {
00490 ++_M_current;
00491 if (_M_current == _M_end)
00492 __throw_regex_error(regex_constants::error_collate);
00493 for (_M_curValue.clear();
00494 _M_current != _M_end && *_M_current != _M_ctype.widen('=');
00495 ++_M_current)
00496 _M_curValue += *_M_current;
00497 if (_M_current == _M_end)
00498 __throw_regex_error(regex_constants::error_collate);
00499 ++_M_current;
00500 if (*_M_current != _M_ctype.widen(']'))
00501 __throw_regex_error(regex_constants::error_collate);
00502 ++_M_current;
00503 }
00504
00505
00506 template<typename _InputIterator>
00507 void
00508 _Scanner<_InputIterator>::
00509 _M_eat_collsymbol()
00510 {
00511 ++_M_current;
00512 if (_M_current == _M_end)
00513 __throw_regex_error(regex_constants::error_collate);
00514 for (_M_curValue.clear();
00515 _M_current != _M_end && *_M_current != _M_ctype.widen('.');
00516 ++_M_current)
00517 _M_curValue += *_M_current;
00518 if (_M_current == _M_end)
00519 __throw_regex_error(regex_constants::error_collate);
00520 ++_M_current;
00521 if (*_M_current != _M_ctype.widen(']'))
00522 __throw_regex_error(regex_constants::error_collate);
00523 ++_M_current;
00524 }
00525
00526 #ifdef _GLIBCXX_DEBUG
00527 template<typename _InputIterator>
00528 std::ostream&
00529 _Scanner<_InputIterator>::
00530 _M_print(std::ostream& ostr)
00531 {
00532 switch (_M_curToken)
00533 {
00534 case _S_token_anychar:
00535 ostr << "any-character\n";
00536 break;
00537 case _S_token_backref:
00538 ostr << "backref\n";
00539 break;
00540 case _S_token_bracket_begin:
00541 ostr << "bracket-begin\n";
00542 break;
00543 case _S_token_bracket_end:
00544 ostr << "bracket-end\n";
00545 break;
00546 case _S_token_char_class_name:
00547 ostr << "char-class-name \"" << _M_curValue << "\"\n";
00548 break;
00549 case _S_token_closure0:
00550 ostr << "closure0\n";
00551 break;
00552 case _S_token_closure1:
00553 ostr << "closure1\n";
00554 break;
00555 case _S_token_collelem_multi:
00556 ostr << "coll-elem-multi \"" << _M_curValue << "\"\n";
00557 break;
00558 case _S_token_collelem_single:
00559 ostr << "coll-elem-single \"" << _M_curValue << "\"\n";
00560 break;
00561 case _S_token_collsymbol:
00562 ostr << "collsymbol \"" << _M_curValue << "\"\n";
00563 break;
00564 case _S_token_comma:
00565 ostr << "comma\n";
00566 break;
00567 case _S_token_dash:
00568 ostr << "dash\n";
00569 break;
00570 case _S_token_dup_count:
00571 ostr << "dup count: " << _M_curValue << "\n";
00572 break;
00573 case _S_token_eof:
00574 ostr << "EOF\n";
00575 break;
00576 case _S_token_equiv_class_name:
00577 ostr << "equiv-class-name \"" << _M_curValue << "\"\n";
00578 break;
00579 case _S_token_interval_begin:
00580 ostr << "interval begin\n";
00581 break;
00582 case _S_token_interval_end:
00583 ostr << "interval end\n";
00584 break;
00585 case _S_token_line_begin:
00586 ostr << "line begin\n";
00587 break;
00588 case _S_token_line_end:
00589 ostr << "line end\n";
00590 break;
00591 case _S_token_opt:
00592 ostr << "opt\n";
00593 break;
00594 case _S_token_or:
00595 ostr << "or\n";
00596 break;
00597 case _S_token_ord_char:
00598 ostr << "ordinary character: \"" << _M_value() << "\"\n";
00599 break;
00600 case _S_token_quoted_char:
00601 ostr << "quoted char\n";
00602 break;
00603 case _S_token_subexpr_begin:
00604 ostr << "subexpr begin\n";
00605 break;
00606 case _S_token_subexpr_end:
00607 ostr << "subexpr end\n";
00608 break;
00609 case _S_token_word_begin:
00610 ostr << "word begin\n";
00611 break;
00612 case _S_token_word_end:
00613 ostr << "word end\n";
00614 break;
00615 case _S_token_unknown:
00616 ostr << "-- unknown token --\n";
00617 break;
00618 }
00619 return ostr;
00620 }
00621 #endif
00622
00623
00624 template<typename _InIter, typename _TraitsT>
00625 class _Compiler
00626 {
00627 public:
00628 typedef _InIter _IterT;
00629 typedef typename std::iterator_traits<_InIter>::value_type _CharT;
00630 typedef std::basic_string<_CharT> _StringT;
00631 typedef regex_constants::syntax_option_type _FlagT;
00632
00633 public:
00634 _Compiler(const _InIter& __b, const _InIter& __e,
00635 _TraitsT& __traits, _FlagT __flags);
00636
00637 const _Nfa&
00638 _M_nfa() const
00639 { return _M_state_store; }
00640
00641 private:
00642 typedef _Scanner<_InIter> _ScannerT;
00643 typedef typename _ScannerT::_TokenT _TokenT;
00644 typedef std::stack<_StateSeq, std::vector<_StateSeq> > _StackT;
00645 typedef _RangeMatcher<_InIter, _TraitsT> _RMatcherT;
00646
00647
00648 bool
00649 _M_match_token(_TokenT __token);
00650
00651 void
00652 _M_disjunction();
00653
00654 bool
00655 _M_alternative();
00656
00657 bool
00658 _M_term();
00659
00660 bool
00661 _M_assertion();
00662
00663 bool
00664 _M_quantifier();
00665
00666 bool
00667 _M_atom();
00668
00669 bool
00670 _M_bracket_expression();
00671
00672 bool
00673 _M_bracket_list(_RMatcherT& __matcher);
00674
00675 bool
00676 _M_follow_list(_RMatcherT& __matcher);
00677
00678 bool
00679 _M_follow_list2(_RMatcherT& __matcher);
00680
00681 bool
00682 _M_expression_term(_RMatcherT& __matcher);
00683
00684 bool
00685 _M_range_expression(_RMatcherT& __matcher);
00686
00687 bool
00688 _M_start_range(_RMatcherT& __matcher);
00689
00690 bool
00691 _M_collating_symbol(_RMatcherT& __matcher);
00692
00693 bool
00694 _M_equivalence_class(_RMatcherT& __matcher);
00695
00696 bool
00697 _M_character_class(_RMatcherT& __matcher);
00698
00699 int
00700 _M_cur_int_value(int __radix);
00701
00702 private:
00703 _TraitsT& _M_traits;
00704 _ScannerT _M_scanner;
00705 _StringT _M_cur_value;
00706 _Nfa _M_state_store;
00707 _StackT _M_stack;
00708 };
00709
00710 template<typename _InIter, typename _TraitsT>
00711 _Compiler<_InIter, _TraitsT>::
00712 _Compiler(const _InIter& __b, const _InIter& __e, _TraitsT& __traits,
00713 _Compiler<_InIter, _TraitsT>::_FlagT __flags)
00714 : _M_traits(__traits), _M_scanner(__b, __e, __flags, _M_traits.getloc()),
00715 _M_state_store(__flags)
00716 {
00717 using std::bind;
00718 using std::placeholders::_1;
00719 using std::placeholders::_2;
00720 typedef _StartTagger<_InIter, _TraitsT> _Start;
00721 typedef _EndTagger<_InIter, _TraitsT> _End;
00722
00723 _StateSeq __r(_M_state_store,
00724 _M_state_store._M_insert_subexpr_begin(
00725 bind(_Start(0), _1, _2)));
00726 _M_disjunction();
00727 if (!_M_stack.empty())
00728 {
00729 __r._M_append(_M_stack.top());
00730 _M_stack.pop();
00731 }
00732 __r._M_append(_M_state_store.
00733 _M_insert_subexpr_end(0, bind(_End(0), _1, _2)));
00734 __r._M_append(_M_state_store._M_insert_accept());
00735 }
00736
00737 template<typename _InIter, typename _TraitsT>
00738 bool
00739 _Compiler<_InIter, _TraitsT>::
00740 _M_match_token(_Compiler<_InIter, _TraitsT>::_TokenT token)
00741 {
00742 if (token == _M_scanner._M_token())
00743 {
00744 _M_cur_value = _M_scanner._M_value();
00745 _M_scanner._M_advance();
00746 return true;
00747 }
00748 return false;
00749 }
00750
00751 template<typename _InIter, typename _TraitsT>
00752 void
00753 _Compiler<_InIter, _TraitsT>::
00754 _M_disjunction()
00755 {
00756 this->_M_alternative();
00757 if (_M_match_token(_ScannerT::_S_token_or))
00758 {
00759 _StateSeq __alt1 = _M_stack.top(); _M_stack.pop();
00760 this->_M_disjunction();
00761 _StateSeq __alt2 = _M_stack.top(); _M_stack.pop();
00762 _M_stack.push(_StateSeq(__alt1, __alt2));
00763 }
00764 }
00765
00766 template<typename _InIter, typename _TraitsT>
00767 bool
00768 _Compiler<_InIter, _TraitsT>::
00769 _M_alternative()
00770 {
00771 if (this->_M_term())
00772 {
00773 _StateSeq __re = _M_stack.top(); _M_stack.pop();
00774 this->_M_alternative();
00775 if (!_M_stack.empty())
00776 {
00777 __re._M_append(_M_stack.top());
00778 _M_stack.pop();
00779 }
00780 _M_stack.push(__re);
00781 return true;
00782 }
00783 return false;
00784 }
00785
00786 template<typename _InIter, typename _TraitsT>
00787 bool
00788 _Compiler<_InIter, _TraitsT>::
00789 _M_term()
00790 {
00791 if (this->_M_assertion())
00792 return true;
00793 if (this->_M_atom())
00794 {
00795 this->_M_quantifier();
00796 return true;
00797 }
00798 return false;
00799 }
00800
00801 template<typename _InIter, typename _TraitsT>
00802 bool
00803 _Compiler<_InIter, _TraitsT>::
00804 _M_assertion()
00805 {
00806 if (_M_match_token(_ScannerT::_S_token_line_begin))
00807 {
00808
00809 return true;
00810 }
00811 if (_M_match_token(_ScannerT::_S_token_line_end))
00812 {
00813
00814 return true;
00815 }
00816 if (_M_match_token(_ScannerT::_S_token_word_begin))
00817 {
00818
00819 return true;
00820 }
00821 if (_M_match_token(_ScannerT::_S_token_word_end))
00822 {
00823
00824 return true;
00825 }
00826 return false;
00827 }
00828
00829 template<typename _InIter, typename _TraitsT>
00830 bool
00831 _Compiler<_InIter, _TraitsT>::
00832 _M_quantifier()
00833 {
00834 if (_M_match_token(_ScannerT::_S_token_closure0))
00835 {
00836 if (_M_stack.empty())
00837 __throw_regex_error(regex_constants::error_badrepeat);
00838 _StateSeq __r(_M_stack.top(), -1);
00839 __r._M_append(__r._M_front());
00840 _M_stack.pop();
00841 _M_stack.push(__r);
00842 return true;
00843 }
00844 if (_M_match_token(_ScannerT::_S_token_closure1))
00845 {
00846 if (_M_stack.empty())
00847 __throw_regex_error(regex_constants::error_badrepeat);
00848 _StateSeq __r(_M_state_store,
00849 _M_state_store.
00850 _M_insert_alt(_S_invalid_state_id,
00851 _M_stack.top()._M_front()));
00852 _M_stack.top()._M_append(__r);
00853 return true;
00854 }
00855 if (_M_match_token(_ScannerT::_S_token_opt))
00856 {
00857 if (_M_stack.empty())
00858 __throw_regex_error(regex_constants::error_badrepeat);
00859 _StateSeq __r(_M_stack.top(), -1);
00860 _M_stack.pop();
00861 _M_stack.push(__r);
00862 return true;
00863 }
00864 if (_M_match_token(_ScannerT::_S_token_interval_begin))
00865 {
00866 if (_M_stack.empty())
00867 __throw_regex_error(regex_constants::error_badrepeat);
00868 if (!_M_match_token(_ScannerT::_S_token_dup_count))
00869 __throw_regex_error(regex_constants::error_badbrace);
00870 _StateSeq __r(_M_stack.top());
00871 int __min_rep = _M_cur_int_value(10);
00872 for (int __i = 1; __i < __min_rep; ++__i)
00873 _M_stack.top()._M_append(__r._M_clone());
00874 if (_M_match_token(_ScannerT::_S_token_comma))
00875 if (_M_match_token(_ScannerT::_S_token_dup_count))
00876 {
00877 int __n = _M_cur_int_value(10) - __min_rep;
00878 if (__n < 0)
00879 __throw_regex_error(regex_constants::error_badbrace);
00880 for (int __i = 0; __i < __n; ++__i)
00881 {
00882 _StateSeq __r(_M_state_store,
00883 _M_state_store.
00884 _M_insert_alt(_S_invalid_state_id,
00885 _M_stack.top()._M_front()));
00886 _M_stack.top()._M_append(__r);
00887 }
00888 }
00889 else
00890 {
00891 _StateSeq __r(_M_stack.top(), -1);
00892 __r._M_push_back(__r._M_front());
00893 _M_stack.pop();
00894 _M_stack.push(__r);
00895 }
00896 if (!_M_match_token(_ScannerT::_S_token_interval_end))
00897 __throw_regex_error(regex_constants::error_brace);
00898 return true;
00899 }
00900 return false;
00901 }
00902
00903 template<typename _InIter, typename _TraitsT>
00904 bool
00905 _Compiler<_InIter, _TraitsT>::
00906 _M_atom()
00907 {
00908 using std::bind;
00909 using std::placeholders::_1;
00910 using std::placeholders::_2;
00911 typedef _CharMatcher<_InIter, _TraitsT> _CMatcher;
00912 typedef _StartTagger<_InIter, _TraitsT> _Start;
00913 typedef _EndTagger<_InIter, _TraitsT> _End;
00914
00915 if (_M_match_token(_ScannerT::_S_token_anychar))
00916 {
00917 _M_stack.push(_StateSeq(_M_state_store,
00918 _M_state_store.
00919 _M_insert_matcher(bind(_AnyMatcher, _1))));
00920 return true;
00921 }
00922 if (_M_match_token(_ScannerT::_S_token_ord_char))
00923 {
00924 _M_stack.push(_StateSeq
00925 (_M_state_store, _M_state_store.
00926 _M_insert_matcher
00927 (bind(_CMatcher(_M_cur_value[0], _M_traits), _1))));
00928 return true;
00929 }
00930 if (_M_match_token(_ScannerT::_S_token_quoted_char))
00931 {
00932
00933 _M_stack.push(_StateSeq(_M_state_store,
00934 _M_state_store.
00935 _M_insert_matcher
00936 (bind(_CMatcher(_M_cur_value[0], _M_traits),
00937 _1))));
00938 return true;
00939 }
00940 if (_M_match_token(_ScannerT::_S_token_backref))
00941 {
00942
00943 return true;
00944 }
00945 if (_M_match_token(_ScannerT::_S_token_subexpr_begin))
00946 {
00947 int __mark = _M_state_store._M_sub_count();
00948 _StateSeq __r(_M_state_store,
00949 _M_state_store.
00950 _M_insert_subexpr_begin(bind(_Start(__mark), _1, _2)));
00951 this->_M_disjunction();
00952 if (!_M_match_token(_ScannerT::_S_token_subexpr_end))
00953 __throw_regex_error(regex_constants::error_paren);
00954 if (!_M_stack.empty())
00955 {
00956 __r._M_append(_M_stack.top());
00957 _M_stack.pop();
00958 }
00959 __r._M_append(_M_state_store._M_insert_subexpr_end
00960 (__mark, bind(_End(__mark), _1, _2)));
00961 _M_stack.push(__r);
00962 return true;
00963 }
00964 return _M_bracket_expression();
00965 }
00966
00967 template<typename _InIter, typename _TraitsT>
00968 bool
00969 _Compiler<_InIter, _TraitsT>::
00970 _M_bracket_expression()
00971 {
00972 using std::bind;
00973 using std::placeholders::_1;
00974 if (_M_match_token(_ScannerT::_S_token_bracket_begin))
00975 {
00976 _RMatcherT __matcher(_M_match_token(_ScannerT::_S_token_line_begin),
00977 _M_traits);
00978 if (!_M_bracket_list(__matcher)
00979 || !_M_match_token(_ScannerT::_S_token_bracket_end))
00980 __throw_regex_error(regex_constants::error_brack);
00981 _M_stack.push(_StateSeq(_M_state_store,
00982 _M_state_store._M_insert_matcher
00983 (bind(__matcher, _1))));
00984 return true;
00985 }
00986 return false;
00987 }
00988
00989
00990
00991 template<typename _InIter, typename _TraitsT>
00992 bool
00993 _Compiler<_InIter, _TraitsT>::
00994 _M_bracket_list(_RMatcherT& __matcher)
00995 {
00996 if (_M_follow_list(__matcher))
00997 {
00998 if (_M_match_token(_ScannerT::_S_token_dash))
00999 __matcher._M_add_char(_M_cur_value[0]);
01000 return true;
01001 }
01002 return false;
01003 }
01004
01005 template<typename _InIter, typename _TraitsT>
01006 bool
01007 _Compiler<_InIter, _TraitsT>::
01008 _M_follow_list(_RMatcherT& __matcher)
01009 { return _M_expression_term(__matcher) && _M_follow_list2(__matcher); }
01010
01011 template<typename _InIter, typename _TraitsT>
01012 bool
01013 _Compiler<_InIter, _TraitsT>::
01014 _M_follow_list2(_RMatcherT& __matcher)
01015 {
01016 if (_M_expression_term(__matcher))
01017 return _M_follow_list2(__matcher);
01018 return true;
01019 }
01020
01021 template<typename _InIter, typename _TraitsT>
01022 bool
01023 _Compiler<_InIter, _TraitsT>::
01024 _M_expression_term(_RMatcherT& __matcher)
01025 {
01026 return (_M_collating_symbol(__matcher)
01027 || _M_character_class(__matcher)
01028 || _M_equivalence_class(__matcher)
01029 || (_M_start_range(__matcher)
01030 && _M_range_expression(__matcher)));
01031 }
01032
01033 template<typename _InIter, typename _TraitsT>
01034 bool
01035 _Compiler<_InIter, _TraitsT>::
01036 _M_range_expression(_RMatcherT& __matcher)
01037 {
01038 if (!_M_collating_symbol(__matcher))
01039 if (!_M_match_token(_ScannerT::_S_token_dash))
01040 __throw_regex_error(regex_constants::error_range);
01041 __matcher._M_make_range();
01042 return true;
01043 }
01044
01045 template<typename _InIter, typename _TraitsT>
01046 bool
01047 _Compiler<_InIter, _TraitsT>::
01048 _M_start_range(_RMatcherT& __matcher)
01049 { return _M_match_token(_ScannerT::_S_token_dash); }
01050
01051 template<typename _InIter, typename _TraitsT>
01052 bool
01053 _Compiler<_InIter, _TraitsT>::
01054 _M_collating_symbol(_RMatcherT& __matcher)
01055 {
01056 if (_M_match_token(_ScannerT::_S_token_collelem_single))
01057 {
01058 __matcher._M_add_char(_M_cur_value[0]);
01059 return true;
01060 }
01061 if (_M_match_token(_ScannerT::_S_token_collsymbol))
01062 {
01063 __matcher._M_add_collating_element(_M_cur_value);
01064 return true;
01065 }
01066 return false;
01067 }
01068
01069 template<typename _InIter, typename _TraitsT>
01070 bool
01071 _Compiler<_InIter, _TraitsT>::
01072 _M_equivalence_class(_RMatcherT& __matcher)
01073 {
01074 if (_M_match_token(_ScannerT::_S_token_equiv_class_name))
01075 {
01076 __matcher._M_add_equivalence_class(_M_cur_value);
01077 return true;
01078 }
01079 return false;
01080 }
01081
01082 template<typename _InIter, typename _TraitsT>
01083 bool
01084 _Compiler<_InIter, _TraitsT>::
01085 _M_character_class(_RMatcherT& __matcher)
01086 {
01087 if (_M_match_token(_ScannerT::_S_token_char_class_name))
01088 {
01089 __matcher._M_add_character_class(_M_cur_value);
01090 return true;
01091 }
01092 return false;
01093 }
01094
01095 template<typename _InIter, typename _TraitsT>
01096 int
01097 _Compiler<_InIter, _TraitsT>::
01098 _M_cur_int_value(int __radix)
01099 {
01100 int __v = 0;
01101 for (typename _StringT::size_type __i = 0;
01102 __i < _M_cur_value.length(); ++__i)
01103 __v =__v * __radix + _M_traits.value(_M_cur_value[__i], __radix);
01104 return __v;
01105 }
01106
01107 template<typename _InIter, typename _TraitsT>
01108 _AutomatonPtr
01109 __compile(const _InIter& __b, const _InIter& __e, _TraitsT& __t,
01110 regex_constants::syntax_option_type __f)
01111 { return _AutomatonPtr(new _Nfa(_Compiler<_InIter, _TraitsT>(__b, __e, __t,
01112 __f)._M_nfa())); }
01113
01114 }
01115
01116 _GLIBCXX_END_NAMESPACE_VERSION
01117 }
01118
01119