00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031 namespace std
00032 {
00033 namespace __regex
00034 {
00035
00036
/**
 * @brief Abstract interface shared by the automata in this header.
 *
 * Concrete automata (such as _Nfa below) derive from this class and
 * implement the pure virtual members.
 */
class _Automaton
{
public:
  /// Unsigned type returned by _M_sub_count().
  typedef unsigned int _SizeT;

  /// Virtual, so derived automata can be destroyed through this interface.
  virtual ~_Automaton() { }

  /// Supplied by the derived class; returns a count as _SizeT.
  virtual _SizeT _M_sub_count() const = 0;

#ifdef _GLIBCXX_DEBUG
  /// Debug-only hook: takes an output stream and returns a stream reference.
  virtual std::ostream& _M_dot(std::ostream& __ostr) const = 0;
#endif
};
00055
00056
00057 typedef std::shared_ptr<_Automaton> _AutomatonPtr;
00058
00059
00060
/// Operation codes stored in _State::_M_opcode.
enum _Opcode
{
  _S_opcode_unknown       =   0,  ///< No operation assigned.
  _S_opcode_alternative   =   1,  ///< Set by the (next, alt) _State ctor.
  _S_opcode_subexpr_begin =   4,  ///< Used by _Nfa::_M_insert_subexpr_begin.
  _S_opcode_subexpr_end   =   5,  ///< Used by _Nfa::_M_insert_subexpr_end.
  _S_opcode_match         = 100,  ///< Set by the _Matcher _State ctor.
  _S_opcode_accept        = 255   ///< Used by _Nfa::_M_insert_accept.
};
00070
00071
00072 struct _Results
00073 {
00074 virtual void _M_set_pos(int __i, int __j, const _PatternCursor& __p) = 0;
00075 virtual void _M_set_matched(int __i, bool __is_matched) = 0;
00076 };
00077
00078
00079 typedef std::function<void (const _PatternCursor&, _Results&)> _Tagger;
00080
00081 template<typename _FwdIterT, typename _TraitsT>
00082 struct _StartTagger
00083 : public _Tagger
00084 {
00085 explicit
00086 _StartTagger(int __i)
00087 : _M_index(__i)
00088 { }
00089
00090 void
00091 operator()(const _PatternCursor& __pc, _Results& __r)
00092 { __r._M_set_pos(_M_index, 0, __pc); }
00093
00094 int _M_index;
00095 };
00096
00097 template<typename _FwdIterT, typename _TraitsT>
00098 struct _EndTagger
00099 : public _Tagger
00100 {
00101 explicit
00102 _EndTagger(int __i)
00103 : _M_index(__i)
00104 { }
00105
00106 void
00107 operator()(const _PatternCursor& __pc, _Results& __r)
00108 { __r._M_set_pos(_M_index, 1, __pc); }
00109
00110 int _M_index;
00111 _FwdIterT _M_pos;
00112 };
00113
00114 typedef std::function<bool (const _PatternCursor&)> _Matcher;
00115
00116
00117 inline bool
00118 _AnyMatcher(const _PatternCursor&)
00119 { return true; }
00120
00121
00122 template<typename _InIterT, typename _TraitsT>
00123 struct _CharMatcher
00124 : public _Matcher
00125 {
00126 typedef typename _TraitsT::char_type char_type;
00127
00128 explicit
00129 _CharMatcher(char_type __c, const _TraitsT& __t = _TraitsT())
00130 : _M_traits(__t), _M_c(_M_traits.translate(__c))
00131 { }
00132
00133 bool
00134 operator()(const _PatternCursor& __pc) const
00135 {
00136 typedef const _SpecializedCursor<_InIterT>& _CursorT;
00137 _CursorT __c = static_cast<_CursorT>(__pc);
00138 return _M_traits.translate(__c._M_current()) == _M_c;
00139 }
00140
00141 const _TraitsT& _M_traits;
00142 char_type _M_c;
00143 };
00144
00145
00146 template<typename _InIterT, typename _TraitsT>
00147 struct _RangeMatcher
00148 : public _Matcher
00149 {
00150 typedef typename _TraitsT::char_type _CharT;
00151 typedef std::basic_string<_CharT> _StringT;
00152
00153 explicit
00154 _RangeMatcher(bool __is_non_matching, const _TraitsT& __t = _TraitsT())
00155 : _M_traits(__t), _M_is_non_matching(__is_non_matching)
00156 { }
00157
00158 bool
00159 operator()(const _PatternCursor& __pc) const
00160 {
00161 typedef const _SpecializedCursor<_InIterT>& _CursorT;
00162 _CursorT __c = static_cast<_CursorT>(__pc);
00163 return true;
00164 }
00165
00166 void
00167 _M_add_char(_CharT __c)
00168 { }
00169
00170 void
00171 _M_add_collating_element(const _StringT& __s)
00172 { }
00173
00174 void
00175 _M_add_equivalence_class(const _StringT& __s)
00176 { }
00177
00178 void
00179 _M_add_character_class(const _StringT& __s)
00180 { }
00181
00182 void
00183 _M_make_range()
00184 { }
00185
00186 const _TraitsT& _M_traits;
00187 bool _M_is_non_matching;
00188 };
00189
00190
/// Identifier of a state; _Nfa hands out vector indices as identifiers.
using _StateIdT = int;

/// Sentinel identifier; -1 is never produced by the _Nfa insert functions,
/// which return size()-1 after a push_back.
static const _StateIdT _S_invalid_state_id = _StateIdT(-1);
00195
00196
00197
00198
00199
00200
00201
00202
00203
00204 struct _State
00205 {
00206 typedef int _OpcodeT;
00207
00208 _OpcodeT _M_opcode;
00209 _StateIdT _M_next;
00210 _StateIdT _M_alt;
00211 unsigned int _M_subexpr;
00212 _Tagger _M_tagger;
00213 _Matcher _M_matches;
00214
00215 explicit _State(_OpcodeT __opcode)
00216 : _M_opcode(__opcode), _M_next(_S_invalid_state_id)
00217 { }
00218
00219 _State(const _Matcher& __m)
00220 : _M_opcode(_S_opcode_match), _M_next(_S_invalid_state_id), _M_matches(__m)
00221 { }
00222
00223 _State(_OpcodeT __opcode, unsigned int __s, const _Tagger& __t)
00224 : _M_opcode(__opcode), _M_next(_S_invalid_state_id), _M_subexpr(__s),
00225 _M_tagger(__t)
00226 { }
00227
00228 _State(_StateIdT __next, _StateIdT __alt)
00229 : _M_opcode(_S_opcode_alternative), _M_next(__next), _M_alt(__alt)
00230 { }
00231
00232 #ifdef _GLIBCXX_DEBUG
00233 std::ostream&
00234 _M_print(std::ostream& ostr) const;
00235
00236
00237 std::ostream&
00238 _M_dot(std::ostream& __ostr, _StateIdT __id) const;
00239 #endif
00240 };
00241
00242
00243
00244 typedef std::set<_StateIdT> _StateSet;
00245
00246
00247
00248
00249
00250
00251
00252
00253
00254
00255
00256
00257
00258
00259 class _Nfa
00260 : public _Automaton, public std::vector<_State>
00261 {
00262 public:
00263 typedef _State _StateT;
00264 typedef unsigned int _SizeT;
00265 typedef regex_constants::syntax_option_type _FlagT;
00266
00267 public:
00268 _Nfa(_FlagT __f)
00269 : _M_flags(__f), _M_start_state(0), _M_subexpr_count(0)
00270 { }
00271
00272 ~_Nfa()
00273 { }
00274
00275 _FlagT
00276 _M_options() const
00277 { return _M_flags; }
00278
00279 _StateIdT
00280 _M_start() const
00281 { return _M_start_state; }
00282
00283 const _StateSet&
00284 _M_final_states() const
00285 { return _M_accepting_states; }
00286
00287 _SizeT
00288 _M_sub_count() const
00289 { return _M_subexpr_count; }
00290
00291 _StateIdT
00292 _M_insert_accept()
00293 {
00294 this->push_back(_StateT(_S_opcode_accept));
00295 _M_accepting_states.insert(this->size()-1);
00296 return this->size()-1;
00297 }
00298
00299 _StateIdT
00300 _M_insert_alt(_StateIdT __next, _StateIdT __alt)
00301 {
00302 this->push_back(_StateT(__next, __alt));
00303 return this->size()-1;
00304 }
00305
00306 _StateIdT
00307 _M_insert_matcher(_Matcher __m)
00308 {
00309 this->push_back(_StateT(__m));
00310 return this->size()-1;
00311 }
00312
00313 _StateIdT
00314 _M_insert_subexpr_begin(const _Tagger& __t)
00315 {
00316 this->push_back(_StateT(_S_opcode_subexpr_begin, _M_subexpr_count++, __t));
00317 return this->size()-1;
00318 }
00319
00320 _StateIdT
00321 _M_insert_subexpr_end(unsigned int __i, const _Tagger& __t)
00322 {
00323 this->push_back(_StateT(_S_opcode_subexpr_end, __i, __t));
00324 return this->size()-1;
00325 }
00326
00327 #ifdef _GLIBCXX_DEBUG
00328 std::ostream&
00329 _M_dot(std::ostream& __ostr) const;
00330 #endif
00331
00332 private:
00333 _FlagT _M_flags;
00334 _StateIdT _M_start_state;
00335 _StateSet _M_accepting_states;
00336 _SizeT _M_subexpr_count;
00337 };
00338
00339
00340
00341
00342 class _StateSeq
00343 {
00344 public:
00345
00346 _StateSeq(_Nfa& __ss, _StateIdT __s, _StateIdT __e = _S_invalid_state_id)
00347 : _M_nfa(__ss), _M_start(__s), _M_end1(__s), _M_end2(__e)
00348 { }
00349
00350 _StateSeq(const _StateSeq& __e1, const _StateSeq& __e2)
00351 : _M_nfa(__e1._M_nfa),
00352 _M_start(_M_nfa._M_insert_alt(__e1._M_start, __e2._M_start)),
00353 _M_end1(__e1._M_end1), _M_end2(__e2._M_end1)
00354 { }
00355
00356
00357 _StateSeq(const _StateSeq& __e, _StateIdT __id)
00358 : _M_nfa(__e._M_nfa),
00359 _M_start(_M_nfa._M_insert_alt(__id, __e._M_start)),
00360 _M_end1(__id), _M_end2(__e._M_end1)
00361 { }
00362
00363
00364 _StateSeq(const _StateSeq& __rhs)
00365 : _M_nfa(__rhs._M_nfa), _M_start(__rhs._M_start),
00366 _M_end1(__rhs._M_end1), _M_end2(__rhs._M_end2)
00367 { }
00368
00369
00370 _StateSeq& operator=(const _StateSeq& __rhs);
00371
00372 _StateIdT
00373 _M_front() const
00374 { return _M_start; }
00375
00376
00377 void
00378 _M_push_back(_StateIdT __id);
00379
00380
00381 void
00382 _M_append(_StateIdT __id);
00383
00384 void
00385 _M_append(_StateSeq& __rhs);
00386
00387
00388 _StateIdT
00389 _M_clone();
00390
00391 private:
00392 _Nfa& _M_nfa;
00393 _StateIdT _M_start;
00394 _StateIdT _M_end1;
00395 _StateIdT _M_end2;
00396
00397 };
00398
00399 }
00400 }
00401
00402 #include <bits/regex_nfa.tcc>
00403