1 from lepl.stream.core import DUMMY_HELPER
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31 '''
32 Tests for the lepl.regexp.rewriters module.
33 '''
34
35 from logging import basicConfig, DEBUG
36 from string import ascii_letters
37 from unittest import TestCase
38
39 from lepl import *
40 from lepl.regexp.rewriters import CompileRegexp
41
42
43
44
45
47
63
65
# Alternation of two single-character matchers, checked after NFA compilation.
rx = Any('a') | Any('b')

configurations = (
    # NFA compilation on its own.
    lambda config: config.clear().compile_to_nfa(force=True).no_memoize(),
    # NFA compilation together with transform composition.
    lambda config: (config.clear().compile_to_nfa(force=True)
                    .compose_transforms().no_memoize()),
)
for configure in configurations:
    configure(rx.config)
    matcher = rx.get_match_sequence()
    # Each input is expected to give a single result holding its first
    # character, paired with (1, DUMMY_HELPER).
    for text, letter in (('bq', 'b'), ('aq', 'a')):
        results = list(matcher(text))
        assert results == [([letter], (1, DUMMY_HELPER))], results
    # The whole expression should have been replaced by one NfaRegexp.
    assert isinstance(matcher.matcher, NfaRegexp)
83
# Two single-character matchers joined with '+', checked after NFA compilation.
rx = Any('a') + Any('b')
joined = [(['ab'], (2, DUMMY_HELPER))]

configurations = (
    lambda config: config.clear().compile_to_nfa(force=True).no_memoize(),
    # NOTE: this variant does not call no_memoize(), unlike the first one.
    lambda config: config.clear().compile_to_nfa(force=True).compose_transforms(),
)
for configure in configurations:
    configure(rx.config)
    matcher = rx.get_match_sequence()
    results = list(matcher('abq'))
    assert results == joined, results
    # On failure, show the matcher tree to reveal what was built instead.
    assert isinstance(matcher.matcher, NfaRegexp), matcher.matcher.tree()
98
# The same 'ab' match as an explicit Add(And(...)) construction.
rx = Add(And(Any('a'), Any('b')))
joined = [(['ab'], (2, DUMMY_HELPER))]

configurations = (
    lambda config: config.clear().compile_to_nfa(force=True).no_memoize(),
    # NOTE: this variant does not call no_memoize(), unlike the first one.
    lambda config: config.clear().compile_to_nfa(force=True).compose_transforms(),
)
for configure in configurations:
    configure(rx.config)
    matcher = rx.get_match_sequence()
    results = list(matcher('abq'))
    assert results == joined, results
    # On failure, show the matcher tree to reveal what was built instead.
    assert isinstance(matcher.matcher, NfaRegexp), matcher.matcher.tree()
113
# A multi-character literal, checked after NFA compilation.
rx = Literal('abc')
whole_literal = [(['abc'], (3, DUMMY_HELPER))]

configurations = (
    lambda config: config.clear().compile_to_nfa(force=True).no_memoize(),
    lambda config: (config.clear().compile_to_nfa(force=True)
                    .compose_transforms().no_memoize()),
)
for configure in configurations:
    configure(rx.config)
    matcher = rx.get_match_sequence()
    # The type is checked before matching, so a failed rewrite is reported
    # with the matcher tree rather than as a wrong match result.
    assert isinstance(matcher.matcher, NfaRegexp), matcher.matcher.tree()
    results = list(matcher('abcd'))
    assert results == whole_literal, results
128
# A literal whose result is post-processed: the function appends 'e'.
rx = Literal('abc') >> (lambda x: x+'e')
transformed = [(['abce'], (3, DUMMY_HELPER))]

configurations = (
    lambda config: config.clear().compile_to_nfa(force=True).no_memoize(),
    lambda config: (config.clear().compile_to_nfa(force=True)
                    .compose_transforms().no_memoize()),
)
for configure in configurations:
    configure(rx.config)
    matcher = rx.get_match_sequence()
    # Diagnostic dump of the rewritten matcher tree.
    print(matcher.matcher.tree())
    results = list(matcher('abcd'))
    assert results == transformed, results
    assert isinstance(matcher.matcher, NfaRegexp), matcher.matcher.tree()
144
# Expected results for 'abcd', longest first:
#   ['abcd'] at 4, ['abc'] at 3, ['ab'] at 2, ['a'] at 1, then [] at 0.
expected = [(['abcd'[:end]] if end else [], (end, DUMMY_HELPER))
            for end in range(4, -1, -1)]
rx = Any()[:, ...]

# Each entry pairs a configuration with whether the resulting matcher is
# also required to be an NfaRegexp.
cases = (
    (lambda config: config.clear().no_memoize(), False),
    (lambda config: config.clear().compose_transforms().no_memoize(), False),
    (lambda config: config.clear().compile_to_nfa().no_memoize(), True),
    (lambda config: (config.clear().compile_to_nfa()
                     .compose_transforms().no_memoize()), True),
)
for configure, expect_nfa in cases:
    configure(rx.config)
    matcher = rx.get_match_sequence()
    results = list(matcher('abcd'))
    assert results == expected, results
    if expect_nfa:
        assert isinstance(matcher.matcher, NfaRegexp), matcher.matcher.tree()
176
178
# Either the literal 'foo', or 'ba' followed by one or more further 'a's.
rx = Literal('foo') | (Literal('ba') + Any('a')[1:,...])

# Input text paired with the full list of expected results, in order.
expectations = (
    ('foo', [(['foo'], (3, DUMMY_HELPER))]),
    # 'baaaaa' (6) down to 'baa' (3): at least one 'a' must follow 'ba'.
    ('baaaaax', [(['ba' + 'a' * extra], (2 + extra, DUMMY_HELPER))
                 for extra in (4, 3, 2, 1)]),
    # A bare 'ba' has no trailing 'a', so nothing is expected.
    ('ba', []),
)
configurations = (
    # The first variant starts from the existing configuration (no clear()).
    lambda config: config.compile_to_nfa().no_full_first_match().no_memoize(),
    lambda config: (config.clear().compile_to_nfa()
                    .no_full_first_match().no_memoize()),
    lambda config: (config.clear().compile_to_nfa().no_full_first_match()
                    .compose_transforms().no_memoize()),
)
for configure in configurations:
    configure(rx.config)
    matcher = rx.get_match_sequence()
    assert isinstance(matcher.matcher, NfaRegexp), matcher.matcher.tree()
    for text, wanted in expectations:
        results = list(matcher(text))
        assert results == wanted, results
219
# Integer() compiled to an NFA: '12x' should give '12' and then '1'.
rx = Integer()
wanted = [(['12'], (2, DUMMY_HELPER)), (['1'], (1, DUMMY_HELPER))]

configurations = (
    # The first variant starts from the existing configuration (no clear()).
    lambda config: config.compile_to_nfa().no_full_first_match().no_memoize(),
    lambda config: (config.clear().compile_to_nfa()
                    .no_full_first_match().no_memoize()),
    lambda config: (config.clear().compile_to_nfa().no_full_first_match()
                    .compose_transforms().no_memoize()),
)
for configure in configurations:
    configure(rx.config)
    matcher = rx.get_match_sequence()
    results = list(matcher('12x'))
    assert results == wanted, results
    assert isinstance(matcher.matcher, NfaRegexp), matcher.matcher.tree()
240
# Real() compiled to an NFA: '1.2x' should give '1.2', '1.' and then '1'.
rx = Real()
wanted = [(['1.2'], (3, DUMMY_HELPER)),
          (['1.'], (2, DUMMY_HELPER)),
          (['1'], (1, DUMMY_HELPER))]

configurations = (
    # The first variant starts from the existing configuration (no clear()).
    lambda config: config.compile_to_nfa().no_full_first_match().no_memoize(),
    lambda config: (config.clear().compile_to_nfa()
                    .no_full_first_match().no_memoize()),
    lambda config: (config.clear().compile_to_nfa().no_full_first_match()
                    .compose_transforms().no_memoize()),
)
for configure in configurations:
    configure(rx.config)
    matcher = rx.get_match_sequence()
    results = list(matcher('1.2x'))
    assert results == wanted, results
    assert isinstance(matcher.matcher, NfaRegexp), matcher.matcher.tree()
261
# Float() compiled to an NFA: '1.2x' should give '1.2' and then '1.'.
# The bare '1' is not among the expected results here.
rx = Float()
wanted = [(['1.2'], (3, DUMMY_HELPER)), (['1.'], (2, DUMMY_HELPER))]

configurations = (
    # The first variant starts from the existing configuration (no clear()).
    lambda config: config.compile_to_nfa().no_full_first_match().no_memoize(),
    lambda config: (config.clear().compile_to_nfa()
                    .no_full_first_match().no_memoize()),
    # no_memoize() belongs on the configuration chain, matching the other
    # three-variant checks in this file. It had been attached to the
    # assertion message below by mistake.
    lambda config: (config.clear().compile_to_nfa().no_full_first_match()
                    .compose_transforms().no_memoize()),
)
for configure in configurations:
    configure(rx.config)
    matcher = rx.get_match_sequence()
    results = list(matcher('1.2x'))
    assert results == wanted, results
    # The message is only evaluated when the assertion fails, so it must be
    # a plain diagnostic: the tree() output and nothing chained onto it.
    assert isinstance(matcher.matcher, NfaRegexp), matcher.matcher.tree()
282
# Add(Star('a')) compiled to an NFA: 'aa' should give 'aa', 'a' and then
# the empty match.
rx = Add(Star('a'))
wanted = [(['aa'], (2, DUMMY_HELPER)),
          (['a'], (1, DUMMY_HELPER)),
          ([], (0, DUMMY_HELPER))]

configurations = (
    # The first variant starts from the existing configuration (no clear()).
    lambda config: config.compile_to_nfa().no_full_first_match().no_memoize(),
    lambda config: (config.clear().compile_to_nfa()
                    .no_full_first_match().no_memoize()),
    lambda config: (config.clear().compile_to_nfa().no_full_first_match()
                    .compose_transforms().no_memoize()),
)
for configure in configurations:
    configure(rx.config)
    matcher = rx.get_match_sequence()
    results = list(matcher('aa'))
    assert results == wanted, results
    assert isinstance(matcher.matcher, NfaRegexp), matcher.matcher.tree()
303
305
# Word('a') compiled to an NFA: 'aa' should give 'aa' and then 'a'.
# No empty match is expected.
rx = Word('a')
wanted = [(['aa'], (2, DUMMY_HELPER)), (['a'], (1, DUMMY_HELPER))]

configurations = (
    # The first variant starts from the existing configuration (no clear()).
    lambda config: config.compile_to_nfa().no_full_first_match().no_memoize(),
    lambda config: (config.clear().compile_to_nfa()
                    .no_full_first_match().no_memoize()),
    lambda config: (config.clear().compile_to_nfa().no_full_first_match()
                    .compose_transforms().no_memoize()),
)
for configure in configurations:
    configure(rx.config)
    matcher = rx.get_match_sequence()
    results = list(matcher('aa'))
    assert results == wanted, results
    assert isinstance(matcher.matcher, NfaRegexp), matcher.matcher.tree()
325
326
328 '''
329 Test the rewrite routine directly.
330 '''
331
337
341
344
# Each call pairs a repetition expression with the regexp text it is
# expected to produce.
check = self.assert_regexp

# Any() repeated with the given count or range.
check(Any()[:, ...], '(?:.)*')
check(Any()[1:, ...], '.(?:.)*')
check(Any()[1, ...], '.')
check(Any()[1:2, ...], '.(?:.)?')
check(Any()[2, ...], '..')
check(Any()[2:4, ...], '..(?:.(?:.)?)?')

# The same counts with 'x' as a second index; in the expected regexps the
# 'x' appears between successive repeats.
check(Any()[:, 'x', ...], '(?:.(?:x.)*|)')
check(Any()[1:, 'x', ...], '.(?:x.)*')
check(Any()[1, 'x', ...], '.')
check(Any()[1:2, 'x', ...], '.(?:x.)?')
check(Any()[2, 'x', ...], '.x.')
check(Any()[2:4, 'x', ...], '.x.(?:x.(?:x.)?)?')

# Repeating a multi-character literal groups the whole literal.
check(Literal('foo')[:, ...], '(?:foo)*')
359
362
365
369
370
387
388
390 '''
391 It used to be impossible to compile a raw Word().
392 '''
393
403
404
406 '''
407 Token(Word()) and Token(Any(ascii_letters)[1:]) gave errors.
408 '''
409
414
419
426
432