Package lepl :: Package regexp :: Package _test :: Module rewriters
[hide private]
[frames] | no frames]

Source Code for Module lepl.regexp._test.rewriters

  1  from lepl.stream.core import DUMMY_HELPER 
  2   
  3  # The contents of this file are subject to the Mozilla Public License 
  4  # (MPL) Version 1.1 (the "License"); you may not use this file except 
  5  # in compliance with the License. You may obtain a copy of the License 
  6  # at http://www.mozilla.org/MPL/ 
  7  # 
  8  # Software distributed under the License is distributed on an "AS IS" 
  9  # basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See 
 10  # the License for the specific language governing rights and 
 11  # limitations under the License. 
 12  # 
 13  # The Original Code is LEPL (http://www.acooke.org/lepl) 
 14  # The Initial Developer of the Original Code is Andrew Cooke. 
 15  # Portions created by the Initial Developer are Copyright (C) 2009-2010 
 16  # Andrew Cooke (andrew@acooke.org). All Rights Reserved. 
 17  # 
 18  # Alternatively, the contents of this file may be used under the terms 
 19  # of the LGPL license (the GNU Lesser General Public License, 
 20  # http://www.gnu.org/licenses/lgpl.html), in which case the provisions 
 21  # of the LGPL License are applicable instead of those above. 
 22  # 
 23  # If you wish to allow use of your version of this file only under the 
 24  # terms of the LGPL License and not to allow others to use your version 
 25  # of this file under the MPL, indicate your decision by deleting the 
 26  # provisions above and replace them with the notice and other provisions 
 27  # required by the LGPL License.  If you do not delete the provisions 
 28  # above, a recipient may use your version of this file under either the 
 29  # MPL or the LGPL License. 
 30   
 31  ''' 
 32  Tests for the lepl.regexp.rewriters module. 
 33  ''' 
 34   
 35  from logging import basicConfig, DEBUG 
 36  from string import ascii_letters 
 37  from unittest import TestCase 
 38   
 39  from lepl import * 
 40  from lepl.regexp.rewriters import CompileRegexp 
 41   
 42  # pylint: disable-msg=C0103, C0111, C0301, C0324 
 43  # (dude this is just a test) 
 44   
 45   
class RewriteTest(TestCase):
    '''
    Check the results of matchers after configuring them with
    `compile_to_nfa()` (alone and combined with `compose_transforms()`).
    '''

    def _check(self, rx, cases, nfa=True):
        '''
        Build a match sequence from `rx` using its current configuration and,
        for every `(text, expected)` pair in `cases`, assert that the complete
        list of results produced for `text` equals `expected`.

        When `nfa` is true, additionally assert that the wrapped matcher
        (`matcher.matcher`) is an `NfaRegexp`; the matcher tree is used as the
        failure message.
        '''
        matcher = rx.get_match_sequence()
        for (text, expected) in cases:
            results = list(matcher(text))
            assert results == expected, results
        if nfa:
            assert isinstance(matcher.matcher, NfaRegexp), matcher.matcher.tree()

    def test_any(self):
        '''A bare Any() consumes exactly one character.'''
        # basicConfig(level=DEBUG)  # uncomment for debug logging
        char = Any()
        cases = [('abc', [(['a'], (1, DUMMY_HELPER))])]

        char.config.clear().compile_to_nfa(force=True).no_memoize()
        self._check(char, cases)

        char.config.clear().compile_to_nfa(force=True).compose_transforms().no_memoize()
        self._check(char, cases)

    def test_or(self):
        '''Alternation of two single-character matchers.'''
        # basicConfig(level=DEBUG)  # uncomment for debug logging
        rx = Any('a') | Any('b')
        cases = [('bq', [(['b'], (1, DUMMY_HELPER))]),
                 ('aq', [(['a'], (1, DUMMY_HELPER))])]

        rx.config.clear().compile_to_nfa(force=True).no_memoize()
        self._check(rx, cases)

        rx.config.clear().compile_to_nfa(force=True).compose_transforms().no_memoize()
        self._check(rx, cases)

    def test_plus(self):
        '''Two matchers joined with `+` give a single joined result.'''
        rx = Any('a') + Any('b')
        cases = [('abq', [(['ab'], (2, DUMMY_HELPER))])]

        rx.config.clear().compile_to_nfa(force=True).no_memoize()
        self._check(rx, cases)

        rx.config.clear().compile_to_nfa(force=True).compose_transforms()
        self._check(rx, cases)

    def test_add(self):
        '''Same expectation as test_plus, spelled as an explicit Add(And(...)).'''
        rx = Add(And(Any('a'), Any('b')))
        cases = [('abq', [(['ab'], (2, DUMMY_HELPER))])]

        rx.config.clear().compile_to_nfa(force=True).no_memoize()
        self._check(rx, cases)

        rx.config.clear().compile_to_nfa(force=True).compose_transforms()
        self._check(rx, cases)

    def test_literal(self):
        '''A Literal, with and without a transform applied via `>>`.'''
        rx = Literal('abc')
        cases = [('abcd', [(['abc'], (3, DUMMY_HELPER))])]

        rx.config.clear().compile_to_nfa(force=True).no_memoize()
        self._check(rx, cases)

        rx.config.clear().compile_to_nfa(force=True).compose_transforms().no_memoize()
        self._check(rx, cases)

        # The same literal with a function applied to each result.
        rx = Literal('abc') >> (lambda x: x+'e')
        cases = [('abcd', [(['abce'], (3, DUMMY_HELPER))])]

        rx.config.clear().compile_to_nfa(force=True).no_memoize()
        self._check(rx, cases)

        rx.config.clear().compile_to_nfa(force=True).compose_transforms().no_memoize()
        self._check(rx, cases)

    def test_dfs(self):
        '''Open-ended repetition yields the longest match first, down to empty.'''
        expected = [(['abcd'], (4, DUMMY_HELPER)),
                    (['abc'], (3, DUMMY_HELPER)),
                    (['ab'], (2, DUMMY_HELPER)),
                    (['a'], (1, DUMMY_HELPER)),
                    ([], (0, DUMMY_HELPER))]
        rx = Any()[:, ...]
        cases = [('abcd', expected)]

        # do un-rewritten to check whether [] or [''] is correct
        rx.config.clear().no_memoize()
        self._check(rx, cases, nfa=False)

        rx.config.clear().compose_transforms().no_memoize()
        self._check(rx, cases, nfa=False)

        # basicConfig(level=DEBUG)  # uncomment for debug logging
        rx.config.clear().compile_to_nfa().no_memoize()
        self._check(rx, cases)

        rx.config.clear().compile_to_nfa().compose_transforms().no_memoize()
        self._check(rx, cases)

    def test_complex(self):
        '''An alternation between a literal and a literal followed by a repeat.'''
        # basicConfig(level=DEBUG)  # uncomment for debug logging
        rx = Literal('foo') | (Literal('ba') + Any('a')[1:,...])
        cases = [('foo', [(['foo'], (3, DUMMY_HELPER))]),
                 ('baaaaax', [(['baaaaa'], (6, DUMMY_HELPER)),
                              (['baaaa'], (5, DUMMY_HELPER)),
                              (['baaa'], (4, DUMMY_HELPER)),
                              (['baa'], (3, DUMMY_HELPER))]),
                 ('ba', [])]

        # First pass deliberately starts from the default configuration
        # (no clear()), as in the original test.
        rx.config.compile_to_nfa().no_full_first_match().no_memoize()
        self._check(rx, cases)

        rx.config.clear().compile_to_nfa().no_full_first_match().no_memoize()
        self._check(rx, cases)

        rx.config.clear().compile_to_nfa().no_full_first_match().compose_transforms().no_memoize()
        self._check(rx, cases)

    def test_integer(self):
        '''Integer() against "12x".'''
        rx = Integer()
        cases = [('12x', [(['12'], (2, DUMMY_HELPER)),
                          (['1'], (1, DUMMY_HELPER))])]

        rx.config.compile_to_nfa().no_full_first_match().no_memoize()
        self._check(rx, cases)

        rx.config.clear().compile_to_nfa().no_full_first_match().no_memoize()
        self._check(rx, cases)

        rx.config.clear().compile_to_nfa().no_full_first_match().compose_transforms().no_memoize()
        self._check(rx, cases)

    def test_real(self):
        '''Real() against "1.2x".'''
        rx = Real()
        cases = [('1.2x', [(['1.2'], (3, DUMMY_HELPER)),
                           (['1.'], (2, DUMMY_HELPER)),
                           (['1'], (1, DUMMY_HELPER))])]

        rx.config.compile_to_nfa().no_full_first_match().no_memoize()
        self._check(rx, cases)

        rx.config.clear().compile_to_nfa().no_full_first_match().no_memoize()
        self._check(rx, cases)

        rx.config.clear().compile_to_nfa().no_full_first_match().compose_transforms().no_memoize()
        self._check(rx, cases)

    def test_float(self):
        '''Float() against "1.2x".'''
        rx = Float()
        cases = [('1.2x', [(['1.2'], (3, DUMMY_HELPER)),
                           (['1.'], (2, DUMMY_HELPER))])]

        rx.config.compile_to_nfa().no_full_first_match().no_memoize()
        self._check(rx, cases)

        rx.config.clear().compile_to_nfa().no_full_first_match().no_memoize()
        self._check(rx, cases)

        # The original had `.no_memoize()` attached to the assertion message
        # (`matcher.matcher.tree().no_memoize()`) rather than to this config
        # chain; it is placed here to match the third configuration used by
        # the sibling tests (test_integer, test_real, test_star, test_word).
        # NOTE(review): presumably a no-op directly after clear() - confirm.
        rx.config.clear().compile_to_nfa().no_full_first_match().compose_transforms().no_memoize()
        self._check(rx, cases)

    def test_star(self):
        '''Add(Star('a')) against "aa", including the empty match.'''
        rx = Add(Star('a'))
        cases = [('aa', [(['aa'], (2, DUMMY_HELPER)),
                         (['a'], (1, DUMMY_HELPER)),
                         ([], (0, DUMMY_HELPER))])]

        rx.config.compile_to_nfa().no_full_first_match().no_memoize()
        self._check(rx, cases)

        rx.config.clear().compile_to_nfa().no_full_first_match().no_memoize()
        self._check(rx, cases)

        rx.config.clear().compile_to_nfa().no_full_first_match().compose_transforms().no_memoize()
        self._check(rx, cases)

    def test_word(self):
        '''Word('a') against "aa".'''
        # basicConfig(level=DEBUG)  # uncomment for debug logging
        rx = Word('a')
        cases = [('aa', [(['aa'], (2, DUMMY_HELPER)),
                         (['a'], (1, DUMMY_HELPER))])]

        rx.config.compile_to_nfa().no_full_first_match().no_memoize()
        self._check(rx, cases)

        rx.config.clear().compile_to_nfa().no_full_first_match().no_memoize()
        self._check(rx, cases)

        rx.config.clear().compile_to_nfa().no_full_first_match().compose_transforms().no_memoize()
        self._check(rx, cases)
325 326
class CompileTest(TestCase):
    '''
    Call the CompileRegexp rewriter directly and compare the text of the
    regular expression it generates.
    '''

    def assert_regexp(self, matcher, regexp):
        '''
        Rewrite `matcher` with `CompileRegexp(use=True)` and assert that the
        result is an `NfaRegexp` whose regexp, converted with `str()`, equals
        `regexp`.
        '''
        rewritten = CompileRegexp(use=True)(matcher)
        assert isinstance(rewritten, NfaRegexp), rewritten.tree()
        assert str(rewritten.regexp) == regexp, rewritten.regexp

    def test_any(self):
        '''Any() with and without an explicit character set.'''
        anything = Any()
        self.assert_regexp(anything, '.')
        one_of_abc = Any('abc')
        self.assert_regexp(one_of_abc, '[a-c]')

    def test_literal(self):
        '''A literal compiles to its own text.'''
        foo = Literal('foo')
        self.assert_regexp(foo, 'foo')

    def test_repeat(self):
        '''Repetition with various bounds, with and without a separator.'''

        def cases():
            # A generator, so each matcher is only built immediately before
            # it is checked (same order of evaluation as writing the calls
            # out one after another).
            # -- no separator
            yield Any()[:, ...], '(?:.)*'
            yield Any()[1:, ...], '.(?:.)*'
            yield Any()[1, ...], '.'
            yield Any()[1:2, ...], '.(?:.)?'
            yield Any()[2, ...], '..'
            yield Any()[2:4, ...], '..(?:.(?:.)?)?'
            # -- 'x' as separator
            yield Any()[:, 'x', ...], '(?:.(?:x.)*|)'
            yield Any()[1:, 'x', ...], '.(?:x.)*'
            yield Any()[1, 'x', ...], '.'
            yield Any()[1:2, 'x', ...], '.(?:x.)?'
            yield Any()[2, 'x', ...], '.x.'
            yield Any()[2:4, 'x', ...], '.x.(?:x.(?:x.)?)?'
            # -- repeated literal
            yield Literal('foo')[:, ...], '(?:foo)*'

        for (matcher, regexp) in cases():
            self.assert_regexp(matcher, regexp)

    def test_and(self):
        '''A repeat followed by a single character.'''
        sequence = Any('ab')[:, ...] + Any('p')
        self.assert_regexp(sequence, '(?:[a-b])*p')

    def test_or(self):
        '''A repeat or a single character.'''
        choice = Any('ab')[:, ...] | Any('p')
        self.assert_regexp(choice, '(?:(?:[a-b])*|p)')

    def test_complex(self):
        '''Explicit Literal matchers and bare strings give the same regexp.'''
        expected = '(?:[a-b]q|z)'
        with_literals = (Any('ab') + Literal('q')) | Literal('z')
        self.assert_regexp(with_literals, expected)
        with_strings = (Any('ab') + 'q') | 'z'
        self.assert_regexp(with_strings, expected)
369 370
class RepeatBugTest(TestCase):
    '''
    Two-character matchers parsed against 'abc' with full-first-match
    disabled; both forms are expected to give [['ab']].
    '''

    def test_bug(self):
        '''Any()[2, ...] configured with compile_to_nfa().'''
        # basicConfig(level=DEBUG)  # uncomment for debug logging
        pair = Any()[2, ...]
        pair.config.no_full_first_match().compile_to_nfa()
        parse_all = pair.get_parse_all()
        found = list(parse_all('abc'))
        assert found == [['ab']], found

    def test_bug2(self):
        '''The same pattern written directly as NfaRegexp('..').'''
        pair = NfaRegexp('..')
        pair.config.no_full_first_match()
        parse_all = pair.get_parse_all()
        found = list(parse_all('abc'))
        assert found == [['ab']], found
387 388
class WordBugTest(TestCase):
    '''
    Used to not be possible to compile a raw Word()
    '''

    def test_word(self):
        '''Word() with no arguments, compiled to NFA and matched against "aa".'''
        # basicConfig(level=DEBUG)  # uncomment for debug logging
        word = Word()
        word.config.compile_to_nfa().no_full_first_match().no_memoize()

        sequence = word.get_match_sequence()
        found = list(sequence('aa'))

        wanted = [(['aa'], (2, DUMMY_HELPER)), (['a'], (1, DUMMY_HELPER))]
        assert found == wanted, found
        assert isinstance(sequence.matcher, NfaRegexp), sequence.matcher.tree()
403 404
class TokenBugTest(TestCase):
    '''
    Token(Word()) and Token(Any(ascii_letters)[1:]) gave errors.
    '''

    def _configure_lines_and_build(self, token):
        '''
        Configure `token` with `lines(block_policy=explicit)` and build its
        parser.  Nothing is asserted: the calling test passes as long as no
        exception is raised.
        '''
        token.config.lines(block_policy=explicit)
        token.get_parse()

    def test_token_word(self):
        '''Building a parser for Token(Word()) must not raise.'''
        self._configure_lines_and_build(Token(Word()))

    def test_token_any(self):
        '''Building a parser for a token over repeated letters must not raise.'''
        self._configure_lines_and_build(Token(Any(ascii_letters)[1:,...]))

    def test_simple_word(self):
        '''Word() with only no_memoize() configured still gives an NfaRegexp.'''
        # basicConfig(level=DEBUG)  # uncomment for debug logging
        word = Word()
        word.config.no_memoize()
        parser = word.get_parse()
        wrapped = parser.matcher
        assert isinstance(wrapped.matcher, NfaRegexp), wrapped.matcher.tree()

    def test_simple_any(self):
        '''Repeated letters with compile_to_nfa() give an NfaRegexp.'''
        letters = Any(ascii_letters)[1:,...]
        letters.config.compile_to_nfa().no_memoize()
        parser = letters.get_parse()
        wrapped = parser.matcher
        assert isinstance(wrapped.matcher, NfaRegexp), wrapped.matcher.tree()
432