Package lepl :: Package matchers :: Module operators
[hide private]
[frames] | no frames]

Source Code for Module lepl.matchers.operators

  1   
  2  # The contents of this file are subject to the Mozilla Public License 
  3  # (MPL) Version 1.1 (the "License"); you may not use this file except 
  4  # in compliance with the License. You may obtain a copy of the License 
  5  # at http://www.mozilla.org/MPL/ 
  6  # 
  7  # Software distributed under the License is distributed on an "AS IS" 
  8  # basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See 
  9  # the License for the specific language governing rights and 
 10  # limitations under the License. 
 11  # 
 12  # The Original Code is LEPL (http://www.acooke.org/lepl) 
 13  # The Initial Developer of the Original Code is Andrew Cooke. 
 14  # Portions created by the Initial Developer are Copyright (C) 2009-2010 
 15  # Andrew Cooke (andrew@acooke.org). All Rights Reserved. 
 16  # 
 17  # Alternatively, the contents of this file may be used under the terms 
 18  # of the LGPL license (the GNU Lesser General Public License, 
 19  # http://www.gnu.org/licenses/lgpl.html), in which case the provisions 
 20  # of the LGPL License are applicable instead of those above. 
 21  # 
 22  # If you wish to allow use of your version of this file only under the 
 23  # terms of the LGPL License and not to allow others to use your version 
 24  # of this file under the MPL, indicate your decision by deleting the 
 25  # provisions above and replace them with the notice and other provisions 
 26  # required by the LGPL License.  If you do not delete the provisions 
 27  # above, a recipient may use your version of this file under either the 
 28  # MPL or the LGPL License. 
 29   
 30  ''' 
 31  Support for operator syntactic sugar (and operator redefinition). 
 32  ''' 
 33   
 34  from re import compile as compile_ 
 35   
 36  from lepl.matchers.matcher import Matcher 
 37  from lepl.support.context import Namespace, NamespaceMixin, Scope 
 38  from lepl.support.lib import open_stop, fmt, basestring 
 39   
 40   
 41  DIGITS = compile_('^(-?\d+)(.*)') 
 42   
43 -def RepeatWrapper(matcher, start, stop, step, separator, add, reduce):
44 '''Parse `step` if it is a string.''' 45 # Handle circular dependencies 46 from lepl.matchers.derived import Repeat 47 if (isinstance(step, int)): 48 limit = step 49 algorithm = DEPTH_FIRST 50 elif (isinstance(step, basestring)): 51 limit = None 52 algorithm = None 53 while step: 54 match = DIGITS.match(step) 55 if match: 56 if limit is None: 57 limit = int(match.group(1)) 58 step = match.group(2) 59 else: 60 raise TypeError(fmt('Cannot parse limit/algorithm for []: {}', 61 step)) 62 else: 63 if algorithm is None: 64 algorithm = step[0] 65 step = step[1:] 66 else: 67 raise TypeError('The step of [...] must be an integer limit, or a ' 68 'string to select the algorithm, or both as a string ' 69 'like "d1" for a single value, depth first') 70 return Repeat(matcher, start=start, stop=stop, limit=limit, 71 algorithm=algorithm, separator=separator, add_=add, 72 reduce=reduce)
73
74 -class OperatorNamespace(Namespace):
75 ''' 76 Define the default operators. 77 ''' 78
79 - def __init__(self):
80 # Handle circular dependencies 81 from lepl.matchers.error import raise_error 82 from lepl.matchers.derived import Space, Add, Apply, KApply, Drop, Map 83 from lepl.matchers.combine import And, Or, First 84 super(OperatorNamespace, self).__init__({ 85 SPACE_OPT: lambda a, b: And(a, Space()[0:,...], b), 86 SPACE_REQ: lambda a, b: And(a, Space()[1:,...], b), 87 ADD: lambda a, b: Add(And(a, b)), 88 AND: And, 89 OR: Or, 90 APPLY: Apply, 91 APPLY_RAW: lambda a, b: Apply(a, b, raw=True), 92 NOT: Drop, 93 KARGS: KApply, 94 RAISE: lambda a, b: KApply(a, raise_error(b)), 95 REPEAT: RepeatWrapper, 96 FIRST: First, 97 MAP: Map, 98 REDUCE: None, 99 })
100 101 102 OPERATORS = 'operators' 103 ''' 104 The name used to retrieve operators definitions. 105 ''' 106 107 SPACE_OPT = '/' 108 '''Name for / operator.''' 109 SPACE_REQ = '//' 110 '''Name for // operator.''' 111 ADD = '+' 112 '''Name for + operator.''' 113 AND = '&' 114 '''Name for & operator.''' 115 OR = '|' 116 '''Name for | operator.''' 117 APPLY = '>' 118 '''Name for > operator.''' 119 APPLY_RAW = '>=' 120 '''Name for >= operator.''' 121 NOT = '~' 122 '''Name for ~ operator.''' 123 KARGS = '**' 124 '''Name for ** operator.''' 125 RAISE = '^' 126 '''Name for ^ operator.''' 127 REPEAT = '[]' 128 '''Name for [] operator.''' 129 FIRST = '%' 130 '''Name for % operator.''' 131 MAP = '>>' 132 '''Name for >> operator.''' 133 REDUCE = '<reduce>' 134 '''Name for accumulator of data during repetition.''' 135 136
137 -class Override(Scope):
138 ''' 139 Allow an operator to be redefined within a with context. Uses the 140 OPERATORS namespace. 141 ''' 142
143 - def __init__(self, space_opt=None, space_req=None, repeat=None, 144 add=None, and_=None, or_=None, not_=None, 145 apply_=None, apply_raw=None, kargs=None, 146 raise_=None, first=None, map_=None, reduce=None):
147 super(Override, self).__init__(OPERATORS, OperatorNamespace, 148 {SPACE_OPT: space_opt, SPACE_REQ: space_req, 149 REPEAT: repeat, ADD: add, AND: and_, OR: or_, 150 NOT: not_, APPLY: apply_, APPLY_RAW: apply_raw, 151 KARGS: kargs, RAISE: raise_, FIRST: first, MAP: map_, 152 REDUCE: reduce})
153 154
155 -class _BaseSeparator(Override):
156 ''' 157 Support class for `Separator` and similar classes. 158 159 Uses the OPERATORS namespace. 160 ''' 161
162 - def __init__(self, separator):
163 ''' 164 If the separator is a string it is coerced to `Regexp()`; if None 165 then any previous defined separator is effectively removed. 166 ''' 167 # Handle circular dependencies 168 from lepl.matchers.core import Regexp 169 from lepl.matchers.combine import And 170 from lepl.matchers.support import coerce_ 171 if separator is None: 172 and_ = And 173 repeat = RepeatWrapper 174 else: 175 separator = coerce_(separator, Regexp) 176 (and_, repeat) = self._replacements(separator) 177 super(_BaseSeparator, self).__init__(and_=and_, repeat=repeat)
178
179 - def _replacements(self, _separator):
180 ''' 181 Sub-classes should return (And, Repeat) 182 ''' 183 raise Exception('Unimplemented')
184
185 - def _repeat(self, separator):
186 ''' 187 A simple Repeat with separator. 188 ''' 189 from lepl.matchers.combine import And 190 def repeat(m, st=0, sp=None, d=0, s=None, a=False, r=None): 191 ''' 192 Wrap `Repeat` to adapt the separator. 193 ''' 194 if s is None: 195 s = separator 196 elif not a: 197 s = And(separator, s, separator) 198 return RepeatWrapper(m, st, sp, d, s, a, r)
199 return repeat
200 201
202 -class Separator(_BaseSeparator):
203 ''' 204 Redefine ``[]`` and ``&`` to include the given matcher as a separator 205 (so it will be used between list items and between matchers separated by the & 206 operator) 207 208 Uses the OPERATORS namespace. 209 ''' 210
211 - def _replacements(self, separator):
212 ''' 213 Require the separator on each `And`. 214 ''' 215 # Handle circular dependencies 216 from lepl.matchers.combine import And 217 return (lambda a, b: And(a, separator, b), 218 self._repeat(separator))
219 220
221 -class DroppedSpace(Separator):
222 ''' 223 Skip spaces (by default, zero or more Space()). Any argument is dropped. 224 ''' 225
226 - def __init__(self, space=None):
227 from lepl.matchers.derived import Space, Drop 228 if space is None: 229 space = Space()[:] 230 space = Drop(space) 231 super(DroppedSpace, self).__init__(space)
232 233
234 -class SmartSeparator1(_BaseSeparator):
235 ''' 236 Similar to `Separator`, but tried to be clever about whether the 237 separator is needed. It replaces `&` with a matcher that only uses 238 the separator if the second sub-matcher consumes some input. 239 240 Uses the OPERATORS namespace. 241 242 See also `SmartSeparator2`, which is less general, but more efficient. 243 ''' 244
245 - def _replacements(self, separator):
246 ''' 247 Require the separator on each `And`. 248 ''' 249 # Handle circular dependencies 250 from lepl.matchers.combine import And, Or 251 from lepl.matchers.core import Consumer 252 def and_(a, b): 253 ''' 254 Add space only in the case when both consume something. 255 ''' 256 return Or(And(Consumer(a), separator, Consumer(b)), 257 And(Consumer(a), Consumer(b, False)), 258 And(Consumer(a, False), Consumer(b)), 259 And(Consumer(a, False), Consumer(b, False)))
260 return (and_, self._repeat(separator))
261 262 263 GREEDY = 'g' 264 '''Flag (splice increment) for inefficient, guaranteed greedy matching.''' 265 NON_GREEDY = 'n' 266 '''Flag (splice increment) for inefficient, guaranteed non-greedy matching.''' 267 DEPTH_FIRST = 'd' 268 '''Flag (splice increment) for efficient, quasi-greedy, matching (default).''' 269 BREADTH_FIRST = 'b' 270 '''Flag (splice increment) for efficient, quasi-non-greedy, matching.''' 271 272
273 -class OperatorMixin(NamespaceMixin):
274 ''' 275 Define the operators used to combine elements in a grammar specification. 276 ''' 277
278 - def __init__(self, name, namespace):
279 super(OperatorMixin, self).__init__(name, namespace)
280
281 - def __add__(self, other):
282 ''' 283 **self + other** - Join strings, merge lists. 284 285 Combine adjacent matchers in sequence, merging the result with "+" 286 (so strings are joined, lists merged). 287 288 :Parameters: 289 290 other 291 Another matcher or a string that will be converted to a literal 292 match. 293 ''' 294 self.__check(ADD, other, True) 295 return self._lookup(ADD)(self, other)
296
297 - def __radd__(self, other):
298 ''' 299 **other + self** - Join strings, merge lists. 300 301 Combine adjacent matchers in sequence, merging the result with "+" 302 (so strings are joined, lists merged). 303 304 :Parameters: 305 306 other 307 Another matcher or a string that will be converted to a literal 308 match. 309 ''' 310 self.__check(ADD, other, True) 311 return self._lookup(ADD)(other, self)
312
313 - def __and__(self, other):
314 ''' 315 **self & other** - Append results. 316 317 Combine adjacent matchers in sequence. This is equivalent to 318 `And()`. 319 320 :Parameters: 321 322 other 323 Another matcher or a string that will be converted to a literal 324 match. 325 ''' 326 self.__check(AND, other, True) 327 return self._lookup(AND)(self, other)
328
329 - def __rand__(self, other):
330 ''' 331 **other & self** - Append results. 332 333 Combine adjacent matchers in sequence. This is equivalent to 334 `And()`. 335 336 :Parameters: 337 338 other 339 Another matcher or a string that will be converted to a literal 340 match. 341 ''' 342 self.__check(AND, other, True) 343 return self._lookup(AND)(other, self)
344
345 - def __div__(self, other):
346 ''' 347 For 2.6 348 ''' 349 return self.__truediv__(other)
350
351 - def __rdiv__(self, other):
352 ''' 353 For 2.6 354 ''' 355 return self.__rtruediv__(other)
356
357 - def __truediv__(self, other):
358 ''' 359 **self / other** - Append results, with optional separating space. 360 361 Combine adjacent matchers in sequence, with an optional space between 362 them. The space is included in the results. 363 364 :Parameters: 365 366 other 367 Another matcher or a string that will be converted to a literal 368 match. 369 ''' 370 self.__check(SPACE_OPT, other, True) 371 return self._lookup(SPACE_OPT)(self, other)
372
373 - def __rtruediv__(self, other):
374 ''' 375 **other / self** - Append results, with optional separating space. 376 377 Combine adjacent matchers in sequence, with an optional space between 378 them. The space is included in the results. 379 380 :Parameters: 381 382 other 383 Another matcher or a string that will be converted to a literal 384 match. 385 ''' 386 self.__check(SPACE_OPT, other, True) 387 return self._lookup(SPACE_OPT)(other, self)
388
389 - def __floordiv__(self, other):
390 ''' 391 **self // other** - Append results, with required separating space. 392 393 Combine adjacent matchers in sequence, with a space between them. 394 The space is included in the results. 395 396 :Parameters: 397 398 other 399 Another matcher or a string that will be converted to a literal 400 match. 401 ''' 402 self.__check(SPACE_REQ, other, True) 403 return self._lookup(SPACE_REQ)(self, other)
404
405 - def __rfloordiv__(self, other):
406 ''' 407 **other // self** - Append results, with required separating space. 408 409 Combine adjacent matchers in sequence, with a space between them. 410 The space is included in the results. 411 412 :Parameters: 413 414 other 415 Another matcher or a string that will be converted to a literal 416 match. 417 ''' 418 self.__check(SPACE_REQ, other, True) 419 return self._lookup(SPACE_REQ)(other, self)
420
421 - def __or__(self, other):
422 ''' 423 **self | other** - Try alternative matchers. 424 425 This introduces backtracking. Matches are tried from left to right 426 and successful results returned (one on each "recall"). This is 427 equivalent to `Or()`. 428 429 :Parameters: 430 431 other 432 Another matcher or a string that will be converted to a literal 433 match. 434 ''' 435 self.__check(OR, other, True) 436 return self._lookup(OR)(self, other)
437
438 - def __ror__(self, other):
439 ''' 440 **other | self** - Try alternative matchers. 441 442 This introduces backtracking. Matches are tried from left to right 443 and successful results returned (one on each "recall"). This is 444 equivalent to `Or()`. 445 446 :Parameters: 447 448 other 449 Another matcher or a string that will be converted to a literal 450 match. 451 ''' 452 self.__check(OR, other, True) 453 return self._lookup(OR)(other, self)
454
455 - def __mod__(self, other):
456 ''' 457 **self % other** - Take first match (committed choice). 458 459 Matches are tried from left to right and the first successful result 460 is returned. This is equivalent to `First()`. 461 462 :Parameters: 463 464 other 465 Another matcher or a string that will be converted to a literal 466 match. 467 ''' 468 self.__check(FIRST, other, True) 469 return self._lookup(FIRST)(self, other)
470
471 - def __rmod__(self, other):
472 ''' 473 **other % self** - Take first match (committed choice). 474 475 Matches are tried from left to right and the first successful result 476 is returned. This is equivalent to `First()`. 477 478 :Parameters: 479 480 other 481 Another matcher or a string that will be converted to a literal 482 match. 483 ''' 484 self.__check(FIRST, other, True) 485 return self._lookup(FIRST)(other, self)
486
487 - def __invert__(self):
488 ''' 489 **~self** - Discard the result. 490 491 This generates a matcher that behaves as the original, but returns 492 an empty list. This is equivalent to `Drop()`. 493 494 Note that `Lookahead()` overrides this method to have 495 different semantics (negative lookahead). 496 ''' 497 return self._lookup(NOT)(self)
498
499 - def __getitem__(self, indices):
500 ''' 501 **self[start:stop:algorithm, separator, ...]** - Repetition and lists. 502 503 This is a complex statement that modifies the current matcher so 504 that it matches several times. A separator may be specified 505 (eg for comma-separated lists) and the results may be combined with 506 "+" (so repeated matching of characters would give a word). 507 508 start:stop:algorithm 509 This controls the number of matches made and the order in which 510 different numbers of matches are returned. 511 512 [start] 513 Repeat exactly *start* times 514 515 [start:stop] 516 Repeat *start* to *stop* times (starting with as many matches 517 as possible, and then decreasing as necessary). 518 519 [start:stop:algorithm] 520 Direction selects the algorithm for searching. 521 522 'b' (BREADTH_FIRST) 523 A breadth first search is used, which tends to give shorter 524 matches before longer ones. This tries all possible matches for 525 the sub-matcher first (before repeating calls to consume more 526 of the stream). If the sub-matcher does not backtrack then this 527 guarantees that the number of matches returned will not decrease 528 (ie will monotonically increase) on backtracking. 529 530 'd' (DEPTH_FIRST) 531 A depth first search is used, which tends to give longer 532 matches before shorter ones. This tries to repeats matches 533 with the sub-matcher, consuming as much of the stream as 534 possible, before backtracking to find alternative matchers. 535 If the sub-matcher does not backtrack then this guarantees 536 that the number of matches returned will not increase (ie will 537 monotonically decrease) on backtracking. 538 539 'g' (GREEDY) 540 An exhaustive search is used, which finds all results (by 541 breadth first search) and orders them by length before returning 542 them ordered from longest to shortest. This guarantees that 543 the number of matches returned will not increase (ie will 544 monotonically decrease) on backtracking, but can consume a lot 545 of resources. 546 547 'n' (NON_GREEDY) 548 As for 'g' (GREEDY), but results are ordered shortest to 549 longest. This guarantees that the number of matches returned 550 will not decrease (ie will monotonically increase) on 551 backtracking, but can consume a lot of resources, 552 553 Values may be omitted; the defaults are: *start* = 0, *stop* = 554 infinity, *algorithm* = 'd' (DEPTH_FIRST). 555 556 separator 557 If given, this must appear between repeated values. Matched 558 separators are returned as part of the result (unless, of course, 559 they are implemented with a matcher that returns nothing). If 560 *separator* is a string it is converted to a literal match. 561 562 ... 563 If ... (an ellipsis) is given then the results are joined together 564 with "+". 565 566 Examples 567 -------- 568 569 Any()[0:3,...] will match 3 or less characters, joining them 570 together so that the result is a single string. 571 572 Word()[:,','] will match a comma-separated list of words. 573 574 value[:] or value[0:] or value[0::'d'] is a "greedy" match that, 575 if value does not backtrack, is equivalent to the "*" in a regular 576 expression. 577 value[::'n'] is the "non-greedy" equivalent (preferring as short a 578 match as possible) and value[::'g'] is greedy even when value does 579 provide alternative matches on backtracking. 580 ''' 581 start = 0 582 stop = None 583 step = DEPTH_FIRST 584 separator = None 585 add = False 586 have_index = False 587 if not isinstance(indices, tuple): 588 indices = [indices] 589 for index in indices: 590 if isinstance(index, int): 591 if have_index: 592 raise TypeError( 593 fmt('Multiple slices not supported: {0!r}', index)) 594 start = index 595 stop = index 596 step = DEPTH_FIRST 597 have_index = True 598 elif isinstance(index, slice): 599 if have_index: 600 raise TypeError( 601 fmt('Multiple slices not supported: {0!r}', index)) 602 start = index.start if index.start != None else 0 603 stop = index.stop if not open_stop(index) else None 604 step = index.step if index.step != None else DEPTH_FIRST 605 have_index = True 606 elif index == Ellipsis: 607 add = True 608 elif separator is None: 609 separator = index 610 else: 611 raise TypeError(index) 612 # important for rewriting 613 if stop == 1: 614 add = False 615 return self._lookup(REPEAT)(self, start, stop, step, separator, add, 616 self._lookup(REDUCE))
617
618 - def __gt__(self, function):
619 ''' 620 **self > function** - Process or label the results. 621 622 Create a named pair or apply a function to the results. This is 623 equivalent to `Apply()`. 624 625 :Parameters: 626 627 function 628 This can be a string or a function. 629 630 If a string is given each result is replaced by a 631 (name, value) pair, where name is the string and value is the 632 result. 633 634 If a function is given it is called with the results as an 635 argument. The return value is used *within a list* as the new 636 result. This is equivalent to `Apply()` with raw=False. 637 ''' 638 self.__check(APPLY, function, False) 639 return self._lookup(APPLY)(self, function)
640
641 - def __ge__(self, function):
642 ''' 643 **self >= function** - Process or label the results. 644 645 Apply a function to the results. 646 This is equivalent to `Apply(raw=True)`. 647 648 :Parameters: 649 650 function 651 This is called with the results as an argument. The return value 652 is used as the new result. This is equivalent to `Apply()` with 653 raw=True. 654 ''' 655 self.__check(APPLY_RAW, function, False) 656 return self._lookup(APPLY_RAW)(self, function)
657
658 - def __rshift__(self, function):
659 ''' 660 **self >> function** - Process or label the results (map). 661 662 Create a named pair or apply a function to each result in turn. 663 This is equivalent to `Map()`. It is similar to 664 *self >= function*, except that the function is applied to each 665 result in turn. 666 667 :Parameters: 668 669 function 670 This can be a string or a function. 671 672 If a string is given each result is replaced by a 673 (name, value) pair, where name is the string and value is the 674 result. 675 676 If a function is given it is called with each result in turn. 677 The return values are used as the new result. 678 ''' 679 self.__check(MAP, function, False) 680 return self._lookup(MAP)(self, function)
681
682 - def __pow__(self, function):
683 ''' 684 **self \** function** - Process the results (\**kargs). 685 686 Apply a function to keyword arguments 687 This is equivalent to `KApply()`. 688 689 :Parameters: 690 691 function 692 A function that is called with the keyword arguments described below. 693 The return value is used as the new result. 694 695 Keyword arguments: 696 697 stream_in 698 The stream passed to the matcher. 699 700 stream_out 701 The stream returned from the matcher. 702 703 results 704 A list of the results returned. 705 ''' 706 self.__check(KARGS, function, False) 707 return self._lookup(KARGS)(self, function)
708
709 - def __xor__(self, message):
710 ''' 711 **self ^ message** 712 713 Raise a SytaxError. 714 715 :Parameters: 716 717 message 718 The message for the SyntaxError. 719 ''' 720 return self._lookup(RAISE)(self, message)
721
722 - def __check(self, name, other, is_match):
723 ''' 724 Provide some diagnostics if the syntax is completely mixed up. 725 ''' 726 if not isinstance(other, basestring): # can go either way 727 if is_match != isinstance(other, Matcher): 728 if is_match: 729 msg = 'The operator {0} for {1} was applied to something ' \ 730 'that is not a matcher ({2}).' 731 else: 732 msg = 'The operator {0} for {1} was applied to a matcher ' \ 733 '({2}).' 734 msg += ' Check syntax and parentheses.' 735 raise SyntaxError(fmt(msg, name, self, other))
736