Package lepl :: Package matchers :: Module operators
[hide private]
[frames] | no frames]

Source Code for Module lepl.matchers.operators

  1   
  2  # The contents of this file are subject to the Mozilla Public License 
  3  # (MPL) Version 1.1 (the "License"); you may not use this file except 
  4  # in compliance with the License. You may obtain a copy of the License 
  5  # at http://www.mozilla.org/MPL/ 
  6  # 
  7  # Software distributed under the License is distributed on an "AS IS" 
  8  # basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See 
  9  # the License for the specific language governing rights and 
 10  # limitations under the License. 
 11  # 
 12  # The Original Code is LEPL (http://www.acooke.org/lepl) 
 13  # The Initial Developer of the Original Code is Andrew Cooke. 
 14  # Portions created by the Initial Developer are Copyright (C) 2009-2010 
 15  # Andrew Cooke (andrew@acooke.org). All Rights Reserved. 
 16  # 
 17  # Alternatively, the contents of this file may be used under the terms 
 18  # of the LGPL license (the GNU Lesser General Public License, 
 19  # http://www.gnu.org/licenses/lgpl.html), in which case the provisions 
 20  # of the LGPL License are applicable instead of those above. 
 21  # 
 22  # If you wish to allow use of your version of this file only under the 
 23  # terms of the LGPL License and not to allow others to use your version 
 24  # of this file under the MPL, indicate your decision by deleting the 
 25  # provisions above and replace them with the notice and other provisions 
 26  # required by the LGPL License.  If you do not delete the provisions 
 27  # above, a recipient may use your version of this file under either the 
 28  # MPL or the LGPL License. 
 29   
 30  ''' 
 31  Support for operator syntactic sugar (and operator redefinition). 
 32  ''' 
 33   
 34  from re import compile as compile_ 
 35   
 36  from lepl.matchers.matcher import Matcher 
 37  from lepl.support.context import Namespace, NamespaceMixin, Scope 
 38  from lepl.support.lib import open_stop, fmt, basestring 
 39   
 40   
 41  DIGITS = compile_('^(-?\d+)(.*)') 
 42   
43 -def RepeatWrapper(matcher, start, stop, step, separator, add, reduce):
44 '''Parse `step` if it is a string.''' 45 # Handle circular dependencies 46 from lepl.matchers.derived import Repeat 47 try: 48 int(step) # if this works, we may have a var, so keep the instance 49 limit = step 50 algorithm = DEPTH_FIRST 51 except ValueError: 52 if (isinstance(step, basestring)): 53 limit = None 54 algorithm = None 55 while step: 56 match = DIGITS.match(step) 57 if match: 58 if limit is None: 59 limit = int(match.group(1)) 60 step = match.group(2) 61 else: 62 raise TypeError(fmt('Cannot parse limit/algorithm for []: {}', 63 step)) 64 else: 65 if algorithm is None: 66 algorithm = step[0] 67 step = step[1:] 68 else: 69 raise TypeError('The step of [...] must be an integer limit, or a ' 70 'string to select the algorithm, or both as a string ' 71 'like "d1" for a single value, depth first') 72 return Repeat(matcher, start=start, stop=stop, limit=limit, 73 algorithm=algorithm, separator=separator, add_=add, 74 reduce=reduce)
75
76 -class OperatorNamespace(Namespace):
77 ''' 78 Define the default operators. 79 ''' 80
81 - def __init__(self):
82 # Handle circular dependencies 83 from lepl.matchers.error import raise_error 84 from lepl.matchers.derived import Space, Add, Apply, KApply, Drop, Map 85 from lepl.matchers.combine import And, Or, First 86 super(OperatorNamespace, self).__init__({ 87 SPACE_OPT: lambda a, b: And(a, Space()[0:,...], b), 88 SPACE_REQ: lambda a, b: And(a, Space()[1:,...], b), 89 ADD: lambda a, b: Add(And(a, b)), 90 AND: And, 91 OR: Or, 92 APPLY: Apply, 93 APPLY_RAW: lambda a, b: Apply(a, b, raw=True), 94 NOT: Drop, 95 KARGS: KApply, 96 RAISE: lambda a, b: KApply(a, raise_error(b)), 97 REPEAT: RepeatWrapper, 98 FIRST: First, 99 MAP: Map, 100 REDUCE: None, 101 })
102 103 104 OPERATORS = 'operators' 105 ''' 106 The name used to retrieve operators definitions. 107 ''' 108 109 SPACE_OPT = '/' 110 '''Name for / operator.''' 111 SPACE_REQ = '//' 112 '''Name for // operator.''' 113 ADD = '+' 114 '''Name for + operator.''' 115 AND = '&' 116 '''Name for & operator.''' 117 OR = '|' 118 '''Name for | operator.''' 119 APPLY = '>' 120 '''Name for > operator.''' 121 APPLY_RAW = '>=' 122 '''Name for >= operator.''' 123 NOT = '~' 124 '''Name for ~ operator.''' 125 KARGS = '**' 126 '''Name for ** operator.''' 127 RAISE = '^' 128 '''Name for ^ operator.''' 129 REPEAT = '[]' 130 '''Name for [] operator.''' 131 FIRST = '%' 132 '''Name for % operator.''' 133 MAP = '>>' 134 '''Name for >> operator.''' 135 REDUCE = '<reduce>' 136 '''Name for accumulator of data during repetition.''' 137 138
139 -class Override(Scope):
140 ''' 141 Allow an operator to be redefined within a with context. Uses the 142 OPERATORS namespace. 143 ''' 144
145 - def __init__(self, space_opt=None, space_req=None, repeat=None, 146 add=None, and_=None, or_=None, not_=None, 147 apply_=None, apply_raw=None, kargs=None, 148 raise_=None, first=None, map_=None, reduce=None):
149 super(Override, self).__init__(OPERATORS, OperatorNamespace, 150 {SPACE_OPT: space_opt, SPACE_REQ: space_req, 151 REPEAT: repeat, ADD: add, AND: and_, OR: or_, 152 NOT: not_, APPLY: apply_, APPLY_RAW: apply_raw, 153 KARGS: kargs, RAISE: raise_, FIRST: first, MAP: map_, 154 REDUCE: reduce})
155 156
157 -class _BaseSeparator(Override):
158 ''' 159 Support class for `Separator` and similar classes. 160 161 Uses the OPERATORS namespace. 162 ''' 163
164 - def __init__(self, separator):
165 ''' 166 If the separator is a string it is coerced to `Regexp()`; if None 167 then any previous defined separator is effectively removed. 168 ''' 169 # Handle circular dependencies 170 from lepl.matchers.core import Regexp 171 from lepl.matchers.combine import And 172 from lepl.matchers.support import coerce_ 173 if separator is None: 174 and_ = And 175 repeat = RepeatWrapper 176 else: 177 separator = coerce_(separator, Regexp) 178 (and_, repeat) = self._replacements(separator) 179 super(_BaseSeparator, self).__init__(and_=and_, repeat=repeat)
180
181 - def _replacements(self, _separator):
182 ''' 183 Sub-classes should return (And, Repeat) 184 ''' 185 raise Exception('Unimplemented')
186
187 - def _repeat(self, separator):
188 ''' 189 A simple Repeat with separator. 190 ''' 191 from lepl.matchers.combine import And 192 def repeat(m, st=0, sp=None, d=0, s=None, a=False, r=None): 193 ''' 194 Wrap `Repeat` to adapt the separator. 195 ''' 196 if s is None: 197 s = separator 198 elif not a: 199 s = And(separator, s, separator) 200 return RepeatWrapper(m, st, sp, d, s, a, r)
201 return repeat
202 203
204 -class Separator(_BaseSeparator):
205 ''' 206 Redefine ``[]`` and ``&`` to include the given matcher as a separator 207 (so it will be used between list items and between matchers separated by the & 208 operator) 209 210 Uses the OPERATORS namespace. 211 ''' 212
213 - def _replacements(self, separator):
214 ''' 215 Require the separator on each `And`. 216 ''' 217 # Handle circular dependencies 218 from lepl.matchers.combine import And 219 return (lambda a, b: And(a, separator, b), 220 self._repeat(separator))
221 222
223 -class DroppedSpace(Separator):
224 ''' 225 Skip spaces (by default, zero or more Space()). Any argument is dropped. 226 ''' 227
228 - def __init__(self, space=None):
229 from lepl.matchers.derived import Space, Drop 230 if space is None: 231 space = Space()[:] 232 space = Drop(space) 233 super(DroppedSpace, self).__init__(space)
234 235
236 -class SmartSeparator1(_BaseSeparator):
237 ''' 238 Similar to `Separator`, but tried to be clever about whether the 239 separator is needed. It replaces `&` with a matcher that only uses 240 the separator if the second sub-matcher consumes some input. 241 242 Uses the OPERATORS namespace. 243 244 See also `SmartSeparator2`, which is less general, but more efficient. 245 ''' 246
247 - def _replacements(self, separator):
248 ''' 249 Require the separator on each `And`. 250 ''' 251 # Handle circular dependencies 252 from lepl.matchers.combine import And, Or 253 from lepl.matchers.core import Consumer 254 def and_(a, b): 255 ''' 256 Add space only in the case when both consume something. 257 ''' 258 return Or(And(Consumer(a), separator, Consumer(b)), 259 And(Consumer(a), Consumer(b, False)), 260 And(Consumer(a, False), Consumer(b)), 261 And(Consumer(a, False), Consumer(b, False)))
262 return (and_, self._repeat(separator))
263 264 265 GREEDY = 'g' 266 '''Flag (splice increment) for inefficient, guaranteed greedy matching.''' 267 NON_GREEDY = 'n' 268 '''Flag (splice increment) for inefficient, guaranteed non-greedy matching.''' 269 DEPTH_FIRST = 'd' 270 '''Flag (splice increment) for efficient, quasi-greedy, matching (default).''' 271 BREADTH_FIRST = 'b' 272 '''Flag (splice increment) for efficient, quasi-non-greedy, matching.''' 273 274
275 -class OperatorMixin(NamespaceMixin):
276 ''' 277 Define the operators used to combine elements in a grammar specification. 278 ''' 279
280 - def __init__(self, name, namespace):
281 super(OperatorMixin, self).__init__(name, namespace)
282
283 - def __add__(self, other):
284 ''' 285 **self + other** - Join strings, merge lists. 286 287 Combine adjacent matchers in sequence, merging the result with "+" 288 (so strings are joined, lists merged). 289 290 :Parameters: 291 292 other 293 Another matcher or a string that will be converted to a literal 294 match. 295 ''' 296 self.__check(ADD, other, True) 297 return self._lookup(ADD)(self, other)
298
299 - def __radd__(self, other):
300 ''' 301 **other + self** - Join strings, merge lists. 302 303 Combine adjacent matchers in sequence, merging the result with "+" 304 (so strings are joined, lists merged). 305 306 :Parameters: 307 308 other 309 Another matcher or a string that will be converted to a literal 310 match. 311 ''' 312 self.__check(ADD, other, True) 313 return self._lookup(ADD)(other, self)
314
315 - def __and__(self, other):
316 ''' 317 **self & other** - Append results. 318 319 Combine adjacent matchers in sequence. This is equivalent to 320 `And()`. 321 322 :Parameters: 323 324 other 325 Another matcher or a string that will be converted to a literal 326 match. 327 ''' 328 self.__check(AND, other, True) 329 return self._lookup(AND)(self, other)
330
331 - def __rand__(self, other):
332 ''' 333 **other & self** - Append results. 334 335 Combine adjacent matchers in sequence. This is equivalent to 336 `And()`. 337 338 :Parameters: 339 340 other 341 Another matcher or a string that will be converted to a literal 342 match. 343 ''' 344 self.__check(AND, other, True) 345 return self._lookup(AND)(other, self)
346
347 - def __div__(self, other):
348 ''' 349 For 2.6 350 ''' 351 return self.__truediv__(other)
352
353 - def __rdiv__(self, other):
354 ''' 355 For 2.6 356 ''' 357 return self.__rtruediv__(other)
358
359 - def __truediv__(self, other):
360 ''' 361 **self / other** - Append results, with optional separating space. 362 363 Combine adjacent matchers in sequence, with an optional space between 364 them. The space is included in the results. 365 366 :Parameters: 367 368 other 369 Another matcher or a string that will be converted to a literal 370 match. 371 ''' 372 self.__check(SPACE_OPT, other, True) 373 return self._lookup(SPACE_OPT)(self, other)
374
375 - def __rtruediv__(self, other):
376 ''' 377 **other / self** - Append results, with optional separating space. 378 379 Combine adjacent matchers in sequence, with an optional space between 380 them. The space is included in the results. 381 382 :Parameters: 383 384 other 385 Another matcher or a string that will be converted to a literal 386 match. 387 ''' 388 self.__check(SPACE_OPT, other, True) 389 return self._lookup(SPACE_OPT)(other, self)
390
391 - def __floordiv__(self, other):
392 ''' 393 **self // other** - Append results, with required separating space. 394 395 Combine adjacent matchers in sequence, with a space between them. 396 The space is included in the results. 397 398 :Parameters: 399 400 other 401 Another matcher or a string that will be converted to a literal 402 match. 403 ''' 404 self.__check(SPACE_REQ, other, True) 405 return self._lookup(SPACE_REQ)(self, other)
406
407 - def __rfloordiv__(self, other):
408 ''' 409 **other // self** - Append results, with required separating space. 410 411 Combine adjacent matchers in sequence, with a space between them. 412 The space is included in the results. 413 414 :Parameters: 415 416 other 417 Another matcher or a string that will be converted to a literal 418 match. 419 ''' 420 self.__check(SPACE_REQ, other, True) 421 return self._lookup(SPACE_REQ)(other, self)
422
423 - def __or__(self, other):
424 ''' 425 **self | other** - Try alternative matchers. 426 427 This introduces backtracking. Matches are tried from left to right 428 and successful results returned (one on each "recall"). This is 429 equivalent to `Or()`. 430 431 :Parameters: 432 433 other 434 Another matcher or a string that will be converted to a literal 435 match. 436 ''' 437 self.__check(OR, other, True) 438 return self._lookup(OR)(self, other)
439
440 - def __ror__(self, other):
441 ''' 442 **other | self** - Try alternative matchers. 443 444 This introduces backtracking. Matches are tried from left to right 445 and successful results returned (one on each "recall"). This is 446 equivalent to `Or()`. 447 448 :Parameters: 449 450 other 451 Another matcher or a string that will be converted to a literal 452 match. 453 ''' 454 self.__check(OR, other, True) 455 return self._lookup(OR)(other, self)
456
457 - def __mod__(self, other):
458 ''' 459 **self % other** - Take first match (committed choice). 460 461 Matches are tried from left to right and the first successful result 462 is returned. This is equivalent to `First()`. 463 464 :Parameters: 465 466 other 467 Another matcher or a string that will be converted to a literal 468 match. 469 ''' 470 self.__check(FIRST, other, True) 471 return self._lookup(FIRST)(self, other)
472
473 - def __rmod__(self, other):
474 ''' 475 **other % self** - Take first match (committed choice). 476 477 Matches are tried from left to right and the first successful result 478 is returned. This is equivalent to `First()`. 479 480 :Parameters: 481 482 other 483 Another matcher or a string that will be converted to a literal 484 match. 485 ''' 486 self.__check(FIRST, other, True) 487 return self._lookup(FIRST)(other, self)
488
489 - def __invert__(self):
490 ''' 491 **~self** - Discard the result. 492 493 This generates a matcher that behaves as the original, but returns 494 an empty list. This is equivalent to `Drop()`. 495 496 Note that `Lookahead()` overrides this method to have 497 different semantics (negative lookahead). 498 ''' 499 return self._lookup(NOT)(self)
500
501 - def __getitem__(self, indices):
502 ''' 503 **self[start:stop:algorithm, separator, ...]** - Repetition and lists. 504 505 This is a complex statement that modifies the current matcher so 506 that it matches several times. A separator may be specified 507 (eg for comma-separated lists) and the results may be combined with 508 "+" (so repeated matching of characters would give a word). 509 510 start:stop:algorithm 511 This controls the number of matches made and the order in which 512 different numbers of matches are returned. 513 514 [start] 515 Repeat exactly *start* times 516 517 [start:stop] 518 Repeat *start* to *stop* times (starting with as many matches 519 as possible, and then decreasing as necessary). 520 521 [start:stop:algorithm] 522 Direction selects the algorithm for searching. 523 524 'b' (BREADTH_FIRST) 525 A breadth first search is used, which tends to give shorter 526 matches before longer ones. This tries all possible matches for 527 the sub-matcher first (before repeating calls to consume more 528 of the stream). If the sub-matcher does not backtrack then this 529 guarantees that the number of matches returned will not decrease 530 (ie will monotonically increase) on backtracking. 531 532 'd' (DEPTH_FIRST) 533 A depth first search is used, which tends to give longer 534 matches before shorter ones. This tries to repeats matches 535 with the sub-matcher, consuming as much of the stream as 536 possible, before backtracking to find alternative matchers. 537 If the sub-matcher does not backtrack then this guarantees 538 that the number of matches returned will not increase (ie will 539 monotonically decrease) on backtracking. 540 541 'g' (GREEDY) 542 An exhaustive search is used, which finds all results (by 543 breadth first search) and orders them by length before returning 544 them ordered from longest to shortest. This guarantees that 545 the number of matches returned will not increase (ie will 546 monotonically decrease) on backtracking, but can consume a lot 547 of resources. 548 549 'n' (NON_GREEDY) 550 As for 'g' (GREEDY), but results are ordered shortest to 551 longest. This guarantees that the number of matches returned 552 will not decrease (ie will monotonically increase) on 553 backtracking, but can consume a lot of resources, 554 555 Values may be omitted; the defaults are: *start* = 0, *stop* = 556 infinity, *algorithm* = 'd' (DEPTH_FIRST). 557 558 separator 559 If given, this must appear between repeated values. Matched 560 separators are returned as part of the result (unless, of course, 561 they are implemented with a matcher that returns nothing). If 562 *separator* is a string it is converted to a literal match. 563 564 ... 565 If ... (an ellipsis) is given then the results are joined together 566 with "+". 567 568 Examples 569 -------- 570 571 Any()[0:3,...] will match 3 or less characters, joining them 572 together so that the result is a single string. 573 574 Word()[:,','] will match a comma-separated list of words. 575 576 value[:] or value[0:] or value[0::'d'] is a "greedy" match that, 577 if value does not backtrack, is equivalent to the "*" in a regular 578 expression. 579 value[::'n'] is the "non-greedy" equivalent (preferring as short a 580 match as possible) and value[::'g'] is greedy even when value does 581 provide alternative matches on backtracking. 582 ''' 583 start = 0 584 stop = None 585 step = DEPTH_FIRST 586 separator = None 587 add = False 588 have_index = False 589 if not isinstance(indices, tuple): 590 indices = [indices] 591 for index in indices: 592 if isinstance(index, int): 593 if have_index: 594 raise TypeError( 595 fmt('Multiple slices not supported: {0!r}', index)) 596 start = index 597 stop = index 598 step = DEPTH_FIRST 599 have_index = True 600 elif isinstance(index, slice): 601 if have_index: 602 raise TypeError( 603 fmt('Multiple slices not supported: {0!r}', index)) 604 start = index.start if index.start != None else 0 605 stop = index.stop if not open_stop(index) else None 606 step = index.step if index.step != None else DEPTH_FIRST 607 have_index = True 608 elif index == Ellipsis: 609 add = True 610 elif separator is None: 611 separator = index 612 else: 613 raise TypeError(index) 614 # important for rewriting 615 if stop == 1: 616 add = False 617 return self._lookup(REPEAT)(self, start, stop, step, separator, add, 618 self._lookup(REDUCE))
619
620 - def __gt__(self, function):
621 ''' 622 **self > function** - Process or label the results. 623 624 Create a named pair or apply a function to the results. This is 625 equivalent to `Apply()`. 626 627 :Parameters: 628 629 function 630 This can be a string or a function. 631 632 If a string is given each result is replaced by a 633 (name, value) pair, where name is the string and value is the 634 result. 635 636 If a function is given it is called with the results as an 637 argument. The return value is used *within a list* as the new 638 result. This is equivalent to `Apply()` with raw=False. 639 ''' 640 self.__check(APPLY, function, False) 641 return self._lookup(APPLY)(self, function)
642
643 - def __ge__(self, function):
644 ''' 645 **self >= function** - Process or label the results. 646 647 Apply a function to the results. 648 This is equivalent to `Apply(raw=True)`. 649 650 :Parameters: 651 652 function 653 This is called with the results as an argument. The return value 654 is used as the new result. This is equivalent to `Apply()` with 655 raw=True. 656 ''' 657 self.__check(APPLY_RAW, function, False) 658 return self._lookup(APPLY_RAW)(self, function)
659
660 - def __rshift__(self, function):
661 ''' 662 **self >> function** - Process or label the results (map). 663 664 Create a named pair or apply a function to each result in turn. 665 This is equivalent to `Map()`. It is similar to 666 *self >= function*, except that the function is applied to each 667 result in turn. 668 669 :Parameters: 670 671 function 672 This can be a string or a function. 673 674 If a string is given each result is replaced by a 675 (name, value) pair, where name is the string and value is the 676 result. 677 678 If a function is given it is called with each result in turn. 679 The return values are used as the new result. 680 ''' 681 self.__check(MAP, function, False) 682 return self._lookup(MAP)(self, function)
683
684 - def __pow__(self, function):
685 ''' 686 **self \** function** - Process the results (\**kargs). 687 688 Apply a function to keyword arguments 689 This is equivalent to `KApply()`. 690 691 :Parameters: 692 693 function 694 A function that is called with the keyword arguments described below. 695 The return value is used as the new result. 696 697 Keyword arguments: 698 699 stream_in 700 The stream passed to the matcher. 701 702 stream_out 703 The stream returned from the matcher. 704 705 results 706 A list of the results returned. 707 ''' 708 self.__check(KARGS, function, False) 709 return self._lookup(KARGS)(self, function)
710
711 - def __xor__(self, message):
712 ''' 713 **self ^ message** 714 715 Raise a SytaxError. 716 717 :Parameters: 718 719 message 720 The message for the SyntaxError. 721 ''' 722 return self._lookup(RAISE)(self, message)
723
724 - def __check(self, name, other, is_match):
725 ''' 726 Provide some diagnostics if the syntax is completely mixed up. 727 ''' 728 if not isinstance(other, basestring): # can go either way 729 if is_match != isinstance(other, Matcher): 730 if is_match: 731 msg = 'The operator {0} for {1} was applied to something ' \ 732 'that is not a matcher ({2}).' 733 else: 734 msg = 'The operator {0} for {1} was applied to a matcher ' \ 735 '({2}).' 736 msg += ' Check syntax and parentheses.' 737 raise SyntaxError(fmt(msg, name, self, other))
738