
Source Code for Module lepl.lexer._test.matchers

  1   
  2  # The contents of this file are subject to the Mozilla Public License 
  3  # (MPL) Version 1.1 (the "License"); you may not use this file except 
  4  # in compliance with the License. You may obtain a copy of the License 
  5  # at http://www.mozilla.org/MPL/ 
  6  # 
  7  # Software distributed under the License is distributed on an "AS IS" 
  8  # basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See 
  9  # the License for the specific language governing rights and 
 10  # limitations under the License. 
 11  # 
 12  # The Original Code is LEPL (http://www.acooke.org/lepl) 
 13  # The Initial Developer of the Original Code is Andrew Cooke. 
 14  # Portions created by the Initial Developer are Copyright (C) 2009-2010 
 15  # Andrew Cooke (andrew@acooke.org). All Rights Reserved. 
 16  # 
 17  # Alternatively, the contents of this file may be used under the terms 
 18  # of the LGPL license (the GNU Lesser General Public License, 
 19  # http://www.gnu.org/licenses/lgpl.html), in which case the provisions 
 20  # of the LGPL License are applicable instead of those above. 
 21  # 
 22  # If you wish to allow use of your version of this file only under the 
 23  # terms of the LGPL License and not to allow others to use your version 
 24  # of this file under the MPL, indicate your decision by deleting the 
 25  # provisions above and replace them with the notice and other provisions 
 26  # required by the LGPL License.  If you do not delete the provisions 
 27  # above, a recipient may use your version of this file under either the 
 28  # MPL or the LGPL License. 
 29   
 30  ''' 
 31  Wide range of tests for lexer. 
 32  ''' 
 33   
 34  # pylint: disable-msg=R0201, R0904, R0903, R0914 
 35  # tests 
 36   
 37  from logging import basicConfig, DEBUG 
 38  from math import sin, cos 
 39  from operator import add, sub, truediv, mul 
 40  from unittest import TestCase 
 41   
 42  from lepl.lexer.matchers import Token 
 43  from lepl.lexer.support import LexerError, RuntimeLexerError 
 44  from lepl.matchers.core import Literal, Delayed 
 45  from lepl.matchers.derived import Real,  Any, Eos, UnsignedReal, Word 
 46  from lepl.matchers.combine import Or 
 47  from lepl.support.lib import str 
 48  from lepl.support.node import Node 
 49   
 50   
 51  #basicConfig(level=DEBUG) 
 52   
 53   
 54  def str26(value): 
 55      ''' 
 56      Convert to string with crude hack for 2.6 Unicode 
 57      ''' 
 58      string = str(value) 
 59      return string.replace("u'", "'") 
 60   
 61   
 62  class RegexpCompilationTest(TestCase): 
 63      ''' 
 64      Test whether embedded matchers are converted to regular expressions. 
 65      ''' 
 66   
 67      def test_literal(self): 
 68          ''' 
 69          Simple literal should compile directly. 
 70          ''' 
 71          token = Token(Literal('abc')) 
 72          token.compile() 
 73          assert token.regexp == 'abc', repr(token.regexp) 
 74   
 75      def test_words(self): 
 76          ''' 
 77          This used to be impossible. 
 78          ''' 
 79          results = Token(Word())[:].parse('foo bar') 
 80          assert results == ['foo', 'bar'], results 
 81   
 82      def test_real(self): 
 83          ''' 
 84          A real is more complex, but still compiles. 
 85          ''' 
 86          token = Token(Real(exponent='Ee')) 
 87          token.compile() 
 88          assert token.regexp == \ 
 89              '(?:[\\+\\-])?(?:(?:[0-9](?:[0-9])*)?\\.[0-9](?:[0-9])*|[0-9](?:[0-9])*(?:\\.)?)(?:[Ee](?:[\\+\\-])?[0-9](?:[0-9])*)?', \ 
 90              repr(token.regexp) 
 91   
 92      def test_impossible(self): 
 93          ''' 
 94          Cannot compile arbitrary functions. 
 95          ''' 
 96          try: 
 97              token = Token(Real() > (lambda x: x)) 
 98              token.compile() 
 99              assert False, 'Expected error' 
100          except LexerError: 
101              pass 
102   
103   
104  class TokenRewriteTest(TestCase): 
105      ''' 
106      Test token support. 
107      ''' 
108   
109      def test_defaults(self): 
110          ''' 
111          Basic configuration. 
112          ''' 
113          #basicConfig(level=DEBUG) 
114          reals = (Token(Real()) >> float)[:] 
115          reals.config.lexer() 
116          parser = reals.get_parse() 
117          results = parser('1 2.3') 
118          assert results == [1.0, 2.3], results 
119   
120      def test_string_arg(self): 
121          ''' 
122          Skip anything (not just spaces). 
123          ''' 
124          words = Token('[a-z]+')[:] 
125          words.config.lexer(discard='.') 
126          parser = words.get_parse() 
127          results = parser('abc defXghi') 
128          assert results == ['abc', 'def', 'ghi'], results 
129   
130      def test_bad_error_msg(self): 
131          ''' 
132          An ugly error message. 
133          ''' 
134          #basicConfig(level=DEBUG) 
135          words = Token('[a-z]+')[:] 
136          words.config.lexer() 
137          parser = words.get_parse_sequence() 
138          try: 
139              parser('abc defXghi') 
140              assert False, 'expected error' 
141          except RuntimeLexerError as err: 
142              assert str(err) == "No token for 'Xghi' at offset 7, value 'X' of 'abc defXghi'.", str(err) 
143   
144      def test_good_error_msg(self): 
145          ''' 
146          Better error message with streams. 
147          ''' 
148          #basicConfig(level=DEBUG) 
149          words = Token('[a-z]+')[:] 
150          words.config.lexer() 
151          parser = words.get_parse_string() 
152          try: 
153              parser('abc defXghi') 
154              assert False, 'expected error' 
155          except RuntimeLexerError as err: 
156              assert str(err) == "No token for 'Xghi' at line 1, character 8 of 'abc defXghi'.", str(err) 
157   
158      def test_expr_with_functions(self): 
159          ''' 
160          Expression with function calls and appropriate binding. 
161          ''' 
162   
163          #basicConfig(level=DEBUG) 
164   
165          # pylint: disable-msg=C0111, C0321 
166          class Call(Node): pass 
167          class Term(Node): pass 
168          class Factor(Node): pass 
169          class Expression(Node): pass 
170   
171          value = Token(Real()) > 'value' 
172          name = Token('[a-z]+') 
173          symbol = Token('[^a-zA-Z0-9\\. ]') 
174   
175          expr = Delayed() 
176          open_ = ~symbol('(') 
177          close = ~symbol(')') 
178          funcn = name > 'name' 
179          call = funcn & open_ & expr & close > Call 
180          term = call | value | open_ & expr & close > Term 
181          muldiv = symbol(Any('*/')) > 'operator' 
182          factor = term & (muldiv & term)[:] > Factor 
183          addsub = symbol(Any('+-')) > 'operator' 
184          expr += factor & (addsub & factor)[:] > Expression 
185          line = expr & Eos() 
186   
187          line.config.trace_stack(True).lexer() 
188          parser = line.get_parse_string() 
189          results = str26(parser('1 + 2*sin(3+ 4) - 5')[0]) 
190          assert results == """Expression 
191   +- Factor 
192   |   `- Term 
193   |       `- value '1' 
194   +- operator '+' 
195   +- Factor 
196   |   +- Term 
197   |   |   `- value '2' 
198   |   +- operator '*' 
199   |   `- Term 
200   |       `- Call 
201   |           +- name 'sin' 
202   |           `- Expression 
203   |               +- Factor 
204   |               |   `- Term 
205   |               |       `- value '3' 
206   |               +- operator '+' 
207   |               `- Factor 
208   |                   `- Term 
209   |                       `- value '4' 
210   +- operator '-' 
211   `- Factor 
212       `- Term 
213           `- value '5'""", '[' + results + ']' 
214   
215   
216      def test_expression2(self): 
217          ''' 
218          As before, but with evaluation. 
219          ''' 
220   
221          #basicConfig(level=DEBUG) 
222   
223          # we could do evaluation directly in the parser actions, but by 
224          # using the nodes instead we allow future expansion into a full 
225          # interpreter 
226   
227          # pylint: disable-msg=C0111, C0321 
228          class BinaryExpression(Node): 
229              op = lambda x, y: None 
230              def __float__(self): 
231                  return self.op(float(self[0]), float(self[1])) 
232   
233          class Sum(BinaryExpression): op = add 
234          class Difference(BinaryExpression): op = sub 
235          class Product(BinaryExpression): op = mul 
236          class Ratio(BinaryExpression): op = truediv 
237   
238          class Call(Node): 
239              funs = {'sin': sin, 
240                      'cos': cos} 
241              def __float__(self): 
242                  return self.funs[self[0]](self[1]) 
243   
244          # we use unsigned float then handle negative values explicitly; 
245          # this lets us handle the ambiguity between subtraction and 
246          # negation which requires context (not available to the lexer) 
247          # to resolve correctly. 
248          number = Token(UnsignedReal()) 
249          name = Token('[a-z]+') 
250          symbol = Token('[^a-zA-Z0-9\\. ]') 
251   
252          expr = Delayed() 
253          factor = Delayed() 
254   
255          real_ = Or(number >> float, 
256                     ~symbol('-') & number >> (lambda x: -float(x))) 
257   
258          open_ = ~symbol('(') 
259          close = ~symbol(')') 
260          trig = name(Or('sin', 'cos')) 
261          call = trig & open_ & expr & close > Call 
262          parens = open_ & expr & close 
263          value = parens | call | real_ 
264   
265          ratio = value & ~symbol('/') & factor > Ratio 
266          prod = value & ~symbol('*') & factor > Product 
267          factor += prod | ratio | value 
268   
269          diff = factor & ~symbol('-') & expr > Difference 
270          sum_ = factor & ~symbol('+') & expr > Sum 
271          expr += sum_ | diff | factor | value 
272   
273          line = expr & Eos() 
274          parser = line.get_parse() 
275   
276          def myeval(text): 
277              result = parser(text) 
278              return float(result[0]) 
279   
280          self.assertAlmostEqual(myeval('1'), 1) 
281          self.assertAlmostEqual(myeval('1 + 2*3'), 7) 
282          self.assertAlmostEqual(myeval('1 - 4 / (3 - 1)'), -1) 
283          self.assertAlmostEqual(myeval('1 -4 / (3 -1)'), -1) 
284          self.assertAlmostEqual(myeval('1 + 2*sin(3+ 4) - 5'), -2.68602680256) 
285   
286   
287  class ErrorTest(TestCase): 
288      ''' 
289      Test various error messages. 
290      ''' 
291   
292      def test_mixed(self): 
293          ''' 
294          Cannot mix tokens and non-tokens at same level. 
295          ''' 
296          bad = Token(Any()) & Any() 
297          try: 
298              bad.get_parse() 
299              assert False, 'expected failure' 
300          except LexerError as err: 
301              assert str(err) == 'The grammar contains a mix of Tokens and ' \ 
302                  'non-Token matchers at the top level. If ' \ 
303                  'Tokens are used then non-token matchers ' \ 
304                  'that consume input must only appear "inside" ' \ 
305                  'Tokens. The non-Token matchers include: ' \ 
306                  'Any(None).', str(err) 
307          else: 
308              assert False, 'wrong exception' 
309   
310      def test_bad_space(self): 
311          ''' 
312          An unexpected character fails to match. 
313          ''' 
314          token = Token('a') 
315          token.config.clear().lexer(discard='b') 
316          parser = token.get_parse() 
317          assert parser('a') == ['a'], parser('a') 
318          assert parser('b') == None, parser('b') 
319          try: 
320              parser('c') 
321              assert False, 'expected failure' 
322          except RuntimeLexerError as err: 
323              assert str(err) == "No token for 'c' at line 1, character 1 of 'c'.", str(err) 
324   
325      def test_incomplete(self): 
326          ''' 
327          A token is not completely consumed (this doesn't raise error messages, 
328          it just fails to match). 
329          ''' 
330          token = Token('[a-z]+')(Any()) 
331          token.config.no_full_first_match() 
332          parser = token.get_parse_string() 
333          assert parser('a') == ['a'], parser('a') 
334          # even though this matches the token, the Any() sub-matcher doesn't 
335          # consume all the contents 
336          assert parser('ab') == None, parser('ab') 
337          token = Token('[a-z]+')(Any(), complete=False) 
338          token.config.no_full_first_match() 
339          parser = token.get_parse_string() 
340          assert parser('a') == ['a'], parser('a') 
341          # whereas this is fine, since complete=False 
342          assert parser('ab') == ['a'], parser('ab') 
343   
344      def test_none_discard(self): 
345          ''' 
346          If discard is '', discard nothing. 
347          ''' 
348          token = Token('a') 
349          token.config.lexer(discard='').no_full_first_match() 
350          parser = token[1:].get_parse() 
351          result = parser('aa') 
352          assert result == ['a', 'a'], result 
353          try: 
354              parser(' a') 
355          except RuntimeLexerError as error: 
356              assert str26(error) == "No discard for ' a'.", str26(error) 
357   
358      def test_paren(self): 
359          try: 
360              Token('(').match('foo') 
361              assert False, 'expected error' 
362          except Exception as e: 
363              assert "Cannot parse regexp '('" in str(e), e 
364   
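
The tests above exercise the lexer through LEPL's public API. As a quick orientation, the short sketch below (not part of this module; added here purely for illustration, using only the imports and calls that appear in the tests) shows the basic pattern that test_defaults relies on: wrap a sub-matcher in Token, activate the lexer with config.lexer(), and parse.

# Illustrative sketch only; assumes LEPL is installed and mirrors test_defaults above.
from lepl.lexer.matchers import Token
from lepl.matchers.derived import Real

reals = (Token(Real()) >> float)[:]   # a sequence of real-number tokens, each converted to float
reals.config.lexer()                  # enable the lexer; inter-token spaces are discarded by default
parser = reals.get_parse()
print(parser('1 2.3'))                # per test_defaults, this yields [1.0, 2.3]

Passing a discard pattern to config.lexer(), as test_string_arg does, changes which inter-token text is skipped.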