1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30 '''
31 Wide range of tests for lexer.
32 '''
33
34
35
36
37 from logging import basicConfig, DEBUG
38 from math import sin, cos
39 from operator import add, sub, truediv, mul
40 from unittest import TestCase
41
42 from lepl.lexer.matchers import Token
43 from lepl.lexer.support import LexerError, RuntimeLexerError
44 from lepl.matchers.core import Literal, Delayed
45 from lepl.matchers.derived import Real, Any, Eos, UnsignedReal, Word
46 from lepl.matchers.combine import Or
47 from lepl.support.lib import str
48 from lepl.support.node import Node
49
50
51
52
53
# Body of the helper that is called later in this file as `str26(...)`; its
# `def` line is not present in this listing.  `str` is the name imported from
# lepl.support.lib at the top of the file, not necessarily the builtin.
55 '''
56 Convert to string with crude hack for 2.6 Unicode
57 '''
58 string = str(value)
# Replace every occurrence of u' with ' so that unicode-style reprs compare
# equal to plain-string reprs.  This is a plain text substitution: it does not
# check that the "u" really is a string prefix.
59 return string.replace("u'", "'")
60
61
63 '''
64 Test whether embedded matchers are converted to regular expressions.
65 '''
66
# Test body (its `def` line is not present in this listing): wrap a plain
# Literal in a Token, compile the token, and check the regexp it produced.
68 '''
69 Simple literal should compile directly.
70 '''
71 token = Token(Literal('abc'))
72 token.compile()
# The assertion message is repr(token.regexp), so a failure shows the regexp
# that was actually generated.
73 assert token.regexp == 'abc', repr(token.regexp)
74
# Test body (its `def` line is not present in this listing): build a Token
# from Word(), apply `[:]` to it, and parse a two-word string directly.
76 '''
77 This used to be impossible.
78 '''
79 results = Token(Word())[:].parse('foo bar')
# One result per word is expected; the separating space does not appear.
80 assert results == ['foo', 'bar'], results
81
# Test body (its `def` line is not present in this listing): compile a Token
# wrapping Real(exponent='Ee') and compare the generated regexp with an exact
# expected string.
83 '''
84 A real is more complex, but still compiles.
85 '''
86 token = Token(Real(exponent='Ee'))
87 token.compile()
# The comparison is an exact string match, so any change in how the regexp is
# rendered (grouping, escaping) fails this assertion even if the new regexp
# is equivalent.  The failure message is repr(token.regexp).
88 assert token.regexp == \
89 '(?:[\\+\\-])?(?:(?:[0-9](?:[0-9])*)?\\.[0-9](?:[0-9])*|[0-9](?:[0-9])*(?:\\.)?)(?:[Ee](?:[\\+\\-])?[0-9](?:[0-9])*)?', \
90 repr(token.regexp)
91
# Test body (its `def` line is not present in this listing): a Token whose
# content has an arbitrary Python function attached is expected to be rejected
# with LexerError.
93 '''
94 Cannot compile arbitrary functions.
95 '''
96 try:
# The error may come from constructing the Token or from compile(); both are
# inside the try, so either satisfies the test.
97 token = Token(Real() > (lambda x: x))
98 token.compile()
# Reached only if nothing was raised.  The resulting AssertionError is
# presumably not a LexerError, so it propagates and fails the test.
99 assert False, 'Expected error'
100 except LexerError:
101 pass
102
103
105 '''
106 Test token support.
107 '''
108
# Test body (its `def` line is not present in this listing): parse
# space-separated reals with the lexer configured without arguments.
110 '''
111 Basic configuration.
112 '''
113
# Each Real token is passed through float; `[:]` is applied to the result.
114 reals = (Token(Real()) >> float)[:]
115 reals.config.lexer()
116 parser = reals.get_parse()
117 results = parser('1 2.3')
# Floats, not strings, are expected, and the space between them yields no
# entry in the result.
118 assert results == [1.0, 2.3], results
119
# Test body (its `def` line is not present in this listing): configure the
# lexer with discard='.' and check that characters matching no token are
# dropped rather than reported.
121 '''
122 Skip anything(not just spaces)
123 '''
124 words = Token('[a-z]+')[:]
125 words.config.lexer(discard='.')
126 parser = words.get_parse()
127 results = parser('abc defXghi')
# Both the space and the upper-case 'X' are missing from the expected result;
# only the lower-case runs remain.
128 assert results == ['abc', 'def', 'ghi'], results
129
# Test body (its `def` line is not present in this listing): with the lexer
# configured without arguments, an input character that matches no token must
# raise RuntimeLexerError.  Here the parser comes from get_parse_sequence().
131 '''
132 An ugly error message.
133 '''
134
135 words = Token('[a-z]+')[:]
136 words.config.lexer()
137 parser = words.get_parse_sequence()
138 try:
139 parser('abc defXghi')
# Reached only if no error was raised; the AssertionError is presumably not a
# RuntimeLexerError, so it propagates and fails the test.
140 assert False, 'expected error'
141 except RuntimeLexerError as err:
# The expected text reports the position as a 0-based offset (7 is the index
# of 'X' in the input) together with the offending value.
142 assert str(err) == "No token for 'Xghi' at offset 7, value 'X' of 'abc defXghi'.", str(err)
143
# Test body (its `def` line is not present in this listing): same grammar and
# input as the offset-message test, but the parser comes from
# get_parse_string(), and the expected error text differs.
145 '''
146 Better error message with streams.
147 '''
148
149 words = Token('[a-z]+')[:]
150 words.config.lexer()
151 parser = words.get_parse_string()
152 try:
153 parser('abc defXghi')
# Reached only if no error was raised; the AssertionError is presumably not a
# RuntimeLexerError, so it propagates and fails the test.
154 assert False, 'expected error'
155 except RuntimeLexerError as err:
# The expected text reports the position as line 1, character 8 (1-based: 'X'
# is the eighth character of the input).
156 assert str(err) == "No token for 'Xghi' at line 1, character 8 of 'abc defXghi'.", str(err)
157
# Test body (its `def` line is not present in this listing): build a
# token-based arithmetic grammar with function calls, parse one expression,
# and compare the printed parse tree with an expected text.
159 '''
160 Expression with function calls and appropriate binding.
161 '''
162
163
164
165
# Node subclasses used only as labels: each is the target of a `> Class` in
# the grammar below and appears by name in the expected tree.
166 class Call(Node): pass
167 class Term(Node): pass
168 class Factor(Node): pass
169 class Expression(Node): pass
170
# Three token kinds: reals (labelled 'value'), lower-case names, and any
# single character that is not a letter, a digit, '.' or a space.
171 value = Token(Real()) > 'value'
172 name = Token('[a-z]+')
173 symbol = Token('[^a-zA-Z0-9\\. ]')
174
# `expr` is a Delayed placeholder so that `call` and `term` can refer to it
# before it is filled in by the `+=` further down.
175 expr = Delayed()
# presumably `~` discards the matched token: the expected tree below contains
# no entries for the parentheses.
176 open_ = ~symbol('(')
177 close = ~symbol(')')
178 funcn = name > 'name'
# Python precedence: `&` binds tighter than `|`, and both bind tighter than
# `>`, so in each rule the `> Class` applies to the whole left-hand side.
179 call = funcn & open_ & expr & close > Call
180 term = call | value | open_ & expr & close > Term
181 muldiv = symbol(Any('*/')) > 'operator'
182 factor = term & (muldiv & term)[:] > Factor
183 addsub = symbol(Any('+-')) > 'operator'
184 expr += factor & (addsub & factor)[:] > Expression
# Eos() is appended so the rule also requires end of input after the expression.
185 line = expr & Eos()
186
187 line.config.trace_stack(True).lexer()
188 parser = line.get_parse_string()
# The first parse result is rendered through str26 before comparison.
# NOTE(review): leading whitespace appears to have been collapsed in this
# listing, so the expected tree text below may differ from the real source in
# its indentation -- confirm against the original file before relying on it.
189 results = str26(parser('1 + 2*sin(3+ 4) - 5')[0])
190 assert results == """Expression
191 +- Factor
192 | `- Term
193 | `- value '1'
194 +- operator '+'
195 +- Factor
196 | +- Term
197 | | `- value '2'
198 | +- operator '*'
199 | `- Term
200 | `- Call
201 | +- name 'sin'
202 | `- Expression
203 | +- Factor
204 | | `- Term
205 | | `- value '3'
206 | +- operator '+'
207 | `- Factor
208 | `- Term
209 | `- value '4'
210 +- operator '-'
211 `- Factor
212 `- Term
213 `- value '5'""", '[' + results + ']'
214
215
# Test body (its `def` line is not present in this listing): build a
# token-based arithmetic grammar whose nodes can be converted with float(),
# then check the numeric value of several expressions.
217 '''
218 As before, but with evaluation.
219 '''
220
221
222
223
224
225
226
227
# Base class for two-operand nodes: float(node) applies `op` to the float
# values of child 0 and child 1.
228 class BinaryExpression(Node):
# NOTE(review): placeholder only -- every subclass below replaces `op`.  A
# plain lambda stored on the class is bound as a method when read via
# `self.op`, so this two-argument version would be handed three arguments if
# it were ever called directly.
229 op = lambda x, y: None
230 def __float__(self):
231 return self.op(float(self[0]), float(self[1]))
232
# One subclass per arithmetic operator, each supplying a function from the
# `operator` module.
233 class Sum(BinaryExpression): op = add
234 class Difference(BinaryExpression): op = sub
235 class Product(BinaryExpression): op = mul
236 class Ratio(BinaryExpression): op = truediv
237
# Function-call node: child 0 is used as the key into `funs`, and the function
# found is applied to child 1.
238 class Call(Node):
239 funs = {'sin': sin,
240 'cos': cos}
241 def __float__(self):
242 return self.funs[self[0]](self[1])
243
244
245
246
247
# Tokens: unsigned reals, lower-case names, and any single character that is
# not a letter, a digit, '.' or a space.
248 number = Token(UnsignedReal())
249 name = Token('[a-z]+')
250 symbol = Token('[^a-zA-Z0-9\\. ]')
251
# `expr` and `factor` are Delayed placeholders, filled in by `+=` below, so
# the rules can refer to each other recursively.
252 expr = Delayed()
253 factor = Delayed()
254
# A real is either a bare number or a '-' symbol followed by a number.
# Python precedence: `>>` binds tighter than `&`, so the negating lambda is
# attached to `number` alone, not to the whole `~symbol('-') & number`.
255 real_ = Or(number >> float,
256 ~symbol('-') & number >> (lambda x: -float(x)))
257
258 open_ = ~symbol('(')
259 close = ~symbol(')')
# Only the names 'sin' and 'cos' are accepted as functions, matching the keys
# of Call.funs.
260 trig = name(Or('sin', 'cos'))
261 call = trig & open_ & expr & close > Call
262 parens = open_ & expr & close
263 value = parens | call | real_
264
# In both rules the right operand is `factor` itself, so the rule recurses on
# the right-hand side.
265 ratio = value & ~symbol('/') & factor > Ratio
266 prod = value & ~symbol('*') & factor > Product
267 factor += prod | ratio | value
268
# Same shape one level up: the right operand of '+' and '-' is `expr`.
269 diff = factor & ~symbol('-') & expr > Difference
270 sum_ = factor & ~symbol('+') & expr > Sum
271 expr += sum_ | diff | factor | value
272
273 line = expr & Eos()
274 parser = line.get_parse()
275
# Parse `text` and convert the first result with float(), which triggers the
# __float__ methods defined above when that result is a node.
276 def myeval(text):
277 result = parser(text)
278 return float(result[0])
279
# `self` comes from the enclosing test method, whose header is not in view.
# The third and fourth cases differ only in spacing around '-' and expect the
# same value.
280 self.assertAlmostEqual(myeval('1'), 1)
281 self.assertAlmostEqual(myeval('1 + 2*3'), 7)
282 self.assertAlmostEqual(myeval('1 - 4 / (3 - 1)'), -1)
283 self.assertAlmostEqual(myeval('1 -4 / (3 -1)'), -1)
284 self.assertAlmostEqual(myeval('1 + 2*sin(3+ 4) - 5'), -2.68602680256)
285
286
288 '''
289 Test various error messages.
290 '''
291
# Test body (its `def` line is not present in this listing): combining a Token
# with a bare matcher at the same level must make get_parse() raise LexerError
# with a specific explanatory message.
293 '''
294 Cannot mix tokens and non-tokens at same level.
295 '''
296 bad = Token(Any()) & Any()
297 try:
298 bad.get_parse()
# Reached only if get_parse() raised nothing.
299 assert False, 'expected failure'
300 except LexerError as err:
# The whole message is compared verbatim, including the trailing 'Any(None).'
# that names the offending matcher.
301 assert str(err) == 'The grammar contains a mix of Tokens and ' \
302 'non-Token matchers at the top level. If ' \
303 'Tokens are used then non-token matchers ' \
304 'that consume input must only appear "inside" ' \
305 'Tokens. The non-Token matchers include: ' \
306 'Any(None).', str(err)
# NOTE(review): an `else` clause runs only when the try body completes without
# raising, but the try body always ends in `assert False`, so this branch
# looks unreachable and its 'wrong exception' message is never shown.
307 else:
308 assert False, 'wrong exception'
309
324
343
357
# Fragment of a test body (its `def` line and anything before the `try` are
# not present in this listing): a Token built from the regexp '(' is expected
# to fail with a message that names the regexp.
359 try:
360 Token('(').match('foo')
361 assert False, 'expected error'
# NOTE(review): `except Exception` also catches the AssertionError raised by
# the `assert False` above.  In that case the substring check below fails and
# the test still fails, but the failure message is the caught
# 'expected error' exception rather than a direct report.
362 except Exception as e:
363 assert "Cannot parse regexp '('" in str(e), e
364