1 from lepl.support.node import Node
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31 '''
32 Support for managing sets of intervals (not all used).
33 An interval is an open range implemented as a tuple pair. For example (2,5)
34 is an interval that represents the integers 2,3,4 and 5.
35 '''
36
37 from bisect import bisect_left
38 from collections import deque
39
40
42 '''
43 A set of possible values for a character, described as a collection of
44 intervals. Each interval is [a, b] (ie a <= x <= b, where x is a character
45 code). We use open bounds to avoid having to specify an "out of range"
46 value, making it easier to work with a variety of alphabets.
47
48 The intervals are stored in a list, ordered by a, joining overlapping
49 intervals as necessary.
50 '''
51
52
53
54
55 - def __init__(self, intervals, alphabet):
65
72
74 '''
75 Pre-construct the index used for bisection.
76 '''
77 self.__index = [interval[1] for interval in self.__intervals]
78
80 '''
81 Add an interval to the existing intervals.
82
83 This maintains self.__intervals in the normalized form described above.
84 '''
85 (a1, b1) = interval
86 if b1 < a1:
87 (a1, b1) = (b1, a1)
88 intervals = deque()
89 done = False
90 while self.__intervals:
91
92
93 (a0, b0) = self.__intervals.popleft()
94 if a0 <= a1:
95 if b0 < a1 and b0 != self.alphabet.before(a1):
96
97
98 intervals.append((a0, b0))
99 elif b1 <= b0:
100
101
102 intervals.append((a0, b0))
103 done = True
104 break
105 else:
106
107
108
109 (a1, b1) = (a0, b1)
110 else:
111 if b1 < a0 and b1 != self.alphabet.before(a0):
112
113
114 intervals.append((a1, b1))
115 intervals.append((a0, b0))
116 done = True
117 break
118 elif b0 <= b1:
119
120
121 pass
122 else:
123
124
125 intervals.append((a1, b0))
126 done = True
127 break
128 if not done:
129 intervals.append((a1, b1))
130 intervals.extend(self.__intervals)
131 self.__intervals = intervals
132
135
138
140 '''
141 The number of intervals in the range.
142 '''
143 return len(self.__intervals)
144
146 return self.__intervals[index]
147
149 return iter(self.__intervals)
150
152 '''
153 Does the value lie within the intervals?
154 '''
155 if self.__index:
156 index = bisect_left(self.__index, c)
157 if index < len(self.__intervals):
158 (a, b) = self.__intervals[index]
159 return a <= c <= b
160 return False
161
163 return hash(self.__str)
164
166
167
168 return isinstance(other, Intervals) and self.__str == other.__str
169
170
172 '''
173 A set of intervals that is part of a DFA/NFA graph. This is separate
174 from the `Intervals` instance since we need to clone the node, but can
175 keep the intervals (necessary so that node equality works correctly when
176 the same character is used more than once).
177 '''
178
182
184 '''
185 Add an interval to the range.
186 '''
187 self.__intervals.append(interval)
188
190 return str(self.__intervals)
191
193 return repr(self.__intervals)
194
196 '''
197 The number of intervals in the range.
198 '''
199 return len(self.__intervals)
200
202 return self.__intervals[index]
203
205 return iter(self.__intervals)
206
208 return c in self.__intervals
209
210 - def build(self, graph, src, dest):
211 '''
212 Insert within an NFA graph (although at this level, it's not clear it's
213 NFA).
214 '''
215 graph.connect(src, dest, self)
216
217
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
346 '''
347 Map from intervals to values.
348
349 Note - this is for open intervals! This means it will not work as
350 expected for continuous variables (which will overlap when two intervals
351 share a single boundary value). In other words, you cannot store
352 (1,2) and (2,3) together because both contain 2.
353 '''
354
355
356
357
363
365 '''
366 Build the internal indices. Called automatically when necessary.
367 '''
368 second = lambda x: x[1]
369 self.__intervals = list(sorted(self.keys(), key=second))
370
371 self.__index = list(map(second, self.__intervals))
372
379
381 '''
382 The argument here is a single value, not an interval.
383 '''
384 if self.__index is None:
385 self.index()
386 if self.__index:
387 try:
388 index = bisect_left(self.__index, point)
389 except TypeError as e:
390 from lepl.regexp.core import RegexpError
391 raise RegexpError(
392 """Input characters are inconsistent with the given alphabet.
393 This error is often triggered by using the default configuration with non-text input; disable with matcher.config.no_compile_to_regexp().
394 Alternatively, configure with a suitable alphabet.""")
395 if index < len(self.__index):
396
397 (a, b) = interval = self.__intervals[index]
398 if a <= point <= b:
399 return super(IntervalMap, self).__getitem__(interval)
400 return None
401
405
407 (a, b) = interval
408 return self[a] is not None or self[b] is not None
409
410
412 '''
413 Similar to Fragments, but associates a value with each initial interval;
414 on retrieval returns a list of all values associated with fragment.
415 '''
416
417
418
419
423
424 - def append(self, character, value):
431
433 '''
434 Add an interval to the existing intervals.
435 '''
436 (a1, b1) = interval
437 if b1 < a1:
438 (a1, b1) = (b1, a1)
439 intervals = deque()
440 alphabet = self.alphabet
441 done = False
442 while self.__intervals:
443 ((a0, b0), v0) = self.__intervals.popleft()
444 if a0 <= a1:
445 if b0 < a1:
446
447
448 intervals.append(((a0, b0), v0))
449 elif b1 <= b0:
450
451
452
453 if a0 < a1:
454
455 intervals.append(((a0, alphabet.before(a1)), v0))
456
457 intervals.append(((a1, b1), v0+v1))
458 if b1 < b0:
459
460 intervals.append(((alphabet.after(b1), b0), v0))
461 done = True
462 break
463 else:
464
465
466
467 if a0 < a1:
468
469 intervals.append(((a0, alphabet.before(a1)), v0))
470
471 intervals.append(((a1, b0), v0+v1))
472 a1 = alphabet.after(b0)
473 else:
474 if b1 < a0:
475
476 intervals.append(((a1, b1), v1))
477 intervals.append(((a0, b0), v0))
478 done = True
479 break
480 elif b0 <= b1:
481
482
483
484
485
486 intervals.append(((a1, alphabet.before(a0)), v1))
487
488 intervals.append(((a0, b0), v0+v1))
489 if b1 > b0:
490 a1 = alphabet.after(b0)
491 else:
492 done = True
493 break
494 else:
495
496
497
498 intervals.append(((a1, alphabet.before(a0)), v1))
499
500 intervals.append(((a0, b1), v0+v1))
501
502 intervals.append(((alphabet.after(b1), b0), v0))
503 done = True
504 break
505 if not done:
506 intervals.append(((a1, b1), v1))
507 intervals.extend(self.__intervals)
508 self.__intervals = intervals
509
511 '''
512 The number of intervals contained.
513 '''
514 return len(self.__intervals)
515
517 return self.__intervals[index]
518
520 return iter(self.__intervals)
521