1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30 '''
31 Storage of binary values of arbitrary length.
32
33 Endianness is an issue here because we want to naturally "do the right
34 thing" and unfortunately this varies, depending on context. Most target
35 hardware (x86) is little-endian, but network protocols are typically
36 big-endian.
37
38 I personally prefer big-endian for long hex strings - it seems obvious that
39 0x123456 should be encoded as [0x12, 0x34, 0x56]. On the other hand, it
40 also seems reasonable that the integer 1193046 (=0x123456) should be stored
41 small-endian as [0x56, 0x34, 0x12, 0x00] because that is how it is
42 stored in memory. Unfortunately we cannot implement both because integer
43 values do not contain any flag to say how the user specified them (hex or
44 decimal).
45
46 A very similar issue - that integers do not carry any information to say
47 how many leading zeroes were entered by the user - suggests a solution to
48 this problem. To solve the leading zeroes issue we accept integers as
49 strings and do the conversion ourselves. Since we are dealing with strings
50 we can invent an entirely new encoding to specify endianness. We will use
51 little-endian for ints and the "usual" notation since this reflects the
52 hardware (it appeals to the idea that we are simply taking the chunk of
53 memory in which the integer existed and using it directly). For big endian,
54 we will use a trailing type flag (ie change "ends") in strings.
55
56 So 1193046, "1193046", 0x123456, "0x123456" all encode to [0x56, 0x34, 0x12]
57 (module some questions about implicit/explicit lengths).
58
59 But "123456x0" encodes to [0x12, 0x34, 0x56]. This does have a slight
60 wrinkle - 100b0 looks like a hex value (but is not, as it does not start
61 with 0x).
62
63 Note: No attempt is made to handle sign (complements etc).
64 '''
65
66
67
68
69
70 if bytes is str:
71 print('Binary parsing unsupported in this Python version')
72 else:
73
74
75 STRICT = 'strict'
76
77
78 - class Int(int):
79 '''
80 An integer with a length (the number of bits). This extends Python's type
81 system so that we can distinguish between different integer types, which
82 may have different encodings.
83 '''
84
87
89 super(Int, self).__init__()
90 self.__length = length
91
94
96 return 'Int({0},{1})'.format(super(Int, self).__str__(),
97 self.__length)
98
101 '''
102 Table of reversed bit patterns for 8 bits.
103 '''
104
105 table = [0] * 256
106 power = [1 << n for n in range(8)]
107 for n in range(8):
108 table[1 << n] = 1 << (7 - n)
109 for i in range(256):
110 if not table[i]:
111 for p in power:
112 if i & p:
113 table[i] |= table[p]
114 table[table[i]] = i
115 return table
116
119 '''
120 A sequence of bits, of arbitrary length. Has similar semantics to
121 strings, in that a single index is itself a BitString (of unit length).
122
123 This is intended as a standard fmt for arbitrary binary data, to help
124 with conversion between other types. In other words, convert to and from
125 this, and then chain conversions.
126
127 BitStr are stored as a contiguous sequence in an array of bytes. Both bits
128 and bytes are "little endian" - this allows arbitrary lengths of bits,
129 at arbitrary offsets, to be given values without worrying about
130 alignment.
131
132 The bit sequence starts at bit 'offset' in the first byte and there are
133 a total of 'length' bits. The number of bytes stored is the minimum
134 implied by those two values, with zero padding.
135 '''
136
137 __swap = swap_table()
138
139 - def __init__(self, value=None, length=0, offset=0):
140 '''
141 value is a bytes() instance that contains the data.
142
143 length is the number of valid bits. If given as a float it is the
144 number of bytes (bits = int(float) * 8 + decimal(float) * 10)
145
146 offset is the index of the first valid bit in the value.
147 '''
148 if value is None:
149 value = bytes()
150 if not isinstance(value, bytes):
151 raise TypeError('BitString wraps bytes: {0!r}'.format(value))
152 if length < 0:
153 raise ValueError('Negative length: {0!r}'.format(length))
154 if not 0 <= offset < 8 :
155 raise ValueError('Non-byte offset: {0!r}'.format(offset))
156 self.__bytes = value
157 self.__length = unpack_length(length)
158 self.__offset = offset
159 if len(value) != bytes_for_bits(self.__length, self.__offset):
160 raise ValueError('Inconsistent length: {0!r}/{1!r}'
161 .format(value, length))
162
163 - def bytes(self, offset=0):
164 '''
165 Return a series of bytes values, which encode the data for len(self)
166 bits when offset=0 (with final padding in the last byte if necessary).
167 It is the caller's responsibility to discard any trailing bits.
168
169 When 0 < offset < 8 then the data are zero-padded by offset bits first.
170 '''
171
172
173
174
175
176
177 return ByteIterator(self.__bytes, self.__length,
178 self.__offset, offset)
179
181 '''
182 Return a series of bits (encoded as booleans) that contain the contents.
183 '''
184 return BitIterator(self.__bytes, 0, self.__length, 1, self.__offset)
185
187 '''
188 For 64 bits or less, show bits grouped by byte (octet), with bytes
189 and bits running from left to right. This is a "picture" of the bits.
190
191 For more than 64 bits, give a hex encoding of bytes (right padded
192 with zeros), shown in big-endian fmt.
193
194 In both cases, the length in bits is given after a trailing slash.
195
196 Whatever the internal offset, values are displayed with no initial
197 padding.
198 '''
199 if self.__length > 64:
200 hex_ = ''.join(hex(x)[2:] for x in self.bytes())
201 return '{0}x0/{1}'.format(hex_, self.__length)
202 else:
203 chars = []
204 byte = []
205 count = 0
206 for bit in self.bits():
207 if not count % 8:
208 chars.extend(byte)
209 byte = []
210 if count:
211 chars.append(' ')
212 if bit.zero():
213 byte.append('0')
214 else:
215 byte.append('1')
216 count += 1
217 chars.extend(byte)
218 return '{0}b0/{1}'.format(''.join(chars), self.__length)
219
221 '''
222 An explicit display of internal state, including padding and offset.
223 '''
224 return 'BitString({0!r}, {1!r}, {2!r})' \
225 .format(self.__bytes, self.__length, self.__offset)
226
229
231 '''
232 Are all bits zero?
233 '''
234 for byte in self.__bytes:
235 if byte != 0:
236 return False
237 return True
238
240 '''
241 The internal offset. This is not useful as an external API, but
242 helps with debugging.
243 '''
244 return self.__offset
245
248
250 '''
251 Combine two sequences, appending then together.
252 '''
253 bbs = bytearray(self.to_bytes())
254 matching_offset = self.__length % 8
255 for byte in other.bytes(matching_offset):
256 if matching_offset:
257 bbs[-1] |= byte
258 matching_offset = False
259 else:
260 bbs.append(byte)
261 return BitString(bytes(bbs), self.__length + len(other))
262
264 '''
265 Return a bytes() object, right-padded with zero bits of necessary.
266 '''
267 if self.__offset == offset:
268 return self.__bytes
269 else:
270 return bytes(self.bytes(offset))
271
272 - def to_int(self, big_endian=False):
273 '''
274 Convert the entire bit sequence (of any size) to an integer.
275
276 Big endian conversion is only possible if the bits form a whole number
277 of bytes.
278 '''
279 if big_endian and self.__length % 8:
280 raise ValueError('Length is not a multiple of 8 bits, so big '
281 'endian integer poorly defined: {0}'
282 .format(self.__length))
283 bbs = self.bytes()
284 if not big_endian:
285 bbs = reversed(list(bbs))
286 value = 0
287 for byte in bbs:
288 value = (value << 8) + byte
289 return Int(value, self.__length)
290
291 - def to_str(self, encoding=None, errors='strict'):
301
304
307
309 inv = bytearray([0xff ^ b for b in self.bytes()])
310 if self.__length % 8:
311 inv[-1] &= 0xff >> self.__length % 8
312 return BitString(bytes(inv), self.__length)
313
315 if not isinstance(index, slice):
316 index = slice(index, index+1, None)
317 (start, stop, step) = index.indices(self.__length)
318 if step == 1:
319 start += self.__offset
320 stop += self.__offset
321 bbs = bytearray(self.__bytes[start // 8:bytes_for_bits(stop)])
322 if start % 8:
323 bbs[0] &= 0xff << start % 8
324 if stop % 8:
325 bbs[-1] &= 0xff >> 8 - stop % 8
326 return BitString(bytes(bbs), stop - start, start % 8)
327 else:
328 acc = BitString()
329 for byte in BitIterator(self.__bytes, start, stop, step,
330 self.__offset):
331 acc += byte
332 return acc
333
335
336
337 if not isinstance(other, BitString) \
338 or self.__length != other.__length:
339 return False
340 for (bb1, bb2) in zip(self.bytes(), other.bytes()):
341 if bb1 != bb2:
342 return False
343 return True
344
346 return hash(self.__bytes) ^ self.__length
347
348 @staticmethod
350 '''
351 Create a BitString from a byte.
352 '''
353 return BitString.from_int(value, 8)
354
355 @staticmethod
357 '''
358 Create a BitString from a 32 bit integer.
359 '''
360 return BitString.from_int(value, 32, big_endian)
361
362 @staticmethod
364 '''
365 Create a BitString from a 64 bit integer.
366 '''
367 return BitString.from_int(value, 64, big_endian)
368
369 @staticmethod
370 - def from_int(value, length=None, big_endian=None):
371 '''
372 Value can be an int, or a string with a leading or trailing tag.
373 A plain int, or no tag, or leading tag, is byte little-endian by
374 default.
375
376 Length and big-endianness are inferred from the fmt for values
377 given as strings, but explicit parameters override these.
378 If no length is given, and none can be inferred, 32 bits is assumed
379 (bit length cannot be inferred for decimal values, even as strings).
380
381 The interpretation of big-endian values depends on the base and is
382 either very intuitive and useful, or completely stupid. Use at your
383 own risk.
384
385 Big-endian hex values must specify an exact number of bytes (even
386 number of hex digits). Each separate byte is assigned a value
387 according to big-endian semantics, but with a byte small-endian
388 order is used. This is consistent with the standard conventions for
389 network data. So, for example, 1234x0 gives two bytes. The first
390 contains the value 0x12, the second the value 0x34.
391
392 Big-endian binary values are taken to be a "picture" of the bits,
393 with the array reading from left to right. So 0011b0 specifies
394 four bits, starting with two zeroes.
395
396 Big-endian decimal and octal values are treated as hex values.
397 '''
398
399 bits = None
400 if isinstance(value, str):
401 value.strip()
402
403 if value.endswith('0') and len(value) > 1 and \
404 not value[-2].isdigit() \
405 and not (len(value) == 3 and value.startswith('0')):
406 value = '0' + value[-2] + value[0:-2]
407 if big_endian is None:
408 big_endian = True
409
410 if value.startswith('0d') or value.startswith('0D'):
411 value = value[2:]
412
413 if len(value) > 1 and not value[1].isdigit() and length is None:
414 bits = {'b':1, 'o':3, 'x':4}.get(value[1].lower(), None)
415 if not bits:
416 raise ValueError('Unexpected base: {0!r}'.format(value))
417 length = bits * (len(value) - 2)
418 if big_endian and bits == 1:
419
420 value = value[0:2] + value[-1:1:-1]
421 value = int(value, 0)
422 if length is None:
423 try:
424
425 length = len(value)
426 except TypeError:
427
428 length = 32
429 length = unpack_length(length)
430 if length % 8 and big_endian and bits != 1:
431 raise ValueError('A big-endian int with a length that '
432 'is not an integer number of bytes cannot be '
433 'encoded as a stream of bits: {0!r}/{1!r}'
434 .format(value, length))
435 bbs, val = bytearray(), value
436 for _index in range(bytes_for_bits(length)):
437 bbs.append(val & 0xff)
438 val >>= 8
439 if val > 0:
440 raise ValueError('Value contains more bits than length: %r/%r' %
441 (value, length))
442
443 if big_endian and bits != 1:
444 bbs = reversed(bbs)
445 return BitString(bytes(bbs), length)
446
447 @staticmethod
449 '''
450 Unpack is called for each item in turn (so should be, say, from_byte).
451 '''
452 accumulator = BitString()
453 for item in value:
454 accumulator += unpack(item)
455 return accumulator
456
457 @staticmethod
459 '''
460 Create a BitString from a bytearray.
461 '''
462 if not isinstance(value, bytes):
463 value = bytes(value)
464 return BitString(value, len(value) * 8)
465
466 @staticmethod
476
479 '''
480 Length is in bits, unless a decimal is specified, in which case it
481 it has the structure bytes.bits. Obviously this is ambiguous with float
482 values (eg 3.1 or 3.10), but since we only care about bits 0-7 we can
483 avoid any issues by requiring that range.
484 '''
485 if isinstance(length, str):
486 try:
487 length = int(length, 0)
488 except ValueError:
489 length = float(length)
490 if isinstance(length, int):
491 return length
492 if isinstance(length, float):
493 nbytes = int(length)
494 bits = int(10 * (length - nbytes) + 0.5)
495 if bits < 0 or bits > 7:
496 raise ValueError('BitStr specification must be between 0 and 7')
497 return nbytes * 8 + bits
498 raise TypeError('Cannot infer length from %r' % length)
499
502 '''
503 The number of bytes required to specify the given number of bits.
504 '''
505 return (bits + 7 + offset) // 8
506
509 '''
510 A sequence of bits (used by BitString).
511 '''
512
513 - def __init__(self, value, start, stop, step, offset):
514 assert 0 <= offset < 8
515 self.__bytes = value
516 self.__start = start
517 self.__stop = stop
518 self.__step = step
519 self.__offset = offset
520 self.__index = start
521
524
526 if (self.__step > 0 and self.__index < self.__stop) \
527 or (self.__step < 0 and self.__index > self.__stop):
528 index = self.__index + self.__offset
529 byte = self.__bytes[index // 8] >> index % 8
530 self.__index += self.__step
531 return ONE if byte & 0x1 else ZERO
532 else:
533 raise StopIteration()
534
537 '''
538 A sequence of bytes (used by BitString).
539 '''
540
541 - def __init__(self, value, length, existing, required):
542 assert 0 <= required < 8
543 assert 0 <= existing < 8
544 self.__bytes = value
545 self.__length = length
546 self.__required = required
547 self.__existing = existing
548 if self.__required > self.__existing:
549 self.__index = -1
550 else:
551 self.__index = 0
552 self.__total = 0
553
556
564
566 '''
567 Already aligned, so return next byte.
568 '''
569 if self.__index < len(self.__bytes):
570 byte = self.__bytes[self.__index]
571 self.__index += 1
572 return byte
573 else:
574 raise StopIteration()
575
577 '''
578 No longer understand this. Replace with BitStream project?
579 '''
580 if self.__index < 0:
581 if self.__total < self.__length:
582
583 byte = 0xff & (self.__bytes[0] <<
584 (self.__required - self.__existing))
585 self.__index = 0
586 self.__total = 8 - self.__required
587 return byte
588 else:
589 raise StopIteration()
590 else:
591 if self.__total < self.__length:
592 byte = 0xff & (self.__bytes[self.__index] >>
593 (8 - self.__required + self.__existing))
594 self.__index += 1
595 self.__total += self.__required
596 else:
597 raise StopIteration()
598 if self.__total < self.__length:
599 byte |= 0xff & (self.__bytes[self.__index] <<
600 (self.__required - self.__existing))
601 self.__total += 8 - self.__required
602 return byte
603
605 '''
606 No longer understand this. Replace with BitStream project?
607 '''
608 if self.__total < self.__length:
609 byte = 0xff & (self.__bytes[self.__index] >>
610 (self.__existing - self.__required))
611 self.__index += 1
612 self.__total += 8 - self.__existing + self.__required
613 else:
614 raise StopIteration()
615 if self.__total < self.__length:
616 byte |= 0xff & (self.__bytes[self.__index] <<
617 (8 - self.__existing + self.__required))
618 self.__total += self.__existing - self.__required
619 return byte
620
621
622 ONE = BitString(b'\x01', 1)
623 ZERO = BitString(b'\x00', 1)
624