binparser
parent
3e404d0b30
commit
d8094ab381
@ -0,0 +1,424 @@
|
||||
; Binary parsing
|
||||
|
||||
;----------------------------------------
|
||||
; Apologia
|
||||
;
|
||||
; Binary parsing and unparsing are transformations between primitive or
|
||||
; composite Scheme values and their external binary representations.
|
||||
;
|
||||
; Examples include reading and writing JPEG, TIFF, MP3, ELF file
|
||||
; formats, communicating with DNS, Kerberos, LDAP, SLP internet
|
||||
; services, participating in Sun RPC and CORBA/IIOP distributed systems,
|
||||
; storing and retrieving (arrays of) floating-point numbers in a
|
||||
; portable and efficient way. This project will propose a set of low- and
|
||||
; intermediate- level procedures that make binary parsing possible.
|
||||
|
||||
; Scheme is a good language to do research in text compression. Text
|
||||
; compression involves a great deal of building and traversing
|
||||
; dictionaries, trees and similar data structures, where Scheme
|
||||
; excels. Performance doesn't matter in research, but the size of
|
||||
; compressed files does (to figure out the bpc for the common
|
||||
; benchmarks). Variable-bit i/o is a necessity. It is implemented
|
||||
; in the present file.
|
||||
|
||||
; ASN.1 corresponds to a higher-level parsing (LR parser
|
||||
; vs. lexer). Information in LDAP responses and X.509 certificates is
|
||||
; structural and recursive, and so lends itself to be processed in
|
||||
; Scheme. Variable bit i/o is necessary, and so is a binary lexer for
|
||||
; a LR parser. Parsing of ASN.1 is a highly profitable enterprise
|
||||
|
||||
;----------------------------------------
|
||||
; The outline of the project
|
||||
;
|
||||
; Primitives and streams
|
||||
;
|
||||
; - read-byte
|
||||
; - read-u8vector (cf. read-string)
|
||||
; - with-input-from-u8vector, with-input-from-encoded-u8vector 'base64,...
|
||||
; building binary i/o streams from a sequence of bytes. Streams over
|
||||
; u8vector, u16vector, etc. provide a serial access to memory. See SRFI-4
|
||||
;
|
||||
; - read-bit, read-bits via overlayed streams given read-byte
|
||||
; implemented in the present file.
|
||||
;
|
||||
; - mmap-u8vector, munmap-u8vector
|
||||
;
|
||||
; Conversions
|
||||
; - u8vector->integer u8vector endianness,
|
||||
; u8vector->sinteger u8vector endianness
|
||||
; These conversion procedures turn a sequence of bytes to an unsigned or
|
||||
; signed integer, minding the byte order. The u8vector in question can
|
||||
; have size 1,2,4,8, 3 etc. bytes. These two functions therefore can be
|
||||
; used to read shorts, longs, extra longs, etc. numbers.
|
||||
; - u8vector-reverse and other useful u8vector operations
|
||||
;
|
||||
; - modf, frexp, ldexp
|
||||
; The above primitives can be emulated in R5RS, yet they are quite handy
|
||||
; (for portable FP manipulation) and can be executed very efficiently by
|
||||
; an FPU.
|
||||
;
|
||||
; Higher-level parsing and combinators
|
||||
; These are combinators that can compose primitives above for more
|
||||
; complex (possibly iterative) actions.
|
||||
;
|
||||
; - skip-bits, next-u8token,...
|
||||
; - IIOP, RPC/XDR, RMI
|
||||
; - binary lexer for existing LR/LL-parsers
|
||||
;
|
||||
; The composition of primitives and combinators will represent binary
|
||||
; parsing language in a _full_ notation. This is similar to XPath
|
||||
; expressions in full notation. Later we need to find out the
|
||||
; most-frequently used patterns of the binary parsing language and
|
||||
; design an abbreviated notation. The latter will need a special
|
||||
; "interpreter". The abbreviated notation may turn out to look like
|
||||
; Olin's regular expressions.
|
||||
|
||||
; $Id: binary-read.scm,v 1.1 2000/10/20 17:49:47 oleg Exp oleg $
|
||||
|
||||
|
||||
;----------------------------------------
|
||||
; Test harness
|
||||
;
|
||||
; The following macro runs built-in test cases -- or does not run,
|
||||
; depending on which of the two lines below you commented out
|
||||
(define-macro (run-test . body) `(begin (display "\n-->Test\n") ,@body))
|
||||
;(define-macro (run-test . body) '(begin #f))
|
||||
;(defmacro run-test body `(begin (display "\n-->Test\n") ,@body))
|
||||
|
||||
;;========================================================================
|
||||
;; Configuration section
|
||||
;;
|
||||
; Performance is very important for binary parsing. We have to get all
|
||||
; help from a particular Scheme system we can get. If a Scheme function
|
||||
; can support the following primitives faster, we should take
|
||||
; advantage of that fact.
|
||||
|
||||
|
||||
;---
|
||||
; Configuration for Gambit. See below for other systems, as well as R5RS
|
||||
; implementations
|
||||
(declare
|
||||
(block)
|
||||
(standard-bindings)
|
||||
)
|
||||
|
||||
(define-macro (logior x y) `(##fixnum.logior ,x ,y))
|
||||
(define-macro (logand x y) `(##fixnum.logand ,x ,y))
|
||||
(define-macro (lsh-left x n) `(##fixnum.shl ,x ,n))
|
||||
(define-macro (lsh-right x n) `(##fixnum.lshr ,x ,n))
|
||||
(define-macro (lsh-left-one x) `(##fixnum.shl ,x 1))
|
||||
(define-macro (lsh-right-one x) `(##fixnum.lshr ,x 1))
|
||||
|
||||
(define-macro (-- x) `(##fixnum.- ,x 1))
|
||||
(define-macro (++ x) `(##fixnum.+ ,x 1))
|
||||
|
||||
(define-macro (bit-set? x mask) ; return x & mask != 0
|
||||
`(##not (##fixnum.zero? (logand ,x ,mask)))
|
||||
)
|
||||
; End of the Gambit-specific configuration section
|
||||
;---
|
||||
|
||||
; combine bytes in the MSB order. A byte may be #f
|
||||
(define (combine-two b1 b2) ; The result is for sure a fixnum
|
||||
(and b1 b2 (logior (lsh-left b1 8) b2)))
|
||||
|
||||
(define (combine-three b1 b2 b3) ; The result is for sure a fixnum
|
||||
(and b1 b2 b3 (logior (lsh-left (logior (lsh-left b1 8) b2) 8) b3)))
|
||||
|
||||
; Here the result may be a BIGNUM
|
||||
(define (combine-bytes . bytes)
|
||||
(cond
|
||||
((null? bytes) 0)
|
||||
((not (car bytes)) #f)
|
||||
(else
|
||||
(let loop ((bytes (cdr bytes)) (result (car bytes)))
|
||||
(cond
|
||||
((null? bytes) result)
|
||||
((not (car bytes)) #f)
|
||||
(else (loop (cdr bytes) (+ (car bytes) (* 256 result)))))))))
|
||||
|
||||
;---
|
||||
; R5RS implementations of the primitives
|
||||
; This is the most portable -- and the slowest implementation
|
||||
|
||||
; See also logical.scm from SLIB
|
||||
; (define (logior x y)
|
||||
; (cond ((= x y) x)
|
||||
; ((zero? x) y)
|
||||
; ((zero? y) x)
|
||||
; (else
|
||||
; (+ (* (logior (quotient x 2) (quotient y 2)) 2)
|
||||
; (if (and (even? x) (even? y)) 0 1)))))
|
||||
; (define (logand x y)
|
||||
; (cond ((= x y) x)
|
||||
; ((zero? x) 0)
|
||||
; ((zero? y) 0)
|
||||
; (else
|
||||
; (+ (* (logand (quotient x 2) (quotient y 2)) 2)
|
||||
; (if (or (even? x) (even? y)) 0 1)))))
|
||||
|
||||
; (define (lsh-left x n) (* x (expt 2 n)))
|
||||
; (define (lsh-right x n) (quotient x (expt 2 n)))
|
||||
; (define (lsh-left-one x) (* x 2))
|
||||
; (define (lsh-right-one x) (quotient x 2))
|
||||
|
||||
|
||||
; (define (-- x) (- x 1))
|
||||
; (define (++ x) (+ x 1))
|
||||
|
||||
; (define (bit-set? x mask) ; return x & mask != 0
|
||||
; (odd? (quotient x mask)) ; mask is an exact power of two
|
||||
; )
|
||||
|
||||
|
||||
;========================================================================
|
||||
; Reading a byte
|
||||
|
||||
; Read-byte is a fundamental primitive; it needs to be
|
||||
; added to the standard. Most of the other functions are library
|
||||
; procedures. The following is an approximation, which clearly doesn't
|
||||
; hold if the port is a Unicode (especially UTF-8) character stream.
|
||||
|
||||
; Return a byte as an exact integer [0,255], or the EOF object
|
||||
(define (read-byte port)
|
||||
(let ((c (read-char port)))
|
||||
(if (eof-object? c) c (char->integer c))))
|
||||
|
||||
; The same as above, but returns #f on EOF.
|
||||
(define (read-byte-f port)
|
||||
(let ((c (read-char port)))
|
||||
(and (not (eof-object? c)) (char->integer c))))
|
||||
|
||||
|
||||
|
||||
;========================================================================
|
||||
; Bit stream
|
||||
|
||||
; -- Function: make-bit-reader BYTE-READER
|
||||
|
||||
; Given a BYTE-READER (a thunk), construct and return a function
|
||||
; bit-reader N
|
||||
;
|
||||
; that reads N bits from a byte-stream represented by the BYTE-READER.
|
||||
; The BYTE-READER is a function that takes no arguments and returns
|
||||
; the current byte as an exact integer [0-255]. The byte reader
|
||||
; should return #f on EOF.
|
||||
; The bit reader returns N bits as an exact unsigned integer,
|
||||
; 0 -... (no limit). N must be a positive integer, otherwise the bit reader
|
||||
; returns #f. There is no upper limit on N -- other than the size of the
|
||||
; input stream itself and the amount of (virtual) memory an OS is willing
|
||||
; to give to your process. If you want to read 1M of _bits_, go ahead.
|
||||
;
|
||||
; It is assumed that the bit order is the most-significant bit first.
|
||||
;
|
||||
; Note the bit reader keeps the following condition true at all times:
|
||||
; (= current-inport-pos (ceiling (/ no-bits-read 8)))
|
||||
; That is, no byte is read until the very moment we really need (some of)
|
||||
; its bits. The bit reader does _not_ "byte read ahead".
|
||||
; Therefore, it can be used to handle a concatenation of different
|
||||
; bit/byte streams *STRICTLY* sequentially, _without_ 'backing up a char',
|
||||
; 'unreading-char' etc. tricks.
|
||||
; For example, make-bit-reader has been used to read GRIB files of
|
||||
; meteorological data, which made of several bitstreams with headers and
|
||||
; tags.
|
||||
; Thus careful attention to byte-buffering and optimization are the
|
||||
; features of this bit reader.
|
||||
;
|
||||
; Usage example:
|
||||
; (define bit-reader (make-bit-reader (lambda () #b11000101)))
|
||||
; (bit-reader 3) ==> 6
|
||||
; (bit-reader 4) ==> 2
|
||||
; The test driver below is another example.
|
||||
;
|
||||
; Notes on the algorithm.
|
||||
; The function recognizes and handles the following special cases:
|
||||
; - the buffer is empty and 8, 16, 24 bits are to be read
|
||||
; - reading all bits which are currently in the byte-buffer
|
||||
; (and then maybe more)
|
||||
; - reading only one bit
|
||||
|
||||
; Since the bit reader is going to be called many times, optimization is
|
||||
; critical. We need all the help from the compiler/interpreter
|
||||
; we can get.
|
||||
|
||||
|
||||
(define (make-bit-reader byte-reader)
|
||||
(let ((buffer 0) (mask 0) ; mask = 128 means that the buffer is full and
|
||||
; the msb bit is the current (yet unread) bit
|
||||
(bits-in-buffer 0))
|
||||
|
||||
; read the byte into the buffer and set up the counters.
|
||||
; return #f on eof
|
||||
(define (set-buffer)
|
||||
(set! buffer (byte-reader))
|
||||
(and buffer
|
||||
(begin
|
||||
(set! bits-in-buffer 8)
|
||||
(set! mask 128)
|
||||
#t)))
|
||||
|
||||
; Read fewer bits than there are in the buffer
|
||||
(define (read-few-bits n)
|
||||
(let ((value (logand buffer ; all bits in buffer
|
||||
(-- (lsh-left-one mask)))))
|
||||
(set! bits-in-buffer (- bits-in-buffer n))
|
||||
(set! mask (lsh-right mask n))
|
||||
(lsh-right value bits-in-buffer))) ; remove extra bits
|
||||
|
||||
; read n bits given an empty buffer, and append them to value, n>=8
|
||||
(define (add-more-bits value n)
|
||||
(let loop ((value value) (n n))
|
||||
(cond
|
||||
((zero? n) value)
|
||||
((< n 8)
|
||||
(let ((rest (read-n-bits n)))
|
||||
(and rest (+ (* value (lsh-left 1 n)) rest))))
|
||||
(else
|
||||
(let ((b (byte-reader)))
|
||||
(and b (loop (+ (* value 256) b) (- n 8))))))))
|
||||
|
||||
; The main module
|
||||
(define (read-n-bits n)
|
||||
; Check the most common cases first
|
||||
(cond
|
||||
((not (positive? n)) #f)
|
||||
((zero? bits-in-buffer) ; the bit-buffer is empty
|
||||
(case n
|
||||
((8) (byte-reader))
|
||||
((16)
|
||||
(let ((b (byte-reader)))
|
||||
(combine-two b (byte-reader))))
|
||||
((24)
|
||||
(let* ((b1 (byte-reader)) (b2 (byte-reader)))
|
||||
(combine-three b1 b2 (byte-reader))))
|
||||
(else
|
||||
(cond
|
||||
((< n 8)
|
||||
(and (set-buffer) (read-few-bits n)))
|
||||
((< n 16)
|
||||
(let ((b (byte-reader)))
|
||||
(and (set-buffer)
|
||||
(logior (lsh-left b (- n 8))
|
||||
(read-few-bits (- n 8))))))
|
||||
(else
|
||||
(let ((b (byte-reader)))
|
||||
(and b (add-more-bits b (- n 8)))))))))
|
||||
|
||||
((= n 1) ; read one bit
|
||||
(let ((value (if (bit-set? buffer mask) 1 0)))
|
||||
(set! mask (lsh-right-one mask))
|
||||
(set! bits-in-buffer (-- bits-in-buffer))
|
||||
value))
|
||||
|
||||
((>= n bits-in-buffer) ; will empty the buffer
|
||||
(let ((n-rem (- n bits-in-buffer))
|
||||
(value (logand buffer ; for mask=64, it'll be &63
|
||||
(-- (lsh-left-one mask)))))
|
||||
(set! bits-in-buffer 0)
|
||||
(cond
|
||||
((zero? n-rem) value)
|
||||
((<= n-rem 16)
|
||||
(let ((rest (read-n-bits n-rem)))
|
||||
(and rest (logior (lsh-left value n-rem) rest))))
|
||||
(else (add-more-bits value n-rem)))))
|
||||
(else (read-few-bits n))
|
||||
))
|
||||
|
||||
read-n-bits)
|
||||
)
|
||||
|
||||
|
||||
; Validation tests
|
||||
|
||||
(run-test
|
||||
(define (read-bits numbers nbits)
|
||||
(let* ((left-numbers numbers)
|
||||
(bit-reader
|
||||
(make-bit-reader
|
||||
(lambda ()
|
||||
(and (pair? left-numbers)
|
||||
(let ((byte (car left-numbers)))
|
||||
(set! left-numbers (cdr left-numbers))
|
||||
byte))))))
|
||||
(let loop ((result '()))
|
||||
(let ((num (bit-reader nbits)))
|
||||
(if num (loop (cons num result)) (reverse result))))))
|
||||
(define (do-test numbers nbits expected)
|
||||
(let ((result (read-bits numbers nbits)))
|
||||
(for-each display
|
||||
(list "Reading " numbers " by " nbits " bits\n"
|
||||
"The result is: " result "\n"))
|
||||
(or (equal? result expected)
|
||||
(error "the result differs from the expected: " expected))))
|
||||
|
||||
(do-test '(1 2 3 4 5 6 7) 8 '(1 2 3 4 5 6 7))
|
||||
(do-test '(193 5 131 4) 1
|
||||
'(1 1 0 0 0 0 0 1 0 0 0 0 0 1 0 1 1 0 0 0 0 0 1 1
|
||||
0 0 0 0 0 1 0 0))
|
||||
(do-test '(193 5 131 4 5) 2
|
||||
'(3 0 0 1 0 0 1 1 2 0 0 3 0 0 1 0 0 0 1 1))
|
||||
(do-test '(193 5 131 4) 3
|
||||
'(6 0 2 0 2 6 0 3 0 1))
|
||||
(do-test '(193 5 131 4 5 6 7) 4
|
||||
'(12 1 0 5 8 3 0 4 0 5 0 6 0 7))
|
||||
(do-test '(193 5 131 4 5 6 7) 5
|
||||
'(24 4 2 24 6 1 0 5 0 24 3))
|
||||
(do-test '(193 5 131 4 5 6 7 8 17 24) 8
|
||||
'(193 5 131 4 5 6 7 8 17 24))
|
||||
(do-test '(193 5 131 4 5 6 7 8 17 24) 9
|
||||
'(386 22 24 64 160 385 388 17))
|
||||
(do-test '(193 5 131 4 5 6 7 8 17) 16
|
||||
'(49413 33540 1286 1800))
|
||||
(do-test '(193 5 131 4 5 6 104) 17
|
||||
'(98827 3088 10291))
|
||||
(do-test '(193 5 131 4 5 6 104) 19
|
||||
'(395308 49409))
|
||||
(do-test '(193 5 131 4 5 6 104) 55
|
||||
'(27165365385724724))
|
||||
(do-test '(193 5 131 4 5 6 104) 56
|
||||
'(54330730771449448))
|
||||
)
|
||||
|
||||
|
||||
; Timing test
|
||||
; This test relies on a Gambit special form 'time' to clock
|
||||
; evaluation of an expression.
|
||||
; R5RS does not provide any timing facilities. So the test below
|
||||
; might not run on your particular system, and probably needs
|
||||
; adjustment anyway.
|
||||
|
||||
(run-test
|
||||
(let ((fname "/tmp/a") (size 10240)
|
||||
(pattern (integer->char #x55)))
|
||||
(define (read-by-bits n)
|
||||
(for-each display
|
||||
(list "Reading the file by " n " bits "))
|
||||
(call-with-input-file fname
|
||||
(lambda (port)
|
||||
(let ((bit-reader (make-bit-reader
|
||||
(lambda () (read-byte-f port)))))
|
||||
(time
|
||||
(do ((c (bit-reader n) (bit-reader n))) ((not c))))))))
|
||||
|
||||
(for-each display
|
||||
(list "Creating a file " fname " of size " size " filled with "
|
||||
pattern "\n"))
|
||||
(with-output-to-file fname
|
||||
(lambda () (do ((i 0 (+ 1 i))) ((>= i size)) (write-char pattern))))
|
||||
|
||||
(display "\nReading the file by characters: the baseline ")
|
||||
(call-with-input-file fname
|
||||
(lambda (port)
|
||||
(time
|
||||
(do ((c (read-char port) (read-char port))) ((eof-object? c))))))
|
||||
|
||||
(display "\nReading the file by bytes: ")
|
||||
(call-with-input-file fname
|
||||
(lambda (port)
|
||||
(time
|
||||
(do ((c (read-byte-f port) (read-byte-f port))) ((not c))))))
|
||||
|
||||
(for-each read-by-bits
|
||||
(list 1 2 3 4 5 6 7 8 9 10 11 15 16 17 23 24 25 30 32 65535
|
||||
(* 8 size)))
|
||||
))
|
Binary file not shown.
@ -0,0 +1,26 @@
|
||||
#lang br
|
||||
(require brag/support "gifparser.rkt")
|
||||
|
||||
(define the-lexer
|
||||
(lexer-srcloc
|
||||
[(eof) (return-without-srcloc eof)]
|
||||
["GIF" (token 'GIF-HEADER lexeme)]
|
||||
[any-char (token 'BYTE (char->integer (car (string->list lexeme))))]
|
||||
))
|
||||
|
||||
(define (make-tokenizer ip [path #f])
|
||||
(port-count-lines! ip)
|
||||
(lexer-file-path path)
|
||||
(λ () (the-lexer ip)))
|
||||
|
||||
(define (munge ip)
|
||||
(parse-to-datum (make-tokenizer (reencode-input-port ip "latin1"))))
|
||||
|
||||
|
||||
(module+ test
|
||||
(require pitfall/binprint)
|
||||
#;(binprint (open-input-file "test.gif") #:width 24)
|
||||
|
||||
#;(munge (open-input-bytes #"GIF87a1234567"))
|
||||
(munge (open-input-file "test.gif"))
|
||||
)
|
@ -0,0 +1,26 @@
|
||||
#lang brag
|
||||
|
||||
gif-file : header logical_screen u1*
|
||||
|
||||
header : magic version
|
||||
magic : GIF-HEADER
|
||||
version : u1 u1 u1
|
||||
|
||||
logical_screen : image_width image_height flags bg_color_index pixel_aspect_ratio
|
||||
image_width : u2le
|
||||
image_height :u2le
|
||||
flags : u1
|
||||
bg_color_index : u1
|
||||
pixel_aspect_ratio : u1
|
||||
u2le : BYTE BYTE
|
||||
u1 : BYTE
|
||||
|
||||
color_table : color_table_entry*
|
||||
color_table_entry : red green blue
|
||||
red : u1
|
||||
green : u1
|
||||
blue : u1
|
||||
|
||||
#block : block_type body
|
||||
#block_type : u1
|
||||
|
@ -0,0 +1,47 @@
|
||||
#lang br
|
||||
|
||||
|
||||
gif-file : header logical-screen-descriptor global-color-table middle trailer
|
||||
header : signature version
|
||||
signature : #"GIF"
|
||||
version : u1{3} => bytes->string
|
||||
logical-screen-descriptor : width height packed bgcolor-idx aspect
|
||||
width : u2
|
||||
height : u2
|
||||
packed : color-table-flag color-res sort-flag color-table-size
|
||||
bgcolor-idx : u1
|
||||
aspect : u1
|
||||
|
||||
color-table-flag : b{1} => ->integer
|
||||
color-res : b{3}
|
||||
sort-flag : b{1}
|
||||
color-table-size : b{3}
|
||||
|
||||
color-table-colors (size->colors color-table-size)
|
||||
|
||||
global-color-table : color-rec{color-table-flag * color-table-colors}
|
||||
color-rec : red green blue
|
||||
red : u1
|
||||
blue : u1
|
||||
green : u1
|
||||
|
||||
middle: gfx-control-ext img-descriptor lct? image-data
|
||||
|
||||
gfx-control-ext : u1{8}
|
||||
img-descriptor : img-separator left top width height img-packed
|
||||
|
||||
img-separator : #"2C"
|
||||
left : u2
|
||||
top : u2
|
||||
width : u2
|
||||
height : u2
|
||||
img-packed : lct-flag interlace sort res lct-size
|
||||
lct-flag : b
|
||||
interlace : b
|
||||
sort : b
|
||||
res : b{2}
|
||||
lct-size : b{3}
|
||||
|
||||
lct: color-rec{lct-flag * (size->colors lct-size)}
|
||||
|
||||
image-data : lzw-size data-subblock
|
Binary file not shown.
After Width: | Height: | Size: 1.5 KiB |
@ -0,0 +1,31 @@
|
||||
#lang pitfall/racket
|
||||
(require sugar/list)
|
||||
(provide binprint)
|
||||
|
||||
(define (binprint in #:width [width 16])
|
||||
(unless (even? width) (raise-argument-error 'binprint "even width" width))
|
||||
(for-each displayln
|
||||
(for/list ([bs (in-port (curry read-bytes width) in)])
|
||||
(string-append (pad-string (hexline bs) (+ (* width 2) (sub1 (/ width 2)))) " " (alphaline bs)))))
|
||||
|
||||
|
||||
(define (pad-string str width)
|
||||
(string-append str (make-string (- width (string-length str)) #\ )))
|
||||
|
||||
|
||||
(define (hexline bs)
|
||||
(string-join
|
||||
(map string-append*
|
||||
(slice-at (for/list ([b (in-bytes bs)])
|
||||
(~r b #:base 16 #:min-width 2 #:pad-string "0")) 2)) " "))
|
||||
|
||||
|
||||
(define (alphaline bs)
|
||||
(define printable-ascii? (λ (b) (<= 32 b 126)))
|
||||
(list->string
|
||||
(for/list ([b (in-bytes bs)])
|
||||
(integer->char (if (printable-ascii? b) b 32)))))
|
||||
|
||||
(module+ test
|
||||
#;(binprint (open-input-bytes #"foobar is the name"))
|
||||
(binprint (open-input-file "test/test12.pdf") #:width 24))
|
@ -0,0 +1,98 @@
|
||||
#lang at-exp br/quicklang
|
||||
(require "parser.rkt" "tokenizer.rkt" "struct.rkt" gregor racket/bytes)
|
||||
(provide (matching-identifiers-out #rx"pf-" (all-defined-out)))
|
||||
|
||||
(module+ test (require rackunit))
|
||||
|
||||
(module+ reader (provide read-syntax))
|
||||
|
||||
(define (read-syntax src port)
|
||||
;; use latin-1 reencoding to make one char = one byte (note `latin1` respelling is correct)
|
||||
(define parse-tree (parse (make-tokenizer (reencode-input-port port "latin1") src)))
|
||||
(strip-bindings
|
||||
#`(module pitfall-parse-mod pitfall/parse
|
||||
#,parse-tree)))
|
||||
|
||||
(define-macro (my-mb ARG ...)
|
||||
#'(#%module-begin ARG ...))
|
||||
(provide (rename-out [my-mb #%module-begin])
|
||||
require)
|
||||
|
||||
(provide null)
|
||||
|
||||
(define-macro (pf-program COS-OBJECT ...)
|
||||
#'(begin COS-OBJECT ...))
|
||||
|
||||
(define (pf-name str)
|
||||
(let* ([str (string-trim str "/" #:right? #f)]
|
||||
[str (regexp-replace* @pregexp{#(\d\d)} str (λ (m sub) (string (integer->char (string->number sub 16)))))])
|
||||
(string->symbol str)))
|
||||
|
||||
(module+ test
|
||||
(check-equal? (pf-name "B#45#20NICE") '|BE NICE|))
|
||||
|
||||
(define (pf-string arg . tail)
|
||||
(cond
|
||||
[(andmap byte? (cons arg tail)) (cons arg tail)]
|
||||
[(string-prefix? arg "D:")
|
||||
#;(parameterize ([current-locale "en"])
|
||||
(parse-date "2015-03-15T02:02:02-04:00" "yyyy-MM-dd'T'HH:mm:ssxxx"))
|
||||
#f]
|
||||
[else
|
||||
(let* ([str (regexp-replace @regexp{^\((.*)\)$} arg "\\1")] ; remove parens
|
||||
[str (string-replace str (string-append "\\" "\n") "")]
|
||||
[str (regexp-replace* @pregexp{\\(n|r|t|b|f|\(|\)|\\)} str (λ (m sub)
|
||||
(case sub
|
||||
[("n") "\n"]
|
||||
[("r") "\r"]
|
||||
[("t") "\t"]
|
||||
[("b") "\b"]
|
||||
[("f") "\f"]
|
||||
[else sub])))]
|
||||
[str (regexp-replace* @pregexp{\\(\d{2,3})} str (λ (m sub) (string (integer->char (string->number sub 8)))))])
|
||||
(co-string str))]))
|
||||
|
||||
(module+ test
|
||||
(check-equal? @pf-string{(Testing)} (co-string "Testing"))
|
||||
(check-equal? (pf-string @string-append{(Test\
|
||||
ing)}) (co-string "Testing"))
|
||||
(check-equal? @pf-string{(Test\)ing)} (co-string "Test)ing"))
|
||||
(check-equal? @pf-string{(Test\ning)} (co-string "Test\ning"))
|
||||
(check-equal? @pf-string{(Test\\ing)} (co-string "Test\\ing"))
|
||||
(check-equal? @pf-string{(A\53B)} (co-string "A+B"))
|
||||
(check-equal? @pf-string{(A\053B)} (co-string "A+B"))
|
||||
#;(check-equal? @pf-string{(D:19990209153925-08\'00\')})
|
||||
#;(check-true (andmap byte? @pf-string{<1C2D3F>}))
|
||||
#;(check-true (andmap byte? @pf-string{<1C 2D 3F>})))
|
||||
|
||||
(define (pf-array . xs) (co-array xs))
|
||||
|
||||
(define (pf-dict . args)
|
||||
(co-dict (apply hasheq args)))
|
||||
|
||||
|
||||
(define (pf-stream dict str)
|
||||
(define data (string->bytes/utf-8 str))
|
||||
(when (not (equal? (hash-ref (co-dict-dict dict) 'Length) (bytes-length data)))
|
||||
(raise-argument-error 'pf-stream
|
||||
(format "~a bytes of data" (hash-ref (co-dict-dict dict) 'Length))
|
||||
(format "~a = ~v" (bytes-length data) data)))
|
||||
(co-stream dict data))
|
||||
|
||||
(define (pf-indirect-object obj gen thing)
|
||||
(co-io obj gen thing))
|
||||
|
||||
(define-macro (pf-indirect-object-ref (OBJ GEN _))
|
||||
#'(co-io-ref OBJ GEN))
|
||||
|
||||
(define (pf-header num) (co-header num))
|
||||
|
||||
(define (pf-comment text) (co-comment text))
|
||||
|
||||
(define (pf-trailer dict)
|
||||
(let ([h (co-dict-dict dict)])
|
||||
(unless (and (hash-has-key? h 'Size) (hash-has-key? h 'Root))
|
||||
(raise-argument-error 'pf-trailer
|
||||
"Size and Root keys are required for trailer"
|
||||
(hash-keys h))))
|
||||
(co-trailer dict))
|
Loading…
Reference in New Issue