From 6f997f8b7e8fd7a3f292a6080fd9b4ad3be4bd07 Mon Sep 17 00:00:00 2001 From: Matthew Butterick Date: Tue, 11 Dec 2018 13:39:45 -0800 Subject: [PATCH] string tests pass --- xenomorph/xenomorph/redo/array.rkt | 5 +- xenomorph/xenomorph/redo/number.rkt | 30 ++--- xenomorph/xenomorph/redo/string.rkt | 125 ++++++++++++++++++ xenomorph/xenomorph/redo/test/array-test.rkt | 4 +- xenomorph/xenomorph/redo/test/string-test.rkt | 109 +++++++++++++++ 5 files changed, 254 insertions(+), 19 deletions(-) create mode 100644 xenomorph/xenomorph/redo/string.rkt create mode 100644 xenomorph/xenomorph/redo/test/string-test.rkt diff --git a/xenomorph/xenomorph/redo/array.rkt b/xenomorph/xenomorph/redo/array.rkt index 19666ba7..0d7195c7 100644 --- a/xenomorph/xenomorph/redo/array.rkt +++ b/xenomorph/xenomorph/redo/array.rkt @@ -32,12 +32,13 @@ https://github.com/mbutterick/restructure/blob/master/src/Array.coffee [else (for/list ([i (in-range decoded-len)]) (decode (xarray-type xa) port #:parent ctx))])) -(define (xarray-encode xa array [port-arg #f] #:parent [parent #f]) +(define (xarray-encode xa array [port-arg (current-output-port)] #:parent [parent #f]) (unless (sequence? array) (raise-argument-error 'xarray-encode "sequence" array)) (define port (if (output-port? port-arg) port-arg (open-output-bytes))) (define (encode-items ctx) ;; todo: should array with fixed length stop encoding after it reaches max? + ;; cf. xstring, which rejects input that is too big for fixed length. (let* (#;[items (sequence->list array)] #;[item-count (length items)] #;[max-items (if (number? (xarray-len xa)) (xarray-len xa) item-count)]) @@ -89,6 +90,6 @@ https://github.com/mbutterick/restructure/blob/master/src/Array.coffee (module+ test (require rackunit) (check-equal? (decode (+xarray uint16be 3) #"ABCDEF") '(16706 17220 17734)) - (check-equal? (encode (+xarray uint16be 3) '(16706 17220 17734)) #"ABCDEF") + (check-equal? (encode (+xarray uint16be 3) '(16706 17220 17734) #f) #"ABCDEF") (check-equal? (size (+xarray uint16be) '(1 2 3)) 6) (check-equal? (size (+xarray doublebe) '(1 2 3 4 5)) 40)) diff --git a/xenomorph/xenomorph/redo/number.rkt b/xenomorph/xenomorph/redo/number.rkt index de8766d8..07945c3d 100644 --- a/xenomorph/xenomorph/redo/number.rkt +++ b/xenomorph/xenomorph/redo/number.rkt @@ -12,7 +12,7 @@ https://github.com/mbutterick/restructure/blob/master/src/Number.coffee (define system-endian (if (system-big-endian?) 'be 'le)) -(define (xint-encode i val [port #f] #:parent [parent #f]) +(define (xint-encode i val [port (current-output-port)] #:parent [parent #f]) (unless (xint? i) (raise-argument-error 'encode "xint instance" i)) (define-values (bound-min bound-max) (bounds i)) @@ -102,15 +102,15 @@ https://github.com/mbutterick/restructure/blob/master/src/Number.coffee (module+ test (require rackunit) (check-exn exn:fail:contract? (λ () (+xint 'not-a-valid-type))) - (check-exn exn:fail:contract? (λ () (encode uint8 256))) - (check-not-exn (λ () (encode uint8 255))) - (check-exn exn:fail:contract? (λ () (encode int8 256))) - (check-exn exn:fail:contract? (λ () (encode int8 255))) - (check-not-exn (λ () (encode int8 127))) - (check-not-exn (λ () (encode int8 -128))) - (check-exn exn:fail:contract? (λ () (encode int8 -129))) - (check-exn exn:fail:contract? (λ () (encode uint16 (add1 #xffff)))) - (check-not-exn (λ () (encode uint16 #xffff))) + (check-exn exn:fail:contract? (λ () (encode uint8 256 #f))) + (check-not-exn (λ () (encode uint8 255 #f))) + (check-exn exn:fail:contract? (λ () (encode int8 256 #f))) + (check-exn exn:fail:contract? (λ () (encode int8 255 #f))) + (check-not-exn (λ () (encode int8 127 #f))) + (check-not-exn (λ () (encode int8 -128 #f))) + (check-exn exn:fail:contract? (λ () (encode int8 -129 #f))) + (check-exn exn:fail:contract? (λ () (encode uint16 (add1 #xffff) #f))) + (check-not-exn (λ () (encode uint16 #xffff #f))) (let ([i (+xint 2 #:signed #f #:endian 'le)] [ip (open-input-bytes (bytes 1 2 3 4))] @@ -139,8 +139,8 @@ https://github.com/mbutterick/restructure/blob/master/src/Number.coffee (check-equal? (decode int8 (bytes 127)) 127) (check-equal? (decode int8 (bytes 255)) -1) - (check-equal? (encode int8 -1) (bytes 255)) - (check-equal? (encode int8 127) (bytes 127))) + (check-equal? (encode int8 -1 #f) (bytes 255)) + (check-equal? (encode int8 127 #f) (bytes 127))) (define (xfloat-decode xf [port-arg (current-input-port)] #:parent [parent #f]) (unless (xfloat? xf) @@ -148,7 +148,7 @@ https://github.com/mbutterick/restructure/blob/master/src/Number.coffee (define bs (read-bytes (xfloat-size xf) (->input-port port-arg))) (floating-point-bytes->real bs (eq? (xfloat-endian xf) 'be))) -(define (xfloat-encode xf val [port #f] #:parent [parent #f]) +(define (xfloat-encode xf val [port (current-output-port)] #:parent [parent #f]) (unless (xfloat? xf) (raise-argument-error 'encode "xfloat instance" xf)) (unless (or (not port) (output-port? port)) @@ -183,7 +183,7 @@ https://github.com/mbutterick/restructure/blob/master/src/Number.coffee (define int (xint-decode xf port-arg)) (exact-if-possible (/ int (fixed-shift xf) 1.0))) -(define (xfixed-encode xf val [port #f] #:parent [parent #f]) +(define (xfixed-encode xf val [port (current-output-port)] #:parent [parent #f]) (unless (xfixed? xf) (raise-argument-error 'encode "xfixed instance" xf)) (define int (exact-if-possible (floor (* val (fixed-shift xf))))) @@ -215,6 +215,6 @@ https://github.com/mbutterick/restructure/blob/master/src/Number.coffee (define fixed32le (+xfixed 4 #:endian 'le)) (module+ test - (define bs (encode fixed16be 123.45)) + (define bs (encode fixed16be 123.45 #f)) (check-equal? bs #"{s") (check-equal? (ceiling (* (decode fixed16be bs) 100)) 12345.0)) diff --git a/xenomorph/xenomorph/redo/string.rkt b/xenomorph/xenomorph/redo/string.rkt new file mode 100644 index 00000000..d1f51d03 --- /dev/null +++ b/xenomorph/xenomorph/redo/string.rkt @@ -0,0 +1,125 @@ +#lang debug racket/base +(require racket/dict "base.rkt" "util.rkt" "number.rkt") +(provide (all-defined-out)) + +#| +approximates +https://github.com/mbutterick/restructure/blob/master/src/String.coffee +|# + +(define (read-encoded-string port len [encoding 'ascii]) + (define proc (case encoding + [(utf16le) (error 'bah)] + [(ucs2) (error 'bleh)] + [(utf8) bytes->string/utf-8] + [(ascii) bytes->string/latin-1] + [else values])) + (proc (read-bytes len port))) + +(define (write-encoded-string port string [encoding 'ascii]) + ;; todo: handle encodings correctly. + ;; right now just utf8 and ascii are correct + (define proc (case encoding + [(ucs2 utf8 ascii) string->bytes/utf-8] + [(utf16le) (error 'swap-bytes-unimplemented)] + [else (error 'unsupported-string-encoding)])) + (write-bytes (proc string) port)) + +(define (count-nonzero-chars port) + ;; helper function for String + ;; counts nonzero chars from current position + (bytes-length (car (regexp-match-peek "[^\u0]*" port)))) + +(define (bytes-left-in-port? port) + (not (eof-object? (peek-byte port)))) + +(define (byte-length val encoding) + (define encoder + (case encoding + [(ascii utf8) string->bytes/utf-8])) + (bytes-length (encoder (format "~a" val)))) + +(define (xstring-decode xs [port-arg (current-input-port)] #:parent [parent #f]) + (define port (->input-port port-arg)) + (let ([len (or (resolve-length (xstring-len xs) port parent) (count-nonzero-chars port))] + [encoding (if (procedure? (xstring-encoding xs)) + (or ((xstring-encoding xs) parent) 'ascii) + (xstring-encoding xs))] + [adjustment (if (and (not (xstring-len xs)) (bytes-left-in-port? port)) 1 0)]) + (define string (read-encoded-string port len encoding)) + (pos port (+ (pos port) adjustment)) + string)) + +(define (xstring-encode xs val [port-arg (current-output-port)] #:parent [parent #f]) + (define port (if (output-port? port-arg) port-arg (open-output-bytes))) + (let* ([val (format "~a" val)] + [encoding (if (procedure? (xstring-encoding xs)) + (or ((xstring-encoding xs) (and parent (dict-ref parent val)) 'ascii)) + (xstring-encoding xs))]) + (define encoded-length (byte-length val encoding)) + (when (and (exact-nonnegative-integer? (xstring-len xs)) (> encoded-length (xstring-len xs))) + (raise-argument-error 'xstring-encode (format "string no longer than ~a" (xstring-len xs)) val)) + (when (xint? (xstring-len xs)) + (encode (xstring-len xs) encoded-length port)) + (write-encoded-string port val encoding) + (when (not (xstring-len xs)) (write-byte #x00 port)) ; null terminated when no len + (unless port-arg (get-output-bytes port)))) + +(define (xstring-size xs [val #f] [parent #f]) + (if (not val) + (resolve-length (xstring-len xs) #f parent) + (let* ([encoding (if (procedure? (xstring-encoding xs)) + (or ((xstring-encoding xs) (and parent (dict-ref parent val)) 'ascii)) + (xstring-encoding xs))] + [encoding (if (eq? encoding 'utf16be) 'utf16le encoding)]) + (+ (byte-length val encoding) (cond + [(not (xstring-len xs)) 1] + [(xint? (xstring-len xs)) (size (xstring-len xs))] + [else 0]))))) + +(struct xstring (len encoding) #:transparent + #:methods gen:xenomorphic + [(define decode xstring-decode) + (define encode xstring-encode) + (define size xstring-size)]) + +(define supported-encodings '(ascii utf8)) +(define (+xstring [len #f] [encoding 'ascii]) + (unless (length-resolvable? len) + (raise-argument-error '+xarray "length-resolvable?" len)) + (unless (or (procedure? encoding) (memq encoding supported-encodings)) + (raise-argument-error '+xarray (format "procedure or member of ~v" supported-encodings) encoding)) + (xstring len encoding)) + +(define (xsymbol-decode xs [port-arg (current-input-port)] #:parent [parent #f]) + (string->symbol (xstring-decode xs port-arg #:parent parent))) + +(define (xsymbol-encode xs val [port (current-output-port)] #:parent [parent #f]) + (unless (xsymbol? xs) + (raise-argument-error 'encode "xsymbol instance" xs)) + (unless (or (string? val) (symbol? val)) + (raise-argument-error 'xsymbol-encode "symbol or string" val)) + (xstring-encode xs (if (symbol? val) val (string->symbol val)) port #:parent parent)) + +(struct xsymbol xstring () #:transparent + #:methods gen:xenomorphic + [(define decode xsymbol-decode) + (define encode xsymbol-encode) + (define size xstring-size)]) + +(define (+xsymbol [len #f] [encoding 'ascii]) + (xsymbol len encoding)) + +(module+ test + (require rackunit) + (define S-fixed (+xstring 4 'utf8)) + (check-equal? (encode S-fixed "Mike" #f) #"Mike") + (check-exn exn:fail? (λ () (encode S-fixed "Mikes" #f))) ; too long for fixed string + (define S (+xstring uint8 'utf8)) + (check-equal? (decode S #"\2BCDEF") "BC") + (check-equal? (encode S "Mike" #f) #"\4Mike") + (check-equal? (size (+xstring) "foobar") 7) ; null terminated when no len + (check-equal? (decode (+xsymbol 4) #"Mike") 'Mike) + (check-equal? (encode (+xsymbol 4) 'Mike #f) #"Mike") + (check-equal? (encode (+xsymbol 4) "Mike" #f) #"Mike") + (check-exn exn:fail:contract? (λ () (encode (+xsymbol 4) 42 #f)))) \ No newline at end of file diff --git a/xenomorph/xenomorph/redo/test/array-test.rkt b/xenomorph/xenomorph/redo/test/array-test.rkt index 1e153648..273a64b4 100644 --- a/xenomorph/xenomorph/redo/test/array-test.rkt +++ b/xenomorph/xenomorph/redo/test/array-test.rkt @@ -79,11 +79,11 @@ https://github.com/mbutterick/restructure/blob/master/test/Array.coffee (test-case "encode using array length" - (check-equal? (encode (+xarray uint8 10) '(1 2 3 4)) (bytes 1 2 3 4))) + (check-equal? (encode (+xarray uint8 10) '(1 2 3 4) #f) (bytes 1 2 3 4))) (test-case "encode length as number before array" - (check-equal? (encode (+xarray uint8 uint8) '(1 2 3 4)) (bytes 4 1 2 3 4))) + (check-equal? (encode (+xarray uint8 uint8) '(1 2 3 4) #f) (bytes 4 1 2 3 4))) ;; todo: reinstate pointer test #;(test-case diff --git a/xenomorph/xenomorph/redo/test/string-test.rkt b/xenomorph/xenomorph/redo/test/string-test.rkt new file mode 100644 index 00000000..ba6d4cb9 --- /dev/null +++ b/xenomorph/xenomorph/redo/test/string-test.rkt @@ -0,0 +1,109 @@ +#lang racket/base +(require rackunit + "../base.rkt" + "../string.rkt" + "../number.rkt" + sugar/unstable/dict) + +#| +approximates +https://github.com/mbutterick/restructure/blob/master/test/String.coffee +|# + +(test-case + "decode fixed length" + (parameterize ([current-input-port (open-input-bytes #"testing")]) + (check-equal? (decode (+xstring 7)) "testing"))) + +(test-case + "decode length from parent key" + (parameterize ([current-input-port (open-input-bytes #"testing")]) + (check-equal? (decode (+xstring 'len) #:parent (mhash 'len 7)) "testing"))) + +(test-case + "decode length as number before string" + (parameterize ([current-input-port (open-input-bytes #"\x07testing")]) + (check-equal? (decode (+xstring uint8) #:parent (mhash 'len 7)) "testing"))) + +(test-case + "decode utf8" + (parameterize ([current-input-port (open-input-bytes (string->bytes/utf-8 "🍻"))]) + (check-equal? (decode (+xstring 4 'utf8)) "🍻"))) + +(test-case + "decode encoding computed from function" + (parameterize ([current-input-port (open-input-bytes (string->bytes/utf-8 "🍻"))]) + (check-equal? (decode (+xstring 4 (λ _ 'utf8))) "🍻"))) + +(test-case + "decode null-terminated string and read past terminator" + (parameterize ([current-input-port (open-input-bytes (string->bytes/utf-8 "🍻\x00"))]) + (check-equal? (decode (+xstring #f 'utf8)) "🍻") + (check-equal? (pos (current-input-port)) 5))) + +(test-case + "decode remainder of buffer when null-byte missing" + (parameterize ([current-input-port (open-input-bytes (string->bytes/utf-8 "🍻"))]) + (check-equal? (decode (+xstring #f 'utf8)) "🍻"))) + +(test-case + "size should use string length" + (check-equal? (size (+xstring 7) "testing") 7)) + +(test-case + "size should use correct encoding" + (check-equal? (size (+xstring 10 'utf8) "🍻") 4)) + +(test-case + "size should use encoding from function" + (check-equal? (size (+xstring 10 (λ _ 'utf8)) "🍻") 4)) + +(test-case + "should add size of length field before string" + (check-equal? (size (+xstring uint8 'utf8) "🍻") 5)) + +; todo: it 'should work with utf16be encoding', -> + +(test-case + "size should take null-byte into account" + (check-equal? (size (+xstring #f 'utf8) "🍻") 5)) + +(test-case + "size should use defined length if no value given" + (check-equal? (size (+xstring 10)) 10)) + +(test-case + "encode using string length" + (parameterize ([current-output-port (open-output-bytes)]) + (encode (+xstring 7) "testing") + (check-equal? (dump (current-output-port)) #"testing"))) + +(test-case + "encode length as number before string" + (parameterize ([current-output-port (open-output-bytes)]) + (encode (+xstring uint8) "testing") + (check-equal? (dump (current-output-port)) #"\x07testing"))) + +(test-case + "encode length as number before string utf8" + (parameterize ([current-output-port (open-output-bytes)]) + (encode (+xstring uint8 'utf8) "testing 😜") + (check-equal? (dump (current-output-port)) (string->bytes/utf-8 "\x0ctesting 😜")))) + +(test-case + "encode utf8" + (parameterize ([current-output-port (open-output-bytes)]) + (encode (+xstring 4 'utf8) "🍻" ) + (check-equal? (dump (current-output-port)) (string->bytes/utf-8 "🍻")))) + +(test-case + "encode encoding computed from function" + (parameterize ([current-output-port (open-output-bytes)]) + (encode (+xstring 4 (λ _ 'utf8)) "🍻") + (check-equal? (dump (current-output-port)) (string->bytes/utf-8 "🍻")))) + +(test-case + "encode null-terminated string" + (parameterize ([current-output-port (open-output-bytes)]) + (encode (+xstring #f 'utf8) "🍻" ) + (check-equal? (dump (current-output-port)) (string->bytes/utf-8 "🍻\x00")))) \ No newline at end of file