diff --git a/xenomorph/.github/workflows/ci.yml b/xenomorph/.github/workflows/ci.yml new file mode 100644 index 00000000..ecdef0ac --- /dev/null +++ b/xenomorph/.github/workflows/ci.yml @@ -0,0 +1,37 @@ +name: CI + +on: [push, pull_request] + +jobs: + run: + name: "Build using Racket '${{ matrix.racket-version }}' (${{ matrix.racket-variant }})" + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + racket-version: ["6.12", "7.0", "7.1", "7.2", "7.3", "7.4", "7.5", "7.6", "7.7", "7.8", "7.9", "current"] + racket-variant: ["BC", "CS"] + # CS builds are only provided for versions 7.4 and up so avoid + # running the job for prior versions. + exclude: + - {racket-version: "6.12", racket-variant: "CS"} + - {racket-version: "7.0", racket-variant: "CS"} + - {racket-version: "7.1", racket-variant: "CS"} + - {racket-version: "7.2", racket-variant: "CS"} + - {racket-version: "7.3", racket-variant: "CS"} + + steps: + - name: Checkout + uses: actions/checkout@master + + - uses: Bogdanp/setup-racket@v0.11 + with: + distribution: 'full' + version: ${{ matrix.racket-version }} + variant: ${{ matrix.racket-variant }} + + - name: Install package and its dependencies + run: raco pkg install --auto --batch + + - name: Run the tests + run: xvfb-run raco test -j 4 -p xenomorph diff --git a/xenomorph/.gitignore b/xenomorph/.gitignore new file mode 100644 index 00000000..f4c67e1e --- /dev/null +++ b/xenomorph/.gitignore @@ -0,0 +1,20 @@ +# for Racket +compiled/ +*~ + +# for Mac OS X +.DS_Store +.AppleDouble +.LSOverride +Icon + +# Thumbnails +._* + +# Files that might appear on external disk +.Spotlight-V100 +.Trashes +xenomorph/doc/* +xenomorph/scribblings/*.css +xenomorph/scribblings/*.js +xenomorph/scribblings/*.html \ No newline at end of file diff --git a/xenomorph/LICENSE.md b/xenomorph/LICENSE.md new file mode 100644 index 00000000..e246acbe --- /dev/null +++ b/xenomorph/LICENSE.md @@ -0,0 +1,9 @@ +MIT License for Xenomorph + +© 2018-2019 Matthew Butterick + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/xenomorph/README.md b/xenomorph/README.md new file mode 100644 index 00000000..b543ed32 --- /dev/null +++ b/xenomorph/README.md @@ -0,0 +1,17 @@ +## xenomorph ![Build Status](https://github.com/mbutterick/xenomorph/workflows/CI/badge.svg) + +Racket library for binary encoding & decoding. Based on [`restructure`](https://github.com/foliojs/restructure). + +## Docs + +https://docs.racket-lang.org/xenomorph/ + + +## License + +MIT + + +## Project status + +Actively developed though I’m not sure what more will be done. Everything from `restructure` has been ported over. Mostly this package supports [Quad](https://github.com/mbutterick/quad), so I don’t expect it will have a meaningful life as a standalone library. Moreover nobody seems to like binary formats. diff --git a/xenomorph/SUBLICENSE.md b/xenomorph/SUBLICENSE.md new file mode 100644 index 00000000..1196d7ec --- /dev/null +++ b/xenomorph/SUBLICENSE.md @@ -0,0 +1,24 @@ +Xenomorph contains substantial portions of the following software: + +[Restructure](https://github.com/devongovett/restructure) + +MIT LICENSE +Copyright (c) 2014 Devon Govett + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/xenomorph/info.rkt b/xenomorph/info.rkt new file mode 100644 index 00000000..5675ec95 --- /dev/null +++ b/xenomorph/info.rkt @@ -0,0 +1,10 @@ +#lang info +(define collection 'multi) +(define version "0.0") +(define deps '("base" + "beautiful-racket-lib" + "rackunit-lib" + "sugar")) +(define build-deps '("debug" + "racket-doc" + "scribble-lib")) diff --git a/xenomorph/xenomorph/base.rkt b/xenomorph/xenomorph/base.rkt new file mode 100644 index 00000000..e87c5bb3 --- /dev/null +++ b/xenomorph/xenomorph/base.rkt @@ -0,0 +1,98 @@ +#lang debug racket/base +(require racket/class racket/dict racket/match) +(provide (all-defined-out)) + +(struct x:ptr (type val parent) #:transparent) + +(define x:version-key 'x:version) +(define x:start-offset-key 'x:start-offset) +(define x:current-offset-key 'x:current-offset) +(define x:length-key 'x:length) +(define x:parent-key 'x:parent) +(define x:pointer-size-key 'x:ptr-size) +(define x:pointers-key 'x:pointers) +(define x:pointer-offset-key 'x:ptr-offset) ;; formerly pointerOffset +(define x:pointer-type-key 'x:ptr-type) +(define x:val-key 'x:val) + +(define private-keys (list x:parent-key x:start-offset-key x:current-offset-key x:length-key x:pointer-size-key + x:pointers-key x:pointer-offset-key x:pointer-type-key x:val-key)) + +(define (dict->mutable-hash x) + (define h (make-hasheq)) + (for ([(k v) (in-dict x)] + #:unless (memq k private-keys)) + (hash-set! h k v)) + h) + +(define (hash-ref* d . keys) + (for/fold ([d d]) + ([k (in-list keys)]) + (hash-ref d k))) + +(define (pos p [new-pos #f]) + (when new-pos + (file-position p new-pos)) + (file-position p)) + +#| +We make `parent` a kwarg so that we can pass it without necessitating an explicit port-arg. (Meaning, if it's positional, whenever we want to use it, we also have to make port-arg explicit, which is boring.) + +We don't make port-arg a kwarg because it's the most common arg passed. + +We don't make port-arg the last arg (similar to other Racket port funcs) because we want to let the functions be variable arity. +|# +(define (decode xo [port-arg (current-input-port)] #:parent [parent #f] . args) + (define port + (cond + [(input-port? port-arg) port-arg] + [(bytes? port-arg) (open-input-bytes port-arg)] + [else (raise-argument-error 'decode "byte string or input port" port-arg)])) + (send xo x:decode port parent . args)) + +(define (encode xo val [port-arg (current-output-port)] + #:parent [parent #f] + . args) + (define port (if (output-port? port-arg) port-arg (open-output-bytes))) + (send xo x:encode val port parent . args) + (unless port-arg (get-output-bytes port))) + +(define (xenomorphic-type? x) (is-a? x x:base%)) +(define xenomorphic? xenomorphic-type?) + +(define-syntax-rule (generate-subclass CLASS PRE-ENCODE-PROC POST-DECODE-PROC) + (cond + [(and PRE-ENCODE-PROC POST-DECODE-PROC) + (class CLASS + (super-new) + (define/override (pre-encode x) (super pre-encode (PRE-ENCODE-PROC x))) + (define/override (post-decode x) (POST-DECODE-PROC (super post-decode x))))] + [PRE-ENCODE-PROC + (class CLASS + (super-new) + (define/override (pre-encode x) (super pre-encode (PRE-ENCODE-PROC x))))] + [POST-DECODE-PROC + (class CLASS + (super-new) + (define/override (post-decode x) (POST-DECODE-PROC (super post-decode x))))] + [else CLASS])) + +(define x:base% + (class object% + (super-new) + + (define/pubment (x:decode input-port [parent #f] . args) + (post-decode (inner (error 'xenomorph (format "decode not augmented in ~a" this)) x:decode input-port parent . args))) + + (define/pubment (x:encode val output-port [parent #f] . args) + (match (inner (error 'xenomorph (format "encode not augmented in ~a" this)) x:encode (pre-encode val) output-port parent . args) + [(? bytes? encode-result) (write-bytes encode-result output-port)] + [other other])) + + (define/pubment (x:size [val #f] [parent #f] . args) + (match (inner 0 x:size val parent . args) + [(? exact-nonnegative-integer? size) size] + [other (raise-result-error 'size "nonnegative integer" other)])) + + (define/public (post-decode val) val) + (define/public (pre-encode val) val))) \ No newline at end of file diff --git a/xenomorph/xenomorph/bitfield.rkt b/xenomorph/xenomorph/bitfield.rkt new file mode 100644 index 00000000..c3f36149 --- /dev/null +++ b/xenomorph/xenomorph/bitfield.rkt @@ -0,0 +1,89 @@ +#lang racket/base +(require "base.rkt" + "int.rkt" + racket/class + racket/dict + racket/list + racket/contract + sugar/unstable/dict) +(provide (all-defined-out)) + +#| +approximates +https://github.com/mbutterick/restructure/blob/master/src/Bitfield.coffee +|# + +(define x:bitfield% + (class x:base% + (super-new) + (init-field [(@type type)] + [(@flags flags)]) + + (let ([named-flags (filter values @flags)]) + (unless (= (length named-flags) (length (remove-duplicates named-flags))) + (raise-argument-error 'x:bitfield% "no duplicates among flag names" named-flags))) + + (when (> (length @flags) (* 8 (send @type x:size))) + (raise-argument-error 'x:bitfield% (format "~a flags or fewer (~a-byte bitfield)" (* 8 (send @type x:size)) (send @type x:size)) (length @flags))) + + (define/augment (x:decode port parent) + (define val (send @type x:decode port)) + (define flag-hash (mhash)) + (for ([(flag idx) (in-indexed @flags)] + #:when flag) + (hash-set! flag-hash flag (bitwise-bit-set? val idx))) + flag-hash) + + (define/augment (x:encode flag-hash port [parent #f]) + (define invalid-flags + (for/list ([flag (in-hash-keys flag-hash)] + #:unless (member flag @flags)) + flag)) + (unless (null? invalid-flags) + (raise-argument-error 'encode (format "valid flag name ~v" @flags) invalid-flags)) + (define bit-int (for/sum ([(flag idx) (in-indexed @flags)] + #:when (and flag (hash-ref flag-hash flag #f))) + (arithmetic-shift 1 idx))) + (send @type x:encode bit-int port)) + + (define/augment (x:size [val #f] [parent #f]) + (send @type x:size)))) + +(define (x:bitfield? x) (is-a? x x:bitfield%)) + +(define/contract (x:bitfield + [type-arg #f] + [flag-arg #f] + #:type [type-kwarg uint8] + #:flags [flag-kwarg null] + #:pre-encode [pre-proc #f] + #:post-decode [post-proc #f] + #:base-class [base-class x:bitfield%]) + (() + ((or/c x:int? #false) + (listof any/c) + #:type (or/c x:int? #false) + #:flags (listof any/c) + #:pre-encode (or/c (any/c . -> . any/c) #false) + #:post-decode (or/c (any/c . -> . any/c) #false) + #:base-class (λ (c) (subclass? c x:bitfield%))) + . ->* . + x:bitfield?) + (define type (or type-arg type-kwarg)) + (define flags (or flag-arg flag-kwarg)) + (new (generate-subclass base-class pre-proc post-proc) + [type type] + [flags flags])) + +(module+ test + (require rackunit "number.rkt" "base.rkt") + (define bfer (x:bitfield uint16be '(bold italic underline #f shadow condensed extended))) + (define bf (decode bfer #"\0\25")) + (check-equal? (length (hash-keys bf)) 6) ; omits #f flag + (check-true (hash-ref bf 'bold)) + (check-true (hash-ref bf 'underline)) + (check-true (hash-ref bf 'shadow)) + (check-false (hash-ref bf 'italic)) + (check-false (hash-ref bf 'condensed)) + (check-false (hash-ref bf 'extended)) + (check-equal? (encode bfer bf #f) #"\0\25")) \ No newline at end of file diff --git a/xenomorph/xenomorph/bytes.rkt b/xenomorph/xenomorph/bytes.rkt new file mode 100644 index 00000000..d5d29098 --- /dev/null +++ b/xenomorph/xenomorph/bytes.rkt @@ -0,0 +1,42 @@ +#lang racket/base +(require racket/class racket/match "base.rkt" "util.rkt" "number.rkt") +(provide (all-defined-out)) + +#| +approximates +https://github.com/mbutterick/restructure/blob/master/src/Buffer.coffee +|# + +(define x:bytes% + (class x:base% + (super-new) + (init-field [(@len len)]) + (unless (length-resolvable? @len) + (raise-argument-error 'x:buffer "resolvable length" @len)) + + (define/augment (x:decode port parent) + (read-bytes (resolve-length @len port parent))) + + (define/augment (x:encode buf port [parent #f]) + (unless (bytes? buf) + (raise-argument-error 'x:buffer-encode "bytes" buf)) + (when (x:int? @len) + (send @len x:encode (bytes-length buf) port)) + (write-bytes buf port)) + + (define/augment (x:size [val #f] [parent #f]) + (match val + [(? bytes?) (bytes-length val)] + [#false (resolve-length @len val parent)] + [_ (raise-argument-error 'x:buffer-size "bytes or #f" val)])))) + +(define (x:bytes [len-arg #f] + #:length [len-kwarg #f] + #:pre-encode [pre-proc #f] + #:post-decode [post-proc #f] + #:base-class [base-class x:bytes%]) + (define len (or len-arg len-kwarg #xffff)) + (new (generate-subclass base-class pre-proc post-proc) [len len])) + +(define x:buffer% x:bytes%) +(define x:buffer x:bytes) \ No newline at end of file diff --git a/xenomorph/xenomorph/dict.rkt b/xenomorph/xenomorph/dict.rkt new file mode 100644 index 00000000..79c3c9bb --- /dev/null +++ b/xenomorph/xenomorph/dict.rkt @@ -0,0 +1,128 @@ +#lang debug racket/base +(require racket/dict + racket/class + racket/sequence + racket/match + racket/list + racket/contract + "base.rkt" + "number.rkt" + sugar/unstable/dict) +(provide (all-defined-out)) + +#| +approximates +https://github.com/mbutterick/restructure/blob/master/src/Struct.coffee +|# + +(define (setup-private-fields port parent len) + (define mheq (make-hasheq)) + (dict-set*! mheq + x:parent-key parent + x:start-offset-key (pos port) + x:current-offset-key 0 + x:length-key len) + mheq) + +(define (parse-fields port mheq fields-arg) + (define fields (if (x:dict? fields-arg) (get-field fields fields-arg) fields-arg)) + (unless (assocs? fields) + (raise-argument-error 'x:dict-parse-fields "assocs" fields)) + (for ([(key type) (in-dict fields)]) + (define val (match type + [(? procedure? proc) (proc mheq)] + [_ (send type x:decode port mheq)])) + (unless (void? val) + (hash-set! mheq key val)) + (hash-set! mheq x:current-offset-key (- (pos port) (hash-ref mheq x:start-offset-key)))) + mheq) + +(define x:dict% + (class x:base% + (super-new) + (init-field [(@fields fields)]) + + (when @fields (unless (dict? @fields) + (raise-argument-error '+xstruct "dict" @fields))) + + (define/augride (x:decode port parent [len 0]) + (define res (setup-private-fields port parent len)) + (parse-fields port res @fields)) + + (define/override (post-decode val) + (dict->mutable-hash val)) + + (define/augride (x:encode field-data port [parent-arg #f]) + (unless (dict? field-data) + (raise-result-error 'x:dict-encode "dict" field-data)) + ;; check keys, because `size` also relies on keys being valid + (unless (andmap (λ (field-key) (memq field-key (dict-keys field-data))) (dict-keys @fields)) + (raise-argument-error 'x:dict-encode + (format "dict that contains superset of xstruct keys: ~a" + (dict-keys @fields)) (dict-keys field-data))) + (define parent (mhasheq x:pointers-key null + x:start-offset-key (pos port) + x:parent-key parent-arg + x:val-key field-data + x:pointer-size-key 0)) + (hash-set! parent x:pointer-offset-key (+ (pos port) (x:size field-data parent #f))) + (for ([(key type) (in-dict @fields)]) + (send type x:encode (dict-ref field-data key) port parent)) + (for ([ptr (in-list (hash-ref parent x:pointers-key))]) + (match ptr + [(x:ptr type val parent) (send type x:encode val port parent)]))) + + (define/augride (x:size [val #f] [parent-arg #f] [include-pointers #t]) + (define parent (mhasheq x:parent-key parent-arg + x:val-key val + x:pointer-size-key 0)) + (define fields-size (for/sum ([(key type) (in-dict @fields)] + #:when (xenomorphic-type? type)) + (send type x:size (and val (send type pre-encode (dict-ref val key))) parent))) + (define pointers-size (if include-pointers (dict-ref parent x:pointer-size-key) 0)) + (+ fields-size pointers-size)))) + +(define (x:dict? x) (is-a? x x:dict%)) + +(define/contract (x:dict #:pre-encode [pre-proc #f] + #:post-decode [post-proc #f] + #:base-class [base-class x:dict%] + . dicts) + (() + (#:pre-encode (or/c (any/c . -> . any/c) #false) + #:post-decode (or/c (any/c . -> . any/c) #false) + #:base-class (λ (c) (subclass? c x:dict%))) + #:rest (listof any/c) + . ->* . + x:dict?) + (define args (flatten dicts)) + (unless (even? (length args)) + (raise-argument-error 'x:dict "equal number of keys and values" dicts)) + (define fields (for/list ([kv (in-slice 2 args)]) + (unless (symbol? (car kv)) + (raise-argument-error '+xstruct "symbol" (car kv))) + (apply cons kv))) + (new (generate-subclass base-class pre-proc post-proc) [fields fields])) + +(module+ test + (require rackunit "number.rkt" "base.rkt") + (define (random-pick xs) (list-ref xs (random (length xs)))) + (check-exn exn:fail:contract? (λ () (x:dict 42))) + (for ([i (in-range 20)]) + ;; make random structs and make sure we can round trip + (define field-types + (for/list ([i (in-range 40)]) + (random-pick (list uint8 uint16be uint16le uint32be uint32le double)))) + (define size-num-types + (for/sum ([num-type (in-list field-types)]) + (send num-type x:size))) + (define xs (x:dict (for/list ([num-type (in-list field-types)]) + (cons (gensym) num-type)))) + (define bs (apply bytes (for/list ([i (in-range size-num-types)]) + (random 256)))) + (check-equal? (encode xs (decode xs bs) #f) bs))) + +;; bw compat +(define x:struct% x:dict%) +(define x:struct? x:dict?) +(define x:struct x:dict) \ No newline at end of file diff --git a/xenomorph/xenomorph/enum.rkt b/xenomorph/xenomorph/enum.rkt new file mode 100644 index 00000000..7359fc2e --- /dev/null +++ b/xenomorph/xenomorph/enum.rkt @@ -0,0 +1,61 @@ +#lang racket/base +(require racket/class + racket/match + "base.rkt" + "int.rkt" + racket/contract + racket/list) +(provide (all-defined-out)) + +#| +approximates +https://github.com/mbutterick/restructure/blob/master/src/Enum.coffee +|# + +(define x:enum% + (class x:base% + (super-new) + (init-field [(@type type)] [(@values values)]) + + (unless (x:int? @type) + (raise-argument-error 'x:enum "xenomorphic integer type" @type)) + + (unless (list? @values) + (raise-argument-error 'x:enum "list of values" @values)) + + (define/augment (x:decode port parent) + (define index (send @type x:decode port parent)) + (if (< index (length @values)) + (or (list-ref @values index) index) + index)) + + (define/augment (x:encode val port [parent #f]) + (match (index-of @values val) + [(? values idx) (send @type x:encode idx port parent)] + [_ (raise-argument-error 'x:enum-encode "valid option" val)])) + + (define/augment (x:size [val #f] [parent #f]) + (send @type x:size val parent)))) + +(define (x:enum? x) (is-a? x x:enum%)) + +(define/contract (x:enum [type-arg #f] + [values-arg #f] + #:type [type-kwarg uint8] + #:values [values-kwarg null] + #:pre-encode [pre-proc #f] + #:post-decode [post-proc #f] + #:base-class [base-class x:enum%]) + (() + ((or/c x:int? #false) + (listof any/c) + #:type (or/c x:int? #false) + #:values (listof any/c) + #:pre-encode (or/c (any/c . -> . any/c) #false) + #:post-decode (or/c (any/c . -> . any/c) #false) + #:base-class (λ (c) (subclass? c x:enum%))) + . ->* . + x:enum?) + (define type (or type-arg type-kwarg)) + (define values (or values-arg values-kwarg)) + (new (generate-subclass base-class pre-proc post-proc) [type type] [values values])) \ No newline at end of file diff --git a/xenomorph/xenomorph/info.rkt b/xenomorph/xenomorph/info.rkt new file mode 100644 index 00000000..c31e0e2a --- /dev/null +++ b/xenomorph/xenomorph/info.rkt @@ -0,0 +1,3 @@ +#lang info +(define version "0.0") +(define scribblings '(("scribblings/xenomorph.scrbl" ()))) \ No newline at end of file diff --git a/xenomorph/xenomorph/int.rkt b/xenomorph/xenomorph/int.rkt new file mode 100644 index 00000000..e920c31b --- /dev/null +++ b/xenomorph/xenomorph/int.rkt @@ -0,0 +1,179 @@ +#lang racket/base +(require "base.rkt" racket/class racket/contract) +(provide (all-defined-out)) + +#| +approximates +https://github.com/mbutterick/restructure/blob/master/src/Number.coffee +|# + +(define (unsigned->signed uint bits) + (define most-significant-bit-mask (arithmetic-shift 1 (sub1 bits))) + (- (bitwise-xor uint most-significant-bit-mask) most-significant-bit-mask)) + +(define (signed->unsigned sint bits) + (bitwise-and sint (arithmetic-shift 1 bits))) + +(define (reverse-bytes bstr) + (apply bytes + (for/list ([b (in-bytes bstr (sub1 (bytes-length bstr)) -1 -1)]) + b))) + +(define (exact-if-possible x) (if (integer? x) (inexact->exact x) x)) + +(define (endian-value? x) + (and (symbol? x) (memq x '(be le)))) + +(define system-endian (if (system-big-endian?) 'be 'le)) + +(define x:number% + (class x:base% + (super-new) + (init-field [(@size size)] [(@endian endian)]) + + (unless (exact-positive-integer? @size) + (raise-argument-error 'xenomorph "exact positive integer" @size)) + (unless (memq @endian '(le be)) + (raise-argument-error 'xenomorph "'le or 'be" @endian)) + + (field [@bits (* @size 8)]) + + (define/augment (x:size . _) @size))) + +(define (x:int? x) (is-a? x x:int%)) + +(define (bytes->uint bs) + (for/sum ([b (in-bytes bs)] + [i (in-naturals)]) + (arithmetic-shift b (* 8 i)))) + +(define x:int% + (class x:number% + (super-new) + (init-field signed) + (inherit-field (@endian endian) (@size size) @bits) + + ;; if a signed integer has n bits, it can contain a number + ;; between - (expt 2 (sub1 n)) and (sub1 (expt 2 (sub1 n)). + (define signed-max (sub1 (arithmetic-shift 1 (sub1 @bits)))) + (define signed-min (sub1 (- signed-max))) + (define delta (if signed 0 signed-min)) + (field [bound-min (- signed-min delta)] + [bound-max (- signed-max delta)]) + + (define/augment (x:decode port . _) + (define bs ((if (eq? @endian system-endian) values reverse-bytes) (read-bytes @size port))) + (define uint (bytes->uint bs)) + (if signed (unsigned->signed uint @bits) uint)) + + (define/augment (x:encode val . _) + (unless (integer? val) + (raise-argument-error 'encode "integer" val)) + + (unless (<= bound-min val bound-max) + (raise-argument-error 'encode + (format "value that fits within ~a ~a-byte int (~a to ~a)" (if signed "signed" "unsigned") @size bound-min bound-max) val)) + (for/fold ([bs null] + [val (exact-if-possible val)] + #:result (apply bytes ((if (eq? @endian 'be) values reverse) bs))) + ([i (in-range @size)]) + (values (cons (bitwise-and val #xff) bs) (arithmetic-shift val -8)))))) + +(define/contract (x:int [size-arg #f] + #:size [size-kwarg 2] + #:signed [signed #true] + #:endian [endian system-endian] + #:pre-encode [pre-proc #f] + #:post-decode [post-proc #f] + #:base-class [base-class x:int%]) + (() + ((or/c exact-positive-integer? #false) + #:size exact-positive-integer? + #:signed boolean? + #:endian endian-value? + #:pre-encode (or/c (any/c . -> . any/c) #false) + #:post-decode (or/c (any/c . -> . any/c) #false) + #:base-class (λ (c) (subclass? c x:int%))) + . ->* . + x:int?) + (define size (or size-arg size-kwarg)) + (unless (exact-positive-integer? size) + (raise-argument-error 'x:int "exact positive integer" size)) + (new (generate-subclass base-class pre-proc post-proc) + [size size] + [signed signed] + [endian endian])) + +(define int8 (x:int 1)) +(define int16 (x:int 2)) +(define int24 (x:int 3)) +(define int32 (x:int 4)) +(define int64 (x:int 8)) +(define uint8 (x:int 1 #:signed #f)) +(define uint16 (x:int 2 #:signed #f)) +(define uint24 (x:int 3 #:signed #f)) +(define uint32 (x:int 4 #:signed #f)) +(define uint64 (x:int 8 #:signed #f)) +(define int8be (x:int 1 #:endian 'be)) +(define int16be (x:int 2 #:endian 'be)) +(define int24be (x:int 3 #:endian 'be)) +(define int32be (x:int 4 #:endian 'be)) +(define int64be (x:int 8 #:endian 'be)) +(define uint8be (x:int 1 #:signed #f #:endian 'be)) +(define uint16be (x:int 2 #:signed #f #:endian 'be)) +(define uint24be (x:int 3 #:signed #f #:endian 'be)) +(define uint32be (x:int 4 #:signed #f #:endian 'be)) +(define uint64be (x:int 8 #:signed #f #:endian 'be)) +(define int8le (x:int 1 #:endian 'le)) +(define int16le (x:int 2 #:endian 'le)) +(define int24le (x:int 3 #:endian 'le)) +(define int32le (x:int 4 #:endian 'le)) +(define int64le (x:int 8 #:endian 'le)) +(define uint8le (x:int 1 #:signed #f #:endian 'le)) +(define uint16le (x:int 2 #:signed #f #:endian 'le)) +(define uint24le (x:int 3 #:signed #f #:endian 'le)) +(define uint32le (x:int 4 #:signed #f #:endian 'le)) +(define uint64le (x:int 8 #:signed #f #:endian 'le)) + +(module+ test + (require rackunit "base.rkt") + (check-exn exn:fail:contract? (λ () (x:int 'not-a-valid-type))) + (check-exn exn:fail:contract? (λ () (encode uint8 256 #f))) + (check-not-exn (λ () (encode uint8 255 #f))) + (check-exn exn:fail:contract? (λ () (encode int8 256 #f))) + (check-exn exn:fail:contract? (λ () (encode int8 255 #f))) + (check-not-exn (λ () (encode int8 127 #f))) + (check-not-exn (λ () (encode int8 -128 #f))) + (check-exn exn:fail:contract? (λ () (encode int8 -129 #f))) + (check-exn exn:fail:contract? (λ () (encode uint16 (add1 #xffff) #f))) + (check-not-exn (λ () (encode uint16 #xffff #f))) + + (let ([i (x:int 2 #:signed #f #:endian 'le)] + [ip (open-input-bytes (bytes 1 2 3 4))] + [op (open-output-bytes)]) + (check-equal? (decode i ip) 513) ;; 1000 0000 0100 0000 + (check-equal? (decode i ip) 1027) ;; 1100 0000 0010 0000 + (encode i 513 op) + (check-equal? (get-output-bytes op) (bytes 1 2)) + (encode i 1027 op) + (check-equal? (get-output-bytes op) (bytes 1 2 3 4))) + + (let ([i (x:int 2 #:signed #f #:endian 'be)] + [ip (open-input-bytes (bytes 1 2 3 4))] + [op (open-output-bytes)]) + (check-equal? (decode i ip) 258) ;; 0100 0000 1000 0000 + (check-equal? (decode i ip) 772) ;; 0010 0000 1100 0000 + (encode i 258 op) + (check-equal? (get-output-bytes op) (bytes 1 2)) + (encode i 772 op) + (check-equal? (get-output-bytes op) (bytes 1 2 3 4))) + + (check-equal? (send (x:int 1) x:size) 1) + (check-equal? (send (x:int) x:size) 2) + (check-equal? (send (x:int 4) x:size) 4) + (check-equal? (send (x:int 8) x:size) 8) + + (check-equal? (decode int8 (bytes 127)) 127) + (check-equal? (decode int8 (bytes 255)) -1) + (check-equal? (encode int8 -1 #f) (bytes 255)) + (check-equal? (encode int8 127 #f) (bytes 127))) \ No newline at end of file diff --git a/xenomorph/xenomorph/list.rkt b/xenomorph/xenomorph/list.rkt new file mode 100644 index 00000000..58808ecf --- /dev/null +++ b/xenomorph/xenomorph/list.rkt @@ -0,0 +1,137 @@ +#lang debug racket/base +(require racket/class + racket/sequence + racket/contract + "base.rkt" + "int.rkt" + "util.rkt" + sugar/unstable/dict) +(provide (all-defined-out)) + +#| +approximates +https://github.com/mbutterick/restructure/blob/master/src/Array.coffee +|# + +(define x:list% + (class x:base% + (super-new) + (init-field [(@type type)] [(@len len)] [(@count-bytes? count-bytes?)]) + + (unless (xenomorphic-type? @type) + (raise-argument-error 'x:list "xenomorphic type" @type)) + (unless (length-resolvable? @len) + (raise-argument-error 'x:list "length-resolvable?" @len)) + (unless (boolean? @count-bytes?) + (raise-argument-error 'x:list "boolean" @count-bytes?)) + + (define/augride (x:decode port parent) + (define new-parent (if (x:int? @len) + (mhasheq x:parent-key parent + x:start-offset-key (pos port) + x:current-offset-key 0 + x:length-key @len) + parent)) + (define len (resolve-length @len port parent)) + (cond + [(or (not len) @count-bytes?) + (define end-pos (cond + ;; len is byte length + [len (+ (pos port) len)] + ;; no len, but parent has length + [(and parent (not (zero? (hash-ref parent x:length-key)))) + (+ (hash-ref parent x:start-offset-key) (hash-ref parent x:length-key))] + ;; no len or parent, so consume whole stream + [else +inf.0])) + (for/list ([i (in-naturals)] + #:break (or (eof-object? (peek-byte port)) (= (pos port) end-pos))) + (send @type x:decode port new-parent))] + ;; we have len, which is treated as count of items + [else (for/list ([i (in-range len)]) + (when (eof-object? (peek-byte port)) + (raise-argument-error 'decode (format "bytes for ~a items" len) i)) + (send @type x:decode port new-parent))])) + + (define/augride (x:encode val-arg port [parent #f]) + (unless (sequence? val-arg) + (raise-argument-error 'encode "sequence" val-arg)) + (define vals (if (list? val-arg) val-arg (sequence->list val-arg))) + ;; if @len is not an integer, we have variable length + (define maybe-fixed-len (and (integer? @len) @len)) + (when maybe-fixed-len + (unless (eq? (length vals) maybe-fixed-len) + (raise-argument-error 'encode (format "sequence of ~a values" maybe-fixed-len) (length vals)))) + (define (encode-items parent) + (for ([item (in-list vals)] + [idx (in-range (or maybe-fixed-len +inf.0))]) + (send @type x:encode item port parent))) + (cond + [(x:int? @len) + (define new-parent (mhasheq x:pointers-key null + x:start-offset-key (pos port) + x:parent-key parent)) + (hash-set! new-parent x:pointer-offset-key (+ (pos port) (x:size vals new-parent))) + (send @len x:encode (length vals) port) ; encode length at front + (encode-items new-parent) + (for ([ptr (in-list (hash-ref new-parent x:pointers-key))]) ; encode pointer data at end + (send (x:ptr-type ptr) x:encode (x:ptr-val ptr) port))] + [else (encode-items parent)])) + + (define/augride (x:size [val #f] [parent #f]) + (when val (unless (sequence? val) + (raise-argument-error 'size "sequence" val))) + (cond + [val (define-values (new-parent len-size) + (if (x:int? @len) + (values (mhasheq x:parent-key parent) (send @len x:size)) + (values parent 0))) + (define items-size (for/sum ([item val]) + (send @type x:size item new-parent))) + (+ items-size len-size)] + [else (define count (resolve-length @len #f parent)) + (define size (send @type x:size #f parent)) + (* size count)])))) + +(define (x:list? x) (is-a? x x:list%)) + +(define/contract (x:list + [type-arg #f] + [len-arg #f] + #:type [type-kwarg uint8] + #:length [len-kwarg #f] + #:count-bytes [count-bytes? #f] + #:pre-encode [pre-proc #f] + #:post-decode [post-proc #f] + #:base-class [base-class x:list%]) + (() + ((or/c xenomorphic? #false) + (or/c length-resolvable? #false) + #:type (or/c xenomorphic? #false) + #:length (or/c length-resolvable? #false) + #:count-bytes boolean? + #:pre-encode (or/c (any/c . -> . any/c) #false) + #:post-decode (or/c (any/c . -> . any/c) #false) + #:base-class (λ (c) (subclass? c x:list%))) + . ->* . + x:list?) + (define type (or type-arg type-kwarg)) + (unless (xenomorphic? type) + (raise-argument-error 'x:list "xenomorphic type" type)) + (define len (or len-arg len-kwarg)) + (unless (length-resolvable? len) + (raise-argument-error 'x:list "resolvable length" len)) + (new (generate-subclass base-class pre-proc post-proc) + [type type] + [len len] + [count-bytes? count-bytes?])) + + +(define x:array% x:list%) +(define x:array x:list) +(define x:array? x:list?) + +(module+ test + (require rackunit "base.rkt") + (check-equal? (decode (x:list uint16be 3) #"ABCDEF") '(16706 17220 17734)) + (check-equal? (encode (x:list uint16be 3) '(16706 17220 17734) #f) #"ABCDEF") + (check-equal? (send (x:list uint16be) x:size '(1 2 3)) 6)) diff --git a/xenomorph/xenomorph/main.rkt b/xenomorph/xenomorph/main.rkt new file mode 100644 index 00000000..5484ddce --- /dev/null +++ b/xenomorph/xenomorph/main.rkt @@ -0,0 +1,22 @@ +#lang racket/base +(require racket/require) + +(define-syntax-rule (r+p ID ...) + (begin (require ID ...) (provide (all-from-out ID ...)))) + +(r+p "bitfield.rkt" + "bytes.rkt" + "dict.rkt" + "enum.rkt" + "base.rkt" + "list.rkt" + "number.rkt" + "optional.rkt" + "pointer.rkt" + "reserved.rkt" + "string.rkt" + "stream.rkt" + "symbol.rkt" + "vector.rkt" + "versioned-dict.rkt" + "util.rkt") diff --git a/xenomorph/xenomorph/number.rkt b/xenomorph/xenomorph/number.rkt new file mode 100644 index 00000000..dc49ec6b --- /dev/null +++ b/xenomorph/xenomorph/number.rkt @@ -0,0 +1,156 @@ +#lang debug racket/base +(require "base.rkt" "int.rkt" "list.rkt" racket/class racket/contract) +(provide (all-defined-out) (all-from-out "int.rkt")) + +#| +approximates +https://github.com/mbutterick/restructure/blob/master/src/Number.coffee +|# + + +(define (x:float? x) (is-a? x x:float%)) + +(define x:float% + (class x:number% + (super-new) + (inherit-field (@size size) (@endian endian)) + + (define/augment (x:decode port . _) + (floating-point-bytes->real (read-bytes @size port) (eq? @endian 'be))) + + (define/augment (x:encode val . _) + (real->floating-point-bytes val @size (eq? @endian 'be))))) + +(define/contract (x:float [size-arg #f] + #:size [size-kwarg 4] + #:endian [endian system-endian] + #:pre-encode [pre-proc #f] + #:post-decode [post-proc #f] + #:base-class [base-class x:float%]) + (() + ((or/c exact-positive-integer? #false) + #:size exact-positive-integer? + #:endian endian-value? + #:pre-encode (or/c (any/c . -> . any/c) #false) + #:post-decode (or/c (any/c . -> . any/c) #false) + #:base-class (λ (c) (subclass? c x:float?))) + . ->* . + x:float?) + (define size (or size-arg size-kwarg)) + (unless (exact-positive-integer? size) + (raise-argument-error 'x:float% "exact positive integer" size)) + (new (generate-subclass base-class pre-proc post-proc) [size size] [endian endian])) + +(define float (x:float 4)) +(define floatbe (x:float 4 #:endian 'be)) +(define floatle (x:float 4 #:endian 'le)) + +(define double (x:float 8)) +(define doublebe (x:float 8 #:endian 'be)) +(define doublele (x:float 8 #:endian 'le)) + +(define (x:fixed? x) (is-a? x x:fixed%)) + +(define x:fixed% + (class x:int% + (super-new) + (init-field [(@fracbits fracbits)]) + (inherit-field (@size size)) + (unless (exact-positive-integer? @fracbits) + (raise-argument-error 'x:fixed% "exact positive integer" @fracbits)) + (unless (<= @fracbits (* 8 @size)) + (raise-argument-error 'x:fixed% "fracbits no bigger than size bits" (list @fracbits @size))) + + (define fixed-shift (arithmetic-shift 1 @fracbits)) + + (define/override (post-decode int) + (exact-if-possible (/ int fixed-shift 1.0))) + + (define/override (pre-encode val) + (exact-if-possible (floor (* val fixed-shift)))))) + +(define/contract (x:fixed [size-arg #false] + #:size [size-kwarg 2] + #:signed [signed #true] + #:endian [endian system-endian] + #:fracbits [fracbits-arg #f] + #:pre-encode [pre-proc #f] + #:post-decode [post-proc #f] + #:base-class [base-class x:fixed%]) + (() + ((or/c exact-positive-integer? #false) + #:size exact-positive-integer? + #:endian endian-value? + #:fracbits exact-positive-integer? + #:pre-encode (or/c (any/c . -> . any/c) #false) + #:post-decode (or/c (any/c . -> . any/c) #false) + #:base-class (λ (c) (subclass? c x:fixed%))) + . ->* . + x:fixed?) + (define size (or size-arg size-kwarg)) + (unless (exact-positive-integer? size) + (raise-argument-error 'x:fixed "exact positive integer" size)) + (define fracbits (or fracbits-arg (/ (* size 8) 2))) + (unless (<= fracbits (* size 8)) + (raise-argument-error 'x:fixed "fracbits no bigger than size bits" fracbits)) + (new (generate-subclass base-class pre-proc post-proc) [size size] [signed signed] [endian endian] [fracbits fracbits])) + +(define fixed16 (x:fixed 2)) +(define fixed16be (x:fixed 2 #:endian 'be)) +(define fixed16le (x:fixed 2 #:endian 'le)) +(define fixed32 (x:fixed 4)) +(define fixed32be (x:fixed 4 #:endian 'be)) +(define fixed32le (x:fixed 4 #:endian 'le)) + +(module+ test + (require rackunit) + (define bs (encode fixed16be 123.45 #f)) + (check-equal? bs #"{s") + (check-equal? (ceiling (* (decode fixed16be bs) 100)) 12345.0)) + + +(define x:bigint + (x:list + #:type uint8 + #:length uint64 + #:pre-encode (λ (int) (for/fold ([int8s null] + [int int] + #:result int8s) + ([i (in-naturals)] + #:final (< int 256)) + (values (cons (bitwise-and int 255) int8s) + (arithmetic-shift int -8)))) + #:post-decode (λ (ints) (bytes->uint (apply bytes (reverse ints)))))) + +(module+ test + (define (bigint) (string->number (list->string (for/list ([i (in-range (random 10 30))]) + (integer->char (+ 48 (random 10))))))) + (for ([i (in-range 100)]) + (define int (bigint)) + (check-= int (decode x:bigint (encode x:bigint int #f)) 0))) + +(define x:exact + (x:list + x:bigint + #:length 2 + #:pre-encode (λ (exact) (list (numerator exact) (denominator exact))) + #:post-decode (λ (nd) (apply / nd)))) + +(module+ test + (define (exact) (/ (bigint) (bigint))) + (for ([i (in-range 100)]) + (define ex (exact)) + (check-= ex (decode x:exact (encode x:exact ex #f)) 0))) + +(define x:complex + (x:list + double + #:length 2 + #:pre-encode (λ (num) (list (real-part num) (imag-part num))) + #:post-decode (λ (ri) (+ (car ri) (* +i (cadr ri)))))) + +(module+ test + (define (complex) (+ (exact) (* +i (exact) 1.0) 1.0)) + (for ([i (in-range 100)]) + (define c (complex)) + (check-= c (decode x:complex (encode x:complex c #f)) 0.1))) \ No newline at end of file diff --git a/xenomorph/xenomorph/optional.rkt b/xenomorph/xenomorph/optional.rkt new file mode 100644 index 00000000..0e83859c --- /dev/null +++ b/xenomorph/xenomorph/optional.rkt @@ -0,0 +1,66 @@ +#lang racket/base +(require "base.rkt" + racket/class + racket/match + racket/contract) +(provide (all-defined-out)) + +#| +approximates +https://github.com/mbutterick/restructure/blob/master/src/Optional.coffee +|# + +(define x:optional% + (class x:base% + (super-new) + (init-field [(@type type)] [(@condition condition)]) + + (unless (xenomorphic? @type) + (raise-argument-error 'x:optional "xenomorphic type" @type)) + + (define (resolve-condition parent) + (match @condition + [(? procedure? proc) (proc parent)] + [val val])) + + (define/augment (x:decode port parent) + (when (resolve-condition parent) + (send @type x:decode port parent))) + + (define/augment (x:encode val port [parent #f]) + (when (resolve-condition parent) + (send @type x:encode val port parent))) + + (define/augment (x:size [val #f] [parent #f]) + (if (resolve-condition parent) (send @type x:size val parent) 0)))) + +(define no-val (gensym)) + +(define (x:optional? x) (is-a? x x:optional%)) + +(define/contract (x:optional + [type-arg #f] + [cond-arg no-val] + #:type [type-kwarg #f] + #:condition [cond-kwarg no-val] + #:pre-encode [pre-proc #f] + #:post-decode [post-proc #f] + #:base-class [base-class x:optional%]) + (() + ((or/c xenomorphic? #false) + any/c + #:type (or/c xenomorphic? #false) + #:condition any/c + #:pre-encode (or/c (any/c . -> . any/c) #false) + #:post-decode (or/c (any/c . -> . any/c) #false) + #:base-class (λ (c) (subclass? c x:optional%))) + . ->* . + x:optional?) + (define type (or type-arg type-kwarg)) + (unless (xenomorphic? type) + (raise-argument-error 'x:optional "xenomorphic type" type)) + (define condition (cond + [(and (eq? cond-arg no-val) (eq? cond-kwarg no-val)) #true] + [(not (eq? cond-arg no-val)) cond-arg] + [(not (eq? cond-kwarg no-val)) cond-kwarg])) + (new (generate-subclass base-class pre-proc post-proc) [type type] [condition condition])) diff --git a/xenomorph/xenomorph/pointer.rkt b/xenomorph/xenomorph/pointer.rkt new file mode 100644 index 00000000..d088b4d1 --- /dev/null +++ b/xenomorph/xenomorph/pointer.rkt @@ -0,0 +1,164 @@ +#lang racket/base +(require "base.rkt" + "number.rkt" + racket/dict + racket/class + racket/promise + racket/match + racket/contract + sugar/unstable/dict) +(provide (all-defined-out)) + +#| +approximates +https://github.com/mbutterick/restructure/blob/master/src/Pointer.coffee +|# + +(define valid-pointer-relatives '(local immediate parent global)) + +(define (pointer-relative-value? x) + (and (symbol? x) (memq x valid-pointer-relatives))) + +(define (find-top-parent parent) + (cond + [(hash-ref parent x:parent-key #f) => find-top-parent] + [else parent])) + +(define (resolve-pointer type val) + (cond + [type (values type val)] + [(xvoid-pointer? val) (values (xvoid-pointer-type val) (xvoid-pointer-value val))] + [else (raise-argument-error 'x:pointer "VoidPointer" val)])) + +(define x:pointer% + (class x:base% + (super-new) + (init-field [(@ptr-val-type ptr-type)] + [(@dest-type dest-type)] + [(@pointer-relative-to pointer-relative-to)] + [(@allow-null? allow-null?)] + [(@null-value null-value)] + [(@pointer-lazy? pointer-lazy?)]) + + (define/augride (x:decode port parent) + (define offset (send @ptr-val-type x:decode port parent)) + (cond + [(and @allow-null? (= offset @null-value)) #false] ; handle null pointers + [else + (define relative (+ (case @pointer-relative-to + [(local) (hash-ref parent x:start-offset-key)] + [(immediate) (- (pos port) (send @ptr-val-type x:size))] + [(parent) (hash-ref (hash-ref parent x:parent-key) x:start-offset-key)] + [(global) (or (hash-ref (find-top-parent parent) x:start-offset-key) 0)] + [else (error 'unknown-pointer-style)]))) + (define ptr (+ offset relative)) + (cond + [@dest-type (define (decode-value) + (define orig-pos (pos port)) + (pos port ptr) + (begin0 + (send @dest-type x:decode port parent) + (pos port orig-pos))) + (if @pointer-lazy? (delay (decode-value)) (decode-value))] + [else ptr])])) + + (define/augride (x:encode val-in port [parent #f]) + (unless parent ; todo: furnish default pointer context? adapt from Struct? + (raise-argument-error 'xpointer-encode "valid pointer context" parent)) + (cond + [val-in + (define new-parent (case @pointer-relative-to + [(local immediate) parent] + [(parent) (hash-ref parent x:parent-key)] + [(global) (find-top-parent parent)] + [else (error 'unknown-pointer-style)])) + (define relative (+ (case @pointer-relative-to + [(local parent) (hash-ref new-parent x:start-offset-key)] + [(immediate) (+ (pos port) (send @ptr-val-type x:size val-in parent))] + [(global) 0]))) + (send @ptr-val-type x:encode (- (hash-ref new-parent x:pointer-offset-key) relative) port) + (define-values (type val) (resolve-pointer @dest-type val-in)) + (hash-update! new-parent x:pointers-key + (λ (ptrs) (append ptrs (list (x:ptr type val parent))))) + (hash-set! new-parent x:pointer-offset-key + (+ (hash-ref new-parent x:pointer-offset-key) (send type x:size val parent)))] + [else (send @ptr-val-type x:encode @null-value port)])) + + (define/augride (x:size [val-in #f] [parent #f]) + (define new-parent (case @pointer-relative-to + [(local immediate) parent] + [(parent) (hash-ref parent x:parent-key)] + [(global) (find-top-parent parent)] + [else (error 'unknown-pointer-style)])) + (define-values (type val) (resolve-pointer @dest-type val-in)) + (when (and val new-parent) + (hash-set! new-parent x:pointer-size-key + (and (hash-ref new-parent x:pointer-size-key #f) + (+ (hash-ref new-parent x:pointer-size-key) (send type x:size val new-parent))))) + (send @ptr-val-type x:size)))) + +#| +The arguments here are renamed slightly compared to the original. + +offsetType => offset-type +The type of the thing the pointer points to. + +type => type +The type of the pointer value itself. + +options.type => relative-to +The reference point of the pointer value (local, immediate, parent, global). It was confusing to have two things named `type`, however. + +relativeTo => [not supported] +This allows the pointer to be calculated relative to a property on the parent. I saw no use for this, so I dropped it. +|# + +(define (x:pointer? x) (is-a? x x:pointer%)) + +(define/contract (x:pointer + [ptr-type-arg #f] + [dest-type-arg #f] + #:type [ptr-type-kwarg uint32] + #:dest-type [dest-type-kwarg uint8] + #:relative-to [pointer-relative-to 'local] + #:lazy [pointer-lazy? #f] + #:allow-null [allow-null? #t] + #:null [null-value 0] + #:pre-encode [pre-proc #f] + #:post-decode [post-proc #f] + #:base-class [base-class x:pointer%]) + (() + ( + (or/c x:int? #false) + (or/c xenomorphic? 'void #false) + #:type (or/c x:int? #false) + #:dest-type (or/c xenomorphic? 'void #false) + #:relative-to pointer-relative-value? + #:lazy boolean? + #:allow-null boolean? + #:null any/c + #:pre-encode (or/c (any/c . -> . any/c) #false) + #:post-decode (or/c (any/c . -> . any/c) #false) + #:base-class (λ (c) (subclass? c x:pointer%))) + . ->* . + x:pointer?) + (unless (pointer-relative-value? pointer-relative-to) + (raise-argument-error 'x:pointer (format "~v" valid-pointer-relatives) pointer-relative-to)) + (new (generate-subclass base-class pre-proc post-proc) + [ptr-type (or ptr-type-arg ptr-type-kwarg)] + [dest-type (match (or dest-type-arg dest-type-kwarg) + ['void #false] + [type-in type-in])] + [pointer-relative-to pointer-relative-to] + [pointer-lazy? pointer-lazy?] + [allow-null? allow-null?] + [null-value null-value])) + +;; A pointer whose type is determined at decode time +(define x:void-pointer% (class x:base% + (super-new) + (init-field type value))) +(define (x:void-pointer . args) (apply make-object x:void-pointer% args)) +(define (xvoid-pointer? x) (is-a? x x:void-pointer%)) +(define (xvoid-pointer-type x) (get-field type x)) +(define (xvoid-pointer-value x) (get-field value x)) diff --git a/xenomorph/xenomorph/reserved.rkt b/xenomorph/xenomorph/reserved.rkt new file mode 100644 index 00000000..3953f735 --- /dev/null +++ b/xenomorph/xenomorph/reserved.rkt @@ -0,0 +1,56 @@ +#lang racket/base +(require racket/class + racket/contract + "base.rkt" + "util.rkt") +(provide (all-defined-out)) + +#| +approximates +https://github.com/mbutterick/restructure/blob/master/src/Reserved.coffee +|# + +(define x:reserved% + (class x:base% + (super-new) + (init-field [(@type type)] [(@count count)]) + + (unless (xenomorphic? @type) + (raise-argument-error 'x:reserved "xenomorphic type" @type)) + + (define/augment (x:decode port parent) + (pos port (+ (pos port) (x:size #f parent))) + (void)) + + (define/augment (x:encode val port [parent #f]) + (make-bytes (x:size val parent) 0)) + + (define/augment (x:size [val #f] [parent #f]) + (* (send @type x:size) (resolve-length @count #f parent))))) + +(define (x:reserved? x) (is-a? x x:reserved%)) + +(define/contract (x:reserved [type-arg #f] + [count-arg #f] + #:type [type-kwarg #f] + #:count [count-kwarg 1] + #:pre-encode [pre-proc #f] + #:post-decode [post-proc #f] + #:base-class [base-class x:reserved%]) + (() + ((or/c xenomorphic? #false) + (or/c exact-positive-integer? #false) + #:type (or/c xenomorphic? #false) + #:count exact-positive-integer? + #:pre-encode (or/c (any/c . -> . any/c) #false) + #:post-decode (or/c (any/c . -> . any/c) #false) + #:base-class (λ (c) (subclass? c x:reserved%))) + . ->* . + x:reserved?) + (define type (or type-arg type-kwarg)) + (unless (xenomorphic? type) + (raise-argument-error 'x:reserved "xenomorphic type" type)) + (define count (or count-arg count-kwarg)) + (new (generate-subclass base-class pre-proc post-proc) + [type type] + [count count])) \ No newline at end of file diff --git a/xenomorph/xenomorph/scribblings/xenomorph.scrbl b/xenomorph/xenomorph/scribblings/xenomorph.scrbl new file mode 100644 index 00000000..205452bc --- /dev/null +++ b/xenomorph/xenomorph/scribblings/xenomorph.scrbl @@ -0,0 +1,1865 @@ + #lang scribble/manual + +@(require scribble/eval (for-label racket/base racket/class racket/file racket/dict racket/list racket/stream racket/promise racket/vector xenomorph)) + +@(define my-eval (make-base-eval)) +@(my-eval '(require xenomorph racket/list racket/stream racket/vector)) + + +@title{Xenomorph: binary encoding & decoding} + +@author[(author+email "Matthew Butterick" "mb@mbtype.com")] + +@margin-note{This package is in development. I make no commitment to maintaining the public interface documented below.} + +@defmodule[xenomorph] + +Hands up: who likes working with binary formats? + +OK, just a few of you, in the back. You're free to go. + +As for everyone else: Xenomorph eases the pain of working with binary formats. Instead of laboriously counting bytes — + +@itemlist[#:style 'ordered +@item{You describe a binary format declaratively by using smaller ingredients — e.g., integers, strings, lists, pointers, dicts, and perhaps other nested encodings. This is known as a @deftech{xenomorphic object}.} + +@item{This xenomorphic object can then be used as a binary encoder, allowing you to convert Racket values to binary and write them out to a file.} + +@item{But wait, there's more: once defined, this xenomorphic object can @emph{also} be used as a binary decoder, reading bytes and parsing them into Racket values.} +] + +So one binary-format definition can be used for both input and output. Meanwhile, Xenomorph handles all the dull housekeeping of counting bytes (because somebody has to). + +This package is derived principally from Devon Govett's @link["https://github.com/devongovett/restructure"]{@tt{restructure}} library for Node.js. Thanks for doing the heavy lifting, dude. + + +@section{Installation} + +At the command line: + +@verbatim{raco pkg install xenomorph} + +After that, you can update the package from the command line: + +@verbatim{raco pkg update xenomorph} + +Invoke the library in a source file by importing it in the usual way: + +@verbatim{(require xenomorph)} + + + +@section{The big picture} + +@subsection{Bytes and byte strings} + +Suppose we have a file on disk. What's in the file? Without knowing anything else, we can at least say the file contains a sequence of @deftech{bytes}. A byte is the smallest unit of data storage. It's not, however, the smallest unit of information storage — that would be a @deftech{bit}. But when we read (or write) from disk (or other source, like memory), we work with bytes. A byte holds eight bits, so it can take on values between 0 and 255, inclusive. + +In Racket, a fixed-length array of bytes is also known as a @tech[#:doc '(lib "scribblings/reference/reference.scrbl")]{byte string}. It prints as a series of values between quotation marks, prefixed with @litchar{#}: + +@racketblock[#"ABC"] + +Caution: though this looks similar to the ordinary string @racket["ABC"], we're better off thinking of it as a block of integers that are sometimes displayed as characters for convenience. For instance, the byte string above represents three bytes valued 65, 66, and 67. This byte string could also be written in hexadecimal like so: + +@(racketvalfont "#\"\\x41\\x42\\x43\"") + +Or octal like so: + +@(racketvalfont "#\"\\101\\102\\103\"") + +All three mean the same thing. (If you like, confirm this by trying them on the REPL.) + +We can also make an equivalent byte string with @racket[bytes]. As above, Racket doesn't care how we notate the values, as long as they're between 0 and 255: + +@examples[#:eval my-eval +(bytes 65 66 67) +(bytes (+ 31 34) (* 3 22) (- 100 33)) +(apply bytes (map char->integer '(#\A #\B #\C))) +] + +Byte values between 32 and 127 are printed as characters. Other values are printed in octal: + +@examples[#:eval my-eval +(bytes 65 66 67 154 206 255) +] + +If you think this printing convention is a little weird, I agree. But that's how Racket does it. + +If we prefer to deal with lists of integers, we can always use @racket[bytes->list] and @racket[list->bytes]: + +@examples[#:eval my-eval +(bytes->list #"ABC\232\316\377") +(list->bytes '(65 66 67 154 206 255)) +] + +The key point: the @litchar{#} prefix tells us we're looking at a byte string, not an ordinary string. + + +@subsection{Binary formats} + +Back to files. Files are classified as being either @deftech{binary} or @deftech{text}. (A distinction observed by Racket functions such as @racket[write-to-file].) When we speak of binary vs. text, we're saying something about the internal structure of the byte sequence — what values those bytes represent. We'll call this internal structure the @deftech{binary format} of the file. + +@margin-note{This internal structure is also called an @emph{encoding}. Here, however, I avoid using that term as a synonym for @tech{binary format}, because I prefer to reserve it for when we talk about encoding and decoding as operations on data.} + +@;{ +@subsubsection{Text encodings} + +Text files are a just a particular subset of binary files that use a @deftech{text encoding} — that is, a binary encoding that stores human-readable characters. + +But since we all have experience with text files, let's use text encoding as a way of starting to understand what's happening under the hood with binary encodings. + +For example, ASCII is a familiar encoding that stores each character in seven bits, so it can describe 128 distinct characters. Because every ASCII code is less than 255, we can store ASCII text with one byte per character. + +But if we want to use more than 128 distinct characters, we're stuck. That's why Racket instead uses the UTF-8 text encoding by default. UTF-8 uses between one and three bytes to encode each character, and can thus represent up to 1,112,064 distinct characters. We can see how this works by converting a string into an encoded byte sequence using @racket[string->bytes/utf-8]: + +@examples[#:eval my-eval +(string->bytes/utf-8 "ABCD") +(bytes->list (string->bytes/utf-8 "ABCD")) +(string->bytes/utf-8 "ABÇ战") +(bytes->list (string->bytes/utf-8 "ABÇ战")) +] + +For ASCII-compatible characters, UTF-8 uses one byte for each character. Thus, the string @racket["ABCD"] is four bytes long in UTF-8. + +Now consider the string @racket["ABÇ战"], which has four characters, but the second two aren't ASCII-compatible. In UTF-8, it's encoded as seven bytes: the first two characters are one byte each, the @racket["Ç"] takes two bytes, and the @racket["战"] takes three. + +Moreover, for further simplicity, text files typically rely on a small set of pre-defined encodings, like ASCII or UTF-8 or Latin-1, so that those who write programs that manipulate text only have to support a smallish set of encodings. + + +@subsubsection{Binary encodings} + + + +@subsubsection{In sum} + +Three corollaries follow: + +@itemlist[#:style 'ordered +@item{A given sequence of bytes can mean different things, depending on what encoding we use.} + +@item{We can only make sense of a sequence of bytes if we know its encoding.} + +@item{A byte sequence does not describe its own encoding.} + +] + +For those familiar with programming-language lingo, an encoding somewhat resembles a @deftech{grammar}, which is a tool for describing the syntactic structure of a program. A grammar doesn't describe one particular program. Rather, it describes all possible programs that are consistent with the grammar, and therefore can be used to parse any particular one. Likewise for an encoding. + +@margin-note{Can a grammar work as a binary encoding? In limited cases, but not enough to be practical. Most grammars have to assume the target program is context free, meaning that the grammar rules apply the same way everywhere. By contrast, binary files are nonrecursive and contextual.} + +} + + + +@;{ +@section{Quick tutorial} + +@examples[#:eval my-eval +(define four-ints (+ArrayT uint8 4)) + +(decode four-ints #"\1\2\3\4") +(decode four-ints #"\1\2\3") +(decode four-ints #"\1\2\3\4\5\6") + +(define op (open-output-string)) +(encode four-ints '(1 2 3 4) op) +(get-output-bytes op) + ] + + +} + +@section{Tutorials} + +@subsection{A binary format for complex numbers} + +Racket natively supports @tech[#:doc '(lib "scribblings/reference/reference.scrbl")]{complex numbers}. Suppose we want to encode these numbers in a binary format without losing precision. How would we do it? + +First, we need to understand Racket's recipe for a complex number: + +@itemlist[ +@item{A @tech[#:doc '(lib "scribblings/reference/reference.scrbl")]{complex number} has a @italic{real part} and an @italic{imaginary part}. The coeffiecient of each part is a @italic{real number}.} + +@item{A @tech[#:doc '(lib "scribblings/reference/reference.scrbl")]{real number} is either a @tech[#:doc '(lib "scribblings/reference/reference.scrbl")]{inexact number} (that is, a @italic{floating-point number}) or an @tech[#:doc '(lib "scribblings/reference/reference.scrbl")]{exact number}.} + +@item{An @tech[#:doc '(lib "scribblings/reference/reference.scrbl")]{exact number} is a rational number — i.e., a number with a @italic{numerator} and @italic{denominator}.} + +@item{The @italic{numerator} and @italic{denominator} can each be an arbitrarily large signed integer, which we'll call a @italic{big integer} to distinguish it from fixed-size integers otherwise common in binary formats.} + +] + +To make a binary format for complex numbers, we build the format by composing smaller ingredients into bigger ones. So we'll work the recipe from bottom to top, composing our ingredients as we go. + +@subsubsection{Big integers} + +Let's start with the big integers. We can't use an existing signed-integer type like @racket[int32] because our big integers won't necessarily fit. For that matter, this also rules out any type derived from @racket[x:int%], because all xenomorphic integers have a fixed size. + +Instead, we need to use a variable-length type. How about an @racket[x:string]? If we don't specify a @racket[#:length] argument, it can be arbitrarily long. All we need to do is convert our number to a string before encoding (with @racket[number->string]) and then convert string to number after decoding (with @racket[string->number]). + + +@interaction[#:eval my-eval +(define bigint (x:string #:pre-encode number->string + #:post-decode string->number)) + +(define abigint (- (expt 2 80))) +abigint +(encode bigint abigint #f) +(decode bigint #"-1208925819614629174706176\0") +] + +@subsubsection{Exact numbers} + +Next, we handle exact numbers. An exact number is a combination of two big integers representing a numerator and a denominator. So in this case, we need a xenomorphic type that can store two values. How about an @racket[x:list]? The length of the list will be two, and the type of the list will be our new @racket[bigint] type. + +Similar to before, we use pre-encoding to convert our Racket value into an encodable shape. This time, we convert an exact number into a list of its @racket[numerator] and @racket[denominator]. After decoding, we take that list and convert its values back into an exact number (by using @racket[/]): + +@interaction[#:eval my-eval +(define exact (x:list #:type bigint + #:length 2 + #:pre-encode (λ (x) (list (numerator x) (denominator x))) + #:post-decode (λ (nd) (apply / nd)))) +(encode exact -1234/5678 #f) +(decode exact #"-617\0002839\0") +] + +@subsubsection{Real numbers} + +A real number is either a floating-point number (for which we can use Xenomorph's built-in @racket[float] type) or an exact number (for which we can use the @racket[exact] type we just defined). + +This time, we need an encoder that allows us to choose from among two possibilities. How about an @racket[x:versioned-dict]? We'll assign our exact numbers to version 0, and our floats to version 1. (These version numbers are arbitrary — we could pick any two values, but a small integer will fit inside a @racket[uint8].) + +We specify a @racket[#:version-key] of @racket['version]. Then in our pre-encode function, we choose the version of the encoding based on whether the input value is @racket[exact?]. + +@interaction[#:eval my-eval +(define real (x:versioned-dict + #:type uint8 + #:version-key 'version + #:versions + (list + (cons 0 (list (cons 'val exact))) + (cons 1 (list (cons 'val float)))) + #:pre-encode (λ (num) (list (cons 'val num) + (cons 'version (if (exact? num) + 0 + 1)))) + #:post-decode (λ (h) (hash-ref h 'val)))) +(encode real 123.45 #f) +(decode real #"\1f\346\366B") +(encode real -1/16 #f) +(decode real #"\0-1\00016\0") +] + +Notice that the float loses some precision during the encoding & decoding process. This is a natural part of how floating-point numbers work — they are called @tech[#:doc '(lib "scribblings/reference/reference.scrbl")]{inexact numbers} for this reason — so this is a feature, not a bug. + +@subsubsection{Complex numbers} + +Now we put it all together. A complex number is a combination of a real part and an imaginary part, each of which has a real coefficient. Therefore, we can model a complex number in a binary format just like we did for exact numbers: as a list of two values. + +Once again, we use a pre-encoder and post-decoder to massage the data. On the way in, the pre-encoder turns the complex number into a list of real-number coefficients with @racket[real-part] and @racket[imag-part]. On the way out, these coefficients are reformed into a complex number through some easy addition and multiplication. + + +@interaction[#:eval my-eval +(define complex (x:list #:type real + #:length 2 + #:pre-encode (λ (num) (list (real-part num) (imag-part num))) + #:post-decode (λ (ri) (+ (first ri) (* +i (second ri)))))) +(encode complex 123.45-6.789i #f) +(decode complex #"\1f\346\366B\1}?\331\300") +(encode complex 1/234-5/678i #f) +(decode complex #"\0001\000234\0\0-5\000678\0") +] + + + +@section{Main interface} + +@defproc[ +(xenomorphic? +[x any/c]) +boolean?]{ +Whether @racket[x] is an object of type @racket[x:base%]. +} + +@defproc[ +(decode +[xenomorphic-obj xenomorphic?] +[byte-source (or/c bytes? input-port?) (current-input-port)] +[#:parent parent (or/c xenomorphic? #false) #false] +[arg any/c] ...) +any/c]{ +Read bytes from @racket[byte-source] and convert them to a Racket value using @racket[xenomorphic-obj] as the decoder. + +If @racket[byte-source] contains more bytes than @racket[xenomorphic-obj] needs to decode a value, it reads as many bytes as necessary and leaves the rest. +} + +@defproc[ +(encode +[xenomorphic-obj xenomorphic?] +[val any/c] +[byte-dest (or/c output-port? #false) (current-output-port)] +[#:parent parent (or/c xenomorphic? #false) #false] +[arg any/c] ... +) +(or/c void? bytes?)]{ +Convert @racket[val] to bytes using @racket[xenomorphic-obj] as the encoder. + +If @racket[byte-dest] is an @racket[output-port?], the bytes are written there and the return value is @racket[(void)]. If @racket[byte-dest] is @racket[#false], the encoded byte string is the return value. + +If @racket[val] does not match the @racket[xenomorphic-obj] type appropriately — for instance, you try to @racket[encode] a negative integer using an unsigned integer type like @racket[uint8] — then an error will arise. +} + + +@section{Core xenomorphic objects} + +These basic xenomorphic objects can be used on their own, or combined to make bigger xenomorphic objects. + +Note on naming: the main xenomorphic objects have an @litchar{x:} prefix to distinguish them from (and prevent name collisions with) the ordinary Racket thing (for instance, @racket[x:list] vs. @racket[list]). Other xenomorphic objects (like @racket[uint8]) don't have this prefix, because it seems unnecessary and therefore laborious. + +@defclass[x:base% object% ()]{ + +When making your own xenomorphic objects, usually you'll want to stick together existing core objects, or inherit from one of those classes. Inheriting from @racket[x:base%] is also allowed, but you have to do all the heavy lifting. + +@defmethod[ +#:mode pubment +(x:decode +[input-port input-port?] +[parent (or/c xenomorphic? #false)] +[args any/c] ...) +any/c]{ +Read bytes from @racket[input-port] and convert them into a Racket value. Called by @racket[decode]. +} + +@defmethod[ +(post-decode +[val any/c]) +any/c]{ +Hook for post-processing on @racket[val] after it's returned by @racket[x:decode] but before it's returned by @racket[decode]. +} + +@defmethod[ +#:mode pubment +(x:encode +[val any/c] +[output-port output-port?] +[parent (or/c xenomorphic? #false)] +[args any/c] ...) +bytes?]{ +Convert a value into a @tech[#:doc '(lib "scribblings/reference/reference.scrbl")]{byte string} which is written to @racket[output-port]. Called by @racket[encode]. +} + +@defmethod[ +(pre-encode +[val any/c]) +any/c]{ +Hook for pre-processing on @racket[val] after it's passed to @racket[encode] but before it's passed to @racket[x:encode]. +} + +@defmethod[ +#:mode pubment +(x:size +[val any/c] +[parent (or/c xenomorphic? #false)] +[args any/c] ...) +exact-nonnegative-integer?]{ +The length of the @tech[#:doc '(lib "scribblings/reference/reference.scrbl")]{byte string} that @racket[val] would produce if it were encoded using @racket[x:encode]. Called by @racket[size]. +} + +} + + + +@subsection{Numbers} + +@defmodule[xenomorph/number] + + +@subsubsection{Little endian vs. big endian} + +When an integer is more than one byte long, one has to consider how the bytes are ordered. If the byte representing the lowest 8 bits appears first, it's known as @emph{little endian} byte ordering. If this byte appears last, it's called @emph{big endian} byte ordering. + +For example, the integer 1 in 32-bit occupies four bytes. In little endian, the bytes would be in increasing order, or @racket[#"\1\0\0\0"]. In big endian, the bytes would be in decreasing order, or @racket[#"\0\0\0\1"]. + +When encoding and decoding binary formats, one has to be consistent about endianness, because it will change the meaning of the binary value. For instance, if we inadvertently treated the big-endian byte string @racket[#"\0\0\0\1"] as little endian, we'd get the result @racket[16777216] instead of the expected @racket[1]. + + +@defproc[ +(endian-value? +[val any/c]) +boolean?]{ +Whether @racket[val] is either @racket['be] (representing big endian) or @racket['le] (representing little endian). +} + + +@defthing[system-endian endian-value?]{ +The endian value of the current system. Big endian is represented as @racket['be] and little endian as @racket['le]. This can be used as an argument for classes that inherit from @racket[x:number%]. + +Use this value carefully, however. Binary formats are usually defined using one endian convention or the other (so that data can be exchanged among machines regardless of the endianness of the underlying system). + +} + +@defclass[x:number% x:base% ()]{ + +@defconstructor[ +([size exact-positive-integer?] +[signed? boolean?] +[endian endian-value?])]{ +Create class instance that represents a binary number format @racket[size] bytes long, either @racket[signed?] or not, with @racket[endian] byte ordering. The endian arugment can be @racket[system-endian]. +} + +} + +@subsubsection{Integers} + +@defclass[x:int% x:number% ()]{ +Base class for integer formats. Use @racket[x:int] to conveniently instantiate new integer formats. +} + +@defproc[ +(x:int? +[x any/c]) +boolean?]{ +Whether @racket[x] is an object of type @racket[x:int%]. +} + +@defproc[ +(x:int +[size-arg (or/c exact-positive-integer? #false) #false] +[#:size size-kw exact-positive-integer? 2] +[#:signed signed boolean? #true] +[#:endian endian endian-value? system-endian] +[#:pre-encode pre-encode-proc (or/c (any/c . -> . any/c) #false) #false] +[#:post-decode post-decode-proc (or/c (any/c . -> . any/c) #false) #false] +[#:base-class base-class (λ (c) (subclass? c x:int%)) x:int%] +) +x:int?]{ +Generate an instance of @racket[x:int%] (or a subclass of @racket[x:int%]) with certain optional attributes. + +@racket[size-arg] or @racket[size-kw] (whichever is provided, though @racket[size-arg] takes precedence) controls the encoded size. + +@racket[signed] controls whether the integer is signed or unsigned. + +@racket[endian] controls the byte-ordering convention. + +@racket[pre-encode-proc] and @racket[post-decode-proc] control the pre-encoding and post-decoding procedures, respectively. Each takes as input the value to be processed and returns a new value. + +@racket[base-class] controls the class used for instantiation of the new object. +} + +@deftogether[ +(@defthing[int8 x:int?] +@defthing[int16 x:int?] +@defthing[int24 x:int?] +@defthing[int32 x:int?] +@defthing[int64 x:int?] +@defthing[uint8 x:int?] +@defthing[uint16 x:int?] +@defthing[uint24 x:int?] +@defthing[uint32 x:int?] +@defthing[uint64 x:int?]) +]{ +The common integer types, using @racket[system-endian] endianness. The @racket[u] prefix indicates unsigned. The numerical suffix indicates bit length. + +Use these carefully, however. Binary formats are usually defined using one endian convention or the other (so that data can be exchanged among machines regardless of the endianness of the underlying system). + +@examples[#:eval my-eval +(encode int8 1 #f) +(encode int16 1 #f) +(encode int24 1 #f) +(encode int32 1 #f) +(encode int64 1 #f) +(encode int8 -128 #f) +(encode int16 -128 #f) +(encode int24 -128 #f) +(encode int32 -128 #f) +(encode int64 -128 #f) +(encode uint8 1 #f) +(encode uint16 1 #f) +(encode uint24 1 #f) +(encode uint32 1 #f) +(encode uint64 1 #f) +(code:comment @#,t{negative numbers cannot be encoded as unsigned ints, of course}) +(encode uint8 -128 #f) +(encode uint16 -128 #f) +(encode uint24 -128 #f) +(encode uint32 -128 #f) +(encode uint64 -128 #f) +(decode int8 #"1" #f) +(decode int16 #"10" #f) +(decode int24 #"100" #f) +(decode int32 #"1000" #f) +(decode int64 #"10000000" #f) +(decode uint8 #"1" #f) +(decode uint16 #"10" #f) +(decode uint24 #"100" #f) +(decode uint32 #"1000" #f) +(decode uint64 #"10000000" #f) +] + +} + + +@deftogether[ +(@defthing[int8be x:int?] +@defthing[int16be x:int?] +@defthing[int24be x:int?] +@defthing[int32be x:int?] +@defthing[int64be x:int?] +@defthing[uint8be x:int?] +@defthing[uint16be x:int?] +@defthing[uint24be x:int?] +@defthing[uint32be x:int?] +@defthing[uint64be x:int?]) +]{ +Big-endian versions of the common integer types. The @racket[u] prefix indicates unsigned. The numerical suffix indicates bit length. @racket[int8be] and @racket[uint8be] are included for consistency, but as one-byte types, they are not affected by endianness. + + +@examples[#:eval my-eval +(encode int8be 1 #f) +(encode int16be 1 #f) +(encode int24be 1 #f) +(encode int32be 1 #f) +(encode int64be 1 #f) +(encode int8be -128 #f) +(encode int16be -128 #f) +(encode int24be -128 #f) +(encode int32be -128 #f) +(encode int64be -128 #f) +(encode uint8be 1 #f) +(encode uint16be 1 #f) +(encode uint24be 1 #f) +(encode uint32be 1 #f) +(encode uint64be 1 #f) +(decode int8be #"1" #f) +(decode int16be #"10" #f) +(decode int24be #"100" #f) +(decode int32be #"1000" #f) +(decode int64be #"10000000" #f) +(decode int8be #"1" #f) +(decode int16be #"10" #f) +(decode int24be #"100" #f) +(decode int32be #"1000" #f) +(decode int64be #"10000000" #f) +(decode uint8be #"1" #f) +(decode uint16be #"10" #f) +(decode uint24be #"100" #f) +(decode uint32be #"1000" #f) +(decode uint64be #"10000000" #f) +] + +} + +@deftogether[ +(@defthing[int8le x:int?] +@defthing[int16le x:int?] +@defthing[int24le x:int?] +@defthing[int32le x:int?] +@defthing[int64le x:int?] +@defthing[uint8le x:int?] +@defthing[uint16le x:int?] +@defthing[uint24le x:int?] +@defthing[uint32le x:int?] +@defthing[uint64le x:int?]) +]{ +Little-endian versions of the common integer types. The @racket[u] prefix indicates unsigned. The numerical suffix indicates bit length. @racket[int8le] and @racket[uint8le] are included for consistency, but as one-byte types, they are not affected by endianness. + +@examples[#:eval my-eval +(encode int8le 1 #f) +(encode int16le 1 #f) +(encode int24le 1 #f) +(encode int32le 1 #f) +(encode int64le 1 #f) +(encode int8le -128 #f) +(encode int16le -128 #f) +(encode int24le -128 #f) +(encode int32le -128 #f) +(encode int64le -128 #f) +(encode uint8le 1 #f) +(encode uint16le 1 #f) +(encode uint24le 1 #f) +(encode uint32le 1 #f) +(encode uint64le 1 #f) +(decode int8le #"1" #f) +(decode int16le #"10" #f) +(decode int24le #"100" #f) +(decode int32le #"1000" #f) +(decode int64le #"10000000" #f) +(decode uint8le #"1" #f) +(decode uint16le #"10" #f) +(decode uint24le #"100" #f) +(decode uint32le #"1000" #f) +(decode uint64le #"10000000" #f) +] +} + + +@subsubsection{Floats} + +@defclass[x:float% x:number% ()]{ +Base class for floating-point number formats. By convention, all floats are signed. Use @racket[x:float] to conveniently instantiate new floating-point number formats. +} + +@defproc[ +(x:float? +[x any/c]) +boolean?]{ +Whether @racket[x] is an object of type @racket[x:float%]. +} + +@defproc[ +(x:float +[size-arg (or/c exact-positive-integer? #false) #false] +[#:size size-kw exact-positive-integer? 2] +[#:endian endian endian-value? system-endian] +[#:pre-encode pre-encode-proc (or/c (any/c . -> . any/c) #false) #false] +[#:post-decode post-decode-proc (or/c (any/c . -> . any/c) #false) #false] +[#:base-class base-class (λ (c) (subclass? c x:float%)) x:float%] +) +x:int?]{ +Generate an instance of @racket[x:float%] (or a subclass of @racket[x:float%]) with certain optional attributes. + +@racket[size-arg] or @racket[size-kw] (whichever is provided, though @racket[size-arg] takes precedence) controls the encoded size. + +@racket[endian] controls the byte-ordering convention. + +@racket[pre-encode-proc] and @racket[post-decode-proc] control the pre-encoding and post-decoding procedures, respectively. Each takes as input the value to be processed and returns a new value. + +@racket[base-class] controls the class used for instantiation of the new object. +} + +@deftogether[ +(@defthing[float x:float?] +@defthing[floatbe x:float?] +@defthing[floatle x:float?]) +]{ +The common 32-bit floating-point types. They differ in byte-ordering convention: @racket[floatbe] uses big endian, @racket[floatle] uses little endian, @racket[float] uses @racket[system-endian]. + +@examples[#:eval my-eval +(encode float 123.456 #f) +(encode floatbe 123.456 #f) +(encode floatle 123.456 #f) +(decode float #"y\351\366B" #f) +(decode floatbe #"y\351\366B" #f) +(decode floatle #"y\351\366B" #f) +] +} + +@deftogether[ +(@defthing[double x:float?] +@defthing[doublebe x:float?] +@defthing[doublele x:float?]) +]{ +The common 64-bit floating-point types. They differ in byte-ordering convention: @racket[doublebe] uses big endian, @racket[doublele] uses little endian, @racket[double] uses @racket[system-endian]. + +@examples[#:eval my-eval +(encode double 123.456 #f) +(encode doublebe 123.456 #f) +(encode doublele 123.456 #f) +(decode double #"w\276\237\32/\335^@" #f) +(decode doublebe #"w\276\237\32/\335^@" #f) +(decode doublele #"w\276\237\32/\335^@" #f) +] +} + + +@subsubsection{Fixed-point numbers} + + +@defclass[x:fixed% x:int% ()]{ +Base class for fixed-point number formats. Use @racket[x:fixed] to conveniently instantiate new fixed-point number formats. + +@defconstructor[ +([size exact-positive-integer?] +[signed? boolean?] +[endian endian-value?] +[fracbits exact-positive-integer?])]{ +Create class instance that represents a fixed-point number format @racket[size] bytes long, either @racket[signed?] or not, with @racket[endian] byte ordering and @racket[fracbits] of precision. +} + +} + +@defproc[ +(x:fixed? +[x any/c]) +boolean?]{ +Whether @racket[x] is an object of type @racket[x:fixed%]. +} + +@defproc[ +(x:fixed +[size-arg (or/c exact-positive-integer? #false) #false] +[#:size size-kw exact-positive-integer? 2] +[#:endian endian endian-value? system-endian] +[#:fracbits fracbits (or/c exact-positive-integer? #false) (/ (* _size 8) 2)] +[#:pre-encode pre-encode-proc (or/c (any/c . -> . any/c) #false) #false] +[#:post-decode post-decode-proc (or/c (any/c . -> . any/c) #false) #false] +[#:base-class base-class (λ (c) (subclass? c x:fixed%)) x:fixed%] +) +x:int?]{ +Generate an instance of @racket[x:fixed%] (or a subclass of @racket[x:fixed%]) with certain optional attributes. + +@racket[size-arg] or @racket[size-kw] (whichever is provided, though @racket[size-arg] takes precedence) controls the encoded size. Defaults to @racket[2]. + +@racket[endian] controls the byte-ordering convention. + +@racket[fracbits] controls the number of bits of precision. If no value or @racket[#false] is passed, defaults to @racket[(/ (* _size 8) 2)]. + +@racket[pre-encode-proc] and @racket[post-decode-proc] control the pre-encoding and post-decoding procedures, respectively. Each takes as input the value to be processed and returns a new value. + +@racket[base-class] controls the class used for instantiation of the new object. +} + +@deftogether[ +(@defthing[fixed16 x:fixed?] +@defthing[fixed16be x:fixed?] +@defthing[fixed16le x:fixed?]) +]{ +The common 16-bit fixed-point number types with 2 bits of precision. They differ in byte-ordering convention: @racket[fixed16be] uses big endian, @racket[fixed16le] uses little endian, @racket[fixed16] uses @racket[system-endian]. + +Note that because of the limited precision, the byte encoding is possibly lossy (meaning, if you @racket[encode] and then @racket[decode], you may not get exactly the same number back). + +@examples[#:eval my-eval +(encode fixed16 123.45 #f) +(encode fixed16be 123.45 #f) +(encode fixed16le 123.45 #f) +(decode fixed16 #"s{" #f) +(decode fixed16be #"s{" #f) +(decode fixed16le #"s{" #f) +] +} + +@deftogether[ +(@defthing[fixed32 x:fixed?] +@defthing[fixed32be x:fixed?] +@defthing[fixed32le x:fixed?]) +]{ +The common 32-bit fixed-point number types with 4 bits of precision. They differ in byte-ordering convention: @racket[fixed32be] uses big endian, @racket[fixed32le] uses little endian, @racket[fixed32] uses @racket[system-endian]. + +Note that because of the limited precision, the byte encoding is possibly lossy (meaning, if you @racket[encode] and then @racket[decode], you may not get exactly the same number back). + +@examples[#:eval my-eval +(encode fixed32 123.45 #f) +(encode fixed32be 123.45 #f) +(encode fixed32le 123.45 #f) +(decode fixed32 #"3s{\0" #f) +(decode fixed32be #"3s{\0" #f) +(decode fixed32le #"3s{\0" #f) +] +} + + + +@subsection{Strings} + +@defmodule[xenomorph/string] + +Good old strings. + +@defproc[ +(supported-encoding? +[x any/c]) +boolean?]{ +Whether @racket[x] represents a supported encoding: either @racket['ascii] or @racket['utf8]. +} + +@defclass[x:string% x:base% ()]{ +Base class for string formats. Use @racket[x:string] to conveniently instantiate new string formats. + +@defconstructor[ +([len length-resolvable?] +[encoding (or/c procedure? supported-encoding?)])]{ +Create class instance that represents a string format of length @racket[len]. If @racket[len] is an integer, the string is fixed at that length, otherwise it can be any length. +} + +@defmethod[ +#:mode extend +(x:decode +[input-port input-port?] +[parent (or/c xenomorphic? #false)]) +string?]{ +Returns a @tech[#:doc '(lib "scribblings/reference/reference.scrbl")]{string}. +} + +@defmethod[ +#:mode extend +(x:encode +[val any/c] +[input-port input-port?] +[parent (or/c xenomorphic? #false)]) +bytes?]{ +Take a @racket[val], convert it to a string if needed, and encode it as a @tech[#:doc '(lib "scribblings/reference/reference.scrbl")]{byte string}. If @racket[_len] is a @racket[xenomorphic?] object, the length is encoded at the beginning of the string using that object as the encoder. +} + +} + +@defproc[ +(x:string? +[x any/c]) +boolean?]{ +Whether @racket[x] is an object of type @racket[x:string%]. +} + +@defproc[ +(x:string +[len-arg (or/c length-resolvable? #false) #false] +[enc-arg (or/c procedure? supported-encoding? #false) #false] +[#:length len-kw (or/c length-resolvable? #false) #false] +[#:encoding enc-kw (or/c procedure? supported-encoding? #false) 'utf8] +[#:pre-encode pre-encode-proc (or/c (any/c . -> . any/c) #false) #false] +[#:post-decode post-decode-proc (or/c (any/c . -> . any/c) #false) #false] +[#:base-class base-class (λ (c) (subclass? c x:string%)) x:string%] +) +x:string?]{ +Generate an instance of @racket[x:string%] (or a subclass of @racket[x:string%]) with certain optional attributes. + +@racket[len-arg] or @racket[len-kw] (whichever is provided, though @racket[len-arg] takes precedence) determines the maximum length in bytes of the encoded string. + +@itemlist[ +@item{If this argument is an integer, the string is limited to that length. The length is not directly encoded.} + +@item{If it's a @racket[xenomorphic?] type, the length is variable, but limited to the size that can be represented by that type. For instance, if @racket[len-arg] is @racket[uint8], then the string can be a maximum of 255 bytes. The length is encoded at the beginning of the byte string.} + +@item{If it's another value, like @racket[#f], the string has variable length, and is null-terminated.} +] + +@racket[enc-arg] or @racket[enc-kw] (whichever is provided, though @racket[enc-arg] takes precedence) determines the encoding of the string. Default is @racket['utf8]. See also @racket[supported-encoding?]. + + +@examples[#:eval my-eval +(define any-ascii (x:string #f 'ascii)) +(encode any-ascii "ABC" #f) +(decode any-ascii #"ABC\0") +(decode any-ascii #"ABC\0DEF") +(decode any-ascii #"AB") +(define three-ascii (x:string 3 'ascii)) +(encode three-ascii "ABC" #f) +(encode three-ascii "ABCD" #f) +(encode three-ascii "ABÜ" #f) +(decode three-ascii #"ABC") +(decode three-ascii #"ABCD") +(decode three-ascii (string->bytes/utf-8 "ABÜ")) +(define 256-utf8 (x:string uint8 'utf8)) +(encode 256-utf8 "ABC" #f) +(encode 256-utf8 "ABCD" #f) +(encode 256-utf8 "ABÜ" #f) +(encode 256-utf8 (make-string 256 #\A) #f) +] + +@racket[pre-encode-proc] and @racket[post-decode-proc] control the pre-encoding and post-decoding procedures, respectively. Each takes as input the value to be processed and returns a new value. + +@racket[base-class] controls the class used for instantiation of the new object. + + +@examples[#:eval my-eval +(define (doubler str) (string-append str str)) +(define quad-str (x:string uint32be +#:pre-encode doubler +#:post-decode doubler)) +(encode quad-str "ABC" #f) +(decode quad-str #"\0\0\0\6ABCABC") +] + +} + +@subsection{Symbols} + +@defmodule[xenomorph/symbol] + +Under the hood, just a wrapper around the @racket[x:string%] class. + +@defclass[x:symbol% x:string% ()]{ +Base class for symbol formats. Use @racket[x:symbol] to conveniently instantiate new symbol formats. + +@defconstructor[ +([len length-resolvable?] +[encoding (or/c procedure? supported-encoding?)])]{ +Create class instance that represents a symbol format of length @racket[len]. If @racket[len] is an integer, the symbol is fixed at that length, otherwise it can be any length. +} + +@defmethod[ +#:mode extend +(x:decode +[input-port input-port?] +[parent (or/c xenomorphic? #false)]) +symbol?]{ +Returns a @tech[#:doc '(lib "scribblings/reference/reference.scrbl")]{symbol}. +} + +@defmethod[ +#:mode extend +(x:encode +[val any/c] +[input-port input-port?] +[parent (or/c xenomorphic? #false)]) +bytes?]{ +Take a @tech[#:doc '(lib "scribblings/reference/reference.scrbl")]{sequence} @racket[seq] of @racket[_type] items and encode it as a @tech[#:doc '(lib "scribblings/reference/reference.scrbl")]{byte string}. +} + +} + +@defproc[ +(x:symbol? +[x any/c]) +boolean?]{ +Whether @racket[x] is an object of type @racket[x:symbol%]. +} + +@defproc[ +(x:symbol +[len-arg (or/c length-resolvable? #false) #false] +[enc-arg (or/c procedure? supported-encoding? #false) #false] +[#:length len-kw (or/c length-resolvable? #false) #false] +[#:encoding enc-kw (or/c procedure? supported-encoding? #false) 'utf8] +[#:pre-encode pre-encode-proc (or/c (any/c . -> . any/c) #false) #false] +[#:post-decode post-decode-proc (or/c (any/c . -> . any/c) #false) #false] +[#:base-class base-class (λ (c) (subclass? c x:symbol%)) x:symbol%] +) +x:symbol?]{ +Generate an instance of @racket[x:symbol%] (or a subclass of @racket[x:symbol%]) with certain optional attributes, which are the same as @racket[x:string]. + +@examples[#:eval my-eval +(define any-ascii (x:symbol #f 'ascii)) +(encode any-ascii 'ABC #f) +(decode any-ascii #"ABC\0") +(decode any-ascii #"ABC\0DEF") +(decode any-ascii #"AB") +(define three-ascii (x:symbol 3 'ascii)) +(encode three-ascii 'ABC #f) +(encode three-ascii 'ABCD #f) +(encode three-ascii 'ABÜ #f) +(decode three-ascii #"ABC") +(decode three-ascii #"ABCD") +(decode three-ascii (string->bytes/utf-8 "ABÜ")) +(define 256-utf8 (x:symbol uint8 'utf8)) +(encode 256-utf8 'ABC #f) +(encode 256-utf8 'ABCD #f) +(encode 256-utf8 'ABÜ #f) +(encode 256-utf8 (make-string 256 #\A) #f) +(define (doubler sym) +(string->symbol (format "~a~a" sym sym))) +(define quad-str (x:symbol uint32be +#:pre-encode doubler +#:post-decode doubler)) +(encode quad-str "ABC" #f) +(decode quad-str #"\0\0\0\6ABCABC") +] + +} + + +@subsection{Lists} + +@defmodule[xenomorph/list] + +Lists in Xenomorph have a @emph{type} and maybe a @emph{length}. Every element in the list must have the same type. The list can have a specific length, but it doesn't need to (in which case the length is encoded as part of the data). + +If you want to store items of different types in a single Xenomorph list, wrap them in @secref{Pointers} so they have a uniform type. + +@defclass[x:list% x:base% ()]{ +Base class for list formats. Use @racket[x:list] to conveniently instantiate new list formats. + +@defconstructor[ +([type xenomorphic?] +[len length-resolvable?] +[count-bytes? boolean?])]{ +Create class instance that represents a list format with elements of type @racket[type]. If @racket[len] is an integer, the list is fixed at that length, otherwise it can be any length. +} + +@defmethod[ +#:mode extend +(x:decode +[input-port input-port?] +[parent (or/c xenomorphic? #false)]) +list?]{ +Returns a @tech[#:doc '(lib "scribblings/reference/reference.scrbl")]{list} of values whose length is @racket[_len] and where each value is @racket[_type]. +} + +@defmethod[ +#:mode extend +(x:encode +[seq sequence?] +[input-port input-port?] +[parent (or/c xenomorphic? #false)]) +bytes?]{ +Take a @tech[#:doc '(lib "scribblings/reference/reference.scrbl")]{sequence} @racket[seq] of @racket[_type] items and encode it as a @tech[#:doc '(lib "scribblings/reference/reference.scrbl")]{byte string}. +} + +} + +@defproc[ +(x:list? +[x any/c]) +boolean?]{ +Whether @racket[x] is an object of type @racket[x:list%]. +} + +@defproc[ +(x:list +[type-arg (or/c xenomorphic? #false) #false] +[len-arg (or/c length-resolvable? #false) #false] +[#:type type-kw (or/c xenomorphic? #false) #false] +[#:length len-kw (or/c length-resolvable? #false) #false] +[#:pre-encode pre-encode-proc (or/c (any/c . -> . any/c) #false) #false] +[#:post-decode post-decode-proc (or/c (any/c . -> . any/c) #false) #false] +[#:base-class base-class (λ (c) (subclass? c x:list%)) x:list%] +) +x:list?]{ +Generate an instance of @racket[x:list%] (or a subclass of @racket[x:list%]) with certain optional attributes. + +@racket[type-arg] or @racket[type-kw] (whichever is provided, though @racket[type-arg] takes precedence) determines the type of the elements in the list. + +@racket[len-arg] or @racket[len-kw] (whichever is provided, though @racket[len-arg] takes precedence) determines the length of the list. This can be an ordinary integer, but it can also be any value that is @racket[length-resolvable?]. + +@examples[#:eval my-eval +(define three-uint8s (x:list uint8 3)) +(encode three-uint8s '(1 2 3) #f) +(encode three-uint8s (string->bytes/utf-8 "ABC") #f) +(encode three-uint8s '(1 2 3 4) #f) +(encode three-uint8s '(1000 2000 3000) #f) +(encode three-uint8s '(A B C) #f) +(decode three-uint8s #"\1\2\3") +(decode three-uint8s #"\1\2\3\4") +(decode three-uint8s #"\1\2") +(define <256-uint8s (x:list #:type uint8 #:length uint8)) +(encode <256-uint8s '(1 2 3) #f) +(encode <256-uint8s (make-list 500 1) #f) +(decode <256-uint8s #"\3\1\2\3") +(decode <256-uint8s #"\3\1\2\3\4") +(decode <256-uint8s #"\3\1\2") +(define nested (x:list #:type <256-uint8s #:length uint8)) +(encode nested '((65) (66 66) (67 67 67)) #f) +(decode nested #"\3\1A\2BB\3CCC") +] + +@racket[pre-encode-proc] and @racket[post-decode-proc] control the pre-encoding and post-decoding procedures, respectively. Each takes as input the value to be processed and returns a new value. + +@racket[base-class] controls the class used for instantiation of the new object. + + +@examples[#:eval my-eval +(define (doubler xs) (append xs xs)) +(define quad-list (x:list uint16be +#:pre-encode doubler +#:post-decode doubler)) +(encode quad-list '(1 2 3) #f) +(decode quad-list #"\0\1\0\2\0\3\0\1\0\2\0\3") +] +} + + +@subsection{Streams} + +@defmodule[xenomorph/stream] + +Under the hood, just a wrapper around the @racket[x:list%] class that produces a stream rather than a list. + +The distinguishing feature of a @tech[#:doc '(lib "scribblings/reference/reference.scrbl")]{stream} is that the evaluation is lazy: elements are only decoded as they are requested (and then they are cached for subsequent use). Therefore, a Xenomorph stream is a good choice when you don't want to incur the costs of decoding every element immediately (as you will when you use @secref{Lists}). + +@defclass[x:stream% x:list% ()]{ +Base class for stream formats. Use @racket[x:stream] to conveniently instantiate new stream formats. + +@defconstructor[ +([type xenomorphic?] +[len length-resolvable?] +[count-bytes? boolean?])]{ +Create class instance that represents a stream format with elements of type @racket[type]. If @racket[len] is an integer, the stream is fixed at that length, otherwise it can be any length. +} + +@defmethod[ +#:mode extend +(x:decode +[input-port input-port?] +[parent (or/c xenomorphic? #false)]) +stream?]{ +Returns a @tech[#:doc '(lib "scribblings/reference/reference.scrbl")]{stream} of values whose length is @racket[_len] and where each value is @racket[_type]. +} + +@defmethod[ +#:mode extend +(x:encode +[seq sequence?] +[input-port input-port?] +[parent (or/c xenomorphic? #false)]) +bytes?]{ +Take a @tech[#:doc '(lib "scribblings/reference/reference.scrbl")]{sequence} @racket[seq] of @racket[_type] items and encode it as a @tech[#:doc '(lib "scribblings/reference/reference.scrbl")]{byte string}. +} + +} + +@defproc[ +(x:stream? +[x any/c]) +boolean?]{ +Whether @racket[x] is an object of type @racket[x:stream%]. +} + +@defproc[ +(x:stream +[type-arg (or/c xenomorphic? #false) #false] +[len-arg (or/c length-resolvable? #false) #false] +[#:type type-kw (or/c xenomorphic? #false) #false] +[#:length len-kw (or/c length-resolvable? #false) #false] +[#:pre-encode pre-encode-proc (or/c (any/c . -> . any/c) #false) #false] +[#:post-decode post-decode-proc (or/c (any/c . -> . any/c) #false) #false] +[#:base-class base-class (λ (c) (subclass? c x:stream%)) x:stream%] +) +x:stream?]{ +Generate an instance of @racket[x:stream%] (or a subclass of @racket[x:stream%]) with certain optional attributes, which are the same as @racket[x:list]. + +@examples[#:eval my-eval +(define three-uint8s (x:stream uint8 3)) +(encode three-uint8s '(1 2 3) #f) +(encode three-uint8s (string->bytes/utf-8 "ABC") #f) +(encode three-uint8s '(1 2 3 4) #f) +(encode three-uint8s '(1000 2000 3000) #f) +(encode three-uint8s '(A B C) #f) +(decode three-uint8s #"\1\2\3") +(decode three-uint8s #"\1\2\3\4") +(decode three-uint8s #"\1\2") +(define <256-uint8s (x:stream #:type uint8 #:length uint8)) +(encode <256-uint8s '(1 2 3) #f) +(encode <256-uint8s (make-list 500 1) #f) +(stream->list (decode <256-uint8s #"\3\1\2\3")) +(for/list ([val (in-stream (decode <256-uint8s #"\3\1\2\3\4"))]) +val) +(stream->list (decode <256-uint8s #"\3\1\2")) +(define (doubler xs) (append (stream->list xs) (stream->list xs))) +(define quad-stream (x:stream uint16be +#:pre-encode doubler +#:post-decode doubler)) +(encode quad-stream '(1 2 3) #f) +(decode quad-stream #"\0\1\0\2\0\3\0\1\0\2\0\3") +] +} + +@subsection{Vectors} + +@defmodule[xenomorph/vector] + +Under the hood, just a wrapper around the @racket[x:list%] class that decodes to a vector rather than a list. + + +@defclass[x:vector% x:list% ()]{ +Base class for vector formats. Use @racket[x:vector] to conveniently instantiate new vector formats. + +@defconstructor[ +([type xenomorphic?] +[len length-resolvable?] +[count-bytes? boolean?])]{ +Create class instance that represents a vector format with elements of type @racket[type]. If @racket[len] is an integer, the vector is fixed at that length, otherwise it can be any length. +} + +@defmethod[ +#:mode extend +(x:decode +[input-port input-port?] +[parent (or/c xenomorphic? #false)]) +vector?]{ +Returns a @tech[#:doc '(lib "scribblings/reference/reference.scrbl")]{vector} of values whose length is @racket[_len] and where each value is @racket[_type]. +} + +@defmethod[ +#:mode extend +(x:encode +[seq sequence?] +[input-port input-port?] +[parent (or/c xenomorphic? #false)]) +bytes?]{ +Take a @tech[#:doc '(lib "scribblings/reference/reference.scrbl")]{sequence} @racket[seq] of @racket[_type] items and encode it as a @tech[#:doc '(lib "scribblings/reference/reference.scrbl")]{byte string}. +} + +} + +@defproc[ +(x:vector? +[x any/c]) +boolean?]{ +Whether @racket[x] is an object of type @racket[x:vector%]. +} + +@defproc[ +(x:vector +[type-arg (or/c xenomorphic? #false) #false] +[len-arg (or/c length-resolvable? #false) #false] +[#:type type-kw (or/c xenomorphic? #false) #false] +[#:length len-kw (or/c length-resolvable? #false) #false] +[#:pre-encode pre-encode-proc (or/c (any/c . -> . any/c) #false) #false] +[#:post-decode post-decode-proc (or/c (any/c . -> . any/c) #false) #false] +[#:base-class base-class (λ (c) (subclass? c x:vector%)) x:vector%] +) +x:vector?]{ +Generate an instance of @racket[x:vector%] (or a subclass of @racket[x:vector%]) with certain optional attributes, which are the same as @racket[x:list]. + +@examples[#:eval my-eval +(define three-uint8s (x:vector uint8 3)) +(encode three-uint8s '#(1 2 3) #f) +(encode three-uint8s (string->bytes/utf-8 "ABC") #f) +(encode three-uint8s '(1 2 3 4) #f) +(encode three-uint8s '(1000 2000 3000) #f) +(encode three-uint8s '(A B C) #f) +(decode three-uint8s #"\1\2\3") +(decode three-uint8s #"\1\2\3\4") +(decode three-uint8s #"\1\2") +(define <256-uint8s (x:vector #:type uint8 #:length uint8)) +(encode <256-uint8s '(1 2 3) #f) +(encode <256-uint8s (make-list 500 1) #f) +(vector->list (decode <256-uint8s #"\3\1\2\3")) +(for/list ([val (in-vector (decode <256-uint8s #"\3\1\2\3\4"))]) +val) +(vector->list (decode <256-uint8s #"\3\1\2")) +(define (doubler vec) (vector-append vec vec)) +(define quad-vec (x:vector uint16be +#:pre-encode doubler +#:post-decode doubler)) +(encode quad-vec '#(1 2 3) #f) +(decode quad-vec #"\0\1\0\2\0\3\0\1\0\2\0\3") +] +} + + +@subsection{Dicts} + +@defmodule[xenomorph/dict] + +A @deftech{dict} is a store of keys and values. The analogy to a Racket @racket[dict?] is intentional, but in Xenomorph a dict must also be @emph{ordered}, because a binary encoding doesn't make sense if it happens in a different order every time. The more precise analogy would be to an @tech[#:doc '(lib "scribblings/reference/reference.scrbl")]{association list} — a thing that has both dict-like and list-like qualities — but this would be a laborious name. + +@defclass[x:dict% x:base% ()]{ +Base class for dict formats. Use @racket[x:dict] to conveniently instantiate new dict formats. + +@defconstructor[ +([fields dict?])]{ +Create class instance that represents a dict format with @racket[fields] as a dictionary holding the key–value pairs that define the dict format. Each key must be a @racket[symbol?] and each value must be a @racket[xenomorphic?] type. +} + +@defmethod[ +#:mode extend +(x:decode +[input-port input-port?] +[parent (or/c xenomorphic? #false)]) +hash-eq?]{ +Returns a @racket[hasheq] whose keys are the same as the keys in @racket[_fields]. + +} + +@defmethod[ +#:mode extend +(x:encode +[kvs dict?] +[input-port input-port?] +[parent (or/c xenomorphic? #false)]) +bytes?]{ +Take the keys and values in @racket[kvs] and encode them as a @tech[#:doc '(lib "scribblings/reference/reference.scrbl")]{byte string}. +} + +} + +@defproc[ +(x:dict? +[x any/c]) +boolean?]{ +Whether @racket[x] is an object of type @racket[x:dict%]. +} + +@defproc[ +(x:dict +[#:pre-encode pre-encode-proc (or/c (any/c . -> . any/c) #false) #false] +[#:post-decode post-decode-proc (or/c (any/c . -> . any/c) #false) #false] +[#:base-class base-class (λ (c) (subclass? c x:dict%)) x:dict%] +[dict (listof (pairof symbol? xenomorphic?))] ... +[key symbol?] [val-type xenomorphic?] ... ... +) +x:dict?]{ +Generate an instance of @racket[x:dict%] (or a subclass of @racket[x:dict%]) with certain optional attributes. + +The rest arguments determine the keys and value types of the dict. These arguments can either be alternating keys and value-type arguments (similar to the calling pattern for @racket[hasheq]) or @tech[#:doc '(lib "scribblings/reference/reference.scrbl")]{association lists}. + +@racket[pre-encode-proc] and @racket[post-decode-proc] control the pre-encoding and post-decoding procedures, respectively. Each takes as input the value to be processed and returns a new value. + +@racket[base-class] controls the class used for instantiation of the new object. + +@examples[#:eval my-eval +(define d1 (x:dict 'foo uint8 'bar (x:string #:length 5))) +(define d1-vals (hasheq 'foo 42 'bar "hello")) +(encode d1 d1-vals #f) +(decode d1 #"*hello") + +(define d2 (x:dict 'zam (x:list #:length 3 #:type uint8) + 'nested d1)) +(define d2-vals (hasheq 'zam '(42 43 44) + 'nested d1-vals)) +(encode d2 d2-vals #f) +(decode d2 #"*+,*hello") +] + +} + + + +@subsection{Versioned dicts} + +@defmodule[xenomorph/versioned-dict] + + +The versioned dict is a format derived from @racket[x:dict%] that contains multiple possible dict encodings. It also carries a version field to select among them. This version is stored with the encoded data, of course, so on decode, the correct version will be chosen. + + +@defproc[ +(version-type? +[x any/c]) +boolean?]{ +Whether @racket[x] can be used as the version type of a versioned dict. Valid types are @racket[integer?], @racket[procedure?], or @racket[xenomorphic?]. +} + + +@defclass[x:versioned-dict% x:dict% ()]{ +Base class for versioned dict formats. Use @racket[x:versioned-dict] to conveniently instantiate new dict formats. + +@defconstructor[ +([type version-type?] +[versions dict?] +[version-key symbol?] +[fields #false])]{ +Create class instance that represents a versioned dict format with @racket[type] as the encoded type of the version value, and @racket[versions] as a dictionary holding the key–value pairs that define the versioned dict. Each key of @racket[versions] must be a value consistent with @racket[type], and each value must either be a @racket[dict?] or @racket[x:dict?]. +} + +@defmethod[ +#:mode extend +(x:decode +[input-port input-port?] +[parent (or/c xenomorphic? #false)]) +hash-eq?]{ +Returns a @racket[hasheq] whose keys are the same as the keys in @racket[_fields]. + +} + +@defmethod[ +#:mode extend +(x:encode +[kvs dict?] +[input-port input-port?] +[parent (or/c xenomorphic? #false)]) +bytes?]{ +Take the keys and values in @racket[kvs] and encode them as a @tech[#:doc '(lib "scribblings/reference/reference.scrbl")]{byte string}. +} + +} + +@defproc[ +(x:versioned-dict? +[x any/c]) +boolean?]{ +Whether @racket[x] is an object of type @racket[x:versioned-dict%]. +} + +@defproc[ +(x:versioned-dict +[type-arg (or/c version-type? #false)] +[versions-arg (or/c dict? #false)] +[#:type type-kw (or/c version-type? #false)] +[#:versions versions-kw (or/c dict? #false)] +[#:version-key version-key (or/c symbol? #false) x:version-key] +[#:pre-encode pre-encode-proc (or/c (any/c . -> . any/c) #false) #false] +[#:post-decode post-decode-proc (or/c (any/c . -> . any/c) #false) #false] +[#:base-class base-class (λ (c) (subclass? c x:versioned-dict%)) x:versioned-dict%] +) +x:versioned-dict?]{ +Generate an instance of @racket[x:versioned-dict%] (or a subclass of @racket[x:versioned-dict%]) with certain optional attributes. + +@racket[type-arg] or @racket[type-kw] (whichever is provided, though @racket[type-arg] takes precedence) determines the type of the version value that is used to select from among available dicts. + +@racket[versions-arg] or @racket[versions-kw] (whichever is provided, though @racket[versions-arg] takes precedence) is a dictionary holding the key–value pairs that define the versioned dict. Each key of @racket[versions] must be a value consistent with @racket[type], and each value must either be a @racket[dict?] or @racket[x:dict?]. + +@racket[version-key] identifies the key that should be treated as the version selector. By default, it's a separate private key called @racket[x:version-key] that exists independently of the data fields. But if one of the existing data fields should be treated as the version key, you can pass it as the @racket[version-key] argument. + +@racket[pre-encode-proc] and @racket[post-decode-proc] control the pre-encoding and post-decoding procedures, respectively. Each takes as input the value to be processed and returns a new value. + +@racket[base-class] controls the class used for instantiation of the new object. + +@examples[#:eval my-eval +(define d1 (x:dict 'foo uint8 'bar (x:string #:length 5))) +(define d1-vals (hasheq 'foo 42 'bar "hello" 'my-version-key 'd1)) + +(define d2 (x:dict 'zam (x:list #:length 3 #:type uint8) + 'nested d1)) +(define d2-vals (hasheq 'zam '(42 43 44) + 'nested d1-vals + 'my-version-key 'd2)) + +(define vdict (x:versioned-dict + #:type (x:symbol) + #:version-key 'my-version-key + #:versions (hash 'd1 d1 'd2 d2))) + +(encode vdict d1-vals #f) +(decode vdict #"d1\0*hello") + +(encode vdict d2-vals #f) +(decode vdict #"d2\0*+,*hello") +] + +} + +@subsubsection{Reserved values} + +@declare-exporting[xenomorph] + +@defthing[x:version-key symbol? #:value 'x:version]{ +Key used by default to store & look up the version-selector value within the fields of a versioned dict. When the version dict is created, a different key can be specified. +} + + + + +@subsection{Pointers} + +@defmodule[xenomorph/pointer] + +A pointer can be thought of as a meta-object that can wrap any of the other binary formats here. It doesn't change how they work: they still take the same inputs (on @racket[encode]) and produce the same values (on @racket[decode]). + +What it does change is the underlying housekeeping, by creating a layer of indirection around the data. + +On @racket[encode], instead of storing the raw data at a certain point in the byte stream, it creates a reference — that is, a @deftech{pointer} — to that data at another location, and then puts the data at that location. + +On @racket[decode], the process is reversed: the pointer is dereferenced to discover the true location of the data, the data is read from that location, and then the decode proceeds as usual. + +Under the hood, this housekeeping is fiddly and annoying. But good news! It's already been done. Please do something worthwhile with the hours of your life that have been returned to you. + +Pointers can be useful for making data types of different sizes behave as if they were the same size. For instance, @secref{Lists} require all elements to have the same encoded size. What if you want to put different data types in the list? Wrap each item in a pointer, and you can make a list of pointers (because they have consistent size) that reference different kinds of data. + + + +@defclass[x:pointer% x:base% ()]{ +Base class for pointer formats. Use @racket[x:pointer] to conveniently instantiate new pointer formats. + +@defproc[ +(pointer-relative-value? +[x any/c]) +boolean?]{ +Whether @racket[x] can be used as a value for the @racket[_pointer-relative-to] field of @racket[x:pointer%]. Valid choices are @racket['(local immediate parent global)]. +} + +@defconstructor[ +([ptr-type x:int?] +[dest-type (or/c xenomorphic? 'void)] +[pointer-relative-to pointer-relative-value?] +[allow-null? boolean?] +[null-value any/c] +[pointer-lazy? boolean?])]{ +Create class instance that represents a pointer format. See @racket[x:pointer] for a description of the fields. + + + +} + +@defmethod[ +#:mode extend +(x:decode +[input-port input-port?] +[parent (or/c xenomorphic? #false)]) +any/c]{ +Returns the dereferenced value of the pointer whose type is controlled by @racket[_dest-type]. +} + +@defmethod[ +#:mode extend +(x:encode +[val any/c] +[input-port input-port?] +[parent (or/c xenomorphic? #false)]) +bytes?]{ +Take a value of type @racket[_dest-type], wrap it in a pointer, and encode it as a @tech[#:doc '(lib "scribblings/reference/reference.scrbl")]{byte string}. +} + +} + +@defproc[ +(x:pointer? +[x any/c]) +boolean?]{ +Whether @racket[x] is an object of type @racket[x:pointer%]. +} + +@defproc[ +(x:pointer +[ptr-type-arg (or/c x:int? #false) #false] +[dest-type-arg (or/c xenomorphic? 'void #false) #false] +[#:type ptr-type-kw (or/c x:int? #false) uint32] +[#:dest-type dest-type-kw (or/c xenomorphic? 'void #false) uint8] +[#:relative-to pointer-relative-to pointer-relative-value? 'local] +[#:allow-null allow-null? boolean? #true] +[#:null null-value any/c 0] +[#:lazy pointer-lazy? boolean? #false] +[#:pre-encode pre-encode-proc (or/c (any/c . -> . any/c) #false) #false] +[#:post-decode post-decode-proc (or/c (any/c . -> . any/c) #false) #false] +[#:base-class base-class (λ (c) (subclass? c x:pointer%)) x:pointer%] +) +x:pointer?]{ +Generate an instance of @racket[x:pointer%] (or a subclass of @racket[x:pointer%]) with certain optional attributes. + +@racket[ptr-type-arg] or @racket[ptr-type-kw] (whichever is provided, though @racket[ptr-type-arg] takes precedence) controls the type of the pointer value itself, which must be an @racket[x:int?]. Default is @racket[uint32]. + +@racket[dest-type-arg] or @racket[dest-type-kw] (whichever is provided, though @racket[dest-type-arg] takes precedence) controls the type of the thing being pointed at, which must be a @racket[xenomorphic?] object or the symbol @racket['void] to indicate a void pointer. Default is @racket[uint8]. + + +@racket[pointer-relative-to] controls how the byte-offset value stored in the pointer is calculated. It must be one of @racket['(local immediate parent global)]. Default is @racket['local]. + +@racket[allow-null?] controls whether the pointer can take on null values, and @racket[null-value] controls what that value is. Defaults are @racket[#true] and @racket[0], respectively. + +@racket[pointer-lazy?] controls whether the pointer is decoded immediately. If @racket[pointer-lazy?] is @racket[#true], then the decoding of the pointer is wrapped in a @tech[#:doc '(lib "scribblings/reference/reference.scrbl")]{promise} that can later be evaluated with @racket[force]. Default is @racket[#false]. + +@racket[pre-encode-proc] and @racket[post-decode-proc] control the pre-encoding and post-decoding procedures, respectively. Each takes as input the value to be processed and returns a new value. + +@racket[base-class] controls the class used for instantiation of the new object. +} + +@subsubsection{Private values} + +@declare-exporting[xenomorph] + +@deftogether[(@defthing[x:start-offset-key symbol? #:value 'x:start-offset] +@defthing[x:current-offset-key symbol? #:value 'x:current-offset] +@defthing[x:parent-key symbol? #:value 'x:parent] +@defthing[x:pointer-size-key symbol? #:value 'x:ptr-size] +@defthing[x:pointers-key symbol? #:value 'x:pointers] +@defthing[x:pointer-offset-key symbol? #:value 'x:ptr-offset] +@defthing[x:pointer-type-key symbol? #:value 'x:ptr-type] +@defthing[x:length-key symbol? #:value 'x:length] +@defthing[x:val-key symbol? #:value 'x:val])]{ + +Private fields used for pointer housekeeping. There is no reason to mess with these. +} + + + + + +@subsection{Bitfields} + +@defmodule[xenomorph/bitfield] + +A @deftech{bitfield} is a compact encoding for Boolean values using an integer, where each bit of the integer indicates @racket[#true] or @racket[#false] (corresponding to a value of @racket[1] or @racket[0]). The bitfield object creates a mapping between the keys of the bitfield (called @deftech{flags}) and the integer bits. + +@defclass[x:bitfield% x:base% ()]{ +Base class for bitfield formats. Use @racket[x:bitfield] to conveniently instantiate new bitfield formats. + + +@defconstructor[ +([type x:int?] +[flags (listof (or/c symbol? #false))])]{ +Create class instance that represents a bitfield format. See @racket[x:bitfield] for a description of the fields. + +} + +@defmethod[ +#:mode extend +(x:decode +[input-port input-port?] +[parent (or/c xenomorphic? #false)]) +hash?]{ +Returns a hash whose keys are the names of the flags, and whose values are Booleans. +} + +@defmethod[ +#:mode extend +(x:encode +[flag-hash hash?] +[input-port input-port?] +[parent (or/c xenomorphic? #false)]) +bytes?]{ +Take a hash — where hash keys are the names of the flags, hash values are Booleans — and encode it as a @tech[#:doc '(lib "scribblings/reference/reference.scrbl")]{byte string}. +} + +} + +@defproc[ +(x:bitfield? +[x any/c]) +boolean?]{ +Whether @racket[x] is an object of type @racket[x:bitfield%]. +} + +@defproc[ +(x:bitfield +[type-arg (or/c x:int? #false) #false] +[flags-arg (listof any/c)] +[#:type type-kw (or/c x:int? #false) uint8] +[#:flags flags-kw (listof any/c) null] +[#:pre-encode pre-encode-proc (or/c (any/c . -> . any/c) #false) #false] +[#:post-decode post-decode-proc (or/c (any/c . -> . any/c) #false) #false] +[#:base-class base-class (λ (c) (subclass? c x:bitfield%)) x:bitfield%] +) +x:bitfield?]{ +Generate an instance of @racket[x:bitfield%] (or a subclass of @racket[x:bitfield%]) with certain optional attributes. + +@racket[type-arg] or @racket[type-kw] (whichever is provided, though @racket[type-arg] takes precedence) controls the type of the bitfield value itself, which must be an @racket[x:int?]. Default is @racket[uint8]. + +@racket[flags-arg] or @racket[flags-kw] (whichever is provided, though @racket[flags-arg] takes precedence) is a list of flag names corresponding to each bit. The number of names must be fewer than the number of bits in @racket[_type]. No name can be duplicated. Each flag name can be any value, but @racket[#false] indicates a skipped bit. Default is @racket[null]. + +@racket[pre-encode-proc] and @racket[post-decode-proc] control the pre-encoding and post-decoding procedures, respectively. Each takes as input the value to be processed and returns a new value. + +@racket[base-class] controls the class used for instantiation of the new object. + +@examples[#:eval my-eval +(define flags (x:bitfield uint8 '(alpha bravo charlie delta echo))) +(define vals (hasheq + 'alpha #true + 'charlie #true + 'echo #true)) +(encode flags vals #f) +(decode flags #"\25") +] + +} + + +@subsection{Enumerations} + +@defmodule[xenomorph/enum] + +An @deftech{enumeration} is a mapping of values to sequential integers. + + +@defclass[x:enum% x:base% ()]{ +Base class for list formats. Use @racket[x:enum] to conveniently instantiate new enumeration formats. + +@defconstructor[ +([type x:int?] +[values (listof any/c)])]{ +Create class instance that represents an enumeration format of type @racket[type], sequentially mapped to @racket[values]. +} + +@defmethod[ +#:mode extend +(x:decode +[input-port input-port?] +[parent (or/c xenomorphic? #false)]) +any/c]{ +Returns either the value associated with a certain integer, or if the value is @racket[#false] or doesn't exist, then the integer itself. +} + +@defmethod[ +#:mode extend +(x:encode +[val any/c] +[input-port input-port?] +[parent (or/c xenomorphic? #false)]) +bytes?]{ +Take value listed in the @racket[_values] field and encode it as a @tech[#:doc '(lib "scribblings/reference/reference.scrbl")]{byte string}. +} + +} + +@defproc[ +(x:enum? +[x any/c]) +boolean?]{ +Whether @racket[x] is an object of type @racket[x:enum%]. +} + +@defproc[ +(x:enum +[type-arg (or/c x:int? #false) #false] +[values-arg (listof any/c) #false] +[#:type type-kw (or/c x:int? #false) uint8] +[#:values values-kw (listof any/c) null] +[#:pre-encode pre-encode-proc (or/c (any/c . -> . any/c) #false) #false] +[#:post-decode post-decode-proc (or/c (any/c . -> . any/c) #false) #false] +[#:base-class base-class (λ (c) (subclass? c x:enum%)) x:enum%] +) +x:enum?]{ +Generate an instance of @racket[x:enum%] (or a subclass of @racket[x:enum%]) with certain optional attributes. + +@racket[type-arg] or @racket[type-kw] (whichever is provided, though @racket[type-arg] takes precedence) determines the integer type for the enumeration. Default is @racket[uint8]. + +@racket[values-arg] or @racket[values-kw] (whichever is provided, though @racket[values-arg] takes precedence) determines the mapping of values to integers, where each value corresponds to its index in the list. @racket[#false] indicates skipped values. Default is @racket[null]. + +@racket[pre-encode-proc] and @racket[post-decode-proc] control the pre-encoding and post-decoding procedures, respectively. Each takes as input the value to be processed and returns a new value. + +@racket[base-class] controls the class used for instantiation of the new object. + +@examples[#:eval my-eval +(define e (x:enum #:type uint8 + #:values '("foo" "bar" "baz" #f))) +(encode e "baz" #f) +(decode e #"\2") +(code:comment @#,t{corresponding enum value is #false, so we pass through input value}) +(decode e #"\3") +(code:comment @#,t{no corresponding enum value, so we pass through input value}) +(decode e #"\5") +] + +} + + + +@;{ + +@subsection{Optional} + +@defmodule[xenomorph/optional] + +A wrapper format that decodes or encodes only if the embedded condition evaluates to true. + + +@defclass[x:optional% x:base% ()]{ +Base class for optional formats. Use @racket[x:optional] to conveniently instantiate new optional formats. + + +@defconstructor[ +([type xenomorphic?] +[condition any/c])]{ +Create class instance that represents an optional format. See @racket[x:optional] for a description of the fields. + +} + +@defmethod[ +#:mode extend +(x:decode +[input-port input-port?] +[parent (or/c xenomorphic? #false)]) +hash?]{ +Returns a value if the condition is met, otherwise returns @racket[(void)]. +} + +@defmethod[ +#:mode extend +(x:encode +[val any/c] +[input-port input-port?] +[parent (or/c xenomorphic? #false)]) +bytes?]{ +Encodes @racket[val] as a @tech[#:doc '(lib "scribblings/reference/reference.scrbl")]{byte string}, but only if the embedded condition is met. +} + +} + +@defproc[ +(x:optional? +[x any/c]) +boolean?]{ +Whether @racket[x] is an object of type @racket[x:optional%]. +} + +@defproc[ +(x:optional +[type-arg (or/c xenomorphic? #false) #false] +[cond-arg any/c] +[#:type type-kw (or/c xenomorphic? #false)] +[#:condition cond-kw any/c #true] +[#:pre-encode pre-encode-proc (or/c (any/c . -> . any/c) #false) #false] +[#:post-decode post-decode-proc (or/c (any/c . -> . any/c) #false) #false] +[#:base-class base-class (λ (c) (subclass? c x:optional%)) x:optional%] +) +x:optional?]{ +Generate an instance of @racket[x:optional%] (or a subclass of @racket[x:optional%]) with certain optional attributes. + +@racket[type-arg] or @racket[type-kw] (whichever is provided, though @racket[type-arg] takes precedence) controls the type wrapped by the optional object, which must be @racket[xenomorphic?]. + +@racket[cond-arg] or @racket[cond-kw] (whichever is provided, though @racket[cond-arg] takes precedence) is the condition that is evaluated to determine if the optional object should encode or decode. + +If the condition is a procedure, the procedure is evaluated for its result. The procedure must take two arguments: the first is the optional object, the second is the parent object (if it exists). Default is @racket[#true]. + +@racket[pre-encode-proc] and @racket[post-decode-proc] control the pre-encoding and post-decoding procedures, respectively. Each takes as input the value to be processed and returns a new value. + +@racket[base-class] controls the class used for instantiation of the new object. +} + +} + + + +@subsection{Reserved} + +@defmodule[xenomorph/reserved] + +The reserved object simply skips data. The advantage of using a reserved object rather than the type itself is a) it clearly signals that the data is being ignored, and b) it prevents writing to that part of the data structure. + +@defclass[x:reserved% x:base% ()]{ +Base class for reserved formats. Use @racket[x:reserved] to conveniently instantiate new reserved formats. + + +@defconstructor[ +([type xenomorphic?] +[count exact-positive-integer?])]{ +Create class instance that represents an reserved format. See @racket[x:reserved] for a description of the fields. + +} + +@defmethod[ +#:mode extend +(x:decode +[input-port input-port?] +[parent (or/c xenomorphic? #false)]) +void?]{ +Returns @racket[(void)]. +} + +@defmethod[ +#:mode extend +(x:encode +[val any/c] +[input-port input-port?] +[parent (or/c xenomorphic? #false)]) +bytes?]{ +Encodes zeroes as a @tech[#:doc '(lib "scribblings/reference/reference.scrbl")]{byte string} that is the length of @racket[_type]. +} + +} + +@defproc[ +(x:reserved? +[x any/c]) +boolean?]{ +Whether @racket[x] is an object of type @racket[x:reserved%]. +} + +@defproc[ +(x:reserved +[type-arg (or/c xenomorphic? #false) #false] +[count-arg (or/c exact-positive-integer? #false)] +[#:type type-kw (or/c xenomorphic? #false)] +[#:count count-kw exact-positive-integer? 1] +[#:pre-encode pre-encode-proc (or/c (any/c . -> . any/c) #false) #false] +[#:post-decode post-decode-proc (or/c (any/c . -> . any/c) #false) #false] +[#:base-class base-class (λ (c) (subclass? c x:reserved%)) x:reserved%] +) +x:reserved?]{ +Generate an instance of @racket[x:reserved%] (or a subclass of @racket[x:reserved%]) with certain optional attributes. + +@racket[type-arg] or @racket[type-kw] (whichever is provided, though @racket[type-arg] takes precedence) controls the type wrapped by the reserved object, which must be @racket[xenomorphic?]. + +@racket[count-arg] or @racket[count-kw] (whichever is provided, though @racket[count-arg] takes precedence) is the number of items of @racket[_type] that should be skipped. + +@racket[pre-encode-proc] and @racket[post-decode-proc] control the pre-encoding and post-decoding procedures, respectively. Each takes as input the value to be processed and returns a new value. + +@racket[base-class] controls the class used for instantiation of the new object. + +@examples[#:eval my-eval +(define res (x:reserved #:type uint32)) +(encode res 1 #f) +(encode res 1234 #f) +(encode res 12345678 #f) +(void? (decode res #"\0\0\0\0")) +] + +} + +@subsection{Utilities} + +@defmodule[xenomorph/util] + +@defproc[ +(length-resolvable? +[x any/c]) +boolean?]{ +Whether @racket[x] is something that can be used as a length argument with @racket[xenomorphic?] objects that have length. For instance, an @racket[x:list] or @racket[x:stream]. + +The following values are deemed to be resolvable: any @racket[exact-nonnegative-integer?], an @racket[x:int?], or any @racket[procedure?] that takes one argument (= the parent object) returns a @racket[exact-nonnegative-integer?]. +} + +@section{License & source code} + +This module is licensed under the MIT license. + + +Source repository at @link["http://github.com/mbutterick/xenomorph"]{http://github.com/mbutterick/xenomorph}. Suggestions & corrections welcome. + diff --git a/xenomorph/xenomorph/stream.rkt b/xenomorph/xenomorph/stream.rkt new file mode 100644 index 00000000..41588525 --- /dev/null +++ b/xenomorph/xenomorph/stream.rkt @@ -0,0 +1,112 @@ +#lang debug racket/base +(require racket/class + racket/contract + racket/match + racket/sequence + "base.rkt" "util.rkt" "number.rkt" "list.rkt" racket/stream sugar/unstable/dict) +(provide (all-defined-out)) + +#| +approximates +https://github.com/mbutterick/restructure/blob/master/src/LazyArray.coffee +|# + +(define x:stream% + (class x:list% + (super-new) + (inherit-field [@type type] [@len len]) + + (define/override (x:decode port parent) + (define starting-pos (pos port)) ; ! placement matters. `resolve-length` will change `pos` + (define maybe-len (resolve-length @len port parent)) + (define new-parent (if (x:int? @len) + (mhasheq x:parent-key parent + x:start-offset-key starting-pos + x:current-offset-key 0 + x:length-key @len) + parent)) + (define stream-starting-pos (pos port)) + (define item-size (send @type x:size #f new-parent)) + ;; have to be able to retreive nth item of stream, random access + (define stream-ending-pos (and maybe-len (+ stream-starting-pos (* maybe-len item-size)))) + (define item-indexes-retrieved null) + (begin0 + (for*/stream ([index (in-range (or maybe-len +inf.0))] + ;; for streams of indefinite length, stop gathering when we're at eof + #:break (and (not maybe-len) + (eof-object? (peek-byte port (+ stream-starting-pos (* item-size index)))))) + (define index-pos (+ stream-starting-pos (* item-size index))) + (pos port index-pos) + (when (eof-object? (peek-byte port)) + (raise-argument-error 'decode (format "at port position ~a, not enough bytes for item ~a" (pos port) index) (pos port))) + (begin0 + (send @type x:decode port new-parent) + (set! item-indexes-retrieved (cons index item-indexes-retrieved)) + (pos port stream-ending-pos))) + (let ([items-to-skip (or maybe-len (if (pair? item-indexes-retrieved) + (add1 (apply max item-indexes-retrieved)) + 0))]) + (pos port (+ (pos port) (* items-to-skip item-size)))))) + + (define/override (x:encode val-arg port [parent #f]) + (unless (or (stream? val-arg) (sequence? val-arg)) + (raise-argument-error 'encode "sequence or stream" val-arg)) + (define vals (match val-arg + [(? list?) val-arg] + [(? stream?) (stream->list val-arg)] + [_ (sequence->list val-arg)])) + (super x:encode vals port parent)) + + (define/override (x:size [val #f] [parent #f]) + (super x:size (if (stream? val) (stream->list val) val) parent)))) + +(define (x:stream? x) (is-a? x x:stream%)) + +(define/contract (x:stream + [type-arg #f] + [len-arg #f] + #:type [type-kwarg #f] + #:length [len-kwarg #f] + #:pre-encode [pre-proc #f] + #:post-decode [post-proc #f] + #:base-class [base-class x:stream%]) + (() + ((or/c xenomorphic? #false) + (or/c length-resolvable? #false) + #:type (or/c xenomorphic? #false) + #:length (or/c length-resolvable? #false) + #:pre-encode (or/c (any/c . -> . any/c) #false) + #:post-decode (or/c (any/c . -> . any/c) #false) + #:base-class (λ (c) (subclass? c x:stream%))) + . ->* . + x:stream?) + (define type (or type-arg type-kwarg)) + (unless (xenomorphic? type) + (raise-argument-error 'x:stream "xenomorphic type" type)) + (define len (or len-arg len-kwarg)) + (unless (length-resolvable? len) + (raise-argument-error 'x:stream "resolvable length" len)) + (new (generate-subclass base-class pre-proc post-proc) [type type] + [len len] + [count-bytes? #false])) + +(define x:lazy-array% x:stream%) +(define x:lazy-array x:stream) + +(module+ test + (require rackunit "number.rkt" "base.rkt") + (define bstr #"ABCD1234") + (define ds (open-input-bytes bstr)) + (define la (x:stream uint8 4)) + (define ila (decode la ds)) + (check-equal? (pos ds) 4) + (check-equal? (stream-ref ila 0) 65) + (check-equal? (pos ds) 4) + (check-equal? (stream-ref ila 1) 66) + (check-equal? (pos ds) 4) + (check-equal? (stream-ref ila 3) 68) + (check-equal? (pos ds) 4) + (check-equal? (stream->list ila) '(65 66 67 68)) + (define la2 (x:stream int16be (λ (t) 4))) + (check-equal? (encode la2 '(1 2 3 4) #f) #"\0\1\0\2\0\3\0\4") + (check-equal? (stream->list (decode la2 (open-input-bytes #"\0\1\0\2\0\3\0\4"))) '(1 2 3 4))) \ No newline at end of file diff --git a/xenomorph/xenomorph/string.rkt b/xenomorph/xenomorph/string.rkt new file mode 100644 index 00000000..167b25ef --- /dev/null +++ b/xenomorph/xenomorph/string.rkt @@ -0,0 +1,130 @@ +#lang racket/base +(require racket/class + racket/match + racket/contract + "base.rkt" + "util.rkt" + "number.rkt") +(provide (all-defined-out)) + +#| +approximates +https://github.com/mbutterick/restructure/blob/master/src/String.coffee +|# + +(define (decode-string len port [encoding 'ascii]) + (define decoder (case encoding + [(utf16le ucs2) (error 'unsupported-string-encoding)] + [(utf8) bytes->string/utf-8] + [(ascii) bytes->string/latin-1] + [else values])) + (decoder (read-bytes len port))) + +(define (string-ascii? string) + (for/and ([c (in-string string)]) + (<= 0 (char->integer c) 127))) + +(define (encode-string string [encoding 'ascii]) + (when (eq? encoding 'ascii) + (unless (string-ascii? string) + (raise-argument-error 'encode "ascii string" string))) + (define encoder (case encoding + [(ucs2 utf8 ascii) string->bytes/utf-8] + [(utf16le) (error 'swap-bytes-unimplemented)] + [else (error 'unsupported-string-encoding)])) + (encoder string)) + +(define (count-nonzero-chars port) + (bytes-length (car (regexp-match-peek "[^\u0]*" port)))) + +(define (bytes-left-in-port? port) + (not (eof-object? (peek-byte port)))) + +(define (supported-encoding? x) + (and (symbol? x) (memq x supported-encodings))) + +(define x:string% + (class x:base% + (super-new) + (init-field [(@len len)] [(@encoding encoding)]) + + (unless (length-resolvable? @len) + (raise-argument-error 'x:string "length-resolvable?" @len)) + (unless (or (procedure? @encoding) (supported-encoding? @encoding)) + (raise-argument-error 'x:string (format "procedure or member of ~v" supported-encodings) @encoding)) + + (define/augment (x:decode port parent) + (define len (or (resolve-length @len port parent) (count-nonzero-chars port))) + (define encoding (match @encoding + [(? procedure? proc) (or (proc parent) 'ascii)] + [enc enc])) + (define adjustment (if (and (not @len) (bytes-left-in-port? port)) 1 0)) + (define result (decode-string len port encoding)) + (pos port (+ (pos port) adjustment)) + (when (eq? @encoding 'ascii) + (unless (string-ascii? result) + (raise-result-error 'decode "ascii string" result))) + result) + + (define/augment (x:encode val-arg port [parent #f]) + (define val (if (string? val-arg) val-arg (format "~a" val-arg))) + (define encoding (match @encoding + [(? procedure?) (@encoding (and parent (hash-ref parent val)) 'ascii)] ; when does this happen? + [enc enc])) + (define encoded-str (encode-string val encoding)) + (define encoded-length (bytes-length encoded-str)) + (when (and (exact-nonnegative-integer? @len) (> encoded-length @len)) + (raise-argument-error 'encode (format "string no longer than ~a" @len) val)) + (when (x:int? @len) + (send @len x:encode encoded-length port parent)) + (define string-terminator (if @len (bytes) (bytes 0))) ; null terminated when no len + (bytes-append encoded-str string-terminator)) + + (define/augment (x:size [val-arg #f] [parent #f]) + (define val (cond + [(string? val-arg) val-arg] + [(not val-arg) #false] + [else (format "~a" val-arg)])) + (cond + [val (define encoding (if (procedure? @encoding) + (or (@encoding (and parent (hash-ref parent val)) 'ascii)) + @encoding)) + (define string-size (bytes-length (encode-string val encoding))) + (define strlen-size (cond + [(not @len) 1] + [(x:int? @len) (send @len x:size)] + [else 0])) + (+ string-size strlen-size)] + [else (resolve-length @len #f parent)])))) + +(define supported-encodings '(ascii utf8)) + +(define (x:string? x) (is-a? x x:string%)) + +(define/contract (x:string + [len-arg #f] + [enc-arg #f] + #:length [len-kwarg #f] + #:encoding [enc-kwarg 'utf8] + #:pre-encode [pre-proc #f] + #:post-decode [post-proc #f] + #:base-class [base-class x:string%]) + (() + ((or/c length-resolvable? #false) + (or/c procedure? supported-encoding? #false) + #:length (or/c length-resolvable? #false) + #:encoding (or/c procedure? supported-encoding? #false) + #:pre-encode (or/c (any/c . -> . any/c) #false) + #:post-decode (or/c (any/c . -> . any/c) #false) + #:base-class (λ (c) (subclass? c x:string%))) + . ->* . + x:string?) + (define len (or len-arg len-kwarg)) + (unless (length-resolvable? len) + (raise-argument-error 'x:string "resolvable length" len)) + (define encoding (or enc-arg enc-kwarg)) + (unless (or (supported-encoding? encoding) (procedure? encoding)) + (raise-argument-error 'x:string "valid encoding value" encoding)) + (new (generate-subclass base-class pre-proc post-proc) + [len len] + [encoding encoding])) diff --git a/xenomorph/xenomorph/symbol.rkt b/xenomorph/xenomorph/symbol.rkt new file mode 100644 index 00000000..5351f704 --- /dev/null +++ b/xenomorph/xenomorph/symbol.rkt @@ -0,0 +1,63 @@ +#lang racket/base +(require racket/class + racket/contract + "base.rkt" + "number.rkt" + "string.rkt" + "util.rkt") +(provide (all-defined-out)) + +(define x:symbol% + (class x:string% + (super-new) + + (define/override (pre-encode val) + (unless (or (string? val) (symbol? val)) + (raise-argument-error 'x:symbol-encode "symbol or string" val)) + (if (symbol? val) (symbol->string val) val)) + + (define/override (post-decode val) (string->symbol val)))) + +(define (x:symbol? x) (is-a? x x:symbol%)) + +(define/contract (x:symbol + [len-arg #f] + [enc-arg #f] + #:length [len-kwarg #f] + #:encoding [enc-kwarg 'utf8] + #:pre-encode [pre-proc #f] + #:post-decode [post-proc #f] + #:base-class [base-class x:symbol%]) + (() + ((or/c length-resolvable? #false) + (or/c procedure? supported-encoding? #false) + #:length (or/c length-resolvable? #false) + #:encoding (or/c procedure? supported-encoding? #false) + #:pre-encode (or/c (any/c . -> . any/c) #false) + #:post-decode (or/c (any/c . -> . any/c) #false) + #:base-class (λ (c) (subclass? c x:symbol%))) + . ->* . + x:symbol?) + (define len (or len-arg len-kwarg)) + (unless (length-resolvable? len) + (raise-argument-error 'x:symbol "resolvable length" len)) + (define encoding (or enc-arg enc-kwarg)) + (unless (or (supported-encoding? encoding) (procedure? encoding)) + (raise-argument-error 'x:symbol "valid encoding value" encoding)) + (new (generate-subclass base-class pre-proc post-proc) + [len len] + [encoding encoding])) + +(module+ test + (require rackunit "base.rkt") + (define S-fixed (x:string 4 'utf8)) + (check-equal? (encode S-fixed "Mike" #f) #"Mike") + (check-exn exn:fail? (λ () (encode S-fixed "Mikes" #f))) ; too long for fixed string + (define S (x:string uint8 'utf8)) + (check-equal? (decode S #"\2BCDEF") "BC") + (check-equal? (encode S "Mike" #f) #"\4Mike") + (check-equal? (send (x:string) x:size "foobar") 7) ; null terminated when no len + (check-equal? (decode (x:symbol 4) #"Mike") 'Mike) + (check-equal? (encode (x:symbol 4) 'Mike #f) #"Mike") + (check-equal? (encode (x:symbol 4) "Mike" #f) #"Mike") + (check-exn exn:fail:contract? (λ () (encode (x:symbol 4) 42 #f)))) \ No newline at end of file diff --git a/xenomorph/xenomorph/test/api-test.rkt b/xenomorph/xenomorph/test/api-test.rkt new file mode 100644 index 00000000..f45ee945 --- /dev/null +++ b/xenomorph/xenomorph/test/api-test.rkt @@ -0,0 +1,43 @@ +#lang br +(require xenomorph rackunit) + +(define-simple-check (check-xenomorphic type val) + (let ([de (decode type (encode type val #f))]) + (if (flonum? val) + (check-= de val 0.01) + (check-equal? de val)))) + +(define bigint (x:string #:pre-encode number->string + #:post-decode string->number)) + +(check-xenomorphic bigint 1234567890987654321) + +(define exact (x:list #:type bigint + #:length 2 + #:pre-encode (λ (x) (list (numerator x) (denominator x))) + #:post-decode (λ (nd) (apply / nd)))) + +(check-xenomorphic exact 12345678/8765) + +(define real (x:versioned-dict + #:type uint8 + #:version-key 'version + #:versions + (list + (cons 0 (list (cons 'val exact))) + (cons 1 (list (cons 'val float)))) + #:pre-encode (λ (num) (list (cons 'val num) + (cons 'version (if (exact? num) + 0 + 1)))) + #:post-decode (λ (h) (hash-ref h 'val)))) + +(define pi 3.141592653589793) +(check-xenomorphic real pi) + +(define complex (x:list #:type real + #:length 2 + #:pre-encode (λ (num) (list (real-part num) (imag-part num))) + #:post-decode (λ (ri) (+ (first ri) (* 0+1i (second ri)))))) + +(check-xenomorphic complex 3/4+5i) diff --git a/xenomorph/xenomorph/test/bitfield-test.rkt b/xenomorph/xenomorph/test/bitfield-test.rkt new file mode 100644 index 00000000..964bf7d3 --- /dev/null +++ b/xenomorph/xenomorph/test/bitfield-test.rkt @@ -0,0 +1,86 @@ +#lang racket/base +(require rackunit + racket/match + racket/class + racket/list + sugar/unstable/dict + "../number.rkt" + "../bitfield.rkt" + "../base.rkt") + +#| +approximates +https://github.com/mbutterick/restructure/blob/master/test/Bitfield.coffee +|# + +(define bitfield (x:bitfield uint8 '(Jack Kack "Lack" Mack Nack Oack Pack Quack))) +(match-define (list JACK KACK LACK MACK NACK OACK PACK QUACK) + (map (λ (x) (arithmetic-shift 1 x)) (range 8))) + +(test-case + "bitfield: should have the right size" + (check-equal? (send bitfield x:size) 1)) + +(test-case + "bitfield: should reject too many flags" + (check-exn exn:fail? (λ () (x:bitfield uint8 '(Jack Kack Lack Mack Nack Oack Pack Quack Zack Wack))))) + +(test-case + "bitfield: should reject duplicate flags" + (check-exn exn:fail? (λ () (x:bitfield uint8 '(Jack Jack Jack Jack Jack))))) + +(test-case + "bitfield: should decode" + (parameterize ([current-input-port (open-input-bytes (bytes (bitwise-ior JACK MACK PACK NACK QUACK)))]) + (check-equal? (decode bitfield) (mhash 'Quack #t + 'Nack #t + "Lack" #f + 'Oack #f + 'Pack #t + 'Mack #t + 'Jack #t + 'Kack #f)))) + +(test-case + "bitfield: should decode with post-decode" + (parameterize ([current-input-port (open-input-bytes (bytes (bitwise-ior JACK MACK PACK NACK QUACK)))]) + (define bitfield (x:bitfield uint8 '(Jack Kack "Lack" Mack Nack Oack Pack Quack) #:post-decode (λ (fh) (hash-set! fh 'foo 42) fh))) + (check-equal? (decode bitfield) (mhash 'Quack #t + 'Nack #t + "Lack" #f + 'Oack #f + 'Pack #t + 'Mack #t + 'Jack #t + 'Kack #f + 'foo 42)))) + +(test-case + "bitfield: should encode" + (check-equal? (encode bitfield (mhash 'Quack #t + 'Nack #t + "Lack" #f + 'Oack #f + 'Pack #t + 'Mack #t + 'Jack #t + 'Kack #f) #f) + (bytes (bitwise-ior JACK MACK PACK NACK QUACK)))) + +(test-case + "bitfield: should encode with pre-encode" + (define bitfield (x:bitfield uint8 '(Jack Kack "Lack" Mack Nack Oack Pack Quack) + #:pre-encode (λ (fh) + (hash-set! fh 'Jack #f) + (hash-set! fh 'Mack #f) + (hash-set! fh 'Pack #f) + fh))) + (check-equal? (encode bitfield (mhash 'Quack #t + 'Nack #t + "Lack" #f + 'Oack #f + 'Pack #t + 'Mack #t + 'Jack #t + 'Kack #f) #f) + (bytes (bitwise-ior NACK QUACK)))) diff --git a/xenomorph/xenomorph/test/bytes-test.rkt b/xenomorph/xenomorph/test/bytes-test.rkt new file mode 100644 index 00000000..25968002 --- /dev/null +++ b/xenomorph/xenomorph/test/bytes-test.rkt @@ -0,0 +1,63 @@ +#lang racket/base +(require rackunit + racket/class + "../bytes.rkt" + "../number.rkt" + "../base.rkt") + +#| +approximates +https://github.com/mbutterick/restructure/blob/master/test/Buffer.coffee +|# + +(test-case + "bytes: should decode" + (parameterize ([current-input-port (open-input-bytes (bytes #xab #xff #x1f #xb6))]) + (define buf (x:bytes #:length 2)) + (check-equal? (decode buf) (bytes #xab #xff)) + (check-equal? (decode buf) (bytes #x1f #xb6)))) + +(test-case + "bytes: should error on invalid length" + (check-exn exn:fail:contract? (λ () (x:bytes #:length #true)))) + +(test-case + "bytes: should decode with post-decode" + (parameterize ([current-input-port (open-input-bytes (bytes #xab #xff #x1f #xb6))]) + (define buf (x:bytes #:length 2 #:post-decode (λ (val) (bytes 1 2)))) + (check-equal? (decode buf) (bytes 1 2)) + (check-equal? (decode buf) (bytes 1 2)))) + +(test-case + "bytes: should decode with parent key length" + (parameterize ([current-input-port (open-input-bytes (bytes #xab #xff #x1f #xb6))]) + (define buf (x:bytes #:length (λ (p) (hash-ref p 'len)))) + (check-equal? (decode buf #:parent (hash 'len 3)) (bytes #xab #xff #x1f)) + (check-equal? (decode buf #:parent (hash 'len 1)) (bytes #xb6)))) + +(test-case + "bytes: hould return size" + (check-equal? (send (x:bytes #:length 2) x:size (bytes #xab #xff)) 2)) + +(test-case + "bytes: hould use defined length if no value given" + (check-equal? (send (x:bytes #:length 10) x:size) 10)) + +(test-case + "bytes: should encode" + (let ([buf (x:bytes 2)]) + (check-equal? (bytes-append + (encode buf (bytes #xab #xff) #f) + (encode buf (bytes #x1f #xb6) #f)) (bytes #xab #xff #x1f #xb6)))) + +(test-case + "bytes: should encode with pre-encode" + (let () + (define buf (x:bytes 2 #:pre-encode (λ (val) (bytes 1 2)))) + (check-equal? (bytes-append + (encode buf (bytes #xab #xff) #f) + (encode buf (bytes #x1f #xb6) #f)) (bytes 1 2 1 2)))) + +(test-case + "bytes: should encode length before bytes" + (check-equal? (encode (x:bytes #:length uint8) (bytes #xab #xff) #f) (bytes 2 #xab #xff))) \ No newline at end of file diff --git a/xenomorph/xenomorph/test/dict-test.rkt b/xenomorph/xenomorph/test/dict-test.rkt new file mode 100644 index 00000000..554dbfdd --- /dev/null +++ b/xenomorph/xenomorph/test/dict-test.rkt @@ -0,0 +1,93 @@ +#lang racket/base +(require rackunit racket/dict + racket/class + "../base.rkt" + "../dict.rkt" + "../string.rkt" + "../pointer.rkt" + "../number.rkt" + "../base.rkt" + sugar/unstable/dict) + +#| +approximates +https://github.com/mbutterick/restructure/blob/master/test/Struct.coffee +|# + +(test-case + "dict: decode into an object" + (parameterize ([current-input-port (open-input-bytes #"\x05roxyb\x15")]) + (check-equal? + (decode (x:dict 'name (x:string #:length uint8) 'age uint8)) + (mhasheq 'name "roxyb" 'age 21)))) + +(test-case + "dict: decode nested struct into an object" + (parameterize ([current-input-port (open-input-bytes #"\x05roxyb\x15\x05roxyb\x15")]) + (check-equal? + (decode (x:dict 'name (x:string #:length uint8) 'age uint8 + 'nested (x:dict 'name (x:string #:length uint8) 'age uint8))) + (mhasheq 'name "roxyb" 'age 21 'nested (mhasheq 'name "roxyb" 'age 21))))) + +(test-case + "dict: decode with process hook" + (parameterize ([current-input-port (open-input-bytes #"\x05roxyb\x20")]) + (define struct (x:dict #:post-decode (λ (o) (hash-set! o 'canDrink (>= (hash-ref o 'age) 21)) o) + 'name (x:string #:length uint8) 'age uint8)) + (check-equal? (decode struct) + (mhasheq 'name "roxyb" 'age 32 'canDrink #t)))) + +(test-case + "dict: decode supports function keys" + (parameterize ([current-input-port (open-input-bytes #"\x05roxyb\x20")]) + (define struct (x:dict 'name (x:string #:length uint8) 'age uint8 'canDrink (λ (o) (>= (hash-ref o 'age) 21)))) + (check-equal? (decode struct) + (mhasheq 'name "roxyb" 'age 32 'canDrink #t)))) + +(test-case + "dict: compute the correct size" + (check-equal? (send (x:dict 'name (x:string #:length uint8) 'age uint8) + x:size (hasheq 'name "roxyb" 'age 32)) 7)) + +(test-case + "dict: compute the correct size with pointers" + (check-equal? (send (x:dict 'name (x:string #:length uint8) + 'age uint8 + 'ptr (x:pointer #:type uint8 #:dest-type (x:string #:length uint8))) + x:size + (mhash 'name "roxyb" 'age 21 'ptr "hello")) 14)) + +(test-case + "dict: get the correct size when no value is given" + (check-equal? (send (x:dict 'name (x:string 4) 'age uint8) x:size) 5)) + +(test-case + "dict: throw when getting non-fixed length size and no value is given" + (check-exn exn:fail:contract? (λ () (send (x:dict 'name (x:string #:length uint8) 'age uint8) x:size)))) + +(test-case + "dict: encode objects to buffers" + (parameterize ([current-input-port (open-input-bytes #"\x05roxyb\x15")]) + (check-equal? (decode (x:dict 'name (x:string #:length uint8) 'age uint8)) + (mhasheq 'name "roxyb" 'age 21)))) + +(test-case + "dict: support pre-encode hook" + (parameterize ([current-output-port (open-output-bytes)]) + (define struct (x:dict #:pre-encode (λ (val) + (hash-set! val 'nameLength (string-length (hash-ref val 'name))) + val) + 'nameLength uint8 + 'name (x:string (λ (this) (hash-ref this 'nameLength))) + 'age uint8)) + (encode struct (mhasheq 'name "roxyb" 'age 21)) + (check-equal? (get-output-bytes (current-output-port)) #"\x05roxyb\x15"))) + +(test-case + "dict: encode pointer data after structure" + (parameterize ([current-output-port (open-output-bytes)]) + (define struct (x:dict 'name (x:string #:length uint8) + 'age uint8 + 'ptr (x:pointer uint8 #:dest-type (x:string #:length uint8)))) + (encode struct (hasheq 'name "roxyb" 'age 21 'ptr "hello")) + (check-equal? (get-output-bytes (current-output-port)) #"\x05roxyb\x15\x08\x05hello"))) \ No newline at end of file diff --git a/xenomorph/xenomorph/test/enum-test.rkt b/xenomorph/xenomorph/test/enum-test.rkt new file mode 100644 index 00000000..af615b1e --- /dev/null +++ b/xenomorph/xenomorph/test/enum-test.rkt @@ -0,0 +1,68 @@ +#lang racket/base +(require rackunit + racket/class + "../number.rkt" + "../enum.rkt" + "../base.rkt") + +#| +approximates +https://github.com/mbutterick/restructure/blob/master/test/Enum.coffee +|# + +(define e (x:enum #:type uint8 + #:values '("foo" "bar" "baz" #f))) + +(test-case + "enum: should error with invalid type" + (check-exn exn:fail:contract? (λ () (x:enum 42)))) + +(test-case + "enum: should error with invalid values" + (check-exn exn:fail:contract? (λ () (x:enum #:values 42)))) + +(test-case + "enum: should have the right size" + (check-equal? (send e x:size) 1)) + +(test-case + "enum: decode should decode" + (parameterize ([current-input-port (open-input-bytes (bytes 1 2 0 3 4))]) + (check-equal? (decode e) "bar") + (check-equal? (decode e) "baz") + (check-equal? (decode e) "foo") + (check-equal? (decode e) 3) + (check-equal? (decode e) 4))) + +(test-case + "enum: decode should decode with post-decode" + (parameterize ([current-input-port (open-input-bytes (bytes 1 2 0))]) + (define e2 (x:enum #:type uint8 + #:values '("foo" "bar" "baz") + #:post-decode (λ (val) "foobar"))) + (check-equal? (decode e2) "foobar") + (check-equal? (decode e2) "foobar") + (check-equal? (decode e2) "foobar"))) + +(test-case + "enum: encode should encode" + (parameterize ([current-output-port (open-output-bytes)]) + (encode e "bar") + (encode e "baz") + (encode e "foo") + (check-equal? (get-output-bytes (current-output-port)) (bytes 1 2 0)))) + +(test-case + "enum: encode should encode with pre-encode" + (parameterize ([current-output-port (open-output-bytes)]) + (define e2 (x:enum #:type uint8 + #:values '("foo" "bar" "baz") + #:pre-encode (λ (val) "foo"))) + (encode e2 "bar") + (encode e2 "baz") + (encode e2 "foo") + (check-equal? (get-output-bytes (current-output-port)) (bytes 0 0 0)))) + +(test-case + "enum: should throw on unknown option" + (check-exn exn:fail:contract? (λ () (encode e "unknown" (open-output-bytes))))) \ No newline at end of file diff --git a/xenomorph/xenomorph/test/list-test.rkt b/xenomorph/xenomorph/test/list-test.rkt new file mode 100644 index 00000000..c9786555 --- /dev/null +++ b/xenomorph/xenomorph/test/list-test.rkt @@ -0,0 +1,115 @@ +#lang racket/base +(require rackunit + racket/class + "../list.rkt" + "../dict.rkt" + "../number.rkt" + "../pointer.rkt" + "../base.rkt" + sugar/unstable/dict) + +#| +approximates +https://github.com/mbutterick/restructure/blob/master/test/Array.coffee +|# + +(test-case + "list: decode fixed length" + (parameterize ([current-input-port (open-input-bytes (bytes 1 2 3 4 5))]) + (check-equal? (decode (x:list #:type uint8 #:length 4)) '(1 2 3 4)))) + +(test-case + "list: decode nested" + (parameterize ([current-input-port (open-input-bytes (bytes 1 2 3 4 5))]) + (check-equal? (decode (x:list #:type (x:dict 'foo uint8) #:length 4)) + (list (mhasheq 'foo 1) + (mhasheq 'foo 2) + (mhasheq 'foo 3) + (mhasheq 'foo 4))))) + +(test-case + "list: decode with post-decode" + (parameterize ([current-input-port (open-input-bytes (bytes 1 2 3 4 5))]) + (define xa (x:list #:type uint8 #:length 4 #:post-decode (λ (val) (map (λ (x) (* 2 x)) val)))) + (check-equal? (decode xa) '(2 4 6 8)))) + +(test-case + "list: decode fixed number of bytes" + (parameterize ([current-input-port (open-input-bytes (bytes 1 2 3 4 5))]) + (check-equal? (decode (x:list #:type uint16be #:length 4 #:count-bytes #t)) '(258 772)))) + +(test-case + "list: decode length from parent key" + (parameterize ([current-input-port (open-input-bytes (bytes 1 2 3 4 5))]) + (check-equal? (decode (x:list #:type uint8 #:length (λ (p) (hash-ref p 'len))) (current-input-port) #:parent (mhash 'len 4)) '(1 2 3 4)))) + +(test-case + "list: decode byte count from parent key" + (parameterize ([current-input-port (open-input-bytes (bytes 1 2 3 4 5))]) + (check-equal? (decode (x:list #:type uint16be #:length (λ (p) (hash-ref p 'len)) #:count-bytes #t) (current-input-port) #:parent (mhash 'len 4)) '(258 772)))) + +(test-case + "list: decode length as number before array" + (parameterize ([current-input-port (open-input-bytes (bytes 4 1 2 3 4 5))]) + (check-equal? (decode (x:list #:type uint8 #:length uint8)) '(1 2 3 4)))) + +(test-case + "list: decode byte count as number before array" + (parameterize ([current-input-port (open-input-bytes (bytes 4 1 2 3 4 5))]) + (check-equal? (decode (x:list #:type uint16be #:length uint8 #:count-bytes #t)) '(258 772)))) + +(test-case + "list: decode length from function" + (parameterize ([current-input-port (open-input-bytes (bytes 1 2 3 4 5))]) + (check-equal? (decode (x:list #:type uint8 #:length (λ _ 4))) '(1 2 3 4)))) + +(test-case + "list: decode byte count from function" + (parameterize ([current-input-port (open-input-bytes (bytes 1 2 3 4 5))]) + (check-equal? (decode (x:list #:type uint16be #:length (λ _ 4) #:count-bytes #t)) '(258 772)))) + +(test-case + "list: decode to the end of parent if no length given" + (parameterize ([current-input-port (open-input-bytes (bytes 1 2 3 4 5))]) + (check-equal? (decode (x:list #:type uint8) (current-input-port) #:parent (mhash x:length-key 4 x:start-offset-key 0)) '(1 2 3 4)))) + +(test-case + "list: decode to the end of the stream if parent exists, but its length is 0" + (parameterize ([current-input-port (open-input-bytes (bytes 1 2 3 4 5))]) + (check-equal? (decode (x:list #:type uint8) (current-input-port) #:parent (mhash x:length-key 0 x:start-offset-key 0)) '(1 2 3 4 5)))) + +(test-case + "list: decode to the end of the stream if no parent and length is given" + (parameterize ([current-input-port (open-input-bytes (bytes 1 2 3 4))]) + (check-equal? (decode (x:list #:type uint8)) '(1 2 3 4 )))) + +(test-case + "list: use array length" + (check-equal? (send (x:list #:type uint8 #:length 10) x:size '(1 2 3 4)) 4)) + +(test-case + "list: add size of length field before string" + (check-equal? (send (x:list #:type uint8 #:length uint8) x:size '(1 2 3 4)) 5)) + +(test-case + "list: use defined length if no value given" + (check-equal? (send (x:list #:type uint8 #:length 10) x:size) 10)) + +(test-case + "list: encode using array length" + (check-equal? (encode (x:list #:type uint8 #:length 4) '(1 2 3 4) #f) (bytes 1 2 3 4))) + +(test-case + "list: encode with pre-encode" + (parameterize ([current-input-port (open-input-bytes (bytes 1 2 3 4 5))]) + (define xa (x:list #:type uint8 #:length 4 #:pre-encode (λ (val) (map (λ (x) (* 2 x)) val)))) + (check-equal? (encode xa '(1 2 3 4) #f) (bytes 2 4 6 8)))) + +(test-case + "list: encode length as number before array" + (check-equal? (encode (x:list #:type uint8 #:length uint8) '(1 2 3 4) #f) (bytes 4 1 2 3 4))) + +(test-case + "list: add pointers after array if length is encoded at start" + (check-equal? (encode (x:list #:type (x:pointer uint8 uint8) + #:length uint8) '(1 2 3 4) #f) (bytes 4 5 6 7 8 1 2 3 4))) \ No newline at end of file diff --git a/xenomorph/xenomorph/test/main.rkt b/xenomorph/xenomorph/test/main.rkt new file mode 100644 index 00000000..57861dd2 --- /dev/null +++ b/xenomorph/xenomorph/test/main.rkt @@ -0,0 +1,17 @@ +#lang racket/base + +(require "bitfield-test.rkt" + "bytes-test.rkt" + "dict-test.rkt" + "enum-test.rkt" + "list-test.rkt" + "number-test.rkt" + "optional-test.rkt" + "pointer-test.rkt" + "reserved-test.rkt" + "stream-test.rkt" + "string-test.rkt" + "symbol-test.rkt" + "vector-test.rkt" + "versioned-dict-test.rkt" + "api-test.rkt") diff --git a/xenomorph/xenomorph/test/number-test.rkt b/xenomorph/xenomorph/test/number-test.rkt new file mode 100644 index 00000000..09d0d9af --- /dev/null +++ b/xenomorph/xenomorph/test/number-test.rkt @@ -0,0 +1,212 @@ +#lang racket/base +(require rackunit + racket/class + "../number.rkt" + "../base.rkt") + +#| +approximates +https://github.com/mbutterick/restructure/blob/master/test/Number.coffee +|# + +(test-case + "number: uint8: decode, size, encode" + (parameterize ([current-input-port (open-input-bytes (bytes #xab #xff))]) + (check-equal? (decode uint8) #xab) + (check-equal? (decode uint8) #xff)) + (check-equal? (send uint8 x:size) 1) + (let ([port (open-output-bytes)]) + (encode uint8 #xab port) + (encode uint8 #xff port) + (check-equal? (get-output-bytes port) (bytes #xab #xff)))) + +(test-case + "number: uint8: decode with post-decode, size, encode with pre-encode" + (define myuint8 (x:int 1 #:signed #f + #:post-decode (λ (val) #xdeadbeef) + #:pre-encode (λ (val) #xcc))) + (parameterize ([current-input-port (open-input-bytes (bytes #xab #xff))]) + (check-equal? (decode myuint8) #xdeadbeef) + (check-equal? (decode myuint8) #xdeadbeef)) + (check-equal? (send myuint8 x:size) 1) + (let ([port (open-output-bytes)]) + (encode myuint8 #xab port) + (encode myuint8 #xff port) + (check-equal? (get-output-bytes port) (bytes #xcc #xcc)))) + +(test-case + "number: uint16 is the same endianness as the platform" + (check-equal? (decode uint16 (bytes 0 1)) + (decode (if (system-big-endian?) uint16be uint16le) (bytes 0 1)))) + +(test-case + "number: uint16be: decode, size, encode" + (check-equal? (decode uint16be (open-input-bytes (bytes #xab #xff))) #xabff) + (check-equal? (send uint16be x:size) 2) + (check-equal? (encode uint16be #xabff #f) (bytes #xab #xff))) + +(test-case + "number: uint16le: decode, size, encode" + (check-equal? (decode uint16le (open-input-bytes (bytes #xff #xab))) #xabff) + (check-equal? (send uint16le x:size) 2) + (check-equal? (encode uint16le #xabff #f) (bytes #xff #xab))) + +(test-case + "number: uint24 is the same endianness as the platform" + (check-equal? (decode uint24 (bytes 0 1 2)) + (decode (if (system-big-endian?) uint24be uint24le) (bytes 0 1 2)))) +(test-case + "number: uint24be: decode, size, encode" + (check-equal? (decode uint24be (open-input-bytes (bytes #xff #xab #x24))) #xffab24) + (check-equal? (send uint24be x:size) 3) + (check-equal? (encode uint24be #xffab24 #f) (bytes #xff #xab #x24))) + +(test-case + "number: uint24le: decode, size, encode" + (check-equal? (decode uint24le (open-input-bytes (bytes #x24 #xab #xff))) #xffab24) + (check-equal? (send uint24le x:size) 3) + (check-equal? (encode uint24le #xffab24 #f) (bytes #x24 #xab #xff))) + +(test-case + "number: uint32 is the same endianness as the platform" + (check-equal? (decode uint32 (bytes 0 1 2 3)) + (decode (if (system-big-endian?) uint32be uint32le) (bytes 0 1 2 3)))) +(test-case + "number: uint32be: decode, size, encode" + (check-equal? (decode uint32be (open-input-bytes (bytes #xff #xab #x24 #xbf))) #xffab24bf) + (check-equal? (send uint32be x:size) 4) + (check-equal? (encode uint32be #xffab24bf #f) (bytes #xff #xab #x24 #xbf))) + +(test-case + "number: uint32le: decode, size, encode" + (check-equal? (decode uint32le (open-input-bytes (bytes #xbf #x24 #xab #xff))) #xffab24bf) + (check-equal? (send uint32le x:size) 4) + (check-equal? (encode uint32le #xffab24bf #f) (bytes #xbf #x24 #xab #xff))) + +(test-case + "number: int8: decode, size, encode" + (let ([port (open-input-bytes (bytes #x7f #xff))]) + (check-equal? (decode int8 port) 127) + (check-equal? (decode int8 port) -1)) + (check-equal? (send int8 x:size) 1) + (let ([port (open-output-bytes)]) + (encode int8 127 port) + (encode int8 -1 port) + (check-equal? (get-output-bytes port) (bytes #x7f #xff)))) + +(test-case + "number: int32 is the same endianness as the platform" + (check-equal? (decode int16 (bytes 0 1)) + (decode (if (system-big-endian?) int16be int16le) (bytes 0 1)))) +(test-case + "number: int16be: decode, size, encode" + (let ([port (open-input-bytes (bytes #xff #xab))]) + (check-equal? (decode int16be port) -85)) + (check-equal? (send int16be x:size) 2) + (let ([port (open-output-bytes)]) + (encode int16be -85 port) + (check-equal? (get-output-bytes port) (bytes #xff #xab)))) + +(test-case + "number: int16le: decode, size, encode" + (check-equal? (decode int16le (open-input-bytes (bytes #xab #xff))) -85) + (check-equal? (send int16le x:size) 2) + (check-equal? (encode int16le -85 #f) (bytes #xab #xff))) + +(test-case + "number: int24 is the same endianness as the platform" + (check-equal? (decode int24 (bytes 0 1 2)) + (decode (if (system-big-endian?) int24be int24le) (bytes 0 1 2)))) +(test-case + "number: int24be: decode, size, encode" + (check-equal? (decode int24be (open-input-bytes (bytes #xff #xab #x24))) -21724) + (check-equal? (send int24be x:size) 3) + (check-equal? (encode int24be -21724 #f) (bytes #xff #xab #x24))) + +(test-case + "number: int24le: decode, size, encode" + (check-equal? (decode int24le (open-input-bytes (bytes #x24 #xab #xff))) -21724) + (check-equal? (send int24le x:size) 3) + (check-equal? (encode int24le -21724 #f) (bytes #x24 #xab #xff))) +(test-case + "number: int32 is the same endianness as the platform" + (check-equal? (decode int32 (bytes 0 1 2 3)) + (decode (if (system-big-endian?) int32be int32le) (bytes 0 1 2 3)))) + +(test-case + "number: int32be: decode, size, encode" + (check-equal? (decode int32be (open-input-bytes (bytes #xff #xab #x24 #xbf))) -5561153) + (check-equal? (send int32be x:size) 4) + (check-equal? (encode int32be -5561153 #f) (bytes #xff #xab #x24 #xbf))) + +(test-case + "number: int32le: decode, size, encode" + (check-equal? (decode int32le (open-input-bytes (bytes #xbf #x24 #xab #xff))) -5561153) + (check-equal? (send int32le x:size) 4) + (check-equal? (encode int32le -5561153 #f) (bytes #xbf #x24 #xab #xff))) + +(test-case + "number: float is the same endianness as the platform" + (check-equal? (decode float (bytes 0 1 2 3)) + (decode (if (system-big-endian?) floatbe floatle) (bytes 0 1 2 3)))) +(test-case + "number: floatbe: decode, size, encode" + (check-= (decode floatbe (open-input-bytes (bytes #x43 #x7a #x8c #xcd))) 250.55 0.01) + (check-equal? (send floatbe x:size) 4) + (check-equal? (encode floatbe 250.55 #f) (bytes #x43 #x7a #x8c #xcd))) + +(test-case + "number: floatle: decode, size, encode" + (check-= (decode floatle (open-input-bytes (bytes #xcd #x8c #x7a #x43))) 250.55 0.01) + (check-equal? (send floatle x:size) 4) + (check-equal? (encode floatle 250.55 #f) (bytes #xcd #x8c #x7a #x43))) + +(test-case + "number: double is the same endianness as the platform" + (check-equal? (decode double (bytes 0 1 2 3 4 5 6 7)) + (decode (if (system-big-endian?) doublebe doublele) (bytes 0 1 2 3 4 5 6 7)))) +(test-case + "number: doublebe: decode, size, encode" + (check-equal? (decode doublebe (open-input-bytes (bytes #x40 #x93 #x4a #x3d #x70 #xa3 #xd7 #x0a))) 1234.56) + (check-equal? (send doublebe x:size) 8) + (check-equal? (encode doublebe 1234.56 #f) (bytes #x40 #x93 #x4a #x3d #x70 #xa3 #xd7 #x0a))) + +(test-case + "number: doublele: decode, size, encode" + (check-equal? (decode doublele (open-input-bytes (bytes #x0a #xd7 #xa3 #x70 #x3d #x4a #x93 #x40))) 1234.56) + (check-equal? (send doublele x:size) 8) + (check-equal? (encode doublele 1234.56 #f) (bytes #x0a #xd7 #xa3 #x70 #x3d #x4a #x93 #x40))) + +(test-case + "number: fixed16 is the same endianness as the platform" + (check-equal? (decode fixed16 (bytes 0 1)) + (decode (if (system-big-endian?) fixed16be fixed16le) (bytes 0 1)))) + +(test-case + "number: fixed16be: decode, size, encode" + (check-= (decode fixed16be (open-input-bytes (bytes #x19 #x57))) 25.34 0.01) + (check-equal? (send fixed16be x:size) 2) + (check-equal? (encode fixed16be 25.34 #f) (bytes #x19 #x57))) + +(test-case + "number: fixed16le: decode, size, encode" + (check-= (decode fixed16le (open-input-bytes (bytes #x57 #x19))) 25.34 0.01) + (check-equal? (send fixed16le x:size) 2) + (check-equal? (encode fixed16le 25.34 #f) (bytes #x57 #x19))) + +(test-case + "number: fixed32 is the same endianness as the platform" + (check-equal? (decode fixed32 (bytes 0 1 2 3)) + (decode (if (system-big-endian?) fixed32be fixed32le) (bytes 0 1 2 3)))) + +(test-case + "number: fixed32be: decode, size, encode" + (check-= (decode fixed32be (open-input-bytes (bytes #x00 #xfa #x8c #xcc))) 250.55 0.01) + (check-equal? (send fixed32be x:size) 4) + (check-equal? (encode fixed32be 250.55 #f) (bytes #x00 #xfa #x8c #xcc))) + +(test-case + "number: fixed32le: decode, size, encode" + (check-= (decode fixed32le (open-input-bytes (bytes #xcc #x8c #xfa #x00))) 250.55 0.01) + (check-equal? (send fixed32le x:size) 4) + (check-equal? (encode fixed32le 250.55 #f) (bytes #xcc #x8c #xfa #x00))) \ No newline at end of file diff --git a/xenomorph/xenomorph/test/optional-test.rkt b/xenomorph/xenomorph/test/optional-test.rkt new file mode 100644 index 00000000..3e57c17f --- /dev/null +++ b/xenomorph/xenomorph/test/optional-test.rkt @@ -0,0 +1,116 @@ +#lang racket/base +(require rackunit + racket/class + "../base.rkt" + "../number.rkt" + "../optional.rkt" + "../base.rkt") + +#| +approximates +https://github.com/mbutterick/restructure/blob/master/test/Optional.coffee +|# + +(test-case + "optional: decode should not decode when condition is falsy" + (parameterize ([current-input-port (open-input-bytes (bytes 0))]) + (define optional (x:optional uint8 #:condition #f)) + (check-equal? (decode optional) (void)) + (check-equal? (pos (current-input-port)) 0))) + +(test-case + "optional: decode with post-decode" + (parameterize ([current-input-port (open-input-bytes (bytes 0))]) + (define optional (x:optional uint8 #:condition #f #:post-decode (λ (val) 42))) + (check-equal? (decode optional) 42) + (check-equal? (pos (current-input-port)) 0))) + +(test-case + "optional: decode should not decode when condition is a function and falsy" + (parameterize ([current-input-port (open-input-bytes (bytes 0))]) + (define optional (x:optional uint8 #:condition (λ _ #f))) + (check-equal? (decode optional) (void)) + (check-equal? (pos (current-input-port)) 0))) + +(test-case + "optional: decode should decode when condition is omitted" + (parameterize ([current-input-port (open-input-bytes (bytes 0))]) + (define optional (x:optional uint8)) + (check-not-equal? (decode optional) (void)) + (check-equal? (pos (current-input-port)) 1))) + +(test-case + "optional: decode should decode when condition is truthy" + (parameterize ([current-input-port (open-input-bytes (bytes 0))]) + (define optional (x:optional uint8 #:condition #t)) + (check-not-equal? (decode optional) (void)) + (check-equal? (pos (current-input-port)) 1))) + +(test-case + "optional: decode should decode when condition is a function and truthy" + (parameterize ([current-input-port (open-input-bytes (bytes 0))]) + (define optional (x:optional uint8 #:condition (λ _ #t))) + (check-not-equal? (decode optional) (void)) + (check-equal? (pos (current-input-port)) 1))) + +(test-case + "optional: size" + (check-equal? (send (x:optional uint8 #:condition #f) x:size) 0)) + +(test-case + "optional: size should return 0 when condition is a function and falsy" + (check-equal? (send (x:optional uint8 #:condition (λ _ #f)) x:size) 0)) + +(test-case + "optional: size should return given type size when condition is omitted" + (check-equal? (send (x:optional uint8) x:size) 1)) + +(test-case + "optional: size should return given type size when condition is truthy" + (check-equal? (send (x:optional uint8 #:condition #t) x:size) 1)) + +(test-case + "optional: size should return given type size when condition is a function and truthy" + (check-equal? (send (x:optional uint8 #:condition (λ _ #t)) x:size) 1)) + +(test-case + "optional: encode should not encode when condition is falsy" + (parameterize ([current-output-port (open-output-bytes)]) + (define optional (x:optional uint8 #:condition #f)) + (encode optional 128) + (check-equal? (get-output-bytes (current-output-port)) (bytes)))) + +(test-case + "optional: encode with pre-encode" + (parameterize ([current-output-port (open-output-bytes)]) + (define optional (x:optional uint8 #:pre-encode (λ (val) 42))) + (encode optional 128) + (check-equal? (get-output-bytes (current-output-port)) (bytes 42)))) + +(test-case + "optional: encode should not encode when condition is a function and falsy" + (parameterize ([current-output-port (open-output-bytes)]) + (define optional (x:optional uint8 #:condition (λ _ #f))) + (encode optional 128) + (check-equal? (get-output-bytes (current-output-port)) (bytes)))) + +(test-case + "optional: encode should encode when condition is omitted" + (parameterize ([current-output-port (open-output-bytes)]) + (define optional (x:optional uint8)) + (encode optional 128) + (check-equal? (get-output-bytes (current-output-port)) (bytes 128)))) + +(test-case + "optional: encode should encode when condition is truthy" + (parameterize ([current-output-port (open-output-bytes)]) + (define optional (x:optional uint8 #:condition #t)) + (encode optional 128) + (check-equal? (get-output-bytes (current-output-port)) (bytes 128)))) + +(test-case + "optional: encode should encode when condition is a function and truthy" + (parameterize ([current-output-port (open-output-bytes)]) + (define optional (x:optional uint8 #:condition (λ _ #t))) + (encode optional 128) + (check-equal? (get-output-bytes (current-output-port)) (bytes 128)))) \ No newline at end of file diff --git a/xenomorph/xenomorph/test/pointer-test.rkt b/xenomorph/xenomorph/test/pointer-test.rkt new file mode 100644 index 00000000..26bf7340 --- /dev/null +++ b/xenomorph/xenomorph/test/pointer-test.rkt @@ -0,0 +1,181 @@ +#lang racket/base +(require rackunit + racket/dict + racket/class + "../base.rkt" + "../pointer.rkt" + "../number.rkt" + "../dict.rkt" + "../base.rkt" + racket/promise + sugar/unstable/dict) + +#| +approximates +https://github.com/mbutterick/restructure/blob/master/test/Pointer.coffee +|# + +(test-case + "pointer: decode should handle null pointers" + (parameterize ([current-input-port (open-input-bytes (bytes 0))]) + (check-false (decode (x:pointer) #:parent (mhash x:start-offset-key 50))))) + +(test-case + "pointer: decode should use local offsets from start of parent by default" + (parameterize ([current-input-port (open-input-bytes (bytes 1 53))]) + (check-equal? (decode (x:pointer uint8) #:parent (mhash x:start-offset-key 0)) 53))) + +(test-case + "pointer: decode should support immediate offsets" + (parameterize ([current-input-port (open-input-bytes (bytes 1 53))]) + (check-equal? (decode (x:pointer uint8 #:relative-to 'immediate)) 53))) + +(test-case + "pointer: decode should support offsets relative to the parent" + (parameterize ([current-input-port (open-input-bytes (bytes 0 0 1 53))]) + (pos (current-input-port) 2) + (check-equal? (decode (x:pointer uint8 #:relative-to 'parent) #:parent (mhash x:parent-key (mhash x:start-offset-key 2))) 53))) + +(test-case + "pointer: decode should support global offsets" + (parameterize ([current-input-port (open-input-bytes (bytes 1 2 4 0 0 0 53))]) + (pos (current-input-port) 2) + (check-equal? (decode (x:pointer #:relative-to 'global) #:parent (mhash x:parent-key (mhash x:parent-key (mhash x:start-offset-key 2)))) + 53))) + +;; skipping +;; 'should support offsets relative to a property on the parent' + +(test-case + "pointer: decode should support returning pointer if there is no decode type" + (parameterize ([current-input-port (open-input-bytes (bytes 4))]) + (check-equal? (decode (x:pointer uint8 'void) #:parent (mhash x:start-offset-key 0)) 4))) + +(test-case + "pointer: decode should support decoding pointers lazily" + (parameterize ([current-input-port (open-input-bytes (bytes 1 53))]) + (define res (decode (x:dict 'ptr (x:pointer uint8 #:lazy #t)))) + (check-true (promise? (hash-ref res 'ptr))) + (check-equal? (force (hash-ref res 'ptr)) 53))) + +(test-case + "pointer: size" + (let ([parent (mhash x:pointer-size-key 0)]) + (check-equal? (send (x:pointer uint8) x:size 10 parent) 1) + (check-equal? (hash-ref parent x:pointer-size-key) 1))) + +(test-case + "pointer: size should add to immediate pointerSize" + (let ([parent (mhash x:pointer-size-key 0)]) + (check-equal? (send (x:pointer uint8 #:relative-to 'immediate) x:size 10 parent) 1) + (check-equal? (hash-ref parent x:pointer-size-key) 1))) + +(test-case + "pointer: size should add to parent pointerSize" + (let ([parent (mhash x:parent-key (mhash x:pointer-size-key 0))]) + (check-equal? (send (x:pointer uint8 #:relative-to 'parent) x:size 10 parent) 1) + (check-equal? (hash-ref* parent x:parent-key x:pointer-size-key) 1))) + +(test-case + "pointer: size should add to global pointerSize" + (let ([parent (mhash x:parent-key (mhash x:parent-key (mhash x:parent-key (mhash x:pointer-size-key 0))))]) + (check-equal? (send (x:pointer uint8 #:relative-to 'global) x:size 10 parent) 1) + (check-equal? (hash-ref* parent x:parent-key x:parent-key x:parent-key x:pointer-size-key) 1))) + +(test-case + "pointer: size should handle void pointers" + (let ([parent (mhash x:pointer-size-key 0)]) + (check-equal? (send (x:pointer uint8 'void) x:size (x:void-pointer uint8 50) parent) 1) + (check-equal? (hash-ref parent x:pointer-size-key) 1))) + +(test-case + "pointer: size should throw if no type and not a void pointer" + (let ([parent (mhash x:pointer-size-key 0)]) + (check-exn exn:fail:contract? (λ () (send (x:pointer uint8 'void) x:size 30 parent))))) + +(test-case + "pointer: size should return a fixed size without a value" + (check-equal? (send (x:pointer uint8) x:size) 1)) + +(test-case + "pointer: encode should handle null pointers" + (parameterize ([current-output-port (open-output-bytes)]) + (define parent (mhash x:pointer-size-key 0 + x:start-offset-key 0 + x:pointer-offset-key 0 + x:pointers-key null)) + (encode (x:pointer uint8) #f #:parent parent) + (check-equal? (hash-ref parent x:pointer-size-key) 0) + (check-equal? (get-output-bytes (current-output-port)) (bytes 0)))) + +(test-case + "pointer: encode should handle local offsets" + (parameterize ([current-output-port (open-output-bytes)]) + (define parent (mhash x:pointer-size-key 0 + x:start-offset-key 0 + x:pointer-offset-key 1 + x:pointers-key null)) + (encode (x:pointer uint8) 10 #:parent parent) + (check-equal? (hash-ref parent x:pointer-offset-key) 2) + (check-equal? (hash-ref parent x:pointers-key) (list (x:ptr uint8 10 parent))) + (check-equal? (get-output-bytes (current-output-port)) (bytes 1)))) + +(test-case + "pointer: encode should handle immediate offsets" + (parameterize ([current-output-port (open-output-bytes)]) + (define parent (mhash x:pointer-size-key 0 + x:start-offset-key 0 + x:pointer-offset-key 1 + x:pointers-key null)) + (encode (x:pointer uint8 #:relative-to 'immediate) 10 #:parent parent) + (check-equal? (hash-ref parent x:pointer-offset-key) 2) + (check-equal? (hash-ref parent x:pointers-key) (list (x:ptr uint8 10 parent))) + (check-equal? (get-output-bytes (current-output-port)) (bytes 0)))) + +(test-case + "pointer: encode should handle offsets relative to parent" + (parameterize ([current-output-port (open-output-bytes)]) + (define parent (mhash x:parent-key (mhash x:pointer-size-key 0 + x:start-offset-key 3 + x:pointer-offset-key 5 + x:pointers-key null))) + (encode (x:pointer uint8 #:relative-to 'parent) 10 #:parent parent) + (check-equal? (hash-ref* parent x:parent-key x:pointer-offset-key) 6) + (check-equal? (hash-ref* parent x:parent-key x:pointers-key) (list (x:ptr uint8 10 parent))) + (check-equal? (get-output-bytes (current-output-port)) (bytes 2)))) + +(test-case + "pointer: encode should handle global offsets" + (parameterize ([current-output-port (open-output-bytes)]) + (define parent (mhash x:parent-key + (mhash x:parent-key + (mhash x:parent-key (mhash x:pointer-size-key 0 + x:start-offset-key 3 + x:pointer-offset-key 5 + x:pointers-key null))))) + (encode (x:pointer uint8 #:relative-to 'global) 10 #:parent parent) + (check-equal? (hash-ref* parent x:parent-key x:parent-key x:parent-key x:pointer-offset-key) 6) + (check-equal? (hash-ref* parent x:parent-key x:parent-key x:parent-key x:pointers-key) + (list (x:ptr uint8 10 parent))) + (check-equal? (get-output-bytes (current-output-port)) (bytes 5)))) + +(test-case + "pointer: encode should support void pointers" + (parameterize ([current-output-port (open-output-bytes)]) + (define parent (mhash x:pointer-size-key 0 + x:start-offset-key 0 + x:pointer-offset-key 1 + x:pointers-key null)) + (encode (x:pointer uint8 'void) (x:void-pointer uint8 55) #:parent parent) + (check-equal? (hash-ref parent x:pointer-offset-key) 2) + (check-equal? (hash-ref parent x:pointers-key) (list (x:ptr uint8 55 parent))) + (check-equal? (get-output-bytes (current-output-port)) (bytes 1)))) + +(test-case + "pointer: encode should throw if not a void pointer instance" + (parameterize ([current-output-port (open-output-bytes)]) + (define parent (mhash x:pointer-size-key 0 + x:start-offset-key 0 + x:pointer-offset-key 1 + x:pointers-key null)) + (check-exn exn:fail:contract? (λ () (encode (x:pointer uint8 'void) 44 #:parent parent))))) diff --git a/xenomorph/xenomorph/test/reserved-test.rkt b/xenomorph/xenomorph/test/reserved-test.rkt new file mode 100644 index 00000000..5700f69b --- /dev/null +++ b/xenomorph/xenomorph/test/reserved-test.rkt @@ -0,0 +1,48 @@ +#lang racket/base +(require rackunit + racket/class + "../number.rkt" + "../base.rkt" + "../reserved.rkt" + "../base.rkt") + +#| +approximates +https://github.com/mbutterick/restructure/blob/master/test/Reserved.coffee +|# + +(test-case + "reserved: size should have a default count of 1" + (check-equal? (send (x:reserved uint8) x:size) 1)) + +(test-case + "reserved: size should allow custom counts and types" + (check-equal? (send (x:reserved uint16be 10) x:size) 20)) + +(test-case + "reserved: should decode" + (parameterize ([current-input-port (open-input-bytes (bytes 0 0))]) + (define reserved (x:reserved uint16be)) + (check-equal? (decode reserved) (void)) + (check-equal? (pos (current-input-port)) 2))) + +(test-case + "reserved: should decode with post-decode" + (parameterize ([current-input-port (open-input-bytes (bytes 0 0))]) + (define reserved (x:reserved uint16be #:post-decode (λ (val) 42))) + (check-equal? (decode reserved) 42) + (check-equal? (pos (current-input-port)) 2))) + +(test-case + "reserved: should encode" + (parameterize ([current-output-port (open-output-bytes)]) + (define reserved (x:reserved uint16be)) + (encode reserved #f) + (check-equal? (get-output-bytes (current-output-port)) (bytes 0 0)))) + +(test-case + "reserved: should encode with pre-encode" + (parameterize ([current-output-port (open-output-bytes)]) + (define reserved (x:reserved uint32be #:pre-encode (λ (val) 42))) + (encode reserved #f) + (check-equal? (get-output-bytes (current-output-port)) (bytes 0 0 0 0)))) \ No newline at end of file diff --git a/xenomorph/xenomorph/test/stream-test.rkt b/xenomorph/xenomorph/test/stream-test.rkt new file mode 100644 index 00000000..0f6aaee6 --- /dev/null +++ b/xenomorph/xenomorph/test/stream-test.rkt @@ -0,0 +1,74 @@ +#lang racket/base +(require rackunit + racket/class + racket/stream + "../list.rkt" + "../base.rkt" + "../number.rkt" + "../stream.rkt" + "../base.rkt") + +#| +approximates +https://github.com/mbutterick/restructure/blob/master/test/LazyArray.coffee +|# + +(test-case + "stream: decode should decode items lazily" + (parameterize ([current-input-port (open-input-bytes (bytes 1 2 3 4 5))]) + (define xla (x:stream uint8 4)) + (define arr (decode xla)) + (check-equal? (stream-length arr) 4) + (check-equal? (pos (current-input-port)) 4) + (check-equal? (stream-ref arr 0) 1) + (check-equal? (stream-ref arr 1) 2) + (check-equal? (stream-ref arr 2) 3) + (check-equal? (stream-ref arr 3) 4))) + +(test-case + "stream: decode should decode items lazily with post-decode" + (parameterize ([current-input-port (open-input-bytes (bytes 1 2 3 4 5))]) + (define xla (x:stream uint8 4 #:post-decode (λ (str) (stream-map (λ (i) (* 2 i)) str)))) + (define arr (decode xla)) + (check-false (x:list? arr)) + (check-equal? (stream-length arr) 4) + (check-equal? (pos (current-input-port)) 4) + (check-equal? (stream-ref arr 0) 2) + (check-equal? (stream-ref arr 1) 4) + (check-equal? (stream-ref arr 2) 6) + (check-equal? (stream-ref arr 3) 8))) + +(test-case + "stream: should be able to convert to an array" + (parameterize ([current-input-port (open-input-bytes (bytes 1 2 3 4 5))]) + (define xla (x:stream uint8 4)) + (define arr (decode xla)) + (check-equal? (stream->list arr) '(1 2 3 4)))) + +(test-case + "stream: decode should decode length as number before array" + (parameterize ([current-input-port (open-input-bytes (bytes 4 1 2 3 4 5))]) + (define xla (x:stream uint8 uint8)) + (define arr (decode xla)) + (check-equal? (stream->list arr) '(1 2 3 4)))) + +(test-case + "stream: size should work with streams" + (parameterize ([current-input-port (open-input-bytes (bytes 1 2 3 4 5))]) + (define xla (x:stream uint8 4)) + (define arr (decode xla)) + (check-equal? (send xla x:size arr) 4))) + +(test-case + "stream: encode should work with streams" + (parameterize ([current-input-port (open-input-bytes (bytes 1 2 3 4 5))]) + (define xla (x:stream uint8 4)) + (define arr (decode xla)) + (check-equal? (encode xla arr #f) (bytes 1 2 3 4)))) + +(test-case + "stream: encode should work with streams with pre-encode" + (parameterize ([current-input-port (open-input-bytes (bytes 1 2 3 4 5))]) + (define xla (x:stream uint8 4 #:pre-encode (λ (str) (stream-map (λ (val) (* 2 val)) str)))) + (define arr (decode xla)) + (check-equal? (encode xla arr #f) (bytes 2 4 6 8)))) diff --git a/xenomorph/xenomorph/test/string-test.rkt b/xenomorph/xenomorph/test/string-test.rkt new file mode 100644 index 00000000..7d3e782e --- /dev/null +++ b/xenomorph/xenomorph/test/string-test.rkt @@ -0,0 +1,124 @@ +#lang racket/base +(require rackunit + racket/class + "../base.rkt" + "../string.rkt" + "../number.rkt" + "../base.rkt" + sugar/unstable/dict) + +#| +approximates +https://github.com/mbutterick/restructure/blob/master/test/String.coffee +|# + +(test-case + "string: decode fixed length" + (parameterize ([current-input-port (open-input-bytes #"testing")]) + (check-equal? (decode (x:string 7)) "testing"))) + +(test-case + "string: decode fixed length with post-decode" + (parameterize ([current-input-port (open-input-bytes #"testing")]) + (define xs (x:string 7 #:post-decode (λ (val) "ring a ding"))) + (check-equal? (decode xs) "ring a ding"))) + +(test-case + "string: decode length from parent key" + (parameterize ([current-input-port (open-input-bytes #"testing")]) + (check-equal? (decode (x:string (λ (p) (hash-ref p 'len))) (current-input-port) #:parent (mhash 'len 7)) "testing"))) + +(test-case + "string: decode length as number before string" + (parameterize ([current-input-port (open-input-bytes #"\x07testing")]) + (check-equal? (decode (x:string uint8) (current-input-port) #:parent (mhash 'len 7)) "testing"))) + +(test-case + "string: decode utf8" + (parameterize ([current-input-port (open-input-bytes (string->bytes/utf-8 "🍻"))]) + (check-equal? (decode (x:string 4 'utf8)) "🍻"))) + +(test-case + "string: decode encoding computed from function" + (parameterize ([current-input-port (open-input-bytes (string->bytes/utf-8 "🍻"))]) + (check-equal? (decode (x:string 4 (λ _ 'utf8))) "🍻"))) + +(test-case + "string: decode null-terminated string and read past terminator" + (parameterize ([current-input-port (open-input-bytes (string->bytes/utf-8 "🍻\x00"))]) + (check-equal? (decode (x:string #f 'utf8)) "🍻") + (check-equal? (pos (current-input-port)) 5))) + +(test-case + "string: decode remainder of buffer when null-byte missing" + (parameterize ([current-input-port (open-input-bytes (string->bytes/utf-8 "🍻"))]) + (check-equal? (decode (x:string #f 'utf8)) "🍻"))) + +(test-case + "string: size should use string length" + (check-equal? (send (x:string 7) x:size "testing") 7)) + +(test-case + "string: size should use correct encoding" + (check-equal? (send (x:string 10 'utf8) x:size "🍻") 4)) + +(test-case + "string: size should use encoding from function" + (check-equal? (send (x:string 10 (λ _ 'utf8)) x:size "🍻") 4)) + +(test-case + "string: should add size of length field before string" + (check-equal? (send (x:string uint8 'utf8) x:size "🍻") 5)) + +; todo: it "should work with utf16be encoding" + +(test-case + "string: size should take null-byte into account" + (check-equal? (send (x:string #f 'utf8) x:size "🍻") 5)) + +(test-case + "string: size should use defined length if no value given" + (check-equal? (send (x:string 10) x:size) 10)) + +(test-case + "string: encode using string length" + (parameterize ([current-output-port (open-output-bytes)]) + (encode (x:string 7) "testing") + (check-equal? (get-output-bytes (current-output-port)) #"testing"))) + +(test-case + "string: encode using string length and pre-encode" + (parameterize ([current-output-port (open-output-bytes)]) + (define xs (x:string 7 #:pre-encode (λ (val) (list->string (reverse (string->list val)))))) + (encode xs "testing") + (check-equal? (get-output-bytes (current-output-port)) #"gnitset"))) + +(test-case + "string: encode length as number before string" + (parameterize ([current-output-port (open-output-bytes)]) + (encode (x:string uint8) "testing") + (check-equal? (get-output-bytes (current-output-port)) #"\x07testing"))) + +(test-case + "string: encode length as number before string utf8" + (parameterize ([current-output-port (open-output-bytes)]) + (encode (x:string uint8 'utf8) "testing 😜") + (check-equal? (get-output-bytes (current-output-port)) (string->bytes/utf-8 "\x0ctesting 😜")))) + +(test-case + "string: encode utf8" + (parameterize ([current-output-port (open-output-bytes)]) + (encode (x:string 4 'utf8) "🍻" ) + (check-equal? (get-output-bytes (current-output-port)) (string->bytes/utf-8 "🍻")))) + +(test-case + "string: encode encoding computed from function" + (parameterize ([current-output-port (open-output-bytes)]) + (encode (x:string 4 (λ _ 'utf8)) "🍻") + (check-equal? (get-output-bytes (current-output-port)) (string->bytes/utf-8 "🍻")))) + +(test-case + "string: encode null-terminated string" + (parameterize ([current-output-port (open-output-bytes)]) + (encode (x:string #f 'utf8) "🍻" ) + (check-equal? (get-output-bytes (current-output-port)) (string->bytes/utf-8 "🍻\x00")))) \ No newline at end of file diff --git a/xenomorph/xenomorph/test/symbol-test.rkt b/xenomorph/xenomorph/test/symbol-test.rkt new file mode 100644 index 00000000..5f98298c --- /dev/null +++ b/xenomorph/xenomorph/test/symbol-test.rkt @@ -0,0 +1,120 @@ +#lang racket/base +(require rackunit + racket/class + "../base.rkt" + "../symbol.rkt" + "../number.rkt" + "../base.rkt" + sugar/unstable/dict) + + +(test-case + "symbol: decode fixed length" + (parameterize ([current-input-port (open-input-bytes #"testing")]) + (check-equal? (decode (x:symbol 7)) 'testing))) + +(test-case + "symbol: decode fixed length with post-decode" + (parameterize ([current-input-port (open-input-bytes #"testing")]) + (define xs (x:symbol 7 #:post-decode (λ (val) '|ring a ding|))) + (check-equal? (decode xs) '|ring a ding|))) + +(test-case + "symbol: decode length from parent key" + (parameterize ([current-input-port (open-input-bytes #"testing")]) + (check-equal? (decode (x:symbol (λ (p) (hash-ref p 'len))) (current-input-port) #:parent (mhash 'len 7)) 'testing))) + +(test-case + "symbol: decode length as number before symbol" + (parameterize ([current-input-port (open-input-bytes #"\x07testing")]) + (check-equal? (decode (x:symbol uint8) (current-input-port) #:parent (mhash 'len 7)) 'testing))) + +(test-case + "symbol: decode utf8" + (parameterize ([current-input-port (open-input-bytes (string->bytes/utf-8 "🍻"))]) + (check-equal? (decode (x:symbol 4 'utf8)) '🍻))) + +(test-case + "symbol: decode encoding computed from function" + (parameterize ([current-input-port (open-input-bytes (string->bytes/utf-8 "🍻"))]) + (check-equal? (decode (x:symbol 4 (λ _ 'utf8))) '🍻))) + +(test-case + "symbol: decode null-terminated symbol and read past terminator" + (parameterize ([current-input-port (open-input-bytes (string->bytes/utf-8 "🍻\x00"))]) + (check-equal? (decode (x:symbol #f 'utf8)) '🍻) + (check-equal? (pos (current-input-port)) 5))) + +(test-case + "symbol: decode remainder of buffer when null-byte missing" + (parameterize ([current-input-port (open-input-bytes (string->bytes/utf-8 "🍻"))]) + (check-equal? (decode (x:symbol #f 'utf8)) '🍻))) + +(test-case + "symbol: size should use symbol length" + (check-equal? (send (x:symbol 7) x:size 'testing) 7)) + +(test-case + "symbol: size should use correct encoding" + (check-equal? (send (x:symbol 10 'utf8) x:size '🍻) 4)) + +(test-case + "symbol: size should use encoding from function" + (check-equal? (send (x:symbol 10 (λ _ 'utf8)) x:size '🍻) 4)) + +(test-case + "symbol: should add size of length field before symbol" + (check-equal? (send (x:symbol uint8 'utf8) x:size '🍻) 5)) + +; todo: it "should work with utf16be encoding" + +(test-case + "symbol: size should take null-byte into account" + (check-equal? (send (x:symbol #f 'utf8) x:size '🍻) 5)) + +(test-case + "symbol: size should use defined length if no value given" + (check-equal? (send (x:symbol 10) x:size) 10)) + +(test-case + "symbol: encode using symbol length" + (parameterize ([current-output-port (open-output-bytes)]) + (encode (x:symbol 7) 'testing) + (check-equal? (get-output-bytes (current-output-port)) #"testing"))) + +(test-case + "symbol: encode using symbol length and pre-encode" + (parameterize ([current-output-port (open-output-bytes)]) + (define xs (x:symbol 7 #:pre-encode (λ (val) (string->symbol (list->string (reverse (string->list (symbol->string val)))))))) + (encode xs 'testing) + (check-equal? (get-output-bytes (current-output-port)) #"gnitset"))) + +(test-case + "symbol: encode length as number before symbol" + (parameterize ([current-output-port (open-output-bytes)]) + (encode (x:symbol uint8) 'testing) + (check-equal? (get-output-bytes (current-output-port)) #"\x07testing"))) + +(test-case + "symbol: encode length as number before symbol utf8" + (parameterize ([current-output-port (open-output-bytes)]) + (encode (x:symbol uint8 'utf8) "testing 😜") + (check-equal? (get-output-bytes (current-output-port)) (string->bytes/utf-8 "\x0ctesting 😜")))) + +(test-case + "symbol: encode utf8" + (parameterize ([current-output-port (open-output-bytes)]) + (encode (x:symbol 4 'utf8) '🍻 ) + (check-equal? (get-output-bytes (current-output-port)) (string->bytes/utf-8 "🍻")))) + +(test-case + "symbol: encode encoding computed from function" + (parameterize ([current-output-port (open-output-bytes)]) + (encode (x:symbol 4 (λ _ 'utf8)) '🍻) + (check-equal? (get-output-bytes (current-output-port)) (string->bytes/utf-8 "🍻")))) + +(test-case + "symbol: encode null-terminated symbol" + (parameterize ([current-output-port (open-output-bytes)]) + (encode (x:symbol #f 'utf8) '🍻 ) + (check-equal? (get-output-bytes (current-output-port)) (string->bytes/utf-8 "🍻\x00")))) \ No newline at end of file diff --git a/xenomorph/xenomorph/test/vector-test.rkt b/xenomorph/xenomorph/test/vector-test.rkt new file mode 100644 index 00000000..6b7b9592 --- /dev/null +++ b/xenomorph/xenomorph/test/vector-test.rkt @@ -0,0 +1,116 @@ +#lang racket/base +(require rackunit + racket/class + racket/vector + "../vector.rkt" + "../dict.rkt" + "../number.rkt" + "../pointer.rkt" + "../base.rkt" + sugar/unstable/dict) + +#| +approximates +https://github.com/mbutterick/restructure/blob/master/test/Array.coffee +|# + +(test-case + "vector: decode fixed length" + (parameterize ([current-input-port (open-input-bytes (bytes 1 2 3 4 5))]) + (check-equal? (decode (x:vector #:type uint8 #:length 4)) '#(1 2 3 4)))) + +(test-case + "vector: decode nested" + (parameterize ([current-input-port (open-input-bytes (bytes 1 2 3 4 5))]) + (check-equal? (decode (x:vector #:type (x:dict 'foo uint8) #:length 4)) + (vector (mhasheq 'foo 1) + (mhasheq 'foo 2) + (mhasheq 'foo 3) + (mhasheq 'foo 4))))) + +(test-case + "vector: decode with post-decode" + (parameterize ([current-input-port (open-input-bytes (bytes 1 2 3 4 5))]) + (define xa (x:vector #:type uint8 #:length 4 #:post-decode (λ (val) (vector-map (λ (x) (* 2 x)) val)))) + (check-equal? (decode xa) '#(2 4 6 8)))) + +(test-case + "vector: decode fixed number of bytes" + (parameterize ([current-input-port (open-input-bytes (bytes 1 2 3 4 5))]) + (check-equal? (decode (x:vector #:type uint16be #:length 4 #:count-bytes #t)) '#(258 772)))) + +(test-case + "vector: decode length from parent key" + (parameterize ([current-input-port (open-input-bytes (bytes 1 2 3 4 5))]) + (check-equal? (decode (x:vector #:type uint8 #:length (λ (p) (hash-ref p 'len))) (current-input-port) #:parent (mhash 'len 4)) '#(1 2 3 4)))) + +(test-case + "vector: decode byte count from parent key" + (parameterize ([current-input-port (open-input-bytes (bytes 1 2 3 4 5))]) + (check-equal? (decode (x:vector #:type uint16be #:length (λ (p) (hash-ref p 'len)) #:count-bytes #t) (current-input-port) #:parent (mhash 'len 4)) '#(258 772)))) + +(test-case + "vector: decode length as number before array" + (parameterize ([current-input-port (open-input-bytes (bytes 4 1 2 3 4 5))]) + (check-equal? (decode (x:vector #:type uint8 #:length uint8)) '#(1 2 3 4)))) + +(test-case + "vector: decode byte count as number before array" + (parameterize ([current-input-port (open-input-bytes (bytes 4 1 2 3 4 5))]) + (check-equal? (decode (x:vector #:type uint16be #:length uint8 #:count-bytes #t)) '#(258 772)))) + +(test-case + "vector: decode length from function" + (parameterize ([current-input-port (open-input-bytes (bytes 1 2 3 4 5))]) + (check-equal? (decode (x:vector #:type uint8 #:length (λ _ 4))) '#(1 2 3 4)))) + +(test-case + "vector: decode byte count from function" + (parameterize ([current-input-port (open-input-bytes (bytes 1 2 3 4 5))]) + (check-equal? (decode (x:vector #:type uint16be #:length (λ _ 4) #:count-bytes #t)) '#(258 772)))) + +(test-case + "vector: decode to the end of parent if no length given" + (parameterize ([current-input-port (open-input-bytes (bytes 1 2 3 4 5))]) + (check-equal? (decode (x:vector #:type uint8) (current-input-port) #:parent (mhash x:length-key 4 x:start-offset-key 0)) '#(1 2 3 4)))) + +(test-case + "vector: decode to the end of the stream if parent exists, but its length is 0" + (parameterize ([current-input-port (open-input-bytes (bytes 1 2 3 4 5))]) + (check-equal? (decode (x:vector #:type uint8) (current-input-port) #:parent (mhash x:length-key 0 x:start-offset-key 0)) '#(1 2 3 4 5)))) + +(test-case + "vector: decode to the end of the stream if no parent and length is given" + (parameterize ([current-input-port (open-input-bytes (bytes 1 2 3 4))]) + (check-equal? (decode (x:vector #:type uint8)) '#(1 2 3 4 )))) + +(test-case + "vector: use array length" + (check-equal? (send (x:vector #:type uint8 #:length 10) x:size '#(1 2 3 4)) 4)) + +(test-case + "vector: add size of length field before string" + (check-equal? (send (x:vector #:type uint8 #:length uint8) x:size '#(1 2 3 4)) 5)) + +(test-case + "vector: use defined length if no value given" + (check-equal? (send (x:vector #:type uint8 #:length 10) x:size) 10)) + +(test-case + "vector: encode using array length" + (check-equal? (encode (x:vector #:type uint8 #:length 4) '#(1 2 3 4) #f) (bytes 1 2 3 4))) + +(test-case + "vector: encode with pre-encode" + (parameterize ([current-input-port (open-input-bytes (bytes 1 2 3 4 5))]) + (define xa (x:vector #:type uint8 #:length 4 #:pre-encode (λ (val) (vector-map (λ (x) (* 2 x)) val)))) + (check-equal? (encode xa '#(1 2 3 4) #f) (bytes 2 4 6 8)))) + +(test-case + "vector: encode length as number before array" + (check-equal? (encode (x:vector #:type uint8 #:length uint8) '#(1 2 3 4) #f) (bytes 4 1 2 3 4))) + +(test-case + "vector: add pointers after array if length is encoded at start" + (check-equal? (encode (x:vector #:type (x:pointer uint8 #:dest-type uint8) + #:length uint8) '#(1 2 3 4) #f) (bytes 4 5 6 7 8 1 2 3 4))) \ No newline at end of file diff --git a/xenomorph/xenomorph/test/versioned-dict-test.rkt b/xenomorph/xenomorph/test/versioned-dict-test.rkt new file mode 100644 index 00000000..afd6402e --- /dev/null +++ b/xenomorph/xenomorph/test/versioned-dict-test.rkt @@ -0,0 +1,265 @@ +#lang racket/base +(require rackunit + racket/dict + racket/class + sugar/unstable/dict + "../base.rkt" + "../number.rkt" + "../string.rkt" + "../pointer.rkt" + "../dict.rkt" + "../base.rkt" + "../versioned-dict.rkt") + +#| +approximates +https://github.com/mbutterick/restructure/blob/master/test/VersionedStruct.coffee +|# + +(test-case + "versioned dict: decode should get version from number type" + (let ([vstruct (x:versioned-dict uint8 + (dictify + 0 (dictify 'name (x:string #:length uint8 #:encoding 'ascii) + 'age uint8) + 1 (x:dict 'name (x:string #:length uint8 #:encoding 'utf8) + 'age uint8 + 'gender uint8)))]) + (parameterize ([current-input-port (open-input-bytes #"\x00\x05roxyb\x15")]) + (check-equal? (decode vstruct) (mhasheq 'name "roxyb" 'age 21 x:version-key 0))) + (parameterize ([current-input-port (open-input-bytes (string->bytes/utf-8 "\x01\x0aroxyb 🤘\x15\x00"))]) + (check-equal? (decode vstruct) (mhasheq 'name "roxyb 🤘" 'age 21 x:version-key 1 'gender 0))))) + +(test-case + "versioned dict: decode should get version from number type, nested" + (let ([vstruct (x:versioned-dict uint8 + (dictify + 0 (dictify 'name (x:string #:length uint8 #:encoding 'ascii) + 'age uint8 + 'nested (x:dict 'foo uint8)) + 1 (x:dict 'name (x:string #:length uint8 #:encoding 'utf8) + 'age uint8 + 'gender uint8 + 'nested (x:dict 'foo uint8))))]) + (parameterize ([current-input-port (open-input-bytes #"\x00\x05roxyb\x15\x2a")]) + (check-equal? (decode vstruct) (mhasheq 'name "roxyb" 'age 21 'nested (mhasheq 'foo 42) x:version-key 0))) + (parameterize ([current-input-port (open-input-bytes (string->bytes/utf-8 "\x01\x0aroxyb 🤘\x15\x00\x2a"))]) + (check-equal? (decode vstruct) (mhasheq 'name "roxyb 🤘" 'age 21 x:version-key 1 'gender 0 'nested (mhasheq 'foo 42)))))) + +(test-case + "versioned dict: decode should throw for unknown version" + (let ([vstruct (x:versioned-dict uint8 + (dictify + 0 (dictify 'name (x:string #:length uint8 #:encoding 'ascii) + 'age uint8) + 1 (x:dict 'name (x:string #:length uint8 #:encoding 'utf8) + 'age uint8 + 'gender uint8)))]) + (parameterize ([current-input-port (open-input-bytes #"\x05\x05roxyb\x15")]) + (check-exn exn:fail:contract? (λ () (decode vstruct)))))) + +(test-case + "versioned dict: decode should support common header block" + (let ([vstruct (x:versioned-dict uint8 + (dictify + 'header (dictify 'age uint8 + 'alive uint8) + 0 (dictify 'name (x:string #:length uint8 #:encoding 'ascii)) + 1 (x:dict 'name (x:string #:length uint8 #:encoding 'utf8) + 'gender uint8)))]) + (parameterize ([current-input-port (open-input-bytes #"\x00\x15\x01\x05roxyb")]) + (check-equal? (decode vstruct) (mhasheq 'name "roxyb" + 'age 21 + 'alive 1 + x:version-key 0))) + (parameterize ([current-input-port (open-input-bytes (string->bytes/utf-8 "\x01\x15\x01\x0aroxyb 🤘\x00"))]) + (check-equal? (decode vstruct) (mhasheq 'name "roxyb 🤘" + 'age 21 + x:version-key 1 + 'alive 1 + 'gender 0))))) + +(test-case + "versioned dict: decode should support parent version key via procedure" + (let ([vstruct (x:versioned-dict (λ (p) (hash-ref p x:version-key)) + (dictify + 0 (dictify 'name (x:string #:length uint8 #:encoding 'ascii) + 'age uint8) + 1 (x:dict 'name (x:string #:length uint8 #:encoding 'utf8) + 'age uint8 + 'gender uint8)))]) + (parameterize ([current-input-port (open-input-bytes #"\x05roxyb\x15")]) + (check-equal? (decode vstruct #:parent (mhash x:version-key 0)) + (mhasheq 'name "roxyb" 'age 21 x:version-key 0))) + (parameterize ([current-input-port (open-input-bytes (string->bytes/utf-8 "\x0aroxyb 🤘\x15\x00"))]) + (check-equal? (decode vstruct #:parent (mhash x:version-key 1)) + (mhasheq 'name "roxyb 🤘" 'age 21 x:version-key 1 'gender 0))))) + +(test-case + "versioned dict: decode should support sub versioned dicts" + (let ([vstruct (x:versioned-dict uint8 + (dictify + 0 (dictify 'name (x:string #:length uint8 #:encoding 'ascii) + 'age uint8) + 1 (x:versioned-dict uint8 + (dictify + 0 (dictify 'name (x:string uint8)) + 1 (dictify 'name (x:string uint8) + 'isDessert uint8)))))]) + (parameterize ([current-input-port (open-input-bytes #"\x00\x05roxyb\x15")]) + (check-equal? (decode vstruct #:parent (mhash x:version-key 0)) + (mhasheq 'name "roxyb" 'age 21 x:version-key 0))) + (parameterize ([current-input-port (open-input-bytes #"\x01\x00\x05pasta")]) + (check-equal? (decode vstruct #:parent (mhash x:version-key 0)) + (mhasheq 'name "pasta" x:version-key 0))) + (parameterize ([current-input-port (open-input-bytes #"\x01\x01\x09ice cream\x01")]) + (check-equal? (decode vstruct #:parent (mhash x:version-key 0)) + (mhasheq 'name "ice cream" 'isDessert 1 x:version-key 1))))) + +(test-case + "versioned dict: decode should support process hook" + (let ([vstruct (x:versioned-dict #:post-decode (λ (val) (hash-set! val 'processed "true") val) + uint8 + (dictify + 0 (dictify 'name (x:string #:length uint8 #:encoding 'ascii) + 'age uint8) + 1 (x:dict 'name (x:string #:length uint8 #:encoding 'utf8) + 'age uint8 + 'gender uint8)))]) + (parameterize ([current-input-port (open-input-bytes #"\x00\x05roxyb\x15")]) + (check-equal? (decode vstruct) + (mhasheq 'name "roxyb" 'processed "true" 'age 21 x:version-key 0))))) + +(test-case + "versioned dict: size should compute the correct size" + (let ([vstruct (x:versioned-dict uint8 + (dictify + 0 (dictify 'name (x:string #:length uint8 #:encoding 'ascii) + 'age uint8) + 1 (x:dict 'name (x:string #:length uint8 #:encoding 'utf8) + 'age uint8 + 'gender uint8)))]) + (check-equal? (send vstruct x:size (mhasheq 'name "roxyb" + 'age 21 + x:version-key 0)) 8) + (check-equal? (send vstruct x:size (mhasheq 'name "roxyb 🤘" + 'gender 0 + 'age 21 + x:version-key 1)) 14))) + +(test-case + "versioned dict: size should throw for unknown version" + (let ([vstruct (x:versioned-dict uint8 + (dictify + 0 (dictify 'name (x:string #:length uint8 #:encoding 'ascii) + 'age uint8) + 1 (x:dict 'name (x:string #:length uint8 #:encoding 'utf8) + 'age uint8 + 'gender uint8)))]) + (check-exn exn:fail:contract? (λ () (send vstruct x:size (mhasheq 'name "roxyb" 'age 21 x:version-key 5)))))) + +(test-case + "versioned dict: size should support common header block" + (let ([struct (x:versioned-dict uint8 + (dictify + 'header (dictify 'age uint8 + 'alive uint8) + 0 (dictify 'name (x:string #:length uint8 #:encoding 'ascii)) + 1 (x:dict 'name (x:string #:length uint8 #:encoding 'utf8) + 'gender uint8)))]) + (check-equal? (send struct x:size (mhasheq 'name "roxyb" 'age 21 'alive 1 x:version-key 0)) 9) + (check-equal? (send struct x:size (mhasheq 'name "roxyb 🤘" 'gender 0 'age 21 'alive 1 x:version-key 1)) 15))) + +(test-case + "versioned dict: size should compute the correct size with pointers" + (let ([vstruct (x:versioned-dict uint8 + (dictify + 0 (dictify 'name (x:string #:length uint8 #:encoding 'ascii) + 'age uint8) + 1 (x:dict 'name (x:string #:length uint8 #:encoding 'utf8) + 'age uint8 + 'ptr (x:pointer uint8 #:dest-type (x:string uint8)))))]) + (check-equal? (send vstruct x:size (mhasheq 'name "roxyb" + 'age 21 + x:version-key 1 + 'ptr "hello")) 15))) + +(test-case + "versioned dict: size should throw if no value is given" + (let ([vstruct (x:versioned-dict uint8 + (dictify + 0 (dictify 'name (x:string #:length uint8 #:encoding 'ascii) + 'age uint8) + 1 (x:dict 'name (x:string #:length uint8 #:encoding 'utf8) + 'age uint8 + 'gender uint8)))]) + (check-exn exn:fail:contract? (λ () (send vstruct x:size))))) + +(test-case + "versioned dict: encode should encode objects to buffers" + (let ([vstruct (x:versioned-dict uint8 + (dictify + 0 (dictify 'name (x:string #:length uint8 #:encoding 'ascii) + 'age uint8) + 1 (x:dict 'name (x:string #:length uint8 #:encoding 'utf8) + 'age uint8 + 'gender uint8)))] + [op (open-output-bytes)]) + (encode vstruct (mhasheq 'name "roxyb" 'age 21 x:version-key 0) op) + (encode vstruct (mhasheq 'name "roxyb 🤘" 'age 21 'gender 0 x:version-key 1) op) + (check-equal? (get-output-bytes op) (string->bytes/utf-8 "\x00\x05roxyb\x15\x01\x0aroxyb 🤘\x15\x00")))) + +(test-case + "versioned dict: encode should throw for unknown version" + (let ([vstruct (x:versioned-dict uint8 + (dictify + 0 (dictify 'name (x:string #:length uint8 #:encoding 'ascii) + 'age uint8) + 1 (x:dict 'name (x:string #:length uint8 #:encoding 'utf8) + 'age uint8 + 'gender uint8)))] + [op (open-output-bytes)]) + (check-exn exn:fail:contract? (λ () (encode vstruct op (mhasheq 'name "roxyb" 'age 21 x:version-key 5)))))) + +(test-case + "versioned dict: encode should support common header block" + (let ([vstruct (x:versioned-dict uint8 + (dictify + 'header (dictify 'age uint8 + 'alive uint8) + 0 (dictify 'name (x:string #:length uint8 #:encoding 'ascii)) + 1 (x:dict 'name (x:string #:length uint8 #:encoding 'utf8) + 'gender uint8)))] + [op (open-output-bytes)]) + (encode vstruct (mhasheq 'name "roxyb" 'age 21 'alive 1 x:version-key 0) op) + (encode vstruct (mhasheq 'name "roxyb 🤘" 'gender 0 'age 21 'alive 1 x:version-key 1) op) + (check-equal? (get-output-bytes op) (string->bytes/utf-8 "\x00\x15\x01\x05roxyb\x01\x15\x01\x0aroxyb 🤘\x00")))) + +(test-case + "versioned dict: encode should encode pointer data after structure" + (let ([vstruct (x:versioned-dict uint8 + (dictify + 0 (dictify 'name (x:string #:length uint8 #:encoding 'ascii) + 'age uint8) + 1 (x:dict 'name (x:string #:length uint8 #:encoding 'utf8) + 'age uint8 + 'ptr (x:pointer uint8 #:dest-type (x:string uint8)))))] + [op (open-output-bytes)]) + (encode vstruct (mhasheq x:version-key 1 'name "roxyb" 'age 21 'ptr "hello") op) + + (check-equal? (get-output-bytes op) (string->bytes/utf-8 "\x01\x05roxyb\x15\x09\x05hello")))) + +#;(test-case + "versioned dict: encode should support preEncode hook" + (let ([vstruct (x:versioned-dict uint8 + (dictify + 0 (dictify 'name (x:string #:length uint8 #:encoding 'ascii) + 'age uint8) + 1 (x:dict 'name (x:string #:length uint8 #:encoding 'utf8) + 'age uint8 + 'gender uint8)))] + [op (open-output-bytes)]) + (set-pre-encode! vstruct (λ (val) (hash-set! val x:version-key (if (hash-ref val 'gender #f) 1 0)) val)) + (encode vstruct (mhasheq 'name "roxyb" 'age 21 x:version-key 0) op) + (encode vstruct (mhasheq 'name "roxyb 🤘" 'age 21 'gender 0) op) + (check-equal? (get-output-bytes op) (string->bytes/utf-8 "\x00\x05roxyb\x15\x01\x0aroxyb 🤘\x15\x00")))) \ No newline at end of file diff --git a/xenomorph/xenomorph/util.rkt b/xenomorph/xenomorph/util.rkt new file mode 100644 index 00000000..793c495c --- /dev/null +++ b/xenomorph/xenomorph/util.rkt @@ -0,0 +1,52 @@ +#lang racket/base +(require racket/match racket/port racket/dict racket/format racket/string racket/sequence "int.rkt" "base.rkt") +(provide (all-defined-out)) + +(define (length-resolvable? x) + (or (not x) + (exact-nonnegative-integer? x) + (procedure? x) + (x:int? x))) + +(define (resolve-length x input-port [parent #f]) + (match x + [#false #false] + [(? exact-nonnegative-integer?) x] + [(? procedure? proc) (proc parent)] + [(? x:int?) #:when input-port (decode x input-port)] + [_ (raise-argument-error 'resolve-length "fixed-size argument" x)])) + +(define (pretty-print-bytes bstr + #:port [port-arg (current-output-port)] + #:radix [radix 16] + #:offset-min-width [offset-min-width 4] + #:row-length [bytes-per-row 16] + #:max-value [max-value 256]) + (define port (or port-arg (open-output-bytes))) + (define bs (bytes->list bstr)) + (define offset-str-length + (max offset-min-width + (string-length (let ([lbs (length bs)]) + (~r (- lbs (remainder lbs bytes-per-row))))))) + (parameterize ([current-output-port port]) + (display + (string-join + (for/list ([row-bs (in-slice bytes-per-row bs)] + [ridx (in-naturals)]) + (string-append + (let ([idxstr (~r (* ridx bytes-per-row))]) + (string-append idxstr + (make-string (- offset-str-length (string-length idxstr)) #\space))) + " " + (string-join + (let* ([max-digit-width (string-length (~r (sub1 max-value) #:base radix))] + [strs (for/list ([b (in-list row-bs)]) + (~r b #:base radix #:min-width max-digit-width #:pad-string "0"))]) + (for/list ([2strs (in-slice 2 strs)]) + (string-join 2strs "·"))) " ") + (let ([shortfall (* (- bytes-per-row (length row-bs)) 3)]) + (make-string shortfall #\space)) + " " + (format "~a" (bytes->string/utf-8 (apply bytes row-bs))))) "\n"))) + (unless port-arg + (get-output-string port))) \ No newline at end of file diff --git a/xenomorph/xenomorph/vector.rkt b/xenomorph/xenomorph/vector.rkt new file mode 100644 index 00000000..d0251f7c --- /dev/null +++ b/xenomorph/xenomorph/vector.rkt @@ -0,0 +1,63 @@ +#lang debug racket/base +(require racket/class + racket/contract + racket/match + racket/sequence + "base.rkt" + "number.rkt" + "util.rkt" + "list.rkt" + sugar/unstable/dict) +(provide (all-defined-out)) + +(define x:vector% + (class x:list% + (super-new) + + (define/override (pre-encode val) + (unless (or (vector? val) (sequence? val)) + (raise-argument-error 'encode "vector or sequence" val)) + (if (vector? val) (vector->list val) val)) + + (define/override (post-decode val) (list->vector val)))) + +(define (x:vector? x) (is-a? x x:vector%)) + +(define/contract (x:vector + [type-arg #f] + [len-arg #f] + #:type [type-kwarg #f] + #:length [len-kwarg #f] + #:count-bytes [count-bytes? #f] + #:pre-encode [pre-proc #f] + #:post-decode [post-proc #f] + #:base-class [base-class x:vector%]) + (() + ((or/c xenomorphic? #false) + (or/c length-resolvable? #false) + #:type (or/c xenomorphic? #false) + #:length (or/c length-resolvable? #false) + #:count-bytes boolean? + #:pre-encode (or/c (any/c . -> . any/c) #false) + #:post-decode (or/c (any/c . -> . any/c) #false) + #:base-class (λ (c) (subclass? c x:vector%))) + . ->* . + x:vector?) + (define type (or type-arg type-kwarg)) + (unless (xenomorphic? type) + (raise-argument-error 'x:vector "xenomorphic type" type)) + (define len (or len-arg len-kwarg)) + (unless (length-resolvable? len) + (raise-argument-error 'x:vector "resolvable length" len)) + (new (generate-subclass base-class pre-proc post-proc) + [type type] + [len len] + [count-bytes? count-bytes?])) + +(module+ test + (require rackunit) + (check-equal? (decode (x:vector uint16be 3) #"ABCDEF") '#(16706 17220 17734)) + (check-equal? (encode (x:vector uint16be 3) '(16706 17220 17734) #f) #"ABCDEF") + (check-equal? (encode (x:vector uint16be 3) '#(16706 17220 17734) #f) #"ABCDEF") + (check-equal? (send (x:vector uint16be) x:size '#(1 2 3)) 6) + (check-equal? (send (x:vector doublebe) x:size '#(1 2 3 4 5)) 40)) \ No newline at end of file diff --git a/xenomorph/xenomorph/versioned-dict.rkt b/xenomorph/xenomorph/versioned-dict.rkt new file mode 100644 index 00000000..706f09a6 --- /dev/null +++ b/xenomorph/xenomorph/versioned-dict.rkt @@ -0,0 +1,157 @@ +#lang debug racket/base +(require "base.rkt" + "dict.rkt" + "util.rkt" + racket/dict + racket/match + racket/class + racket/contract + sugar/unstable/dict) +(provide (all-defined-out)) + +#| +approximates +https://github.com/mbutterick/restructure/blob/master/src/VersionedStruct.coffee +|# + +(define (version-type? x) + (and x (or (length-resolvable? x) (xenomorphic? x)))) + +(define x:versioned-dict% + (class x:dict% + (super-new) + (init-field [(@type type)] + [(@versions versions)] + [(@version-key version-key)]) + + (unless (version-type? @type) + (raise-argument-error 'x:versioned-dict "integer, procedure, or xenomorphic" @type)) + + (unless (and (dict? @versions) (andmap (λ (v) (or (dict? v) (x:dict? v))) (dict-values @versions))) + (raise-argument-error 'x:versioned-dict "dict of dicts or structish" @versions)) + + (define (select-field-set val) + (define version-key + (or (dict-ref val @version-key #f) + (raise-argument-error 'encode "value for version key" @version-key))) + (define field-object + (or (dict-ref @versions version-key #f) + (raise-argument-error 'encode (format "valid field version: ~v" (dict-keys @versions)) version-key))) + (if (x:dict? field-object) (get-field fields field-object) field-object)) + + (define/override (x:decode port parent [length 0]) + (define res (setup-private-fields port parent length)) + (define which-version (match @type + [(? integer? int) int] + [(? procedure? proc) #:when parent (proc parent)] + [(or (? symbol?) (? procedure?)) + (raise-argument-error 'decode "valid parent" parent)] + [_ (send @type x:decode port parent)])) + (dict-set! res @version-key which-version) + + (match (dict-ref @versions 'header #f) + [#false (void)] + [header-val (parse-fields port res header-val)]) + + (match (dict-ref @versions which-version #f) + [#false (raise-argument-error 'decode + (format "valid field version: ~v" (dict-keys @versions)) which-version)] + [(? x:versioned-dict? vs) (send vs x:decode port parent)] + [field-object (parse-fields port res field-object)])) + + (define/override (pre-encode val) val) + + (define/override (x:encode field-data port [parent-arg #f]) + (unless (dict? field-data) + (raise-argument-error 'encode "dict" field-data)) + + (define parent (mhasheq x:pointers-key null + x:start-offset-key (pos port) + x:parent-key parent-arg + x:val-key field-data + x:pointer-size-key 0)) + (hash-set! parent x:pointer-offset-key (+ (pos port) (x:size field-data parent #f))) + + (unless (or (symbol? @type) (procedure? @type)) + (send @type x:encode (dict-ref field-data @version-key #f) port parent)) + + (for ([(key type) (in-dict (dict-ref @versions 'header null))]) + (send type x:encode (dict-ref field-data key) port parent)) + + (define fields (select-field-set field-data)) + (unless (andmap (λ (key) (member key (dict-keys field-data))) (dict-keys fields)) + (raise-argument-error 'encode (format "hash that contains superset of xversioned-dict keys: ~a" (dict-keys fields)) (dict-keys field-data))) + + (for ([(key type) (in-dict fields)]) + (send type x:encode (dict-ref field-data key) port parent)) + + (let loop ([i 0]) + (when (< i (length (dict-ref parent x:pointers-key))) + (define ptr (list-ref (dict-ref parent x:pointers-key) i)) + (match ptr + [(x:ptr type val parent) i (send type x:encode val port parent)]) + (loop (add1 i))))) + + (define/override (x:size [val #f] [parent-arg #f] [include-pointers #t]) + (unless val + (raise-argument-error 'size "value" val)) + + (define parent (mhasheq x:parent-key parent-arg + x:val-key val + x:pointer-size-key 0)) + (define version-size + (match @type + [(or (? symbol?) (? procedure?)) 0] + [_ (send @type x:size (dict-ref val @version-key) parent)])) + + (define header-size + (for/sum ([(key type) (in-dict (dict-ref @versions 'header null))]) + (send type x:size (and val (dict-ref val key)) parent))) + + (define fields-size + (for/sum ([(key type) (in-dict (select-field-set val))]) + (send type x:size (and val (send type pre-encode (dict-ref val key))) parent))) + + (define pointer-size (if include-pointers (dict-ref parent x:pointer-size-key) 0)) + + (+ version-size header-size fields-size pointer-size)))) + +(define (x:versioned-dict? x) (is-a? x x:versioned-dict%)) + +(define/contract (x:versioned-dict + [type-arg #false] + [versions-arg #false] + #:type [type-kw #false] + #:versions [versions-kw #false] + #:version-key [version-key x:version-key] + #:pre-encode [pre-proc #f] + #:post-decode [post-proc #f] + #:base-class [base-class x:versioned-dict%]) + (() + ((or/c version-type? #false) + (or/c dict? #false) + #:type (or/c version-type? #false) + #:versions (or/c dict? #false) + #:version-key (or/c symbol? #false) + #:pre-encode (or/c (any/c . -> . any/c) #false) + #:post-decode (or/c (any/c . -> . any/c) #false) + #:base-class (λ (c) (subclass? c x:versioned-dict%))) + . ->* . + x:versioned-dict?) + (define type (or type-arg type-kw)) + (unless (version-type? type) + (raise-argument-error 'x:versioned-dict "version-type?" type)) + (define versions (or versions-arg versions-kw)) + (unless (dict? versions) + (raise-argument-error 'x:versioned-dict "dict" versions)) + (new (generate-subclass base-class pre-proc post-proc) + [type type] + [versions versions] + [version-key version-key] + [fields #f])) + + +;; bw compat +(define x:versioned-struct% x:versioned-dict%) +(define x:versioned-struct? x:versioned-dict?) +(define x:versioned-struct x:versioned-dict) \ No newline at end of file