From 38c13f17b6e78a23be86bb02a21b4d2354443421 Mon Sep 17 00:00:00 2001 From: Matthew Butterick Date: Sun, 16 Dec 2018 09:43:25 -0800 Subject: [PATCH] string --- xenomorph/xenomorph/string.rkt | 177 ++++++++++------------- xenomorph/xenomorph/test/string-test.rkt | 18 ++- 2 files changed, 92 insertions(+), 103 deletions(-) diff --git a/xenomorph/xenomorph/string.rkt b/xenomorph/xenomorph/string.rkt index 4432472b..a3f7fb66 100644 --- a/xenomorph/xenomorph/string.rkt +++ b/xenomorph/xenomorph/string.rkt @@ -1,5 +1,5 @@ #lang debug racket/base -(require racket/dict "helper.rkt" "util.rkt" "number.rkt") +(require racket/class racket/dict "helper.rkt" "util.rkt" "number.rkt") (provide (all-defined-out)) #| @@ -7,122 +7,105 @@ approximates https://github.com/mbutterick/restructure/blob/master/src/String.coffee |# -(define (read-encoded-string len [encoding 'ascii]) - (define proc (case encoding - [(utf16le) (error 'bah)] - [(ucs2) (error 'bleh)] - [(utf8) bytes->string/utf-8] - [(ascii) bytes->string/latin-1] - [else values])) - (proc (read-bytes len))) - -(define (write-encoded-string string [encoding 'ascii]) - ;; todo: handle encodings correctly. - ;; right now just utf8 and ascii are correct - (define proc (case encoding - [(ucs2 utf8 ascii) string->bytes/utf-8] - [(utf16le) (error 'swap-bytes-unimplemented)] - [else (error 'unsupported-string-encoding)])) - (write-bytes (proc string))) +(define (decode-string len port [encoding 'ascii]) + (define decoder (case encoding + [(utf16le ucs2) (error 'unsupported-string-encoding)] + [(utf8) bytes->string/utf-8] + [(ascii) bytes->string/latin-1] + [else values])) + (decoder (read-bytes len port))) + +(define (encode-string string [encoding 'ascii]) + (define encoder (case encoding + [(ucs2 utf8 ascii) string->bytes/utf-8] + [(utf16le) (error 'swap-bytes-unimplemented)] + [else (error 'unsupported-string-encoding)])) + (encoder string)) (define (count-nonzero-chars port) - ;; helper function for String - ;; counts nonzero chars from current position (bytes-length (car (regexp-match-peek "[^\u0]*" port)))) (define (bytes-left-in-port? port) (not (eof-object? (peek-byte port)))) -(define (byte-length val encoding) - (define encoder - (case encoding - [(ascii utf8) string->bytes/utf-8])) - (bytes-length (encoder (format "~a" val)))) - -(define/post-decode (xstring-decode xs [port-arg (current-input-port)] #:parent [parent #f]) - (define port (->input-port port-arg)) - (parameterize ([current-input-port port]) - (let ([len (or (resolve-length (xstring-len xs) #:parent parent) (count-nonzero-chars port))] - [encoding (if (procedure? (xstring-encoding xs)) - (or ((xstring-encoding xs) parent) 'ascii) - (xstring-encoding xs))] - [adjustment (if (and (not (xstring-len xs)) (bytes-left-in-port? port)) 1 0)]) - (define string (read-encoded-string len encoding)) - (pos port (+ (pos port) adjustment)) - string))) - -(define/pre-encode (xstring-encode xs val [port-arg (current-output-port)] #:parent [parent #f]) - (define port (if (output-port? port-arg) port-arg (open-output-bytes))) - (parameterize ([current-output-port port]) - (let* ([val (format "~a" val)] - [encoding (if (procedure? (xstring-encoding xs)) - (or ((xstring-encoding xs) (and parent (dict-ref parent val)) 'ascii)) - (xstring-encoding xs))]) - (define encoded-length (byte-length val encoding)) - (when (and (exact-nonnegative-integer? (xstring-len xs)) (> encoded-length (xstring-len xs))) - (raise-argument-error 'xstring-encode (format "string no longer than ~a" (xstring-len xs)) val)) - (when (xint? (xstring-len xs)) - (encode (xstring-len xs) encoded-length)) - (write-encoded-string val encoding) - (when (not (xstring-len xs)) (write-byte #x00)) ; null terminated when no len - (unless port-arg (get-output-bytes port))))) - -(define/finalize-size (xstring-size xs [val #f] #:parent [parent #f]) - (cond - [val (define encoding (if (procedure? (xstring-encoding xs)) - (or ((xstring-encoding xs) (and parent (dict-ref parent val)) 'ascii)) - (xstring-encoding xs))) - (define string-size (byte-length val (if (eq? encoding 'utf16be) 'utf16le encoding))) - (define strlen-size (cond - [(not (xstring-len xs)) 1] - [(xint? (xstring-len xs)) (size (xstring-len xs))] - [else 0])) - (+ string-size strlen-size)] - [else (resolve-length (xstring-len xs) #f #:parent parent)])) - -(struct xstring xbase (len encoding) #:transparent - #:methods gen:xenomorphic - [(define decode xstring-decode) - (define xdecode xstring-decode) - (define encode xstring-encode) - (define size xstring-size)]) +(define xstring% + (class xenobase% + (super-new) + (init-field [(@len len)] [(@encoding encoding)]) + + (unless (length-resolvable? @len) + (raise-argument-error 'xstring "length-resolvable?" @len)) + (unless (or (procedure? @encoding) (memq @encoding supported-encodings)) + (raise-argument-error 'xstring (format "procedure or member of ~v" supported-encodings) @encoding)) + + (define/augment (xxdecode port parent) + (define len (or (resolve-length @len port #:parent parent) (count-nonzero-chars port))) + (define encoding (if (procedure? @encoding) + (or (@encoding parent) 'ascii) + @encoding)) + (define adjustment (if (and (not @len) (bytes-left-in-port? port)) 1 0)) + (begin0 + (decode-string len port encoding) + (pos port (+ (pos port) adjustment)))) + + (define/augment (xxencode val-arg port [parent #f]) + (define val (format "~a" val-arg)) + (define encoding (if (procedure? @encoding) + (or (@encoding (and parent (dict-ref parent val)) 'ascii)) + @encoding)) + (define encoded-str (encode-string val encoding)) + (define encoded-length (bytes-length encoded-str)) + (when (and (exact-nonnegative-integer? @len) (> encoded-length @len)) + (raise-argument-error 'xstring-encode (format "string no longer than ~a" @len) val)) + (when (xint? @len) + (send @len xxencode encoded-length port parent)) + (define string-terminator (if (not @len) (bytes 0) (bytes))) ; null terminated when no len + (bytes-append encoded-str string-terminator)) + + (define/augment (xxsize [val #f] [parent #f]) + (cond + [val (define encoding (if (procedure? @encoding) + (or (@encoding (and parent (dict-ref parent val)) 'ascii)) + @encoding)) + (define string-size (bytes-length (encode-string val encoding))) + (define strlen-size (cond + [(not @len) 1] + [(xint? @len) (send @len xxsize)] + [else 0])) + (+ string-size strlen-size)] + [else (resolve-length @len #f #:parent parent)])))) (define supported-encodings '(ascii utf8)) (define (+xstring [len-arg #f] [enc-arg #f] - #:length [len-kwarg #f] #:encoding [enc-kwarg #f]) + #:length [len-kwarg #f] + #:encoding [enc-kwarg #f] + #:subclass [class xstring%]) (define len (or len-arg len-kwarg)) (define encoding (or enc-arg enc-kwarg 'ascii)) - (unless (length-resolvable? len) - (raise-argument-error '+xarray "length-resolvable?" len)) - (unless (or (procedure? encoding) (memq encoding supported-encodings)) - (raise-argument-error '+xarray (format "procedure or member of ~v" supported-encodings) encoding)) - (xstring len encoding)) - -(define (xsymbol-decode xs [port-arg (current-input-port)] #:parent [parent #f]) - (string->symbol (xstring-decode xs port-arg #:parent parent))) + (new class [len len] [encoding encoding])) -(define (xsymbol-encode xs val [port (current-output-port)] #:parent [parent #f]) - (unless (xsymbol? xs) - (raise-argument-error 'encode "xsymbol instance" xs)) - (unless (or (string? val) (symbol? val)) - (raise-argument-error 'xsymbol-encode "symbol or string" val)) - (xstring-encode xs (if (symbol? val) val (string->symbol val)) port #:parent parent)) +(define xsymbol% + (class xstring% + (super-new) + (inherit-field len encoding) -(struct xsymbol xstring () #:transparent - #:methods gen:xenomorphic - [(define decode xsymbol-decode) - (define xdecode xsymbol-decode) - (define encode xsymbol-encode) - (define size xstring-size)]) + (define/override (pre-encode val) + (unless (or (string? val) (symbol? val)) + (raise-argument-error 'xsymbol-encode "symbol or string" val)) + (if (symbol? val) (symbol->string val) val)) + + (define/override (post-decode val) (string->symbol val)))) (define (+xsymbol [len-arg #f] [enc-arg #f] - #:length [len-kwarg #f] #:encoding [enc-kwarg #f]) + #:length [len-kwarg #f] + #:encoding [enc-kwarg #f] + #:subclass [class xsymbol%]) (define len (or len-arg len-kwarg)) (define encoding (or enc-arg enc-kwarg 'utf8)) - (xsymbol len encoding)) + (new class [len len] [encoding encoding])) (module+ test - (require rackunit) + (require rackunit "generic.rkt") (define S-fixed (+xstring 4 'utf8)) (check-equal? (encode S-fixed "Mike" #f) #"Mike") (check-exn exn:fail? (λ () (encode S-fixed "Mikes" #f))) ; too long for fixed string diff --git a/xenomorph/xenomorph/test/string-test.rkt b/xenomorph/xenomorph/test/string-test.rkt index e600441b..d21b9bfb 100644 --- a/xenomorph/xenomorph/test/string-test.rkt +++ b/xenomorph/xenomorph/test/string-test.rkt @@ -1,8 +1,10 @@ #lang racket/base (require rackunit + racket/class "../helper.rkt" "../string.rkt" "../number.rkt" + "../generic.rkt" sugar/unstable/dict) #| @@ -18,19 +20,21 @@ https://github.com/mbutterick/restructure/blob/master/test/String.coffee (test-case "decode fixed length with post-decode" (parameterize ([current-input-port (open-input-bytes #"testing")]) - (define xs (+xstring 7)) - (set-post-decode! xs (λ (val) "ring a ding")) + (define mystr% (class xstring% + (super-new) + (define/override (post-decode val) "ring a ding"))) + (define xs (+xstring 7 #:subclass mystr%)) (check-equal? (decode xs) "ring a ding"))) (test-case "decode length from parent key" (parameterize ([current-input-port (open-input-bytes #"testing")]) - (check-equal? (xdecode (+xstring 'len) #:parent (mhash 'len 7)) "testing"))) + (check-equal? (send (+xstring 'len) xxdecode (current-input-port) (mhash 'len 7)) "testing"))) (test-case "decode length as number before string" (parameterize ([current-input-port (open-input-bytes #"\x07testing")]) - (check-equal? (xdecode (+xstring uint8) #:parent (mhash 'len 7)) "testing"))) + (check-equal? (send (+xstring uint8) xxdecode (current-input-port) (mhash 'len 7)) "testing"))) (test-case "decode utf8" @@ -88,8 +92,10 @@ https://github.com/mbutterick/restructure/blob/master/test/String.coffee (test-case "encode using string length and pre-encode" (parameterize ([current-output-port (open-output-bytes)]) - (define xs (+xstring 7)) - (set-pre-encode! xs (compose1 list->string reverse string->list)) + (define mystr% (class xstring% + (super-new) + (define/override (pre-encode val) (list->string (reverse (string->list val)))))) + (define xs (+xstring 7 #:subclass mystr%)) (encode xs "testing") (check-equal? (get-output-bytes (current-output-port)) #"gnitset")))