main
Matthew Butterick 6 years ago
parent 34a2c117a8
commit 38c13f17b6

@ -1,5 +1,5 @@
#lang debug racket/base #lang debug racket/base
(require racket/dict "helper.rkt" "util.rkt" "number.rkt") (require racket/class racket/dict "helper.rkt" "util.rkt" "number.rkt")
(provide (all-defined-out)) (provide (all-defined-out))
#| #|
@ -7,122 +7,105 @@ approximates
https://github.com/mbutterick/restructure/blob/master/src/String.coffee https://github.com/mbutterick/restructure/blob/master/src/String.coffee
|# |#
(define (read-encoded-string len [encoding 'ascii]) (define (decode-string len port [encoding 'ascii])
(define proc (case encoding (define decoder (case encoding
[(utf16le) (error 'bah)] [(utf16le ucs2) (error 'unsupported-string-encoding)]
[(ucs2) (error 'bleh)] [(utf8) bytes->string/utf-8]
[(utf8) bytes->string/utf-8] [(ascii) bytes->string/latin-1]
[(ascii) bytes->string/latin-1] [else values]))
[else values])) (decoder (read-bytes len port)))
(proc (read-bytes len)))
(define (encode-string string [encoding 'ascii])
(define (write-encoded-string string [encoding 'ascii]) (define encoder (case encoding
;; todo: handle encodings correctly. [(ucs2 utf8 ascii) string->bytes/utf-8]
;; right now just utf8 and ascii are correct [(utf16le) (error 'swap-bytes-unimplemented)]
(define proc (case encoding [else (error 'unsupported-string-encoding)]))
[(ucs2 utf8 ascii) string->bytes/utf-8] (encoder string))
[(utf16le) (error 'swap-bytes-unimplemented)]
[else (error 'unsupported-string-encoding)]))
(write-bytes (proc string)))
(define (count-nonzero-chars port) (define (count-nonzero-chars port)
;; helper function for String
;; counts nonzero chars from current position
(bytes-length (car (regexp-match-peek "[^\u0]*" port)))) (bytes-length (car (regexp-match-peek "[^\u0]*" port))))
(define (bytes-left-in-port? port) (define (bytes-left-in-port? port)
(not (eof-object? (peek-byte port)))) (not (eof-object? (peek-byte port))))
(define (byte-length val encoding) (define xstring%
(define encoder (class xenobase%
(case encoding (super-new)
[(ascii utf8) string->bytes/utf-8])) (init-field [(@len len)] [(@encoding encoding)])
(bytes-length (encoder (format "~a" val))))
(unless (length-resolvable? @len)
(define/post-decode (xstring-decode xs [port-arg (current-input-port)] #:parent [parent #f]) (raise-argument-error 'xstring "length-resolvable?" @len))
(define port (->input-port port-arg)) (unless (or (procedure? @encoding) (memq @encoding supported-encodings))
(parameterize ([current-input-port port]) (raise-argument-error 'xstring (format "procedure or member of ~v" supported-encodings) @encoding))
(let ([len (or (resolve-length (xstring-len xs) #:parent parent) (count-nonzero-chars port))]
[encoding (if (procedure? (xstring-encoding xs)) (define/augment (xxdecode port parent)
(or ((xstring-encoding xs) parent) 'ascii) (define len (or (resolve-length @len port #:parent parent) (count-nonzero-chars port)))
(xstring-encoding xs))] (define encoding (if (procedure? @encoding)
[adjustment (if (and (not (xstring-len xs)) (bytes-left-in-port? port)) 1 0)]) (or (@encoding parent) 'ascii)
(define string (read-encoded-string len encoding)) @encoding))
(pos port (+ (pos port) adjustment)) (define adjustment (if (and (not @len) (bytes-left-in-port? port)) 1 0))
string))) (begin0
(decode-string len port encoding)
(define/pre-encode (xstring-encode xs val [port-arg (current-output-port)] #:parent [parent #f]) (pos port (+ (pos port) adjustment))))
(define port (if (output-port? port-arg) port-arg (open-output-bytes)))
(parameterize ([current-output-port port]) (define/augment (xxencode val-arg port [parent #f])
(let* ([val (format "~a" val)] (define val (format "~a" val-arg))
[encoding (if (procedure? (xstring-encoding xs)) (define encoding (if (procedure? @encoding)
(or ((xstring-encoding xs) (and parent (dict-ref parent val)) 'ascii)) (or (@encoding (and parent (dict-ref parent val)) 'ascii))
(xstring-encoding xs))]) @encoding))
(define encoded-length (byte-length val encoding)) (define encoded-str (encode-string val encoding))
(when (and (exact-nonnegative-integer? (xstring-len xs)) (> encoded-length (xstring-len xs))) (define encoded-length (bytes-length encoded-str))
(raise-argument-error 'xstring-encode (format "string no longer than ~a" (xstring-len xs)) val)) (when (and (exact-nonnegative-integer? @len) (> encoded-length @len))
(when (xint? (xstring-len xs)) (raise-argument-error 'xstring-encode (format "string no longer than ~a" @len) val))
(encode (xstring-len xs) encoded-length)) (when (xint? @len)
(write-encoded-string val encoding) (send @len xxencode encoded-length port parent))
(when (not (xstring-len xs)) (write-byte #x00)) ; null terminated when no len (define string-terminator (if (not @len) (bytes 0) (bytes))) ; null terminated when no len
(unless port-arg (get-output-bytes port))))) (bytes-append encoded-str string-terminator))
(define/finalize-size (xstring-size xs [val #f] #:parent [parent #f]) (define/augment (xxsize [val #f] [parent #f])
(cond (cond
[val (define encoding (if (procedure? (xstring-encoding xs)) [val (define encoding (if (procedure? @encoding)
(or ((xstring-encoding xs) (and parent (dict-ref parent val)) 'ascii)) (or (@encoding (and parent (dict-ref parent val)) 'ascii))
(xstring-encoding xs))) @encoding))
(define string-size (byte-length val (if (eq? encoding 'utf16be) 'utf16le encoding))) (define string-size (bytes-length (encode-string val encoding)))
(define strlen-size (cond (define strlen-size (cond
[(not (xstring-len xs)) 1] [(not @len) 1]
[(xint? (xstring-len xs)) (size (xstring-len xs))] [(xint? @len) (send @len xxsize)]
[else 0])) [else 0]))
(+ string-size strlen-size)] (+ string-size strlen-size)]
[else (resolve-length (xstring-len xs) #f #:parent parent)])) [else (resolve-length @len #f #:parent parent)]))))
(struct xstring xbase (len encoding) #:transparent
#:methods gen:xenomorphic
[(define decode xstring-decode)
(define xdecode xstring-decode)
(define encode xstring-encode)
(define size xstring-size)])
(define supported-encodings '(ascii utf8)) (define supported-encodings '(ascii utf8))
(define (+xstring [len-arg #f] [enc-arg #f] (define (+xstring [len-arg #f] [enc-arg #f]
#:length [len-kwarg #f] #:encoding [enc-kwarg #f]) #:length [len-kwarg #f]
#:encoding [enc-kwarg #f]
#:subclass [class xstring%])
(define len (or len-arg len-kwarg)) (define len (or len-arg len-kwarg))
(define encoding (or enc-arg enc-kwarg 'ascii)) (define encoding (or enc-arg enc-kwarg 'ascii))
(unless (length-resolvable? len) (new class [len len] [encoding encoding]))
(raise-argument-error '+xarray "length-resolvable?" len))
(unless (or (procedure? encoding) (memq encoding supported-encodings))
(raise-argument-error '+xarray (format "procedure or member of ~v" supported-encodings) encoding))
(xstring len encoding))
(define (xsymbol-decode xs [port-arg (current-input-port)] #:parent [parent #f])
(string->symbol (xstring-decode xs port-arg #:parent parent)))
(define (xsymbol-encode xs val [port (current-output-port)] #:parent [parent #f]) (define xsymbol%
(unless (xsymbol? xs) (class xstring%
(raise-argument-error 'encode "xsymbol instance" xs)) (super-new)
(unless (or (string? val) (symbol? val)) (inherit-field len encoding)
(raise-argument-error 'xsymbol-encode "symbol or string" val))
(xstring-encode xs (if (symbol? val) val (string->symbol val)) port #:parent parent))
(struct xsymbol xstring () #:transparent (define/override (pre-encode val)
#:methods gen:xenomorphic (unless (or (string? val) (symbol? val))
[(define decode xsymbol-decode) (raise-argument-error 'xsymbol-encode "symbol or string" val))
(define xdecode xsymbol-decode) (if (symbol? val) (symbol->string val) val))
(define encode xsymbol-encode)
(define size xstring-size)]) (define/override (post-decode val) (string->symbol val))))
(define (+xsymbol [len-arg #f] [enc-arg #f] (define (+xsymbol [len-arg #f] [enc-arg #f]
#:length [len-kwarg #f] #:encoding [enc-kwarg #f]) #:length [len-kwarg #f]
#:encoding [enc-kwarg #f]
#:subclass [class xsymbol%])
(define len (or len-arg len-kwarg)) (define len (or len-arg len-kwarg))
(define encoding (or enc-arg enc-kwarg 'utf8)) (define encoding (or enc-arg enc-kwarg 'utf8))
(xsymbol len encoding)) (new class [len len] [encoding encoding]))
(module+ test (module+ test
(require rackunit) (require rackunit "generic.rkt")
(define S-fixed (+xstring 4 'utf8)) (define S-fixed (+xstring 4 'utf8))
(check-equal? (encode S-fixed "Mike" #f) #"Mike") (check-equal? (encode S-fixed "Mike" #f) #"Mike")
(check-exn exn:fail? (λ () (encode S-fixed "Mikes" #f))) ; too long for fixed string (check-exn exn:fail? (λ () (encode S-fixed "Mikes" #f))) ; too long for fixed string

@ -1,8 +1,10 @@
#lang racket/base #lang racket/base
(require rackunit (require rackunit
racket/class
"../helper.rkt" "../helper.rkt"
"../string.rkt" "../string.rkt"
"../number.rkt" "../number.rkt"
"../generic.rkt"
sugar/unstable/dict) sugar/unstable/dict)
#| #|
@ -18,19 +20,21 @@ https://github.com/mbutterick/restructure/blob/master/test/String.coffee
(test-case (test-case
"decode fixed length with post-decode" "decode fixed length with post-decode"
(parameterize ([current-input-port (open-input-bytes #"testing")]) (parameterize ([current-input-port (open-input-bytes #"testing")])
(define xs (+xstring 7)) (define mystr% (class xstring%
(set-post-decode! xs (λ (val) "ring a ding")) (super-new)
(define/override (post-decode val) "ring a ding")))
(define xs (+xstring 7 #:subclass mystr%))
(check-equal? (decode xs) "ring a ding"))) (check-equal? (decode xs) "ring a ding")))
(test-case (test-case
"decode length from parent key" "decode length from parent key"
(parameterize ([current-input-port (open-input-bytes #"testing")]) (parameterize ([current-input-port (open-input-bytes #"testing")])
(check-equal? (xdecode (+xstring 'len) #:parent (mhash 'len 7)) "testing"))) (check-equal? (send (+xstring 'len) xxdecode (current-input-port) (mhash 'len 7)) "testing")))
(test-case (test-case
"decode length as number before string" "decode length as number before string"
(parameterize ([current-input-port (open-input-bytes #"\x07testing")]) (parameterize ([current-input-port (open-input-bytes #"\x07testing")])
(check-equal? (xdecode (+xstring uint8) #:parent (mhash 'len 7)) "testing"))) (check-equal? (send (+xstring uint8) xxdecode (current-input-port) (mhash 'len 7)) "testing")))
(test-case (test-case
"decode utf8" "decode utf8"
@ -88,8 +92,10 @@ https://github.com/mbutterick/restructure/blob/master/test/String.coffee
(test-case (test-case
"encode using string length and pre-encode" "encode using string length and pre-encode"
(parameterize ([current-output-port (open-output-bytes)]) (parameterize ([current-output-port (open-output-bytes)])
(define xs (+xstring 7)) (define mystr% (class xstring%
(set-pre-encode! xs (compose1 list->string reverse string->list)) (super-new)
(define/override (pre-encode val) (list->string (reverse (string->list val))))))
(define xs (+xstring 7 #:subclass mystr%))
(encode xs "testing") (encode xs "testing")
(check-equal? (get-output-bytes (current-output-port)) #"gnitset"))) (check-equal? (get-output-bytes (current-output-port)) #"gnitset")))

Loading…
Cancel
Save