string tests pass
parent
53161dc964
commit
6f997f8b7e
@ -0,0 +1,125 @@
|
||||
#lang debug racket/base
|
||||
(require racket/dict "base.rkt" "util.rkt" "number.rkt")
|
||||
(provide (all-defined-out))
|
||||
|
||||
#|
|
||||
approximates
|
||||
https://github.com/mbutterick/restructure/blob/master/src/String.coffee
|
||||
|#
|
||||
|
||||
(define (read-encoded-string port len [encoding 'ascii])
|
||||
(define proc (case encoding
|
||||
[(utf16le) (error 'bah)]
|
||||
[(ucs2) (error 'bleh)]
|
||||
[(utf8) bytes->string/utf-8]
|
||||
[(ascii) bytes->string/latin-1]
|
||||
[else values]))
|
||||
(proc (read-bytes len port)))
|
||||
|
||||
(define (write-encoded-string port string [encoding 'ascii])
|
||||
;; todo: handle encodings correctly.
|
||||
;; right now just utf8 and ascii are correct
|
||||
(define proc (case encoding
|
||||
[(ucs2 utf8 ascii) string->bytes/utf-8]
|
||||
[(utf16le) (error 'swap-bytes-unimplemented)]
|
||||
[else (error 'unsupported-string-encoding)]))
|
||||
(write-bytes (proc string) port))
|
||||
|
||||
(define (count-nonzero-chars port)
|
||||
;; helper function for String
|
||||
;; counts nonzero chars from current position
|
||||
(bytes-length (car (regexp-match-peek "[^\u0]*" port))))
|
||||
|
||||
(define (bytes-left-in-port? port)
|
||||
(not (eof-object? (peek-byte port))))
|
||||
|
||||
(define (byte-length val encoding)
|
||||
(define encoder
|
||||
(case encoding
|
||||
[(ascii utf8) string->bytes/utf-8]))
|
||||
(bytes-length (encoder (format "~a" val))))
|
||||
|
||||
(define (xstring-decode xs [port-arg (current-input-port)] #:parent [parent #f])
|
||||
(define port (->input-port port-arg))
|
||||
(let ([len (or (resolve-length (xstring-len xs) port parent) (count-nonzero-chars port))]
|
||||
[encoding (if (procedure? (xstring-encoding xs))
|
||||
(or ((xstring-encoding xs) parent) 'ascii)
|
||||
(xstring-encoding xs))]
|
||||
[adjustment (if (and (not (xstring-len xs)) (bytes-left-in-port? port)) 1 0)])
|
||||
(define string (read-encoded-string port len encoding))
|
||||
(pos port (+ (pos port) adjustment))
|
||||
string))
|
||||
|
||||
(define (xstring-encode xs val [port-arg (current-output-port)] #:parent [parent #f])
|
||||
(define port (if (output-port? port-arg) port-arg (open-output-bytes)))
|
||||
(let* ([val (format "~a" val)]
|
||||
[encoding (if (procedure? (xstring-encoding xs))
|
||||
(or ((xstring-encoding xs) (and parent (dict-ref parent val)) 'ascii))
|
||||
(xstring-encoding xs))])
|
||||
(define encoded-length (byte-length val encoding))
|
||||
(when (and (exact-nonnegative-integer? (xstring-len xs)) (> encoded-length (xstring-len xs)))
|
||||
(raise-argument-error 'xstring-encode (format "string no longer than ~a" (xstring-len xs)) val))
|
||||
(when (xint? (xstring-len xs))
|
||||
(encode (xstring-len xs) encoded-length port))
|
||||
(write-encoded-string port val encoding)
|
||||
(when (not (xstring-len xs)) (write-byte #x00 port)) ; null terminated when no len
|
||||
(unless port-arg (get-output-bytes port))))
|
||||
|
||||
(define (xstring-size xs [val #f] [parent #f])
|
||||
(if (not val)
|
||||
(resolve-length (xstring-len xs) #f parent)
|
||||
(let* ([encoding (if (procedure? (xstring-encoding xs))
|
||||
(or ((xstring-encoding xs) (and parent (dict-ref parent val)) 'ascii))
|
||||
(xstring-encoding xs))]
|
||||
[encoding (if (eq? encoding 'utf16be) 'utf16le encoding)])
|
||||
(+ (byte-length val encoding) (cond
|
||||
[(not (xstring-len xs)) 1]
|
||||
[(xint? (xstring-len xs)) (size (xstring-len xs))]
|
||||
[else 0])))))
|
||||
|
||||
(struct xstring (len encoding) #:transparent
|
||||
#:methods gen:xenomorphic
|
||||
[(define decode xstring-decode)
|
||||
(define encode xstring-encode)
|
||||
(define size xstring-size)])
|
||||
|
||||
(define supported-encodings '(ascii utf8))
|
||||
(define (+xstring [len #f] [encoding 'ascii])
|
||||
(unless (length-resolvable? len)
|
||||
(raise-argument-error '+xarray "length-resolvable?" len))
|
||||
(unless (or (procedure? encoding) (memq encoding supported-encodings))
|
||||
(raise-argument-error '+xarray (format "procedure or member of ~v" supported-encodings) encoding))
|
||||
(xstring len encoding))
|
||||
|
||||
(define (xsymbol-decode xs [port-arg (current-input-port)] #:parent [parent #f])
|
||||
(string->symbol (xstring-decode xs port-arg #:parent parent)))
|
||||
|
||||
(define (xsymbol-encode xs val [port (current-output-port)] #:parent [parent #f])
|
||||
(unless (xsymbol? xs)
|
||||
(raise-argument-error 'encode "xsymbol instance" xs))
|
||||
(unless (or (string? val) (symbol? val))
|
||||
(raise-argument-error 'xsymbol-encode "symbol or string" val))
|
||||
(xstring-encode xs (if (symbol? val) val (string->symbol val)) port #:parent parent))
|
||||
|
||||
(struct xsymbol xstring () #:transparent
|
||||
#:methods gen:xenomorphic
|
||||
[(define decode xsymbol-decode)
|
||||
(define encode xsymbol-encode)
|
||||
(define size xstring-size)])
|
||||
|
||||
(define (+xsymbol [len #f] [encoding 'ascii])
|
||||
(xsymbol len encoding))
|
||||
|
||||
(module+ test
|
||||
(require rackunit)
|
||||
(define S-fixed (+xstring 4 'utf8))
|
||||
(check-equal? (encode S-fixed "Mike" #f) #"Mike")
|
||||
(check-exn exn:fail? (λ () (encode S-fixed "Mikes" #f))) ; too long for fixed string
|
||||
(define S (+xstring uint8 'utf8))
|
||||
(check-equal? (decode S #"\2BCDEF") "BC")
|
||||
(check-equal? (encode S "Mike" #f) #"\4Mike")
|
||||
(check-equal? (size (+xstring) "foobar") 7) ; null terminated when no len
|
||||
(check-equal? (decode (+xsymbol 4) #"Mike") 'Mike)
|
||||
(check-equal? (encode (+xsymbol 4) 'Mike #f) #"Mike")
|
||||
(check-equal? (encode (+xsymbol 4) "Mike" #f) #"Mike")
|
||||
(check-exn exn:fail:contract? (λ () (encode (+xsymbol 4) 42 #f))))
|
@ -0,0 +1,109 @@
|
||||
#lang racket/base
|
||||
(require rackunit
|
||||
"../base.rkt"
|
||||
"../string.rkt"
|
||||
"../number.rkt"
|
||||
sugar/unstable/dict)
|
||||
|
||||
#|
|
||||
approximates
|
||||
https://github.com/mbutterick/restructure/blob/master/test/String.coffee
|
||||
|#
|
||||
|
||||
(test-case
|
||||
"decode fixed length"
|
||||
(parameterize ([current-input-port (open-input-bytes #"testing")])
|
||||
(check-equal? (decode (+xstring 7)) "testing")))
|
||||
|
||||
(test-case
|
||||
"decode length from parent key"
|
||||
(parameterize ([current-input-port (open-input-bytes #"testing")])
|
||||
(check-equal? (decode (+xstring 'len) #:parent (mhash 'len 7)) "testing")))
|
||||
|
||||
(test-case
|
||||
"decode length as number before string"
|
||||
(parameterize ([current-input-port (open-input-bytes #"\x07testing")])
|
||||
(check-equal? (decode (+xstring uint8) #:parent (mhash 'len 7)) "testing")))
|
||||
|
||||
(test-case
|
||||
"decode utf8"
|
||||
(parameterize ([current-input-port (open-input-bytes (string->bytes/utf-8 "🍻"))])
|
||||
(check-equal? (decode (+xstring 4 'utf8)) "🍻")))
|
||||
|
||||
(test-case
|
||||
"decode encoding computed from function"
|
||||
(parameterize ([current-input-port (open-input-bytes (string->bytes/utf-8 "🍻"))])
|
||||
(check-equal? (decode (+xstring 4 (λ _ 'utf8))) "🍻")))
|
||||
|
||||
(test-case
|
||||
"decode null-terminated string and read past terminator"
|
||||
(parameterize ([current-input-port (open-input-bytes (string->bytes/utf-8 "🍻\x00"))])
|
||||
(check-equal? (decode (+xstring #f 'utf8)) "🍻")
|
||||
(check-equal? (pos (current-input-port)) 5)))
|
||||
|
||||
(test-case
|
||||
"decode remainder of buffer when null-byte missing"
|
||||
(parameterize ([current-input-port (open-input-bytes (string->bytes/utf-8 "🍻"))])
|
||||
(check-equal? (decode (+xstring #f 'utf8)) "🍻")))
|
||||
|
||||
(test-case
|
||||
"size should use string length"
|
||||
(check-equal? (size (+xstring 7) "testing") 7))
|
||||
|
||||
(test-case
|
||||
"size should use correct encoding"
|
||||
(check-equal? (size (+xstring 10 'utf8) "🍻") 4))
|
||||
|
||||
(test-case
|
||||
"size should use encoding from function"
|
||||
(check-equal? (size (+xstring 10 (λ _ 'utf8)) "🍻") 4))
|
||||
|
||||
(test-case
|
||||
"should add size of length field before string"
|
||||
(check-equal? (size (+xstring uint8 'utf8) "🍻") 5))
|
||||
|
||||
; todo: it 'should work with utf16be encoding', ->
|
||||
|
||||
(test-case
|
||||
"size should take null-byte into account"
|
||||
(check-equal? (size (+xstring #f 'utf8) "🍻") 5))
|
||||
|
||||
(test-case
|
||||
"size should use defined length if no value given"
|
||||
(check-equal? (size (+xstring 10)) 10))
|
||||
|
||||
(test-case
|
||||
"encode using string length"
|
||||
(parameterize ([current-output-port (open-output-bytes)])
|
||||
(encode (+xstring 7) "testing")
|
||||
(check-equal? (dump (current-output-port)) #"testing")))
|
||||
|
||||
(test-case
|
||||
"encode length as number before string"
|
||||
(parameterize ([current-output-port (open-output-bytes)])
|
||||
(encode (+xstring uint8) "testing")
|
||||
(check-equal? (dump (current-output-port)) #"\x07testing")))
|
||||
|
||||
(test-case
|
||||
"encode length as number before string utf8"
|
||||
(parameterize ([current-output-port (open-output-bytes)])
|
||||
(encode (+xstring uint8 'utf8) "testing 😜")
|
||||
(check-equal? (dump (current-output-port)) (string->bytes/utf-8 "\x0ctesting 😜"))))
|
||||
|
||||
(test-case
|
||||
"encode utf8"
|
||||
(parameterize ([current-output-port (open-output-bytes)])
|
||||
(encode (+xstring 4 'utf8) "🍻" )
|
||||
(check-equal? (dump (current-output-port)) (string->bytes/utf-8 "🍻"))))
|
||||
|
||||
(test-case
|
||||
"encode encoding computed from function"
|
||||
(parameterize ([current-output-port (open-output-bytes)])
|
||||
(encode (+xstring 4 (λ _ 'utf8)) "🍻")
|
||||
(check-equal? (dump (current-output-port)) (string->bytes/utf-8 "🍻"))))
|
||||
|
||||
(test-case
|
||||
"encode null-terminated string"
|
||||
(parameterize ([current-output-port (open-output-bytes)])
|
||||
(encode (+xstring #f 'utf8) "🍻" )
|
||||
(check-equal? (dump (current-output-port)) (string->bytes/utf-8 "🍻\x00"))))
|
Loading…
Reference in New Issue