From f847785c606af7faa8f81e2db98365512f886c47 Mon Sep 17 00:00:00 2001 From: Matthew Butterick Date: Wed, 29 Mar 2017 16:04:18 -0700 Subject: [PATCH] objects & streams --- pitfall/pitfall/parse.rkt | 59 ++++++++++++++++++++++++--------- pitfall/pitfall/parser-test.rkt | 6 ++++ pitfall/pitfall/parser.rkt | 14 +++++--- pitfall/pitfall/test.rkt | 46 +++++++++++++++++++++++-- pitfall/pitfall/tokenizer.rkt | 54 ++++++++++++++++-------------- 5 files changed, 132 insertions(+), 47 deletions(-) create mode 100644 pitfall/pitfall/parser-test.rkt diff --git a/pitfall/pitfall/parse.rkt b/pitfall/pitfall/parse.rkt index 4744d598..23015978 100644 --- a/pitfall/pitfall/parse.rkt +++ b/pitfall/pitfall/parse.rkt @@ -1,5 +1,5 @@ #lang at-exp br/quicklang -(require "parser.rkt" "tokenizer.rkt" gregor) +(require "parser.rkt" "tokenizer.rkt" gregor racket/bytes) (provide (matching-identifiers-out #rx"pf-" (all-defined-out))) (module+ test (require rackunit)) @@ -7,7 +7,7 @@ (module+ reader (provide read-syntax)) (define (read-syntax src port) - (define parse-tree (parse (make-tokenizer src port))) + (define parse-tree (parse (make-tokenizer port src))) (strip-bindings #`(module pitfall-parse-mod pitfall/parse #,parse-tree))) @@ -24,7 +24,7 @@ (define (pf-name str) (let* ([str (string-trim str "/" #:right? #f)] [str (regexp-replace* @pregexp{#(\d\d)} str (λ (m sub) (string (integer->char (string->number sub 16)))))]) - (string->symbol str))) + (string->symbol str))) (module+ test (check-equal? (pf-name "B#45#20NICE") '|BE NICE|)) @@ -34,25 +34,54 @@ [(andmap byte? (cons arg tail)) (cons arg tail)] [(string-prefix? arg "D:") #;(parameterize ([current-locale "en"]) - (parse-date "2015-03-15T02:02:02-04:00" "yyyy-MM-dd'T'HH:mm:ssxxx")) + (parse-date "2015-03-15T02:02:02-04:00" "yyyy-MM-dd'T'HH:mm:ssxxx")) #f] [else - (let* ([str (regexp-replace @regexp{^\((.*)\)$} arg "\\1")] - [str (regexp-replace* @pregexp{\\(\\|\))} str "\\1")] - [str (regexp-replace* @pregexp{\\(\d\d\d)} str (λ (m sub) (string (integer->char (string->number sub)))))]) + (let* ([str (regexp-replace @regexp{^\((.*)\)$} arg "\\1")] ; remove parens + [str (string-replace str (string-append "\\" "\n") "")] + [str (regexp-replace* @pregexp{\\(n|r|t|b|f|\(|\)|\\)} str (λ (m sub) + (case sub + [("n") "\n"] + [("r") "\r"] + [("t") "\t"] + [("b") "\b"] + [("f") "\f"] + [else sub])))] + [str (regexp-replace* @pregexp{\\(\d{2,3})} str (λ (m sub) (string (integer->char (string->number sub 8)))))]) str)])) (module+ test - (check-equal? (pf-string "(Testing)") "Testing") - (check-equal? (pf-string "(Test\\)ing)") "Test)ing") - (check-equal? (pf-string "(Test\\\\ing)") "Test\\ing") - (check-equal? (pf-string "(A\\043B)") "A+B") - #;(check-equal? (pf-string "(D:19990209153925-08\'00\')") ) - #;(check-true (andmap byte? (pf-string "<1C2D3F>"))) - #;(check-true (andmap byte? (pf-string "<1C 2D 3F>")))) + (check-equal? @pf-string{(Testing)} "Testing") + (check-equal? (pf-string @string-append{(Test\ + ing)}) "Testing") + (check-equal? @pf-string{(Test\)ing)} "Test)ing") + (check-equal? @pf-string{(Test\ning)} "Test\ning") + (check-equal? @pf-string{(Test\\ing)} "Test\\ing") + (check-equal? @pf-string{(A\53B)} "A+B") + (check-equal? @pf-string{(A\053B)} "A+B") + #;(check-equal? @pf-string{(D:19990209153925-08\'00\')}) + #;(check-true (andmap byte? @pf-string{<1C2D3F>})) + #;(check-true (andmap byte? @pf-string{<1C 2D 3F>}))) (define (pf-array . xs) xs) (define (pf-dict . args) (apply hash args)) - \ No newline at end of file + +(struct $stream (dict data) #:transparent) +(define (pf-stream dict str) + (define data (string->bytes/utf-8 str)) + (when (not (equal? (hash-ref dict 'Length) (bytes-length data))) + (raise-argument-error 'pf-stream (format "~a bytes of data" (hash-ref dict 'Length)) (format "~a = ~a" (bytes-length data) data))) + ($stream dict data)) + +(define indirect-objects (make-hash)) +(provide indirect-objects) + +(define (pf-indirect-object obj-num generation-num thing) + (hash-set! indirect-objects (vector obj-num generation-num) thing)) + +(define-macro (pf-indirect-object-ref (OBJ-NUM GENERATION-NUM "R")) + #'(hash-ref (report indirect-objects) (vector (string->number OBJ-NUM) + (string->number GENERATION-NUM)))) + \ No newline at end of file diff --git a/pitfall/pitfall/parser-test.rkt b/pitfall/pitfall/parser-test.rkt new file mode 100644 index 00000000..6fabf73a --- /dev/null +++ b/pitfall/pitfall/parser-test.rkt @@ -0,0 +1,6 @@ +#lang at-exp br +(require rackunit "parser.rkt" "tokenizer.rkt" brag/support) + +(apply-tokenizer-maker make-tokenizer @string-append{(string () here) << /A (B) >>}) + +#;(parse-to-datum (apply-tokenizer-maker make-tokenizer @string-append{(string () here) << /A (B) >>})) \ No newline at end of file diff --git a/pitfall/pitfall/parser.rkt b/pitfall/pitfall/parser.rkt index ba53d575..4d46e715 100644 --- a/pitfall/pitfall/parser.rkt +++ b/pitfall/pitfall/parser.rkt @@ -1,10 +1,14 @@ #lang brag pf-program : pf-thing* -@pf-thing : NULL | CHAR | BOOLEAN | INT | REAL | pf-name | pf-string | pf-array | pf-dict +@pf-thing : pf-null | CHAR | BOOLEAN | INT | REAL | pf-name | pf-string | pf-array | pf-dict | pf-stream | pf-indirect-object | pf-indirect-object-ref +@pf-null : NULL pf-name : NAME -pf-string : PAREN-TOK | /LEFT-ANGLE HEX-DIGIT-PAIR+ /RIGHT-ANGLE -pf-array : /LEFT-BRACKET pf-thing* /RIGHT-BRACKET -pf-dict : /DOUBLE-LEFT-ANGLE (pf-dict-key pf-dict-value)* /DOUBLE-RIGHT-ANGLE +pf-string : STRING-TOK | /"<" HEX-DIGIT-PAIR+ /">" +pf-array : /"[" pf-thing* /"]" +pf-dict : /"<<" (pf-dict-key pf-dict-value)* /">>" @pf-dict-key : pf-thing -@pf-dict-value : pf-thing \ No newline at end of file +@pf-dict-value : pf-thing +pf-stream : pf-dict STREAM-DATA +pf-indirect-object : INT INT /"obj" pf-thing /"endobj" +pf-indirect-object-ref : INDIRECT-OBJECT-REF-TOK \ No newline at end of file diff --git a/pitfall/pitfall/test.rkt b/pitfall/pitfall/test.rkt index 087f55fa..9c9e7b8c 100644 --- a/pitfall/pitfall/test.rkt +++ b/pitfall/pitfall/test.rkt @@ -15,19 +15,61 @@ false /ThisIsName37 /Lime#20Green /SSCN_SomeSecondClassName +/Adobe#20Green +/The_Key_of_F#23_Minor (Testing) (A\053B) + %(D:19990209153925-08'00') <1C2D3F> <1C 2D 3F> [0 0 612 792] [(T) -20.5 (H) 4 (E)] [[1 2 3][4 5 6]] +<< +/Type /Example +/Subtype /DictionaryExample +/Version 0.01 +/IntegerItem 12 +/StringItem (a string) +/Subdictionary << +/Item1 0.4 +/Item2 true +/LastItem (not!) +/VeryLastItem (OK) >> >> +( This string contains \245two octal characters\307 . ) +(Strings may contain balanced parentheses ( ) and special \ncharacters (*!&}^% and so on).) % a more human-readable dictionary << /Type /Page /Author (Leonard Rosenthol) - /Resources << /Font [ /F1 /F2 ] >> + /Resources 42 >> % stripped -<> \ No newline at end of file +<> + +<< + /Type /Xobject /Subtype /Image /Filter /FlateDecode /Length 4 /Height 32 /Width 32 +>> +stream +abcd +endstream + +12 0 obj +( Brillig ) +endobj + +8 0 obj +63 +endobj + +7 0 obj +<< /Length 8 0 R >> +stream +BT +/F1 12 Tf +72 712 Td +(A stream with an indirect length) Tj +ET +endstream +endobj \ No newline at end of file diff --git a/pitfall/pitfall/tokenizer.rkt b/pitfall/pitfall/tokenizer.rkt index 6ad2c321..33751114 100644 --- a/pitfall/pitfall/tokenizer.rkt +++ b/pitfall/pitfall/tokenizer.rkt @@ -1,40 +1,44 @@ -#lang br +#lang at-exp br (require brag/support) (provide make-tokenizer) (define-lex-abbrev digit (char-set "0123456789")) (define-lex-abbrev hex-digit (:or digit (char-set "ABCDEF"))) (define-lex-abbrev digits (:+ digit)) -(define-lex-abbrev sign (:? (:or "+" "-"))) -(define-lex-abbrev blackspace (:~ whitespace)) -(define-lex-abbrev ascii-char (char-set ascii)) +(define-lex-abbrev optional-sign (:? (:or "+" "-"))) +(define-lex-abbrev pdf-whitespace (char-set "\u0000\t\n\f\r ")) +(define-lex-abbrev pdf-delimiter (char-set "()<>[]{}/%")) +#;(define-lex-abbrev pdf-reg) +(define-lex-abbrev blackspace (:~ pdf-whitespace)) +(define-lex-abbrev not-right-paren (:~ ")")) +(define-lex-abbrev substring (:seq "(" (:* not-right-paren) ")")) (define-lex-abbrev nonreg-char (:seq "#" hex-digit hex-digit)) -(define (make-tokenizer src port) +(define (make-tokenizer port [src #f]) (port-count-lines! port) (lexer-file-path src) - (define lex-once + (define lex-one-token (lexer-srcloc [(eof) eof] - [(:or whitespace - (from/stop-before "%" "\n")) - (token 'IGNORE lexeme #:skip? #t)] + [(:seq digits (:+ pdf-whitespace) digits (:+ pdf-whitespace) "R") + (begin (println (string-split lexeme)) + (token 'INDIRECT-OBJECT-REF-TOK (string-split lexeme)))] + [(:or pdf-whitespace + (from/stop-before "%" #\newline)) (token 'IGNORE lexeme #:skip? #t)] [(:or "true" "false") (token 'BOOLEAN (equal? lexeme "true"))] - [(:seq sign digits) (token 'INT (string->number lexeme))] - [(:seq sign (:or (:seq digits "." (:? digits)) - (:seq "." digits))) + [(:seq optional-sign digits) (token 'INT (string->number lexeme))] + [(:seq optional-sign (:or (:seq digits "." (:? digits)) + (:seq "." digits))) (token 'REAL (string->number lexeme))] - [(:seq "/" (:+ (:or nonreg-char alphabetic "_" numeric))) - (token 'NAME lexeme)] - ["null" (token 'NULL 'null)] - [(from/to "(" ")") (token 'PAREN-TOK lexeme)] - [(:seq hex-digit hex-digit) (token 'HEX-DIGIT-PAIR (string->number lexeme 16))] - ["<" (token 'LEFT-ANGLE)] - [">" (token 'RIGHT-ANGLE)] - ["<<" (token 'DOUBLE-LEFT-ANGLE)] - [">>" (token 'DOUBLE-RIGHT-ANGLE)] - ["[" (token 'LEFT-BRACKET)] - ["]" (token 'RIGHT-BRACKET)] - [any-char (token 'CHAR lexeme)])) -(λ () (lex-once port))) \ No newline at end of file + [(from/stop-before "/" (:or pdf-delimiter pdf-whitespace)) (token 'NAME lexeme)] + ["null" (token 'NULL 'null)] + [(:seq "(" (:* (:or not-right-paren substring)) ")") (token 'STRING-TOK lexeme)] + [(:seq hex-digit hex-digit) (token 'HEX-DIGIT-PAIR (string->number lexeme 16))] + [(:or "<" ">" "<<" ">>" "[" "]" "obj" "endobj") (token lexeme lexeme)] + [(from/to "stream" "endstream") (token 'STREAM-DATA (string-trim (trim-ends "stream" lexeme "endstream")))] + [any-char (token 'CHAR lexeme)])) + (λ () (lex-one-token port))) + +(module+ test + (apply-tokenizer-maker make-tokenizer @string-append{(s(t)r) << /A (B) >>})) \ No newline at end of file