objects & streams

main
Matthew Butterick 8 years ago
parent 84434bbc49
commit f847785c60

@ -1,5 +1,5 @@
#lang at-exp br/quicklang
(require "parser.rkt" "tokenizer.rkt" gregor)
(require "parser.rkt" "tokenizer.rkt" gregor racket/bytes)
(provide (matching-identifiers-out #rx"pf-" (all-defined-out)))
(module+ test (require rackunit))
@ -7,7 +7,7 @@
(module+ reader (provide read-syntax))
(define (read-syntax src port)
(define parse-tree (parse (make-tokenizer src port)))
(define parse-tree (parse (make-tokenizer port src)))
(strip-bindings
#`(module pitfall-parse-mod pitfall/parse
#,parse-tree)))
@ -24,7 +24,7 @@
(define (pf-name str)
(let* ([str (string-trim str "/" #:right? #f)]
[str (regexp-replace* @pregexp{#(\d\d)} str (λ (m sub) (string (integer->char (string->number sub 16)))))])
(string->symbol str)))
(string->symbol str)))
(module+ test
(check-equal? (pf-name "B#45#20NICE") '|BE NICE|))
@ -34,25 +34,54 @@
[(andmap byte? (cons arg tail)) (cons arg tail)]
[(string-prefix? arg "D:")
#;(parameterize ([current-locale "en"])
(parse-date "2015-03-15T02:02:02-04:00" "yyyy-MM-dd'T'HH:mm:ssxxx"))
(parse-date "2015-03-15T02:02:02-04:00" "yyyy-MM-dd'T'HH:mm:ssxxx"))
#f]
[else
(let* ([str (regexp-replace @regexp{^\((.*)\)$} arg "\\1")]
[str (regexp-replace* @pregexp{\\(\\|\))} str "\\1")]
[str (regexp-replace* @pregexp{\\(\d\d\d)} str (λ (m sub) (string (integer->char (string->number sub)))))])
(let* ([str (regexp-replace @regexp{^\((.*)\)$} arg "\\1")] ; remove parens
[str (string-replace str (string-append "\\" "\n") "")]
[str (regexp-replace* @pregexp{\\(n|r|t|b|f|\(|\)|\\)} str (λ (m sub)
(case sub
[("n") "\n"]
[("r") "\r"]
[("t") "\t"]
[("b") "\b"]
[("f") "\f"]
[else sub])))]
[str (regexp-replace* @pregexp{\\(\d{2,3})} str (λ (m sub) (string (integer->char (string->number sub 8)))))])
str)]))
(module+ test
(check-equal? (pf-string "(Testing)") "Testing")
(check-equal? (pf-string "(Test\\)ing)") "Test)ing")
(check-equal? (pf-string "(Test\\\\ing)") "Test\\ing")
(check-equal? (pf-string "(A\\043B)") "A+B")
#;(check-equal? (pf-string "(D:19990209153925-08\'00\')") )
#;(check-true (andmap byte? (pf-string "<1C2D3F>")))
#;(check-true (andmap byte? (pf-string "<1C 2D 3F>"))))
(check-equal? @pf-string{(Testing)} "Testing")
(check-equal? (pf-string @string-append{(Test\
ing)}) "Testing")
(check-equal? @pf-string{(Test\)ing)} "Test)ing")
(check-equal? @pf-string{(Test\ning)} "Test\ning")
(check-equal? @pf-string{(Test\\ing)} "Test\\ing")
(check-equal? @pf-string{(A\53B)} "A+B")
(check-equal? @pf-string{(A\053B)} "A+B")
#;(check-equal? @pf-string{(D:19990209153925-08\'00\')})
#;(check-true (andmap byte? @pf-string{<1C2D3F>}))
#;(check-true (andmap byte? @pf-string{<1C 2D 3F>})))
;; A PDF array is represented directly as a Racket list of its parsed elements.
(define (pf-array . elements) elements)
;; A PDF dictionary becomes an immutable hash; arguments alternate key, value.
(define (pf-dict . key-value-pairs)
  (apply hash key-value-pairs))
;; A parsed PDF stream: its dictionary plus the raw stream bytes.
(struct $stream (dict data) #:transparent)

;; Construct a $stream from a parsed dict and the stream's string payload.
;; The PDF spec requires the dict's Length entry to equal the byte length
;; of the stream data, so mismatches are rejected loudly.
;; NOTE: hash-ref with no failure argument raises if 'Length is absent.
(define (pf-stream dict str)
  (define data (string->bytes/utf-8 str))
  (define declared-length (hash-ref dict 'Length))
  (unless (equal? declared-length (bytes-length data))
    (raise-argument-error 'pf-stream
                          (format "~a bytes of data" declared-length)
                          (format "~a = ~a" (bytes-length data) data)))
  ($stream dict data))
;; Module-level registry of indirect objects, keyed by (vector obj-num gen-num).
;; Mutable hash: pf-indirect-object fills it as the parse proceeds.
(define indirect-objects (make-hash))
(provide indirect-objects)
;; Record an indirect object definition ("N M obj … endobj") in the registry.
;; NOTE(review): obj-num / generation-num appear to arrive already parsed here,
;; while the ref-lookup macro below converts from strings — confirm the key
;; representations actually match at runtime.
(define (pf-indirect-object obj-num generation-num thing)
  (hash-set! indirect-objects (vector obj-num generation-num) thing))
;; Expand a parsed indirect reference ("N M R") into a lookup in the
;; indirect-objects registry, keyed by (vector obj-num generation-num).
;; Fixed: dropped the leftover `report` debugging wrapper (a br form that
;; prints its argument before returning it), which dumped the whole
;; registry on every reference lookup.
(define-macro (pf-indirect-object-ref (OBJ-NUM GENERATION-NUM "R"))
  #'(hash-ref indirect-objects (vector (string->number OBJ-NUM)
                                       (string->number GENERATION-NUM))))

@ -0,0 +1,6 @@
#lang at-exp br
(require rackunit "parser.rkt" "tokenizer.rkt" brag/support)
(apply-tokenizer-maker make-tokenizer @string-append{(string () here) << /A (B) >>})
#;(parse-to-datum (apply-tokenizer-maker make-tokenizer @string-append{(string () here) << /A (B) >>}))

@ -1,10 +1,14 @@
#lang brag
pf-program : pf-thing*
@pf-thing : NULL | CHAR | BOOLEAN | INT | REAL | pf-name | pf-string | pf-array | pf-dict
@pf-thing : pf-null | CHAR | BOOLEAN | INT | REAL | pf-name | pf-string | pf-array | pf-dict | pf-stream | pf-indirect-object | pf-indirect-object-ref
@pf-null : NULL
pf-name : NAME
pf-string : PAREN-TOK | /LEFT-ANGLE HEX-DIGIT-PAIR+ /RIGHT-ANGLE
pf-array : /LEFT-BRACKET pf-thing* /RIGHT-BRACKET
pf-dict : /DOUBLE-LEFT-ANGLE (pf-dict-key pf-dict-value)* /DOUBLE-RIGHT-ANGLE
pf-string : STRING-TOK | /"<" HEX-DIGIT-PAIR+ /">"
pf-array : /"[" pf-thing* /"]"
pf-dict : /"<<" (pf-dict-key pf-dict-value)* /">>"
@pf-dict-key : pf-thing
@pf-dict-value : pf-thing
@pf-dict-value : pf-thing
pf-stream : pf-dict STREAM-DATA
pf-indirect-object : INT INT /"obj" pf-thing /"endobj"
pf-indirect-object-ref : INDIRECT-OBJECT-REF-TOK

@ -15,19 +15,61 @@ false
/ThisIsName37
/Lime#20Green
/SSCN_SomeSecondClassName
/Adobe#20Green
/The_Key_of_F#23_Minor
(Testing)
(A\053B)
%(D:19990209153925-08'00')
<1C2D3F>
<1C 2D 3F>
[0 0 612 792]
[(T) -20.5 (H) 4 (E)]
[[1 2 3][4 5 6]]
<<
/Type /Example
/Subtype /DictionaryExample
/Version 0.01
/IntegerItem 12
/StringItem (a string)
/Subdictionary <<
/Item1 0.4
/Item2 true
/LastItem (not!)
/VeryLastItem (OK) >> >>
( This string contains \245two octal characters\307 . )
(Strings may contain balanced parentheses ( ) and special \ncharacters (*!&}^% and so on).)
% a more human-readable dictionary
<<
/Type /Page
/Author (Leonard Rosenthol)
/Resources << /Font [ /F1 /F2 ] >>
/Resources 42
>>
% stripped
<</Length 3112/Subtype/XML/Type/Metadata>>
<</Length 3112/Subtype/XML/Type/Metadata>>
<<
/Type /Xobject /Subtype /Image /Filter /FlateDecode /Length 4 /Height 32 /Width 32
>>
stream
abcd
endstream
12 0 obj
( Brillig )
endobj
8 0 obj
63
endobj
7 0 obj
<< /Length 8 0 R >>
stream
BT
/F1 12 Tf
72 712 Td
(A stream with an indirect length) Tj
ET
endstream
endobj

@ -1,40 +1,44 @@
#lang br
#lang at-exp br
(require brag/support)
(provide make-tokenizer)
(define-lex-abbrev digit (char-set "0123456789"))
(define-lex-abbrev hex-digit (:or digit (char-set "ABCDEF")))
(define-lex-abbrev digits (:+ digit))
(define-lex-abbrev sign (:? (:or "+" "-")))
(define-lex-abbrev blackspace (:~ whitespace))
(define-lex-abbrev ascii-char (char-set ascii))
(define-lex-abbrev optional-sign (:? (:or "+" "-")))
(define-lex-abbrev pdf-whitespace (char-set "\u0000\t\n\f\r "))
(define-lex-abbrev pdf-delimiter (char-set "()<>[]{}/%"))
#;(define-lex-abbrev pdf-reg)
(define-lex-abbrev blackspace (:~ pdf-whitespace))
(define-lex-abbrev not-right-paren (:~ ")"))
(define-lex-abbrev substring (:seq "(" (:* not-right-paren) ")"))
(define-lex-abbrev nonreg-char (:seq "#" hex-digit hex-digit))
(define (make-tokenizer src port)
(define (make-tokenizer port [src #f])
(port-count-lines! port)
(lexer-file-path src)
(define lex-once
(define lex-one-token
(lexer-srcloc
[(eof) eof]
[(:or whitespace
(from/stop-before "%" "\n"))
(token 'IGNORE lexeme #:skip? #t)]
[(:seq digits (:+ pdf-whitespace) digits (:+ pdf-whitespace) "R")
(begin (println (string-split lexeme))
(token 'INDIRECT-OBJECT-REF-TOK (string-split lexeme)))]
[(:or pdf-whitespace
(from/stop-before "%" #\newline)) (token 'IGNORE lexeme #:skip? #t)]
[(:or "true" "false") (token 'BOOLEAN (equal? lexeme "true"))]
[(:seq sign digits) (token 'INT (string->number lexeme))]
[(:seq sign (:or (:seq digits "." (:? digits))
(:seq "." digits)))
[(:seq optional-sign digits) (token 'INT (string->number lexeme))]
[(:seq optional-sign (:or (:seq digits "." (:? digits))
(:seq "." digits)))
(token 'REAL (string->number lexeme))]
[(:seq "/" (:+ (:or nonreg-char alphabetic "_" numeric)))
(token 'NAME lexeme)]
["null" (token 'NULL 'null)]
[(from/to "(" ")") (token 'PAREN-TOK lexeme)]
[(:seq hex-digit hex-digit) (token 'HEX-DIGIT-PAIR (string->number lexeme 16))]
["<" (token 'LEFT-ANGLE)]
[">" (token 'RIGHT-ANGLE)]
["<<" (token 'DOUBLE-LEFT-ANGLE)]
[">>" (token 'DOUBLE-RIGHT-ANGLE)]
["[" (token 'LEFT-BRACKET)]
["]" (token 'RIGHT-BRACKET)]
[any-char (token 'CHAR lexeme)]))
(λ () (lex-once port)))
[(from/stop-before "/" (:or pdf-delimiter pdf-whitespace)) (token 'NAME lexeme)]
["null" (token 'NULL 'null)]
[(:seq "(" (:* (:or not-right-paren substring)) ")") (token 'STRING-TOK lexeme)]
[(:seq hex-digit hex-digit) (token 'HEX-DIGIT-PAIR (string->number lexeme 16))]
[(:or "<" ">" "<<" ">>" "[" "]" "obj" "endobj") (token lexeme lexeme)]
[(from/to "stream" "endstream") (token 'STREAM-DATA (string-trim (trim-ends "stream" lexeme "endstream")))]
[any-char (token 'CHAR lexeme)]))
(λ () (lex-one-token port)))
(module+ test
(apply-tokenizer-maker make-tokenizer @string-append{(s(t)r) << /A (B) >>}))
Loading…
Cancel
Save