refac
parent
04f855bc70
commit
bc3d7f3b11
@ -1,34 +0,0 @@
|
||||
#lang racket/base
|
||||
(require
|
||||
racket/string
|
||||
racket/format
|
||||
sugar/list)
|
||||
(provide binprint)
|
||||
|
||||
(define (binprint in #:width [width 16])
|
||||
(unless (even? width) (raise-argument-error 'binprint "even width" width))
|
||||
(for-each displayln
|
||||
(for/list ([bs (in-port (λ (p) (read-bytes width p)) in)])
|
||||
(string-append (pad-string (hexline bs) (+ (* width 2) (sub1 (/ width 2)))) " " (alphaline bs)))))
|
||||
|
||||
|
||||
(define (pad-string str width)
|
||||
(string-append str (make-string (- width (string-length str)) #\ )))
|
||||
|
||||
|
||||
(define (hexline bs)
|
||||
(string-join
|
||||
(map string-append*
|
||||
(slice-at (for/list ([b (in-bytes bs)])
|
||||
(~r b #:base 16 #:min-width 2 #:pad-string "0")) 2)) " "))
|
||||
|
||||
|
||||
(define (alphaline bs)
|
||||
(define printable-ascii? (λ (b) (<= 32 b 126)))
|
||||
(list->string
|
||||
(for/list ([b (in-bytes bs)])
|
||||
(integer->char (if (printable-ascii? b) b 32)))))
|
||||
|
||||
(module+ test
|
||||
#;(binprint (open-input-bytes #"foobar is the name"))
|
||||
(binprint (open-input-file "../ptest/test12.pdf") #:width 24))
|
@ -0,0 +1,32 @@
|
||||
#lang racket/base
|
||||
(require racket/class "reference.rkt")
|
||||
(provide pdf-font%)
|
||||
|
||||
;; 181227 structifying the fonts didn't do anything for speed
|
||||
;; the class is implementation is equally fast, and less code
|
||||
|
||||
(define pdf-font%
|
||||
(class object%
|
||||
(super-new)
|
||||
(init-field [(@ascender ascender) #f]
|
||||
[(@descender descender) #f]
|
||||
[(@line-gap line-gap) #f]
|
||||
[(@bbox bbox) #f])
|
||||
(field [(@ref ref) #f]
|
||||
[@embedded #f])
|
||||
|
||||
(abstract embed encode string-width)
|
||||
|
||||
(define/public (make-font-ref)
|
||||
(unless @ref
|
||||
(set! @ref (make-ref)))
|
||||
@ref)
|
||||
|
||||
(define/public (font-end)
|
||||
(unless (or @embedded (not @ref))
|
||||
(embed)
|
||||
(set! @embedded #t)))
|
||||
|
||||
(define/public (line-height size [include-gap #f])
|
||||
(define gap (if include-gap @line-gap 0))
|
||||
(* (/ (+ @ascender gap (- @descender)) 1000.0) size))))
|
@ -1,32 +1,49 @@
|
||||
#lang racket/base
|
||||
(require racket/class "reference.rkt")
|
||||
(provide pdf-font%)
|
||||
#lang debug racket/base
|
||||
(require
|
||||
"core.rkt"
|
||||
racket/match
|
||||
racket/class
|
||||
"standard-font.rkt"
|
||||
"embedded-font.rkt")
|
||||
(provide (all-defined-out))
|
||||
|
||||
;; 181227 structifying the fonts didn't do anything for speed
|
||||
;; the class is implementation is equally fast, and less code
|
||||
(define (open-pdf-font name id)
|
||||
(make-object (if (standard-font-name? name) standard-font% embedded-font%) name id))
|
||||
|
||||
(define pdf-font%
|
||||
(class object%
|
||||
(super-new)
|
||||
(init-field [(@ascender ascender) #f]
|
||||
[(@descender descender) #f]
|
||||
[(@line-gap line-gap) #f]
|
||||
[(@bbox bbox) #f])
|
||||
(field [(@ref ref) #f]
|
||||
[@embedded #f])
|
||||
(define (current-line-height doc [include-gap #f])
|
||||
(send ($doc-current-font doc) line-height ($doc-current-font-size doc) include-gap))
|
||||
|
||||
(abstract embed encode string-width)
|
||||
|
||||
(define/public (make-font-ref)
|
||||
(unless @ref
|
||||
(set! @ref (make-ref)))
|
||||
@ref)
|
||||
(define (font doc src [size #f])
|
||||
;; check registered fonts if src is a string
|
||||
(define cache-key
|
||||
(match src
|
||||
[(? string?) #:when (hash-has-key? ($doc-registered-fonts doc) src)
|
||||
(define ck src)
|
||||
(set! src (hash-ref (hash-ref ($doc-registered-fonts doc) ck) 'src))
|
||||
ck]
|
||||
[(? string?) src]
|
||||
[_ #false]))
|
||||
|
||||
(when size (font-size doc size))
|
||||
|
||||
(define/public (font-end)
|
||||
(unless (or @embedded (not @ref))
|
||||
(embed)
|
||||
(set! @embedded #t)))
|
||||
(match (hash-ref ($doc-font-families doc) cache-key #f) ; check if the font is already in the PDF
|
||||
[(? values val) (set-$doc-current-font! doc val)]
|
||||
[_ ; if not, load the font
|
||||
(define font-index (add1 (length (hash-keys ($doc-font-families doc)))))
|
||||
(define id (string->symbol (format "F~a" font-index)))
|
||||
(set-$doc-current-font! doc (open-pdf-font src id))
|
||||
;; check for existing font families with the same name already in the PDF
|
||||
(match (hash-ref ($doc-font-families doc) (get-field name ($doc-current-font doc)) #f)
|
||||
[(? values font) (set-$doc-current-font! doc font)]
|
||||
[_ ;; save the font for reuse later
|
||||
(when cache-key (hash-set! ($doc-font-families doc) cache-key ($doc-current-font doc)))
|
||||
(hash-set! ($doc-font-families doc) (get-field name ($doc-current-font doc)) ($doc-current-font doc))])])
|
||||
doc)
|
||||
|
||||
(define/public (line-height size [include-gap #f])
|
||||
(define gap (if include-gap @line-gap 0))
|
||||
(* (/ (+ @ascender gap (- @descender)) 1000.0) size))))
|
||||
(define (font-size doc size)
|
||||
(set-$doc-current-font-size! doc size)
|
||||
doc)
|
||||
|
||||
(define (register-font doc name src)
|
||||
(hash-set! ($doc-registered-fonts doc) name (make-hasheq (list (cons 'src src))))
|
||||
doc)
|
||||
|
@ -1,49 +0,0 @@
|
||||
#lang debug racket/base
|
||||
(require
|
||||
"core.rkt"
|
||||
racket/match
|
||||
racket/class
|
||||
"standard-font.rkt"
|
||||
"embedded-font.rkt")
|
||||
(provide (all-defined-out))
|
||||
|
||||
(define (open-pdf-font name id)
|
||||
(make-object (if (standard-font-name? name) standard-font% embedded-font%) name id))
|
||||
|
||||
(define (current-line-height doc [include-gap #f])
|
||||
(send ($doc-current-font doc) line-height ($doc-current-font-size doc) include-gap))
|
||||
|
||||
(define (font doc src [size #f])
|
||||
;; check registered fonts if src is a string
|
||||
(define cache-key
|
||||
(match src
|
||||
[(? string?) #:when (hash-has-key? ($doc-registered-fonts doc) src)
|
||||
(define ck src)
|
||||
(set! src (hash-ref (hash-ref ($doc-registered-fonts doc) ck) 'src))
|
||||
ck]
|
||||
[(? string?) src]
|
||||
[_ #false]))
|
||||
|
||||
(when size (font-size doc size))
|
||||
|
||||
(match (hash-ref ($doc-font-families doc) cache-key #f) ; check if the font is already in the PDF
|
||||
[(? values val) (set-$doc-current-font! doc val)]
|
||||
[_ ; if not, load the font
|
||||
(define font-index (add1 (length (hash-keys ($doc-font-families doc)))))
|
||||
(define id (string->symbol (format "F~a" font-index)))
|
||||
(set-$doc-current-font! doc (open-pdf-font src id))
|
||||
;; check for existing font families with the same name already in the PDF
|
||||
(match (hash-ref ($doc-font-families doc) (get-field name ($doc-current-font doc)) #f)
|
||||
[(? values font) (set-$doc-current-font! doc font)]
|
||||
[_ ;; save the font for reuse later
|
||||
(when cache-key (hash-set! ($doc-font-families doc) cache-key ($doc-current-font doc)))
|
||||
(hash-set! ($doc-font-families doc) (get-field name ($doc-current-font doc)) ($doc-current-font doc))])])
|
||||
doc)
|
||||
|
||||
(define (font-size doc size)
|
||||
(set-$doc-current-font-size! doc size)
|
||||
doc)
|
||||
|
||||
(define (register-font doc name src)
|
||||
(hash-set! ($doc-registered-fonts doc) name (make-hasheq (list (cons 'src src))))
|
||||
doc)
|
@ -1,146 +0,0 @@
|
||||
#lang br
|
||||
(require racket/file sugar/cache sugar/unstable/port racket/dict (prefix-in zlib: pitfall/zlib))
|
||||
|
||||
(define dict-key-pattern #px"^/[-:\\w]+")
|
||||
|
||||
(struct ref (id) #:transparent)
|
||||
|
||||
(define (trim-whitespace bs) ; can't use string-trim with bytes
|
||||
(cadr (regexp-match #px"^\\s*(.*?)\\s*$" bs)))
|
||||
|
||||
(define (parse-dict p)
|
||||
(define terms (for/list ([next (in-port parse-one p)]
|
||||
#:break (eq? next 'end-dict))
|
||||
next))
|
||||
(for/list ([k (in-list terms)]
|
||||
[v (in-list (cdr terms))]
|
||||
[i (in-naturals)]
|
||||
#:when (even? i))
|
||||
(cons k v)))
|
||||
|
||||
(define (parse-list p)
|
||||
(for/list ([next (in-port parse-one p)]
|
||||
#:break (eq? next 'end-list))
|
||||
next))
|
||||
|
||||
(define current-pdf (make-parameter #f))
|
||||
|
||||
(struct $stream (dict bytes) #:transparent)
|
||||
|
||||
(define (stream-decode bs filter)
|
||||
(case filter
|
||||
[(FlateDecode) (zlib:inflate bs)]
|
||||
[else bs]))
|
||||
|
||||
(define (indirect-ref? val)
|
||||
(and (symbol? val) (regexp-match #rx"^ref-" (symbol->string val))))
|
||||
|
||||
(define (resolve-ref val)
|
||||
(doc-ref (current-pdf) (string->number (string-trim (format "~a" val) "ref-"))))
|
||||
|
||||
(define (parse-object p)
|
||||
(define result (parse-one p))
|
||||
(cond
|
||||
[(regexp-try-match #px"^\\s*stream(\n|\r\n?)" p)
|
||||
(define stream-dict result)
|
||||
(define stream-pos (port-position p))
|
||||
(define stream-length (let ([val (dict-ref stream-dict 'Length)])
|
||||
(if (indirect-ref? val)
|
||||
(resolve-ref val)
|
||||
val)))
|
||||
(define stream-bytes (read-bytes stream-length (port-position p stream-pos)))
|
||||
($stream stream-dict (if (dict-has-key? stream-dict 'Filter)
|
||||
(stream-decode stream-bytes (dict-ref stream-dict 'Filter))
|
||||
stream-bytes))]
|
||||
[else result]))
|
||||
|
||||
(define (parse-one p)
|
||||
(let loop ([p-in p])
|
||||
(define p (if (input-port? p-in) p-in (open-input-bytes p-in)))
|
||||
(cond
|
||||
;; skip whitespace
|
||||
[(regexp-try-match #px"^\\s+" p) (loop p)]
|
||||
;; indirect reference => reference int
|
||||
[(regexp-try-match #px"^(\\d+) (\\d+) R" p) => (λ (m) (string->symbol (format "ref-~a" (loop (cadr m)))))]
|
||||
;; object => trim and reparse
|
||||
[(regexp-try-match #px"^\\d+ 0 obj" p) (parse-object p)]
|
||||
;; integer string => int
|
||||
[(regexp-try-match #px"^(-)?\\d+[.\\d*]?" p) => (λ (m) (string->number (bytes->string/utf-8 (car m))))]
|
||||
;; dict => dispatch to handler
|
||||
[(regexp-try-match #rx"^<<" p) #;(report 'got-dict) (parse-dict p)]
|
||||
[(regexp-try-match #rx"^>>" p) 'end-dict]
|
||||
;; list => unroll
|
||||
[(regexp-try-match #rx"^\\[" p) (parse-list p)]
|
||||
[(regexp-try-match #rx"^]" p) 'end-list]
|
||||
;; slash-prefixed dictionary key => symbol
|
||||
[(regexp-try-match dict-key-pattern p) => (λ (m) (string->symbol (string-trim (bytes->string/utf-8 (car m)) "/" #:right? #f)))]
|
||||
[(regexp-try-match #rx"^startxref" p) (loop p)] ; pulls off next item, which is an integer
|
||||
[(regexp-try-match #rx"^<([0123456789abcdefABCDEF]*)>" p) => (λ (m) (cadr m))]
|
||||
[(eof-object? (peek-bytes 1 0 p)) eof]
|
||||
[else (report* (file-position p) (peek-bytes 3 0 p)) (error 'unknown-type)])))
|
||||
|
||||
(struct trailer (dict xref-ptr) #:transparent)
|
||||
(define (parse-trailer p)
|
||||
(define eof-position (file-size (object-name p)))
|
||||
(define trailer-pos (for*/first ([pos (in-range eof-position 0 -1)]
|
||||
[m (in-value (regexp-try-match #px"^\\s+trailer\\s*" p pos))]
|
||||
#:when m)
|
||||
pos))
|
||||
(unless trailer-pos (error 'no-trailer-found))
|
||||
(define dict-offset (caar (regexp-match-positions #rx"<<.*?>>" (port-position p trailer-pos))))
|
||||
(port-position p (+ trailer-pos dict-offset))
|
||||
(define trailer-dict (parse-one p))
|
||||
(define xref-ptr (parse-one p))
|
||||
(trailer trailer-dict xref-ptr))
|
||||
|
||||
|
||||
(define (parse-xref p offset)
|
||||
(define bs (car (regexp-match #rx"(?<=xref).*?(?=trailer)" (port-position p offset))))
|
||||
(define str (bytes->string/utf-8 bs))
|
||||
(for/hash ([k (in-naturals 1)]
|
||||
[v (in-list (drop (string-split str #px"\n|\r\n?") 2))])
|
||||
(values k (string->number (car (regexp-match #px"^\\d+" v))))))
|
||||
|
||||
(struct $pdf (port xrefs) #:transparent)
|
||||
(define/caching (doc-ref pdf idx)
|
||||
(define offset (hash-ref ($pdf-xrefs pdf) idx))
|
||||
(and (positive? offset)
|
||||
(parse-one (port-position ($pdf-port pdf) offset))))
|
||||
|
||||
(define/caching (doc-ref* pdf idx)
|
||||
;; resolve nested ref-x pointers
|
||||
(define visited empty) ; prevent circular references by tracking visited refs
|
||||
(let loop ([result (doc-ref pdf idx)])
|
||||
(cond
|
||||
[(dict? result) (for/list ([(k v) (in-dict result)])
|
||||
(cons k (loop v)))]
|
||||
[(list? result) (map loop result)]
|
||||
[(and (symbol? result) (regexp-match #px"^ref-(\\d+)" (symbol->string result)))
|
||||
=> (λ (m) (define next-idx (string->number (cadr m)))
|
||||
(define old-visited visited)
|
||||
(set! visited (cons next-idx visited))
|
||||
(if (member next-idx old-visited)
|
||||
result
|
||||
(loop (doc-ref pdf next-idx))))]
|
||||
[else result])))
|
||||
|
||||
|
||||
(define (port->pdf p)
|
||||
($pdf p (parse-xref p (trailer-xref-ptr (parse-trailer p)))))
|
||||
|
||||
(module+ test
|
||||
(require rackunit)
|
||||
(define p (open-input-file "test/minimal.pdf"))
|
||||
(define pdf (port->pdf p))
|
||||
(check-equal? (doc-ref pdf 1) '((Type . Catalog) (Pages . ref-2)))
|
||||
(check-equal? (doc-ref pdf 2) '((Type . Pages) (Kids ref-3) (Count . 1) (MediaBox 0 0 300 144)))
|
||||
(check-equal? (doc-ref pdf 3) '((Type . Page) (Parent . ref-2) (Resources (Font (F1 (Type . Font) (Subtype . Type1) (BaseFont . Times-Roman)))) (Contents . ref-4)))
|
||||
(check-equal? (doc-ref pdf 4) ($stream '((Length . 55)) #" BT\n /F1 18 Tf\n 0 0 Td\n (Hello World) Tj\n ET")))
|
||||
|
||||
|
||||
(define p (open-input-file "test/test-cff/textedit-sample.pdf"))
|
||||
(define pdf (port->pdf p))
|
||||
(current-pdf pdf)
|
||||
pdf
|
||||
|
||||
(doc-ref pdf 1)
|
@ -1,93 +0,0 @@
|
||||
#lang racket/base
|
||||
(require
|
||||
sugar/debug
|
||||
racket/file
|
||||
racket/contract
|
||||
racket/list
|
||||
sugar/unstable/dict)
|
||||
|
||||
(provide read-png)
|
||||
|
||||
#|
|
||||
Grab key chunks from PNG. Doesn't require heavy lifting from libpng.
|
||||
|#
|
||||
|
||||
(define (read-png ip-or-bytes)
|
||||
(define png (make-hasheq))
|
||||
(parameterize ([current-input-port (if (input-port? ip-or-bytes)
|
||||
ip-or-bytes
|
||||
(open-input-bytes ip-or-bytes))])
|
||||
(define header (read-bytes 8))
|
||||
(let loop ()
|
||||
(cond
|
||||
[(eof-object? (peek-byte)) png]
|
||||
[else
|
||||
(define chunk-size (read-32bit-integer))
|
||||
(define chunk-name (read-bytes 4))
|
||||
(case chunk-name
|
||||
[(#"IHDR") (hash-set*! png
|
||||
'width (read-32bit-integer)
|
||||
'height (read-32bit-integer)
|
||||
'bits (read-byte)
|
||||
'colorType (read-byte)
|
||||
'compressionMethod (read-byte)
|
||||
'filterMethod (read-byte)
|
||||
'interlaceMethod (read-byte))]
|
||||
[(#"PLTE") (hash-set*! png 'palette (read-bytes chunk-size))]
|
||||
[(#"IDAT") (hash-set*! png 'imgData (read-bytes chunk-size))]
|
||||
[(#"tRNS")
|
||||
;; This chunk can only occur once and it must occur after the
|
||||
;; PLTE chunk and before the IDAT chunk.
|
||||
(define transparency (mhash))
|
||||
(case (hash-ref png 'colorType (λ () (error 'read-png "PNG file is loco")))
|
||||
[(3)
|
||||
;; Indexed color, RGB. Each byte in this chunk is an alpha for
|
||||
;; the palette index in the PLTE ("palette") chunk up until the
|
||||
;; last non-opaque entry. Set up an array, stretching over all
|
||||
;; palette entries which will be 0 (opaque) or 1 (transparent).
|
||||
(hash-set! transparency 'indexed
|
||||
(append (read-bytes chunk-size)
|
||||
(make-list (min 0 (- 255 chunk-size)) 255)))]
|
||||
[(0)
|
||||
;; Greyscale. Corresponding to entries in the PLTE chunk.
|
||||
;; Grey is two bytes, range 0 .. (2 ^ bit-depth) - 1]
|
||||
(hash-set! transparency 'grayscale (bytes-ref (read-bytes chunk-size) 0))]
|
||||
[(2)
|
||||
;; True color with proper alpha channel.
|
||||
(hash-set! transparency 'rgb (read-bytes chunk-size))])
|
||||
(report (hash-set! png 'transparency transparency))]
|
||||
[(#"tEXt")
|
||||
(define text (read-bytes chunk-size))
|
||||
#|
|
||||
text = @read(chunkSize)
|
||||
index = text.indexOf(0)
|
||||
key = String.fromCharCode text.slice(0, index)...
|
||||
@text[key] = String.fromCharCode text.slice(index + 1)...
|
||||
|#
|
||||
42]
|
||||
[(#"IEND") (define color-value (case (hash-ref png 'colorType)
|
||||
[(0 3 4) 1]
|
||||
[(2 6) 3]))
|
||||
(define alpha-value (and (member (hash-ref png 'colorType) '(4 6)) (hash-ref png 'colorType)))
|
||||
(hash-set*! png
|
||||
'colors color-value
|
||||
'hasAlphaChannel alpha-value
|
||||
'pixelBitlength (* (hash-ref png 'bits) (+ color-value (if alpha-value 1 0)))
|
||||
'colorSpace (case color-value
|
||||
[(1) "DeviceGray"]
|
||||
[(3) "DeviceRGB"]))]
|
||||
[else (read-bytes chunk-size)])
|
||||
(read-bytes 4) ; skip crc
|
||||
(loop)]))))
|
||||
|
||||
|
||||
|
||||
(define (read-32bit-integer)
|
||||
(define signed #f) (define big-endian #t)
|
||||
(integer-bytes->integer (read-bytes 4) signed big-endian))
|
||||
|
||||
(module+ test
|
||||
(require rackunit)
|
||||
(check-equal?
|
||||
(read-png (open-input-file "../ptest/assets/test.png"))
|
||||
(read-png (file->bytes "../ptest/assets/test.png"))))
|
Loading…
Reference in New Issue