main
Matthew Butterick 5 years ago
parent 04f855bc70
commit bc3d7f3b11

@ -1,34 +0,0 @@
#lang racket/base
(require
racket/string
racket/format
sugar/list)
(provide binprint)
(define (binprint in #:width [width 16])
(unless (even? width) (raise-argument-error 'binprint "even width" width))
(for-each displayln
(for/list ([bs (in-port (λ (p) (read-bytes width p)) in)])
(string-append (pad-string (hexline bs) (+ (* width 2) (sub1 (/ width 2)))) " " (alphaline bs)))))
(define (pad-string str width)
(string-append str (make-string (- width (string-length str)) #\ )))
(define (hexline bs)
(string-join
(map string-append*
(slice-at (for/list ([b (in-bytes bs)])
(~r b #:base 16 #:min-width 2 #:pad-string "0")) 2)) " "))
(define (alphaline bs)
(define printable-ascii? (λ (b) (<= 32 b 126)))
(list->string
(for/list ([b (in-bytes bs)])
(integer->char (if (printable-ascii? b) b 32)))))
(module+ test
#;(binprint (open-input-bytes #"foobar is the name"))
(binprint (open-input-file "../ptest/test12.pdf") #:width 24))

@ -5,12 +5,12 @@
racket/format
racket/dict
sugar/unstable/dict
"annotations.rkt"
"annotation.rkt"
"reference.rkt"
"object.rkt"
"page.rkt"
"vector.rkt"
"fonts.rkt")
"font.rkt")
(provide (all-defined-out))
(define (store-ref doc ref)

@ -9,7 +9,7 @@
racket/list
racket/dict
sugar/unstable/dict
"font.rkt"
"font-base.rkt"
fontland)
(provide embedded-font%)

@ -0,0 +1,32 @@
#lang racket/base
(require racket/class "reference.rkt")
(provide pdf-font%)
;; 181227 structifying the fonts didn't do anything for speed
;; the class is implementation is equally fast, and less code
(define pdf-font%
(class object%
(super-new)
(init-field [(@ascender ascender) #f]
[(@descender descender) #f]
[(@line-gap line-gap) #f]
[(@bbox bbox) #f])
(field [(@ref ref) #f]
[@embedded #f])
(abstract embed encode string-width)
(define/public (make-font-ref)
(unless @ref
(set! @ref (make-ref)))
@ref)
(define/public (font-end)
(unless (or @embedded (not @ref))
(embed)
(set! @embedded #t)))
(define/public (line-height size [include-gap #f])
(define gap (if include-gap @line-gap 0))
(* (/ (+ @ascender gap (- @descender)) 1000.0) size))))

@ -1,32 +1,49 @@
#lang racket/base
(require racket/class "reference.rkt")
(provide pdf-font%)
#lang debug racket/base
(require
"core.rkt"
racket/match
racket/class
"standard-font.rkt"
"embedded-font.rkt")
(provide (all-defined-out))
;; 181227 structifying the fonts didn't do anything for speed
;; the class is implementation is equally fast, and less code
(define (open-pdf-font name id)
(make-object (if (standard-font-name? name) standard-font% embedded-font%) name id))
(define pdf-font%
(class object%
(super-new)
(init-field [(@ascender ascender) #f]
[(@descender descender) #f]
[(@line-gap line-gap) #f]
[(@bbox bbox) #f])
(field [(@ref ref) #f]
[@embedded #f])
(define (current-line-height doc [include-gap #f])
(send ($doc-current-font doc) line-height ($doc-current-font-size doc) include-gap))
(abstract embed encode string-width)
(define/public (make-font-ref)
(unless @ref
(set! @ref (make-ref)))
@ref)
(define (font doc src [size #f])
;; check registered fonts if src is a string
(define cache-key
(match src
[(? string?) #:when (hash-has-key? ($doc-registered-fonts doc) src)
(define ck src)
(set! src (hash-ref (hash-ref ($doc-registered-fonts doc) ck) 'src))
ck]
[(? string?) src]
[_ #false]))
(when size (font-size doc size))
(define/public (font-end)
(unless (or @embedded (not @ref))
(embed)
(set! @embedded #t)))
(match (hash-ref ($doc-font-families doc) cache-key #f) ; check if the font is already in the PDF
[(? values val) (set-$doc-current-font! doc val)]
[_ ; if not, load the font
(define font-index (add1 (length (hash-keys ($doc-font-families doc)))))
(define id (string->symbol (format "F~a" font-index)))
(set-$doc-current-font! doc (open-pdf-font src id))
;; check for existing font families with the same name already in the PDF
(match (hash-ref ($doc-font-families doc) (get-field name ($doc-current-font doc)) #f)
[(? values font) (set-$doc-current-font! doc font)]
[_ ;; save the font for reuse later
(when cache-key (hash-set! ($doc-font-families doc) cache-key ($doc-current-font doc)))
(hash-set! ($doc-font-families doc) (get-field name ($doc-current-font doc)) ($doc-current-font doc))])])
doc)
(define/public (line-height size [include-gap #f])
(define gap (if include-gap @line-gap 0))
(* (/ (+ @ascender gap (- @descender)) 1000.0) size))))
(define (font-size doc size)
(set-$doc-current-font-size! doc size)
doc)
(define (register-font doc name src)
(hash-set! ($doc-registered-fonts doc) name (make-hasheq (list (cons 'src src))))
doc)

@ -1,49 +0,0 @@
#lang debug racket/base
(require
"core.rkt"
racket/match
racket/class
"standard-font.rkt"
"embedded-font.rkt")
(provide (all-defined-out))
(define (open-pdf-font name id)
(make-object (if (standard-font-name? name) standard-font% embedded-font%) name id))
(define (current-line-height doc [include-gap #f])
(send ($doc-current-font doc) line-height ($doc-current-font-size doc) include-gap))
(define (font doc src [size #f])
;; check registered fonts if src is a string
(define cache-key
(match src
[(? string?) #:when (hash-has-key? ($doc-registered-fonts doc) src)
(define ck src)
(set! src (hash-ref (hash-ref ($doc-registered-fonts doc) ck) 'src))
ck]
[(? string?) src]
[_ #false]))
(when size (font-size doc size))
(match (hash-ref ($doc-font-families doc) cache-key #f) ; check if the font is already in the PDF
[(? values val) (set-$doc-current-font! doc val)]
[_ ; if not, load the font
(define font-index (add1 (length (hash-keys ($doc-font-families doc)))))
(define id (string->symbol (format "F~a" font-index)))
(set-$doc-current-font! doc (open-pdf-font src id))
;; check for existing font families with the same name already in the PDF
(match (hash-ref ($doc-font-families doc) (get-field name ($doc-current-font doc)) #f)
[(? values font) (set-$doc-current-font! doc font)]
[_ ;; save the font for reuse later
(when cache-key (hash-set! ($doc-font-families doc) cache-key ($doc-current-font doc)))
(hash-set! ($doc-font-families doc) (get-field name ($doc-current-font doc)) ($doc-current-font doc))])])
doc)
(define (font-size doc size)
(set-$doc-current-font-size! doc size)
doc)
(define (register-font doc name src)
(hash-set! ($doc-registered-fonts doc) name (make-hasheq (list (cons 'src src))))
doc)

@ -1,146 +0,0 @@
#lang br
(require racket/file sugar/cache sugar/unstable/port racket/dict (prefix-in zlib: pitfall/zlib))
(define dict-key-pattern #px"^/[-:\\w]+")
(struct ref (id) #:transparent)
(define (trim-whitespace bs) ; can't use string-trim with bytes
(cadr (regexp-match #px"^\\s*(.*?)\\s*$" bs)))
(define (parse-dict p)
(define terms (for/list ([next (in-port parse-one p)]
#:break (eq? next 'end-dict))
next))
(for/list ([k (in-list terms)]
[v (in-list (cdr terms))]
[i (in-naturals)]
#:when (even? i))
(cons k v)))
(define (parse-list p)
(for/list ([next (in-port parse-one p)]
#:break (eq? next 'end-list))
next))
(define current-pdf (make-parameter #f))
(struct $stream (dict bytes) #:transparent)
(define (stream-decode bs filter)
(case filter
[(FlateDecode) (zlib:inflate bs)]
[else bs]))
(define (indirect-ref? val)
(and (symbol? val) (regexp-match #rx"^ref-" (symbol->string val))))
(define (resolve-ref val)
(doc-ref (current-pdf) (string->number (string-trim (format "~a" val) "ref-"))))
(define (parse-object p)
(define result (parse-one p))
(cond
[(regexp-try-match #px"^\\s*stream(\n|\r\n?)" p)
(define stream-dict result)
(define stream-pos (port-position p))
(define stream-length (let ([val (dict-ref stream-dict 'Length)])
(if (indirect-ref? val)
(resolve-ref val)
val)))
(define stream-bytes (read-bytes stream-length (port-position p stream-pos)))
($stream stream-dict (if (dict-has-key? stream-dict 'Filter)
(stream-decode stream-bytes (dict-ref stream-dict 'Filter))
stream-bytes))]
[else result]))
(define (parse-one p)
(let loop ([p-in p])
(define p (if (input-port? p-in) p-in (open-input-bytes p-in)))
(cond
;; skip whitespace
[(regexp-try-match #px"^\\s+" p) (loop p)]
;; indirect reference => reference int
[(regexp-try-match #px"^(\\d+) (\\d+) R" p) => (λ (m) (string->symbol (format "ref-~a" (loop (cadr m)))))]
;; object => trim and reparse
[(regexp-try-match #px"^\\d+ 0 obj" p) (parse-object p)]
;; integer string => int
[(regexp-try-match #px"^(-)?\\d+[.\\d*]?" p) => (λ (m) (string->number (bytes->string/utf-8 (car m))))]
;; dict => dispatch to handler
[(regexp-try-match #rx"^<<" p) #;(report 'got-dict) (parse-dict p)]
[(regexp-try-match #rx"^>>" p) 'end-dict]
;; list => unroll
[(regexp-try-match #rx"^\\[" p) (parse-list p)]
[(regexp-try-match #rx"^]" p) 'end-list]
;; slash-prefixed dictionary key => symbol
[(regexp-try-match dict-key-pattern p) => (λ (m) (string->symbol (string-trim (bytes->string/utf-8 (car m)) "/" #:right? #f)))]
[(regexp-try-match #rx"^startxref" p) (loop p)] ; pulls off next item, which is an integer
[(regexp-try-match #rx"^<([0123456789abcdefABCDEF]*)>" p) => (λ (m) (cadr m))]
[(eof-object? (peek-bytes 1 0 p)) eof]
[else (report* (file-position p) (peek-bytes 3 0 p)) (error 'unknown-type)])))
(struct trailer (dict xref-ptr) #:transparent)
(define (parse-trailer p)
(define eof-position (file-size (object-name p)))
(define trailer-pos (for*/first ([pos (in-range eof-position 0 -1)]
[m (in-value (regexp-try-match #px"^\\s+trailer\\s*" p pos))]
#:when m)
pos))
(unless trailer-pos (error 'no-trailer-found))
(define dict-offset (caar (regexp-match-positions #rx"<<.*?>>" (port-position p trailer-pos))))
(port-position p (+ trailer-pos dict-offset))
(define trailer-dict (parse-one p))
(define xref-ptr (parse-one p))
(trailer trailer-dict xref-ptr))
(define (parse-xref p offset)
(define bs (car (regexp-match #rx"(?<=xref).*?(?=trailer)" (port-position p offset))))
(define str (bytes->string/utf-8 bs))
(for/hash ([k (in-naturals 1)]
[v (in-list (drop (string-split str #px"\n|\r\n?") 2))])
(values k (string->number (car (regexp-match #px"^\\d+" v))))))
(struct $pdf (port xrefs) #:transparent)
(define/caching (doc-ref pdf idx)
(define offset (hash-ref ($pdf-xrefs pdf) idx))
(and (positive? offset)
(parse-one (port-position ($pdf-port pdf) offset))))
(define/caching (doc-ref* pdf idx)
;; resolve nested ref-x pointers
(define visited empty) ; prevent circular references by tracking visited refs
(let loop ([result (doc-ref pdf idx)])
(cond
[(dict? result) (for/list ([(k v) (in-dict result)])
(cons k (loop v)))]
[(list? result) (map loop result)]
[(and (symbol? result) (regexp-match #px"^ref-(\\d+)" (symbol->string result)))
=> (λ (m) (define next-idx (string->number (cadr m)))
(define old-visited visited)
(set! visited (cons next-idx visited))
(if (member next-idx old-visited)
result
(loop (doc-ref pdf next-idx))))]
[else result])))
(define (port->pdf p)
($pdf p (parse-xref p (trailer-xref-ptr (parse-trailer p)))))
(module+ test
(require rackunit)
(define p (open-input-file "test/minimal.pdf"))
(define pdf (port->pdf p))
(check-equal? (doc-ref pdf 1) '((Type . Catalog) (Pages . ref-2)))
(check-equal? (doc-ref pdf 2) '((Type . Pages) (Kids ref-3) (Count . 1) (MediaBox 0 0 300 144)))
(check-equal? (doc-ref pdf 3) '((Type . Page) (Parent . ref-2) (Resources (Font (F1 (Type . Font) (Subtype . Type1) (BaseFont . Times-Roman)))) (Contents . ref-4)))
(check-equal? (doc-ref pdf 4) ($stream '((Length . 55)) #" BT\n /F1 18 Tf\n 0 0 Td\n (Hello World) Tj\n ET")))
(define p (open-input-file "test/test-cff/textedit-sample.pdf"))
(define pdf (port->pdf p))
(current-pdf pdf)
pdf
(doc-ref pdf 1)

@ -11,8 +11,8 @@
(test-mode #t)
(require rackunit pitfall/document pitfall/vector pitfall/color pitfall/text pitfall/fonts pitfall/images racket/runtime-path racket/class)
(provide (all-from-out rackunit racket/runtime-path pitfall/document pitfall/vector pitfall/text pitfall/color pitfall/fonts pitfall/images racket/class))
(require rackunit pitfall/document pitfall/vector pitfall/color pitfall/text pitfall/font pitfall/image racket/runtime-path racket/class)
(provide (all-from-out rackunit racket/runtime-path pitfall/document pitfall/vector pitfall/text pitfall/color pitfall/font pitfall/image racket/class))
(define (this->control this) (path-add-extension this #"" #" copy."))

@ -1,93 +0,0 @@
#lang racket/base
(require
sugar/debug
racket/file
racket/contract
racket/list
sugar/unstable/dict)
(provide read-png)
#|
Grab key chunks from PNG. Doesn't require heavy lifting from libpng.
|#
(define (read-png ip-or-bytes)
(define png (make-hasheq))
(parameterize ([current-input-port (if (input-port? ip-or-bytes)
ip-or-bytes
(open-input-bytes ip-or-bytes))])
(define header (read-bytes 8))
(let loop ()
(cond
[(eof-object? (peek-byte)) png]
[else
(define chunk-size (read-32bit-integer))
(define chunk-name (read-bytes 4))
(case chunk-name
[(#"IHDR") (hash-set*! png
'width (read-32bit-integer)
'height (read-32bit-integer)
'bits (read-byte)
'colorType (read-byte)
'compressionMethod (read-byte)
'filterMethod (read-byte)
'interlaceMethod (read-byte))]
[(#"PLTE") (hash-set*! png 'palette (read-bytes chunk-size))]
[(#"IDAT") (hash-set*! png 'imgData (read-bytes chunk-size))]
[(#"tRNS")
;; This chunk can only occur once and it must occur after the
;; PLTE chunk and before the IDAT chunk.
(define transparency (mhash))
(case (hash-ref png 'colorType (λ () (error 'read-png "PNG file is loco")))
[(3)
;; Indexed color, RGB. Each byte in this chunk is an alpha for
;; the palette index in the PLTE ("palette") chunk up until the
;; last non-opaque entry. Set up an array, stretching over all
;; palette entries which will be 0 (opaque) or 1 (transparent).
(hash-set! transparency 'indexed
(append (read-bytes chunk-size)
(make-list (min 0 (- 255 chunk-size)) 255)))]
[(0)
;; Greyscale. Corresponding to entries in the PLTE chunk.
;; Grey is two bytes, range 0 .. (2 ^ bit-depth) - 1]
(hash-set! transparency 'grayscale (bytes-ref (read-bytes chunk-size) 0))]
[(2)
;; True color with proper alpha channel.
(hash-set! transparency 'rgb (read-bytes chunk-size))])
(report (hash-set! png 'transparency transparency))]
[(#"tEXt")
(define text (read-bytes chunk-size))
#|
text = @read(chunkSize)
index = text.indexOf(0)
key = String.fromCharCode text.slice(0, index)...
@text[key] = String.fromCharCode text.slice(index + 1)...
|#
42]
[(#"IEND") (define color-value (case (hash-ref png 'colorType)
[(0 3 4) 1]
[(2 6) 3]))
(define alpha-value (and (member (hash-ref png 'colorType) '(4 6)) (hash-ref png 'colorType)))
(hash-set*! png
'colors color-value
'hasAlphaChannel alpha-value
'pixelBitlength (* (hash-ref png 'bits) (+ color-value (if alpha-value 1 0)))
'colorSpace (case color-value
[(1) "DeviceGray"]
[(3) "DeviceRGB"]))]
[else (read-bytes chunk-size)])
(read-bytes 4) ; skip crc
(loop)]))))
(define (read-32bit-integer)
(define signed #f) (define big-endian #t)
(integer-bytes->integer (read-bytes 4) signed big-endian))
(module+ test
(require rackunit)
(check-equal?
(read-png (open-input-file "../ptest/assets/test.png"))
(read-png (file->bytes "../ptest/assets/test.png"))))

@ -4,13 +4,15 @@
"reference.rkt"
"core.rkt"
racket/dict
racket/list
racket/file
racket/draw
sugar/unstable/dict)
#|
https://github.com/mbutterick/pdfkit/blob/master/lib/image/png.coffee
|#
(require "png-reader.rkt" "zlib.rkt")
(require "zlib.rkt")
(provide (all-defined-out))
@ -116,4 +118,89 @@ https://github.com/mbutterick/pdfkit/blob/master/lib/image/png.coffee
;; http://www.libpng.org/pub/png/png-sitemap.html#images
(module+ test
(define pic (make-png (open-input-file "../ptest/assets/death-alpha.png")))
(define-values (img alpha) (split-alpha-channel pic)))
(define-values (img alpha) (split-alpha-channel pic)))
#|
Grab key chunks from PNG. Doesn't require heavy lifting from libpng.
|#
(define (read-png ip-or-bytes)
(define png (make-hasheq))
(parameterize ([current-input-port (if (input-port? ip-or-bytes)
ip-or-bytes
(open-input-bytes ip-or-bytes))])
(define header (read-bytes 8))
(let loop ()
(cond
[(eof-object? (peek-byte)) png]
[else
(define chunk-size (read-32bit-integer))
(define chunk-name (read-bytes 4))
(case chunk-name
[(#"IHDR") (hash-set*! png
'width (read-32bit-integer)
'height (read-32bit-integer)
'bits (read-byte)
'colorType (read-byte)
'compressionMethod (read-byte)
'filterMethod (read-byte)
'interlaceMethod (read-byte))]
[(#"PLTE") (hash-set*! png 'palette (read-bytes chunk-size))]
[(#"IDAT") (hash-set*! png 'imgData (read-bytes chunk-size))]
[(#"tRNS")
;; This chunk can only occur once and it must occur after the
;; PLTE chunk and before the IDAT chunk.
(define transparency (mhash))
(case (hash-ref png 'colorType (λ () (error 'read-png "PNG file is loco")))
[(3)
;; Indexed color, RGB. Each byte in this chunk is an alpha for
;; the palette index in the PLTE ("palette") chunk up until the
;; last non-opaque entry. Set up an array, stretching over all
;; palette entries which will be 0 (opaque) or 1 (transparent).
(hash-set! transparency 'indexed
(append (read-bytes chunk-size)
(make-list (min 0 (- 255 chunk-size)) 255)))]
[(0)
;; Greyscale. Corresponding to entries in the PLTE chunk.
;; Grey is two bytes, range 0 .. (2 ^ bit-depth) - 1]
(hash-set! transparency 'grayscale (bytes-ref (read-bytes chunk-size) 0))]
[(2)
;; True color with proper alpha channel.
(hash-set! transparency 'rgb (read-bytes chunk-size))])
(hash-set! png 'transparency transparency)]
[(#"tEXt")
(define text (read-bytes chunk-size))
#|
text = @read(chunkSize)
index = text.indexOf(0)
key = String.fromCharCode text.slice(0, index)...
@text[key] = String.fromCharCode text.slice(index + 1)...
|#
42]
[(#"IEND") (define color-value (case (hash-ref png 'colorType)
[(0 3 4) 1]
[(2 6) 3]))
(define alpha-value (and (member (hash-ref png 'colorType) '(4 6)) (hash-ref png 'colorType)))
(hash-set*! png
'colors color-value
'hasAlphaChannel alpha-value
'pixelBitlength (* (hash-ref png 'bits) (+ color-value (if alpha-value 1 0)))
'colorSpace (case color-value
[(1) "DeviceGray"]
[(3) "DeviceRGB"]))]
[else (read-bytes chunk-size)])
(read-bytes 4) ; skip crc
(loop)]))))
(define (read-32bit-integer)
(define signed #f) (define big-endian #t)
(integer-bytes->integer (read-bytes 4) signed big-endian))
(module+ test
(require rackunit)
(check-equal?
(read-png (open-input-file "../ptest/assets/test.png"))
(read-png (file->bytes "../ptest/assets/test.png"))))

@ -4,7 +4,7 @@
racket/string
racket/match
sugar/unstable/dict
"font.rkt"
"font-base.rkt"
"core.rkt"
"reference.rkt"
fontland

@ -2,8 +2,8 @@
(require
"core.rkt"
"page.rkt"
"annotations.rkt"
"fonts.rkt"
"annotation.rkt"
"font.rkt"
"vector.rkt"
"color.rkt"
racket/class

Loading…
Cancel
Save