main
Matthew Butterick 6 years ago
parent fd34ea3c00
commit fbe1525f3c

@ -63,7 +63,7 @@
(for/list ([kv (in-slice 2 items)] (for/list ([kv (in-slice 2 items)]
;; suppress these keys so we can compare pdfkit & pitfall output ;; suppress these keys so we can compare pdfkit & pitfall output
#:unless (member (car kv) (list #"/Producer" #"/Creator" #"/CreationDate"))) #:unless (member (car kv) (list #"/Producer" #"/Creator" #"/CreationDate")))
(apply cons kv)) (apply cons kv))
bytes<? bytes<?
#:key car)) #:key car))
(cond ;; might have a stream (cond ;; might have a stream
@ -71,9 +71,14 @@
(define stream-length (define stream-length
(read (open-input-bytes (cdr (assoc #"/Length" dic))))) (read (open-input-bytes (cdr (assoc #"/Length" dic)))))
(define stream (read-bytes stream-length ip)) (define stream (read-bytes stream-length ip))
(append dic (list (cons 'stream (if #R (dict-ref dic '/FlateDecode #f) (define compressed? (equal? (dict-ref dic #"/Filter" #f) #"/FlateDecode"))
(zlib:inflate stream) (dict-update
stream))))] (append dic
(list (cons 'stream (if compressed?
(zlib:inflate stream)
stream))))
;; compressed length may vary, so just set to #"0"
#"/Length" (λ (val) (if compressed? #"0" val)))]
[else dic])] [else dic])]
[else [else
(pat-lex ip (pat-lex ip
@ -88,7 +93,7 @@
(define (parse-pdf-bytes bs) (define (parse-pdf-bytes bs)
(for/list ([tok (in-port parse-1 (open-input-bytes bs))]) (for/list ([tok (in-port parse-1 (open-input-bytes bs))])
tok)) tok))
(define (pdf->dict pdf) (define (pdf->dict pdf)
(define pdf-bs (file->bytes pdf)) (define pdf-bs (file->bytes pdf))
@ -100,14 +105,14 @@
(sort ; sort by byte offset (sort ; sort by byte offset
(cdr ; drop zeroth record: there is no zeroth object (cdr ; drop zeroth record: there is no zeroth object
(for/list ([i (in-range ref-count)]) (for/list ([i (in-range ref-count)])
(cons i (read (open-input-bytes (car (regexp-match #px"\\d{10}" xref-ip))))))) (cons i (read (open-input-bytes (car (regexp-match #px"\\d{10}" xref-ip)))))))
< #:key cdr) < #:key cdr)
(list (cons #f xoff)))) (list (cons #f xoff))))
(sort ; sort by index (sort ; sort by index
(parameterize ([current-input-port (open-input-bytes pdf-bs)]) (parameterize ([current-input-port (open-input-bytes pdf-bs)])
(for/list ([(idx start) (in-dict obj-locations)] (for/list ([(idx start) (in-dict obj-locations)]
[(_ end) (in-dict (cdr obj-locations))]) [(_ end) (in-dict (cdr obj-locations))])
(cons idx (parse-pdf-bytes (peek-bytes (- end start) start))))) (cons idx (parse-pdf-bytes (peek-bytes (- end start) start)))))
< #:key car)) < #:key car))
(define (dict-compare d1 d2) (define (dict-compare d1 d2)
@ -115,19 +120,19 @@
(= (length d1) (length d2)) (= (length d1) (length d2))
(for/and ([(k1 v1) (in-dict d1)] (for/and ([(k1 v1) (in-dict d1)]
[(k2 v2) (in-dict d2)]) [(k2 v2) (in-dict d2)])
(unless (equal? k1 k2) (unless (equal? k1 k2)
(error (format "keys unequal: ~a ~a" k1 k2))) (error (format "keys unequal: ~a ~a" k1 k2)))
(unless (equal? v1 v2) (unless (equal? v1 v2)
(define val1 (if (and (bytes? v1) (> (bytes-length v1) 200)) (define val1 (if (and (bytes? v1) (> (bytes-length v1) 200))
(subbytes v1 0 200) (subbytes v1 0 200)
v1)) v1))
(define val2 (if (and (bytes? v2) (> (bytes-length v2) 200)) (define val2 (if (and (bytes? v2) (> (bytes-length v2) 200))
(subbytes v2 0 200) (subbytes v2 0 200)
v2)) v2))
(error (format "values unequal: ~a ~a" val1 val2))) (error (format "values unequal: ~a ~a" val1 val2)))
(when (dict? v1) (when (dict? v1)
(dict-compare v1 v2)) (dict-compare v1 v2))
#true))) #true)))
(define-simple-check (check-pdfs-equal? ps1 ps2) (define-simple-check (check-pdfs-equal? ps1 ps2)
(dict-compare (pdf->dict ps1) (pdf->dict ps2))) (dict-compare (pdf->dict ps1) (pdf->dict ps2)))
@ -135,5 +140,5 @@
#;(module+ main #;(module+ main
(for ([p (in-directory)] (for ([p (in-directory)]
#:when (path-has-extension? p #"pdf")) #:when (path-has-extension? p #"pdf"))
(with-handlers ([exn:fail? (λ (exn) (println (format "~a failed" p)))]) (with-handlers ([exn:fail? (λ (exn) (println (format "~a failed" p)))])
(pdf->dict p)))) (pdf->dict p))))

@ -24,5 +24,5 @@
(define-runtime-path this "test10rkt.pdf") (define-runtime-path this "test10rkt.pdf")
(make-doc this #f proc) (make-doc this #f proc)
#;(define-runtime-path that "test10crkt.pdf") (define-runtime-path that "test10crkt.pdf")
#;(make-doc that #t proc) (make-doc that #t proc)

Loading…
Cancel
Save