From 148419d16ec2d16182e0a22e7e52c7ad6f8bd600 Mon Sep 17 00:00:00 2001 From: Matthew Butterick Date: Thu, 1 Jan 2015 14:35:13 -0800 Subject: [PATCH] delete superfluities --- quad/main.rkt | 2 +- quad/world.rkt | 1 + quad/wrap.rkt | 235 ++++++++++++++----------------------------------- 3 files changed, 66 insertions(+), 172 deletions(-) diff --git a/quad/main.rkt b/quad/main.rkt index bb8fe4c6..d3d09084 100644 --- a/quad/main.rkt +++ b/quad/main.rkt @@ -221,7 +221,7 @@ (require "render.rkt" racket/class profile) (require "samples.rkt") (activate-logger quad-logger) - (parameterize ([world:quality-default 50] + (parameterize ([world:quality-default world:adaptive-quality] [world:paper-width-default 412] [world:paper-height-default 600]) (define to (begin (time (typeset (jude0))))) diff --git a/quad/world.rkt b/quad/world.rkt index bea434c9..40e09a83 100644 --- a/quad/world.rkt +++ b/quad/world.rkt @@ -29,6 +29,7 @@ (define max-quality 100) +(define adaptive-quality 50) (define draft-quality 20) (define-key-and-parameter quality 'quality max-quality) diff --git a/quad/wrap.rkt b/quad/wrap.rkt index 4e86e87e..c9cbfcf1 100644 --- a/quad/wrap.rkt +++ b/quad/wrap.rkt @@ -1,21 +1,22 @@ #lang racket/base -(require sugar/coerce sugar/define sugar/list sugar/debug racket/list racket/format racket/function racket/string (for-syntax racket/base racket/syntax) math/flonum racket/vector sugar/cache) +(require (for-syntax racket/base racket/syntax)) +(require sugar/define sugar/list sugar/debug racket/list racket/function math/flonum racket/vector math/statistics) (require "ocm.rkt" "quads.rkt" "utils.rkt" "measure.rkt" "world.rkt" "logger.rkt" ) ;; predicate for the soft hyphen (define+provide/contract (soft-hyphen? x) (string? . -> . boolean?) - (equal? (~a world:soft-hyphen) x)) + (equal? (format "~a" world:soft-hyphen) x)) ;; visible characters that also mark possible breakpoints (define+provide/contract (visible-breakable? x) - (string? . -> . coerce/boolean?) - (member x world:hyphens-and-dashes)) + (string? . -> . boolean?) + (and (member x world:hyphens-and-dashes) #t)) ;; invisible characters that denote possible breakpoints (define+provide/contract (invisible-breakable? x) - (string? . -> . coerce/boolean?) - (member x (cons world:empty-string world:spaces))) + (string? . -> . boolean?) + (and (member x (cons world:empty-string world:spaces)) #t)) ;; union of visible & invisible (define+provide/contract (breakable? x) @@ -91,8 +92,8 @@ ;; Try the attr first, and if it's not available, compute the width. ;; comes in fast or slow versions. ;; not designed to update the source quad. -(define+provide/contract (quad-width q [fast? #f]) - ((quad?) (boolean?) . ->* . flonum?) +(define+provide/contract (quad-width q) + (quad? . -> . flonum?) (cond [(quad-has-attr? q world:width-key) (fl (quad-attr-ref q world:width-key))] [(ormap (λ(pred) (pred q)) (list char? run? word? word-break?)) @@ -101,9 +102,6 @@ [(line? q) (fold-fl+ (map quad-width (quad-list q)))] [else 0.0])) -;; shorthand for fast version of quad-width. -(define+provide (quad-width-fast q) - (quad-width q #t)) ;; get the ascent (distance from top of text to baseline) ;; used by renderer to align text runs baseline-to-baseline. @@ -268,7 +266,7 @@ (define merged-quads (join-quads line-quads-maybe-with-opticals)) (define merged-quad-widths (map measure-quad-proc merged-quads)) ; 10% of function time - (log-quad-debug "making pieces into line = ~v" (string-append* (map quad->string merged-quads))) + (log-quad-debug "making pieces into line = ~v" (apply string-append (map quad->string merged-quads))) ;; if measure key isn't present, allow an error, because that's weird (when (not (quad-has-attr? (first line-quads) world:measure-key)) @@ -289,7 +287,7 @@ ;(pieces? . -> . flonum?) (for*/sum ([rendered-piece (in-list (render-pieces ps))] [piece-quad (in-list (quad-list rendered-piece))]) - (quad-width-fast piece-quad))) + (quad-width piece-quad))) @@ -306,7 +304,7 @@ (let ([op (open-output-string)]) (parameterize ([current-output-port op]) (define result (time expr)) - (report (string-trim (get-output-string op)) name) + (report ((dynamic-require string-trim 'racket/string) (get-output-string op)) name) (values result)))) (define-syntax-rule (report-time name expr) @@ -336,13 +334,13 @@ (define (install-measurement-keys p) - (define basic-width (round-float (apply + (map quad-width-fast (quad-list p))))) + (define basic-width (round-float (apply + (map quad-width (quad-list p))))) (define p-word-break (quad-attr-ref p world:word-break-key #f)) (define before-break-width (fl+ basic-width (if p-word-break - (quad-width-fast (word (quad-attrs p-word-break) (quad-attr-ref p-word-break world:before-break-key))) + (quad-width (word (quad-attrs p-word-break) (quad-attr-ref p-word-break world:before-break-key))) 0.0))) (define no-break-width (fl+ basic-width (if p-word-break - (quad-width-fast (word (quad-attrs p-word-break) (quad-attr-ref p-word-break world:no-break-key))) + (quad-width (word (quad-attrs p-word-break) (quad-attr-ref p-word-break world:no-break-key))) 0.0))) (quad-attr-set* p 'bb-width before-break-width 'nb-width no-break-width)) @@ -364,33 +362,20 @@ (for/flvector ([p (in-list pieces-measured)]) (fl+ (vector-ref p 0) (vector-ref p 2))))) ; first = word length, third = bb length -(define (make-trial-line pieces-rendered-widths pieces-rendered-before-break-widths i j) - (define flvec (flvector-copy pieces-rendered-widths i j)) - (flvector-set! flvec (sub1 (flvector-length flvec)) (flvector-ref pieces-rendered-before-break-widths (sub1 j))) - flvec) - +(define-syntax-rule (make-trial-line pieces-rendered-widths pieces-rendered-before-break-widths i j) + (let ([flvec (flvector-copy pieces-rendered-widths i j)]) + (flvector-set! flvec (sub1 (flvector-length flvec)) (flvector-ref pieces-rendered-before-break-widths (sub1 j))) + flvec)) -(define (get-line-width line) (round-float (fold-fl+ (flvector->list line)))) +(define-syntax-rule (get-line-width line) + (round-float (fold-fl+ (flvector->list line)))) -;; optimal linefitting: minimize the penalty function across all linebreaks in the paragraph -;; slower but nicer. TeX algorithm + SMAWK speed. -(define+provide (best-fit-proc pieces measure) +;; top-level adaptive wrap proc. +;; first-fit and best-fit are variants. +(define+provide (adaptive-fit-proc pieces measure [use-first? #t] [use-best? #t]) ;((pieces? . -> . width?) . -> . (pieces? measure? . -> . breakpoints?)) - ;; don't use struct for penalty, because of read/write overhead - (define $penalty vector) - (define ($penalty-width x) (vector-ref x 1)) - (define ($penalty-hyphens x) (vector-ref x 0)) - ;; Reduce the vector to an integer by treating it as magnitude from origin. - ;(define ($penalty->integer v) (sqrt (apply + (map (compose1 (curryr expt 2)) (list ($penalty-width v)))))) - (define ($penalty->value v) ($penalty-width v)) - (define initial-value ($penalty 0 0.0)) - - ;(define initial-value 0) - (define matrix-value->number identity) - - (define checked-ijs (make-hash)) ;; this is the winning performance strategy: extract the numbers first, then just wrap on those. ;; todo: how to avoid re-measuring pieces later? ;; todo: how to retain information about words per line and hyphen at end? @@ -398,128 +383,54 @@ (make-piece-vectors pieces)) (define pieces-with-word-space (vector-map (λ(piece) (and (quad-has-attr? piece world:word-break-key) (equal? (quad-attr-ref (quad-attr-ref piece world:word-break-key) 'nb) " "))) pieces)) - (log-quad-debug "~a pieces to wrap = ~v" (vector-length pieces) (vector-map quad->string pieces)) - (define (penalty i j) - (hash-set! checked-ijs (cons i j) #t) - (define out-of-bounds-signal ($penalty 0 (fl* -1.0 (fl i)))) ; for ocm - (define last-line? (= j (vector-length pieces))) - (cond - [(or (>= i j) ; implies negative or zero length line - (> j (vector-length pieces))) ; exceeds available pieces - out-of-bounds-signal] - [else - (define penalty-up-to-i (ocm-min-value ocm i)) - (define words (fl (vector-count identity (vector-copy pieces-with-word-space i (sub1 j))))) - (define last-piece-to-test (vector-ref pieces (sub1 j))) - (define new-hyphen? - (and (quad-has-attr? last-piece-to-test world:word-break-key) - (equal? (quad-attr-ref (quad-attr-ref last-piece-to-test world:word-break-key) world:before-break-key) "-"))) - (define cumulative-hyphens (if (not new-hyphen?) - 0 - (add1 ($penalty-hyphens penalty-up-to-i)))) - - - (define line-width (get-line-width (make-trial-line pieces-rendered-widths pieces-rendered-before-break-widths i j))) - - ($penalty - cumulative-hyphens - (round-float - (fl+s - (if (> cumulative-hyphens world:hyphen-limit) - (fl world:hyphen-penalty) - 0.0) - (fl world:new-line-penalty) - ($penalty->value penalty-up-to-i) - (cond - ;; overfull line: huge penalty prevents break; multiplier is essential for monotonicity. - ;; multiply by -1 because line-width is longer than measure, thus diff is negative - [(fl> line-width (fl* world:allowed-overfull-ratio measure)) - (fl* (fl- line-width measure) (flexpt 10.0 7.0))] - ;; standard penalty, optionally also applied to last line (by changing operator) - [((if world:last-line-can-be-short < <=) j (vector-length pieces)) (fl/ (flexpt (fl- measure line-width) 2.0) (flmax 1.0 words))] - ;; only option left is (= j (length pieces)), meaning we're on the last line. - ;; 0 penalty means any length is ok. - ;[(< (length pieces-to-test) (world:minimum-last-line-pieces)) 50000] - [else 0.0]))))])) - - (define ocm (make-ocm penalty initial-value $penalty->value)) - - ;; starting from last position, ask ocm for position of row minimum (= new-pos) - ;; collect this value, and use it as the input next time - ;; until you reach first position. - (define first-position 0) - (define last-position (vector-length pieces)) - (define result (let loop ([pos last-position][acc null]) - (let ([next-pos (ocm-min-index ocm pos)]) ; first look ahead ... - (if (= next-pos first-position) ; therefore we're done - acc - (loop next-pos (cons next-pos acc)))))) - (log-quad-debug "penalty pieces vs. pairs checked = ~a ~a" (vector-length pieces) (exact->inexact (/ (length (hash-keys checked-ijs)) (vector-length pieces)))) - (log-quad-debug "best-fit breakpoints = ~a" result) - result) - -(require math/statistics) -(define+provide (adaptive-fit-proc pieces measure) - ;((pieces? . -> . width?) . -> . (pieces? measure? . -> . breakpoints?)) - - ;; this is the winning performance strategy: extract the numbers first, then just wrap on those. - ;; todo: how to avoid re-measuring pieces later? - ;; todo: how to retain information about words per line and hyphen at end? - (define-values (pieces-rendered-widths pieces-rendered-before-break-widths) - (make-piece-vectors pieces)) - (define pieces-with-word-space (vector-map (λ(piece) (and (quad-has-attr? piece world:word-break-key) (equal? (quad-attr-ref (quad-attr-ref piece world:word-break-key) 'nb) " "))) pieces)) - - (define-values (folded-bps folded-widths) - (for/fold ([bps '(0)][line-widths empty])([j-1 (in-range (vector-length pieces))]) - (define line-width (get-line-width (make-trial-line pieces-rendered-widths - pieces-rendered-before-break-widths - (car bps) (add1 j-1)))) - (if (fl> line-width (fl* world:allowed-overfull-ratio measure)) - (values (cons j-1 bps) (cons line-width line-widths)) - (values bps line-widths)))) - (define first-fit-bps (cdr (reverse folded-bps))) - (define trial-line-widths (reverse folded-widths)) - (log-quad-debug "adaptive-first-fit breakpoints = ~a" first-fit-bps) - + (define (make-first-fit-bps-and-widths) + (define-values (folded-bps folded-widths) + (for/fold ([bps '(0)][line-widths empty])([j-1 (in-range (vector-length pieces))]) + (define line-width (get-line-width (make-trial-line pieces-rendered-widths + pieces-rendered-before-break-widths + (car bps) (add1 j-1)))) + (if (fl> line-width (fl* world:allowed-overfull-ratio measure)) + (values (cons j-1 bps) (cons line-width line-widths)) + (values bps line-widths)))) + (values (cdr (reverse folded-bps)) (reverse folded-widths))) (define (fu-formula) - (define line-count (add1 (length first-fit-bps))) + (define line-count (length trial-line-widths)) (cond [(<= line-count 2) 1.0] ; signals that first-fit is always OK with 1 or 2 lines [else ; only measure middle lines. we know bps has at least 2 bps (define looseness-stddev (stddev (map (curryr calc-looseness measure) (drop-right (drop trial-line-widths 1) 1)))) (define piece-count (flvector-length pieces-rendered-widths)) - (define pieces-per-line (fl/ (fl piece-count) (sub1 (fl line-count)))) ; would be more accurate to count only pieces in middle + (define pieces-per-line (fl/ (fl piece-count) (sub1 (fl line-count)))) ; todo: more accurate to count only pieces in middle (fl+s 2.2 (fllog (flabs looseness-stddev)) (fl* 0.09 pieces-per-line))])) ; the FU FORMULA + ;; only buy first-fit-bps if use-first? is true. + (define-values (first-fit-bps trial-line-widths) (if use-first? (make-first-fit-bps-and-widths) (values (void) (void)))) + (cond - [(fl> (fu-formula) 0.0) first-fit-bps] + ;; possible outcomes at this branch: + ;; adaptive wrap: use-first and use-best are true, so first-fit-bps will exist, and fu-formula will be used. + ;; first-fit wrap: use-first is true but not use-best. So first-fit-bps will be returned regardless. + ;; best-fit wrap: use-first is false but use-best is true. So first-fit-bps will be skipped, and move on to best-fit. + [(and use-first? (if use-best? (fl> (fu-formula) 0.0) #t)) + (log-quad-debug "first-fit breakpoints = ~a" first-fit-bps) + first-fit-bps] [else - ;; don't use struct for penalty, because of read/write overhead - (define $penalty vector) + (define $penalty vector) ; don't use struct for penalty, because of read/write overhead (define ($penalty-width x) (vector-ref x 1)) (define ($penalty-hyphens x) (vector-ref x 0)) - ;; Reduce the vector to an integer by treating it as magnitude from origin. - ;(define ($penalty->integer v) (sqrt (apply + (map (compose1 (curryr expt 2)) (list ($penalty-width v)))))) (define ($penalty->value v) ($penalty-width v)) (define initial-value ($penalty 0 0.0)) - ;(define initial-value 0) - (define matrix-value->number identity) - - (log-quad-debug "~a pieces to wrap = ~v" (vector-length pieces) (vector-map quad->string pieces)) (define (penalty i j) - (define out-of-bounds-signal ($penalty 0 (fl* -1.0 (fl i)))) ; for ocm - (define last-line? (= j (vector-length pieces))) (cond [(or (>= i j) ; implies negative or zero length line (> j (vector-length pieces))) ; exceeds available pieces - out-of-bounds-signal] + ($penalty 0 (fl* -1.0 (fl i)))] ; ocm out of bounds signal [else (define penalty-up-to-i (ocm-min-value ocm i)) - (define words (fl (vector-count identity (vector-copy pieces-with-word-space i (sub1 j))))) (define last-piece-to-test (vector-ref pieces (sub1 j))) (define new-hyphen? (and (quad-has-attr? last-piece-to-test world:word-break-key) @@ -528,9 +439,6 @@ 0 (add1 ($penalty-hyphens penalty-up-to-i)))) - - (define line-width (get-line-width (make-trial-line pieces-rendered-widths pieces-rendered-before-break-widths i j))) - ($penalty cumulative-hyphens (round-float @@ -540,17 +448,20 @@ 0.0) (fl world:new-line-penalty) ($penalty->value penalty-up-to-i) - (cond - ;; overfull line: huge penalty prevents break; multiplier is essential for monotonicity. - ;; multiply by -1 because line-width is longer than measure, thus diff is negative - [(fl> line-width (fl* world:allowed-overfull-ratio measure)) - (fl* (fl- line-width measure) (flexpt 10.0 7.0))] - ;; standard penalty, optionally also applied to last line (by changing operator) - [((if world:last-line-can-be-short < <=) j (vector-length pieces)) (fl/ (flexpt (fl- measure line-width) 2.0) (flmax 1.0 words))] - ;; only option left is (= j (length pieces)), meaning we're on the last line. - ;; 0 penalty means any length is ok. - ;[(< (length pieces-to-test) (world:minimum-last-line-pieces)) 50000] - [else 0.0]))))])) + (let ([line-width (get-line-width (make-trial-line pieces-rendered-widths pieces-rendered-before-break-widths i j))]) + (cond + ;; overfull line: huge penalty prevents break; multiplier is essential for monotonicity. + ;; multiply by -1 because line-width is longer than measure, thus diff is negative + [(fl> line-width (fl* world:allowed-overfull-ratio measure)) + (fl* (fl- line-width measure) (flexpt 10.0 7.0))] + ;; standard penalty, optionally also applied to last line (by changing operator) + [((if world:last-line-can-be-short < <=) j (vector-length pieces)) + (define words (fl (vector-count identity (vector-copy pieces-with-word-space i (sub1 j))))) + (fl/ (flexpt (fl- measure line-width) 2.0) (flmax 1.0 words))] + ;; only option left is (= j (vector-length pieces)), meaning we're on the last line. + ;; 0 penalty means any length is ok. + ;[(< (length pieces-to-test) (world:minimum-last-line-pieces)) 50000] + [else 0.0])))))])) (define ocm (make-ocm penalty initial-value $penalty->value)) @@ -569,24 +480,6 @@ result])) -;; greedy linefitting: find the biggest line that will fit, then set the next -;; faster but coarser. Web browsers & most word processors use this approach. -(define+provide (first-fit-proc pieces measure) - ;((pieces? . -> . width?) . -> . (pieces? measure? . -> . breakpoints?)) - (define-values (pieces-rendered-widths pieces-rendered-before-break-widths) - (make-piece-vectors pieces)) - - - (define bps - (for/fold ([bps '(0)])([j-1 (in-range (vector-length pieces))]) - (if (fl> (get-line-width (make-trial-line pieces-rendered-widths - pieces-rendered-before-break-widths - (car bps) (add1 j-1))) - (fl* world:allowed-overfull-ratio measure)) - (cons j-1 bps) - bps))) - (log-quad-debug "first-fit breakpoints = ~a" (cdr (reverse bps))) - (cdr (reverse bps))) ;; wrap proc based on greedy proc @@ -594,14 +487,14 @@ #:make-pieces-proc make-pieces #:measure-quad-proc quad-width #:compose-line-proc pieces->line - #:find-breakpoints-proc first-fit-proc)) + #:find-breakpoints-proc (curryr adaptive-fit-proc #t #f))) ;; wrap proc based on penalty function (define+provide wrap-best (make-wrap-proc #:make-pieces-proc make-pieces #:measure-quad-proc quad-width #:compose-line-proc pieces->line - #:find-breakpoints-proc best-fit-proc)) + #:find-breakpoints-proc (curryr adaptive-fit-proc #f #t))) (define+provide wrap-adaptive (make-wrap-proc #:make-pieces-proc make-pieces