You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
typesetting/quad/stats.rkt

194 lines
8.4 KiB
Racket

#lang racket
(require math/statistics sugar racket/serialize plot)
(require (except-in "quads.rkt" line) "utils.rkt" "wrap.rkt" "world.rkt" "measure.rkt" "logger.rkt" "main.rkt")
(define+provide (make-wrap-proc-bps
#:make-pieces-proc make-pieces-proc
#:measure-quad-proc measure-quad-proc
#:compose-line-proc compose-line-proc
#:fast-measure-pieces-proc [fast-measure-pieces-proc (compose1 measure-quad-proc compose-line-proc)]
#:find-breakpoints-proc find-breakpoints-proc)
(λ(qs [measure #f])
(let* ([measure (or measure (quad-attr-ref/parameter (car qs) world:measure-key))]
[qs (if (quad-has-attr? (car qs) world:measure-key)
qs
(map (curryr quad-attr-set world:measure-key measure) qs))])
(log-quad-debug "wrapping on measure = ~a" measure)
(define pieces (make-pieces-proc qs))
(log-quad-debug "avg piece length for breakpoints = ~a"
(/ (for/sum ([p (in-list pieces)])
(for/sum ([q (in-list (quad-list p))])
(define str (quad->string q))
(if (equal? str "")
(quad-attr-ref q world:width-key 0)
(apply measure-text (quad->string q) (font-attributes-with-defaults q)))))
(length pieces)))
(define bps (find-breakpoints-proc (list->vector pieces) (+ 0.0 measure)))
(values pieces bps (map (curryr compose-line-proc measure-quad-proc) (break-at pieces bps))))))
;; wrap proc based on greedy proc
(define wrap-first-bps (make-wrap-proc-bps
#:make-pieces-proc (make-caching-proc make-pieces)
#:measure-quad-proc quad-width
#:compose-line-proc pieces->line
#:fast-measure-pieces-proc measure-potential-line
#:find-breakpoints-proc first-fit-proc))
;; wrap proc based on penalty function
(define wrap-best-bps (make-wrap-proc-bps
#:make-pieces-proc (make-caching-proc make-pieces)
#:measure-quad-proc quad-width
#:compose-line-proc pieces->line
#:fast-measure-pieces-proc measure-potential-line
#:find-breakpoints-proc best-fit-proc))
(define ti (block '(measure 54 leading 18) "Meg is an ally."))
(define (looseness-spread lines)
(if (<= (length lines) 1)
0
(let ([lines-to-measure (drop-right lines 1)]) ; exclude last line from looseness calculation
(define measures (map (λ(line) (quad-attr-ref line world:line-looseness-key 0)) lines-to-measure))
(round-float (- (apply max measures) (apply min measures))))))
(define (geometric-mean lines)
(if (<= (length lines) 1)
0
(let ([lines-to-measure (drop-right lines 1)]) ; exclude last line from looseness calculation
(define measures (map (λ(line) (quad-attr-ref line world:line-looseness-key 0)) lines-to-measure))
(expt (apply * measures) (/ 1 (length measures))))))
(define (looseness-stddev lines)
(if (<= (length lines) 1)
0
(let ([lines-to-measure (drop-right lines 1)]) ; exclude last line from looseness calculation
(define measures (map (λ(line) (quad-attr-ref line world:line-looseness-key 0)) lines-to-measure))
(stddev measures))))
(define (looseness-var lines)
(if (<= (length lines) 1)
0
(let ([lines-to-measure (drop-right lines 1)]) ; exclude last line from looseness calculation
(define measures (map (λ(line) (quad-attr-ref line world:line-looseness-key 0)) lines-to-measure))
(variance measures))))
(define (list->hash0 . xs)
(define mh (make-hash))
(for ([(k v) (in-hash (apply hash xs))])
(hash-set! mh k v))
mh)
(define (piece-length p)
(apply + (map quad-width-fast (quad-list p))))
(define (trial wrap-proc ti measure)
(match-define-values (result time _ _) (time-apply wrap-proc (append (list ti) (list measure))))
(match-define (list pieces bps lines) result)
(define line-count (length lines))
(define piece-count (length pieces))
(define avg-piece-length (/ (apply + (map piece-length pieces)) (length pieces)))
(define avg-looseness (average-looseness lines))
(define looseness-sd (looseness-stddev lines))
(define looseness-variance (looseness-var lines))
(define spread (looseness-spread lines))
(define looses (map (λ(line) (quad-attr-ref line world:line-looseness-key 0)) lines))
(hash 'bps bps 'time time 'line-count line-count 'looses looses
'piece-count piece-count 'avg-piece-length avg-piece-length
'avg-looseness avg-looseness 'looseness-spread spread
'looseness-sd looseness-sd
'looseness-variance looseness-variance))
(define (improved? h1 h2)
(define h1-bps (if (hash? h1) (hash-ref h1 'bps) h1))
(define h2-bps (if (hash? h2) (hash-ref h2 'bps) h2))
(define min-length (min (length h1-bps) (length h2-bps)))
(apply + (map abs (map - (take h1-bps min-length) (take h2-bps min-length)))))
(define (trial-set measure ti)
(define text (quad->string ti))
(define ti-unhyphenated (split-quad ti))
(define fu (trial wrap-first-bps ti-unhyphenated measure))
(define bu (trial wrap-best-bps ti-unhyphenated measure))
(define ti-hyphenated (split-quad (hyphenate-quad ti)))
(define fh (trial wrap-first-bps ti-hyphenated measure))
(define bh (trial wrap-best-bps ti-hyphenated measure))
(hash 'fu fu 'bu bu 'fh fh 'bh bh
'text text
'measure measure
'fh-improved (improved? fu fh)
'bu-improved (improved? fu bu)
'bh-improved (improved? fh bh)))
(define (make-blocks textfile)
(define strings (filter (λ(s) (> (string-length s) 10)) (map (λ(s) (regexp-replace* #rx"\n" s " ")) (map string-trim (string-split (file->string textfile) "\n\n")))))
(map (λ(t) (block '(font "Equity Text B" leading 14 column-count 1 column-gutter 10 size 11.5 x-align justify x-align-last-line left) t)) strings))
(define (refresh [filename "jude0.txt"] [measures '(150 180 210 240 270 300 330 360 390)])
(define paragraphs (make-blocks filename))
;; only use paragraphs > 2 lines because last line is disregarded for looseness spread calculation
(define results (filter
(λ(rec) (> (hash-ref (hash-ref rec 'fu) 'line-count) 2)) (append-map (compose1 (λ(m) (map (λ(p) (trial-set m p)) paragraphs)) (λ(m) (report m))) measures)))
(write-to-file (serialize results) "stats-data.txt" #:exists 'replace))
(define (load-data-records)
(deserialize (file->value "stats-data.txt")))
(define recs (load-data-records))
(define (plot-lists xs ys zs kx ky kz)
(parameterize ([plot-width 700]
[plot-height 700]
[plot-x-label kx]
[plot-y-label ky]
[plot-z-label kz])
(plot3d
;#:x-max 2 #:x-min -2
#:y-min 140 #:y-max 400
#:z-max 1.5
#:angle 0
#:altitude 32
(points3d (map vector xs ys zs)
#:sym 'fullcircle7
#:alpha 0.1
#:color 42))))
(define (fu-formula rec)
(define pieces-per-line (/ (hash-ref (hash-ref rec 'fu) 'piece-count)
(hash-ref (hash-ref rec 'fu) 'line-count) 1.0))
(+ 2.2 (log (abs (hash-ref (hash-ref rec 'fu) 'looseness-sd)))
(* 0.09 pieces-per-line)))
(define (geo-mean rec)
(define looses (hash-ref (hash-ref rec 'fu) 'looses))
(expt (abs (apply * looses)) (/ 1 (length looses))))
(define (magic2 rec)
(define looses (map abs (hash-ref (hash-ref rec 'fu) 'looses)))
(expt (log (+ 1 (/ (variance looses) (expt (mean looses) 2)))) 0.5))
(define (plot-it)
(define-values (helped or-not) (partition (λ(rec) (< 0 (hash-ref rec 'bu-improved))) recs))
(define-values (fu-pos fu-neg) (partition (λ(rec) (> (fu-formula rec) 0)) recs))
(report* (length helped) (length or-not) (length fu-pos) (length fu-neg))
(let ([recs recs])
(plot-lists
(map fu-formula recs)
(map (λ(rec) (hash-ref rec 'measure)) recs)
(map (λ(rec) (if (= 0 (hash-ref rec 'bu-improved)) 0 1)) recs) "fu-formula" "measure" "improve?")))
(plot-it)
(define (looseness-improvement rec)
(/ (abs (- (hash-ref (hash-ref rec 'bu) 'avg-looseness) (hash-ref (hash-ref rec 'fu) 'avg-looseness)))
(hash-ref (hash-ref rec 'bu) 'avg-looseness)))
(define zs (filter positive? (sort (map looseness-improvement recs) <)))