You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
typesetting/quad/quad/atomize.rkt

157 lines
6.7 KiB
Racket

#lang debug racket/base
(require racket/string
racket/hash
racket/match
racket/list
racket/struct
txexpr
sugar/list
racket/function
"unicode/emoji.rkt"
fontland
"quad.rkt"
"qexpr.rkt"
"param.rkt"
"util.rkt")
(provide (all-defined-out))
(module+ test
(require rackunit))
(define (update-with! h . update-hashes)
(apply hash-union! #:combine (λ (v1 v2) v2) h update-hashes))
(define (update-with base-hash . update-hashes)
;; starting with base-hash, add or update keys found in update-hashes
(define h (make-hasheq))
(apply update-with! h base-hash update-hashes)
h)
(module+ test
(check-equal?
((hasheq 'foo "bar" 'zim "zam") . update-with . (hasheq 'zim "BANG") (hasheq 'toe "jam") (hasheq 'foo "zay"))
(make-hasheq '((zim . "BANG") (foo . "zay") (toe . "jam")))))
(define whitespace-pat #px"\\s+")
(define (merge-and-isolate-white str)
(for/list ([(m idx) (in-indexed (regexp-match* whitespace-pat str #:gap-select? #t))]
#:when (non-empty-string? m))
(if (even? idx) m " ")))
(define (merge-adjacent-strings xs [isolate-white? #false])
(let loop ([xs xs][acc null])
(match xs
[(== empty) (reverse acc)]
[(list (? string? strs) ..1 others ...)
(loop others (append (reverse ((if isolate-white?
merge-and-isolate-white
list) (apply string-append strs))) acc))]
[(cons x others) (loop others (cons x acc))])))
(define run-key 'run)
(define (same-run? qa qb)
(eq? (quad-ref qa run-key) (quad-ref qb run-key)))
(define handle-fallback
(let ([font-cache (make-hash)]
[gid-cache (make-hash)])
(λ (missing-glyph-action str attrs fallback-font emoji-font)
(match missing-glyph-action
;; #false = no op
[#false (list (cons attrs str))]
[action
(define font-path (hash-ref attrs 'font-path))
(define f (hash-ref! font-cache font-path (λ () (open-font font-path))))
(define glyph-ids+chars
(for/list ([c (in-string str)])
(define glyph-id
(hash-ref! gid-cache (cons c font-path)
(λ () (glyph-id (vector-ref (glyphrun-glyphs (layout f (string c))) 0)))))
(define fallback-result (and (zero? glyph-id) (if (emoji? c) 'emoji 'fallback)))
(cons fallback-result c)))
(for*/list ([cprs (in-list (contiguous-group-by car glyph-ids+chars eq?))]
[fallback-val (in-value (car (car cprs)))]
#:unless (and fallback-val (eq? action 'omit)))
(define str (list->string (map cdr cprs)))
(define maybe-fallback-attrs
(cond
[(not fallback-val) attrs]
[(eq? action 'warning)
(displayln (format "warning: glyph ~a is not available in font ~a" str (path->string font-path)))
attrs]
[(eq? action 'error)
(raise-argument-error 'quad (format "glyph that exists in font ~a" (path->string font-path)) str)]
[(eq? fallback-val 'emoji) (let ([h (hash-copy attrs)])
(hash-set! h 'font-path emoji-font)
h)]
[(eq? fallback-val 'fallback) (let ([h (hash-copy attrs)])
(hash-set! h 'font-path fallback-font)
h)]))
(cons maybe-fallback-attrs str))]))))
(define (atomize qx #:attrs-proc [attrs-proc values]
#:fallback [fallback-font #f]
#:emoji [emoji-font #f])
;; atomize a quad by reducing it to the smallest indivisible formatting units.
;; which are multi-character quads with the same formatting.
(define missing-glyph-action (current-missing-glyph-action))
(let loop ([x (make-quad qx)]
[attrs (hash-copy (current-default-attrs))]
[key (eq-hash-code (current-default-attrs))])
(match-define-values (next-key next-attrs)
;; make a new run when we encounter non-empty attrs
(match (quad-attrs x)
[(? hash-empty?) (values key attrs)]
[this-attrs (define next-key (eq-hash-code this-attrs))
(define next-attrs (attrs . update-with . this-attrs))
(hash-set! next-attrs run-key next-key)
(attrs-proc next-attrs)
(values next-key next-attrs)]))
(match (quad-elems x)
[(? null?) ((quad-attrs x) . update-with! . next-attrs) (list x)]
[_
;; we don't use `struct-copy` here because it needs to have the structure id at compile time.
;; whereas with this technique, we can extract a constructor for any structure type.
;; notice that the technique depends on
;; 1) we only need to update attrs and elems
;; 2) we make them the first two fields, so we know to drop the first two fields of x-tail
(define x-constructor (derive-quad-constructor x))
(define x-tail (drop (struct->list x) 2))
(match (merge-adjacent-strings (quad-elems x) 'isolate-white)
[(? pair? merged-elems)
(append*
(for/list ([elem (in-list merged-elems)])
(match elem
[(? string? str)
(for/list ([attrstr (in-list
(handle-fallback missing-glyph-action str next-attrs fallback-font emoji-font))])
(match-define (cons attrs str) attrstr)
(apply x-constructor attrs (list str) x-tail))]
[_ (loop elem next-attrs next-key)])))]
;; if merged elements are empty (for instance, series of empty strings)
;; then zero out the elements in the quad.
[_ (list (apply x-constructor next-attrs null x-tail))])])))
(module+ test
(define (filter-private-keys qs)
(for-each (λ (q) (when (hash-has-key? (quad-attrs q) 'run)
(hash-remove! (quad-attrs q) 'run))) qs)
qs)
(struct $br quad ())
(define br (q #:type $br (make-hasheq '((br . "time")))))
(check-equal? (filter-private-keys (atomize (q (q "a b") br (q "x y"))))
(list (q "a") (q " ") (q "b") br (q "x") (q " ") (q "y")))
(check-equal?
(filter-private-keys (atomize (q (hasheq 'foo 42) (q "Hi" " idiot" (q (hasheq 'bar 84) "There") "Eve" "ry" "one"))))
(list (q (hasheq 'foo 42) "Hi")
(q (hasheq 'foo 42) " ")
(q (hasheq 'foo 42) "idiot")
(q (hasheq 'foo 42 'bar 84) "There")
(q (hasheq 'foo 42) "Everyone")))
(check-true (andmap quad=? (atomize (qexpr->quad '(q))) (atomize (qexpr->quad '(q ""))))))