You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
typesetting/quad/quad/atomize.rkt

97 lines
4.1 KiB
Racket

#lang debug racket/base
5 years ago
(require racket/string
racket/hash
racket/match
racket/list
txexpr
racket/function
"quad.rkt"
"param.rkt")
8 years ago
(provide (all-defined-out))
5 years ago
(module+ test
(require rackunit))
8 years ago
6 years ago
(define (update-with base-hash . update-hashes)
;; starting with base-hash, add or update keys found in update-hashes
6 years ago
(define h (make-hasheq))
5 years ago
(apply hash-union! #:combine (λ (v1 v2) v2) h base-hash update-hashes)
6 years ago
h)
6 years ago
(module+ test
(check-equal?
((hasheq 'foo "bar" 'zim "zam") . update-with . (hasheq 'zim "BANG") (hasheq 'toe "jam") (hasheq 'foo "zay"))
6 years ago
(make-hasheq '((zim . "BANG") (foo . "zay") (toe . "jam")))))
6 years ago
6 years ago
(define whitespace-pat #px"\\s+")
5 years ago
(define (merge-and-isolate-white str)
(for/list ([(m idx) (in-indexed (regexp-match* whitespace-pat str #:gap-select? #t))]
#:when (non-empty-string? m))
(if (even? idx) m " ")))
6 years ago
6 years ago
(define (merge-adjacent-strings xs [isolate-white? #false])
(let loop ([xs xs][acc null])
(match xs
5 years ago
[(== empty) (reverse acc)]
6 years ago
[(list (? string? strs) ..1 others ...)
(loop others (append (reverse ((if isolate-white?
merge-and-isolate-white
6 years ago
list) (apply string-append strs))) acc))]
6 years ago
[(cons x others) (loop others (cons x acc))])))
6 years ago
(define run-key 'run)
(define (same-run? qa qb)
5 years ago
(eq? (hash-ref (quad-attrs qa) run-key) (hash-ref (quad-attrs qb) run-key)))
6 years ago
5 years ago
(define (atomize qx)
;; atomize a quad by reducing it to the smallest indivisible formatting units.
6 years ago
;; which are multi-character quads with the same formatting.
5 years ago
(let loop ([x (make-quad qx)]
[attrs (hash-copy (current-default-attrs))]
[key (eq-hash-code (current-default-attrs))])
(match-define-values (next-key next-attrs)
;; make a new run when we encounter non-empty attrs
(match (quad-attrs x)
[(? hash-empty?) (values key attrs)]
[this-attrs (define next-key (eq-hash-code this-attrs))
(define next-attrs (attrs . update-with . this-attrs))
(hash-set! next-attrs run-key next-key)
(values next-key next-attrs)]))
(match (quad-elems x)
[(? pair? elems)
(append*
(for/list ([elem (in-list (merge-adjacent-strings elems 'isolate-white))])
(match elem
[(? string?)
5 years ago
;; 190116 caveat: all quads with strings as elements will be atomized.
;; however, if the starting quad has a struct subtype of quad,
;; this subtype will be lost.
;; IOW, all atomized quads are of the base `quad` type.
;; this is because we can't get access to any subtype constructors here.
;; corollary: quads that need to keep their types should not have any strings as elements.
;; also, they will not have any run keys embedded
;; (but they shouldn't need it because they're not part of text runs)
;; overall I am persuaded that `atomize` is very texty and needs a name befitting that role.
5 years ago
(list ((quad-copier x) x next-attrs (list elem)))]
5 years ago
[_ (loop elem next-attrs next-key)])))]
[_ (list x)])))
6 years ago
5 years ago
(module+ test
(define (filter-private-keys qs)
(for-each (λ (q) (when (hash-has-key? (quad-attrs q) 'run)
(hash-remove! (quad-attrs q) 'run))) qs)
qs)
(struct $br quad ())
(define br (q #:type $br (hasheq 'br "time")))
(check-equal? (filter-private-keys (atomize (q (q "a b") br (q "x y"))))
(list (q "a") (q " ") (q "b") br (q "x") (q " ") (q "y")))
(check-equal?
(filter-private-keys (atomize (q (hasheq 'foo 42) (q "Hi" " idiot" (q (hasheq 'bar 84) "There") "Eve" "ry" "one"))))
(list (q (hasheq 'foo 42) "Hi")
(q (hasheq 'foo 42) " ")
(q (hasheq 'foo 42) "idiot")
(q (hasheq 'foo 42 'bar 84) "There")
(q (hasheq 'foo 42) "Everyone"))))