Compare commits

...

3 Commits

Author SHA1 Message Date
Matthew Butterick 6acec6864d Merge branch 'main' of https://git.matthewbutterick.com/mbutterick/typesetting 2 years ago
Matthew Butterick ef47cafdf8 add some iExceptions 2 years ago
Matthew Butterick d74075b724 improve handling of intercapped words
Intercapped (aka camelCase) words are treated as a list of subwords joined by hyphens, but only if the pieces meet the usual min-length threshold. This splitting technique is not used when unhyphenating.
2 years ago

@ -162,14 +162,32 @@
(define (joiner->string joiner) (format "~a" joiner))
(define (apply-proc proc x [omit-string (λ (x) #f)] [omit-txexpr (λ (x) #f)] [joiner default-joiner])
(define (apply-proc proc x
[omit-string (λ (x) #f)]
[omit-txexpr (λ (x) #f)]
[joiner default-joiner]
#:intercap-min-length [intercap-min-length #false])
(let loop ([x x])
(cond
[(and (string? x) (not (omit-string x)))
;; handle intercapped words as capitalized pieces
(define letter-before-uc #px"(?<=\\p{Ll})(?=\\p{Lu}\\p{Ll})") ; match xXx but not xXX or XXX
(string-join (for/list ([x (in-list (string-split x letter-before-uc))])
(proc x)) (joiner->string joiner))]
(define words
(cond
[intercap-min-length
;; handle intercapped words as a list of subwords,
;; subject to the intercap-min-length
(define zero-length-quantifier "")
(define letter-before-uc
;; match xXx but not xXX or XXX
(pregexp (format "(?<=\\p{L}{~a})(?=\\p{Lu}\\p{Ll}{~a})"
(if (> intercap-min-length 0)
intercap-min-length
zero-length-quantifier)
(if (> intercap-min-length 1)
(sub1 intercap-min-length)
zero-length-quantifier))))
(string-split x letter-before-uc)]
[else (list x)]))
(string-join (map proc words) (joiner->string joiner))]
[(and (txexpr? x) (not (omit-txexpr x)))
(make-txexpr (get-tag x) (get-attrs x) (map loop (get-elements x)))]
[else x])))
@ -199,7 +217,8 @@
[else word])]))
(define (insert-hyphens text) (regexp-replace* word-pattern text replacer))
(begin0
(apply-proc insert-hyphens x omit-string? omit-txexpr? joiner)
(apply-proc insert-hyphens x omit-string? omit-txexpr? joiner
#:intercap-min-length min-length)
;; deleting from the main cache is cheaper than having to do two cache lookups for every word
;; (missing words will just be regenerated later)
(for-each (λ (ee) (remove-exception-word word-cache ee)) extra-exceptions)))

@ -22,7 +22,9 @@
(check-equal? (unhyphenate "poly\u00ADmor\u00ADphism") "polymorphism")
(check-equal? (hyphenate "polymorphism" #\-) "poly-mor-phism")
(check-equal? (hyphenate "compotumi" #\-) "com-po-tu-mi")
(check-equal? (hyphenate "CompOtumi" #\-) "Comp-Otu-mi")
(check-equal? (hyphenate "CompOtumi" #\-) "Com-pO-tu-mi")
(check-equal? (hyphenate "iMagnificence" #\-) "iMag-nif-i-cence")
(check-equal? (hyphenate "CompOtumi" #:min-length 4 #\-) "Comp-Otu-mi")
(check-equal? (hyphenate "polymorphism" "foo") "polyfoomorfoophism")
(check-equal? (unhyphenate "polyfoomorfoophism" "foo") "polymorphism")
(check-equal? (hyphenate "circular polymorphism squandering") "cir\u00ADcu\u00ADlar poly\u00ADmor\u00ADphism squan\u00ADder\u00ADing")

@ -4,6 +4,12 @@ as-so-ciate
as-so-ciates
dec-li-na-tion
dis-tri-bu-tion
iMac
iMacs
iPad
iPads
iPhone
iPhones
law-yer
law-yer-ing
law-yer-ly
@ -24,3 +30,4 @@ ref-or-ma-tion
ret-ri-bu-tion
ta-ble
ty-pog-ra-pher
ty-pog-ra-phers

Loading…
Cancel
Save