add #:min-hyphens option

main
Matthew Butterick 5 years ago
parent 16ce7e19c7
commit 32cca4c6ab

@ -23,7 +23,8 @@
#:omit-string (string? . -> . any/c)
#:omit-txexpr (txexpr? . -> . any/c)
#:min-left-length (or/c (and/c integer? positive?) #f)
#:min-right-length (or/c (and/c integer? positive?) #f)) . ->* . xexpr/c))
#:min-right-length (or/c (and/c integer? positive?) #f)
#:min-hyphens (and/c integer? positive?)) . ->* . xexpr/c))
(define (make-hyphenate-function patterns exceptions)
(make-keyword-procedure

@ -11,6 +11,7 @@
(define default-min-left-length 2)
(define default-min-right-length 2)
(define default-joiner #\u00AD)
(define default-min-hyphen-count 1)
(define (exception-word->word+pattern ew)
@ -108,11 +109,11 @@
[substr (in-value (substring word-with-boundaries start (add1 end)))]
[partial-pattern (in-value (hash-ref pattern-cache (string->symbol substr) #f))]
#:when partial-pattern)
;; pad out partial-pattern to full length
;; (so we can compare patterns to find max value for each slot)
(define left-zeroes (make-list start 0))
(define right-zeroes (make-list (- (add1 word-length) (length partial-pattern) start) 0))
(append left-zeroes partial-pattern right-zeroes)))
;; pad out partial-pattern to full length
;; (so we can compare patterns to find max value for each slot)
(define left-zeroes (make-list start 0))
(define right-zeroes (make-list (- (add1 word-length) (length partial-pattern) start) 0))
(append left-zeroes partial-pattern right-zeroes)))
(define max-pattern (calculate-max-pattern (cons default-zero-pattern matching-patterns)))
;; for point list generated from a pattern,
;; drop first two elements because they represent hyphenation weight
@ -141,20 +142,20 @@
[left-zeroes (min (or min-left-length default-min-left-length) (length points))]
[right-zeroes (min (or min-right-length default-min-right-length) (length points))])
(for/list ([(point idx) (in-indexed points)])
(if (<= left-zeroes (add1 idx) (- (length points) right-zeroes))
point
0))))
(if (<= left-zeroes (add1 idx) (- (length points) right-zeroes))
point
0))))
;; odd-valued points in the pattern denote hyphenation points
(define odd-point-indexes (for/list ([(wp idx) (in-indexed word-points)]
#:when (odd? wp))
idx))
idx))
;; the hyphenation goes after the indexed letter, so add1 to the raw points for slicing
(define breakpoints (append (list 0) (map add1 odd-point-indexes) (list (string-length word))))
(for/list ([start (in-list breakpoints)]
[end (in-list (cdr breakpoints))]) ; shorter list controls exit of loop
(substring word start end))]))
(substring word start end))]))
;; joiner contract allows char or string; this coerces to string.
@ -168,7 +169,7 @@
;; handle intercapped words as capitalized pieces
(define letter-before-uc #px"(?<=\\p{Ll})(?=\\p{Lu}\\p{Ll})") ; match xXx but not xXX or XXX
(string-join (for/list ([x (in-list (string-split x letter-before-uc))])
(proc x)) (joiner->string joiner))]
(proc x)) (joiner->string joiner))]
[(and (txexpr? x) (not (omit-txexpr x)))
(make-txexpr (get-tag x) (get-attrs x) (map loop (get-elements x)))]
[else x])))
@ -179,6 +180,7 @@
#:min-length [min-length default-min-length]
#:min-left-length [min-left-length default-min-left-length]
#:min-right-length [min-right-length default-min-right-length]
#:min-hyphens [min-hyphens-added default-min-hyphen-count]
#:omit-word [omit-word? (λ (x) #f)]
#:omit-string [omit-string? (λ (x) #f)]
#:omit-txexpr [omit-txexpr? (λ (x) #f)])
@ -187,15 +189,14 @@
(for-each (λ (ee) (add-exception-word word-cache ee)) extra-exceptions)
(define word-pattern #px"\\p{L}+") ;; more restrictive than exception-word
(define (replacer word . words)
(if (omit-word? word)
word
(string-join (word->hyphenation-points word
word-cache
pattern-cache
min-length
min-left-length
min-right-length)
(joiner->string joiner))))
(cond
[(omit-word? word) word]
[else (define hyphenation-points
(word->hyphenation-points word word-cache pattern-cache min-length min-left-length min-right-length))
(cond
[(>= (sub1 (length hyphenation-points)) min-hyphens-added)
(string-join hyphenation-points (joiner->string joiner))]
[else word])]))
(define (insert-hyphens text) (regexp-replace* word-pattern text replacer))
(begin0
(apply-proc insert-hyphens x omit-string? omit-txexpr? joiner)

@ -41,6 +41,7 @@ Safe mode enables the function contracts documented below. Use safe mode by impo
[#:min-length length (or/c integer? false?) 5]
[#:min-left-length left-length (or/c (and/c integer? positive?) #f) 2]
[#:min-right-length right-length (or/c (and/c integer? positive?) #f) 2]
[#:min-hyphens min-hyphen-count (and/c integer? positive?) 1]
[#:omit-word word-test (string? . -> . any/c) (λ (x) #f)]
[#:omit-string string-test (string? . -> . any/c) (λ (x) #f)]
[#:omit-txexpr txexpr-test (txexpr? . -> . any/c) (λ (x) #f)])
@ -50,23 +51,34 @@ Hyphenate @racket[_xexpr] by calculating hyphenation points and inserting @racke
@margin-note{The REPL displays a soft hyphen as @code{\u00AD}. But in ordinary use, you'll only see a soft hyphen when it appears at the end of a line or page as part of a hyphenated word. Otherwise it's not displayed. In most of the examples here, I use a standard hyphen for clarity (by adding @code{#\-} as an argument).}
@examples[#:eval my-eval
(hyphenate "ergo polymorphism")
(hyphenate "ergo polymorphism" #\-)
(hyphenate "ergo polymorphism" #:min-length 13)
(hyphenate "ergo polymorphism" #:min-length #f)
(hyphenate "snowman polymorphism")
(hyphenate "snowman polymorphism" #\-)
(hyphenate "snowman polymorphism" #:min-length 13)
(hyphenate "snowman polymorphism" #:min-length #f)
]
The @racket[#:min-left-length] and @racket[#:min-right-length] keyword arguments set the minimum distance between a potential hyphen and the left or right ends of the word. The default is 2 characters. Larger values will reduce hyphens, but also prevent small words from breaking. These values will override a smaller @racket[#:min-length] value.
@examples[#:eval my-eval
(hyphenate "ergo polymorphism" #\-)
(hyphenate "ergo polymorphism" #\- #:min-left-length #f)
(hyphenate "ergo polymorphism" #\- #:min-length 2 #:min-left-length 5)
(hyphenate "ergo polymorphism" #\- #:min-right-length 6)
(hyphenate "snowman polymorphism" #\-)
(hyphenate "snowman polymorphism" #\- #:min-left-length #f)
(hyphenate "snowman polymorphism" #\- #:min-length 2 #:min-left-length 5)
(hyphenate "snowman polymorphism" #\- #:min-right-length 6)
(code:comment @#,t{Next words won't be hyphenated becase of large #:min-left-length})
(hyphenate "ergo polymorphism" #\- #:min-length #f #:min-left-length 15)
(hyphenate "snowman polymorphism" #\- #:min-length #f #:min-left-length 15)
]
Another way of controlling hyphen frequency is with the @racket[#:min-hyphens] keyword argument, which sets the minimum number of hyphens in a hyphenatable word. (It has no effect on non-hyphenatable words.) The default is 1 hyphen. Larger values will reduce hyphens, but also prevent small words from breaking.
@examples[#:eval my-eval
(hyphenate "snowman polymorphism" #\-)
(hyphenate "snowman polymorphism" #\- #:min-hyphens 1)
(code:comment @#,t{next "snowman" won't be hyphenated becase it doesn't have 2 hyphens})
(hyphenate "snowman polymorphism" #\- #:min-hyphens 2)
(code:comment @#,t{next "polymorphism" won't be hyphenated becase it doesn't have 3 hyphens})
(hyphenate "snowman polymorphism" #\- #:min-hyphens 3)
]
Because the hyphenation is based on an algorithm rather than a dictionary, it makes good guesses with unusual words:
@examples[#:eval my-eval

@ -11,9 +11,14 @@
(check-equal? (hyphenate "edges") "edges") ;; word without matching patterns
(check-equal? (hyphenate "polymorphism") "poly\u00ADmor\u00ADphism")
(check-equal? (hyphenate "polymorphism" #:min-hyphens 1) "poly\u00ADmor\u00ADphism")
(check-equal? (hyphenate "polymorphism" #:min-hyphens 2) "poly\u00ADmor\u00ADphism")
(check-equal? (hyphenate "polymorphism" #:min-hyphens 3) "polymorphism")
(check-equal? (hyphenate "polymorphism" #:min-hyphens 42) "polymorphism")
(check-equal? (hyphenate "POLYmorPHISM") "POLY\u00ADmor\u00ADPHISM")
(check-equal? (hyphenate "polymorphism" #:min-length 100) "polymorphism")
(check-equal? (hyphenate "ugly" #:min-length 1) "ug\u00ADly")
(check-equal? (hyphenate "ugly" #:min-length 1 #:min-hyphens 2) "ugly")
(check-equal? (unhyphenate "poly\u00ADmor\u00ADphism") "polymorphism")
(check-equal? (hyphenate "polymorphism" #\-) "poly-mor-phism")
(check-equal? (hyphenate "compotumi" #\-) "com-po-tu-mi")
@ -21,6 +26,8 @@
(check-equal? (hyphenate "polymorphism" "foo") "polyfoomorfoophism")
(check-equal? (unhyphenate "polyfoomorfoophism" "foo") "polymorphism")
(check-equal? (hyphenate "circular polymorphism squandering") "cir\u00ADcu\u00ADlar poly\u00ADmor\u00ADphism squan\u00ADder\u00ADing")
(check-equal? (hyphenate "circular polymorphism squandering" #:min-hyphens 2) "cir\u00ADcu\u00ADlar poly\u00ADmor\u00ADphism squan\u00ADder\u00ADing")
(check-equal? (hyphenate "circular polymorphism squandering" #:min-hyphens 3) "circular polymorphism squandering")
(check-equal? (hyphenate '(p "circular polymorphism" amp (em "squandering"))) '(p "cir\u00ADcu\u00ADlar poly\u00ADmor\u00ADphism" amp (em "squan\u00ADder\u00ADing")))
(check-equal? (hyphenate "present project") "present project") ; exception words
;; test these last so exceptions have been set up already

Loading…
Cancel
Save