|
|
|
@ -1,5 +1,5 @@
|
|
|
|
|
#lang racket/base
|
|
|
|
|
(require txexpr/base racket/string racket/list "params.rkt")
|
|
|
|
|
(require txexpr/base racket/string racket/list sugar/list "params.rkt")
|
|
|
|
|
(provide hyphenate unhyphenate word->hyphenation-points convert-exception-word string->hashpair)
|
|
|
|
|
|
|
|
|
|
(module+ test
|
|
|
|
@ -16,7 +16,7 @@
|
|
|
|
|
(define (convert-exception-word exception)
|
|
|
|
|
(define (make-key x) (format ".~a." (string-replace x "-" "")))
|
|
|
|
|
(define (make-value x)
|
|
|
|
|
`(0 ,@(map (λ(x) (if (equal? x "-") 1 0)) (regexp-split #px"\\p{L}" x)) 0))
|
|
|
|
|
(append (list 0) (map (λ(x) (if (equal? x "-") 1 0)) (regexp-split #px"\\p{L}" x)) (list 0)))
|
|
|
|
|
;; use list instead of cons because value is itself a list, and consing will muddy the waters
|
|
|
|
|
(list (make-key exception) (make-value exception)))
|
|
|
|
|
|
|
|
|
@ -120,44 +120,41 @@
|
|
|
|
|
|
|
|
|
|
;; Find hyphenation points in a word. This is not quite synonymous with syllables.
|
|
|
|
|
(define (word->hyphenation-points word
|
|
|
|
|
[min-length-in #f]
|
|
|
|
|
[min-left-length-in #f]
|
|
|
|
|
[min-right-length-in #f])
|
|
|
|
|
[min-length #f]
|
|
|
|
|
[min-left-length #f]
|
|
|
|
|
[min-right-length #f])
|
|
|
|
|
|
|
|
|
|
(define min-length (or min-length-in default-min-length))
|
|
|
|
|
(define min-left-length (or min-left-length-in default-min-left-length))
|
|
|
|
|
(define min-right-length (or min-right-length-in default-min-right-length))
|
|
|
|
|
|
|
|
|
|
(define (add-no-hyphen-zone points)
|
|
|
|
|
(let ([left-zeroes (min min-left-length (length points))]
|
|
|
|
|
[right-zeroes (min min-right-length (length points))])
|
|
|
|
|
;; points is a list corresponding to the letters of the word.
|
|
|
|
|
(for/list ([(point idx) (in-indexed points)])
|
|
|
|
|
;; to create a no-hyphenation zone of length n, zero out the first n-1 points
|
|
|
|
|
;; and the last n points (because the last value in points is always superfluous)
|
|
|
|
|
(if (<= left-zeroes (add1 idx) (- (length points) right-zeroes))
|
|
|
|
|
point
|
|
|
|
|
0))))
|
|
|
|
|
|
|
|
|
|
(define (make-pieces word)
|
|
|
|
|
(define-values (word-pieces last-piece)
|
|
|
|
|
(for/fold ([word-pieces empty]
|
|
|
|
|
[current-piece empty])
|
|
|
|
|
;; explodes word into list of one-character strings (char list is slower)
|
|
|
|
|
([str (in-list (regexp-match* #rx"." word))]
|
|
|
|
|
[point (in-list (add-no-hyphen-zone (make-points word)))])
|
|
|
|
|
(define updated-current-piece (cons str current-piece))
|
|
|
|
|
(if (even? point)
|
|
|
|
|
(values word-pieces updated-current-piece) ; even point denotes character
|
|
|
|
|
(values (cons (string-join (reverse updated-current-piece) "") word-pieces) empty)))) ; odd point denotes char + syllable
|
|
|
|
|
(reverse (cons (string-join (reverse last-piece) "") word-pieces)))
|
|
|
|
|
(if (and min-length (< (string-length word) min-length))
|
|
|
|
|
(list word)
|
|
|
|
|
(make-pieces word)))
|
|
|
|
|
(cond
|
|
|
|
|
[(< (string-length word) (or min-length default-min-length)) (list word)]
|
|
|
|
|
[else
|
|
|
|
|
;; points is a list corresponding to the letters of the word.
|
|
|
|
|
;; to create a no-hyphenation zone of length n, zero out the first n-1 points
|
|
|
|
|
;; and the last n points (because the last value in points is always superfluous)
|
|
|
|
|
(define word-points
|
|
|
|
|
(let* ([points (make-points word)]
|
|
|
|
|
[left-zeroes (min (or min-left-length default-min-left-length) (length points))]
|
|
|
|
|
[right-zeroes (min (or min-right-length default-min-right-length) (length points))])
|
|
|
|
|
(for/list ([(point idx) (in-indexed points)])
|
|
|
|
|
(if (<= left-zeroes (add1 idx) (- (length points) right-zeroes))
|
|
|
|
|
point
|
|
|
|
|
0))))
|
|
|
|
|
|
|
|
|
|
;; odd-valued points in the pattern denote hyphenation points
|
|
|
|
|
(define odd-points (for/list ([(wp idx) (in-indexed word-points)]
|
|
|
|
|
#:when (odd? wp))
|
|
|
|
|
idx))
|
|
|
|
|
|
|
|
|
|
;; the hyphenation goes after the indexed letter, so add1 to the raw points for slicing
|
|
|
|
|
(define odd-points-plus-one (map add1 odd-points))
|
|
|
|
|
(for/list ([start (in-list (cons 0 odd-points-plus-one))]
|
|
|
|
|
[end (in-list (append odd-points-plus-one (list (string-length word))))])
|
|
|
|
|
(substring word start end))]))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
;; joiner contract allows char or string; this coerces to string.
|
|
|
|
|
(define (joiner->string joiner) (format "~a" joiner))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
(define (apply-proc proc x [omit-string (λ(x) #f)] [omit-txexpr (λ(x) #f)])
|
|
|
|
|
(let loop ([x x])
|
|
|
|
|
(cond
|
|
|
|
@ -183,8 +180,7 @@
|
|
|
|
|
(if (not (omit-word? word))
|
|
|
|
|
(string-join (word->hyphenation-points word min-length min-left-length min-right-length) (joiner->string joiner))
|
|
|
|
|
word))
|
|
|
|
|
(define (insert-hyphens text)
|
|
|
|
|
(regexp-replace* word-pattern text replacer))
|
|
|
|
|
(define (insert-hyphens text) (regexp-replace* word-pattern text replacer))
|
|
|
|
|
(apply-proc insert-hyphens x omit-string? omit-txexpr?))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@ -197,7 +193,5 @@
|
|
|
|
|
(if (not (omit-word? word))
|
|
|
|
|
(string-replace word (joiner->string joiner) "")
|
|
|
|
|
word))
|
|
|
|
|
(define (remove-hyphens text)
|
|
|
|
|
(regexp-replace* word-pattern text replacer))
|
|
|
|
|
|
|
|
|
|
(define (remove-hyphens text) (regexp-replace* word-pattern text replacer))
|
|
|
|
|
(apply-proc remove-hyphens x omit-string? omit-txexpr?))
|
|
|
|
|