diff --git a/hyphenate/hyphenate/private/core.rkt b/hyphenate/hyphenate/private/core.rkt index b921cd61..401e91c6 100644 --- a/hyphenate/hyphenate/private/core.rkt +++ b/hyphenate/hyphenate/private/core.rkt @@ -18,7 +18,7 @@ (define word (string-replace ew "-" "")) ;; pattern has same number of points as word letters. 1 marks hyphenation point; 0 no hyphenation (define points - (cdr (map (λ(x) (if (equal? x "-") 1 0)) (regexp-split #px"\\p{L}" ew)))) + (cdr (map (λ (x) (if (equal? x "-") 1 0)) (regexp-split #px"\\p{L}" ew)))) ;; use list here so we can `apply` in `add-exception-word` (list word points)) @@ -161,7 +161,7 @@ (define (joiner->string joiner) (format "~a" joiner)) -(define (apply-proc proc x [omit-string (λ(x) #f)] [omit-txexpr (λ(x) #f)] [joiner default-joiner]) +(define (apply-proc proc x [omit-string (λ (x) #f)] [omit-txexpr (λ (x) #f)] [joiner default-joiner]) (let loop ([x x]) (cond [(and (string? x) (not (omit-string x))) @@ -179,29 +179,35 @@ #:min-length [min-length default-min-length] #:min-left-length [min-left-length default-min-left-length] #:min-right-length [min-right-length default-min-right-length] - #:omit-word [omit-word? (λ(x) #f)] - #:omit-string [omit-string? (λ(x) #f)] - #:omit-txexpr [omit-txexpr? (λ(x) #f)]) + #:omit-word [omit-word? (λ (x) #f)] + #:omit-string [omit-string? (λ (x) #f)] + #:omit-txexpr [omit-txexpr? (λ (x) #f)]) ;; todo?: connect this regexp pattern to the one used in word? predicate - (for-each (λ(ee) (add-exception-word word-cache ee)) extra-exceptions) + (for-each (λ (ee) (add-exception-word word-cache ee)) extra-exceptions) (define word-pattern #px"\\w+") ;; more restrictive than exception-word (define (replacer word . words) - (if (not (omit-word? word)) - (string-join (word->hyphenation-points word word-cache pattern-cache min-length min-left-length min-right-length) (joiner->string joiner)) - word)) - (define (insert-hyphens text) (regexp-replace* word-pattern text replacer)) - (define result (apply-proc insert-hyphens x omit-string? omit-txexpr? joiner)) - ;; deleting from the main cache is cheaper than having to do two cache lookups for every word - ;; (missing words will just be regenerated later) - (for-each (λ (ee) (remove-exception-word word-cache ee)) extra-exceptions) - result) + (if (omit-word? word) + word + (string-join (word->hyphenation-points word + word-cache + pattern-cache + min-length + min-left-length + min-right-length) + (joiner->string joiner)))) + (define (insert-hyphens text) (regexp-replace* word-pattern text replacer)) + (begin0 + (apply-proc insert-hyphens x omit-string? omit-txexpr? joiner) + ;; deleting from the main cache is cheaper than having to do two cache lookups for every word + ;; (missing words will just be regenerated later) + (for-each (λ (ee) (remove-exception-word word-cache ee)) extra-exceptions))) (define (unhyphenate x [joiner default-joiner] - #:omit-word [omit-word? (λ(x) #f)] - #:omit-string [omit-string? (λ(x) #f)] - #:omit-txexpr [omit-txexpr? (λ(x) #f)]) + #:omit-word [omit-word? (λ (x) #f)] + #:omit-string [omit-string? (λ (x) #f)] + #:omit-txexpr [omit-txexpr? (λ (x) #f)]) (define word-pattern (pregexp (format "[\\w~a]+" joiner))) (define (replacer word . words) (if (not (omit-word? word)) diff --git a/hyphenate/hyphenate/tests.rkt b/hyphenate/hyphenate/tests.rkt index 461b3cb3..fa420882 100644 --- a/hyphenate/hyphenate/tests.rkt +++ b/hyphenate/hyphenate/tests.rkt @@ -1,12 +1,12 @@ #lang racket/base (require (submod hyphenate safe) txexpr/base rackunit) -(define omit-em-tag (λ(x) (member (car x) '(em)))) -(define omit-p-tag (λ(x) (member (car x) '(p)))) -(define omit-foo-zam-tag (λ(x) (member (car x) '(foo zam)))) -(define ends-with-s (λ(x) (regexp-match #rx"s$" x))) -(define omit-script-tag (λ(x) (member (car x) '(script)))) -(define tx-with-attr (λ(x) (with-handlers ([exn:fail? (λ(exn) #f)]) +(define omit-em-tag (λ (x) (member (car x) '(em)))) +(define omit-p-tag (λ (x) (member (car x) '(p)))) +(define omit-foo-zam-tag (λ (x) (member (car x) '(foo zam)))) +(define ends-with-s (λ (x) (regexp-match #rx"s$" x))) +(define omit-script-tag (λ (x) (member (car x) '(script)))) +(define tx-with-attr (λ (x) (with-handlers ([exn:fail? (λ (exn) #f)]) (equal? (attr-ref x 'hyphens) "no-thanks")))) (check-equal? (hyphenate "edges") "edges") ;; word without matching patterns