diff --git a/hyphenate/hyphenate/private/bootstrap.rkt b/hyphenate/hyphenate/private/bootstrap.rkt index f3438a7a..8614510b 100644 --- a/hyphenate/hyphenate/private/bootstrap.rkt +++ b/hyphenate/hyphenate/private/bootstrap.rkt @@ -14,9 +14,7 @@ sugar/define (only-in xml xexpr/c) (prefix-in core: hyphenate/private/core) - hyphenate/private/params patterns-path exceptions-path) - (provide (all-from-out hyphenate/private/params)) (module+ safe ;; An exception-word is a string of word characters or hyphens. @@ -35,10 +33,13 @@ #:min-left-length (or/c (and/c integer? positive?) #f) #:min-right-length (or/c (and/c integer? positive?) #f)) . ->* . xexpr/c) (make-keyword-procedure - (λ (kws kw-args . rest) - (parameterize ([current-word-cache (make-hash (map (λ(xs) (apply cons xs)) exceptions))] - [current-patterns patterns]) - (keyword-apply core:hyphenate kws kw-args rest))))) + ;; put caches here so they can persist across successive invocations of the function. + ;; but remain distinct between instantiations of this module (say, us vs fr) + ;; pass them as arguments to the core:hyphenate func + (let ([word-cache exceptions] ; preload exceptions + [pattern-cache patterns]) + (λ (kws kw-args . rest) + (keyword-apply core:hyphenate kws kw-args (list* word-cache pattern-cache rest)))))) (define+provide+safe unhyphenate ((xexpr/c) ((or/c char? string?) diff --git a/hyphenate/hyphenate/private/core.rkt b/hyphenate/hyphenate/private/core.rkt index 91137722..3205bf15 100644 --- a/hyphenate/hyphenate/private/core.rkt +++ b/hyphenate/hyphenate/private/core.rkt @@ -1,5 +1,5 @@ #lang racket/base -(require txexpr/base racket/string racket/list "params.rkt") +(require txexpr/base racket/string racket/list) (provide hyphenate unhyphenate word->hyphenation-points exception-word->word+pattern string->hashpair) (module+ test @@ -29,13 +29,13 @@ (check-equal? (exception-word->word+pattern "sn-ôw-ma-n") '("snôwman" (0 1 0 1 0 1 0)))) -(define (add-exception-word word) +(define (add-exception-word word-cache word) ;; `hash-set!` not `hash-ref!`, because we want an exception to override an existing value - (apply hash-set! (current-word-cache) (exception-word->word+pattern word))) + (apply hash-set! word-cache (exception-word->word+pattern word))) -(define (remove-exception-word word) - (hash-remove! (current-word-cache) (string-replace word "-" ""))) +(define (remove-exception-word word-cache word) + (hash-remove! word-cache (string-replace word "-" ""))) (define (string->natural i) @@ -95,8 +95,8 @@ (check-equal? (calculate-max-pattern '((1 2 3) (2 3 1) (3 1 2))) '(3 3 3))) -(define (make-points word) - (hash-ref! (current-word-cache) word +(define (make-points word word-cache pattern-cache) + (hash-ref! word-cache word (λ () ; compute pattern when missing from cache (define word-with-dots (format ".~a." (string-downcase word))) (define word-length (string-length word-with-dots)) @@ -106,7 +106,7 @@ (for*/list ([start (in-range word-length)] [end (in-range start word-length)] [substr (in-value (substring word-with-dots start (add1 end)))] - [partial-pattern (in-value (hash-ref (current-patterns) substr #f))] + [partial-pattern (in-value (hash-ref pattern-cache substr #f))] #:when partial-pattern) ;; pad out partial-pattern to full length ;; (so we can compare patterns to find max value for each slot) @@ -124,7 +124,8 @@ ;; Find hyphenation points in a word. This is not quite synonymous with syllables. -(define (word->hyphenation-points word +(define (word->hyphenation-points word + word-cache pattern-cache [min-length #f] [min-left-length #f] [min-right-length #f]) @@ -136,7 +137,7 @@ ;; to create a no-hyphenation zone of length n, zero out the first n-1 points ;; and the last n points (because the last value in points is always superfluous) (define word-points - (let* ([points (make-points word)] + (let* ([points (make-points word word-cache pattern-cache)] [left-zeroes (min (or min-left-length default-min-left-length) (length points))] [right-zeroes (min (or min-right-length default-min-right-length) (length points))]) (for/list ([(point idx) (in-indexed points)]) @@ -169,7 +170,7 @@ [else x]))) -(define (hyphenate x [joiner default-joiner] +(define (hyphenate word-cache pattern-cache x [joiner default-joiner] #:exceptions [extra-exceptions empty] #:min-length [min-length default-min-length] #:min-left-length [min-left-length default-min-left-length] @@ -179,17 +180,17 @@ #:omit-txexpr [omit-txexpr? (λ(x) #f)]) ;; todo?: connect this regexp pattern to the one used in word? predicate - (for-each add-exception-word extra-exceptions) + (for-each (λ(ee) (add-exception-word word-cache ee)) extra-exceptions) (define word-pattern #px"\\w+") ;; more restrictive than exception-word (define (replacer word . words) (if (not (omit-word? word)) - (string-join (word->hyphenation-points word min-length min-left-length min-right-length) (joiner->string joiner)) + (string-join (word->hyphenation-points word word-cache pattern-cache min-length min-left-length min-right-length) (joiner->string joiner)) word)) (define (insert-hyphens text) (regexp-replace* word-pattern text replacer)) (define result (apply-proc insert-hyphens x omit-string? omit-txexpr?)) ;; deleting from the main cache is cheaper than having to do two cache lookups for every word ;; (missing words will just be regenerated later) - (for-each remove-exception-word extra-exceptions) + (for-each (λ (ee) (remove-exception-word word-cache ee)) extra-exceptions) result) diff --git a/hyphenate/hyphenate/private/exception-prep.rkt b/hyphenate/hyphenate/private/exception-prep.rkt index 289d16b3..4812b6f7 100644 --- a/hyphenate/hyphenate/private/exception-prep.rkt +++ b/hyphenate/hyphenate/private/exception-prep.rkt @@ -14,4 +14,5 @@ #'(module exception-prep racket/base (require racket/string racket/list hyphenate/private/core) (provide exceptions) - (define exceptions (map exception-word->word+pattern (string-split str)))))))) \ No newline at end of file + (define exceptions + (make-hash (map (λ(xs) (apply cons xs)) (map exception-word->word+pattern (string-split str)))))))))) \ No newline at end of file diff --git a/hyphenate/hyphenate/private/params.rkt b/hyphenate/hyphenate/private/params.rkt deleted file mode 100644 index 202d3dc4..00000000 --- a/hyphenate/hyphenate/private/params.rkt +++ /dev/null @@ -1,5 +0,0 @@ -#lang racket/base -(provide (all-defined-out)) - -(define current-patterns (make-parameter (make-hash))) -(define current-word-cache (make-parameter (make-hash))) \ No newline at end of file diff --git a/hyphenate/hyphenate/tests.rkt b/hyphenate/hyphenate/tests.rkt index 65bc583d..329c4a39 100644 --- a/hyphenate/hyphenate/tests.rkt +++ b/hyphenate/hyphenate/tests.rkt @@ -1,6 +1,5 @@ #lang racket/base -(require (submod hyphenate safe) txexpr/base rackunit "private/params.rkt") -(require/expose "private/core.rkt" [add-exception-word]) +(require (submod hyphenate safe) txexpr/base rackunit) (define omit-em-tag (λ(x) (member (car x) '(em)))) (define omit-p-tag (λ(x) (member (car x) '(p)))) @@ -71,13 +70,10 @@ (check-equal? (hyphenate "polymorphism" #\* #:exceptions '("polymo-rphism")) "polymo*rphism") (check-equal? (hyphenate "polymorphism" #\-) "poly-mor-phism") ; exceptions are temporary -(add-exception-word "snow-man") -(check-equal? (hyphenate "snowman" "-") "snow-man") - (check-equal? (hyphenate "formidable" #\-) "for-mi-da-ble") (module french racket/base (require (submod hyphenate/fr safe) rackunit) (check-equal? (hyphenate "formidable" #\-) "for-mi-dable")) ; hyphenates differently in French -(require 'french) +(require 'french) \ No newline at end of file