restructure caching scheme

main
Matthew Butterick 9 years ago
parent 33fc788683
commit 55f4cbfa57

@ -14,9 +14,7 @@
sugar/define
(only-in xml xexpr/c)
(prefix-in core: hyphenate/private/core)
hyphenate/private/params
patterns-path exceptions-path)
(provide (all-from-out hyphenate/private/params))
(module+ safe
;; An exception-word is a string of word characters or hyphens.
@ -35,10 +33,13 @@
#:min-left-length (or/c (and/c integer? positive?) #f)
#:min-right-length (or/c (and/c integer? positive?) #f)) . ->* . xexpr/c)
(make-keyword-procedure
(λ (kws kw-args . rest)
(parameterize ([current-word-cache (make-hash (map (λ(xs) (apply cons xs)) exceptions))]
[current-patterns patterns])
(keyword-apply core:hyphenate kws kw-args rest)))))
;; put caches here so they can persist across successive invocations of the function.
;; but remain distinct between instantiations of this module (say, us vs fr)
;; pass them as arguments to the core:hyphenate func
(let ([word-cache exceptions] ; preload exceptions
[pattern-cache patterns])
(λ (kws kw-args . rest)
(keyword-apply core:hyphenate kws kw-args (list* word-cache pattern-cache rest))))))
(define+provide+safe unhyphenate
((xexpr/c) ((or/c char? string?)

@ -1,5 +1,5 @@
#lang racket/base
(require txexpr/base racket/string racket/list "params.rkt")
(require txexpr/base racket/string racket/list)
(provide hyphenate unhyphenate word->hyphenation-points exception-word->word+pattern string->hashpair)
(module+ test
@ -29,13 +29,13 @@
(check-equal? (exception-word->word+pattern "sn-ôw-ma-n") '("snôwman" (0 1 0 1 0 1 0))))
(define (add-exception-word word)
(define (add-exception-word word-cache word)
;; `hash-set!` not `hash-ref!`, because we want an exception to override an existing value
(apply hash-set! (current-word-cache) (exception-word->word+pattern word)))
(apply hash-set! word-cache (exception-word->word+pattern word)))
(define (remove-exception-word word)
(hash-remove! (current-word-cache) (string-replace word "-" "")))
(define (remove-exception-word word-cache word)
(hash-remove! word-cache (string-replace word "-" "")))
(define (string->natural i)
@ -95,8 +95,8 @@
(check-equal? (calculate-max-pattern '((1 2 3) (2 3 1) (3 1 2))) '(3 3 3)))
(define (make-points word)
(hash-ref! (current-word-cache) word
(define (make-points word word-cache pattern-cache)
(hash-ref! word-cache word
(λ () ; compute pattern when missing from cache
(define word-with-dots (format ".~a." (string-downcase word)))
(define word-length (string-length word-with-dots))
@ -106,7 +106,7 @@
(for*/list ([start (in-range word-length)]
[end (in-range start word-length)]
[substr (in-value (substring word-with-dots start (add1 end)))]
[partial-pattern (in-value (hash-ref (current-patterns) substr #f))]
[partial-pattern (in-value (hash-ref pattern-cache substr #f))]
#:when partial-pattern)
;; pad out partial-pattern to full length
;; (so we can compare patterns to find max value for each slot)
@ -124,7 +124,8 @@
;; Find hyphenation points in a word. This is not quite synonymous with syllables.
(define (word->hyphenation-points word
(define (word->hyphenation-points word
word-cache pattern-cache
[min-length #f]
[min-left-length #f]
[min-right-length #f])
@ -136,7 +137,7 @@
;; to create a no-hyphenation zone of length n, zero out the first n-1 points
;; and the last n points (because the last value in points is always superfluous)
(define word-points
(let* ([points (make-points word)]
(let* ([points (make-points word word-cache pattern-cache)]
[left-zeroes (min (or min-left-length default-min-left-length) (length points))]
[right-zeroes (min (or min-right-length default-min-right-length) (length points))])
(for/list ([(point idx) (in-indexed points)])
@ -169,7 +170,7 @@
[else x])))
(define (hyphenate x [joiner default-joiner]
(define (hyphenate word-cache pattern-cache x [joiner default-joiner]
#:exceptions [extra-exceptions empty]
#:min-length [min-length default-min-length]
#:min-left-length [min-left-length default-min-left-length]
@ -179,17 +180,17 @@
#:omit-txexpr [omit-txexpr? (λ(x) #f)])
;; todo?: connect this regexp pattern to the one used in word? predicate
(for-each add-exception-word extra-exceptions)
(for-each (λ(ee) (add-exception-word word-cache ee)) extra-exceptions)
(define word-pattern #px"\\w+") ;; more restrictive than exception-word
(define (replacer word . words)
(if (not (omit-word? word))
(string-join (word->hyphenation-points word min-length min-left-length min-right-length) (joiner->string joiner))
(string-join (word->hyphenation-points word word-cache pattern-cache min-length min-left-length min-right-length) (joiner->string joiner))
word))
(define (insert-hyphens text) (regexp-replace* word-pattern text replacer))
(define result (apply-proc insert-hyphens x omit-string? omit-txexpr?))
;; deleting from the main cache is cheaper than having to do two cache lookups for every word
;; (missing words will just be regenerated later)
(for-each remove-exception-word extra-exceptions)
(for-each (λ (ee) (remove-exception-word word-cache ee)) extra-exceptions)
result)

@ -14,4 +14,5 @@
#'(module exception-prep racket/base
(require racket/string racket/list hyphenate/private/core)
(provide exceptions)
(define exceptions (map exception-word->word+pattern (string-split str))))))))
(define exceptions
(make-hash (map (λ(xs) (apply cons xs)) (map exception-word->word+pattern (string-split str))))))))))

@ -1,5 +0,0 @@
#lang racket/base
(provide (all-defined-out))
(define current-patterns (make-parameter (make-hash)))
(define current-word-cache (make-parameter (make-hash)))

@ -1,6 +1,5 @@
#lang racket/base
(require (submod hyphenate safe) txexpr/base rackunit "private/params.rkt")
(require/expose "private/core.rkt" [add-exception-word])
(require (submod hyphenate safe) txexpr/base rackunit)
(define omit-em-tag (λ(x) (member (car x) '(em))))
(define omit-p-tag (λ(x) (member (car x) '(p))))
@ -71,13 +70,10 @@
(check-equal? (hyphenate "polymorphism" #\* #:exceptions '("polymo-rphism")) "polymo*rphism")
(check-equal? (hyphenate "polymorphism" #\-) "poly-mor-phism") ; exceptions are temporary
(add-exception-word "snow-man")
(check-equal? (hyphenate "snowman" "-") "snow-man")
(check-equal? (hyphenate "formidable" #\-) "for-mi-da-ble")
(module french racket/base
(require (submod hyphenate/fr safe) rackunit)
(check-equal? (hyphenate "formidable" #\-) "for-mi-dable")) ; hyphenates differently in French
(require 'french)
(require 'french)
Loading…
Cancel
Save