From 25f7b73abd72cf81861ec4fc6aa0f6ae2483ec72 Mon Sep 17 00:00:00 2001 From: Matthew Butterick Date: Fri, 24 Jun 2016 07:45:24 -0700 Subject: [PATCH] shorten cache key --- hyphenate/hyphenate/private/core.rkt | 50 ++++++++++++++-------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/hyphenate/hyphenate/private/core.rkt b/hyphenate/hyphenate/private/core.rkt index b38ae474..81dd9e0c 100644 --- a/hyphenate/hyphenate/private/core.rkt +++ b/hyphenate/hyphenate/private/core.rkt @@ -5,6 +5,7 @@ (module+ test (require rackunit)) + ;; module default values (define default-min-length 5) (define default-min-left-length 2) @@ -12,32 +13,24 @@ (define default-joiner #\u00AD) -(define (trim-points points) - ;; for point list generated from a pattern, - ;; drop first two elements because they represent hyphenation weight - ;; before the starting "." and between "." and the first letter. - ;; drop last element because it represents hyphen after last "." - ;; after you drop these two, then each number corresponds to - ;; whether a hyphen goes after that letter. - (drop-right (drop points 2) 1)) - - -;; Convert the hyphenated pattern into a point array for use later. -(define (convert-exception-word exception) - (define (make-key x) (format ".~a." (string-replace x "-" ""))) - (define (make-value x) - (append (list 0) (map (λ(x) (if (equal? x "-") 1 0)) (regexp-split #px"\\p{L}" x)) (list 0))) - ;; use list instead of cons because value is itself a list, and consing will muddy the waters - (list (make-key exception) (trim-points (make-value exception)))) +(define (convert-exception-word ew) + ;; an exception word indicates its breakpoints with added hyphens + (define word (string-replace ew "-" "")) + ;; pattern has same number of points as word letters. 1 marks hyphenation point; 0 no hyphenation + (define points + (cdr (map (λ(x) (if (equal? x "-") 1 0)) (regexp-split #px"\\p{L}" ew)))) + ;; use list here so we can `apply` in `add-exception-word` + (list word points)) (module+ test - (check-equal? (convert-exception-word "snôw-man") '(".snôwman." (0 0 0 1 0 0 0))) - (check-equal? (convert-exception-word "snôwman") '(".snôwman." (0 0 0 0 0 0 0))) - (check-equal? (convert-exception-word "sn-ôw-ma-n") '(".snôwman." (0 1 0 1 0 1 0)))) + (check-equal? (convert-exception-word "snôw-man") '("snôwman" (0 0 0 1 0 0 0))) + (check-equal? (convert-exception-word "snôwman") '("snôwman" (0 0 0 0 0 0 0))) + (check-equal? (convert-exception-word "sn-ôw-ma-n") '("snôwman" (0 1 0 1 0 1 0)))) (define (add-exception-word word) + ;; `hash-set!` not `hash-ref!`, because we want an exception to override an existing value (apply hash-set! (current-word-cache) (convert-exception-word word))) @@ -99,9 +92,9 @@ (define (make-points word) - (define word-with-dots (format ".~a." (string-downcase word))) - (hash-ref! (current-word-cache) word-with-dots + (hash-ref! (current-word-cache) word (λ () ; compute pattern when missing from cache + (define word-with-dots (format ".~a." (string-downcase word))) (define word-length (string-length word-with-dots)) (define default-zero-pattern (make-list (add1 word-length) 0)) ;; walk through all the substrings and see if there's a matching pattern. @@ -116,7 +109,14 @@ (define left-zeroes (make-list start 0)) (define right-zeroes (make-list (- (add1 word-length) (length partial-pattern) start) 0)) (append left-zeroes partial-pattern right-zeroes))) - (trim-points (calculate-max-pattern (cons default-zero-pattern matching-patterns)))))) + (define max-pattern (calculate-max-pattern (cons default-zero-pattern matching-patterns))) + ;; for point list generated from a pattern, + ;; drop first two elements because they represent hyphenation weight + ;; before the starting "." and between "." and the first letter. + ;; drop last element because it represents hyphen after last "." + ;; after you drop these two, then each number corresponds to + ;; whether a hyphen goes after that letter. + (drop-right (drop max-pattern 2) 1)))) ;; Find hyphenation points in a word. This is not quite synonymous with syllables. @@ -141,12 +141,12 @@ 0)))) ;; odd-valued points in the pattern denote hyphenation points - (define odd-points (for/list ([(wp idx) (in-indexed word-points)] + (define odd-point-indexes (for/list ([(wp idx) (in-indexed word-points)] #:when (odd? wp)) idx)) ;; the hyphenation goes after the indexed letter, so add1 to the raw points for slicing - (define breakpoints (append (list 0) (map add1 odd-points) (list (string-length word)))) + (define breakpoints (append (list 0) (map add1 odd-point-indexes) (list (string-length word)))) (for/list ([start (in-list breakpoints)] [end (in-list (cdr breakpoints))]) ; shorter list controls exit of loop (substring word start end))]))