From bb00063f540ce9ac4a1b4957cbe8ab546e33e9ae Mon Sep 17 00:00:00 2001 From: Matthew Butterick Date: Fri, 24 Jun 2016 07:03:15 -0700 Subject: [PATCH] avoid a list operation --- hyphenate/hyphenate/private/core.rkt | 61 ++++++++++++++-------------- 1 file changed, 31 insertions(+), 30 deletions(-) diff --git a/hyphenate/hyphenate/private/core.rkt b/hyphenate/hyphenate/private/core.rkt index f43efc25..b38ae474 100644 --- a/hyphenate/hyphenate/private/core.rkt +++ b/hyphenate/hyphenate/private/core.rkt @@ -12,19 +12,29 @@ (define default-joiner #\u00AD) +(define (trim-points points) + ;; for point list generated from a pattern, + ;; drop first two elements because they represent hyphenation weight + ;; before the starting "." and between "." and the first letter. + ;; drop last element because it represents hyphen after last "." + ;; after you drop these two, then each number corresponds to + ;; whether a hyphen goes after that letter. + (drop-right (drop points 2) 1)) + + ;; Convert the hyphenated pattern into a point array for use later. (define (convert-exception-word exception) (define (make-key x) (format ".~a." (string-replace x "-" ""))) (define (make-value x) (append (list 0) (map (λ(x) (if (equal? x "-") 1 0)) (regexp-split #px"\\p{L}" x)) (list 0))) ;; use list instead of cons because value is itself a list, and consing will muddy the waters - (list (make-key exception) (make-value exception))) + (list (make-key exception) (trim-points (make-value exception)))) (module+ test - (check-equal? (convert-exception-word "snôw-man") '(".snôwman." (0 0 0 0 0 1 0 0 0 0))) - (check-equal? (convert-exception-word "snôwman") '(".snôwman." (0 0 0 0 0 0 0 0 0 0))) - (check-equal? (convert-exception-word "sn-ôw-ma-n") '(".snôwman." (0 0 0 1 0 1 0 1 0 0)))) + (check-equal? (convert-exception-word "snôw-man") '(".snôwman." (0 0 0 1 0 0 0))) + (check-equal? (convert-exception-word "snôwman") '(".snôwman." (0 0 0 0 0 0 0))) + (check-equal? (convert-exception-word "sn-ôw-ma-n") '(".snôwman." (0 1 0 1 0 1 0)))) (define (add-exception-word word) @@ -90,32 +100,23 @@ (define (make-points word) (define word-with-dots (format ".~a." (string-downcase word))) - (define max-pattern - (hash-ref! (current-word-cache) word-with-dots - (λ () ; compute pattern when missing from cache - (define word-length (string-length word-with-dots)) - (define default-zero-pattern (make-list (add1 word-length) 0)) - ;; walk through all the substrings and see if there's a matching pattern. - (define matching-patterns - (for*/list ([start (in-range word-length)] - [end (in-range start word-length)] - [substr (in-value (substring word-with-dots start (add1 end)))] - [partial-pattern (in-value (hash-ref (current-patterns) substr #f))] - #:when partial-pattern) - ;; pad out partial-pattern to full length - ;; (so we can compare patterns to find max value for each slot) - (define left-zeroes (make-list start 0)) - (define right-zeroes (make-list (- (add1 word-length) (length partial-pattern) start) 0)) - (append left-zeroes partial-pattern right-zeroes))) - (calculate-max-pattern (cons default-zero-pattern matching-patterns))))) - - ;; for point list, - ;; drop first two elements because they represent hyphenation weight - ;; before the starting "." and between "." and the first letter. - ;; drop last element because it represents hyphen after last "." - ;; after you drop these two, then each number corresponds to - ;; whether a hyphen goes after that letter. - (drop-right (drop max-pattern 2) 1)) + (hash-ref! (current-word-cache) word-with-dots + (λ () ; compute pattern when missing from cache + (define word-length (string-length word-with-dots)) + (define default-zero-pattern (make-list (add1 word-length) 0)) + ;; walk through all the substrings and see if there's a matching pattern. + (define matching-patterns + (for*/list ([start (in-range word-length)] + [end (in-range start word-length)] + [substr (in-value (substring word-with-dots start (add1 end)))] + [partial-pattern (in-value (hash-ref (current-patterns) substr #f))] + #:when partial-pattern) + ;; pad out partial-pattern to full length + ;; (so we can compare patterns to find max value for each slot) + (define left-zeroes (make-list start 0)) + (define right-zeroes (make-list (- (add1 word-length) (length partial-pattern) start) 0)) + (append left-zeroes partial-pattern right-zeroes))) + (trim-points (calculate-max-pattern (cons default-zero-pattern matching-patterns)))))) ;; Find hyphenation points in a word. This is not quite synonymous with syllables.