diff --git a/hyphenate/hyphenate/private/bootstrap.rkt b/hyphenate/hyphenate/private/bootstrap.rkt index 4c4f373d..f3438a7a 100644 --- a/hyphenate/hyphenate/private/bootstrap.rkt +++ b/hyphenate/hyphenate/private/bootstrap.rkt @@ -7,38 +7,43 @@ [(_ dir) (with-syntax ([patterns-path (format "~a/patterns.rkt" (syntax->datum #'dir))] [exceptions-path (format "~a/exceptions.rkt" (syntax->datum #'dir))]) - (replace-context stx - #'(begin - (require txexpr/base sugar/define (only-in xml xexpr/c) - (prefix-in core: hyphenate/private/core) hyphenate/private/params patterns-path exceptions-path) - (provide (all-from-out hyphenate/private/params)) - - (module+ safe - ;; An exception-word is a string of word characters or hyphens. - (define (exception-word? x) - (and (string? x) (regexp-match #px"^(\\p{L}|-)+$" x) #t)) - (define (exception-words? xs) - (and (list? xs) (andmap exception-word? xs)))) - - (define+provide+safe hyphenate - ((xexpr?) ((or/c char? string?) - #:exceptions exception-words? - #:min-length (or/c integer? #f) - #:omit-word (string? . -> . any/c) - #:omit-string (string? . -> . any/c) - #:omit-txexpr (txexpr? . -> . any/c) - #:min-left-length (or/c (and/c integer? positive?) #f) - #:min-right-length (or/c (and/c integer? positive?) #f)) . ->* . xexpr/c) - (make-keyword-procedure (λ (kws kw-args . rest) - (parameterize ([current-word-cache (make-hash)] - [current-patterns patterns] - [current-exceptions exceptions]) - (keyword-apply core:hyphenate kws kw-args rest))))) - - (define+provide+safe unhyphenate - ((xexpr/c) ((or/c char? string?) - #:omit-word (string? . -> . any/c) - #:omit-string (string? . -> . any/c) - #:omit-txexpr (txexpr? . -> . any/c)) . ->* . xexpr/c) - (make-keyword-procedure (λ (kws kw-args . rest) - (keyword-apply core:unhyphenate kws kw-args rest)))))))])) \ No newline at end of file + (replace-context + stx + #'(begin + (require txexpr/base + sugar/define + (only-in xml xexpr/c) + (prefix-in core: hyphenate/private/core) + hyphenate/private/params + patterns-path exceptions-path) + (provide (all-from-out hyphenate/private/params)) + + (module+ safe + ;; An exception-word is a string of word characters or hyphens. + (define (exception-word? x) + (and (string? x) (regexp-match #px"^(\\p{L}|-)+$" x) #t)) + (define (exception-words? xs) + (and (list? xs) (andmap exception-word? xs)))) + + (define+provide+safe hyphenate + ((xexpr?) ((or/c char? string?) + #:exceptions exception-words? + #:min-length (or/c integer? #f) + #:omit-word (string? . -> . any/c) + #:omit-string (string? . -> . any/c) + #:omit-txexpr (txexpr? . -> . any/c) + #:min-left-length (or/c (and/c integer? positive?) #f) + #:min-right-length (or/c (and/c integer? positive?) #f)) . ->* . xexpr/c) + (make-keyword-procedure + (λ (kws kw-args . rest) + (parameterize ([current-word-cache (make-hash (map (λ(xs) (apply cons xs)) exceptions))] + [current-patterns patterns]) + (keyword-apply core:hyphenate kws kw-args rest))))) + + (define+provide+safe unhyphenate + ((xexpr/c) ((or/c char? string?) + #:omit-word (string? . -> . any/c) + #:omit-string (string? . -> . any/c) + #:omit-txexpr (txexpr? . -> . any/c)) . ->* . xexpr/c) + (make-keyword-procedure (λ (kws kw-args . rest) + (keyword-apply core:unhyphenate kws kw-args rest)))))))])) \ No newline at end of file diff --git a/hyphenate/hyphenate/private/core.rkt b/hyphenate/hyphenate/private/core.rkt index 8a4d0fe6..ab18a4bd 100644 --- a/hyphenate/hyphenate/private/core.rkt +++ b/hyphenate/hyphenate/private/core.rkt @@ -11,15 +11,13 @@ (define default-min-right-length 2) (define default-joiner #\u00AD) -(define (cache-word pat) - (hash-set! (current-word-cache) (car pat) (cdr pat))) - ;; Convert the hyphenated pattern into a point array for use later. (define (convert-exception-word exception) (define (make-key x) (format ".~a." (string-replace x "-" ""))) (define (make-value x) `(0 ,@(map (λ(x) (if (equal? x "-") 1 0)) (regexp-split #px"\\p{L}" x)) 0)) + ;; use list instead of cons because value is itself a list, and consing will muddy the waters (list (make-key exception) (make-value exception))) @@ -30,7 +28,7 @@ (define (add-exception-word word) - (current-exceptions (apply hash-set (current-exceptions) (convert-exception-word word)))) + (apply hash-set! (current-word-cache) (convert-exception-word word))) (define (string->natural i) @@ -75,7 +73,7 @@ (check-equal? (string->hashpair ".ach4") '(".ach" (0 0 0 0 4)))) -(define (apply-map-max patterns) +(define (calculate-max-pattern patterns) ;; each pattern is a list of numbers ;; all the patterns have the same length (if (empty? patterns) @@ -85,40 +83,36 @@ (module+ test (require rackunit) - (check-equal? (apply-map-max empty) empty) - (check-equal? (apply-map-max '((1 0 0))) '(1 0 0)) - (check-equal? (apply-map-max '((1 0 0) (0 1 0))) '(1 1 0)) - (check-equal? (apply-map-max '((1 0 0) (0 1 0) (0 0 1))) '(1 1 1)) - (check-equal? (apply-map-max '((1 2 3) (2 3 1) (3 1 2))) '(3 3 3))) + (check-equal? (calculate-max-pattern empty) empty) + (check-equal? (calculate-max-pattern '((1 0 0))) '(1 0 0)) + (check-equal? (calculate-max-pattern '((1 0 0) (0 1 0))) '(1 1 0)) + (check-equal? (calculate-max-pattern '((1 0 0) (0 1 0) (0 0 1))) '(1 1 1)) + (check-equal? (calculate-max-pattern '((1 2 3) (2 3 1) (3 1 2))) '(3 3 3))) (define (make-points word) - ;; walk through all the substrings and see if there's a matching pattern. - ;; if so, pad it out to full length (so we can `apply-map-max` later on) (define word-with-dots (format ".~a." (string-downcase word))) - (define matching-patterns - (cond - [(or (hash-ref (current-word-cache) word-with-dots #f) - (hash-ref (current-exceptions) word-with-dots #f)) => list] - [else - (define word-length (string-length word-with-dots)) - ;; ensures there's at least one (null) element in return value - (define starting-value (make-list (add1 word-length) 0)) - (for*/fold ([acc (cons starting-value null)]) - ([start (in-range word-length)] - [end (in-range start word-length)] - [substr (in-value (substring word-with-dots start (add1 end)))] - #:when (hash-has-key? (current-patterns) substr)) - (define partial-pattern (hash-ref (current-patterns) substr)) - ;; put together head padding + value + tail padding - (define full-length-pattern - (append (make-list start 0) - partial-pattern - (make-list (- (add1 word-length) (length partial-pattern) start) 0))) - (cons full-length-pattern acc))])) - - (define max-value-pattern (apply-map-max matching-patterns)) - (cache-word (cons word-with-dots max-value-pattern)) + (define max-pattern + (hash-ref! (current-word-cache) word-with-dots + (λ () ; compute pattern when missing from cache + (define word-length (string-length word-with-dots)) + (define default-return-value (make-list (add1 word-length) 0)) + (define matching-patterns + ;; walk through all the substrings and see if there's a matching pattern. + ;; if so, pad it out to full length (so we can `calculate-max-pattern` later on) + (for*/fold ([acc (cons default-return-value null)]) + ([start (in-range word-length)] + [end (in-range start word-length)] + [substr (in-value (substring word-with-dots start (add1 end)))] + #:when (hash-has-key? (current-patterns) substr)) + (define partial-pattern (hash-ref (current-patterns) substr)) + (define full-length-pattern + ;; put together head padding + value + tail padding + (append (make-list start 0) + partial-pattern + (make-list (- (add1 word-length) (length partial-pattern) start) 0))) + (cons full-length-pattern acc))) + (calculate-max-pattern matching-patterns)))) ;; for point list, ;; drop first two elements because they represent hyphenation weight @@ -126,7 +120,7 @@ ;; drop last element because it represents hyphen after last "." ;; after you drop these two, then each number corresponds to ;; whether a hyphen goes after that letter. - (drop-right (drop max-value-pattern 2) 1)) + (drop-right (drop max-pattern 2) 1)) ;; Find hyphenation points in a word. This is not quite synonymous with syllables. @@ -177,6 +171,7 @@ (make-txexpr (get-tag x) (get-attrs x) (map loop (get-elements x)))] [else x]))) +(require sugar/debug) (define (hyphenate x [joiner default-joiner] #:exceptions [extra-exceptions empty] diff --git a/hyphenate/hyphenate/private/exception-prep.rkt b/hyphenate/hyphenate/private/exception-prep.rkt index fd5165e8..fbdf0454 100644 --- a/hyphenate/hyphenate/private/exception-prep.rkt +++ b/hyphenate/hyphenate/private/exception-prep.rkt @@ -14,4 +14,4 @@ #'(module exception-prep racket/base (require racket/string racket/list hyphenate/private/core) (provide exceptions) - (define exceptions (apply hash (append-map convert-exception-word (string-split str))))))))) \ No newline at end of file + (define exceptions (map convert-exception-word (string-split str)))))))) \ No newline at end of file diff --git a/hyphenate/hyphenate/private/params.rkt b/hyphenate/hyphenate/private/params.rkt index 3c072a11..202d3dc4 100644 --- a/hyphenate/hyphenate/private/params.rkt +++ b/hyphenate/hyphenate/private/params.rkt @@ -2,5 +2,4 @@ (provide (all-defined-out)) (define current-patterns (make-parameter (make-hash))) -(define current-exceptions (make-parameter (hash))) (define current-word-cache (make-parameter (make-hash))) \ No newline at end of file