get rid of exceptions cache; fix dumb caching error

main
Matthew Butterick 8 years ago
parent 95fc9019d7
commit d443aa276c

@ -7,38 +7,43 @@
[(_ dir) [(_ dir)
(with-syntax ([patterns-path (format "~a/patterns.rkt" (syntax->datum #'dir))] (with-syntax ([patterns-path (format "~a/patterns.rkt" (syntax->datum #'dir))]
[exceptions-path (format "~a/exceptions.rkt" (syntax->datum #'dir))]) [exceptions-path (format "~a/exceptions.rkt" (syntax->datum #'dir))])
(replace-context stx (replace-context
#'(begin stx
(require txexpr/base sugar/define (only-in xml xexpr/c) #'(begin
(prefix-in core: hyphenate/private/core) hyphenate/private/params patterns-path exceptions-path) (require txexpr/base
(provide (all-from-out hyphenate/private/params)) sugar/define
(only-in xml xexpr/c)
(module+ safe (prefix-in core: hyphenate/private/core)
;; An exception-word is a string of word characters or hyphens. hyphenate/private/params
(define (exception-word? x) patterns-path exceptions-path)
(and (string? x) (regexp-match #px"^(\\p{L}|-)+$" x) #t)) (provide (all-from-out hyphenate/private/params))
(define (exception-words? xs)
(and (list? xs) (andmap exception-word? xs)))) (module+ safe
;; An exception-word is a string of word characters or hyphens.
(define+provide+safe hyphenate (define (exception-word? x)
((xexpr?) ((or/c char? string?) (and (string? x) (regexp-match #px"^(\\p{L}|-)+$" x) #t))
#:exceptions exception-words? (define (exception-words? xs)
#:min-length (or/c integer? #f) (and (list? xs) (andmap exception-word? xs))))
#:omit-word (string? . -> . any/c)
#:omit-string (string? . -> . any/c) (define+provide+safe hyphenate
#:omit-txexpr (txexpr? . -> . any/c) ((xexpr?) ((or/c char? string?)
#:min-left-length (or/c (and/c integer? positive?) #f) #:exceptions exception-words?
#:min-right-length (or/c (and/c integer? positive?) #f)) . ->* . xexpr/c) #:min-length (or/c integer? #f)
(make-keyword-procedure (λ (kws kw-args . rest) #:omit-word (string? . -> . any/c)
(parameterize ([current-word-cache (make-hash)] #:omit-string (string? . -> . any/c)
[current-patterns patterns] #:omit-txexpr (txexpr? . -> . any/c)
[current-exceptions exceptions]) #:min-left-length (or/c (and/c integer? positive?) #f)
(keyword-apply core:hyphenate kws kw-args rest))))) #:min-right-length (or/c (and/c integer? positive?) #f)) . ->* . xexpr/c)
(make-keyword-procedure
(define+provide+safe unhyphenate (λ (kws kw-args . rest)
((xexpr/c) ((or/c char? string?) (parameterize ([current-word-cache (make-hash (map (λ(xs) (apply cons xs)) exceptions))]
#:omit-word (string? . -> . any/c) [current-patterns patterns])
#:omit-string (string? . -> . any/c) (keyword-apply core:hyphenate kws kw-args rest)))))
#:omit-txexpr (txexpr? . -> . any/c)) . ->* . xexpr/c)
(make-keyword-procedure (λ (kws kw-args . rest) (define+provide+safe unhyphenate
(keyword-apply core:unhyphenate kws kw-args rest)))))))])) ((xexpr/c) ((or/c char? string?)
#:omit-word (string? . -> . any/c)
#:omit-string (string? . -> . any/c)
#:omit-txexpr (txexpr? . -> . any/c)) . ->* . xexpr/c)
(make-keyword-procedure (λ (kws kw-args . rest)
(keyword-apply core:unhyphenate kws kw-args rest)))))))]))

@ -11,15 +11,13 @@
(define default-min-right-length 2) (define default-min-right-length 2)
(define default-joiner #\u00AD) (define default-joiner #\u00AD)
(define (cache-word pat)
(hash-set! (current-word-cache) (car pat) (cdr pat)))
;; Convert the hyphenated pattern into a point array for use later. ;; Convert the hyphenated pattern into a point array for use later.
(define (convert-exception-word exception) (define (convert-exception-word exception)
(define (make-key x) (format ".~a." (string-replace x "-" ""))) (define (make-key x) (format ".~a." (string-replace x "-" "")))
(define (make-value x) (define (make-value x)
`(0 ,@(map (λ(x) (if (equal? x "-") 1 0)) (regexp-split #px"\\p{L}" x)) 0)) `(0 ,@(map (λ(x) (if (equal? x "-") 1 0)) (regexp-split #px"\\p{L}" x)) 0))
;; use list instead of cons because value is itself a list, and consing will muddy the waters
(list (make-key exception) (make-value exception))) (list (make-key exception) (make-value exception)))
@ -30,7 +28,7 @@
(define (add-exception-word word) (define (add-exception-word word)
(current-exceptions (apply hash-set (current-exceptions) (convert-exception-word word)))) (apply hash-set! (current-word-cache) (convert-exception-word word)))
(define (string->natural i) (define (string->natural i)
@ -75,7 +73,7 @@
(check-equal? (string->hashpair ".ach4") '(".ach" (0 0 0 0 4)))) (check-equal? (string->hashpair ".ach4") '(".ach" (0 0 0 0 4))))
(define (apply-map-max patterns) (define (calculate-max-pattern patterns)
;; each pattern is a list of numbers ;; each pattern is a list of numbers
;; all the patterns have the same length ;; all the patterns have the same length
(if (empty? patterns) (if (empty? patterns)
@ -85,40 +83,36 @@
(module+ test (module+ test
(require rackunit) (require rackunit)
(check-equal? (apply-map-max empty) empty) (check-equal? (calculate-max-pattern empty) empty)
(check-equal? (apply-map-max '((1 0 0))) '(1 0 0)) (check-equal? (calculate-max-pattern '((1 0 0))) '(1 0 0))
(check-equal? (apply-map-max '((1 0 0) (0 1 0))) '(1 1 0)) (check-equal? (calculate-max-pattern '((1 0 0) (0 1 0))) '(1 1 0))
(check-equal? (apply-map-max '((1 0 0) (0 1 0) (0 0 1))) '(1 1 1)) (check-equal? (calculate-max-pattern '((1 0 0) (0 1 0) (0 0 1))) '(1 1 1))
(check-equal? (apply-map-max '((1 2 3) (2 3 1) (3 1 2))) '(3 3 3))) (check-equal? (calculate-max-pattern '((1 2 3) (2 3 1) (3 1 2))) '(3 3 3)))
(define (make-points word) (define (make-points word)
;; walk through all the substrings and see if there's a matching pattern.
;; if so, pad it out to full length (so we can `apply-map-max` later on)
(define word-with-dots (format ".~a." (string-downcase word))) (define word-with-dots (format ".~a." (string-downcase word)))
(define matching-patterns (define max-pattern
(cond (hash-ref! (current-word-cache) word-with-dots
[(or (hash-ref (current-word-cache) word-with-dots #f) (λ () ; compute pattern when missing from cache
(hash-ref (current-exceptions) word-with-dots #f)) => list] (define word-length (string-length word-with-dots))
[else (define default-return-value (make-list (add1 word-length) 0))
(define word-length (string-length word-with-dots)) (define matching-patterns
;; ensures there's at least one (null) element in return value ;; walk through all the substrings and see if there's a matching pattern.
(define starting-value (make-list (add1 word-length) 0)) ;; if so, pad it out to full length (so we can `calculate-max-pattern` later on)
(for*/fold ([acc (cons starting-value null)]) (for*/fold ([acc (cons default-return-value null)])
([start (in-range word-length)] ([start (in-range word-length)]
[end (in-range start word-length)] [end (in-range start word-length)]
[substr (in-value (substring word-with-dots start (add1 end)))] [substr (in-value (substring word-with-dots start (add1 end)))]
#:when (hash-has-key? (current-patterns) substr)) #:when (hash-has-key? (current-patterns) substr))
(define partial-pattern (hash-ref (current-patterns) substr)) (define partial-pattern (hash-ref (current-patterns) substr))
;; put together head padding + value + tail padding (define full-length-pattern
(define full-length-pattern ;; put together head padding + value + tail padding
(append (make-list start 0) (append (make-list start 0)
partial-pattern partial-pattern
(make-list (- (add1 word-length) (length partial-pattern) start) 0))) (make-list (- (add1 word-length) (length partial-pattern) start) 0)))
(cons full-length-pattern acc))])) (cons full-length-pattern acc)))
(calculate-max-pattern matching-patterns))))
(define max-value-pattern (apply-map-max matching-patterns))
(cache-word (cons word-with-dots max-value-pattern))
;; for point list, ;; for point list,
;; drop first two elements because they represent hyphenation weight ;; drop first two elements because they represent hyphenation weight
@ -126,7 +120,7 @@
;; drop last element because it represents hyphen after last "." ;; drop last element because it represents hyphen after last "."
;; after you drop these two, then each number corresponds to ;; after you drop these two, then each number corresponds to
;; whether a hyphen goes after that letter. ;; whether a hyphen goes after that letter.
(drop-right (drop max-value-pattern 2) 1)) (drop-right (drop max-pattern 2) 1))
;; Find hyphenation points in a word. This is not quite synonymous with syllables. ;; Find hyphenation points in a word. This is not quite synonymous with syllables.
@ -177,6 +171,7 @@
(make-txexpr (get-tag x) (get-attrs x) (map loop (get-elements x)))] (make-txexpr (get-tag x) (get-attrs x) (map loop (get-elements x)))]
[else x]))) [else x])))
(require sugar/debug)
(define (hyphenate x [joiner default-joiner] (define (hyphenate x [joiner default-joiner]
#:exceptions [extra-exceptions empty] #:exceptions [extra-exceptions empty]

@ -14,4 +14,4 @@
#'(module exception-prep racket/base #'(module exception-prep racket/base
(require racket/string racket/list hyphenate/private/core) (require racket/string racket/list hyphenate/private/core)
(provide exceptions) (provide exceptions)
(define exceptions (apply hash (append-map convert-exception-word (string-split str))))))))) (define exceptions (map convert-exception-word (string-split str))))))))

@ -2,5 +2,4 @@
(provide (all-defined-out)) (provide (all-defined-out))
(define current-patterns (make-parameter (make-hash))) (define current-patterns (make-parameter (make-hash)))
(define current-exceptions (make-parameter (hash)))
(define current-word-cache (make-parameter (make-hash))) (define current-word-cache (make-parameter (make-hash)))
Loading…
Cancel
Save