From eba216c3afe25e05f4dfca35c9188fc75941dded Mon Sep 17 00:00:00 2001
From: Matthew Butterick
Date: Sun, 5 Apr 2020 21:10:43 -0700
Subject: [PATCH] better plural hiding

---
Review notes (text in this section is ignored by git-am and is not part of
the commit message):

* make-word-recs: the plural test used the single pattern "^(.+)e?s$".
  Because (.+) is greedy it always captures the optional "e", so "boxes"
  produced the stem "boxe" and "-es" plurals were never flagged. The "s"
  and "es" suffixes are now tried separately.
* make-word-recs: the word list is read through call-with-input-file so
  the input port is closed once the set has been built.
* The diffstat and the second words/index.rkt hunk header were recomputed
  for the revised line counts. The blob ids on the "index" lines still
  describe the originally submitted content.
* This patch was recovered from a copy whose line breaks had been
  collapsed. Blank context lines were restored from the hunk counts, but
  the indentation of context lines is a best guess; apply with
  `git apply --ignore-whitespace` if the context does not match.

 words/data/words.rktd |  1 +
 words/index.rkt       | 22 +++++++++++++++++++---
 words/main.rkt        |  4 ++--
 3 files changed, 22 insertions(+), 5 deletions(-)

diff --git a/words/data/words.rktd b/words/data/words.rktd
index e633e2f..36828ba 100644
--- a/words/data/words.rktd
+++ b/words/data/words.rktd
@@ -173774,6 +173774,7 @@ Sassanidae
 Sassanide
 Sassenach
 sassolite
+sass
 sassy
 sassywood
 Sastean
diff --git a/words/index.rkt b/words/index.rkt
index cdb2680..5a6c02d 100644
--- a/words/index.rkt
+++ b/words/index.rkt
@@ -9,6 +9,7 @@
 (define (word-rec-word val) (vector-ref val 0))
 (define (word-rec-charint val) (vector-ref val 1))
 (define (word-rec-length val) (vector-ref val 2))
+(define (word-rec-plural? val) (vector-ref val 3))
 
 
 (define (char->bitindex c)
@@ -47,11 +48,26 @@
   ;; so it's sufficient to test whether a cap exists in the idx
   (positive? (bitwise-and charidx-entry capitalized-mask)))
 
-(define-runtime-path words-data "data/words.rktd")
+(define-runtime-path words-file "data/words.rktd")
+(require racket/set racket/match)
 
 (define (make-word-recs)
-  (for/vector ([w (in-lines (open-input-file words-data))])
-    (vector w (word->charidx w) (string-length w))))
+  ;; read through call-with-input-file so the port is closed after loading
+  (define words (call-with-input-file words-file
+                  (lambda (ip) (for/set ([word (in-lines ip)]) word))))
+  ;; a word counts as a plural when dropping a final "s" or "es" leaves
+  ;; another word from the list. The suffixes are tried separately: one
+  ;; greedy "^(.+)e?s$" always keeps the "e", so "boxes" never gives "box".
+  (define (plural? word)
+    (for/or ([rx (in-list (list #rx"^(.+)s$" #rx"^(.+)es$"))])
+      (match (regexp-match rx word)
+        [(list _ stem) (set-member? words stem)]
+        [_ #false])))
+  (for/vector ([word (in-set words)])
+    (vector word
+            (word->charidx word)
+            (string-length word)
+            (plural? word))))
 
 (define (regenerate-word-index!)
   (make-parent-directory* wordidx-file)
diff --git a/words/main.rkt b/words/main.rkt
index 059f038..d56aae4 100644
--- a/words/main.rkt
+++ b/words/main.rkt
@@ -7,7 +7,7 @@
                     #:mandatory [mandatory #f]
                     #:min [min-length 5]
                     #:max [max-length 10]
-                    #:hide-plurals [hide-plurals? #f]
+                    #:hide-plurals [hide-plurals? #t]
                     #:proper-names [proper-names? #f]
                     #:random [random #t]
                     #:max-words [max-words 10]
@@ -53,7 +53,7 @@
                       ;; maybe only proper names
                       (if proper-names? (capitalized? word-charidx) (not (capitalized? word-charidx)))
                       ;; maybe hide plurals
-                      (if hide-plurals? (not (regexp-match #rx"s$" word)) #true)))
+                      (or (not hide-plurals?) (not (word-rec-plural? rec)))))
       (values (cons (capitalizer word) word-acc) (add1 count))))
 
 (module+ test