better plural hiding

master
Matthew Butterick 5 years ago
parent 5c953816bb
commit eba216c3af

@ -173774,6 +173774,7 @@ Sassanidae
Sassanide Sassanide
Sassenach Sassenach
sassolite sassolite
sass
sassy sassy
sassywood sassywood
Sastean Sastean

@ -9,6 +9,7 @@
;; Positional accessors for a word record, a vector laid out as
;; #(word charidx length plural?) -- the same slot order in which
;; `make-word-recs` builds each record.
(define (word-rec-word val) (vector-ref val 0)) (define (word-rec-word val) (vector-ref val 0))
(define (word-rec-charint val) (vector-ref val 1)) (define (word-rec-charint val) (vector-ref val 1))
(define (word-rec-length val) (vector-ref val 2)) (define (word-rec-length val) (vector-ref val 2))
(define (word-rec-plural? val) (vector-ref val 3))
(define (char->bitindex c) (define (char->bitindex c)
@ -47,11 +48,19 @@
;; so it's sufficient to test whether a cap exists in the idx ;; so it's sufficient to test whether a cap exists in the idx
(positive? (bitwise-and charidx-entry capitalized-mask))) (positive? (bitwise-and charidx-entry capitalized-mask)))
(define-runtime-path words-data "data/words.rktd") (define-runtime-path words-file "data/words.rktd")
(require racket/set racket/match)
;; Build the vector of word records that backs the word index.
;; Each record is itself a vector: #(word charidx length plural?)
;;   word     - the word string as read from `words-file`
;;   charidx  - the result of (word->charidx word)
;;   length   - (string-length word)
;;   plural?  - #true when removing a trailing "s" or "es" from the word
;;              leaves another word that is also in the list
(define (make-word-recs)
  ;; `call-with-input-file` closes the port as soon as the list has been
  ;; read; a bare `open-input-file` would leave it open.
  (define words
    (call-with-input-file words-file
      (λ (in) (for/set ([word (in-lines in)]) word))))
  ;; Is `word` some other listed word followed by `suffix`?
  (define (known-stem+suffix? word suffix)
    (define stem-length (- (string-length word) (string-length suffix)))
    (and (positive? stem-length) ; the stem must be non-empty
         (string=? (substring word stem-length) suffix)
         (set-member? words (substring word 0 stem-length))))
  ;; Both candidate stems are tested explicitly. A single pattern such as
  ;; "^(.+)e?s$" cannot do this: the greedy `.+` always swallows the "e",
  ;; so "boxes" would only ever be checked against "boxe", never "box".
  (define (plural? word)
    (or (known-stem+suffix? word "s")
        (known-stem+suffix? word "es")))
  (for/vector ([word (in-set words)])
    (vector word
            (word->charidx word)
            (string-length word)
            (plural? word))))
(define (regenerate-word-index!) (define (regenerate-word-index!)
(make-parent-directory* wordidx-file) (make-parent-directory* wordidx-file)

@ -7,7 +7,7 @@
#:mandatory [mandatory #f] #:mandatory [mandatory #f]
#:min [min-length 5] #:min [min-length 5]
#:max [max-length 10] #:max [max-length 10]
#:hide-plurals [hide-plurals? #f] #:hide-plurals [hide-plurals? #t]
#:proper-names [proper-names? #f] #:proper-names [proper-names? #f]
#:random [random #t] #:random [random #t]
#:max-words [max-words 10] #:max-words [max-words 10]
@ -53,7 +53,7 @@
;; maybe only proper names ;; maybe only proper names
(if proper-names? (capitalized? word-charidx) (not (capitalized? word-charidx))) (if proper-names? (capitalized? word-charidx) (not (capitalized? word-charidx)))
;; maybe hide plurals ;; maybe hide plurals
(if hide-plurals? (not (regexp-match #rx"s$" word)) #true))) (or (not hide-plurals?) (not (word-rec-plural? rec)))))
(values (cons (capitalizer word) word-acc) (add1 count)))) (values (cons (capitalizer word) word-acc) (add1 count))))
(module+ test (module+ test

Loading…
Cancel
Save