diff --git a/words/index.rkt b/words/index.rkt index a58cdff..017e833 100644 --- a/words/index.rkt +++ b/words/index.rkt @@ -6,30 +6,10 @@ (define-runtime-path wordidx-file "compiled/words/words-index.rktd") -(struct word-rec (word charint length) #:prefab) +(define (word-rec-word val) (vector-ref val 0)) +(define (word-rec-charint val) (vector-ref val 1)) +(define (word-rec-length val) (vector-ref val 2)) -(define (make-word-recs) - (define reverse-string (compose1 list->string reverse string->list)) - (define omit-words (map reverse-string (file->lines "data/omit.rktd"))) - (for/vector ([w (in-lines (open-input-file "data/words.rktd"))] - #:when (and (not (regexp-match "'" w)) ; no apostrophes - (regexp-match #rx"^[A-Za-z]+$" w) ; no accented letters - (not (member w omit-words)))) ; no bad words - (word-rec w - (word->charidx w) - (string-length w)))) - -(define (regenerate-word-index!) - (make-parent-directory* wordidx-file) - (s-exp->fasl - (make-word-recs) - (open-output-file wordidx-file #:exists 'replace))) - -(define wordrecs - (fasl->s-exp (open-input-file (and - (unless (file-exists? wordidx-file) - (regenerate-word-index!)) - wordidx-file)))) (define (char->bitindex c) ;; 64-bit layout @@ -65,4 +45,28 @@ (define (capitalized? charidx-entry) ;; a cap only appears at the beginning of a word, ;; so it's sufficient to test whether a cap exists in the idx - (positive? (bitwise-and charidx-entry capitalized-mask))) \ No newline at end of file + (positive? (bitwise-and charidx-entry capitalized-mask))) + + +(define (make-word-recs) + (define reverse-string (compose1 list->string reverse string->list)) + (define omit-words (map reverse-string (file->lines "data/omit.rktd"))) + (for/vector ([w (in-lines (open-input-file "data/words.rktd"))] + #:when (and (not (regexp-match "'" w)) ; no apostrophes + (regexp-match #rx"^[A-Za-z]+$" w) ; no accented letters + (not (member w omit-words)))) ; no bad words + (vector w + (word->charidx w) + (string-length w)))) + +(define (regenerate-word-index!) + (make-parent-directory* wordidx-file) + (s-exp->fasl + (make-word-recs) + (open-output-file wordidx-file #:exists 'replace))) + +(define wordrecs + (fasl->s-exp (open-input-file (and + (unless (file-exists? wordidx-file) + (regenerate-word-index!)) + wordidx-file)))) \ No newline at end of file diff --git a/words/main.rkt b/words/main.rkt index fe901bb..97492e7 100644 --- a/words/main.rkt +++ b/words/main.rkt @@ -37,8 +37,8 @@ #:result word-acc) ([idx (in-list ((if random shuffle values) (range (vector-length wordrecs))))] [rec (in-value (vector-ref wordrecs idx))] - [w (in-value (word-rec-word rec))] - [w-charidx (in-value (word-rec-charint rec))] + [word (in-value (word-rec-word rec))] + [word-charidx (in-value (word-rec-charint rec))] #:break (= count (or max-words +inf.0)) #:when (and ;; between min and max length @@ -46,15 +46,15 @@ ;; word contains each mandatory char, case-insensitive (or (not mandatory) (for/and ([mc (in-list mandatory-cs)]) - (w-charidx . contains-char? . mc))) + (word-charidx . contains-char? . mc))) ;; word contains only letters + mandatory, case-insensitive - (for/and ([wc (in-list (map char-downcase (charidx->chars w-charidx)))]) + (for/and ([wc (in-list (map char-downcase (charidx->chars word-charidx)))]) (letter-cs-charidx . contains-char? . wc)) ;; maybe only proper names - (if proper-names? (capitalized? w-charidx) (not (capitalized? w-charidx))) + (if proper-names? (capitalized? word-charidx) (not (capitalized? word-charidx))) ;; maybe hide plurals - (if hide-plurals? (not (regexp-match #rx"s$" w)) #t))) - (values (cons (capitalizer w) word-acc) (add1 count)))) + (if hide-plurals? (not (regexp-match #rx"s$" word)) #t))) + (values (cons (capitalizer word) word-acc) (add1 count)))) (module+ test (require rackunit)