faster with vector rather than struct

master
Matthew Butterick 5 years ago
parent c2f785f7d7
commit f63f9b7f86

@ -6,30 +6,10 @@
(define-runtime-path wordidx-file "compiled/words/words-index.rktd")
;; Prefab structure for one word-index entry, with three fields:
;; `word`, `charint`, and `length`.
(struct word-rec (word charint length) #:prefab)
;; Accessors for a word record represented as a plain vector.
;; Slot 0 holds the word, slot 1 the charint, slot 2 the length.
(define word-rec-word
  (lambda (rec) (vector-ref rec 0)))
(define word-rec-charint
  (lambda (rec) (vector-ref rec 1)))
(define word-rec-length
  (lambda (rec) (vector-ref rec 2)))
;; Build the word index: a vector with one `word-rec` per accepted line of
;; data/words.rktd. Each record holds the word, the result of
;; `word->charidx` on it, and its string length.
;; A line is accepted when it contains no apostrophe, consists solely of
;; ASCII letters, and is not in the omit list.
(define (make-word-recs)
  (define reverse-string (compose1 list->string reverse string->list))
  ;; every line of data/omit.rktd is reversed before being used for comparison
  (define omit-words (map reverse-string (file->lines "data/omit.rktd")))
  ;; call-with-input-file closes the port once the vector has been built;
  ;; a bare open-input-file would leave it open
  (call-with-input-file "data/words.rktd"
    (lambda (in)
      (for/vector ([w (in-lines in)]
                   #:when (and (not (regexp-match "'" w)) ; no apostrophes
                               (regexp-match #rx"^[A-Za-z]+$" w) ; no accented letters
                               (not (member w omit-words)))) ; no bad words
        (word-rec w
                  (word->charidx w)
                  (string-length w))))))
;; Rebuild the word index and write it to `wordidx-file` in fasl format.
;; The parent directory is created if needed and any existing file is replaced.
(define (regenerate-word-index!)
  (make-parent-directory* wordidx-file)
  ;; call-with-output-file closes (and so flushes) the port when the
  ;; procedure returns, so the whole index is on disk before it is read back
  (call-with-output-file wordidx-file
    #:exists 'replace
    (lambda (out)
      (s-exp->fasl (make-word-recs) out))))
;; The word index, read from `wordidx-file` when this definition is evaluated.
;; If the file does not exist yet, it is generated first.
(define wordrecs
  (let ()
    (unless (file-exists? wordidx-file)
      (regenerate-word-index!))
    ;; call-with-input-file closes the port after the index has been read
    (call-with-input-file wordidx-file fasl->s-exp)))
(define (char->bitindex c)
;; 64-bit layout
@ -66,3 +46,27 @@
;; a cap only appears at the beginning of a word,
;; so it's sufficient to test whether a cap exists in the idx
(positive? (bitwise-and charidx-entry capitalized-mask)))
;; Build the word index: a vector with one record per accepted line of
;; data/words.rktd. Each record is itself a 3-slot vector holding the word,
;; the result of `word->charidx` on it, and its string length.
;; A line is accepted when it contains no apostrophe, consists solely of
;; ASCII letters, and is not in the omit list.
(define (make-word-recs)
  (define reverse-string (compose1 list->string reverse string->list))
  ;; every line of data/omit.rktd is reversed before being used for comparison
  (define omit-words (map reverse-string (file->lines "data/omit.rktd")))
  ;; call-with-input-file closes the port once the vector has been built;
  ;; a bare open-input-file would leave it open
  (call-with-input-file "data/words.rktd"
    (lambda (in)
      (for/vector ([w (in-lines in)]
                   #:when (and (not (regexp-match "'" w)) ; no apostrophes
                               (regexp-match #rx"^[A-Za-z]+$" w) ; no accented letters
                               (not (member w omit-words)))) ; no bad words
        (vector w
                (word->charidx w)
                (string-length w))))))
;; Rebuild the word index and write it to `wordidx-file` in fasl format.
;; The parent directory is created if needed and any existing file is replaced.
(define (regenerate-word-index!)
  (make-parent-directory* wordidx-file)
  ;; call-with-output-file closes (and so flushes) the port when the
  ;; procedure returns, so the whole index is on disk before it is read back
  (call-with-output-file wordidx-file
    #:exists 'replace
    (lambda (out)
      (s-exp->fasl (make-word-recs) out))))
;; The word index, read from `wordidx-file` when this definition is evaluated.
;; If the file does not exist yet, it is generated first.
(define wordrecs
  (let ()
    (unless (file-exists? wordidx-file)
      (regenerate-word-index!))
    ;; call-with-input-file closes the port after the index has been read
    (call-with-input-file wordidx-file fasl->s-exp)))

@ -37,8 +37,8 @@
#:result word-acc)
([idx (in-list ((if random shuffle values) (range (vector-length wordrecs))))]
[rec (in-value (vector-ref wordrecs idx))]
[w (in-value (word-rec-word rec))]
[w-charidx (in-value (word-rec-charint rec))]
[word (in-value (word-rec-word rec))]
[word-charidx (in-value (word-rec-charint rec))]
#:break (= count (or max-words +inf.0))
#:when (and
;; between min and max length
@ -46,15 +46,15 @@
;; word contains each mandatory char, case-insensitive
(or (not mandatory)
(for/and ([mc (in-list mandatory-cs)])
(w-charidx . contains-char? . mc)))
(word-charidx . contains-char? . mc)))
;; word contains only letters + mandatory, case-insensitive
(for/and ([wc (in-list (map char-downcase (charidx->chars w-charidx)))])
(for/and ([wc (in-list (map char-downcase (charidx->chars word-charidx)))])
(letter-cs-charidx . contains-char? . wc))
;; maybe only proper names
(if proper-names? (capitalized? w-charidx) (not (capitalized? w-charidx)))
(if proper-names? (capitalized? word-charidx) (not (capitalized? word-charidx)))
;; maybe hide plurals
(if hide-plurals? (not (regexp-match #rx"s$" w)) #t)))
(values (cons (capitalizer w) word-acc) (add1 count))))
(if hide-plurals? (not (regexp-match #rx"s$" word)) #t)))
(values (cons (capitalizer word) word-acc) (add1 count))))
(module+ test
(require rackunit)

Loading…
Cancel
Save