master
Matthew Butterick 5 years ago
parent 42145f256d
commit ed368e92d1

@ -0,0 +1,6 @@
#lang info
(define collection 'multi)
(define version "0.0")
(define deps '())
(define build-deps '())

@ -0,0 +1,47 @@
#lang debug racket/base
(require racket/file
racket/fasl
"words.rkt")
(provide (all-defined-out))
(define (char->bitindex c)
;; 64-bit layout
;; __________ZYXWVUTSRQPONMLKJIHGFEDCBA______zyxwvutsrqponmlkjihgfedcba
(cond
[(char<=? #\a c #\z) (- (char->integer c) 97)] ; 97 = (char->integer #\a)
[(char<=? #\A c #\Z) (- (char->integer c) 33)] ; 65 = (char->integer #\A)
[else 0]))
(define (word->charidx word)
(apply bitwise-ior
(for/list ([c (in-string word)])
(expt 2 (char->bitindex c)))))
(define (bitindex->char i)
(cond
[(<= 0 i 26) (integer->char (+ i 97))]
[(<= 32 i 59) (integer->char (+ i 33))]
[else (error 'bong)]))
(define (charidx->chars int)
(for/list ([i (in-range 64)]
#:when (bitwise-bit-set? int i))
(bitindex->char i)))
(define charidx-file "data/charidx.rktd")
(define (regenerate-char-index!)
(s-exp->fasl (map word->charidx usable-words) (open-output-file charidx-file #:exists 'replace)))
(define charidx (let ()
(unless (file-exists? charidx-file)
(regenerate-char-index!))
(fasl->s-exp (open-input-file charidx-file))))
(define (contains-char? charidx-entry c)
(bitwise-bit-set? charidx-entry (char->bitindex c)))
(module+ test
(require rackunit)
(check-equal? (length (filter (λ (ce) (contains-char? ce #\z)) charidx)) 7830)
(check-equal? (charidx->chars (word->charidx "abuzz")) '(#\a #\b #\u #\z)))

Binary file not shown.

@ -0,0 +1,11 @@
kcuf
tnuc
tihs
yssup
elohssa
kcoc
nmad
traf
gaf
tihs
reggin

File diff suppressed because it is too large Load Diff

@ -0,0 +1,62 @@
#lang debug racket/base
(require racket/list
racket/file
"words.rkt"
"char-index.rkt")
(define (wordlist #:letters [letters "etaoinshrdluw"]
#:mandatory [mandatory #f]
#:min [min-length 3]
#:max [max-length 10]
#:hide-plurals [hide-plurals? #f]
#:proper-names [proper-names? #f]
#:random [random #t]
#:max-words [max-words 10]
#:all-caps [all-caps? #f]
#:initial-caps [initial-caps? #f]
#:suffix [suffix ""])
;; do local filtering here (i.e., filters that are true per query)
(define taker #f)
(let* ([ws (for/list ([w (in-list (if taker (take usable-words taker) usable-words))]
[w-charidx (in-list (if taker (take charidx taker) charidx))]
[idx (in-naturals)]
#:when (and
;; between min and max length
(<= min-length (string-length w) max-length)
;; contains each mandatory, case-insensitive
;; slow: string match
#;(if mandatory
(for/and ([c (in-string mandatory)])
(regexp-match (regexp (format "(?i:~a)" c)) w))
#t)
;; fast: index math
(or (not mandatory)
(for*/or ([mandatory-cs (in-value (string->list mandatory))]
[c (in-list (map char-downcase (charidx->chars w-charidx)))])
(memv c mandatory-cs)))
;; contains only letters + mandatory, case-insensitive
;; slow: string match
#;(regexp-match (regexp (format "^(?i:[~a]+)$" (string-append letters (or mandatory "")))) w)
;; fast: index match
(for*/and ([letter-cs (in-value (string->list letters))]
[c (in-list (map char-downcase (charidx->chars w-charidx)))])
(memv c letter-cs))
;; maybe only proper names
(regexp-match (if proper-names? #rx"^[A-Z]" #rx"^[a-z]") w)
;; maybe hide plurals
(if hide-plurals? (not (regexp-match #rx"s$" w)) #t)))
w)]
[ws ((if random shuffle values) ws)]
[ws (if max-words (take ws (min (length ws) max-words)) ws)]
[ws (map (cond
[all-caps? string-upcase]
[initial-caps? string-titlecase]
[else values]) ws)]
[ws (if (positive? (string-length suffix))
(map (λ (w) (string-append w "." suffix)) ws)
ws)])
ws))
(module+ test
(time (wordlist)))

@ -0,0 +1,21 @@
#lang debug racket/base
(require racket/file
racket/string)
(provide usable-words)
(define reverse-string (compose1 list->string reverse string->list))
(define omit-words (map reverse-string (file->lines "data/omit.rktd")))
(define (make-wordlist)
;; do global filtering here (i.e., filters that are always true)
(define ws
(for/list ([w (in-lines (open-input-file "data/words.rktd"))]
#:when (and (not (regexp-match "'" w)) ; no apostrophes
(regexp-match #rx"^[A-Za-z]+$" w) ; no accented letters
(not (member w omit-words)) ; no bad words
))
w))
ws)
(define usable-words (make-wordlist))
Loading…
Cancel
Save