initial
parent
42145f256d
commit
ed368e92d1
@ -0,0 +1,6 @@
|
||||
#lang info
|
||||
(define collection 'multi)
|
||||
|
||||
(define version "0.0")
|
||||
(define deps '())
|
||||
(define build-deps '())
|
@ -0,0 +1,47 @@
|
||||
#lang debug racket/base
|
||||
(require racket/file
|
||||
racket/fasl
|
||||
"words.rkt")
|
||||
(provide (all-defined-out))
|
||||
|
||||
(define (char->bitindex c)
|
||||
;; 64-bit layout
|
||||
;; __________ZYXWVUTSRQPONMLKJIHGFEDCBA______zyxwvutsrqponmlkjihgfedcba
|
||||
(cond
|
||||
[(char<=? #\a c #\z) (- (char->integer c) 97)] ; 97 = (char->integer #\a)
|
||||
[(char<=? #\A c #\Z) (- (char->integer c) 33)] ; 65 = (char->integer #\A)
|
||||
[else 0]))
|
||||
|
||||
(define (word->charidx word)
|
||||
(apply bitwise-ior
|
||||
(for/list ([c (in-string word)])
|
||||
(expt 2 (char->bitindex c)))))
|
||||
|
||||
(define (bitindex->char i)
|
||||
(cond
|
||||
[(<= 0 i 26) (integer->char (+ i 97))]
|
||||
[(<= 32 i 59) (integer->char (+ i 33))]
|
||||
[else (error 'bong)]))
|
||||
|
||||
(define (charidx->chars int)
|
||||
(for/list ([i (in-range 64)]
|
||||
#:when (bitwise-bit-set? int i))
|
||||
(bitindex->char i)))
|
||||
|
||||
(define charidx-file "data/charidx.rktd")
|
||||
|
||||
(define (regenerate-char-index!)
|
||||
(s-exp->fasl (map word->charidx usable-words) (open-output-file charidx-file #:exists 'replace)))
|
||||
|
||||
(define charidx (let ()
|
||||
(unless (file-exists? charidx-file)
|
||||
(regenerate-char-index!))
|
||||
(fasl->s-exp (open-input-file charidx-file))))
|
||||
|
||||
(define (contains-char? charidx-entry c)
|
||||
(bitwise-bit-set? charidx-entry (char->bitindex c)))
|
||||
|
||||
(module+ test
|
||||
(require rackunit)
|
||||
(check-equal? (length (filter (λ (ce) (contains-char? ce #\z)) charidx)) 7830)
|
||||
(check-equal? (charidx->chars (word->charidx "abuzz")) '(#\a #\b #\u #\z)))
|
Binary file not shown.
@ -0,0 +1,11 @@
|
||||
kcuf
|
||||
tnuc
|
||||
tihs
|
||||
yssup
|
||||
elohssa
|
||||
kcoc
|
||||
nmad
|
||||
traf
|
||||
gaf
|
||||
tihs
|
||||
reggin
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,62 @@
|
||||
#lang debug racket/base
|
||||
(require racket/list
|
||||
racket/file
|
||||
"words.rkt"
|
||||
"char-index.rkt")
|
||||
|
||||
(define (wordlist #:letters [letters "etaoinshrdluw"]
|
||||
#:mandatory [mandatory #f]
|
||||
#:min [min-length 3]
|
||||
#:max [max-length 10]
|
||||
#:hide-plurals [hide-plurals? #f]
|
||||
#:proper-names [proper-names? #f]
|
||||
#:random [random #t]
|
||||
#:max-words [max-words 10]
|
||||
#:all-caps [all-caps? #f]
|
||||
#:initial-caps [initial-caps? #f]
|
||||
#:suffix [suffix ""])
|
||||
;; do local filtering here (i.e., filters that are true per query)
|
||||
(define taker #f)
|
||||
(let* ([ws (for/list ([w (in-list (if taker (take usable-words taker) usable-words))]
|
||||
[w-charidx (in-list (if taker (take charidx taker) charidx))]
|
||||
[idx (in-naturals)]
|
||||
#:when (and
|
||||
;; between min and max length
|
||||
(<= min-length (string-length w) max-length)
|
||||
;; contains each mandatory, case-insensitive
|
||||
;; slow: string match
|
||||
#;(if mandatory
|
||||
(for/and ([c (in-string mandatory)])
|
||||
(regexp-match (regexp (format "(?i:~a)" c)) w))
|
||||
#t)
|
||||
;; fast: index math
|
||||
(or (not mandatory)
|
||||
(for*/or ([mandatory-cs (in-value (string->list mandatory))]
|
||||
[c (in-list (map char-downcase (charidx->chars w-charidx)))])
|
||||
(memv c mandatory-cs)))
|
||||
;; contains only letters + mandatory, case-insensitive
|
||||
;; slow: string match
|
||||
#;(regexp-match (regexp (format "^(?i:[~a]+)$" (string-append letters (or mandatory "")))) w)
|
||||
;; fast: index match
|
||||
|
||||
(for*/and ([letter-cs (in-value (string->list letters))]
|
||||
[c (in-list (map char-downcase (charidx->chars w-charidx)))])
|
||||
(memv c letter-cs))
|
||||
;; maybe only proper names
|
||||
(regexp-match (if proper-names? #rx"^[A-Z]" #rx"^[a-z]") w)
|
||||
;; maybe hide plurals
|
||||
(if hide-plurals? (not (regexp-match #rx"s$" w)) #t)))
|
||||
w)]
|
||||
[ws ((if random shuffle values) ws)]
|
||||
[ws (if max-words (take ws (min (length ws) max-words)) ws)]
|
||||
[ws (map (cond
|
||||
[all-caps? string-upcase]
|
||||
[initial-caps? string-titlecase]
|
||||
[else values]) ws)]
|
||||
[ws (if (positive? (string-length suffix))
|
||||
(map (λ (w) (string-append w "." suffix)) ws)
|
||||
ws)])
|
||||
ws))
|
||||
|
||||
(module+ test
|
||||
(time (wordlist)))
|
@ -0,0 +1,21 @@
|
||||
#lang debug racket/base
|
||||
(require racket/file
|
||||
racket/string)
|
||||
(provide usable-words)
|
||||
|
||||
(define reverse-string (compose1 list->string reverse string->list))
|
||||
|
||||
(define omit-words (map reverse-string (file->lines "data/omit.rktd")))
|
||||
|
||||
(define (make-wordlist)
|
||||
;; do global filtering here (i.e., filters that are always true)
|
||||
(define ws
|
||||
(for/list ([w (in-lines (open-input-file "data/words.rktd"))]
|
||||
#:when (and (not (regexp-match "'" w)) ; no apostrophes
|
||||
(regexp-match #rx"^[A-Za-z]+$" w) ; no accented letters
|
||||
(not (member w omit-words)) ; no bad words
|
||||
))
|
||||
w))
|
||||
ws)
|
||||
|
||||
(define usable-words (make-wordlist))
|
Loading…
Reference in New Issue