initial
parent
42145f256d
commit
ed368e92d1
@ -0,0 +1,6 @@
|
|||||||
|
#lang info
|
||||||
|
(define collection 'multi)
|
||||||
|
|
||||||
|
(define version "0.0")
|
||||||
|
(define deps '())
|
||||||
|
(define build-deps '())
|
@ -0,0 +1,47 @@
|
|||||||
|
#lang debug racket/base
|
||||||
|
(require racket/file
|
||||||
|
racket/fasl
|
||||||
|
"words.rkt")
|
||||||
|
(provide (all-defined-out))
|
||||||
|
|
||||||
|
(define (char->bitindex c)
|
||||||
|
;; 64-bit layout
|
||||||
|
;; __________ZYXWVUTSRQPONMLKJIHGFEDCBA______zyxwvutsrqponmlkjihgfedcba
|
||||||
|
(cond
|
||||||
|
[(char<=? #\a c #\z) (- (char->integer c) 97)] ; 97 = (char->integer #\a)
|
||||||
|
[(char<=? #\A c #\Z) (- (char->integer c) 33)] ; 65 = (char->integer #\A)
|
||||||
|
[else 0]))
|
||||||
|
|
||||||
|
(define (word->charidx word)
|
||||||
|
(apply bitwise-ior
|
||||||
|
(for/list ([c (in-string word)])
|
||||||
|
(expt 2 (char->bitindex c)))))
|
||||||
|
|
||||||
|
(define (bitindex->char i)
|
||||||
|
(cond
|
||||||
|
[(<= 0 i 26) (integer->char (+ i 97))]
|
||||||
|
[(<= 32 i 59) (integer->char (+ i 33))]
|
||||||
|
[else (error 'bong)]))
|
||||||
|
|
||||||
|
(define (charidx->chars int)
|
||||||
|
(for/list ([i (in-range 64)]
|
||||||
|
#:when (bitwise-bit-set? int i))
|
||||||
|
(bitindex->char i)))
|
||||||
|
|
||||||
|
(define charidx-file "data/charidx.rktd")
|
||||||
|
|
||||||
|
(define (regenerate-char-index!)
|
||||||
|
(s-exp->fasl (map word->charidx usable-words) (open-output-file charidx-file #:exists 'replace)))
|
||||||
|
|
||||||
|
(define charidx (let ()
|
||||||
|
(unless (file-exists? charidx-file)
|
||||||
|
(regenerate-char-index!))
|
||||||
|
(fasl->s-exp (open-input-file charidx-file))))
|
||||||
|
|
||||||
|
(define (contains-char? charidx-entry c)
|
||||||
|
(bitwise-bit-set? charidx-entry (char->bitindex c)))
|
||||||
|
|
||||||
|
(module+ test
|
||||||
|
(require rackunit)
|
||||||
|
(check-equal? (length (filter (λ (ce) (contains-char? ce #\z)) charidx)) 7830)
|
||||||
|
(check-equal? (charidx->chars (word->charidx "abuzz")) '(#\a #\b #\u #\z)))
|
Binary file not shown.
@ -0,0 +1,11 @@
|
|||||||
|
kcuf
|
||||||
|
tnuc
|
||||||
|
tihs
|
||||||
|
yssup
|
||||||
|
elohssa
|
||||||
|
kcoc
|
||||||
|
nmad
|
||||||
|
traf
|
||||||
|
gaf
|
||||||
|
tihs
|
||||||
|
reggin
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,62 @@
|
|||||||
|
#lang debug racket/base
|
||||||
|
(require racket/list
|
||||||
|
racket/file
|
||||||
|
"words.rkt"
|
||||||
|
"char-index.rkt")
|
||||||
|
|
||||||
|
(define (wordlist #:letters [letters "etaoinshrdluw"]
|
||||||
|
#:mandatory [mandatory #f]
|
||||||
|
#:min [min-length 3]
|
||||||
|
#:max [max-length 10]
|
||||||
|
#:hide-plurals [hide-plurals? #f]
|
||||||
|
#:proper-names [proper-names? #f]
|
||||||
|
#:random [random #t]
|
||||||
|
#:max-words [max-words 10]
|
||||||
|
#:all-caps [all-caps? #f]
|
||||||
|
#:initial-caps [initial-caps? #f]
|
||||||
|
#:suffix [suffix ""])
|
||||||
|
;; do local filtering here (i.e., filters that are true per query)
|
||||||
|
(define taker #f)
|
||||||
|
(let* ([ws (for/list ([w (in-list (if taker (take usable-words taker) usable-words))]
|
||||||
|
[w-charidx (in-list (if taker (take charidx taker) charidx))]
|
||||||
|
[idx (in-naturals)]
|
||||||
|
#:when (and
|
||||||
|
;; between min and max length
|
||||||
|
(<= min-length (string-length w) max-length)
|
||||||
|
;; contains each mandatory, case-insensitive
|
||||||
|
;; slow: string match
|
||||||
|
#;(if mandatory
|
||||||
|
(for/and ([c (in-string mandatory)])
|
||||||
|
(regexp-match (regexp (format "(?i:~a)" c)) w))
|
||||||
|
#t)
|
||||||
|
;; fast: index math
|
||||||
|
(or (not mandatory)
|
||||||
|
(for*/or ([mandatory-cs (in-value (string->list mandatory))]
|
||||||
|
[c (in-list (map char-downcase (charidx->chars w-charidx)))])
|
||||||
|
(memv c mandatory-cs)))
|
||||||
|
;; contains only letters + mandatory, case-insensitive
|
||||||
|
;; slow: string match
|
||||||
|
#;(regexp-match (regexp (format "^(?i:[~a]+)$" (string-append letters (or mandatory "")))) w)
|
||||||
|
;; fast: index match
|
||||||
|
|
||||||
|
(for*/and ([letter-cs (in-value (string->list letters))]
|
||||||
|
[c (in-list (map char-downcase (charidx->chars w-charidx)))])
|
||||||
|
(memv c letter-cs))
|
||||||
|
;; maybe only proper names
|
||||||
|
(regexp-match (if proper-names? #rx"^[A-Z]" #rx"^[a-z]") w)
|
||||||
|
;; maybe hide plurals
|
||||||
|
(if hide-plurals? (not (regexp-match #rx"s$" w)) #t)))
|
||||||
|
w)]
|
||||||
|
[ws ((if random shuffle values) ws)]
|
||||||
|
[ws (if max-words (take ws (min (length ws) max-words)) ws)]
|
||||||
|
[ws (map (cond
|
||||||
|
[all-caps? string-upcase]
|
||||||
|
[initial-caps? string-titlecase]
|
||||||
|
[else values]) ws)]
|
||||||
|
[ws (if (positive? (string-length suffix))
|
||||||
|
(map (λ (w) (string-append w "." suffix)) ws)
|
||||||
|
ws)])
|
||||||
|
ws))
|
||||||
|
|
||||||
|
(module+ test
|
||||||
|
(time (wordlist)))
|
@ -0,0 +1,21 @@
|
|||||||
|
#lang debug racket/base
|
||||||
|
(require racket/file
|
||||||
|
racket/string)
|
||||||
|
(provide usable-words)
|
||||||
|
|
||||||
|
(define reverse-string (compose1 list->string reverse string->list))
|
||||||
|
|
||||||
|
(define omit-words (map reverse-string (file->lines "data/omit.rktd")))
|
||||||
|
|
||||||
|
(define (make-wordlist)
|
||||||
|
;; do global filtering here (i.e., filters that are always true)
|
||||||
|
(define ws
|
||||||
|
(for/list ([w (in-lines (open-input-file "data/words.rktd"))]
|
||||||
|
#:when (and (not (regexp-match "'" w)) ; no apostrophes
|
||||||
|
(regexp-match #rx"^[A-Za-z]+$" w) ; no accented letters
|
||||||
|
(not (member w omit-words)) ; no bad words
|
||||||
|
))
|
||||||
|
w))
|
||||||
|
ws)
|
||||||
|
|
||||||
|
(define usable-words (make-wordlist))
|
Loading…
Reference in New Issue