one index
parent
9e7fa0d5b4
commit
c2f785f7d7
@ -1,57 +0,0 @@
|
||||
#lang debug racket/base
|
||||
(require racket/file
|
||||
racket/fasl
|
||||
racket/vector
|
||||
"words.rkt")
|
||||
(provide (all-defined-out))
|
||||
|
||||
(define (char->bitindex c)
  ;; Map a character to its bit position inside a word's letter bitmask.
  ;;   #\a .. #\z  ->  bits 0 .. 25
  ;;   #\A .. #\Z  ->  bits 32 .. 57
  ;; Any other character falls through to bit 0, i.e. the same bit as #\a.
  (define code (char->integer c))
  (cond
    [(char<=? #\a c #\z) (- code (char->integer #\a))]
    ;; uppercase letters are offset so that #\A lands on bit 32
    [(char<=? #\A c #\Z) (+ 32 (- code (char->integer #\A)))]
    [else 0]))
|
||||
|
||||
(define (word->charidx word)
  ;; Build an integer bitmask with one bit set for every distinct
  ;; character of `word`; the bit position comes from `char->bitindex`.
  ;; An empty string produces 0.
  (for/fold ([mask 0])
            ([ch (in-string word)])
    (bitwise-ior mask (arithmetic-shift 1 (char->bitindex ch)))))
|
||||
|
||||
(define (bitindex->char i)
  ;; Inverse of the letter-to-bit mapping used by the character index:
  ;;   bits 0 .. 25   ->  #\a .. #\z
  ;;   bits 32 .. 57  ->  #\A .. #\Z
  ;; Raises an error for every other index. The upper bounds are 25 and 57
  ;; (26 letters each); wider bounds would decode the unused bits 26, 58
  ;; and 59 to the non-letters #\{, #\[ and #\\.
  (cond
    [(<= 0 i 25) (integer->char (+ i 97))]   ; 97 = (char->integer #\a)
    [(<= 32 i 57) (integer->char (+ i 33))]  ; 33 = 65 - 32, 65 = (char->integer #\A)
    [else (error 'bitindex->char "bit index does not correspond to a letter: ~e" i)]))
|
||||
|
||||
(define (charidx->chars int)
  ;; Decode a letter bitmask back into a list of characters.
  ;; Bit positions 0..63 are inspected in ascending order and every set
  ;; bit is translated with `bitindex->char`, so the result is ordered by
  ;; bit position.
  (reverse
   (for/fold ([found '()])
             ([bit (in-range 64)])
     (if (bitwise-bit-set? int bit)
         (cons (bitindex->char bit) found)
         found))))
|
||||
|
||||
;; Path of the serialized character index.
(define charidx-file "compiled/charidx.rktd")

(define (regenerate-char-index!)
  ;; Recompute the letter bitmask of every entry in `usable-words` and
  ;; write the resulting vector to `charidx-file` in fasl format,
  ;; replacing any existing file.
  ;; The parent directory is created if it is missing, and the port is
  ;; closed (and therefore flushed) before returning, so the file can be
  ;; read back immediately afterwards.
  (make-parent-directory* charidx-file)
  (call-with-output-file* charidx-file
    #:exists 'replace
    (lambda (out)
      (s-exp->fasl (vector-map word->charidx usable-words) out))))
|
||||
|
||||
;; The character index, loaded from `charidx-file`.
;; The file is generated first when it does not exist yet. The input port
;; is closed as soon as the value has been read.
(define charidx
  (begin
    (unless (file-exists? charidx-file)
      (regenerate-char-index!))
    (call-with-input-file* charidx-file fasl->s-exp)))
|
||||
|
||||
(define (contains-char? charidx-entry c)
  ;; True when the bit that `char->bitindex` assigns to `c` is set in the
  ;; bitmask `charidx-entry`.
  (let ([bit (char->bitindex c)])
    (not (zero? (bitwise-and charidx-entry (arithmetic-shift 1 bit))))))
|
||||
|
||||
;; Bitmask selecting exactly the uppercase-letter bits of a character
;; index entry: 26 consecutive one-bits occupying positions 32..57
;; (#\A..#\Z). Bit 58 is not a letter bit and is deliberately excluded.
(define capitalized-mask
  (arithmetic-shift (sub1 (arithmetic-shift 1 26)) 32))
|
||||
|
||||
(define (capitalized? charidx-entry)
  ;; Capitals only occur as the first letter of a word, so finding any
  ;; uppercase bit in the entry is enough to call the word capitalized.
  (let ([uppercase-bits (bitwise-and charidx-entry capitalized-mask)])
    (not (zero? uppercase-bits))))
|
||||
|
||||
(module+ test
  (require rackunit racket/vector)
  ;; number of index entries whose word contains a lowercase z
  (define (has-z? entry) (contains-char? entry #\z))
  (check-equal? (vector-length (vector-filter has-z? charidx)) 7830)
  ;; round trip: duplicate letters collapse, result is ordered by bit position
  (check-equal? (charidx->chars (word->charidx "abuzz"))
                (list #\a #\b #\u #\z)))
|
@ -0,0 +1,68 @@
|
||||
#lang debug racket/base
|
||||
(require racket/file
|
||||
racket/fasl
|
||||
racket/runtime-path)
|
||||
(provide (all-defined-out))
|
||||
|
||||
;; Location of the serialized word index.
(define-runtime-path wordidx-file
  "compiled/words/words-index.rktd")

;; One entry of the word index.
;; NOTE(review): presumably a prefab struct so that instances can be
;; written and read back with fasl -- confirm.
(struct word-rec
  (word      ; the word itself
   charint   ; letter bitmask of the word (see word->charidx)
   length)   ; string-length of the word
  #:prefab)
|
||||
|
||||
(define (make-word-recs)
  ;; Read "data/words.rktd" line by line and return a vector of `word-rec`
  ;; structs, one per accepted word. A word is accepted when
  ;;   * it consists only of the letters A-Z / a-z (this also rules out
  ;;     apostrophes and accented letters), and
  ;;   * it is not on the omit list.
  ;; Each line of "data/omit.rktd" is reversed before it is compared
  ;; against the candidate words.
  ;; NOTE(review): both data paths are resolved against the current
  ;; directory, unlike `wordidx-file`, which is a runtime path -- confirm
  ;; this is intended.
  (define (reverse-string s)
    (list->string (reverse (string->list s))))
  ;; hash used as a set, so each membership test is a constant-time lookup
  (define omitted
    (for/hash ([line (in-list (file->lines "data/omit.rktd"))])
      (values (reverse-string line) #t)))
  ;; call-with-input-file* closes the port once the vector is built
  (call-with-input-file* "data/words.rktd"
    (lambda (in)
      (for/vector ([w (in-lines in)]
                   #:when (and (regexp-match? #rx"^[A-Za-z]+$" w)
                               (not (hash-ref omitted w #f))))
        (word-rec w
                  (word->charidx w)
                  (string-length w))))))
|
||||
|
||||
(define (regenerate-word-index!)
  ;; Rebuild the word records with `make-word-recs` and write them to
  ;; `wordidx-file` in fasl format, replacing any existing file.
  ;; The parent directory is created when missing. The port is closed
  ;; (and therefore flushed) before returning, so the file can be read
  ;; back immediately afterwards.
  (make-parent-directory* wordidx-file)
  (call-with-output-file* wordidx-file
    #:exists 'replace
    (lambda (out)
      (s-exp->fasl (make-word-recs) out))))
|
||||
|
||||
;; The two letter-bitmask helpers are defined ahead of `wordrecs` on
;; purpose: initialising `wordrecs` may call `regenerate-word-index!`,
;; which reaches `word->charidx` through `make-word-recs`, and a
;; module-level binding cannot be used before its definition has been
;; evaluated.

(define (char->bitindex c)
  ;; Map a character to its bit position inside a word's letter bitmask.
  ;;   #\a .. #\z  ->  bits 0 .. 25
  ;;   #\A .. #\Z  ->  bits 32 .. 57
  ;; Any other character falls through to bit 0, i.e. the same bit as #\a.
  (cond
    [(char<=? #\a c #\z) (- (char->integer c) 97)]  ; 97 = (char->integer #\a)
    [(char<=? #\A c #\Z) (- (char->integer c) 33)]  ; 33 = 65 - 32, 65 = (char->integer #\A)
    [else 0]))

(define (word->charidx word)
  ;; Build an integer bitmask with one bit set for every distinct
  ;; character of `word`. An empty string produces 0.
  (apply bitwise-ior
         (for/list ([c (in-string word)])
           (expt 2 (char->bitindex c)))))

;; The word index, loaded from `wordidx-file`.
;; The file is generated first when it does not exist yet. The input port
;; is closed as soon as the value has been read.
(define wordrecs
  (begin
    (unless (file-exists? wordidx-file)
      (regenerate-word-index!))
    (call-with-input-file* wordidx-file fasl->s-exp)))
|
||||
|
||||
(define (bitindex->char i)
  ;; Inverse of the letter-to-bit mapping used by the word index:
  ;;   bits 0 .. 25   ->  #\a .. #\z
  ;;   bits 32 .. 57  ->  #\A .. #\Z
  ;; Raises an error for every other index. The upper bounds are 25 and 57
  ;; (26 letters each); wider bounds would decode the unused bits 26, 58
  ;; and 59 to the non-letters #\{, #\[ and #\\.
  (cond
    [(<= 0 i 25) (integer->char (+ i 97))]   ; 97 = (char->integer #\a)
    [(<= 32 i 57) (integer->char (+ i 33))]  ; 33 = 65 - 32, 65 = (char->integer #\A)
    [else (error 'bitindex->char "bit index does not correspond to a letter: ~e" i)]))
|
||||
|
||||
(define (charidx->chars int)
  ;; Decode a letter bitmask back into a list of characters.
  ;; Bit positions 0..63 are inspected in ascending order and every set
  ;; bit is translated with `bitindex->char`, so the result is ordered by
  ;; bit position.
  (reverse
   (for/fold ([found '()])
             ([bit (in-range 64)])
     (if (bitwise-bit-set? int bit)
         (cons (bitindex->char bit) found)
         found))))
|
||||
|
||||
(define (contains-char? charidx-entry c)
  ;; True when the bit that `char->bitindex` assigns to `c` is set in the
  ;; bitmask `charidx-entry`.
  (let ([bit (char->bitindex c)])
    (not (zero? (bitwise-and charidx-entry (arithmetic-shift 1 bit))))))
|
||||
|
||||
;; Bitmask selecting exactly the uppercase-letter bits of a letter
;; bitmask: 26 consecutive one-bits occupying positions 32..57
;; (#\A..#\Z). Bit 58 is not a letter bit and is deliberately excluded.
(define capitalized-mask
  (arithmetic-shift (sub1 (arithmetic-shift 1 26)) 32))
|
||||
|
||||
(define (capitalized? charidx-entry)
  ;; Capitals only occur as the first letter of a word, so finding any
  ;; uppercase bit in the entry is enough to call the word capitalized.
  (let ([uppercase-bits (bitwise-and charidx-entry capitalized-mask)])
    (not (zero? uppercase-bits))))
|
@ -1,17 +0,0 @@
|
||||
#lang debug racket/base
|
||||
(require racket/file
|
||||
racket/fasl
|
||||
racket/vector
|
||||
"words.rkt")
|
||||
(provide (all-defined-out))
|
||||
|
||||
;; Path of the serialized length index.
(define lengthidx-file "compiled/lengthidx.rktd")

(define (regenerate-length-index!)
  ;; Compute the string length of every entry in `usable-words` and write
  ;; the resulting vector to `lengthidx-file` in fasl format, replacing
  ;; any existing file.
  ;; The parent directory is created if it is missing, and the port is
  ;; closed (and therefore flushed) before returning, so the file can be
  ;; read back immediately afterwards.
  (make-parent-directory* lengthidx-file)
  (call-with-output-file* lengthidx-file
    #:exists 'replace
    (lambda (out)
      (s-exp->fasl (vector-map string-length usable-words) out))))
|
||||
|
||||
;; The length index, loaded from `lengthidx-file`.
;; The file is generated first when it does not exist yet. The input port
;; is closed as soon as the value has been read.
(define lengthidx
  (begin
    (unless (file-exists? lengthidx-file)
      (regenerate-length-index!))
    (call-with-input-file* lengthidx-file fasl->s-exp)))
|
||||
|
@ -1,30 +0,0 @@
|
||||
#lang debug racket/base
|
||||
(require racket/file
|
||||
racket/fasl
|
||||
racket/string)
|
||||
(provide usable-words)
|
||||
|
||||
;; Return `str` with its characters in reverse order.
(define (reverse-string str)
  (list->string (reverse (string->list str))))

;; Words to exclude from the word list: every line of "data/omit.rktd",
;; reversed.
(define omit-words
  (for/list ([line (in-list (file->lines "data/omit.rktd"))])
    (reverse-string line)))
|
||||
|
||||
(define (make-wordlist)
  ;; Read "data/words.rktd" line by line and return the list of accepted
  ;; words. Only filters that hold globally are applied here. A word is
  ;; accepted when
  ;;   * it consists only of the letters A-Z / a-z (this also rules out
  ;;     apostrophes and accented letters), and
  ;;   * it is not a member of `omit-words`.
  ;; hash used as a set, so each membership test is a constant-time lookup
  (define omitted
    (for/hash ([w (in-list omit-words)])
      (values w #t)))
  ;; call-with-input-file* closes the port once the list is built
  (call-with-input-file* "data/words.rktd"
    (lambda (in)
      (for/list ([w (in-lines in)]
                 #:when (and (regexp-match? #rx"^[A-Za-z]+$" w)
                             (not (hash-ref omitted w #f))))
        w))))
|
||||
|
||||
;; Path of the serialized word list.
(define wordidx-file "compiled/wordidx.rktd")

(define (regenerate-word-index!)
  ;; Rebuild the word list with `make-wordlist` and write it to
  ;; `wordidx-file` in fasl format, replacing any existing file.
  ;; The parent directory is created if it is missing, and the port is
  ;; closed (and therefore flushed) before returning, so the file can be
  ;; read back immediately afterwards.
  (make-parent-directory* wordidx-file)
  (call-with-output-file* wordidx-file
    #:exists 'replace
    (lambda (out)
      (s-exp->fasl (make-wordlist) out))))
|
||||
|
||||
;; Vector of all usable words, loaded from `wordidx-file`.
;; The file is generated first when it does not exist yet. The input port
;; is closed as soon as the stored list has been read.
(define usable-words
  (begin
    (unless (file-exists? wordidx-file)
      (regenerate-word-index!))
    (list->vector (call-with-input-file* wordidx-file fasl->s-exp))))
|
Loading…
Reference in New Issue