|
|
|
(module front mzscheme
|
|
|
|
(require (prefix is: mzlib/integer-set)
|
|
|
|
mzlib/list
|
|
|
|
syntax/stx
|
|
|
|
"util.rkt"
|
|
|
|
"stx.rkt"
|
|
|
|
"re.rkt"
|
|
|
|
"deriv.rkt")
|
|
|
|
|
|
|
|
(provide build-lexer)
|
|
|
|
|
|
|
|
(define-syntax time-label
|
|
|
|
(syntax-rules ()
|
|
|
|
((_ l e ...)
|
|
|
|
(begin
|
|
|
|
(printf "~a: " l)
|
|
|
|
(time (begin e ...))))))
|
|
|
|
|
|
|
|
;; A table is either
|
|
|
|
;; - (vector-of (union #f nat))
|
|
|
|
;; - (vector-of (vector-of (vector nat nat nat)))
|
|
|
|
|
|
|
|
(define loc:integer-set-contents is:integer-set-contents)
|
|
|
|
|
|
|
|
;; dfa->1d-table : dfa -> (same as build-lexer)
|
|
|
|
(define (dfa->1d-table dfa)
|
|
|
|
(let ((state-table (make-vector (dfa-num-states dfa) #f))
|
|
|
|
(transition-cache (make-hash-table 'equal)))
|
|
|
|
(for-each
|
|
|
|
(lambda (trans)
|
|
|
|
(let* ((from-state (car trans))
|
|
|
|
(all-chars/to (cdr trans))
|
|
|
|
(flat-all-chars/to
|
|
|
|
(sort
|
|
|
|
(apply append
|
|
|
|
(map (lambda (chars/to)
|
|
|
|
(let ((char-ranges (loc:integer-set-contents (car chars/to)))
|
|
|
|
(to (cdr chars/to)))
|
|
|
|
(map (lambda (char-range)
|
|
|
|
(let ((entry (vector (car char-range) (cdr char-range) to)))
|
|
|
|
(hash-table-get transition-cache entry
|
|
|
|
(lambda ()
|
|
|
|
(hash-table-put! transition-cache
|
|
|
|
entry
|
|
|
|
entry)
|
|
|
|
entry))))
|
|
|
|
char-ranges)))
|
|
|
|
all-chars/to))
|
|
|
|
(lambda (a b)
|
|
|
|
(< (vector-ref a 0) (vector-ref b 0))))))
|
|
|
|
(vector-set! state-table from-state (list->vector flat-all-chars/to))))
|
|
|
|
(dfa-transitions dfa))
|
|
|
|
state-table))
|
|
|
|
|
|
|
|
|
|
|
|
(define loc:foldr is:foldr)
|
|
|
|
|
|
|
|
;; dfa->2d-table : dfa -> (same as build-lexer)
|
|
|
|
(define (dfa->2d-table dfa)
|
|
|
|
(let (
|
|
|
|
;; char-table : (vector-of (union #f nat))
|
|
|
|
;; The lexer table, one entry per state per char.
|
|
|
|
;; Each entry specifies a state to transition to.
|
|
|
|
;; #f indicates no transition
|
|
|
|
(char-table (make-vector (* 256 (dfa-num-states dfa)) #f)))
|
|
|
|
|
|
|
|
;; Fill the char-table vector
|
|
|
|
(for-each
|
|
|
|
(lambda (trans)
|
|
|
|
(let ((from-state (car trans)))
|
|
|
|
(for-each (lambda (chars/to)
|
|
|
|
(let ((to-state (cdr chars/to)))
|
|
|
|
(loc:foldr (lambda (char _)
|
|
|
|
(vector-set! char-table
|
|
|
|
(bitwise-ior
|
|
|
|
char
|
|
|
|
(arithmetic-shift from-state 8))
|
|
|
|
to-state))
|
|
|
|
(void)
|
|
|
|
(car chars/to))))
|
|
|
|
(cdr trans))))
|
|
|
|
(dfa-transitions dfa))
|
|
|
|
char-table))
|
|
|
|
|
|
|
|
|
|
|
|
;; dfa->actions : dfa -> (vector-of (union #f syntax-object))
|
|
|
|
;; The action for each final state, #f if the state isn't final
|
|
|
|
(define (dfa->actions dfa)
|
|
|
|
(let ((actions (make-vector (dfa-num-states dfa) #f)))
|
|
|
|
(for-each (lambda (state/action)
|
|
|
|
(vector-set! actions (car state/action) (cdr state/action)))
|
|
|
|
(dfa-final-states/actions dfa))
|
|
|
|
actions))
|
|
|
|
|
|
|
|
;; dfa->no-look : dfa -> (vector-of bool)
|
|
|
|
;; For each state whether the lexer can ignore the next input.
|
|
|
|
;; It can do this only if there are no transitions out of the
|
|
|
|
;; current state.
|
|
|
|
(define (dfa->no-look dfa)
|
|
|
|
(let ((no-look (make-vector (dfa-num-states dfa) #t)))
|
|
|
|
(for-each (lambda (trans)
|
|
|
|
(vector-set! no-look (car trans) #f))
|
|
|
|
(dfa-transitions dfa))
|
|
|
|
no-look))
|
|
|
|
|
|
|
|
(test-block ((d1 (make-dfa 1 1 (list) (list)))
|
|
|
|
(d2 (make-dfa 4 1 (list (cons 2 2) (cons 3 3))
|
|
|
|
(list (cons 1 (list (cons (is:make-range 49 50) 1)
|
|
|
|
(cons (is:make-range 51) 2)))
|
|
|
|
(cons 2 (list (cons (is:make-range 49) 3))))))
|
|
|
|
(d3 (make-dfa 4 1 (list (cons 2 2) (cons 3 3))
|
|
|
|
(list (cons 1 (list (cons (is:make-range 100 200) 0)
|
|
|
|
(cons (is:make-range 49 50) 1)
|
|
|
|
(cons (is:make-range 51) 2)))
|
|
|
|
(cons 2 (list (cons (is:make-range 49) 3)))))))
|
|
|
|
((dfa->2d-table d1) (make-vector 256 #f))
|
|
|
|
((dfa->2d-table d2) (let ((v (make-vector 1024 #f)))
|
|
|
|
(vector-set! v 305 1)
|
|
|
|
(vector-set! v 306 1)
|
|
|
|
(vector-set! v 307 2)
|
|
|
|
(vector-set! v 561 3)
|
|
|
|
v))
|
|
|
|
((dfa->1d-table d1) (make-vector 1 #f))
|
|
|
|
((dfa->1d-table d2) #(#f
|
|
|
|
#(#(49 50 1) #(51 51 2))
|
|
|
|
#(#(49 49 3))
|
|
|
|
#f))
|
|
|
|
((dfa->1d-table d3) #(#f
|
|
|
|
#(#(49 50 1) #(51 51 2) #(100 200 0))
|
|
|
|
#(#(49 49 3))
|
|
|
|
#f))
|
|
|
|
((dfa->actions d1) (vector #f))
|
|
|
|
((dfa->actions d2) (vector #f #f 2 3))
|
|
|
|
((dfa->no-look d1) (vector #t))
|
|
|
|
((dfa->no-look d2) (vector #t #f #f #t)))
|
|
|
|
|
|
|
|
;; build-lexer : syntax-object list ->
|
|
|
|
;; (values table nat (vector-of (union #f syntax-object)) (vector-of bool) (list-of syntax-object))
|
|
|
|
;; each syntax object has the form (re action)
|
|
|
|
(define (build-lexer sos)
|
|
|
|
(let* ((disappeared-uses (box null))
|
|
|
|
(s-re-acts (map (lambda (so)
|
|
|
|
(cons (parse (stx-car so) disappeared-uses)
|
|
|
|
(stx-car (stx-cdr so))))
|
|
|
|
sos))
|
|
|
|
|
|
|
|
(cache (make-cache))
|
|
|
|
|
|
|
|
(re-acts (map (lambda (s-re-act)
|
|
|
|
(cons (->re (car s-re-act) cache)
|
|
|
|
(cdr s-re-act)))
|
|
|
|
s-re-acts))
|
|
|
|
|
|
|
|
(dfa (build-dfa re-acts cache))
|
|
|
|
(table (dfa->1d-table dfa)))
|
|
|
|
;(print-dfa dfa)
|
|
|
|
#;(let ((num-states (vector-length table))
|
|
|
|
(num-vectors (length (filter values (vector->list table))))
|
|
|
|
(num-entries (apply + (map
|
|
|
|
(lambda (x) (if x (vector-length x) 0))
|
|
|
|
(vector->list table))))
|
|
|
|
(num-different-entries
|
|
|
|
(let ((ht (make-hash-table)))
|
|
|
|
(for-each
|
|
|
|
(lambda (x)
|
|
|
|
(when x
|
|
|
|
(for-each
|
|
|
|
(lambda (y)
|
|
|
|
(hash-table-put! ht y #t))
|
|
|
|
(vector->list x))))
|
|
|
|
(vector->list table))
|
|
|
|
(length (hash-table-map ht cons)))))
|
|
|
|
(printf "~a states, ~aKB\n"
|
|
|
|
num-states
|
|
|
|
(/ (* 4.0 (+ 2 num-states (* 2 num-vectors) num-entries
|
|
|
|
(* 5 num-different-entries))) 1024)))
|
|
|
|
(values table (dfa-start-state dfa) (dfa->actions dfa) (dfa->no-look dfa)
|
|
|
|
(unbox disappeared-uses))))
|
|
|
|
)
|