You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
br-parser-tools/collects/parser-tools/private-lex/front.ss

112 lines
4.3 KiB
Scheme

(module front mzscheme
(require (prefix is: (lib "integer-set.ss"))
"util.ss"
"stx.ss"
"re.ss"
"deriv.ss")
(provide build-lexer)
(define-syntax time-label
(syntax-rules ()
((_ l e ...)
(begin
(printf "~a: " l)
(time (begin e ...))))))
;; A table is either
;; - (vector-of (union #f nat))
;; - (vector-of (vector-of (cons (cons nat nat) nat)))
;; dfa->1d-table : dfa -> (same as build-lexer)
; (define (dfa->1d-table dfa)
; (let (
(define loc:foldr is:foldr)
;; dfa->2d-table : dfa -> (same as build-lexer)
(define (dfa->2d-table dfa)
(let (
;; char-table : (vector-of (union #f nat))
;; The lexer table, one entry per state per char.
;; Each entry specifies a state to transition to.
;; #f indicates no transition
(char-table (make-vector (* 256 (dfa-num-states dfa)) #f)))
;; Fill the char-table vector
(for-each
(lambda (trans)
(let ((from-state (car trans)))
(for-each (lambda (chars/to)
(let ((to-state (cdr chars/to)))
(loc:foldr (lambda (char _)
(vector-set! char-table
(bitwise-ior
char
(arithmetic-shift from-state 8))
to-state))
(void)
(car chars/to))))
(cdr trans))))
(dfa-transitions dfa))
char-table))
;; dfa->actions : dfa -> (vector-of (union #f syntax-object))
;; The action for each final state, #f if the state isn't final
(define (dfa->actions dfa)
(let ((actions (make-vector (dfa-num-states dfa) #f)))
(for-each (lambda (state/action)
(vector-set! actions (car state/action) (cdr state/action)))
(dfa-final-states/actions dfa))
actions))
;; dfa->no-look : dfa -> (vector-of bool)
;; For each state whether the lexer can ignore the next input.
;; It can do this only if there are no transitions out of the
;; current state.
(define (dfa->no-look dfa)
(let ((no-look (make-vector (dfa-num-states dfa) #t)))
(for-each (lambda (trans)
(vector-set! no-look (car trans) #f))
(dfa-transitions dfa))
no-look))
(test-block ((d1 (make-dfa 1 1 (list) (list)))
(d2 (make-dfa 4 1 (list (cons 2 2) (cons 3 3))
(list (cons 1 (list (cons (is:make-range 49 50) 1)
(cons (is:make-range 51) 2)))
(cons 2 (list (cons (is:make-range 49) 3)))))))
((dfa->2d-table d1) (make-vector 256 #f))
((dfa->2d-table d2) (let ((v (make-vector 1024 #f)))
(vector-set! v 305 1)
(vector-set! v 306 1)
(vector-set! v 307 2)
(vector-set! v 561 3)
v))
((dfa->actions d1) (vector #f))
((dfa->actions d2) (vector #f #f 2 3))
((dfa->no-look d1) (vector #t))
((dfa->no-look d2) (vector #t #f #f #t)))
;; build-lexer : syntax-object list -> (values table nat (vector-of (union #f syntax-object)) (vector-of bool))
;; each syntax object has the form (re action)
(define (build-lexer sos)
(let* ((s-re-acts (map (lambda (so)
(cons (parse (car (syntax->list so)))
(cadr (syntax->list so))))
sos))
(cache (make-cache))
(re-acts (time (map (lambda (s-re-act)
(cons (->re (car s-re-act) cache)
(cdr s-re-act)))
s-re-acts)))
(dfa (time (build-dfa re-acts cache))))
;(print-dfa dfa)
;(printf "states: ~a~n" (dfa-num-states dfa))
(values (time (dfa->2d-table dfa)) (dfa-start-state dfa) (dfa->actions dfa) (dfa->no-look dfa))))
)