diff --git a/collects/parser-tools/lex.ss b/collects/parser-tools/lex.ss index feb969f..6f430fe 100644 --- a/collects/parser-tools/lex.ss +++ b/collects/parser-tools/lex.ss @@ -115,8 +115,7 @@ #f "Form should be (define-lex-abbrevs (name re) ...)" stx)))) - - +#; (define (lexer-body start-state trans-table actions no-lookahead special-action special-error-action has-special-comment-action? special-comment-action eof-action wrap?) @@ -197,6 +196,106 @@ length longest-match-length))))))))))))) lexer)) + + (define (get-next-state-helper char min max table) + (if (>= min max) + #f + (let* ((try (quotient (+ min max) 2)) + (el (vector-ref table try)) + (range (car el)) + (r1 (car range)) + (r2 (cdr range))) + (cond + ((and (>= char r1) (<= char r2)) (cdr el)) + ((< char r1) (get-next-state-helper char min try table)) + (else (get-next-state-helper char (add1 try) max table)))))) + + + + + (define (get-next-state char table) + (if table + (get-next-state-helper char 0 (vector-length table) table) + #f)) + + (define (lexer-body start-state trans-table actions no-lookahead + special-action special-error-action + has-special-comment-action? special-comment-action eof-action wrap?) + (letrec ((lexer + (lambda (ip) + (unless (input-port? ip) + (raise-type-error + 'lexer + "input-port" + 0 + ip)) + (let ((first-pos (get-position ip)) + (first-char (peek-char-or-special ip 0))) + (cond + ((eq? 'special first-char) + (let* ((comment? #f) + (error? #f) + (spec (with-handlers ((exn:special-comment? + (lambda (x) (set! comment? #t))) + (not-break-exn? + (lambda (ex) (set! error? #t) ex))) + (read-char-or-special ip)))) + (cond + ((and comment? (not has-special-comment-action?)) + (lexer ip)) + (else + (do-match ip first-pos (cond + (comment? special-comment-action) + (error? special-error-action) + (else special-action)) + spec wrap?))))) + ((eof-object? first-char) + (do-match ip first-pos eof-action (read-char-or-special ip) wrap?)) + (else + (let lexer-loop ( + ;; current-state + (state start-state) + ;; the character to transition on + (char first-char) + ;; action for the longest match seen thus far + ;; including a match at the current state + (longest-match-action + (vector-ref actions start-state)) + ;; how many characters have been read + ;; including the one just read + (length 1) + ;; how many characters are in the longest match + (longest-match-length 1)) + (let ((next-state + (cond + ((eof-object? char) #f) + ((eq? char 'special) #f) + (else (get-next-state (char->integer char) + (vector-ref trans-table state)))))) + (cond + ((not next-state) + (check-match ip first-pos longest-match-length + length longest-match-action wrap?)) + ((vector-ref no-lookahead next-state) + (let ((act (vector-ref actions next-state))) + (check-match ip + first-pos + (if act length longest-match-length) + length + (if act act longest-match-action) + wrap?))) + (else + (let ((act (vector-ref actions next-state))) + (lexer-loop next-state + (peek-char-or-special ip length) + (if act + act + longest-match-action) + (add1 length) + (if act + length + longest-match-length))))))))))))) + lexer)) (define id (lambda (x) x)) diff --git a/collects/parser-tools/private-lex/front.ss b/collects/parser-tools/private-lex/front.ss index 260c803..aa1abb4 100644 --- a/collects/parser-tools/private-lex/front.ss +++ b/collects/parser-tools/private-lex/front.ss @@ -1,5 +1,6 @@ (module front mzscheme (require (prefix is: (lib "integer-set.ss")) + (lib "list.ss") "util.ss" "stx.ss" "re.ss" @@ -18,10 +19,31 @@ ;; - (vector-of (union #f nat)) ;; - (vector-of (vector-of (cons (cons nat nat) nat))) + (define loc:integer-set-contents is:integer-set-contents) + ;; dfa->1d-table : dfa -> (same as build-lexer) -; (define (dfa->1d-table dfa) -; (let ( + (define (dfa->1d-table dfa) + (let ((state-table (make-vector (dfa-num-states dfa) #f))) + (for-each (lambda (trans) + (let* ((from-state (car trans)) + (all-chars/to (cdr trans)) + (flat-all-chars/to + (mergesort + (apply append + (map (lambda (chars/to) + (let ((char-ranges (loc:integer-set-contents (car chars/to))) + (to (cdr chars/to))) + (map (lambda (char-range) + (cons char-range to)) + char-ranges))) + all-chars/to)) + (lambda (a b) + (< (caar a) (caar b)))))) + (vector-set! state-table from-state (list->vector flat-all-chars/to)))) + (dfa-transitions dfa)) + state-table)) + (define loc:foldr is:foldr) ;; dfa->2d-table : dfa -> (same as build-lexer) @@ -76,6 +98,11 @@ (d2 (make-dfa 4 1 (list (cons 2 2) (cons 3 3)) (list (cons 1 (list (cons (is:make-range 49 50) 1) (cons (is:make-range 51) 2))) + (cons 2 (list (cons (is:make-range 49) 3)))))) + (d3 (make-dfa 4 1 (list (cons 2 2) (cons 3 3)) + (list (cons 1 (list (cons (is:make-range 100 200) 0) + (cons (is:make-range 49 50) 1) + (cons (is:make-range 51) 2))) (cons 2 (list (cons (is:make-range 49) 3))))))) ((dfa->2d-table d1) (make-vector 256 #f)) ((dfa->2d-table d2) (let ((v (make-vector 1024 #f))) @@ -84,6 +111,15 @@ (vector-set! v 307 2) (vector-set! v 561 3) v)) + ((dfa->1d-table d1) (make-vector 1 #f)) + ((dfa->1d-table d2) #(#f + #(((49 . 50) . 1) ((51 . 51) . 2)) + #(((49 . 49) . 3)) + #f)) + ((dfa->1d-table d3) #(#f + #(((49 . 50) . 1) ((51 . 51) . 2) ((100 . 200) . 0)) + #(((49 . 49) . 3)) + #f)) ((dfa->actions d1) (vector #f)) ((dfa->actions d2) (vector #f #f 2 3)) ((dfa->no-look d1) (vector #t)) @@ -107,5 +143,5 @@ (dfa (time (build-dfa re-acts cache)))) ;(print-dfa dfa) ;(printf "states: ~a~n" (dfa-num-states dfa)) - (values (time (dfa->2d-table dfa)) (dfa-start-state dfa) (dfa->actions dfa) (dfa->no-look dfa)))) + (values (time (dfa->1d-table dfa)) (dfa-start-state dfa) (dfa->actions dfa) (dfa->no-look dfa)))) )