diff --git a/collects/parser-tools/lex.ss b/collects/parser-tools/lex.ss index 73a650e..aabe1d5 100644 --- a/collects/parser-tools/lex.ss +++ b/collects/parser-tools/lex.ss @@ -16,7 +16,7 @@ (define file-path (make-parameter #f)) - (define-syntaxes (lexer-exp lexer-src-pos-exp) + #;(define-syntaxes (lexer-exp lexer-src-pos-exp) (let ((build-lexer (lambda (wrap?) (lambda (stx) @@ -60,24 +60,52 @@ "expects regular expression / action pairs" x)))) (syntax->list (syntax (re-act ...)))) - (let ((table (generate-table (syntax (re-act ...)) stx))) - (with-syntax ((start-state-stx (table-start table)) - (trans-table-stx (table-trans table)) - (eof-table-stx (table-eof table)) - (no-lookahead-stx (table-no-lookahead table)) - (actions-stx `(vector ,@(vector->list (table-actions table)))) - (wrap? wrap?)) - (syntax - (lexer-body start-state-stx - trans-table-stx - eof-table-stx - actions-stx - no-lookahead-stx - wrap?))))))))))) + (let* ((re-act-lst (syntax->list (syntax (re-act ...)))) + (spec-act (let loop ((lst re-act-lst)) + (cond + ((null? lst) + #'(void)) + (else + (syntax-case (car lst) () + (((special) act) + (eq? (syntax-e (syntax special)) 'special) + (syntax act)) + (_ (loop (cdr lst)))))))) + (re-act-lst (let loop ((lst re-act-lst)) + (cond + ((null? lst) + null) + (else + (syntax-case (car lst) () + (((special) act) + (eq? (syntax-e (syntax special)) 'special) + (loop (cdr lst))) + (_ (cons (car lst) (loop (cdr lst))))))))) + (spec-act (datum->syntax-object + spec-act + `(lambda (start-pos end-pos lexeme return-without-pos input-port) + ,spec-act) + spec-act))) + (let ((table (generate-table re-act-lst stx))) + (with-syntax ((start-state-stx (table-start table)) + (trans-table-stx (table-trans table)) + (eof-table-stx (table-eof table)) + (no-lookahead-stx (table-no-lookahead table)) + (actions-stx `(vector ,@(vector->list (table-actions table)))) + (spec-act-stx spec-act) + (wrap? wrap?)) + (syntax + (lexer-body start-state-stx + trans-table-stx + eof-table-stx + actions-stx + no-lookahead-stx + spec-act-stx + wrap?)))))))))))) (values (build-lexer #f) (build-lexer #t)))) - + (define-syntax (define-lex-abbrev stx) (syntax-case stx () ((_ name re) @@ -114,7 +142,7 @@ "Form should be (define-lex-abbrevs (name re) ...)" stx)))) - (define (compiled-lexer-body lexer actions wrap?) + #;(define (compiled-lexer-body lexer actions wrap?) (lambda (ip) (unless (input-port? ip) (raise-type-error @@ -127,7 +155,7 @@ (lexer ip peek-string))) (do-match ip first-pos longest-match-length length (vector-ref actions longest-match-action) wrap?))))) - (define (lexer-body start-state trans-table eof-table actions no-lookahead wrap?) + (define (lexer-body start-state trans-table eof-table actions no-lookahead special-action wrap?) (lambda (ip) (unless (input-port? ip) (raise-type-error @@ -135,52 +163,76 @@ "input-port" 0 ip)) - (let ((first-pos (get-position ip))) - (let lexer-loop ( - ;; current-state - (state start-state) - ;; the character to transition on - (char (peek-string 1 0 ip)) - ;; action for the longest match seen thus far - ;; including a match at the current state - (longest-match-action - (vector-ref actions start-state)) - ;; how many characters have been read - ;; including the one just read - (length 1) - ;; how many characters are in the longest match - (longest-match-length 1)) - (let ((next-state - (cond - ((eof-object? char) - (vector-ref eof-table state)) - (else - (vector-ref - trans-table - (+ (char->integer (string-ref char 0)) (* 256 state))))))) - (cond - ((not next-state) - (do-match ip first-pos longest-match-length length longest-match-action wrap?)) - ((vector-ref no-lookahead next-state) - (let ((act (vector-ref actions next-state))) - (do-match ip - first-pos - (if act length longest-match-length) - length - (if act act longest-match-action) - wrap?))) - (else - (let ((act (vector-ref actions next-state))) - (lexer-loop next-state - (peek-string 1 length ip) - (if act - act - longest-match-action) - (add1 length) - (if act - length - longest-match-length)))))))))) - + (let ((first-pos (get-position ip)) + (first-char (peek-char-or-special ip 0))) + (cond + ((eq? 'special first-char) + (let* ((val (read-char-or-special ip)) + (end-pos (get-position ip))) + (cond + (wrap? + (let/ec ret + (list (special-action first-pos + end-pos + val + ret + ip) + first-pos + end-pos))) + (else + (special-action first-pos + end-pos + val + id + ip))))) + (else + (let lexer-loop ( + ;; current-state + (state start-state) + ;; the character to transition on + (char first-char) + ;; action for the longest match seen thus far + ;; including a match at the current state + (longest-match-action + (vector-ref actions start-state)) + ;; how many characters have been read + ;; including the one just read + (length 1) + ;; how many characters are in the longest match + (longest-match-length 1)) + (let ((next-state + (cond + ((eof-object? char) + (vector-ref eof-table state)) + ((eq? char 'special) + #f) + (else + (vector-ref + trans-table + (+ (char->integer char) (* 256 state))))))) + (cond + ((not next-state) + (do-match ip first-pos longest-match-length length longest-match-action wrap?)) + ((vector-ref no-lookahead next-state) + (let ((act (vector-ref actions next-state))) + (do-match ip + first-pos + (if act length longest-match-length) + length + (if act act longest-match-action) + wrap?))) + (else + (let ((act (vector-ref actions next-state))) + (lexer-loop next-state + (peek-char-or-special ip length) + (if act + act + longest-match-action) + (add1 length) + (if act + length + longest-match-length)))))))))))) + (define id (lambda (x) x)) (define (do-match lb first-pos longest-match-length length longest-match-action wrap?)