diff --git a/br-parser-tools-lib/br-parser-tools/lex.rkt b/br-parser-tools-lib/br-parser-tools/lex.rkt index 5bd3f87..c26aeef 100644 --- a/br-parser-tools-lib/br-parser-tools/lex.rkt +++ b/br-parser-tools-lib/br-parser-tools/lex.rkt @@ -4,6 +4,7 @@ ;; create and use the buffer that the lexer reads from. See docs. (require (for-syntax racket/list + racket/syntax syntax/stx syntax/define syntax/boundmap @@ -69,80 +70,79 @@ [input-port (make-rename-transformer #'input-port-p)] [lexeme-srcloc (make-rename-transformer #'lexeme-srcloc-p)]) action-stx))))) - + (define-for-syntax (make-lexer-macro caller src-loc-style) (λ (stx) (syntax-case stx () [(_ . RE+ACTS) - (let () - (define spec/re-acts (syntax->list #'RE+ACTS)) - (for/and ([x (in-list spec/re-acts)]) - (syntax-case x () - [(RE ACT) #t] - [else (raise-syntax-error caller "not a regular expression / action pair" stx x)])) - (define eof-act (get-special-action spec/re-acts #'eof (case src-loc-style - [(lexer-src-pos) #'(return-without-pos eof)] - [(lexer-srcloc) #'(return-without-srcloc eof)] - [else #'eof]))) - (define spec-act (get-special-action spec/re-acts #'special #'(void))) - (define spec-comment-act (get-special-action spec/re-acts #'special-comment #'#f)) - (define ids (list #'special #'special-comment #'eof)) - (define re-acts (filter (λ (spec/re-act) - (syntax-case spec/re-act () - [((special) act) - (not (ormap - (λ (x) - (and (identifier? #'special) - (module-or-top-identifier=? #'special x))) - ids))] - [_ #t])) spec/re-acts)) - (define names (map (λ (x) (datum->syntax #f (gensym))) re-acts)) - (define acts (map (λ (x) (stx-car (stx-cdr x))) re-acts)) - (define re-actnames (map (λ (re-act name) (list (stx-car re-act) name)) re-acts names)) - (when (null? spec/re-acts) - (raise-syntax-error caller "expected at least one action" stx)) - (define-values (trans start action-names no-look disappeared-uses) (build-lexer re-actnames)) - (when (vector-ref action-names start) ;; Start state is final - (unless (and - ;; All the successor states are final - (vector? (vector-ref trans start)) - (andmap (λ (x) (vector-ref action-names (vector-ref x 2))) - (vector->list (vector-ref trans start))) - ;; Each character has a successor state - (let loop ([check 0] - [nexts (vector->list (vector-ref trans start))]) - (cond - [(null? nexts) #f] - [else - (let ([next (car nexts)]) - (and (= (vector-ref next 0) check) - (let ([next-check (vector-ref next 1)]) - (or (>= next-check max-char-num) - (loop (add1 next-check) (cdr nexts))))))]))) - (eprintf "warning: lexer at ~a can accept the empty string\n" stx))) - (with-syntax ([START-STATE-STX start] - [TRANS-TABLE-STX trans] - [NO-LOOKAHEAD-STX no-look] - [(NAME ...) names] - [(ACT ...) (map (λ (a) (wrap-action a src-loc-style)) acts)] - [(ACT-NAME ...) (vector->list action-names)] - [SPEC-ACT-STX (wrap-action spec-act src-loc-style)] - [HAS-COMMENT-ACT?-STX (if (syntax-e spec-comment-act) #t #f)] - [SPEC-COMMENT-ACT-STX (wrap-action spec-comment-act src-loc-style)] - [EOF-ACT-STX (wrap-action eof-act src-loc-style)]) - (syntax-property - (syntax/loc stx (let ([NAME ACT] ...) - (let ([proc (lexer-body START-STATE-STX - TRANS-TABLE-STX - (vector ACT-NAME ...) - NO-LOOKAHEAD-STX - SPEC-ACT-STX - HAS-COMMENT-ACT?-STX - SPEC-COMMENT-ACT-STX - EOF-ACT-STX)]) - ;; reverse eta to get named procedures: - (λ (port) (proc port))))) - 'disappeared-use disappeared-uses)))]))) + (with-disappeared-uses + (let () + (define spec/re-acts (syntax->list #'RE+ACTS)) + (for/and ([x (in-list spec/re-acts)]) + (syntax-case x () + [(RE ACT) #t] + [else (raise-syntax-error caller "not a regular expression / action pair" stx x)])) + (define eof-act (get-special-action spec/re-acts #'eof (case src-loc-style + [(lexer-src-pos) #'(return-without-pos eof)] + [(lexer-srcloc) #'(return-without-srcloc eof)] + [else #'eof]))) + (define spec-act (get-special-action spec/re-acts #'special #'(void))) + (define spec-comment-act (get-special-action spec/re-acts #'special-comment #'#f)) + (define ids (list #'special #'special-comment #'eof)) + (define re-acts (filter (λ (spec/re-act) + (syntax-case spec/re-act () + [((special) act) + (not (ormap + (λ (x) + (and (identifier? #'special) + (module-or-top-identifier=? #'special x))) + ids))] + [_ #t])) spec/re-acts)) + (define names (map (λ (x) (datum->syntax #f (gensym))) re-acts)) + (define acts (map (λ (x) (stx-car (stx-cdr x))) re-acts)) + (define re-actnames (map (λ (re-act name) (list (stx-car re-act) name)) re-acts names)) + (when (null? spec/re-acts) + (raise-syntax-error caller "expected at least one action" stx)) + (define-values (trans start action-names no-look) (build-lexer re-actnames)) + (when (vector-ref action-names start) ;; Start state is final + (unless (and + ;; All the successor states are final + (vector? (vector-ref trans start)) + (andmap (λ (x) (vector-ref action-names (vector-ref x 2))) + (vector->list (vector-ref trans start))) + ;; Each character has a successor state + (let loop ([check 0] + [nexts (vector->list (vector-ref trans start))]) + (cond + [(null? nexts) #f] + [else + (let ([next (car nexts)]) + (and (= (vector-ref next 0) check) + (let ([next-check (vector-ref next 1)]) + (or (>= next-check max-char-num) + (loop (add1 next-check) (cdr nexts))))))]))) + (eprintf "warning: lexer at ~a can accept the empty string\n" stx))) + (with-syntax ([START-STATE-STX start] + [TRANS-TABLE-STX trans] + [NO-LOOKAHEAD-STX no-look] + [(NAME ...) names] + [(ACT ...) (map (λ (a) (wrap-action a src-loc-style)) acts)] + [(ACT-NAME ...) (vector->list action-names)] + [SPEC-ACT-STX (wrap-action spec-act src-loc-style)] + [HAS-COMMENT-ACT?-STX (if (syntax-e spec-comment-act) #t #f)] + [SPEC-COMMENT-ACT-STX (wrap-action spec-comment-act src-loc-style)] + [EOF-ACT-STX (wrap-action eof-act src-loc-style)]) + (syntax/loc stx (let ([NAME ACT] ...) + (let ([proc (lexer-body START-STATE-STX + TRANS-TABLE-STX + (vector ACT-NAME ...) + NO-LOOKAHEAD-STX + SPEC-ACT-STX + HAS-COMMENT-ACT?-STX + SPEC-COMMENT-ACT-STX + EOF-ACT-STX)]) + ;; reverse eta to get named procedures: + (λ (port) (proc port))))))))]))) (define-syntax lexer (make-lexer-macro 'lexer #f)) (define-syntax lexer-src-pos (make-lexer-macro 'lexer-src-pos 'lexer-src-pos)) diff --git a/br-parser-tools-lib/br-parser-tools/private-lex/front.rkt b/br-parser-tools-lib/br-parser-tools/private-lex/front.rkt index 603b32b..c34eba2 100644 --- a/br-parser-tools-lib/br-parser-tools/private-lex/front.rkt +++ b/br-parser-tools-lib/br-parser-tools/private-lex/front.rkt @@ -125,9 +125,8 @@ ;; (values table nat (vector-of (union #f syntax-object)) (vector-of bool) (list-of syntax-object)) ;; each syntax object has the form (re action) (define (build-lexer sos) - (define disappeared-uses (box null)) (define s-re-acts (for/list ([so (in-list sos)]) - (cons (parse (stx-car so) disappeared-uses) + (cons (parse (stx-car so)) (stx-car (stx-cdr so))))) (define cache (make-cache)) (define re-acts (for/list ([s-re-act (in-list s-re-acts)]) @@ -156,6 +155,5 @@ num-states (/ (* 4.0 (+ 2 num-states (* 2 num-vectors) num-entries (* 5 num-different-entries))) 1024))) - (values table (dfa-start-state dfa) (dfa->actions dfa) (dfa->no-look dfa) - (unbox disappeared-uses))) + (values table (dfa-start-state dfa) (dfa->actions dfa) (dfa->no-look dfa))) diff --git a/br-parser-tools-lib/br-parser-tools/private-lex/stx.rkt b/br-parser-tools-lib/br-parser-tools/private-lex/stx.rkt index 326ecb3..6b5ab9a 100644 --- a/br-parser-tools-lib/br-parser-tools/private-lex/stx.rkt +++ b/br-parser-tools-lib/br-parser-tools/private-lex/stx.rkt @@ -1,5 +1,5 @@ #lang racket/base -(require "util.rkt" syntax/id-table) +(require "util.rkt" syntax/id-table racket/syntax) (provide parse) (define (bad-args stx num) @@ -32,24 +32,21 @@ ;; parse : syntax-object (box (list-of syntax-object)) -> s-re (see re.rkt) ;; checks for errors and generates the plain s-exp form for s ;; Expands lex-abbrevs and applies lex-trans. -(define (parse stx disappeared-uses) +(define (parse stx) (let loop ([stx stx] - [disappeared-uses disappeared-uses] ;; seen-lex-abbrevs: id-table [seen-lex-abbrevs (make-immutable-free-id-table)]) (let ([recur (λ (s) (loop (syntax-rearm s stx) - disappeared-uses seen-lex-abbrevs))] [recur/abbrev (λ (s id) (loop (syntax-rearm s stx) - disappeared-uses (free-id-table-set seen-lex-abbrevs id id)))]) (syntax-case (disarm stx) (repetition union intersection complement concatenation char-range char-complement) [_ (identifier? stx) - (let ([expansion (syntax-local-value stx (λ () #f))]) + (let ([expansion (syntax-local-value/record stx (λ (v) #t))]) (unless (lex-abbrev? expansion) (raise-syntax-error 'regular-expression "undefined abbreviation" @@ -61,7 +58,6 @@ stx #f (list (free-id-table-ref seen-lex-abbrevs stx)))) - (set-box! disappeared-uses (cons stx (unbox disappeared-uses))) (recur/abbrev ((lex-abbrev-get-abbrev expansion)) stx))] [_ (or (char? (syntax-e stx)) (string? (syntax-e stx))) @@ -111,8 +107,7 @@ `(char-complement ,parsed))] ((OP form ...) (identifier? #'OP) - (let* ([expansion (syntax-local-value #'OP (λ () #f))]) - (set-box! disappeared-uses (cons #'OP (unbox disappeared-uses))) + (let* ([expansion (syntax-local-value/record #'OP (λ (v) #t))]) (cond [(lex-trans? expansion) (recur ((lex-trans-f expansion) (disarm stx)))] @@ -164,27 +159,25 @@ ;; and by "now", I mean it's been broken since before we ;; moved to git. (module+ test - (check-equal? (parse #'#\a null) #\a) - (check-equal? (parse #'"1" null) "1") - (check-equal? (parse #'(repetition 1 1 #\1) null) + (check-equal? (parse #'#\a) #\a) + (check-equal? (parse #'"1") "1") + (check-equal? (parse #'(repetition 1 1 #\1)) '(repetition 1 1 #\1)) - (check-equal? (parse #'(repetition 0 +inf.0 #\1) null) '(repetition 0 +inf.0 #\1)) - (check-equal? (parse #'(union #\1 (union "2") (union)) null) + (check-equal? (parse #'(repetition 0 +inf.0 #\1)) '(repetition 0 +inf.0 #\1)) + (check-equal? (parse #'(union #\1 (union "2") (union))) '(union #\1 (union "2") (union))) - (check-equal? (parse #'(intersection #\1 (intersection "2") (intersection)) - null) + (check-equal? (parse #'(intersection #\1 (intersection "2") (intersection))) '(intersection #\1 (intersection "2") (intersection))) - (check-equal? (parse #'(complement (union #\1 #\2)) - null) + (check-equal? (parse #'(complement (union #\1 #\2))) '(complement (union #\1 #\2))) - (check-equal? (parse #'(concatenation "1" "2" (concatenation)) null) + (check-equal? (parse #'(concatenation "1" "2" (concatenation))) '(concatenation "1" "2" (concatenation))) - (check-equal? (parse #'(char-range "1" #\1) null) '(char-range #\1 #\1)) - (check-equal? (parse #'(char-range #\1 "1") null) '(char-range #\1 #\1)) - (check-equal? (parse #'(char-range "1" "3") null) '(char-range #\1 #\3)) - (check-equal? (parse #'(char-complement (union "1" "2")) null) + (check-equal? (parse #'(char-range "1" #\1)) '(char-range #\1 #\1)) + (check-equal? (parse #'(char-range #\1 "1")) '(char-range #\1 #\1)) + (check-equal? (parse #'(char-range "1" "3")) '(char-range #\1 #\3)) + (check-equal? (parse #'(char-complement (union "1" "2"))) '(char-complement (union "1" "2"))) - (check-equal? (parse #'(char-complement (repetition 1 1 "1")) null) + (check-equal? (parse #'(char-complement (repetition 1 1 "1"))) '(char-complement (repetition 1 1 "1"))) (check-exn #rx"not a character set" - (λ () (parse #'(char-complement (repetition 6 6 "1")) null)))) + (λ () (parse #'(char-complement (repetition 6 6 "1"))))))