From a1d0b422c8b5e443f20f58c7cffd091a352bcdbf Mon Sep 17 00:00:00 2001 From: "Jesse A. Tov" Date: Sun, 28 Jul 2019 20:17:35 -0500 Subject: [PATCH 1/3] Fixes Check Syntax arrows for lexer generator. There was a bug in disappeared-use tracking--a missing syntax-local-introduce. This change uses `with-disappeared-uses` and `syntax-local-value/record`, instead of the (broken) manual tracking that was present before. I believe that non-BR parser-tools also has this bug, but I haven't ported the fix yet. --- br-parser-tools-lib/br-parser-tools/lex.rkt | 39 +++++++++++-------- .../br-parser-tools/private-lex/front.rkt | 6 +-- .../br-parser-tools/private-lex/stx.rkt | 13 ++----- 3 files changed, 29 insertions(+), 29 deletions(-) diff --git a/br-parser-tools-lib/br-parser-tools/lex.rkt b/br-parser-tools-lib/br-parser-tools/lex.rkt index 5bd3f87..5441f12 100644 --- a/br-parser-tools-lib/br-parser-tools/lex.rkt +++ b/br-parser-tools-lib/br-parser-tools/lex.rkt @@ -4,6 +4,7 @@ ;; create and use the buffer that the lexer reads from. See docs. (require (for-syntax racket/list + racket/syntax syntax/stx syntax/define syntax/boundmap @@ -69,12 +70,20 @@ [input-port (make-rename-transformer #'input-port-p)] [lexeme-srcloc (make-rename-transformer #'lexeme-srcloc-p)]) action-stx))))) - + +(begin-for-syntax + ; This macro only exists to keep the indentation below the same, + ; in order to make the diff easier to read. So it probably makes + ; sense to inline it after reading. + (define-syntax-rule (begin-with-disappeared-uses body0 body ...) + (with-disappeared-uses + (let () body0 body ...)))) + (define-for-syntax (make-lexer-macro caller src-loc-style) (λ (stx) (syntax-case stx () [(_ . RE+ACTS) - (let () + (begin-with-disappeared-uses (define spec/re-acts (syntax->list #'RE+ACTS)) (for/and ([x (in-list spec/re-acts)]) (syntax-case x () @@ -101,7 +110,7 @@ (define re-actnames (map (λ (re-act name) (list (stx-car re-act) name)) re-acts names)) (when (null? spec/re-acts) (raise-syntax-error caller "expected at least one action" stx)) - (define-values (trans start action-names no-look disappeared-uses) (build-lexer re-actnames)) + (define-values (trans start action-names no-look) (build-lexer re-actnames)) (when (vector-ref action-names start) ;; Start state is final (unless (and ;; All the successor states are final @@ -130,19 +139,17 @@ [HAS-COMMENT-ACT?-STX (if (syntax-e spec-comment-act) #t #f)] [SPEC-COMMENT-ACT-STX (wrap-action spec-comment-act src-loc-style)] [EOF-ACT-STX (wrap-action eof-act src-loc-style)]) - (syntax-property - (syntax/loc stx (let ([NAME ACT] ...) - (let ([proc (lexer-body START-STATE-STX - TRANS-TABLE-STX - (vector ACT-NAME ...) - NO-LOOKAHEAD-STX - SPEC-ACT-STX - HAS-COMMENT-ACT?-STX - SPEC-COMMENT-ACT-STX - EOF-ACT-STX)]) - ;; reverse eta to get named procedures: - (λ (port) (proc port))))) - 'disappeared-use disappeared-uses)))]))) + (syntax/loc stx (let ([NAME ACT] ...) + (let ([proc (lexer-body START-STATE-STX + TRANS-TABLE-STX + (vector ACT-NAME ...) + NO-LOOKAHEAD-STX + SPEC-ACT-STX + HAS-COMMENT-ACT?-STX + SPEC-COMMENT-ACT-STX + EOF-ACT-STX)]) + ;; reverse eta to get named procedures: + (λ (port) (proc port)))))))]))) (define-syntax lexer (make-lexer-macro 'lexer #f)) (define-syntax lexer-src-pos (make-lexer-macro 'lexer-src-pos 'lexer-src-pos)) diff --git a/br-parser-tools-lib/br-parser-tools/private-lex/front.rkt b/br-parser-tools-lib/br-parser-tools/private-lex/front.rkt index 603b32b..c34eba2 100644 --- a/br-parser-tools-lib/br-parser-tools/private-lex/front.rkt +++ b/br-parser-tools-lib/br-parser-tools/private-lex/front.rkt @@ -125,9 +125,8 @@ ;; (values table nat (vector-of (union #f syntax-object)) (vector-of bool) (list-of syntax-object)) ;; each syntax object has the form (re action) (define (build-lexer sos) - (define disappeared-uses (box null)) (define s-re-acts (for/list ([so (in-list sos)]) - (cons (parse (stx-car so) disappeared-uses) + (cons (parse (stx-car so)) (stx-car (stx-cdr so))))) (define cache (make-cache)) (define re-acts (for/list ([s-re-act (in-list s-re-acts)]) @@ -156,6 +155,5 @@ num-states (/ (* 4.0 (+ 2 num-states (* 2 num-vectors) num-entries (* 5 num-different-entries))) 1024))) - (values table (dfa-start-state dfa) (dfa->actions dfa) (dfa->no-look dfa) - (unbox disappeared-uses))) + (values table (dfa-start-state dfa) (dfa->actions dfa) (dfa->no-look dfa))) diff --git a/br-parser-tools-lib/br-parser-tools/private-lex/stx.rkt b/br-parser-tools-lib/br-parser-tools/private-lex/stx.rkt index 326ecb3..07e80c3 100644 --- a/br-parser-tools-lib/br-parser-tools/private-lex/stx.rkt +++ b/br-parser-tools-lib/br-parser-tools/private-lex/stx.rkt @@ -1,5 +1,5 @@ #lang racket/base -(require "util.rkt" syntax/id-table) +(require "util.rkt" syntax/id-table racket/syntax) (provide parse) (define (bad-args stx num) @@ -32,24 +32,21 @@ ;; parse : syntax-object (box (list-of syntax-object)) -> s-re (see re.rkt) ;; checks for errors and generates the plain s-exp form for s ;; Expands lex-abbrevs and applies lex-trans. -(define (parse stx disappeared-uses) +(define (parse stx) (let loop ([stx stx] - [disappeared-uses disappeared-uses] ;; seen-lex-abbrevs: id-table [seen-lex-abbrevs (make-immutable-free-id-table)]) (let ([recur (λ (s) (loop (syntax-rearm s stx) - disappeared-uses seen-lex-abbrevs))] [recur/abbrev (λ (s id) (loop (syntax-rearm s stx) - disappeared-uses (free-id-table-set seen-lex-abbrevs id id)))]) (syntax-case (disarm stx) (repetition union intersection complement concatenation char-range char-complement) [_ (identifier? stx) - (let ([expansion (syntax-local-value stx (λ () #f))]) + (let ([expansion (syntax-local-value/record stx (λ (v) #t))]) (unless (lex-abbrev? expansion) (raise-syntax-error 'regular-expression "undefined abbreviation" @@ -61,7 +58,6 @@ stx #f (list (free-id-table-ref seen-lex-abbrevs stx)))) - (set-box! disappeared-uses (cons stx (unbox disappeared-uses))) (recur/abbrev ((lex-abbrev-get-abbrev expansion)) stx))] [_ (or (char? (syntax-e stx)) (string? (syntax-e stx))) @@ -111,8 +107,7 @@ `(char-complement ,parsed))] ((OP form ...) (identifier? #'OP) - (let* ([expansion (syntax-local-value #'OP (λ () #f))]) - (set-box! disappeared-uses (cons #'OP (unbox disappeared-uses))) + (let* ([expansion (syntax-local-value/record #'OP (λ (v) #t))]) (cond [(lex-trans? expansion) (recur ((lex-trans-f expansion) (disarm stx)))] -- 2.25.1 From 0e0e758851bffae2806fbc0903a219ee3a67b8cb Mon Sep 17 00:00:00 2001 From: "Jesse A. Tov" Date: Sun, 28 Jul 2019 20:20:57 -0500 Subject: [PATCH 2/3] Inlines `begin-with-disappeared-uses`, which means the big indentation change is in this commit instead of the previous one. --- br-parser-tools-lib/br-parser-tools/lex.rkt | 143 ++++++++++---------- 1 file changed, 68 insertions(+), 75 deletions(-) diff --git a/br-parser-tools-lib/br-parser-tools/lex.rkt b/br-parser-tools-lib/br-parser-tools/lex.rkt index 5441f12..c26aeef 100644 --- a/br-parser-tools-lib/br-parser-tools/lex.rkt +++ b/br-parser-tools-lib/br-parser-tools/lex.rkt @@ -71,85 +71,78 @@ [lexeme-srcloc (make-rename-transformer #'lexeme-srcloc-p)]) action-stx))))) -(begin-for-syntax - ; This macro only exists to keep the indentation below the same, - ; in order to make the diff easier to read. So it probably makes - ; sense to inline it after reading. - (define-syntax-rule (begin-with-disappeared-uses body0 body ...) - (with-disappeared-uses - (let () body0 body ...)))) - (define-for-syntax (make-lexer-macro caller src-loc-style) (λ (stx) (syntax-case stx () [(_ . RE+ACTS) - (begin-with-disappeared-uses - (define spec/re-acts (syntax->list #'RE+ACTS)) - (for/and ([x (in-list spec/re-acts)]) - (syntax-case x () - [(RE ACT) #t] - [else (raise-syntax-error caller "not a regular expression / action pair" stx x)])) - (define eof-act (get-special-action spec/re-acts #'eof (case src-loc-style - [(lexer-src-pos) #'(return-without-pos eof)] - [(lexer-srcloc) #'(return-without-srcloc eof)] - [else #'eof]))) - (define spec-act (get-special-action spec/re-acts #'special #'(void))) - (define spec-comment-act (get-special-action spec/re-acts #'special-comment #'#f)) - (define ids (list #'special #'special-comment #'eof)) - (define re-acts (filter (λ (spec/re-act) - (syntax-case spec/re-act () - [((special) act) - (not (ormap - (λ (x) - (and (identifier? #'special) - (module-or-top-identifier=? #'special x))) - ids))] - [_ #t])) spec/re-acts)) - (define names (map (λ (x) (datum->syntax #f (gensym))) re-acts)) - (define acts (map (λ (x) (stx-car (stx-cdr x))) re-acts)) - (define re-actnames (map (λ (re-act name) (list (stx-car re-act) name)) re-acts names)) - (when (null? spec/re-acts) - (raise-syntax-error caller "expected at least one action" stx)) - (define-values (trans start action-names no-look) (build-lexer re-actnames)) - (when (vector-ref action-names start) ;; Start state is final - (unless (and - ;; All the successor states are final - (vector? (vector-ref trans start)) - (andmap (λ (x) (vector-ref action-names (vector-ref x 2))) - (vector->list (vector-ref trans start))) - ;; Each character has a successor state - (let loop ([check 0] - [nexts (vector->list (vector-ref trans start))]) - (cond - [(null? nexts) #f] - [else - (let ([next (car nexts)]) - (and (= (vector-ref next 0) check) - (let ([next-check (vector-ref next 1)]) - (or (>= next-check max-char-num) - (loop (add1 next-check) (cdr nexts))))))]))) - (eprintf "warning: lexer at ~a can accept the empty string\n" stx))) - (with-syntax ([START-STATE-STX start] - [TRANS-TABLE-STX trans] - [NO-LOOKAHEAD-STX no-look] - [(NAME ...) names] - [(ACT ...) (map (λ (a) (wrap-action a src-loc-style)) acts)] - [(ACT-NAME ...) (vector->list action-names)] - [SPEC-ACT-STX (wrap-action spec-act src-loc-style)] - [HAS-COMMENT-ACT?-STX (if (syntax-e spec-comment-act) #t #f)] - [SPEC-COMMENT-ACT-STX (wrap-action spec-comment-act src-loc-style)] - [EOF-ACT-STX (wrap-action eof-act src-loc-style)]) - (syntax/loc stx (let ([NAME ACT] ...) - (let ([proc (lexer-body START-STATE-STX - TRANS-TABLE-STX - (vector ACT-NAME ...) - NO-LOOKAHEAD-STX - SPEC-ACT-STX - HAS-COMMENT-ACT?-STX - SPEC-COMMENT-ACT-STX - EOF-ACT-STX)]) - ;; reverse eta to get named procedures: - (λ (port) (proc port)))))))]))) + (with-disappeared-uses + (let () + (define spec/re-acts (syntax->list #'RE+ACTS)) + (for/and ([x (in-list spec/re-acts)]) + (syntax-case x () + [(RE ACT) #t] + [else (raise-syntax-error caller "not a regular expression / action pair" stx x)])) + (define eof-act (get-special-action spec/re-acts #'eof (case src-loc-style + [(lexer-src-pos) #'(return-without-pos eof)] + [(lexer-srcloc) #'(return-without-srcloc eof)] + [else #'eof]))) + (define spec-act (get-special-action spec/re-acts #'special #'(void))) + (define spec-comment-act (get-special-action spec/re-acts #'special-comment #'#f)) + (define ids (list #'special #'special-comment #'eof)) + (define re-acts (filter (λ (spec/re-act) + (syntax-case spec/re-act () + [((special) act) + (not (ormap + (λ (x) + (and (identifier? #'special) + (module-or-top-identifier=? #'special x))) + ids))] + [_ #t])) spec/re-acts)) + (define names (map (λ (x) (datum->syntax #f (gensym))) re-acts)) + (define acts (map (λ (x) (stx-car (stx-cdr x))) re-acts)) + (define re-actnames (map (λ (re-act name) (list (stx-car re-act) name)) re-acts names)) + (when (null? spec/re-acts) + (raise-syntax-error caller "expected at least one action" stx)) + (define-values (trans start action-names no-look) (build-lexer re-actnames)) + (when (vector-ref action-names start) ;; Start state is final + (unless (and + ;; All the successor states are final + (vector? (vector-ref trans start)) + (andmap (λ (x) (vector-ref action-names (vector-ref x 2))) + (vector->list (vector-ref trans start))) + ;; Each character has a successor state + (let loop ([check 0] + [nexts (vector->list (vector-ref trans start))]) + (cond + [(null? nexts) #f] + [else + (let ([next (car nexts)]) + (and (= (vector-ref next 0) check) + (let ([next-check (vector-ref next 1)]) + (or (>= next-check max-char-num) + (loop (add1 next-check) (cdr nexts))))))]))) + (eprintf "warning: lexer at ~a can accept the empty string\n" stx))) + (with-syntax ([START-STATE-STX start] + [TRANS-TABLE-STX trans] + [NO-LOOKAHEAD-STX no-look] + [(NAME ...) names] + [(ACT ...) (map (λ (a) (wrap-action a src-loc-style)) acts)] + [(ACT-NAME ...) (vector->list action-names)] + [SPEC-ACT-STX (wrap-action spec-act src-loc-style)] + [HAS-COMMENT-ACT?-STX (if (syntax-e spec-comment-act) #t #f)] + [SPEC-COMMENT-ACT-STX (wrap-action spec-comment-act src-loc-style)] + [EOF-ACT-STX (wrap-action eof-act src-loc-style)]) + (syntax/loc stx (let ([NAME ACT] ...) + (let ([proc (lexer-body START-STATE-STX + TRANS-TABLE-STX + (vector ACT-NAME ...) + NO-LOOKAHEAD-STX + SPEC-ACT-STX + HAS-COMMENT-ACT?-STX + SPEC-COMMENT-ACT-STX + EOF-ACT-STX)]) + ;; reverse eta to get named procedures: + (λ (port) (proc port))))))))]))) (define-syntax lexer (make-lexer-macro 'lexer #f)) (define-syntax lexer-src-pos (make-lexer-macro 'lexer-src-pos 'lexer-src-pos)) -- 2.25.1 From ea176e2153147756581ea04693be70aea57c137f Mon Sep 17 00:00:00 2001 From: "Jesse A. Tov" Date: Sun, 28 Jul 2019 20:45:48 -0500 Subject: [PATCH 3/3] Updated tests. --- .../br-parser-tools/private-lex/stx.rkt | 30 +++++++++---------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/br-parser-tools-lib/br-parser-tools/private-lex/stx.rkt b/br-parser-tools-lib/br-parser-tools/private-lex/stx.rkt index 07e80c3..6b5ab9a 100644 --- a/br-parser-tools-lib/br-parser-tools/private-lex/stx.rkt +++ b/br-parser-tools-lib/br-parser-tools/private-lex/stx.rkt @@ -159,27 +159,25 @@ ;; and by "now", I mean it's been broken since before we ;; moved to git. (module+ test - (check-equal? (parse #'#\a null) #\a) - (check-equal? (parse #'"1" null) "1") - (check-equal? (parse #'(repetition 1 1 #\1) null) + (check-equal? (parse #'#\a) #\a) + (check-equal? (parse #'"1") "1") + (check-equal? (parse #'(repetition 1 1 #\1)) '(repetition 1 1 #\1)) - (check-equal? (parse #'(repetition 0 +inf.0 #\1) null) '(repetition 0 +inf.0 #\1)) - (check-equal? (parse #'(union #\1 (union "2") (union)) null) + (check-equal? (parse #'(repetition 0 +inf.0 #\1)) '(repetition 0 +inf.0 #\1)) + (check-equal? (parse #'(union #\1 (union "2") (union))) '(union #\1 (union "2") (union))) - (check-equal? (parse #'(intersection #\1 (intersection "2") (intersection)) - null) + (check-equal? (parse #'(intersection #\1 (intersection "2") (intersection))) '(intersection #\1 (intersection "2") (intersection))) - (check-equal? (parse #'(complement (union #\1 #\2)) - null) + (check-equal? (parse #'(complement (union #\1 #\2))) '(complement (union #\1 #\2))) - (check-equal? (parse #'(concatenation "1" "2" (concatenation)) null) + (check-equal? (parse #'(concatenation "1" "2" (concatenation))) '(concatenation "1" "2" (concatenation))) - (check-equal? (parse #'(char-range "1" #\1) null) '(char-range #\1 #\1)) - (check-equal? (parse #'(char-range #\1 "1") null) '(char-range #\1 #\1)) - (check-equal? (parse #'(char-range "1" "3") null) '(char-range #\1 #\3)) - (check-equal? (parse #'(char-complement (union "1" "2")) null) + (check-equal? (parse #'(char-range "1" #\1)) '(char-range #\1 #\1)) + (check-equal? (parse #'(char-range #\1 "1")) '(char-range #\1 #\1)) + (check-equal? (parse #'(char-range "1" "3")) '(char-range #\1 #\3)) + (check-equal? (parse #'(char-complement (union "1" "2"))) '(char-complement (union "1" "2"))) - (check-equal? (parse #'(char-complement (repetition 1 1 "1")) null) + (check-equal? (parse #'(char-complement (repetition 1 1 "1"))) '(char-complement (repetition 1 1 "1"))) (check-exn #rx"not a character set" - (λ () (parse #'(char-complement (repetition 6 6 "1")) null)))) + (λ () (parse #'(char-complement (repetition 6 6 "1")))))) -- 2.25.1