From c6375182fae9826b40ecb99c6812a7f1797415b1 Mon Sep 17 00:00:00 2001 From: Danny Yoo Date: Fri, 1 Feb 2013 14:25:07 -0700 Subject: [PATCH] PR 13471: Add cycle-detection during lex-abbrev expansion to raise appropriate syntax error. Also add test cases for lex-abbrev cycle detection. original commit: 7146289c34c8256e54bda9157fcdc337047ae043 --- .../parser-tools/private-lex/error-tests.rkt | 29 +++ collects/parser-tools/private-lex/stx.rkt | 213 ++++++++++-------- 2 files changed, 143 insertions(+), 99 deletions(-) diff --git a/collects/parser-tools/private-lex/error-tests.rkt b/collects/parser-tools/private-lex/error-tests.rkt index c4afaee..32ad774 100644 --- a/collects/parser-tools/private-lex/error-tests.rkt +++ b/collects/parser-tools/private-lex/error-tests.rkt @@ -40,6 +40,35 @@ (let () (lexer ((a) 1)))))) +;; Detecting mutual recursion cycle: +(check-regexp-match #rx"regular-expression" + (catch-syn-error + (let () + (define-lex-abbrev a b) + (define-lex-abbrev b a) + (let () + (lexer ((a) 1)))))) + +(check-regexp-match #rx"regular-expression" + (catch-syn-error + (let () + (define-lex-abbrev a (repetition 0 1 b)) + (define-lex-abbrev b (repetition 0 1 a)) + (let () + (lexer ((a) 1)))))) + +;; Detecting cycle within same abbreviation: +(check-regexp-match #rx"regular-expression" + (catch-syn-error + (let () + (define-lex-abbrev balanced + (union (concatenation "(" balanced ")" balanced) + any-char)) + (lexer + [balanced (string-append lexeme (balanced input-port))] + [(eof) ""])))) + + (check-regexp-match #rx"regular-expression" (catch-syn-error (lexer (1 1)))) (check-regexp-match #rx"repetition" (catch-syn-error (lexer ((repetition) 1)))) (check-regexp-match #rx"repetition" (catch-syn-error (lexer ((repetition #\1 #\1 "3") 1)))) diff --git a/collects/parser-tools/private-lex/stx.rkt b/collects/parser-tools/private-lex/stx.rkt index f178847..67cc38a 100644 --- a/collects/parser-tools/private-lex/stx.rkt +++ b/collects/parser-tools/private-lex/stx.rkt @@ -1,6 +1,6 @@ (module stx mzscheme - (require syntax/boundmap - "util.rkt") + (require "util.rkt" + syntax/id-table) (provide parse) @@ -37,103 +37,118 @@ ;; checks for errors and generates the plain s-exp form for s ;; Expands lex-abbrevs and applies lex-trans. (define (parse stx disappeared-uses) - (let ((parse - (lambda (s) - (parse (syntax-rearm s stx) - disappeared-uses)))) - (syntax-case (disarm stx) (repetition union intersection complement concatenation - char-range char-complement) - (_ - (identifier? stx) - (let ((expansion (syntax-local-value stx (lambda () #f)))) - (unless (lex-abbrev? expansion) - (raise-syntax-error 'regular-expression - "undefined abbreviation" - stx)) - (set-box! disappeared-uses (cons stx (unbox disappeared-uses))) - (parse ((lex-abbrev-get-abbrev expansion))))) - (_ - (or (char? (syntax-e stx)) (string? (syntax-e stx))) - (syntax-e stx)) - ((repetition arg ...) - (let ((arg-list (syntax->list (syntax (arg ...))))) - (unless (= 3 (length arg-list)) - (bad-args stx 2)) - (let ((low (syntax-e (car arg-list))) - (high (syntax-e (cadr arg-list))) - (re (caddr arg-list))) - (unless (and (number? low) (exact? low) (integer? low) (>= low 0)) - (raise-syntax-error #f - "not a non-negative exact integer" - stx - (car arg-list))) - (unless (or (and (number? high) (exact? high) (integer? high) (>= high 0)) - (eq? high +inf.0)) - (raise-syntax-error #f - "not a non-negative exact integer or +inf.0" - stx - (cadr arg-list))) - (unless (<= low high) - (raise-syntax-error - #f - "the first argument is not less than or equal to the second argument" - stx)) - `(repetition ,low ,high ,(parse re))))) - ((union re ...) - `(union ,@(map parse (syntax->list (syntax (re ...)))))) - ((intersection re ...) - `(intersection ,@(map parse (syntax->list (syntax (re ...)))))) - ((complement re ...) - (let ((re-list (syntax->list (syntax (re ...))))) - (unless (= 1 (length re-list)) - (bad-args stx 1)) - `(complement ,(parse (car re-list))))) - ((concatenation re ...) - `(concatenation ,@(map parse (syntax->list (syntax (re ...)))))) - ((char-range arg ...) - (let ((arg-list (syntax->list (syntax (arg ...))))) - (unless (= 2 (length arg-list)) - (bad-args stx 2)) - (let ((i1 (char-range-arg (car arg-list) stx)) - (i2 (char-range-arg (cadr arg-list) stx))) - (if (<= i1 i2) - `(char-range ,(integer->char i1) ,(integer->char i2)) - (raise-syntax-error - #f - "the first argument does not precede or equal second argument" - stx))))) - ((char-complement arg ...) - (let ((arg-list (syntax->list (syntax (arg ...))))) - (unless (= 1 (length arg-list)) - (bad-args stx 1)) - (let ((parsed (parse (car arg-list)))) - (unless (char-set? parsed) - (raise-syntax-error #f - "not a character set" - stx - (car arg-list))) - `(char-complement ,parsed)))) - ((op form ...) - (identifier? (syntax op)) - (let* ((o (syntax op)) - (expansion (syntax-local-value o (lambda () #f)))) - (set-box! disappeared-uses (cons o (unbox disappeared-uses))) - (cond - ((lex-trans? expansion) - (parse ((lex-trans-f expansion) (disarm stx)))) - (expansion - (raise-syntax-error 'regular-expression - "not a lex-trans" - stx)) - (else - (raise-syntax-error 'regular-expression - "undefined operator" - stx))))) - (_ - (raise-syntax-error - 'regular-expression - "not a char, string, identifier, or (op args ...)" - stx))))) + (let loop ([stx stx] + [disappeared-uses disappeared-uses] + ;; seen-lex-abbrevs: id-table + [seen-lex-abbrevs (make-immutable-free-id-table)]) + (let ([recur (lambda (s) + (loop (syntax-rearm s stx) + disappeared-uses + seen-lex-abbrevs))] + [recur/abbrev (lambda (s id) + (loop (syntax-rearm s stx) + disappeared-uses + (free-id-table-set seen-lex-abbrevs id id)))]) + (syntax-case (disarm stx) (repetition union intersection complement concatenation + char-range char-complement) + (_ + (identifier? stx) + (let ((expansion (syntax-local-value stx (lambda () #f)))) + (unless (lex-abbrev? expansion) + (raise-syntax-error 'regular-expression + "undefined abbreviation" + stx)) + ;; Check for cycles. + (when (free-id-table-ref seen-lex-abbrevs stx (lambda () #f)) + (raise-syntax-error 'regular-expression + "illegal lex-abbrev cycle detected" + stx + #f + (list (free-id-table-ref seen-lex-abbrevs stx)))) + (set-box! disappeared-uses (cons stx (unbox disappeared-uses))) + (recur/abbrev ((lex-abbrev-get-abbrev expansion)) stx))) + (_ + (or (char? (syntax-e stx)) (string? (syntax-e stx))) + (syntax-e stx)) + ((repetition arg ...) + (let ((arg-list (syntax->list (syntax (arg ...))))) + (unless (= 3 (length arg-list)) + (bad-args stx 2)) + (let ((low (syntax-e (car arg-list))) + (high (syntax-e (cadr arg-list))) + (re (caddr arg-list))) + (unless (and (number? low) (exact? low) (integer? low) (>= low 0)) + (raise-syntax-error #f + "not a non-negative exact integer" + stx + (car arg-list))) + (unless (or (and (number? high) (exact? high) (integer? high) (>= high 0)) + (eq? high +inf.0)) + (raise-syntax-error #f + "not a non-negative exact integer or +inf.0" + stx + (cadr arg-list))) + (unless (<= low high) + (raise-syntax-error + #f + "the first argument is not less than or equal to the second argument" + stx)) + `(repetition ,low ,high ,(recur re))))) + ((union re ...) + `(union ,@(map recur (syntax->list (syntax (re ...)))))) + ((intersection re ...) + `(intersection ,@(map recur (syntax->list (syntax (re ...)))))) + ((complement re ...) + (let ((re-list (syntax->list (syntax (re ...))))) + (unless (= 1 (length re-list)) + (bad-args stx 1)) + `(complement ,(recur (car re-list))))) + ((concatenation re ...) + `(concatenation ,@(map recur (syntax->list (syntax (re ...)))))) + ((char-range arg ...) + (let ((arg-list (syntax->list (syntax (arg ...))))) + (unless (= 2 (length arg-list)) + (bad-args stx 2)) + (let ((i1 (char-range-arg (car arg-list) stx)) + (i2 (char-range-arg (cadr arg-list) stx))) + (if (<= i1 i2) + `(char-range ,(integer->char i1) ,(integer->char i2)) + (raise-syntax-error + #f + "the first argument does not precede or equal second argument" + stx))))) + ((char-complement arg ...) + (let ((arg-list (syntax->list (syntax (arg ...))))) + (unless (= 1 (length arg-list)) + (bad-args stx 1)) + (let ((parsed (recur (car arg-list)))) + (unless (char-set? parsed) + (raise-syntax-error #f + "not a character set" + stx + (car arg-list))) + `(char-complement ,parsed)))) + ((op form ...) + (identifier? (syntax op)) + (let* ((o (syntax op)) + (expansion (syntax-local-value o (lambda () #f)))) + (set-box! disappeared-uses (cons o (unbox disappeared-uses))) + (cond + ((lex-trans? expansion) + (recur ((lex-trans-f expansion) (disarm stx)))) + (expansion + (raise-syntax-error 'regular-expression + "not a lex-trans" + stx)) + (else + (raise-syntax-error 'regular-expression + "undefined operator" + stx))))) + (_ + (raise-syntax-error + 'regular-expression + "not a char, string, identifier, or (op args ...)" + stx))))))