PR 13471: Add cycle-detection during lex-abbrev expansion to raise appropriate syntax error.

Also add test cases for lex-abbrev cycle detection.

original commit: 7146289c34c8256e54bda9157fcdc337047ae043
tokens
Danny Yoo 11 years ago
parent ec6cb60c00
commit c6375182fa

@ -40,6 +40,35 @@
(let () (let ()
(lexer ((a) 1)))))) (lexer ((a) 1))))))
;; Detecting mutual recursion cycle:
(check-regexp-match #rx"regular-expression"
(catch-syn-error
(let ()
(define-lex-abbrev a b)
(define-lex-abbrev b a)
(let ()
(lexer ((a) 1))))))
(check-regexp-match #rx"regular-expression"
(catch-syn-error
(let ()
(define-lex-abbrev a (repetition 0 1 b))
(define-lex-abbrev b (repetition 0 1 a))
(let ()
(lexer ((a) 1))))))
;; Detecting cycle within same abbreviation:
(check-regexp-match #rx"regular-expression"
(catch-syn-error
(let ()
(define-lex-abbrev balanced
(union (concatenation "(" balanced ")" balanced)
any-char))
(lexer
[balanced (string-append lexeme (balanced input-port))]
[(eof) ""]))))
(check-regexp-match #rx"regular-expression" (catch-syn-error (lexer (1 1)))) (check-regexp-match #rx"regular-expression" (catch-syn-error (lexer (1 1))))
(check-regexp-match #rx"repetition" (catch-syn-error (lexer ((repetition) 1)))) (check-regexp-match #rx"repetition" (catch-syn-error (lexer ((repetition) 1))))
(check-regexp-match #rx"repetition" (catch-syn-error (lexer ((repetition #\1 #\1 "3") 1)))) (check-regexp-match #rx"repetition" (catch-syn-error (lexer ((repetition #\1 #\1 "3") 1))))

@ -1,6 +1,6 @@
(module stx mzscheme (module stx mzscheme
(require syntax/boundmap (require "util.rkt"
"util.rkt") syntax/id-table)
(provide parse) (provide parse)
@ -37,103 +37,118 @@
;; checks for errors and generates the plain s-exp form for s ;; checks for errors and generates the plain s-exp form for s
;; Expands lex-abbrevs and applies lex-trans. ;; Expands lex-abbrevs and applies lex-trans.
(define (parse stx disappeared-uses) (define (parse stx disappeared-uses)
(let ((parse (let loop ([stx stx]
(lambda (s) [disappeared-uses disappeared-uses]
(parse (syntax-rearm s stx) ;; seen-lex-abbrevs: id-table
disappeared-uses)))) [seen-lex-abbrevs (make-immutable-free-id-table)])
(syntax-case (disarm stx) (repetition union intersection complement concatenation (let ([recur (lambda (s)
char-range char-complement) (loop (syntax-rearm s stx)
(_ disappeared-uses
(identifier? stx) seen-lex-abbrevs))]
(let ((expansion (syntax-local-value stx (lambda () #f)))) [recur/abbrev (lambda (s id)
(unless (lex-abbrev? expansion) (loop (syntax-rearm s stx)
(raise-syntax-error 'regular-expression disappeared-uses
"undefined abbreviation" (free-id-table-set seen-lex-abbrevs id id)))])
stx)) (syntax-case (disarm stx) (repetition union intersection complement concatenation
(set-box! disappeared-uses (cons stx (unbox disappeared-uses))) char-range char-complement)
(parse ((lex-abbrev-get-abbrev expansion))))) (_
(_ (identifier? stx)
(or (char? (syntax-e stx)) (string? (syntax-e stx))) (let ((expansion (syntax-local-value stx (lambda () #f))))
(syntax-e stx)) (unless (lex-abbrev? expansion)
((repetition arg ...) (raise-syntax-error 'regular-expression
(let ((arg-list (syntax->list (syntax (arg ...))))) "undefined abbreviation"
(unless (= 3 (length arg-list)) stx))
(bad-args stx 2)) ;; Check for cycles.
(let ((low (syntax-e (car arg-list))) (when (free-id-table-ref seen-lex-abbrevs stx (lambda () #f))
(high (syntax-e (cadr arg-list))) (raise-syntax-error 'regular-expression
(re (caddr arg-list))) "illegal lex-abbrev cycle detected"
(unless (and (number? low) (exact? low) (integer? low) (>= low 0)) stx
(raise-syntax-error #f #f
"not a non-negative exact integer" (list (free-id-table-ref seen-lex-abbrevs stx))))
stx (set-box! disappeared-uses (cons stx (unbox disappeared-uses)))
(car arg-list))) (recur/abbrev ((lex-abbrev-get-abbrev expansion)) stx)))
(unless (or (and (number? high) (exact? high) (integer? high) (>= high 0)) (_
(eq? high +inf.0)) (or (char? (syntax-e stx)) (string? (syntax-e stx)))
(raise-syntax-error #f (syntax-e stx))
"not a non-negative exact integer or +inf.0" ((repetition arg ...)
stx (let ((arg-list (syntax->list (syntax (arg ...)))))
(cadr arg-list))) (unless (= 3 (length arg-list))
(unless (<= low high) (bad-args stx 2))
(raise-syntax-error (let ((low (syntax-e (car arg-list)))
#f (high (syntax-e (cadr arg-list)))
"the first argument is not less than or equal to the second argument" (re (caddr arg-list)))
stx)) (unless (and (number? low) (exact? low) (integer? low) (>= low 0))
`(repetition ,low ,high ,(parse re))))) (raise-syntax-error #f
((union re ...) "not a non-negative exact integer"
`(union ,@(map parse (syntax->list (syntax (re ...)))))) stx
((intersection re ...) (car arg-list)))
`(intersection ,@(map parse (syntax->list (syntax (re ...)))))) (unless (or (and (number? high) (exact? high) (integer? high) (>= high 0))
((complement re ...) (eq? high +inf.0))
(let ((re-list (syntax->list (syntax (re ...))))) (raise-syntax-error #f
(unless (= 1 (length re-list)) "not a non-negative exact integer or +inf.0"
(bad-args stx 1)) stx
`(complement ,(parse (car re-list))))) (cadr arg-list)))
((concatenation re ...) (unless (<= low high)
`(concatenation ,@(map parse (syntax->list (syntax (re ...)))))) (raise-syntax-error
((char-range arg ...) #f
(let ((arg-list (syntax->list (syntax (arg ...))))) "the first argument is not less than or equal to the second argument"
(unless (= 2 (length arg-list)) stx))
(bad-args stx 2)) `(repetition ,low ,high ,(recur re)))))
(let ((i1 (char-range-arg (car arg-list) stx)) ((union re ...)
(i2 (char-range-arg (cadr arg-list) stx))) `(union ,@(map recur (syntax->list (syntax (re ...))))))
(if (<= i1 i2) ((intersection re ...)
`(char-range ,(integer->char i1) ,(integer->char i2)) `(intersection ,@(map recur (syntax->list (syntax (re ...))))))
(raise-syntax-error ((complement re ...)
#f (let ((re-list (syntax->list (syntax (re ...)))))
"the first argument does not precede or equal second argument" (unless (= 1 (length re-list))
stx))))) (bad-args stx 1))
((char-complement arg ...) `(complement ,(recur (car re-list)))))
(let ((arg-list (syntax->list (syntax (arg ...))))) ((concatenation re ...)
(unless (= 1 (length arg-list)) `(concatenation ,@(map recur (syntax->list (syntax (re ...))))))
(bad-args stx 1)) ((char-range arg ...)
(let ((parsed (parse (car arg-list)))) (let ((arg-list (syntax->list (syntax (arg ...)))))
(unless (char-set? parsed) (unless (= 2 (length arg-list))
(raise-syntax-error #f (bad-args stx 2))
"not a character set" (let ((i1 (char-range-arg (car arg-list) stx))
stx (i2 (char-range-arg (cadr arg-list) stx)))
(car arg-list))) (if (<= i1 i2)
`(char-complement ,parsed)))) `(char-range ,(integer->char i1) ,(integer->char i2))
((op form ...) (raise-syntax-error
(identifier? (syntax op)) #f
(let* ((o (syntax op)) "the first argument does not precede or equal second argument"
(expansion (syntax-local-value o (lambda () #f)))) stx)))))
(set-box! disappeared-uses (cons o (unbox disappeared-uses))) ((char-complement arg ...)
(cond (let ((arg-list (syntax->list (syntax (arg ...)))))
((lex-trans? expansion) (unless (= 1 (length arg-list))
(parse ((lex-trans-f expansion) (disarm stx)))) (bad-args stx 1))
(expansion (let ((parsed (recur (car arg-list))))
(raise-syntax-error 'regular-expression (unless (char-set? parsed)
"not a lex-trans" (raise-syntax-error #f
stx)) "not a character set"
(else stx
(raise-syntax-error 'regular-expression (car arg-list)))
"undefined operator" `(char-complement ,parsed))))
stx))))) ((op form ...)
(_ (identifier? (syntax op))
(raise-syntax-error (let* ((o (syntax op))
'regular-expression (expansion (syntax-local-value o (lambda () #f))))
"not a char, string, identifier, or (op args ...)" (set-box! disappeared-uses (cons o (unbox disappeared-uses)))
stx))))) (cond
((lex-trans? expansion)
(recur ((lex-trans-f expansion) (disarm stx))))
(expansion
(raise-syntax-error 'regular-expression
"not a lex-trans"
stx))
(else
(raise-syntax-error 'regular-expression
"undefined operator"
stx)))))
(_
(raise-syntax-error
'regular-expression
"not a char, string, identifier, or (op args ...)"
stx))))))

Loading…
Cancel
Save