Fixes "Check Syntax" arrows for lexers #6

Merged
tov merged 3 commits from br-lexer-arrows into master 5 years ago

@ -4,6 +4,7 @@
;; create and use the buffer that the lexer reads from. See docs.
(require (for-syntax racket/list
racket/syntax
syntax/stx
syntax/define
syntax/boundmap
@ -69,80 +70,79 @@
[input-port (make-rename-transformer #'input-port-p)]
[lexeme-srcloc (make-rename-transformer #'lexeme-srcloc-p)])
action-stx)))))
(define-for-syntax (make-lexer-macro caller src-loc-style)
(λ (stx)
(syntax-case stx ()
[(_ . RE+ACTS)
(let ()
(define spec/re-acts (syntax->list #'RE+ACTS))
(for/and ([x (in-list spec/re-acts)])
(syntax-case x ()
[(RE ACT) #t]
[else (raise-syntax-error caller "not a regular expression / action pair" stx x)]))
(define eof-act (get-special-action spec/re-acts #'eof (case src-loc-style
[(lexer-src-pos) #'(return-without-pos eof)]
[(lexer-srcloc) #'(return-without-srcloc eof)]
[else #'eof])))
(define spec-act (get-special-action spec/re-acts #'special #'(void)))
(define spec-comment-act (get-special-action spec/re-acts #'special-comment #'#f))
(define ids (list #'special #'special-comment #'eof))
(define re-acts (filter (λ (spec/re-act)
(syntax-case spec/re-act ()
[((special) act)
(not (ormap
(λ (x)
(and (identifier? #'special)
(module-or-top-identifier=? #'special x)))
ids))]
[_ #t])) spec/re-acts))
(define names (map (λ (x) (datum->syntax #f (gensym))) re-acts))
(define acts (map (λ (x) (stx-car (stx-cdr x))) re-acts))
(define re-actnames (map (λ (re-act name) (list (stx-car re-act) name)) re-acts names))
(when (null? spec/re-acts)
(raise-syntax-error caller "expected at least one action" stx))
(define-values (trans start action-names no-look disappeared-uses) (build-lexer re-actnames))
(when (vector-ref action-names start) ;; Start state is final
(unless (and
;; All the successor states are final
(vector? (vector-ref trans start))
(andmap (λ (x) (vector-ref action-names (vector-ref x 2)))
(vector->list (vector-ref trans start)))
;; Each character has a successor state
(let loop ([check 0]
[nexts (vector->list (vector-ref trans start))])
(cond
[(null? nexts) #f]
[else
(let ([next (car nexts)])
(and (= (vector-ref next 0) check)
(let ([next-check (vector-ref next 1)])
(or (>= next-check max-char-num)
(loop (add1 next-check) (cdr nexts))))))])))
(eprintf "warning: lexer at ~a can accept the empty string\n" stx)))
(with-syntax ([START-STATE-STX start]
[TRANS-TABLE-STX trans]
[NO-LOOKAHEAD-STX no-look]
[(NAME ...) names]
[(ACT ...) (map (λ (a) (wrap-action a src-loc-style)) acts)]
[(ACT-NAME ...) (vector->list action-names)]
[SPEC-ACT-STX (wrap-action spec-act src-loc-style)]
[HAS-COMMENT-ACT?-STX (if (syntax-e spec-comment-act) #t #f)]
[SPEC-COMMENT-ACT-STX (wrap-action spec-comment-act src-loc-style)]
[EOF-ACT-STX (wrap-action eof-act src-loc-style)])
(syntax-property
(syntax/loc stx (let ([NAME ACT] ...)
(let ([proc (lexer-body START-STATE-STX
TRANS-TABLE-STX
(vector ACT-NAME ...)
NO-LOOKAHEAD-STX
SPEC-ACT-STX
HAS-COMMENT-ACT?-STX
SPEC-COMMENT-ACT-STX
EOF-ACT-STX)])
;; reverse eta to get named procedures:
(λ (port) (proc port)))))
'disappeared-use disappeared-uses)))])))
(with-disappeared-uses
(let ()
(define spec/re-acts (syntax->list #'RE+ACTS))
(for/and ([x (in-list spec/re-acts)])
(syntax-case x ()
[(RE ACT) #t]
[else (raise-syntax-error caller "not a regular expression / action pair" stx x)]))
(define eof-act (get-special-action spec/re-acts #'eof (case src-loc-style
[(lexer-src-pos) #'(return-without-pos eof)]
[(lexer-srcloc) #'(return-without-srcloc eof)]
[else #'eof])))
(define spec-act (get-special-action spec/re-acts #'special #'(void)))
(define spec-comment-act (get-special-action spec/re-acts #'special-comment #'#f))
(define ids (list #'special #'special-comment #'eof))
(define re-acts (filter (λ (spec/re-act)
(syntax-case spec/re-act ()
[((special) act)
(not (ormap
(λ (x)
(and (identifier? #'special)
(module-or-top-identifier=? #'special x)))
ids))]
[_ #t])) spec/re-acts))
(define names (map (λ (x) (datum->syntax #f (gensym))) re-acts))
(define acts (map (λ (x) (stx-car (stx-cdr x))) re-acts))
(define re-actnames (map (λ (re-act name) (list (stx-car re-act) name)) re-acts names))
(when (null? spec/re-acts)
(raise-syntax-error caller "expected at least one action" stx))
(define-values (trans start action-names no-look) (build-lexer re-actnames))
(when (vector-ref action-names start) ;; Start state is final
(unless (and
;; All the successor states are final
(vector? (vector-ref trans start))
(andmap (λ (x) (vector-ref action-names (vector-ref x 2)))
(vector->list (vector-ref trans start)))
;; Each character has a successor state
(let loop ([check 0]
[nexts (vector->list (vector-ref trans start))])
(cond
[(null? nexts) #f]
[else
(let ([next (car nexts)])
(and (= (vector-ref next 0) check)
(let ([next-check (vector-ref next 1)])
(or (>= next-check max-char-num)
(loop (add1 next-check) (cdr nexts))))))])))
(eprintf "warning: lexer at ~a can accept the empty string\n" stx)))
(with-syntax ([START-STATE-STX start]
[TRANS-TABLE-STX trans]
[NO-LOOKAHEAD-STX no-look]
[(NAME ...) names]
[(ACT ...) (map (λ (a) (wrap-action a src-loc-style)) acts)]
[(ACT-NAME ...) (vector->list action-names)]
[SPEC-ACT-STX (wrap-action spec-act src-loc-style)]
[HAS-COMMENT-ACT?-STX (if (syntax-e spec-comment-act) #t #f)]
[SPEC-COMMENT-ACT-STX (wrap-action spec-comment-act src-loc-style)]
[EOF-ACT-STX (wrap-action eof-act src-loc-style)])
(syntax/loc stx (let ([NAME ACT] ...)
(let ([proc (lexer-body START-STATE-STX
TRANS-TABLE-STX
(vector ACT-NAME ...)
NO-LOOKAHEAD-STX
SPEC-ACT-STX
HAS-COMMENT-ACT?-STX
SPEC-COMMENT-ACT-STX
EOF-ACT-STX)])
;; reverse eta to get named procedures:
(λ (port) (proc port))))))))])))
(define-syntax lexer (make-lexer-macro 'lexer #f))
(define-syntax lexer-src-pos (make-lexer-macro 'lexer-src-pos 'lexer-src-pos))

@ -125,9 +125,8 @@
;; (values table nat (vector-of (union #f syntax-object)) (vector-of bool) (list-of syntax-object))
;; each syntax object has the form (re action)
(define (build-lexer sos)
(define disappeared-uses (box null))
(define s-re-acts (for/list ([so (in-list sos)])
(cons (parse (stx-car so) disappeared-uses)
(cons (parse (stx-car so))
(stx-car (stx-cdr so)))))
(define cache (make-cache))
(define re-acts (for/list ([s-re-act (in-list s-re-acts)])
@ -156,6 +155,5 @@
num-states
(/ (* 4.0 (+ 2 num-states (* 2 num-vectors) num-entries
(* 5 num-different-entries))) 1024)))
(values table (dfa-start-state dfa) (dfa->actions dfa) (dfa->no-look dfa)
(unbox disappeared-uses)))
(values table (dfa-start-state dfa) (dfa->actions dfa) (dfa->no-look dfa)))

@ -1,5 +1,5 @@
#lang racket/base
(require "util.rkt" syntax/id-table)
(require "util.rkt" syntax/id-table racket/syntax)
(provide parse)
(define (bad-args stx num)
@ -32,24 +32,21 @@
;; parse : syntax-object (box (list-of syntax-object)) -> s-re (see re.rkt)
;; checks for errors and generates the plain s-exp form for s
;; Expands lex-abbrevs and applies lex-trans.
(define (parse stx disappeared-uses)
(define (parse stx)
(let loop ([stx stx]
[disappeared-uses disappeared-uses]
;; seen-lex-abbrevs: id-table
[seen-lex-abbrevs (make-immutable-free-id-table)])
(let ([recur (λ (s)
(loop (syntax-rearm s stx)
disappeared-uses
seen-lex-abbrevs))]
[recur/abbrev (λ (s id)
(loop (syntax-rearm s stx)
disappeared-uses
(free-id-table-set seen-lex-abbrevs id id)))])
(syntax-case (disarm stx) (repetition union intersection complement concatenation
char-range char-complement)
[_
(identifier? stx)
(let ([expansion (syntax-local-value stx (λ () #f))])
(let ([expansion (syntax-local-value/record stx (λ (v) #t))])
(unless (lex-abbrev? expansion)
(raise-syntax-error 'regular-expression
"undefined abbreviation"
@ -61,7 +58,6 @@
stx
#f
(list (free-id-table-ref seen-lex-abbrevs stx))))
(set-box! disappeared-uses (cons stx (unbox disappeared-uses)))
(recur/abbrev ((lex-abbrev-get-abbrev expansion)) stx))]
[_
(or (char? (syntax-e stx)) (string? (syntax-e stx)))
@ -111,8 +107,7 @@
`(char-complement ,parsed))]
((OP form ...)
(identifier? #'OP)
(let* ([expansion (syntax-local-value #'OP (λ () #f))])
(set-box! disappeared-uses (cons #'OP (unbox disappeared-uses)))
(let* ([expansion (syntax-local-value/record #'OP (λ (v) #t))])
(cond
[(lex-trans? expansion)
(recur ((lex-trans-f expansion) (disarm stx)))]
@ -164,27 +159,25 @@
;; and by "now", I mean it's been broken since before we
;; moved to git.
(module+ test
(check-equal? (parse #'#\a null) #\a)
(check-equal? (parse #'"1" null) "1")
(check-equal? (parse #'(repetition 1 1 #\1) null)
(check-equal? (parse #'#\a) #\a)
(check-equal? (parse #'"1") "1")
(check-equal? (parse #'(repetition 1 1 #\1))
'(repetition 1 1 #\1))
(check-equal? (parse #'(repetition 0 +inf.0 #\1) null) '(repetition 0 +inf.0 #\1))
(check-equal? (parse #'(union #\1 (union "2") (union)) null)
(check-equal? (parse #'(repetition 0 +inf.0 #\1)) '(repetition 0 +inf.0 #\1))
(check-equal? (parse #'(union #\1 (union "2") (union)))
'(union #\1 (union "2") (union)))
(check-equal? (parse #'(intersection #\1 (intersection "2") (intersection))
null)
(check-equal? (parse #'(intersection #\1 (intersection "2") (intersection)))
'(intersection #\1 (intersection "2") (intersection)))
(check-equal? (parse #'(complement (union #\1 #\2))
null)
(check-equal? (parse #'(complement (union #\1 #\2)))
'(complement (union #\1 #\2)))
(check-equal? (parse #'(concatenation "1" "2" (concatenation)) null)
(check-equal? (parse #'(concatenation "1" "2" (concatenation)))
'(concatenation "1" "2" (concatenation)))
(check-equal? (parse #'(char-range "1" #\1) null) '(char-range #\1 #\1))
(check-equal? (parse #'(char-range #\1 "1") null) '(char-range #\1 #\1))
(check-equal? (parse #'(char-range "1" "3") null) '(char-range #\1 #\3))
(check-equal? (parse #'(char-complement (union "1" "2")) null)
(check-equal? (parse #'(char-range "1" #\1)) '(char-range #\1 #\1))
(check-equal? (parse #'(char-range #\1 "1")) '(char-range #\1 #\1))
(check-equal? (parse #'(char-range "1" "3")) '(char-range #\1 #\3))
(check-equal? (parse #'(char-complement (union "1" "2")))
'(char-complement (union "1" "2")))
(check-equal? (parse #'(char-complement (repetition 1 1 "1")) null)
(check-equal? (parse #'(char-complement (repetition 1 1 "1")))
'(char-complement (repetition 1 1 "1")))
(check-exn #rx"not a character set"
(λ () (parse #'(char-complement (repetition 6 6 "1")) null))))
(λ () (parse #'(char-complement (repetition 6 6 "1"))))))

Loading…
Cancel
Save