Fix make-rule-parser (#28)

When using `make-rule-parser` for any rule that is not the start
rule, and applying the resulting parser while giving it a source-path
it would just use the entire grammar from the start rule.
pull/33/head
Markus Pfeiffer 2 years ago committed by GitHub
parent d3405dd0ab
commit 6983208426
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -18,7 +18,7 @@
(define-for-syntax (rules->token-types rules) (define-for-syntax (rules->token-types rules)
(define-values (implicit-tokens explicit-tokens) (rules-collect-token-types rules)) (define-values (implicit-tokens explicit-tokens) (rules-collect-token-types rules))
(remove-duplicates (append (for/list ([it (in-list implicit-tokens)]) (remove-duplicates (append (for/list ([it (in-list implicit-tokens)])
(string->symbol (syntax-e it))) (string->symbol (syntax-e it)))
(map syntax-e explicit-tokens)) eq?)) (map syntax-e explicit-tokens)) eq?))
(define-syntax (brag-module-begin rules-stx) (define-syntax (brag-module-begin rules-stx)
@ -38,7 +38,7 @@
(with-syntax ([START-ID (first rule-ids)] ; The first rule, by default, is the start rule. (with-syntax ([START-ID (first rule-ids)] ; The first rule, by default, is the start rule.
[((TOKEN-TYPE . TOKEN-TYPE-CONSTRUCTOR) ...) [((TOKEN-TYPE . TOKEN-TYPE-CONSTRUCTOR) ...)
(for/list ([tt (in-list (rules->token-types rules))]) (for/list ([tt (in-list (rules->token-types rules))])
(cons tt (string->symbol (format "token-~a" tt))))] (cons tt (string->symbol (format "token-~a" tt))))]
;; Flatten rules to use the yacc-style ruleset that br-parser-tools supports ;; Flatten rules to use the yacc-style ruleset that br-parser-tools supports
[GENERATED-RULE-CODES (map flat-rule->yacc-rule (flatten-rules rules))] [GENERATED-RULE-CODES (map flat-rule->yacc-rule (flatten-rules rules))]
;; main exports. Break hygiene so they're also available at top-level / repl ;; main exports. Break hygiene so they're also available at top-level / repl
@ -71,41 +71,38 @@
(cons eof token-EOF) (cons eof token-EOF)
(cons 'TOKEN-TYPE TOKEN-TYPE-CONSTRUCTOR) ...))) (cons 'TOKEN-TYPE TOKEN-TYPE-CONSTRUCTOR) ...)))
(define-syntax (MAKE-RULE-PARSER rule-id-stx) (define-syntax (MAKE-RULE-PARSER stx)
(syntax-case rule-id-stx () (syntax-case stx ()
[(_ start-rule) [(_ START-RULE-ID)
(and (identifier? #'start-rule) (and (identifier? #'START-RULE-ID) (member (syntax-e #'START-RULE-ID) 'RULE-IDS))
(member (syntax-e #'start-rule) 'RULE-IDS))
;; The cfg-parser depends on the start-rule provided in (start ...) to have the same ;; The cfg-parser depends on the start-rule provided in (start ...) to have the same
;; context as the rest of this body. Hence RECOLORED-START-RULE ;; context as the rest of this body. Hence RECOLORED-START-RULE
(with-syntax ([RECOLORED-START-RULE (datum->syntax #'RULES-STX (syntax-e #'start-rule))]) (with-syntax ([RECOLORED-START-RULE (datum->syntax #'RULES-STX (syntax-e #'START-RULE-ID))])
#'(let ([THE-GRAMMAR (cfg-parser (tokens enumerated-tokens) #'(let ()
(src-pos) (define (rule-parser tokenizer)
(start RECOLORED-START-RULE) (define rule-grammar (cfg-parser (tokens enumerated-tokens)
(end EOF) (src-pos)
(error THE-ERROR-HANDLER) (start RECOLORED-START-RULE)
(grammar . GENERATED-RULE-CODES))]) (end EOF)
(procedure-rename (error the-error-handler)
(case-lambda [(tokenizer) (grammar . GENERATED-RULE-CODES)))
(define next-token (define next-token (make-permissive-tokenizer tokenizer all-tokens-hash/mutable))
(make-permissive-tokenizer tokenizer all-tokens-hash/mutable)) ;; here's how we support grammar "cuts" on top rule name
;; little post-processor to support cuts on top rule name (define parse-tree-stx (rule-grammar next-token))
(define parse-tree-stx (THE-GRAMMAR next-token)) (syntax-case parse-tree-stx ()
(define top-rule-name-stx (syntax-case parse-tree-stx () [(TOP-RULE-NAME . _)
[(TRN . REST) #'TRN] (if (eq? (syntax-property #'TOP-RULE-NAME 'hide-or-splice?) 'hide)
[_ (error 'malformed-parse-tree)])) (remove-rule-name parse-tree-stx) ; use `remove-rule-name` so we get the same housekeeping
(if (eq? (syntax-property top-rule-name-stx 'hide-or-splice?) 'hide) parse-tree-stx)]
;; use `remove-rule-name` so we get the same housekeeping [_ (error 'malformed-parse-tree)]))
(remove-rule-name parse-tree-stx) (case-lambda [(tokenizer) (rule-parser tokenizer)]
parse-tree-stx)] [(source tokenizer)
[(source tokenizer) (parameterize ([current-source source])
(parameterize ([current-source source]) (rule-parser tokenizer))])))]
(PARSE tokenizer))])
(string->symbol (format "~a-rule-parser" 'start-rule)))))]
[(_ not-a-rule-id) [(_ not-a-rule-id)
(raise-syntax-error #f (raise-syntax-error #f
(format "Rule ~a is not defined in the grammar" (syntax-e #'not-a-rule-id)) (format "Rule ~a is not defined in the grammar" (syntax-e #'not-a-rule-id))
rule-id-stx)])) stx)]))
;; start-id has to be a value, not an expr, because make-rule-parser is a macro ;; start-id has to be a value, not an expr, because make-rule-parser is a macro
(define PARSE (procedure-rename (MAKE-RULE-PARSER START-ID) 'PARSE)) (define PARSE (procedure-rename (MAKE-RULE-PARSER START-ID) 'PARSE))

@ -7,7 +7,7 @@
brag/private/internal-support) brag/private/internal-support)
(provide THE-ERROR-HANDLER (provide the-error-handler
make-permissive-tokenizer make-permissive-tokenizer
atomic-datum->syntax atomic-datum->syntax
positions->srcloc positions->srcloc
@ -19,7 +19,7 @@
;; The level of indirection here is necessary since the yacc grammar wants a ;; The level of indirection here is necessary since the yacc grammar wants a
;; function value for the error handler up front. We want to delay that decision ;; function value for the error handler up front. We want to delay that decision
;; till parse time. ;; till parse time.
(define (THE-ERROR-HANDLER tok-ok? tok-name tok-value start-pos end-pos) (define (the-error-handler tok-ok? tok-name tok-value start-pos end-pos)
(match (positions->srcloc start-pos end-pos) (match (positions->srcloc start-pos end-pos)
[(list src line col offset span) [(list src line col offset span)
((current-parser-error-handler) tok-name ((current-parser-error-handler) tok-name

@ -0,0 +1,4 @@
#lang brag
start: next
next: "0"

@ -0,0 +1,17 @@
#lang racket/base
(require rackunit
brag/support
brag/examples/subrule)
(define parse-next (make-rule-parser next))
(define parse-start (make-rule-parser start))
(check-equal? (syntax->datum (parse #f "0")) '(start (next "0")))
(check-equal? (syntax->datum (parse #f "0")) (syntax->datum (parse "0")))
(check-equal? (syntax->datum (parse-start #f "0")) '(start (next "0")))
(check-equal? (syntax->datum (parse-start #f "0")) (syntax->datum (parse-start "0")))
(check-equal? (syntax->datum (parse-next #f "0")) '(next "0"))
(check-equal? (syntax->datum (parse-next #f "0")) (syntax->datum (parse-next "0")))
Loading…
Cancel
Save