From 6983208426fc4970b3753eea075cc492b64d5103 Mon Sep 17 00:00:00 2001 From: Markus Pfeiffer Date: Tue, 16 Nov 2021 17:35:40 +0000 Subject: [PATCH] Fix make-rule-parser (#28) When using `make-rule-parser` for any rule that is not the start rule, and applying the resulting parser while giving it a source-path it would just use the entire grammar from the start rule. --- brag-lib/brag/codegen/expander.rkt | 61 ++++++++++---------- brag-lib/brag/codegen/runtime.rkt | 4 +- brag-lib/brag/examples/subrule.rkt | 4 ++ brag-lib/brag/test/test-make-rule-parser.rkt | 17 ++++++ 4 files changed, 52 insertions(+), 34 deletions(-) create mode 100644 brag-lib/brag/examples/subrule.rkt create mode 100644 brag-lib/brag/test/test-make-rule-parser.rkt diff --git a/brag-lib/brag/codegen/expander.rkt b/brag-lib/brag/codegen/expander.rkt index 39690d7..cf1d137 100755 --- a/brag-lib/brag/codegen/expander.rkt +++ b/brag-lib/brag/codegen/expander.rkt @@ -18,7 +18,7 @@ (define-for-syntax (rules->token-types rules) (define-values (implicit-tokens explicit-tokens) (rules-collect-token-types rules)) (remove-duplicates (append (for/list ([it (in-list implicit-tokens)]) - (string->symbol (syntax-e it))) + (string->symbol (syntax-e it))) (map syntax-e explicit-tokens)) eq?)) (define-syntax (brag-module-begin rules-stx) @@ -38,7 +38,7 @@ (with-syntax ([START-ID (first rule-ids)] ; The first rule, by default, is the start rule. [((TOKEN-TYPE . TOKEN-TYPE-CONSTRUCTOR) ...) (for/list ([tt (in-list (rules->token-types rules))]) - (cons tt (string->symbol (format "token-~a" tt))))] + (cons tt (string->symbol (format "token-~a" tt))))] ;; Flatten rules to use the yacc-style ruleset that br-parser-tools supports [GENERATED-RULE-CODES (map flat-rule->yacc-rule (flatten-rules rules))] ;; main exports. Break hygiene so they're also available at top-level / repl @@ -71,41 +71,38 @@ (cons eof token-EOF) (cons 'TOKEN-TYPE TOKEN-TYPE-CONSTRUCTOR) ...))) - (define-syntax (MAKE-RULE-PARSER rule-id-stx) - (syntax-case rule-id-stx () - [(_ start-rule) - (and (identifier? #'start-rule) - (member (syntax-e #'start-rule) 'RULE-IDS)) + (define-syntax (MAKE-RULE-PARSER stx) + (syntax-case stx () + [(_ START-RULE-ID) + (and (identifier? #'START-RULE-ID) (member (syntax-e #'START-RULE-ID) 'RULE-IDS)) ;; The cfg-parser depends on the start-rule provided in (start ...) to have the same ;; context as the rest of this body. Hence RECOLORED-START-RULE - (with-syntax ([RECOLORED-START-RULE (datum->syntax #'RULES-STX (syntax-e #'start-rule))]) - #'(let ([THE-GRAMMAR (cfg-parser (tokens enumerated-tokens) - (src-pos) - (start RECOLORED-START-RULE) - (end EOF) - (error THE-ERROR-HANDLER) - (grammar . GENERATED-RULE-CODES))]) - (procedure-rename - (case-lambda [(tokenizer) - (define next-token - (make-permissive-tokenizer tokenizer all-tokens-hash/mutable)) - ;; little post-processor to support cuts on top rule name - (define parse-tree-stx (THE-GRAMMAR next-token)) - (define top-rule-name-stx (syntax-case parse-tree-stx () - [(TRN . REST) #'TRN] - [_ (error 'malformed-parse-tree)])) - (if (eq? (syntax-property top-rule-name-stx 'hide-or-splice?) 'hide) - ;; use `remove-rule-name` so we get the same housekeeping - (remove-rule-name parse-tree-stx) - parse-tree-stx)] - [(source tokenizer) - (parameterize ([current-source source]) - (PARSE tokenizer))]) - (string->symbol (format "~a-rule-parser" 'start-rule)))))] + (with-syntax ([RECOLORED-START-RULE (datum->syntax #'RULES-STX (syntax-e #'START-RULE-ID))]) + #'(let () + (define (rule-parser tokenizer) + (define rule-grammar (cfg-parser (tokens enumerated-tokens) + (src-pos) + (start RECOLORED-START-RULE) + (end EOF) + (error the-error-handler) + (grammar . GENERATED-RULE-CODES))) + (define next-token (make-permissive-tokenizer tokenizer all-tokens-hash/mutable)) + ;; here's how we support grammar "cuts" on top rule name + (define parse-tree-stx (rule-grammar next-token)) + (syntax-case parse-tree-stx () + [(TOP-RULE-NAME . _) + (if (eq? (syntax-property #'TOP-RULE-NAME 'hide-or-splice?) 'hide) + (remove-rule-name parse-tree-stx) ; use `remove-rule-name` so we get the same housekeeping + parse-tree-stx)] + [_ (error 'malformed-parse-tree)])) + (case-lambda [(tokenizer) (rule-parser tokenizer)] + [(source tokenizer) + (parameterize ([current-source source]) + (rule-parser tokenizer))])))] [(_ not-a-rule-id) (raise-syntax-error #f (format "Rule ~a is not defined in the grammar" (syntax-e #'not-a-rule-id)) - rule-id-stx)])) + stx)])) ;; start-id has to be a value, not an expr, because make-rule-parser is a macro (define PARSE (procedure-rename (MAKE-RULE-PARSER START-ID) 'PARSE)) diff --git a/brag-lib/brag/codegen/runtime.rkt b/brag-lib/brag/codegen/runtime.rkt index 315caf4..d2ef10b 100755 --- a/brag-lib/brag/codegen/runtime.rkt +++ b/brag-lib/brag/codegen/runtime.rkt @@ -7,7 +7,7 @@ brag/private/internal-support) -(provide THE-ERROR-HANDLER +(provide the-error-handler make-permissive-tokenizer atomic-datum->syntax positions->srcloc @@ -19,7 +19,7 @@ ;; The level of indirection here is necessary since the yacc grammar wants a ;; function value for the error handler up front. We want to delay that decision ;; till parse time. -(define (THE-ERROR-HANDLER tok-ok? tok-name tok-value start-pos end-pos) +(define (the-error-handler tok-ok? tok-name tok-value start-pos end-pos) (match (positions->srcloc start-pos end-pos) [(list src line col offset span) ((current-parser-error-handler) tok-name diff --git a/brag-lib/brag/examples/subrule.rkt b/brag-lib/brag/examples/subrule.rkt new file mode 100644 index 0000000..7183bfc --- /dev/null +++ b/brag-lib/brag/examples/subrule.rkt @@ -0,0 +1,4 @@ +#lang brag + +start: next +next: "0" diff --git a/brag-lib/brag/test/test-make-rule-parser.rkt b/brag-lib/brag/test/test-make-rule-parser.rkt new file mode 100644 index 0000000..51a47a2 --- /dev/null +++ b/brag-lib/brag/test/test-make-rule-parser.rkt @@ -0,0 +1,17 @@ +#lang racket/base +(require rackunit + brag/support + brag/examples/subrule) + +(define parse-next (make-rule-parser next)) +(define parse-start (make-rule-parser start)) + +(check-equal? (syntax->datum (parse #f "0")) '(start (next "0"))) +(check-equal? (syntax->datum (parse #f "0")) (syntax->datum (parse "0"))) + +(check-equal? (syntax->datum (parse-start #f "0")) '(start (next "0"))) +(check-equal? (syntax->datum (parse-start #f "0")) (syntax->datum (parse-start "0"))) + +(check-equal? (syntax->datum (parse-next #f "0")) '(next "0")) +(check-equal? (syntax->datum (parse-next #f "0")) (syntax->datum (parse-next "0"))) +