diff --git a/base/grammar.rkt b/base/grammar.rkt index 2996e3c..544ba14 100644 --- a/base/grammar.rkt +++ b/base/grammar.rkt @@ -7,17 +7,15 @@ (provide (struct-out terminal-symbol) (struct-out nonterminal-symbol) - (struct-out context-free-grammar) - (struct-out context-free-production-rule) + (struct-out cf-grammar) + (struct-out cf-production-rule) (contract-out [grammar-symbol? predicate/c] - [grammar-start-rules - (-> context-free-grammar? (set/c context-free-production-rule? #:kind 'immutable))] - [make-grammar - (-> #:rules (sequence/c context-free-production-rule?) #:start-symbol any/c context-free-grammar?)] - [make-rule + [cf-grammar-start-rules (-> cf-grammar? (set/c cf-production-rule? #:kind 'immutable))] + [make-cf-grammar (-> #:rules (sequence/c cf-production-rule?) #:start-symbol any/c cf-grammar?)] + [make-cf-production-rule (-> #:symbol any/c #:substitution (sequence/c grammar-symbol?) #:label any/c - context-free-production-rule?)])) + cf-production-rule?)])) (require racket/sequence @@ -35,19 +33,19 @@ ;; S: the nonterminals the grammar rules are defined in terms of. ;; L: the labels that grammar rules may have attached to them. These show up in parse tree ;; branches, and can be used to determine which production rule produced a derivation. -(struct context-free-grammar (rules start-symbol) #:transparent) +(struct cf-grammar (rules start-symbol) #:transparent) -(define (grammar-start-rules grammar) - (define start (context-free-grammar-start-symbol grammar)) - (for/set ([rule (in-vector (context-free-grammar-rules grammar))] - #:when (equal? (context-free-production-rule-nonterminal rule) start)) +(define (cf-grammar-start-rules grammar) + (define start (cf-grammar-start-symbol grammar)) + (for/set ([rule (in-vector (cf-grammar-rules grammar))] + #:when (equal? (cf-production-rule-nonterminal rule) start)) rule)) ;; A (Context-Free-Production-Rule T S L) contains a nonterminal symbol of type S, a label of type L, ;; and a substitution sequence of (Grammar-Symbol T S) values, stored in an immutable vector. -(struct context-free-production-rule (nonterminal label substitution) #:transparent) +(struct cf-production-rule (nonterminal label substitution) #:transparent) ;; A (Grammar-Symbol T S) is either a (Terminal-Symbol T) or a (Nonterminal-Symbol S) @@ -59,9 +57,9 @@ (struct nonterminal-symbol (value) #:transparent) -(define (make-grammar #:rules rules #:start-symbol start) - (context-free-grammar (sequence->vector rules) start)) +(define (make-cf-grammar #:rules rules #:start-symbol start) + (cf-grammar (sequence->vector rules) start)) -(define (make-rule #:symbol symbol #:substitution substitution #:label label) - (context-free-production-rule symbol label (sequence->vector substitution))) +(define (make-cf-production-rule #:symbol symbol #:substitution substitution #:label label) + (cf-production-rule symbol label (sequence->vector substitution))) diff --git a/parser/earley.rkt b/parser/earley.rkt index 048d23e..7c821a2 100644 --- a/parser/earley.rkt +++ b/parser/earley.rkt @@ -6,7 +6,7 @@ (provide (contract-out - [earley-parser (-> context-free-grammar? parser?)])) + [earley-parser (-> cf-grammar? parser?)])) (require racket/contract @@ -102,7 +102,7 @@ (guard (complete-sppf-key? key) then (define tok (vector-ref tokens (complete-sppf-key-input-start key))) (stream (terminal-derivation (token-value tok)))) - (define label (context-free-production-rule-label (incomplete-sppf-key-rule key))) + (define label (cf-production-rule-label (incomplete-sppf-key-rule key))) (define possible-children (possible-children-lists forest key)) (for*/stream ([children (in-stream possible-children)] [processed-children (in-stream (cartesian-stream (map loop children)))]) @@ -118,9 +118,9 @@ (λ (_) 'earley-state) (λ (this) (define rule (earley-state-rule this)) - (define substitution (context-free-production-rule-substitution rule)) + (define substitution (cf-production-rule-substitution rule)) (define pos (earley-state-substitution-position this)) - (append (list (context-free-production-rule-nonterminal rule) '->) + (append (list (cf-production-rule-nonterminal rule) '->) (for/list ([sym (in-vector substitution 0 pos)]) (if (terminal-symbol? sym) (terminal-symbol-value sym) @@ -134,14 +134,14 @@ (define (initial-earley-states grammar) - (for/set ([rule (grammar-start-rules grammar)]) + (for/set ([rule (cf-grammar-start-rules grammar)]) (earley-state rule 0 0 #false))) (define (earley-state-represents-successful-parse? state grammar) (and (zero? (earley-state-input-position state)) - (equal? (context-free-production-rule-nonterminal (earley-state-rule state)) - (context-free-grammar-start-symbol grammar)))) + (equal? (cf-production-rule-nonterminal (earley-state-rule state)) + (cf-grammar-start-symbol grammar)))) (define (earley-parse-datum grammar token-sequence) @@ -164,7 +164,7 @@ (guard (completed-state? next) then ;; find all states in S(j) of the form (X → α • Y β, j) and add (X → α Y • β, j) (define j (earley-state-input-position next)) - (define completed (context-free-production-rule-nonterminal (earley-state-rule next))) + (define completed (cf-production-rule-nonterminal (earley-state-rule next))) (define parent-states (if (equal? j k) (set-union unprocessed processed) @@ -194,13 +194,13 @@ (define (completed-state? state) (match-define (earley-state rule substitution-position _ _) state) (equal? substitution-position - (vector-length (context-free-production-rule-substitution rule)))) + (vector-length (cf-production-rule-substitution rule)))) (define/contract (earley-state-next-symbol state) (-> (and/c earley-state? (not/c completed-state?)) grammar-symbol?) (match-define (earley-state rule substitution-position _ _) state) - (vector-ref (context-free-production-rule-substitution rule) substitution-position)) + (vector-ref (cf-production-rule-substitution rule) substitution-position)) (define (earley-state-advance-substitution state #:key key) @@ -224,8 +224,8 @@ (define (predictor-states grammar nonterminal k) ;; add (Y → • γ, k) for every production in the grammar with Y on the left-hand side - (for/set ([rule (in-vector (context-free-grammar-rules grammar))] - #:when (equal? (context-free-production-rule-nonterminal rule) nonterminal)) + (for/set ([rule (in-vector (cf-grammar-rules grammar))] + #:when (equal? (cf-production-rule-nonterminal rule) nonterminal)) (earley-state rule 0 k #false))) @@ -259,25 +259,31 @@ ;; Grammar and input taken from https://en.wikipedia.org/wiki/Earley_parser#Example (test-case "datum parser" - (define P-rule (make-rule #:symbol 'P #:label 'P #:substitution (list (nonterminal-symbol 'S)))) + (define P-rule + (make-cf-production-rule + #:symbol 'P #:label 'P #:substitution (list (nonterminal-symbol 'S)))) (define S-rule0 - (make-rule + (make-cf-production-rule #:symbol 'S #:label 'S0 #:substitution (list (nonterminal-symbol 'S) (terminal-symbol '+) (nonterminal-symbol 'M)))) (define S-rule1 - (make-rule #:symbol 'S #:label 'S1 #:substitution (list (nonterminal-symbol 'M)))) + (make-cf-production-rule + #:symbol 'S #:label 'S1 #:substitution (list (nonterminal-symbol 'M)))) (define M-rule0 - (make-rule + (make-cf-production-rule #:symbol 'M #:label 'M0 #:substitution (list (nonterminal-symbol 'M) (terminal-symbol '*) (nonterminal-symbol 'T)))) (define M-rule1 - (make-rule #:symbol 'M #:label 'M1 #:substitution (list (nonterminal-symbol 'T)))) + (make-cf-production-rule + #:symbol 'M #:label 'M1 #:substitution (list (nonterminal-symbol 'T)))) (define T-rule - (make-rule #:symbol 'T #:label 'T #:substitution (list (terminal-symbol 'number)))) + (make-cf-production-rule + #:symbol 'T #:label 'T #:substitution (list (terminal-symbol 'number)))) (define arithmetic-grammar - (make-grammar #:rules (list P-rule S-rule0 S-rule1 M-rule0 M-rule1 T-rule) #:start-symbol 'P)) + (make-cf-grammar + #:rules (list P-rule S-rule0 S-rule1 M-rule0 M-rule1 T-rule) #:start-symbol 'P)) (define input-tokens (list (token 'number 2) (token '+ 'plus) (token 'number 3) (token '* 'times) (token 'number 4))) @@ -299,35 +305,36 @@ (test-case "syntax parser" (define P-rule - (make-rule + (make-cf-production-rule #:symbol 'P #:label (syntax-label 'P) #:substitution (list (nonterminal-symbol 'S)))) (define S-rule0 - (make-rule + (make-cf-production-rule #:symbol 'S #:label (syntax-label 'S0) #:substitution (list (nonterminal-symbol 'S) (terminal-symbol '+) (nonterminal-symbol 'M)))) (define S-rule1 - (make-rule + (make-cf-production-rule #:symbol 'S #:label (syntax-label 'S1) #:substitution (list (nonterminal-symbol 'M)))) (define M-rule0 - (make-rule + (make-cf-production-rule #:symbol 'M #:label (syntax-label 'M0) #:substitution (list (nonterminal-symbol 'M) (terminal-symbol '*) (nonterminal-symbol 'T)))) (define M-rule1 - (make-rule + (make-cf-production-rule #:symbol 'M #:label (syntax-label 'M1) #:substitution (list (nonterminal-symbol 'T)))) (define T-rule - (make-rule + (make-cf-production-rule #:symbol 'T #:label (syntax-label 'T) #:substitution (list (terminal-symbol 'number)))) (define arithmetic-grammar - (make-grammar #:rules (list P-rule S-rule0 S-rule1 M-rule0 M-rule1 T-rule) #:start-symbol 'P)) + (make-cf-grammar + #:rules (list P-rule S-rule0 S-rule1 M-rule0 M-rule1 T-rule) #:start-symbol 'P)) (define input-tokens