From fa729383683cd49b6aa1045827bf3190d0c3ff43 Mon Sep 17 00:00:00 2001 From: Jack Firth Date: Tue, 5 Apr 2022 16:04:07 -0700 Subject: [PATCH] Derive datum and syntax parsers together --- base/derivation.rkt | 59 ++++++++++---------- parser.rkt | 53 +++++++++--------- parser/earley.rkt | 128 ++++++++++++-------------------------------- 3 files changed, 92 insertions(+), 148 deletions(-) diff --git a/base/derivation.rkt b/base/derivation.rkt index 73d72d3..de8b141 100644 --- a/base/derivation.rkt +++ b/base/derivation.rkt @@ -17,6 +17,7 @@ (-> semantic-action? parser-derivation? #:rest (listof parser-derivation?) nonterminal-derivation?))] [parser-derivation->syntax (-> parser-derivation? syntax?)] + [parser-derivation->datum (-> parser-derivation? any/c)] [semantic-action? predicate/c] [cut-action cut-action?] [cut-action? predicate/c] @@ -46,6 +47,35 @@ ;@---------------------------------------------------------------------------------------------------- +(define (semantic-action? v) + (or (cut-action? v) (splice-action? v) (label-action? v))) + + +(struct cut-action () #:transparent #:constructor-name constructor:cut-action #:omit-define-syntaxes) +(define cut-action (constructor:cut-action)) + + +(struct splice-action () + #:transparent #:constructor-name constructor:splice-action #:omit-define-syntaxes) +(define splice-action (constructor:splice-action)) + + +(struct label-action (value expression-properties properties) + #:transparent + #:constructor-name constructor:label-action + #:omit-define-syntaxes + #:guard + (struct-guard/c any/c + (hash/c any/c any/c #:immutable #true #:flat? #true) + (hash/c any/c any/c #:immutable #true #:flat? #true))) + + +(define (label-action value + #:properties [properties (hash)] + #:expression-properties [expression-properties (hash)]) + (constructor:label-action value properties expression-properties)) + + (define (parser-derivation? v) (or (terminal-derivation? v) (nonterminal-derivation? v))) @@ -146,35 +176,6 @@ (check-equal? (parser-derivation-last-terminal derivation) 3)))) -(define (semantic-action? v) - (or (cut-action? v) (splice-action? v) (label-action? v))) - - -(struct cut-action () #:transparent #:constructor-name constructor:cut-action #:omit-define-syntaxes) -(define cut-action (constructor:cut-action)) - - -(struct splice-action () - #:transparent #:constructor-name constructor:splice-action #:omit-define-syntaxes) -(define splice-action (constructor:splice-action)) - - -(struct label-action (value expression-properties properties) - #:transparent - #:constructor-name constructor:label-action - #:omit-define-syntaxes - #:guard - (struct-guard/c any/c - (hash/c any/c any/c #:immutable #true #:flat? #true) - (hash/c any/c any/c #:immutable #true #:flat? #true))) - - -(define (label-action value - #:properties [properties (hash)] - #:expression-properties [expression-properties (hash)]) - (constructor:label-action value properties expression-properties)) - - (define (parser-derivation->syntax derivation) (define (->splice derivation) (match derivation diff --git a/parser.rkt b/parser.rkt index 6a7c009..09b76e2 100644 --- a/parser.rkt +++ b/parser.rkt @@ -7,9 +7,9 @@ (provide (contract-out [parser? predicate/c] - [parse-datum (-> parser? (sequence/c token?) parser-derivation?)] + [parse-datum (-> parser? (sequence/c token?) any/c)] [parse-syntax (-> parser? (sequence/c syntax-token?) syntax?)] - [parse-ambiguous-datum (-> parser? (sequence/c token?) (set/c parser-derivation?))] + [parse-ambiguous-datum (-> parser? (sequence/c token?) (set/c any/c))] [parse-ambiguous-syntax (-> parser? (sequence/c syntax-token?) (set/c syntax?))])) @@ -17,9 +17,7 @@ (provide (contract-out [make-parser - (-> #:datum-function (-> (sequence/c token?) (stream/c parser-derivation?)) - #:syntax-function (-> (sequence/c syntax-token?) (stream/c syntax?)) - parser?)]))) + (-> #:deriver (-> (sequence/c token?) (stream/c parser-derivation?)) parser?)]))) (require racket/sequence @@ -32,48 +30,53 @@ ;@---------------------------------------------------------------------------------------------------- -(struct parser (datum-function syntax-function)) +(struct parser (deriver)) -(define (make-parser #:datum-function datum-function #:syntax-function syntax-function) - (parser datum-function syntax-function)) +(define (make-parser #:deriver deriver) + (parser deriver)) (define (parse-ambiguous-syntax p tokens) - (for/set ([stx (in-stream ((parser-syntax-function p) tokens))]) - stx)) + (for/set ([derivation (in-stream ((parser-deriver p) tokens))]) + (parser-derivation->syntax derivation))) (define (parse-ambiguous-datum p tokens) - (for/set ([derivation (in-stream ((parser-datum-function p) tokens))]) - derivation)) + (for/set ([derivation (in-stream ((parser-deriver p) tokens))]) + (parser-derivation->datum derivation))) -(define (parse-syntax p tokens) - (define stx-stream ((parser-syntax-function p) tokens)) - (when (stream-empty? stx-stream) +(define (parse-syntax p token-sequence) + (define tokens + (for/vector ([t token-sequence]) + (token (syntax-token-type t) t))) + (define derivations ((parser-deriver p) tokens)) + (when (stream-empty? derivations) (raise-arguments-error 'parse-syntax "no parse trees produced" "parser" p "tokens" tokens)) - (define stx (stream-first stx-stream)) - (unless (stream-empty? (stream-rest stx-stream)) + (define stx (parser-derivation->syntax (stream-first derivations))) + (unless (stream-empty? (stream-rest derivations)) (raise-arguments-error 'parse-syntax "ambiguous parse, multiple parse trees produced" "parser" p "tokens" tokens "first parse tree" stx - "second parse tree" (stream-first (stream-rest stx-stream)))) + "second parse tree" + (parser-derivation->syntax (stream-first (stream-rest derivations))))) stx) (define (parse-datum p tokens) - (define derivation-stream ((parser-datum-function p) tokens)) - (when (stream-empty? derivation-stream) + (define derivations ((parser-deriver p) tokens)) + (when (stream-empty? derivations) (raise-arguments-error 'parse-datum "no parse trees produced" "parser" p "tokens" tokens)) - (define derivation (stream-first derivation-stream)) - (unless (stream-empty? (stream-rest derivation-stream)) + (define datum (parser-derivation->datum (stream-first derivations))) + (unless (stream-empty? (stream-rest derivations)) (raise-arguments-error 'parse-datum "ambiguous parse, multiple parse trees produced" "parser" p "tokens" tokens - "first parse tree" derivation - "second parse tree" (stream-first (stream-rest derivation-stream)))) - derivation) + "first parse tree" datum + "second parse tree" + (parser-derivation->datum (stream-first (stream-rest derivations))))) + datum) diff --git a/parser/earley.rkt b/parser/earley.rkt index e24de91..f31b8f6 100644 --- a/parser/earley.rkt +++ b/parser/earley.rkt @@ -32,8 +32,7 @@ (define (earley-parser grammar) - (make-parser #:datum-function (λ (tokens) (earley-parse-datum grammar tokens)) - #:syntax-function (λ (tokens) (earley-parse-syntax grammar tokens)))) + (make-parser #:deriver (λ (tokens) (earley-parse grammar tokens)))) ;; The hash keys are sppf-keys and the values are a list of sppf-child-pairs @@ -102,11 +101,11 @@ (guard (complete-sppf-key? key) then (define tok (vector-ref tokens (complete-sppf-key-input-start key))) (stream (terminal-derivation (token-value tok)))) - (define label (cf-production-rule-action (incomplete-sppf-key-rule key))) + (define action (cf-production-rule-action (incomplete-sppf-key-rule key))) (define possible-children (possible-children-lists forest key)) (for*/stream ([children (in-stream possible-children)] [processed-children (in-stream (cartesian-stream (map loop children)))]) - (nonterminal-derivation label processed-children))))) + (nonterminal-derivation action processed-children))))) (struct earley-state (rule substitution-position input-position key) @@ -144,7 +143,7 @@ (cf-grammar-start-symbol grammar)))) -(define (earley-parse-datum grammar token-sequence) +(define (earley-parse grammar token-sequence) (define tokens (sequence->vector token-sequence)) (define token-count (vector-length tokens)) (define position-count (add1 token-count)) @@ -185,9 +184,9 @@ (vector-set! states (add1 k) next-states))) (define last-state-set (vector-ref states (sub1 position-count))) - (for/set ([s (in-set last-state-set)] - #:when (earley-state-represents-successful-parse? s grammar) - [derivation (in-stream (sppf-forest-derivations forest (earley-state-key s) tokens))]) + (for/stream ([s (in-set last-state-set)] + #:when (earley-state-represents-successful-parse? s grammar) + [derivation (in-stream (sppf-forest-derivations forest (earley-state-key s) tokens))]) derivation)) @@ -245,102 +244,46 @@ (earley-state-advance-substitution s #:key new-key))) -(define (earley-parse-syntax grammar token-sequence) - (define tokens - (for/vector ([t token-sequence]) - (token (syntax-token-type t) t))) - (for/set ([derivation (in-set (earley-parse-datum grammar tokens))]) - (parser-derivation->syntax derivation))) - - (module+ test (test-case "earley-parser integration test" ;; Grammar and input taken from https://en.wikipedia.org/wiki/Earley_parser#Example + (define P-rule + (make-cf-production-rule + #:symbol 'P #:action (label-action 'P) #:substitution (list (nonterminal-symbol 'S)))) + (define S-rule0 + (make-cf-production-rule + #:symbol 'S + #:action (label-action 'S0) + #:substitution (list (nonterminal-symbol 'S) (terminal-symbol '+) (nonterminal-symbol 'M)))) + (define S-rule1 + (make-cf-production-rule + #:symbol 'S #:action (label-action 'S1) #:substitution (list (nonterminal-symbol 'M)))) + (define M-rule0 + (make-cf-production-rule + #:symbol 'M + #:action (label-action 'M0) + #:substitution (list (nonterminal-symbol 'M) (terminal-symbol '*) (nonterminal-symbol 'T)))) + (define M-rule1 + (make-cf-production-rule + #:symbol 'M #:action (label-action 'M1) #:substitution (list (nonterminal-symbol 'T)))) + (define T-rule + (make-cf-production-rule + #:symbol 'T #:action (label-action 'T) #:substitution (list (terminal-symbol 'number)))) + (define arithmetic-grammar + (make-cf-grammar + #:rules (list P-rule S-rule0 S-rule1 M-rule0 M-rule1 T-rule) #:start-symbol 'P)) + (define parser (earley-parser arithmetic-grammar)) (test-case "datum parser" - (define P-rule - (make-cf-production-rule - #:symbol 'P #:action (label-action 'P) #:substitution (list (nonterminal-symbol 'S)))) - (define S-rule0 - (make-cf-production-rule - #:symbol 'S - #:action (label-action 'S0) - #:substitution (list (nonterminal-symbol 'S) (terminal-symbol '+) (nonterminal-symbol 'M)))) - (define S-rule1 - (make-cf-production-rule - #:symbol 'S #:action (label-action 'S1) #:substitution (list (nonterminal-symbol 'M)))) - (define M-rule0 - (make-cf-production-rule - #:symbol 'M - #:action (label-action 'M0) - #:substitution (list (nonterminal-symbol 'M) (terminal-symbol '*) (nonterminal-symbol 'T)))) - (define M-rule1 - (make-cf-production-rule - #:symbol 'M #:action (label-action 'M1) #:substitution (list (nonterminal-symbol 'T)))) - (define T-rule - (make-cf-production-rule - #:symbol 'T #:action (label-action 'T) #:substitution (list (terminal-symbol 'number)))) - (define arithmetic-grammar - (make-cf-grammar - #:rules (list P-rule S-rule0 S-rule1 M-rule0 M-rule1 T-rule) #:start-symbol 'P)) (define input-tokens (list (token 'number 2) (token '+ 'plus) (token 'number 3) (token '* 'times) (token 'number 4))) - (define parser (earley-parser arithmetic-grammar)) (define expected-arithmetic-parse-tree - (parser-derivation - (label-action 'P) - (parser-derivation - (label-action 'S0) - (parser-derivation - (label-action 'S1) - (parser-derivation - (label-action 'M1) (parser-derivation (label-action 'T) (parser-derivation 2)))) - (parser-derivation 'plus) - (parser-derivation - (label-action 'M0) - (parser-derivation - (label-action 'M1) (parser-derivation (label-action 'T) (parser-derivation 3))) - (parser-derivation 'times) - (parser-derivation (label-action 'T) (parser-derivation 4)))))) - + '(P (S0 (S1 (M1 (T 2))) plus (M0 (M1 (T 3)) times (T 4))))) (check-equal? (parse-datum parser input-tokens) expected-arithmetic-parse-tree)) (test-case "syntax parser" - (define P-rule - (make-cf-production-rule - #:symbol 'P #:action (label-action 'P) #:substitution (list (nonterminal-symbol 'S)))) - - (define S-rule0 - (make-cf-production-rule - #:symbol 'S - #:action (label-action 'S0) - #:substitution (list (nonterminal-symbol 'S) (terminal-symbol '+) (nonterminal-symbol 'M)))) - - (define S-rule1 - (make-cf-production-rule - #:symbol 'S #:action (label-action 'S1) #:substitution (list (nonterminal-symbol 'M)))) - - (define M-rule0 - (make-cf-production-rule - #:symbol 'M - #:action (label-action 'M0) - #:substitution (list (nonterminal-symbol 'M) (terminal-symbol '*) (nonterminal-symbol 'T)))) - - (define M-rule1 - (make-cf-production-rule - #:symbol 'M #:action (label-action 'M1) #:substitution (list (nonterminal-symbol 'T)))) - - (define T-rule - (make-cf-production-rule - #:symbol 'T #:action (label-action 'T) #:substitution (list (terminal-symbol 'number)))) - - (define arithmetic-grammar - (make-cf-grammar - #:rules (list P-rule S-rule0 S-rule1 M-rule0 M-rule1 T-rule) #:start-symbol 'P)) - - (define input-tokens (list (syntax-token 'number 2 #:position 1 #:span 1) @@ -348,8 +291,5 @@ (syntax-token 'number 3 #:position 3 #:span 1) (syntax-token '* #:position 4 #:span 1) (syntax-token 'number 4 #:position 5 #:span 1))) - - (define parser (earley-parser arithmetic-grammar)) - (check-equal? (syntax->datum (parse-syntax parser input-tokens)) '(P (S0 (S1 (M1 (T 2))) + (M0 (M1 (T 3)) * (T 4))))))))