From 605d943d5034b6fb540d096fec5cc5c1fbcb25d2 Mon Sep 17 00:00:00 2001 From: Jack Firth Date: Mon, 4 Apr 2022 20:25:41 -0700 Subject: [PATCH] Fix more stuff --- base/derivation.rkt | 101 ++++++++++++++++++++++++++----- private/primitive-grammar.rkt | 109 +++++++++++++++++++--------------- 2 files changed, 147 insertions(+), 63 deletions(-) diff --git a/base/derivation.rkt b/base/derivation.rkt index 83c90e4..d7c9ff3 100644 --- a/base/derivation.rkt +++ b/base/derivation.rkt @@ -7,7 +7,6 @@ (provide (struct-out terminal-derivation) (struct-out nonterminal-derivation) - (struct-out syntax-label) (contract-out [parser-derivation? predicate/c] [parser-derivation-first-terminal (-> parser-derivation? any/c)] @@ -16,16 +15,27 @@ (case-> (-> any/c terminal-derivation?) (-> any/c parser-derivation? #:rest (listof parser-derivation?) nonterminal-derivation?))] - [parser-derivation->syntax (-> parser-derivation? syntax?)])) + [parser-derivation->syntax (-> parser-derivation? syntax?)] + [syntax-label? predicate/c] + [syntax-label (->* (any/c) (#:properties hash? #:expression-properties hash?) syntax-label?)] + [syntax-label-value (-> syntax-label? any/c)] + [syntax-label-properties (-> syntax-label? hash?)] + [syntax-label-expression-properties (-> syntax-label? hash?)])) (require racket/match racket/sequence racket/struct rebellion/collection/vector + rebellion/private/static-name yaragg/base/token) +(module+ test + (require (submod "..") + rackunit)) + + ;@---------------------------------------------------------------------------------------------------- @@ -44,7 +54,7 @@ (struct nonterminal-derivation (label children) #:guard - (let ([contract-guard (struct-guard/c any/c (sequence/c parser-derivation?))]) + (let ([contract-guard (struct-guard/c any/c (sequence/c parser-derivation? #:min-count 1))]) (λ (label children name) (let-values ([(label children) (contract-guard label children name)]) (values label (sequence->vector children))))) @@ -69,37 +79,86 @@ (define (parser-derivation-first-terminal derivation) (match derivation [(terminal-derivation value) value] - [(nonterminal-derivation _ (list first-child _ ...)) + [(nonterminal-derivation _ (vector first-child _ ...)) (parser-derivation-first-terminal first-child)])) (define (parser-derivation-last-terminal derivation) (match derivation [(terminal-derivation value) value] - [(nonterminal-derivation _ (list _ ... last-child)) - (parser-derivation-first-terminal last-child)])) + [(nonterminal-derivation _ (vector _ ... last-child)) + (parser-derivation-last-terminal last-child)])) + + +(module+ test + (test-case (name-string parser-derivation-first-terminal) + + (test-case "terminal" + (check-equal? (parser-derivation-first-terminal (terminal-derivation 1)) 1)) + + (test-case "nonterminal of terminals" + (define derivation + (parser-derivation + 'a + (parser-derivation 1) + (parser-derivation 2) + (parser-derivation 3))) + (check-equal? (parser-derivation-first-terminal derivation) 1)) + + (test-case "nonterminal of nonterminals and terminals" + (define derivation + (parser-derivation + 'a + (parser-derivation 'b (parser-derivation 1)) + (parser-derivation 2) + (parser-derivation 3))) + (check-equal? (parser-derivation-first-terminal derivation) 1))) + + (test-case (name-string parser-derivation-last-terminal) + + (test-case "terminal" + (check-equal? (parser-derivation-last-terminal (terminal-derivation 1)) 1)) + + (test-case "nonterminal of terminals" + (define derivation + (parser-derivation + 'a + (parser-derivation 1) + (parser-derivation 2) + (parser-derivation 3))) + (check-equal? (parser-derivation-last-terminal derivation) 3)) + + (test-case "nonterminal of nonterminals and terminals" + (define derivation + (parser-derivation + 'a + (parser-derivation 1) + (parser-derivation 2) + (parser-derivation 'b (parser-derivation 3)))) + (check-equal? (parser-derivation-last-terminal derivation) 3)))) (struct syntax-label (value expression-properties properties) #:transparent + #:constructor-name constructor:syntax-label + #:omit-define-syntaxes #:guard (struct-guard/c any/c (hash/c any/c any/c #:immutable #true #:flat? #true) (hash/c any/c any/c #:immutable #true #:flat? #true))) +(define (syntax-label value + #:properties [properties (hash)] + #:expression-properties [expression-properties (hash)]) + (constructor:syntax-label value properties expression-properties)) + + (define (parser-derivation->syntax derivation) (match derivation [(terminal-derivation t) (syntax-token->syntax t)] [(nonterminal-derivation label children) (define first-token (parser-derivation-first-terminal derivation)) - (define last-token (parser-derivation-last-terminal derivation)) - (define location - (srcloc (syntax-token-source first-token) - (syntax-token-line first-token) - (syntax-token-column first-token) - (syntax-token-position first-token) - (- (syntax-token-position first-token) (syntax-token-end-position last-token)))) (define label-location (srcloc (syntax-token-source first-token) (syntax-token-line first-token) @@ -110,6 +169,18 @@ (for/fold ([stx (datum->syntax #false (syntax-label-value label) label-location #false)]) ([(key value) (in-hash (syntax-label-properties label))]) (syntax-property stx key value))) - (for/fold ([stx (datum->syntax #false (cons label-stx children) location #false)]) + (define children-syntaxes + (for/list ([child (in-vector children)]) + (parser-derivation->syntax child))) + (define last-token (parser-derivation-last-terminal derivation)) + (define expression-location + (srcloc (syntax-token-source first-token) + (syntax-token-line first-token) + (syntax-token-column first-token) + (syntax-token-position first-token) + (- (syntax-token-end-position last-token) (syntax-token-position first-token)))) + (define expression-stx + (datum->syntax #false (cons label-stx children-syntaxes) expression-location #false)) + (for/fold ([expression-stx expression-stx]) ([(key value) (in-hash (syntax-label-expression-properties label))]) - (syntax-property stx key value))])) \ No newline at end of file + (syntax-property expression-stx key value))])) diff --git a/private/primitive-grammar.rkt b/private/primitive-grammar.rkt index 2798e02..c638857 100644 --- a/private/primitive-grammar.rkt +++ b/private/primitive-grammar.rkt @@ -208,10 +208,10 @@ (vector-set! states (add1 k) next-states))) (define last-state-set (vector-ref states (sub1 position-count))) - (apply stream-append - (for/list ([s (in-set last-state-set)] - #:when (earley-state-represents-successful-parse? s grammar)) - (sppf-forest-derivations forest (earley-state-key s) tokens)))) + (for/set ([s (in-set last-state-set)] + #:when (earley-state-represents-successful-parse? s grammar) + [derivation (in-stream (sppf-forest-derivations forest (earley-state-key s) tokens))]) + derivation)) (define (completed-state? state) @@ -268,6 +268,14 @@ (earley-state-advance-substitution s #:key new-key))) +(define (grammar-parse-to-syntax grammar token-sequence) + (define tokens + (for/vector ([t token-sequence]) + (token (syntax-token-type t) t))) + (for/set ([derivation (in-set (earley-parse grammar tokens))]) + (parser-derivation->syntax derivation))) + + (module+ test (test-case "earley-parse integration test" @@ -297,54 +305,59 @@ (define expected-arithmetic-parse-tree (parser-derivation 'P - (parser-derivation 'S0 - (parser-derivation 'S1 (parser-derivation 'M1 (parser-derivation 'T (parser-derivation 2)))) - (parser-derivation 'plus) - (parser-derivation 'M0 - (parser-derivation 'M1 (parser-derivation 'T (parser-derivation 3))) - (parser-derivation 'times) - (parser-derivation 'T (parser-derivation 4)))))) + (parser-derivation + 'S0 + (parser-derivation 'S1 (parser-derivation 'M1 (parser-derivation 'T (parser-derivation 2)))) + (parser-derivation 'plus) + (parser-derivation + 'M0 + (parser-derivation 'M1 (parser-derivation 'T (parser-derivation 3))) + (parser-derivation 'times) + (parser-derivation 'T (parser-derivation 4)))))) - (check-equal? (stream->list arithmetic-parse-forest) (list expected-arithmetic-parse-tree)))) + (check-equal? arithmetic-parse-forest (set expected-arithmetic-parse-tree)))) -(struct cf-syntax-production-rule (nonterminal label substitution properties label-properties) - #:transparent) +;; Grammar, input, and states taken from https://en.wikipedia.org/wiki/Earley_parser#Example +(define P-rule + (make-rule #:symbol 'P #:label (syntax-label 'P) #:substitution (list (nonterminal-symbol 'S)))) +(define S-rule0 + (make-rule + #:symbol 'S + #:label (syntax-label 'S0) + #:substitution (list (nonterminal-symbol 'S) (terminal-symbol '+) (nonterminal-symbol 'M)))) -(struct syntax-label (value expression-properties properties) #:transparent) +(define S-rule1 + (make-rule #:symbol 'S #:label (syntax-label 'S1) #:substitution (list (nonterminal-symbol 'M)))) +(define M-rule0 + (make-rule + #:symbol 'M + #:label (syntax-label 'M0) + #:substitution (list (nonterminal-symbol 'M) (terminal-symbol '*) (nonterminal-symbol 'T)))) -(define (grammar-parse-to-syntax grammar token-sequence) - (define tokens - (for/vector ([t token-sequence]) - (token (syntax-token-type t) t))) - (for/set ([derivation (in-set (earley-parse tokens))]) - (derivation->syntax derivation))) - - -(define (derivation->syntax derivation) - (match derivation - [(terminal-derivation t) (syntax-token->syntax t)] - [(nonterminal-derivation label children) - (define first-token (parser-derivation-first-terminal derivation)) - (define last-token (parser-derivation-last-terminal derivation)) - (define location - (srcloc (syntax-token-source first-token) - (syntax-token-line first-token) - (syntax-token-column first-token) - (syntax-token-position first-token) - (- (syntax-token-position first-token) (syntax-token-end-position last-token)))) - (define label-location - (srcloc (syntax-token-source first-token) - (syntax-token-line first-token) - (syntax-token-column first-token) - (syntax-token-position first-token) - 0)) - (define label-stx - (for/fold ([stx (datum->syntax #false (syntax-label-value label) label-location #false)]) - ([(key value) (in-hash (syntax-label-properties label))]) - (syntax-property stx key value))) - (for/fold ([stx (datum->syntax #false (cons label-stx children) location #false)]) - ([(key value) (in-hash (syntax-label-expression-properties label))]) - (syntax-property stx key value))])) +(define M-rule1 + (make-rule #:symbol 'M #:label (syntax-label 'M1) #:substitution (list (nonterminal-symbol 'T)))) + +(define T-rule + (make-rule #:symbol 'T #:label (syntax-label 'T) #:substitution (list (terminal-symbol 'number)))) + +(define arithmetic-grammar + (make-grammar #:rules (list P-rule S-rule0 S-rule1 M-rule0 M-rule1 T-rule) #:start-symbol 'P)) + + +(define input-tokens + (list + (syntax-token 'number 2 #:position 1 #:span 1) + (syntax-token '+ #:position 2 #:span 1) + (syntax-token 'number 3 #:position 3 #:span 1) + (syntax-token '* #:position 4 #:span 1) + (syntax-token 'number 4 #:position 5 #:span 1))) + + +(grammar-parse-to-syntax arithmetic-grammar input-tokens) + + +(define arithmetic-parse-forest + (grammar-parse-to-syntax arithmetic-grammar input-tokens))