From 31174e67f417d4f029f589836544bb9b311586a2 Mon Sep 17 00:00:00 2001 From: Jack Firth Date: Mon, 4 Apr 2022 23:24:15 -0700 Subject: [PATCH] Add support for cuts and splices --- base/derivation.rkt | 212 ++++++++++++++++++++++++++++++++++++-------- base/grammar.rkt | 5 +- parser/earley.rkt | 40 +++++---- 3 files changed, 201 insertions(+), 56 deletions(-) diff --git a/base/derivation.rkt b/base/derivation.rkt index d7c9ff3..4e118d7 100644 --- a/base/derivation.rkt +++ b/base/derivation.rkt @@ -7,6 +7,7 @@ (provide (struct-out terminal-derivation) (struct-out nonterminal-derivation) + (struct-out datum-label) (contract-out [parser-derivation? predicate/c] [parser-derivation-first-terminal (-> parser-derivation? any/c)] @@ -14,8 +15,14 @@ [parser-derivation (case-> (-> any/c terminal-derivation?) - (-> any/c parser-derivation? #:rest (listof parser-derivation?) nonterminal-derivation?))] + (-> derivation-label? parser-derivation? #:rest (listof parser-derivation?) + nonterminal-derivation?))] [parser-derivation->syntax (-> parser-derivation? syntax?)] + [derivation-label? predicate/c] + [cut-label cut-label?] + [cut-label? predicate/c] + [splice-label splice-label?] + [splice-label? predicate/c] [syntax-label? predicate/c] [syntax-label (->* (any/c) (#:properties hash? #:expression-properties hash?) syntax-label?)] [syntax-label-value (-> syntax-label? any/c)] @@ -33,6 +40,7 @@ (module+ test (require (submod "..") + racket/syntax-srcloc rackunit)) @@ -99,7 +107,7 @@ (test-case "nonterminal of terminals" (define derivation (parser-derivation - 'a + (datum-label 'a) (parser-derivation 1) (parser-derivation 2) (parser-derivation 3))) @@ -108,8 +116,8 @@ (test-case "nonterminal of nonterminals and terminals" (define derivation (parser-derivation - 'a - (parser-derivation 'b (parser-derivation 1)) + (datum-label 'a) + (parser-derivation (datum-label 'b) (parser-derivation 1)) (parser-derivation 2) (parser-derivation 3))) (check-equal? (parser-derivation-first-terminal derivation) 1))) @@ -122,7 +130,7 @@ (test-case "nonterminal of terminals" (define derivation (parser-derivation - 'a + (datum-label 'a) (parser-derivation 1) (parser-derivation 2) (parser-derivation 3))) @@ -131,13 +139,28 @@ (test-case "nonterminal of nonterminals and terminals" (define derivation (parser-derivation - 'a + (datum-label 'a) (parser-derivation 1) (parser-derivation 2) - (parser-derivation 'b (parser-derivation 3)))) + (parser-derivation (datum-label 'b) (parser-derivation 3)))) (check-equal? (parser-derivation-last-terminal derivation) 3)))) +(define (derivation-label? v) + (or (cut-label? v) (splice-label? v) (datum-label? v) (syntax-label? v))) + + +(struct cut-label () #:transparent #:constructor-name constructor:cut-label #:omit-define-syntaxes) +(define cut-label (constructor:cut-label)) + + +(struct splice-label () + #:transparent #:constructor-name constructor:splice-label #:omit-define-syntaxes) +(define splice-label (constructor:splice-label)) + + +(struct datum-label (value) #:transparent) + (struct syntax-label (value expression-properties properties) #:transparent #:constructor-name constructor:syntax-label @@ -155,32 +178,149 @@ (define (parser-derivation->syntax derivation) - (match derivation - [(terminal-derivation t) (syntax-token->syntax t)] - [(nonterminal-derivation label children) - (define first-token (parser-derivation-first-terminal derivation)) - (define label-location - (srcloc (syntax-token-source first-token) - (syntax-token-line first-token) - (syntax-token-column first-token) - (syntax-token-position first-token) - 0)) - (define label-stx - (for/fold ([stx (datum->syntax #false (syntax-label-value label) label-location #false)]) - ([(key value) (in-hash (syntax-label-properties label))]) - (syntax-property stx key value))) - (define children-syntaxes - (for/list ([child (in-vector children)]) - (parser-derivation->syntax child))) - (define last-token (parser-derivation-last-terminal derivation)) - (define expression-location - (srcloc (syntax-token-source first-token) - (syntax-token-line first-token) - (syntax-token-column first-token) - (syntax-token-position first-token) - (- (syntax-token-end-position last-token) (syntax-token-position first-token)))) - (define expression-stx - (datum->syntax #false (cons label-stx children-syntaxes) expression-location #false)) - (for/fold ([expression-stx expression-stx]) - ([(key value) (in-hash (syntax-label-expression-properties label))]) - (syntax-property expression-stx key value))])) + (define (->splice derivation) + (match derivation + [(terminal-derivation t) (list (syntax-token->syntax t))] + [(nonterminal-derivation (? cut-label?) _) '()] + [(nonterminal-derivation (? splice-label?) children) + (for*/list ([child (in-vector children)] + [stx (in-list (->splice child))]) + stx)] + [(nonterminal-derivation (? syntax-label? label) children) + (define first-token (parser-derivation-first-terminal derivation)) + (define label-location + (srcloc (syntax-token-source first-token) + (syntax-token-line first-token) + (syntax-token-column first-token) + (syntax-token-position first-token) + 0)) + (define label-stx + (for/fold ([stx (datum->syntax #false (syntax-label-value label) label-location #false)]) + ([(key value) (in-hash (syntax-label-properties label))]) + (syntax-property stx key value))) + (define children-syntaxes + (for*/list ([child (in-vector children)] + [spliced-child (in-list (->splice child))]) + spliced-child)) + (define last-token (parser-derivation-last-terminal derivation)) + (define expression-location + (srcloc (syntax-token-source first-token) + (syntax-token-line first-token) + (syntax-token-column first-token) + (syntax-token-position first-token) + (- (syntax-token-end-position last-token) (syntax-token-position first-token)))) + (define expression-stx + (datum->syntax #false (cons label-stx children-syntaxes) expression-location #false)) + (list (for/fold ([expression-stx expression-stx]) + ([(key value) (in-hash (syntax-label-expression-properties label))]) + (syntax-property expression-stx key value)))])) + (define first-token (parser-derivation-first-terminal derivation)) + (define last-token (parser-derivation-last-terminal derivation)) + (define top-level-location + (srcloc (syntax-token-source first-token) + (syntax-token-line first-token) + (syntax-token-column first-token) + (syntax-token-position first-token) + (- (syntax-token-end-position last-token) (syntax-token-position first-token)))) + (define top-level-stxs (->splice derivation)) + (match top-level-stxs + [(list stx) stx])) + + +(define (parser-derivation->datum derivation) + (define (->splice derivation) + (match derivation + [(terminal-derivation t) (list t)] + [(nonterminal-derivation (? cut-label?) _) '()] + [(nonterminal-derivation (? splice-label?) children) + (for*/list ([child (in-vector children)] + [datum (in-list (->splice child))]) + datum)] + [(nonterminal-derivation (datum-label value) children) + (define child-data + (for*/list ([child (in-vector children)] + [spliced-child (in-list (->splice child))]) + spliced-child)) + (list (cons value child-data))])) + (define top-level-data (->splice derivation)) + (match top-level-data + [(list datum) datum])) + + +(module+ test + (test-case (name-string parser-derivation->datum) + + (test-case "datum terminals" + (define derivation (parser-derivation 'a)) + (check-equal? (parser-derivation->datum derivation) 'a)) + + (test-case "datum nonterminals" + (define derivation + (parser-derivation (datum-label 'a) + (parser-derivation 'b) + (parser-derivation 'c) + (parser-derivation 'd))) + (check-equal? (parser-derivation->datum derivation) '(a b c d))) + + (test-case "datum cuts" + (define derivation + (parser-derivation (datum-label 'a) + (parser-derivation cut-label (parser-derivation 'b)) + (parser-derivation 'c) + (parser-derivation cut-label (parser-derivation 'd)))) + (check-equal? (parser-derivation->datum derivation) '(a c))) + + (test-case "datum splices" + (define derivation + (parser-derivation (datum-label 'a) + (parser-derivation 'b) + (parser-derivation splice-label + (parser-derivation 'c1) + (parser-derivation 'c2) + (parser-derivation 'c3)) + (parser-derivation 'd))) + (check-equal? (parser-derivation->datum derivation) '(a b c1 c2 c3 d)))) + + (test-case (name-string parser-derivation->syntax) + + (test-case "syntax terminals" + (define derivation (parser-derivation (syntax-token 'a #:position 1 #:span 1))) + (define actual (parser-derivation->syntax derivation)) + (check-equal? (syntax->datum actual) 'a) + (check-equal? (syntax-srcloc actual) (srcloc #false #false #false 1 1))) + + (test-case "syntax nonterminals" + (define derivation + (parser-derivation + (syntax-label 'a) + (parser-derivation (syntax-token 'b #:position 1 #:span 1)) + (parser-derivation (syntax-token 'c #:position 2 #:span 1)) + (parser-derivation (syntax-token 'd #:position 3 #:span 1)))) + (define actual (parser-derivation->syntax derivation)) + (check-equal? (syntax->datum actual) '(a b c d)) + (check-equal? (syntax-srcloc actual) (srcloc #false #false #false 1 3))) + + (test-case "syntax cuts" + (define derivation + (parser-derivation + (syntax-label 'a) + (parser-derivation cut-label (parser-derivation (syntax-token 'b #:position 1 #:span 1))) + (parser-derivation (syntax-token 'c #:position 2 #:span 1)) + (parser-derivation cut-label (parser-derivation (syntax-token 'd #:position 3 #:span 1))))) + (define actual (parser-derivation->syntax derivation)) + (check-equal? (syntax->datum actual) '(a c)) + (check-equal? (syntax-srcloc actual) (srcloc #false #false #false 1 3))) + + (test-case "syntax splices" + (define derivation + (parser-derivation + (syntax-label 'a) + (parser-derivation (syntax-token 'b #:position 1 #:span 1)) + (parser-derivation splice-label + (parser-derivation (syntax-token 'c1 #:position 2 #:span 1)) + (parser-derivation (syntax-token 'c2 #:position 3 #:span 1)) + (parser-derivation (syntax-token 'c3 #:position 4 #:span 1))) + (parser-derivation (syntax-token 'd #:position 5 #:span 1)))) + (define actual (parser-derivation->syntax derivation)) + (check-equal? (syntax->datum actual) '(a b c1 c2 c3 d)) + (check-equal? (syntax-srcloc actual) (srcloc #false #false #false 1 5))))) diff --git a/base/grammar.rkt b/base/grammar.rkt index 544ba14..09b00ad 100644 --- a/base/grammar.rkt +++ b/base/grammar.rkt @@ -14,13 +14,14 @@ [cf-grammar-start-rules (-> cf-grammar? (set/c cf-production-rule? #:kind 'immutable))] [make-cf-grammar (-> #:rules (sequence/c cf-production-rule?) #:start-symbol any/c cf-grammar?)] [make-cf-production-rule - (-> #:symbol any/c #:substitution (sequence/c grammar-symbol?) #:label any/c + (-> #:symbol any/c #:substitution (sequence/c grammar-symbol?) #:label derivation-label? cf-production-rule?)])) (require racket/sequence racket/set - rebellion/collection/vector) + rebellion/collection/vector + yaragg/base/derivation) ;@---------------------------------------------------------------------------------------------------- diff --git a/parser/earley.rkt b/parser/earley.rkt index 7c821a2..182729b 100644 --- a/parser/earley.rkt +++ b/parser/earley.rkt @@ -36,7 +36,7 @@ #:syntax-function (λ (tokens) (earley-parse-syntax grammar tokens)))) -;; The hash keys are sppf-labels and the values are a list of sppf-child-pairs +;; The hash keys are sppf-keys and the values are a list of sppf-child-pairs (struct sppf-forest (hash)) @@ -44,15 +44,15 @@ (sppf-forest (make-hash))) -(define (sppf-forest-add-node! forest label) +(define (sppf-forest-add-node! forest key) (define h (sppf-forest-hash forest)) - (unless (hash-has-key? h label) - (hash-set! h label '()))) + (unless (hash-has-key? h key) + (hash-set! h key '()))) -(define (sppf-forest-add-child-pair! forest label #:left left-child #:right right-child) +(define (sppf-forest-add-child-pair! forest key #:left left-child #:right right-child) (define pair (sppf-child-pair left-child right-child)) - (hash-update! (sppf-forest-hash forest) label (λ (children) (cons pair children)) '())) + (hash-update! (sppf-forest-hash forest) key (λ (children) (cons pair children)) '())) ;; SPPF trees walk each rule down the left side, and each right child of the left spine corresponds to @@ -261,26 +261,26 @@ (test-case "datum parser" (define P-rule (make-cf-production-rule - #:symbol 'P #:label 'P #:substitution (list (nonterminal-symbol 'S)))) + #:symbol 'P #:label (datum-label 'P) #:substitution (list (nonterminal-symbol 'S)))) (define S-rule0 (make-cf-production-rule #:symbol 'S - #:label 'S0 + #:label (datum-label 'S0) #:substitution (list (nonterminal-symbol 'S) (terminal-symbol '+) (nonterminal-symbol 'M)))) (define S-rule1 (make-cf-production-rule - #:symbol 'S #:label 'S1 #:substitution (list (nonterminal-symbol 'M)))) + #:symbol 'S #:label (datum-label 'S1) #:substitution (list (nonterminal-symbol 'M)))) (define M-rule0 (make-cf-production-rule #:symbol 'M - #:label 'M0 + #:label (datum-label 'M0) #:substitution (list (nonterminal-symbol 'M) (terminal-symbol '*) (nonterminal-symbol 'T)))) (define M-rule1 (make-cf-production-rule - #:symbol 'M #:label 'M1 #:substitution (list (nonterminal-symbol 'T)))) + #:symbol 'M #:label (datum-label 'M1) #:substitution (list (nonterminal-symbol 'T)))) (define T-rule (make-cf-production-rule - #:symbol 'T #:label 'T #:substitution (list (terminal-symbol 'number)))) + #:symbol 'T #:label (datum-label 'T) #:substitution (list (terminal-symbol 'number)))) (define arithmetic-grammar (make-cf-grammar #:rules (list P-rule S-rule0 S-rule1 M-rule0 M-rule1 T-rule) #:start-symbol 'P)) @@ -290,16 +290,20 @@ (define parser (earley-parser arithmetic-grammar)) (define expected-arithmetic-parse-tree (parser-derivation - 'P + (datum-label 'P) (parser-derivation - 'S0 - (parser-derivation 'S1 (parser-derivation 'M1 (parser-derivation 'T (parser-derivation 2)))) + (datum-label 'S0) + (parser-derivation + (datum-label 'S1) + (parser-derivation + (datum-label 'M1) (parser-derivation (datum-label 'T) (parser-derivation 2)))) (parser-derivation 'plus) (parser-derivation - 'M0 - (parser-derivation 'M1 (parser-derivation 'T (parser-derivation 3))) + (datum-label 'M0) + (parser-derivation + (datum-label 'M1) (parser-derivation (datum-label 'T) (parser-derivation 3))) (parser-derivation 'times) - (parser-derivation 'T (parser-derivation 4)))))) + (parser-derivation (datum-label 'T) (parser-derivation 4)))))) (check-equal? (parse-datum parser input-tokens) expected-arithmetic-parse-tree))