Add support for cuts and splices

remotes/jackfirth/master
Jack Firth 2 years ago
parent 2300abf00c
commit 31174e67f4

@ -7,6 +7,7 @@
(provide
(struct-out terminal-derivation)
(struct-out nonterminal-derivation)
(struct-out datum-label)
(contract-out
[parser-derivation? predicate/c]
[parser-derivation-first-terminal (-> parser-derivation? any/c)]
@ -14,8 +15,14 @@
[parser-derivation
(case->
(-> any/c terminal-derivation?)
(-> any/c parser-derivation? #:rest (listof parser-derivation?) nonterminal-derivation?))]
(-> derivation-label? parser-derivation? #:rest (listof parser-derivation?)
nonterminal-derivation?))]
[parser-derivation->syntax (-> parser-derivation? syntax?)]
[derivation-label? predicate/c]
[cut-label cut-label?]
[cut-label? predicate/c]
[splice-label splice-label?]
[splice-label? predicate/c]
[syntax-label? predicate/c]
[syntax-label (->* (any/c) (#:properties hash? #:expression-properties hash?) syntax-label?)]
[syntax-label-value (-> syntax-label? any/c)]
@ -33,6 +40,7 @@
(module+ test
(require (submod "..")
racket/syntax-srcloc
rackunit))
@ -99,7 +107,7 @@
(test-case "nonterminal of terminals"
(define derivation
(parser-derivation
'a
(datum-label 'a)
(parser-derivation 1)
(parser-derivation 2)
(parser-derivation 3)))
@ -108,8 +116,8 @@
(test-case "nonterminal of nonterminals and terminals"
(define derivation
(parser-derivation
'a
(parser-derivation 'b (parser-derivation 1))
(datum-label 'a)
(parser-derivation (datum-label 'b) (parser-derivation 1))
(parser-derivation 2)
(parser-derivation 3)))
(check-equal? (parser-derivation-first-terminal derivation) 1)))
@ -122,7 +130,7 @@
(test-case "nonterminal of terminals"
(define derivation
(parser-derivation
'a
(datum-label 'a)
(parser-derivation 1)
(parser-derivation 2)
(parser-derivation 3)))
@ -131,13 +139,28 @@
(test-case "nonterminal of nonterminals and terminals"
(define derivation
(parser-derivation
'a
(datum-label 'a)
(parser-derivation 1)
(parser-derivation 2)
(parser-derivation 'b (parser-derivation 3))))
(parser-derivation (datum-label 'b) (parser-derivation 3))))
(check-equal? (parser-derivation-last-terminal derivation) 3))))
(define (derivation-label? v)
(or (cut-label? v) (splice-label? v) (datum-label? v) (syntax-label? v)))
(struct cut-label () #:transparent #:constructor-name constructor:cut-label #:omit-define-syntaxes)
(define cut-label (constructor:cut-label))
(struct splice-label ()
#:transparent #:constructor-name constructor:splice-label #:omit-define-syntaxes)
(define splice-label (constructor:splice-label))
(struct datum-label (value) #:transparent)
(struct syntax-label (value expression-properties properties)
#:transparent
#:constructor-name constructor:syntax-label
@ -155,32 +178,149 @@
(define (parser-derivation->syntax derivation)
(match derivation
[(terminal-derivation t) (syntax-token->syntax t)]
[(nonterminal-derivation label children)
(define first-token (parser-derivation-first-terminal derivation))
(define label-location
(srcloc (syntax-token-source first-token)
(syntax-token-line first-token)
(syntax-token-column first-token)
(syntax-token-position first-token)
0))
(define label-stx
(for/fold ([stx (datum->syntax #false (syntax-label-value label) label-location #false)])
([(key value) (in-hash (syntax-label-properties label))])
(syntax-property stx key value)))
(define children-syntaxes
(for/list ([child (in-vector children)])
(parser-derivation->syntax child)))
(define last-token (parser-derivation-last-terminal derivation))
(define expression-location
(srcloc (syntax-token-source first-token)
(syntax-token-line first-token)
(syntax-token-column first-token)
(syntax-token-position first-token)
(- (syntax-token-end-position last-token) (syntax-token-position first-token))))
(define expression-stx
(datum->syntax #false (cons label-stx children-syntaxes) expression-location #false))
(for/fold ([expression-stx expression-stx])
([(key value) (in-hash (syntax-label-expression-properties label))])
(syntax-property expression-stx key value))]))
(define (->splice derivation)
(match derivation
[(terminal-derivation t) (list (syntax-token->syntax t))]
[(nonterminal-derivation (? cut-label?) _) '()]
[(nonterminal-derivation (? splice-label?) children)
(for*/list ([child (in-vector children)]
[stx (in-list (->splice child))])
stx)]
[(nonterminal-derivation (? syntax-label? label) children)
(define first-token (parser-derivation-first-terminal derivation))
(define label-location
(srcloc (syntax-token-source first-token)
(syntax-token-line first-token)
(syntax-token-column first-token)
(syntax-token-position first-token)
0))
(define label-stx
(for/fold ([stx (datum->syntax #false (syntax-label-value label) label-location #false)])
([(key value) (in-hash (syntax-label-properties label))])
(syntax-property stx key value)))
(define children-syntaxes
(for*/list ([child (in-vector children)]
[spliced-child (in-list (->splice child))])
spliced-child))
(define last-token (parser-derivation-last-terminal derivation))
(define expression-location
(srcloc (syntax-token-source first-token)
(syntax-token-line first-token)
(syntax-token-column first-token)
(syntax-token-position first-token)
(- (syntax-token-end-position last-token) (syntax-token-position first-token))))
(define expression-stx
(datum->syntax #false (cons label-stx children-syntaxes) expression-location #false))
(list (for/fold ([expression-stx expression-stx])
([(key value) (in-hash (syntax-label-expression-properties label))])
(syntax-property expression-stx key value)))]))
(define first-token (parser-derivation-first-terminal derivation))
(define last-token (parser-derivation-last-terminal derivation))
(define top-level-location
(srcloc (syntax-token-source first-token)
(syntax-token-line first-token)
(syntax-token-column first-token)
(syntax-token-position first-token)
(- (syntax-token-end-position last-token) (syntax-token-position first-token))))
(define top-level-stxs (->splice derivation))
(match top-level-stxs
[(list stx) stx]))
(define (parser-derivation->datum derivation)
(define (->splice derivation)
(match derivation
[(terminal-derivation t) (list t)]
[(nonterminal-derivation (? cut-label?) _) '()]
[(nonterminal-derivation (? splice-label?) children)
(for*/list ([child (in-vector children)]
[datum (in-list (->splice child))])
datum)]
[(nonterminal-derivation (datum-label value) children)
(define child-data
(for*/list ([child (in-vector children)]
[spliced-child (in-list (->splice child))])
spliced-child))
(list (cons value child-data))]))
(define top-level-data (->splice derivation))
(match top-level-data
[(list datum) datum]))
(module+ test
(test-case (name-string parser-derivation->datum)
(test-case "datum terminals"
(define derivation (parser-derivation 'a))
(check-equal? (parser-derivation->datum derivation) 'a))
(test-case "datum nonterminals"
(define derivation
(parser-derivation (datum-label 'a)
(parser-derivation 'b)
(parser-derivation 'c)
(parser-derivation 'd)))
(check-equal? (parser-derivation->datum derivation) '(a b c d)))
(test-case "datum cuts"
(define derivation
(parser-derivation (datum-label 'a)
(parser-derivation cut-label (parser-derivation 'b))
(parser-derivation 'c)
(parser-derivation cut-label (parser-derivation 'd))))
(check-equal? (parser-derivation->datum derivation) '(a c)))
(test-case "datum splices"
(define derivation
(parser-derivation (datum-label 'a)
(parser-derivation 'b)
(parser-derivation splice-label
(parser-derivation 'c1)
(parser-derivation 'c2)
(parser-derivation 'c3))
(parser-derivation 'd)))
(check-equal? (parser-derivation->datum derivation) '(a b c1 c2 c3 d))))
(test-case (name-string parser-derivation->syntax)
(test-case "syntax terminals"
(define derivation (parser-derivation (syntax-token 'a #:position 1 #:span 1)))
(define actual (parser-derivation->syntax derivation))
(check-equal? (syntax->datum actual) 'a)
(check-equal? (syntax-srcloc actual) (srcloc #false #false #false 1 1)))
(test-case "syntax nonterminals"
(define derivation
(parser-derivation
(syntax-label 'a)
(parser-derivation (syntax-token 'b #:position 1 #:span 1))
(parser-derivation (syntax-token 'c #:position 2 #:span 1))
(parser-derivation (syntax-token 'd #:position 3 #:span 1))))
(define actual (parser-derivation->syntax derivation))
(check-equal? (syntax->datum actual) '(a b c d))
(check-equal? (syntax-srcloc actual) (srcloc #false #false #false 1 3)))
(test-case "syntax cuts"
(define derivation
(parser-derivation
(syntax-label 'a)
(parser-derivation cut-label (parser-derivation (syntax-token 'b #:position 1 #:span 1)))
(parser-derivation (syntax-token 'c #:position 2 #:span 1))
(parser-derivation cut-label (parser-derivation (syntax-token 'd #:position 3 #:span 1)))))
(define actual (parser-derivation->syntax derivation))
(check-equal? (syntax->datum actual) '(a c))
(check-equal? (syntax-srcloc actual) (srcloc #false #false #false 1 3)))
(test-case "syntax splices"
(define derivation
(parser-derivation
(syntax-label 'a)
(parser-derivation (syntax-token 'b #:position 1 #:span 1))
(parser-derivation splice-label
(parser-derivation (syntax-token 'c1 #:position 2 #:span 1))
(parser-derivation (syntax-token 'c2 #:position 3 #:span 1))
(parser-derivation (syntax-token 'c3 #:position 4 #:span 1)))
(parser-derivation (syntax-token 'd #:position 5 #:span 1))))
(define actual (parser-derivation->syntax derivation))
(check-equal? (syntax->datum actual) '(a b c1 c2 c3 d))
(check-equal? (syntax-srcloc actual) (srcloc #false #false #false 1 5)))))

@ -14,13 +14,14 @@
[cf-grammar-start-rules (-> cf-grammar? (set/c cf-production-rule? #:kind 'immutable))]
[make-cf-grammar (-> #:rules (sequence/c cf-production-rule?) #:start-symbol any/c cf-grammar?)]
[make-cf-production-rule
(-> #:symbol any/c #:substitution (sequence/c grammar-symbol?) #:label any/c
(-> #:symbol any/c #:substitution (sequence/c grammar-symbol?) #:label derivation-label?
cf-production-rule?)]))
(require racket/sequence
racket/set
rebellion/collection/vector)
rebellion/collection/vector
yaragg/base/derivation)
;@----------------------------------------------------------------------------------------------------

@ -36,7 +36,7 @@
#:syntax-function (λ (tokens) (earley-parse-syntax grammar tokens))))
;; The hash keys are sppf-labels and the values are a list of sppf-child-pairs
;; The hash keys are sppf-keys and the values are a list of sppf-child-pairs
(struct sppf-forest (hash))
@ -44,15 +44,15 @@
(sppf-forest (make-hash)))
(define (sppf-forest-add-node! forest label)
(define (sppf-forest-add-node! forest key)
(define h (sppf-forest-hash forest))
(unless (hash-has-key? h label)
(hash-set! h label '())))
(unless (hash-has-key? h key)
(hash-set! h key '())))
(define (sppf-forest-add-child-pair! forest label #:left left-child #:right right-child)
(define (sppf-forest-add-child-pair! forest key #:left left-child #:right right-child)
(define pair (sppf-child-pair left-child right-child))
(hash-update! (sppf-forest-hash forest) label (λ (children) (cons pair children)) '()))
(hash-update! (sppf-forest-hash forest) key (λ (children) (cons pair children)) '()))
;; SPPF trees walk each rule down the left side, and each right child of the left spine corresponds to
@ -261,26 +261,26 @@
(test-case "datum parser"
(define P-rule
(make-cf-production-rule
#:symbol 'P #:label 'P #:substitution (list (nonterminal-symbol 'S))))
#:symbol 'P #:label (datum-label 'P) #:substitution (list (nonterminal-symbol 'S))))
(define S-rule0
(make-cf-production-rule
#:symbol 'S
#:label 'S0
#:label (datum-label 'S0)
#:substitution (list (nonterminal-symbol 'S) (terminal-symbol '+) (nonterminal-symbol 'M))))
(define S-rule1
(make-cf-production-rule
#:symbol 'S #:label 'S1 #:substitution (list (nonterminal-symbol 'M))))
#:symbol 'S #:label (datum-label 'S1) #:substitution (list (nonterminal-symbol 'M))))
(define M-rule0
(make-cf-production-rule
#:symbol 'M
#:label 'M0
#:label (datum-label 'M0)
#:substitution (list (nonterminal-symbol 'M) (terminal-symbol '*) (nonterminal-symbol 'T))))
(define M-rule1
(make-cf-production-rule
#:symbol 'M #:label 'M1 #:substitution (list (nonterminal-symbol 'T))))
#:symbol 'M #:label (datum-label 'M1) #:substitution (list (nonterminal-symbol 'T))))
(define T-rule
(make-cf-production-rule
#:symbol 'T #:label 'T #:substitution (list (terminal-symbol 'number))))
#:symbol 'T #:label (datum-label 'T) #:substitution (list (terminal-symbol 'number))))
(define arithmetic-grammar
(make-cf-grammar
#:rules (list P-rule S-rule0 S-rule1 M-rule0 M-rule1 T-rule) #:start-symbol 'P))
@ -290,16 +290,20 @@
(define parser (earley-parser arithmetic-grammar))
(define expected-arithmetic-parse-tree
(parser-derivation
'P
(datum-label 'P)
(parser-derivation
'S0
(parser-derivation 'S1 (parser-derivation 'M1 (parser-derivation 'T (parser-derivation 2))))
(datum-label 'S0)
(parser-derivation
(datum-label 'S1)
(parser-derivation
(datum-label 'M1) (parser-derivation (datum-label 'T) (parser-derivation 2))))
(parser-derivation 'plus)
(parser-derivation
'M0
(parser-derivation 'M1 (parser-derivation 'T (parser-derivation 3)))
(datum-label 'M0)
(parser-derivation
(datum-label 'M1) (parser-derivation (datum-label 'T) (parser-derivation 3)))
(parser-derivation 'times)
(parser-derivation 'T (parser-derivation 4))))))
(parser-derivation (datum-label 'T) (parser-derivation 4))))))
(check-equal? (parse-datum parser input-tokens) expected-arithmetic-parse-tree))

Loading…
Cancel
Save