Add support for cuts and splices

remotes/jackfirth/master
Jack Firth 2 years ago
parent 2300abf00c
commit 31174e67f4

@ -7,6 +7,7 @@
(provide (provide
(struct-out terminal-derivation) (struct-out terminal-derivation)
(struct-out nonterminal-derivation) (struct-out nonterminal-derivation)
(struct-out datum-label)
(contract-out (contract-out
[parser-derivation? predicate/c] [parser-derivation? predicate/c]
[parser-derivation-first-terminal (-> parser-derivation? any/c)] [parser-derivation-first-terminal (-> parser-derivation? any/c)]
@ -14,8 +15,14 @@
[parser-derivation [parser-derivation
(case-> (case->
(-> any/c terminal-derivation?) (-> any/c terminal-derivation?)
(-> any/c parser-derivation? #:rest (listof parser-derivation?) nonterminal-derivation?))] (-> derivation-label? parser-derivation? #:rest (listof parser-derivation?)
nonterminal-derivation?))]
[parser-derivation->syntax (-> parser-derivation? syntax?)] [parser-derivation->syntax (-> parser-derivation? syntax?)]
[derivation-label? predicate/c]
[cut-label cut-label?]
[cut-label? predicate/c]
[splice-label splice-label?]
[splice-label? predicate/c]
[syntax-label? predicate/c] [syntax-label? predicate/c]
[syntax-label (->* (any/c) (#:properties hash? #:expression-properties hash?) syntax-label?)] [syntax-label (->* (any/c) (#:properties hash? #:expression-properties hash?) syntax-label?)]
[syntax-label-value (-> syntax-label? any/c)] [syntax-label-value (-> syntax-label? any/c)]
@ -33,6 +40,7 @@
(module+ test (module+ test
(require (submod "..") (require (submod "..")
racket/syntax-srcloc
rackunit)) rackunit))
@ -99,7 +107,7 @@
(test-case "nonterminal of terminals" (test-case "nonterminal of terminals"
(define derivation (define derivation
(parser-derivation (parser-derivation
'a (datum-label 'a)
(parser-derivation 1) (parser-derivation 1)
(parser-derivation 2) (parser-derivation 2)
(parser-derivation 3))) (parser-derivation 3)))
@ -108,8 +116,8 @@
(test-case "nonterminal of nonterminals and terminals" (test-case "nonterminal of nonterminals and terminals"
(define derivation (define derivation
(parser-derivation (parser-derivation
'a (datum-label 'a)
(parser-derivation 'b (parser-derivation 1)) (parser-derivation (datum-label 'b) (parser-derivation 1))
(parser-derivation 2) (parser-derivation 2)
(parser-derivation 3))) (parser-derivation 3)))
(check-equal? (parser-derivation-first-terminal derivation) 1))) (check-equal? (parser-derivation-first-terminal derivation) 1)))
@ -122,7 +130,7 @@
(test-case "nonterminal of terminals" (test-case "nonterminal of terminals"
(define derivation (define derivation
(parser-derivation (parser-derivation
'a (datum-label 'a)
(parser-derivation 1) (parser-derivation 1)
(parser-derivation 2) (parser-derivation 2)
(parser-derivation 3))) (parser-derivation 3)))
@ -131,13 +139,28 @@
(test-case "nonterminal of nonterminals and terminals" (test-case "nonterminal of nonterminals and terminals"
(define derivation (define derivation
(parser-derivation (parser-derivation
'a (datum-label 'a)
(parser-derivation 1) (parser-derivation 1)
(parser-derivation 2) (parser-derivation 2)
(parser-derivation 'b (parser-derivation 3)))) (parser-derivation (datum-label 'b) (parser-derivation 3))))
(check-equal? (parser-derivation-last-terminal derivation) 3)))) (check-equal? (parser-derivation-last-terminal derivation) 3))))
(define (derivation-label? v)
(or (cut-label? v) (splice-label? v) (datum-label? v) (syntax-label? v)))
(struct cut-label () #:transparent #:constructor-name constructor:cut-label #:omit-define-syntaxes)
(define cut-label (constructor:cut-label))
(struct splice-label ()
#:transparent #:constructor-name constructor:splice-label #:omit-define-syntaxes)
(define splice-label (constructor:splice-label))
(struct datum-label (value) #:transparent)
(struct syntax-label (value expression-properties properties) (struct syntax-label (value expression-properties properties)
#:transparent #:transparent
#:constructor-name constructor:syntax-label #:constructor-name constructor:syntax-label
@ -155,32 +178,149 @@
(define (parser-derivation->syntax derivation) (define (parser-derivation->syntax derivation)
(match derivation (define (->splice derivation)
[(terminal-derivation t) (syntax-token->syntax t)] (match derivation
[(nonterminal-derivation label children) [(terminal-derivation t) (list (syntax-token->syntax t))]
(define first-token (parser-derivation-first-terminal derivation)) [(nonterminal-derivation (? cut-label?) _) '()]
(define label-location [(nonterminal-derivation (? splice-label?) children)
(srcloc (syntax-token-source first-token) (for*/list ([child (in-vector children)]
(syntax-token-line first-token) [stx (in-list (->splice child))])
(syntax-token-column first-token) stx)]
(syntax-token-position first-token) [(nonterminal-derivation (? syntax-label? label) children)
0)) (define first-token (parser-derivation-first-terminal derivation))
(define label-stx (define label-location
(for/fold ([stx (datum->syntax #false (syntax-label-value label) label-location #false)]) (srcloc (syntax-token-source first-token)
([(key value) (in-hash (syntax-label-properties label))]) (syntax-token-line first-token)
(syntax-property stx key value))) (syntax-token-column first-token)
(define children-syntaxes (syntax-token-position first-token)
(for/list ([child (in-vector children)]) 0))
(parser-derivation->syntax child))) (define label-stx
(define last-token (parser-derivation-last-terminal derivation)) (for/fold ([stx (datum->syntax #false (syntax-label-value label) label-location #false)])
(define expression-location ([(key value) (in-hash (syntax-label-properties label))])
(srcloc (syntax-token-source first-token) (syntax-property stx key value)))
(syntax-token-line first-token) (define children-syntaxes
(syntax-token-column first-token) (for*/list ([child (in-vector children)]
(syntax-token-position first-token) [spliced-child (in-list (->splice child))])
(- (syntax-token-end-position last-token) (syntax-token-position first-token)))) spliced-child))
(define expression-stx (define last-token (parser-derivation-last-terminal derivation))
(datum->syntax #false (cons label-stx children-syntaxes) expression-location #false)) (define expression-location
(for/fold ([expression-stx expression-stx]) (srcloc (syntax-token-source first-token)
([(key value) (in-hash (syntax-label-expression-properties label))]) (syntax-token-line first-token)
(syntax-property expression-stx key value))])) (syntax-token-column first-token)
(syntax-token-position first-token)
(- (syntax-token-end-position last-token) (syntax-token-position first-token))))
(define expression-stx
(datum->syntax #false (cons label-stx children-syntaxes) expression-location #false))
(list (for/fold ([expression-stx expression-stx])
([(key value) (in-hash (syntax-label-expression-properties label))])
(syntax-property expression-stx key value)))]))
(define first-token (parser-derivation-first-terminal derivation))
(define last-token (parser-derivation-last-terminal derivation))
(define top-level-location
(srcloc (syntax-token-source first-token)
(syntax-token-line first-token)
(syntax-token-column first-token)
(syntax-token-position first-token)
(- (syntax-token-end-position last-token) (syntax-token-position first-token))))
(define top-level-stxs (->splice derivation))
(match top-level-stxs
[(list stx) stx]))
(define (parser-derivation->datum derivation)
(define (->splice derivation)
(match derivation
[(terminal-derivation t) (list t)]
[(nonterminal-derivation (? cut-label?) _) '()]
[(nonterminal-derivation (? splice-label?) children)
(for*/list ([child (in-vector children)]
[datum (in-list (->splice child))])
datum)]
[(nonterminal-derivation (datum-label value) children)
(define child-data
(for*/list ([child (in-vector children)]
[spliced-child (in-list (->splice child))])
spliced-child))
(list (cons value child-data))]))
(define top-level-data (->splice derivation))
(match top-level-data
[(list datum) datum]))
(module+ test
(test-case (name-string parser-derivation->datum)
(test-case "datum terminals"
(define derivation (parser-derivation 'a))
(check-equal? (parser-derivation->datum derivation) 'a))
(test-case "datum nonterminals"
(define derivation
(parser-derivation (datum-label 'a)
(parser-derivation 'b)
(parser-derivation 'c)
(parser-derivation 'd)))
(check-equal? (parser-derivation->datum derivation) '(a b c d)))
(test-case "datum cuts"
(define derivation
(parser-derivation (datum-label 'a)
(parser-derivation cut-label (parser-derivation 'b))
(parser-derivation 'c)
(parser-derivation cut-label (parser-derivation 'd))))
(check-equal? (parser-derivation->datum derivation) '(a c)))
(test-case "datum splices"
(define derivation
(parser-derivation (datum-label 'a)
(parser-derivation 'b)
(parser-derivation splice-label
(parser-derivation 'c1)
(parser-derivation 'c2)
(parser-derivation 'c3))
(parser-derivation 'd)))
(check-equal? (parser-derivation->datum derivation) '(a b c1 c2 c3 d))))
(test-case (name-string parser-derivation->syntax)
(test-case "syntax terminals"
(define derivation (parser-derivation (syntax-token 'a #:position 1 #:span 1)))
(define actual (parser-derivation->syntax derivation))
(check-equal? (syntax->datum actual) 'a)
(check-equal? (syntax-srcloc actual) (srcloc #false #false #false 1 1)))
(test-case "syntax nonterminals"
(define derivation
(parser-derivation
(syntax-label 'a)
(parser-derivation (syntax-token 'b #:position 1 #:span 1))
(parser-derivation (syntax-token 'c #:position 2 #:span 1))
(parser-derivation (syntax-token 'd #:position 3 #:span 1))))
(define actual (parser-derivation->syntax derivation))
(check-equal? (syntax->datum actual) '(a b c d))
(check-equal? (syntax-srcloc actual) (srcloc #false #false #false 1 3)))
(test-case "syntax cuts"
(define derivation
(parser-derivation
(syntax-label 'a)
(parser-derivation cut-label (parser-derivation (syntax-token 'b #:position 1 #:span 1)))
(parser-derivation (syntax-token 'c #:position 2 #:span 1))
(parser-derivation cut-label (parser-derivation (syntax-token 'd #:position 3 #:span 1)))))
(define actual (parser-derivation->syntax derivation))
(check-equal? (syntax->datum actual) '(a c))
(check-equal? (syntax-srcloc actual) (srcloc #false #false #false 1 3)))
(test-case "syntax splices"
(define derivation
(parser-derivation
(syntax-label 'a)
(parser-derivation (syntax-token 'b #:position 1 #:span 1))
(parser-derivation splice-label
(parser-derivation (syntax-token 'c1 #:position 2 #:span 1))
(parser-derivation (syntax-token 'c2 #:position 3 #:span 1))
(parser-derivation (syntax-token 'c3 #:position 4 #:span 1)))
(parser-derivation (syntax-token 'd #:position 5 #:span 1))))
(define actual (parser-derivation->syntax derivation))
(check-equal? (syntax->datum actual) '(a b c1 c2 c3 d))
(check-equal? (syntax-srcloc actual) (srcloc #false #false #false 1 5)))))

@ -14,13 +14,14 @@
[cf-grammar-start-rules (-> cf-grammar? (set/c cf-production-rule? #:kind 'immutable))] [cf-grammar-start-rules (-> cf-grammar? (set/c cf-production-rule? #:kind 'immutable))]
[make-cf-grammar (-> #:rules (sequence/c cf-production-rule?) #:start-symbol any/c cf-grammar?)] [make-cf-grammar (-> #:rules (sequence/c cf-production-rule?) #:start-symbol any/c cf-grammar?)]
[make-cf-production-rule [make-cf-production-rule
(-> #:symbol any/c #:substitution (sequence/c grammar-symbol?) #:label any/c (-> #:symbol any/c #:substitution (sequence/c grammar-symbol?) #:label derivation-label?
cf-production-rule?)])) cf-production-rule?)]))
(require racket/sequence (require racket/sequence
racket/set racket/set
rebellion/collection/vector) rebellion/collection/vector
yaragg/base/derivation)
;@---------------------------------------------------------------------------------------------------- ;@----------------------------------------------------------------------------------------------------

@ -36,7 +36,7 @@
#:syntax-function (λ (tokens) (earley-parse-syntax grammar tokens)))) #:syntax-function (λ (tokens) (earley-parse-syntax grammar tokens))))
;; The hash keys are sppf-labels and the values are a list of sppf-child-pairs ;; The hash keys are sppf-keys and the values are a list of sppf-child-pairs
(struct sppf-forest (hash)) (struct sppf-forest (hash))
@ -44,15 +44,15 @@
(sppf-forest (make-hash))) (sppf-forest (make-hash)))
(define (sppf-forest-add-node! forest label) (define (sppf-forest-add-node! forest key)
(define h (sppf-forest-hash forest)) (define h (sppf-forest-hash forest))
(unless (hash-has-key? h label) (unless (hash-has-key? h key)
(hash-set! h label '()))) (hash-set! h key '())))
(define (sppf-forest-add-child-pair! forest label #:left left-child #:right right-child) (define (sppf-forest-add-child-pair! forest key #:left left-child #:right right-child)
(define pair (sppf-child-pair left-child right-child)) (define pair (sppf-child-pair left-child right-child))
(hash-update! (sppf-forest-hash forest) label (λ (children) (cons pair children)) '())) (hash-update! (sppf-forest-hash forest) key (λ (children) (cons pair children)) '()))
;; SPPF trees walk each rule down the left side, and each right child of the left spine corresponds to ;; SPPF trees walk each rule down the left side, and each right child of the left spine corresponds to
@ -261,26 +261,26 @@
(test-case "datum parser" (test-case "datum parser"
(define P-rule (define P-rule
(make-cf-production-rule (make-cf-production-rule
#:symbol 'P #:label 'P #:substitution (list (nonterminal-symbol 'S)))) #:symbol 'P #:label (datum-label 'P) #:substitution (list (nonterminal-symbol 'S))))
(define S-rule0 (define S-rule0
(make-cf-production-rule (make-cf-production-rule
#:symbol 'S #:symbol 'S
#:label 'S0 #:label (datum-label 'S0)
#:substitution (list (nonterminal-symbol 'S) (terminal-symbol '+) (nonterminal-symbol 'M)))) #:substitution (list (nonterminal-symbol 'S) (terminal-symbol '+) (nonterminal-symbol 'M))))
(define S-rule1 (define S-rule1
(make-cf-production-rule (make-cf-production-rule
#:symbol 'S #:label 'S1 #:substitution (list (nonterminal-symbol 'M)))) #:symbol 'S #:label (datum-label 'S1) #:substitution (list (nonterminal-symbol 'M))))
(define M-rule0 (define M-rule0
(make-cf-production-rule (make-cf-production-rule
#:symbol 'M #:symbol 'M
#:label 'M0 #:label (datum-label 'M0)
#:substitution (list (nonterminal-symbol 'M) (terminal-symbol '*) (nonterminal-symbol 'T)))) #:substitution (list (nonterminal-symbol 'M) (terminal-symbol '*) (nonterminal-symbol 'T))))
(define M-rule1 (define M-rule1
(make-cf-production-rule (make-cf-production-rule
#:symbol 'M #:label 'M1 #:substitution (list (nonterminal-symbol 'T)))) #:symbol 'M #:label (datum-label 'M1) #:substitution (list (nonterminal-symbol 'T))))
(define T-rule (define T-rule
(make-cf-production-rule (make-cf-production-rule
#:symbol 'T #:label 'T #:substitution (list (terminal-symbol 'number)))) #:symbol 'T #:label (datum-label 'T) #:substitution (list (terminal-symbol 'number))))
(define arithmetic-grammar (define arithmetic-grammar
(make-cf-grammar (make-cf-grammar
#:rules (list P-rule S-rule0 S-rule1 M-rule0 M-rule1 T-rule) #:start-symbol 'P)) #:rules (list P-rule S-rule0 S-rule1 M-rule0 M-rule1 T-rule) #:start-symbol 'P))
@ -290,16 +290,20 @@
(define parser (earley-parser arithmetic-grammar)) (define parser (earley-parser arithmetic-grammar))
(define expected-arithmetic-parse-tree (define expected-arithmetic-parse-tree
(parser-derivation (parser-derivation
'P (datum-label 'P)
(parser-derivation (parser-derivation
'S0 (datum-label 'S0)
(parser-derivation 'S1 (parser-derivation 'M1 (parser-derivation 'T (parser-derivation 2)))) (parser-derivation
(datum-label 'S1)
(parser-derivation
(datum-label 'M1) (parser-derivation (datum-label 'T) (parser-derivation 2))))
(parser-derivation 'plus) (parser-derivation 'plus)
(parser-derivation (parser-derivation
'M0 (datum-label 'M0)
(parser-derivation 'M1 (parser-derivation 'T (parser-derivation 3))) (parser-derivation
(datum-label 'M1) (parser-derivation (datum-label 'T) (parser-derivation 3)))
(parser-derivation 'times) (parser-derivation 'times)
(parser-derivation 'T (parser-derivation 4)))))) (parser-derivation (datum-label 'T) (parser-derivation 4))))))
(check-equal? (parse-datum parser input-tokens) expected-arithmetic-parse-tree)) (check-equal? (parse-datum parser input-tokens) expected-arithmetic-parse-tree))

Loading…
Cancel
Save