Fix more stuff

remotes/jackfirth/master
Jack Firth 3 years ago
parent 9c6624f19e
commit 605d943d50

@ -7,7 +7,6 @@
(provide
(struct-out terminal-derivation)
(struct-out nonterminal-derivation)
(struct-out syntax-label)
(contract-out
[parser-derivation? predicate/c]
[parser-derivation-first-terminal (-> parser-derivation? any/c)]
@ -16,16 +15,27 @@
(case->
(-> any/c terminal-derivation?)
(-> any/c parser-derivation? #:rest (listof parser-derivation?) nonterminal-derivation?))]
[parser-derivation->syntax (-> parser-derivation? syntax?)]))
[parser-derivation->syntax (-> parser-derivation? syntax?)]
[syntax-label? predicate/c]
[syntax-label (->* (any/c) (#:properties hash? #:expression-properties hash?) syntax-label?)]
[syntax-label-value (-> syntax-label? any/c)]
[syntax-label-properties (-> syntax-label? hash?)]
[syntax-label-expression-properties (-> syntax-label? hash?)]))
(require racket/match
racket/sequence
racket/struct
rebellion/collection/vector
rebellion/private/static-name
yaragg/base/token)
(module+ test
(require (submod "..")
rackunit))
;@----------------------------------------------------------------------------------------------------
@ -44,7 +54,7 @@
(struct nonterminal-derivation (label children)
#:guard
(let ([contract-guard (struct-guard/c any/c (sequence/c parser-derivation?))])
(let ([contract-guard (struct-guard/c any/c (sequence/c parser-derivation? #:min-count 1))])
(λ (label children name)
(let-values ([(label children) (contract-guard label children name)])
(values label (sequence->vector children)))))
@ -69,37 +79,86 @@
(define (parser-derivation-first-terminal derivation)
(match derivation
[(terminal-derivation value) value]
[(nonterminal-derivation _ (list first-child _ ...))
[(nonterminal-derivation _ (vector first-child _ ...))
(parser-derivation-first-terminal first-child)]))
(define (parser-derivation-last-terminal derivation)
(match derivation
[(terminal-derivation value) value]
[(nonterminal-derivation _ (list _ ... last-child))
(parser-derivation-first-terminal last-child)]))
[(nonterminal-derivation _ (vector _ ... last-child))
(parser-derivation-last-terminal last-child)]))
(module+ test
(test-case (name-string parser-derivation-first-terminal)
(test-case "terminal"
(check-equal? (parser-derivation-first-terminal (terminal-derivation 1)) 1))
(test-case "nonterminal of terminals"
(define derivation
(parser-derivation
'a
(parser-derivation 1)
(parser-derivation 2)
(parser-derivation 3)))
(check-equal? (parser-derivation-first-terminal derivation) 1))
(test-case "nonterminal of nonterminals and terminals"
(define derivation
(parser-derivation
'a
(parser-derivation 'b (parser-derivation 1))
(parser-derivation 2)
(parser-derivation 3)))
(check-equal? (parser-derivation-first-terminal derivation) 1)))
(test-case (name-string parser-derivation-last-terminal)
(test-case "terminal"
(check-equal? (parser-derivation-last-terminal (terminal-derivation 1)) 1))
(test-case "nonterminal of terminals"
(define derivation
(parser-derivation
'a
(parser-derivation 1)
(parser-derivation 2)
(parser-derivation 3)))
(check-equal? (parser-derivation-last-terminal derivation) 3))
(test-case "nonterminal of nonterminals and terminals"
(define derivation
(parser-derivation
'a
(parser-derivation 1)
(parser-derivation 2)
(parser-derivation 'b (parser-derivation 3))))
(check-equal? (parser-derivation-last-terminal derivation) 3))))
(struct syntax-label (value expression-properties properties)
#:transparent
#:constructor-name constructor:syntax-label
#:omit-define-syntaxes
#:guard
(struct-guard/c any/c
(hash/c any/c any/c #:immutable #true #:flat? #true)
(hash/c any/c any/c #:immutable #true #:flat? #true)))
(define (syntax-label value
#:properties [properties (hash)]
#:expression-properties [expression-properties (hash)])
(constructor:syntax-label value properties expression-properties))
(define (parser-derivation->syntax derivation)
(match derivation
[(terminal-derivation t) (syntax-token->syntax t)]
[(nonterminal-derivation label children)
(define first-token (parser-derivation-first-terminal derivation))
(define last-token (parser-derivation-last-terminal derivation))
(define location
(srcloc (syntax-token-source first-token)
(syntax-token-line first-token)
(syntax-token-column first-token)
(syntax-token-position first-token)
(- (syntax-token-position first-token) (syntax-token-end-position last-token))))
(define label-location
(srcloc (syntax-token-source first-token)
(syntax-token-line first-token)
@ -110,6 +169,18 @@
(for/fold ([stx (datum->syntax #false (syntax-label-value label) label-location #false)])
([(key value) (in-hash (syntax-label-properties label))])
(syntax-property stx key value)))
(for/fold ([stx (datum->syntax #false (cons label-stx children) location #false)])
(define children-syntaxes
(for/list ([child (in-vector children)])
(parser-derivation->syntax child)))
(define last-token (parser-derivation-last-terminal derivation))
(define expression-location
(srcloc (syntax-token-source first-token)
(syntax-token-line first-token)
(syntax-token-column first-token)
(syntax-token-position first-token)
(- (syntax-token-end-position last-token) (syntax-token-position first-token))))
(define expression-stx
(datum->syntax #false (cons label-stx children-syntaxes) expression-location #false))
(for/fold ([expression-stx expression-stx])
([(key value) (in-hash (syntax-label-expression-properties label))])
(syntax-property stx key value))]))
(syntax-property expression-stx key value))]))

@ -208,10 +208,10 @@
(vector-set! states (add1 k) next-states)))
(define last-state-set (vector-ref states (sub1 position-count)))
(apply stream-append
(for/list ([s (in-set last-state-set)]
#:when (earley-state-represents-successful-parse? s grammar))
(sppf-forest-derivations forest (earley-state-key s) tokens))))
(for/set ([s (in-set last-state-set)]
#:when (earley-state-represents-successful-parse? s grammar)
[derivation (in-stream (sppf-forest-derivations forest (earley-state-key s) tokens))])
derivation))
(define (completed-state? state)
@ -268,6 +268,14 @@
(earley-state-advance-substitution s #:key new-key)))
(define (grammar-parse-to-syntax grammar token-sequence)
(define tokens
(for/vector ([t token-sequence])
(token (syntax-token-type t) t)))
(for/set ([derivation (in-set (earley-parse grammar tokens))])
(parser-derivation->syntax derivation)))
(module+ test
(test-case "earley-parse integration test"
@ -297,54 +305,59 @@
(define expected-arithmetic-parse-tree
(parser-derivation
'P
(parser-derivation 'S0
(parser-derivation
'S0
(parser-derivation 'S1 (parser-derivation 'M1 (parser-derivation 'T (parser-derivation 2))))
(parser-derivation 'plus)
(parser-derivation 'M0
(parser-derivation
'M0
(parser-derivation 'M1 (parser-derivation 'T (parser-derivation 3)))
(parser-derivation 'times)
(parser-derivation 'T (parser-derivation 4))))))
(check-equal? (stream->list arithmetic-parse-forest) (list expected-arithmetic-parse-tree))))
(check-equal? arithmetic-parse-forest (set expected-arithmetic-parse-tree))))
(struct cf-syntax-production-rule (nonterminal label substitution properties label-properties)
#:transparent)
;; Grammar, input, and states taken from https://en.wikipedia.org/wiki/Earley_parser#Example
(define P-rule
(make-rule #:symbol 'P #:label (syntax-label 'P) #:substitution (list (nonterminal-symbol 'S))))
(define S-rule0
(make-rule
#:symbol 'S
#:label (syntax-label 'S0)
#:substitution (list (nonterminal-symbol 'S) (terminal-symbol '+) (nonterminal-symbol 'M))))
(struct syntax-label (value expression-properties properties) #:transparent)
(define S-rule1
(make-rule #:symbol 'S #:label (syntax-label 'S1) #:substitution (list (nonterminal-symbol 'M))))
(define M-rule0
(make-rule
#:symbol 'M
#:label (syntax-label 'M0)
#:substitution (list (nonterminal-symbol 'M) (terminal-symbol '*) (nonterminal-symbol 'T))))
(define (grammar-parse-to-syntax grammar token-sequence)
(define tokens
(for/vector ([t token-sequence])
(token (syntax-token-type t) t)))
(for/set ([derivation (in-set (earley-parse tokens))])
(derivation->syntax derivation)))
(define (derivation->syntax derivation)
(match derivation
[(terminal-derivation t) (syntax-token->syntax t)]
[(nonterminal-derivation label children)
(define first-token (parser-derivation-first-terminal derivation))
(define last-token (parser-derivation-last-terminal derivation))
(define location
(srcloc (syntax-token-source first-token)
(syntax-token-line first-token)
(syntax-token-column first-token)
(syntax-token-position first-token)
(- (syntax-token-position first-token) (syntax-token-end-position last-token))))
(define label-location
(srcloc (syntax-token-source first-token)
(syntax-token-line first-token)
(syntax-token-column first-token)
(syntax-token-position first-token)
0))
(define label-stx
(for/fold ([stx (datum->syntax #false (syntax-label-value label) label-location #false)])
([(key value) (in-hash (syntax-label-properties label))])
(syntax-property stx key value)))
(for/fold ([stx (datum->syntax #false (cons label-stx children) location #false)])
([(key value) (in-hash (syntax-label-expression-properties label))])
(syntax-property stx key value))]))
(define M-rule1
(make-rule #:symbol 'M #:label (syntax-label 'M1) #:substitution (list (nonterminal-symbol 'T))))
(define T-rule
(make-rule #:symbol 'T #:label (syntax-label 'T) #:substitution (list (terminal-symbol 'number))))
(define arithmetic-grammar
(make-grammar #:rules (list P-rule S-rule0 S-rule1 M-rule0 M-rule1 T-rule) #:start-symbol 'P))
(define input-tokens
(list
(syntax-token 'number 2 #:position 1 #:span 1)
(syntax-token '+ #:position 2 #:span 1)
(syntax-token 'number 3 #:position 3 #:span 1)
(syntax-token '* #:position 4 #:span 1)
(syntax-token 'number 4 #:position 5 #:span 1)))
(grammar-parse-to-syntax arithmetic-grammar input-tokens)
(define arithmetic-parse-forest
(grammar-parse-to-syntax arithmetic-grammar input-tokens))

Loading…
Cancel
Save