Derive datum and syntax parsers together

remotes/jackfirth/master
Jack Firth 2 years ago
parent 2e53bca272
commit fa72938368

@ -17,6 +17,7 @@
(-> semantic-action? parser-derivation? #:rest (listof parser-derivation?) (-> semantic-action? parser-derivation? #:rest (listof parser-derivation?)
nonterminal-derivation?))] nonterminal-derivation?))]
[parser-derivation->syntax (-> parser-derivation? syntax?)] [parser-derivation->syntax (-> parser-derivation? syntax?)]
[parser-derivation->datum (-> parser-derivation? any/c)]
[semantic-action? predicate/c] [semantic-action? predicate/c]
[cut-action cut-action?] [cut-action cut-action?]
[cut-action? predicate/c] [cut-action? predicate/c]
@ -46,6 +47,35 @@
;@---------------------------------------------------------------------------------------------------- ;@----------------------------------------------------------------------------------------------------
(define (semantic-action? v)
(or (cut-action? v) (splice-action? v) (label-action? v)))
(struct cut-action () #:transparent #:constructor-name constructor:cut-action #:omit-define-syntaxes)
(define cut-action (constructor:cut-action))
(struct splice-action ()
#:transparent #:constructor-name constructor:splice-action #:omit-define-syntaxes)
(define splice-action (constructor:splice-action))
(struct label-action (value expression-properties properties)
#:transparent
#:constructor-name constructor:label-action
#:omit-define-syntaxes
#:guard
(struct-guard/c any/c
(hash/c any/c any/c #:immutable #true #:flat? #true)
(hash/c any/c any/c #:immutable #true #:flat? #true)))
(define (label-action value
#:properties [properties (hash)]
#:expression-properties [expression-properties (hash)])
(constructor:label-action value properties expression-properties))
(define (parser-derivation? v) (define (parser-derivation? v)
(or (terminal-derivation? v) (nonterminal-derivation? v))) (or (terminal-derivation? v) (nonterminal-derivation? v)))
@ -146,35 +176,6 @@
(check-equal? (parser-derivation-last-terminal derivation) 3)))) (check-equal? (parser-derivation-last-terminal derivation) 3))))
(define (semantic-action? v)
(or (cut-action? v) (splice-action? v) (label-action? v)))
(struct cut-action () #:transparent #:constructor-name constructor:cut-action #:omit-define-syntaxes)
(define cut-action (constructor:cut-action))
(struct splice-action ()
#:transparent #:constructor-name constructor:splice-action #:omit-define-syntaxes)
(define splice-action (constructor:splice-action))
(struct label-action (value expression-properties properties)
#:transparent
#:constructor-name constructor:label-action
#:omit-define-syntaxes
#:guard
(struct-guard/c any/c
(hash/c any/c any/c #:immutable #true #:flat? #true)
(hash/c any/c any/c #:immutable #true #:flat? #true)))
(define (label-action value
#:properties [properties (hash)]
#:expression-properties [expression-properties (hash)])
(constructor:label-action value properties expression-properties))
(define (parser-derivation->syntax derivation) (define (parser-derivation->syntax derivation)
(define (->splice derivation) (define (->splice derivation)
(match derivation (match derivation

@ -7,9 +7,9 @@
(provide (provide
(contract-out (contract-out
[parser? predicate/c] [parser? predicate/c]
[parse-datum (-> parser? (sequence/c token?) parser-derivation?)] [parse-datum (-> parser? (sequence/c token?) any/c)]
[parse-syntax (-> parser? (sequence/c syntax-token?) syntax?)] [parse-syntax (-> parser? (sequence/c syntax-token?) syntax?)]
[parse-ambiguous-datum (-> parser? (sequence/c token?) (set/c parser-derivation?))] [parse-ambiguous-datum (-> parser? (sequence/c token?) (set/c any/c))]
[parse-ambiguous-syntax (-> parser? (sequence/c syntax-token?) (set/c syntax?))])) [parse-ambiguous-syntax (-> parser? (sequence/c syntax-token?) (set/c syntax?))]))
@ -17,9 +17,7 @@
(provide (provide
(contract-out (contract-out
[make-parser [make-parser
(-> #:datum-function (-> (sequence/c token?) (stream/c parser-derivation?)) (-> #:deriver (-> (sequence/c token?) (stream/c parser-derivation?)) parser?)])))
#:syntax-function (-> (sequence/c syntax-token?) (stream/c syntax?))
parser?)])))
(require racket/sequence (require racket/sequence
@ -32,48 +30,53 @@
;@---------------------------------------------------------------------------------------------------- ;@----------------------------------------------------------------------------------------------------
(struct parser (datum-function syntax-function)) (struct parser (deriver))
(define (make-parser #:datum-function datum-function #:syntax-function syntax-function) (define (make-parser #:deriver deriver)
(parser datum-function syntax-function)) (parser deriver))
(define (parse-ambiguous-syntax p tokens) (define (parse-ambiguous-syntax p tokens)
(for/set ([stx (in-stream ((parser-syntax-function p) tokens))]) (for/set ([derivation (in-stream ((parser-deriver p) tokens))])
stx)) (parser-derivation->syntax derivation)))
(define (parse-ambiguous-datum p tokens) (define (parse-ambiguous-datum p tokens)
(for/set ([derivation (in-stream ((parser-datum-function p) tokens))]) (for/set ([derivation (in-stream ((parser-deriver p) tokens))])
derivation)) (parser-derivation->datum derivation)))
(define (parse-syntax p tokens) (define (parse-syntax p token-sequence)
(define stx-stream ((parser-syntax-function p) tokens)) (define tokens
(when (stream-empty? stx-stream) (for/vector ([t token-sequence])
(token (syntax-token-type t) t)))
(define derivations ((parser-deriver p) tokens))
(when (stream-empty? derivations)
(raise-arguments-error 'parse-syntax "no parse trees produced" "parser" p "tokens" tokens)) (raise-arguments-error 'parse-syntax "no parse trees produced" "parser" p "tokens" tokens))
(define stx (stream-first stx-stream)) (define stx (parser-derivation->syntax (stream-first derivations)))
(unless (stream-empty? (stream-rest stx-stream)) (unless (stream-empty? (stream-rest derivations))
(raise-arguments-error 'parse-syntax (raise-arguments-error 'parse-syntax
"ambiguous parse, multiple parse trees produced" "ambiguous parse, multiple parse trees produced"
"parser" p "parser" p
"tokens" tokens "tokens" tokens
"first parse tree" stx "first parse tree" stx
"second parse tree" (stream-first (stream-rest stx-stream)))) "second parse tree"
(parser-derivation->syntax (stream-first (stream-rest derivations)))))
stx) stx)
(define (parse-datum p tokens) (define (parse-datum p tokens)
(define derivation-stream ((parser-datum-function p) tokens)) (define derivations ((parser-deriver p) tokens))
(when (stream-empty? derivation-stream) (when (stream-empty? derivations)
(raise-arguments-error 'parse-datum "no parse trees produced" "parser" p "tokens" tokens)) (raise-arguments-error 'parse-datum "no parse trees produced" "parser" p "tokens" tokens))
(define derivation (stream-first derivation-stream)) (define datum (parser-derivation->datum (stream-first derivations)))
(unless (stream-empty? (stream-rest derivation-stream)) (unless (stream-empty? (stream-rest derivations))
(raise-arguments-error 'parse-datum (raise-arguments-error 'parse-datum
"ambiguous parse, multiple parse trees produced" "ambiguous parse, multiple parse trees produced"
"parser" p "parser" p
"tokens" tokens "tokens" tokens
"first parse tree" derivation "first parse tree" datum
"second parse tree" (stream-first (stream-rest derivation-stream)))) "second parse tree"
derivation) (parser-derivation->datum (stream-first (stream-rest derivations)))))
datum)

@ -32,8 +32,7 @@
(define (earley-parser grammar) (define (earley-parser grammar)
(make-parser #:datum-function (λ (tokens) (earley-parse-datum grammar tokens)) (make-parser #:deriver (λ (tokens) (earley-parse grammar tokens))))
#:syntax-function (λ (tokens) (earley-parse-syntax grammar tokens))))
;; The hash keys are sppf-keys and the values are a list of sppf-child-pairs ;; The hash keys are sppf-keys and the values are a list of sppf-child-pairs
@ -102,11 +101,11 @@
(guard (complete-sppf-key? key) then (guard (complete-sppf-key? key) then
(define tok (vector-ref tokens (complete-sppf-key-input-start key))) (define tok (vector-ref tokens (complete-sppf-key-input-start key)))
(stream (terminal-derivation (token-value tok)))) (stream (terminal-derivation (token-value tok))))
(define label (cf-production-rule-action (incomplete-sppf-key-rule key))) (define action (cf-production-rule-action (incomplete-sppf-key-rule key)))
(define possible-children (possible-children-lists forest key)) (define possible-children (possible-children-lists forest key))
(for*/stream ([children (in-stream possible-children)] (for*/stream ([children (in-stream possible-children)]
[processed-children (in-stream (cartesian-stream (map loop children)))]) [processed-children (in-stream (cartesian-stream (map loop children)))])
(nonterminal-derivation label processed-children))))) (nonterminal-derivation action processed-children)))))
(struct earley-state (rule substitution-position input-position key) (struct earley-state (rule substitution-position input-position key)
@ -144,7 +143,7 @@
(cf-grammar-start-symbol grammar)))) (cf-grammar-start-symbol grammar))))
(define (earley-parse-datum grammar token-sequence) (define (earley-parse grammar token-sequence)
(define tokens (sequence->vector token-sequence)) (define tokens (sequence->vector token-sequence))
(define token-count (vector-length tokens)) (define token-count (vector-length tokens))
(define position-count (add1 token-count)) (define position-count (add1 token-count))
@ -185,9 +184,9 @@
(vector-set! states (add1 k) next-states))) (vector-set! states (add1 k) next-states)))
(define last-state-set (vector-ref states (sub1 position-count))) (define last-state-set (vector-ref states (sub1 position-count)))
(for/set ([s (in-set last-state-set)] (for/stream ([s (in-set last-state-set)]
#:when (earley-state-represents-successful-parse? s grammar) #:when (earley-state-represents-successful-parse? s grammar)
[derivation (in-stream (sppf-forest-derivations forest (earley-state-key s) tokens))]) [derivation (in-stream (sppf-forest-derivations forest (earley-state-key s) tokens))])
derivation)) derivation))
@ -245,102 +244,46 @@
(earley-state-advance-substitution s #:key new-key))) (earley-state-advance-substitution s #:key new-key)))
(define (earley-parse-syntax grammar token-sequence)
(define tokens
(for/vector ([t token-sequence])
(token (syntax-token-type t) t)))
(for/set ([derivation (in-set (earley-parse-datum grammar tokens))])
(parser-derivation->syntax derivation)))
(module+ test (module+ test
(test-case "earley-parser integration test" (test-case "earley-parser integration test"
;; Grammar and input taken from https://en.wikipedia.org/wiki/Earley_parser#Example ;; Grammar and input taken from https://en.wikipedia.org/wiki/Earley_parser#Example
(define P-rule
(make-cf-production-rule
#:symbol 'P #:action (label-action 'P) #:substitution (list (nonterminal-symbol 'S))))
(define S-rule0
(make-cf-production-rule
#:symbol 'S
#:action (label-action 'S0)
#:substitution (list (nonterminal-symbol 'S) (terminal-symbol '+) (nonterminal-symbol 'M))))
(define S-rule1
(make-cf-production-rule
#:symbol 'S #:action (label-action 'S1) #:substitution (list (nonterminal-symbol 'M))))
(define M-rule0
(make-cf-production-rule
#:symbol 'M
#:action (label-action 'M0)
#:substitution (list (nonterminal-symbol 'M) (terminal-symbol '*) (nonterminal-symbol 'T))))
(define M-rule1
(make-cf-production-rule
#:symbol 'M #:action (label-action 'M1) #:substitution (list (nonterminal-symbol 'T))))
(define T-rule
(make-cf-production-rule
#:symbol 'T #:action (label-action 'T) #:substitution (list (terminal-symbol 'number))))
(define arithmetic-grammar
(make-cf-grammar
#:rules (list P-rule S-rule0 S-rule1 M-rule0 M-rule1 T-rule) #:start-symbol 'P))
(define parser (earley-parser arithmetic-grammar))
(test-case "datum parser" (test-case "datum parser"
(define P-rule
(make-cf-production-rule
#:symbol 'P #:action (label-action 'P) #:substitution (list (nonterminal-symbol 'S))))
(define S-rule0
(make-cf-production-rule
#:symbol 'S
#:action (label-action 'S0)
#:substitution (list (nonterminal-symbol 'S) (terminal-symbol '+) (nonterminal-symbol 'M))))
(define S-rule1
(make-cf-production-rule
#:symbol 'S #:action (label-action 'S1) #:substitution (list (nonterminal-symbol 'M))))
(define M-rule0
(make-cf-production-rule
#:symbol 'M
#:action (label-action 'M0)
#:substitution (list (nonterminal-symbol 'M) (terminal-symbol '*) (nonterminal-symbol 'T))))
(define M-rule1
(make-cf-production-rule
#:symbol 'M #:action (label-action 'M1) #:substitution (list (nonterminal-symbol 'T))))
(define T-rule
(make-cf-production-rule
#:symbol 'T #:action (label-action 'T) #:substitution (list (terminal-symbol 'number))))
(define arithmetic-grammar
(make-cf-grammar
#:rules (list P-rule S-rule0 S-rule1 M-rule0 M-rule1 T-rule) #:start-symbol 'P))
(define input-tokens (define input-tokens
(list (list
(token 'number 2) (token '+ 'plus) (token 'number 3) (token '* 'times) (token 'number 4))) (token 'number 2) (token '+ 'plus) (token 'number 3) (token '* 'times) (token 'number 4)))
(define parser (earley-parser arithmetic-grammar))
(define expected-arithmetic-parse-tree (define expected-arithmetic-parse-tree
(parser-derivation '(P (S0 (S1 (M1 (T 2))) plus (M0 (M1 (T 3)) times (T 4)))))
(label-action 'P)
(parser-derivation
(label-action 'S0)
(parser-derivation
(label-action 'S1)
(parser-derivation
(label-action 'M1) (parser-derivation (label-action 'T) (parser-derivation 2))))
(parser-derivation 'plus)
(parser-derivation
(label-action 'M0)
(parser-derivation
(label-action 'M1) (parser-derivation (label-action 'T) (parser-derivation 3)))
(parser-derivation 'times)
(parser-derivation (label-action 'T) (parser-derivation 4))))))
(check-equal? (parse-datum parser input-tokens) expected-arithmetic-parse-tree)) (check-equal? (parse-datum parser input-tokens) expected-arithmetic-parse-tree))
(test-case "syntax parser" (test-case "syntax parser"
(define P-rule
(make-cf-production-rule
#:symbol 'P #:action (label-action 'P) #:substitution (list (nonterminal-symbol 'S))))
(define S-rule0
(make-cf-production-rule
#:symbol 'S
#:action (label-action 'S0)
#:substitution (list (nonterminal-symbol 'S) (terminal-symbol '+) (nonterminal-symbol 'M))))
(define S-rule1
(make-cf-production-rule
#:symbol 'S #:action (label-action 'S1) #:substitution (list (nonterminal-symbol 'M))))
(define M-rule0
(make-cf-production-rule
#:symbol 'M
#:action (label-action 'M0)
#:substitution (list (nonterminal-symbol 'M) (terminal-symbol '*) (nonterminal-symbol 'T))))
(define M-rule1
(make-cf-production-rule
#:symbol 'M #:action (label-action 'M1) #:substitution (list (nonterminal-symbol 'T))))
(define T-rule
(make-cf-production-rule
#:symbol 'T #:action (label-action 'T) #:substitution (list (terminal-symbol 'number))))
(define arithmetic-grammar
(make-cf-grammar
#:rules (list P-rule S-rule0 S-rule1 M-rule0 M-rule1 T-rule) #:start-symbol 'P))
(define input-tokens (define input-tokens
(list (list
(syntax-token 'number 2 #:position 1 #:span 1) (syntax-token 'number 2 #:position 1 #:span 1)
@ -348,8 +291,5 @@
(syntax-token 'number 3 #:position 3 #:span 1) (syntax-token 'number 3 #:position 3 #:span 1)
(syntax-token '* #:position 4 #:span 1) (syntax-token '* #:position 4 #:span 1)
(syntax-token 'number 4 #:position 5 #:span 1))) (syntax-token 'number 4 #:position 5 #:span 1)))
(define parser (earley-parser arithmetic-grammar))
(check-equal? (syntax->datum (parse-syntax parser input-tokens)) (check-equal? (syntax->datum (parse-syntax parser input-tokens))
'(P (S0 (S1 (M1 (T 2))) + (M0 (M1 (T 3)) * (T 4)))))))) '(P (S0 (S1 (M1 (T 2))) + (M0 (M1 (T 3)) * (T 4))))))))

Loading…
Cancel
Save