|
|
@ -3,8 +3,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
(require racket/contract
|
|
|
|
(require racket/contract
|
|
|
|
racket/match
|
|
|
|
racket/match
|
|
|
|
racket/sequence
|
|
|
|
|
|
|
|
racket/set
|
|
|
|
racket/set
|
|
|
|
|
|
|
|
racket/stream
|
|
|
|
racket/struct
|
|
|
|
racket/struct
|
|
|
|
rebellion/collection/vector
|
|
|
|
rebellion/collection/vector
|
|
|
|
rebellion/private/guarded-block)
|
|
|
|
rebellion/private/guarded-block)
|
|
|
@ -68,7 +68,17 @@
|
|
|
|
;; A (Nonterminal-Derivation T S L) represents a nonterminal that was matched by the grammar. It
|
|
|
|
;; A (Nonterminal-Derivation T S L) represents a nonterminal that was matched by the grammar. It
|
|
|
|
;; contains the nonterminal symbol of type T that was matched, the label of type L of the specific
|
|
|
|
;; contains the nonterminal symbol of type T that was matched, the label of type L of the specific
|
|
|
|
;; rule that matched, and an immutable vector of subderivations
|
|
|
|
;; rule that matched, and an immutable vector of subderivations
|
|
|
|
(struct nonterminal-derivation parser-derivation (symbol label children) #:transparent)
|
|
|
|
(struct nonterminal-derivation parser-derivation (symbol label children)
|
|
|
|
|
|
|
|
#:transparent
|
|
|
|
|
|
|
|
#:property prop:custom-print-quotable 'never
|
|
|
|
|
|
|
|
#:methods gen:custom-write
|
|
|
|
|
|
|
|
[(define write-proc
|
|
|
|
|
|
|
|
(make-constructor-style-printer
|
|
|
|
|
|
|
|
(λ (_) 'nonterminal-derivation)
|
|
|
|
|
|
|
|
(λ (this)
|
|
|
|
|
|
|
|
(list* (nonterminal-derivation-symbol this)
|
|
|
|
|
|
|
|
(nonterminal-derivation-label this)
|
|
|
|
|
|
|
|
(vector->list (nonterminal-derivation-children this))))))])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
(define (make-nonterminal-derivation symbol label [children '()])
|
|
|
|
(define (make-nonterminal-derivation symbol label [children '()])
|
|
|
@ -112,7 +122,78 @@
|
|
|
|
;; Earley parser
|
|
|
|
;; Earley parser
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
(struct earley-state (rule substitution-position input-position)
|
|
|
|
;; The hash keys are sppf-labels and the values are a list of sppf-child-pairs
|
|
|
|
|
|
|
|
(struct sppf-forest (hash))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
(define (make-sppf-forest)
|
|
|
|
|
|
|
|
(sppf-forest (make-hash)))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
(define (sppf-forest-add-node! forest label)
|
|
|
|
|
|
|
|
(define h (sppf-forest-hash forest))
|
|
|
|
|
|
|
|
(unless (hash-has-key? h label)
|
|
|
|
|
|
|
|
(hash-set! h label '())))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
(define (sppf-forest-add-child-pair! forest label #:left left-child #:right right-child)
|
|
|
|
|
|
|
|
(define pair (sppf-child-pair left-child right-child))
|
|
|
|
|
|
|
|
(hash-update! (sppf-forest-hash forest) label (λ (children) (cons pair children)) '()))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
(struct sppf-child-pair (left-child right-child) #:transparent)
|
|
|
|
|
|
|
|
(struct sppf-key () #:transparent)
|
|
|
|
|
|
|
|
(struct complete-sppf-key sppf-key (symbol input-start input-end) #:transparent)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
(struct incomplete-sppf-key sppf-key (rule substitution-position input-start input-end)
|
|
|
|
|
|
|
|
#:transparent)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
(define (sppf-key-input-end key)
|
|
|
|
|
|
|
|
(if (complete-sppf-key? key)
|
|
|
|
|
|
|
|
(complete-sppf-key-input-end key)
|
|
|
|
|
|
|
|
(incomplete-sppf-key-input-end key)))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
(define (possible-children-lists forest key)
|
|
|
|
|
|
|
|
(define hash (sppf-forest-hash forest))
|
|
|
|
|
|
|
|
(let loop ([key key] [right-children '()])
|
|
|
|
|
|
|
|
(guarded-block
|
|
|
|
|
|
|
|
(guard key else
|
|
|
|
|
|
|
|
(stream right-children))
|
|
|
|
|
|
|
|
(match-define (list (sppf-child-pair left right) ...) (hash-ref hash key '()))
|
|
|
|
|
|
|
|
(apply stream-append
|
|
|
|
|
|
|
|
(for/list ([l (in-list left)]
|
|
|
|
|
|
|
|
[r (in-list right)])
|
|
|
|
|
|
|
|
(loop l (cons r right-children)))))))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
(define (cartesian-stream streams)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
(define (combine s1 s2)
|
|
|
|
|
|
|
|
(for*/stream ([x (in-stream s1)]
|
|
|
|
|
|
|
|
[y (in-stream s2)])
|
|
|
|
|
|
|
|
(cons x y)))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
(foldr combine (stream '()) streams))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
(define (sppf-forest-derivations forest key tokens)
|
|
|
|
|
|
|
|
(define hash (sppf-forest-hash forest))
|
|
|
|
|
|
|
|
(let loop ([key key])
|
|
|
|
|
|
|
|
(guarded-block
|
|
|
|
|
|
|
|
(guard (complete-sppf-key? key) then
|
|
|
|
|
|
|
|
(define tok (vector-ref tokens (complete-sppf-key-input-start key)))
|
|
|
|
|
|
|
|
(stream (terminal-derivation (token-value tok))))
|
|
|
|
|
|
|
|
(define symbol (context-free-production-rule-nonterminal (incomplete-sppf-key-rule key)))
|
|
|
|
|
|
|
|
(define label (context-free-production-rule-label (incomplete-sppf-key-rule key)))
|
|
|
|
|
|
|
|
(define possible-children (possible-children-lists forest key))
|
|
|
|
|
|
|
|
(for*/stream ([children (in-stream possible-children)]
|
|
|
|
|
|
|
|
[processed-children (in-stream (cartesian-stream (map loop children)))])
|
|
|
|
|
|
|
|
(make-nonterminal-derivation symbol label processed-children)))))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
(struct earley-state (rule substitution-position input-position key)
|
|
|
|
#:transparent
|
|
|
|
#:transparent
|
|
|
|
#:property prop:custom-print-quotable 'never
|
|
|
|
#:property prop:custom-print-quotable 'never
|
|
|
|
#:methods gen:custom-write
|
|
|
|
#:methods gen:custom-write
|
|
|
@ -123,7 +204,7 @@
|
|
|
|
(define rule (earley-state-rule this))
|
|
|
|
(define rule (earley-state-rule this))
|
|
|
|
(define substitution (context-free-production-rule-substitution rule))
|
|
|
|
(define substitution (context-free-production-rule-substitution rule))
|
|
|
|
(define pos (earley-state-substitution-position this))
|
|
|
|
(define pos (earley-state-substitution-position this))
|
|
|
|
(list (list (context-free-production-rule-nonterminal rule) '->)
|
|
|
|
(append (list (context-free-production-rule-nonterminal rule) '->)
|
|
|
|
(for/list ([sym (in-vector substitution 0 pos)])
|
|
|
|
(for/list ([sym (in-vector substitution 0 pos)])
|
|
|
|
(if (terminal-symbol? sym)
|
|
|
|
(if (terminal-symbol? sym)
|
|
|
|
(terminal-symbol-value sym)
|
|
|
|
(terminal-symbol-value sym)
|
|
|
@ -133,18 +214,25 @@
|
|
|
|
(if (terminal-symbol? sym)
|
|
|
|
(if (terminal-symbol? sym)
|
|
|
|
(terminal-symbol-value sym)
|
|
|
|
(terminal-symbol-value sym)
|
|
|
|
(nonterminal-symbol-value sym)))
|
|
|
|
(nonterminal-symbol-value sym)))
|
|
|
|
(earley-state-input-position this)))))])
|
|
|
|
(list (earley-state-input-position this) (earley-state-key this))))))])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
(define (initial-earley-states grammar)
|
|
|
|
(define (initial-earley-states grammar)
|
|
|
|
(for/set ([rule (grammar-start-rules grammar)])
|
|
|
|
(for/set ([rule (grammar-start-rules grammar)])
|
|
|
|
(earley-state rule 0 0)))
|
|
|
|
(earley-state rule 0 0 #false)))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
(define (earley-state-represents-successful-parse? state grammar)
|
|
|
|
|
|
|
|
(and (zero? (earley-state-input-position state))
|
|
|
|
|
|
|
|
(equal? (context-free-production-rule-nonterminal (earley-state-rule state))
|
|
|
|
|
|
|
|
(context-free-grammar-start-symbol grammar))))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
(define (earley-parse grammar token-sequence)
|
|
|
|
(define (earley-parse grammar token-sequence)
|
|
|
|
(define tokens (sequence->vector token-sequence))
|
|
|
|
(define tokens (sequence->vector token-sequence))
|
|
|
|
(define token-count (vector-length tokens))
|
|
|
|
(define token-count (vector-length tokens))
|
|
|
|
(define position-count (add1 token-count))
|
|
|
|
(define position-count (add1 token-count))
|
|
|
|
|
|
|
|
(define forest (make-sppf-forest))
|
|
|
|
(define states (make-vector position-count (set)))
|
|
|
|
(define states (make-vector position-count (set)))
|
|
|
|
(vector-set! states 0 (initial-earley-states grammar))
|
|
|
|
(vector-set! states 0 (initial-earley-states grammar))
|
|
|
|
(for ([k (in-range 0 position-count)])
|
|
|
|
(for ([k (in-range 0 position-count)])
|
|
|
@ -165,7 +253,7 @@
|
|
|
|
(if (equal? j k)
|
|
|
|
(if (equal? j k)
|
|
|
|
(set-union unprocessed processed)
|
|
|
|
(set-union unprocessed processed)
|
|
|
|
(vector-ref states j)))
|
|
|
|
(vector-ref states j)))
|
|
|
|
(completer-states completed parent-states))
|
|
|
|
(completer-states completed parent-states (earley-state-key next) #:forest forest))
|
|
|
|
(define symbol (earley-state-next-symbol next))
|
|
|
|
(define symbol (earley-state-next-symbol next))
|
|
|
|
(guard (nonterminal-symbol? symbol) else
|
|
|
|
(guard (nonterminal-symbol? symbol) else
|
|
|
|
(set))
|
|
|
|
(set))
|
|
|
@ -177,48 +265,67 @@
|
|
|
|
(define processed (process-states (vector-ref states k) (set)))
|
|
|
|
(define processed (process-states (vector-ref states k) (set)))
|
|
|
|
(vector-set! states k processed)
|
|
|
|
(vector-set! states k processed)
|
|
|
|
(unless (equal? k token-count)
|
|
|
|
(unless (equal? k token-count)
|
|
|
|
(vector-set! states (add1 k) (scanner-states processed k (vector-ref tokens k)))))
|
|
|
|
(define next-states (scanner-states processed k (vector-ref tokens k) #:forest forest))
|
|
|
|
states)
|
|
|
|
(vector-set! states (add1 k) next-states)))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
(define last-state-set (vector-ref states (sub1 position-count)))
|
|
|
|
|
|
|
|
(for/first ([s (in-set last-state-set)]
|
|
|
|
|
|
|
|
#:when (earley-state-represents-successful-parse? s grammar))
|
|
|
|
|
|
|
|
(sppf-forest-derivations forest (earley-state-key s) tokens)))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
(define (completed-state? state)
|
|
|
|
(define (completed-state? state)
|
|
|
|
(match-define (earley-state rule substitution-position _) state)
|
|
|
|
(match-define (earley-state rule substitution-position _ _) state)
|
|
|
|
(equal? substitution-position
|
|
|
|
(equal? substitution-position
|
|
|
|
(vector-length (context-free-production-rule-substitution rule))))
|
|
|
|
(vector-length (context-free-production-rule-substitution rule))))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
(define/contract (earley-state-next-symbol state)
|
|
|
|
(define/contract (earley-state-next-symbol state)
|
|
|
|
(-> (and/c earley-state? (not/c completed-state?)) grammar-symbol?)
|
|
|
|
(-> (and/c earley-state? (not/c completed-state?)) grammar-symbol?)
|
|
|
|
(match-define (earley-state rule substitution-position _) state)
|
|
|
|
(match-define (earley-state rule substitution-position _ _) state)
|
|
|
|
(vector-ref (context-free-production-rule-substitution rule) substitution-position))
|
|
|
|
(vector-ref (context-free-production-rule-substitution rule) substitution-position))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
(define (earley-state-advance-substitution state)
|
|
|
|
(define (earley-state-advance-substitution state #:key key)
|
|
|
|
(match-define (earley-state rule substitution-position input-position) state)
|
|
|
|
(match-define (earley-state rule substitution-position input-position _) state)
|
|
|
|
(earley-state rule (add1 substitution-position) input-position))
|
|
|
|
(earley-state rule (add1 substitution-position) input-position key))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
(define (completer-states completed-nonterminal states)
|
|
|
|
(define (completer-states completed-nonterminal states completed-key #:forest forest)
|
|
|
|
|
|
|
|
(define expected (nonterminal-symbol completed-nonterminal))
|
|
|
|
(for/set ([s (in-set states)]
|
|
|
|
(for/set ([s (in-set states)]
|
|
|
|
#:when (equal? (earley-state-next-symbol s) (nonterminal-symbol completed-nonterminal)))
|
|
|
|
#:when (equal? (earley-state-next-symbol s) expected))
|
|
|
|
(earley-state-advance-substitution s)))
|
|
|
|
(define rule (earley-state-rule s))
|
|
|
|
|
|
|
|
(define start (earley-state-input-position s))
|
|
|
|
|
|
|
|
(define end (sppf-key-input-end completed-key))
|
|
|
|
|
|
|
|
(define old-key (earley-state-key s))
|
|
|
|
|
|
|
|
(define new-key
|
|
|
|
|
|
|
|
(incomplete-sppf-key rule (add1 (earley-state-substitution-position s)) start end))
|
|
|
|
|
|
|
|
(sppf-forest-add-child-pair! forest new-key #:left old-key #:right completed-key)
|
|
|
|
|
|
|
|
(earley-state-advance-substitution s #:key new-key)))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
(define (predictor-states grammar nonterminal k)
|
|
|
|
(define (predictor-states grammar nonterminal k)
|
|
|
|
;; add (Y → • γ, k) for every production in the grammar with Y on the left-hand side
|
|
|
|
;; add (Y → • γ, k) for every production in the grammar with Y on the left-hand side
|
|
|
|
(for/set ([rule (in-vector (context-free-grammar-rules grammar))]
|
|
|
|
(for/set ([rule (in-vector (context-free-grammar-rules grammar))]
|
|
|
|
#:when (equal? (context-free-production-rule-nonterminal rule) nonterminal))
|
|
|
|
#:when (equal? (context-free-production-rule-nonterminal rule) nonterminal))
|
|
|
|
(earley-state rule 0 k)))
|
|
|
|
(earley-state rule 0 k #false)))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
(define (scanner-states states k next-token)
|
|
|
|
(define (scanner-states states k next-token #:forest forest)
|
|
|
|
(define type (token-type next-token))
|
|
|
|
(define expected (terminal-symbol (token-type next-token)))
|
|
|
|
(define expected (terminal-symbol type))
|
|
|
|
|
|
|
|
(for/set ([s (in-set states)]
|
|
|
|
(for/set ([s (in-set states)]
|
|
|
|
#:when (not (completed-state? s))
|
|
|
|
#:when (not (completed-state? s))
|
|
|
|
#:when (equal? (earley-state-next-symbol s) expected))
|
|
|
|
#:when (equal? (earley-state-next-symbol s) expected))
|
|
|
|
(earley-state-advance-substitution s)))
|
|
|
|
(define rule (earley-state-rule s))
|
|
|
|
|
|
|
|
(define start (earley-state-input-position s))
|
|
|
|
|
|
|
|
(define end (add1 k))
|
|
|
|
|
|
|
|
(define old-key (earley-state-key s))
|
|
|
|
|
|
|
|
(define new-key
|
|
|
|
|
|
|
|
(incomplete-sppf-key rule (add1 (earley-state-substitution-position s)) start end))
|
|
|
|
|
|
|
|
(define scanned-key (complete-sppf-key expected k end))
|
|
|
|
|
|
|
|
(sppf-forest-add-child-pair! forest new-key #:left old-key #:right scanned-key)
|
|
|
|
|
|
|
|
(earley-state-advance-substitution s #:key new-key)))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
(module+ test
|
|
|
|
(module+ test
|
|
|
@ -244,45 +351,18 @@
|
|
|
|
(define input-tokens
|
|
|
|
(define input-tokens
|
|
|
|
(list (token 'number 2) (token '+ '+) (token 'number 3) (token '* '*) (token 'number 4)))
|
|
|
|
(list (token 'number 2) (token '+ '+) (token 'number 3) (token '* '*) (token 'number 4)))
|
|
|
|
|
|
|
|
|
|
|
|
(define arithmetic-parse-table
|
|
|
|
(define arithmetic-parse-forest
|
|
|
|
(earley-parse arithmetic-grammar input-tokens))
|
|
|
|
(earley-parse arithmetic-grammar input-tokens))
|
|
|
|
|
|
|
|
|
|
|
|
(define expected-arithmetic-parse-table
|
|
|
|
(define expected-arithmetic-parse-tree
|
|
|
|
(vector
|
|
|
|
(derivation
|
|
|
|
(set
|
|
|
|
'P 0
|
|
|
|
(earley-state P-rule 0 0)
|
|
|
|
(derivation 'S 0
|
|
|
|
(earley-state S-rule0 0 0)
|
|
|
|
(derivation 'S 1 (derivation 'M 1 (derivation 'T 0 (derivation 2))))
|
|
|
|
(earley-state S-rule1 0 0)
|
|
|
|
(derivation '+)
|
|
|
|
(earley-state M-rule0 0 0)
|
|
|
|
(derivation 'M 0
|
|
|
|
(earley-state M-rule1 0 0)
|
|
|
|
(derivation 'M 1 (derivation 'T 0 (derivation 3)))
|
|
|
|
(earley-state T-rule 0 0))
|
|
|
|
(derivation '*)
|
|
|
|
(set
|
|
|
|
(derivation 'T 0 (derivation 4))))))
|
|
|
|
(earley-state T-rule 1 0)
|
|
|
|
|
|
|
|
(earley-state M-rule0 1 0)
|
|
|
|
(check-equal? (stream->list arithmetic-parse-forest) (list expected-arithmetic-parse-tree))))
|
|
|
|
(earley-state M-rule1 1 0)
|
|
|
|
|
|
|
|
(earley-state S-rule0 1 0)
|
|
|
|
|
|
|
|
(earley-state S-rule1 1 0)
|
|
|
|
|
|
|
|
(earley-state P-rule 1 0))
|
|
|
|
|
|
|
|
(set
|
|
|
|
|
|
|
|
(earley-state S-rule0 2 0)
|
|
|
|
|
|
|
|
(earley-state M-rule0 0 2)
|
|
|
|
|
|
|
|
(earley-state M-rule1 0 2)
|
|
|
|
|
|
|
|
(earley-state T-rule 0 2))
|
|
|
|
|
|
|
|
(set
|
|
|
|
|
|
|
|
(earley-state T-rule 1 2)
|
|
|
|
|
|
|
|
(earley-state M-rule0 1 2)
|
|
|
|
|
|
|
|
(earley-state M-rule1 1 2)
|
|
|
|
|
|
|
|
(earley-state S-rule0 3 0)
|
|
|
|
|
|
|
|
(earley-state S-rule0 1 0)
|
|
|
|
|
|
|
|
(earley-state P-rule 1 0))
|
|
|
|
|
|
|
|
(set
|
|
|
|
|
|
|
|
(earley-state M-rule0 2 2)
|
|
|
|
|
|
|
|
(earley-state T-rule 0 4))
|
|
|
|
|
|
|
|
(set
|
|
|
|
|
|
|
|
(earley-state T-rule 1 4)
|
|
|
|
|
|
|
|
(earley-state M-rule0 3 2)
|
|
|
|
|
|
|
|
(earley-state M-rule0 1 2)
|
|
|
|
|
|
|
|
(earley-state S-rule0 3 0)
|
|
|
|
|
|
|
|
(earley-state S-rule0 1 0)
|
|
|
|
|
|
|
|
(earley-state P-rule 1 0))))
|
|
|
|
|
|
|
|
(check-equal? arithmetic-parse-table expected-arithmetic-parse-table)))
|
|
|
|
|
|
|
|