You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
498 lines
19 KiB
Racket
498 lines
19 KiB
Racket
#lang racket/base
|
|
|
|
|
|
(require racket/contract/base)
|
|
|
|
|
|
(provide
|
|
(contract-out
|
|
[grammar (-> #:rules (sequence/c production-rule?) #:start-symbol nonterminal-symbol? grammar?)]
|
|
[grammar? predicate/c]
|
|
[production-rule
|
|
(-> #:nonterminal nonterminal-symbol?
|
|
#:action semantic-action?
|
|
#:substitution (or/c production-expression? grammar-symbol?)
|
|
production-rule?)]
|
|
[production-rule? predicate/c]
|
|
[production-expression? predicate/c]
|
|
[group-expression (-> (sequence/c (or/c production-expression? grammar-symbol?)) group-expression?)]
|
|
[group-expression? predicate/c]
|
|
[choice-expression
|
|
(-> (sequence/c (or/c production-expression? grammar-symbol?)) choice-expression?)]
|
|
[choice-expression? predicate/c]
|
|
[repetition-expression
|
|
(->* ((or/c production-expression? grammar-symbol?))
|
|
(#:min-count exact-nonnegative-integer? #:max-count (or/c exact-nonnegative-integer? +inf.0))
|
|
repetition-expression?)]
|
|
[repetition-expression? predicate/c]
|
|
[cut-expression (-> (or/c production-expression? grammar-symbol?) cut-expression?)]
|
|
[cut-expression? predicate/c]))
|
|
|
|
|
|
(module+ private
|
|
(provide
|
|
(contract-out
|
|
[grammar-flatten (-> grammar? flat-grammar?)]
|
|
[flat-grammar
|
|
(-> #:rules (sequence/c flat-production-rule?) #:start-symbol nonterminal-symbol? flat-grammar?)]
|
|
[flat-grammar? predicate/c]
|
|
[flat-grammar-rules (-> flat-grammar? (vectorof flat-production-rule? #:immutable #true))]
|
|
[flat-grammar-start-rules (-> flat-grammar? (set/c flat-production-rule? #:kind 'immutable))]
|
|
[flat-grammar-start-symbol (-> flat-grammar? any/c)]
|
|
[flat-production-rule
|
|
(-> #:nonterminal nonterminal-symbol?
|
|
#:substitution (sequence/c grammar-symbol?)
|
|
#:action semantic-action?
|
|
flat-production-rule?)]
|
|
[flat-production-rule? predicate/c]
|
|
[flat-production-rule-action (-> flat-production-rule? semantic-action?)]
|
|
[flat-production-rule-nonterminal (-> flat-production-rule? any/c)]
|
|
[flat-production-rule-substitution
|
|
(-> flat-production-rule? (vectorof grammar-symbol? #:immutable #true))])))
|
|
|
|
|
|
(require racket/list
|
|
racket/match
|
|
racket/sequence
|
|
racket/set
|
|
rebellion/collection/vector
|
|
rebellion/collection/vector/builder
|
|
yaragg/base/semantic-action
|
|
yaragg/base/symbol)
|
|
|
|
|
|
(module+ test
|
|
(require (submod "..")
|
|
rackunit
|
|
rebellion/private/static-name))
|
|
|
|
|
|
;@----------------------------------------------------------------------------------------------------
|
|
|
|
|
|
;; Parsing takes a (Grammar T S A) and a sequence of (Token T V) and produces a set of
|
|
;; (Parser-Derivation V A) (also called a "parse forest"). A grammar contains an immutable
|
|
;; vector of (Context-Free-Production-Rule T S A) and a start symbol of type S.
|
|
;; T: the terminals the grammar parses. Corresponds to the type field of the input tokens.
|
|
;; S: the nonterminals the grammar rules are defined in terms of.
|
|
;; A: the labels that grammar rules may have attached to them via the (Label-Action A) semantic
|
|
;; action. These show up in parse tree branches, and can be used to determine which production
|
|
;; rule produced a derivation.
|
|
(struct flat-grammar (rules start-symbol)
|
|
#:transparent
|
|
#:omit-define-syntaxes
|
|
#:constructor-name constructor:flat-grammar)
|
|
|
|
|
|
(define (flat-grammar-start-rules grammar)
|
|
(define start (flat-grammar-start-symbol grammar))
|
|
(for/set ([rule (in-vector (flat-grammar-rules grammar))]
|
|
#:when (equal? (flat-production-rule-nonterminal rule) start))
|
|
rule))
|
|
|
|
|
|
;; A (Context-Free-Production-Rule T S A) contains a nonterminal symbol of type S, semantic action of
|
|
;; type (Semnatic-Action A), and a substitution sequence of (Grammar-Symbol T S) values, stored in an
|
|
;; immutable vector.
|
|
(struct flat-production-rule (nonterminal action substitution)
|
|
#:transparent
|
|
#:omit-define-syntaxes
|
|
#:constructor-name constructor:flat-production-rule)
|
|
|
|
|
|
(define (flat-grammar #:rules rules #:start-symbol start)
|
|
(constructor:flat-grammar (sequence->vector rules) start))
|
|
|
|
|
|
(define (flat-production-rule #:nonterminal nonterminal
|
|
#:action action
|
|
#:substitution substitution)
|
|
(constructor:flat-production-rule nonterminal action (sequence->vector substitution)))
|
|
|
|
|
|
(struct grammar (rules start-symbol)
|
|
#:transparent
|
|
#:guard (λ (rules start-symbol _) (values (sequence->vector rules) start-symbol))
|
|
#:omit-define-syntaxes
|
|
#:constructor-name constructor:grammar)
|
|
|
|
|
|
(define (grammar #:rules rules #:start-symbol start-symbol)
|
|
(constructor:grammar rules start-symbol))
|
|
|
|
|
|
(struct production-rule (nonterminal action substitution)
|
|
#:transparent
|
|
#:omit-define-syntaxes
|
|
#:constructor-name constructor:production-rule)
|
|
|
|
|
|
(define (production-rule #:nonterminal nonterminal #:action action #:substitution substitution)
|
|
(constructor:production-rule nonterminal action substitution))
|
|
|
|
|
|
(struct production-expression () #:transparent)
|
|
|
|
|
|
(struct group-expression production-expression (subexpressions)
|
|
#:transparent
|
|
#:guard (λ (subexpressions _) (sequence->vector subexpressions)))
|
|
|
|
|
|
(struct choice-expression production-expression (choices)
|
|
#:guard (λ (choices _) (sequence->vector choices))
|
|
#:transparent)
|
|
|
|
|
|
(struct repetition-expression production-expression (subexpression min-count max-count)
|
|
#:transparent
|
|
#:omit-define-syntaxes
|
|
#:constructor-name constructor:repetition-expression)
|
|
|
|
|
|
(define (repetition-expression subexpression #:min-count [min-count 0] #:max-count [max-count +inf.0])
|
|
(constructor:repetition-expression subexpression min-count max-count))
|
|
|
|
|
|
(struct cut-expression production-expression (subexpression) #:transparent)
|
|
|
|
|
|
(struct virtual-symbol (base-symbol counter) #:transparent)
|
|
|
|
|
|
(define (production-rule-flatten rule)
|
|
|
|
(define original-nonterminal (production-rule-nonterminal rule))
|
|
|
|
(define new-rules (make-vector-builder))
|
|
(define counter 0)
|
|
|
|
(define (fresh-symbol!)
|
|
(define sym
|
|
(nonterminal-symbol (virtual-symbol (nonterminal-symbol-value original-nonterminal) counter)))
|
|
(set! counter (add1 counter))
|
|
sym)
|
|
|
|
(define (process-top-level-expression expression)
|
|
(match expression
|
|
[(group-expression subexpressions)
|
|
(for/vector ([expression (in-vector subexpressions)])
|
|
(process-expression expression))]
|
|
[_ (list (process-expression expression))]))
|
|
|
|
(define (process-expression expression)
|
|
(match expression
|
|
[(? terminal-symbol?) expression]
|
|
[(? nonterminal-symbol?) expression]
|
|
|
|
[(group-expression subexpressions)
|
|
(define subrule-symbol (fresh-symbol!))
|
|
(define group-symbols
|
|
(for/vector ([subexpr (in-vector subexpressions)])
|
|
(process-expression subexpr)))
|
|
(define group-rule
|
|
(flat-production-rule
|
|
#:nonterminal subrule-symbol #:action splice-action #:substitution group-symbols))
|
|
(vector-builder-add new-rules group-rule)
|
|
subrule-symbol]
|
|
|
|
[(choice-expression choices)
|
|
(define subrule-symbol (fresh-symbol!))
|
|
(define choice-symbol-vectors
|
|
(for/list ([choice (in-vector choices)])
|
|
(process-top-level-expression choice)))
|
|
|
|
;; We reverse the order the choice symbols are added to ensure the final vector of added rules
|
|
;; is in the right order.
|
|
(for ([choice-symbols (in-list (reverse choice-symbol-vectors))])
|
|
(define choice-rule
|
|
(flat-production-rule
|
|
#:nonterminal subrule-symbol #:action splice-action #:substitution choice-symbols))
|
|
(vector-builder-add new-rules choice-rule))
|
|
subrule-symbol]
|
|
|
|
[(? repetition-expression?)
|
|
#:when (and (equal? (repetition-expression-min-count expression) 0)
|
|
(equal? (repetition-expression-max-count expression) +inf.0))
|
|
(define subexpression (repetition-expression-subexpression expression))
|
|
(define subrule-symbol (fresh-symbol!))
|
|
(define repetition-symbols (process-top-level-expression subexpression))
|
|
(define empty-rule
|
|
(flat-production-rule
|
|
#:nonterminal subrule-symbol #:action splice-action #:substitution '()))
|
|
(define repetition-rule
|
|
(flat-production-rule
|
|
#:nonterminal subrule-symbol
|
|
#:action splice-action
|
|
#:substitution
|
|
(sequence-append repetition-symbols (list subrule-symbol))))
|
|
(vector-builder-add new-rules repetition-rule empty-rule)
|
|
subrule-symbol]
|
|
|
|
[(cut-expression subexpression)
|
|
(define subrule-symbol (fresh-symbol!))
|
|
(define subexpr-symbols (process-top-level-expression subexpression))
|
|
|
|
(define choice-rule
|
|
(flat-production-rule
|
|
#:nonterminal subrule-symbol #:action cut-action #:substitution subexpr-symbols))
|
|
(vector-builder-add new-rules choice-rule)
|
|
subrule-symbol]))
|
|
|
|
(define processed
|
|
(process-top-level-expression
|
|
(production-expression-simplify (production-rule-substitution rule))))
|
|
|
|
(define top-level-rule
|
|
(flat-production-rule
|
|
#:nonterminal original-nonterminal
|
|
#:action (production-rule-action rule)
|
|
#:substitution processed))
|
|
(vector-builder-add new-rules top-level-rule)
|
|
(vector-reverse (build-vector new-rules)))
|
|
|
|
|
|
(define (production-expression-simplify expression)
|
|
(match expression
|
|
[(? terminal-symbol?) expression]
|
|
[(? nonterminal-symbol?) expression]
|
|
|
|
[(group-expression subexpressions)
|
|
(group-expression
|
|
(for/vector ([subexpr (in-vector subexpressions)])
|
|
(production-expression-simplify subexpr)))]
|
|
|
|
[(choice-expression choices)
|
|
(choice-expression
|
|
(for/vector ([subexpr (in-vector choices)])
|
|
(production-expression-simplify subexpr)))]
|
|
|
|
[(? repetition-expression?)
|
|
(match* ((repetition-expression-min-count expression)
|
|
(repetition-expression-max-count expression))
|
|
[(0 +inf.0)
|
|
(repetition-expression
|
|
(production-expression-simplify (repetition-expression-subexpression expression)))]
|
|
[(0 max)
|
|
(define subexpr
|
|
(production-expression-simplify (repetition-expression-subexpression expression)))
|
|
(for/fold ([expr (choice-expression (list (group-expression '()) subexpr))])
|
|
([_ (in-range (sub1 max))])
|
|
(choice-expression (list (group-expression '()) (group-expression (list subexpr expr)))))]
|
|
[(min +inf.0)
|
|
(define subexpr
|
|
(production-expression-simplify (repetition-expression-subexpression expression)))
|
|
(group-expression
|
|
(sequence-append (make-list min subexpr) (list (repetition-expression subexpr))))]
|
|
[(min max)
|
|
(define subexpr
|
|
(production-expression-simplify (repetition-expression-subexpression expression)))
|
|
(define tail-expr
|
|
(for/fold ([expr (choice-expression (list (group-expression '()) subexpr))])
|
|
([_ (in-range (- max min 1))])
|
|
(choice-expression (list (group-expression '()) (group-expression (list subexpr expr))))))
|
|
(group-expression (sequence-append (make-list min subexpr) (list tail-expr)))])]
|
|
|
|
[(cut-expression subexpr) (cut-expression (production-expression-simplify subexpr))]))
|
|
|
|
|
|
(define (vector-reverse vec)
|
|
(define size (vector-length vec))
|
|
(define copy (make-vector size))
|
|
(for/vector ([i (in-range 0 size)])
|
|
(define j (- size i 1))
|
|
(vector-set! copy j (vector-ref vec i)))
|
|
(vector->immutable-vector copy))
|
|
|
|
|
|
(define (grammar-flatten grammar)
|
|
(define builder (make-vector-builder))
|
|
(for ([rule (in-vector (grammar-rules grammar))])
|
|
(vector-builder-add-all builder (production-rule-flatten rule)))
|
|
(flat-grammar #:rules (build-vector builder) #:start-symbol (grammar-start-symbol grammar)))
|
|
|
|
|
|
(module+ test
|
|
(test-case (name-string production-rule-flatten)
|
|
|
|
(define x (nonterminal-symbol 'x))
|
|
(define x.0 (nonterminal-symbol (virtual-symbol 'x 0)))
|
|
(define x.1 (nonterminal-symbol (virtual-symbol 'x 1)))
|
|
(define x.2 (nonterminal-symbol (virtual-symbol 'x 2)))
|
|
(define x.3 (nonterminal-symbol (virtual-symbol 'x 3)))
|
|
(define x.4 (nonterminal-symbol (virtual-symbol 'x 4)))
|
|
(define a (terminal-symbol 'a))
|
|
(define b (terminal-symbol 'b))
|
|
(define c (terminal-symbol 'c))
|
|
(define d (terminal-symbol 'd))
|
|
|
|
;; x: a, (b, c), d
|
|
;;
|
|
;; ->
|
|
;;
|
|
;; x: a, x.0, d
|
|
;; x.0: b, c (* splice *)
|
|
(test-case "grouping"
|
|
(define rule
|
|
(production-rule
|
|
#:nonterminal x
|
|
#:action (label-action 'x)
|
|
#:substitution (group-expression (list a (group-expression (list b c)) d))))
|
|
(define expected-rules
|
|
(vector
|
|
(flat-production-rule
|
|
#:nonterminal x #:action (label-action 'x) #:substitution (list a x.0 d))
|
|
(flat-production-rule #:nonterminal x.0 #:action splice-action #:substitution (list b c))))
|
|
(check-equal? (production-rule-flatten rule) expected-rules))
|
|
|
|
;; x: a, (b | c), d
|
|
;;
|
|
;; ->
|
|
;;
|
|
;; x: a, x.0, d
|
|
;; x.0: b (* splice *)
|
|
;; x.0: c (* splice *)
|
|
(test-case "choice"
|
|
(define rule
|
|
(production-rule
|
|
#:nonterminal x
|
|
#:action (label-action 'x)
|
|
#:substitution (group-expression (list a (choice-expression (list b c)) d))))
|
|
(define expected-rules
|
|
(vector
|
|
(flat-production-rule
|
|
#:nonterminal x #:action (label-action 'x) #:substitution (list a x.0 d))
|
|
(flat-production-rule #:nonterminal x.0 #:action splice-action #:substitution (list b))
|
|
(flat-production-rule #:nonterminal x.0 #:action splice-action #:substitution (list c))))
|
|
(check-equal? (production-rule-flatten rule) expected-rules))
|
|
|
|
;; x: a b*
|
|
;;
|
|
;; ->
|
|
;;
|
|
;; x: a x.0
|
|
;; x.0: (* splice *)
|
|
;; x.0: b x.0 (* splice *)
|
|
(test-case "repeating"
|
|
(define rule
|
|
(production-rule
|
|
#:nonterminal x
|
|
#:action (label-action 'x)
|
|
#:substitution (group-expression (list a (repetition-expression b) c))))
|
|
(define expected-rules
|
|
(vector
|
|
(flat-production-rule
|
|
#:nonterminal x #:action (label-action 'x) #:substitution (list a x.0 c))
|
|
(flat-production-rule #:nonterminal x.0 #:action splice-action #:substitution '())
|
|
(flat-production-rule #:nonterminal x.0 #:action splice-action #:substitution (list b x.0))))
|
|
(check-equal? (production-rule-flatten rule) expected-rules))
|
|
|
|
;; x: a b{3..} c
|
|
;;
|
|
;; ->
|
|
;;
|
|
;; x: a x.0 c
|
|
;; x.0: b b b x.1 (* splice *)
|
|
;; x.1: (* splice *)
|
|
;; x.1: b x.1 (* splice *)
|
|
(test-case "repeating with min count"
|
|
(define rule
|
|
(production-rule
|
|
#:nonterminal x
|
|
#:action (label-action 'x)
|
|
#:substitution (group-expression (list a (repetition-expression b #:min-count 3) c))))
|
|
(define expected-rules
|
|
(vector
|
|
(flat-production-rule
|
|
#:nonterminal x #:action (label-action 'x) #:substitution (list a x.0 c))
|
|
(flat-production-rule
|
|
#:nonterminal x.0 #:action splice-action #:substitution (list b b b x.1))
|
|
(flat-production-rule #:nonterminal x.1 #:action splice-action #:substitution '())
|
|
(flat-production-rule #:nonterminal x.1 #:action splice-action #:substitution (list b x.1))))
|
|
(check-equal? (production-rule-flatten rule) expected-rules))
|
|
|
|
;; x: a b{0..5} c
|
|
;;
|
|
;; ->
|
|
;;
|
|
;; x: a x.0 c
|
|
;; x.0: (* splice *)
|
|
;; x.0: b x.1 (* splice *)
|
|
;; x.1: (* splice *)
|
|
;; x.1: b x.2 (* splice *)
|
|
;; x.2: (* splice *)
|
|
;; x.2: b x.3 (* splice *)
|
|
;; x.3: (* splice *)
|
|
;; x.3: b x.4 (* splice *)
|
|
;; x.4: (* splice *)
|
|
;; x.4: b (* splice *)
|
|
(test-case "repeating with max count"
|
|
(define rule
|
|
(production-rule
|
|
#:nonterminal x
|
|
#:action (label-action 'x)
|
|
#:substitution (group-expression (list a (repetition-expression b #:max-count 5) c))))
|
|
(define expected-rules
|
|
(vector
|
|
(flat-production-rule
|
|
#:nonterminal x #:action (label-action 'x) #:substitution (list a x.0 c))
|
|
(flat-production-rule #:nonterminal x.0 #:action splice-action #:substitution '())
|
|
(flat-production-rule #:nonterminal x.0 #:action splice-action #:substitution (list b x.1))
|
|
(flat-production-rule #:nonterminal x.1 #:action splice-action #:substitution '())
|
|
(flat-production-rule #:nonterminal x.1 #:action splice-action #:substitution (list b x.2))
|
|
(flat-production-rule #:nonterminal x.2 #:action splice-action #:substitution '())
|
|
(flat-production-rule #:nonterminal x.2 #:action splice-action #:substitution (list b x.3))
|
|
(flat-production-rule #:nonterminal x.3 #:action splice-action #:substitution '())
|
|
(flat-production-rule #:nonterminal x.3 #:action splice-action #:substitution (list b x.4))
|
|
(flat-production-rule #:nonterminal x.4 #:action splice-action #:substitution '())
|
|
(flat-production-rule #:nonterminal x.4 #:action splice-action #:substitution (list b))))
|
|
(check-equal? (production-rule-flatten rule) expected-rules))
|
|
|
|
;; x: a b{3..5} c
|
|
;;
|
|
;; ->
|
|
;;
|
|
;; x: a x.0 c
|
|
;; x.0: b b b x.1 (* splice *)
|
|
;; x.1: (* splice *)
|
|
;; x.1: b x.2 (* splice *)
|
|
;; x.2: (* splice *)
|
|
;; x.2: b (* splice *)
|
|
(test-case "repeating with min and max count"
|
|
(define rule
|
|
(production-rule
|
|
#:nonterminal x
|
|
#:action (label-action 'x)
|
|
#:substitution
|
|
(group-expression (list a (repetition-expression b #:min-count 3 #:max-count 5) c))))
|
|
(define expected-rules
|
|
(vector
|
|
(flat-production-rule
|
|
#:nonterminal x #:action (label-action 'x) #:substitution (list a x.0 c))
|
|
(flat-production-rule
|
|
#:nonterminal x.0 #:action splice-action #:substitution (list b b b x.1))
|
|
(flat-production-rule #:nonterminal x.1 #:action splice-action #:substitution '())
|
|
(flat-production-rule #:nonterminal x.1 #:action splice-action #:substitution (list b x.2))
|
|
(flat-production-rule #:nonterminal x.2 #:action splice-action #:substitution '())
|
|
(flat-production-rule #:nonterminal x.2 #:action splice-action #:substitution (list b))))
|
|
(check-equal? (production-rule-flatten rule) expected-rules))
|
|
|
|
;; x: a \(b c) d
|
|
;;
|
|
;; ->
|
|
;;
|
|
;; x: a x.0 c
|
|
;; x.0: b c (* cut *)
|
|
(test-case "cuts"
|
|
(define rule
|
|
(production-rule
|
|
#:nonterminal x
|
|
#:action (label-action 'x)
|
|
#:substitution (group-expression (list a (cut-expression (group-expression (list b c))) d))))
|
|
(define expected-rules
|
|
(vector
|
|
(flat-production-rule
|
|
#:nonterminal x #:action (label-action 'x) #:substitution (list a x.0 d))
|
|
(flat-production-rule #:nonterminal x.0 #:action cut-action #:substitution (list b c))))
|
|
(check-equal? (production-rule-flatten rule) expected-rules))))
|