Rearrange things and implement syntax object builder

remotes/jackfirth/master
Jack Firth 3 years ago
parent beba017631
commit 9c6624f19e

@ -0,0 +1,115 @@
#lang racket/base
(require racket/contract/base)
(provide
(struct-out terminal-derivation)
(struct-out nonterminal-derivation)
(struct-out syntax-label)
(contract-out
[parser-derivation? predicate/c]
[parser-derivation-first-terminal (-> parser-derivation? any/c)]
[parser-derivation-last-terminal (-> parser-derivation? any/c)]
[parser-derivation
(case->
(-> any/c terminal-derivation?)
(-> any/c parser-derivation? #:rest (listof parser-derivation?) nonterminal-derivation?))]
[parser-derivation->syntax (-> parser-derivation? syntax?)]))
(require racket/match
racket/sequence
racket/struct
rebellion/collection/vector
yaragg/base/token)
;@----------------------------------------------------------------------------------------------------
(define (parser-derivation? v)
(or (terminal-derivation? v) (nonterminal-derivation? v)))
;; A (Terminal-Derivation V) represents a terminal that was matched by the grammar. It contains the
;; value V of the (Token T V) that was matched.
(struct terminal-derivation (value) #:transparent)
;; A (Nonterminal-Derivation V L) represents a nonterminal that was matched by the grammar. It
;; contains the label of type L of the production rule that matched, and an immutable vector of
;; subderivations
(struct nonterminal-derivation (label children)
#:guard
(let ([contract-guard (struct-guard/c any/c (sequence/c parser-derivation?))])
(λ (label children name)
(let-values ([(label children) (contract-guard label children name)])
(values label (sequence->vector children)))))
#:transparent
#:property prop:custom-print-quotable 'never
#:methods gen:custom-write
[(define write-proc
(make-constructor-style-printer
(λ (_) 'nonterminal-derivation)
(λ (this)
(cons (nonterminal-derivation-label this)
(vector->list (nonterminal-derivation-children this))))))])
(define parser-derivation
(case-lambda
[(value) (terminal-derivation value)]
[(label first-child . children) (nonterminal-derivation label (cons first-child children))]))
(define (parser-derivation-first-terminal derivation)
(match derivation
[(terminal-derivation value) value]
[(nonterminal-derivation _ (list first-child _ ...))
(parser-derivation-first-terminal first-child)]))
(define (parser-derivation-last-terminal derivation)
(match derivation
[(terminal-derivation value) value]
[(nonterminal-derivation _ (list _ ... last-child))
(parser-derivation-first-terminal last-child)]))
(struct syntax-label (value expression-properties properties)
#:transparent
#:guard
(struct-guard/c any/c
(hash/c any/c any/c #:immutable #true #:flat? #true)
(hash/c any/c any/c #:immutable #true #:flat? #true)))
(define (parser-derivation->syntax derivation)
(match derivation
[(terminal-derivation t) (syntax-token->syntax t)]
[(nonterminal-derivation label children)
(define first-token (parser-derivation-first-terminal derivation))
(define last-token (parser-derivation-last-terminal derivation))
(define location
(srcloc (syntax-token-source first-token)
(syntax-token-line first-token)
(syntax-token-column first-token)
(syntax-token-position first-token)
(- (syntax-token-position first-token) (syntax-token-end-position last-token))))
(define label-location
(srcloc (syntax-token-source first-token)
(syntax-token-line first-token)
(syntax-token-column first-token)
(syntax-token-position first-token)
0))
(define label-stx
(for/fold ([stx (datum->syntax #false (syntax-label-value label) label-location #false)])
([(key value) (in-hash (syntax-label-properties label))])
(syntax-property stx key value)))
(for/fold ([stx (datum->syntax #false (cons label-stx children) location #false)])
([(key value) (in-hash (syntax-label-expression-properties label))])
(syntax-property stx key value))]))

@ -0,0 +1,116 @@
#lang racket/base
(require (for-syntax racket/base
syntax/parse)
racket/contract/base
racket/match
racket/symbol
rebellion/private/printer-markup)
(provide
(struct-out token)
(contract-out
[syntax-token
(->* (any/c #:position exact-positive-integer? #:span exact-nonnegative-integer?)
(any/c
#:source any/c
#:line (or/c exact-positive-integer? #false)
#:column (or/c exact-nonnegative-integer? #false)
#:skip? boolean?
#:properties hash?)
syntax-token?)]
[syntax-token? predicate/c]
[syntax-token-type (-> syntax-token? any/c)]
[syntax-token-value (-> syntax-token? any/c)]
[syntax-token-location (-> syntax-token? srcloc?)]
[syntax-token-skip? (-> syntax-token? boolean?)]
[syntax-token-source (-> syntax-token? any/c)]
[syntax-token-position (-> syntax-token? exact-positive-integer?)]
[syntax-token-span (-> syntax-token? exact-nonnegative-integer?)]
[syntax-token-end-position (-> syntax-token? exact-positive-integer?)]
[syntax-token-line (-> syntax-token? (or/c exact-positive-integer? #false))]
[syntax-token-column (-> syntax-token? (or/c exact-nonnegative-integer? #false))]
[syntax-token-properties (-> syntax-token? hash?)]
[syntax-token->syntax (-> syntax-token? syntax?)]))
;@----------------------------------------------------------------------------------------------------
(struct token (type value) #:transparent)
(struct syntax-token (type value source position span line column skip? properties)
#:constructor-name constructor:token
#:transparent
#:omit-define-syntaxes
#:property prop:custom-print-quotable 'never
#:methods gen:custom-write
[(define write-proc
(make-constructor-style-printer-with-markup
'token
(λ (this)
(append (list (syntax-token-type this))
(if (syntax-token-value this) (list (syntax-token-value this)) '())
(optional-keyword-argument-markup "#:source" (syntax-token-source this))
(optional-keyword-argument-markup "#:position" (syntax-token-position this))
(optional-keyword-argument-markup "#:span" (syntax-token-span this))
(optional-keyword-argument-markup "#:line" (syntax-token-line this))
(optional-keyword-argument-markup "#:column" (syntax-token-column this))
(optional-keyword-argument-markup "#:skip?" (syntax-token-skip? this))
(optional-keyword-argument-markup "#:properties" (syntax-token-properties this))))))])
(define (syntax-token type
[value type]
#:source [source #false]
#:position position
#:span span
#:line [line #false]
#:column [column #false]
#:skip? [skip? #false]
#:properties [properties (hash)])
(constructor:token type value source position span line column skip? properties))
(define (syntax-token-location token)
(srcloc (syntax-token-source token)
(syntax-token-line token)
(syntax-token-column token)
(syntax-token-position token)
(syntax-token-span token)))
(define (syntax-token->syntax token)
(for/fold ([stx (datum->syntax #false (syntax-token-value token) (syntax-token-location token))])
([(key value) (in-hash (syntax-token-properties token))])
(syntax-property stx key value)))
(define (optional-keyword-argument-markup kw-string value)
(if value (list (sequence-markup (list (unquoted-printing-string kw-string) value))) '()))
(define (syntax-token-end-position token)
(+ (syntax-token-position token) (syntax-token-span token)))
#|
;; yaragg/support
(struct token-struct (type val offset line column span skip?)
#:auto-value #f
#:transparent)
;; yaragg/parser-tools/cfg-parser
(struct tok (name orig-name val start end))
;; yaragg/parser-tools/private-lex/token
(struct token (name value) #:transparent)
(struct position-token (token start-pos end-pos) #:inspector #f)
(struct srcloc-token (token srcloc) #:inspector #f)
|#

@ -7,7 +7,9 @@
racket/stream
racket/struct
rebellion/collection/vector
rebellion/private/guarded-block)
rebellion/private/guarded-block
yaragg/base/derivation
yaragg/base/token)
(module+ test
@ -35,11 +37,6 @@
rule))
;; A (Token T V) is a tagged value. The grammar rules are defined in terms of the type tag,
;; whereas the value is what appears in leaf nodes of the resulting parse trees.
(struct token (type value) #:transparent)
;; A (Context-Free-Production-Rule T S L) contains a nonterminal symbol of type S, a label of type L,
;; and a substitution sequence of (Grammar-Symbol T S) values, stored in an immutable vector.
(struct context-free-production-rule (nonterminal label substitution) #:transparent)
@ -59,38 +56,6 @@
(context-free-production-rule symbol label (sequence->vector substitution)))
;; A (Parser-Derivation V L) is either a (Terminal-Derivation V) or a (Nonterminal-Derivation V L)
(struct parser-derivation () #:transparent)
;; A (Terminal-Derivation V) represents a terminal that was matched by the grammar. It contains the
;; value of the (Token T V) that was matched.
(struct terminal-derivation parser-derivation (value) #:transparent)
;; A (Nonterminal-Derivation V L) represents a nonterminal that was matched by the grammar. It
;; contains the label of type L of the production rule that matched, and an immutable vector of
;; subderivations
(struct nonterminal-derivation parser-derivation (label children)
#:transparent
#:property prop:custom-print-quotable 'never
#:methods gen:custom-write
[(define write-proc
(make-constructor-style-printer
(λ (_) 'nonterminal-derivation)
(λ (this)
(cons (nonterminal-derivation-label this)
(vector->list (nonterminal-derivation-children this))))))])
(define (make-nonterminal-derivation label [children '()])
(nonterminal-derivation label (sequence->vector children)))
(define derivation
(case-lambda
[(value) (terminal-derivation value)]
[(label first-child . children) (make-nonterminal-derivation label (cons first-child children))]))
;; Earley parser
@ -164,7 +129,7 @@
(define possible-children (possible-children-lists forest key))
(for*/stream ([children (in-stream possible-children)]
[processed-children (in-stream (cartesian-stream (map loop children)))])
(make-nonterminal-derivation label processed-children)))))
(nonterminal-derivation label processed-children)))))
(struct earley-state (rule substitution-position input-position key)
@ -330,14 +295,56 @@
(earley-parse arithmetic-grammar input-tokens))
(define expected-arithmetic-parse-tree
(derivation
(parser-derivation
'P
(derivation 'S0
(derivation 'S1 (derivation 'M1 (derivation 'T (derivation 2))))
(derivation 'plus)
(derivation 'M0
(derivation 'M1 (derivation 'T (derivation 3)))
(derivation 'times)
(derivation 'T (derivation 4))))))
(parser-derivation 'S0
(parser-derivation 'S1 (parser-derivation 'M1 (parser-derivation 'T (parser-derivation 2))))
(parser-derivation 'plus)
(parser-derivation 'M0
(parser-derivation 'M1 (parser-derivation 'T (parser-derivation 3)))
(parser-derivation 'times)
(parser-derivation 'T (parser-derivation 4))))))
(check-equal? (stream->list arithmetic-parse-forest) (list expected-arithmetic-parse-tree))))
(struct cf-syntax-production-rule (nonterminal label substitution properties label-properties)
#:transparent)
(struct syntax-label (value expression-properties properties) #:transparent)
(define (grammar-parse-to-syntax grammar token-sequence)
(define tokens
(for/vector ([t token-sequence])
(token (syntax-token-type t) t)))
(for/set ([derivation (in-set (earley-parse tokens))])
(derivation->syntax derivation)))
(define (derivation->syntax derivation)
(match derivation
[(terminal-derivation t) (syntax-token->syntax t)]
[(nonterminal-derivation label children)
(define first-token (parser-derivation-first-terminal derivation))
(define last-token (parser-derivation-last-terminal derivation))
(define location
(srcloc (syntax-token-source first-token)
(syntax-token-line first-token)
(syntax-token-column first-token)
(syntax-token-position first-token)
(- (syntax-token-position first-token) (syntax-token-end-position last-token))))
(define label-location
(srcloc (syntax-token-source first-token)
(syntax-token-line first-token)
(syntax-token-column first-token)
(syntax-token-position first-token)
0))
(define label-stx
(for/fold ([stx (datum->syntax #false (syntax-label-value label) label-location #false)])
([(key value) (in-hash (syntax-label-properties label))])
(syntax-property stx key value)))
(for/fold ([stx (datum->syntax #false (cons label-stx children) location #false)])
([(key value) (in-hash (syntax-label-expression-properties label))])
(syntax-property stx key value))]))

@ -1,128 +0,0 @@
#lang racket/base
(require (for-syntax racket/base
syntax/parse)
racket/contract/base
racket/match
racket/symbol
rebellion/private/printer-markup)
(provide
token
(contract-out
[token? predicate/c]
[token-type (-> token? (and/c symbol? symbol-interned?))]
[token-value (-> token? any/c)]
[token-location (-> token? (or/c srcloc? #false))]
[token-skip? (-> token? boolean?)]
[source-location
(->* ()
(#:source any/c
#:position (or/c exact-positive-integer? #false)
#:line (or/c exact-positive-integer? #false)
#:column (or/c exact-nonnegative-integer? #false)
#:span (or/c exact-nonnegative-integer? #false))
srcloc?)]
[token-position (-> token? (or/c exact-positive-integer? #false))]
[token-end-position (-> token? (or/c exact-positive-integer? #false))]))
;@----------------------------------------------------------------------------------------------------
(struct token (type value location skip?)
#:constructor-name constructor:token
#:transparent
#:omit-define-syntaxes
#:property prop:custom-print-quotable 'never
#:methods gen:custom-write
[(define write-proc
(make-constructor-style-printer-with-markup
'token
(λ (this)
(append (list (token-type this))
(if (token-value this) (list (token-value this)) '())
(if (token-location this)
(list
(sequence-markup
(list (unquoted-printing-string "#:location") (token-location this))))
'())
(if (token-skip? this)
(list
(sequence-markup (list (unquoted-printing-string "#:skip?") (token-skip? this))))
'())))))])
(define (source-location #:source [source #false]
#:position [position #false]
#:span [span #false]
#:line [line #false]
#:column [column #false])
(srcloc source line column position span))
(define (unchecked:token type
[value #false]
#:location [location #false]
#:skip? [skip? #false])
(let ([type
(cond [(string? type) (string->symbol type)]
[(not (symbol-interned? type)) (string->symbol (symbol->immutable-string type))]
[else type])])
(constructor:token type value location skip?)))
(define-module-boundary-contract contracted:token unchecked:token
(->* ((or/c symbol? string?)) (any/c #:location (or/c srcloc? #false) #:skip? boolean?)
token?)
#:name-for-blame token)
(define-match-expander token
(syntax-parser
[(_
(~alt
(~once value-pattern:expr)
(~optional (~seq #:type type-pattern:expr) #:defaults ([type-pattern #'_]))
(~optional (~seq #:location location-pattern:expr) #:defaults ([location-pattern #'_]))
(~optional (~seq #:skip? skip-pattern:expr) #:defaults ([skip-pattern #'_])))
...)
#'(? token?
(app token-value value-pattern)
(app token-type type-pattern)
(app token-location location-pattern)
(app token-skip? skip-pattern))])
(make-rename-transformer #'contracted:token))
(define (token-position t)
(define loc (token-location t))
(and loc (srcloc-position loc)))
(define (token-end-position t)
(define loc (token-location t))
(define start (and loc (srcloc-position loc)))
(define span (and loc (srcloc-span loc)))
(and start span (+ start span)))
#|
;; yaragg/support
(struct token-struct (type val offset line column span skip?)
#:auto-value #f
#:transparent)
;; yaragg/parser-tools/cfg-parser
(struct tok (name orig-name val start end))
;; yaragg/parser-tools/private-lex/token
(struct token (name value) #:transparent)
(struct position-token (token start-pos end-pos) #:inspector #f)
(struct srcloc-token (token srcloc) #:inspector #f)
|#
Loading…
Cancel
Save