From 9c6624f19ef739a3c0db5dd1cd48668e6c26ab9b Mon Sep 17 00:00:00 2001 From: Jack Firth Date: Mon, 4 Apr 2022 19:18:02 -0700 Subject: [PATCH] Rearrange things and implement syntax object builder --- base/derivation.rkt | 115 +++++++++++++++++ base/token.rkt | 116 +++++++++++++++++ grammar.rkt => private/primitive-grammar.rkt | 101 ++++++++------- token.rkt | 128 ------------------- 4 files changed, 285 insertions(+), 175 deletions(-) create mode 100644 base/derivation.rkt create mode 100644 base/token.rkt rename grammar.rkt => private/primitive-grammar.rkt (83%) delete mode 100644 token.rkt diff --git a/base/derivation.rkt b/base/derivation.rkt new file mode 100644 index 0000000..83c90e4 --- /dev/null +++ b/base/derivation.rkt @@ -0,0 +1,115 @@ +#lang racket/base + + +(require racket/contract/base) + + +(provide + (struct-out terminal-derivation) + (struct-out nonterminal-derivation) + (struct-out syntax-label) + (contract-out + [parser-derivation? predicate/c] + [parser-derivation-first-terminal (-> parser-derivation? any/c)] + [parser-derivation-last-terminal (-> parser-derivation? any/c)] + [parser-derivation + (case-> + (-> any/c terminal-derivation?) + (-> any/c parser-derivation? #:rest (listof parser-derivation?) nonterminal-derivation?))] + [parser-derivation->syntax (-> parser-derivation? syntax?)])) + + +(require racket/match + racket/sequence + racket/struct + rebellion/collection/vector + yaragg/base/token) + + +;@---------------------------------------------------------------------------------------------------- + + +(define (parser-derivation? v) + (or (terminal-derivation? v) (nonterminal-derivation? v))) + + +;; A (Terminal-Derivation V) represents a terminal that was matched by the grammar. It contains the +;; value V of the (Token T V) that was matched. +(struct terminal-derivation (value) #:transparent) + + +;; A (Nonterminal-Derivation V L) represents a nonterminal that was matched by the grammar. It +;; contains the label of type L of the production rule that matched, and an immutable vector of +;; subderivations +(struct nonterminal-derivation (label children) + + #:guard + (let ([contract-guard (struct-guard/c any/c (sequence/c parser-derivation?))]) + (λ (label children name) + (let-values ([(label children) (contract-guard label children name)]) + (values label (sequence->vector children))))) + + #:transparent + #:property prop:custom-print-quotable 'never + #:methods gen:custom-write + [(define write-proc + (make-constructor-style-printer + (λ (_) 'nonterminal-derivation) + (λ (this) + (cons (nonterminal-derivation-label this) + (vector->list (nonterminal-derivation-children this))))))]) + + +(define parser-derivation + (case-lambda + [(value) (terminal-derivation value)] + [(label first-child . children) (nonterminal-derivation label (cons first-child children))])) + + +(define (parser-derivation-first-terminal derivation) + (match derivation + [(terminal-derivation value) value] + [(nonterminal-derivation _ (list first-child _ ...)) + (parser-derivation-first-terminal first-child)])) + + +(define (parser-derivation-last-terminal derivation) + (match derivation + [(terminal-derivation value) value] + [(nonterminal-derivation _ (list _ ... last-child)) + (parser-derivation-first-terminal last-child)])) + + +(struct syntax-label (value expression-properties properties) + #:transparent + #:guard + (struct-guard/c any/c + (hash/c any/c any/c #:immutable #true #:flat? #true) + (hash/c any/c any/c #:immutable #true #:flat? #true))) + + +(define (parser-derivation->syntax derivation) + (match derivation + [(terminal-derivation t) (syntax-token->syntax t)] + [(nonterminal-derivation label children) + (define first-token (parser-derivation-first-terminal derivation)) + (define last-token (parser-derivation-last-terminal derivation)) + (define location + (srcloc (syntax-token-source first-token) + (syntax-token-line first-token) + (syntax-token-column first-token) + (syntax-token-position first-token) + (- (syntax-token-position first-token) (syntax-token-end-position last-token)))) + (define label-location + (srcloc (syntax-token-source first-token) + (syntax-token-line first-token) + (syntax-token-column first-token) + (syntax-token-position first-token) + 0)) + (define label-stx + (for/fold ([stx (datum->syntax #false (syntax-label-value label) label-location #false)]) + ([(key value) (in-hash (syntax-label-properties label))]) + (syntax-property stx key value))) + (for/fold ([stx (datum->syntax #false (cons label-stx children) location #false)]) + ([(key value) (in-hash (syntax-label-expression-properties label))]) + (syntax-property stx key value))])) \ No newline at end of file diff --git a/base/token.rkt b/base/token.rkt new file mode 100644 index 0000000..38ab21e --- /dev/null +++ b/base/token.rkt @@ -0,0 +1,116 @@ +#lang racket/base + + +(require (for-syntax racket/base + syntax/parse) + racket/contract/base + racket/match + racket/symbol + rebellion/private/printer-markup) + + +(provide + (struct-out token) + (contract-out + [syntax-token + (->* (any/c #:position exact-positive-integer? #:span exact-nonnegative-integer?) + (any/c + #:source any/c + #:line (or/c exact-positive-integer? #false) + #:column (or/c exact-nonnegative-integer? #false) + #:skip? boolean? + #:properties hash?) + syntax-token?)] + [syntax-token? predicate/c] + [syntax-token-type (-> syntax-token? any/c)] + [syntax-token-value (-> syntax-token? any/c)] + [syntax-token-location (-> syntax-token? srcloc?)] + [syntax-token-skip? (-> syntax-token? boolean?)] + [syntax-token-source (-> syntax-token? any/c)] + [syntax-token-position (-> syntax-token? exact-positive-integer?)] + [syntax-token-span (-> syntax-token? exact-nonnegative-integer?)] + [syntax-token-end-position (-> syntax-token? exact-positive-integer?)] + [syntax-token-line (-> syntax-token? (or/c exact-positive-integer? #false))] + [syntax-token-column (-> syntax-token? (or/c exact-nonnegative-integer? #false))] + [syntax-token-properties (-> syntax-token? hash?)] + [syntax-token->syntax (-> syntax-token? syntax?)])) + + +;@---------------------------------------------------------------------------------------------------- + + +(struct token (type value) #:transparent) + + +(struct syntax-token (type value source position span line column skip? properties) + #:constructor-name constructor:token + #:transparent + #:omit-define-syntaxes + + #:property prop:custom-print-quotable 'never + + #:methods gen:custom-write + + [(define write-proc + (make-constructor-style-printer-with-markup + 'token + (λ (this) + (append (list (syntax-token-type this)) + (if (syntax-token-value this) (list (syntax-token-value this)) '()) + (optional-keyword-argument-markup "#:source" (syntax-token-source this)) + (optional-keyword-argument-markup "#:position" (syntax-token-position this)) + (optional-keyword-argument-markup "#:span" (syntax-token-span this)) + (optional-keyword-argument-markup "#:line" (syntax-token-line this)) + (optional-keyword-argument-markup "#:column" (syntax-token-column this)) + (optional-keyword-argument-markup "#:skip?" (syntax-token-skip? this)) + (optional-keyword-argument-markup "#:properties" (syntax-token-properties this))))))]) + + +(define (syntax-token type + [value type] + #:source [source #false] + #:position position + #:span span + #:line [line #false] + #:column [column #false] + #:skip? [skip? #false] + #:properties [properties (hash)]) + (constructor:token type value source position span line column skip? properties)) + + +(define (syntax-token-location token) + (srcloc (syntax-token-source token) + (syntax-token-line token) + (syntax-token-column token) + (syntax-token-position token) + (syntax-token-span token))) + + +(define (syntax-token->syntax token) + (for/fold ([stx (datum->syntax #false (syntax-token-value token) (syntax-token-location token))]) + ([(key value) (in-hash (syntax-token-properties token))]) + (syntax-property stx key value))) + + +(define (optional-keyword-argument-markup kw-string value) + (if value (list (sequence-markup (list (unquoted-printing-string kw-string) value))) '())) + + +(define (syntax-token-end-position token) + (+ (syntax-token-position token) (syntax-token-span token))) + + +#| +;; yaragg/support +(struct token-struct (type val offset line column span skip?) + #:auto-value #f + #:transparent) + +;; yaragg/parser-tools/cfg-parser +(struct tok (name orig-name val start end)) + +;; yaragg/parser-tools/private-lex/token +(struct token (name value) #:transparent) +(struct position-token (token start-pos end-pos) #:inspector #f) +(struct srcloc-token (token srcloc) #:inspector #f) +|# diff --git a/grammar.rkt b/private/primitive-grammar.rkt similarity index 83% rename from grammar.rkt rename to private/primitive-grammar.rkt index 9973ffa..2798e02 100644 --- a/grammar.rkt +++ b/private/primitive-grammar.rkt @@ -7,7 +7,9 @@ racket/stream racket/struct rebellion/collection/vector - rebellion/private/guarded-block) + rebellion/private/guarded-block + yaragg/base/derivation + yaragg/base/token) (module+ test @@ -35,11 +37,6 @@ rule)) -;; A (Token T V) is a tagged value. The grammar rules are defined in terms of the type tag, -;; whereas the value is what appears in leaf nodes of the resulting parse trees. -(struct token (type value) #:transparent) - - ;; A (Context-Free-Production-Rule T S L) contains a nonterminal symbol of type S, a label of type L, ;; and a substitution sequence of (Grammar-Symbol T S) values, stored in an immutable vector. (struct context-free-production-rule (nonterminal label substitution) #:transparent) @@ -59,38 +56,6 @@ (context-free-production-rule symbol label (sequence->vector substitution))) -;; A (Parser-Derivation V L) is either a (Terminal-Derivation V) or a (Nonterminal-Derivation V L) -(struct parser-derivation () #:transparent) - -;; A (Terminal-Derivation V) represents a terminal that was matched by the grammar. It contains the -;; value of the (Token T V) that was matched. -(struct terminal-derivation parser-derivation (value) #:transparent) - -;; A (Nonterminal-Derivation V L) represents a nonterminal that was matched by the grammar. It -;; contains the label of type L of the production rule that matched, and an immutable vector of -;; subderivations -(struct nonterminal-derivation parser-derivation (label children) - #:transparent - #:property prop:custom-print-quotable 'never - #:methods gen:custom-write - [(define write-proc - (make-constructor-style-printer - (λ (_) 'nonterminal-derivation) - (λ (this) - (cons (nonterminal-derivation-label this) - (vector->list (nonterminal-derivation-children this))))))]) - - -(define (make-nonterminal-derivation label [children '()]) - (nonterminal-derivation label (sequence->vector children))) - - -(define derivation - (case-lambda - [(value) (terminal-derivation value)] - [(label first-child . children) (make-nonterminal-derivation label (cons first-child children))])) - - ;; Earley parser @@ -164,7 +129,7 @@ (define possible-children (possible-children-lists forest key)) (for*/stream ([children (in-stream possible-children)] [processed-children (in-stream (cartesian-stream (map loop children)))]) - (make-nonterminal-derivation label processed-children))))) + (nonterminal-derivation label processed-children))))) (struct earley-state (rule substitution-position input-position key) @@ -330,14 +295,56 @@ (earley-parse arithmetic-grammar input-tokens)) (define expected-arithmetic-parse-tree - (derivation + (parser-derivation 'P - (derivation 'S0 - (derivation 'S1 (derivation 'M1 (derivation 'T (derivation 2)))) - (derivation 'plus) - (derivation 'M0 - (derivation 'M1 (derivation 'T (derivation 3))) - (derivation 'times) - (derivation 'T (derivation 4)))))) + (parser-derivation 'S0 + (parser-derivation 'S1 (parser-derivation 'M1 (parser-derivation 'T (parser-derivation 2)))) + (parser-derivation 'plus) + (parser-derivation 'M0 + (parser-derivation 'M1 (parser-derivation 'T (parser-derivation 3))) + (parser-derivation 'times) + (parser-derivation 'T (parser-derivation 4)))))) (check-equal? (stream->list arithmetic-parse-forest) (list expected-arithmetic-parse-tree)))) + + +(struct cf-syntax-production-rule (nonterminal label substitution properties label-properties) + #:transparent) + + +(struct syntax-label (value expression-properties properties) #:transparent) + + +(define (grammar-parse-to-syntax grammar token-sequence) + (define tokens + (for/vector ([t token-sequence]) + (token (syntax-token-type t) t))) + (for/set ([derivation (in-set (earley-parse tokens))]) + (derivation->syntax derivation))) + + +(define (derivation->syntax derivation) + (match derivation + [(terminal-derivation t) (syntax-token->syntax t)] + [(nonterminal-derivation label children) + (define first-token (parser-derivation-first-terminal derivation)) + (define last-token (parser-derivation-last-terminal derivation)) + (define location + (srcloc (syntax-token-source first-token) + (syntax-token-line first-token) + (syntax-token-column first-token) + (syntax-token-position first-token) + (- (syntax-token-position first-token) (syntax-token-end-position last-token)))) + (define label-location + (srcloc (syntax-token-source first-token) + (syntax-token-line first-token) + (syntax-token-column first-token) + (syntax-token-position first-token) + 0)) + (define label-stx + (for/fold ([stx (datum->syntax #false (syntax-label-value label) label-location #false)]) + ([(key value) (in-hash (syntax-label-properties label))]) + (syntax-property stx key value))) + (for/fold ([stx (datum->syntax #false (cons label-stx children) location #false)]) + ([(key value) (in-hash (syntax-label-expression-properties label))]) + (syntax-property stx key value))])) diff --git a/token.rkt b/token.rkt deleted file mode 100644 index ce12b11..0000000 --- a/token.rkt +++ /dev/null @@ -1,128 +0,0 @@ -#lang racket/base - - -(require (for-syntax racket/base - syntax/parse) - racket/contract/base - racket/match - racket/symbol - rebellion/private/printer-markup) - - -(provide - token - (contract-out - [token? predicate/c] - [token-type (-> token? (and/c symbol? symbol-interned?))] - [token-value (-> token? any/c)] - [token-location (-> token? (or/c srcloc? #false))] - [token-skip? (-> token? boolean?)] - [source-location - (->* () - (#:source any/c - #:position (or/c exact-positive-integer? #false) - #:line (or/c exact-positive-integer? #false) - #:column (or/c exact-nonnegative-integer? #false) - #:span (or/c exact-nonnegative-integer? #false)) - srcloc?)] - [token-position (-> token? (or/c exact-positive-integer? #false))] - [token-end-position (-> token? (or/c exact-positive-integer? #false))])) - - -;@---------------------------------------------------------------------------------------------------- - - -(struct token (type value location skip?) - #:constructor-name constructor:token - #:transparent - #:omit-define-syntaxes - - #:property prop:custom-print-quotable 'never - - #:methods gen:custom-write - - [(define write-proc - (make-constructor-style-printer-with-markup - 'token - (λ (this) - (append (list (token-type this)) - (if (token-value this) (list (token-value this)) '()) - (if (token-location this) - (list - (sequence-markup - (list (unquoted-printing-string "#:location") (token-location this)))) - '()) - (if (token-skip? this) - (list - (sequence-markup (list (unquoted-printing-string "#:skip?") (token-skip? this)))) - '())))))]) - - -(define (source-location #:source [source #false] - #:position [position #false] - #:span [span #false] - #:line [line #false] - #:column [column #false]) - (srcloc source line column position span)) - - -(define (unchecked:token type - [value #false] - #:location [location #false] - #:skip? [skip? #false]) - (let ([type - (cond [(string? type) (string->symbol type)] - [(not (symbol-interned? type)) (string->symbol (symbol->immutable-string type))] - [else type])]) - (constructor:token type value location skip?))) - - -(define-module-boundary-contract contracted:token unchecked:token - (->* ((or/c symbol? string?)) (any/c #:location (or/c srcloc? #false) #:skip? boolean?) - token?) - #:name-for-blame token) - - -(define-match-expander token - (syntax-parser - [(_ - (~alt - (~once value-pattern:expr) - (~optional (~seq #:type type-pattern:expr) #:defaults ([type-pattern #'_])) - (~optional (~seq #:location location-pattern:expr) #:defaults ([location-pattern #'_])) - (~optional (~seq #:skip? skip-pattern:expr) #:defaults ([skip-pattern #'_]))) - ...) - #'(? token? - (app token-value value-pattern) - (app token-type type-pattern) - (app token-location location-pattern) - (app token-skip? skip-pattern))]) - (make-rename-transformer #'contracted:token)) - - -(define (token-position t) - (define loc (token-location t)) - (and loc (srcloc-position loc))) - - -(define (token-end-position t) - (define loc (token-location t)) - (define start (and loc (srcloc-position loc))) - (define span (and loc (srcloc-span loc))) - (and start span (+ start span))) - - -#| -;; yaragg/support -(struct token-struct (type val offset line column span skip?) - #:auto-value #f - #:transparent) - -;; yaragg/parser-tools/cfg-parser -(struct tok (name orig-name val start end)) - -;; yaragg/parser-tools/private-lex/token -(struct token (name value) #:transparent) -(struct position-token (token start-pos end-pos) #:inspector #f) -(struct srcloc-token (token srcloc) #:inspector #f) -|#