You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
brag/yaragg-lib/yaragg/codegen/expander.rkt

114 lines
6.3 KiB
Racket

8 years ago
#lang racket/base
7 years ago
(require (for-syntax racket/base
racket/list
yaragg/codegen/codegen
yaragg/codegen/runtime
yaragg/codegen/flatten)
yaragg-parser-tools/lex
yaragg-parser-tools/cfg-parser
(prefix-in bs: yaragg/support)
7 years ago
racket/set)
(provide (except-out (all-from-out racket/base) #%module-begin)
(rename-out [brag-module-begin #%module-begin]
[bs:apply-lexer apply-lexer] ; for repl
[bs:apply-tokenizer-maker apply-tokenizer-maker])) ; for repl
;; -> (listof symbol)
(define-for-syntax (rules->token-types rules)
7 years ago
(define-values (implicit-tokens explicit-tokens) (rules-collect-token-types rules))
(remove-duplicates (append (for/list ([it (in-list implicit-tokens)])
(string->symbol (syntax-e it)))
7 years ago
(map syntax-e explicit-tokens)) eq?))
7 years ago
(define-syntax (brag-module-begin rules-stx)
(syntax-case rules-stx ()
[(_) (raise-syntax-error 'brag
(format "The grammar does not appear to have any rules")
(syntax-source rules-stx))]
7 years ago
[(_ . RULES)
(let ([rules (syntax->list #'RULES)]) ;; (listof stx)
(check-all-rules-defined! rules)
(check-all-rules-no-duplicates! rules)
(check-all-rules-satisfiable! rules)
(define rule-ids (map rule-id rules))
(with-syntax ([START-ID (first rule-ids)] ; The first rule, by default, is the start rule.
[((TOKEN-TYPE . TOKEN-TYPE-CONSTRUCTOR) ...)
(for/list ([tt (in-list (rules->token-types rules))])
(cons tt (string->symbol (format "token-~a" tt))))]
;; Flatten rules to use the yacc-style ruleset that yaragg-parser-tools supports
[GENERATED-RULE-CODES (map flat-rule->yacc-rule (flatten-rules rules))]
;; main exports. Break hygiene so they're also available at top-level / repl
[(PARSE PARSE-TO-DATUM PARSE-TREE MAKE-RULE-PARSER ALL-TOKEN-TYPES)
(map (λ (sym) (datum->syntax rules-stx sym))
'(parse parse-to-datum parse-tree make-rule-parser all-token-types))]
[TOKEN (datum->syntax rules-stx 'token)] ; for repl
[RULE-IDS (map syntax-e rule-ids)]
[RULES-STX rules-stx])
3 years ago
;; this stx object represents the top level of a #lang yaragg module.
7 years ago
;; so any `define`s are automatically available at the repl.
;; and only identifiers explicitly `provide`d are visible on import.
#'(#%module-begin
(provide PARSE PARSE-TO-DATUM PARSE-TREE MAKE-RULE-PARSER ALL-TOKEN-TYPES)
7 years ago
;; handle yaragg/support `token` with special identifier
7 years ago
;; so it doesn't conflict with brag's internal `token` macro
;; defined but deliberately not provided so it's available at repl, but not on import
7 years ago
(define TOKEN bs:token)
(define-tokens enumerated-tokens (TOKEN-TYPE ...))
;; all-token-types lists all the tokens (except for EOF)
(define ALL-TOKEN-TYPES (set-remove (set 'TOKEN-TYPE ...) 'EOF))
;; For internal use by the permissive tokenizer only:
(define all-tokens-hash/mutable
(make-hash (list ;; Note: we also allow the eof object here, to make
;; the permissive tokenizer even nicer to work with.
(cons eof token-EOF)
(cons 'TOKEN-TYPE TOKEN-TYPE-CONSTRUCTOR) ...)))
(define-syntax (MAKE-RULE-PARSER stx)
(syntax-case stx ()
[(_ START-RULE-ID)
(and (identifier? #'START-RULE-ID) (member (syntax-e #'START-RULE-ID) 'RULE-IDS))
7 years ago
;; The cfg-parser depends on the start-rule provided in (start ...) to have the same
;; context as the rest of this body. Hence RECOLORED-START-RULE
(with-syntax ([RECOLORED-START-RULE (datum->syntax #'RULES-STX (syntax-e #'START-RULE-ID))])
#'(let ()
(define (rule-parser tokenizer)
(define rule-grammar (cfg-parser (tokens enumerated-tokens)
(src-pos)
(start RECOLORED-START-RULE)
(end EOF)
(error the-error-handler)
(grammar . GENERATED-RULE-CODES)))
(define next-token (make-permissive-tokenizer tokenizer all-tokens-hash/mutable))
;; here's how we support grammar "cuts" on top rule name
(define parse-tree-stx (rule-grammar next-token))
(syntax-case parse-tree-stx ()
[(TOP-RULE-NAME . _)
(if (eq? (syntax-property #'TOP-RULE-NAME 'hide-or-splice?) 'hide)
(remove-rule-name parse-tree-stx) ; use `remove-rule-name` so we get the same housekeeping
parse-tree-stx)]
[_ (error 'malformed-parse-tree)]))
(case-lambda [(tokenizer) (rule-parser tokenizer)]
[(source tokenizer)
(parameterize ([current-source source])
(rule-parser tokenizer))])))]
7 years ago
[(_ not-a-rule-id)
(raise-syntax-error #f
(format "Rule ~a is not defined in the grammar" (syntax-e #'not-a-rule-id))
stx)]))
7 years ago
;; start-id has to be a value, not an expr, because make-rule-parser is a macro
(define PARSE (procedure-rename (MAKE-RULE-PARSER START-ID) 'PARSE))
(define (PARSE-TO-DATUM x) (syntax->datum (PARSE x)))
7 years ago
(define PARSE-TREE PARSE-TO-DATUM))))]))
8 years ago