diff --git a/beautiful-racket-lib/info.rkt b/beautiful-racket-lib/info.rkt index b175a1a..3086d63 100644 --- a/beautiful-racket-lib/info.rkt +++ b/beautiful-racket-lib/info.rkt @@ -4,4 +4,3 @@ (define version "0.01") (define deps '("base" "sugar")) (define build-deps '("racket-doc" "rackunit-lib" "scribble-lib")) -(define scribblings '(("br/scribblings/br.scrbl" ()))) diff --git a/beautiful-racket-ragg/LICENSE b/beautiful-racket-ragg/LICENSE new file mode 100755 index 0000000..65c5ca8 --- /dev/null +++ b/beautiful-racket-ragg/LICENSE @@ -0,0 +1,165 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + + This version of the GNU Lesser General Public License incorporates +the terms and conditions of version 3 of the GNU General Public +License, supplemented by the additional permissions listed below. + + 0. Additional Definitions. + + As used herein, "this License" refers to version 3 of the GNU Lesser +General Public License, and the "GNU GPL" refers to version 3 of the GNU +General Public License. + + "The Library" refers to a covered work governed by this License, +other than an Application or a Combined Work as defined below. + + An "Application" is any work that makes use of an interface provided +by the Library, but which is not otherwise based on the Library. +Defining a subclass of a class defined by the Library is deemed a mode +of using an interface provided by the Library. + + A "Combined Work" is a work produced by combining or linking an +Application with the Library. The particular version of the Library +with which the Combined Work was made is also called the "Linked +Version". 
+ + The "Minimal Corresponding Source" for a Combined Work means the +Corresponding Source for the Combined Work, excluding any source code +for portions of the Combined Work that, considered in isolation, are +based on the Application, and not on the Linked Version. + + The "Corresponding Application Code" for a Combined Work means the +object code and/or source code for the Application, including any data +and utility programs needed for reproducing the Combined Work from the +Application, but excluding the System Libraries of the Combined Work. + + 1. Exception to Section 3 of the GNU GPL. + + You may convey a covered work under sections 3 and 4 of this License +without being bound by section 3 of the GNU GPL. + + 2. Conveying Modified Versions. + + If you modify a copy of the Library, and, in your modifications, a +facility refers to a function or data to be supplied by an Application +that uses the facility (other than as an argument passed when the +facility is invoked), then you may convey a copy of the modified +version: + + a) under this License, provided that you make a good faith effort to + ensure that, in the event an Application does not supply the + function or data, the facility still operates, and performs + whatever part of its purpose remains meaningful, or + + b) under the GNU GPL, with none of the additional permissions of + this License applicable to that copy. + + 3. Object Code Incorporating Material from Library Header Files. + + The object code form of an Application may incorporate material from +a header file that is part of the Library. 
You may convey such object +code under terms of your choice, provided that, if the incorporated +material is not limited to numerical parameters, data structure +layouts and accessors, or small macros, inline functions and templates +(ten or fewer lines in length), you do both of the following: + + a) Give prominent notice with each copy of the object code that the + Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the object code with a copy of the GNU GPL and this license + document. + + 4. Combined Works. + + You may convey a Combined Work under terms of your choice that, +taken together, effectively do not restrict modification of the +portions of the Library contained in the Combined Work and reverse +engineering for debugging such modifications, if you also do each of +the following: + + a) Give prominent notice with each copy of the Combined Work that + the Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the Combined Work with a copy of the GNU GPL and this license + document. + + c) For a Combined Work that displays copyright notices during + execution, include the copyright notice for the Library among + these notices, as well as a reference directing the user to the + copies of the GNU GPL and this license document. + + d) Do one of the following: + + 0) Convey the Minimal Corresponding Source under the terms of this + License, and the Corresponding Application Code in a form + suitable for, and under terms that permit, the user to + recombine or relink the Application with a modified version of + the Linked Version to produce a modified Combined Work, in the + manner specified by section 6 of the GNU GPL for conveying + Corresponding Source. + + 1) Use a suitable shared library mechanism for linking with the + Library. 
A suitable mechanism is one that (a) uses at run time + a copy of the Library already present on the user's computer + system, and (b) will operate properly with a modified version + of the Library that is interface-compatible with the Linked + Version. + + e) Provide Installation Information, but only if you would otherwise + be required to provide such information under section 6 of the + GNU GPL, and only to the extent that such information is + necessary to install and execute a modified version of the + Combined Work produced by recombining or relinking the + Application with a modified version of the Linked Version. (If + you use option 4d0, the Installation Information must accompany + the Minimal Corresponding Source and Corresponding Application + Code. If you use option 4d1, you must provide the Installation + Information in the manner specified by section 6 of the GNU GPL + for conveying Corresponding Source.) + + 5. Combined Libraries. + + You may place library facilities that are a work based on the +Library side by side in a single library together with other library +facilities that are not Applications and are not covered by this +License, and convey such a combined library under terms of your +choice, if you do both of the following: + + a) Accompany the combined library with a copy of the same work based + on the Library, uncombined with any other library facilities, + conveyed under the terms of this License. + + b) Give prominent notice with the combined library that part of it + is a work based on the Library, and explaining where to find the + accompanying uncombined form of the same work. + + 6. Revised Versions of the GNU Lesser General Public License. + + The Free Software Foundation may publish revised and/or new versions +of the GNU Lesser General Public License from time to time. Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. 
+ + Each version is given a distinguishing version number. If the +Library as you received it specifies that a certain numbered version +of the GNU Lesser General Public License "or any later version" +applies to it, you have the option of following the terms and +conditions either of that published version or of any later version +published by the Free Software Foundation. If the Library as you +received it does not specify a version number of the GNU Lesser +General Public License, you may choose any version of the GNU Lesser +General Public License ever published by the Free Software Foundation. + + If the Library as you received it specifies that a proxy can decide +whether future versions of the GNU Lesser General Public License shall +apply, that proxy's public statement of acceptance of any version is +permanent authorization for you to choose that version for the +Library. diff --git a/beautiful-racket-ragg/README.md b/beautiful-racket-ragg/README.md new file mode 100755 index 0000000..f4bb9e4 --- /dev/null +++ b/beautiful-racket-ragg/README.md @@ -0,0 +1,8 @@ +This repo contains Danny Yoo's RAGG, a Racket AST Generator Generator, +also known as a parser generator. + +install it using + +``` +raco pkg install ragg +``` diff --git a/beautiful-racket-ragg/br/ragg/Makefile b/beautiful-racket-ragg/br/ragg/Makefile new file mode 100755 index 0000000..579d424 --- /dev/null +++ b/beautiful-racket-ragg/br/ragg/Makefile @@ -0,0 +1,12 @@ +doc: + scribble ++xref-in setup/xref load-collections-xref --redirect-main http://docs.racket-lang.org/ --dest-name index.html manual.scrbl + +clean: + git clean -fdx . 
+ +web: clean plt doc + scp -r * hashcollision.org:webapps/htdocs/ragg/ + + +plt: + raco pack --collect ragg.plt ragg diff --git a/beautiful-racket-ragg/br/ragg/cfg-parser/cfg-parser.rkt b/beautiful-racket-ragg/br/ragg/cfg-parser/cfg-parser.rkt new file mode 100755 index 0000000..0d56f8e --- /dev/null +++ b/beautiful-racket-ragg/br/ragg/cfg-parser/cfg-parser.rkt @@ -0,0 +1,921 @@ +#lang racket/base +;; This module implements a parser form like the parser-tools's +;; `parser', except that it works on an arbitrary CFG (returning +;; the first sucecssful parse). + +;; I'm pretty sure that this is an implementation of Earley's +;; algorithm. + +;; To a first approximation, it's a backtracking parser. Alternative +;; for a non-terminal are computed in parallel, and multiple attempts +;; to compute the same result block until the first one completes. If +;; you get into deadlock, such as when trying to match +;; := +;; then it means that there's no successful parse, so everything +;; that's blocked fails. + +;; A cache holds the series of results for a particular non-terminal +;; at a particular starting location. (A series is used, instead of a +;; sinlge result, for backtracking.) Otherwise, the parser uses +;; backtracking search. Backtracking is implemented through explicit +;; success and failure continuations. Multiple results for a +;; particular nonterminal and location are kept only when they have +;; different lengths. (Otherwise, in the spirit of finding one +;; successful parse, only the first result is kept.) + +;; The parser-tools's `parse' is used to transform tokens in the +;; grammar to tokens specific to this parser. In other words, this +;; parser uses `parser' so that it doesn't have to know anything about +;; tokens. 
+;; + + + +(require parser-tools/yacc + parser-tools/lex) + +(require (for-syntax racket/base + syntax/boundmap + parser-tools/private-lex/token-syntax)) + +(provide cfg-parser) + +;; A raw token, wrapped so that we can recognize it: +(define-struct tok (name orig-name val start end)) + +;; Represents the thread scheduler: +(define-struct tasks (active active-back waits multi-waits cache progress?)) + +(define-for-syntax make-token-identifier-mapping make-hasheq) +(define-for-syntax token-identifier-mapping-get + (case-lambda + [(t tok) + (hash-ref t (syntax-e tok))] + [(t tok fail) + (hash-ref t (syntax-e tok) fail)])) +(define-for-syntax token-identifier-mapping-put! + (lambda (t tok v) + (hash-set! t (syntax-e tok) v))) +(define-for-syntax token-identifier-mapping-map + (lambda (t f) + (hash-map t f))) + +;; Used to calculate information on the grammar, such as whether +;; a particular non-terminal is "simple" instead of recursively defined. +(define-for-syntax (nt-fixpoint nts proc nt-ids patss) + (define (ormap-all val f as bs) + (cond + [(null? as) val] + [else (ormap-all (or (f (car as) (car bs)) val) + f + (cdr as) (cdr bs))])) + (let loop () + (when (ormap-all #f + (lambda (nt pats) + (let ([old (bound-identifier-mapping-get nts nt)]) + (let ([new (proc nt pats old)]) + (if (equal? old new) + #f + (begin + (bound-identifier-mapping-put! nts nt new) + #t))))) + nt-ids patss) + (loop)))) + +;; Tries parse-a followed by parse-b. If parse-a is not simple, +;; then after parse-a succeeds once, we parallelize parse-b +;; and trying a second result for parse-a. +(define (parse-and simple-a? parse-a parse-b + stream last-consumed-token depth end success-k fail-k + max-depth tasks) + (letrec ([mk-got-k + (lambda (success-k fail-k) + (lambda (val stream last-consumed-token depth max-depth tasks next1-k) + (if simple-a? 
+ (parse-b val stream last-consumed-token depth end + (mk-got2-k success-k fail-k next1-k) + (mk-fail2-k success-k fail-k next1-k) + max-depth tasks) + (parallel-or + (lambda (success-k fail-k max-depth tasks) + (parse-b val stream last-consumed-token depth end + success-k fail-k + max-depth tasks)) + (lambda (success-k fail-k max-depth tasks) + (next1-k (mk-got-k success-k fail-k) + fail-k max-depth tasks)) + success-k fail-k max-depth tasks))))] + [mk-got2-k + (lambda (success-k fail-k next1-k) + (lambda (val stream last-consumed-token depth max-depth tasks next-k) + (success-k val stream last-consumed-token depth max-depth tasks + (lambda (success-k fail-k max-depth tasks) + (next-k (mk-got2-k success-k fail-k next1-k) + (mk-fail2-k success-k fail-k next1-k) + max-depth tasks)))))] + [mk-fail2-k + (lambda (success-k fail-k next1-k) + (lambda (max-depth tasks) + (next1-k (mk-got-k success-k fail-k) + fail-k + max-depth + tasks)))]) + (parse-a stream last-consumed-token depth end + (mk-got-k success-k fail-k) + fail-k + max-depth tasks))) + +;; Parallel or for non-terminal alternatives +(define (parse-parallel-or parse-a parse-b stream last-consumed-token depth end success-k fail-k max-depth tasks) + (parallel-or (lambda (success-k fail-k max-depth tasks) + (parse-a stream last-consumed-token depth end success-k fail-k max-depth tasks)) + (lambda (success-k fail-k max-depth tasks) + (parse-b stream last-consumed-token depth end success-k fail-k max-depth tasks)) + success-k fail-k max-depth tasks)) + +;; Generic parallel-or +(define (parallel-or parse-a parse-b success-k fail-k max-depth tasks) + (define answer-key (gensym)) + (letrec ([gota-k + (lambda (val stream last-consumed-token depth max-depth tasks next-k) + (report-answer answer-key + max-depth + tasks + (list val stream last-consumed-token depth next-k)))] + [faila-k + (lambda (max-depth tasks) + (report-answer answer-key + max-depth + tasks + null))]) + (let* ([tasks (queue-task + tasks + (lambda 
(max-depth tasks) + (parse-a gota-k + faila-k + max-depth tasks)))] + [tasks (queue-task + tasks + (lambda (max-depth tasks) + (parse-b gota-k + faila-k + max-depth tasks)))] + [queue-next (lambda (next-k tasks) + (queue-task tasks + (lambda (max-depth tasks) + (next-k gota-k + faila-k + max-depth tasks))))]) + (letrec ([mk-got-one + (lambda (immediate-next? get-nth success-k) + (lambda (val stream last-consumed-token depth max-depth tasks next-k) + (let ([tasks (if immediate-next? + (queue-next next-k tasks) + tasks)]) + (success-k val stream last-consumed-token depth max-depth + tasks + (lambda (success-k fail-k max-depth tasks) + (let ([tasks (if immediate-next? + tasks + (queue-next next-k tasks))]) + (get-nth max-depth tasks success-k fail-k)))))))] + [get-first + (lambda (max-depth tasks success-k fail-k) + (wait-for-answer #f max-depth tasks answer-key + (mk-got-one #t get-first success-k) + (lambda (max-depth tasks) + (get-second max-depth tasks success-k fail-k)) + #f))] + [get-second + (lambda (max-depth tasks success-k fail-k) + (wait-for-answer #f max-depth tasks answer-key + (mk-got-one #f get-second success-k) + fail-k #f))]) + (get-first max-depth tasks success-k fail-k))))) + +;; Non-terminal alternatives where the first is "simple" can be done +;; sequentially, which is simpler +(define (parse-or parse-a parse-b + stream last-consumed-token depth end success-k fail-k max-depth tasks) + (letrec ([mk-got-k + (lambda (success-k fail-k) + (lambda (val stream last-consumed-token depth max-depth tasks next-k) + (success-k val stream last-consumed-token depth + max-depth tasks + (lambda (success-k fail-k max-depth tasks) + (next-k (mk-got-k success-k fail-k) + (mk-fail-k success-k fail-k) + max-depth tasks)))))] + [mk-fail-k + (lambda (success-k fail-k) + (lambda (max-depth tasks) + (parse-b stream last-consumed-token depth end success-k fail-k max-depth tasks)))]) + (parse-a stream last-consumed-token depth end + (mk-got-k success-k fail-k) + (mk-fail-k 
success-k fail-k) + max-depth tasks))) + +;; Starts a thread +(define queue-task + (lambda (tasks t [progress? #t]) + (make-tasks (tasks-active tasks) + (cons t (tasks-active-back tasks)) + (tasks-waits tasks) + (tasks-multi-waits tasks) + (tasks-cache tasks) + (or progress? (tasks-progress? tasks))))) + +;; Reports an answer to a waiting thread: +(define (report-answer answer-key max-depth tasks val) + (let ([v (hash-ref (tasks-waits tasks) answer-key (lambda () #f))]) + (if v + (let ([tasks (make-tasks (cons (v val) + (tasks-active tasks)) + (tasks-active-back tasks) + (tasks-waits tasks) + (tasks-multi-waits tasks) + (tasks-cache tasks) + #t)]) + (hash-remove! (tasks-waits tasks) answer-key) + (swap-task max-depth tasks)) + ;; We have an answer ready too fast; wait + (swap-task max-depth + (queue-task tasks + (lambda (max-depth tasks) + (report-answer answer-key max-depth tasks val)) + #f))))) + +;; Reports an answer to multiple waiting threads: +(define (report-answer-all answer-key max-depth tasks val k) + (let ([v (hash-ref (tasks-multi-waits tasks) answer-key (lambda () null))]) + (hash-remove! (tasks-multi-waits tasks) answer-key) + (let ([tasks (make-tasks (append (map (lambda (a) (a val)) v) + (tasks-active tasks)) + (tasks-active-back tasks) + (tasks-waits tasks) + (tasks-multi-waits tasks) + (tasks-cache tasks) + #t)]) + (k max-depth tasks)))) + +;; Waits for an answer; if `multi?' is #f, this is sole waiter, otherwise +;; there might be many. Use wither #t or #f (and `report-answer' or +;; `report-answer-all', resptively) consistently for a particular answer key. +(define (wait-for-answer multi? max-depth tasks answer-key success-k fail-k deadlock-k) + (let ([wait (lambda (val) + (lambda (max-depth tasks) + (if val + (if (null? 
val) + (fail-k max-depth tasks) + (let-values ([(val stream last-consumed-token depth next-k) (apply values val)]) + (success-k val stream last-consumed-token depth max-depth tasks next-k))) + (deadlock-k max-depth tasks))))]) + (if multi? + (hash-set! (tasks-multi-waits tasks) answer-key + (cons wait (hash-ref (tasks-multi-waits tasks) answer-key + (lambda () null)))) + (hash-set! (tasks-waits tasks) answer-key wait)) + (let ([tasks (make-tasks (tasks-active tasks) + (tasks-active-back tasks) + (tasks-waits tasks) + (tasks-multi-waits tasks) + (tasks-cache tasks) + #t)]) + (swap-task max-depth tasks)))) + +;; Swap thread +(define (swap-task max-depth tasks) + ;; Swap in first active: + (if (null? (tasks-active tasks)) + (if (tasks-progress? tasks) + (swap-task max-depth + (make-tasks (reverse (tasks-active-back tasks)) + null + (tasks-waits tasks) + (tasks-multi-waits tasks) + (tasks-cache tasks) + #f)) + ;; No progress, so issue failure for all multi-waits + (if (zero? (hash-count (tasks-multi-waits tasks))) + (error 'swap-task "Deadlock") + (swap-task max-depth + (make-tasks (apply + append + (hash-map (tasks-multi-waits tasks) + (lambda (k l) + (map (lambda (v) (v #f)) l)))) + (tasks-active-back tasks) + (tasks-waits tasks) + (make-hasheq) + (tasks-cache tasks) + #t)))) + (let ([t (car (tasks-active tasks))] + [tasks (make-tasks (cdr (tasks-active tasks)) + (tasks-active-back tasks) + (tasks-waits tasks) + (tasks-multi-waits tasks) + (tasks-cache tasks) + (tasks-progress? 
tasks))]) + (t max-depth tasks)))) + +;; Finds the symbolic representative of a token class +(define-for-syntax (map-token toks tok) + (car (token-identifier-mapping-get toks tok))) + +(define no-pos-val (make-position #f #f #f)) +(define-for-syntax no-pos + (let ([npv ((syntax-local-certifier) #'no-pos-val)]) + (lambda (stx) npv))) +(define-for-syntax at-tok-pos + (lambda (sel expr) + (lambda (stx) + #`(let ([v #,expr]) (if v (#,sel v) no-pos-val))))) + +;; Builds a matcher for a particular alternative +(define-for-syntax (build-match nts toks pat handle $ctx) + (let loop ([pat pat] + [pos 1]) + (if (null? pat) + #`(success-k #,handle stream last-consumed-token depth max-depth tasks + (lambda (success-k fail-k max-depth tasks) + (fail-k max-depth tasks))) + (let ([id (datum->syntax (car pat) + (string->symbol (format "$~a" pos)))] + [id-start-pos (datum->syntax (car pat) + (string->symbol (format "$~a-start-pos" pos)))] + [id-end-pos (datum->syntax (car pat) + (string->symbol (format "$~a-end-pos" pos)))] + [n-end-pos (and (null? (cdr pat)) + (datum->syntax (car pat) '$n-end-pos))]) + (cond + [(bound-identifier-mapping-get nts (car pat) (lambda () #f)) + ;; Match non-termimal + #`(parse-and + ;; First part is simple? (If so, we don't have to parallelize the `and'.) + #,(let ([l (bound-identifier-mapping-get nts (car pat) (lambda () #f))]) + (or (not l) + (andmap values (caddr l)))) + #,(car pat) + (let ([original-stream stream]) + (lambda (#,id stream last-consumed-token depth end success-k fail-k max-depth tasks) + (let-syntax ([#,id-start-pos (at-tok-pos #'(if (eq? original-stream stream) + tok-end + tok-start) + #'(if (eq? original-stream stream) + last-consumed-token + (and (pair? 
original-stream) + (car original-stream))))] + [#,id-end-pos (at-tok-pos #'tok-end #'last-consumed-token)] + #,@(if n-end-pos + #`([#,n-end-pos (at-tok-pos #'tok-end #'last-consumed-token)]) + null)) + #,(loop (cdr pat) (add1 pos))))) + stream last-consumed-token depth + #,(let ([cnt (apply + + (map (lambda (item) + (cond + [(bound-identifier-mapping-get nts item (lambda () #f)) + => (lambda (l) (car l))] + [else 1])) + (cdr pat)))]) + #`(- end #,cnt)) + success-k fail-k max-depth tasks)] + [else + ;; Match token + (let ([tok-id (map-token toks (car pat))]) + #`(if (and (pair? stream) + (eq? '#,tok-id (tok-name (car stream)))) + (let* ([stream-a (car stream)] + [#,id (tok-val stream-a)] + [last-consumed-token (car stream)] + [stream (cdr stream)] + [depth (add1 depth)]) + (let ([max-depth (max max-depth depth)]) + (let-syntax ([#,id-start-pos (at-tok-pos #'tok-start #'stream-a)] + [#,id-end-pos (at-tok-pos #'tok-end #'stream-a)] + #,@(if n-end-pos + #`([#,n-end-pos (at-tok-pos #'tok-end #'stream-a)]) + null)) + #,(loop (cdr pat) (add1 pos))))) + (fail-k max-depth tasks)))]))))) + +;; Starts parsing to match a non-terminal. There's a minor +;; optimization that checks for known starting tokens. Otherwise, +;; use the cache, block if someone else is already trying the match, +;; and cache the result if it's computed. +;; The cache maps nontermial+startingpos+iteration to a result, where +;; the iteration is 0 for the first match attempt, 1 for the second, +;; etc. +(define (parse-nt/share key min-cnt init-tokens stream last-consumed-token depth end max-depth tasks success-k fail-k k) + (if (and (positive? min-cnt) + (pair? 
stream) + (not (memq (tok-name (car stream)) init-tokens))) + ;; No such leading token; give up + (fail-k max-depth tasks) + ;; Run pattern + (let loop ([n 0] + [success-k success-k] + [fail-k fail-k] + [max-depth max-depth] + [tasks tasks] + [k k]) + (let ([answer-key (gensym)] + [table-key (vector key depth n)] + [old-depth depth] + [old-stream stream]) + #;(printf "Loop ~a\n" table-key) + (cond + [(hash-ref (tasks-cache tasks) table-key (lambda () #f)) + => (lambda (result) + #;(printf "Reuse ~a\n" table-key) + (result success-k fail-k max-depth tasks))] + [else + #;(printf "Try ~a ~a\n" table-key (map tok-name stream)) + (hash-set! (tasks-cache tasks) table-key + (lambda (success-k fail-k max-depth tasks) + #;(printf "Wait ~a ~a\n" table-key answer-key) + (wait-for-answer #t max-depth tasks answer-key success-k fail-k + (lambda (max-depth tasks) + #;(printf "Deadlock ~a ~a\n" table-key answer-key) + (fail-k max-depth tasks))))) + (let result-loop ([max-depth max-depth][tasks tasks][k k]) + (letrec ([orig-stream stream] + [new-got-k + (lambda (val stream last-consumed-token depth max-depth tasks next-k) + ;; Check whether we already have a result that consumed the same amount: + (let ([result-key (vector #f key old-depth depth)]) + (cond + [(hash-ref (tasks-cache tasks) result-key (lambda () #f)) + ;; Go for the next-result + (result-loop max-depth + tasks + (lambda (end max-depth tasks success-k fail-k) + (next-k success-k fail-k max-depth tasks)))] + [else + #;(printf "Success ~a ~a\n" table-key + (map tok-name (let loop ([d old-depth][s old-stream]) + (if (= d depth) + null + (cons (car s) (loop (add1 d) (cdr s))))))) + (let ([next-k (lambda (success-k fail-k max-depth tasks) + (loop (add1 n) + success-k + fail-k + max-depth + tasks + (lambda (end max-depth tasks success-k fail-k) + (next-k success-k fail-k max-depth tasks))))]) + (hash-set! (tasks-cache tasks) result-key #t) + (hash-set! 
(tasks-cache tasks) table-key + (lambda (success-k fail-k max-depth tasks) + (success-k val stream last-consumed-token depth max-depth tasks next-k))) + (report-answer-all answer-key + max-depth + tasks + (list val stream last-consumed-token depth next-k) + (lambda (max-depth tasks) + (success-k val stream last-consumed-token depth max-depth tasks next-k))))])))] + [new-fail-k + (lambda (max-depth tasks) + #;(printf "Failure ~a\n" table-key) + (hash-set! (tasks-cache tasks) table-key + (lambda (success-k fail-k max-depth tasks) + (fail-k max-depth tasks))) + (report-answer-all answer-key + max-depth + tasks + null + (lambda (max-depth tasks) + (fail-k max-depth tasks))))]) + (k end max-depth tasks new-got-k new-fail-k)))]))))) + +(define-syntax (cfg-parser stx) + (syntax-case stx () + [(_ clause ...) + (let ([clauses (syntax->list #'(clause ...))]) + (let-values ([(start grammar cfg-error parser-clauses src-pos?) + (let ([all-toks (apply + append + (map (lambda (clause) + (syntax-case clause (tokens) + [(tokens t ...) + (apply + append + (map (lambda (t) + (let ([v (syntax-local-value t (lambda () #f))]) + (cond + [(terminals-def? v) + (map (lambda (v) + (cons v #f)) + (syntax->list (terminals-def-t v)))] + [(e-terminals-def? v) + (map (lambda (v) + (cons v #t)) + (syntax->list (e-terminals-def-t v)))] + [else null]))) + (syntax->list #'(t ...))))] + [_else null])) + clauses))] + [all-end-toks (apply + append + (map (lambda (clause) + (syntax-case clause (end) + [(end t ...) + (syntax->list #'(t ...))] + [_else null])) + clauses))]) + (let loop ([clauses clauses] + [cfg-start #f] + [cfg-grammar #f] + [cfg-error #f] + [src-pos? #f] + [parser-clauses null]) + (if (null? clauses) + (values cfg-start + cfg-grammar + cfg-error + (reverse parser-clauses) + src-pos?) + (syntax-case (car clauses) (start error grammar src-pos) + [(start tok) + (loop (cdr clauses) #'tok cfg-grammar cfg-error src-pos? 
parser-clauses)] + [(error expr) + (loop (cdr clauses) cfg-start cfg-grammar #'expr src-pos? parser-clauses)] + [(grammar [nt [pat handle0 handle ...] ...] ...) + (let ([nts (make-bound-identifier-mapping)] + [toks (make-token-identifier-mapping)] + [end-toks (make-token-identifier-mapping)] + [nt-ids (syntax->list #'(nt ...))] + [patss (map (lambda (stx) + (map syntax->list (syntax->list stx))) + (syntax->list #'((pat ...) ...)))]) + (for-each (lambda (nt) + (bound-identifier-mapping-put! nts nt (list 0))) + nt-ids) + (for-each (lambda (t) + (token-identifier-mapping-put! end-toks t #t)) + all-end-toks) + (for-each (lambda (t) + (unless (token-identifier-mapping-get end-toks (car t) (lambda () #f)) + (let ([id (gensym (syntax-e (car t)))]) + (token-identifier-mapping-put! toks (car t) + (cons id (cdr t)))))) + all-toks) + ;; Compute min max size for each non-term: + (nt-fixpoint + nts + (lambda (nt pats old-list) + (let ([new-cnt + (apply + min + (map (lambda (pat) + (apply + + + (map (lambda (elem) + (car + (bound-identifier-mapping-get nts + elem + (lambda () (list 1))))) + pat))) + pats))]) + (if (new-cnt . > . (car old-list)) + (cons new-cnt (cdr old-list)) + old-list))) + nt-ids patss) + ;; Compute set of toks that must appear at the beginning + ;; for a non-terminal + (nt-fixpoint + nts + (lambda (nt pats old-list) + (let ([new-list + (apply + append + (map (lambda (pat) + (let loop ([pat pat]) + (if (pair? pat) + (let ([l (bound-identifier-mapping-get + nts + (car pat) + (lambda () + (list 1 (map-token toks (car pat)))))]) + ;; If the non-terminal can match 0 things, + ;; then it might match something from the + ;; next pattern element. Otherwise, it must + ;; match the first element: + (if (zero? (car l)) + (append (cdr l) (loop (cdr pat))) + (cdr l))) + null))) + pats))]) + (let ([new (filter (lambda (id) + (andmap (lambda (id2) + (not (eq? id id2))) + (cdr old-list))) + new-list)]) + (if (pair? 
new) + ;; Drop dups in new list: + (let ([new (let loop ([new new]) + (if (null? (cdr new)) + new + (if (ormap (lambda (id) + (eq? (car new) id)) + (cdr new)) + (loop (cdr new)) + (cons (car new) (loop (cdr new))))))]) + (cons (car old-list) (append new (cdr old-list)))) + old-list)))) + nt-ids patss) + ;; Determine left-recursive clauses: + (for-each (lambda (nt pats) + (let ([l (bound-identifier-mapping-get nts nt)]) + (bound-identifier-mapping-put! nts nt (list (car l) + (cdr l) + (map (lambda (x) #f) pats))))) + nt-ids patss) + (nt-fixpoint + nts + (lambda (nt pats old-list) + (list (car old-list) + (cadr old-list) + (map (lambda (pat simple?) + (or simple? + (let ([l (map (lambda (elem) + (bound-identifier-mapping-get + nts + elem + (lambda () #f))) + pat)]) + (andmap (lambda (i) + (or (not i) + (andmap values (caddr i)))) + l)))) + pats (caddr old-list)))) + nt-ids patss) + ;; Build a definition for each non-term: + (loop (cdr clauses) + cfg-start + (map (lambda (nt pats handles $ctxs) + (define info (bound-identifier-mapping-get nts nt)) + (list nt + #`(let ([key (gensym '#,nt)]) + (lambda (stream last-consumed-token depth end success-k fail-k max-depth tasks) + (parse-nt/share + key #,(car info) '#,(cadr info) stream last-consumed-token depth end + max-depth tasks + success-k fail-k + (lambda (end max-depth tasks success-k fail-k) + #,(let loop ([pats pats] + [handles (syntax->list handles)] + [$ctxs (syntax->list $ctxs)] + [simple?s (caddr info)]) + (if (null? pats) + #'(fail-k max-depth tasks) + #`(#,(if (or (null? 
(cdr pats)) + (car simple?s)) + #'parse-or + #'parse-parallel-or) + (lambda (stream last-consumed-token depth end success-k fail-k max-depth tasks) + #,(build-match nts + toks + (car pats) + (car handles) + (car $ctxs))) + (lambda (stream last-consumed-token depth end success-k fail-k max-depth tasks) + #,(loop (cdr pats) + (cdr handles) + (cdr $ctxs) + (cdr simple?s))) + stream last-consumed-token depth end success-k fail-k max-depth tasks))))))))) + nt-ids + patss + (syntax->list #'(((begin handle0 handle ...) ...) ...)) + (syntax->list #'((handle0 ...) ...))) + cfg-error + src-pos? + (list* + (with-syntax ([((tok tok-id . $e) ...) + (token-identifier-mapping-map toks + (lambda (k v) + (list* k + (car v) + (if (cdr v) + #f + '$1))))] + [(pos ...) + (if src-pos? + #'($1-start-pos $1-end-pos) + #'(#f #f))]) + #`(grammar (start [() null] + [(atok start) (cons $1 $2)]) + (atok [(tok) (make-tok 'tok-id 'tok $e pos ...)] ...))) + #`(start start) + parser-clauses)))] + [(grammar . _) + (raise-syntax-error + #f + "bad grammar clause" + stx + (car clauses))] + [(src-pos) + (loop (cdr clauses) + cfg-start + cfg-grammar + cfg-error + #t + (cons (car clauses) parser-clauses))] + [_else + (loop (cdr clauses) + cfg-start + cfg-grammar + cfg-error + src-pos? + (cons (car clauses) parser-clauses))]))))]) + #`(let ([orig-parse (parser + [error (lambda (a b c) + (error 'cfg-parser "unexpected ~a token: ~a" b c))] + . #,parser-clauses)] + [error-proc #,cfg-error]) + (letrec #,grammar + (lambda (get-tok) + (let ([tok-list (orig-parse get-tok)]) + (letrec ([success-k + (lambda (val stream last-consumed-token depth max-depth tasks next) + (if (null? stream) + val + (next success-k fail-k max-depth tasks)))] + [fail-k (lambda (max-depth tasks) + (cond + [(null? 
tok-list) + (if error-proc + (error-proc #t + 'no-tokens + #f + (make-position #f #f #f) + (make-position #f #f #f)) + (error + 'cfg-parse + "no tokens"))] + [else + (let ([bad-tok (list-ref tok-list + (min (sub1 (length tok-list)) + max-depth))]) + (if error-proc + (error-proc #t + (tok-orig-name bad-tok) + (tok-val bad-tok) + (tok-start bad-tok) + (tok-end bad-tok)) + (error + 'cfg-parse + "failed at ~a" + (tok-val bad-tok))))]))]) + (#,start tok-list + ;; we simulate a token at the very beginning with zero width + ;; for use with the position-generating code (*-start-pos, *-end-pos). + (if (null? tok-list) + (tok #f #f #f + (position 1 + #,(if src-pos? #'1 #'#f) + #,(if src-pos? #'0 #'#f)) + (position 1 + #,(if src-pos? #'1 #'#f) + #,(if src-pos? #'0 #'#f))) + (tok (tok-name (car tok-list)) + (tok-orig-name (car tok-list)) + (tok-val (car tok-list)) + (tok-start (car tok-list)) + (tok-start (car tok-list)))) + 0 + (length tok-list) + success-k + fail-k + 0 + (make-tasks null null + (make-hasheq) (make-hasheq) + (make-hash) #t)))))))))])) + + +(module* test racket/base + (require (submod "..") + parser-tools/lex + racket/block + rackunit) + + ;; Test: parsing regular expressions. 
+ ;; Here is a test case on locations: + (block + (define-tokens regexp-tokens (ANCHOR STAR OR LIT LPAREN RPAREN EOF)) + (define lex (lexer-src-pos ["|" (token-OR lexeme)] + ["^" (token-ANCHOR lexeme)] + ["*" (token-STAR lexeme)] + [(repetition 1 +inf.0 alphabetic) (token-LIT lexeme)] + ["(" (token-LPAREN lexeme)] + [")" (token-RPAREN lexeme)] + [whitespace (return-without-pos (lex input-port))] + [(eof) (token-EOF 'eof)])) + (define -parse (cfg-parser + (tokens regexp-tokens) + (start top) + (end EOF) + (src-pos) + (grammar [top [(maybe-anchor regexp) + (cond [$1 + `(anchored ,$2 ,(pos->sexp $1-start-pos) ,(pos->sexp $2-end-pos))] + [else + `(unanchored ,$2 ,(pos->sexp $1-start-pos) ,(pos->sexp $2-end-pos))])]] + [maybe-anchor [(ANCHOR) #t] + [() #f]] + [regexp [(regexp STAR) `(star ,$1 ,(pos->sexp $1-start-pos) ,(pos->sexp $2-end-pos))] + [(regexp OR regexp) `(or ,$1 ,$3 ,(pos->sexp $1-start-pos) ,(pos->sexp $3-end-pos))] + [(LPAREN regexp RPAREN) `(group ,$2 ,(pos->sexp $1-start-pos) ,(pos->sexp $3-end-pos))] + [(LIT) `(lit ,$1 ,(pos->sexp $1-start-pos) ,(pos->sexp $1-end-pos))]]))) + (define (pos->sexp pos) + (position-offset pos)) + + (define (parse s) + (define ip (open-input-string s)) + (port-count-lines! ip) + (-parse (lambda () (lex ip)))) + + (check-equal? (parse "abc") + '(unanchored (lit "abc" 1 4) 1 4)) + (check-equal? 
(parse "a | (b*) | c") + '(unanchored (or (or (lit "a" 1 2) + (group (star (lit "b" 6 7) 6 8) 5 9) + 1 9) + (lit "c" 12 13) + 1 13) + 1 13))) + + + + + + ;; Tests used during development + (define-tokens non-terminals (PLUS MINUS STAR BAR COLON EOF)) + + (define lex + (lexer + ["+" (token-PLUS '+)] + ["-" (token-MINUS '-)] + ["*" (token-STAR '*)] + ["|" (token-BAR '||)] + [":" (token-COLON '|:|)] + [whitespace (lex input-port)] + [(eof) (token-EOF 'eof)])) + + (define parse + (cfg-parser + (tokens non-terminals) + (start ) + (end EOF) + (error (lambda (a b stx) + (error 'parse "failed at ~s" stx))) + (grammar [ [(PLUS) "plus"] + [( BAR ) (list $1 $2 $3)] + [( COLON) (list $1)]] + [ [(MINUS) "minus"] + [( STAR) (cons $1 $2)]] + [ [( MINUS) "yes"]] + [ [(PLUS) 'plus] + [(MINUS) 'minus]] + [ [() '0] + [( PLUS) (add1 $1)] + [( PLUS) (add1 $1)]]))) + + (let ([p (open-input-string #;"+*|-|-*|+**" #;"-|+*|+**" + #;"+*|+**|-" #;"-|-*|-|-*" + #;"-|-*|-|-**|-|-*|-|-**" + "-|-*|-|-**|-|-*|-|-***|-|-*|-|-**|-|-*|-|-****|-|-*|-|-**|-|-*|-|-*** + |-|-*|-|-**|-|-*|-|-*****|-|-*|-|-**|-|-*|-|-***|-|-*|-|-**|-|-*|-|-****| + -|-*|-|-**|-|-*|-|-***|-|-*|-|-**|-|-*|-|-*****" + ;; This one fails: + #;"+*")]) + (check-equal? (parse (lambda () (lex p))) + '((((((((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *) + || + (((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *)) + . + *) + || + (((((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *) + || + (((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *)) + . + *)) + . + *) + || + (((((((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . 
*) || (("minus" || "minus") . *)) . *)) . *) + || + (((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *)) + . + *) + || + (((((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *) + || + (((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *)) + . + *)) + . + *))))) diff --git a/beautiful-racket-ragg/br/ragg/codegen/codegen.rkt b/beautiful-racket-ragg/br/ragg/codegen/codegen.rkt new file mode 100755 index 0000000..d5594a8 --- /dev/null +++ b/beautiful-racket-ragg/br/ragg/codegen/codegen.rkt @@ -0,0 +1,429 @@ +#lang racket/base + +(require (for-template racket/base) + racket/list + racket/set + racket/syntax + syntax/srcloc + br/ragg/rules/stx-types + "flatten.rkt" + syntax/id-table + (prefix-in sat: "satisfaction.rkt") + (prefix-in support: br/ragg/support) + (prefix-in stxparse: syntax/parse)) + +(provide rules-codegen) + + +;; Generates the body of the module. +;; FIXME: abstract this so we can just call (rules ...) without +;; generating the whole module body. +(define (rules-codegen stx + #:parser-provider-module [parser-provider-module 'parser-tools/yacc] + #:parser-provider-form [parser-provider-form 'parser]) + (syntax-case stx () + [(_ r ...) + (begin + ;; (listof stx) + (define rules (syntax->list #'(r ...))) + + (when (empty? rules) + (raise-syntax-error 'ragg + (format "The grammar does not appear to have any rules") + stx)) + + (check-all-rules-defined! rules) + (check-all-rules-no-duplicates! rules) + (check-all-rules-satisfiable! rules) + + ;; We flatten the rules so we can use the yacc-style ruleset that parser-tools + ;; supports. 
+ (define flattened-rules (flatten-rules rules)) + + (define generated-rule-codes (map flat-rule->yacc-rule flattened-rules)) + + ;; The first rule, by default, is the start rule. + (define rule-ids (for/list ([a-rule (in-list rules)]) + (rule-id a-rule))) + (define start-id (first rule-ids)) + + + (define-values (implicit-tokens ;; (listof identifier) + explicit-tokens) ;; (listof identifier) + (rules-collect-token-types rules)) + + ;; (listof symbol) + (define implicit-token-types + (map string->symbol + (set->list (list->set (map syntax-e implicit-tokens))))) + + ;; (listof symbol) + (define explicit-token-types + (set->list (list->set (map syntax-e explicit-tokens)))) + + ;; (listof symbol) + (define token-types + (set->list (list->set (append (map (lambda (x) (string->symbol (syntax-e x))) + implicit-tokens) + (map syntax-e explicit-tokens))))) + + (with-syntax ([start-id start-id] + + [(token-type ...) token-types] + + [(token-type-constructor ...) + (map (lambda (x) (string->symbol (format "token-~a" x))) + token-types)] + + [(explicit-token-types ...) explicit-token-types] + [(implicit-token-types ...) implicit-token-types] + [(implicit-token-types-str ...) (map symbol->string implicit-token-types)] + [(implicit-token-type-constructor ...) 
+ (map (lambda (x) (string->symbol (format "token-~a" x))) + implicit-token-types)] + [generated-grammar #`(grammar #,@generated-rule-codes)] + [parser-module parser-provider-module] + [parser-form parser-provider-form]) + (quasisyntax/loc stx + (begin + (require parser-tools/lex + parser-module + br/ragg/codegen/runtime + br/ragg/support + br/ragg/private/internal-support + racket/set + (for-syntax syntax/parse racket/base)) + + (provide parse + make-rule-parser + all-token-types + #;current-source + #;current-parser-error-handler + #;current-tokenizer-error-handler + #;[struct-out exn:fail:parsing] + ) + + (define-tokens enumerated-tokens (token-type ...)) + + ;; all-token-types lists all the tokens (except for EOF) + (define all-token-types + (set-remove (set 'token-type ...) 'EOF)) + + ;; For internal use by the permissive tokenizer only: + (define all-tokens-hash/mutable + (make-hash (list ;; Note: we also allow the eof object here, to make + ;; the permissive tokenizer even nicer to work with. + (cons eof token-EOF) + (cons 'token-type token-type-constructor) ...))) + + + #;(define default-lex/1 + (lexer-src-pos [implicit-token-types-str + (token 'implicit-token-types lexeme)] + ... + [(eof) (token eof)])) + + (define-syntax (make-rule-parser stx-2) + (syntax-parse stx-2 + [(_ start-rule:id) + (begin + ;; HACK HACK HACK + ;; The cfg-parser depends on the start-rule provided in (start ...) to have the same + ;; context as the rest of this body, so I need to hack this. I don't like this, but + ;; I don't know what else to do. Hence recolored-start-rule. 
+ (unless (member (syntax-e #'start-rule) + '#,(map syntax-e rule-ids)) + (raise-syntax-error #f + (format "Rule ~a is not defined in the grammar" (syntax-e #'start-rule)) + stx-2)) + + (define recolored-start-rule (datum->syntax (syntax #,stx) (syntax-e #'start-rule))) + #`(let ([THE-GRAMMAR (parser-form (tokens enumerated-tokens) + (src-pos) + (start #,recolored-start-rule) + (end EOF) + (error THE-ERROR-HANDLER) + generated-grammar)]) + (case-lambda [(tokenizer) + (define next-token + (make-permissive-tokenizer tokenizer all-tokens-hash/mutable)) + (THE-GRAMMAR next-token)] + [(source tokenizer) + (parameterize ([current-source source]) + (parse tokenizer))])))])) + + (define parse (make-rule-parser start-id))))))])) + + +;; Given a flattened rule, returns a syntax for the code that +;; preserves as much source location as possible. +;; +;; Each rule is defined to return a list with the following structure: +;; +;; stx :== (name (U tokens rule-stx) ...) +;; +(define (flat-rule->yacc-rule a-flat-rule) + (syntax-case a-flat-rule () + [(rule-type origin name clauses ...) + (begin + (define translated-clauses + (map (lambda (clause) (translate-clause clause #'name #'origin)) + (syntax->list #'(clauses ...)))) + (with-syntax ([(translated-clause ...) translated-clauses]) + #`[name translated-clause ...]))])) + + + +;; translates a single primitive rule clause. +;; A clause is a simple list of ids, lit, vals, and inferred-id elements. +;; The action taken depends on the pattern type. +(define (translate-clause a-clause rule-name/false origin) + (define translated-patterns + (let loop ([primitive-patterns (syntax->list a-clause)]) + (cond + [(empty? 
primitive-patterns) + '()] + [else + (cons (syntax-case (first primitive-patterns) (id lit token inferred-id) + [(id val) + #'val] + [(lit val) + (datum->syntax #f (string->symbol (syntax-e #'val)) #'val)] + [(token val) + #'val] + [(inferred-id val reason) + #'val]) + (loop (rest primitive-patterns)))]))) + + (define translated-actions + (for/list ([translated-pattern (in-list translated-patterns)] + [primitive-pattern (syntax->list a-clause)] + [pos (in-naturals 1)]) + (with-syntax ([$X + (format-id translated-pattern "$~a" pos)] + [$X-start-pos + (format-id translated-pattern "$~a-start-pos" pos)] + [$X-end-pos + (format-id translated-pattern "$~a-end-pos" pos)]) + (syntax-case primitive-pattern (id lit token inferred-id) + ;; When a rule usage is inferred, the value of $X is a syntax object + ;; whose head is the name of the inferred rule . We strip that out, + ;; leaving the residue to be absorbed. + [(inferred-id val reason) + #'(syntax-case $X () + [(inferred-rule-name . rest) + (syntax->list #'rest)])] + [(id val) + #`(list $X)] + [(lit val) + #`(list (atomic-datum->syntax $X $X-start-pos $X-end-pos))] + [(token val) + #`(list (atomic-datum->syntax $X $X-start-pos $X-end-pos))])))) + + (define whole-rule-loc + (if (empty? translated-patterns) + #'(list (current-source) #f #f #f #f) + (with-syntax ([$1-start-pos (datum->syntax (first translated-patterns) '$1-start-pos)] + [$n-end-pos (format-id (last translated-patterns) "$~a-end-pos" (length translated-patterns))]) + #`(positions->srcloc $1-start-pos $n-end-pos)))) + + (with-syntax ([(translated-pattern ...) translated-patterns] + [(translated-action ...) translated-actions]) + #`[(translated-pattern ...) + (rule-components->syntax '#,rule-name/false translated-action ... 
+ #:srcloc #,whole-rule-loc)])) + + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; collect-token-types: (listof rule-syntax) -> (values (listof identifier) (listof identifier)) +;; +;; Given a rule, automatically derive the list of implicit and +;; explicit token types we need to generate. +;; +;; Note: EOF is reserved, and will always be included in the list +;; of explicit token types, though the user is not allow to express it themselves. +(define (rules-collect-token-types rules) + (define-values (implicit explicit) + (for/fold ([implicit '()] + [explicit (list (datum->syntax (first rules) 'EOF))]) + ([r (in-list rules)]) + (rule-collect-token-types r implicit explicit))) + (values (reverse implicit) (reverse explicit))) + +(define (rule-collect-token-types a-rule implicit explicit) + (syntax-case a-rule (rule) + [(rule id a-pattern) + (pattern-collect-implicit-token-types #'a-pattern implicit explicit)])) + +(define (pattern-collect-implicit-token-types a-pattern implicit explicit) + (let loop ([a-pattern a-pattern] + [implicit implicit] + [explicit explicit]) + (syntax-case a-pattern (id lit token choice repeat maybe seq) + [(id val) + (values implicit explicit)] + [(lit val) + (values (cons #'val implicit) explicit)] + [(token val) + (begin + (when (eq? (syntax-e #'val) 'EOF) + (raise-syntax-error #f "Token EOF is reserved and can not be used in a grammar" #'val)) + (values implicit (cons #'val explicit)))] + [(choice vals ...) + (for/fold ([implicit implicit] + [explicit explicit]) + ([v (in-list (syntax->list #'(vals ...)))]) + (loop v implicit explicit))] + [(repeat min val) + (loop #'val implicit explicit)] + [(maybe val) + (loop #'val implicit explicit)] + [(seq vals ...) 
+ (for/fold ([implicit implicit] + [explicit explicit]) + ([v (in-list (syntax->list #'(vals ...)))]) + (loop v implicit explicit))]))) + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; rule-id: rule -> identifier-stx +;; Get the binding id of a rule. +(define (rule-id a-rule) + (syntax-case a-rule (rule) + [(rule id a-pattern) + #'id])) + +(define (rule-pattern a-rule) + (syntax-case a-rule (rule) + [(rule id a-pattern) + #'a-pattern])) + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; check-all-rules-defined!: (listof rule-stx) -> void +(define (check-all-rules-defined! rules) + (define table (make-free-id-table)) + ;; Pass one: collect all the defined rule names. + (for ([a-rule (in-list rules)]) + (free-id-table-set! table (rule-id a-rule) #t)) + ;; Pass two: check each referenced id, and make sure it's been defined. + (for ([a-rule (in-list rules)]) + (for ([referenced-id (in-list (rule-collect-used-ids a-rule))]) + (unless (free-id-table-ref table referenced-id (lambda () #f)) + (raise-syntax-error #f (format "Rule ~a has no definition" (syntax-e referenced-id)) + referenced-id))))) + +;; check-all-rules-no-duplicates!: (listof rule-stx) -> void +(define (check-all-rules-no-duplicates! rules) + (define table (make-free-id-table)) + ;; Pass one: collect all the defined rule names. + (for ([a-rule (in-list rules)]) + (define maybe-other-rule-id (free-id-table-ref table (rule-id a-rule) (lambda () #f))) + (when maybe-other-rule-id + (raise-syntax-error #f (format "Rule ~a has a duplicate definition" (syntax-e (rule-id a-rule))) + (rule-id a-rule) + #f + (list (rule-id a-rule) maybe-other-rule-id))) + (free-id-table-set! 
table (rule-id a-rule) (rule-id a-rule)))) + + + +;; rule-collect-used-ids: rule-stx -> (listof identifier) +;; Given a rule, extracts a list of identifiers +(define (rule-collect-used-ids a-rule) + (syntax-case a-rule (rule) + [(rule id a-pattern) + (pattern-collect-used-ids #'a-pattern '())])) + +;; pattern-collect-used-ids: pattern-stx (listof identifier) -> (listof identifier) +;; Returns a flat list of rule identifiers referenced in the pattern. +(define (pattern-collect-used-ids a-pattern acc) + (let loop ([a-pattern a-pattern] + [acc acc]) + (syntax-case a-pattern (id lit token choice repeat maybe seq) + [(id val) + (cons #'val acc)] + [(lit val) + acc] + [(token val) + acc] + [(choice vals ...) + (for/fold ([acc acc]) + ([v (in-list (syntax->list #'(vals ...)))]) + (loop v acc))] + [(repeat min val) + (loop #'val acc)] + [(maybe val) + (loop #'val acc)] + [(seq vals ...) + (for/fold ([acc acc]) + ([v (in-list (syntax->list #'(vals ...)))]) + (loop v acc))]))) + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; check-all-rules-satisfiable: (listof rule-stx) -> void +;; Does a simple graph traversal / topological sort-like thing to make sure that, for +;; any rule, there's some finite sequence of tokens that +;; satisfies it. If this is not the case, then something horrible +;; has happened, and we need to tell the user about it. +;; +;; NOTE: Assumes all referenced rules have definitions. +(define (check-all-rules-satisfiable! rules) + (define toplevel-rule-table (make-free-id-table)) + (for ([a-rule (in-list rules)]) + (free-id-table-set! toplevel-rule-table + (rule-id a-rule) + (sat:make-and))) + (define leaves '()) + + (define (make-leaf) + (define a-leaf (sat:make-and)) + (set! leaves (cons a-leaf leaves)) + a-leaf) + + (define (process-pattern a-pattern) + (syntax-case a-pattern (id lit token choice repeat maybe seq) + [(id val) + (free-id-table-ref toplevel-rule-table #'val)] + [(lit val) + (make-leaf)] + [(token val) + (make-leaf)] + [(choice vals ...) 
+ (begin + (define an-or-node (sat:make-or)) + (for ([v (in-list (syntax->list #'(vals ...)))]) + (define a-child (process-pattern v)) + (sat:add-child! an-or-node a-child)) + an-or-node)] + [(repeat min val) + (syntax-case #'min () + [0 + (make-leaf)] + [else + (process-pattern #'val)])] + [(maybe val) + (make-leaf)] + [(seq vals ...) + (begin + (define an-and-node (sat:make-and)) + (for ([v (in-list (syntax->list #'(vals ...)))]) + (define a-child (process-pattern v)) + (sat:add-child! an-and-node a-child)) + an-and-node)])) + + (for ([a-rule (in-list rules)]) + (define rule-node (free-id-table-ref toplevel-rule-table (rule-id a-rule))) + (sat:add-child! rule-node (process-pattern (rule-pattern a-rule)))) + + (for ([a-leaf leaves]) + (sat:visit! a-leaf)) + + (for ([a-rule (in-list rules)]) + (define rule-node (free-id-table-ref toplevel-rule-table (rule-id a-rule))) + (unless (sat:node-yes? rule-node) + (raise-syntax-error #f (format "Rule ~a has no finite derivation" (syntax-e (rule-id a-rule))) + (rule-id a-rule))))) diff --git a/beautiful-racket-ragg/br/ragg/codegen/flatten.rkt b/beautiful-racket-ragg/br/ragg/codegen/flatten.rkt new file mode 100755 index 0000000..44a78d3 --- /dev/null +++ b/beautiful-racket-ragg/br/ragg/codegen/flatten.rkt @@ -0,0 +1,186 @@ +#lang racket/base +(require br/ragg/rules/stx-types + (for-syntax racket/base)) + +(provide flatten-rule + flatten-rules + prim-rule) + + + +(define (make-fresh-name) + (let ([n 0]) + (lambda () + (set! n (add1 n)) + (string->symbol (format "%rule~a" n))))) + +(define default-fresh-name + (make-fresh-name)) + + +;; Translates rules to lists of primitive rules. 
+ + +(define (flatten-rules rules #:fresh-name [fresh-name default-fresh-name]) + (define ht (make-hash)) + (apply append (map (lambda (a-rule) (flatten-rule a-rule + #:ht ht + #:fresh-name fresh-name)) + rules))) + + +;; flatten-rule: rule -> (listof primitive-rule) +(define (flatten-rule a-rule + #:fresh-name [fresh-name default-fresh-name] + + ;; ht: (hashtableof pattern-hash-key pat) + #:ht [ht (make-hash)]) + + (let recur ([a-rule a-rule] + [inferred? #f]) + + ;; lift-nonprimitive-pattern: pattern -> (values (listof primitive-rule) pattern) + ;; Turns non-primitive patterns into primitive patterns, and produces a set of + ;; derived rules. + (define (lift-nonprimitive-pattern a-pat) + (cond + [(primitive-pattern? a-pat) + (values '() (linearize-primitive-pattern a-pat))] + [(hash-has-key? ht (pattern->hash-key a-pat)) + (values '() (list (hash-ref ht (pattern->hash-key a-pat))))] + [else + (define head (syntax-case a-pat () [(head rest ...) #'head])) + (define new-name (datum->syntax #f (fresh-name) a-pat)) + (define new-inferred-id (datum->syntax #f `(inferred-id ,new-name ,head) a-pat)) + (hash-set! ht (pattern->hash-key a-pat) new-inferred-id) + (values (recur #`(rule #,new-name #,a-pat) head) + (list new-inferred-id))])) + + (define (lift-nonprimitive-patterns pats) + (define-values (rules patterns) + (for/fold ([inferred-ruless '()] + [patternss '()]) + ([p (in-list pats)]) + (define-values (new-rules new-ps) + (lift-nonprimitive-pattern p)) + (values (cons new-rules inferred-ruless) + (cons new-ps patternss)))) + (values (apply append (reverse rules)) + (apply append (reverse patterns)))) + + (with-syntax ([head (if inferred? 
#'inferred-prim-rule #'prim-rule)] + [origin (syntax-case a-rule (rule) [(rule name (pat-head rest ...)) #'pat-head])]) + (syntax-case a-rule (rule) + [(rule name pat) + (syntax-case #'pat (id inferred-id lit token choice repeat maybe seq) + + ;; The primitive types stay as they are: + [(id val) + (list #'(head origin name [pat]))] + [(inferred-id val reason) + (list #'(head origin name [pat]))] + [(lit val) + (list #'(head origin name [pat]))] + [(token val) + (list #'(head origin name [pat]))] + + + ;; Everything else might need lifting: + [(choice sub-pat ...) + (begin + (define-values (inferred-ruless/rev new-sub-patss/rev) + (for/fold ([rs '()] [ps '()]) + ([p (syntax->list #'(sub-pat ...))]) + (let-values ([(new-r new-p) + (lift-nonprimitive-pattern p)]) + (values (cons new-r rs) (cons new-p ps))))) + (with-syntax ([((sub-pat ...) ...) (reverse new-sub-patss/rev)]) + (append (list #'(head origin name [sub-pat ...] ...)) + (apply append (reverse inferred-ruless/rev)))))] + + [(repeat min sub-pat) + (begin + (define-values (inferred-rules new-sub-pats) + (lift-nonprimitive-pattern #'sub-pat)) + (with-syntax ([(sub-pat ...) new-sub-pats]) + (cons (cond [(= (syntax-e #'min) 0) + #`(head origin name + [(inferred-id name repeat) sub-pat ...] + [])] + [(= (syntax-e #'min) 1) + #`(head origin name + [(inferred-id name repeat) sub-pat ...] + [sub-pat ...])]) + inferred-rules)))] + + [(maybe sub-pat) + (begin + (define-values (inferred-rules new-sub-pats) + (lift-nonprimitive-pattern #'sub-pat)) + (with-syntax ([(sub-pat ...) new-sub-pats]) + (cons #'(head origin name + [sub-pat ...] + []) + inferred-rules)))] + + [(seq sub-pat ...) + (begin + (define-values (inferred-rules new-sub-pats) + (lift-nonprimitive-patterns (syntax->list #'(sub-pat ...)))) + (with-syntax ([(sub-pat ...) new-sub-pats]) + (cons #'(head origin name [sub-pat ...]) + inferred-rules)))])])))) + + +;; Given a pattern, return a key appropriate for a hash. 
+(define (pattern->hash-key a-pat) + (syntax->datum a-pat)) + + +;; Returns true if the pattern looks primitive +(define (primitive-pattern? a-pat) + (syntax-case a-pat (id lit token choice repeat maybe seq) + [(id val) + #t] + [(lit val) + #t] + [(token val) + #t] + [(choice sub-pat ...) + #f] + [(repeat min val) + #f] + [(maybe sub-pat) + #f] + [(seq sub-pat ...) + (andmap primitive-pattern? (syntax->list #'(sub-pat ...)))])) + + +;; Given a primitive pattern (id, lit, token, and seqs only containing +;; primitive patterns), returns a linear sequence of just id, lits, +;; and tokens. +(define (linearize-primitive-pattern a-pat) + (define (traverse a-pat acc) + (syntax-case a-pat (id inferred-id lit token seq) + [(id val) + (cons a-pat acc)] + [(inferred-id val reason) + (cons a-pat acc)] + [(lit val) + (cons a-pat acc)] + [(token val) + (cons a-pat acc)] + [(seq vals ...) + (foldl traverse acc (syntax->list #'(vals ...)))])) + (reverse (traverse a-pat '()))) + + + +(define-syntax (prim-rule stx) + (raise-syntax-error #f "internal error: should not be macro expanded" stx)) + +(define-syntax (inferred-prim-rule stx) + (raise-syntax-error #f "internal error: should not be macro expanded" stx)) + +(define-syntax (inferred-id stx) + (raise-syntax-error #f "internal error: should not be macro expanded" stx)) diff --git a/beautiful-racket-ragg/br/ragg/codegen/lang/reader.rkt b/beautiful-racket-ragg/br/ragg/codegen/lang/reader.rkt new file mode 100755 index 0000000..b741674 --- /dev/null +++ b/beautiful-racket-ragg/br/ragg/codegen/lang/reader.rkt @@ -0,0 +1,68 @@ +#lang s-exp syntax/module-reader +br/ragg/codegen/sexp-based-lang +#:read my-read +#:read-syntax my-read-syntax +#:info my-get-info +#:whole-body-readers? 
#t + +(require br/ragg/rules/parser + br/ragg/rules/lexer + br/ragg/rules/stx + br/ragg/rules/rule-structs) + +(define (my-read in) + (syntax->datum (my-read-syntax #f in))) + +(define (my-read-syntax src in) + (define-values (first-line first-column first-position) (port-next-location in)) + (define tokenizer (tokenize in)) + (define rules + (parameterize ([current-parser-error-handler + (lambda (tok-ok? tok-name tok-value start-pos end-pos) + (raise-syntax-error + #f + (format "Error while parsing grammar near: ~a [line=~a, column=~a, position=~a]" + tok-value + (pos-line start-pos) + (pos-col start-pos) + (pos-offset start-pos)) + (datum->syntax #f + (string->symbol (format "~a" tok-value)) + (list src + (pos-line start-pos) + (pos-col start-pos) + (pos-offset start-pos) + (if (and (number? (pos-offset end-pos)) + (number? (pos-offset start-pos))) + (- (pos-offset end-pos) + (pos-offset start-pos)) + #f)))))]) + (grammar-parser tokenizer))) + (define-values (last-line last-column last-position) (port-next-location in)) + (list (rules->stx src rules + #:original-stx (datum->syntax #f 'original-stx + (list src + first-line + first-column + first-position + (if (and (number? last-position) + (number? first-position)) + (- last-position first-position) + #f)))))) + + +;; Extension: we'd like to cooperate with DrRacket and tell +;; it to use the default, textual lexer and color scheme when +;; editing bf programs. +;; +;; See: http://docs.racket-lang.org/guide/language-get-info.html +;; for more details, as well as the documentation in +;; syntax/module-reader. 
+(define (my-get-info key default default-filter) + (case key + [(color-lexer) + (dynamic-require 'syntax-color/default-lexer + 'default-lexer)] + [else + (default-filter key default)])) + diff --git a/beautiful-racket-ragg/br/ragg/codegen/runtime.rkt b/beautiful-racket-ragg/br/ragg/codegen/runtime.rkt new file mode 100755 index 0000000..d38b244 --- /dev/null +++ b/beautiful-racket-ragg/br/ragg/codegen/runtime.rkt @@ -0,0 +1,170 @@ +#lang racket/base + +(require racket/match + racket/list + racket/generator + (prefix-in lex: parser-tools/lex) + br/ragg/support + br/ragg/private/internal-support) + + +(provide THE-ERROR-HANDLER + make-permissive-tokenizer + atomic-datum->syntax + positions->srcloc + rule-components->syntax) + + + +;; The level of indirection here is necessary since the yacc grammar wants a +;; function value for the error handler up front. We want to delay that decision +;; till parse time. +(define (THE-ERROR-HANDLER tok-ok? tok-name tok-value start-pos end-pos) + (match (positions->srcloc start-pos end-pos) + [(list src line col offset span) + ((current-parser-error-handler) tok-name + tok-value + offset + line + col + span)])) + + + + +(define no-position (lex:position #f #f #f)) +(define (no-position? p) + (not + (or (lex:position-line p) + (lex:position-col p) + (lex:position-offset p)))) + + +;; make-permissive-tokenizer: (U (sequenceof (U token token-struct eof void)) (-> (U token token-struct eof void))) hash -> (-> position-token) +;; Creates a tokenizer from the given value. +;; FIXME: clean up code. +(define (make-permissive-tokenizer tokenizer token-type-hash) + (define tokenizer-thunk (cond + [(sequence? tokenizer) + (sequence->generator tokenizer)] + [(procedure? 
tokenizer) + tokenizer])) + + ;; lookup: symbol any pos pos -> position-token + (define (lookup type val start-pos end-pos) + (lex:position-token + ((hash-ref token-type-hash type + (lambda () + ((current-tokenizer-error-handler) (format "~a" type) val + (lex:position-offset start-pos) + (lex:position-line start-pos) + (lex:position-col start-pos) + (and (number? (lex:position-offset start-pos)) + (number? (lex:position-offset end-pos)) + (- (lex:position-offset end-pos) + (lex:position-offset start-pos)))))) + val) + start-pos end-pos)) + + (define (permissive-tokenizer) + (define next-token (tokenizer-thunk)) + (let loop ([next-token next-token]) + (match next-token + [(or (? eof-object?) (? void?)) + (lookup 'EOF eof no-position no-position)] + + [(? symbol?) + (lookup next-token next-token no-position no-position)] + + [(? string?) + (lookup (string->symbol next-token) next-token no-position no-position)] + + [(? char?) + (lookup (string->symbol (string next-token)) next-token no-position no-position)] + + ;; Compatibility + [(? lex:token?) + (loop (token (lex:token-name next-token) + (lex:token-value next-token)))] + + [(token-struct type val offset line column span skip?) + (cond [skip? + ;; skip whitespace, and just tokenize again. + (permissive-tokenizer)] + + [(hash-has-key? token-type-hash type) + (define start-pos (lex:position offset line column)) + ;; try to synthesize a consistent end position. + (define end-pos (lex:position (if (and (number? offset) (number? span)) + (+ offset span) + offset) + line + (if (and (number? column) (number? span)) + (+ column span) + column))) + (lookup type val start-pos end-pos)] + [else + ;; We ran into a token of unrecognized type. Let's raise an appropriate error. + ((current-tokenizer-error-handler) type val + offset line column span)])] + + [(lex:position-token t s e) + (define a-position-token (loop t)) + (lex:position-token (lex:position-token-token a-position-token) + (if (no-position? 
(lex:position-token-start-pos a-position-token)) + s + (lex:position-token-start-pos a-position-token)) + (if (no-position? (lex:position-token-end-pos a-position-token)) + e + (lex:position-token-end-pos a-position-token)))] + + [else + ;; Otherwise, we have no idea how to treat this as a token. + ((current-tokenizer-error-handler) 'unknown-type (format "~a" next-token) + #f #f #f #f)]))) + permissive-tokenizer) + + + +;; positions->srcloc: position position -> (list source line column offset span) +;; Given two positions, returns a srcloc-like structure, where srcloc is the value +;; consumed as the third argument to datum->syntax. +(define (positions->srcloc start-pos end-pos) + (list (current-source) + (lex:position-line start-pos) + (lex:position-col start-pos) + (lex:position-offset start-pos) + (if (and (number? (lex:position-offset end-pos)) + (number? (lex:position-offset start-pos))) + (- (lex:position-offset end-pos) + (lex:position-offset start-pos)) + #f))) + + +;; We create a syntax using read-syntax; by definition, it should have the +;; original? property set to #t, which we then copy over to syntaxes constructed +;; with atomic-datum->syntax and rule-components->syntax. +(define stx-with-original?-property + (read-syntax #f (open-input-string "original"))) + + +;; atomic-datum->syntax: datum position position +;; Helper that does the ugly work in wrapping a datum into a syntax +;; with source location. +(define (atomic-datum->syntax d start-pos end-pos) + (datum->syntax #f d (positions->srcloc start-pos end-pos) stx-with-original?-property)) + + + +;; rule-components->syntax: (U symbol false) (listof stx) ... #:srcloc (U #f (list src line column offset span)) -> stx +;; Creates an stx out of the rule name and its components. +;; The location information of the rule spans that of its components. +(define (rule-components->syntax rule-name/false #:srcloc [srcloc #f] . 
components) + (define flattened-components (apply append components)) + (datum->syntax #f + (apply append + (list + (datum->syntax #f rule-name/false srcloc stx-with-original?-property)) + components) + srcloc + stx-with-original?-property)) diff --git a/beautiful-racket-ragg/br/ragg/codegen/satisfaction.rkt b/beautiful-racket-ragg/br/ragg/codegen/satisfaction.rkt new file mode 100755 index 0000000..07f20ac --- /dev/null +++ b/beautiful-racket-ragg/br/ragg/codegen/satisfaction.rkt @@ -0,0 +1,207 @@ +#lang racket/base + +(provide make-and make-or node? node-val node-yes? visit! add-child!) + +(require racket/match) + +;; I can't get no... satisfaction. +;; +;; A small module to make sure a small constraint system can be satisfied. +;; +;; Small variation on topological sort where we need both AND and OR nodes. + + +(struct node (type val yes? parents count-to-satisfy) #:mutable) +;; or nodes are satisfied if any of the children is satisfied. +;; and nodes are satisfied if all of the children are satisfied. + + +;; visit!: node -> void +;; Visit a node, and marking it if it's all satisfied. Propagate +;; satisfaction to parents as appropriate. +(define visit! + (let () + (define (dec! n) + (set-node-count-to-satisfy! n (max 0 (sub1 (node-count-to-satisfy n)))) + (when (and (not (node-yes? n)) + (= (node-count-to-satisfy n) 0)) + (sat! n))) + + (define (sat! n) + (set-node-yes?! n #t) + (for ([p (in-list (node-parents n))]) + (dec! p))) + + (lambda (n) + (unless (node-yes? n) + (when (= (node-count-to-satisfy n) 0) + (sat! n)))))) + + +;; make-or: X -> node +;; Create an or node +(define (make-or [val #f]) + (node 'or val #f '() 1)) + + +;; make-and: X -> node +;; Create an and node +(define (make-and [val #f]) + (node 'and val #f '() 0)) + + +;; add-child!: node node -> void +;; Attach a child c to the parent node p. +(define (add-child! p c) + (set-node-parents! 
c (cons p (node-parents c))) + (match p + [(node 'or _ _ _ count-to-satisfy) + (void)] + [(node 'and _ _ _ count-to-satisfy) + (set-node-count-to-satisfy! p (add1 count-to-satisfy))])) + + +(module* test racket + (require (submod "..") + racket/block + rackunit) + + ;; a ::= a + (block + ;; Self-looping "a" and-node should not say yes after visiting. + (define a (make-and 'a)) + (add-child! a a) + (visit! a) + (check-false (node-yes? a))) + + + ;; a ::= a + (block + ;; Self-looping "a" or-node should not say yes after visiting. + (define a (make-or 'a)) + (add-child! a a) + (visit! a) + (check-false (node-yes? a))) + + + ;; This case should never happen in my situation, but we should check. + (block + ;; Empty "a" or-node should not say yes after visiting. + (define a (make-or 'a)) + (visit! a) + (check-false (node-yes? a))) + + + ;; a : TOKEN + (block + ;; Empty "a" and-node SHOULD say yes after visiting. + (define a (make-and 'a)) + (visit! a) + (check-true (node-yes? a))) + + + ;; a : a | b + ;; b : TOKEN + (block + (define a (make-or 'a)) + (add-child! a a) + (define b (make-and 'b)) + (add-child! a b) + (visit! b) + (check-true (node-yes? a)) + (check-true (node-yes? b))) + + ;; a : a b + ;; b : TOKEN + (block + (define a (make-and 'a)) + (define b (make-and 'b)) + (define TOKEN (make-and 'TOKEN)) + (add-child! a a) + (add-child! a b) + (add-child! b TOKEN) + (visit! TOKEN) + (check-false (node-yes? a)) + (check-true (node-yes? b)) + (check-true (node-yes? TOKEN))) + + ;; a : b + ;; b : a + (block + (define a (make-and 'a)) + (define b (make-and 'b)) + (add-child! a b) + (add-child! b a) + (check-false (node-yes? a)) + (check-false (node-yes? b))) + + ;; a : "a" b + ;; b : a | b + (block + (define a (make-and 'a)) + (define b (make-or 'b)) + (define lit (make-and "a")) + (add-child! a lit) + (add-child! a b) + (add-child! b a) + (add-child! b b) + (visit! lit) + (check-false (node-yes? a)) + (check-false (node-yes? b)) + (check-true (node-yes? 
lit))) + + + ;; x : x y + ;; y : LIT + (block + (define x (make-and "x")) + (define y (make-and "y")) + (define lit (make-and "LIT")) + (add-child! x x) + (add-child! x y) + (add-child! y lit) + (visit! lit) + (check-false (node-yes? x)) + (check-true (node-yes? y)) + (check-true (node-yes? lit))) + + + ;; expr: LPAREN expr RPAREN | ATOM + (block + (define LPAREN (make-and)) + (define RPAREN (make-and)) + (define expr (make-or)) + (define expr-1 (make-and)) + (define expr-2 (make-and)) + (define ATOM (make-and)) + (add-child! expr expr-1) + (add-child! expr expr-2) + (add-child! expr-1 LPAREN) + (add-child! expr-1 expr) + (add-child! expr-1 RPAREN) + (add-child! expr-2 ATOM) + (visit! LPAREN) + (visit! RPAREN) + (visit! ATOM) + (check-true (node-yes? expr))) + + + + ;; expr: LPAREN expr RPAREN + (block + (define LPAREN (make-and)) + (define RPAREN (make-and)) + (define expr (make-or)) + (define expr-1 (make-and)) + (define expr-2 (make-and)) + (define ATOM (make-and)) + (add-child! expr expr-1) + (add-child! expr expr-2) + (add-child! expr-1 LPAREN) + (add-child! expr-1 expr) + (add-child! expr-1 RPAREN) + (visit! LPAREN) + (visit! RPAREN) + (check-false (node-yes? expr))) + + ) diff --git a/beautiful-racket-ragg/br/ragg/codegen/sexp-based-lang.rkt b/beautiful-racket-ragg/br/ragg/codegen/sexp-based-lang.rkt new file mode 100755 index 0000000..80acdf7 --- /dev/null +++ b/beautiful-racket-ragg/br/ragg/codegen/sexp-based-lang.rkt @@ -0,0 +1,95 @@ +#lang racket/base + +;; A language level for automatically generating parsers out of BNF grammars. +;; +;; Danny Yoo (dyoo@hashcollision.org) +;; +;; Intent: make it trivial to generate languages for Racket. At the +;; moment, I find it painful to use parser-tools. This library is +;; meant to make it less agonizing. 
+;; +;; The intended use of this language is as follows: +;; +;;;;; s-exp-grammar.rkt ;;;;;;;;; +;; #lang br/ragg +;; s-exp : "(" s-exp* ")" | ATOM +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + +;; What this generates is: +;; +;; * parse: a function that consumes a source and a +;; position-aware lexer, and produces a syntax object. +;; +;; * make-rule-parser: a custom parser given a provided start rule. +;; +;; You'll still need to do a little work, by providing a lexer that +;; defines what the uppercased tokens mean. For example, you can +;; use the parser-tools/lex lexer tools: +;; +;; (require ragg/support +;; parser-tools/lex +;; parser-tools/lex-sre) +;; +;; (define tokenize +;; (lexer-src-pos +;; [(:+ alphabetic) +;; (token 'ATOM lexeme)] +;; [whitespace +;; (return-without-pos (tokenize/1 input-port))] +;; [(:or "(" ")") +;; (token lexeme lexeme)])) +;; + +;; However, that should be all you need. The output of an +;; generated grammar is an honest-to-goodness syntax +;; object with source locations, fully-labeled by the rules. +;; +;; (parse (tokenize an-input-port)) +;; +;; + +;; The first rule is treated as the start rule; any successful parse +;; must finish with end-of-file. + + +;; Terminology: +;; + + +;; A rule is a rule identifier, followed by a colon ":", followed by a +;; pattern. + +;; A rule identifier is an identifier that is not in upper case. +;; A rule identifier should follow the Racket rules for identifiers, +;; except that it can't contain * or +. +;; +;; A token is a rule identifier that is all in upper case. + + + +;; A pattern may either be +;; +;; * an implicit sequence of patterns, +;; +;; * a literal string, +;; +;; * a rule identifier, +;; +;; * a quanitifed pattern, either with "*" or "+", +;; +;; * an optional pattern: a pattern surrounded by "[" and "]", or +;; +;; * a grouped sequence: a pattern surrounded by "(" and ")". 
+ + +(require (for-syntax racket/base + "codegen.rkt")) + +(provide rules + (rename-out [#%plain-module-begin #%module-begin])) + +(define-syntax (rules stx) + (rules-codegen #:parser-provider-module 'br/ragg/cfg-parser/cfg-parser ;; 'parser-tools/yacc + #:parser-provider-form 'cfg-parser ;; 'parser + stx)) diff --git a/beautiful-racket-ragg/br/ragg/codegen/sexp-based-lang.rkt alias b/beautiful-racket-ragg/br/ragg/codegen/sexp-based-lang.rkt alias new file mode 100644 index 0000000..f550a79 Binary files /dev/null and b/beautiful-racket-ragg/br/ragg/codegen/sexp-based-lang.rkt alias differ diff --git a/beautiful-racket-ragg/br/ragg/examples/01-equal.rkt b/beautiful-racket-ragg/br/ragg/examples/01-equal.rkt new file mode 100755 index 0000000..b975fd1 --- /dev/null +++ b/beautiful-racket-ragg/br/ragg/examples/01-equal.rkt @@ -0,0 +1,12 @@ +#lang br/ragg + +## Equal numbers of 0 and 1s in a string. +## +## (Thanks to mithos28 for this one.) + + +equal : [zero one | one zero] + +zero : "0" equal | equal "0" + +one : "1" equal | equal "1" diff --git a/beautiful-racket-ragg/br/ragg/examples/0n1.rkt b/beautiful-racket-ragg/br/ragg/examples/0n1.rkt new file mode 100755 index 0000000..c3173bd --- /dev/null +++ b/beautiful-racket-ragg/br/ragg/examples/0n1.rkt @@ -0,0 +1,3 @@ +#lang br/ragg + +rule: "0"* "1" diff --git a/beautiful-racket-ragg/br/ragg/examples/0n1n.rkt b/beautiful-racket-ragg/br/ragg/examples/0n1n.rkt new file mode 100755 index 0000000..8ac8f0b --- /dev/null +++ b/beautiful-racket-ragg/br/ragg/examples/0n1n.rkt @@ -0,0 +1,3 @@ +#lang br/ragg + +rule-0n1n: ["0" rule-0n1n "1"] diff --git a/beautiful-racket-ragg/br/ragg/examples/baby-json.rkt b/beautiful-racket-ragg/br/ragg/examples/baby-json.rkt new file mode 100755 index 0000000..b4f5f56 --- /dev/null +++ b/beautiful-racket-ragg/br/ragg/examples/baby-json.rkt @@ -0,0 +1,16 @@ +#lang br/ragg + +;; Simple baby example of JSON structure +json: number | string + | array + | object + +number: NUMBER + +string: STRING 
+ +array: "[" [json ("," json)*] "]" + +object: "{" [kvpair ("," kvpair)*] "}" + +kvpair: ID ":" json diff --git a/beautiful-racket-ragg/br/ragg/examples/bnf.rkt b/beautiful-racket-ragg/br/ragg/examples/bnf.rkt new file mode 100755 index 0000000..e59f9aa --- /dev/null +++ b/beautiful-racket-ragg/br/ragg/examples/bnf.rkt @@ -0,0 +1,14 @@ +#lang br/ragg + + +## The following comes from: http://en.wikipedia.org/wiki/Backus%E2%80%93Naur_Form + + : | + : "<" ">" "::=" + + : " " | "" ## "" is empty string, i.e. no whitespace + : | "|" + : | + : | + : | "<" ">" + : '"' '"' | "'" "'" ## actually, the original BNF did not use quotes diff --git a/beautiful-racket-ragg/br/ragg/examples/lua-parser.rkt b/beautiful-racket-ragg/br/ragg/examples/lua-parser.rkt new file mode 100755 index 0000000..da3192f --- /dev/null +++ b/beautiful-racket-ragg/br/ragg/examples/lua-parser.rkt @@ -0,0 +1,111 @@ +#lang br/ragg + +;; Lua parser, adapted from: +;; http://www.lua.org/manual/5.1/manual.html#8 +;; + + +chunk : (stat ["; "])* [laststat ["; "]] + +block : chunk + +stat : varlist "=" explist | + functioncall | + DO block END | + WHILE exp DO block END | + REPEAT block UNTIL exp | + IF exp THEN block (ELSEIF exp THEN block)* [ELSE block] END | + FOR NAME "=" exp "," exp ["," exp] DO block END | + FOR namelist IN explist DO block END | + FUNCTION funcname funcbody | + LOCAL FUNCTION NAME funcbody | + LOCAL namelist ["=" explist] + +laststat : RETURN [explist] | BREAK + +funcname : NAME ("." NAME)* [":" NAME] + +varlist : var ("," var)* + +var : NAME | prefixexp "[" exp "]" | prefixexp "." NAME + +namelist : NAME ("," NAME)* + +explist : (exp ",")* exp + + +;; Note by dyoo: The parsing of exp deviates from Lua in that we have these administrative +;; rules to explicitly represent the precedence rules. +;; +;; See: http://www.lua.org/manual/5.1/manual.html#2.5.6 +;; +;; Ragg doesn't yet automatically desugar operator precedence rules. 
+;; I'm doing it by hand at the moment, which is not ideal, so a future version of +;; ragg will have a story about describing precedence. +;; +;; Operator precedence in Lua follows the table below, from lower to higher priority: +;; +;; or exp_1 +;; and exp_2 +;; < > <= >= ~= == exp_3 +;; .. exp_4 +;; + - exp_5 +;; * / % exp_6 +;; not # - (unary) exp_7 +;; ^ exp_8 +;; +;; As usual, you can use parentheses to change the precedences of an expression. +;; The concatenation ('..') and exponentiation ('^') operators are right associative. +;; All other binary operators are left associative. +;; +;; The original grammar rule before encoding precedence was: +;; +;; exp : NIL | FALSE | TRUE | NUMBER | STRING | "..." | function | +;; prefixexp | tableconstructor | exp binop exp | unop exp + +exp : exp_1 +exp_1: exp_1 binop_1 exp_2 | exp_2 +exp_2: exp_2 binop_2 exp_3 | exp_3 +exp_3: exp_3 binop_3 exp_4 | exp_4 +exp_4: exp_5 binop_4 exp_4 | exp_5 ;; right associative +exp_5: exp_5 binop_5 exp_6 | exp_6 +exp_6: exp_6 binop_6 exp_7 | exp_7 +exp_7: unop exp_8 +exp_8: exp_9 binop_8 exp_8 | exp_9 ;; right associative +exp_9: NIL | FALSE | TRUE | NUMBER | STRING | "..." | function | + prefixexp | tableconstructor +binop_1: OR +binop_2: AND +binop_3: "<" | ">" | "<=" | ">=" | "~=" | "==" +binop_4: ".." +binop_5: "+" | "-" +binop_6: "*" | "/" | "%" +binop_8: "^" +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + +prefixexp : var | functioncall | "(" exp ")" + +functioncall : prefixexp args | prefixexp ":" NAME args + +args : "(" [explist] ")" | tableconstructor | STRING + +function : FUNCTION funcbody + +funcbody : "(" [parlist] ")" block END + +parlist : namelist ["," "..."] | "..." + +tableconstructor : "{" [fieldlist] "}" + +fieldlist : field (fieldsep field)* [fieldsep] + +field : "[" exp "]" "=" exp | NAME "=" exp | exp + +fieldsep : "," | ";" + +binop : "+" | "-" | "*" | "/" | "^" | "%" | ".." 
| + "<" | "<=" | ">" | ">=" | "==" | "~=" | + AND | OR + +unop : "-" | NOT | "#" \ No newline at end of file diff --git a/beautiful-racket-ragg/br/ragg/examples/nested-word-list.rkt b/beautiful-racket-ragg/br/ragg/examples/nested-word-list.rkt new file mode 100755 index 0000000..6df6687 --- /dev/null +++ b/beautiful-racket-ragg/br/ragg/examples/nested-word-list.rkt @@ -0,0 +1,3 @@ +#lang br/ragg +nested-word-list: WORD + | LEFT-PAREN nested-word-list* RIGHT-PAREN diff --git a/beautiful-racket-ragg/br/ragg/examples/python-grammar.rkt b/beautiful-racket-ragg/br/ragg/examples/python-grammar.rkt new file mode 100755 index 0000000..21daae0 --- /dev/null +++ b/beautiful-racket-ragg/br/ragg/examples/python-grammar.rkt @@ -0,0 +1,144 @@ +#lang br/ragg + +# Grammar for Python + +# Note: Changing the grammar specified in this file will most likely +# require corresponding changes in the parser module +# (../Modules/parsermodule.c). If you can't make the changes to +# that module yourself, please co-ordinate the required changes +# with someone who can; ask around on python-dev for help. Fred +# Drake will probably be listening there. + +# NOTE WELL: You should also follow all the steps listed in PEP 306, +# "How to Change Python's Grammar" + +# Start symbols for the grammar: +# single_input is a single interactive statement; +# file_input is a module or sequence of commands read from an input file; +# eval_input is the input for the eval() and input() functions. +# NB: compound_stmt in single_input is followed by extra NEWLINE! 
+file_input: (NEWLINE | stmt)* ENDMARKER +single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE +eval_input: testlist NEWLINE* ENDMARKER + +decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE +decorators: decorator+ +decorated: decorators (classdef | funcdef) +funcdef: 'def' NAME parameters ':' suite +parameters: '(' [varargslist] ')' +varargslist: ((fpdef ['=' test] ',')* + ('*' NAME [',' '**' NAME] | '**' NAME) | + fpdef ['=' test] (',' fpdef ['=' test])* [',']) +fpdef: NAME | '(' fplist ')' +fplist: fpdef (',' fpdef)* [','] + +stmt: simple_stmt | compound_stmt +simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE +small_stmt: (expr_stmt | print_stmt | del_stmt | pass_stmt | flow_stmt | + import_stmt | global_stmt | exec_stmt | assert_stmt) +expr_stmt: testlist (augassign (yield_expr|testlist) | + ('=' (yield_expr|testlist))*) +augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' | + '<<=' | '>>=' | '**=' | '//=') +# For normal assignments, additional restrictions enforced by the interpreter +print_stmt: 'print' ( [ test (',' test)* [','] ] | + '>>' test [ (',' test)+ [','] ] ) +del_stmt: 'del' exprlist +pass_stmt: 'pass' +flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt +break_stmt: 'break' +continue_stmt: 'continue' +return_stmt: 'return' [testlist] +yield_stmt: yield_expr +raise_stmt: 'raise' [test [',' test [',' test]]] +import_stmt: import_name | import_from +import_name: 'import' dotted_as_names +import_from: ('from' ('.'* dotted_name | '.'+) + 'import' ('*' | '(' import_as_names ')' | import_as_names)) +import_as_name: NAME ['as' NAME] +dotted_as_name: dotted_name ['as' NAME] +import_as_names: import_as_name (',' import_as_name)* [','] +dotted_as_names: dotted_as_name (',' dotted_as_name)* +dotted_name: NAME ('.' 
NAME)* +global_stmt: 'global' NAME (',' NAME)* +exec_stmt: 'exec' expr ['in' test [',' test]] +assert_stmt: 'assert' test [',' test] + +compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated +if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite] +while_stmt: 'while' test ':' suite ['else' ':' suite] +for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite] +try_stmt: ('try' ':' suite + ((except_clause ':' suite)+ + ['else' ':' suite] + ['finally' ':' suite] | + 'finally' ':' suite)) +with_stmt: 'with' with_item (',' with_item)* ':' suite +with_item: test ['as' expr] +# NB compile.c makes sure that the default except clause is last +except_clause: 'except' [test [('as' | ',') test]] +suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT + +# Backward compatibility cruft to support: +# [ x for x in lambda: True, lambda: False if x() ] +# even while also allowing: +# lambda x: 5 if x else 2 +# (But not a mix of the two) +testlist_safe: old_test [(',' old_test)+ [',']] +old_test: or_test | old_lambdef +old_lambdef: 'lambda' [varargslist] ':' old_test + +test: or_test ['if' or_test 'else' test] | lambdef +or_test: and_test ('or' and_test)* +and_test: not_test ('and' not_test)* +not_test: 'not' not_test | comparison +comparison: expr (comp_op expr)* +comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not' +expr: xor_expr ('|' xor_expr)* +xor_expr: and_expr ('^' and_expr)* +and_expr: shift_expr ('&' shift_expr)* +shift_expr: arith_expr (('<<'|'>>') arith_expr)* +arith_expr: term (('+'|'-') term)* +term: factor (('*'|'/'|'%'|'//') factor)* +factor: ('+'|'-'|'~') factor | power +power: atom trailer* ['**' factor] +atom: ('(' [yield_expr|testlist_comp] ')' | + '[' [listmaker] ']' | + '{' [dictorsetmaker] '}' | + '`' testlist1 '`' | + NAME | NUMBER | STRING+) +listmaker: test ( list_for | (',' test)* [','] ) +testlist_comp: test ( comp_for | (',' test)* [','] ) +lambdef: 'lambda' 
[varargslist] ':' test +trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME +subscriptlist: subscript (',' subscript)* [','] +subscript: '.' '.' '.' | test | [test] ':' [test] [sliceop] +sliceop: ':' [test] +exprlist: expr (',' expr)* [','] +testlist: test (',' test)* [','] +dictorsetmaker: ( (test ':' test (comp_for | (',' test ':' test)* [','])) | + (test (comp_for | (',' test)* [','])) ) + +classdef: 'class' NAME ['(' [testlist] ')'] ':' suite + +arglist: (argument ',')* (argument [','] + |'*' test (',' argument)* [',' '**' test] + |'**' test) +# The reason that keywords are test nodes instead of NAME is that using NAME +# results in an ambiguity. ast.c makes sure it's a NAME. +argument: test [comp_for] | test '=' test + +list_iter: list_for | list_if +list_for: 'for' exprlist 'in' testlist_safe [list_iter] +list_if: 'if' old_test [list_iter] + +comp_iter: comp_for | comp_if +comp_for: 'for' exprlist 'in' or_test [comp_iter] +comp_if: 'if' old_test [comp_iter] + +testlist1: test (',' test)* + +# not used in grammar, but may appear in "node" passed from Parser to Compiler +encoding_decl: NAME + +yield_expr: 'yield' [testlist] diff --git a/beautiful-racket-ragg/br/ragg/examples/simple-arithmetic-grammar.rkt b/beautiful-racket-ragg/br/ragg/examples/simple-arithmetic-grammar.rkt new file mode 100755 index 0000000..08442eb --- /dev/null +++ b/beautiful-racket-ragg/br/ragg/examples/simple-arithmetic-grammar.rkt @@ -0,0 +1,5 @@ +#lang br/ragg + +expr : term ('+' term)* +term : factor ('*' factor)* +factor : INT diff --git a/beautiful-racket-ragg/br/ragg/examples/simple-line-drawing.rkt b/beautiful-racket-ragg/br/ragg/examples/simple-line-drawing.rkt new file mode 100755 index 0000000..a22465f --- /dev/null +++ b/beautiful-racket-ragg/br/ragg/examples/simple-line-drawing.rkt @@ -0,0 +1,10 @@ +#lang br/ragg + +;; +;; See: http://stackoverflow.com/questions/12345647/rewrite-this-script-by-designing-an-interpreter-in-racket +;; + +drawing: rows* +rows: repeat 
chunk+ ";" +repeat: INTEGER +chunk: INTEGER STRING diff --git a/beautiful-racket-ragg/br/ragg/examples/simple-line-drawing/examples/letter-i.rkt b/beautiful-racket-ragg/br/ragg/examples/simple-line-drawing/examples/letter-i.rkt new file mode 100755 index 0000000..7626fc8 --- /dev/null +++ b/beautiful-racket-ragg/br/ragg/examples/simple-line-drawing/examples/letter-i.rkt @@ -0,0 +1,4 @@ +#lang br/ragg/examples/simple-line-drawing +3 9 X; +6 3 b 3 X 3 b; +3 9 X; diff --git a/beautiful-racket-ragg/br/ragg/examples/simple-line-drawing/grammar.rkt b/beautiful-racket-ragg/br/ragg/examples/simple-line-drawing/grammar.rkt new file mode 100755 index 0000000..a22465f --- /dev/null +++ b/beautiful-racket-ragg/br/ragg/examples/simple-line-drawing/grammar.rkt @@ -0,0 +1,10 @@ +#lang br/ragg + +;; +;; See: http://stackoverflow.com/questions/12345647/rewrite-this-script-by-designing-an-interpreter-in-racket +;; + +drawing: rows* +rows: repeat chunk+ ";" +repeat: INTEGER +chunk: INTEGER STRING diff --git a/beautiful-racket-ragg/br/ragg/examples/simple-line-drawing/interpret.rkt b/beautiful-racket-ragg/br/ragg/examples/simple-line-drawing/interpret.rkt new file mode 100755 index 0000000..e6cec0c --- /dev/null +++ b/beautiful-racket-ragg/br/ragg/examples/simple-line-drawing/interpret.rkt @@ -0,0 +1,31 @@ +#lang racket/base +(require syntax/parse) + +(provide interpret-drawing) + +(define (interpret-drawing drawing-stx) + (syntax-parse drawing-stx + [({~literal drawing} row-stxs ...) + + (for ([row-stx (syntax->list #'(row-stxs ...))]) + (interpret-row row-stx))])) + + +(define (interpret-row row-stx) + (syntax-parse row-stx + [({~literal rows} + ({~literal repeat} repeat-number) + chunks ... 
";") + + (for ([i (syntax-e #'repeat-number)]) + (for ([chunk-stx (syntax->list #'(chunks ...))]) + (interpret-chunk chunk-stx)) + (newline))])) + + +(define (interpret-chunk chunk-stx) + (syntax-parse chunk-stx + [({~literal chunk} chunk-size chunk-string) + + (for ([k (syntax-e #'chunk-size)]) + (display (syntax-e #'chunk-string)))])) diff --git a/beautiful-racket-ragg/br/ragg/examples/simple-line-drawing/lang/reader.rkt b/beautiful-racket-ragg/br/ragg/examples/simple-line-drawing/lang/reader.rkt new file mode 100755 index 0000000..13c69c4 --- /dev/null +++ b/beautiful-racket-ragg/br/ragg/examples/simple-line-drawing/lang/reader.rkt @@ -0,0 +1,22 @@ +#lang s-exp syntax/module-reader +br/ragg/examples/simple-line-drawing/semantics +#:read my-read +#:read-syntax my-read-syntax +#:info my-get-info +#:whole-body-readers? #t + +(require br/ragg/examples/simple-line-drawing/lexer + br/ragg/examples/simple-line-drawing/grammar) + +(define (my-read in) + (syntax->datum (my-read-syntax #f in))) + +(define (my-read-syntax src ip) + (list (parse src (tokenize ip)))) + +(define (my-get-info key default default-filter) + (case key + [(color-lexer) + (dynamic-require 'syntax-color/default-lexer 'default-lexer)] + [else + (default-filter key default)])) diff --git a/beautiful-racket-ragg/br/ragg/examples/simple-line-drawing/lexer.rkt b/beautiful-racket-ragg/br/ragg/examples/simple-line-drawing/lexer.rkt new file mode 100755 index 0000000..6ef3cf4 --- /dev/null +++ b/beautiful-racket-ragg/br/ragg/examples/simple-line-drawing/lexer.rkt @@ -0,0 +1,27 @@ +#lang racket/base + +(provide tokenize) + +;; A simple lexer for simple-line-drawing. +(require br/ragg/support + parser-tools/lex) + +(define (tokenize ip) + (port-count-lines! 
ip) + (define my-lexer + (lexer-src-pos + [(repetition 1 +inf.0 numeric) + (token 'INTEGER (string->number lexeme))] + [upper-case + (token 'STRING lexeme)] + ["b" + (token 'STRING " ")] + [";" + (token ";" lexeme)] + [whitespace + (token 'WHITESPACE lexeme #:skip? #t)] + [(eof) + (void)])) + (define (next-token) (my-lexer ip)) + next-token) + diff --git a/beautiful-racket-ragg/br/ragg/examples/simple-line-drawing/semantics.rkt b/beautiful-racket-ragg/br/ragg/examples/simple-line-drawing/semantics.rkt new file mode 100755 index 0000000..028662d --- /dev/null +++ b/beautiful-racket-ragg/br/ragg/examples/simple-line-drawing/semantics.rkt @@ -0,0 +1,48 @@ +#lang racket/base +(require (for-syntax racket/base syntax/parse)) + +(provide #%module-begin + ;; We reuse Racket's treatment of raw datums, specifically + ;; for strings and numbers: + #%datum + + ;; And otherwise, we provide definitions of these three forms. + ;; During compiliation, Racket uses these definitions to + ;; rewrite into for loops, displays, and newlines. + drawing rows chunk) + +;; Define a few compile-time functions to do the syntax rewriting: +(begin-for-syntax + (define (compile-drawing drawing-stx) + (syntax-parse drawing-stx + [({~literal drawing} row-stxs ...) + + (syntax/loc drawing-stx + (begin row-stxs ...))])) + + (define (compile-rows row-stx) + (syntax-parse row-stx + [({~literal rows} + ({~literal repeat} repeat-number) + chunks ... + ";") + + (syntax/loc row-stx + (for ([i repeat-number]) + chunks ... 
+ (newline)))])) + + (define (compile-chunk chunk-stx) + (syntax-parse chunk-stx + [({~literal chunk} chunk-size chunk-string) + + (syntax/loc chunk-stx + (for ([k chunk-size]) + (display chunk-string)))]))) + + +;; Wire up the use of "drawing", "rows", and "chunk" to these +;; transformers: +(define-syntax drawing compile-drawing) +(define-syntax rows compile-rows) +(define-syntax chunk compile-chunk) diff --git a/beautiful-racket-ragg/br/ragg/examples/statlist-grammar.rkt b/beautiful-racket-ragg/br/ragg/examples/statlist-grammar.rkt new file mode 100755 index 0000000..026b76e --- /dev/null +++ b/beautiful-racket-ragg/br/ragg/examples/statlist-grammar.rkt @@ -0,0 +1,14 @@ +#lang br/ragg + +## Statlist grammar + +statlist : stat+ +stat: ID '=' expr + | 'print' expr + +expr: multExpr ('+' multExpr)* +multExpr: primary (('*'|'.') primary)* +primary : + INT + | ID + | '[' expr ("," expr)* ']' diff --git a/beautiful-racket-ragg/br/ragg/examples/wordy.rkt b/beautiful-racket-ragg/br/ragg/examples/wordy.rkt new file mode 100755 index 0000000..7430770 --- /dev/null +++ b/beautiful-racket-ragg/br/ragg/examples/wordy.rkt @@ -0,0 +1,7 @@ +#lang br/ragg +;; A parser for a silly language +sentence: verb optional-adjective object +verb: greeting +optional-adjective: ["happy" | "frumpy"] +greeting: "hello" | "hola" | "aloha" +object: "world" | WORLD diff --git a/beautiful-racket-ragg/br/ragg/info.rkt b/beautiful-racket-ragg/br/ragg/info.rkt new file mode 100755 index 0000000..894cc3b --- /dev/null +++ b/beautiful-racket-ragg/br/ragg/info.rkt @@ -0,0 +1,11 @@ +#lang setup/infotab +(define name "ragg") +(define categories '(devtools)) +(define can-be-loaded-with 'all) +(define required-core-version "5.3.1") +(define version "1.0") +(define repositories '("4.x")) +(define scribblings '(("ragg.scrbl"))) +(define blurb '("ragg: a Racket AST Generator Generator. A design goal is to be easy for beginners to use. 
Given a grammar in EBNF, ragg produces a parser that generates Racket's native syntax objects with full source location.")) +(define release-notes '((p "First release."))) +(define deps (list)) diff --git a/beautiful-racket-ragg/br/ragg/lang/reader.rkt b/beautiful-racket-ragg/br/ragg/lang/reader.rkt new file mode 100755 index 0000000..5a6c52e --- /dev/null +++ b/beautiful-racket-ragg/br/ragg/lang/reader.rkt @@ -0,0 +1,4 @@ +#lang racket/base + +(require "../codegen/lang/reader.rkt") +(provide (all-from-out "../codegen/lang/reader.rkt")) diff --git a/beautiful-racket-ragg/br/ragg/private/internal-support.rkt b/beautiful-racket-ragg/br/ragg/private/internal-support.rkt new file mode 100755 index 0000000..abaf0ac --- /dev/null +++ b/beautiful-racket-ragg/br/ragg/private/internal-support.rkt @@ -0,0 +1,36 @@ +#lang racket/base + +(require br/ragg/support) + +(provide current-source + current-parser-error-handler + current-tokenizer-error-handler) + +;; During parsing, we should define the source of the input. +(define current-source (make-parameter #f)) + + +;; When an parse error happens, we call the current-parser-error-handler: +(define current-parser-error-handler + (make-parameter + (lambda (tok-name tok-value offset line col span) + (raise (exn:fail:parsing + (format "Encountered parsing error near token ~e (~e) while parsing ~e [line=~a, column=~a, offset=~a]" + tok-name tok-value + (current-source) + line col offset) + (current-continuation-marks) + (list (srcloc (current-source) line col offset span))))))) + +;; When a tokenization error happens, we call the current-tokenizer-error-handler. 
+(define current-tokenizer-error-handler + (make-parameter + (lambda (tok-type tok-value offset line column span) + (raise (exn:fail:parsing + (format "Encountered unexpected token ~e (~e) while parsing ~e [line=~a, column=~a, offset=~a]" + tok-type + tok-value + (current-source) + line column offset) + (current-continuation-marks) + (list (srcloc (current-source) line column offset span))))))) diff --git a/beautiful-racket-ragg/br/ragg/ragg.scrbl b/beautiful-racket-ragg/br/ragg/ragg.scrbl new file mode 100755 index 0000000..9b90b9b --- /dev/null +++ b/beautiful-racket-ragg/br/ragg/ragg.scrbl @@ -0,0 +1,1107 @@ +#lang scribble/manual +@(require scribble/eval + racket/date + file/md5 + (for-label racket + br/ragg/support + br/ragg/examples/nested-word-list + (only-in parser-tools/lex lexer-src-pos) + (only-in syntax/parse syntax-parse ~literal))) + + +@(define (lookup-date filename [default ""]) + (cond + [(file-exists? filename) + (define modify-seconds (file-or-directory-modify-seconds filename)) + (define a-date (seconds->date modify-seconds)) + (date->string a-date)] + [else + default])) + +@(define (compute-md5sum filename [default ""]) + (cond [(file-exists? filename) + (bytes->string/utf-8 (call-with-input-file filename md5 #:mode 'binary))] + [else + default])) + + + +@title{ragg: a Racket AST Generator Generator} +@author+email["Danny Yoo" "dyoo@hashcollision.org"] + + +@section{Informal quickstart} + +@(define my-eval (make-base-eval)) +@(my-eval '(require br/ragg/examples/nested-word-list + racket/list + racket/match)) + +Salutations! Let's consider the following scenario: say that we're given the +following string: +@racketblock["(radiant (humble))"] + + +@margin-note{(... and pretend that we don't already know about the built-in +@racket[read] function.)} How do we go about turning this kind of string into a +structured value? That is, how would we @emph{parse} it? + +We need to first consider the shape of the things we'd like to parse. 
The +string above looks like a deeply nested list of words. How might we describe +this formally? A convenient notation to describe the shape of these things is +@link["http://en.wikipedia.org/wiki/Backus%E2%80%93Naur_Form"]{Backus-Naur +Form} (BNF). So let's try to notate the structure of nested word lists in BNF. + + +@nested[#:style 'code-inset]{ +@verbatim{ +nested-word-list: WORD + | LEFT-PAREN nested-word-list* RIGHT-PAREN +}} + +What we intend by this notation is this: @racket[nested-word-list] is either an +atomic @racket[WORD], or a parenthesized list of any number of +@racket[nested-word-list]s. We use the character @litchar{*} to represent zero +or more repetitions of the previous thing, and we treat the uppercased +@racket[LEFT-PAREN], @racket[RIGHT-PAREN], and @racket[WORD] as placeholders +for atomic @emph{tokens}. + +@margin-note{See @secref{install-ragg} for instructions on installing +@tt{ragg.}} +Here are a few examples of tokens: +@interaction[#:eval my-eval +(require br/ragg/support) +(token 'LEFT-PAREN) +(token 'WORD "crunchy" #:span 7) +(token 'RIGHT-PAREN)] + + +Have we made progress? At this point, we only have a BNF description in hand, +but we're still missing a @emph{parser}, something to take that description and +use it to make structures out of a sequence of tokens. + + +It's clear that we don't yet have a program because there's no @litchar{#lang} +line. We should add one. Put @litchar{#lang br/ragg} at the top of the BNF +description, and save it as a file called @filepath{nested-word-list.rkt}. + +@filebox["nested-word-list.rkt"]{ +@verbatim{ +#lang br/ragg +nested-word-list: WORD + | LEFT-PAREN nested-word-list* RIGHT-PAREN +}} + +Now it is a proper program. But what does it do? + +@interaction[#:eval my-eval +@eval:alts[(require "nested-word-list.rkt") (void)] +parse +] + +It gives us a @racket[parse] function. Let's investigate what @racket[parse] +does for us. What happens if we pass it a sequence of tokens? 
+ +@interaction[#:eval my-eval + (define a-parsed-value + (parse (list (token 'LEFT-PAREN "(") + (token 'WORD "some") + (token 'LEFT-PAREN "[") + (token 'WORD "pig") + (token 'RIGHT-PAREN "]") + (token 'RIGHT-PAREN ")")))) + a-parsed-value] + +Wait... that looks suspiciously like a syntax object! +@interaction[#:eval my-eval +(syntax->datum a-parsed-value) +] + + +That's @racket[(some [pig])], essentially. + +What happens if we pass it a more substantial source of tokens? +@interaction[#:eval my-eval +@code:comment{tokenize: string -> (sequenceof token-struct?)} +@code:comment{Generate tokens from a string:} +(define (tokenize s) + (for/list ([str (regexp-match* #px"\\(|\\)|\\w+" s)]) + (match str + ["(" + (token 'LEFT-PAREN str)] + [")" + (token 'RIGHT-PAREN str)] + [else + (token 'WORD str)]))) + +@code:comment{For example:} +(define token-source (tokenize "(welcome (to (((ragg)) ())))")) +(define v (parse token-source)) +(syntax->datum v) +] + +Welcome to @tt{ragg}. + + + + + + + +@;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +@;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +@section{Introduction} + +@tt{ragg} is a parsing framework for Racket with the design goal to be easy +to use. It includes the following features: +@itemize[ + +@item{It provides a @litchar{#lang} for writing extended BNF grammars. +A module written in @litchar{#lang br/ragg} automatically generates a +parser. The output of this parser tries to follow +@link["http://en.wikipedia.org/wiki/How_to_Design_Programs"]{HTDP} +doctrine; the structure of the grammar informs the structure of the +Racket syntax objects it generates.} + +@item{The language uses a few conventions to simplify the expression of +grammars. The first rule in the grammar is automatically assumed to be the +starting production. 
Identifiers in uppercase are assumed to represent +terminal tokens, and are otherwise the names of nonterminals.} + +@item{Tokenizers can be developed completely independently of parsers. +@tt{ragg} takes a liberal view on tokens: they can be strings, +symbols, or instances constructed with @racket[token]. Furthermore, +tokens can optionally provide location: if tokens provide location, the +generated syntax objects will as well.} + +@item{The underlying parser should be able to handle ambiguous grammars.} + +@item{It should integrate with the rest of the Racket +@link["http://docs.racket-lang.org/guide/languages.html"]{language toolchain}.} + +] + +@subsection[#:tag "install-ragg"]{Installation} + +@itemize[ + +@item{@margin-note{At the time of this writing, Racket 5.3.2 is in +@link["http://pre.racket-lang.org/"]{pre-release}.} If you are using a version +of Racket > 5.3.1, then follow the instructions on the +@link["https://plt-etc.byu.edu:9004/info/ragg"]{PLaneT2 page}.} + + + +@item{For those who are using Racket <= 5.3.1, you can download the following PLT package: + +@nested[#:style 'inset]{@link["ragg.plt"]{ragg.plt} [md5sum: @compute-md5sum["ragg.plt" "ab79038b40e510a5cf13363825c4aef4"]] + + Last updated: @lookup-date["ragg.plt" "Wednesday, January 16th, 2013"] + } + +Once downloaded, either use DrRacket's package installation features +(@link["http://docs.racket-lang.org/drracket/Menus.html#(idx._(gentag._57._(lib._scribblings/drracket/drracket..scrbl)))"]{Install +PLT File...} under DrRacket's File menu), or use the command line: +@nested[#:style 'inset]{@tt{raco setup -A ragg.plt}}} + +] + + + +@subsection{Example: a small DSL for ASCII diagrams} + +@margin-note{This is a +@link["http://stackoverflow.com/questions/12345647/rewrite-this-script-by-designing-an-interpreter-in-racket"]{restatement +of a question on Stack Overflow}.} To motivate @tt{ragg}'s design, let's look +at the following toy problem: we'd like to define a language for +drawing simple 
ASCII diagrams. We'd like to be able to write something like this:
+ + +@subsection{Parsing the concrete syntax} +@filebox["simple-line-drawing.rkt"]{ +@verbatim|{ +#lang br/ragg +drawing: rows* +rows: repeat chunk+ ";" +repeat: INTEGER +chunk: INTEGER STRING +}| +} + +@margin-note{@secref{ragg-syntax} describes @tt{ragg}'s syntax in more detail.} +We write a @tt{ragg} program as an extended BNF grammar, where patterns can be: +@itemize[ +@item{the names of other rules (e.g. @racket[chunk])} +@item{literal and symbolic token names (e.g. @racket[";"], @racket[INTEGER])} +@item{quantified patterns (e.g. @litchar{+} to represent one-or-more repetitions)} +] +The result of a @tt{ragg} program is a module with a @racket[parse] function +that can parse tokens and produce a syntax object as a result. + +Let's exercise this function: +@interaction[#:eval my-eval +(require br/ragg/support) +@eval:alts[(require "simple-line-drawing.rkt") + (require br/ragg/examples/simple-line-drawing)] +(define stx + (parse (list (token 'INTEGER 6) + (token 'INTEGER 2) + (token 'STRING " ") + (token 'INTEGER 3) + (token 'STRING "X") + ";"))) +(syntax->datum stx) +] + +Tokens can either be: plain strings, symbols, or instances produced by the +@racket[token] function. (Plus a few more special cases, one in which we'll describe in a +moment.) + +Preferably, we want to attach each token with auxiliary source location +information. The more source location we can provide, the better, as the +syntax objects produced by @racket[parse] will incorporate them. + +Let's write a helper function, a @emph{lexer}, to help us construct tokens more +easily. The Racket standard library comes with a module called +@racketmodname[parser-tools/lex] which can help us write a position-sensitive +tokenizer: + +@interaction[#:eval my-eval +(require parser-tools/lex) +(define (tokenize ip) + (port-count-lines! 
ip) + (define my-lexer + (lexer-src-pos + [(repetition 1 +inf.0 numeric) + (token 'INTEGER (string->number lexeme))] + [upper-case + (token 'STRING lexeme)] + ["b" + (token 'STRING " ")] + [";" + (token ";" lexeme)] + [whitespace + (token 'WHITESPACE lexeme #:skip? #t)] + [(eof) + (void)])) + (define (next-token) (my-lexer ip)) + next-token) + +(define a-sample-input-port (open-input-string "6 2 b 3 X;")) +(define token-thunk (tokenize a-sample-input-port)) +@code:comment{Now we can pass token-thunk to the parser:} +(define another-stx (parse token-thunk)) +(syntax->datum another-stx) +@code:comment{The syntax object has location information:} +(syntax-line another-stx) +(syntax-column another-stx) +(syntax-span another-stx) +] + + +There are a few things to note from this lexer example: +@itemize[ + +@item{The @racket[parse] function can consume either sequences of tokens, or a +function that produces tokens. Both of these are considered sources of +tokens.} + +@item{As a special case for acceptable tokens, a token can also be an instance +of the @racket[position-token] structure of @racketmodname[parser-tools/lex], +in which case the token will try to derive its position from that of the +position-token.} + +@item{The @racket[parse] function will stop reading from a token source if any +token is @racket[void].} + +@item{The @racket[parse] function will skip over any token with the +@racket[#:skip?] attribute. Elements such as whitespace and comments will +often have @racket[#:skip?] set to @racket[#t].} + +] + + +@subsection{From parsing to interpretation} + +We now have a parser for programs written in this simple-line-drawing language. +Our parser will give us back syntax objects: +@interaction[#:eval my-eval +(define parsed-program + (parse (tokenize (open-input-string "3 9 X; 6 3 b 3 X 3 b; 3 9 X;")))) +(syntax->datum parsed-program) +] + +Moreover, we know that these syntax objects have a regular, predictable +structure. 
Their structure follows the grammar, so we know we'll be looking at +values of the form: + +@racketblock[ + (drawing (rows (repeat ) + (chunk ) ... ";") + ...) +] + +where @racket[drawing], @racket[rows], @racket[repeat], and @racket[chunk] +should be treated literally, and everything else will be numbers or strings. + + +Still, these syntax object values are just inert structures. How do we +interpret them, and make them @emph{print}? We did claim at the beginning of +this section that these syntax objects should be fairly easy to case-analyze +and interpret, so let's do it. + +@margin-note{This is a very quick-and-dirty treatment of @racket[syntax-parse]. +See the @racketmodname[syntax/parse] documentation for a gentler guide to its +features.} Racket provides a special form called @racket[syntax-parse] in the +@racketmodname[syntax/parse] library. @racket[syntax-parse] lets us do a +structural case-analysis on syntax objects: we provide it a set of patterns to +parse and actions to perform when those patterns match. + + +As a simple example, we can write a function that looks at a syntax object and +says @racket[#t] if it's the literal @racket[yes], and @racket[#f] otherwise: + +@interaction[#:eval my-eval +(require syntax/parse) +@code:comment{yes-syntax-object?: syntax-object -> boolean} +@code:comment{Returns true if the syntax-object is yes.} +(define (yes-syntax-object? stx) + (syntax-parse stx + [(~literal yes) + #t] + [else + #f])) +(yes-syntax-object? #'yes) +(yes-syntax-object? #'nooooooooooo) +] + +Here, we use @racket[~literal] to let @racket[syntax-parse] know that +@racket[yes] should show up literally in the syntax object. The patterns can +also have some structure to them, such as: +@racketblock[({~literal drawing} rows-stxs ...)] +which matches on syntax objects that begin, literally, with @racket[drawing], +followed by any number of rows (which are syntax objects too). 
+ + +Now that we know a little bit more about @racket[syntax-parse], +we can use it to do a case analysis on the syntax +objects that our @racket[parse] function gives us. +We start by defining a function on syntax objects of the form @racket[(drawing +rows-stx ...)]. +@interaction[#:eval my-eval +(define (interpret-drawing drawing-stx) + (syntax-parse drawing-stx + [({~literal drawing} rows-stxs ...) + + (for ([rows-stx (syntax->list #'(rows-stxs ...))]) + (interpret-rows rows-stx))]))] + +When we encounter a syntax object with @racket[(drawing rows-stx +...)], then @racket[interpret-rows] each @racket[rows-stx]. + +@;The pattern we +@;express in @racket[syntax-parse] above marks what things should be treated +@;literally, and the @racket[...] is a a part of the pattern matching language +@;known by @racket[syntax-parse] that lets us match multiple instances of the +@;last pattern. + + +Let's define @racket[interpret-rows] now: +@interaction[#:eval my-eval +(define (interpret-rows rows-stx) + (syntax-parse rows-stx + [({~literal rows} + ({~literal repeat} repeat-number) + chunks ... ";") + + (for ([i (syntax-e #'repeat-number)]) + (for ([chunk-stx (syntax->list #'(chunks ...))]) + (interpret-chunk chunk-stx)) + (newline))]))] + +For a @racket[rows], we extract out the @racket[repeat-number] out of the +syntax object and use it as the range of the @racket[for] loop. The inner loop +walks across each @racket[chunk-stx] and calls @racket[interpret-chunk] on it. + + +Finally, we need to write a definition for @racket[interpret-chunk]. 
We want +it to extract out the @racket[chunk-size] and @racket[chunk-string] portions, +and print to standard output: + +@interaction[#:eval my-eval +(define (interpret-chunk chunk-stx) + (syntax-parse chunk-stx + [({~literal chunk} chunk-size chunk-string) + + (for ([k (syntax-e #'chunk-size)]) + (display (syntax-e #'chunk-string)))])) +] + + +@margin-note{Here are the definitions in a single file: +@link["examples/simple-line-drawing/interpret.rkt"]{interpret.rkt}.} +With these definitions in hand, now we can pass it syntax objects +that we construct directly by hand: + +@interaction[#:eval my-eval +(interpret-chunk #'(chunk 3 "X")) +(interpret-drawing #'(drawing (rows (repeat 5) (chunk 3 "X") ";"))) +] + +or we can pass it the result generated by our parser: +@interaction[#:eval my-eval +(define parsed-program + (parse (tokenize (open-input-string "3 9 X; 6 3 b 3 X 3 b; 3 9 X;")))) +(interpret-drawing parsed-program)] + +And now we've got an interpreter! + + +@subsection{From interpretation to compilation} + +@margin-note{For a gentler tutorial on writing @litchar{#lang} extensions, see: +@link["http://hashcollision.org/brainfudge"]{F*dging up a Racket}.} (Just as a +warning: the following material is slightly more advanced, but shows how +writing a compiler for the line-drawing language reuses the ideas for the +interpreter.) + +Wouldn't it be nice to be able to write something like: + +@nested[#:style 'inset]{ +@verbatim|{ +3 9 X; +6 3 b 3 X 3 b; +3 9 X; +}|} + +and have Racket automatically compile this down to something like this? +@racketblock[ +(for ([i 3]) + (for ([k 9]) (displayln "X")) + (newline)) + +(for ([i 6]) + (for ([k 3]) (displayln " ")) + (for ([k 3]) (displayln "X")) + (for ([k 3]) (displayln " ")) + (newline)) + +(for ([i 3]) + (for ([k 9]) (displayln "X")) + (newline)) +] + +Well, of course it won't work: we don't have a @litchar{#lang} line. + +Let's add one. 
+ +@filebox["letter-i.rkt"]{ +@verbatim|{ +#lang br/ragg/examples/simple-line-drawing +3 9 X; +6 3 b 3 X 3 b; +3 9 X; +}| +} + +Now @filepath{letter-i.rkt} is a program. + + +How does this work? From the previous sections, we've seen how to take the +contents of a file and interpret it. What we want to do now is teach Racket +how to compile programs labeled with this @litchar{#lang} line. We'll do two +things: + +@itemize[ +@item{Tell Racket to use the @tt{ragg}-generated parser and lexer we defined +earlier whenever it sees a program written with +@litchar{#lang br/ragg/examples/simple-line-drawing}.} + +@item{Define transformation rules for @racket[drawing], @racket[rows], and + @racket[chunk] to rewrite these into standard Racket forms.} +] + +The second part, the writing of the transformation rules, will look very +similar to the definitions we wrote for the interpreter, but the transformation +will happen at compile-time. (We @emph{could} just resort to simply calling +into the interpreter we just wrote up, but this section is meant to show that +compilation is also viable.) + + +We do the first part by defining a @emph{module reader}: a +@link["http://docs.racket-lang.org/guide/syntax_module-reader.html"]{module +reader} tells Racket how to parse and compile a file. Whenever Racket sees a +@litchar{#lang }, it looks for a corresponding module reader in +@filepath{/lang/reader}. + +Here's the definition for +@filepath{br/ragg/examples/simple-line-drawing/lang/reader.rkt}: + +@filebox["br/ragg/examples/simple-line-drawing/lang/reader.rkt"]{ +@codeblock|{ +#lang s-exp syntax/module-reader +br/ragg/examples/simple-line-drawing/semantics +#:read my-read +#:read-syntax my-read-syntax +#:whole-body-readers? 
#t + +(require br/ragg/examples/simple-line-drawing/lexer + br/ragg/examples/simple-line-drawing/grammar) + +(define (my-read in) + (syntax->datum (my-read-syntax #f in))) + +(define (my-read-syntax src ip) + (list (parse src (tokenize ip)))) +}| +} + +We use a helper module @racketmodname[syntax/module-reader], which provides +utilities for creating a module reader. It uses the lexer and +@tt{ragg}-generated parser we defined earlier (saved into +@link["http://hashcollision.org/ragg/examples/simple-line-drawing/lexer.rkt"]{lexer.rkt} +and +@link["http://hashcollision.org/ragg/examples/simple-line-drawing/grammar.rkt"]{grammar.rkt} +modules), and also tells Racket that it should compile the forms in the syntax +object using a module called @filepath{semantics.rkt}. + +@margin-note{For a systematic treatment on capturing the semantics of +a language, see @link["http://cs.brown.edu/~sk/Publications/Books/ProgLangs/"]{Programming Languages: Application and +Interpretation}.} + +Let's look into @filepath{semantics.rkt} and see what's involved in +compilation: +@filebox["br/ragg/examples/simple-line-drawing/semantics.rkt"]{ +@codeblock|{ +#lang racket/base +(require (for-syntax racket/base syntax/parse)) + +(provide #%module-begin + ;; We reuse Racket's treatment of raw datums, specifically + ;; for strings and numbers: + #%datum + + ;; And otherwise, we provide definitions of these three forms. + ;; During compiliation, Racket uses these definitions to + ;; rewrite into for loops, displays, and newlines. + drawing rows chunk) + +;; Define a few compile-time functions to do the syntax rewriting: +(begin-for-syntax + (define (compile-drawing drawing-stx) + (syntax-parse drawing-stx + [({~literal drawing} rows-stxs ...) + + (syntax/loc drawing-stx + (begin rows-stxs ...))])) + + (define (compile-rows rows-stx) + (syntax-parse rows-stx + [({~literal rows} + ({~literal repeat} repeat-number) + chunks ... 
+ ";") + + (syntax/loc rows-stx + (for ([i repeat-number]) + chunks ... + (newline)))])) + + (define (compile-chunk chunk-stx) + (syntax-parse chunk-stx + [({~literal chunk} chunk-size chunk-string) + + (syntax/loc chunk-stx + (for ([k chunk-size]) + (display chunk-string)))]))) + + +;; Wire up the use of "drawing", "rows", and "chunk" to these +;; transformers: +(define-syntax drawing compile-drawing) +(define-syntax rows compile-rows) +(define-syntax chunk compile-chunk) +}| +} + +The semantics hold definitions for @racket[compile-drawing], +@racket[compile-rows], and @racket[compile-chunk], similar to what we had for +interpretation with @racket[interpret-drawing], @racket[interpret-rows], and +@racket[interpret-chunk]. However, compilation is not the same as +interpretation: each definition does not immediately execute the act of +drawing, but rather returns a syntax object whose evaluation will do the actual +work. + +There are a few things to note: + +@itemize[ + +@item{@tt{ragg}'s native data structure is the syntax object because the +majority of Racket's language-processing infrastructure knows how to read and +write this structured value.} + + +@item{ +@margin-note{By the way, we can just as easily rewrite the semantics so that +@racket[compile-rows] does explicitly call @racket[compile-chunk]. Often, +though, it's easier to write the transformation functions in this piecemeal way +and depend on the Racket macro expansion system to do the rewriting as it +encounters each of the forms.} +Unlike in interpretation, @racket[compile-rows] doesn't +compile each chunk by directly calling @racket[compile-chunk]. Rather, it +depends on the Racket macro expander to call each @racket[compile-XXX] function +as it encounters a @racket[drawing], @racket[rows], or @racket[chunk] in the +parsed value. 
The three statements at the bottom of @filepath{semantics.rkt} inform +the macro expansion system to do this: + +@racketblock[ +(define-syntax drawing compile-drawing) +(define-syntax rows compile-rows) +(define-syntax chunk compile-chunk) +]} +] + + +Altogether, @tt{ragg}'s intent is to be a parser generator generator for Racket +that's easy and fun to use. It's meant to fit naturally with the other tools +in the Racket language toolchain. Hopefully, it will reduce the friction in +making new languages with alternative concrete syntaxes. + +The rest of this document describes the @tt{ragg} language and the parsers it +generates. + + +@;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +@;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +@section{The language} + +@subsection[#:tag "ragg-syntax"]{Syntax and terminology} +A program in the @tt{ragg} language consists of the language line +@litchar{#lang br/ragg}, followed by a collection of @tech{rule}s and +@tech{line comment}s. + +A @deftech{rule} is a sequence consisting of: a @tech{rule identifier}, a colon +@litchar{":"}, and a @tech{pattern}. + +A @deftech{rule identifier} is an @tech{identifier} that is not in upper case. + +A @deftech{token identifier} is an @tech{identifier} that is in upper case. + +An @deftech{identifier} is a character sequence of letters, numbers, and +characters in @racket["-.!$%&/<=>?^_~@"]. It must not contain +@litchar{*} or @litchar{+}, as those characters are used to denote +quantification. 
+ + +A @deftech{pattern} is one of the following: +@itemize[ +@item{an implicit sequence of @tech{pattern}s separated by whitespace} +@item{a terminal: either a literal string or a @tech{token identifier}} +@item{a @tech{rule identifier}} +@item{a @deftech{choice pattern}: a sequence of @tech{pattern}s delimited with @litchar{|} characters.} +@item{a @deftech{quantifed pattern}: a @tech{pattern} followed by either @litchar{*} (``zero or more'') or @litchar{+} (``one or more'')} +@item{an @deftech{optional pattern}: a @tech{pattern} surrounded by @litchar{[} and @litchar{]}} +@item{an explicit sequence: a @tech{pattern} surrounded by @litchar{(} and @litchar{)}}] + +A @deftech{line comment} begins with either @litchar{#} or @litchar{;} and +continues till the end of the line. + + +For example, in the following program: +@nested[#:style 'inset +@verbatim|{ +#lang br/ragg +;; A parser for a silly language +sentence: verb optional-adjective object +verb: greeting +optional-adjective: ["happy" | "frumpy"] +greeting: "hello" | "hola" | "aloha" +object: "world" | WORLD +}|] + +the elements @tt{sentence}, @tt{verb}, @tt{greeting}, and @tt{object} are rule +identifiers. The first rule, @litchar{sentence: verb optional-adjective +object}, is a rule whose right side is an implicit pattern sequence of three +sub-patterns. The uppercased @tt{WORLD} is a token identifier. The fourth rule in the program associates @tt{greeting} with a @tech{choice pattern}. + + + +More examples: +@itemize[ + +@item{A +@link["http://hashcollision.org/ragg/examples/01-equal.rkt"]{BNF} for binary +strings that contain an equal number of zeros and ones. +@verbatim|{ +#lang br/ragg +equal: [zero one | one zero] ;; equal number of "0"s and "1"s. +zero: "0" equal | equal "0" ;; has an extra "0" in it. +one: "1" equal | equal "1" ;; has an extra "1" in it. +}| +} + +@item{A @link["http://hashcollision.org/ragg/examples/baby-json.rkt"]{BNF} for +@link["http://www.json.org/"]{JSON}-like structures. 
+@verbatim|{ +#lang br/ragg +json: number | string + | array | object +number: NUMBER +string: STRING +array: "[" [json ("," json)*] "]" +object: "{" [kvpair ("," kvpair)*] "}" +kvpair: ID ":" json +}| +} +] + +The @link["https://github.com/dyoo/ragg"]{ragg github source repository} +includes +@link["https://github.com/dyoo/ragg/tree/master/ragg/examples"]{several more +examples}. + + + +@subsection{Syntax errors} + +Besides the basic syntax errors that can occur with a malformed grammar, there +are a few other classes of situations that @litchar{#lang br/ragg} will consider +as syntax errors. + +@tt{ragg} will raise a syntax error if the grammar: +@itemize[ +@item{doesn't have any rules.} + +@item{has a rule with the same left hand side as any other rule.} + +@item{refers to rules that have not been defined. e.g. the +following program: +@nested[#:style 'code-inset +@verbatim|{ +#lang br/ragg +foo: [bar] +}| +] +should raise an error because @tt{bar} has not been defined, even though +@tt{foo} refers to it in an @tech{optional pattern}.} + + +@item{uses the token name @racket[EOF]; the end-of-file token type is reserved +for internal use by @tt{ragg}.} + + +@item{contains a rule that has no finite derivation. e.g. the following +program: +@nested[#:style 'code-inset +@verbatim|{ +#lang br/ragg +infinite-a: "a" infinite-a +}| +] +should raise an error because no finite sequence of tokens will satisfy +@tt{infinite-a}.} + +] + +Otherwise, @tt{ragg} should be fairly tolerant and permit even ambiguous +grammars. + +@subsection{Semantics} +@declare-exporting[br/ragg/examples/nested-word-list] + +A program written in @litchar{#lang br/ragg} produces a module that provides a few +bindings. The most important of these is @racket[parse]: + +@defproc[(parse [source any/c #f] + [token-source (or/c (sequenceof token) + (-> token))]) + syntax?]{ + +Parses the sequence of @tech{tokens} according to the rules in the grammar, using the +first rule as the start production. 
The parse must completely consume +@racket[token-source]. + +The @deftech{token source} can either be a sequence, or a 0-arity function that +produces @tech{tokens}. + +A @deftech{token} in @tt{ragg} can be any of the following values: +@itemize[ +@item{a string} +@item{a symbol} +@item{an instance produced by @racket[token]} +@item{an instance produced by the token constructors of @racketmodname[parser-tools/lex]} +@item{an instance of @racketmodname[parser-tools/lex]'s @racket[position-token] whose + @racket[position-token-token] is a @tech{token}.} +] + +A token whose type is either @racket[void] or @racket['EOF] terminates the +source. + + +If @racket[parse] succeeds, it will return a structured syntax object. The +structure of the syntax object follows the overall structure of the rules in +the BNF. For each rule @racket[r] and its associated pattern @racket[p], +@racket[parse] generates a syntax object @racket[#'(r p-value)] where +@racket[p-value]'s structure follows a case analysis on @racket[p]: + +@itemize[ +@item{For implicit and explicit sequences of @tech{pattern}s @racket[p1], + @racket[p2], ..., the corresponding values, spliced into the + structure.} +@item{For terminals, the value associated to the token.} +@item{For @tech{rule identifier}s: the associated parse value for the rule.} +@item{For @tech{choice pattern}s: the associated parse value for one of the matching subpatterns.} +@item{For @tech{quantifed pattern}s and @tech{optional pattern}s: the corresponding values, spliced into the structure.} +] + +Consequently, it's only the presence of @tech{rule identifier}s in a rule's +pattern that informs the parser to introduces nested structure into the syntax +object. + + +If the grammar has ambiguity, @tt{ragg} will choose and return a parse, though +it does not guarantee which one it chooses. 
+ + +If the parse cannot be performed successfully, or if a token in the +@racket[token-source] uses a type that isn't mentioned in the grammar, then +@racket[parse] raises an instance of @racket[exn:fail:parsing].} + + + +It's often convenient to extract a parser for other non-terminal rules in the +grammar, and not just for the first rule. A @tt{ragg}-generated module also +provides a form called @racket[make-rule-parser] to extract a parser for the +other non-terminals: + +@defform[#:id make-rule-parser + (make-rule-parser name)]{ +Constructs a parser for the @racket[name] of one of the non-terminals +in the grammar. + +For example, given the @tt{ragg} program +@filepath{simple-arithmetic-grammar.rkt}: +@filebox["simple-arithmetic-grammar.rkt"]{ +@verbatim|{ +#lang br/ragg +expr : term ('+' term)* +term : factor ('*' factor)* +factor : INT +}| +} +the following interaction shows how to extract a parser for @racket[term]s. +@interaction[#:eval my-eval +@eval:alts[(require "simple-arithmetic-grammar.rkt") + (require br/ragg/examples/simple-arithmetic-grammar)] +(define term-parse (make-rule-parser term)) +(define tokens (list (token 'INT 3) + "*" + (token 'INT 4))) +(syntax->datum (parse tokens)) +(syntax->datum (term-parse tokens)) + +(define another-token-sequence + (list (token 'INT 1) "+" (token 'INT 2) + "*" (token 'INT 3))) +(syntax->datum (parse another-token-sequence)) +@code:comment{Note that term-parse will break on another-token-sequence} +@code:comment{as it does not know what to do with the "+"} +(term-parse another-token-sequence) +] + +} + + +Finally, the module provides a set of all the used token types in the grammar +in @racket[all-token-types]: +@defthing[all-token-types (setof symbol?)]{ +A set of all the token types used in a grammar. 
+ +For example: +@interaction[#:eval my-eval +@eval:alts[(require "simple-arithmetic-grammar.rkt") + (require br/ragg/examples/simple-arithmetic-grammar)] +all-token-types +] + +} + + + + + +@section{Support API} + +@defmodule[br/ragg/support] + +The @racketmodname[br/ragg/support] module provides functions to interact with +@tt{ragg} programs. The most useful is the @racket[token] function, which +produces tokens to be parsed. + +@defproc[(token [type (or/c string? symbol?)] + [val any/c #f] + [#:line line (or/c positive-integer? #f) #f] + [#:column column (or/c natural-number? #f) #f] + [#:offset offset (or/c positive-integer? #f) #f] + [#:span span (or/c natural-number? #f) #f] + [#:skip? skip? boolean? #f] + ) + token-struct?]{ +Creates instances of @racket[token-struct]s. + +The syntax objects produced by a parse will inject the value @racket[val] in +place of the token name in the grammar. + +If @racket[#:skip?] is true, then the parser will skip over it during a +parse.} + + +@defstruct[token-struct ([type symbol?] + [val any/c] + [offset (or/c positive-integer? #f)] + [line (or/c natural-number? #f)] + [column (or/c positive-integer? #f)] + [span (or/c natural-number? #f)] + [skip? boolean?]) + #:transparent]{ +The token structure type. + +Rather than directly using the @racket[token-struct] constructor, please use +the helper function @racket[token] to construct instances. +} + + + + +@defstruct[(exn:fail:parsing exn:fail) + ([message string?] + [continuation-marks continuation-mark-set?] + [srclocs (listof srcloc?)])]{ +The exception raised when parsing fails. 
+ +@racket[exn:fail:parsing] implements Racket's @racket[prop:exn:srcloc] +property, so if this exception reaches DrRacket's default error handler, +DrRacket should highlight the offending locations in the source.} + + + + + + +@;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +@;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + +@section{Caveats and things to do} + +Here are a few caveats and future aims for @tt{ragg}. + +@itemize[ + +@item{@tt{ragg} doesn't currently have a good story about operator precedence. +Future versions of @tt{ragg} will support the specification of operator +precedence to deal with grammar ambiguity, probably by extending the BNF +grammar rules in @litchar{#lang br/ragg} with keyword arguments.} + + +@item{I currently depend on the lexer framework provided by +@racketmodname[parser-tools/lex], which has a steeper learning curve than I'd +like. A future version of @tt{ragg} will probably try to provide a nicer set +of tools for defining lexers.} + + +@item{The underlying parsing engine (an Earley-style parser) has not been fully +optimized, so it may exhibit degenerate parse times. A future version of +@tt{ragg} will guarantee @math{O(n^3)} time bounds so that at the very least, +parses will be polynomial-time.} + + +@item{@tt{ragg} doesn't yet have a good story on dealing with parser error +recovery. If a parse fails, it tries to provide the source location, but does +little else.} + +@item{@tt{ragg} is slightly misnamed: what it really builds is a concrete +syntax tree rather than an abstract syntax tree. 
A future version of @tt{ragg} +will probably support annotations on patterns so that they can be omitted or +transformed in the parser output.} + +] + + +@;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +@;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + +@section{Miscellaneous and thanks} + +Thanks to Matthew Flatt for pointing me to @racket[cfg-parser] from the +@racket[cfg-parser] library. Joe Politz gave me good advice and +feedback. Also, he suggested the name ``ragg''. Other alternatives I'd been +considering were ``autogrammar'' or ``chompy''. Thankfully, he is a better +Namer than me. Daniel Patterson provided feedback that led to +@racket[make-rule-parser]. Robby Findler and Guillaume Marceau provided +steadfast suggestions to look into other parsing frameworks like +@link["http://en.wikipedia.org/wiki/Syntax_Definition_Formalism"]{SDF} and +@link["http://sablecc.org/"]{SableCC}. Special thanks to Shriram +Krishnamurthi, who convinced me that other people might find this package +useful. + + +@close-eval[my-eval] diff --git a/beautiful-racket-ragg/br/ragg/rules/lexer.rkt b/beautiful-racket-ragg/br/ragg/rules/lexer.rkt new file mode 100755 index 0000000..8022b65 --- /dev/null +++ b/beautiful-racket-ragg/br/ragg/rules/lexer.rkt @@ -0,0 +1,104 @@ +#lang racket/base +(require parser-tools/lex + (prefix-in : parser-tools/lex-sre) + "parser.rkt" + "rule-structs.rkt") + +(provide lex/1 tokenize) + +;; A newline can be any one of the following. +(define-lex-abbrev NL (:or "\r\n" "\r" "\n")) + +;; Slightly modified from the read.rkt example in parser-tools, treating +;; +, :, and * as reserved, non-identifier characters. 
+(define-lex-abbrevs + [letter (:or (:/ "a" "z") (:/ #\A #\Z))] + [digit (:/ #\0 #\9)] + [id-char (:or letter digit (char-set "-.!$%&/<=>?^_~@"))] + ) + +(define-lex-abbrev id + (:& (complement (:+ digit)) + (:+ id-char))) + + + + + +(define lex/1 + (lexer-src-pos + [(:: "'" + (:* (:or "\\'" (:~ "'" "\\"))) + "'") + (token-LIT lexeme)] + [(:: "\"" + (:* (:or "\\\"" (:~ "\"" "\\"))) + "\"") + (token-LIT lexeme)] + ["(" + (token-LPAREN lexeme)] + ["[" + (token-LBRACKET lexeme)] + [")" + (token-RPAREN lexeme)] + ["]" + (token-RBRACKET lexeme)] + ["|" + (token-PIPE lexeme)] + [(:or "+" "*") + (token-REPEAT lexeme)] + [whitespace + ;; Skip whitespace + (return-without-pos (lex/1 input-port))] + ;; Skip comments up to end of line + [(:: (:or "#" ";") + (complement (:: (:* any-char) NL (:* any-char))) + (:or NL "")) + ;; Skip comments up to end of line. + (return-without-pos (lex/1 input-port))] + [(eof) + (token-EOF lexeme)] + [(:: id (:* whitespace) ":") + (token-RULE_HEAD lexeme)] + [id + (token-ID lexeme)] + + ;; We call the error handler for everything else: + [(:: any-char) + (let-values ([(rest-of-text end-pos-2) + (lex-nonwhitespace input-port)]) + ((current-parser-error-handler) + #f + 'error + (string-append lexeme rest-of-text) + (position->pos start-pos) + (position->pos end-pos-2)))])) + + +;; This is the helper for the error production. +(define lex-nonwhitespace + (lexer + [(:+ (char-complement whitespace)) + (values lexeme end-pos)] + [any-char + (values lexeme end-pos)] + [(eof) + (values "" end-pos)])) + + + +;; position->pos: position -> pos +;; Coerses position structures from parser-tools/lex to our own pos structures. 
+(define (position->pos a-pos) + (pos (position-offset a-pos) + (position-line a-pos) + (position-col a-pos))) + + + +;; tokenize: input-port -> (-> token) +(define (tokenize ip + #:source [source (object-name ip)]) + (lambda () + (parameterize ([file-path source]) + (lex/1 ip)))) diff --git a/beautiful-racket-ragg/br/ragg/rules/parser.rkt b/beautiful-racket-ragg/br/ragg/rules/parser.rkt new file mode 100755 index 0000000..27a5822 --- /dev/null +++ b/beautiful-racket-ragg/br/ragg/rules/parser.rkt @@ -0,0 +1,219 @@ +#lang racket/base +(require parser-tools/yacc + parser-tools/lex + racket/list + racket/match + "rule-structs.rkt") + +;; A parser for grammars. + +(provide tokens + token-LPAREN + token-RPAREN + token-LBRACKET + token-RBRACKET + token-PIPE + token-REPEAT + token-RULE_HEAD + token-ID + token-LIT + token-EOF + grammar-parser + + current-source + current-parser-error-handler + + [struct-out rule] + [struct-out lhs-id] + [struct-out pattern] + [struct-out pattern-id] + [struct-out pattern-lit] + [struct-out pattern-token] + [struct-out pattern-choice] + [struct-out pattern-repeat] + [struct-out pattern-maybe] + [struct-out pattern-seq]) + +(define-tokens tokens (LPAREN + RPAREN + LBRACKET + RBRACKET + PIPE + REPEAT + RULE_HEAD + ID + LIT + EOF)) + +;; grammar-parser: (-> token) -> (listof rule) +(define grammar-parser + (parser + (tokens tokens) + (src-pos) + (start rules) + (end EOF) + + (grammar + [rules + [(rules*) $1]] + + [rules* + [(rule rules*) + (cons $1 $2)] + [() + '()]] + + ;; I have a separate token type for rule identifiers to avoid the + ;; shift/reduce conflict that happens with the implicit sequencing + ;; of top-level rules. i.e. the parser can't currently tell, when + ;; it sees an ID, if it should shift or reduce to a new rule. 
+ [rule + [(RULE_HEAD pattern) + (begin + (define trimmed (regexp-replace #px"\\s*:$" $1 "")) + (rule (position->pos $1-start-pos) + (position->pos $2-end-pos) + (lhs-id (position->pos $1-start-pos) + (pos (+ (position-offset $1-start-pos) + (string-length trimmed)) + (position-line $1-start-pos) + (position-col $1-start-pos)) + trimmed) + $2))]] + + [pattern + [(implicit-pattern-sequence PIPE pattern) + (if (pattern-choice? $3) + (pattern-choice (position->pos $1-start-pos) + (position->pos $3-end-pos) + (cons $1 (pattern-choice-vals $3))) + (pattern-choice (position->pos $1-start-pos) + (position->pos $3-end-pos) + (list $1 $3)))] + [(implicit-pattern-sequence) + $1]] + + [implicit-pattern-sequence + [(repeatable-pattern implicit-pattern-sequence) + (if (pattern-seq? $2) + (pattern-seq (position->pos $1-start-pos) + (position->pos $2-end-pos) + (cons $1 (pattern-seq-vals $2))) + (pattern-seq (position->pos $1-start-pos) + (position->pos $2-end-pos) + (list $1 $2)))] + [(repeatable-pattern) + $1]] + + [repeatable-pattern + [(atomic-pattern REPEAT) + (cond [(string=? $2 "*") + (pattern-repeat (position->pos $1-start-pos) + (position->pos $2-end-pos) + 0 $1)] + [(string=? $2 "+") + (pattern-repeat (position->pos $1-start-pos) + (position->pos $2-end-pos) + 1 $1)] + [else + (error 'grammar-parse "unknown repetition operator ~e" $2)])] + [(atomic-pattern) + $1]] + + [atomic-pattern + [(LIT) + (pattern-lit (position->pos $1-start-pos) + (position->pos $1-end-pos) + (substring $1 1 (sub1 (string-length $1))))] + + [(ID) + (if (token-id? $1) + (pattern-token (position->pos $1-start-pos) + (position->pos $1-end-pos) + $1) + (pattern-id (position->pos $1-start-pos) + (position->pos $1-end-pos) + $1))] + + [(LBRACKET pattern RBRACKET) + (pattern-maybe (position->pos $1-start-pos) + (position->pos $3-end-pos) + $2)] + + [(LPAREN pattern RPAREN) + (relocate-pattern $2 (position->pos $1-start-pos) (position->pos $3-end-pos))]]) + + + (error (lambda (tok-ok? 
tok-name tok-value start-pos end-pos) + ((current-parser-error-handler) tok-ok? tok-name tok-value (position->pos start-pos) (position->pos end-pos)))))) + + +;; relocate-pattern: pattern -> pattern +;; Rewrites the pattern's start and end pos accordingly. +(define (relocate-pattern a-pat start-pos end-pos) + (match a-pat + [(pattern-id _ _ v) + (pattern-id start-pos end-pos v)] + [(pattern-token _ _ v) + (pattern-token start-pos end-pos v)] + [(pattern-lit _ _ v) + (pattern-lit start-pos end-pos v)] + [(pattern-choice _ _ vs) + (pattern-choice start-pos end-pos vs)] + [(pattern-repeat _ _ m v) + (pattern-repeat start-pos end-pos m v)] + [(pattern-maybe _ _ v) + (pattern-maybe start-pos end-pos v)] + [(pattern-seq _ _ vs) + (pattern-seq start-pos end-pos vs)] + [else + (error 'relocate-pattern "Internal error when relocating ~s\n" a-pat)])) + + +; token-id: string -> boolean +;; Produces true if the id we see should be treated as the name of a token. +;; By convention, tokens are all upper-cased. +(define (token-id? id) + (string=? (string-upcase id) + id)) + + + +;; position->pos: position -> pos +;; Coerses position structures from parser-tools/lex to our own pos structures. +(define (position->pos a-pos) + (pos (position-offset a-pos) + (position-line a-pos) + (position-col a-pos))) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; During parsing, we should define the source of the input. +(define current-source (make-parameter #f)) + + +;; When bad things happen, we need to emit errors with source location. +(struct exn:fail:parse-grammar exn:fail (srclocs) + #:transparent + #:property prop:exn:srclocs (lambda (instance) + (exn:fail:parse-grammar-srclocs instance))) + +(define current-parser-error-handler + (make-parameter + (lambda (tok-ok? 
tok-name tok-value start-pos end-pos) + (raise (exn:fail:parse-grammar + (format "Error while parsing grammar near: ~e [line=~a, column=~a, position=~a]" + tok-value + (pos-line start-pos) + (pos-col start-pos) + (pos-offset start-pos)) + (current-continuation-marks) + (list (srcloc (current-source) + (pos-line start-pos) + (pos-col start-pos) + (pos-offset start-pos) + (if (and (number? (pos-offset end-pos)) + (number? (pos-offset start-pos))) + (- (pos-offset end-pos) + (pos-offset start-pos)) + #f)))))))) diff --git a/beautiful-racket-ragg/br/ragg/rules/rule-structs.rkt b/beautiful-racket-ragg/br/ragg/rules/rule-structs.rkt new file mode 100755 index 0000000..28dfcef --- /dev/null +++ b/beautiful-racket-ragg/br/ragg/rules/rule-structs.rkt @@ -0,0 +1,47 @@ +#lang racket/base +(provide (all-defined-out)) + + +;; We keep our own position structure because parser-tools/lex's position +;; structure is non-transparent, hence highly resistant to unit testing. +(struct pos (offset line col) + #:transparent) + + + +(struct rule (start end lhs pattern) + #:transparent) + +(struct lhs-id (start end val) + #:transparent) + + +;; A pattern can be one of the following: +(struct pattern (start end) + #:transparent) + +(struct pattern-id pattern (val) + #:transparent) + + +;; Token structure to be defined by the user +(struct pattern-token pattern (val) + #:transparent) + +;; Token structure defined as the literal string to be matched. 
+(struct pattern-lit pattern (val) + #:transparent) + +(struct pattern-choice pattern (vals) + #:transparent) + +(struct pattern-repeat pattern (min ;; either 0 or 1 + val) + #:transparent) + +(struct pattern-maybe pattern (val) + #:transparent) + +(struct pattern-seq pattern (vals) + #:transparent) + diff --git a/beautiful-racket-ragg/br/ragg/rules/runtime.rkt b/beautiful-racket-ragg/br/ragg/rules/runtime.rkt new file mode 100755 index 0000000..45ec14a --- /dev/null +++ b/beautiful-racket-ragg/br/ragg/rules/runtime.rkt @@ -0,0 +1,34 @@ +#lang racket/base +(require parser-tools/lex) + +(provide (all-defined-out)) + +;; During parsing, we should define the source of the input. +(define current-source (make-parameter #f)) + + +;; When bad things happen, we need to emit errors with source location. +(struct exn:fail:parse-grammar exn:fail (srclocs) + #:transparent + #:property prop:exn:srclocs (lambda (instance) + (exn:fail:parse-grammar-srclocs instance))) + +(define current-parser-error-handler + (make-parameter + (lambda (tok-ok? tok-name tok-value start-pos end-pos) + (raise (exn:fail:parse-grammar + (format "Error while parsing grammar near: ~e [line=~a, column=~a, position=~a]" + tok-value + (position-line start-pos) + (position-col start-pos) + (position-offset start-pos)) + (current-continuation-marks) + (list (srcloc (current-source) + (position-line start-pos) + (position-col start-pos) + (position-offset start-pos) + (if (and (number? (position-offset end-pos)) + (number? 
(position-offset start-pos))) + (- (position-offset end-pos) + (position-offset start-pos)) + #f)))))))) diff --git a/beautiful-racket-ragg/br/ragg/rules/stx-types.rkt b/beautiful-racket-ragg/br/ragg/rules/stx-types.rkt new file mode 100755 index 0000000..e0ac70a --- /dev/null +++ b/beautiful-racket-ragg/br/ragg/rules/stx-types.rkt @@ -0,0 +1,16 @@ +#lang racket/base +(provide (all-defined-out)) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; These are just here to provide bindings for Check Syntax. +;; Otherwise, we should never hit these, as the toplevel rules-codegen +;; should eliminate all uses of these if it does the right thing. +(define (rules stx) (raise-syntax-error #f "Used out of context of rules" stx)) +(define (rule stx) (raise-syntax-error #f "Used out of context of rules" stx)) +(define (id stx) (raise-syntax-error #f "Used out of context of rules" stx)) +(define (lit stx) (raise-syntax-error #f "Used out of context of rules" stx)) +(define (token stx) (raise-syntax-error #f "Used out of context of rules" stx)) +(define (choice stx) (raise-syntax-error #f "Used out of context of rules" stx)) +(define (repeat stx) (raise-syntax-error #f "Used out of context of rules" stx)) +(define (maybe stx) (raise-syntax-error #f "Used out of context of rules" stx)) +(define (seq stx) (raise-syntax-error #f "Used out of context of rules" stx)) \ No newline at end of file diff --git a/beautiful-racket-ragg/br/ragg/rules/stx.rkt b/beautiful-racket-ragg/br/ragg/rules/stx.rkt new file mode 100755 index 0000000..013fd50 --- /dev/null +++ b/beautiful-racket-ragg/br/ragg/rules/stx.rkt @@ -0,0 +1,76 @@ +#lang racket/base + +(require "rule-structs.rkt" + parser-tools/lex + racket/match + syntax/strip-context) + +(provide rules->stx) + +;; Given a sequence of rules, we translate these to syntax objects. 
+ +;; rules->stx: (listof rule) -> syntax +(define (rules->stx source rules #:original-stx [original-stx #f]) + (define rule-stxs + (map (lambda (stx) (rule->stx source stx)) + rules)) + (datum->syntax #f + `(rules ,@rule-stxs) + original-stx)) + + +(define (rule->stx source a-rule) + (define id-stx + (datum->syntax #f + (string->symbol (lhs-id-val (rule-lhs a-rule))) + (list source + (pos-line (lhs-id-start (rule-lhs a-rule))) + (pos-col (lhs-id-start (rule-lhs a-rule))) + (pos-offset (lhs-id-start (rule-lhs a-rule))) + (if (and (number? (pos-offset (lhs-id-start (rule-lhs a-rule)))) + (number? (pos-offset (lhs-id-end (rule-lhs a-rule))))) + (- (pos-offset (lhs-id-end (rule-lhs a-rule))) + (pos-offset (lhs-id-start (rule-lhs a-rule)))) + #f)))) + (define pattern-stx (pattern->stx source (rule-pattern a-rule))) + (define line (pos-line (rule-start a-rule))) + (define column (pos-col (rule-start a-rule))) + (define position (pos-offset (rule-start a-rule))) + (define span (if (and (number? (pos-offset (rule-start a-rule))) + (number? (pos-offset (rule-end a-rule)))) + (- (pos-offset (rule-end a-rule)) + (pos-offset (rule-start a-rule))) + #f)) + (datum->syntax #f + `(rule ,id-stx ,pattern-stx) + (list source line column position span))) + +(define (pattern->stx source a-pattern) + (define recur (lambda (s) (pattern->stx source s))) + + (define line (pos-line (pattern-start a-pattern))) + (define column (pos-col (pattern-start a-pattern))) + (define position (pos-offset (pattern-start a-pattern))) + (define span (if (and (number? (pos-offset (pattern-start a-pattern))) + (number? 
(pos-offset (pattern-end a-pattern)))) + (- (pos-offset (pattern-end a-pattern)) + (pos-offset (pattern-start a-pattern))) + #f)) + (define source-location (list source line column position span)) + (datum->syntax #f + (match a-pattern + [(struct pattern-id (start end val)) + `(id ,(datum->syntax #f (string->symbol val) source-location))] + [(struct pattern-lit (start end val)) + `(lit ,(datum->syntax #f val source-location))] + [(struct pattern-token (start end val)) + `(token ,(datum->syntax #f (string->symbol val) source-location))] + [(struct pattern-choice (start end vals)) + `(choice ,@(map recur vals))] + [(struct pattern-repeat (start end min val)) + `(repeat ,min ,(recur val))] + [(struct pattern-maybe (start end val)) + `(maybe ,(recur val))] + [(struct pattern-seq (start end vals)) + `(seq ,@(map recur vals))]) + source-location)) diff --git a/beautiful-racket-ragg/br/ragg/support.rkt b/beautiful-racket-ragg/br/ragg/support.rkt new file mode 100755 index 0000000..e63d868 --- /dev/null +++ b/beautiful-racket-ragg/br/ragg/support.rkt @@ -0,0 +1,37 @@ +#lang racket/base + +(provide [struct-out token-struct] + token + [struct-out exn:fail:parsing]) + +(struct token-struct (type val offset line column span skip?) + #:transparent) + + +;; Token constructor. +;; This is intended to be a general token structure constructor that's nice +;; to work with. +;; It should cooperate with the tokenizers constructed with make-permissive-tokenizer. +(define token + (lambda (type ;; (U symbol string) + [val #f] ;; any + #:offset [offset #f] ;; (U #f number) + #:line [line #f] ;; (U #f number) + #:column [column #f] ;; (U #f number) + #:span [span #f] ;; boolean + #:skip? [skip? #f]) + (token-struct (if (string? type) (string->symbol type) type) + val + offset line column span skip?))) + + +;; When bad things happen, we need to emit errors with source location. 
+(struct exn:fail:parsing exn:fail (srclocs) + #:transparent + #:property prop:exn:srclocs (lambda (instance) + (exn:fail:parsing-srclocs instance))) + + + + + diff --git a/beautiful-racket-ragg/br/ragg/test/exercise-python-grammar.rkt b/beautiful-racket-ragg/br/ragg/test/exercise-python-grammar.rkt new file mode 100755 index 0000000..361c2ba --- /dev/null +++ b/beautiful-racket-ragg/br/ragg/test/exercise-python-grammar.rkt @@ -0,0 +1,136 @@ +#lang racket +(require br/ragg/examples/python-grammar + br/ragg/support + python-tokenizer + racket/generator + parser-tools/lex + racket/match + rackunit) + + + +(define (kludge-nl-dedent-endmarker toks) + ;; Kludge! If the last three tokens in the stream are: + ;; NL, DEDENT, ENDMARKER, + ;; then switch them around to: + ;; DEDENT, NEWLINE, ENDMARKER + ;; The Python tokenizer is doing something funny here, and I think + ;; it's a bug in tokenize.py (and, transitively, the python-tokenizer + ;; PLaneT package). + (cond [(< (length toks) 3) + toks] + [else + (define last-three-toks (take-right toks 3)) + (match last-three-toks + [(list (list 'NL nl-text start-loc end-loc rest-str) + (and t2 (list 'DEDENT _ ...)) + (and t3 (list 'ENDMARKER _ ...))) + (append (drop-right toks 3) + (list t2 + (list 'NEWLINE nl-text start-loc end-loc rest-str) + t3))] + [else + toks])])) + +(define (adapt-python-tokenizer ip #:end-marker-to-eof? [end-marker-to-eof? #f]) + (define generated-tokens (kludge-nl-dedent-endmarker + (sequence->list (generate-tokens ip)))) + (define tokens (sequence->generator generated-tokens)) + (lambda () + (let loop () + (define next-token (tokens)) + (match next-token + [(list type text (list start-line start-col) (list end-line end-col) rest-string) + ;; FIXME: improve the Python tokenizer to hold offsets too. + (define start-pos (position #f start-line start-col)) + (define end-pos (position #f end-line end-col)) + (cond + [(eq? 
type 'NL) + ;; Skip over NL tokens: they are meant to represent the continuation + ;; of a logical line. + (loop)] + [else + (position-token (case type + [(NAME) + (cond [(set-member? all-token-types (string->symbol text)) + (token (string->symbol text) text)] + [else + (token 'NAME text)])] + [(OP) + (token (string->symbol text) text)] + [(NUMBER) + (token 'NUMBER text)] + [(STRING) + (token 'STRING text)] + [(COMMENT) + (token 'WHITESPACE #:skip? #t)] + [(NEWLINE) + (token 'NEWLINE text)] + [(DEDENT) + (token 'DEDENT text)] + [(INDENT) + (token 'INDENT text)] + [(ERRORTOKEN) + (error 'uh-oh)] + [(ENDMARKER) + (if end-marker-to-eof? + (token eof) + (token 'ENDMARKER text))]) + start-pos + end-pos)])] + [(? void) + (token eof)])))) + + +(define sample-tokens (adapt-python-tokenizer + (open-input-string #<datum (parse "hello.py" sample-tokens))) + + + + +(define parse-expr (make-rule-parser expr)) + + +(check-equal? + (syntax->datum (parse-expr + (adapt-python-tokenizer (open-input-string "42") + #:end-marker-to-eof? #t))) + '(expr (xor_expr (and_expr (shift_expr (arith_expr (term (factor (power (atom "42")))))))))) + + + +(check-equal? + (syntax->datum (parse-expr + (adapt-python-tokenizer (open-input-string "(lambda x,y: y,x)") + #:end-marker-to-eof? #t))) + '(expr (xor_expr (and_expr (shift_expr (arith_expr (term (factor (power (atom "(" (testlist_comp (test (lambdef "lambda" (varargslist (fpdef "x") "," (fpdef "y")) ":" (test (or_test (and_test (not_test (comparison (expr (xor_expr (and_expr (shift_expr (arith_expr (term (factor (power (atom "y")))))))))))))))) "," (test (or_test (and_test (not_test (comparison (expr (xor_expr (and_expr (shift_expr (arith_expr (term (factor (power (atom "x"))))))))))))))) ")")))))))))) + + +(check-equal? + (syntax->datum (parse-expr + (adapt-python-tokenizer (open-input-string "sqrt(x^2+y^2)") + #:end-marker-to-eof? 
#t))) + '(expr (xor_expr (and_expr (shift_expr (arith_expr (term (factor (power (atom "sqrt") (trailer "(" (arglist (argument (test (or_test (and_test (not_test (comparison (expr (xor_expr (and_expr (shift_expr (arith_expr (term (factor (power (atom "x"))))))) "^" (and_expr (shift_expr (arith_expr (term (factor (power (atom "2")))) "+" (term (factor (power (atom "y"))))))) "^" (and_expr (shift_expr (arith_expr (term (factor (power (atom "2")))))))))))))))) ")")))))))))) + + + +(define parse-single-input (make-rule-parser single_input)) +(check-equal? + (syntax->datum + (parse-single-input + (adapt-python-tokenizer (open-input-string "def f(x):\n return x*x\n\n") + #:end-marker-to-eof? #t))) + '(single_input + (compound_stmt + (funcdef "def" "f" (parameters "(" (varargslist (fpdef "x")) ")") ":" (suite "\n" " " (stmt (simple_stmt (small_stmt (flow_stmt (return_stmt "return" (testlist (test (or_test (and_test (not_test (comparison (expr (xor_expr (and_expr (shift_expr (arith_expr (term (factor (power (atom "x"))) "*" (factor (power (atom "x")))))))))))))))))) "\n")) ""))) "\n")) diff --git a/beautiful-racket-ragg/br/ragg/test/test-01-equal.rkt b/beautiful-racket-ragg/br/ragg/test/test-01-equal.rkt new file mode 100755 index 0000000..d6284e7 --- /dev/null +++ b/beautiful-racket-ragg/br/ragg/test/test-01-equal.rkt @@ -0,0 +1,30 @@ +#lang racket/base + +(require br/ragg/examples/01-equal + rackunit) + +(check-equal? (syntax->datum (parse "")) + '(equal)) +(check-equal? (syntax->datum (parse "01")) + '(equal (zero (equal) #\0) + (one (equal) #\1))) +(check-equal? (syntax->datum (parse "10")) + '(equal (one (equal) #\1) + (zero (equal) #\0))) +(check-equal? (syntax->datum (parse "0011")) + '(equal (zero (equal) #\0) + (one (equal (zero (equal) #\0) + (one (equal) #\1)) + #\1))) +(check-equal? (syntax->datum (parse "0110")) + '(equal (one (equal (zero (equal) #\0) + (one (equal) #\1)) + #\1) + (zero (equal) #\0))) + +(check-equal? 
(syntax->datum (parse "1100")) + '(equal (one (equal) #\1) + (zero (equal (one (equal) #\1) + (zero (equal) #\0)) + #\0))) + diff --git a/beautiful-racket-ragg/br/ragg/test/test-0n1.rkt b/beautiful-racket-ragg/br/ragg/test/test-0n1.rkt new file mode 100755 index 0000000..520d5b9 --- /dev/null +++ b/beautiful-racket-ragg/br/ragg/test/test-0n1.rkt @@ -0,0 +1,50 @@ +#lang racket/base + +(require br/ragg/examples/0n1 + br/ragg/support + rackunit) + +(define (lex ip) + (port-count-lines! ip) + (lambda () + (define next-char (read-char ip)) + (cond [(eof-object? next-char) + (token eof)] + [(char=? next-char #\0) + (token "0" "0")] + [(char=? next-char #\1) + (token "1" "1")]))) + + +(check-equal? (syntax->datum (parse #f (lex (open-input-string "1")))) + '(rule "1")) + + +(check-equal? (syntax->datum (parse #f (lex (open-input-string "01")))) + '(rule "0" "1")) + + +(check-equal? (syntax->datum (parse #f (lex (open-input-string "001")))) + '(rule "0" "0" "1")) + + +(check-exn exn:fail:parsing? + (lambda () + (parse #f (lex (open-input-string "0"))))) + +(check-exn exn:fail:parsing? + (lambda () + (parse #f (lex (open-input-string "10"))))) + +(check-exn exn:fail:parsing? + (lambda () + (parse #f (lex (open-input-string "010"))))) + + +;; This should fail predictably because we're passing in tokens +;; that the parser doesn't know. +(check-exn exn:fail:parsing? + (lambda () (parse '("zero" "one" "zero")))) +(check-exn (regexp (regexp-quote + "Encountered unexpected token \"zero\" (\"zero\") while parsing")) + (lambda () (parse '("zero" "one" "zero")))) diff --git a/beautiful-racket-ragg/br/ragg/test/test-0n1n.rkt b/beautiful-racket-ragg/br/ragg/test/test-0n1n.rkt new file mode 100755 index 0000000..5d6e9b1 --- /dev/null +++ b/beautiful-racket-ragg/br/ragg/test/test-0n1n.rkt @@ -0,0 +1,49 @@ +#lang racket/base +(require br/ragg/examples/0n1n + br/ragg/support + rackunit) + +(define (lex ip) + (port-count-lines! 
ip) + (lambda () + (define next-char (read-char ip)) + (cond [(eof-object? next-char) + (token eof)] + [(char=? next-char #\0) + (token "0" "0")] + [(char=? next-char #\1) + (token "1" "1")]))) + + +;; The only rule in the grammar is: +;; +;; rule-0n1n: ["0" rule-0n1n "1"] +;; +;; It makes use of the "maybe" pattern. The result type of the +;; grammar rule is: +;; +;; rule-0n1n: (U #f +;; (list "0" rule-0n1n "1")) + +(check-equal? (syntax->datum (parse #f (lex (open-input-string "0011")))) + '(rule-0n1n "0" (rule-0n1n "0" (rule-0n1n) "1") "1")) + +(check-equal? (syntax->datum (parse #f (lex (open-input-string "01")))) + '(rule-0n1n "0" (rule-0n1n) "1")) + +(check-equal? (syntax->datum (parse #f (lex (open-input-string "")))) + '(rule-0n1n)) + +(check-equal? (syntax->datum (parse #f (lex (open-input-string "000111")))) + '(rule-0n1n "0" (rule-0n1n "0" (rule-0n1n "0" (rule-0n1n) "1") "1") "1")) + + + +(check-exn exn:fail:parsing? + (lambda () (parse #f (lex (open-input-string "0001111"))))) + +(check-exn exn:fail:parsing? + (lambda () (parse #f (lex (open-input-string "0001110"))))) + +(check-exn exn:fail:parsing? 
+ (lambda () (parse #f (lex (open-input-string "10001110"))))) diff --git a/beautiful-racket-ragg/br/ragg/test/test-all.rkt b/beautiful-racket-ragg/br/ragg/test/test-all.rkt new file mode 100755 index 0000000..b989dbb --- /dev/null +++ b/beautiful-racket-ragg/br/ragg/test/test-all.rkt @@ -0,0 +1,18 @@ +#lang racket/base + + +(require "test-0n1.rkt" + "test-0n1n.rkt" + "test-01-equal.rkt" + "test-simple-arithmetic-grammar.rkt" + "test-baby-json.rkt" + "test-wordy.rkt" + "test-simple-line-drawing.rkt" + "test-flatten.rkt" + "test-lexer.rkt" + "test-parser.rkt" + "exercise-python-grammar.rkt" + "test-errors.rkt" + "test-old-token.rkt" + "test-weird-grammar.rkt" + (submod br/ragg/codegen/satisfaction test)) diff --git a/beautiful-racket-ragg/br/ragg/test/test-baby-json.rkt b/beautiful-racket-ragg/br/ragg/test/test-baby-json.rkt new file mode 100755 index 0000000..b9c97f9 --- /dev/null +++ b/beautiful-racket-ragg/br/ragg/test/test-baby-json.rkt @@ -0,0 +1,25 @@ +#lang racket/base +(require br/ragg/examples/baby-json + br/ragg/support + rackunit) + +(check-equal? + (syntax->datum + (parse (list "{" + (token 'ID "message") + ":" + (token 'STRING "'hello world'") + "}"))) + '(json (object "{" + (kvpair "message" ":" (json (string "'hello world'"))) + "}"))) + + +(check-equal? + (syntax->datum + (parse "[[[{}]],[],[[{}]]]")) + '(json (array #\[ (json (array #\[ (json (array #\[ (json (object #\{ #\})) #\])) #\])) #\, (json (array #\[ #\])) #\, (json (array #\[ (json (array #\[ (json (object #\{ #\})) #\])) #\])) #\]))) + + + + diff --git a/beautiful-racket-ragg/br/ragg/test/test-errors.rkt b/beautiful-racket-ragg/br/ragg/test/test-errors.rkt new file mode 100755 index 0000000..d3ea23e --- /dev/null +++ b/beautiful-racket-ragg/br/ragg/test/test-errors.rkt @@ -0,0 +1,134 @@ +#lang racket/base + +(require rackunit + (for-syntax racket/base)) + +;; The tests in this module make sure we produce proper error messages +;; on weird grammars. 
+ + +(define-namespace-anchor anchor) +(define ns (namespace-anchor->namespace anchor)) +(define (c prog) + (parameterize ([current-namespace ns] + [read-accept-reader #t]) + (define ip (open-input-string prog)) + (port-count-lines! ip) + (compile (read-syntax #f ip)))) + + +;; Helper to let me quickly write compile-error checks. +(define-syntax (check-compile-error stx) + (syntax-case stx () + [(_ prog expected-msg) + (quasisyntax/loc stx + (begin #,(syntax/loc stx + (check-exn (regexp (regexp-quote expected-msg)) + (lambda () + (c prog)))) + #,(syntax/loc stx + (check-exn exn:fail:syntax? + (lambda () + (c prog))))))])) + + + + + +(check-compile-error "#lang br/ragg" + "The grammar does not appear to have any rules") + +(check-compile-error "#lang br/ragg\nfoo" + "Error while parsing grammar near: foo [line=2, column=0, position=12]") + +(check-compile-error "#lang br/ragg\nnumber : 42" + "Error while parsing grammar near: 42 [line=2, column=9, position=21]") + +(check-compile-error "#lang br/ragg\nnumber : 1" + "Error while parsing grammar near: 1 [line=2, column=9, position=21]") + + + +(check-compile-error "#lang br/ragg\n x: NUMBER\nx:STRING" + "Rule x has a duplicate definition") + +;; Check to see that missing definitions for rules also raise good syntax +;; errors: + +(check-compile-error "#lang br/ragg\nx:y" + "Rule y has no definition") + +(check-compile-error "#lang br/ragg\nnumber : 1flarbl" + "Rule 1flarbl has no definition") + + + + +(check-compile-error "#lang br/ragg\nprogram: EOF" + "Token EOF is reserved and can not be used in a grammar") + + + +;; Nontermination checks: +(check-compile-error "#lang br/ragg\nx : x" + "Rule x has no finite derivation") + + + +(check-compile-error #<symbol (format "r~a" n))))) + + +;; Simple literals +(check-equal? (map syntax->datum (flatten-rule #'(rule expr (lit "hello")))) + '((prim-rule lit expr [(lit "hello")]))) + +(check-equal? 
(map syntax->datum + (flatten-rule #'(rule expr + (seq (lit "hello") + (lit "world"))))) + '((prim-rule seq expr [(lit "hello") (lit "world")]))) + + +(check-equal? (map syntax->datum (flatten-rule #'(rule expr (token HELLO)))) + '((prim-rule token expr [(token HELLO)]))) + +(check-equal? (map syntax->datum (flatten-rule #'(rule expr (id rule-2)))) + '((prim-rule id expr [(id rule-2)]))) + + +;; Sequences of primitives +(check-equal? (map syntax->datum + (flatten-rule #'(rule expr (seq (lit "1") (seq (lit "2") (lit "3")))))) + '((prim-rule seq expr + [(lit "1") (lit "2") (lit "3")]))) + +(check-equal? (map syntax->datum + (flatten-rule #'(rule expr (seq (seq (lit "1") (lit "2")) (lit "3"))))) + '((prim-rule seq expr + [(lit "1") (lit "2") (lit "3")]))) + + +(check-equal? (map syntax->datum + (flatten-rule #'(rule expr (seq (seq (lit "1")) (seq (lit "2") (lit "3")))))) + '((prim-rule seq expr + [(lit "1") (lit "2") (lit "3")]))) + + + +;; choices +(check-equal? (map syntax->datum + (flatten-rule #'(rule expr (choice (id rule-2) (id rule-3))))) + '((prim-rule choice expr + [(id rule-2)] + [(id rule-3)]))) + +(check-equal? (map syntax->datum + (flatten-rule #'(rule sexp (choice (seq (lit "(") (lit ")")) + (seq))) + #:fresh-name (make-fresh-name))) + '((prim-rule choice sexp + [(lit "(") (lit ")")] []))) + +(check-equal? (map syntax->datum + (flatten-rule #'(rule sexp (choice (seq (seq (lit "(") (token BLAH)) + (lit ")")) + (seq))) + #:fresh-name (make-fresh-name))) + '((prim-rule choice sexp + [(lit "(") (token BLAH) (lit ")")] []))) + + + + +;; maybe +(check-equal? (map syntax->datum + (flatten-rule #'(rule expr (maybe (id rule-2))))) + '((prim-rule maybe expr + [(id rule-2)] + []))) +(check-equal? (map syntax->datum + (flatten-rule #'(rule expr (maybe (token HUH))))) + '((prim-rule maybe expr + [(token HUH)] + []))) +(check-equal? 
(map syntax->datum + (flatten-rule #'(rule expr (maybe (seq (lit "hello") (lit "world")))))) + '((prim-rule maybe expr + [(lit "hello") (lit "world")] + []))) + + + + +;; repeat +(check-equal? (map syntax->datum + (flatten-rule #'(rule rule-2+ (repeat 0 (id rule-2))))) + '((prim-rule repeat rule-2+ + [(inferred-id rule-2+ repeat) (id rule-2)] + []))) +(check-equal? (map syntax->datum + (flatten-rule #'(rule rule-2+ (repeat 0 (seq (lit "+") (id rule-2)))))) + '((prim-rule repeat rule-2+ + [(inferred-id rule-2+ repeat) (lit "+") (id rule-2)] + []))) + +(check-equal? (map syntax->datum + (flatten-rule #'(rule rule-2+ (repeat 1 (id rule-2))))) + '((prim-rule repeat rule-2+ + [(inferred-id rule-2+ repeat) (id rule-2)] + [(id rule-2)]))) +(check-equal? (map syntax->datum + (flatten-rule #'(rule rule-2+ (repeat 1 (seq (lit "-") (id rule-2)))))) + '((prim-rule repeat rule-2+ + [(inferred-id rule-2+ repeat) (lit "-") (id rule-2)] + [(lit "-") (id rule-2)]))) + + + + + + +;; Mixtures + +;; choice and maybe +(check-equal? (map syntax->datum + (flatten-rule #'(rule sexp (choice (lit "x") + (maybe (lit "y")))) + #:fresh-name (make-fresh-name))) + '((prim-rule choice sexp + [(lit "x")] + [(inferred-id r1 maybe)]) + (inferred-prim-rule maybe r1 + [(lit "y")] + []))) +;; choice, maybe, repeat +(check-equal? (map syntax->datum + (flatten-rule #'(rule sexp (choice (lit "x") + (maybe (repeat 1 (lit "y"))))) + #:fresh-name (make-fresh-name))) + '((prim-rule choice sexp + [(lit "x")] + [(inferred-id r1 maybe)]) + (inferred-prim-rule maybe r1 + [(inferred-id r2 repeat)] + []) + (inferred-prim-rule repeat r2 + [(inferred-id r2 repeat) (lit "y")] + [(lit "y")]))) +;; choice, seq +(check-equal? (map syntax->datum + (flatten-rule #'(rule sexp (choice (seq (lit "x") (lit "y")) + (seq (lit "z") (lit "w")))) + #:fresh-name (make-fresh-name))) + '((prim-rule choice sexp + [(lit "x") (lit "y")] + [(lit "z") (lit "w")]))) + +;; maybe, choice +(check-equal? 
(map syntax->datum + (flatten-rule #'(rule sexp (maybe (choice (seq (lit "x") (lit "y")) + (seq (lit "z") (lit "w"))))) + #:fresh-name (make-fresh-name))) + '((prim-rule maybe sexp + [(inferred-id r1 choice)] + []) + (inferred-prim-rule choice r1 + [(lit "x") (lit "y")] + [(lit "z") (lit "w")]))) + + +;; seq, repeat +(check-equal? (map syntax->datum + (flatten-rule #'(rule expr (seq (id term) (repeat 0 (seq (lit "+") (id term))))) + #:fresh-name (make-fresh-name))) + '((prim-rule seq expr [(id term) (inferred-id r1 repeat)]) + (inferred-prim-rule repeat r1 [(inferred-id r1 repeat) (lit "+") (id term)] []))) + + +;; larger example: simple arithmetic +(check-equal? (map syntax->datum + (flatten-rules (syntax->list + #'((rule expr (seq (id term) (repeat 0 (seq (lit "+") (id term))))) + (rule term (seq (id factor) (repeat 0 (seq (lit "*") (id factor))))) + (rule factor (token INT)))) + #:fresh-name (make-fresh-name))) + + '((prim-rule seq expr [(id term) (inferred-id r1 repeat)]) + (inferred-prim-rule repeat r1 [(inferred-id r1 repeat) (lit "+") (id term)] []) + (prim-rule seq term [(id factor) (inferred-id r2 repeat)]) + (inferred-prim-rule repeat r2 [(inferred-id r2 repeat) (lit "*") (id factor)] []) + (prim-rule token factor [(token INT)]))) diff --git a/beautiful-racket-ragg/br/ragg/test/test-lexer.rkt b/beautiful-racket-ragg/br/ragg/test/test-lexer.rkt new file mode 100755 index 0000000..5da637c --- /dev/null +++ b/beautiful-racket-ragg/br/ragg/test/test-lexer.rkt @@ -0,0 +1,58 @@ +#lang racket/base +(require br/ragg/rules/lexer + rackunit + parser-tools/lex) + +(define (l s) + (define t (lex/1 (open-input-string s))) + (list (token-name (position-token-token t)) + (token-value (position-token-token t)) + (position-offset (position-token-start-pos t)) + (position-offset (position-token-end-pos t)))) + +;; WARNING: the offsets are not in terms of file positions. So they +;; start counting at 1, not 0. +(check-equal? (l " hi") + '(ID "hi" 2 4)) + +(check-equal? 
(l " hi") + '(ID "hi" 3 5)) + +(check-equal? (l "hi") + '(ID "hi" 1 3)) + +(check-equal? (l "# foobar\nhi") + '(ID "hi" 10 12)) + +(check-equal? (l "# foobar\rhi") + '(ID "hi" 10 12)) + +(check-equal? (l "# foobar\r\nhi") + '(ID "hi" 11 13)) + +(check-equal? (l "hi:") + '(RULE_HEAD "hi:" 1 4)) + +(check-equal? (l "hi :") + '(RULE_HEAD "hi :" 1 7)) + +(check-equal? (l "|") + '(PIPE "|" 1 2)) + +(check-equal? (l "(") + '(LPAREN "(" 1 2)) + +(check-equal? (l "[") + '(LBRACKET "[" 1 2)) + +(check-equal? (l ")") + '(RPAREN ")" 1 2)) + +(check-equal? (l "]") + '(RBRACKET "]" 1 2)) + +(check-equal? (l "'hello'") + '(LIT "'hello'" 1 8)) + +(check-equal? (l "'he\\'llo'") + '(LIT "'he\\'llo'" 1 10)) diff --git a/beautiful-racket-ragg/br/ragg/test/test-old-token.rkt b/beautiful-racket-ragg/br/ragg/test/test-old-token.rkt new file mode 100755 index 0000000..0a0f6aa --- /dev/null +++ b/beautiful-racket-ragg/br/ragg/test/test-old-token.rkt @@ -0,0 +1,76 @@ +#lang racket/base + +;; Make sure the old token type also works fine. + +(require br/ragg/examples/simple-line-drawing + br/ragg/support + racket/list + parser-tools/lex + (prefix-in : parser-tools/lex-sre) + rackunit) + +(define-tokens tokens (INTEGER STRING |;| EOF)) + +(define (make-tokenizer ip) + (port-count-lines! ip) + (define lex (lexer-src-pos + [(:+ numeric) + (token-INTEGER (string->number lexeme))] + [upper-case + (token-STRING lexeme)] + ["b" + (token-STRING " ")] + [";" + (|token-;| lexeme)] + [whitespace + (return-without-pos (lex input-port))] + [(eof) + (token-EOF 'eof)])) + (lambda () + (lex ip))) + + + +(define the-parsed-object-stx + (parse (make-tokenizer (open-input-string #<list the-parsed-object-stx)))) + +(check-equal? (syntax->datum the-parsed-object-stx) + '(drawing (rows (repeat 3) (chunk 9 "X") ";") + (rows (repeat 6) (chunk 3 " ") (chunk 3 "X") (chunk 3 " ") ";") + (rows (repeat 3) (chunk 9 "X") ";"))) + +(define the-parsed-object (syntax->list the-parsed-object-stx)) + +(check-equal? 
(syntax-line the-parsed-object-stx) 1) +(check-equal? (syntax-column the-parsed-object-stx) 0) +(check-equal? (syntax-position the-parsed-object-stx) 1) +(check-equal? (syntax-span the-parsed-object-stx) 28) + +(check-equal? (length the-parsed-object) 4) + +(check-equal? (syntax->datum (second the-parsed-object)) + '(rows (repeat 3) (chunk 9 "X") ";")) +(check-equal? (syntax-line (list-ref the-parsed-object 1)) 1) + +(check-equal? (syntax->datum (third the-parsed-object)) + '(rows (repeat 6) (chunk 3 " ") (chunk 3 "X") (chunk 3 " ") ";")) +(check-equal? (syntax-line (list-ref the-parsed-object 2)) 2) + +(check-equal? (syntax->datum (fourth the-parsed-object)) + '(rows (repeat 3) (chunk 9 "X") ";")) +(check-equal? (syntax-line (list-ref the-parsed-object 3)) 3) + +;; FIXME: add tests to make sure location is as we expect. +;; +;; FIXME: handle the EOF issue better. Something in cfg-parser +;; appears to deviate from parser-tools/yacc with regards to the stop +;; token. diff --git a/beautiful-racket-ragg/br/ragg/test/test-parser.rkt b/beautiful-racket-ragg/br/ragg/test/test-parser.rkt new file mode 100755 index 0000000..9d8310e --- /dev/null +++ b/beautiful-racket-ragg/br/ragg/test/test-parser.rkt @@ -0,0 +1,129 @@ +#lang racket/base + + +(require rackunit + parser-tools/lex + br/ragg/rules/parser + br/ragg/rules/lexer + br/ragg/rules/rule-structs) + + +;; quick-and-dirty helper for pos construction. +(define (p x) + (pos x #f #f)) + + + +;; FIXME: fix the test cases so they work on locations rather than just offsets. +(check-equal? (grammar-parser (tokenize (open-input-string "expr : 'hello'"))) + (list (rule (p 1) (p 15) + (lhs-id (p 1) (p 5) "expr" ) + (pattern-lit (p 8) (p 15) "hello")))) + +(check-equal? (grammar-parser (tokenize (open-input-string "expr : COLON"))) + (list (rule (p 1) (p 13) + (lhs-id (p 1) (p 5) "expr") + (pattern-token (p 8) (p 13) "COLON")))) + +(check-equal? 
(grammar-parser (tokenize (open-input-string "expr : COLON COLON"))) + (list (rule (p 1) (p 19) + (lhs-id (p 1) (p 5) "expr") + (pattern-seq (p 8) (p 19) + (list + (pattern-token (p 8) (p 13) "COLON") + (pattern-token (p 14) (p 19) "COLON")))))) + +(check-equal? (grammar-parser (tokenize (open-input-string "expr : 'hello'*"))) + (list (rule (p 1) (p 16) + (lhs-id (p 1) (p 5) "expr" ) + (pattern-repeat (p 8) (p 16) + 0 + (pattern-lit (p 8) (p 15) "hello"))))) + +(check-equal? (grammar-parser (tokenize (open-input-string "expr : 'hello'+"))) + (list (rule (p 1) (p 16) + (lhs-id (p 1) (p 5) "expr" ) + (pattern-repeat (p 8) (p 16) + 1 + (pattern-lit (p 8) (p 15) "hello"))))) + +(check-equal? (grammar-parser (tokenize (open-input-string "expr : ['hello']"))) + (list (rule (p 1) (p 17) + (lhs-id (p 1) (p 5) "expr" ) + (pattern-maybe (p 8) (p 17) + (pattern-lit (p 9) (p 16) "hello"))))) + +(check-equal? (grammar-parser (tokenize (open-input-string "expr : COLON | BLAH"))) + (list (rule (p 1) (p 20) + (lhs-id (p 1) (p 5) "expr") + (pattern-choice (p 8) (p 20) + (list (pattern-token (p 8) (p 13) "COLON") + (pattern-token (p 16) (p 20) "BLAH")))))) + +(check-equal? (grammar-parser (tokenize (open-input-string "expr : COLON | BLAH | BAZ expr"))) + (list (rule (p 1) (p 31) + (lhs-id (p 1) (p 5) "expr") + (pattern-choice (p 8) (p 31) + (list (pattern-token (p 8) (p 13) "COLON") + (pattern-token (p 16) (p 20) "BLAH") + (pattern-seq (p 23) (p 31) + (list (pattern-token (p 23) (p 26) "BAZ") + (pattern-id (p 27) (p 31) "expr")))))))) + +(check-equal? (grammar-parser (tokenize (open-input-string "expr : one two three"))) + (list (rule (p 1) (p 21) + (lhs-id (p 1) (p 5) "expr") + (pattern-seq (p 8) (p 21) (list (pattern-id (p 8) (p 11) "one") + (pattern-id (p 12) (p 15) "two") + (pattern-id (p 16) (p 21) "three")))))) + + +(check-equal? 
(grammar-parser (tokenize (open-input-string "expr : (one two three)"))) + (list (rule (p 1) (p 23) + (lhs-id (p 1) (p 5) "expr") + (pattern-seq (p 8) (p 23) (list (pattern-id (p 9) (p 12) "one") + (pattern-id (p 13) (p 16) "two") + (pattern-id (p 17) (p 22) "three")))))) + + +(check-equal? (grammar-parser (tokenize (open-input-string "expr : one two* three"))) + (list (rule (p 1) (p 22) + (lhs-id (p 1) (p 5) "expr") + (pattern-seq (p 8) (p 22) (list (pattern-id (p 8) (p 11) "one") + (pattern-repeat (p 12) (p 16) 0 (pattern-id (p 12) (p 15) "two")) + (pattern-id (p 17) (p 22) "three")))))) + +(check-equal? (grammar-parser (tokenize (open-input-string "expr : one two+ three"))) + (list (rule (p 1) (p 22) + (lhs-id (p 1) (p 5) "expr") + (pattern-seq (p 8) (p 22) (list (pattern-id (p 8) (p 11) "one") + (pattern-repeat (p 12) (p 16) 1 (pattern-id (p 12) (p 15) "two")) + (pattern-id (p 17) (p 22) "three")))))) + +(check-equal? (grammar-parser (tokenize (open-input-string "expr : (one two)+ three"))) + (list (rule (p 1) (p 24) + (lhs-id (p 1) (p 5) "expr") + (pattern-seq (p 8) (p 24) (list (pattern-repeat (p 8) (p 18) 1 + (pattern-seq (p 8) (p 17) + (list (pattern-id (p 9) (p 12) "one") + (pattern-id (p 13) (p 16) "two")))) + (pattern-id (p 19) (p 24) "three")))))) + + +(check-equal? (grammar-parser (tokenize (open-input-string #<number lexeme))] + [whitespace + (token 'WHITESPACE #:skip? #t)] + ["+" + (token '+ "+")] + ["*" + (token '* "*")] + [(eof) + (token eof)])) + (lambda () + (lex/1 ip))) + + +;; expr : term ('+' term)* +;; term : factor (('*') factor)* +;; factor : INT + +(check-equal? (syntax->datum (parse #f (tokenize (open-input-string "42")))) + '(expr (term (factor 42)))) +(check-equal? (syntax->datum (parse #f (tokenize (open-input-string "3+4")))) + '(expr (term (factor 3)) + "+" + (term (factor 4)))) +(check-equal? 
(syntax->datum (parse #f (tokenize (open-input-string "3+4+5")))) + '(expr (term (factor 3)) + "+" + (term (factor 4)) + "+" + (term (factor 5)))) + + +(check-equal? (syntax->datum (parse #f (tokenize (open-input-string "3*4*5")))) + '(expr (term (factor 3) "*" (factor 4) "*" (factor 5)))) + + +(check-equal? (syntax->datum (parse #f (tokenize (open-input-string "3*4 + 5*6")))) + '(expr (term (factor 3) "*" (factor 4)) + "+" + (term (factor 5) "*" (factor 6)))) + +(check-equal? (syntax->datum (parse #f (tokenize (open-input-string "4*5+6")))) + '(expr (term (factor 4) "*" (factor 5)) + "+" + (term (factor 6)))) + +(check-equal? (syntax->datum (parse #f (tokenize (open-input-string "4+5 *6")))) + '(expr (term (factor 4)) + "+" + (term (factor 5) "*" (factor 6)))) + + +(check-exn exn:fail:parsing? + (lambda () (parse #f (tokenize (open-input-string "7+"))))) +(check-exn exn:fail:parsing? + (lambda () (parse #f (tokenize (open-input-string "7+6+"))))) + + +(check-equal? all-token-types + (set '+ '* 'INT)) diff --git a/beautiful-racket-ragg/br/ragg/test/test-simple-line-drawing.rkt b/beautiful-racket-ragg/br/ragg/test/test-simple-line-drawing.rkt new file mode 100755 index 0000000..803222b --- /dev/null +++ b/beautiful-racket-ragg/br/ragg/test/test-simple-line-drawing.rkt @@ -0,0 +1,72 @@ +#lang racket/base + +(require br/ragg/examples/simple-line-drawing + br/ragg/support + racket/list + parser-tools/lex + (prefix-in : parser-tools/lex-sre) + rackunit) + +(define (make-tokenizer ip) + (port-count-lines! ip) + (define lex (lexer-src-pos + [(:+ numeric) + (token 'INTEGER (string->number lexeme))] + [upper-case + (token 'STRING lexeme)] + ["b" + (token 'STRING " ")] + [";" + (token ";" lexeme)] + [whitespace + (token 'WHITESPACE lexeme #:skip? #t)] + [(eof) + (void)])) + (lambda () + (lex ip))) + + + +(define the-parsed-object-stx + (parse (make-tokenizer (open-input-string #<list the-parsed-object-stx)))) + +(check-equal? 
(syntax->datum the-parsed-object-stx) + '(drawing (rows (repeat 3) (chunk 9 "X") ";") + (rows (repeat 6) (chunk 3 " ") (chunk 3 "X") (chunk 3 " ") ";") + (rows (repeat 3) (chunk 9 "X") ";"))) + +(define the-parsed-object (syntax->list the-parsed-object-stx)) + +(check-equal? (syntax-line the-parsed-object-stx) 1) +(check-equal? (syntax-column the-parsed-object-stx) 0) +(check-equal? (syntax-position the-parsed-object-stx) 1) +(check-equal? (syntax-span the-parsed-object-stx) 28) + +(check-equal? (length the-parsed-object) 4) + +(check-equal? (syntax->datum (second the-parsed-object)) + '(rows (repeat 3) (chunk 9 "X") ";")) +(check-equal? (syntax-line (list-ref the-parsed-object 1)) 1) + +(check-equal? (syntax->datum (third the-parsed-object)) + '(rows (repeat 6) (chunk 3 " ") (chunk 3 "X") (chunk 3 " ") ";")) +(check-equal? (syntax-line (list-ref the-parsed-object 2)) 2) + +(check-equal? (syntax->datum (fourth the-parsed-object)) + '(rows (repeat 3) (chunk 9 "X") ";")) +(check-equal? (syntax-line (list-ref the-parsed-object 3)) 3) + +;; FIXME: add tests to make sure location is as we expect. +;; +;; FIXME: handle the EOF issue better. Something in cfg-parser +;; appears to deviate from parser-tools/yacc with regards to the stop +;; token. diff --git a/beautiful-racket-ragg/br/ragg/test/test-weird-grammar.rkt b/beautiful-racket-ragg/br/ragg/test/test-weird-grammar.rkt new file mode 100755 index 0000000..1847feb --- /dev/null +++ b/beautiful-racket-ragg/br/ragg/test/test-weird-grammar.rkt @@ -0,0 +1,7 @@ +#lang racket/base + +(require "weird-grammar.rkt" + rackunit) + +(check-equal? 
(syntax->datum (parse '("foo"))) + '(foo "foo")) diff --git a/beautiful-racket-ragg/br/ragg/test/test-wordy.rkt b/beautiful-racket-ragg/br/ragg/test/test-wordy.rkt new file mode 100755 index 0000000..7ec4db3 --- /dev/null +++ b/beautiful-racket-ragg/br/ragg/test/test-wordy.rkt @@ -0,0 +1,18 @@ +#lang racket/base +(require br/ragg/examples/wordy + br/ragg/support + rackunit) + +(check-equal? + (syntax->datum + (parse (list "hello" "world"))) + '(sentence (verb (greeting "hello")) (optional-adjective) (object "world"))) + + + +(check-equal? + (syntax->datum + (parse (list "hola" "frumpy" (token 'WORLD "세계")))) + + '(sentence (verb (greeting "hola")) (optional-adjective "frumpy") (object "세계"))) + diff --git a/beautiful-racket-ragg/br/ragg/test/weird-grammar.rkt b/beautiful-racket-ragg/br/ragg/test/weird-grammar.rkt new file mode 100755 index 0000000..da75f91 --- /dev/null +++ b/beautiful-racket-ragg/br/ragg/test/weird-grammar.rkt @@ -0,0 +1,6 @@ +#lang br/ragg + +;; This used to fail when we had the yacc-based backend, but +;; cfg-parser seems to be ok with it. + +foo: "foo" diff --git a/beautiful-racket-ragg/info.rkt b/beautiful-racket-ragg/info.rkt new file mode 100755 index 0000000..45a38e2 --- /dev/null +++ b/beautiful-racket-ragg/info.rkt @@ -0,0 +1,7 @@ +#lang setup/infotab + +(define deps '("base" "parser-tools-lib" "rackunit-lib" "python-tokenizer")) +(define build-deps '("at-exp-lib" "parser-tools-doc" "racket-doc" + "scribble-lib")) +(define collection 'multi) + diff --git a/beautiful-racket/br/demo/basic/#sinewave.bas#2# b/beautiful-racket/br/demo/basic/#sinewave.bas#2# deleted file mode 100644 index 2e87802..0000000 --- a/beautiful-racket/br/demo/basic/#sinewave.bas#2# +++ /dev/null @@ -1,250 +0,0 @@ -#reader(lib"read.ss""wxme")WXME0108 ## -#| - This file uses the GRacket editor format. - Open this file in DrRacket version 6.4.0.15 or later to read it. 
- - Most likely, it was created by saving a program in DrRacket, - and it probably contains a program with non-text elements - (such as images or comment boxes). - - http://racket-lang.org/ -|# - 22 7 #"wxtext\0" -3 1 6 #"wxtab\0" -1 1 8 #"wximage\0" -2 0 8 #"wxmedia\0" -4 1 34 #"(lib \"syntax-browser.ss\" \"mrlib\")\0" -1 0 16 #"drscheme:number\0" -3 0 44 #"(lib \"number-snip.ss\" \"drscheme\" \"private\")\0" -1 0 36 #"(lib \"comment-snip.ss\" \"framework\")\0" -1 0 93 -( - #"((lib \"collapsed-snipclass.ss\" \"framework\") (lib \"collapsed-sni" - #"pclass-wxme.ss\" \"framework\"))\0" -) 0 0 43 #"(lib \"collapsed-snipclass.ss\" \"framework\")\0" -0 0 19 #"drscheme:sexp-snip\0" -0 0 36 #"(lib \"cache-image-snip.ss\" \"mrlib\")\0" -1 0 68 -( - #"((lib \"image-core.ss\" \"mrlib\") (lib \"image-core-wxme.rkt\" \"mr" - #"lib\"))\0" -) 1 0 29 #"drscheme:bindings-snipclass%\0" -1 0 101 -( - #"((lib \"ellipsis-snip.rkt\" \"drracket\" \"private\") (lib \"ellipsi" - #"s-snip-wxme.rkt\" \"drracket\" \"private\"))\0" -) 2 0 88 -( - #"((lib \"pict-snip.rkt\" \"drracket\" \"private\") (lib \"pict-snip.r" - #"kt\" \"drracket\" \"private\"))\0" -) 0 0 34 #"(lib \"bullet-snip.rkt\" \"browser\")\0" -0 0 25 #"(lib \"matrix.ss\" \"htdp\")\0" -1 0 22 #"drscheme:lambda-snip%\0" -1 0 29 #"drclickable-string-snipclass\0" -0 0 26 #"drracket:spacer-snipclass\0" -0 0 57 -#"(lib \"hrule-snip.rkt\" \"macro-debugger\" \"syntax-browser\")\0" -1 0 1 6 #"wxloc\0" - 0 0 64 0 1 #"\0" -0 75 1 #"\0" -0 12 90 -1 90 -1 3 -1 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 255 255 255 1 -1 0 9 -#"Standard\0" -0 75 15 #"Triplicate T3c\0" -0 16 90 -1 90 -1 3 -1 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 255 255 255 1 -1 2 1 -#"\0" -0 -1 1 #"\0" -1 0 -1 -1 -1 -1 -1 -1 0 0 0 0 0 0 1 1 1 1 1 1 0 0 0 0 0 0 -1 -1 2 24 -#"framework:default-color\0" -0 -1 1 #"\0" -1 0 -1 -1 -1 -1 -1 -1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 255 255 255 -1 -1 2 -1 #"\0" -0 -1 1 #"\0" -1 0 -1 -1 -1 -1 -1 -1 0 0 0 0 0 0 0 0 0 1 1 1 150 0 150 0 0 0 -1 -1 2 15 -#"text:ports 
out\0" -0 -1 1 #"\0" -1 0 -1 -1 -1 -1 -1 -1 0 0 0 0 0 0 0 0 0 1 1 1 34 119 34 0 0 0 -1 -1 2 1 -#"\0" -0 -1 1 #"\0" -1.0 0 -1 -1 93 -1 -1 -1 0 0 0 0 0 0 0 0 0 1.0 1.0 1.0 255 0 0 0 0 0 -1 --1 2 15 #"text:ports err\0" -0 -1 1 #"\0" -1 0 -1 -1 -1 -1 -1 -1 0 0 0 0 0 0 0 0 0 1 1 1 136 17 17 0 0 0 -1 -1 2 1 -#"\0" -0 -1 1 #"\0" -1 0 -1 -1 -1 -1 -1 -1 0 0 0 0 0 0 0 0 0 1 1 1 0 0 175 0 0 0 -1 -1 2 17 -#"text:ports value\0" -0 -1 1 #"\0" -1 0 -1 -1 -1 -1 -1 -1 0 0 0 0 0 0 0 0 0 1 1 1 34 34 119 0 0 0 -1 -1 2 1 -#"\0" -0 -1 1 #"\0" -1.0 0 92 -1 -1 -1 -1 -1 0 0 0 0 0 0 0 0 0 1.0 1.0 1.0 34 139 34 0 0 0 -1 --1 2 27 #"Matching Parenthesis Style\0" -0 -1 1 #"\0" -1.0 0 92 -1 -1 -1 -1 -1 0 0 0 0 0 0 0 0 0 1.0 1.0 1.0 34 139 34 0 0 0 -1 --1 2 1 #"\0" -0 -1 1 #"\0" -1 0 -1 -1 -1 93 -1 -1 0 1 0 0 0 0 0 0 0 1 1 1 38 38 128 0 0 0 -1 -1 2 37 -#"framework:syntax-color:scheme:symbol\0" -0 -1 1 #"\0" -1 0 -1 -1 -1 -1 -1 -1 0 0 0 0 0 0 0 0 0 1 1 1 34 34 119 0 0 0 -1 -1 2 38 -#"framework:syntax-color:scheme:keyword\0" -0 -1 1 #"\0" -1 0 -1 -1 -1 -1 -1 -1 0 0 0 0 0 0 0 0 0 1 1 1 34 34 119 0 0 0 -1 -1 2 1 -#"\0" -0 -1 1 #"\0" -1 0 -1 -1 -1 93 -1 -1 0 1 0 0 0 0 0 0 0 1 1 1 194 116 31 0 0 0 -1 -1 2 -38 #"framework:syntax-color:scheme:comment\0" -0 -1 1 #"\0" -1 0 -1 -1 -1 -1 -1 -1 0 0 0 0 0 0 0 0 0 1 1 1 119 34 119 0 0 0 -1 -1 2 1 -#"\0" -0 -1 1 #"\0" -1 0 -1 -1 -1 93 -1 -1 0 1 0 0 0 0 0 0 0 1 1 1 41 128 38 0 0 0 -1 -1 2 37 -#"framework:syntax-color:scheme:string\0" -0 -1 1 #"\0" -1 0 -1 -1 -1 -1 -1 -1 0 0 0 0 0 0 0 0 0 1 1 1 34 119 34 0 0 0 -1 -1 2 35 -#"framework:syntax-color:scheme:text\0" -0 -1 1 #"\0" -1 0 -1 -1 -1 -1 -1 -1 0 0 0 0 0 0 0 0 0 1 1 1 34 119 34 0 0 0 -1 -1 2 39 -#"framework:syntax-color:scheme:constant\0" -0 -1 1 #"\0" -1 0 -1 -1 -1 -1 -1 -1 0 0 0 0 0 0 0 0 0 1 1 1 34 119 34 0 0 0 -1 -1 2 1 -#"\0" -0 -1 1 #"\0" -1 0 -1 -1 -1 93 -1 -1 0 1 0 0 0 0 0 0 0 1 1 1 132 60 36 0 0 0 -1 -1 2 49 -#"framework:syntax-color:scheme:hash-colon-keyword\0" -0 -1 1 #"\0" -1 0 -1 -1 -1 -1 -1 -1 0 0 
0 0 0 0 0 0 0 1 1 1 34 119 34 0 0 0 -1 -1 2 42 -#"framework:syntax-color:scheme:parenthesis\0" -0 -1 1 #"\0" -1 0 -1 -1 -1 -1 -1 -1 0 0 0 0 0 0 0 0 0 1 1 1 0 178 178 0 0 0 -1 -1 2 1 -#"\0" -0 -1 1 #"\0" -1 0 -1 -1 -1 93 -1 -1 0 1 0 0 0 0 0 0 0 1 1 1 255 0 0 0 0 0 -1 -1 2 36 -#"framework:syntax-color:scheme:error\0" -0 -1 1 #"\0" -1 0 -1 -1 -1 -1 -1 -1 0 0 0 0 0 0 0 0 0 1 1 1 136 17 17 0 0 0 -1 -1 2 1 -#"\0" -0 -1 1 #"\0" -1 0 -1 -1 -1 93 -1 -1 0 1 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 -1 -1 2 36 -#"framework:syntax-color:scheme:other\0" -0 -1 1 #"\0" -1 0 -1 -1 -1 -1 -1 -1 0 0 0 0 0 0 0 0 0 1 1 1 34 34 119 0 0 0 -1 -1 2 16 -#"Misspelled Text\0" -0 -1 1 #"\0" -1 0 -1 -1 -1 -1 -1 -1 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 -1 -1 2 1 -#"\0" -0 -1 1 #"\0" -1 0 -1 -1 -1 93 -1 -1 0 1 0 0 0 0 0 0 0 1 1 1 81 112 203 0 0 0 -1 -1 2 -38 #"drracket:check-syntax:lexically-bound\0" -0 -1 1 #"\0" -1 0 -1 -1 -1 93 -1 -1 0 1 0 0 0 0 0 0 0 1 1 1 81 112 203 0 0 0 -1 -1 2 1 -#"\0" -0 -1 1 #"\0" -1 0 -1 -1 -1 93 -1 -1 0 1 0 0 0 0 0 0 0 1 1 1 178 34 34 0 0 0 -1 -1 2 28 -#"drracket:check-syntax:set!d\0" -0 -1 1 #"\0" -1 0 -1 -1 -1 -1 -1 -1 0 0 0 0 0 0 0 0 0 1 1 1 136 17 17 0 0 0 -1 -1 2 37 -#"drracket:check-syntax:unused-require\0" -0 -1 1 #"\0" -1 0 -1 -1 -1 -1 -1 -1 0 0 0 0 0 0 0 0 0 1 1 1 136 17 17 0 0 0 -1 -1 2 36 -#"drracket:check-syntax:free-variable\0" -0 -1 1 #"\0" -1 0 -1 -1 -1 -1 -1 -1 0 0 0 0 0 0 0 0 0 1 1 1 136 17 17 0 0 0 -1 -1 2 1 -#"\0" -0 -1 1 #"\0" -1 0 -1 -1 -1 93 -1 -1 0 1 0 0 0 0 0 0 0 1 1 1 68 0 203 0 0 0 -1 -1 2 31 -#"drracket:check-syntax:imported\0" -0 -1 1 #"\0" -1 0 -1 -1 -1 93 -1 -1 0 1 0 0 0 0 0 0 0 1 1 1 68 0 203 0 0 0 -1 -1 2 47 -#"drracket:check-syntax:my-obligation-style-pref\0" -0 -1 1 #"\0" -1 0 -1 -1 -1 93 -1 -1 0 1 0 0 0 0 0 0 0 1 1 1 178 34 34 0 0 0 -1 -1 2 1 -#"\0" -0 -1 1 #"\0" -1 0 -1 -1 -1 93 -1 -1 0 1 0 0 0 0 0 0 0 1 1 1 0 116 0 0 0 0 -1 -1 2 50 -#"drracket:check-syntax:their-obligation-style-pref\0" -0 -1 1 #"\0" -1 0 -1 -1 -1 93 -1 -1 0 1 0 0 0 0 0 0 0 1 
1 1 0 116 0 0 0 0 -1 -1 2 48 -#"drracket:check-syntax:unk-obligation-style-pref\0" -0 -1 1 #"\0" -1 0 -1 -1 -1 93 -1 -1 0 1 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 -1 -1 2 1 -#"\0" -0 -1 1 #"\0" -1 0 -1 -1 -1 93 -1 -1 0 1 0 0 0 0 0 0 0 1 1 1 139 142 28 0 0 0 -1 -1 2 -49 #"drracket:check-syntax:both-obligation-style-pref\0" -0 -1 1 #"\0" -1 0 -1 -1 -1 93 -1 -1 0 1 0 0 0 0 0 0 0 1 1 1 139 142 28 0 0 0 -1 -1 2 -37 #"plt:module-language:test-coverage-on\0" -0 -1 1 #"\0" -1 0 -1 -1 -1 93 -1 -1 0 1 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 -1 -1 2 1 -#"\0" -0 -1 1 #"\0" -1 0 -1 -1 -1 93 -1 -1 0 1 0 0 0 1 0 0 0 0 0 0 255 165 0 0 0 0 -1 -1 2 38 -#"plt:module-language:test-coverage-off\0" -0 -1 1 #"\0" -1 0 -1 -1 -1 93 -1 -1 0 1 0 0 0 1 0 0 0 0 0 0 255 165 0 0 0 0 -1 -1 4 1 -#"\0" -0 71 1 #"\0" -1.0 0 -1 -1 -1 -1 -1 -1 0 0 0 0 0 0 1.0 1.0 1.0 1.0 1.0 1.0 0 0 0 0 0 0 --1 -1 4 1 #"\0" -0 -1 1 #"\0" -1.0 0 -1 -1 -1 -1 -1 -1 1 0 0 0 0 0 0 0 0 1.0 1.0 1.0 0 0 255 0 0 0 -1 --1 4 1 #"\0" -0 71 1 #"\0" -1.0 0 -1 -1 -1 -1 -1 -1 1 0 0 0 0 0 0 0 0 1.0 1.0 1.0 0 0 255 0 0 0 -1 --1 4 1 #"\0" -0 71 1 #"\0" -1.0 0 -1 -1 -1 -1 -1 -1 0 0 0 0 0 0 0 0 0 1.0 1.0 1.0 0 100 0 0 0 0 -1 --1 4 1 #"\0" -0 -1 1 #"\0" -1.0 0 92 -1 -1 -1 -1 -1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 255 255 0 -1 -1 2 -1 #"\0" -0 -1 1 #"\0" -1 0 -1 -1 -1 -1 -1 -1 0 0 0 0 0 0 0 0 0 1 1 1 200 0 0 0 0 0 -1 -1 0 1 -#"\0" -0 75 1 #"\0" -1.0 0 -1 -1 -1 -1 -1 -1 0 0 0 0 0 0 1.0 1.0 1.0 1.0 1.0 1.0 0 0 0 0 0 0 --1 -1 2 1 #"\0" -0 75 1 #"\0" -1.0 0 -1 -1 -1 -1 -1 -1 0 0 0 0 0 0 1.0 1.0 1.0 1.0 1.0 1.0 0 0 0 0 0 0 --1 -1 2 1 #"\0" -0 75 1 #"\0" -1.0 0 -1 -1 -1 -1 -1 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 255 255 255 -1 -1 -2 1 #"\0" -0 75 1 #"\0" -1.0 0 -1 -1 -1 -1 -1 -1 0 0 0 0 0 0 0 0 0 0 0 0 34 139 34 255 255 255 -1 --1 2 1 #"\0" -0 -1 1 #"\0" -1.0 0 92 -1 -1 -1 -1 -1 0 0 0 0 0 0 0 0 0 1.0 1.0 1.0 36 36 140 0 0 0 -1 --1 2 1 #"\0" -0 -1 1 #"\0" -1.0 0 92 -1 -1 -1 -1 -1 0 0 0 0 0 0 1.0 1.0 1.0 1.0 1.0 1.0 0 0 0 0 0 0 --1 -1 2 1 #"\0" -0 -1 1 #"\0" -0 4 -1 -1 -1 
-1 -1 -1 0 0 0 0 0 0 1.0 1.0 1.0 1.0 1.0 1.0 0 0 0 0 0 0 -1 --1 2 1 #"\0" -0 75 1 #"\0" -1.0 0 92 -1 -1 -1 -1 -1 0 0 0 0 0 0 0 0 0 1.0 1.0 1.0 36 36 140 0 0 0 -1 --1 2 1 #"\0" -0 75 1 #"\0" -1.0 0 92 -1 -1 -1 -1 -1 0 0 0 0 0 0 1.0 1.0 1.0 1.0 1.0 1.0 0 0 0 0 0 0 --1 -1 2 1 #"\0" -0 75 1 #"\0" -0 4 -1 -1 -1 -1 -1 -1 0 0 0 0 0 0 1.0 1.0 1.0 1.0 1.0 1.0 0 0 0 0 0 0 -1 --1 4 1 #"\0" -0 -1 1 #"\0" -1 0 -1 -1 -1 -1 -1 -1 0 0 0 0 0 0 1 1 1 1 1 1 0 0 0 0 0 0 -1 -1 - 0 13 0 28 3 19 #"#lang br/demo/basic" -0 0 24 29 1 #"\n" -0 0 24 29 1 #"\n" -0 0 21 3 2 #"50" -0 0 24 3 1 #" " -0 0 14 3 5 #"PRINT" -0 0 24 3 1 #" " -0 0 14 3 3 #"INT" -0 0 24 3 1 #"(" -0 0 14 3 3 #"RND" -0 0 24 3 1 #"(" -0 0 21 3 2 #"10" -0 0 24 3 2 #"))" -0 0 diff --git a/beautiful-racket/br/demo/basic/parser.rkt b/beautiful-racket/br/demo/basic/parser.rkt index a4e148d..a4d0949 100644 --- a/beautiful-racket/br/demo/basic/parser.rkt +++ b/beautiful-racket/br/demo/basic/parser.rkt @@ -1,4 +1,4 @@ -#lang ragg +#lang br/ragg ;; recursive rules destucture easily in the expander program : [CR]* [line [CR line]*] [CR]* diff --git a/beautiful-racket/br/demo/basic/tokenizer.rkt b/beautiful-racket/br/demo/basic/tokenizer.rkt index deb218b..dd9531b 100644 --- a/beautiful-racket/br/demo/basic/tokenizer.rkt +++ b/beautiful-racket/br/demo/basic/tokenizer.rkt @@ -1,6 +1,6 @@ #lang br (require parser-tools/lex parser-tools/lex-sre - ragg/support + br/ragg/support racket/string) (provide tokenize) diff --git a/beautiful-racket/br/demo/bf/bf-parser.rkt b/beautiful-racket/br/demo/bf/bf-parser.rkt index 82a4777..ae0b593 100644 --- a/beautiful-racket/br/demo/bf/bf-parser.rkt +++ b/beautiful-racket/br/demo/bf/bf-parser.rkt @@ -1,4 +1,4 @@ -#lang ragg +#lang br/ragg bf-program : (op | loop)* op : ">" | "<" | "+" | "-" | "." 
| "," loop : "[" (op | loop)* "]" \ No newline at end of file diff --git a/beautiful-racket/br/demo/bf/bf-reader.rkt b/beautiful-racket/br/demo/bf/bf-reader.rkt index 3ef187c..7d41d24 100644 --- a/beautiful-racket/br/demo/bf/bf-reader.rkt +++ b/beautiful-racket/br/demo/bf/bf-reader.rkt @@ -1,5 +1,5 @@ #lang br -(require parser-tools/lex ragg/support) +(require parser-tools/lex br/ragg/support) (define (tokenize input-port) (define (next-token) (define get-token diff --git a/beautiful-racket/br/demo/bf/test-tokenizer.rkt b/beautiful-racket/br/demo/bf/test-tokenizer.rkt index fa6f3ff..4676bbf 100644 --- a/beautiful-racket/br/demo/bf/test-tokenizer.rkt +++ b/beautiful-racket/br/demo/bf/test-tokenizer.rkt @@ -1,5 +1,5 @@ #lang br -(require parser-tools/lex ragg/support) +(require parser-tools/lex br/ragg/support) (define+provide (tokenize ip) (define get-token diff --git a/beautiful-racket/br/demo/hdl-tst/parser.rkt b/beautiful-racket/br/demo/hdl-tst/parser.rkt index bb9c9c1..a451596 100644 --- a/beautiful-racket/br/demo/hdl-tst/parser.rkt +++ b/beautiful-racket/br/demo/hdl-tst/parser.rkt @@ -1,4 +1,4 @@ -#lang ragg +#lang br/ragg tst-program : header-expr test-expr* diff --git a/beautiful-racket/br/demo/hdl-tst/tokenizer.rkt b/beautiful-racket/br/demo/hdl-tst/tokenizer.rkt index 6c78f48..6bd2c1c 100644 --- a/beautiful-racket/br/demo/hdl-tst/tokenizer.rkt +++ b/beautiful-racket/br/demo/hdl-tst/tokenizer.rkt @@ -1,6 +1,6 @@ #lang br (require parser-tools/lex parser-tools/lex-sre - ragg/support + br/ragg/support racket/string) (provide tokenize) diff --git a/beautiful-racket/br/demo/hdl/parser.rkt b/beautiful-racket/br/demo/hdl/parser.rkt index 0d920aa..b059104 100644 --- a/beautiful-racket/br/demo/hdl/parser.rkt +++ b/beautiful-racket/br/demo/hdl/parser.rkt @@ -1,4 +1,4 @@ -#lang ragg +#lang br/ragg ;; rule of thumb: use [optional] bits judiciously as they multiply the cases needed for a production rule ;; rule of thumb: for a set of related IDs, put each into the same 
grammar entity diff --git a/beautiful-racket/br/demo/hdl/tokenizer.rkt b/beautiful-racket/br/demo/hdl/tokenizer.rkt index 9c4a490..bae1da2 100644 --- a/beautiful-racket/br/demo/hdl/tokenizer.rkt +++ b/beautiful-racket/br/demo/hdl/tokenizer.rkt @@ -1,6 +1,6 @@ #lang br (require parser-tools/lex parser-tools/lex-sre - ragg/support + br/ragg/support racket/string) (provide tokenize) diff --git a/beautiful-racket/br/demo/txtadv/parser.rkt b/beautiful-racket/br/demo/txtadv/parser.rkt index 14c576a..9db0a93 100644 --- a/beautiful-racket/br/demo/txtadv/parser.rkt +++ b/beautiful-racket/br/demo/txtadv/parser.rkt @@ -1,4 +1,4 @@ -#lang ragg +#lang br/ragg txtadv-program : verb-section everywhere-section things-section places-section start-section diff --git a/beautiful-racket/br/demo/txtadv/tokenizer.rkt b/beautiful-racket/br/demo/txtadv/tokenizer.rkt index b54b0a3..016d1bf 100644 --- a/beautiful-racket/br/demo/txtadv/tokenizer.rkt +++ b/beautiful-racket/br/demo/txtadv/tokenizer.rkt @@ -1,6 +1,6 @@ #lang br (require parser-tools/lex parser-tools/lex-sre - ragg/support + br/ragg/support racket/string) (provide tokenize) diff --git a/beautiful-racket/info.rkt b/beautiful-racket/info.rkt index 7e46cdc..42bfea9 100644 --- a/beautiful-racket/info.rkt +++ b/beautiful-racket/info.rkt @@ -2,5 +2,5 @@ (define collection 'multi) (define version "0.01") -(define deps '("base" "sugar" "beautiful-racket-lib" "rackunit-lib" "ragg" "parser-tools-lib")) +(define deps '("base" "sugar" "beautiful-racket-lib" "rackunit-lib" "beautiful-racket-ragg" "parser-tools-lib")) (define build-deps '("racket-doc"))