From f9a6abdf4291963d73472bd46f82d844e7fd4fef Mon Sep 17 00:00:00 2001 From: Matthew Butterick Date: Fri, 8 Apr 2016 18:23:27 -0700 Subject: [PATCH] clean up br-bf --- br-bf copy/bf-test-sexp.rkt | 2 ++ br-bf copy/bf-test.rkt | 2 ++ br-bf copy/expander.rkt | 23 +++++++++++++++++++++ br-bf copy/fib.rkt | 12 +++++++++++ br-bf copy/hello-world.rkt | 6 ++++++ br-bf copy/info.rkt | 3 +++ br-bf copy/main.rkt | 12 +++++++++++ br-bf copy/parser-test.rkt | 4 ++++ br-bf copy/parser.rkt | 20 +++++++++++++++++++ br-bf copy/reader.rkt | 10 ++++++++++ br-bf copy/tokenizer.rkt | 15 ++++++++++++++ br-bf/expander.rkt | 26 ++++++++++++++++++++++++ br-bf/hello-world-s-exp.rkt | 21 ------------------- br-bf/main.rkt | 40 ++++++++++++++++++++++++++++--------- br-bf/parser.rkt | 7 +++++++ br-bf/tokenizer.rkt | 30 +++++++++++++++++++++------- br/syntax.rkt | 7 ++++++- 17 files changed, 202 insertions(+), 38 deletions(-) create mode 100644 br-bf copy/bf-test-sexp.rkt create mode 100644 br-bf copy/bf-test.rkt create mode 100644 br-bf copy/expander.rkt create mode 100644 br-bf copy/fib.rkt create mode 100644 br-bf copy/hello-world.rkt create mode 100644 br-bf copy/info.rkt create mode 100644 br-bf copy/main.rkt create mode 100644 br-bf copy/parser-test.rkt create mode 100644 br-bf copy/parser.rkt create mode 100644 br-bf copy/reader.rkt create mode 100644 br-bf copy/tokenizer.rkt create mode 100644 br-bf/expander.rkt delete mode 100644 br-bf/hello-world-s-exp.rkt diff --git a/br-bf copy/bf-test-sexp.rkt b/br-bf copy/bf-test-sexp.rkt new file mode 100644 index 0000000..c161b67 --- /dev/null +++ b/br-bf copy/bf-test-sexp.rkt @@ -0,0 +1,2 @@ +#lang s-exp br-bf/expander +(bf-program (op "+") (op "+") (op "+") (op "+") (op "+") (op "+") (op "+") (loop "[" (op ">") (op "+") (op "+") (op "+") (op "+") (op "+") (op "<") (op "-") "]") (op ">") (op ".")) \ No newline at end of file diff --git a/br-bf copy/bf-test.rkt b/br-bf copy/bf-test.rkt new file mode 100644 index 0000000..2281219 --- /dev/null +++ b/br-bf copy/bf-test.rkt @@ -0,0 +1,2 @@ +#lang br-bf ++++++++[>+++++<-]>. diff --git a/br-bf copy/expander.rkt b/br-bf copy/expander.rkt new file mode 100644 index 0000000..8e8cf24 --- /dev/null +++ b/br-bf copy/expander.rkt @@ -0,0 +1,23 @@ +#lang br +(provide #%module-begin #%top-interaction bf-program op loop) + +(define #'(bf-program ...) + #'(begin ...)) + +(define-cases #'op + [#'(_ ">") #'(move-pointer 1)] + [#'(_ "<") #'(move-pointer -1)] + [#'(_ "+") #'(set-pointer-byte! (add1 (get-pointer-byte)))] + [#'(_ "-") #'(set-pointer-byte! (sub1 (get-pointer-byte)))] + [#'(_ ".") #'(write-byte (get-pointer-byte))] + [#'(_ ",") #'(set-pointer-byte! (read-byte))]) + +(define #'(loop "[" ... "]") + #'(until (zero? (get-pointer-byte)) + ...)) + +(define bf-vector (make-vector 1000 0)) +(define bf-pointer 0) +(define (get-pointer-byte) (vector-ref bf-vector bf-pointer)) +(define (set-pointer-byte! val) (vector-set! bf-vector bf-pointer val)) +(define (move-pointer how-far) (set! bf-pointer (+ bf-pointer how-far))) \ No newline at end of file diff --git a/br-bf copy/fib.rkt b/br-bf copy/fib.rkt new file mode 100644 index 0000000..b1e61dc --- /dev/null +++ b/br-bf copy/fib.rkt @@ -0,0 +1,12 @@ +#lang br-bf ++++++++++++ +>+>>>>++++++++++++++++++++++++++++++++++++++++++++ +>++++++++++++++++++++++++++++++++<<<<<<[>[>>>>>>+> ++<<<<<<<-]>>>>>>>[<<<<<<<+>>>>>>>-]<[>++++++++++[- +<-[>>+>+<<<-]>>>[<<<+>>>-]+<[>[-]<[-]]>[<<[>>>+<<< +-]>>[-]]<<]>>>[>>+>+<<<-]>>>[<<<+>>>-]+<[>[-]<[-]] +>[<<+>>[-]]<<<<<<<]>>>>>[+++++++++++++++++++++++++ ++++++++++++++++++++++++.[-]]++++++++++<[->-<]>++++ +++++++++++++++++++++++++++++++++++++++++++++.[-]<< +<<<<<<<<<<[>>>+>+<<<<-]>>>>[<<<<+>>>>-]<-[>>.>.<<< +[-]]<<[>>+>+<<<-]>>>[<<<+>>>-]<<[<+>-]>[<+>-]<<<-] \ No newline at end of file diff --git a/br-bf copy/hello-world.rkt b/br-bf copy/hello-world.rkt new file mode 100644 index 0000000..b40875c --- /dev/null +++ b/br-bf copy/hello-world.rkt @@ -0,0 +1,6 @@ +#lang br-bf +++++++[>++++++++++++<-]>. +>++++++++++[>++++++++++<-]>+. ++++++++..+++.>++++[>+++++++++++<-]>. +<+++[>----<-]>.<<<<<+++[>+++++<-]>. +>>.+++.------.--------.>>+. \ No newline at end of file diff --git a/br-bf copy/info.rkt b/br-bf copy/info.rkt new file mode 100644 index 0000000..2df8ff5 --- /dev/null +++ b/br-bf copy/info.rkt @@ -0,0 +1,3 @@ +#lang info + +(define compile-omit-paths 'all) \ No newline at end of file diff --git a/br-bf copy/main.rkt b/br-bf copy/main.rkt new file mode 100644 index 0000000..778f492 --- /dev/null +++ b/br-bf copy/main.rkt @@ -0,0 +1,12 @@ +#lang br + +(module reader br + (require "tokenizer.rkt" "parser.rkt" syntax/strip-context) + (provide read-syntax) + (define (read-syntax src-path src-port) + (define parsed-stx (parse src-path (tokenize src-port))) + (define new-ctxt-stx (datum->syntax #f 'new-ctxt)) + (inject-syntax ([#'src-stx (replace-context new-ctxt-stx parsed-stx)]) + #'(module bf-interpreter br-bf/expander + src-stx)))) + \ No newline at end of file diff --git a/br-bf copy/parser-test.rkt b/br-bf copy/parser-test.rkt new file mode 100644 index 0000000..4325329 --- /dev/null +++ b/br-bf copy/parser-test.rkt @@ -0,0 +1,4 @@ +#lang racket +(require "tokenizer.rkt" "parser.rkt" ragg/support) + +(syntax->datum (parse (tokenize (open-input-string "[+-]>")))) \ No newline at end of file diff --git a/br-bf copy/parser.rkt b/br-bf copy/parser.rkt new file mode 100644 index 0000000..ab562b4 --- /dev/null +++ b/br-bf copy/parser.rkt @@ -0,0 +1,20 @@ +#lang ragg +;; use uppercase TOKEN-IDENTIFIERS for classes of tokens +;; too numerous to indicate individually +;; (e.g., numbers, strings) + +bf-program : (op | loop)* +op : ">" | "<" | "+" | "-" | "." | "," +loop : "[" (op | loop)* "]" + + +;; Alternate ways of specifying grammar +;; bf-program : op* +;; op : ">" | "<" | "+" | "-" | "." | "," | loop +;; loop : "[" op* "]" + +;; bf-program : expr* +;; expr : op | loop +;; op : ">" | "<" | "+" | "-" | "." | "," +;; loop : "[" bf-program "]" + diff --git a/br-bf copy/reader.rkt b/br-bf copy/reader.rkt new file mode 100644 index 0000000..b80844d --- /dev/null +++ b/br-bf copy/reader.rkt @@ -0,0 +1,10 @@ +#lang ragg + + : ">" + | "<" + | "+" + | "-" + | "." + | "," + | + : "["*"]" \ No newline at end of file diff --git a/br-bf copy/tokenizer.rkt b/br-bf copy/tokenizer.rkt new file mode 100644 index 0000000..ceb0c83 --- /dev/null +++ b/br-bf copy/tokenizer.rkt @@ -0,0 +1,15 @@ +#lang racket/base +(require parser-tools/lex ragg/support) +(provide tokenize) + +(define (tokenize ip) + (port-count-lines! ip) + + (define lex + (lexer + [(char-set "><-.,+[]") lexeme] + [whitespace (token 'white #:skip? #t)] + [(eof) (void)])) + + (define next-token-func (λ _ (lex ip))) + next-token-func) diff --git a/br-bf/expander.rkt b/br-bf/expander.rkt new file mode 100644 index 0000000..2871c7a --- /dev/null +++ b/br-bf/expander.rkt @@ -0,0 +1,26 @@ +#lang br +(provide (rename-out [bf-module-begin #%module-begin]) + #%top-interaction bf-program op loop) + +(define #'bf-module-begin #'#%module-begin) + +(define #'(bf-program ...) + #'(begin ...)) + +(define-cases #'op + [#'(_ ">") #'(move-pointer 1)] + [#'(_ "<") #'(move-pointer -1)] + [#'(_ "+") #'(set-pointer-byte! (add1 (get-pointer-byte)))] + [#'(_ "-") #'(set-pointer-byte! (sub1 (get-pointer-byte)))] + [#'(_ ".") #'(write-byte (get-pointer-byte))] + [#'(_ ",") #'(set-pointer-byte! (read-byte))]) + +(define #'(loop "[" ... "]") + #'(until (zero? (get-pointer-byte)) + ...)) + +(define bf-vector (make-vector 1000 0)) +(define bf-pointer 0) +(define (get-pointer-byte) (vector-ref bf-vector bf-pointer)) +(define (set-pointer-byte! val) (vector-set! bf-vector bf-pointer val)) +(define (move-pointer how-far) (set! bf-pointer (+ bf-pointer how-far))) \ No newline at end of file diff --git a/br-bf/hello-world-s-exp.rkt b/br-bf/hello-world-s-exp.rkt deleted file mode 100644 index 252d3a5..0000000 --- a/br-bf/hello-world-s-exp.rkt +++ /dev/null @@ -1,21 +0,0 @@ -#lang s-exp br-bf - -(plus)(plus)(plus)(plus)(plus) (plus)(plus)(plus)(plus)(plus) -(brackets - (greater-than) (plus)(plus)(plus)(plus)(plus) (plus)(plus) - (greater-than) (plus)(plus)(plus)(plus)(plus) (plus)(plus) - (plus)(plus)(plus) (greater-than) (plus)(plus)(plus) - (greater-than) (plus) (less-than)(less-than)(less-than) - (less-than) (minus)) -(greater-than) (plus)(plus) (period) -(greater-than) (plus) (period) -(plus)(plus)(plus)(plus)(plus) (plus)(plus) (period) -(period) (plus)(plus)(plus) (period) -(greater-than) (plus)(plus) (period) -(less-than)(less-than) (plus)(plus)(plus)(plus)(plus) -(plus)(plus)(plus)(plus)(plus) (plus)(plus)(plus)(plus)(plus) -(period) (greater-than) (period) -(plus)(plus)(plus) (period) -(minus)(minus)(minus)(minus)(minus)(minus)(period) -(minus)(minus)(minus)(minus)(minus)(minus)(minus)(minus) -(period)(greater-than) (plus) (period) (greater-than) (period) diff --git a/br-bf/main.rkt b/br-bf/main.rkt index 10dfa94..74efbbd 100644 --- a/br-bf/main.rkt +++ b/br-bf/main.rkt @@ -1,20 +1,34 @@ #lang br -(provide #%module-begin #%top-interaction bf-program op loop) (module reader br - (require "tokenizer.rkt" "parser.rkt" syntax/strip-context) (provide read-syntax) + (require "tokenizer.rkt" "parser.rkt") (define (read-syntax src-path src-port) - (define src-exprs (list (parse src-path (tokenize src-port)))) - ;; todo: why is `replace-context` necessary ; why does #'here work - (replace-context #'here - (inject-syntax ([#'( ...) src-exprs]) - #'(module bf-interpreter br-bf - ...))))) + (define parsed-syntax (parse src-path (tokenize src-port))) + ;; `strip-context` because `read-syntax` promises + ;; a "clean" syntax object without context + ;; (so later operations can add it) + (strip-context + (inject-syntax ([parsed-syntax]) + #'(module bf-interpreter br-bf + parsed-syntax))))) +(provide (rename-out [bf-module-begin #%module-begin]) + #%top-interaction bf-program op loop) + +;; just relying on br's #%module-begin. +;; Could just as easily pass through that one. +(define #'bf-module-begin #'#%module-begin) + + +;; macros to expand our parse tree into local functions + +;; bf-program doesn't do anything (define #'(bf-program ...) #'(begin ...)) +;; op branches. Note that string & number literals are +;; matched literally in syntax patterns. (define-cases #'op [#'(_ ">") #'(move-pointer 1)] [#'(_ "<") #'(move-pointer -1)] @@ -23,12 +37,20 @@ [#'(_ ".") #'(write-byte (get-pointer-byte))] [#'(_ ",") #'(set-pointer-byte! (read-byte))]) + (define #'(loop "[" ... "]") #'(until (zero? (get-pointer-byte)) ...)) +;; bf implementation + +;; state: one vector, one pointer (define bf-vector (make-vector 1000 0)) (define bf-pointer 0) + +;; gets and sets (define (get-pointer-byte) (vector-ref bf-vector bf-pointer)) (define (set-pointer-byte! val) (vector-set! bf-vector bf-pointer val)) -(define (move-pointer how-far) (set! bf-pointer (+ bf-pointer how-far))) + +;; pointer mover +(define (move-pointer how-far) (set! bf-pointer (+ bf-pointer how-far))) \ No newline at end of file diff --git a/br-bf/parser.rkt b/br-bf/parser.rkt index ab562b4..a5ce3b1 100644 --- a/br-bf/parser.rkt +++ b/br-bf/parser.rkt @@ -3,6 +3,13 @@ ;; too numerous to indicate individually ;; (e.g., numbers, strings) +;; parser imposes structure: +;; takes a flat list of tokens +;; and arranges them into an (often hierarchical / recursive) shape. +;; produces a parse tree, which is like an annotated, structured version of the source code. +;; gives us the parenthesized expressions we need for the expander. + + bf-program : (op | loop)* op : ">" | "<" | "+" | "-" | "." | "," loop : "[" (op | loop)* "]" diff --git a/br-bf/tokenizer.rkt b/br-bf/tokenizer.rkt index ceb0c83..a3602b7 100644 --- a/br-bf/tokenizer.rkt +++ b/br-bf/tokenizer.rkt @@ -1,15 +1,31 @@ #lang racket/base -(require parser-tools/lex ragg/support) +(require parser-tools/lex (prefix-in : parser-tools/lex-sre) ragg/support) (provide tokenize) +;; tokenizer prepares source for parser by +;; 1) identifying tokens, the smallest unit of information +;; 2) throwing away anything irrelevant (whitespace, comments) +;; tokenizer cooperates with the lexer, which is a fancy regular-expression processor + (define (tokenize ip) - (port-count-lines! ip) - - (define lex + (define get-token (lexer [(char-set "><-.,+[]") lexeme] + ;; todo: try adding support for line comments + #;[(:: "#" (:* (complement "\n")) "\n") (token 'comment #:skip? #t)] [whitespace (token 'white #:skip? #t)] - [(eof) (void)])) + [(eof) eof])) + + (define (next-token) (get-token ip)) + + next-token) + +(module+ test + (require rackunit) + (define (test-tokenize str) + (define ip (open-input-string str)) + (define token-producer (tokenize ip)) + (for/list ([token (in-producer token-producer eof)]) + token)) - (define next-token-func (λ _ (lex ip))) - next-token-func) + (check-equal? (test-tokenize "+") (list "+"))) diff --git a/br/syntax.rkt b/br/syntax.rkt index e4b5a42..b94d993 100644 --- a/br/syntax.rkt +++ b/br/syntax.rkt @@ -10,9 +10,14 @@ [pattern body ...] ...)])) (define-syntax (add-syntax stx) + ;; todo: permit mixing of two-arg and one-arg binding forms + ;; one-arg form allows you to inject an existing syntax object using its current name (syntax-case stx (syntax) [(_ ([(syntax sid) sid-stx] ...) body ...) - #'(with-syntax ([sid sid-stx] ...) body ...)])) + #'(with-syntax ([sid sid-stx] ...) body ...)] + ;; todo: limit `sid` to be an identifier + [(_ ([sid] ...) body ...) + #'(with-syntax ([sid sid] ...) body ...)])) (define-syntax syntax-let (make-rename-transformer #'add-syntax))