From f9a6abdf4291963d73472bd46f82d844e7fd4fef Mon Sep 17 00:00:00 2001
From: Matthew Butterick <mb@mbtype.com>
Date: Fri, 8 Apr 2016 18:23:27 -0700
Subject: [PATCH] clean up br-bf

---
 br-bf copy/bf-test-sexp.rkt |  2 ++
 br-bf copy/bf-test.rkt      |  2 ++
 br-bf copy/expander.rkt     | 23 +++++++++++++++++++++
 br-bf copy/fib.rkt          | 12 +++++++++++
 br-bf copy/hello-world.rkt  |  6 ++++++
 br-bf copy/info.rkt         |  3 +++
 br-bf copy/main.rkt         | 12 +++++++++++
 br-bf copy/parser-test.rkt  |  4 ++++
 br-bf copy/parser.rkt       | 20 +++++++++++++++++++
 br-bf copy/reader.rkt       | 10 ++++++++++
 br-bf copy/tokenizer.rkt    | 15 ++++++++++++++
 br-bf/expander.rkt          | 26 ++++++++++++++++++++++++
 br-bf/hello-world-s-exp.rkt | 21 -------------------
 br-bf/main.rkt              | 40 ++++++++++++++++++++++++++++---------
 br-bf/parser.rkt            |  7 +++++++
 br-bf/tokenizer.rkt         | 30 +++++++++++++++++++++-------
 br/syntax.rkt               |  7 ++++++-
 17 files changed, 202 insertions(+), 38 deletions(-)
 create mode 100644 br-bf copy/bf-test-sexp.rkt
 create mode 100644 br-bf copy/bf-test.rkt
 create mode 100644 br-bf copy/expander.rkt
 create mode 100644 br-bf copy/fib.rkt
 create mode 100644 br-bf copy/hello-world.rkt
 create mode 100644 br-bf copy/info.rkt
 create mode 100644 br-bf copy/main.rkt
 create mode 100644 br-bf copy/parser-test.rkt
 create mode 100644 br-bf copy/parser.rkt
 create mode 100644 br-bf copy/reader.rkt
 create mode 100644 br-bf copy/tokenizer.rkt
 create mode 100644 br-bf/expander.rkt
 delete mode 100644 br-bf/hello-world-s-exp.rkt
diff --git a/br-bf copy/bf-test-sexp.rkt b/br-bf copy/bf-test-sexp.rkt
new file mode 100644
index 0000000..c161b67
--- /dev/null
+++ b/br-bf copy/bf-test-sexp.rkt	
@@ -0,0 +1,2 @@
+#lang s-exp br-bf/expander
+(bf-program (op "+") (op "+") (op "+") (op "+") (op "+") (op "+") (op "+") (loop "[" (op ">") (op "+") (op "+") (op "+") (op "+") (op "+") (op "<") (op "-") "]") (op ">") (op "."))
\ No newline at end of file
diff --git a/br-bf copy/bf-test.rkt b/br-bf copy/bf-test.rkt
new file mode 100644
index 0000000..2281219
--- /dev/null
+++ b/br-bf copy/bf-test.rkt	
@@ -0,0 +1,2 @@
+#lang br-bf
++++++++[>+++++<-]>.
diff --git a/br-bf copy/expander.rkt b/br-bf copy/expander.rkt
new file mode 100644
index 0000000..8e8cf24
--- /dev/null
+++ b/br-bf copy/expander.rkt	
@@ -0,0 +1,23 @@
+#lang br
+(provide #%module-begin #%top-interaction bf-program op loop)
+
+(define #'(bf-program <op-or-loop> ...)
+  #'(begin <op-or-loop> ...))
+
+(define-cases #'op
+  [#'(_ ">") #'(move-pointer 1)]
+  [#'(_ "<") #'(move-pointer -1)]
+  [#'(_ "+") #'(set-pointer-byte! (add1 (get-pointer-byte)))]
+  [#'(_ "-") #'(set-pointer-byte! (sub1 (get-pointer-byte)))]
+  [#'(_ ".") #'(write-byte (get-pointer-byte))]
+  [#'(_ ",") #'(set-pointer-byte! (read-byte))])
+
+(define #'(loop "[" <op-or-loop> ... "]")
+  #'(until (zero? (get-pointer-byte))
+           <op-or-loop> ...))
+
+(define bf-vector (make-vector 1000 0))
+(define bf-pointer 0)
+(define (get-pointer-byte) (vector-ref bf-vector bf-pointer))
+(define (set-pointer-byte! val) (vector-set! bf-vector bf-pointer val))
+(define (move-pointer how-far) (set! bf-pointer (+ bf-pointer how-far)))
\ No newline at end of file
diff --git a/br-bf copy/fib.rkt b/br-bf copy/fib.rkt
new file mode 100644
index 0000000..b1e61dc
--- /dev/null
+++ b/br-bf copy/fib.rkt	
@@ -0,0 +1,12 @@
+#lang br-bf
++++++++++++
+>+>>>>++++++++++++++++++++++++++++++++++++++++++++
+>++++++++++++++++++++++++++++++++<<<<<<[>[>>>>>>+>
++<<<<<<<-]>>>>>>>[<<<<<<<+>>>>>>>-]<[>++++++++++[-
+<-[>>+>+<<<-]>>>[<<<+>>>-]+<[>[-]<[-]]>[<<[>>>+<<<
+-]>>[-]]<<]>>>[>>+>+<<<-]>>>[<<<+>>>-]+<[>[-]<[-]]
+>[<<+>>[-]]<<<<<<<]>>>>>[+++++++++++++++++++++++++
++++++++++++++++++++++++.[-]]++++++++++<[->-<]>++++
+++++++++++++++++++++++++++++++++++++++++++++.[-]<<
+<<<<<<<<<<[>>>+>+<<<<-]>>>>[<<<<+>>>>-]<-[>>.>.<<<
+[-]]<<[>>+>+<<<-]>>>[<<<+>>>-]<<[<+>-]>[<+>-]<<<-]
\ No newline at end of file
diff --git a/br-bf copy/hello-world.rkt b/br-bf copy/hello-world.rkt
new file mode 100644
index 0000000..b40875c
--- /dev/null
+++ b/br-bf copy/hello-world.rkt	
@@ -0,0 +1,6 @@
+#lang br-bf
+++++++[>++++++++++++<-]>.
+>++++++++++[>++++++++++<-]>+.
++++++++..+++.>++++[>+++++++++++<-]>.
+<+++[>----<-]>.<<<<<+++[>+++++<-]>.
+>>.+++.------.--------.>>+.
\ No newline at end of file
diff --git a/br-bf copy/info.rkt b/br-bf copy/info.rkt
new file mode 100644
index 0000000..2df8ff5
--- /dev/null
+++ b/br-bf copy/info.rkt	
@@ -0,0 +1,3 @@
+#lang info
+
+(define compile-omit-paths 'all)
\ No newline at end of file
diff --git a/br-bf copy/main.rkt b/br-bf copy/main.rkt
new file mode 100644
index 0000000..778f492
--- /dev/null
+++ b/br-bf copy/main.rkt	
@@ -0,0 +1,12 @@
+#lang br
+
+(module reader br
+  (require "tokenizer.rkt" "parser.rkt" syntax/strip-context)
+  (provide read-syntax)
+  (define (read-syntax src-path src-port)
+    (define parsed-stx (parse src-path (tokenize src-port)))
+    (define new-ctxt-stx (datum->syntax #f 'new-ctxt))
+    (inject-syntax ([#'src-stx (replace-context new-ctxt-stx parsed-stx)])
+                   #'(module bf-interpreter br-bf/expander
+                       src-stx))))
+  
\ No newline at end of file
diff --git a/br-bf copy/parser-test.rkt b/br-bf copy/parser-test.rkt
new file mode 100644
index 0000000..4325329
--- /dev/null
+++ b/br-bf copy/parser-test.rkt	
@@ -0,0 +1,4 @@
+#lang racket
+(require "tokenizer.rkt" "parser.rkt" ragg/support)
+
+(syntax->datum (parse (tokenize (open-input-string "[+-]>"))))
\ No newline at end of file
diff --git a/br-bf copy/parser.rkt b/br-bf copy/parser.rkt
new file mode 100644
index 0000000..ab562b4
--- /dev/null
+++ b/br-bf copy/parser.rkt	
@@ -0,0 +1,20 @@
+#lang ragg
+;; use uppercase TOKEN-IDENTIFIERS for classes of tokens
+;; too numerous to indicate individually
+;; (e.g., numbers, strings)
+
+bf-program : (op | loop)*
+op : ">" | "<" | "+" | "-" | "." | ","
+loop : "[" (op | loop)* "]"
+
+
+;; Alternate ways of specifying grammar
+;; bf-program : op*
+;; op : ">" | "<" | "+" | "-" | "." | "," | loop
+;; loop : "[" op* "]"
+
+;; bf-program : expr*
+;; expr : op | loop
+;; op : ">" | "<" | "+" | "-" | "." | ","
+;; loop : "[" bf-program "]"
+
diff --git a/br-bf copy/reader.rkt b/br-bf copy/reader.rkt
new file mode 100644
index 0000000..b80844d
--- /dev/null
+++ b/br-bf copy/reader.rkt	
@@ -0,0 +1,10 @@
+#lang ragg
+
+<expr> : ">"
+       | "<" 
+       | "+" 
+       | "-" 
+       | "." 
+       | "," 
+       | <loop>
+<loop> : "["<expr>*"]"
\ No newline at end of file
diff --git a/br-bf copy/tokenizer.rkt b/br-bf copy/tokenizer.rkt
new file mode 100644
index 0000000..ceb0c83
--- /dev/null
+++ b/br-bf copy/tokenizer.rkt	
@@ -0,0 +1,15 @@
+#lang racket/base
+(require parser-tools/lex ragg/support)
+(provide tokenize)
+
+(define (tokenize ip)
+  (port-count-lines! ip)
+  
+  (define lex
+    (lexer
+     [(char-set "><-.,+[]") lexeme]
+     [whitespace (token 'white #:skip? #t)]
+     [(eof) (void)]))
+  
+  (define next-token-func (λ _ (lex ip)))
+  next-token-func)
diff --git a/br-bf/expander.rkt b/br-bf/expander.rkt
new file mode 100644
index 0000000..2871c7a
--- /dev/null
+++ b/br-bf/expander.rkt
@@ -0,0 +1,26 @@
+#lang br
+(provide (rename-out [bf-module-begin #%module-begin])
+         #%top-interaction bf-program op loop)
+
+(define #'bf-module-begin #'#%module-begin)
+
+(define #'(bf-program <op-or-loop> ...)
+  #'(begin <op-or-loop> ...))
+
+(define-cases #'op
+  [#'(_ ">") #'(move-pointer 1)]
+  [#'(_ "<") #'(move-pointer -1)]
+  [#'(_ "+") #'(set-pointer-byte! (add1 (get-pointer-byte)))]
+  [#'(_ "-") #'(set-pointer-byte! (sub1 (get-pointer-byte)))]
+  [#'(_ ".") #'(write-byte (get-pointer-byte))]
+  [#'(_ ",") #'(set-pointer-byte! (read-byte))])
+
+(define #'(loop "[" <op-or-loop> ... "]")
+  #'(until (zero? (get-pointer-byte))
+           <op-or-loop> ...))
+
+(define bf-vector (make-vector 1000 0))
+(define bf-pointer 0)
+(define (get-pointer-byte) (vector-ref bf-vector bf-pointer))
+(define (set-pointer-byte! val) (vector-set! bf-vector bf-pointer val))
+(define (move-pointer how-far) (set! bf-pointer (+ bf-pointer how-far)))
\ No newline at end of file
diff --git a/br-bf/hello-world-s-exp.rkt b/br-bf/hello-world-s-exp.rkt
deleted file mode 100644
index 252d3a5..0000000
--- a/br-bf/hello-world-s-exp.rkt
+++ /dev/null
@@ -1,21 +0,0 @@
-#lang s-exp br-bf
- 
-(plus)(plus)(plus)(plus)(plus) (plus)(plus)(plus)(plus)(plus)
-(brackets
- (greater-than) (plus)(plus)(plus)(plus)(plus) (plus)(plus)
- (greater-than) (plus)(plus)(plus)(plus)(plus) (plus)(plus)
- (plus)(plus)(plus) (greater-than) (plus)(plus)(plus)
- (greater-than) (plus) (less-than)(less-than)(less-than)
- (less-than) (minus))
-(greater-than) (plus)(plus) (period)
-(greater-than) (plus) (period)
-(plus)(plus)(plus)(plus)(plus) (plus)(plus) (period)
-(period) (plus)(plus)(plus) (period)
-(greater-than) (plus)(plus) (period)
-(less-than)(less-than) (plus)(plus)(plus)(plus)(plus)
-(plus)(plus)(plus)(plus)(plus) (plus)(plus)(plus)(plus)(plus)
-(period) (greater-than) (period)
-(plus)(plus)(plus) (period)
-(minus)(minus)(minus)(minus)(minus)(minus)(period)
-(minus)(minus)(minus)(minus)(minus)(minus)(minus)(minus)
-(period)(greater-than) (plus) (period) (greater-than) (period)
diff --git a/br-bf/main.rkt b/br-bf/main.rkt
index 10dfa94..74efbbd 100644
--- a/br-bf/main.rkt
+++ b/br-bf/main.rkt
@@ -1,20 +1,34 @@
 #lang br
-(provide #%module-begin #%top-interaction bf-program op loop)
 
 (module reader br
-  (require "tokenizer.rkt" "parser.rkt" syntax/strip-context)
   (provide read-syntax)
+  (require "tokenizer.rkt" "parser.rkt")
   (define (read-syntax src-path src-port)
-    (define src-exprs (list (parse src-path (tokenize src-port))))
-    ;; todo: why is `replace-context` necessary ; why does #'here work
-    (replace-context #'here
-                     (inject-syntax ([#'(<src-expr> ...) src-exprs])
-                                    #'(module bf-interpreter br-bf
-                                        <src-expr> ...)))))
+    (define parsed-syntax (parse src-path (tokenize src-port)))
+    ;; `strip-context` because `read-syntax` promises
+    ;; a "clean" syntax object without context
+    ;; (so later operations can add it)
+    (strip-context
+     (inject-syntax ([parsed-syntax])
+                    #'(module bf-interpreter br-bf
+                        parsed-syntax)))))
 
+(provide (rename-out [bf-module-begin #%module-begin])
+         #%top-interaction bf-program op loop)
+
+;; just relying on br's #%module-begin.
+;; Could just as easily pass through that one.
+(define #'bf-module-begin #'#%module-begin) 
+
+
+;; macros to expand our parse tree into local functions
+
+;; bf-program doesn't do anything
 (define #'(bf-program <op-or-loop> ...)
   #'(begin <op-or-loop> ...))
 
+;; op branches. Note that string & number literals are
+;; matched literally in syntax patterns.
 (define-cases #'op
   [#'(_ ">") #'(move-pointer 1)]
   [#'(_ "<") #'(move-pointer -1)]
@@ -23,12 +37,20 @@
   [#'(_ ".") #'(write-byte (get-pointer-byte))]
   [#'(_ ",") #'(set-pointer-byte! (read-byte))])
 
+
 (define #'(loop "[" <op-or-loop> ... "]")
   #'(until (zero? (get-pointer-byte))
            <op-or-loop> ...))
 
+;; bf implementation
+
+;; state: one vector, one pointer
 (define bf-vector (make-vector 1000 0))
 (define bf-pointer 0)
+
+;; gets and sets
 (define (get-pointer-byte) (vector-ref bf-vector bf-pointer))
 (define (set-pointer-byte! val) (vector-set! bf-vector bf-pointer val))
-(define (move-pointer how-far) (set! bf-pointer (+ bf-pointer how-far)))
+
+;; pointer mover
+(define (move-pointer how-far) (set! bf-pointer (+ bf-pointer how-far)))
\ No newline at end of file
diff --git a/br-bf/parser.rkt b/br-bf/parser.rkt
index ab562b4..a5ce3b1 100644
--- a/br-bf/parser.rkt
+++ b/br-bf/parser.rkt
@@ -3,6 +3,13 @@
 ;; too numerous to indicate individually
 ;; (e.g., numbers, strings)
 
+;; parser imposes structure:
+;; takes a flat list of tokens
+;; and arranges them into an (often hierarchical / recursive) shape.
+;; produces a parse tree, which is like an annotated, structured version of the source code.
+;; gives us the parenthesized expressions we need for the expander.
+
+
 bf-program : (op | loop)*
 op : ">" | "<" | "+" | "-" | "." | ","
 loop : "[" (op | loop)* "]"
diff --git a/br-bf/tokenizer.rkt b/br-bf/tokenizer.rkt
index ceb0c83..a3602b7 100644
--- a/br-bf/tokenizer.rkt
+++ b/br-bf/tokenizer.rkt
@@ -1,15 +1,31 @@
 #lang racket/base
-(require parser-tools/lex ragg/support)
+(require parser-tools/lex (prefix-in : parser-tools/lex-sre) ragg/support)
 (provide tokenize)
 
+;; tokenizer prepares source for parser by
+;; 1) identifying tokens, the smallest unit of information
+;; 2) throwing away anything irrelevant (whitespace, comments)
+;; tokenizer cooperates with the lexer, which is a fancy regular-expression processor
+
 (define (tokenize ip)
-  (port-count-lines! ip)
-  
-  (define lex
+  (define get-token
     (lexer
      [(char-set "><-.,+[]") lexeme]
+     ;; todo: try adding support for line comments
+     #;[(:: "#" (:* (complement "\n")) "\n") (token 'comment #:skip? #t)]
      [whitespace (token 'white #:skip? #t)]
-     [(eof) (void)]))
+     [(eof) eof]))
+  
+  (define (next-token) (get-token ip))
+  
+  next-token)
+
+(module+ test
+  (require rackunit)
+  (define (test-tokenize str)
+    (define ip (open-input-string str))
+    (define token-producer (tokenize ip))
+    (for/list ([token (in-producer token-producer eof)])
+              token))
   
-  (define next-token-func (λ _ (lex ip)))
-  next-token-func)
+  (check-equal? (test-tokenize "+") (list "+")))
diff --git a/br/syntax.rkt b/br/syntax.rkt
index e4b5a42..b94d993 100644
--- a/br/syntax.rkt
+++ b/br/syntax.rkt
@@ -10,9 +10,14 @@
          [pattern body ...] ...)]))
 
 (define-syntax (add-syntax stx)
+  ;; todo: permit mixing of two-arg and one-arg binding forms
+  ;; one-arg form allows you to inject an existing syntax object using its current name
   (syntax-case stx (syntax)
     [(_ ([(syntax sid) sid-stx] ...) body ...)
-     #'(with-syntax ([sid sid-stx] ...) body ...)]))
+     #'(with-syntax ([sid sid-stx] ...) body ...)]
+    ;; todo: limit `sid` to be an identifier
+    [(_ ([sid] ...) body ...)
+     #'(with-syntax ([sid sid] ...) body ...)]))
 
 (define-syntax syntax-let (make-rename-transformer #'add-syntax))