You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
beautiful-racket/br-bf/tokenizer.rkt

34 lines
1.1 KiB
Racket

9 years ago
#lang racket/base
9 years ago
(require parser-tools/lex (prefix-in : parser-tools/lex-sre) ragg/support)
9 years ago
(provide tokenize)
9 years ago
9 years ago
;; tokenizer prepares source for parser by
;; 1) identifying tokens, the smallest unit of information
;; 2) throwing away anything irrelevant (whitespace, comments)
;; tokenizer cooperates with the lexer, which is a fancy regular-expression processor
;; tokenize : input-port -> (-> token)
;; Builds a token producer for the bf source readable from IP. The result is
;; a zero-argument procedure; each call lexes one more token from IP:
;;   * one of the eight bf command characters -> the matched lexeme (a string)
;;   * any other character                    -> a `token` marked #:skip? #t
;;   * end of input                           -> the eof object
(define (tokenize ip)
  (define get-token
    (lexer
     ;; the eight bf commands are passed through as their own lexeme
     [(char-set "><-.,+[]") lexeme]
     ;; todo: try adding support for line comments
     #;[(:: "#" (:* (complement "\n")) "\n") (token 'comment #:skip? #t)]
     [whitespace (token 'white #:skip? #t)]
     ;; treat other ASCII characters as comments
     [(char-range #\nul #\~) (token 'ascii #:skip? #t)]
     ;; fallback for everything the rules above miss (DEL and all non-ASCII
     ;; characters): without it the lexer has no matching rule and raises an
     ;; error instead of skipping the character like any other comment text.
     ;; NOTE(review): assumes skipped tokens are dropped regardless of their
     ;; type name, so 'other need not appear in the grammar -- confirm.
     [any-char (token 'other #:skip? #t)]
     ;; return the real eof object so callers can stop on `eof`
     [(eof) eof]))
  (define (next-token) (get-token ip))
  next-token)
(module+ test
  (require rackunit)

  ;; Run the tokenizer over STR and return every token it yields, in order,
  ;; stopping at (and excluding) the eof object.
  (define (test-tokenize str)
    (define next-token (tokenize (open-input-string str)))
    (let collect ([seen '()])
      (define tok (next-token))
      (if (eq? tok eof)
          (reverse seen)
          (collect (cons tok seen)))))

  ;; a single bf command comes back as its own lexeme
  (check-equal? (test-tokenize "+") (list "+")))