improve tokenizer macros

dev-srcloc
Matthew Butterick 8 years ago
parent ece5a5c290
commit da2bdce15a

@@ -0,0 +1,4 @@
+#lang br/quicklang
+(module reader br/quicklang
+  (require (submod "jsonic-b/main.rkt" reader))
+  (provide (all-from-out (submod "jsonic-b/main.rkt" reader))))

@@ -1,37 +1,20 @@
 #lang br/quicklang
 (require json)
 (define-macro (js-module-begin PARSE-TREE)
   #'(#%module-begin
      (define result-string PARSE-TREE)
-     (when (string->jsexpr result-string)
-       (display result-string))))
+     (define validated-jsexpr (string->jsexpr result-string))
+     (display (jsexpr->string validated-jsexpr))))
 (provide (rename-out [js-module-begin #%module-begin]))
 (define-macro (jsonic-program S-EXP-OR-JSON-CHAR ...)
   #'(string-trim (string-append S-EXP-OR-JSON-CHAR ...)))
 (provide jsonic-program)
-(define-macro (json-char TOKEN)
-  #'TOKEN)
+(define-macro (json-char CHAR-STR) #'CHAR-STR)
 (provide json-char)
-(define (list->json x)
-  (format "[~a]" (string-join (map ->string x) ", ")))
-(define (hash->json x)
-  (format "{~a}" (string-join (for/list ([(k v) (in-hash x)])
-                                (format "~a: ~a" (->string k) (->string v))) ", ")))
-(define (->string x)
-  (cond
-    [(number? x) (number->string x)]
-    [(string? x) (format "~v" x)]
-    [(list? x) (list->json x)]
-    [(hash? x) (hash->json x)]
-    [else ""]))
-(define-macro (s-exp EXP-STRING)
-  (with-pattern ([EXP-DATUM (format-datum '~a (syntax->datum #'EXP-STRING))])
-    #'(->string EXP-DATUM)))
+(define-macro (s-exp SEXP-STR)
+  (with-pattern ([SEXP-DATUM (format-datum '~a (syntax->datum #'SEXP-STR))])
+    #'(jsexpr->string SEXP-DATUM)))
 (provide s-exp)
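
Note on the expander change: the new `js-module-begin` binds the parsed jsexpr and re-serializes it with `jsexpr->string`, so the program's output is normalized, compact JSON rather than the raw source string, and the hand-rolled `->string` serializers become unnecessary. A minimal REPL sketch of that round trip, using only `string->jsexpr` and `jsexpr->string` from the required json module:

    > (require json)
    > (string->jsexpr "[42, true]")   ; parse JSON text into a jsexpr
    '(42 #t)
    > (jsexpr->string (string->jsexpr "[42, true]"))  ; re-serialize compactly
    "[42,true]"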

@@ -1,10 +1,12 @@
-#lang br/demo/jsonic
+#lang br/demo/jsonic-b
 // a line comment
 [
+@$ 'null $@,
+@$ #f $@,
 @$ (* 6 7) $@,
 @$ "string" $@,
 @$ (list "array" "of" "strings") $@,
-@$ (hash "key-1" 42
-         "key-2" "value"
-         "key-3" (hash "subkey" 21)) $@
+@$ (hash 'key-1 42
+         'key-2 "value"
+         'key-3 (hash 'subkey 21)) $@
 ]
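
A hedged expectation for this test file's output: the hash keys switch to symbols because `jsexpr->string` requires symbol keys, the `//` comment is dropped by the tokenizer, and the re-serialized JSON should print roughly as below (object key order is unspecified, so the three pairs may appear in any order):

    [null,false,42,"string",["array","of","strings"],{"key-1":42,"key-2":"value","key-3":{"subkey":21}}]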

@@ -1,4 +1,4 @@
 #lang brag
 jsonic-program: (s-exp | json-char)*
-s-exp: SEXP
-json-char: CHAR
+s-exp: SEXP-TOK
+json-char: CHAR-TOK
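
For reference, brag builds one parse-tree node per rule, named after the rule, with token values spliced in. A sketch of the tree for the fragment `[@$ (* 6 7) $@]`, assuming the tokenizer below (note the SEXP-TOK value keeps its interior spaces):

    (jsonic-program
     (json-char "[")
     (s-exp " (* 6 7) ")
     (json-char "]"))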

@@ -2,7 +2,7 @@
 (require "tokenizer.rkt" "parser.rkt")
 (define (read-syntax path port)
   (define parse-tree (parse path (tokenize port)))
-  (define module-datum `(module jsonic-module br/demo/jsonic/expander
+  (define module-datum `(module jsonic-module br/demo/jsonic-b/expander
                           ,parse-tree))
   (datum->syntax #f module-datum))
 (provide read-syntax)

@@ -5,15 +5,10 @@
 (define our-lexer
   (lexer
    [(eof) eof]
-   [(:seq "//" (:* (char-complement "\n"))) (next-token)]
-   [(:seq "@$" (complement (:seq any-string "$@" any-string)) "$@")
-    (let ([trimmed-lexeme (string-trim (string-trim lexeme "$@") "@$")])
-      (token 'SEXP trimmed-lexeme))]
-   [any-char (token 'CHAR lexeme)]))
+   [(delimited-by "//" "\n") (next-token)]
+   [(delimited-by "@$" "$@")
+    (token 'SEXP-TOK (trim-delimiters "@$" lexeme "$@"))]
+   [any-char (token 'CHAR-TOK lexeme)]))
 (our-lexer port))
 next-token)
 (provide tokenize)
-;; (char-complement "\n") means any char but "\n"
-;; (complement "\n") means any whole string except "\n"
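
A quick way to exercise the new rules, based on the producer-style interface `tokenize` returns (each call to the returned thunk yields the next token):

    ;; a sketch, assuming this tokenizer.rkt is required
    (define next-tok (tokenize (open-input-string "// note\n@$ (* 6 7) $@!")))
    (for/list ([tok (in-producer next-tok eof)])
      tok)
    ;; expected: an SEXP-TOK token holding " (* 6 7) " (the comment is skipped),
    ;; then a CHAR-TOK token holding "!"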

@@ -986,6 +986,15 @@ In addition to the exports shown below, the @racketmodname[brag/lexer-support] m
 Repeatedly apply @racket[tokenizer] to @racket[source-string], gathering the resulting tokens into a list. Useful for testing or debugging a tokenizer.
 }
+@defproc[(trim-delimiters [left-delimiter string?]
+                          [str string?]
+                          [right-delimiter string?])
+         string?]{
+Remove @racket[left-delimiter] from the left side of @racket[str], and @racket[right-delimiter] from its right side. Intended as a helper function for @racket[delimited-by].
+}
 @defform[(:* re ...)]{
 Repetition of @racket[re] sequence 0 or more times.}
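
For instance, `trim-delimiters` strips the delimiters only at the ends of the string (note the `#:left?` / `#:right?` flags in the implementation below), leaving interior text intact:

    > (trim-delimiters "@$" "@$ (* 6 7) $@" "$@")
    " (* 6 7) "
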
@@ -1045,5 +1054,9 @@ one character.}
 Character ranges, matching characters between successive pairs of
 characters.}
+@defform[(delimited-by open close)]{
+A string that is bounded by the @racket[open] and @racket[close] delimiters. Matching is non-greedy (meaning, it stops at the first occurrence of @racket[close]). The resulting lexeme includes the delimiters. To remove them, see @racket[trim-delimiters].}
 @close-eval[my-eval]
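
To illustrate the non-greedy matching the new docs describe, a minimal sketch (the module path brag/lexer-support is taken from the section text above; `lexer` and `lexeme` come from its parser-tools/lex re-exports):

    #lang racket/base
    (require brag/lexer-support)
    (define lex-one
      (lexer [(delimited-by "@$" "$@") lexeme]))
    (define p (open-input-string "@$ a $@@$ b $@"))
    (lex-one p)  ; => "@$ a $@" (stops at the first "$@")
    (lex-one p)  ; => "@$ b $@"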

@@ -1,7 +1,9 @@
 #lang racket/base
 (require "support.rkt"
          parser-tools/lex
-         (prefix-in : parser-tools/lex-sre))
+         racket/string
+         (prefix-in : parser-tools/lex-sre)
+         (for-syntax racket/base))
 (provide (all-from-out "support.rkt")
          (all-from-out parser-tools/lex)
          (all-from-out parser-tools/lex-sre)
@@ -14,3 +16,12 @@
 (define token-producer (tokenize input-port))
 (for/list ([token (in-producer token-producer eof)])
   token))
+(define (trim-delimiters left lexeme right)
+  (string-trim (string-trim lexeme left #:right? #f) right #:left? #f))
+(define-lex-trans delimited-by
+  (λ (stx)
+    (syntax-case stx ()
+      [(_ OPEN CLOSE)
+       #'(:seq OPEN (complement (:seq any-string CLOSE any-string)) CLOSE)])))
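
The `define-lex-trans` simply packages the complement idiom that the old tokenizer spelled out by hand; for example:

    (delimited-by "@$" "$@")
    ;; expands to:
    (:seq "@$" (complement (:seq any-string "$@" any-string)) "$@")
    ;; i.e. "@$", then any string that does not contain "$@", then "$@",
    ;; which is why the match stops at the first closing delimiter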