;; This implements the equivalent of mzscheme's read-syntax for R5RS scheme. ;; It has not been thoroughly tested. Also it will read an entire file into a ;; list of syntax objects, instead of returning one syntax object at a time ;; Everything in this module will be read with case sensitivity. #cs (module read mzscheme (require (lib "lex.ss" "parser-tools") (lib "yacc.ss" "parser-tools") (lib "readerr.ss" "syntax")) (define-tokens data (DATUM)) (define-empty-tokens delim (OP CP HASHOP QUOTE QUASIQUOTE UNQUOTE UNQUOTE-SPLICING DOT EOF)) (define scheme-lexer (lexer-src-pos ;; Skip comments, without accumulating extra position information [(: (whitespace) (comment)) (return-without-pos (scheme-lexer input-port))] ["#t" (token-DATUM #t)] ["#f" (token-DATUM #f)] [(@ "#\\" (any)) (token-DATUM (caddr (string->list lexeme)))] ["#\\space" (token-DATUM #\space)] ["#\\newline" (token-DATUM #\newline)] [(: (@ (initial) (* (subsequent))) + - "...") (token-DATUM (string->symbol lexeme))] [#\" (token-DATUM (list->string (get-string-token input-port)))] [#\( 'OP] [#\) 'CP] [#\[ 'OP] [#\] 'CP] ["#(" 'HASHOP] [(num2) (token-DATUM (string->number lexeme 2))] [(num8) (token-DATUM (string->number lexeme 8))] [(num10) (token-DATUM (string->number lexeme 10))] [(num16) (token-DATUM (string->number lexeme 16))] ["'" 'QUOTE] ["`" 'QUASIQUOTE] ["," 'UNQUOTE] [",@" 'UNQUOTE-SPLICING] ["." 'DOT] [(eof) 'EOF])) (define get-string-token (lexer [(^ #\" #\\) (cons (car (string->list lexeme)) (get-string-token input-port))] [(@ #\\ #\\) (cons #\\ (get-string-token input-port))] [(@ #\\ #\") (cons #\" (get-string-token input-port))] [#\" null])) (define-lex-abbrevs [any (- #\000 #\377)] [letter (: (- a z) (- #\A #\Z))] [digit (- #\0 #\9)] [whitespace (: #\newline #\return #\tab #\space #\vtab)] [initial (: (letter) ! $ % & * / : < = > ? ^ _ ~ @)] [subsequent (: (initial) (digit) + - #\. @)] [comment (@ #\; (* (^ #\newline)) #\newline)] ; [numR (@ (prefixR) (complexR))] ; [complexR (: (realR) ; (@ (realR) @ (realR)) ; (@ (realR) + (urealR) i) ; (@ (realR) - (urealR) i) ; (@ (realR) + i) ; (@ (realR) - i) ; (@ + (urealR) i) ; (@ - (urealR) i) ; (@ + i) ; (@ - i))] ; [realR (@ (sign) (urealR))] ; [urealR (: (uintegerR) (@ (uintegerR) / (uintegerR)) (decimalR))] ; [uintegerR (@ (+ (digitR)) (* #\#))] ; [prefixR (: (@ (radixR) (exactness)) ; (@ (exactness) (radixR)))] ; [numR (@ (prefixR) (complexR))] ; [complexR (: (realR) ; (@ (realR) @ (realR)) [num2 (@ (prefix2) (complex2))] [complex2 (: (real2) (@ (real2) @ (real2)) (@ (real2) + (ureal2) i) (@ (real2) - (ureal2) i) (@ (real2) + i) (@ (real2) - i) (@ + (ureal2) i) (@ - (ureal2) i) (@ + i) (@ - i))] [real2 (@ (sign) (ureal2))] [ureal2 (: (uinteger2) (@ (uinteger2) / (uinteger2)))] [uinteger2 (@ (+ (digit2)) (* #\#))] [prefix2 (: (@ (radix2) (exactness)) (@ (exactness) (radix2)))] [radix2 "#b"] [digit2 (: #\0 #\1)] [num8 (@ (prefix8) (complex8))] [complex8 (: (real8) (@ (real8) @ (real8)) (@ (real8) + (ureal8) i) (@ (real8) - (ureal8) i) (@ (real8) + i) (@ (real8) - i) (@ + (ureal8) i) (@ - (ureal8) i) (@ + i) (@ - i))] [real8 (@ (sign) (ureal8))] [ureal8 (: (uinteger8) (@ (uinteger8) / (uinteger8)))] [uinteger8 (@ (+ (digit8)) (* #\#))] [prefix8 (: (@ (radix8) (exactness)) (@ (exactness) (radix8)))] [radix8 "#o"] [digit8 (- #\0 #\7)] [num10 (@ (prefix10) (complex10))] [complex10 (: (real10) (@ (real10) @ (real10)) (@ (real10) + (ureal10) i) (@ (real10) - (ureal10) i) (@ (real10) + i) (@ (real10) - i) (@ + (ureal10) i) (@ - (ureal10) i) (@ + i) (@ - i))] [real10 (@ (sign) (ureal10))] [ureal10 (: (uinteger10) (@ (uinteger10) / (uinteger10)) (decimal10))] [uinteger10 (@ (+ (digit10)) (* #\#))] [prefix10 (: (@ (radix10) (exactness)) (@ (exactness) (radix10)))] [radix10 (: (@) "#d")] [digit10 (digit)] [decimal10 (: (@ (uinteger10) (suffix)) (@ #\. (+ (digit10)) (* #\#) (suffix)) (@ (+ (digit10)) #\. (* (digit10)) (* #\#) (suffix)) (@ (+ (digit10)) (+ #\#) #\. (* #\#) (suffix)))] [num16 (@ (prefix16) (complex16))] [complex16 (: (real16) (@ (real16) @ (real16)) (@ (real16) + (ureal16) i) (@ (real16) - (ureal16) i) (@ (real16) + i) (@ (real16) - i) (@ + (ureal16) i) (@ - (ureal16) i) (@ + i) (@ - i))] [real16 (@ (sign) (ureal16))] [ureal16 (: (uinteger16) (@ (uinteger16) / (uinteger16)))] [uinteger16 (@ (+ (digit16)) (* #\#))] [prefix16 (: (@ (radix16) (exactness)) (@ (exactness) (radix16)))] [radix16 "#x"] [digit16 (: (digit) (- #\a #\f) (- #\A #\F))] [suffix (: (@) (@ (exponent-marker) (sign) (+ (digit10))))] [exponent-marker (: e s f d l)] [sign (: (@) + -)] [exactness (: (@) "#i" "#e")]) (define stx-for-original-property (read-syntax #f (open-input-string "original"))) ;; A macro to build the syntax object (define-syntax (build-so stx) (syntax-case stx () ((_ value start end) (with-syntax ((start-pos (datum->syntax-object (syntax end) (string->symbol (format "$~a-start-pos" (syntax-object->datum (syntax start)))))) (end-pos (datum->syntax-object (syntax end) (string->symbol (format "$~a-end-pos" (syntax-object->datum (syntax end)))))) (source (datum->syntax-object (syntax end) 'source-name))) (syntax (datum->syntax-object #f value (list source (position-line start-pos) (position-col start-pos) (position-offset start-pos) (- (position-offset end-pos) (position-offset start-pos))) stx-for-original-property)))))) (define (scheme-parser source-name) (parser (src-pos) (start s) (end EOF) (error (lambda (a name val start end) (raise-read-error "read-error" source-name (position-line start) (position-col start) (position-offset start) (- (position-offset end) (position-offset start))))) (tokens data delim) (grammar (s [(sexp-list) (reverse $1)]) (sexp [(DATUM) (build-so $1 1 1)] [(OP sexp-list CP) (build-so (reverse $2) 1 3)] [(HASHOP sexp-list CP) (build-so (list->vector (reverse $2)) 1 3)] [(QUOTE sexp) (build-so (list 'quote $2) 1 2)] [(QUASIQUOTE sexp) (build-so (list 'quasiquote $2) 1 2)] [(UNQUOTE sexp) (build-so (list 'unquote $2) 1 2)] [(UNQUOTE-SPLICING sexp) (build-so (list 'unquote-splicing $2) 1 2)] [(OP sexp-list DOT sexp CP) (build-so (append (reverse $2) $4) 1 5)]) (sexp-list [() null] [(sexp-list sexp) (cons $2 $1)])))) (define (rs sn ip) (port-count-lines! ip) ((scheme-parser sn) (lambda () (scheme-lexer ip)))) (define readsyntax (case-lambda ((sn) (rs sn (current-input-port))) ((sn ip) (rs sn ip)))) (provide (rename readsyntax read-syntax)) )