You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
245 lines
8.4 KiB
Scheme
245 lines
8.4 KiB
Scheme
;; This implements the equivalent of mzscheme's read-syntax for R5RS scheme.
|
|
;; It has not been thoroughly tested. Also it will read an entire file into a
|
|
;; list of syntax objects, instead of returning one syntax object at a time
|
|
|
|
;; Everything in this module will be read with case sensitivity.
|
|
#cs
|
|
(module read mzscheme
|
|
|
|
(require (lib "lex.ss" "parser-tools")
|
|
(lib "yacc.ss" "parser-tools")
|
|
(lib "readerr.ss" "syntax"))
|
|
|
|
(define-tokens data (DATUM))
|
|
(define-empty-tokens delim (OP CP HASHOP QUOTE QUASIQUOTE UNQUOTE UNQUOTE-SPLICING DOT EOF))
|
|
|
|
(define scheme-lexer
|
|
(lexer-src-pos
|
|
|
|
;; Skip comments, without accumulating extra position information
|
|
[(: (whitespace) (comment)) (return-without-pos (scheme-lexer input-port))]
|
|
|
|
["#t" (token-DATUM #t)]
|
|
["#f" (token-DATUM #f)]
|
|
[(@ "#\\" (any)) (token-DATUM (caddr (string->list lexeme)))]
|
|
["#\\space" (token-DATUM #\space)]
|
|
["#\\newline" (token-DATUM #\newline)]
|
|
[(: (@ (initial) (* (subsequent))) + - "...") (token-DATUM (string->symbol lexeme))]
|
|
[#\" (token-DATUM (list->string (get-string-token input-port)))]
|
|
[#\( 'OP]
|
|
[#\) 'CP]
|
|
[#\[ 'OP]
|
|
[#\] 'CP]
|
|
["#(" 'HASHOP]
|
|
[(num2) (token-DATUM (string->number lexeme 2))]
|
|
[(num8) (token-DATUM (string->number lexeme 8))]
|
|
[(num10) (token-DATUM (string->number lexeme 10))]
|
|
[(num16) (token-DATUM (string->number lexeme 16))]
|
|
["'" 'QUOTE]
|
|
["`" 'QUASIQUOTE]
|
|
["," 'UNQUOTE]
|
|
[",@" 'UNQUOTE-SPLICING]
|
|
["." 'DOT]
|
|
[(eof) 'EOF]))
|
|
|
|
(define get-string-token
|
|
(lexer
|
|
[(^ #\" #\\) (cons (car (string->list lexeme))
|
|
(get-string-token input-port))]
|
|
[(@ #\\ #\\) (cons #\\ (get-string-token input-port))]
|
|
[(@ #\\ #\") (cons #\" (get-string-token input-port))]
|
|
[#\" null]))
|
|
|
|
|
|
(define-lex-abbrevs
|
|
[any (- #\000 #\377)]
|
|
[letter (: (- a z) (- #\A #\Z))]
|
|
[digit (- #\0 #\9)]
|
|
[whitespace (: #\newline #\return #\tab #\space #\vtab)]
|
|
[initial (: (letter) ! $ % & * / : < = > ? ^ _ ~ @)]
|
|
[subsequent (: (initial) (digit) + - #\. @)]
|
|
[comment (@ #\; (* (^ #\newline)) #\newline)]
|
|
|
|
; [numR (@ (prefixR) (complexR))]
|
|
; [complexR (: (realR)
|
|
; (@ (realR) @ (realR))
|
|
; (@ (realR) + (urealR) i)
|
|
; (@ (realR) - (urealR) i)
|
|
; (@ (realR) + i)
|
|
; (@ (realR) - i)
|
|
; (@ + (urealR) i)
|
|
; (@ - (urealR) i)
|
|
; (@ + i)
|
|
; (@ - i))]
|
|
; [realR (@ (sign) (urealR))]
|
|
; [urealR (: (uintegerR) (@ (uintegerR) / (uintegerR)) (decimalR))]
|
|
; [uintegerR (@ (+ (digitR)) (* #\#))]
|
|
; [prefixR (: (@ (radixR) (exactness))
|
|
; (@ (exactness) (radixR)))]
|
|
; [numR (@ (prefixR) (complexR))]
|
|
; [complexR (: (realR)
|
|
; (@ (realR) @ (realR))
|
|
|
|
[num2 (@ (prefix2) (complex2))]
|
|
[complex2 (: (real2)
|
|
(@ (real2) @ (real2))
|
|
(@ (real2) + (ureal2) i)
|
|
(@ (real2) - (ureal2) i)
|
|
(@ (real2) + i)
|
|
(@ (real2) - i)
|
|
(@ + (ureal2) i)
|
|
(@ - (ureal2) i)
|
|
(@ + i)
|
|
(@ - i))]
|
|
[real2 (@ (sign) (ureal2))]
|
|
[ureal2 (: (uinteger2) (@ (uinteger2) / (uinteger2)))]
|
|
[uinteger2 (@ (+ (digit2)) (* #\#))]
|
|
[prefix2 (: (@ (radix2) (exactness))
|
|
(@ (exactness) (radix2)))]
|
|
[radix2 "#b"]
|
|
[digit2 (: #\0 #\1)]
|
|
|
|
[num8 (@ (prefix8) (complex8))]
|
|
[complex8 (: (real8)
|
|
(@ (real8) @ (real8))
|
|
(@ (real8) + (ureal8) i)
|
|
(@ (real8) - (ureal8) i)
|
|
(@ (real8) + i)
|
|
(@ (real8) - i)
|
|
(@ + (ureal8) i)
|
|
(@ - (ureal8) i)
|
|
(@ + i)
|
|
(@ - i))]
|
|
[real8 (@ (sign) (ureal8))]
|
|
[ureal8 (: (uinteger8) (@ (uinteger8) / (uinteger8)))]
|
|
[uinteger8 (@ (+ (digit8)) (* #\#))]
|
|
[prefix8 (: (@ (radix8) (exactness))
|
|
(@ (exactness) (radix8)))]
|
|
[radix8 "#o"]
|
|
[digit8 (- #\0 #\7)]
|
|
|
|
[num10 (@ (prefix10) (complex10))]
|
|
[complex10 (: (real10)
|
|
(@ (real10) @ (real10))
|
|
(@ (real10) + (ureal10) i)
|
|
(@ (real10) - (ureal10) i)
|
|
(@ (real10) + i)
|
|
(@ (real10) - i)
|
|
(@ + (ureal10) i)
|
|
(@ - (ureal10) i)
|
|
(@ + i)
|
|
(@ - i))]
|
|
[real10 (@ (sign) (ureal10))]
|
|
[ureal10 (: (uinteger10) (@ (uinteger10) / (uinteger10)) (decimal10))]
|
|
[uinteger10 (@ (+ (digit10)) (* #\#))]
|
|
[prefix10 (: (@ (radix10) (exactness))
|
|
(@ (exactness) (radix10)))]
|
|
[radix10 (: (@) "#d")]
|
|
[digit10 (digit)]
|
|
[decimal10 (: (@ (uinteger10) (suffix))
|
|
(@ #\. (+ (digit10)) (* #\#) (suffix))
|
|
(@ (+ (digit10)) #\. (* (digit10)) (* #\#) (suffix))
|
|
(@ (+ (digit10)) (+ #\#) #\. (* #\#) (suffix)))]
|
|
|
|
[num16 (@ (prefix16) (complex16))]
|
|
[complex16 (: (real16)
|
|
(@ (real16) @ (real16))
|
|
(@ (real16) + (ureal16) i)
|
|
(@ (real16) - (ureal16) i)
|
|
(@ (real16) + i)
|
|
(@ (real16) - i)
|
|
(@ + (ureal16) i)
|
|
(@ - (ureal16) i)
|
|
(@ + i)
|
|
(@ - i))]
|
|
[real16 (@ (sign) (ureal16))]
|
|
[ureal16 (: (uinteger16) (@ (uinteger16) / (uinteger16)))]
|
|
[uinteger16 (@ (+ (digit16)) (* #\#))]
|
|
[prefix16 (: (@ (radix16) (exactness))
|
|
(@ (exactness) (radix16)))]
|
|
[radix16 "#x"]
|
|
[digit16 (: (digit) (- #\a #\f) (- #\A #\F))]
|
|
|
|
|
|
[suffix (: (@) (@ (exponent-marker) (sign) (+ (digit10))))]
|
|
[exponent-marker (: e s f d l)]
|
|
[sign (: (@) + -)]
|
|
[exactness (: (@) "#i" "#e")])
|
|
|
|
|
|
(define stx-for-original-property (read-syntax #f (open-input-string "original")))
|
|
|
|
;; A macro to build the syntax object
|
|
(define-syntax (build-so stx)
|
|
(syntax-case stx ()
|
|
((_ value start end)
|
|
(with-syntax ((start-pos (datum->syntax-object
|
|
(syntax end)
|
|
(string->symbol
|
|
(format "$~a-start-pos"
|
|
(syntax-object->datum (syntax start))))))
|
|
(end-pos (datum->syntax-object
|
|
(syntax end)
|
|
(string->symbol
|
|
(format "$~a-end-pos"
|
|
(syntax-object->datum (syntax end))))))
|
|
(source (datum->syntax-object
|
|
(syntax end)
|
|
'source-name)))
|
|
(syntax
|
|
(datum->syntax-object
|
|
#f
|
|
value
|
|
(list source
|
|
(position-line start-pos)
|
|
(position-col start-pos)
|
|
(position-offset start-pos)
|
|
(- (position-offset end-pos)
|
|
(position-offset start-pos)))
|
|
stx-for-original-property))))))
|
|
|
|
(define (scheme-parser source-name)
|
|
(parser
|
|
(src-pos)
|
|
|
|
(start s)
|
|
(end EOF)
|
|
(error (lambda (a name val start end)
|
|
(raise-read-error
|
|
"read-error"
|
|
source-name
|
|
(position-line start)
|
|
(position-col start)
|
|
(position-offset start)
|
|
(- (position-offset end)
|
|
(position-offset start)))))
|
|
(tokens data delim)
|
|
|
|
|
|
(grammar
|
|
|
|
(s [(sexp-list) (reverse $1)])
|
|
|
|
(sexp [(DATUM) (build-so $1 1 1)]
|
|
[(OP sexp-list CP) (build-so (reverse $2) 1 3)]
|
|
[(HASHOP sexp-list CP) (build-so (list->vector (reverse $2)) 1 3)]
|
|
[(QUOTE sexp) (build-so (list 'quote $2) 1 2)]
|
|
[(QUASIQUOTE sexp) (build-so (list 'quasiquote $2) 1 2)]
|
|
[(UNQUOTE sexp) (build-so (list 'unquote $2) 1 2)]
|
|
[(UNQUOTE-SPLICING sexp) (build-so (list 'unquote-splicing $2) 1 2)]
|
|
[(OP sexp-list DOT sexp CP) (build-so (append (reverse $2) $4) 1 5)])
|
|
|
|
(sexp-list [() null]
|
|
[(sexp-list sexp) (cons $2 $1)]))))
|
|
|
|
(define (rs sn ip)
|
|
(port-count-lines! ip)
|
|
((scheme-parser sn) (lambda () (scheme-lexer ip))))
|
|
|
|
(define readsyntax
|
|
(case-lambda ((sn) (rs sn (current-input-port)))
|
|
((sn ip) (rs sn ip))))
|
|
|
|
(provide (rename readsyntax read-syntax))
|
|
|
|
) |