You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
br-parser-tools/collects/parser-tools/examples/read.ss

246 lines
8.5 KiB
Scheme

;; This implements the equivalent of mzscheme's read-syntax for R5RS scheme.
;; It has not been thoroughly tested. Also it will read an entire file into a
;; list of syntax objects, instead of returning one syntax object at a time
;; Everything in this module will be read with case sensitivity.
#cs
(module read mzscheme
(require (lib "lex.ss" "parser-tools")
(lib "yacc.ss" "parser-tools")
(lib "readerr.ss" "syntax"))
(define-tokens data (DATUM))
(define-empty-tokens delim (OP CP HASHOP QUOTE QUASIQUOTE UNQUOTE UNQUOTE-SPLICING DOT EOF))
(define scheme-lexer
(lexer-src-pos
;; Skip comments, without accumulating extra position information
[(: (whitespace) (comment)) (return-without-pos (scheme-lexer lex-buf))]
["#t" (token-DATUM #t)]
["#f" (token-DATUM #f)]
[(@ "#\\" (any)) (token-DATUM (caddr (string->list (get-lexeme))))]
["#\\space" (token-DATUM #\space)]
["#\\newline" (token-DATUM #\newline)]
[(: (@ (initial) (* (subsequent))) + - "...") (token-DATUM (string->symbol (get-lexeme)))]
[#\" (token-DATUM (list->string (get-string-token lex-buf)))]
[#\( 'OP]
[#\) 'CP]
[#\[ 'OP]
[#\] 'CP]
["#(" 'HASHOP]
[(num2) (token-DATUM (string->number (get-lexeme) 2))]
[(num8) (token-DATUM (string->number (get-lexeme) 8))]
[(num10) (token-DATUM (string->number (get-lexeme) 10))]
[(num16) (token-DATUM (string->number (get-lexeme) 16))]
["'" 'QUOTE]
["`" 'QUASIQUOTE]
["," 'UNQUOTE]
[",@" 'UNQUOTE-SPLICING]
["." 'DOT]
[(eof) 'EOF]))
(define get-string-token
(lexer
[(^ #\" #\\) (cons (car (string->list (get-lexeme)))
(get-string-token lex-buf))]
[(@ #\\ #\\) (cons #\\ (get-string-token lex-buf))]
[(@ #\\ #\") (cons #\" (get-string-token lex-buf))]
[#\" null]))
(define-lex-abbrevs
[any (- #\000 #\377)]
[letter (: (- a z) (- A Z))]
[digit (- #\0 #\9)]
[whitespace (: #\newline #\return #\tab #\space #\vtab)]
[initial (: (letter) ! $ % & * / : < = > ? ^ _ ~)]
[subsequent (: (initial) (digit) + - #\. @)]
[comment (@ #\; (* (^ #\newline)) #\newline)]
; [numR (@ (prefixR) (complexR))]
; [complexR (: (realR)
; (@ (realR) @ (realR))
; (@ (realR) + (urealR) i)
; (@ (realR) - (urealR) i)
; (@ (realR) + i)
; (@ (realR) - i)
; (@ + (urealR) i)
; (@ - (urealR) i)
; (@ + i)
; (@ - i))]
; [realR (@ (sign) (urealR))]
; [urealR (: (uintegerR) (@ (uintegerR) / (uintegerR)) (decimalR))]
; [uintegerR (@ (+ (digitR)) (* #\#))]
; [prefixR (: (@ (radixR) (exactness))
; (@ (exactness) (radixR)))]
; [numR (@ (prefixR) (complexR))]
; [complexR (: (realR)
; (@ (realR) @ (realR))
[num2 (@ (prefix2) (complex2))]
[complex2 (: (real2)
(@ (real2) @ (real2))
(@ (real2) + (ureal2) i)
(@ (real2) - (ureal2) i)
(@ (real2) + i)
(@ (real2) - i)
(@ + (ureal2) i)
(@ - (ureal2) i)
(@ + i)
(@ - i))]
[real2 (@ (sign) (ureal2))]
[ureal2 (: (uinteger2) (@ (uinteger2) / (uinteger2)))]
[uinteger2 (@ (+ (digit2)) (* #\#))]
[prefix2 (: (@ (radix2) (exactness))
(@ (exactness) (radix2)))]
[radix2 "#b"]
[digit2 (: #\0 #\1)]
[num8 (@ (prefix8) (complex8))]
[complex8 (: (real8)
(@ (real8) @ (real8))
(@ (real8) + (ureal8) i)
(@ (real8) - (ureal8) i)
(@ (real8) + i)
(@ (real8) - i)
(@ + (ureal8) i)
(@ - (ureal8) i)
(@ + i)
(@ - i))]
[real8 (@ (sign) (ureal8))]
[ureal8 (: (uinteger8) (@ (uinteger8) / (uinteger8)))]
[uinteger8 (@ (+ (digit8)) (* #\#))]
[prefix8 (: (@ (radix8) (exactness))
(@ (exactness) (radix8)))]
[radix8 "#o"]
[digit8 (- #\0 #\7)]
[num10 (@ (prefix10) (complex10))]
[complex10 (: (real10)
(@ (real10) @ (real10))
(@ (real10) + (ureal10) i)
(@ (real10) - (ureal10) i)
(@ (real10) + i)
(@ (real10) - i)
(@ + (ureal10) i)
(@ - (ureal10) i)
(@ + i)
(@ - i))]
[real10 (@ (sign) (ureal10))]
[ureal10 (: (uinteger10) (@ (uinteger10) / (uinteger10)) (decimal10))]
[uinteger10 (@ (+ (digit10)) (* #\#))]
[prefix10 (: (@ (radix10) (exactness))
(@ (exactness) (radix10)))]
[radix10 (: (@) "#d")]
[digit10 (digit)]
[decimal10 (: (@ (uinteger10) (suffix))
(@ #\. (+ (digit10)) (* #\#) (suffix))
(@ (+ (digit10)) #\. (* (digit10)) (* #\#) (suffix))
(@ (+ (digit10)) (+ #\#) #\. (* #\#) (suffix)))]
[num16 (@ (prefix16) (complex16))]
[complex16 (: (real16)
(@ (real16) @ (real16))
(@ (real16) + (ureal16) i)
(@ (real16) - (ureal16) i)
(@ (real16) + i)
(@ (real16) - i)
(@ + (ureal16) i)
(@ - (ureal16) i)
(@ + i)
(@ - i))]
[real16 (@ (sign) (ureal16))]
[ureal16 (: (uinteger16) (@ (uinteger16) / (uinteger16)))]
[uinteger16 (@ (+ (digit16)) (* #\#))]
[prefix16 (: (@ (radix16) (exactness))
(@ (exactness) (radix16)))]
[radix16 "#x"]
[digit16 (: (digit) (- #\a #\f) (- #\A #\F))]
[suffix (: (@) (@ (exponent-marker) (sign) (+ (digit10))))]
[exponent-marker (: e s f d l)]
[sign (: (@) + -)]
[exactness (: (@) "#i" "#e")])
(define stx-for-original-property (read-syntax #f (open-input-string "original")))
;; A macro to build the syntax object
(define-syntax (build-so stx)
(syntax-case stx ()
((_ value start end)
(with-syntax ((start-pos (datum->syntax-object
(syntax end)
(string->symbol
(format "$~a-start-pos"
(syntax-object->datum (syntax start))))))
(end-pos (datum->syntax-object
(syntax end)
(string->symbol
(format "$~a-end-pos"
(syntax-object->datum (syntax end))))))
(source (datum->syntax-object
(syntax end)
'source-name)))
(syntax
(datum->syntax-object
#f
value
(list source
(position-line start-pos)
(position-col start-pos)
(position-offset start-pos)
(- (position-offset end-pos)
(position-offset start-pos)))
stx-for-original-property))))))
(define (scheme-parser source-name)
(parser
(src-pos)
(start s)
(end EOF)
(error (lambda (a name val start end)
(raise-read-error
"read-error"
source-name
(position-line start)
(position-col start)
(position-offset start)
(- (position-offset end)
(position-offset start)))))
(tokens data delim)
(grammar
(s [(sexp-list) (reverse $1)])
(sexp [(DATUM) (build-so $1 1 1)]
[(OP sexp-list CP) (build-so (reverse $2) 1 3)]
[(HASHOP sexp-list CP) (build-so (list->vector (reverse $2)) 1 3)]
[(QUOTE sexp) (build-so (list 'quote $2) 1 2)]
[(QUASIQUOTE sexp) (build-so (list 'quasiquote $2) 1 2)]
[(UNQUOTE sexp) (build-so (list 'unquote $2) 1 2)]
[(UNQUOTE-SPLICING sexp) (build-so (list 'unquote-splicing $2) 1 2)]
[(OP sexp-list DOT sexp CP) (build-so (append (reverse $2) $4) 1 5)])
(sexp-list [() null]
[(sexp-list sexp) (cons $2 $1)]))))
(define (rs sn ip off)
(let ((lb (make-lex-buf ip off)))
((scheme-parser sn) (lambda () (scheme-lexer lb)))))
(define readsyntax
(case-lambda ((sn) (rs sn (current-input-port) (list 0 0 0)))
((sn ip) (rs sn ip (list 0 0 0)))
((sn ip off) (rs sn ip off))))
(provide (rename readsyntax read-syntax))
)