(module lex mzscheme ;; Provides the syntax used to create lexers and the functions needed to ;; create and use the buffer that the lexer reads from. See doc.txt. (require-for-syntax "private-lex/generate-code.ss" "private-lex/structs.ss") (require (lib "list.ss") (lib "readerr.ss" "syntax") "private-lex/token.ss") (provide lexer lexer-src-pos define-lex-abbrev define-lex-abbrevs make-lex-buf get-position position-offset position-line position-col position? define-tokens define-empty-tokens) (define-syntaxes (lexer lexer-src-pos) (values (build-lexer #'here '(lambda (x) x)) (build-lexer #'here '(lambda (x) (list x first-pos end-pos))))) (define-syntax (define-lex-abbrev stx) (syntax-case stx () ((_ name re) (syntax (define-syntax name (make-lex-abbrev (quote-syntax re))))) (_ (raise-syntax-error #f "Form should be (define-lex-abbrev name re)" stx)))) (define-syntax (define-lex-abbrevs stx) (syntax-case stx () ((_ x ...) (let* ((abbrev (syntax->list (syntax (x ...)))) (r (map (lambda (a) (syntax-case a () ((name re) (identifier? (syntax name)) (syntax (define-lex-abbrev name re))) (_ (raise-syntax-error 'Lexer-abbreviation "Form should be (identifier value)" a)))) abbrev))) (datum->syntax-object #'here (cons 'begin r) stx))) (_ (raise-syntax-error #f "Form should be (define-lex-abbrevs (name re) ...)" stx)))) ;; Lex buffer is NOT thread safe ;; c = char | eof ;; lex-buf = ;; (make-lex-buffer input-port (c list) (c list) int int int (int list) boolean) (define-struct lex-buffer (ip from to offset line col line-lengths tab-skips has-seen-eof?)) ;; make-lex-buf: input-port -> lex-buf (define make-lex-buf (case-lambda ((ip) (cond ((not (input-port? ip)) (raise-type-error 'make-lex-buf "input-port" 0 ip)) (else (make-lex-buffer ip null null 1 1 1 null null #f)))) ((ip offsets) (cond ((not (input-port? ip)) (raise-type-error 'make-lex-buf "input-port" 0 ip offsets)) ((or (not (= 3 (length offsets))) (not (andmap integer? offsets)) (not (andmap exact? offsets)) (not (andmap (lambda (x) (>= x 0)) offsets))) (raise-type-error 'make-lex-buf "list of 3 non-negative exact integers" 1 ip offsets)) (else (make-lex-buffer ip null null (caddr offsets) (car offsets) (cadr offsets) null null #f)))))) (define (get-next lb) (cond ((null? (lex-buffer-from lb)) (let ((res (read-char (lex-buffer-ip lb)))) (if (eof-object? res) (if (lex-buffer-has-seen-eof? lb) (raise-read-error (format "lex-buf: No characters left in input stream") #f #f #f #f #f) (set-lex-buffer-has-seen-eof?! lb #t))) res)) (else (begin0 (car (lex-buffer-from lb)) (set-lex-buffer-from! lb (cdr (lex-buffer-from lb))))))) ;; next-char: lex-buf -> c ;; gets the next character from the buffer (define (next-char lb) (let ((char-in (let ((real-char (get-next lb))) (if (eq? #\return real-char) (let ((second-char (get-next))) (if (not (eq? second-char #\newline)) (set-lex-buffer-from! lb (cons second-char (lex-buffer-from lb)))) #\newline) real-char)))) (set-lex-buffer-to! lb (cons char-in (lex-buffer-to lb))) (cond ((eq? #\tab char-in) (let ((skip-amt (- 8 (modulo (lex-buffer-col lb) 8)))) (set-lex-buffer-tab-skips! lb (cons skip-amt (lex-buffer-tab-skips lb))) (set-lex-buffer-col! lb (+ skip-amt (lex-buffer-col lb))))) ((eq? #\newline char-in) (set-lex-buffer-line-lengths! lb (cons (lex-buffer-col lb) (lex-buffer-line-lengths lb))) (set-lex-buffer-line! lb (add1 (lex-buffer-line lb))) (set-lex-buffer-col! lb 1)) (else (set-lex-buffer-col! lb (add1 (lex-buffer-col lb))))) (set-lex-buffer-offset! lb (add1 (lex-buffer-offset lb))) char-in)) ;; push-back: lex-buf * int -> c list ;; pushes the last read i characters back to be read again ;; returns the characters not pushed back and read after the last push-back ;; (in reverse order) (define (push-back lb i) (letrec ( ;; switch-buffers: c list * c list * int -> clist * clist ;; moves num-to-add characters from from to to. ;; (switch-buffers '(1 2 3 4) '(5 6 7 8) 2) = ;; (values '(3 4) '(2 1 5 6 7 8)) ;; (switch-buffers '(4 3 2 1) '(5 6 7 8) 2) = ;; (values '(2 1) '(3 4 5 6 7 8)) (switch-buffers (lambda (from to num-to-add) (cond ((= 0 num-to-add) (values from to)) (else (cond ((eq? #\newline (car from)) (set-lex-buffer-line! lb (sub1 (lex-buffer-line lb))) (set-lex-buffer-col! lb (car (lex-buffer-line-lengths lb))) (set-lex-buffer-line-lengths! lb (cdr (lex-buffer-line-lengths lb)))) ((eq? #\tab (car from)) (set-lex-buffer-col! lb (- (lex-buffer-col lb) (car (lex-buffer-tab-skips lb)))) (set-lex-buffer-tab-skips! lb (cdr (lex-buffer-tab-skips lb)))) (else (set-lex-buffer-col! lb (sub1 (lex-buffer-col lb))))) (switch-buffers (cdr from) (cons (car from) to) (sub1 num-to-add))))))) (let-values (((ret new-from) (switch-buffers (lex-buffer-to lb) (lex-buffer-from lb) i))) (set-lex-buffer-from! lb new-from) (set-lex-buffer-to! lb null) (set-lex-buffer-offset! lb (- (lex-buffer-offset lb) i)) (set-lex-buffer-line-lengths! lb null) (set-lex-buffer-tab-skips! lb null) ret))) (define-struct position (offset line col)) (define (get-position lb) (make-position (lex-buffer-offset lb) (lex-buffer-line lb) (lex-buffer-col lb))) )