diff --git a/collects/parser-tools/lex.ss b/collects/parser-tools/lex.ss
index c96d37b..b97a011 100644
--- a/collects/parser-tools/lex.ss
+++ b/collects/parser-tools/lex.ss
@@ -144,32 +144,73 @@
   (define-struct lex-buffer (ip from to offset line col line-lengths))
 
   ;; make-lex-buf: input-port -> lex-buf
-  (define (make-lex-buf ip)
-    (make-lex-buffer ip null null 1 1 1 null))
+  ;; offsets, when given, is (list line col offset)
+  (define make-lex-buf
+    (case-lambda
+      ((ip)
+       (cond
+         ((not (input-port? ip))
+          (raise-type-error 'make-lex-buf "input-port" 0 ip))
+         (else
+          (make-lex-buffer ip null null 0 0 0 null))))
+      ((ip offsets)
+       (cond
+         ((not (input-port? ip))
+          (raise-type-error 'make-lex-buf "input-port" 0 ip offsets))
+         ((or (not (list? offsets))
+              (not (= 3 (length offsets)))
+              (not (andmap integer? offsets))
+              (not (andmap exact? offsets))
+              (not (andmap (lambda (x) (>= x 0)) offsets)))
+          (raise-type-error 'make-lex-buf "list of 3 non-negative exact integers" 1 ip offsets))
+         (else
+          (make-lex-buffer ip null null (caddr offsets) (car offsets) (cadr offsets) null))))))
 
   ;; next-char: lex-buf -> c
   ;; gets the next character from the buffer
   (define (next-char lb)
-    (let ((char-in
-           (cond
-             ((null? (lex-buffer-from lb))
-              (read-char (lex-buffer-ip lb)))
-             (else (begin0
-                     (car (lex-buffer-from lb))
-                     (set-lex-buffer-from! lb (cdr (lex-buffer-from lb))))))))
-      (set-lex-buffer-to! lb (cons char-in (lex-buffer-to lb)))
-      (cond
-        ((eq? #\newline char-in)
-         (set-lex-buffer-line-lengths!
-          lb
-          (cons (lex-buffer-col lb)
-                (lex-buffer-line-lengths lb)))
-         (set-lex-buffer-line! lb (add1 (lex-buffer-line lb)))
-         (set-lex-buffer-col! lb 1))
-        (else
-         (set-lex-buffer-col! lb (add1 (lex-buffer-col lb)))))
-      (set-lex-buffer-offset! lb (add1 (lex-buffer-offset lb)))
-      char-in))
+    (let ((get-next
+           (lambda ()
+             (cond
+               ((null? (lex-buffer-from lb))
+                (read-char (lex-buffer-ip lb)))
+               (else
+                (begin0
+                  (car (lex-buffer-from lb))
+                  (set-lex-buffer-from! lb (cdr (lex-buffer-from lb)))))))))
+      ;; Normalize line endings: a #\return, alone or followed by
+      ;; #\newline, reaches the lexer as one #\newline.  Comparisons use
+      ;; eqv? rather than char=? because read-char yields eof at end of input.
+      (let ((char-in
+             (let ((real-char (get-next)))
+               (if (eqv? #\return real-char)
+                   (let ((second-char (get-next)))
+                     (if (not (eqv? second-char #\newline))
+                         (set-lex-buffer-from!
+                          lb
+                          (cons second-char (lex-buffer-from lb))))
+                     #\newline)
+                   real-char))))
+        (set-lex-buffer-to! lb (cons char-in (lex-buffer-to lb)))
+        (cond
+          ((eqv? #\tab char-in)
+           ;; A tab moves the column to the next multiple of 8 but is one
+           ;; character of input, so the offset advances by exactly 1.
+           (let ((skip-amt (- 8 (modulo (lex-buffer-col lb) 8))))
+             (set-lex-buffer-col! lb (+ skip-amt (lex-buffer-col lb)))
+             (set-lex-buffer-offset! lb (add1 (lex-buffer-offset lb)))))
+          ((eqv? #\newline char-in)
+           (set-lex-buffer-line-lengths!
+            lb
+            (cons (lex-buffer-col lb)
+                  (lex-buffer-line-lengths lb)))
+           (set-lex-buffer-line! lb (add1 (lex-buffer-line lb)))
+           (set-lex-buffer-col! lb 0)
+           (set-lex-buffer-offset! lb (add1 (lex-buffer-offset lb))))
+          (else
+           (set-lex-buffer-col! lb (add1 (lex-buffer-col lb)))
+           (set-lex-buffer-offset! lb (add1 (lex-buffer-offset lb)))))
+        char-in)))
 
   ;; push-back: lex-buf * int -> c list
   ;; pushes the last read i characters back to be read again
@@ -189,7 +230,7 @@
         ((= 0 num-to-add) (values from to))
         (else
          (cond
-           ((eq? #\newline (car from))
+           ((char=? #\newline (car from))
             (set-lex-buffer-line! lb (sub1 (lex-buffer-line lb)))