*** empty log message ***

original commit: 6140639bdc5fbe802148e163b44a0dbe6a6d5f94
tokens
Scott Owens 21 years ago
parent 24f3240710
commit 596cdd18f7

@ -0,0 +1,24 @@
(module lex-plt-v200 mzscheme
(require (lib "lex.ss" "parser-tools")
(prefix : (lib "lex-sre.ss" "parser-tools")))
(provide epsilon
~
(rename :* *)
(rename :+ +)
(rename :? ?)
(rename :or :)
(rename :& &)
(rename :: @)
(rename :~ ^)
(rename :/ -))
(define-lex-trans epsilon
(syntax-rules ()
((_) "")))
(define-lex-trans ~
(syntax-rules ()
((_ re) (complement re)))))

@ -13,7 +13,8 @@
& &
~ ~
(rename sre-- -) (rename sre-- -)
(rename sre-/ /)/-only-chars) (rename sre-/ /)
/-only-chars)
(define-lex-trans sre-* (define-lex-trans sre-*
(syntax-rules () (syntax-rules ()
@ -72,6 +73,7 @@
((_ re ...) ((_ re ...)
(char-complement (union re ...))))) (char-complement (union re ...)))))
;; char-set difference
(define-lex-trans (sre-- stx) (define-lex-trans (sre-- stx)
(syntax-case stx () (syntax-case stx ()
((_) ((_)
@ -79,7 +81,7 @@
"must have at least one argument" "must have at least one argument"
stx)) stx))
((_ big-re re ...) ((_ big-re re ...)
(syntax (intersect big-re (complement (union re) ...)))))) (syntax (intersect big-re (~ (union re) ...))))))
(define-lex-trans (sre-/ stx) (define-lex-trans (sre-/ stx)
(syntax-case stx () (syntax-case stx ()

@ -11,14 +11,23 @@
(require (lib "readerr.ss" "syntax") (require (lib "readerr.ss" "syntax")
(lib "cffi.ss" "compiler")
"private-lex/token.ss") "private-lex/token.ss")
(provide lexer lexer-src-pos define-lex-abbrev define-lex-abbrevs define-lex-trans (provide lexer lexer-src-pos define-lex-abbrev define-lex-abbrevs define-lex-trans
position-offset position-line position-col position?
define-tokens define-empty-tokens token-name token-value token? file-path ;; Dealing with tokens and related structures
define-tokens define-empty-tokens token-name token-value token?
(struct position (offset line col))
(struct position-token (token start-pos end-pos))
;; File path for highlighting errors while lexing
file-path
;; Lex abbrevs for unicode char sets. See mzscheme manual section 3.4.
any-char any-string nothing alphabetic lower-case upper-case title-case any-char any-string nothing alphabetic lower-case upper-case title-case
numeric symbolic punctuation graphic whitespace blank iso-control numeric symbolic punctuation graphic whitespace blank iso-control
;; A regular expression operator
char-set) char-set)
(define file-path (make-parameter #f)) (define file-path (make-parameter #f))
@ -260,18 +269,16 @@
(cond (cond
(wrap? (wrap?
(let/ec ret (let/ec ret
(list (action first-pos end-pos value ret ip) (make-position-token (action first-pos end-pos value ret ip)
first-pos first-pos
end-pos))) end-pos)))
(else (else
(action first-pos end-pos value id ip))))) (action first-pos end-pos value id ip)))))
(define-struct position (offset line col))
(define (get-position ip) (define (get-position ip)
(let-values (((line col off) (port-next-location ip))) (let-values (((line col off) (port-next-location ip)))
(make-position off line col))) (make-position off line col)))
(define-syntax (create-unicode-abbrevs stx) (define-syntax (create-unicode-abbrevs stx)
(syntax-case stx () (syntax-case stx ()
((_ ctxt) ((_ ctxt)

@ -1,4 +1,3 @@
#cs
(module token-syntax mzscheme (module token-syntax mzscheme
;; The things needed at compile time to handle definition of tokens ;; The things needed at compile time to handle definition of tokens

@ -1,13 +1,44 @@
#cs
(module token mzscheme (module token mzscheme
(require-for-syntax "token-syntax.ss") (require-for-syntax "token-syntax.ss")
;; Defining tokens ;; Defining tokens
(provide define-tokens define-empty-tokens make-token token-name token-value token?) (provide define-tokens define-empty-tokens make-token token?
(protect (rename token-name real-token-name))
(protect (rename token-value real-token-value))
(rename token-name* token-name)
(rename token-value* token-value)
(struct position (offset line col))
(struct position-token (token start-pos end-pos)))
(define-struct token (name value) (make-inspector))
;; A token is either
;; - symbol
;; - (make-token symbol any)
(define-struct token (name value))
;; token-name*: token -> symbol
(define (token-name* t)
(cond
((symbol? t) t)
((token? t) (token-name t))
(else (raise-type-error
'token-name
"symbol or struct:token"
0
t))))
;; token-value*: token -> any
(define (token-value* t)
(cond
((symbol? t) #f)
((token? t) (token-value t))
(else (raise-type-error
'token-value
"symbol or struct:token"
0
t))))
(define-syntaxes (define-tokens define-empty-tokens) (define-syntaxes (define-tokens define-empty-tokens)
(let ((define-tokens-helper (let ((define-tokens-helper
@ -36,7 +67,9 @@
n n
n) n)
,@(if empty? '() '(x))) ,@(if empty? '() '(x)))
(make-token ',n ,(if empty? #f 'x)))) ,(if empty?
`',n
`(make-token ',n x))))
(syntax->list (syntax (terms ...))))) (syntax->list (syntax (terms ...)))))
stx)) stx))
((_ ...) ((_ ...)
@ -47,5 +80,8 @@
(values (values
(lambda (stx) (define-tokens-helper stx #f)) (lambda (stx) (define-tokens-helper stx #f))
(lambda (stx) (define-tokens-helper stx #t))))) (lambda (stx) (define-tokens-helper stx #t)))))
)
(define-struct position (offset line col))
(define-struct position-token (token start-pos end-pos))
)

@ -1,7 +1,6 @@
;; Constructs to create and access grammars, the internal ;; Constructs to create and access grammars, the internal
;; representation of the input to the parser generator. ;; representation of the input to the parser generator.
#cs
(module grammar mzscheme (module grammar mzscheme
(require (lib "class.ss") (require (lib "class.ss")

@ -1,4 +1,3 @@
#cs
(module graph mzscheme (module graph mzscheme
(provide digraph) (provide digraph)

@ -1,4 +1,3 @@
#cs
(module input-file-parser mzscheme (module input-file-parser mzscheme
;; routines for parsing the input to the parser generator and producing a ;; routines for parsing the input to the parser generator and producing a

@ -1,4 +1,3 @@
#cs
(module lalr mzscheme (module lalr mzscheme
;; Compute LALR lookaheads from DeRemer and Pennello 1982 ;; Compute LALR lookaheads from DeRemer and Pennello 1982

@ -1,4 +1,3 @@
#cs
(module lr0 mzscheme (module lr0 mzscheme
;; Handle the LR0 automaton ;; Handle the LR0 automaton

@ -1,4 +1,3 @@
#cs
(module parser-actions mzscheme (module parser-actions mzscheme
;; The entries into the action table ;; The entries into the action table

@ -1,4 +1,3 @@
#cs
(module parser-builder mzscheme (module parser-builder mzscheme
(require "input-file-parser.ss" (require "input-file-parser.ss"

@ -1,4 +1,3 @@
#cs
(module table mzscheme (module table mzscheme
;; Routine to build the LALR table ;; Routine to build the LALR table

@ -1,4 +1,3 @@
#cs
(module yacc-helper mzscheme (module yacc-helper mzscheme
(require (lib "list.ss") (require (lib "list.ss")

@ -1,5 +1,6 @@
(module yacc-to-scheme mzscheme (module yacc-to-scheme mzscheme
(require (lib "lex.ss" "parser-tools") (require (lib "lex.ss" "parser-tools")
(prefix : (lib "lex-sre.ss" "parser-tools"))
(lib "yacc.ss" "parser-tools") (lib "yacc.ss" "parser-tools")
(lib "readerr.ss" "syntax") (lib "readerr.ss" "syntax")
(lib "list.ss")) (lib "list.ss"))
@ -7,24 +8,24 @@
(define match-double-string (define match-double-string
(lexer (lexer
((^ #\" #\\) (cons (car (string->list lexeme)) ((:* (:~ #\" #\\)) (append (string->list lexeme)
(match-double-string input-port))) (match-double-string input-port)))
((@ #\\ (- #\000 #\377)) (cons (string-ref lexeme 1) (match-double-string input-port))) ((:: #\\ any-char) (cons (string-ref lexeme 1) (match-double-string input-port)))
(#\" null))) (#\" null)))
(define match-single-string (define match-single-string
(lexer (lexer
((^ #\' #\\) (cons (car (string->list lexeme)) ((:* (:~ #\' #\\)) (append (string->list lexeme)
(match-single-string input-port))) (match-single-string input-port)))
((@ #\\ (- #\000 #\377)) (cons (string-ref lexeme 1) (match-single-string input-port))) ((:: #\\ any-char) (cons (string-ref lexeme 1) (match-single-string input-port)))
(#\' null))) (#\' null)))
(define-lex-abbrevs (define-lex-abbrevs
(letter (: (- "a" "z") (- "A" "Z"))) (letter (:or (:/ "a" "z") (:/ "A" "Z")))
(digit (- "0" "9")) (digit (:/ "0" "9"))
(initial (: letter "!" "$" "%" "&" "*" "/" "<" "=" ">" "?" "^" "_" "~" "@")) (initial (:or letter (char-set "!$%&*/<=>?^_~@")))
(subsequent (: initial digit "+" "-" "." "@")) (subsequent (:or initial digit (char-set "+-.@")))
(comment (@ "/*" (* (: (^ "*") (@ "*" (^ "/")))) "*/"))) (comment (:: "/*" (complement (:: any-string "*/" any-string)) "*/")))
(define-empty-tokens x (define-empty-tokens x
(EOF PIPE |:| SEMI |%%| %prec)) (EOF PIPE |:| SEMI |%%| %prec))
@ -34,14 +35,15 @@
(define get-token-grammar (define get-token-grammar
(lexer-src-pos (lexer-src-pos
("%%" '|%%|) ("%%" '|%%|)
((: ":") (string->symbol lexeme)) (":" (string->symbol lexeme))
("%prec" (string->symbol lexeme)) ("%prec" (string->symbol lexeme))
(#\| 'PIPE) (#\| 'PIPE)
((+ (: #\newline #\tab " " comment (@ "{" (* (^ "}")) "}"))) (return-without-pos (get-token-grammar input-port))) ((:+ (:or #\newline #\tab " " comment (:: "{" (:* (:~ "}")) "}")))
(return-without-pos (get-token-grammar input-port)))
(#\; 'SEMI) (#\; 'SEMI)
(#\' (token-STRING (string->symbol (list->string (match-single-string input-port))))) (#\' (token-STRING (string->symbol (list->string (match-single-string input-port)))))
(#\" (token-STRING (string->symbol (list->string (match-double-string input-port))))) (#\" (token-STRING (string->symbol (list->string (match-double-string input-port)))))
((@ initial (* subsequent)) (token-SYM (string->symbol lexeme))))) ((:: initial (:* subsequent)) (token-SYM (string->symbol lexeme)))))
(define (parse-grammar enter-term enter-empty-term enter-non-term) (define (parse-grammar enter-term enter-empty-term enter-non-term)
(parser (parser

@ -1,4 +1,3 @@
#cs
(module yacc mzscheme (module yacc mzscheme
(require-for-syntax "private-yacc/parser-builder.ss" (require-for-syntax "private-yacc/parser-builder.ss"
@ -6,6 +5,7 @@
(require "private-yacc/array2d.ss" (require "private-yacc/array2d.ss"
"private-lex/token.ss" "private-lex/token.ss"
"private-yacc/parser-actions.ss" "private-yacc/parser-actions.ss"
(lib "etc.ss")
(lib "pretty.ss") (lib "pretty.ss")
(lib "readerr.ss" "syntax")) (lib "readerr.ss" "syntax"))
@ -178,85 +178,80 @@
(reduce-stack (cdr stack) (sub1 num) ret-vals src-pos))) (reduce-stack (cdr stack) (sub1 num) ret-vals src-pos)))
(else (values stack ret-vals)))) (else (values stack ret-vals))))
(define-struct stack-frame (state value start-pos end-pos) (make-inspector)) ;; extract-helper : (symbol or make-token) any any -> symbol any any any
(define (extract-helper tok v1 v2)
(cond
((symbol? tok)
(values tok #f v1 v2))
((token? tok)
(values (real-token-name tok) (real-token-value tok) v1 v2))
(else (raise-type-error 'parser
"symbol or struct:token"
0
tok))))
(define (make-empty-stack i) (list (make-stack-frame i #f #f #f))) ;; extract-src-pos : position-token -> symbol any any any
(define (extract-src-pos ip)
(cond
((position-token? ip)
(extract-helper (position-token-token ip)
(position-token-start-pos ip)
(position-token-end-pos ip)))
(else
(raise-type-error 'parser
"struct:position-token"
0
ip))))
(define (false-thunk) #f) ;; extract-no-src-pos : (symbol or make-token) -> symbol any any any
(define (extract-no-src-pos ip)
(extract-helper ip #f #f))
(define-struct stack-frame (state value start-pos end-pos))
(define (make-empty-stack i) (list (make-stack-frame i #f #f #f)))
;; The table format is an array2d that maps each state/term pair to either ;; The table format is an array2d that maps each state/term pair to either
;; an accept, shift or reduce structure - or a #f. Except that we will encode ;; an accept, shift or reduce structure - or a #f. Except that we will encode
;; by changing (make-accept) -> 'accept, (make-shift i) -> i and ;; by changing (make-accept) -> 'accept, (make-shift i) -> i and
;; (make-reduce i1 i2 i3) -> #(i1 i2 i3) ;; (make-reduce i1 i2 i3) -> #(i1 i2 i3)
(define (parser-body debug err starts ends table term-sym->index actions src-pos) (define (parser-body debug? err starts ends table term-sym->index actions src-pos)
(letrec ((input->token (local ((define extract
(if src-pos (if src-pos
(lambda (ip) extract-src-pos
(cond extract-no-src-pos))
((and (list? ip) (= 3 (length ip)))
(let ((tok (car ip)))
(cond
((symbol? tok) (make-token tok #f))
((token? tok) tok)
(else (raise-type-error 'parser
"(list (token or symbol) position position)"
0
ip)))))
(else
(raise-type-error 'parser
"(list (token or symbol) position position)"
0
ip))))
(lambda (ip)
(cond
((symbol? ip) (make-token ip #f))
((token? ip) ip)
(else (raise-type-error 'parser "token or symbol" 0 ip))))))
(fix-error (define (fix-error stack tok val start-pos end-pos get-token)
(lambda (stack tok ip get-token) (when debug? (pretty-print stack))
(when debug (pretty-print stack)) (local ((define (remove-input tok val start-pos end-pos)
(letrec ((remove-input (if (memq tok ends)
(lambda ()
(if (memq (token-name tok) ends)
(raise-read-error "parser: Cannot continue after error" (raise-read-error "parser: Cannot continue after error"
#f #f #f #f #f) #f #f #f #f #f)
(let ((a (find-action stack tok ip))) (let ((a (find-action stack tok val start-pos end-pos)))
(cond (cond
((shift? a) ((shift? a)
;;(printf "shift:~a~n" (shift-state a)) ;;(printf "shift:~a~n" (shift-state a))
(cons (if src-pos (cons (make-stack-frame (shift-state a)
(make-stack-frame (shift-state a) val
(token-value tok) start-pos
(cadr ip) end-pos)
(caddr ip))
(make-stack-frame (shift-state a)
(token-value tok)
#f
#f))
stack)) stack))
(else (else
;;(printf "discard input:~a~n" tok) ;;(printf "discard input:~a~n" tok)
(set! ip (get-token)) (let-values (((tok val start-pos end-pos)
(set! tok (input->token ip)) (extract (get-token))))
(remove-input))))))) (remove-input tok val start-pos end-pos))))))))
(remove-states (let remove-states ()
(lambda () (let ((a (find-action stack 'error #f start-pos end-pos)))
(let ((a (find-action stack (make-token 'error #f) ip)))
(cond (cond
((shift? a) ((shift? a)
;;(printf "shift:~a~n" (shift-state a)) ;;(printf "shift:~a~n" (shift-state a))
(set! stack (set! stack
(cons (cons
(if src-pos
(make-stack-frame (shift-state a)
#f
(cadr ip)
(caddr ip))
(make-stack-frame (shift-state a) (make-stack-frame (shift-state a)
#f #f
#f start-pos
#f)) end-pos)
stack)) stack))
(remove-input)) (remove-input))
(else (else
@ -268,45 +263,37 @@
(else (else
(set! stack (cdr stack)) (set! stack (cdr stack))
(remove-states))))))))) (remove-states)))))))))
(remove-states))))
(find-action (define (find-action stack tok val start-pos end-pos)
(lambda (stack tok ip)
(let ((token-index (hash-table-get term-sym->index (let ((token-index (hash-table-get term-sym->index
(token-name tok) tok
false-thunk))) (lambda () #f))))
(if token-index (if token-index
(array2d-ref table (array2d-ref table
(stack-frame-state (car stack)) (stack-frame-state (car stack))
token-index) token-index)
(begin (begin
(if src-pos (if src-pos
(err #f (token-name tok) (token-value tok) (cadr ip) (caddr ip)) (err #f tok val start-pos end-pos)
(err #f (token-name tok) (token-value tok))) (err #f tok val))
(raise-read-error (format "parser: got token of unknown type ~a" (token-name tok)) (raise-read-error (format "parser: got token of unknown type ~a" tok)
#f #f #f #f #f)))))) #f #f #f #f #f)))))
(make-parser (define (make-parser start-number)
(lambda (start-number)
(lambda (get-token) (lambda (get-token)
(let parsing-loop ((stack (make-empty-stack start-number)) (let parsing-loop ((stack (make-empty-stack start-number))
(ip (get-token))) (ip (get-token)))
(let* ((tok (input->token ip)) (let-values (((tok val start-pos end-pos)
(action (find-action stack tok ip))) (extract ip)))
(let ((action (find-action stack tok val start-pos end-pos)))
(cond (cond
((shift? action) ((shift? action)
;; (printf "shift:~a~n" (shift-state action)) ;; (printf "shift:~a~n" (shift-state action))
(let ((val (token-value tok))) (parsing-loop (cons (make-stack-frame (shift-state action)
(parsing-loop (cons (if src-pos
(make-stack-frame (shift-state action)
val
(cadr ip)
(caddr ip))
(make-stack-frame (shift-state action)
val val
#f start-pos
#f)) end-pos)
stack) stack)
(get-token)))) (get-token)))
((reduce? action) ((reduce? action)
;; (printf "reduce:~a~n" (reduce-prod-num action)) ;; (printf "reduce:~a~n" (reduce-prod-num action))
(let-values (((new-stack args) (let-values (((new-stack args)
@ -322,9 +309,9 @@
(make-stack-frame (make-stack-frame
goto goto
(apply (vector-ref actions (reduce-prod-num action)) args) (apply (vector-ref actions (reduce-prod-num action)) args)
(if (null? args) (cadr ip) (cadr args)) (if (null? args) start-pos (cadr args))
(if (null? args) (if (null? args)
(caddr ip) end-pos
(list-ref args (- (* (reduce-rhs-length action) 3) 1)))) (list-ref args (- (* (reduce-rhs-length action) 3) 1))))
(make-stack-frame (make-stack-frame
goto goto
@ -338,9 +325,10 @@
(stack-frame-value (car stack))) (stack-frame-value (car stack)))
(else (else
(if src-pos (if src-pos
(err #t (token-name tok) (token-value tok) (cadr ip) (caddr ip)) (err #t tok val start-pos end-pos)
(err #t (token-name tok) (token-value tok))) (err #t tok val))
(parsing-loop (fix-error stack tok ip get-token) (get-token)))))))))) (parsing-loop (fix-error stack tok val start-pos end-pos get-token)
(get-token))))))))))
(cond (cond
((null? (cdr starts)) (make-parser 0)) ((null? (cdr starts)) (make-parser 0))
(else (else

Loading…
Cancel
Save