*** empty log message ***

original commit: 6140639bdc5fbe802148e163b44a0dbe6a6d5f94
tokens
Scott Owens 21 years ago
parent 24f3240710
commit 596cdd18f7

@ -0,0 +1,24 @@
(module lex-plt-v200 mzscheme
(require (lib "lex.ss" "parser-tools")
(prefix : (lib "lex-sre.ss" "parser-tools")))
(provide epsilon
~
(rename :* *)
(rename :+ +)
(rename :? ?)
(rename :or :)
(rename :& &)
(rename :: @)
(rename :~ ^)
(rename :/ -))
(define-lex-trans epsilon
(syntax-rules ()
((_) "")))
(define-lex-trans ~
(syntax-rules ()
((_ re) (complement re)))))

@ -13,7 +13,8 @@
&
~
(rename sre-- -)
(rename sre-/ /)/-only-chars)
(rename sre-/ /)
/-only-chars)
(define-lex-trans sre-*
(syntax-rules ()
@ -72,6 +73,7 @@
((_ re ...)
(char-complement (union re ...)))))
;; char-set difference
(define-lex-trans (sre-- stx)
(syntax-case stx ()
((_)
@ -79,7 +81,7 @@
"must have at least one argument"
stx))
((_ big-re re ...)
(syntax (intersect big-re (complement (union re) ...))))))
(syntax (intersect big-re (~ (union re) ...))))))
(define-lex-trans (sre-/ stx)
(syntax-case stx ()

@ -11,14 +11,23 @@
(require (lib "readerr.ss" "syntax")
(lib "cffi.ss" "compiler")
"private-lex/token.ss")
(provide lexer lexer-src-pos define-lex-abbrev define-lex-abbrevs define-lex-trans
position-offset position-line position-col position?
define-tokens define-empty-tokens token-name token-value token? file-path
;; Dealing with tokens and related structures
define-tokens define-empty-tokens token-name token-value token?
(struct position (offset line col))
(struct position-token (token start-pos end-pos))
;; File path for highlighting errors while lexing
file-path
;; Lex abbrevs for unicode char sets. See mzscheme manual section 3.4.
any-char any-string nothing alphabetic lower-case upper-case title-case
numeric symbolic punctuation graphic whitespace blank iso-control
;; A regular expression operator
char-set)
(define file-path (make-parameter #f))
@ -260,18 +269,16 @@
(cond
(wrap?
(let/ec ret
(list (action first-pos end-pos value ret ip)
first-pos
end-pos)))
(make-position-token (action first-pos end-pos value ret ip)
first-pos
end-pos)))
(else
(action first-pos end-pos value id ip)))))
(define-struct position (offset line col))
(define (get-position ip)
(let-values (((line col off) (port-next-location ip)))
(make-position off line col)))
(define-syntax (create-unicode-abbrevs stx)
(syntax-case stx ()
((_ ctxt)

@ -1,4 +1,3 @@
#cs
(module token-syntax mzscheme
;; The things needed at compile time to handle definition of tokens

@ -1,13 +1,44 @@
#cs
(module token mzscheme
(require-for-syntax "token-syntax.ss")
;; Defining tokens
(provide define-tokens define-empty-tokens make-token token-name token-value token?)
(provide define-tokens define-empty-tokens make-token token?
(protect (rename token-name real-token-name))
(protect (rename token-value real-token-value))
(rename token-name* token-name)
(rename token-value* token-value)
(struct position (offset line col))
(struct position-token (token start-pos end-pos)))
(define-struct token (name value) (make-inspector))
;; A token is either
;; - symbol
;; - (make-token symbol any)
(define-struct token (name value))
;; token-name*: token -> symbol
(define (token-name* t)
(cond
((symbol? t) t)
((token? t) (token-name t))
(else (raise-type-error
'token-name
"symbol or struct:token"
0
t))))
;; token-value*: token -> any
(define (token-value* t)
(cond
((symbol? t) #f)
((token? t) (token-value t))
(else (raise-type-error
'token-value
"symbol or struct:token"
0
t))))
(define-syntaxes (define-tokens define-empty-tokens)
(let ((define-tokens-helper
@ -36,7 +67,9 @@
n
n)
,@(if empty? '() '(x)))
(make-token ',n ,(if empty? #f 'x))))
,(if empty?
`',n
`(make-token ',n x))))
(syntax->list (syntax (terms ...)))))
stx))
((_ ...)
@ -47,5 +80,8 @@
(values
(lambda (stx) (define-tokens-helper stx #f))
(lambda (stx) (define-tokens-helper stx #t)))))
)
(define-struct position (offset line col))
(define-struct position-token (token start-pos end-pos))
)

@ -1,7 +1,6 @@
;; Constructs to create and access grammars, the internal
;; representation of the input to the parser generator.
#cs
(module grammar mzscheme
(require (lib "class.ss")

@ -1,4 +1,3 @@
#cs
(module graph mzscheme
(provide digraph)

@ -1,4 +1,3 @@
#cs
(module input-file-parser mzscheme
;; routines for parsing the input to the parser generator and producing a

@ -1,4 +1,3 @@
#cs
(module lalr mzscheme
;; Compute LALR lookaheads from DeRemer and Pennello 1982

@ -1,4 +1,3 @@
#cs
(module lr0 mzscheme
;; Handle the LR0 automaton

@ -1,4 +1,3 @@
#cs
(module parser-actions mzscheme
;; The entries into the action table

@ -1,4 +1,3 @@
#cs
(module parser-builder mzscheme
(require "input-file-parser.ss"

@ -1,4 +1,3 @@
#cs
(module table mzscheme
;; Routine to build the LALR table

@ -1,4 +1,3 @@
#cs
(module yacc-helper mzscheme
(require (lib "list.ss")

@ -1,5 +1,6 @@
(module yacc-to-scheme mzscheme
(require (lib "lex.ss" "parser-tools")
(prefix : (lib "lex-sre.ss" "parser-tools"))
(lib "yacc.ss" "parser-tools")
(lib "readerr.ss" "syntax")
(lib "list.ss"))
@ -7,24 +8,24 @@
(define match-double-string
(lexer
((^ #\" #\\) (cons (car (string->list lexeme))
(match-double-string input-port)))
((@ #\\ (- #\000 #\377)) (cons (string-ref lexeme 1) (match-double-string input-port)))
((:* (:~ #\" #\\)) (append (string->list lexeme)
(match-double-string input-port)))
((:: #\\ any-char) (cons (string-ref lexeme 1) (match-double-string input-port)))
(#\" null)))
(define match-single-string
(lexer
((^ #\' #\\) (cons (car (string->list lexeme))
(match-single-string input-port)))
((@ #\\ (- #\000 #\377)) (cons (string-ref lexeme 1) (match-single-string input-port)))
((:* (:~ #\' #\\)) (append (string->list lexeme)
(match-single-string input-port)))
((:: #\\ any-char) (cons (string-ref lexeme 1) (match-single-string input-port)))
(#\' null)))
(define-lex-abbrevs
(letter (: (- "a" "z") (- "A" "Z")))
(digit (- "0" "9"))
(initial (: letter "!" "$" "%" "&" "*" "/" "<" "=" ">" "?" "^" "_" "~" "@"))
(subsequent (: initial digit "+" "-" "." "@"))
(comment (@ "/*" (* (: (^ "*") (@ "*" (^ "/")))) "*/")))
(letter (:or (:/ "a" "z") (:/ "A" "Z")))
(digit (:/ "0" "9"))
(initial (:or letter (char-set "!$%&*/<=>?^_~@")))
(subsequent (:or initial digit (char-set "+-.@")))
(comment (:: "/*" (complement (:: any-string "*/" any-string)) "*/")))
(define-empty-tokens x
(EOF PIPE |:| SEMI |%%| %prec))
@ -34,14 +35,15 @@
(define get-token-grammar
(lexer-src-pos
("%%" '|%%|)
((: ":") (string->symbol lexeme))
(":" (string->symbol lexeme))
("%prec" (string->symbol lexeme))
(#\| 'PIPE)
((+ (: #\newline #\tab " " comment (@ "{" (* (^ "}")) "}"))) (return-without-pos (get-token-grammar input-port)))
((:+ (:or #\newline #\tab " " comment (:: "{" (:* (:~ "}")) "}")))
(return-without-pos (get-token-grammar input-port)))
(#\; 'SEMI)
(#\' (token-STRING (string->symbol (list->string (match-single-string input-port)))))
(#\" (token-STRING (string->symbol (list->string (match-double-string input-port)))))
((@ initial (* subsequent)) (token-SYM (string->symbol lexeme)))))
((:: initial (:* subsequent)) (token-SYM (string->symbol lexeme)))))
(define (parse-grammar enter-term enter-empty-term enter-non-term)
(parser

@ -1,4 +1,3 @@
#cs
(module yacc mzscheme
(require-for-syntax "private-yacc/parser-builder.ss"
@ -6,6 +5,7 @@
(require "private-yacc/array2d.ss"
"private-lex/token.ss"
"private-yacc/parser-actions.ss"
(lib "etc.ss")
(lib "pretty.ss")
(lib "readerr.ss" "syntax"))
@ -178,135 +178,122 @@
(reduce-stack (cdr stack) (sub1 num) ret-vals src-pos)))
(else (values stack ret-vals))))
(define-struct stack-frame (state value start-pos end-pos) (make-inspector))
;; extract-helper : (symbol or make-token) any any -> symbol any any any
(define (extract-helper tok v1 v2)
(cond
((symbol? tok)
(values tok #f v1 v2))
((token? tok)
(values (real-token-name tok) (real-token-value tok) v1 v2))
(else (raise-type-error 'parser
"symbol or struct:token"
0
tok))))
(define (make-empty-stack i) (list (make-stack-frame i #f #f #f)))
;; extract-src-pos : position-token -> symbol any any any
(define (extract-src-pos ip)
(cond
((position-token? ip)
(extract-helper (position-token-token ip)
(position-token-start-pos ip)
(position-token-end-pos ip)))
(else
(raise-type-error 'parser
"struct:position-token"
0
ip))))
(define (false-thunk) #f)
;; extract-no-src-pos : (symbol or make-token) -> symbol any any any
(define (extract-no-src-pos ip)
(extract-helper ip #f #f))
(define-struct stack-frame (state value start-pos end-pos))
(define (make-empty-stack i) (list (make-stack-frame i #f #f #f)))
;; The table format is an array2d that maps each state/term pair to either
;; an accept, shift or reduce structure - or a #f. Except that we will encode
;; by changing (make-accept) -> 'accept, (make-shift i) -> i and
;; (make-reduce i1 i2 i3) -> #(i1 i2 i3)
(define (parser-body debug err starts ends table term-sym->index actions src-pos)
(letrec ((input->token
(define (parser-body debug? err starts ends table term-sym->index actions src-pos)
(local ((define extract
(if src-pos
(lambda (ip)
(cond
((and (list? ip) (= 3 (length ip)))
(let ((tok (car ip)))
(cond
((symbol? tok) (make-token tok #f))
((token? tok) tok)
(else (raise-type-error 'parser
"(list (token or symbol) position position)"
0
ip)))))
(else
(raise-type-error 'parser
"(list (token or symbol) position position)"
0
ip))))
(lambda (ip)
(cond
((symbol? ip) (make-token ip #f))
((token? ip) ip)
(else (raise-type-error 'parser "token or symbol" 0 ip))))))
extract-src-pos
extract-no-src-pos))
(fix-error
(lambda (stack tok ip get-token)
(when debug (pretty-print stack))
(letrec ((remove-input
(lambda ()
(if (memq (token-name tok) ends)
(raise-read-error "parser: Cannot continue after error"
#f #f #f #f #f)
(let ((a (find-action stack tok ip)))
(cond
((shift? a)
;;(printf "shift:~a~n" (shift-state a))
(cons (if src-pos
(make-stack-frame (shift-state a)
(token-value tok)
(cadr ip)
(caddr ip))
(make-stack-frame (shift-state a)
(token-value tok)
#f
#f))
stack))
(else
;;(printf "discard input:~a~n" tok)
(set! ip (get-token))
(set! tok (input->token ip))
(remove-input)))))))
(remove-states
(lambda ()
(let ((a (find-action stack (make-token 'error #f) ip)))
(define (fix-error stack tok val start-pos end-pos get-token)
(when debug? (pretty-print stack))
(local ((define (remove-input tok val start-pos end-pos)
(if (memq tok ends)
(raise-read-error "parser: Cannot continue after error"
#f #f #f #f #f)
(let ((a (find-action stack tok val start-pos end-pos)))
(cond
((shift? a)
;;(printf "shift:~a~n" (shift-state a))
(set! stack
(cons
(if src-pos
(make-stack-frame (shift-state a)
#f
(cadr ip)
(caddr ip))
(make-stack-frame (shift-state a)
#f
#f
#f))
stack))
(remove-input))
(cons (make-stack-frame (shift-state a)
val
start-pos
end-pos)
stack))
(else
;;(printf "discard state:~a~n" (car stack))
(cond
((< (length stack) 2)
(raise-read-error "parser: Cannot continue after error"
#f #f #f #f #f))
(else
(set! stack (cdr stack))
(remove-states)))))))))
(remove-states))))
;;(printf "discard input:~a~n" tok)
(let-values (((tok val start-pos end-pos)
(extract (get-token))))
(remove-input tok val start-pos end-pos))))))))
(let remove-states ()
(let ((a (find-action stack 'error #f start-pos end-pos)))
(cond
((shift? a)
;;(printf "shift:~a~n" (shift-state a))
(set! stack
(cons
(make-stack-frame (shift-state a)
#f
start-pos
end-pos)
stack))
(remove-input))
(else
;;(printf "discard state:~a~n" (car stack))
(cond
((< (length stack) 2)
(raise-read-error "parser: Cannot continue after error"
#f #f #f #f #f))
(else
(set! stack (cdr stack))
(remove-states)))))))))
(find-action
(lambda (stack tok ip)
(let ((token-index (hash-table-get term-sym->index
(token-name tok)
false-thunk)))
(if token-index
(array2d-ref table
(stack-frame-state (car stack))
token-index)
(begin
(if src-pos
(err #f (token-name tok) (token-value tok) (cadr ip) (caddr ip))
(err #f (token-name tok) (token-value tok)))
(raise-read-error (format "parser: got token of unknown type ~a" (token-name tok))
#f #f #f #f #f))))))
(make-parser
(lambda (start-number)
(lambda (get-token)
(let parsing-loop ((stack (make-empty-stack start-number))
(ip (get-token)))
(let* ((tok (input->token ip))
(action (find-action stack tok ip)))
(define (find-action stack tok val start-pos end-pos)
(let ((token-index (hash-table-get term-sym->index
tok
(lambda () #f))))
(if token-index
(array2d-ref table
(stack-frame-state (car stack))
token-index)
(begin
(if src-pos
(err #f tok val start-pos end-pos)
(err #f tok val))
(raise-read-error (format "parser: got token of unknown type ~a" tok)
#f #f #f #f #f)))))
(define (make-parser start-number)
(lambda (get-token)
(let parsing-loop ((stack (make-empty-stack start-number))
(ip (get-token)))
(let-values (((tok val start-pos end-pos)
(extract ip)))
(let ((action (find-action stack tok val start-pos end-pos)))
(cond
((shift? action)
;; (printf "shift:~a~n" (shift-state action))
(let ((val (token-value tok)))
(parsing-loop (cons (if src-pos
(make-stack-frame (shift-state action)
val
(cadr ip)
(caddr ip))
(make-stack-frame (shift-state action)
val
#f
#f))
stack)
(get-token))))
(parsing-loop (cons (make-stack-frame (shift-state action)
val
start-pos
end-pos)
stack)
(get-token)))
((reduce? action)
;; (printf "reduce:~a~n" (reduce-prod-num action))
(let-values (((new-stack args)
@ -322,9 +309,9 @@
(make-stack-frame
goto
(apply (vector-ref actions (reduce-prod-num action)) args)
(if (null? args) (cadr ip) (cadr args))
(if (null? args) start-pos (cadr args))
(if (null? args)
(caddr ip)
end-pos
(list-ref args (- (* (reduce-rhs-length action) 3) 1))))
(make-stack-frame
goto
@ -338,9 +325,10 @@
(stack-frame-value (car stack)))
(else
(if src-pos
(err #t (token-name tok) (token-value tok) (cadr ip) (caddr ip))
(err #t (token-name tok) (token-value tok)))
(parsing-loop (fix-error stack tok ip get-token) (get-token))))))))))
(err #t tok val start-pos end-pos)
(err #t tok val))
(parsing-loop (fix-error stack tok val start-pos end-pos get-token)
(get-token))))))))))
(cond
((null? (cdr starts)) (make-parser 0))
(else

Loading…
Cancel
Save