From 9b8e0f96c89fb830acaf75d2f3b6241ca5dffa5f Mon Sep 17 00:00:00 2001 From: Matthew Butterick Date: Thu, 7 Jun 2018 18:00:51 -0700 Subject: [PATCH] rebase on fork of parser-tools --- .../LICENSE.txt | 0 .../br-parser-tools/br-parser-tools.scrbl | 80 ++-- br-parser-tools-doc/br-parser-tools/info.rkt | 3 + .../info.rkt | 6 +- .../LICENSE.txt | 0 .../br-parser-tools}/cfg-parser.rkt | 145 ++---- .../br-parser-tools}/examples/calc.rkt | 6 +- .../br-parser-tools}/examples/read.rkt | 6 +- .../br-parser-tools}/info.rkt | 0 .../br-parser-tools}/lex-plt-v200.rkt | 4 +- .../br-parser-tools}/lex-sre.rkt | 2 +- br-parser-tools-lib/br-parser-tools/lex.rkt | 369 ++++++++++++++++ .../br-parser-tools}/private-lex/actions.rkt | 8 +- .../br-parser-tools}/private-lex/deriv.rkt | 0 .../private-lex/error-tests.rkt | 0 .../br-parser-tools}/private-lex/front.rkt | 0 .../br-parser-tools}/private-lex/re.rkt | 2 +- .../br-parser-tools}/private-lex/stx.rkt | 2 +- .../private-lex/token-syntax.rkt | 0 .../br-parser-tools}/private-lex/token.rkt | 5 +- .../private-lex/unicode-chars.rkt | 0 .../br-parser-tools}/private-lex/util.rkt | 0 .../br-parser-tools}/private-yacc/grammar.rkt | 0 .../br-parser-tools}/private-yacc/graph.rkt | 0 .../private-yacc/input-file-parser.rkt | 0 .../br-parser-tools}/private-yacc/lalr.rkt | 0 .../br-parser-tools}/private-yacc/lr0.rkt | 0 .../private-yacc/parser-actions.rkt | 0 .../private-yacc/parser-builder.rkt | 0 .../br-parser-tools}/private-yacc/table.rkt | 0 .../private-yacc/yacc-helper.rkt | 0 .../br-parser-tools}/yacc-to-scheme.rkt | 6 +- br-parser-tools-lib/br-parser-tools/yacc.rkt | 412 ++++++++++++++++++ .../info.rkt | 2 +- {parser-tools => br-parser-tools}/LICENSE.txt | 0 br-parser-tools/info.rkt | 12 + parser-tools-doc/parser-tools/info.rkt | 3 - parser-tools-lib/parser-tools/lex.rkt | 393 ----------------- parser-tools-lib/parser-tools/yacc.rkt | 396 ----------------- parser-tools/info.rkt | 12 - 40 files changed, 919 insertions(+), 955 deletions(-) rename {parser-tools-doc => br-parser-tools-doc}/LICENSE.txt (100%) rename parser-tools-doc/parser-tools/parser-tools.scrbl => br-parser-tools-doc/br-parser-tools/br-parser-tools.scrbl (91%) create mode 100644 br-parser-tools-doc/br-parser-tools/info.rkt rename {parser-tools-doc => br-parser-tools-doc}/info.rkt (61%) rename {parser-tools-lib => br-parser-tools-lib}/LICENSE.txt (100%) rename {parser-tools-lib/parser-tools => br-parser-tools-lib/br-parser-tools}/cfg-parser.rkt (91%) mode change 100644 => 100755 rename {parser-tools-lib/parser-tools => br-parser-tools-lib/br-parser-tools}/examples/calc.rkt (95%) rename {parser-tools-lib/parser-tools => br-parser-tools-lib/br-parser-tools}/examples/read.rkt (98%) rename {parser-tools-lib/parser-tools => br-parser-tools-lib/br-parser-tools}/info.rkt (100%) rename {parser-tools-lib/parser-tools => br-parser-tools-lib/br-parser-tools}/lex-plt-v200.rkt (84%) rename {parser-tools-lib/parser-tools => br-parser-tools-lib/br-parser-tools}/lex-sre.rkt (98%) create mode 100644 br-parser-tools-lib/br-parser-tools/lex.rkt rename {parser-tools-lib/parser-tools => br-parser-tools-lib/br-parser-tools}/private-lex/actions.rkt (77%) rename {parser-tools-lib/parser-tools => br-parser-tools-lib/br-parser-tools}/private-lex/deriv.rkt (100%) rename {parser-tools-lib/parser-tools => br-parser-tools-lib/br-parser-tools}/private-lex/error-tests.rkt (100%) rename {parser-tools-lib/parser-tools => br-parser-tools-lib/br-parser-tools}/private-lex/front.rkt (100%) rename {parser-tools-lib/parser-tools => br-parser-tools-lib/br-parser-tools}/private-lex/re.rkt (99%) rename {parser-tools-lib/parser-tools => br-parser-tools-lib/br-parser-tools}/private-lex/stx.rkt (99%) rename {parser-tools-lib/parser-tools => br-parser-tools-lib/br-parser-tools}/private-lex/token-syntax.rkt (100%) rename {parser-tools-lib/parser-tools => br-parser-tools-lib/br-parser-tools}/private-lex/token.rkt (94%) rename {parser-tools-lib/parser-tools => br-parser-tools-lib/br-parser-tools}/private-lex/unicode-chars.rkt (100%) rename {parser-tools-lib/parser-tools => br-parser-tools-lib/br-parser-tools}/private-lex/util.rkt (100%) rename {parser-tools-lib/parser-tools => br-parser-tools-lib/br-parser-tools}/private-yacc/grammar.rkt (100%) rename {parser-tools-lib/parser-tools => br-parser-tools-lib/br-parser-tools}/private-yacc/graph.rkt (100%) rename {parser-tools-lib/parser-tools => br-parser-tools-lib/br-parser-tools}/private-yacc/input-file-parser.rkt (100%) rename {parser-tools-lib/parser-tools => br-parser-tools-lib/br-parser-tools}/private-yacc/lalr.rkt (100%) rename {parser-tools-lib/parser-tools => br-parser-tools-lib/br-parser-tools}/private-yacc/lr0.rkt (100%) rename {parser-tools-lib/parser-tools => br-parser-tools-lib/br-parser-tools}/private-yacc/parser-actions.rkt (100%) rename {parser-tools-lib/parser-tools => br-parser-tools-lib/br-parser-tools}/private-yacc/parser-builder.rkt (100%) rename {parser-tools-lib/parser-tools => br-parser-tools-lib/br-parser-tools}/private-yacc/table.rkt (100%) rename {parser-tools-lib/parser-tools => br-parser-tools-lib/br-parser-tools}/private-yacc/yacc-helper.rkt (100%) rename {parser-tools-lib/parser-tools => br-parser-tools-lib/br-parser-tools}/yacc-to-scheme.rkt (97%) create mode 100644 br-parser-tools-lib/br-parser-tools/yacc.rkt rename {parser-tools-lib => br-parser-tools-lib}/info.rkt (70%) rename {parser-tools => br-parser-tools}/LICENSE.txt (100%) create mode 100644 br-parser-tools/info.rkt delete mode 100644 parser-tools-doc/parser-tools/info.rkt delete mode 100644 parser-tools-lib/parser-tools/lex.rkt delete mode 100644 parser-tools-lib/parser-tools/yacc.rkt delete mode 100644 parser-tools/info.rkt diff --git a/parser-tools-doc/LICENSE.txt b/br-parser-tools-doc/LICENSE.txt similarity index 100% rename from parser-tools-doc/LICENSE.txt rename to br-parser-tools-doc/LICENSE.txt diff --git a/parser-tools-doc/parser-tools/parser-tools.scrbl b/br-parser-tools-doc/br-parser-tools/br-parser-tools.scrbl similarity index 91% rename from parser-tools-doc/parser-tools/parser-tools.scrbl rename to br-parser-tools-doc/br-parser-tools/br-parser-tools.scrbl index b8657cf..7e0684c 100644 --- a/parser-tools-doc/parser-tools/parser-tools.scrbl +++ b/br-parser-tools-doc/br-parser-tools/br-parser-tools.scrbl @@ -2,14 +2,14 @@ @(require scribble/manual scribble/struct scribble/xref scribble/bnf (for-label scheme/base scheme/contract - parser-tools/lex - (prefix-in : parser-tools/lex-sre) - parser-tools/yacc - parser-tools/cfg-parser)) + br-parser-tools/lex + (prefix-in : br-parser-tools/lex-sre) + br-parser-tools/yacc + br-parser-tools/cfg-parser)) -@title{Parser Tools: @exec{lex} and @exec{yacc}-style Parsing} +@title{Parser Tools: @exec{lex} and @exec{yacc}-style Parsing (Beautiful Racket edition)} -@author["Scott Owens"] +@author["Scott Owens (99%)" "Matthew Butterick (1%)"] This documentation assumes familiarity with @exec{lex} and @exec{yacc} style lexer and parser generators. @@ -24,7 +24,7 @@ style lexer and parser generators. @section-index["scanning"] @section-index["scanner"] -@defmodule[parser-tools/lex] +@defmodule[br-parser-tools/lex] @; ---------------------------------------- @@ -59,7 +59,7 @@ style lexer and parser generators. @margin-note{The implementation of @racketmodname[syntax-color/racket-lexer] contains a lexer for the @racketmodname[racket] language. In addition, files in the @filepath{examples} sub-directory - of the @filepath{parser-tools} collection contain + of the @filepath{br-parser-tools} collection contain simpler example lexers.} An @racket[re] is matched as follows: @@ -67,7 +67,7 @@ style lexer and parser generators. @itemize[ @item{@racket[id] --- expands to the named @deftech{lexer abbreviation}; abbreviations are defined via @racket[define-lex-abbrev] or supplied by modules - like @racketmodname[parser-tools/lex-sre].} + like @racketmodname[br-parser-tools/lex-sre].} @item{@racket[string] --- matches the sequence of characters in @racket[string].} @item{@racket[character] --- matches a literal @racket[character].} @item{@racket[(repetition lo hi re)] --- matches @racket[re] repeated between @racket[lo] @@ -92,15 +92,15 @@ empty string, @racket[(union)] matches nothing, The regular expression language is not designed to be used directly, but rather as a basis for a user-friendly notation written with regular expression macros. For example, -@racketmodname[parser-tools/lex-sre] supplies operators from Olin -Shivers's SREs, and @racketmodname[parser-tools/lex-plt-v200] supplies +@racketmodname[br-parser-tools/lex-sre] supplies operators from Olin +Shivers's SREs, and @racketmodname[br-parser-tools/lex-plt-v200] supplies (deprecated) operators from the previous version of this library. Since those libraries provide operators whose names match other Racket bindings, such as @racket[*] and @racket[+], they normally must be imported using a prefix: @racketblock[ -(require (prefix-in : parser-tools/lex-sre)) +(require (prefix-in : br-parser-tools/lex-sre)) ] The suggested prefix is @racket[:], so that @racket[:*] and @@ -167,14 +167,14 @@ are a few examples, using @racket[:] prefixed SRE syntax: @item{@racket[input-port] --- the input-port being processed (this is useful for matching input with multiple lexers).} - @item{@racket[(return-without-pos x)] is a function (continuation) that - immediately returns the value of @racket[x] from the lexer. This useful - in a src-pos lexer to prevent the lexer from adding source + @item{@racket[(return-without-pos x)] and @racket[(return-without-srcloc x)] are functions (continuations) that + immediately return the value of @racket[x] from the lexer. This useful + in a src-pos or src-loc lexer to prevent the lexer from adding source information. For example: @racketblock[ (define get-token - (lexer-src-pos + (lexer-srcloc ... ((comment) (get-token input-port)) ...)) @@ -182,12 +182,12 @@ are a few examples, using @racket[:] prefixed SRE syntax: would wrap the source location information for the comment around the value of the recursive call. Using - @racket[((comment) (return-without-pos (get-token input-port)))] + @racket[((comment) (return-without-srcloc (get-token input-port)))] will cause the value of the recursive call to be returned without wrapping position around it.} ] - The lexer raises an exception @racket[(exn:read)] if none of the + The lexer raises an @racket[exn:fail:read] exception if none of the regular expressions match the input. Hint: If @racket[(any-char _custom-error-behavior)] is the last rule, then there will always be a match, and @racket[_custom-error-behavior] is executed to @@ -248,12 +248,21 @@ an @racket[action-expr], returns @racket[(make-position-token _action-result start-pos end-pos)] instead of simply @racket[_action-result].} +@defform[(lexer-srcloc (trigger action-expr) ...)]{ + +Like @racket[lexer], but for each @racket[_action-result] produced by +an @racket[action-expr], returns @racket[(make-srcloc-token +_action-result lexeme-srcloc)] instead of simply +@racket[_action-result].} + @deftogether[( @defidform[start-pos] @defidform[end-pos] @defidform[lexeme] +@defidform[lexeme-srcloc] @defidform[input-port] @defidform[return-without-pos] +@defidform[return-without-srcloc] )]{ Use of these names outside of a @racket[lexer] action is a syntax @@ -276,12 +285,21 @@ error.} Lexers created with @racket[lexer-src-pos] return instances of @racket[position-token].} +@defstruct[srcloc-token ([token any/c] + [srcloc srcloc?])]{ + + Lexers created with @racket[lexer-srcloc] return instances of @racket[srcloc-token].} + @defparam[file-path source any/c]{ A parameter that the lexer uses as the source location if it raises a @racket[exn:fail:read] error. Setting this parameter allows DrRacket, for example, to open the file containing the error.} +@defparam[lexer-file-path source any/c]{ + +Alias for @racket[file-path].} + @; ---------------------------------------- @@ -340,14 +358,14 @@ characters, @racket[char-lower-case?] characters, etc.} @subsection{Lexer SRE Operators} -@defmodule[parser-tools/lex-sre] +@defmodule[br-parser-tools/lex-sre] @; Put the docs in a macro, so that we can bound the scope of @; the import of `*', etc.: @(define-syntax-rule (lex-sre-doc) (... (begin - (require (for-label parser-tools/lex-sre)) + (require (for-label br-parser-tools/lex-sre)) @defform[(* re ...)]{ @@ -416,16 +434,16 @@ characters.} @subsection{Lexer Legacy Operators} -@defmodule[parser-tools/lex-plt-v200] +@defmodule[br-parser-tools/lex-plt-v200] @(define-syntax-rule (lex-v200-doc) (... (begin - (require (for-label parser-tools/lex-plt-v200)) + (require (for-label br-parser-tools/lex-plt-v200)) -@t{The @racketmodname[parser-tools/lex-plt-v200] module re-exports +@t{The @racketmodname[br-parser-tools/lex-plt-v200] module re-exports @racket[*], @racket[+], @racket[?], and @racket[&] from - @racketmodname[parser-tools/lex-sre]. It also re-exports + @racketmodname[br-parser-tools/lex-sre]. It also re-exports @racket[:or] as @racket[:], @racket[::] as @racket[|@|], @racket[:~] as @racket[^], and @racket[:/] as @racket[-].} @@ -446,7 +464,7 @@ The same as @racket[(complement re ...)].}))) Each @racket[_action-expr] in a @racket[lexer] form can produce any kind of value, but for many purposes, producing a @deftech{token} value is useful. Tokens are usually necessary for inter-operating with -a parser generated by @racket[parser-tools/parser], but tokens may not +a parser generated by @racket[br-parser-tools/parser], but tokens may not be the right choice when using @racket[lexer] in other situations. @defform[(define-tokens group-id (token-id ...))]{ @@ -492,7 +510,7 @@ be the right choice when using @racket[lexer] in other situations. @section-index["yacc"] -@defmodule[parser-tools/yacc] +@defmodule[br-parser-tools/yacc] @defform/subs[#:literals (grammar tokens start end precs src-pos suppress debug yacc-output prec) @@ -691,9 +709,9 @@ be the right choice when using @racket[lexer] in other situations. @section-index["cfg-parser"] -@defmodule[parser-tools/cfg-parser]{The @racketmodname[parser-tools/cfg-parser] +@defmodule[br-parser-tools/cfg-parser]{The @racketmodname[br-parser-tools/cfg-parser] library provides a parser generator that is an alternative to that of -@racketmodname[parser-tools/yacc].} +@racketmodname[br-parser-tools/yacc].} @defform/subs[#:literals (grammar tokens start end precs src-pos suppress debug yacc-output prec) @@ -710,7 +728,7 @@ library provides a parser generator that is an alternative to that of Creates a parser similar to that of @racket[parser]. Unlike @racket[parser], @racket[cfg-parser], can consume arbitrary and potentially ambiguous context-free - grammars. Its interface is a subset of @racketmodname[parser-tools/yacc], with + grammars. Its interface is a subset of @racketmodname[br-parser-tools/yacc], with the following differences: @itemize[ @@ -730,7 +748,7 @@ library provides a parser generator that is an alternative to that of @section{Converting @exec{yacc} or @exec{bison} Grammars} -@defmodule[parser-tools/yacc-to-scheme] +@defmodule[br-parser-tools/yacc-to-scheme] @defproc[(trans [file path-string?]) any/c]{ @@ -744,7 +762,7 @@ conversion tool. It is not entirely robust. For example, if the C actions in the original grammar have nested blocks, the tool will fail. Annotated examples are in the @filepath{examples} subdirectory of the -@filepath{parser-tools} collection.} +@filepath{br-parser-tools} collection.} @; ---------------------------------------------------------------------- diff --git a/br-parser-tools-doc/br-parser-tools/info.rkt b/br-parser-tools-doc/br-parser-tools/info.rkt new file mode 100644 index 0000000..f219d03 --- /dev/null +++ b/br-parser-tools-doc/br-parser-tools/info.rkt @@ -0,0 +1,3 @@ +#lang info + +(define scribblings '(("br-parser-tools.scrbl" (multi-page) (parsing-library)))) diff --git a/parser-tools-doc/info.rkt b/br-parser-tools-doc/info.rkt similarity index 61% rename from parser-tools-doc/info.rkt rename to br-parser-tools-doc/info.rkt index a9c1254..8760588 100644 --- a/parser-tools-doc/info.rkt +++ b/br-parser-tools-doc/info.rkt @@ -5,10 +5,10 @@ (define build-deps '("scheme-lib" "racket-doc" "syntax-color-doc" - "parser-tools-lib" + "br-parser-tools-lib" "scribble-lib")) -(define update-implies '("parser-tools-lib")) +(define update-implies '("br-parser-tools-lib")) -(define pkg-desc "documentation part of \"parser-tools\"") +(define pkg-desc "documentation part of \"br-parser-tools\"") (define pkg-authors '(mflatt)) diff --git a/parser-tools-lib/LICENSE.txt b/br-parser-tools-lib/LICENSE.txt similarity index 100% rename from parser-tools-lib/LICENSE.txt rename to br-parser-tools-lib/LICENSE.txt diff --git a/parser-tools-lib/parser-tools/cfg-parser.rkt b/br-parser-tools-lib/br-parser-tools/cfg-parser.rkt old mode 100644 new mode 100755 similarity index 91% rename from parser-tools-lib/parser-tools/cfg-parser.rkt rename to br-parser-tools-lib/br-parser-tools/cfg-parser.rkt index 07da911..26692a7 --- a/parser-tools-lib/parser-tools/cfg-parser.rkt +++ b/br-parser-tools-lib/br-parser-tools/cfg-parser.rkt @@ -1,5 +1,5 @@ #lang racket/base -;; This module implements a parser form like the parser-tools's +;; This module implements a parser form like the br-parser-tools's ;; `parser', except that it works on an arbitrary CFG (returning ;; the first sucecssful parse). @@ -23,7 +23,7 @@ ;; different lengths. (Otherwise, in the spirit of finding one ;; successful parse, only the first result is kept.) -;; The parser-tools's `parse' is used to transform tokens in the +;; The br-parser-tools's `parse' is used to transform tokens in the ;; grammar to tokens specific to this parser. In other words, this ;; parser uses `parser' so that it doesn't have to know anything about ;; tokens. @@ -31,12 +31,12 @@ -(require parser-tools/yacc - parser-tools/lex) +(require br-parser-tools/yacc + br-parser-tools/lex) (require (for-syntax racket/base syntax/boundmap - parser-tools/private-lex/token-syntax)) + br-parser-tools/private-lex/token-syntax)) (provide cfg-parser) @@ -489,6 +489,12 @@ (fail-k max-depth tasks))))]) (k end max-depth tasks new-got-k new-fail-k)))]))))) +;; These temp identifiers can't be `gensym` or `generate-temporary` +;; because they have to be consistent between module loads +;; (IIUC, the parser is multi-threaded, and this approach is not thread-safe) +;; so I see no alternative to the old standby of making them ludicrously unlikely +(define-for-syntax start-id-temp 'start_jihqolbbafscgxvsufnepvmxqipnxgmlpxukmdoqxqzmzgaogaftbkbyqjttwwfimifowdxfyekjiixdmtprfkcvfciraehoeuaz) +(define-for-syntax atok-id-temp 'atok_wrutdjgecmybyfipiwsgjlvsveryodlgassuzcargiuznzgdghrykfqfbwcjgzdhdoeqxcucmtjkuyucskzethozhqkasphdwbht) (define-syntax (cfg-parser stx) (syntax-case stx () [(_ clause ...) @@ -704,11 +710,17 @@ [(pos ...) (if src-pos? #'($1-start-pos $1-end-pos) - #'(#f #f))]) - #`(grammar (start [() null] - [(atok start) (cons $1 $2)]) - (atok [(tok) (make-tok 'tok-id 'tok $e pos ...)] ...))) - #`(start start) + #'(#f #f))] + ;; rename `start` and `atok` to temp ids + ;; so that "start" and "atok" can be used as literal string tokens in a grammar. + ;; not sure why this works, but it passes all tests. + [%start start-id-temp] + [%atok atok-id-temp]) + #`(grammar (%start [() null] + [(%atok %start) (cons $1 $2)]) + (%atok [(tok) (make-tok 'tok-id 'tok $e pos ...)] ...))) + (with-syntax ([%start start-id-temp]) + #`(start %start)) parser-clauses)))] [(grammar . _) (raise-syntax-error @@ -744,37 +756,31 @@ val (next success-k fail-k max-depth tasks)))] [fail-k (lambda (max-depth tasks) - (define (call-error-proc tok-ok? tok-name tok-value start-pos end-pos) - (cond - [(procedure-arity-includes? error-proc 5) - (error-proc tok-ok? tok-name tok-value start-pos end-pos)] - [else - (error-proc tok-ok? tok-name tok-value)])) (cond - [(null? tok-list) - (if error-proc - (call-error-proc #t - 'no-tokens - #f - (make-position #f #f #f) - (make-position #f #f #f)) - (error - 'cfg-parse - "no tokens"))] - [else - (let ([bad-tok (list-ref tok-list - (min (sub1 (length tok-list)) - max-depth))]) - (if error-proc - (call-error-proc #t - (tok-orig-name bad-tok) - (tok-val bad-tok) - (tok-start bad-tok) - (tok-end bad-tok)) - (error - 'cfg-parse - "failed at ~a" - (tok-val bad-tok))))]))]) + [(null? tok-list) + (if error-proc + (error-proc #t + 'no-tokens + #f + (make-position #f #f #f) + (make-position #f #f #f)) + (error + 'cfg-parse + "no tokens"))] + [else + (let ([bad-tok (list-ref tok-list + (min (sub1 (length tok-list)) + max-depth))]) + (if error-proc + (error-proc #t + (tok-orig-name bad-tok) + (tok-val bad-tok) + (tok-start bad-tok) + (tok-end bad-tok)) + (error + 'cfg-parse + "failed at ~a" + (tok-val bad-tok))))]))]) (#,start tok-list ;; we simulate a token at the very beginning with zero width ;; for use with the position-generating code (*-start-pos, *-end-pos). @@ -803,9 +809,8 @@ (module* test racket/base (require (submod "..") - parser-tools/lex + br-parser-tools/lex racket/block - racket/generator rackunit) ;; Test: parsing regular expressions. @@ -854,61 +859,7 @@ 1 13) 1 13))) - - ;; Check that cfg-parser can accept error functions of 3 arguments: - (block - (define-tokens non-terminals (ONE ZERO EOF)) - (define parse - (cfg-parser (tokens non-terminals) - (start ones) - (end EOF) - (error (lambda (tok-ok tok-name tok-val) - (error (format "~a ~a ~a" tok-ok tok-name tok-val)))) - (grammar [ones [() null] - [(ONE ones) (cons $1 $2)]]))) - (define (sequence->tokenizer s) - (define-values (more? next) (sequence-generate s)) - (lambda () - (cond [(more?) (next)] - [else (token-EOF 'eof)]))) - (check-exn #rx"#t ZERO zero" - (lambda () (parse (sequence->tokenizer (list (token-ZERO "zero"))))))) - - - - - ;; Check that cfg-parser can accept error functions of 5 arguments: - (block - (define-tokens non-terminals (ONE ZERO EOF)) - (define parse - (cfg-parser (tokens non-terminals) - (start ones) - (src-pos) - (end EOF) - (error (lambda (tok-ok tok-name tok-val start-pos end-pos) - (error (format "~a ~a ~a ~a ~a" - tok-ok tok-name tok-val - (position-offset start-pos) - (position-offset end-pos))))) - (grammar [ones [() null] - [(ONE ones) (cons $1 $2)]]))) - (define (sequence->tokenizer s) - (define-values (more? next) (sequence-generate s)) - (lambda () - (cond [(more?) (next)] - [else (position-token (token-EOF 'eof) - (position #f #f #f) - (position #f #f #f))]))) - (check-exn #rx"#t ZERO zero 2 3" - (lambda () - (parse - (sequence->tokenizer - (list (position-token - (token-ZERO "zero") - (position 2 2 5) - (position 3 2 6)))))))) - - + diff --git a/parser-tools-lib/parser-tools/examples/calc.rkt b/br-parser-tools-lib/br-parser-tools/examples/calc.rkt similarity index 95% rename from parser-tools-lib/parser-tools/examples/calc.rkt rename to br-parser-tools-lib/br-parser-tools/examples/calc.rkt index 71b9f7f..9ad1218 100644 --- a/parser-tools-lib/parser-tools/examples/calc.rkt +++ b/br-parser-tools-lib/br-parser-tools/examples/calc.rkt @@ -4,9 +4,9 @@ ;; Import the parser and lexer generators. -(require parser-tools/yacc - parser-tools/lex - (prefix-in : parser-tools/lex-sre)) +(require br-parser-tools/yacc + br-parser-tools/lex + (prefix-in : br-parser-tools/lex-sre)) (define-tokens value-tokens (NUM VAR FNCT)) (define-empty-tokens op-tokens (newline = OP CP + - * / ^ EOF NEG)) diff --git a/parser-tools-lib/parser-tools/examples/read.rkt b/br-parser-tools-lib/br-parser-tools/examples/read.rkt similarity index 98% rename from parser-tools-lib/parser-tools/examples/read.rkt rename to br-parser-tools-lib/br-parser-tools/examples/read.rkt index 2dc9475..a10b2c1 100644 --- a/parser-tools-lib/parser-tools/examples/read.rkt +++ b/br-parser-tools-lib/br-parser-tools/examples/read.rkt @@ -4,9 +4,9 @@ (module read mzscheme - (require parser-tools/lex - (prefix : parser-tools/lex-sre) - parser-tools/yacc + (require br-parser-tools/lex + (prefix : br-parser-tools/lex-sre) + br-parser-tools/yacc syntax/readerr) (define-tokens data (DATUM)) diff --git a/parser-tools-lib/parser-tools/info.rkt b/br-parser-tools-lib/br-parser-tools/info.rkt similarity index 100% rename from parser-tools-lib/parser-tools/info.rkt rename to br-parser-tools-lib/br-parser-tools/info.rkt diff --git a/parser-tools-lib/parser-tools/lex-plt-v200.rkt b/br-parser-tools-lib/br-parser-tools/lex-plt-v200.rkt similarity index 84% rename from parser-tools-lib/parser-tools/lex-plt-v200.rkt rename to br-parser-tools-lib/br-parser-tools/lex-plt-v200.rkt index 34b8d16..0cbb175 100644 --- a/parser-tools-lib/parser-tools/lex-plt-v200.rkt +++ b/br-parser-tools-lib/br-parser-tools/lex-plt-v200.rkt @@ -1,6 +1,6 @@ (module lex-plt-v200 mzscheme - (require parser-tools/lex - (prefix : parser-tools/lex-sre)) + (require br-parser-tools/lex + (prefix : br-parser-tools/lex-sre)) (provide epsilon ~ diff --git a/parser-tools-lib/parser-tools/lex-sre.rkt b/br-parser-tools-lib/br-parser-tools/lex-sre.rkt similarity index 98% rename from parser-tools-lib/parser-tools/lex-sre.rkt rename to br-parser-tools-lib/br-parser-tools/lex-sre.rkt index 28be898..820d090 100644 --- a/parser-tools-lib/parser-tools/lex-sre.rkt +++ b/br-parser-tools-lib/br-parser-tools/lex-sre.rkt @@ -1,5 +1,5 @@ (module lex-sre mzscheme - (require parser-tools/lex) + (require br-parser-tools/lex) (provide (rename sre-* *) (rename sre-+ +) diff --git a/br-parser-tools-lib/br-parser-tools/lex.rkt b/br-parser-tools-lib/br-parser-tools/lex.rkt new file mode 100644 index 0000000..6bb9dd4 --- /dev/null +++ b/br-parser-tools-lib/br-parser-tools/lex.rkt @@ -0,0 +1,369 @@ +#lang racket/base + +;; Provides the syntax used to create lexers and the functions needed to +;; create and use the buffer that the lexer reads from. See docs. + +(require (for-syntax mzlib/list + syntax/stx + syntax/define + syntax/boundmap + "private-lex/util.rkt" + "private-lex/actions.rkt" + "private-lex/front.rkt" + "private-lex/unicode-chars.rkt" + racket/base + racket/promise)) + +(require mzlib/stxparam + syntax/readerr + "private-lex/token.rkt") + +(provide lexer lexer-src-pos lexer-srcloc define-lex-abbrev define-lex-abbrevs define-lex-trans + + ;; Dealing with tokens and related structures + define-tokens define-empty-tokens token-name token-value token? + (struct-out position) + (struct-out position-token) + (struct-out srcloc-token) + + ;; File path for highlighting errors while lexing + file-path + lexer-file-path ;; alternate name + + ;; Lex abbrevs for unicode char sets. See mzscheme manual section 3.4. + any-char any-string nothing alphabetic lower-case upper-case title-case + numeric symbolic punctuation graphic whitespace blank iso-control + + ;; A regular expression operator + char-set) + +;; wrap-action: syntax-object src-pos? -> syntax-object +(define-for-syntax (wrap-action action src-loc-style) + (with-syntax ([action-stx + (cond + [(eq? src-loc-style 'lexer-src-pos) + #`(let/ec ret + (syntax-parameterize + ([return-without-pos (make-rename-transformer #'ret)]) + (make-position-token #,action start-pos end-pos)))] + [(eq? src-loc-style 'lexer-srcloc) + #`(let/ec ret + (syntax-parameterize + ([return-without-srcloc (make-rename-transformer #'ret)]) + (make-srcloc-token #,action lexeme-srcloc)))] + [else action])]) + (syntax/loc action + (λ (start-pos-p end-pos-p lexeme-p input-port-p) + (define lexeme-srcloc-p (make-srcloc (object-name input-port-p) + (position-line start-pos-p) + (position-col start-pos-p) + (position-offset start-pos-p) + (and (number? (position-offset end-pos-p)) + (number? (position-offset start-pos-p)) + (- (position-offset end-pos-p) + (position-offset start-pos-p))))) + (syntax-parameterize + ([start-pos (make-rename-transformer #'start-pos-p)] + [end-pos (make-rename-transformer #'end-pos-p)] + [lexeme (make-rename-transformer #'lexeme-p)] + [input-port (make-rename-transformer #'input-port-p)] + [lexeme-srcloc (make-rename-transformer #'lexeme-srcloc-p)]) + action-stx))))) + +(define-for-syntax (make-lexer-macro caller src-loc-style) + (λ (stx) + (syntax-case stx () + [(_ . RE+ACTS) + (let () + (define spec/re-acts (syntax->list #'RE+ACTS)) + (for/and ([x (in-list spec/re-acts)]) + (syntax-case x () + [(RE ACT) #t] + [else (raise-syntax-error caller "not a regular expression / action pair" stx x)])) + (define eof-act (get-special-action spec/re-acts #'eof #'eof)) + (define spec-act (get-special-action spec/re-acts #'special #'(void))) + (define spec-comment-act (get-special-action spec/re-acts #'special-comment #'#f)) + (define ids (list #'special #'special-comment #'eof)) + (define re-acts (filter (λ (spec/re-act) + (syntax-case spec/re-act () + [((special) act) + (not (ormap + (λ (x) + (and (identifier? #'special) + (module-or-top-identifier=? #'special x))) + ids))] + [_ #t])) spec/re-acts)) + (define names (map (λ (x) (datum->syntax #f (gensym))) re-acts)) + (define acts (map (λ (x) (stx-car (stx-cdr x))) re-acts)) + (define re-actnames (map (λ (re-act name) (list (stx-car re-act) name)) re-acts names)) + (when (null? spec/re-acts) + (raise-syntax-error caller "expected at least one action" stx)) + (define-values (trans start action-names no-look disappeared-uses) (build-lexer re-actnames)) + (when (vector-ref action-names start) ;; Start state is final + (unless (and + ;; All the successor states are final + (vector? (vector-ref trans start)) + (andmap (λ (x) (vector-ref action-names (vector-ref x 2))) + (vector->list (vector-ref trans start))) + ;; Each character has a successor state + (let loop ([check 0] + [nexts (vector->list (vector-ref trans start))]) + (cond + [(null? nexts) #f] + [else + (let ([next (car nexts)]) + (and (= (vector-ref next 0) check) + (let ([next-check (vector-ref next 1)]) + (or (>= next-check max-char-num) + (loop (add1 next-check) (cdr nexts))))))]))) + (eprintf "warning: lexer at ~a can accept the empty string\n" stx))) + (with-syntax ([START-STATE-STX start] + [TRANS-TABLE-STX trans] + [NO-LOOKAHEAD-STX no-look] + [(NAME ...) names] + [(ACT ...) (map (λ (a) (wrap-action a src-loc-style)) acts)] + [(ACT-NAME ...) (vector->list action-names)] + [SPEC-ACT-STX (wrap-action spec-act src-loc-style)] + [HAS-COMMENT-ACT?-STX (if (syntax-e spec-comment-act) #t #f)] + [SPEC-COMMENT-ACT-STX (wrap-action spec-comment-act src-loc-style)] + [EOF-ACT-STX (wrap-action eof-act src-loc-style)]) + (syntax-property + (syntax/loc stx (let ([NAME ACT] ...) + (let ([proc (lexer-body START-STATE-STX + TRANS-TABLE-STX + (vector ACT-NAME ...) + NO-LOOKAHEAD-STX + SPEC-ACT-STX + HAS-COMMENT-ACT?-STX + SPEC-COMMENT-ACT-STX + EOF-ACT-STX)]) + ;; reverse eta to get named procedures: + (λ (port) (proc port))))) + 'disappeared-use disappeared-uses)))]))) + +(define-syntax lexer (make-lexer-macro 'lexer #f)) +(define-syntax lexer-src-pos (make-lexer-macro 'lexer-src-pos 'lexer-src-pos)) +(define-syntax lexer-srcloc (make-lexer-macro 'lexer-srcloc 'lexer-srcloc)) + +(define-syntax (define-lex-abbrev stx) + (syntax-case stx () + [(_ NAME RE) (identifier? #'NAME) + (syntax/loc stx + (define-syntax NAME + (make-lex-abbrev (λ () (quote-syntax RE)))))] + [_ (raise-syntax-error 'define-lex-abbrev "form should be (define-lex-abbrev name re)" stx)])) + +(define-syntax (define-lex-abbrevs stx) + (syntax-case stx () + [(_ . XS) + (with-syntax ([(ABBREV ...) (map + (λ (a) + (syntax-case a () + [(NAME RE) (identifier? #'NAME) + (syntax/loc a (define-lex-abbrev NAME RE))] + [_ (raise-syntax-error + #f + "form should be (define-lex-abbrevs (name re) ...)" + stx + a)])) + (syntax->list #'XS))]) + (syntax/loc stx (begin ABBREV ...)))] + [_ (raise-syntax-error #f "form should be (define-lex-abbrevs (name re) ...)" stx)])) + +(define-syntax (define-lex-trans stx) + (syntax-case stx () + [(_ name-form body-form) + (let-values (((name body) + (normalize-definition #'(define-syntax name-form body-form) #'λ))) + + #`(define-syntax #,name + (let ((func #,body)) + (unless (procedure? func) + (raise-syntax-error 'define-lex-trans "expected a procedure as the transformer, got ~e" func)) + (unless (procedure-arity-includes? func 1) + (raise-syntax-error 'define-lex-trans "expected a procedure that accepts 1 argument as the transformer, got ~e" func)) + (make-lex-trans func))))] + [_ + (raise-syntax-error + #f + "form should be (define-lex-trans name transformer)" + stx)])) + + +(define (get-next-state-helper char min max table) + (cond + [(>= min max) #f] + [else + (define try (quotient (+ min max) 2)) + (define el (vector-ref table try)) + (define r1 (vector-ref el 0)) + (define r2 (vector-ref el 1)) + (cond + [(and (>= char r1) (<= char r2)) (vector-ref el 2)] + [(< char r1) (get-next-state-helper char min try table)] + [else (get-next-state-helper char (add1 try) max table)])])) + + + + +(define (get-next-state char table) + (and table (get-next-state-helper char 0 (vector-length table) table))) + +(define (lexer-body start-state trans-table actions no-lookahead special-action + has-special-comment-action? special-comment-action eof-action) + (letrec ([lexer + (λ (ip) + (let ((first-pos (get-position ip)) + (first-char (peek-char-or-special ip 0))) + ;(printf "(peek-char-or-special port 0) = ~e\n" first-char) + (cond + [(eof-object? first-char) + (do-match ip first-pos eof-action (read-char-or-special ip))] + [(special-comment? first-char) + (read-char-or-special ip) + (cond + (has-special-comment-action? + (do-match ip first-pos special-comment-action #f)) + (else (lexer ip)))] + [(not (char? first-char)) + (do-match ip first-pos special-action (read-char-or-special ip))] + [else + (let lexer-loop ( + ;; current-state + (state start-state) + ;; the character to transition on + (char first-char) + ;; action for the longest match seen thus far + ;; including a match at the current state + (longest-match-action + (vector-ref actions start-state)) + ;; how many bytes precede char + (length-bytes 0) + ;; how many characters have been read + ;; including the one just read + (length-chars 1) + ;; how many characters are in the longest match + (longest-match-length 0)) + (let ([next-state + (cond + [(not (char? char)) #f] + [else (get-next-state (char->integer char) + (vector-ref trans-table state))])]) + (cond + [(not next-state) + (check-match ip first-pos longest-match-length + length-chars longest-match-action)] + [(vector-ref no-lookahead next-state) + (let ((act (vector-ref actions next-state))) + (check-match ip + first-pos + (if act length-chars longest-match-length) + length-chars + (if act act longest-match-action)))] + [else + (let* ([act (vector-ref actions next-state)] + [next-length-bytes (+ (char-utf-8-length char) length-bytes)] + [next-char (peek-char-or-special ip next-length-bytes)]) + #;(printf "(peek-char-or-special port ~e) = ~e\n" + next-length-bytes next-char) + (lexer-loop next-state + next-char + (if act + act + longest-match-action) + next-length-bytes + (add1 length-chars) + (if act + length-chars + longest-match-length)))])))])))]) + (λ (ip) + (unless (input-port? ip) + (raise-argument-error 'lexer "input-port?" 0 ip)) + (lexer ip)))) + +(define (check-match lb first-pos longest-match-length length longest-match-action) + (unless longest-match-action + (let* ([match (read-string length lb)] + [end-pos (get-position lb)]) + (raise-read-error + (format "lexer: No match found in input starting with: ~a" match) + (file-path) + (position-line first-pos) + (position-col first-pos) + (position-offset first-pos) + (- (position-offset end-pos) (position-offset first-pos))))) + (let ([match (read-string longest-match-length lb)]) + ;(printf "(read-string ~e port) = ~e\n" longest-match-length match) + (do-match lb first-pos longest-match-action match))) + +(define file-path (make-parameter #f)) +(define lexer-file-path file-path) + +(define (do-match ip first-pos action value) + #;(printf "(action ~a ~a ~a ~a)\n" + (position-offset first-pos) (position-offset (get-position ip)) value ip) + (action first-pos (get-position ip) value ip)) + +(define (get-position ip) + (define-values (line col off) (port-next-location ip)) + (make-position off line col)) + +(define-syntax (create-unicode-abbrevs stx) + (syntax-case stx () + [(_ CTXT) + (with-syntax ([(RANGES ...) (for/list ([range (in-list (list (force alphabetic-ranges) + (force lower-case-ranges) + (force upper-case-ranges) + (force title-case-ranges) + (force numeric-ranges) + (force symbolic-ranges) + (force punctuation-ranges) + (force graphic-ranges) + (force whitespace-ranges) + (force blank-ranges) + (force iso-control-ranges)))]) + `(union ,@(map (λ (x) + `(char-range ,(integer->char (car x)) + ,(integer->char (cdr x)))) + range)))] + [(NAMES ...) (for/list ([sym (in-list '(alphabetic + lower-case + upper-case + title-case + numeric + symbolic + punctuation + graphic + whitespace + blank + iso-control))]) + (datum->syntax #'CTXT sym #f))]) + #'(define-lex-abbrevs (NAMES RANGES) ...))])) + +(define-lex-abbrev any-char (char-complement (union))) +(define-lex-abbrev any-string (intersection)) +(define-lex-abbrev nothing (union)) +(create-unicode-abbrevs #'here) + +(define-lex-trans (char-set stx) + (syntax-case stx () + [(_ STR) + (string? (syntax-e #'STR)) + (with-syntax ([(CHAR ...) (string->list (syntax-e #'STR))]) + #'(union CHAR ...))])) + +(define-syntax provide-lex-keyword + (syntax-rules () + [(_ ID ...) + (begin + (define-syntax-parameter ID + (make-set!-transformer + (λ (stx) + (raise-syntax-error + 'provide-lex-keyword + (format "use of a lexer keyword (~a) is not in an appropriate lexer action" 'ID) + stx)))) + ... + (provide ID ...))])) + +(provide-lex-keyword start-pos end-pos lexeme lexeme-srcloc input-port return-without-pos return-without-srcloc) diff --git a/parser-tools-lib/parser-tools/private-lex/actions.rkt b/br-parser-tools-lib/br-parser-tools/private-lex/actions.rkt similarity index 77% rename from parser-tools-lib/parser-tools/private-lex/actions.rkt rename to br-parser-tools-lib/br-parser-tools/private-lex/actions.rkt index 6ec0c7f..13f982c 100644 --- a/parser-tools-lib/parser-tools/private-lex/actions.rkt +++ b/br-parser-tools-lib/br-parser-tools/private-lex/actions.rkt @@ -1,4 +1,4 @@ -#lang scheme/base +#lang racket/base (provide (all-defined-out)) (require syntax/stx) @@ -10,7 +10,7 @@ ((null? rules) none) (else (syntax-case (car rules) () - (((special) act) + [((special) ACT) (and (identifier? #'special) (module-or-top-identifier=? (syntax special) which-special)) - (syntax act)) - (_ (get-special-action (cdr rules) which-special none)))))) + #'ACT] + [_ (get-special-action (cdr rules) which-special none)])))) diff --git a/parser-tools-lib/parser-tools/private-lex/deriv.rkt b/br-parser-tools-lib/br-parser-tools/private-lex/deriv.rkt similarity index 100% rename from parser-tools-lib/parser-tools/private-lex/deriv.rkt rename to br-parser-tools-lib/br-parser-tools/private-lex/deriv.rkt diff --git a/parser-tools-lib/parser-tools/private-lex/error-tests.rkt b/br-parser-tools-lib/br-parser-tools/private-lex/error-tests.rkt similarity index 100% rename from parser-tools-lib/parser-tools/private-lex/error-tests.rkt rename to br-parser-tools-lib/br-parser-tools/private-lex/error-tests.rkt diff --git a/parser-tools-lib/parser-tools/private-lex/front.rkt b/br-parser-tools-lib/br-parser-tools/private-lex/front.rkt similarity index 100% rename from parser-tools-lib/parser-tools/private-lex/front.rkt rename to br-parser-tools-lib/br-parser-tools/private-lex/front.rkt diff --git a/parser-tools-lib/parser-tools/private-lex/re.rkt b/br-parser-tools-lib/br-parser-tools/private-lex/re.rkt similarity index 99% rename from parser-tools-lib/parser-tools/private-lex/re.rkt rename to br-parser-tools-lib/br-parser-tools/private-lex/re.rkt index 9da62de..b06c3eb 100644 --- a/parser-tools-lib/parser-tools/private-lex/re.rkt +++ b/br-parser-tools-lib/br-parser-tools/private-lex/re.rkt @@ -172,7 +172,7 @@ ((and (= 0 low) (or (= 0 high) (eq? z r))) e) ((and (= 1 low) (= 1 high)) r) ((and (repeatR? r) - (eq? (repeatR-high r) +inf.0) + (eqv? (repeatR-high r) +inf.0) (or (= 0 (repeatR-low r)) (= 1 (repeatR-low r)))) (build-repeat (* low (repeatR-low r)) diff --git a/parser-tools-lib/parser-tools/private-lex/stx.rkt b/br-parser-tools-lib/br-parser-tools/private-lex/stx.rkt similarity index 99% rename from parser-tools-lib/parser-tools/private-lex/stx.rkt rename to br-parser-tools-lib/br-parser-tools/private-lex/stx.rkt index 1104a87..86f7a70 100644 --- a/parser-tools-lib/parser-tools/private-lex/stx.rkt +++ b/br-parser-tools-lib/br-parser-tools/private-lex/stx.rkt @@ -84,7 +84,7 @@ stx (car arg-list))) (unless (or (and (number? high) (exact? high) (integer? high) (>= high 0)) - (eq? high +inf.0)) + (eqv? high +inf.0)) (raise-syntax-error #f "not a non-negative exact integer or +inf.0" stx diff --git a/parser-tools-lib/parser-tools/private-lex/token-syntax.rkt b/br-parser-tools-lib/br-parser-tools/private-lex/token-syntax.rkt similarity index 100% rename from parser-tools-lib/parser-tools/private-lex/token-syntax.rkt rename to br-parser-tools-lib/br-parser-tools/private-lex/token-syntax.rkt diff --git a/parser-tools-lib/parser-tools/private-lex/token.rkt b/br-parser-tools-lib/br-parser-tools/private-lex/token.rkt similarity index 94% rename from parser-tools-lib/parser-tools/private-lex/token.rkt rename to br-parser-tools-lib/br-parser-tools/private-lex/token.rkt index 6618a57..27b3458 100644 --- a/parser-tools-lib/parser-tools/private-lex/token.rkt +++ b/br-parser-tools-lib/br-parser-tools/private-lex/token.rkt @@ -10,7 +10,8 @@ (rename token-name* token-name) (rename token-value* token-value) (struct position (offset line col)) - (struct position-token (token start-pos end-pos))) + (struct position-token (token start-pos end-pos)) + (struct srcloc-token (token srcloc))) ;; A token is either @@ -85,5 +86,7 @@ (define-struct position (offset line col) #f) (define-struct position-token (token start-pos end-pos) #f) + + (define-struct srcloc-token (token srcloc) #f) ) diff --git a/parser-tools-lib/parser-tools/private-lex/unicode-chars.rkt b/br-parser-tools-lib/br-parser-tools/private-lex/unicode-chars.rkt similarity index 100% rename from parser-tools-lib/parser-tools/private-lex/unicode-chars.rkt rename to br-parser-tools-lib/br-parser-tools/private-lex/unicode-chars.rkt diff --git a/parser-tools-lib/parser-tools/private-lex/util.rkt b/br-parser-tools-lib/br-parser-tools/private-lex/util.rkt similarity index 100% rename from parser-tools-lib/parser-tools/private-lex/util.rkt rename to br-parser-tools-lib/br-parser-tools/private-lex/util.rkt diff --git a/parser-tools-lib/parser-tools/private-yacc/grammar.rkt b/br-parser-tools-lib/br-parser-tools/private-yacc/grammar.rkt similarity index 100% rename from parser-tools-lib/parser-tools/private-yacc/grammar.rkt rename to br-parser-tools-lib/br-parser-tools/private-yacc/grammar.rkt diff --git a/parser-tools-lib/parser-tools/private-yacc/graph.rkt b/br-parser-tools-lib/br-parser-tools/private-yacc/graph.rkt similarity index 100% rename from parser-tools-lib/parser-tools/private-yacc/graph.rkt rename to br-parser-tools-lib/br-parser-tools/private-yacc/graph.rkt diff --git a/parser-tools-lib/parser-tools/private-yacc/input-file-parser.rkt b/br-parser-tools-lib/br-parser-tools/private-yacc/input-file-parser.rkt similarity index 100% rename from parser-tools-lib/parser-tools/private-yacc/input-file-parser.rkt rename to br-parser-tools-lib/br-parser-tools/private-yacc/input-file-parser.rkt diff --git a/parser-tools-lib/parser-tools/private-yacc/lalr.rkt b/br-parser-tools-lib/br-parser-tools/private-yacc/lalr.rkt similarity index 100% rename from parser-tools-lib/parser-tools/private-yacc/lalr.rkt rename to br-parser-tools-lib/br-parser-tools/private-yacc/lalr.rkt diff --git a/parser-tools-lib/parser-tools/private-yacc/lr0.rkt b/br-parser-tools-lib/br-parser-tools/private-yacc/lr0.rkt similarity index 100% rename from parser-tools-lib/parser-tools/private-yacc/lr0.rkt rename to br-parser-tools-lib/br-parser-tools/private-yacc/lr0.rkt diff --git a/parser-tools-lib/parser-tools/private-yacc/parser-actions.rkt b/br-parser-tools-lib/br-parser-tools/private-yacc/parser-actions.rkt similarity index 100% rename from parser-tools-lib/parser-tools/private-yacc/parser-actions.rkt rename to br-parser-tools-lib/br-parser-tools/private-yacc/parser-actions.rkt diff --git a/parser-tools-lib/parser-tools/private-yacc/parser-builder.rkt b/br-parser-tools-lib/br-parser-tools/private-yacc/parser-builder.rkt similarity index 100% rename from parser-tools-lib/parser-tools/private-yacc/parser-builder.rkt rename to br-parser-tools-lib/br-parser-tools/private-yacc/parser-builder.rkt diff --git a/parser-tools-lib/parser-tools/private-yacc/table.rkt b/br-parser-tools-lib/br-parser-tools/private-yacc/table.rkt similarity index 100% rename from parser-tools-lib/parser-tools/private-yacc/table.rkt rename to br-parser-tools-lib/br-parser-tools/private-yacc/table.rkt diff --git a/parser-tools-lib/parser-tools/private-yacc/yacc-helper.rkt b/br-parser-tools-lib/br-parser-tools/private-yacc/yacc-helper.rkt similarity index 100% rename from parser-tools-lib/parser-tools/private-yacc/yacc-helper.rkt rename to br-parser-tools-lib/br-parser-tools/private-yacc/yacc-helper.rkt diff --git a/parser-tools-lib/parser-tools/yacc-to-scheme.rkt b/br-parser-tools-lib/br-parser-tools/yacc-to-scheme.rkt similarity index 97% rename from parser-tools-lib/parser-tools/yacc-to-scheme.rkt rename to br-parser-tools-lib/br-parser-tools/yacc-to-scheme.rkt index 4fb9620..7f766eb 100644 --- a/parser-tools-lib/parser-tools/yacc-to-scheme.rkt +++ b/br-parser-tools-lib/br-parser-tools/yacc-to-scheme.rkt @@ -1,7 +1,7 @@ (module yacc-to-scheme mzscheme - (require parser-tools/lex - (prefix : parser-tools/lex-sre) - parser-tools/yacc + (require br-parser-tools/lex + (prefix : br-parser-tools/lex-sre) + br-parser-tools/yacc syntax/readerr mzlib/list) (provide trans) diff --git a/br-parser-tools-lib/br-parser-tools/yacc.rkt b/br-parser-tools-lib/br-parser-tools/yacc.rkt new file mode 100644 index 0000000..efded4b --- /dev/null +++ b/br-parser-tools-lib/br-parser-tools/yacc.rkt @@ -0,0 +1,412 @@ +#lang scheme/base + +(require (for-syntax scheme/base + "private-yacc/parser-builder.rkt" + "private-yacc/grammar.rkt" + "private-yacc/yacc-helper.rkt" + "private-yacc/parser-actions.rkt")) +(require "private-lex/token.rkt" + "private-yacc/parser-actions.rkt" + mzlib/etc + mzlib/pretty + syntax/readerr) + +(provide parser) + + +;; convert-parse-table : (vectorof (listof (cons/c gram-sym? action?))) -> +;; (vectorof (symbol runtime-action hashtable)) +(define-for-syntax (convert-parse-table table) + (list->vector + (map + (lambda (state-entry) + (let ((ht (make-hasheq))) + (for-each + (lambda (gs/action) + (hash-set! ht + (gram-sym-symbol (car gs/action)) + (action->runtime-action (cdr gs/action)))) + state-entry) + ht)) + (vector->list table)))) + +(define-syntax (parser stx) + (syntax-case stx () + ((_ args ...) + (let ((arg-list (syntax->list (syntax (args ...)))) + (src-pos #f) + (debug #f) + (error #f) + (tokens #f) + (start #f) + (end #f) + (precs #f) + (suppress #f) + (grammar #f) + (yacc-output #f)) + (for-each + (lambda (arg) + (syntax-case* arg (debug error tokens start end precs grammar + suppress src-pos yacc-output) + (lambda (a b) + (eq? (syntax-e a) (syntax-e b))) + ((debug filename) + (cond + ((not (string? (syntax-e (syntax filename)))) + (raise-syntax-error + #f + "Debugging filename must be a string" + stx + (syntax filename))) + (debug + (raise-syntax-error #f "Multiple debug declarations" stx)) + (else + (set! debug (syntax-e (syntax filename)))))) + ((suppress) + (set! suppress #t)) + ((src-pos) + (set! src-pos #t)) + ((error expression) + (if error + (raise-syntax-error #f "Multiple error declarations" stx) + (set! error (syntax expression)))) + ((tokens def ...) + (begin + (when tokens + (raise-syntax-error #f "Multiple tokens declarations" stx)) + (let ((defs (syntax->list (syntax (def ...))))) + (for-each + (lambda (d) + (unless (identifier? d) + (raise-syntax-error + #f + "Token-group name must be an identifier" + stx + d))) + defs) + (set! tokens defs)))) + ((start symbol ...) + (let ((symbols (syntax->list (syntax (symbol ...))))) + (for-each + (lambda (sym) + (unless (identifier? sym) + (raise-syntax-error #f + "Start symbol must be a symbol" + stx + sym))) + symbols) + (when start + (raise-syntax-error #f "Multiple start declarations" stx)) + (when (null? symbols) + (raise-syntax-error #f + "Missing start symbol" + stx + arg)) + (set! start symbols))) + ((end symbols ...) + (let ((symbols (syntax->list (syntax (symbols ...))))) + (for-each + (lambda (sym) + (unless (identifier? sym) + (raise-syntax-error #f + "End token must be a symbol" + stx + sym))) + symbols) + (let ((d (duplicate-list? (map syntax-e symbols)))) + (when d + (raise-syntax-error + #f + (format "Duplicate end token definition for ~a" d) + stx + arg)) + (when (null? symbols) + (raise-syntax-error + #f + "end declaration must contain at least 1 token" + stx + arg)) + (when end + (raise-syntax-error #f "Multiple end declarations" stx)) + (set! end symbols)))) + ((precs decls ...) + (if precs + (raise-syntax-error #f "Multiple precs declarations" stx) + (set! precs (syntax/loc arg (decls ...))))) + ((grammar prods ...) + (if grammar + (raise-syntax-error #f "Multiple grammar declarations" stx) + (set! grammar (syntax/loc arg (prods ...))))) + ((yacc-output filename) + (cond + ((not (string? (syntax-e (syntax filename)))) + (raise-syntax-error #f + "Yacc-output filename must be a string" + stx + (syntax filename))) + (yacc-output + (raise-syntax-error #f "Multiple yacc-output declarations" stx)) + (else + (set! yacc-output (syntax-e (syntax filename)))))) + (_ (raise-syntax-error #f "argument must match (debug filename), (error expression), (tokens def ...), (start non-term), (end tokens ...), (precs decls ...), or (grammar prods ...)" stx arg)))) + (syntax->list (syntax (args ...)))) + (unless tokens + (raise-syntax-error #f "missing tokens declaration" stx)) + (unless error + (raise-syntax-error #f "missing error declaration" stx)) + (unless grammar + (raise-syntax-error #f "missing grammar declaration" stx)) + (unless end + (raise-syntax-error #f "missing end declaration" stx)) + (unless start + (raise-syntax-error #f "missing start declaration" stx)) + (let-values (((table all-term-syms actions check-syntax-fix) + (build-parser (if debug debug "") + src-pos + suppress + tokens + start + end + precs + grammar))) + (when (and yacc-output (not (string=? yacc-output ""))) + (with-handlers [(exn:fail:filesystem? + (lambda (e) + (eprintf + "Cannot write yacc-output to file \"~a\"\n" + yacc-output)))] + (call-with-output-file yacc-output + (lambda (port) + (display-yacc (syntax->datum grammar) + tokens + (map syntax->datum start) + (if precs + (syntax->datum precs) + #f) + port)) + #:exists 'truncate))) + (with-syntax ((check-syntax-fix check-syntax-fix) + (err error) + (ends end) + (starts start) + (debug debug) + (table (convert-parse-table table)) + (all-term-syms all-term-syms) + (actions actions) + (src-pos src-pos)) + (syntax + (begin + check-syntax-fix + (parser-body debug err (quote starts) (quote ends) table all-term-syms actions src-pos))))))) + (_ + (raise-syntax-error #f + "parser must have the form (parser args ...)" + stx)))) + +(define (reduce-stack stack num ret-vals src-pos) + (cond + ((> num 0) + (let* ((top-frame (car stack)) + (ret-vals + (if src-pos + (cons (stack-frame-value top-frame) + (cons (stack-frame-start-pos top-frame) + (cons (stack-frame-end-pos top-frame) + ret-vals))) + (cons (stack-frame-value top-frame) ret-vals)))) + (reduce-stack (cdr stack) (sub1 num) ret-vals src-pos))) + (else (values stack ret-vals)))) + +;; extract-helper : (symbol or make-token) any any -> symbol any any any +(define (extract-helper tok v1 v2) + (cond + ((symbol? tok) + (values tok #f v1 v2)) + ((token? tok) + (values (real-token-name tok) (real-token-value tok) v1 v2)) + (else (raise-argument-error 'parser + "(or/c symbol? token?)" + 0 + tok)))) + +;; well-formed-position-token?: any -> boolean +;; Returns true if pt is a position token whose position-token-token +;; is itself a token or a symbol. +;; This is meant to help raise more precise error messages when +;; a tokenizer produces an erroneous position-token wrapped twice. +;; (as often happens when omitting return-without-pos). +(define (well-formed-token-field? t) + (or (symbol? t) + (token? t))) + +(define (well-formed-position-token? pt) + (and (position-token? pt) + (well-formed-token-field? (position-token-token pt)))) + +(define (well-formed-srcloc-token? st) + (and (srcloc-token? st) + (well-formed-token-field? (srcloc-token-token st)))) + +;; extract-src-pos : position-token -> symbol any any any +(define (extract-src-pos ip) + (unless (well-formed-position-token? ip) + (raise-argument-error 'parser + "well-formed-position-token?" + 0 + ip)) + (extract-helper (position-token-token ip) + (position-token-start-pos ip) + (position-token-end-pos ip))) + +(define (extract-srcloc ip) + (unless (well-formed-srcloc-token? ip) + (raise-argument-error 'parser + "well-formed-srcloc-token?" + 0 + ip)) + (let ([loc (srcloc-token-srcloc ip)]) + (extract-helper (srcloc-token-token ip) + (position-token (srcloc-position loc) (srcloc-line loc) (srcloc-column loc)) + (position-token (+ (srcloc-position loc) (srcloc-span loc)) #f #f)))) + + +;; extract-no-src-pos : (symbol or make-token) -> symbol any any any +(define (extract-no-src-pos ip) + (extract-helper ip #f #f)) + +(define-struct stack-frame (state value start-pos end-pos) #:inspector (make-inspector)) + +(define (make-empty-stack i) (list (make-stack-frame i #f #f #f))) + + +;; The table is a vector that maps each state to a hash-table that maps a +;; terminal symbol to either an accept, shift, reduce, or goto structure. +; We encode the structures according to the runtime-action data definition in +;; parser-actions.rkt +(define (parser-body debug? err starts ends table all-term-syms actions src-pos) + (local ((define extract + (if src-pos + extract-src-pos + extract-no-src-pos)) + + (define (fix-error stack tok val start-pos end-pos get-token) + (when debug? (pretty-print stack)) + (local ((define (remove-input tok val start-pos end-pos) + (if (memq tok ends) + (raise-read-error "parser: Cannot continue after error" + #f #f #f #f #f) + (let ((a (find-action stack tok val start-pos end-pos))) + (cond + ((runtime-shift? a) + ;; (printf "shift:~a\n" (runtime-shift-state a)) + (cons (make-stack-frame (runtime-shift-state a) + val + start-pos + end-pos) + stack)) + (else + ;; (printf "discard input:~a\n" tok) + (let-values (((tok val start-pos end-pos) + (extract (get-token)))) + (remove-input tok val start-pos end-pos)))))))) + (let remove-states () + (let ((a (find-action stack 'error #f start-pos end-pos))) + (cond + ((runtime-shift? a) + ;; (printf "shift:~a\n" (runtime-shift-state a)) + (set! stack + (cons + (make-stack-frame (runtime-shift-state a) + #f + start-pos + end-pos) + stack)) + (remove-input tok val start-pos end-pos)) + (else + ;; (printf "discard state:~a\n" (car stack)) + (cond + ((< (length stack) 2) + (raise-read-error "parser: Cannot continue after error" + #f #f #f #f #f)) + (else + (set! stack (cdr stack)) + (remove-states))))))))) + + (define (find-action stack tok val start-pos end-pos) + (unless (hash-ref all-term-syms + tok + #f) + (if src-pos + (err #f tok val start-pos end-pos) + (err #f tok val)) + (raise-read-error (format "parser: got token of unknown type ~a" tok) + #f #f #f #f #f)) + (hash-ref (vector-ref table (stack-frame-state (car stack))) + tok + #f)) + + (define (make-parser start-number) + (lambda (get-token) + (unless (and (procedure? get-token) + (procedure-arity-includes? get-token 0)) + (error 'get-token "expected a nullary procedure, got ~e" get-token)) + (let parsing-loop ((stack (make-empty-stack start-number)) + (ip (get-token))) + (let-values (((tok val start-pos end-pos) + (extract ip))) + (let ((action (find-action stack tok val start-pos end-pos))) + (cond + ((runtime-shift? action) + ;; (printf "shift:~a\n" (runtime-shift-state action)) + (parsing-loop (cons (make-stack-frame (runtime-shift-state action) + val + start-pos + end-pos) + stack) + (get-token))) + ((runtime-reduce? action) + ;; (printf "reduce:~a\n" (runtime-reduce-prod-num action)) + (let-values (((new-stack args) + (reduce-stack stack + (runtime-reduce-rhs-length action) + null + src-pos))) + (let ((goto + (runtime-goto-state + (hash-ref + (vector-ref table (stack-frame-state (car new-stack))) + (runtime-reduce-lhs action))))) + (parsing-loop + (cons + (if src-pos + (make-stack-frame + goto + (apply (vector-ref actions (runtime-reduce-prod-num action)) args) + (if (null? args) start-pos (cadr args)) + (if (null? args) + end-pos + (list-ref args (- (* (runtime-reduce-rhs-length action) 3) 1)))) + (make-stack-frame + goto + (apply (vector-ref actions (runtime-reduce-prod-num action)) args) + #f + #f)) + new-stack) + ip)))) + ((runtime-accept? action) + ;; (printf "accept\n") + (stack-frame-value (car stack))) + (else + (if src-pos + (err #t tok val start-pos end-pos) + (err #t tok val)) + (parsing-loop (fix-error stack tok val start-pos end-pos get-token) + (get-token)))))))))) + (cond + ((null? (cdr starts)) (make-parser 0)) + (else + (let loop ((l starts) + (i 0)) + (cond + ((null? l) null) + (else (cons (make-parser i) (loop (cdr l) (add1 i)))))))))) diff --git a/parser-tools-lib/info.rkt b/br-parser-tools-lib/info.rkt similarity index 70% rename from parser-tools-lib/info.rkt rename to br-parser-tools-lib/info.rkt index 1b20d3c..f9f9e11 100644 --- a/parser-tools-lib/info.rkt +++ b/br-parser-tools-lib/info.rkt @@ -6,6 +6,6 @@ "compatibility-lib")) (define build-deps '("rackunit-lib")) -(define pkg-desc "implementation (no documentation) part of \"parser-tools\"") +(define pkg-desc "implementation (no documentation) part of \"br-parser-tools\"") (define pkg-authors '(mflatt)) diff --git a/parser-tools/LICENSE.txt b/br-parser-tools/LICENSE.txt similarity index 100% rename from parser-tools/LICENSE.txt rename to br-parser-tools/LICENSE.txt diff --git a/br-parser-tools/info.rkt b/br-parser-tools/info.rkt new file mode 100644 index 0000000..6a692a8 --- /dev/null +++ b/br-parser-tools/info.rkt @@ -0,0 +1,12 @@ +#lang info + +(define collection 'multi) + +(define deps '("br-parser-tools-lib" + "br-parser-tools-doc")) +(define implies '("br-parser-tools-lib" + "br-parser-tools-doc")) + +(define pkg-desc "Lex- and Yacc-style parsing tools") + +(define pkg-authors '(mflatt)) diff --git a/parser-tools-doc/parser-tools/info.rkt b/parser-tools-doc/parser-tools/info.rkt deleted file mode 100644 index ffc8c17..0000000 --- a/parser-tools-doc/parser-tools/info.rkt +++ /dev/null @@ -1,3 +0,0 @@ -#lang info - -(define scribblings '(("parser-tools.scrbl" (multi-page) (parsing-library)))) diff --git a/parser-tools-lib/parser-tools/lex.rkt b/parser-tools-lib/parser-tools/lex.rkt deleted file mode 100644 index cc4f393..0000000 --- a/parser-tools-lib/parser-tools/lex.rkt +++ /dev/null @@ -1,393 +0,0 @@ -(module lex mzscheme - - ;; Provides the syntax used to create lexers and the functions needed to - ;; create and use the buffer that the lexer reads from. See docs. - - (require-for-syntax mzlib/list - syntax/stx - syntax/define - syntax/boundmap - "private-lex/util.rkt" - "private-lex/actions.rkt" - "private-lex/front.rkt" - "private-lex/unicode-chars.rkt") - - (require mzlib/stxparam - syntax/readerr - "private-lex/token.rkt") - - (provide lexer lexer-src-pos define-lex-abbrev define-lex-abbrevs define-lex-trans - - ;; Dealing with tokens and related structures - define-tokens define-empty-tokens token-name token-value token? - (struct position (offset line col)) - (struct position-token (token start-pos end-pos)) - - ;; File path for highlighting errors while lexing - file-path - - ;; Lex abbrevs for unicode char sets. See mzscheme manual section 3.4. - any-char any-string nothing alphabetic lower-case upper-case title-case - numeric symbolic punctuation graphic whitespace blank iso-control - - ;; A regular expression operator - char-set) - - ;; wrap-action: syntax-object src-pos? -> syntax-object - (define-for-syntax (wrap-action action src-pos?) - (with-syntax ((action-stx - (if src-pos? - #`(let/ec ret - (syntax-parameterize - ((return-without-pos (make-rename-transformer #'ret))) - (make-position-token #,action start-pos end-pos))) - action))) - (syntax/loc action - (lambda (start-pos-p end-pos-p lexeme-p input-port-p) - (syntax-parameterize - ((start-pos (make-rename-transformer #'start-pos-p)) - (end-pos (make-rename-transformer #'end-pos-p)) - (lexeme (make-rename-transformer #'lexeme-p)) - (input-port (make-rename-transformer #'input-port-p))) - action-stx))))) - - (define-for-syntax (make-lexer-trans src-pos?) - (lambda (stx) - (syntax-case stx () - ((_ re-act ...) - (begin - (for-each - (lambda (x) - (syntax-case x () - ((re act) (void)) - (_ (raise-syntax-error #f - "not a regular expression / action pair" - stx - x)))) - (syntax->list (syntax (re-act ...)))) - (let* ((spec/re-act-lst - (syntax->list (syntax (re-act ...)))) - (eof-act - (get-special-action spec/re-act-lst #'eof #''eof)) - (spec-act - (get-special-action spec/re-act-lst #'special #'(void))) - (spec-comment-act - (get-special-action spec/re-act-lst #'special-comment #'#f)) - (ids (list #'special #'special-comment #'eof)) - (re-act-lst - (filter - (lambda (spec/re-act) - (syntax-case spec/re-act () - (((special) act) - (not (ormap - (lambda (x) - (and (identifier? #'special) - (module-or-top-identifier=? (syntax special) x))) - ids))) - (_ #t))) - spec/re-act-lst)) - (name-lst (map (lambda (x) (datum->syntax-object #f (gensym))) re-act-lst)) - (act-lst (map (lambda (x) (stx-car (stx-cdr x))) re-act-lst)) - (re-actname-lst (map (lambda (re-act name) - (list (stx-car re-act) - name)) - re-act-lst - name-lst))) - (when (null? spec/re-act-lst) - (raise-syntax-error (if src-pos? 'lexer/src-pos 'lexer) "expected at least one action" stx)) - (let-values (((trans start action-names no-look disappeared-uses) - (build-lexer re-actname-lst))) - (when (vector-ref action-names start) ;; Start state is final - (unless (and - ;; All the successor states are final - (andmap (lambda (x) (vector-ref action-names (vector-ref x 2))) - (vector->list (vector-ref trans start))) - ;; Each character has a successor state - (let loop ((check 0) - (nexts (vector->list (vector-ref trans start)))) - (cond - ((null? nexts) #f) - (else - (let ((next (car nexts))) - (and (= (vector-ref next 0) check) - (let ((next-check (vector-ref next 1))) - (or (>= next-check max-char-num) - (loop (add1 next-check) (cdr nexts)))))))))) - (eprintf "Warning: lexer at ~a can accept the empty string.\n" stx))) - (with-syntax ((start-state-stx start) - (trans-table-stx trans) - (no-lookahead-stx no-look) - ((name ...) name-lst) - ((act ...) (map (lambda (a) - (wrap-action a src-pos?)) - act-lst)) - ((act-name ...) (vector->list action-names)) - (spec-act-stx - (wrap-action spec-act src-pos?)) - (has-comment-act?-stx - (if (syntax-e spec-comment-act) #t #f)) - (spec-comment-act-stx - (wrap-action spec-comment-act src-pos?)) - (eof-act-stx (wrap-action eof-act src-pos?))) - (syntax-property - (syntax/loc stx - (let ([name act] ...) - (let ([proc - (lexer-body start-state-stx - trans-table-stx - (vector act-name ...) - no-lookahead-stx - spec-act-stx - has-comment-act?-stx - spec-comment-act-stx - eof-act-stx)]) - ;; reverse eta to get named procedures: - (lambda (port) (proc port))))) - 'disappeared-use - disappeared-uses))))))))) - - (define-syntax lexer (make-lexer-trans #f)) - (define-syntax lexer-src-pos (make-lexer-trans #t)) - - (define-syntax (define-lex-abbrev stx) - (syntax-case stx () - ((_ name re) - (identifier? (syntax name)) - (syntax/loc stx - (define-syntax name - (make-lex-abbrev (lambda () (quote-syntax re)))))) - (_ - (raise-syntax-error - #f - "form should be (define-lex-abbrev name re)" - stx)))) - - (define-syntax (define-lex-abbrevs stx) - (syntax-case stx () - ((_ x ...) - (with-syntax (((abbrev ...) - (map - (lambda (a) - (syntax-case a () - ((name re) - (identifier? (syntax name)) - (syntax/loc a (define-lex-abbrev name re))) - (_ (raise-syntax-error - #f - "form should be (define-lex-abbrevs (name re) ...)" - stx - a)))) - (syntax->list (syntax (x ...)))))) - (syntax/loc stx (begin abbrev ...)))) - (_ - (raise-syntax-error - #f - "form should be (define-lex-abbrevs (name re) ...)" - stx)))) - - (define-syntax (define-lex-trans stx) - (syntax-case stx () - ((_ name-form body-form) - (let-values (((name body) - (normalize-definition (syntax (define-syntax name-form body-form)) #'lambda))) - - #`(define-syntax #,name - (let ((func #,body)) - (unless (procedure? func) - (raise-syntax-error 'define-lex-trans "expected a procedure as the transformer, got ~e" func)) - (unless (procedure-arity-includes? func 1) - (raise-syntax-error 'define-lex-trans "expected a procedure that accepts 1 argument as the transformer, got ~e" func)) - (make-lex-trans func))))) - (_ - (raise-syntax-error - #f - "form should be (define-lex-trans name transformer)" - stx)))) - - - (define (get-next-state-helper char min max table) - (if (>= min max) - #f - (let* ((try (quotient (+ min max) 2)) - (el (vector-ref table try)) - (r1 (vector-ref el 0)) - (r2 (vector-ref el 1))) - (cond - ((and (>= char r1) (<= char r2)) (vector-ref el 2)) - ((< char r1) (get-next-state-helper char min try table)) - (else (get-next-state-helper char (add1 try) max table)))))) - - - - - (define (get-next-state char table) - (if table - (get-next-state-helper char 0 (vector-length table) table) - #f)) - - (define (lexer-body start-state trans-table actions no-lookahead special-action - has-special-comment-action? special-comment-action eof-action) - (letrec ((lexer - (lambda (ip) - (let ((first-pos (get-position ip)) - (first-char (peek-char-or-special ip 0))) - ;(printf "(peek-char-or-special port 0) = ~e\n" first-char) - (cond - ((eof-object? first-char) - (do-match ip first-pos eof-action (read-char-or-special ip))) - ((special-comment? first-char) - (read-char-or-special ip) - (cond - (has-special-comment-action? - (do-match ip first-pos special-comment-action #f)) - (else (lexer ip)))) - ((not (char? first-char)) - (do-match ip first-pos special-action (read-char-or-special ip))) - (else - (let lexer-loop ( - ;; current-state - (state start-state) - ;; the character to transition on - (char first-char) - ;; action for the longest match seen thus far - ;; including a match at the current state - (longest-match-action - (vector-ref actions start-state)) - ;; how many bytes precede char - (length-bytes 0) - ;; how many characters have been read - ;; including the one just read - (length-chars 1) - ;; how many characters are in the longest match - (longest-match-length 0)) - (let ((next-state - (cond - ((not (char? char)) #f) - (else (get-next-state (char->integer char) - (vector-ref trans-table state)))))) - (cond - ((not next-state) - (check-match ip first-pos longest-match-length - length-chars longest-match-action)) - ((vector-ref no-lookahead next-state) - (let ((act (vector-ref actions next-state))) - (check-match ip - first-pos - (if act length-chars longest-match-length) - length-chars - (if act act longest-match-action)))) - (else - (let* ((act (vector-ref actions next-state)) - (next-length-bytes (+ (char-utf-8-length char) length-bytes)) - (next-char (peek-char-or-special ip next-length-bytes))) - #;(printf "(peek-char-or-special port ~e) = ~e\n" - next-length-bytes next-char) - (lexer-loop next-state - next-char - (if act - act - longest-match-action) - next-length-bytes - (add1 length-chars) - (if act - length-chars - longest-match-length))))))))))))) - (lambda (ip) - (unless (input-port? ip) - (raise-argument-error - 'lexer - "input-port?" - 0 - ip)) - (lexer ip)))) - - (define (check-match lb first-pos longest-match-length length longest-match-action) - (unless longest-match-action - (let* ((match (read-string length lb)) - (end-pos (get-position lb))) - (raise-read-error - (format "lexer: No match found in input starting with: ~a" match) - (file-path) - (position-line first-pos) - (position-col first-pos) - (position-offset first-pos) - (- (position-offset end-pos) (position-offset first-pos))))) - (let ((match (read-string longest-match-length lb))) - ;(printf "(read-string ~e port) = ~e\n" longest-match-length match) - (do-match lb first-pos longest-match-action match))) - - (define file-path (make-parameter #f)) - - (define (do-match ip first-pos action value) - #;(printf "(action ~a ~a ~a ~a)\n" - (position-offset first-pos) (position-offset (get-position ip)) value ip) - (action first-pos (get-position ip) value ip)) - - (define (get-position ip) - (let-values (((line col off) (port-next-location ip))) - (make-position off line col))) - - (define-syntax (create-unicode-abbrevs stx) - (syntax-case stx () - ((_ ctxt) - (with-syntax (((ranges ...) (map (lambda (range) - `(union ,@(map (lambda (x) - `(char-range ,(integer->char (car x)) - ,(integer->char (cdr x)))) - range))) - (list (force alphabetic-ranges) - (force lower-case-ranges) - (force upper-case-ranges) - (force title-case-ranges) - (force numeric-ranges) - (force symbolic-ranges) - (force punctuation-ranges) - (force graphic-ranges) - (force whitespace-ranges) - (force blank-ranges) - (force iso-control-ranges)))) - ((names ...) (map (lambda (sym) - (datum->syntax-object (syntax ctxt) sym #f)) - '(alphabetic - lower-case - upper-case - title-case - numeric - symbolic - punctuation - graphic - whitespace - blank - iso-control)))) - (syntax (define-lex-abbrevs (names ranges) ...)))))) - - (define-lex-abbrev any-char (char-complement (union))) - (define-lex-abbrev any-string (intersection)) - (define-lex-abbrev nothing (union)) - (create-unicode-abbrevs #'here) - - (define-lex-trans (char-set stx) - (syntax-case stx () - ((_ str) - (string? (syntax-e (syntax str))) - (with-syntax (((char ...) (string->list (syntax-e (syntax str))))) - (syntax (union char ...)))))) - - (define-syntax provide-lex-keyword - (syntax-rules () - [(_ id ...) - (begin - (define-syntax-parameter id - (make-set!-transformer - (lambda (stx) - (raise-syntax-error - #f - (format "use of a lexer keyword (~a) is not in an appropriate lexer action" - 'id) - stx)))) - ... - (provide id ...))])) - - (provide-lex-keyword start-pos end-pos lexeme input-port return-without-pos) - -) diff --git a/parser-tools-lib/parser-tools/yacc.rkt b/parser-tools-lib/parser-tools/yacc.rkt deleted file mode 100644 index 6584fd3..0000000 --- a/parser-tools-lib/parser-tools/yacc.rkt +++ /dev/null @@ -1,396 +0,0 @@ -#lang scheme/base - -(require (for-syntax scheme/base - "private-yacc/parser-builder.rkt" - "private-yacc/grammar.rkt" - "private-yacc/yacc-helper.rkt" - "private-yacc/parser-actions.rkt")) -(require "private-lex/token.rkt" - "private-yacc/parser-actions.rkt" - mzlib/etc - mzlib/pretty - syntax/readerr) - -(provide parser) - - - ;; convert-parse-table : (vectorof (listof (cons/c gram-sym? action?))) -> - ;; (vectorof (symbol runtime-action hashtable)) - (define-for-syntax (convert-parse-table table) - (list->vector - (map - (lambda (state-entry) - (let ((ht (make-hasheq))) - (for-each - (lambda (gs/action) - (hash-set! ht - (gram-sym-symbol (car gs/action)) - (action->runtime-action (cdr gs/action)))) - state-entry) - ht)) - (vector->list table)))) - - (define-syntax (parser stx) - (syntax-case stx () - ((_ args ...) - (let ((arg-list (syntax->list (syntax (args ...)))) - (src-pos #f) - (debug #f) - (error #f) - (tokens #f) - (start #f) - (end #f) - (precs #f) - (suppress #f) - (grammar #f) - (yacc-output #f)) - (for-each - (lambda (arg) - (syntax-case* arg (debug error tokens start end precs grammar - suppress src-pos yacc-output) - (lambda (a b) - (eq? (syntax-e a) (syntax-e b))) - ((debug filename) - (cond - ((not (string? (syntax-e (syntax filename)))) - (raise-syntax-error - #f - "Debugging filename must be a string" - stx - (syntax filename))) - (debug - (raise-syntax-error #f "Multiple debug declarations" stx)) - (else - (set! debug (syntax-e (syntax filename)))))) - ((suppress) - (set! suppress #t)) - ((src-pos) - (set! src-pos #t)) - ((error expression) - (if error - (raise-syntax-error #f "Multiple error declarations" stx) - (set! error (syntax expression)))) - ((tokens def ...) - (begin - (when tokens - (raise-syntax-error #f "Multiple tokens declarations" stx)) - (let ((defs (syntax->list (syntax (def ...))))) - (for-each - (lambda (d) - (unless (identifier? d) - (raise-syntax-error - #f - "Token-group name must be an identifier" - stx - d))) - defs) - (set! tokens defs)))) - ((start symbol ...) - (let ((symbols (syntax->list (syntax (symbol ...))))) - (for-each - (lambda (sym) - (unless (identifier? sym) - (raise-syntax-error #f - "Start symbol must be a symbol" - stx - sym))) - symbols) - (when start - (raise-syntax-error #f "Multiple start declarations" stx)) - (when (null? symbols) - (raise-syntax-error #f - "Missing start symbol" - stx - arg)) - (set! start symbols))) - ((end symbols ...) - (let ((symbols (syntax->list (syntax (symbols ...))))) - (for-each - (lambda (sym) - (unless (identifier? sym) - (raise-syntax-error #f - "End token must be a symbol" - stx - sym))) - symbols) - (let ((d (duplicate-list? (map syntax-e symbols)))) - (when d - (raise-syntax-error - #f - (format "Duplicate end token definition for ~a" d) - stx - arg)) - (when (null? symbols) - (raise-syntax-error - #f - "end declaration must contain at least 1 token" - stx - arg)) - (when end - (raise-syntax-error #f "Multiple end declarations" stx)) - (set! end symbols)))) - ((precs decls ...) - (if precs - (raise-syntax-error #f "Multiple precs declarations" stx) - (set! precs (syntax/loc arg (decls ...))))) - ((grammar prods ...) - (if grammar - (raise-syntax-error #f "Multiple grammar declarations" stx) - (set! grammar (syntax/loc arg (prods ...))))) - ((yacc-output filename) - (cond - ((not (string? (syntax-e (syntax filename)))) - (raise-syntax-error #f - "Yacc-output filename must be a string" - stx - (syntax filename))) - (yacc-output - (raise-syntax-error #f "Multiple yacc-output declarations" stx)) - (else - (set! yacc-output (syntax-e (syntax filename)))))) - (_ (raise-syntax-error #f "argument must match (debug filename), (error expression), (tokens def ...), (start non-term), (end tokens ...), (precs decls ...), or (grammar prods ...)" stx arg)))) - (syntax->list (syntax (args ...)))) - (unless tokens - (raise-syntax-error #f "missing tokens declaration" stx)) - (unless error - (raise-syntax-error #f "missing error declaration" stx)) - (unless grammar - (raise-syntax-error #f "missing grammar declaration" stx)) - (unless end - (raise-syntax-error #f "missing end declaration" stx)) - (unless start - (raise-syntax-error #f "missing start declaration" stx)) - (let-values (((table all-term-syms actions check-syntax-fix) - (build-parser (if debug debug "") - src-pos - suppress - tokens - start - end - precs - grammar))) - (when (and yacc-output (not (string=? yacc-output ""))) - (with-handlers [(exn:fail:filesystem? - (lambda (e) - (eprintf - "Cannot write yacc-output to file \"~a\"\n" - yacc-output)))] - (call-with-output-file yacc-output - (lambda (port) - (display-yacc (syntax->datum grammar) - tokens - (map syntax->datum start) - (if precs - (syntax->datum precs) - #f) - port)) - #:exists 'truncate))) - (with-syntax ((check-syntax-fix check-syntax-fix) - (err error) - (ends end) - (starts start) - (debug debug) - (table (convert-parse-table table)) - (all-term-syms all-term-syms) - (actions actions) - (src-pos src-pos)) - (syntax - (begin - check-syntax-fix - (parser-body debug err (quote starts) (quote ends) table all-term-syms actions src-pos))))))) - (_ - (raise-syntax-error #f - "parser must have the form (parser args ...)" - stx)))) - - (define (reduce-stack stack num ret-vals src-pos) - (cond - ((> num 0) - (let* ((top-frame (car stack)) - (ret-vals - (if src-pos - (cons (stack-frame-value top-frame) - (cons (stack-frame-start-pos top-frame) - (cons (stack-frame-end-pos top-frame) - ret-vals))) - (cons (stack-frame-value top-frame) ret-vals)))) - (reduce-stack (cdr stack) (sub1 num) ret-vals src-pos))) - (else (values stack ret-vals)))) - - ;; extract-helper : (symbol or make-token) any any -> symbol any any any - (define (extract-helper tok v1 v2) - (cond - ((symbol? tok) - (values tok #f v1 v2)) - ((token? tok) - (values (real-token-name tok) (real-token-value tok) v1 v2)) - (else (raise-argument-error 'parser - "(or/c symbol? token?)" - 0 - tok)))) - - ;; well-formed-position-token?: any -> boolean - ;; Returns true if pt is a position token whose position-token-token - ;; is itself a token or a symbol. - ;; This is meant to help raise more precise error messages when - ;; a tokenizer produces an erroneous position-token wrapped twice. - ;; (as often happens when omitting return-without-pos). - (define (well-formed-position-token? pt) - (and (position-token? pt) - (let ([t (position-token-token pt)]) - (or (symbol? t) - (token? t))))) - - ;; extract-src-pos : position-token -> symbol any any any - (define (extract-src-pos ip) - (cond - ((well-formed-position-token? ip) - (extract-helper (position-token-token ip) - (position-token-start-pos ip) - (position-token-end-pos ip))) - (else - (raise-argument-error 'parser - "well-formed-position-token?" - 0 - ip)))) - - ;; extract-no-src-pos : (symbol or make-token) -> symbol any any any - (define (extract-no-src-pos ip) - (extract-helper ip #f #f)) - - (define-struct stack-frame (state value start-pos end-pos) #:inspector (make-inspector)) - - (define (make-empty-stack i) (list (make-stack-frame i #f #f #f))) - - - ;; The table is a vector that maps each state to a hash-table that maps a - ;; terminal symbol to either an accept, shift, reduce, or goto structure. - ; We encode the structures according to the runtime-action data definition in - ;; parser-actions.rkt - (define (parser-body debug? err starts ends table all-term-syms actions src-pos) - (local ((define extract - (if src-pos - extract-src-pos - extract-no-src-pos)) - - (define (fix-error stack tok val start-pos end-pos get-token) - (when debug? (pretty-print stack)) - (local ((define (remove-input tok val start-pos end-pos) - (if (memq tok ends) - (raise-read-error "parser: Cannot continue after error" - #f #f #f #f #f) - (let ((a (find-action stack tok val start-pos end-pos))) - (cond - ((runtime-shift? a) - ;; (printf "shift:~a\n" (runtime-shift-state a)) - (cons (make-stack-frame (runtime-shift-state a) - val - start-pos - end-pos) - stack)) - (else - ;; (printf "discard input:~a\n" tok) - (let-values (((tok val start-pos end-pos) - (extract (get-token)))) - (remove-input tok val start-pos end-pos)))))))) - (let remove-states () - (let ((a (find-action stack 'error #f start-pos end-pos))) - (cond - ((runtime-shift? a) - ;; (printf "shift:~a\n" (runtime-shift-state a)) - (set! stack - (cons - (make-stack-frame (runtime-shift-state a) - #f - start-pos - end-pos) - stack)) - (remove-input tok val start-pos end-pos)) - (else - ;; (printf "discard state:~a\n" (car stack)) - (cond - ((< (length stack) 2) - (raise-read-error "parser: Cannot continue after error" - #f #f #f #f #f)) - (else - (set! stack (cdr stack)) - (remove-states))))))))) - - (define (find-action stack tok val start-pos end-pos) - (unless (hash-ref all-term-syms - tok - #f) - (if src-pos - (err #f tok val start-pos end-pos) - (err #f tok val)) - (raise-read-error (format "parser: got token of unknown type ~a" tok) - #f #f #f #f #f)) - (hash-ref (vector-ref table (stack-frame-state (car stack))) - tok - #f)) - - (define (make-parser start-number) - (lambda (get-token) - (unless (and (procedure? get-token) - (procedure-arity-includes? get-token 0)) - (error 'get-token "expected a nullary procedure, got ~e" get-token)) - (let parsing-loop ((stack (make-empty-stack start-number)) - (ip (get-token))) - (let-values (((tok val start-pos end-pos) - (extract ip))) - (let ((action (find-action stack tok val start-pos end-pos))) - (cond - ((runtime-shift? action) - ;; (printf "shift:~a\n" (runtime-shift-state action)) - (parsing-loop (cons (make-stack-frame (runtime-shift-state action) - val - start-pos - end-pos) - stack) - (get-token))) - ((runtime-reduce? action) - ;; (printf "reduce:~a\n" (runtime-reduce-prod-num action)) - (let-values (((new-stack args) - (reduce-stack stack - (runtime-reduce-rhs-length action) - null - src-pos))) - (let ((goto - (runtime-goto-state - (hash-ref - (vector-ref table (stack-frame-state (car new-stack))) - (runtime-reduce-lhs action))))) - (parsing-loop - (cons - (if src-pos - (make-stack-frame - goto - (apply (vector-ref actions (runtime-reduce-prod-num action)) args) - (if (null? args) start-pos (cadr args)) - (if (null? args) - end-pos - (list-ref args (- (* (runtime-reduce-rhs-length action) 3) 1)))) - (make-stack-frame - goto - (apply (vector-ref actions (runtime-reduce-prod-num action)) args) - #f - #f)) - new-stack) - ip)))) - ((runtime-accept? action) - ;; (printf "accept\n") - (stack-frame-value (car stack))) - (else - (if src-pos - (err #t tok val start-pos end-pos) - (err #t tok val)) - (parsing-loop (fix-error stack tok val start-pos end-pos get-token) - (get-token)))))))))) - (cond - ((null? (cdr starts)) (make-parser 0)) - (else - (let loop ((l starts) - (i 0)) - (cond - ((null? l) null) - (else (cons (make-parser i) (loop (cdr l) (add1 i)))))))))) diff --git a/parser-tools/info.rkt b/parser-tools/info.rkt deleted file mode 100644 index db01829..0000000 --- a/parser-tools/info.rkt +++ /dev/null @@ -1,12 +0,0 @@ -#lang info - -(define collection 'multi) - -(define deps '("parser-tools-lib" - "parser-tools-doc")) -(define implies '("parser-tools-lib" - "parser-tools-doc")) - -(define pkg-desc "Lex- and Yacc-style parsing tools") - -(define pkg-authors '(mflatt))