docs from David Van Horn; Slideshow tweaks

svn: r8661 original commit: ca1a9dc8bfc22c376c6c3eeeedfbb52c2ae9b1e8
17 years ago · 2224a46b6f
parent cc263b3e74
commit 2224a46b6f
2 changed files with 245 additions and 160 deletions
--- a/collects/parser-tools/info.ss
+++ b/collects/parser-tools/info.ss
@ -1,3 +1,3 @@
 #lang setup/infotab
-(define scribblings '(("parser-tools.scrbl" ())))
+(define scribblings '(("parser-tools.scrbl" (multi-page))))
--- a/collects/parser-tools/parser-tools.scrbl
+++ b/collects/parser-tools/parser-tools.scrbl
@ -3,14 +3,19 @@
 	  scribble/struct
 	  scribble/xref
 	  scribble/bnf
-          (for-label parser-tools/lex
+          (for-label scheme/base
-                     (prefix-in : parser-tools/lex-sre)))
+                     scheme/contract
                     parser-tools/lex
                     (prefix-in : parser-tools/lex-sre)
                     parser-tools/yacc))
@title{@bold{Parser Tools}: @exec{lex} and @exec{yacc}-style Parsing}
 This documentation assumes familiarity with @exec{lex} and @exec{yacc}
 style lexer and parser generators.
@table-of-contents[]
@; ----------------------------------------------------------------------
@section{Lexers}
@ -22,7 +27,7 @@ style lexer and parser generators.
@subsection{Creating a Lexer}
@defform/subs[#:literals (repetition union intersection complement concatenation
-                          char-range char-complement char-set
+                          char-range char-complement
                          eof special special-comment)
              (lexer [trigger action-expr] ...)
              ([trigger re
@ -39,7 +44,6 @@ style lexer and parser generators.
                   (concatenation re ...)
                   (char-range char char)
                   (char-complement re)
                   (char-set string)
                   (id datum ...)])]{
     Produces a function that takes an input-port, matches the
@ -70,7 +74,6 @@ style lexer and parser generators.
         a single character string can be used as a @scheme[char].}
    @item{@scheme[(char-complement re)] --- matches any character not matched by @scheme[re].  
          The sub-expression must be a set of characters @scheme[re].}
    @item{@scheme[(char-set string)] --- matches any character in the string.}
    @item{@scheme[(id datum ...)] --- expands the @deftech{lexer macro} named @scheme[id]; macros
          are defined via @scheme[define-lex-trans].}
   }
@ -276,6 +279,10 @@ error.}
@subsection{Lexer Abbreviations and Macros}
@defform[(char-set string)]{
 A @tech{lexer macro} that matches any character in @scheme[string].}
@defidform[any-char]{A @tech{lexer abbreviation} that matches any character.}
@defidform[any-string]{A @tech{lexer abbreviation} that matches any string.}
@ -321,54 +328,7 @@ characters, @scheme[char-lower-case?] characters, etc.}
@; ----------------------------------------
-@subsection{Tokens}
+@subsection{Lexer SRE Operators}
 Each @scheme[_action-expr] in a @scheme[lexer] form can produce any
 kind of value, but for many purposes, producing a @deftech{token}
 value is useful. Tokens are usually necessary for inter-operating with
 a parser generated by @scheme[parser-tools/parser], but tokens may not be
 the right choice when using @scheme[lexer] in other situations.
@defform[(define-tokens group-id (token-id ...))]{
   Binds @scheme[group-id] to the group of tokens being defined.  For
   each @scheme[token-id], a function
   @schemeidfont{token-}@scheme[token-id] is created that takes any
   value and puts it in a token record specific to @scheme[token-id].
   The token value is inspected using @scheme[token-name] and
   @scheme[token-value].
   A token cannot be named @schemeidfont{error}, since
   @schemeidfont{error} has a special use in the parser.}
@defform[(define-empty-tokens group-id (token-id ...) )]{
   Like @scheme[define-tokens], except that each token constructor
   @schemeidfont{token-}@scheme[token-id] takes no arguments and returns
   @scheme[(@scheme[quote] token-id)].}
@defproc[(token-name [t (or/c token? symbol?)]) symbol?]{
   Returns the name of a token that is represented either by a symbol
   or a token structure.}
@defproc[(token-value [t (or/c token? symbol?)]) any/c]{
   Returns the value of a token that is represented either by a symbol
   or a token structure, returning @scheme[#f] for a symbol token.}
@defproc[(token? [v any/c]) boolean?]{
  Returns @scheme[#t] if @scheme[v] is a
  token structure, @scheme[#f] otherwise.}
@; ----------------------------------------------------------------------
@section{Lex SRE Operators}
@defmodule[parser-tools/lex-sre]
@ -442,144 +402,266 @@ characters.}
@(lex-sre-doc)
@; ----------------------------------------
@subsection{Lexer Legacy Operators}
@defmodule[parser-tools/lex-plt-v200]
@(define-syntax-rule (lex-v200-doc)
   (...
    (begin
     (require (for-label parser-tools/lex-plt-v200))
@t{The @schememodname[parser-tools/lex-plt-v200] module re-exports
   @scheme[*], @scheme[+], @scheme[?], and @scheme[&] from
   @schememodname[parser-tools/lex-sre]. It also re-exports
   @scheme[:or] as @scheme[:], @scheme[::] as @scheme[|@|], @scheme[:~]
   as @scheme[^], and @scheme[:/] as @scheme[-].}
@defform[(epsilon)]{
 A @tech{lexer macro} that matches an empty sequence.}
@defform[(~ re ...)]{
 The same as @scheme[(complement re ...)].})))
@(lex-v200-doc)
@; ----------------------------------------
@subsection{Tokens}
 Each @scheme[_action-expr] in a @scheme[lexer] form can produce any
 kind of value, but for many purposes, producing a @deftech{token}
 value is useful. Tokens are usually necessary for inter-operating with
 a parser generated by @scheme[parser-tools/parser], but tokens may not be
 the right choice when using @scheme[lexer] in other situations.
@defform[(define-tokens group-id (token-id ...))]{
   Binds @scheme[group-id] to the group of tokens being defined.  For
   each @scheme[token-id], a function
   @schemeidfont{token-}@scheme[token-id] is created that takes any
   value and puts it in a token record specific to @scheme[token-id].
   The token value is inspected using @scheme[token-name] and
   @scheme[token-value].
   A token cannot be named @schemeidfont{error}, since
   @schemeidfont{error} has a special use in the parser.}
@defform[(define-empty-tokens group-id (token-id ...) )]{
   Like @scheme[define-tokens], except that each token constructor
   @schemeidfont{token-}@scheme[token-id] takes no arguments and returns
   @scheme[(@scheme[quote] token-id)].}
@defproc[(token-name [t (or/c token? symbol?)]) symbol?]{
   Returns the name of a token that is represented either by a symbol
   or a token structure.}
@defproc[(token-value [t (or/c token? symbol?)]) any/c]{
   Returns the value of a token that is represented either by a symbol
   or a token structure, returning @scheme[#f] for a symbol token.}
@defproc[(token? [v any/c]) boolean?]{
  Returns @scheme[#t] if @scheme[v] is a
  token structure, @scheme[#f] otherwise.}
@; ----------------------------------------------------------------------
@section{Parsers}
@defmodule[parser-tools/yacc]
-@defform/subs[(parser clause ...)
+@defform/subs[#:literals (grammar tokens start end precs error src-pos
-              ([clause ....])]{
+                          suppress debug yacc-output prec)
-
+              (parser clause ...)
-  Creates a parser. The clauses may be in any order (as
+              ([clause (grammar (non-terminal-id 
-    long as there are no duplicates and all non-optional arguments are
+                                 ((grammar-id ...) maybe-prec expr)
-    present).
+                                 ...)
                                ...)
                       (tokens group-id ...)
                       (start non-terminal-id ...)
                       (end token-id ...)
                       (error expr)
                       (precs (assoc token-id ...) ...)
                       (src-pos)
                       (suppress)
                       (debug filename)
                       (yacc-output filename)]
               [maybe-prec code:blank
                           (prec token-id)]
               [assoc left right nonassoc])]{
    Creates a parser. The clauses may be in any order, as long as there
    are no duplicates and all non-@italic{OPTIONAL} declarations are
    present:
    @itemize{
-      @item{@scheme[(debug filename)] @italic{OPTIONAL}
+      @item{@schemeblock0[(grammar (non-terminal-id 
                                    ((grammar-id ...) maybe-prec expr)
                                    ...)
                                   ...)]
-      causes the parser generator to write the LALR table to the file
+      Declares the grammar to be parsed.  Each @scheme[grammar-id] can
-      named @filepath{filename} (unless the file exists).
+      be a @scheme[token-id] from a @scheme[group-id] named in a
-      @filepath{filename} must be a string.  Additionally, if a debug
+      @scheme[tokens] declaration, or it can be a
-      file is specified, when a running generated parser encounters a
+      @scheme[non-terminal-id] declared in the @scheme[grammar]
-      parse error on some input file, after the user specified error
+      declaration. The optional @scheme[prec] declaration works with
-      expression returns, the complete parse stack is printed to
+      the @scheme[precs] declaration. The @scheme[expr] is a
-      assist in debugging the grammar of that particular parser.  The
+      ``semantic action,'' which is evaluated when the input is found
-      numbers in the stack printout correspond to the state numbers in
+      to match its corresponding production.
-      the LALR table file.}
+
      Each action is scheme code that has the same scope as its
      parser's definition, except that the variables @scheme[$1], ...,
      @schemeidfont{$}@math{n} are bound, where @math{n} is the number
      of @scheme[grammar-id]s in the corresponding production. Each
      @schemeidfont{$}@math{i} is bound to the result of the action
      for the @math{i}@superscript{th} grammar symbol on the right of
      the production, if that grammar symbol is a non-terminal, or the
      value stored in the token if the grammar symbol is a terminal.
      If the @scheme[src-pos] option is present in the parser, then
      variables @scheme[$1-start-pos], ...,
      @schemeidfont{$}@math{n}@schemeidfont{-start-pos} and
      @scheme[$1-end-pos], ...,
      @schemeidfont{$}@math{n}@schemeidfont{-end-pos} are also
      available, and they refer to the position structures
      corresponding to the start and end of the corresponding
      @scheme[grammar-symbol]. Grammar symbols defined as empty-tokens
      have no @schemeidfont{$}@math{i} associated, but do have
      @schemeidfont{$}@math{i}@schemeidfont{-start-pos} and
      @schemeidfont{$}@math{i}@schemeidfont{-end-pos}.
-      @item{@scheme[(yacc-output filename)] @italic{OPTIONAL}
+      All of the productions for a given non-terminal must be grouped
      with it. That is, no @scheme[non-terminal-id] may appear twice
      on the left hand side in a parser.}
      causes the parser generator to write a grammar file in the
      syntax of YACC/Bison.  The file might not be a valid YACC file
      because the scheme grammar can use symbols that are invalid in
      C.}
-      @item{@scheme[(suppress)] @italic{OPTIONAL}
+      @item{@scheme[(tokens group-id ...)]
-      causes the parser generator not to report shift/reduce or
+      Declares that all of the tokens defined in each
-      reduce/reduce conflicts.}
+      @scheme[group-id] can be used by the parser in the
      @scheme[grammar] declaration.}
      @item{@scheme[(src-pos)] @italic{OPTIONAL}
-      causes the generated parser to expect input in the form
+      @item{@scheme[(start non-terminal-id ...)]
-      @scheme[(make-position-token token position position)] instead
+
-      of simply @scheme[token].  Include this option when using the
+      Declares a list of starting non-terminals for the grammar.}
-      parser with a lexer generated with @scheme[lexer-src-pos].}
+
      @item{@scheme[(end token-id ...)]
-      @item{@scheme[(error expression)]
+      Specifies a set of tokens from which some member must follow any
      valid parse.  For example, an EOF token would be specified for a
      parser that parses entire files and a newline token for a parser
      that parses entire lines individually.}
-      expression should evaluate to a function which will be executed
+
-      for its side-effect whenever the parser encounters an error.  If
+      @item{@scheme[(error expr)]
-      the @scheme[src-pos] option is present, the function should
+
-      accept 5 arguments, @schemeblock[(lambda (token-ok token-name
+      The @scheme[expr] should evaluate to a function which will be
-      token-value start-pos end-pos) ...)].  Otherwise it should
+      executed for its side-effect whenever the parser encounters an
-      accept 3, @schemeblock[(lambda (token-ok token-name token-value)
+      error.
-      ...)].  The first argument will be @scheme[#f] iff the error is
+
-      that an invalid token was received.  The second and third
+      If the @scheme[src-pos] declaration is present, the function
      should accept 5 arguments:
      @schemeblock[(lambda (tok-ok? tok-name tok-value _start-pos _end-pos) 
                     ....)]
      Otherwise it should accept 3:
      @schemeblock[(lambda (tok-ok? tok-name tok-value) 
                     ....)]
      The first argument will be @scheme[#f] if and only if the error
      is that an invalid token was received.  The second and third
      arguments will be the name and the value of the token at which
      the error was detected.  The fourth and fifth arguments, if
      present, provide the source positions of that token.}
      @item{@scheme[(tokens group-name ...)]
-      declares that all of the tokens defined in the groups can be
+      @item{@scheme[(precs (assoc token-id ...) ...)]
-      handled by this parser.}
+      @italic{OPTIONAL}
      Precedence declarations to resolve shift/reduce and
      reduce/reduce conflicts as in @exec{yacc}/@exec{bison}. An
      @scheme[assoc] must be one of @scheme[left], @scheme[right] or
      @scheme[nonassoc].  States with multiple shift/reduce or
      reduce/reduce conflicts (or some combination thereof) are not
      resolved with precedence.}
      @item{@scheme[(src-pos)] @italic{OPTIONAL}
-      @item{@scheme[(start non-terminal-name ...)]
+      Causes the generated parser to expect input in the form
      @scheme[(make-position-token _token _start-pos _end-pos)] instead
      of simply @scheme[_token].  Include this option when using the
      parser with a lexer generated with @scheme[lexer-src-pos].}
      declares a list of starting non-terminals for the grammar.}
-      @item{@scheme[(end token-name ...)]
+      @item{@scheme[(debug filename)] @italic{OPTIONAL}
-      specifies a set of tokens from which some member must follow any
+      Causes the parser generator to write the LALR table to the file
-      valid parse.  For example an EOF token would be specified for a
+      named @scheme[filename] (unless the file exists), where
-      parser that parses entire files and a @nonterm{newline} token
+      @scheme[filename] is a literal string.  Additionally, if a debug
-      for a parser that parses entire lines individually.}
+      file is specified, when a running generated parser encounters a
      parse error on some input file, after the user specified error
      expression returns, the complete parse stack is printed to
      assist in debugging the grammar of that particular parser.  The
      numbers in the stack printout correspond to the state numbers in
      the LALR table file.}
      @item{@scheme[(precs (assoc token-name ...) ...)]
      @italic{OPTIONAL}
-      precedence declarations to resolve shift/reduce and
+      @item{@scheme[(yacc-output filename)] @italic{OPTIONAL}
      reduce/reduce conflicts as in YACC/BISON.  @scheme[assoc] must
      be one of @scheme[left], @scheme[right] or @scheme[nonassoc].
      States with multiple shift/reduce or reduce/reduce conflicts or
      some combination thereof are not resolved with precedence.}
-      @item{@schemeblock0[(grammar (non-terminal ((grammar-symbol ...) (prec token-name) expression)
+      Causes the parser generator to write a grammar file in
-                            ...) 
+      approximately the syntax of @exec{yacc}/@exec{bison}.  The file
-              ...)]
+      might not be a valid @exec{yacc} file, because the scheme
      grammar can use symbols that are invalid in C.}
-      declares the @scheme[grammar] to be parsed.  Each
+
-      @scheme[grammar-symbol] must be a @scheme[token-name] or
+      @item{@scheme[(suppress)] @italic{OPTIONAL}
-      @scheme[non-terminal].  The @scheme[prec] declaration is
+
-      optional.  @scheme[expression] is a semantic action which will
+      Causes the parser generator not to report shift/reduce or
-      be evaluated when the input is found to match its corresponding
+      reduce/reduce conflicts.}
      production.  Each action is scheme code that has the same scope
      as its parser's definition, except that the variables
      @scheme[$1], ..., @scheme[$n] are bound in the expression and
      may hide outside bindings of @scheme[$1], ... @scheme[$n].
      @scheme[$x] is bound to the result of the action for the
      @scheme[$x]@superscript{th} grammar symbol on the right of the
      production, if that grammar symbol is a non-terminal, or the
      value stored in the token if the grammar symbol is a terminal.
      Here @scheme[n] is the number of @scheme[grammar-symbol]s on the
      right of the production.  If the @scheme[src-pos] option is
      present in the parser, variables @scheme[$1-start-pos], ...,
      @scheme[$n-start-pos] and @scheme[$1-end-pos], ...,
      @scheme[$n-end-pos] are also available and refer to the position
      structures corresponding to the start and end of the
      corresponding @scheme[grammar-symbol].  Grammar symbols defined
      as empty-tokens have no @scheme[$n] associated, but do have
      @scheme[$n-start-pos] and @scheme[$n-end-pos].  All of the
      productions for a given non-terminal must be grouped with it,
      i.e., no non-terminal may appear twice on the left hand side in
      a parser.}
    }
-The result of a parser expression with one start non-terminal is a
+    The result of a @scheme[parser] expression with one @scheme[start]
-function, @scheme[f], that takes one argument.  This argument must be
+    non-terminal is a function, @scheme[_parse], that takes one
-a zero argument function, @scheme[t], that produces successive tokens
+    argument.  This argument must be a zero argument function,
-of the input each time it is called.  If desired, the @scheme[t] may
+    @scheme[_gen], that produces successive tokens of the input each
-return symbols instead of tokens.  The parser will treat symbols as
+    time it is called.  If desired, the @scheme[_gen] may return
-tokens of the corresponding name (with @scheme[#f] as a value, so it
+    symbols instead of tokens, and the parser will treat symbols as
-is usual to return symbols only in the case of empty tokens).
+    tokens of the corresponding name (with @scheme[#f] as a value, so
-@scheme[f] returns the value associated with the parse tree by the
+    it is usual to return symbols only in the case of empty tokens).
-semantic actions.  If the parser encounters an error, after invoking
+    The @scheme[_parse] function returns the value associated with the
-the supplied error function, it will try to use error productions to
+    parse tree by the semantic actions.  If the parser encounters an
-continue parsing.  If it cannot, it raises a read error.
+    error, after invoking the supplied error function, it will try to
-
+    use error productions to continue parsing.  If it cannot, it
-If multiple start non-terminals are provided, the parser expression
+    raises @scheme[exn:fail:read].
-will result in a list of parsing functions (each one will individually
+
-behave as if it were the result of a parser expression with only one
+    If multiple non-terminals are provided in @scheme[start], the
-start non-terminal), one for each start non-terminal, in the same order.
+    @scheme[parser] expression produces a list of parsing functions,
-
+    one for each non-terminal in the same order. Each parsing function
-Each time the scheme code for a lexer is compiled (e.g. when a
+    is like the result of a parser expression with only one
-@filepath{.ss} file containing a @scheme[parser] form is loaded), the
+    @scheme[start] non-terminal.
-parser generator is run.  To avoid this overhead place the parser into
+
-a module and compile the module to a @filepath{.zo} bytecode file.}
+    Each time the scheme code for a @scheme[parser] is compiled
    (e.g. when a @filepath{.ss} file containing a @scheme[parser] form
    is loaded), the parser generator is run.  To avoid this overhead
    place the parser into a module and compile the module to a
    @filepath{.zo} bytecode file.}
@; ----------------------------------------------------------------------
@ -601,3 +683,6 @@ actions in the original grammar have nested blocks, the tool will fail.
 Annotated examples are in the @filepath{examples} subdirectory of the
@filepath{parser-tools} collection.}
@; ----------------------------------------------------------------------
@index-section[]
`@ -1,3 +1,3 @@`
	`#lang setup/infotab`	`#lang setup/infotab`

	`(define scribblings '(("parser-tools.scrbl" ())))`	`(define scribblings '(("parser-tools.scrbl" (multi-page))))`