fork parser tools
parent 2f5923e670
commit 86df35057c
@ -0,0 +1,11 @@
parser-tools-doc
Copyright (c) 2010-2014 PLT Design Inc.

This package is distributed under the GNU Lesser General Public
License (LGPL). This means that you can link this package into proprietary
applications, provided you follow the rules stated in the LGPL. You
can also modify this package; if you distribute a modified version,
you must distribute it under the terms of the LGPL, which in
particular means that you must release the source code for the
modified software. See http://www.gnu.org/copyleft/lesser.html
for more information.
@ -0,0 +1,14 @@
#lang info

(define collection 'multi)
(define deps '("base"))
(define build-deps '("scheme-lib"
                     "racket-doc"
                     "syntax-color-doc"
                     "br-parser-tools-lib"
                     "scribble-lib"))
(define update-implies '("br-parser-tools-lib"))

(define pkg-desc "documentation part of \"br-parser-tools\"")

(define pkg-authors '(mflatt))
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
Binary file not shown.
@ -0,0 +1,3 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html><head><meta http-equiv="content-type" content="text/html; charset=utf-8"/><meta name="viewport" content="width=device-width, initial-scale=0.8"/><title>Parser Tools: lex and yacc-style Parsing</title><link rel="stylesheet" type="text/css" href="../scribble.css" title="default"/><link rel="stylesheet" type="text/css" href="../manual-style.css" title="default"/><link rel="stylesheet" type="text/css" href="../manual-racket.css" title="default"/><link rel="stylesheet" type="text/css" href="../doc-site.css" title="default"/><script type="text/javascript" src="../scribble-common.js"></script><script type="text/javascript" src="../manual-racket.js"></script><script type="text/javascript" src="../doc-site.js"></script><script type="text/javascript" src="file:///Users/mb/git/racket/racket/doc/local-redirect/local-redirect.js"></script><script type="text/javascript" src="file:///Users/MB/Library/Racket/6.8.0.1/doc/local-redirect/local-user-redirect.js"></script><!--[if IE 6]><style type="text/css">.SIEHidden { overflow: hidden; }</style><![endif]--></head><body id="doc-racket-lang-org"><div class="tocset"><div class="tocview"><div class="tocviewlist tocviewlisttopspace"><div class="tocviewtitle"><table cellspacing="0" cellpadding="0"><tr><td style="width: 1em;"><a href="javascript:void(0);" title="Expand/Collapse" class="tocviewtoggle" onclick="TocviewToggle(this,"tocview_0");">▼</a></td><td></td><td><a href="" class="tocviewselflink" data-pltdoc="x">Parser Tools:<span class="mywbr"> </span> <span class="stt">lex</span> and <span class="stt">yacc</span>-<wbr></wbr>style Parsing</a></td></tr></table></div><div class="tocviewsublistonly" style="display: block;" id="tocview_0"><table cellspacing="0" cellpadding="0"><tr><td align="right">1 </td><td><a href="Lexers.html" class="tocviewlink" data-pltdoc="x">Lexers</a></td></tr><tr><td align="right">2 </td><td><a href="LALR_1__Parsers.html" class="tocviewlink" data-pltdoc="x">LALR(1) Parsers</a></td></tr><tr><td align="right">3 </td><td><a href="Context-Free_Parsers.html" class="tocviewlink" data-pltdoc="x">Context-<wbr></wbr>Free Parsers</a></td></tr><tr><td align="right">4 </td><td><a href="Converting_yacc_or_bison_Grammars.html" class="tocviewlink" data-pltdoc="x">Converting <span class="stt">yacc</span> or <span class="stt">bison</span> Grammars</a></td></tr><tr><td align="right"></td><td><a href="doc-index.html" class="tocviewlink" data-pltdoc="x">Index</a></td></tr></table></div></div></div><div class="tocsub"><div class="tocsubtitle">On this page:</div><table class="tocsublist" cellspacing="0"><tr><td><span class="tocsublinknumber"></span><a href="#%28part._top%29" class="tocsubseclink" data-pltdoc="x">Parser Tools:<span class="mywbr"> </span> <span class="stt">lex</span> and <span class="stt">yacc</span>-<wbr></wbr>style Parsing</a></td></tr></table></div></div><div class="maincolumn"><div class="main"><div class="versionbox"><span class="version">6.8.0.1</span></div><div class="navsettop"><span class="navleft"><form class="searchform"><input class="searchbox" style="color: #888;" type="text" value="...search manuals..." 
title="Enter a search string to search the manuals" onkeypress="return DoSearchKey(event, this, "6.8.0.1", "../");" onfocus="this.style.color="black"; this.style.textAlign="left"; if (this.value == "...search manuals...") this.value="";" onblur="if (this.value.match(/^ *$/)) { this.style.color="#888"; this.style.textAlign="center"; this.value="...search manuals..."; }"/></form> <a href="../index.html" title="up to the documentation top" data-pltdoc="x" onclick="return GotoPLTRoot("6.8.0.1");">top</a></span><span class="navright"> <span class="nonavigation">← prev</span> <a href="../index.html" title="up to the documentation top" data-pltdoc="x" onclick="return GotoPLTRoot("6.8.0.1");">up</a> <a href="Lexers.html" title="forward to "1 Lexers"" data-pltdoc="x">next →</a></span> </div><h2 x-source-module="(lib "parser-tools/br-parser-tools.scrbl")" x-source-pkg="br-parser-tools-doc" x-part-tag=""top""><a name="(part._top)"></a><a name="(part._.Parser_.Tools__lex_and_yacc-style_.Parsing)"></a>Parser Tools: <span class="stt">lex</span> and <span class="stt">yacc</span>-style Parsing</h2><div class="SAuthorListBox"><span class="SAuthorList"><p class="author">Scott Owens</p></span></div><p>This documentation assumes familiarity with <span class="stt">lex</span> and <span class="stt">yacc</span>
style lexer and parser generators.</p><table cellspacing="0" cellpadding="0"><tr><td><p><span class="hspace"> </span><a href="Lexers.html" class="toptoclink" data-pltdoc="x">1<span class="hspace"> </span>Lexers</a></p></td></tr><tr><td><p><span class="hspace"> </span><a href="Lexers.html#%28part._.Creating_a_.Lexer%29" class="toclink" data-pltdoc="x">1.1<span class="hspace"> </span>Creating a Lexer</a></p></td></tr><tr><td><p><span class="hspace"> </span><a href="Lexers.html#%28part._.Lexer_.Abbreviations_and_.Macros%29" class="toclink" data-pltdoc="x">1.2<span class="hspace"> </span>Lexer Abbreviations and Macros</a></p></td></tr><tr><td><p><span class="hspace"> </span><a href="Lexers.html#%28part._.Lexer_.S.R.E_.Operators%29" class="toclink" data-pltdoc="x">1.3<span class="hspace"> </span>Lexer SRE Operators</a></p></td></tr><tr><td><p><span class="hspace"> </span><a href="Lexers.html#%28part._.Lexer_.Legacy_.Operators%29" class="toclink" data-pltdoc="x">1.4<span class="hspace"> </span>Lexer Legacy Operators</a></p></td></tr><tr><td><p><span class="hspace"> </span><a href="Lexers.html#%28part._.Tokens%29" class="toclink" data-pltdoc="x">1.5<span class="hspace"> </span>Tokens</a></p></td></tr><tr><td><p><span class="hspace"></span></p></td></tr><tr><td><p><span class="hspace"> </span><a href="LALR_1__Parsers.html" class="toptoclink" data-pltdoc="x">2<span class="hspace"> </span>LALR(1) Parsers</a></p></td></tr><tr><td><p><span class="hspace"></span></p></td></tr><tr><td><p><span class="hspace"> </span><a href="Context-Free_Parsers.html" class="toptoclink" data-pltdoc="x">3<span class="hspace"> </span>Context-Free Parsers</a></p></td></tr><tr><td><p><span class="hspace"></span></p></td></tr><tr><td><p><span class="hspace"> </span><a href="Converting_yacc_or_bison_Grammars.html" class="toptoclink" data-pltdoc="x">4<span class="hspace"> </span>Converting <span class="stt">yacc</span> or <span class="stt">bison</span> Grammars</a></p></td></tr><tr><td><p><span class="hspace"></span></p></td></tr><tr><td><p><span class="hspace"> </span><a href="doc-index.html" class="toptoclink" data-pltdoc="x">Index</a></p></td></tr></table><div class="navsetbottom"><span class="navleft"><form class="searchform"><input class="searchbox" style="color: #888;" type="text" value="...search manuals..." title="Enter a search string to search the manuals" onkeypress="return DoSearchKey(event, this, "6.8.0.1", "../");" onfocus="this.style.color="black"; this.style.textAlign="left"; if (this.value == "...search manuals...") this.value="";" onblur="if (this.value.match(/^ *$/)) { this.style.color="#888"; this.style.textAlign="center"; this.value="...search manuals..."; }"/></form> <a href="../index.html" title="up to the documentation top" data-pltdoc="x" onclick="return GotoPLTRoot("6.8.0.1");">top</a></span><span class="navright"> <span class="nonavigation">← prev</span> <a href="../index.html" title="up to the documentation top" data-pltdoc="x" onclick="return GotoPLTRoot("6.8.0.1");">up</a> <a href="Lexers.html" title="forward to "1 Lexers"" data-pltdoc="x">next →</a></span> </div></div></div><div id="contextindicator"> </div></body></html>
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1 @@
("3b7b1bba00241dab306a03c51c1a0d3cd638493374bd6fe4f9796aa4f48ec21e154a911452939887" "2db07a7be06a931a970c9ee101e90228f9c1c6fcd9676e52634e35fc448a283a7429a4d38e6a8c1e" "d8d3930e051fde87175bb0f52680184b782e2377")
File diff suppressed because one or more lines are too long
@ -0,0 +1,319 @@
|
||||
/* See the beginning of "manual.css". */
|
||||
|
||||
/* Monospace: */
|
||||
|
||||
.RktIn, .RktRdr, .RktPn, .RktMeta,
|
||||
.RktMod, .RktKw, .RktVar, .RktSym,
|
||||
.RktRes, .RktOut, .RktCmt, .RktVal,
|
||||
.RktBlk, .RktErr {
|
||||
font-family: 'Source Code Pro', monospace;
|
||||
white-space: inherit;
|
||||
font-size: 1rem;
|
||||
}
|
||||
|
||||
/* this selector grabs the first linked Racket symbol
   in a definition box (i.e., the symbol being defined) */
|
||||
a.RktValDef, a.RktStxDef, a.RktSymDef,
|
||||
span.RktValDef, span.RktStxDef, span.RktSymDef
|
||||
{
|
||||
font-size: 1.15rem;
|
||||
color: black;
|
||||
font-weight: 600;
|
||||
}
|
||||
|
||||
|
||||
.inheritedlbl {
|
||||
font-family: 'Fira', sans;
|
||||
}
|
||||
|
||||
.RBackgroundLabelInner {
|
||||
font-family: inherit;
|
||||
}
|
||||
|
||||
/* ---------------------------------------- */
|
||||
/* Inherited methods, left margin */
|
||||
|
||||
.inherited {
|
||||
width: 95%;
|
||||
margin-top: 0.5em;
|
||||
text-align: left;
|
||||
background-color: inherit;
|
||||
}
|
||||
|
||||
.inherited td {
|
||||
font-size: 82%;
|
||||
padding-left: 0.5rem;
|
||||
line-height: 1.3;
|
||||
text-indent: 0;
|
||||
padding-right: 0;
|
||||
}
|
||||
|
||||
.inheritedlbl {
|
||||
font-style: normal;
|
||||
}
|
||||
|
||||
/* ---------------------------------------- */
|
||||
/* Racket text styles */
|
||||
|
||||
.RktIn {
|
||||
color: #cc6633;
|
||||
background-color: #eee;
|
||||
}
|
||||
|
||||
.RktInBG {
|
||||
background-color: #eee;
|
||||
}
|
||||
|
||||
|
||||
.refcolumn .RktInBG {
|
||||
background-color: white;
|
||||
}
|
||||
|
||||
.RktRdr {
|
||||
}
|
||||
|
||||
.RktPn {
|
||||
color: #843c24;
|
||||
}
|
||||
|
||||
.RktMeta {
|
||||
color: black;
|
||||
}
|
||||
|
||||
.RktMod {
|
||||
color: inherit;
|
||||
}
|
||||
|
||||
.RktOpt {
|
||||
color: black;
|
||||
}
|
||||
|
||||
.RktKw {
|
||||
color: black;
|
||||
}
|
||||
|
||||
.RktErr {
|
||||
color: red;
|
||||
font-style: italic;
|
||||
font-weight: 400;
|
||||
}
|
||||
|
||||
.RktVar {
|
||||
position: relative;
|
||||
left: -1px; font-style: italic;
|
||||
color: #444;
|
||||
}
|
||||
|
||||
.SVInsetFlow .RktVar {
|
||||
font-weight: 400;
|
||||
color: #444;
|
||||
}
|
||||
|
||||
|
||||
.RktSym {
|
||||
color: inherit;
|
||||
}
|
||||
|
||||
|
||||
|
||||
.RktValLink, .RktStxLink, .RktModLink {
|
||||
text-decoration: none;
|
||||
color: #07A;
|
||||
font-weight: 500;
|
||||
font-size: 1rem;
|
||||
}
|
||||
|
||||
/* for syntax links within headings */
|
||||
h2 a.RktStxLink, h3 a.RktStxLink, h4 a.RktStxLink, h5 a.RktStxLink,
|
||||
h2 a.RktValLink, h3 a.RktValLink, h4 a.RktValLink, h5 a.RktValLink,
|
||||
h2 .RktSym, h3 .RktSym, h4 .RktSym, h5 .RktSym,
|
||||
h2 .RktMod, h3 .RktMod, h4 .RktMod, h5 .RktMod,
|
||||
h2 .RktVal, h3 .RktVal, h4 .RktVal, h5 .RktVal,
|
||||
h2 .RktPn, h3 .RktPn, h4 .RktPn, h5 .RktPn {
|
||||
color: #333;
|
||||
font-size: 1.65rem;
|
||||
font-weight: 400;
|
||||
}
|
||||
|
||||
.toptoclink .RktStxLink, .toclink .RktStxLink,
|
||||
.toptoclink .RktValLink, .toclink .RktValLink,
|
||||
.toptoclink .RktModLink, .toclink .RktModLink {
|
||||
color: inherit;
|
||||
}
|
||||
|
||||
.tocset .RktValLink, .tocset .RktStxLink, .tocset .RktModLink {
|
||||
color: black;
|
||||
font-weight: 400;
|
||||
font-size: 0.9rem;
|
||||
}
|
||||
|
||||
.tocset td a.tocviewselflink .RktValLink,
|
||||
.tocset td a.tocviewselflink .RktStxLink,
|
||||
.tocset td a.tocviewselflink .RktMod,
|
||||
.tocset td a.tocviewselflink .RktSym {
|
||||
font-weight: lighter;
|
||||
color: white;
|
||||
}
|
||||
|
||||
|
||||
.RktRes {
|
||||
color: #0000af;
|
||||
}
|
||||
|
||||
.RktOut {
|
||||
color: #960096;
|
||||
}
|
||||
|
||||
.RktCmt {
|
||||
color: #c2741f;
|
||||
}
|
||||
|
||||
.RktVal {
|
||||
color: #228b22;
|
||||
}
|
||||
|
||||
/* ---------------------------------------- */
|
||||
/* Some inline styles */
|
||||
|
||||
.together { /* for definitions grouped together in one box */
|
||||
width: 100%;
|
||||
border-top: 2px solid white;
|
||||
}
|
||||
|
||||
tbody > tr:first-child > td > .together {
|
||||
border-top: 0px; /* erase border on first instance of together */
|
||||
}
|
||||
|
||||
.RktBlk {
|
||||
white-space: pre;
|
||||
text-align: left;
|
||||
}
|
||||
|
||||
.highlighted {
|
||||
font-size: 1rem;
|
||||
background-color: #fee;
|
||||
}
|
||||
|
||||
.defmodule {
|
||||
font-family: 'Source Code Pro';
|
||||
padding: 0.25rem 0.75rem 0.25rem 0.5rem;
|
||||
margin-bottom: 1rem;
|
||||
width: 100%;
|
||||
background-color: hsl(60, 29%, 94%);
|
||||
}
|
||||
|
||||
.defmodule a {
|
||||
color: #444;
|
||||
}
|
||||
|
||||
|
||||
.defmodule td span.hspace:first-child {
|
||||
position: absolute;
|
||||
width: 0;
|
||||
display: inline-block;
|
||||
}
|
||||
|
||||
.defmodule .RpackageSpec .Smaller,
|
||||
.defmodule .RpackageSpec .stt {
|
||||
font-size: 1rem;
|
||||
}
|
||||
|
||||
|
||||
.specgrammar {
|
||||
float: none;
|
||||
padding-left: 1em;
|
||||
}
|
||||
|
||||
|
||||
.RBibliography td {
|
||||
vertical-align: text-top;
|
||||
padding-top: 1em;
|
||||
}
|
||||
|
||||
.leftindent {
|
||||
margin-left: 2rem;
|
||||
margin-right: 0em;
|
||||
}
|
||||
|
||||
.insetpara {
|
||||
margin-left: 1em;
|
||||
margin-right: 1em;
|
||||
}
|
||||
|
||||
.SCodeFlow .Rfilebox {
|
||||
margin-left: -1em; /* see 17.2 of guide, module languages */
|
||||
}
|
||||
|
||||
.Rfiletitle {
|
||||
text-align: right;
|
||||
background-color: #eee;
|
||||
}
|
||||
|
||||
.SCodeFlow .Rfiletitle {
|
||||
border-top: 1px dotted gray;
|
||||
border-right: 1px dotted gray;
|
||||
}
|
||||
|
||||
|
||||
.Rfilename {
|
||||
border-top: 0;
|
||||
border-right: 0;
|
||||
padding-left: 0.5em;
|
||||
padding-right: 0.5em;
|
||||
background-color: inherit;
|
||||
}
|
||||
|
||||
.Rfilecontent {
|
||||
margin: 0.5em;
|
||||
}
|
||||
|
||||
.RpackageSpec {
|
||||
padding-right: 0;
|
||||
}
|
||||
|
||||
/* ---------------------------------------- */
|
||||
/* For background labels */
|
||||
|
||||
.RBackgroundLabel {
|
||||
float: right;
|
||||
width: 0px;
|
||||
height: 0px;
|
||||
}
|
||||
|
||||
.RBackgroundLabelInner {
|
||||
position: relative;
|
||||
width: 25em;
|
||||
left: -25.5em;
|
||||
top: 0.20rem; /* sensitive to monospaced font choice */
|
||||
text-align: right;
|
||||
z-index: 0;
|
||||
font-weight: 300;
|
||||
font-family: 'Source Code Pro';
|
||||
font-size: 0.9rem;
|
||||
color: gray;
|
||||
}
|
||||
|
||||
|
||||
.RpackageSpec .Smaller {
|
||||
font-weight: 300;
|
||||
font-family: 'Source Code Pro';
|
||||
font-size: 0.9rem;
|
||||
}
|
||||
|
||||
.RForeground {
|
||||
position: relative;
|
||||
left: 0px;
|
||||
top: 0px;
|
||||
z-index: 1;
|
||||
}
|
||||
|
||||
/* ---------------------------------------- */
|
||||
/* For section source modules & tags */
|
||||
|
||||
.RPartExplain {
|
||||
background: #eee;
|
||||
font-size: 0.9rem;
|
||||
margin-top: 0.2rem;
|
||||
padding: 0.2rem;
|
||||
text-align: left;
|
||||
}
|
@ -0,0 +1,98 @@
|
||||
/* For the Racket manual style */
|
||||
|
||||
AddOnLoad(function() {
|
||||
/* Look for header elements that have x-source-module and x-part tag.
|
||||
For those elements, add a hidden element that explains how to
|
||||
link to the section, and set the element's onclick() to display
|
||||
the explanation. */
|
||||
var tag_names = ["h1", "h2", "h3", "h4", "h5"];
|
||||
for (var j = 0; j < tag_names.length; j++) {
|
||||
elems = document.getElementsByTagName(tag_names[j]);
|
||||
for (var i = 0; i < elems.length; i++) {
|
||||
var elem = elems.item(i);
|
||||
AddPartTitleOnClick(elem);
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
function AddPartTitleOnClick(elem) {
|
||||
var mod_path = elem.getAttribute("x-source-module");
|
||||
var tag = elem.getAttribute("x-part-tag");
|
||||
if (mod_path && tag) {
|
||||
// Might not be present:
|
||||
var prefixes = elem.getAttribute("x-part-prefixes");
|
||||
|
||||
var info = document.createElement("div");
|
||||
info.className = "RPartExplain";
|
||||
|
||||
/* The "top" tag refers to a whole document: */
|
||||
var is_top = (tag == "\"top\"");
|
||||
info.appendChild(document.createTextNode("Link to this "
|
||||
+ (is_top ? "document" : "section")
|
||||
+ " with "));
|
||||
|
||||
/* Break `secref` into two lines if the module path and tag
|
||||
are long enough: */
|
||||
var is_long = (is_top ? false : ((mod_path.length
|
||||
+ tag.length
|
||||
+ (prefixes ? (16 + prefixes.length) : 0))
|
||||
> 60));
|
||||
|
||||
var line1 = document.createElement("div");
|
||||
var line1x = ((is_long && prefixes) ? document.createElement("div") : line1);
|
||||
var line2 = (is_long ? document.createElement("div") : line1);
|
||||
|
||||
function add(dest, str, cn) {
|
||||
var s = document.createElement("span");
|
||||
s.className = cn;
|
||||
s.style.whiteSpace = "nowrap";
|
||||
s.appendChild(document.createTextNode(str));
|
||||
dest.appendChild(s);
|
||||
}
|
||||
/* Construct a `secref` call with suitable syntax coloring: */
|
||||
add(line1, "\xA0@", "RktRdr");
|
||||
add(line1, (is_top ? "other-doc" : "secref"), "RktSym");
|
||||
add(line1, "[", "RktPn");
|
||||
if (!is_top)
|
||||
add(line1, tag, "RktVal");
|
||||
if (is_long) {
|
||||
/* indent additional lines: */
|
||||
if (prefixes)
|
||||
add(line1x, "\xA0\xA0\xA0\xA0\xA0\xA0\xA0\xA0", "RktPn");
|
||||
add(line2, "\xA0\xA0\xA0\xA0\xA0\xA0\xA0\xA0", "RktPn");
|
||||
}
|
||||
if (prefixes) {
|
||||
add(line1x, " #:tag-prefixes ", "RktPn");
|
||||
add(line1x, "'", "RktVal");
|
||||
add(line1x, prefixes, "RktVal");
|
||||
}
|
||||
if (!is_top)
|
||||
add(line2, " #:doc ", "RktPn");
|
||||
add(line2, "'", "RktVal");
|
||||
add(line2, mod_path, "RktVal");
|
||||
add(line2, "]", "RktPn");
|
||||
|
||||
info.appendChild(line1);
|
||||
if (is_long)
|
||||
info.appendChild(line1x);
|
||||
if (is_long)
|
||||
info.appendChild(line2);
|
||||
|
||||
info.style.display = "none";
|
||||
|
||||
/* Add the new element after the header: */
|
||||
var n = elem.nextSibling;
|
||||
if (n)
|
||||
elem.parentNode.insertBefore(info, n);
|
||||
else
|
||||
elem.parentNode.appendChild(info);
|
||||
|
||||
/* Clicking the header shows the explanation element: */
|
||||
elem.onclick = function () {
|
||||
if (info.style.display == "none")
|
||||
info.style.display = "block";
|
||||
else
|
||||
info.style.display = "none";
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,742 @@
|
||||
|
||||
/* See the beginning of "scribble.css".
|
||||
This file is used by the `scribble/manual` language, along with
|
||||
"manual-racket.css". */
|
||||
|
||||
@import url("manual-fonts.css");
|
||||
|
||||
* {
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
}
|
||||
|
||||
@media all {html {font-size: 15px;}}
|
||||
@media all and (max-width:940px){html {font-size: 14px;}}
|
||||
@media all and (max-width:850px){html {font-size: 13px;}}
|
||||
@media all and (max-width:830px){html {font-size: 12px;}}
|
||||
@media all and (max-width:740px){html {font-size: 11px;}}
|
||||
|
||||
/* CSS seems backward: List all the classes for which we want a
|
||||
particular font, so that the font can be changed in one place. (It
|
||||
would be nicer to reference a font definition from all the places
|
||||
that we want it.)
|
||||
|
||||
As you read the rest of the file, remember to double-check here to
|
||||
see if any font is set. */
|
||||
|
||||
/* Monospace: */
|
||||
.maincolumn, .refpara, .refelem, .tocset, .stt, .hspace, .refparaleft, .refelemleft {
|
||||
font-family: 'Source Code Pro', monospace;
|
||||
white-space: inherit;
|
||||
font-size: 1rem;
|
||||
}
|
||||
|
||||
.stt {
|
||||
font-weight: 500;
|
||||
}
|
||||
|
||||
h2 .stt {
|
||||
font-size: 2.7rem;
|
||||
}
|
||||
|
||||
.toptoclink .stt {
|
||||
font-size: inherit;
|
||||
}
|
||||
.toclink .stt {
|
||||
font-size: 90%;
|
||||
}
|
||||
|
||||
.RpackageSpec .stt {
|
||||
font-weight: 300;
|
||||
font-family: 'Source Code Pro';
|
||||
font-size: 0.9rem;
|
||||
}
|
||||
|
||||
h3 .stt, h4 .stt, h5 .stt {
|
||||
color: #333;
|
||||
font-size: 1.65rem;
|
||||
font-weight: 400;
|
||||
}
|
||||
|
||||
|
||||
/* Serif: */
|
||||
.main, .refcontent, .tocview, .tocsub, .sroman, i {
|
||||
font-family: 'Charter', serif;
|
||||
font-size: 1.18rem;
|
||||
}
|
||||
|
||||
|
||||
/* Sans-serif: */
|
||||
.version, .versionNoNav, .ssansserif {
|
||||
font-family: 'Fira', sans-serif;
|
||||
}
|
||||
.ssansserif {
|
||||
font-family: 'Fira';
|
||||
font-weight: 500;
|
||||
font-size: 0.9em;
|
||||
}
|
||||
|
||||
.tocset .ssansserif {
|
||||
font-size: 100%;
|
||||
}
|
||||
|
||||
/* ---------------------------------------- */
|
||||
|
||||
p, .SIntrapara {
|
||||
display: block;
|
||||
margin: 0 0 1em 0;
|
||||
line-height: 1.4;
|
||||
}
|
||||
|
||||
.compact {
|
||||
padding: 0 0 1em 0;
|
||||
}
|
||||
|
||||
li {
|
||||
list-style-position: outside;
|
||||
margin-left: 1.2em;
|
||||
}
|
||||
|
||||
h1, h2, h3, h4, h5, h6, h7, h8 {
|
||||
font-family: 'Fira';
|
||||
font-weight: 300;
|
||||
font-size: 1.6rem;
|
||||
color: #333;
|
||||
margin-top: inherit;
|
||||
margin-bottom: 1rem;
|
||||
line-height: 1.25;
|
||||
-moz-font-feature-settings: 'tnum=1';
|
||||
-moz-font-feature-settings: 'tnum' 1;
|
||||
-webkit-font-feature-settings: 'tnum' 1;
|
||||
-o-font-feature-settings: 'tnum' 1;
|
||||
-ms-font-feature-settings: 'tnum' 1;
|
||||
font-feature-settings: 'tnum' 1;
|
||||
|
||||
}
|
||||
|
||||
h3, h4, h5, h6, h7, h8 {
|
||||
border-top: 1px solid black;
|
||||
}
|
||||
|
||||
|
||||
|
||||
h2 { /* per-page main title */
|
||||
font-family: 'Miso';
|
||||
font-weight: bold;
|
||||
margin-top: 4rem;
|
||||
font-size: 3rem;
|
||||
line-height: 1.1;
|
||||
width: 90%;
|
||||
}
|
||||
|
||||
h3, h4, h5, h6, h7, h8 {
|
||||
margin-top: 2em;
|
||||
padding-top: 0.1em;
|
||||
margin-bottom: 0.75em;
|
||||
}
|
||||
|
||||
/* ---------------------------------------- */
|
||||
/* Main */
|
||||
|
||||
body {
|
||||
color: black;
|
||||
background-color: white;
|
||||
}
|
||||
|
||||
.maincolumn {
|
||||
width: auto;
|
||||
margin-top: 4rem;
|
||||
margin-left: 17rem;
|
||||
margin-right: 2rem;
|
||||
margin-bottom: 10rem; /* to avoid fixed bottom nav bar */
|
||||
max-width: 700px;
|
||||
min-width: 370px; /* below this size, code samples don't fit */
|
||||
}
|
||||
|
||||
a {
|
||||
text-decoration: inherit;
|
||||
}
|
||||
|
||||
a, .toclink, .toptoclink, .tocviewlink, .tocviewselflink, .tocviewtoggle, .plainlink,
|
||||
.techinside, .techoutside:hover, .techinside:hover {
|
||||
color: #07A;
|
||||
}
|
||||
|
||||
a:hover {
|
||||
text-decoration: underline;
|
||||
}
|
||||
|
||||
|
||||
/* ---------------------------------------- */
|
||||
/* Navigation */
|
||||
|
||||
.navsettop, .navsetbottom {
|
||||
left: 0;
|
||||
width: 15rem;
|
||||
height: 6rem;
|
||||
font-family: 'Fira';
|
||||
font-size: 0.9rem;
|
||||
border-bottom: 0px solid hsl(216, 15%, 70%);
|
||||
background-color: inherit;
|
||||
padding: 0;
|
||||
}
|
||||
|
||||
.navsettop {
|
||||
position: absolute;
|
||||
top: 0;
|
||||
left: 0;
|
||||
margin-bottom: 0;
|
||||
border-bottom: 0;
|
||||
}
|
||||
|
||||
.navsettop a, .navsetbottom a {
|
||||
color: black;
|
||||
}
|
||||
|
||||
.navsettop a:hover, .navsetbottom a:hover {
|
||||
background: hsl(216, 78%, 95%);
|
||||
text-decoration: none;
|
||||
}
|
||||
|
||||
.navleft, .navright {
|
||||
position: static;
|
||||
float: none;
|
||||
margin: 0;
|
||||
white-space: normal;
|
||||
}
|
||||
|
||||
|
||||
.navleft a {
|
||||
display: inline-block;
|
||||
}
|
||||
|
||||
.navright a {
|
||||
display: inline-block;
|
||||
text-align: center;
|
||||
}
|
||||
|
||||
.navleft a, .navright a, .navright span {
|
||||
display: inline-block;
|
||||
padding: 0.5rem;
|
||||
min-width: 1rem;
|
||||
}
|
||||
|
||||
|
||||
.navright {
|
||||
height: 2rem;
|
||||
white-space: nowrap;
|
||||
}
|
||||
|
||||
|
||||
.navsetbottom {
|
||||
display: none;
|
||||
}
|
||||
|
||||
.nonavigation {
|
||||
color: #889;
|
||||
}
|
||||
|
||||
.searchform {
|
||||
display: block;
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
border-bottom: 1px solid #eee;
|
||||
height: 4rem;
|
||||
}
|
||||
|
||||
.nosearchform {
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
height: 4rem;
|
||||
}
|
||||
|
||||
.searchbox {
|
||||
font-size: 1rem;
|
||||
width: 12rem;
|
||||
margin: 1rem;
|
||||
padding: 0.25rem;
|
||||
vertical-align: middle;
|
||||
background-color: white;
|
||||
}
|
||||
|
||||
#search_box {
|
||||
font-size: 0.8rem;
|
||||
}
|
||||
|
||||
/* ---------------------------------------- */
|
||||
/* Version */
|
||||
|
||||
.versionbox {
|
||||
position: absolute;
|
||||
float: none;
|
||||
top: 0.25rem;
|
||||
left: 17rem;
|
||||
z-index: 11000;
|
||||
height: 2em;
|
||||
font-size: 70%;
|
||||
font-weight: lighter;
|
||||
width: inherit;
|
||||
margin: 0;
|
||||
}
|
||||
.version, .versionNoNav {
|
||||
font-size: inherit;
|
||||
}
|
||||
.version:before, .versionNoNav:before {
|
||||
content: "v.";
|
||||
}
|
||||
|
||||
|
||||
/* ---------------------------------------- */
|
||||
/* Margin notes */
|
||||
|
||||
/* cancel scribble.css styles: */
|
||||
.refpara, .refelem {
|
||||
position: static;
|
||||
float: none;
|
||||
height: auto;
|
||||
width: auto;
|
||||
margin: 0;
|
||||
}
|
||||
|
||||
.refcolumn {
|
||||
position: static;
|
||||
display: block;
|
||||
width: auto;
|
||||
font-size: inherit;
|
||||
margin: 2rem;
|
||||
margin-left: 2rem;
|
||||
padding: 0.5em;
|
||||
padding-left: 0.75em;
|
||||
padding-right: 1em;
|
||||
background: hsl(60, 29%, 94%);
|
||||
border: 1px solid #ccb;
|
||||
border-left: 0.4rem solid #ccb;
|
||||
}
|
||||
|
||||
|
||||
/* slightly different handling for margin-note* on narrow screens */
|
||||
@media all and (max-width:1340px) {
|
||||
span.refcolumn {
|
||||
float: right;
|
||||
width: 50%;
|
||||
margin-left: 1rem;
|
||||
margin-bottom: 0.8rem;
|
||||
margin-top: 1.2rem;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
.refcontent, .refcontent p {
|
||||
line-height: 1.5;
|
||||
margin: 0;
|
||||
}
|
||||
|
||||
.refcontent p + p {
|
||||
margin-top: 1em;
|
||||
}
|
||||
|
||||
.refcontent a {
|
||||
font-weight: 400;
|
||||
}
|
||||
|
||||
.refpara, .refparaleft {
|
||||
top: -1em;
|
||||
}
|
||||
|
||||
|
||||
@media all and (max-width:600px) {
|
||||
.refcolumn {
|
||||
margin-left: 0;
|
||||
margin-right: 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@media all and (min-width:1340px) {
|
||||
.refcolumn {
|
||||
margin: 0 -22.5rem 1rem 0;
|
||||
float: right;
|
||||
clear: right;
|
||||
width: 18rem;
|
||||
}
|
||||
}
|
||||
|
||||
.refcontent {
|
||||
font-family: 'Fira';
|
||||
font-size: 1rem;
|
||||
line-height: 1.6;
|
||||
margin: 0 0 0 0;
|
||||
}
|
||||
|
||||
|
||||
.refparaleft, .refelemleft {
|
||||
position: relative;
|
||||
float: left;
|
||||
right: 2em;
|
||||
height: 0em;
|
||||
width: 13em;
|
||||
margin: 0em 0em 0em -13em;
|
||||
}
|
||||
|
||||
.refcolumnleft {
|
||||
background-color: hsl(60, 29%, 94%);
|
||||
display: block;
|
||||
position: relative;
|
||||
width: 13em;
|
||||
font-size: 85%;
|
||||
border: 0.5em solid hsl(60, 29%, 94%);
|
||||
margin: 0 0 0 0;
|
||||
}
|
||||
|
||||
|
||||
/* ---------------------------------------- */
|
||||
/* Table of contents, left margin */
|
||||
|
||||
.tocset {
|
||||
position: absolute;
|
||||
float: none;
|
||||
left: 0;
|
||||
top: 0rem;
|
||||
width: 14rem;
|
||||
padding: 7rem 0.5rem 0.5rem 0.5rem;
|
||||
background-color: hsl(216, 15%, 70%);
|
||||
margin: 0;
|
||||
|
||||
}
|
||||
|
||||
.tocset td {
|
||||
vertical-align: text-top;
|
||||
padding-bottom: 0.4rem;
|
||||
padding-left: 0.2rem;
|
||||
line-height: 1.1;
|
||||
font-family: 'Fira';
|
||||
-moz-font-feature-settings: 'tnum=1';
|
||||
-moz-font-feature-settings: 'tnum' 1;
|
||||
-webkit-font-feature-settings: 'tnum' 1;
|
||||
-o-font-feature-settings: 'tnum' 1;
|
||||
-ms-font-feature-settings: 'tnum' 1;
|
||||
font-feature-settings: 'tnum' 1;
|
||||
|
||||
}
|
||||
|
||||
.tocset td a {
|
||||
color: black;
|
||||
font-weight: 400;
|
||||
}
|
||||
|
||||
|
||||
.tocview {
|
||||
text-align: left;
|
||||
background-color: inherit;
|
||||
}
|
||||
|
||||
|
||||
.tocview td, .tocsub td {
|
||||
line-height: 1.3;
|
||||
}
|
||||
|
||||
|
||||
.tocview table, .tocsub table {
|
||||
width: 90%;
|
||||
}
|
||||
|
||||
.tocset td a.tocviewselflink {
|
||||
font-weight: lighter;
|
||||
font-size: 110%; /* monospaced styles below don't need to enlarge */
|
||||
color: white;
|
||||
}
|
||||
|
||||
.tocviewselflink {
|
||||
text-decoration: none;
|
||||
}
|
||||
|
||||
.tocsub {
|
||||
text-align: left;
|
||||
margin-top: 0.5em;
|
||||
background-color: inherit;
|
||||
}
|
||||
|
||||
.tocviewlist, .tocsublist {
|
||||
margin-left: 0.2em;
|
||||
margin-right: 0.2em;
|
||||
padding-top: 0.2em;
|
||||
padding-bottom: 0.2em;
|
||||
}
|
||||
.tocviewlist table {
|
||||
font-size: 82%;
|
||||
}
|
||||
|
||||
.tocviewlisttopspace {
|
||||
margin-bottom: 1em;
|
||||
}
|
||||
|
||||
.tocviewsublist, .tocviewsublistonly, .tocviewsublisttop, .tocviewsublistbottom {
|
||||
margin-left: 0.4em;
|
||||
border-left: 1px solid #99a;
|
||||
padding-left: 0.8em;
|
||||
}
|
||||
.tocviewsublist {
|
||||
margin-bottom: 1em;
|
||||
}
|
||||
.tocviewsublist table,
|
||||
.tocviewsublistonly table,
|
||||
.tocviewsublisttop table,
|
||||
.tocviewsublistbottom table,
|
||||
table.tocsublist {
|
||||
font-size: 1rem;
|
||||
}
|
||||
|
||||
.tocviewsublist td, .tocviewsublistbottom td, .tocviewsublisttop td, .tocsub td,
|
||||
.tocviewsublistonly td {
|
||||
font-size: 90%;
|
||||
}
|
||||
|
||||
|
||||
.tocviewtoggle {
|
||||
font-size: 75%; /* looks better, and avoids bounce when toggling sub-sections due to font alignments */
|
||||
}
|
||||
|
||||
.tocsublist td {
|
||||
padding-left: 0.5rem;
|
||||
padding-top: 0.25rem;
|
||||
text-indent: 0;
|
||||
}
|
||||
|
||||
.tocsublinknumber {
|
||||
font-size: 100%;
|
||||
}
|
||||
|
||||
.tocsublink {
|
||||
font-size: 82%;
|
||||
text-decoration: none;
|
||||
}
|
||||
|
||||
.tocsubseclink {
|
||||
font-size: 100%;
|
||||
text-decoration: none;
|
||||
}
|
||||
|
||||
.tocsubnonseclink {
|
||||
font-size: 82%;
|
||||
text-decoration: none;
|
||||
margin-left: 1rem;
|
||||
padding-left: 0;
|
||||
display: inline-block;
|
||||
}
|
||||
|
||||
/* the label "on this page" */
|
||||
.tocsubtitle {
|
||||
display: block;
|
||||
font-size: 62%;
|
||||
font-family: 'Fira';
|
||||
font-weight: bolder;
|
||||
font-style: normal;
|
||||
letter-spacing: 2px;
|
||||
text-transform: uppercase;
|
||||
margin: 0.5em;
|
||||
}
|
||||
|
||||
.toptoclink {
|
||||
font-weight: bold;
|
||||
font-size: 110%;
|
||||
margin-bottom: 0.5rem;
|
||||
margin-top: 1.5rem;
|
||||
display: inline-block;
|
||||
}
|
||||
|
||||
.toclink {
|
||||
font-size: inherit;
|
||||
}
|
||||
|
||||
/* ---------------------------------------- */
|
||||
/* Some inline styles */
|
||||
|
||||
.indexlink {
|
||||
text-decoration: none;
|
||||
}
|
||||
|
||||
pre {
|
||||
margin-left: 2em;
|
||||
}
|
||||
|
||||
blockquote {
|
||||
margin-left: 2em;
|
||||
margin-right: 2em;
|
||||
margin-bottom: 1em;
|
||||
}
|
||||
|
||||
.SCodeFlow {
|
||||
border-left: 1px dotted black;
|
||||
padding-left: 1em;
|
||||
padding-right: 1em;
|
||||
margin-top: 1em;
|
||||
margin-bottom: 1em;
|
||||
margin-left: 0em;
|
||||
margin-right: 2em;
|
||||
white-space: nowrap;
|
||||
line-height: 1.5;
|
||||
}
|
||||
|
||||
.SCodeFlow img {
|
||||
margin-top: 0.5em;
|
||||
margin-bottom: 0.5em;
|
||||
}
|
||||
|
||||
.boxed {
|
||||
margin: 0;
|
||||
margin-top: 2em;
|
||||
padding: 0.25em;
|
||||
padding-bottom: 0.5em;
|
||||
background: #f3f3f3;
|
||||
box-sizing:border-box;
|
||||
border-top: 1px solid #99b;
|
||||
background: hsl(216, 78%, 95%);
|
||||
background: -moz-linear-gradient(to bottom left, hsl(0, 0%, 99%) 0%, hsl(216, 78%, 95%) 100%);
|
||||
background: -webkit-linear-gradient(to bottom left, hsl(0, 0%, 99%) 0%, hsl(216, 78%, 95%) 100%);
|
||||
background: -o-linear-gradient(to bottom left, hsl(0, 0%, 99%) 0%, hsl(216, 78%, 95%) 100%);
|
||||
background: -ms-linear-gradient(to bottom left, hsl(0, 0%, 99%) 0%, hsl(216, 78%, 95%) 100%);
|
||||
background: linear-gradient(to bottom left, hsl(0, 0%, 99%) 0%, hsl(216, 78%, 95%) 100%);
|
||||
}
|
||||
|
||||
blockquote > blockquote.SVInsetFlow {
|
||||
/* resolves issue in e.g. /reference/notation.html */
|
||||
margin-top: 0em;
|
||||
}
|
||||
|
||||
.leftindent .SVInsetFlow { /* see e.g. section 4.5 of Racket Guide */
|
||||
margin-top: 1em;
|
||||
margin-bottom: 1em;
|
||||
}
|
||||
|
||||
.SVInsetFlow a, .SCodeFlow a {
|
||||
color: #07A;
|
||||
font-weight: 500;
|
||||
}
|
||||
|
||||
.SubFlow {
|
||||
display: block;
|
||||
margin: 0em;
|
||||
}
|
||||
|
||||
.boxed {
|
||||
width: 100%;
|
||||
background-color: inherit;
|
||||
}
|
||||
|
||||
.techoutside { text-decoration: none; }
|
||||
|
||||
.SAuthorListBox {
|
||||
position: static;
|
||||
float: none;
|
||||
font-family: 'Fira';
|
||||
font-weight: 300;
|
||||
font-size: 110%;
|
||||
margin-top: 1rem;
|
||||
margin-bottom: 3rem;
|
||||
width: 30rem;
|
||||
height: auto;
|
||||
}
|
||||
|
||||
.author > a { /* email links within author block */
|
||||
font-weight: inherit;
|
||||
color: inherit;
|
||||
}
|
||||
|
||||
.SAuthorList {
|
||||
font-size: 82%;
|
||||
}
|
||||
.SAuthorList:before {
|
||||
content: "by ";
|
||||
}
|
||||
.author {
|
||||
display: inline;
|
||||
white-space: nowrap;
|
||||
}
|
||||
|
||||
/* phone + tablet styles */
|
||||
|
||||
@media all and (max-width:720px){
|
||||
|
||||
|
||||
@media all and (max-width:720px){
|
||||
|
||||
@media all {html {font-size: 15px;}}
|
||||
@media all and (max-width:700px){html {font-size: 14px;}}
|
||||
@media all and (max-width:630px){html {font-size: 13px;}}
|
||||
@media all and (max-width:610px){html {font-size: 12px;}}
|
||||
@media all and (max-width:550px){html {font-size: 11px;}}
|
||||
@media all and (max-width:520px){html {font-size: 10px;}}
|
||||
|
||||
.navsettop, .navsetbottom {
|
||||
display: block;
|
||||
position: absolute;
|
||||
width: 100%;
|
||||
height: 4rem;
|
||||
border: 0;
|
||||
background-color: hsl(216, 15%, 70%);
|
||||
}
|
||||
|
||||
.searchform {
|
||||
display: inline;
|
||||
border: 0;
|
||||
}
|
||||
|
||||
.navright {
|
||||
position: absolute;
|
||||
right: 1.5rem;
|
||||
margin-top: 1rem;
|
||||
border: 0px solid red;
|
||||
}
|
||||
|
||||
.navsetbottom {
|
||||
display: block;
|
||||
margin-top: 8rem;
|
||||
}
|
||||
|
||||
.tocset {
|
||||
display: none;
|
||||
}
|
||||
|
||||
.tocset table, .tocset tbody, .tocset tr, .tocset td {
|
||||
display: inline;
|
||||
}
|
||||
|
||||
.tocview {
|
||||
display: none;
|
||||
}
|
||||
|
||||
.tocsub .tocsubtitle {
|
||||
display: none;
|
||||
}
|
||||
|
||||
.versionbox {
|
||||
top: 4.5rem;
|
||||
left: 1rem; /* same distance as main-column */
|
||||
z-index: 11000;
|
||||
height: 2em;
|
||||
font-size: 70%;
|
||||
font-weight: lighter;
|
||||
}
|
||||
|
||||
|
||||
.maincolumn {
|
||||
margin-left: 1em;
|
||||
margin-top: 7rem;
|
||||
margin-bottom: 0rem;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/* print styles : hide the navigation elements */
|
||||
@media print {
|
||||
.tocset,
|
||||
.navsettop,
|
||||
.navsetbottom { display: none; }
|
||||
.maincolumn {
|
||||
width: auto;
|
||||
margin-right: 13em;
|
||||
margin-left: 0;
|
||||
}
|
||||
}
|
@ -0,0 +1,249 @@
|
||||
|
||||
/* See the beginning of "scribble.css". */
|
||||
|
||||
/* Monospace: */
|
||||
.RktIn, .RktRdr, .RktPn, .RktMeta,
|
||||
.RktMod, .RktKw, .RktVar, .RktSym,
|
||||
.RktRes, .RktOut, .RktCmt, .RktVal,
|
||||
.RktBlk {
|
||||
font-family: monospace;
|
||||
white-space: inherit;
|
||||
}
|
||||
|
||||
/* Serif: */
|
||||
.inheritedlbl {
|
||||
font-family: serif;
|
||||
}
|
||||
|
||||
/* Sans-serif: */
|
||||
.RBackgroundLabelInner {
|
||||
font-family: sans-serif;
|
||||
}
|
||||
|
||||
/* ---------------------------------------- */
|
||||
/* Inherited methods, left margin */
|
||||
|
||||
.inherited {
|
||||
width: 100%;
|
||||
margin-top: 0.5em;
|
||||
text-align: left;
|
||||
background-color: #ECF5F5;
|
||||
}
|
||||
|
||||
.inherited td {
|
||||
font-size: 82%;
|
||||
padding-left: 1em;
|
||||
text-indent: -0.8em;
|
||||
padding-right: 0.2em;
|
||||
}
|
||||
|
||||
.inheritedlbl {
|
||||
font-style: italic;
|
||||
}
|
||||
|
||||
/* ---------------------------------------- */
|
||||
/* Racket text styles */
|
||||
|
||||
.RktIn {
|
||||
color: #cc6633;
|
||||
background-color: #eeeeee;
|
||||
}
|
||||
|
||||
.RktInBG {
|
||||
background-color: #eeeeee;
|
||||
}
|
||||
|
||||
.RktRdr {
|
||||
}
|
||||
|
||||
.RktPn {
|
||||
color: #843c24;
|
||||
}
|
||||
|
||||
.RktMeta {
|
||||
color: black;
|
||||
}
|
||||
|
||||
.RktMod {
|
||||
color: black;
|
||||
}
|
||||
|
||||
.RktOpt {
|
||||
color: black;
|
||||
}
|
||||
|
||||
.RktKw {
|
||||
color: black;
|
||||
}
|
||||
|
||||
.RktErr {
|
||||
color: red;
|
||||
font-style: italic;
|
||||
}
|
||||
|
||||
.RktVar {
|
||||
color: #262680;
|
||||
font-style: italic;
|
||||
}
|
||||
|
||||
.RktSym {
|
||||
color: #262680;
|
||||
}
|
||||
|
||||
.RktSymDef { /* used with RktSym at def site */
|
||||
}
|
||||
|
||||
.RktValLink {
|
||||
text-decoration: none;
|
||||
color: blue;
|
||||
}
|
||||
|
||||
.RktValDef { /* used with RktValLink at def site */
|
||||
}
|
||||
|
||||
.RktModLink {
|
||||
text-decoration: none;
|
||||
color: blue;
|
||||
}
|
||||
|
||||
.RktStxLink {
|
||||
text-decoration: none;
|
||||
color: black;
|
||||
}
|
||||
|
||||
.RktStxDef { /* used with RktStxLink at def site */
|
||||
}
|
||||
|
||||
.RktRes {
|
||||
color: #0000af;
|
||||
}
|
||||
|
||||
.RktOut {
|
||||
color: #960096;
|
||||
}
|
||||
|
||||
.RktCmt {
|
||||
color: #c2741f;
|
||||
}
|
||||
|
||||
.RktVal {
|
||||
color: #228b22;
|
||||
}
|
||||
|
||||
/* ---------------------------------------- */
|
||||
/* Some inline styles */
|
||||
|
||||
.together {
|
||||
width: 100%;
|
||||
}
|
||||
|
||||
.prototype, .argcontract, .RBoxed {
|
||||
white-space: nowrap;
|
||||
}
|
||||
|
||||
.prototype td {
|
||||
vertical-align: text-top;
|
||||
}
|
||||
|
||||
.RktBlk {
|
||||
white-space: inherit;
|
||||
text-align: left;
|
||||
}
|
||||
|
||||
.RktBlk tr {
|
||||
white-space: inherit;
|
||||
}
|
||||
|
||||
.RktBlk td {
|
||||
vertical-align: baseline;
|
||||
white-space: inherit;
|
||||
}
|
||||
|
||||
.argcontract td {
|
||||
vertical-align: text-top;
|
||||
}
|
||||
|
||||
.highlighted {
|
||||
background-color: #ddddff;
|
||||
}
|
||||
|
||||
.defmodule {
|
||||
width: 100%;
|
||||
background-color: #F5F5DC;
|
||||
}
|
||||
|
||||
.specgrammar {
|
||||
float: right;
|
||||
}
|
||||
|
||||
.RBibliography td {
|
||||
vertical-align: text-top;
|
||||
}
|
||||
|
||||
.leftindent {
|
||||
margin-left: 1em;
|
||||
margin-right: 0em;
|
||||
}
|
||||
|
||||
.insetpara {
|
||||
margin-left: 1em;
|
||||
margin-right: 1em;
|
||||
}
|
||||
|
||||
.Rfilebox {
|
||||
}
|
||||
|
||||
.Rfiletitle {
|
||||
text-align: right;
|
||||
margin: 0em 0em 0em 0em;
|
||||
}
|
||||
|
||||
.Rfilename {
|
||||
border-top: 1px solid #6C8585;
|
||||
border-right: 1px solid #6C8585;
|
||||
padding-left: 0.5em;
|
||||
padding-right: 0.5em;
|
||||
background-color: #ECF5F5;
|
||||
}
|
||||
|
||||
.Rfilecontent {
|
||||
margin: 0em 0em 0em 0em;
|
||||
}
|
||||
|
||||
.RpackageSpec {
|
||||
padding-right: 0.5em;
|
||||
}
|
||||
|
||||
/* ---------------------------------------- */
|
||||
/* For background labels */
|
||||
|
||||
.RBackgroundLabel {
|
||||
float: right;
|
||||
width: 0px;
|
||||
height: 0px;
|
||||
}
|
||||
|
||||
.RBackgroundLabelInner {
|
||||
position: relative;
|
||||
width: 25em;
|
||||
left: -25.5em;
|
||||
top: 0px;
|
||||
text-align: right;
|
||||
color: white;
|
||||
z-index: 0;
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
.RForeground {
|
||||
position: relative;
|
||||
left: 0px;
|
||||
top: 0px;
|
||||
z-index: 1;
|
||||
}
|
||||
|
||||
/* ---------------------------------------- */
|
||||
/* History */
|
||||
|
||||
.SHistory {
|
||||
font-size: 82%;
|
||||
}
|
@ -0,0 +1,170 @@
|
||||
// Common functionality for PLT documentation pages
|
||||
|
||||
// Page Parameters ------------------------------------------------------------
|
||||
|
||||
var page_query_string = location.search.substring(1);
|
||||
|
||||
var page_args =
|
||||
((function(){
|
||||
if (!page_query_string) return [];
|
||||
var args = page_query_string.split(/[&;]/);
|
||||
for (var i=0; i<args.length; i++) {
|
||||
var a = args[i];
|
||||
var p = a.indexOf('=');
|
||||
if (p >= 0) args[i] = [a.substring(0,p), a.substring(p+1)];
|
||||
else args[i] = [a, false];
|
||||
}
|
||||
return args;
|
||||
})());
|
||||
|
||||
function GetPageArg(key, def) {
|
||||
for (var i=0; i<page_args.length; i++)
|
||||
if (page_args[i][0] == key) return decodeURIComponent(page_args[i][1]);
|
||||
return def;
|
||||
}
|
||||
|
||||
function MergePageArgsIntoLink(a) {
|
||||
if (page_args.length == 0 ||
|
||||
(!a.attributes["data-pltdoc"]) || (a.attributes["data-pltdoc"].value == ""))
|
||||
return;
|
||||
a.href = MergePageArgsIntoUrl(a.href);
|
||||
}
|
||||
|
||||
function MergePageArgsIntoUrl(href) {
|
||||
var mtch = href.match(/^([^?#]*)(?:\?([^#]*))?(#.*)?$/);
|
||||
if (mtch == undefined) { // I think this never happens
|
||||
return "?" + page_query_string;
|
||||
}
|
||||
if (!mtch[2]) {
|
||||
return mtch[1] + "?" + page_query_string + (mtch[3] || "");
|
||||
}
|
||||
// need to merge here, precedence to arguments that exist in `a'
|
||||
var i, j;
|
||||
var prefix = mtch[1], str = mtch[2] || "", suffix = mtch[3] || "";
|
||||
var args = str.split(/[&;]/);
|
||||
for (i=0; i<args.length; i++) {
|
||||
j = args[i].indexOf('=');
|
||||
if (j) args[i] = args[i].substring(0,j);
|
||||
}
|
||||
var additions = "";
|
||||
for (i=0; i<page_args.length; i++) {
|
||||
var exists = false;
|
||||
for (j=0; j<args.length; j++)
|
||||
if (args[j] == page_args[i][0]) { exists = true; break; }
|
||||
if (!exists) str += "&" + page_args[i][0] + "=" + page_args[i][1];
|
||||
}
|
||||
return prefix + "?" + str + suffix;
|
||||
}
|
||||
|
||||
// Cookies --------------------------------------------------------------------
|
||||
|
||||
// Actually, try localStorage (a la HTML 5), first.
|
||||
|
||||
function GetCookie(key, def) {
|
||||
try {
|
||||
var v = localStorage[key];
|
||||
if (!v) v = def;
|
||||
return v;
|
||||
} catch (e) {
|
||||
var i, cookiestrs;
|
||||
try {
|
||||
if (document.cookie.length <= 0) return def;
|
||||
cookiestrs = document.cookie.split(/; */);
|
||||
} catch (e) { return def; }
|
||||
for (i = 0; i < cookiestrs.length; i++) {
|
||||
var cur = cookiestrs[i];
|
||||
var eql = cur.indexOf('=');
|
||||
if (eql >= 0 && cur.substring(0,eql) == key)
|
||||
return unescape(cur.substring(eql+1));
|
||||
}
|
||||
return def;
|
||||
}
|
||||
}
|
||||
|
||||
function SetCookie(key, val) {
|
||||
try {
|
||||
localStorage[key] = val;
|
||||
} catch(e) {
|
||||
var d = new Date();
|
||||
d.setTime(d.getTime()+(365*24*60*60*1000));
|
||||
try {
|
||||
document.cookie =
|
||||
key + "=" + escape(val) + "; expires="+ d.toGMTString() + "; path=/";
|
||||
} catch (e) {}
|
||||
}
|
||||
}
|
||||
|
||||
// note that this always stores a directory name, ending with a "/"
|
||||
function SetPLTRoot(ver, relative) {
|
||||
var root = location.protocol + "//" + location.host
|
||||
+ NormalizePath(location.pathname.replace(/[^\/]*$/, relative));
|
||||
SetCookie("PLT_Root."+ver, root);
|
||||
}
|
||||
|
||||
// adding index.html works because of the above
|
||||
function GotoPLTRoot(ver, relative) {
|
||||
var u = GetCookie("PLT_Root."+ver, null);
|
||||
if (u == null) return true; // no cookie: use plain up link
|
||||
// the relative path is optional, default goes to the toplevel start page
|
||||
if (!relative) relative = "index.html";
|
||||
location = u + relative;
|
||||
return false;
|
||||
}
|
||||
|
||||
// Utilities ------------------------------------------------------------------
|
||||
|
||||
var normalize_rxs = [/\/\/+/g, /\/\.(\/|$)/, /\/[^\/]*\/\.\.(\/|$)/];
|
||||
function NormalizePath(path) {
|
||||
var tmp, i;
|
||||
for (i = 0; i < normalize_rxs.length; i++)
|
||||
while ((tmp = path.replace(normalize_rxs[i], "/")) != path) path = tmp;
|
||||
return path;
|
||||
}
|
||||
|
||||
// `noscript' is problematic in some browsers (always renders as a
|
||||
// block), use this hack instead (does not always work!)
|
||||
// document.write("<style>mynoscript { display:none; }</style>");
|
||||
|
||||
// Interactions ---------------------------------------------------------------
|
||||
|
||||
function DoSearchKey(event, field, ver, top_path) {
|
||||
var val = field.value;
|
||||
if (event && event.keyCode == 13) {
|
||||
var u = GetCookie("PLT_Root."+ver, null);
|
||||
if (u == null) u = top_path; // default: go to the top path
|
||||
u += "search/index.html?q=" + encodeURIComponent(val);
|
||||
u = MergePageArgsIntoUrl(u);
|
||||
location = u;
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
function TocviewToggle(glyph, id) {
|
||||
var s = document.getElementById(id).style;
|
||||
var expand = s.display == "none";
|
||||
s.display = expand ? "block" : "none";
|
||||
glyph.innerHTML = expand ? "▼" : "►";
|
||||
}
|
||||
|
||||
// Page Init ------------------------------------------------------------------
|
||||
|
||||
// Note: could make a function that inspects and uses window.onload to chain to
|
||||
// a previous one, but this file needs to be required first anyway, since it
|
||||
// contains utilities for all other files.
|
||||
var on_load_funcs = [];
|
||||
function AddOnLoad(fun) { on_load_funcs.push(fun); }
|
||||
window.onload = function() {
|
||||
for (var i=0; i<on_load_funcs.length; i++) on_load_funcs[i]();
|
||||
};
|
||||
|
||||
AddOnLoad(function(){
|
||||
var links = document.getElementsByTagName("a");
|
||||
for (var i=0; i<links.length; i++) MergePageArgsIntoLink(links[i]);
|
||||
var label = GetPageArg("ctxtname",false);
|
||||
if (!label) return;
|
||||
var indicator = document.getElementById("contextindicator");
|
||||
if (!indicator) return;
|
||||
indicator.innerHTML = label;
|
||||
indicator.style.display = "block";
|
||||
});
|
@ -0,0 +1,485 @@
|
||||
|
||||
/* This file is used by default by all Scribble documents.
|
||||
See also "manual.css", which is added by default by the
|
||||
`scribble/manual` language. */
|
||||
|
||||
/* CSS seems backward: List all the classes for which we want a
|
||||
particular font, so that the font can be changed in one place. (It
|
||||
would be nicer to reference a font definition from all the places
|
||||
that we want it.)
|
||||
|
||||
As you read the rest of the file, remember to double-check here to
|
||||
see if any font is set. */
|
||||
|
||||
/* Monospace: */
|
||||
.maincolumn, .refpara, .refelem, .tocset, .stt, .hspace, .refparaleft, .refelemleft {
|
||||
font-family: monospace;
|
||||
}
|
||||
|
||||
/* Serif: */
|
||||
.main, .refcontent, .tocview, .tocsub, .sroman, i {
|
||||
font-family: serif;
|
||||
}
|
||||
|
||||
/* Sans-serif: */
|
||||
.version, .versionNoNav, .ssansserif {
|
||||
font-family: sans-serif;
|
||||
}
|
||||
.ssansserif {
|
||||
font-size: 80%;
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
/* ---------------------------------------- */
|
||||
|
||||
p, .SIntrapara {
|
||||
display: block;
|
||||
margin: 1em 0;
|
||||
}
|
||||
|
||||
h2 { /* per-page main title */
|
||||
margin-top: 0;
|
||||
}
|
||||
|
||||
h3, h4, h5, h6, h7, h8 {
|
||||
margin-top: 1.75em;
|
||||
margin-bottom: 0.5em;
|
||||
}
|
||||
|
||||
.SSubSubSubSection {
|
||||
font-weight: bold;
|
||||
font-size: 0.83em; /* should match h5; from HTML 4 reference */
|
||||
}
|
||||
|
||||
/* Needed for browsers like Opera, and eventually for HTML 4 conformance.
|
||||
This means that multiple paragraphs in a table element do not have a space
|
||||
between them. */
|
||||
table p {
|
||||
margin-top: 0;
|
||||
margin-bottom: 0;
|
||||
}
|
||||
|
||||
/* ---------------------------------------- */
|
||||
/* Main */
|
||||
|
||||
body {
|
||||
color: black;
|
||||
background-color: #ffffff;
|
||||
}
|
||||
|
||||
table td {
|
||||
padding-left: 0;
|
||||
padding-right: 0;
|
||||
}
|
||||
|
||||
.maincolumn {
|
||||
width: 43em;
|
||||
margin-right: -40em;
|
||||
margin-left: 15em;
|
||||
}
|
||||
|
||||
.main {
|
||||
text-align: left;
|
||||
}
|
||||
|
||||
/* ---------------------------------------- */
|
||||
/* Navigation */
|
||||
|
||||
.navsettop, .navsetbottom {
|
||||
background-color: #f0f0e0;
|
||||
padding: 0.25em 0 0.25em 0;
|
||||
}
|
||||
|
||||
.navsettop {
|
||||
margin-bottom: 1.5em;
|
||||
border-bottom: 2px solid #e0e0c0;
|
||||
}
|
||||
|
||||
.navsetbottom {
|
||||
margin-top: 2em;
|
||||
border-top: 2px solid #e0e0c0;
|
||||
}
|
||||
|
||||
.navleft {
|
||||
margin-left: 1ex;
|
||||
position: relative;
|
||||
float: left;
|
||||
white-space: nowrap;
|
||||
}
|
||||
.navright {
|
||||
margin-right: 1ex;
|
||||
position: relative;
|
||||
float: right;
|
||||
white-space: nowrap;
|
||||
}
|
||||
.nonavigation {
|
||||
color: #e0e0e0;
|
||||
}
|
||||
|
||||
.searchform {
|
||||
display: inline;
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
}
|
||||
|
||||
.nosearchform {
|
||||
display: none;
|
||||
}
|
||||
|
||||
.searchbox {
|
||||
width: 16em;
|
||||
margin: 0px;
|
||||
padding: 0px;
|
||||
background-color: #eee;
|
||||
border: 1px solid #ddd;
|
||||
text-align: center;
|
||||
vertical-align: middle;
|
||||
}
|
||||
|
||||
#contextindicator {
|
||||
position: fixed;
|
||||
background-color: #c6f;
|
||||
color: #000;
|
||||
font-family: monospace;
|
||||
font-weight: bold;
|
||||
padding: 2px 10px;
|
||||
display: none;
|
||||
right: 0;
|
||||
bottom: 0;
|
||||
}
|
||||
|
||||
/* ---------------------------------------- */
|
||||
/* Version */
|
||||
|
||||
.versionbox {
|
||||
position: relative;
|
||||
float: right;
|
||||
left: 2em;
|
||||
height: 0em;
|
||||
width: 13em;
|
||||
margin: 0em -13em 0em 0em;
|
||||
}
|
||||
.version {
|
||||
font-size: small;
|
||||
}
|
||||
.versionNoNav {
|
||||
font-size: xx-small; /* avoid overlap with author */
|
||||
}
|
||||
|
||||
.version:before, .versionNoNav:before {
|
||||
content: "Version ";
|
||||
}
|
||||
|
||||
/* ---------------------------------------- */
|
||||
/* Margin notes */
|
||||
|
||||
.refpara, .refelem {
|
||||
position: relative;
|
||||
float: right;
|
||||
left: 2em;
|
||||
height: 0em;
|
||||
width: 13em;
|
||||
margin: 0em -13em 0em 0em;
|
||||
}
|
||||
|
||||
.refpara, .refparaleft {
|
||||
top: -1em;
|
||||
}
|
||||
|
||||
.refcolumn {
|
||||
background-color: #F5F5DC;
|
||||
display: block;
|
||||
position: relative;
|
||||
width: 13em;
|
||||
font-size: 85%;
|
||||
border: 0.5em solid #F5F5DC;
|
||||
margin: 0 0 0 0;
|
||||
white-space: normal; /* in case margin note is inside code sample */
|
||||
}
|
||||
|
||||
.refcontent {
|
||||
margin: 0 0 0 0;
|
||||
}
|
||||
|
||||
.refcontent p {
|
||||
margin-top: 0;
|
||||
margin-bottom: 0;
|
||||
}
|
||||
|
||||
.refparaleft, .refelemleft {
|
||||
position: relative;
|
||||
float: left;
|
||||
right: 2em;
|
||||
height: 0em;
|
||||
width: 13em;
|
||||
margin: 0em 0em 0em -13em;
|
||||
}
|
||||
|
||||
.refcolumnleft {
|
||||
background-color: #F5F5DC;
|
||||
display: block;
|
||||
position: relative;
|
||||
width: 13em;
|
||||
font-size: 85%;
|
||||
border: 0.5em solid #F5F5DC;
|
||||
margin: 0 0 0 0;
|
||||
}
|
||||
|
||||
|
||||
/* ---------------------------------------- */
|
||||
/* Table of contents, inline */
|
||||
|
||||
.toclink {
|
||||
text-decoration: none;
|
||||
color: blue;
|
||||
font-size: 85%;
|
||||
}
|
||||
|
||||
.toptoclink {
|
||||
text-decoration: none;
|
||||
color: blue;
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
/* ---------------------------------------- */
|
||||
/* Table of contents, left margin */
|
||||
|
||||
.tocset {
|
||||
position: relative;
|
||||
float: left;
|
||||
width: 12.5em;
|
||||
margin-right: 2em;
|
||||
}
|
||||
.tocset td {
|
||||
vertical-align: text-top;
|
||||
}
|
||||
|
||||
.tocview {
|
||||
text-align: left;
|
||||
background-color: #f0f0e0;
|
||||
}
|
||||
|
||||
.tocsub {
|
||||
text-align: left;
|
||||
margin-top: 0.5em;
|
||||
background-color: #f0f0e0;
|
||||
}
|
||||
|
||||
.tocviewlist, .tocsublist {
|
||||
margin-left: 0.2em;
|
||||
margin-right: 0.2em;
|
||||
padding-top: 0.2em;
|
||||
padding-bottom: 0.2em;
|
||||
}
|
||||
.tocviewlist table {
|
||||
font-size: 82%;
|
||||
}
|
||||
|
||||
.tocviewlisttopspace {
|
||||
margin-bottom: 1em;
|
||||
}
|
||||
|
||||
.tocviewsublist, .tocviewsublistonly, .tocviewsublisttop, .tocviewsublistbottom {
|
||||
margin-left: 0.4em;
|
||||
border-left: 1px solid #bbf;
|
||||
padding-left: 0.8em;
|
||||
}
|
||||
.tocviewsublist {
|
||||
margin-bottom: 1em;
|
||||
}
|
||||
.tocviewsublist table,
|
||||
.tocviewsublistonly table,
|
||||
.tocviewsublisttop table,
|
||||
.tocviewsublistbottom table {
|
||||
font-size: 75%;
|
||||
}
|
||||
|
||||
.tocviewtitle * {
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
.tocviewlink {
|
||||
text-decoration: none;
|
||||
color: blue;
|
||||
}
|
||||
|
||||
.tocviewselflink {
|
||||
text-decoration: underline;
|
||||
color: blue;
|
||||
}
|
||||
|
||||
.tocviewtoggle {
|
||||
text-decoration: none;
|
||||
color: blue;
|
||||
font-size: 75%; /* looks better, and avoids bounce when toggling sub-sections due to font alignments */
|
||||
}
|
||||
|
||||
.tocsublist td {
|
||||
padding-left: 1em;
|
||||
text-indent: -1em;
|
||||
}
|
||||
|
||||
.tocsublinknumber {
|
||||
font-size: 82%;
|
||||
}
|
||||
|
||||
.tocsublink {
|
||||
font-size: 82%;
|
||||
text-decoration: none;
|
||||
}
|
||||
|
||||
.tocsubseclink {
|
||||
font-size: 82%;
|
||||
text-decoration: none;
|
||||
}
|
||||
|
||||
.tocsubnonseclink {
|
||||
font-size: 82%;
|
||||
text-decoration: none;
|
||||
padding-left: 0.5em;
|
||||
}
|
||||
|
||||
.tocsubtitle {
|
||||
font-size: 82%;
|
||||
font-style: italic;
|
||||
margin: 0.2em;
|
||||
}
|
||||
|
||||
/* ---------------------------------------- */
|
||||
/* Some inline styles */
|
||||
|
||||
.indexlink {
|
||||
text-decoration: none;
|
||||
}
|
||||
|
||||
.nobreak {
|
||||
white-space: nowrap;
|
||||
}
|
||||
|
||||
pre { margin-left: 2em; }
|
||||
blockquote { margin-left: 2em; }
|
||||
|
||||
ol { list-style-type: decimal; }
|
||||
ol ol { list-style-type: lower-alpha; }
|
||||
ol ol ol { list-style-type: lower-roman; }
|
||||
ol ol ol ol { list-style-type: upper-alpha; }
|
||||
|
||||
.SCodeFlow {
|
||||
display: block;
|
||||
margin-left: 1em;
|
||||
margin-bottom: 0em;
|
||||
margin-right: 1em;
|
||||
margin-top: 0em;
|
||||
white-space: nowrap;
|
||||
}
|
||||
|
||||
.SVInsetFlow {
|
||||
display: block;
|
||||
margin-left: 0em;
|
||||
margin-bottom: 0em;
|
||||
margin-right: 0em;
|
||||
margin-top: 0em;
|
||||
}
|
||||
|
||||
.SubFlow {
|
||||
display: block;
|
||||
margin: 0em;
|
||||
}
|
||||
|
||||
.boxed {
|
||||
width: 100%;
|
||||
background-color: #E8E8FF;
|
||||
}
|
||||
|
||||
.hspace {
|
||||
}
|
||||
|
||||
.slant {
|
||||
font-style: oblique;
|
||||
}
|
||||
|
||||
.badlink {
|
||||
text-decoration: underline;
|
||||
color: red;
|
||||
}
|
||||
|
||||
.plainlink {
|
||||
text-decoration: none;
|
||||
color: blue;
|
||||
}
|
||||
|
||||
.techoutside { text-decoration: underline; color: #b0b0b0; }
|
||||
.techoutside:hover { text-decoration: underline; color: blue; }
|
||||
|
||||
/* .techinside:hover doesn't work with FF, .techinside:hover>
|
||||
.techinside doesn't work with IE, so use both (and IE doesn't
|
||||
work with inherit in the second one, so use blue directly) */
|
||||
.techinside { color: black; }
|
||||
.techinside:hover { color: blue; }
|
||||
.techoutside:hover>.techinside { color: inherit; }
|
||||
|
||||
.SCentered {
|
||||
text-align: center;
|
||||
}
|
||||
|
||||
.imageleft {
|
||||
float: left;
|
||||
margin-right: 0.3em;
|
||||
}
|
||||
|
||||
.Smaller {
|
||||
font-size: 82%;
|
||||
}
|
||||
|
||||
.Larger {
|
||||
font-size: 122%;
|
||||
}
|
||||
|
||||
/* A hack, inserted to break some Scheme ids: */
|
||||
.mywbr {
|
||||
display: inline-block;
|
||||
height: 0;
|
||||
width: 0;
|
||||
font-size: 1px;
|
||||
}
|
||||
|
||||
.compact li p {
|
||||
margin: 0em;
|
||||
padding: 0em;
|
||||
}
|
||||
|
||||
.noborder img {
|
||||
border: 0;
|
||||
}
|
||||
|
||||
.SAuthorListBox {
|
||||
position: relative;
|
||||
float: right;
|
||||
left: 2em;
|
||||
top: -2.5em;
|
||||
height: 0em;
|
||||
width: 13em;
|
||||
margin: 0em -13em 0em 0em;
|
||||
}
|
||||
.SAuthorList {
|
||||
font-size: 82%;
|
||||
}
|
||||
.SAuthorList:before {
|
||||
content: "by ";
|
||||
}
|
||||
.author {
|
||||
display: inline;
|
||||
white-space: nowrap;
|
||||
}
|
||||
|
||||
/* print styles : hide the navigation elements */
|
||||
@media print {
|
||||
.tocset,
|
||||
.navsettop,
|
||||
.navsetbottom { display: none; }
|
||||
.maincolumn {
|
||||
width: auto;
|
||||
margin-right: 13em;
|
||||
margin-left: 0;
|
||||
}
|
||||
}
|
@ -0,0 +1,3 @@
|
||||
#lang info
|
||||
|
||||
(define scribblings '(("br-parser-tools.scrbl" (multi-page) (parsing-library))))
|
@ -0,0 +1,11 @@
|
||||
parser-tools-lib
|
||||
Copyright (c) 2010-2014 PLT Design Inc.
|
||||
|
||||
This package is distributed under the GNU Lesser General Public
|
||||
License (LGPL). This means that you can link this package into proprietary
|
||||
applications, provided you follow the rules stated in the LGPL. You
|
||||
can also modify this package; if you distribute a modified version,
|
||||
you must distribute it under the terms of the LGPL, which in
|
||||
particular means that you must release the source code for the
|
||||
modified software. See http://www.gnu.org/copyleft/lesser.html
|
||||
for more information.
|
@ -0,0 +1,982 @@
|
||||
#lang racket/base
|
||||
;; This module implements a parser form like the br-parser-tools's
|
||||
;; `parser', except that it works on an arbitrary CFG (returning
|
||||
;; the first successful parse).
|
||||
|
||||
;; I'm pretty sure that this is an implementation of Earley's
|
||||
;; algorithm.
|
||||
|
||||
;; To a first approximation, it's a backtracking parser. Alternatives
|
||||
;; for a non-terminal are computed in parallel, and multiple attempts
|
||||
;; to compute the same result block until the first one completes. If
|
||||
;; you get into deadlock, such as when trying to match
|
||||
;; <foo> := <foo>
|
||||
;; then it means that there's no successful parse, so everything
|
||||
;; that's blocked fails.
|
||||
|
||||
;; A cache holds the series of results for a particular non-terminal
|
||||
;; at a particular starting location. (A series is used, instead of a
|
||||
;; single result, for backtracking.) Otherwise, the parser uses
|
||||
;; backtracking search. Backtracking is implemented through explicit
|
||||
;; success and failure continuations. Multiple results for a
|
||||
;; particular nonterminal and location are kept only when they have
|
||||
;; different lengths. (Otherwise, in the spirit of finding one
|
||||
;; successful parse, only the first result is kept.)
|
||||
|
||||
;; The br-parser-tools `parser' form is used to transform tokens in the
|
||||
;; grammar to tokens specific to this parser. In other words, this
|
||||
;; parser uses `parser' so that it doesn't have to know anything about
|
||||
;; tokens.
|
||||
;;
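;; A minimal usage sketch (the token names and grammar below are
;; hypothetical; see the `test' submodule at the end of this file for a
;; complete, runnable example):
;;
;;   (define parse
;;     (cfg-parser
;;      (tokens the-tokens)   ; a token group created with define-tokens
;;      (start <expr>)
;;      (end EOF)
;;      (error (lambda (tok-ok? tok-name tok-val)
;;               (error 'parse "unexpected token: ~a" tok-name)))
;;      (grammar [<expr> [(LIT) $1]
;;                       [(<expr> PLUS <expr>) (+ $1 $3)]])))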
|
||||
|
||||
|
||||
|
||||
(require br-parser-tools/yacc
|
||||
br-parser-tools/lex)
|
||||
|
||||
(require (for-syntax racket/base
|
||||
syntax/boundmap
|
||||
br-parser-tools/private-lex/token-syntax))
|
||||
|
||||
(provide cfg-parser)
|
||||
|
||||
;; A raw token, wrapped so that we can recognize it:
|
||||
(define-struct tok (name orig-name val start end))
|
||||
|
||||
;; Represents the thread scheduler:
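;; Fields, as used below: `active' holds tasks ready to run; `active-back'
;; accumulates newly queued tasks and is reversed onto `active' when it
;; empties; `waits' maps an answer key to its single waiting continuation,
;; and `multi-waits' maps an answer key to a list of waiting continuations;
;; `cache' memoizes non-terminal results; `progress?' records whether
;; anything has happened since the queue was last refilled, which
;; `swap-task' uses for deadlock detection.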
|
||||
(define-struct tasks (active active-back waits multi-waits cache progress?))
|
||||
|
||||
(define-for-syntax make-token-identifier-mapping make-hasheq)
|
||||
(define-for-syntax token-identifier-mapping-get
|
||||
(case-lambda
|
||||
[(t tok)
|
||||
(hash-ref t (syntax-e tok))]
|
||||
[(t tok fail)
|
||||
(hash-ref t (syntax-e tok) fail)]))
|
||||
(define-for-syntax token-identifier-mapping-put!
|
||||
(lambda (t tok v)
|
||||
(hash-set! t (syntax-e tok) v)))
|
||||
(define-for-syntax token-identifier-mapping-map
|
||||
(lambda (t f)
|
||||
(hash-map t f)))
|
||||
|
||||
;; Used to calculate information on the grammar, such as whether
|
||||
;; a particular non-terminal is "simple" instead of recursively defined.
|
||||
(define-for-syntax (nt-fixpoint nts proc nt-ids patss)
|
||||
(define (ormap-all val f as bs)
|
||||
(cond
|
||||
[(null? as) val]
|
||||
[else (ormap-all (or (f (car as) (car bs)) val)
|
||||
f
|
||||
(cdr as) (cdr bs))]))
|
||||
(let loop ()
|
||||
(when (ormap-all #f
|
||||
(lambda (nt pats)
|
||||
(let ([old (bound-identifier-mapping-get nts nt)])
|
||||
(let ([new (proc nt pats old)])
|
||||
(if (equal? old new)
|
||||
#f
|
||||
(begin
|
||||
(bound-identifier-mapping-put! nts nt new)
|
||||
#t)))))
|
||||
nt-ids patss)
|
||||
(loop))))
|
||||
|
||||
;; Tries parse-a followed by parse-b. If parse-a is not simple,
|
||||
;; then after parse-a succeeds once, we run parse-b in parallel
|
||||
;; with the search for a second result from parse-a.
|
||||
(define (parse-and simple-a? parse-a parse-b
|
||||
stream last-consumed-token depth end success-k fail-k
|
||||
max-depth tasks)
|
||||
(letrec ([mk-got-k
|
||||
(lambda (success-k fail-k)
|
||||
(lambda (val stream last-consumed-token depth max-depth tasks next1-k)
|
||||
(if simple-a?
|
||||
(parse-b val stream last-consumed-token depth end
|
||||
(mk-got2-k success-k fail-k next1-k)
|
||||
(mk-fail2-k success-k fail-k next1-k)
|
||||
max-depth tasks)
|
||||
(parallel-or
|
||||
(lambda (success-k fail-k max-depth tasks)
|
||||
(parse-b val stream last-consumed-token depth end
|
||||
success-k fail-k
|
||||
max-depth tasks))
|
||||
(lambda (success-k fail-k max-depth tasks)
|
||||
(next1-k (mk-got-k success-k fail-k)
|
||||
fail-k max-depth tasks))
|
||||
success-k fail-k max-depth tasks))))]
|
||||
[mk-got2-k
|
||||
(lambda (success-k fail-k next1-k)
|
||||
(lambda (val stream last-consumed-token depth max-depth tasks next-k)
|
||||
(success-k val stream last-consumed-token depth max-depth tasks
|
||||
(lambda (success-k fail-k max-depth tasks)
|
||||
(next-k (mk-got2-k success-k fail-k next1-k)
|
||||
(mk-fail2-k success-k fail-k next1-k)
|
||||
max-depth tasks)))))]
|
||||
[mk-fail2-k
|
||||
(lambda (success-k fail-k next1-k)
|
||||
(lambda (max-depth tasks)
|
||||
(next1-k (mk-got-k success-k fail-k)
|
||||
fail-k
|
||||
max-depth
|
||||
tasks)))])
|
||||
(parse-a stream last-consumed-token depth end
|
||||
(mk-got-k success-k fail-k)
|
||||
fail-k
|
||||
max-depth tasks)))
|
||||
|
||||
;; Parallel or for non-terminal alternatives
|
||||
(define (parse-parallel-or parse-a parse-b stream last-consumed-token depth end success-k fail-k max-depth tasks)
|
||||
(parallel-or (lambda (success-k fail-k max-depth tasks)
|
||||
(parse-a stream last-consumed-token depth end success-k fail-k max-depth tasks))
|
||||
(lambda (success-k fail-k max-depth tasks)
|
||||
(parse-b stream last-consumed-token depth end success-k fail-k max-depth tasks))
|
||||
success-k fail-k max-depth tasks))
|
||||
|
||||
;; Generic parallel-or
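;; Queues `parse-a' and `parse-b' as tasks that report to a shared answer
;; key; the first answer to arrive is handed to `success-k', and further
;; answers (from either branch) are pulled on demand through the `next-k'
;; continuation passed along with each result.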
|
||||
(define (parallel-or parse-a parse-b success-k fail-k max-depth tasks)
|
||||
(define answer-key (gensym))
|
||||
(letrec ([gota-k
|
||||
(lambda (val stream last-consumed-token depth max-depth tasks next-k)
|
||||
(report-answer answer-key
|
||||
max-depth
|
||||
tasks
|
||||
(list val stream last-consumed-token depth next-k)))]
|
||||
[faila-k
|
||||
(lambda (max-depth tasks)
|
||||
(report-answer answer-key
|
||||
max-depth
|
||||
tasks
|
||||
null))])
|
||||
(let* ([tasks (queue-task
|
||||
tasks
|
||||
(lambda (max-depth tasks)
|
||||
(parse-a gota-k
|
||||
faila-k
|
||||
max-depth tasks)))]
|
||||
[tasks (queue-task
|
||||
tasks
|
||||
(lambda (max-depth tasks)
|
||||
(parse-b gota-k
|
||||
faila-k
|
||||
max-depth tasks)))]
|
||||
[queue-next (lambda (next-k tasks)
|
||||
(queue-task tasks
|
||||
(lambda (max-depth tasks)
|
||||
(next-k gota-k
|
||||
faila-k
|
||||
max-depth tasks))))])
|
||||
(letrec ([mk-got-one
|
||||
(lambda (immediate-next? get-nth success-k)
|
||||
(lambda (val stream last-consumed-token depth max-depth tasks next-k)
|
||||
(let ([tasks (if immediate-next?
|
||||
(queue-next next-k tasks)
|
||||
tasks)])
|
||||
(success-k val stream last-consumed-token depth max-depth
|
||||
tasks
|
||||
(lambda (success-k fail-k max-depth tasks)
|
||||
(let ([tasks (if immediate-next?
|
||||
tasks
|
||||
(queue-next next-k tasks))])
|
||||
(get-nth max-depth tasks success-k fail-k)))))))]
|
||||
[get-first
|
||||
(lambda (max-depth tasks success-k fail-k)
|
||||
(wait-for-answer #f max-depth tasks answer-key
|
||||
(mk-got-one #t get-first success-k)
|
||||
(lambda (max-depth tasks)
|
||||
(get-second max-depth tasks success-k fail-k))
|
||||
#f))]
|
||||
[get-second
|
||||
(lambda (max-depth tasks success-k fail-k)
|
||||
(wait-for-answer #f max-depth tasks answer-key
|
||||
(mk-got-one #f get-second success-k)
|
||||
fail-k #f))])
|
||||
(get-first max-depth tasks success-k fail-k)))))
|
||||
|
||||
;; Non-terminal alternatives where the first is "simple" can be done
|
||||
;; sequentially, which is simpler
|
||||
(define (parse-or parse-a parse-b
|
||||
stream last-consumed-token depth end success-k fail-k max-depth tasks)
|
||||
(letrec ([mk-got-k
|
||||
(lambda (success-k fail-k)
|
||||
(lambda (val stream last-consumed-token depth max-depth tasks next-k)
|
||||
(success-k val stream last-consumed-token depth
|
||||
max-depth tasks
|
||||
(lambda (success-k fail-k max-depth tasks)
|
||||
(next-k (mk-got-k success-k fail-k)
|
||||
(mk-fail-k success-k fail-k)
|
||||
max-depth tasks)))))]
|
||||
[mk-fail-k
|
||||
(lambda (success-k fail-k)
|
||||
(lambda (max-depth tasks)
|
||||
(parse-b stream last-consumed-token depth end success-k fail-k max-depth tasks)))])
|
||||
(parse-a stream last-consumed-token depth end
|
||||
(mk-got-k success-k fail-k)
|
||||
(mk-fail-k success-k fail-k)
|
||||
max-depth tasks)))
|
||||
|
||||
;; Starts a thread
|
||||
(define queue-task
|
||||
(lambda (tasks t [progress? #t])
|
||||
(make-tasks (tasks-active tasks)
|
||||
(cons t (tasks-active-back tasks))
|
||||
(tasks-waits tasks)
|
||||
(tasks-multi-waits tasks)
|
||||
(tasks-cache tasks)
|
||||
(or progress? (tasks-progress? tasks)))))
|
||||
|
||||
;; Reports an answer to a waiting thread:
|
||||
(define (report-answer answer-key max-depth tasks val)
|
||||
(let ([v (hash-ref (tasks-waits tasks) answer-key (lambda () #f))])
|
||||
(if v
|
||||
(let ([tasks (make-tasks (cons (v val)
|
||||
(tasks-active tasks))
|
||||
(tasks-active-back tasks)
|
||||
(tasks-waits tasks)
|
||||
(tasks-multi-waits tasks)
|
||||
(tasks-cache tasks)
|
||||
#t)])
|
||||
(hash-remove! (tasks-waits tasks) answer-key)
|
||||
(swap-task max-depth tasks))
|
||||
;; We have an answer ready too fast; wait
|
||||
(swap-task max-depth
|
||||
(queue-task tasks
|
||||
(lambda (max-depth tasks)
|
||||
(report-answer answer-key max-depth tasks val))
|
||||
#f)))))
|
||||
|
||||
;; Reports an answer to multiple waiting threads:
|
||||
(define (report-answer-all answer-key max-depth tasks val k)
|
||||
(let ([v (hash-ref (tasks-multi-waits tasks) answer-key (lambda () null))])
|
||||
(hash-remove! (tasks-multi-waits tasks) answer-key)
|
||||
(let ([tasks (make-tasks (append (map (lambda (a) (a val)) v)
|
||||
(tasks-active tasks))
|
||||
(tasks-active-back tasks)
|
||||
(tasks-waits tasks)
|
||||
(tasks-multi-waits tasks)
|
||||
(tasks-cache tasks)
|
||||
#t)])
|
||||
(k max-depth tasks))))
|
||||
|
||||
;; Waits for an answer; if `multi?' is #f, this is the sole waiter, otherwise
|
||||
;; there might be many. Use either #t or #f (and `report-answer' or
|
||||
;; `report-answer-all', respectively) consistently for a particular answer key.
|
||||
(define (wait-for-answer multi? max-depth tasks answer-key success-k fail-k deadlock-k)
|
||||
(let ([wait (lambda (val)
|
||||
(lambda (max-depth tasks)
|
||||
(if val
|
||||
(if (null? val)
|
||||
(fail-k max-depth tasks)
|
||||
(let-values ([(val stream last-consumed-token depth next-k) (apply values val)])
|
||||
(success-k val stream last-consumed-token depth max-depth tasks next-k)))
|
||||
(deadlock-k max-depth tasks))))])
|
||||
(if multi?
|
||||
(hash-set! (tasks-multi-waits tasks) answer-key
|
||||
(cons wait (hash-ref (tasks-multi-waits tasks) answer-key
|
||||
(lambda () null))))
|
||||
(hash-set! (tasks-waits tasks) answer-key wait))
|
||||
(let ([tasks (make-tasks (tasks-active tasks)
|
||||
(tasks-active-back tasks)
|
||||
(tasks-waits tasks)
|
||||
(tasks-multi-waits tasks)
|
||||
(tasks-cache tasks)
|
||||
#t)])
|
||||
(swap-task max-depth tasks))))
|
||||
|
||||
;; Swap thread
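;; Runs the next active task. When the active queue empties, the back
;; queue is reversed in to take its place; if that happens without any
;; progress, every multi-wait is failed (or, when no multi-waits remain,
;; a deadlock error is reported).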
|
||||
(define (swap-task max-depth tasks)
|
||||
;; Swap in first active:
|
||||
(if (null? (tasks-active tasks))
|
||||
(if (tasks-progress? tasks)
|
||||
(swap-task max-depth
|
||||
(make-tasks (reverse (tasks-active-back tasks))
|
||||
null
|
||||
(tasks-waits tasks)
|
||||
(tasks-multi-waits tasks)
|
||||
(tasks-cache tasks)
|
||||
#f))
|
||||
;; No progress, so issue failure for all multi-waits
|
||||
(if (zero? (hash-count (tasks-multi-waits tasks)))
|
||||
(error 'swap-task "Deadlock")
|
||||
(swap-task max-depth
|
||||
(make-tasks (apply
|
||||
append
|
||||
(hash-map (tasks-multi-waits tasks)
|
||||
(lambda (k l)
|
||||
(map (lambda (v) (v #f)) l))))
|
||||
(tasks-active-back tasks)
|
||||
(tasks-waits tasks)
|
||||
(make-hasheq)
|
||||
(tasks-cache tasks)
|
||||
#t))))
|
||||
(let ([t (car (tasks-active tasks))]
|
||||
[tasks (make-tasks (cdr (tasks-active tasks))
|
||||
(tasks-active-back tasks)
|
||||
(tasks-waits tasks)
|
||||
(tasks-multi-waits tasks)
|
||||
(tasks-cache tasks)
|
||||
(tasks-progress? tasks))])
|
||||
(t max-depth tasks))))
|
||||
|
||||
;; Finds the symbolic representative of a token class
|
||||
(define-for-syntax (map-token toks tok)
|
||||
(car (token-identifier-mapping-get toks tok)))
|
||||
|
||||
(define no-pos-val (make-position #f #f #f))
|
||||
(define-for-syntax no-pos
|
||||
(let ([npv ((syntax-local-certifier) #'no-pos-val)])
|
||||
(lambda (stx) npv)))
|
||||
(define-for-syntax at-tok-pos
|
||||
(lambda (sel expr)
|
||||
(lambda (stx)
|
||||
#`(let ([v #,expr]) (if v (#,sel v) no-pos-val)))))
|
||||
|
||||
;; Builds a matcher for a particular alternative
|
||||
(define-for-syntax (build-match nts toks pat handle $ctx)
|
||||
(let loop ([pat pat]
|
||||
[pos 1])
|
||||
(if (null? pat)
|
||||
#`(success-k #,handle stream last-consumed-token depth max-depth tasks
|
||||
(lambda (success-k fail-k max-depth tasks)
|
||||
(fail-k max-depth tasks)))
|
||||
(let ([id (datum->syntax (car pat)
|
||||
(string->symbol (format "$~a" pos)))]
|
||||
[id-start-pos (datum->syntax (car pat)
|
||||
(string->symbol (format "$~a-start-pos" pos)))]
|
||||
[id-end-pos (datum->syntax (car pat)
|
||||
(string->symbol (format "$~a-end-pos" pos)))]
|
||||
[n-end-pos (and (null? (cdr pat))
|
||||
(datum->syntax (car pat) '$n-end-pos))])
|
||||
(cond
|
||||
[(bound-identifier-mapping-get nts (car pat) (lambda () #f))
|
||||
;; Match non-terminal
|
||||
#`(parse-and
|
||||
;; First part is simple? (If so, we don't have to parallelize the `and'.)
|
||||
#,(let ([l (bound-identifier-mapping-get nts (car pat) (lambda () #f))])
|
||||
(or (not l)
|
||||
(andmap values (caddr l))))
|
||||
#,(car pat)
|
||||
(let ([original-stream stream])
|
||||
(lambda (#,id stream last-consumed-token depth end success-k fail-k max-depth tasks)
|
||||
(let-syntax ([#,id-start-pos (at-tok-pos #'(if (eq? original-stream stream)
|
||||
tok-end
|
||||
tok-start)
|
||||
#'(if (eq? original-stream stream)
|
||||
last-consumed-token
|
||||
(and (pair? original-stream)
|
||||
(car original-stream))))]
|
||||
[#,id-end-pos (at-tok-pos #'tok-end #'last-consumed-token)]
|
||||
#,@(if n-end-pos
|
||||
#`([#,n-end-pos (at-tok-pos #'tok-end #'last-consumed-token)])
|
||||
null))
|
||||
#,(loop (cdr pat) (add1 pos)))))
|
||||
stream last-consumed-token depth
|
||||
#,(let ([cnt (apply +
|
||||
(map (lambda (item)
|
||||
(cond
|
||||
[(bound-identifier-mapping-get nts item (lambda () #f))
|
||||
=> (lambda (l) (car l))]
|
||||
[else 1]))
|
||||
(cdr pat)))])
|
||||
#`(- end #,cnt))
|
||||
success-k fail-k max-depth tasks)]
|
||||
[else
|
||||
;; Match token
|
||||
(let ([tok-id (map-token toks (car pat))])
|
||||
#`(if (and (pair? stream)
|
||||
(eq? '#,tok-id (tok-name (car stream))))
|
||||
(let* ([stream-a (car stream)]
|
||||
[#,id (tok-val stream-a)]
|
||||
[last-consumed-token (car stream)]
|
||||
[stream (cdr stream)]
|
||||
[depth (add1 depth)])
|
||||
(let ([max-depth (max max-depth depth)])
|
||||
(let-syntax ([#,id-start-pos (at-tok-pos #'tok-start #'stream-a)]
|
||||
[#,id-end-pos (at-tok-pos #'tok-end #'stream-a)]
|
||||
#,@(if n-end-pos
|
||||
#`([#,n-end-pos (at-tok-pos #'tok-end #'stream-a)])
|
||||
null))
|
||||
#,(loop (cdr pat) (add1 pos)))))
|
||||
(fail-k max-depth tasks)))])))))
|
||||
|
||||
;; Starts parsing to match a non-terminal. There's a minor
|
||||
;; optimization that checks for known starting tokens. Otherwise,
|
||||
;; use the cache, block if someone else is already trying the match,
|
||||
;; and cache the result if it's computed.
|
||||
;; The cache maps nonterminal+starting-position+iteration to a result, where
|
||||
;; the iteration is 0 for the first match attempt, 1 for the second,
|
||||
;; etc.
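;; For example, the result of the second attempt (iteration 1) to match a
;; non-terminal whose key is `key' after 3 consumed tokens is cached under
;; the table key (vector key 3 1) below.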
|
||||
(define (parse-nt/share key min-cnt init-tokens stream last-consumed-token depth end max-depth tasks success-k fail-k k)
|
||||
(if (and (positive? min-cnt)
|
||||
(pair? stream)
|
||||
(not (memq (tok-name (car stream)) init-tokens)))
|
||||
;; No such leading token; give up
|
||||
(fail-k max-depth tasks)
|
||||
;; Run pattern
|
||||
(let loop ([n 0]
|
||||
[success-k success-k]
|
||||
[fail-k fail-k]
|
||||
[max-depth max-depth]
|
||||
[tasks tasks]
|
||||
[k k])
|
||||
(let ([answer-key (gensym)]
|
||||
[table-key (vector key depth n)]
|
||||
[old-depth depth]
|
||||
[old-stream stream])
|
||||
#;(printf "Loop ~a\n" table-key)
|
||||
(cond
|
||||
[(hash-ref (tasks-cache tasks) table-key (lambda () #f))
|
||||
=> (lambda (result)
|
||||
#;(printf "Reuse ~a\n" table-key)
|
||||
(result success-k fail-k max-depth tasks))]
|
||||
[else
|
||||
#;(printf "Try ~a ~a\n" table-key (map tok-name stream))
|
||||
(hash-set! (tasks-cache tasks) table-key
|
||||
(lambda (success-k fail-k max-depth tasks)
|
||||
#;(printf "Wait ~a ~a\n" table-key answer-key)
|
||||
(wait-for-answer #t max-depth tasks answer-key success-k fail-k
|
||||
(lambda (max-depth tasks)
|
||||
#;(printf "Deadlock ~a ~a\n" table-key answer-key)
|
||||
(fail-k max-depth tasks)))))
|
||||
(let result-loop ([max-depth max-depth][tasks tasks][k k])
|
||||
(letrec ([orig-stream stream]
|
||||
[new-got-k
|
||||
(lambda (val stream last-consumed-token depth max-depth tasks next-k)
|
||||
;; Check whether we already have a result that consumed the same amount:
|
||||
(let ([result-key (vector #f key old-depth depth)])
|
||||
(cond
|
||||
[(hash-ref (tasks-cache tasks) result-key (lambda () #f))
|
||||
;; Go for the next-result
|
||||
(result-loop max-depth
|
||||
tasks
|
||||
(lambda (end max-depth tasks success-k fail-k)
|
||||
(next-k success-k fail-k max-depth tasks)))]
|
||||
[else
|
||||
#;(printf "Success ~a ~a\n" table-key
|
||||
(map tok-name (let loop ([d old-depth][s old-stream])
|
||||
(if (= d depth)
|
||||
null
|
||||
(cons (car s) (loop (add1 d) (cdr s)))))))
|
||||
(let ([next-k (lambda (success-k fail-k max-depth tasks)
|
||||
(loop (add1 n)
|
||||
success-k
|
||||
fail-k
|
||||
max-depth
|
||||
tasks
|
||||
(lambda (end max-depth tasks success-k fail-k)
|
||||
(next-k success-k fail-k max-depth tasks))))])
|
||||
(hash-set! (tasks-cache tasks) result-key #t)
|
||||
(hash-set! (tasks-cache tasks) table-key
|
||||
(lambda (success-k fail-k max-depth tasks)
|
||||
(success-k val stream last-consumed-token depth max-depth tasks next-k)))
|
||||
(report-answer-all answer-key
|
||||
max-depth
|
||||
tasks
|
||||
(list val stream last-consumed-token depth next-k)
|
||||
(lambda (max-depth tasks)
|
||||
(success-k val stream last-consumed-token depth max-depth tasks next-k))))])))]
|
||||
[new-fail-k
|
||||
(lambda (max-depth tasks)
|
||||
#;(printf "Failure ~a\n" table-key)
|
||||
(hash-set! (tasks-cache tasks) table-key
|
||||
(lambda (success-k fail-k max-depth tasks)
|
||||
(fail-k max-depth tasks)))
|
||||
(report-answer-all answer-key
|
||||
max-depth
|
||||
tasks
|
||||
null
|
||||
(lambda (max-depth tasks)
|
||||
(fail-k max-depth tasks))))])
|
||||
(k end max-depth tasks new-got-k new-fail-k)))])))))
|
||||
|
||||
(define-syntax (cfg-parser stx)
|
||||
(syntax-case stx ()
|
||||
[(_ clause ...)
|
||||
(let ([clauses (syntax->list #'(clause ...))])
|
||||
(let-values ([(start grammar cfg-error parser-clauses src-pos?)
|
||||
(let ([all-toks (apply
|
||||
append
|
||||
(map (lambda (clause)
|
||||
(syntax-case clause (tokens)
|
||||
[(tokens t ...)
|
||||
(apply
|
||||
append
|
||||
(map (lambda (t)
|
||||
(let ([v (syntax-local-value t (lambda () #f))])
|
||||
(cond
|
||||
[(terminals-def? v)
|
||||
(map (lambda (v)
|
||||
(cons v #f))
|
||||
(syntax->list (terminals-def-t v)))]
|
||||
[(e-terminals-def? v)
|
||||
(map (lambda (v)
|
||||
(cons v #t))
|
||||
(syntax->list (e-terminals-def-t v)))]
|
||||
[else null])))
|
||||
(syntax->list #'(t ...))))]
|
||||
[_else null]))
|
||||
clauses))]
|
||||
[all-end-toks (apply
|
||||
append
|
||||
(map (lambda (clause)
|
||||
(syntax-case clause (end)
|
||||
[(end t ...)
|
||||
(syntax->list #'(t ...))]
|
||||
[_else null]))
|
||||
clauses))])
|
||||
(let loop ([clauses clauses]
|
||||
[cfg-start #f]
|
||||
[cfg-grammar #f]
|
||||
[cfg-error #f]
|
||||
[src-pos? #f]
|
||||
[parser-clauses null])
|
||||
(if (null? clauses)
|
||||
(values cfg-start
|
||||
cfg-grammar
|
||||
cfg-error
|
||||
(reverse parser-clauses)
|
||||
src-pos?)
|
||||
(syntax-case (car clauses) (start error grammar src-pos)
|
||||
[(start tok)
|
||||
(loop (cdr clauses) #'tok cfg-grammar cfg-error src-pos? parser-clauses)]
|
||||
[(error expr)
|
||||
(loop (cdr clauses) cfg-start cfg-grammar #'expr src-pos? parser-clauses)]
|
||||
[(grammar [nt [pat handle0 handle ...] ...] ...)
|
||||
(let ([nts (make-bound-identifier-mapping)]
|
||||
[toks (make-token-identifier-mapping)]
|
||||
[end-toks (make-token-identifier-mapping)]
|
||||
[nt-ids (syntax->list #'(nt ...))]
|
||||
[patss (map (lambda (stx)
|
||||
(map syntax->list (syntax->list stx)))
|
||||
(syntax->list #'((pat ...) ...)))])
|
||||
(for-each (lambda (nt)
|
||||
(bound-identifier-mapping-put! nts nt (list 0)))
|
||||
nt-ids)
|
||||
(for-each (lambda (t)
|
||||
(token-identifier-mapping-put! end-toks t #t))
|
||||
all-end-toks)
|
||||
(for-each (lambda (t)
|
||||
(unless (token-identifier-mapping-get end-toks (car t) (lambda () #f))
|
||||
(let ([id (gensym (syntax-e (car t)))])
|
||||
(token-identifier-mapping-put! toks (car t)
|
||||
(cons id (cdr t))))))
|
||||
all-toks)
|
||||
;; Compute min max size for each non-term:
|
||||
(nt-fixpoint
|
||||
nts
|
||||
(lambda (nt pats old-list)
|
||||
(let ([new-cnt
|
||||
(apply
|
||||
min
|
||||
(map (lambda (pat)
|
||||
(apply
|
||||
+
|
||||
(map (lambda (elem)
|
||||
(car
|
||||
(bound-identifier-mapping-get nts
|
||||
elem
|
||||
(lambda () (list 1)))))
|
||||
pat)))
|
||||
pats))])
|
||||
(if (new-cnt . > . (car old-list))
|
||||
(cons new-cnt (cdr old-list))
|
||||
old-list)))
|
||||
nt-ids patss)
|
||||
;; Compute set of toks that must appear at the beginning
|
||||
;; for a non-terminal
|
||||
(nt-fixpoint
|
||||
nts
|
||||
(lambda (nt pats old-list)
|
||||
(let ([new-list
|
||||
(apply
|
||||
append
|
||||
(map (lambda (pat)
|
||||
(let loop ([pat pat])
|
||||
(if (pair? pat)
|
||||
(let ([l (bound-identifier-mapping-get
|
||||
nts
|
||||
(car pat)
|
||||
(lambda ()
|
||||
(list 1 (map-token toks (car pat)))))])
|
||||
;; If the non-terminal can match 0 things,
|
||||
;; then it might match something from the
|
||||
;; next pattern element. Otherwise, it must
|
||||
;; match the first element:
|
||||
(if (zero? (car l))
|
||||
(append (cdr l) (loop (cdr pat)))
|
||||
(cdr l)))
|
||||
null)))
|
||||
pats))])
|
||||
(let ([new (filter (lambda (id)
|
||||
(andmap (lambda (id2)
|
||||
(not (eq? id id2)))
|
||||
(cdr old-list)))
|
||||
new-list)])
|
||||
(if (pair? new)
|
||||
;; Drop dups in new list:
|
||||
(let ([new (let loop ([new new])
|
||||
(if (null? (cdr new))
|
||||
new
|
||||
(if (ormap (lambda (id)
|
||||
(eq? (car new) id))
|
||||
(cdr new))
|
||||
(loop (cdr new))
|
||||
(cons (car new) (loop (cdr new))))))])
|
||||
(cons (car old-list) (append new (cdr old-list))))
|
||||
old-list))))
|
||||
nt-ids patss)
|
||||
;; Determine left-recursive clauses:
|
||||
(for-each (lambda (nt pats)
|
||||
(let ([l (bound-identifier-mapping-get nts nt)])
|
||||
(bound-identifier-mapping-put! nts nt (list (car l)
|
||||
(cdr l)
|
||||
(map (lambda (x) #f) pats)))))
|
||||
nt-ids patss)
|
||||
(nt-fixpoint
|
||||
nts
|
||||
(lambda (nt pats old-list)
|
||||
(list (car old-list)
|
||||
(cadr old-list)
|
||||
(map (lambda (pat simple?)
|
||||
(or simple?
|
||||
(let ([l (map (lambda (elem)
|
||||
(bound-identifier-mapping-get
|
||||
nts
|
||||
elem
|
||||
(lambda () #f)))
|
||||
pat)])
|
||||
(andmap (lambda (i)
|
||||
(or (not i)
|
||||
(andmap values (caddr i))))
|
||||
l))))
|
||||
pats (caddr old-list))))
|
||||
nt-ids patss)
|
||||
;; Build a definition for each non-term:
|
||||
(loop (cdr clauses)
|
||||
cfg-start
|
||||
(map (lambda (nt pats handles $ctxs)
|
||||
(define info (bound-identifier-mapping-get nts nt))
|
||||
(list nt
|
||||
#`(let ([key (gensym '#,nt)])
|
||||
(lambda (stream last-consumed-token depth end success-k fail-k max-depth tasks)
|
||||
(parse-nt/share
|
||||
key #,(car info) '#,(cadr info) stream last-consumed-token depth end
|
||||
max-depth tasks
|
||||
success-k fail-k
|
||||
(lambda (end max-depth tasks success-k fail-k)
|
||||
#,(let loop ([pats pats]
|
||||
[handles (syntax->list handles)]
|
||||
[$ctxs (syntax->list $ctxs)]
|
||||
[simple?s (caddr info)])
|
||||
(if (null? pats)
|
||||
#'(fail-k max-depth tasks)
|
||||
#`(#,(if (or (null? (cdr pats))
|
||||
(car simple?s))
|
||||
#'parse-or
|
||||
#'parse-parallel-or)
|
||||
(lambda (stream last-consumed-token depth end success-k fail-k max-depth tasks)
|
||||
#,(build-match nts
|
||||
toks
|
||||
(car pats)
|
||||
(car handles)
|
||||
(car $ctxs)))
|
||||
(lambda (stream last-consumed-token depth end success-k fail-k max-depth tasks)
|
||||
#,(loop (cdr pats)
|
||||
(cdr handles)
|
||||
(cdr $ctxs)
|
||||
(cdr simple?s)))
|
||||
stream last-consumed-token depth end success-k fail-k max-depth tasks)))))))))
|
||||
nt-ids
|
||||
patss
|
||||
(syntax->list #'(((begin handle0 handle ...) ...) ...))
|
||||
(syntax->list #'((handle0 ...) ...)))
|
||||
cfg-error
|
||||
src-pos?
|
||||
(list*
|
||||
(with-syntax ([((tok tok-id . $e) ...)
|
||||
(token-identifier-mapping-map toks
|
||||
(lambda (k v)
|
||||
(list* k
|
||||
(car v)
|
||||
(if (cdr v)
|
||||
#f
|
||||
'$1))))]
|
||||
[(pos ...)
|
||||
(if src-pos?
|
||||
#'($1-start-pos $1-end-pos)
|
||||
#'(#f #f))])
|
||||
#`(grammar (start [() null]
|
||||
[(atok start) (cons $1 $2)])
|
||||
(atok [(tok) (make-tok 'tok-id 'tok $e pos ...)] ...)))
|
||||
#`(start start)
|
||||
parser-clauses)))]
|
||||
[(grammar . _)
|
||||
(raise-syntax-error
|
||||
#f
|
||||
"bad grammar clause"
|
||||
stx
|
||||
(car clauses))]
|
||||
[(src-pos)
|
||||
(loop (cdr clauses)
|
||||
cfg-start
|
||||
cfg-grammar
|
||||
cfg-error
|
||||
#t
|
||||
(cons (car clauses) parser-clauses))]
|
||||
[_else
|
||||
(loop (cdr clauses)
|
||||
cfg-start
|
||||
cfg-grammar
|
||||
cfg-error
|
||||
src-pos?
|
||||
(cons (car clauses) parser-clauses))]))))])
|
||||
#`(let ([orig-parse (parser
|
||||
[error (lambda (a b c)
|
||||
(error 'cfg-parser "unexpected ~a token: ~a" b c))]
|
||||
. #,parser-clauses)]
|
||||
[error-proc #,cfg-error])
|
||||
(letrec #,grammar
|
||||
(lambda (get-tok)
|
||||
(let ([tok-list (orig-parse get-tok)])
|
||||
(letrec ([success-k
|
||||
(lambda (val stream last-consumed-token depth max-depth tasks next)
|
||||
(if (null? stream)
|
||||
val
|
||||
(next success-k fail-k max-depth tasks)))]
|
||||
[fail-k (lambda (max-depth tasks)
|
||||
(define (call-error-proc tok-ok? tok-name tok-value start-pos end-pos)
|
||||
(cond
|
||||
[(procedure-arity-includes? error-proc 5)
|
||||
(error-proc tok-ok? tok-name tok-value start-pos end-pos)]
|
||||
[else
|
||||
(error-proc tok-ok? tok-name tok-value)]))
|
||||
(cond
|
||||
[(null? tok-list)
|
||||
(if error-proc
|
||||
(call-error-proc #t
|
||||
'no-tokens
|
||||
#f
|
||||
(make-position #f #f #f)
|
||||
(make-position #f #f #f))
|
||||
(error
|
||||
'cfg-parse
|
||||
"no tokens"))]
|
||||
[else
|
||||
(let ([bad-tok (list-ref tok-list
|
||||
(min (sub1 (length tok-list))
|
||||
max-depth))])
|
||||
(if error-proc
|
||||
(call-error-proc #t
|
||||
(tok-orig-name bad-tok)
|
||||
(tok-val bad-tok)
|
||||
(tok-start bad-tok)
|
||||
(tok-end bad-tok))
|
||||
(error
|
||||
'cfg-parse
|
||||
"failed at ~a"
|
||||
(tok-val bad-tok))))]))])
|
||||
(#,start tok-list
|
||||
;; we simulate a token at the very beginning with zero width
|
||||
;; for use with the position-generating code (*-start-pos, *-end-pos).
|
||||
(if (null? tok-list)
|
||||
(tok #f #f #f
|
||||
(position 1
|
||||
#,(if src-pos? #'1 #'#f)
|
||||
#,(if src-pos? #'0 #'#f))
|
||||
(position 1
|
||||
#,(if src-pos? #'1 #'#f)
|
||||
#,(if src-pos? #'0 #'#f)))
|
||||
(tok (tok-name (car tok-list))
|
||||
(tok-orig-name (car tok-list))
|
||||
(tok-val (car tok-list))
|
||||
(tok-start (car tok-list))
|
||||
(tok-start (car tok-list))))
|
||||
0
|
||||
(length tok-list)
|
||||
success-k
|
||||
fail-k
|
||||
0
|
||||
(make-tasks null null
|
||||
(make-hasheq) (make-hasheq)
|
||||
(make-hash) #t)))))))))]))
|
||||
|
||||
|
||||
(module* test racket/base
|
||||
(require (submod "..")
|
||||
br-parser-tools/lex
|
||||
racket/block
|
||||
racket/generator
|
||||
rackunit)
|
||||
|
||||
;; Test: parsing regular expressions.
|
||||
;; Here is a test case on locations:
|
||||
(block
|
||||
(define-tokens regexp-tokens (ANCHOR STAR OR LIT LPAREN RPAREN EOF))
|
||||
(define lex (lexer-src-pos ["|" (token-OR lexeme)]
|
||||
["^" (token-ANCHOR lexeme)]
|
||||
["*" (token-STAR lexeme)]
|
||||
[(repetition 1 +inf.0 alphabetic) (token-LIT lexeme)]
|
||||
["(" (token-LPAREN lexeme)]
|
||||
[")" (token-RPAREN lexeme)]
|
||||
[whitespace (return-without-pos (lex input-port))]
|
||||
[(eof) (token-EOF 'eof)]))
|
||||
(define -parse (cfg-parser
|
||||
(tokens regexp-tokens)
|
||||
(start top)
|
||||
(end EOF)
|
||||
(src-pos)
|
||||
(grammar [top [(maybe-anchor regexp)
|
||||
(cond [$1
|
||||
`(anchored ,$2 ,(pos->sexp $1-start-pos) ,(pos->sexp $2-end-pos))]
|
||||
[else
|
||||
`(unanchored ,$2 ,(pos->sexp $1-start-pos) ,(pos->sexp $2-end-pos))])]]
|
||||
[maybe-anchor [(ANCHOR) #t]
|
||||
[() #f]]
|
||||
[regexp [(regexp STAR) `(star ,$1 ,(pos->sexp $1-start-pos) ,(pos->sexp $2-end-pos))]
|
||||
[(regexp OR regexp) `(or ,$1 ,$3 ,(pos->sexp $1-start-pos) ,(pos->sexp $3-end-pos))]
|
||||
[(LPAREN regexp RPAREN) `(group ,$2 ,(pos->sexp $1-start-pos) ,(pos->sexp $3-end-pos))]
|
||||
[(LIT) `(lit ,$1 ,(pos->sexp $1-start-pos) ,(pos->sexp $1-end-pos))]])))
|
||||
(define (pos->sexp pos)
|
||||
(position-offset pos))
|
||||
|
||||
(define (parse s)
|
||||
(define ip (open-input-string s))
|
||||
(port-count-lines! ip)
|
||||
(-parse (lambda () (lex ip))))
|
||||
|
||||
(check-equal? (parse "abc")
|
||||
'(unanchored (lit "abc" 1 4) 1 4))
|
||||
(check-equal? (parse "a | (b*) | c")
|
||||
'(unanchored (or (or (lit "a" 1 2)
|
||||
(group (star (lit "b" 6 7) 6 8) 5 9)
|
||||
1 9)
|
||||
(lit "c" 12 13)
|
||||
1 13)
|
||||
1 13)))
|
||||
|
||||
|
||||
;; Check that cfg-parser can accept error functions of 3 arguments:
|
||||
(block
|
||||
(define-tokens non-terminals (ONE ZERO EOF))
|
||||
(define parse
|
||||
(cfg-parser (tokens non-terminals)
|
||||
(start ones)
|
||||
(end EOF)
|
||||
(error (lambda (tok-ok tok-name tok-val)
|
||||
(error (format "~a ~a ~a" tok-ok tok-name tok-val))))
|
||||
(grammar [ones [() null]
|
||||
[(ONE ones) (cons $1 $2)]])))
|
||||
(define (sequence->tokenizer s)
|
||||
(define-values (more? next) (sequence-generate s))
|
||||
(lambda ()
|
||||
(cond [(more?) (next)]
|
||||
[else (token-EOF 'eof)])))
|
||||
(check-exn #rx"#t ZERO zero"
|
||||
(lambda () (parse (sequence->tokenizer (list (token-ZERO "zero")))))))
|
||||
|
||||
|
||||
|
||||
|
||||
;; Check that cfg-parser can accept error functions of 5 arguments:
|
||||
(block
|
||||
(define-tokens non-terminals (ONE ZERO EOF))
|
||||
(define parse
|
||||
(cfg-parser (tokens non-terminals)
|
||||
(start ones)
|
||||
(src-pos)
|
||||
(end EOF)
|
||||
(error (lambda (tok-ok tok-name tok-val start-pos end-pos)
|
||||
(error (format "~a ~a ~a ~a ~a"
|
||||
tok-ok tok-name tok-val
|
||||
(position-offset start-pos)
|
||||
(position-offset end-pos)))))
|
||||
(grammar [ones [() null]
|
||||
[(ONE ones) (cons $1 $2)]])))
|
||||
(define (sequence->tokenizer s)
|
||||
(define-values (more? next) (sequence-generate s))
|
||||
(lambda ()
|
||||
(cond [(more?) (next)]
|
||||
[else (position-token (token-EOF 'eof)
|
||||
(position #f #f #f)
|
||||
(position #f #f #f))])))
|
||||
(check-exn #rx"#t ZERO zero 2 3"
|
||||
(lambda ()
|
||||
(parse
|
||||
(sequence->tokenizer
|
||||
(list (position-token
|
||||
(token-ZERO "zero")
|
||||
(position 2 2 5)
|
||||
(position 3 2 6))))))))
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
;; Tests used during development
|
||||
(define-tokens non-terminals (PLUS MINUS STAR BAR COLON EOF))
|
||||
|
||||
(define lex
|
||||
(lexer
|
||||
["+" (token-PLUS '+)]
|
||||
["-" (token-MINUS '-)]
|
||||
["*" (token-STAR '*)]
|
||||
["|" (token-BAR '||)]
|
||||
[":" (token-COLON '|:|)]
|
||||
[whitespace (lex input-port)]
|
||||
[(eof) (token-EOF 'eof)]))
|
||||
|
||||
(define parse
|
||||
(cfg-parser
|
||||
(tokens non-terminals)
|
||||
(start <program>)
|
||||
(end EOF)
|
||||
(error (lambda (a b stx)
|
||||
(error 'parse "failed at ~s" stx)))
|
||||
(grammar [<program> [(PLUS) "plus"]
|
||||
[(<minus-program> BAR <minus-program>) (list $1 $2 $3)]
|
||||
[(<program> COLON) (list $1)]]
|
||||
[<minus-program> [(MINUS) "minus"]
|
||||
[(<program> STAR) (cons $1 $2)]]
|
||||
[<simple> [(<alts> <alts> <alts> MINUS) "yes"]]
|
||||
[<alts> [(PLUS) 'plus]
|
||||
[(MINUS) 'minus]]
|
||||
[<random> [() '0]
|
||||
[(<random> PLUS) (add1 $1)]
|
||||
[(<random> PLUS) (add1 $1)]])))
|
||||
|
||||
(let ([p (open-input-string #;"+*|-|-*|+**" #;"-|+*|+**"
|
||||
#;"+*|+**|-" #;"-|-*|-|-*"
|
||||
#;"-|-*|-|-**|-|-*|-|-**"
|
||||
"-|-*|-|-**|-|-*|-|-***|-|-*|-|-**|-|-*|-|-****|-|-*|-|-**|-|-*|-|-***
|
||||
|-|-*|-|-**|-|-*|-|-*****|-|-*|-|-**|-|-*|-|-***|-|-*|-|-**|-|-*|-|-****|
|
||||
-|-*|-|-**|-|-*|-|-***|-|-*|-|-**|-|-*|-|-*****"
|
||||
;; This one fails:
|
||||
#;"+*")])
|
||||
(check-equal? (parse (lambda () (lex p)))
|
||||
'((((((((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *)
|
||||
||
|
||||
(((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *))
|
||||
.
|
||||
*)
|
||||
||
|
||||
(((((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *)
|
||||
||
|
||||
(((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *))
|
||||
.
|
||||
*))
|
||||
.
|
||||
*)
|
||||
||
|
||||
(((((((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *)
|
||||
||
|
||||
(((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *))
|
||||
.
|
||||
*)
|
||||
||
|
||||
(((((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *)
|
||||
||
|
||||
(((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *))
|
||||
.
|
||||
*))
|
||||
.
|
||||
*)))))
|
@ -0,0 +1,89 @@
|
||||
#lang scheme
|
||||
|
||||
;; An interactive calculator inspired by the calculator example in the bison manual.
|
||||
|
||||
|
||||
;; Import the parser and lexer generators.
|
||||
(require br-parser-tools/yacc
|
||||
br-parser-tools/lex
|
||||
(prefix-in : br-parser-tools/lex-sre))
|
||||
|
||||
(define-tokens value-tokens (NUM VAR FNCT))
|
||||
(define-empty-tokens op-tokens (newline = OP CP + - * / ^ EOF NEG))
|
||||
|
||||
;; A hash table to store variable values in for the calculator
|
||||
(define vars (make-hash))
|
||||
|
||||
(define-lex-abbrevs
|
||||
(lower-letter (:/ "a" "z"))
|
||||
|
||||
(upper-letter (:/ #\A #\Z))
|
||||
|
||||
;; (:/ 0 9) would not work because the lexer does not understand numbers. (:/ #\0 #\9) is ok too.
|
||||
(digit (:/ "0" "9")))
|
||||
|
||||
(define calcl
|
||||
(lexer
|
||||
[(eof) 'EOF]
|
||||
;; Recursively call the lexer on the remaining input after a tab or space, returning the
|
||||
;; result of that operation. This effectively skips all whitespace.
|
||||
[(:or #\tab #\space) (calcl input-port)]
|
||||
;; (token-newline) returns 'newline
|
||||
[#\newline (token-newline)]
|
||||
;; Since (token-=) returns '=, just return the symbol directly
|
||||
[(:or "=" "+" "-" "*" "/" "^") (string->symbol lexeme)]
|
||||
["(" 'OP]
|
||||
[")" 'CP]
|
||||
["sin" (token-FNCT sin)]
|
||||
[(:+ (:or lower-letter upper-letter)) (token-VAR (string->symbol lexeme))]
|
||||
[(:+ digit) (token-NUM (string->number lexeme))]
|
||||
[(:: (:+ digit) #\. (:* digit)) (token-NUM (string->number lexeme))]))
|
||||
|
||||
|
||||
(define calcp
|
||||
(parser
|
||||
|
||||
(start start)
|
||||
(end newline EOF)
|
||||
(tokens value-tokens op-tokens)
|
||||
(error (lambda (a b c) (void)))
|
||||
|
||||
(precs (right =)
|
||||
(left - +)
|
||||
(left * /)
|
||||
(left NEG)
|
||||
(right ^))
|
||||
|
||||
(grammar
|
||||
|
||||
(start [() #f]
|
||||
;; If there is an error, ignore everything before the error
|
||||
;; and try to start over right after the error
|
||||
[(error start) $2]
|
||||
[(exp) $1])
|
||||
|
||||
(exp [(NUM) $1]
|
||||
[(VAR) (hash-ref vars $1 (lambda () 0))]
|
||||
[(VAR = exp) (begin (hash-set! vars $1 $3)
|
||||
$3)]
|
||||
[(FNCT OP exp CP) ($1 $3)]
|
||||
[(exp + exp) (+ $1 $3)]
|
||||
[(exp - exp) (- $1 $3)]
|
||||
[(exp * exp) (* $1 $3)]
|
||||
[(exp / exp) (/ $1 $3)]
|
||||
[(- exp) (prec NEG) (- $2)]
|
||||
[(exp ^ exp) (expt $1 $3)]
|
||||
[(OP exp CP) $2]))))
|
||||
|
||||
;; run the calculator on the given input-port
|
||||
(define (calc ip)
|
||||
(port-count-lines! ip)
|
||||
(letrec ((one-line
|
||||
(lambda ()
|
||||
(let ((result (calcp (lambda () (calcl ip)))))
|
||||
(when result
|
||||
(printf "~a\n" result)
|
||||
(one-line))))))
|
||||
(one-line)))
|
||||
|
||||
(calc (open-input-string "x=1\n(x + 2 * 3) - (1+2)*3"))
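;; With the grammar above, this should print 1 (the value of the assignment)
;; followed by -2.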
|
@ -0,0 +1,242 @@
|
||||
;; This implements the equivalent of Racket's read-syntax for R5RS Scheme.
|
||||
;; It has not been thoroughly tested. Also it will read an entire file into a
|
||||
;; list of syntax objects, instead of returning one syntax object at a time.
|
||||
|
||||
(module read mzscheme
|
||||
|
||||
(require br-parser-tools/lex
|
||||
(prefix : br-parser-tools/lex-sre)
|
||||
br-parser-tools/yacc
|
||||
syntax/readerr)
|
||||
|
||||
(define-tokens data (DATUM))
|
||||
(define-empty-tokens delim (OP CP HASHOP QUOTE QUASIQUOTE UNQUOTE UNQUOTE-SPLICING DOT EOF))
|
||||
|
||||
(define scheme-lexer
|
||||
(lexer-src-pos
|
||||
|
||||
;; Skip comments, without accumulating extra position information
|
||||
[(:or scheme-whitespace comment) (return-without-pos (scheme-lexer input-port))]
|
||||
|
||||
["#t" (token-DATUM #t)]
|
||||
["#f" (token-DATUM #f)]
|
||||
[(:: "#\\" any-char) (token-DATUM (caddr (string->list lexeme)))]
|
||||
["#\\space" (token-DATUM #\space)]
|
||||
["#\\newline" (token-DATUM #\newline)]
|
||||
[(:or (:: initial (:* subsequent)) "+" "-" "...") (token-DATUM (string->symbol lexeme))]
|
||||
[#\" (token-DATUM (list->string (get-string-token input-port)))]
|
||||
[#\( 'OP]
|
||||
[#\) 'CP]
|
||||
[#\[ 'OP]
|
||||
[#\] 'CP]
|
||||
["#(" 'HASHOP]
|
||||
[num2 (token-DATUM (string->number lexeme 2))]
|
||||
[num8 (token-DATUM (string->number lexeme 8))]
|
||||
[num10 (token-DATUM (string->number lexeme 10))]
|
||||
[num16 (token-DATUM (string->number lexeme 16))]
|
||||
["'" 'QUOTE]
|
||||
["`" 'QUASIQUOTE]
|
||||
["," 'UNQUOTE]
|
||||
[",@" 'UNQUOTE-SPLICING]
|
||||
["." 'DOT]
|
||||
[(eof) 'EOF]))
|
||||
|
||||
(define get-string-token
|
||||
(lexer
|
||||
[(:~ #\" #\\) (cons (car (string->list lexeme))
|
||||
(get-string-token input-port))]
|
||||
[(:: #\\ #\\) (cons #\\ (get-string-token input-port))]
|
||||
[(:: #\\ #\") (cons #\" (get-string-token input-port))]
|
||||
[#\" null]))
|
||||
|
||||
|
||||
(define-lex-abbrevs
|
||||
[letter (:or (:/ "a" "z") (:/ #\A #\Z))]
|
||||
[digit (:/ #\0 #\9)]
|
||||
[scheme-whitespace (:or #\newline #\return #\tab #\space #\vtab)]
|
||||
[initial (:or letter (char-set "!$%&*/:<=>?^_~@"))]
|
||||
[subsequent (:or initial digit (char-set "+-.@"))]
|
||||
[comment (:: #\; (:* (:~ #\newline)) #\newline)]
|
||||
|
||||
|
||||
;; See ${PLTHOME}/collects/syntax-color/racket-lexer.rkt for an example of
|
||||
;; using regexp macros to avoid the cut and paste.
|
||||
; [numR (:: prefixR complexR)]
|
||||
; [complexR (:or realR
|
||||
; (:: realR "@" realR)
|
||||
; (:: realR "+" urealR "i")
|
||||
; (:: realR "-" urealR "i")
|
||||
; (:: realR "+i")
|
||||
; (:: realR "-i")
|
||||
; (:: "+" urealR "i")
|
||||
; (:: "-" urealR "i")
|
||||
; (:: "+i")
|
||||
; (:: "-i"))]
|
||||
; [realR (:: sign urealR)]
|
||||
; [urealR (:or uintegerR (:: uintegerR "/" uintegerR) decimalR)]
|
||||
; [uintegerR (:: (:+ digitR) (:* #\#))]
|
||||
; [prefixR (:or (:: radixR exactness)
|
||||
; (:: exactness radixR))]
|
||||
|
||||
[num2 (:: prefix2 complex2)]
|
||||
[complex2 (:or real2
|
||||
(:: real2 "@" real2)
|
||||
(:: real2 "+" ureal2 "i")
|
||||
(:: real2 "-" ureal2 "i")
|
||||
(:: real2 "+i")
|
||||
(:: real2 "-i")
|
||||
(:: "+" ureal2 "i")
|
||||
(:: "-" ureal2 "i")
|
||||
(:: "+i")
|
||||
(:: "-i"))]
|
||||
[real2 (:: sign ureal2)]
|
||||
[ureal2 (:or uinteger2 (:: uinteger2 "/" uinteger2))]
|
||||
[uinteger2 (:: (:+ digit2) (:* #\#))]
|
||||
[prefix2 (:or (:: radix2 exactness)
|
||||
(:: exactness radix2))]
|
||||
[radix2 "#b"]
|
||||
[digit2 (:or "0" "1")]
|
||||
[num8 (:: prefix8 complex8)]
|
||||
[complex8 (:or real8
|
||||
(:: real8 "@" real8)
|
||||
(:: real8 "+" ureal8 "i")
|
||||
(:: real8 "-" ureal8 "i")
|
||||
(:: real8 "+i")
|
||||
(:: real8 "-i")
|
||||
(:: "+" ureal8 "i")
|
||||
(:: "-" ureal8 "i")
|
||||
(:: "+i")
|
||||
(:: "-i"))]
|
||||
[real8 (:: sign ureal8)]
|
||||
[ureal8 (:or uinteger8 (:: uinteger8 "/" uinteger8))]
|
||||
[uinteger8 (:: (:+ digit8) (:* #\#))]
|
||||
[prefix8 (:or (:: radix8 exactness)
|
||||
(:: exactness radix8))]
|
||||
[radix8 "#o"]
|
||||
[digit8 (:/ "0" "7")]
|
||||
|
||||
[num10 (:: prefix10 complex10)]
|
||||
[complex10 (:or real10
|
||||
(:: real10 "@" real10)
|
||||
(:: real10 "+" ureal10 "i")
|
||||
(:: real10 "-" ureal10 "i")
|
||||
(:: real10 "+i")
|
||||
(:: real10 "-i")
|
||||
(:: "+" ureal10 "i")
|
||||
(:: "-" ureal10 "i")
|
||||
(:: "+i")
|
||||
(:: "-i"))]
|
||||
[real10 (:: sign ureal10)]
|
||||
[ureal10 (:or uinteger10 (:: uinteger10 "/" uinteger10) decimal10)]
|
||||
[uinteger10 (:: (:+ digit10) (:* #\#))]
|
||||
[prefix10 (:or (:: radix10 exactness)
|
||||
(:: exactness radix10))]
|
||||
[radix10 (:? "#d")]
|
||||
[digit10 digit]
|
||||
[decimal10 (:or (:: uinteger10 suffix)
|
||||
(:: #\. (:+ digit10) (:* #\#) suffix)
|
||||
(:: (:+ digit10) #\. (:* digit10) (:* #\#) suffix)
|
||||
(:: (:+ digit10) (:+ #\#) #\. (:* #\#) suffix))]
|
||||
|
||||
[num16 (:: prefix16 complex16)]
|
||||
[complex16 (:or real16
|
||||
(:: real16 "@" real16)
|
||||
(:: real16 "+" ureal16 "i")
|
||||
(:: real16 "-" ureal16 "i")
|
||||
(:: real16 "+i")
|
||||
(:: real16 "-i")
|
||||
(:: "+" ureal16 "i")
|
||||
(:: "-" ureal16 "i")
|
||||
"+i"
|
||||
"-i")]
|
||||
[real16 (:: sign ureal16)]
|
||||
[ureal16 (:or uinteger16 (:: uinteger16 "/" uinteger16))]
|
||||
[uinteger16 (:: (:+ digit16) (:* #\#))]
|
||||
[prefix16 (:or (:: radix16 exactness)
|
||||
(:: exactness radix16))]
|
||||
[radix16 "#x"]
|
||||
[digit16 (:or digit (:/ #\a #\f) (:/ #\A #\F))]
|
||||
|
||||
|
||||
[suffix (:or "" (:: exponent-marker sign (:+ digit10)))]
|
||||
[exponent-marker (:or "e" "s" "f" "d" "l")]
|
||||
[sign (:or "" "+" "-")]
|
||||
[exactness (:or "" "#i" "#e")])
|
||||
|
||||
|
||||
(define stx-for-original-property (read-syntax #f (open-input-string "original")))
|
||||
|
||||
;; A macro to build the syntax object
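;; (build-so value start end) wraps `value' in a syntax object whose source
;; location runs from the start position of the start-th grammar element to
;; the end position of the end-th element, marked with the "original"
;; property captured above.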
|
||||
(define-syntax (build-so stx)
|
||||
(syntax-case stx ()
|
||||
((_ value start end)
|
||||
(with-syntax ((start-pos (datum->syntax-object
|
||||
(syntax end)
|
||||
(string->symbol
|
||||
(format "$~a-start-pos"
|
||||
(syntax-object->datum (syntax start))))))
|
||||
(end-pos (datum->syntax-object
|
||||
(syntax end)
|
||||
(string->symbol
|
||||
(format "$~a-end-pos"
|
||||
(syntax-object->datum (syntax end))))))
|
||||
(source (datum->syntax-object
|
||||
(syntax end)
|
||||
'source-name)))
|
||||
(syntax
|
||||
(datum->syntax-object
|
||||
#f
|
||||
value
|
||||
(list source
|
||||
(position-line start-pos)
|
||||
(position-col start-pos)
|
||||
(position-offset start-pos)
|
||||
(- (position-offset end-pos)
|
||||
(position-offset start-pos)))
|
||||
stx-for-original-property))))))
|
||||
|
||||
(define (scheme-parser source-name)
|
||||
(parser
|
||||
(src-pos)
|
||||
|
||||
(start s)
|
||||
(end EOF)
|
||||
(error (lambda (a name val start end)
|
||||
(raise-read-error
|
||||
"read-error"
|
||||
source-name
|
||||
(position-line start)
|
||||
(position-col start)
|
||||
(position-offset start)
|
||||
(- (position-offset end)
|
||||
(position-offset start)))))
|
||||
(tokens data delim)
|
||||
|
||||
|
||||
(grammar
|
||||
|
||||
(s [(sexp-list) (reverse $1)])
|
||||
|
||||
(sexp [(DATUM) (build-so $1 1 1)]
|
||||
[(OP sexp-list CP) (build-so (reverse $2) 1 3)]
|
||||
[(HASHOP sexp-list CP) (build-so (list->vector (reverse $2)) 1 3)]
|
||||
[(QUOTE sexp) (build-so (list 'quote $2) 1 2)]
|
||||
[(QUASIQUOTE sexp) (build-so (list 'quasiquote $2) 1 2)]
|
||||
[(UNQUOTE sexp) (build-so (list 'unquote $2) 1 2)]
|
||||
[(UNQUOTE-SPLICING sexp) (build-so (list 'unquote-splicing $2) 1 2)]
|
||||
[(OP sexp-list DOT sexp CP) (build-so (append (reverse $2) $4) 1 5)])
|
||||
|
||||
(sexp-list [() null]
|
||||
[(sexp-list sexp) (cons $2 $1)]))))
|
||||
|
||||
(define (rs sn ip)
|
||||
(port-count-lines! ip)
|
||||
((scheme-parser sn) (lambda () (scheme-lexer ip))))
|
||||
|
||||
(define readsyntax
|
||||
(case-lambda ((sn) (rs sn (current-input-port)))
|
||||
((sn ip) (rs sn ip))))
|
||||
|
||||
(provide (rename readsyntax read-syntax))
|
||||
|
||||
)
|
@ -0,0 +1,3 @@
|
||||
#lang info
|
||||
|
||||
(define compile-omit-paths '("private-lex/error-tests.rkt"))
|
@ -0,0 +1,24 @@
|
||||
(module lex-plt-v200 mzscheme
|
||||
(require br-parser-tools/lex
|
||||
(prefix : br-parser-tools/lex-sre))
|
||||
|
||||
(provide epsilon
|
||||
~
|
||||
(rename :* *)
|
||||
(rename :+ +)
|
||||
(rename :? ?)
|
||||
(rename :or :)
|
||||
(rename :& &)
|
||||
(rename :: @)
|
||||
(rename :~ ^)
|
||||
(rename :/ -))
|
||||
|
||||
(define-lex-trans epsilon
|
||||
(syntax-rules ()
|
||||
((_) "")))
|
||||
|
||||
(define-lex-trans ~
|
||||
(syntax-rules ()
|
||||
((_ re) (complement re)))))
|
||||
|
||||
|
@ -0,0 +1,119 @@
|
||||
(module lex-sre mzscheme
|
||||
(require br-parser-tools/lex)
|
||||
|
||||
(provide (rename sre-* *)
|
||||
(rename sre-+ +)
|
||||
?
|
||||
(rename sre-= =)
|
||||
(rename sre->= >=)
|
||||
**
|
||||
(rename sre-or or)
|
||||
:
|
||||
seq
|
||||
&
|
||||
~
|
||||
(rename sre-- -)
|
||||
(rename sre-/ /)
|
||||
/-only-chars)
|
||||
|
||||
(define-lex-trans sre-*
|
||||
(syntax-rules ()
|
||||
((_ re ...)
|
||||
(repetition 0 +inf.0 (union re ...)))))
|
||||
|
||||
(define-lex-trans sre-+
|
||||
(syntax-rules ()
|
||||
((_ re ...)
|
||||
(repetition 1 +inf.0 (union re ...)))))
|
||||
|
||||
(define-lex-trans ?
|
||||
(syntax-rules ()
|
||||
((_ re ...)
|
||||
(repetition 0 1 (union re ...)))))
|
||||
|
||||
(define-lex-trans sre-=
|
||||
(syntax-rules ()
|
||||
((_ n re ...)
|
||||
(repetition n n (union re ...)))))
|
||||
|
||||
(define-lex-trans sre->=
|
||||
(syntax-rules ()
|
||||
((_ n re ...)
|
||||
(repetition n +inf.0 (union re ...)))))
|
||||
|
||||
(define-lex-trans **
|
||||
(syntax-rules ()
|
||||
((_ low #f re ...)
|
||||
(** low +inf.0 re ...))
|
||||
((_ low high re ...)
|
||||
(repetition low high (union re ...)))))
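  ;; For example, (** 2 4 "a") matches two to four consecutive "a"s, and
  ;; (** 2 #f "a") matches two or more.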
|
||||
|
||||
(define-lex-trans sre-or
|
||||
(syntax-rules ()
|
||||
((_ re ...)
|
||||
(union re ...))))
|
||||
|
||||
(define-lex-trans :
|
||||
(syntax-rules ()
|
||||
((_ re ...)
|
||||
(concatenation re ...))))
|
||||
|
||||
(define-lex-trans seq
|
||||
(syntax-rules ()
|
||||
((_ re ...)
|
||||
(concatenation re ...))))
|
||||
|
||||
(define-lex-trans &
|
||||
(syntax-rules ()
|
||||
((_ re ...)
|
||||
(intersection re ...))))
|
||||
|
||||
(define-lex-trans ~
|
||||
(syntax-rules ()
|
||||
((_ re ...)
|
||||
(char-complement (union re ...)))))
|
||||
|
||||
;; set difference
|
||||
(define-lex-trans (sre-- stx)
|
||||
(syntax-case stx ()
|
||||
((_)
|
||||
(raise-syntax-error #f
|
||||
"must have at least one argument"
|
||||
stx))
|
||||
((_ big-re re ...)
|
||||
(syntax (& big-re (complement (union re ...)))))))
|
||||
|
||||
(define-lex-trans (sre-/ stx)
|
||||
(syntax-case stx ()
|
||||
((_ range ...)
|
||||
(let ((chars
|
||||
(apply append (map (lambda (r)
|
||||
(let ((x (syntax-e r)))
|
||||
(cond
|
||||
((char? x) (list x))
|
||||
((string? x) (string->list x))
|
||||
(else
|
||||
(raise-syntax-error
|
||||
#f
|
||||
"not a char or string"
|
||||
stx
|
||||
r)))))
|
||||
(syntax->list (syntax (range ...)))))))
|
||||
(unless (even? (length chars))
|
||||
(raise-syntax-error
|
||||
#f
|
||||
"not given an even number of characters"
|
||||
stx))
|
||||
#`(/-only-chars #,@chars)))))
|
||||
|
||||
(define-lex-trans /-only-chars
|
||||
(syntax-rules ()
|
||||
((_ c1 c2)
|
||||
(char-range c1 c2))
|
||||
((_ c1 c2 c ...)
|
||||
(union (char-range c1 c2)
|
||||
(/-only-chars c ...)))))
|
||||
|
||||
)
|
||||
|
||||
|
@ -0,0 +1,410 @@
|
||||
(module lex mzscheme
|
||||
|
||||
;; Provides the syntax used to create lexers and the functions needed to
|
||||
;; create and use the buffer that the lexer reads from. See docs.
|
||||
|
||||
(require-for-syntax mzlib/list
|
||||
syntax/stx
|
||||
syntax/define
|
||||
syntax/boundmap
|
||||
"private-lex/util.rkt"
|
||||
"private-lex/actions.rkt"
|
||||
"private-lex/front.rkt"
|
||||
"private-lex/unicode-chars.rkt")
|
||||
|
||||
(require mzlib/stxparam
|
||||
syntax/readerr
|
||||
"private-lex/token.rkt")
|
||||
|
||||
(provide lexer lexer-src-pos lexer-srcloc define-lex-abbrev define-lex-abbrevs define-lex-trans
|
||||
|
||||
;; Dealing with tokens and related structures
|
||||
define-tokens define-empty-tokens token-name token-value token?
|
||||
(struct position (offset line col))
|
||||
(struct position-token (token start-pos end-pos))
|
||||
(struct srcloc-token (token srcloc))
|
||||
|
||||
;; File path for highlighting errors while lexing
|
||||
file-path
|
||||
|
||||
;; Lex abbrevs for unicode char sets. See mzscheme manual section 3.4.
|
||||
any-char any-string nothing alphabetic lower-case upper-case title-case
|
||||
numeric symbolic punctuation graphic whitespace blank iso-control
|
||||
|
||||
;; A regular expression operator
|
||||
char-set)
|
||||
|
||||
;; wrap-action: syntax-object (U #f 'lexer-src-pos 'lexer-srcloc) -> syntax-object
|
||||
(define-for-syntax (wrap-action action src-loc-style)
|
||||
(with-syntax ((action-stx
|
||||
(cond
|
||||
[(eq? src-loc-style 'lexer-src-pos)
|
||||
#`(let/ec ret
|
||||
(syntax-parameterize
|
||||
([return-without-pos (make-rename-transformer #'ret)])
|
||||
(make-position-token #,action start-pos end-pos)))]
|
||||
[(eq? src-loc-style 'lexer-srcloc)
|
||||
#`(let/ec ret
|
||||
(syntax-parameterize
|
||||
([return-without-pos (make-rename-transformer #'ret)])
|
||||
(make-srcloc-token #,action lexeme-srcloc)))]
|
||||
[else action])))
|
||||
(syntax/loc action
|
||||
(lambda (start-pos-p end-pos-p lexeme-p input-port-p)
|
||||
(define lexeme-srcloc-p (make-srcloc (object-name input-port-p)
|
||||
(position-line start-pos-p)
|
||||
(position-col start-pos-p)
|
||||
(position-offset start-pos-p)
|
||||
(and (number? (position-offset end-pos-p))
|
||||
(number? (position-offset start-pos-p))
|
||||
(- (position-offset end-pos-p)
|
||||
(position-offset start-pos-p)))))
|
||||
(syntax-parameterize
|
||||
([start-pos (make-rename-transformer #'start-pos-p)]
|
||||
[end-pos (make-rename-transformer #'end-pos-p)]
|
||||
[lexeme (make-rename-transformer #'lexeme-p)]
|
||||
[input-port (make-rename-transformer #'input-port-p)]
|
||||
[lexeme-srcloc (make-rename-transformer #'lexeme-srcloc-p)])
|
||||
action-stx)))))
|
||||
|
||||
(define-for-syntax (make-lexer-trans src-loc-style)
|
||||
(lambda (stx)
|
||||
(syntax-case stx ()
|
||||
((_ re-act ...)
|
||||
(begin
|
||||
(for-each
|
||||
(lambda (x)
|
||||
(syntax-case x ()
|
||||
((re act) (void))
|
||||
(_ (raise-syntax-error #f
|
||||
"not a regular expression / action pair"
|
||||
stx
|
||||
x))))
|
||||
(syntax->list (syntax (re-act ...))))
|
||||
(let* ((spec/re-act-lst
|
||||
(syntax->list (syntax (re-act ...))))
|
||||
(eof-act
|
||||
(get-special-action spec/re-act-lst #'eof #''eof))
|
||||
(spec-act
|
||||
(get-special-action spec/re-act-lst #'special #'(void)))
|
||||
(spec-comment-act
|
||||
(get-special-action spec/re-act-lst #'special-comment #'#f))
|
||||
(ids (list #'special #'special-comment #'eof))
|
||||
(re-act-lst
|
||||
(filter
|
||||
(lambda (spec/re-act)
|
||||
(syntax-case spec/re-act ()
|
||||
(((special) act)
|
||||
(not (ormap
|
||||
(lambda (x)
|
||||
(and (identifier? #'special)
|
||||
(module-or-top-identifier=? (syntax special) x)))
|
||||
ids)))
|
||||
(_ #t)))
|
||||
spec/re-act-lst))
|
||||
(name-lst (map (lambda (x) (datum->syntax-object #f (gensym))) re-act-lst))
|
||||
(act-lst (map (lambda (x) (stx-car (stx-cdr x))) re-act-lst))
|
||||
(re-actname-lst (map (lambda (re-act name)
|
||||
(list (stx-car re-act)
|
||||
name))
|
||||
re-act-lst
|
||||
name-lst)))
|
||||
(when (null? spec/re-act-lst)
|
||||
(raise-syntax-error (or src-loc-style 'lexer) "expected at least one action" stx))
|
||||
(let-values (((trans start action-names no-look disappeared-uses)
|
||||
(build-lexer re-actname-lst)))
|
||||
(when (vector-ref action-names start) ;; Start state is final
|
||||
(unless (and
|
||||
;; All the successor states are final
|
||||
(andmap (lambda (x) (vector-ref action-names (vector-ref x 2)))
|
||||
(vector->list (vector-ref trans start)))
|
||||
;; Each character has a successor state
|
||||
(let loop ((check 0)
|
||||
(nexts (vector->list (vector-ref trans start))))
|
||||
(cond
|
||||
((null? nexts) #f)
|
||||
(else
|
||||
(let ((next (car nexts)))
|
||||
(and (= (vector-ref next 0) check)
|
||||
(let ((next-check (vector-ref next 1)))
|
||||
(or (>= next-check max-char-num)
|
||||
(loop (add1 next-check) (cdr nexts))))))))))
|
||||
(eprintf "Warning: lexer at ~a can accept the empty string.\n" stx)))
|
||||
(with-syntax ((start-state-stx start)
|
||||
(trans-table-stx trans)
|
||||
(no-lookahead-stx no-look)
|
||||
((name ...) name-lst)
|
||||
((act ...) (map (lambda (a)
|
||||
(wrap-action a src-loc-style))
|
||||
act-lst))
|
||||
((act-name ...) (vector->list action-names))
|
||||
(spec-act-stx
|
||||
(wrap-action spec-act src-loc-style))
|
||||
(has-comment-act?-stx
|
||||
(if (syntax-e spec-comment-act) #t #f))
|
||||
(spec-comment-act-stx
|
||||
(wrap-action spec-comment-act src-loc-style))
|
||||
(eof-act-stx (wrap-action eof-act src-loc-style)))
|
||||
(syntax-property
|
||||
(syntax/loc stx
|
||||
(let ([name act] ...)
|
||||
(let ([proc
|
||||
(lexer-body start-state-stx
|
||||
trans-table-stx
|
||||
(vector act-name ...)
|
||||
no-lookahead-stx
|
||||
spec-act-stx
|
||||
has-comment-act?-stx
|
||||
spec-comment-act-stx
|
||||
eof-act-stx)])
|
||||
;; reverse eta to get named procedures:
|
||||
(lambda (port) (proc port)))))
|
||||
'disappeared-use
|
||||
disappeared-uses)))))))))
|
||||
|
||||
(define-syntax lexer (make-lexer-trans #f))
|
||||
(define-syntax lexer-src-pos (make-lexer-trans 'lexer-src-pos))
|
||||
(define-syntax lexer-srcloc (make-lexer-trans 'lexer-srcloc))
|
||||
|
||||
(define-syntax (define-lex-abbrev stx)
|
||||
(syntax-case stx ()
|
||||
((_ name re)
|
||||
(identifier? (syntax name))
|
||||
(syntax/loc stx
|
||||
(define-syntax name
|
||||
(make-lex-abbrev (lambda () (quote-syntax re))))))
|
||||
(_
|
||||
(raise-syntax-error
|
||||
#f
|
||||
"form should be (define-lex-abbrev name re)"
|
||||
stx))))
|
||||
|
||||
(define-syntax (define-lex-abbrevs stx)
|
||||
(syntax-case stx ()
|
||||
((_ x ...)
|
||||
(with-syntax (((abbrev ...)
|
||||
(map
|
||||
(lambda (a)
|
||||
(syntax-case a ()
|
||||
((name re)
|
||||
(identifier? (syntax name))
|
||||
(syntax/loc a (define-lex-abbrev name re)))
|
||||
(_ (raise-syntax-error
|
||||
#f
|
||||
"form should be (define-lex-abbrevs (name re) ...)"
|
||||
stx
|
||||
a))))
|
||||
(syntax->list (syntax (x ...))))))
|
||||
(syntax/loc stx (begin abbrev ...))))
|
||||
(_
|
||||
(raise-syntax-error
|
||||
#f
|
||||
"form should be (define-lex-abbrevs (name re) ...)"
|
||||
stx))))
|
||||
|
||||
(define-syntax (define-lex-trans stx)
|
||||
(syntax-case stx ()
|
||||
((_ name-form body-form)
|
||||
(let-values (((name body)
|
||||
(normalize-definition (syntax (define-syntax name-form body-form)) #'lambda)))
|
||||
|
||||
#`(define-syntax #,name
|
||||
(let ((func #,body))
|
||||
(unless (procedure? func)
|
||||
(raise-syntax-error 'define-lex-trans "expected a procedure as the transformer, got ~e" func))
|
||||
(unless (procedure-arity-includes? func 1)
|
||||
(raise-syntax-error 'define-lex-trans "expected a procedure that accepts 1 argument as the transformer, got ~e" func))
|
||||
(make-lex-trans func)))))
|
||||
(_
|
||||
(raise-syntax-error
|
||||
#f
|
||||
"form should be (define-lex-trans name transformer)"
|
||||
stx))))
|
||||
|
||||
|
||||
(define (get-next-state-helper char min max table)
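  ;; Binary search over a vector of #(low high next-state) entries sorted
  ;; by low: return the next state whose [low, high] range contains char,
  ;; or #f if no range does.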
|
||||
(if (>= min max)
|
||||
#f
|
||||
(let* ((try (quotient (+ min max) 2))
|
||||
(el (vector-ref table try))
|
||||
(r1 (vector-ref el 0))
|
||||
(r2 (vector-ref el 1)))
|
||||
(cond
|
||||
((and (>= char r1) (<= char r2)) (vector-ref el 2))
|
||||
((< char r1) (get-next-state-helper char min try table))
|
||||
(else (get-next-state-helper char (add1 try) max table))))))
|
||||
|
||||
|
||||
|
||||
|
||||
(define (get-next-state char table)
|
||||
(if table
|
||||
(get-next-state-helper char 0 (vector-length table) table)
|
||||
#f))
|
||||
|
||||
(define (lexer-body start-state trans-table actions no-lookahead special-action
|
||||
has-special-comment-action? special-comment-action eof-action)
|
||||
(letrec ((lexer
|
||||
(lambda (ip)
|
||||
(let ((first-pos (get-position ip))
|
||||
(first-char (peek-char-or-special ip 0)))
|
||||
;(printf "(peek-char-or-special port 0) = ~e\n" first-char)
|
||||
(cond
|
||||
((eof-object? first-char)
|
||||
(do-match ip first-pos eof-action (read-char-or-special ip)))
|
||||
((special-comment? first-char)
|
||||
(read-char-or-special ip)
|
||||
(cond
|
||||
(has-special-comment-action?
|
||||
(do-match ip first-pos special-comment-action #f))
|
||||
(else (lexer ip))))
|
||||
((not (char? first-char))
|
||||
(do-match ip first-pos special-action (read-char-or-special ip)))
|
||||
(else
|
||||
(let lexer-loop (
|
||||
;; current-state
|
||||
(state start-state)
|
||||
;; the character to transition on
|
||||
(char first-char)
|
||||
;; action for the longest match seen thus far
|
||||
;; including a match at the current state
|
||||
(longest-match-action
|
||||
(vector-ref actions start-state))
|
||||
;; how many bytes precede char
|
||||
(length-bytes 0)
|
||||
;; how many characters have been read
|
||||
;; including the one just read
|
||||
(length-chars 1)
|
||||
;; how many characters are in the longest match
|
||||
(longest-match-length 0))
|
||||
(let ((next-state
|
||||
(cond
|
||||
((not (char? char)) #f)
|
||||
(else (get-next-state (char->integer char)
|
||||
(vector-ref trans-table state))))))
|
||||
(cond
|
||||
((not next-state)
|
||||
(check-match ip first-pos longest-match-length
|
||||
length-chars longest-match-action))
|
||||
((vector-ref no-lookahead next-state)
|
||||
(let ((act (vector-ref actions next-state)))
|
||||
(check-match ip
|
||||
first-pos
|
||||
(if act length-chars longest-match-length)
|
||||
length-chars
|
||||
(if act act longest-match-action))))
|
||||
(else
|
||||
(let* ((act (vector-ref actions next-state))
|
||||
(next-length-bytes (+ (char-utf-8-length char) length-bytes))
|
||||
(next-char (peek-char-or-special ip next-length-bytes)))
|
||||
#;(printf "(peek-char-or-special port ~e) = ~e\n"
|
||||
next-length-bytes next-char)
|
||||
(lexer-loop next-state
|
||||
next-char
|
||||
(if act
|
||||
act
|
||||
longest-match-action)
|
||||
next-length-bytes
|
||||
(add1 length-chars)
|
||||
(if act
|
||||
length-chars
|
||||
longest-match-length)))))))))))))
|
||||
(lambda (ip)
|
||||
(unless (input-port? ip)
|
||||
(raise-argument-error
|
||||
'lexer
|
||||
"input-port?"
|
||||
0
|
||||
ip))
|
||||
(lexer ip))))
|
||||
|
||||
(define (check-match lb first-pos longest-match-length length longest-match-action)
|
||||
(unless longest-match-action
|
||||
(let* ((match (read-string length lb))
|
||||
(end-pos (get-position lb)))
|
||||
(raise-read-error
|
||||
(format "lexer: No match found in input starting with: ~a" match)
|
||||
(file-path)
|
||||
(position-line first-pos)
|
||||
(position-col first-pos)
|
||||
(position-offset first-pos)
|
||||
(- (position-offset end-pos) (position-offset first-pos)))))
|
||||
(let ((match (read-string longest-match-length lb)))
|
||||
;(printf "(read-string ~e port) = ~e\n" longest-match-length match)
|
||||
(do-match lb first-pos longest-match-action match)))
|
||||
|
||||
(define file-path (make-parameter #f))
|
||||
|
||||
(define (do-match ip first-pos action value)
|
||||
#;(printf "(action ~a ~a ~a ~a)\n"
|
||||
(position-offset first-pos) (position-offset (get-position ip)) value ip)
|
||||
(action first-pos (get-position ip) value ip))
|
||||
|
||||
(define (get-position ip)
|
||||
(let-values (((line col off) (port-next-location ip)))
|
||||
(make-position off line col)))
|
||||
|
||||
(define-syntax (create-unicode-abbrevs stx)
|
||||
(syntax-case stx ()
|
||||
((_ ctxt)
|
||||
(with-syntax (((ranges ...) (map (lambda (range)
|
||||
`(union ,@(map (lambda (x)
|
||||
`(char-range ,(integer->char (car x))
|
||||
,(integer->char (cdr x))))
|
||||
range)))
|
||||
(list (force alphabetic-ranges)
|
||||
(force lower-case-ranges)
|
||||
(force upper-case-ranges)
|
||||
(force title-case-ranges)
|
||||
(force numeric-ranges)
|
||||
(force symbolic-ranges)
|
||||
(force punctuation-ranges)
|
||||
(force graphic-ranges)
|
||||
(force whitespace-ranges)
|
||||
(force blank-ranges)
|
||||
(force iso-control-ranges))))
|
||||
((names ...) (map (lambda (sym)
|
||||
(datum->syntax-object (syntax ctxt) sym #f))
|
||||
'(alphabetic
|
||||
lower-case
|
||||
upper-case
|
||||
title-case
|
||||
numeric
|
||||
symbolic
|
||||
punctuation
|
||||
graphic
|
||||
whitespace
|
||||
blank
|
||||
iso-control))))
|
||||
(syntax (define-lex-abbrevs (names ranges) ...))))))
|
||||
|
||||
(define-lex-abbrev any-char (char-complement (union)))
|
||||
(define-lex-abbrev any-string (intersection))
|
||||
(define-lex-abbrev nothing (union))
|
||||
(create-unicode-abbrevs #'here)
|
||||
|
||||
(define-lex-trans (char-set stx)
|
||||
(syntax-case stx ()
|
||||
((_ str)
|
||||
(string? (syntax-e (syntax str)))
|
||||
(with-syntax (((char ...) (string->list (syntax-e (syntax str)))))
|
||||
(syntax (union char ...))))))
|
||||
|
||||
(define-syntax provide-lex-keyword
|
||||
(syntax-rules ()
|
||||
[(_ id ...)
|
||||
(begin
|
||||
(define-syntax-parameter id
|
||||
(make-set!-transformer
|
||||
(lambda (stx)
|
||||
(raise-syntax-error
|
||||
#f
|
||||
(format "use of a lexer keyword (~a) is not in an appropriate lexer action"
|
||||
'id)
|
||||
stx))))
|
||||
...
|
||||
(provide id ...))]))
|
||||
|
||||
(provide-lex-keyword start-pos end-pos lexeme lexeme-srcloc input-port return-without-pos)
|
||||
|
||||
)
|
@ -0,0 +1,16 @@
|
||||
#lang scheme/base
|
||||
|
||||
(provide (all-defined-out))
|
||||
(require syntax/stx)
|
||||
|
||||
;; get-special-action: (syntax-object list) syntax-object syntax-object -> syntax-object
|
||||
;; Returns the first action from a rule of the form ((which-special) action)
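;; For example (an illustrative sketch, not from the original source):
;;   (get-special-action (list #'((eof) 'stop) #'("a" 1)) #'eof #'(void))
;; returns the syntax object for 'stop; with #'special as the second
;; argument it would instead return the supplied default, #'(void).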
|
||||
(define (get-special-action rules which-special none)
|
||||
(cond
|
||||
((null? rules) none)
|
||||
(else
|
||||
(syntax-case (car rules) ()
|
||||
(((special) act)
|
||||
(and (identifier? #'special) (module-or-top-identifier=? (syntax special) which-special))
|
||||
(syntax act))
|
||||
(_ (get-special-action (cdr rules) which-special none))))))
|
@ -0,0 +1,339 @@
|
||||
(module deriv mzscheme
|
||||
|
||||
(require mzlib/list
|
||||
(prefix is: mzlib/integer-set)
|
||||
"re.rkt"
|
||||
"util.rkt")
|
||||
|
||||
(provide build-dfa print-dfa (struct dfa (num-states start-state final-states/actions transitions)))
|
||||
|
||||
(define e (build-epsilon))
|
||||
(define z (build-zero))
|
||||
|
||||
|
||||
;; Don't do anything with this one but extract the chars
|
||||
(define all-chars (->re `(char-complement (union)) (make-cache)))
|
||||
|
||||
;; get-char-groups : re bool -> (list-of char-setR?)
|
||||
;; Collects the char-setRs in r that could be used in
|
||||
;; taking the derivative of r.
|
||||
(define (get-char-groups r found-negation)
|
||||
(cond
|
||||
((or (eq? r e) (eq? r z)) null)
|
||||
((char-setR? r) (list r))
|
||||
((concatR? r)
|
||||
(if (re-nullable? (concatR-re1 r))
|
||||
(append (get-char-groups (concatR-re1 r) found-negation)
|
||||
(get-char-groups (concatR-re2 r) found-negation))
|
||||
(get-char-groups (concatR-re1 r) found-negation)))
|
||||
((repeatR? r)
|
||||
(get-char-groups (repeatR-re r) found-negation))
|
||||
((orR? r)
|
||||
(apply append (map (lambda (x) (get-char-groups x found-negation)) (orR-res r))))
|
||||
((andR? r)
|
||||
(apply append (map (lambda (x) (get-char-groups x found-negation)) (andR-res r))))
|
||||
((negR? r)
|
||||
(if found-negation
|
||||
(get-char-groups (negR-re r) #t)
|
||||
(cons all-chars (get-char-groups (negR-re r) #t))))))
|
||||
|
||||
(test-block ((c (make-cache))
|
||||
(r1 (->re #\1 c))
|
||||
(r2 (->re #\2 c)))
|
||||
((get-char-groups e #f) null)
|
||||
((get-char-groups z #f) null)
|
||||
((get-char-groups r1 #f) (list r1))
|
||||
((get-char-groups (->re `(concatenation ,r1 ,r2) c) #f)
|
||||
(list r1))
|
||||
((get-char-groups (->re `(concatenation ,e ,r2) c) #f)
|
||||
(list r2))
|
||||
((get-char-groups (->re `(concatenation (repetition 0 +inf.0 ,r1) ,r2) c) #f)
|
||||
(list r1 r2))
|
||||
((get-char-groups (->re `(repetition 0 +inf.0 ,r1) c) #f)
|
||||
(list r1))
|
||||
((get-char-groups
|
||||
(->re `(union (repetition 0 +inf.0 ,r1)
|
||||
(concatenation (repetition 0 +inf.0 ,r2) "3") "4") c) #f)
|
||||
(list r1 r2 (->re "3" c) (->re "4" c)))
|
||||
((get-char-groups (->re `(complement ,r1) c) #f)
|
||||
(list all-chars r1))
|
||||
((get-char-groups
|
||||
(->re `(intersection (repetition 0 +inf.0 ,r1)
|
||||
(concatenation (repetition 0 +inf.0 ,r2) "3") "4") c) #f)
|
||||
(list r1 r2 (->re "3" c) (->re "4" c)))
|
||||
)
|
||||
(define loc:member? is:member?)
|
||||
|
||||
;; deriveR : re char cache -> re
|
||||
(define (deriveR r c cache)
|
||||
(cond
|
||||
((or (eq? r e) (eq? r z)) z)
|
||||
((char-setR? r)
|
||||
(if (loc:member? c (char-setR-chars r)) e z))
|
||||
((concatR? r)
|
||||
(let* ((r1 (concatR-re1 r))
|
||||
(r2 (concatR-re2 r))
|
||||
(d (build-concat (deriveR r1 c cache) r2 cache)))
|
||||
(if (re-nullable? r1)
|
||||
(build-or (list d (deriveR r2 c cache)) cache)
|
||||
d)))
|
||||
((repeatR? r)
|
||||
(build-concat (deriveR (repeatR-re r) c cache)
|
||||
(build-repeat (sub1 (repeatR-low r))
|
||||
(sub1 (repeatR-high r))
|
||||
(repeatR-re r) cache)
|
||||
cache))
|
||||
((orR? r)
|
||||
(build-or (map (lambda (x) (deriveR x c cache))
|
||||
(orR-res r))
|
||||
cache))
|
||||
((andR? r)
|
||||
(build-and (map (lambda (x) (deriveR x c cache))
|
||||
(andR-res r))
|
||||
cache))
|
||||
((negR? r)
|
||||
(build-neg (deriveR (negR-re r) c cache) cache))))
|
||||
|
||||
(test-block ((c (make-cache))
|
||||
(a (char->integer #\a))
|
||||
(b (char->integer #\b))
|
||||
(r1 (->re #\a c))
|
||||
(r2 (->re `(repetition 0 +inf.0 #\a) c))
|
||||
(r3 (->re `(repetition 0 +inf.0 ,r2) c))
|
||||
(r4 (->re `(concatenation #\a ,r2) c))
|
||||
(r5 (->re `(repetition 0 +inf.0 ,r4) c))
|
||||
(r6 (->re `(union ,r5 #\a) c))
|
||||
(r7 (->re `(concatenation ,r2 ,r2) c))
|
||||
(r8 (->re `(complement ,r4) c))
|
||||
(r9 (->re `(intersection ,r2 ,r4) c)))
|
||||
((deriveR e a c) z)
|
||||
((deriveR z a c) z)
|
||||
((deriveR r1 b c) z)
|
||||
((deriveR r1 a c) e)
|
||||
((deriveR r2 a c) r2)
|
||||
((deriveR r2 b c) z)
|
||||
((deriveR r3 a c) r2)
|
||||
((deriveR r3 b c) z)
|
||||
((deriveR r4 a c) r2)
|
||||
((deriveR r4 b c) z)
|
||||
((deriveR r5 a c) (->re `(concatenation ,r2 ,r5) c))
|
||||
((deriveR r5 b c) z)
|
||||
((deriveR r6 a c) (->re `(union (concatenation ,r2 ,r5) "") c))
|
||||
((deriveR r6 b c) z)
|
||||
((deriveR r7 a c) (->re `(union (concatenation ,r2 ,r2) ,r2) c))
|
||||
((deriveR r7 b c) z)
|
||||
((deriveR r8 a c) (->re `(complement, r2) c))
|
||||
((deriveR r8 b c) (->re `(complement ,z) c))
|
||||
((deriveR r9 a c) r2)
|
||||
((deriveR r9 b c) z)
|
||||
((deriveR (->re `(repetition 1 2 "ab") c) a c)
|
||||
(->re `(concatenation "b" (repetition 0 1 "ab")) c)))
|
||||
|
||||
;; An re-action is (cons re action)
|
||||
|
||||
;; derive : (list-of re-action) char cache -> (union (list-of re-action) #f)
|
||||
;; Applies deriveR to the re part of each re-action.
|
||||
;; Returns #f if the derived state is equivalent to z.
|
||||
(define (derive r c cache)
|
||||
(let ((new-r (map (lambda (ra)
|
||||
(cons (deriveR (car ra) c cache) (cdr ra)))
|
||||
r)))
|
||||
(if (andmap (lambda (x) (eq? z (car x)))
|
||||
new-r)
|
||||
#f
|
||||
new-r)))
|
||||
|
||||
(test-block ((c (make-cache))
|
||||
(r1 (->re #\1 c))
|
||||
(r2 (->re #\2 c)))
|
||||
((derive null (char->integer #\1) c) #f)
|
||||
((derive (list (cons r1 1) (cons r2 2)) (char->integer #\1) c)
|
||||
(list (cons e 1) (cons z 2)))
|
||||
((derive (list (cons r1 1) (cons r2 2)) (char->integer #\3) c) #f))
|
||||
|
||||
|
||||
;; get-final : (list-of re-action) -> (union #f syntax-object)
|
||||
;; An re that accepts the empty string represents a final state. Return the
|
||||
;; action from the first final state or #f if there is none.
|
||||
(define (get-final res)
|
||||
(cond
|
||||
((null? res) #f)
|
||||
((re-nullable? (caar res)) (cdar res))
|
||||
(else (get-final (cdr res)))))
|
||||
|
||||
(test-block ((c->i char->integer)
|
||||
(c (make-cache))
|
||||
(r1 (->re #\a c))
|
||||
(r2 (->re #\b c))
|
||||
(b (list (cons z 1) (cons z 2) (cons z 3) (cons e 4) (cons z 5)))
|
||||
(a (list (cons r1 1) (cons r2 2))))
|
||||
((derive null (c->i #\a) c) #f)
|
||||
((derive a (c->i #\a) c) (list (cons e 1) (cons z 2)))
|
||||
((derive a (c->i #\b) c) (list (cons z 1) (cons e 2)))
|
||||
((derive a (c->i #\c) c) #f)
|
||||
((derive (list (cons (->re `(union " " "\n" ",") c) 1)
|
||||
(cons (->re `(concatenation (repetition 0 1 "-")
|
||||
(repetition 1 +inf.0 (char-range "0" "9"))) c) 2)
|
||||
(cons (->re `(concatenation "-" (repetition 1 +inf.0 "-")) c) 3)
|
||||
(cons (->re "[" c) 4)
|
||||
(cons (->re "]" c) 5)) (c->i #\[) c)
|
||||
b)
|
||||
((get-final a) #f)
|
||||
((get-final (list (cons e 1) (cons e 2))) 1)
|
||||
((get-final b) 4))
|
||||
|
||||
|
||||
;; A state is (make-state (list-of re-action) nat)
|
||||
(define-struct state (spec index))
|
||||
|
||||
;; get-key : (list-of re-action) -> (list-of nat)
|
||||
;; States are keyed by the list of indexes of their res.
|
||||
(define (get-key s)
|
||||
(map (lambda (x) (re-index (car x))) s))
|
||||
|
||||
(define loc:partition is:partition)
|
||||
|
||||
;; compute-chars : (list-of state) -> (list-of char-set)
|
||||
;; Computes the sets of equivalent characters for taking the
|
||||
;; derivative of the car of st. Only one derivative per set needs to be taken.
|
||||
(define (compute-chars st)
|
||||
(cond
|
||||
((null? st) null)
|
||||
(else
|
||||
(loc:partition (map char-setR-chars
|
||||
(apply append (map (lambda (x) (get-char-groups (car x) #f))
|
||||
(state-spec (car st)))))))))
|
||||
|
||||
(test-block ((c (make-cache))
|
||||
(c->i char->integer)
|
||||
(r1 (->re `(char-range #\1 #\4) c))
|
||||
(r2 (->re `(char-range #\2 #\3) c)))
|
||||
((compute-chars null) null)
|
||||
((compute-chars (list (make-state null 1))) null)
|
||||
((map is:integer-set-contents
|
||||
(compute-chars (list (make-state (list (cons r1 1) (cons r2 2)) 2))))
|
||||
(list (is:integer-set-contents (is:make-range (c->i #\2) (c->i #\3)))
|
||||
(is:integer-set-contents (is:union (is:make-range (c->i #\1))
|
||||
(is:make-range (c->i #\4)))))))
|
||||
|
||||
|
||||
;; A dfa is (make-dfa int int
|
||||
;; (list-of (cons int syntax-object))
|
||||
;; (list-of (cons int (list-of (cons char-set int)))))
|
||||
;; Each transition pairs a source state with a list of char-sets and the states they lead to.
|
||||
;; The finals and transitions are sorted by state number and duplicate-free.
|
||||
(define-struct dfa (num-states start-state final-states/actions transitions) (make-inspector))
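;; For example (a hypothetical sketch, not from the original source): a
;; two-state machine that accepts exactly the string "a" with action ACT
;; could be written
;;   (make-dfa 2 0
;;             (list (cons 1 ACT))
;;             (list (cons 0 (list (cons (is:make-range 97) 1)))))
;; where ACT stands in for the action syntax object.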
|
||||
|
||||
(define loc:get-integer is:get-integer)
|
||||
|
||||
;; build-dfa : (list-of re-action) cache -> dfa
|
||||
(define (build-dfa rs cache)
|
||||
(let* ((transitions (make-hash-table))
|
||||
(get-state-number (make-counter))
|
||||
(start (make-state rs (get-state-number))))
|
||||
(cache (cons 'state (get-key rs)) (lambda () start))
|
||||
(let loop ((old-states (list start))
|
||||
(new-states null)
|
||||
(all-states (list start))
|
||||
(cs (compute-chars (list start))))
|
||||
(cond
|
||||
((and (null? old-states) (null? new-states))
|
||||
(make-dfa (get-state-number) (state-index start)
|
||||
(sort (filter (lambda (x) (cdr x))
|
||||
(map (lambda (state)
|
||||
(cons (state-index state) (get-final (state-spec state))))
|
||||
all-states))
|
||||
(lambda (a b) (< (car a) (car b))))
|
||||
(sort (hash-table-map transitions
|
||||
(lambda (state trans)
|
||||
(cons (state-index state)
|
||||
(map (lambda (t)
|
||||
(cons (car t)
|
||||
(state-index (cdr t))))
|
||||
trans))))
|
||||
(lambda (a b) (< (car a) (car b))))))
|
||||
((null? old-states)
|
||||
(loop new-states null all-states (compute-chars new-states)))
|
||||
((null? cs)
|
||||
(loop (cdr old-states) new-states all-states (compute-chars (cdr old-states))))
|
||||
(else
|
||||
(let* ((state (car old-states))
|
||||
(c (car cs))
|
||||
(new-re (derive (state-spec state) (loc:get-integer c) cache)))
|
||||
(cond
|
||||
(new-re
|
||||
(let* ((new-state? #f)
|
||||
(new-state (cache (cons 'state (get-key new-re))
|
||||
(lambda ()
|
||||
(set! new-state? #t)
|
||||
(make-state new-re (get-state-number)))))
|
||||
(new-all-states (if new-state? (cons new-state all-states) all-states)))
|
||||
(hash-table-put! transitions
|
||||
state
|
||||
(cons (cons c new-state)
|
||||
(hash-table-get transitions state
|
||||
(lambda () null))))
|
||||
(cond
|
||||
(new-state?
|
||||
(loop old-states (cons new-state new-states) new-all-states (cdr cs)))
|
||||
(else
|
||||
(loop old-states new-states new-all-states (cdr cs))))))
|
||||
(else (loop old-states new-states all-states (cdr cs))))))))))
|
||||
|
||||
(define (print-dfa x)
|
||||
(printf "number of states: ~a\n" (dfa-num-states x))
|
||||
(printf "start state: ~a\n" (dfa-start-state x))
|
||||
(printf "final states: ~a\n" (map car (dfa-final-states/actions x)))
|
||||
(for-each (lambda (trans)
|
||||
(printf "state: ~a\n" (car trans))
|
||||
(for-each (lambda (rule)
|
||||
(printf " -~a-> ~a\n"
|
||||
(is:integer-set-contents (car rule))
|
||||
(cdr rule)))
|
||||
(cdr trans)))
|
||||
(dfa-transitions x)))
|
||||
|
||||
(define (build-test-dfa rs)
|
||||
(let ((c (make-cache)))
|
||||
(build-dfa (map (lambda (x) (cons (->re x c) 'action))
|
||||
rs)
|
||||
c)))
|
||||
|
||||
|
||||
#|
|
||||
(define t1 (build-test-dfa null))
|
||||
(define t2 (build-test-dfa `(#\a)))
|
||||
(define t3 (build-test-dfa `(#\a #\b)))
|
||||
(define t4 (build-test-dfa `((repetition 0 +inf.0 #\a)
|
||||
(repetition 0 +inf.0 (concatenation #\a #\b)))))
|
||||
(define t5 (build-test-dfa `((concatenation (repetition 0 +inf.0 (union #\0 #\1)) #\1))))
|
||||
(define t6 (build-test-dfa `((repetition 0 +inf.0 (repetition 0 +inf.0 #\a))
|
||||
(repetition 0 +inf.0 (concatenation #\b (repetition 1 +inf.0 #\b))))))
|
||||
(define t7 (build-test-dfa `((concatenation (repetition 0 +inf.0 #\a) (repetition 0 +inf.0 #\b)
|
||||
(repetition 0 +inf.0 #\c) (repetition 0 +inf.0 #\d)
|
||||
(repetition 0 +inf.0 #\e)))))
|
||||
(define t8
|
||||
(build-test-dfa `((concatenation (repetition 0 +inf.0 (union #\a #\b)) #\a (union #\a #\b)
|
||||
(union #\a #\b) (union #\a #\b) (union #\a #\b)))))
|
||||
(define t9 (build-test-dfa `((concatenation "/*"
|
||||
(complement (concatenation (intersection) "*/" (intersection)))
|
||||
"*/"))))
|
||||
(define t11 (build-test-dfa `((complement "1"))))
|
||||
(define t12 (build-test-dfa `((concatenation (intersection (concatenation (repetition 0 +inf.0 "a") "b")
|
||||
(concatenation "a" (repetition 0 +inf.0 "b")))
|
||||
"ab"))))
|
||||
(define x (build-test-dfa `((union " " "\n" ",")
|
||||
(concatenation (repetition 0 1 "-") (repetition 1 +inf.0 (char-range "0" "9")))
|
||||
(concatenation "-" (repetition 1 +inf.0 "-"))
|
||||
"["
|
||||
"]")))
|
||||
(define y (build-test-dfa
|
||||
`((repetition 1 +inf.0
|
||||
(union (concatenation "|" (repetition 0 +inf.0 (char-complement "|")) "|")
|
||||
(concatenation "|" (repetition 0 +inf.0 (char-complement "|"))))))))
|
||||
(define t13 (build-test-dfa `((intersection (concatenation (intersection) "111" (intersection))
|
||||
(complement (union (concatenation (intersection) "01")
|
||||
(repetition 1 +inf.0 "1")))))))
|
||||
(define t14 (build-test-dfa `((complement "1"))))
|
||||
|#
|
||||
)
|
@ -0,0 +1,81 @@
|
||||
#lang scheme/base
|
||||
(require (for-syntax scheme/base)
|
||||
"../lex.rkt"
|
||||
rackunit)
|
||||
|
||||
(define-syntax (catch-syn-error stx)
|
||||
(syntax-case stx ()
|
||||
[(_ arg)
|
||||
(datum->syntax
|
||||
#'here
|
||||
(with-handlers ((exn:fail:syntax? exn-message))
|
||||
(syntax-local-expand-expression #'arg)
|
||||
"not-an-error"))]))
|
||||
|
||||
(check-regexp-match #rx"lex-abbrev" (catch-syn-error (define-lex-abbrev)))
|
||||
(check-regexp-match #rx"lex-abbrev" (catch-syn-error (define-lex-abbrev a)))
|
||||
(check-regexp-match #rx"lex-abbrev" (catch-syn-error (define-lex-abbrev (a b) v)))
|
||||
(check-regexp-match #rx"lex-abbrev" (catch-syn-error (define-lex-abbrev 1 1)))
|
||||
(check-regexp-match #rx"lex-abbrevs" (catch-syn-error (define-lex-abbrevs ())))
|
||||
|
||||
(check-regexp-match #rx"lex-trans" (catch-syn-error (define-lex-trans)))
|
||||
|
||||
(check-regexp-match #rx"lexer" (catch-syn-error (lexer)))
|
||||
(check-regexp-match #rx"lexer" (catch-syn-error (lexer ("a" "b" "c"))))
|
||||
(check-regexp-match #rx"lexer" (catch-syn-error (lexer ())))
|
||||
(check-regexp-match #rx"lexer" (catch-syn-error (lexer (""))))
|
||||
|
||||
(check-regexp-match #rx"regular-expression" (catch-syn-error (lexer (a 1))))
|
||||
(check-regexp-match #rx"regular-expression" (catch-syn-error (lexer ((a) 1))))
|
||||
(check-regexp-match #rx"regular-expression" (catch-syn-error (let ((a 1)) (lexer ((a) 1)))))
|
||||
|
||||
(check-regexp-match #rx"regular-expression"
|
||||
(catch-syn-error (let-syntax ((a 1))
|
||||
(lexer ((a) 1)))))
|
||||
|
||||
(check-regexp-match #rx"define-lex-trans"
|
||||
(catch-syn-error
|
||||
(let ()
|
||||
(define-lex-trans a 1)
|
||||
(let ()
|
||||
(lexer ((a) 1))))))
|
||||
|
||||
;; Detecting mutual recursion cycle:
|
||||
(check-regexp-match #rx"regular-expression"
|
||||
(catch-syn-error
|
||||
(let ()
|
||||
(define-lex-abbrev a b)
|
||||
(define-lex-abbrev b a)
|
||||
(let ()
|
||||
(lexer (a 1))))))
|
||||
|
||||
(check-regexp-match #rx"regular-expression"
|
||||
(catch-syn-error
|
||||
(let ()
|
||||
(define-lex-abbrev a (repetition 0 1 b))
|
||||
(define-lex-abbrev b (repetition 0 1 a))
|
||||
(let ()
|
||||
(lexer (a 1))))))
|
||||
|
||||
;; Detecting cycle within same abbreviation:
|
||||
(check-regexp-match #rx"regular-expression"
|
||||
(catch-syn-error
|
||||
(let ()
|
||||
(define-lex-abbrev balanced
|
||||
(union (concatenation "(" balanced ")" balanced)
|
||||
any-char))
|
||||
(lexer
|
||||
[balanced (string-append lexeme (balanced input-port))]
|
||||
[(eof) ""]))))
|
||||
|
||||
|
||||
(check-regexp-match #rx"regular-expression" (catch-syn-error (lexer (1 1))))
|
||||
(check-regexp-match #rx"repetition" (catch-syn-error (lexer ((repetition) 1))))
|
||||
(check-regexp-match #rx"repetition" (catch-syn-error (lexer ((repetition #\1 #\1 "3") 1))))
|
||||
(check-regexp-match #rx"repetition" (catch-syn-error (lexer ((repetition 1 #\1 "3") 1))))
|
||||
(check-regexp-match #rx"repetition" (catch-syn-error (lexer ((repetition 1 0 "3") 1))))
|
||||
(check-regexp-match #rx"complement" (catch-syn-error (lexer ((complement) 1))))
|
||||
(check-regexp-match #rx"char-range" (catch-syn-error (lexer ((char-range) 1))))
|
||||
(check-regexp-match #rx"char-range" (catch-syn-error (lexer ((char-range #\9 #\0) 1))))
|
||||
(check-regexp-match #rx"char-complement" (catch-syn-error (lexer ((char-complement) 1))))
|
||||
(check-regexp-match #rx"char-complement" (catch-syn-error (lexer ((char-complement (concatenation "1" "2")) 1))))
|
@ -0,0 +1,179 @@
|
||||
(module front mzscheme
|
||||
(require (prefix is: mzlib/integer-set)
|
||||
mzlib/list
|
||||
syntax/stx
|
||||
"util.rkt"
|
||||
"stx.rkt"
|
||||
"re.rkt"
|
||||
"deriv.rkt")
|
||||
|
||||
(provide build-lexer)
|
||||
|
||||
(define-syntax time-label
|
||||
(syntax-rules ()
|
||||
((_ l e ...)
|
||||
(begin
|
||||
(printf "~a: " l)
|
||||
(time (begin e ...))))))
|
||||
|
||||
;; A table is either
|
||||
;; - (vector-of (union #f nat))
|
||||
;; - (vector-of (vector-of (vector nat nat nat)))
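;; For example (illustrative only): in the second, per-state representation a
;; row such as #(#(48 57 2) #(97 97 5)) sends characters 48-57 to state 2 and
;; character 97 to state 5, while a #f row means the state has no outgoing
;; transitions (compare the dfa->1d-table tests below).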
|
||||
|
||||
(define loc:integer-set-contents is:integer-set-contents)
|
||||
|
||||
;; dfa->1d-table : dfa -> (same as build-lexer)
|
||||
(define (dfa->1d-table dfa)
|
||||
(let ((state-table (make-vector (dfa-num-states dfa) #f))
|
||||
(transition-cache (make-hash-table 'equal)))
|
||||
(for-each
|
||||
(lambda (trans)
|
||||
(let* ((from-state (car trans))
|
||||
(all-chars/to (cdr trans))
|
||||
(flat-all-chars/to
|
||||
(sort
|
||||
(apply append
|
||||
(map (lambda (chars/to)
|
||||
(let ((char-ranges (loc:integer-set-contents (car chars/to)))
|
||||
(to (cdr chars/to)))
|
||||
(map (lambda (char-range)
|
||||
(let ((entry (vector (car char-range) (cdr char-range) to)))
|
||||
(hash-table-get transition-cache entry
|
||||
(lambda ()
|
||||
(hash-table-put! transition-cache
|
||||
entry
|
||||
entry)
|
||||
entry))))
|
||||
char-ranges)))
|
||||
all-chars/to))
|
||||
(lambda (a b)
|
||||
(< (vector-ref a 0) (vector-ref b 0))))))
|
||||
(vector-set! state-table from-state (list->vector flat-all-chars/to))))
|
||||
(dfa-transitions dfa))
|
||||
state-table))
|
||||
|
||||
|
||||
(define loc:foldr is:foldr)
|
||||
|
||||
;; dfa->2d-table : dfa -> (same as build-lexer)
|
||||
(define (dfa->2d-table dfa)
|
||||
(let (
|
||||
;; char-table : (vector-of (union #f nat))
|
||||
;; The lexer table, one entry per state per char.
|
||||
;; Each entry specifies a state to transition to.
|
||||
;; #f indicates no transition
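;; The entry for state s on character c lives at index (+ (* 256 s) c),
;; computed below as (bitwise-ior c (arithmetic-shift s 8)).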
|
||||
(char-table (make-vector (* 256 (dfa-num-states dfa)) #f)))
|
||||
|
||||
;; Fill the char-table vector
|
||||
(for-each
|
||||
(lambda (trans)
|
||||
(let ((from-state (car trans)))
|
||||
(for-each (lambda (chars/to)
|
||||
(let ((to-state (cdr chars/to)))
|
||||
(loc:foldr (lambda (char _)
|
||||
(vector-set! char-table
|
||||
(bitwise-ior
|
||||
char
|
||||
(arithmetic-shift from-state 8))
|
||||
to-state))
|
||||
(void)
|
||||
(car chars/to))))
|
||||
(cdr trans))))
|
||||
(dfa-transitions dfa))
|
||||
char-table))
|
||||
|
||||
|
||||
;; dfa->actions : dfa -> (vector-of (union #f syntax-object))
|
||||
;; The action for each final state, #f if the state isn't final
|
||||
(define (dfa->actions dfa)
|
||||
(let ((actions (make-vector (dfa-num-states dfa) #f)))
|
||||
(for-each (lambda (state/action)
|
||||
(vector-set! actions (car state/action) (cdr state/action)))
|
||||
(dfa-final-states/actions dfa))
|
||||
actions))
|
||||
|
||||
;; dfa->no-look : dfa -> (vector-of bool)
|
||||
;; For each state whether the lexer can ignore the next input.
|
||||
;; It can do this only if there are no transitions out of the
|
||||
;; current state.
|
||||
(define (dfa->no-look dfa)
|
||||
(let ((no-look (make-vector (dfa-num-states dfa) #t)))
|
||||
(for-each (lambda (trans)
|
||||
(vector-set! no-look (car trans) #f))
|
||||
(dfa-transitions dfa))
|
||||
no-look))
|
||||
|
||||
(test-block ((d1 (make-dfa 1 1 (list) (list)))
|
||||
(d2 (make-dfa 4 1 (list (cons 2 2) (cons 3 3))
|
||||
(list (cons 1 (list (cons (is:make-range 49 50) 1)
|
||||
(cons (is:make-range 51) 2)))
|
||||
(cons 2 (list (cons (is:make-range 49) 3))))))
|
||||
(d3 (make-dfa 4 1 (list (cons 2 2) (cons 3 3))
|
||||
(list (cons 1 (list (cons (is:make-range 100 200) 0)
|
||||
(cons (is:make-range 49 50) 1)
|
||||
(cons (is:make-range 51) 2)))
|
||||
(cons 2 (list (cons (is:make-range 49) 3)))))))
|
||||
((dfa->2d-table d1) (make-vector 256 #f))
|
||||
((dfa->2d-table d2) (let ((v (make-vector 1024 #f)))
|
||||
(vector-set! v 305 1)
|
||||
(vector-set! v 306 1)
|
||||
(vector-set! v 307 2)
|
||||
(vector-set! v 561 3)
|
||||
v))
|
||||
((dfa->1d-table d1) (make-vector 1 #f))
|
||||
((dfa->1d-table d2) #(#f
|
||||
#(#(49 50 1) #(51 51 2))
|
||||
#(#(49 49 3))
|
||||
#f))
|
||||
((dfa->1d-table d3) #(#f
|
||||
#(#(49 50 1) #(51 51 2) #(100 200 0))
|
||||
#(#(49 49 3))
|
||||
#f))
|
||||
((dfa->actions d1) (vector #f))
|
||||
((dfa->actions d2) (vector #f #f 2 3))
|
||||
((dfa->no-look d1) (vector #t))
|
||||
((dfa->no-look d2) (vector #t #f #f #t)))
|
||||
|
||||
;; build-lexer : syntax-object list ->
|
||||
;; (values table nat (vector-of (union #f syntax-object)) (vector-of bool) (list-of syntax-object))
|
||||
;; each syntax object has the form (re action)
|
||||
(define (build-lexer sos)
|
||||
(let* ((disappeared-uses (box null))
|
||||
(s-re-acts (map (lambda (so)
|
||||
(cons (parse (stx-car so) disappeared-uses)
|
||||
(stx-car (stx-cdr so))))
|
||||
sos))
|
||||
|
||||
(cache (make-cache))
|
||||
|
||||
(re-acts (map (lambda (s-re-act)
|
||||
(cons (->re (car s-re-act) cache)
|
||||
(cdr s-re-act)))
|
||||
s-re-acts))
|
||||
|
||||
(dfa (build-dfa re-acts cache))
|
||||
(table (dfa->1d-table dfa)))
|
||||
;(print-dfa dfa)
|
||||
#;(let ((num-states (vector-length table))
|
||||
(num-vectors (length (filter values (vector->list table))))
|
||||
(num-entries (apply + (map
|
||||
(lambda (x) (if x (vector-length x) 0))
|
||||
(vector->list table))))
|
||||
(num-different-entries
|
||||
(let ((ht (make-hash-table)))
|
||||
(for-each
|
||||
(lambda (x)
|
||||
(when x
|
||||
(for-each
|
||||
(lambda (y)
|
||||
(hash-table-put! ht y #t))
|
||||
(vector->list x))))
|
||||
(vector->list table))
|
||||
(length (hash-table-map ht cons)))))
|
||||
(printf "~a states, ~aKB\n"
|
||||
num-states
|
||||
(/ (* 4.0 (+ 2 num-states (* 2 num-vectors) num-entries
|
||||
(* 5 num-different-entries))) 1024)))
|
||||
(values table (dfa-start-state dfa) (dfa->actions dfa) (dfa->no-look dfa)
|
||||
(unbox disappeared-uses))))
|
||||
)
|
@ -0,0 +1,385 @@
|
||||
(module re mzscheme
|
||||
(require mzlib/list
|
||||
scheme/match
|
||||
(prefix is: mzlib/integer-set)
|
||||
"util.rkt")
|
||||
|
||||
(provide ->re build-epsilon build-zero build-char-set build-concat
|
||||
build-repeat build-or build-and build-neg
|
||||
epsilonR? zeroR? char-setR? concatR? repeatR? orR? andR? negR?
|
||||
char-setR-chars concatR-re1 concatR-re2 repeatR-re repeatR-low repeatR-high
|
||||
orR-res andR-res negR-re
|
||||
re-nullable? re-index)
|
||||
|
||||
;; get-index : -> nat
|
||||
(define get-index (make-counter))
|
||||
|
||||
;; An re is either
|
||||
;; - (make-epsilonR bool nat)
|
||||
;; - (make-zeroR bool nat)
|
||||
;; - (make-char-setR bool nat char-set)
|
||||
;; - (make-concatR bool nat re re)
|
||||
;; - (make-repeatR bool nat nat nat-or-+inf.0 re)
|
||||
;; - (make-orR bool nat (list-of re)) Must not directly contain any orRs
|
||||
;; - (make-andR bool nat (list-of re)) Must not directly contain any andRs
|
||||
;; - (make-negR bool nat re)
|
||||
;;
|
||||
;; Every re must have an index field globally different from all
|
||||
;; other re index fields.
|
||||
(define-struct re (nullable? index) (make-inspector))
|
||||
(define-struct (epsilonR re) () (make-inspector))
|
||||
(define-struct (zeroR re) () (make-inspector))
|
||||
(define-struct (char-setR re) (chars) (make-inspector))
|
||||
(define-struct (concatR re) (re1 re2) (make-inspector))
|
||||
(define-struct (repeatR re) (low high re) (make-inspector))
|
||||
(define-struct (orR re) (res) (make-inspector))
|
||||
(define-struct (andR re) (res) (make-inspector))
|
||||
(define-struct (negR re) (re) (make-inspector))
|
||||
|
||||
;; e : re
|
||||
;; The unique epsilon re
|
||||
(define e (make-epsilonR #t (get-index)))
|
||||
|
||||
;; z : re
|
||||
;; The unique zero re
|
||||
(define z (make-zeroR #f (get-index)))
|
||||
|
||||
|
||||
;; s-re = char constant
|
||||
;; | string constant (sequence of characters)
|
||||
;; | re a precompiled re
|
||||
;; | (repetition low high s-re) repetition between low and high times (inclusive)
|
||||
;; | (union s-re ...)
|
||||
;; | (intersection s-re ...)
|
||||
;; | (complement s-re)
|
||||
;; | (concatenation s-re ...)
|
||||
;; | (char-range rng rng) match any character between the two (inclusive)
|
||||
;; | (char-complement char-set) match any character not listed
|
||||
;; low = natural-number
|
||||
;; high = natural-number or +inf.0
|
||||
;; rng = char or string with length 1
|
||||
;; (concatenation), (repetition 0 0 x), and "" all match the empty string.
|
||||
;; (union) matches no strings.
|
||||
;; (intersection) matches any string.
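;; For example (illustrative only), an s-re for an optional sign followed by
;; one or more digits could be written
;;   `(concatenation (repetition 0 1 (union #\+ #\-))
;;                   (repetition 1 +inf.0 (char-range #\0 #\9)))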
|
||||
|
||||
(define loc:make-range is:make-range)
|
||||
(define loc:union is:union)
|
||||
(define loc:split is:split)
|
||||
(define loc:complement is:complement)
|
||||
|
||||
;; ->re : s-re cache -> re
|
||||
(define (->re exp cache)
|
||||
(match exp
|
||||
((? char?) (build-char-set (loc:make-range (char->integer exp)) cache))
|
||||
((? string?) (->re `(concatenation ,@(string->list exp)) cache))
|
||||
((? re?) exp)
|
||||
(`(repetition ,low ,high ,r)
|
||||
(build-repeat low high (->re r cache) cache))
|
||||
(`(union ,rs ...)
|
||||
(build-or (flatten-res (map (lambda (r) (->re r cache)) rs)
|
||||
orR? orR-res loc:union cache)
|
||||
cache))
|
||||
(`(intersection ,rs ...)
|
||||
(build-and (flatten-res (map (lambda (r) (->re r cache)) rs)
|
||||
andR? andR-res (lambda (a b)
|
||||
(let-values (((i _ __) (loc:split a b))) i))
|
||||
cache)
|
||||
cache))
|
||||
(`(complement ,r)
|
||||
(build-neg (->re r cache) cache))
|
||||
(`(concatenation ,rs ...)
|
||||
(foldr (lambda (x y)
|
||||
(build-concat (->re x cache) y cache))
|
||||
e
|
||||
rs))
|
||||
(`(char-range ,c1 ,c2)
|
||||
(let ((i1 (char->integer (if (string? c1) (string-ref c1 0) c1)))
|
||||
(i2 (char->integer (if (string? c2) (string-ref c2 0) c2))))
|
||||
(if (<= i1 i2)
|
||||
(build-char-set (loc:make-range i1 i2) cache)
|
||||
z)))
|
||||
(`(char-complement ,crs ...)
|
||||
(let ((cs (->re `(union ,@crs) cache)))
|
||||
(cond
|
||||
((zeroR? cs) (build-char-set (loc:make-range 0 max-char-num) cache))
|
||||
((char-setR? cs)
|
||||
(build-char-set (loc:complement (char-setR-chars cs) 0 max-char-num) cache))
|
||||
(else z))))))
|
||||
|
||||
|
||||
|
||||
|
||||
;; flatten-res: (list-of re) (re -> bool) (re -> (list-of re))
|
||||
;; (char-set char-set -> char-set) cache -> (list-of re)
|
||||
;; Takes all the char-sets in l and combines them into one char-set using the combine function.
|
||||
;; Flattens out the values satisfying type?. get-res only needs to work on values that type? returns
|
||||
;; true for.
|
||||
(define (flatten-res l type? get-res combine cache)
|
||||
(let loop ((res l)
|
||||
;; chars : (union #f char-set)
|
||||
(chars #f)
|
||||
(no-chars null))
|
||||
(cond
|
||||
((null? res)
|
||||
(if chars
|
||||
(cons (build-char-set chars cache) no-chars)
|
||||
no-chars))
|
||||
((char-setR? (car res))
|
||||
(if chars
|
||||
(loop (cdr res) (combine (char-setR-chars (car res)) chars) no-chars)
|
||||
(loop (cdr res) (char-setR-chars (car res)) no-chars)))
|
||||
((type? (car res))
|
||||
(loop (append (get-res (car res)) (cdr res)) chars no-chars))
|
||||
(else (loop (cdr res) chars (cons (car res) no-chars))))))
|
||||
|
||||
;; build-epsilon : -> re
|
||||
(define (build-epsilon) e)
|
||||
|
||||
(define (build-zero) z)
|
||||
|
||||
(define loc:integer-set-contents is:integer-set-contents)
|
||||
|
||||
;; build-char-set : char-set cache -> re
|
||||
(define (build-char-set cs cache)
|
||||
(let ((l (loc:integer-set-contents cs)))
|
||||
(cond
|
||||
((null? l) z)
|
||||
(else
|
||||
(cache l
|
||||
(lambda ()
|
||||
(make-char-setR #f (get-index) cs)))))))
|
||||
|
||||
|
||||
|
||||
;; build-concat : re re cache -> re
|
||||
(define (build-concat r1 r2 cache)
|
||||
(cond
|
||||
((eq? e r1) r2)
|
||||
((eq? e r2) r1)
|
||||
((or (eq? z r1) (eq? z r2)) z)
|
||||
(else
|
||||
(cache (cons 'concat (cons (re-index r1) (re-index r2)))
|
||||
(lambda ()
|
||||
(make-concatR (and (re-nullable? r1) (re-nullable? r2))
|
||||
(get-index)
|
||||
r1 r2))))))
|
||||
|
||||
;; build-repeat : nat nat-or-+inf.0 re cache -> re
|
||||
(define (build-repeat low high r cache)
|
||||
(let ((low (if (< low 0) 0 low)))
|
||||
(cond
|
||||
((eq? r e) e)
|
||||
((and (= 0 low) (or (= 0 high) (eq? z r))) e)
|
||||
((and (= 1 low) (= 1 high)) r)
|
||||
((and (repeatR? r)
|
||||
(eq? (repeatR-high r) +inf.0)
|
||||
(or (= 0 (repeatR-low r))
|
||||
(= 1 (repeatR-low r))))
|
||||
(build-repeat (* low (repeatR-low r))
|
||||
+inf.0
|
||||
(repeatR-re r)
|
||||
cache))
|
||||
(else
|
||||
(cache (cons 'repeat (cons low (cons high (re-index r))))
|
||||
(lambda ()
|
||||
(make-repeatR (or (re-nullable? r) (= 0 low)) (get-index) low high r)))))))
|
||||
|
||||
|
||||
;; build-or : (list-of re) cache -> re
|
||||
(define (build-or rs cache)
|
||||
(let ((rs
|
||||
(filter
|
||||
(lambda (x) (not (eq? x z)))
|
||||
(do-simple-equiv (replace rs orR? orR-res null) re-index))))
|
||||
(cond
|
||||
((null? rs) z)
|
||||
((null? (cdr rs)) (car rs))
|
||||
((memq (build-neg z cache) rs) (build-neg z cache))
|
||||
(else
|
||||
(cache (cons 'or (map re-index rs))
|
||||
(lambda ()
|
||||
(make-orR (ormap re-nullable? rs) (get-index) rs)))))))
|
||||
|
||||
;; build-and : (list-of re) cache -> re
|
||||
(define (build-and rs cache)
|
||||
(let ((rs (do-simple-equiv (replace rs andR? andR-res null) re-index)))
|
||||
(cond
|
||||
((null? rs) (build-neg z cache))
|
||||
((null? (cdr rs)) (car rs))
|
||||
((memq z rs) z)
|
||||
(else
|
||||
(cache (cons 'and (map re-index rs))
|
||||
(lambda ()
|
||||
(make-andR (andmap re-nullable? rs) (get-index) rs)))))))
|
||||
|
||||
;; build-neg : re cache -> re
|
||||
(define (build-neg r cache)
|
||||
(cond
|
||||
((negR? r) (negR-re r))
|
||||
(else
|
||||
(cache (cons 'neg (re-index r))
|
||||
(lambda ()
|
||||
(make-negR (not (re-nullable? r)) (get-index) r))))))
|
||||
|
||||
;; Tests for the build-functions
|
||||
(test-block ((c (make-cache))
|
||||
(isc is:integer-set-contents)
|
||||
(r1 (build-char-set (is:make-range (char->integer #\1)) c))
|
||||
(r2 (build-char-set (is:make-range (char->integer #\2)) c))
|
||||
(r3 (build-char-set (is:make-range (char->integer #\3)) c))
|
||||
(rc (build-concat r1 r2 c))
|
||||
(rc2 (build-concat r2 r1 c))
|
||||
(rr (build-repeat 0 +inf.0 rc c))
|
||||
(ro (build-or `(,rr ,rc ,rr) c))
|
||||
(ro2 (build-or `(,rc ,rr ,z) c))
|
||||
(ro3 (build-or `(,rr ,rc) c))
|
||||
(ro4 (build-or `(,(build-or `(,r1 ,r2) c)
|
||||
,(build-or `(,r2 ,r3) c)) c))
|
||||
(ra (build-and `(,rr ,rc ,rr) c))
|
||||
(ra2 (build-and `(,rc ,rr) c))
|
||||
(ra3 (build-and `(,rr ,rc) c))
|
||||
(ra4 (build-and `(,(build-and `(,r3 ,r2) c)
|
||||
,(build-and `(,r2 ,r1) c)) c))
|
||||
(rn (build-neg z c))
|
||||
(rn2 (build-neg r1 c)))
|
||||
|
||||
((isc (char-setR-chars r1)) (isc (is:make-range (char->integer #\1))))
|
||||
((isc (char-setR-chars r2)) (isc (is:make-range (char->integer #\2))))
|
||||
((isc (char-setR-chars r3)) (isc (is:make-range (char->integer #\3))))
|
||||
((build-char-set (is:make-range) c) z)
|
||||
((build-concat r1 e c) r1)
|
||||
((build-concat e r1 c) r1)
|
||||
((build-concat r1 z c) z)
|
||||
((build-concat z r1 c) z)
|
||||
((build-concat r1 r2 c) rc)
|
||||
((concatR-re1 rc) r1)
|
||||
((concatR-re2 rc) r2)
|
||||
((concatR-re1 rc2) r2)
|
||||
((concatR-re2 rc2) r1)
|
||||
(ro ro2)
|
||||
(ro ro3)
|
||||
(ro4 (build-or `(,r1 ,r2 ,r3) c))
|
||||
((orR-res ro) (list rc rr))
|
||||
((orR-res ro4) (list r1 r2 r3))
|
||||
((build-or null c) z)
|
||||
((build-or `(,r1 ,z) c) r1)
|
||||
((build-repeat 0 +inf.0 rc c) rr)
|
||||
((build-repeat 0 1 z c) e)
|
||||
((build-repeat 0 0 rc c) e)
|
||||
((build-repeat 0 +inf.0 z c) e)
|
||||
((build-repeat -1 +inf.0 z c) e)
|
||||
((build-repeat 0 +inf.0 (build-repeat 0 +inf.0 rc c) c)
|
||||
(build-repeat 0 +inf.0 rc c))
|
||||
((build-repeat 20 20 (build-repeat 0 +inf.0 rc c) c)
|
||||
(build-repeat 0 +inf.0 rc c))
|
||||
((build-repeat 20 20 (build-repeat 1 +inf.0 rc c) c)
|
||||
(build-repeat 20 +inf.0 rc c))
|
||||
((build-repeat 1 1 rc c) rc)
|
||||
((repeatR-re rr) rc)
|
||||
(ra ra2)
|
||||
(ra ra3)
|
||||
(ra4 (build-and `(,r1 ,r2 ,r3) c))
|
||||
((andR-res ra) (list rc rr))
|
||||
((andR-res ra4) (list r1 r2 r3))
|
||||
((build-and null c) (build-neg z c))
|
||||
((build-and `(,r1 ,z) c) z)
|
||||
((build-and `(,r1) c) r1)
|
||||
((build-neg r1 c) (build-neg r1 c))
|
||||
((build-neg (build-neg r1 c) c) r1)
|
||||
((negR-re (build-neg r2 c)) r2)
|
||||
((re-nullable? r1) #f)
|
||||
((re-nullable? rc) #f)
|
||||
((re-nullable? (build-concat rr rr c)) #t)
|
||||
((re-nullable? rr) #t)
|
||||
((re-nullable? (build-repeat 0 1 rc c)) #t)
|
||||
((re-nullable? (build-repeat 1 2 rc c)) #f)
|
||||
((re-nullable? (build-repeat 1 2 (build-or (list e r1) c) c)) #t)
|
||||
((re-nullable? ro) #t)
|
||||
((re-nullable? (build-or `(,r1 ,r2) c)) #f)
|
||||
((re-nullable? (build-and `(,r1 ,e) c)) #f)
|
||||
((re-nullable? (build-and `(,rr ,e) c)) #t)
|
||||
((re-nullable? (build-neg r1 c)) #t)
|
||||
((re-nullable? (build-neg rr c)) #f))
|
||||
|
||||
(test-block ((c (make-cache))
|
||||
(isc is:integer-set-contents)
|
||||
(r1 (->re #\1 c))
|
||||
(r2 (->re #\2 c))
|
||||
(r3-5 (->re '(char-range #\3 #\5) c))
|
||||
(r4 (build-or `(,r1 ,r2) c))
|
||||
(r5 (->re `(union ,r3-5 #\7) c))
|
||||
(r6 (->re #\6 c)))
|
||||
((flatten-res null orR? orR-res is:union c) null)
|
||||
((isc (char-setR-chars (car (flatten-res `(,r1) orR? orR-res is:union c))))
|
||||
(isc (is:make-range (char->integer #\1))))
|
||||
((isc (char-setR-chars (car (flatten-res `(,r4) orR? orR-res is:union c))))
|
||||
(isc (is:make-range (char->integer #\1) (char->integer #\2))))
|
||||
((isc (char-setR-chars (car (flatten-res `(,r6 ,r5 ,r4 ,r3-5 ,r2 ,r1)
|
||||
orR? orR-res is:union c))))
|
||||
(isc (is:make-range (char->integer #\1) (char->integer #\7))))
|
||||
((flatten-res `(,r1 ,r2) andR? andR-res (lambda (x y)
|
||||
(let-values (((i _ __)
|
||||
(is:split x y)))
|
||||
i))
|
||||
c)
|
||||
(list z)))
|
||||
|
||||
;; ->re
|
||||
(test-block ((c (make-cache))
|
||||
(isc is:integer-set-contents)
|
||||
(r (->re #\a c))
|
||||
(rr (->re `(concatenation ,r ,r) c))
|
||||
(rrr (->re `(concatenation ,r ,rr) c))
|
||||
(rrr* (->re `(repetition 0 +inf.0 ,rrr) c)))
|
||||
((isc (char-setR-chars r)) (isc (is:make-range (char->integer #\a))))
|
||||
((->re "" c) e)
|
||||
((->re "asdf" c) (->re `(concatenation #\a #\s #\d #\f) c))
|
||||
((->re r c) r)
|
||||
((->re `(repetition 0 +inf.0 ,r) c) (build-repeat 0 +inf.0 r c))
|
||||
((->re `(repetition 1 +inf.0 ,r) c) (build-repeat 1 +inf.0 r c))
|
||||
((->re `(repetition 0 1 ,r) c) (build-repeat 0 1 r c))
|
||||
((->re `(repetition 0 1 ,rrr*) c) rrr*)
|
||||
((->re `(union (union (char-range #\a #\c)
|
||||
(char-complement (char-range #\000 #\110)
|
||||
(char-range #\112 ,(integer->char max-char-num))))
|
||||
(union (repetition 0 +inf.0 #\2))) c)
|
||||
(build-or (list (build-char-set (is:union (is:make-range 73)
|
||||
(is:make-range 97 99))
|
||||
c)
|
||||
(build-repeat 0 +inf.0 (build-char-set (is:make-range 50) c) c))
|
||||
c))
|
||||
((->re `(union ,rr ,rrr) c) (build-or (list rr rrr) c))
|
||||
((->re `(union ,r) c) r)
|
||||
((->re `(union) c) z)
|
||||
((->re `(intersection (intersection #\111
|
||||
(char-complement (char-range #\000 #\110)
|
||||
(char-range #\112 ,(integer->char max-char-num))))
|
||||
(intersection (repetition 0 +inf.0 #\2))) c)
|
||||
(build-and (list (build-char-set (is:make-range 73) c)
|
||||
(build-repeat 0 +inf.0 (build-char-set (is:make-range 50) c) c))
|
||||
c))
|
||||
((->re `(intersection (intersection #\000 (char-complement (char-range #\000 #\110)
|
||||
(char-range #\112 ,(integer->char max-char-num))))
|
||||
(intersection (repetition 0 +inf.0 #\2))) c)
|
||||
z)
|
||||
((->re `(intersection ,rr ,rrr) c) (build-and (list rr rrr) c))
|
||||
((->re `(intersection ,r) c) r)
|
||||
((->re `(intersection) c) (build-neg z c))
|
||||
((->re `(complement ,r) c) (build-neg r c))
|
||||
((->re `(concatenation) c) e)
|
||||
((->re `(concatenation ,rrr*) c) rrr*)
|
||||
(rr (build-concat r r c))
|
||||
((->re `(concatenation ,r ,rr ,rrr) c)
|
||||
(build-concat r (build-concat rr rrr c) c))
|
||||
((isc (char-setR-chars (->re `(char-range #\1 #\1) c))) (isc (is:make-range 49)))
|
||||
((isc (char-setR-chars (->re `(char-range #\1 #\9) c))) (isc (is:make-range 49 57)))
|
||||
((isc (char-setR-chars (->re `(char-range "1" "1") c))) (isc (is:make-range 49)))
|
||||
((isc (char-setR-chars (->re `(char-range "1" "9") c))) (isc (is:make-range 49 57)))
|
||||
((->re `(char-range "9" "1") c) z)
|
||||
((isc (char-setR-chars (->re `(char-complement) c)))
|
||||
(isc (char-setR-chars (->re `(char-range #\000 ,(integer->char max-char-num)) c))))
|
||||
((isc (char-setR-chars (->re `(char-complement #\001 (char-range #\002 ,(integer->char max-char-num))) c)))
|
||||
(isc (is:make-range 0)))
|
||||
)
|
||||
|
||||
)
|
@ -0,0 +1,220 @@
|
||||
#lang racket
|
||||
|
||||
(require "util.rkt"
|
||||
syntax/id-table)
|
||||
|
||||
(provide parse)
|
||||
|
||||
(define (bad-args stx num)
|
||||
(raise-syntax-error
|
||||
#f
|
||||
(format "incorrect number of arguments (should have ~a)" num)
|
||||
stx))
|
||||
|
||||
;; char-range-arg: syntax-object syntax-object -> nat
|
||||
;; If stx contains a character or a string of length 1, returns the integer
|
||||
;; for the character. Otherwise raises a syntax error.
|
||||
(define (char-range-arg stx containing-stx)
|
||||
(let ((c (syntax-e stx)))
|
||||
(cond
|
||||
((char? c) (char->integer c))
|
||||
((and (string? c) (= (string-length c) 1))
|
||||
(char->integer (string-ref c 0)))
|
||||
(else
|
||||
(raise-syntax-error
|
||||
#f
|
||||
"not a char or single-char string"
|
||||
containing-stx stx)))))
|
||||
(module+ test
|
||||
(check-equal? (char-range-arg #'#\1 #'here) (char->integer #\1))
|
||||
(check-equal? (char-range-arg #'"1" #'here) (char->integer #\1)))
|
||||
|
||||
(define orig-insp (variable-reference->module-declaration-inspector
|
||||
(#%variable-reference)))
|
||||
(define (disarm stx)
|
||||
(syntax-disarm stx orig-insp))
|
||||
|
||||
;; parse : syntax-object (box (list-of syntax-object)) -> s-re (see re.rkt)
|
||||
;; checks for errors and generates the plain s-exp form for s
|
||||
;; Expands lex-abbrevs and applies lex-trans.
|
||||
(define (parse stx disappeared-uses)
|
||||
(let loop ([stx stx]
|
||||
[disappeared-uses disappeared-uses]
|
||||
;; seen-lex-abbrevs: id-table
|
||||
[seen-lex-abbrevs (make-immutable-free-id-table)])
|
||||
(let ([recur (lambda (s)
|
||||
(loop (syntax-rearm s stx)
|
||||
disappeared-uses
|
||||
seen-lex-abbrevs))]
|
||||
[recur/abbrev (lambda (s id)
|
||||
(loop (syntax-rearm s stx)
|
||||
disappeared-uses
|
||||
(free-id-table-set seen-lex-abbrevs id id)))])
|
||||
(syntax-case (disarm stx) (repetition union intersection complement concatenation
|
||||
char-range char-complement)
|
||||
(_
|
||||
(identifier? stx)
|
||||
(let ((expansion (syntax-local-value stx (lambda () #f))))
|
||||
(unless (lex-abbrev? expansion)
|
||||
(raise-syntax-error 'regular-expression
|
||||
"undefined abbreviation"
|
||||
stx))
|
||||
;; Check for cycles.
|
||||
(when (free-id-table-ref seen-lex-abbrevs stx (lambda () #f))
|
||||
(raise-syntax-error 'regular-expression
|
||||
"illegal lex-abbrev cycle detected"
|
||||
stx
|
||||
#f
|
||||
(list (free-id-table-ref seen-lex-abbrevs stx))))
|
||||
(set-box! disappeared-uses (cons stx (unbox disappeared-uses)))
|
||||
(recur/abbrev ((lex-abbrev-get-abbrev expansion)) stx)))
|
||||
(_
|
||||
(or (char? (syntax-e stx)) (string? (syntax-e stx)))
|
||||
(syntax-e stx))
|
||||
((repetition arg ...)
|
||||
(let ((arg-list (syntax->list (syntax (arg ...)))))
|
||||
(unless (= 3 (length arg-list))
|
||||
(bad-args stx 2))
|
||||
(let ((low (syntax-e (car arg-list)))
|
||||
(high (syntax-e (cadr arg-list)))
|
||||
(re (caddr arg-list)))
|
||||
(unless (and (number? low) (exact? low) (integer? low) (>= low 0))
|
||||
(raise-syntax-error #f
|
||||
"not a non-negative exact integer"
|
||||
stx
|
||||
(car arg-list)))
|
||||
(unless (or (and (number? high) (exact? high) (integer? high) (>= high 0))
|
||||
(eq? high +inf.0))
|
||||
(raise-syntax-error #f
|
||||
"not a non-negative exact integer or +inf.0"
|
||||
stx
|
||||
(cadr arg-list)))
|
||||
(unless (<= low high)
|
||||
(raise-syntax-error
|
||||
#f
|
||||
"the first argument is not less than or equal to the second argument"
|
||||
stx))
|
||||
`(repetition ,low ,high ,(recur re)))))
|
||||
((union re ...)
|
||||
`(union ,@(map recur (syntax->list (syntax (re ...))))))
|
||||
((intersection re ...)
|
||||
`(intersection ,@(map recur (syntax->list (syntax (re ...))))))
|
||||
((complement re ...)
|
||||
(let ((re-list (syntax->list (syntax (re ...)))))
|
||||
(unless (= 1 (length re-list))
|
||||
(bad-args stx 1))
|
||||
`(complement ,(recur (car re-list)))))
|
||||
((concatenation re ...)
|
||||
`(concatenation ,@(map recur (syntax->list (syntax (re ...))))))
|
||||
((char-range arg ...)
|
||||
(let ((arg-list (syntax->list (syntax (arg ...)))))
|
||||
(unless (= 2 (length arg-list))
|
||||
(bad-args stx 2))
|
||||
(let ((i1 (char-range-arg (car arg-list) stx))
|
||||
(i2 (char-range-arg (cadr arg-list) stx)))
|
||||
(if (<= i1 i2)
|
||||
`(char-range ,(integer->char i1) ,(integer->char i2))
|
||||
(raise-syntax-error
|
||||
#f
|
||||
"the first argument does not precede or equal second argument"
|
||||
stx)))))
|
||||
((char-complement arg ...)
|
||||
(let ((arg-list (syntax->list (syntax (arg ...)))))
|
||||
(unless (= 1 (length arg-list))
|
||||
(bad-args stx 1))
|
||||
(let ((parsed (recur (car arg-list))))
|
||||
(unless (char-set? parsed)
|
||||
(raise-syntax-error #f
|
||||
"not a character set"
|
||||
stx
|
||||
(car arg-list)))
|
||||
`(char-complement ,parsed))))
|
||||
((op form ...)
|
||||
(identifier? (syntax op))
|
||||
(let* ((o (syntax op))
|
||||
(expansion (syntax-local-value o (lambda () #f))))
|
||||
(set-box! disappeared-uses (cons o (unbox disappeared-uses)))
|
||||
(cond
|
||||
((lex-trans? expansion)
|
||||
(recur ((lex-trans-f expansion) (disarm stx))))
|
||||
(expansion
|
||||
(raise-syntax-error 'regular-expression
|
||||
"not a lex-trans"
|
||||
stx))
|
||||
(else
|
||||
(raise-syntax-error 'regular-expression
|
||||
"undefined operator"
|
||||
stx)))))
|
||||
(_
|
||||
(raise-syntax-error
|
||||
'regular-expression
|
||||
"not a char, string, identifier, or (op args ...)"
|
||||
stx))))))
|
||||
|
||||
|
||||
|
||||
;; char-set? : s-re -> bool
|
||||
;; A char-set is an re that matches only strings of length 1.
|
||||
;; char-set? is conservative.
|
||||
(define (char-set? s-re)
|
||||
(cond
|
||||
((char? s-re) #t)
|
||||
((string? s-re) (= (string-length s-re) 1))
|
||||
((list? s-re)
|
||||
(let ((op (car s-re)))
|
||||
(case op
|
||||
((union intersection) (andmap char-set? (cdr s-re)))
|
||||
((char-range char-complement) #t)
|
||||
((repetition)
|
||||
(and (= (cadr s-re) (caddr s-re)) (char-set? (cadddr s-re))))
|
||||
((concatenation)
|
||||
(and (= 2 (length s-re)) (char-set? (cadr s-re))))
|
||||
(else #f))))
|
||||
(else #f)))
|
||||
|
||||
(module+ test
|
||||
(require rackunit))
|
||||
(module+ test
|
||||
(check-equal? (char-set? #\a) #t)
|
||||
(check-equal? (char-set? "12") #f)
|
||||
(check-equal? (char-set? "1") #t)
|
||||
(check-equal? (char-set? '(repetition 1 2 #\1)) #f)
|
||||
(check-equal? (char-set? '(repetition 1 1 "12")) #f)
|
||||
(check-equal? (char-set? '(repetition 1 1 "1")) #t)
|
||||
(check-equal? (char-set? '(union "1" "2" "3")) #t)
|
||||
(check-equal? (char-set? '(union "1" "" "3")) #f)
|
||||
(check-equal? (char-set? '(intersection "1" "2" (union "3" "4"))) #t)
|
||||
(check-equal? (char-set? '(intersection "1" "")) #f)
|
||||
(check-equal? (char-set? '(complement "1")) #f)
|
||||
(check-equal? (char-set? '(concatenation "1" "2")) #f)
|
||||
(check-equal? (char-set? '(concatenation "" "2")) #f)
|
||||
(check-equal? (char-set? '(concatenation "1")) #t)
|
||||
(check-equal? (char-set? '(concatenation "12")) #f)
|
||||
(check-equal? (char-set? '(char-range #\1 #\2)) #t)
|
||||
(check-equal? (char-set? '(char-complement #\1)) #t))
|
||||
|
||||
;; yikes... these test cases all have the wrong arity, now.
|
||||
;; and by "now", I mean it's been broken since before we
|
||||
;; moved to git.
|
||||
(module+ test
|
||||
(check-equal? (parse #'#\a null) #\a)
|
||||
(check-equal? (parse #'"1" null) "1")
|
||||
(check-equal? (parse #'(repetition 1 1 #\1) null)
|
||||
'(repetition 1 1 #\1))
|
||||
(check-equal? (parse #'(repetition 0 +inf.0 #\1) null) '(repetition 0 +inf.0 #\1))
|
||||
(check-equal? (parse #'(union #\1 (union "2") (union)) null)
|
||||
'(union #\1 (union "2") (union)))
|
||||
(check-equal? (parse #'(intersection #\1 (intersection "2") (intersection))
|
||||
null)
|
||||
'(intersection #\1 (intersection "2") (intersection)))
|
||||
(check-equal? (parse #'(complement (union #\1 #\2))
|
||||
null)
|
||||
'(complement (union #\1 #\2)))
|
||||
(check-equal? (parse #'(concatenation "1" "2" (concatenation)) null)
|
||||
'(concatenation "1" "2" (concatenation)))
|
||||
(check-equal? (parse #'(char-range "1" #\1) null) '(char-range #\1 #\1))
|
||||
(check-equal? (parse #'(char-range #\1 "1") null) '(char-range #\1 #\1))
|
||||
(check-equal? (parse #'(char-range "1" "3") null) '(char-range #\1 #\3))
|
||||
(check-equal? (parse #'(char-complement (union "1" "2")) null)
|
||||
'(char-complement (union "1" "2"))))
|
||||
; )
|
@ -0,0 +1,9 @@
|
||||
(module token-syntax mzscheme
|
||||
|
||||
;; The things needed at compile time to handle definition of tokens
|
||||
|
||||
(provide make-terminals-def terminals-def-t terminals-def?
|
||||
make-e-terminals-def e-terminals-def-t e-terminals-def?)
|
||||
(define-struct terminals-def (t))
|
||||
(define-struct e-terminals-def (t))
|
||||
)
|
@ -0,0 +1,92 @@
|
||||
(module token mzscheme
|
||||
|
||||
(require-for-syntax "token-syntax.rkt")
|
||||
|
||||
;; Defining tokens
|
||||
|
||||
(provide define-tokens define-empty-tokens make-token token?
|
||||
(protect (rename token-name real-token-name))
|
||||
(protect (rename token-value real-token-value))
|
||||
(rename token-name* token-name)
|
||||
(rename token-value* token-value)
|
||||
(struct position (offset line col))
|
||||
(struct position-token (token start-pos end-pos))
|
||||
(struct srcloc-token (token srcloc)))
|
||||
|
||||
|
||||
;; A token is either
|
||||
;; - symbol
|
||||
;; - (make-token symbol any)
|
||||
(define-struct token (name value) (make-inspector))
|
||||
|
||||
;; token-name*: token -> symbol
|
||||
(define (token-name* t)
|
||||
(cond
|
||||
((symbol? t) t)
|
||||
((token? t) (token-name t))
|
||||
(else (raise-type-error
|
||||
'token-name
|
||||
"symbol or struct:token"
|
||||
0
|
||||
t))))
|
||||
|
||||
;; token-value*: token -> any
|
||||
(define (token-value* t)
|
||||
(cond
|
||||
((symbol? t) #f)
|
||||
((token? t) (token-value t))
|
||||
(else (raise-type-error
|
||||
'token-value
|
||||
"symbol or struct:token"
|
||||
0
|
||||
t))))
|
||||
|
||||
(define-for-syntax (make-ctor-name n)
|
||||
(datum->syntax-object n
|
||||
(string->symbol (format "token-~a" (syntax-e n)))
|
||||
n
|
||||
n))
|
||||
|
||||
(define-for-syntax (make-define-tokens empty?)
|
||||
(lambda (stx)
|
||||
(syntax-case stx ()
|
||||
((_ name (token ...))
|
||||
(andmap identifier? (syntax->list (syntax (token ...))))
|
||||
(with-syntax (((marked-token ...)
|
||||
(map values #;(make-syntax-introducer)
|
||||
(syntax->list (syntax (token ...))))))
|
||||
(quasisyntax/loc stx
|
||||
(begin
|
||||
(define-syntax name
|
||||
#,(if empty?
|
||||
#'(make-e-terminals-def (quote-syntax (marked-token ...)))
|
||||
#'(make-terminals-def (quote-syntax (marked-token ...)))))
|
||||
#,@(map
|
||||
(lambda (n)
|
||||
(when (eq? (syntax-e n) 'error)
|
||||
(raise-syntax-error
|
||||
#f
|
||||
"Cannot define a token named error."
|
||||
stx))
|
||||
(if empty?
|
||||
#`(define (#,(make-ctor-name n))
|
||||
'#,n)
|
||||
#`(define (#,(make-ctor-name n) x)
|
||||
(make-token '#,n x))))
|
||||
(syntax->list (syntax (token ...))))
|
||||
#;(define marked-token #f) #;...))))
|
||||
((_ ...)
|
||||
(raise-syntax-error
|
||||
#f
|
||||
"must have the form (define-tokens name (identifier ...)) or (define-empty-tokens name (identifier ...))"
|
||||
stx)))))
|
||||
|
||||
(define-syntax define-tokens (make-define-tokens #f))
|
||||
(define-syntax define-empty-tokens (make-define-tokens #t))
|
||||
|
||||
(define-struct position (offset line col) #f)
|
||||
(define-struct position-token (token start-pos end-pos) #f)
|
||||
|
||||
(define-struct srcloc-token (token srcloc) #f)
|
||||
)
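;; A minimal usage sketch, assuming the module above is saved as "token.rkt"
;; next to "token-syntax.rkt"; the group names value-tokens/op-tokens are
;; hypothetical.
(module token-example racket
  (require "token.rkt")
  ;; define-tokens creates one constructor per name, prefixed with token-
  (define-tokens value-tokens (NUM ID))
  (define-empty-tokens op-tokens (PLUS EOF))
  (token-name (token-NUM 42))   ; => 'NUM
  (token-value (token-NUM 42))  ; => 42
  (token-EOF))                  ; => 'EOF -- empty tokens are plain symbols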
|
||||
|
@ -0,0 +1,69 @@
|
||||
#lang racket
|
||||
|
||||
(require "util.rkt")
|
||||
|
||||
(provide (all-defined-out))
|
||||
|
||||
;; mapped-chars : (listof (list nat nat bool))
|
||||
(define mapped-chars (make-known-char-range-list))
|
||||
|
||||
;; get-chars-for : (nat -> bool) (listof (list nat nat bool)) -> (listof (cons nat nat))
|
||||
(define (get-chars-for char-x? mapped-chars)
|
||||
(cond
|
||||
((null? mapped-chars) null)
|
||||
(else
|
||||
(let* ((range (car mapped-chars))
|
||||
(low (car range))
|
||||
(high (cadr range))
|
||||
(x (char-x? low)))
|
||||
(cond
|
||||
((caddr range)
|
||||
(if x
|
||||
(cons (cons low high)
|
||||
(get-chars-for char-x? (cdr mapped-chars)))
|
||||
(get-chars-for char-x? (cdr mapped-chars))))
|
||||
(else
|
||||
(let loop ((range-start low)
|
||||
(i (car range))
|
||||
(parity x))
|
||||
(cond
|
||||
((> i high)
|
||||
(if parity
|
||||
(cons (cons range-start high) (get-chars-for char-x? (cdr mapped-chars)))
|
||||
(get-chars-for char-x? (cdr mapped-chars))))
|
||||
((eq? parity (char-x? i))
|
||||
(loop range-start (add1 i) parity))
|
||||
(parity
|
||||
(cons (cons range-start (sub1 i)) (loop i (add1 i) #f)))
|
||||
(else
|
||||
(loop i (add1 i) #t))))))))))
|
||||
|
||||
(define (compute-ranges x?)
|
||||
(delay (get-chars-for (lambda (x) (x? (integer->char x))) mapped-chars)))
|
||||
|
||||
(define alphabetic-ranges (compute-ranges char-alphabetic?)) ;; 325
|
||||
(define lower-case-ranges (compute-ranges char-lower-case?)) ;; 405
|
||||
(define upper-case-ranges (compute-ranges char-upper-case?)) ;; 380
|
||||
(define title-case-ranges (compute-ranges char-title-case?)) ;; 10
|
||||
(define numeric-ranges (compute-ranges char-numeric?)) ;; 47
|
||||
(define symbolic-ranges (compute-ranges char-symbolic?)) ;; 153
|
||||
(define punctuation-ranges (compute-ranges char-punctuation?)) ;; 86
|
||||
(define graphic-ranges (compute-ranges char-graphic?)) ;; 401
|
||||
(define whitespace-ranges (compute-ranges char-whitespace?)) ;; 10
|
||||
(define blank-ranges (compute-ranges char-blank?)) ;; 9
|
||||
#;(define hexadecimal-ranges (compute-ranges char-hexadecimal?))
|
||||
(define iso-control-ranges (compute-ranges char-iso-control?)) ;; 2
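;; Each of the *-ranges definitions above is a promise; force it to get the
;; list of (cons lo hi) code-point ranges, e.g. (car (force numeric-ranges))
;; should be '(48 . 57), the ASCII digits.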
|
||||
|
||||
|
||||
|
||||
(module+ test
|
||||
(require rackunit)
|
||||
(check-equal? (get-chars-for odd? '()) '())
|
||||
(check-equal? (get-chars-for odd? '((1 4 #f) (8 13 #f)))
|
||||
'((1 . 1) (3 . 3) (9 . 9) (11 . 11) (13 . 13)))
|
||||
(check-equal? (get-chars-for (lambda (x)
|
||||
(odd? (quotient x 10)))
|
||||
'((1 5 #t) (17 19 #t) (21 51 #f)))
|
||||
'((17 . 19) (30 . 39) (50 . 51))))
|
||||
|
||||
|
@ -0,0 +1,127 @@
|
||||
#lang racket
|
||||
|
||||
(provide (all-defined-out))
|
||||
|
||||
(define max-char-num #x10FFFF)
|
||||
|
||||
(define-struct lex-abbrev (get-abbrev))
|
||||
(define-struct lex-trans (f))
|
||||
|
||||
(module+ test
|
||||
(require rackunit))
|
||||
|
||||
#;(define-syntax test-block
|
||||
(syntax-rules ()
|
||||
((_ defs (code right-ans) ...)
|
||||
(let* defs
|
||||
(let ((real-ans code))
|
||||
(unless (equal? real-ans right-ans)
|
||||
(printf "Test failed: ~e gave ~e. Expected ~e\n"
|
||||
'code real-ans 'right-ans))) ...))))
|
||||
|
||||
(define-syntax test-block
|
||||
(syntax-rules ()
|
||||
((_ x ...) (void))))
|
||||
|
||||
|
||||
;; A cache is (X ( -> Y) -> Y)
|
||||
;; make-cache : -> cache
|
||||
;; The table maps Xs to Ys. If key is mapped, its value is returned.
|
||||
;; Otherwise, build is invoked and its result is placed in the table and
|
||||
;; returned.
|
||||
;; Xs are compared with equal?
|
||||
(define (make-cache)
|
||||
(let ((table (make-hash)))
|
||||
(lambda (key build)
|
||||
(hash-ref table key
|
||||
(lambda ()
|
||||
(let ((new (build)))
|
||||
(hash-set! table key new)
|
||||
new))))))
|
||||
|
||||
(module+ test
|
||||
(define cache (make-cache))
|
||||
(check-equal? (cache '(s 1 2) (lambda () 9)) 9)
|
||||
(check-equal? (cache '(s 2 1) (lambda () 8)) 8)
|
||||
(check-equal? (cache '(s 1 2) (lambda () 1)) 9)
|
||||
(check-equal? (cache (cons 's (cons 0 (cons +inf.0 10)))
|
||||
(lambda () 22)) 22)
|
||||
(check-equal? (cache (cons 's (cons 0 (cons +inf.0 10)))
|
||||
(lambda () 1)) 22))
|
||||
|
||||
|
||||
|
||||
;; make-counter : -> -> nat
|
||||
;; makes a function that returns a number one greater each time
|
||||
;; it is called.
|
||||
(define (make-counter)
|
||||
(let ((counter 0))
|
||||
(lambda ()
|
||||
(begin0
|
||||
counter
|
||||
(set! counter (add1 counter))))))
|
||||
|
||||
(module+ test
|
||||
(define c (make-counter))
|
||||
(define d (make-counter))
|
||||
(check-equal? (c) 0)
|
||||
(check-equal? (d) 0)
|
||||
(check-equal? (c) 1)
|
||||
(check-equal? (d) 1)
|
||||
(check-equal? (c) 2))
|
||||
|
||||
|
||||
;; remove-dups : (list-of X) (X -> number) (list-of X) -> (list-of X)
|
||||
;; removes the entries from l that have the same index as a
|
||||
;; previous entry. l must be grouped by indexes.
|
||||
(define (remove-dups l index acc)
|
||||
(cond
|
||||
((null? l) (reverse acc))
|
||||
((null? acc) (remove-dups (cdr l) index (cons (car l) acc)))
|
||||
((= (index (car acc)) (index (car l)))
|
||||
(remove-dups (cdr l) index acc))
|
||||
(else
|
||||
(remove-dups (cdr l) index (cons (car l) acc)))))
|
||||
|
||||
|
||||
(module+ test
|
||||
(check-equal? (remove-dups '((1 2) (2 2) (1 3) (1 4)
|
||||
(100 4) (0 5)) cadr null)
|
||||
'((1 2) (1 3) (1 4) (0 5)))
|
||||
(check-equal? (remove-dups null error null) null))
|
||||
|
||||
;; do-simple-equiv : (list-of X) (X -> nat) -> (list-of X)
|
||||
;; Sorts l according to index and removes the entries with duplicate
|
||||
;; indexes.
|
||||
(define (do-simple-equiv l index)
|
||||
(let ((ordered (sort l (lambda (a b) (< (index a) (index b))))))
|
||||
(remove-dups ordered index null)))
|
||||
|
||||
(module+ test
|
||||
(check-equal? (do-simple-equiv '((2 2) (1 4) (1 2)
|
||||
(100 4) (1 3) (0 5))
|
||||
cadr)
|
||||
'((2 2) (1 3) (1 4) (0 5)))
|
||||
(check-equal? (do-simple-equiv null error) null))
|
||||
|
||||
;; replace : (list-of X) (X -> bool) (X -> (list-of X)) (list-of X) ->
|
||||
;; (list-of X)
|
||||
;; If (pred? r) for some r in l, splice (get r) in place of r in the resulting
|
||||
;; list.
|
||||
(define (replace l pred? get acc)
|
||||
(cond
|
||||
((null? l) acc)
|
||||
((pred? (car l)) (replace (cdr l) pred? get (append (get (car l)) acc)))
|
||||
(else (replace (cdr l) pred? get (cons (car l) acc)))))
|
||||
|
||||
|
||||
(module+ test
|
||||
(check-equal? (replace null void (lambda () (list 1)) null) null)
|
||||
(check-equal? (replace '(1 2 3 4 3 5)
|
||||
(lambda (x) (= x 3))
|
||||
(lambda (x) (list 1 2 3))
|
||||
null)
|
||||
'(5 1 2 3 4 1 2 3 2 1)))
|
||||
|
||||
|
||||
|
@ -0,0 +1,280 @@
|
||||
;; Constructs to create and access grammars, the internal
|
||||
;; representation of the input to the parser generator.
|
||||
|
||||
(module grammar mzscheme
|
||||
|
||||
(require mzlib/class
|
||||
mzlib/list
|
||||
"yacc-helper.rkt"
|
||||
racket/contract)
|
||||
|
||||
;; Each production has a unique index 0 <= index < number of productions
|
||||
(define-struct prod (lhs rhs index prec action) (make-inspector))
|
||||
|
||||
;; The dot-pos field is the index of the element in the rhs
|
||||
;; of prod that the dot immediately precedes.
|
||||
;; Thus 0 <= dot-pos <= (vector-length rhs).
|
||||
(define-struct item (prod dot-pos) (make-inspector))
|
||||
|
||||
;; gram-sym = (union term? non-term?)
|
||||
;; Each term has a unique index 0 <= index < number of terms
|
||||
;; Each non-term has a unique index 0 <= index < number of non-terms
|
||||
(define-struct term (sym index prec) (make-inspector))
|
||||
(define-struct non-term (sym index) (make-inspector))
|
||||
|
||||
;; a precedence declaration.
|
||||
(define-struct prec (num assoc) (make-inspector))
|
||||
|
||||
(provide/contract
|
||||
(make-item (prod? (or/c #f natural-number/c) . -> . item?))
|
||||
(make-term (symbol? (or/c #f natural-number/c) (or/c prec? #f) . -> . term?))
|
||||
(make-non-term (symbol? (or/c #f natural-number/c) . -> . non-term?))
|
||||
(make-prec (natural-number/c (or/c 'left 'right 'nonassoc) . -> . prec?))
|
||||
(make-prod (non-term? (vectorof (or/c non-term? term?))
|
||||
(or/c #f natural-number/c) (or/c #f prec?) syntax? . -> . prod?)))
|
||||
|
||||
(provide
|
||||
|
||||
|
||||
;; Things that work on items
|
||||
start-item? item-prod item->string
|
||||
sym-at-dot move-dot-right item<? item-dot-pos
|
||||
|
||||
;; Things that operate on grammar symbols
|
||||
gram-sym-symbol gram-sym-index term-prec gram-sym->string
|
||||
non-term? term? non-term<? term<?
|
||||
term-list->bit-vector term-index non-term-index
|
||||
|
||||
;; Things that work on precs
|
||||
prec-num prec-assoc
|
||||
|
||||
grammar%
|
||||
|
||||
;; Things that work on productions
|
||||
prod-index prod-prec prod-rhs prod-lhs prod-action)
|
||||
|
||||
|
||||
;;---------------------- LR items --------------------------
|
||||
|
||||
;; item<?: LR-item * LR-item -> bool
|
||||
;; Lexicographic comparison on two items.
|
||||
(define (item<? i1 i2)
|
||||
(let ((p1 (prod-index (item-prod i1)))
|
||||
(p2 (prod-index (item-prod i2))))
|
||||
(or (< p1 p2)
|
||||
(and (= p1 p2)
|
||||
(let ((d1 (item-dot-pos i1))
|
||||
(d2 (item-dot-pos i2)))
|
||||
(< d1 d2))))))
|
||||
|
||||
;; start-item?: LR-item -> bool
|
||||
;; The start production always has index 0
|
||||
(define (start-item? i)
|
||||
(= 0 (non-term-index (prod-lhs (item-prod i)))))
|
||||
|
||||
|
||||
;; move-dot-right: LR-item -> LR-item | #f
|
||||
;; moves the dot one position to the right in the item, unless it is
|
||||
;; already at its rightmost position, in which case it returns #f
|
||||
(define (move-dot-right i)
|
||||
(cond
|
||||
((= (item-dot-pos i) (vector-length (prod-rhs (item-prod i)))) #f)
|
||||
(else (make-item (item-prod i)
|
||||
(add1 (item-dot-pos i))))))
|
||||
|
||||
;; sym-at-dot: LR-item -> gram-sym | #f
|
||||
;; returns the symbol after the dot in the item or #f if there is none
|
||||
(define (sym-at-dot i)
|
||||
(let ((dp (item-dot-pos i))
|
||||
(rhs (prod-rhs (item-prod i))))
|
||||
(cond
|
||||
((= dp (vector-length rhs)) #f)
|
||||
(else (vector-ref rhs dp)))))
|
||||
|
||||
|
||||
;; item->string: LR-item -> string
|
||||
(define (item->string it)
|
||||
(let ((print-sym (lambda (i)
|
||||
(let ((gs (vector-ref (prod-rhs (item-prod it)) i)))
|
||||
(cond
|
||||
((term? gs) (format "~a " (term-sym gs)))
|
||||
(else (format "~a " (non-term-sym gs))))))))
|
||||
(string-append
|
||||
(format "~a -> " (non-term-sym (prod-lhs (item-prod it))))
|
||||
(let loop ((i 0))
|
||||
(cond
|
||||
((= i (vector-length (prod-rhs (item-prod it))))
|
||||
(if (= i (item-dot-pos it))
|
||||
". "
|
||||
""))
|
||||
((= i (item-dot-pos it))
|
||||
(string-append ". " (print-sym i) (loop (add1 i))))
|
||||
(else (string-append (print-sym i) (loop (add1 i)))))))))
|
||||
|
||||
;; --------------------- Grammar Symbols --------------------------
|
||||
|
||||
(define (non-term<? nt1 nt2)
|
||||
(< (non-term-index nt1) (non-term-index nt2)))
|
||||
|
||||
(define (term<? nt1 nt2)
|
||||
(< (term-index nt1) (term-index nt2)))
|
||||
|
||||
(define (gram-sym-index gs)
|
||||
(cond
|
||||
((term? gs) (term-index gs))
|
||||
(else (non-term-index gs))))
|
||||
|
||||
(define (gram-sym-symbol gs)
|
||||
(cond
|
||||
((term? gs) (term-sym gs))
|
||||
(else (non-term-sym gs))))
|
||||
|
||||
(define (gram-sym->string gs)
|
||||
(symbol->string (gram-sym-symbol gs)))
|
||||
|
||||
;; term-list->bit-vector: term list -> int
|
||||
;; Creates a number where the nth bit is 1 if the term with index n is in
|
||||
;; the list, and whose nth bit is 0 otherwise
|
||||
(define (term-list->bit-vector terms)
|
||||
(cond
|
||||
((null? terms) 0)
|
||||
(else
|
||||
(bitwise-ior (arithmetic-shift 1 (term-index (car terms))) (term-list->bit-vector (cdr terms))))))
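;; For example, terms with indexes 0 and 3 set bits 0 and 3, so
;; (term-list->bit-vector (list (make-term 'B 0 #f) (make-term 'A 3 #f))) => 9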
|
||||
|
||||
|
||||
;; ------------------------- Grammar ------------------------------
|
||||
|
||||
(define grammar%
|
||||
(class object%
|
||||
(super-instantiate ())
|
||||
;; prods: production list list
|
||||
;; where there is one production list per non-term
|
||||
(init prods)
|
||||
;; init-prods: production list
|
||||
;; The productions parsing can start from
|
||||
;; nullable-non-terms is indexed by the non-term-index and is true iff non-term is nullable
|
||||
(init-field init-prods terms non-terms end-terms)
|
||||
|
||||
;; list of all productions
|
||||
(define all-prods (apply append prods))
|
||||
(define num-prods (length all-prods))
|
||||
(define num-terms (length terms))
|
||||
(define num-non-terms (length non-terms))
|
||||
|
||||
(let ((count 0))
|
||||
(for-each
|
||||
(lambda (nt)
|
||||
(set-non-term-index! nt count)
|
||||
(set! count (add1 count)))
|
||||
non-terms))
|
||||
|
||||
(let ((count 0))
|
||||
(for-each
|
||||
(lambda (t)
|
||||
(set-term-index! t count)
|
||||
(set! count (add1 count)))
|
||||
terms))
|
||||
|
||||
(let ((count 0))
|
||||
(for-each
|
||||
(lambda (prod)
|
||||
(set-prod-index! prod count)
|
||||
(set! count (add1 count)))
|
||||
all-prods))
|
||||
|
||||
;; indexed by the index of the non-term - contains the list of productions for that non-term
|
||||
(define nt->prods
|
||||
(let ((v (make-vector (length prods) #f)))
|
||||
(for-each (lambda (prods)
|
||||
(vector-set! v (non-term-index (prod-lhs (car prods))) prods))
|
||||
prods)
|
||||
v))
|
||||
|
||||
(define nullable-non-terms
|
||||
(nullable all-prods num-non-terms))
|
||||
|
||||
(define/public (get-num-terms) num-terms)
|
||||
(define/public (get-num-non-terms) num-non-terms)
|
||||
|
||||
(define/public (get-prods-for-non-term nt)
|
||||
(vector-ref nt->prods (non-term-index nt)))
|
||||
(define/public (get-prods) all-prods)
|
||||
(define/public (get-init-prods) init-prods)
|
||||
|
||||
(define/public (get-terms) terms)
|
||||
(define/public (get-non-terms) non-terms)
|
||||
|
||||
(define/public (get-num-prods) num-prods)
|
||||
(define/public (get-end-terms) end-terms)
|
||||
|
||||
(define/public (nullable-non-term? nt)
|
||||
(vector-ref nullable-non-terms (non-term-index nt)))
|
||||
|
||||
(define/public (nullable-after-dot? item)
|
||||
(let* ((rhs (prod-rhs (item-prod item)))
|
||||
(prod-length (vector-length rhs)))
|
||||
(let loop ((i (item-dot-pos item)))
|
||||
(cond
|
||||
((< i prod-length)
|
||||
(if (and (non-term? (vector-ref rhs i)) (nullable-non-term? (vector-ref rhs i)))
|
||||
(loop (add1 i))
|
||||
#f))
|
||||
((= i prod-length) #t)))))
|
||||
|
||||
(define/public (nullable-non-term-thunk)
|
||||
(lambda (nt)
|
||||
(nullable-non-term? nt)))
|
||||
(define/public (nullable-after-dot?-thunk)
|
||||
(lambda (item)
|
||||
(nullable-after-dot? item)))))
|
||||
|
||||
|
||||
;; nullable: production list * int -> non-term set
|
||||
;; determines which non-terminals can derive epsilon
|
||||
(define (nullable prods num-nts)
|
||||
(letrec ((nullable (make-vector num-nts #f))
|
||||
(added #f)
|
||||
|
||||
;; possible-nullable: production list -> production list
|
||||
;; Removes all productions that have a terminal
|
||||
(possible-nullable
|
||||
(lambda (prods)
|
||||
(filter (lambda (prod)
|
||||
(vector-andmap non-term? (prod-rhs prod)))
|
||||
prods)))
|
||||
|
||||
;; set-nullables: production list -> production list
|
||||
;; makes one pass through the productions, adding the ones
|
||||
;; known to be nullable now to nullable and returning a list
|
||||
;; of productions that we don't know about yet.
|
||||
(set-nullables
|
||||
(lambda (prods)
|
||||
(cond
|
||||
((null? prods) null)
|
||||
((vector-ref nullable
|
||||
(gram-sym-index (prod-lhs (car prods))))
|
||||
(set-nullables (cdr prods)))
|
||||
((vector-andmap (lambda (nt)
|
||||
(vector-ref nullable (gram-sym-index nt)))
|
||||
(prod-rhs (car prods)))
|
||||
(vector-set! nullable
|
||||
(gram-sym-index (prod-lhs (car prods)))
|
||||
#t)
|
||||
(set! added #t)
|
||||
(set-nullables (cdr prods)))
|
||||
(else
|
||||
(cons (car prods)
|
||||
(set-nullables (cdr prods))))))))
|
||||
|
||||
(let loop ((P (possible-nullable prods)))
|
||||
(cond
|
||||
((null? P) nullable)
|
||||
(else
|
||||
(set! added #f)
|
||||
(let ((new-P (set-nullables P)))
|
||||
(if added
|
||||
(loop new-P)
|
||||
nullable)))))))
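;; The resulting vector is indexed by non-term-index: entry i is #t iff the
;; non-terminal with index i can derive the empty string.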
|
||||
|
||||
|
||||
)
|
@ -0,0 +1,61 @@
|
||||
(module graph mzscheme
|
||||
|
||||
(provide digraph)
|
||||
|
||||
(define (zero-thunk) 0)
|
||||
|
||||
;; digraph:
|
||||
;; ('a list) * ('a -> 'a list) * ('a -> 'b) * ('b * 'b -> 'b) * (-> 'b)
|
||||
;; -> ('a -> 'b)
|
||||
;; DeRemer and Pennello 1982
|
||||
;; Computes (f x) = (f- x) union Union{(f y) | y in (edges x)}
|
||||
;; We use a hash-table to represent the result function 'a -> 'b set, so
|
||||
;; the values of type 'a must be comparable with eq?.
|
||||
(define (digraph nodes edges f- union fail)
|
||||
(letrec [
|
||||
;; Will map elements of 'a to 'b sets
|
||||
(results (make-hash-table))
|
||||
(f (lambda (x) (hash-table-get results x fail)))
|
||||
|
||||
;; Maps elements of 'a to integers.
|
||||
(N (make-hash-table))
|
||||
(get-N (lambda (x) (hash-table-get N x zero-thunk)))
|
||||
(set-N (lambda (x d) (hash-table-put! N x d)))
|
||||
|
||||
(stack null)
|
||||
(push (lambda (x)
|
||||
(set! stack (cons x stack))))
|
||||
(pop (lambda ()
|
||||
(begin0
|
||||
(car stack)
|
||||
(set! stack (cdr stack)))))
|
||||
(depth (lambda () (length stack)))
|
||||
|
||||
;; traverse: 'a ->
|
||||
(traverse
|
||||
(lambda (x)
|
||||
(push x)
|
||||
(let ((d (depth)))
|
||||
(set-N x d)
|
||||
(hash-table-put! results x (f- x))
|
||||
(for-each (lambda (y)
|
||||
(if (= 0 (get-N y))
|
||||
(traverse y))
|
||||
(hash-table-put! results
|
||||
x
|
||||
(union (f x) (f y)))
|
||||
(set-N x (min (get-N x) (get-N y))))
|
||||
(edges x))
|
||||
(if (= d (get-N x))
|
||||
(let loop ((p (pop)))
|
||||
(set-N p +inf.0)
|
||||
(hash-table-put! results p (f x))
|
||||
(if (not (eq? x p))
|
||||
(loop (pop))))))))]
|
||||
(for-each (lambda (x)
|
||||
(if (= 0 (get-N x))
|
||||
(traverse x)))
|
||||
nodes)
|
||||
f))
|
||||
|
||||
)
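;; A minimal sketch of using digraph, assuming the module above is saved as
;; "graph.rkt"; the node names and edge table are hypothetical. Each node
;; contributes itself, and digraph accumulates the union over everything
;; reachable from it.
(module graph-example racket
  (require "graph.rkt")
  (define edges (hash 'a '(b) 'b '(c) 'c '()))
  (define reachable
    (digraph '(a b c)                          ; nodes
             (lambda (n) (hash-ref edges n))   ; edges : node -> node list
             (lambda (n) (list n))             ; f- : base value for each node
             (lambda (s1 s2)                   ; union of two value sets
               (remove-duplicates (append s1 s2)))
             (lambda () null)))                ; default for unseen nodes
  (reachable 'a))  ; => '(a b c) (order may vary)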
|
@ -0,0 +1,374 @@
|
||||
(module input-file-parser mzscheme
|
||||
|
||||
;; routines for parsing the input to the parser generator and producing a
|
||||
;; grammar (See grammar.rkt)
|
||||
|
||||
(require "yacc-helper.rkt"
|
||||
"../private-lex/token-syntax.rkt"
|
||||
"grammar.rkt"
|
||||
mzlib/class
|
||||
racket/contract)
|
||||
(require-for-template mzscheme)
|
||||
|
||||
(define (is-a-grammar%? x) (is-a? x grammar%))
|
||||
(provide/contract
|
||||
(parse-input ((listof identifier?) (listof identifier?) (listof identifier?)
|
||||
(or/c #f syntax?) syntax? any/c . -> . is-a-grammar%?))
|
||||
(get-term-list ((listof identifier?) . -> . (listof identifier?))))
|
||||
|
||||
(define stx-for-original-property (read-syntax #f (open-input-string "original")))
|
||||
|
||||
;; get-args: ??? -> (values (listof syntax) (or/c #f (cons integer? stx)))
|
||||
(define (get-args i rhs src-pos term-defs)
|
||||
(let ((empty-table (make-hash-table))
|
||||
(biggest-pos #f))
|
||||
(hash-table-put! empty-table 'error #t)
|
||||
(for-each (lambda (td)
|
||||
(let ((v (syntax-local-value td)))
|
||||
(if (e-terminals-def? v)
|
||||
(for-each (lambda (s)
|
||||
(hash-table-put! empty-table (syntax-object->datum s) #t))
|
||||
(syntax->list (e-terminals-def-t v))))))
|
||||
term-defs)
|
||||
(let ([args
|
||||
(let get-args ((i i)
|
||||
(rhs rhs))
|
||||
(cond
|
||||
((null? rhs) null)
|
||||
(else
|
||||
(let ((b (car rhs))
|
||||
(name (if (hash-table-get empty-table (syntax-object->datum (car rhs)) (lambda () #f))
|
||||
(gensym)
|
||||
(string->symbol (format "$~a" i)))))
|
||||
(cond
|
||||
(src-pos
|
||||
(let ([start-pos-id
|
||||
(datum->syntax-object b (string->symbol (format "$~a-start-pos" i)) b stx-for-original-property)]
|
||||
[end-pos-id
|
||||
(datum->syntax-object b (string->symbol (format "$~a-end-pos" i)) b stx-for-original-property)])
|
||||
(set! biggest-pos (cons start-pos-id end-pos-id))
|
||||
`(,(datum->syntax-object b name b stx-for-original-property)
|
||||
,start-pos-id
|
||||
,end-pos-id
|
||||
,@(get-args (add1 i) (cdr rhs)))))
|
||||
(else
|
||||
`(,(datum->syntax-object b name b stx-for-original-property)
|
||||
,@(get-args (add1 i) (cdr rhs)))))))))])
|
||||
(values args biggest-pos))))
|
||||
|
||||
;; Given the list of terminal symbols and the precedence/associativity definitions,
|
||||
;; builds terminal structures (See grammar.rkt)
|
||||
;; build-terms: symbol list * symbol list list -> term list
|
||||
(define (build-terms term-list precs)
|
||||
(let ((counter 0)
|
||||
|
||||
;;(term-list (cons (gensym) term-list))
|
||||
|
||||
;; Will map a terminal symbol to its precedence/associativity
|
||||
(prec-table (make-hash-table)))
|
||||
|
||||
;; Fill the prec table
|
||||
(for-each
|
||||
(lambda (p-decl)
|
||||
(begin0
|
||||
(let ((assoc (car p-decl)))
|
||||
(for-each
|
||||
(lambda (term-sym)
|
||||
(hash-table-put! prec-table term-sym (make-prec counter assoc)))
|
||||
(cdr p-decl)))
|
||||
(set! counter (add1 counter))))
|
||||
precs)
|
||||
|
||||
;; Build the terminal structures
|
||||
(map
|
||||
(lambda (term-sym)
|
||||
(make-term term-sym
|
||||
#f
|
||||
(hash-table-get prec-table term-sym (lambda () #f))))
|
||||
term-list)))
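;; For example (with hypothetical token names),
;;   (build-terms '(PLUS TIMES EOF) '((left PLUS) (left TIMES)))
;; gives PLUS precedence 0 and TIMES precedence 1 (both left-associative),
;; while EOF gets #f because it appears in no precedence declaration.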
|
||||
|
||||
;; Retrieves the terminal symbols from a terminals-def (See token-syntax.rkt)
|
||||
;; get-terms-from-def: identifier? -> (listof identifier?)
|
||||
(define (get-terms-from-def term-syn)
|
||||
(let ((t (syntax-local-value term-syn (lambda () #f))))
|
||||
(cond
|
||||
((terminals-def? t) (syntax->list (terminals-def-t t)))
|
||||
((e-terminals-def? t) (syntax->list (e-terminals-def-t t)))
|
||||
(else
|
||||
(raise-syntax-error
|
||||
'parser-tokens
|
||||
"undefined token group"
|
||||
term-syn)))))
|
||||
|
||||
(define (get-term-list term-group-names)
|
||||
(remove-duplicates
|
||||
(cons (datum->syntax-object #f 'error)
|
||||
(apply append
|
||||
(map get-terms-from-def term-group-names)))))
|
||||
|
||||
(define (parse-input term-defs start ends prec-decls prods src-pos)
|
||||
(let* ((start-syms (map syntax-e start))
|
||||
|
||||
(list-of-terms (map syntax-e (get-term-list term-defs)))
|
||||
|
||||
(end-terms
|
||||
(map
|
||||
(lambda (end)
|
||||
(unless (memq (syntax-e end) list-of-terms)
|
||||
(raise-syntax-error
|
||||
'parser-end-tokens
|
||||
(format "End token ~a not defined as a token"
|
||||
(syntax-e end))
|
||||
end))
|
||||
(syntax-e end))
|
||||
ends))
|
||||
|
||||
;; Get the list of terminals out of input-terms
|
||||
|
||||
(list-of-non-terms
|
||||
(syntax-case prods ()
|
||||
(((non-term production ...) ...)
|
||||
(begin
|
||||
(for-each
|
||||
(lambda (nts)
|
||||
(if (memq (syntax-object->datum nts) list-of-terms)
|
||||
(raise-syntax-error
|
||||
'parser-non-terminals
|
||||
(format "~a used as both token and non-terminal"
|
||||
(syntax-object->datum nts))
|
||||
nts)))
|
||||
(syntax->list (syntax (non-term ...))))
|
||||
|
||||
(let ((dup (duplicate-list? (syntax-object->datum
|
||||
(syntax (non-term ...))))))
|
||||
(if dup
|
||||
(raise-syntax-error
|
||||
'parser-non-terminals
|
||||
(format "non-terminal ~a defined multiple times"
|
||||
dup)
|
||||
prods)))
|
||||
|
||||
(syntax-object->datum (syntax (non-term ...)))))
|
||||
(_
|
||||
(raise-syntax-error
|
||||
'parser-grammar
|
||||
"Grammar must be of the form (grammar (non-terminal productions ...) ...)"
|
||||
prods))))
|
||||
|
||||
;; Check the precedence declarations for errors and turn them into data
|
||||
(precs
|
||||
(syntax-case prec-decls ()
|
||||
(((type term ...) ...)
|
||||
(let ((p-terms
|
||||
(syntax-object->datum (syntax (term ... ...)))))
|
||||
(cond
|
||||
((duplicate-list? p-terms) =>
|
||||
(lambda (d)
|
||||
(raise-syntax-error
|
||||
'parser-precedences
|
||||
(format "duplicate precedence declaration for token ~a"
|
||||
d)
|
||||
prec-decls)))
|
||||
(else
|
||||
(for-each
|
||||
(lambda (a)
|
||||
(for-each
|
||||
(lambda (t)
|
||||
(if (not (memq (syntax-object->datum t)
|
||||
list-of-terms))
|
||||
(raise-syntax-error
|
||||
'parser-precedences
|
||||
(format
|
||||
"Precedence declared for non-token ~a"
|
||||
(syntax-object->datum t))
|
||||
t)))
|
||||
(syntax->list a)))
|
||||
(syntax->list (syntax ((term ...) ...))))
|
||||
(for-each
|
||||
(lambda (type)
|
||||
(if (not (memq (syntax-object->datum type)
|
||||
`(left right nonassoc)))
|
||||
(raise-syntax-error
|
||||
'parser-precedences
|
||||
"Associativity must be left, right or nonassoc"
|
||||
type)))
|
||||
(syntax->list (syntax (type ...))))
|
||||
(syntax-object->datum prec-decls)))))
|
||||
(#f null)
|
||||
(_
|
||||
(raise-syntax-error
|
||||
'parser-precedences
|
||||
"Precedence declaration must be of the form (precs (assoc term ...) ...) where assoc is left, right or nonassoc"
|
||||
prec-decls))))
|
||||
|
||||
(terms (build-terms list-of-terms precs))
|
||||
|
||||
(non-terms (map (lambda (non-term) (make-non-term non-term #f))
|
||||
list-of-non-terms))
|
||||
(term-table (make-hash-table))
|
||||
(non-term-table (make-hash-table)))
|
||||
|
||||
(for-each (lambda (t)
|
||||
(hash-table-put! term-table (gram-sym-symbol t) t))
|
||||
terms)
|
||||
|
||||
(for-each (lambda (nt)
|
||||
(hash-table-put! non-term-table (gram-sym-symbol nt) nt))
|
||||
non-terms)
|
||||
|
||||
(let* (
|
||||
;; parse-prod: syntax-object -> gram-sym vector
|
||||
(parse-prod
|
||||
(lambda (prod-so)
|
||||
(syntax-case prod-so ()
|
||||
((prod-rhs-sym ...)
|
||||
(andmap identifier? (syntax->list prod-so))
|
||||
(begin
|
||||
(for-each (lambda (t)
|
||||
(if (memq (syntax-object->datum t) end-terms)
|
||||
(raise-syntax-error
|
||||
'parser-production-rhs
|
||||
(format "~a is an end token and cannot be used in a production"
|
||||
(syntax-object->datum t))
|
||||
t)))
|
||||
(syntax->list prod-so))
|
||||
(list->vector
|
||||
(map (lambda (s)
|
||||
(hash-table-get
|
||||
term-table
|
||||
(syntax-object->datum s)
|
||||
(lambda ()
|
||||
(hash-table-get
|
||||
non-term-table
|
||||
(syntax-object->datum s)
|
||||
(lambda ()
|
||||
(raise-syntax-error
|
||||
'parser-production-rhs
|
||||
(format
|
||||
"~a is not declared as a terminal or non-terminal"
|
||||
(syntax-object->datum s))
|
||||
s))))))
|
||||
(syntax->list prod-so)))))
|
||||
(_
|
||||
(raise-syntax-error
|
||||
'parser-production-rhs
|
||||
"production right-hand-side must have form (symbol ...)"
|
||||
prod-so)))))
|
||||
|
||||
;; parse-action: syntax-object * syntax-object -> syntax-object
|
||||
(parse-action
|
||||
(lambda (rhs act)
|
||||
(let-values ([(args biggest) (get-args 1 (syntax->list rhs) src-pos term-defs)])
|
||||
(let ([act
|
||||
(if biggest
|
||||
(with-syntax ([$n-start-pos (datum->syntax-object (car biggest) '$n-start-pos)]
|
||||
[$n-end-pos (datum->syntax-object (cdr biggest) '$n-end-pos)])
|
||||
#`(let ([$n-start-pos #,(car biggest)]
|
||||
[$n-end-pos #,(cdr biggest)])
|
||||
#,act))
|
||||
act)])
|
||||
(quasisyntax/loc act
|
||||
(lambda #,args
|
||||
#,act))))))
|
||||
|
||||
;; parse-prod+action: non-term * syntax-object -> production
|
||||
(parse-prod+action
|
||||
(lambda (nt prod-so)
|
||||
(syntax-case prod-so ()
|
||||
((prod-rhs action)
|
||||
(let ((p (parse-prod (syntax prod-rhs))))
|
||||
(make-prod
|
||||
nt
|
||||
p
|
||||
#f
|
||||
(let loop ((i (sub1 (vector-length p))))
|
||||
(if (>= i 0)
|
||||
(let ((gs (vector-ref p i)))
|
||||
(if (term? gs)
|
||||
(term-prec gs)
|
||||
(loop (sub1 i))))
|
||||
#f))
|
||||
(parse-action (syntax prod-rhs) (syntax action)))))
|
||||
((prod-rhs (prec term) action)
|
||||
(identifier? (syntax term))
|
||||
(let ((p (parse-prod (syntax prod-rhs))))
|
||||
(make-prod
|
||||
nt
|
||||
p
|
||||
#f
|
||||
(term-prec
|
||||
(hash-table-get
|
||||
term-table
|
||||
(syntax-object->datum (syntax term))
|
||||
(lambda ()
|
||||
(raise-syntax-error
|
||||
'parser-production-rhs
|
||||
(format
|
||||
"unrecognized terminal ~a in precedence declaration"
|
||||
(syntax-object->datum (syntax term)))
|
||||
(syntax term)))))
|
||||
(parse-action (syntax prod-rhs) (syntax action)))))
|
||||
(_
|
||||
(raise-syntax-error
|
||||
'parser-production-rhs
|
||||
"production must have form [(symbol ...) expression] or [(symbol ...) (prec symbol) expression]"
|
||||
prod-so)))))
|
||||
|
||||
;; parse-prod-for-nt: syntax-object -> production list
|
||||
(parse-prods-for-nt
|
||||
(lambda (prods-so)
|
||||
(syntax-case prods-so ()
|
||||
((nt productions ...)
|
||||
(> (length (syntax->list (syntax (productions ...)))) 0)
|
||||
(let ((nt (hash-table-get non-term-table
|
||||
(syntax-object->datum (syntax nt)))))
|
||||
(map (lambda (p) (parse-prod+action nt p))
|
||||
(syntax->list (syntax (productions ...))))))
|
||||
(_
|
||||
(raise-syntax-error
|
||||
'parser-productions
|
||||
"A production for a non-terminal must be (non-term right-hand-side ...) with at least 1 right hand side"
|
||||
prods-so))))))
|
||||
|
||||
(for-each
|
||||
(lambda (sstx ssym)
|
||||
(unless (memq ssym list-of-non-terms)
|
||||
(raise-syntax-error
|
||||
'parser-start
|
||||
(format "Start symbol ~a not defined as a non-terminal" ssym)
|
||||
sstx)))
|
||||
start start-syms)
|
||||
|
||||
(let* ((starts (map (lambda (x) (make-non-term (gensym) #f)) start-syms))
|
||||
(end-non-terms (map (lambda (x) (make-non-term (gensym) #f)) start-syms))
|
||||
(parsed-prods (map parse-prods-for-nt (syntax->list prods)))
|
||||
(start-prods
|
||||
(map (lambda (start end-non-term)
|
||||
(list (make-prod start (vector end-non-term) #f #f
|
||||
(syntax (lambda (x) x)))))
|
||||
starts end-non-terms))
|
||||
(prods
|
||||
`(,@start-prods
|
||||
,@(map
|
||||
(lambda (end-nt start-sym)
|
||||
(map
|
||||
(lambda (end)
|
||||
(make-prod end-nt
|
||||
(vector
|
||||
(hash-table-get non-term-table start-sym)
|
||||
(hash-table-get term-table end))
|
||||
#f
|
||||
#f
|
||||
(syntax (lambda (x) x))))
|
||||
end-terms))
|
||||
end-non-terms start-syms)
|
||||
,@parsed-prods)))
|
||||
|
||||
(make-object grammar%
|
||||
prods
|
||||
(map car start-prods)
|
||||
terms
|
||||
(append starts (append end-non-terms non-terms))
|
||||
(map (lambda (term-name)
|
||||
(hash-table-get term-table term-name))
|
||||
end-terms)))))))
|
@ -0,0 +1,277 @@
|
||||
(module lalr mzscheme
|
||||
|
||||
;; Compute LALR lookaheads from DeRemer and Pennello 1982
|
||||
|
||||
(require "lr0.rkt"
|
||||
"grammar.rkt"
|
||||
mzlib/list
|
||||
mzlib/class)
|
||||
|
||||
(provide compute-LA)
|
||||
|
||||
;; compute-DR: LR0-automaton * grammar -> (trans-key -> term set)
|
||||
;; computes for each state, non-term transition pair, the terminals
|
||||
;; which can transition out of the resulting state
|
||||
;; output term set is represented in bit-vector form
|
||||
(define (compute-DR a g)
|
||||
(lambda (tk)
|
||||
(let ((r (send a run-automaton (trans-key-st tk) (trans-key-gs tk))))
|
||||
(term-list->bit-vector
|
||||
(filter
|
||||
(lambda (term)
|
||||
(send a run-automaton r term))
|
||||
(send g get-terms))))))
|
||||
|
||||
;; compute-reads:
|
||||
;; LR0-automaton * grammar -> (trans-key -> trans-key list)
|
||||
(define (compute-reads a g)
|
||||
(let ((nullable-non-terms
|
||||
(filter (lambda (nt) (send g nullable-non-term? nt))
|
||||
(send g get-non-terms))))
|
||||
(lambda (tk)
|
||||
(let ((r (send a run-automaton (trans-key-st tk) (trans-key-gs tk))))
|
||||
(map (lambda (x) (make-trans-key r x))
|
||||
(filter (lambda (non-term) (send a run-automaton r non-term))
|
||||
nullable-non-terms))))))
|
||||
|
||||
;; compute-read: LR0-automaton * grammar -> (trans-key -> term set)
|
||||
;; output term set is represented in bit-vector form
|
||||
(define (compute-read a g)
|
||||
(let* ((dr (compute-DR a g))
|
||||
(reads (compute-reads a g)))
|
||||
(digraph-tk->terml (send a get-mapped-non-term-keys)
|
||||
reads
|
||||
dr
|
||||
(send a get-num-states))))
|
||||
;; returns the list of all k such that state k transitions to state start on the
|
||||
;; transitions in rhs (in order)
|
||||
(define (run-lr0-backward a rhs dot-pos start num-states)
|
||||
(let loop ((states (list start))
|
||||
(i (sub1 dot-pos)))
|
||||
(cond
|
||||
((< i 0) states)
|
||||
(else (loop (send a run-automaton-back states (vector-ref rhs i))
|
||||
(sub1 i))))))
|
||||
|
||||
;; prod->items-for-include: grammar * prod * non-term -> lr0-item list
|
||||
;; returns the list of all (B -> beta . nt gamma) such that prod = (B -> beta nt gamma)
|
||||
;; and gamma =>* epsilon
|
||||
(define (prod->items-for-include g prod nt)
|
||||
(let* ((rhs (prod-rhs prod))
|
||||
(rhs-l (vector-length rhs)))
|
||||
(append (if (and (> rhs-l 0) (eq? nt (vector-ref rhs (sub1 rhs-l))))
|
||||
(list (make-item prod (sub1 rhs-l)))
|
||||
null)
|
||||
(let loop ((i (sub1 rhs-l)))
|
||||
(cond
|
||||
((and (> i 0)
|
||||
(non-term? (vector-ref rhs i))
|
||||
(send g nullable-non-term? (vector-ref rhs i)))
|
||||
(if (eq? nt (vector-ref rhs (sub1 i)))
|
||||
(cons (make-item prod (sub1 i))
|
||||
(loop (sub1 i)))
|
||||
(loop (sub1 i))))
|
||||
(else null))))))
|
||||
|
||||
;; prod-list->items-for-include: grammar * prod list * non-term -> lr0-item list
|
||||
;; return the list of all (B -> beta . nt gamma) such that (B -> beta nt gamma) in prod-list
|
||||
;; and gamma =>* epsilon
|
||||
(define (prod-list->items-for-include g prod-list nt)
|
||||
(apply append (map (lambda (prod) (prod->items-for-include g prod nt)) prod-list)))
|
||||
|
||||
;; compute-includes: lr0-automaton * grammar -> (trans-key -> trans-key list)
|
||||
(define (compute-includes a g)
|
||||
(let ((num-states (send a get-num-states))
|
||||
(items-for-input-nt (make-vector (send g get-num-non-terms) null)))
|
||||
(for-each
|
||||
(lambda (input-nt)
|
||||
(vector-set! items-for-input-nt (non-term-index input-nt)
|
||||
(prod-list->items-for-include g (send g get-prods) input-nt)))
|
||||
(send g get-non-terms))
|
||||
(lambda (tk)
|
||||
(let* ((goal-state (trans-key-st tk))
|
||||
(non-term (trans-key-gs tk))
|
||||
(items (vector-ref items-for-input-nt (non-term-index non-term))))
|
||||
(trans-key-list-remove-dups
|
||||
(apply append
|
||||
(map (lambda (item)
|
||||
(let* ((prod (item-prod item))
|
||||
(rhs (prod-rhs prod))
|
||||
(lhs (prod-lhs prod)))
|
||||
(map (lambda (state)
|
||||
(make-trans-key state lhs))
|
||||
(run-lr0-backward a
|
||||
rhs
|
||||
(item-dot-pos item)
|
||||
goal-state
|
||||
num-states))))
|
||||
items)))))))
|
||||
|
||||
;; compute-lookback: lr0-automaton * grammar -> (kernel * prod -> trans-key list)
|
||||
(define (compute-lookback a g)
|
||||
(let ((num-states (send a get-num-states)))
|
||||
(lambda (state prod)
|
||||
(map (lambda (k) (make-trans-key k (prod-lhs prod)))
|
||||
(run-lr0-backward a (prod-rhs prod) (vector-length (prod-rhs prod)) state num-states)))))
|
||||
|
||||
;; compute-follow: LR0-automaton * grammar -> (trans-key -> term set)
|
||||
;; output term set is represented in bit-vector form
|
||||
(define (compute-follow a g includes)
|
||||
(let ((read (compute-read a g)))
|
||||
(digraph-tk->terml (send a get-mapped-non-term-keys)
|
||||
includes
|
||||
read
|
||||
(send a get-num-states))))
|
||||
|
||||
;; compute-LA: LR0-automaton * grammar -> kernel * prod -> term set
|
||||
;; output term set is represented in bit-vector form
|
||||
(define (compute-LA a g)
|
||||
(let* ((includes (compute-includes a g))
|
||||
(lookback (compute-lookback a g))
|
||||
(follow (compute-follow a g includes)))
|
||||
(lambda (k p)
|
||||
(let* ((l (lookback k p))
|
||||
(f (map follow l)))
|
||||
(apply bitwise-ior (cons 0 f))))))
|
||||
|
||||
(define (print-DR dr a g)
|
||||
(print-input-st-sym dr "DR" a g print-output-terms))
|
||||
(define (print-Read Read a g)
|
||||
(print-input-st-sym Read "Read" a g print-output-terms))
|
||||
(define (print-includes i a g)
|
||||
(print-input-st-sym i "includes" a g print-output-st-nt))
|
||||
(define (print-lookback l a g)
|
||||
(print-input-st-prod l "lookback" a g print-output-st-nt))
|
||||
(define (print-follow f a g)
|
||||
(print-input-st-sym f "follow" a g print-output-terms))
|
||||
(define (print-LA l a g)
|
||||
(print-input-st-prod l "LA" a g print-output-terms))
|
||||
|
||||
(define (print-input-st-sym f name a g print-output)
|
||||
(printf "~a:\n" name)
|
||||
(send a for-each-state
|
||||
(lambda (state)
|
||||
(for-each
|
||||
(lambda (non-term)
|
||||
(let ((res (f (make-trans-key state non-term))))
|
||||
(if (not (null? res))
|
||||
(printf "~a(~a, ~a) = ~a\n"
|
||||
name
|
||||
state
|
||||
(gram-sym-symbol non-term)
|
||||
(print-output res)))))
|
||||
(send g get-non-terms))))
|
||||
(newline))
|
||||
|
||||
(define (print-input-st-prod f name a g print-output)
|
||||
(printf "~a:\n" name)
|
||||
(send a for-each-state
|
||||
(lambda (state)
|
||||
(for-each
|
||||
(lambda (non-term)
|
||||
(for-each
|
||||
(lambda (prod)
|
||||
(let ((res (f state prod)))
|
||||
(if (not (null? res))
|
||||
(printf "~a(~a, ~a) = ~a\n"
|
||||
name
|
||||
(kernel-index state)
|
||||
(prod-index prod)
|
||||
(print-output res)))))
|
||||
(send g get-prods-for-non-term non-term)))
|
||||
(send g get-non-terms)))))
|
||||
|
||||
(define (print-output-terms r)
|
||||
(map
|
||||
(lambda (p)
|
||||
(gram-sym-symbol p))
|
||||
r))
|
||||
|
||||
(define (print-output-st-nt r)
|
||||
(map
|
||||
(lambda (p)
|
||||
(list
|
||||
(kernel-index (trans-key-st p))
|
||||
(gram-sym-symbol (trans-key-gs p))))
|
||||
r))
|
||||
|
||||
;; init-tk-map : int -> (vectorof hashtable?)
|
||||
(define (init-tk-map n)
|
||||
(let ((v (make-vector n #f)))
|
||||
(let loop ((i (sub1 (vector-length v))))
|
||||
(when (>= i 0)
|
||||
(vector-set! v i (make-hash-table))
|
||||
(loop (sub1 i))))
|
||||
v))
|
||||
|
||||
;; lookup-tk-map : (vectorof (symbol? int hashtable)) -> trans-key? -> int
|
||||
(define (lookup-tk-map map)
|
||||
(lambda (tk)
|
||||
(let ((st (trans-key-st tk))
|
||||
(gs (trans-key-gs tk)))
|
||||
(hash-table-get (vector-ref map (kernel-index st))
|
||||
(gram-sym-symbol gs)
|
||||
(lambda () 0)))))
|
||||
|
||||
;; add-tk-map : (vectorof (symbol? int hashtable)) -> trans-key int ->
|
||||
(define (add-tk-map map)
|
||||
(lambda (tk v)
|
||||
(let ((st (trans-key-st tk))
|
||||
(gs (trans-key-gs tk)))
|
||||
(hash-table-put! (vector-ref map (kernel-index st))
|
||||
(gram-sym-symbol gs)
|
||||
v))))
|
||||
|
||||
;; digraph-tk->terml:
|
||||
;; (trans-key list) * (trans-key -> trans-key list) * (trans-key -> term list) * int
|
||||
;; -> (trans-key -> term list)
|
||||
;; DeRemer and Pennello 1982
|
||||
;; Computes (f x) = (f- x) union Union{(f y) | y in (edges x)}
|
||||
;; A specialization of digraph in the file graph.rkt
|
||||
(define (digraph-tk->terml nodes edges f- num-states)
|
||||
(letrec [
|
||||
;; Will map elements of trans-key to term sets represented as bit vectors
|
||||
(results (init-tk-map num-states))
|
||||
|
||||
;; Maps elements of trans-keys to integers.
|
||||
(N (init-tk-map num-states))
|
||||
|
||||
(get-N (lookup-tk-map N))
|
||||
(set-N (add-tk-map N))
|
||||
(get-f (lookup-tk-map results))
|
||||
(set-f (add-tk-map results))
|
||||
|
||||
(stack null)
|
||||
(push (lambda (x)
|
||||
(set! stack (cons x stack))))
|
||||
(pop (lambda ()
|
||||
(begin0
|
||||
(car stack)
|
||||
(set! stack (cdr stack)))))
|
||||
(depth (lambda () (length stack)))
|
||||
|
||||
;; traverse: 'a ->
|
||||
(traverse
|
||||
(lambda (x)
|
||||
(push x)
|
||||
(let ((d (depth)))
|
||||
(set-N x d)
|
||||
(set-f x (f- x))
|
||||
(for-each (lambda (y)
|
||||
(when (= 0 (get-N y))
|
||||
(traverse y))
|
||||
(set-f x (bitwise-ior (get-f x) (get-f y)))
|
||||
(set-N x (min (get-N x) (get-N y))))
|
||||
(edges x))
|
||||
(when (= d (get-N x))
|
||||
(let loop ((p (pop)))
|
||||
(set-N p +inf.0)
|
||||
(set-f p (get-f x))
|
||||
(unless (equal? x p)
|
||||
(loop (pop))))))))]
|
||||
(for-each (lambda (x)
|
||||
(when (= 0 (get-N x))
|
||||
(traverse x)))
|
||||
nodes)
|
||||
get-f))
|
||||
)
|
@ -0,0 +1,372 @@
|
||||
(module lr0 mzscheme
|
||||
|
||||
;; Handle the LR0 automaton
|
||||
|
||||
(require "grammar.rkt"
|
||||
"graph.rkt"
|
||||
mzlib/list
|
||||
mzlib/class)
|
||||
|
||||
(provide build-lr0-automaton lr0%
|
||||
(struct trans-key (st gs)) trans-key-list-remove-dups
|
||||
kernel-items kernel-index)
|
||||
|
||||
;; kernel = (make-kernel (LR1-item list) index)
|
||||
;; the list must be kept sorted according to item<? so that equal? can
|
||||
;; be used to compare kernels
|
||||
;; Each kernel is assigned a unique index, 0 <= index < number of states
|
||||
;; trans-key = (make-trans-key kernel gram-sym)
|
||||
(define-struct kernel (items index) (make-inspector))
|
||||
(define-struct trans-key (st gs) (make-inspector))
|
||||
|
||||
(define (trans-key<? a b)
|
||||
(let ((kia (kernel-index (trans-key-st a)))
|
||||
(kib (kernel-index (trans-key-st b))))
|
||||
(or (< kia kib)
|
||||
(and (= kia kib)
|
||||
(< (non-term-index (trans-key-gs a))
|
||||
(non-term-index (trans-key-gs b)))))))
|
||||
|
||||
(define (trans-key-list-remove-dups tkl)
|
||||
(let loop ((sorted (sort tkl trans-key<?)))
|
||||
(cond
|
||||
((null? sorted) null)
|
||||
((null? (cdr sorted)) sorted)
|
||||
(else
|
||||
(if (and (= (non-term-index (trans-key-gs (car sorted)))
|
||||
(non-term-index (trans-key-gs (cadr sorted))))
|
||||
(= (kernel-index (trans-key-st (car sorted)))
|
||||
(kernel-index (trans-key-st (cadr sorted)))))
|
||||
(loop (cdr sorted))
|
||||
(cons (car sorted) (loop (cdr sorted))))))))
|
||||
|
||||
|
||||
;; build-transition-table : int (listof (cons/c trans-key X)) ->
|
||||
;; (vectorof (symbol X hashtable))
|
||||
(define (build-transition-table num-states assoc)
|
||||
(let ((transitions (make-vector num-states #f)))
|
||||
(let loop ((i (sub1 (vector-length transitions))))
|
||||
(when (>= i 0)
|
||||
(vector-set! transitions i (make-hash-table))
|
||||
(loop (sub1 i))))
|
||||
(for-each
|
||||
(lambda (trans-key/kernel)
|
||||
(let ((tk (car trans-key/kernel)))
|
||||
(hash-table-put! (vector-ref transitions (kernel-index (trans-key-st tk)))
|
||||
(gram-sym-symbol (trans-key-gs tk))
|
||||
(cdr trans-key/kernel))))
|
||||
assoc)
|
||||
transitions))
|
||||
|
||||
;; reverse-assoc : (listof (cons/c trans-key? kernel?)) ->
|
||||
;; (listof (cons/c trans-key? (listof kernel?)))
|
||||
(define (reverse-assoc assoc)
|
||||
(let ((reverse-hash (make-hash-table 'equal))
|
||||
(hash-table-add!
|
||||
(lambda (ht k v)
|
||||
(hash-table-put! ht k (cons v (hash-table-get ht k (lambda () null)))))))
|
||||
(for-each
|
||||
(lambda (trans-key/kernel)
|
||||
(let ((tk (car trans-key/kernel)))
|
||||
(hash-table-add! reverse-hash
|
||||
(make-trans-key (cdr trans-key/kernel)
|
||||
(trans-key-gs tk))
|
||||
(trans-key-st tk))))
|
||||
assoc)
|
||||
(hash-table-map reverse-hash cons)))
|
||||
|
||||
|
||||
;; kernel-list-remove-duplicates
|
||||
;; LR0-automaton = object of class lr0%
|
||||
(define lr0%
|
||||
(class object%
|
||||
(super-instantiate ())
|
||||
;; term-assoc : (listof (cons/c trans-key? kernel?))
|
||||
;; non-term-assoc : (listof (cons/c trans-key? kernel?))
|
||||
;; states : (vectorof kernel?)
|
||||
;; epsilons : ???
|
||||
(init-field term-assoc non-term-assoc states epsilons)
|
||||
|
||||
(define transitions (build-transition-table (vector-length states)
|
||||
(append term-assoc non-term-assoc)))
|
||||
|
||||
(define reverse-term-assoc (reverse-assoc term-assoc))
|
||||
(define reverse-non-term-assoc (reverse-assoc non-term-assoc))
|
||||
(define reverse-transitions
|
||||
(build-transition-table (vector-length states)
|
||||
(append reverse-term-assoc reverse-non-term-assoc)))
|
||||
|
||||
(define mapped-non-terms (map car non-term-assoc))
|
||||
|
||||
(define/public (get-mapped-non-term-keys)
|
||||
mapped-non-terms)
|
||||
|
||||
(define/public (get-num-states)
|
||||
(vector-length states))
|
||||
|
||||
(define/public (get-epsilon-trans)
|
||||
epsilons)
|
||||
|
||||
(define/public (get-transitions)
|
||||
(append term-assoc non-term-assoc))
|
||||
|
||||
;; for-each-state : (state ->) ->
|
||||
;; Iteration over the states in an automaton
|
||||
(define/public (for-each-state f)
|
||||
(let ((num-states (vector-length states)))
|
||||
(let loop ((i 0))
|
||||
(if (< i num-states)
|
||||
(begin
|
||||
(f (vector-ref states i))
|
||||
(loop (add1 i)))))))
|
||||
|
||||
;; run-automaton: kernel? gram-sym? -> (union kernel #f)
|
||||
;; returns the state reached from state k on input s, or #f when k
|
||||
;; has no transition on s
|
||||
(define/public (run-automaton k s)
|
||||
(hash-table-get (vector-ref transitions (kernel-index k))
|
||||
(gram-sym-symbol s)
|
||||
(lambda () #f)))
|
||||
|
||||
;; run-automaton-back : (listof kernel?) gram-sym? -> (listof kernel)
|
||||
;; returns the list of states that can reach k by transitioning on s.
|
||||
(define/public (run-automaton-back k s)
|
||||
(apply append
|
||||
(map
|
||||
(lambda (k)
|
||||
(hash-table-get (vector-ref reverse-transitions (kernel-index k))
|
||||
(gram-sym-symbol s)
|
||||
(lambda () null)))
|
||||
k)))))
|
||||
|
||||
(define (union comp<?)
|
||||
(letrec ((union
|
||||
(lambda (l1 l2)
|
||||
(cond
|
||||
((null? l1) l2)
|
||||
((null? l2) l1)
|
||||
(else (let ((c1 (car l1))
|
||||
(c2 (car l2)))
|
||||
(cond
|
||||
((comp<? c1 c2)
|
||||
(cons c1 (union (cdr l1) l2)))
|
||||
((comp<? c2 c1)
|
||||
(cons c2 (union l1 (cdr l2))))
|
||||
(else (union (cdr l1) l2)))))))))
|
||||
union))
|
||||
|
||||
|
||||
;; The kernels in the automaton are represented canonically.
|
||||
;; That is (equal? a b) <=> (eq? a b)
|
||||
(define (kernel->string k)
|
||||
(apply string-append
|
||||
`("{" ,@(map (lambda (i) (string-append (item->string i) ", "))
|
||||
(kernel-items k))
|
||||
"}")))
|
||||
|
||||
;; build-LR0-automaton: grammar -> LR0-automaton
|
||||
;; Constructs the kernels of the sets of LR(0) items of g
|
||||
(define (build-lr0-automaton grammar)
|
||||
; (printf "LR(0) automaton:\n")
|
||||
(letrec (
|
||||
(epsilons (make-hash-table 'equal))
|
||||
(grammar-symbols (append (send grammar get-non-terms)
|
||||
(send grammar get-terms)))
|
||||
;; first-non-term: non-term -> non-term list
|
||||
;; given a non-terminal symbol C, return those non-terminal
|
||||
;; symbols A s.t. C -> An for some string of terminals and
|
||||
;; non-terminals n where -> means a rightmost derivation in many
|
||||
;; steps. Assumes that each non-term can be reduced to a string
|
||||
;; of terms.
|
||||
(first-non-term
|
||||
(digraph (send grammar get-non-terms)
|
||||
(lambda (nt)
|
||||
(filter non-term?
|
||||
(map (lambda (prod)
|
||||
(sym-at-dot (make-item prod 0)))
|
||||
(send grammar get-prods-for-non-term nt))))
|
||||
(lambda (nt) (list nt))
|
||||
(union non-term<?)
|
||||
(lambda () null)))
|
||||
|
||||
;; closure: LR1-item list -> LR1-item list
|
||||
;; Creates a set of items containing i s.t. if A -> n.Xm is in it,
|
||||
;; X -> .o is in it too.
|
||||
(LR0-closure
|
||||
(lambda (i)
|
||||
(cond
|
||||
((null? i) null)
|
||||
(else
|
||||
(let ((next-gsym (sym-at-dot (car i))))
|
||||
(cond
|
||||
((non-term? next-gsym)
|
||||
(cons (car i)
|
||||
(append
|
||||
(apply append
|
||||
(map (lambda (non-term)
|
||||
(map (lambda (x)
|
||||
(make-item x 0))
|
||||
(send grammar
|
||||
get-prods-for-non-term
|
||||
non-term)))
|
||||
(first-non-term next-gsym)))
|
||||
(LR0-closure (cdr i)))))
|
||||
(else
|
||||
(cons (car i) (LR0-closure (cdr i))))))))))
|
||||
|
||||
|
||||
;; maps trans-keys to kernels
|
||||
(automaton-term null)
|
||||
(automaton-non-term null)
|
||||
|
||||
;; keeps the kernels we have seen, so we can have a unique
|
||||
;; list for each kernel
|
||||
(kernels (make-hash-table 'equal))
|
||||
|
||||
(counter 0)
|
||||
|
||||
;; goto: LR1-item list -> LR1-item list list
|
||||
;; creates new kernels by moving the dot in each item in the
|
||||
;; LR0-closure of kernel to the right, and grouping them by
|
||||
;; the term/non-term moved over. Returns the kernels not
|
||||
;; yet seen, and places the trans-keys into automaton
|
||||
(goto
|
||||
(lambda (kernel)
|
||||
(let (
|
||||
;; maps a gram-sym to a list of items
|
||||
(table (make-hash-table))
|
||||
|
||||
;; add-item!:
|
||||
;; (symbol (listof item) hashtable) item? ->
|
||||
;; adds i into the table grouped with the grammar
|
||||
;; symbol following its dot
|
||||
(add-item!
|
||||
(lambda (table i)
|
||||
(let ((gs (sym-at-dot i)))
|
||||
(cond
|
||||
(gs
|
||||
(let ((already
|
||||
(hash-table-get table
|
||||
(gram-sym-symbol gs)
|
||||
(lambda () null))))
|
||||
(unless (member i already)
|
||||
(hash-table-put! table
|
||||
(gram-sym-symbol gs)
|
||||
(cons i already)))))
|
||||
((= 0 (vector-length (prod-rhs (item-prod i))))
|
||||
(let ((current (hash-table-get epsilons
|
||||
kernel
|
||||
(lambda () null))))
|
||||
(hash-table-put! epsilons
|
||||
kernel
|
||||
(cons i current)))))))))
|
||||
|
||||
;; Group the items of the LR0 closure of the kernel
|
||||
;; by the character after the dot
|
||||
(for-each (lambda (item)
|
||||
(add-item! table item))
|
||||
(LR0-closure (kernel-items kernel)))
|
||||
|
||||
;; each group is a new kernel, with the dot advanced.
|
||||
;; sorts the items in a kernel so kernels can be compared
|
||||
;; with equal?, using the kernels table to make sure
|
||||
;; only one representative of each kernel is created
|
||||
(filter
|
||||
(lambda (x) x)
|
||||
(map
|
||||
(lambda (i)
|
||||
(let* ((gs (car i))
|
||||
(items (cadr i))
|
||||
(new #f)
|
||||
(new-kernel (sort
|
||||
(filter (lambda (x) x)
|
||||
(map move-dot-right items))
|
||||
item<?))
|
||||
(unique-kernel (hash-table-get
|
||||
kernels
|
||||
new-kernel
|
||||
(lambda ()
|
||||
(let ((k (make-kernel
|
||||
new-kernel
|
||||
counter)))
|
||||
(set! new #t)
|
||||
(set! counter (add1 counter))
|
||||
(hash-table-put! kernels
|
||||
new-kernel
|
||||
k)
|
||||
k)))))
|
||||
(cond
|
||||
((term? gs)
|
||||
(set! automaton-term (cons (cons (make-trans-key kernel gs)
|
||||
unique-kernel)
|
||||
automaton-term)))
|
||||
(else
|
||||
(set! automaton-non-term (cons (cons (make-trans-key kernel gs)
|
||||
unique-kernel)
|
||||
automaton-non-term))))
|
||||
#;(printf "~a -> ~a on ~a\n"
|
||||
(kernel->string kernel)
|
||||
(kernel->string unique-kernel)
|
||||
(gram-sym-symbol gs))
|
||||
(if new
|
||||
unique-kernel
|
||||
#f)))
|
||||
(let loop ((gsyms grammar-symbols))
|
||||
(cond
|
||||
((null? gsyms) null)
|
||||
(else
|
||||
(let ((items (hash-table-get table
|
||||
(gram-sym-symbol (car gsyms))
|
||||
(lambda () null))))
|
||||
(cond
|
||||
((null? items) (loop (cdr gsyms)))
|
||||
(else
|
||||
(cons (list (car gsyms) items)
|
||||
(loop (cdr gsyms))))))))))))))
|
||||
|
||||
(starts
|
||||
(map (lambda (init-prod) (list (make-item init-prod 0)))
|
||||
(send grammar get-init-prods)))
|
||||
(startk
|
||||
(map (lambda (start)
|
||||
(let ((k (make-kernel start counter)))
|
||||
(hash-table-put! kernels start k)
|
||||
(set! counter (add1 counter))
|
||||
k))
|
||||
starts))
|
||||
(new-kernels (make-queue)))
|
||||
|
||||
(let loop ((old-kernels startk)
|
||||
(seen-kernels null))
|
||||
(cond
|
||||
((and (empty-queue? new-kernels) (null? old-kernels))
|
||||
(make-object lr0%
|
||||
automaton-term
|
||||
automaton-non-term
|
||||
(list->vector (reverse seen-kernels))
|
||||
epsilons))
|
||||
((null? old-kernels)
|
||||
(loop (deq! new-kernels) seen-kernels))
|
||||
(else
|
||||
(enq! new-kernels (goto (car old-kernels)))
|
||||
(loop (cdr old-kernels) (cons (car old-kernels) seen-kernels)))))))
|
||||
|
||||
(define-struct q (f l) (make-inspector))
|
||||
(define (empty-queue? q)
|
||||
(null? (q-f q)))
|
||||
(define (make-queue)
|
||||
(make-q null null))
|
||||
(define (enq! q i)
|
||||
(if (empty-queue? q)
|
||||
(let ((i (mcons i null)))
|
||||
(set-q-l! q i)
|
||||
(set-q-f! q i))
|
||||
(begin
|
||||
(set-mcdr! (q-l q) (mcons i null))
|
||||
(set-q-l! q (mcdr (q-l q))))))
|
||||
(define (deq! q)
|
||||
(begin0
|
||||
(mcar (q-f q))
|
||||
(set-q-f! q (mcdr (q-f q)))))
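;; The queue above is FIFO, e.g.
;;   (define q (make-queue)) (enq! q 1) (enq! q 2) (deq! q) ; => 1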
|
||||
|
||||
)
|
@ -0,0 +1,54 @@
|
||||
(module parser-actions mzscheme
|
||||
(require "grammar.rkt")
|
||||
(provide (all-defined-except make-reduce make-reduce*)
|
||||
(rename make-reduce* make-reduce))
|
||||
|
||||
;; An action is
|
||||
;; - (make-shift int)
|
||||
;; - (make-reduce prod runtime-action)
|
||||
;; - (make-accept)
|
||||
;; - (make-goto int)
|
||||
;; - (no-action)
|
||||
;; A reduce contains a runtime-reduce so that sharing of the reduces can
|
||||
;; be easily transferred to sharing of runtime-reduces.
|
||||
|
||||
(define-struct action () (make-inspector))
|
||||
(define-struct (shift action) (state) (make-inspector))
|
||||
(define-struct (reduce action) (prod runtime-reduce) (make-inspector))
|
||||
(define-struct (accept action) () (make-inspector))
|
||||
(define-struct (goto action) (state) (make-inspector))
|
||||
(define-struct (no-action action) () (make-inspector))
|
||||
|
||||
(define (make-reduce* p)
|
||||
(make-reduce p
|
||||
(vector (prod-index p)
|
||||
(gram-sym-symbol (prod-lhs p))
|
||||
(vector-length (prod-rhs p)))))
|
||||
|
||||
;; A runtime-action is
|
||||
;; non-negative-int (shift)
|
||||
;; (vector int symbol int) (reduce)
|
||||
;; 'accept (accept)
|
||||
;; negative-int (goto)
|
||||
;; #f (no-action)
|
||||
|
||||
(define (action->runtime-action a)
|
||||
(cond
|
||||
((shift? a) (shift-state a))
|
||||
((reduce? a) (reduce-runtime-reduce a))
|
||||
((accept? a) 'accept)
|
||||
((goto? a) (- (+ (goto-state a) 1)))
|
||||
((no-action? a) #f)))
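;; For example, gotos are encoded as negative integers:
;; (action->runtime-action (make-goto 4)) => -5 and (runtime-goto-state -5) => 4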
|
||||
|
||||
(define (runtime-shift? x) (and (integer? x) (>= x 0)))
|
||||
(define runtime-reduce? vector?)
|
||||
(define (runtime-accept? x) (eq? x 'accept))
|
||||
(define (runtime-goto? x) (and (integer? x) (< x 0)))
|
||||
|
||||
(define runtime-shift-state values)
|
||||
(define (runtime-reduce-prod-num x) (vector-ref x 0))
|
||||
(define (runtime-reduce-lhs x) (vector-ref x 1))
|
||||
(define (runtime-reduce-rhs-length x) (vector-ref x 2))
|
||||
(define (runtime-goto-state x) (- (+ x 1)))
|
||||
|
||||
)
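;; Sketch of the runtime encoding defined above (made-up state numbers; these
;; forms would only evaluate inside the module, where the constructors are
;; visible):
;;
;;   (action->runtime-action (make-shift 7))   ; => 7   (shifts keep their state)
;;   (action->runtime-action (make-goto 3))    ; => -4  (gotos are packed into
;;                                             ;         the negative integers)
;;   (runtime-goto-state -4)                   ; => 3   (and recovered again)
;;
;; Keeping every entry an integer, symbol, vector, or #f keeps the table made
;; of simple first-order values.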
@ -0,0 +1,113 @@
(module parser-builder mzscheme

  (require "input-file-parser.rkt"
           "grammar.rkt"
           "table.rkt"
           mzlib/class
           racket/contract)
  (require-for-template mzscheme)

  (provide/contract
   (build-parser (-> string? any/c any/c
                     (listof identifier?)
                     (listof identifier?)
                     (listof identifier?)
                     (or/c syntax? #f)
                     syntax?
                     (values any/c any/c any/c any/c))))

  ;; fix-check-syntax : (listof identifier?) (listof identifier?) (listof identifier?)
  ;;                    (union syntax? false/c) syntax? -> syntax?
  (define (fix-check-syntax input-terms start ends assocs prods)
    (let* ((term-binders (get-term-list input-terms))
           (get-term-binder
            (let ((t (make-hash-table)))
              (for-each
               (lambda (term)
                 (hash-table-put! t (syntax-e term) term))
               term-binders)
              (lambda (x)
                (let ((r (hash-table-get t (syntax-e x) (lambda () #f))))
                  (if r
                      (syntax-local-introduce (datum->syntax-object r (syntax-e x) x x))
                      x)))))
           (rhs-list
            (syntax-case prods ()
              (((_ rhs ...) ...)
               (syntax->list (syntax (rhs ... ...)))))))
      (with-syntax (((tmp ...) (map syntax-local-introduce term-binders))
                    ((term-group ...)
                     (map (lambda (tg)
                            (syntax-property
                             (datum->syntax-object tg #f)
                             'disappeared-use
                             tg))
                          input-terms))
                    ((end ...)
                     (map get-term-binder ends))
                    ((start ...)
                     (map get-term-binder start))
                    ((bind ...)
                     (syntax-case prods ()
                       (((bind _ ...) ...)
                        (syntax->list (syntax (bind ...))))))
                    (((bound ...) ...)
                     (map
                      (lambda (rhs)
                        (syntax-case rhs ()
                          (((bound ...) (_ pbound) __)
                           (map get-term-binder
                                (cons (syntax pbound)
                                      (syntax->list (syntax (bound ...))))))
                          (((bound ...) _)
                           (map get-term-binder
                                (syntax->list (syntax (bound ...)))))))
                      rhs-list))
                    ((prec ...)
                     (if assocs
                         (map get-term-binder
                              (syntax-case assocs ()
                                (((__ term ...) ...)
                                 (syntax->list (syntax (term ... ...))))))
                         null)))
        #`(when #f
            (let ((bind void) ... (tmp void) ...)
              (void bound ... ... term-group ... start ... end ... prec ...))))))
  (require mzlib/list "parser-actions.rkt")
  (define (build-parser filename src-pos suppress input-terms start end assocs prods)
    (let* ((grammar (parse-input input-terms start end assocs prods src-pos))
           (table (build-table grammar filename suppress))
           (all-tokens (make-hash-table))
           (actions-code
            `(vector ,@(map prod-action (send grammar get-prods)))))
      (for-each (lambda (term)
                  (hash-table-put! all-tokens (gram-sym-symbol term) #t))
                (send grammar get-terms))
      #;(let ((num-states (vector-length table))
              (num-gram-syms (+ (send grammar get-num-terms)
                                (send grammar get-num-non-terms)))
              (num-ht-entries (apply + (map length (vector->list table))))
              (num-reduces
               (let ((ht (make-hash-table)))
                 (for-each
                  (lambda (x)
                    (when (reduce? x)
                      (hash-table-put! ht x #t)))
                  (map cdr (apply append (vector->list table))))
                 (length (hash-table-map ht void)))))
          (printf "~a states, ~a grammar symbols, ~a hash-table entries, ~a reduces\n"
                  num-states num-gram-syms num-ht-entries num-reduces)
          (printf "~a -- ~aKB, previously ~aKB\n"
                  (/ (+ 2 num-states
                        (* 4 num-states) (* 2 1.5 num-ht-entries)
                        (* 5 num-reduces)) 256.0)
                  (/ (+ 2 num-states
                        (* 4 num-states) (* 2 2.3 num-ht-entries)
                        (* 5 num-reduces)) 256.0)
                  (/ (+ 2 (* num-states num-gram-syms) (* 5 num-reduces)) 256.0)))
      (values table
              all-tokens
              actions-code
              (fix-check-syntax input-terms start end assocs prods))))

  )
@ -0,0 +1,290 @@
#lang scheme/base

;; Routine to build the LALR table

(require "grammar.rkt"
         "lr0.rkt"
         "lalr.rkt"
         "parser-actions.rkt"
         racket/contract
         mzlib/list
         mzlib/class)

(define (is-a-grammar%? x) (is-a? x grammar%))
(provide/contract
 (build-table (-> is-a-grammar%? string? any/c
                  (vectorof (listof (cons/c (or/c term? non-term?) action?))))))

;; A parse-table is (vectorof (listof (cons/c gram-sym? action)))
;; A grouped-parse-table is (vectorof (listof (cons/c gram-sym? (listof action))))

;; make-parse-table : int -> parse-table
(define (make-parse-table num-states)
  (make-vector num-states null))

;; table-add!: parse-table nat symbol action ->
(define (table-add! table state-index symbol val)
  (vector-set! table state-index (cons (cons symbol val)
                                       (vector-ref table state-index))))

;; group-table : parse-table -> grouped-parse-table
(define (group-table table)
  (list->vector
   (map
    (lambda (state-entry)
      (let ((ht (make-hash)))
        (for-each
         (lambda (gs/actions)
           (let ((group (hash-ref ht (car gs/actions) (lambda () null))))
             (unless (member (cdr gs/actions) group)
               (hash-set! ht (car gs/actions) (cons (cdr gs/actions) group)))))
         state-entry)
        (hash-map ht cons)))
    (vector->list table))))

;; table-map : (gram-sym? X -> Y) (vectorof (listof (cons/c gram-sym? X))) ->
;;             (vectorof (listof (cons/c gram-sym? Y)))
(define (table-map f table)
  (list->vector
   (map
    (lambda (state-entry)
      (map
       (lambda (gs/X)
         (cons (car gs/X) (f (car gs/X) (cdr gs/X))))
       state-entry))
    (vector->list table))))


;; bit-vector-for-each : (nat -> any) nat -> void
;; applies f to the index of every set bit of the integer bv
(define (bit-vector-for-each f bv)
  (letrec ((for-each
            (lambda (bv number)
              (cond
                ((= 0 bv) (void))
                ((= 1 (bitwise-and 1 bv))
                 (f number)
                 (for-each (arithmetic-shift bv -1) (add1 number)))
                (else (for-each (arithmetic-shift bv -1) (add1 number)))))))
    (for-each bv 0)))
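;; Sketch of how the helper above gets used: the lookahead sets passed in from
;; build-table are exact integers whose set bits are term indices, so iterating
;; over such a set looks like the commented-out example below (the literal is
;; made up); it would call the procedure on 0, 1 and 3.
#;(bit-vector-for-each (lambda (i) (printf "~a " i)) #b1011)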

;; print-entry: symbol action output-port ->
;; prints the action a for lookahead sym to the given port
(define (print-entry sym a port)
  (let ((s "\t~a\t\t\t\t\t~a\t~a\n"))
    (cond
      ((shift? a)
       (fprintf port s sym "shift" (shift-state a)))
      ((reduce? a)
       (fprintf port s sym "reduce" (prod-index (reduce-prod a))))
      ((accept? a)
       (fprintf port s sym "accept" ""))
      ((goto? a)
       (fprintf port s sym "goto" (goto-state a))))))


;; count: ('a -> bool) * 'a list -> num
;; counts the number of elements in list that satisfy pred
(define (count pred list)
  (cond
    ((null? list) 0)
    ((pred (car list)) (+ 1 (count pred (cdr list))))
    (else (count pred (cdr list)))))
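;; e.g. (count shift? actions) below counts the shift actions in a conflict
;; set; on plain data, (count even? '(1 2 3 4)) would return 2.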

;; display-parser: LR0-automaton grouped-parse-table (listof prod?) output-port ->
;; Prints out the parser given by table.
(define (display-parser a grouped-table prods port)
  (let* ((SR-conflicts 0)
         (RR-conflicts 0))
    (for-each
     (lambda (prod)
       (fprintf port
                "~a\t~a\t=\t~a\n"
                (prod-index prod)
                (gram-sym-symbol (prod-lhs prod))
                (map gram-sym-symbol (vector->list (prod-rhs prod)))))
     prods)
    (send a for-each-state
          (lambda (state)
            (fprintf port "State ~a\n" (kernel-index state))
            (for-each (lambda (item)
                        (fprintf port "\t~a\n" (item->string item)))
                      (kernel-items state))
            (newline port)
            (for-each
             (lambda (gs/action)
               (let ((sym (gram-sym-symbol (car gs/action)))
                     (act (cdr gs/action)))
                 (cond
                   ((null? act) (void))
                   ((null? (cdr act))
                    (print-entry sym (car act) port))
                   (else
                    (fprintf port "begin conflict:\n")
                    (when (> (count reduce? act) 1)
                      (set! RR-conflicts (add1 RR-conflicts)))
                    (when (> (count shift? act) 0)
                      (set! SR-conflicts (add1 SR-conflicts)))
                    (map (lambda (x) (print-entry sym x port)) act)
                    (fprintf port "end conflict\n")))))
             (vector-ref grouped-table (kernel-index state)))
            (newline port)))

    (when (> SR-conflicts 0)
      (fprintf port "~a shift/reduce conflict~a\n"
               SR-conflicts
               (if (= SR-conflicts 1) "" "s")))
    (when (> RR-conflicts 0)
      (fprintf port "~a reduce/reduce conflict~a\n"
               RR-conflicts
               (if (= RR-conflicts 1) "" "s")))))

;; resolve-conflict : (listof action?) -> action? bool bool
(define (resolve-conflict actions)
  (cond
    ((null? actions) (values (make-no-action) #f #f))
    ((null? (cdr actions))
     (values (car actions) #f #f))
    (else
     (let ((SR-conflict? (> (count shift? actions) 0))
           (RR-conflict? (> (count reduce? actions) 1)))
       (let loop ((current-guess #f)
                  (rest actions))
         (cond
           ((null? rest) (values current-guess SR-conflict? RR-conflict?))
           ((shift? (car rest)) (values (car rest) SR-conflict? RR-conflict?))
           ((not current-guess)
            (loop (car rest) (cdr rest)))
           ((and (reduce? (car rest))
                 (< (prod-index (reduce-prod (car rest)))
                    (prod-index (reduce-prod current-guess))))
            (loop (car rest) (cdr rest)))
           ((accept? (car rest))
            (eprintf "accept/reduce or accept/shift conflicts. Check the grammar for useless cycles of productions\n")
            (loop current-guess (cdr rest)))
           (else (loop current-guess (cdr rest)))))))))

;; resolve-conflicts : grouped-parse-table bool -> parse-table
(define (resolve-conflicts grouped-table suppress)
  (let* ((SR-conflicts 0)
         (RR-conflicts 0)
         (table (table-map
                 (lambda (gs actions)
                   (let-values (((action SR? RR?)
                                 (resolve-conflict actions)))
                     (when SR?
                       (set! SR-conflicts (add1 SR-conflicts)))
                     (when RR?
                       (set! RR-conflicts (add1 RR-conflicts)))
                     action))
                 grouped-table)))
    (unless suppress
      (when (> SR-conflicts 0)
        (eprintf "~a shift/reduce conflict~a\n"
                 SR-conflicts
                 (if (= SR-conflicts 1) "" "s")))
      (when (> RR-conflicts 0)
        (eprintf "~a reduce/reduce conflict~a\n"
                 RR-conflicts
                 (if (= RR-conflicts 1) "" "s"))))
    table))


;; resolve-sr-conflict/prec : (listof action) (union int #f) -> (listof action)
;; Resolves a single shift-reduce conflict, if precedences are in place.
(define (resolve-sr-conflict/prec actions shift-prec)
  (let* ((shift (if (shift? (car actions))
                    (car actions)
                    (cadr actions)))
         (reduce (if (shift? (car actions))
                     (cadr actions)
                     (car actions)))
         (reduce-prec (prod-prec (reduce-prod reduce))))
    (cond
      ((and shift-prec reduce-prec)
       (cond
         ((< (prec-num shift-prec) (prec-num reduce-prec))
          (list reduce))
         ((> (prec-num shift-prec) (prec-num reduce-prec))
          (list shift))
         ((eq? 'left (prec-assoc shift-prec))
          (list reduce))
         ((eq? 'right (prec-assoc shift-prec))
          (list shift))
         (else null)))
      (else actions))))
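;; Reading the cond above as a rule: when both the lookahead token and the
;; reduce production carry a precedence, the higher prec-num wins; on a tie,
;; 'left associativity keeps the reduce, 'right keeps the shift, and a
;; non-associative tie drops both actions (so the entry becomes an error).
;; If either precedence is missing, the conflict is left for resolve-conflict.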


;; resolve-prec-conflicts : parse-table -> grouped-parse-table
(define (resolve-prec-conflicts table)
  (table-map
   (lambda (gs actions)
     (cond
       ((and (term? gs)
             (= 2 (length actions))
             (or (shift? (car actions))
                 (shift? (cadr actions))))
        (resolve-sr-conflict/prec actions (term-prec gs)))
       (else actions)))
   (group-table table)))

;; build-table: grammar string bool -> parse-table
(define (build-table g file suppress)
  (let* ((a (build-lr0-automaton g))
         (term-vector (list->vector (send g get-terms)))
         (end-terms (send g get-end-terms))
         (table (make-parse-table (send a get-num-states)))
         (get-lookahead (compute-LA a g))
         (reduce-cache (make-hash)))

    (for-each
     (lambda (trans-key/state)
       (let ((from-state-index (kernel-index (trans-key-st (car trans-key/state))))
             (gs (trans-key-gs (car trans-key/state)))
             (to-state (cdr trans-key/state)))
         (table-add! table from-state-index gs
                     (cond
                       ((non-term? gs)
                        (make-goto (kernel-index to-state)))
                       ((member gs end-terms)
                        (make-accept))
                       (else
                        (make-shift
                         (kernel-index to-state)))))))
     (send a get-transitions))

    (send a for-each-state
          (lambda (state)
            (for-each
             (lambda (item)
               (let ((item-prod (item-prod item)))
                 (bit-vector-for-each
                  (lambda (term-index)
                    (unless (start-item? item)
                      (let ((r (hash-ref reduce-cache item-prod
                                         (lambda ()
                                           (let ((r (make-reduce item-prod)))
                                             (hash-set! reduce-cache item-prod r)
                                             r)))))
                        (table-add! table
                                    (kernel-index state)
                                    (vector-ref term-vector term-index)
                                    r))))
                  (get-lookahead state item-prod))))
             (append (hash-ref (send a get-epsilon-trans) state (lambda () null))
                     (filter (lambda (item)
                               (not (move-dot-right item)))
                             (kernel-items state))))))

    (let ((grouped-table (resolve-prec-conflicts table)))
      (unless (string=? file "")
        (with-handlers [(exn:fail:filesystem?
                         (lambda (e)
                           (eprintf
                            "Cannot write debug output to file \"~a\": ~a\n"
                            file
                            (exn-message e))))]
          (call-with-output-file file
            (lambda (port)
              (display-parser a grouped-table (send g get-prods) port))
            #:exists 'truncate)))
      (resolve-conflicts grouped-table suppress))))
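;; Summary of the pipeline above: build the LR(0) automaton, turn its
;; transitions into shift/goto/accept entries, add a reduce entry for every
;; completed item under each LALR lookahead terminal, resolve what the
;; precedence declarations can resolve, optionally dump the table to the debug
;; file, and finally pick one action per entry (reporting remaining conflicts).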
@ -0,0 +1,118 @@
(module yacc-helper mzscheme

  (require mzlib/list
           "../private-lex/token-syntax.rkt")

  ;; General helper routines

  (provide duplicate-list? remove-duplicates overlap? vector-andmap display-yacc)

  (define (vector-andmap f v)
    (let loop ((i 0))
      (cond
        ((= i (vector-length v)) #t)
        (else (if (f (vector-ref v i))
                  (loop (add1 i))
                  #f)))))

  ;; duplicate-list?: symbol list -> #f | symbol
  ;; returns a symbol that exists twice in l, or false if no such symbol
  ;; exists
  (define (duplicate-list? l)
    (letrec ((t (make-hash-table))
             (dl? (lambda (l)
                    (cond
                      ((null? l) #f)
                      ((hash-table-get t (car l) (lambda () #f)) =>
                       (lambda (x) x))
                      (else
                       (hash-table-put! t (car l) (car l))
                       (dl? (cdr l)))))))
      (dl? l)))

  ;; remove-duplicates: syntax-object list -> syntax-object list
  ;; removes the duplicates from the lists
  (define (remove-duplicates sl)
    (let ((t (make-hash-table)))
      (letrec ((x
                (lambda (sl)
                  (cond
                    ((null? sl) sl)
                    ((hash-table-get t (syntax-object->datum (car sl)) (lambda () #f))
                     (x (cdr sl)))
                    (else
                     (hash-table-put! t (syntax-object->datum (car sl)) #t)
                     (cons (car sl) (x (cdr sl))))))))
        (x sl))))

  ;; overlap?: symbol list * symbol list -> #f | symbol
  ;; Returns a symbol in the intersection of l1 and l2, or #f if no such
  ;; symbol exists
  (define (overlap? l1 l2)
    (let/ec ret
      (let ((t (make-hash-table)))
        (for-each (lambda (s1)
                    (hash-table-put! t s1 s1))
                  l1)
        (for-each (lambda (s2)
                    (cond
                      ((hash-table-get t s2 (lambda () #f)) =>
                       (lambda (o) (ret o)))))
                  l2)
        #f)))
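  ;; Quick sketches of the two set helpers above (example data made up; the
  ;; commented-out forms would evaluate inside this module):
  #;(duplicate-list? '(a b c b))   ; => b, the first symbol seen twice
  #;(duplicate-list? '(a b c))     ; => #f
  #;(overlap? '(a b c) '(x c y))   ; => c, a symbol common to both lists
  #;(overlap? '(a b) '(x y))       ; => #f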


  (define (display-yacc grammar tokens start precs port)
    (let-syntax ((p (syntax-rules ()
                      ((_ args ...) (fprintf port args ...)))))
      (let* ((tokens (map syntax-local-value tokens))
             (eterms (filter e-terminals-def? tokens))
             (terms (filter terminals-def? tokens))
             (term-table (make-hash-table))
             (display-rhs
              (lambda (rhs)
                (for-each (lambda (sym) (p "~a " (hash-table-get term-table sym (lambda () sym))))
                          (car rhs))
                (if (= 3 (length rhs))
                    (p "%prec ~a" (cadadr rhs)))
                (p "\n"))))
        (for-each
         (lambda (t)
           (for-each
            (lambda (t)
              (hash-table-put! term-table t (format "'~a'" t)))
            (syntax-object->datum (e-terminals-def-t t))))
         eterms)
        (for-each
         (lambda (t)
           (for-each
            (lambda (t)
              (p "%token ~a\n" t)
              (hash-table-put! term-table t (format "~a" t)))
            (syntax-object->datum (terminals-def-t t))))
         terms)
        (if precs
            (for-each (lambda (prec)
                        (p "%~a " (car prec))
                        (for-each (lambda (tok)
                                    (p " ~a" (hash-table-get term-table tok)))
                                  (cdr prec))
                        (p "\n"))
                      precs))
        (p "%start ~a\n" start)
        (p "%%\n")

        (for-each (lambda (prod)
                    (let ((nt (car prod)))
                      (p "~a: " nt)
                      (display-rhs (cadr prod))
                      (for-each (lambda (rhs)
                                  (p "| ")
                                  (display-rhs rhs))
                                (cddr prod))
                      (p ";\n")))
                  grammar)
        (p "%%\n"))))


  )

@ -0,0 +1,135 @@
(module yacc-to-scheme mzscheme
  (require br-parser-tools/lex
           (prefix : br-parser-tools/lex-sre)
           br-parser-tools/yacc
           syntax/readerr
           mzlib/list)
  (provide trans)

  (define match-double-string
    (lexer
     ((:+ (:~ #\" #\\)) (append (string->list lexeme)
                                (match-double-string input-port)))
     ((:: #\\ any-char) (cons (string-ref lexeme 1) (match-double-string input-port)))
     (#\" null)))

  (define match-single-string
    (lexer
     ((:+ (:~ #\' #\\)) (append (string->list lexeme)
                                (match-single-string input-port)))
     ((:: #\\ any-char) (cons (string-ref lexeme 1) (match-single-string input-port)))
     (#\' null)))

  (define-lex-abbrevs
    (letter (:or (:/ "a" "z") (:/ "A" "Z")))
    (digit (:/ "0" "9"))
    (initial (:or letter (char-set "!$%&*/<=>?^_~@")))
    (subsequent (:or initial digit (char-set "+-.@")))
    (comment (:: "/*" (complement (:: any-string "*/" any-string)) "*/")))

  (define-empty-tokens x
    (EOF PIPE |:| SEMI |%%| %prec))
  (define-tokens y
    (SYM STRING))

  (define get-token-grammar
    (lexer-src-pos
     ("%%" '|%%|)
     (":" (string->symbol lexeme))
     ("%prec" (string->symbol lexeme))
     (#\| 'PIPE)
     ((:+ (:or #\newline #\tab " " comment (:: "{" (:* (:~ "}")) "}")))
      (return-without-pos (get-token-grammar input-port)))
     (#\; 'SEMI)
     (#\' (token-STRING (string->symbol (list->string (match-single-string input-port)))))
     (#\" (token-STRING (string->symbol (list->string (match-double-string input-port)))))
     ((:: initial (:* subsequent)) (token-SYM (string->symbol lexeme)))))

  (define (parse-grammar enter-term enter-empty-term enter-non-term)
    (parser
     (tokens x y)
     (src-pos)
     (error (lambda (tok-ok tok-name tok-value start-pos end-pos)
              (raise-read-error
               (format "Error Parsing YACC grammar at token: ~a with value: ~a" tok-name tok-value)
               (file-path)
               (position-line start-pos)
               (position-col start-pos)
               (position-offset start-pos)
               (- (position-offset end-pos) (position-offset start-pos)))))

     (end |%%|)
     (start gram)
     (grammar
      (gram
       ((production) (list $1))
       ((production gram) (cons $1 $2)))
      (production
       ((SYM |:| prods SEMI)
        (begin
          (enter-non-term $1)
          (cons $1 $3))))
      (prods
       ((rhs) (list `(,$1 #f)))
       ((rhs prec) (list `(,$1 ,$2 #f)))
       ((rhs PIPE prods) (cons `(,$1 #f) $3))
       ((rhs prec PIPE prods) (cons `(,$1 ,$2 #f) $4)))
      (prec
       ((%prec SYM)
        (begin
          (enter-term $2)
          (list 'prec $2)))
       ((%prec STRING)
        (begin
          (enter-empty-term $2)
          (list 'prec $2))))
      (rhs
       (() null)
       ((SYM rhs)
        (begin
          (enter-term $1)
          (cons $1 $2)))
       ((STRING rhs)
        (begin
          (enter-empty-term $1)
          (cons $1 $2)))))))

  (define (symbol<? a b)
    (string<? (symbol->string a) (symbol->string b)))

  (define (trans filename)
    (let* ((i (open-input-file filename))
           (terms (make-hash-table))
           (eterms (make-hash-table))
           (nterms (make-hash-table))
           (enter-term
            (lambda (s)
              (if (not (hash-table-get nterms s (lambda () #f)))
                  (hash-table-put! terms s #t))))
           (enter-empty-term
            (lambda (s)
              (if (not (hash-table-get nterms s (lambda () #f)))
                  (hash-table-put! eterms s #t))))
           (enter-non-term
            (lambda (s)
              (hash-table-remove! terms s)
              (hash-table-remove! eterms s)
              (hash-table-put! nterms s #t))))
      (port-count-lines! i)
      (file-path filename)
      (regexp-match "%%" i)
      (begin0
        (let ((gram ((parse-grammar enter-term enter-empty-term enter-non-term)
                     (lambda ()
                       (let ((t (get-token-grammar i)))
                         t)))))
          `(begin
             (define-tokens t ,(sort (hash-table-map terms (lambda (k v) k)) symbol<?))
             (define-empty-tokens et ,(sort (hash-table-map eterms (lambda (k v) k)) symbol<?))
             (parser
              (start ___)
              (end ___)
              (error ___)
              (tokens t et)
              (grammar ,@gram))))
        (close-input-port i)))))
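;; Usage sketch: (trans "grammar.y") -- the filename is made up -- reads a
;; yacc/bison grammar file and returns an S-expression skeleton of the form
;; (begin (define-tokens ...) (define-empty-tokens ...) (parser (start ___) ...));
;; the ___ placeholders (start, end and error clauses) have to be filled in by
;; hand before the generated parser can actually be used.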
@ -0,0 +1,412 @@
#lang scheme/base

(require (for-syntax scheme/base
                     "private-yacc/parser-builder.rkt"
                     "private-yacc/grammar.rkt"
                     "private-yacc/yacc-helper.rkt"
                     "private-yacc/parser-actions.rkt"))
(require "private-lex/token.rkt"
         "private-yacc/parser-actions.rkt"
         mzlib/etc
         mzlib/pretty
         syntax/readerr)

(provide parser)


;; convert-parse-table : (vectorof (listof (cons/c gram-sym? action?))) ->
;;                       (vectorof (symbol runtime-action hashtable))
(define-for-syntax (convert-parse-table table)
  (list->vector
   (map
    (lambda (state-entry)
      (let ((ht (make-hasheq)))
        (for-each
         (lambda (gs/action)
           (hash-set! ht
                      (gram-sym-symbol (car gs/action))
                      (action->runtime-action (cdr gs/action))))
         state-entry)
        ht))
    (vector->list table))))

(define-syntax (parser stx)
  (syntax-case stx ()
    ((_ args ...)
     (let ((arg-list (syntax->list (syntax (args ...))))
           (src-pos #f)
           (debug #f)
           (error #f)
           (tokens #f)
           (start #f)
           (end #f)
           (precs #f)
           (suppress #f)
           (grammar #f)
           (yacc-output #f))
       (for-each
        (lambda (arg)
          (syntax-case* arg (debug error tokens start end precs grammar
                             suppress src-pos yacc-output)
            (lambda (a b)
              (eq? (syntax-e a) (syntax-e b)))
            ((debug filename)
             (cond
               ((not (string? (syntax-e (syntax filename))))
                (raise-syntax-error
                 #f
                 "Debugging filename must be a string"
                 stx
                 (syntax filename)))
               (debug
                (raise-syntax-error #f "Multiple debug declarations" stx))
               (else
                (set! debug (syntax-e (syntax filename))))))
            ((suppress)
             (set! suppress #t))
            ((src-pos)
             (set! src-pos #t))
            ((error expression)
             (if error
                 (raise-syntax-error #f "Multiple error declarations" stx)
                 (set! error (syntax expression))))
            ((tokens def ...)
             (begin
               (when tokens
                 (raise-syntax-error #f "Multiple tokens declarations" stx))
               (let ((defs (syntax->list (syntax (def ...)))))
                 (for-each
                  (lambda (d)
                    (unless (identifier? d)
                      (raise-syntax-error
                       #f
                       "Token-group name must be an identifier"
                       stx
                       d)))
                  defs)
                 (set! tokens defs))))
            ((start symbol ...)
             (let ((symbols (syntax->list (syntax (symbol ...)))))
               (for-each
                (lambda (sym)
                  (unless (identifier? sym)
                    (raise-syntax-error #f
                                        "Start symbol must be a symbol"
                                        stx
                                        sym)))
                symbols)
               (when start
                 (raise-syntax-error #f "Multiple start declarations" stx))
               (when (null? symbols)
                 (raise-syntax-error #f
                                     "Missing start symbol"
                                     stx
                                     arg))
               (set! start symbols)))
            ((end symbols ...)
             (let ((symbols (syntax->list (syntax (symbols ...)))))
               (for-each
                (lambda (sym)
                  (unless (identifier? sym)
                    (raise-syntax-error #f
                                        "End token must be a symbol"
                                        stx
                                        sym)))
                symbols)
               (let ((d (duplicate-list? (map syntax-e symbols))))
                 (when d
                   (raise-syntax-error
                    #f
                    (format "Duplicate end token definition for ~a" d)
                    stx
                    arg))
                 (when (null? symbols)
                   (raise-syntax-error
                    #f
                    "end declaration must contain at least 1 token"
                    stx
                    arg))
                 (when end
                   (raise-syntax-error #f "Multiple end declarations" stx))
                 (set! end symbols))))
            ((precs decls ...)
             (if precs
                 (raise-syntax-error #f "Multiple precs declarations" stx)
                 (set! precs (syntax/loc arg (decls ...)))))
            ((grammar prods ...)
             (if grammar
                 (raise-syntax-error #f "Multiple grammar declarations" stx)
                 (set! grammar (syntax/loc arg (prods ...)))))
            ((yacc-output filename)
             (cond
               ((not (string? (syntax-e (syntax filename))))
                (raise-syntax-error #f
                                    "Yacc-output filename must be a string"
                                    stx
                                    (syntax filename)))
               (yacc-output
                (raise-syntax-error #f "Multiple yacc-output declarations" stx))
               (else
                (set! yacc-output (syntax-e (syntax filename))))))
            (_ (raise-syntax-error #f "argument must match (debug filename), (error expression), (tokens def ...), (start non-term), (end tokens ...), (precs decls ...), or (grammar prods ...)" stx arg))))
        (syntax->list (syntax (args ...))))
       (unless tokens
         (raise-syntax-error #f "missing tokens declaration" stx))
       (unless error
         (raise-syntax-error #f "missing error declaration" stx))
       (unless grammar
         (raise-syntax-error #f "missing grammar declaration" stx))
       (unless end
         (raise-syntax-error #f "missing end declaration" stx))
       (unless start
         (raise-syntax-error #f "missing start declaration" stx))
       (let-values (((table all-term-syms actions check-syntax-fix)
                     (build-parser (if debug debug "")
                                   src-pos
                                   suppress
                                   tokens
                                   start
                                   end
                                   precs
                                   grammar)))
         (when (and yacc-output (not (string=? yacc-output "")))
           (with-handlers [(exn:fail:filesystem?
                            (lambda (e)
                              (eprintf
                               "Cannot write yacc-output to file \"~a\"\n"
                               yacc-output)))]
             (call-with-output-file yacc-output
               (lambda (port)
                 (display-yacc (syntax->datum grammar)
                               tokens
                               (map syntax->datum start)
                               (if precs
                                   (syntax->datum precs)
                                   #f)
                               port))
               #:exists 'truncate)))
         (with-syntax ((check-syntax-fix check-syntax-fix)
                       (err error)
                       (ends end)
                       (starts start)
                       (debug debug)
                       (table (convert-parse-table table))
                       (all-term-syms all-term-syms)
                       (actions actions)
                       (src-pos src-pos))
           (syntax
            (begin
              check-syntax-fix
              (parser-body debug err (quote starts) (quote ends) table all-term-syms actions src-pos)))))))
    (_
     (raise-syntax-error #f
                         "parser must have the form (parser args ...)"
                         stx))))
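;; Usage sketch (what a client module would write; the tiny calculator grammar
;; and token names below are made up for illustration):
;;
;;   (require br-parser-tools/yacc br-parser-tools/lex)
;;   (define-tokens value-tokens (NUM))
;;   (define-empty-tokens op-tokens (PLUS EOF))
;;   (define calc
;;     (parser
;;      (tokens value-tokens op-tokens)
;;      (start exp)
;;      (end EOF)
;;      (error (lambda (tok-ok? name val) (error 'calc "bad token: ~a" name)))
;;      (grammar
;;       (exp ((NUM) $1)
;;            ((NUM PLUS exp) (+ $1 $3))))))
;;
;;   (calc (let ((toks (list (token-NUM 1) (token-PLUS) (token-NUM 2) (token-EOF))))
;;           (lambda () (begin0 (car toks) (set! toks (cdr toks))))))   ; => 3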

(define (reduce-stack stack num ret-vals src-pos)
  (cond
    ((> num 0)
     (let* ((top-frame (car stack))
            (ret-vals
             (if src-pos
                 (cons (stack-frame-value top-frame)
                       (cons (stack-frame-start-pos top-frame)
                             (cons (stack-frame-end-pos top-frame)
                                   ret-vals)))
                 (cons (stack-frame-value top-frame) ret-vals))))
       (reduce-stack (cdr stack) (sub1 num) ret-vals src-pos)))
    (else (values stack ret-vals))))

;; extract-helper : (symbol or make-token) any any -> symbol any any any
(define (extract-helper tok v1 v2)
  (cond
    ((symbol? tok)
     (values tok #f v1 v2))
    ((token? tok)
     (values (real-token-name tok) (real-token-value tok) v1 v2))
    (else (raise-argument-error 'parser
                                "(or/c symbol? token?)"
                                0
                                tok))))

;; well-formed-position-token?: any -> boolean
;; Returns true if pt is a position token whose position-token-token
;; is itself a token or a symbol.
;; This is meant to help raise more precise error messages when a
;; tokenizer produces an erroneous position-token wrapped twice
;; (as often happens when omitting return-without-pos).
(define (well-formed-token-field? t)
  (or (symbol? t)
      (token? t)))

(define (well-formed-position-token? pt)
  (and (position-token? pt)
       (well-formed-token-field? (position-token-token pt))))

(define (well-formed-srcloc-token? st)
  (and (srcloc-token? st)
       (well-formed-token-field? (srcloc-token-token st))))

;; extract-src-pos : position-token -> symbol any any any
(define (extract-src-pos ip)
  (unless (well-formed-position-token? ip)
    (raise-argument-error 'parser
                          "well-formed-position-token?"
                          0
                          ip))
  (extract-helper (position-token-token ip)
                  (position-token-start-pos ip)
                  (position-token-end-pos ip)))

(define (extract-srcloc ip)
  (unless (well-formed-srcloc-token? ip)
    (raise-argument-error 'parser
                          "well-formed-srcloc-token?"
                          0
                          ip))
  (let ([loc (srcloc-token-srcloc ip)])
    (extract-helper (srcloc-token-token ip)
                    (position-token (srcloc-position loc) (srcloc-line loc) (srcloc-column loc))
                    (position-token (+ (srcloc-position loc) (srcloc-span loc)) #f #f))))


;; extract-no-src-pos : (symbol or make-token) -> symbol any any any
(define (extract-no-src-pos ip)
  (extract-helper ip #f #f))

(define-struct stack-frame (state value start-pos end-pos) #:inspector (make-inspector))

(define (make-empty-stack i) (list (make-stack-frame i #f #f #f)))


;; The table is a vector that maps each state to a hash-table that maps a
;; terminal symbol to either an accept, shift, reduce, or goto structure.
;; We encode the structures according to the runtime-action data definition in
;; parser-actions.rkt
(define (parser-body debug? err starts ends table all-term-syms actions src-pos)
  (local ((define extract
            (if src-pos
                extract-src-pos
                extract-no-src-pos))

          (define (fix-error stack tok val start-pos end-pos get-token)
            (when debug? (pretty-print stack))
            (local ((define (remove-input tok val start-pos end-pos)
                      (if (memq tok ends)
                          (raise-read-error "parser: Cannot continue after error"
                                            #f #f #f #f #f)
                          (let ((a (find-action stack tok val start-pos end-pos)))
                            (cond
                              ((runtime-shift? a)
                               ;; (printf "shift:~a\n" (runtime-shift-state a))
                               (cons (make-stack-frame (runtime-shift-state a)
                                                       val
                                                       start-pos
                                                       end-pos)
                                     stack))
                              (else
                               ;; (printf "discard input:~a\n" tok)
                               (let-values (((tok val start-pos end-pos)
                                             (extract (get-token))))
                                 (remove-input tok val start-pos end-pos))))))))
              (let remove-states ()
                (let ((a (find-action stack 'error #f start-pos end-pos)))
                  (cond
                    ((runtime-shift? a)
                     ;; (printf "shift:~a\n" (runtime-shift-state a))
                     (set! stack
                           (cons
                            (make-stack-frame (runtime-shift-state a)
                                              #f
                                              start-pos
                                              end-pos)
                            stack))
                     (remove-input tok val start-pos end-pos))
                    (else
                     ;; (printf "discard state:~a\n" (car stack))
                     (cond
                       ((< (length stack) 2)
                        (raise-read-error "parser: Cannot continue after error"
                                          #f #f #f #f #f))
                       (else
                        (set! stack (cdr stack))
                        (remove-states)))))))))

          (define (find-action stack tok val start-pos end-pos)
            (unless (hash-ref all-term-syms
                              tok
                              #f)
              (if src-pos
                  (err #f tok val start-pos end-pos)
                  (err #f tok val))
              (raise-read-error (format "parser: got token of unknown type ~a" tok)
                                #f #f #f #f #f))
            (hash-ref (vector-ref table (stack-frame-state (car stack)))
                      tok
                      #f))

          (define (make-parser start-number)
            (lambda (get-token)
              (unless (and (procedure? get-token)
                           (procedure-arity-includes? get-token 0))
                (error 'get-token "expected a nullary procedure, got ~e" get-token))
              (let parsing-loop ((stack (make-empty-stack start-number))
                                 (ip (get-token)))
                (let-values (((tok val start-pos end-pos)
                              (extract ip)))
                  (let ((action (find-action stack tok val start-pos end-pos)))
                    (cond
                      ((runtime-shift? action)
                       ;; (printf "shift:~a\n" (runtime-shift-state action))
                       (parsing-loop (cons (make-stack-frame (runtime-shift-state action)
                                                             val
                                                             start-pos
                                                             end-pos)
                                           stack)
                                     (get-token)))
                      ((runtime-reduce? action)
                       ;; (printf "reduce:~a\n" (runtime-reduce-prod-num action))
                       (let-values (((new-stack args)
                                     (reduce-stack stack
                                                   (runtime-reduce-rhs-length action)
                                                   null
                                                   src-pos)))
                         (let ((goto
                                (runtime-goto-state
                                 (hash-ref
                                  (vector-ref table (stack-frame-state (car new-stack)))
                                  (runtime-reduce-lhs action)))))
                           (parsing-loop
                            (cons
                             (if src-pos
                                 (make-stack-frame
                                  goto
                                  (apply (vector-ref actions (runtime-reduce-prod-num action)) args)
                                  (if (null? args) start-pos (cadr args))
                                  (if (null? args)
                                      end-pos
                                      (list-ref args (- (* (runtime-reduce-rhs-length action) 3) 1))))
                                 (make-stack-frame
                                  goto
                                  (apply (vector-ref actions (runtime-reduce-prod-num action)) args)
                                  #f
                                  #f))
                             new-stack)
                            ip))))
                      ((runtime-accept? action)
                       ;; (printf "accept\n")
                       (stack-frame-value (car stack)))
                      (else
                       (if src-pos
                           (err #t tok val start-pos end-pos)
                           (err #t tok val))
                       (parsing-loop (fix-error stack tok val start-pos end-pos get-token)
                                     (get-token))))))))))
    (cond
      ((null? (cdr starts)) (make-parser 0))
      (else
       (let loop ((l starts)
                  (i 0))
         (cond
           ((null? l) null)
           (else (cons (make-parser i) (loop (cdr l) (add1 i))))))))))
@ -0,0 +1,11 @@
#lang info

(define collection 'multi)
(define deps '("scheme-lib"
               "base"
               "compatibility-lib"))
(define build-deps '("rackunit-lib"))

(define pkg-desc "implementation (no documentation) part of \"br-parser-tools\"")

(define pkg-authors '(mflatt))
@ -0,0 +1,11 @@
parser-tools
Copyright (c) 2010-2014 PLT Design Inc.

This package is distributed under the GNU Lesser General Public
License (LGPL). This means that you can link this package into proprietary
applications, provided you follow the rules stated in the LGPL. You
can also modify this package; if you distribute a modified version,
you must distribute it under the terms of the LGPL, which in
particular means that you must release the source code for the
modified software. See http://www.gnu.org/copyleft/lesser.html
for more information.
@ -0,0 +1,12 @@
#lang info

(define collection 'multi)

(define deps '("br-parser-tools-lib"
               "br-parser-tools-doc"))
(define implies '("br-parser-tools-lib"
                  "br-parser-tools-doc"))

(define pkg-desc "Lex- and Yacc-style parsing tools")

(define pkg-authors '(mflatt))