More robust interface to Pygments (#230)

dev-output-files-201008
Shrutarshi Basu 4 years ago committed by GitHub
parent 88c354cb5f
commit 123547f3cb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -1,8 +1,22 @@
# This allows us to launch Python and pygments once, and pipe to it
# continuously. Input format is:
# continuously.
#
# <lexer-name>
# <list-of-highlighted-lines>
# There are four options:
# 1. Language (used to determine the lexer)
# 2. Line numbers to highlight
# 3. Encoding to use for the output
# 4. HTML Class to use for the output
#
# These can be specified as arguments when this script is first invoked, or on a
# per invocation.
#
# To set the options for each invocation, the format is:
#
# __LANG__ <lexer-name>
# __LINENOS__ <true|false>
# __LINES__ <list-of-highlighted-lines>
# __CSS__ <CSS-class>
# __ENC__ <encoding>
# <code>
# ...
# __END__
@ -17,57 +31,120 @@
# ...
# __END__
from __future__ import print_function
import sys
import optparse
from pygments import highlight
from pygments.lexers import get_lexer_by_name
from pygments.util import ClassNotFound
from pygments.formatters import HtmlFormatter
def get_lexer(lang):
if not lang:
get_lexer_by_name("text", encoding="guess")
try:
return get_lexer_by_name(lang, encoding="guess")
except ClassNotFound:
print("No lexer was found for the given language. Using plain text instead.", file=sys.stderr)
return get_lexer_by_name("text", encoding="guess")
parser = optparse.OptionParser()
parser.add_option('--linenos', action="store_true", dest="linenos")
parser.add_option('--cssclass', default="source", dest="cssclass")
parser.add_option('--encoding', default="utf-8", dest="encoding")
parser.add_option('--language', dest="language")
(options, _) = parser.parse_args()
lexer = ""
# Set initial options
config = {
'linenos': options.linenos,
'cssclass': options.cssclass,
'encoding': options.encoding,
'hl_lines': []
}
lexer = get_lexer(options.language)
code = ""
lines_to_highlight = ""
py_version = sys.version_info.major
sys.stdout.write("ready\n")
sys.stdout.flush()
while 1:
line_raw = sys.stdin.readline()
if not line_raw:
break
# Without trailing space, \n, or \n
line = line_raw.rstrip()
if line == '__EXIT__':
break
elif line == '__END__':
# Lex input finished. Lex it.
formatter = HtmlFormatter(linenos=options.linenos,
cssclass=options.cssclass,
encoding="utf-8",
hl_lines=lines_to_highlight)
formatter = HtmlFormatter(linenos=config['linenos'],
cssclass=config['cssclass'],
encoding=config['encoding'],
hl_lines=config['hl_lines'])
if py_version >= 3:
sys.stdout.write(highlight(code, lexer, formatter).decode("utf-8"))
else:
sys.stdout.write(highlight(code, lexer, formatter))
sys.stdout.write('\n__END__\n')
sys.stdout.flush()
lexer = ""
# Reset the configuration for the next invocation. Most options are
# actually persisted between runs, except for the code itself and the
# lines to be highlighted.
code = ""
lines_to_highlight = ""
elif lexer == "":
# Starting another lex. First line is the lexer name.
try:
lexer = get_lexer_by_name(line, encoding="guess")
except ClassNotFound:
lexer = get_lexer_by_name("text", encoding="guess")
elif lines_to_highlight == "":
# Starting another lex. Second line is list of lines to highlight,
# formatted as string of whitespace-separated integers
lines_to_highlight = [int(str) for str in line.split()]
config['hl_lines'] = []
elif code == "":
# Only check for new options at the beginning of a a fresh invocation
if line.startswith("__LANG__"):
# Use the provided language to find the appropriate lexer.
try:
lang = line.split()[1]
lexer = get_lexer(lang)
except IndexError:
print("No lexer was found for the given language. Using plain text instead.", file=sys.stderr)
lexer = get_lexer_by_name("text", encoding="guess")
elif line.startswith("__LINENOS__"):
try:
option = line.split()[1]
if option.lower() == "true":
config['linenos'] = True
elif option.lower() == "false":
config['linenos'] = False
else:
pass
except IndexError:
print("__LINENOS__ option must be given a `true` or `false` value",
file=sys.stderr)
elif line.startswith("__LINES__"):
# The list of lines to highlight is formatted as string of
# whitespace-separated integers
lines = line.split()[1:]
config['hl_lines'] = [int(str) for str in lines]
elif line.startswith("__CSS__"):
try:
config['cssclass'] = line.split[1]
except IndexError:
print("Could not parse CSS class line.", file=sys.stderr)
elif line.startswith("__ENC__"):
try:
config['encoding'] = line.split[1]
except IndexError:
print("Could not parse encoding line.", file=sys.stderr)
else:
# Done with configuration for this invocation, start accumulating
# code. Use `line_raw` because we want trailing space, \n, \r
code += line_raw
else:
# Accumulate more code
# Use `line_raw`: Do want trailing space, \n, \r

@ -130,11 +130,18 @@ if zero is False:
(unless (running?)
(start python-executable line-numbers? css-class))
(cond [(running?)
;; order of writing arguments is significant: cooperates with pipe.py
(displayln lang pyg-out)
(displayln (string-join (map number->string hl-lines) " ") pyg-out)
;; This works with a simple wrapper around Pygments defined in pipe.py.
;; First send over the configuration options, then code to highlight.
;; pipe.py also supports an encoding option, but seems unnecessary to
;; use at this point.
(fprintf pyg-out "__LANG__ ~a~n" lang)
(fprintf pyg-out "__LINES__ ~a~n" (string-join (map number->string hl-lines) " "))
(fprintf pyg-out "__LINENOS__ ~a~n"
(if line-numbers? "true" "false"))
(displayln code pyg-out)
(displayln "__END__" pyg-out)
;; Read back the highlighted code
(let loop ([s ""])
(match (read-line pyg-in 'any)
["__END__" (with-input-from-string s read-html-as-xexprs)]

Loading…
Cancel
Save