From 123547f3cbb8f3860d922dfff5f09921243ebe95 Mon Sep 17 00:00:00 2001
From: Shrutarshi Basu
Date: Fri, 28 Aug 2020 16:12:49 -0400
Subject: [PATCH] More robust interface to Pygments (#230)

---
 pollen/private/external/pipe.py | 123 ++++++++++++++++++++++++++------
 pollen/unstable/pygments.rkt    |  13 +++-
 2 files changed, 111 insertions(+), 25 deletions(-)

diff --git a/pollen/private/external/pipe.py b/pollen/private/external/pipe.py
index 3b78f30..85024de 100644
--- a/pollen/private/external/pipe.py
+++ b/pollen/private/external/pipe.py
@@ -1,8 +1,22 @@
 # This allows us to launch Python and pygments once, and pipe to it
-# continuously. Input format is:
+# continuously.
 #
-#
-#
+# There are four options:
+# 1. Language (used to determine the lexer)
+# 2. Line numbers to highlight
+# 3. Encoding to use for the output
+# 4. HTML Class to use for the output
+#
+# These can be specified as arguments when this script is first invoked, or on a
+# per-invocation basis.
+#
+# To set the options for each invocation, the format is:
+#
+#     __LANG__ NAME
+#     __LINENOS__ true|false
+#     __LINES__ N1 N2 ...
+#     __CSS__ CLASS
+#     __ENC__ ENCODING
 #
 #     ...
 #     __END__
@@ -17,57 +31,122 @@
 #     ...
 #     __END__
 
+from __future__ import print_function
 import sys
 import optparse
+
 from pygments import highlight
 from pygments.lexers import get_lexer_by_name
 from pygments.util import ClassNotFound
 from pygments.formatters import HtmlFormatter
 
+def get_lexer(lang):
+    """Return the lexer named `lang`, falling back to the plain-text lexer."""
+    if not lang:
+        return get_lexer_by_name("text", encoding="guess")
+
+    try:
+        return get_lexer_by_name(lang, encoding="guess")
+    except ClassNotFound:
+        print("No lexer was found for the given language. Using plain text instead.", file=sys.stderr)
+        return get_lexer_by_name("text", encoding="guess")
+
 parser = optparse.OptionParser()
 parser.add_option('--linenos', action="store_true", dest="linenos")
 parser.add_option('--cssclass', default="source", dest="cssclass")
+parser.add_option('--encoding', default="utf-8", dest="encoding")
+parser.add_option('--language', dest="language")
 (options, _) = parser.parse_args()
 
-lexer = ""
+# Set initial options
+config = {
+    'linenos': options.linenos,
+    'cssclass': options.cssclass,
+    'encoding': options.encoding,
+    'hl_lines': []
+}
+lexer = get_lexer(options.language)
 code = ""
-lines_to_highlight = ""
+
 py_version = sys.version_info.major
 sys.stdout.write("ready\n")
 sys.stdout.flush()
+
 while 1:
     line_raw = sys.stdin.readline()
     if not line_raw:
         break
     # Without trailing space, \n, or \n
     line = line_raw.rstrip()
+
     if line == '__EXIT__':
         break
     elif line == '__END__':
         # Lex input finished. Lex it.
-        formatter = HtmlFormatter(linenos=options.linenos,
-                                  cssclass=options.cssclass,
-                                  encoding="utf-8",
-                                  hl_lines=lines_to_highlight)
+        formatter = HtmlFormatter(linenos=config['linenos'],
+                                  cssclass=config['cssclass'],
+                                  encoding=config['encoding'],
+                                  hl_lines=config['hl_lines'])
         if py_version >= 3:
-            sys.stdout.write(highlight(code, lexer, formatter).decode("utf-8"))
+            # Decode with the same encoding the formatter was told to emit.
+            sys.stdout.write(highlight(code, lexer, formatter).decode(config['encoding']))
         else:
             sys.stdout.write(highlight(code, lexer, formatter))
         sys.stdout.write('\n__END__\n')
         sys.stdout.flush()
-        lexer = ""
+
+        # Reset the configuration for the next invocation. Most options are
+        # actually persisted between runs, except for the code itself and the
+        # lines to be highlighted.
         code = ""
-        lines_to_highlight = ""
-    elif lexer == "":
-        # Starting another lex. First line is the lexer name.
-        try:
-            lexer = get_lexer_by_name(line, encoding="guess")
-        except ClassNotFound:
-            lexer = get_lexer_by_name("text", encoding="guess")
-    elif lines_to_highlight == "":
-        # Starting another lex. Second line is list of lines to highlight,
-        # formatted as string of whitespace-separated integers
-        lines_to_highlight = [int(str) for str in line.split()]
+        config['hl_lines'] = []
+
+    elif code == "":
+        # Only check for new options at the beginning of a fresh invocation
+        if line.startswith("__LANG__"):
+            # Use the provided language to find the appropriate lexer.
+            try:
+                lang = line.split()[1]
+                lexer = get_lexer(lang)
+            except IndexError:
+                print("No lexer was found for the given language. Using plain text instead.", file=sys.stderr)
+                lexer = get_lexer_by_name("text", encoding="guess")
+
+        elif line.startswith("__LINENOS__"):
+            try:
+                option = line.split()[1]
+                if option.lower() == "true":
+                    config['linenos'] = True
+                elif option.lower() == "false":
+                    config['linenos'] = False
+                else:
+                    pass
+            except IndexError:
+                print("__LINENOS__ option must be given a `true` or `false` value",
+                      file=sys.stderr)
+
+        elif line.startswith("__LINES__"):
+            # The list of lines to highlight is formatted as string of
+            # whitespace-separated integers
+            lines = line.split()[1:]
+            config['hl_lines'] = [int(num) for num in lines]
+
+        elif line.startswith("__CSS__"):
+            try:
+                config['cssclass'] = line.split()[1]
+            except IndexError:
+                print("Could not parse CSS class line.", file=sys.stderr)
+
+        elif line.startswith("__ENC__"):
+            try:
+                config['encoding'] = line.split()[1]
+            except IndexError:
+                print("Could not parse encoding line.", file=sys.stderr)
+        else:
+            # Done with configuration for this invocation, start accumulating
+            # code. Use `line_raw` because we want trailing space, \n, \r
+            code += line_raw
+
     else:
         # Accumulate more code
         # Use `line_raw`: Do want trailing space, \n, \r
diff --git a/pollen/unstable/pygments.rkt b/pollen/unstable/pygments.rkt
index 5ca7793..18dcd7e 100644
--- a/pollen/unstable/pygments.rkt
+++ b/pollen/unstable/pygments.rkt
@@ -130,11 +130,18 @@ if zero is False:
   (unless (running?)
     (start python-executable line-numbers? css-class))
   (cond [(running?)
-         ;; order of writing arguments is significant: cooperates with pipe.py
-         (displayln lang pyg-out)
-         (displayln (string-join (map number->string hl-lines) " ") pyg-out)
+         ;; This works with a simple wrapper around Pygments defined in pipe.py.
+         ;; First send over the configuration options, then code to highlight.
+         ;; pipe.py also supports an encoding option, but it seems unnecessary
+         ;; to use it at this point.
+         (fprintf pyg-out "__LANG__ ~a~n" lang)
+         (fprintf pyg-out "__LINES__ ~a~n" (string-join (map number->string hl-lines) " "))
+         (fprintf pyg-out "__LINENOS__ ~a~n"
+                  (if line-numbers? "true" "false"))
          (displayln code pyg-out)
          (displayln "__END__" pyg-out)
+
+         ;; Read back the highlighted code
          (let loop ([s ""])
            (match (read-line pyg-in 'any)
              ["__END__" (with-input-from-string s read-html-as-xexprs)]