From 2d4475022154ccaeaeee6bdb40ffee2c0dce1047 Mon Sep 17 00:00:00 2001 From: Matthew Butterick Date: Thu, 5 May 2016 00:54:54 -0700 Subject: [PATCH] hiding works for literals and tokens ; tests pass ; resume in ids --- .../br/ragg/codegen/codegen.rkt | 28 ++- .../br/ragg/codegen/runtime.rkt | 163 +++++++++--------- .../br/ragg/elider/json-elider-toy.rkt | 4 + .../br/ragg/elider/json-elider.rkt | 10 +- .../br/ragg/elider/test-json-elider.rkt | 8 +- beautiful-racket-ragg/br/ragg/rules/stx.rkt | 2 +- .../br/ragg/test/test-all.rkt | 2 +- .../br/ragg/test/test-parser.rkt | 18 +- 8 files changed, 132 insertions(+), 103 deletions(-) create mode 100644 beautiful-racket-ragg/br/ragg/elider/json-elider-toy.rkt diff --git a/beautiful-racket-ragg/br/ragg/codegen/codegen.rkt b/beautiful-racket-ragg/br/ragg/codegen/codegen.rkt index 583d517..11357b1 100755 --- a/beautiful-racket-ragg/br/ragg/codegen/codegen.rkt +++ b/beautiful-racket-ragg/br/ragg/codegen/codegen.rkt @@ -1,4 +1,4 @@ -#lang racket/base +#lang br (require (for-template racket/base) racket/list @@ -38,9 +38,25 @@ ;; We flatten the rules so we can use the yacc-style ruleset that parser-tools ;; supports. + + #| + MB: `rules` still carries 'hide syntax property + |# + #;(report flattened-rules) + + + #| + MB: `flattened-rules` still carries 'hide syntax property + |# (define flattened-rules (flatten-rules rules)) + #;(report flattened-rules) (define generated-rule-codes (map flat-rule->yacc-rule flattened-rules)) + + #| + MB: `generated-rule-codes` loses the 'hide syntax property + |# + #;(report generated-rule-codes) ;; The first rule, by default, is the start rule. (define rule-ids (for/list ([a-rule (in-list rules)]) @@ -163,6 +179,7 @@ ;; stx :== (name (U tokens rule-stx) ...) ;; (define (flat-rule->yacc-rule a-flat-rule) + #;(report a-flat-rule) (syntax-case a-flat-rule () [(rule-type origin name clauses ...) (begin @@ -206,6 +223,7 @@ [$X-end-pos (format-id translated-pattern "$~a-end-pos" pos)]) (syntax-case primitive-pattern (id lit token inferred-id) + ;; When a rule usage is inferred, the value of $X is a syntax object ;; whose head is the name of the inferred rule . We strip that out, ;; leaving the residue to be absorbed. @@ -214,11 +232,13 @@ [(inferred-rule-name . rest) (syntax->list #'rest)])] [(id val) - #`(list $X)] + #'(list $X)] + ;; move the 'hide syntax property into the translated-action + ;; because syntax gets datum-ized [(lit val) - #`(list (atomic-datum->syntax $X $X-start-pos $X-end-pos))] + #`(list (atomic-datum->syntax $X $X-start-pos $X-end-pos #,(syntax-property primitive-pattern 'hide)))] [(token val) - #`(list (atomic-datum->syntax $X $X-start-pos $X-end-pos))])))) + #`(list (atomic-datum->syntax $X $X-start-pos $X-end-pos #,(syntax-property primitive-pattern 'hide)))])))) (define whole-rule-loc (if (empty? translated-patterns) diff --git a/beautiful-racket-ragg/br/ragg/codegen/runtime.rkt b/beautiful-racket-ragg/br/ragg/codegen/runtime.rkt index d38b244..5efc7e9 100755 --- a/beautiful-racket-ragg/br/ragg/codegen/runtime.rkt +++ b/beautiful-racket-ragg/br/ragg/codegen/runtime.rkt @@ -1,4 +1,4 @@ -#lang racket/base +#lang br (require racket/match racket/list @@ -45,86 +45,86 @@ ;; FIXME: clean up code. (define (make-permissive-tokenizer tokenizer token-type-hash) (define tokenizer-thunk (cond - [(sequence? tokenizer) - (sequence->generator tokenizer)] - [(procedure? tokenizer) - tokenizer])) - + [(sequence? tokenizer) + (sequence->generator tokenizer)] + [(procedure? tokenizer) + tokenizer])) + ;; lookup: symbol any pos pos -> position-token (define (lookup type val start-pos end-pos) (lex:position-token ((hash-ref token-type-hash type - (lambda () - ((current-tokenizer-error-handler) (format "~a" type) val - (lex:position-offset start-pos) - (lex:position-line start-pos) - (lex:position-col start-pos) - (and (number? (lex:position-offset start-pos)) - (number? (lex:position-offset end-pos)) - (- (lex:position-offset end-pos) - (lex:position-offset start-pos)))))) + (lambda () + ((current-tokenizer-error-handler) (format "~a" type) val + (lex:position-offset start-pos) + (lex:position-line start-pos) + (lex:position-col start-pos) + (and (number? (lex:position-offset start-pos)) + (number? (lex:position-offset end-pos)) + (- (lex:position-offset end-pos) + (lex:position-offset start-pos)))))) val) start-pos end-pos)) - + (define (permissive-tokenizer) (define next-token (tokenizer-thunk)) (let loop ([next-token next-token]) - (match next-token - [(or (? eof-object?) (? void?)) - (lookup 'EOF eof no-position no-position)] - - [(? symbol?) - (lookup next-token next-token no-position no-position)] - - [(? string?) - (lookup (string->symbol next-token) next-token no-position no-position)] - - [(? char?) - (lookup (string->symbol (string next-token)) next-token no-position no-position)] - - ;; Compatibility - [(? lex:token?) - (loop (token (lex:token-name next-token) - (lex:token-value next-token)))] - - [(token-struct type val offset line column span skip?) - (cond [skip? - ;; skip whitespace, and just tokenize again. - (permissive-tokenizer)] - - [(hash-has-key? token-type-hash type) - (define start-pos (lex:position offset line column)) - ;; try to synthesize a consistent end position. - (define end-pos (lex:position (if (and (number? offset) (number? span)) - (+ offset span) - offset) - line - (if (and (number? column) (number? span)) - (+ column span) - column))) - (lookup type val start-pos end-pos)] - [else - ;; We ran into a token of unrecognized type. Let's raise an appropriate error. - ((current-tokenizer-error-handler) type val - offset line column span)])] - - [(lex:position-token t s e) - (define a-position-token (loop t)) - (lex:position-token (lex:position-token-token a-position-token) - (if (no-position? (lex:position-token-start-pos a-position-token)) - s - (lex:position-token-start-pos a-position-token)) - (if (no-position? (lex:position-token-end-pos a-position-token)) - e - (lex:position-token-end-pos a-position-token)))] - - [else - ;; Otherwise, we have no idea how to treat this as a token. - ((current-tokenizer-error-handler) 'unknown-type (format "~a" next-token) - #f #f #f #f)]))) + (match next-token + [(or (? eof-object?) (? void?)) + (lookup 'EOF eof no-position no-position)] + + [(? symbol?) + (lookup next-token next-token no-position no-position)] + + [(? string?) + (lookup (string->symbol next-token) next-token no-position no-position)] + + [(? char?) + (lookup (string->symbol (string next-token)) next-token no-position no-position)] + + ;; Compatibility + [(? lex:token?) + (loop (token (lex:token-name next-token) + (lex:token-value next-token)))] + + [(token-struct type val offset line column span skip?) + (cond [skip? + ;; skip whitespace, and just tokenize again. + (permissive-tokenizer)] + + [(hash-has-key? token-type-hash type) + (define start-pos (lex:position offset line column)) + ;; try to synthesize a consistent end position. + (define end-pos (lex:position (if (and (number? offset) (number? span)) + (+ offset span) + offset) + line + (if (and (number? column) (number? span)) + (+ column span) + column))) + (lookup type val start-pos end-pos)] + [else + ;; We ran into a token of unrecognized type. Let's raise an appropriate error. + ((current-tokenizer-error-handler) type val + offset line column span)])] + + [(lex:position-token t s e) + (define a-position-token (loop t)) + (lex:position-token (lex:position-token-token a-position-token) + (if (no-position? (lex:position-token-start-pos a-position-token)) + s + (lex:position-token-start-pos a-position-token)) + (if (no-position? (lex:position-token-end-pos a-position-token)) + e + (lex:position-token-end-pos a-position-token)))] + + [else + ;; Otherwise, we have no idea how to treat this as a token. + ((current-tokenizer-error-handler) 'unknown-type (format "~a" next-token) + #f #f #f #f)]))) permissive-tokenizer) - + ;; positions->srcloc: position position -> (list source line column offset span) ;; Given two positions, returns a srcloc-like structure, where srcloc is the value @@ -140,19 +140,25 @@ (lex:position-offset start-pos)) #f))) - +#| +MB: the next three functions control the parse tree output. +This would be the place to check a syntax property for hiding. +|# ;; We create a syntax using read-syntax; by definition, it should have the ;; original? property set to #t, which we then copy over to syntaxes constructed ;; with atomic-datum->syntax and rule-components->syntax. (define stx-with-original?-property - (read-syntax #f (open-input-string "original"))) + (read-syntax #f (open-input-string "meaningless-string"))) +(define elided (gensym)) ;; atomic-datum->syntax: datum position position ;; Helper that does the ugly work in wrapping a datum into a syntax ;; with source location. -(define (atomic-datum->syntax d start-pos end-pos) - (datum->syntax #f d (positions->srcloc start-pos end-pos) stx-with-original?-property)) +(define (atomic-datum->syntax d start-pos end-pos [hide? #f]) + (if hide? + elided + (datum->syntax #f d (positions->srcloc start-pos end-pos) stx-with-original?-property))) @@ -160,11 +166,10 @@ ;; Creates an stx out of the rule name and its components. ;; The location information of the rule spans that of its components. (define (rule-components->syntax rule-name/false #:srcloc [srcloc #f] . components) - (define flattened-components (apply append components)) + (define flattened-elided-components (filter-not (λ(c) (eq? c elided)) (apply append components))) (datum->syntax #f - (apply append - (list - (datum->syntax #f rule-name/false srcloc stx-with-original?-property)) - components) + (cons + (datum->syntax #f rule-name/false srcloc stx-with-original?-property) + flattened-elided-components) srcloc stx-with-original?-property)) diff --git a/beautiful-racket-ragg/br/ragg/elider/json-elider-toy.rkt b/beautiful-racket-ragg/br/ragg/elider/json-elider-toy.rkt new file mode 100644 index 0000000..fcb1040 --- /dev/null +++ b/beautiful-racket-ragg/br/ragg/elider/json-elider-toy.rkt @@ -0,0 +1,4 @@ +#lang br/ragg + +;; Simple baby example of JSON structure +json: ID <":"> ID diff --git a/beautiful-racket-ragg/br/ragg/elider/json-elider.rkt b/beautiful-racket-ragg/br/ragg/elider/json-elider.rkt index 2d51cc7..ce2c05e 100755 --- a/beautiful-racket-ragg/br/ragg/elider/json-elider.rkt +++ b/beautiful-racket-ragg/br/ragg/elider/json-elider.rkt @@ -1,15 +1,17 @@ #lang br/ragg ;; Simple baby example of JSON structure -json: (number | string +json: number + | string | array - | object) + | object + number: NUMBER string: STRING array: "[" [json ("," json)*] "]" -object: "{" [kvpair ("," kvpair)*] "}" +object: <"{"> [kvpair ("," kvpair)*] <"}"> -kvpair: ID <":"> json +kvpair: <":"> json diff --git a/beautiful-racket-ragg/br/ragg/elider/test-json-elider.rkt b/beautiful-racket-ragg/br/ragg/elider/test-json-elider.rkt index e7ed704..aeaad4f 100755 --- a/beautiful-racket-ragg/br/ragg/elider/test-json-elider.rkt +++ b/beautiful-racket-ragg/br/ragg/elider/test-json-elider.rkt @@ -1,4 +1,4 @@ -#lang racket/base +#lang br (require "json-elider.rkt" br/ragg/support rackunit) @@ -10,15 +10,13 @@ ":" (token 'STRING "'hello world'") "}"))) - '(json (object "{" - (kvpair "message" ":" (json (string "'hello world'"))) - "}"))) + '(json (object (kvpair (json (string "'hello world'")))))) (check-equal? (syntax->datum (parse "[[[{}]],[],[[{}]]]")) - '(json (array #\[ (json (array #\[ (json (array #\[ (json (object #\{ #\})) #\])) #\])) #\, (json (array #\[ #\])) #\, (json (array #\[ (json (array #\[ (json (object #\{ #\})) #\])) #\])) #\]))) + '(json (array #\[ (json (array #\[ (json (array #\[ (json (object)) #\])) #\])) #\, (json (array #\[ #\])) #\, (json (array #\[ (json (array #\[ (json (object )) #\])) #\])) #\]))) diff --git a/beautiful-racket-ragg/br/ragg/rules/stx.rkt b/beautiful-racket-ragg/br/ragg/rules/stx.rkt index 6c68516..0317fcf 100755 --- a/beautiful-racket-ragg/br/ragg/rules/stx.rkt +++ b/beautiful-racket-ragg/br/ragg/rules/stx.rkt @@ -1,4 +1,4 @@ -#lang racket/base +#lang br (require "rule-structs.rkt" parser-tools/lex diff --git a/beautiful-racket-ragg/br/ragg/test/test-all.rkt b/beautiful-racket-ragg/br/ragg/test/test-all.rkt index b989dbb..9a2277d 100755 --- a/beautiful-racket-ragg/br/ragg/test/test-all.rkt +++ b/beautiful-racket-ragg/br/ragg/test/test-all.rkt @@ -1,4 +1,4 @@ -#lang racket/base +#lang br (require "test-0n1.rkt" diff --git a/beautiful-racket-ragg/br/ragg/test/test-parser.rkt b/beautiful-racket-ragg/br/ragg/test/test-parser.rkt index d57b5d4..d284832 100755 --- a/beautiful-racket-ragg/br/ragg/test/test-parser.rkt +++ b/beautiful-racket-ragg/br/ragg/test/test-parser.rkt @@ -1,4 +1,4 @@ -#lang racket/base +#lang br (require rackunit @@ -47,11 +47,11 @@ 1 (pattern-lit (p 8) (p 15) "hello" #f))))) -(check-equal? (grammar-parser (tokenize (open-input-string "expr : ['hello']"))) - (list (rule (p 1) (p 17) +(check-equal? (grammar-parser (tokenize (open-input-string "expr : [<'hello'>]"))) + (list (rule (p 1) (p 19) (lhs-id (p 1) (p 5) "expr" #f) - (pattern-maybe (p 8) (p 17) - (pattern-lit (p 9) (p 16) "hello" #f))))) + (pattern-maybe (p 8) (p 19) + (pattern-lit (p 9) (p 18) "hello" #t))))) (check-equal? (grammar-parser (tokenize (open-input-string "expr : COLON | BLAH"))) (list (rule (p 1) (p 20) @@ -70,12 +70,12 @@ (list (pattern-token (p 23) (p 26) "BAZ" #f) (pattern-id (p 27) (p 31) "expr" #f)))))))) -(check-equal? (grammar-parser (tokenize (open-input-string "expr : one two three"))) - (list (rule (p 1) (p 21) +(check-equal? (grammar-parser (tokenize (open-input-string "expr : one two "))) + (list (rule (p 1) (p 23) (lhs-id (p 1) (p 5) "expr" #f) - (pattern-seq (p 8) (p 21) (list (pattern-id (p 8) (p 11) "one" #f) + (pattern-seq (p 8) (p 23) (list (pattern-id (p 8) (p 11) "one" #f) (pattern-id (p 12) (p 15) "two" #f) - (pattern-id (p 16) (p 21) "three" #f)))))) + (pattern-id (p 16) (p 23) "three" #t)))))) (check-equal? (grammar-parser (tokenize (open-input-string "expr : (one two three)")))