hiding works for literals and tokens ; tests pass ; resume in ids

dev-elider
Matthew Butterick 8 years ago
parent 44d25659de
commit 2d44750221

@ -1,4 +1,4 @@
#lang racket/base #lang br
(require (for-template racket/base) (require (for-template racket/base)
racket/list racket/list
@ -38,9 +38,25 @@
;; We flatten the rules so we can use the yacc-style ruleset that parser-tools ;; We flatten the rules so we can use the yacc-style ruleset that parser-tools
;; supports. ;; supports.
#|
MB: `rules` still carries 'hide syntax property
|#
#;(report flattened-rules)
#|
MB: `flattened-rules` still carries 'hide syntax property
|#
(define flattened-rules (flatten-rules rules)) (define flattened-rules (flatten-rules rules))
#;(report flattened-rules)
(define generated-rule-codes (map flat-rule->yacc-rule flattened-rules)) (define generated-rule-codes (map flat-rule->yacc-rule flattened-rules))
#|
MB: `generated-rule-codes` loses the 'hide syntax property
|#
#;(report generated-rule-codes)
;; The first rule, by default, is the start rule. ;; The first rule, by default, is the start rule.
(define rule-ids (for/list ([a-rule (in-list rules)]) (define rule-ids (for/list ([a-rule (in-list rules)])
@ -163,6 +179,7 @@
;; stx :== (name (U tokens rule-stx) ...) ;; stx :== (name (U tokens rule-stx) ...)
;; ;;
(define (flat-rule->yacc-rule a-flat-rule) (define (flat-rule->yacc-rule a-flat-rule)
#;(report a-flat-rule)
(syntax-case a-flat-rule () (syntax-case a-flat-rule ()
[(rule-type origin name clauses ...) [(rule-type origin name clauses ...)
(begin (begin
@ -206,6 +223,7 @@
[$X-end-pos [$X-end-pos
(format-id translated-pattern "$~a-end-pos" pos)]) (format-id translated-pattern "$~a-end-pos" pos)])
(syntax-case primitive-pattern (id lit token inferred-id) (syntax-case primitive-pattern (id lit token inferred-id)
;; When a rule usage is inferred, the value of $X is a syntax object ;; When a rule usage is inferred, the value of $X is a syntax object
;; whose head is the name of the inferred rule . We strip that out, ;; whose head is the name of the inferred rule . We strip that out,
;; leaving the residue to be absorbed. ;; leaving the residue to be absorbed.
@ -214,11 +232,13 @@
[(inferred-rule-name . rest) [(inferred-rule-name . rest)
(syntax->list #'rest)])] (syntax->list #'rest)])]
[(id val) [(id val)
#`(list $X)] #'(list $X)]
;; move the 'hide syntax property into the translated-action
;; because syntax gets datum-ized
[(lit val) [(lit val)
#`(list (atomic-datum->syntax $X $X-start-pos $X-end-pos))] #`(list (atomic-datum->syntax $X $X-start-pos $X-end-pos #,(syntax-property primitive-pattern 'hide)))]
[(token val) [(token val)
#`(list (atomic-datum->syntax $X $X-start-pos $X-end-pos))])))) #`(list (atomic-datum->syntax $X $X-start-pos $X-end-pos #,(syntax-property primitive-pattern 'hide)))]))))
(define whole-rule-loc (define whole-rule-loc
(if (empty? translated-patterns) (if (empty? translated-patterns)

@ -1,4 +1,4 @@
#lang racket/base #lang br
(require racket/match (require racket/match
racket/list racket/list
@ -45,86 +45,86 @@
;; FIXME: clean up code. ;; FIXME: clean up code.
(define (make-permissive-tokenizer tokenizer token-type-hash) (define (make-permissive-tokenizer tokenizer token-type-hash)
(define tokenizer-thunk (cond (define tokenizer-thunk (cond
[(sequence? tokenizer) [(sequence? tokenizer)
(sequence->generator tokenizer)] (sequence->generator tokenizer)]
[(procedure? tokenizer) [(procedure? tokenizer)
tokenizer])) tokenizer]))
;; lookup: symbol any pos pos -> position-token ;; lookup: symbol any pos pos -> position-token
(define (lookup type val start-pos end-pos) (define (lookup type val start-pos end-pos)
(lex:position-token (lex:position-token
((hash-ref token-type-hash type ((hash-ref token-type-hash type
(lambda () (lambda ()
((current-tokenizer-error-handler) (format "~a" type) val ((current-tokenizer-error-handler) (format "~a" type) val
(lex:position-offset start-pos) (lex:position-offset start-pos)
(lex:position-line start-pos) (lex:position-line start-pos)
(lex:position-col start-pos) (lex:position-col start-pos)
(and (number? (lex:position-offset start-pos)) (and (number? (lex:position-offset start-pos))
(number? (lex:position-offset end-pos)) (number? (lex:position-offset end-pos))
(- (lex:position-offset end-pos) (- (lex:position-offset end-pos)
(lex:position-offset start-pos)))))) (lex:position-offset start-pos))))))
val) val)
start-pos end-pos)) start-pos end-pos))
(define (permissive-tokenizer) (define (permissive-tokenizer)
(define next-token (tokenizer-thunk)) (define next-token (tokenizer-thunk))
(let loop ([next-token next-token]) (let loop ([next-token next-token])
(match next-token (match next-token
[(or (? eof-object?) (? void?)) [(or (? eof-object?) (? void?))
(lookup 'EOF eof no-position no-position)] (lookup 'EOF eof no-position no-position)]
[(? symbol?) [(? symbol?)
(lookup next-token next-token no-position no-position)] (lookup next-token next-token no-position no-position)]
[(? string?) [(? string?)
(lookup (string->symbol next-token) next-token no-position no-position)] (lookup (string->symbol next-token) next-token no-position no-position)]
[(? char?) [(? char?)
(lookup (string->symbol (string next-token)) next-token no-position no-position)] (lookup (string->symbol (string next-token)) next-token no-position no-position)]
;; Compatibility ;; Compatibility
[(? lex:token?) [(? lex:token?)
(loop (token (lex:token-name next-token) (loop (token (lex:token-name next-token)
(lex:token-value next-token)))] (lex:token-value next-token)))]
[(token-struct type val offset line column span skip?) [(token-struct type val offset line column span skip?)
(cond [skip? (cond [skip?
;; skip whitespace, and just tokenize again. ;; skip whitespace, and just tokenize again.
(permissive-tokenizer)] (permissive-tokenizer)]
[(hash-has-key? token-type-hash type) [(hash-has-key? token-type-hash type)
(define start-pos (lex:position offset line column)) (define start-pos (lex:position offset line column))
;; try to synthesize a consistent end position. ;; try to synthesize a consistent end position.
(define end-pos (lex:position (if (and (number? offset) (number? span)) (define end-pos (lex:position (if (and (number? offset) (number? span))
(+ offset span) (+ offset span)
offset) offset)
line line
(if (and (number? column) (number? span)) (if (and (number? column) (number? span))
(+ column span) (+ column span)
column))) column)))
(lookup type val start-pos end-pos)] (lookup type val start-pos end-pos)]
[else [else
;; We ran into a token of unrecognized type. Let's raise an appropriate error. ;; We ran into a token of unrecognized type. Let's raise an appropriate error.
((current-tokenizer-error-handler) type val ((current-tokenizer-error-handler) type val
offset line column span)])] offset line column span)])]
[(lex:position-token t s e) [(lex:position-token t s e)
(define a-position-token (loop t)) (define a-position-token (loop t))
(lex:position-token (lex:position-token-token a-position-token) (lex:position-token (lex:position-token-token a-position-token)
(if (no-position? (lex:position-token-start-pos a-position-token)) (if (no-position? (lex:position-token-start-pos a-position-token))
s s
(lex:position-token-start-pos a-position-token)) (lex:position-token-start-pos a-position-token))
(if (no-position? (lex:position-token-end-pos a-position-token)) (if (no-position? (lex:position-token-end-pos a-position-token))
e e
(lex:position-token-end-pos a-position-token)))] (lex:position-token-end-pos a-position-token)))]
[else [else
;; Otherwise, we have no idea how to treat this as a token. ;; Otherwise, we have no idea how to treat this as a token.
((current-tokenizer-error-handler) 'unknown-type (format "~a" next-token) ((current-tokenizer-error-handler) 'unknown-type (format "~a" next-token)
#f #f #f #f)]))) #f #f #f #f)])))
permissive-tokenizer) permissive-tokenizer)
;; positions->srcloc: position position -> (list source line column offset span) ;; positions->srcloc: position position -> (list source line column offset span)
;; Given two positions, returns a srcloc-like structure, where srcloc is the value ;; Given two positions, returns a srcloc-like structure, where srcloc is the value
@ -140,19 +140,25 @@
(lex:position-offset start-pos)) (lex:position-offset start-pos))
#f))) #f)))
#|
MB: the next three functions control the parse tree output.
This would be the place to check a syntax property for hiding.
|#
;; We create a syntax using read-syntax; by definition, it should have the ;; We create a syntax using read-syntax; by definition, it should have the
;; original? property set to #t, which we then copy over to syntaxes constructed ;; original? property set to #t, which we then copy over to syntaxes constructed
;; with atomic-datum->syntax and rule-components->syntax. ;; with atomic-datum->syntax and rule-components->syntax.
(define stx-with-original?-property (define stx-with-original?-property
(read-syntax #f (open-input-string "original"))) (read-syntax #f (open-input-string "meaningless-string")))
(define elided (gensym))
;; atomic-datum->syntax: datum position position ;; atomic-datum->syntax: datum position position
;; Helper that does the ugly work in wrapping a datum into a syntax ;; Helper that does the ugly work in wrapping a datum into a syntax
;; with source location. ;; with source location.
(define (atomic-datum->syntax d start-pos end-pos) (define (atomic-datum->syntax d start-pos end-pos [hide? #f])
(datum->syntax #f d (positions->srcloc start-pos end-pos) stx-with-original?-property)) (if hide?
elided
(datum->syntax #f d (positions->srcloc start-pos end-pos) stx-with-original?-property)))
@ -160,11 +166,10 @@
;; Creates an stx out of the rule name and its components. ;; Creates an stx out of the rule name and its components.
;; The location information of the rule spans that of its components. ;; The location information of the rule spans that of its components.
(define (rule-components->syntax rule-name/false #:srcloc [srcloc #f] . components) (define (rule-components->syntax rule-name/false #:srcloc [srcloc #f] . components)
(define flattened-components (apply append components)) (define flattened-elided-components (filter-not (λ(c) (eq? c elided)) (apply append components)))
(datum->syntax #f (datum->syntax #f
(apply append (cons
(list (datum->syntax #f rule-name/false srcloc stx-with-original?-property)
(datum->syntax #f rule-name/false srcloc stx-with-original?-property)) flattened-elided-components)
components)
srcloc srcloc
stx-with-original?-property)) stx-with-original?-property))

@ -0,0 +1,4 @@
#lang br/ragg
;; Simple baby example of JSON structure
json: ID <":"> ID

@ -1,15 +1,17 @@
#lang br/ragg #lang br/ragg
;; Simple baby example of JSON structure ;; Simple baby example of JSON structure
json: (number | string json: number
| string
| array | array
| object) | object
number: NUMBER number: NUMBER
string: STRING string: STRING
array: "[" [json ("," json)*] "]" array: "[" [json ("," json)*] "]"
object: "{" [kvpair ("," kvpair)*] "}" object: <"{"> [kvpair ("," kvpair)*] <"}">
kvpair: ID <":"> json kvpair: <ID> <":"> json

@ -1,4 +1,4 @@
#lang racket/base #lang br
(require "json-elider.rkt" (require "json-elider.rkt"
br/ragg/support br/ragg/support
rackunit) rackunit)
@ -10,15 +10,13 @@
":" ":"
(token 'STRING "'hello world'") (token 'STRING "'hello world'")
"}"))) "}")))
'(json (object "{" '(json (object (kvpair (json (string "'hello world'"))))))
(kvpair "message" ":" (json (string "'hello world'")))
"}")))
(check-equal? (check-equal?
(syntax->datum (syntax->datum
(parse "[[[{}]],[],[[{}]]]")) (parse "[[[{}]],[],[[{}]]]"))
'(json (array #\[ (json (array #\[ (json (array #\[ (json (object #\{ #\})) #\])) #\])) #\, (json (array #\[ #\])) #\, (json (array #\[ (json (array #\[ (json (object #\{ #\})) #\])) #\])) #\]))) '(json (array #\[ (json (array #\[ (json (array #\[ (json (object)) #\])) #\])) #\, (json (array #\[ #\])) #\, (json (array #\[ (json (array #\[ (json (object )) #\])) #\])) #\])))

@ -1,4 +1,4 @@
#lang racket/base #lang br
(require "rule-structs.rkt" (require "rule-structs.rkt"
parser-tools/lex parser-tools/lex

@ -1,4 +1,4 @@
#lang racket/base #lang br
(require "test-0n1.rkt" (require "test-0n1.rkt"

@ -1,4 +1,4 @@
#lang racket/base #lang br
(require rackunit (require rackunit
@ -47,11 +47,11 @@
1 1
(pattern-lit (p 8) (p 15) "hello" #f))))) (pattern-lit (p 8) (p 15) "hello" #f)))))
(check-equal? (grammar-parser (tokenize (open-input-string "expr : ['hello']"))) (check-equal? (grammar-parser (tokenize (open-input-string "expr : [<'hello'>]")))
(list (rule (p 1) (p 17) (list (rule (p 1) (p 19)
(lhs-id (p 1) (p 5) "expr" #f) (lhs-id (p 1) (p 5) "expr" #f)
(pattern-maybe (p 8) (p 17) (pattern-maybe (p 8) (p 19)
(pattern-lit (p 9) (p 16) "hello" #f))))) (pattern-lit (p 9) (p 18) "hello" #t)))))
(check-equal? (grammar-parser (tokenize (open-input-string "expr : COLON | BLAH"))) (check-equal? (grammar-parser (tokenize (open-input-string "expr : COLON | BLAH")))
(list (rule (p 1) (p 20) (list (rule (p 1) (p 20)
@ -70,12 +70,12 @@
(list (pattern-token (p 23) (p 26) "BAZ" #f) (list (pattern-token (p 23) (p 26) "BAZ" #f)
(pattern-id (p 27) (p 31) "expr" #f)))))))) (pattern-id (p 27) (p 31) "expr" #f))))))))
(check-equal? (grammar-parser (tokenize (open-input-string "expr : one two three"))) (check-equal? (grammar-parser (tokenize (open-input-string "expr : one two <three>")))
(list (rule (p 1) (p 21) (list (rule (p 1) (p 23)
(lhs-id (p 1) (p 5) "expr" #f) (lhs-id (p 1) (p 5) "expr" #f)
(pattern-seq (p 8) (p 21) (list (pattern-id (p 8) (p 11) "one" #f) (pattern-seq (p 8) (p 23) (list (pattern-id (p 8) (p 11) "one" #f)
(pattern-id (p 12) (p 15) "two" #f) (pattern-id (p 12) (p 15) "two" #f)
(pattern-id (p 16) (p 21) "three" #f)))))) (pattern-id (p 16) (p 23) "three" #t))))))
(check-equal? (grammar-parser (tokenize (open-input-string "expr : (one two three)"))) (check-equal? (grammar-parser (tokenize (open-input-string "expr : (one two three)")))

Loading…
Cancel
Save