@@ -27,6 +27,8 @@
;; grammar to tokens specific to this parser. In other words, this
;; parser uses `parser' so that it doesn't have to know anything about
;; tokens.
;;
(require parser-tools/yacc
@@ -84,19 +86,19 @@
;; then after parse-a succeeds once, we run parse-b in parallel
;; with the search for a second parse-a result.
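;;
;; A sketch of the continuation protocol threaded through the
;; combinators below (reconstructed from the call sites in this patch,
;; so the parameter names are descriptive rather than canonical):
;;
;;   parser:    (parse stream last-consumed-token depth end
;;                     success-k fail-k max-depth tasks)
;;   success-k: (lambda (val stream last-consumed-token depth
;;                       max-depth tasks next-k) ...)
;;   fail-k:    (lambda (max-depth tasks) ...)
;;   next-k:    (lambda (success-k fail-k max-depth tasks) ...),
;;              used to resume the search for further results.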
(define (parse-and simple-a? parse-a parse-b
                   stream depth end success-k fail-k
                   stream last-consumed-token depth end success-k fail-k
                   max-depth tasks)
  (letrec ([mk-got-k
            (lambda (success-k fail-k)
              (lambda (val stream depth max-depth tasks next1-k)
              (lambda (val stream last-consumed-token depth max-depth tasks next1-k)
                (if simple-a?
                    (parse-b val stream depth end
                    (parse-b val stream last-consumed-token depth end
                             (mk-got2-k success-k fail-k next1-k)
                             (mk-fail2-k success-k fail-k next1-k)
                             max-depth tasks)
                    (parallel-or
                     (lambda (success-k fail-k max-depth tasks)
                       (parse-b val stream depth end
                       (parse-b val stream last-consumed-token depth end
                                success-k fail-k
                                max-depth tasks))
                     (lambda (success-k fail-k max-depth tasks)
@@ -105,8 +107,8 @@
                        success-k fail-k max-depth tasks))))]
           [mk-got2-k
            (lambda (success-k fail-k next1-k)
              (lambda (val stream depth max-depth tasks next-k)
                (success-k val stream depth max-depth tasks
              (lambda (val stream last-consumed-token depth max-depth tasks next-k)
                (success-k val stream last-consumed-token depth max-depth tasks
                           (lambda (success-k fail-k max-depth tasks)
                             (next-k (mk-got2-k success-k fail-k next1-k)
                                     (mk-fail2-k success-k fail-k next1-k)
@@ -118,28 +120,28 @@
             fail-k
             max-depth
             tasks)))])
    (parse-a stream depth end
    (parse-a stream last-consumed-token depth end
             (mk-got-k success-k fail-k)
             fail-k
             max-depth tasks)))
;; Parallel or for non-terminal alternatives
(define (parse-parallel-or parse-a parse-b stream depth end success-k fail-k max-depth tasks)
(define (parse-parallel-or parse-a parse-b stream last-consumed-token depth end success-k fail-k max-depth tasks)
  (parallel-or (lambda (success-k fail-k max-depth tasks)
                 (parse-a stream depth end success-k fail-k max-depth tasks))
                 (parse-a stream last-consumed-token depth end success-k fail-k max-depth tasks))
               (lambda (success-k fail-k max-depth tasks)
                 (parse-b stream depth end success-k fail-k max-depth tasks))
                 (parse-b stream last-consumed-token depth end success-k fail-k max-depth tasks))
               success-k fail-k max-depth tasks))
;; Generic parallel-or
(define (parallel-or parse-a parse-b success-k fail-k max-depth tasks)
  (define answer-key (gensym))
  (letrec ([gota-k
            (lambda (val stream depth max-depth tasks next-k)
            (lambda (val stream last-consumed-token depth max-depth tasks next-k)
              (report-answer answer-key
                             max-depth
                             tasks
                             (list val stream depth next-k)))]
                             (list val stream last-consumed-token depth next-k)))]
           [faila-k
            (lambda (max-depth tasks)
              (report-answer answer-key
@@ -166,11 +168,11 @@
                             max-depth tasks))))])
    (letrec ([mk-got-one
              (lambda (immediate-next? get-nth success-k)
                (lambda (val stream depth max-depth tasks next-k)
                (lambda (val stream last-consumed-token depth max-depth tasks next-k)
                  (let ([tasks (if immediate-next?
                                   (queue-next next-k tasks)
                                   tasks)])
                    (success-k val stream depth max-depth
                    (success-k val stream last-consumed-token depth max-depth
                               tasks
                               (lambda (success-k fail-k max-depth tasks)
                                 (let ([tasks (if immediate-next?
@@ -194,11 +196,11 @@
;; Non-terminal alternatives where the first is "simple" can be done
;; sequentially, which is simpler
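;; ("Simple" here means the alternative can succeed in at most one way,
;; as with parse-and above, so no parallel branch needs to be kept
;; alive for it; see the parse-or vs. parse-parallel-or dispatch in the
;; grammar expansion further below.)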
(define (parse-or parse-a parse-b
                  stream depth end success-k fail-k max-depth tasks)
                  stream last-consumed-token depth end success-k fail-k max-depth tasks)
  (letrec ([mk-got-k
            (lambda (success-k fail-k)
              (lambda (val stream depth max-depth tasks next-k)
                (success-k val stream depth
              (lambda (val stream last-consumed-token depth max-depth tasks next-k)
                (success-k val stream last-consumed-token depth
                           max-depth tasks
                           (lambda (success-k fail-k max-depth tasks)
                             (next-k (mk-got-k success-k fail-k)
@@ -207,8 +209,8 @@
           [mk-fail-k
            (lambda (success-k fail-k)
              (lambda (max-depth tasks)
                (parse-b stream depth end success-k fail-k max-depth tasks)))])
    (parse-a stream depth end
                (parse-b stream last-consumed-token depth end success-k fail-k max-depth tasks)))])
    (parse-a stream last-consumed-token depth end
             (mk-got-k success-k fail-k)
             (mk-fail-k success-k fail-k)
             max-depth tasks)))
@@ -265,8 +267,8 @@
        (if val
            (if (null? val)
                (fail-k max-depth tasks)
                (let-values ([(val stream depth next-k) (apply values val)])
                  (success-k val stream depth max-depth tasks next-k)))
                (let-values ([(val stream last-consumed-token depth next-k) (apply values val)])
                  (success-k val stream last-consumed-token depth max-depth tasks next-k)))
            (deadlock-k max-depth tasks))))])
    (if multi?
        (hash-set! (tasks-multi-waits tasks) answer-key
@@ -334,7 +336,7 @@
  (let loop ([pat pat]
             [pos 1])
    (if (null? pat)
        #`(success-k #,handle stream depth max-depth tasks
        #`(success-k #,handle stream last-consumed-token depth max-depth tasks
                     (lambda (success-k fail-k max-depth tasks)
                       (fail-k max-depth tasks)))
        (let ([id (datum->syntax (car pat)
@@ -354,14 +356,21 @@
                 (or (not l)
                     (andmap values (caddr l))))
            #,(car pat)
            (lambda (#,id stream depth end success-k fail-k max-depth tasks)
              (let-syntax ([#,id-start-pos (at-tok-pos #'tok-start #'(and (pair? stream) (car stream)))]
                           [#,id-end-pos (at-tok-pos #'tok-end #'(and (pair? stream) (car stream)))]
            (let ([original-stream stream])
              (lambda (#,id stream last-consumed-token depth end success-k fail-k max-depth tasks)
                (let-syntax ([#,id-start-pos (at-tok-pos #'(if (eq? original-stream stream)
                                                               tok-end
                                                               tok-start)
                                                         #'(if (eq? original-stream stream)
                                                               last-consumed-token
                                                               (and (pair? original-stream)
                                                                    (car original-stream))))]
                             [#,id-end-pos (at-tok-pos #'tok-end #'last-consumed-token)]
                             #,@(if n-end-pos
                                    #`([#,n-end-pos (at-tok-pos #'tok-end #'(and (pair? stream) (car stream)))])
                                    #`([#,n-end-pos (at-tok-pos #'tok-end #'last-consumed-token)])
                                    null))
                  #,(loop (cdr pat) (add1 pos))))
            stream depth
                  #,(loop (cdr pat) (add1 pos)))))
            stream last-consumed-token depth
            #,(let ([cnt (apply +
                                (map (lambda (item)
                                       (cond
@@ -378,6 +387,7 @@
                    (eq? '#,tok-id (tok-name (car stream))))
               (let* ([stream-a (car stream)]
                      [#,id (tok-val stream-a)]
                      [last-consumed-token (car stream)]
                      [stream (cdr stream)]
                      [depth (add1 depth)])
                 (let ([max-depth (max max-depth depth)])
@@ -396,7 +406,7 @@
;; The cache maps nonterminal+starting-position+iteration to a result,
;; where the iteration is 0 for the first match attempt, 1 for the
;; second, etc.
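;; As a (hypothetical) illustration: the first attempt to match <expr>
;; starting at token position 5 is cached under <expr>'s key gensym +
;; position 5 + iteration 0, and backtracking into the same nonterminal
;; at the same position consults iteration 1, and so on.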
(define (parse-nt/share key min-cnt init-tokens stream depth end max-depth tasks success-k fail-k k)
(define (parse-nt/share key min-cnt init-tokens stream last-consumed-token depth end max-depth tasks success-k fail-k k)
  (if (and (positive? min-cnt)
           (pair? stream)
           (not (memq (tok-name (car stream)) init-tokens)))
@@ -431,7 +441,7 @@
      (let result-loop ([max-depth max-depth] [tasks tasks] [k k])
        (letrec ([orig-stream stream]
                 [new-got-k
                  (lambda (val stream depth max-depth tasks next-k)
                  (lambda (val stream last-consumed-token depth max-depth tasks next-k)
                    ;; Check whether we already have a result that consumed the same amount:
                    (let ([result-key (vector #f key old-depth depth)])
                      (cond
@@ -458,13 +468,13 @@
                         (hash-set! (tasks-cache tasks) result-key #t)
                         (hash-set! (tasks-cache tasks) table-key
                                    (lambda (success-k fail-k max-depth tasks)
                                      (success-k val stream depth max-depth tasks next-k)))
                                      (success-k val stream last-consumed-token depth max-depth tasks next-k)))
                         (report-answer-all answer-key
                                            max-depth
                                            tasks
                                            (list val stream depth next-k)
                                            (list val stream last-consumed-token depth next-k)
                                            (lambda (max-depth tasks)
                                              (success-k val stream depth max-depth tasks next-k))))])))]
                                              (success-k val stream last-consumed-token depth max-depth tasks next-k))))])))]
                 [new-fail-k
                  (lambda (max-depth tasks)
                    #;(printf "Failure ~a\n" table-key)
@@ -483,7 +493,7 @@
    (syntax-case stx ()
      [(_ clause ...)
       (let ([clauses (syntax->list #'(clause ...))])
         (let-values ([(start grammar cfg-error parser-clauses)
         (let-values ([(start grammar cfg-error parser-clauses src-pos?)
                       (let ([all-toks (apply
                                        append
                                        (map (lambda (clause)
@@ -524,7 +534,8 @@
                             (values cfg-start
                                     cfg-grammar
                                     cfg-error
                                     (reverse parser-clauses))
                                     (reverse parser-clauses)
                                     src-pos?)
                             (syntax-case (car clauses) (start error grammar src-pos)
                               [(start tok)
                                (loop (cdr clauses) #'tok cfg-grammar cfg-error src-pos? parser-clauses)]
@@ -647,9 +658,9 @@
  (define info (bound-identifier-mapping-get nts nt))
  (list nt
        #`(let ([key (gensym '#,nt)])
            (lambda (stream depth end success-k fail-k max-depth tasks)
            (lambda (stream last-consumed-token depth end success-k fail-k max-depth tasks)
              (parse-nt/share
               key #,(car info) '#,(cadr info) stream depth end
               key #,(car info) '#,(cadr info) stream last-consumed-token depth end
               max-depth tasks
               success-k fail-k
               (lambda (end max-depth tasks success-k fail-k)
@@ -663,18 +674,18 @@
                        (car simple?s))
                       #'parse-or
                       #'parse-parallel-or)
                   (lambda (stream depth end success-k fail-k max-depth tasks)
                   (lambda (stream last-consumed-token depth end success-k fail-k max-depth tasks)
                     #,(build-match nts
                                    toks
                                    (car pats)
                                    (car handles)
                                    (car $ctxs)))
                   (lambda (stream depth end success-k fail-k max-depth tasks)
                   (lambda (stream last-consumed-token depth end success-k fail-k max-depth tasks)
                     #,(loop (cdr pats)
                             (cdr handles)
                             (cdr $ctxs)
                             (cdr simple?s)))
                   stream depth end success-k fail-k max-depth tasks)))))))))
                   stream last-consumed-token depth end success-k fail-k max-depth tasks)))))))))
          nt-ids
          patss
          (syntax->list #'(((begin handle0 handle ...) ...) ...))
@@ -728,7 +739,7 @@
       (lambda (get-tok)
         (let ([tok-list (orig-parse get-tok)])
           (letrec ([success-k
                     (lambda (val stream depth max-depth tasks next)
                     (lambda (val stream last-consumed-token depth max-depth tasks next)
                       (if (null? stream)
                           val
                           (next success-k fail-k max-depth tasks)))]
@@ -746,18 +757,87 @@
                                  'cfg-parse
                                  "failed at ~a"
                                  (tok-val bad-tok)))))])
             (#,start tok-list 0
             (#,start tok-list
              ;; we simulate a token at the very beginning with zero width
              ;; for use with the position-generating code (*-start-pos, *-end-pos).
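              ;; (Under the new scheme, an item's start position falls
              ;; back to the end of last-consumed-token whenever the item
              ;; matched without consuming input, so this sentinel gives
              ;; empty matches at the very start of the stream a
              ;; well-defined start position of 1.)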
              (if (null? tok-list)
                  (tok #f #f #f
                       (position 1
                                 #,(if src-pos? #'1 #'#f)
                                 #,(if src-pos? #'0 #'#f))
                       (position 1
                                 #,(if src-pos? #'1 #'#f)
                                 #,(if src-pos? #'0 #'#f)))
                  (tok (tok-name (car tok-list))
                       (tok-orig-name (car tok-list))
                       (tok-val (car tok-list))
                       (tok-start (car tok-list))
                       (tok-start (car tok-list))))
              0
              (length tok-list)
              success-k
              fail-k
              0 (make-tasks null null
              0
              (make-tasks null null
                          (make-hasheq) (make-hasheq)
                          (make-hash) #t)))))))))]))
(module* test racket/base
  (require (submod "..")
           parser-tools/lex)
           parser-tools/lex
           racket/block
           rackunit)
  ;; Test: parsing regular expressions.
  ;; Here is a test case on locations:
  (block
   (define-tokens regexp-tokens (ANCHOR STAR OR LIT LPAREN RPAREN EOF))
   (define lex (lexer-src-pos ["|" (token-OR lexeme)]
                              ["^" (token-ANCHOR lexeme)]
                              ["*" (token-STAR lexeme)]
                              [(repetition 1 +inf.0 alphabetic) (token-LIT lexeme)]
                              ["(" (token-LPAREN lexeme)]
                              [")" (token-RPAREN lexeme)]
                              [whitespace (return-without-pos (lex input-port))]
                              [(eof) (token-EOF 'eof)]))
   (define -parse (cfg-parser
                   (tokens regexp-tokens)
                   (start top)
                   (end EOF)
                   (src-pos)
                   (grammar [top [(maybe-anchor regexp)
                                  (cond [$1
                                         `(anchored ,$2 ,(pos->sexp $1-start-pos) ,(pos->sexp $2-end-pos))]
                                        [else
                                         `(unanchored ,$2 ,(pos->sexp $1-start-pos) ,(pos->sexp $2-end-pos))])]]
                            [maybe-anchor [(ANCHOR) #t]
                                          [() #f]]
                            [regexp [(regexp STAR) `(star ,$1 ,(pos->sexp $1-start-pos) ,(pos->sexp $2-end-pos))]
                                    [(regexp OR regexp) `(or ,$1 ,$3 ,(pos->sexp $1-start-pos) ,(pos->sexp $3-end-pos))]
                                    [(LPAREN regexp RPAREN) `(group ,$2 ,(pos->sexp $1-start-pos) ,(pos->sexp $3-end-pos))]
                                    [(LIT) `(lit ,$1 ,(pos->sexp $1-start-pos) ,(pos->sexp $1-end-pos))]])))
   (define (pos->sexp pos)
     (position-offset pos))
   (define (parse s)
     (define ip (open-input-string s))
     (port-count-lines! ip)
     (-parse (lambda () (lex ip))))
   (check-equal? (parse "abc")
                 '(unanchored (lit "abc" 1 4) 1 4))
   (check-equal? (parse "a | (b*) | c")
                 '(unanchored (or (or (lit "a" 1 2)
                                      (group (star (lit "b" 6 7) 6 8) 5 9)
                                      1 9)
                                  (lit "c" 12 13)
                                  1 13)
                              1 13))
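   ;; An extra, illustrative check (not part of the original patch)
   ;; exercising the anchored branch under the same position scheme:
   (check-equal? (parse "^a")
                 '(anchored (lit "a" 2 3) 1 3)))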
  ;; Tests used during development
  (define-tokens non-terminals (PLUS MINUS STAR BAR COLON EOF))
@@ -772,7 +852,6 @@
              [whitespace (lex input-port)]
              [(eof) (token-EOF 'eof)]))
  (define parse
    (cfg-parser
     (tokens non-terminals)
@@ -792,7 +871,6 @@
            [(<random> PLUS) (add1 $1)]
            [(<random> PLUS) (add1 $1)]])))
  (define (result)
    (let ([p (open-input-string #;"+*|-|-*|+**" #;"-|+*|+**"
                                #;"+*|+**|-" #;"-|-*|-|-*"
                                #;"-|-*|-|-**|-|-*|-|-**"
@@ -801,5 +879,31 @@
                                "-|-*|-|-**|-|-*|-|-***|-|-*|-|-**|-|-*|-|-*****"
                                ;; This one fails:
                                #;"+*")])
      (time (parse (lambda () (lex p))))))
  (result))
      (check-equal? (parse (lambda () (lex p)))
                    '((((((((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *)
                        ||
                        (((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *))
                       .
                       *)
                      ||
                      (((((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *)
                       ||
                       (((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *))
                      .
                      *))
                     .
                     *)
                    ||
                    ((((((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *)
                      ||
                      (((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *))
                     .
                     *)
                    ||
                    (((((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *)
                     ||
                     (((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *))
                    .
                    *))
                   .
                   *)))))