diff --git a/collects/parser-tools/lex.ss b/collects/parser-tools/lex.ss index 9380884..9538131 100644 --- a/collects/parser-tools/lex.ss +++ b/collects/parser-tools/lex.ss @@ -3,10 +3,12 @@ ;; Provides the syntax used to create lexers and the functions needed to ;; create and use the buffer that the lexer reads from. See doc.txt. - (require-for-syntax "private-lex/util.ss" + (require-for-syntax (lib "define.ss" "syntax") + "private-lex/util.ss" "private-lex/actions.ss" "private-lex/front.ss" "private-lex/unicode-chars.ss") + (require (lib "readerr.ss" "syntax") (lib "cffi.ss" "compiler") @@ -119,6 +121,18 @@ "Form should be (define-lex-abbrevs (name re) ...)" stx)))) + (define-syntax (define-lex-trans stx) + (syntax-case stx () + ((_ name-form body-form) + (let-values (((name body) + (normalize-definition (syntax (define-syntax name-form body-form) #'lambda)))) + #`(define-syntax name (make-lex-trans body)))) + (_ + (raise-syntax-error + #f + "Form should be (define-lex-trans name transformer)" + stx)))) + (define (get-next-state-helper char min max table) (if (>= min max) diff --git a/collects/parser-tools/private-lex/stx.ss b/collects/parser-tools/private-lex/stx.ss index 08f82e7..7171a38 100644 --- a/collects/parser-tools/private-lex/stx.ss +++ b/collects/parser-tools/private-lex/stx.ss @@ -1,98 +1,112 @@ (module stx mzscheme - (require (lib "stx.ss" "syntax") - "util.ss") + (require "util.ss") (provide parse) + + (define (repetition-error stx) + (raise-syntax-error + 'regular-expression + "must be (repetition non-negative-exact-integer non-negative-exact-integer-or-+inf.0 re)" + stx)) - - (define (num-arg-err s expect given) + (define (char-range-error stx) (raise-syntax-error 'regular-expression - (format "operator expects ~a arguments, given ~a" expect given) - s)) - + "must be (char-range char-or-single-char-string char-or-single-char-string)" + stx)) + + (define (char-range-arg c stx) + (cond + ((char? c) (integer->char c)) + ((and (string? c (= string-length c 1))) + (integer->char (string-ref c 0))) + (else + (char-range-error stx)))) ;; parse : syntax-object -> s-re (see re.ss) ;; checks for errors and generates the plain s-exp form for s - (define (parse s) - (let ((s-e (syntax-e s))) - (cond - ((char? s-e) s-e) - ((string? s-e) s-e) - ((symbol? s-e) - (let ((expand (syntax-local-value s (lambda () #f)))) - (unless (lex-abbrev? expand) - (raise-syntax-error 'regular-expression "undefined abbreviation" s)) - (parse (lex-abbrev-abbrev expand)))) - ((stx-null? s) - (raise-syntax-error 'regular-expression "invalid regular expression" s)) - ((stx-list? s) - (let* ((ar (stx->list (stx-cdr s))) - (num-args (length ar))) - (case (syntax-e (stx-car s)) - ((epsilon) '(epsilon)) - ((*) - (unless (= num-args 1) - (num-arg-err s 1 num-args)) - `(* ,(parse (car ar)))) - ((+) - (unless (= num-args 1) - (num-arg-err s 1 num-args)) - `(+ ,(parse (car ar)))) - ((?) - (unless (= num-args 1) - (num-arg-err s 1 num-args)) - `(? ,(parse (car ar)))) - ((~) - (unless (= num-args 1) - (num-arg-err s 1 num-args)) - `(~ ,(parse (car ar)))) - ((:) `(: ,@(map parse ar))) - ((&) `(& ,@(map parse ar))) - ((@) `(@ ,@(map parse ar))) - ((-) - (unless (= num-args 2) - (num-arg-err s 2 num-args)) - (let ((c1 (parse (car ar))) - (c2 (parse (cadr ar)))) - (if (and (or (char? c1) (and (string? c1) (= 1 (string-length c1)))) - (or (char? c2) (and (string? c2) (= 1 (string-length c2))))) - (let ((i1 (char->integer (if (char? c1) c1 (string-ref c1 0)))) - (i2 (char->integer (if (char? c2) c2 (string-ref c2 0))))) - (if (<= i1 i2) - `(- ,c1 ,c2) - (raise-syntax-error - 'regular-expression - (format "first argument ~a does not preceed second argument ~a" - c1 c2) - s))) - (raise-syntax-error - 'regular-expression - (format "expects single character arguments, given ~a and ~a" - (syntax-object->datum (car ar)) - (syntax-object->datum (cadr ar))) - s)))) - ((^) - (let ((res (map parse ar))) - (if (not (andmap pure-char? res)) - (raise-syntax-error - 'regular-expression - (format - "expects single character or character range arguments, given ~a" - (map syntax-object->datum ar)) - s)) - `(^ ,@res))) - (else - (raise-syntax-error - 'regular-expression - "invalid operator" - s))))) - (else - (raise-syntax-error - 'regular-expression - "invalid regular expression" - s))))) - + (define (parse stx) + (syntax-case stx (repetition union intersection complement concatenation + char-range char-complement) + (_ + (identifier? stx) + (let ((expansion (syntax-local-value stx (lambda () #f)))) + (unless (lex-abbrev? expansion) + (raise-syntax-error 'regular-expression + "undefined abbreviation" + stx)) + (parse (lex-abbrev-abbrev expand)))) + (_ + (or (char? (syntax-e stx)) (string? (syntax-e stx))) + (syntax-e stx)) + ((repetition arg ...) + (let ((arg-list (syntax->list (syntax (arg ...))))) + (unless (= 3 (length arg-list)) + (repetition-error stx)) + (let ((lo-val (car arg-list)) + (hi-val (cadr arg-list)) + (re (caddr arg-list))) + (unless (and (exact? lo-val) (integer? lo-val) (> lo-val 0) + (or (and (exact? hi-val) (integer? hi-val) (> hi-val 0)) + (eq? hi-val +inf.0))) + (repetition-error stx)) + `(repetition ,lo-val ,hi-val ,(parse re))))) + ((union re ...) + `(union ,@(map parse (syntax->list (syntax (re ...)))))) + ((intersection re ...) + `(intersection ,@(map parse (syntax->list (syntax (re ...)))))) + ((complement re ...) + (let ((re-list (syntax->list (syntax (re ...))))) + (unless (= 1 (length re-list)) + (raise-syntax-error 'regular-expression + "must be (complement re)" + stx)) + `(complement ,(parse (car re-list))))) + ((concatenation re ...) + `(concatenation ,@(map parse (syntax->list (syntax (re ...)))))) + ((char-range arg ...) + (let ((arg-list (syntax->list (syntax (arg ...))))) + (unless (= 2 (length arg-list)) + (char-range-error stx)) + (let ((i1 (char-range-arg (car arg-list) stx)) + (i2 (char-range-arg (cadr arg-list) stx))) + (if (<= i1 i2) + `(char-range ,(integer->char i1) ,(integer->char i2)) + (raise-syntax-error + 'regular-expression + (format "first argument ~a does not preceed second argument ~a" + (car arg-list) (cdr arg-list)) + stx))))) + ((char-complement arg ...) + (let ((arg-list (syntax->list (syntax (arg ...))))) + (unless (= 1 (length arg-list)) + (raise-syntax-error + 'regular-expression + "must be (char-complement char-set-re)" + stx)) + (let ((parsed (parse (car arg-list)))) + (unless (pure-char? parsed) + (raise-syntax-error + 'regular-expression + "must be (char-complement char-set-re)" + stx)) + `(char-complement ,parsed)))) + ((op form ...) + (identifier? (syntax op)) + (let ((expansion (syntax-local-value (syntax op) (lambda () #f)))) + (unless (lex-trans? expansion) + (raise-syntax-error 'regular-expression + "undefined operator in" + stx)) + (parse ((lex-trans-f expansion) stx)))) + (_ + (raise-syntax-error + 'regular-expression + "must be char, string, identifier, or (op args ...)" + stx)))) + + + (define (pure-char? s-re) (cond ((char? s-re) #t) @@ -100,8 +114,8 @@ ((list? s-re) (let ((op (car s-re))) (case op - ((: ^) (andmap pure-char? (cdr s-re))) - ((-) #t) + ((union intersection) (andmap pure-char? (cdr s-re))) + ((char-range char-complement) #t) (else #f)))) (else #f))) diff --git a/collects/parser-tools/private-lex/util.ss b/collects/parser-tools/private-lex/util.ss index 721ce0e..13878b0 100644 --- a/collects/parser-tools/private-lex/util.ss +++ b/collects/parser-tools/private-lex/util.ss @@ -4,6 +4,7 @@ (provide (all-defined)) (define-struct lex-abbrev (abbrev)) + (define-struct lex-trans (f)) #;(define-syntax test-block (syntax-rules ()