From c0928f7478e7b7efe3ce05a44d9ad0a3dff89aa7 Mon Sep 17 00:00:00 2001 From: Scott Owens Date: Fri, 23 Apr 2004 23:35:40 +0000 Subject: [PATCH] *** empty log message *** original commit: 575f1c847171c0e98ac471f36ad0b51a6df19e03 --- collects/parser-tools/lex.ss | 47 ++++++++++++- .../parser-tools/private-lex/unicode-chars.ss | 69 +++++++++++++++++++ 2 files changed, 114 insertions(+), 2 deletions(-) create mode 100644 collects/parser-tools/private-lex/unicode-chars.ss diff --git a/collects/parser-tools/lex.ss b/collects/parser-tools/lex.ss index 04bba9d..3ffcf53 100644 --- a/collects/parser-tools/lex.ss +++ b/collects/parser-tools/lex.ss @@ -5,7 +5,8 @@ (require-for-syntax "private-lex/util.ss" "private-lex/actions.ss" - "private-lex/front.ss") + "private-lex/front.ss" + "private-lex/unicode-chars.ss") (require (lib "readerr.ss" "syntax") (lib "cffi.ss" "compiler") @@ -13,7 +14,9 @@ (provide lexer lexer-src-pos define-lex-abbrev define-lex-abbrevs position-offset position-line position-col position? - define-tokens define-empty-tokens token-name token-value token? file-path) + define-tokens define-empty-tokens token-name token-value token? file-path + any-char any-string nothing alphabetic lower-case upper-case title-case + numeric symbolic punctuation graphic whitespace blank iso-control) (define file-path (make-parameter #f)) @@ -249,5 +252,45 @@ (define (get-position ip) (let-values (((line col off) (port-next-location ip))) (make-position off line col))) + + + (define-syntax (create-unicode-abbrevs stx) + (syntax-case stx () + ((_ ctxt) + (with-syntax (((ranges ...) (map (lambda (range) + `(: ,@(map (lambda (x) `(- ,(integer->char (car x)) + ,(integer->char (cdr x)))) + range))) + (list alphabetic-ranges + lower-case-ranges + upper-case-ranges + title-case-ranges + numeric-ranges + symbolic-ranges + punctuation-ranges + graphic-ranges + whitespace-ranges + blank-ranges + iso-control-ranges))) + ((names ...) (map (lambda (sym) + (datum->syntax-object (syntax ctxt) sym #f)) + '(alphabetic + lower-case + upper-case + title-case + numeric + symbolic + punctuation + graphic + whitespace + blank + iso-control)))) + (syntax (define-lex-abbrevs (names ranges) ...)))))) + + (define-lex-abbrev any-char (^)) + (define-lex-abbrev any-string (&)) + (define-lex-abbrev nothing (:)) + (create-unicode-abbrevs #'here) + ) diff --git a/collects/parser-tools/private-lex/unicode-chars.ss b/collects/parser-tools/private-lex/unicode-chars.ss new file mode 100644 index 0000000..26379bc --- /dev/null +++ b/collects/parser-tools/private-lex/unicode-chars.ss @@ -0,0 +1,69 @@ +(module unicode-chars mzscheme + (require "util.ss") + + (provide (all-defined)) + + ;; mapped-chars : (listof (list nat nat bool)) + (define mapped-chars (make-known-char-range-list)) + + + ;; gat-chars-for-x : (nat -> bool) (listof (list nat nat bool)) -> (listof (cons nat nat)) + (define (get-chars-for char-x? mapped-chars) + (cond + ((null? mapped-chars) null) + (else + (let* ((range (car mapped-chars)) + (low (car range)) + (high (cadr range)) + (x (char-x? low))) + (cond + ((caddr range) + (if x + (cons (cons low high) + (get-chars-for char-x? (cdr mapped-chars))) + (get-chars-for char-x? (cdr mapped-chars)))) + (else + (let loop ((range-start low) + (i (car range)) + (parity x)) + (cond + ((> i high) + (if parity + (cons (cons range-start high) (get-chars-for char-x? (cdr mapped-chars))) + (get-chars-for char-x? (cdr mapped-chars)))) + ((eq? parity (char-x? i)) + (loop range-start (add1 i) parity)) + (parity + (cons (cons range-start (sub1 i)) (loop i (add1 i) #f))) + (else + (loop i (add1 i) #t)))))))))) + + + (define (compute-ranges x?) + (get-chars-for (lambda (x) (x? (integer->char x))) mapped-chars)) + + (define alphabetic-ranges (compute-ranges char-alphabetic?)) ;; 325 + (define lower-case-ranges (compute-ranges char-lower-case?)) ;; 405 + (define upper-case-ranges (compute-ranges char-upper-case?)) ;; 380 + (define title-case-ranges (compute-ranges char-title-case?)) ;; 10 + (define numeric-ranges (compute-ranges char-numeric?)) ;; 47 + (define symbolic-ranges (compute-ranges char-symbolic?)) ;; 153 + (define punctuation-ranges (compute-ranges char-punctuation?)) ;; 86 + (define graphic-ranges (compute-ranges char-graphic?)) ;; 401 + (define whitespace-ranges (compute-ranges char-whitespace?)) ;; 10 + (define blank-ranges (compute-ranges char-blank?)) ;; 9 + #;(define hexadecimal-ranges (compute-ranges char-hexadecimal?)) + (define iso-control-ranges (compute-ranges char-iso-control?)) ;; 2 + + + + + (test-block () + ((get-chars-for odd? '()) '()) + ((get-chars-for odd? '((1 4 #f) (8 13 #f))) '((1 . 1) (3 . 3) (9 . 9) (11 . 11) (13 . 13))) + ((get-chars-for (lambda (x) + (odd? (quotient x 10))) + '((1 5 #t) (17 19 #t) (21 51 #f))) + '((17 . 19) (30 . 39) (50 . 51)))) + + ) \ No newline at end of file