*** empty log message ***

original commit: 575f1c847171c0e98ac471f36ad0b51a6df19e03
tokens
Scott Owens 20 years ago
parent a1bd7de973
commit c0928f7478

@ -5,7 +5,8 @@
(require-for-syntax "private-lex/util.ss"
"private-lex/actions.ss"
"private-lex/front.ss")
"private-lex/front.ss"
"private-lex/unicode-chars.ss")
(require (lib "readerr.ss" "syntax")
(lib "cffi.ss" "compiler")
@ -13,7 +14,9 @@
(provide lexer lexer-src-pos define-lex-abbrev define-lex-abbrevs
position-offset position-line position-col position?
define-tokens define-empty-tokens token-name token-value token? file-path)
define-tokens define-empty-tokens token-name token-value token? file-path
any-char any-string nothing alphabetic lower-case upper-case title-case
numeric symbolic punctuation graphic whitespace blank iso-control)
(define file-path (make-parameter #f))
@ -249,5 +252,45 @@
(define (get-position ip)
(let-values (((line col off) (port-next-location ip)))
(make-position off line col)))
(define-syntax (create-unicode-abbrevs stx)
(syntax-case stx ()
((_ ctxt)
(with-syntax (((ranges ...) (map (lambda (range)
`(: ,@(map (lambda (x) `(- ,(integer->char (car x))
,(integer->char (cdr x))))
range)))
(list alphabetic-ranges
lower-case-ranges
upper-case-ranges
title-case-ranges
numeric-ranges
symbolic-ranges
punctuation-ranges
graphic-ranges
whitespace-ranges
blank-ranges
iso-control-ranges)))
((names ...) (map (lambda (sym)
(datum->syntax-object (syntax ctxt) sym #f))
'(alphabetic
lower-case
upper-case
title-case
numeric
symbolic
punctuation
graphic
whitespace
blank
iso-control))))
(syntax (define-lex-abbrevs (names ranges) ...))))))
(define-lex-abbrev any-char (^))
(define-lex-abbrev any-string (&))
(define-lex-abbrev nothing (:))
(create-unicode-abbrevs #'here)
)

@ -0,0 +1,69 @@
(module unicode-chars mzscheme
(require "util.ss")
(provide (all-defined))
;; mapped-chars : (listof (list nat nat bool))
(define mapped-chars (make-known-char-range-list))
;; gat-chars-for-x : (nat -> bool) (listof (list nat nat bool)) -> (listof (cons nat nat))
(define (get-chars-for char-x? mapped-chars)
(cond
((null? mapped-chars) null)
(else
(let* ((range (car mapped-chars))
(low (car range))
(high (cadr range))
(x (char-x? low)))
(cond
((caddr range)
(if x
(cons (cons low high)
(get-chars-for char-x? (cdr mapped-chars)))
(get-chars-for char-x? (cdr mapped-chars))))
(else
(let loop ((range-start low)
(i (car range))
(parity x))
(cond
((> i high)
(if parity
(cons (cons range-start high) (get-chars-for char-x? (cdr mapped-chars)))
(get-chars-for char-x? (cdr mapped-chars))))
((eq? parity (char-x? i))
(loop range-start (add1 i) parity))
(parity
(cons (cons range-start (sub1 i)) (loop i (add1 i) #f)))
(else
(loop i (add1 i) #t))))))))))
(define (compute-ranges x?)
(get-chars-for (lambda (x) (x? (integer->char x))) mapped-chars))
(define alphabetic-ranges (compute-ranges char-alphabetic?)) ;; 325
(define lower-case-ranges (compute-ranges char-lower-case?)) ;; 405
(define upper-case-ranges (compute-ranges char-upper-case?)) ;; 380
(define title-case-ranges (compute-ranges char-title-case?)) ;; 10
(define numeric-ranges (compute-ranges char-numeric?)) ;; 47
(define symbolic-ranges (compute-ranges char-symbolic?)) ;; 153
(define punctuation-ranges (compute-ranges char-punctuation?)) ;; 86
(define graphic-ranges (compute-ranges char-graphic?)) ;; 401
(define whitespace-ranges (compute-ranges char-whitespace?)) ;; 10
(define blank-ranges (compute-ranges char-blank?)) ;; 9
#;(define hexadecimal-ranges (compute-ranges char-hexadecimal?))
(define iso-control-ranges (compute-ranges char-iso-control?)) ;; 2
(test-block ()
((get-chars-for odd? '()) '())
((get-chars-for odd? '((1 4 #f) (8 13 #f))) '((1 . 1) (3 . 3) (9 . 9) (11 . 11) (13 . 13)))
((get-chars-for (lambda (x)
(odd? (quotient x 10)))
'((1 5 #t) (17 19 #t) (21 51 #f)))
'((17 . 19) (30 . 39) (50 . 51))))
)
Loading…
Cancel
Save