#lang racket/base ;; Constructs to create and access grammars, the internal ;; representation of the input to the parser generator. (require racket/class yaragg/parser-tools/private-yacc/yacc-helper racket/contract) (provide ;; Things that work on items start-item? item-prod item->string sym-at-dot move-dot-right itemstring non-term? term? non-termbit-vector term-index non-term-index ;; Things that work on precs prec-num prec-assoc grammar% ;; Things that work on productions prod-index prod-prec prod-rhs prod-lhs prod-action (contract-out [item (prod? (or/c #f natural-number/c) . -> . item?)] [term (symbol? (or/c #f natural-number/c) (or/c prec? #f) . -> . term?)] [non-term (symbol? (or/c #f natural-number/c) . -> . non-term?)] [prec (natural-number/c (or/c 'left 'right 'nonassoc) . -> . prec?)] [prod (non-term? (vectorof (or/c non-term? term?)) (or/c #f natural-number/c) (or/c #f prec?) syntax? . -> . prod?)])) ;; Each production has a unique index 0 <= index <= number of productions (struct prod (lhs rhs index prec action) #:mutable) ;; The dot-pos field is the index of the element in the rhs ;; of prod that the dot immediately precedes. ;; Thus 0 <= dot-pos <= (vector-length rhs). (struct item (prod dot-pos) #:transparent) ;; gram-sym = (union term? non-term?) ;; Each term has a unique index 0 <= index < number of terms ;; Each non-term has a unique index 0 <= index < number of non-terms (struct term (sym index prec) #:mutable) (struct non-term (sym index) #:mutable) ;; a precedence declaration. (struct prec (num assoc) #:transparent) ;;---------------------- LR items -------------------------- ;; item bool ;; Lexicographic comparison on two items. (define (item bool ;; The start production always has index 0 (define (start-item? i) (zero? (non-term-index (prod-lhs (item-prod i))))) ;; move-dot-right: LR-item -> LR-item | #f ;; moves the dot to the right in the item, unless it is at its ;; rightmost, then it returns false (define (move-dot-right i) (cond [(= (item-dot-pos i) (vector-length (prod-rhs (item-prod i)))) #f] [else (item (item-prod i) (add1 (item-dot-pos i)))])) ;; sym-at-dot: LR-item -> gram-sym | #f ;; returns the symbol after the dot in the item or #f if there is none (define (sym-at-dot i) (define dp (item-dot-pos i)) (define rhs (prod-rhs (item-prod i))) (cond [(= dp (vector-length rhs)) #f] [else (vector-ref rhs dp)])) ;; print-item: LR-item -> (define (item->string it) (define (print-sym i) (define gs (vector-ref (prod-rhs (item-prod it)) i)) (cond ((term? gs) (format "~a " (term-sym gs))) (else (format "~a " (non-term-sym gs))))) (string-append (format "~a -> " (non-term-sym (prod-lhs (item-prod it)))) (let loop ((i 0)) (cond [(= i (vector-length (prod-rhs (item-prod it)))) (if (= i (item-dot-pos it)) ". " "")] [(= i (item-dot-pos it)) (string-append ". " (print-sym i) (loop (add1 i)))] [else (string-append (print-sym i) (loop (add1 i)))])))) ;; --------------------- Grammar Symbols -------------------------- (define (non-termstring gs) (symbol->string (gram-sym-symbol gs))) ;; term-list->bit-vector: term list -> int ;; Creates a number where the nth bit is 1 if the term with index n is in ;; the list, and whose nth bit is 0 otherwise (define (term-list->bit-vector terms) (if (null? terms) 0 (bitwise-ior (arithmetic-shift 1 (term-index (car terms))) (term-list->bit-vector (cdr terms))))) ;; ------------------------- Grammar ------------------------------ ;; prods: production list list ;; where there is one production list per non-term ;; init-prods: production list ;; The productions parsing can start from ;; nullable-non-terms is indexed by the non-term-index and is true iff non-term is nullable (struct grammar (prods init-prods terms non-terms end-terms all-prods num-prods num-terms num-non-terms nt->prods nullable-non-terms)) (define (make-grammar #:prods prods #:init-prods init-prods #:terms terms #:non-terms non-terms #:end-terms end-terms) (define all-prods (apply append prods)) (define num-prods (length all-prods)) (define num-terms (length terms)) (define num-non-terms (length non-terms)) (for ([(nt count) (in-indexed non-terms)]) (set-non-term-index! nt count)) (for ([(t count) (in-indexed terms)]) (set-term-index! t count)) (for ([(prod count) (in-indexed all-prods)]) (set-prod-index! prod count)) ;; indexed by the index of the non-term - contains the list of productions for that non-term (define nt->prods (make-vector (length prods) #f)) (for ([prods (in-list prods)]) (vector-set! nt->prods (non-term-index (prod-lhs (car prods))) prods)) (define nullable-non-terms (nullable all-prods num-non-terms)) (grammar prods init-prods terms non-terms end-terms all-prods num-prods num-terms num-non-terms nt->prods nullable-non-terms)) (define (grammar-prods-for-non-term g nt) (vector-ref (grammar-nt->prods g) (non-term-index nt))) (define (grammar-nullable-non-term? g nt) (vector-ref (grammar-nullable-non-terms g) (non-term-index nt))) (define (grammar-nullable-after-dot? g item) (define rhs (prod-rhs (item-prod item))) (define prod-length (vector-length rhs)) (let loop ((i (item-dot-pos item))) (cond [(< i prod-length) (and (non-term? (vector-ref rhs i)) (grammar-nullable-non-term? g (vector-ref rhs i)) (loop (add1 i)))] [(= i prod-length)]))) (define ((grammar-nullable-non-term-thunk g) nt) (grammar-nullable-non-term? g nt)) (define ((grammar-nullable-after-dot?-thunk g) item) (grammar-nullable-after-dot? g item)) (define grammar% (class object% (super-instantiate ()) (init prods init-prods terms non-terms end-terms) (define backing-struct (make-grammar #:prods prods #:init-prods init-prods #:terms terms #:non-terms non-terms #:end-terms end-terms)) (define/public (get-num-terms) (grammar-num-terms backing-struct)) (define/public (get-num-non-terms) (grammar-num-non-terms backing-struct)) (define/public (get-prods-for-non-term nt) (grammar-prods-for-non-term backing-struct nt)) (define/public (get-prods) (grammar-all-prods backing-struct)) (define/public (get-init-prods) (grammar-init-prods backing-struct)) (define/public (get-terms) (grammar-terms backing-struct)) (define/public (get-non-terms) (grammar-non-terms backing-struct)) (define/public (get-num-prods) (grammar-num-prods backing-struct)) (define/public (get-end-terms) (grammar-end-terms backing-struct)) (define/public (nullable-non-term? nt) (grammar-nullable-non-term? backing-struct nt)) (define/public (nullable-after-dot? item) (grammar-nullable-after-dot? backing-struct item)) (define/public (nullable-non-term-thunk) (grammar-nullable-non-term-thunk backing-struct)) (define/public (nullable-after-dot?-thunk) (grammar-nullable-after-dot?-thunk backing-struct)))) ;; nullable: production list * int -> non-term set ;; determines which non-terminals can derive epsilon (define (nullable prods num-nts) (define nullable (make-vector num-nts #f)) (define added #f) ;; possible-nullable: producion list -> production list ;; Removes all productions that have a terminal (define (possible-nullable prods) (for/list ([prod (in-list prods)] #:when (vector-andmap non-term? (prod-rhs prod))) prod)) ;; set-nullables: production list -> production list ;; makes one pass through the productions, adding the ones ;; known to be nullable now to nullable and returning a list ;; of productions that we don't know about yet. (define (set-nullables prods) (cond [(null? prods) null] [(vector-ref nullable (gram-sym-index (prod-lhs (car prods)))) (set-nullables (cdr prods))] [(vector-andmap (λ (nt) (vector-ref nullable (gram-sym-index nt))) (prod-rhs (car prods))) (vector-set! nullable (gram-sym-index (prod-lhs (car prods))) #t) (set! added #t) (set-nullables (cdr prods))] [else (cons (car prods) (set-nullables (cdr prods)))])) (let loop ((P (possible-nullable prods))) (cond [(null? P) nullable] [else (set! added #f) (define new-P (set-nullables P)) (if added (loop new-P) nullable)])))