;;;-*- Mode:common-LISP; Package:SI; Base:10 -*- ;;; RESTRICTED RIGHTS LEGEND ;;;Use, duplication, or disclosure by the Government is subject to ;;;restrictions as set forth in subdivision (c)(1)(ii) of the Rights in ;;;Technical Data and Computer Software clause at 52.227-7013. ;;; TEXAS INSTRUMENTS INCORPORATED. ;;; P.O. BOX 2909 ;;; AUSTIN, TEXAS 78769 ;;; MS 2151 ;;; Copyright (C) 1985-1989 Texas Instruments Incorporated. All rights reserved. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; ;;; ;;; Based on a theory of parsing presented in: ;;; ;;; ;;; ;;; Pratt, Vaughan R., ``Top Down Operator Precedence,'' ;;; ;;; ACM Symposium on Principles of Programming Languages ;;; ;;; Boston, MA; October, 1973. ;;; ;;; ;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; On this page is the tokenizer. The next page is the parser. ;;; After that come the operator definitions. ;;; Macros and functions used by the tokenizer loop. (set-dispatch-macro-character #\# #\ 'infix-toplevel-parse standard-readtable) (set-dispatch-macro-character #\# #\ 'infix-toplevel-parse common-lisp-readtable) (proclaim '(inline infix-readchar)) (defun infix-readchar () "Reads a charcter." (internal-read-char *standard-input*)) (proclaim '(inline infix-unreadchar)) (defun infix-unreadchar (ch) "Unreads a character." (unread-char ch *standard-input*)) (defun infix-readchar-peek () "Reads and then unreads a character, i.e., tells you the next character" (let ((ch (infix-readchar))) (infix-unreadchar ch))) (defmacro infix-return-token (c string) `(progn ,(if c `(infix-unreadchar ,c)) (return (if number-so-far (read-from-string ,string) (if previous-lookup (cdr previous-lookup) (intern string)))))) (defun infix-whitespacep (ch) "Checks to see if the character is a space, return, linefeed, tab, or page." (member ch '(#\Space #\Newline #\Linefeed #\Tab #\Page) :test #'eq)) ;; First step: the tokenizer. (defvar infix-token-table ()) ;; The tokenizer is a simple loop with the character tyi'd pushed on the ;; token buffer after a series of special cases are checked. ;; The boolean state variables could be replaced with predicates ;; that look back on what in in the buffer, however the present implementation ;; is highly straightforward. (defun infix-read-token () (do ((c (infix-skip-whitespace) (infix-readchar)) (string (make-array 40 :element-type 'string-char :fill-pointer 0)) tem (this-quoted-p nil nil) (quoted-p nil) (number-so-far t) (previous-lookup nil)) (nil) (cond ((null c) (if (equal string "") (progn (cerror :no-action () 'read-end-of-file "End of file on ~s within infix expression." *standard-input*) (return ')) (infix-return-token c string))) ((or (infix-whitespacep c) (eql c #\)) (if (equal string "") (return ') (infix-return-token c string))) ((= c #\!) (if (equal string "") (progn (infix-unreadchar #\!) (return '!)) (infix-return-token c string))) ((= c #\\) (setq quoted-p t this-quoted-p t number-so-far ()) (setq c (infix-readchar))) ((= c #\") (if (equal string "") (progn (infix-unreadchar c) (return (read () t))) (infix-return-token c string)))) (vector-push-extend (if this-quoted-p c (char-upcase c)) string) (when number-so-far (unless (infix-number-token-p string) (cond ((= (char-upcase c) #\e) (if (eq number-so-far t) (setq number-so-far 'e) (setq number-so-far ()))) ((<= #\a (char-upcase c) #\z) (setq number-so-far ())) ((and (member c '(#\+ #\-) :test #'eq) (eq number-so-far 'e)) (setq number-so-far 'esign)) ((equal string ".") (if (digit-char-p (infix-readchar-peek)) (setq number-so-far t) (return '|.|))) ((= (length string) 1) (setq number-so-far ())) (t (vector-pop string) (infix-return-token c string))))) (when (and (not previous-lookup) (not number-so-far) (not this-quoted-p) (/= (length string) 1) (dolist (elt infix-token-table) (when (and (= 1 (length (car elt))) (= (aref (car elt) 0) c)) (return t)))) (vector-pop string) (infix-return-token c string)) (unless quoted-p (setq tem (assoc string infix-token-table :test #'string-equal)) (and (null tem) previous-lookup (progn (infix-unreadchar c) (return (cdr previous-lookup)))) (setq previous-lookup tem)))) ;; Skip past whitespace and comments. ;; Return the first nonwhite charater not in a comment. (defun infix-skip-whitespace () (do ((commentp nil) (c)) (nil) (setq c (infix-readchar)) (cond ((null c) (return c)) ((= c #\%) (setq commentp (not commentp))) ((infix-whitespacep c)) ((not commentp) (return c))))) ;; Make an entry for token (a symbol) in our token-table. (defun infix-puttok (token) (unless (assoc (symbol-name token) infix-token-table :test #'equal) (push (cons (symbol-name token) token) infix-token-table))) (defun infix-number-token-p (string) ;; It's more efficient to determine the type of ;; the token by collecting information in state variables ;; as it is read. However we aren't that sure of our bookkeeping. ;; This way we accept whatever the reader does. (ignore-errors (multiple-value-bind (value end-pos) (read-from-string string t) (and (numberp value) (>= end-pos (length string)))))) ;;; The actual parser. (defvar infix-token () "The token waiting to be examined, in parsing an infix expression.") (defvar infix-last-token () "While invoking a token's handlers, this is that token. Infix-token will be the following token.") (defparameter infix-nudl '(infix-start-exp-function)) (defparameter infix-ledl '(infix-continue-exp-function)) (defparameter infix-lbpl '(infix-left-binding-power)) (proclaim '(inline infix-getden)) (defun infix-getden (indl) (and (symbolp infix-token) (cadr (getl infix-token indl)))) (defun infix-toplevel-parse (*standard-input* ignore ignore) (let ((infix-token (infix-read-token))) (infix-parse -1))) ;(defun infix-test (string) ; (with-input-from-string (*standard-input* string) ; (let ((infix-token (infix-read-token))) ; (infix-parse -1)))) (defun infix-parse (right-binding-power) "Reads and returns one expression, using right binding power right-binding-power. Encountering a token whose left binding power is  right-binding-power causes us to return, leaving that token ungobbled." (block () (block top (return (do ((translation ;; process a token that begins an expression (or should do so). (let ((start-exp-function (infix-getden infix-nudl)) (left-binding-power (infix-getden infix-lbpl))) (if start-exp-function (progn (setq infix-token (infix-read-token)) (funcall start-exp-function)) (if left-binding-power (progn (cerror :no-action () 'read-error-1 "Missing token in infix expression before \"~a\"." infix-token) (return ())) (prog1 infix-token (setq infix-token (infix-read-token)))))) ;; process a token that extends an expression. (let ((continue-exp-function (infix-getden infix-ledl))) (if (null continue-exp-function) (progn (cerror :no-action () 'read-error-1 "\"~a\" with left-argument in infix expression." infix-token) (setq infix-token (infix-read-token)) translation) ;ignore this token. (progn (setq infix-token (infix-read-token)) (funcall continue-exp-function translation)))))) (nil) (when (>= right-binding-power (or (infix-getden infix-lbpl) 0)) (return translation)))) ()))) (defun infix-parse-list (right-binding-power separator-token end-token) "Reads a list of expressions, using right-binding-power for each one. We expect expressions in the list to be separated by tokens eq to separator-token and the list to be terminated by a token eq to end-token. The end-token is gobbled. If end-token is nil, we accept any ending token and don't gobble it." (do (accum (first t ())) (nil) (select infix-token (separator-token (setq infix-token (infix-read-token))) (end-token (setq infix-token (infix-read-token)) (return (nreverse accum))) (t (unless first (if end-token (cerror :no-action () 'read-error-1 "\"~a\" read in infix expression where \"~a\" or \"~A\" was expected." infix-token separator-token end-token)) (return (nreverse accum))))) (push (infix-parse right-binding-power) accum))) ;;; Operator-defining macros. (defmacro definfix (token left-binding-power (arg) &body body) "Define token (a symbol) as an infix operator for infix expression parsing. Left-binding-power is a measure of precedence on the left. Arg is a symbol used in body to refer to the expression on the left. Body should parse the arguments on the right and return the expression that contains this operation. Example: (definfix + 50 (left) `(+ ,left ,(infix-parse 51))) 51 is used within to make it left-associative. 47 would be right-associative." `(progn (infix-puttok ',token) (defprop ,token ,left-binding-power infix-left-binding-power) (defun (:property ,token infix-continue-exp-function) (,arg) ,@body))) (defmacro defprefix (token &body body) "Define token (a symbol) as an prefix operator for infix expression parsing. Body should parse the arguments on the right and return the expression that contains this operation. Example: (defprefix - `(- ,(infix-parse 1000)))" `(progn (infix-puttok ',token) (defun (:property ,token infix-start-exp-function) () ,@body))) (defmacro defdelimiter (token) "Define token (a symbol) as a delimiter token for infix expression parsing. This token has no syntax assigned to it; other tokens' definitions will check explicitly for encountering this token. This is used for comma and close parenthesis." `(progn (infix-puttok ',token) (defprop ,token 0 infix-left-binding-power))) (defmacro defrepinfix (token left-binding-power &optional (right-binding-power left-binding-power) (function token)) "Define token as a multi-argument infix operator for infix expression parsing. Token is also used by default as the function for the expression to call, unless you override that by specifying a function. Example: (defrepinfix + 50) makes a+b+c parse into (+ a b c). Right-binding-power better be greater than or equal to left-binding-power; it defaults to be equal." `(definfix ,token ,left-binding-power (left) `(,',function ,left ,@(infix-parse-list ,right-binding-power ',token ())))) ;; Definitions of operators. (defprop  -1 infix-left-binding-power) (defun (:property |(| infix-start-exp-function) () `(progn . ,(infix-parse-list 0 '|,| '|)| ))) (definfix [ 200 (left) `(aref ,left ,@(infix-parse-list 0 '|,| ']))) (defun (:property [ infix-start-exp-function) () `(list . ,(infix-parse-list 0 '|,| ']))) (definfix |(| 200 (left) `(,left . ,(infix-parse-list 0 '|,| '|)|))) (defdelimiter |)|) (defdelimiter |,|) (defdelimiter ]) (definfix |:| 180 (left) `(setf ,left ,(infix-parse 20))) (definfix ^ 140 (left) `(expt ,left ,(infix-parse 139))) (defrepinfix * 120) (defrepinfix / 120) ;;; JK 3/26/87 Remove duplicate definition. ;;;(defrepinfix / 120) (defrepinfix + 100) (defrepinfix - 100) (defprefix - `(- ,(infix-parse 100))) (defprefix + (infix-parse 100)) (definfix |.| 95 (left) (let ((right (infix-parse 94))) (if (and (consp right) (eq (car right) 'list*)) `(list* ,left ,@(cdr right)) `(list* ,left ,right)))) (definfix @ 95 (left) (let ((right (infix-parse 94))) (if (and (consp right) (eq (car right) 'append)) `(append ,left ,@(cdr right)) `(append ,left ,right)))) (definfix  80 (left) `(member ,left ,(infix-parse 79) :test #'eq)) (defrepinfix < 80) (defrepinfix > 80) (defrepinfix = 80) (defrepinfix  80) (defrepinfix  80) (defrepinfix  80) (defprefix not `(not ,(infix-parse 70))) (defrepinfix and 60) (defrepinfix or 50) ;; rbp of ":" (assignment) is 20. (defdelimiter then) (defdelimiter else) (defprefix if () (let ((cond-form (infix-parse 45))) (if (eq infix-token 'then) (setq infix-token (infix-read-token)) (cerror :no-action () 'read-error-1 "No then in an infix if expression.")) (let ((then-forms (infix-parse-list 25 '|,| ())) else-forms) (when (eq infix-token 'else) (setq infix-token (infix-read-token)) (setq else-forms (infix-parse-list 25 '|,| ()))) (cond ((null else-forms) `(when ,cond-form ,@then-forms)) ((null then-forms) `(unless ,cond-form ,@else-forms)) ((null (cdr then-forms)) `(if ,cond-form ,(car then-forms) ,@else-forms)) (t `(if ,cond-form (progn . ,then-forms) ,@else-forms)))))) (defprefix ! () ;; Reading ! as a token untyi's it. So flush the untyi'd one. (infix-readchar) (prog1 (read *standard-input* t () t ) ;; Read in the token that follows the !'d s-expression. (setq infix-token (infix-read-token))))