;;;; Modifications to Arc ;;;; John Fox ;;;; last modified 20 April 1999 ;;; some functions for manipulating strings (defun string-find (string chars &optional (start 0)) "Function ARGS: (STRING CHARS &optional (START 0)) Returns the position of the first occurrence starting at position START of the string CHARS in the string STRING, or NIL if CHARS doesn't appear in STRING." (let ((len (length chars)) (length (length string))) (if (<= (+ start len) length) (dolist (posn (iseq start (- length len))) (if (string-equal string chars :start1 posn :end1 (+ posn len)) (return posn)))))) (defun insert-characters (string position chars) "Function ARGS: (STRING POSITION CHARS) Inserts the string CHARS after POSITION in STRING. If POSITION is negative, then CHARS is inserted at the start of STRING; if POSITION >= 1 less than the length of STRING, then CHARS is inserted at the end of STRING." (cond ((< position 0) (concatenate 'string chars string)) ((>= position (1- (length string))) (concatenate 'string string chars)) (t (let ((start (select string (iseq 0 position))) (end (select string (iseq (1+ position) (1- (length string)))))) (concatenate 'string start chars end))))) (defun delete-characters (string position len) "Function ARGS: (STRING POSITION LEN) Deletes LEN characters from STRING starting at POSITION." (cond ((= position 0) (concatenate 'string (select string (iseq len (1- (length string)))))) ((= (length string) (+ position len)) (concatenate 'string (select string (iseq 0 (- (length string) len 1))))) (t (let* ((last (1- (length string))) (start (select string (iseq 0 (- position 1)))) (end (select string (iseq (+ position len) last)))) (concatenate 'string start end))))) (defun string-replace (string chars with-chars &key (times 'all) (posn 0)) "Function ARGS: (STRING CHARS WITH-CHARS &key (TIMES 'ALL) (POSN 0)) Starting at position POSN, replace up to TIMES occurrences of the string CHARS in STRING with the string WITH-CHARS" (let* ((len (length chars)) (len-with (length with-chars)) (length (length string)) (repeat (if (equal times 'all) (- length len) times))) (if (or (= repeat 0) (> (+ posn len -2) length)) string (let ((position (string-find string chars posn))) (if (not position) string (let ((new-string (insert-characters (delete-characters string position len) (1- position) with-chars))) (string-replace new-string chars with-chars :times (- repeat 1) :posn (+ position len-with)))))))) ;;; Add recoding capability to Arc datasets (defmeth dataset-proto :recode-dialog () (let* ((title (send text-item-proto :new "Recode a Variable")) (names (send self :types (append '(:variate :text)))) (all (send allocate-items-proto :new)) (candidates-l (send subordinate-list-proto :new names 'primary all)) (candidates-n (send text-item-proto :new "Candidates")) (old-variable-l (send subordinate-list-proto :new (list "") 'secondary all)) (old-variable-n (send text-item-proto :new "Variable to recode")) (new-variable-l (send edit-text-item-proto :new "" :text-length 20)) (new-variable-n (send text-item-proto :new "New variable name (optional)")) (expression-l (send edit-text-item-proto :new "" :text-length 60)) (expression-n (send text-item-proto :text "Recode expression")) (cancel (send modal-button-proto :new "Cancel" :action #'(lambda () nil))) (done (send modal-button-proto :new "OK" :action #'(lambda () (list NIL (send all :value) (send new-variable-l :text) (send expression-l :text))))) (again (send modal-button-proto :new "Again" :action #'(lambda () (list T (send all :value) (send new-variable-l :text) (send expression-l :text))))) (dialog (send modal-dialog-proto :new (list (list title) (list (list candidates-n candidates-l) (list old-variable-n old-variable-l) (list new-variable-n new-variable-l)) (list (list expression-n expression-l)) (list done again cancel))))) (send dialog :default-button done) (send dialog :modal-dialog))) (rc-menu-item 'recode-menu-item "Recode variable..." :recode) (defparameter *arc-data-menu-items* (append (list recode-menu-item) *arc-data-menu-items*)) (defmeth dataset-proto :recode () (let ((ans (send self :recode-dialog))) (when ans (let* ((old-var (second (second ans))) (new-var? (third ans)) (new-var (if (equal "" new-var?) (first old-var) new-var?)) (expression (fourth ans)) (old-data (send self :name-to-data (first old-var))) (new-data (recode-variable old-data expression))) (send self :variate :data new-data :name new-var :info (format nil "Recode of ~a: ~a" (first old-var) expression))) (if (first ans) (send self :recode))))) (defun all (x) "Args (x) Returns T if all of the elements of the sequence X are T, otherwise returns nil." (if (null x) t (if (null (car x)) nil (all (cdr x))))) (defun recode-variable (variable expression) "Args: (variable expression) Recodes VARIABLE according to the instructions in the string EXPRESSION, returning the recoded variable. Recoding instructions are of the form (old values -> new-value), (min:max -> new-value), (min: -> new-value), or (:max -> new-value), and EXPRESSION consists of a sequence of such specifications; if a value matches more than one specification, the first one is effective. A list of old values can be separated by spaces or commas. In formulating recode expressions, you can use = in place of ->; to in place of : ; lo or low for the smallest value of the variable; and hi or high for the largest value. Finally, the last specification can be of the form (else -> new-value). Examples: (recode-variable education \"(:11->1)(12->2)(13:->3)\") (recode-variable numbers \"(1 3 5 -> \"odd\")(2,4,6=\"even\")\") (recode-variable \"(low to 30=1)(else=2)\")" (let* ((expressn (string-replace (string-replace (string-replace (string-replace expression "=" "->") ":" " to ") "->" " -> ") "," " ")) (var (remove-missing variable)) (numeric? (all (mapcar #'numberp var))) (low (if numeric? (min var))) (high (if numeric? (max var))) (variable2 (copy-list variable)) (result (copy-list variable)) (expr (reverse (read-from-string (concatenate 'string "(" expressn ")"))))) (setf (select variable2 (find-missing variable2)) '?) (dolist (term expr) (if (equal 'else (first term)) (progn (if (or (/= (length term) 3) (not (equal (second term) '->))) (error "Recode expression not well formed: ~a" term)) (setf result (repeat (third term) (length result)))) (if (member 'to term) (let ((trm (subst low 'low (subst low 'lo (subst high 'high (subst high 'hi term)))))) (cond ((and (equal (position 'to trm) 0) (equal (length trm) 4) (equal (third trm) '->)) (setf (select result (which (<= variable (second trm)))) (fourth trm))) ((and (equal (position 'to trm) 1) (equal (length trm) 5) (equal (fourth trm) '->)) (setf (select result (which (pand (>= variable (first trm)) (<= variable (third trm))))) (fifth trm))) ((and (equal (position 'to trm) 1) (equal (length trm) 4) (equal (third trm) '->)) (setf (select result (which (>= variable (first trm)))) (fourth trm))) (t (error "Recode expression not well formed: ~a" trm)))) (let ((old-codes (select term (iseq (- (length term) 2)))) (new-code (first (reverse term))) (-> (second (reverse term)))) (when (not (equal -> '->)) (error "Recode expression not well formed: ~a" term)) (setf (select result (where old-codes variable2)) new-code))))) (when (not (member '? (mapcar #'first expr))) (setf (select result (find-missing variable)) not-a-number)) result)) (defun pand (a b) "Args: (a b) Parallel AND for the sequences A and B" (let ((result (repeat nil (min (length a) (length b))))) (dotimes (i (length result) result) (setf (select result i) (and (select a i) (select b i)))))) (defun where (list1 list2) "Args: (list1 list2) Finds the positions of all elements of LIST1 in LIST2 (or NIL). Example: (where '(1 2) '(1 2 3 4 1 2 3 4))" (apply #'append (map-elements #'(lambda (element) (which (map-elements #'equal element list2))) (cons nil list1)))) ;;; F-tests and Wald tests (defmeth glim-proto :degrees-of-freedom-term (term) "Message args: (term) Returns the degrees of freedom for a term in the model." (let* ((ds (send self :get-dataset)) (x (send ds :data term))) (length x))) (defmeth normalreg-proto :test-linear-hypothesis (hyp &key (right-hand-side 0) (quiet nil)) "Args: (hyp &key (right-hand-side 0) (quiet nil)) Method for computing the F-statistic for a linear hypothesis in a regression model. hyp is a hypothesis matrix, each row specifying a linear combination of coefficients, with as many columns as coefficients in the model. right-hand-side is a scalar or vector giving the right-hand-side of the hypothesis (default 0). If quiet is t, then a vector of the sum of squares, F, numerator and denominator df, and p-value for the hypothesis is returned. If quiet is nil (the default), then the method prints a report." (let* ((h (if (matrixp hyp) hyp (matrix (cons '1 (list (length hyp))) hyp))) (dfh (first (array-dimensions h))) (dfe (send self :df)) (sse (send self :sum-of-squares)) (xtxi (send self :xtxinv)) (b (send self :coef-estimates)) (s (send self :sigma-hat)) (v (* (^ s 2) xtxi)) (num (%* (-(%* b (transpose h)) right-hand-side) (inverse (%* h v (transpose h))) (- (%* h b) right-hand-side))) (f (/ num dfh)) (ss (/ (* num sse) dfe)) (p-value (- 1 (f-cdf f dfh dfe)))) (if quiet (list ss f dfh dfe p-value) (format t "sum of squares = ~a F[~a\,~a] = ~a p = ~,4f~%" ss dfh dfe f p-value)))) (defmeth glim-proto :test-linear-hypothesis (hyp &key (right-hand-side 0) (quiet nil)) "Args: (hyp &key (right-hand-side 0) (quiet nil)) Method for computing the Wald test for a linear hypothesis in a generalized linear model. hyp is a hypothesis matrix, each row specifying a linear combination of coefficients, with as many columns as coefficients in the model. right-hand-side is a scalar or vector giving the right-hand-side of the hypothesis (default 0). If quiet is t, then a vector of the chisquare, df, and p-value for the hypothesis is returned. If quiet is nil (the default), then the method prints a report." (let* ((h (if (matrixp hyp) hyp (matrix (cons '1 (list (length hyp))) hyp))) (dfh (first (array-dimensions h))) (v (send self :xtxinv)) (b (send self :coef-estimates)) (wald (%* (-(%* b (transpose h)) right-hand-side) (inverse (%* h v (transpose h))) (- (%* h b) right-hand-side))) (p-value (- 1 (chisq-cdf wald dfh)))) (if quiet (list wald dfh p-value) (format t "Wald chisquare statistic = ~a df = ~a p = ~,4f~%" wald dfh p-value)))) ;; this function is a work-around for a bug in inverse (setf inv #'inverse) (defun inverse (x) (if (= 1 (reduce #'* (array-dimensions x))) (matrix '(1 1) (list (/ 1 (aref x 0 0)))) (apply inv (list x)))) (defmeth normalreg-proto :f-tests () "Message args: () Prints an analysis-of-variance table with an F-test for each term in the model, using the sum of squares for that term 'after' all of the other terms." (let* ((terms (send self :predictors)) (number-terms (length terms)) (df (mapcar #'(lambda (term) (send self :degrees-of-freedom-term term)) terms))) (when (/= (sum df)(length (send self :basis))) (format t "~%Coefficients aliased, no tests calculated.~%") (return-from :f-tests)) (let* ((pos (repeat (iseq number-terms) df)) (positions (if (send self :intercept) (adjoin -1 pos) pos)) (imat (identity-matrix (length positions))) (all-cols (iseq (length positions))) (test (if (send self :intercept) (send self :test-linear-hypothesis (select imat 0 all-cols) :quiet t)))) (format t "~%~% F-Tests for terms in Model ~a" (send self :name)) (format t "~%Sum of Squares df F p Source~%") (format t "----------------------------------------------------------~%") (if (send self :intercept) (format t "~15g ~8d ~10g ~10,5f ~a~%" (select test 0) (select test 2) (select test 1) (select test 4) "Constant")) (dotimes (term number-terms) (let* ((df-term (select df term)) (pos-term (which (= term positions))) (hyp-matrix (select imat pos-term all-cols)) (test (send self :test-linear-hypothesis hyp-matrix :quiet t))) (format t "~15g ~8d ~10g ~10,5f ~a~%" (select test 0) (select test 2) (select test 1) (select test 4) (select terms term))))))) (defmeth glim-proto :wald-tests () "Message args: () Prints a Wald chisquare test for each term in the model, testing the hypothesis that the term (i.e., set of coefficients) is zero." (let* ((terms (send self :predictors)) (number-terms (length terms)) (df (mapcar #'(lambda (term) (send self :degrees-of-freedom-term term)) terms))) (when (/= (sum df)(length (send self :basis))) (format t "~%Coefficients aliased, no tests calculated.~%") (return-from :wald-tests)) (let* ((pos (repeat (iseq number-terms) df)) (positions (if (send self :intercept) (adjoin -1 pos) pos)) (imat (identity-matrix (length positions))) (all-cols (iseq (length positions))) (test (if (send self :intercept) (send self :test-linear-hypothesis (select imat 0 all-cols) :quiet t)))) (format t "~%~% Wald-Tests for terms in Model ~a" (send self :name)) (format t "~% df Wald Chisq. p Source~%") (format t "------------------------------------------------~%") (if (send self :intercept) (format t "~8d ~10g ~10,5f ~a~%" (select test 1)(select test 0) (select test 2)"Constant")) (dotimes (term number-terms) (let* ((df-term (select df term)) (pos-term (which (= term positions))) (hyp-matrix (select imat pos-term all-cols)) (test (send self :test-linear-hypothesis hyp-matrix :quiet t))) (format t "~8d ~10g ~10,5f ~a~%" (select test 1) (select test 0) (select test 2) (select terms term))))))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; This adds a :pca method to the spin-proto. Sending the message to an ;; instance will add the three principal axes, in color. If the call is ;; (send p :pca :allaxes nil) then only the first principal axes is drawn ;; (in red, of course). If (send p :pca :erase t) is used, then the ;; axes are erased. ;; ;; Version 0.5: November 20, 1996; Jan de Leeuw ;; Version 0.6: November 24, 1996; John Fox added color and :erase ;; Version 1.0: November 27, 1996; Jan de Leeuw added :allaxes and ;; formatted printoutput ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (defmeth spin-proto :pca (&key (colors t) (allaxes t) erase) (let* ((n (send self :num-points)) (x (send self :point-coordinate 0 (iseq n))) (y (send self :point-coordinate 1 (iseq n))) (z (send self :point-coordinate 2 (iseq n))) (c (* (1- n) (covariance-matrix x y z))) (m (list (mean x) (mean y) (mean z))) (g (eigen c)) (e (second g)) (h (first g)) (s (sum h)) (f (sqrt h)) (cols (if colors (list 'red 'yellow 'blue) (list nil nil nil))) (axis-colors (if erase (repeat (send self :back-color) 3) cols))) (send self :dircos m (first e) (/ (elt f 0) 2) :color (select axis-colors 0)) (if allaxes (send self :dircos m (second e) (/ (elt f 1) 2) :color (select axis-colors 1))) (if allaxes (send self :dircos m (third e) (/ (elt f 2) 2) :color (select axis-colors 2))) (format t "~10,6f ~10,6f ~10,6f~%" (elt (first e) 0) (elt (first e) 1) (elt (first e) 2)) (if allaxes (format t "~10,6f ~10,6f ~10,6f~%" (elt (second e) 0) (elt (second e) 1) (elt (second e) 2))) (if allaxes (format t "~10,6f ~10,6f ~10,6f~%" (elt (third e) 0) (elt (third e) 1) (elt (third e) 2))) (format t "~10,6f ~10,6f ~10,6f~%" (/ (elt h 0) s) (/ (elt h 1) s) (/ (elt h 2) s)) )) (defmeth spin-proto :dircos (m w u &key color) (send self :abline (+ m (* u w)) (- m (* u w)) :color color) ) (defmeth spin-proto :abline (a b &key color) (let ((line-color (if color color (send self :draw-color)))) (send self :add-lines (make-pairs a b) :color line-color)) ) (defun make-pairs (x y) (let ((n (length x))) (mapcar #'(lambda (z) (list (elt x z) (elt y z))) (iseq n)) )) ;;; Add generalized variance-inflation factors to regression models (defun delete-missing (variables &key (miss not-a-number) (test #'eq)) "Args: (variables &key (miss not-a-number) (test #'eq)) VARIABLES is a list of sequences, containing the variables; MISS gives the missing data code (default not-a-number); TEST gives the equality test (default #'eq). The function returns a list a sequences containing only observations with valid data for all variables. Example: (delete-missing (list x y z) :miss '*)" (let* ((bad (remove-duplicates (combine (mapcar #'(lambda (x) (which (map-elements test miss x))) variables)))) (all (iseq (length (first variables)))) (good (sort-data (set-difference all bad)))) (mapcar #'(lambda (x) (select x good)) variables))) (defun correlation-matrix (x) "Args: (x) X is a data matrix; correlations are calculated and returned for the columns of X, removing rows with missing data, represented by not-a-number." (let* ((x-cols (delete-missing (column-list x))) (x-valid (apply #'bind-columns x-cols)) (s (covariance-matrix x-valid)) (sd (^ (diagonal s) .5)) (d (outer-product sd sd))) (/ s d))) (defmeth normalreg-proto :vifs () "Message args: () Prints the generalized variance-inflation factor for each term in the model. For 1 df effects, this is just the usual VIF." (when (not (send self :intercept)) (format t "~%No intercept, no VIFs calculated.~%") (return from :vifs)) (let* ((r (correlation-matrix (send self :x))) (r-inv (inverse r)) (det-r (determinant r)) (terms (send self :predictors)) (number-terms (length terms)) (df (mapcar #'(lambda (term) (send self :degrees-of-freedom-term term)) terms))) (when (/= (sum df)(length (send self :basis))) (format t "~%Coefficients aliased, no VIFs calculated.~%") (return-from :vifs)) (let ((positions (repeat (iseq number-terms) df))) (format t "~%Generalized Variance-Inflation Factors for Model ~a" (send self :name)) (format t "~% df GVIF GVIF^(1/2df) term") (format t "~%----------------------------------------~%") (dotimes (term number-terms) (let* ((df-term (select df term)) (pos-term (which (= term positions))) (pos-others (which (/= term positions))) (vif (if (= df-term 1) (aref r-inv (first pos-term) (first pos-term)) (/ (* (determinant (select r pos-term pos-term)) (determinant (select r pos-others pos-others))) det-r))) (root-vif (^ vif (/ 1 (* 2 df-term))))) (format t "~4d ~10g ~10g ~a~%" df-term vif root-vif (select terms term))))))) ;;; modify Arc menus (rc-menu-item 'F-tests-menu-item "F-tests" :f-tests) (rc-menu-item 'VIFs-menu-item "Variance-Inflation Factors" :vifs) (defparameter *arc-linear-regression-menu-items* (append *arc-linear-regression-menu-items* (list 'F-tests-menu-item 'VIFs-menu-item 'dash))) (rc-menu-item 'wald-tests-menu-item "Wald tests" :wald-tests) (defparameter *arc-glm-regression-menu-items2* (append *arc-glm-regression-menu-items2* (list 'wald-tests-menu-item 'dash))) ;;; Function to calculate maximum-likelihood estimates ;;; for Box-Tidwell regression model ;;; last modified: 4/4/98 J. Fox (defun box-tidwell (x y &key (other-x nil) (tol .01)(quiet nil)) "Args: (x y &key (other-x nil) (tol .01) (score-test t)) Calculates and returns the maximum-likelihood estimates of the transformation parameters for the Box-Tidwell regression model. x a list of lists or vectors containing the independent variables that are candidates for transformation y a list or vector of dependent-variable values :other-x a list of lists or vectors containing independent variables that are not candidates for transformation; default NIL. :tol convergence criterion. When the maximum change in the transformation estimates is less than tol iterations cease. :quiet if T, iteration history is suppressed; default NIL. Example: (box-tidwell (list income education) prestige :other-x (list percent-women)" (let* ((max (mapcar #'max x)) (xs (mapcar #'/ x max)) (k (iseq 1 (length x))) (first-r (regression-model (append xs other-x) y :print nil )) (b0 (select (send first-r :coef-estimates) k)) (x-log-x (* xs (log xs))) (first-r0 (regression-model (append x-log-x xs other-x) y :print nil )) (c0 (select (send first-r0 :coef-estimates) k)) (p (1+ (/ c0 b0))) (p-final p) (se0 (select (send first-r0 :coef-standard-errors) k)) (z0 (abs (/ c0 se0))) (p0 (* 2 (- 1 (normal-cdf z0))))) (format t "score statistics ~a~%p-values ~a" z0 p0) (do* ((current-x xs (^ xs current-p)) (x-log-x (* xs (log xs))(* current-x (log current-x))) (r first-r (regression-model (append current-x other-x) y :print nil )) (b0 (select (send r :coef-estimates) k)(select (send r :coef-estimates) k)) (r0 first-r0 (regression-model (append x-log-x current-x other-x) y :print nil )) (c0 (select (send r0 :coef-estimates) k) (select (send r0 :coef-estimates) k)) (p (1+ (/ c0 b0))(1+ (/ c0 b0))) (last-p 1 current-p) (current-p p (* current-p p))) ((> tol (max (abs (- current-p last-p)))) current-p) (if (not quiet) (format t "~% powers = ~a " current-p))))) ;;; function to calculate constructed variable for Box-Cox transformation ;; 3/27/95 (defun box-cox-constructed-variable (y) "Args: (y) Compute constructed variable for Box-Cox transformation of y" (* y (- (log (/ y (geometric-mean y))) 1))) ;;;; Add multinomial logit model to Arc ;;;; John Fox ;;;; Last modified 22 March 1998 (defproto mlogit-model-proto '(logL info gradient coef-estimates weights-name response-categories w missing needs-computing compute-info-matrix menu) () glim-proto) (defmeth mlogit-model-proto :isnew (&rest args) (slot-value 'needs-computing t) ;(apply #'call-next-method args)) ;attempt to pass model name, causes a bug (call-next-method)) ;;; some accessor functions (defmeth mlogit-model-proto :compute-info-matrix (&optional (compute nil set)) "Message args: (&optional (compute nil set)) If T, compute information matrix. If nil, use scores to approximate the information matrix." (if set (setf (slot-value 'compute-info-matrix) compute)) (slot-value 'compute-info-matrix)) (defmeth mlogit-model-proto :needs-computing (&optional (needs nil set)) "Message args: (&optional (needs nil set)) If T, model needs re-computing." (if set (setf (slot-value 'needs-computing) needs)) (slot-value 'needs-computing)) (defmeth mlogit-model-proto :response-categories (&optional cats) "Message args: (&optional cats) Set or retrieve response variable categories." (if cats (setf (slot-value 'response-categories) cats)) (slot-value 'response-categories)) (defmeth mlogit-model-proto :missing (&optional miss) "Message args: (&optional miss) Set or retrieve logical list of missing-values indicators." (if miss (setf (slot-value 'missing) miss)) (slot-value 'missing)) (defmeth mlogit-model-proto :weights-name (&optional name) "Message args: (&optional name) Set or retrieve name of case-weight variable." (if name (setf (slot-value 'weights-name) name)) (slot-value 'weights-name)) (defmeth mlogit-model-proto :w (&optional w) "Message args: (&optional w) Set or retrieve matrix of response indicators, filtering out missing and excluded data." (if w (setf (slot-value 'w) w)) (let ((vals (slot-value 'w)) (incl (copy-list (send self :included))) (missing (send self :missing))) (setf (select incl missing) nil) ; filter out missing and excluded data (included vals incl))) ;;; the information matrix, etc., may need re-computing (defmeth mlogit-model-proto :info (&optional info) "Message args: (&optional info) Set or retrieve information matrix, recomputing model if necessary." (if info (setf (slot-value 'info) info) (if (send self :needs-computing) (send self :compute :quiet T))) (slot-value 'info)) (defmeth mlogit-model-proto :logL (&optional logL) "Message args: (&optional logL) Set or retrieve log-likelihood, recomputing model if necessary." (if logL (setf (slot-value 'logL) logL) (if (send self :needs-computing) (send self :compute :quiet T))) (slot-value 'logL)) (defmeth mlogit-model-proto :gradient (&optional gradient) "Message args: (&optional gradient) Set or retrieve gradient vector. If estimates fail to converge, contains list of log-likelihood, gradient, XtX matrix, and estimates at last iteration." (if gradient (setf (slot-value 'gradient) gradient) (if (send self :needs-computing) (send self :compute :quiet T))) (slot-value 'gradient)) (defmeth mlogit-model-proto :coef-estimates (&optional (coefs nil set)) "Message args: (&optional (coefs nil set)) Set or retrieve coefficients, recomputing model if necessary. Current coefficients are used as start values; if nil, then zeroes are used as start values." (if set (setf (slot-value 'coef-estimates) coefs) (if (send self :needs-computing) (send self :compute :quiet T))) (slot-value 'coef-estimates)) (defmeth mlogit-model-proto :coef-standard-errors () "Message args: () Returns coefficient standard errors, recomputing the model if necessary." (if (send self :needs-computing) (send self :compute :quiet T)) (let* ((info (send self :info)) (inv-info (inverse info))) (sqrt (diagonal inv-info)))) (defmeth mlogit-model-proto :xtxinv () "Message args: () Returns the covariance matrix of the coefficients, by inverting the contents of the information-matrix slot, recomputing the model if necessary." (if (send self :needs-computing) (send self :compute :quiet T)) (let ((info (send self :info))) (inverse info))) (defun rotate (list) "Args (list) Returns list rotated one position to the right, with last element at the front." (combine (last list) (reverse (cdr (reverse list))))) (defmeth mlogit-model-proto :y (&optional new-y) "Message args: (&optional new-y) Sets or retrieves the response variable y, also setting the response indicators w and response-categories slots." (if new-y (list (setf (slot-value 'y) new-y) (send self :response-categories (sort-data (remove-duplicates (remove-missing new-y) :test #'equal))) (send self :needs-computing t) ;; w is the matrix of response indicators, uses the Arc recode-values function (send self :w (let* ((cats (send self :response-categories)) (values (rotate (iseq (length cats)))) (y (recode-values new-y cats values)) (vals (iseq (max (remove-missing y))))) (flet ((dummy (val) (mapcar #'(lambda (v) (if (= v val) 1 0)) y))) (apply #'bind-columns (mapcar #'dummy vals))))))) (slot-value 'y)) (defmeth mlogit-model-proto :x-matrix () "Message args: () Returns the model-matrix, filtering for missing and excluded cases." (let* ((int? (send self :intercept)) (x (send self :x)) (n (array-dimension x 0)) (incl (copy-list (send self :included))) (missing (send self :missing)) (xmatrix (cond (int? (bind-columns (repeat 1 n) x)) ((matrixp x) x) (t (matrix (list n 1) x))))) (setf (select incl missing) nil) ; filters missing and excluded cases (included xmatrix incl))) (defmeth mlogit-model-proto :summary () "Message args: () Prints a summary of the model, recomputing the model if necessary." (if (send self :needs-computing) (send self :compute)) (let* ((dataset (send (send self :get-dataset) :name)) (model (send self :name)) (incl (copy-list (send self :included))) (missing (send self :missing)) (n-included (send self :num-included)) (n-cases (send self :num-cases)) (n-excluded (- n-cases n-included)) (intercept (send self :intercept)) (predictors (send self :predictor-names)) (response (send self :response-name)) (weights (send self :weights-name)) (predictor-names (if intercept (combine "Constant" predictors) predictors)) (k (length predictor-names)) (ks (iseq k)) (cats (send self :response-categories)) (m-1 (- (length cats) 1)) (coefs (send self :coef-estimates)) (ses (send self :coef-standard-errors)) (coef (matrix (list m-1 k) coefs)) (se (matrix (list m-1 k) ses)) (logL (send self :logL))) (setf (select incl missing) nil) (let ((excluded (select (send self :case-labels) (which (mapcar #'not incl))))) (format t "~%Data set = ~a, Name of Model = ~a" dataset model) (format t "~%Multinomial Logit Model") (format t "~%Response = ~a" response) (format t "~%Predictors = ~a" predictors) (format t "~%Case Weights = ~a" weights) (format t "~%Number of cases used = ~a" n-included) (if (/= n-excluded 0) (if (< n-excluded 20) (format t "~%Cases excluded: ~a" excluded) (format t "~%Number of cases excluded = ~a" n-excluded))) (dolist (j (iseq m-1)) (format t "~%~%Logit [~a=~a/~a=~a]" response (select cats (+ j 1)) response (first cats)) (format t "~%Coefficient Std. Error") (dolist (p ks) (format t "~%~10f ~10f ~a" (select coef j p) (select se j p) (select predictor-names p)))) (format t "~%~%-2 log-likelihood = ~10f~%" (- (* 2 logL)))))) (defmeth mlogit-model-proto :deriv () "Message args: () Returns the log-likelihood, gradient vector, and XtX matrix at the current parameter estimates; these are left in the gradient slot if there is no convergence." (send self :gradient (let* ((g (send self :coef-estimates)) (w (send self :w)) (x (send self :x-matrix)) (weights (send self :weights)) (n (array-dimension x 0)) (k (array-dimension x 1)) (m-1 (array-dimension w 1)) (logL 0) (gamma (transpose (matrix (list m-1 k) g))) (pars (iseq k)) (deriv (repeat 0 m-1)) (npars (* k m-1)) (xtx (matrix (list npars npars) (repeat 0 (* npars npars))))) (dotimes (i n) (let ((sum1 0) (sum2 1) (deriv-i nil) (xi (select x i pars))) (dotimes (j m-1) (let* ((gammaj (select gamma pars j)) (xigj (matmult xi gammaj))) (setf sum1 (+ sum1 (* (aref w i j) xigj))) (setf sum2 (+ sum2 (exp xigj))))) (setf logL (+ logL (* (select weights i) (+ sum1 (- (log sum2)))))) (dotimes (j m-1) (let* ((p (/ (exp (matmult xi (select gamma pars j))) sum2)) (term (- (* (aref w i j) xi) (matmult p xi)))) (push term deriv-i) (setf (select deriv j) (+ (select deriv j) (* (select weights i) term))))) (let ((deriv-i (apply #'combine (reverse deriv-i)))) (setf xtx (+ xtx (* (select weights i) (outer-product deriv-i deriv-i))))))) (list (aref logL 0 0)(apply #'combine deriv) xtx)))) (defmeth mlogit-model-proto :info-matrix () "Message args: () Computes and returns the information matrix." (send self :info (let* ((g (send self :coef-estimates)) (w (send self :w)) (x (send self :x-matrix)) (weights (send self :weights)) (n (array-dimension x 0)) (k (array-dimension x 1)) (m-1 (array-dimension w 1)) (gamma (transpose (matrix (list m-1 k) g))) (pars (iseq k)) (npars (length g)) (cats (iseq m-1)) (inf (matrix (list npars npars) (repeat 0 (* npars npars))))) (labels ((infjj (j xi fac) ; compute diagonal blocks (let ((xigj (select (matmult xi (select gamma pars j)) 0 0))) (/ (* (matmult (transpose xi) xi) (exp xigj) (- fac (exp xigj))) (^ fac 2)))) (infjjp (j jp xi fac) ; compute off-diagonal blocks (- (/ (* (matmult (transpose xi) xi) (select (exp (matmult xi (+ (select gamma pars jp) (select gamma pars j)))) 0 0)) (^ fac 2)))) (sumrow (p xi fac) (flet ((doit (m) (if (= m p) (infjj m xi fac) (infjjp m p xi fac)))) (apply #'bind-columns (mapcar #'doit cats)))) (sumcol (xi fac) (apply #'bind-rows (mapcar #'(lambda (cat) (sumrow cat xi fac)) cats)))) (dotimes (i n) (let ((xi (select x i pars))) (setf inf (+ inf (* (select weights i) (let* ((term nil) (sum 0) (fac (select (+ 1 (dotimes (j m-1 sum) (setf sum (+ sum (exp (matmult xi (select gamma pars j))))))) 0 0))) (sumcol xi fac))))))) inf)))) (defun max-relative-error (x y) "Function args: (x y) A convergence check from Tierney (1996)." (max (/ (abs (- x y)) (pmax (sqrt machine-epsilon) (abs y))))) (defconstant *tolerance* .001) ; this is the convergence tolerance for coefficients (defmeth mlogit-model-proto :compute (&key quiet (tol *tolerance*) (maxiter 20)) "Message args: (&key quiet (tol *tolerance*) (maxiter 20) Computes maximum-likelihood estimates by the modified method of scoring. If :quiet is T, then the iteration history is suppressed; :tot (default *tolerance*) is the maximum relative change in the coefficients for convergence; :maxiter (default 20) is the maximum number of iterations." (send self :needs-computing nil) (let* ((w (send self :w)) (x (send self :x-matrix)) (n (array-dimension x 0)) (k (array-dimension x 1)) (m-1 (array-dimension w 1)) (start (send self :coef-estimates)) (gamma (if start start (repeat 0 (* k m-1)))) (last-gamma gamma) (logL 0)) (send self :coef-estimates gamma) (let ((result (dotimes (i maxiter) (setf last-gamma gamma) (let* ((logL-grad-xtx (send self :deriv)) (logL (first logL-grad-xtx)) (grad (second logL-grad-xtx)) (xtx (third logL-grad-xtx))) (if (not quiet) (format t "~&Iteration ~a: logL = ~a~%" i logL)) (setf gamma (+ gamma (matmult (inverse xtx) grad))) (send self :coef-estimates gamma) (if (> tol (max-relative-error gamma last-gamma)) (return (list logL grad xtx gamma))))))) (if (not result) (error "Convergence failed, maximum iterations exceeded.")) (send self :logL (select result 0)) (send self :gradient (select result 1)) (send self :info (if (send self :compute-info-matrix) ; compute info matrix or use XtX (send self :info-matrix) (select result 2))) (send self :coef-estimates (select result 3))))) ;;; define mlogit-menu-item (rc-menu-item 'mlogit-menu-item "Fit multinomial logit model..." :fit-mlogit) (when (null (position 'mlogit-menu-item *arc-fit-menu-items*)) (defparameter *arc-fit-menu-items* (append *arc-fit-menu-items* (list 'mlogit-menu-item)))) (defmeth dataset-proto :fit-mlogit () "Message args: () Arc fit-method for multinomial logit models." (send self :ones) (let* ((name (format nil "M~a" (send self :model-number))) (names (send self :types (append '(:ones :variate :factor :factor1 :effect :interaction :text)))) (ans (mlogit-dialog names :name name :candidates names))) (if ans (apply #'send self :make-mlogit ans)))) (defun mlogit-dialog (names &key (mod-type "Multinomial Logit Model") (name "mlogit") (weights "") (response "") (candidates names) (predictors (repeat "" (length names))) (intercept t) (compute-info-matrix t)) "Function args: (names &key reg-name type) (mod-type \"Multinomial Logit Model\") (name \"mlogit\") (weights \"\") (response \"\") (candidates names) (predictors (repeat \"\" (length names))) (intercept t) (compute-info-matrix t)) Opens an Arc dialog to define a multinomial logistic regression." (let* ((version (send text-item-proto :new (format nil "Arc ~a" *arc-version*))) (all (send allocate-items-proto :new)) (candidates-l (send subordinate-list-proto :new (pad-list candidates (length names)) 'primary all)) (candidates-n (send text-item-proto :new "Candidates")) (predictors-l (send subordinate-list-proto :new (pad-list predictors (length names)) 'secondary all)) (predictors-n (send text-item-proto :new "Predictors")) (response-l (send subordinate-list-proto :new (list response) 'oneitem all)) (response-n (send text-item-proto :new "Response... ")) (weight-l (send subordinate-list-proto :new (if weights (list weights) '("")) 'oneitem all)) (weight-n (send text-item-proto :new "Case Weights... ")) (intercept-n (send toggle-item-proto :new "Fit Intercept" :value intercept)) (infomatrix-n (send toggle-item-proto :new "Calculate Info Matrix" :value compute-info-matrix)) (name-l (send edit-text-item-proto :new name :text-length 14 )) (name-n (send text-item-proto :new (format nil "Name for ~%~a" mod-type))) (get-answer #'(lambda () (let* ((ans (send all :value)) (raw (send all :all-values))) (cond ((or (null (select ans 1)) (null (select ans 2)) (null (select ans 3))) (message-dialog "Predictor, response, or case weights missing") (mlogit-dialog names :mod-type mod-type :name (send name-l :text) :candidates (select raw 0) :predictors (select raw 1) :response (first (select raw 2)) :weights (first (select raw 3)) :intercept (send intercept-n :value) :compute-info-matrix (send infomatrix-n :value))) (t (list :predictors (select ans 1) :candidates (select ans 0) :response (first (select ans 2)) :weights (first (select ans 3)) :intercept (send intercept-n :value) :compute-info-matrix (send infomatrix-n :value) :name (send name-l :text))))))) (cancel (send modal-button-proto :new "Cancel")) (done (send modal-button-proto :new "Done" :action #'(lambda () (funcall get-answer)))) (dialog (send modal-dialog-proto :new (list (list version name-n name-l) (list (list candidates-n candidates-l response-n weight-n) (list predictors-n predictors-l response-l weight-l) (list intercept-n infomatrix-n done cancel)))))) (send dialog :default-button done) (send dialog :modal-dialog) )) (defmeth dataset-proto :make-mlogit (&rest args &key predictors response intercept compute-info-matrix weights name) "Message args: (&rest args &key predictors response intercept compute-info-matrix weights name) Sets up multinomial logit model from Arc." (let* ((x (apply #'append (mapcar #'(lambda (n) (send self :data n)) predictors))) (y (first (send self :data response))) (miss (remove-duplicates (combine (mapcar #'find-missing (cons y x))))) (missing (select miss (which miss))) (wts (first (send self :data weights))) (dataset self) (r (send mlogit-model-proto :new :name name))) (flet ((get-effect-names (var-names) (let* ((datalists (mapcar #'(lambda (name) (send dataset :find-datalist name)) var-names)) (effect-names (mapcar #'(lambda (dl) (send dl :labels)) datalists))) (combine effect-names)))) (set (intern (string-upcase name)) r) (send (send r :menu) :remove) ; kludge to get correct menu title (send (send r :menu) :title name) (send (send r :menu) :install) (send self :models r) (send r :x x) (send r :y y) (send r :weights-name weights) (send r :weights wts) (send r :missing missing) (send r :intercept intercept) (send r :compute-info-matrix compute-info-matrix) (send r :predictor-names (get-effect-names predictors)) (send r :response-name response) (send r :type :mlogit-model) (send r :add-slot 'args args) (send r :add-slot 'data self) (defmeth r :included (&rest args) (apply #'send dataset :included args)) (defmeth r :data () (slot-value 'data)) (send r :compute) (when *show-display-fit* (send r :summary)) r))) (defun included (data included) "Function args: (data included) Filters list, vector, or matrix data for logical list of included observations." (let ((incl (which included))) (cond ((listp data) (select data incl)) ((vectorp data) (select data incl)) ((matrixp data) (select data incl (iseq (array-dimension data 1)))) (t (error "~%Not a list, vector, or matrix~%"))))) (defmeth mlogit-model-proto :num-included () "Message args: () Returns number of included observations." (let* ((incl (copy-list (send self :included))) (missing (send self :missing))) (setf (select incl missing) nil) (length (which incl)))) ;;;over-ride menu method -- shouldn't be necessary (defmeth mlogit-model-proto :menu (&key title) (let ((menu (slot-value 'menu)) (name (if title title "MLogit"))) (if menu menu (setf (slot-value 'menu) (send self :make-menu name))))) (defmeth mlogit-model-proto :menu-items () "Message args: () Arc menu items for multinomial logit model." (mapcar #'(lambda (a) (make-menu-item a self)) *arc-multinomial-logit-model-menu-items*)) (defmeth mlogit-model-proto :plotlist () "Message args: () Arc list of model-related variables for multinomial logit model." (list :dev-components)) (defmeth mlogit-model-proto :remove-menu () (send (send self :menu) :remove) (send (send self :data) :delete-model self)) (rc-menu-item 'mlogit-summary-item "Summary" :summary) (rc-menu-item 'mlogit-remove-item "Remove model" :remove-menu) (defparameter *arc-multinomial-logit-model-menu-items* (list 'mlogit-summary-item 'wald-tests-menu-item 'mlogit-remove-item)) (defmeth mlogit-model-proto :dev-components () "Message args: () Returns a list of case-wise components of the deviance for the model." (let* ((g (send self :coef-estimates)) (w (send self :w)) (x (send self :x-matrix)) (weights (send self :weights)) (dev-comps nil) (n (array-dimension x 0)) (k (array-dimension x 1)) (m-1 (array-dimension w 1)) (gamma (transpose (matrix (list m-1 k) g))) (pars (iseq k))) (dotimes (i n) (let ((dev (* -2 (select weights i) (+ (let ((sum 0)) (dotimes (j m-1 sum) (setf sum (+ sum (* (aref w i j) (matmult (select x i pars) (select gamma pars j))))))) (- (log (+ 1 (let ((sum 0)) (dotimes (j m-1 sum) (setf sum (+ sum (exp (matmult (select x i pars) (select gamma pars j)))))))))))))) (push (aref dev 0 0) dev-comps))) (send self :expand-to-missing dev-comps))) (defmeth mlogit-model-proto :expand-to-missing (x) "Message args: (x) Pads x with missing data corresponding to excluded observations." (let* ((incl (send self :included)) (result (repeat not-a-number (length incl)))) (setf (select result (which incl)) x) result)) (defmeth mlogit-model-proto :wald-tests () "Message args: () Prints a Wald chisquare test for each term in the model, testing the hypothesis that the term (i.e., set of coefficients) is zero." (flet ((expand-hyp-matrix (h m-1) (let* ((ncol (array-dimension h 1)) (nrow (array-dimension h 0)) (zero (matrix (list nrow ncol) (repeat 0 (* nrow ncol))))) (apply #'bind-rows (mapcar #'(lambda (j) (apply #'bind-columns (mapcar #'(lambda (p) (if (= p j) h zero)) (iseq m-1)))) (iseq m-1)))))) (let* ((dataset (send (send self :get-dataset) :name)) (terms (send self :predictors)) (number-terms (length terms)) (df (mapcar #'(lambda (term) (send self :degrees-of-freedom-term term)) terms)) (m-1 (- (length (send self :response-categories)) 1)) (pos (repeat (iseq number-terms) df)) (positions (if (send self :intercept) (adjoin -1 pos) pos)) (imat (identity-matrix (length positions))) (all-cols (iseq (length positions))) (test (if (send self :intercept) (send self :test-linear-hypothesis (expand-hyp-matrix (select imat 0 all-cols) m-1) :quiet t)))) (format t "~%~% Wald-Tests for terms in Model ~a" (send self :name)) (format t ", Dataset = ~a" dataset) (format t "~% df Wald Chisq. p Source~%") (format t "------------------------------------------------~%") (if (send self :intercept) (format t "~8d ~10g ~10,5f ~a~%" (select test 1)(select test 0) (select test 2) "Constant")) (dotimes (term number-terms) (let* ((df-term (select df term)) (pos-term (which (= term positions))) (h-mat (select imat pos-term all-cols)) (hyp-matrix (expand-hyp-matrix h-mat m-1)) (test (send self :test-linear-hypothesis hyp-matrix :quiet t))) ;(print-matrix hyp-matrix) (format t "~8d ~10g ~10,5f ~a~%" (select test 1) (select test 0) (select test 2) (select terms term)))))))