"""Probit regression class and diagnostics."""

__author__ = "Luc Anselin luc.anselin@asu.edu, Pedro V. Amaral pedro.amaral@asu.edu"

import numpy as np
import numpy.linalg as la
import scipy.optimize as op
from scipy.stats import norm, chi2
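# scipy.stats.chisqprob was removed from SciPy; chi2.sf (the chi-squared
# survival function, 1 - cdf) is the drop-in replacement used below.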
chisqprob = chi2.sf
import scipy.sparse as SP
from . import user_output as USER
from . import summary_output as SUMMARY
from .utils import spdot, spbroadcast, set_warn

__all__ = ["Probit"]


class BaseProbit(object):

    """
    Probit class to do all the computations

    Parameters
    ----------

    x           : array
                  nxk array of independent variables (assumed to be aligned with y)
    y           : array
                  nx1 array of dependent binary variable
    w           : W
                  PySAL weights instance or spatial weights sparse matrix
                  aligned with y
    optim       : string
                  Optimization method.
                  Default: 'newton' (Newton-Raphson).
                  Alternatives: 'ncg' (Newton-CG), 'bfgs' (BFGS algorithm)
    scalem      : string
                  Method to calculate the scale of the marginal effects.
                  Default: 'phimean' (Mean of individual marginal effects)
                  Alternative: 'xmean' (Marginal effects at variables mean)
    maxiter     : int
                  Maximum number of iterations until optimizer stops                  

    Attributes
    ----------

    x           : array
                  Two dimensional array with n rows and one column for each
                  independent (exogenous) variable, including the constant
    y           : array
                  nx1 array of dependent variable
    betas       : array
                  kx1 array with estimated coefficients
    predy       : array
                  nx1 array of predicted y values
    n           : int
                  Number of observations
    k           : int
                  Number of variables
    vm          : array
                  Variance-covariance matrix (kxk)
    z_stat      : list of tuples
                  z statistic; each tuple contains the pair (statistic,
                  p-value), where each is a float                  
    xmean       : array
                  Mean of the independent variables (kx1)
    predpc      : float
                  Percent of y correctly predicted
    logl        : float
                  Log-likelihood of the estimation
    scalem      : string
                  Method to calculate the scale of the marginal effects.
    scale       : float
                  Scale of the marginal effects.
    slopes      : array
                  Marginal effects of the independent variables (k-1x1)
                  Note: Disregards the presence of dummies.
    slopes_vm   : array
                  Variance-covariance matrix of the slopes (k-1xk-1)
    LR          : tuple
                  Likelihood Ratio test of all coefficients = 0
                  (test statistic, p-value)
    Pinkse_error: float
                  Lagrange Multiplier test against spatial error correlation.
                  Implemented as presented in :cite:`Pinkse2004`.
    KP_error    : float
                  Moran's I type test against spatial error correlation.
                  Implemented as presented in  :cite:`Kelejian2001`.
    PS_error    : float
                  Lagrange Multiplier test against spatial error correlation.
                  Implemented as presented in :cite:`Pinkse1998`.
    warning     : boolean
                  If True, the maximum number of iterations was exceeded or
                  the gradient and/or function calls did not change.

    Examples
    --------
    >>> import numpy as np
    >>> import libpysal
    >>> import spreg
    >>> np.set_printoptions(suppress=True) #prevent scientific format
    >>> dbf = libpysal.io.open(libpysal.examples.get_path('columbus.dbf'),'r')
    >>> y = np.array([dbf.by_col('CRIME')]).T
    >>> x = np.array([dbf.by_col('INC'), dbf.by_col('HOVAL')]).T
    >>> x = np.hstack((np.ones(y.shape),x))
    >>> w = libpysal.io.open(libpysal.examples.get_path("columbus.gal"), 'r').read()
    >>> w.transform='r'
    >>> model = spreg.probit.BaseProbit((y>40).astype(float), x, w=w)
    >>> print(np.around(model.betas, decimals=6))
    [[ 3.353811]
     [-0.199653]
     [-0.029514]]

    >>> print(np.around(model.vm, decimals=6))
    [[ 0.852814 -0.043627 -0.008052]
     [-0.043627  0.004114 -0.000193]
     [-0.008052 -0.000193  0.00031 ]]

    >>> tests = np.array([['Pinkse_error','KP_error','PS_error']])
    >>> stats = np.array([[model.Pinkse_error[0],model.KP_error[0],model.PS_error[0]]])
    >>> pvalue = np.array([[model.Pinkse_error[1],model.KP_error[1],model.PS_error[1]]])
    >>> print(np.hstack((tests.T,np.around(np.hstack((stats.T,pvalue.T)),6))))
    [['Pinkse_error' '3.131719' '0.076783']
     ['KP_error' '1.721312' '0.085194']
     ['PS_error' '2.558166' '0.109726']]
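
    Marginal effects and their standard errors are also available; their
    scale depends on the ``scalem`` argument:

    >>> slopes = model.slopes            # marginal effects (constant excluded)
    >>> slopes_se = model.slopes_std_err # delta-method standard errors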
    """

    def __init__(self, y, x, w=None, optim='newton', scalem='phimean', maxiter=100):
        self.y = y
        self.x = x
        self.n, self.k = x.shape
        self.optim = optim
        self.scalem = scalem
        self.w = w
        self.maxiter = maxiter
        par_est, self.warning = self.par_est()
        self.betas = np.reshape(par_est[0], (self.k, 1))
        self.logl = -float(par_est[1])

    @property
    def vm(self):
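        # Variance-covariance matrix: inverse of the negative Hessian
        # evaluated at the ML estimates.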
        try:
            return self._cache['vm']
        except AttributeError:
            self._cache = {}
            H = self.hessian(self.betas)
            self._cache['vm'] = -la.inv(H)
        except KeyError:
            H = self.hessian(self.betas)
            self._cache['vm'] = -la.inv(H)
        return self._cache['vm']
    
    @vm.setter
    def vm(self, val):
        try:
            self._cache['vm'] = val
        except AttributeError:
            self._cache = {}
        self._cache['vm'] = val

    @property  # TODO: refactor the repeated cache boilerplate into a shared helper (see the sketch after this class)
    def z_stat(self):
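        # z = beta / sqrt(diag(vm)); two-sided p-values from the standard normal.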
        try: 
            return self._cache['z_stat']
        except AttributeError:
            self._cache = {}
            variance = self.vm.diagonal()
            zStat = self.betas.reshape(len(self.betas),) / np.sqrt(variance)
            rs = {}
            for i in range(len(self.betas)):
                rs[i] = (zStat[i], norm.sf(abs(zStat[i])) * 2)
            self._cache['z_stat'] = list(rs.values())
        except KeyError:
            variance = self.vm.diagonal()
            zStat = self.betas.reshape(len(self.betas),) / np.sqrt(variance)
            rs = {}
            for i in range(len(self.betas)):
                rs[i] = (zStat[i], norm.sf(abs(zStat[i])) * 2)
            self._cache['z_stat'] = list(rs.values())
        return self._cache['z_stat']

    @z_stat.setter
    def z_stat(self, val):
        try:
            self._cache['z_stat'] = val
        except AttributeError:
            self._cache = {}
        self._cache['z_stat'] = val

    @property
    def slopes_std_err(self):
        try:
            return self._cache['slopes_std_err']
        except AttributeError:
            self._cache = {}
            self._cache['slopes_std_err'] = np.sqrt(self.slopes_vm.diagonal())
        except KeyError:
            self._cache['slopes_std_err'] = np.sqrt(self.slopes_vm.diagonal())
        return self._cache['slopes_std_err']    
    
    @slopes_std_err.setter
    def slopes_std_err(self, val):
        try:
            self._cache['slopes_std_err'] = val
        except AttributeError:
            self._cache = {}
        self._cache['slopes_std_err'] = val

    @property
    def slopes_z_stat(self):
        try:
            return self._cache['slopes_z_stat']
        except AttributeError:
            self._cache = {}
            zStat = self.slopes.reshape(
                len(self.slopes),) / self.slopes_std_err
            rs = {}
            for i in range(len(self.slopes)):
                rs[i] = (zStat[i], norm.sf(abs(zStat[i])) * 2)
            self._cache['slopes_z_stat'] = list(rs.values())
        except KeyError:
            zStat = self.slopes.reshape(
                len(self.slopes),) / self.slopes_std_err
            rs = {}
            for i in range(len(self.slopes)):
                rs[i] = (zStat[i], norm.sf(abs(zStat[i])) * 2)
            self._cache['slopes_z_stat'] = list(rs.values())
        return self._cache['slopes_z_stat']

    @slopes_z_stat.setter
    def slopes_z_stat(self, val):
        try:
            self._cache['slopes_z_stat'] = val
        except AttributeError:
            self._cache = {}
        self._cache['slopes_z_stat'] = val

    @property
    def xmean(self):
        try:
            return self._cache['xmean']
        except AttributeError:
            self._cache = {}
            try:  # x may be a scipy sparse matrix, in which case the row sum must be densified
                self._cache['xmean'] = np.reshape(sum(self.x) / self.n, (self.k, 1))
            except:
                self._cache['xmean'] = np.reshape(sum(self.x).toarray() / self.n, (self.k, 1))
        except KeyError:
            try:
                self._cache['xmean'] = np.reshape(sum(self.x) / self.n, (self.k, 1))
            except:
                self._cache['xmean'] = np.reshape(sum(self.x).toarray() / self.n, (self.k, 1))
        return self._cache['xmean']

    @xmean.setter
    def xmean(self, val):
        try:
            self._cache['xmean'] = val
        except AttributeError:
            self._cache = {}
        self._cache['xmean'] = val

    @property
    def xb(self):
        try:
            return self._cache['xb']
        except AttributeError:
            self._cache = {}
            self._cache['xb'] = spdot(self.x, self.betas)
        except KeyError:
            self._cache['xb'] = spdot(self.x, self.betas)
        return self._cache['xb']

    @xb.setter
    def xb(self, val):
        try:
            self._cache['xb'] = val
        except AttributeError:
            self._cache = {}
        self._cache['xb'] = val

    @property
    def predy(self):
        try:
            return self._cache['predy']
        except AttributeError:
            self._cache = {}
            self._cache['predy'] = norm.cdf(self.xb)
        except KeyError:
            self._cache['predy'] = norm.cdf(self.xb)
        return self._cache['predy']
    
    @predy.setter
    def predy(self, val):
        try:
            self._cache['predy'] = val
        except AttributeError:
            self._cache = {}
        self._cache['predy'] = val

    @property
    def predpc(self):
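        # A prediction counts as correct when |y - predy| <= 0.5, i.e. the
        # predicted probability falls on the observed side of 0.5.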
        try:
            return self._cache['predpc']
        except AttributeError:
            self._cache = {}
            predpc = abs(self.y - self.predy)
            for i in range(len(predpc)):
                if predpc[i] > 0.5:
                    predpc[i] = 0
                else:
                    predpc[i] = 1
            self._cache['predpc'] = float(100.0 * np.sum(predpc) / self.n)
        except KeyError:
            predpc = abs(self.y - self.predy)
            for i in range(len(predpc)):
                if predpc[i] > 0.5:
                    predpc[i] = 0
                else:
                    predpc[i] = 1
            self._cache['predpc'] = float(100.0 * np.sum(predpc) / self.n)
        return self._cache['predpc']

    @predpc.setter
    def predpc(self, val):
        try:
            self._cache['predpc'] = val
        except AttributeError:
            self._cache = {}
        self._cache['predpc'] = val
    
    @property
    def phiy(self):
        try:
            return self._cache['phiy']
        except AttributeError:
            self._cache = {}
            self._cache['phiy'] = norm.pdf(self.xb)
        except KeyError:
            self._cache['phiy'] = norm.pdf(self.xb)
        return self._cache['phiy']
    
    @phiy.setter
    def phiy(self, val):
        try:
            self._cache['phiy'] = val
        except AttributeError:
            self._cache = {}
        self._cache['phiy'] = val

    @property
    def scale(self):
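        # 'phimean': average of phi(x_i'b) over all observations;
        # 'xmean': phi(xbar'b), the density evaluated at the regressor means.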
        try:
            return self._cache['scale']
        except AttributeError:
            self._cache = {}
            if self.scalem == 'phimean':
                self._cache['scale'] = float(1.0 * np.sum(self.phiy) / self.n)
            elif self.scalem == 'xmean':
                self._cache['scale'] = float(norm.pdf(np.dot(self.xmean.T, self.betas)))
        except KeyError:
            if self.scalem == 'phimean':
                self._cache['scale'] = float(1.0 * np.sum(self.phiy) / self.n)
            elif self.scalem == 'xmean':
                self._cache['scale'] = float(norm.pdf(np.dot(self.xmean.T, self.betas)))
        return self._cache['scale']

    @scale.setter
    def scale(self, val):
        try:
            self._cache['scale'] = val
        except AttributeError:
            self._cache = {}
        self._cache['scale'] = val

    @property
    def slopes(self):
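        # Marginal effects: dP(y=1|x)/dx_j = scale * beta_j; the constant
        # term (first element of betas) is excluded.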
        try:
            return self._cache['slopes']
        except AttributeError:
            self._cache = {}
            self._cache['slopes'] = self.betas[1:] * self.scale
        except KeyError:
            self._cache['slopes'] = self.betas[1:] * self.scale
        return self._cache['slopes']

    @slopes.setter
    def slopes(self, val):
        try:
            self._cache['slopes'] = val
        except AttributeError:
            self._cache = {}
        self._cache['slopes'] = val

    @property
    def slopes_vm(self):
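        # Delta method at the regressor means: since phi'(z) = -z*phi(z),
        # d[phi(x'b) * b]/db' = phi(x'b) * (I - (x'b) * b x'), so
        # V(slopes) = scale^2 * dfdb * vm * dfdb'.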
        try:
            return self._cache['slopes_vm']
        except AttributeError:
            self._cache = {}
            x = self.xmean
            b = self.betas
            dfdb = np.eye(self.k) - spdot(b.T, x) * spdot(b, x.T)
            slopes_vm = (self.scale ** 2) * \
                np.dot(np.dot(dfdb, self.vm), dfdb.T)
            self._cache['slopes_vm'] = slopes_vm[1:, 1:]
        except KeyError:
            x = self.xmean
            b = self.betas
            dfdb = np.eye(self.k) - spdot(b.T, x) * spdot(b, x.T)
            slopes_vm = (self.scale ** 2) * \
                np.dot(np.dot(dfdb, self.vm), dfdb.T)
            self._cache['slopes_vm'] = slopes_vm[1:, 1:]
        return self._cache['slopes_vm']

    @slopes_vm.setter
    def slopes_vm(self, val):
        try:
            self._cache['slopes_vm'] = val
        except AttributeError:
            self._cache = {}
        self._cache['slopes_vm'] = val

    @property
    def LR(self):
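        # LR = 2*(logl - logl_0), where the restricted (constant-only) model
        # has logl_0 = n*(P*ln(P) + (1-P)*ln(1-P)) with P = mean(y).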
        try:
            return self._cache['LR']
        except AttributeError:
            self._cache = {}
            P = 1.0 * np.sum(self.y) / self.n
            LR = float(
                -2 * (self.n * (P * np.log(P) + (1 - P) * np.log(1 - P)) - self.logl))
            self._cache['LR'] = (LR, chisqprob(LR, self.k))
        except KeyError:
            P = 1.0 * np.sum(self.y) / self.n
            LR = float(
                -2 * (self.n * (P * np.log(P) + (1 - P) * np.log(1 - P)) - self.logl))
            self._cache['LR'] = (LR, chisqprob(LR, self.k))
        return self._cache['LR']

    @LR.setter
    def LR(self, val):
        try:
            self._cache['LR'] = val
        except AttributeError:
            self._cache = {}
        self._cache['LR'] = val

    @property
    def u_naive(self):
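        # Naive residual: y - Phi(x'b).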
        try:
            return self._cache['u_naive']
        except AttributeError:
            self._cache = {}
            self._cache['u_naive'] = self.y - self.predy
        except KeyError:
            u_naive = self.y - self.predy
            self._cache['u_naive'] = u_naive
        return self._cache['u_naive']

    @u_naive.setter
    def u_naive(self, val):
        try:
            self._cache['u_naive'] = val
        except AttributeError:
            self._cache = {}
        self._cache['u_naive'] = val
    
    @property
    def u_gen(self):
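        # Generalized residual: phi(x'b) * (y - Phi(x'b)) / [Phi(x'b) * (1 - Phi(x'b))].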
        try:
            return self._cache['u_gen']
        except AttributeError:
            self._cache = {}
            Phi_prod = self.predy * (1 - self.predy)
            u_gen = self.phiy * (self.u_naive / Phi_prod)
            self._cache['u_gen'] = u_gen
        except KeyError:
            Phi_prod = self.predy * (1 - self.predy)
            u_gen = self.phiy * (self.u_naive / Phi_prod)
            self._cache['u_gen'] = u_gen
        return self._cache['u_gen']
    
    @u_gen.setter
    def u_gen(self, val):
        try:
            self._cache['u_gen'] = val
        except AttributeError:
            self._cache = {}
        self._cache['u_gen'] = val

    @property
    def Pinkse_error(self):
        try:
            return self._cache['Pinkse_error']
        except AttributeError:
            self._cache = {}
            self._cache['Pinkse_error'], self._cache[
                'KP_error'], self._cache['PS_error'] = sp_tests(self)
        except KeyError:
            self._cache['Pinkse_error'], self._cache[
                'KP_error'], self._cache['PS_error'] = sp_tests(self)
        return self._cache['Pinkse_error']

    @Pinkse_error.setter
    def Pinkse_error(self, val):
        try:
            self._cache['Pinkse_error'] = val
        except AttributeError:
            self._cache = {}
        self._cache['Pinkse_error'] = val

    @property
    def KP_error(self):
        try:
            return self._cache['KP_error']
        except AttributeError:
            self._cache = {}
            self._cache['Pinkse_error'], self._cache[
                'KP_error'], self._cache['PS_error'] = sp_tests(self)
        except KeyError:
            self._cache['Pinkse_error'], self._cache[
                'KP_error'], self._cache['PS_error'] = sp_tests(self)
        return self._cache['KP_error']

    @KP_error.setter
    def KP_error(self, val):
        try:
            self._cache['KP_error'] = val
        except AttributeError:
            self._cache = {}
        self._cache['KP_error'] = val

    @property
    def PS_error(self):
        try:
            return self._cache['PS_error']
        except AttributeError:
            self._cache = {}
            self._cache['Pinkse_error'], self._cache[
                'KP_error'], self._cache['PS_error'] = sp_tests(self)
        except KeyError:
            self._cache['Pinkse_error'], self._cache[
                'KP_error'], self._cache['PS_error'] = sp_tests(self)
        return self._cache['PS_error']

    @PS_error.setter
    def PS_error(self, val):
        try:
            self._cache['PS_error'] = val
        except AttributeError:
            self._cache = {}
        self._cache['PS_error'] = val

    def par_est(self):
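        # Start from OLS coefficients; the likelihood is then optimized by
        # Newton-Raphson or by a scipy minimizer of the negative log-likelihood.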
        start = np.dot(la.inv(spdot(self.x.T, self.x)),
                       spdot(self.x.T, self.y))
        flogl = lambda par: -self.ll(par)
        if self.optim == 'newton':
            fgrad = lambda par: self.gradient(par)
            fhess = lambda par: self.hessian(par)
            par_hat = newton(flogl, start, fgrad, fhess, self.maxiter)
            warn = par_hat[2]
        else:
            fgrad = lambda par: -self.gradient(par)
            if self.optim == 'bfgs':
                par_hat = op.fmin_bfgs(
                    flogl, start, fgrad, full_output=1, disp=0)
                warn = par_hat[6]
            if self.optim == 'ncg':
                fhess = lambda par: -self.hessian(par)
                par_hat = op.fmin_ncg(
                    flogl, start, fgrad, fhess=fhess, full_output=1, disp=0)
                warn = par_hat[5]
        warn = warn > 0
        return par_hat, warn

    def ll(self, par):
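        # Log-likelihood: sum_i log Phi(q_i * x_i'b), where q_i = 2*y_i - 1
        # maps y in {0, 1} to {-1, +1}.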
        beta = np.reshape(np.array(par), (self.k, 1))
        q = 2 * self.y - 1
        qxb = q * spdot(self.x, beta)
        ll = sum(np.log(norm.cdf(qxb)))
        return ll

    def gradient(self, par):
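        # Score vector: X'lambda, with lambda_i = q_i * phi(q_i x_i'b) / Phi(q_i x_i'b).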
        beta = np.reshape(np.array(par), (self.k, 1))
        q = 2 * self.y - 1
        qxb = q * spdot(self.x, beta)
        lamb = q * norm.pdf(qxb) / norm.cdf(qxb)
        gradient = spdot(lamb.T, self.x)[0]
        return gradient

    def hessian(self, par):
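        # Analytic Hessian: -X' diag(lambda_i * (lambda_i + x_i'b)) X,
        # used by the 'newton' and 'ncg' optimizers.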
        beta = np.reshape(np.array(par), (self.k, 1))
        q = 2 * self.y - 1
        xb = spdot(self.x, beta)
        qxb = q * xb
        lamb = q * norm.pdf(qxb) / norm.cdf(qxb)
        hessian = spdot(self.x.T, spbroadcast(self.x,-lamb * (lamb + xb)))
        return hessian
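
# The cached-property pattern above repeats the same try/except boilerplate
# for every attribute. A minimal sketch of a shared helper (the name
# _cached_property is hypothetical and not part of spreg's API; the class
# above does not use it, and it omits the setters that the summary code
# relies on):
def _cached_property(fn):
    """Compute ``fn`` once per instance and cache it in ``self._cache``."""
    name = fn.__name__

    @property
    def wrapped(self):
        cache = getattr(self, '_cache', None)
        if cache is None:
            cache = self._cache = {}
        if name not in cache:
            cache[name] = fn(self)
        return cache[name]

    return wrapped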


class Probit(BaseProbit):

    """
    Classic non-spatial Probit and spatial diagnostics. The class includes a
    printout that formats all the results and tests in a nice format.

    The diagnostics for spatial dependence currently implemented are:

    * Pinkse Error :cite:`Pinkse2004`
    * Kelejian and Prucha Moran's I :cite:`Kelejian2001`
    * Pinkse & Slade Error :cite:`Pinkse1998`

    Parameters
    ----------
    x           : array
                  nxk array of independent variables (assumed to be aligned with y)
    y           : array
                  nx1 array of dependent binary variable
    w           : W
                  PySAL weights instance aligned with y
    optim       : string
                  Optimization method.
                  Default: 'newton' (Newton-Raphson).
                  Alternatives: 'ncg' (Newton-CG), 'bfgs' (BFGS algorithm)
    scalem      : string
                  Method to calculate the scale of the marginal effects.
                  Default: 'phimean' (Mean of individual marginal effects)
                  Alternative: 'xmean' (Marginal effects at variables mean)
    maxiter     : int
                  Maximum number of iterations until optimizer stops
    name_y      : string
                  Name of dependent variable for use in output
    name_x      : list of strings
                  Names of independent variables for use in output
    name_w      : string
                  Name of weights matrix for use in output
    name_ds     : string
                  Name of dataset for use in output

    Attributes
    ----------
    x           : array
                  Two dimensional array with n rows and one column for each
                  independent (exogenous) variable, including the constant
    y           : array
                  nx1 array of dependent variable
    betas       : array
                  kx1 array with estimated coefficients
    predy       : array
                  nx1 array of predicted y values
    n           : int
                  Number of observations
    k           : int
                  Number of variables
    vm          : array
                  Variance-covariance matrix (kxk)
    z_stat      : list of tuples
                  z statistic; each tuple contains the pair (statistic,
                  p-value), where each is a float
    xmean       : array
                  Mean of the independent variables (kx1)
    predpc      : float
                  Percent of y correctly predicted
    logl        : float
                  Log-likelihood of the estimation
    scalem      : string
                  Method to calculate the scale of the marginal effects.
    scale       : float
                  Scale of the marginal effects.
    slopes      : array
                  Marginal effects of the independent variables (k-1x1)
    slopes_vm   : array
                  Variance-covariance matrix of the slopes (k-1xk-1)
    LR          : tuple
                  Likelihood Ratio test of all coefficients = 0
                  (test statistic, p-value)
    Pinkse_error: float
                  Lagrange Multiplier test against spatial error correlation.
                  Implemented as presented in :cite:`Pinkse2004`
    KP_error    : float
                  Moran's I type test against spatial error correlation.
                  Implemented as presented in :cite:`Kelejian2001`
    PS_error    : float
                  Lagrange Multiplier test against spatial error correlation.
                  Implemented as presented in :cite:`Pinkse1998`
    warning     : boolean
                  If True, the maximum number of iterations was exceeded or
                  the gradient and/or function calls did not change.
    name_y      : string
                  Name of dependent variable for use in output
    name_x      : list of strings
                  Names of independent variables for use in output
    name_w      : string
                  Name of weights matrix for use in output
    name_ds     : string
                  Name of dataset for use in output
    title       : string
                  Name of the regression method used

    Examples
    --------
    We first need to import the needed modules, namely numpy to convert the
    data we read into arrays that ``spreg`` understands and ``libpysal`` to
    perform all the analysis.

    >>> import numpy as np
    >>> import libpysal
    >>> np.set_printoptions(suppress=True) #prevent scientific format

    Open data on Columbus neighborhood crime (49 areas) using
    libpysal.io.open(). This is the DBF associated with the Columbus
    shapefile. Note that libpysal.io.open() also reads data in CSV format;
    since the actual class requires data to be passed in as numpy arrays, the
    user can read their data in using any method.

    >>> dbf = libpysal.io.open(libpysal.examples.get_path('columbus.dbf'),'r')

    Extract the CRIME column (crime) from the DBF file and make it the
    dependent variable for the regression. Note that libpysal requires this
    to be a numpy array of shape (n, 1) as opposed to the also common shape
    of (n, ) that other packages accept. Since we want to run a probit model
    and for this example we use the Columbus data, we also need to transform
    the continuous CRIME variable into a binary variable. As in
    :cite:`McMillen1992`, we define y = 1 if CRIME > 40.

    >>> y = np.array([dbf.by_col('CRIME')]).T
    >>> y = (y>40).astype(float)

    Extract HOVAL (home values) and INC (income) vectors from the DBF to be
    used as independent variables in the regression. Note that libpysal
    requires this to be an nxj numpy array, where j is the number of
    independent variables (not including a constant). By default this class
    adds a vector of ones to the independent variables passed in.

    >>> names_to_extract = ['INC', 'HOVAL']
    >>> x = np.array([dbf.by_col(name) for name in names_to_extract]).T

    Since we want to test the probit model for spatial dependence, we need to
    specify the spatial weights matrix that includes the spatial
    configuration of the observations into the error component of the model.
    To do that, we can open an already existing gal file or create a new one.
    In this case, we will use ``columbus.gal``, which contains contiguity
    relationships between the observations in the Columbus dataset we are
    using throughout this example. Note that, in order to actually read the
    file (not just open it), we need to append '.read()' at the end of the
    command.

    >>> w = libpysal.io.open(libpysal.examples.get_path("columbus.gal"), 'r').read()

    Unless there is a good reason not to do it, the weights have to be
    row-standardized so every row of the matrix sums to one. In libpysal,
    this can be easily performed in the following way:

    >>> w.transform='r'

    We are all set with the preliminaries, so we are good to run the model. In
    this case, we will need the variables and the weights matrix. If we want
    to have the names of the variables printed in the output summary, we will
    have to pass them in as well, although this is optional.

    >>> from spreg import Probit
    >>> model = Probit(y, x, w=w, name_y='crime', name_x=['income','home value'], name_ds='columbus', name_w='columbus.gal')

    Once we have run the model, we can explore the output a little bit. The
    regression object we have created has many attributes, so take your time
    to discover them.

    >>> np.around(model.betas, decimals=6)
    array([[ 3.353811],
           [-0.199653],
           [-0.029514]])

    >>> np.around(model.vm, decimals=6)
    array([[ 0.852814, -0.043627, -0.008052],
           [-0.043627,  0.004114, -0.000193],
           [-0.008052, -0.000193,  0.00031 ]])

    Since we have provided a spatial weights matrix, the diagnostics for
    spatial dependence have also been computed. We can access them and their
    p-values individually:

    >>> tests = np.array([['Pinkse_error','KP_error','PS_error']])
    >>> stats = np.array([[model.Pinkse_error[0],model.KP_error[0],model.PS_error[0]]])
    >>> pvalue = np.array([[model.Pinkse_error[1],model.KP_error[1],model.PS_error[1]]])
    >>> print(np.hstack((tests.T,np.around(np.hstack((stats.T,pvalue.T)),6))))
    [['Pinkse_error' '3.131719' '0.076783']
     ['KP_error' '1.721312' '0.085194']
     ['PS_error' '2.558166' '0.109726']]

    Or we can easily obtain a full summary of all the results nicely
    formatted and ready to be printed, simply by typing print(model.summary).
    """

    def __init__(
            self, y, x, w=None, optim='newton', scalem='phimean',
            maxiter=100, vm=False, name_y=None, name_x=None, name_w=None,
            name_ds=None, spat_diag=False):

        n = USER.check_arrays(y, x)
        y = USER.check_y(y, n)
        if w is not None:
            USER.check_weights(w, y)
            spat_diag = True
            ws = w.sparse
        else:
            ws = None
        x_constant, name_x, warn = USER.check_constant(x, name_x)
        set_warn(self, warn)
        BaseProbit.__init__(self, y=y, x=x_constant, w=ws, optim=optim,
                            scalem=scalem, maxiter=maxiter)
        self.title = "CLASSIC PROBIT ESTIMATOR"
        self.name_ds = USER.set_name_ds(name_ds)
        self.name_y = USER.set_name_y(name_y)
        self.name_x = USER.set_name_x(name_x, x)
        self.name_w = USER.set_name_w(name_w, w)
        SUMMARY.Probit(reg=self, w=w, vm=vm, spat_diag=spat_diag)

def newton(flogl, start, fgrad, fhess, maxiter):
    """
    Calculates the Newton-Raphson method

    Parameters
    ----------
    flogl       : lambda
                  Function to calculate the log-likelihood
    start       : array
                  kx1 array of starting values
    fgrad       : lambda
                  Function to calculate the gradient
    fhess       : lambda
                  Function to calculate the hessian
    maxiter     : int
                  Maximum number of iterations until optimizer stops
    """
    warn = 0
    iteration = 0
    par_hat0 = start
    m = 1
    while (iteration < maxiter and m >= 1e-04):
        H = -la.inv(fhess(par_hat0))
        g = fgrad(par_hat0).reshape(start.shape)
        Hg = np.dot(H, g)
        par_hat0 = par_hat0 + Hg
        iteration += 1
        m = np.dot(g.T, Hg)
    if iteration == maxiter:
        warn = 1
    logl = flogl(par_hat0)
    return (par_hat0, logl, warn)


def sp_tests(reg):
    """
    Calculates tests for spatial dependence in Probit models

    Parameters
    ----------
    reg         : regression object
                  output instance from a probit model
    """
    if reg.w is not None:
        try:
            w = reg.w.sparse
        except AttributeError:
            w = reg.w
        Phi = reg.predy
        phi = reg.phiy
        # Pinkse_error:
        Phi_prod = Phi * (1 - Phi)
        u_naive = reg.u_naive
        u_gen = reg.u_gen
        sig2 = np.sum((phi * phi) / Phi_prod) / reg.n
        LM_err_num = np.dot(u_gen.T, (w * u_gen)) ** 2
        trWW = np.sum((w * w).diagonal())
        trWWWWp = trWW + np.sum((w * w.T).diagonal())
        LM_err = float(1.0 * LM_err_num / (sig2 ** 2 * trWWWWp))
        LM_err = np.array([LM_err, chisqprob(LM_err, 1)])
        # KP_error:
        moran = moran_KP(reg.w, u_naive, Phi_prod)
        # Pinkse-Slade_error:
        u_std = u_naive / np.sqrt(Phi_prod)
        ps_num = np.dot(u_std.T, (w * u_std)) ** 2
        trWpW = np.sum((w.T * w).diagonal())
        ps = float(ps_num / (trWW + trWpW))  # chi-square instead of bootstrap
        ps = np.array([ps, chisqprob(ps, 1)])
    else:
        raise Exception("W matrix must be provided to calculate spatial tests.")
    return LM_err, moran, ps


def moran_KP(w, u, sig2i):
    """
    Calculates Moran-flavoured tests

    Parameters
    ----------
    w           : W
                  PySAL weights instance aligned with y
    u           : array
                  nx1 array of naive residuals
    sig2i       : array
                  nx1 array of individual variance
    """
    try:
        w = w.sparse
    except AttributeError:
        pass
    moran_num = np.dot(u.T, (w * u))
    E = SP.lil_matrix(w.get_shape())
    E.setdiag(sig2i.flat)
    E = E.asformat('csr')
    WE = w * E
    moran_den = np.sqrt(np.sum((WE * WE + (w.T * E) * WE).diagonal()))
    moran = float(1.0 * moran_num / moran_den)
    moran = np.array([moran, norm.sf(abs(moran)) * 2.])
    return moran


def _test():
    import doctest
    start_suppress = np.get_printoptions()['suppress']
    np.set_printoptions(suppress=True)
    doctest.testmod()
    np.set_printoptions(suppress=start_suppress)


if __name__ == '__main__':
    _test()
    import numpy as np
    import libpysal
    dbf = libpysal.io.open(libpysal.examples.get_path('columbus.dbf'), 'r')
    y = np.array([dbf.by_col('CRIME')]).T
    var_x = ['INC', 'HOVAL']
    x = np.array([dbf.by_col(name) for name in var_x]).T
    w = libpysal.io.open(libpysal.examples.get_path("columbus.gal"), 'r').read()
    w.transform = 'r'
    probit1 = Probit(
        (y > 40).astype(float), x, w=w, name_x=var_x, name_y="CRIME",
        name_ds="Columbus", name_w="columbus.gal")
    print(probit1.summary)