Added symmetric power iterations to sandbox

main
Arnar Flatberg 2007-12-12 22:08:55 +00:00
parent 0956581c42
commit 1103245d85
10 changed files with 851 additions and 172 deletions

View File

@ -30,7 +30,7 @@ class get_matvec:
if isinstance(obj, sb.ndarray):
self.callfunc = self.type1
return
meth = getattr(obj,self.methname,None)
meth = getattr(obj, self.methname, None)
if not callable(meth):
raise ValueError, "Object must be an array "\
"or have a callable %s attribute." % (self.methname,)
@ -48,12 +48,12 @@ class get_matvec:
return sb.dot(self.obj.A, x)
def type2(self, x):
return self.obj(x,*self.args)
return self.obj(x, *self.args)
def eigen(A,k=6,M=None,ncv=None,which='LM',
maxiter=None,tol=0, return_eigenvectors=True):
""" Return k eigenvalues and eigenvectors of the matrix A.
def eigen(A, k=6, M=None, ncv=None, which='LM',
maxiter=None, tol=0, return_eigenvectors=True, v0=None):
"""Return k eigenvalues and eigenvectors of the matrix A.
Solves A * x[i] = w[i] * x[i], the standard eigenvalue problem for
w[i] eigenvalues with corresponding eigenvectors x[i].
@ -69,6 +69,7 @@ def eigen(A,k=6,M=None,ncv=None,which='LM',
M -- (Not implemented)
A symmetric positive-definite matrix for the generalized
eigenvalue problem A * x = w * M * x
v0 -- Initial starting solution (n x 1)
Outputs:
@ -99,8 +100,8 @@ def eigen(A,k=6,M=None,ncv=None,which='LM',
"""
try:
n,ny=A.shape
n==ny
n, ny = A.shape
assert n == ny
except:
raise AttributeError("matrix is not square")
if M is not None:
@ -108,12 +109,12 @@ def eigen(A,k=6,M=None,ncv=None,which='LM',
# some defaults
if ncv is None:
ncv=2*k+1
ncv=min(ncv,n)
if maxiter==None:
maxiter=n*10
ncv = 2*k + 1
ncv = min(ncv, n)
if maxiter == None:
maxiter = n*10
# guess type
# guess type
resid = sb.zeros(n,'f')
try:
typ = A.dtype.char
@ -129,7 +130,7 @@ def eigen(A,k=6,M=None,ncv=None,which='LM',
raise ValueError("k must be less than rank(A), k=%d"%k)
if maxiter <= 0:
raise ValueError("maxiter must be positive, maxiter=%d"%maxiter)
whiches=['LM','SM','LR','SR','LI','SI']
whiches = ['LM','SM','LR','SR','LI','SI']
if which not in whiches:
raise ValueError("which must be one of %s"%' '.join(whiches))
if ncv > n or ncv < k:
@ -141,17 +142,26 @@ def eigen(A,k=6,M=None,ncv=None,which='LM',
eigextract = _arpack.__dict__[ltr+'neupd']
matvec = get_matvec(A)
v = sb.zeros((n,ncv),typ) # holds Ritz vectors
resid = sb.zeros(n,typ) # residual
workd = sb.zeros(3*n,typ) # workspace
workl = sb.zeros(3*ncv*ncv+6*ncv,typ) # workspace
iparam = sb.zeros(11,'int') # problem parameters
ipntr = sb.zeros(14,'int') # pointers into workspaces
info = 0
v = sb.zeros((n, ncv), typ) # holds Ritz vectors
if v0 == None:
resid = sb.zeros(n, typ) # residual
info = 0
else: # starting vector is given
nn, kk = v0.shape
if nn != n:
raise ValueError("starting vector must be: (%d, 1), got: (%d, %d)" %(n, nn, kk))
resid = v0[:,0].astype(typ)
info = 1
workd = sb.zeros(3*n, typ) # workspace
workl = sb.zeros(3*ncv*ncv+6*ncv, typ) # workspace
iparam = sb.zeros(11, 'int') # problem parameters
ipntr = sb.zeros(14, 'int') # pointers into workspaces
ido = 0
if typ in 'FD':
rwork = sb.zeros(ncv,typ.lower())
rwork = sb.zeros(ncv, typ.lower())
# only supported mode is 1: Ax=lx
ishfts = 1
@ -160,7 +170,7 @@ def eigen(A,k=6,M=None,ncv=None,which='LM',
iparam[0] = ishfts
iparam[2] = maxiter
iparam[6] = mode1
while True:
if typ in 'fd':
ido,resid,v,iparam,ipntr,info =\
@ -173,9 +183,9 @@ def eigen(A,k=6,M=None,ncv=None,which='LM',
if (ido == -1 or ido == 1):
# compute y = A * x
xslice = slice(ipntr[0]-1, ipntr[0]-1+n)
yslice = slice(ipntr[1]-1, ipntr[1]-1+n)
workd[yslice]=matvec(workd[xslice])
xslice = slice(ipntr[0] - 1, ipntr[0] - 1 + n)
yslice = slice(ipntr[1] - 1, ipntr[1] - 1 + n)
workd[yslice] = matvec(workd[xslice])
else: # done
break
@ -233,7 +243,7 @@ def eigen(A,k=6,M=None,ncv=None,which='LM',
def eigen_symmetric(A,k=6,M=None,ncv=None,which='LM',
maxiter=None,tol=0, return_eigenvectors=True):
maxiter=None,tol=0, return_eigenvectors=True, v0=None):
""" Return k eigenvalues and eigenvectors of the real symmetric matrix A.
Solves A * x[i] = w[i] * x[i], the standard eigenvalue problem for
@ -253,6 +263,8 @@ def eigen_symmetric(A,k=6,M=None,ncv=None,which='LM',
A symmetric positive-definite matrix for the generalized
eigenvalue problem A * x = w * M * x
v0 -- Starting vector (n, 1)
Outputs:
w -- An real array of k eigenvalues
@ -325,12 +337,22 @@ def eigen_symmetric(A,k=6,M=None,ncv=None,which='LM',
matvec = get_matvec(A)
v = sb.zeros((n,ncv),typ)
resid = sb.zeros(n,typ)
if v0 == None:
resid = sb.zeros(n, typ) # residual
info = 0
else: # starting solution is given
nn, kk = v0.shape
if nn != n:
raise ValueError("starting vectors must be: (%d, %d), got: (%d, %d)" %(n, k, nn, kk))
resid = v0[:,0].astype(typ)
info = 1
#resid = sb.zeros(n,typ)
workd = sb.zeros(3*n,typ)
workl = sb.zeros(ncv*(ncv+8),typ)
iparam = sb.zeros(11,'int')
ipntr = sb.zeros(11,'int')
info = 0
#info = 0
ido = 0
# only supported mode is 1: Ax=lx
@ -340,8 +362,7 @@ def eigen_symmetric(A,k=6,M=None,ncv=None,which='LM',
iparam[0] = ishfts
iparam[2] = maxiter
iparam[6] = mode1
while True:
ido,resid,v,iparam,ipntr,info =\
eigsolver(ido,bmat,which,k,tol,resid,v,iparam,ipntr,
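As context for the new v0 argument, a hedged usage sketch (not part of the commit; assumes the module imports as scipy.sandbox.arpack and that return_eigenvectors=True yields (w, v)):
import numpy as sb
from scipy.sandbox import arpack
A = sb.random.rand(100, 100)
A = A + A.T                                            # symmetric test matrix
w, v = arpack.eigen_symmetric(A, k=4)                  # cold start: zero resid, info=0
w2, v2 = arpack.eigen_symmetric(A, k=4, v0=v[:, :1])   # warm start: resid=v0[:,0], info=1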

View File

@ -2,16 +2,149 @@
The primary use is crossvalidation.
"""
__all__ = ['lpls_val', 'pls_jk', 'lpls_jk']
__all__ = ['pca_val', 'pls_val', 'lpls_val', 'pls_jk', 'lpls_jk']
__docformat__ = "restructuredtext en"
from numpy import dot,empty,zeros,sqrt,atleast_2d,argmax,asarray,median,\
array_split,arange, isnan, any
array_split,arange, isnan, any,newaxis
from numpy.random import shuffle
from engines import pls
from engines import pls, pca
from engines import nipals_lpls as lpls
def pca_val(a, a_max, nsets=None, center_axis=[0]):
"""Returns error estimate of crossvalidated PCA.
*Parameters*:
a : {array}
data matrix (n x m)
a_max : {integer}
Maximum number of components in model.
center_axis:
Centering
nsets : {integer}
number of segments
*Returns*:
rmsep : {array}
Root mean squared error of prediction for each component and x-variable (a_max, m)
xhat : {array}
Crossvalidated predicted a (a_max, n, m)
aopt : {integer}
Estimate of optimal number of components
*Notes*:
- Crossvalidation method is currently set to random blocks of diagonals.
"""
n, m = a.shape
if nsets == None:
nsets = n
err = zeros((a_max, n, m), dtype=a.dtype)
err_mn = zeros((a_max, n, m), dtype=a.dtype)
xhat = zeros((a_max, n, m), dtype=a.dtype)
mn_a = .5*(a.mean(0) + a.mean(1)[:,newaxis])
for i, val in enumerate(diag_cv(a.shape, nsets)):
old_values = a.take(val)
new_values = mn_a.take(val)
# impute mean values
b = a.copy()
a.put(val, new_values)
dat = pca(a, a_max, mode='normal', center_axis=center_axis)
Ti, Pi = dat['T'], dat['P']
bc = b - dat['mnx']
bc2 = b - b.mean(0)
for j in xrange(a_max):
# predict the imputed values
a_pred = dot(Ti[:,:j+1], Pi[:,:j+1].T).take(val)
a_true = bc2.take(val)
err[j,:,:].put(val, (a_true - a_pred)**2)
err_mn[j,:,:].put(val, (bc.take(val) - a_pred)**2)
xhat[j,:,:].put(val, a_pred)
# put original values back
a.put(val, old_values)
rmsep = sqrt(err).mean(1) # take mean over samples
rmsep2 = sqrt(err_mn).mean(1)
aopt = rmsep.mean(-1).argmin()
return rmsep, xhat, aopt, err, rmsep2
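A hedged usage sketch of pca_val (not part of the commit; the data and set count are made up, and aopt is the 0-based argmin of the mean rmsep curve):
import numpy
a = numpy.random.rand(20, 8)
rmsep, xhat, aopt, err, rmsep2 = pca_val(a, a_max=4, nsets=8)
print "mean rmsep per component:", rmsep.mean(-1)
print "suggested component index (0-based):", aopt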
def pls_val(X, Y, a_max=2, nsets=None, center_axis=[0,0], verbose=False):
"""Performs crossvalidation for generalisation error in pls.
*Parameters*:
X : {array}
Main data matrix (m, n)
Y : {array}
External row data (m, l)
a_max : {integer}, optional
Maximum number of components to calculate (0, min(m,n))
nsets : (integer), optional
Number of crossvalidation sets
center_axis : {array-like}, optional
A two element array-like structure with elements in [-1,0,1,2],
that decides the type of centering used.
-1 : nothing
0 : row center
1 : column center
2 : double center
verbose : {boolean}, optional
Verbosity of console output. For use in debugging.
*Returns*:
rmsep : {array}
Root mean squared error of prediction (for each y-var)
yhat : {array}
Estimated responses
aopt : {integer}
Estimated value of optimal number of components
"""
dt = X.dtype
m, n = X.shape
k, l = Y.shape
assert m == k, "X (%d,%d) - Y (%d,%d) dim mismatch" %(m, n, k, l)
if nsets == None:
nsets = m
if nsets > X.shape[0]:
print "nsets (%d) is larger than number of variables (%d).\nnsets: %d -> %d" %(nsets, m, nsets, m)
nsets = m
if m > n:
# kernel boosting shortcut for wide X (fixme: _w_pls_predict is not defined in this module)
#Yhat = _w_pls_predict(X, Y, a_max)
pass
Yhat = empty((a_max, k, l), dtype=dt)
for cal, val in cv(k, nsets):
# do the training model
dat = pls(X[cal], Y[cal], a_max=a_max,center_axis=center_axis)
# center test data
xi = X[val,:] - dat['mnx']
ym = dat['mny']
# predictions
for a in range(a_max):
Yhat[a,val,:] = ym + dot(xi, dat['B'][a])
sep = (Y - Yhat)**2
rmsep = sqrt(sep.mean(1)).T
#aopt = find_aopt_from_sep(rmsep)
# todo: need a better support for classification error
error = prediction_error(Yhat, Y, method='1/2')
return rmsep, Yhat, error
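A hedged usage sketch of pls_val (not part of the commit; X, Y are made up, with Y as the binary dummy class matrix that prediction_error expects):
import numpy
X = numpy.random.rand(20, 50)
Y = numpy.zeros((20, 2))
Y[:10, 0] = 1.
Y[10:, 1] = 1.                     # binary dummy class matrix (samples, classes)
rmsep, Yhat, error = pls_val(X, Y, a_max=3, nsets=10, center_axis=[0, 0])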
def lpls_val(X, Y, Z, a_max=2, nsets=None,alpha=.5, center_axis=[2,0,2], zorth=False, verbose=False):
"""Performs crossvalidation for generalisation error in lpls.
@ -72,47 +205,29 @@ def lpls_val(X, Y, Z, a_max=2, nsets=None,alpha=.5, center_axis=[2,0,2], zorth=F
assert (alpha >= 0 and alpha<=1), "Alpha needs to be within [0,1], got: %.2f" %alpha
Yhat = empty((a_max, k, l), 'd')
for cal, val in cv(nsets, k):
for cal, val in cv(k, nsets):
# do the training model
dat = lpls(X[cal], Y[cal], Z, a_max=a_max, alpha=alpha,
center_axis=center_axis, zorth=zorth, verbose=verbose)
# center test data
if center_axis[0] != 1:
xi = X[val,:] - dat['mnx']
else:
xi = X[val] - X[cal].mean(1)[:,newaxis]
if center_axis[2] != 1:
ym = dat['mny']
else:
ym = Y[cal].mean(1)[:,newaxis]
xi = X[val,:] - dat['mnx']
ym = dat['mny'][val,:]
# predictions
for a in range(a_max):
Yhat[a,val,:] = atleast_2d(ym + dot(xi, dat['B'][a]))
#if permute:
# xcal = X[cal]
# for a in range(1,a_max,1):
# for n in range(10):
# shuffle(cal)
# dat = lpls(xcal, Y[cal], Z, a_max=a_max, alpha=alpha,
# center_axis=center_axis, verbose=verbose)
# todo: need a better support for classification error
y_is_class = Y.dtype.char.lower() in ['i','p', 'b', 'h','?']
if y_is_class:
pass
#Yhat, err = class_error(Yhat, Y)
#return Yhat, err
sep = (Y - Yhat)**2
rmsep = sqrt(sep.mean(1)).T
aopt = find_aopt_from_sep(rmsep)
#aopt = find_aopt_from_sep(rmsep)
return rmsep, Yhat, aopt
# todo: need a better support for classification error
error = prediction_error(Yhat, Y, method='1/2')
return rmsep, Yhat, error
def pca_jk(a, aopt, center_axis=[0], nsets=None):
"""Returns jack-knife segements from PCA.
def pca_jk(a, aopt, nsets=None, center_axis=[0], method='cv'):
"""Returns jack-knife segments from PCA.
*Parameters*:
@ -124,6 +239,12 @@ def pca_jk(a, aopt, center_axis=[0], nsets=None):
Centering
nsets : {integer}
number of segments
method : {'cv', 'diag', 'bs', 'bs_diag'}
Perturbation method is one of:
cv = leave samples out
diag = impute diagonals
bs = leave samples out with replacement (bootstrap)
bs_diag = impute diagonals with replacement
*Returns*:
@ -135,22 +256,32 @@ def pca_jk(a, aopt, center_axis=[0], nsets=None):
- Crossvalidation method is currently set to random blocks of samples.
"""
m, n = a.shape
if nsets == None:
nsets = a.shape[0]
nsets = m
Pcv = empty((nsets, a.shape[1], aopt), dtype=a.dtype)
mn_a = .5*(a.mean(0) + a.mean(1)[:,newaxis])
for i, (cal, val) in enumerate(cv_diag(a.shape, nsets)):
old_values = a.take(ind)
new_values = mn_a.take(ind)
a.put(ind, new_values)
dat = pca(a, aopt, mode='fast', scale='loads', center_axis=center_axis)
PP[i,:,:] = dat['P']
a.put(ind, old_values)
if method == 'diag':
mn_a = .5*(a.mean(0) + a.mean(1)[:,newaxis])
for i, val in enumerate(diag_cv(a.shape, nsets)):
old_values = a.take(val)
new_values = mn_a.take(val)
# impute mean values
a.put(val, new_values)
dat = pca(a, aopt, mode='fast', scale='loads', center_axis=center_axis)
Pcv[i,:,:] = dat['P']
# put original values back
a.put(val, old_values)
elif method == 'cv':
print "using ....cv "
for i, (cal, val) in enumerate(cv(m, nsets)):
Pcv[i,:,:] = pca(a[cal,:], aopt, mode='fast', scale='loads', center_axis = center_axis)['P']
else:
raise NotImplementedError(method)
return PP
return Pcv
def pls_jk(X, Y, a_opt, nsets=None, center_axis=True, verbose=False):
def pls_jk(X, Y, a_opt, nsets=None, center_axis=[0,0], verbose=False):
""" Returns jack-knife segements of W.
*Parameters*:
@ -187,9 +318,7 @@ def pls_jk(X, Y, a_opt, nsets=None, center_axis=True, verbose=False):
for i, (cal, val) in enumerate(cv(k, nsets)):
if verbose:
print "Segment number: %d" %i
dat = pls(X[cal,:], Y[cal,:], a_opt, scale='loads', mode='fast', center_axis=[0, 0])
if any(isnan(dat['W'])):
1/0
dat = pls(X[cal,:], Y[cal,:], a_opt, scale='loads', mode='fast', center_axis=center_axis)
Wcv[i,:,:] = dat['W']
return Wcv
@ -257,18 +386,18 @@ def lpls_jk(X, Y, Z, a_opt, nsets=None, xz_alpha=.5, center_axis=[2,0,2], zorth=
return WWx, WWz
def find_aopt_from_sep(sep, method='vanilla'):
def find_aopt_from_sep(err, method='vanilla'):
"""Returns an estimate of optimal number of components.
The estimate is based on the squared error of prediction from
The estimate is based on the error of prediction from
crossvalidation. This is pretty much wild guessing and it is
recommended to inspect model parameters and prediction errors
closely before deciding on the optimal number of components.
*Parameters*:
sep : {array}
Squared error of prediction
err : {array}
Error of prediction
method : ['vanilla', '75perc']
Method used to estimate optimal number of components
@ -280,22 +409,23 @@ def find_aopt_from_sep(sep, method='vanilla'):
if method == 'vanilla':
# min rmsep
rmsecv = sqrt(sep.mean(0))
rmsecv = sqrt(err.mean(0))
return rmsecv.argmin() + 1
elif method == '75perc':
prct = .75 #percentile
ind = 1.*sep.shape[0]*prct
med = median(sep)
prc_75 = []
for col in sep.T:
col = sorted(col)
prc_75.append(col[int(ind)])
prc_75 = asarray(prc_75)
for i in range(1, sep.shape[1], 1):
if med[i-1]<prc_75[i]:
return i
return len(med)
raise NotImplementedError
#prct = .75 #percentile
#ind = 1.*err.shape[0]*prct
#med = median(err)
#prc_75 = []
#for col in err.T:
# col = sorted(col)
# prc_75.append(col[int(ind)])
#prc_75 = asarray(prc_75)
#for i in range(1, err.shape[1], 1):
# if med[i-1]<prc_75[i]:
# return i
#return len(med)
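A worked example of the 'vanilla' rule with made-up errors, one row per cv segment and one column per component:
from numpy import array, sqrt
err = array([[1.0, 0.6, 0.4, 0.5],
             [1.2, 0.7, 0.5, 0.6]])      # (nsets, a_max), hypothetical values
print sqrt(err.mean(0)).argmin() + 1     # -> 3 components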
def cv(N, K, randomise=True, sequential=False):
"""Generates K (training, validation) index pairs.
@ -351,13 +481,29 @@ def cv(N, K, randomise=True, sequential=False):
validation = [i for i in index if i % K == k]
yield training, validation
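For example, assuming the non-randomised path keeps the natural index order, cv(6, 3) buckets indices by i % K:
for cal, val in cv(6, 3, randomise=False):
    print cal, val
# [1, 2, 4, 5] [0, 3]
# [0, 2, 3, 5] [1, 4]
# [0, 1, 3, 4] [2, 5]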
def diag_cv(shape, nsets=9):
def diag_cv(shape, nsets=9, randomise=True):
"""Generates K (training, validation) index pairs.
*Parameters*:
N : {integer}
alpha -- scalar, approx. portion of data perturbed
shape : {tuple}
Array shape
nsets : {integer}
Number of cv sets
randomise : {boolean}
Randomise diagonal index
*Returns*:
validation : {list}
validation indices (into the raveled, row-major array)
*Notes*:
This index is based on the full index (raveled row-major ordering).
It extracts along diagonals to ensure balanced removal along both axes
"""
try:
m, n = shape
@ -365,59 +511,195 @@ def diag_cv(shape, nsets=9):
raise ValueError("shape needs to be a two-tuple")
if nsets>m or nsets>n:
msg = "You may not use more subsets than max(n_rows, n_cols)"
raise ValueError, msg
nsets = min(m, n)
nm = n*m
index = arange(nm)
n_ind = arange(n)
shuffle(n_ind) # random start diag
n_ind = arange(n+1)
#shuffle(n_ind) # random start diag
start_inds = array_split(n_ind, nsets)
for v in range(nsets):
validation = []
for start in start_inds[v]:
ind = arange(start+v, nm, n+1)
[validation.append(i) for i in ind]
training = [j for j in index if j not in validation]
yield training, validation
validation = set()
for start in start_inds[v]:
ind = arange(start, nm, n+1)
validation.update(ind)
#training = [j for j in index if j not in validation]
yield list(validation)
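A sketch of the diagonal walk for a 4 x 4 array: each start index is extended with stride n+1 through the raveled (row-major) array, wrapping into the following rows so removal stays balanced over both axes:
from numpy import arange
m = n = 4
for start in arange(n + 1):
    print start, arange(start, m*n, n + 1)
# 0 [ 0  5 10 15]
# 1 [ 1  6 11]
# 2 [ 2  7 12]
# 3 [ 3  8 13]
# 4 [ 4  9 14]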
def class_error(y_hat, y, method='vanilla'):
""" Not used.
"""
a_opt, k, l = y_hat.shape
y_hat_c = zeros((a_opt, k, l), dtype='d')
if method == 'vanilla':
pass
for a in range(a_opt):
for i in range(k):
y_hat_c[a, i, argmax(y_hat[a,i,:])] = 1.0
err = 100*((y_hat_c + y) == 2).sum(1)/y.sum(0).astype('d')
return y_hat_c, err
def prediction_error(y_hat, y, method='squared'):
"""Loss function on multiclass Y.
Assumes y is a binary dummy class matrix (samples, classes)
"""
k, l = y.shape
if method == 'hinge':
pass
elif method == 'smooth_hinge':
z = 90
elif method == 'abs':
err = abs(y - y_hat)
elif method == 'squared':
err = (y - y_hat)**2
elif method == '0/1':
pred = zeros_like(y_hat)
for i, row in enumerate(y_hat):
largest = row.argsort()[-1]
pred[i, largest] = 1.
err = abs(y - pred)
elif method == '1/2':
y_hat[y_hat>.5] = 1
y_hat[y_hat<.5] = 0
err = abs(y - y_hat)
a_max, kk, ll = y_hat.shape
error = empty((a_max, l))
for a in range(a_max):
yha = y_hat[a, :, :]
if method == 'hinge':
err = zeros((k, l))
for j in range(l):
for i in range(k):
if y[i,j] == 1:
if yha[i, j] >= 1:
err[i,j] = 0
else:
err[i,j] = abs(y[i,j] - yha[i,j])
elif y[i,j] == 0:
if yha[i, j] <= 0:
err[i,j] = 0
else:
err[i,j] = abs(y[i,j] - yha[i,j])
elif method == 'smooth_hinge':
err = zeros((k, l))
for j in range(l):
for i in range(k):
if y[i,j] == 1:
if yha[i, j] >= 1:
err[i,j] = 0
elif yha[i,j] < 1 and yha[i,j] > 0:
err[i,j] = abs(y[i,j] - yha[i,j])
else:
err[i,j] = 1
elif y[i,j] == 0:
if yha[i, j] <= 0:
err[i,j] = 0
elif yha[i,j] < 1 and yha[i,j] > 0:
err[i,j] = abs(y[i,j] - yha[i,j])
else:
err[i,j] = 1
elif method == 'abs':
err = abs(y - yha)
elif method == 'squared':
err = (y - yha)**2
elif method == '0/1':
pred = zeros((k, l))
for i, row in enumerate(yha):
largest = row.argsort()[-1]
pred[i, largest] = 1.
err = abs(y - pred)
elif method == '1/2':
yh = yha.copy()
yh[yha>.5] = 1
yh[yha<.5] = 0
err = abs(y - yh)
else:
raise ValueError("Option: %s (method) not valid" %method)
return err
error[a,:] = err.mean(0)
return error
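A worked example of the '1/2' rule on made-up predictions for a single component: predictions are thresholded at .5 before taking absolute errors.
from numpy import array
y = array([[1., 0.], [0., 1.]])        # true dummy classes
yha = array([[.8, .1], [.6, .4]])      # hypothetical predictions
yh = yha.copy()
yh[yha > .5] = 1
yh[yha < .5] = 0
print abs(y - yh)                      # [[0. 0.] [1. 1.]]: sample 2 misclassified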
def _wkernel_pls_val(X, Y, a_max, n_blocks=None):
"""Returns rmsep and aopt for pls tailored for wide X.
The cross-validation error is calculated
from random block cross-validation. Setting the number of blocks equal to the
number of samples [default] gives leave-one-out cv.
The pls model is based on the simpls algorithm for wide X, and is quite
fast for very high dimensional X data.
*Parameters*:
X : ndarray
column centered data matrix of size (samples x variables)
Y : ndarray
column centered response matrix of size (samples x responses)
a_max : scalar
Maximum number of components
n_blocks : scalar
Number of blocks in cross validation
*Returns*:
rmsep : ndarray
Root Mean Square Error of cross-validated Predictions
aopt : scalar
Guestimate of the optimal number of components
SeeAlso:
- pls_cv_val : Same output, not optimised for wide X
- w_simpls : Simpls algorithm for wide X
Notes
-----
Based (cowardly translated) on m-files from the Chemoact toolbox
X, Y inputs need to be centered (fixme: check)
Examples
--------
>>> import numpy as n
>>> X = n.array([[1., 2., 3.],[]])
>>> Y = n.array([[1., 2., 3.],[]])
>>> w_pls(X, Y, 1)
[4,5,6], 1
"""
dt = X.dtype
k, l = m_shape(Y)
PRESS = zeros((l, a_max+1), dtype=dt)
if n_blocks==None:
n_blocks = Y.shape[0]
XXt = dot(X, X.T)
V = w_pls_gen(XXt, Y, n_blocks=n_blocks, center=True)
for Din, Doi, Yin, Yout in V:
ym = -sum(Yout, 0)[newaxis]/(1.0*Yin.shape[0])
PRESS[:,0] = PRESS[:,0] + ((Yout - ym)**2).sum(0)
dat = w_simpls(Din, Yin, a_max)
Q, U, H = dat['Q'], dat['U'], dat['H']
That = dot(Doi, dot(U, inv(triu(dot(H.T, U))) ))
Yhat = zeros((a_max, k, l), dtype=dt)
for j in range(l):
TQ = dot(That, triu(dot(Q[j,:][:,newaxis], ones((1,a_max)))) )
Yhat[:,:,j] = TQ
E = Yout[:,j][:,newaxis] - TQ
E = E + sum(E, 0)/Din.shape[0]
PRESS[j,1:] = PRESS[j,1:] + sum(E**2, 0)
#Yhat = Yin - dot(That,Q.T)
msep = PRESS/(Y.shape[0])
#aopt = find_aopt_from_sep(msep)
return Yhat, sqrt(msep)
def _w_pls(aat, b, aopt):
""" Pls for wide matrices.
Fast pls for crossval, used in calc rmsep for wide X
There is no P or W. T is normalised
aat = centered kernel matrix
b = centered y
"""
bb = b.copy()
k, l = m_shape(b)
m, m = m_shape(aat)
U = empty((m, aopt)) # W
T = empty((m, aopt))
R = empty((m, aopt)) # R
PROJ = empty((m, aopt)) # P?
for i in range(aopt):
if has_sym:
s, q = symeig(dot(dot(b.T, aat), b), range=(l,l),overwrite=True)
else:
q, s, vh = svd(dot(dot(b.T, aat), b), full_matrices=0)
q = q[:,:1]
u = dot(b , q) #y-factor scores
U[:,i] = u.ravel()
t = dot(aat, u)
t = t/vnorm(t)
T[:,i] = t.ravel()
r = dot(aat, t)#score-weights
#r = r/vnorm(r)
R[:,i] = r.ravel()
PROJ[:,: i+1] = dot(T[:,:i+1], inv(dot(T[:,:i+1].T, R[:,:i+1])) )
if i<aopt:
b = b - dot(PROJ[:,:i+1], dot(R[:,:i+1].T, b) )
C = dot(bb.T, T)

View File

@ -8,7 +8,7 @@ __docformat__ = "restructuredtext en"
from math import sqrt as msqrt
from numpy import dot,empty,zeros,apply_along_axis,newaxis,finfo,sqrt,r_,expand_dims,\
minimum, any, isnan
minimum,any,isnan,ones,tile
from numpy.linalg import inv,svd
from scipy.sandbox import arpack
@ -79,11 +79,13 @@ def pca(X, aopt, scale='scores', mode='normal', center_axis=[0]):
"""
m, n = X.shape
min_aopt = min(m, n)
max_aopt = min(m, n)
if center_axis != None:
X, mnx = center(X, center_axis[0])
min_aopt = min_aopt - 1
assert(aopt <= min_aopt)
max_aopt = max_aopt - 1
if aopt > max_aopt:
print "Using aopt: %d" %max_aopt
aopt = max_aopt
if m > (n+100) or n > (m+100):
u, s, v = esvd(X, aopt)
else:
@ -108,7 +110,7 @@ def pca(X, aopt, scale='scores', mode='normal', center_axis=[0]):
P = P*s
if mode in ['fast', 'f', 'F']:
return {'T':T, 'P':P, 'aopt':aopt}
return {'T':T, 'P':P, 'aopt':aopt, 'mnx': mnx}
if mode in ['detailed', 'd', 'D']:
E = empty((aopt, m, n))
@ -135,7 +137,7 @@ def pca(X, aopt, scale='scores', mode='normal', center_axis=[0]):
expvarx = r_[0, 100*e.cumsum()/(X*X).sum()]
return {'T': T, 'P': P, 'E': E, 'evx': expvarx, 'leverage': lev, 'ssqx': ssq,
'aopt': aopt, 'eigvals': e}
'aopt': aopt, 'eigvals': e, 'mnx': mnx}
def pcr(a, b, aopt, scale='scores',mode='normal',center_axis=[0, 0]):
""" Principal Component Regression.
@ -690,39 +692,80 @@ def center(a, axis):
### TODO ###
# Perhaps not(!) use broadcasting and return full centering
# matrix instead ?
#
# check if we have a vector
is_vec = len(a.shape) == 1
if not is_vec:
is_vec = a.shape[0] == 1 or a.shape[1] == 1
if is_vec:
if axis == 2:
warnings.warn("Double centering of vector ignored, using ordinary centering")
#warnings.warn("Double centering of vector ignored, using ordinary centering")
raise ValueError("Double centering of vector is not a valid option")
if axis == -1:
mn = 0
mn = zeros(a.shape)
else:
mn = a.mean()
mn = a.mean()*ones(a.shape)
return a - mn, mn
if axis == -1:
mn = zeros((1,a.shape[1],))
#mn = tile(mn, (a.shape[0], 1))
mn = tile(mn, (a.shape[0], 1))
elif axis == 0:
mn = a.mean(0)[newaxis]
#mn = tile(mn, (a.shape[0], 1))
mn = tile(mn, (a.shape[0], 1))
elif axis == 1:
mn = a.mean(1)[:,newaxis]
#mn = tile(mn, (1, a.shape[1]))
mn = tile(mn, (1, a.shape[1]))
elif axis == 2:
#fixme: double centering returns column mean as loc-vector, ok?
mn = a.mean(0)[newaxis] + a.mean(1)[:,newaxis] - a.mean()
return a - mn , a.mean(0)[newaxis]
return a - mn , mn
else:
raise ValueError("input error: axis must be in [-1,0,1,2]")
return a - mn, mn
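A quick check of the double-centering identity used for axis == 2: subtracting row means plus column means minus the grand mean zeroes both sets of means.
from numpy import allclose, newaxis, random
a = random.rand(5, 3)
mn = a.mean(0)[newaxis] + a.mean(1)[:, newaxis] - a.mean()
ac = a - mn
print allclose(ac.mean(0), 0), allclose(ac.mean(1), 0)   # True True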
def inv_center(a, mn_a, axis):
"""Inverse centering.
Adding row, column or double centering to a matrix.
This method uses broadcasting, so the size of a needs only
to match the axis argument.
*Parameters*:
a : {array}
Input data, to be centered
axis : {integer}
Which centering to perform.
0 = col center, 1 = row center, 2 = double center
-1 = nothing
*Returns*:
a_centered : {array}
Centered data matrix
*Notes*
May just as well be used to subtract a mean (just use a negative mn_a)
"""
if axis == -1:
return a
m, n = a.shape
k, o = mn_a.shape
if axis == 0: # column centering
assert(n == o and k == 1)
elif axis == 1: # row centering
assert(m == k and o == 1)
elif axis == 2: # double centering
assert(n == o and m == k)
return a + mn_a
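A hedged round-trip sketch for axis == 0 (a (1, n) row of column means broadcasts back over the rows):
from numpy import allclose, newaxis, random
a = random.rand(5, 3)
mn = a.mean(0)[newaxis]                        # (1, 3) column means
print allclose(inv_center(a - mn, mn, 0), a)   # True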
def _scale(a, axis):
""" Matrix scaling to unit variance.

View File

@ -14,7 +14,7 @@ from engines import pls
from engines import nipals_lpls as lpls
def hotelling(Pcv, P, p_center='median', cov_center=median,
def hotelling(Pcv, P, p_center='median', cov_center='median',
alpha=0.3, crot=True, strict=False):
"""Returns regularized hotelling T^2.
@ -31,8 +31,8 @@ def hotelling(Pcv, P, p_center='median', cov_center=median,
Calibration model paramter
p_center : {'median', 'mean', 'cal_model'}, optional
Location method for sub-segments
cov_center : {py_func}, optional
Location function
cov_center : {'median', 'mean', 'cal_model'}, optional
Pooled covariance estimate
alpha : {float}, optional
Regularisation towards pooled covariance estimate.
crot : {boolean}, optional
@ -79,33 +79,43 @@ def hotelling(Pcv, P, p_center='median', cov_center=median,
for i in xrange(n):
Pi = Pcv[:,i,:] # (n_sets x amax)
Pi_ctr = P_ctr[i,:] # (1 x amax)
Pim = (Pi - Pi_ctr)*msqrt(n_sets-1)
Cov_i[i] = (1./n_sets)*dot(Pim.T, Pim)
Cov = cov_center(Cov_i)
reg_cov = (1. - alpha)*Cov_i + alpha*Cov
#Pim = (Pi - Pi_ctr)*msqrt(n_sets-1)
#Cov_i[i] = (1./n_sets)*dot(Pim.T, Pim)
Pim = (Pi - Pi_ctr)
Cov_i[i] = dot(Pim.T, Pim)
if cov_center == 'median':
Cov_p = median(Cov_i)
elif cov_center == 'mean':
Cov_p = Cov_i.mean(0)
else:
print "Pooled covariance est. invalid, using median"
print cov_center
Cov_p = median(Cov_i)
reg_cov = (1. - alpha)*Cov_i + alpha*Cov_p
for i in xrange(n):
Pc = P_ctr[i,:]
sigma = reg_cov[i]
T_sq[i] = dot(dot(Pc, inv(sigma)), Pc)
return T_sq
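A sketch of the regularisation step with hypothetical 2 x 2 covariances; note the pooling with median(Cov_i) relies on the old numpy behaviour of taking the median over the first axis:
from numpy import array, median
alpha = 0.3
Cov_i = array([[[2., 0.], [0., 2.]],
               [[1., .5], [.5, 1.]]])          # per-variable (amax, amax) covariances
Cov_p = median(Cov_i)                          # pooled estimate
reg_cov = (1. - alpha)*Cov_i + alpha*Cov_p     # shrink each towards the pool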
def procrustes(a, b, strict=True, center=False, verbose=False):
def procrustes(a, b, strict=True, center=False, force_norm=False, verbose=False):
"""Orthogonal rotation of b to a.
Procrustes rotation is an orthogonal rotoation of one subspace
Procrustes rotation is an orthogonal rotation of one subspace
onto another by minimising the squared error.
*Parameters*:
a : {array}
Input array
Input array, stationary
b : {array}
Input array
Input array, rotate this to max. fit to a
strict : {boolean}
Only do flipping and shuffling
center : {boolean}
Center before rotation, translate back after
force_norm : {boolean}
Ensure that columns of a and b are orthonormal
verbose : {boolean}
Show sum of squares
@ -125,6 +135,12 @@ def procrustes(a, b, strict=True, center=False, verbose=False):
a = a - mn_a
mn_b = b.mean(0)
b = b - mn_b
if force_norm:
a_norm = apply_along_axis(norm, 0, a)
a = a/a_norm
b_norm = apply_along_axis(norm, 0, b)
b = b/b_norm
u, s, vt = svd(dot(b.T, a))
Cm = dot(u, vt) # Cm: orthogonal rotation matrix
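A minimal sketch of the rotation step (assumes orthonormal columns): a flipped and column-shuffled copy of a rotates exactly back onto a.
import numpy
a = numpy.linalg.qr(numpy.random.rand(5, 2))[0]   # orthonormal columns
b = -a[:, ::-1]                                   # flipped and shuffled copy
u, s, vt = numpy.linalg.svd(numpy.dot(b.T, a))
Cm = numpy.dot(u, vt)
print numpy.allclose(numpy.dot(b, Cm), a)         # True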
@ -166,13 +182,13 @@ def _ensure_strict(C, only_flips=True):
"""
if only_flips:
C = eye(Cm.shape[0])*sign(Cm)
C = eye(C.shape[0])*sign(C)
return C
Cm = zeros(C.shape, dtype='d')
Cm[abs(C)>.6] = 1.
if det(Cm)>1:
raise NotImplementedError
return Cm*S
return Cm*sign(C)
def lpls_qvals(X, Y, Z, aopt=None, alpha=.3, zx_alpha=.5, n_iter=20,
sim_method='shuffle', p_center='med', cov_center=median,
@ -248,7 +264,7 @@ def lpls_qvals(X, Y, Z, aopt=None, alpha=.3, zx_alpha=.5, n_iter=20,
Wc, Lc = lpls_jk(X, Y, Z ,aopt, zorth=zorth)
cal_tsq_x = hotelling(Wc, dat['W'], alpha=alpha)
cal_tsq_z = hotelling(Lc, dat['L'], alpha=alpha)
print "morn"
# Perturbations
index = arange(m)
for i in range(n_iter):
@ -257,9 +273,10 @@ def lpls_qvals(X, Y, Z, aopt=None, alpha=.3, zx_alpha=.5, n_iter=20,
dat = lpls(X, Y[indi,:], Z, aopt, scale='loads', center_axis=center_axis, zorth=zorth)
Wi, Li = lpls_jk(X, Y[indi,:], Z, aopt, nsets=nsets)
pert_tsq_x[:,i] = hotelling(Wi, dat['W'], alpha=alpha)
pert_tsq_z[:,i] = hotelling(Li, dat['L'], alpha=alpha)
# no reason to borrow variance in dag (alpha ->some small value)
pert_tsq_z[:,i] = hotelling(Li, dat['L'], alpha=0.01)
return _fdr(cal_tsq_z, pert_tsq_z, median), _fdr(cal_tsq_x, pert_tsq_x, median)
return cal_tsq_z, pert_tsq_z, cal_tsq_x, pert_tsq_x
def pls_qvals(X, Y, aopt, alpha=.3, n_iter=20,p_center='med', cov_center=median,
@ -335,7 +352,7 @@ def pls_qvals(X, Y, aopt, alpha=.3, n_iter=20,p_center='med', cov_center=median,
Wi = pls_jk(X, Y[indi,:], aopt, nsets=nsets, center_axis=center_axis)
pert_tsq_x[:,i] = hotelling(Wi, dat['W'], alpha=alpha)
return _fdr(cal_tsq_x, pert_tsq_x, median)
return cal_tsq_x, pert_tsq_x
@ -372,7 +389,7 @@ def _fdr(tsq, tsqp, loc_method=median):
gene expression data using dimension reduction methods, BMC
bioinformatics, 2007
"""
n, = tsq.shape
n = tsq.shape[0]
k, m = tsqp.shape
assert(n==k)
n_false = empty((n, m), 'd')

27
sandbox/powerit/Makefile Normal file
View File

@ -0,0 +1,27 @@
CC=gcc
PYTHON_INCLUDE=/usr/include/python2.5
PYTHON_LIBRARY=/usr/lib/python2.5
NUMPY_INCLUDE=$(PYTHON_LIBRARY)/site-packages/numpy/core/include/numpy/
GOTO_LIBRARY=/home/flatberg/GotoBLAS
BLAS_LIBRARY=/usr/lib
BLAS=blas
GOTO=goto
all: numpy_module.so
numpy_module.so: numpy_module.o sympowerit.o
$(CC) -Wall -shared numpy_module.o sympowerit.o -o _numpy_module.so -L$(PYTHON_LIBRARY) -lpython2.5 -L$(BLAS_LIBRARY) -l$(BLAS) -llapack -lg2c -lm
numpy_module.o: numpy_module.c numpy_module.h
$(CC) -fPIC -Wall -O2 -g -c -I$(PYTHON_INCLUDE) -I$(NUMPY_INCLUDE) numpy_module.c
sympowerit.o: sympowerit.c
$(CC) -Wall -O2 -g -c sympowerit.c
clean:
-rm numpy_module.o sympowerit.o
-rm -rf _numpy_module.so

70
sandbox/powerit/numpy_module.c Executable file
View File

@ -0,0 +1,70 @@
/* A file to test importing C modules for handling arrays to Python */
/* Python.h includes <stdio.h>, <string.h>, <errno.h>, <limits.h>, and <stdlib.h> (if available)*/
#include "Python.h"
#include "arrayobject.h"
#include "numpy_module.h"
#include "sympowerit.h"
#include <cblas.h>
/* ==== Set up the methods table ====================== */
static PyMethodDef _numpy_moduleMethods[] = {
{"sym_powerit", sym_powerit, METH_VARARGS},
{NULL, NULL} /* Sentinel - marks the end of this structure */
};
/* ==== Initialize the numpy_module functions =============== */
// Module name must be _numpy_module when compiled and linked
void init_numpy_module() {
(void) Py_InitModule("_numpy_module", _numpy_moduleMethods);
import_array(); // Must be present for NumPy. Called first after above line.
}
/* =========== Power iteration on symmetric matrix ========== */
static PyObject *sym_powerit(PyObject *self, PyObject *args)
{
PyArrayObject *X=NULL, *T=NULL, *E=NULL,*U=NULL;
double *tin, *ein, tol;
int amax, n, info, maxiter, verbose;
int dims[2];
/* Parse tuple of input arguments*/
if (!PyArg_ParseTuple(args, "O!O!iidii",
&PyArray_Type, &X,
&PyArray_Type, &T,
&n,
&amax,
&tol,
&maxiter,
&verbose)
)
return NULL;
if (NULL == X) return NULL;
/* Get the dimensions of the input */
n = X->dimensions[0];
/* Create output/ work arrays, no inplace calculations */
dims[0] = n;
dims[1] = amax;
U = (PyArrayObject*) PyArray_NewCopy(T, NPY_CORDER);
dims[1] = n;
E = (PyArrayObject*) PyArray_NewCopy(X, NPY_CORDER);
/* Get pointers to contiguous data (row-major)*/
ein = (double *)E->data;
tin = (double *)U->data;
/* Call sympower method */
info = sympowerit (ein, n, tin, amax, tol, maxiter, verbose);
Py_DECREF(E);
return Py_BuildValue("Ni", U, info);
}

1
sandbox/powerit/numpy_module.h Executable file
View File

@ -0,0 +1 @@
/* Header to test C modules for arrays for Python: sympowerit.c */ /* Python callable functions */ static PyObject *sym_powerit(PyObject *self, PyObject *args);

View File

@ -0,0 +1,116 @@
#include <cblas.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
/* =============== Main functions body ====================*/
int sympowerit(double *E, int n, double *T, int amax, double tol,
int maxiter, int verbose)
{
/*
PURPOSE: Estimate eigenvectors of a symmetric matrix using the power method.
CALLING SEQUENCE:
info = sympowerit(*E, n, *T, amax, tol, maxiter);
INPUTS:
E symmetric matrix from XX^T/X^TX of centered data matrix
type: *double
n elements in X first dimension
type: int
T workspace for scores (m, amax)
type: *double
amax maximum number of principal components
type: int
tol convergence limit
type: double
maxiter maximum number of iterations
type: int
verbose the amount of debug output
*/
int iter, a, j;
int info = 0;
double sumsq, l2norm, *y,*x, sgn, diff, alpha;
double lambda = 0.0;
if (verbose>1) verbose=1;
/* Allocate space for working vector and eigenvector */
y = (double *) malloc(n*sizeof(double));
x = (double *) malloc(n*sizeof(double));
/* Loop over all components to be estimated*/
for (a=0; a<=amax-1; a++){
/* Reset work-/eigen-vector*/
for (j=0; j<n; j++) {
y[j] = 0.0;
x[j] = T[(j*amax)+a];
}
/*x[0] = 1.0;*/
/* Main power-iteration loop */
for ( iter = 0; iter <= maxiter; iter++ ) {
/* Matrix-vector product */
/*cblas_dsymv (CblasRowMajor, CblasUpper, n, 1.0, E, n,
x, 1, 0.0, y, 1);*/
cblas_dgemv (CblasRowMajor, CblasNoTrans, n, n, 1.0, E, n,
x, 1, 0.0, y, 1);
/* Normalise y */
sumsq = y[0] * y[0];
lambda = x[0] * y[0];
for ( j = 1; j < n; j++ ) {
sumsq += y[j] * y[j];
lambda += x[j] * y[j];
}
l2norm = sqrt ( sumsq );
for ( j = 0; j < n; j++ ) y[j] /= l2norm;
/*Check for convergence */
sgn = ( lambda < 0 ? -1.0 : 1.0 );
diff = x[0] - sgn * y[0];
sumsq = diff * diff;
x[0] = y[0];
for ( j = 1; j < n; j++ ) {   /* j = 0 term already counted above */
diff = x[j] - sgn * y[j];
sumsq += diff * diff;
x[j] = y[j];
}
if ( sqrt ( sumsq ) < tol ) {
if (verbose == 1){
printf("\nComp: %d\n", a);
printf("Converged in %d iterations\n", iter);
}
break;
}
if (iter >= maxiter){
if (verbose == 1){
printf("\nComp: %d\n", a);
printf("Max iter reached.\n");
printf("Error: %.2E\n", sumsq);
}
info = 1;
break;
}
}
/* Calculate T */
for (j=0; j<n; j++){
y[j] = sgn*sqrt(sgn*lambda)*x[j];
T[(j*amax)+a] = y[j];
}
/* rank one deflation of residual matrix */
/*cblas_dsyr (CblasRowMajor, CblasUpper, n, -1.0, x, 1, E, n);*/
alpha = -1.0*lambda;
cblas_dger (CblasRowMajor, n, n, alpha, x, 1, x, 1, E, n);
}
/* Free used space */
free(x);
free(y);
return (info);
}
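For reference, a hedged plain-numpy transcription of the loop above (a reading aid, not the shipped code; starts from the same unit vector the Python wrapper below uses as default T0):
from numpy import dot, outer, sqrt, zeros

def _sym_powerit_py(E, amax, tol=1e-7, maxiter=100):
    E = E.copy()                        # deflation below is destructive
    n = E.shape[0]
    T = zeros((n, amax))
    for a in range(amax):
        x = zeros(n)
        x[0] = 1.0                      # default start, cf. T0 in the wrapper
        lmb = 0.0
        for it in range(maxiter):
            y = dot(E, x)               # power step
            lmb = dot(x, y)             # eigenvalue estimate (Rayleigh quotient)
            y = y/sqrt(dot(y, y))       # normalise
            sgn = -1.0 if lmb < 0 else 1.0
            done = sqrt(((x - sgn*y)**2).sum()) < tol
            x = y
            if done:
                break
        T[:, a] = sgn*sqrt(sgn*lmb)*x   # scale eigenvector to score length
        E = E - lmb*outer(x, x)         # rank-one deflation, cf. cblas_dger above
    return T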

View File

@ -0,0 +1,2 @@
int sympowerit (double *X, int n, double *T, int amax, double tol, int maxiter, int verbose);
/* Simple symmetric power iterations */

View File

@ -0,0 +1,100 @@
from numpy import *
HAS_SYMPOWER = True
try:
from _numpy_module import sym_powerit
except ImportError:
HAS_SYMPOWER = False
raise ImportError("sym_powerit module not present")
class SymPowerException(Exception):
pass
_ERRORCODES = {1: "Some eigenvectors did not converge, try to increase \nthe number of iterations or lower the tolerance level",
0: ""}
def sympowerit(xx, T0=None, mn_center=False, a_max=10, tol=1e-7, maxiter=100,
verbose=0):
"""Estimate eigenvectos of a symmetric matrix using the power method.
*Parameters*:
xx : {array}
Symmetric square array (m, m)
T0 : {array}
Initial solution (m, a_max), optional
mn_center : {boolean}, optional
Mean centering
a_max : {integer}, optional
Number of components to extract
tol : {float}, optional
Tolerance level of eigenvector solver
maxiter : {integer}
Maximum number of poweriterations to use
verbose : {integer}
Debug output (==1)
*Returns*:
v : {array}
Eigenvectors of xx, (m , a_max)
"""
valid_types = ['D','d','F','f']
dtype = xx.dtype.char
n, m = xx.shape
if not(dtype in valid_types):
msg = "Array type: (%s) needs to be a float or double" %dtype
raise SymPowerException(msg)
if not (m==n):
msg = "Input array needs to be square, input: (%d,%d)" %(m,n)
raise SymPowerException(msg)
# small test of symmetry
N = 5
num = random.randint(0,n,N)
for i in range(N):
j = N - 1 - i
if abs(xx[num[i],num[j]] - xx[num[j],num[i]])>1e-15:
msg = "Array needs to be symmetric"
raise SymPowerException(msg)
if not a_max:
a_max = 10
if T0 !=None:
tn, tm = T0.shape
if not (tn==n):
msg = "Start eigenvectors need to match input array ()"
raise SymPowerException(msg)
if not (tm==a_max):
msg = "Start eigenvectors need to match input a_max ()"
raise SymPowerException(msg)
else:
T0 = zeros((n, a_max), 'd')
T0[0,:] = ones((a_max,),'d')
if mn_center:
xx = _center(xx)
# call c-function
T, info = sym_powerit(xx, T0, n, a_max, tol, maxiter, verbose)
if info != 0:
if verbose:
print _ERRORCODES.get(info, "Don't know this error")
return T
def _center(xx, ret_mn=False):
"""Returns mean centered symmetric kernel matrix.
"""
n = xx.shape[0]
h = xx.sum(0)[:,newaxis]
h = (h - mean(h)/2)/n
mn_a = h + h.T
xxc = xx - mn_a
if ret_mn:
return xxc, mn_a
return xxc
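A sanity check of the centering identity (sketch): _center applied to a Gram matrix matches the Gram matrix of column-mean-centered data. The star import at the top of this file already provides random, allclose and dot.
x = random.rand(6, 3)
xc = x - x.mean(0)
print allclose(_center(dot(x, x.T)), dot(xc, xc.T))   # True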