#ifndef lint
static char SCCSid[] = "@(#) ./sparse/row/ftrperm.c 07/23/93";
#endif

/*
    This file contains a simple sparse factor and solve routine.
    These are intended for problems where the matrix is re-ordered
    to improve performance (e.g., numerical stability, reduction of fill)

    Let 
        R = reordering of rows
	C = reordering of columns
	Cinv = inverse of C
    (Often R = C; this is necessary if symmetry is to be preserved).
    These can be defined in terms of their actions.  Given a vector
    x_j, R(x)_j = x_(r(j)).

    Then the problem
    
        A x = b 

    may be cast as

        (R A C) Cinv x = R b

    Now, (R A C) is L U after factoring, so this is

        L U Cinv x = R b

    or

        x = C Uinv Linv R b

    and we finally have

        x = (C Uinv) (Linv R) b

    This would be fine if we insisted that R and C be strict permutations.
    In order to allow an additional permutation to be introduced by the
    user (to allow, for example, an arbitrary arrangement of the data
    x and b), a permutation may be applied to the the rows of the factor
    acter the matrix factor is generated.  This should be done with
    SpPermute() to insure that the elements remain properly sorted.
    Because of this, we leave this as

        Cinv x = Uinv (Linv R) b

    Apply Cinv directly to the Factor (may leave the values unordered).
    Then only R needs to be applied to b in the solve of the lower-triangular
    system.
 */

#include "tools.h"
#include "sparse/spmat.h"
#include "sparse/sppriv.h"
#include "inline/spops.h"

/*+
   SpiComputeFillPerm - Find storage for factor of matrix B in BB 
                          (descriptors allocated but not set).

   Note: 
   Because fill propagates down, a single row effectively adds 
   all of the fill for the rows above.  This is crucial in reducing the
   cost of the fill algorithm.  The rule is

$   Let IM = # of non-zeros to include.
$   1. If, at row "prow", we add a row (row) that contains column prow, then
      set IM[row] = nz;
$   2. IM[prow] = nz of row.

   This should be FillPermBase, and NOT allocate any storage.
 +*/
int SpiComputeFillPerm( B, BB )
SpMat        *B;
SpMatSplit   *BB;
{
int          prow, row, *fill, *xi, nz, nzi, n, fm, nnz, idx, m, nzf, 
             *nzl, err = 0, *im;
SpMat        *BBf = BB->factor;
SpVec        *pivot_row, *elim_row, **rs, **frs;
int          *r = BBf->map->rowmap, *ic = BBf->map->icolmap;
SpRowMat     *R = (SpRowMat *)B->data, *RR = (SpRowMat *)BBf->data;

/* Allocate temporaries to compute fill in; row to compute values of fill in */
BBf->nz = -1;   /* For now, don't keep track of the number of non-zeros in
                   the factored matrix.  We can use this value to provide
                   more options for allocating storage for the data */
n    = B->rows;
im   = (int *)MALLOC((n+1)*2*sizeof(int)); CHKPTRV(im,-(n+2));
fill = im + n + 1;

/* Nothing to do for the first row */
BB->nzl[0] = 0;
rs         = R->rs;
frs        = RR->rs;
nzl        = BB->nzl;
for (prow=0; prow<n; prow++) {
    /* Update row "prow" using all rows that are needed to eliminate
       entries on this row.  */
    elim_row  = rs[r[prow]];
    xi        = elim_row->i;
    /* Add the initial row (fill) */
    nz        = elim_row->nz;
    nzf       = nz;
    /* May be inserted in any order.  This should probably try to do a 
       better job of finding the insert point (this amounts to an n^2
       bubble sort.  At the very least, remembering the previous insert
       location would be useful. */
    fill[n] = n;
    while (nz--) {
	fm        = n;
	idx       = ic[*xi++];
	do {
	    m = fm;
	    fm= fill[m];
	    } while (fm < idx);
	fill[m]  = idx;
	fill[idx] = fm;
	}

    nzi = 0;
    /* Eliminate into (fill) */
    row = fill[n];
    while (row < prow) {
	pivot_row  = frs[row];
	nz         = nzl[row];
	/* Get the pivot row */
	xi         = pivot_row->i + nz;
	if (*xi++ != row) 
	    {SETERRC(-(row+1),"Zero pivot encountered"); 
            return -(row+1);}      /* zero (missing) pivot */
	nz++;
	nnz     = im[row] - nz;
	fm      = row;
	while (nnz-- > 0) {
	    idx       = *xi++;
	    nz++;
	    if (idx == prow) im[row] = nz;
	    /* find fm such that fill[m] <= idx fill[fill[fm]] */
	    /* elements are ordered */
	    do {
		m  = fm;
		fm = fill[m];
		} while (fm < idx);
	    if (fm != idx) {
		/* insert */
		fill[m]   = idx;
		fill[idx] = fm;
		fm        = idx;
		nzf++;
		}
	    }
	row   = fill[row];
	nzi++;
	};

    /* Allocate elim_row */
    row      = fill[n];
    elim_row = frs[prow];
    SPiMallocNVt(BBf,nzf,&elim_row->v,&elim_row->i,err); CHKERRV(1,-(row+1));
    elim_row->maxn = nzf;
    elim_row->nz   = nzf;

    /* Save the number of non-zeros in the lower triangle */
    nzl[prow] = nzi;
    
    /* Store filled row */
    nnz = nzf;
    xi  = elim_row->i;
    fm  = row;
    while (nnz--) {
	*xi++ = fm;
	fm    = fill[fm];
	}
    im[prow] = nzf;
    }

/* Recover temp used to find fill */
FREE( im );
return ERR_NONE;
}

/* Given a matrix B and a computed fill area BB, find the numerical 
   factor.  
 */
int SpComputeFactorPerm( B, BB )
SpMat       *B;
SpMatSplit *BB;
{
int          prow, row, *xi, nz, n, nnz, *yi;
double       multiplier, *pc, *xv, *rtmp;
SpMat *BBf = BB->factor;
SpVec *pivot_row, *elim_row, **rs;
int          *r = BBf->map->rowmap, *ic = BBf->map->icolmap;
SpRowMat     *R = (SpRowMat *)B->data, *RR = (SpRowMat *)BBf->data;
register int i;
int          nf = 0;

/* Allocate temporaries to compute fill in; row to compute values of fill in */
n    = B->rows;
rtmp = (double *)MALLOC( (n+1) * sizeof(double) );
CHKPTRV(rtmp,-(n+2));

/* Nothing to do for the first row */
BB->nzl[0] = 0;
rs         = RR->rs;
for (prow=0; prow<n; prow++) {
    /* Update row "prow" using all rows that are needed to eliminate
       entries on this row. */
    elim_row  = rs[prow];
    yi = xi   = elim_row->i;
    row       = *yi++;
    nz        = elim_row->nz;
    /* Set rtmp to 0 */
    for (i=0; i<nz; i++) rtmp[xi[i]] = 0.0;

    /* Load in the initial row from B */
    pivot_row = R->rs[r[prow]];
    xi        = pivot_row->i;
    xv        = pivot_row->v;
    nz        = pivot_row->nz;
    for (i=0; i<nz; i++) rtmp[ic[xi[i]]] = xv[i];
    
    /* Eliminate into (rtmp) */
    while (row < prow) {
	pivot_row = rs[row];
	pc        = rtmp+row;
	if (*pc != 0.0) {
	    nz         = BB->nzl[row];
	    /* Get the pivot row */
	    xv         = pivot_row->v + nz;
	    xi         = pivot_row->i + nz;
	    if (*xi++ != row) 
		{SETERRC(-(row+1),"Zero pivot encountered"); 
                return -(row+1);}   /* zero (missing) pivot */
	    multiplier = *pc * *xv++;
	    *pc        = multiplier;
	    nnz        = pivot_row->nz - nz - 1;
	    nf         += 2 * nnz;
	    SPARSEDENSESMAXPY(rtmp,multiplier,xv,xi,nnz);
	    }
	row   = *yi++;
	};
    /* copy row into v */
    xv  = elim_row->v;
    xi  = elim_row->i;
    nz  = elim_row->nz;
    /* Replace diagonal entry with inverse (pivot multiplier) */
    rtmp[prow] = 1.0 / rtmp[prow];
    GATHER(xv,xi,rtmp,nz);
    }

/* Recover temp used to find compute fill */
BB->nf = nf + n;
FREE( rtmp );
return ERR_NONE;
}

/* Given a matrix B and a computed fill area BB, find the numerical 
   factor.  
   This version attempts to make use of "pivot blocks": blocks that
   start with a pivot row and have identical structure.  Such rows are
   common in orderings such as nested disection and minimum degree (since
   they tend to produce completely filled in blocks towards the "bottom"
   of the matrix).  Smaller blocks come up in multi-component problems.
   
   There are two cases:
   
   within an block:
      First, we use the usual code to eliminate the non-zeros outside of 
      the block.  Then we use ... to eliminate within the block

   outside of a block:
      If the row to use as a pivot is in a block, then the entire block
      may be used to eliminate with (note that because we assume that 
      the diagonals are non-zero and blocks are contiguous rows, 
      eliminating with any row in a block will require eliminating with
      all of them).
 */
int SpRComputeFactorPermBlock( B, BB )
SpMat       *B;
SpMatSplit *BB;
{
int          prow, row, *xi, nz, n, nnz, *yi, 
             lastinblock, goalrow, boff;
double       multiplier, *pc, *xv, *rtmp, *r1tmp;
SpMat *BBf = BB->factor;
SpVec *pivot_row, *elim_row, **rs;
int          *r = BBf->map->rowmap, *ic = BBf->map->icolmap;
SpRowMat     *R = (SpRowMat *)B->data, *RR = (SpRowMat *)BBf->data;
register int i;
/* This is the blocked row structure, if it exists */
SpRowBlock   *blks= RR->blks;
int          nf = 0;

/* Allocate temporaries to compute fill in; row to compute values of fill in */
n    = B->rows;

r1tmp = (double *)MALLOC( (2*n+1) * sizeof(double) );
rtmp  = r1tmp + n;
CHKPTRV(r1tmp,-(n+2));

/* Nothing to do for the first row */
BB->nzl[0] = 0;
rs         = RR->rs;
lastinblock= -1;
for (prow=0; prow<n; prow++) {
    /* Update row "prow" using all rows that are needed to eliminate
       entries on this row. */
    elim_row  = rs[prow];
    yi = xi   = elim_row->i;
    row       = *yi++;
    nz        = elim_row->nz;
    /* Set rtmp to 0 */
    for (i=0; i<nz; i++) rtmp[xi[i]] = 0.0;

    /* Load in the initial row from B */
    pivot_row = R->rs[r[prow]];
    xi        = pivot_row->i;
    xv        = pivot_row->v;
    nz        = pivot_row->nz;
    for (i=0; i<nz; i++) rtmp[ic[xi[i]]] = xv[i];
    
    /* Discover if we are the first row in a block */
    if (lastinblock < 0) {
	if (elim_row->blki >= 0) {
	    lastinblock = prow + blks->blocksize[elim_row->blki] - 1;
	    goalrow     = prow;
	    }
	else 
	    goalrow     = prow;
	}
    /* While lastinblock is >= 0, we don't change the goal row */
    /* Eliminate into (rtmp) */
    while (row < goalrow) {
	/* Once we get to the rows that are in the same block as the prow,
	   we should switch to eliminating all of the element out of the
	   block and then use the in-block routine */
	pivot_row = rs[row];
	pc        = rtmp+row;
	if (*pc != 0.0) {
	    int pbi, ninblock, blockwidth, *offs;
	    if ((pbi = pivot_row->blki) >= 0) {
		/* This handles within block ops as well */
		if (pbi == elim_row->blki)
		    ninblock = prow - row;
		else
		    ninblock = blks->blocksize[pbi];
		offs       = blks->offsets[pbi];
		boff       = row - blks->startrows[pbi];
		blockwidth = rs[row]->nz - offs[boff];
		if (boff > 0) {
		    /* This can happen if some leading elements had *pc == 0 */
		    blockwidth -= boff;
		    ninblock   -= boff;
		    offs       += boff;
		    }
		SpiElimWithBlock( rs, rtmp, row, ninblock,
				  blockwidth, offs, boff, r1tmp );
		yi += ninblock - 1;
		nf += ninblock * (2 * blockwidth - (ninblock-1) / 2 );
		}
	    else {
		nz         = BB->nzl[row];
		/* Get the pivot row */
		xv         = pivot_row->v + nz;
		xi         = pivot_row->i + nz;
		if (*xi++ != row) 
		    {SETERRC(-(row+1),"Zero pivot encountered"); 
		     return -(row+1);}   /* zero (missing) pivot */
		multiplier = *pc * *xv++;
		*pc        = multiplier;
		nnz        = pivot_row->nz - nz - 1;
		SPARSEDENSESMAXPY(rtmp,multiplier,xv,xi,nnz);
		nf         += 2 * nnz;
		}
	    }
	row   = *yi++;
	};
    /* copy row into v */
    xv  = elim_row->v;
    xi  = elim_row->i;
    nz  = elim_row->nz;
    /* Replace diagonal entry with inverse (pivot multiplier) */
    if (lastinblock < 0) {
	rtmp[prow] = 1.0 / rtmp[prow];
	nf ++;
	}
    GATHER(xv,xi,rtmp,nz);
    /* When we reach the end of a block, we can eliminate the remaining
       elements below the block with a special routine for blocks */
    if (prow == lastinblock) {
	int pbi, ninblock, blockwidth, *offs;
	pbi        = elim_row->blki;
	/* This handles within block ops as well */
	ninblock   = blks->blocksize[pbi];
	offs       = blks->offsets[pbi];
	blockwidth = rs[blks->startrows[pbi]]->nz - offs[0];
	SpiElimWithinBlock( rs, blks->startrows[pbi], ninblock, 
			    blockwidth, offs );
	nf += blockwidth * ninblock * (ninblock - 1);
	lastinblock = -1;
	}
    }

/* Recover temp used to find fill */
FREE( r1tmp );
BB->nf = nf;      /* Is this the right number ? */
return ERR_NONE;
}


