/* Source-control identification string for this file ("@(#)" is the marker
   searched for by the what(1) utility).  It is compiled out when "lint" is
   defined -- presumably to avoid unused-variable complaints. */
#ifndef lint
static char SCCSid[] = "@(#) ./comm/procset.c 07/23/93";
#endif

/*
   This file contains routines to manage subsets of processors.
   To make the operations efficient, once a subset is defined, 
   the processor set is "compiled", generating the information that
   is used in applying operations (such as the global operations) to
   the processor subset.

   An important feature still to be added is code to ensure that processor
   set names are unique.  To do that, we need to keep an active list of the
   processor set names that a particular processor belongs to.
 */

#include "tools.h"
#include "comm/comm.h"
#include "comm/global/global.h"

/* Lowest message type handed out so far by PIGetTypes.  It starts just below
   MSG_GLOBAL and only ever moves downward: PIGetTypes subtracts the requested
   count, takes the global minimum, and stores the result back here. */
static int _PICurType = (MSG_GLOBAL-1);

/*@
    PSCreate - Allocate and initialize an empty processor set structure.

    Input Parameter:
.   name - a unique name for this processor set.  Every processor that
           will use this processor set must pass the same name.  The
           value must be non-negative.

    Returns:
    Pointer to the new processor set.  The set has no members, all
    neighbor fields (tree, ring, mesh) are -1, and the mesh size is 0 x 0.
    (On allocation failure CHKPTRN is invoked; presumably it returns a
    null pointer to the caller.)

    Notes:
    The "name" parameter distinguishes between multiple processor sets in
    the same program; when only a single processor set is in use, zero is
    an acceptable name.
@*/
ProcSet *PSCreate( name )
int name;
{
    int           nproc  = NUMNODES;
    int           nwords = (nproc + 31) >> 5;   /* 32 membership bits per word */
    ProcSet       *ps;
    unsigned long *bits;
    int           w;

    /* ProcSet already holds one bitmap word, hence (nwords-1) extra words */
    ps = (ProcSet *)MALLOC( sizeof(ProcSet) + (nwords-1) * sizeof(unsigned long) );
    CHKPTRN(ps);

    ps->name = name;

    /* Membership bitmap: sized for all nodes, initially empty */
    ps->pset.nb = nproc;
    ps->pset.nw = nwords;
    bits        = ps->pset.b;
    w           = 0;
    while (w < nwords) {
        bits[w] = 0;
        w++;
    }

    /* Local numbering; filled in when the set is compiled */
    ps->node_nums  = (int *)0;
    ps->node_inums = (int *)0;
    ps->npset      = 0;
    ps->lidx       = -1;

    /* Tree neighbors */
    ps->root    = -1;
    ps->parent  = -1;
    ps->l_child = -1;
    ps->r_child = -1;
    ps->am_left = 0;

    /* Ring neighbors */
    ps->left  = -1;
    ps->right = -1;

    /* Mesh neighbors and mesh dimensions */
    ps->north = -1;
    ps->south = -1;
    ps->east  = -1;
    ps->west  = -1;
    ps->nx    = 0;
    ps->ny    = 0;

    /* Context offset; should eventually come from some registry (perhaps
       when the set is compiled) */
    ps->ctxoff = 0;

    return ps;
}

/*@
    PSDestroy - Release a processor set structure and the arrays it owns.

    Input Parameter:
.   procset - processor set structure to delete; a null pointer is
              accepted and ignored

    Notes:
    Frees the node_nums and node_inums arrays (when non-null) and then the
    structure itself.
@*/
void PSDestroy( procset )
ProcSet *procset;
{
    if (procset) {
        if (procset->node_nums) {
            FREE(procset->node_nums);
        }
        if (procset->node_inums) {
            FREE(procset->node_inums);
        }
        FREE(procset);
    }
}

/*@
    PSAddMember - Mark one or more processors as members of a processor set.

    Input parameters:
.   procset - processor set to add to
.   p       - array of processor numbers
.   np      - number of entries in p

    Notes:
    No checking is done here: the entries of p are handed to SETBIT as
    given, and the routine does not verify that the processor set has not
    already been compiled.
@*/
void PSAddMember( procset, p, np )
ProcSet *procset;
int     *p, np;
{
    Set *members = &procset->pset;
    int k        = 0;

    /* Should check here that the procset has not been compiled */
    while (k < np) {
        SETBIT(members,p[k]);
        k++;
    }
}

/*@
    PSSetMeshSize - Record the dimensions of the (possibly virtual) 2-d mesh
    that the processor set should be viewed as.

    Input Parameters:
.   procset - processor set to modify
.   nx,ny   - mesh dimensions; the procset is treated as an nx x ny
              rectangle of processors

    Notes:
    Call this before PSCompile; PSCompile reads these values when it
    computes the mesh neighbors.
@*/
void PSSetMeshSize( procset, nx, ny )
ProcSet *procset;
int     nx, ny;
{
    /* Only the two dimension fields are touched */
    procset->ny = ny;
    procset->nx = nx;
}

/*@
    PSCompile - "Compile" a processor set.

    Input Parameter:
.   procset - processor set to compile

    Notes:
    "Compile" here means to initialize the processor set structure: the
    members are enumerated in increasing node number, that list is saved
    in node_nums, and the calling processor's tree (parent/children/root),
    ring (left/right) and mesh (north/south/east/west) neighbors are
    computed in that virtual numbering and stored as actual node numbers.

    The routines used to determine the various neighbors (in the tree,
    ring, and mesh) can be set by the user.

    If the calling processor is not a member of the set, an error is
    raised with SETERRC and the routine returns without setting the
    neighbor fields.
@*/
void PSCompile( procset )
ProcSet *procset;
{
    int   *iwork, n, i, np, myid, mi, parent, r_child, l_child, am_left,
          left, right;
    int   north, south, east, west;
    Set   *s = &procset->pset;

    myid  = MYPROCID;
    n     = NUMNODES;
    procset->myid   = myid;
    procset->nump   = n;
    procset->ctxoff = 1;   /* This value toggles */

    /* iwork[k] = actual node number of the k'th member of the set */
    iwork = (int *)MALLOC( n * sizeof(int) );   CHKPTR(iwork);
    for (i=0; i<n; i++) iwork[i] = 0;

    /* Find the virtual mapping.  For starters, we just enumerate the members
       and use the obvious virtual ordering.  mi is the index of this
       processor in that ordering, or -1 if it is not a member. */
    np = 0;
    mi = -1;
    for (i=0; i<n; i++) {
        if (ISSET(s,i)) {
            if (i == myid) mi = np;
            iwork[np++] = i;
        }
    }
    if (mi < 0) {
        /* Release the work array before bailing out; it used to be leaked
           on this path. */
        FREE(iwork);
        SETERRC(1,"This processor is not a member of its own procset" );
        return;
    }

    /* Save the mapping.  mi >= 0 guarantees np >= 1 here, so the set can
       never be empty at this point. */
    procset->node_nums = (int *)MALLOC( np * sizeof(int) );
    CHKPTR(procset->node_nums);
    MEMCPY(procset->node_nums,iwork,np*sizeof(int));
    procset->npset = np;
    procset->lidx  = mi;

    /* Find the left and right child and the parent (virtual indices).
       Some nodes may have only one child. */
    PIGetCollectiveTree( mi, np, &l_child, &r_child, &parent, &am_left );
#ifdef FOO
    l_child = PINbrTree( procset, mi, np, PS_LCHILD );
    r_child = PINbrTree( procset, mi, np, PS_RCHILD );
    parent  = PINbrTree( procset, mi, np, PS_PARENT );
#endif
    /* Map to full node numbers; the first member (mi == 0) has no parent */
    r_child = (r_child >= 0) ? iwork[r_child] : -1;
    l_child = (l_child >= 0) ? iwork[l_child] : -1;
    parent  = (mi == 0) ? -1 : iwork[parent];

    procset->l_child = l_child;
    procset->r_child = r_child;
    procset->parent  = parent;
    procset->am_left = am_left;
    procset->root    = iwork[0];

    /* Find indices of ring nodes.  We should consider ordering these
       for neighbors (for a hypercube, take Gray code ordering) */
    left  = PINbrRing( procset, mi, np, -1, 1 );
    right = PINbrRing( procset, mi, np,  1, 1 );
    procset->left    = iwork[left];
    procset->right   = iwork[right];

    /* Find indices of mesh nodes.  Again, we may want to order these so that
       the actual neighbors are used.  A negative result means "no neighbor
       in that direction" and is stored as -1. */
    north = PINbrMesh2d( procset, mi, np, procset->nx, procset->ny,  0,  1, 1, 1 );
    south = PINbrMesh2d( procset, mi, np, procset->nx, procset->ny,  0, -1, 1, 1 );
    east  = PINbrMesh2d( procset, mi, np, procset->nx, procset->ny,  1,  0, 1, 1 );
    west  = PINbrMesh2d( procset, mi, np, procset->nx, procset->ny, -1,  0, 1, 1 );
    procset->north = (north >= 0) ? iwork[north] : -1;
    procset->south = (south >= 0) ? iwork[south] : -1;
    procset->east  = (east  >= 0) ? iwork[east]  : -1;
    procset->west  = (west  >= 0) ? iwork[west]  : -1;

    /* Save the inverse mapping. Not implemented yet. */

    FREE(iwork);
}

/*@
    PSPartition - Compute a partition dynamically by using an id value to
    partition processors into disjoint sets.

    Input parameters:
.   pval    - value used to partition processors.  Every processor with the
              same pval will be put into the same procset.  A negative value
              excludes the processor from all processor sets (and a NULL
              procset is returned).  Because pval+1 is what is exchanged,
              pval must be smaller than the largest int.
.   procset - processor set to partition (a null pointer is passed through
              to GISUM, and MYPROCID is then used as this processor's id)

    Returns:
    Processor set (already compiled), or NULL when pval is negative.

    Notes:
    Every member of procset must call this routine, including those that
    pass a negative pval, because a global sum is performed.

    Each processor deposits pval+1 in its own slot of a zero-filled array
    and the arrays are summed.  A slot that is still zero afterwards
    therefore means "no processor contributed here or it opted out", so
    processors outside procset are not mistaken for members of the
    pval == 0 partition.  (This assumes GISUM combines only the
    contributions of the members of procset.)
@*/
ProcSet *PSPartition( pval, procset )
int     pval;
ProcSet *procset;
{
    int     *work, *work2, i, myp, nump, tag;
    ProcSet *ps;

    /* Eventually, this should use the local indices in the processor set
       to reduce the amount of space/time used by this code */
    nump = NUMNODES;
    myp  = (procset) ? procset->myid  : MYPROCID;

    /* Value exchanged for this processor: 0 is reserved for "not in any
       partition", so non-negative pvals are shifted up by one */
    tag  = (pval >= 0) ? pval + 1 : 0;

    /* Find out who is in each processor set.  The second half of the
       allocation is scratch space for the global sum. */
    work  = (int *)MALLOC( 2 * nump * sizeof(int) );   CHKPTRN(work);
    work2 = work + nump;
    for (i=0; i<nump; i++) work[i] = 0;
    work[myp] = tag;
    /* Could also use GCOLX here */
    GISUM( work, nump, work2, procset );

    if (pval < 0) {
        /* This processor opted out of every partition */
        FREE(work);
        return 0;
    }

    /* Scan for everyone in my set */
    ps = PSCreate( pval );   CHKERRN(1);
    for (i=0; i<nump; i++) {
        if (work[i] == tag) {
            PSAddMember( ps, &i, 1 );
        }
    }
    FREE(work);
    PSCompile( ps );
    return ps;
}

/*@
    PSUnion - Build a new, compiled procset containing every processor that
    belongs to either of two procsets.

    Input Parameters:
.   ps1,ps2 - procsets to combine; if either one is a null pointer the
              result contains all nodes
.   name    - name for the new processor set (see PSCreate)

    Returns:
    Pointer to the new procset (PSCompile has already been applied to it)
@*/
ProcSet *PSUnion( ps1, ps2, name )
ProcSet *ps1, *ps2;
int     name;
{
    ProcSet *result;
    int     k;

    result = PSCreate( name );   CHKERRN(1);

    if (ps1 && ps2) {
        /* OR the two membership bitmaps together, one word at a time */
        unsigned long *dst    = result->pset.b;
        unsigned long *a      = ps1->pset.b;
        unsigned long *b      = ps2->pset.b;
        int           nwords  = result->pset.nw;

        for (k=0; k<nwords; k++) {
            dst[k] = a[k] | b[k];
        }
    }
    else {
        /* A null set stands for "all nodes", so the union is every node */
        int total = NUMNODES;

        for (k=0; k<total; k++) {
            PSAddMember( result, &k, 1 );
        }
    }

    PSCompile( result );
    return result;
}


/*@
    PIGetTypes - Reserve a range of message types that is unique to
    a processor set

    Input Parameters:
.   procset - processor set
.   n       - number of types to reserve

    Returns:
    The lowest value of the reserved range.

    Note:
    All members of a processor set must call this routine (synchronously),
    since a global minimum is taken over the set so that every member ends
    up with the same value.  Types are handed out downward from the
    file-level counter, and there is currently no method for returning
    types to the system.
@*/
int PIGetTypes( procset, n )
ProcSet *procset;
int     n;
{
    int lowest = _PICurType - n;   /* this processor's candidate */
    int scratch;                   /* work space for the reduction */

    GIMIN( &lowest, 1, &scratch, procset );

    /* Remember the agreed value so the next request starts below it */
    _PICurType = lowest;
    return lowest;
}
