/* 
   CM-5 interface, using the CMMD routines 

   For CMMD version 1
   These actually are NOT correct.  The send/recv on this machine
   are strongly blocking --- a send does not complete until there
   is a posted receive for it!

   After much anguish, I've decided to provide these routines as
   if the send does not block.  It is then the user's responsibility
   to ensure that no deadlock occurs.  To aid the user in writing 
   portable code, the macro SENDS_BLOCK_UNTIL_RECV is defined.

   In addition, the cm5 does NOT provide a hostless model of computation.
   To deal with that, I'm requiring that the user routine be called 
   "worker".  A generic interface will be provided that can call
   worker.

   For CMMD version 2, much has been fixed.  
   There is now a hostless model.
   These are now usable send/receive routines
 */

#ifndef __commcm5
#define __commcm5

#ifndef __commh
#include "comm/comm.h"
#endif

#include <cm/cmmd.h>

/* Globals used by the CM-5 port.  They are only declared here, so the
   definitions must live in some other translation unit.
   __CM5LEN is the scratch length written by RECVSYNCUNSZ; __NUMNODES and
   __MYPROCID are what the NUMNODES and MYPROCID macros expand to.
   __CM5FROM and __CM5TYPE are not referenced by the macros in this file. */
extern int __CM5LEN;
extern int __CM5FROM;
extern int __CM5TYPE;
extern int __NUMNODES;
extern int __MYPROCID;

/* Request-handle types for the asynchronous operations; both are plain
   aliases for CMMD_mcb.  Presumably these are the types callers declare
   for the `id` argument of the SENDASYNC/RECVASYNC family of macros --
   confirm against callers. */
typedef CMMD_mcb ASYNCRecvId_t;
typedef CMMD_mcb ASYNCSendId_t;

/* Codes for the `datatype` argument of the send/receive macros.  The
   macros visible in this file accept that argument but never expand it;
   the only code used here is MSG_OTHER, which RECVSYNCUNSZ passes on to
   RECVSYNC.  Presumably the codes stand for int, long, float, double and
   untyped data respectively -- confirm against comm/comm.h. */
#define MSG_INT   0
#define MSG_LNG   1
#define MSG_FLT   2
#define MSG_DBL   3
#define MSG_OTHER 4

#if CMMDVersion < 2 

/* Capability flags for CMMD version 1: this branch defines none of the
   asynchronous (SENDASYNC/RECVASYNC), force-type (*FORCE) or collective
   (*GLOB) macros. */
#define NO_ASYNC_SEND
#define NO_ASYNC_RECV
#define NO_FORCE
#define NO_NATIVE_GLOBAL
/* A send does not complete until a matching receive has been posted (see
   the note at the top of this file).  Portable code can test this macro
   and order its sends and receives so that no deadlock occurs. */
#define SENDS_BLOCK_UNTIL_RECV

/* Blocking point-to-point macros for CMMD version 1.

   SENDSYNC(type,buffer,length,to,datatype)
       Send `length` bytes at `buffer` to node `to` with tag `type`, using
       CMMD_send bracketed by the LOGSEND* hooks.
   RECVSYNC(type,buffer,length,datatype)
       Receive a message with tag `type` from any node (ANY_NODE) into
       `buffer`, using CMMD_receive bracketed by the LOGRECV* hooks.
   SENDSYNCNOMEM / RECVSYNCNOMEM
       Forward unchanged to SENDSYNC / RECVSYNC.
   RECVSYNCUNSZ(type,bufp,size,datatype)
       Wait (SYNCPROBE) for a message with tag `type`, store the value of
       CMMD_bytes_received() in __CM5LEN, MALLOC that many bytes into
       `bufp`, then receive into it.  The result of MALLOC is not checked,
       and `size` is never written.
       NOTE(review): confirm whether callers expect `size` to be set.

   `datatype` is accepted but not used by any of these definitions.
   Every multi-statement macro expands to a brace block, so arguments with
   side effects may be evaluated more than once. */
#define SENDSYNC(type,buffer,length,to,datatype) \
        {LOGSENDSTART(length,type,to);\
         CMMD_send(to,type,(char *)(buffer),length);\
         LOGSENDEND(type);}
#define RECVSYNC(type,buffer,length,datatype) \
        {LOGRECVSTART(type);\
         CMMD_receive(ANY_NODE,type,(char *)(buffer),length);\
         LOGRECVEND(type,0,0);}
#define SENDSYNCNOMEM(type,buffer,length,to,datatype) \
        SENDSYNC(type,buffer,length,to,datatype)
#define RECVSYNCNOMEM(type,buffer,length,datatype) \
        RECVSYNC(type,buffer,length,datatype)
#define RECVSYNCUNSZ(type,bufp,size,datatype) \
        {SYNCPROBE(type);\
         __CM5LEN = CMMD_bytes_received();\
         bufp = (char *)MALLOC(__CM5LEN);\
         RECVSYNC(type,bufp,__CM5LEN,MSG_OTHER);}

/* CMMD provides async handling for messages of <= 16 bytes (!).
   I haven't enabled them here, since this is a rather special case,
   and further, it must be enabled on the host(!) as well.  It may
   be added as experience warrants. */

/* Message probing (CMMD version 1).
   SYNCPROBE(type)  - spin until CMMD_msg_pending(ANY_NODE,type) returns 1.
   ASYNCPROBE(type) - one CMMD_msg_pending poll; the macro's value is the
                      result of that call.
   ASYNCDONE(id)    - always 1; `id` is ignored.
   RECVPROBED(...)  - identical to RECVSYNC. */
#define SYNCPROBE(type) \
        while (CMMD_msg_pending(ANY_NODE,type) != 1) {}
#define ASYNCPROBE(type) \
        CMMD_msg_pending(ANY_NODE,type)
#define ASYNCDONE(id) 1
#define RECVPROBED(type,buffer,length,datatype) \
        RECVSYNC(type,buffer,length,datatype)

/* Message-information accessors; each one is a direct call into CMMD and
   yields whatever that call returns. */
#define RECVTYPE() CMMD_msg_tag()         /* tag, per the CMMD call name    */
#define RECVFROM() CMMD_msg_sender()      /* sender, per the CMMD call name */
#define RECVLEN()  CMMD_bytes_received()  /* byte count, per the call name  */

/* Message-buffer management.  Send and receive buffers are treated alike:
   allocation assigns MALLOC(maxmsg), cast to (type *), to `msg`; release
   is FREE(msg).  `maxmsg` is handed to MALLOC unchanged (presumably a byte
   count -- confirm against MALLOC).  The result is not checked here. */
#define MSGALLOCSEND(msg,maxmsg,type) \
        msg = (type *)MALLOC(maxmsg)
#define MSGALLOCRECV(msg,maxmsg,type) \
        msg = (type *)MALLOC(maxmsg)
#define MSGFREESEND(msg) FREE(msg)
#define MSGFREERECV(msg) FREE(msg)

/* 
   There are some global operations.  These are:
   CMMD_sync_with_nodes()
   CMMD_set_global_or(int val), val = CMMD_get_global_or()
   as well as scan and reduction operations, concatenation (for GCOL).
   For example GDSUM on all processors is
   for (k=0; k<n; k++) 
       value[k] = CMMD_reduce_double( value[k], CMMD_combiner_add );
 */
   

/* Node count and this node's id, read from the globals declared near the
   top of this header. */
#define MYPROCID __MYPROCID
#define NUMNODES __NUMNODES

/* Store the usable range of message types through the two pointer
   arguments.  The maximum below is EXACT(!); see the CMMD documentation.
   It will probably be cut down later so that a few fixed values can be
   reserved for the global operations. */
#define MSGTYPERANGE(low,high) { *(low) = 0; *(high) = 127; }

/* Distance between two nodes, delegated to gdistdf (not declared in this
   file). */
#define MSGDISTANCE(from,to) gdistdf(from,to)

/* Diameter reported for the machine: one less than the node count. */
#define MSGDIAMETER (NUMNODES-1)

#else 
/* CMMD Version 2 and later.
   This version supports asynchronous sends and receives, and a form of
   force-type send (a send that blocks until the receiver is ready for it).
 */

/* NOTE(review): NO_NATIVE_GLOBAL is defined in this branch even though the
   *GLOB collective macros are defined further down -- confirm whether the
   native collectives are deliberately disabled. */
#define NO_NATIVE_GLOBAL

/* Blocking-interface point-to-point macros for CMMD version 2.

   SENDSYNC(type,buffer,length,to,datatype)
       Send `length` bytes at `buffer` to node `to` with tag `type`, using
       CMMD_send_noblock bracketed by the LOGSEND* hooks.
   RECVSYNC(type,buffer,length,datatype)
       Receive a message with tag `type` from any node (CMMD_ANY_NODE) into
       `buffer`, using CMMD_receive_block bracketed by the LOGRECV* hooks.
   SENDSYNCNOMEM / RECVSYNCNOMEM
       Forward unchanged to SENDSYNC / RECVSYNC.
   RECVSYNCUNSZ(type,bufp,size,datatype)
       Wait (SYNCPROBE) for a message with tag `type`, store the value of
       CMMD_bytes_received() in __CM5LEN, MALLOC that many bytes into
       `bufp`, then receive into it.  The result of MALLOC is not checked,
       and `size` is never written.
       NOTE(review): confirm whether callers expect `size` to be set.

   `datatype` is accepted but not used by any of these definitions.
   Every multi-statement macro expands to a brace block, so arguments with
   side effects may be evaluated more than once. */
#define SENDSYNC(type,buffer,length,to,datatype) \
        {LOGSENDSTART(length,type,to);\
         CMMD_send_noblock(to,type,(char*)(buffer),length);\
         LOGSENDEND(type);}
#define RECVSYNC(type,buffer,length,datatype) \
        {LOGRECVSTART(type);\
         CMMD_receive_block(CMMD_ANY_NODE,type,(char*)(buffer),length);\
         LOGRECVEND(type,0,0);}
#define SENDSYNCNOMEM(type,buffer,length,to,datatype) \
        SENDSYNC(type,buffer,length,to,datatype)
#define RECVSYNCNOMEM(type,buffer,length,datatype) \
        RECVSYNC(type,buffer,length,datatype)
#define RECVSYNCUNSZ(type,bufp,size,datatype) \
        {SYNCPROBE(type);\
         __CM5LEN = CMMD_bytes_received();\
         bufp = (char*)MALLOC(__CM5LEN);\
         RECVSYNC(type,(char*)(bufp),__CM5LEN,MSG_OTHER);}

/* Asynchronous routines.

   SENDASYNC / RECVASYNC
       Start a transfer with CMMD_send_async / CMMD_receive_async, passing
       a null handler, and store the returned value in `id`.  Receives
       accept a message from any node (CMMD_ANY_NODE).
   SENDWAIT / RECVWAIT
       Call CMMD_msg_wait(id) followed by CMMD_free_mcb(id).  RECVWAIT also
       runs the LOGRECV* hooks; SENDWAIT does no logging.
   *NOMEM variants
       Forward every argument unchanged to the macro of the same name
       without the NOMEM suffix.
   RECVCANCEL / SENDCANCEL
       Only call CMMD_free_mcb(id).

   `datatype` is accepted but not used by any of these definitions.  The
   multi-statement macros expand to brace blocks and may evaluate `id`
   more than once. */
#define SENDASYNC(type,buffer,length,to,datatype,id) \
        {LOGSENDSTART(length,type,to);\
         id=CMMD_send_async(to,type,(char*)(buffer),length,(void *(*)())0);\
         LOGSENDEND(type);}
#define RECVASYNC(type,buffer,length,datatype,id) \
        {id=CMMD_receive_async(CMMD_ANY_NODE,type,(char*)(buffer),length,\
                               (void *(*)())0);}
#define SENDWAIT(type,buffer,length,to,datatype,id) \
        {CMMD_msg_wait(id);CMMD_free_mcb(id);}
#define RECVWAIT(type,buffer,length,datatype,id) \
        {LOGRECVWAIT(type);CMMD_msg_wait(id);CMMD_free_mcb(id);\
         LOGRECVREADY(type);\
         LOGRECVSTART(type);LOGRECVEND(type,0,0);}
#define SENDASYNCNOMEM(type,buffer,length,to,datatype,id) \
        SENDASYNC(type,buffer,length,to,datatype,id)
#define RECVASYNCNOMEM(type,buffer,length,datatype,id) \
        RECVASYNC(type,buffer,length,datatype,id)
#define SENDWAITNOMEM(type,buffer,length,to,datatype,id) \
        SENDWAIT(type,buffer,length,to,datatype,id)
#define RECVWAITNOMEM(type,buffer,length,datatype,id) \
        RECVWAIT(type,buffer,length,datatype,id)
#define RECVCANCEL(id) CMMD_free_mcb(id)
#define SENDCANCEL(id) CMMD_free_mcb(id)

/* Forcetype routines.  Only SYNCHRONOUS forcetype routines are supported.
   These use the blocking send/recvs:

   SENDSYNCFORCE  - CMMD_send_block, bracketed by the LOGSEND* hooks.
   RECVSYNCFORCE  - CMMD_receive_block from CMMD_ANY_NODE, bracketed by the
                    LOGRECV* hooks.

   Every asynchronous and wait FORCE variant below is an alias for the
   ordinary SENDASYNC / RECVASYNC / SENDWAIT / RECVWAIT macro, and each
   NOMEM variant forwards its arguments unchanged. */
#define SENDASYNCFORCE(type,buffer,length,to,datatype,id) \
        SENDASYNC(type,buffer,length,to,datatype,id)
#define RECVASYNCFORCE(type,buffer,length,datatype,id) \
        RECVASYNC(type,buffer,length,datatype,id)
#define SENDSYNCFORCE(type,buffer,length,to,datatype) \
        {LOGSENDSTART(length,type,to);\
         CMMD_send_block(to,type,(char*)(buffer),length);\
         LOGSENDEND(type);}
#define RECVSYNCFORCE(type,buffer,length,datatype) \
        {LOGRECVSTART(type);\
         CMMD_receive_block(CMMD_ANY_NODE,type,(char *)(buffer),length);\
         LOGRECVEND(type,0,0);}
#define SENDWAITFORCE(type,buffer,length,to,datatype,id) \
        SENDWAIT(type,buffer,length,to,datatype,id)
#define SENDWAITNOMEMFORCE(type,buffer,length,to,datatype,id) \
        SENDWAIT(type,buffer,length,to,datatype,id)
#define RECVWAITFORCE(type,buffer,length,datatype,id) \
        RECVWAIT(type,buffer,length,datatype,id)
#define SENDASYNCNOMEMFORCE(type,buffer,length,to,datatype,id) \
        SENDASYNC(type,buffer,length,to,datatype,id)
#define RECVASYNCNOMEMFORCE(type,buffer,length,datatype,id) \
        RECVASYNC(type,buffer,length,datatype,id)
#define SENDSYNCNOMEMFORCE(type,buffer,length,to,datatype) \
        SENDSYNCFORCE(type,buffer,length,to,datatype)
#define RECVSYNCNOMEMFORCE(type,buffer,length,datatype) \
        RECVSYNCFORCE(type,buffer,length,datatype)
#define RECVWAITNOMEMFORCE(type,buffer,length,datatype,id) \
        RECVWAIT(type,buffer,length,datatype,id)

/* Message probing (CMMD version 2).
   SYNCPROBE(type)  - spin until CMMD_msg_pending(CMMD_ANY_NODE,type)
                      returns 1.
   ASYNCPROBE(type) - one CMMD_msg_pending poll; the macro's value is the
                      result of that call.
   ASYNCDONE(id)    - always 1; `id` is ignored.
   RECVPROBED(...)  - identical to RECVSYNC. */
#define SYNCPROBE(type) \
        while (CMMD_msg_pending(CMMD_ANY_NODE,type) != 1) {}
#define ASYNCPROBE(type) \
        CMMD_msg_pending(CMMD_ANY_NODE,type)
#define ASYNCDONE(id) 1
#define RECVPROBED(type,buffer,length,datatype) \
        RECVSYNC(type,buffer,length,datatype)

/* Message-information accessors; each one is a direct call into CMMD and
   yields whatever that call returns. */
#define RECVTYPE() CMMD_msg_tag()         /* tag, per the CMMD call name    */
#define RECVFROM() CMMD_msg_sender()      /* sender, per the CMMD call name */
#define RECVLEN()  CMMD_bytes_received()  /* byte count, per the call name  */

/* Message-buffer management.  Send and receive buffers are treated alike:
   allocation assigns MALLOC(maxmsg), cast to (type *), to `msg`; release
   is FREE(msg).  `maxmsg` is handed to MALLOC unchanged (presumably a byte
   count -- confirm against MALLOC).  The result is not checked here. */
#define MSGALLOCSEND(msg,maxmsg,type) \
        msg = (type *)MALLOC(maxmsg)
#define MSGALLOCRECV(msg,maxmsg,type) \
        msg = (type *)MALLOC(maxmsg)
#define MSGFREESEND(msg) FREE(msg)
#define MSGFREERECV(msg) FREE(msg)

/* 
   There are some global operations.  These are:
   CMMD_sync_with_nodes()
   CMMD_set_global_or(int val), val = CMMD_get_global_or()
   as well as scan and reduction operations, concatenation (for GCOL).
 */
/* Collective ("GLOB") operations built directly on CMMD.

   GSCATTERGLOB(buf,size,issrc,datatype)
       If `issrc` is nonzero, broadcast `size` bytes at `buf` with
       CMMD_bc_to_nodes; otherwise receive the broadcast into `buf` with
       CMMD_receive_bc_from_node.
   GSCATTERSRCGLOB(buf,size,src,datatype)
       Same, with the source selected by comparing `src` to MYPROCID.
   GSYNCGLOB()
       CMMD_sync_with_nodes().
   G{I,D,F}SUMGLOB, G{I,D}MAXGLOB, G{I,D}MINGLOB, GIORGLOB (val,n,work)
       In-place reduction of the `n` elements at `val` through
       CMMD_reduce_v, with the combiner and element size shown in each
       definition.  `work` is not used.

   `datatype` is accepted but not used. */
#define GSCATTERGLOB(buf,size,issrc,datatype) {\
    if (issrc) CMMD_bc_to_nodes((char*)(buf),size);\
    else       CMMD_receive_bc_from_node((char*)(buf),size);}
#define GSCATTERSRCGLOB(buf,size,src,datatype) {\
    if ((src)==MYPROCID) CMMD_bc_to_nodes((char*)(buf),size);\
    else                 CMMD_receive_bc_from_node((char*)(buf),size);}
#define GSYNCGLOB() CMMD_sync_with_nodes()
#define GISUMGLOB(val,n,work) \
    CMMD_reduce_v(val,val,CMMD_combiner_add,sizeof(int),n)
#define GDSUMGLOB(val,n,work) \
    CMMD_reduce_v(val,val,CMMD_combiner_dadd,sizeof(double),n)
#define GFSUMGLOB(val,n,work) \
    CMMD_reduce_v(val,val,CMMD_combiner_fadd,sizeof(float),n)
#define GIMAXGLOB(val,n,work) \
    CMMD_reduce_v(val,val,CMMD_combiner_max,sizeof(int),n)
#define GDMAXGLOB(val,n,work) \
    CMMD_reduce_v(val,val,CMMD_combiner_dmax,sizeof(double),n)
#define GIMINGLOB(val,n,work) \
    CMMD_reduce_v(val,val,CMMD_combiner_min,sizeof(int),n)
#define GDMINGLOB(val,n,work) \
    CMMD_reduce_v(val,val,CMMD_combiner_dmin,sizeof(double),n)
#define GIORGLOB(val,n,work) \
    CMMD_reduce_v(val,val,CMMD_combiner_ior,sizeof(int),n)

/* Node count and this node's id, read from the globals declared near the
   top of this header. */
#define NUMNODES __NUMNODES
#define MYPROCID __MYPROCID

/* Store the usable range of message types through the two pointer
   arguments. */
#define MSGTYPERANGE(low,high) {*(low)=0;*(high)=2000000000;}

/* Distance between two nodes, delegated to gdistdf (not declared in this
   file). */
#define MSGDISTANCE(from,to)   gdistdf(from,to)

/* Store the smallest and largest message sizes through the two pointer
   arguments.  The arguments are parenthesised so that pointer expressions
   such as `p + 1` dereference correctly.
   NOTE(review): 8096 may be a typo for 8192 -- confirm against the CMMD
   limits before changing the value. */
#define MSGSIZES(min,max)      {*(min)=0;*(max)=(16384*8096);}

/* Diameter reported for the machine: one less than the node count. */
#define MSGDIAMETER            (NUMNODES-1)

#endif

#endif


