
/*
 *           PVM 3.0:  Parallel Virtual Machine System 3.0
 *               University of Tennessee, Knoxville TN.
 *           Oak Ridge National Laboratory, Oak Ridge TN.
 *                   Emory University, Atlanta GA.
 *      Authors:  A. L. Beguelin, J. J. Dongarra, G. A. Geist,
 *          R. J. Manchek, B. K. Moore, and V. S. Sunderam
 *                   (C) 1992 All Rights Reserved
 *
 *                              NOTICE
 *
 * Permission to use, copy, modify, and distribute this software and
 * its documentation for any purpose and without fee is hereby granted
 * provided that the above copyright notice appear in all copies and
 * that both the copyright notice and this permission notice appear in
 * supporting documentation.
 *
 * Neither the Institutions (Emory University, Oak Ridge National
 * Laboratory, and University of Tennessee) nor the Authors make any
 * representations about the suitability of this software for any
 * purpose.  This software is provided ``as is'' without express or
 * implied warranty.
 *
 * PVM 3.0 was funded in part by the U.S. Department of Energy, the
 * National Science Foundation and the State of Tennessee.
 */

/*
 *	pvmd.c
 *
 *	Mr. pvm daemon.
 *
$Log$
 */


#include <sys/param.h>
#ifdef IMA_TITN
#include <bsd/sys/types.h>
#else
#include <sys/types.h>
#endif
#include <sys/time.h>
#include <sys/wait.h>
#ifdef IMA_RS6K
#include <sys/select.h>
#endif
#include <sys/stat.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <fcntl.h>
#include <errno.h>
#include <stdio.h>
#include <signal.h>
#include <ctype.h>
#ifdef	SYSVSTR
#include <string.h>
#else
#include <strings.h>
#endif
#include <netdb.h>
#include <pwd.h>

#include "global.h"
#include "fromlib.h"
#include "tdpro.h"
#include "ddpro.h"
#include "protoglarp.h"
#include "pvmalloc.h"
#include "host.h"
#include "pvmdabuf.h"
#include "pvmfrag.h"
#include "mesg.h"
#include "pkt.h"
#include "task.h"
#include "waitc.h"
#include "listmac.h"

/* on Cray and i860 builds MAXPATHLEN is taken from CANBSIZ
   (presumably the system headers there do not supply it -- confirm) */
#if defined(IMA_CRAY) || defined(IMA_I860)
#define	MAXPATHLEN	CANBSIZ
#endif

/* fallback max()/min() when no header defined them.
   NB: each argument may be evaluated twice */
#ifndef	max
#define	max(a,b)	((a)>(b)?(a):(b))
#endif

#ifndef	min
#define	min(a,b)	((a)<(b)?(a):(b))
#endif

/* struct timeval ops
*
*	All arguments are pointers to struct timeval and may be evaluated
*	more than once, so they must be free of side effects.
*	The result pointer (ztv) may alias either operand.
*/

/* true if *xtv is strictly earlier than *ytv */
#define	TVXLTY(xtv, ytv) \
	((xtv)->tv_sec < (ytv)->tv_sec || \
	 ((xtv)->tv_sec == (ytv)->tv_sec && (xtv)->tv_usec < (ytv)->tv_usec))

/* set *tvp to zero */
#define	TVCLEAR(tvp)	((tvp)->tv_sec = (tvp)->tv_usec = 0)

/* true if *tvp is nonzero */
#define	TVISSET(tvp)	((tvp)->tv_sec || (tvp)->tv_usec)

/*
*	*ztv = *xtv - *ytv, borrowing one second when usec would go negative.
*	Wrapped in do/while(0) so the macro is a single statement and can be
*	used safely as the body of an if/else.
*/
#define	TVXSUBY(ztv, xtv, ytv)	\
	do {	\
		if ((xtv)->tv_usec >= (ytv)->tv_usec) {	\
			(ztv)->tv_sec = (xtv)->tv_sec - (ytv)->tv_sec;	\
			(ztv)->tv_usec = (xtv)->tv_usec - (ytv)->tv_usec;	\
		} else {	\
			(ztv)->tv_sec = (xtv)->tv_sec - (ytv)->tv_sec - 1;	\
			(ztv)->tv_usec = (xtv)->tv_usec + 1000000 - (ytv)->tv_usec;	\
		}	\
	} while (0)

/*
*	*ztv = *xtv + *ytv, carrying into seconds when usec reaches 1000000.
*	Wrapped in do/while(0) for the same reason as TVXSUBY.
*/
#define	TVXADDY(ztv, xtv, ytv)	\
	do {	\
		if (((ztv)->tv_usec = (xtv)->tv_usec + (ytv)->tv_usec) < 1000000) {	\
			(ztv)->tv_sec = (xtv)->tv_sec + (ytv)->tv_sec;	\
		} else {	\
			(ztv)->tv_usec -= 1000000;	\
			(ztv)->tv_sec = (xtv)->tv_sec + (ytv)->tv_sec + 1;	\
		}	\
	} while (0)

/* forward declarations (old-style, no prototypes) */

char *getenv();
void pvmbailout();				/* defined in this file */
void reap();					/* SIGCHLD/SIGCLD handler, this file */
void catch();					/* SIGINT/SIGTERM handler, this file */
void evilsig();					/* fatal-signal handler, this file */
char *inadport_decimal();
char *inadport_hex();
char *debug_flags();			/* defined in this file */
char *dmname();
char *tmname();


/***************
 **  Globals  **
 **           **
 ***************/

extern int errno;				/* declared by hand as well as via <errno.h> */

extern struct task *locltasks;	/* from task.c */
extern struct waitc *waitlist;	/* from waitc.c */

char **epaths = 0;				/* exec search path */
int debugmask = 0;				/* which debugging info */
struct htab *filehosts = 0;		/* advisory host table from hostfile */
struct htab *hosts = 0;			/* active host table */
char *loclsnam = 0;				/* t-d socket or addr file path */
int loclsock = -1;				/* pvmd-task master tcp socket */
char *myarchname = ARCHCLASS;	/* our arch name; exported as PVM_ARCH in main() */
int myhostpart = 0;				/* host number shifted to tid host field */
int myndf = 0;					/* host native data enc */
int mytid = 0;					/* this pvmd tid */
int myunixpid = -1;				/* pvmd pid */
int netsock = -1;				/* host-host udp socket */
struct htab *newhosts = 0;		/* hosts being added by pvmd' */
struct htab *oldhosts = 0;		/* real host table (for pvmd') */
int ourudpmtu = UDPMAXLEN;		/* local UDP MTU */
int ppnetsock = -1;				/* pvmd' host-host udp socket */
int pprime = 0;					/* pvmd' pid for when we're forked */
int pprwid = 0;					/* wait id for when pvmd' is done */
int runstate = 0;				/* pvmd run state */
int tidhmask = TIDHOST;			/* mask for host field of tids */
int tidlmask = TIDLOCAL;		/* mask for local field of tids */
int useruid = -1;				/* our uid */
char *username = 0;				/* our loginname */


/***************
 **  Private  **
 **           **
 ***************/

static char rcsid[] = "$Id$";	/* RCS keyword string */
static int *deads = 0;			/* circ queue of dead pids */
static char pvmtxt[512];		/* scratch for error log */
static int ndead = 0;			/* len of deads */
static int rdead = 0;			/* read ptr for deads */
static int slavemode = 0;		/* started by master pvmd */
static int wdead = 0;			/* write ptr for deads */
static fd_set wrk_rfds;			/* fd_sets for select() in work() */
static fd_set wrk_wfds;			/* (write set) */
static fd_set wrk_efds;			/* (exception set; maintained but not passed to select()) */
static int wrk_nfds = 0;		/* 1 + highest bit set in fds */


/*	main()
*
*	Entry point of the pvmd.  Records uid, login name and pid, parses
*	switches (-d debugmask, -n hostname, -s slave mode) and an optional
*	hostfile argument, runs slave_config() or master_config(), derives
*	our tid, installs signal handlers, changes to $HOME (or /tmp),
*	calls task_init(), wait_init() and nmd_init(), exports PVM_ARCH,
*	queues a DM_ADDHOST message for startable hostfile hosts when
*	master, then enters work().
*/

main(argc, argv)
	int argc;
	char **argv;
{
	int i, j, ac;
	int bad = 0;			/* count of unknown switches */
	char *homedir;
	char *name = 0;			/* value of -n switch, passed to master_config() */
	struct passwd *pe;
	char buf[64];			/* holds the PVM_ARCH= environment string */

	if ((useruid = getuid()) == -1) {
		pvmlogerror("main() can't getuid()\n");
		pvmbailout(0);
	}

	pvmlogopen();

	/* keep a private copy of the login name; failure is only logged */
	if (pe = getpwuid(useruid))
		username = STRALLOC(pe->pw_name);
	else
		pvmlogerror("main() can't getpwuid\n");
	endpwent();

	if ((myunixpid = getpid()) == -1) {
		pvmlogerror("main() can't getpid()\n");
		pvmbailout(0);
	}

	/* fall back to /tmp when HOME is not in the environment */
	if (!(homedir = getenv("HOME")))
		homedir = "/tmp";

	myndf = getarchcode(myarchname);

	/*
	* scan arguments: switches are consumed, anything else is compacted
	* down into argv[1..], with ac counting the surviving entries
	*/
	for (i = j = ac = 1; i < argc; i++) {
		if (argv[i][0] == '-') {
			switch (argv[i][1]) {

			case 'd':		/* debug mask follows the switch letter directly */
				if (debugmask = pvmxtoi(argv[i] + 2)) {
					sprintf(pvmtxt, "main() debugmask is %x (%s)\n",
							debugmask, debug_flags(debugmask));
					pvmlogerror(pvmtxt);
				}
				break;

			case 'n':		/* host name follows the switch letter directly */
				name = argv[i] + 2;
				break;

			case 's':
				slavemode = 1;
				break;

			default:
				sprintf(pvmtxt, "main() unknown switch: %s\n", argv[i]);
				pvmlogerror(pvmtxt);
				bad++;
			}

		} else {
			argv[j++] = argv[i];
			ac++;
		}
	}
	argc = ac;

	/* at most one non-switch argument (the hostfile) is accepted */
	if (bad || argc > 2) {
		pvmlogerror("usage: pvmd3 [-ddebugmask] [-nhostname] [hostfile]\n");
		pvmbailout(0);
	}

	if (debugmask) {
		sprintf(pvmtxt, "version %s\n", PVM_VER);
		pvmlogerror(pvmtxt);
		sprintf(pvmtxt, "ddpro %d tdpro %d\n", DDPROTOCOL, TDPROTOCOL);
		pvmlogerror(pvmtxt);
	}

	if (slavemode)					/* slave pvmd */
		slave_config();

	else							/* master pvmd */
		master_config(name, argc == 2 ? argv[1] : "");

	/* shift our host index up to the lowest set bit of the host mask */
	myhostpart = hosts->ht_local << (ffs(tidhmask) - 1);
	mytid = myhostpart | TIDPVMD;

	/* allocate and zero the dead-pid queue before reap() can be called */
	ndead = 1000;	/* XXX hum, static limit makes this easy to do */
	deads = TALLOC(ndead, int, "pids");
	bzero((char*)deads, ndead * sizeof(int));

#ifndef	SYSVSIGNAL
	(void)signal(SIGCHLD, reap);
#else
	(void)signal(SIGCLD, reap);
#endif

	/*
	* install catch() for SIGINT/SIGTERM unless they were already
	* being ignored, in which case they stay ignored
	*/
	if (signal(SIGINT, SIG_IGN) != SIG_IGN)
		(void)signal(SIGINT, catch);
	if (signal(SIGTERM, SIG_IGN) != SIG_IGN)
		(void)signal(SIGTERM, catch);

	(void)signal(SIGHUP, SIG_IGN);

/*
	(void)signal(SIGILL, evilsig);
	(void)signal(SIGFPE, evilsig);
	(void)signal(SIGBUS, evilsig);
	(void)signal(SIGSEGV, evilsig);
	(void)signal(SIGSYS, evilsig);
*/

	if (chdir(homedir) == -1) {
		pvmlogperror(homedir);
		pvmbailout(0);
	}

	task_init();
	wait_init();
	nmd_init();

	sprintf(buf, "PVM_ARCH=%s", myarchname);
	pvmputenv(buf);

/* XXX hack to start slaves automatically */

	if (!slavemode && filehosts) {
		struct hostd *hp;
		int hh;
		int n = 0;			/* number of hostfile hosts without HF_NOSTART */
		struct mesg *mp;

		/* first pass: count the hosts to be started */
		for (hh = filehosts->ht_last; hh >= 1; hh--)
			if ((hp = filehosts->ht_hosts[hh]) && !(hp->hd_flag & HF_NOSTART))
				n++;
		if (n) {
			/* second pass: pack the count followed by each host name */
			mp = mesg_new(0);
			mp->m_cod = DM_ADDHOST;
			pkint(mp, n);
			for (hh = 1; hh <= filehosts->ht_last; hh++)
				if ((hp = filehosts->ht_hosts[hh]) && !(hp->hd_flag & HF_NOSTART))
					pkstr(mp, hp->hd_name);
			mp->m_dst = TIDPVMD;
			sendmessage(mp);
		}
	}

	work();
	pvmbailout(0);		/* not reached */
	exit(0);
	return 0;
}


/* names of the debugmask bits, lowest bit first */
static char *dflgs[] = {
	"pkt", "msg", "tsk", "slv", "hst", "sel"
};

/*	debug_flags()
*
*	Convert a debug mask to a comma-separated list of flag names.
*	Bits with no name in dflgs[] are ignored.
*	Returns a pointer to a static buffer that the next call overwrites.
*/

char *
debug_flags(mask)
	int mask;
{
	static char buf[64];
	int nflags = sizeof(dflgs) / sizeof(dflgs[0]);
	char *sep = "";			/* becomes "," once a name has been written */
	int k;

	buf[0] = 0;
	for (k = 0; k < nflags; k++) {
		if (!(mask & (1 << k)))
			continue;
		strcat(buf, sep);
		strcat(buf, dflgs[k]);
		sep = ",";
	}
	return buf;
}


/*	evilsig()
*
*	Handler for fatal signals.  Puts SIGILL, SIGFPE, SIGBUS, SIGSEGV and
*	SIGSYS back to their default actions, calls i_dump(1) and then
*	abort().  The pvmbailout() call is only reached if abort() returns.
*/

void
evilsig(sig)
	int sig;
{
	static int fatal[] = { SIGILL, SIGFPE, SIGBUS, SIGSEGV, SIGSYS };
	int nfatal = sizeof(fatal) / sizeof(fatal[0]);
	int k;

	for (k = 0; k < nfatal; k++)
		(void)signal(fatal[k], SIG_DFL);

	i_dump(1);
	abort();
	pvmbailout(sig);
}


/*	catch()
*
*	Handler for SIGINT and SIGTERM.  Resets both to their default
*	actions, logs the signal number and exits through pvmbailout().
*/

void
catch(sig)
	int sig;
{
	static int handled[] = { SIGINT, SIGTERM };
	int k;

	for (k = 0; k < 2; k++)
		(void)signal(handled[k], SIG_DFL);

	sprintf(pvmtxt, "catch() caught signal %d\n", sig);
	pvmlogerror(pvmtxt);
	pvmbailout(sig);
}


/*	reap()
*
*	Child process has exited.  Put its pid in the fifo of tasks
*	to be cleaned up (in the work loop).
*
*	Runs as the SIGCHLD (SIGCLD under SYSVSIGNAL) handler.  The wait
*	calls made here overwrite errno, and the interrupted code tests
*	errno after select() and sendto(), so errno is saved on entry and
*	restored on exit.
*
*	XXX the fifo is not checked for overflow; if more than ndead pids
*	XXX are queued before work() drains them, entries are overwritten.
*/

void
reap(sig)
	int sig;
{
	int pid;
	int saverrno = errno;		/* preserve for the interrupted code */

	sig = sig;					/* unused */

#ifndef SYSVSIGNAL
	/* collect every child that has exited so far without blocking */
	while ((pid = wait3(0, WNOHANG, (struct rusage *)0)) > 0) {
		deads[wdead] = pid;
		if (++wdead >= ndead)
			wdead = 0;
	}
#else
	if ((pid = wait((int *)0)) > 0) {
		deads[wdead] = pid;
		if (++wdead >= ndead)
			wdead = 0;
	}
	/* reinstall the handler after each delivery */
	(void)signal(SIGCLD, reap);
#endif

	errno = saverrno;
}


/*	pvmbailout()
*
*	We're hosed.  Clean up as much as possible and exit.
*
*	Logs the call, unlinks loclsnam if set, then for every task on the
*	locltasks list sends SIGKILL to a nonzero t_pid and unlinks t_authnam
*	if set.  Exits with status n; never returns.
*/

void
pvmbailout(n)
	int n;
{
	struct task *tp;

	sprintf(pvmtxt, "pvmbailout(%d)\n", n);
	pvmlogerror(pvmtxt);

	if (loclsnam)
		(void)unlink(loclsnam);

	if (locltasks) {
		/* circular list; locltasks itself is the list head */
		tp = locltasks->t_link;
		while (tp != locltasks) {
			if (tp->t_pid)
				(void)kill(tp->t_pid, SIGKILL);
			if (tp->t_authnam)
				(void)unlink(tp->t_authnam);
			tp = tp->t_link;
		}
	}

	exit(n);
}


/*	wrk_fds_add()
*
*	Add fd to the select sets used by work().  Bits in sets choose
*	which: 1 = read, 2 = write, 4 = exception.  Raises wrk_nfds if fd
*	is the new highest.
*
*	Returns 0, or 1 if SANITY is defined and fd is out of range.
*/

wrk_fds_add(fd, sets)
	int fd;				/* the fd */
	int sets;			/* which sets */
{
#ifdef	SANITY
	if (!(fd >= 0 && fd < FD_SETSIZE)) {
		sprintf(pvmtxt, "wrk_fds_add() bad fd %d\n", fd);
		pvmlogerror(pvmtxt);
		return 1;
	}
#endif
	if (sets & 1) {
		FD_SET(fd, &wrk_rfds);
	}
	if (sets & 2) {
		FD_SET(fd, &wrk_wfds);
	}
	if (sets & 4) {
		FD_SET(fd, &wrk_efds);
	}

	/* keep wrk_nfds at one more than the highest fd seen */

	if (wrk_nfds <= fd)
		wrk_nfds = fd + 1;

	return 0;
}


/*	wrk_fds_delete()
*
*	Remove fd from the select sets used by work().  Bits in sets choose
*	which: 1 = read, 2 = write, 4 = exception.  If fd was the highest,
*	wrk_nfds is lowered to one more than the highest fd still present
*	in any set (0 if none).
*
*	Returns 0, or 1 if SANITY is defined and fd is out of range.
*/

wrk_fds_delete(fd, sets)
	int fd;				/* the fd */
	int sets;			/* which sets */
{
	int top;			/* candidate for highest fd still in use */

#ifdef	SANITY
	if (!(fd >= 0 && fd < FD_SETSIZE)) {
		sprintf(pvmtxt, "wrk_fds_delete() bad fd %d\n", fd);
		pvmlogerror(pvmtxt);
		return 1;
	}
#endif
	if (sets & 1) {
		FD_CLR(fd, &wrk_rfds);
	}
	if (sets & 2) {
		FD_CLR(fd, &wrk_wfds);
	}
	if (sets & 4) {
		FD_CLR(fd, &wrk_efds);
	}

	/* only the highest fd going away can change wrk_nfds */

	if (fd + 1 == wrk_nfds) {
		for (top = wrk_nfds - 1; top >= 0; top--)
			if (FD_ISSET(top, &wrk_rfds)
			|| FD_ISSET(top, &wrk_wfds)
			|| FD_ISSET(top, &wrk_efds))
				break;
		wrk_nfds = top + 1;
	}
	return 0;
}


/*	print_fdset()
*
*	Log the members of an fd set as "<pad>fd,fd,...\n" through
*	pvmlogerror(), using the shared pvmtxt scratch buffer.
*
*	Output is bounded to pvmtxt: an overlong pad is truncated, and if
*	the list of fds would not fit it is cut short and ends in "...".
*
*	Returns 0.
*/

print_fdset(pad, n, f)
	char *pad;		/* label at head */
	int n;			/* max fd + 1 */
	fd_set *f;		/* fd set */
{
	char *p = pvmtxt;
	/* past lim there may be no room for ",<int>" plus "...\n" and NUL */
	char *lim = pvmtxt + sizeof(pvmtxt) - 16;
	int i;
	char *s = "";

	/* copy the label, leaving room for at least one entry after it */
	strncpy(p, pad, sizeof(pvmtxt) - 32);
	p[sizeof(pvmtxt) - 32] = 0;
	p += strlen(p);

	for (i = 0; i < n; i++)
		if (FD_ISSET(i, f)) {
			if (p >= lim) {			/* out of room, mark truncation */
				strcpy(p, "...");
				p += strlen(p);
				break;
			}
			sprintf(p, "%s%d", s, i);
			p += strlen(p);
			s = ",";
		}
	strcat(p, "\n");
	pvmlogerror(pvmtxt);
	return 0;
}


/*	work()
*
*	The whole sausage
*
*	Main loop of the pvmd; never returns (leaves only through exit()
*	or pvmbailout()).  Each pass:
*	  - cleans up after children whose pids reap() has queued
*	  - calls netoutput() to send or resend packets to other pvmds
*	  - bails out if runstate is PVMDHALTING
*	  - computes the select() timeout from the earliest pending host
*	    retry time, or 60 seconds if there is none
*	  - select()s on the wrk_rfds/wrk_wfds sets
*	  - on timeout, sends a DM_NULL message to the next host in turn
*	  - services the network socket, the local master socket and
*	    the task sockets that select() reported ready
*/

work()
{
	struct task *tp;
	int hh;
	struct hostd *hp;
	static int lastpinged = 0;	/* dst of last null packet */
	fd_set rfds, wfds;			/* result of select */
/*
	fd_set efds;
*/
	int nrdy;					/* number of fds ready after select */
	struct timeval tnow;
	struct timeval tout;
	struct mesg *mp;

	if (debugmask || myhostpart)
		pvmlogerror("ready\n");

	FD_ZERO(&wrk_rfds);
	FD_ZERO(&wrk_wfds);
	FD_ZERO(&wrk_efds);
	wrk_nfds = 0;

	if (loclsock >= 0)
		wrk_fds_add(loclsock, 1);
	wrk_fds_add(netsock, 1);

	for (; ; ) {

	/*
	*	clean up after any tasks that we got SIGCHLDs
	*/
		while (rdead != wdead) {
			if (deads[rdead] == pprime) {
				/* the dead child was pvmd'; treat as failure of host slot 0 */
				hostfailentry(hosts->ht_hosts[0]);
				pprime = 0;

			} else {
				if (tp = task_findpid(deads[rdead])) {
					task_cleanup(tp);
					task_free(tp);
				}
			}
			if (++rdead >= ndead)
				rdead = 0;
		}

		netoutput();

		if (runstate == PVMDHALTING) {
			pvmlogerror("work() pvmd halting\n");
			pvmbailout(0);
		}

	/* XXX would like to do this timeout calc stuff in netoutput() */
		/* find the earliest retry time set on any host (zero = none) */
		TVCLEAR(&tout);
		for (hh = 0; hh <= hosts->ht_last; hh++) {
			if (hp = hosts->ht_hosts[hh])
				if (TVISSET(&hp->hd_rtv)
				&& (!TVISSET(&tout) || TVXLTY(&hp->hd_rtv, &tout)))
					tout = hp->hd_rtv;
		}
		gettimeofday(&tnow, (struct timezone*)0);

		if (!TVISSET(&tout)) {
			tout.tv_sec = tnow.tv_sec + 60;	/* XXX const, make it random */
			tout.tv_usec = tnow.tv_usec;

			/* pvmd' with no retry pending on any host has nothing left to do */
			if (runstate == PVMDPRIME) {
				exit(0);
			}
		}
		/* convert absolute time to an interval, zero if already past */
		if (TVXLTY(&tout, &tnow)) {
			TVCLEAR(&tout);

		} else {
			TVXSUBY(&tout, &tout, &tnow);
		}

		if (debugmask & PDMPACKET) {
			/*
			* tv_sec/tv_usec need not be int; cast so the arguments
			* match the %d conversions.  tout is an interval of at
			* most about 60 seconds here, so nothing is lost.
			*/
			sprintf(pvmtxt, "work() select tout is %d.%06d\n",
					(int)tout.tv_sec, (int)tout.tv_usec);
			pvmlogerror(pvmtxt);
		}

		/* select() overwrites its sets, so work on copies */
		rfds = wrk_rfds;
		wfds = wrk_wfds;
/*
		efds = wrk_efds;
*/
		if (debugmask & PDMSELECT) {
			sprintf(pvmtxt, "work() wrk_nfds=%d\n", wrk_nfds);
			pvmlogerror(pvmtxt);
			print_fdset("work() rfds=", wrk_nfds, &rfds);
			print_fdset("work() wfds=", wrk_nfds, &wfds);
		}

		/* EINTR is tolerated; nrdy stays -1 and no fd is serviced this pass */
		if ((nrdy = select(wrk_nfds, &rfds, &wfds, (fd_set*)0, &tout)) == -1) {
			if (errno != EINTR) {
				pvmlogperror("work() select");
				pvmbailout(0);
			}
		}
		if (debugmask & PDMPACKET) {
			sprintf(pvmtxt, "work() SELECT returns %d\n", nrdy);
			pvmlogerror(pvmtxt);
		}

	/* if select timed out, send ping packet to a host */
	/* XXX select times out for retries too, so lots of packets
	   XXX may get sent
	   XXX also this should only send to hosts we are waitcing on. */

		if (!nrdy && runstate != PVMDSTARTUP) {
			/* advance round-robin to the next occupied slot in 1..ht_last */
			do {
				if (++lastpinged > hosts->ht_last)
					lastpinged = 1;
			} while (!(hp = hosts->ht_hosts[lastpinged]));

			/* ping only other hosts whose transmit queue is empty */
			if (hp->hd_hostpart != myhostpart
			&& hp->hd_txq->pk_link == hp->hd_txq) {
				mp = mesg_new(0);
				mp->m_cod = DM_NULL;
				mp->m_dst = hosts->ht_hosts[lastpinged]->hd_hostpart | TIDPVMD;
				sendmessage(mp);
			}
		}

	/*
	*	check network socket and local master socket for action
	*/

		if (nrdy > 0) {
			if (FD_ISSET(netsock, &rfds)) {
				netinput();
				nrdy--;
			}
			if (loclsock >= 0 && FD_ISSET(loclsock, &rfds)) {
				loclconn();
				nrdy--;
			}
		}

	/*
	*	check tasks for action
	*/

		if (loclsock >= 0) {
			for (tp = locltasks->t_link;
					nrdy > 0 && tp != locltasks;
					tp = tp->t_link) {

				if (tp->t_sock >= 0 && FD_ISSET(tp->t_sock, &rfds)) {
					nrdy--;
					if (loclinput(tp)) {
						if (debugmask & PDMTASK) {
							sprintf(pvmtxt,
									"work() error reading from t%x, marking dead\n",
									tp->t_tid);
							pvmlogerror(pvmtxt);
						}
						/*
						* step back to the predecessor before freeing so
						* the loop increment lands on the task after the
						* one removed
						*/
						tp = tp->t_rlink;
						task_cleanup(tp->t_link);
						task_free(tp->t_link);
						continue;
					}
				}

				if (tp->t_sock >= 0 && FD_ISSET(tp->t_sock, &wfds)) {
					nrdy--;
					if (locloutput(tp)) {
						/* same step-back-then-free as above */
						tp = tp->t_rlink;
						task_cleanup(tp->t_link);
						task_free(tp->t_link);
						continue;
					}
				}

				if (tp->t_out >= 0 && FD_ISSET(tp->t_out, &rfds)) {
					loclstout(tp);
					nrdy--;
				}
			}
		}
	}
}


/*	netoutput()
*
*	Scan host table and send to any pvmds needing a first packet
*	or retry.
*
*	For each host, at most one datagram is sent per call: the packet at
*	the head of hd_txq if it is due for a first send or a retry, else a
*	header-only ack if HF_DOACK is set.  Hosts flagged HF_SHUTDOWN, or
*	that have exhausted their retries, are passed to hostfailentry()
*	and deleted from the host table.
*
*	Returns 0.  A sendto() error other than EINTR or ENOBUFS ends the
*	pvmd through pvmbailout().
*/

netoutput()
{
	struct hostd *hp;
	char *cp;					/* data to send */
	int len;					/* length to send */
	struct timeval tnow;
	struct pkt *pp;
	int src, dst;
	int ff;
	char dummy[DDFRAGHDR];		/* saved bytes under header, or ack-only pkt */
	int hh;
	int cc;						/* sendto result */
	struct timeval tx;			/* retry delay after a failed sendto */

/* XXX this will get expensive as nhosts increases */
	for (hh = 0; hh <= hosts->ht_last; hh++) {

		if (!(hp = hosts->ht_hosts[hh]))
			continue;

		cp = 0;					/* stays 0 unless a data pkt is to be sent */
		pp = hp->hd_txq->pk_link;
/*
		if (!pp) {
			char *p = 0;
			pvmlogerror("aargh, hd_txq->pk_link is null\n");
			i_dump(1);
			ht_dump(hosts);
			*p = 1;
		}
*/
		/* NOTE(review): the list head presumably has pk_buf == 0, so
		   this is false when the queue is empty -- confirm in pkt.c */
		if (pp->pk_buf) {						/* pkts ready for host */

			gettimeofday(&tnow, (struct timezone*)0);

			if (TVISSET(&hp->hd_rtv)) {			/* waiting for ack */
				if (hp->hd_flag & HF_SHUTDOWN) {
					hostfailentry(hp);
					ht_delete(hosts, hp);
					continue;
				}
				if (TVXLTY(&tnow, &hp->hd_rtv))	/* still waiting */
					goto out1;
				/* give up only when both the retry count and the
				   accumulated timeout have reached their minimums */
				if (hp->hd_nrty >= DDMINRETRIES
				&& hp->hd_rto.tv_sec >= DDMINTIMEOUT) {		/* host is toast */
					sprintf(pvmtxt, "netoutput() timed out sending to %s\n",
							hp->hd_name);
					pvmlogerror(pvmtxt);
					hd_dump(hp);
					hostfailentry(hp);
					ht_delete(hosts, hp);
					continue;
				}

			} else {							/* first try of this pkt */
				hp->hd_nrty = 0;
				/* initial retry interval is twice hd_rtt */
				TVXADDY(&hp->hd_rta, &hp->hd_rtt, &hp->hd_rtt);
				TVCLEAR(&hp->hd_rto);
			}

			/* the header is written in place just before the data */
			cp = pp->pk_dat - DDFRAGHDR;
			len = pp->pk_len + DDFRAGHDR;
			bcopy(cp, dummy, sizeof(dummy));	/* save under header */
			dst = pp->pk_dst;
			src = pp->pk_src;
			ff = (pp->pk_flag & (FFSOM|FFEOM)) | FFDATA;
			if (debugmask & PDMPACKET) {
				sprintf(pvmtxt, "netoutput() pkt to %s seq %d ack %d retry %d\n",
						hp->hd_name, hp->hd_txseq, hp->hd_rxseq, hp->hd_nrty);
				pvmlogerror(pvmtxt);
			}

		} else {
/* XXX this isn't the right way to do this.  we don't wait for an ack
   XXX from the last message, among other things.  but it will work if
   XXX no one beats on it too hard.  */

			if (hp->hd_flag & HF_SHUTDOWN) {
				hostfailentry(hp);
				ht_delete(hosts, hp);
				continue;
			}
		}

out1:
		if (!cp) {								/* no pkt to send */

			if (!(hp->hd_flag & HF_DOACK))		/* no ack needed */
				continue;
			else {								/* ack needed */
				/* header-only datagram built in dummy, no FFDATA flag */
				cp = dummy;
				len = DDFRAGHDR;
				dst = hp->hd_hostpart | TIDPVMD;
				src = mytid;
				ff = 0;
				if (debugmask & PDMPACKET) {
					sprintf(pvmtxt, "netoutput() dummy to %s seq %d ack %d\n",
							hp->hd_name, hp->hd_txseq, hp->hd_rxseq);
					pvmlogerror(pvmtxt);
				}
			}
		}
		/* whatever goes out now carries hd_rxseq, i.e. the ack */
		hp->hd_flag &= ~HF_DOACK;

		/* header layout: dst(4) src(4) seq(2) ack(2) flags(1) */
		pvmput32(cp, dst);
		pvmput32(cp + 4, src);
		pvmput16(cp + 8, hp->hd_txseq);
		pvmput16(cp + 10, hp->hd_rxseq);
		pvmput8(cp + 12, ff);

/*
if (!(random() & 3)) {
	pvmlogerror("netoutput() oops, dropped one\n");
	cc = -1;
} else {
*/
		/* EINTR and ENOBUFS are treated as transient; anything else is fatal */
		if ((cc = sendto(netsock, cp, len, 0,
				(struct sockaddr*)&hp->hd_sad, sizeof(hp->hd_sad))) == -1
		&& errno != EINTR
		&& errno != ENOBUFS) {
			pvmlogperror("netoutput() sendto");
			pvmbailout(0);
		}
/*
}
*/

		if (cp != dummy) {
			bcopy(dummy, cp, sizeof(dummy));	/* restore under header */

			/* set timer for next retry */

			if (cc != -1) {

				/* sent: next retry at now + hd_rta, then double hd_rta
				   unless it has already reached DDMAXRTT seconds */
				hp->hd_last = tnow;
				TVXADDY(&hp->hd_rtv, &tnow, &hp->hd_rta);
				TVXADDY(&hp->hd_rto, &hp->hd_rto, &hp->hd_rta);
				++hp->hd_nrty;
				if (hp->hd_rta.tv_sec < DDMAXRTT) {
					TVXADDY(&hp->hd_rta, &hp->hd_rta, &hp->hd_rta);
				}

			} else {
				/* transient send error: try again DDERRRETRY usec from
				   now; hd_nrty is not incremented */
				tx.tv_sec = DDERRRETRY/1000000;
				tx.tv_usec = DDERRRETRY%1000000;
				TVXADDY(&hp->hd_rtv, &tnow, &tx);
				TVXADDY(&hp->hd_rto, &hp->hd_rto, &tx);
			}
		}
	}
	return 0;
}


/*	netinput()
*
*	Input from a remote pvmd.
*	Accept a packet, do protocol stuff then pass pkt to netinpkt().
*
*	Reads one datagram from netsock.  The packet is dropped if it is
*	shorter than a fragment header, if its source tid or address does
*	not match a host table entry, or if it is addressed neither to this
*	host nor to a multicast address known for the sender.  Otherwise
*	the ack field updates the sender's round trip estimate and releases
*	acknowledged packets from its transmit queue.  New data packets
*	are handed to netinpkt(), which takes over the packet.
*
*	Always returns 0.
*/

int
netinput()
{
	struct sockaddr_in osad;	/* sender's ip addr */
	int oslen;					/* sockaddr length */
	struct pkt *pp = 0;			/* the packet */
	int src, dst;				/* packet src, dst */
	int sqn, aqn;				/* sequence, ack numbers */
	int ff;						/* packet flags */
	int hh;
	struct hostd *hp = 0;		/* sending host */
	struct timeval tnow;
	struct mca *mcap = 0;
	char *cp;
	struct timeval tdiff;		/* packet rtt */
	int rttusec;

	/*
	* leave headroom so that the larger task-daemon header, which
	* locloutput() prepends in place, fits in front of the data
	*/
	pp = pk_new(ourudpmtu);
	if (TDFRAGHDR > DDFRAGHDR)
		pp->pk_dat += TDFRAGHDR - DDFRAGHDR;

	oslen = sizeof(osad);
	if ((pp->pk_len = recvfrom(netsock, pp->pk_dat,
			pp->pk_max - (pp->pk_dat - pp->pk_buf),
			0, (struct sockaddr*)&osad, &oslen)) == -1) {
		if (errno != EINTR)
			pvmlogperror("netinput() recvfrom(netsock)");
		goto done;
	}

	/* a datagram without a complete fragment header can't be parsed */

	if (pp->pk_len < DDFRAGHDR) {
		sprintf(pvmtxt, "netinput() short pkt (%d bytes) from %s\n",
				pp->pk_len, inadport_decimal(&osad));
		pvmlogerror(pvmtxt);
		goto done;
	}

/*
if (!(random() & 3)) {
	pvmlogerror("netinput() oops, dropped one\n");
	goto done;
}
*/

	gettimeofday(&tnow, (struct timezone*)0);

	/* strip header: dst(4) src(4) seq(2) ack(2) flags(1) */

	cp = pp->pk_dat;
	pp->pk_len -= DDFRAGHDR;
	pp->pk_dat += DDFRAGHDR;
	pp->pk_dst = dst = pvmget32(cp);
	pp->pk_src = src = pvmget32(cp + 4);
	sqn = pvmget16(cp + 8);
	aqn = pvmget16(cp + 10);
	pp->pk_flag = ff = pvmget8(cp + 12);

	/* host index from src tid must name a host with the sender's addr */

	hh = (src & tidhmask) >> (ffs(tidhmask) - 1);
	if (hh < 0 || hh > hosts->ht_last || !(hp = hosts->ht_hosts[hh])
	|| (osad.sin_addr.s_addr != hp->hd_sad.sin_addr.s_addr)
	|| (osad.sin_port != hp->hd_sad.sin_port)) {
		sprintf(pvmtxt, "netinput() bogus pkt from %s\n",
				inadport_decimal(&osad));
		pvmlogerror(pvmtxt);
		goto done;
	}

	if (debugmask & PDMPACKET) {
		sprintf(pvmtxt,
		"netinput() pkt from %s src t%x dst t%x seq %d ack %d ff %x len %d\n",
				hp->hd_name, src, dst, sqn, aqn, ff, pp->pk_len);
		pvmlogerror(pvmtxt);
	}

	/* throw out packet if it's not for us */

	if ((dst & tidhmask) != myhostpart) {
		/* still ours if dst is a multicast address on the sender's list */
		for (mcap = hp->hd_mcas->mc_link; mcap != hp->hd_mcas;
				mcap = mcap->mc_link)
			if (mcap->mc_tid == dst)
				break;
		if (mcap == hp->hd_mcas) {
			sprintf(pvmtxt,
					"netinput() pkt from t%x for t%x scrapped (not us)\n",
					src, dst);
			pvmlogerror(pvmtxt);
			goto done;
		}
	}

	/* update rtt for this host XXX assumes ack is for last pkt sent */

	if (SEQNUMCOMPARE(hp->hd_txseq, aqn)) {
		TVXSUBY(&tdiff, &tnow, &hp->hd_last);
		rttusec = tdiff.tv_sec * 1000000 + tdiff.tv_usec;
		if (rttusec < 1)
			rttusec = 1000;	/* XXX const */
		else
			if (rttusec > DDMAXRTT*1000000)
				rttusec = DDMAXRTT*1000000;
		/* new estimate = (3 * old estimate + this sample) / 4 */
		rttusec += 3 * (hp->hd_rtt.tv_sec * 1000000 + hp->hd_rtt.tv_usec);
		rttusec /= 4;
		hp->hd_rtt.tv_sec = rttusec / 1000000;
		hp->hd_rtt.tv_usec = rttusec % 1000000;
	}

	/* if the ack is news, bump the head pkt from this host's txq */

	while (SEQNUMCOMPARE(hp->hd_txseq, aqn)) {
		struct pkt *pp2;

		hp->hd_txseq = NEXTSEQNUM(hp->hd_txseq);
		pp2 = hp->hd_txq->pk_link;
		LISTDELETE(pp2, pk_link, pk_rlink);
		pk_free(pp2);
		TVCLEAR(&hp->hd_rtv);		/* no longer waiting on a retry */
	}

	/* only data packets are acked; a bare ack ends here */

	if (ff & FFDATA)
		hp->hd_flag |= HF_DOACK;
	else {
		if (debugmask & PDMPACKET)
			pvmlogerror("netinput() null pkt\n");
		goto done;
	}

	/* drop a duplicate; HF_DOACK set above makes netoutput() re-ack it */

	if (SEQNUMCOMPARE(sqn, hp->hd_rxseq)) {
		if (debugmask & PDMPACKET) {
			sprintf(pvmtxt, "netinput() pkt resent from %s seq %d\n",
					hp->hd_name, sqn);
			pvmlogerror(pvmtxt);
		}
		goto done;
	}
	hp->hd_rxseq = NEXTSEQNUM(sqn);

	netinpkt(hp, pp);			/* pkt now belongs to netinpkt() */
	return 0;

done:
	pk_free(pp);
	return 0;
}


/*	netinpkt()
*
*	Consume pkt from network.  It's either for the pvmd and needs to
*	be reassembled into a message or it's for a local task and needs
*	to be put on the queue to be sent.
*
*	hp is the sending host, pp the packet with its fragment header
*	already stripped.  Three cases by destination tid:
*	  - multicast (TIDGID set): a descriptor sharing pp's data buffer
*	    is queued to each local task on the address's destination list
*	  - the pvmd itself: the data is appended as a fragment to the
*	    message under reassembly for hp; netentry() is called when the
*	    end-of-message packet arrives
*	  - a task: pp itself is queued on the task's transmit queue
*	pp is freed here unless it was queued to a task.
*
*	Always returns 0.
*/

netinpkt(hp, pp)
	struct hostd *hp;
	struct pkt *pp;
{
	struct mca *mcap = 0;
	struct task *tp;
	struct mesg *mp;
	struct frag *fp;
	struct pkt *pp2;
	int src = pp->pk_src;
	int dst = pp->pk_dst;
	int ff = pp->pk_flag;
	char *cp;
	int i;

	if (dst & TIDGID) {						/* multicast */
		for (mcap = hp->hd_mcas->mc_link; mcap != hp->hd_mcas;
				mcap = mcap->mc_link)
			if (mcap->mc_tid == dst)
				break;
		if (mcap == hp->hd_mcas) {
			sprintf(pvmtxt,
					"netinpkt() pkt from t%x for t%x scrapped (not us)\n",
					src, dst);
			pvmlogerror(pvmtxt);
			goto done;
		}

		for (i = mcap->mc_ndst; i-- > 0; ) {
			dst = mcap->mc_dsts[i];
			if (tp = task_find(dst)) {		/* to local task */
				/* new descriptor sharing the data buffer of pp */
				pp2 = pk_new(0);
				pp2->pk_src = src;
				pp2->pk_dst = dst;
				pp2->pk_flag = ff;
				pp2->pk_buf = pp->pk_buf;
				pp2->pk_max = pp->pk_max;
				pp2->pk_dat = pp->pk_dat;
				pp2->pk_len = pp->pk_len;
				da_ref(pp->pk_buf);

	/* add task's sock to wrk_wfds */

				if (tp->t_sock >= 0)
					wrk_fds_add(tp->t_sock, 2);

				LISTPUTBEFORE(tp->t_txq, pp2, pk_link, pk_rlink);

			} else {
				sprintf(pvmtxt,
				"netinpkt() mc pkt from t%x for t%x scrapped (no dst)\n",
						src, dst);
				pvmlogerror(pvmtxt);
			}
		}
		/* last packet of the message retires the multicast address */
		if (ff & FFEOM) {
			if (debugmask & PDMMESSAGE) {
				/* hd_name is a string, so it must be printed with %s */
				sprintf(pvmtxt, "netinpkt() freed mca %x from %s\n",
						mcap->mc_tid, hp->hd_name);
				pvmlogerror(pvmtxt);
			}
			mca_free(mcap);
		}
		goto done;
	}

	if ((dst & ~tidhmask) == TIDPVMD) {		/* for pvmd */
		if ((src & ~tidhmask) != TIDPVMD) {
			sprintf(pvmtxt,
			"netinpkt() pkt from t%x to t%x scrapped (won't speak to tasks)\n",
			src, dst);
			pvmlogerror(pvmtxt);
			goto done;
		}
		if (ff & FFSOM) {			/* start of message */
			if (hp->hd_rxm) {
				sprintf(pvmtxt, "netinpkt() repeated start pkt from %s\n",
						hp->hd_name);
				pvmlogerror(pvmtxt);
				goto done;
			}
			/* first packet carries the message header: code(4) wait id(4) */
			cp = pp->pk_dat;
			pp->pk_len -= DDMSGHDR;
			pp->pk_dat += DDMSGHDR;
			hp->hd_rxm = mesg_new(0);
			hp->hd_rxm->m_cod = pvmget32(cp);
			hp->hd_rxm->m_wid = pvmget32(cp + 4);
			hp->hd_rxm->m_dst = dst;
			hp->hd_rxm->m_src = src;

		} else {					/* middle or end of message */
			if (!hp->hd_rxm) {
				sprintf(pvmtxt,
						"netinpkt() spurious pkt (no message) from %s\n",
						hp->hd_name);
				pvmlogerror(pvmtxt);
				goto done;
			}
		}

		/* append the data as a fragment sharing the data buffer of pp */
		fp = fr_new(0);
		fp->fr_buf = pp->pk_buf;
		fp->fr_dat = pp->pk_dat;
		fp->fr_max = pp->pk_max;
		fp->fr_len = pp->pk_len;
		da_ref(pp->pk_buf);
		LISTPUTBEFORE(hp->hd_rxm->m_frag, fp, fr_link, fr_rlink);
		hp->hd_rxm->m_len += fp->fr_len;

		if (ff & FFEOM) {		/* end of message */
			/* detach, rewind to the first fragment and hand it over */
			mp = hp->hd_rxm;
			hp->hd_rxm = 0;
			mp->m_cfrag = mp->m_frag->fr_link;
			mp->m_cpos = 0;
			netentry(hp, mp);
		}

	} else {								/* for a task */
		if (tp = task_find(dst)) {

	/* add task's sock to wrk_wfds */

			if (tp->t_sock >= 0)
				wrk_fds_add(tp->t_sock, 2);

			LISTPUTBEFORE(tp->t_txq, pp, pk_link, pk_rlink);
			pp = 0;				/* queued, so it must not be freed below */

		} else {
			sprintf(pvmtxt,
					"netinpkt() pkt from t%x for t%x scrapped (no dst)\n",
					src, dst);
			pvmlogerror(pvmtxt);
			goto done;
		}
	}

done:
	if (pp)
		pk_free(pp);
	return 0;
}


/*	loclconn()
*
*	Task has attempted to connect.  Accept the new connection and make
*	a blank context for it.
*
*	On success the new socket gets TCP_NODELAY (unless NOSOCKOPT is
*	defined), is switched to non-blocking mode and is added to the
*	read set of work().  If accept() fails the context is freed again.
*	Failures of setsockopt() or fcntl(F_GETFL) are logged only.
*
*	Always returns 0.
*/

loclconn()
{
	struct task *tp;			/* new task context */
	int fl;						/* file status flags of the new socket */

	tp = task_new(0);
	tp->t_salen = sizeof(tp->t_sad);
	tp->t_sock = accept(loclsock, (struct sockaddr*)&tp->t_sad,
			&tp->t_salen);

	if (tp->t_sock == -1) {
		pvmlogperror("loclconn() accept");
		task_free(tp);
		return 0;
	}

	if (debugmask & PDMTASK) {
		sprintf(pvmtxt, "loclconn() accept from %s sock %d\n",
				inadport_decimal(&tp->t_sad), tp->t_sock);
		pvmlogerror(pvmtxt);
	}

#ifndef NOSOCKOPT
	{
		int one = 1;			/* option value: enable */

		if (setsockopt(tp->t_sock, IPPROTO_TCP, TCP_NODELAY,
				(char*)&one, sizeof(int)) == -1) {
			pvmlogperror("loclconn() setsockopt");
		}
	}
#endif

	/* make the socket non-blocking */

	fl = fcntl(tp->t_sock, F_GETFL, 0);
	if (fl == -1) {
		pvmlogperror("loclconn: fcntl");

	} else {
#ifdef O_NDELAY
		fl |= O_NDELAY;
#else
		fl |= FNDELAY;
#endif
		(void)fcntl(tp->t_sock, F_SETFL, fl);
	}

	wrk_fds_add(tp->t_sock, 1);
	return 0;
}


/*	locloutput()
*
*	Output to local task.  Sends packets from the task's t_txq until
*	the queue is empty or write() can't take any more right now.
*	Deletes task's bit from wrk_wfds if no more data to send.
*
*	A packet seen for the first time (PF_STARTED clear) gets its
*	task-daemon fragment header written in front of the data; a packet
*	that was only partly written stays at the head of the queue with
*	pk_dat/pk_len advanced past the bytes already sent.
*
*	EWOULDBLOCK, EAGAIN and EINTR from write(), and a write() that
*	accepts 0 bytes, all just mean "try again later": the packet stays
*	queued and we return to wait for the next call.
*
*	Returns 0 if okay, else -1 if unrecoverable write error, or if
*	the queue has drained and TF_CLOSE is set on the task.
*/

locloutput(tp)
	struct task *tp;
{
	struct pkt *pp;
	char *cp;
	int n;

	/* the list head is the only entry with no buffer */

	while ((pp = tp->t_txq->pk_link)->pk_buf) {

	/*
	* if new packet add td frag header, else continue sending
	*/

		if (!(pp->pk_flag & PF_STARTED)) {
			if (debugmask & PDMPACKET) {
				sprintf(pvmtxt,
					"locloutput() src t%x dst t%x ff %x len %d\n",
					pp->pk_src, pp->pk_dst, pp->pk_flag & (FFSOM|FFEOM),
					pp->pk_len);
				pvmlogerror(pvmtxt);
			}
			pp->pk_flag |= PF_STARTED;
			cp = (pp->pk_dat -= TDFRAGHDR);
			pvmput32(cp, pp->pk_dst);
			pvmput32(cp + 4, pp->pk_src);
			pvmput32(cp + 8, pp->pk_len);
			pvmput8(cp + 12, pp->pk_flag & (FFSOM|FFEOM));
			pp->pk_len += TDFRAGHDR;
		}

	/*
	* send as much as possible; skip to next packet when all sent
	*/

		n = write(tp->t_sock, pp->pk_dat, pp->pk_len);

		if (n == -1) {
			if (errno != EWOULDBLOCK && errno != EAGAIN
			&& errno != EINTR) {
				if (debugmask & PDMTASK) {
					pvmlogperror("locloutput() write");
					sprintf(pvmtxt, "locloutput() marking t%x dead\n",
							tp->t_tid);
					pvmlogerror(pvmtxt);
				}
				return -1;
			}
			break;
		}

	/* nothing accepted: don't spin here, wait to be called again */

		if (n == 0)
			break;

		if (debugmask & PDMPACKET) {
			sprintf(pvmtxt,
					"locloutput() src t%x dst t%x wrote %d\n",
					pp->pk_src, pp->pk_dst, n);
			pvmlogerror(pvmtxt);
		}

		if ((pp->pk_len -= n) > 0) {
			pp->pk_dat += n;

		} else {
			LISTDELETE(pp, pk_link, pk_rlink);
			pk_free(pp);
		}
	}

	if (tp->t_txq->pk_link == tp->t_txq) {
		wrk_fds_delete(tp->t_sock, 2);

	/* flush context if TF_CLOSE set */

		if (tp->t_flag & TF_CLOSE)
			return -1;
	}

	return 0;
}


/*	loclinput()
*
*	Input from a task.  Read socket, reassemble packets and route them.
*
*	One read() is done per call, appended to the task's partial packet
*	(t_rxp).  Every complete packet now in the buffer is then peeled
*	off and routed by its destination:
*	  - the task's current multicast address: a copy that shares the
*	    data buffer is made per destination and handed to netinpkt()
*	    (same host part as ours) or pkt_to_host()
*	  - the local pvmd: appended as a frag to the message being
*	    reassembled in t_rxm; loclentry() is called on the last frag
*	  - a local task: appended to that task's t_txq
*	  - anything else: pkt_to_host() for the owning host
*
*	The fragment length comes from the task and is checked against
*	the space in the packet buffer before it is used; a length that
*	is negative or can never fit is treated as a fatal error on this
*	connection.  EWOULDBLOCK, EAGAIN and EINTR from read() are not
*	errors, we just return and wait to be called again.
*
*	Returns 0 if okay, else -1 if error (work() should cleanup the
*	task context).
*/

loclinput(tp)
	struct task *tp;
{
	int src;			/* pkt src */
	int dst;			/* pkt dst */
	int ff;				/* pkt flags */
	char *cp;
	struct pkt *pp = 0;
	struct pkt *pp2;
	struct frag *fp;
	struct mesg *mp;
	struct hostd *hp;
	struct task *tp2;
	int n;
	int room;			/* most body bytes the pkt buffer can hold */

	/*
	* if no current packet, start a new one
	*/

	if (!(pp = tp->t_rxp)) {
		pp = tp->t_rxp = pk_new(ourudpmtu);
		if (DDFRAGHDR > TDFRAGHDR)
			pp->pk_dat += DDFRAGHDR - TDFRAGHDR;
	}

	/*
	* read as much as will fit into this packet buffer.
	* chances are we'll get exactly one header and packet in a single read.
	*/

	n = read(tp->t_sock, pp->pk_dat + pp->pk_len,
			pp->pk_max - (pp->pk_dat - pp->pk_buf) - pp->pk_len);

	if (n == -1) {
		if (errno != EWOULDBLOCK && errno != EAGAIN && errno != EINTR) {
			pvmlogperror("loclinput() read");
			sprintf(pvmtxt, "loclinput() marking t%x dead\n",
					tp->t_tid);
			pvmlogerror(pvmtxt);
			return -1;
		}
		return 0;
	}
	if (!n) {
		if (debugmask & (PDMPACKET|PDMMESSAGE|PDMTASK)) {
			sprintf(pvmtxt, "loclinput() read EOF from t%x sock %d\n",
					tp->t_tid, tp->t_sock);
			pvmlogerror(pvmtxt);
		}
		return -1;
	}

	pp->pk_len += n;

	/*
	* if we now have complete packet(s), accept them
	*/

	do {
		pp = tp->t_rxp;

		if (pp->pk_len < TDFRAGHDR)
			return 0;

	/*
	* body length is supplied by the task; refuse anything negative or
	* too big to ever fit before using it as an offset or copy length
	*/

		n = pvmget32(pp->pk_dat + 8);
		room = pp->pk_max - (pp->pk_dat - pp->pk_buf) - TDFRAGHDR;
		if (n < 0 || n > room) {
			sprintf(pvmtxt,
					"loclinput() bogus pkt length %d from t%x\n",
					n, tp->t_tid);
			pvmlogerror(pvmtxt);
			return -1;
		}
		n += TDFRAGHDR;								/* header + body */
		if (pp->pk_len < n)
			return 0;

	/* bytes past this packet belong to the next one; move them */

		if (pp->pk_len > n) {
			tp->t_rxp = pk_new(ourudpmtu);
			if (DDFRAGHDR > TDFRAGHDR)
				tp->t_rxp->pk_dat += DDFRAGHDR - TDFRAGHDR;
			bcopy(pp->pk_dat + n, tp->t_rxp->pk_dat, pp->pk_len - n);
			tp->t_rxp->pk_len = pp->pk_len - n;

			pp->pk_len = n;

		} else
			tp->t_rxp = 0;

	/* strip frag header.  src is what we know, not what the pkt says */

		cp = pp->pk_dat;
		pp->pk_len -= TDFRAGHDR;
		pp->pk_dat += TDFRAGHDR;
		pp->pk_dst = dst = pvmget32(cp);
		pp->pk_src = src = tp->t_tid;
		pp->pk_flag = ff = pvmget8(cp + 12);

		if (debugmask & PDMPACKET) {
			sprintf(pvmtxt,
					"loclinput() src t%x dst t%x ff %x len %d\n",
					src, dst, ff, pp->pk_len);
			pvmlogerror(pvmtxt);
		}

	/*
	* if to multicast addr, replicate pkt in each q
	*/

		if ((dst & TIDGID) && tp->t_mca && tp->t_mca->mc_tid == dst) {

			struct mca *mcap = tp->t_mca;
			int i;

			for (i = mcap->mc_ndst; i-- > 0; ) {
				dst = mcap->mc_dsts[i];
				if (hp = tidtohost(hosts, dst)) {
					pp2 = pk_new(0);
					pp2->pk_src = src;
					pp2->pk_dst = mcap->mc_tid;
					pp2->pk_flag = ff;
					pp2->pk_buf = pp->pk_buf;
					pp2->pk_max = pp->pk_max;
					pp2->pk_dat = pp->pk_dat;
					pp2->pk_len = pp->pk_len;
					da_ref(pp->pk_buf);

					if (hp->hd_hostpart == myhostpart) {
						netinpkt(hp, pp2);

					} else {
						pkt_to_host(hp, pp2);
					}

				} else {
					sprintf(pvmtxt,
					"loclinput() pkt src t%x dst t%x scrapped (no such host)\n",
							src, dst);
					pvmlogerror(pvmtxt);
				}
			}

	/* free mca on last pkt */

			if (ff & FFEOM) {
				if (debugmask & PDMMESSAGE) {
					sprintf(pvmtxt, "loclinput() freed mca %x for t%x\n",
							mcap->mc_tid, tp->t_tid);
					pvmlogerror(pvmtxt);
				}
				mca_free(mcap);
				tp->t_mca = 0;
			}
			goto done;
		}

	/*
	* route packet to local task or remote host, or local pvmd message
	* reassembly
	*/

		if (!(dst & tidhmask) || (dst & tidhmask) == myhostpart) {	/* for local */

			if ((dst & ~tidhmask) == TIDPVMD) {				/* for the pvmd */
				if (ff & FFSOM) {			/* start of message */
					if (tp->t_rxm) {
						sprintf(pvmtxt, "loclinput() repeated start pkt t%x\n",
								tp->t_tid);
						pvmlogerror(pvmtxt);
						goto done;
					}

	/* a start pkt has to at least hold the message header */

					if (pp->pk_len < TTMSGHDR) {
						sprintf(pvmtxt, "loclinput() short start pkt t%x\n",
								tp->t_tid);
						pvmlogerror(pvmtxt);
						goto done;
					}
					cp = pp->pk_dat;
					pp->pk_len -= TTMSGHDR;
					pp->pk_dat += TTMSGHDR;
					tp->t_rxm = mesg_new(0);
					tp->t_rxm->m_cod = pvmget32(cp);
					tp->t_rxm->m_enc = pvmget32(cp + 4);
					tp->t_rxm->m_dst = dst;
					tp->t_rxm->m_src = tp->t_tid;

				} else {					/* middle or end of message */
					if (!tp->t_rxm) {
						sprintf(pvmtxt,
							"loclinput() pkt with no message src t%x\n",
							tp->t_tid);
						pvmlogerror(pvmtxt);
						goto done;
					}
				}

	/* frag shares the pkt's data buffer, so take a reference */

				fp = fr_new(0);
				fp->fr_buf = pp->pk_buf;
				fp->fr_dat = pp->pk_dat;
				fp->fr_max = pp->pk_max;
				fp->fr_len = pp->pk_len;
				da_ref(pp->pk_buf);

				LISTPUTBEFORE(tp->t_rxm->m_frag, fp, fr_link, fr_rlink);
				tp->t_rxm->m_len += fp->fr_len;

				if (ff & FFEOM) {		/* end of message */
					mp = tp->t_rxm;
					tp->t_rxm = 0;
					mp->m_cfrag = mp->m_frag->fr_link;
					mp->m_cpos = 0;
					loclentry(tp, mp);
	/*
	* if sock is -1, tm_conn2() wants us to throw out this context
	* because it's been merged into another.
	*/
					if (tp->t_sock == -1) {
						pk_free(pp);
						return -1;
					}
				}

			} else {						/* for a local task */
				if (!tp->t_tid) {
					sprintf(pvmtxt, "loclinput() pkt src t%x (not a task)\n",
						src);
					pvmlogerror(pvmtxt);
					goto done;
				}
				if (tp2 = task_find(dst)) {

	/* add task's sock to wrk_wfds */

					if (tp2->t_sock >= 0)
						wrk_fds_add(tp2->t_sock, 2);

					LISTPUTBEFORE(tp2->t_txq, pp, pk_link, pk_rlink);
					pp = 0;

				} else {
					sprintf(pvmtxt,
							"loclinput() pkt src t%x dst t%x scrapped (no dst)\n",
							src, dst);
					pvmlogerror(pvmtxt);
				}
			}

		} else {							/* not for this host */

			if (hp = tidtohost(hosts, dst)) {
				pkt_to_host(hp, pp);
				pp = 0;

			} else {
				sprintf(pvmtxt,
					"loclinput() pkt src t%x dst t%x scrapped (no such host)\n",
					src, dst);
				pvmlogerror(pvmtxt);
				goto done;
			}
		}

	/* pp is still ours unless it was queued somewhere above */

done:
		if (pp) {
			pk_free(pp);
		}

	} while (tp->t_rxp);

	return 0;
}


/*	loclstout()
*
*	Flush stdout/err pipe from a task.
*	XXX for now, log it and ship it to the console pvmd.
*	XXX someday perhaps send it to the inherited output device.
*
*	One read() is done per call.  Text read is logged with a "[tTID] "
*	prefix, and if the console host isn't the local one it is also
*	sent there in a DM_TASKOUT message (tid, then the text without
*	the prefix).
*
*	End of file (read() returns 0) or a real read error closes the
*	pipe and takes it out of the read set, so we aren't called over
*	and over for a pipe that has nothing more to say.  EINTR,
*	EWOULDBLOCK and EAGAIN leave the pipe open for the next call.
*
*	Always returns 0.
*/

loclstout(tp)
	struct task *tp;
{
	char *p;			/* where the task's text starts in pvmtxt */
	int n;
	struct mesg *mp;

	sprintf(pvmtxt, "[t%x] ", tp->t_tid);
	p = pvmtxt + strlen(pvmtxt);

	/* leave room for the prefix in front and a null at the end */

	n = read(tp->t_out, p, sizeof(pvmtxt) - (p - pvmtxt) - 1);

	if (n > 0) {
		p[n] = 0;
		pvmlogerror(pvmtxt);
		if (hosts->ht_cons != hosts->ht_local) {
			mp = mesg_new(0);
			mp->m_cod = DM_TASKOUT;
			mp->m_dst = hosts->ht_hosts[hosts->ht_cons]->
					hd_hostpart | TIDPVMD;
			pkint(mp, tp->t_tid);
			pkstr(mp, p);
			sendmessage(mp);
		}
		return 0;
	}

	/* interrupted or nothing there yet: not the end of the pipe */

	if (n == -1
	&& (errno == EINTR || errno == EWOULDBLOCK || errno == EAGAIN))
		return 0;

	/* EOF or hard error: done with this pipe */

	wrk_fds_delete(tp->t_out, 1);
	(void)close(tp->t_out);
	tp->t_out = -1;
	return 0;
}


/*	mesg_to_task()
*
*	Append a message to the send queue for a task.
*
*	N.B. Message must contain at least one frag or this will honk.
*/

int
mesg_to_task(tp, mp)
	struct task *tp;
	struct mesg *mp;
{
	struct frag *fp = mp->m_frag->fr_link;
	struct pkt *pp;
	int ff = FFSOM;			/* frag flags */
	int dst = mp->m_dst;

	/* if nothing yet in q, add task's sock to wrk_wfds */

	if (tp->t_sock >= 0)
		wrk_fds_add(tp->t_sock, 2);

	do {
		if (ff & FFSOM) {				/* first frag */

		/* copy first frag, prepend t-d header */

			pp = pk_new(ourudpmtu);
			pp->pk_dat += MAXHDR;
			bcopy(fp->fr_dat, pp->pk_dat, fp->fr_len);
			pp->pk_len = fp->fr_len;

			pp->pk_dat -= TTMSGHDR;
			pp->pk_len += TTMSGHDR;
			pvmput32(pp->pk_dat, mp->m_cod);
			pvmput32(pp->pk_dat + 4, mp->m_enc);

		} else {						/* nth frag */
			pp = pk_new(0);
			pp->pk_buf = fp->fr_buf;
			pp->pk_dat = fp->fr_dat;
			pp->pk_max = fp->fr_max;
			pp->pk_len = fp->fr_len;
			da_ref(pp->pk_buf);
		}
		if (fp->fr_link == mp->m_frag)
			ff |= FFEOM;
		pp->pk_src = TIDPVMD;
		pp->pk_dst = dst;
		pp->pk_flag = ff;
		ff = 0;
		if (mp->m_flag & MM_PRIO) {
			LISTPUTAFTER(tp->t_txq, pp, pk_link, pk_rlink);
		} else {
			LISTPUTBEFORE(tp->t_txq, pp, pk_link, pk_rlink);
		}
	} while ((fp = fp->fr_link) != mp->m_frag);

	return 0;
}


/*	sendmessage()
*
*	Send a message.  If it's for a local task or remote host, cut
*	apart the fragments and queue to be sent.  If it's for the local
*	pvmd, just call netentry() with the whole message.
*
*	An empty message is given one empty frag first so the code below
*	never has to deal with a message without frags.
*
*	For a remote host the first frag is copied into a new buffer with
*	the d-d message header (code, wait id) in front; later frags
*	share the frag's data buffer, so a reference is taken on it for
*	each packet.  The host is looked up in newhosts first while
*	runstate is PVMDHTUPD, then in hosts.
*
*	The caller's reference to the message is dropped (mesg_unref())
*	on every path.  Always returns 0; undeliverable messages are
*	logged and scrapped.
*
*	N.B. MM_PRIO only works for single-frag messages, and is not
*	supported to a remote host at all (such packets are scrapped).
*/

int
sendmessage(mp)
	struct mesg *mp;
{
	struct hostd *hp = 0;
	struct task *tp;
	struct frag *fp;
	struct pkt *pp;
	int ff = FFSOM;
	int dst = mp->m_dst;

	if (debugmask & PDMMESSAGE) {
		if (TIDISTASK(dst))
			sprintf(pvmtxt, "sendmessage() dst t%x code %s len %d\n",
					dst, tmname(mp->m_cod), mp->m_len);
		else
			sprintf(pvmtxt, "sendmessage() dst t%x code %s len %d\n",
					dst, dmname(mp->m_cod), mp->m_len);
		pvmlogerror(pvmtxt);
	}

	/*
	*	add a frag to empty message to simplify handling
	*/

	if ((fp = mp->m_frag->fr_link) == mp->m_frag) {
		fp = fr_new(ourudpmtu);
		fp->fr_dat += MAXHDR;
		LISTPUTBEFORE(mp->m_frag, fp, fr_link, fr_rlink);
	}

	/*
	*	route message
	*/

	if (!(dst & tidhmask) || (dst & tidhmask) == myhostpart) {	/* to local */

		if (TIDISTASK(dst)) {				/* to local task */

			if (tp = task_find(dst)) {
				mesg_to_task(tp, mp);

			} else {
				sprintf(pvmtxt, "sendmessage() scrapped, no such task t%x\n",
						dst);
				pvmlogerror(pvmtxt);
			}

		} else {				/* to myself */

	/* extra ref balances the mesg_unref() at bail */

			mp->m_src = mytid;
			mp->m_ref++;
			mp->m_cfrag = mp->m_frag->fr_link;
			mp->m_cpos = 0;
			netentry(hosts->ht_hosts[hosts->ht_local], mp);
		}

	} else {					/* to remote */

	/* lookup host */

		if (runstate == PVMDHTUPD)
			hp = tidtohost(newhosts, dst);
		if (!hp && !(hp = tidtohost(hosts, dst))) {
			sprintf(pvmtxt, "sendmessage() scrapped, no such host t%x\n", dst);
			pvmlogerror(pvmtxt);
			goto bail;
		}

	/* packetize frags */

		do {
			if (fp == mp->m_frag->fr_link) {	/* first frag */

		/* copy first frag, prepend d-d header */
				pp = pk_new(ourudpmtu);
				pp->pk_dat += MAXHDR;
				bcopy(fp->fr_dat, pp->pk_dat, fp->fr_len);
				pp->pk_len = fp->fr_len;

				pp->pk_dat -= DDMSGHDR;
				pp->pk_len += DDMSGHDR;
				pvmput32(pp->pk_dat, mp->m_cod);
				pvmput32(pp->pk_dat + 4, mp->m_wid);

			} else {							/* nth frag */

		/* pkt shares the frag's buffer and must hold its own ref, */
		/* else the buffer goes away with the message at bail */

				pp = pk_new(0);
				pp->pk_buf = fp->fr_buf;
				pp->pk_dat = fp->fr_dat;
				pp->pk_max = fp->fr_max;
				pp->pk_len = fp->fr_len;
				da_ref(pp->pk_buf);
			}
			if (fp->fr_link == mp->m_frag)
				ff |= FFEOM;
			pp->pk_src = mytid;
			pp->pk_dst = dst;
			pp->pk_flag = ff;
			ff = 0;
			if (mp->m_flag & MM_PRIO) {
				pvmlogerror("sendmessage() PRIO message to host?  scrapped\n");
				pk_free(pp);		/* not queued anywhere, don't leak it */

			} else {
				pkt_to_host(hp, pp);
			}
		} while ((fp = fp->fr_link) != mp->m_frag);
	}

bail:
	mesg_unref(mp);
	return 0;
}


/*	bytepk()
*
*	Append bytes to the end of a message.  The data is num chunks of
*	siz bytes each, with the start of one chunk lnc bytes after the
*	start of the one before.  Bytes go into the last frag of the
*	message while it has room; when it is full a new frag is made and
*	linked in after it.
*
*	Returns 0, else 1 if a new frag could not be had.
*/

static int
bytepk(mp, cp, num, siz, lnc)
	struct mesg *mp;	/* the message */
	char *cp;			/* base of data */
	int num;			/* num of chunks */
	int siz;			/* size of chunk */
	int lnc;			/* lead to next chunk */
{
	struct frag *cur;			/* frag being filled */
	struct frag *fresh;			/* newly made frag */
	int left;					/* bytes of chunk still to copy */
	int room;					/* free bytes at end of cur */
	int take;					/* bytes to copy this pass */

	/* chunks that touch each other are the same as one big chunk */

	if (siz == lnc) {
		siz *= num;
		lnc = siz;
		num = 1;
	}

	/* from here on lnc is the gap between end of chunk and next chunk */

	lnc -= siz;

	cur = mp->m_frag->fr_rlink;

	for (; num > 0; num--) {
		left = siz;

		while (left > 0) {
			room = cur->fr_max - (cur->fr_dat - cur->fr_buf) - cur->fr_len;

			if (room > 0) {

	/* copy what fits: rest of the chunk, or enough to fill the frag */

				take = (left <= room) ? left : room;
				bcopy(cp, cur->fr_dat + cur->fr_len, take);
				cur->fr_len += take;
				cp += take;
				left -= take;

			} else {

	/* frag is full, carry on in a new one */

				fresh = fr_new(ourudpmtu);
				if (!fresh)
					return 1;
				fresh->fr_dat += MAXHDR;
				LISTPUTAFTER(cur, fresh, fr_link, fr_rlink);
				cur = fresh;
			}
		}
		cp += lnc;
	}
	return 0;
}


/*	byteupk()
*
*	Copy bytes out of a message, starting at its current read
*	position (m_cfrag, m_cpos) and moving that position along.  The
*	destination is num chunks of siz bytes each, with the start of
*	one chunk lnc bytes after the start of the one before.
*
*	Returns 0, else 1 if the message runs out first (including when
*	the read position is already at the end on entry).
*/

static int
byteupk(mp, cp, num, siz, lnc)
	struct mesg *mp;	/* the message */
	char *cp;			/* base of data */
	int num;			/* num of chunks */
	int siz;			/* size of chunk */
	int lnc;			/* lead to next chunk */
{
	struct frag *cur;			/* frag being read */
	int left;					/* bytes of chunk still to copy */
	int avail;					/* unread bytes in cur */
	int take;					/* bytes to copy this pass */

	if (mp->m_cfrag == mp->m_frag)
		return 1;

	/* chunks that touch each other are the same as one big chunk */

	if (siz == lnc) {
		siz *= num;
		lnc = siz;
		num = 1;
	}

	/* from here on lnc is the gap between end of chunk and next chunk */

	lnc -= siz;

	for (; num > 0; num--) {
		left = siz;

		while (left > 0) {
			cur = mp->m_cfrag;
			avail = cur->fr_len - mp->m_cpos;

			if (avail > 0) {

	/* copy rest of the chunk, or whatever is left in this frag */

				take = (left <= avail) ? left : avail;
				bcopy(cur->fr_dat + mp->m_cpos, cp, take);
				mp->m_cpos += take;
				cp += take;
				left -= take;

			} else {

	/* frag used up, step to start of next one if there is one */

				mp->m_cpos = 0;
				mp->m_cfrag = cur->fr_link;
				if (mp->m_cfrag == mp->m_frag)
					return 1;
			}
		}
		cp += lnc;
	}
	return 0;
}


/*	pkint()
*
*	Pack an int into a message as 4 bytes, most significant byte
*	first.
*	Returns what bytepk() returns: 0 if ok, else 1.
*/

int
pkint(mp, i)
	struct mesg *mp;		/* message to pack */
	int i;					/* int to pack */
{
	char wire[4];			/* i in wire order */
	int k;
	int shift;

	for (k = 0, shift = 24; k < 4; k++, shift -= 8)
		wire[k] = i >> shift;

	return bytepk(mp, wire, 4, 1, 1);
}


/*	upkint()
*
*	Unpack a signed int from a message.  Four bytes are taken, most
*	significant first; if the top bit of the first byte is set, the
*	bits of an int above the low 32 (where there are any) are set too.
*	Returns 0 if ok, else whatever byteupk() returned (1 at end of
*	message), in which case *np is not touched.
*/

int
upkint(mp, np)
	struct mesg *mp;		/* message to unpack */
	int *np;				/* int to unpack into */
{
	int rc;
	char raw[4];			/* bytes as they came from the message */

	rc = byteupk(mp, raw, 4, 1, 1);
	if (rc)
		return rc;

	*np = (0x80 & raw[0] ? ~0xffffffff : 0)
		+ ((0xff & raw[0]) << 24)
		+ ((0xff & raw[1]) << 16)
		+ ((0xff & raw[2]) << 8)
		+ (0xff & raw[3]);
	return 0;
}


/*	upkuint()
*
*	Unpack an unsigned 32-bit quantity from a message.  Four bytes
*	are taken, most significant first, with no sign extension.
*	Returns 0 if ok, else whatever byteupk() returned (1 at end of
*	message), in which case *np is not touched.
*/

int
upkuint(mp, np)
	struct mesg *mp;		/* message to unpack */
	int *np;				/* int to unpack into */
{
	int rc;
	char raw[4];			/* bytes as they came from the message */

	rc = byteupk(mp, raw, 4, 1, 1);
	if (rc)
		return rc;

	*np = ((0xff & raw[0]) << 24)
		+ ((0xff & raw[1]) << 16)
		+ ((0xff & raw[2]) << 8)
		+ (0xff & raw[3]);
	return 0;
}


/*	pkstr()
*
*	Pack a null-terminated string into a message: first its length
*	(counting the null) with pkint(), then the bytes, null included.
*	Returns 0 if ok, else the nonzero result of whichever step failed.
*/

int
pkstr(mp, s)
	struct mesg *mp;		/* message to pack */
	char *s;				/* string to pack */
{
	int len = strlen(s) + 1;	/* bytes to send, incl null */
	int rc;

	rc = pkint(mp, len);
	if (!rc)
		rc = bytepk(mp, s, len, 1, 1);
	return rc;
}


/*	upkstr()
*
*	Unpack a string from a message.  Result is null-terminated.
*
*	The string arrives as a length followed by that many bytes.  At
*	most mlen bytes are stored in s; if the string is longer, s gets
*	the first mlen - 1 bytes and a null, and exactly the rest of the
*	string (not the rest of the message) is read and discarded so the
*	next item can be unpacked.  If mlen is less than 1 nothing is
*	stored and the whole string is discarded.
*
*	Returns 0 if ok, else 1 if end of message before the length or
*	the stored part could be read.  Running out of message while
*	discarding is not reported.
*/

int
upkstr(mp, s, mlen)
	struct mesg *mp;		/* message to unpack */
	char *s;				/* space to unpack in */
	int mlen;				/* max bytes to unpack incl null */
{
	int cc;
	int l;					/* string length from message */
	int keep;				/* bytes to store in s */
	int skip;				/* bytes to throw away */
	char junk;

	if (cc = upkint(mp, &l))
		return cc;

	/* whole string fits.  l < mlen so s[l] is ours to write; put a */
	/* null there in case the sender didn't include one */

	if (l < mlen) {
		if (!(cc = byteupk(mp, s, l, 1, 1)) && l >= 0)
			s[l] = 0;
		return cc;
	}

	/* too long: store what fits, cut it off with a null */

	keep = mlen > 0 ? mlen : 0;
	if (keep > 0) {
		if (cc = byteupk(mp, s, keep, 1, 1))
			return cc;
		s[keep - 1] = 0;
	}

	/* eat the remainder of this string only */

	for (skip = l - keep; skip > 0; skip--)
		if (byteupk(mp, &junk, 1, 1, 1))
			break;
	return 0;
}


/*	upkstralloc()
*
*	Unpack a string from a message.  Result is null-terminated,
*	and in dynamic space (from TALLOC; release it with PVM_FREE).
*
*	The length read from the message has to be at least 1 (pkstr()
*	always counts the null), and the last byte unpacked is forced to
*	null so the result is a proper string whatever the sender put in.
*
*	Returns 0 if ok with *ss pointing to the string.  Otherwise
*	returns nonzero (end of message, bad length or no memory) with
*	*ss set to 0.
*/

int
upkstralloc(mp, ss)
	struct mesg *mp;		/* message to unpack */
	char **ss;				/* return pointer */
{
	int cc;
	int l;					/* string length from message, incl null */

	if (cc = upkint(mp, &l))
		goto fail;

	if (l < 1) {			/* can't even hold the null */
		cc = 1;
		goto fail;
	}

	if (!(*ss = TALLOC(l, char, "ustr"))) {
		cc = 1;
		goto fail;
	}

	if (!(cc = byteupk(mp, *ss, l, 1, 1))) {
		(*ss)[l - 1] = 0;
		return 0;
	}
	PVM_FREE(*ss);

fail:
	*ss = 0;
	return cc;
}


/*	forkexec()
*
*	Search directories in epaths for given file.
*	Clean up any files we opened, fork and exec the named process.
*	Leave std{out,err} open so the process can whine if it needs to.
*
*	The first directory holding a regular file of that name with the
*	owner-execute bit set is used.  A directory whose name together
*	with "/" and the file name would not fit in the path buffer is
*	passed over.
*
*	The child gets the write end of a pipe (a socketpair on IMA_TITN)
*	as stdout and stderr, has every descriptor above 2 closed and
*	PVMEPID=<its pid> put in its environment.  With PvmTaskDebug in
*	flags, pvm3/lib/debugger is run instead with the program's path
*	as its first argument.  The parent keeps the read end in t_out
*	and adds it to the read set.
*
*	Returns 0 if ok (and fills in tpp), else returns PvmNoFile or
*	PvmOutOfRes
*
*	N.B. must be able to use argv[-1].
*/

int
forkexec(flags, name, argv, tpp)
	int flags;				/* exec options */
	char *name;				/* filename */
	char **argv;			/* arg list (argv[-1] must be there) */
	struct task **tpp;		/* return task context */
{
	int tid;				/* task tid */
	int pid;				/* task pid */
	int pfd[2];				/* pipe back from task */
	struct task *tp;		/* new task context */
	char path[MAXPATHLEN];
	struct stat sb;
	char **ep;
	int i;

	if ((tid = tid_new()) < 0) {
		pvmlogerror("forkexec() out of tids?\n");
		return PvmOutOfRes;
	}
	tp = task_new(tid);

	/* search for file */

	for (ep = epaths; *ep; ep++) {

	/* dir + "/" + name + null has to fit, else the copies overrun path */

		if (strlen(*ep) + 1 + strlen(name) >= sizeof(path)) {
			if (debugmask & PDMTASK)
				pvmlogerror("forkexec() path too long, skipped\n");
			continue;
		}
		(void)strcpy(path, *ep);
		(void)strcat(path, "/");
		(void)strcat(path, name);

		if (stat(path, &sb) == -1
				|| ((sb.st_mode & S_IFMT) != S_IFREG)
				|| !(sb.st_mode & S_IEXEC)) {
			if (debugmask & PDMTASK) {
				sprintf(pvmtxt, "forkexec() stat failed <%s>\n", path);
				pvmlogerror(pvmtxt);
			}
			continue;
		}
#ifdef	IMA_TITN
		if (socketpair(AF_UNIX, SOCK_STREAM, 0, pfd) == -1) {
			pvmlogperror("forkexec() socketpair");
			task_free(tp);
			return PvmOutOfRes;
		}
#else
		if (pipe(pfd) == -1) {
			pvmlogperror("forkexec() pipe");
			task_free(tp);
			return PvmOutOfRes;
		}
#endif
		if (!(pid = fork())) {
			char c[32];

	/* child: stdout and stderr go down the pipe to the pvmd */
	/* close any random fds */

			dup2(pfd[1], 1);
			dup2(1, 2);
			for (i = getdtablesize(); --i > 2; )
				(void)close(i);
	/*
	* put expected pid in environment for libpvm in case
	* the process we exec forks before connecting back to the pvmd
	*/
			sprintf(c, "PVMEPID=%d", getpid());
			pvmputenv(c);
			if (flags & PvmTaskDebug) {

	/* shift arg list down one: argv[-1] becomes the debugger's argv[0] */

				argv[0] = path;
				argv--;
	/* XXX kinda sick to compile in this filename */
				argv[0] = "debugger";
				execv("pvm3/lib/debugger", argv);

			} else {
				execv(path, argv);
			}

	/* only get here if the exec failed */

			exit(1);
		}
		if (pid == -1) {
			pvmlogperror("forkexec() fork");
			(void)close(pfd[0]);
			(void)close(pfd[1]);
			task_free(tp);
			return PvmOutOfRes;
		}

	/* parent: only the read end is ours */

		(void)close(pfd[1]);

		task_setpid(tp, pid);
		tp->t_out = pfd[0];

		tp->t_a_out = STRALLOC(name);
		wrk_fds_add(tp->t_out, 1);

		*tpp = tp;
		if (debugmask & PDMTASK) {
			sprintf(pvmtxt, "forkexec() new task t%x pid %d pfd=%d\n",
					tp->t_tid, pid, pfd[0]);
			pvmlogerror(pvmtxt);
		}
		return 0;
	}
	if (debugmask & PDMTASK) {
		sprintf(pvmtxt, "forkexec() didn't find <%s>\n", name);
		pvmlogerror(pvmtxt);
	}
	task_free(tp);
	return PvmNoFile;
}


/*	beprime()
*
*	Pvmd[master] becomes pvmd'[master].
*	Set runstate, make ppnetsock the real netsock, close loclsock.
*
*	Also: takes on host part 0 and tid TIDPVMD, replaces the host
*	table with a new one holding only the entries hosts[ht_local] and
*	hosts[0] (the old table is kept in oldhosts), frees every local
*	task context, and clears the fd sets so that netsock is the only
*	descriptor being watched.
*
*	Always returns 0.
*/

beprime()
{
	struct htab *htp;		/* the new host table */
	struct task *tp;

	runstate = PVMDPRIME;

	if ((myunixpid = getpid()) == -1) {
		pvmlogerror("beprime() can't getpid()\n");
		pvmbailout(0);
	}

	myhostpart = 0;
	mytid = TIDPVMD;

	/* build the two-entry host table */

	htp = ht_new(hosts->ht_local);
	htp->ht_master = hosts->ht_local;
	htp->ht_local = 0;
	ht_insert(htp, hosts->ht_hosts[hosts->ht_local]);
	ht_insert(htp, hosts->ht_hosts[0]);

	/* master entry's tx/rx sequence numbers come from entry 0's */
	/* rx/tx numbers, crossed over */

	htp->ht_hosts[htp->ht_master]->hd_txseq
			= htp->ht_hosts[0]->hd_rxseq;
	htp->ht_hosts[htp->ht_master]->hd_rxseq
			= htp->ht_hosts[0]->hd_txseq;

	oldhosts = hosts;
	hosts = htp;

	/* no task connections here; talk on what was ppnetsock */

	(void)close(loclsock);
	loclsock = -1;
	(void)close(netsock);
	netsock = ppnetsock;
	ppnetsock = -1;

	while ((tp = locltasks->t_link) != locltasks)
		task_free(tp);

	FD_ZERO(&wrk_rfds);
	FD_ZERO(&wrk_wfds);
	FD_ZERO(&wrk_efds);
	wrk_nfds = 0;

	wrk_fds_add(netsock, 1);

	return 0;
}


/*	pkt_to_host()
*
*	Put a packet on the send queue for a host.  The size limit is the
*	smaller of the host's mtu and ours.  A packet that fits with its
*	d-d frag header is queued as it is.  A bigger one is cut into
*	pieces that fit, each a new packet pointing into the original's
*	data buffer (one reference taken per piece), after which the
*	original packet is freed.  Only the first piece can carry FFSOM
*	and only the last FFEOM, and only if the original had them.
*
*	Always returns 0.
*/

pkt_to_host(hp, pp)
	struct hostd *hp;
	struct pkt *pp;
{
	struct pkt *piece;		/* one refragmented pkt */
	char *from;				/* start of data for next piece */
	int mtu;				/* limit for this host */
	int most;				/* max data bytes in a piece */
	int left;				/* data bytes not yet in a piece */
	int take;				/* data bytes in this piece */
	int som;				/* FFSOM, until first piece made */
	int eom;				/* FFEOM, for last piece */

	mtu = ourudpmtu;
	if (hp->hd_mtu < ourudpmtu)
		mtu = hp->hd_mtu;

	/* small enough, send as is */

	if (pp->pk_len + DDFRAGHDR <= mtu) {
		LISTPUTBEFORE(hp->hd_txq, pp, pk_link, pk_rlink);
		return 0;
	}

	most = mtu - DDFRAGHDR;
	from = pp->pk_dat;
	som = pp->pk_flag & FFSOM;
	eom = pp->pk_flag & FFEOM;
	left = pp->pk_len;

	while (left > 0) {
		take = min(left, most);

		piece = pk_new(0);
		piece->pk_src = pp->pk_src;
		piece->pk_dst = pp->pk_dst;

		piece->pk_flag = som;
		if (take == left)			/* this is the last piece */
			piece->pk_flag |= eom;
		som = 0;

		piece->pk_buf = pp->pk_buf;
		piece->pk_max = pp->pk_max;
		piece->pk_dat = from;
		piece->pk_len = take;
		da_ref(pp->pk_buf);

		from += take;
		LISTPUTBEFORE(hp->hd_txq, piece, pk_link, pk_rlink);
		left -= take;
	}

	pk_free(pp);
	return 0;
}


