/* Defaults.c -- Copyright 1989 Liam R. Quin.  All Rights Reserved.
 * This code is NOT in the public domain.
 * See the file COPYRIGHT for full details.
 *
 * $Id: Defaults.c,v 1.7 90/10/06 00:11:37 lee Rel1-10 $
 *
 * $Log:	Defaults.c,v $
 * Revision 1.7  90/10/06  00:11:37  lee
 * Prepared for first beta release.
 * 
 * Revision 1.6  90/08/29  21:46:25  lee
 * Alpha release.
 * 
 * Revision 1.5  90/08/09  19:16:08  lee
 * *** empty log message ***
 * 
 * Revision 1.4  90/04/21  17:26:26  lee
 * now passes gcc -W (before Canada...)
 * 
 * Revision 1.3  90/03/23  17:58:57  lee
 * Integrated with globals.h and added a few more comments.
 * Also fixed a bug whereby the configuration file over-rode both
 * command-line options and environment variables!
 * 
 * Revision 1.2  90/03/20  20:52:38  lee
 * removed some globals...
 * 
 *
 */

#define DefineThem /* turn externs off so we do initialisations here */
# include "globals.h" /* defines and declarations for database filenames */
#undef DefineThem
#undef EXTERN
#include <fcntl.h>
#include <errno.h>
#ifdef SYSV
extern int _filbuf(); /* this must appear before stdio.h is included... */
#endif
#include <stdio.h>
#include <malloc.h>
#include <ctype.h>
#include "emalloc.h"
#include <sys/types.h>
#include "fileinfo.h"
#include "wordinfo.h"
#include "phrase.h"

/* $Id: Defaults.c,v 1.7 90/10/06 00:11:37 lee Rel1-10 $
 *
 * This file is part of nx-text, Liam Quin's text retrieval package.
 *
 * Defaults.c -- set up filenames etc. from defaults + cmd line + env.
 *
 * -DUNDERHOME is used here, as is DEFAULTCOMMONWORDS, etc. from Makefile.
 * See comments in Makefile.
 *
 * $Log:	Defaults.c,v $
 * Revision 1.7  90/10/06  00:11:37  lee
 * Prepared for first beta release.
 * 
 * Revision 1.6  90/08/29  21:46:25  lee
 * Alpha release.
 * 
 * Revision 1.5  90/08/09  19:16:08  lee
 * *** empty log message ***
 * 
 * Revision 1.4  90/04/21  17:26:26  lee
 * now passes gcc -W (before Canada...)
 * 
 * Revision 1.3  90/03/23  17:58:57  lee
 * Integrated with globals.h and added a few more comments.
 * Also fixed a bug whereby the configuration file over-rode both
 * command-line options and environment variables!
 * 
 * Revision 1.2  90/03/20  20:52:38  lee
 * removed some globals...
 * 
 *
 */

/* System and library routines used in this file.
 *
 * They are declared by hand in pre-ANSI style (no parameter lists), so
 * the compiler does not check the arguments at the call sites.
 */
extern int open(), close();
extern void exit();

extern int atoi(), strcmp(), strlen();
#ifndef tolower
 /* only declared as a function when <ctype.h> has not made it a macro */
 extern int tolower();
#endif
extern char *strcpy();
extern void perror();
/* Project routines defined outside this file. */
extern int ReadCommonWords(), IsDir();
/* Defined at the end of this file; not static, so visible to other files. */
int cknatstr();
/* Forward declarations for the config-file reader defined further down. */
static int NextChar();
static void ReadDefaultFile();

/* Records where the current value of a setting was taken from, so that
 * a source that has already supplied a value is not overridden, and so
 * that error messages can say where a bad value came from.
 */
typedef enum {
    FW_Cmdline, /* given as a command-line option */
    FW_Envvar, /* taken from an environment variable */
    FW_Default, /* use the default */
    FW_File, /* from the config file */
    FW_None /* don't use any at all */
} t_FromWhere;

/* Origin of the database directory, the common-word file and the
 * document path respectively; each starts out as "default" and is
 * updated by SetDefaults() and ReadDefaultFile() as values are found.
 */
static t_FromWhere DirFromWhere = FW_Default;
static t_FromWhere CommonFromWhere = FW_Default;
static t_FromWhere DocFromWhere = FW_Default;

extern int MakeDocPath(); /* hand it DOCPATH... */
extern int AsciiTrace; /* trace/verbosity level; set by -v and -t below */

/* Both are defined later in this file. */
char *mkdbm();
char *joinstr3();

/* should PCM_HalfCase be in globals.h??? */
/* Phrase matching level; the -m option in SetDefaults() changes it. */
t_PhraseCaseMatch PhraseMatchLevel = PCM_HalfCase;

/* SetDefaults -- work out the database directory, the common-word file
 * and the document path, then build the database file names.
 *
 * argc and argv are main()'s arguments.  The options handled here have
 * their option letter overwritten in place (with z or Z) so that the
 * later getopt pass in main() can ignore them.
 *
 * For each setting the order of precedence is: command-line option,
 * then environment variable ($LQTEXTDIR, $LQCOMMON, $DOCPATH), then the
 * config file in the database directory, then the compiled-in default.
 *
 * Any error is reported on stderr and is fatal (exit(1)).
 */
void
SetDefaults(argc, argv)
    int argc;
    char **argv;
{
    extern char *getenv();
    extern char *progname;
    char *p;

    /* main() should have set progname.  If it didn't, we don't strip
     * the leading directory part; argv[0] is used exactly as given.
     */
    if (!progname || !*progname) progname = argv[0];

    /* Loop over the leading options, looking for
     * -m c -- phrase matching level
     * -v   -- verbose
     * -t N -- trace level
     * -c   -- common words file (+c turns common words off)
     * -d   -- set directory for database
     *
     * Don't use getopt, as we'll be using that later in main(),
     * and it doesn't like being called twice.
     * As a result, main() should ignore the z: option.
     */
    while (--argc > 0) {
	if (**++argv == '-' || **argv == '+') {
	    char TurnOn = (**argv == '-');

	    switch ((*argv)[1]) {
	    case 'm': /* precise matching */
		argv[0][1] = 'z'; /* so it gets ignored by getopt */

		/* the value is either glued on (-mp) or the next argument */
		if (!*(p = &argv[0][2])) {
		    if (argc > 1) {
			argc--; argv++;
			p = (*argv);
		    } else {
			fprintf(stderr,
			"%s: -m must be followed by a, h or p; see -x\n",
								progname);
			exit(1);
		    }
		}
		/* NOTE(review): extra characters after the letter are
		 * reported but are not fatal; the first letter is used.
		 */
		if (p[1]) {
		    fprintf(stderr,
		    "%s: -m must be followed by a, h or p, not \"%s\"\n",
								progname, p);
		}

		switch (*p) {
		case 'p': /* precise */
		    PhraseMatchLevel = PCM_SameCase;
		    break;
		case 'h': /* heuristic */
		    PhraseMatchLevel = PCM_HalfCase;
		    break;
		case 'a': /* any, approximate */
		    PhraseMatchLevel = PCM_AnyCase;
		    break;
		default:
		    fprintf(stderr,
		    "%s: -m must be followed by \"p\", \"h\" or \"a\";\n",
								    progname);
		    fprintf(stderr,
		    "use %s -xv for more explanation.\n", progname);
		    exit(1);
		}
		break;

	    case 'v': /* -v is the same as -t1 */
		argv[0][1] = 'Z'; /* so it gets ignored by getopt */
		++AsciiTrace;
		break;
	    case 't': /* trace level */
		argv[0][1] = 'z'; /* so it gets ignored by getopt */
		if (argv[0][2] != '\0') {
		    p = &argv[0][2];
		} else {
		    if (argc > 1) {
			argc--;
			p = (*++argv);
		    } else {
			p = "1"; /* a bare -t at the end means level 1 */
		    }
		}
		if (cknatstr(p)) {
		    AsciiTrace = atoi(p);
		} else {
		    fprintf(stderr, "%s: -t: \"%s\" is not a number\n",
								    progname, p);
		    exit(1);
		}
		/* cknatstr() lets a minus sign through, so clamp here */
		if (AsciiTrace <= 0) AsciiTrace = 1;
		fprintf(stderr, "%s: trace level set to %d\n",
							    progname, AsciiTrace);

		break;
	    case 'c': /* common file */
		if (TurnOn) {
		    CommonFromWhere = FW_Cmdline;
		    argv[0][1] = 'z'; /* so it gets ignored by getopt */
		    if (argv[0][2] != '\0') {
			/* -cfile: the name starts at the third character
			 * of THIS argument (not at argv[2]).
			 */
			CommonWordFile = &argv[0][2];
		    } else {
			if (argc > 1) {
			    CommonWordFile = argv[1];
			    argc--; argv++;
			} else {
			    fprintf(stderr,
				"%s: -c option must be followed by a filename\n",
									progname);
			    exit(1);
			}
		    }
		} else { /* Turn off, +c, may be undocumented right now */
		    CommonFromWhere = FW_None;
		}
		break;
	    case 'd':
		argv[0][1] = 'z'; /* so it gets ignored by getopt */
		DirFromWhere = FW_Cmdline;
		if (argv[0][2] != '\0') {
		    DatabaseDir = &argv[0][2];
		} else {
		    if (argc > 1) {
			DatabaseDir = argv[1];
			argc--; argv++;
		    } else {
			/* @error */
			fprintf(stderr,
				"%s: %cd must be followed by a directory name\n",
						progname, TurnOn ? '-' : '+');
			exit(1);
		    }
		}
		break;
	    default:
		/* not one of ours; left alone for main() to deal with */
		break;
	    } /* end switch */
	} else {
	    /* not an option, so stop looking */
	    break;
	}
    } /* end while */

    /* now we have parsed the command line arguments, so look for the
     * default directory
     */
    if (DirFromWhere == FW_Default) {
	char *t;

	if ((t = getenv("LQTEXTDIR")) != (char *) 0) {
	    /* keep a private copy rather than the getenv() result */
	    DatabaseDir = emalloc(strlen(t) + 1);
	    (void) strcpy(DatabaseDir, t);
	    DirFromWhere = FW_Envvar;
	} else {
#ifdef UNDERHOME
	    char *home = getenv("HOME");

	    if (home) {
		DatabaseDir = joinstr3(home, "/", UNDERHOME);
		if (!IsDir(DatabaseDir)) {
		    fprintf(stderr,
			    "%s: database directory \"%s\" inaccessible.\n",
			    progname, DatabaseDir);
		    exit(1);
		}
	    } else {
		fprintf(stderr, "%s: can't find your login directory ($HOME)\n",
					progname);
		exit(1);
	    }
#endif /* UNDERHOME*/
	    /* in either case it's the default... */
	    DirFromWhere = FW_Default;
	}
    }

    if (!DatabaseDir || !*DatabaseDir) {
	/* This can happen if there is no default, or if the user types
	 * lqword -d ""
	 * just to be malicious :-)
	 */
	fprintf(stderr,
	"%s: You must give a database directory with -d or $LQTEXTDIR\n",
			progname);
	fprintf(stderr, "        use %s -xv for more details.\n", progname);
	exit(1);
    }

    /* IsDir is in DocPath.c -- perhaps this should be, too. */
    if (!IsDir(DatabaseDir)) {
	char *msg = (char *) 0;

	switch (DirFromWhere) {
	case FW_Cmdline:
	    msg = " (specified with the -d option)";
	    break;
	case FW_Envvar:
	    msg = " (from $LQTEXTDIR)";
	    break;
	default:
	    break;
	}
	fprintf(stderr, "%s: \"%s\"%s is not a directory.\n",
					progname, DatabaseDir, msg ? msg : " ");
	exit(1);
    }

    /* set default filenames */
#define IfNot(x, y) ((x) ? (x) : (y))

    FileIndex = mkdbm(DatabaseDir, IfNot(FileIndex, FILEINDEX));
    WordIndex = mkdbm(DatabaseDir, IfNot(WordIndex, WORDINDEX));

    DataBase = joinstr3(DatabaseDir, "/", IfNot(DataBase, DATABASE));
    FidFile = joinstr3(DatabaseDir, "/", IfNot(FidFile, FIDFILE));
    WidFile = joinstr3(DatabaseDir, "/", IfNot(WidFile, WIDFILE));
    WidIndexFile =
	    joinstr3(DatabaseDir, "/", IfNot(WidIndexFile, WIDINDEXFILE));

    /* Look at $LQCOMMON before the config file is read: ReadDefaultFile()
     * only supplies a common-word file while the source is still
     * FW_Default, so checking the environment first stops the config
     * file from over-riding the environment variable.
     */
    if (CommonFromWhere == FW_Default) {
	char *c = getenv("LQCOMMON");

	if (c) {
	    CommonWordFile = emalloc(strlen(c) + 1);
	    (void) strcpy(CommonWordFile, c);
	    CommonFromWhere = FW_Envvar;
	}
    }

    ReadDefaultFile();

    if (AsciiTrace) {
	fprintf(stderr, "%s: lqtext directory \"%s\"\n",progname,DatabaseDir);
    }

    if (CommonFromWhere != FW_None && CommonWordFile && *CommonWordFile) {
	extern int errno;
	int c;

	/* a relative name is taken to be inside the database directory */
	if (*CommonWordFile != '/') {
	    CommonWordFile = joinstr3(DatabaseDir, "/", CommonWordFile);
	}

	/* open it now only to check that it can be read */
	if ((c = open(CommonWordFile, O_RDONLY, 0)) < 0) {
	    if (CommonFromWhere != FW_Default) {
		int e = errno;
		char *msg = " ";

		switch (CommonFromWhere) {
		case FW_Cmdline:
		    msg = " (from the -c option)";
		    break;
		case FW_Envvar:
		    msg = " (from $LQCOMMON)";
		    break;
		default:
		    break;
		}

		fprintf(stderr,"%s: can't read common-word file%s ",progname,msg);
		errno = e; /* fprintf may have changed it */
		if (errno) {
		    perror(CommonWordFile);
		} else {
		    fprintf(stderr, "\"%s\"\n", CommonWordFile);
		}
		exit(1);
	    }
	    /* an unreadable compiled-in default is quietly dropped */
	    CommonWordFile = (char *) 0;
	} else {
	    (void) close(c); /* it's OK */
	}
    }

    if ((p = getenv("DOCPATH")) != (char *) 0) {
	switch (DocFromWhere) {
	case FW_File:
	    if (AsciiTrace > 1) {
		fprintf(stderr, "%s: DOCPATH (%s) overrides %s (%s)\n",
#ifdef CONFIGFILE
				progname, p, CONFIGFILE, DocPath
#else
				progname, p, "README", DocPath
#endif
		);
	    }
	    efree(DocPath);
	    /* FALL THROUGH */
	case FW_Default:
	default: /* ? */
	    DocPath = emalloc((unsigned) (strlen(p) + 1));
	    (void) strcpy(DocPath, p);
	    DocFromWhere = FW_Envvar;
	    break;
	}
    }

    if (!DocPath || !*DocPath) {
	DocPath = ".";
    }

#define SetOrNot(s) ( (s && *s) ? s : (s ? "[empty]" : "[null]" ) )

    /* this is always here -- it's only checked once, and is actually
     * rather useful.
     */
    if (AsciiTrace > 2) {
	fprintf(stderr, "%s: CommonWordFile = \"%s\"\n", progname,
		SetOrNot(CommonWordFile));
	fprintf(stderr, "%s: DatabaseDir = \"%s\"\n", progname,
		SetOrNot(DatabaseDir));
	fprintf(stderr, "%s: DocPath  = \"%s\"\n", progname,
		SetOrNot(DocPath));
	fprintf(stderr, "%s: FileIndex = \"%s\"\n", progname,
		SetOrNot(FileIndex));
	fprintf(stderr, "%s: WordIndex = \"%s\"\n", progname,
		SetOrNot(WordIndex));
	fprintf(stderr, "%s: DataBase = \"%s\"\n", progname,
		SetOrNot(DataBase));
	fprintf(stderr, "%s: FidFile = \"%s\"\n", progname,
		SetOrNot(FidFile));
	fprintf(stderr, "%s: WidFile = \"%s\"\n", progname,
		SetOrNot(WidFile));
	fprintf(stderr, "%s: WidIndexFile = \"%s\"\n", progname,
		SetOrNot(WidIndexFile));
    }

    (void) MakeDocPath(DocPath);
    /* DocPath is no longer needed, so getenv() can be called again now */

    if (CommonWordFile && *CommonWordFile) {
	(void) ReadCommonWords(CommonWordFile);
    }
}

/* DefaultUsage -- print, on stderr, a summary of the options that
 * SetDefaults() handles.
 *
 * When AsciiTrace is non-zero the longer form is printed: it also
 * explains the -m letters and shows the current database directory and
 * document search path.
 */
void
DefaultUsage()
{
    fprintf(stderr, "\
	-c file	-- ignore words that are listed in the named file\n\
	-d dir	-- use the lq-text database in the named directory\n\
	-m c	-- set matching criteria -- c is \"p\", \"h\" or \"a\"\n");
    if (AsciiTrace) {
	fprintf(stderr, "\
		   -m p  uses precise matching, where CaSe is significant;\n\
		   -m h  uses heuristic matching, which is the default, and\n\
		   -m a  uses approximate matching.\n");
    }

    /* which of -x and -xv is "this" one depends on the form being shown */
    fprintf(stderr, "\n\
	-t N	-- set trace level to N (default is zero)\n\
	-x	-- print %s explanation\n\
	-xv	-- print %s explanation\n\
	-V	-- print version information\n\
	-v	-- be verbose (same as -t 1)\n",
			AsciiTrace ? "a shorter" : "this",
			AsciiTrace ? "this" : "a longer");
    if (AsciiTrace) {
	fprintf(stderr, "\
The current database directory is \"%s\";\n\
%s will search the path \"%s\" for documents.\n", DatabaseDir, progname, DocPath);
    }
}

/* This should be in smalldb.c I think */
/* mkdbm -- build the dbm file-name prefix "root/prefix".
 *
 * When DBMCREAT is 0 this also makes sure that the two dbm files,
 * prefix.dir and prefix.pag, exist, creating them empty if they do not.
 * Although ndbm will create files automatically, gdbm and dbm will not,
 * so we do that here.  Also, it might take a while to get to the point
 * where the database is opened, so it is a lot better to get an error
 * message now.  Failure to create a file is fatal (exit(1)).
 *
 * Returns a string obtained from joinstr3(); it is the prefix for dbm,
 * not the whole path of either file.
 */
char *
mkdbm(root, prefix)
    char *root; /* /tmp/lqtext */
    char *prefix; /* wordlist, --> /tmp/lqtext.{dir,pag} for dbm */
{
    char *p = joinstr3(root, "/", prefix);

#if DBMCREAT == 0
    {
	extern int errno;
	static char *Suffixes[] = { "dir", "pag" };
	int n;

	for (n = 0; n < 2; n++) {
	    char *q = joinstr3(p, ".", Suffixes[n]);
	    int fd;

	    errno = 0; /* paranoia */

	    /* Only a missing file is created; any other failure is left
	     * for the dbm routines to report later.
	     */
	    if ((fd = open(q, O_RDONLY, 0)) < 0 && errno == ENOENT) {
		fd = open(q, O_CREAT|O_RDWR, 0666); /* rw-rw-rw & umask */

		if (fd < 0) {
		    fprintf(stderr, "%s: can't create \"%s\"\n", progname, q);
		    (void) exit(1);
		}
	    }

	    /* close it whether it was already there or has just been made */
	    if (fd >= 0) {
		(void) close(fd);
	    }

	    (void) efree(q);
	}
    }
#endif /*DBMCREAT*/

    return p; /* the prefix for dbm, not the whole path */
}

/* this belongs in string.c or something */
/* joinstr3 -- return a newly emalloc'd string holding a, b and c joined
 * end to end.  None of the three may be a null pointer.
 */
char *
joinstr3(a, b, c)
    char *a, *b, *c;
{
    int LenA = strlen(a);
    int LenB;
    char *Result;

    /* b is very often a one-character separator; skip strlen for that */
    if (b[0] != '\0' && b[1] == '\0') {
	LenB = 1;
    } else {
	LenB = strlen(b);
    }

    Result = emalloc(LenA + LenB + strlen(c) + 1);
    /* ASSERT: Result != 0 */

    /* each copy starts where the previous piece ended */
    (void) strcpy(Result, a);
    (void) strcpy(Result + LenA, b);
    (void) strcpy(Result + LenA + LenB, c);

    return Result;
}

/* Values for the Map argument of NextToken() and NextChar(). */
#define LCNOMAP 0 /* Token -- leave case alone */
#define LCMAP 1  /* map to lower case */

/* Count of newlines read so far from the config file; NextChar() bumps
 * it and it is printed as the line number in error messages.
 */
static int RMLine = 0;

/* Declared at file scope: a function declaration inside a block may
 * not carry the "static" storage class.
 */
static char *NextToken(); /* see below */

/* ReadDefaultFile -- read settings from the config file in DatabaseDir
 * (named CONFIGFILE if that is defined, otherwise "README").
 *
 * Tokens are read up to an "end" token or end of file:
 *   common file   -- common-word file; used only if nothing else has
 *                    supplied one yet (CommonFromWhere is FW_Default)
 *   path dirs     -- document search path ("docpath" is a synonym)
 * Any other token is a fatal error.
 *
 * A config file that cannot be opened is not an error: there is a
 * warning if permission was denied, or if tracing is on, and the
 * function simply returns.
 */
static void
ReadDefaultFile()
{
    extern int errno;

#ifdef CONFIGFILE
    char *ReadMe = joinstr3(DatabaseDir, "/", CONFIGFILE);
#else
    char *ReadMe = joinstr3(DatabaseDir, "/", "README");
#endif
    FILE *fp;
    char *Token;


    /* This is paranoid... */
    if (!ReadMe || !*ReadMe) {
	fprintf(stderr, "%s: Internal: %s: %d: ReadMe %s\n",
		progname, __FILE__, __LINE__, SetOrNot(ReadMe));
	exit(1);
    }

    errno = 0;
    if ((fp = fopen(ReadMe, "r")) == (FILE *) 0) {
	/* lack of permission is reported as EACCES; EPERM is still
	 * accepted as well, as it was the only value tested before.
	 */
	if (errno == EACCES || errno == EPERM) {
	    fprintf(stderr,
	    "%s: Warning: you don't have permission to read \"%s\"\n",
							progname, ReadMe);
	} else if (AsciiTrace) {
	    int e = errno;

	    fprintf(stderr, "%s: warning: can't open config file ", progname);
	    errno = e; /* fprintf may have changed it */
	    perror(ReadMe);
	}
	efree(ReadMe);
	return;
    }

    /* Read README up to an "end" line, ignoring lines starting with # */

    while ((Token = NextToken(fp, ReadMe, LCMAP)) != (char *) 0) {
	if (STREQ(Token, "end")) {
	    break;
	}

	if (STREQ(Token, "common")) {
	    /* the value is a file name, so its case is left alone */
	    if (!(Token = NextToken(fp, ReadMe, LCNOMAP))) {
		fprintf(stderr, "%s: %s %d: unexpected eof at common file\n",
				progname, ReadMe, RMLine);
		exit(1);
	    } else if (CommonFromWhere == FW_Default) {
		/* Token points at NextToken's static buffer: copy it */
		CommonWordFile = emalloc((unsigned) (strlen(Token) + 1));
		(void) strcpy(CommonWordFile, Token);
		CommonFromWhere = FW_File;
	    }
	} else if (STREQ(Token, "path") || STREQ(Token, "docpath")) {
	    if (!(Token = NextToken(fp, ReadMe, LCNOMAP))) {
		fprintf(stderr, "%s: %s: %d: unexpected eof at document path\n",
				progname, ReadMe, RMLine);
		exit(1);
	    } else {
		/* a later path line replaces an earlier one; release
		 * the copy made for the earlier line first
		 */
		if (DocFromWhere == FW_File) {
		    efree(DocPath);
		}
		DocPath = emalloc((unsigned) (strlen(Token) + 1));
		(void) strcpy(DocPath, Token);
		DocFromWhere = FW_File;
	    }
	} else {
	    fprintf(stderr, "%s: \"%s\": %d: token(\"%s\") unexpected\n",
				progname, ReadMe, RMLine, Token);
	    exit(1);
	}
    } /* while */

    (void) fclose(fp);
    efree(ReadMe);
}

/* NextToken -- read the next token from the config file.
 *
 * fd   -- the open config file
 * Name -- its name, used only in error messages
 * Map  -- LCMAP to fold the token to lower case, LCNOMAP to leave it
 *
 * A token ends at a space or newline; leading spaces and newlines are
 * skipped.  A token that starts with " or ' ends at the matching quote
 * instead, and a space or newline before that quote is a fatal error.
 * Case is not folded after a quote character has been seen.  A
 * backslash puts the following character into the token as it is.
 *
 * Returns a pointer to a static buffer that is overwritten by the next
 * call, or a null pointer at end of file.  End of file in the middle of
 * a token, and a token too long for the buffer, are fatal errors.
 */
static char *
NextToken(fd, Name, Map)
    FILE *fd;
    char *Name;
    int Map;
{
    static char buf[1024];
    char *BufEnd = &buf[sizeof(buf) - 1]; /* leaves room for the '\0' */
    register char *q = buf;
    int InQuote = 0; /* the opening quote character, or 0 */
    int ch;

    while ((ch = NextChar(fd, Name, Map)) != EOF) {
	switch (ch) {
	case '"': case '\'':
	    if (q == buf && !InQuote) {
		InQuote = ch;
	    } else if (ch == InQuote) {
		*q = '\0';
		if (AsciiTrace > 10) {
		    fprintf(stderr, "RM[%s] ", buf);
		}
		return buf;
	    }
	    Map = 0; /* no case conversion inside strings */
	    continue; /* the quote itself is not part of the token */
	case '\\':
	    if ((ch = NextChar(fd, Name, Map)) == EOF) {
		fprintf(stderr, "%s: %s; %d: EOF after \\ unexpected!\n",
							progname, Name, RMLine);
		exit(1);
	    }
	    break; /* store the escaped character below */
	case ' ':
	case '\n':
	    if (InQuote) {
		/* InQuote is a character, not a string */
		fprintf(stderr, "%s: %s: %d: missing quote -->%c<--\n",
						progname, Name, RMLine, InQuote);
		exit(1);
	    }
	    if (q == buf) {
		continue; /* nothing collected yet: skip leading space */
	    }
	    *q = '\0';
	    return buf;
	default:
	    break;
	}

	if (q >= BufEnd) {
	    *q = '\0';
	    fprintf(stderr, "%s: %s: %d: token too long (over %d characters)\n",
				progname, Name, RMLine, (int) (sizeof(buf) - 1));
	    exit(1);
	}
	*q++ = ch;
    }

    if (q > buf) {
	fprintf(stderr, "%s: %s: %d: unexpected end of file\n",
							progname, Name, RMLine);
	exit(1);
    }
    return (char *) 0;
}

/* NextChar -- return the next significant character of the config file.
 *
 * fd   -- the open config file
 * Name -- its name, used only in error messages
 * Map  -- non-zero to fold upper-case letters to lower case
 *
 * Everything from a # to the end of its line, newline included, is
 * thrown away; end of file inside such a comment is fatal.  RMLine is
 * incremented for every newline read.  Returns EOF at end of file.
 *
 * NOTE(review): blanks, tabs, form feeds and carriage returns come back
 * as a single space only when Map is zero; when Map is set they are
 * returned unchanged.
 */
static int
NextChar(fd, Name, Map)
    FILE *fd;
    char *Name;
    int Map;
{
    int c;

    for (;;) {
	c = getc(fd);
	if (c == EOF) {
	    return EOF;
	}

	if (c == '#') {
	    /* comment: discard up to and including the newline */
	    while ((c = getc(fd)) != '\n') {
		if (c == EOF) {
		    fprintf(stderr, "%s: %s: %d: unexpected end of file\n",
				progname, Name, RMLine);
		    exit(1);
		}
	    }
	    ++RMLine;
	    continue;
	}

	if (c == '\n') {
	    ++RMLine;
	} else if (!Map &&
		    (c == ' ' || c == '\t' || c == '\f' || c == '\r')) {
	    return ' ';
	}

	if (!Map) {
	    return c;
	}
	return isupper(c) ? tolower(c) : c;
    }
}

/* cknatstr -- check that a string has the form of a whole number.
 *
 * Accepted: optional leading white space, an optional + or - sign, and
 * then digits up to the end of the string.  A string that ends straight
 * after the white space or the sign is rejected.
 *
 * Returns 1 if str has that form, 0 otherwise.
 *
 * Note that a leading minus sign is accepted, so a caller that needs a
 * value that is positive or zero must still check the converted number.
 */
int
cknatstr(str)
    char *str;
{
    register char *p = str;

    /* The <ctype.h> tests are only defined for EOF and for values that
     * fit in an unsigned char, hence the casts: plain char may be signed.
     */

    /* skip leading white space */
    while (isspace((unsigned char) *p)) p++;
    if (!*p) return 0;

    /* allow a leading sign */
    if (*p == '-' || *p == '+') p++;
    if (!*p) return 0;

    /* now skip digits... */
    while (isdigit((unsigned char) *p)) p++;

    /* p is still at str only if the very first character was rejected */
    return (p > str && *p == '\0');
}

/* you can tell I am tired by the extra end-while etc. comments.
 * wonder if it will work?
 * perhaps if I took my socks off too.
 *
 * Hmm, yeah, that worked.
 */
