/* MailFilter.c -- Copyright 1989 Liam R. Quin.  All Rights Reserved.
 * This code is NOT in the public domain.
 * See the file COPYRIGHT for full details.
 */

/* $Id: MailFilter.c,v 1.5 90/10/06 00:57:24 lee Rel1-10 $
 */

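/* Filter for mail articles.
 * Mask out all of the header except the lines starting with
 *	Subject
 *	From
 *	Date
 *	Cc:
 *	Organi[sz]ation
 *	To:
 * The words on every other header line are replaced by qxxx placeholders,
 * so that they are not indexed but word counts are unaffected.
 *
 * See FilterMain and wordrules.h for more info.
 *
 */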

#ifdef SYSV
 extern int _filbuf(), _flsbuf();
#endif
#include <stdio.h>
#include <malloc.h>
#include <ctype.h>
#include "wordrules.h"

#include "emalloc.h"

/* String equality test that compares the first characters inline and only
 * calls strcmp() when they match.  Both arguments are evaluated more than
 * once, so they must not have side effects.
 */
#define STREQ(boy, girl) (*(boy) == *(girl) && strcmp((boy), (girl)) == 0)

extern char *progname;

/** Unix system calls used in this file **/
    /* (none) */
/** Unix Library Functions used in this file: **/
#ifndef tolower
 extern int tolower();
#endif
extern int strcmp();

/** Functions within this file used before they're defined: **/
void Header(), Body();
int GetChar();

/** **/

void Filter();

/* Keywords of the header lines that are kept; the words on all other
 * header lines are replaced by placeholders.
 *
 * IsWanted() stops scanning as soon as it meets an entry whose first
 * character is greater than that of the word being looked up, so every
 * entry must be in lower case and the list must stay sorted.
 * The list ends with a null pointer.
 */
char *KeepThese[] = {
    "cc", "date", "from",
    "organisation", "organization",
    "subject", "to",
    (char *) 0	/* end of list */
};

/* Case-insensitive string equality test.
 *
 * Returns 1 if s1 and s2 have the same length and match character for
 * character once upper case letters are folded to lower case, and 0
 * otherwise.  Note that this is the opposite sense to strcmp(), which
 * returns 0 for equal strings.
 */
int icstreq(s1, s2)
    char *s1, *s2;
{
    register int ch1, ch2;

    for (; *s1 && *s2; s1++, s2++) {
	if (*s1 == *s2) continue; /* identical, no need to fold case */

	/* The <ctype.h> functions are only defined for values that fit
	 * in an unsigned char (or EOF); a plain char can be negative for
	 * 8-bit characters, so convert before folding.
	 */
	ch1 = tolower((unsigned char) *s1);
	ch2 = tolower((unsigned char) *s2);
	if (ch1 != ch2) return 0; /* the strings differ */
    }

    /* Equal only if both strings ended at the same place */
    return (*s1 == '\0' && *s2 == '\0');
}

/* Decide whether a header keyword is one of those listed in KeepThese.
 *
 * String is the keyword to look up; Header() passes the first word of a
 * header line.  The comparison ignores case.
 * Returns 1 if String matches an entry of KeepThese, 0 otherwise.
 */
int
IsWanted(String)
    char *String;
{
    char **pp;
    /* Go through unsigned char: the <ctype.h> functions are not defined
     * for the negative values a plain char can hold.
     */
    int ch = tolower((unsigned char) String[0]);

    for (pp = KeepThese; *pp && **pp; pp++) {
	/* KeepThese is sorted and in lower case, so once an entry starts
	 * with a later character than String there can be no match.
	 */
	if (**pp > ch) break; /* gone too far */
	if (icstreq(String, *pp)) return 1;
    }
    return 0;
}

/* Filter one mail article: process the header with Header() and then
 * the remainder of the input with Body().  Both write their output
 * to standard output.
 *
 * InputFile is the stream to read; Name is the folder name, which
 * Header() uses in its warning message.
 */
void
Filter(InputFile, Name)
    FILE *InputFile;
    char *Name;
{
    Header(InputFile, Name);
    Body(InputFile, Name);
}

/* Where Header() has got to on the current header line, relative to the
 * first word on that line.
 */
typedef enum {
    F_NotSeenAnythingYet = 0,	/* no word character seen on this line yet */
    F_InTheFirstWord,		/* collecting the characters of the first word */
    F_AfterTheFirstWord		/* first word complete; rest of the line */
} t_FirstWord;

/* Nonzero while the character most recently classified by Header() or
 * Body() was part of a word.  GetChar() also reads it when deciding
 * whether a single quote should be returned as a quote or as a space.
 */
int InWord = 0;

/* Filter the header of a mail article from InputFile onto standard output.
 *
 * Each header line is handled according to its first word (the keyword):
 *  - if IsWanted() accepts the keyword, the words on the line are copied;
 *  - otherwise every word on the line, the keyword included, is replaced
 *    by a placeholder of the same length: a 'q' followed by 'x's.
 * Characters that are not part of a word are mostly written as spaces;
 * see the comments in the code for the exceptions.
 *
 * The header ends, and the function returns, at
 *  - a blank line (a newline is written), or
 *  - a line whose first character neither starts a word nor is a space
 *    or tab (that character is written).
 * If end of file is reached first, a warning that mentions Name is
 * printed on stderr.
 *
 * The buffer holding the keyword is static: it is allocated on the first
 * call, grows as needed and is never freed.
 */
void
Header(InputFile, Name)
    FILE *InputFile;
    char *Name;
{
    int AtStartOfLine = 1;
    int IgnoreLine = 0; /* initialised for lint... */
    t_FirstWord FirstWord = F_NotSeenAnythingYet;
    int ch;
    static int BufLen;
    static char *Buffer = 0;
    /* Must start out true.  It is consulted as soon as the keyword of the
     * first line has been read, which can be before any of the
     * assignments below have run; the next word character seen on an
     * ignored line is then the start of a placeholder word.
     */
    int AtStartOfWord = 1;
    register char *q;

    if (Buffer == 0) {
	BufLen = 24;
	Buffer = emalloc(BufLen);
    }

    q = Buffer;
    InWord = 0;

    while ((ch = GetChar(InputFile)) != EOF) {
	if (ch == '\n' && AtStartOfLine) {
	    /* a blank line: the end of the header */
	    putchar('\n');
	    return;
	}

	InWord = InWord ? WithinWord(ch) : StartsWord(ch);

	switch (FirstWord) {
	case F_NotSeenAnythingYet:
	    if (InWord) {
		/* First character of the keyword: start saving it */
		FirstWord = F_InTheFirstWord;
		if (q - Buffer >= BufLen - 1) {
		    /* keep room for this character and the final '\0' */
		    int where = q - Buffer;

		    BufLen += 24;
		    Buffer = erealloc(Buffer, BufLen);
		    q = &Buffer[where];
		}
		*q++ = ch;
	    } else {
		if (AtStartOfLine && ch != ' ' && ch != '\t') {
		    /* Not a header line and not a continuation line,
		     * so the header is over.
		     */
		    putchar(ch);
		    return;
		}
		/* leading non-word characters all become spaces */
		putchar(' ');
	    }
	    break;
	case F_InTheFirstWord:
	    if (InWord) {
		if (q - Buffer >= BufLen - 1) {
		    /* keep room for this character and the final '\0' */
		    int where = q - Buffer;

		    BufLen += 24;
		    Buffer = erealloc(Buffer, BufLen);
		    q = &Buffer[where];
		}
		*q++ = ch;
		break;
	    } else { /* reached the end of the first word on the line */
		*q = '\0';
		/* See if it's a keyword */
		if ((IgnoreLine = !IsWanted(Buffer)) != 0) {
		    /* Turn the word into one that won't get indexed,
		     * so that word counts are unaffected:
		     * We use qxxxxxxx (any number of x's) for this.
		     */
		    for (q = Buffer; *q; q++) {
			putchar((q == Buffer) ? 'q' : 'x');
		    }
		    putchar (ch == '\n' ? '\n' : ' ');
		} else {
		    printf("%s%c", Buffer, ch == '\n' ? ch : ' ');
		}
		FirstWord = F_AfterTheFirstWord;
	    }
	    break;
	default: /* F_AfterTheFirstWord: the rest of the line */
	    /* The same end-of-line reset is repeated after the switch;
	     * doing it here as well clears IgnoreLine before the newline
	     * itself is written.
	     */
	    if ((AtStartOfLine = (ch == '\n'))) {
		IgnoreLine = 0;
		q = Buffer;
		FirstWord = F_NotSeenAnythingYet;
		AtStartOfWord = 1;
	    }
	    if (InWord && !IgnoreLine) {
		/* part of a word on a line that is being kept */
		putchar(ch);
	    } else {
		if (AtStartOfWord && InWord) {
		    /* first letter of a word on an ignored line */
		    putchar('q');
		    AtStartOfWord = 0;
		} else if (InWord) {
		    putchar('x');
		} else if (isspace(ch)) {
		    /* white space is copied as it is */
		    putchar(ch);
		} else {
		    /* any other non-word character becomes a space */
		    putchar(' ');
		}
	    }
	    if (!InWord) AtStartOfWord = 1;
	}
	/* A newline starts a fresh header line, whatever state we were in */
	if ((AtStartOfLine = (ch == '\n'))) {
	    IgnoreLine = 0;
	    q = Buffer;
	    FirstWord = F_NotSeenAnythingYet;
	    AtStartOfWord = 1;
	}
    }

    /* The loop only ends at EOF (the end of the header returns directly),
     * so getting here means the input ran out while still in the header.
     */
    fprintf(stderr, "%s: warning: Mail folder %s has no message body\n",
		    progname, Name);
}

/* Copy the rest of InputFile to standard output, a character at a time.
 * Characters that belong to a word are copied; every other character is
 * written as a space, except a newline, which stays a newline.
 * InWord carries over from whatever was read before this call.
 * Name is not used here.
 */
void
Body(InputFile, Name)
    FILE *InputFile;
    char *Name;
{
    int c;

    for (;;) {
	c = GetChar(InputFile);
	if (c == EOF) break;

	/* A word carries on with WithinWord() and begins with StartsWord() */
	InWord = InWord ? WithinWord(c) : StartsWord(c);

	if (InWord) {
	    putchar(c);
	} else if (c == '\n') {
	    putchar('\n');
	} else {
	    putchar(' ');
	}
    }
}

/* Read the next character from fd, with special treatment of single
 * quotes: a quote is returned as a quote only when InWord is set and the
 * character after it is a letter; otherwise a space is returned in its
 * place.
 *
 * Deciding this means reading one character ahead.  That character is
 * held in a static variable and handed back by the next call, so the
 * lookahead is shared by every stream this function is used on.
 * A held value of zero means "nothing held", so a NUL byte directly
 * after a quote is not handed back.
 */
#ifdef __GNU__
inline
#endif
int
GetChar(fd)
    FILE *fd;
{
    static int Held = 0; /* character read ahead, or 0 if none */
    int c;

    /* Hand back the character read ahead by the previous call, if any */
    if (Held != 0) {
	c = Held;
	Held = 0;
	return c;
    }

    c = getc(fd);
    if (c != '\'') {
	return c; /* anything but a quote (EOF included) goes straight back */
    }

    /* Only return a single quote if it is surrounded by letters */
    Held = getc(fd);
    return (InWord && isalpha(Held)) ? '\'' : ' ';
}

/*
 * $Log:	MailFilter.c,v $
 * Revision 1.5  90/10/06  00:57:24  lee
 * Prepared for first beta release.
 * 
 * Revision 1.4  90/09/20  16:35:40  lee
 * Fixed icstrcmp() and IsWanted() so that the unwanted parts of headers
 * get deleted again.... (oops!)
 * 
 * Revision 1.3  90/09/19  21:11:54  lee
 * Improved end-of-header detection.
 * Now supports turning unindexed stuff into qxxxxx-words.
 * 
 * Revision 1.2  90/08/29  21:55:57  lee
 * Now handles mh mail better.
 * 
 * Revision 1.1  90/08/09  19:17:56  lee
 * Initial revision
 * 
 * Revision 1.2  89/09/16  21:16:01  lee
 * First demonstratable version.
 * 
 * Revision 1.1  89/09/07  21:05:48  lee
 * Initial revision
 * 
 */
