/* numbers.c -- Copyright 1989 Liam R. Quin.  All Rights Reserved.
 * This code is NOT in the public domain.
 * See the file COPYRIGHT for full details.
 */

/* Routines to read and write numbers in a compressed format, preserving
 * block boundaries.
 * The actual compression seems to be about 50%, as most numbers turn
 * out to fit in 16 bits.  Interestingly, there is room for another one
 * or two bits, I think, that could be used for something else, in the
 * main pblock index.  For example, it could mark whether words were
 * plural/-"ing"/"un"-/ with 2 bits.
 *
 * $Id: numbers.c,v 1.4 90/10/06 00:12:16 lee Rel1-10 $
 *
 * $Log:	numbers.c,v $
 * Revision 1.4  90/10/06  00:12:16  lee
 * Prepared for first beta release.
 * 
 * Revision 1.3  90/08/09  19:16:49  lee
 * BSD lint and fixes...
 * 
 * Revision 1.2  90/04/18  19:47:13  lee
 * More flexible (and slightly more compact) number format.
 * 
 * Revision 2.2  89/10/08  20:46:36  lee
 * Working version of nx-text engine.  Addfile and wordinfo work OK.
 * 
 * Revision 2.1  89/10/02  01:15:15  lee
 * New index format, with Block/WordInBlock/Flags/BytesSkipped info.
 * 
 * Revision 1.2  89/09/16  21:16:26  lee
 * First demonstratable version.
 * 
 * Revision 1.1  89/09/07  21:06:01  lee
 * Initial revision
 * 
 *
 */

#include "globals.h"

#ifdef SYSV
	extern int _filbuf(), _flsbuf();
#endif
#include <stdio.h>
#include "numbers.h"

/* ReadNumber and WriteNumber take/return a long, using a compression
 * algorithm to reduce the amount of data taken.
 * This comment originally described a scheme like internet addresses:
 * a 0 in the top bit followed by a 0 means it's one byte
 * a 0 followed by a 1 means it's 2 bytes
 * a 1 followed by a 0 means it's 3 bytes, and
 * a 1 followed by a 1 means it's 4 bytes.
 * The code below does not use that scheme.  It uses the alternative of
 * a 1 in the top bit of a byte to mean "another byte follows", hence
 * fitting 7 bits into each byte, least significant 7 bits first.
 * The advantages of considering more than one number at a time and
 * using compress-style LS packing are not clear.
 * In particular, speed of recovery is an issue too.
 *
 * The routines that use (char *) pointers instead of files are prefixed
 * with an s; the routines that use a FILE pointer are prefixed with an f.
 * See numbers.h for some related macros.
 *
 */


#ifdef TESTNUMBERS
char *progname;

/* Test driver, compiled only when TESTNUMBERS is defined.
 *
 * Each command-line argument is converted with atol(), written to the
 * scratch file /tmp/boy with fWriteNumber(), read back from that file
 * with fReadNumber(), and the two values are compared.
 *
 * Returns 0 if every argument survived the round trip, and 1 if any
 * value differed or the scratch file could not be opened or written.
 *
 * NOTE(review): fWriteNumber() and fReadNumber() are defined further
 * down this file; presumably numbers.h declares them before this
 * point -- confirm, since fReadNumber() does not return int.
 */
int
main(ac, av)
    int ac;
    char *av[];
{
    extern long atol();
    FILE *f;
    extern FILE *fopen();
    int Status = 0;

    progname = av[0];

    /* "> 0" so that an empty argument vector (ac == 0) ends the loop
     * instead of counting down through negative numbers.
     */
    while (--ac > 0) {
	unsigned long L = atol(*++av);
	unsigned long L2;

	f = fopen("/tmp/boy", "w");
	if (f == NULL) {
	    perror("/tmp/boy");
	    return 1;
	}
	/* L and L2 are unsigned long, so they need %lu, not %u or %ld */
	printf("Write %lu\n", L);
	fWriteNumber(f, L);
	/* fclose() flushes the buffer, so this is where a write error
	 * would show up.
	 */
	if (fclose(f) == EOF) {
	    perror("/tmp/boy");
	    return 1;
	}
	f = fopen("/tmp/boy", "r");
	if (f == NULL) {
	    perror("/tmp/boy");
	    return 1;
	}
	L2 = fReadNumber(f);
	printf("Read %lu\n", L2);
	if (L != L2) {
	    printf("**** ERROR **** %lu != %lu\n", L, L2);
	    Status = 1;
	}
	fclose(f);
    }
    return Status;
}
#endif /*TESTNUMBERS*/

/* fWriteNumber -- write Number to the stream f in compressed form.
 *
 * The value is emitted seven bits at a time, least significant group
 * first.  Every byte except the last has its top bit (0200) set to say
 * that another byte follows; the last byte has the top bit clear.
 * So a value of up to 7 bits takes one byte, up to 14 bits two bytes,
 * up to 21 bits three bytes, and so on.
 * The values returned by putc() are not checked.
 */
INLINE void
fWriteNumber(f, Number)
    FILE *f;
    unsigned long Number;
{
    for (;;) {
	int Low7 = (int) (Number & 0177);

	Number >>= 7;
	if (Number == 0) {
	    /* nothing left above these bits: final byte, top bit clear */
	    putc(Low7, f);
	    return;
	}
	putc(Low7 | 0200, f);
    }
}

/* PutC -- store ch (converted to char) at **S, then advance *S by one. */
#define PutC(ch, S)  (*((*S)++) = (char) (ch))

/* sWriteNumber -- store Number in compressed form at *s.
 *
 * Same format as the stream writer: seven bits per byte, least
 * significant group first, with the top bit (0200) set on every byte
 * except the last.  On return *s points just past the last byte stored.
 * No bounds checking is done here; there is no length argument.
 */
INLINE void
sWriteNumber(s, Number)
    char **s;
    unsigned long Number;
{
    unsigned long Rest;

    /* Rest is whatever lies above the low seven bits of Number; while
     * it is non-zero the byte being stored is not the last one.
     */
    for (Rest = Number >> 7; Rest != 0; Rest >>= 7) {
	PutC((Number & 0177) | 0200, s);
	Number = Rest;
    }
    PutC(Number & 0177, s);
}

/* fReadNumber -- read one compressed number from the stream f.
 *
 * The number is stored 7 bits per byte, least significant group first;
 * a byte with its top bit (0200) set is followed by another byte, and
 * the first byte with the top bit clear ends the number.
 *
 * Returns the decoded value.  If end of file or a read error is met
 * before the final byte, reading stops and the bits gathered so far
 * are returned (0 if nothing was read); there is no in-band error
 * value, so callers that care must check feof()/ferror() on f.
 * Bits that would not fit in an unsigned long are discarded, but the
 * bytes carrying them are still consumed up to the terminating byte.
 */
INLINE unsigned long
fReadNumber(f)
    FILE *f;
{
    unsigned long Result = 0L;
    int Byte;
    int Shift = 0;
    /* number of bits in Result, assuming 8-bit bytes */
    int MaxShift = (int) (sizeof(Result) * 8);

    do {
	/* EOF is negative, so it has the 0200 bit set; it must be
	 * tested for explicitly or the loop would never end.
	 */
	if ((Byte = getc(f)) == EOF) {
	    break;
	}
	if (Shift < MaxShift) {
	    /* Widen before shifting: shifting an int here overflows
	     * for values of 2^31 and above.
	     */
	    Result |= ((unsigned long) (Byte & 0177)) << Shift;
	    Shift += 7;
	}
    } while (Byte & 0200);
    return Result;
}

/* GetC -- fetch the byte at **S as an unsigned value in 0..255 and
 * advance *S by one.  The byte is converted after it is fetched, so
 * *S is incremented through its own type rather than through a cast
 * of S to (unsigned char **).
 */
#define GetC(S) \
    ( (unsigned int) (unsigned char) *((*(S))++) )

/* sReadNumber -- decode one compressed number from memory at *s.
 *
 * The number is stored 7 bits per byte, least significant group first;
 * a byte with its top bit (0200) set is followed by another byte, and
 * the first byte with the top bit clear ends the number.
 *
 * Returns the decoded value and leaves *s pointing just past the
 * terminating byte.  Bits that would not fit in an unsigned long are
 * discarded, but the bytes carrying them are still consumed.
 * There is no length argument, so nothing here stops the scan running
 * past the end of a buffer that lacks a terminating byte.
 */
INLINE unsigned long
sReadNumber(s)
    char **s;
{
    unsigned long Result = 0L;
    unsigned int Byte;
    int Shift = 0;
    /* number of bits in Result, assuming 8-bit bytes */
    int MaxShift = (int) (sizeof(Result) * 8);

    do {
	Byte = GetC(s);
	if (Shift < MaxShift) {
	    /* Widen before shifting: shifting an int here overflows
	     * for values of 2^31 and above.
	     */
	    Result |= ((unsigned long) (Byte & 0177)) << Shift;
	    Shift += 7;
	}
    } while (Byte & 0200);
    return Result;
}
