 /*
  * Khoros: $Id$
  */

#if !defined(lint) && !defined(SABER)
static char rcsid[] = "Khoros: $Id$";
#endif

 /*
  * $Log$
  */

/*
 *   Copyright, 1991, The Regents of the University of California.
 *   This software was produced under a U.S. Government contract
 *   (W-7405-ENG-36) by the Los Alamos National Laboratory, which is
 *   operated by the University of California for the U.S. Department
 *   of Energy.  The U.S. Government is licensed to use, reproduce,
 *   and distribute this software.  Neither the Government nor the
 *   University makes any warranty, express or implied, or assumes
 *   any liability responsibility for the use of this software.
 */

#include<stdio.h>

#include "lanlmisc.h"

#define VALID_CLUSTER -1
#define VERY_INVALID_CLUSTER 100000

/*
 *  merge_alg_3 ()    Merging Algorithm 3:  Merge clusters together based
 *                    on the Mahalanobis distance measure.  At any given 
 *                    iteration, the two clusters for which the minimum
 *                    "merging criterion value" is found are merged.
 *                    A cluster definition consists of a mean vector, number 
 *                    of elements and upper triangular portion of the
 *                    covariance matrix.
 *
 *                    Returned value is always 1.
 *
 *  Written by:  Patrick M. Kelly
 *  Date:        8/6/91
 */

int merge_alg_3 ( dim, num_entries, final_num, means, count, cov_upper, 
	mapping )

int dim;			/* Problem dimension */
int *num_entries;	/* Number of entries in the table */
int final_num;		/* Desired number of final clusters */
float **means;		/* Mean vectors */
int *count;		/* Number of elements */
float ***cov_upper;	/* Upper triangular covariance matrix */
int **mapping;		/* Mapping for the cluster reduction */

{
	int i, j, k;			/* Loop control */
	int row, col;			/* Location of smalles distance */
	float **sq_dists;		/* Squared distances */
	float max;			/* Maximum starting distance */
	int num_deleted;		/* Number of clusters deleted */
	float mindist;			/* Minimum squared distance */
	int num_cur_entries;	/* Number of current entries in set of clusters */
	float ***cov_cfactors;	/* Cholesky factorization of cov. matrices */

	float **vecs_1;          /* Mid-calculation vectors */
	float **vals_1;          /* Mid-calculation values */
	float **vals_2;          /* Mid-calculation values */
	float k1, k2;			/* Mid-calculation values */
	float alpha_1;			/* Mid-calculation values */

	float *tmp_vec;		/* Temporary */
	float tmp_val;			/* Temporary */

	/*  FACTOR THE COVARIANCE MATRICES (CHOLESKY FACTORIZATION)  */
	/*  INITIALIZE ALL CLUSTERS TO VALID OR INVALID  */

	sq_dists = SM_get_matrix ( *num_entries );

	cov_cfactors = (float ***) malloc ( *num_entries * sizeof (float **) );
	if ( cov_cfactors == NULL ) {
		fprintf (stderr, "MEMORY ALLOCATION FAILURE\n");
		exit ( -1 );
	}

	for ( i = 0 ; i < *num_entries ; i ++ ) {
		cov_cfactors [i] = SM_get_matrix ( dim );
		SM_copy_matrix ( dim, cov_upper [i], cov_cfactors [i] );
		if ( ! SM_cfactor ( dim, cov_cfactors [i] ) ) {
			fprintf (stderr,"WARNING: matrix not pd %d\n",i);
			sq_dists [i][i] = VERY_INVALID_CLUSTER;
		} else {
			sq_dists [i][i] = VALID_CLUSTER;
		}
	}

	/*
	 *                          -1
	 *  vecs_1 [i]      =  SIGMA   mu
	 *                          i    i
	 *
	 *
	 *                       T      -1
	 *  vals_1 [i][j]   =  mu  SIGMA   mu
	 *                       i      j    i
	 *
	 *
	 *                       T      -1
	 *  vals_2 [i][j]   =  mu  SIGMA   mu
	 *                       i      j    j
	 */

	tmp_vec = (float *) malloc ( dim * sizeof(float) );
	if ( tmp_vec == NULL ) {
		fprintf (stderr, "MEMORY ALLOCATION FAILURE\n");
		exit ( -1 );
	}

	vecs_1 = contig_matrix ( *num_entries, dim );
	vals_1 = contig_matrix ( *num_entries, *num_entries );
	vals_2 = contig_matrix ( *num_entries, *num_entries );

	for ( j = 0 ; j < *num_entries ; j ++ ) {
	if ( sq_dists [j][j] == VALID_CLUSTER ) {

		for ( k = 0 ; k < dim ; k ++ ) vecs_1 [j][k] = means [j][k];
		SM_csolve ( dim, cov_cfactors [j], vecs_1 [j] );
		for ( i = 0 ; i < *num_entries ; i ++ ) {
			for ( k = 0 ; k < dim ; k ++ ) tmp_vec [k] = means [i][k];
			SM_csolve ( dim, cov_cfactors [j], tmp_vec );
			vals_1 [i][j] = dot_product ( means [i], tmp_vec, dim );
			vals_2 [i][j] = dot_product ( means [i], vecs_1 [j], dim );
		}

	}
	}

	/*  INITIALIZE TABLE OF MERGING CRITERION VALUES  */

	max = 0.0;

	for ( i = 0 ; i < *num_entries ; i ++ ) {
	for ( j = i+1 ; j < *num_entries ; j ++ ) {

		if ( (sq_dists [j][j] == VALID_CLUSTER) && 
			(sq_dists [i][i] == VALID_CLUSTER) ) {

			k1 = vals_1 [i][j] + vals_1 [j][j] - 2.0 * vals_2 [i][j];
			k2 = vals_1 [j][i] + vals_1 [i][i] - 2.0 * vals_2 [j][i];
			tmp_val = k1 * k2;
			if ( tmp_val < 0.0 ) {
				fprintf ( stderr, "ERROR:  sqrt domain error\n");
				fprintf ( stderr, "i = %d   j = %d\n",i,j);
				fprintf ( stderr, "k1 = %f   k2 = %f\n",k1,k2);
				exit (-1);
			}
			tmp_val = sqrt ( tmp_val );
			alpha_1 = ( - k2 + tmp_val ) / ( k1 - k2 );
			sq_dists [i][j] = alpha_1 * alpha_1 * k1;
			if ( sq_dists [i][j] > max ) max = sq_dists [i][j] ;

		}

	}
	}

	/*  REDUCE THE NUMBER OF CLUSTERS  */

	num_deleted = 0;

	while ( num_deleted < (*num_entries - final_num) ) {

		/*  FIND MINIMUM DISTANCE  */

		UT_find_min_pos_2 (sq_dists, *num_entries, &mindist, &row, &col, max);

		/*  MERGE CLUSTERS  */

		SM_comb_stats ( dim, count[row], count[col], &(count[row]),
			means[row], means[col], means[row], cov_upper[row], 
			cov_upper[col], cov_upper[row] );

		/*  DELETE CLUSTER BY SETTING DIAGONAL ELEMENT > 0  */

		num_deleted ++;
		sq_dists [col][col] = (float) row;    /* annexed by cluster "row" */

		/*  ANYTHING PREVIOUSLY ANNEXED BY "col" MUST BE CHANGED  */

		for ( k = 0 ; k < *num_entries ; k ++ ) {
			if ( sq_dists [k][k] == (float) col ) {
				sq_dists [k][k] = (float) row ;
			}
		}

		/*  RE-FACTOR THE NEW COVARIANCE MATRIX  */

		SM_copy_matrix ( dim, cov_upper [row], cov_cfactors [row] );
		if ( ! SM_cfactor ( dim, cov_cfactors [row] ) ) {
			fprintf (stderr,"ERROR matrix %d is not pd\n",row);
			exit (-1);
		}

		/*  CALCULATE NEW VALUES  */

		for ( k = 0 ; k < dim ; k ++ ) vecs_1 [row][k] = means [row][k];
		SM_csolve ( dim, cov_cfactors [row], vecs_1 [row] );

		for ( i = 0 ; i < *num_entries ; i ++ ) {
		if ( sq_dists [i][i] == VALID_CLUSTER ){

			for ( k = 0 ; k < dim ; k ++ ) tmp_vec [k] = means [i][k];
			SM_csolve ( dim, cov_cfactors [row], tmp_vec );
			vals_1 [i][row] = dot_product ( means [i], tmp_vec, dim );
			vals_2 [i][row] = dot_product ( means [i], vecs_1 [row], dim);
			for ( k = 0 ; k < dim ; k ++ ) tmp_vec [k] = means [row][k];
			SM_csolve ( dim, cov_cfactors [i], tmp_vec );
			vals_1 [row][i] = dot_product ( means [row], tmp_vec, dim);
			vals_2 [row][i] = dot_product ( means [row], vecs_1 [i], dim);

		}
		}

		for ( i = 0 ; i < row ; i ++ ) {
		if ( sq_dists [i][i] == VALID_CLUSTER ){

			k1 = vals_1 [i][row] + vals_1 [row][row] - 2.0 * vals_2 [i][row];
			k2 = vals_1 [row][i] + vals_1 [i][i] - 2.0 * vals_2 [row][i];
			tmp_val = k1 * k2;
			if ( tmp_val < 0.0 ) {
				fprintf ( stderr, "ERROR:  sqrt domain error\n");
				fprintf ( stderr, "i = %d   j = %d\n",i,row);
				fprintf ( stderr, "k1 = %f   k2 = %f\n",k1,k2);
				exit (-1);
			}
			tmp_val = sqrt ( tmp_val );
			alpha_1 = ( - k2 + tmp_val ) / ( k1 - k2 );
			sq_dists [i][row] = alpha_1 * alpha_1 * k1;
		}
		}

		for ( i = row+1 ; i < *num_entries ; i ++ ) {
		if ( sq_dists [i][i] == VALID_CLUSTER ){

			k1 = vals_1 [i][row] + vals_1 [row][row] - 2.0 * vals_2 [i][row];
			k2 = vals_1 [row][i] + vals_1 [i][i] - 2.0 * vals_2 [row][i];
			tmp_val = k1 * k2;
			if ( tmp_val < 0.0 ) {
				fprintf ( stderr, "ERROR:  sqrt domain error\n");
				fprintf ( stderr, "i = %d   j = %d\n",i,row);
				fprintf ( stderr, "k1 = %f   k2 = %f\n",k1,k2);
				exit (-1);
			}
			tmp_val = sqrt ( tmp_val );
			alpha_1 = ( - k2 + tmp_val ) / ( k1 - k2 );
			sq_dists [row][i] = alpha_1 * alpha_1 * k1;

		}
		}

	}

	/*  CREATE THE MAPPING FOR THE CLUSTER NUMBERS  */

	*mapping = (int *) malloc ( sizeof(int) * *num_entries );
	if ( *mapping == NULL ) {
		fprintf(stderr,"MEMORY ALLOCATION FAILURE\n");
		exit (-1);
	}

	/*  PHYSICALLY DELETE INVALID CLUSTERS  */

	num_cur_entries = 0;
	for ( i = 0 ; i < *num_entries ; i ++ ) {

		if ( (sq_dists [i][i] == VALID_CLUSTER) || 
			(sq_dists [i][i] == VERY_INVALID_CLUSTER) ) {

			means [num_cur_entries] = means [i];
			cov_upper [num_cur_entries] = cov_upper [i];
			count [num_cur_entries] = count [i];

			(*mapping) [i] = num_cur_entries ;

			num_cur_entries ++;

		} else {

			(*mapping) [i] = (*mapping) [(int)sq_dists [i][i]];

		}

		if ( (*mapping) [i] >= final_num ) 
			fprintf (stderr,"WARNING: (%d) is wrong (%d)\n",i,(*mapping)[i]);

	}

	/*  FREE MEMORY AND EXIT  */

	free ( sq_dists [0] );
	free ( sq_dists );

	for ( i = 0 ; i < *num_entries ; i ++ ) {
		free ( cov_cfactors [i][0] );
		free ( cov_cfactors [i] );
	}
	free ( cov_cfactors );

	free ( vecs_1 [0] );
	free ( vecs_1 );

	free ( vals_1 [0] );
	free ( vals_1 );

	free ( vals_2 [0] );
	free ( vals_2 );

	free ( tmp_vec );

	*num_entries -= num_deleted;
	if ( *num_entries != num_cur_entries ) {
		fprintf (stderr,"WARNING:  Bogus number of entries?\n");
		fprintf (stderr,"          %d vs %d\n",*num_entries,num_cur_entries);
	}

     return (1);
}

