//datainfo.cpp
#include"datainfo.h"
#include"allocmem.h"
#include"math.h"
#include"stdio.h"
#include<stdlib.h>



/*
 * Default constructor.
 *
 * Puts the object into a well-defined "nothing loaded yet" state: all
 * dimension counters are zero and every buffer pointer is NULL until
 * init() allocates them.
 *
 * The test-data stream is set to NULL as well, because the destructor
 * closes it and must be able to tell whether a file was ever opened.
 * OutputFlag is given a definite value so that it is never read while
 * indeterminate.
 */
DataInfoAndPrepro::DataInfoAndPrepro()
{
	/* problem dimensions */
	N = 0;
	Nout = 0;
	Nv = 0;
	ClustersSoFar = 0;
	OutputFlag = 0;

	/* buffers allocated later by init() */
	GlobalWtsMartrix = NULL;
	DistanceMeasure = NULL;
	Inputs = NULL;
	Outputs = NULL;
	InputMean = NULL;
	InputVariance = NULL;
	OutlierDistance = NULL;
	GlobalCrossCor = NULL;
	GlobalAutoCor = NULL;
	ModWts = NULL;
	ClusterCenters = NULL;

	/* stream opened later by GetDataInformationFromUser() */
	fpTestingdata = NULL;
}

/*
 * Allocates all working buffers from the current values of N, Nout and
 * ClustersSoFar, which must be set before this is called.
 *
 * Vectors and matrices come from the FarAllocate* helpers; the per-cluster
 * weight table ModWts is an array of ClustersSoFar matrices, each sized
 * Nout x (N+1).  The program terminates with exit(1) if the ModWts pointer
 * array cannot be obtained.
 */
void DataInfoAndPrepro::init()
{
	const int widthPlusOne = N + 1;

	GlobalWtsMartrix = FarAllocateMatrixMemory(Nout, widthPlusOne);
	DistanceMeasure  = FarAllocateMemory(N);
	InputMean        = FarAllocateMemory(N);
	InputVariance    = FarAllocateMemory(N);
	Inputs           = FarAllocateMemory(widthPlusOne);
	Outputs          = FarAllocateMemory(Nout);
	GlobalAutoCor    = FarAllocateMatrixMemory(widthPlusOne, widthPlusOne);
	GlobalCrossCor   = FarAllocateMatrixMemory(Nout, widthPlusOne);
	OutlierDistance  = FarAllocateMemory(ClustersSoFar);
	ClusterCenters   = FarAllocateMatrixMemory(ClustersSoFar, N);

	/* one matrix pointer per cluster */
	ModWts = (double***)calloc(ClustersSoFar, sizeof(double**));
	if (NULL == ModWts)
	{
		printf("Memory could not be allocated....\n");
		exit(1);
	}

	int cluster = 0;
	while (cluster < ClustersSoFar)
	{
		ModWts[cluster] = FarAllocateMatrixMemory(Nout, widthPlusOne);
		++cluster;
	}
}

/*
 * Destructor.
 *
 * Releases the buffers obtained in init() and closes the test-data stream
 * if one is open.
 *
 * Each per-cluster matrix in ModWts is released before the ModWts pointer
 * array itself; previously only the outer array was freed, leaking every
 * matrix.  The stream is only closed when it is non-NULL, because passing
 * a null pointer to fclose() is undefined behaviour.
 *
 * NOTE(review): matrices are released with a single free(), as the
 * original code does for the other matrices.  Whether that fully releases
 * what FarAllocateMatrixMemory allocates depends on allocmem - confirm.
 */
DataInfoAndPrepro::~DataInfoAndPrepro()
{
	int i;

	free(GlobalWtsMartrix);
	free(DistanceMeasure);
	free(InputMean);
	free(InputVariance);
	free(Inputs);
	free(Outputs);
	free(GlobalAutoCor);
	free(OutlierDistance);
	free(ClusterCenters);

	if (ModWts != NULL)
	{
		/* init() allocated exactly ClustersSoFar entries */
		for (i = 0; i < ClustersSoFar; i++)
		{
			free(ModWts[i]);
		}
		free(ModWts);
	}

	free(GlobalCrossCor);

	if (fpTestingdata != NULL)
	{
		fclose(fpTestingdata);
	}
}

/*
 * Reads one floating-point value from fp into *dest.
 * Returns 0 when a value was read and 1 when it was not, so callers can
 * simply add up the return values to count missing entries.
 */
static int ReadWeightOrCountMissing(FILE *fp, double *dest)
{
	return (fscanf(fp, "%lf", dest) == 1) ? 0 : 1;
}

/*
 * Interactively collects the run configuration and loads the weights file.
 *
 * Steps:
 *   1. Prompt for the testing-data file name until a file can be opened;
 *      the stream is kept in fpTestingdata.
 *   2. Prompt for whether that file carries desired outputs (0 or 1).
 *   3. Prompt for the weights file name until a file can be opened.
 *   4. From the weights file read, in order: a leading token (discarded),
 *      N, Nout, ClustersSoFar, then after init() the global weight matrix,
 *      the distance-measure weights, the input means, the input variances,
 *      the per-cluster weight matrices, the cluster centers and the
 *      outlier distances.
 *
 * The program terminates with exit(1) if standard input ends while a
 * prompt is pending, or if the weights-file header is unreadable or holds
 * non-positive dimensions.  A warning is printed if the weights file ends
 * before all values have been read.
 *
 * NOTE(review): the sizes of DatafileName and WeightsFileName are not
 * visible here, so those reads cannot be given a field width; a very long
 * name can still overrun them.
 */
void DataInfoAndPrepro::GetDataInformationFromUser()
{
	int i,j,k;
	int got;
	int missing = 0;
	char FileTemp[100];
	FILE  * fpwt;

	printf(" Enter the Testing file name: ");
	do
	{
		/* without this check, end of input would retry fopen() forever */
		if (scanf("%s",DatafileName) != 1)
		{
			printf("No testing file name could be read....\n");
			exit(1);
		}
		fpTestingdata = fopen(DatafileName,"r");
	}
	while(fpTestingdata == NULL);

	printf("\n Does the testing file has desired output? \n");
	printf("\n Choose (0) for NO \n");
	printf("\n        (1) for YES : ");

	do
	{
		got = scanf("%d",&OutputFlag);
		if (got == EOF)
		{
			printf("No answer could be read....\n");
			exit(1);
		}
		if (got != 1)
		{
			/* non-numeric input stays in the stream; drop the token so the
			   next attempt does not see it again, and force another pass */
			scanf("%*s");
			OutputFlag = -1;
		}
	}
	while(OutputFlag != 0 && OutputFlag != 1);

	printf(" Enter the weights file name: ");
	do
	{
		if (scanf("%s",WeightsFileName) != 1)
		{
			printf("No weights file name could be read....\n");
			exit(1);
		}
		fpwt = fopen(WeightsFileName,"r");
	}
	while(fpwt == NULL);

	/* header: one leading token (not used), then the three dimensions.
	   The field width keeps the token inside FileTemp. */
	if (fscanf(fpwt,"%99s",FileTemp) != 1 ||
	    fscanf(fpwt,"%d",&N) != 1 ||
	    fscanf(fpwt,"%d",&Nout) != 1 ||
	    fscanf(fpwt,"%d",&ClustersSoFar) != 1)
	{
		printf("Weights file header could not be read....\n");
		fclose(fpwt);
		exit(1);
	}
	if (N < 1 || Nout < 1 || ClustersSoFar < 1)
	{
		/* these values size every allocation made by init() */
		printf("Weights file header holds invalid dimensions....\n");
		fclose(fpwt);
		exit(1);
	}

	//initiate the matrix
	init();

	/* next read the Linear Model wts	*/
	for( i =0; i < Nout; i++)
	{
		for(j = 0 ; j <= N; j ++)
		{
			missing += ReadWeightOrCountMissing(fpwt, &GlobalWtsMartrix[i][j]);
		}
	}

	/* next read the Distance measure wts	*/
	for( i =0; i < N ; i++)
	{
		missing += ReadWeightOrCountMissing(fpwt, &DistanceMeasure[i]);
	}

	/* next read the feature mean vector	*/
	for( i =0; i < N; i++)
	{
		missing += ReadWeightOrCountMissing(fpwt, &InputMean[i]);
	}

	/* next read the feature variance  vector	*/
	for( i =0; i < N; i++)
	{
		missing += ReadWeightOrCountMissing(fpwt, &InputVariance[i]);
	}

	/* read now the individual module wts */
	for( i =0; i < ClustersSoFar; i++)
	{
		for( j =0; j < Nout; j++)
		{
			for( k =0; k <= N; k++)
			{
				missing += ReadWeightOrCountMissing(fpwt, &ModWts[i][j][k]);
			}
		}
	}

	/* read cluster centers	*/
	for( i =0; i < ClustersSoFar; i++)
	{
		for( j =0; j < N; j++)
		{
			missing += ReadWeightOrCountMissing(fpwt, &ClusterCenters[i][j]);
		}
	}

	/* read Outlier distances	*/
	for( i =0; i < ClustersSoFar; i++)
	{
		missing += ReadWeightOrCountMissing(fpwt, &OutlierDistance[i]);
	}
	fclose(fpwt);

	if (missing > 0)
	{
		/* the affected entries keep whatever the allocator left in them */
		printf(" Warning: %d value(s) could not be read from the weights file.\n", missing);
	}
}

/*
 * Reads `count` whitespace-separated tokens from fp and stores their
 * atof() conversions in dest[0..count-1].
 * Returns 1 when all tokens were read and 0 as soon as one could not be
 * read (end of file or read error).  Tokens longer than 63 characters are
 * split by the field width rather than overrunning the buffer.
 */
static int ReadPatternValues(FILE *fp, double *dest, int count)
{
	char token[64];
	int i;

	for (i = 0; i < count; i++)
	{
		if (fscanf(fp, "%63s", token) != 1)
		{
			return 0;
		}
		dest[i] = atof(token);
	}
	return 1;
}

/*
 * Runs every pattern of the testing file through the loaded model and
 * writes a report to c:/result.txt.
 *
 * For each complete pattern (N inputs, plus Nout desired outputs when
 * OutputFlag is set) the function:
 *   - computes the global linear mapping from the raw inputs, with
 *     Inputs[N] fixed at 1.0,
 *   - normalises the inputs using InputMean and InputVariance,
 *   - picks the cluster whose center has the smallest DistanceMeasure-
 *     weighted squared distance to the normalised inputs,
 *   - adds that cluster's mapping of the normalised inputs,
 *   - writes the pattern number, raw inputs, predicted outputs, cluster
 *     index and (if present) the desired outputs to the report.
 *
 * Per-cluster member counts and, when desired outputs exist, per-cluster
 * squared errors and the overall mean squared error are printed to the
 * console and appended to the report.
 *
 * Reading stops at the first pattern that cannot be read completely, so a
 * final line without a trailing newline is still processed.
 */
void DataInfoAndPrepro::ProcessData()
{
	int i,j, NearestCluster, *PatternsPerCluster,Members,RealNv=0;
	double *ClusterErrors,*PredictOutputs,*OrigInputs;
	double  DistanceSum,MinDistance, temp, TotalError=0.0, MeanError;
	FILE * result;

	/* nothing sensible can be done without data, inputs and clusters;
	   N < 1 would also make the read loop below never terminate */
	if (fpTestingdata == NULL || N < 1 || ClustersSoFar < 1)
	{
		printf("Testing data or weights have not been loaded....\n");
		return;
	}

	PatternsPerCluster = (int*)calloc(ClustersSoFar,sizeof(int));
	if (PatternsPerCluster == NULL)
	{
		printf("Memory could not be allocated....\n");
		exit(1);
	}
	PredictOutputs = AllocateMemory(Nout);
	OrigInputs = AllocateMemory(N);
	ClusterErrors = AllocateMemory(ClustersSoFar);
	result = OpenFile("c:/result.txt","w+");
	if (result == NULL)
	{
		/* NOTE(review): OpenFile may already handle failure itself;
		   this guard only matters if it can return NULL */
		printf("Result file could not be opened....\n");
		free(PatternsPerCluster);
		return;
	}

	rewind(fpTestingdata);
	Inputs[N] = 1.0;

	/* start all per-cluster accumulators from zero; do not rely on the
	   allocator having cleared them */
	for ( i = 0; i < ClustersSoFar; i++)
	{
		PatternsPerCluster[i] = 0 ;
		ClusterErrors[i] = 0.0;
	}

	fprintf(result,"\t%s\n","To be processed File Name:");
	fprintf(result,"\t%s\n\n",DatafileName);
	fprintf(result,"\t%s\n","Weights File Name:");
	fprintf(result,"\t%s\n\n",WeightsFileName);
	fprintf(result,"\tNo of Input:\t%d\n",N);
	fprintf(result,"\tNo of Output:\t%d\n",Nout);
	if(OutputFlag)
	{
		fprintf(result,"\tThis data has desired output.\n");
	}
	else
	{
		fprintf(result,"\tThis data do not has desired output.\n");
	}
	fprintf(result,"\tNo of Clusters:\t%d\n\n",ClustersSoFar);
	fprintf(result,"\n\n\tThe following if the format of the result:\n\tPatterns, Input(s), Predict Output(s), Membership, Actual Output(s) if it has. \n\n");

	for(;;)
	{
		/* read one pattern; these inputs are not yet normalized */
		if (!ReadPatternValues(fpTestingdata, Inputs, N))
		{
			break;
		}
		if (OutputFlag && !ReadPatternValues(fpTestingdata, Outputs, Nout))
		{
			break;
		}

		/* only complete patterns are counted */
		RealNv ++;

		/* keep the raw inputs for the report */
		for( i = 0; i < N; i ++)
		{
			OrigInputs[i] = Inputs[i];
		}

		//first add the global linear mapping
		for(i = 0 ; i < Nout; i ++)
		{
			PredictOutputs[i] = 0.0;
			for( j = 0; j <= N; j ++)
			{
				PredictOutputs[i] += Inputs[j] *  GlobalWtsMartrix[i][j];
			}
		}

		//then normalize the inputs
		/* NOTE(review): a zero entry in InputVariance yields inf/NaN here */
		for( i = 0 ; i < N; i ++)
		{
			Inputs[i] -= InputMean[i];
			Inputs[i] /= InputVariance[i];
		}

		//determine the membership of this pattern
		MinDistance = 1.0E20;
		NearestCluster = 0;	/* defined even if no distance is <= MinDistance */

		for(i = 0; i < ClustersSoFar; i++)
		{
			DistanceSum = 0.0;
			for ( j = 0; j < N; j++)
			{
				temp = Inputs[j] - ClusterCenters[i][j];
				DistanceSum += (DistanceMeasure[j]* (temp*temp));
			}
			/* "<=" means ties go to the later cluster */
			if(DistanceSum <= MinDistance)
			{
				MinDistance = DistanceSum;
				NearestCluster = i;
			}
		}

		//add the nearest cluster's mapping
		for(i = 0; i < Nout; i ++)
		{
			for( j = 0 ; j <= N; j ++)
			{
				PredictOutputs[i] += Inputs[j]*ModWts[NearestCluster][i][j];
			}
		}
		PatternsPerCluster[NearestCluster] += 1;
		if(OutputFlag)
		{
			for( i = 0; i < Nout; i ++)
			{
				temp = PredictOutputs[i]-Outputs[i];
				ClusterErrors[NearestCluster] += temp*temp;
			}
		}

		//output to the file of the result
		fprintf(result,"\t%d",RealNv);
		for(i = 0; i < N; i ++)
		{
			fprintf(result,"\t%f",OrigInputs[i]);
		}
		for(i = 0; i < Nout; i ++)
		{
			fprintf(result,"\t%f",PredictOutputs[i]);
		}
		fprintf(result,"\t%d",NearestCluster);
		if(OutputFlag)
		{
			for(i = 0 ; i < Nout; i ++)
			{
				fprintf(result,"\t%f",Outputs[i]);
			}
		}
		/* was "%\n", which is not a valid conversion specification */
		fprintf(result,"\n");
	}

	fprintf(result,"\n\n\tThe Total No. of Patterns:\t%d",RealNv);
	for(i = 0; i < ClustersSoFar; i++)
	{
		Members = PatternsPerCluster[i];
		printf("\nModule %d has %d members  ",
		 i,Members);
		if(OutputFlag)
		{
			printf("\tError %lf",ClusterErrors[i]);
		}
		TotalError += ClusterErrors[i];
	}

	/* avoid dividing by zero when the file held no complete pattern */
	MeanError = (RealNv > 0) ? TotalError / RealNv : 0.0;

	if(OutputFlag)
	{
		printf("\n\tTotal MS Error :  %lf\n",MeanError);
	}

	for(i = 0; i < ClustersSoFar; i++)
	{
		Members = PatternsPerCluster[i];
		fprintf(result,"\n\tModule\t%d\thas\t%d\tmembers\t",
		 i,Members);
		if(OutputFlag)
		{
			fprintf(result,"\tError\t%lf",ClusterErrors[i]);
		}
	}
	if(OutputFlag)
	{
		fprintf(result,"\n\tTotal MS Error :  %lf\t\n",MeanError);
	}

	/* flush and release the report; it was previously left open */
	fclose(result);
	free(PatternsPerCluster);
	/* NOTE(review): PredictOutputs, OrigInputs and ClusterErrors come from
	   AllocateMemory and are still not released here because the matching
	   release routine is not visible in this file - confirm in allocmem. */
}
