#include <afxwin.h>						// mfc defines
#include <afxtempl.h>					// mfc templates

#include "NumberRule.h"
#include "resource.h"

extern HINSTANCE hinstDLL;				// handle to DLL module

// Out-of-line destructor of the rule base class; it has nothing to release.
NumberRule::~NumberRule()
{
}

/*======================================================================*/
/* NumberRule::getTokenLength											*/
/*	This is the function called from NewGeneric.cpp.					*/
/*	It returns the number length if 'text' parameter points to a number,*/
/*	and 0 if not.														*/
/*																		*/
/*	How it works:														*/
/*	There is a 'while' loop, that is applying the rules in all possible */
/*	ways, in order to take the longest valid number.					*/
/*	This is needed because of ambiguities that might occur because of	*/
/*	iterations '*' and '+', optionals '{..}' and alternatives '(..|..|)'*/
/*																		*/
/*	Here is an example of ambiguity:									*/
/*		Let's suppose that we have this simple rule:					*/
/*																		*/
/*		zeroEndedNumber=Dd*0											*/
/*																		*/
/*		Obviously, this rule should accept all sequences of digits that	*/
/*		begin with a non-zero digit and end with a '0'.					*/
/*		Let's take a sample: "2000"										*/
/*		The sample rule is a sequence of three smaller rules:			*/
/*			1. D	- a nonzero digit.									*/
/*			2. d*	- zero or more digits								*/
/*			3. 0	- the character '0'									*/
/*		if we apply these rules in order trying to get the maximum		*/
/*		length for each one, we'll get a failure:						*/
/*		1. the first rule is eating the character '2' and leaves		*/
/*			us with "000"												*/
/*		2. the second rule can be applied zero, one, two or three times.*/
/*		   the longest token we get is when we apply it three times:	*/
/*		   it will eat all three zeros, leaving us with an				*/
/*		   empty string: ""												*/
/*		3. at this point, with the empty string, this rule cannot pass: */
/*		   there is no '0' char in the empty string.					*/
/*																		*/
/*		This example tells us that there is no (reasonable) way to know	*/
/*		in advance how many times an iteration should be applied. We	*/
/*		have to test them all, and get the longest token.				*/
/*		The same is true for the other constructs enumerated above.		*/
/*																		*/
/*	Now, let's get back to the algorithm:								*/
/*	The function called in the loop,									*/
/*																		*/
/*		bool getNextLength( LPCTSTR text, int maxLength,				*/
/*							int& tokenLength, RuleStatus status )		*/
/*																		*/
/*	is a virtual function that has to be implemented by each			*/
/*	type of rule.														*/
/*	The first two parameters are the text to be parsed, and its length. */
/*	The third is a reference to an integer where the length of the		*/
/*	accepted token has to be returned.									*/
/*	The last parameter is a RuleStatus object, which is in fact a		*/
/*	stack of ints, where each rule can store needed status information. */
/*	The return value is a boolean that tells if the rule can be applied */
/*	on the given text.													*/
/*																		*/
/*	On the first call of the function, the status stack is empty, and	*/
/*	the rule should do one of the virtually many possible parses of		*/
/*	the text (for example, the IterationRule [*] will apply the rule	*/
/*	zero times). At the same time, it must store in the 'status'		*/
/*	variable all the information needed so that, on the next call of	*/
/*	the same function, it can apply the rule in the next possible way	*/
/*	(for example, the IterationRule [*] will apply the rule once).		*/
/*	The process repeats until the function returns false, which			*/
/*	means there are no more ways of applying the rule.					*/
/*======================================================================*/

int NumberRule::getTokenLength (
	LPCTSTR text,
	int maxLength
	)
{
	CIntArray	backtrackData;				// raw int storage behind the status stack
	RuleStatus	status (&backtrackData);	// status stack handed to getNextLength
	int			longest = 0;				// best length seen so far (0 = no match)
	int			candidate;					// length reported by the latest parse

	// Enumerate every way this rule can parse 'text': each successful call
	// yields one candidate length, and a 'false' result ends the enumeration.
	for ( ; ; )
	{
		if (!getNextLength (text, maxLength, candidate, status))
			break;

		if (longest < candidate)
			longest = candidate;
	}

	return longest;
}

/*======================================================================*/
/* NumberRule::createRule												*/
/*	used to create the number rule from definition strings in ini file. */
/* Parameter allRules is a multistring format							*/
/*	ruleName\0ruleDefinition\0...ruleName\0ruleDefinition\0\0			*/
/* Returns the rule built by getRuleByName for the main rule name, or	*/
/* NULL when that name cannot be loaded from the resources or when		*/
/* getRuleByName finds no rule with that name.							*/
/*======================================================================*/

NumberRule* NumberRule::createRule (
	LPTSTR allRules						// set of rules as multistring
	)
{
	TCHAR szNumber[256];
	LPCTSTR number = szNumber;

	// keep the buffer a valid (empty) string even if LoadString fails
	szNumber[0] = 0;

	// read from resources the name of the main rule ("number").
	// LoadString takes the buffer size in characters, not in bytes, so the
	// byte size must be divided by the character size (matters for Unicode).
	if (LoadString (hinstDLL, IDS_NUMBERRULE, szNumber,
					(sizeof szNumber) / (sizeof szNumber[0])) == 0)
		return NULL;					// the name of the main rule is not available

	// create this main rule
	return getRuleByName (number, allRules);
}

/*======================================================================*/
/* NumberRule::getRuleByName											*/
/*	Looks up a rule by name and builds it from its definition string.	*/
/* Parameters:															*/
/*	ruleString - text that starts with the name of the wanted rule.		*/
/*		The name is NOT zero terminated inside this text, so every		*/
/*		name stored in allRules is tried as a case-insensitive prefix	*/
/*		of ruleString. The first name that matches wins, and			*/
/*		ruleString is advanced past it.									*/
/*	allRules - all rules from the ini file in multistring format		*/
/*		(ruleName\0ruleDefinition\0 ... \0\0); may be NULL.				*/
/* Returns the rule created from the matching definition, or NULL when	*/
/* allRules is NULL or no name matches (ruleString is then unchanged).	*/
/*======================================================================*/

NumberRule* NumberRule::getRuleByName (
	LPCTSTR& ruleString,				// rule name
	LPTSTR allRules						// all rules as multistring
	)
{
	if (allRules == NULL)
		return NULL;

	LPCTSTR entryName = allRules;		// name of the entry being examined

	// an empty name marks the end of the multistring
	while (*entryName != 0)
	{
		const int	nameLength = _tcslen (entryName);
		// the definition follows the name and its terminating zero
		LPCTSTR		entryDefinition = entryName + nameLength + 1;

		if (_tcsncicmp (entryName, ruleString, nameLength) != 0)
		{
			// not this one: skip the definition and its terminating zero
			entryName = entryDefinition + _tcslen (entryDefinition) + 1;
			continue;
		}

		// match: consume the name from the caller's text, then build the
		// rule from the definition that belongs to that name
		ruleString += nameLength;
		return createRule (entryDefinition, allRules);
	}
	return NULL;
}

/*======================================================================*/
/* NumberRule::createRule												*/
/*	Builds a rule from its definition string.							*/
/* Parameters:															*/
/*	ruleDefinition - the definition string of the rule; it is passed	*/
/*			   by reference to SequenceRule::processDefinition, which	*/
/*			   advances it while parsing								*/
/*	allRules - all rules from the ini file in multistring format,		*/
/*			   forwarded to the parser for definitions that use other	*/
/*			   rules													*/
/* The definition is parsed into a freshly allocated SequenceRule. When */
/* that sequence ends up with exactly one child, the child is detached	*/
/* and returned instead, and the wrapper sequence is deleted.			*/
/*======================================================================*/

NumberRule* NumberRule::createRule (
	LPCTSTR& ruleDefinition,			// rule definition as string
	LPTSTR allRules						// all rules as multistring
	)
{
	SequenceRule* parsed = new SequenceRule;

	// let the sequence build its children from the definition text
	parsed->processDefinition (ruleDefinition, allRules);

	// anything but a single child is returned as the sequence itself
	if (parsed->getRulesCount() != 1)
		return parsed;

	// a one-element sequence is pointless: hand out the child directly.
	// removeRule detaches it, so deleting the wrapper leaves it alive.
	NumberRule* onlyChild = parsed->removeRule (0);
	delete parsed;
	return onlyChild;
}

/*======================================================================*/
/* OrRule destructor													*/
/*	The OrRule deletes every alternative stored in its array.			*/
/*======================================================================*/

OrRule::~OrRule()
{
	int idx = 0;

	// delete the alternatives in the order they are stored
	while (idx < alternatives.GetSize())
	{
		delete alternatives [idx];
		++idx;
	}
}

/*======================================================================*/
/* OrRule::addAlternative												*/
/*	Appends an alternative to the alternatives array. The destructor	*/
/*	of the OrRule deletes every stored alternative.						*/
/*======================================================================*/

void OrRule::addAlternative (
	NumberRule* alternative				// alternative to append
	)
{
	alternatives.Add (alternative);
}

/*======================================================================*/
/* OrRule::getNextLength												*/
/*	Produces, one per call, the parses of the alternatives: first all	*/
/*	parses of the first alternative, then all parses of the second one, */
/*	and so on.															*/
/* Parameters:															*/
/*	text, maxLength - text to parse and its length; passed unchanged	*/
/*		to the alternatives												*/
/*	tokenLength - receives the token length reported by the alternative */
/*		that succeeded													*/
/*	status - status stack; empty on the first call for a given text		*/
/* Returns true when a parse was found, false when no alternative has	*/
/* any parse left.														*/
/*======================================================================*/

bool OrRule::getNextLength (
	LPCTSTR text,
	int maxLength,
	int& tokenLength,
	RuleStatus status
	)
{
	// the index of the next alternative to be checked
	int crtRuleIdx;

	// Child rule status. The actual array of ints is the same,
	// but we need another instance of RuleStatus.
	// NOTE(review): presumably so that the child sees only its own section
	// of the shared stack -- confirm against the RuleStatus implementation.
	RuleStatus childStatus;

	// status is empty only the first time we try this rule on that string.
	if (status.isEmpty())
	{	// start with first rule
		crtRuleIdx = 0;
		// the status stack is empty for us, should be empty for children too.
		childStatus = status.beginChildSection();
	}
	else
	{	// get the last rule we checked at the prev call from the status stack
		// (it was pushed just before the previous call returned true)
		crtRuleIdx = status.pop();
		// the status stack now contains in the top the child rule data
		childStatus = status;
	}

	// test each rule, starting with the last checked one,
	// until we find a solution, or there are no more rules.
	for (; crtRuleIdx < alternatives.GetSize(); ++crtRuleIdx)
	{
		NumberRule* crtRule = alternatives [crtRuleIdx];

		if (crtRule->getNextLength (text, maxLength, tokenLength, childStatus))
		{	// this rule has a solution.
			// push the index on the stack for next call, so that the next
			// call resumes with the same alternative.
			status.push (crtRuleIdx);
			return true;
		}

		// this alternative has no (more) solutions. The next alternative
		// will be tried for the first time, so the childStatus stack
		// should be empty for it.
		childStatus = status.beginChildSection();
	}

	// all the alternatives are exhausted
	return false;
}

/*======================================================================*/
/* SequenceRule destructor.												*/
/*	The sequence deletes every child rule stored in its array.			*/
/*======================================================================*/

SequenceRule::~SequenceRule()
{
	int idx = 0;

	// delete the children in the order they are stored
	while (idx < sequence.GetSize())
	{
		delete sequence [idx];
		++idx;
	}
}

/*======================================================================*/
/* SequenceRule::addRule.												*/
/*	Appends a rule at the end of the sequence array. The destructor of	*/
/*	the sequence deletes every rule still stored in it.					*/
/*======================================================================*/

void SequenceRule::addRule (NumberRule* rule)
{
	sequence.Add (rule);
}

/*======================================================================*/
/* SequenceRule::removeRule.											*/
/*	Detaches the rule stored at position idx and returns it. The rule	*/
/*	is not deleted; once removed from the array, the destructor of the	*/
/*	sequence no longer deletes it.										*/
/*======================================================================*/

NumberRule* SequenceRule::removeRule (int idx)
{
	// remember the pointer before its slot disappears from the array
	NumberRule* const detached = sequence [idx];

	sequence.RemoveAt (idx);
	return detached;
}

/*======================================================================*/
/* SequenceRule::getRulesCount.											*/
/*	Returns the number of rules currently stored in the sequence.		*/
/*======================================================================*/

int SequenceRule::getRulesCount () const
{
	return sequence.GetSize();
}

/*======================================================================*/
/* SequenceRule::getNextLength											*/
/*	Produces, one per call, the parses in which every child rule of the */
/*	sequence matches, each child starting where the previous one ended. */
/*	When a child fails, the function steps back to the previous child	*/
/*	and asks it for its next parse.										*/
/* Parameters:															*/
/*	text, maxLength - text to parse and its length						*/
/*	tokenLength - receives the total length matched by all children		*/
/*	status - status stack; empty on the first call for a given text		*/
/* Returns true when a complete parse was found, false when the first	*/
/* child has no parse left.												*/
/*======================================================================*/

bool SequenceRule::getNextLength (
	LPCTSTR text,
	int maxLength,
	int& tokenLength,
	RuleStatus status
	)
{
	// a sequence rule with only one rule in sequence does not make much sense
	// (NumberRule::createRule returns the only child instead of the sequence)
	_ASSERTE( sequence.GetSize() > 1 );

	int			crtRuleIdx;		// the index of the child rule to be checked
	int			crtOffset;		// the starting offset in text from which the current rule should be checked
	RuleStatus	childStatus;	// the child rule status

	// status is empty only the first time we try this rule on that string.
	if (status.isEmpty())
	{	// start with first rule, from the beginning of text
		crtRuleIdx = 0;
		crtOffset = 0;
		// the status stack is empty for us, should be empty for children also.
		childStatus = status.beginChildSection();
	}
	else
	{	// if this is not the first time we checked
		// this rule, then the last child checked is always the
		// last child in sequence. (A SequenceRule passes only if all
		// the children pass)
		crtRuleIdx = sequence.GetUpperBound();
		// take the offset in text for the last rule from status stack
		crtOffset = status.pop();
		// the status now has at the top the status of the child
		childStatus = status;
	}

	// the loop is left only through the two 'return' statements below
	for ( ; ; )
	{	int			length;							// the length of the current rule token
		NumberRule*	rule = sequence [crtRuleIdx];	// current rule

		if (rule->getNextLength (text + crtOffset, maxLength - crtOffset, length, childStatus ))
		{	// the current rule succeeded.
			// if it is the first, we do not need to save the offset,
			// because it is always 0. Else, put it on the status stack.
			// Note that now, the child rule already placed on the status stack
			// its status variables.
			if (crtRuleIdx > 0)
				status.push (crtOffset);

			// the next rule to check should begin parsing the text after current rule token.
			crtOffset += length;

			// if this was the last rule in the sequence, return with success
			if (crtRuleIdx == sequence.GetUpperBound())
			{	tokenLength = crtOffset;
				return true;
			}

			// go to next rule. This rule was not previously checked with this text,
			// so prevent it reading from the stack
			childStatus = status.beginChildSection();
			++crtRuleIdx;
		}
		else
		{	// the current rule failed, then we'll try to get a new parsing
			// variant from the previous rule. But if this was the first rule, then
			// there are no more parsing solutions.
			if (crtRuleIdx == 0)
				return false;

			// read from the stack the offset in text for previous rule.
			// do it only if this rule is not the first one, because for the first
			// rule there is no need to save the offset: it is always 0
			crtOffset = (--crtRuleIdx > 0) ? status.pop() : 0;

			// the status contains the data placed here by the child.
			// allow the child to read it.
			childStatus = status;
		}
	}
}

/*======================================================================*/
/* SequenceRule::processDefinition										*/
/*	Parse the rule definition and append the resulting child rules to	*/
/*	this sequence.														*/
/* Parameters:															*/
/*	ruleDefinition - definition text. It is advanced while parsing; on	*/
/*		return it points either at the closing character ('}', '|',		*/
/*		')' or ']') that ended this definition, or at the terminating	*/
/*		zero. It never points past the terminating zero.				*/
/*	allRules - set of rules as multistring, used to resolve '\name'		*/
/*		references and passed on to nested definitions					*/
/* Malformed definitions are tolerated:									*/
/*	- '+' or '*' with no previous expression is taken as a literal		*/
/*	  character														*/
/*	- a '[', '{' or '(' construct that is not closed ends at the end	*/
/*	  of the definition												*/
/*	- a '\' at the very end of the definition is taken as a literal	*/
/*	  backslash														*/
/*======================================================================*/

void SequenceRule::processDefinition (
	LPCTSTR& ruleDefinition,			// rule definition
	LPTSTR allRules						// set of rules as multistring
	)
{
	// check each character of the definition
	for (; *ruleDefinition; ++ruleDefinition)
	{
		// the rule corresponding to the current position in the definition

		NumberRule* rule = NULL;

		switch (*ruleDefinition)
		{
			case _T('d'):		// decimal digit, eq [0123456789]
				rule = new RangeRule (_T("0123456789")); break;

			case _T('D'):		// positive decimal digit, eq [123456789]
				rule = new RangeRule (_T("123456789")); break;

			case _T('h'):		// hex digit, eq [0123456789abcdefABCDEF]
				rule = new RangeRule (_T("0123456789abcdefABCDEF")); break;

			case _T('H'):		// positive hex digit, eq [123456789abcdefABCDEF]
				rule = new RangeRule(_T("123456789abcdefABCDEF")); break;

			case _T('o'):		// octal digit, eq [01234567]
				rule = new RangeRule(_T("01234567")); break;

			case _T('O'):		// positive octal digit, eq [1234567]
				rule = new RangeRule(_T("1234567")); break;

			case _T('+'):		// one or more of previous expression
			case _T('*'):		// zero or more of previous expression
			{
				if (sequence.GetSize() == 0)
				{	// there is no previous expression to iterate, so there is
					// nothing to take out of the sequence: use the character
					// as a literal instead of indexing an empty array
					rule = new RangeRule (*ruleDefinition);
					break;
				}

				int			minCount = (*ruleDefinition == _T('+')) ? 1 : 0;
				NumberRule*	prevRule = sequence [sequence.GetUpperBound()];

				// the previous expression becomes the child of the iteration,
				// and the iteration takes its place in the sequence
				rule = new IterationRule (minCount, -1, prevRule);
				sequence.RemoveAt (sequence.GetUpperBound());
				break;
			}

			case _T('['):		// any of the characters enclosed
				++ruleDefinition;
				rule = new RangeRule (RangeRule::getRangeChars (ruleDefinition));
				break;

			case _T('{'):		// the format enclosed is optional
			{	NumberRule* optionalRule;

				++ruleDefinition;
				optionalRule = createRule (ruleDefinition, allRules);
				rule = new IterationRule ( 0, 1, optionalRule);
				break;
			}

			case _T('('):		// one of the formats delimited by '|'
			{	OrRule* orRule = new OrRule;

				do
				{	NumberRule* alternative;

					++ruleDefinition;
					alternative = createRule (ruleDefinition, allRules);
					orRule->addAlternative (alternative);
				}
				// stop at the closing ')', and also at the end of the
				// definition when the ')' is missing
				while (*ruleDefinition != 0 && *ruleDefinition != _T(')'));
				rule = orRule;
				break;
			}

			// each parsing of the definition ends at one of these characters.
			// this function is a recursive function
			case _T('}'):
			case _T('|'):
			case _T(')'):
			case _T(']'):
				return;

			case _T('\\'):		// replaced with the definition of name
				if (ruleDefinition [1] == 0)
				{	// nothing follows the backslash: take it literally
					rule = new RangeRule (*ruleDefinition);
					break;
				}
				++ruleDefinition;
				if ((rule = getRuleByName (ruleDefinition, allRules)) != NULL)
				{	// getRuleByName moved the pointer after the name; step
					// back one character to compensate the loop increment
					--ruleDefinition;
					break;
				}
				// not a rule name: the character after the backslash is a literal
				rule = new RangeRule (*ruleDefinition);
				break;

			default:
				rule = new RangeRule (*ruleDefinition);
				break;
		}

		sequence.Add (rule);

		// a nested construct that was not closed leaves the pointer on the
		// terminating zero; do not let the loop increment step over it
		if (*ruleDefinition == 0)
			break;
	}
}

/*======================================================================*/
/* RangeRule::RangeRule (LPCTSTR _validChars)							*/
/*	RangeRule constructor: a valid token is a character from the		*/
/*	_validChars parameter. The string is copied into a buffer owned by	*/
/*	the rule, so the caller's buffer is not referenced afterwards.		*/
/*======================================================================*/

RangeRule::RangeRule (
	LPCTSTR _validChars
	)
{
	// length in characters, including the terminating zero.
	// _tcslen is the TCHAR counterpart of strlen.
	size_t length = _tcslen (_validChars) + 1;

	validChars = new TCHAR [length];

	// memcpy counts bytes, so the character count is scaled by the
	// character size
	memcpy (validChars, _validChars, length * sizeof (TCHAR));
}

/*======================================================================*/
/* RangeRule::RangeRule (TCHAR validChar)								*/
/*	RangeRule constructor for a single character: the set of valid		*/
/*	characters is a one-character, zero-terminated string holding		*/
/*	validChar.															*/
/*======================================================================*/

RangeRule::RangeRule (TCHAR validChar)
{
	// one slot for the character, one for the terminating zero
	TCHAR* single = new TCHAR [2];

	single [0] = validChar;
	single [1] = 0;
	validChars = single;
}

/*======================================================================*/
/* RangeRule destructor													*/
/*	Releases the buffer holding the valid characters.					*/
/*======================================================================*/

RangeRule::~RangeRule()
{
	// both constructors allocate the buffer with new [], so it has to be
	// released with delete []
	delete [] validChars;
}

/*======================================================================*/
/* RangeRule::getRangeChars												*/
/*	parse the parameter definition, which is a range ([]) construct.	*/
/* Parameter:															*/
/*	ruleDefinition - points at the first character after the '['. On	*/
/*		return it points at the closing ']', or at the terminating		*/
/*		zero if the ']' is missing.										*/
/* A backslash makes the next character literal (so "\]" puts a ']' in	*/
/* the range); a backslash at the very end of the definition is taken	*/
/* literally.															*/
/* Returns the characters of the range as a zero terminated string.		*/
/* The string lives in a static buffer that is overwritten by the next	*/
/* call; characters that do not fit in the buffer are dropped.			*/
/*======================================================================*/

LPCTSTR RangeRule::getRangeChars (
	LPCTSTR& ruleDefinition
	)
{
	static TCHAR	chars [256];
	const int		capacity = (sizeof chars) / (sizeof chars [0]);
	int				length = 0;

	for (; *ruleDefinition != 0; ++ruleDefinition)
	{
		// an unescaped ']' closes the range; leave the pointer on it
		if (*ruleDefinition == _T(']'))
			break;

		// skip the backslash and take the next character as it is. If
		// nothing follows the backslash, keep the backslash itself, so
		// that the pointer never steps over the terminating zero.
		if (*ruleDefinition == _T('\\') && ruleDefinition [1] != 0)
			++ruleDefinition;

		// always keep one slot free for the terminating zero
		if (length < capacity - 1)
			chars [length++] = *ruleDefinition;
	}

	chars [length] = 0;
	return chars;
}

/*======================================================================*/
/*	RangeRule::getNextLength											*/
/*	Matches exactly one character of text against the valid characters. */
/* Parameters:															*/
/*	text, maxLength - text to parse and its length						*/
/*	tokenLength - set to 1 on success; left untouched on failure		*/
/*	status - status stack; empty on the first call for a given text		*/
/* Returns true on the first call if the first character of text is one */
/* of the valid characters; false otherwise, and always false on the	*/
/* following call for the same text.									*/
/*======================================================================*/

bool RangeRule::getNextLength(
	LPCTSTR text,
	int maxLength,
	int& tokenLength,
	RuleStatus status
	)
{
	// this rule can have only one solution, so that,
	// if this is not the first time we check this rule on
	// this text (eq. the status is not empty) then we should return a failure.

	if (!status.isEmpty())
	{	status.pop();	// remove the status variable from the stack
		return false;
	}

	// if there are no more chars in text, then we cannot succeed
	if	(maxLength > 0)
	{	// check the first char in text against the valid chars.
		// the buffer holds TCHARs and is only read here, so it is walked
		// through a pointer to const TCHAR
		for (const TCHAR* validChar = validChars; *validChar != 0; ++validChar)
			if (*text == *validChar)
			{	// the *text is a valid char.
				// push a value on the status stack, in order to
				// know that we've been here
				status.push (0);
				tokenLength = 1;
				return true;
			}
	}
	return false;
}

/*======================================================================*/
/*	IterationRule constructor											*/
/*	Stores the iteration limits and the rule to be iterated. A maximum	*/
/*	of -1 is stored as INT_MAX. The destructor deletes the stored rule. */
/*======================================================================*/

IterationRule::IterationRule (
	int _minIterationCount,
	int _maxIterationCount,
	NumberRule* _rule
	)
	: minIterationCount (_minIterationCount),
	// -1 is replaced by INT_MAX right away
	maxIterationCount (_maxIterationCount == -1 ? INT_MAX : _maxIterationCount),
	rule (_rule)
{
}

/*======================================================================*/
/*	IterationRule destructor											*/
/*	Deletes the iterated rule that was handed to the constructor.		*/
/*======================================================================*/

IterationRule::~IterationRule ()
{
	// the iteration is the one that destroys the rule it repeats
	delete rule;
}

/*======================================================================*/
/*	IterationRule::getNextLength										*/
/*	Produces, one per call, the parses obtained by applying the child	*/
/*	rule repeatedly, each application starting where the previous one	*/
/*	ended. A parse is reported only once the step counter has reached	*/
/*	minIterationCount; when a step fails, the function goes back to the */
/*	previous application and asks it for its next parse.				*/
/* Parameters:															*/
/*	text, maxLength - text to parse and its length						*/
/*	tokenLength - receives the total length matched by all iterations	*/
/*	status - status stack; empty on the first call for a given text		*/
/* Returns true when a parse was found, false when there are no more	*/
/* parses.																*/
/*======================================================================*/

bool IterationRule::getNextLength (
	LPCTSTR text,
	int maxLength,
	int& tokenLength,
	RuleStatus status
	)
{
	// 'count' counts the steps done so far. The first step (count going
	// from 0 to 1) is the empty one: it succeeds without calling the child
	// rule, so after it 'count' is the number of applications plus one.
	int	count;		// the number of steps done (see above)
	int	crtOffset;	// the offset in text at which the next iteration should begin

	// status is empty only the first time we try this rule on that string.
	if (status.isEmpty())
	{	count = 0;
		crtOffset = 0;
	}
	else
	{	// get the count from status stack
		count = status.pop();
		// get the offset in text from status stack only if the rule was applied
		// at least once, because otherwise it is 0 (and it was not pushed)
		crtOffset = (count <= 1) ? 0 : status.pop();
	}

	// because we'll start by trying to apply the rule one more time,
	// the childStatus stack should be empty

	RuleStatus childStatus = status.beginChildSection();

	// the loop is left only through the two 'return' statements below
	for ( ; ; )
	{	int length = 0;	// stays 0 for the empty step, which does not call the rule

		// count <= maxIterationCount checks if we are allowed
		// to apply the rule one more time.
		// count == 0 is the empty step: it succeeds without calling the rule.
		if (count <= maxIterationCount &&
			(count == 0 || rule->getNextLength (text + crtOffset, maxLength - crtOffset, length, childStatus)))
		{
			// the rule succeeded. Save the offset on the stack
			// (not needed while the offset is known to be 0)
			if (count > 1)
				status.push (crtOffset);

			crtOffset += length;

			// if we applied the rule at least minIterationCount, return success
			if (count++ >= minIterationCount)
			{	tokenLength = crtOffset;
				if (count > 1)
					// this saves the offset at which the next iteration should begin
					status.push (crtOffset);

				// save the number of times the rule was applied
				status.push (count);
				return true;
			}
			// we are going to apply the rule one more time,
			// so the child should have no variables on the status stack
			childStatus = status.beginChildSection();
		}
		else
		{	// the rule failed. We are going to go back one step,
			// and try another variant for the previous application of the rule.
			// If there is no previous rule, then return
			// failure (there is no more possible parsing of the text)

			if (--count == 0)
				return false;

			// get the offset for prev iteration from the stack
			crtOffset = (count <= 1) ? 0 : status.pop();

			// the status contains on the top the variables of the
			// previous iteration
			childStatus = status;
		}
	}
}
