//-[stdhead]-------------------------------------------------------
//Project:Java preprocessor
//File   :lexer.cpp
//Started:05.07.00 10:17:59
//Updated:21.07.00 18:12:43
//Author :Nick E. Geht
//Subj   :lexical analyzer
//Version:1.0
//Requires:
//-----------------------------------------------------------------
#include "stdafx.h"
#include "lexer.h"

//initialize lexer
//      szText  - zero-terminated text to scan. The pointer is stored, not
//                copied; the destructor passes it to free() unless
//                EmptyText() has been called first.
CJppLexer::CJppLexer(PSTR szText )
{
        m_szText=szText;
        m_nLine=1;              //line numbering starts from 1
        m_nChar=0;              //scanning starts at the first character
        //The flag is read by CheckNewLine() and otherwise only written when
        //GetLexem() consumes a line feed, so it must not be left
        //indeterminate. No line feed has been consumed yet.
        //NOTE(review): if callers expect the start of text to count as the
        //start of a line, this should be true instead - confirm.
        m_fNewLine=false;
}

//finalize lexer
//Releases the text buffer with free(), unless the pointer is null
//(EmptyText() sets it to null).
CJppLexer::~CJppLexer()
{
        if (!m_szText)
        {
                return;         //nothing to release
        }
        free(m_szText);
}

//get next lexem
//
//Scans m_szText from offset m_nChar, advances m_nChar past whatever was
//consumed and stores the text of the lexem in szLexDest as a
//zero-terminated string.
//
//parameters:
//      szLexDest - destination buffer. No length is passed and no bounds
//                  check is made, so the caller must supply a buffer large
//                  enough for the longest lexem in the text.
//      SkipSpace - true:  a run of spaces, tabs and CRs in front of the
//                         lexem is skipped silently;
//                  false: such a run is reported as one " " lexem (code 6).
//
//the method return:
//      0       - end of text
//      1       - end of line (LF consumed, line counter incremented,
//                newline flag set; szLexDest is empty)
//      2       - character lexem (run of characters accepted by islexem)
//      3       - other lexem (single character)
//      4       - string constant (string or char constant, delimiters
//                included; the closing delimiter is missing when the
//                constant is cut by end of line or end of text)
//      5       - # sign
//      6       - space (also returned, with empty szLexDest, after a
//                // or /* */ comment has been skipped)
int CJppLexer::GetLexem(PSTR szLexDest,bool SkipSpace)
{
        char    cc;
        bool    fBreak=false;
        int     i;
        //first step
        //check whitespace

        *szLexDest=0;

        i=0;
        while(!fBreak)
        {
                cc=m_szText[m_nChar];
                switch(cc)
                {
                case    0:              //zero-char - end of text
                        return 0;
                case    ' ':            //skip spaces, tabs, and CR
                case    '\t':
                case    '\r':
                        i++;            //count space char
                        m_nChar++;
                        break;
                case    '\n':           //check LF - this is the end of line
                        m_nLine++;
                        m_fNewLine=true;
                        m_nChar++;
                        return 1;
                default:
                        fBreak=true;    //any other char - begin processing
                }
        }
        if (i&&!SkipSpace)
        {
                //the whole run of whitespace is reported as a single space
                szLexDest[0]=' ';
                szLexDest[1]=0;
                return 6;       //report about space char
        }
        //other char may be:
        //      lexem start
        //      number start
        //      string start
        //      other lexem type start
        i=0;
        if (islexem(cc))
        {
                //there is lexem (identifier or number)
                fBreak=false;
                while(!fBreak)
                {
                        cc=m_szText[m_nChar];
                        if (islexem(cc))
                        {
                                szLexDest[i++]=cc;
                                m_nChar++;
                        }
                        else
                        {
                                //first non-lexem char (the zero terminator
                                //included) stays in the text for the next call
                                fBreak=true;
                        }
                }
                szLexDest[i]=0;
                return 2;
        }

        if (isstring(cc))
        {
                //there is character lexem (string or char)
                char    cStart;
                cStart=cc;      //remember start character
                i=0;
                fBreak=false;
                szLexDest[i++]=cc;
                do {
                        m_nChar++;
                        cc=m_szText[m_nChar];
                        if (cc==cStart)
                        {
                                //we take string delimiter another once
                                //there is end of character constant
                                m_nChar++;
                                szLexDest[i++]=cc;
                                fBreak=true;
                        }
                        else
                        if (cc=='\\')
                        {
                                //we take escape character,
                                //do not analyze next character
                                szLexDest[i++]='\\';
                                cc=szLexDest[i++]=m_szText[++m_nChar];
                                //text ended right after the backslash:
                                //report end of text (m_nChar stays on the
                                //terminator, szLexDest is already terminated)
                                if (cc==0) return 0;
                        }
                        else
                        if ((cc=='\n')||(cc==0))
                        {
                                //there is error
                                //but we pass this error into Java compiler
                                //
                                //The LF / zero terminator is NOT consumed, so
                                //the next call reports end of line / end of
                                //text. Stopping on the zero terminator is
                                //required: otherwise the loop would run past
                                //the end of m_szText.
                                fBreak=true;
                        }
                        else
                        {
                                szLexDest[i++]=cc;
                        }
                }while(!fBreak);
                szLexDest[i]=0;
                return 4;
        }
        //there is not a number, not a lexem and not a string
        //check for string is comment
        if (cc=='/')
        {
                cc=m_szText[m_nChar+1];
                switch (cc)
                {
                case    '/':            //to end of line comment
                        m_nChar+=2;     //skip comment start
                        fBreak=false;
                        while(!fBreak)
                        {
                                cc=m_szText[m_nChar];
                                switch (cc)
                                {
                                case    '\n':   //end of comment, LF is left for the next call
                                        fBreak=true;
                                        break;
                                case    0:      //end of text inside comment
                                        fBreak=true;
                                        break;
                                default:
                                        m_nChar++;
                                        break;
                                }

                        }
                        return 6;       //comment is reported as space
                case    '*':            //to */ terminator comment
                        m_nChar+=2;     //skip comment start
                        fBreak=false;
                        while(!fBreak)
                        {
                                cc=m_szText[m_nChar];
                                switch (cc)
                                {
                                case    '\n':   //end of line - keep line counter in sync
                                        m_nChar++;
                                        m_nLine++;
                                        break;
                                case    '*':    //end of comment possible
                                        cc=m_szText[m_nChar+1];
                                        if (cc=='/')
                                        {
                                                m_nChar+=2;
                                                fBreak=true;
                                        }
                                        else    m_nChar++;
                                        break;
                                case    0:      //unterminated comment - stop at end of text
                                        fBreak=true;
                                        break;
                                default:
                                        m_nChar++;
                                        break;
                                }
                        }
                        return 6;       //comment is reported as space
                }
                //single slash, not a comment: restore the character that
                //was overwritten by the look-ahead
                cc='/';
        }
        //no comment, report about symbol
        szLexDest[0]=cc;
        szLexDest[1]=0;
        m_nChar++;
        if (cc=='#') return 5;  //sharp character
                else return 3;  //other character
}

//check & drop newline event
//Returns the current state of the newline flag (set by GetLexem() when it
//consumes a line feed) and clears the flag, so that one line feed is
//reported at most once.
bool CJppLexer::CheckNewLine()
{
        const bool      fRet=m_fNewLine;
        //drop the event: the flag must be cleared, not toggled - toggling
        //would arm it on a call made while it is false and make the
        //following call report a newline that never happened
        m_fNewLine=false;
        return fRet;
}

//get line number
int CJppLexer::GetLine()
{
        return m_nLine;
}

//is character a lexem char
//Returns true for '0'-'9', 'A'-'Z', 'a'-'z' and '_', false for any other
//character.
bool CJppLexer::islexem(char cc)
{
        const bool      fDigit=(cc>='0')&&(cc<='9');
        const bool      fUpper=(cc>='A')&&(cc<='Z');
        const bool      fLower=(cc>='a')&&(cc<='z');
        const bool      fUnderscore=(cc=='_');

        return fDigit||fUpper||fLower||fUnderscore;
}

//is character a string terminator
//Returns true for the double quote and the single quote characters,
//false for any other character.
bool CJppLexer::isstring(char cc)
{
        return (cc=='\"')||(cc=='\'');
}

void CJppLexer::EmptyText()
{
        m_szText=NULL;
}

