/* kxTok - quick little tokenizer for stuff first
 * loaded into memory.  Originally developed for
 * "Key eXpression" evaluator. 
 *
 * This file is copyright 2002 Jim Kent, but license is hereby
 * granted for all use - public, private or commercial. */

#include "common.h"
#include "kxTok.h"


/* When TRUE, quoted string tokens keep their enclosing quote characters
 * in the token text; when FALSE (the default) the quotes are stripped.
 * Changed through kxTokIncludeQuotes() and consulted by kxTokenizeFancy(). */
boolean includeQuotes = FALSE;

static struct kxTok *kxTokNew(enum kxTokType type, char *string, int stringSize,
	boolean spaceBefore)
/* Create a token of the given type whose text is the first stringSize
 * bytes of string.  The token header and its text come from one
 * needMem() allocation, with the text copied into the trailing
 * string field.  No terminator is copied here; presumably needMem
 * zero-fills and the struct leaves room for it - confirm in kxTok.h. */
{
int bytesNeeded = sizeof(struct kxTok) + stringSize;
struct kxTok *newTok = needMem(bytesNeeded);

memcpy(newTok->string, string, stringSize);
newTok->spaceBefore = spaceBefore;
newTok->type = type;
return newTok;
}

struct kxTok *kxTokenizeFancy(char *text, boolean wildAst,
			      boolean wildPercent, boolean includeHyphen)
/* Convert text to stream of tokens. If 'wildAst' is
 * TRUE then '*' character will be treated as wildcard
 * rather than multiplication sign.
 * If wildPercent is TRUE then the '%' character will be treated as a
 * wildcard (as in SQL) rather than a modulo (kxtMod) or percent sign.
 * If includeHyphen is TRUE then a '-' character in the middle of a String
 * token will be treated as a hyphen (part of the String token) instead of
 * a new kxtSub token.
 *
 * Text inside matching single or double quotes becomes one string token;
 * the quote characters are part of the token only when includeQuotes is
 * set.  The returned list always finishes with a kxtEnd token whose text
 * is "end".  Calls errAbort on a quote that is never closed, or on a
 * character that is not space, alphanumeric or punctuation. */
{
struct kxTok *tokList = NULL, *tok;
char c, *s, *start = NULL, *end = NULL;
enum kxTokType type = 0;
boolean spaceBefore = FALSE;

s = text;
for (;;)
    {
    if ((c = *s) == 0)
        break;
    start = s++;
    /* The <ctype.h> classifiers are only defined for unsigned char values
     * (and EOF), so plain char is cast before each call. */
    if (isspace((unsigned char)c))
        {
        /* Remember the gap; it is recorded on the next token made. */
        spaceBefore = TRUE;
        continue;
        }
    else if (isalnum((unsigned char)c) || c == '?' || (wildAst && c == '*') ||
             (wildPercent && c == '%'))
        {
        /* Bare word.  It starts out wild if its first character is an
         * enabled wildcard, and turns wild if one shows up later. */
        if (isalnum((unsigned char)c))
            type = kxtString;
        else
            type = kxtWildString;
        for (;;)
            {
            c = *s;
            if (isalnum((unsigned char)c) || c == ':' || c == '_' || c == '.' ||
                (includeHyphen && c == '-'))
                ++s;
            else if (c == '?' || (wildAst && c == '*') ||
                     (wildPercent && c == '%'))
                {
                type = kxtWildString;
                ++s;
                }
            else
                break;
            }
        end = s;
        }
    else if (c == '"' || c == '\'')
        {
        /* Quoted string; it runs to the next occurrence of the same quote. */
        char quote = c;
        type = kxtString;
        if (! includeQuotes)
            start = s;
        for (;;)
            {
            c = *s;
            /* Stop at the terminator instead of scanning past the end of
             * text.  Like the unrecognized-character case below, this
             * relies on errAbort not returning. */
            if (c == 0)
                errAbort("Missing closing %c in %s", quote, text);
            ++s;
            if (c == quote)
                break;
            /* Inside quotes '*' counts as a wildcard regardless of wildAst. */
            if (c == '*' || c == '?' || (wildPercent && c == '%'))
                type = kxtWildString;
            }
        /* s is just past the closing quote here. */
        if (! includeQuotes)
            end = s-1;
        else
            end = s;
        }
    else
        {
        /* Operators and punctuation.  '*' and '%' only get here when they
         * were not claimed as wildcards above. */
        switch (c)
            {
            case '=':
                type = kxtEquals;
                break;
            case '&':
                type = kxtAnd;
                break;
            case '|':
                type = kxtOr;
                break;
            case '^':
                type = kxtXor;
                break;
            case '+':
                type = kxtAdd;
                break;
            case '-':
                type = kxtSub;
                break;
            case '*':
                type = kxtMul;
                break;
            case '/':
                type = kxtDiv;
                break;
            case '(':
                type = kxtOpenParen;
                break;
            case ')':
                type = kxtCloseParen;
                break;
            case '!':
                type = kxtNot;
                break;
            case '>':
                if (*s == '=')
                    {
                    ++s;
                    type = kxtGE;
                    }
                else
                    type = kxtGT;
                break;
            case '<':
                if (*s == '=')
                    {
                    ++s;
                    type = kxtLE;
                    }
                else
                    type = kxtLT;
                break;
            case '.':
                type = kxtDot;
                break;
            case '%':
                type = kxtMod;
                break;
            default:
                if (ispunct((unsigned char)c))
                    type = kxtPunct;
                else
                    errAbort("Unrecognized character %c", c);
                break;
            }
        end = s;
        }
    tok = kxTokNew(type, start, end-start, spaceBefore);
    slAddHead(&tokList, tok);
    spaceBefore = FALSE;
    }
/* Tokens were pushed on the head of the list, so add the end marker
 * and then reverse the whole list. */
tok = kxTokNew(kxtEnd, "end", 3, spaceBefore);
slAddHead(&tokList, tok);
slReverse(&tokList);
return tokList;
}


struct kxTok *kxTokenize(char *text, boolean wildAst)
/* Tokenize text with the basic option set.  'wildAst' TRUE makes
 * '*' a wildcard character instead of the multiplication sign.
 * The '%' wildcard and in-word hyphen options of kxTokenizeFancy
 * are both switched off. */
{
boolean wildPercent = FALSE;
boolean includeHyphen = FALSE;
struct kxTok *tokList = kxTokenizeFancy(text, wildAst, wildPercent, includeHyphen);
return tokList;
}

void kxTokIncludeQuotes(boolean val)
/* Pass in TRUE if kxTok should include quote characters in string tokens.
 * The value is stored in the file-level includeQuotes flag, so it stays
 * in effect for later tokenizing calls until this is called again. */
{
includeQuotes = val;
}