/*
 Program WinCaml: Graphical User Interface
 for interactive use of Caml-Light and Ocaml.
 Copyright (C) 2005-2017 Jean Mouric 35700 Rennes France
 email: jean.mouric@orange.fr
 
 This program is free software: you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation, either version 3 of the License, or
 (at your option) any later version.
 
 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.
 
 You should have received a copy of the GNU General Public License
 along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

// File lex.yy.cpp

#include "platform.h"
#include "HighlightLexer.h"

// Offsets, counted from the start of the scanned string, delimiting the token
// most recently returned by yylex(): it starts at token_begin and stops just
// before token_end.
size_t token_begin;
size_t token_end;

// Scanner state shared by yylex() and every token handler.
static int t;          // character yylex() consumed to start the current token; alpha() also overwrites it while scanning
static char* p;        // read cursor into str, pointing at the next unconsumed character
static char* str;      // private heap copy of the input made by yy_scan_string(), freed by yylex() at end of input
static char buf[256];  // NUL-terminated text of the identifier gathered by alpha() (at most 255 characters are kept)

// Reserved words and, at the same index in keyvalues, the token code returned
// for each of them. keyvalue() binary-searches keywords with strcmp(), so the
// list must stay sorted in strcmp() order and keywordsMax must stay equal to
// the index of the LAST entry (not the number of entries).
// NOTE(review): alpha() only gathers letters, digits and '_', so the entries
// ending in '!' cannot be produced by the scanner as written.
static const int keywordsMax = 56;
static const char* keywords[] = {"and", "as", "asr", "begin", "class", "do", "done", "downto", "else", "end", "exception", "for", "fun", "function", "functor", "if", "in", "include", "inherit", "inherit!", "initializer", "land", "let", "lnot", "lor", "lsl", "lsr", "lxor","match", "method", "method!", "mod", "module", "mutable", "new", "not", "object", "of", "open", "or", "parser", "prefix", "rec", "sig", "struct", "then", "to", "try", "type", "val", "val!", "value", "virtual", "when", "where", "while", "with"};
static const int keyvalues[] = {KEYWORD, KEYWORD, OPERATOR, BEGIN1, KEYWORD, DO, DONE, KEYWORD, ELSE, END, KEYWORD, KEYWORD, KEYWORD, KEYWORD, KEYWORD, IF, IN1, KEYWORD, KEYWORD, KEYWORD, KEYWORD, OPERATOR, LET, OPERATOR, OPERATOR, OPERATOR, OPERATOR, OPERATOR, TRY, KEYWORD, KEYWORD, OPERATOR, KEYWORD, KEYWORD, KEYWORD, OPERATOR, KEYWORD, KEYWORD, KEYWORD, OPERATOR, KEYWORD, KEYWORD, KEYWORD, KEYWORD, KEYWORD, THEN, KEYWORD, TRY, KEYWORD, KEYWORD, KEYWORD, KEYWORD, KEYWORD,  KEYWORD, KEYWORD, KEYWORD, WITH};

// Characters that may appear inside an operator token. isOpChar() binary-searches
// this array, so it must stay sorted by character code; opCharsMax is the index
// of the last entry.
static const int opCharsMax = 18;
static const char opChars[] = {'!', '$', '%', '&', '*', '+', '-', '.', '/', ':', ';', '<', '=', '>', '?', '@', '^', '|', '~'};

// Characters that inittab() routes straight to opfirstchars(); opFirstCharsMax
// is the index of the last entry.
static const int opFirstCharsMax = 11;
static const char opFirstChars[] = {'!', '$', '%', '&', '+', '/', '<', '=', '?', '@', '^', '~'};

//------------------------------------------------------------------------------------------------------------------------------------

// Forward declarations of the character-classification and token helpers
// defined just below. Only is_digit() has external linkage.
bool is_digit(char c);
static inline int ret(int tok);
static inline int keyvalue(char* buf);
static inline bool isOpChar(char c);
static inline int decimalEscape();
static inline int hexadecimalEscape();
static inline bool isUpperCase(int c);
static inline bool isLowerCase(int c);
static inline bool isAlpha(int c);
static inline bool isAlNum(int c);

// Tells whether c is one of the decimal digits '0' through '9'.
bool is_digit(char c)
{
    if (c < '0') {
        return false;
    }
    return c <= '9';
}

// Finishes the current token: stores the distance of the cursor p from the
// start of str in token_end, then returns tok unchanged so that handlers can
// write "return ret(TOKEN);".
static inline int ret(int tok)
{
    const size_t consumed = (size_t)(p - str);
    token_end = consumed;
    return tok;
}

// Looks s up in the sorted keywords table.
// Returns the matching entry of keyvalues, or 0 when s is not a keyword.
static inline int keyvalue(char* s)
{
    int lo = 0;
    int hi = keywordsMax;   // index of the last table entry (inclusive bound)

    while (lo <= hi) {
        const int mid = lo + (hi - lo) / 2;
        const int order = strcmp(s, keywords[mid]);
        if (order == 0) {
            return keyvalues[mid];
        }
        if (order < 0) {
            hi = mid - 1;
        }
        else {
            lo = mid + 1;
        }
    }
    return 0;
}

// Tells whether c belongs to the sorted opChars table, i.e. whether it may be
// part of an operator token. The NUL character is never an operator character.
static inline bool isOpChar(char c)
{
    int lo = 0;
    int hi = opCharsMax;   // index of the last table entry (inclusive bound)

    while (lo <= hi) {
        const int mid = lo + (hi - lo) / 2;
        const char candidate = opChars[mid];
        if (candidate == c) {
            return true;
        }
        if (candidate < c) {
            lo = mid + 1;
        }
        else {
            hi = mid - 1;
        }
    }
    return false;
}

// Called by charliteral() with p on the first digit of a "\ddd" escape.
// When two more digits and the delimiter held in t follow, the whole escape
// and its closing delimiter are consumed. CHAR1 is returned in either case.
static inline int decimalEscape()
{
    const bool complete = is_digit(p[1]) && is_digit(p[2]) && p[3] == t;
    if (complete) {
        p += 4;
    }
    return ret(CHAR1);
}

// Called by charliteral() with p on the 'x' of a "\xhh" escape.
// When two hexadecimal digits and the delimiter held in t follow, the whole
// escape and its closing delimiter are consumed. CHAR1 is returned in either
// case.
static inline int hexadecimalEscape()
{
    // <ctype.h> functions require a value representable as unsigned char (or
    // EOF); a plain char holding a byte >= 0x80 may be negative, so cast first.
    const bool complete = isxdigit((unsigned char)p[1])
                       && isxdigit((unsigned char)p[2])
                       && p[3] == t;
    if (complete) {
        p += 4;
    }
    return ret(CHAR1);
}

// Tells whether character code c is an upper-case letter: 65..90, or
// 192..222 with the exception of 215.
static inline bool isUpperCase(int c)
{
    if (c >= 65 && c <= 90) {
        return true;
    }
    if (c >= 192 && c <= 222) {
        return c != 215;
    }
    return false;
}

// Tells whether character code c is a lower-case letter: 97..122, or
// 223..255 with the exception of 247 (the Latin-1 division sign).
// Code 248 (o with stroke) is a letter and is accepted, mirroring the way
// isUpperCase() excludes only 215 from its accented range.
static inline bool isLowerCase(int c)
{
    if (c >= 97 && c <= 122) {
        return true;
    }
    if (c >= 223 && c <= 255) {
        return c != 247;
    }
    return false;
}

// Tells whether character code c is a letter of either case.
static inline bool isAlpha(int c)
{
    if (isUpperCase(c)) {
        return true;
    }
    return isLowerCase(c);
}

// Tells whether character code c is a letter or a decimal digit.
static inline bool isAlNum(int c)
{
    if (isAlpha(c)) {
        return true;
    }
    return is_digit(c);
}

//------------------------------------------------------------------------------------------------------------------------------------

// init is true until yy_scan_string() has filled the dispatch table once.
static bool init = true;
// Dispatch table indexed by the first character of a token (0..255). Entries
// left NULL make yylex() return UNKNOWN for that character.
static int (*tab[256])() = {NULL};
// Token handlers. Each one is entered with t holding the character yylex()
// already consumed and p pointing at the character that follows it.
static  int whitespace();
static  void inittab();
static  int lpar();
static  int rpar();
static  int comment();
static  int lbrace();
static  int rbrace();
static  int lbracket();
static  int rbracket();
static  int bigger();
static  int verticalbar();
static  int doublequote();
static  int backslash();
static  int semicolon();
static  int colon();
static  int minussign();
static  int underscore();
static  int keysym();
static  int charliteral();
static  int hexadecimal();
static  int num();
static  int opfirstchars();
static  int alpha();

// Fills the dispatch table tab: for every character that can start a token,
// stores the handler yylex() must call. Characters not mentioned here keep a
// NULL entry. Every character is assigned at most once, so the order of the
// groups below does not matter.
static void inittab()
{
    int c;

    // Letters start identifiers and keywords.
    for (c = 65; c < 256; c++) {
        if (isAlpha(c)) {
            tab[c] = alpha;
        }
    }

    // Numeric literals: a leading '0' may introduce a hexadecimal literal.
    tab[(int)'0'] = hexadecimal;
    for (c = (int)'1'; c <= (int)'9'; c++) {
        tab[c] = num;
    }

    // Characters that can only start a plain operator.
    for (c = 0; c <= opFirstCharsMax; c++) {
        tab[(int)opFirstChars[c]] = opfirstchars;
    }

    // Blanks.
    tab[(int)' '] = whitespace;
    tab[(int)'\n'] = whitespace;
    tab[(int)'\t'] = whitespace;

    // Brackets and the characters that may close or open a compound bracket.
    tab[(int)'('] = lpar;
    tab[(int)')'] = rpar;
    tab[(int)'{'] = lbrace;
    tab[(int)'}'] = rbrace;
    tab[(int)'['] = lbracket;
    tab[(int)']'] = rbracket;
    tab[(int)'*'] = comment;
    tab[(int)'>'] = bigger;
    tab[(int)'|'] = verticalbar;

    // Literals.
    tab[(int)'"'] = doublequote;
    tab[(int)'\''] = charliteral;
    tab[(int)'`'] = charliteral;
    tab[(int)'\\'] = backslash;

    // Punctuation.
    tab[(int)';'] = semicolon;
    tab[(int)':'] = colon;
    tab[(int)'-'] = minussign;
    tab[(int)'_'] = underscore;
    tab[(int)'#'] = keysym;
    tab[(int)','] = keysym;
    tab[(int)'.'] = keysym;
}

// Handler for ' ', '\n' and '\t': the single blank consumed by yylex() is
// returned as a WHITESPACE token of its own.
static  int whitespace()
{
    return ret(WHITESPACE);
}

// Handler for '(': "(*" is consumed as a whole and returned as LCOMMENT,
// a lone '(' is returned as LPAR.
static int lpar()
{
    if (*p != '*') {
        return ret(LPAR);
    }
    ++p;
    return ret(LCOMMENT);
}

// Handler for ')': returns RPAR for the single character consumed by yylex().
static  int rpar()
{
    return ret(RPAR);
}

// Handler for '*': "*)" is consumed as a whole and returned as RCOMMENT;
// otherwise the '*' starts an ordinary operator scanned by opfirstchars().
static int comment()
{
    if (*p != ')') {
        return opfirstchars();
    }
    ++p;
    return ret(RCOMMENT);
}

// Handler for '{': "{<" is consumed as a whole and returned as LOBJECT,
// a lone '{' is returned as LBRACE.
static int lbrace()
{
    if (*p != '<') {
        return ret(LBRACE);
    }
    ++p;
    return ret(LOBJECT);
}

// Handler for '}': returns RBRACE for the single character consumed by yylex().
static  int rbrace()
{
    return ret(RBRACE);
}

// Handler for '[': "[|" yields LLIST, "[<" and "[>" yield LSTREAM (the second
// character is consumed in both cases), a lone '[' yields LBRACKET.
static int lbracket()
{
    switch (*p) {
        case '|':
            ++p;
            return ret(LLIST);
        case '<':
        case '>':
            ++p;
            return ret(LSTREAM);
        default:
            return ret(LBRACKET);
    }
}

// Handler for ']': returns RBRACKET for the single character consumed by yylex().
static  int rbracket()
{
    return ret(RBRACKET);
}

// Handler for '>': ">}" yields ROBJECT and ">]" yields RSTREAM (the second
// character is consumed); anything else is scanned as an operator.
static int bigger()
{
    switch (*p) {
        case '}':
            ++p;
            return ret(ROBJECT);
        case ']':
            ++p;
            return ret(RSTREAM);
        default:
            return opfirstchars();
    }
}

// Handler for '|': "|]" is consumed as a whole and returned as RLIST; a '|'
// not followed by an operator character is the KEYSYM '|'; otherwise the '|'
// starts an operator scanned by opfirstchars().
static int verticalbar()
{
    const char next = *p;
    if (next == ']') {
        ++p;
        return ret(RLIST);
    }
    return isOpChar(next) ? opfirstchars() : ret(KEYSYM);
}

// Handler for '"': returns STRING for the quote character alone; nothing
// after the quote is consumed here.
// NOTE(review): the string body is presumably tracked by the caller — confirm.
static  int doublequote()
{
    return ret(STRING);
}

// Handler for '\\'. When the next character can begin an escape sequence
// (backslash, n, t, r, x, b, a quote, a back-quote or a digit) scanning is
// delegated to charliteral(). A backslash followed by '"' is consumed together
// with the quote and returned as UNKNOWN. Anything else is scanned as an
// operator.
static int backslash()
{
    const char next = *p;
    switch (next) {
        case '\\':
        case 'n':
        case 't':
        case 'r':
        case 'x':
        case 'b':
        case '\'':
        case '`':
            return charliteral();
        case '"':
            ++p;
            return ret(UNKNOWN);
        default:
            return is_digit(next) ? charliteral() : opfirstchars();
    }
}

// Handler for ';': both ";" and ";;" are returned as a single KEYSYM token
// (the second ';' is consumed when present).
static int semicolon()
{
    const bool doubled = (*p == ';');
    if (doubled) {
        ++p;
    }
    return ret(KEYSYM);
}

// Handler for ':': "::", ":=" and ":>" are consumed as a whole and returned as
// OPERATOR; a lone ':' is returned as KEYSYM.
static int colon()
{
    switch (*p) {
        case ':':
        case '=':
        case '>':
            ++p;
            return ret(OPERATOR);
        default:
            return ret(KEYSYM);
    }
}

// Handler for '-': "->" not followed by another operator character is consumed
// as a whole and returned as KEYSYM; every other case is scanned as an operator.
static int minussign()
{
    // p[1] is only read when *p is '>', hence never past the terminating NUL.
    const bool arrow = (*p == '>') && !isOpChar(p[1]);
    if (!arrow) {
        return opfirstchars();
    }
    ++p;
    return ret(KEYSYM);
}

// Handler for '_': lets alpha() scan an identifier starting with the
// underscore. If alpha() consumed at least one more character its token code
// is returned; a lone '_' is returned as KEYSYM.
static int underscore()
{
    char* const start = p;
    const int kind = alpha();
    return ret(p > start ? kind : KEYSYM);
}

// Handler for '#', ',' and '.': the single character consumed by yylex() is
// returned as a KEYSYM token.
static  int keysym()
{
    return ret(KEYSYM);
}

// Scans what may be a character literal. On entry t holds the character
// consumed by yylex(): a quote or back-quote when dispatched through tab, or
// the backslash itself when called from backslash(). That same character is
// expected to close the literal.
// Returns CHAR1 when a literal is recognised, UNKNOWN when the input ends
// right after the characters examined so far, and KEYSYM otherwise. The
// checks are order-dependent: a failed escape falls through to the generic
// checks with p already advanced.
static  int charliteral()
{
    if (*p == '\\') {
        p++;
        // Numeric escapes: the helpers return CHAR1 whether or not the escape
        // turns out to be complete.
        if (is_digit(*p)) {
            return decimalEscape();
        }
        if (*p == 'x') {
            return hexadecimalEscape();
        }
        // Single-character escape followed by the closing delimiter.
        if (*p == 'n' || *p == 't' || *p == 'b' || *p == 'r' || *p == '\\' || *p == '\'' || *p == '`') {
            p++;
            if (*p == t) {p++; return ret(CHAR1);}
        }
    }
    // NOTE(review): accepts the two characters "xx" followed by the delimiter;
    // the intent of this special case is not visible from here — confirm.
    if (*p == 'x' && *(p + 1) == 'x' && *(p + 2) == t ) {
        p += 3;
        return ret(CHAR1);
    }
    // End of input: also guarantees that reading *(p + 1) below stays in bounds.
    if (*p == '\0') {
        return ret(UNKNOWN);
    }
    // Any single character immediately followed by the delimiter.
    if (*(p + 1) == t) {
        p += 2;
        return ret(CHAR1);
    }
    return ret(KEYSYM);
}

// Handler for '0': "0x" / "0X" followed by any number of hexadecimal digits is
// returned as INTEGER; every other literal starting with '0' is scanned by num().
static int hexadecimal()
{
    if (*p != 'x' && *p != 'X') {
        return num();
    }
    ++p;
    // <ctype.h> functions require a value representable as unsigned char (or
    // EOF); a plain char holding a byte >= 0x80 may be negative, so cast first.
    while (isxdigit((unsigned char)*p)) {
        ++p;
    }
    return ret(INTEGER);
}

// Scans the remainder of a decimal literal whose first digit was consumed by
// yylex(): digits, then an optional '.' with digits, then an optional exponent
// ('e' or 'E', optional sign, digits). Returns FLOAT1 when a fractional part
// or an exponent was seen, INTEGER otherwise.
static int num()
{
    int kind = INTEGER;

    for (; is_digit(*p); ++p) {
    }
    if (*p == '.') {
        kind = FLOAT1;
        for (++p; is_digit(*p); ++p) {
        }
    }
    if (*p == 'e' || *p == 'E') {
        kind = FLOAT1;
        ++p;
        if (*p == '+' || *p == '-') {
            ++p;
        }
        for (; is_digit(*p); ++p) {
        }
    }
    return ret(kind);
}

// Scans the remainder of an operator: consumes operator characters starting
// at p and returns OPERATOR. Scanning stops, without consuming the character
// at p, when
//   - the character at p is not an operator character (this includes the
//     terminating NUL),
//   - the character after it is ')', or
//   - p is on the first ';' of a ";;" pair.
static int opfirstchars()
{
    // isOpChar('\0') is false, so the loop condition fails on the terminator
    // before p[1] is evaluated; the scanner never reads past the end of the
    // buffer.
    while (isOpChar(*p)) {
        const char next = p[1];
        if (next == ')') {
            break;
        }
        if (*p == ';' && next == ';') {
            break;
        }
        ++p;
    }
    return ret(OPERATOR);
}

static  int alpha()
{
    int i = 0;
    buf[i++] = t;
    while (isAlNum((unsigned char)*p) || *p == '_') {
        t = *p++;
        if (i < 255) buf[i++] = t;
    }
    buf[i] = 0;
    int keyval = keyvalue(buf);
    if (keyval != 0) {
        return ret(keyval);
    }
    return ret(IDENTIFIER);
}

//------------------------------------------------------------------------------------------------------------------------------------

// Entry points named after the flex scanner API. BUFFER_STATE is an opaque
// pointer type; struct yy_buffer_state is never defined in this file.
typedef struct yy_buffer_state *BUFFER_STATE;
BUFFER_STATE yy_scan_string(const char* s);
int yylex();

// Empty input the cursor is parked on whenever no scan buffer is available
// (before the first scan, after end of input, or after an allocation failure),
// so that yylex() can always dereference p safely.
static char endOfInput[1] = { '\0' };

// Prepares the scanner to tokenize a private copy of s and resets the token
// offsets. The dispatch table is built on the first call.
// A buffer left over from a scan that was not run to completion is released
// first. If s is NULL or the copy cannot be allocated, the scanner behaves as
// if it had been given an empty string. Always returns NULL.
BUFFER_STATE yy_scan_string(const char* s)
{
    if (init) {
        inittab();
        init = false;
    }

    // str is NULL unless a previous scan was abandoned before yylex() reached
    // the end of its input; free(NULL) is a no-op.
    free(str);
    str = NULL;
    p = endOfInput;
    token_begin = 0;
    token_end = 0;

    if (s == NULL) {
        return NULL;
    }
    size_t sz = strlen(s) + 1;
    char* copy = (char*)malloc(sz);
    if (copy == NULL) {
        return NULL;    // yylex() will report end of input at once
    }
    memcpy(copy, s, sz);
    str = copy;
    p = copy;
    return NULL;
}

// Returns the code of the next token of the string given to yy_scan_string(),
// or 0 at end of input. After a non-zero return, token_begin and token_end
// delimit the token inside that string. At end of input the private copy is
// released; further calls keep returning 0 until yy_scan_string() is called
// again.
int yylex()
{
    token_begin = token_end;
    if (p == NULL) {
        return 0;       // called before any yy_scan_string()
    }
    t = (int)(unsigned char)*p;
    if (t == 0) {
        // Release the buffer exactly once and park the cursor on a static
        // terminator so that a repeated call neither touches freed memory nor
        // frees it a second time.
        free(str);
        str = NULL;
        p = endOfInput;
        return 0;
    }
    p++;
    int (*handler)() = tab[t];
    return handler ? handler() : ret(UNKNOWN);
}
