#include <stdint.h>
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <ctype.h>
#include <stdbool.h>
#include <string.h>
#include <assert.h>
#include "error.h"
#include "maa.h"
#include "tokenize.h"

/*
 * This is a basic s-expression tokenizer
 * it also tokenizes things like number, string, and symbol literals
 */

const token_t nulltok = {
  .token_type = EMPTY,
    {
      .null_token=false
    }
};

static const token_t quote_tok = {
  .token_type = QUOTE,
  .token= {
    .quote=true
  }
};

static const token_t left_paren = {
  .token_type = PAREN,
                .token = {
                  .parenthesis="("
                }
};

static const token_t right_paren = {
  .token_type = PAREN,
  .token = {
    .parenthesis=")"
  }
};

static inline const char *
string_head(uint32_t n,
            const char *in,
            char *out) {
  /* out must be large enough to store the number of characters
   * you want to select from in, plus a byte for the null terminator
   */
#ifndef NDEBUG
  size_t in_len = strlen(in);
#endif
  assert((n > 0 && n <= in_len));
  int iserror = snprintf(out, (size_t)n+1 , "%s", in);

  assert((iserror != -1) && ((size_t)iserror == in_len));

  if (iserror == -1) {
    printf("Out of memory");
    exit(EXIT_FAILURE);
  }
  return (const char*)out;
}

static inline token_t
make_token(token_val_t val,
           tok_t toktype) {
  token_t result;
  result.token_type = toktype;
  result.token = val;
  return result;
}

bool
push_token(token_stream *tokens,
           token_t token) {
  /*
   * Check if tokens points to NULL
   */

  size_t len;
  size_t max;

  CHECK(tokens);

  len = tokens->length;
  max = tokens->max_length;

  assert(len <= max);
  assert(max > 0);

  if (len == max) {
    /* We've reached the maximum stack size
     * So we must try to increase that by GROWTH_SIZE
     */
    token_t *new_tokens = xrealloc(tokens->tokens, sizeof (token_t) * (max * GROWTH_FACTOR));
    if (!new_tokens) {
      printf("Could not allocate enough memory for the token stack\n");
      exit(EXIT_FAILURE);
    }
    tokens->tokens = new_tokens;
    tokens->max_length = max * GROWTH_FACTOR;
    tokens->tokens[len] = token;
    tokens->length++;
    return true;
  }
  tokens->tokens[len] = token;
  tokens->length++;
  return true;
}

bool
pop_token(token_stream *tokens) {
  size_t len;
  CHECK(tokens);

  len = tokens->length;

  assert(len != 0);
  len--;
  CHECK(tokens->tokens);

  tokens->length--;
  return true;
}

inline token_t
peek_token(token_stream *tokens) {
  /*
   * Check if tokens points to NULL
   */
  size_t len = tokens->length;
  size_t max = tokens->max_length;
  CHECK(tokens);
  assert(len != 0);

  if (len == 0 || len > max) {
    return nulltok;
  }
  return tokens->tokens[len-1];
}

static inline uint32_t
match_int(source_t source,
          uint32_t begin,
          const uint32_t length) {
  /* Return false if there is no match
   * otherwise return the position of the end of the match + 1
   */
  uint32_t i = begin;
  uint32_t test;
  CHECK(source);
  assert(length > 0);

  if (source[i] == '+' ||
      source[i] == '-') {
    i++;
  }
  test = i;
  while (i < length &&
         isdigit(source[i])) {
    i++;
  }
  if (i == test)
    return false;
  return i;
}

static inline uint32_t
match_float(source_t source,
            uint32_t begin,
            const uint32_t length) {
  /* Return false if there is no match
   * otherwise:
   *  if there is a leading decimal point and then a valid int match:
   *    return the position of the end of the match
   *  if there is a leading valid int match:
   *    but no decimal point match after that:
   *      return false
   *    if there is a decimal point match and then a valid int match:
   *        return the position of the match
   *    if there is no valid int match:
   *      return false
   * ALWAYS returns the position + 1 to avoid confusion with false (which is a valid index)
   */
  uint32_t i, leading_int_match, trailing_int_match;
  CHECK(source);
  assert(length > 0);

  i = begin;
  leading_int_match = match_int(source, i, length);

  if (leading_int_match) {
    i = leading_int_match;
  }

  assert(i <= length);

  if (source[i] != '.' ||
      source[i] == '+' ||
      source[i] == '-') {
    if (((i+1) <= length) && /* Make sure there is at least two characters to look at */
        ((source[i] == '+') ||
         (source[i] == '-'))
        && (source[i+1] == '.')) {
      i++;
    }
    else {
      return false;
    }
  }
  i++;

  trailing_int_match = match_int(source, i, length);
  if (trailing_int_match) {
    return trailing_int_match;
  }
  return false;
}

static inline uint32_t
match_word(source_t source,
                 uint32_t begin,
                 const uint32_t length) {

  /* Return false if there is no match
   *    if there is a match for any characters that are not:
   *      whitespace
   *      a parenthesis ( )
   *      a brace { }
   *      a square bracket [ ]
   *        then return the position of the match + 1
   *    if there is nothing else to match:
   *      return false
   */
  uint32_t i = begin;
  CHECK(source);
  assert(length > 0);

  while (i < length &&
         !(source[i] == '(' ||
           source[i] == ')' ||
           isspace(source[i]))) {
    i++;
  }

  if (i == begin) {
    return false;
  }
  assert(i <= length);
  return i;
}

static inline uint32_t
is_empty_string(const char *source,
                uint32_t length) {
  int allspace = false;
  uint32_t i = 0;
  if (source[i] != '\"') {
    return false;
  }
  for (; i < length; i++) {
    if (!isspace(source[i])) {
      allspace = true;
    }
  }
  /*if (allspace) {
    printf("Actually found an empty string! Of length %d\n", i);
  }*/
  return allspace;
}


static inline uint32_t
match_string(source_t source,
             uint32_t begin,
             const uint32_t length) {
  CHECK(source);
  (void)length;
  assert(length > 0);
  uint32_t i = begin;
  if (source[i] != '\"') {
    return false;
  }
  i++;
  while (source[i] != '\"' &&
         (i < length) &&
         (i < (begin + MAX_STRING_SIZE))) {
    i++;
  }
  if ((i != (begin+1)) &&
      (i <= length) &&
      (source[i] == '\"')) {
    return i+1;
  }
  return false;
}

static inline void
extract_token(uint32_t position,
              uint32_t begin,
              const source_t source,
              const char *token_val) {
    assert(position > begin);
    string_head(position - begin,
                &source[begin],
                (char *)token_val);
}

token_stream
tokenize(source_t source,
         uint32_t begin,
         const uint32_t length) {
  /*
   * Remember to free everything from this struct
   * for example, token_stack.tokens will not necessarily be
   * equal to tokens after this function has run
   *
   */
  uint32_t position = begin;
  uint32_t allspace = false;
  const char *current_token_val;
  token_stream token_stack;
  token_val_t current_token;
  token_t *tokens = xcalloc(STACK_SIZE, sizeof (token_t));

  hsh_HashTable token_memo = hsh_create(NULL, NULL);

  assert(begin == 0);
  assert(length > 0);
  CHECK(source);

  token_stack.length = 0;
  token_stack.max_length = STACK_SIZE;
  token_stack.tokens = tokens;
  token_stack.memo = token_memo;
  char lookahead = '\0';
  assert(STACK_SIZE > 0);

  while (begin <= length && source[begin]) {
      /* Possibly matched a string
       * First look for closing "
       * Then look for a newline to close it if no "
       * Then stop after some large constant of characters maybe?
       * We're dealing with real text so people might forget to close
       * quotations, so we have to be clever about it and use heuristics (for performance)
       */
    if (source[begin] == '(') {
      /*Matched a left paren */
      position = begin + 1;
      push_token(&token_stack, left_paren);
    }
    else if (source[begin] == ')') {
      /*Matched a left paren */
      position = begin + 1;
      push_token(&token_stack, right_paren);
    }
    else if (isspace(source[begin])) {
      position = begin + 1;
      /* Matched a whitespace character */
    }
    else if ((position = match_string(source, begin, length))) {
      /* Possibly matched a string
       * First look for closing "
       * Then look for a newline to close it if no "
       * Then stop after some large constant of characters maybe?
       * We're dealing with real text so people might forget to close
       * quotations, so we have to be clever about it and use heuristics (for performance)
       */
      lookahead = source[position];
      source[position] = '\0';
      if ((current_token_val = hsh_retrieve(token_stack.memo, source+begin))) {
        current_token.string = current_token_val;
        source[position] = lookahead;
        allspace = false;
      }
      else {
        assert(position > begin);
        current_token_val = xcalloc(((position - begin) + 1), sizeof (char));
        CHECK(current_token_val);
        extract_token(position, begin, source, current_token_val);
        if ((allspace = is_empty_string(current_token_val, (position - begin) + 1))) {
          source[position] = lookahead;
          current_token.string = current_token_val;
        }
        else {
        source[position] = lookahead;
        hsh_insert(token_stack.memo, current_token_val, current_token_val);
        current_token.string = current_token_val;
        }
      }
      if (allspace) {
        push_token(&token_stack, make_token(current_token, STRING));
      }
    }
    else if ((position = match_float(source, begin, length))) {
      /* Matched a float */
      lookahead = source[position];
      source[position] = '\0';
      if ((current_token_val = hsh_retrieve(token_stack.memo, source+begin))) {
        current_token.floating = current_token_val;
        source[position] = lookahead;
      }
      else {
        assert(position > begin);
        current_token_val = xcalloc(((position - begin) + 1), sizeof (char));
        CHECK(current_token_val);
        extract_token(position, begin, source, current_token_val);
        source[position] = lookahead;
        hsh_insert(token_stack.memo, current_token_val, current_token_val);
        current_token.floating = current_token_val;
      }
      push_token(&token_stack, make_token(current_token, FLOATING));
    }
    else if ((position = match_int(source, begin, length))) {
      /* Matched an int */
      lookahead = source[position];
      source[position] = '\0';
      if ((current_token_val = hsh_retrieve(token_stack.memo, source+begin))) {
        current_token.integer = current_token_val;
        source[position] = lookahead;
      }
      else {
        assert(position > begin);
        assert(position <= length);

        current_token_val = xcalloc(((position - begin) + 1), sizeof (char));
        CHECK(current_token_val);
        extract_token(position, begin, source, current_token_val);
        source[position] = lookahead;
        hsh_insert(token_stack.memo, current_token_val, current_token_val);
        current_token.integer = current_token_val;
      }
      push_token(&token_stack, make_token(current_token, INTEGER));
    }
    else if (source[begin] == '\'') {
      /* Matched a quote (apostrophe) */
      position = begin + 1;
      push_token(&token_stack, quote_tok);
    }
    else if ((position = match_word(source, begin, length))) {
      /* Matched a word */
      lookahead = source[position];
      source[position] = '\0';
      if ((current_token_val = hsh_retrieve(token_stack.memo, source+begin))) {
        current_token.word = current_token_val;
        source[position] = lookahead;
      }
      else {
        assert(position > begin);
        assert(position <= length);

        current_token_val = xcalloc(((position - begin) + 1), sizeof (char));
        CHECK(current_token_val);
        extract_token(position, begin, source, current_token_val);
        source[position] = lookahead;
        hsh_insert(token_stack.memo, current_token_val, current_token_val);
        current_token.word = current_token_val;
      }
      push_token(&token_stack, make_token(current_token, WORD));
      /* Matched a word */
    }
    else if (position <= begin) {
      printf("Source is too large to read\n");
      exit(EXIT_FAILURE);
    }
    else {
      printf("Unmatched token\n");
      exit(EXIT_FAILURE);
    }
    begin = position;
  }

  return token_stack;
}

int
free_token(const void *key,
           const void *val) {
  /* silence warnings about unused parameters, key and val point to the same data*/
  (void)key;
  xfree((char *)val);
  return true;
}

bool
release_tokens(token_stream *tokens) {
  /* Iterate through the stack, release each token
   * Then release the entire stack
   */
  CHECK(tokens);
  CHECK(tokens->tokens);
  assert(tokens->max_length > 0);
  xfree(tokens->tokens);
  hsh_iterate(tokens->memo, free_token);

  hsh_destroy(tokens->memo);
  return true;
}

#ifndef TOK_LIB
int main(void) {
  void *source_code = malloc(111000);
  size_t nbytes = read(STDIN_FILENO, source_code, 111000);
  if (nbytes == 0) {
    exit(EXIT_FAILURE);
  }
  token_stream toks = tokenize(source_code, 0, nbytes);
  token_t current_tok;
  while (toks.length > 0) {
    current_tok = peek_token(&toks);
    switch (current_tok.token_type) {
      case SYMBOL:
        printf("symbol: %s\n", current_tok.token.symbol);
        break;
      case WORD:
        printf("identifer: %s\n", current_tok.token.word);
        break;
      case INTEGER:
        printf("integer: %s\n", current_tok.token.integer);
        break;
      case FLOATING:
        printf("floating: %s\n", current_tok.token.floating);
        break;
      case QUOTE:
        printf("quote: '\n");
        break;
      case WSPACE:
        printf("whitespace\n");
        break;
      case PAREN:
        printf("paren: %s\n", current_tok.token.parenthesis);
        break;
      case EMPTY:
        printf("this should not be empty\n");
        break;
      case STRING:
        printf("string: %s\n", current_tok.token.string);
        break;
      default:
        printf("oops, there was an unknown token, check valgrind or gdb\n");
    }
    pop_token(&toks);
  }
  release_tokens(&toks);
  return 0;
}
#endif