#include "etpan-indexer.h"

#include <CLucene.h>
#include <wchar.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <libetpan/libetpan.h>

#include "etpan-error.h"
#include "etpan-log.h"
#include "utf-8-conv.h"

/* Operating state of an indexer (indexer->state). set_state() releases the
   resources of the state being left and opens those of the state entered. */
enum {
  /* no writer, reader or searcher is open */
  STATE_IDLE,
  /* an IndexWriter is open in data->writer */
  STATE_WRITING,
  /* an IndexReader is open in data->reader */
  STATE_READING,
  /* an IndexSearcher is open in data->searcher */
  STATE_SEARCHING,
};

/* Storage backing an indexer (indexer->type), chosen in etpan_indexer_new(). */
enum {
  /* in-memory index held in a RAMDirectory (filename was NULL) */
  INDEXER_TYPE_RAM,
  /* index stored at indexer->filename */
  INDEXER_TYPE_FILE,
};

/* CLucene objects owned by one indexer; stored behind indexer->data. */
struct lucene_data {
  /* open only while the indexer is in STATE_WRITING, NULL otherwise */
  lucene::index::IndexWriter * writer;
  /* open only while the indexer is in STATE_READING, NULL otherwise */
  lucene::index::IndexReader * reader;
  /* created in etpan_indexer_new(), shared by writers and query parsing */
  lucene::analysis::standard::StandardAnalyzer * analyzer;
  /* in-memory directory for INDEXER_TYPE_RAM, NULL for file indexers */
  lucene::store::RAMDirectory * ram;
  /* open only while the indexer is in STATE_SEARCHING, NULL otherwise */
  lucene::search::IndexSearcher * searcher;
  /* set to 1 once a writer has created the index inside `ram` */
  int ram_created;
};

/* Forward declarations of file-local helpers defined further down.
   The lock() prototype carries the same two parameters as its definition,
   so that it does not declare a separate, never-defined overload. */
static void etpan_indexer_free(struct etpan_indexer * indexer);
static void lock(struct etpan_indexer * indexer, int exists);
static void unlock(struct etpan_indexer * indexer);

/*
  Creates an indexer with a reference count of 1, in STATE_IDLE.

  filename: location of the on-disk index, or NULL to get an in-memory
    index backed by a RAMDirectory. The string is duplicated.

  Allocation failures are reported through ETPAN_LOG_MEMORY_ERROR; CLucene
  and mutex initialisation failures are logged and followed by
  etpan_crash().
*/
extern "C" struct etpan_indexer * etpan_indexer_new(const char * filename)
{
  struct etpan_indexer * result;
  struct lucene_data * priv;
  int res;
  
  result = (struct etpan_indexer *) malloc(sizeof(* result));
  if (result == NULL)
    ETPAN_LOG_MEMORY_ERROR;
  
  priv = (struct lucene_data *) malloc(sizeof(* priv));
  if (priv == NULL)
    ETPAN_LOG_MEMORY_ERROR;
  
  /* start with no CLucene object attached */
  priv->writer = NULL;
  priv->reader = NULL;
  priv->searcher = NULL;
  priv->analyzer = NULL;
  priv->ram = NULL;
  priv->ram_created = 0;
  
  result->locked = 0;
  result->state = STATE_IDLE;
  
  try {
    priv->analyzer = new lucene::analysis::standard::StandardAnalyzer();
  }
  catch (CLuceneError & e) {
    ETPAN_LOG("%s - %s", e.what(), filename);
    etpan_crash();
  }
  
  if (filename == NULL) {
    /* in-memory index */
    result->type = INDEXER_TYPE_RAM;
    result->filename = NULL;
    try {
      priv->ram = new lucene::store::RAMDirectory();
    }
    catch (CLuceneError & e) {
      ETPAN_LOG("%s", e.what());
      etpan_crash();
    }
  }
  else {
    /* on-disk index: only the location is recorded here */
    result->type = INDEXER_TYPE_FILE;
    result->filename = strdup(filename);
    if (result->filename == NULL)
      ETPAN_LOG_MEMORY_ERROR;
  }
  
  result->data = priv;
  
  res = pthread_mutex_init(&result->lock, NULL);
  if (res != 0) {
    ETPAN_LOG("failed to create mutex (manager)");
    etpan_crash();
  }
  result->ref_count = 1;
  
  return result;
}

/* Forward declarations: both helpers are used by etpan_indexer_free()
   before their definitions appear. */
static inline struct lucene_data * get_data(struct etpan_indexer * indexer);
static void set_state(struct etpan_indexer * indexer, int state);

/*
  Destroys an indexer: returns it to STATE_IDLE (which closes any open
  writer, reader or searcher), releases the file lock, destroys the mutex
  and frees every owned object. Called by etpan_indexer_unref() when the
  reference count reaches zero.
*/
static void etpan_indexer_free(struct etpan_indexer * indexer)
{
  struct lucene_data * priv;
  
  set_state(indexer, STATE_IDLE);
  unlock(indexer);
  
  priv = get_data(indexer);
  pthread_mutex_destroy(&indexer->lock);
  
  /* deleting a NULL pointer is a no-op, so no guards are needed */
  delete priv->searcher;
  delete priv->ram;
  delete priv->analyzer;
  free(priv);
  
  free(indexer->filename);
  free(indexer);
}

/*
  Takes the maillock write lock on the index file, once per indexer.

  exists: non-zero when the index is already present on disk; in that case
    a CLucene lock left on the index is cleared after the file lock has
    been taken.

  Does nothing when indexer->locked is already set.
*/
static void lock(struct etpan_indexer * indexer, int exists)
{
  if (indexer->locked)
    return;
  
  indexer->locked = 1;
  maillock_write_lock(indexer->filename, -1);
  
  if (exists && lucene::index::IndexReader::isLocked(indexer->filename))
    lucene::index::IndexReader::unlock(indexer->filename);
}

/*
  Releases the maillock write lock taken by lock(), if it is held.

  The `locked` flag is cleared afterwards so that the flag keeps matching
  the real lock state: lock() only acquires the file lock when the flag is
  zero, and a second unlock() must not release a lock that is not held.
*/
static void unlock(struct etpan_indexer * indexer)
{
  if (!indexer->locked)
    return;
  
  maillock_write_unlock(indexer->filename, -1);
  indexer->locked = 0;
}

/*
  Moves the indexer from its current state to `state`.

  First the resources of the current state are released (a writer is
  optimized, closed and deleted; a reader is closed and deleted; a searcher
  is deleted), then the resources of the new state are opened.

  STATE_READING and STATE_SEARCHING need an existing index. When there is
  none (the index path does not exist on disk, or nothing was ever written
  to the RAM directory), indexer->state ends up as STATE_IDLE instead, so
  callers must check indexer->state after the call.

  For file indexers, lock() is called before the index is opened; the lock
  is not released here.

  Every CLucene error is logged and followed by etpan_crash().
*/
static void set_state(struct etpan_indexer * indexer, int state)
{
  struct lucene_data * data;
  int file_exists;
  
  data = get_data(indexer);
  
  /* nothing to do when the requested state is already active */
  if (state == indexer->state) {
    return;
  }
  
  /* closing state */
  switch (indexer->state) {
  case STATE_IDLE:
    break;
  case STATE_WRITING:
    {
      /* optimize before closing the writer */
      try {
        data->writer->optimize();
      }
      catch (CLuceneError & e) {
        ETPAN_LOG("%s - %s", e.what(), indexer->filename);
        etpan_crash();
      }
      try {
        data->writer->close();
      }
      catch (CLuceneError & e) {
        ETPAN_LOG("%s - %s", e.what(), indexer->filename);
        etpan_crash();
      }
      delete data->writer;
      data->writer = NULL;
    }
    break;
  case STATE_READING:
    {
      try {
        data->reader->close();
      }
      catch (CLuceneError & e) {
        ETPAN_LOG("%s - %s", e.what(), indexer->filename);
        etpan_crash();
      }
      delete data->reader;
      data->reader = NULL;
    }
    break;
  case STATE_SEARCHING:
    {
      delete data->searcher;
      data->searcher = NULL;
    }
    break;
  }
  
  indexer->state = state;
  
  /* for file indexers, find out whether the index path exists on disk */
  file_exists = 0;
  if (indexer->type == INDEXER_TYPE_FILE) {
    int r;
    struct stat buf_stat;
    
    r = stat(indexer->filename, &buf_stat);
    if (r == 0) {
      file_exists = 1;
    }
  }
  
  /* switching to new state */
  /* NOTE(review): the RAM branches below log indexer->filename, which is
     NULL for RAM indexers - confirm ETPAN_LOG tolerates a NULL "%s". */
  switch (indexer->state) {
  case STATE_IDLE:
    break;
  case STATE_WRITING:
    {
      if (indexer->type == INDEXER_TYPE_FILE) {
        
        if (file_exists) {
          /* append to the existing index */
          lock(indexer, 1);
          data->writer = NULL;
          try {
            data->writer = new lucene::index::IndexWriter(indexer->filename,
                data->analyzer, false);
          }
          catch (CLuceneError & e) {
            ETPAN_LOG("%s - %s", e.what(), indexer->filename);
            etpan_crash();
          }
        }
        else {
          /* no index yet: create it */
          lock(indexer, 0);
          data->writer = NULL;
          try {
            data->writer = new lucene::index::IndexWriter(indexer->filename,
                data->analyzer, true);
          }
          catch (CLuceneError & e) {
            ETPAN_LOG("%s - %s", e.what(), indexer->filename);
            etpan_crash();
          }
        }
      }
      else {
        if (data->ram_created) {
          /* append to the existing in-memory index */
          data->writer = NULL;
          try {
            data->writer = new lucene::index::IndexWriter(data->ram,
                data->analyzer, false);
          }
          catch (CLuceneError & e) {
            ETPAN_LOG("%s - %s", e.what(), indexer->filename);
            etpan_crash();
          }
        }
        else {
          /* first write: create the in-memory index and remember it */
          data->writer = NULL;
          try {
            data->writer = new lucene::index::IndexWriter(data->ram,
                data->analyzer, true);
          }
          catch (CLuceneError & e) {
            ETPAN_LOG("%s - %s", e.what(), indexer->filename);
            etpan_crash();
          }
          data->ram_created = 1;
        }
      }
    }
    break;
  case STATE_READING:
    {
      if (indexer->type == INDEXER_TYPE_FILE) {
        
        if (file_exists) {
          lock(indexer, 1);
          data->reader = NULL;
          try {
            data->reader = lucene::index::IndexReader::open(indexer->filename);
          }
          catch (CLuceneError & e) {
            ETPAN_LOG("%s - %s", e.what(), indexer->filename);
            etpan_crash();
          }
        }
        else {
          /* nothing to read: fall back to idle */
          indexer->state = STATE_IDLE;
        }
      }
      else {
        if (data->ram_created) {
          data->reader = NULL;
          try {
            data->reader = lucene::index::IndexReader::open(data->ram);
          }
          catch (CLuceneError & e) {
            ETPAN_LOG("%s - %s", e.what(), indexer->filename);
            etpan_crash();
          }
        }
        else {
          /* nothing to read: fall back to idle */
          indexer->state = STATE_IDLE;
        }
      }
    }
    break;
  case STATE_SEARCHING:
    {
      if (indexer->type == INDEXER_TYPE_FILE) {
        
        if (file_exists) {
          lock(indexer, 1);
          data->searcher = NULL;
          try {
            data->searcher = new lucene::search::IndexSearcher(indexer->filename);
          }
          catch (CLuceneError & e) {
            ETPAN_LOG("%s - %s", e.what(), indexer->filename);
            etpan_crash();
          }
        }
        else {
          /* nothing to search: fall back to idle */
          indexer->state = STATE_IDLE;
        }
      }
      else {
        if (data->ram_created) {
          data->searcher = NULL;
          try {
            data->searcher = new lucene::search::IndexSearcher(data->ram);
          }
          catch (CLuceneError & e) {
            ETPAN_LOG("%s - %s", e.what(), indexer->filename);
            etpan_crash();
          }
        }
        else {
          /* nothing to search: fall back to idle */
          indexer->state = STATE_IDLE;
        }
      }
    }
    break;
  }
}

/* Returns the CLucene-specific part of the indexer, stored in
   indexer->data. */
static inline struct lucene_data * get_data(struct etpan_indexer * indexer)
{
  return (struct lucene_data *) indexer->data;
}

/* Locks the indexer's mutex (indexer->lock). The return value of
   pthread_mutex_lock() is not checked. */
extern "C" void etpan_indexer_lock(struct etpan_indexer * indexer)
{
  pthread_mutex_lock(&indexer->lock);
}

/* Unlocks the indexer's mutex (indexer->lock). The return value of
   pthread_mutex_unlock() is not checked. */
extern "C" void etpan_indexer_unlock(struct etpan_indexer * indexer)
{
  pthread_mutex_unlock(&indexer->lock);
}

extern "C" void etpan_indexer_clear(struct etpan_indexer * indexer)
{
  struct lucene_data * data;
  
  etpan_indexer_lock(indexer);
  
  data = get_data(indexer);
  
  set_state(indexer, STATE_IDLE);
  if (indexer->type == INDEXER_TYPE_FILE) {
    data->writer = NULL;
    try {
      data->writer = new lucene::index::IndexWriter(indexer->filename,
          data->analyzer, true);
    }
    catch (CLuceneError & e) {
      ETPAN_LOG("%s - %s", e.what(), indexer->filename);
      etpan_crash();
    }
  }
  else {
    data->writer = NULL;
    try {
      data->writer = new lucene::index::IndexWriter(data->ram,
          data->analyzer, true);
    }
    catch (CLuceneError & e) {
      ETPAN_LOG("%s - %s", e.what(), indexer->filename);
      etpan_crash();
    }
  }
  try {
    data->writer->optimize();
  }
  catch (CLuceneError & e) {
    ETPAN_LOG("%s - %s", e.what(), indexer->filename);
    etpan_crash();
  }
  try {
    data->writer->close();
  }
  catch (CLuceneError & e) {
    ETPAN_LOG("%s - %s", e.what(), indexer->filename);
    etpan_crash();
  }
  delete data->writer;
  data->writer = NULL;
  
  etpan_indexer_unlock(indexer);
}

/*
  Converts a wide-character string to a newly allocated UTF-8 string.

  The buffer reserves 6 bytes per wide character plus the terminator.
  Returns NULL when etpan_wcs_to_utf8s() reports a failure; otherwise the
  caller owns the result and releases it with free().
*/
static char * wcs_to_utf8(const wchar_t * wcs)
{
  size_t capacity;
  char * utf8;
  
  capacity = 6 * wcslen(wcs) + 1;
  utf8 = (char *) malloc(capacity);
  if (utf8 == NULL)
    ETPAN_LOG_MEMORY_ERROR;
  
  if (etpan_wcs_to_utf8s(utf8, wcs, capacity) < 0) {
    free(utf8);
    utf8 = NULL;
  }
  
  return utf8;
}

/*
  Converts a UTF-8 string to a newly allocated wide-character string.

  The buffer holds one wide character per input byte plus the terminator.
  Returns NULL when etpan_utf8s_to_wcs() reports a failure; otherwise the
  caller owns the result and releases it with free().
*/
static wchar_t * utf8_to_wcs(const char * s)
{
  size_t capacity;
  wchar_t * wide;
  
  capacity = strlen(s) + 1;
  wide = (wchar_t *) malloc(sizeof(* wide) * capacity);
  if (wide == NULL)
    ETPAN_LOG_MEMORY_ERROR;
  
  if (etpan_utf8s_to_wcs(wide, s, capacity) < 0) {
    free(wide);
    wide = NULL;
  }
  
  return wide;
}

/*
  Adds a document to the index.

  doc_id: UTF-8 identifier, stored in the keyword field "id".
  type:   UTF-8 name of the field that receives the text.
  text:   UTF-8 content, indexed but not stored (Field::UnStored).

  Documents already present under the same id are not removed here.

  The three strings are converted before the indexer is switched to
  STATE_WRITING. If any of them cannot be converted (utf8_to_wcs() returns
  NULL), the failure is logged and nothing is indexed, instead of handing
  a NULL string to CLucene.

  CLucene errors are logged and followed by etpan_crash().
*/
extern "C" void etpan_indexer_set(struct etpan_indexer * indexer,
    char * doc_id, char * type, char * text)
{
  lucene::document::Document * doc;
  lucene::document::Field * field;
  wchar_t * wcs_text;
  wchar_t * wcs_type;
  wchar_t * wcs_doc_id;
  struct lucene_data * data;
  
  etpan_indexer_lock(indexer);
  
  wcs_type = utf8_to_wcs(type);
  wcs_text = utf8_to_wcs(text);
  wcs_doc_id = utf8_to_wcs(doc_id);
  if ((wcs_type == NULL) || (wcs_text == NULL) || (wcs_doc_id == NULL)) {
    ETPAN_LOG("could not convert document to wide characters - %s", doc_id);
    goto free_strings;
  }
  
  set_state(indexer, STATE_WRITING);
  
  data = get_data(indexer);
  doc = NULL;
  try {
    doc = new lucene::document::Document();
  }
  catch (CLuceneError & e) {
    ETPAN_LOG("%s - %s", e.what(), indexer->filename);
    etpan_crash();
  }
  
  /* searchable text, stored under the field named by `type` */
  field = NULL;
  try {
    field = lucene::document::Field::UnStored(wcs_type, wcs_text);
  }
  catch (CLuceneError & e) {
    ETPAN_LOG("%s - %s", e.what(), indexer->filename);
    etpan_crash();
  }
  try {
    doc->add(* field);
  }
  catch (CLuceneError & e) {
    ETPAN_LOG("%s - %s", e.what(), indexer->filename);
    etpan_crash();
  }
  
  /* identifier, read back by the search and key listing functions */
  field = NULL;
  try {
    field = lucene::document::Field::Keyword(L"id", wcs_doc_id);
  }
  catch (CLuceneError & e) {
    ETPAN_LOG("%s - %s", e.what(), indexer->filename);
    etpan_crash();
  }
  try {
    doc->add(* field);
  }
  catch (CLuceneError & e) {
    ETPAN_LOG("%s - %s", e.what(), indexer->filename);
    etpan_crash();
  }
  
  try {
    data->writer->addDocument(doc);
  }
  catch (CLuceneError & e) {
    ETPAN_LOG("%s - %s", e.what(), indexer->filename);
    etpan_crash();
  }
  
  delete doc;
  
 free_strings:
  /* free(NULL) is a no-op, so partially converted input is handled too */
  free(wcs_doc_id);
  free(wcs_type);
  free(wcs_text);
  
  etpan_indexer_unlock(indexer);
}

/*
  Deletes every document whose "id" field equals doc_id.

  The indexer is switched to STATE_READING. When no index exists yet,
  set_state() leaves it in STATE_IDLE and there is nothing to delete.

  If doc_id cannot be converted (utf8_to_wcs() returns NULL), the failure
  is logged and nothing is deleted, instead of building a Term from a NULL
  string.

  CLucene errors are logged and followed by etpan_crash().
*/
extern "C" void etpan_indexer_delete(struct etpan_indexer * indexer,
    char * doc_id)
{
  lucene::index::Term * term;
  wchar_t * wcs_doc_id;
  struct lucene_data * data;
  
  etpan_indexer_lock(indexer);
  
  set_state(indexer, STATE_READING);
  
  if (indexer->state == STATE_IDLE)
    goto unlock;
  
  data = get_data(indexer);
  
  wcs_doc_id = utf8_to_wcs(doc_id);
  if (wcs_doc_id == NULL) {
    ETPAN_LOG("could not convert document id to wide characters - %s",
        doc_id);
    goto unlock;
  }
  
  term = NULL;
  try {
    term = new lucene::index::Term(L"id", wcs_doc_id);
  }
  catch (CLuceneError & e) {
    ETPAN_LOG("%s - %s", e.what(), indexer->filename);
    etpan_crash();
  }
  try {
    data->reader->deleteTerm(term);
  }
  catch (CLuceneError & e) {
    ETPAN_LOG("%s - %s", e.what(), indexer->filename);
    etpan_crash();
  }
  delete term;
  free(wcs_doc_id);
  
 unlock:
  etpan_indexer_unlock(indexer);
}

/*
  Returns the indexer to STATE_IDLE while holding its mutex. Through
  set_state(), this optimizes and closes an open writer, or closes an open
  reader or searcher.
*/
extern "C" void etpan_indexer_flush(struct etpan_indexer * indexer)
{
  etpan_indexer_lock(indexer);
  
  set_state(indexer, STATE_IDLE);
  
  etpan_indexer_unlock(indexer);
}

/*
  Runs a query and returns the ids of the matching documents.

  expr: UTF-8 query in the CLucene query parser syntax; terms without an
    explicit field are looked up in the "id" field.

  Returns a carray of malloc()ed UTF-8 id strings, each id appearing once
  (hits are de-duplicated through a hash). The array is empty when no
  index exists yet or when expr cannot be converted to wide characters.
  Release the result with etpan_indexer_free_search_result().

  The allocation of the result array is checked like every other
  allocation in this file, and failed string conversions are logged and
  skipped instead of dereferencing a NULL pointer.

  CLucene errors are logged and followed by etpan_crash().
*/
extern "C" carray * etpan_indexer_search(struct etpan_indexer * indexer,
    char * expr)
{
  lucene::search::Query * query;
  lucene::search::Hits * hits;
  struct lucene_data * data;
  wchar_t * wcs_query_str;
  chash * response_hash;
  int i;
  chashiter * iter;
  carray * response;
  int r;
  
  etpan_indexer_lock(indexer);
  
  set_state(indexer, STATE_SEARCHING);
  
  response = carray_new(16);
  if (response == NULL)
    ETPAN_LOG_MEMORY_ERROR;
  
  /* set_state() falls back to idle when there is no index to search */
  if (indexer->state == STATE_IDLE)
    goto unlock;
  
  data = get_data(indexer);
  
  wcs_query_str = utf8_to_wcs(expr);
  if (wcs_query_str == NULL) {
    ETPAN_LOG("could not convert query to wide characters - %s", expr);
    goto unlock;
  }
  
  query = NULL;
  try {
    query = lucene::queryParser::QueryParser::parse(wcs_query_str, L"id",
        data->analyzer);
  }
  catch (CLuceneError & e) {
    ETPAN_LOG("%s - %s", e.what(), indexer->filename);
    etpan_crash();
  }
  free(wcs_query_str);
  hits = NULL;
  try {
    hits = data->searcher->search(query);
  }
  catch (CLuceneError & e) {
    ETPAN_LOG("%s - %s", e.what(), indexer->filename);
    etpan_crash();
  }
  
  /* collect the ids as hash keys so that each id is reported once */
  response_hash = chash_new(CHASH_DEFAULTSIZE, CHASH_COPYKEY);
  if (response_hash == NULL)
    ETPAN_LOG_MEMORY_ERROR;
  for(i = 0 ; i < hits->length() ; i ++) {
    const wchar_t * wcs;
    char * s;
    lucene::document::Document * doc;
    lucene::document::Field * field;
    chashdatum key;
    chashdatum value;
    
    doc = NULL;
    try {
      doc = &hits->doc(i);
    }
    catch (CLuceneError & e) {
      ETPAN_LOG("%s - %s", e.what(), indexer->filename);
      etpan_crash();
    }
    field = NULL;
    try {
      field = doc->getField(L"id");
    }
    catch (CLuceneError & e) {
      ETPAN_LOG("%s - %s", e.what(), indexer->filename);
      etpan_crash();
    }
    wcs = NULL;
    try {
      wcs = field->stringValue();
    }
    catch (CLuceneError & e) {
      ETPAN_LOG("%s - %s", e.what(), indexer->filename);
      etpan_crash();
    }
    s = wcs_to_utf8(wcs);
    if (s == NULL) {
      ETPAN_LOG("could not convert document id to UTF-8");
      continue;
    }
    
    key.data = s;
    key.len = strlen(s) + 1;
    value.data = NULL;
    value.len = 0;
    r = chash_set(response_hash, &key, &value, NULL);
    if (r < 0)
      ETPAN_LOG_MEMORY_ERROR;
    /* the hash was created with CHASH_COPYKEY, so it keeps its own copy */
    free(s);
  }
  
  /* hand every distinct id over to the result array */
  for(iter = chash_begin(response_hash) ; iter != NULL ;
      iter = chash_next(response_hash, iter)) {
    chashdatum key;
    char * s;
    
    chash_key(iter, &key);
    s = strdup((char *) key.data);
    if (s == NULL)
      ETPAN_LOG_MEMORY_ERROR;
    
    r = carray_add(response, s, NULL);
    if (r < 0)
      ETPAN_LOG_MEMORY_ERROR;
  }
  
  chash_free(response_hash);
  
  delete hits;
  delete query;
  
 unlock:
  etpan_indexer_unlock(indexer);
  
  return response;
}


/* Takes an additional reference on the indexer. The counter is a plain
   increment; the indexer mutex is not taken here. */
extern "C" void etpan_indexer_ref(struct etpan_indexer * indexer)
{
  indexer->ref_count ++;
}

/* Drops one reference on the indexer and destroys it once the last
   reference is gone. */
extern "C" void etpan_indexer_unref(struct etpan_indexer * indexer)
{
  indexer->ref_count --;
  if (indexer->ref_count != 0)
    return;
  
  etpan_indexer_free(indexer);
}

/* Releases a result array: frees every string it holds, then the array
   itself. */
extern "C" void
etpan_indexer_free_search_result(carray * search_result)
{
  unsigned int total;
  unsigned int pos;
  
  total = carray_count(search_result);
  pos = 0;
  while (pos < total) {
    free(carray_get(search_result, pos));
    pos ++;
  }
  
  carray_free(search_result);
}

/*
  Returns a newly allocated copy of str in which each of the characters
  + - & | ! ( ) { } [ ] ^ " ~ * ? : \ and the space is preceded by a
  backslash.

  The result is at most twice as long as the input; the caller releases
  it with free().
*/
extern "C" char * etpan_indexer_quote(char * str)
{
  static const char special[] = "+-&|!(){}[]^\"~*?:\\ ";
  const char * src;
  char * quoted;
  char * dst;
  
  /* worst case: every character needs a backslash in front of it */
  quoted = (char *) malloc(strlen(str) * 2 + 1);
  if (quoted == NULL)
    ETPAN_LOG_MEMORY_ERROR;
  
  dst = quoted;
  for(src = str ; * src != '\0' ; src ++) {
    if (strchr(special, * src) != NULL) {
      * dst = '\\';
      dst ++;
    }
    * dst = * src;
    dst ++;
  }
  * dst = '\0';
  
  return quoted;
}

/*
  Returns the "id" of every live (non-deleted) document in the index.

  Returns a carray of malloc()ed UTF-8 strings; the array is empty when no
  index exists yet. The elements and the array are released the same way
  as a search result (free() on each element, then carray_free()).

  Document numbers run from 0 to maxDoc() - 1 and include deleted
  documents, so the scan is bounded by maxDoc(). numDocs() only counts
  live documents and would cut the scan short as soon as a document has
  been deleted.

  An id that cannot be converted to UTF-8 is logged and left out rather
  than being stored as a NULL element.

  CLucene errors are logged and followed by etpan_crash().
*/
extern "C" carray * etpan_indexer_get_keys(struct etpan_indexer * indexer)
{
  struct lucene_data * data;
  carray * result;
  unsigned int max_doc;
  unsigned int i;
  int r;
  
  result = NULL;
  etpan_indexer_lock(indexer);
  
  set_state(indexer, STATE_READING);
  
  result = carray_new(16);
  if (result == NULL)
    ETPAN_LOG_MEMORY_ERROR;
  
  /* set_state() falls back to idle when there is no index to read */
  if (indexer->state == STATE_IDLE)
    goto unlock;
  
  data = get_data(indexer);
  
  max_doc = 0;
  try {
    max_doc = data->reader->maxDoc();
  }
  catch (CLuceneError & e) {
    ETPAN_LOG("%s - %s", e.what(), indexer->filename);
    etpan_crash();
  }
  for(i = 0 ; i < max_doc ; i ++) {
    lucene::document::Document * doc;
    lucene::document::Field * field;
    const wchar_t * wcs;
    char * s;
    int deleted;
    
    deleted = 0;
    try {
      deleted = data->reader->isDeleted(i);
    }
    catch (CLuceneError & e) {
      ETPAN_LOG("%s - %s", e.what(), indexer->filename);
      etpan_crash();
    }
    if (deleted)
      continue;
    
    doc = NULL;
    try {
      doc = data->reader->document(i);
    }
    catch (CLuceneError & e) {
      ETPAN_LOG("%s - %s", e.what(), indexer->filename);
      etpan_crash();
    }
    field = NULL;
    try {
      field = doc->getField(L"id");
    }
    catch (CLuceneError & e) {
      ETPAN_LOG("%s - %s", e.what(), indexer->filename);
      etpan_crash();
    }
    wcs = NULL;
    try {
      wcs = field->stringValue();
    }
    catch (CLuceneError & e) {
      ETPAN_LOG("%s - %s", e.what(), indexer->filename);
      etpan_crash();
    }
    s = wcs_to_utf8(wcs);
    if (s == NULL) {
      ETPAN_LOG("could not convert document id to UTF-8");
    }
    else {
      r = carray_add(result, s, NULL);
      if (r < 0)
        ETPAN_LOG_MEMORY_ERROR;
    }
    
    delete doc;
  }
  
 unlock:
  etpan_indexer_unlock(indexer);
  return result;
}

/*
  Sets the process-wide maximum clause count of CLucene boolean queries
  (BooleanQuery::setMaxClauseCount) to `count`.

  NOTE(review): unlike the other public functions of this file, this
  definition carries no extern "C"; presumably C linkage comes from the
  declaration in etpan-indexer.h - confirm.
*/
void etpan_indexer_set_max_response_count(unsigned int count)
{
  lucene::search::BooleanQuery::setMaxClauseCount(count);
}
