#ifndef IDS_HPP_INCLUDED
#define IDS_HPP_INCLUDED
#include <clocale>
#include <cstddef>
#include <cwctype>
#include <fstream>
#include <string>
#include <string_view>
#include <unordered_map>
#include <utility>

#include "utfconv.hpp"
#include "htmldecode.hpp"

/// Lookup table from a title string to an integer id.
///
/// The table is filled once from a text file by the constructor and is then
/// queried through name_to_id(), which normalizes a raw link target before
/// looking it up.
class idsconv {
  public:
    /// Loads the name -> id table from `datfile`.
    ///
    /// Each record is read as: an integer id, exactly one separator character
    /// (consumed and ignored), then the name running to the end of the line.
    /// When a name occurs more than once, the first id read for it is kept.
    ///
    /// Reading stops at the first record that cannot be parsed (including end
    /// of file). If the file cannot be opened the table is simply left empty,
    /// so every lookup returns -1.
    idsconv(const char *datfile) {
      std::ifstream infile(datfile);
      int id;
      // Test the extraction itself rather than eof(): eof() only becomes true
      // after a read has already failed, which would add a bogus ("", 0) entry
      // after the last record, and it never becomes true when the stream is in
      // a failed state (unopenable file, malformed id), which would loop forever.
      while (infile >> id) {
        infile.get(); // skip the single separator character after the id
        std::string name;
        if (!std::getline(infile, name)) {
          break; // id without a following name: nothing more to read
        }
        _map.emplace(std::move(name), id);
      }
      // infile is closed by its destructor.
    }

    /// Decodes the two hexadecimal digits at `hex[0]` and `hex[1]`.
    ///
    /// @param hex pointer to at least two readable characters.
    /// @return the byte value (0..255), or -1 if either character is not a
    ///         hexadecimal digit. `hex[1]` is not read when `hex[0]` is invalid.
    int hex2char(const char * hex) const {
      const int high = hex_digit(hex[0]);
      if (high < 0) {
        return -1; // not a hex digit
      }
      const int low = hex_digit(hex[1]);
      if (low < 0) {
        return -1; // not a hex digit
      }
      return (high << 4) | low;
    }

    /// Normalizes the title `s` and returns its id.
    ///
    /// Normalization steps, in order:
    ///  1. skip leading spaces/underscores, an optional "en:" prefix, and any
    ///     spaces/underscores that follow it;
    ///  2. turn '_' into ' ' and decode "%XX" percent escapes, keeping at most
    ///     255 bytes and stopping at the first NUL byte;
    ///  3. strip trailing spaces/underscores;
    ///  4. run the result through decodehtml();
    ///  5. upper-case the first code point (via UTF-32 round trip).
    ///
    /// @return the id stored for the normalized title, or -1 if it is unknown.
    int name_to_id(const std::string_view s) const {
      constexpr std::size_t kMaxTitleBytes = 255; // maximum title size
      const auto is_blank = [](char c) { return c == ' ' || c == '_'; };

      std::size_t pos = 0;
      const auto skip_blanks = [&]() {
        while (pos < s.size() && is_blank(s[pos])) {
          ++pos;
        }
      };

      skip_blanks();
      // link to self (for the english version)
      if (s.substr(pos, 3) == "en:") {
        pos += 3;
      }
      skip_blanks();

      // Convert encoding: underscores to spaces, percent escapes to raw bytes.
      std::string htmlencoded;
      while (pos < s.size() && htmlencoded.size() < kMaxTitleBytes) {
        char byte = s[pos];
        std::size_t consumed = 1;
        if (byte == '_') {
          byte = ' ';
        } else if (byte == '%' && pos + 2 < s.size()) {
          // Two more characters are available, so hex2char may read them.
          const int value = hex2char(s.data() + pos + 1);
          if (value >= 0) {
            byte = static_cast<char>(static_cast<unsigned char>(value));
            consumed = 3; // '%' plus two hex digits
          }
          // otherwise it was not percent encoded: keep the literal '%'
        }
        if (byte == '\0') {
          break; // a NUL byte (raw or from "%00") ends the title
        }
        htmlencoded.push_back(byte);
        pos += consumed;
      }

      // remove trailing whitespace
      while (!htmlencoded.empty() && is_blank(htmlencoded.back())) {
        htmlencoded.pop_back();
      }

      // Decode html & xml
      std::string htmldecoded;
      decodehtml(htmlencoded, htmldecoded);

      // Convert to UTF-32 so the first character can be handled as one unit.
      std::u32string utf32str;
      utf8_to_utf32(htmldecoded, utf32str);

      // Upper-case the first character.
      // NOTE(review): this changes the process-wide locale on every lookup;
      // kept here so the observable side effect is the same as before.
      std::setlocale(LC_ALL, "en_US.utf8");
      if (!utf32str.empty()) {
        utf32str[0] = static_cast<char32_t>(
            std::towupper(static_cast<std::wint_t>(utf32str[0])));
      }

      // Convert back to utf8
      std::string normalized;
      utf32_to_utf8(utf32str, normalized);

      const auto found = _map.find(normalized);
      return found != _map.end() ? found->second : -1;
    }

  private:
    /// Value of a single hexadecimal digit (0..15), or -1 if `c` is not one.
    static int hex_digit(char c) {
      if (c >= '0' && c <= '9') {
        return c - '0';
      }
      if (c >= 'a' && c <= 'f') {
        return c - 'a' + 10;
      }
      if (c >= 'A' && c <= 'F') {
        return c - 'A' + 10;
      }
      return -1;
    }

    std::unordered_map<std::string,int> _map; // normalized title -> id
};
#endif

