// Created by Vladimir Tsvetkov on 08/07/14.
//clear && clear && g++ -o search -std=c++11 -g search.cpp ./../../src/string/unistring.cpp
#include "../../include/search/search.h"
/*Unistring starCharToUnistring(char *starStr)
{
Utf8String strseq = Utf8String(starStr);
Unistring strunistring = Unistring(strseq);
return strunistring;
}
Unistring stdStrToUnistring(std::string *str)
{
Unistring strunistring = starCharToUnistring((char*)(*str).c_str());
return strunistring;
}*/
int commonPrefix(Unistring *data, Unistring *pattern, int curIndex)
{
Unistring *cutPattern;
cutPattern = pattern->substr(curIndex);
for(int i = 0; i < data->size(); i++)
if (i == cutPattern->size() || (*data)[i] != (*cutPattern)[i])
return i;
return data->size();
}
std::set<Unistring> *insertSetIntoSet(std::set<Unistring> *set, std::set<Unistring> *insSet)
{
auto newSet = new std::set<Unistring>;
for(auto i:(*set))
newSet->insert(i);
for(auto i:(*insSet))
newSet->insert(i);
return newSet;
}
//attention! pattern is Unistring
bool isValidPattern(Unistring *pattern)
{
if (pattern->size() < 3)
{
return false;
} else
{
return true;
}
}
Unistring *preProcessPattern(Unistring *pattern)
{
auto *newPattern = new Unistring;
bool firstStar = false;
for(auto i:(*pattern))
{
if (i == unichar('*'))
{
if (!firstStar)
{
newPattern->append(i);
firstStar = true;
}
} else
{
newPattern->append(i);
firstStar = false;
}
}
return newPattern;
}
std::set<Unistring> *Trie::searchWordsFromTrie(Unistring *pattern)
{
std::set<Unistring> *finalset;
//Unistring newPattern = preProcessPattern(pattern);
//if (!isValidPattern(newPattern))
//{
//пичаль
//throw std::domain_error("слишком короткая строка");
//}
//if (pattern[0] == unichar('*'))
//{
// finalset = searchWords(root.leaves[pattern[0]], pattern, 1);
//} else
//{
finalset = searchWords(root, pattern, 0);
//}
return finalset;
}
/*std::set<Unistring> Trie::unsafeSearch(Unistring pattern)
{
std::set<Unistring> finalset;
finalset = searchWords(root, pattern, 0);
for(auto i:finalset)
{
i.print();
std::cout<<std::endl;
}
}*/
void Trie::findRemainingNodes(std::set<Unistring> *wordSet, trieNode *node)
{
if (node->isFinal)
{
wordSet->insert(*(node->data));
} else
{
for (auto i:node->leaves)
{
findRemainingNodes(wordSet, &i.second);
}
}
}
void DBG(std::string str)
{
std::cout << str << "\n";
}
std::set<Unistring> *Trie::searchWords(trieNode *node, Unistring *pattern, int curIndex)
{
std::cout << "searchWordsCall with curIndex " << curIndex << "\n";
if ( ((*(node->data))[0]).is_sentinel() && (curIndex == (pattern->size() - 1) || (*pattern)[curIndex] == unichar('*')))
{
auto wordSet = new std::set<Unistring>;
wordSet->insert(*node->data);
return wordSet;
}
DBG(" 1st end conditions checked");
if (!(*(node->data))[0].is_sentinel() && (curIndex == (pattern->size() - 1) || (*pattern)[curIndex] == unichar('*')))
{
//run recursive walking and return all nodes
auto wordSet = new std::set<Unistring>;
findRemainingNodes(wordSet, node);
return wordSet;
}
DBG("end conditions checked");
//int magic = 1;
auto *wordSet = new std::set<Unistring>;
unichar curChar = (*pattern)[curIndex];
if (curChar == unichar('*'))
{
wordSet=insertSetIntoSet(wordSet, searchWords(node, pattern, curIndex + 1)); //звезда пустой символ
//или запускаю поиск от этой же вершины с обрезанной буквой
//что делать если все буквы обрезали
if (node->data->size() == 0)
{
for(auto i:node->leaves)
{
wordSet=insertSetIntoSet(wordSet, searchWords(&i.second, pattern, curIndex));
}
} else
{
Unistring *newData = node->data->substr(1);
std::map<unichar, trieNode> newleaves = node->leaves;
auto *nextNode = new trieNode(newData, newleaves, false);
wordSet=insertSetIntoSet(wordSet, searchWords(nextNode, pattern, curIndex));
}
return wordSet;
} else
{
int commonPrefixsize;
if (curIndex == 0)
{
commonPrefixsize = commonPrefix(node->data, pattern, curIndex);
} else
{
commonPrefixsize = commonPrefix(node->data, pattern, curIndex-1);
}
//Unistring commonPrefix = node.data.substring(0, commonPrefixsize);
trieNode *nextNode;
if (commonPrefixsize == node->data->size())
{
//поиск от ребенка
nextNode = &node->leaves[(*pattern)[commonPrefixsize]];
} else
{
//сплитим ноду
Unistring *newData = node->data->substr(commonPrefixsize, node->data->size());
std::map<unichar, trieNode> newleaves = node->leaves;
nextNode = new trieNode(newData, newleaves, false);
}
wordSet=insertSetIntoSet(wordSet, searchWords(nextNode, pattern, curIndex + commonPrefixsize)); // нужно ли -1?
return wordSet;
}
auto *retSet = new std::set<Unistring>;
return retSet;
}
Trie::Trie(trieNode *nroot)
{
this->root = nroot;
}
trieNode::~trieNode(){}
trieNode::trieNode(){}
Trie::~Trie(){}
trieNode::trieNode(Unistring *ndata, std::map<unichar, trieNode> nleaves, bool nisFinal)
{
this->data = ndata;
this->leaves = nleaves;
this->isFinal = nisFinal;
}
void search_test()
{
/*
std::cout<<"start test stdStrToUnistring\n";
std::string pattern;
std::cin >> pattern;
std::cout<<pattern<<"\n";
Unistring patternunistring = stdStrToUnistring(&pattern);
patternunistring.print();
std::cout<<"\n";
std::cout<<"start test starCharToUnistring\n";
char str[] = {'a', 'b', 'r', 'a'};
std::cout<<str<<"\n";
Unistring strunistring = starCharToUnistring(str);
strunistring.print();
std::cout<<"\n";
std::cout<<"start test commonPrefixsize\n";
std::string data1, pattern1;
int curIndex;
std::cin >> data1;
std::cin >> pattern1;
std::cin >> curIndex;
int prefixsize = commonPrefix(stdStrToUnistring(&data1), stdStrToUnistring(&pattern1), curIndex);
std::cout << prefixsize;
*/
Unistring *str = new Unistring();
str->append(unichar('a'));
str->append(SentinelGenerator::instance().generate());
std::map<unichar, trieNode> nleaves1;
trieNode node1 = trieNode(str, nleaves1, true);
std::map<unichar, trieNode> nleaves;
nleaves[unichar('a')] = node1;
Unistring *mi = new Unistring;
auto nroot = new trieNode(mi, nleaves, false);
Trie tree = Trie(nroot);
str->print();
Unistring *pattern = new Unistring;
pattern->append(unichar('a'));
pattern->print();
tree.searchWordsFromTrie(pattern);
}