// Created by Vladimir Tsvetkov on 08/07/14.
//clear && clear && g++ -o search -std=c++11 -g search.cpp ./../../src/string/unistring.cpp
#include <iostream>
#include <string>
#include <set>
#include <map>
#include "../../include/string/unistring.h"
Unistring starCharToUnistring(char *starStr)
{
Utf8String strseq = Utf8String(starStr);
Unistring strunistring = Unistring(strseq);
return strunistring;
}
Unistring stdStrToUnistring(std::string *str)
{
Unistring strunistring = starCharToUnistring((char*)(*str).c_str());
return strunistring;
}
int commonPrefix(Unistring data, Unistring pattern, int curIndex)
{
Unistring cutPattern = *pattern.substr(curIndex);
for(int i = 0; i < data.size(); i++)
if (i == cutPattern.size() || data[i] != cutPattern[i])
return i;
return data.size();
}
std::set<Unistring> insertSetIntoSet(std::set<Unistring> set, std::set<Unistring> insSet)
{
std::set<Unistring> newSet;
for(auto i:set)
newSet.insert(i);
for(auto i:insSet)
newSet.insert(i);
return newSet;
}
class trieNode
{
public:
Unistring *data;
std::map<unichar, trieNode> leaves;
bool isFinal;
trieNode();
trieNode(Unistring ndata, std::map<unichar, trieNode> nleaves);
~trieNode();
};
class Trie
{
public:
trieNode root;
Trie();
~Trie();
std::set<Unistring> searchWordsFromTrie(Unistring pattern);
std::set<Unistring> searchWords(trieNode node, Unistring pattern, int curIndex);
void findRemainingNodes(std::set<Unistring> *wordSet, trieNode node);
};
std::set<Unistring> Trie::searchWordsFromTrie(Unistring pattern)
{
std::set<Unistring> finalset = searchWords(root, pattern, 0);
}
void Trie::findRemainingNodes(std::set<Unistring> *wordSet, trieNode node)
{
if (node.isFinal)
{
wordSet->insert(*node.data);
} else
{
for (auto i:node.leaves)
findRemainingNodes(wordSet, i.second);
}
}
std::set<Unistring> Trie::searchWords(trieNode node, Unistring pattern, int curIndex)
{
std::cout << "searchWordsCall with curIndex " << curIndex << "\n";
//или node.data is Sentinel? лучше так потому что когда я спличу вершины я не слежу чтобы они были финальные
if ((*node.data)[0] > KSA_SENTINEL && (curIndex == (pattern.size() - 1) || pattern[curIndex] == unichar('*')))
{
std::set<Unistring> wordSet;
wordSet.insert(*node.data);
return wordSet;
}
if ((*node.data)[0] <= KSA_SENTINEL && (curIndex == (pattern.size() - 1) || pattern[curIndex] == unichar('*')))
{
//run recirsive walking and return all nodes
std::set<Unistring> wordSet;
findRemainingNodes(&wordSet, node);
return wordSet;
}
std::set<Unistring> wordSet;
unichar curChar = pattern[curIndex];
if (curChar == unichar('*'))
{
wordSet=insertSetIntoSet(wordSet, searchWords(node, pattern, curIndex + 1)); //звезда пустой символ
//или запускаю поиск от этой же вершины с обрезанной буквой
Unistring *newData = node.data->substr(1, node.data->size());
std::map<unichar, trieNode> newleaves = node.leaves;
trieNode nextNode = trieNode(*newData, newleaves);
wordSet=insertSetIntoSet(wordSet, searchWords(node, pattern, curIndex));
//что делать если все буквы обрезали
if (node.data->size() == 0)
{
for(auto i:node.leaves)
{
wordSet=insertSetIntoSet(wordSet, searchWords(i.second, pattern, curIndex));
}
}
return wordSet;
} else
{
int commonPrefixsize = commonPrefix(*node.data, pattern, curIndex);
//Unistring commonPrefix = node.data.substring(0, commonPrefixsize);
trieNode nextNode;
if (commonPrefixsize == node.data->size())
{
//поиск от ребенка
nextNode = node.leaves[pattern[commonPrefixsize]];
} else
{
//сплитим ноду
Unistring *newData = node.data->substr(commonPrefixsize, node.data->size());
std::map<unichar, trieNode> newleaves = node.leaves;
nextNode = trieNode(*newData, newleaves);
}
wordSet=insertSetIntoSet(wordSet, searchWords(nextNode, pattern, curIndex + commonPrefixsize));
return wordSet;
}
}
void test()
{
/*std::cout<<"start test stdStrToUnistring\n";
std::string pattern;
std::cin >> pattern;
std::cout<<pattern<<"\n";
Unistring patternunistring = stdStrToUnistring(&pattern);
patternunistring.print();
std::cout<<"\n";
std::cout<<"start test starCharToUnistring\n";
char str[] = {'a', 'b', 'r', 'a'};
std::cout<<str<<"\n";
Unistring strunistring = starCharToUnistring(str);
strunistring.print();
std::cout<<"\n";
std::cout<<"start test commonPrefixsize\n";
std::string data, pattern;
int curIndex;
std::cin >> data;
std::cin >> pattern;
std::cin >> curIndex;
int prefixsize = commonPrefix(stdStrToUnistring(&data), stdStrToUnistring(&pattern), curIndex);
std::cout << prefixsize;*/
}
int main(int argc, const char * argv[])
{
//test();
return 0;
}