#include "../../include/search/search.h"
#include "../../include/dprint/dprint.h"
bool validatePattern(Unistring *pattern)
{
return (pattern->size() >= MIN_PATTERN_LENGTH);
}
Unistring *preProcessPattern(Unistring *pattern)
{
auto newPattern = new Unistring();
bool firstStar = false;
for(auto i:(*pattern))
{
if (i == unichar('*'))
{
if (!firstStar)
{
newPattern->append(i);
firstStar = true;
}
} else
{
newPattern->append(i);
firstStar = false;
}
}
return newPattern;
}
int findMaxCommonPrefix(Unistring data, Unistring pattern)
{
DBG("findMaxCommonPrefix for data and pattern");
data.print_as_int();
pattern.print_as_int();
DBG("MaxCommonPrefix is");
for(int i = 0; i < data.size(); i++)
{
if ((i == pattern.size()) || ((data)[i] != (pattern)[i]))
{
printf("ret %d\n",i);
DBG("");
return i;
}
}
printf("ret %ld\n",data.size());
DBG("");
return data.size();
}
void merge2Sets(std::set<int> *newSet, std::set<int> *appendSet)
{
DBG("merge2Sets");
DBG("first set:");
DBG_S(newSet);
DBG("append set:");
DBG_S(appendSet);
DBG("");
for(auto i:(*appendSet))
newSet->insert(i);
}
void TreeVertex::findDownNodes(std::set<int> *wordSet)
{
DBG("findDownNodes from Node");
DBG_N(this);
DBG("");
if (this->children.empty())
{
wordSet->insert(this->value);
} else
{
for(auto i:this->children)
i.second->findDownNodes(wordSet);
}
}
void TreeVertex::searchWordsFromNode(std::set<int> *wordSet, Unistring pattern)
{
DBG("searchWordsFromNode");
DBG_N(this);
DBG("pattern:");
pattern.print_as_int();
DBG("");
if (this->children.empty() && pattern.size() == 0)
{
DBG("found in leaf");
wordSet->insert(this->value);
return;
}
if (!(this->children.empty()) && ((pattern.size() == 0) || (pattern.size() == 1 && (pattern)[0] == '*')))
{
DBG("found not in leaf");
auto childNodesResultSet = new std::set<int>();
this->findDownNodes(childNodesResultSet);
merge2Sets(wordSet, childNodesResultSet);
return;
}
unichar curChar = (pattern)[0];
if (curChar == unichar('*'))
{
auto newSet = new std::set<int>;
this->searchWordsFromNode(newSet, pattern.substr(1));
merge2Sets(wordSet, newSet);
if (this->data.size() == 0 && !this->children.empty())
{
for(auto i:this->children)
{
auto newSet = new std::set<int>;
i.second->searchWordsFromNode(newSet, pattern);
merge2Sets(wordSet, newSet);
return;
}
} else
{
auto nextNode = new TreeVertex(this->data.substr(1), this->value, this->children);
auto newSet = new std::set<int>;
nextNode->searchWordsFromNode(newSet, pattern);
merge2Sets(wordSet, newSet);
return;
}
} else
{
int commonPrefixSize;
commonPrefixSize = findMaxCommonPrefix(this->data, pattern);
if (commonPrefixSize == this->data.size())
{
auto newSet = new std::set<int>;
if ((pattern.substr(commonPrefixSize).size() != 0) && (this->children.count((pattern.substr(commonPrefixSize))[0]) !=0))
{
TreeVertex *nextNode = this->children[(pattern.substr(commonPrefixSize))[0]];
nextNode->searchWordsFromNode(newSet, pattern.substr(commonPrefixSize));
} else
{
if (this->children.empty())
{
newSet->insert(this->value);
} else
{
//паттерн нулевой или из звездочки
if((pattern.substr(commonPrefixSize).size() == 0) || ((pattern.substr(commonPrefixSize).size() == 1 && (pattern)[0] == '*')))
{
auto childNodesResultSet = new std::set<int>();
this->findDownNodes(childNodesResultSet);
merge2Sets(newSet, childNodesResultSet);
} else
{
std::cout << "not found1" << std::endl;
}
}
}
merge2Sets(wordSet, newSet);
return;
} else
{
if (commonPrefixSize)
{
auto nextNode = new TreeVertex(this->data.substr(commonPrefixSize),this->value, this->children);
auto newSet = new std::set<int>;
nextNode->searchWordsFromNode(newSet, pattern.substr(commonPrefixSize));
merge2Sets(wordSet, newSet);
return;
} else
{
std::cout << "not found2" << std::endl;
}
}
}
return;
}
std::set<int> *SuffixTree::searchWordsFromSuffixTree(Unistring pattern)
{
auto finalSet = new std::set<int>;
if ((pattern)[0] == unichar('*'))
{
std::cout << "first symbol in pattern is star" << std::endl;
TreeVertex *callNode = root->children[(pattern)[1]];
if (callNode != nullptr)
{
callNode->searchWordsFromNode(finalSet, pattern.substr(1));
}
} else
{
std::cout << "first symbol in pattern is not star" << std::endl;
TreeVertex *callNode = root->children[(pattern)[0]];
if (callNode != nullptr)
{
callNode->searchWordsFromNode(finalSet, pattern);
}
}
return finalSet;
}