// Created by Vladimir Tsvetkov on 08/07/14. //clear && clear && g++ -o search -std=c++11 -g search.cpp ./../../src/string/unistring.cpp #include "../../include/search/search.h" void DBG(std::string str) { std::cout << str << std::endl; } void DBG_S(std::set *finalSet) { for(auto i:(*finalSet)) { i.print(); } } void DBG_M(std::map leaves) { int y = leaves.size(); for(auto i:leaves) { std::cout << i.first << " " << i.second << std::endl; } } void DBG_N(TrieNode *node) { DBG("Node DPRINT"); DBG("Node DATA"); node->data->print(); DBG("Node MAP"); DBG_M(node->leaves); DBG(""); } void DBG_T(TrieNode *tree, int indent) { std::cout << "data:"; tree->data->print(); for(int i = 0; i < indent; i++) std::cout << " "; for(auto i : tree->leaves) { std::cout << i.first << " -> "; DBG_T(i.second, indent+6); //std::cout << std::endl; } } bool validatePattern(Unistring *pattern) { if (pattern->size() < MAX_PATTERN_LENGTH) { return false; } else { return true; } } Unistring *preProcessPattern(Unistring *pattern) { auto newPattern = new Unistring(); bool firstStar = false; for(auto i:(*pattern)) { if (i == unichar('*')) { if (!firstStar) { newPattern->append(i); firstStar = true; } } else { newPattern->append(i); firstStar = false; } } return newPattern; } int findMaxCommonPrefix(Unistring *data, Unistring *pattern) { DBG("findMaxCommonPrefix for data and pattern"); data->print(); pattern->print(); DBG("MaxCommonPrefix is"); for(int i = 0; i < data->size(); i++) { printf("i num%d\n", i); if ((i == pattern->size()) || ((*data)[i] != (*pattern)[i])) { printf("i%d\n",i); DBG(""); return i; } } printf("ds%d\n",data->size()); DBG(""); return data->size(); } void merge2Sets(std::set *newSet, std::set *appendSet) { DBG("merge2Sets"); DBG("first set:"); DBG_S(newSet); DBG("append set:"); DBG_S(appendSet); DBG(""); for(auto i:(*appendSet)) newSet->insert(i); } TrieNode::TrieNode(Unistring *ndata, std::map nleaves, bool nflag) { this->data = ndata; this->leaves = nleaves; this->isFinal = nflag; } TrieNode::~TrieNode() { //delete this->data; } void TrieNode::findDownNodes(std::set *wordSet) { DBG("findDownNodes from Node"); DBG_N(this); DBG(""); if (this->isFinal) { wordSet->insert(*(this->data->substr(this->data->size() - 1))); } else { for(auto i:this->leaves) i.second->findDownNodes(wordSet); } } void TrieNode::searchWordsFromNode(std::set *wordSet, Unistring *pattern) { DBG("searchWordsFromNode"); DBG_N(this); DBG("pattern:"); pattern->print(); DBG(""); if ((*(this->data))[0].is_sentinel() && pattern->size() == 0) { wordSet->insert(*(this->data->substr(this->data->size() - 1))); return; } if (!(*(this->data))[0].is_sentinel() && ((pattern->size() == 0) || (pattern->size() == 1 && (*pattern)[0] == '*'))) { auto childNodesResultSet = new std::set(); this->findDownNodes(childNodesResultSet); merge2Sets(wordSet, childNodesResultSet); return; } unichar curChar = (*pattern)[0]; if (curChar == unichar('*')) { auto newSet = new std::set; this->searchWordsFromNode(newSet, pattern->substr(1)); merge2Sets(wordSet, newSet); if (this->data->size() == 0) { for(auto i:this->leaves) { auto newSet = new std::set; i.second->searchWordsFromNode(newSet, pattern); merge2Sets(wordSet, newSet); return; } } else { auto nextNode = new TrieNode(this->data->substr(1), this->leaves, false); auto newSet = new std::set; nextNode->searchWordsFromNode(newSet, pattern); merge2Sets(wordSet, newSet); return; } } else { int commonPrefixSize; commonPrefixSize = findMaxCommonPrefix(this->data, pattern); //commonPrefixSize++; if (commonPrefixSize == this->data->size()) { auto newSet = new std::set; if (this->leaves.count((*pattern)[1]) !=0) { TrieNode *nextNode = this->leaves[(*pattern)[1]]; nextNode->searchWordsFromNode(newSet, pattern->substr(commonPrefixSize)); } else { //only if * met for(auto i:this->leaves) { i.second->searchWordsFromNode(newSet, pattern->substr(commonPrefixSize)); } } merge2Sets(wordSet, newSet); return; } else { auto nextNode = new TrieNode(this->data->substr(commonPrefixSize), this->leaves, this->isFinal); auto newSet = new std::set; nextNode->searchWordsFromNode(newSet, pattern->substr(commonPrefixSize)); merge2Sets(wordSet, newSet); return; } } return; } Trie::Trie(TrieNode *nroot) { this->root = nroot; } Trie::~Trie() { } std::set *Trie::searchWordsFromTrie(Unistring *pattern) { auto finalSet = new std::set; TrieNode *callNode = root->leaves[(*pattern)[0]]; if (callNode != nullptr) { callNode->searchWordsFromNode(finalSet, pattern); } return finalSet; } void search_test() { Unistring *str2 = new Unistring(); str2->append(unichar('r')); str2->append(unichar('z')); str2->append(SentinelGenerator::instance().generate()); std::map nleaves12; auto node12 = new TrieNode(str2, nleaves12, true); Unistring *str = new Unistring(); str->append(unichar('a')); std::map nleaves1; nleaves1[unichar('r')] = node12; auto node1 = new TrieNode(str, nleaves1, false); std::map nleaves; nleaves[unichar('a')] = node1; Unistring *mi = new Unistring(); auto nroot = new TrieNode(mi, nleaves, false); Trie tree = Trie(nroot); auto pattern = new Unistring; pattern->append(unichar('a')); pattern->append(unichar('r')); std::set *finalSet; finalSet = tree.searchWordsFromTrie(pattern); DBG("RESULT:"); for(auto i:(*finalSet)) { std::cout << i[i.size() - 1] << std::endl; } DBG("ENDRESULT"); }