// Created by Vladimir Tsvetkov on 08/07/14. //clear && clear && g++ -o search -std=c++11 -g search.cpp ./../../src/string/unistring.cpp #include "../../include/search/search.h" /*Unistring starCharToUnistring(char *starStr) { Utf8String strseq = Utf8String(starStr); Unistring strunistring = Unistring(strseq); return strunistring; } Unistring stdStrToUnistring(std::string *str) { Unistring strunistring = starCharToUnistring((char*)(*str).c_str()); return strunistring; }*/ int commonPrefix(Unistring *data, Unistring *pattern, int curIndex) { Unistring *cutPattern; cutPattern = pattern->substr(curIndex); for(int i = 0; i < data->size(); i++) if (i == cutPattern->size() || (*data)[i] != (*cutPattern)[i]) return i; return data->size(); } std::set *insertSetIntoSet(std::set *set, std::set *insSet) { auto newSet = new std::set; for(auto i:(*set)) newSet->insert(i); for(auto i:(*insSet)) newSet->insert(i); return newSet; } //attention! pattern is Unistring bool isValidPattern(Unistring *pattern) { if (pattern->size() < 3) { return false; } else { return true; } } Unistring *preProcessPattern(Unistring *pattern) { auto *newPattern = new Unistring; bool firstStar = false; for(auto i:(*pattern)) { if (i == unichar('*')) { if (!firstStar) { newPattern->append(i); firstStar = true; } } else { newPattern->append(i); firstStar = false; } } return newPattern; } std::set *Trie::searchWordsFromTrie(Unistring *pattern) { std::set *finalset; //Unistring newPattern = preProcessPattern(pattern); //if (!isValidPattern(newPattern)) //{ //пичаль //throw std::domain_error("слишком короткая строка"); //} //if (pattern[0] == unichar('*')) //{ // finalset = searchWords(root.leaves[pattern[0]], pattern, 1); //} else //{ finalset = searchWords(root, pattern, 0); //} return finalset; } /*std::set Trie::unsafeSearch(Unistring pattern) { std::set finalset; finalset = searchWords(root, pattern, 0); for(auto i:finalset) { i.print(); std::cout< *wordSet, trieNode *node) { if (node->isFinal) { wordSet->insert(*(node->data)); } else { for (auto i:node->leaves) { findRemainingNodes(wordSet, &i.second); } } } void DBG(std::string str) { std::cout << str << "\n"; } std::set *Trie::searchWords(trieNode *node, Unistring *pattern, int curIndex) { std::cout << "searchWordsCall with curIndex " << curIndex << "\n"; if ( ((*(node->data))[0]).is_sentinel() && (curIndex == (pattern->size() - 1) || (*pattern)[curIndex] == unichar('*'))) { auto wordSet = new std::set; wordSet->insert(*node->data); return wordSet; } DBG(" 1st end conditions checked"); if (!(*(node->data))[0].is_sentinel() && (curIndex == (pattern->size() - 1) || (*pattern)[curIndex] == unichar('*'))) { //run recursive walking and return all nodes auto wordSet = new std::set; findRemainingNodes(wordSet, node); return wordSet; } DBG("end conditions checked"); //int magic = 1; auto *wordSet = new std::set; unichar curChar = (*pattern)[curIndex]; if (curChar == unichar('*')) { wordSet=insertSetIntoSet(wordSet, searchWords(node, pattern, curIndex + 1)); //звезда пустой символ //или запускаю поиск от этой же вершины с обрезанной буквой //что делать если все буквы обрезали if (node->data->size() == 0) { for(auto i:node->leaves) { wordSet=insertSetIntoSet(wordSet, searchWords(&i.second, pattern, curIndex)); } } else { Unistring *newData = node->data->substr(1); std::map newleaves = node->leaves; auto *nextNode = new trieNode(newData, newleaves, false); wordSet=insertSetIntoSet(wordSet, searchWords(nextNode, pattern, curIndex)); } return wordSet; } else { int commonPrefixsize; if (curIndex == 0) { commonPrefixsize = commonPrefix(node->data, pattern, curIndex); } else { commonPrefixsize = commonPrefix(node->data, pattern, curIndex-1); } //Unistring commonPrefix = node.data.substring(0, commonPrefixsize); trieNode *nextNode; if (commonPrefixsize == node->data->size()) { //поиск от ребенка nextNode = &node->leaves[(*pattern)[commonPrefixsize]]; } else { //сплитим ноду Unistring *newData = node->data->substr(commonPrefixsize, node->data->size()); std::map newleaves = node->leaves; nextNode = new trieNode(newData, newleaves, false); } wordSet=insertSetIntoSet(wordSet, searchWords(nextNode, pattern, curIndex + commonPrefixsize)); // нужно ли -1? return wordSet; } auto *retSet = new std::set; return retSet; } Trie::Trie(trieNode *nroot) { this->root = nroot; } trieNode::~trieNode(){} trieNode::trieNode(){} Trie::~Trie(){} trieNode::trieNode(Unistring *ndata, std::map nleaves, bool nisFinal) { this->data = ndata; this->leaves = nleaves; this->isFinal = nisFinal; } void search_test() { /* std::cout<<"start test stdStrToUnistring\n"; std::string pattern; std::cin >> pattern; std::cout<> data1; std::cin >> pattern1; std::cin >> curIndex; int prefixsize = commonPrefix(stdStrToUnistring(&data1), stdStrToUnistring(&pattern1), curIndex); std::cout << prefixsize; */ Unistring *str = new Unistring(); str->append(unichar('a')); str->append(SentinelGenerator::instance().generate()); std::map nleaves1; trieNode node1 = trieNode(str, nleaves1, true); std::map nleaves; nleaves[unichar('a')] = node1; Unistring *mi = new Unistring; auto nroot = new trieNode(mi, nleaves, false); Trie tree = Trie(nroot); str->print(); Unistring *pattern = new Unistring; pattern->append(unichar('a')); pattern->print(); tree.searchWordsFromTrie(pattern); }