trie_search

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
// Created by Vladimir Tsvetkov on 08/07/14.
//clear && clear && g++ -o search -std=c++11 -g search.cpp ./../../src/string/unistring.cpp
#include <iostream>
#include <string>
#include <set>
#include <map>
#include "../../include/string/unistring.h"
Unistring starCharToUnistring(char *starStr)
{
Utf8String strseq = Utf8String(starStr);
Unistring strunistring = Unistring(strseq);
return strunistring;
}
Unistring stdStrToUnistring(std::string *str)
{
Unistring strunistring = starCharToUnistring((char*)(*str).c_str());
return strunistring;
}
int commonPrefix(Unistring data, Unistring pattern, int curIndex)
{
Unistring cutPattern = *pattern.substr(curIndex);
for(int i = 0; i < data.size(); i++)
if (i == cutPattern.size() || data[i] != cutPattern[i])
return i;
return data.size();
}
std::set<Unistring> insertSetIntoSet(std::set<Unistring> set, std::set<Unistring> insSet)
{
std::set<Unistring> newSet;
for(auto i:set)
newSet.insert(i);
for(auto i:insSet)
newSet.insert(i);
return newSet;
}
class trieNode
{
public:
Unistring *data;
std::map<unichar, trieNode> leaves;
bool isFinal;
trieNode();
trieNode(Unistring ndata, std::map<unichar, trieNode> nleaves);
~trieNode();
};
class Trie
{
public:
trieNode root;
Trie();
~Trie();
std::set<Unistring> searchWordsFromTrie(Unistring pattern);
std::set<Unistring> searchWords(trieNode node, Unistring pattern, int curIndex);
void findRemainingNodes(std::set<Unistring> *wordSet, trieNode node);
};
std::set<Unistring> Trie::searchWordsFromTrie(Unistring pattern)
{
std::set<Unistring> finalset = searchWords(root, pattern, 0);
}
void Trie::findRemainingNodes(std::set<Unistring> *wordSet, trieNode node)
{
if (node.isFinal)
{
wordSet->insert(*node.data);
} else
{
for (auto i:node.leaves)
findRemainingNodes(wordSet, i.second);
}
}
std::set<Unistring> Trie::searchWords(trieNode node, Unistring pattern, int curIndex)
{
std::cout << "searchWordsCall with curIndex " << curIndex << "\n";
//или node.data is Sentinel? лучше так потому что когда я спличу вершины я не слежу чтобы они были финальные
if ((*node.data)[0] > KSA_SENTINEL && (curIndex == (pattern.size() - 1) || pattern[curIndex] == unichar('*')))
{
std::set<Unistring> wordSet;
wordSet.insert(*node.data);
return wordSet;
}
if ((*node.data)[0] <= KSA_SENTINEL && (curIndex == (pattern.size() - 1) || pattern[curIndex] == unichar('*')))
{
//run recirsive walking and return all nodes
std::set<Unistring> wordSet;
findRemainingNodes(&wordSet, node);
return wordSet;
}
std::set<Unistring> wordSet;
unichar curChar = pattern[curIndex];
if (curChar == unichar('*'))
{
wordSet=insertSetIntoSet(wordSet, searchWords(node, pattern, curIndex + 1)); //звезда пустой символ
//или запускаю поиск от этой же вершины с обрезанной буквой
Unistring *newData = node.data->substr(1, node.data->size());
std::map<unichar, trieNode> newleaves = node.leaves;
trieNode nextNode = trieNode(*newData, newleaves);
wordSet=insertSetIntoSet(wordSet, searchWords(node, pattern, curIndex));
//что делать если все буквы обрезали
if (node.data->size() == 0)
{
for(auto i:node.leaves)
{
wordSet=insertSetIntoSet(wordSet, searchWords(i.second, pattern, curIndex));
}
}
return wordSet;
} else
{
int commonPrefixsize = commonPrefix(*node.data, pattern, curIndex);
//Unistring commonPrefix = node.data.substring(0, commonPrefixsize);
trieNode nextNode;
if (commonPrefixsize == node.data->size())
{
//поиск от ребенка
nextNode = node.leaves[pattern[commonPrefixsize]];
} else
{
//сплитим ноду
Unistring *newData = node.data->substr(commonPrefixsize, node.data->size());
std::map<unichar, trieNode> newleaves = node.leaves;
nextNode = trieNode(*newData, newleaves);
}
wordSet=insertSetIntoSet(wordSet, searchWords(nextNode, pattern, curIndex + commonPrefixsize));
return wordSet;
}
}
void test()
{
/*std::cout<<"start test stdStrToUnistring\n";
std::string pattern;
std::cin >> pattern;
std::cout<<pattern<<"\n";
Unistring patternunistring = stdStrToUnistring(&pattern);
patternunistring.print();
std::cout<<"\n";
std::cout<<"start test starCharToUnistring\n";
char str[] = {'a', 'b', 'r', 'a'};
std::cout<<str<<"\n";
Unistring strunistring = starCharToUnistring(str);
strunistring.print();
std::cout<<"\n";
std::cout<<"start test commonPrefixsize\n";
std::string data, pattern;
int curIndex;
std::cin >> data;
std::cin >> pattern;
std::cin >> curIndex;
int prefixsize = commonPrefix(stdStrToUnistring(&data), stdStrToUnistring(&pattern), curIndex);
std::cout << prefixsize;*/
}
int main(int argc, const char * argv[])
{
//test();
return 0;
}