тоже FASTA

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
#include <map>
#include <time.h>
#include <string>
#include <fstream>
#include <stdio.h>
#include <stdlib.h>
#include <algorithm>
#include <sqlite3.h>
#define DB_CONFIG_FILE_NAME "DB_config.txt"
#define DB_NAME "FASTA.db"
// Row callback handed to sqlite3_exec: prints every column of one result row
// as "<column name> = <value>", substituting the text NULL for null values,
// then prints an empty line to separate rows.
// Always returns 0.
static int callback(void *NotUsed, int argc, char **argv, char **azColName){
    int col = 0;
    while (col < argc) {
        const char *value = argv[col];
        if (value == NULL) {
            value = "NULL";
        }
        printf("%s = %s\n", azColName[col], value);
        ++col;
    }
    printf("\n");
    return 0;
}
void InsertDB(char* file_name, int magic_num) {
//open DB
sqlite3 *db;
int rc = sqlite3_open(DB_NAME, &db);
if( rc ){
printf("Can't open database: %s\n", sqlite3_errmsg(db));
sqlite3_close(db);
return;
}
//open FileName
std::ifstream input_seq(file_name);
char c;
std::string name = "";
std::string seq = "";
//insert input sequences
while (input_seq.get(c)) {
switch (c) {
case '>':
if (seq.length() && name.length()) {
//insert seq
sqlite3_stmt *stmt;
const char *pzTest;
std::string insert("INSERT INTO Sequence (description, string) VALUES(?, ?)");
rc = sqlite3_prepare_v2(db, insert.c_str(), insert.length(), &stmt, &pzTest);
if( rc == SQLITE_OK ){
sqlite3_bind_text(stmt, 1, name.c_str(), name.length(), 0);
sqlite3_bind_text(stmt, 2, seq.c_str(), seq.length(), 0);
sqlite3_step(stmt);
sqlite3_finalize(stmt);
} else { printf("Can't prepare sqlite3_prepare_v2 'INSERT INTO Sequence (description, string) VALUES(%s, %s)'\n", name.c_str(), seq.c_str()); }
//inser subseq
int mainID = sqlite3_last_insert_rowid(db);
for (int i = 0; i < seq.length() - magic_num + 1; i++) {
insert = "INSERT INTO SubSequence (string, position, baseString) VALUES(?, ?, ?)";
rc = sqlite3_prepare_v2(db, insert.c_str(), insert.length(), &stmt, &pzTest);
if( rc == SQLITE_OK ){
sqlite3_bind_text(stmt, 1, seq.substr(i, magic_num).c_str(), magic_num, 0);
sqlite3_bind_int(stmt, 2, i);
sqlite3_bind_int(stmt, 3, mainID);
sqlite3_step(stmt);
sqlite3_finalize(stmt);
} else { printf("Can't prepare sqlite3_prepare_v2 'INSERT INTO SubSequence (string, position, baseString) VALUES(%s, %d, %d)'\n", seq.substr(i, magic_num).c_str(), i, mainID); }
}
}
std::getline(input_seq, name);
seq = "";
break;
case '\t':
case '\n':
case '\r':
case ' ':
//skip ws
break;
default:
seq += c;
}
}
//close file
input_seq.close();
//close DB
sqlite3_close(db);
}
// Creates the database and fills it from a FASTA file.
//   file_name - path of the FASTA file passed on to InsertDB
//   magic_num - subsequence length as decimal text; must be a positive integer
// Steps: validate magic_num, run the SQL read from FASTAquery.sql (text up to
// the first '$') against DB_NAME, insert the sequences, then write
// DB_CONFIG_FILE_NAME containing the magic number and the creation time.
// Failures are reported on stdout and abort the remaining steps.
void CreateDB(char* file_name, char* magic_num) {
    // validate before touching the database: atoi cannot report bad input
    char *end = NULL;
    const long parsed = strtol(magic_num, &end, 10);
    const int sub_len = static_cast<int>(parsed);
    if (end == magic_num || *end != '\0' || parsed <= 0 || static_cast<long>(sub_len) != parsed) {
        printf("Invalid magic num '%s': expected a positive integer\n", magic_num);
        return;
    }
    //read schema query
    std::ifstream fin("FASTAquery.sql");
    if (!fin) {
        // an empty query would "succeed" and leave a database without tables
        printf("Can't open file: FASTAquery.sql\n");
        return;
    }
    std::string query;
    std::getline(fin, query, '$');
    fin.close();
    //create tables
    sqlite3 *db = NULL;
    char *zErrMsg = NULL;
    std::string dbName(DB_NAME);
    int rc = sqlite3_open(dbName.c_str(), &db);
    if (rc) {
        printf("Can't open database: %s\n", sqlite3_errmsg(db));
        sqlite3_close(db);
        return;
    }
    rc = sqlite3_exec(db, query.c_str(), callback, 0, &zErrMsg);
    if (rc != SQLITE_OK) {
        printf("SQL error: %s\n", zErrMsg);
        sqlite3_free(zErrMsg);
        sqlite3_close(db);
        return;
    }
    sqlite3_close(db);
    //adding sequences to DB
    InsertDB(file_name, sub_len);
    //creating DB config file
    std::ofstream conf_file(DB_CONFIG_FILE_NAME);
    time_t rawtime;
    time(&rawtime);
    struct tm *timeinfo = localtime(&rawtime);
    conf_file << sub_len << '\n';
    if (timeinfo) {
        // asctime ends its result with a newline
        conf_file << "Creation time: " << asctime(timeinfo);
    }
    conf_file.close();
}
// Adds the sequences from file_name to the existing database.
// The subsequence length is the first integer in DB_CONFIG_FILE_NAME; if it
// cannot be read or is not positive, an error is printed and nothing is added.
void AddDB(char* file_name) {
    //trying to open DB config file
    std::ifstream conf_file(DB_CONFIG_FILE_NAME);
    int magic_num = 0;
    if (!(conf_file >> magic_num) || magic_num <= 0) {
        // a missing config would otherwise index with a subsequence length of 0
        printf("Can't read magic num from %s\n", DB_CONFIG_FILE_NAME);
        return;
    }
    conf_file.close();
    //adding sequences to DB
    InsertDB(file_name, magic_num);
}
void Search(char* seq_file_name) {
//read input seq
std::ifstream input_seq(seq_file_name);
std::string seq = "";
std::getline(input_seq, seq);
input_seq.close();
//open DB config file
std::ifstream conf_file(DB_CONFIG_FILE_NAME);
int magic_num = 0;
conf_file >> magic_num;
conf_file.close();
//open DB
sqlite3 *db;
int rc = sqlite3_open(DB_NAME, &db);
if( rc ){
printf("Can't open database: %s\n", sqlite3_errmsg(db));
sqlite3_close(db);
return;
}
//for substr
//hashmap for all strings from base who is good
std::map<int, std::string> dump;
for (int i = 0; i < seq.length() - magic_num + 1; i++) {
std::string substr = seq.substr(i, magic_num);
std::string select = "SELECT ID, string FROM sequence INNER JOIN (SELECT DISTINCT baseString FROM SubSequence WHERE string = ? ) AS keys ON sequence.id = keys.baseString";
sqlite3_stmt *stmt;
const char *pzTest;
rc = sqlite3_prepare_v2(db, select.c_str(), select.length(), &stmt, &pzTest);
if( rc == SQLITE_OK ) {
sqlite3_bind_text(stmt, 1, substr.c_str(), substr.length(), 0);
while (sqlite3_step(stmt) != SQLITE_DONE) {
int id_seq2 = sqlite3_column_int(stmt, 0);
std::string seq2((char*)sqlite3_column_text(stmt, 1));
dump.insert(std::pair<int, std::string>(id_seq2, seq2.c_str()));
}
sqlite3_finalize(stmt);
}
}
//filter
}
// Entry point. The first character of the first argument selects the command:
//   c <file name> <magic num> - create the DB
//   a <file name>             - add sequences to the DB
//   s <file name>             - search the DB
// Prints usage when no command is given. Always returns 0.
int main(int argc, char** argv) {
    if (argc < 2) {
        printf("Too few arguments\n");
        printf("\tpossible arguments:\n");
        printf("\t\tc <file name> <magic num> - create new DB from <file name> with <magic num>\n");
        printf("\t\ta <file name> - add sequences from <file name> to exsisting DB\n");
        printf("\t\ts <file name> - search seq from <file name> in exsisting DB\n");
        return 0;
    }

    const char command = argv[1][0];
    const bool has_file = argc > 2;
    const bool has_magic = argc > 3;

    if (command == 'c') {
        //create
        if (has_magic) {
            CreateDB(argv[2], argv[3]);
        } else {
            printf("Not enough arguments\n");
        }
    } else if (command == 'a') {
        //add
        if (has_file) {
            AddDB(argv[2]);
        } else {
            printf("Not enough arguments\n");
        }
    } else if (command == 's') {
        //search
        if (has_file) {
            Search(argv[2]);
        } else {
            printf("Not enough arguments\n");
        }
    } else {
        printf("Unknown command %s\n", argv[1]);
    }
    return 0;
}