# DNA sequence helpers: reverse complement, Hamming distance, k-mer counting
# (exact and with mismatches) and GC skew.
def reverse_complement(seq):
    """Return the reverse complement of the DNA string *seq*.

    The sequence is read back to front and every nucleotide is replaced by
    its pairing partner (A <-> T, G <-> C).

    Only the upper-case letters A, C, G and T are recognised; any other
    character raises ``KeyError``.
    """
    complement = {'A': 'T', 'T': 'A', 'G': 'C', 'C': 'G'}
    # Build the result with a single join instead of growing a string with
    # += once per nucleotide.
    return ''.join(complement[nucl] for nucl in seq[::-1])
def humming_distance(seq1, seq2):
    """Return the Hamming distance between *seq1* and *seq2*.

    The result is the number of positions of *seq1* at which *seq2* holds a
    different element.

    The comparison is driven by *seq1*: if *seq2* is longer, its extra
    trailing elements are ignored; if *seq2* is shorter, indexing past its
    end raises ``IndexError``.
    """
    return sum(1 for index, nucl in enumerate(seq1) if nucl != seq2[index])
def k_mers_in_clump(seq, kmer):
    """Count the occurrences of *kmer* in *seq*, overlapping ones included.

    Every start position at which a full-length window fits into *seq* is
    checked, so ``k_mers_in_clump('AAAA', 'AA')`` is 3.

    Returns 0 when *kmer* is empty or longer than *seq*.
    """
    kmer_len = len(kmer)
    if kmer_len == 0:
        # An empty pattern has no meaningful occurrence count.
        return 0
    # Number of start positions where a full window fits. Computing it
    # explicitly (rather than slicing with -kmer_len + 1) keeps
    # single-character k-mers working: that slice bound would be 0 and
    # yield an empty sequence.
    last_start = len(seq) - kmer_len + 1
    return sum(
        1
        for start in range(last_start)
        if seq[start:start + kmer_len] == kmer
    )
def combinations(alphabet, length=3):
    """Return every string of *length* symbols drawn from *alphabet*.

    Symbols may repeat, so the result is the Cartesian power of the
    alphabet, not a set of combinations in the mathematical sense. Words
    are ordered with the first position varying slowest and the last
    position varying fastest, following the order of *alphabet*.

    *alphabet* is an iterable of strings. *length* defaults to 3, the word
    size this function has always produced.
    """
    # Imported locally because the module has no import section.
    from itertools import product

    return [''.join(word) for word in product(alphabet, repeat=length)]
def filter_by_hemming(comb_list, pattern, distance):
    """Return the entries of *comb_list* that are close to *pattern*.

    An entry is kept when ``humming_distance(entry, pattern)`` is at most
    *distance*. The order of *comb_list* is preserved and a new list is
    returned.
    """
    return [
        seq for seq in comb_list
        if humming_distance(seq, pattern) <= distance
    ]
def skew(seq):
    """Locate the minimum GC skew over the prefixes of *seq*.

    The skew of a prefix is its number of 'G' characters minus its number
    of 'C' characters. Each prefix and its skew are printed as they are
    evaluated.

    Returns a tuple ``(index, skew_value)`` where *index* is the 0-based
    position of the last character of the first prefix that reaches the
    minimum. If no prefix has a skew below zero (including an empty *seq*)
    the result is ``(0, 0)``.
    """
    # NOTE(review): the earlier comparison was ``min_value[1] < skew_value``,
    # which tracked the maximum despite the variable name; it is flipped
    # here so the minimum is actually tracked.
    min_value = (0, 0)
    skew_value = 0
    for index, nucl in enumerate(seq):
        # Keep a running total rather than rescanning the whole prefix.
        if nucl == 'G':
            skew_value += 1
        elif nucl == 'C':
            skew_value -= 1
        # A single formatted argument prints the same text on Python 2 and 3.
        print('%s %s' % (seq[:index + 1], skew_value))
        if skew_value < min_value[1]:
            min_value = (index, skew_value)
    return min_value
def gc_calc(seq):
    """Return the number of 'G' minus the number of 'C' in *seq*.

    Only upper-case 'G' and 'C' are counted; every other element
    contributes nothing. An empty *seq* gives 0.
    """
    # Each comparison is a bool (0 or 1), so the difference is +1 for 'G',
    # -1 for 'C' and 0 for anything else.
    return sum((nucl == 'G') - (nucl == 'C') for nucl in seq)
def find_kmer_with_mismatches(seq, kmer, distance):
    """Count the windows of *seq* that match *kmer* with few mismatches.

    Every window of ``len(kmer)`` consecutive characters of *seq*
    (overlapping windows included) is compared with *kmer*; the window is
    counted when ``humming_distance(window, kmer)`` is at most *distance*.

    Works for a k-mer of any length. Returns 0 when *kmer* is empty or
    longer than *seq*.
    """
    kmer_len = len(kmer)
    if kmer_len == 0:
        return 0
    # Scan the sequence directly instead of enumerating candidate words:
    # a pre-built list of 3-letter words is only correct when the k-mer
    # itself has length 3.
    return sum(
        1
        for start in range(len(seq) - kmer_len + 1)
        if humming_distance(seq[start:start + kmer_len], kmer) <= distance
    )