def reverse_complement(seq):
    """Return the reverse complement of a DNA sequence.

    The sequence is read back to front and every nucleotide is swapped
    for its pairing partner (A<->T, G<->C).

    Args:
        seq: sequence made of the upper-case symbols 'A', 'C', 'G', 'T'.

    Returns:
        The reverse-complemented sequence as a string.

    Raises:
        KeyError: if ``seq`` contains any symbol other than A, C, G or T.
    """
    pairing = {'A': 'T', 'T': 'A', 'G': 'C', 'C': 'G'}
    return ''.join(pairing[base] for base in reversed(seq))
def humming_distance(seq1, seq2):
    """Count the positions at which ``seq1`` and ``seq2`` differ.

    Only the positions of ``seq1`` are inspected: symbols of ``seq2``
    beyond ``len(seq1)`` are ignored.

    Args:
        seq1: first sequence; drives the comparison.
        seq2: second sequence, indexed position by position.

    Returns:
        The number of mismatching positions as an int.

    Raises:
        IndexError: if ``seq2`` is shorter than ``seq1``.
    """
    return sum(1 for pos, base in enumerate(seq1) if base != seq2[pos])
def k_mers_in_clump(seq, kmer):
    """Count the occurrences of ``kmer`` in ``seq``, overlaps included.

    Args:
        seq: sequence to scan.
        kmer: pattern to look for.

    Returns:
        Number of start positions at which ``seq`` contains ``kmer``.
        An empty ``kmer``, or one longer than ``seq``, yields 0.
    """
    kmer_len = len(kmer)
    if kmer_len == 0:
        # Nothing to search for; avoid matching the empty slice everywhere.
        return 0
    # Enumerate start positions explicitly. Slicing the sequence with
    # seq[:-kmer_len + 1] collapses to seq[:0] for 1-mers and would
    # skip every position.
    last_start = len(seq) - kmer_len
    return sum(1 for start in range(last_start + 1)
               if seq[start:start + kmer_len] == kmer)
def combinations(alphabet, length=3):
    """Return every word of ``length`` symbols drawn from ``alphabet``.

    Symbols may repeat within a word. Words are ordered with the first
    position varying slowest and the last position varying fastest,
    following the order of ``alphabet``.

    Args:
        alphabet: iterable of string symbols, e.g. ['A', 'C', 'G', 'T'].
        length: number of symbols per word; defaults to 3.

    Returns:
        A list of ``len(alphabet) ** length`` strings.
    """
    # Function-scope import keeps this block self-contained.
    from itertools import product

    return [''.join(word) for word in product(alphabet, repeat=length)]
def filter_by_hemming(comb_list, pattern, distance):
    """Keep the sequences that lie within ``distance`` of ``pattern``.

    Args:
        comb_list: iterable of candidate sequences.
        pattern: reference sequence each candidate is compared with.
        distance: largest mismatch count (inclusive) a candidate may have.

    Returns:
        A new list with the accepted candidates, in their original order.
    """
    return [candidate for candidate in comb_list
            if not humming_distance(candidate, pattern) > distance]
def skew(seq):
    """Locate the prefix of ``seq`` with the lowest G-minus-C skew.

    The skew of a prefix is its number of 'G' symbols minus its number
    of 'C' symbols.

    Args:
        seq: sequence to scan.

    Returns:
        A tuple ``(index, skew_value)``. ``index`` is the 0-based position
        of the last symbol of the first prefix that reaches the minimum
        skew, and ``skew_value`` is that minimum. If no prefix has a
        negative skew (including an empty ``seq``), the starting baseline
        ``(0, 0)`` is returned.
    """
    min_value = (0, 0)
    running_skew = 0
    for index, nucl in enumerate(seq):
        # Carry the skew forward one symbol at a time instead of
        # recounting the whole prefix on every step.
        if nucl == 'G':
            running_skew += 1
        elif nucl == 'C':
            running_skew -= 1
        # Strictly smaller, so the earliest minimum is the one kept.
        if running_skew < min_value[1]:
            min_value = (index, running_skew)
    return min_value
def gc_calc(seq):
    """Return the number of 'G' symbols minus the number of 'C' symbols.

    Any other symbol (lower-case letters included) contributes nothing.

    Args:
        seq: iterable of symbols.

    Returns:
        The integer difference ``count('G') - count('C')``.
    """
    return sum(1 if base == 'G' else -1 if base == 'C' else 0
               for base in seq)
def find_kmer_with_mismatches(seq, kmer, distance):
    """Count the windows of ``seq`` that match ``kmer`` approximately.

    A window is a substring of ``seq`` with the same length as ``kmer``;
    overlapping windows are counted separately. A window matches when
    it differs from ``kmer`` in at most ``distance`` positions.

    Args:
        seq: sequence to scan.
        kmer: pattern to compare each window with; any length.
        distance: largest number of mismatching positions (inclusive).

    Returns:
        The number of matching windows. An empty ``kmer``, or one longer
        than ``seq``, yields 0.
    """
    kmer_len = len(kmer)
    if kmer_len == 0:
        return 0
    nucleotides = frozenset('ACGT')
    kmer_count = 0
    # Scan the sequence once, rather than enumerating every neighbour
    # of the pattern, so the pattern length is not limited.
    for start in range(len(seq) - kmer_len + 1):
        window = seq[start:start + kmer_len]
        # Only words built from A/C/G/T are candidate matches; windows
        # holding any other symbol (e.g. 'N' or lower case) are skipped.
        if not nucleotides.issuperset(window):
            continue
        mismatches = sum(1 for left, right in zip(window, kmer)
                         if left != right)
        if mismatches <= distance:
            kmer_count += 1
    return kmer_count