import requests, re, time
from bs4 import BeautifulSoup
import sqlite3
from datetime import datetime
# Timestamp taken once at import time and rendered as "(HH-MM)mm-dd-YYYY".
now = datetime.now()
date_time = now.strftime("(%H-%M)%m-%d-%Y")
#print("Date and time:", date_time)
# Name of the SQLite database file. It is pinned to a fixed file here; the
# commented-out expression on the same line would build it from date_time.
bdy = 'BDY(23-32)04-12-2019.db'#'BDY' + date_time + '.db'
# Module-level connection and cursor; create_table() executes on this cursor.
con = sqlite3.connect(bdy)
cur = con.cursor()
def create_table():
    """Create all tables used by the scraper unless they already exist.

    The statements are executed on the module-level cursor ``cur``.
    """
    schema_statements = (
        'CREATE TABLE IF NOT EXISTS cpe(CPE_name TEXT, CVE_name TEXT)',
        'CREATE TABLE IF NOT EXISTS cve(CVE_name TEXT, Date1 TEXT, Description TEXT, CVSS_name TEXT, CVSS_description TEXT, CWE_name TEXT, Hyperlink TEXT)',
        'CREATE TABLE IF NOT EXISTS cwe(CWE_name TEXT, CAPEC_name TEXT )',
        'CREATE TABLE IF NOT EXISTS cwe_description(CWE_name TEXT, CWE_description TEXT, CWE_link TEXT)',
        'CREATE TABLE IF NOT EXISTS capec_description(CAPEC_name TEXT, CAPEC_description TEXT, CAPEC_link TEXT,id_ATT_CK TEXT, ATT_CK_name TEXT, ATT_CK_link TEXT)',
        'CREATE TABLE IF NOT EXISTS att_ck(id_att_ck TEXT, name_att_ck TEXT, description_att_ck TEXT,tactic TEXT, platform TEXT, permissions_required TEXT, effective_permissions TEXT, data_sources TEXT,defense_bypassed TEXT, version TEXT)',
    )
    # Same order as before: cpe, cve, cwe, cwe_description,
    # capec_description, att_ck.
    for statement in schema_statements:
        cur.execute(statement)
def cpe_table_insert(cpe, cve):
    """Insert one (CPE name, CVE name) pair into the ``cpe`` table.

    Args:
        cpe: Value stored in the CPE_name column.
        cve: Value stored in the CVE_name column.

    A dedicated connection to the database file ``bdy`` is opened for the
    insert and is always closed afterwards, even if the insert fails.
    """
    conn = sqlite3.connect(bdy)
    try:
        cursor = conn.cursor()
        cursor.execute("INSERT INTO cpe (CPE_name, CVE_name) VALUES (?,?)", (cpe, cve))
        conn.commit()
    finally:
        # Previously the connection was never closed explicitly.
        conn.close()
def cwe_table_insert(cwe, capec):
    """Insert one (CWE name, CAPEC name) pair into the ``cwe`` table.

    Args:
        cwe: Value stored in the CWE_name column.
        capec: Value stored in the CAPEC_name column.

    A dedicated connection to the database file ``bdy`` is opened for the
    insert and is always closed afterwards, even if the insert fails.
    """
    conn = sqlite3.connect(bdy)
    try:
        cursor = conn.cursor()
        cursor.execute("INSERT INTO cwe (CWE_name , CAPEC_name ) VALUES (?,?)", (cwe, capec))
        conn.commit()
    finally:
        # Previously the connection was never closed explicitly.
        conn.close()
def cve_insert_data(data_cve):
    """Insert one row into the ``cve`` table.

    Args:
        data_cve: Indexable sequence whose first seven items are, in order:
            CVE name, date, description, CVSS vector, CVSS description,
            CWE name and the hyperlink text.

    Raises:
        IndexError: If ``data_cve`` has fewer than seven items.

    A dedicated connection to the database file ``bdy`` is opened for the
    insert and is always closed afterwards, even if the insert fails.
    """
    # Index explicitly (rather than unpack) so that longer sequences are still
    # accepted and short ones fail with IndexError before a connection is opened.
    row = tuple(data_cve[position] for position in range(7))
    conn = sqlite3.connect(bdy)
    try:
        cursor = conn.cursor()
        cursor.execute(
            "INSERT INTO cve (CVE_name, Date1, Description, CVSS_name, CVSS_description, CWE_name, Hyperlink) VALUES (?,?,?,?,?,?,?)",
            row,
        )
        conn.commit()
    finally:
        # Previously the connection was never closed explicitly.
        conn.close()
def cwe_insert_data(cwe_n, cwe_d, cwe_l):
    """Insert one row into the ``cwe_description`` table.

    Args:
        cwe_n: Value stored in the CWE_name column.
        cwe_d: Value stored in the CWE_description column.
        cwe_l: Value stored in the CWE_link column.

    A dedicated connection to the database file ``bdy`` is opened for the
    insert and is always closed afterwards, even if the insert fails.
    """
    conn = sqlite3.connect(bdy)
    try:
        cursor = conn.cursor()
        cursor.execute(
            "INSERT INTO cwe_description (CWE_name , CWE_description , CWE_link ) VALUES (?,?,?)",
            (cwe_n, cwe_d, cwe_l),
        )
        conn.commit()
    finally:
        # Previously the connection was never closed explicitly.
        conn.close()
def capec_insert_data(capec_n, capec_d, capec_l, id_ATT_CK, ATT_CK_name, ATT_CK_link):
    """Insert one row into the ``capec_description`` table.

    Args:
        capec_n: Value stored in the CAPEC_name column.
        capec_d: Value stored in the CAPEC_description column.
        capec_l: Value stored in the CAPEC_link column.
        id_ATT_CK: Value stored in the id_ATT_CK column.
        ATT_CK_name: Value stored in the ATT_CK_name column.
        ATT_CK_link: Value stored in the ATT_CK_link column.

    A dedicated connection to the database file ``bdy`` is opened for the
    insert and is always closed afterwards, even if the insert fails.
    """
    conn = sqlite3.connect(bdy)
    try:
        cursor = conn.cursor()
        cursor.execute(
            "INSERT INTO capec_description (CAPEC_name , CAPEC_description , CAPEC_link, id_ATT_CK, ATT_CK_name, ATT_CK_link) VALUES (?,?,?,?,?,?)",
            (capec_n, capec_d, capec_l, id_ATT_CK, ATT_CK_name, ATT_CK_link),
        )
        conn.commit()
    finally:
        # Previously the connection was never closed explicitly.
        conn.close()
def att_ck_insert_data(id_att_ck, name_att_ck, description_att_ck, tactic, platform, permissions_required, effective_permissions, data_sources,defense_bypassed, version):
    """Insert one row into the ``att_ck`` table.

    Each argument is stored in the column of the same name:
    id_att_ck, name_att_ck, description_att_ck, tactic, platform,
    permissions_required, effective_permissions, data_sources,
    defense_bypassed, version.

    A dedicated connection to the database file ``bdy`` is opened for the
    insert and is always closed afterwards, even if the insert fails.
    """
    row = (
        id_att_ck, name_att_ck, description_att_ck, tactic, platform,
        permissions_required, effective_permissions, data_sources,
        defense_bypassed, version,
    )
    conn = sqlite3.connect(bdy)
    try:
        cursor = conn.cursor()
        cursor.execute(
            "INSERT INTO att_ck (id_att_ck , name_att_ck , description_att_ck, tactic, platform, permissions_required, effective_permissions, data_sources,defense_bypassed, version) VALUES (?,?,?,?,?,?,?,?,?,?)",
            row,
        )
        conn.commit()
    finally:
        # Previously the connection was never closed explicitly.
        conn.close()
def comparison_cve(cve_name):
    """Return every row of the ``cve`` table whose CVE_name equals *cve_name*.

    Args:
        cve_name: CVE identifier to look up; it is converted with ``str()``.

    Returns:
        The list produced by ``fetchall()``; empty when nothing matches.
    """
    conn = sqlite3.connect(bdy)
    try:
        cursor = conn.cursor()
        # Bind the value instead of splicing it into the SQL text: the old
        # query wrapped it in double quotes, which SQLite parses as an
        # identifier first, and any quote inside the value broke the statement.
        cursor.execute("SELECT * FROM cve WHERE CVE_name == ?", (str(cve_name),))
        return cursor.fetchall()
    finally:
        conn.close()
def comparison_cwe(name_cwe):
    """Return every row of the ``cwe`` table whose CWE_name equals *name_cwe*.

    Args:
        name_cwe: CWE identifier to look up.

    Returns:
        The list produced by ``fetchall()``; empty when nothing matches.
    """
    conn = sqlite3.connect(bdy)
    try:
        cursor = conn.cursor()
        # Bind the value instead of splicing it into the SQL text: the old
        # query wrapped it in double quotes, which SQLite parses as an
        # identifier first, and any quote inside the value broke the statement.
        cursor.execute("SELECT * FROM cwe WHERE CWE_name == ?", (name_cwe,))
        return cursor.fetchall()
    finally:
        conn.close()
def comparison_capec(name_capec):
    """Return every ``capec_description`` row whose CAPEC_name equals *name_capec*.

    Args:
        name_capec: CAPEC identifier to look up.

    Returns:
        The list produced by ``fetchall()``; empty when nothing matches.
    """
    conn = sqlite3.connect(bdy)
    try:
        cursor = conn.cursor()
        # Bind the value instead of splicing it into the SQL text: the old
        # query wrapped it in double quotes, which SQLite parses as an
        # identifier first, and any quote inside the value broke the statement.
        cursor.execute(
            "SELECT * FROM capec_description WHERE CAPEC_name == ?",
            (name_capec,),
        )
        return cursor.fetchall()
    finally:
        conn.close()
def get_html(url):
    """Download *url* and return the response body as text.

    On ``requests.exceptions.ConnectionError`` a retry notice with the current
    local time is printed, the function sleeps for 10 seconds and tries again,
    repeating until a request succeeds.

    Args:
        url: Address to fetch with ``requests.get``.

    Returns:
        The ``.text`` of the response.
    """
    # A loop replaces the former recursive retry, which dropped the retried
    # result (so the caller received None) and deepened the call stack on
    # every failed attempt.
    while True:
        try:
            return requests.get(url).text
        except requests.exceptions.ConnectionError:
            print('повтор', time.strftime("%Y-%m-%d-%H.%M.%S", time.localtime()))
            time.sleep(10)
def get_total_quantity_cpe(html):
    """Read the total CPE count from a search-results page.

    The count is taken from the node that follows the second ``<strong>``
    element of the document; thousands separators (commas) are removed before
    the conversion to ``int``.
    """
    document = BeautifulSoup(html, 'lxml')
    second_strong = document.find_all('strong')[1]
    raw_count = second_strong.next
    return int(raw_count.replace(',', ''))
def get_cve_s_links_20(html):
    """Collect absolute URLs for every ``<a class="btn btn-sm">`` on the page.

    Each anchor's ``href`` is prefixed with ``https://nvd.nist.gov``; the
    links are returned in document order.
    """
    buttons = BeautifulSoup(html, 'lxml').find_all('a', class_='btn btn-sm')
    return ['https://nvd.nist.gov' + button.get('href') for button in buttons]
def get_name_cpe_20(html,url20):
    """Extract the CPE names listed on one search-results page.

    Args:
        html: Markup of the results page (may be unusable, e.g. ``None``).
        url20: URL of the same page; it is re-downloaded whenever parsing
            fails.

    Returns:
        A list with the text nodes that match ``cpe`` inside the ``<strong>``
        elements of the results table, in document order.

    Parsing is retried with freshly downloaded markup until it succeeds.
    """
    cpe_pattern = re.compile('cpe')
    # Iterating (instead of recursing) keeps the stack flat on a long run of
    # bad pages, and catching Exception rather than everything lets Ctrl-C and
    # SystemExit stop the scraper.
    while True:
        try:
            soup = BeautifulSoup(html, 'lxml')
            table = soup.find('table', class_='table table-striped table-hover')
            found = [cell.find(text=cpe_pattern) for cell in table.find_all('strong')]
            return [name for name in found if name is not None]
        except Exception:
            html = get_html(url20)
            if html is None:
                html = get_html(url20)
def _log_cpe_progress(done, quantity):
    """Print the per-page CPE progress and append the same line to cpe.txt."""
    print("Prosmotreno cpe: ", done, ' iz ', 20)
    line = str("Prosmotreno cpe: " + str(done) + ' iz ' + str(20) + 'cve: ' + str(quantity))
    # The context manager closes the log file even if the write fails.
    with open('cpe.txt', 'a') as log_file:
        log_file.write(line + '\n')


def get_links_cve_of_20cpe(name_cpe_list_20, links_cve_s_20):
    """Record the CVEs of each CPE on a page and collect the CVE detail links.

    Args:
        name_cpe_list_20: CPE names of the page; item ``i`` belongs to link
            ``i`` of ``links_cve_s_20``.
        links_cve_s_20: For every CPE, the URL of its CVE search results.

    Returns:
        De-duplicated list of absolute CVE detail URLs (order not preserved).

    Side effects:
        Inserts one ``cpe`` row per (CPE, CVE) pair, or a single
        ``(CPE, 'NONE')`` row when the CPE has no CVEs, and appends a
        progress line to ``cpe.txt`` after each CPE.

    Raises:
        IndexError: If ``name_cpe_list_20`` is shorter than ``links_cve_s_20``.
    """
    cve_links = []
    detail_href = re.compile('/v')
    for index, link in enumerate(links_cve_s_20):
        html = get_html(link)
        if html is None:
            html = get_html(link)
        soup = BeautifulSoup(html, 'lxml')
        quantity = soup.find('div', class_='col-sm-12 col-lg-3').find('strong').next
        print(quantity, "[", index, "]")
        if quantity == '0':
            cpe_table_insert(name_cpe_list_20[index], 'NONE')
            _log_cpe_progress(index + 1, quantity)
            continue
        quantity = re.sub(r',', '', quantity)
        # The result list is paginated in steps of 20 entries.
        for start in range(0, int(quantity), 20):
            page_url = link + '&startIndex=' + str(start)
            page_html = get_html(page_url)
            if page_html is None:
                page_html = get_html(page_url)
            page = BeautifulSoup(page_html, 'lxml')
            table = page.find('table', class_='table table-striped table-hover')
            for header in table.find_all('th', nowrap='nowrap'):
                named_anchor = header.find('a', href=detail_href)
                if named_anchor is not None:
                    # The CVE name is the text between the first '>' and the
                    # following '<' of the anchor's markup.
                    name_cve = str(named_anchor).split('>')[1].split('<')[0]
                    cpe_table_insert(name_cpe_list_20[index], str(name_cve))
                first_anchor = header.find('a')
                if first_anchor is not None:
                    cve_links.append('https://nvd.nist.gov' + first_anchor.get('href'))
        _log_cpe_progress(index + 1, quantity)
    return list(set(cve_links))
def get_data_cve(links_cve_of_20cpe):
    """Scrape and store every CVE page that is not yet in the database.

    For each link the CVE name is the sixth ``/``-separated piece of the URL.
    CVEs already present (per ``comparison_cve``) are skipped; for the rest
    the publication date, description, base score, CVSS vector, reference
    links and first CWE entry are read from the page and stored through
    ``cve_insert_data``.

    Returns:
        De-duplicated list of CWE links found on the newly stored CVE pages.
    """
    collected_cwe_links = []
    for link_cve in links_cve_of_20cpe:
        cve_name = link_cve.split('/')[5]
        # Guard clause: nothing to do for a CVE that is already stored.
        if len(comparison_cve(cve_name)) != 0:
            continue

        page = get_html(link_cve)
        if page is None:
            page = get_html(link_cve)
        soup = BeautifulSoup(page, 'lxml')
        detail = soup.find('div', class_='col-lg-9 col-md-7 col-sm-12')

        published = soup.find('span', {'data-testid': 'vuln-published-on'}).next
        date = str('Date: ' + published)

        description = detail.find('p', {'data-testid': 'vuln-description'}).text
        base_score = detail.find(
            'a',
            id='p_lt_WebPartZone1_zoneCenter_pageplaceholder_p_lt_WebPartZone1_zoneCenter_VulnerabilityDetail_VulnFormView_Cvss2CalculatorAnchor',
        ).text
        des_full = '\n'.join([base_score, description])
        vector = detail.find('span', {'data-testid': 'vuln-cvss2-panel-vector'}).text

        reference_table = detail.find(
            'table',
            class_='table table-striped table-condensed table-bordered detail-table',
        )
        links_solutions_str = '\n'.join(
            ['Ссылка на решение: ' + anchor.get('href') for anchor in reference_table.find_all('a')]
        )

        table_cwe = soup.find('table', {'data-testid': 'vuln-CWEs-table'})
        cwe_cell = table_cwe.find('td', {'data-testid': 'vuln-CWEs-link-0'})
        cwe_id = cwe_cell.text
        cwe_name = cwe_cell.next.next.next.next.text
        full_name_cwe = cwe_name + '(' + cwe_id + ')'
        collected_cwe_links.append(cwe_cell.find('a').get('href'))

        # The CVSS description column is always stored as the literal 'NONE'.
        cve_insert_data(
            [cve_name, date, des_full, vector, 'NONE', full_name_cwe, links_solutions_str]
        )
    return list(set(collected_cwe_links))
def _insert_cwe_description(soup, name_cwe, link_cwe):
    """Store the CWE text from 'Description', else 'Summary', else 'NULL'."""
    section = soup.find('div', id='Description')
    if section is None:
        section = soup.find('div', id='Summary')
    if section is None:
        cwe_insert_data(name_cwe, 'NULL', link_cwe)
        return
    cwe_insert_data(name_cwe, section.find('div', class_='indent').next, link_cwe)


def get_data_cwe(Links_cwe):
    """Scrape and store every CWE page that is not yet in the database.

    Args:
        Links_cwe: URLs of CWE pages. The CWE name is ``'CWE-'`` plus the
            sixth ``/``-separated piece of the URL without its extension;
            the special names ``CWE-CWE-noinfo`` / ``CWE-CWE-Other`` lose the
            duplicated prefix.

    Returns:
        De-duplicated list of CAPEC links found in the
        ``Related_Attack_Patterns`` sections of the newly stored pages.

    Side effects:
        For each new CWE, inserts one ``cwe`` row per related CAPEC (or a
        single ``'NONE'`` row when the section is missing) and one
        ``cwe_description`` row.
    """
    links_capec = []
    for link_cwe in Links_cwe:
        name_cwe = 'CWE-' + str(link_cwe).split("/")[5].split('.')[0]
        if name_cwe in ('CWE-CWE-noinfo', 'CWE-CWE-Other'):
            name_cwe = '-'.join(name_cwe.split('-')[1:])
        # CWEs already present in the ``cwe`` table are skipped.
        if comparison_cwe(name_cwe):
            continue

        html = get_html(link_cwe)
        if html is None:
            html = get_html(link_cwe)
        soup = BeautifulSoup(html, 'lxml')

        related = soup.find('div', id='Related_Attack_Patterns')
        if related is None:
            cwe_table_insert(name_cwe, 'NONE')
        else:
            for anchor in related.find_all('a', target='_blank', rel='noopener noreferrer'):
                links_capec.append(anchor.get('href'))
                cwe_table_insert(name_cwe, anchor.next)

        # One shared fallback chain for both cases. The former copy used when
        # related patterns exist lacked a ``continue`` after storing the
        # Summary text and then dereferenced the missing Description.
        _insert_cwe_description(soup, name_cwe, link_cwe)
    return list(set(links_capec))
def get_data_capec(links_capec):
    """Scrape and store every CAPEC page that is not yet in the database.

    Args:
        links_capec: URLs of CAPEC pages. The CAPEC name is ``'CAPEC-'`` plus
            the sixth ``/``-separated piece of the URL without its extension.

    Returns:
        De-duplicated list of the ATT&CK links found on the newly stored
        pages (each converted with ``str()``).

    Side effects:
        Inserts one ``capec_description`` row per ATT&CK reference, or a
        single row with ``'NONE'`` placeholders when the page has no
        ``tax_title`` block. Prints the links found per page and the overall
        list at the end.
    """
    links_attack_all = []
    for link_capec in links_capec:
        name_capec = 'CAPEC-' + str(link_capec).split('/')[5].split('.')[0]
        # CAPECs that are already stored are skipped.
        if comparison_capec(name_capec):
            continue

        html = get_html(link_capec)
        if html is None:
            html = get_html(link_capec)
        soup = BeautifulSoup(html, 'lxml')
        description = soup.find('div', id='Description')
        capec_discr = description.find('div', class_='indent').next

        # An explicit check replaces the former bare ``except``, which also
        # swallowed KeyboardInterrupt and SystemExit.
        taxonomy_title = soup.find('div', class_='tax_title')
        taxonomy_block = taxonomy_title.parent if taxonomy_title is not None else None
        if taxonomy_block is None:
            capec_insert_data(name_capec, capec_discr, link_capec, 'NONE', 'NONE', 'NONE')
            continue

        # Read every reference before inserting anything, so a malformed
        # anchor does not leave a partially stored CAPEC behind.
        references = []
        for anchor in taxonomy_block.find_all('a'):
            attack_link = str(anchor.get('href'))
            attack_id = str(anchor.text)
            attack_name = str(anchor.next.next.text)
            references.append((attack_id, attack_name, attack_link))

        for attack_id, attack_name, attack_link in references:
            capec_insert_data(name_capec, capec_discr, link_capec, 'T' + attack_id, attack_name, attack_link)

        links_attack = [attack_link for _, _, attack_link in references]
        print(links_attack)
        links_attack_all.extend(links_attack)
    print('links', links_attack_all)
    return list(set(links_attack_all))
def get_data_ATT_CK(links_ATT_CK):
    """Scrape each ATT&CK technique page and store it in the ``att_ck`` table.

    Every link is first rewritten (``Technique`` -> ``techniques``, ``wiki/``
    removed). The technique id is the fifth ``/``-separated piece of the
    rewritten URL. Card fields that are absent from the page are stored as
    empty strings.
    """
    card_labels = (
        'Tactic:',
        'Platform:',
        'Permissions Required:',
        'Effective Permissions:',
        'Data Sources:',
        'Defense Bypassed:',
        'Version:',
    )
    for link in links_ATT_CK:
        link = re.sub(r'wiki/', '', re.sub(r'Technique', 'techniques', link))
        page = get_html(link)
        if page is None:
            page = get_html(link)
        container = BeautifulSoup(page, 'lxml').find('div', class_='container-fluid')

        technique_name = str(container.find('h1').text).strip()
        technique_description = str(
            container.find('div', class_='col-md-8 description-body').text
        ).strip()
        technique_id = link.split('/')[4]
        card_titles = container.find('div', class_='card-body').find_all(
            'span', class_='h5 card-title'
        )

        # Start with every known field empty, then fill in what the card has.
        card = dict.fromkeys(card_labels, '')
        for title in card_titles:
            label = str(title.text).strip()
            if label not in card:
                continue
            value = str(title.parent.text).strip()
            if label == 'Tactic:':
                # Only the tactic text is normalised: long space runs are
                # dropped, newlines become spaces, commas get a trailing space.
                value = re.sub(r',', ', ', re.sub(r'\n', ' ', re.sub(r' {20,}', '', value)))
            card[label] = value

        att_ck_insert_data(
            technique_id,
            technique_name,
            technique_description,
            card['Tactic:'],
            card['Platform:'],
            card['Permissions Required:'],
            card['Effective Permissions:'],
            card['Data Sources:'],
            card['Defense Bypassed:'],
            card['Version:'],
        )
def main(start_index=424715):
    """Run the whole scrape: CPE pages -> CVE -> CWE -> CAPEC -> ATT&CK.

    Args:
        start_index: Offset of the first CPE search result to process. The
            default keeps the offset that used to be hard-coded in the loop;
            pass another value to resume elsewhere.

    Side effects:
        Creates the tables, fills the database through the ``get_*`` helpers
        and appends a progress line to ``cpe.txt`` after every page of 20
        CPEs.
    """
    create_table()
    url = 'https://nvd.nist.gov/products/cpe/search/results?status=FINAL%2cDEPRECATED&orderBy=CPEURI&namingFormat=2.3'
    all_cpe_html = get_html(url)
    if all_cpe_html is None:
        all_cpe_html = get_html(url)
    all_cpe = get_total_quantity_cpe(all_cpe_html)
    print("Vsego cpe naideno: ", all_cpe)
    for i in range(start_index, all_cpe, 20):
        url20 = url + '&startIndex=' + str(i)
        name_cpe_list_20_html = get_html(url20)
        if name_cpe_list_20_html is None:
            name_cpe_list_20_html = get_html(url20)
        name_cpe_list_20 = get_name_cpe_20(name_cpe_list_20_html, url20)
        # The page is downloaded a second time for the link extraction, as
        # before; the two parsers do not share markup.
        links_cve_s_20_html = get_html(url20)
        if links_cve_s_20_html is None:
            links_cve_s_20_html = get_html(url20)
        links_cve_s_20 = get_cve_s_links_20(links_cve_s_20_html)
        links_cve_of_20cpe = get_links_cve_of_20cpe(name_cpe_list_20, links_cve_s_20)
        Links_cwe = get_data_cve(links_cve_of_20cpe)
        links_capec = get_data_cwe(Links_cwe)
        links_ATT_CK = get_data_capec(links_capec)
        get_data_ATT_CK(links_ATT_CK)
        processed = i + 20
        print("Prosmotreno cpe: ", processed, ' iz ', all_cpe)
        progress_line = str("Prosmotreno cpe: " + str(processed) + ' iz ' + str(all_cpe))
        # The context manager closes the log file even if the write fails.
        with open('cpe.txt', 'a') as log_file:
            log_file.write(progress_line + '\n')
    #126489  NOTE(review): unexplained number left by the author; presumably an earlier resume offset - confirm.
# Start the scrape only when the file is executed as a script.
if __name__ == '__main__':
    main()