#!/usr/bin/python3
# coding: utf-8
# _________________________________________________
from PyQt5.QtWidgets import (QMainWindow, QLabel, QPushButton, QComboBox, QCheckBox, QRadioButton, QButtonGroup, QApplication)
from PyQt5.QtCore import Qt
# _________________________________________________
class MainWindow(QMainWindow):
    """Main window of the Google search scraper.

    Lets the user pick a predefined query, optionally restrict results to the
    last day or week, filter out unwanted result blocks, and save the
    remaining blocks to an HTML file.

    The class relies on module-level names that the script entry point
    defines before the window is created: ``queries``, ``domains_flt``,
    ``words_flt``, ``pagination``, ``header``, ``files`` and ``qss_file``,
    plus the ``re``, ``requests`` and ``BeautifulSoup`` imports.
    """

    # Base search URL; the query text is appended with spaces turned into '+'.
    _SEARCH_URL = 'https://www.google.com.ua/search?q='

    # URL suffix for each time-range radio button, keyed by objectName:
    #   &tbs=qdr:d -> last day, &tbs=qdr:w -> last week, '' -> no limit.
    _TIME_SUFFIXES = {
        'btn__toggle_day': '&tbs=qdr:d',
        'btn__toggle_week': '&tbs=qdr:w',
        'btn__toggle_notime': '',
    }

    # Static parts of the result page wrapped around the found blocks.
    _HTML_HEAD = (
        '<!DOCTYPE html><html lang="ru"><head><meta charset="utf-8" />'
        '<meta http-equiv="Cache-Control" content="no-cache">'
        '<meta name="viewport" content="width=device-width, initial-scale=1">'
        '<title>Google</title>'
        '<link rel="stylesheet" type="text/css" href="app.css">'
        '<script src="jquery-3.3.1.min.js"></script>'
        '<script src="app.js"></script>'
        '</head><body><div class="result_search">. . .</div><div class="search">'
    )
    _HTML_TAIL = '</div></body></html>'

    def __init__(self) -> None:
        """Declare the window state, build the query list and create the UI."""
        super().__init__()

        # Widgets; the real objects are created in init_ui().
        self.label__title = None
        self.btn__select_query = None
        self.label__filter = None
        self.btn__toggle_filter = None
        self.label__day = None
        self.label__week = None
        self.btn__toggle_day = None
        self.btn__toggle_week = None
        self.label__notime = None
        self.btn__toggle_notime = None
        self.button_group = None
        self.btn__start_search = None
        self.label__info = None

        # Search state.
        self.text_query = None
        self.google_query = None
        self.filters = None
        self.list_found_blocks = None
        self.file_html = None
        # 1 -> filtering enabled, 0 -> disabled (the check box starts checked).
        self.filter_state = 1
        # Time-range suffix for the URL; '' matches the initially checked
        # "no limit" radio button.
        self._time_suffix = ''

        # '.' is the placeholder entry meaning "no query selected".
        self.queries = queries
        self.query_list = sorted(['.'] + list(self.queries.keys()))

        self.init_ui()

    def init_ui(self) -> None:
        """Create and position all widgets, apply the stylesheet, show the window."""
        self.label__title = QLabel('Google', self)
        self.label__title.setObjectName('label__title')

        # Query selector.
        self.btn__select_query = QComboBox(self)
        self.btn__select_query.setObjectName('btn__select_query')
        self.btn__select_query.addItems(self.query_list)
        self.btn__select_query.activated[str].connect(self.select)
        self.btn__select_query.move(15, 60)

        # Filter switch and its caption.
        self.label__filter = QLabel('фильтры', self)
        self.label__filter.setObjectName('label__filter')
        self.label__filter.move(15, 100)

        self.btn__toggle_filter = QCheckBox(self)
        self.btn__toggle_filter.setObjectName('btn__toggle_filter')
        # Toggled before the signal is connected, so the box starts checked
        # without invoking toggle_filters().
        self.btn__toggle_filter.toggle()
        self.btn__toggle_filter.stateChanged.connect(self.toggle_filters)
        self.btn__toggle_filter.move(80, 100)

        # Captions of the time-range radio buttons.
        self.label__day = QLabel('сутки ', self)
        self.label__day.setObjectName('label__day')
        self.label__day.move(150, 100)

        self.label__week = QLabel('неделя ', self)
        self.label__week.setObjectName('label__week')
        self.label__week.move(200, 100)

        self.label__notime = QLabel('нет ', self)
        self.label__notime.setObjectName('label__notime')
        self.label__notime.move(270, 100)

        # Time-range radio buttons.
        self.btn__toggle_day = QRadioButton(self)
        self.btn__toggle_day.setObjectName('btn__toggle_day')
        self.btn__toggle_day.move(160, 120)

        self.btn__toggle_week = QRadioButton(self)
        self.btn__toggle_week.setObjectName('btn__toggle_week')
        self.btn__toggle_week.move(215, 120)

        self.btn__toggle_notime = QRadioButton(self)
        self.btn__toggle_notime.setObjectName('btn__toggle_notime')
        self.btn__toggle_notime.setChecked(True)
        self.btn__toggle_notime.move(273, 120)

        self.button_group = QButtonGroup()
        self.button_group.addButton(self.btn__toggle_day)
        self.button_group.addButton(self.btn__toggle_week)
        self.button_group.addButton(self.btn__toggle_notime)
        self.button_group.buttonClicked.connect(self.toggle_time)

        # Search button.
        self.btn__start_search = QPushButton('искать', self)
        self.btn__start_search.setObjectName('btn__start_search')
        self.btn__start_search.clicked.connect(self.start_search)
        self.btn__start_search.move(15, 160)

        # Status line.
        self.label__info = QLabel('', self)
        self.label__info.setObjectName('label__info')
        self.label__info.move(15, 210)

        with open(qss_file, "r") as styles:
            self.setStyleSheet(styles.read())

        self.setWindowTitle('Google')
        self.show()

    def select(self, text: str) -> None:
        """Remember the chosen query, load its filters and rebuild the URL.

        Does nothing unless the call originates from the query combo box.

        :param text: text of the activated combo-box item.
        """
        sender = self.sender()
        if sender is None or sender.objectName() != 'btn__select_query':
            return

        self.label__info.setText('')
        self.text_query = text
        if self.text_query and self.text_query != '.':
            # The query's value in ``queries`` selects the entry used from
            # both the domain filters and the word filters.
            num_filters = self.queries[self.text_query]
            self.filters = domains_flt[num_filters] + words_flt[num_filters]
        self._update_google_query()

    def toggle_filters(self, state: int) -> None:
        """Enable or disable result filtering according to the check box.

        :param state: check state emitted by ``QCheckBox.stateChanged``.
        """
        self.label__info.setText('')
        self.filter_state = 1 if state == Qt.Checked else 0

    def toggle_time(self, button) -> None:
        """Apply the time range selected by the clicked radio button.

        The suffix is stored separately and the URL is rebuilt from scratch,
        so switching between ranges replaces the previous restriction instead
        of appending another one.

        :param button: the radio button that was clicked.
        """
        self._time_suffix = self._TIME_SUFFIXES.get(
            button.objectName(), self._time_suffix)
        self._update_google_query()

    def _update_google_query(self) -> None:
        """Rebuild ``google_query`` from the current query and time range."""
        if not self.text_query or self.text_query == '.':
            self.google_query = None
            return
        self.google_query = (
            self._SEARCH_URL
            + self.text_query.replace(' ', '+')
            + self._time_suffix
        )

    @staticmethod
    def _matches_any_filter(block_text, filters) -> bool:
        """Return True when any filter pattern is found in ``block_text``."""
        return any(re.search(str(flt), block_text) for flt in filters)

    def start_search(self) -> None:
        """Fetch result pages, collect the accepted blocks and write the HTML.

        Pages are requested with ``start`` offsets 0, 10, 20, ... while the
        offset is below ``pagination``. Each ``div.g`` inside ``div#search``
        is kept unless filtering is enabled and one of the filter patterns
        matches the block's markup.
        """
        query = self.text_query
        # Result blocks found for the query.
        self.list_found_blocks = []
        self.label__info.setText('')

        if not query or query == '.':
            self.label__info.setText('выбрать запрос')
            return

        use_filters = self.filter_state == 1
        filters = self.filters
        container_missing = False
        start = 0

        while start < pagination:
            # Fetch the page and parse it with BeautifulSoup.
            page = requests.get(
                self.google_query + '&start=' + str(start), headers=header)
            soup = BeautifulSoup(page.text, 'lxml')

            all_search = soup.find('div', id='search')
            if all_search is None:
                # Without the container there is nothing to parse; stop here
                # instead of calling find_all() on None.
                self.label__info.setText('не найден div c id=search')
                container_missing = True
                break

            all_blocks = all_search.find_all('div', class_='g')
            if not all_blocks:
                self.label__info.setText('не найден div c class=g')

            for block in all_blocks:
                if use_filters and self._matches_any_filter(str(block), filters):
                    continue
                self.list_found_blocks.append(block)

            start += 10

        # Keep the error message visible when the container was missing.
        if not container_missing:
            self.label__info.setText(
                'результат поиска: ' + str(len(self.list_found_blocks)))
        self.create_html()

    def format_html(self) -> None:
        """Write the result page with all found blocks to ``self.file_html``.

        The file object is owned by the caller (see ``create_html``), which is
        responsible for closing it.
        """
        full_text = ''.join(str(found_block) for found_block in self.list_found_blocks)
        self.file_html.write(self._HTML_HEAD + full_text + self._HTML_TAIL)

    def create_html(self) -> None:
        """Create or overwrite the output HTML file with the current results."""
        # Mode 'w' creates the file when it is missing and truncates it
        # otherwise, so no separate existence check is needed. UTF-8 matches
        # the charset declared in the generated page.
        with open(files['out_html'], 'w', encoding='utf-8') as self.file_html:
            self.format_html()
# Script entry point: load the project configuration and filter lists into
# module-level names used by MainWindow, then start the Qt event loop.
if __name__ == '__main__':
    import sys
    import os
    import re
    import json
    from fake_useragent import UserAgent
    import requests
    from bs4 import BeautifulSoup
    import variables

    # NOTE(review): this relative path is computed before os.chdir() below but
    # opened afterwards, so it is resolved against the new working directory
    # -- confirm that is where styles.qss is expected to live.
    qss_file = os.path.relpath('styles.qss')
    # Working directory.
    os.chdir(variables.dirs['google'])

    icons = variables.icons['google']
    # Project variables (named so the builtin ``vars`` is not shadowed).
    project_vars = variables.var_google
    # Project files.
    files = project_vars['files']
    # List of queries.
    queries = project_vars['queries']
    # Pagination limit: MainWindow.start_search requests result offsets
    # 0, 10, 20, ... while the offset is below this value.
    pagination = project_vars['pagination']
    # Domain filter file. JSON is read as UTF-8 regardless of the locale.
    with open(files['domains'], encoding='utf-8') as f:
        domains_flt = json.load(f)
    # Word filter file.
    with open(files['words'], encoding='utf-8') as f:
        words_flt = json.load(f)

    # Send a Chrome User-Agent string with every request.
    ua = UserAgent()
    header = {'User-Agent': str(ua.chrome)}

    app = QApplication(sys.argv)
    # Keep a reference so the window is not garbage-collected.
    window = MainWindow()

    sys.exit(app.exec_())
# _________________________________________________