#-*- coding: utf8 -*-
import re
import urllib, urllib2
from hashlib import md5
from math import ceil
# Account settings: login e-mail, user id and plain-text password.
# All three values are stored as strings.
config = {
    'email': 'a-2k@i.ua',
    'id': '35904787',
    'pass': 'gfhjkm',
}
class LoginInfo:
    """Account credentials plus the cookie string derived from them.

    Attributes:
        login: account e-mail, stored exactly as given.
        id: account id, stored exactly as given.
        passwd: plain-text password, stored exactly as given.
        cookies: 'remixmid=...; remixemail=...; remixpass=...' string in
            which '@' in the e-mail is replaced by '%40' and the password
            is replaced by its MD5 hex digest.
    """

    def __init__(self, login, id, passwd):
        self.login = login
        self.id = id
        self.passwd = passwd
        # md5() works on byte strings only; encode text passwords explicitly
        # instead of relying on an implicit ASCII conversion.
        # NOTE(review): UTF-8 is an assumption for non-ASCII passwords.
        if isinstance(passwd, bytes):
            raw_passwd = passwd
        else:
            raw_passwd = passwd.encode('utf-8')
        self.cookies = 'remixmid=%s; remixemail=%s; remixpass=%s' % (
            id, login.replace('@', '%40'), md5(raw_passwd).hexdigest())
class Length(int):
    """Duration in whole seconds that prints itself as HH:MM:SS."""

    def seconds(self):
        """Return the total duration in seconds (the value itself)."""
        return self

    def minutes(self):
        """Return the total duration in whole minutes, rounded down."""
        return self // 60

    def hours(self):
        """Return the total duration in whole hours, rounded down."""
        return self // 3600

    def __str__(self):
        """Format the duration as zero-padded 'HH:MM:SS'."""
        # Split off seconds first, then minutes, so each field stays within
        # 0..59 even for durations of an hour or more.
        total_minutes, secs = divmod(int(self), 60)
        hrs, mins = divmod(total_minutes, 60)
        return '%02d:%02d:%02d' % (hrs, mins, secs)
class Track(object):
    """One audio search result.

    Attributes:
        artist: performer name.
        title: track title.
        url: link to the audio file.
        length: duration as a ``Length`` built from ``int(length)``.

    Tracks are ordered by (artist, title, length); the url takes no part in
    comparisons.  A Track can also be compared with an integer, in which
    case only the length is compared.  Comparing with anything else raises
    TypeError.  Instances are not hashable.
    """

    def __init__(self, artist, title, url, length):
        self.artist = artist
        self.title = title
        self.url = url
        self.length = Length(int(length))

    def __cmp__(self, other):
        """Return -1, 0 or 1 for less than, equal to or greater than *other*.

        Raises:
            TypeError: *other* is neither a Track nor an integer.
        """
        if isinstance(other, Track):
            mine = (self.artist, self.title, self.length)
            theirs = (other.artist, other.title, other.length)
        elif isinstance(other, int):
            # isinstance also accepts int subclasses such as Length.
            mine, theirs = self.length, other
        else:
            raise TypeError('cannot compare Track with %s'
                            % type(other).__name__)
        return (mine > theirs) - (mine < theirs)

    # Rich comparisons delegate to __cmp__ so that ordering and membership
    # tests also work on interpreters that ignore __cmp__.
    def __eq__(self, other):
        return self.__cmp__(other) == 0

    def __ne__(self, other):
        return self.__cmp__(other) != 0

    def __lt__(self, other):
        return self.__cmp__(other) < 0

    def __le__(self, other):
        return self.__cmp__(other) <= 0

    def __gt__(self, other):
        return self.__cmp__(other) > 0

    def __ge__(self, other):
        return self.__cmp__(other) >= 0

    # Equality is value based and attributes are mutable, so keep instances
    # unhashable.
    __hash__ = None

    def __str__(self):
        """Return 'artist - title length'."""
        return '%s - %s %s' % (self.artist, self.title, str(self.length))
class TrackList(list):
    """List of Track objects whose ``append`` filters what gets stored.

    Args:
        unique_names: when true, a track is skipped if the list already
            holds one with the same artist and title (case-insensitive).
        only_one_artist: when non-empty, only tracks whose artist equals
            this string exactly are stored.
        only_one_track: when non-empty, only tracks whose title equals this
            string exactly are stored.
    """

    def __init__(self, unique_names=False, only_one_artist='',
                 only_one_track=''):
        super(TrackList, self).__init__()
        self.unique_names = unique_names
        self.only_one_artist = only_one_artist
        self.only_one_track = only_one_track

    def _has_same_name(self, candidate):
        """Tell whether a stored track has candidate's artist and title."""
        artist = candidate.artist.upper()
        title = candidate.title.upper()
        return any(track.artist.upper() == artist
                   and track.title.upper() == title
                   for track in self)

    def append(self, *args):
        """Store a track unless it is a duplicate or filtered out.

        Accepts either one Track instance or the four Track constructor
        arguments ``(artist, title, url, length)``.  Both forms go through
        the same duplicate check and filters.  A rejected track is dropped
        silently and the stored entry is kept.

        Raises:
            TypeError: the arguments are neither a Track nor four values.
        """
        if args and isinstance(args[0], Track):
            track = args[0]
        elif len(args) == 4:
            track = Track(args[0], args[1], args[2], args[3])
        else:
            raise TypeError('append() takes a Track or '
                            '(artist, title, url, length)')

        # Membership is tested on the list itself: the ``in`` operator does
        # not work on a super() proxy object.
        if track in self:
            return
        if self.unique_names and self._has_same_name(track):
            return
        if self.only_one_artist and track.artist != self.only_one_artist:
            return
        if self.only_one_track and track.title != self.only_one_track:
            return
        super(TrackList, self).append(track)
#class TrackInfoAgregator:
# last_page = 0
# total_tracks = 0
# request = ''
# last_page_text = ''
# def __init__(self):
# pass
# def load_page(self, page = self.last_page):
# pass
# def parse_page(self, text = self.last_page_text):
# pass
class Searcher:
    """Fetches and caches audio search result pages for one query.

    Attributes:
        page_count: number of result pages, computed by load_base_page().
        last_page: highest page number fetched so far, -1 before any fetch.
        track_count: total number of results reported by the summary line.
        unique_tracks: initialised to 0; not updated by this class.
        login: object whose ``cookies`` attribute is sent as Cookie header.
        pages: dict mapping page number to the raw page text.
        request: the search query, converted with str().
        artist_only: stored as given; not used by this class.
        url: address the search is POSTed to.
        headers: HTTP headers sent with every request.
    """

    # Number of results per page; also the step of the 'st' offset.
    PAGE_SIZE = 100

    _SUMMARY_RX = re.compile(r'<div class="summary">[^\d]*(\d+).*</')

    def __init__(self, login, request, artist_only=False):
        self.page_count = 0
        self.last_page = -1
        self.track_count = 0
        self.unique_tracks = 0
        self.login = login
        self.pages = {}
        self.request = str(request)
        self.artist_only = artist_only
        self.url = 'http://vkontakte.ru/audiosearch.php'
        self.headers = {'User-agent': 'PLAYER',
                        'Cookie': self.login.cookies}

    def _fetch(self, params):
        """POST *params* to the search url and return the response body."""
        post = urllib.urlencode(params)
        req = urllib2.Request(self.url, post, self.headers)
        response = urllib2.urlopen(req)
        try:
            return response.read()
        finally:
            # Release the connection even when read() fails.
            response.close()

    def load_base_page(self):
        """Fetch the first result page and read the totals from it.

        Sets track_count and page_count from the first line containing
        '<div class="summary">' that carries a number, stores the page under
        key 0 and returns its text.
        """
        pagetext = self._fetch({'q': self.request})
        for line in pagetext.split('\n'):
            if '<div class="summary">' not in line:
                continue
            found = self._SUMMARY_RX.search(line)
            if found:
                self.track_count = int(found.group(1))
                self.page_count = int(
                    ceil(self.track_count / float(self.PAGE_SIZE)))
                break
        if self.last_page == -1:
            self.last_page = 0
        self.pages[0] = pagetext
        return pagetext

    def load_next_page(self, page=None):
        """Return the text of result page *page*, fetching it if needed.

        Args:
            page: zero-based page number; defaults to the page after
                ``last_page``.

        Pages already present in ``pages`` are returned without a request.
        ``last_page`` is raised to *page* when *page* is higher.
        """
        if page is None:
            page = self.last_page + 1
        if page not in self.pages:
            self.pages[page] = self._fetch(
                {'q': self.request, 'st': page * self.PAGE_SIZE})
        if page > self.last_page:
            self.last_page = page
        return self.pages[page]
class SearchResultParser:
    """Turns search result pages into Track objects.

    Attributes:
        searcher: Searcher used to download the pages.
        tracks: TrackList collecting the parsed tracks.
        last_page: number of the last page parsed, -1 before any parsing.
    """

    _ARTIST_RX = re.compile(r'<b id="\w+\d+">([^<]*)</b>')
    _TITLE_RX = re.compile(r'<span id="[\w\d]*">([^<]*)<')

    def __init__(self, login, request, artist_only=False,
                 unique_names=False, only_one_artist='',
                 only_one_track=''):
        self.searcher = Searcher(login, request, artist_only)
        self.tracks = TrackList(unique_names, only_one_artist,
                                only_one_track)
        self.last_page = -1

    def parse_base_page(self):
        """Download the first result page and add its tracks."""
        self.searcher.load_base_page()
        text = self.searcher.pages[0]
        if self.last_page == -1:
            self.last_page = 0
        self._parse_page(text)

    def parse_next_page(self, page=None):
        """Download result page *page* and add its tracks.

        Args:
            page: zero-based page number; defaults to the page after
                ``last_page``.
        """
        if page is None:
            page = self.last_page + 1
        text = self.searcher.load_next_page(page)
        # Advance only after a successful download so that a failed request
        # can be retried for the same page.
        self.last_page = page
        self._parse_page(text)

    @staticmethod
    def _iter_records(text):
        """Yield (artist, title, link, length) for every track in *text*.

        A track is described by a line containing 'performer' (artist and
        title) and a line containing 'operate' (the call arguments the file
        link is built from).  The two halves are paired in whichever order
        they arrive and cleared after each yield, so a half never leaks into
        the next track.  Lines that do not match are ignored.
        """
        artist = title = link = length = ''
        for record in text.split('\n'):
            if 'performer' in record:
                found_artist = SearchResultParser._ARTIST_RX.search(record)
                found_title = SearchResultParser._TITLE_RX.search(record)
                if found_artist and found_title:
                    artist = found_artist.group(1)
                    title = found_title.group(1)
            if 'operate' in record:
                call_args = record[record.find('(') + 1:record.find(')')]
                fields = [field.strip() for field in call_args.split(',')]
                # Expected: id, server, user, 'file name', length in seconds.
                if len(fields) >= 5 and fields[4].isdigit():
                    server, user = fields[1], fields[2]
                    fname = fields[3][1:-1]  # drop the surrounding quotes
                    length = fields[4]
                    link = ('http://cs' + server + '.vkontakte.ru/u' + user
                            + '/audio/' + fname + '.mp3')
            if artist and title and length and link:
                yield (artist, title, link, length)
                artist = title = link = length = ''

    def _parse_page(self, text):
        """Append a Track to ``tracks`` for every record found in *text*."""
        for artist, title, link, length in self._iter_records(text):
            self.tracks.append(Track(artist, title, link, length))
def get_tracks(*args):
    """Run one audio search and print what was found.

    Params:
        email, id, password, status string

    The status string is sent as the search query.  Every line containing
    'performer' is echoed, then one 'title arguments link' line is printed
    per track, followed by the total result count and the number of
    100-result pages.  Returns None.
    """
    (email, id, passwd, target) = args
    email = email.replace('@', '%40')
    # md5() works on byte strings only; encode text passwords explicitly.
    if not isinstance(passwd, bytes):
        passwd = passwd.encode('utf-8')
    passwd = md5(passwd).hexdigest()
    url = 'http://vkontakte.ru/audiosearch.php'
    cookie_data = 'remixmid=%s; remixemail=%s; remixpass=%s' % (
        id, email, passwd)
    headers = {
        'User-agent': 'vkpleer',
        'Cookie': cookie_data,
    }
    post = urllib.urlencode({'q': target})
    req = urllib2.Request(url, post, headers)
    response = urllib2.urlopen(req)
    try:
        page = response.read()
    finally:
        # Release the connection even when read() fails.
        response.close()

    title_rx = re.compile(r'<span id="[\w\d]*">([^<]*)<')
    summary_rx = re.compile(r'<div class="summary">[^\d]*(\d+).*</div>')
    titles = []
    tracks = []
    count = 0
    for record in page.split('\n'):
        if 'performer' in record:
            print(record)
            found = title_rx.search(record)
            if found:
                titles.append(found.group(1))
        if 'operate' in record:
            # Keep the raw argument list of the operate(...) call.
            tracks.append(record[record.find('(') + 1:record.find(')')])
        if '<div class="summary">' in record:
            found = summary_rx.search(record)
            if found:
                count = int(found.group(1))
    pages = int(ceil(count / 100.))

    urls = []
    for track in tracks:
        # Fields: id, server, user, 'file name', length in seconds.
        info = track.split(',')
        server = info[1]
        user = info[2]
        fname = info[3][1:-1]  # drop the surrounding quotes
        urls.append('http://cs' + server + '.vkontakte.ru/u' + user
                    + '/audio/' + fname + '.mp3')

    # zip() stops at the shortest list, so a title without a matching
    # operate() line cannot cause an IndexError.
    for title, track, link in zip(titles, tracks, urls):
        print('%s %s %s' % (title, track, link))
    print(count)
    print(pages)
#get_tracks(config['email'], config['id'], config['pass'], 'Nightwish')
import sys
def main():
    """Search for the artist named on the command line and print the tracks.

    The first command-line argument is used both as the search query and as
    the only artist accepted into the result list.  Only the first result
    page is parsed; parser.parse_next_page() fetches further pages.
    Exits with a usage message when the argument is missing.
    """
    if len(sys.argv) < 2:
        sys.exit('usage: %s <artist>' % sys.argv[0])
    query = sys.argv[1]
    # Take the credentials from the module-level config instead of
    # repeating the literals here.
    login = LoginInfo(config['email'], config['id'], config['pass'])
    parser = SearchResultParser(login, query, unique_names=True,
                                only_one_artist=query)
    parser.parse_base_page()
    for track in parser.tracks:
        # The page text is windows-1251 encoded bytes; decode for printing.
        print(unicode('%s %s' % (track, track.url), 'windows-1251'))


if __name__ == '__main__':
    main()
#[ x[x.find('('):x.find(')')] for x in l if 'operate' in x ]
#[x[86:].split(')')[0] for x in c.split('\n') if 'addAudio' in x ]
#titles = [re.findall('<span id=\"[\w\d]*\">([^<]*)<',q) for q in x.split('\n') if 'performer' in q ]
#(61100197,4236,3295868,'d51555b3138d',335);
#Meaning of the arguments of the operate() function:
#server=4236 — the server (you may have seen something like cs4236.vkontakte.ru)
#user=3295868 — id of the user who owns this track
#fname='d51555b3138d' — name of the mp3 file
#length=335 — duration in seconds
#the resulting link to the mp3 file is:
#'http://cs' + server + '.vkontakte.ru/u' + user + '/audio/' + fname + '.mp3';