#!/bin/env python
# -*- coding: utf-8 -*-
# Copyright (c) 2009 Alexander Lopatin
# This program is released under the terms of the
# GNU GENERAL PUBLIC LICENSE Version 3, 29 June 2007
import urllib2
import urllib
import re
import sys
import os
# Browser identification string placed in the request headers below.
USER_AGENT = ("Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.1.1) "
              "Gecko/20090715 Firefox/3.5.1")

# HTTP headers handed to urllib2.Request by get_direct_urls().
HEADERS = {
    'User-agent': USER_AGENT,
    # Disabled header, kept for reference:
    # 'Referer': 'http://vkontakte.ru/gsearch.php?section=audio',
}

# Module-level copy of the most recently fetched list of
# (direct_url, filename) pairs; rebound through "global" by the functions
# that fetch links.
direct_urls = []
def process_args(argv):
    """Split the command line into page URLs and wget options.

    argv -- list of command-line strings (the program name may be included;
            it is ignored unless it looks like a URL or an option)

    Returns a tuple (urls, opts):
    urls -- every argument starting with "http://", passed through
            urllib.quote so it can be embedded in a query string
    opts -- every argument starting with "-", each followed by one space,
            concatenated into a single string

    Arguments that are neither (including empty strings) are ignored.
    """
    urls = []
    options = []
    for arg in argv:
        # startswith() is safe on empty strings, so no exception handling
        # is needed to survive an empty argument.
        if arg.startswith("http://"):
            urls.append(urllib.quote(arg))
        elif arg.startswith("-"):
            options.append(arg)
    # Keep the historical "option + trailing space" layout of the string.
    return urls, "".join(option + " " for option in options)
def get_direct_urls(url):
    """Fetch the direct download links for a video page.

    The page address is sent to cs.videosaver.ru and the HTML reply is
    scanned for links.

    url -- address of the video page; it is inserted into the query string
           as-is, so it is expected to be URL-quoted already

    Returns a list of (direct_url, filename) pairs.
    Raises AttributeError when the reply contains no recognisable link.
    Errors raised by urllib2 while fetching are not caught here.
    """
    # Python 2 print statement: the trailing comma keeps the cursor on this
    # line so that "ok" is appended once the links have been parsed.
    print("getting direct links..."),
    # Push the partial line out now so it is visible during the request.
    sys.stdout.flush()

    request = urllib2.Request(
        "http://cs.videosaver.ru/get/?url=%s&target=form" % url,
        None, HEADERS)
    response = urllib2.urlopen(request)
    try:
        page = response.read()
    finally:
        # Always release the connection, even when read() fails.
        response.close()

    # Only the first line that carries <option ... title=...> entries is
    # used; it is cut into one chunk per option.
    chunks = []
    for line in page.split("\n"):
        if line.find("<option value=") != -1 and line.find("title=") != -1:
            chunks = line.split("<option value=\"")
            break

    links = []
    if not chunks:
        # No option list: fall back to the single "rlink" anchor and name
        # the file after the last path component of the page address.
        match = re.search(r'<a id="rlink" href="(.*?)" target="_top">', page)
        if match is None:
            raise AttributeError("no download link found in the reply")
        links.append((match.group(1), url.split("/").pop() + ".flv"))
    else:
        # The text before the first '<option value="' is not an option.
        for chunk in chunks[1:]:
            match = re.search(r'(^http.*?)" title="(.*?)"', chunk)
            if match is None:
                raise AttributeError("unrecognised option entry in the reply")
            links.append(match.groups())
    print("ok")
    return links
def _parse_selection(text, total):
    """Convert input such as "1,3-5" into a list of 1-based numbers.

    text  -- comma-separated numbers and "first-last" ranges
    total -- how many files exist; every number must lie in 1..total

    Raises ValueError for malformed input and IndexError for a number
    outside 1..total.
    """
    chosen = []
    for token in text.split(","):
        try:
            first = last = int(token)
        except ValueError:
            # Not a plain number, so it has to be a "first-last" range.
            low, _, high = token.partition("-")
            first, last = int(low), int(high)
        # Check the bounds before expanding so a huge range is rejected
        # instead of being materialised.
        for bound in (first, last):
            if bound < 1 or bound > total:
                raise IndexError("%d is out of range" % bound)
        chosen.extend(range(first, last + 1))
    return chosen


def ask_download_list(direct_urls):
    """List the available files and ask which ones to download.

    direct_urls -- list of (direct_url, filename) pairs

    Returns a list of 1-based positions into direct_urls. Nothing is asked
    when the list is empty (returns []) or has one entry (returns [1]);
    otherwise the prompt is repeated until the answer is valid.
    EOFError and KeyboardInterrupt from the prompt are not caught here.
    """
    for position, entry in enumerate(direct_urls):
        print("%d. \"%s\"" % (position + 1, entry[1]))

    total = len(direct_urls)
    if total == 0:
        # Nothing to choose from; prompting could never succeed.
        return []
    if total == 1:
        return [1]

    while True:
        # Each attempt is parsed from scratch so a rejected answer cannot
        # leak numbers into the next one.
        try:
            answer = raw_input("download file number(s): ")
            return _parse_selection(answer, total)
        except ValueError as text:
            print("input error: %s" % text)
        except IndexError as text:
            print("input error: %s" % text)
def download_file(url, filename, opts, num, attempt=1, page_url=None):
    """Download one file with wget, retrying when wget reports an error.

    url      -- direct link to fetch
    filename -- name to save under; ".flv" is appended when it has no dot
    opts     -- extra wget options as a single string
    num      -- 1-based position of the file in the direct-link list
    attempt  -- number of the current attempt; no retry happens after the
                third
    page_url -- page the links came from; when given, the links are fetched
                again before every retry and the module-level direct_urls
                is updated. Without it the same link is simply retried.

    Raises KeyboardInterrupt when wget was terminated by SIGINT. Errors
    from get_direct_urls() during a refresh propagate to the caller.
    """
    global direct_urls
    # Only this function needs these modules.
    import shlex
    import signal
    import subprocess

    try:
        extra = shlex.split(opts)
    except ValueError:
        # Unbalanced quote in an option: fall back to whitespace splitting.
        extra = opts.split()

    while True:
        if filename.find(".") == -1:
            filename += ".flv"
        # An argument list (no shell) keeps quotes or other shell syntax in
        # a remotely supplied file name from being interpreted.
        command = ["wget", url, "-O", filename] + extra
        try:
            # Returns wget's plain exit status, or the negated signal
            # number when wget was killed by a signal.
            status = subprocess.call(command)
        except OSError as text:
            print("cannot run wget: %s" % text)
            return
        if status == -signal.SIGINT:
            raise KeyboardInterrupt
        # wget exits with 8 when the server answered with an error (such as
        # 404) and with 1 for a generic error; only those are retried.
        if status not in (1, 8) or attempt >= 3:
            return
        attempt += 1
        if page_url is not None:
            direct_urls = get_direct_urls(page_url)
            if num > len(direct_urls):
                # The refreshed list no longer contains this entry.
                return
            url = direct_urls[num - 1][0]
            filename = direct_urls[num - 1][1]


def download_files(direct_urls, numi, opts, page_url=None):
    """Download every selected file in turn.

    direct_urls -- list of (direct_url, filename) pairs
    numi        -- 1-based positions of the entries to download
    opts        -- extra wget options as a single string
    page_url    -- page the links came from, forwarded to download_file()
                   so that it can refresh links before a retry
    """
    for num in numi:
        entry = direct_urls[num - 1]
        download_file(entry[0], entry[1], opts, num, page_url=page_url)


def main(argv=None):
    """Command-line entry point.

    argv -- full argument list including the program name; defaults to
            sys.argv

    Returns 0 after all pages were processed, 1 when the arguments are
    unusable (the first one must start with "http://"), 2 when interrupted
    from the keyboard or when input ends while a choice is being asked.
    """
    global direct_urls
    if argv is None:
        argv = sys.argv
    if (len(argv) < 2) or (argv[1][:7] != "http://"):
        program = argv[0] if argv else sys.argv[0]
        print("syntax: %s http://url1 [http://url2...] [wget options]" %
              program)
        return 1
    try:
        urls, opts = process_args(argv)
        for page_url in urls:
            try:
                direct_urls = get_direct_urls(page_url)
                numi = ask_download_list(direct_urls)
                download_files(direct_urls, numi, opts, page_url)
            # HTTPError derives from URLError, so it has to be tested first
            # for its handler to be reachable.
            except urllib2.HTTPError as text:
                print("HTTPError: %s" % text)
            except urllib2.URLError as text:
                print("URLError: %s" % text)
            except AttributeError as text:
                print("Cannot fetch link: %s" % text)
    except KeyboardInterrupt:
        print("Interrupted")
        return 2
    except EOFError:
        return 2
    return 0
# Executed as a script: pass the command line to main() and use its return
# value as the process exit status.
if __name__ == "__main__":
    exit_status = main(sys.argv)
    sys.exit(exit_status)