import rdflib
from rdflib import Graph, URIRef
from rdflib.namespace import RDF
from SPARQLWrapper import SPARQLWrapper, JSON
import psycopg2
import datetime
import threading
import time
# Concurrency cap: myThread.run() acquires this semaphore before calling
# parseURI(), so at most 50 parseURI() calls execute at the same time.
threadLimiter = threading.BoundedSemaphore(50)
# Date captured once when the module is loaded; parseURI() writes it to
# uri.updated_date for every newly inserted row.
today_date = datetime.date.today()
# PostgreSQL server host, appended to every psycopg2 connection string.
host = "localhost"
# NOTE(review): not referenced by the visible code; parseURI() posts to a
# hard-coded SPARQL endpoint instead -- confirm which one is intended.
sparqlHost = "http://localhost:8890/sparql"
# Running total of URIs newly inserted by parseURI(). It is incremented from
# worker threads without a lock.
counter = 0
# NOTE(review): not read by the visible code (parseURI's parameter of the same
# name shadows it) -- presumably a leftover seed URI; confirm before removing.
uri = u'http://dbpedia.org/resource/Yugoslavia'
class myThread(threading.Thread):
    """Worker thread that processes one URI by calling ``parseURI``.

    The URI is stored in the thread's ``name`` attribute, and ``run`` holds
    one slot of the module-level ``threadLimiter`` semaphore for the whole
    duration of the ``parseURI`` call.
    """

    def __init__(self, follower_id):
        """Create a worker for ``follower_id``, the URI to be processed."""
        super(myThread, self).__init__()
        self.name = follower_id

    def run(self):
        """Call ``parseURI`` on the stored URI while holding a limiter slot."""
        # The semaphore's context manager releases the slot even when
        # parseURI() raises.
        with threadLimiter:
            parseURI(self.name)
def _open_connection():
    """Open a PostgreSQL connection to ``host``, retrying once on failure.

    Returns:
        A new psycopg2 connection; the caller is responsible for closing it.

    Raises:
        psycopg2.Error: if the second attempt fails as well.
    """
    dsn = "dbname=postgres user=postgres password=dba host=" + host
    try:
        return psycopg2.connect(dsn)
    except psycopg2.Error:
        # Keep the single immediate retry the crawler has always done.
        return psycopg2.connect(dsn)


def _mark_uri_updated(uri):
    """Set ``updated = 'TRUE'`` for ``uri`` in the ``uri`` table.

    Uses its own short-lived connection and retries the statement once.

    Raises:
        psycopg2.Error: if the retry fails too.
    """
    statement = "update uri set updated = %s where uri = %s "
    conn = _open_connection()
    try:
        cur = conn.cursor()
        try:
            try:
                cur.execute(statement, ('TRUE', uri))
                conn.commit()
            except psycopg2.Error:
                # A failed statement leaves the transaction aborted; it must
                # be rolled back or the retry is rejected as well.
                conn.rollback()
                cur.execute(statement, ('TRUE', uri))
                conn.commit()
        finally:
            cur.close()
    finally:
        conn.close()


def _insert_triple(graph_iri, s, p, o):
    """POST one SPARQL ``INSERT DATA`` request adding (s, p, o) to ``graph_iri``.

    The terms are serialised with ``n3()`` so that IRIs, literals and blank
    nodes each get their proper SPARQL syntax instead of all being wrapped
    in angle brackets.
    """
    sparql = SPARQLWrapper("http://162.243.45.202:8890/sparql")
    sparql.setQuery(u"""
PREFIX foaf:<http://xmlns.com/foaf/spec/#term_>
INSERT DATA
{
GRAPH <%s>
{
%s %s %s
}
}
""" % (graph_iri, s.n3(), p.n3(), o.n3()))
    sparql.setReturnFormat(JSON)
    sparql.method = 'POST'
    sparql.query().convert()


def parseURI(uri):
    """Fetch the RDF graph at ``uri``, store it, and queue the URIs it links to.

    Steps:
      1. A ``uri`` that does not contain "dbpedia", or whose graph cannot be
         parsed, is only marked ``updated = 'TRUE'`` and nothing else happens.
      2. Every triple of the parsed graph is inserted into the SPARQL
         endpoint under the named graph ``uri``.
      3. Every distinct object value containing "http://" is inserted into
         the ``uri`` table; for each successful insert ``counter`` is
         incremented and a new ``myThread`` is started for that value.
      4. ``uri`` itself is marked ``updated = 'TRUE'``.

    Args:
        uri: the URI to process (a byte string in this Python 2 code base).

    Raises:
        Exceptions from the SPARQL endpoint and unrecoverable psycopg2 errors
        propagate. In that case ``uri`` is left unmarked, so a later
        ``begin()`` run picks it up again.
    """
    global counter

    if "dbpedia" not in uri:
        _mark_uri_updated(uri)
        return

    graph = rdflib.Graph()
    try:
        graph.parse(uri)
    except Exception:
        # Unreachable or unparsable resource: flag it as handled so it is not
        # fetched again, then give up on it.
        _mark_uri_updated(uri)
        return

    # Decode once so the query template never mixes bytes and text.
    graph_iri = uri.decode('utf-8') if isinstance(uri, bytes) else uri

    objects = set()
    for s, p, o in graph:
        objects.add(o.encode('utf-8'))
        _insert_triple(graph_iri, s, p, o)
    # Two spaces reproduce the output of the former ``print "... ", uri``.
    print("Graph updated  " + uri)

    conn = _open_connection()
    try:
        cur = conn.cursor()
        try:
            for candidate in objects:
                if "http://" not in candidate:
                    continue
                try:
                    cur.execute(
                        "INSERT INTO uri (uri, updated_date, updated, graph_updated ) VALUES (%s, %s, %s, %s)",
                        (candidate, today_date, 'FALSE', 'FALSE'))
                    conn.commit()
                except Exception:
                    # Best effort: skip values that cannot be inserted. The
                    # rollback is essential, because without it the aborted
                    # transaction makes every following INSERT on this
                    # connection fail too.
                    conn.rollback()
                    continue
                # Informational only; updated from several threads unlocked.
                counter += 1
                try:
                    myThread(candidate).start()
                except (RuntimeError, threading.ThreadError):
                    # No thread could be started. The row was committed with
                    # updated = 'FALSE', so it is still pending in the table.
                    continue
                print("counter: " + str(counter))
        finally:
            cur.close()
    finally:
        conn.close()

    _mark_uri_updated(uri)
# Number of rows with updated = 'FALSE' counted by the last begin() call;
# stays 0 until begin() has run.
maxNum = 0
def begin():
    """Start one ``myThread`` worker for every pending row of the ``uri`` table.

    A row is pending when ``updated = 'FALSE'``. The number of pending rows
    is stored in the module-level ``maxNum`` and printed. The database
    connection is released (even if a query fails) before any worker is
    started.

    Raises:
        psycopg2.Error: if connecting or querying fails.
    """
    global maxNum

    conn = psycopg2.connect("dbname=postgres user=postgres password=dba host=" + host)
    try:
        cur = conn.cursor()
        try:
            # Earlier revisions restricted both queries to an id range (for
            # example ``id > 30000``); the current ones take every pending row.
            cur.execute("select count(uri) from uri where updated = 'FALSE';")
            maxNum = cur.fetchone()[0]
            print(str(maxNum) + " max number")
            cur.execute("select * from uri where updated = 'FALSE';")
            rows = cur.fetchall()
            conn.commit()
        finally:
            cur.close()
    finally:
        conn.close()

    for row in rows:
        # NOTE(review): index 1 is presumably the ``uri`` column of the
        # ``select *`` result -- confirm against the table definition.
        myThread(row[1]).start()
# Runs at module level: executing (or importing) this file starts the crawl
# immediately.
begin()