import sys
import os.path
import re
import ctags
import itertools
import clang.cindex
from utils import read_code, find_all_files
from config import BOOST_TAGS, INCLUDE_DIRS, BOOST_DIR
def extract_symbols_simple(filename):
    """Return the files that the tag index lists for qualified names in a file.

    The text of ``filename`` is obtained with ``read_code(filename, False)``
    and scanned for names made of alphabetic components joined by ``::``.
    Each occurrence is passed to ``parse_name`` and the file lists that come
    back are concatenated in order of appearance (duplicates included).
    """
    source_text = read_code(filename, False)
    collected = []
    for qualified_name in re.findall('(?:[A-Za-z]+::)+[A-Za-z]+', source_text):
        # NOTE(review): ``tags`` is neither a parameter nor assigned in the
        # part of the module visible here - confirm where it is defined.
        matches = parse_name(qualified_name, tags)
        if matches:
            collected.extend(matches)
    return collected
# ctags index opened from the path configured as BOOST_TAGS; it is the index
# that extract_boost_symbols() hands to parse_name().
boost_tags = ctags.CTags(BOOST_TAGS)
def parse_name(full_symbol_name, tags):
    """Resolve a ``::``-qualified name to files listed in a ctags index.

    Every component of the name is looked up, innermost first.  A tag is
    accepted when its ``namespace`` field ends with the enclosing scope as
    written in ``full_symbol_name`` and its ``kind`` is ``'c'`` or ``'f'``;
    any other tag that is examined is remembered as a fallback.  The
    outermost component has no enclosing scope, so its tags are stepped
    over without being recorded.

    Args:
        full_symbol_name: name such as ``'boost::unordered_map'``.
        tags: index object providing ``find(entry, name, flags)`` and
            ``findNext(entry)``.

    Returns:
        The ``file`` field of every accepted tag.  When nothing was accepted
        but some tag was rejected, a one-element list holding the file of
        the first rejected tag.  Otherwise an empty list.
    """
    entry = ctags.TagEntry()
    # Reversed so that parts[0] is the symbol itself and parts[i + 1:] are
    # the scopes that enclose parts[i].
    parts = full_symbol_name.split('::')
    parts.reverse()
    res = []
    rejected_files = []

    def classify(scope):
        # Sort the tag currently held in ``entry`` into accepted / rejected.
        namespace = entry['namespace']
        if namespace and namespace.endswith(scope) and entry['kind'] in ('c', 'f'):
            res.append(entry['file'])
        else:
            rejected_files.append(entry['file'])

    outermost = len(parts) - 1
    for i, part in enumerate(parts):
        has_scope = i < outermost
        # Enclosing scope of ``part`` in source order, e.g. 'boost::detail'.
        scope = "::".join(parts[i + 1:][::-1])
        if tags.find(entry, part, ctags.TAG_FULLMATCH) == 1 and has_scope:
            classify(scope)
        # Walk the remaining tags that share this name.
        while tags.findNext(entry):
            if has_scope:
                classify(scope)
    if not res and rejected_files:
        # Nothing matched the scope/kind test: fall back to the first tag seen.
        res.append(rejected_files[0])
    return res
def process_item(x):
    """Build an ``#include "...h"`` line for a ``::``-qualified name.

    When ``tags`` is empty the include path is simply the lower-cased name
    with ``::`` replaced by ``/``.  Otherwise the components of the name are
    tested for membership in ``tags`` to decide how much of the name forms
    the path; ``None`` is returned when no tested component is present.
    """
    whole_name_include = "#include \"" + x.lower().replace('::', "/") + ".h\""
    # NOTE(review): ``tags`` is read as a global here but is not assigned in
    # the part of the module visible to this function - confirm its origin.
    if not tags:
        return whole_name_include

    parts = x.split('::')
    if len(parts) == 3:
        if parts[2] in tags:
            return whole_name_include
        if parts[1] in tags:
            # Only the first two components name the header.
            return '#include "' + parts[0].lower() + "/" + parts[1].lower() + '.h"'
        return None

    if parts[1] in tags:
        return whole_name_include
    if parts[0] in tags:
        return "#include \"" + parts[0].lower() + ".h\""
    return None
# db: header name (file path with BOOST_DIR removed) -> list of the
# <boost/...hpp> headers that file includes.  Filled by process_header().
db = {}
# rdb: the reverse of db - an included <boost/...hpp> header -> list of the
# header names that include it.  Filled by process_header().
rdb = {}
def process_header(filename):
    """Record the Boost headers included by ``filename`` in ``db`` and ``rdb``.

    The file text comes from ``read_code(filename, False)``.  Every
    ``#include <boost/....hpp>`` found in it is stored under the file's name
    (with ``BOOST_DIR`` removed) in ``db``, and the file's name is added to
    the list kept for each included header in ``rdb`` (without duplicates).
    """
    # Raw string, and the dot before "hpp" is escaped so that it only matches
    # a literal '.' instead of any character.
    includes = re.findall(r"#include\s+<(boost/[a-z0-9_/]+\.hpp)>",
                          read_code(filename, False))
    include_name = filename.replace(BOOST_DIR, '')
    db[include_name] = includes
    for item in includes:
        includers = rdb.setdefault(item, [])
        if include_name not in includers:
            includers.append(include_name)
# Runs when the module is loaded: feed every .hpp file found under BOOST_DIR
# to process_header() so that db and rdb are populated.
for dirpath, dnames, fnames in os.walk(BOOST_DIR):
    for f in fnames:
        if not f.endswith(".hpp"):
            continue
        process_header(os.path.join(dirpath, f))
def extract_boost_symbols(filename):
    """Map each ``boost::...`` name used in a file to its candidate files.

    The text returned by ``read_code(filename)`` is scanned for lower-case
    ``boost::``-qualified names.  Each occurrence is resolved with
    ``parse_name`` against ``boost_tags``; names that resolve to nothing are
    left out.  A name that occurs several times has the results of every
    occurrence appended to its list.
    """
    source_text = read_code(filename)
    files_by_symbol = {}
    for qualified in re.findall('boost(?:::[a-z][a-z0-9_]+)+', source_text):
        hits = parse_name(qualified, boost_tags)
        if not hits:
            continue
        files_by_symbol.setdefault(qualified, []).extend(hits)
    return files_by_symbol
def get_parent_include(include, graph=None):
    """List ``include`` and the headers reachable from it through ``graph``.

    Args:
        include: header name to start from.
        graph: mapping of header name -> list of header names.  Defaults to
            the module-level ``rdb`` (header -> headers that include it).

    Returns:
        A list of ``(name, depth)`` tuples beginning with ``(include, 0)``.
        A neighbour that has neighbours of its own is expanded recursively
        and recorded at ``depth + 1``; a neighbour without any is recorded
        at the current ``depth``.  A header that is reached again is
        recorded once more but not expanded again, so names can repeat.
    """
    if graph is None:
        graph = rdb

    def work(include, depth, path):
        if include in path:
            # Already expanded on this walk: record it, but stop recursing.
            return [(include, depth)]
        path.append(include)
        res = [(include, depth)]
        # The original tested ``parent in res`` here, but ``res`` holds
        # (name, depth) pairs, so a bare name never matched; the test is
        # omitted and repeated names are kept exactly as before.
        for parent in graph.get(include) or ():
            if graph.get(parent):
                res += work(parent, depth + 1, path)
            else:
                res.append((parent, depth))
        return res

    return work(include, 0, [])
def get_child_include(include, graph=None):
    """List ``include`` and the headers reachable from it through ``graph``.

    Args:
        include: header name to start from.
        graph: mapping of header name -> list of header names.  Defaults to
            the module-level ``db`` (header -> headers it includes).

    Returns:
        A list of ``(name, depth)`` tuples beginning with ``(include, 0)``.
        A neighbour that has neighbours of its own is expanded recursively
        and recorded at ``depth + 1``; a neighbour without any is recorded
        at the current ``depth``.  A header that is reached again is
        recorded once more but not expanded again, so names can repeat.
    """
    if graph is None:
        graph = db

    def work(include, depth, path):
        if include in path:
            # Already expanded on this walk: record it, but stop recursing.
            return [(include, depth)]
        path.append(include)
        res = [(include, depth)]
        # The original tested ``parent in res`` here, but ``res`` holds
        # (name, depth) pairs, so a bare name never matched; the test is
        # omitted and repeated names are kept exactly as before.
        for child in graph.get(include) or ():
            if graph.get(child):
                res += work(child, depth + 1, path)
            else:
                res.append((child, depth))
        return res

    return work(include, 0, [])
def _include_rank(path):
    """Count entries of get_child_include(path) that also appear in get_parent_include(path)."""
    parents = set(x[0] for x in get_parent_include(path))
    return sum(1 for x in get_child_include(path) if x[0] in parents)


def _canonical_boost_header(header):
    """Replace a resolved header by the header that should be included for it.

    The rules are applied one after another, so a later rule sees the result
    of an earlier one.
    """
    if header == 'boost/exception.hpp':
        header = 'boost/exception/all.hpp'
    # NOTE(review): 'boost/asio/ssl.hpp' itself contains no '/ssl/' and is
    # therefore rewritten to 'boost/asio.hpp' by this rule - confirm intent.
    if (header.startswith('boost/asio')
            and not header.endswith('error_code.hpp')
            and '/ssl/' not in header):
        header = 'boost/asio.hpp'
    if header.startswith('boost/asio/ssl'):
        header = 'boost/asio/ssl.hpp'
    if header.count("_ptr") and not header.count('weak_'):
        header = 'boost/shared_ptr.hpp'
    if header.count("/thread/") and header.endswith('mutex.hpp'):
        header = 'boost/thread.hpp'
    return header


def get_boost_includes(filename):
    """Return the sorted ``#include <boost/...>`` lines needed by ``filename``.

    Steps:
      1. Collect the ``boost::`` symbols of the file with
         ``extract_boost_symbols``.
      2. Pick one header per symbol: a few hard-coded names first, then a
         header whose path mirrors the symbol name if such a file exists
         under ``BOOST_DIR``, otherwise the candidate file with the lowest
         rank (see ``_include_rank``).  Symbols without any candidate are
         reported with a "not found" message on standard output.
      3. Add ``boost/foreach.hpp`` when the file text contains
         ``BOOST_FOREACH`` and normalise every header with
         ``_canonical_boost_header``.
      4. Drop a header when it is listed at depth 0 or 1 by
         ``get_child_include`` of another selected header, except headers
         whose name contains ``enable_shared_from_this`` or ``shared_ptr``.

    Returns:
        A list of strings of the form ``'#include <boost/....hpp>'``.
    """
    symbols = extract_boost_symbols(filename)
    header_for = {}
    for symbol in symbols:
        if symbol.startswith('boost::atomic'):
            header_for[symbol] = 'boost/atomic.hpp'
            continue
        if symbol.startswith('boost::mutex'):
            header_for[symbol] = 'boost/thread.hpp'
            continue
        if symbol.count('enable_shared_from_this'):
            header_for[symbol] = 'boost/enable_shared_from_this.hpp'
            continue
        if symbol.count('noncopyable'):
            header_for[symbol] = 'boost/noncopyable.hpp'
            continue
        mirrored = '/'.join(symbol.split('::')) + '.hpp'
        if os.path.isfile(BOOST_DIR + mirrored):
            header_for[symbol] = mirrored
            continue
        ranked = [(path, _include_rank(path)) for path in set(symbols[symbol])]
        if ranked:
            # min() keeps the first of several equally ranked candidates,
            # which is what a stable sort followed by [0] selects.
            header_for[symbol] = min(ranked, key=lambda item: item[1])[0]
        else:
            print("not found: %s" % symbol)

    includes_tmp = list(set(header_for.values()))
    if 'BOOST_FOREACH' in read_code(filename):
        includes_tmp.append('boost/foreach.hpp')
    includes = [_canonical_boost_header(x) for x in includes_tmp]

    removed = set()
    for include1 in includes:
        # Computed once per header instead of once per (include1, include2) pair.
        near_children = set(x[0] for x in get_child_include(include1) if x[1] < 2)
        for include2 in includes:
            if 'enable_shared_from_this' in include2 or 'shared_ptr' in include2:
                continue
            if include1 == include2:
                continue
            if include2 in near_children:
                removed.add(include2)

    return ['#include <' + x + '>' for x in sorted(set(includes)) if x not in removed]
def show_local_includes(filename):
    """Print one ``#include "..."`` line per file found for the symbols of ``filename``.

    The files come from ``extract_symbols_simple``.  Paths ending in
    ``.cpp`` are skipped.  For the first entry of ``INCLUDE_DIRS`` that a
    path starts with, that directory followed by ``/`` is removed from the
    path; paths matching no entry are used unchanged.  The lines are printed
    without duplicates, in sorted order.
    """
    unique_paths = set()
    for path in extract_symbols_simple(filename):
        if path.endswith('.cpp'):
            continue
        for include_dir in INCLUDE_DIRS:
            if path.startswith(include_dir):
                unique_paths.add(path.replace(include_dir + "/", ""))
                break
        else:
            # No include directory is a prefix of this path.
            unique_paths.add(path)
    for item in sorted(unique_paths):
        print('#include "%s"' % item)
if __name__ == "__main__":
    # Command line: the single argument is the source file to analyse.
    if len(sys.argv) < 2:
        sys.exit("usage: %s <source-file>" % sys.argv[0])
    filename = sys.argv[1]
    #show_local_includes(filename)
    print('')
    # No show_boost_includes() exists in this module; get_boost_includes()
    # already returns the finished '#include <...>' lines, so print those.
    for line in get_boost_includes(filename):
        print(line)