"""Normalize category attribute definitions.

For every category document in ``db.cat`` that has an ``attrs`` field, the
``sysAttrjson`` payload is parsed and each item in it is converted by
``process_item`` into a flat config dict that is saved to ``db.result``.
"""
import json
import logging
from collections import Counter
from pprint import pprint

from database import db

# Example of a "country" item as found in the source data:
#
# Country:
# {u'data': {u'rules': [u'country'], u'showType': u'country',
#            u'nodeType': u'name', u'id': u'1', u'value': u'Place of Origin'}}

# Text type aliases so the module runs unchanged on Python 2 and Python 3.
try:
    _TEXT_TYPE = unicode
    _STRING_TYPES = basestring
except NameError:
    _TEXT_TYPE = str
    _STRING_TYPES = str

STAT = Counter()

# Debug aid: every item whose data->value equals this label is dumped to
# var/item.json and its resulting config is pretty-printed.
# Set to None to disable.
DUMP_LABEL = 'Type'

# data->rules entries that directly select the attribute type.
_RULE_TO_TYPE = {
    'input_string': 'string',
    'input_int': 'int',
    'country': 'country',
    'list_box': 'list_box',
    'check_box': 'check_box',
    'percent': 'percent',
    'model_numbers': 'model_numbers',  # just text ???
    'group_table': 'group_table',
    'group_item': 'group_item',
    'unit': 'unit',
}
_MAX_CHOICES_PREFIX = 'checkboxMax__'

# Placeholder options that carry no real value.
_BLANK_SELECTED = {'selected': True, 'id': u'', 'value': u''}
_BLANK_UNSELECTED = {'selected': False, 'id': u'', 'value': u''}


def _apply_rules(config, rules):
    """Update ``config`` from the list found under data->rules."""
    for rule in rules:
        if rule in _RULE_TO_TYPE:
            # When several type rules are present the last one wins.
            config['type'] = _RULE_TO_TYPE[rule]
        elif rule == 'required':
            config['required'] = True
        elif rule.startswith(_MAX_CHOICES_PREFIX):
            # Stored as a plain int (the original trailing comma made this
            # a 1-tuple by accident).
            config['max_choices'] = int(rule.split(_MAX_CHOICES_PREFIX)[1])
        else:
            raise Exception('Unknown data->rules item: %s' % rule)


def _apply_item_data(config, data):
    """Fill ``config`` from the item's ``data`` dict; raise on unknown keys."""
    for key, val in data.items():
        if key == 'id':
            config['id'] = int(val)
        elif key == 'rules':
            _apply_rules(config, val)
        elif key == 'showType':
            # A missing 'rules' key raises KeyError here, as before.
            rules = data['rules']
            if val not in rules:
                if val == 'input_int' and 'percent' in rules:
                    pass
                elif val == 'interval':
                    config['interval'] = True
                elif val == 'group_item':
                    config['type'] = 'group_item'
                else:
                    raise Exception(
                        'Value of data->showType is not in data->rules list: %s' % val)
        elif key == 'nodeType':
            if val != 'name':
                raise Exception('Unknown data->nodeType: %s' % val)
            config['node_type_name'] = True
        elif key == 'value':
            if not isinstance(val, _TEXT_TYPE):
                raise Exception('Unknown type of data->value: %s' % val)
            config['label'] = val
        elif key == 'showStyles':
            for style in val:
                if style == 'required':
                    pass
                elif style == 'recommendation':
                    config['recommendation'] = True
                elif style == 'hidden':
                    config['hidden'] = True
                else:
                    raise Exception('Unknown data->showStyles item: %s' % style)
        else:
            raise Exception('Unknown data->key: %s' % key)


def _parse_option_node(cat, node, parent_config):
    """Convert one entry of item['nodes'] into an option dict.

    Nested ``nodes`` (except under an option whose value is ``'Other'``) are
    run through ``process_item`` purely for validation; the nested results
    are discarded.
    """
    option = {
        'id': None,
        'value': None,
        'selected': False,
    }
    if 'data' not in node:
        raise Exception('No data key in nodes:*')
    for data_key, data_val in node['data'].items():
        if data_key == 'errorMessage':
            if data_val != '':
                raise Exception(
                    'Non empty nodes:*->data->errorMessage: %s' % data_val)
        elif data_key == 'selected':
            # NOTE(review): the mere presence of the key marks the option as
            # selected; its value is not inspected. Confirm this is intended.
            option['selected'] = True
        elif data_key == 'nodeType':
            if data_val != 'value':
                raise Exception(
                    'Unknown value of nodes:*->data->nodeType: %s' % data_val)
        elif data_key == 'id':
            option['id'] = data_val
        elif data_key == 'value':
            if not isinstance(data_val, _STRING_TYPES):
                raise Exception(
                    'Unknown type of nodes:*->data->value: %s' % data_val)
            option['value'] = data_val
        else:
            raise Exception('Unknown key in nodes:*->data: %s' % data_key)

    if 'nodes' in node:
        if node['data']['value'] == 'Other':
            pass  # TODO: investigate internal content
        else:
            for sub_position, sub_item in enumerate(node['nodes'], 1):
                # parent is set so that nested failures propagate upwards.
                process_item(cat, sub_item, position=sub_position,
                             parent=parent_config)

    for node_key in node:
        if node_key not in ('data', 'nodes'):
            raise Exception('Unknown nodes:* key: %s' % node_key)
    return option


def _collect_options(cat, item, config):
    """Return the list of option dicts built from item['nodes']."""
    if 'nodes' not in item:
        # Only country items are allowed to come without nodes.
        if config['type'] != 'country':
            raise Exception('Item does not contain nodes key')
        return []
    nodes = item['nodes']
    if config['type'] == 'int' and len(nodes) == 1:
        print('INT & 1 SUB-NODE')
        return []
    return [_parse_option_node(cat, node, config) for node in nodes]


def _validate_options(config, options):
    """Check the parsed options against the item type.

    Placeholder options of free-input types are cleared from ``config``.
    Raises ``Exception`` when the options do not match what the type allows
    or when the item is not a ``nodeType == 'name'`` node.
    """
    if config['type'] in ('string', 'percent', 'model_numbers'):
        # An empty list is reported with the same error instead of failing
        # with an IndexError.
        if not options or options[0] != _BLANK_SELECTED:
            raise Exception('Unexpected %s value' % config['type'])
        config['options'] = []

    if config['type'] == 'int':
        blanks = (_BLANK_SELECTED, _BLANK_UNSELECTED)
        if len(options) == 0:
            pass
        elif len(options) == 1 and options[0] in blanks:
            if config['interval']:
                raise Exception('Unexpected interval value')
        elif (len(options) == 2 and options[0] in blanks
                and options[1] in blanks):
            if not config['interval']:
                raise Exception('Unexpected interval value')
        else:
            raise Exception('Unexpected int options: %s' % (options,))
        config['options'] = []

    if not config['node_type_name']:
        raise Exception('Node is not [@nodeType="name"]')


def process_item(cat, item, position, parent=None):
    """Convert one raw attribute item into a normalized config dict.

    Args:
        cat: category document; ``cat['_id']`` is stored in the config and
            ``cat['name']`` is used in the error report.
        item: raw item dict with a ``data`` dict and, usually, a ``nodes``
            list.
        position: 1-based position of the item, stored in the config.
        parent: config of the enclosing item when called recursively for
            nested nodes, otherwise None.

    Returns:
        The config dict, or None when parsing failed in a top-level call
        (the item is then printed and the error logged).

    Raises:
        Exception: re-raised parsing errors when ``parent`` is not None, and
            validation errors raised after parsing succeeded.
    """
    config = {
        'cat_id': cat['_id'],
        'id': None,
        'type': None,
        'label': None,
        'default_value': None,
        'options': [],
        # Non-primary attributes
        'position': position,
        'required': False,
        'recommendation': False,
        'interval': False,
        'hidden': False,
        'max_choices': None,
        # Trash
        'node_type_name': False,
    }
    try:
        if DUMP_LABEL and item['data'].get('value', '') == DUMP_LABEL:
            with open('var/item.json', 'w') as dump_file:
                json.dump(item, dump_file, indent=2)
        _apply_item_data(config, item['data'])
        options = _collect_options(cat, item, config)
        config['options'] = options
    except Exception as ex:
        # If this is a nested call of process_item then
        # raise the exception to the upper level.
        if parent is not None:
            raise
        print('')
        print('>>>>>>>>>>>>>>>>>>>>>>>>')
        print(u'Cat: %s [%d]' % (cat['name'], cat['_id']))
        pprint(item)
        print('<<<<<<<<<<<<<<<<<<<<<<<<')
        print('')
        logging.error('', exc_info=ex)
        return None
    else:
        # Validation errors are deliberately raised outside the try block,
        # so they are not swallowed by the handler above.
        _validate_options(config, options)
        return config


def main(**kwargs):
    """Rebuild ``db.result`` from the attribute JSON stored in ``db.cat``.

    Drops ``db.result``, then processes every matching category whose
    ``attrs.success`` is truthy. Processing stops at the first item that
    ``process_item`` fails to parse. ``kwargs`` are accepted but unused.
    """
    count = 0
    db.result.drop()
    mask = {'attrs': {'$exists': True}}
    # NOTE(review): limits the run to the category with _id 300; this looks
    # like a leftover debug filter - confirm before a full run.
    mask['_id'] = 300
    for cat in db.cat.find(mask, sort=[('_id', 1)]):
        if not cat['attrs']['success']:
            continue
        count += 1

        # bindAttrjson is parsed but its items are not processed yet.
        # Example item:
        # {u'nodes': [{u'data': {u'nodeType': u'value', u'value': u'Ear Tags'}}],
        #  u'data': {u'showType': u'fix_value', u'nodeType': u'name',
        #            u'value': u'Product Type'}}
        info = json.loads(cat['attrs']['bindAttrjson'])
        for item in info:
            pass

        info = json.loads(cat['attrs']['sysAttrjson'])
        for position, item in enumerate(info, 1):
            result = process_item(cat, item, position=position)
            if result is None:
                return
            db.result.save(result)
            if DUMP_LABEL and result['label'] == DUMP_LABEL:
                pprint(result)
    print('Processed: %d' % count)