--- a
+++ b/icu_data_defs.py
@@ -0,0 +1,134 @@
+import pandas as pd
+import utils
+from constants import variable_type,clinical_source,NO_UNITS,column_names
+
+class data_dictionary(object):
+    """In-memory wrapper around an Excel data-dictionary workbook.
+
+    Each worksheet is parsed into a pandas DataFrame (first column used as
+    the index) and exposed as an attribute of ``self.tables`` named after
+    the sheet.  The methods below read and extend the ``definitions``,
+    ``panels``, ``lists`` and ``categories`` tables, so the workbook is
+    expected to contain sheets with those names.
+
+    Attributes:
+        xls_fname: path the workbook was last loaded from.
+        tables: ``utils.Bunch`` mapping sheet name to DataFrame.
+        table_names: ``utils.Bunch`` mapping sheet name to itself.
+        components: ``utils.Bunch`` mapping each component name, upper-cased
+            with spaces replaced by underscores, to the original name.
+    """
+
+    def __init__(self, xls_fname):
+        """Load the workbook at ``xls_fname`` (see ``load``)."""
+        self.load(xls_fname)
+
+    def load(self, xls_fname):
+        """(Re)populate this object from the workbook ``xls_fname``.
+
+        Sets ``xls_fname``, ``tables``, ``table_names`` and ``components``.
+        """
+        df_tables = {}
+        df_names = {}
+        # The context manager closes the workbook handle once every sheet
+        # has been parsed instead of leaving it open.
+        with pd.ExcelFile(xls_fname) as xls:
+            for sheet_name in xls.sheet_names:
+                df_tables[sheet_name] = xls.parse(sheet_name, index_col=0)
+                df_names[sheet_name] = sheet_name
+
+        self.__dict__.update(
+            xls_fname=xls_fname,
+            tables=utils.Bunch(**df_tables),
+            table_names=utils.Bunch(**df_names)
+        )
+        self.__refresh_components()
+
+    def __refresh_components(self):
+        """Rebuild ``self.components`` from the ``definitions`` table."""
+        # Real lists are required here: feeding one lazy map() into another
+        # and then zipping both (Python 3) pairs keys with the wrong names.
+        names = [str(name) for name in
+                 self.tables.definitions.component.unique().tolist()]
+        keys = [name.replace(' ', '_').upper() for name in names]
+        self.components = utils.Bunch(**dict(zip(keys, names)))
+
+    def save(self, xls_fname=None):
+        """Write every table to an Excel workbook, one sheet per table.
+
+        Args:
+            xls_fname: destination path; defaults to ``self.xls_fname``.
+        """
+        if xls_fname is None:
+            xls_fname = self.xls_fname
+        # Leaving the ``with`` block finalises the file; the explicit
+        # ExcelWriter.save() call is not available in pandas >= 2.0.
+        with pd.ExcelWriter(xls_fname, engine='xlsxwriter') as writer:
+            for table_name, table in vars(self.tables).items():
+                table.to_excel(writer, sheet_name=table_name)
+
+    # NaN defaults are spelled float('nan') because the ``pd.np`` alias is
+    # not available in pandas >= 2.0 and defaults are evaluated at class
+    # definition time.
+    def add_definition(self, component, units=NO_UNITS,
+                       variable_type=variable_type.QUANTITATIVE,
+                       clinical_source=clinical_source.OBSERVATION,
+                       lower_limit=float('nan'),
+                       upper_limit=float('nan'),
+                       list_id=float('nan')):
+        """Append a row to the ``definitions`` table and return its new id.
+
+        The row is written positionally as ``[component, units,
+        variable_type, clinical_source, lower_limit, upper_limit, list_id]``,
+        so the table's columns must be in that order.  ``components`` is
+        refreshed afterwards.
+        """
+        new_id = _next_id(self.tables.definitions)
+        self.tables.definitions.loc[new_id] = [
+            component, units, variable_type, clinical_source,
+            lower_limit, upper_limit, list_id,
+        ]
+        self.__refresh_components()
+        return new_id
+
+    def add_panel(self, panel_name, panel_map):
+        """Create a panel, register its members and return the new panel id.
+
+        Args:
+            panel_name: name stored in the new ``panels`` row.
+            panel_map: ``{table_name: [ids]}`` -- for each referenced table,
+                the ids of the rows that belong to the panel.
+        """
+        new_panel_id = _next_id(self.tables.panels)
+        new_list_id = _next_id(self.tables.lists)
+        self.tables.panels.loc[new_panel_id] = [panel_name, new_list_id]
+        for ref_table, ref_ids in panel_map.items():
+            for ref_id in ref_ids:
+                self.add_item_to_panel(new_panel_id, ref_table, ref_id)
+        return new_panel_id
+
+    def add_item_to_panel(self, panel_id, ref_table, ref_id):
+        """Append the row ``ref_id`` of ``ref_table`` to a panel's list.
+
+        The item is stored without a sequence number (NaN).  Returns the
+        label of the new ``lists`` row as reported by the list helper.
+        """
+        list_id = self.tables.panels.loc[panel_id, 'list_id']
+        return self.__add_list_item(list_id, ref_table, ref_id, float('nan'))
+
+    def __add_list_item(self, list_id, ref_table, ref_id, seq_num):
+        """Append one ``[list_id, ref_table, ref_id, seq_num]`` row to ``lists``.
+
+        Several rows share one list id, so the index is temporarily turned
+        into an ordinary column, the row is appended under a fresh
+        positional label, and the original index is then restored.
+
+        Returns the positional label that was given to the new row.
+        """
+        orig_index_name = self.tables.lists.index.name
+        flat = self.tables.lists.reset_index(drop=False)
+        new_id = _next_id(flat)
+        flat.loc[new_id] = [list_id, ref_table, ref_id, seq_num]
+        self.tables.lists = flat.set_index(orig_index_name)
+        return new_id
+
+    def add_category(self, val_numeric, val_text):
+        """Append ``[val_numeric, val_text]`` to ``categories``; return its id."""
+        new_id = _next_id(self.tables.categories)
+        self.tables.categories.loc[new_id] = [val_numeric, val_text]
+        return new_id
+
+    def add_category_list(self, categories, is_ordered=False):
+        """Create a new list in ``lists`` holding the given category ids.
+
+        Args:
+            categories: iterable of ids from the ``categories`` table.
+            is_ordered: when true, each item's ``seq_num`` is its position
+                in ``categories``; otherwise ``seq_num`` is NaN.
+
+        Returns:
+            The id of the new list.
+        """
+        new_list_id = _next_id(self.tables.lists)
+        for position, category_id in enumerate(categories):
+            seq_num = position if is_ordered else float('nan')
+            self.__add_list_item(new_list_id,
+                                 self.table_names.categories,
+                                 category_id,
+                                 seq_num)
+        return new_list_id
+
+    def get_panel_defintions(self, panel_id):
+        """Return the rows a panel refers to, concatenated into one DataFrame.
+
+        Members whose ``table`` is ``'panels'`` are expanded recursively;
+        every other member contributes its single row from the named table.
+
+        NOTE: the method name is misspelled ("defintions"); it is kept
+        unchanged because ``get_components`` and external callers use it.
+        """
+        list_id = self.tables.panels.loc[panel_id, 'list_id']
+        # Selecting with a one-element list always yields a DataFrame; a
+        # scalar label returns a Series (no iterrows) for single-item lists.
+        members = self.tables.lists.loc[[list_id]]
+        def_list = []
+        for _, row in members.iterrows():
+            table = row['table']
+            id_ = row['id']
+            if table == 'panels':
+                defs = self.get_panel_defintions(id_)
+            else:
+                defs = vars(self.tables)[table].loc[[id_]]
+            def_list.append(defs)
+
+        return pd.concat(def_list)
+
+    def get_categories(self, component):
+        """Return the category values attached to ``component``.
+
+        Joins ``definitions`` to ``lists`` on ``list_id`` and the result to
+        ``categories`` on ``id``, then keeps the rows for ``component``.
+
+        Returns:
+            A DataFrame with ``val_numeric`` and ``val_text`` columns,
+            indexed and sorted by ``seq_num``, or ``None`` when no row
+            matches.
+        """
+        joined = self.tables.definitions.merge(
+            self.tables.lists, left_on='list_id', right_index=True)
+        joined = joined.merge(
+            self.tables.categories, left_on='id', right_index=True)
+        filtered = joined.loc[joined.component == component]
+        if filtered.shape[0] == 0:
+            return None
+        columns = ['seq_num', 'val_numeric', 'val_text']
+        return filtered[columns].set_index('seq_num').sort_index()
+
+    def defs_for_component(self, component):
+        """Return the ``definitions`` rows selected for ``component``."""
+        return self.get_defs({column_names.COMPONENT: component})
+
+    def get_clinical_source(self, component):
+        """Return ``clinical_source`` of the first definition of ``component``.
+
+        ``.iloc[0]`` raises ``IndexError`` when no definition is selected.
+        """
+        return self.defs_for_component(component).loc[:, 'clinical_source'].iloc[0]
+
+    def get_variable_type(self, component):
+        """Return ``variable_type`` of the first definition of ``component``.
+
+        ``.iloc[0]`` raises ``IndexError`` when no definition is selected.
+        """
+        return self.defs_for_component(component).loc[:, 'variable_type'].iloc[0]
+
+    def get_defs(self, data_specs=None, operator='or'):
+        """Return the ``definitions`` rows selected by ``data_specs``.
+
+        ``data_specs`` and ``operator`` are handed to
+        ``utils.complex_row_mask``; omitting ``data_specs`` passes an empty
+        list.
+        """
+        if data_specs is None:
+            data_specs = []
+        return _filter_defs(self.tables.definitions, data_specs, operator)
+
+    def get_components(self, specs=None, panel_id=None, operator='or'):
+        """Return the distinct component names of the selected definitions.
+
+        Args:
+            specs: row specification handed to ``utils.complex_row_mask``;
+                omitting it passes an empty list.
+            panel_id: when given, only rows referenced by that panel are
+                considered; otherwise the whole ``definitions`` table is.
+            operator: passed through to ``utils.complex_row_mask``.
+        """
+        if specs is None:
+            specs = []
+        if panel_id is not None:
+            defs = self.get_panel_defintions(panel_id)
+        else:
+            defs = self.tables.definitions
+        return _filter_defs(defs, specs, operator).component.unique().tolist()
+
+def _filter_defs(defs,specs,operator='or'):
+    """Return the rows of ``defs`` selected by ``utils.complex_row_mask``.
+
+    ``specs`` and ``operator`` are forwarded unchanged to that helper, and
+    its result is used as the row selector for ``defs.loc``.
+    """
+    row_mask = utils.complex_row_mask(defs, specs, operator)
+    return defs.loc[row_mask]
+
+def _next_id(df):
+    """Return one more than the largest index label of ``df``.
+
+    ``max`` raises ``ValueError`` when ``df`` has no rows.
+    """
+    index_labels = df.index.tolist()
+    return max(index_labels) + 1