Diff of /src/MISC/jsonify.py [000000] .. [f87529]

Switch to side-by-side view

--- a
+++ b/src/MISC/jsonify.py
@@ -0,0 +1,57 @@
import json
import os
import xml.etree.ElementTree as ET
+
# (output key, element path) pairs whose value is the text of the first
# matching element.  They are split into two groups only so that the
# 'intervention' entry keeps its position between them in the result dict.
_FIELDS_BEFORE_INTERVENTION = (
    ('nct_id', 'id_info/nct_id'),
    ('brief_title', 'brief_title'),
    ('official_title', 'official_title'),
    ('brief_summary', 'brief_summary/textblock'),
    ('detailed_description', 'detailed_description/textblock'),
    ('overall_status', 'overall_status'),
    ('start_date', 'start_date'),
    ('completion_date', 'completion_date'),
    ('phase', 'phase'),
    ('study_type', 'study_type'),
    ('condition', 'condition'),
)
_FIELDS_AFTER_INTERVENTION = (
    ('gender', 'eligibility/gender'),
    ('minimum_age', 'eligibility/minimum_age'),
    ('maximum_age', 'eligibility/maximum_age'),
)


def parse_xml(xml_file):
    """Extract a fixed set of fields from one XML document into a dict.

    Args:
        xml_file: Anything ``xml.etree.ElementTree.parse`` accepts (a file
            name or a file object).

    Returns:
        A dict with the keys, in this order: ``nct_id``, ``brief_title``,
        ``official_title``, ``brief_summary``, ``detailed_description``,
        ``overall_status``, ``start_date``, ``completion_date``, ``phase``,
        ``study_type``, ``condition``, ``intervention``, ``gender``,
        ``minimum_age``, ``maximum_age``, ``location``, ``reference``.

        Scalar entries hold the text of the *first* matching element, or
        ``None`` when no element matches.  ``intervention`` and ``location``
        are nested dicts built the same way.  ``location_address`` joins
        the non-empty city/state/country values with ``', '`` and is ``''``
        when all three are missing.  ``reference`` is a list with one
        ``{'citation', 'PMID'}`` dict per ``<reference>`` child of the root.

    Raises:
        xml.etree.ElementTree.ParseError: If the document is not
            well-formed XML.
        OSError: If ``xml_file`` is a path that cannot be opened.
    """
    # NOTE(review): the standard-library parser is documented as not hardened
    # against maliciously constructed XML; confirm the inputs are trusted.
    root = ET.parse(xml_file).getroot()

    data = {key: root.findtext(path) for key, path in _FIELDS_BEFORE_INTERVENTION}

    data['intervention'] = {
        'intervention_type': root.findtext('intervention/intervention_type'),
        'intervention_name': root.findtext('intervention/intervention_name'),
    }

    for key, path in _FIELDS_AFTER_INTERVENTION:
        data[key] = root.findtext(path)

    address_parts = [
        root.findtext('location/facility/address/' + part)
        for part in ('city', 'state', 'country')
    ]
    data['location'] = {
        'location_name': root.findtext('location/facility/name'),
        # filter(None, ...) drops both missing (None) and empty ('') parts so
        # the joined address never contains dangling separators.
        'location_address': ', '.join(filter(None, address_parts)),
    }

    data['reference'] = [
        {'citation': ref.findtext('citation'), 'PMID': ref.findtext('PMID')}
        for ref in root.findall('reference')
    ]
    return data
+
def convert_to_json(data, indent=4):
    """Serialize *data* to a JSON string.

    Args:
        data: Any object ``json.dumps`` can serialize.
        indent: Indentation passed straight to ``json.dumps``.  Defaults to
            4, which is what this function always used before the parameter
            existed; pass ``None`` for the most compact single-line form.

    Returns:
        The JSON text.  ``ensure_ascii`` is left at its ``json.dumps``
        default, so non-ASCII characters are emitted as ``\\uXXXX`` escapes.

    Raises:
        TypeError: If *data* contains a value ``json.dumps`` cannot serialize.
    """
    return json.dumps(data, indent=indent)
+
def process_files(input_dir, output_dir):
    """Convert every ``.xml`` file in *input_dir* to a ``.json`` file in *output_dir*.

    Each matching file is parsed with ``parse_xml``, serialized with
    ``convert_to_json`` and written under the same base name with a
    ``.json`` extension.  Only the top level of *input_dir* is scanned.

    Args:
        input_dir: Directory to scan for files whose name ends in ``.xml``.
        output_dir: Directory that receives the JSON files.  It is created
            (including parents) if it does not exist yet.

    Raises:
        OSError: If *input_dir* cannot be listed or an output file cannot be
            written.
        Exception: Anything raised by ``parse_xml`` / ``convert_to_json``
            propagates unchanged, so one unreadable document stops the run.
    """
    suffix = '.xml'

    # Create the destination up front; otherwise the first write fails with
    # FileNotFoundError when the directory has not been prepared by hand.
    os.makedirs(output_dir, exist_ok=True)

    for filename in os.listdir(input_dir):
        if not filename.endswith(suffix):
            continue

        xml_file = os.path.join(input_dir, filename)
        # A directory can also be named "*.xml"; skip anything that is not a
        # regular file instead of letting the parser choke on it.
        if not os.path.isfile(xml_file):
            continue

        json_data = convert_to_json(parse_xml(xml_file))

        # Swap only the trailing extension.  str.replace would rewrite every
        # ".xml" in the name (e.g. "a.xml.v2.xml" -> "a.json.v2.json").
        json_name = filename[:-len(suffix)] + '.json'
        json_file = os.path.join(output_dir, json_name)

        # Explicit encoding keeps the output independent of the platform's
        # locale default.
        with open(json_file, 'w', encoding='utf-8') as f:
            f.write(json_data)
+
if __name__ == "__main__":
    # Script entry point: convert the trial XML dump to JSON.
    # Both paths are relative, so they resolve against the current working
    # directory the script is launched from.
    input_dir, output_dir = '../data/trials_xmls/', '../data/trials_jsons/'
    process_files(input_dir, output_dir)
\ No newline at end of file