# src/MISC/jsonify.py
import xml.etree.ElementTree as ET
2
import json
3
import os
4
5
def parse_xml(xml_file):
    """Extract a fixed set of fields from one trial XML document.

    Args:
        xml_file: Anything ``ET.parse`` accepts (a path or an open file
            object) pointing at a single XML document.

    Returns:
        A dict with the keys, in insertion order: ``nct_id``,
        ``brief_title``, ``official_title``, ``brief_summary``,
        ``detailed_description``, ``overall_status``, ``start_date``,
        ``completion_date``, ``phase``, ``study_type``, ``condition``,
        ``intervention``, ``gender``, ``minimum_age``, ``maximum_age``,
        ``location`` and ``reference``. A value is ``None`` when the
        corresponding element is absent from the document.

    Raises:
        xml.etree.ElementTree.ParseError: If the document is not
            well-formed XML.
    """
    root = ET.parse(xml_file).getroot()
    text = root.findtext  # returns the text of the FIRST match, or None

    data = {}

    # Top-level descriptive fields.
    data['nct_id'] = text('id_info/nct_id')
    data['brief_title'] = text('brief_title')
    data['official_title'] = text('official_title')
    data['brief_summary'] = text('brief_summary/textblock')
    data['detailed_description'] = text('detailed_description/textblock')
    data['overall_status'] = text('overall_status')
    data['start_date'] = text('start_date')
    data['completion_date'] = text('completion_date')
    data['phase'] = text('phase')
    data['study_type'] = text('study_type')

    # NOTE: only the first <condition>, <intervention> and <location>
    # element is kept, even if the document repeats them.
    data['condition'] = text('condition')
    data['intervention'] = {
        'intervention_type': text('intervention/intervention_type'),
        'intervention_name': text('intervention/intervention_name'),
    }

    # Eligibility criteria.
    data['gender'] = text('eligibility/gender')
    data['minimum_age'] = text('eligibility/minimum_age')
    data['maximum_age'] = text('eligibility/maximum_age')

    # Join whichever address parts are present; missing or empty parts are
    # dropped, so the result is '' when none of them exist.
    address_parts = [
        text('location/facility/address/city'),
        text('location/facility/address/state'),
        text('location/facility/address/country'),
    ]
    data['location'] = {
        'location_name': text('location/facility/name'),
        'location_address': ', '.join(part for part in address_parts if part),
    }

    # Unlike the fields above, every <reference> element is collected.
    data['reference'] = [
        {'citation': ref.findtext('citation'), 'PMID': ref.findtext('PMID')}
        for ref in root.findall('reference')
    ]
    return data
37
38
def convert_to_json(data):
    """Serialize *data* to a JSON string indented by four spaces.

    Args:
        data: A JSON-serializable object.

    Returns:
        The JSON text produced by ``json.dumps(data, indent=4)``.
    """
    return json.dumps(data, indent=4)
41
42
def process_files(input_dir, output_dir):
    """Convert every ``*.xml`` file in *input_dir* to a JSON file in *output_dir*.

    Each input ``name.xml`` is parsed with ``parse_xml``, serialized with
    ``convert_to_json`` and written to ``name.json``. Entries of *input_dir*
    whose names do not end in ``.xml`` are ignored.

    Args:
        input_dir: Directory to scan (not recursive).
        output_dir: Directory that receives the JSON files; it is created
            if it does not exist yet.
    """
    xml_suffix = '.xml'

    # Create the destination up front so the first write cannot fail with
    # FileNotFoundError. An empty path means "current directory" to
    # os.path.join, and os.makedirs('') would raise, so skip it in that case.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    for filename in os.listdir(input_dir):
        if not filename.endswith(xml_suffix):
            continue

        xml_file = os.path.join(input_dir, filename)
        json_data = convert_to_json(parse_xml(xml_file))

        # Swap only the trailing extension; str.replace would also rewrite
        # any '.xml' occurring earlier in the file name.
        json_name = filename[:-len(xml_suffix)] + '.json'
        json_file = os.path.join(output_dir, json_name)
        with open(json_file, 'w', encoding='utf-8') as f:
            f.write(json_data)
53
54
if __name__ == "__main__":
    # Both paths are relative, so they resolve against the current working
    # directory the script is launched from.
    input_dir = '../data/trials_xmls/'
    output_dir = '../data/trials_jsons/'
    process_files(input_dir, output_dir)