[ce7075]: / bin / query_pubmed_for_pubs.py

Download this file

64 lines (53 with data), 2.3 kB

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
#
# Generate the publications list for the MIMIC website (http://mimic.physionet.org)
#
# Requires:
# - biopython ("pip install biopython")
# - json (part of the Python standard library; no installation needed)
from Bio import Entrez
import json
def search(query):
    """Run an ESearch against PubMed and return the parsed response.

    Args:
        query: PubMed search expression, passed to ESearch as ``term``.

    Returns:
        The structure produced by ``Entrez.read`` for the ESearch reply.
        The request asks for at most 40 hits, sorted by relevance.
    """
    handle = Entrez.esearch(
        db='pubmed',
        sort='relevance',
        retmax='40',
        retmode='xml',
        term=query,
    )
    # Close the network handle explicitly (even if parsing raises) instead
    # of leaving it to the garbage collector.
    try:
        return Entrez.read(handle)
    finally:
        handle.close()
def fetch_details(id_list):
    """Fetch the PubMed records for the given ids via EFetch.

    Args:
        id_list: sequence of PubMed id strings; they are joined with commas
            and sent as the ``id`` parameter of a single EFetch request.

    Returns:
        The structure produced by ``Entrez.read`` for the EFetch reply, or
        an empty list when ``id_list`` is empty (no request is sent then).
    """
    # Nothing to look up: skip the request rather than sending an empty id.
    if not id_list:
        return []

    ids = ','.join(id_list)
    handle = Entrez.efetch(
        db='pubmed',
        retmode='xml',
        id=ids,
    )
    # Close the network handle explicitly (even if parsing raises) instead
    # of leaving it to the garbage collector.
    try:
        return Entrez.read(handle)
    finally:
        handle.close()
def main():
    """Query PubMed for MIMIC-related papers and write their titles to HTML.

    Sets the Entrez contact e-mail, runs the hard-coded query through
    ``search`` and ``fetch_details``, then (re)creates
    ``mimic_publications.html`` in the current directory. The file starts
    with an HTML comment that records the query, followed by one article
    title per line. Each title is also printed to stdout.
    """
    Entrez.email = 'mimic-support@physionet.org'
    query = """(Celi[Author] OR Mark[Author]) AND MIT AND ((MIMIC AND ICU) OR (MIMIC-II OR "MIMIC II" or "MIMIC 2" or "MIMIC 3" OR MIMIC-III or "MIMIC III"))"""

    results = search(query)
    papers = fetch_details(results['IdList'])
    # Depending on the Biopython version, the parsed EFetch reply is either
    # a list of articles or a dict holding that list under 'PubmedArticle'.
    # Accept both so the loop below always iterates over article records.
    if isinstance(papers, dict):
        papers = papers.get('PubmedArticle', [])

    fn = "mimic_publications.html"
    header = """<!--\n\nList of MIMIC-related publications generated automatically from PubMed with the following query:\n\n""" + query + \
        """ \n\n-->\n"""

    # Open once in text mode with an explicit encoding: titles may contain
    # non-ASCII characters, and mixing encoded bytes with str on write
    # raises TypeError on Python 3.
    with open(fn, "w", encoding="utf-8") as mimic_publ_file:
        mimic_publ_file.write(header + '\n')
        # TODO: each record should eventually be written as:
        # Author list. Title. Journal. Issue. DOI. PMID.
        # Only the title is emitted for now.
        for paper in papers:
            title = str(paper['MedlineCitation']['Article']['ArticleTitle'])
            mimic_publ_file.write(title.strip() + '\n')
            print(title)

    # To inspect the structure of a record, pretty print the first paper:
    # print(json.dumps(papers[0], indent=2, separators=(',', ':')))
# Generate the publication list only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()