In [1]:
import pandas as pd
import numpy as np
import pydicom
import glob

In [2]:
## First, collect the paths of all of my DICOM files into a list.
## glob returns matches in an arbitrary, filesystem-dependent order, so sort them
## to keep "the first DICOM" and the dataframe row order stable between runs.
mydicoms = sorted(glob.glob("*.dcm"))

### Let's look at the contents of the first DICOM:

In [3]:
# Parse the first path in `mydicoms` into a pydicom dataset so its header can be inspected
dcm1 = pydicom.dcmread(mydicoms[0])

In [4]:
# A bare dataset as the cell's last expression lists its header elements (tag, name, VR, value)
dcm1

(0008, 0016) SOP Class UID                       UI: Secondary Capture Image Storage
(0008, 0018) SOP Instance UID                    UI: 1.3.6.1.4.1.11129.5.5.162426174634548301003630270411628292460952
(0008, 0060) Modality                            CS: 'DX'
(0008, 1030) Study Description                   LO: 'Effusion|Nodule|Pleural_Thickening|Mass'
(0010, 0020) Patient ID                          LO: '29579'
(0010, 0040) Patient's Sex                       CS: 'F'
(0010, 1010) Patient's Age                       AS: '24'
(0020, 000d) Study Instance UID                  UI: 1.3.6.1.4.1.11129.5.5.113025392650823751977671880960497589856674
(0020, 000e) Series Instance UID                 UI: 1.3.6.1.4.1.11129.5.5.168055162156043936178718006100964727334210
(0028, 0002) Samples per Pixel                   US: 1
(0028, 0004) Photometric Interpretation          CS: 'MONOCHROME2'
(0028, 0010) Rows                                US: 1024
(0028, 0011) Columns                             US: 1024

From the attributes listed above, these are the ones I want to extract:
* Modality
* Study Description
* Patient ID
* Patient's Sex
* Patient's Age
* Rows
* Columns

In [5]:
# Modality element, tag (0008, 0060) in the header dump above
dcm1.Modality

'DX'

In [6]:
# Study Description element, tag (0008, 1030); here it holds the '|'-separated findings
dcm1.StudyDescription

'Effusion|Nodule|Pleural_Thickening|Mass'

In [7]:
# Patient ID element, tag (0010, 0020); the value is a string
dcm1.PatientID

'29579'

In [8]:
# Patient's Sex element, tag (0010, 0040)
dcm1.PatientSex

'F'

In [9]:
# Patient's Age element, tag (0010, 1010); the value is a string, not a number
dcm1.PatientAge

'24'

In [10]:
# Rows element, tag (0028, 0010); returned as an integer
dcm1.Rows

1024

In [11]:
# Columns element, tag (0028, 0011); returned as an integer
dcm1.Columns

1024

## Now, let's create the dataframe that we want and populate it in a loop over all of our DICOMs:

In [12]:
# Collect one row of header fields per DICOM file; the rows are turned into a dataframe afterwards.
all_data = []

for dcm_path in mydicoms:
    # Only header attributes are used below, so stop parsing before the pixel data
    # element instead of loading every image into memory.
    dcm = pydicom.dcmread(dcm_path, stop_before_pixels=True)
    # PatientAge is stored as a string (e.g. '24'); cast it so the column is numeric.
    fields = [dcm.PatientID, int(dcm.PatientAge), dcm.PatientSex, dcm.Modality, dcm.StudyDescription,
              dcm.Rows, dcm.Columns]
    all_data.append(fields)

In [13]:
# Assemble the collected rows into a table; the names are listed in the same
# order in which the fields were gathered for each DICOM.
column_names = ['PatientID', 'PatientAge', 'PatientSex', 'Modality', 'Findings', 'Rows', 'Columns']
mydata = pd.DataFrame(data=all_data, columns=column_names)

In [14]:
# Display the assembled dataframe (one row per DICOM file)
mydata

Unnamed: 0,PatientID,PatientAge,PatientSex,Modality,Findings,Rows,Columns
0,29579,24,F,DX,Effusion|Nodule|Pleural_Thickening|Mass,1024,1024
1,1688,59,F,DX,Infiltration|Nodule,1024,1024
2,13659,62,F,DX,Consolidation|Mass|Pneumonia|Pneumothorax,1024,1024
3,13118,69,M,DX,Atelectasis,1024,1024
4,10172,59,F,DX,Atelectasis|Effusion,1024,1024
5,5066,52,M,DX,Cardiomegaly|Effusion|Infiltration,1024,1024
6,23075,31,M,DX,Mass,1024,1024
