b/scripts/data/processing.py
|
import os
import warnings

import pandas as pd

# Suppress all warnings to keep the console output clean
warnings.filterwarnings('ignore')
|
|
# Resolve data paths relative to this script rather than the current working directory
BASE_DIR = os.path.dirname(os.path.abspath(__file__))

# Load the noisy datasets
try:
    original = pd.read_csv(os.path.join(BASE_DIR, '../../data/raw/original.csv'))
    synthetic = pd.read_csv(os.path.join(BASE_DIR, '../../data/raw/synthetic.csv'))
except FileNotFoundError as err:
    print(f'An error occurred: {err}')
|
|
# Process the original set: drop duplicates and missing rows, then numerically encode categorical labels
def process_original_dataset():
    return original.drop_duplicates().dropna().replace({'M': 1, 'F': 2, 'YES': 1, 'NO': 0})


# Process the synthetic set with the same cleaning steps
def process_synthetic_dataset():
    return synthetic.drop_duplicates().dropna().replace({'M': 1, 'F': 2, 'YES': 1, 'NO': 0})
|
|
# Write the cleaned datasets to data/processed and report completion
def processing():
    process_original_dataset().to_csv(os.path.join(BASE_DIR, '../../data/processed/original.csv'), index=False)
    process_synthetic_dataset().to_csv(os.path.join(BASE_DIR, '../../data/processed/synthetic.csv'), index=False)

    return 'Datasets processed.'


if __name__ == '__main__':
    print(processing())
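
# A minimal sanity check (a sketch, not part of the script above): after running this
# module, the processed output can be re-read to confirm the cleaning took effect. The
# path below assumes the command is run from the repository root; adjust as needed.
#
#     >>> import pandas as pd
#     >>> df = pd.read_csv('data/processed/original.csv')
#     >>> int(df.isna().sum().sum())   # dropna() guarantees no missing values remain
#     0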