-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpreprocess_data.py
More file actions
39 lines (34 loc) · 1.93 KB
/
preprocess_data.py
File metadata and controls
39 lines (34 loc) · 1.93 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import os
import pydicom
import numpy as np
import cv2
import pandas as pd
def _resolve_series_dir(input_folder, location):
    """Return the normalized directory path for one metadata location value."""
    # Metadata paths are written Windows-style ('.\\a\\b'). Only the separators
    # are converted; os.path.normpath then collapses the leading './'.
    # Stripping every '.\\' substring instead would also mangle '..\\' and any
    # folder whose name ends with a dot.
    relative = location.replace('\\', '/')
    return os.path.normpath(os.path.join(input_folder, relative))


def _to_uint8_image(pixels, img_size):
    """Resize raw pixel data and scale it into the 0-255 uint8 range."""
    # Work in float32 so the resize does not depend on the stored integer
    # dtype (NOTE(review): cv2.resize is not expected to accept every integer
    # dtype pixel_array can return - confirm against the OpenCV docs).
    img = cv2.resize(np.asarray(pixels, dtype=np.float32), img_size)

    # Shift only when the data goes negative (signed storage), so that
    # non-negative images keep the plain divide-by-max scaling.
    low = min(float(img.min()), 0.0)
    span = float(img.max()) - low
    if span <= 0:
        # Blank slice: avoid dividing by zero and emit an all-black image.
        return np.zeros(img.shape, dtype=np.uint8)
    return ((img - low) / span * 255).astype(np.uint8)


def preprocess_dicom(metadata, input_folder, output_folder, img_size=(224, 224)):
    """Convert the CT DICOM slices listed in ``metadata`` to resized PNG files.

    For every metadata row the 'File Location' value is resolved relative to
    ``input_folder``. Rows whose directory is missing, or whose 'Modality' is
    not 'CT', are reported and skipped. Each ``.dcm`` file in a CT directory
    is read, resized to ``img_size``, scaled to 8-bit and written to
    ``output_folder`` as ``<Subject ID>_<row index>_<file stem>.png``.
    Failures on individual files are reported and do not stop the run.

    Args:
        metadata: DataFrame with 'File Location', 'Modality' and 'Subject ID'
            columns.
        input_folder: Root folder the 'File Location' values are relative to.
        output_folder: Folder that receives the PNG files; created if missing.
        img_size: Target size passed to ``cv2.resize``.
    """
    # cv2.imwrite does not create missing folders, so make sure it exists.
    if output_folder:
        os.makedirs(output_folder, exist_ok=True)

    for index, row in metadata.iterrows():
        location = row['File Location']
        if not isinstance(location, str):
            # Missing cells (NaN/None) have no .replace(); skip the row
            # instead of aborting the whole run.
            print(f"Skipping row {index}: invalid 'File Location' value {location!r}")
            continue

        dir_path = _resolve_series_dir(input_folder, location)
        print(f"Constructed directory path: {dir_path}")
        print(f"Exists: {os.path.exists(dir_path)}, Is Directory: {os.path.isdir(dir_path)}")

        if not os.path.isdir(dir_path):
            print(f"Directory does not exist or is not a directory: {dir_path}")
            continue
        if row['Modality'] != 'CT':
            print(f"Skipping non-CT modality directory: {dir_path}")
            continue

        print(f"Processing directory: {dir_path}")
        for filename in sorted(os.listdir(dir_path)):
            stem, extension = os.path.splitext(filename)
            if extension.lower() != '.dcm':
                continue
            file_path = os.path.join(dir_path, filename)
            # Best-effort per file: one unreadable slice must not stop the
            # rest of the series, so the failure is reported and skipped.
            try:
                dicom = pydicom.dcmread(file_path)
                img = _to_uint8_image(dicom.pixel_array, img_size)
                # Subject ID + row index keep names unique across series.
                output_filename = f"{row['Subject ID']}_{index}_{stem}.png"
                output_path = os.path.normpath(os.path.join(output_folder, output_filename))
                print(f"Saving processed image to: {output_path}")
                # imwrite signals failure through its return value.
                if not cv2.imwrite(output_path, img):
                    print(f"Error processing file {file_path}: could not write {output_path}")
            except Exception as e:
                print(f"Error processing file {file_path}: {e}")

    print("Preprocessing complete.")