-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathFeatureExtraction.py
More file actions
222 lines (181 loc) · 8.74 KB
/
FeatureExtraction.py
File metadata and controls
222 lines (181 loc) · 8.74 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
# -*- coding: utf-8 -*-
"""
Runs the GLCM feature extraction on each tile image and saves the results to a feature vector bank.
"""
import os
import numpy as np
import pandas as pd
import skimage
from PIL import Image
import pandas as pd
import time
import re
from multiprocessing import Process
import warnings
import skimage.color
import GLCMFeatures as glcm
from Color_normalization import normalizeStaining
# to modify saved feature names, color channels, angles in data frame, or distances, change these lists
feature_names = ['Contrast', 'Correlation', 'Dissimilarity', 'Energy', 'Homogeneity', 'ASM', 'Autocorrelation', 'Cluster Prominence', 'Cluster Shade', 'Entropy',
'Max Probability', 'Sum of Squares', 'Sum Average', 'Sum Variance', 'Sum Entropy', 'Difference Variance', 'Difference Entropy','NID', 'NIM',
'Trace',]
channel_names = ['r', 'g', 'b', 'h', 's', 'v', 'L', 'A', 'B']
angle_names = ['0', '45', '90', '135']
distances = [1]
angles = [0, np.pi/4, np.pi/2, 3*np.pi/4]
# global feature_headers
feature_headers = [(feature + '-' + channel + '-' + angle)
for channel in channel_names
for angle in angle_names
for feature in feature_names]
# value set to remove the background tiles that were incorrectly made by the grid_tiler()
contrast_threshold = 30
# CPU multiprocessing
def feature_extraction(src, save_path):
processes = []
i = 1
for subset in os.listdir(src):
processes.append(Process(target=texture_features, args=(os.path.join(src, subset), os.path.join(save_path, (str(i) + '.csv')) ), ) )
i+=1
for process in processes:
process.start()
for process in processes:
process.join()
# checks for background tiles that were incorretly made by the grid_tiler()
def low_contrast_check(RGBimg):
Img_1 = RGBimg[:,:,0]
Img_2 = RGBimg[:,:,1]
Img_3 = RGBimg[:,:,2]
# creates the GLCM from each color channel
p_1 = skimage.feature.graycomatrix(np.array(Img_1), distances, angles, normed=True)
p_2 = skimage.feature.graycomatrix(np.array(Img_2), distances, angles, normed=True)
p_3 = skimage.feature.graycomatrix(np.array(Img_3), distances, angles, normed=True)
contrast_1 = glcm.graycoprops(p_1, prop='contrast')
contrast_2 = glcm.graycoprops(p_2, prop='contrast')
contrast_3 = glcm.graycoprops(p_3, prop='contrast')
if np.any(contrast_1 <= contrast_threshold) or np.any(contrast_2 <= contrast_threshold) or np.any(contrast_3 <= contrast_threshold):
return True
else:
return False
# main()
def texture_features(Folders_Path, save_path):
if save_path.endswith('.csv') == True:
pass
else:
try:
save_path = os.path.join(save_path, 'texture_results.csv')
except:
raise ValueError('save_path must either be a .csv or directory.')
ultima_counter = 0 #keeps track of total number of tiles
index = []
# finds the total number of tiles in order to construct a feature_matrix for storage
for folder in os.listdir(Folders_Path):
Tiles_Path = os.path.join(Folders_Path, folder)
for file in os.listdir(Tiles_Path):
if ultima_counter == 0:
Img = Image.open(os.path.join(Tiles_Path, file))
ultima_counter+=1
# builds feature_matrix
feature_matrix = np.zeros([len(feature_headers),ultima_counter], dtype=object)
ultima_counter = 0
for folder in os.listdir(Folders_Path):
tile_counter = 0 # counts number of tiles per sample
Tiles_Path = os.path.join(Folders_Path, folder)
for file in os.listdir(Tiles_Path):
time1 = time.time()
tile_counter+=1
ultima_counter+=1
image_path = os.path.join(Tiles_Path, file)
Img = Image.open(image_path)
Img_arr = np.asarray(Img)
tile_name = 'Tile' + str(tile_counter)
# Normalizes tile to H&E standard and first place to catch background tiles that made it through
with warnings.catch_warnings():
warnings.filterwarnings('error')
try:
RGBImg = normalizeStaining(Img_arr[:,:,0:3], tile_name, saveFile=tile_name)
except RuntimeWarning:
tile_counter-=1
ultima_counter-=1
Img.close()
# drop the last empty column of the feature matrix of zeros
feature_matrix = feature_matrix[:,:-1]
del Img, tile_name, Img_arr, image_path
continue
# second place to catch background tiles that made it through
low_contrast = low_contrast_check(Img_arr[:,:,0:3])
if low_contrast == True:
tile_counter-=1
ultima_counter-=1
Img.close()
# drop the last empty column of the feature matrix of zeros
feature_matrix = feature_matrix[:,:-1]
del Img, RGBImg, tile_name, Img_arr, image_path
continue
else:
RGBImg = Img_arr[:,:,0:3]
# Texture features of RGB channels
feature_vector_RGB = calc_features(RGBImg, distances=distances, angles=angles)
feature_vector = feature_vector_RGB
# Texture features of HSV channels
hsv_img = Img.convert('HSV')
hsv_img = np.array(hsv_img)
feature_vector_HSV = calc_features(hsv_img, distances=distances, angles=angles)
feature_vector = np.concatenate((feature_vector, feature_vector_HSV), axis=0)
# Texture features of LAB channels
lab_img = Img.convert('LAB')
lab_img = np.array(lab_img)
feature_vector_LAB = calc_features(lab_img, distances=distances, angles=angles)
feature_vector = np.concatenate((feature_vector, feature_vector_LAB), axis=0)
# storing features within the matrix
feature_matrix[:, ultima_counter-1] = feature_vector
# frees up space by deleting variables
Img.close()
del feature_vector, Img,
RGBImg, tile_name, hsv_img,
feature_vector_RGB, feature_vector_HSV, feature_vector_LAB, lab_img, image_path
# removes ext from file name before including it in dataframe
try:
file_name = re.sub('.png$', '', file)
except:
pass
try:
file_name = re.sub('.tiff$', '', file)
except:
pass
try:
index.append(file_name)
except:
index.append(file)
elapsed_time = time.time() - time1
print('\nTile ' + str(ultima_counter) + '\n' + str(elapsed_time))
# convert to DataFrame with texture features
feature_matrix = feature_matrix.T
feature_matrix = pd.DataFrame(feature_matrix, columns=feature_headers)
feature_matrix['Tile'] = index
feature_matrix.set_index('Tile', inplace=True)
feature_matrix.to_csv(save_path)
def calc_features(Img_arr, distances=[1], angles=[0, np.pi/4, np.pi/2, 3*np.pi/4]):
Img_1 = Img_arr[:,:,0]
Img_2 = Img_arr[:,:,1]
Img_3 = Img_arr[:,:,2]
# creates the GLCM from each color channel
p_1 = skimage.feature.graycomatrix(np.array(Img_1), distances, angles, normed=True)
p_2 = skimage.feature.graycomatrix(np.array(Img_2), distances, angles, normed=True)
p_3 = skimage.feature.graycomatrix(np.array(Img_3), distances, angles, normed=True)
# calculates the co-occurrence features
feature_vector_1 = glcm.co_occurrence_features(p_1)
feature_vector_2 = glcm.co_occurrence_features(p_2)
feature_vector_3 = glcm.co_occurrence_features(p_3)
# all angles concatenated into one feature vector per color channel
for col in range(len(feature_vector_1.T)):
try:
v1 = np.concatenate((v1, feature_vector_1[:,col]), axis=0)
v2 = np.concatenate((v2, feature_vector_2[:,col]), axis=0)
v3 = np.concatenate((v3, feature_vector_3[:,col]), axis=0)
except:
v1 = feature_vector_1[:,col]
v2 = feature_vector_2[:,col]
v3 = feature_vector_3[:,col]
feature_vector = np.concatenate((v1, v2, v3), axis=0)
return feature_vector