#77240 · asked 1 year ago by CptKakashki
How do I create a tensor of Euclidean distances from the max value in an image array using TensorFlow?
I'm trying to write a custom loss function that takes a batch of multichannel images as input and outputs a loss based on how well-clustered the pixel values are, on average, around each image's max value.
I have a tensor of shape (BATCH_SIZE, IMAGE_HEIGHT, IMAGE_WIDTH, NUM_CHANNELS), and I'm trying to create a new tensor of the same shape where, for each image (axes 1 and 2), I get an array of shape (IMAGE_HEIGHT, IMAGE_WIDTH) whose values equal the Euclidean distance of each element from the element with the maximum value. In the case of multiple maximum values, I suppose I could instead work with an array of shape (IMAGE_HEIGHT, IMAGE_WIDTH, NUM_OF_MAXVALS) and just take the minimum or maximum of each distance value.
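To make that concrete, here's a toy single-image example (my own illustration, not my real data) of the mapping I'm after:

import numpy as np

# A single 2x2, single-channel image whose max value sits at (0, 0)
image = np.array([[5., 0.],
                  [0., 0.]])
# The distance map I want: each element's Euclidean distance from the max
# [[0.0, 1.0  ],
#  [1.0, 1.414]]   <- sqrt(2) for the corner diagonally opposite the max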
I was able to adapt the answer to the question here to do exactly what I want for a single 2D image, but I have been unable to generalize it to the batch and channel dimensions.
I've been trying to solve this for almost a week now, so any help would be very much appreciated. Thanks!!!
My code:
import tensorflow as tf
import numpy as np
# Define epsilon to avoid universe-ending, divide-by-zero errors
epsilon = tf.constant(1e-9)
# Make input data
t = np.array([[4,0,0,0],
              [0,0,2,0],
              [0,1,4,3],
              [0,0,1,0]])
t = tf.convert_to_tensor(t, dtype=tf.float32)
print(t.numpy())
# Make coordinates
coords = tf.meshgrid(tf.range(tf.shape(t)[0]), tf.range(tf.shape(t)[1]), indexing='ij')
coords = tf.stack(coords, axis=-1)
# Masks
m = tf.equal(t, tf.reduce_max(t))
coords_masked = tf.boolean_mask(coords, m)
# Reshape coords
coords_reshaped = tf.reshape(coords, [-1, 1, 2])
# Find every pairwise distance
vec_d = coords_reshaped - coords_masked
# Calculate distances
dists = tf.linalg.norm(tf.cast(vec_d, dtype=t.dtype), axis=-1)
# Find max distances if there are repeats
max_dists = tf.reduce_max(dists, axis=-1)
# Reshape
weights = tf.reshape(max_dists, [tf.shape(t)[0], tf.shape(t)[1]])
print('------------Weights------------')
print(weights.numpy().round(9))
# Add epsilon to input tensor
t = tf.add(t, epsilon)
# Multiply weights to layer
losses = tf.multiply(t, weights)
print('--------Weighted Losses--------')
print(losses.numpy().round(9))
# Find mean
loss_mean = tf.reduce_mean(losses)
print('-----------Mean Loss-----------')
print(loss_mean.numpy().round(9))
# Normalize by image size and max value
norm_factor = tf.cast(tf.reduce_max(tf.shape(t)), dtype=t.dtype)
norm_factor = tf.multiply(norm_factor, tf.reduce_max(t))
#norm_factor = tf.add(norm_factor, epsilon)
loss_mean_normalized = tf.divide(loss_mean, norm_factor)
print('-----Normalized Mean Loss------')
print(loss_mean_normalized.numpy().round(5))
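This works as intended for a single image: the max value 4 appears at both (0,0) and (2,2), and each pixel gets weighted by its distance to the farther of the two maxima (via the tf.reduce_max over the pairwise distances).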
EDIT
This is my "best" attempt so far, but it's not quite there.. I think I need to find a way to repeat the max-value coordinates for each image element in the batch and channel stack because right now it's just subtracting the max-value coordinates from the single coordinate element corresponding to the max-value element and leaving the rest unaltered.
import tensorflow as tf
import numpy as np
#-------------------------Make some mock data---------------------------------#
BATCH_SIZE = 2
NUM_CHANS = 3
IMG_SIZE_X = 5
IMG_SIZE_Y = 4
t = np.zeros(shape=(BATCH_SIZE,
                    IMG_SIZE_Y,
                    IMG_SIZE_X,
                    NUM_CHANS))
t[0,:,:,0] = np.array([[4,1,0,0,0],
                       [2,0,0,0,0],
                       [0,0,0,0,0],
                       [0,0,0,0,0]])
t[0,:,:,1] = np.array([[0,0,1,6,7],
                       [0,1,2,1,0],
                       [0,1,1,2,0],
                       [0,3,2,0,0]])
t[0,:,:,2] = np.array([[0,0,1,3,0],
                       [0,1,2,1,0],
                       [0,1,2,2,0],
                       [0,3,2,7,9]])
t[1,:,:,0] = np.array([[0,0,0,0,0],
                       [0,0,2,1,0],
                       [0,3,4,0,0],
                       [9,0,3,1,0]])
t[1,:,:,1] = np.array([[0,0,0,0,0],
                       [0,9,0,1,0],
                       [0,0,9,0,0],
                       [0,0,0,0,0]])
t[1,:,:,2] = np.array([[0,0,0,0,0],
                       [0,1,9,0,0],
                       [0,9,0,0,0],
                       [3,3,2,0,0]])
# Convert to TF tensor
t = tf.convert_to_tensor(t, dtype=tf.float32)
#--------------------------Do the thing!!!------------------------------------#
# Define epsilon to avoid universe-ending, divide-by-zero errors
epsilon = tf.constant(1e-9)
print(t.numpy())
# Make coordinates
coords = tf.meshgrid(tf.range(tf.shape(t)[1]), tf.range(tf.shape(t)[2]), indexing='ij')
coords = tf.stack(coords, axis=-1)
# Expand dimensions and repeat so there is a set of coords for each image in batch and channel dimensions
coords = tf.expand_dims(coords, axis=0)
coords = tf.repeat(coords, repeats=BATCH_SIZE, axis=0)
coords = tf.expand_dims(coords, axis=3)
coords = tf.repeat(coords, repeats=NUM_CHANS, axis=3)
# Masks
m = tf.equal(t, tf.reduce_max(t, axis=(1,2), keepdims=True))
# Expand the mask and repeat the bool values
m_x = tf.expand_dims(m, axis=-1)
m_x = tf.repeat(m_x, axis=-1, repeats=2)
coords_masked = tf.multiply(coords, tf.cast(m_x, dtype=tf.int32))
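# NOTE: the multiply above zeroes the coordinates everywhere except at the
# max position (instead of repeating the max coordinates for every element),
# which I suspect is where this attempt goes wrong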
# Reshape coords
coords_reshaped = tf.reshape(coords, [BATCH_SIZE, -1, NUM_CHANS, 1, 2])
# Reshape coords_masked
coords_masked_reshaped = tf.reshape(coords_masked, tf.shape(coords_reshaped))
# Find every pairwise distance
vec_d = coords_reshaped - coords_masked_reshaped
# Calculate distances
dists = tf.linalg.norm(tf.cast(vec_d, dtype=t.dtype), axis=-1)
# Take the minimum distance if there are multiple maxima
min_dists = tf.reduce_min(dists, axis=-1)
# Reshape back to the input shape
weights = tf.reshape(min_dists, [tf.shape(t)[0], tf.shape(t)[1], tf.shape(t)[2], tf.shape(t)[3]])
print('------------Weights------------')
print(weights.numpy().round(9))
# Add epsilon to input tensor
t = tf.add(t, epsilon)
# Multiply weights to layer
losses = tf.multiply(t, weights)
print('--------Weighted Losses--------')
print(losses.numpy().round(9))
# Find mean
loss_mean = tf.reduce_mean(losses)
print('-----------Mean Loss-----------')
print(loss_mean.numpy().round(9))
# Normalize by image size and max value
norm_factor = tf.cast(tf.reduce_max(tf.shape(t)), dtype=t.dtype)
norm_factor = tf.multiply(norm_factor, tf.reduce_max(t))
#norm_factor = tf.add(norm_factor, epsilon)
loss_mean_normalized = tf.divide(loss_mean, norm_factor)
print('-----Normalized Mean Loss------')
print(loss_mean_normalized.numpy().round(5))
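EDIT 2
For reference, this is the direction I'm now considering for the single-maximum case: take the flattened argmax per (batch, channel), convert it back to (row, col), and broadcast those coordinates against the full coordinate grid. This is only a sketch (the helper name distance_weights is mine), it ignores ties since tf.argmax returns only the first maximum, and I haven't validated it against the full loss pipeline:

import tensorflow as tf

def distance_weights(t):
    # t has shape (BATCH_SIZE, IMG_HEIGHT, IMG_WIDTH, NUM_CHANS)
    shape = tf.shape(t)
    b, h, w, c = shape[0], shape[1], shape[2], shape[3]
    # Flatten the spatial dims and take the argmax per (batch, channel)
    flat = tf.reshape(tf.transpose(t, [0, 3, 1, 2]), [b, c, h * w])   # (B, C, H*W)
    idx = tf.argmax(flat, axis=-1, output_type=tf.int32)              # (B, C)
    # Convert the flat index back to (row, col) and shape for broadcasting
    max_y = tf.cast(idx // w, t.dtype)[:, tf.newaxis, tf.newaxis, :]  # (B, 1, 1, C)
    max_x = tf.cast(idx % w, t.dtype)[:, tf.newaxis, tf.newaxis, :]   # (B, 1, 1, C)
    # Coordinate grids, shaped to broadcast against (B, H, W, C)
    yy = tf.cast(tf.range(h), t.dtype)[tf.newaxis, :, tf.newaxis, tf.newaxis]
    xx = tf.cast(tf.range(w), t.dtype)[tf.newaxis, tf.newaxis, :, tf.newaxis]
    # Per-element Euclidean distance from the max-value element
    return tf.sqrt(tf.square(yy - max_y) + tf.square(xx - max_x))     # (B, H, W, C)

weights = distance_weights(t)
print(weights.numpy().round(3))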
Tags: python, tensorflow, machine-learning, loss-function, euclidean-distance