How do I create a tensor of Euclidean distances from the max value in an image array using TensorFlow?

I'm trying to write a custom loss function that takes a batch of multichannel images and outputs a loss based on how tightly, on average, the pixel values of each image are clustered around its maximum value.
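In other words (reading from the code below), the loss is roughly mean(t * D) / (max(H, W) * max(t)), where D holds each pixel's distance from its image's maximum.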

I have a tensor of shape (BATCH_SIZE, IMAGE_HEIGHT, IMAGE_WIDTH, NUM_CHANNELS), and I'm trying to create a new tensor of the same shape, where for each image (axes 1 and 2) I need an array of shape (IMAGE_HEIGHT, IMAGE_WIDTH) whose values are the Euclidean distance of each element from the element with the maximum value. In the case of multiple maximum values, I suppose I could work with an array of shape (IMAGE_HEIGHT, IMAGE_WIDTH, NUM_OF_MAXVALS) and just take the minimum or maximum of each distance value.
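For example, for a single 2x2 image [[5, 0], [0, 0]], the maximum sits at (0, 0), so the desired distance map would be [[0, 1], [1, sqrt(2)]].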

I was able to adapt the answer to the question here to do exactly what I want for a single 2D image, but I have been unable to generalize it to the batch and channel dimensions.

I've been trying to solve this for almost a week now, so any help would be very much appreciated. Thanks!

My code:

import tensorflow as tf
import numpy as np

# Define epsilon to avoid universe-ending, divide-by-zero errors
epsilon = tf.constant(1e-9)

# Make input data
t = np.array([[4,0,0,0],
              [0,0,2,0],
              [0,1,4,3],
              [0,0,1,0]])

t = tf.convert_to_tensor(t, dtype=tf.float32)

print(t.numpy())

# Make coordinates
coords = tf.meshgrid(tf.range(tf.shape(t)[0]), tf.range(tf.shape(t)[1]), indexing='ij')
coords = tf.stack(coords, axis=-1)

# Mask of elements equal to the global maximum, and their coordinates
m = tf.equal(t, tf.reduce_max(t))
coords_masked = tf.boolean_mask(coords, m)

# Reshape coords to (H*W, 1, 2) so it broadcasts against the (K, 2) max coordinates
coords_reshaped = tf.reshape(coords, [-1, 1, 2])

# Difference vectors from every element to every maximum: (H*W, K, 2)
vec_d = coords_reshaped - coords_masked

# Calculate distances
dists = tf.linalg.norm( tf.cast(vec_d, dtype=t.dtype), axis=-1 )

# Find max distances if there are repeats
max_dists = tf.reduce_max(dists, axis=-1)

# Reshape
weights = tf.reshape(max_dists, [tf.shape(t)[0], tf.shape(t)[1]])

print('------------Weights------------')
print(weights.numpy().round(9))

# Add epsilon to input tensor
t = tf.add(t,epsilon)

# Multiply weights to layer
losses = tf.multiply(t, weights)
print('--------Weighted Losses--------')
print(losses.numpy().round(9))

# Find mean
loss_mean = tf.reduce_mean(losses)
print('-----------Mean Loss-----------')
print(loss_mean.numpy().round(9))

# Normalize by image size and max value
norm_factor = tf.cast( tf.reduce_max(tf.shape(t)), dtype=t.dtype )
norm_factor = tf.multiply(norm_factor, tf.reduce_max(t))
#norm_factor = tf.add(norm_factor, epsilon)
loss_mean_normalized = tf.divide(loss_mean, norm_factor)
print('-----Normalized Mean Loss------')
print(loss_mean_normalized.numpy().round(5))
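The step that makes this 2D version work is broadcasting: reshaping coords to (H*W, 1, 2) and subtracting the (K, 2) masked coordinates yields a (H*W, K, 2) tensor of difference vectors, one row per element and one column per maximum. A quick shape check with toy values (a and b are just stand-ins, not from the code above):

import tensorflow as tf

a = tf.reshape(tf.range(8), [4, 1, 2])   # (4, 1, 2): all coordinates of a 2x2 grid
b = tf.constant([[0, 0], [1, 1]])        # (2, 2): two hypothetical max locations
d = a - b                                # broadcasts to (4, 2, 2)
print(d.shape)                           # (4, 2, 2)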

EDIT

This is my best attempt so far, but it's not quite there. I think I need a way to repeat the max-value coordinates across every element of each image in the batch and channel stack, because right now it only subtracts the max-value coordinates at the single position of the maximum itself and leaves the rest of the coordinates unaltered. A sketch of one possible approach follows the code below.

import tensorflow as tf
import numpy as np


#-------------------------Make some mock data---------------------------------#
BATCH_SIZE = 2
NUM_CHANS = 3
IMG_SIZE_X = 5
IMG_SIZE_Y = 4

t = np.zeros(shape=(BATCH_SIZE,
                    IMG_SIZE_Y,
                    IMG_SIZE_X,
                    NUM_CHANS))

t[0,:,:,0] = np.array([[4,1,0,0,0],
                       [2,0,0,0,0],
                       [0,0,0,0,0],
                       [0,0,0,0,0]])

t[0,:,:,1] = np.array([[0,0,1,6,7],
                       [0,1,2,1,0],
                       [0,1,1,2,0],
                       [0,3,2,0,0]])
                      
t[0,:,:,2] = np.array([[0,0,1,3,0],
                       [0,1,2,1,0],
                       [0,1,2,2,0],
                       [0,3,2,7,9]])
                      
t[1,:,:,0] = np.array([[0,0,0,0,0],
                       [0,0,2,1,0],
                       [0,3,4,0,0],
                       [9,0,3,1,0]])

t[1,:,:,1] = np.array([[0,0,0,0,0],
                       [0,9,0,1,0],
                       [0,0,9,0,0],
                       [0,0,0,0,0]])
                      
t[1,:,:,2] = np.array([[0,0,0,0,0],
                       [0,1,9,0,0],
                       [0,9,0,0,0],
                       [3,3,2,0,0]])


# Convert to TF tensor
t = tf.convert_to_tensor(t, dtype=tf.float32)

#--------------------------Do the thing!!!------------------------------------#
# Define epsilon to avoid universe-ending, divide-by-zero errors
epsilon = tf.constant(1e-9)

print(t.numpy())

# Make coordinates
coords = tf.meshgrid(tf.range(tf.shape(t)[1]), tf.range(tf.shape(t)[2]), indexing='ij')
coords = tf.stack(coords, axis=-1)

# Expand dimensions and repeat so there is a set of coords for each image in batch and channel dimensions
coords = tf.expand_dims(coords, axis=0)
coords = tf.repeat(coords, repeats=BATCH_SIZE, axis=0)
coords = tf.expand_dims(coords, axis=3)
coords = tf.repeat(coords, repeats=NUM_CHANS, axis=3)

# Mask of elements equal to their own image/channel maximum
m = tf.equal(t, tf.reduce_max(t, axis=(1,2), keepdims=True))

# Expand the mask and repeat the bool values
m_x = tf.expand_dims(m, axis=-1)
m_x = tf.repeat(m_x, axis=-1, repeats=2)

# Zero out the coordinates of non-max elements (note: this leaves zeros at
# non-max positions rather than gathering the max coordinates everywhere)
coords_masked = tf.multiply(coords, tf.cast(m_x, dtype=tf.int32))

# Reshape coords
coords_reshaped = tf.reshape(coords, [BATCH_SIZE, -1, NUM_CHANS, 1, 2])

# Reshape coords_masked
coords_masked_reshaped = tf.reshape(coords_masked, tf.shape(coords_reshaped))

# Difference vectors (elementwise; shapes match, so nothing pairwise happens here)
vec_d = coords_reshaped - coords_masked_reshaped

# Calculate distances
dists = tf.linalg.norm( tf.cast(vec_d, dtype=t.dtype), axis=-1 )

# Take the minimum distance if there are multiple maxima
max_dists = tf.reduce_min(dists, axis=-1)

# Reshape
weights = tf.reshape(max_dists, [tf.shape(t)[0], tf.shape(t)[1], tf.shape(t)[2], tf.shape(t)[3]])

print('------------Weights------------')
print(weights.numpy().round(9))

# Add epsilon to input tensor
t = tf.add(t,epsilon)

# Multiply weights to layer
losses = tf.multiply(t, weights)
print('--------Weighted Losses--------')
print(losses.numpy().round(9))

# Find mean
loss_mean = tf.reduce_mean(losses)
print('-----------Mean Loss-----------')
print(loss_mean.numpy().round(9))

# Normalize by image size and max value
norm_factor = tf.cast( tf.reduce_max(tf.shape(t)), dtype=t.dtype )
norm_factor = tf.multiply(norm_factor, tf.reduce_max(t))
#norm_factor = tf.add(norm_factor, epsilon)
loss_mean_normalized = tf.divide(loss_mean, norm_factor)
print('-----Normalized Mean Loss------')
print(loss_mean_normalized.numpy().round(5))
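For reference, here is a minimal sketch of one way the batching could work, under the simplifying assumption of a single maximum per image/channel (tf.argmax keeps only the first occurrence of a tie, so it doesn't cover the min/max-over-ties case); distance_from_max is a hypothetical helper name:

import tensorflow as tf

def distance_from_max(t):
    # t: (B, H, W, C). Returns a (B, H, W, C) tensor of each pixel's distance
    # from its image/channel's maximum. Assumes a single maximum; tf.argmax
    # breaks ties by taking the first (row-major) occurrence.
    shape = tf.shape(t)
    B, H, W, C = shape[0], shape[1], shape[2], shape[3]
    # Flatten the spatial dims per image/channel: (B, C, H*W)
    flat = tf.reshape(tf.transpose(t, [0, 3, 1, 2]), [B, C, H * W])
    # Flat index of one maximum per image/channel
    idx = tf.argmax(flat, axis=-1, output_type=tf.int32)   # (B, C)
    max_y, max_x = idx // W, idx % W                       # (B, C) each
    # Coordinate grids over the image, shape (H, W)
    yy, xx = tf.meshgrid(tf.range(H), tf.range(W), indexing='ij')
    # Broadcast (1, H, W, 1) against (B, 1, 1, C) -> (B, H, W, C)
    dy = tf.cast(yy[None, :, :, None] - max_y[:, None, None, :], t.dtype)
    dx = tf.cast(xx[None, :, :, None] - max_x[:, None, None, :], t.dtype)
    return tf.sqrt(dy * dy + dx * dx)

If a single maximum is acceptable, weights = distance_from_max(t) would slot directly into the weighting and normalization steps above; handling all tied maxima would still need something like the boolean-mask broadcasting from the 2D version.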

Tags: python, tensorflow, machine-learning, loss-function, euclidean-distance
