Files
ANSLibs/ANS-HWiNFO/external/OpenCL/samples/extensions/khr/histogram/histogram.cl

92 lines
2.3 KiB
Common Lisp

// Locates the histogram bin for `value` by bisecting the first `bins`
// entries of `levels_array`.
//
// The loop narrows [lower, upper) to the first index whose level is
// strictly greater than `value`; the bin is the index just before it,
// clamped so the result is never below 0.
// NOTE(review): bisection presumes levels_array is sorted ascending -
// confirm against the host code that fills it.
//
// value        - sample to classify
// levels_array - bin boundary levels (only indices 0..bins-1 are read here)
// bins         - number of entries searched
// returns      - index of the last searched level that is <= value,
//                or 0 when no such level exists
uint binary_search(float value, __global float* levels_array, uint bins)
{
    int lower = 0;
    uint upper = bins;

    while (lower < upper)
    {
        const uint probe = (lower + upper) / 2;

        if (levels_array[probe] <= value)
        {
            // Level at probe does not exceed value: answer lies to the right.
            lower = probe + 1;
        }
        else
        {
            // Level at probe is greater than value: it becomes the new end.
            upper = probe;
        }
    }

    // lower is now one past the matching level; clamp to avoid going below 0.
    return max(0, lower - 1);
}
// Histogram kernel that first accumulates into a __local (per-work-group)
// histogram and then merges that partial result into the __global one.
//
// input_size       - number of valid elements in input_array
// bins             - number of histogram bins
// items_per_thread - consecutive input elements handled by each work-item
// input_array      - samples to classify
// levels_array     - bin boundary levels; index `bins` is read as the
//                    upper limit, so at least bins + 1 entries are accessed
// block_histogram  - __local scratch buffer, indexed 0..bins-1
// histogram        - __global output, updated with atomic adds
__kernel void histogram_shared(
    uint input_size,
    uint bins,
    uint items_per_thread,
    __global float* input_array,
    __global float* levels_array,
    __local uint* block_histogram,
    __global uint* histogram
) {
    const size_t global_id = get_global_id(0);
    const int local_id = get_local_id(0);
    const uint group_size = get_local_size(0);

    // Each work-item owns a contiguous slice of the bins, both for clearing
    // the local histogram and for flushing it to global memory afterwards.
    const uint bins_per_item = (bins + group_size - 1) / group_size; // rounded up
    const uint first_bin = bins_per_item * local_id;
    const uint end_bin = min(bins_per_item * (local_id + 1), bins);

    // Clear this work-item's slice of the local histogram.
    for (uint bin = first_bin; bin < end_bin; ++bin)
    {
        block_histogram[bin] = 0;
    }

    // Every slice must be zeroed before any work-item starts counting.
    barrier(CLK_LOCAL_MEM_FENCE);

    // Classify this work-item's elements into the local histogram.
    const size_t first_element = global_id * items_per_thread;
    for (uint offset = 0; offset < items_per_thread; ++offset)
    {
        const size_t element = first_element + offset;
        if (element >= input_size)
        {
            continue;
        }

        const float sample = input_array[element];

        // Only samples strictly between the first and last level are counted.
        if (levels_array[0] < sample && sample < levels_array[bins])
        {
            const uint bin = binary_search(sample, levels_array, bins);
            atomic_add(&block_histogram[bin], 1);
        }
    }

    // All local counting must finish before the slices are read back.
    barrier(CLK_LOCAL_MEM_FENCE);

    // Merge this work-item's slice into the global histogram,
    // skipping empty bins to avoid pointless atomics.
    for (uint bin = first_bin; bin < end_bin; ++bin)
    {
        const uint count = block_histogram[bin];
        if (count > 0)
        {
            atomic_add(&histogram[bin], count);
        }
    }
}
// Histogram kernel that accumulates directly into the __global histogram,
// one input element per work-item.
//
// input_size   - number of valid elements in input_array
// bins         - number of histogram bins
// input_array  - samples to classify
// levels_array - bin boundary levels; index `bins` is read as the upper
//                limit, so at least bins + 1 entries are accessed
// histogram    - __global output, updated with atomic adds
__kernel void histogram_global(
    uint input_size,
    uint bins,
    __global float* input_array,
    __global float* levels_array,
    __global uint* histogram
) {
    // Keep the full width of the work-item id; narrowing to int could
    // truncate it.
    const size_t gid = get_global_id(0);

    // Work-items beyond the input length (e.g. when the global size is
    // rounded up) must not read input_array out of bounds.
    if (gid >= input_size)
    {
        return;
    }

    const float value = input_array[gid];

    // Count only samples strictly between the first and last level, the
    // same acceptance rule histogram_shared applies. Written as a negated
    // in-range test so NaN samples are rejected as well.
    if (!(levels_array[0] < value && value < levels_array[bins]))
    {
        return;
    }

    // Find the bin via binary search and bump it in global memory.
    const uint channel = binary_search(value, levels_array, bins);
    atomic_add(&histogram[channel], 1u);
}