92 lines
2.3 KiB
Common Lisp
92 lines
2.3 KiB
Common Lisp
/*
 * Maps `value` to a histogram bin index.
 *
 * Scans levels_array[0 .. bins-1] with a binary search and returns the index
 * of the last level that is <= value. If no level qualifies (value is below
 * levels_array[0], value is NaN, or bins == 0) the result is clamped to 0.
 *
 * The result is only meaningful when the levels are sorted in ascending
 * order, as with any binary search.
 *
 * value        - sample to classify
 * levels_array - bin boundaries; only indices [0, bins) are read here
 * bins         - number of bins (upper bound of the search window)
 *
 * Returns the bin index in [0, bins-1] (0 when bins == 0).
 */
uint binary_search(float value, __global float* levels_array, uint bins)
{
    // Half-open search window [lo, hi). Both ends are unsigned so there is
    // no signed/unsigned comparison or narrowing anywhere in the loop.
    uint lo = 0;
    uint hi = bins;

    while (lo < hi)
    {
        // lo + (hi - lo) / 2 cannot wrap, unlike (lo + hi) / 2.
        uint mid = lo + (hi - lo) / 2;
        bool go_right = levels_array[mid] <= value;

        // Select form keeps the loop body free of divergent branches.
        lo = go_right ? mid + 1 : lo;
        hi = go_right ? hi : mid;
    }

    // lo now equals the number of levels that are <= value; the bin is the
    // one just before it, clamped so an empty match does not underflow.
    return (lo > 0) ? lo - 1 : 0;
}
|
|
|
|
/*
 * Histogram kernel that accumulates into a work-group local scratch
 * histogram first and then merges it into the global result.
 *
 * Steps:
 *   1. Every work-item zeroes its own contiguous slice of block_histogram.
 *   2. Every work-item classifies up to items_per_thread consecutive input
 *      elements and atomically bumps the matching local counter.
 *   3. Every work-item atomically adds the non-zero counters of its slice
 *      to the global histogram.
 *
 * Only values strictly between levels_array[0] and levels_array[bins] are
 * counted, so levels_array must hold at least bins + 1 entries and
 * block_histogram at least bins entries.
 *
 * input_size       - number of elements in input_array
 * bins             - number of histogram channels
 * items_per_thread - consecutive input elements handled per work-item
 * input_array      - samples to classify
 * levels_array     - bin boundaries
 * block_histogram  - __local scratch counters
 * histogram        - global output counters (accumulated into, not cleared)
 */
__kernel void histogram_shared(
    uint input_size,
    uint bins,
    uint items_per_thread,
    __global float* input_array,
    __global float* levels_array,
    __local uint* block_histogram,
    __global uint* histogram
) {
    size_t gid = get_global_id(0);
    int lid = get_local_id(0);
    uint lsize = get_local_size(0);

    // Split the channels evenly (rounded up) across the work-group; this
    // work-item owns the half-open slice [slice_begin, slice_end).
    uint channels_each = ( bins + lsize - 1 ) / lsize;
    uint slice_begin = channels_each * lid;
    uint slice_end = min(channels_each * (lid + 1), bins);

    // Clear the local counters this work-item owns.
    for( uint c = slice_begin; c < slice_end; ++c )
    {
        block_histogram[c] = 0;
    }
    // All local counters must be zeroed before anyone starts counting.
    barrier(CLK_LOCAL_MEM_FENCE);

    // Accumulate into the local histogram.
    size_t first_element = gid * items_per_thread;
    for( uint i = 0; i < items_per_thread; ++i )
    {
        size_t pos = first_element + i;
        if( pos >= input_size )
        {
            continue;
        }

        float sample = input_array[pos];
        // Written as a negated conjunction so that NaN samples are skipped.
        if( !(levels_array[0] < sample && sample < levels_array[bins]) )
        {
            continue;
        }

        uint target = binary_search(sample, levels_array, bins);
        atomic_add(&block_histogram[target], 1);
    }
    // Wait until every work-item in the group has finished counting.
    barrier(CLK_LOCAL_MEM_FENCE);

    // Merge this work-item's slice into the global histogram, skipping
    // empty counters to avoid needless global atomics.
    for( uint c = slice_begin; c < slice_end; ++c )
    {
        uint count = block_histogram[c];
        if( count > 0 )
        {
            atomic_add(&histogram[c], count);
        }
    }
}
|
|
|
|
/*
 * Histogram kernel that accumulates directly into the global histogram,
 * one input element per work-item.
 *
 * A sample is counted only when it lies strictly between levels_array[0]
 * and levels_array[bins], the same acceptance rule histogram_shared uses;
 * levels_array must therefore hold at least bins + 1 entries.
 *
 * input_size   - number of elements in input_array
 * bins         - number of histogram channels
 * input_array  - samples to classify
 * levels_array - bin boundaries
 * histogram    - global output counters (accumulated into, not cleared)
 */
__kernel void histogram_global(
    uint input_size,
    uint bins,
    __global float* input_array,
    __global float* levels_array,
    __global uint* histogram
) {
    size_t gid = get_global_id(0);

    // The global size may be rounded up past the input length; surplus
    // work-items must not read beyond the end of input_array.
    if( gid >= input_size )
    {
        return;
    }

    float value = input_array[gid];

    // Skip samples outside the histogram range. Written as a negated
    // conjunction so that NaN samples are rejected as well.
    if( !(levels_array[0] < value && value < levels_array[bins]) )
    {
        return;
    }

    uint channel = binary_search(value, levels_array, bins);

    atomic_add(&histogram[channel], 1u);
}
|