/* * Copyright (c) 2020 The Khronos Group Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ // OpenCL SDK includes #include #include #include #include #include // STL includes #include #include #include #include #include #include // std::make_tuple // TCLAP includes #include // Sample-specific option struct HistogramOptions { size_t length; size_t bins; }; // Add option to CLI parsing SDK utility template <> auto cl::sdk::parse() { return std::make_tuple( std::make_shared>( "l", "length", "Length of input", false, 1'048'576, "positive integral"), std::make_shared>( "b", "bins", "Bins of histogram", false, 100, "positive integral")); } template <> HistogramOptions cl::sdk::comprehend( std::shared_ptr> length_arg, std::shared_ptr> bins_arg) { return HistogramOptions{ length_arg->getValue(), bins_arg->getValue() }; } int main(int argc, char* argv[]) { try { // Parse command-line options auto opts = cl::sdk::parse_cli(argc, argv); const auto& diag_opts = std::get<0>(opts); const auto& dev_opts = std::get<1>(opts); const auto& histogram_opts = std::get<2>(opts); // Create runtime objects based on user preference or default cl::Context context = cl::sdk::get_context(dev_opts.triplet); cl::Device device = context.getInfo().at(0); cl::CommandQueue queue{ context, device }; cl::Platform platform{ device.getInfo() }; // https://github.com/KhronosGroup/OpenCL-CLHPP/issues/150 if (!diag_opts.quiet) { std::cout << "Selected platform: " << platform.getInfo() << "\n" << "Selected device: " << device.getInfo() << "\n" << "Selected local memory size: " << device.getInfo() << "\n" << std::endl; } // Compile kernel const char* kernel_location = "./histogram.cl"; std::ifstream kernel_stream{ kernel_location }; if (!kernel_stream.is_open()) throw std::runtime_error{ std::string{ "Cannot open kernel source: " } + kernel_location }; cl::Program program{ context, std::string{ std::istreambuf_iterator{ kernel_stream }, std::istreambuf_iterator{} } }; program.build(device); auto histogram_shared = cl::KernelFunctor( program, "histogram_shared"); auto histogram_global = cl::KernelFunctor(program, "histogram_global"); // Initialize host-side storage const auto length = histogram_opts.length; const auto bins = histogram_opts.bins; const float min = -100.0f; const float max = 100.0f; auto prng = [engine = std::default_random_engine{}, dist = std::uniform_real_distribution{ min, max }]() mutable { return dist(engine); }; std::valarray input(length); std::valarray levels(bins + 1); std::valarray histogram(length); // Initialize input variables cl::sdk::fill_with_random(prng, input); cl_float epsilon = (max - min) / bins; for (cl_uint index = 0; index < bins; index++) { levels[index] = min + epsilon * index; } levels[bins] = max; // Initialize device-side storage cl::Buffer buf_input{ context, std::begin(input), std::end(input), true }, buf_levels{ context, std::begin(levels), std::end(levels), true }, buf_histogram{ context, std::begin(histogram), std::end(histogram), false }; // Execute kernel if (bins * sizeof(cl_uint) <= device.getInfo()) { size_t items_per_thread = 32; size_t grid_size = (length + items_per_thread - 1) / items_per_thread; histogram_shared(cl::EnqueueArgs{ queue, cl::NDRange{ grid_size } }, (cl_uint)length, (cl_uint)bins, (cl_uint)items_per_thread, buf_input, buf_levels, cl::Local(bins * sizeof(cl_uint)), buf_histogram); } else { histogram_global(cl::EnqueueArgs{ queue, cl::NDRange{ length } }, (cl_uint)length, (cl_uint)bins, buf_input, buf_levels, buf_histogram); } // Concurrently calculate reference dataset std::vector histogram_expected(bins, 0); for (const auto value : input) { if (value >= levels[0] && value < levels[bins]) { const auto bin_iter = std::upper_bound(std::begin(levels), std::end(levels), value); histogram_expected[bin_iter - std::begin(levels) - 1]++; } } // Fetch results cl::copy(queue, buf_histogram, std::begin(histogram), std::end(histogram)); // Validate if (std::equal(std::begin(histogram_expected), std::end(histogram_expected), std::begin(histogram))) std::cout << "Verification passed." << std::endl; else throw std::runtime_error{ "Verification FAILED!" }; return 0; } catch (cl::util::Error& e) { std::cerr << "OpenCL Utils error: " << e.what() << std::endl; std::exit(e.err()); } catch (cl::BuildError& e) { std::cerr << "OpenCL runtime error: " << e.what() << std::endl; for (auto& build_log : e.getBuildLog()) { std::cerr << "\tBuild log for device: " << build_log.first.getInfo() << "\n" << std::endl; std::cerr << build_log.second << "\n" << std::endl; } std::exit(e.err()); } catch (cl::Error& e) { std::cerr << "OpenCL rutnime error: " << e.what() << std::endl; std::exit(e.err()); } catch (std::exception& e) { std::cerr << "Error: " << e.what() << std::endl; std::exit(EXIT_FAILURE); } return 0; }