Files
ANSLibs/OpenVINO/runtime/include/openvino/pass/sdpa_to_paged_attention.hpp

38 lines
1.1 KiB
C++
Raw Permalink Normal View History

// Copyright (C) 2018-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <memory>
#include <vector>
#include "openvino/pass/pass.hpp"
namespace ov {
namespace pass {
/**
* @brief Model-level transformation that replaces the KV-cache processing part of LLMs with the
* PagedAttention operation.
*
* Only the interface is declared here; the constructor and run_on_model() are defined elsewhere.
* \ingroup ov_pass_cpp_api
*/
class OPENVINO_API SDPAToPagedAttention : public ModelPass {
public:
// Declares the pass's RTTI under the name "SDPAToPagedAttention" (macro is defined outside this header).
OPENVINO_MODEL_PASS_RTTI("SDPAToPagedAttention");
/**
* @brief Constructs the pass with a set of optional feature flags, all of which default to false.
*
* The constructor is explicit, so a lone bool is never implicitly converted into a pass object.
* NOTE(review): the exact effect of each flag is determined by the implementation, which is not
* visible in this header; the descriptions below are inferred from the parameter names only
* and should be confirmed against the .cpp.
*
* @param use_per_layer_block_indices_inputs presumably selects separate block-indices inputs per layer - TODO confirm.
* @param use_score_outputs presumably exposes attention-score outputs on the resulting model - TODO confirm.
* @param allow_score_aggregation presumably permits aggregation of those scores - TODO confirm.
* @param allow_cache_rotation presumably permits KV-cache rotation support - TODO confirm.
* @param allow_xattention presumably permits the "xattention" mode - TODO confirm.
*/
explicit SDPAToPagedAttention(bool use_per_layer_block_indices_inputs = false,
bool use_score_outputs = false,
bool allow_score_aggregation = false,
bool allow_cache_rotation = false,
bool allow_xattention = false);
/**
* @brief Entry point of the pass; overrides the ModelPass virtual.
* @param model Shared pointer to the model the pass is run on.
* @return Boolean status; presumably true when the model was modified, following the usual
* pass convention - TODO confirm against ModelPass documentation.
*/
bool run_on_model(const std::shared_ptr<ov::Model>& model) override;
private:
// One flag per constructor parameter of the same name (minus the m_ prefix); presumably
// initialized from those arguments by the constructor, whose definition is not in this header.
// Declaration order matches the constructor's parameter order.
bool m_use_per_layer_block_indices_inputs;
bool m_use_score_outputs;
bool m_allow_score_aggregation;
bool m_allow_cache_rotation;
bool m_allow_xattention;
};
} // namespace pass
} // namespace ov