Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Remove OVClassModelTestP #928

Open
wants to merge 9 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions .github/workflows/llama_cpp_plugin_build_and_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ on:
pull_request:
paths:
- 'modules/llama_cpp_plugin/**'
- '.github/workflows/llama_cpp_plugin_build_and_test.yml'

permissions: read-all

Expand Down Expand Up @@ -46,6 +47,11 @@ jobs:
needs: build_ubuntu20
runs-on: ubuntu-20.04
steps:
- name: Set up Python 3.9
uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1
with:
python-version: "3.9"

- name: Download build artifacts
uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7
with:
Expand All @@ -60,10 +66,10 @@ jobs:

- name: Prepare test data - convert test model files
run: |
pip install -r llama.cpp/requirements/requirements-convert-hf-to-gguf.txt
pip install -r llama.cpp/requirements/requirements-convert_hf_to_gguf.txt
huggingface-cli download gpt2 model.safetensors tokenizer.json tokenizer_config.json vocab.json config.json merges.txt --local-dir hf_gpt2
mkdir -p ${{ github.workspace }}/test_data
python3 llama.cpp/convert-hf-to-gguf.py hf_gpt2 --outtype f32 --outfile ${{ github.workspace }}/test_data/gpt2.gguf
python3 llama.cpp/convert_hf_to_gguf.py hf_gpt2 --outtype f32 --outfile ${{ github.workspace }}/test_data/gpt2.gguf

- name: Install libtbb2
run: |
Expand Down
2 changes: 2 additions & 0 deletions modules/custom_operations/tests/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,6 @@ torch
onnx
tensorboard
pytest
# WA CVS-150813
numpy<2.0.0
# open3d==0.16.0 - need to update with new release
6 changes: 3 additions & 3 deletions modules/llama_cpp_plugin/src/compiled_model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,11 @@ LlamaCppModel::LlamaCppModel(const std::string& gguf_fname,
: ICompiledModel(nullptr, plugin),
m_gguf_fname(gguf_fname),
m_num_threads(num_threads) {
OPENVINO_DEBUG << "llama_cpp_plugin: loading llama model directly from GGUF... " << std::endl;
OPENVINO_DEBUG("llama_cpp_plugin: loading llama model directly from GGUF... \n");
llama_model_params mparams = llama_model_default_params();
mparams.n_gpu_layers = 99;
m_llama_model_ptr = llama_load_model_from_file(gguf_fname.c_str(), mparams);
OPENVINO_DEBUG << "llama_cpp_plugin: llama model loaded successfully from GGUF..." << std::endl;
OPENVINO_DEBUG("llama_cpp_plugin: llama model loaded successfully from GGUF...\n");

auto input_ids = std::make_shared<ov::opset13::Parameter>(ov::element::Type_t::i64, ov::PartialShape({-1, -1}));
auto fake_convert = std::make_shared<ov::opset13::Convert>(input_ids->output(0), ov::element::Type_t::f32);
Expand Down Expand Up @@ -71,7 +71,7 @@ std::shared_ptr<const ov::Model> LlamaCppModel::get_runtime_model() const {
}

void LlamaCppModel::set_property(const ov::AnyMap& properties) {
OPENVINO_DEBUG << "llama_cpp_plugin: attempted to set_property (did nothing)";
OPENVINO_DEBUG("llama_cpp_plugin: attempted to set_property (did nothing)");
}

ov::Any LlamaCppModel::get_property(const std::string& name) const {
Expand Down
8 changes: 4 additions & 4 deletions modules/llama_cpp_plugin/src/infer_request.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ void allocate_tensor_impl(ov::SoPtr<ov::ITensor>& tensor,
LlamaCppSyncInferRequest::LlamaCppSyncInferRequest(const std::shared_ptr<const LlamaCppModel>& compiled_model,
size_t num_threads)
: ov::ISyncInferRequest(compiled_model) {
OPENVINO_DEBUG << "llama_cpp_plugin: infer request ctor called\n";
OPENVINO_DEBUG("llama_cpp_plugin: infer request ctor called\n");
llama_context_params cparams = llama_context_default_params();
cparams.n_threads = num_threads ? num_threads : std::thread::hardware_concurrency();
cparams.n_ctx = 0; // this means that the actual n_ctx will be taken equal to the model's train-time value
Expand All @@ -51,7 +51,7 @@ LlamaCppSyncInferRequest::LlamaCppSyncInferRequest(const std::shared_ptr<const L
}
void LlamaCppSyncInferRequest::set_tensors_impl(const ov::Output<const ov::Node> port,
const std::vector<ov::SoPtr<ov::ITensor>>& tensors) {
OPENVINO_DEBUG << "llama_cpp_plugin: set_tensors_impl called\n";
OPENVINO_DEBUG("llama_cpp_plugin: set_tensors_impl called\n");
}

void llama_batch_add_reimpl(struct llama_batch& batch,
Expand Down Expand Up @@ -131,12 +131,12 @@ void LlamaCppSyncInferRequest::infer() {
llama_batch_free(batch);
};
std::vector<ov::ProfilingInfo> LlamaCppSyncInferRequest::get_profiling_info() const {
OPENVINO_DEBUG << "llama_cpp_plugin: get_profiling_info() called\n";
OPENVINO_DEBUG("llama_cpp_plugin: get_profiling_info() called\n");
return std::vector<ov::ProfilingInfo>{};
};

std::vector<ov::SoPtr<ov::IVariableState>> LlamaCppSyncInferRequest::query_state() const {
OPENVINO_DEBUG << "llama_cpp_plugin: query_state() called\n";
OPENVINO_DEBUG("llama_cpp_plugin: query_state() called\n");
return {std::static_pointer_cast<ov::IVariableState>(std::make_shared<LlamaCppState>(m_llama_ctx))};
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,6 @@ using namespace ov::test::behavior;

namespace {

//
// OV Class Common tests with <pluginName, device_name params>
//

INSTANTIATE_TEST_SUITE_P(smoke_OVClassNetworkTestP,
OVClassModelTestP,
::testing::Values(ov::test::utils::DEVICE_NVIDIA));

//
// OV Class GetMetric
//
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,6 @@ std::vector<std::string> disabledTestPatterns() {
R"(.*InferRequestIOBBlobTest.*canProcessDeallocatedOutputBlobAfterGetAndSetBlob.*)",
// 119703
R"(.*smoke_GroupConvolutionBias(Add|AddAdd)_2D_ExplicitPaddingSymmetric2.*FP16*.*)",
// Issue: 128924
R"(.*smoke_OVClassNetworkTestP/OVClassModelTestP.ImportModelWithNullContextThrows.*)",
};

#ifdef _WIN32
Expand Down
Loading