44import shutil
55import logging
66import tempfile
7- from test_cli_image import run_wwb
7+ from test_cli_image import run_wwb , get_similarity
88from pathlib import Path
99
1010
# Module-wide scratch directory created once at import time; the test below
# keeps the ground-truth file ("gt.csv") and the "reference" folder in it.
# NOTE(review): no cleanup of this directory is visible in this view - confirm
# it is removed somewhere, otherwise it outlives the test session.
1313tmp_dir = tempfile .mkdtemp ()
1414
1515
16- OV_RERANK_MODELS = {
17- ("cross-encoder/ms-marco-TinyBERT-L2-v2" , "text-classification" ),
18- ("Qwen/Qwen3-Reranker-0.6B" , "text-generation" ),
19- }
def download_model(model_id, task, tmp_path):
    """Export a model to OpenVINO format by invoking ``optimum-cli``.

    Args:
        model_id: Model identifier passed to ``--model``. Every ``/`` in it is
            replaced with ``_`` to build the output directory name.
        task: Value passed to ``--task``.
        tmp_path: Parent directory for the export output directory.

    Returns:
        ``Path(tmp_path, model_id.replace("/", "_"))`` - the directory handed
        to ``optimum-cli`` as the export destination.

    Raises:
        RuntimeError: If ``optimum-cli`` exits with a non-zero status. The
            captured stderr is included in the message.
    """
    model_path = Path(tmp_path, model_id.replace("/", "_"))
    command = [
        "optimum-cli",
        "export",
        "openvino",
        "--model",
        model_id,
        str(model_path),
        "--task",
        task,
        "--trust-remote-code",
    ]
    result = subprocess.run(command, capture_output=True, text=True)
    # stdout/stderr are captured, so without this check a failed export is
    # invisible and only surfaces later as an unrelated error when the
    # (missing) model directory is used.
    if result.returncode != 0:
        raise RuntimeError(
            f"optimum-cli export of '{model_id}' failed with exit code "
            f"{result.returncode}:\n{result.stderr}"
        )
    return model_path
2022
2123
22- def setup_module ():
23- for model_info in OV_RERANK_MODELS :
24- model_id = model_info [0 ]
25- task = model_info [1 ]
26- MODEL_PATH = Path (tmp_dir , model_id .replace ("/" , "_" ))
27- subprocess .run (["optimum-cli" , "export" , "openvino" , "--model" , model_id , MODEL_PATH , "--task" , task , "--trust-remote-code" ],
28- capture_output = True ,
29- text = True )
def remove_artifacts(artifacts_path, file_type="outputs"):
    """Log what is being removed, then delete the directory tree.

    Args:
        artifacts_path: Directory to delete recursively via ``shutil.rmtree``.
        file_type: Label used only in the log message (default ``"outputs"``).
    """
    message = f"Remove {file_type}"
    logger.info(message)
    shutil.rmtree(artifacts_path)
3027
3128
32- def teardown_module ():
33- logger .info ("Remove models" )
34- shutil .rmtree (tmp_dir )
35-
36-
37- @pytest .mark .parametrize (("model_info" ), OV_RERANK_MODELS )
38- def test_reranking_genai (model_info , tmp_path ):
39- if sys .platform == 'darwin' :
40- pytest .xfail ("Ticket 175534" )
41-
29+ @pytest .mark .wwb_rerank
30+ @pytest .mark .parametrize (
31+ ("model_id" , "model_task" , "threshold" ),
32+ [
33+ ("cross-encoder/ms-marco-TinyBERT-L2-v2" , "text-classification" , 0.6 ),
34+ ("tomaarsen/Qwen3-Reranker-0.6B-seq-cls" , "text-classification" , 0.6 ),
35+ ("Qwen/Qwen3-Reranker-0.6B" , "text-generation" , 0.6 ),
36+ ],
37+ )
38+ @pytest .mark .xfail (sys .platform == 'darwin' , reason = "Hangs. Ticket 175534" , run = False )
39+ def test_reranking_optimum (model_id , model_task , threshold , tmp_path ):
4240 GT_FILE = Path (tmp_dir ) / "gt.csv"
43- model_id = model_info [0 ]
44- MODEL_PATH = Path (tmp_dir ) / model_id .replace ("/" , "_" )
41+ MODEL_PATH = download_model (model_id , model_task , tmp_path )
4542
46- # test GenAI
43+ # Collect reference with HF model
4744 run_wwb ([
4845 "--base-model" ,
49- MODEL_PATH ,
46+ model_id ,
5047 "--num-samples" ,
5148 "1" ,
5249 "--gt-data" ,
@@ -55,25 +52,17 @@ def test_reranking_genai(model_info, tmp_path):
5552 "CPU" ,
5653 "--model-type" ,
5754 "text-reranking" ,
58- "--genai"
55+ "--hf" ,
5956 ])
6057
58+ assert GT_FILE .exists ()
6159 assert Path (tmp_dir , "reference" ).exists ()
6260
63-
64- @pytest .mark .parametrize (
65- ("model_info" ), OV_RERANK_MODELS
66- )
67- @pytest .mark .xfail (sys .platform == 'darwin' , reason = "Hangs. Ticket 175534" , run = False )
68- def test_reranking_optimum (model_info , tmp_path ):
69- GT_FILE = Path (tmp_dir ) / "gt.csv"
70- model_id = model_info [0 ]
71- MODEL_PATH = Path (tmp_dir , model_id .replace ("/" , "_" ))
72-
73- # Collect reference with HF model
74- run_wwb ([
75- "--base-model" ,
76- model_id ,
61+ outpus_path = tmp_path / "optimum"
62+ # test Optimum
63+ outpus_optimum = run_wwb ([
64+ "--target-model" ,
65+ MODEL_PATH ,
7766 "--num-samples" ,
7867 "1" ,
7968 "--gt-data" ,
@@ -82,14 +71,24 @@ def test_reranking_optimum(model_info, tmp_path):
8271 "CPU" ,
8372 "--model-type" ,
8473 "text-reranking" ,
85- "--hf" ,
74+ "--output" ,
75+ outpus_path ,
8676 ])
8777
88- assert GT_FILE .exists ()
89- assert Path (tmp_dir , "reference" ).exists ()
78+ assert (outpus_path / "target" ).exists ()
79+ assert (outpus_path / "target.csv" ).exists ()
80+ assert (outpus_path / "metrics_per_question.csv" ).exists ()
81+ assert (outpus_path / "metrics.csv" ).exists ()
82+ assert "Metrics for model" in outpus_optimum
9083
91- # test Optimum
92- outpus = run_wwb ([
84+ similarity = get_similarity (outpus_optimum )
85+ assert similarity >= threshold
86+
87+ remove_artifacts (outpus_path .as_posix ())
88+
89+ outpus_path = tmp_path / "genai"
90+ # test GenAI
91+ outpus_genai = run_wwb ([
9392 "--target-model" ,
9493 MODEL_PATH ,
9594 "--num-samples" ,
@@ -100,20 +99,23 @@ def test_reranking_optimum(model_info, tmp_path):
10099 "CPU" ,
101100 "--model-type" ,
102101 "text-reranking" ,
102+ "--genai" ,
103103 "--output" ,
104- tmp_path ,
104+ outpus_path ,
105105 ])
106+ assert (outpus_path / "target" ).exists ()
107+ assert (outpus_path / "target.csv" ).exists ()
108+ assert (outpus_path / "metrics_per_question.csv" ).exists ()
109+ assert (outpus_path / "metrics.csv" ).exists ()
110+ assert "Metrics for model" in outpus_genai
106111
107- assert (tmp_path / "target" ).exists ()
108- assert (tmp_path / "target.csv" ).exists ()
109- assert (tmp_path / "metrics_per_question.csv" ).exists ()
110- assert (tmp_path / "metrics.csv" ).exists ()
111- assert "Metrics for model" in outpus
112+ similarity = get_similarity (outpus_genai )
113+ assert similarity >= threshold
112114
113115 # test w/o models
114116 run_wwb ([
115117 "--target-data" ,
116- tmp_path / "target.csv" ,
118+ outpus_path / "target.csv" ,
117119 "--num-samples" ,
118120 "1" ,
119121 "--gt-data" ,
@@ -124,3 +126,6 @@ def test_reranking_optimum(model_info, tmp_path):
124126 "text-reranking" ,
125127 "--genai"
126128 ])
129+
130+ remove_artifacts (outpus_path .as_posix ())
131+ remove_artifacts (MODEL_PATH .as_posix (), "model" )
0 commit comments