@@ -802,11 +802,6 @@ class BackendServiceImpl final : public backend::Backend::Service {
802802 return grpc::Status (grpc::StatusCode::INVALID_ARGUMENT, " \" documents\" must be a non-empty string array" );
803803 }
804804
805- // Tokenize the query
806- auto tokenized_query = tokenize_input_prompts (ctx_server.vocab , ctx_server.mctx , request->query (), /* add_special */ false , true );
807- if (tokenized_query.size () != 1 ) {
808- return grpc::Status (grpc::StatusCode::INVALID_ARGUMENT, " \" query\" must contain only a single prompt" );
809- }
810805 // Create and queue the task
811806 json responses = json::array ();
812807 bool error = false ;
@@ -818,10 +813,9 @@ class BackendServiceImpl final : public backend::Backend::Service {
818813 documents.push_back (request->documents (i));
819814 }
820815
821- auto tokenized_docs = tokenize_input_prompts (ctx_server.vocab , ctx_server.mctx , documents, /* add_special */ false , true );
822- tasks.reserve (tokenized_docs.size ());
823- for (size_t i = 0 ; i < tokenized_docs.size (); i++) {
824- auto tmp = format_rerank (ctx_server.vocab , tokenized_query[0 ], tokenized_docs[i]);
816+ tasks.reserve (documents.size ());
817+ for (size_t i = 0 ; i < documents.size (); i++) {
818+ auto tmp = format_rerank (ctx_server.model , ctx_server.vocab , ctx_server.mctx , request->query (), documents[i]);
825819 server_task task = server_task (SERVER_TASK_TYPE_RERANK);
826820 task.id = ctx_server.queue_tasks .get_new_id ();
827821 task.index = i;
0 commit comments