diff --git a/integrations/google_ai/pyproject.toml b/integrations/google_ai/pyproject.toml index 4e980a3e89..d282399e65 100644 --- a/integrations/google_ai/pyproject.toml +++ b/integrations/google_ai/pyproject.toml @@ -81,7 +81,7 @@ typing = "mypy --install-types --non-interactive --explicit-package-bases {args: [tool.ruff] -target-version = "py38" +target-version = "py39" line-length = 120 [tool.ruff.lint] diff --git a/integrations/google_ai/src/haystack_integrations/components/generators/google_ai/chat/gemini.py b/integrations/google_ai/src/haystack_integrations/components/generators/google_ai/chat/gemini.py index 7319fefe4a..00b3a0d4d5 100644 --- a/integrations/google_ai/src/haystack_integrations/components/generators/google_ai/chat/gemini.py +++ b/integrations/google_ai/src/haystack_integrations/components/generators/google_ai/chat/gemini.py @@ -1,5 +1,5 @@ import json -from typing import Any, Dict, List, Optional, Union +from typing import Any, Optional, Union import google.generativeai as genai from google.ai.generativelanguage import Content, Part @@ -146,9 +146,9 @@ def __init__( *, api_key: Secret = Secret.from_env_var("GOOGLE_API_KEY"), # noqa: B008 model: str = "gemini-2.0-flash", - generation_config: Optional[Union[GenerationConfig, Dict[str, Any]]] = None, - safety_settings: Optional[Dict[HarmCategory, HarmBlockThreshold]] = None, - tools: Optional[List[Tool]] = None, + generation_config: Optional[Union[GenerationConfig, dict[str, Any]]] = None, + safety_settings: Optional[dict[HarmCategory, HarmBlockThreshold]] = None, + tools: Optional[list[Tool]] = None, tool_config: Optional[content_types.ToolConfigDict] = None, streaming_callback: Optional[StreamingCallbackT] = None, ): @@ -192,7 +192,7 @@ def __init__( self._model = GenerativeModel(self._model_name) self._streaming_callback = streaming_callback - def to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> dict[str, Any]: """ Serializes the component to a dictionary. @@ -218,7 +218,7 @@ def to_dict(self) -> Dict[str, Any]: return data @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "GoogleAIGeminiChatGenerator": + def from_dict(cls, data: dict[str, Any]) -> "GoogleAIGeminiChatGenerator": """ Deserializes the component from a dictionary. @@ -255,13 +255,13 @@ def _convert_to_google_tool(tool: Tool) -> FunctionDeclaration: return FunctionDeclaration(name=tool.name, description=tool.description, parameters=parameters) - @component.output_types(replies=List[ChatMessage]) + @component.output_types(replies=list[ChatMessage]) def run( self, - messages: List[ChatMessage], + messages: list[ChatMessage], streaming_callback: Optional[StreamingCallbackT] = None, *, - tools: Optional[List[Tool]] = None, + tools: Optional[list[Tool]] = None, ): """ Generates text based on the provided messages. @@ -308,13 +308,13 @@ def run( return {"replies": replies} - @component.output_types(replies=List[ChatMessage]) + @component.output_types(replies=list[ChatMessage]) async def run_async( self, - messages: List[ChatMessage], + messages: list[ChatMessage], streaming_callback: Optional[StreamingCallbackT] = None, *, - tools: Optional[List[Tool]] = None, + tools: Optional[list[Tool]] = None, ): """ Async version of the run method. Generates text based on the provided messages. 
@@ -367,7 +367,7 @@ async def run_async( @staticmethod def _convert_response_to_messages( response_body: Union[GenerateContentResponse, AsyncGenerateContentResponse], - ) -> List[ChatMessage]: + ) -> list[ChatMessage]: """ Converts the Google AI response to a list of `ChatMessage` instances. @@ -408,7 +408,7 @@ def _convert_response_to_messages( @staticmethod def _stream_response_and_convert_to_messages( stream: GenerateContentResponse, streaming_callback: StreamingCallbackT - ) -> List[ChatMessage]: + ) -> list[ChatMessage]: """ Streams the Google AI response and converts it to a list of `ChatMessage` instances. @@ -461,7 +461,7 @@ def _stream_response_and_convert_to_messages( @staticmethod async def _stream_response_and_convert_to_messages_async( stream: AsyncGenerateContentResponse, streaming_callback: AsyncStreamingCallbackT - ) -> List[ChatMessage]: + ) -> list[ChatMessage]: """ Streams the Google AI response and converts it to a list of `ChatMessage` instances. diff --git a/integrations/google_ai/src/haystack_integrations/components/generators/google_ai/gemini.py b/integrations/google_ai/src/haystack_integrations/components/generators/google_ai/gemini.py index d62598785b..3519053d88 100644 --- a/integrations/google_ai/src/haystack_integrations/components/generators/google_ai/gemini.py +++ b/integrations/google_ai/src/haystack_integrations/components/generators/google_ai/gemini.py @@ -1,4 +1,4 @@ -from typing import Any, Callable, Dict, List, Optional, Union +from typing import Any, Callable, Optional, Union import google.generativeai as genai from google.ai.generativelanguage import Content, Part @@ -77,8 +77,8 @@ def __init__( *, api_key: Secret = Secret.from_env_var("GOOGLE_API_KEY"), # noqa: B008 model: str = "gemini-2.0-flash", - generation_config: Optional[Union[GenerationConfig, Dict[str, Any]]] = None, - safety_settings: Optional[Dict[HarmCategory, HarmBlockThreshold]] = None, + generation_config: Optional[Union[GenerationConfig, dict[str, Any]]] = None, + safety_settings: Optional[dict[HarmCategory, HarmBlockThreshold]] = None, streaming_callback: Optional[Callable[[StreamingChunk], None]] = None, ): """ @@ -107,7 +107,7 @@ def __init__( self._model = GenerativeModel(self.model_name) self.streaming_callback = streaming_callback - def _generation_config_to_dict(self, config: Union[GenerationConfig, Dict[str, Any]]) -> Dict[str, Any]: + def _generation_config_to_dict(self, config: Union[GenerationConfig, dict[str, Any]]) -> dict[str, Any]: if isinstance(config, dict): return config return { @@ -119,7 +119,7 @@ def _generation_config_to_dict(self, config: Union[GenerationConfig, Dict[str, A "stop_sequences": config.stop_sequences, } - def to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> dict[str, Any]: """ Serializes the component to a dictionary. @@ -142,7 +142,7 @@ def to_dict(self) -> Dict[str, Any]: return data @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "GoogleAIGeminiGenerator": + def from_dict(cls, data: dict[str, Any]) -> "GoogleAIGeminiGenerator": """ Deserializes the component from a dictionary. 
@@ -180,7 +180,7 @@ def _convert_part(self, part: Union[str, ByteStream, Part]) -> Part: msg = f"Unsupported type {type(part)} for part {part}" raise ValueError(msg) - @component.output_types(replies=List[str]) + @component.output_types(replies=list[str]) def run( self, parts: Variadic[Union[str, ByteStream, Part]], @@ -212,7 +212,7 @@ def run( return {"replies": replies} - def _get_response(self, response_body: GenerateContentResponse) -> List[str]: + def _get_response(self, response_body: GenerateContentResponse) -> list[str]: """ Extracts the responses from the Google AI request. :param response_body: The response body from the Google AI request. @@ -227,7 +227,7 @@ def _get_response(self, response_body: GenerateContentResponse) -> List[str]: def _get_stream_response( self, stream: GenerateContentResponse, streaming_callback: Callable[[StreamingChunk], None] - ) -> List[str]: + ) -> list[str]: """ Extracts the responses from the Google AI streaming response. :param stream: The streaming response from the Google AI request. diff --git a/integrations/google_vertex/pyproject.toml b/integrations/google_vertex/pyproject.toml index e8010c41a7..5461b62186 100644 --- a/integrations/google_vertex/pyproject.toml +++ b/integrations/google_vertex/pyproject.toml @@ -81,7 +81,7 @@ typing = "mypy --install-types --non-interactive --explicit-package-bases {args: [tool.ruff] -target-version = "py38" +target-version = "py39" line-length = 120 [tool.ruff.lint] diff --git a/integrations/google_vertex/src/haystack_integrations/components/embedders/google_vertex/document_embedder.py b/integrations/google_vertex/src/haystack_integrations/components/embedders/google_vertex/document_embedder.py index 3686bd5c39..9ca2e5767c 100755 --- a/integrations/google_vertex/src/haystack_integrations/components/embedders/google_vertex/document_embedder.py +++ b/integrations/google_vertex/src/haystack_integrations/components/embedders/google_vertex/document_embedder.py @@ -1,6 +1,6 @@ import math import time -from typing import Any, Dict, List, Literal, Optional +from typing import Any, Literal, Optional import vertexai from haystack import component, default_from_dict, default_to_dict, logging @@ -71,7 +71,7 @@ def __init__( retries: int = 3, progress_bar: bool = True, truncate_dim: Optional[int] = None, - meta_fields_to_embed: Optional[List[str]] = None, + meta_fields_to_embed: Optional[list[str]] = None, embedding_separator: str = "\n", ) -> None: """ @@ -132,7 +132,7 @@ def resolve_secret(secret: Optional[Secret]) -> Optional[str]: self.embedder = TextEmbeddingModel.from_pretrained(self.model) self.task_type = task_type - def _prepare_texts_to_embed(self, documents: List[Document]) -> List[str]: + def _prepare_texts_to_embed(self, documents: list[Document]) -> list[str]: """ Prepare the texts to embed by concatenating the Document text with the metadata fields to embed. """ @@ -145,7 +145,7 @@ def _prepare_texts_to_embed(self, documents: List[Document]) -> List[str]: texts_to_embed.append(text_to_embed) return texts_to_embed - def get_text_embedding_input(self, batch: List[Document]) -> List[TextEmbeddingInput]: + def get_text_embedding_input(self, batch: list[Document]) -> list[TextEmbeddingInput]: """ Converts a batch of Document objects into a list of TextEmbeddingInput objects. 
@@ -158,7 +158,7 @@ def get_text_embedding_input(self, batch: List[Document]) -> List[TextEmbeddingI texts_to_embed = self._prepare_texts_to_embed(documents=batch) return [TextEmbeddingInput(text=content, task_type=self.task_type) for content in texts_to_embed] - def embed_batch_by_smaller_batches(self, batch: List[str], subbatch=1) -> List[List[float]]: + def embed_batch_by_smaller_batches(self, batch: list[str], subbatch=1) -> list[list[float]]: """ Embeds a batch of text strings by dividing them into smaller sub-batches. Args: @@ -190,7 +190,7 @@ def embed_batch_by_smaller_batches(self, batch: List[str], subbatch=1) -> List[L return embeddings_batch - def embed_batch(self, batch: List[str]) -> List[List[float]]: + def embed_batch(self, batch: list[str]) -> list[list[float]]: """ Generate embeddings for a batch of text strings. @@ -205,8 +205,8 @@ def embed_batch(self, batch: List[str]) -> List[List[float]]: return embeddings - @component.output_types(documents=List[Document]) - def run(self, documents: List[Document]): + @component.output_types(documents=list[Document]) + def run(self, documents: list[Document]): """ Processes all documents in batches while adhering to the API's token limit per request. @@ -276,7 +276,7 @@ def run(self, documents: List[Document]): return {"documents": documents} - def to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> dict[str, Any]: """ Serializes the component to a dictionary. @@ -300,7 +300,7 @@ def to_dict(self) -> Dict[str, Any]: ) @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "VertexAIDocumentEmbedder": + def from_dict(cls, data: dict[str, Any]) -> "VertexAIDocumentEmbedder": """ Deserializes the component from a dictionary. diff --git a/integrations/google_vertex/src/haystack_integrations/components/embedders/google_vertex/text_embedder.py b/integrations/google_vertex/src/haystack_integrations/components/embedders/google_vertex/text_embedder.py index cb1fe8b2dc..362300d8c1 100755 --- a/integrations/google_vertex/src/haystack_integrations/components/embedders/google_vertex/text_embedder.py +++ b/integrations/google_vertex/src/haystack_integrations/components/embedders/google_vertex/text_embedder.py @@ -1,4 +1,4 @@ -from typing import Any, Dict, List, Literal, Optional, Union +from typing import Any, Literal, Optional, Union import vertexai from haystack import Document, component, default_from_dict, default_to_dict, logging @@ -84,8 +84,8 @@ def resolve_secret(secret: Optional[Secret]) -> Optional[str]: self.embedder = TextEmbeddingModel.from_pretrained(self.model) self.task_type = task_type - @component.output_types(embedding=List[float]) - def run(self, text: Union[List[Document], List[str], str]): + @component.output_types(embedding=list[float]) + def run(self, text: Union[list[Document], list[str], str]): """ Processes text in batches while adhering to the API's token limit per request. @@ -106,7 +106,7 @@ def run(self, text: Union[List[Document], List[str], str]): embeddings = self.embedder.get_embeddings(text_embed_input)[0].values return {"embedding": embeddings} - def to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> dict[str, Any]: """ Serializes the component to a dictionary. @@ -121,7 +121,7 @@ def to_dict(self) -> Dict[str, Any]: ) @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "VertexAITextEmbedder": + def from_dict(cls, data: dict[str, Any]) -> "VertexAITextEmbedder": """ Deserializes the component from a dictionary. 
diff --git a/integrations/google_vertex/src/haystack_integrations/components/generators/google_vertex/captioner.py b/integrations/google_vertex/src/haystack_integrations/components/generators/google_vertex/captioner.py index ea95aa2013..2beb358154 100644 --- a/integrations/google_vertex/src/haystack_integrations/components/generators/google_vertex/captioner.py +++ b/integrations/google_vertex/src/haystack_integrations/components/generators/google_vertex/captioner.py @@ -1,4 +1,4 @@ -from typing import Any, Dict, List, Optional +from typing import Any, Optional import vertexai from haystack import logging @@ -68,7 +68,7 @@ def __init__( self._model = ImageTextModel.from_pretrained(self._model_name) - def to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> dict[str, Any]: """ Serializes the component to a dictionary. @@ -80,7 +80,7 @@ def to_dict(self) -> Dict[str, Any]: ) @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "VertexAIImageCaptioner": + def from_dict(cls, data: dict[str, Any]) -> "VertexAIImageCaptioner": """ Deserializes the component from a dictionary. @@ -91,7 +91,7 @@ def from_dict(cls, data: Dict[str, Any]) -> "VertexAIImageCaptioner": """ return default_from_dict(cls, data) - @component.output_types(captions=List[str]) + @component.output_types(captions=list[str]) def run(self, image: ByteStream): """Prompts the model to generate captions for the given image. diff --git a/integrations/google_vertex/src/haystack_integrations/components/generators/google_vertex/chat/gemini.py b/integrations/google_vertex/src/haystack_integrations/components/generators/google_vertex/chat/gemini.py index f3ce7864e1..834b3f4023 100644 --- a/integrations/google_vertex/src/haystack_integrations/components/generators/google_vertex/chat/gemini.py +++ b/integrations/google_vertex/src/haystack_integrations/components/generators/google_vertex/chat/gemini.py @@ -1,5 +1,6 @@ import json -from typing import Any, AsyncIterable, Dict, Iterable, List, Optional, Union +from collections.abc import AsyncIterable, Iterable +from typing import Any, Optional, Union from haystack import logging from haystack.core.component import component @@ -146,9 +147,9 @@ def __init__( model: str = "gemini-1.5-flash", project_id: Optional[str] = None, location: Optional[str] = None, - generation_config: Optional[Union[GenerationConfig, Dict[str, Any]]] = None, - safety_settings: Optional[Dict[HarmCategory, HarmBlockThreshold]] = None, - tools: Optional[List[Tool]] = None, + generation_config: Optional[Union[GenerationConfig, dict[str, Any]]] = None, + safety_settings: Optional[dict[HarmCategory, HarmBlockThreshold]] = None, + tools: Optional[list[Tool]] = None, tool_config: Optional[ToolConfig] = None, streaming_callback: Optional[StreamingCallbackT] = None, ): @@ -205,14 +206,14 @@ def __init__( ) @staticmethod - def _generation_config_to_dict(config: Union[GenerationConfig, Dict[str, Any]]) -> Dict[str, Any]: + def _generation_config_to_dict(config: Union[GenerationConfig, dict[str, Any]]) -> dict[str, Any]: """Converts the GenerationConfig object to a dictionary.""" if isinstance(config, dict): return config return config.to_dict() @staticmethod - def _tool_config_to_dict(tool_config: ToolConfig) -> Dict[str, Any]: + def _tool_config_to_dict(tool_config: ToolConfig) -> dict[str, Any]: """Serializes the ToolConfig object into a dictionary.""" mode = tool_config._gapic_tool_config.function_calling_config.mode allowed_function_names = tool_config._gapic_tool_config.function_calling_config.allowed_function_names @@ 
-223,7 +224,7 @@ def _tool_config_to_dict(tool_config: ToolConfig) -> Dict[str, Any]: return config_dict - def to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> dict[str, Any]: """ Serializes the component to a dictionary. @@ -250,7 +251,7 @@ def to_dict(self) -> Dict[str, Any]: return data @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "VertexAIGeminiChatGenerator": + def from_dict(cls, data: dict[str, Any]) -> "VertexAIGeminiChatGenerator": """ Deserializes the component from a dictionary. @@ -260,7 +261,7 @@ def from_dict(cls, data: Dict[str, Any]) -> "VertexAIGeminiChatGenerator": Deserialized component. """ - def _tool_config_from_dict(config_dict: Dict[str, Any]) -> ToolConfig: + def _tool_config_from_dict(config_dict: dict[str, Any]) -> ToolConfig: """Deserializes the ToolConfig object from a dictionary.""" function_calling_config = config_dict["function_calling_config"] return ToolConfig( @@ -280,7 +281,7 @@ def _tool_config_from_dict(config_dict: Dict[str, Any]) -> ToolConfig: return default_from_dict(cls, data) @staticmethod - def _convert_to_vertex_tools(tools: List[Tool]) -> List[VertexTool]: + def _convert_to_vertex_tools(tools: list[Tool]) -> list[VertexTool]: """ Converts a list of Haystack `Tool` to a list of Vertex `Tool` objects. @@ -301,13 +302,13 @@ def _convert_to_vertex_tools(tools: List[Tool]) -> List[VertexTool]: ) return [VertexTool(function_declarations=function_declarations)] - @component.output_types(replies=List[ChatMessage]) + @component.output_types(replies=list[ChatMessage]) def run( self, - messages: List[ChatMessage], + messages: list[ChatMessage], streaming_callback: Optional[StreamingCallbackT] = None, *, - tools: Optional[List[Tool]] = None, + tools: Optional[list[Tool]] = None, ): """ :param messages: @@ -360,13 +361,13 @@ def run( return {"replies": replies} - @component.output_types(replies=List[ChatMessage]) + @component.output_types(replies=list[ChatMessage]) async def run_async( self, - messages: List[ChatMessage], + messages: list[ChatMessage], streaming_callback: Optional[StreamingCallbackT] = None, *, - tools: Optional[List[Tool]] = None, + tools: Optional[list[Tool]] = None, ): """ Async version of the run method. Generates text based on the provided messages. @@ -424,14 +425,14 @@ async def run_async( return {"replies": replies} @staticmethod - def _convert_response_to_messages(response_body: GenerationResponse) -> List[ChatMessage]: + def _convert_response_to_messages(response_body: GenerationResponse) -> list[ChatMessage]: """ Converts the Google Vertex AI response to a list of `ChatMessage` instances. :param response_body: The response from Google AI request. :returns: List of `ChatMessage` instances. """ - replies: List[ChatMessage] = [] + replies: list[ChatMessage] = [] usage_metadata = response_body.usage_metadata openai_usage = { @@ -464,7 +465,7 @@ def _convert_response_to_messages(response_body: GenerationResponse) -> List[Cha def _stream_response_and_convert_to_messages( self, stream: Iterable[GenerationResponse], streaming_callback: StreamingCallbackT - ) -> List[ChatMessage]: + ) -> list[ChatMessage]: """ Streams the Google Vertex AI response and converts it to a list of `ChatMessage` instances. 
@@ -518,7 +519,7 @@ def _stream_response_and_convert_to_messages( @staticmethod async def _stream_response_and_convert_to_messages_async( stream: AsyncIterable[GenerationResponse], streaming_callback: AsyncStreamingCallbackT - ) -> List[ChatMessage]: + ) -> list[ChatMessage]: """ Streams the Google Vertex AI response and converts it to a list of `ChatMessage` instances. diff --git a/integrations/google_vertex/src/haystack_integrations/components/generators/google_vertex/code_generator.py b/integrations/google_vertex/src/haystack_integrations/components/generators/google_vertex/code_generator.py index 9ac3fbaf60..1b11bf6efa 100644 --- a/integrations/google_vertex/src/haystack_integrations/components/generators/google_vertex/code_generator.py +++ b/integrations/google_vertex/src/haystack_integrations/components/generators/google_vertex/code_generator.py @@ -1,4 +1,4 @@ -from typing import Any, Dict, List, Optional +from typing import Any, Optional import vertexai from haystack import logging @@ -71,7 +71,7 @@ def __init__( self._model = CodeGenerationModel.from_pretrained(self._model_name) - def to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> dict[str, Any]: """ Serializes the component to a dictionary. @@ -83,7 +83,7 @@ def to_dict(self) -> Dict[str, Any]: ) @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "VertexAICodeGenerator": + def from_dict(cls, data: dict[str, Any]) -> "VertexAICodeGenerator": """ Deserializes the component from a dictionary. @@ -94,7 +94,7 @@ def from_dict(cls, data: Dict[str, Any]) -> "VertexAICodeGenerator": """ return default_from_dict(cls, data) - @component.output_types(replies=List[str]) + @component.output_types(replies=list[str]) def run(self, prefix: str, suffix: Optional[str] = None): """ Generate code using a Google Vertex AI model. 
diff --git a/integrations/google_vertex/src/haystack_integrations/components/generators/google_vertex/gemini.py b/integrations/google_vertex/src/haystack_integrations/components/generators/google_vertex/gemini.py index 27ec46bb48..1c09d1b032 100644 --- a/integrations/google_vertex/src/haystack_integrations/components/generators/google_vertex/gemini.py +++ b/integrations/google_vertex/src/haystack_integrations/components/generators/google_vertex/gemini.py @@ -1,4 +1,5 @@ -from typing import Any, Callable, Dict, Iterable, List, Optional, Union +from collections.abc import Iterable +from typing import Any, Callable, Optional, Union from haystack import logging from haystack.core.component import component @@ -64,8 +65,8 @@ def __init__( model: str = "gemini-2.0-flash", project_id: Optional[str] = None, location: Optional[str] = None, - generation_config: Optional[Union[GenerationConfig, Dict[str, Any]]] = None, - safety_settings: Optional[Dict[HarmCategory, HarmBlockThreshold]] = None, + generation_config: Optional[Union[GenerationConfig, dict[str, Any]]] = None, + safety_settings: Optional[dict[HarmCategory, HarmBlockThreshold]] = None, system_instruction: Optional[Union[str, ByteStream, Part]] = None, streaming_callback: Optional[Callable[[StreamingChunk], None]] = None, ): @@ -118,7 +119,7 @@ def __init__( system_instruction=self._system_instruction, ) - def _generation_config_to_dict(self, config: Union[GenerationConfig, Dict[str, Any]]) -> Dict[str, Any]: + def _generation_config_to_dict(self, config: Union[GenerationConfig, dict[str, Any]]) -> dict[str, Any]: if isinstance(config, dict): return config return { @@ -130,7 +131,7 @@ def _generation_config_to_dict(self, config: Union[GenerationConfig, Dict[str, A "stop_sequences": config._raw_generation_config.stop_sequences, } - def to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> dict[str, Any]: """ Serializes the component to a dictionary. @@ -155,7 +156,7 @@ def to_dict(self) -> Dict[str, Any]: return data @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "VertexAIGeminiGenerator": + def from_dict(cls, data: dict[str, Any]) -> "VertexAIGeminiGenerator": """ Deserializes the component from a dictionary. @@ -182,7 +183,7 @@ def _convert_part(self, part: Union[str, ByteStream, Part]) -> Part: msg = f"Unsupported type {type(part)} for part {part}" raise ValueError(msg) - @component.output_types(replies=List[str]) + @component.output_types(replies=list[str]) def run( self, parts: Variadic[Union[str, ByteStream, Part]], @@ -211,7 +212,7 @@ def run( return {"replies": replies} - def _get_response(self, response_body: GenerationResponse) -> List[str]: + def _get_response(self, response_body: GenerationResponse) -> list[str]: """ Extracts the responses from the Vertex AI response. @@ -228,7 +229,7 @@ def _get_response(self, response_body: GenerationResponse) -> List[str]: def _get_stream_response( self, stream: Iterable[GenerationResponse], streaming_callback: Callable[[StreamingChunk], None] - ) -> List[str]: + ) -> list[str]: """ Extracts the responses from the Vertex AI streaming response. @@ -236,7 +237,7 @@ def _get_stream_response( :param streaming_callback: The handler for the streaming response. :returns: A list of string responses. 
""" - streaming_chunks: List[StreamingChunk] = [] + streaming_chunks: list[StreamingChunk] = [] for chunk in stream: streaming_chunk = StreamingChunk(content=chunk.text, meta=chunk.to_dict()) diff --git a/integrations/google_vertex/src/haystack_integrations/components/generators/google_vertex/image_generator.py b/integrations/google_vertex/src/haystack_integrations/components/generators/google_vertex/image_generator.py index 44b02a1b07..0a8ba9034a 100644 --- a/integrations/google_vertex/src/haystack_integrations/components/generators/google_vertex/image_generator.py +++ b/integrations/google_vertex/src/haystack_integrations/components/generators/google_vertex/image_generator.py @@ -1,4 +1,4 @@ -from typing import Any, Dict, List, Optional +from typing import Any, Optional import vertexai from haystack import logging @@ -61,7 +61,7 @@ def __init__( self._model = ImageGenerationModel.from_pretrained(self._model_name) - def to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> dict[str, Any]: """ Serializes the component to a dictionary. @@ -73,7 +73,7 @@ def to_dict(self) -> Dict[str, Any]: ) @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "VertexAIImageGenerator": + def from_dict(cls, data: dict[str, Any]) -> "VertexAIImageGenerator": """ Deserializes the component from a dictionary. @@ -84,7 +84,7 @@ def from_dict(cls, data: Dict[str, Any]) -> "VertexAIImageGenerator": """ return default_from_dict(cls, data) - @component.output_types(images=List[ByteStream]) + @component.output_types(images=list[ByteStream]) def run(self, prompt: str, negative_prompt: Optional[str] = None): """Produces images based on the given prompt. diff --git a/integrations/google_vertex/src/haystack_integrations/components/generators/google_vertex/question_answering.py b/integrations/google_vertex/src/haystack_integrations/components/generators/google_vertex/question_answering.py index 86606db182..36819fc9a3 100644 --- a/integrations/google_vertex/src/haystack_integrations/components/generators/google_vertex/question_answering.py +++ b/integrations/google_vertex/src/haystack_integrations/components/generators/google_vertex/question_answering.py @@ -1,4 +1,4 @@ -from typing import Any, Dict, List, Optional +from typing import Any, Optional import vertexai from haystack import logging @@ -61,7 +61,7 @@ def __init__( self._model = ImageTextModel.from_pretrained(self._model_name) - def to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> dict[str, Any]: """ Serializes the component to a dictionary. @@ -73,7 +73,7 @@ def to_dict(self) -> Dict[str, Any]: ) @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "VertexAIImageQA": + def from_dict(cls, data: dict[str, Any]) -> "VertexAIImageQA": """ Deserializes the component from a dictionary. @@ -84,7 +84,7 @@ def from_dict(cls, data: Dict[str, Any]) -> "VertexAIImageQA": """ return default_from_dict(cls, data) - @component.output_types(replies=List[str]) + @component.output_types(replies=list[str]) def run(self, image: ByteStream, question: str): """Prompts model to answer a question about an image. 
diff --git a/integrations/google_vertex/src/haystack_integrations/components/generators/google_vertex/text_generator.py b/integrations/google_vertex/src/haystack_integrations/components/generators/google_vertex/text_generator.py index cb48f33c87..80a76c5b41 100644 --- a/integrations/google_vertex/src/haystack_integrations/components/generators/google_vertex/text_generator.py +++ b/integrations/google_vertex/src/haystack_integrations/components/generators/google_vertex/text_generator.py @@ -1,6 +1,6 @@ import importlib from dataclasses import fields -from typing import Any, Dict, List, Optional +from typing import Any, Optional import vertexai from haystack import logging @@ -71,7 +71,7 @@ def __init__( self._model = TextGenerationModel.from_pretrained(self._model_name) - def to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> dict[str, Any]: """ Serializes the component to a dictionary. @@ -94,7 +94,7 @@ def to_dict(self) -> Dict[str, Any]: return data @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "VertexAITextGenerator": + def from_dict(cls, data: dict[str, Any]) -> "VertexAITextGenerator": """ Deserializes the component from a dictionary. @@ -111,7 +111,7 @@ def from_dict(cls, data: Dict[str, Any]) -> "VertexAITextGenerator": ) return default_from_dict(cls, data) - @component.output_types(replies=List[str], safety_attributes=Dict[str, float], citations=List[Dict[str, Any]]) + @component.output_types(replies=list[str], safety_attributes=dict[str, float], citations=list[dict[str, Any]]) def run(self, prompt: str): """Prompts the model to generate text. diff --git a/integrations/google_vertex/tests/test_document_embedder.py b/integrations/google_vertex/tests/test_document_embedder.py index bcd3ce9df3..b63e4a9169 100644 --- a/integrations/google_vertex/tests/test_document_embedder.py +++ b/integrations/google_vertex/tests/test_document_embedder.py @@ -28,9 +28,10 @@ def mock_vertex_init_and_model(): """ Fixture to mock vertexai.init and TextEmbeddingModel.from_pretrained """ - with patch("vertexai.init") as mock_init, patch( - "vertexai.language_models.TextEmbeddingModel.from_pretrained" - ) as mock_from_pretrained: + with ( + patch("vertexai.init") as mock_init, + patch("vertexai.language_models.TextEmbeddingModel.from_pretrained") as mock_from_pretrained, + ): mock_embedder = MagicMock(spec=TextEmbeddingModel) mock_embedder.get_embeddings.return_value = [MockTextEmbeddingResponse([0.1] * 768)] mock_embedder.count_tokens.return_value = MockCountTokensResponse(total_tokens=10) diff --git a/integrations/google_vertex/tests/test_text_embedder.py b/integrations/google_vertex/tests/test_text_embedder.py index 9d1c8feade..383cc69f7d 100644 --- a/integrations/google_vertex/tests/test_text_embedder.py +++ b/integrations/google_vertex/tests/test_text_embedder.py @@ -20,9 +20,10 @@ def mock_vertex_init_and_model(): """ Fixture to mock vertexai.init and TextEmbeddingModel.from_pretrained """ - with patch("vertexai.init") as mock_init, patch( - "vertexai.language_models.TextEmbeddingModel.from_pretrained" - ) as mock_from_pretrained: + with ( + patch("vertexai.init") as mock_init, + patch("vertexai.language_models.TextEmbeddingModel.from_pretrained") as mock_from_pretrained, + ): mock_embedder = MagicMock(spec=TextEmbeddingModel) # Simulate returning a list with one response object for get_embeddings mock_embedder.get_embeddings.return_value = [MockTextEmbeddingResponse([0.1] * 768)] diff --git a/integrations/hanlp/pyproject.toml b/integrations/hanlp/pyproject.toml index 
f51210ba5f..a3674d278e 100644 --- a/integrations/hanlp/pyproject.toml +++ b/integrations/hanlp/pyproject.toml @@ -83,12 +83,12 @@ module = [ ignore_missing_imports = true [tool.black] -target-version = ["py38"] +target-version = ["py39"] line-length = 120 skip-string-normalization = true [tool.ruff] -target-version = "py38" +target-version = "py39" line-length = 120 [tool.ruff.lint] diff --git a/integrations/hanlp/src/haystack_integrations/components/preprocessors/hanlp/chinese_document_splitter.py b/integrations/hanlp/src/haystack_integrations/components/preprocessors/hanlp/chinese_document_splitter.py index 198630c85a..e727065e34 100644 --- a/integrations/hanlp/src/haystack_integrations/components/preprocessors/hanlp/chinese_document_splitter.py +++ b/integrations/hanlp/src/haystack_integrations/components/preprocessors/hanlp/chinese_document_splitter.py @@ -3,7 +3,7 @@ # SPDX-License-Identifier: Apache-2.0 from copy import deepcopy -from typing import Any, Callable, Dict, List, Literal, Optional, Tuple +from typing import Any, Callable, Literal, Optional from haystack import Document, component, logging from haystack.core.serialization import default_from_dict, default_to_dict @@ -149,7 +149,7 @@ def _validate_init_parameters( raise ValueError(msg) @component.output_types(documents=list[Document]) - def run(self, documents: List[Document]) -> Dict[str, List[Document]]: + def run(self, documents: list[Document]) -> dict[str, list[Document]]: """ Split documents into smaller chunks. @@ -175,7 +175,7 @@ def warm_up(self) -> None: self.chinese_tokenizer = hanlp.load(hanlp.pretrained.tok.FINE_ELECTRA_SMALL_ZH) self.split_sent = hanlp.load(hanlp.pretrained.eos.UD_CTB_EOS_MUL) - def _split_by_character(self, doc: Document) -> List[Document]: + def _split_by_character(self, doc: Document) -> list[Document]: """ Define a function to handle Chinese clauses @@ -202,7 +202,7 @@ def _split_by_character(self, doc: Document) -> List[Document]: ) # Define a function to handle Chinese clauses - def chinese_sentence_split(self, text: str) -> List[Dict[str, Any]]: + def chinese_sentence_split(self, text: str) -> list[dict[str, Any]]: """ Split Chinese text into sentences. @@ -223,7 +223,7 @@ def chinese_sentence_split(self, text: str) -> List[Dict[str, Any]]: return results - def _split_document(self, doc: Document) -> List[Document]: + def _split_document(self, doc: Document) -> list[Document]: if self.split_by == "sentence" or self.respect_sentence_boundary: return self._split_by_hanlp_sentence(doc) @@ -233,8 +233,8 @@ def _split_document(self, doc: Document) -> List[Document]: return self._split_by_character(doc) def _concatenate_sentences_based_on_word_amount( - self, sentences: List[str], split_length: int, split_overlap: int, granularity: str - ) -> Tuple[List[str], List[int], List[int]]: + self, sentences: list[str], split_length: int, split_overlap: int, granularity: str + ) -> tuple[list[str], list[int], list[int]]: """ Groups the sentences into chunks of `split_length` words while respecting sentence boundaries. 
@@ -251,11 +251,11 @@ def _concatenate_sentences_based_on_word_amount( chunk_word_count = 0 chunk_starting_page_number = 1 chunk_start_idx = 0 - current_chunk: List[str] = [] + current_chunk: list[str] = [] # output lists split_start_page_numbers = [] - list_of_splits: List[List[str]] = [] + list_of_splits: list[list[str]] = [] split_start_indices = [] for sentence_idx, sentence in enumerate(sentences): @@ -305,7 +305,7 @@ def _concatenate_sentences_based_on_word_amount( return text_splits, split_start_page_numbers, split_start_indices - def _split_by_hanlp_sentence(self, doc: Document) -> List[Document]: + def _split_by_hanlp_sentence(self, doc: Document) -> list[Document]: """ Split Chinese text into sentences. @@ -342,8 +342,8 @@ def _split_by_hanlp_sentence(self, doc: Document) -> List[Document]: return split_docs def _concatenate_units( - self, elements: List[str], split_length: int, split_overlap: int, split_threshold: int - ) -> Tuple[List[str], List[int], List[int]]: + self, elements: list[str], split_length: int, split_overlap: int, split_threshold: int + ) -> tuple[list[str], list[int], list[int]]: """ Concatenates the elements into parts of split_length units. @@ -364,9 +364,9 @@ def _concatenate_units( # Otherwise, proceed as before step = split_length - split_overlap step = max(step, 1) - text_splits: List[str] = [] - splits_pages: List[int] = [] - splits_start_idxs: List[int] = [] + text_splits: list[str] = [] + splits_pages: list[int] = [] + splits_start_idxs: list[int] = [] cur_start_idx = 0 cur_page = 1 segments = windowed(elements, n=split_length, step=step) @@ -399,12 +399,12 @@ def _concatenate_units( return text_splits, splits_pages, splits_start_idxs def _create_docs_from_splits( - self, text_splits: List[str], splits_pages: List[int], splits_start_idxs: List[int], meta: Dict[str, Any] - ) -> List[Document]: + self, text_splits: list[str], splits_pages: list[int], splits_start_idxs: list[int], meta: dict[str, Any] + ) -> list[Document]: """ Creates Document objects from splits enriching them with page number and the metadata of the original document. """ - documents: List[Document] = [] + documents: list[Document] = [] for i, (txt, split_idx) in enumerate(zip(text_splits, splits_start_idxs)): copied_meta = deepcopy(meta) @@ -460,7 +460,7 @@ def _add_split_overlap_information( overlapping_range = (0, overlapping_range[1] - overlapping_range[0]) previous_doc.meta["_split_overlap"].append({"doc_id": current_doc.id, "range": overlapping_range}) - def _number_of_sentences_to_keep(self, sentences: List[str], split_length: int, split_overlap: int) -> int: + def _number_of_sentences_to_keep(self, sentences: list[str], split_length: int, split_overlap: int) -> int: """ Returns the number of sentences to keep in the next chunk based on the `split_overlap` and `split_length`. @@ -486,7 +486,7 @@ def _number_of_sentences_to_keep(self, sentences: List[str], split_length: int, break return num_sentences_to_keep - def _split_by_function(self, doc: Document) -> List[Document]: + def _split_by_function(self, doc: Document) -> list[Document]: """ Split a document using a custom splitting function. @@ -515,7 +515,7 @@ def _split_by_function(self, doc: Document) -> List[Document]: meta=metadata, ) - def to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> dict[str, Any]: """ Serializes the component to a dictionary. 
""" @@ -533,7 +533,7 @@ def to_dict(self) -> Dict[str, Any]: return serialized @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "ChineseDocumentSplitter": + def from_dict(cls, data: dict[str, Any]) -> "ChineseDocumentSplitter": """ Deserializes the component from a dictionary. """ diff --git a/integrations/jina/pyproject.toml b/integrations/jina/pyproject.toml index 5c7a003411..82cc7557b2 100644 --- a/integrations/jina/pyproject.toml +++ b/integrations/jina/pyproject.toml @@ -79,7 +79,7 @@ check_untyped_defs = true disallow_incomplete_defs = true [tool.ruff] -target-version = "py38" +target-version = "py39" line-length = 120 [tool.ruff.lint] diff --git a/integrations/jina/src/haystack_integrations/components/connectors/jina/reader.py b/integrations/jina/src/haystack_integrations/components/connectors/jina/reader.py index b98cefeaef..4eda3e27ba 100644 --- a/integrations/jina/src/haystack_integrations/components/connectors/jina/reader.py +++ b/integrations/jina/src/haystack_integrations/components/connectors/jina/reader.py @@ -3,7 +3,7 @@ # SPDX-License-Identifier: Apache-2.0 import json -from typing import Any, Dict, List, Optional, Union +from typing import Any, Optional, Union from urllib.parse import quote import requests @@ -67,7 +67,7 @@ def __init__( mode = JinaReaderMode.from_str(mode) self.mode = mode - def to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> dict[str, Any]: """ Serializes the component to a dictionary. :returns: @@ -81,7 +81,7 @@ def to_dict(self) -> Dict[str, Any]: ) @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "JinaReaderConnector": + def from_dict(cls, data: dict[str, Any]) -> "JinaReaderConnector": """ Deserializes the component from a dictionary. :param data: @@ -103,8 +103,8 @@ def _json_to_document(self, data: dict) -> Document: document = Document(content=content, meta=data) return document - @component.output_types(documents=List[Document]) - def run(self, query: str, headers: Optional[Dict[str, str]] = None) -> Dict[str, List[Document]]: + @component.output_types(documents=list[Document]) + def run(self, query: str, headers: Optional[dict[str, str]] = None) -> dict[str, list[Document]]: """ Process the query/URL using the Jina AI reader service. diff --git a/integrations/jina/src/haystack_integrations/components/embedders/jina/document_embedder.py b/integrations/jina/src/haystack_integrations/components/embedders/jina/document_embedder.py index 5e8a697b5a..d775833629 100644 --- a/integrations/jina/src/haystack_integrations/components/embedders/jina/document_embedder.py +++ b/integrations/jina/src/haystack_integrations/components/embedders/jina/document_embedder.py @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: 2023-present deepset GmbH # # SPDX-License-Identifier: Apache-2.0 -from typing import Any, Dict, List, Optional, Tuple +from typing import Any, Optional import requests from haystack import Document, component, default_from_dict, default_to_dict @@ -43,7 +43,7 @@ def __init__( suffix: str = "", batch_size: int = 32, progress_bar: bool = True, - meta_fields_to_embed: Optional[List[str]] = None, + meta_fields_to_embed: Optional[list[str]] = None, embedding_separator: str = "\n", task: Optional[str] = None, dimensions: Optional[int] = None, @@ -95,13 +95,13 @@ def __init__( self.dimensions = dimensions self.late_chunking = late_chunking - def _get_telemetry_data(self) -> Dict[str, Any]: + def _get_telemetry_data(self) -> dict[str, Any]: """ Data that is sent to Posthog for usage analytics. 
""" return {"model": self.model_name} - def to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> dict[str, Any]: """ Serializes the component to a dictionary. :returns: @@ -128,7 +128,7 @@ def to_dict(self) -> Dict[str, Any]: return default_to_dict(self, **kwargs) @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "JinaDocumentEmbedder": + def from_dict(cls, data: dict[str, Any]) -> "JinaDocumentEmbedder": """ Deserializes the component from a dictionary. :param data: @@ -139,7 +139,7 @@ def from_dict(cls, data: Dict[str, Any]) -> "JinaDocumentEmbedder": deserialize_secrets_inplace(data["init_parameters"], keys=["api_key"]) return default_from_dict(cls, data) - def _prepare_texts_to_embed(self, documents: List[Document]) -> List[str]: + def _prepare_texts_to_embed(self, documents: list[Document]) -> list[str]: """ Prepare the texts to embed by concatenating the Document text with the metadata fields to embed. """ @@ -156,8 +156,8 @@ def _prepare_texts_to_embed(self, documents: List[Document]) -> List[str]: return texts_to_embed def _embed_batch( - self, texts_to_embed: List[str], batch_size: int, parameters: Optional[Dict] = None - ) -> Tuple[List[List[float]], Dict[str, Any]]: + self, texts_to_embed: list[str], batch_size: int, parameters: Optional[dict] = None + ) -> tuple[list[list[float]], dict[str, Any]]: """ Embed a list of texts in batches. """ @@ -189,8 +189,8 @@ def _embed_batch( return all_embeddings, metadata - @component.output_types(documents=List[Document], meta=Dict[str, Any]) - def run(self, documents: List[Document]) -> Dict[str, Any]: + @component.output_types(documents=list[Document], meta=dict[str, Any]) + def run(self, documents: list[Document]) -> dict[str, Any]: """ Compute the embeddings for a list of Documents. @@ -208,7 +208,7 @@ def run(self, documents: List[Document]) -> Dict[str, Any]: raise TypeError(msg) texts_to_embed = self._prepare_texts_to_embed(documents=documents) - parameters: Dict[str, Any] = {} + parameters: dict[str, Any] = {} if self.task is not None: parameters["task"] = self.task if self.dimensions is not None: diff --git a/integrations/jina/src/haystack_integrations/components/embedders/jina/document_image_embedder.py b/integrations/jina/src/haystack_integrations/components/embedders/jina/document_image_embedder.py index d90e1d7d80..852dca138d 100644 --- a/integrations/jina/src/haystack_integrations/components/embedders/jina/document_image_embedder.py +++ b/integrations/jina/src/haystack_integrations/components/embedders/jina/document_image_embedder.py @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: Apache-2.0 from dataclasses import replace -from typing import Any, Dict, List, Optional, Tuple +from typing import Any, Optional import requests from haystack import Document, component, default_from_dict, default_to_dict, logging @@ -60,7 +60,7 @@ def __init__( file_path_meta_field: str = "file_path", root_path: Optional[str] = None, embedding_dimension: Optional[int] = None, - image_size: Optional[Tuple[int, int]] = None, + image_size: Optional[tuple[int, int]] = None, batch_size: int = 5, ): """ @@ -103,13 +103,13 @@ def __init__( } ) - def _get_telemetry_data(self) -> Dict[str, Any]: + def _get_telemetry_data(self) -> dict[str, Any]: """ Data that is sent to Posthog for usage analytics. """ return {"model": self.model_name} - def to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> dict[str, Any]: """ Serializes the component to a dictionary. 
@@ -128,7 +128,7 @@ def to_dict(self) -> Dict[str, Any]: ) @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "JinaDocumentImageEmbedder": + def from_dict(cls, data: dict[str, Any]) -> "JinaDocumentImageEmbedder": """ Deserializes the component from a dictionary. @@ -140,7 +140,7 @@ def from_dict(cls, data: Dict[str, Any]) -> "JinaDocumentImageEmbedder": deserialize_secrets_inplace(data["init_parameters"], keys=["api_key"]) return default_from_dict(cls, data) - def _extract_images_to_embed(self, documents: List[Document]) -> List[str]: + def _extract_images_to_embed(self, documents: list[Document]) -> list[str]: """ Validates the input documents and extracts the images to embed in the format expected by the Jina API. @@ -166,8 +166,8 @@ def _extract_images_to_embed(self, documents: List[Document]) -> List[str]: documents=documents, file_path_meta_field=self.file_path_meta_field, root_path=self.root_path ) - images_to_embed: List[Optional[str]] = [None] * len(documents) - pdf_page_infos: List[_PDFPageInfo] = [] + images_to_embed: list[Optional[str]] = [None] * len(documents) + pdf_page_infos: list[_PDFPageInfo] = [] for doc_idx, image_source_info in enumerate(images_source_info): if image_source_info["mime_type"] == "application/pdf": @@ -204,8 +204,8 @@ def _extract_images_to_embed(self, documents: List[Document]) -> List[str]: # tested above that image is not None, but mypy doesn't know that return images_to_embed # type: ignore[return-value] - @component.output_types(documents=List[Document]) - def run(self, documents: List[Document]) -> Dict[str, List[Document]]: + @component.output_types(documents=list[Document]) + def run(self, documents: list[Document]) -> dict[str, list[Document]]: """ Embed a list of image documents. @@ -229,7 +229,7 @@ def run(self, documents: List[Document]) -> Dict[str, List[Document]]: batch_images = images_to_embed[i : i + self.batch_size] # Prepare request parameters - parameters: Dict[str, Any] = {} + parameters: dict[str, Any] = {} if self.embedding_dimension is not None: parameters["dimensions"] = self.embedding_dimension diff --git a/integrations/jina/src/haystack_integrations/components/embedders/jina/text_embedder.py b/integrations/jina/src/haystack_integrations/components/embedders/jina/text_embedder.py index a32012aa48..b27a8cfc47 100644 --- a/integrations/jina/src/haystack_integrations/components/embedders/jina/text_embedder.py +++ b/integrations/jina/src/haystack_integrations/components/embedders/jina/text_embedder.py @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: 2023-present deepset GmbH # # SPDX-License-Identifier: Apache-2.0 -from typing import Any, Dict, List, Optional +from typing import Any, Optional import requests from haystack import component, default_from_dict, default_to_dict @@ -82,19 +82,19 @@ def __init__( self.dimensions = dimensions self.late_chunking = late_chunking - def _get_telemetry_data(self) -> Dict[str, Any]: + def _get_telemetry_data(self) -> dict[str, Any]: """ Data that is sent to Posthog for usage analytics. """ return {"model": self.model_name} - def to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> dict[str, Any]: """ Serializes the component to a dictionary. :returns: Dictionary with serialized data. 
""" - kwargs: Dict[str, Any] = { + kwargs: dict[str, Any] = { "api_key": self.api_key.to_dict(), "model": self.model_name, "prefix": self.prefix, @@ -110,7 +110,7 @@ def to_dict(self) -> Dict[str, Any]: return default_to_dict(self, **kwargs) @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "JinaTextEmbedder": + def from_dict(cls, data: dict[str, Any]) -> "JinaTextEmbedder": """ Deserializes the component from a dictionary. :param data: @@ -121,8 +121,8 @@ def from_dict(cls, data: Dict[str, Any]) -> "JinaTextEmbedder": deserialize_secrets_inplace(data["init_parameters"], keys=["api_key"]) return default_from_dict(cls, data) - @component.output_types(embedding=List[float], meta=Dict[str, Any]) - def run(self, text: str) -> Dict[str, Any]: + @component.output_types(embedding=list[float], meta=dict[str, Any]) + def run(self, text: str) -> dict[str, Any]: """ Embed a string. @@ -141,7 +141,7 @@ def run(self, text: str) -> Dict[str, Any]: text_to_embed = self.prefix + text + self.suffix - parameters: Dict[str, Any] = {} + parameters: dict[str, Any] = {} if self.task is not None: parameters["task"] = self.task if self.dimensions is not None: diff --git a/integrations/jina/src/haystack_integrations/components/rankers/jina/ranker.py b/integrations/jina/src/haystack_integrations/components/rankers/jina/ranker.py index 97dace746f..0bd06a7815 100644 --- a/integrations/jina/src/haystack_integrations/components/rankers/jina/ranker.py +++ b/integrations/jina/src/haystack_integrations/components/rankers/jina/ranker.py @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: 2023-present deepset GmbH # # SPDX-License-Identifier: Apache-2.0 -from typing import Any, Dict, List, Optional +from typing import Any, Optional import requests from haystack import Document, component, default_from_dict, default_to_dict @@ -70,7 +70,7 @@ def __init__( } ) - def to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> dict[str, Any]: """ Serializes the component to a dictionary. :returns: @@ -85,7 +85,7 @@ def to_dict(self) -> Dict[str, Any]: ) @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "JinaRanker": + def from_dict(cls, data: dict[str, Any]) -> "JinaRanker": """ Deserializes the component from a dictionary. :param data: @@ -96,17 +96,17 @@ def from_dict(cls, data: Dict[str, Any]) -> "JinaRanker": deserialize_secrets_inplace(data["init_parameters"], keys=["api_key"]) return default_from_dict(cls, data) - def _get_telemetry_data(self) -> Dict[str, Any]: + def _get_telemetry_data(self) -> dict[str, Any]: """ Data that is sent to Posthog for usage analytics. 
""" return {"model": self.model} - @component.output_types(documents=List[Document]) + @component.output_types(documents=list[Document]) def run( self, query: str, - documents: List[Document], + documents: list[Document], top_k: Optional[int] = None, score_threshold: Optional[float] = None, ): @@ -155,7 +155,7 @@ def run( results = resp["results"] - ranked_docs: List[Document] = [] + ranked_docs: list[Document] = [] for result in results: index = result["index"] relevance_score = result["relevance_score"] diff --git a/integrations/langfuse/pyproject.toml b/integrations/langfuse/pyproject.toml index 4890fc6df9..712f0aa361 100644 --- a/integrations/langfuse/pyproject.toml +++ b/integrations/langfuse/pyproject.toml @@ -82,7 +82,7 @@ allow-direct-references = true [tool.ruff] -target-version = "py38" +target-version = "py39" line-length = 120 [tool.ruff.lint] diff --git a/integrations/langfuse/src/haystack_integrations/components/connectors/langfuse/langfuse_connector.py b/integrations/langfuse/src/haystack_integrations/components/connectors/langfuse/langfuse_connector.py index 9cdb2d9c29..385c8e3ec3 100644 --- a/integrations/langfuse/src/haystack_integrations/components/connectors/langfuse/langfuse_connector.py +++ b/integrations/langfuse/src/haystack_integrations/components/connectors/langfuse/langfuse_connector.py @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: Apache-2.0 -from typing import Any, Dict, Optional +from typing import Any, Optional import httpx from haystack import component, default_from_dict, default_to_dict, logging, tracing @@ -124,7 +124,7 @@ def __init__( span_handler: Optional[SpanHandler] = None, *, host: Optional[str] = None, - langfuse_client_kwargs: Optional[Dict[str, Any]] = None, + langfuse_client_kwargs: Optional[dict[str, Any]] = None, ) -> None: """ Initialize the LangfuseConnector component. @@ -172,7 +172,7 @@ def __init__( tracing.enable_tracing(self.tracer) @component.output_types(name=str, trace_url=str, trace_id=str) - def run(self, invocation_context: Optional[Dict[str, Any]] = None) -> Dict[str, str]: + def run(self, invocation_context: Optional[dict[str, Any]] = None) -> dict[str, str]: """ Runs the LangfuseConnector component. @@ -191,7 +191,7 @@ def run(self, invocation_context: Optional[Dict[str, Any]] = None) -> Dict[str, ) return {"name": self.name, "trace_url": self.tracer.get_trace_url(), "trace_id": self.tracer.get_trace_id()} - def to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> dict[str, Any]: """ Serialize this component to a dictionary. @@ -218,7 +218,7 @@ def to_dict(self) -> Dict[str, Any]: ) @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "LangfuseConnector": + def from_dict(cls, data: dict[str, Any]) -> "LangfuseConnector": """ Deserialize this component from a dictionary. 
diff --git a/integrations/langfuse/src/haystack_integrations/tracing/langfuse/tracer.py b/integrations/langfuse/src/haystack_integrations/tracing/langfuse/tracer.py index 5c4fda0f67..a2851d19c5 100644 --- a/integrations/langfuse/src/haystack_integrations/tracing/langfuse/tracer.py +++ b/integrations/langfuse/src/haystack_integrations/tracing/langfuse/tracer.py @@ -6,11 +6,12 @@ import os from abc import ABC, abstractmethod from collections import Counter +from collections.abc import Iterator from contextlib import AbstractContextManager from contextvars import ContextVar from dataclasses import dataclass from datetime import datetime -from typing import Any, Dict, Iterator, List, Literal, Optional, cast +from typing import Any, Literal, Optional, cast from haystack import default_from_dict, default_to_dict, logging from haystack.dataclasses import ChatMessage @@ -42,11 +43,11 @@ # External session metadata for trace correlation (Haystack system) # Stores trace_id, user_id, session_id, tags, version for root trace creation -tracing_context_var: ContextVar[Dict[Any, Any]] = ContextVar("tracing_context") +tracing_context_var: ContextVar[dict[Any, Any]] = ContextVar("tracing_context") # Internal span execution hierarchy for our tracer # Manages parent-child relationships and prevents cross-request span interleaving -span_stack_var: ContextVar[Optional[List["LangfuseSpan"]]] = ContextVar("span_stack", default=None) +span_stack_var: ContextVar[Optional[list["LangfuseSpan"]]] = ContextVar("span_stack", default=None) class LangfuseSpan(Span): @@ -63,7 +64,7 @@ def __init__(self, context_manager: AbstractContextManager) -> None: `langfuse.get_client().start_as_current_observation`. """ self._span = context_manager.__enter__() - self._data: Dict[str, Any] = {} + self._data: dict[str, Any] = {} self._context_manager = context_manager def set_tag(self, key: str, value: Any) -> None: @@ -114,7 +115,7 @@ def raw_span(self) -> LangfuseClientSpan: """ return self._span - def get_data(self) -> Dict[str, Any]: + def get_data(self) -> dict[str, Any]: """ Return the data associated with the span. @@ -122,7 +123,7 @@ def get_data(self) -> Dict[str, Any]: """ return self._data - def get_correlation_data_for_logs(self) -> Dict[str, Any]: + def get_correlation_data_for_logs(self) -> dict[str, Any]: return {} @@ -149,7 +150,7 @@ class SpanContext: name: str operation_name: str component_type: Optional[str] - tags: Dict[str, Any] + tags: dict[str, Any] parent_span: Optional[Span] trace_name: str = "Haystack" public: bool = False @@ -231,14 +232,14 @@ def handle(self, span: LangfuseSpan, component_type: Optional[str]) -> None: pass @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "SpanHandler": + def from_dict(cls, data: dict[str, Any]) -> "SpanHandler": return default_from_dict(cls, data) - def to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> dict[str, Any]: return default_to_dict(self) -def _sanitize_usage_data(usage: Dict[str, Any]) -> Dict[str, Any]: +def _sanitize_usage_data(usage: dict[str, Any]) -> dict[str, Any]: """ Sanitize usage data for Langfuse by flattening to a single-level dictionary. 
@@ -253,9 +254,9 @@ def _sanitize_usage_data(usage: Dict[str, Any]) -> Dict[str, Any]: if not isinstance(usage, dict): return {} - sanitized: Dict[str, Any] = {} + sanitized: dict[str, Any] = {} - def _flatten(data: Dict[str, Any], prefix: str = "") -> None: + def _flatten(data: dict[str, Any], prefix: str = "") -> None: """Recursively flatten nested dictionaries.""" for key, value in data.items(): full_key = f"{prefix}.{key}" if prefix else key @@ -354,7 +355,7 @@ def handle(self, span: LangfuseSpan, component_type: Optional[str]) -> None: span.raw_span().update_trace(input=coerced_input, output=coerced_output) # special case for ToolInvoker (to update the span name to be: `original_component_name - [tool_names]`) if component_type == "ToolInvoker": - tool_names: List[str] = [] + tool_names: list[str] = [] messages = span.get_data().get(_COMPONENT_INPUT_KEY, {}).get("messages", []) for message in messages: if isinstance(message, ChatMessage) and message.tool_calls: @@ -424,7 +425,7 @@ def __init__( ) self._tracer = tracer # Keep _context as deprecated shim to avoid AttributeError if anyone uses it - self._context: List[LangfuseSpan] = [] + self._context: list[LangfuseSpan] = [] self._name = name self._public = public self.enforce_flush = os.getenv(HAYSTACK_LANGFUSE_ENFORCE_FLUSH_ENV_VAR, "true").lower() == "true" @@ -433,7 +434,7 @@ def __init__( @contextlib.contextmanager def trace( - self, operation_name: str, tags: Optional[Dict[str, Any]] = None, parent_span: Optional[Span] = None + self, operation_name: str, tags: Optional[dict[str, Any]] = None, parent_span: Optional[Span] = None ) -> Iterator[Span]: tags = tags or {} span_name = tags.get(_COMPONENT_NAME_KEY, operation_name) diff --git a/integrations/langfuse/tests/test_tracing.py b/integrations/langfuse/tests/test_tracing.py index fbd30cffcc..2b003b742f 100644 --- a/integrations/langfuse/tests/test_tracing.py +++ b/integrations/langfuse/tests/test_tracing.py @@ -5,7 +5,7 @@ import json import os import time -from typing import Any, Dict, List +from typing import Any from urllib.parse import urlparse import pytest @@ -137,8 +137,8 @@ def __init__(self): self.sub_pipeline = Pipeline() self.sub_pipeline.add_component("llm", OpenAIChatGenerator()) - @component.output_types(replies=List[ChatMessage]) - def run(self, messages: List[ChatMessage]) -> Dict[str, Any]: + @component.output_types(replies=list[ChatMessage]) + def run(self, messages: list[ChatMessage]) -> dict[str, Any]: return {"replies": self.sub_pipeline.run(data={"llm": {"messages": messages}})["llm"]["replies"]} @component @@ -149,8 +149,8 @@ def __init__(self): self.sub_pipeline.add_component("sub_llm", SubGenerator()) self.sub_pipeline.connect("prompt_builder.prompt", "sub_llm.messages") - @component.output_types(replies=List[ChatMessage]) - def run(self, messages: List[ChatMessage]) -> Dict[str, Any]: + @component.output_types(replies=list[ChatMessage]) + def run(self, messages: list[ChatMessage]) -> dict[str, Any]: return { "replies": self.sub_pipeline.run( data={"prompt_builder": {"template": messages, "template_variables": {"location": "Berlin"}}} diff --git a/integrations/llama_cpp/pyproject.toml b/integrations/llama_cpp/pyproject.toml index b8a2e16b02..597a18f986 100644 --- a/integrations/llama_cpp/pyproject.toml +++ b/integrations/llama_cpp/pyproject.toml @@ -92,7 +92,7 @@ known-first-party = ["haystack_integrations"] [tool.ruff] -target-version = "py38" +target-version = "py39" line-length = 120 [tool.ruff.lint] diff --git 
a/integrations/llama_cpp/src/haystack_integrations/components/generators/llama_cpp/chat/chat_generator.py b/integrations/llama_cpp/src/haystack_integrations/components/generators/llama_cpp/chat/chat_generator.py index 2ef77f0278..93ed5ca918 100644 --- a/integrations/llama_cpp/src/haystack_integrations/components/generators/llama_cpp/chat/chat_generator.py +++ b/integrations/llama_cpp/src/haystack_integrations/components/generators/llama_cpp/chat/chat_generator.py @@ -1,6 +1,7 @@ import json +from collections.abc import Iterator from datetime import datetime, timezone -from typing import Any, Dict, Iterator, List, Optional, Union +from typing import Any, Optional, Union from haystack import component, default_from_dict, default_to_dict, logging from haystack.components.generators.utils import _convert_streaming_chunks_to_chat_message @@ -42,7 +43,7 @@ logger = logging.getLogger(__name__) -FINISH_REASON_MAPPING: Dict[str, FinishReason] = { +FINISH_REASON_MAPPING: dict[str, FinishReason] = { "stop": "stop", "length": "length", "tool_calls": "tool_calls", @@ -124,7 +125,7 @@ def _convert_message_to_llamacpp_format(message: ChatMessage) -> ChatCompletionR result["content"] = text_contents[0] if tool_calls: - llamacpp_tool_calls: List[ChatCompletionMessageToolCall] = [] + llamacpp_tool_calls: list[ChatCompletionMessageToolCall] = [] for tc in tool_calls: if tc.id is None: msg = "`ToolCall` must have a non-null `id` attribute to be used with llama.cpp." @@ -193,8 +194,8 @@ def __init__( model: str, n_ctx: Optional[int] = 0, n_batch: Optional[int] = 512, - model_kwargs: Optional[Dict[str, Any]] = None, - generation_kwargs: Optional[Dict[str, Any]] = None, + model_kwargs: Optional[dict[str, Any]] = None, + generation_kwargs: Optional[dict[str, Any]] = None, *, tools: Optional[ToolsType] = None, streaming_callback: Optional[StreamingCallbackT] = None, @@ -278,7 +279,7 @@ def warm_up(self): self._model = Llama(**kwargs) - def to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> dict[str, Any]: """ Serializes the component to a dictionary. @@ -300,7 +301,7 @@ def to_dict(self) -> Dict[str, Any]: ) @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "LlamaCppChatGenerator": + def from_dict(cls, data: dict[str, Any]) -> "LlamaCppChatGenerator": """ Deserializes the component from a dictionary. @@ -319,15 +320,15 @@ def from_dict(cls, data: Dict[str, Any]) -> "LlamaCppChatGenerator": ) return default_from_dict(cls, data) - @component.output_types(replies=List[ChatMessage]) + @component.output_types(replies=list[ChatMessage]) def run( self, - messages: List[ChatMessage], - generation_kwargs: Optional[Dict[str, Any]] = None, + messages: list[ChatMessage], + generation_kwargs: Optional[dict[str, Any]] = None, *, tools: Optional[ToolsType] = None, streaming_callback: Optional[StreamingCallbackT] = None, - ) -> Dict[str, List[ChatMessage]]: + ) -> dict[str, list[ChatMessage]]: """ Run the text generation model on the given list of ChatMessages. 
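For anyone trying the chat generator after this change, a usage sketch (the GGUF path is a placeholder, and the generation_kwargs values are illustrative):

from haystack.dataclasses import ChatMessage
from haystack_integrations.components.generators.llama_cpp import LlamaCppChatGenerator

generator = LlamaCppChatGenerator(
    model="models/zephyr-7b-beta.Q4_0.gguf",  # placeholder path to a local quantized model
    n_ctx=2048,
    generation_kwargs={"max_tokens": 128, "temperature": 0.1},
)
generator.warm_up()  # loads the Llama model
result = generator.run(messages=[ChatMessage.from_user("What is Haystack?")])
print(result["replies"][0].text)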
@@ -359,7 +360,7 @@ def run( flattened_tools = flatten_tools_or_toolsets(tools) _check_duplicate_tool_names(flattened_tools) - llamacpp_tools: List[ChatCompletionTool] = [] + llamacpp_tools: list[ChatCompletionTool] = [] if flattened_tools: for t in flattened_tools: llamacpp_tools.append( @@ -408,7 +409,7 @@ def _handle_streaming_response( response_stream: Iterator[CreateChatCompletionStreamResponse], streaming_callback: SyncStreamingCallbackT, component_info: ComponentInfo, - ) -> Dict[str, List[ChatMessage]]: + ) -> dict[str, list[ChatMessage]]: """ Take streaming responses from llama.cpp, convert to Haystack StreamingChunk objects, stream them, and finally convert them to a ChatMessage. @@ -434,7 +435,7 @@ def _handle_streaming_response( if chunk.get("choices") and len(chunk["choices"]) > 0: choice = chunk["choices"][0] - delta: Union[ChatCompletionStreamResponseDelta, ChatCompletionStreamResponseDeltaEmpty, Dict] = ( + delta: Union[ChatCompletionStreamResponseDelta, ChatCompletionStreamResponseDeltaEmpty, dict] = ( choice.get("delta", {}) ) diff --git a/integrations/llama_cpp/src/haystack_integrations/components/generators/llama_cpp/generator.py b/integrations/llama_cpp/src/haystack_integrations/components/generators/llama_cpp/generator.py index 32c867894b..0a3fee97c4 100644 --- a/integrations/llama_cpp/src/haystack_integrations/components/generators/llama_cpp/generator.py +++ b/integrations/llama_cpp/src/haystack_integrations/components/generators/llama_cpp/generator.py @@ -1,4 +1,4 @@ -from typing import Any, Dict, List, Optional, Union +from typing import Any, Optional, Union from haystack import component, logging @@ -30,8 +30,8 @@ def __init__( model: str, n_ctx: Optional[int] = 0, n_batch: Optional[int] = 512, - model_kwargs: Optional[Dict[str, Any]] = None, - generation_kwargs: Optional[Dict[str, Any]] = None, + model_kwargs: Optional[dict[str, Any]] = None, + generation_kwargs: Optional[dict[str, Any]] = None, ): """ :param model: The path of a quantized model for text generation, for example, "zephyr-7b-beta.Q4_0.gguf". @@ -68,10 +68,10 @@ def warm_up(self): if self.model is None: self.model = Llama(**self.model_kwargs) - @component.output_types(replies=List[str], meta=List[Dict[str, Any]]) + @component.output_types(replies=list[str], meta=list[dict[str, Any]]) def run( - self, prompt: str, generation_kwargs: Optional[Dict[str, Any]] = None - ) -> Dict[str, Union[List[str], List[Dict[str, Any]]]]: + self, prompt: str, generation_kwargs: Optional[dict[str, Any]] = None + ) -> dict[str, Union[list[str], list[dict[str, Any]]]]: """ Run the text generation model on the given prompt. 
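And the equivalent sketch for the non-chat generator above, which returns plain strings plus per-reply metadata, matching the updated output types (same placeholder model path):

from haystack_integrations.components.generators.llama_cpp import LlamaCppGenerator

llm = LlamaCppGenerator(model="models/zephyr-7b-beta.Q4_0.gguf", n_ctx=2048, n_batch=512)
llm.warm_up()
out = llm.run(prompt="Briefly introduce Haystack.", generation_kwargs={"max_tokens": 128})
replies: list[str] = out["replies"]
meta: list[dict] = out["meta"]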
diff --git a/integrations/llama_stack/pyproject.toml b/integrations/llama_stack/pyproject.toml index c38de32489..146a61f103 100644 --- a/integrations/llama_stack/pyproject.toml +++ b/integrations/llama_stack/pyproject.toml @@ -73,7 +73,7 @@ disallow_incomplete_defs = true [tool.ruff] -target-version = "py38" +target-version = "py39" line-length = 120 [tool.ruff.lint] diff --git a/integrations/llama_stack/src/haystack_integrations/components/generators/llama_stack/chat/chat_generator.py b/integrations/llama_stack/src/haystack_integrations/components/generators/llama_stack/chat/chat_generator.py index 10adb009e3..b633f1b5fb 100644 --- a/integrations/llama_stack/src/haystack_integrations/components/generators/llama_stack/chat/chat_generator.py +++ b/integrations/llama_stack/src/haystack_integrations/components/generators/llama_stack/chat/chat_generator.py @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: Apache-2.0 -from typing import Any, Dict, Optional +from typing import Any, Optional from haystack import component, default_from_dict, default_to_dict, logging from haystack.components.generators.chat import OpenAIChatGenerator @@ -60,12 +60,12 @@ def __init__( api_base_url: str = "http://localhost:8321/v1/openai/v1", organization: Optional[str] = None, streaming_callback: Optional[StreamingCallbackT] = None, - generation_kwargs: Optional[Dict[str, Any]] = None, + generation_kwargs: Optional[dict[str, Any]] = None, timeout: Optional[int] = None, tools: Optional[ToolsType] = None, tools_strict: bool = False, max_retries: Optional[int] = None, - http_client_kwargs: Optional[Dict[str, Any]] = None, + http_client_kwargs: Optional[dict[str, Any]] = None, ): """ Creates an instance of LlamaStackChatGenerator. To use this chat generator, @@ -129,7 +129,7 @@ def __init__( http_client_kwargs=http_client_kwargs, ) - def to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> dict[str, Any]: """ Serialize this component to a dictionary. @@ -152,7 +152,7 @@ def to_dict(self) -> Dict[str, Any]: ) @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "LlamaStackChatGenerator": + def from_dict(cls, data: dict[str, Any]) -> "LlamaStackChatGenerator": """ Deserialize this component from a dictionary. 
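A quick usage sketch for this component, assuming a Llama Stack server is already running on the default local URL (the model id below is hypothetical; use whatever your instance serves):

from haystack.dataclasses import ChatMessage
from haystack_integrations.components.generators.llama_stack import LlamaStackChatGenerator

client = LlamaStackChatGenerator(
    model="llama3.2:3b",  # hypothetical model id
    api_base_url="http://localhost:8321/v1/openai/v1",
)
response = client.run(messages=[ChatMessage.from_user("Summarize RAG in one sentence.")])
print(response["replies"][0].text)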
diff --git a/integrations/meta_llama/pyproject.toml b/integrations/meta_llama/pyproject.toml index e831adfc85..c05ae2d685 100644 --- a/integrations/meta_llama/pyproject.toml +++ b/integrations/meta_llama/pyproject.toml @@ -75,7 +75,7 @@ check_untyped_defs = true disallow_incomplete_defs = true [tool.ruff] -target-version = "py38" +target-version = "py39" line-length = 120 [tool.ruff.lint] diff --git a/integrations/meta_llama/src/haystack_integrations/components/generators/meta_llama/chat/chat_generator.py b/integrations/meta_llama/src/haystack_integrations/components/generators/meta_llama/chat/chat_generator.py index ebfc529314..39fea4341e 100644 --- a/integrations/meta_llama/src/haystack_integrations/components/generators/meta_llama/chat/chat_generator.py +++ b/integrations/meta_llama/src/haystack_integrations/components/generators/meta_llama/chat/chat_generator.py @@ -3,7 +3,7 @@ # # SPDX-License-Identifier: Apache-2.0 -from typing import Any, Dict, Optional +from typing import Any, Optional from haystack import component, default_to_dict, logging from haystack.components.generators.chat import OpenAIChatGenerator @@ -61,7 +61,7 @@ def __init__( model: str = "Llama-4-Scout-17B-16E-Instruct-FP8", streaming_callback: Optional[StreamingCallbackT] = None, api_base_url: Optional[str] = "https://api.llama.com/compat/v1/", - generation_kwargs: Optional[Dict[str, Any]] = None, + generation_kwargs: Optional[dict[str, Any]] = None, tools: Optional[ToolsType] = None, ): """ @@ -134,7 +134,7 @@ def _prepare_api_call( api_args.pop("response_format") return api_args - def to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> dict[str, Any]: """ Serialize this component to a dictionary. diff --git a/integrations/mistral/examples/indexing_ocr_pipeline.py b/integrations/mistral/examples/indexing_ocr_pipeline.py index 0258231b78..ec1e358698 100644 --- a/integrations/mistral/examples/indexing_ocr_pipeline.py +++ b/integrations/mistral/examples/indexing_ocr_pipeline.py @@ -9,7 +9,6 @@ # You can customize the ImageAnnotation and DocumentAnnotation schemas below # to extract different structured information from your documents. 
-from typing import List from haystack import Pipeline from haystack.components.writers import DocumentWriter @@ -34,8 +33,8 @@ class ImageAnnotation(BaseModel): # Define schema for structured document annotations class DocumentAnnotation(BaseModel): language: str = Field(..., description="Primary language of the document") - urls: List[str] = Field(..., description="URLs found in the document") - topics: List[str] = Field(..., description="Main topics covered in the document") + urls: list[str] = Field(..., description="URLs found in the document") + topics: list[str] = Field(..., description="Main topics covered in the document") # Initialize document store diff --git a/integrations/mistral/pyproject.toml b/integrations/mistral/pyproject.toml index de8b398fcc..e573746430 100644 --- a/integrations/mistral/pyproject.toml +++ b/integrations/mistral/pyproject.toml @@ -78,7 +78,7 @@ check_untyped_defs = true disallow_incomplete_defs = true [tool.ruff] -target-version = "py38" +target-version = "py39" line-length = 120 [tool.ruff.lint] diff --git a/integrations/mistral/src/haystack_integrations/components/converters/mistral/ocr_document_converter.py b/integrations/mistral/src/haystack_integrations/components/converters/mistral/ocr_document_converter.py index 18e4a82db1..156461f2f1 100644 --- a/integrations/mistral/src/haystack_integrations/components/converters/mistral/ocr_document_converter.py +++ b/integrations/mistral/src/haystack_integrations/components/converters/mistral/ocr_document_converter.py @@ -1,7 +1,7 @@ import json import re from pathlib import Path -from typing import Any, Dict, List, Optional, Type, Union +from typing import Any, Optional, Union from haystack import Document, component, default_from_dict, default_to_dict, logging from haystack.components.converters.utils import ( @@ -103,7 +103,7 @@ def __init__( api_key: Secret = Secret.from_env_var("MISTRAL_API_KEY"), model: str = "mistral-ocr-2505", include_image_base64: bool = False, - pages: Optional[List[int]] = None, + pages: Optional[list[int]] = None, image_limit: Optional[int] = None, image_min_size: Optional[int] = None, cleanup_uploaded_files: bool = True, @@ -141,7 +141,7 @@ def __init__( # Initialize Mistral client self.client = Mistral(api_key=self.api_key.resolve_value()) - def to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> dict[str, Any]: """ Serializes the component to a dictionary. @@ -160,7 +160,7 @@ def to_dict(self) -> Dict[str, Any]: ) @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "MistralOCRDocumentConverter": + def from_dict(cls, data: dict[str, Any]) -> "MistralOCRDocumentConverter": """ Deserializes the component from a dictionary. 
@@ -172,14 +172,14 @@ def from_dict(cls, data: Dict[str, Any]) -> "MistralOCRDocumentConverter": deserialize_secrets_inplace(data["init_parameters"], keys=["api_key"]) return default_from_dict(cls, data) - @component.output_types(documents=List[Document], raw_mistral_response=List[Dict[str, Any]]) + @component.output_types(documents=list[Document], raw_mistral_response=list[dict[str, Any]]) def run( self, - sources: List[Union[str, Path, ByteStream, DocumentURLChunk, FileChunk, ImageURLChunk]], - meta: Optional[Union[Dict[str, Any], List[Dict[str, Any]]]] = None, - bbox_annotation_schema: Optional[Type[BaseModel]] = None, - document_annotation_schema: Optional[Type[BaseModel]] = None, - ) -> Dict[str, Any]: + sources: list[Union[str, Path, ByteStream, DocumentURLChunk, FileChunk, ImageURLChunk]], + meta: Optional[Union[dict[str, Any], list[dict[str, Any]]]] = None, + bbox_annotation_schema: Optional[type[BaseModel]] = None, + document_annotation_schema: Optional[type[BaseModel]] = None, + ) -> dict[str, Any]: """ Extract text from documents using Mistral OCR. @@ -260,11 +260,11 @@ def run( def _process_single_source( self, source: Union[str, Path, ByteStream, DocumentURLChunk, FileChunk, ImageURLChunk], - user_metadata: Dict[str, Any], + user_metadata: dict[str, Any], bbox_annotation_format: Optional[Any], document_annotation_format: Optional[Any], - document_annotation_schema: Optional[Type[BaseModel]], - ) -> tuple[Optional[Document], Optional[Dict[str, Any]], Optional[str]]: + document_annotation_schema: Optional[type[BaseModel]], + ) -> tuple[Optional[Document], Optional[dict[str, Any]], Optional[str]]: """ Process a single source and return the document, raw response, and file_id if uploaded. @@ -312,7 +312,7 @@ def _process_single_source( ) return (None, None, uploaded_file_id) - def _cleanup_uploaded_files(self, file_ids: List[str]) -> None: + def _cleanup_uploaded_files(self, file_ids: list[str]) -> None: """ Delete uploaded files from Mistral storage. @@ -370,8 +370,8 @@ def _convert_source_to_chunk( def _process_ocr_response( self, ocr_response: OCRResponse, - user_metadata: Dict[str, Any], - document_annotation_schema: Optional[Type[BaseModel]], + user_metadata: dict[str, Any], + document_annotation_schema: Optional[type[BaseModel]], ) -> Document: """ Convert an OCR response from Mistral API into a single Haystack Document. 
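To see the annotation flow end to end, a sketch of calling the converter with a document-level schema (the source path is a placeholder; MISTRAL_API_KEY is read from the environment by default):

from pydantic import BaseModel, Field
from haystack_integrations.components.converters.mistral import MistralOCRDocumentConverter

class DocumentAnnotation(BaseModel):
    language: str = Field(..., description="Primary language of the document")
    topics: list[str] = Field(..., description="Main topics covered in the document")

converter = MistralOCRDocumentConverter(model="mistral-ocr-2505")
result = converter.run(
    sources=["report.pdf"],  # placeholder; URLs, ByteStream, and chunk objects are also accepted
    document_annotation_schema=DocumentAnnotation,
)
documents = result["documents"]
raw_responses = result["raw_mistral_response"]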
diff --git a/integrations/mistral/src/haystack_integrations/components/embedders/mistral/document_embedder.py b/integrations/mistral/src/haystack_integrations/components/embedders/mistral/document_embedder.py index fbbe8e594d..b87f33d7d4 100644 --- a/integrations/mistral/src/haystack_integrations/components/embedders/mistral/document_embedder.py +++ b/integrations/mistral/src/haystack_integrations/components/embedders/mistral/document_embedder.py @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: 2023-present deepset GmbH # # SPDX-License-Identifier: Apache-2.0 -from typing import Any, Dict, List, Optional +from typing import Any, Optional from haystack import component, default_to_dict from haystack.components.embedders import OpenAIDocumentEmbedder @@ -39,12 +39,12 @@ def __init__( suffix: str = "", batch_size: int = 32, progress_bar: bool = True, - meta_fields_to_embed: Optional[List[str]] = None, + meta_fields_to_embed: Optional[list[str]] = None, embedding_separator: str = "\n", *, timeout: Optional[float] = None, max_retries: Optional[int] = None, - http_client_kwargs: Optional[Dict[str, Any]] = None, + http_client_kwargs: Optional[dict[str, Any]] = None, ): """ Creates a MistralDocumentEmbedder component. @@ -98,7 +98,7 @@ def __init__( self.timeout = timeout self.max_retries = max_retries - def to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> dict[str, Any]: """ Serializes the component to a dictionary. diff --git a/integrations/mistral/src/haystack_integrations/components/embedders/mistral/text_embedder.py b/integrations/mistral/src/haystack_integrations/components/embedders/mistral/text_embedder.py index 24d4ef21b9..74e9b5f365 100644 --- a/integrations/mistral/src/haystack_integrations/components/embedders/mistral/text_embedder.py +++ b/integrations/mistral/src/haystack_integrations/components/embedders/mistral/text_embedder.py @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: 2023-present deepset GmbH # # SPDX-License-Identifier: Apache-2.0 -from typing import Any, Dict, Optional +from typing import Any, Optional from haystack import component, default_to_dict from haystack.components.embedders import OpenAITextEmbedder @@ -38,7 +38,7 @@ def __init__( *, timeout: Optional[float] = None, max_retries: Optional[int] = None, - http_client_kwargs: Optional[Dict[str, Any]] = None, + http_client_kwargs: Optional[dict[str, Any]] = None, ): """ Creates a MistralTextEmbedder component. @@ -80,7 +80,7 @@ def __init__( self.timeout = timeout self.max_retries = max_retries - def to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> dict[str, Any]: """ Serializes the component to a dictionary.
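A minimal indexing/query sketch for the two embedders above (MISTRAL_API_KEY is expected in the environment; the model is left at the component default):

from haystack import Document
from haystack_integrations.components.embedders.mistral import (
    MistralDocumentEmbedder,
    MistralTextEmbedder,
)

doc_embedder = MistralDocumentEmbedder()
docs = doc_embedder.run(documents=[Document(content="Mistral is headquartered in Paris.")])["documents"]

text_embedder = MistralTextEmbedder()
query_embedding = text_embedder.run(text="Where is Mistral headquartered?")["embedding"]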
diff --git a/integrations/mistral/src/haystack_integrations/components/generators/mistral/chat/chat_generator.py b/integrations/mistral/src/haystack_integrations/components/generators/mistral/chat/chat_generator.py index 2318473882..990d625760 100644 --- a/integrations/mistral/src/haystack_integrations/components/generators/mistral/chat/chat_generator.py +++ b/integrations/mistral/src/haystack_integrations/components/generators/mistral/chat/chat_generator.py @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: Apache-2.0 -from typing import Any, Dict, Optional +from typing import Any, Optional from haystack import component, default_to_dict, logging from haystack.components.generators.chat import OpenAIChatGenerator @@ -65,12 +65,12 @@ def __init__( model: str = "mistral-small-latest", streaming_callback: Optional[StreamingCallbackT] = None, api_base_url: Optional[str] = "https://api.mistral.ai/v1", - generation_kwargs: Optional[Dict[str, Any]] = None, + generation_kwargs: Optional[dict[str, Any]] = None, tools: Optional[ToolsType] = None, *, timeout: Optional[float] = None, max_retries: Optional[int] = None, - http_client_kwargs: Optional[Dict[str, Any]] = None, + http_client_kwargs: Optional[dict[str, Any]] = None, ): """ Creates an instance of MistralChatGenerator. Unless specified otherwise in the `model`, this is for Mistral's @@ -155,7 +155,7 @@ def _prepare_api_call( api_args.pop("response_format") return api_args - def to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> dict[str, Any]: """ Serialize this component to a dictionary. diff --git a/integrations/mistral/tests/test_ocr_document_converter.py b/integrations/mistral/tests/test_ocr_document_converter.py index 49ebecbb04..a363a65a5e 100644 --- a/integrations/mistral/tests/test_ocr_document_converter.py +++ b/integrations/mistral/tests/test_ocr_document_converter.py @@ -2,7 +2,6 @@ # # SPDX-License-Identifier: Apache-2.0 import os -from typing import List from unittest.mock import MagicMock, patch import pytest @@ -320,7 +319,7 @@ def test_run_with_document_annotations(self): # Define annotation schema class DocumentAnnotation(BaseModel): language: str = Field(..., description="Document language") - topics: List[str] = Field(..., description="Main topics") + topics: list[str] = Field(..., description="Main topics") # Create mock response with document annotation mock_page = MagicMock() diff --git a/integrations/mongodb_atlas/pyproject.toml b/integrations/mongodb_atlas/pyproject.toml index 490c0bc67f..d2460df5f3 100644 --- a/integrations/mongodb_atlas/pyproject.toml +++ b/integrations/mongodb_atlas/pyproject.toml @@ -80,7 +80,7 @@ disallow_incomplete_defs = true [tool.ruff] -target-version = "py38" +target-version = "py39" line-length = 120 [tool.ruff.lint] diff --git a/integrations/mongodb_atlas/src/haystack_integrations/components/retrievers/mongodb_atlas/embedding_retriever.py b/integrations/mongodb_atlas/src/haystack_integrations/components/retrievers/mongodb_atlas/embedding_retriever.py index 9e605d8bf9..1822e353ce 100644 --- a/integrations/mongodb_atlas/src/haystack_integrations/components/retrievers/mongodb_atlas/embedding_retriever.py +++ b/integrations/mongodb_atlas/src/haystack_integrations/components/retrievers/mongodb_atlas/embedding_retriever.py @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: 2023-present deepset GmbH # # SPDX-License-Identifier: Apache-2.0 -from typing import Any, Dict, List, Optional, Union +from typing import Any, Optional, Union from haystack import component, default_from_dict, default_to_dict from 
haystack.dataclasses import Document @@ -45,7 +45,7 @@ def __init__( self, *, document_store: MongoDBAtlasDocumentStore, - filters: Optional[Dict[str, Any]] = None, + filters: Optional[dict[str, Any]] = None, top_k: int = 10, filter_policy: Union[str, FilterPolicy] = FilterPolicy.REPLACE, ): @@ -72,7 +72,7 @@ def __init__( filter_policy if isinstance(filter_policy, FilterPolicy) else FilterPolicy.from_str(filter_policy) ) - def to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> dict[str, Any]: """ Serializes the component to a dictionary. @@ -88,7 +88,7 @@ def to_dict(self) -> Dict[str, Any]: ) @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "MongoDBAtlasEmbeddingRetriever": + def from_dict(cls, data: dict[str, Any]) -> "MongoDBAtlasEmbeddingRetriever": """ Deserializes the component from a dictionary. @@ -106,13 +106,13 @@ def from_dict(cls, data: Dict[str, Any]) -> "MongoDBAtlasEmbeddingRetriever": data["init_parameters"]["filter_policy"] = FilterPolicy.from_str(filter_policy) return default_from_dict(cls, data) - @component.output_types(documents=List[Document]) + @component.output_types(documents=list[Document]) def run( self, - query_embedding: List[float], - filters: Optional[Dict[str, Any]] = None, + query_embedding: list[float], + filters: Optional[dict[str, Any]] = None, top_k: Optional[int] = None, - ) -> Dict[str, List[Document]]: + ) -> dict[str, list[Document]]: """ Retrieve documents from the MongoDBAtlasDocumentStore, based on the provided embedding similarity. @@ -134,13 +134,13 @@ def run( ) return {"documents": docs} - @component.output_types(documents=List[Document]) + @component.output_types(documents=list[Document]) async def run_async( self, - query_embedding: List[float], - filters: Optional[Dict[str, Any]] = None, + query_embedding: list[float], + filters: Optional[dict[str, Any]] = None, top_k: Optional[int] = None, - ) -> Dict[str, List[Document]]: + ) -> dict[str, list[Document]]: """ Asynchronously retrieve documents from the MongoDBAtlasDocumentStore, based on the provided embedding similarity. diff --git a/integrations/mongodb_atlas/src/haystack_integrations/components/retrievers/mongodb_atlas/full_text_retriever.py b/integrations/mongodb_atlas/src/haystack_integrations/components/retrievers/mongodb_atlas/full_text_retriever.py index 3bab4dc5e8..0dfbd8e68e 100644 --- a/integrations/mongodb_atlas/src/haystack_integrations/components/retrievers/mongodb_atlas/full_text_retriever.py +++ b/integrations/mongodb_atlas/src/haystack_integrations/components/retrievers/mongodb_atlas/full_text_retriever.py @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: Apache-2.0 -from typing import Any, Dict, List, Literal, Optional, Union +from typing import Any, Literal, Optional, Union from haystack import component, default_from_dict, default_to_dict from haystack.dataclasses import Document @@ -43,7 +43,7 @@ def __init__( self, *, document_store: MongoDBAtlasDocumentStore, - filters: Optional[Dict[str, Any]] = None, + filters: Optional[dict[str, Any]] = None, top_k: int = 10, filter_policy: Union[str, FilterPolicy] = FilterPolicy.REPLACE, ): @@ -69,7 +69,7 @@ def __init__( filter_policy if isinstance(filter_policy, FilterPolicy) else FilterPolicy.from_str(filter_policy) ) - def to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> dict[str, Any]: """ Serializes the component to a dictionary. 
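For orientation, a usage sketch of the embedding retriever above. The database and collection names are hypothetical, the index names mirror the ones used in the tests further below, and the connection string is assumed to resolve from the MONGO_CONNECTION_STRING environment variable via the store's default secret handling:

from haystack_integrations.components.retrievers.mongodb_atlas import MongoDBAtlasEmbeddingRetriever
from haystack_integrations.document_stores.mongodb_atlas import MongoDBAtlasDocumentStore

store = MongoDBAtlasDocumentStore(
    database_name="haystack_db",        # hypothetical
    collection_name="documents",        # hypothetical
    vector_search_index="cosine_index",
    full_text_search_index="full_text_index",
)
retriever = MongoDBAtlasEmbeddingRetriever(document_store=store, top_k=5)
docs = retriever.run(query_embedding=[0.1] * 768)["documents"]  # dimension must match the Atlas index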
@@ -85,7 +85,7 @@ def to_dict(self) -> Dict[str, Any]: ) @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "MongoDBAtlasFullTextRetriever": + def from_dict(cls, data: dict[str, Any]) -> "MongoDBAtlasFullTextRetriever": """ Deserializes the component from a dictionary. @@ -100,17 +100,17 @@ def from_dict(cls, data: Dict[str, Any]) -> "MongoDBAtlasFullTextRetriever": return default_from_dict(cls, data) - @component.output_types(documents=List[Document]) + @component.output_types(documents=list[Document]) def run( self, - query: Union[str, List[str]], - fuzzy: Optional[Dict[str, int]] = None, + query: Union[str, list[str]], + fuzzy: Optional[dict[str, int]] = None, match_criteria: Optional[Literal["any", "all"]] = None, - score: Optional[Dict[str, Dict]] = None, + score: Optional[dict[str, dict]] = None, synonyms: Optional[str] = None, - filters: Optional[Dict[str, Any]] = None, + filters: Optional[dict[str, Any]] = None, top_k: int = 10, - ) -> Dict[str, List[Document]]: + ) -> dict[str, list[Document]]: """ Retrieve documents from the MongoDBAtlasDocumentStore by full-text search. @@ -150,17 +150,17 @@ def run( return {"documents": docs} - @component.output_types(documents=List[Document]) + @component.output_types(documents=list[Document]) async def run_async( self, - query: Union[str, List[str]], - fuzzy: Optional[Dict[str, int]] = None, + query: Union[str, list[str]], + fuzzy: Optional[dict[str, int]] = None, match_criteria: Optional[Literal["any", "all"]] = None, - score: Optional[Dict[str, Dict]] = None, + score: Optional[dict[str, dict]] = None, synonyms: Optional[str] = None, - filters: Optional[Dict[str, Any]] = None, + filters: Optional[dict[str, Any]] = None, top_k: int = 10, - ) -> Dict[str, List[Document]]: + ) -> dict[str, list[Document]]: """ Asynchronously retrieve documents from the MongoDBAtlasDocumentStore by full-text search. diff --git a/integrations/mongodb_atlas/src/haystack_integrations/document_stores/mongodb_atlas/document_store.py b/integrations/mongodb_atlas/src/haystack_integrations/document_stores/mongodb_atlas/document_store.py index 0655b96605..ffa3c37f65 100644 --- a/integrations/mongodb_atlas/src/haystack_integrations/document_stores/mongodb_atlas/document_store.py +++ b/integrations/mongodb_atlas/src/haystack_integrations/document_stores/mongodb_atlas/document_store.py @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: Apache-2.0 import re -from typing import Any, Dict, List, Literal, Optional, Union +from typing import Any, Literal, Optional, Union from haystack import default_from_dict, default_to_dict, logging from haystack.dataclasses.document import Document @@ -229,7 +229,7 @@ async def _ensure_connection_setup_async(self) -> None: database = self._connection_async[self.database_name] self._collection_async = database[self.collection_name] - def to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> dict[str, Any]: """ Serializes the component to a dictionary. @@ -246,7 +246,7 @@ def to_dict(self) -> Dict[str, Any]: ) @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "MongoDBAtlasDocumentStore": + def from_dict(cls, data: dict[str, Any]) -> "MongoDBAtlasDocumentStore": """ Deserializes the component from a dictionary. 
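And the full-text counterpart, reusing the store from the previous sketch. The fuzzy options are standard Atlas Search parameters such as maxEdits; treat the values as illustrative:

from haystack_integrations.components.retrievers.mongodb_atlas import MongoDBAtlasFullTextRetriever

ft_retriever = MongoDBAtlasFullTextRetriever(document_store=store, top_k=10)
result = ft_retriever.run(
    query="fox",
    fuzzy={"maxEdits": 1},
    match_criteria="any",
)
docs = result["documents"]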
@@ -278,7 +278,7 @@ async def count_documents_async(self) -> int: assert self._collection_async is not None return await self._collection_async.count_documents({}) - def filter_documents(self, filters: Optional[Dict[str, Any]] = None) -> List[Document]: + def filter_documents(self, filters: Optional[dict[str, Any]] = None) -> list[Document]: """ Returns the documents that match the filters provided. @@ -294,7 +294,7 @@ def filter_documents(self, filters: Optional[Dict[str, Any]] = None) -> List[Doc documents = list(self._collection.find(filters)) return [self._mongo_doc_to_haystack_doc(doc) for doc in documents] - async def filter_documents_async(self, filters: Optional[Dict[str, Any]] = None) -> List[Document]: + async def filter_documents_async(self, filters: Optional[dict[str, Any]] = None) -> list[Document]: """ Asynchronously returns the documents that match the filters provided. @@ -310,7 +310,7 @@ async def filter_documents_async(self, filters: Optional[Dict[str, Any]] = None) documents = await self._collection_async.find(filters).to_list() return [self._mongo_doc_to_haystack_doc(doc) for doc in documents] - def write_documents(self, documents: List[Document], policy: DuplicatePolicy = DuplicatePolicy.NONE) -> int: + def write_documents(self, documents: list[Document], policy: DuplicatePolicy = DuplicatePolicy.NONE) -> int: """ Writes documents into the MongoDB Atlas collection. @@ -332,7 +332,7 @@ def write_documents(self, documents: List[Document], policy: DuplicatePolicy = D policy = DuplicatePolicy.FAIL mongo_documents = [self._haystack_doc_to_mongo_doc(doc) for doc in documents] - operations: List[Union[UpdateOne, InsertOne, ReplaceOne]] + operations: list[Union[UpdateOne, InsertOne, ReplaceOne]] written_docs = len(documents) if policy == DuplicatePolicy.SKIP: @@ -353,7 +353,7 @@ def write_documents(self, documents: List[Document], policy: DuplicatePolicy = D return written_docs async def write_documents_async( - self, documents: List[Document], policy: DuplicatePolicy = DuplicatePolicy.NONE + self, documents: list[Document], policy: DuplicatePolicy = DuplicatePolicy.NONE ) -> int: """ Writes documents into the MongoDB Atlas collection. @@ -377,7 +377,7 @@ async def write_documents_async( mongo_documents = [self._haystack_doc_to_mongo_doc(doc) for doc in documents] - operations: List[Union[UpdateOne, InsertOne, ReplaceOne]] + operations: list[Union[UpdateOne, InsertOne, ReplaceOne]] written_docs = len(documents) if policy == DuplicatePolicy.SKIP: @@ -399,7 +399,7 @@ async def write_documents_async( return written_docs - def delete_documents(self, document_ids: List[str]) -> None: + def delete_documents(self, document_ids: list[str]) -> None: """ Deletes all documents with matching document_ids from the document store. @@ -411,7 +411,7 @@ def delete_documents(self, document_ids: List[str]) -> None: return self._collection.delete_many(filter={"id": {"$in": document_ids}}) - async def delete_documents_async(self, document_ids: List[str]) -> None: + async def delete_documents_async(self, document_ids: list[str]) -> None: """ Asynchronously deletes all documents with matching document_ids from the document store. @@ -423,7 +423,7 @@ async def delete_documents_async(self, document_ids: List[str]) -> None: return await self._collection_async.delete_many(filter={"id": {"$in": document_ids}}) - def delete_by_filter(self, filters: Dict[str, Any]) -> int: + def delete_by_filter(self, filters: dict[str, Any]) -> int: """ Deletes all documents that match the provided filters.
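The filter dicts used throughout these methods follow Haystack's standard filter syntax, which the integration's filters.py (further below in this patch) normalizes into native MongoDB operators. A sketch against the store from the earlier example (field names and values are illustrative):

deleted_count = store.delete_by_filter(
    filters={
        "operator": "AND",
        "conditions": [
            {"field": "meta.type", "operator": "==", "value": "article"},
            {"field": "meta.year", "operator": ">=", "value": 2020},
        ],
    }
)
# _normalize_filters rewrites the comparison conditions into Mongo operators
# such as $eq / $gte, per the filters.py hunks below.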
@@ -448,7 +448,7 @@ def delete_by_filter(self, filters: Dict[str, Any]) -> int: msg = f"Failed to delete documents by filter from MongoDB Atlas: {e!s}" raise DocumentStoreError(msg) from e - async def delete_by_filter_async(self, filters: Dict[str, Any]) -> int: + async def delete_by_filter_async(self, filters: dict[str, Any]) -> int: """ Asynchronously deletes all documents that match the provided filters. @@ -473,7 +473,7 @@ async def delete_by_filter_async(self, filters: Dict[str, Any]) -> int: msg = f"Failed to delete documents by filter from MongoDB Atlas: {e!s}" raise DocumentStoreError(msg) from e - def update_by_filter(self, filters: Dict[str, Any], meta: Dict[str, Any]) -> int: + def update_by_filter(self, filters: dict[str, Any], meta: dict[str, Any]) -> int: """ Updates the metadata of all documents that match the provided filters. @@ -502,7 +502,7 @@ def update_by_filter(self, filters: Dict[str, Any], meta: Dict[str, Any]) -> int msg = f"Failed to update documents by filter in MongoDB Atlas: {e!s}" raise DocumentStoreError(msg) from e - async def update_by_filter_async(self, filters: Dict[str, Any], meta: Dict[str, Any]) -> int: + async def update_by_filter_async(self, filters: dict[str, Any], meta: dict[str, Any]) -> int: """ Asynchronously updates the metadata of all documents that match the provided filters. @@ -635,10 +635,10 @@ async def delete_all_documents_async(self, *, recreate_collection: bool = False) def _embedding_retrieval( self, - query_embedding: List[float], - filters: Optional[Dict[str, Any]] = None, + query_embedding: list[float], + filters: Optional[dict[str, Any]] = None, top_k: int = 10, - ) -> List[Document]: + ) -> list[Document]: """ Find the documents that are most similar to the provided `query_embedding` by using a vector similarity metric. @@ -657,7 +657,7 @@ def _embedding_retrieval( filters = _normalize_filters(filters) if filters else {} - pipeline: List[Dict[str, Any]] = [ + pipeline: list[dict[str, Any]] = [ { "$vectorSearch": { "index": self.vector_search_index, @@ -686,8 +686,8 @@ def _embedding_retrieval( return documents async def _embedding_retrieval_async( - self, query_embedding: List[float], filters: Optional[Dict[str, Any]] = None, top_k: int = 10 - ) -> List[Document]: + self, query_embedding: list[float], filters: Optional[dict[str, Any]] = None, top_k: int = 10 + ) -> list[Document]: """ Asynchronously find the documents that are most similar to the provided `query_embedding` by using a vector similarity metric. @@ -707,7 +707,7 @@ async def _embedding_retrieval_async( filters = _normalize_filters(filters) if filters else {} - pipeline: List[Dict[str, Any]] = [ + pipeline: list[dict[str, Any]] = [ { "$vectorSearch": { "index": self.vector_search_index, @@ -738,14 +738,14 @@ async def _embedding_retrieval_async( def _fulltext_retrieval( self, - query: Union[str, List[str]], - fuzzy: Optional[Dict[str, int]] = None, + query: Union[str, list[str]], + fuzzy: Optional[dict[str, int]] = None, match_criteria: Optional[Literal["any", "all"]] = None, - score: Optional[Dict[str, Dict]] = None, + score: Optional[dict[str, dict]] = None, synonyms: Optional[str] = None, - filters: Optional[Dict[str, Any]] = None, + filters: Optional[dict[str, Any]] = None, top_k: int = 10, - ) -> List[Document]: + ) -> list[Document]: """ Retrieve documents similar to the provided `query` using a full-text search. 
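Stepping back to `_embedding_retrieval` above: the aggregation stage it assembles has roughly the following shape. Only the index key is visible in the hunk; the remaining keys follow Atlas's documented $vectorSearch options and may differ in detail from the component's exact pipeline:

query_embedding = [0.1] * 768  # illustrative query vector
top_k = 5

pipeline = [
    {
        "$vectorSearch": {
            "index": "cosine_index",
            "path": "embedding",            # assumed embedding field
            "queryVector": query_embedding,
            "numCandidates": 100,           # illustrative oversampling factor
            "limit": top_k,
            "filter": {},                   # output of _normalize_filters, possibly empty
        }
    },
    # later stages project the vector-search score onto each returned document
]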
@@ -792,7 +792,7 @@ def _fulltext_retrieval( filters = _normalize_filters(filters) if filters else {} # Build the text search options - text_search: Dict[str, Any] = {"path": self.content_field or "content", "query": query} + text_search: dict[str, Any] = {"path": self.content_field or "content", "query": query} if match_criteria: text_search["matchCriteria"] = match_criteria if synonyms: @@ -803,7 +803,7 @@ def _fulltext_retrieval( text_search["score"] = score # Define the pipeline for MongoDB aggregation - pipeline: List[Dict[str, Any]] = [ + pipeline: list[dict[str, Any]] = [ { "$search": { "index": self.full_text_search_index, @@ -831,14 +831,14 @@ def _fulltext_retrieval( async def _fulltext_retrieval_async( self, - query: Union[str, List[str]], - fuzzy: Optional[Dict[str, int]] = None, + query: Union[str, list[str]], + fuzzy: Optional[dict[str, int]] = None, match_criteria: Optional[Literal["any", "all"]] = None, - score: Optional[Dict[str, Dict]] = None, + score: Optional[dict[str, dict]] = None, synonyms: Optional[str] = None, - filters: Optional[Dict[str, Any]] = None, + filters: Optional[dict[str, Any]] = None, top_k: int = 10, - ) -> List[Document]: + ) -> list[Document]: """ Asynchronously retrieve documents similar to the provided `query` using a full-text search. @@ -885,7 +885,7 @@ async def _fulltext_retrieval_async( filters = _normalize_filters(filters) if filters else {} # Build the text search options - text_search: Dict[str, Any] = {"path": self.content_field or "content", "query": query} + text_search: dict[str, Any] = {"path": self.content_field or "content", "query": query} if match_criteria: text_search["matchCriteria"] = match_criteria if synonyms: @@ -896,7 +896,7 @@ async def _fulltext_retrieval_async( text_search["score"] = score # Define the pipeline for MongoDB aggregation - pipeline: List[Dict[str, Any]] = [ + pipeline: list[dict[str, Any]] = [ { "$search": { "index": self.full_text_search_index, @@ -923,7 +923,7 @@ async def _fulltext_retrieval_async( return [self._mongo_doc_to_haystack_doc(doc) for doc in documents] - def _mongo_doc_to_haystack_doc(self, mongo_doc: Dict[str, Any]) -> Document: + def _mongo_doc_to_haystack_doc(self, mongo_doc: dict[str, Any]) -> Document: """ Converts the dictionary coming out of MongoDB into a Haystack document @@ -937,7 +937,7 @@ def _mongo_doc_to_haystack_doc(self, mongo_doc: Dict[str, Any]) -> Document: mongo_doc["embedding"] = mongo_doc.pop(self.embedding_field, None) return Document.from_dict(mongo_doc) - def _haystack_doc_to_mongo_doc(self, haystack_doc: Document) -> Dict[str, Any]: + def _haystack_doc_to_mongo_doc(self, haystack_doc: Document) -> dict[str, Any]: """ Parses a Haystack Document to a MongoDB document.
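Similarly, the `$search` stage assembled in `_fulltext_retrieval` above combines the options collected into text_search (path, query, matchCriteria, synonyms, fuzzy, score) under the full-text index. A hedged sketch of the resulting stage; the exact nesting of the text operator is not visible in the hunk:

pipeline = [
    {
        "$search": {
            "index": "full_text_index",
            "text": {
                "path": "content",
                "query": "fox",
                "matchCriteria": "any",
                "fuzzy": {"maxEdits": 1},
            },
        }
    },
]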
diff --git a/integrations/mongodb_atlas/src/haystack_integrations/document_stores/mongodb_atlas/filters.py b/integrations/mongodb_atlas/src/haystack_integrations/document_stores/mongodb_atlas/filters.py index 62b89b97c0..365a5951b2 100644 --- a/integrations/mongodb_atlas/src/haystack_integrations/document_stores/mongodb_atlas/filters.py +++ b/integrations/mongodb_atlas/src/haystack_integrations/document_stores/mongodb_atlas/filters.py @@ -2,14 +2,14 @@ # # SPDX-License-Identifier: Apache-2.0 from datetime import datetime -from typing import Any, Dict +from typing import Any from haystack.errors import FilterError UNSUPPORTED_TYPES_FOR_COMPARISON = (list,) -def _normalize_filters(filters: Dict[str, Any]) -> Dict[str, Any]: +def _normalize_filters(filters: dict[str, Any]) -> dict[str, Any]: """ Converts Haystack filters to MongoDB filters. """ @@ -26,7 +26,7 @@ def _normalize_filters(filters: Dict[str, Any]) -> Dict[str, Any]: return _parse_logical_condition(filters) -def _parse_logical_condition(condition: Dict[str, Any]) -> Dict[str, Any]: +def _parse_logical_condition(condition: dict[str, Any]) -> dict[str, Any]: if "operator" not in condition: msg = f"'operator' key missing in {condition}" raise FilterError(msg) @@ -56,7 +56,7 @@ def _parse_logical_condition(condition: Dict[str, Any]) -> Dict[str, Any]: raise FilterError(msg) -def _parse_comparison_condition(condition: Dict[str, Any]) -> Dict[str, Any]: +def _parse_comparison_condition(condition: dict[str, Any]) -> dict[str, Any]: field: str = condition["field"] if "operator" not in condition: msg = f"'operator' key missing in {condition}" @@ -70,11 +70,11 @@ def _parse_comparison_condition(condition: Dict[str, Any]) -> Dict[str, Any]: return COMPARISON_OPERATORS[operator](field, value) -def _equal(field: str, value: Any) -> Dict[str, Any]: +def _equal(field: str, value: Any) -> dict[str, Any]: return {field: {"$eq": value}} -def _not_equal(field: str, value: Any) -> Dict[str, Any]: +def _not_equal(field: str, value: Any) -> dict[str, Any]: return {field: {"$ne": value}} @@ -90,12 +90,12 @@ def _validate_type_for_comparison(value: Any) -> None: raise FilterError(msg) from exc -def _greater_than(field: str, value: Any) -> Dict[str, Any]: +def _greater_than(field: str, value: Any) -> dict[str, Any]: _validate_type_for_comparison(value) return {field: {"$gt": value}} -def _greater_than_equal(field: str, value: Any) -> Dict[str, Any]: +def _greater_than_equal(field: str, value: Any) -> dict[str, Any]: if value is None: # we want {field: {"$gte": null}} to return an empty result # $gte with null values in MongoDB returns a non-empty result, while $gt aligns with our expectations @@ -105,12 +105,12 @@ def _greater_than_equal(field: str, value: Any) -> Dict[str, Any]: return {field: {"$gte": value}} -def _less_than(field: str, value: Any) -> Dict[str, Any]: +def _less_than(field: str, value: Any) -> dict[str, Any]: _validate_type_for_comparison(value) return {field: {"$lt": value}} -def _less_than_equal(field: str, value: Any) -> Dict[str, Any]: +def _less_than_equal(field: str, value: Any) -> dict[str, Any]: if value is None: # we want {field: {"$lte": null}} to return an empty result # $lte with null values in MongoDB returns a non-empty result, while $lt aligns with our expectations @@ -120,7 +120,7 @@ def _less_than_equal(field: str, value: Any) -> Dict[str, Any]: return {field: {"$lte": value}} -def _not_in(field: str, value: Any) -> Dict[str, Any]: +def _not_in(field: str, value: Any) -> dict[str, Any]: if not isinstance(value, list): msg 
= f"{field}'s value must be a list when using 'not in' comparator in Pinecone" raise FilterError(msg) @@ -128,7 +128,7 @@ def _not_in(field: str, value: Any) -> Dict[str, Any]: return {field: {"$nin": value}} -def _in(field: str, value: Any) -> Dict[str, Any]: +def _in(field: str, value: Any) -> dict[str, Any]: if not isinstance(value, list): msg = f"{field}'s value must be a list when using 'in' comparator in Pinecone" raise FilterError(msg) diff --git a/integrations/mongodb_atlas/tests/test_embedding_retrieval.py b/integrations/mongodb_atlas/tests/test_embedding_retrieval.py index e894809958..7fe2190b00 100644 --- a/integrations/mongodb_atlas/tests/test_embedding_retrieval.py +++ b/integrations/mongodb_atlas/tests/test_embedding_retrieval.py @@ -2,7 +2,6 @@ # # SPDX-License-Identifier: Apache-2.0 import os -from typing import List import pytest from haystack.document_stores.errors import DocumentStoreError @@ -65,7 +64,7 @@ def test_empty_query_embedding(self): vector_search_index="cosine_index", full_text_search_index="full_text_index", ) - query_embedding: List[float] = [] + query_embedding: list[float] = [] with pytest.raises(ValueError): document_store._embedding_retrieval(query_embedding=query_embedding) diff --git a/integrations/mongodb_atlas/tests/test_fulltext_retrieval.py b/integrations/mongodb_atlas/tests/test_fulltext_retrieval.py index 784959b00e..b069c43cb4 100644 --- a/integrations/mongodb_atlas/tests/test_fulltext_retrieval.py +++ b/integrations/mongodb_atlas/tests/test_fulltext_retrieval.py @@ -4,7 +4,7 @@ import os from time import sleep -from typing import List, Union +from typing import Union from unittest.mock import MagicMock import pytest @@ -157,7 +157,7 @@ def test_synonyms_retrieval(self, document_store: MongoDBAtlasDocumentStore): assert results[0].score >= results[1].score @pytest.mark.parametrize("query", ["", []]) - def test_empty_query_raises_value_error(self, query: Union[str, List], document_store: MongoDBAtlasDocumentStore): + def test_empty_query_raises_value_error(self, query: Union[str, list], document_store: MongoDBAtlasDocumentStore): with pytest.raises(ValueError): document_store._fulltext_retrieval(query=query) diff --git a/integrations/mongodb_atlas/tests/test_fulltext_retrieval_async.py b/integrations/mongodb_atlas/tests/test_fulltext_retrieval_async.py index e47e307122..5ed8d13f5f 100644 --- a/integrations/mongodb_atlas/tests/test_fulltext_retrieval_async.py +++ b/integrations/mongodb_atlas/tests/test_fulltext_retrieval_async.py @@ -4,7 +4,7 @@ import os from time import sleep -from typing import List, Union +from typing import Union from unittest.mock import AsyncMock, MagicMock, patch import pytest @@ -116,7 +116,7 @@ async def test_synonyms_retrieval_async(self, document_store: MongoDBAtlasDocume @pytest.mark.parametrize("query", ["", []]) async def test_empty_query_raises_value_error_async( - self, query: Union[str, List], document_store: MongoDBAtlasDocumentStore + self, query: Union[str, list], document_store: MongoDBAtlasDocumentStore ): with pytest.raises(ValueError): await document_store._fulltext_retrieval_async(query=query) diff --git a/integrations/nvidia/pyproject.toml b/integrations/nvidia/pyproject.toml index c2fa78c95c..05469f6103 100644 --- a/integrations/nvidia/pyproject.toml +++ b/integrations/nvidia/pyproject.toml @@ -82,7 +82,7 @@ disallow_incomplete_defs = true [tool.ruff] -target-version = "py38" +target-version = "py39" line-length = 120 [tool.ruff.lint] diff --git 
a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/document_embedder.py b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/document_embedder.py index ee941a85ef..b4c4e00aeb 100644 --- a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/document_embedder.py +++ b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/document_embedder.py @@ -4,7 +4,7 @@ import os import warnings -from typing import Any, Dict, List, Optional, Tuple, Union +from typing import Any, Optional, Union from haystack import Document, component, default_from_dict, default_to_dict, logging from haystack.utils import Secret, deserialize_secrets_inplace @@ -45,7 +45,7 @@ def __init__( suffix: str = "", batch_size: int = 32, progress_bar: bool = True, - meta_fields_to_embed: Optional[List[str]] = None, + meta_fields_to_embed: Optional[list[str]] = None, embedding_separator: str = "\n", truncate: Optional[Union[EmbeddingTruncateMode, str]] = None, timeout: Optional[float] = None, @@ -156,7 +156,7 @@ def warm_up(self): if not self.model: self.default_model() - def to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> dict[str, Any]: """ Serializes the component to a dictionary. @@ -179,14 +179,14 @@ def to_dict(self) -> Dict[str, Any]: ) @property - def available_models(self) -> List[Model]: + def available_models(self) -> list[Model]: """ Get a list of available models that work with NvidiaDocumentEmbedder. """ return self.backend.models() if self.backend else [] @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "NvidiaDocumentEmbedder": + def from_dict(cls, data: dict[str, Any]) -> "NvidiaDocumentEmbedder": """ Deserializes the component from a dictionary. @@ -200,7 +200,7 @@ def from_dict(cls, data: Dict[str, Any]) -> "NvidiaDocumentEmbedder": deserialize_secrets_inplace(data["init_parameters"], keys=["api_key"]) return default_from_dict(cls, data) - def _prepare_texts_to_embed(self, documents: List[Document]) -> List[str]: + def _prepare_texts_to_embed(self, documents: list[Document]) -> list[str]: texts_to_embed = [] for doc in documents: meta_values_to_embed = [ @@ -213,8 +213,8 @@ def _prepare_texts_to_embed(self, documents: List[Document]) -> List[str]: return texts_to_embed - def _embed_batch(self, texts_to_embed: List[str], batch_size: int) -> Tuple[List[List[float]], Dict[str, Any]]: - all_embeddings: List[List[float]] = [] + def _embed_batch(self, texts_to_embed: list[str], batch_size: int) -> tuple[list[list[float]], dict[str, Any]]: + all_embeddings: list[list[float]] = [] usage_prompt_tokens = 0 usage_total_tokens = 0 @@ -233,8 +233,8 @@ def _embed_batch(self, texts_to_embed: List[str], batch_size: int) -> Tuple[List return all_embeddings, {"usage": {"prompt_tokens": usage_prompt_tokens, "total_tokens": usage_total_tokens}} - @component.output_types(documents=List[Document], meta=Dict[str, Any]) - def run(self, documents: List[Document]) -> Dict[str, Union[List[Document], Dict[str, Any]]]: + @component.output_types(documents=list[Document], meta=dict[str, Any]) + def run(self, documents: list[Document]) -> dict[str, Union[list[Document], dict[str, Any]]]: """ Embed a list of Documents. 
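The batching logic in `_embed_batch` above boils down to slicing the prepared texts and accumulating token usage across backend calls; a standalone sketch (the backend parameter stands in for self.backend, and the tuple shape matches NimBackend.embed further below):

def embed_in_batches(backend, texts_to_embed: list[str], batch_size: int = 32) -> tuple[list[list[float]], dict]:
    all_embeddings: list[list[float]] = []
    prompt_tokens = total_tokens = 0
    for i in range(0, len(texts_to_embed), batch_size):
        # Each call returns (list[list[float]], {"usage": {...}})
        embeddings, meta = backend.embed(texts_to_embed[i : i + batch_size])
        all_embeddings.extend(embeddings)
        prompt_tokens += meta["usage"]["prompt_tokens"]
        total_tokens += meta["usage"]["total_tokens"]
    return all_embeddings, {"usage": {"prompt_tokens": prompt_tokens, "total_tokens": total_tokens}}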
diff --git a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/text_embedder.py b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/text_embedder.py index a86178f974..076a1d3020 100644 --- a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/text_embedder.py +++ b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/text_embedder.py @@ -4,7 +4,7 @@ import os import warnings -from typing import Any, Dict, List, Optional, Union +from typing import Any, Optional, Union from haystack import component, default_from_dict, default_to_dict, logging from haystack.utils import Secret, deserialize_secrets_inplace @@ -146,7 +146,7 @@ def warm_up(self): else: self.default_model() - def to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> dict[str, Any]: """ Serializes the component to a dictionary. @@ -165,14 +165,14 @@ def to_dict(self) -> Dict[str, Any]: ) @property - def available_models(self) -> List[Model]: + def available_models(self) -> list[Model]: """ Get a list of available models that work with NvidiaTextEmbedder. """ return self.backend.models() if self.backend else [] @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "NvidiaTextEmbedder": + def from_dict(cls, data: dict[str, Any]) -> "NvidiaTextEmbedder": """ Deserializes the component from a dictionary. @@ -186,8 +186,8 @@ def from_dict(cls, data: Dict[str, Any]) -> "NvidiaTextEmbedder": deserialize_secrets_inplace(data["init_parameters"], keys=["api_key"]) return default_from_dict(cls, data) - @component.output_types(embedding=List[float], meta=Dict[str, Any]) - def run(self, text: str) -> Dict[str, Union[List[float], Dict[str, Any]]]: + @component.output_types(embedding=list[float], meta=dict[str, Any]) + def run(self, text: str) -> dict[str, Union[list[float], dict[str, Any]]]: """ Embed a string. diff --git a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/chat/chat_generator.py b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/chat/chat_generator.py index 39c220fd70..9f623e7a19 100644 --- a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/chat/chat_generator.py +++ b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/chat/chat_generator.py @@ -3,7 +3,7 @@ # SPDX-License-Identifier: Apache-2.0 import os -from typing import Any, Dict, Optional +from typing import Any, Optional from haystack import component, default_to_dict, logging from haystack.components.generators.chat import OpenAIChatGenerator @@ -55,11 +55,11 @@ def __init__( model: str = "meta/llama-3.1-8b-instruct", streaming_callback: Optional[StreamingCallbackT] = None, api_base_url: Optional[str] = os.getenv("NVIDIA_API_URL", DEFAULT_API_URL), - generation_kwargs: Optional[Dict[str, Any]] = None, + generation_kwargs: Optional[dict[str, Any]] = None, tools: Optional[ToolsType] = None, timeout: Optional[float] = None, max_retries: Optional[int] = None, - http_client_kwargs: Optional[Dict[str, Any]] = None, + http_client_kwargs: Optional[dict[str, Any]] = None, ): """ Creates an instance of NvidiaChatGenerator. @@ -126,7 +126,7 @@ def __init__( http_client_kwargs=http_client_kwargs, ) - def to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> dict[str, Any]: """ Serialize this component to a dictionary. 
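A usage sketch for the text embedder above (the model id is hypothetical; NVIDIA_API_KEY is read from the environment by default):

from haystack_integrations.components.embedders.nvidia import NvidiaTextEmbedder

embedder = NvidiaTextEmbedder(model="nvidia/nv-embedqa-e5-v5")  # hypothetical model id
embedder.warm_up()
out = embedder.run(text="What is the capital of France?")
embedding: list[float] = out["embedding"]
meta: dict = out["meta"]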
diff --git a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/generator.py b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/generator.py index ed5fe78801..312431cdad 100644 --- a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/generator.py +++ b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/generator.py @@ -4,7 +4,7 @@ import os import warnings -from typing import Any, Dict, List, Optional, Union +from typing import Any, Optional, Union from haystack import component, default_from_dict, default_to_dict from haystack.utils.auth import Secret, deserialize_secrets_inplace @@ -47,7 +47,7 @@ def __init__( model: Optional[str] = None, api_url: str = os.getenv("NVIDIA_API_URL", DEFAULT_API_URL), api_key: Optional[Secret] = Secret.from_env_var("NVIDIA_API_KEY"), - model_arguments: Optional[Dict[str, Any]] = None, + model_arguments: Optional[dict[str, Any]] = None, timeout: Optional[float] = None, ): """ @@ -134,7 +134,7 @@ def warm_up(self): else: self.default_model() - def to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> dict[str, Any]: """ Serializes the component to a dictionary. @@ -150,14 +150,14 @@ def to_dict(self) -> Dict[str, Any]: ) @property - def available_models(self) -> List[Model]: + def available_models(self) -> list[Model]: """ Get a list of available models that work with NvidiaGenerator. """ return self.backend.models() if self.backend else [] @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "NvidiaGenerator": + def from_dict(cls, data: dict[str, Any]) -> "NvidiaGenerator": """ Deserializes the component from a dictionary. @@ -170,8 +170,8 @@ def from_dict(cls, data: Dict[str, Any]) -> "NvidiaGenerator": deserialize_secrets_inplace(init_params, ["api_key"]) return default_from_dict(cls, data) - @component.output_types(replies=List[str], meta=List[Dict[str, Any]]) - def run(self, prompt: str) -> Dict[str, Union[List[str], List[Dict[str, Any]]]]: + @component.output_types(replies=list[str], meta=list[dict[str, Any]]) + def run(self, prompt: str) -> dict[str, Union[list[str], list[dict[str, Any]]]]: """ Queries the model with the provided prompt. diff --git a/integrations/nvidia/src/haystack_integrations/components/rankers/nvidia/ranker.py b/integrations/nvidia/src/haystack_integrations/components/rankers/nvidia/ranker.py index 553297628b..ef931212e8 100644 --- a/integrations/nvidia/src/haystack_integrations/components/rankers/nvidia/ranker.py +++ b/integrations/nvidia/src/haystack_integrations/components/rankers/nvidia/ranker.py @@ -4,7 +4,7 @@ import os import warnings -from typing import Any, Dict, List, Optional, Union +from typing import Any, Optional, Union from haystack import Document, component, default_from_dict, default_to_dict, logging from haystack.utils import Secret, deserialize_secrets_inplace @@ -54,7 +54,7 @@ def __init__( top_k: int = 5, query_prefix: str = "", document_prefix: str = "", - meta_fields_to_embed: Optional[List[str]] = None, + meta_fields_to_embed: Optional[list[str]] = None, embedding_separator: str = "\n", timeout: Optional[float] = None, ): @@ -122,7 +122,7 @@ def __init__( def class_name(cls) -> str: return "NvidiaRanker" - def to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> dict[str, Any]: """ Serialize the ranker to a dictionary.
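And a sketch for the ranker (the model id is hypothetical; prefixes and truncation are left at their defaults, and NVIDIA_API_KEY is read from the environment):

from haystack import Document
from haystack_integrations.components.rankers.nvidia import NvidiaRanker

ranker = NvidiaRanker(model="nvidia/nv-rerankqa-mistral-4b-v3", top_k=3)  # hypothetical model id
ranker.warm_up()
ranked = ranker.run(
    query="What is the GDP of France?",
    documents=[Document(content="France's GDP was about 3 trillion USD in 2023.")],
)["documents"]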
@@ -143,7 +143,7 @@ def to_dict(self) -> Dict[str, Any]: ) @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "NvidiaRanker": + def from_dict(cls, data: dict[str, Any]) -> "NvidiaRanker": """ Deserialize the ranker from a dictionary. @@ -162,7 +162,7 @@ def warm_up(self): :raises ValueError: If the API key is required for hosted NVIDIA NIMs. """ if not self._initialized: - model_kwargs: Dict[str, Any] = {} + model_kwargs: dict[str, Any] = {} if self.truncate is not None: model_kwargs.update(truncate=str(self.truncate)) self.backend = NimBackend( @@ -179,7 +179,7 @@ def warm_up(self): self.model = self.backend.model self._initialized = True - def _prepare_documents_to_embed(self, documents: List[Document]) -> List[str]: + def _prepare_documents_to_embed(self, documents: list[Document]) -> list[str]: document_texts = [] for doc in documents: meta_values_to_embed = [ @@ -191,13 +191,13 @@ def _prepare_documents_to_embed(self, documents: List[Document]) -> List[str]: document_texts.append(self.document_prefix + text_to_embed) return document_texts - @component.output_types(documents=List[Document]) + @component.output_types(documents=list[Document]) def run( self, query: str, - documents: List[Document], + documents: list[Document], top_k: Optional[int] = None, - ) -> Dict[str, List[Document]]: + ) -> dict[str, list[Document]]: """ Rank a list of documents based on a given query. diff --git a/integrations/nvidia/src/haystack_integrations/utils/nvidia/nim_backend.py b/integrations/nvidia/src/haystack_integrations/utils/nvidia/nim_backend.py index b663889eca..1839d527ee 100644 --- a/integrations/nvidia/src/haystack_integrations/utils/nvidia/nim_backend.py +++ b/integrations/nvidia/src/haystack_integrations/utils/nvidia/nim_backend.py @@ -4,7 +4,7 @@ import os import warnings -from typing import Any, Dict, List, Literal, Optional, Tuple, Union +from typing import Any, Literal, Optional, Union import requests from haystack import logging @@ -26,7 +26,7 @@ def __init__( model_type: Optional[Literal["chat", "embedding", "ranking"]] = None, model: Optional[str] = None, api_key: Optional[Secret] = Secret.from_env_var("NVIDIA_API_KEY"), - model_kwargs: Optional[Dict[str, Any]] = None, + model_kwargs: Optional[dict[str, Any]] = None, client: Optional[Union[str, Client]] = None, timeout: Optional[float] = None, ): @@ -73,7 +73,7 @@ def __init__( timeout = float(os.environ.get("NVIDIA_TIMEOUT", REQUEST_TIMEOUT)) self.timeout = timeout - def embed(self, texts: List[str]) -> Tuple[List[List[float]], Dict[str, Any]]: + def embed(self, texts: list[str]) -> tuple[list[list[float]], dict[str, Any]]: url = f"{self.api_url}/embeddings" try: @@ -98,7 +98,7 @@ def embed(self, texts: List[str]) -> Tuple[List[List[float]], Dict[str, Any]]: return embeddings, {"usage": data["usage"]} - def generate(self, prompt: str) -> Tuple[List[str], List[Dict[str, Any]]]: + def generate(self, prompt: str) -> tuple[list[str], list[dict[str, Any]]]: # We're using the chat completion endpoint as the NIM API doesn't support # the /completions endpoint. So both the non-chat and chat generator will use this. # This is the same for local containers and the cloud API. 
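Given the comment above (everything goes through the chat endpoint), the request that generate() issues is presumably shaped like a standard OpenAI-compatible chat call. A hedged sketch, not the backend's literal code; auth headers and error handling are omitted:

import requests

def nim_generate(api_url: str, model: str, prompt: str, model_kwargs: dict, timeout: float) -> list[str]:
    payload = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],  # single-turn prompt wrapping
        **model_kwargs,  # e.g. temperature, max_tokens
    }
    resp = requests.post(f"{api_url}/chat/completions", json=payload, timeout=timeout)
    resp.raise_for_status()
    return [choice["message"]["content"] for choice in resp.json()["choices"]]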
@@ -151,7 +151,7 @@ def generate(self, prompt: str) -> Tuple[List[str], List[Dict[str, Any]]]: return replies, meta - def models(self) -> List[Model]: + def models(self) -> list[Model]: url = f"{self.api_url}/models" res = self.session.get( @@ -174,7 +174,7 @@ def models(self) -> List[Model]: raise ValueError(msg) return models - def rank(self, query_text: str, document_texts: List[str]) -> List[Dict[str, Any]]: + def rank(self, query_text: str, document_texts: list[str]) -> list[dict[str, Any]]: url = self.api_url try: diff --git a/integrations/nvidia/tests/conftest.py b/integrations/nvidia/tests/conftest.py index f1c3042997..c41c2cbfcc 100644 --- a/integrations/nvidia/tests/conftest.py +++ b/integrations/nvidia/tests/conftest.py @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: Apache-2.0 -from typing import Any, Dict, List, Optional, Tuple +from typing import Any, Optional import pytest from haystack.utils import Secret @@ -12,7 +12,7 @@ class MockBackend(NimBackend): - def __init__(self, model: str, api_key: Optional[Secret] = None, model_kwargs: Optional[Dict[str, Any]] = None): + def __init__(self, model: str, api_key: Optional[Secret] = None, model_kwargs: Optional[dict[str, Any]] = None): api_key = api_key or Secret.from_env_var("NVIDIA_API_KEY") super().__init__(api_url="", model=model, api_key=api_key, model_kwargs=model_kwargs or {}) @@ -24,7 +24,7 @@ def embed(self, texts): def models(self): return [Model(id="aa")] - def generate(self) -> Tuple[List[str], List[Dict[str, Any]]]: + def generate(self) -> tuple[list[str], list[dict[str, Any]]]: return ( ["This is a mocked response."], [{"role": "assistant", "usage": {"prompt_tokens": 5, "total_tokens": 10, "completion_tokens": 5}}],
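A sketch of exercising the mock directly in a test, assuming MockBackend is importable from the conftest module above (the env var is needed because the mock still resolves NVIDIA_API_KEY via Secret.from_env_var):

from conftest import MockBackend  # as defined above; import path is an assumption

def test_mock_backend_generate(monkeypatch):
    monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key")
    backend = MockBackend(model="aa")
    replies, meta = backend.generate()
    assert replies == ["This is a mocked response."]
    assert meta[0]["usage"]["total_tokens"] == 10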