From f0029e786715a62fc24100f4b8c836fb1cc15363 Mon Sep 17 00:00:00 2001
From: Darja Fokina
Date: Thu, 6 Nov 2025 18:47:28 +0100
Subject: [PATCH 1/3] multimodal examples

---
 .../generators/amazonbedrockchatgenerator.mdx | 15 +++++++++++
 .../generators/anthropicchatgenerator.mdx     | 14 +++++++++++
 .../generators/azureopenaichatgenerator.mdx   | 23 +++++++++++++++++
 .../generators/coherechatgenerator.mdx        | 18 +++++++++++++
 .../generators/googlegenaichatgenerator.mdx   | 15 +++++++++++
 .../generators/llamacppchatgenerator.mdx      | 25 +++++++++++++++++++
 .../generators/metallamachatgenerator.mdx     | 15 +++++++++++
 .../generators/mistralchatgenerator.mdx       | 15 +++++++++++
 .../generators/nvidiachatgenerator.mdx        | 15 +++++++++++
 .../generators/ollamachatgenerator.mdx        | 15 +++++++++++
 .../generators/openrouterchatgenerator.mdx    | 15 +++++++++++
 .../generators/stackitchatgenerator.mdx       | 15 +++++++++++
 .../generators/watsonxchatgenerator.mdx       | 18 +++++++++++++
 13 files changed, 218 insertions(+)

diff --git a/docs-website/docs/pipeline-components/generators/amazonbedrockchatgenerator.mdx b/docs-website/docs/pipeline-components/generators/amazonbedrockchatgenerator.mdx
index 3ed9c26db5..5dd9a5b193 100644
--- a/docs-website/docs/pipeline-components/generators/amazonbedrockchatgenerator.mdx
+++ b/docs-website/docs/pipeline-components/generators/amazonbedrockchatgenerator.mdx
@@ -96,6 +96,21 @@ response = generator.run(messages)
 print(response)
 ```
 
+With multimodal inputs:
+
+```python
+from haystack.dataclasses import ChatMessage, ImageContent
+from haystack_integrations.components.generators.amazon_bedrock import AmazonBedrockChatGenerator
+
+generator = AmazonBedrockChatGenerator(model="anthropic.claude-3-5-sonnet-20240620-v1:0")
+
+image = ImageContent.from_file_path("apple.jpg")
+message = ChatMessage.from_user(content_parts=["Describe the image using 10 words at most.", image])
+
+response = generator.run(messages=[message])
+print(response)
+```
+
 ### In a pipeline
 
 In a RAG pipeline:
diff --git a/docs-website/docs/pipeline-components/generators/anthropicchatgenerator.mdx b/docs-website/docs/pipeline-components/generators/anthropicchatgenerator.mdx
index 566f00299b..433522d0bf 100644
--- a/docs-website/docs/pipeline-components/generators/anthropicchatgenerator.mdx
+++ b/docs-website/docs/pipeline-components/generators/anthropicchatgenerator.mdx
@@ -148,6 +148,20 @@ message = ChatMessage.from_user("What's Natural Language Processing? Be brief.")
 print(generator.run([message]))
 ```
 
+With multimodal inputs:
+
+```python
+from haystack.dataclasses import ChatMessage, ImageContent
+from haystack_integrations.components.generators.anthropic import AnthropicChatGenerator
+
+image = ImageContent.from_file_path("path/to/image.jpg")
+messages = [ChatMessage.from_user(content_parts=["What's in this image?", image])]
+
+generator = AnthropicChatGenerator()
+result = generator.run(messages)
+print(result)
+```
+
 ### In a pipeline
 
 You can also use `AnthropicChatGenerator` with the Anthropic chat models in your pipeline.
diff --git a/docs-website/docs/pipeline-components/generators/azureopenaichatgenerator.mdx b/docs-website/docs/pipeline-components/generators/azureopenaichatgenerator.mdx
index 5d7ad130fa..647ba25fce 100644
--- a/docs-website/docs/pipeline-components/generators/azureopenaichatgenerator.mdx
+++ b/docs-website/docs/pipeline-components/generators/azureopenaichatgenerator.mdx
@@ -149,6 +149,29 @@ response = client.run(
 print(response)
 ```
 
+With multimodal inputs:
+
+```python
+from haystack.dataclasses import ChatMessage, ImageContent
+from haystack.components.generators.chat import AzureOpenAIChatGenerator
+
+llm = AzureOpenAIChatGenerator(
+    azure_endpoint="",
+    azure_deployment="gpt-4o-mini",
+)
+
+image = ImageContent.from_file_path("apple.jpg", detail="low")
+user_message = ChatMessage.from_user(content_parts=[
+    "What does the image show? Max 5 words.",
+    image
+    ])
+
+response = llm.run([user_message])["replies"][0].text
+print(response)
+
+# Fresh red apple on straw.
+```
+
 ### In a pipeline
 
 ```python
diff --git a/docs-website/docs/pipeline-components/generators/coherechatgenerator.mdx b/docs-website/docs/pipeline-components/generators/coherechatgenerator.mdx
index d5999e8b6c..582ef51ce9 100644
--- a/docs-website/docs/pipeline-components/generators/coherechatgenerator.mdx
+++ b/docs-website/docs/pipeline-components/generators/coherechatgenerator.mdx
@@ -87,6 +87,24 @@ message = ChatMessage.from_user("What's Natural Language Processing? Be brief.")
 print(generator.run([message]))
 ```
 
+With multimodal inputs:
+
+```python
+from haystack.dataclasses import ChatMessage, ImageContent
+from haystack_integrations.components.generators.cohere import CohereChatGenerator
+
+# Create an image from file path or base64
+image = ImageContent.from_file_path("path/to/your/image.jpg")
+
+# Create a multimodal message with both text and image
+messages = [ChatMessage.from_user(content_parts=["What's in this image?", image])]
+
+# Use a multimodal model like Command A Vision
+generator = CohereChatGenerator(model="command-a-vision-07-2025")
+response = generator.run(messages)
+print(response)
+```
+
 #### In a Pipeline
 
 You can also use `CohereChatGenerator` to use Cohere chat models in your pipeline.
diff --git a/docs-website/docs/pipeline-components/generators/googlegenaichatgenerator.mdx b/docs-website/docs/pipeline-components/generators/googlegenaichatgenerator.mdx
index 9fa110c087..10a392f562 100644
--- a/docs-website/docs/pipeline-components/generators/googlegenaichatgenerator.mdx
+++ b/docs-website/docs/pipeline-components/generators/googlegenaichatgenerator.mdx
@@ -128,6 +128,21 @@ response = chat_generator.run(messages=messages)
 print(response["replies"][0].text)
 ```
 
+With multimodal inputs:
+
+```python
+from haystack.dataclasses import ChatMessage, ImageContent
+from haystack_integrations.components.generators.google_genai import GoogleGenAIChatGenerator
+
+chat_generator = GoogleGenAIChatGenerator()
+
+image = ImageContent.from_file_path("apple.jpg")
+messages = [ChatMessage.from_user(content_parts=["What does the image show? Max 5 words.", image])]
+
+response = chat_generator.run(messages=messages)
+print(response["replies"][0].text)
+```
+
 You can also easily use function calls.
 First, define the function locally and convert it into a [Tool](https://www.notion.so/docs/tool):
 
 ```python
diff --git a/docs-website/docs/pipeline-components/generators/llamacppchatgenerator.mdx b/docs-website/docs/pipeline-components/generators/llamacppchatgenerator.mdx
index 4f06cbd0ac..19613a480f 100644
--- a/docs-website/docs/pipeline-components/generators/llamacppchatgenerator.mdx
+++ b/docs-website/docs/pipeline-components/generators/llamacppchatgenerator.mdx
@@ -155,6 +155,31 @@ messages = [ChatMessage.from_user("Who is the best American actor?")]
 result = generator.run(messages)
 ```
 
+### With multimodal (image + text) inputs
+
+```python
+from haystack.dataclasses import ChatMessage, ImageContent
+from haystack_integrations.components.generators.llama_cpp import LlamaCppChatGenerator
+
+# Create an image from file path or base64
+image = ImageContent.from_file_path("path/to/your/image.jpg")
+
+# Create a multimodal message with both text and image
+messages = [ChatMessage.from_user(content_parts=["What's in this image?", image])]
+
+# Initialize with multimodal support
+generator = LlamaCppChatGenerator(
+    model="llava-v1.5-7b-q4_0.gguf",
+    chat_handler_name="Llava15ChatHandler",  # Use llava-1-5 handler
+    model_clip_path="mmproj-model-f16.gguf",  # CLIP model
+    n_ctx=4096  # Larger context for image processing
+)
+generator.warm_up()
+
+result = generator.run(messages)
+print(result)
+```
+
 The `generation_kwargs` can also be passed to the `run` method of the generator directly:
 
 ```python
diff --git a/docs-website/docs/pipeline-components/generators/metallamachatgenerator.mdx b/docs-website/docs/pipeline-components/generators/metallamachatgenerator.mdx
index ffaf9d7bf7..a59330fdd0 100644
--- a/docs-website/docs/pipeline-components/generators/metallamachatgenerator.mdx
+++ b/docs-website/docs/pipeline-components/generators/metallamachatgenerator.mdx
@@ -117,6 +117,21 @@ response = llm.run(
 print("\n\n Model used: ", response["replies"][0].meta["model"])
 ```
 
+With multimodal inputs:
+
+```python
+from haystack.dataclasses import ChatMessage, ImageContent
+from haystack_integrations.components.generators.meta_llama import MetaLlamaChatGenerator
+
+llm = MetaLlamaChatGenerator(model="Llama-4-Scout-17B-16E-Instruct-FP8")
+
+image = ImageContent.from_file_path("apple.jpg")
+messages = [ChatMessage.from_user(content_parts=["What does the image show? Max 5 words.", image])]
+
+response = llm.run(messages)
+print(response["replies"][0].text)
+```
+
 ### In a pipeline
 
 ```python
diff --git a/docs-website/docs/pipeline-components/generators/mistralchatgenerator.mdx b/docs-website/docs/pipeline-components/generators/mistralchatgenerator.mdx
index 126abf766d..c13690a8c4 100644
--- a/docs-website/docs/pipeline-components/generators/mistralchatgenerator.mdx
+++ b/docs-website/docs/pipeline-components/generators/mistralchatgenerator.mdx
@@ -97,6 +97,21 @@ message = ChatMessage.from_user("What's Natural Language Processing? Be brief.")
 print(generator.run([message]))
 ```
 
+With multimodal inputs:
+
+```python
+from haystack.dataclasses import ChatMessage, ImageContent
+from haystack_integrations.components.generators.mistral import MistralChatGenerator
+
+generator = MistralChatGenerator(model="pixtral-12b-2409")
+
+image = ImageContent.from_file_path("apple.jpg")
+messages = [ChatMessage.from_user(content_parts=["What does the image show? Max 5 words.", image])]
+
+response = generator.run(messages)
+print(response)
+```
+
 #### In a Pipeline
 
 Below is an example RAG Pipeline where we answer questions based on the URL contents. We add the contents of the URL into our `messages` in the `ChatPromptBuilder` and generate an answer with the `MistralChatGenerator`.
diff --git a/docs-website/docs/pipeline-components/generators/nvidiachatgenerator.mdx b/docs-website/docs/pipeline-components/generators/nvidiachatgenerator.mdx
index 815aadeec5..cabf8fc5ec 100644
--- a/docs-website/docs/pipeline-components/generators/nvidiachatgenerator.mdx
+++ b/docs-website/docs/pipeline-components/generators/nvidiachatgenerator.mdx
@@ -92,6 +92,21 @@ print(result["replies"])
 print(result["meta"])
 ```
 
+With multimodal inputs:
+
+```python
+from haystack.dataclasses import ChatMessage, ImageContent
+from haystack_integrations.components.generators.nvidia import NvidiaChatGenerator
+
+generator = NvidiaChatGenerator(model="meta/llama-3.2-11b-vision-instruct")
+
+image = ImageContent.from_file_path("apple.jpg")
+messages = [ChatMessage.from_user(content_parts=["What does the image show? Max 5 words.", image])]
+
+result = generator.run(messages)
+print(result["replies"])
+```
+
 ### In a Pipeline
 
 ```python
diff --git a/docs-website/docs/pipeline-components/generators/ollamachatgenerator.mdx b/docs-website/docs/pipeline-components/generators/ollamachatgenerator.mdx
index b56931b4db..c68da62715 100644
--- a/docs-website/docs/pipeline-components/generators/ollamachatgenerator.mdx
+++ b/docs-website/docs/pipeline-components/generators/ollamachatgenerator.mdx
@@ -167,6 +167,21 @@ print(generator.run(messages=messages))
 }
 ```
 
+With multimodal inputs:
+
+```python
+from haystack.dataclasses import ChatMessage, ImageContent
+from haystack_integrations.components.generators.ollama import OllamaChatGenerator
+
+generator = OllamaChatGenerator(model="llava", url="http://localhost:11434")
+
+image = ImageContent.from_file_path("apple.jpg")
+messages = [ChatMessage.from_user(content_parts=["What does the image show? Max 5 words.", image])]
+
+response = generator.run(messages=messages)
+print(response)
+```
+
 ### In a Pipeline
 
 ```python
diff --git a/docs-website/docs/pipeline-components/generators/openrouterchatgenerator.mdx b/docs-website/docs/pipeline-components/generators/openrouterchatgenerator.mdx
index 1a7b622b54..77e2250032 100644
--- a/docs-website/docs/pipeline-components/generators/openrouterchatgenerator.mdx
+++ b/docs-website/docs/pipeline-components/generators/openrouterchatgenerator.mdx
@@ -108,6 +108,21 @@ response = client.run(
 print("\n\n Model used: ", response["replies"][0].meta["model"])
 ```
 
+With multimodal inputs:
+
+```python
+from haystack.dataclasses import ChatMessage, ImageContent
+from haystack_integrations.components.generators.openrouter import OpenRouterChatGenerator
+
+client = OpenRouterChatGenerator(model="anthropic/claude-3-5-sonnet")
+
+image = ImageContent.from_file_path("apple.jpg")
+messages = [ChatMessage.from_user(content_parts=["What does the image show? Max 5 words.", image])]
+
+response = client.run(messages)
+print(response["replies"][0].text)
+```
+
 ### In a pipeline
 
 ```python
diff --git a/docs-website/docs/pipeline-components/generators/stackitchatgenerator.mdx b/docs-website/docs/pipeline-components/generators/stackitchatgenerator.mdx
index c2f5d2f408..428f3b4d84 100644
--- a/docs-website/docs/pipeline-components/generators/stackitchatgenerator.mdx
+++ b/docs-website/docs/pipeline-components/generators/stackitchatgenerator.mdx
@@ -63,6 +63,21 @@ result = generator.run([ChatMessage.from_user("Tell me a joke.")])
 print(result)
 ```
 
+With multimodal inputs:
+
+```python
+from haystack.dataclasses import ChatMessage, ImageContent
+from haystack_integrations.components.generators.stackit import STACKITChatGenerator
+
+generator = STACKITChatGenerator(model="meta-llama/Llama-3.2-11B-Vision-Instruct")
+
+image = ImageContent.from_file_path("apple.jpg")
+messages = [ChatMessage.from_user(content_parts=["What does the image show? Max 5 words.", image])]
+
+result = generator.run(messages)
+print(result)
+```
+
 ### In a pipeline
 
 You can also use `STACKITChatGenerator` in your pipeline.
diff --git a/docs-website/docs/pipeline-components/generators/watsonxchatgenerator.mdx b/docs-website/docs/pipeline-components/generators/watsonxchatgenerator.mdx
index a234e5f8cc..b2a658c74f 100644
--- a/docs-website/docs/pipeline-components/generators/watsonxchatgenerator.mdx
+++ b/docs-website/docs/pipeline-components/generators/watsonxchatgenerator.mdx
@@ -64,6 +64,24 @@ message = ChatMessage.from_user("What's Natural Language Processing? Be brief.")
 print(generator.run([message]))
 ```
 
+With multimodal inputs:
+
+```python
+from haystack.dataclasses import ChatMessage, ImageContent
+from haystack_integrations.components.generators.watsonx.chat.chat_generator import WatsonxChatGenerator
+
+# Create an image from file path or base64
+image = ImageContent.from_file_path("path/to/your/image.jpg")
+
+# Create a multimodal message with both text and image
+messages = [ChatMessage.from_user(content_parts=["What's in this image?", image])]
+
+# Use a multimodal model
+generator = WatsonxChatGenerator(model="meta-llama/llama-3-2-11b-vision-instruct")
+response = generator.run(messages)
+print(response)
+```
+
 #### In a Pipeline
 
 You can also use `WatsonxChatGenerator` to use IBM watsonx.ai chat models in your pipeline.
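The examples in this first patch build the `ImageContent` with `from_file_path`, while some of the inline comments also mention base64. A minimal sketch of that alternative route, assuming the `base64_image` and `mime_type` fields of the Haystack `ImageContent` dataclass (any of the generators above consumes the resulting message unchanged):

```python
import base64

from haystack.dataclasses import ChatMessage, ImageContent

# Encode an existing image file to base64 and wrap it in ImageContent.
with open("apple.jpg", "rb") as f:
    encoded = base64.b64encode(f.read()).decode("utf-8")

image = ImageContent(base64_image=encoded, mime_type="image/jpeg")
message = ChatMessage.from_user(content_parts=["What does the image show? Max 5 words.", image])
```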
From 8b451d1858b45addcd76306bc5b2628580c4c497 Mon Sep 17 00:00:00 2001
From: Daria Fokina
Date: Mon, 10 Nov 2025 17:06:47 +0100
Subject: [PATCH 2/3] Apply suggestions from code review

Co-authored-by: Stefano Fiorucci
---
 .../docs/pipeline-components/generators/coherechatgenerator.mdx | 1 -
 .../pipeline-components/generators/llamacppchatgenerator.mdx    | 1 -
 2 files changed, 2 deletions(-)

diff --git a/docs-website/docs/pipeline-components/generators/coherechatgenerator.mdx b/docs-website/docs/pipeline-components/generators/coherechatgenerator.mdx
index 582ef51ce9..eed6caa908 100644
--- a/docs-website/docs/pipeline-components/generators/coherechatgenerator.mdx
+++ b/docs-website/docs/pipeline-components/generators/coherechatgenerator.mdx
@@ -93,7 +93,6 @@ With multimodal inputs:
 from haystack.dataclasses import ChatMessage, ImageContent
 from haystack_integrations.components.generators.cohere import CohereChatGenerator
 
-# Create an image from file path or base64
 image = ImageContent.from_file_path("path/to/your/image.jpg")
 
 # Create a multimodal message with both text and image
diff --git a/docs-website/docs/pipeline-components/generators/llamacppchatgenerator.mdx b/docs-website/docs/pipeline-components/generators/llamacppchatgenerator.mdx
index 19613a480f..9eea198a54 100644
--- a/docs-website/docs/pipeline-components/generators/llamacppchatgenerator.mdx
+++ b/docs-website/docs/pipeline-components/generators/llamacppchatgenerator.mdx
@@ -161,7 +161,6 @@ result = generator.run(messages)
 from haystack.dataclasses import ChatMessage, ImageContent
 from haystack_integrations.components.generators.llama_cpp import LlamaCppChatGenerator
 
-# Create an image from file path or base64
 image = ImageContent.from_file_path("path/to/your/image.jpg")
 
 # Create a multimodal message with both text and image

From 164cf5ae4fda85f97d97ae652c5f6317137c63c6 Mon Sep 17 00:00:00 2001
From: Darja Fokina
Date: Mon, 10 Nov 2025 17:27:19 +0100
Subject: [PATCH 3/3] standardize names and outputs

---
 .../generators/amazonbedrockchatgenerator.mdx | 11 +++++++---
 .../generators/anthropicchatgenerator.mdx     | 16 +++++++++-----
 .../generators/coherechatgenerator.mdx        | 16 ++++++++------
 .../generators/googlegenaichatgenerator.mdx   | 13 ++++++++----
 .../generators/llamacppchatgenerator.mdx      | 21 +++++++++++--------
 .../generators/metallamachatgenerator.mdx     | 11 +++++++---
 .../generators/mistralchatgenerator.mdx       | 11 +++++++---
 .../generators/nvidiachatgenerator.mdx        | 13 ++++++++----
 .../generators/ollamachatgenerator.mdx        | 11 +++++++---
 .../generators/openrouterchatgenerator.mdx    | 13 ++++++++----
 .../generators/stackitchatgenerator.mdx       | 13 ++++++++----
 .../generators/watsonxchatgenerator.mdx       | 17 ++++++++-------
 12 files changed, 111 insertions(+), 55 deletions(-)

diff --git a/docs-website/docs/pipeline-components/generators/amazonbedrockchatgenerator.mdx b/docs-website/docs/pipeline-components/generators/amazonbedrockchatgenerator.mdx
index 5dd9a5b193..262c119980 100644
--- a/docs-website/docs/pipeline-components/generators/amazonbedrockchatgenerator.mdx
+++ b/docs-website/docs/pipeline-components/generators/amazonbedrockchatgenerator.mdx
@@ -102,13 +102,18 @@ With multimodal inputs:
 from haystack.dataclasses import ChatMessage, ImageContent
 from haystack_integrations.components.generators.amazon_bedrock import AmazonBedrockChatGenerator
 
-generator = AmazonBedrockChatGenerator(model="anthropic.claude-3-5-sonnet-20240620-v1:0")
+llm = AmazonBedrockChatGenerator(model="anthropic.claude-3-5-sonnet-20240620-v1:0")
 
 image = ImageContent.from_file_path("apple.jpg")
-message = ChatMessage.from_user(content_parts=["Describe the image using 10 words at most.", image])
+user_message = ChatMessage.from_user(content_parts=[
+    "What does the image show? Max 5 words.",
+    image
+    ])
 
-response = generator.run(messages=[message])
+response = llm.run([user_message])["replies"][0].text
 print(response)
+
+# Red apple on straw mat.
 ```
 
 ### In a pipeline
diff --git a/docs-website/docs/pipeline-components/generators/anthropicchatgenerator.mdx b/docs-website/docs/pipeline-components/generators/anthropicchatgenerator.mdx
index 433522d0bf..e2946e477d 100644
--- a/docs-website/docs/pipeline-components/generators/anthropicchatgenerator.mdx
+++ b/docs-website/docs/pipeline-components/generators/anthropicchatgenerator.mdx
@@ -154,12 +154,18 @@ With multimodal inputs:
 from haystack.dataclasses import ChatMessage, ImageContent
 from haystack_integrations.components.generators.anthropic import AnthropicChatGenerator
 
-image = ImageContent.from_file_path("path/to/image.jpg")
-messages = [ChatMessage.from_user(content_parts=["What's in this image?", image])]
+llm = AnthropicChatGenerator()
 
-generator = AnthropicChatGenerator()
-result = generator.run(messages)
-print(result)
+image = ImageContent.from_file_path("apple.jpg")
+user_message = ChatMessage.from_user(content_parts=[
+    "What does the image show? Max 5 words.",
+    image
+    ])
+
+response = llm.run([user_message])["replies"][0].text
+print(response)
+
+# Red apple on straw.
 ```
 
 ### In a pipeline
diff --git a/docs-website/docs/pipeline-components/generators/coherechatgenerator.mdx b/docs-website/docs/pipeline-components/generators/coherechatgenerator.mdx
index eed6caa908..4b641b509d 100644
--- a/docs-website/docs/pipeline-components/generators/coherechatgenerator.mdx
+++ b/docs-website/docs/pipeline-components/generators/coherechatgenerator.mdx
@@ -93,15 +93,19 @@ With multimodal inputs:
 from haystack.dataclasses import ChatMessage, ImageContent
 from haystack_integrations.components.generators.cohere import CohereChatGenerator
 
-image = ImageContent.from_file_path("path/to/your/image.jpg")
+# Use a multimodal model like Command A Vision
+llm = CohereChatGenerator(model="command-a-vision-07-2025")
 
-# Create a multimodal message with both text and image
-messages = [ChatMessage.from_user(content_parts=["What's in this image?", image])]
+image = ImageContent.from_file_path("apple.jpg")
+user_message = ChatMessage.from_user(content_parts=[
+    "What does the image show? Max 5 words.",
+    image
+    ])
 
-# Use a multimodal model like Command A Vision
-generator = CohereChatGenerator(model="command-a-vision-07-2025")
-response = generator.run(messages)
+response = llm.run([user_message])["replies"][0].text
 print(response)
+
+# Red apple on straw.
 ```
 
 #### In a Pipeline
diff --git a/docs-website/docs/pipeline-components/generators/googlegenaichatgenerator.mdx b/docs-website/docs/pipeline-components/generators/googlegenaichatgenerator.mdx
index 10a392f562..7be16473d8 100644
--- a/docs-website/docs/pipeline-components/generators/googlegenaichatgenerator.mdx
+++ b/docs-website/docs/pipeline-components/generators/googlegenaichatgenerator.mdx
@@ -134,13 +134,18 @@ With multimodal inputs:
 from haystack.dataclasses import ChatMessage, ImageContent
 from haystack_integrations.components.generators.google_genai import GoogleGenAIChatGenerator
 
-chat_generator = GoogleGenAIChatGenerator()
+llm = GoogleGenAIChatGenerator()
 
 image = ImageContent.from_file_path("apple.jpg")
-messages = [ChatMessage.from_user(content_parts=["What does the image show? Max 5 words.", image])]
+user_message = ChatMessage.from_user(content_parts=[
+    "What does the image show? Max 5 words.",
+    image
+    ])
 
-response = chat_generator.run(messages=messages)
-print(response["replies"][0].text)
+response = llm.run([user_message])["replies"][0].text
+print(response)
+
+# Red apple on straw.
 ```
 
 You can also easily use function calls. First, define the function locally and convert it into a [Tool](https://www.notion.so/docs/tool):
diff --git a/docs-website/docs/pipeline-components/generators/llamacppchatgenerator.mdx b/docs-website/docs/pipeline-components/generators/llamacppchatgenerator.mdx
index 9eea198a54..cef9ce3db5 100644
--- a/docs-website/docs/pipeline-components/generators/llamacppchatgenerator.mdx
+++ b/docs-website/docs/pipeline-components/generators/llamacppchatgenerator.mdx
@@ -161,22 +161,25 @@ result = generator.run(messages)
 from haystack.dataclasses import ChatMessage, ImageContent
 from haystack_integrations.components.generators.llama_cpp import LlamaCppChatGenerator
 
-image = ImageContent.from_file_path("path/to/your/image.jpg")
-
-# Create a multimodal message with both text and image
-messages = [ChatMessage.from_user(content_parts=["What's in this image?", image])]
-
 # Initialize with multimodal support
-generator = LlamaCppChatGenerator(
+llm = LlamaCppChatGenerator(
     model="llava-v1.5-7b-q4_0.gguf",
     chat_handler_name="Llava15ChatHandler",  # Use llava-1-5 handler
     model_clip_path="mmproj-model-f16.gguf",  # CLIP model
     n_ctx=4096  # Larger context for image processing
 )
-generator.warm_up()
+llm.warm_up()
 
-result = generator.run(messages)
-print(result)
+image = ImageContent.from_file_path("apple.jpg")
+user_message = ChatMessage.from_user(content_parts=[
+    "What does the image show? Max 5 words.",
+    image
+    ])
+
+response = llm.run([user_message])["replies"][0].text
+print(response)
+
+# Red apple on straw.
 ```
diff --git a/docs-website/docs/pipeline-components/generators/metallamachatgenerator.mdx b/docs-website/docs/pipeline-components/generators/metallamachatgenerator.mdx
index a59330fdd0..0ee01d8cd3 100644
--- a/docs-website/docs/pipeline-components/generators/metallamachatgenerator.mdx
+++ b/docs-website/docs/pipeline-components/generators/metallamachatgenerator.mdx
@@ -126,10 +126,15 @@ from haystack_integrations.components.generators.meta_llama import MetaLlamaChatGenerator
 llm = MetaLlamaChatGenerator(model="Llama-4-Scout-17B-16E-Instruct-FP8")
 
 image = ImageContent.from_file_path("apple.jpg")
-messages = [ChatMessage.from_user(content_parts=["What does the image show? Max 5 words.", image])]
+user_message = ChatMessage.from_user(content_parts=[
+    "What does the image show? Max 5 words.",
+    image
+    ])
 
-response = llm.run(messages)
-print(response["replies"][0].text)
+response = llm.run([user_message])["replies"][0].text
+print(response)
+
+# Red apple on straw.
 ```
 
 ### In a pipeline
diff --git a/docs-website/docs/pipeline-components/generators/mistralchatgenerator.mdx b/docs-website/docs/pipeline-components/generators/mistralchatgenerator.mdx
index c13690a8c4..d5fb5b55fd 100644
--- a/docs-website/docs/pipeline-components/generators/mistralchatgenerator.mdx
+++ b/docs-website/docs/pipeline-components/generators/mistralchatgenerator.mdx
@@ -103,13 +103,18 @@ With multimodal inputs:
 from haystack.dataclasses import ChatMessage, ImageContent
 from haystack_integrations.components.generators.mistral import MistralChatGenerator
 
-generator = MistralChatGenerator(model="pixtral-12b-2409")
+llm = MistralChatGenerator(model="pixtral-12b-2409")
 
 image = ImageContent.from_file_path("apple.jpg")
-messages = [ChatMessage.from_user(content_parts=["What does the image show? Max 5 words.", image])]
+user_message = ChatMessage.from_user(content_parts=[
+    "What does the image show? Max 5 words.",
+    image
+    ])
 
-response = generator.run(messages)
+response = llm.run([user_message])["replies"][0].text
 print(response)
+
+# Red apple on straw.
 ```
 
 #### In a Pipeline
diff --git a/docs-website/docs/pipeline-components/generators/nvidiachatgenerator.mdx b/docs-website/docs/pipeline-components/generators/nvidiachatgenerator.mdx
index cabf8fc5ec..0df6e9715b 100644
--- a/docs-website/docs/pipeline-components/generators/nvidiachatgenerator.mdx
+++ b/docs-website/docs/pipeline-components/generators/nvidiachatgenerator.mdx
@@ -98,13 +98,18 @@ With multimodal inputs:
 from haystack.dataclasses import ChatMessage, ImageContent
 from haystack_integrations.components.generators.nvidia import NvidiaChatGenerator
 
-generator = NvidiaChatGenerator(model="meta/llama-3.2-11b-vision-instruct")
+llm = NvidiaChatGenerator(model="meta/llama-3.2-11b-vision-instruct")
 
 image = ImageContent.from_file_path("apple.jpg")
-messages = [ChatMessage.from_user(content_parts=["What does the image show? Max 5 words.", image])]
+user_message = ChatMessage.from_user(content_parts=[
+    "What does the image show? Max 5 words.",
+    image
+    ])
 
-result = generator.run(messages)
-print(result["replies"])
+response = llm.run([user_message])["replies"][0].text
+print(response)
+
+# Red apple on straw.
 ```
 
 ### In a Pipeline
diff --git a/docs-website/docs/pipeline-components/generators/ollamachatgenerator.mdx b/docs-website/docs/pipeline-components/generators/ollamachatgenerator.mdx
index c68da62715..76a907eb69 100644
--- a/docs-website/docs/pipeline-components/generators/ollamachatgenerator.mdx
+++ b/docs-website/docs/pipeline-components/generators/ollamachatgenerator.mdx
@@ -173,13 +173,18 @@ With multimodal inputs:
 from haystack.dataclasses import ChatMessage, ImageContent
 from haystack_integrations.components.generators.ollama import OllamaChatGenerator
 
-generator = OllamaChatGenerator(model="llava", url="http://localhost:11434")
+llm = OllamaChatGenerator(model="llava", url="http://localhost:11434")
 
 image = ImageContent.from_file_path("apple.jpg")
-messages = [ChatMessage.from_user(content_parts=["What does the image show? Max 5 words.", image])]
+user_message = ChatMessage.from_user(content_parts=[
+    "What does the image show? Max 5 words.",
+    image
+    ])
 
-response = generator.run(messages=messages)
+response = llm.run([user_message])["replies"][0].text
 print(response)
+
+# Red apple on straw.
 ```
 
 ### In a Pipeline
 
 ```python
diff --git a/docs-website/docs/pipeline-components/generators/openrouterchatgenerator.mdx b/docs-website/docs/pipeline-components/generators/openrouterchatgenerator.mdx
index 77e2250032..e9d7a16234 100644
--- a/docs-website/docs/pipeline-components/generators/openrouterchatgenerator.mdx
+++ b/docs-website/docs/pipeline-components/generators/openrouterchatgenerator.mdx
@@ -114,13 +114,18 @@ With multimodal inputs:
 from haystack.dataclasses import ChatMessage, ImageContent
 from haystack_integrations.components.generators.openrouter import OpenRouterChatGenerator
 
-client = OpenRouterChatGenerator(model="anthropic/claude-3-5-sonnet")
+llm = OpenRouterChatGenerator(model="anthropic/claude-3-5-sonnet")
 
 image = ImageContent.from_file_path("apple.jpg")
-messages = [ChatMessage.from_user(content_parts=["What does the image show? Max 5 words.", image])]
+user_message = ChatMessage.from_user(content_parts=[
+    "What does the image show? Max 5 words.",
+    image
+    ])
 
-response = client.run(messages)
-print(response["replies"][0].text)
+response = llm.run([user_message])["replies"][0].text
+print(response)
+
+# Red apple on straw.
 ```
 
 ### In a pipeline
diff --git a/docs-website/docs/pipeline-components/generators/stackitchatgenerator.mdx b/docs-website/docs/pipeline-components/generators/stackitchatgenerator.mdx
index 428f3b4d84..99180400ff 100644
--- a/docs-website/docs/pipeline-components/generators/stackitchatgenerator.mdx
+++ b/docs-website/docs/pipeline-components/generators/stackitchatgenerator.mdx
@@ -69,13 +69,18 @@ With multimodal inputs:
 from haystack.dataclasses import ChatMessage, ImageContent
 from haystack_integrations.components.generators.stackit import STACKITChatGenerator
 
-generator = STACKITChatGenerator(model="meta-llama/Llama-3.2-11B-Vision-Instruct")
+llm = STACKITChatGenerator(model="meta-llama/Llama-3.2-11B-Vision-Instruct")
 
 image = ImageContent.from_file_path("apple.jpg")
-messages = [ChatMessage.from_user(content_parts=["What does the image show? Max 5 words.", image])]
+user_message = ChatMessage.from_user(content_parts=[
+    "What does the image show? Max 5 words.",
+    image
+    ])
 
-result = generator.run(messages)
-print(result)
+response = llm.run([user_message])["replies"][0].text
+print(response)
+
+# Red apple on straw.
 ```
 
 ### In a pipeline
diff --git a/docs-website/docs/pipeline-components/generators/watsonxchatgenerator.mdx b/docs-website/docs/pipeline-components/generators/watsonxchatgenerator.mdx
index b2a658c74f..4116640b4f 100644
--- a/docs-website/docs/pipeline-components/generators/watsonxchatgenerator.mdx
+++ b/docs-website/docs/pipeline-components/generators/watsonxchatgenerator.mdx
@@ -70,16 +70,19 @@ With multimodal inputs:
 from haystack.dataclasses import ChatMessage, ImageContent
 from haystack_integrations.components.generators.watsonx.chat.chat_generator import WatsonxChatGenerator
 
-# Create an image from file path or base64
-image = ImageContent.from_file_path("path/to/your/image.jpg")
+# Use a multimodal model
+llm = WatsonxChatGenerator(model="meta-llama/llama-3-2-11b-vision-instruct")
 
-# Create a multimodal message with both text and image
-messages = [ChatMessage.from_user(content_parts=["What's in this image?", image])]
+image = ImageContent.from_file_path("apple.jpg")
+user_message = ChatMessage.from_user(content_parts=[
+    "What does the image show? Max 5 words.",
+    image
+    ])
 
-# Use a multimodal model
-generator = WatsonxChatGenerator(model="meta-llama/llama-3-2-11b-vision-instruct")
-response = generator.run(messages)
+response = llm.run([user_message])["replies"][0].text
 print(response)
+
+# Red apple on straw.
 ```
 
 #### In a Pipeline
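The third patch settles every page on one shared pattern: build a single user message whose `content_parts` mix text and an `ImageContent`, call `run`, and read `["replies"][0].text`. That shared shape can be summarized in one place as a sketch, with `describe_image` as a hypothetical helper name and the generator passed in as a parameter (any chat generator from the pages above fits):

```python
from haystack.dataclasses import ChatMessage, ImageContent

def describe_image(llm, image_path: str) -> str:
    """Shared multimodal pattern used by all examples above."""
    image = ImageContent.from_file_path(image_path)
    user_message = ChatMessage.from_user(content_parts=[
        "What does the image show? Max 5 words.",
        image,
    ])
    # Every chat generator returns {"replies": [ChatMessage, ...]}.
    return llm.run([user_message])["replies"][0].text
```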