diff --git a/.code-samples.meilisearch.yaml b/.code-samples.meilisearch.yaml
index 53e430a8..bf7062f3 100644
--- a/.code-samples.meilisearch.yaml
+++ b/.code-samples.meilisearch.yaml
@@ -767,6 +767,25 @@ get_batch_1: |-
   client.get_batch(BATCH_UID)
 get_similar_post_1: |-
   client.index("INDEX_NAME").get_similar_documents({"id": "TARGET_DOCUMENT_ID", "embedder": "default"})
+search_parameter_reference_media_1: |-
+  client.index("movies_fragments").search(
+      "a futuristic space movie",
+      {
+          "media": {
+              "textAndPoster": {
+                  "text": "a futuristic space movie",
+                  "image": {
+                      "mime": "image/jpeg",
+                      "data": "BASE64_ENCODED_IMAGE_DATA"
+                  }
+              }
+          },
+          "hybrid": {
+              "embedder": "multimodal",
+              "semanticRatio": 1.0
+          },
+      }
+  )
 webhooks_get_1: |-
   client.get_webhooks()
 webhooks_get_single_1: |-
diff --git a/datasets/multimodal_movies.json b/datasets/multimodal_movies.json
new file mode 100644
index 00000000..4f4210e3
--- /dev/null
+++ b/datasets/multimodal_movies.json
@@ -0,0 +1,9 @@
+[
+{"id":"287947","title":"Shazam!","poster":"https://image.tmdb.org/t/p/w1280/xnopI5Xtky18MPhK40cZAGAOVeV.jpg","overview":"A boy is given the ability to become an adult superhero in times of need with a single magic word.","release_date":1553299200},
+{"id":"299537","title":"Captain Marvel","poster":"https://image.tmdb.org/t/p/w1280/AtsgWhDnHTq68L0lLsUrCnM7TjG.jpg","overview":"The story follows Carol Danvers as she becomes one of the universe’s most powerful heroes when Earth is caught in the middle of a galactic war between two alien races. 
Set in the 1990s, Captain Marvel is an all-new adventure from a previously unseen period in the history of the Marvel Cinematic Universe.","release_date":1551830400}, +{"id":"522681","title":"Escape Room","poster":"https://image.tmdb.org/t/p/w1280/8yZAx7tlKRZIg7pJfaPhl00yHIQ.jpg","overview":"Six strangers find themselves in circumstances beyond their control, and must use their wits to survive.","release_date":1546473600}, +{"id":"166428","title":"How to Train Your Dragon: The Hidden World","poster":"https://image.tmdb.org/t/p/w1280/xvx4Yhf0DVH8G4LzNISpMfFBDy2.jpg","overview":"As Hiccup fulfills his dream of creating a peaceful dragon utopia, Toothless’ discovery of an untamed, elusive mate draws the Night Fury away. When danger mounts at home and Hiccup’s reign as village chief is tested, both dragon and rider must make impossible decisions to save their kind.","release_date":1546473600}, +{"id":"450465","title":"Glass","poster":"https://image.tmdb.org/t/p/w1280/svIDTNUoajS8dLEo7EosxvyAsgJ.jpg","overview":"In a series of escalating encounters, security guard David Dunn uses his supernatural abilities to track Kevin Wendell Crumb, a disturbed man who has twenty-four personalities. 
Meanwhile, the shadowy presence of Elijah Price emerges as an orchestrator who holds secrets critical to both men.","release_date":1547596800}, +{"id":"495925","title":"Doraemon the Movie: Nobita's Treasure Island","poster":"https://image.tmdb.org/t/p/w1280/cmJ71gdZxCqkMUvGwWgSg3MK7pC.jpg","overview":"The story is based on Robert Louis Stevenson's Treasure Island novel.","release_date":1520035200}, +{"id":"329996","title":"Dumbo","poster":"https://image.tmdb.org/t/p/w1280/279PwJAcelI4VuBtdzrZASqDPQr.jpg","overview":"A young elephant, whose oversized ears enable him to fly, helps save a struggling circus, but when the circus plans a new venture, Dumbo and his friends discover dark secrets beneath its shiny veneer.","release_date":1553644800}, +{"id":"299536","title":"Avengers: Infinity War","poster":"https://image.tmdb.org/t/p/w1280/7WsyChQLEftFiDOVTGkv3hFpyyt.jpg","overview":"As the Avengers and their allies have continued to protect the world from threats too large for any one hero to handle, a new danger has emerged from the cosmic shadows: Thanos. A despot of intergalactic infamy, his goal is to collect all six Infinity Stones, artifacts of unimaginable power, and use them to inflict his twisted will on all of reality. Everything the Avengers have fought for has led up to this moment - the fate of Earth and existence itself has never been more uncertain.","release_date":1524618000}] diff --git a/docs/conf.py b/docs/conf.py index 22644da0..a4fac860 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -58,9 +58,9 @@ # This value contains a list of modules to be mocked up. 
 autodoc_mock_imports = ["camel_converter"]
 
-html_title = 'Meilisearch Python | Documentation'
+html_title = "Meilisearch Python | Documentation"
 
 # Add Fathom analytics script
 html_js_files = [
-    ("https://cdn.usefathom.com/script.js", { "data-site": "QNBPJXIV", "defer": "defer" })
+    ("https://cdn.usefathom.com/script.js", {"data-site": "QNBPJXIV", "defer": "defer"})
 ]
diff --git a/meilisearch/client.py b/meilisearch/client.py
index 37025d26..be11faf6 100644
--- a/meilisearch/client.py
+++ b/meilisearch/client.py
@@ -1110,3 +1110,41 @@ def _valid_uuid(uuid: str) -> bool:
         )
         match = uuid4hex.match(uuid)
         return bool(match)
+
+    def get_experimental_features(self) -> Dict[str, Any]:
+        """Retrieve the current state of all experimental features (GET request).
+
+        Returns
+        -------
+        features:
+            A dictionary mapping feature names to their enabled/disabled state.
+            For example: {"multimodal": True, "vectorStore": False}
+
+        Raises
+        ------
+        MeilisearchApiError
+            An error containing details about why Meilisearch can't process your request. Meilisearch error codes are described here: https://www.meilisearch.com/docs/reference/errors/error_codes#meilisearch-errors
+        """
+        return self.http.get(self.config.paths.experimental_features)
+
+    def update_experimental_features(self, features: Dict[str, bool]) -> Dict[str, Any]:
+        """Update one or more experimental features (PATCH request).
+
+        Parameters
+        ----------
+        features:
+            A dictionary mapping feature names to booleans, sent as the request body.
+            For example, {"multimodal": True} to enable multimodal,
+            or {"multimodal": True, "vectorStore": False} to update multiple features.
+
+        Returns
+        -------
+        features:
+            The updated experimental features settings as a dictionary.
+
+        Raises
+        ------
+        MeilisearchApiError
+            An error containing details about why Meilisearch can't process your request. Meilisearch error codes are described here: https://www.meilisearch.com/docs/reference/errors/error_codes#meilisearch-errors
+        """
+        return self.http.patch(self.config.paths.experimental_features, body=features)
diff --git a/meilisearch/config.py b/meilisearch/config.py
index ba6a2638..df39e64a 100644
--- a/meilisearch/config.py
+++ b/meilisearch/config.py
@@ -47,6 +47,7 @@ class Paths:
         localized_attributes = "localized-attributes"
         edit = "edit"
         network = "network"
+        experimental_features = "experimental-features"
         webhooks = "webhooks"
 
     def __init__(
diff --git a/meilisearch/models/embedders.py b/meilisearch/models/embedders.py
index 9dcd5d00..a58125ea 100644
--- a/meilisearch/models/embedders.py
+++ b/meilisearch/models/embedders.py
@@ -167,6 +167,12 @@ class RestEmbedder(CamelBase):
         Template defining the data Meilisearch sends to the embedder
     document_template_max_bytes: Optional[int]
         Maximum allowed size of rendered document template (defaults to 400)
+    indexing_fragments: Optional[Dict[str, Dict[str, Any]]]
+        Defines how to fragment documents for indexing (multi-modal search).
+        Fragments can contain complex nested structures (e.g., lists of objects).
+    search_fragments: Optional[Dict[str, Dict[str, Any]]]
+        Defines how to fragment search queries (multi-modal search).
+        Fragments can contain complex nested structures (e.g., lists of objects).
request: Dict[str, Any] A JSON value representing the request Meilisearch makes to the remote embedder response: Dict[str, Any] @@ -185,6 +191,8 @@ class RestEmbedder(CamelBase): dimensions: Optional[int] = None document_template: Optional[str] = None document_template_max_bytes: Optional[int] = None + indexing_fragments: Optional[Dict[str, Dict[str, Any]]] = None + search_fragments: Optional[Dict[str, Dict[str, Any]]] = None request: Dict[str, Any] response: Dict[str, Any] headers: Optional[Dict[str, str]] = None diff --git a/tests/client/test_experimental_features.py b/tests/client/test_experimental_features.py new file mode 100644 index 00000000..f321da1c --- /dev/null +++ b/tests/client/test_experimental_features.py @@ -0,0 +1,22 @@ +"""Tests for experimental features API.""" + + +def test_get_experimental_features(client): + """Test getting experimental features.""" + response = client.get_experimental_features() + assert isinstance(response, dict) + # Check that at least one known experimental feature is present + assert "multimodal" in response or "vectorStoreSetting" in response + + +def test_update_experimental_features(client): + """Test updating experimental features.""" + # Enable multimodal + response = client.update_experimental_features({"multimodal": True}) + assert isinstance(response, dict) + assert response.get("multimodal") is True + + # Disable multimodal + response = client.update_experimental_features({"multimodal": False}) + assert isinstance(response, dict) + assert response.get("multimodal") is False diff --git a/tests/client/test_multimodal.py b/tests/client/test_multimodal.py new file mode 100644 index 00000000..fd1eac12 --- /dev/null +++ b/tests/client/test_multimodal.py @@ -0,0 +1,289 @@ +import base64 +import json +import os +from pathlib import Path + +import pytest +import requests + +from meilisearch import Client +from meilisearch.errors import MeilisearchApiError +from tests import common + +# ---------------- ENV 
---------------- +VOYAGE_API_KEY = os.getenv("VOYAGE_API_KEY") + +INDEX_UID = "multi-modal-search-test" +EMBEDDER_NAME = "multimodal" + +# ---------------- Paths ---------------- +# datasets folder (movies.json) +DATASETS_DIR = Path(__file__).resolve().parent.parent.parent / "datasets" +MOVIES = json.loads((DATASETS_DIR / "multimodal_movies.json").read_text()) + +# fixtures folder (images) +FIXTURES_DIR = Path(__file__).resolve().parent.parent / "fixtures" + + +# ---------------- Helper ---------------- +def load_image_base64(file_name: str) -> str: + """ + Load an image from the fixtures folder and return as base64 string. + """ + file_path = FIXTURES_DIR / file_name + encoded = base64.b64encode(file_path.read_bytes()).decode("utf-8") + return encoded + + +# ---------------- Embedder Config ---------------- +# Match JS test exactly - fragments have complex nested objects +EMBEDDER_CONFIG = { + "source": "rest", + "url": "https://api.voyageai.com/v1/multimodalembeddings", + "apiKey": VOYAGE_API_KEY, + "dimensions": 1024, + "indexingFragments": { + "textAndPoster": { + "value": { + "content": [ + { + "type": "text", + "text": "A movie titled {{doc.title}} whose description starts with {{doc.overview|truncatewords:20}}.", + }, + { + "type": "image_url", + "image_url": "{{doc.poster}}", + }, + ], + }, + }, + "text": { + "value": { + "content": [ + { + "type": "text", + "text": "A movie titled {{doc.title}} whose description starts with {{doc.overview|truncatewords:20}}.", + }, + ], + }, + }, + "poster": { + "value": { + "content": [ + { + "type": "image_url", + "image_url": "{{doc.poster}}", + }, + ], + }, + }, + }, + "searchFragments": { + "textAndPoster": { + "value": { + "content": [ + { + "type": "text", + "text": "{{media.textAndPoster.text}}", + }, + { + "type": "image_base64", + "image_base64": "data:{{media.textAndPoster.image.mime}};base64,{{media.textAndPoster.image.data}}", + }, + ], + }, + }, + "text": { + "value": { + "content": [ + { + "type": "text", + 
"text": "{{media.text.text}}", + }, + ], + }, + }, + "poster": { + "value": { + "content": [ + { + "type": "image_url", + "image_url": "{{media.poster.poster}}", + }, + ], + }, + }, + }, + "request": { + "inputs": ["{{fragment}}", "{{..}}"], + "model": "voyage-multimodal-3", + }, + "response": { + "data": [ + { + "embedding": "{{embedding}}", + }, + "{{..}}", + ], + }, +} + + +# ---------------- Tests ---------------- +@pytest.mark.skipif(not VOYAGE_API_KEY, reason="Voyage API key not set") +class TestMultimodalSearch: + """Multi-modal search tests""" + + # Class attribute populated by setup_index fixture + search_client: Client + + @pytest.fixture(autouse=True) + def clear_indexes(self, client): + """ + Override the global clear_indexes fixture to exclude the multimodal test index. + This prevents the index from being deleted between tests in this class. + """ + yield + # Delete all indexes except the multimodal test index + indexes = client.get_indexes() + for index in indexes["results"]: + if index.uid != INDEX_UID: + try: + task = client.index(index.uid).delete() + client.wait_for_task(task.task_uid) + except MeilisearchApiError as exc: + # Index may already have been deleted by another test run; log and continue. 
+ print(f"Warning: failed to delete index {index.uid} during cleanup: {exc}") + + @pytest.fixture(scope="class", autouse=True) + def setup_index(self, request): + """Setup index with embedder configuration.""" + client = Client(common.BASE_URL, common.MASTER_KEY) + + # Enable multimodal experimental feature + client.update_experimental_features({"multimodal": True}) + + # Delete the index if it already exists + try: + task = client.index(INDEX_UID).delete() + client.wait_for_task(task.task_uid) + except MeilisearchApiError: + pass # Index doesn't exist, which is fine + + # Create index + task = client.create_index(INDEX_UID) + client.wait_for_task(task.task_uid) + + # Update settings with embedder config + # Use raw HTTP request because fragments with complex objects + # may not pass Pydantic validation + settings_payload = { + "searchableAttributes": ["title", "overview"], + "embedders": { + EMBEDDER_NAME: EMBEDDER_CONFIG, + }, + } + + response = requests.patch( + f"{common.BASE_URL}/indexes/{INDEX_UID}/settings", + headers={ + "Authorization": f"Bearer {common.MASTER_KEY}", + "Content-Type": "application/json", + }, + json=settings_payload, + timeout=30, + ) + response.raise_for_status() + + # Wait for settings update task (embedder config can take longer) + task_data = response.json() + task_uid = task_data.get("taskUid") + client.wait_for_task(task_uid, timeout_in_ms=60_000) + + index = client.get_index(INDEX_UID) + + # Add documents + task = index.add_documents(MOVIES) + # Use longer timeout for document indexing with embeddings + # Each document needs embeddings generated via Voyage API, which can be slow + client.wait_for_task( + task.task_uid, + timeout_in_ms=300_000, # 5 minutes timeout for embedding generation + interval_in_ms=1000, # Poll every 1 second instead of 50ms to reduce log noise + ) + + # Verify index is ready by checking stats + stats = index.get_stats() + assert stats.number_of_documents == len( + MOVIES + ), f"Expected {len(MOVIES)} 
documents, got {stats.number_of_documents}" + + # Store for tests on the class + # Use request.cls to ensure attributes are available on test instances + request.cls.search_client = client + + def test_text_query(self): + """Test text query search""" + query = "The story follows Carol Danvers" + response = self.search_client.index(INDEX_UID).search( + query, + { + "media": { + "text": { + "text": query, + }, + }, + "hybrid": { + "embedder": EMBEDDER_NAME, + "semanticRatio": 1, + }, + }, + ) + assert response["hits"][0]["title"] == "Captain Marvel" + + def test_image_query(self): + """Test image query search""" + # Find Dumbo in the movies list + dumbo_movie = next(m for m in MOVIES if m["title"] == "Dumbo") + dumbo_poster = dumbo_movie["poster"] + + response = self.search_client.index(INDEX_UID).search( + None, + { + "media": { + "poster": { + "poster": dumbo_poster, + }, + }, + "hybrid": { + "embedder": EMBEDDER_NAME, + "semanticRatio": 1, + }, + }, + ) + assert response["hits"][0]["title"] == "Dumbo" + + def test_text_and_image_query(self): + """Test text and image query""" + query = "a futuristic movie" + master_yoda_base64 = load_image_base64("master-yoda.jpeg") + + response = self.search_client.index(INDEX_UID).search( + query, + { + "media": { + "textAndPoster": { + "text": query, + "image": { + "mime": "image/jpeg", + "data": master_yoda_base64, + }, + }, + }, + "hybrid": { + "embedder": EMBEDDER_NAME, + "semanticRatio": 1, + }, + }, + ) + assert response["hits"][0]["title"] == "Captain Marvel" diff --git a/tests/fixtures/master-yoda.jpeg b/tests/fixtures/master-yoda.jpeg new file mode 100644 index 00000000..f241a85a Binary files /dev/null and b/tests/fixtures/master-yoda.jpeg differ