Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
261 changes: 258 additions & 3 deletions output/openapi/elasticsearch-openapi.json

Large diffs are not rendered by default.

261 changes: 258 additions & 3 deletions output/openapi/elasticsearch-serverless-openapi.json

Large diffs are not rendered by default.

524 changes: 483 additions & 41 deletions output/schema/schema.json

Large diffs are not rendered by default.

41 changes: 41 additions & 0 deletions output/typescript/types.ts

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions specification/_doc_ids/table.csv
Original file line number Diff line number Diff line change
Expand Up @@ -387,6 +387,7 @@ inference-api-put-jinaai,https://www.elastic.co/docs/api/doc/elasticsearch/opera
inference-api-put-llama,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-llama,,
inference-api-put-mistral,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-mistral,https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-mistral.html,
inference-api-put-openai,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-openai,https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-openai.html,
inference-api-put-openshift-ai,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-openshift-ai,,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This URL does not exist yet, which will cause some clients' docs builds to fail. Will this page be available soon?

inference-api-put-voyageai,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-voyageai,,
inference-api-put-watsonx,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-watsonx,https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-watsonx-ai.html,
inference-api-stream,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-stream-inference,https://www.elastic.co/guide/en/elasticsearch/reference/8.18/stream-inference-api.html,
Expand Down
43 changes: 43 additions & 0 deletions specification/_json_spec/inference.put_openshift_ai.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
{
"inference.put_openshift_ai": {
"documentation": {
"url": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-openshift-ai",
"description": "Create an OpenShift AI inference endpoint"
},
"stability": "stable",
"visibility": "public",
"headers": {
"accept": ["application/json"],
"content_type": ["application/json"]
},
"url": {
"paths": [
{
"path": "/_inference/{task_type}/{openshiftai_inference_id}",
"methods": ["PUT"],
"parts": {
"task_type": {
"type": "string",
"description": "The type of the inference task that the model will perform"
},
"openshiftai_inference_id": {
"type": "string",
"description": "The unique identifier of the inference endpoint"
}
}
}
]
},
"body": {
"description": "The inference endpoint's task and service settings",
"required": true
},
"params": {
"timeout": {
"type": "time",
"description": "Specifies the amount of time to wait for the inference endpoint to be created.",
"default": "30s"
}
}
}
}
62 changes: 62 additions & 0 deletions specification/inference/_types/CommonTypes.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1844,6 +1844,68 @@ export enum OpenAIServiceType {
openai
}

export class OpenShiftAiServiceSettings {
  /**
   * A valid API key for your OpenShift AI endpoint.
   * It can be found in the `Token authentication` section of the model-related information.
   */
  api_key: string
  /**
   * The URL of the OpenShift AI hosted model endpoint.
   */
  url: string
  /**
   * The name of the model to use for the inference task.
   * Refer to the hosted model's documentation for the name if needed.
   * The service has been tested and confirmed to be working with the following models:
   * * For the `text_embedding` task: `gritlm-7b`.
   * * For the `completion` and `chat_completion` tasks: `llama-31-8b-instruct`.
   * * For the `rerank` task: `bge-reranker-v2-m3`.
   */
  model_id?: string
  /**
   * For a `text_embedding` task, the maximum number of tokens per input before chunking occurs.
   */
  max_input_tokens?: integer
  /**
   * For a `text_embedding` task, the similarity measure applied to the embeddings.
   * One of `cosine`, `dot_product`, or `l2_norm`.
   */
  similarity?: OpenShiftAiSimilarityType
  /**
   * This setting helps to minimize the number of rate limit errors returned from the OpenShift AI API.
   * By default, the `openshift_ai` service sets the number of requests allowed per minute to 3000.
   */
  rate_limit?: RateLimitSetting
}

/**
 * The inference task types supported by the `openshift_ai` service.
 */
export enum OpenShiftAiTaskType {
  text_embedding,
  completion,
  chat_completion,
  rerank
}

/**
 * The service identifier accepted when creating an OpenShift AI inference endpoint.
 */
export enum OpenShiftAiServiceType {
  openshift_ai
}

/**
 * The similarity measures supported for embeddings produced by the `openshift_ai` service.
 */
export enum OpenShiftAiSimilarityType {
  cosine,
  dot_product,
  l2_norm
}

/**
 * Task-specific settings for the `openshift_ai` service.
 * All properties defined here apply to the `rerank` task only.
 */
export class OpenShiftAiTaskSettings {
  /**
   * For a `rerank` task, whether to return the source documents in the response.
   */
  return_documents?: boolean
  /**
   * For a `rerank` task, the number of the most relevant documents to return.
   */
  top_n?: integer
}

export class VoyageAIServiceSettings {
/**
* The number of dimensions for resulting output embeddings.
Expand Down
13 changes: 13 additions & 0 deletions specification/inference/_types/Services.ts
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ import {
TaskTypeLlama,
TaskTypeMistral,
TaskTypeOpenAI,
TaskTypeOpenShiftAi,
TaskTypeVoyageAI,
TaskTypeWatsonx
} from '../_types/TaskType'
Expand Down Expand Up @@ -302,6 +303,17 @@ export class InferenceEndpointInfoOpenAI extends InferenceEndpoint {
task_type: TaskTypeOpenAI
}

/**
 * Configuration details of an inference endpoint that uses the `openshift_ai` service.
 */
export class InferenceEndpointInfoOpenShiftAi extends InferenceEndpoint {
  /**
   * The inference Id
   */
  inference_id: string
  /**
   * The task type
   */
  task_type: TaskTypeOpenShiftAi
}

export class InferenceEndpointInfoVoyageAI extends InferenceEndpoint {
/**
* The inference Id
Expand Down Expand Up @@ -413,6 +425,7 @@ export class RateLimitSetting {
* * `mistral` service: `240`
* * `openai` service and task type `text_embedding`: `3000`
* * `openai` service and task type `completion`: `500`
* * `openshift_ai` service: `3000`
* * `voyageai` service: `2000`
* * `watsonxai` service: `120`
*/
Expand Down
7 changes: 7 additions & 0 deletions specification/inference/_types/TaskType.ts
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,13 @@ export enum TaskTypeOpenAI {
completion
}

export enum TaskTypeOpenShiftAi {
text_embedding,
chat_completion,
completion,
rerank
}

export enum TaskTypeVoyageAI {
text_embedding,
rerank
Expand Down
1 change: 1 addition & 0 deletions specification/inference/put/PutRequest.ts
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ import { TaskType } from '@inference/_types/TaskType'
* * Llama (`chat_completion`, `completion`, `text_embedding`)
* * Mistral (`chat_completion`, `completion`, `text_embedding`)
* * OpenAI (`chat_completion`, `completion`, `text_embedding`)
* * OpenShift AI (`chat_completion`, `completion`, `rerank`, `text_embedding`)
* * VoyageAI (`rerank`, `text_embedding`)
* * Watsonx inference integration (`text_embedding`)
* @rest_spec_name inference.put
Expand Down
86 changes: 86 additions & 0 deletions specification/inference/put_openshift_ai/PutOpenShiftAiRequest.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
/*
* Licensed to Elasticsearch B.V. under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch B.V. licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

import { RequestBase } from '@_types/Base'
import { Id } from '@_types/common'
import { Duration } from '@_types/Time'
import {
OpenShiftAiServiceSettings,
OpenShiftAiServiceType,
OpenShiftAiTaskSettings,
OpenShiftAiTaskType
} from '@inference/_types/CommonTypes'
import { InferenceChunkingSettings } from '@inference/_types/Services'

/**
* Create an OpenShift AI inference endpoint.
*
* Create an inference endpoint to perform an inference task with the `openshift_ai` service.
* @rest_spec_name inference.put_openshift_ai
* @availability stack since=9.3.0 stability=stable visibility=public
* @availability serverless stability=stable visibility=public
* @cluster_privileges manage_inference
* @doc_id inference-api-put-openshift-ai
*/
export interface Request extends RequestBase {
  urls: [
    {
      path: '/_inference/{task_type}/{openshiftai_inference_id}'
      methods: ['PUT']
    }
  ]
  path_parts: {
    /**
     * The type of the inference task that the model will perform.
     * NOTE: The `chat_completion` task type only supports streaming and only through the _stream API.
     */
    task_type: OpenShiftAiTaskType
    /**
     * The unique identifier of the inference endpoint.
     */
    openshiftai_inference_id: Id
  }
  query_parameters: {
    /**
     * Specifies the amount of time to wait for the inference endpoint to be created.
     * @server_default 30s
     */
    timeout?: Duration
  }
  body: {
    /**
     * The chunking configuration object.
     * NOTE(review): presumably only relevant for the `text_embedding` task type — confirm.
     * @ext_doc_id inference-chunking
     */
    chunking_settings?: InferenceChunkingSettings
    /**
     * The type of service supported for the specified task type. In this case, `openshift_ai`.
     */
    service: OpenShiftAiServiceType
    /**
     * Settings used to install the inference model. These settings are specific to the `openshift_ai` service.
     */
    service_settings: OpenShiftAiServiceSettings
    /**
     * Settings to configure the inference task.
     * These settings are specific to the task type you specified.
     */
    task_settings?: OpenShiftAiTaskSettings
  }
}
25 changes: 25 additions & 0 deletions specification/inference/put_openshift_ai/PutOpenShiftAiResponse.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
/*
* Licensed to Elasticsearch B.V. under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch B.V. licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

import { InferenceEndpointInfoOpenShiftAi } from '@inference/_types/Services'

export class Response {
  /**
   * Information about the newly created OpenShift AI inference endpoint.
   * @codegen_name endpoint_info
   */
  body: InferenceEndpointInfoOpenShiftAi
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
summary: A text embedding task
description:
Run `PUT _inference/text_embedding/openshift-ai-text-embedding` to create an inference endpoint
that performs a `text_embedding` task.
method_request: 'PUT _inference/text_embedding/openshift-ai-text-embedding'
# type: "request"
value: |-
{
"service": "openshift_ai",
"service_settings": {
"url": "openshift-ai-embeddings-url",
"api_key": "openshift-ai-embeddings-token",
"model_id": "gritlm-7b"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
summary: A completion task
description:
Run `PUT _inference/completion/openshift-ai-completion` to create an inference endpoint
that performs a `completion` task.
method_request: 'PUT _inference/completion/openshift-ai-completion'
# type: "request"
value: |-
{
"service": "openshift_ai",
"service_settings": {
"url": "openshift-ai-completion-url",
"api_key": "openshift-ai-completion-token",
"model_id": "llama-31-8b-instruct"
}
}
Loading
Loading