Skip to content
Merged
192 changes: 192 additions & 0 deletions docs/resources/elasticsearch_ml_datafeed_state.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,192 @@
---
# generated by https://github.com/hashicorp/terraform-plugin-docs
page_title: "elasticstack_elasticsearch_ml_datafeed_state Resource - terraform-provider-elasticstack"
subcategory: "Ml"
description: |-
Manages the state of an existing Elasticsearch ML datafeed by starting or stopping it. This resource does not create or configure a datafeed, but instead manages the operational state of an existing datafeed.
Note: Starting a non-realtime datafeed (i.e. with an absolute end time) will result in the datafeed automatically stopping once all available data has been processed. By default, Terraform will restart the datafeed from the configured start time and reprocess all data again. It's recommended to ignore changes to the state attribute via the resource lifecycle https://developer.hashicorp.com/terraform/tutorials/state/resource-lifecycle#ignore-changes for non-realtime datafeeds.
---

# elasticstack_elasticsearch_ml_datafeed_state (Resource)

Manages the state of an existing Elasticsearch ML datafeed by starting or stopping it. This resource does not create or configure a datafeed, but instead manages the operational state of an existing datafeed.

Note: Starting a non-realtime datafeed (i.e. with an absolute end time) will result in the datafeed automatically stopping once all available data has been processed. By default, Terraform will restart the datafeed from the configured start time and reprocess all data again. It's recommended to ignore changes to the `state` attribute via the [resource lifecycle](https://developer.hashicorp.com/terraform/tutorials/state/resource-lifecycle#ignore-changes) for non-realtime datafeeds.

## Example Usage

```terraform
## The following resources setup a realtime ML datafeed.
# Index that holds the data the example datafeed reads from. The mapping
# declares the three fields the anomaly detection job below depends on.
resource "elasticstack_elasticsearch_index" "ml_datafeed_index" {
  name = "ml-datafeed-data"
  mappings = jsonencode({
    properties = {
      "@timestamp" = {
        type = "date" # time field referenced by the job's data_description
      }
      value = {
        type = "double" # numeric field analysed by the "mean" detector
      }
      user = {
        type = "keyword" # used as the detector's by_field_name
      }
    }
  })
}

# Anomaly detection job that the realtime datafeed delivers data to.
# NOTE(review): this example uses nested-block syntax (analysis_config { ... })
# while the non-realtime example in this file uses attribute syntax
# (analysis_config = { ... }) — confirm which form the provider schema expects.
resource "elasticstack_elasticsearch_ml_anomaly_detection_job" "example" {
  job_id      = "example-anomaly-job"
  description = "Example anomaly detection job"

  analysis_config {
    bucket_span = "15m" # analysis window size per bucket
    detectors {
      function      = "mean"
      field_name    = "value"
      by_field_name = "user" # split the analysis per user
    }
  }

  data_description {
    time_field = "@timestamp" # must match the index mapping's date field
  }
}

# Datafeed that streams documents from the index into the anomaly job.
# The query restricts input to the last seven days of data.
resource "elasticstack_elasticsearch_ml_datafeed" "example" {
  datafeed_id = "example-datafeed"
  job_id      = elasticstack_elasticsearch_ml_anomaly_detection_job.example.job_id
  indices     = [elasticstack_elasticsearch_index.ml_datafeed_index.name]

  query = jsonencode({
    bool = {
      must = [
        {
          range = {
            "@timestamp" = {
              gte = "now-7d" # only feed documents from the last 7 days
            }
          }
        }
      ]
    }
  })
}

# Starts the realtime datafeed. Because no `end` is set, the datafeed keeps
# running and the `started` state remains stable across plans.
resource "elasticstack_elasticsearch_ml_datafeed_state" "example" {
  datafeed_id = elasticstack_elasticsearch_ml_datafeed.example.datafeed_id
  state       = "started"
  force       = false # `force` only applies when stopping a datafeed
}

## A non-realtime datafeed will automatically stop once all data has been processed.
## It's recommended to ignore changes to the `state` attribute via the resource lifecycle for such datafeeds.

# Anomaly detection job for the non-realtime (bounded time range) example.
resource "elasticstack_elasticsearch_ml_anomaly_detection_job" "non-realtime" {
  job_id      = "non-realtime-anomaly-job"
  description = "Test job for datafeed state testing with time range"
  analysis_config = {
    bucket_span = "1h"
    detectors = [{
      function             = "count"
      detector_description = "count"
    }]
  }
  data_description = {
    time_field  = "@timestamp"
    time_format = "epoch_ms" # timestamps are epoch milliseconds
  }
  analysis_limits = {
    model_memory_limit = "10mb" # small limit; this is a test job
  }
}

# Opens the anomaly detection job before its datafeed is started.
resource "elasticstack_elasticsearch_ml_job_state" "non-realtime" {
  job_id = elasticstack_elasticsearch_ml_anomaly_detection_job.non-realtime.job_id
  state  = "opened"

  lifecycle {
    # ignore_changes takes bare attribute references, not quoted strings:
    # quoted values were deprecated in Terraform 0.12 and are rejected by
    # current Terraform versions.
    ignore_changes = [state]
  }
}

# Datafeed for the non-realtime example. A match_all query feeds every
# document; the bounded time range is applied by the state resource's
# start/end attributes, not here.
resource "elasticstack_elasticsearch_ml_datafeed" "non-realtime" {
  datafeed_id = "non-realtime-datafeed"
  job_id      = elasticstack_elasticsearch_ml_anomaly_detection_job.non-realtime.job_id
  indices     = [elasticstack_elasticsearch_index.ml_datafeed_index.name]
  query = jsonencode({
    match_all = {}
  })
}

# Starts the datafeed over a fixed, absolute time range. With an `end` set,
# the datafeed stops itself once all data is processed, so `state` drifts
# from "started" to "stopped" — the lifecycle block ignores that drift to
# avoid Terraform restarting the datafeed and reprocessing all data.
resource "elasticstack_elasticsearch_ml_datafeed_state" "non-realtime" {
  datafeed_id      = elasticstack_elasticsearch_ml_datafeed.non-realtime.datafeed_id
  state            = "started"
  start            = "2024-01-01T00:00:00Z" # RFC 3339
  end              = "2024-01-02T00:00:00Z" # RFC 3339; makes the feed non-realtime
  datafeed_timeout = "60s"

  lifecycle {
    # ignore_changes takes bare attribute references, not quoted strings:
    # quoted values were deprecated in Terraform 0.12 and are rejected by
    # current Terraform versions.
    ignore_changes = [state]
  }
}
```

<!-- schema generated by tfplugindocs -->
## Schema

### Required

- `datafeed_id` (String) Identifier for the ML datafeed.
- `state` (String) The desired state for the ML datafeed. Valid values are `started` and `stopped`.

### Optional

- `datafeed_timeout` (String) Timeout for the operation. Examples: `30s`, `5m`, `1h`. Default is `30s`.
- `elasticsearch_connection` (Block List, Deprecated) Elasticsearch connection configuration block. (see [below for nested schema](#nestedblock--elasticsearch_connection))
- `end` (String) The time that the datafeed should end collecting data. When not specified, the datafeed continues in real-time. This property must be specified in RFC 3339 format.
- `force` (Boolean) When stopping a datafeed, use to forcefully stop it.
- `start` (String) The time that the datafeed should start collecting data. When not specified, the datafeed starts in real-time. This property must be specified in RFC 3339 format.
- `timeouts` (Attributes) (see [below for nested schema](#nestedatt--timeouts))

### Read-Only

- `id` (String) Internal identifier of the resource

<a id="nestedblock--elasticsearch_connection"></a>
### Nested Schema for `elasticsearch_connection`

Optional:

- `api_key` (String, Sensitive) API Key to use for authentication to Elasticsearch
- `bearer_token` (String, Sensitive) Bearer Token to use for authentication to Elasticsearch
- `ca_data` (String) PEM-encoded custom Certificate Authority certificate
- `ca_file` (String) Path to a custom Certificate Authority certificate
- `cert_data` (String) PEM encoded certificate for client auth
- `cert_file` (String) Path to a file containing the PEM encoded certificate for client auth
- `endpoints` (List of String, Sensitive) A list of endpoints where the terraform provider will point to, this must include the http(s) schema and port number.
- `es_client_authentication` (String, Sensitive) ES Client Authentication field to be used with the JWT token
- `headers` (Map of String, Sensitive) A list of headers to be sent with each request to Elasticsearch.
- `insecure` (Boolean) Disable TLS certificate validation
- `key_data` (String, Sensitive) PEM encoded private key for client auth
- `key_file` (String) Path to a file containing the PEM encoded private key for client auth
- `password` (String, Sensitive) Password to use for API authentication to Elasticsearch.
- `username` (String) Username to use for API authentication to Elasticsearch.


<a id="nestedatt--timeouts"></a>
### Nested Schema for `timeouts`

Optional:

- `create` (String) A string that can be [parsed as a duration](https://pkg.go.dev/time#ParseDuration) consisting of numbers and unit suffixes, such as "30s" or "2h45m". Valid time units are "s" (seconds), "m" (minutes), "h" (hours).
- `update` (String) A string that can be [parsed as a duration](https://pkg.go.dev/time#ParseDuration) consisting of numbers and unit suffixes, such as "30s" or "2h45m". Valid time units are "s" (seconds), "m" (minutes), "h" (hours).

## Import

Import is supported using the following syntax:

The [`terraform import` command](https://developer.hashicorp.com/terraform/cli/commands/import) can be used, for example:

```shell
terraform import elasticstack_elasticsearch_ml_datafeed_state.example my-datafeed-id
```
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
terraform import elasticstack_elasticsearch_ml_datafeed_state.example my-datafeed-id
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
## The following resources setup a realtime ML datafeed.
# Index that holds the data the example datafeed reads from. The mapping
# declares the three fields the anomaly detection job below depends on.
resource "elasticstack_elasticsearch_index" "ml_datafeed_index" {
  name = "ml-datafeed-data"
  mappings = jsonencode({
    properties = {
      "@timestamp" = {
        type = "date" # time field referenced by the job's data_description
      }
      value = {
        type = "double" # numeric field analysed by the "mean" detector
      }
      user = {
        type = "keyword" # used as the detector's by_field_name
      }
    }
  })
}

# Anomaly detection job that the realtime datafeed delivers data to.
# NOTE(review): this example uses nested-block syntax (analysis_config { ... })
# while the non-realtime example in this file uses attribute syntax
# (analysis_config = { ... }) — confirm which form the provider schema expects.
resource "elasticstack_elasticsearch_ml_anomaly_detection_job" "example" {
  job_id      = "example-anomaly-job"
  description = "Example anomaly detection job"

  analysis_config {
    bucket_span = "15m" # analysis window size per bucket
    detectors {
      function      = "mean"
      field_name    = "value"
      by_field_name = "user" # split the analysis per user
    }
  }

  data_description {
    time_field = "@timestamp" # must match the index mapping's date field
  }
}

# Datafeed that streams documents from the index into the anomaly job.
# The query restricts input to the last seven days of data.
resource "elasticstack_elasticsearch_ml_datafeed" "example" {
  datafeed_id = "example-datafeed"
  job_id      = elasticstack_elasticsearch_ml_anomaly_detection_job.example.job_id
  indices     = [elasticstack_elasticsearch_index.ml_datafeed_index.name]

  query = jsonencode({
    bool = {
      must = [
        {
          range = {
            "@timestamp" = {
              gte = "now-7d" # only feed documents from the last 7 days
            }
          }
        }
      ]
    }
  })
}

# Starts the realtime datafeed. Because no `end` is set, the datafeed keeps
# running and the `started` state remains stable across plans.
resource "elasticstack_elasticsearch_ml_datafeed_state" "example" {
  datafeed_id = elasticstack_elasticsearch_ml_datafeed.example.datafeed_id
  state       = "started"
  force       = false # `force` only applies when stopping a datafeed
}

## A non-realtime datafeed will automatically stop once all data has been processed.
## It's recommended to ignore changes to the `state` attribute via the resource lifecycle for such datafeeds.

# Anomaly detection job for the non-realtime (bounded time range) example.
resource "elasticstack_elasticsearch_ml_anomaly_detection_job" "non-realtime" {
  job_id      = "non-realtime-anomaly-job"
  description = "Test job for datafeed state testing with time range"
  analysis_config = {
    bucket_span = "1h"
    detectors = [{
      function             = "count"
      detector_description = "count"
    }]
  }
  data_description = {
    time_field  = "@timestamp"
    time_format = "epoch_ms" # timestamps are epoch milliseconds
  }
  analysis_limits = {
    model_memory_limit = "10mb" # small limit; this is a test job
  }
}

# Opens the anomaly detection job before its datafeed is started.
resource "elasticstack_elasticsearch_ml_job_state" "non-realtime" {
  job_id = elasticstack_elasticsearch_ml_anomaly_detection_job.non-realtime.job_id
  state  = "opened"

  lifecycle {
    # ignore_changes takes bare attribute references, not quoted strings:
    # quoted values were deprecated in Terraform 0.12 and are rejected by
    # current Terraform versions.
    ignore_changes = [state]
  }
}

# Datafeed for the non-realtime example. A match_all query feeds every
# document; the bounded time range is applied by the state resource's
# start/end attributes, not here.
resource "elasticstack_elasticsearch_ml_datafeed" "non-realtime" {
  datafeed_id = "non-realtime-datafeed"
  job_id      = elasticstack_elasticsearch_ml_anomaly_detection_job.non-realtime.job_id
  indices     = [elasticstack_elasticsearch_index.ml_datafeed_index.name]
  query = jsonencode({
    match_all = {}
  })
}

# Starts the datafeed over a fixed, absolute time range. With an `end` set,
# the datafeed stops itself once all data is processed, so `state` drifts
# from "started" to "stopped" — the lifecycle block ignores that drift to
# avoid Terraform restarting the datafeed and reprocessing all data.
resource "elasticstack_elasticsearch_ml_datafeed_state" "non-realtime" {
  datafeed_id      = elasticstack_elasticsearch_ml_datafeed.non-realtime.datafeed_id
  state            = "started"
  start            = "2024-01-01T00:00:00Z" # RFC 3339
  end              = "2024-01-02T00:00:00Z" # RFC 3339; makes the feed non-realtime
  datafeed_timeout = "60s"

  lifecycle {
    # ignore_changes takes bare attribute references, not quoted strings:
    # quoted values were deprecated in Terraform 0.12 and are rejected by
    # current Terraform versions.
    ignore_changes = [state]
  }
}
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ require (
github.com/hashicorp/terraform-plugin-framework v1.16.1
github.com/hashicorp/terraform-plugin-framework-jsontypes v0.2.0
github.com/hashicorp/terraform-plugin-framework-timeouts v0.7.0
github.com/hashicorp/terraform-plugin-framework-timetypes v0.5.0
github.com/hashicorp/terraform-plugin-framework-validators v0.19.0
github.com/hashicorp/terraform-plugin-go v0.29.0
github.com/hashicorp/terraform-plugin-log v0.9.0
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -617,6 +617,8 @@ github.com/hashicorp/terraform-plugin-framework-jsontypes v0.2.0 h1:SJXL5FfJJm17
github.com/hashicorp/terraform-plugin-framework-jsontypes v0.2.0/go.mod h1:p0phD0IYhsu9bR4+6OetVvvH59I6LwjXGnTVEr8ox6E=
github.com/hashicorp/terraform-plugin-framework-timeouts v0.7.0 h1:jblRy1PkLfPm5hb5XeMa3tezusnMRziUGqtT5epSYoI=
github.com/hashicorp/terraform-plugin-framework-timeouts v0.7.0/go.mod h1:5jm2XK8uqrdiSRfD5O47OoxyGMCnwTcl8eoiDgSa+tc=
github.com/hashicorp/terraform-plugin-framework-timetypes v0.5.0 h1:v3DapR8gsp3EM8fKMh6up9cJUFQ2iRaFsYLP8UJnCco=
github.com/hashicorp/terraform-plugin-framework-timetypes v0.5.0/go.mod h1:c3PnGE9pHBDfdEVG9t1S1C9ia5LW+gkFR0CygXlM8ak=
github.com/hashicorp/terraform-plugin-framework-validators v0.19.0 h1:Zz3iGgzxe/1XBkooZCewS0nJAaCFPFPHdNJd8FgE4Ow=
github.com/hashicorp/terraform-plugin-framework-validators v0.19.0/go.mod h1:GBKTNGbGVJohU03dZ7U8wHqc2zYnMUawgCN+gC0itLc=
github.com/hashicorp/terraform-plugin-go v0.29.0 h1:1nXKl/nSpaYIUBU1IG/EsDOX0vv+9JxAltQyDMpq5mU=
Expand Down
24 changes: 2 additions & 22 deletions internal/clients/elasticsearch/ml_job.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,25 +15,6 @@ import (
"github.com/hashicorp/terraform-plugin-framework/diag"
)

// MLJobStats represents the statistics structure for an ML job
type MLJobStats struct {
Jobs []MLJob `json:"jobs"`
}

// MLJob represents a single ML job in the stats response
type MLJob struct {
JobId string `json:"job_id"`
State string `json:"state"`
Node *MLJobNode `json:"node,omitempty"`
}

// MLJobNode represents the node information for an ML job
type MLJobNode struct {
Id string `json:"id"`
Name string `json:"name"`
Attributes map[string]interface{} `json:"attributes"`
}

// OpenMLJob opens a machine learning job
func OpenMLJob(ctx context.Context, apiClient *clients.ApiClient, jobId string) diag.Diagnostics {
var diags diag.Diagnostics
Expand Down Expand Up @@ -120,7 +101,7 @@ func CloseMLJob(ctx context.Context, apiClient *clients.ApiClient, jobId string,
}

// GetMLJobStats retrieves the stats for a specific machine learning job
func GetMLJobStats(ctx context.Context, apiClient *clients.ApiClient, jobId string) (*MLJob, diag.Diagnostics) {
func GetMLJobStats(ctx context.Context, apiClient *clients.ApiClient, jobId string) (*models.MLJob, diag.Diagnostics) {
var diags diag.Diagnostics

esClient, err := apiClient.GetESClient()
Expand Down Expand Up @@ -148,8 +129,7 @@ func GetMLJobStats(ctx context.Context, apiClient *clients.ApiClient, jobId stri
if diags.HasError() {
return nil, diags
}

var jobStats MLJobStats
var jobStats models.MLJobStats
if err := json.NewDecoder(res.Body).Decode(&jobStats); err != nil {
diags.AddError("Failed to decode ML job stats response", err.Error())
return nil, diags
Expand Down
Loading