Skip to content

Commit 033145a

Browse files
[Bug] Add synthetic properties check to remote ESQL validation (#5308)
* Add synthetic properties check
* Add additional unit test for schema conflicts
1 parent f184b0a commit 033145a

File tree

3 files changed

+41
-14
lines changed

3 files changed

+41
-14
lines changed

detection_rules/index_mappings.py

Lines changed: 23 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -160,25 +160,33 @@ def get_simulated_index_template_mappings(elastic_client: Elasticsearch, name: s
160160

161161

162162
def prune_mappings_of_unsupported_types(
163-
integration: str, stream: str, stream_mappings: dict[str, Any], log: Callable[[str], None]
163+
debug_str_data_source: str, stream_mappings: dict[str, Any], log: Callable[[str], None]
164164
) -> dict[str, Any]:
165165
"""Prune fields with unsupported types (ES|QL) from the provided mappings."""
166166
nested_multifields = find_nested_multifields(stream_mappings)
167167
for field in nested_multifields:
168-
field_name = str(field).split(".fields.")[0].replace(".", ".properties.") + ".fields"
168+
parts = str(field).split(".fields.")[0].split(".")
169+
base_name = ".properties.".join(parts)
170+
field_name = f"{base_name}.fields"
169171
log(
170-
f"Warning: Nested multi-field `{field}` found in `{integration}-{stream}`. "
172+
f"Warning: Nested multi-field `{field}` found in `{debug_str_data_source}`. "
171173
f"Removing parent field from schema for ES|QL validation."
172174
)
173175
delete_nested_key_from_dict(stream_mappings, field_name)
174176
nested_flattened_fields = find_flattened_fields_with_subfields(stream_mappings)
175177
for field in nested_flattened_fields:
176-
field_name = str(field).split(".fields.")[0].replace(".", ".properties.") + ".fields"
178+
# Remove both .fields and .properties entries for flattened fields
179+
# .properties entries can occur when being merged with non-ecs or custom schemas
180+
parts = str(field).split(".fields.")[0].split(".")
181+
base_name = ".properties.".join(parts)
182+
field_name = f"{base_name}.fields"
183+
property_name = f"{base_name}.properties"
177184
log(
178-
f"Warning: flattened field `{field}` found in `{integration}-{stream}` with sub fields. "
185+
f"Warning: flattened field `{field}` found in `{debug_str_data_source}` with sub fields. "
179186
f"Removing parent field from schema for ES|QL validation."
180187
)
181188
delete_nested_key_from_dict(stream_mappings, field_name)
189+
delete_nested_key_from_dict(stream_mappings, property_name)
182190
return stream_mappings
183191

184192

@@ -222,7 +230,7 @@ def prepare_integration_mappings( # noqa: PLR0913
222230
for stream in package_schema:
223231
flat_schema = package_schema[stream]
224232
stream_mappings = flat_schema_to_index_mapping(flat_schema)
225-
stream_mappings = prune_mappings_of_unsupported_types(integration, stream, stream_mappings, log)
233+
stream_mappings = prune_mappings_of_unsupported_types(f"{integration}-{stream}", stream_mappings, log)
226234
utils.combine_dicts(integration_mappings, deepcopy(stream_mappings))
227235
index_lookup[f"{integration}-{stream}"] = stream_mappings
228236

@@ -246,12 +254,13 @@ def get_index_to_package_lookup(indices: list[str], index_lookup: dict[str, Any]
246254
return index_lookup_indices
247255

248256

249-
def get_filtered_index_schema(
257+
def get_filtered_index_schema( # noqa: PLR0913
250258
indices: list[str],
251259
index_lookup: dict[str, Any],
252260
ecs_schema: dict[str, Any],
253261
non_ecs_mapping: dict[str, Any],
254262
custom_mapping: dict[str, Any],
263+
log: Callable[[str], None],
255264
) -> tuple[dict[str, Any], dict[str, Any]]:
256265
"""Check if the provided indices are known based on the integration format. Returns the combined schema."""
257266

@@ -304,7 +313,7 @@ def get_filtered_index_schema(
304313
# Need to use a merge here to not overwrite existing fields
305314
utils.combine_dicts(base, deepcopy(non_ecs_mapping.get(match, {})))
306315
utils.combine_dicts(base, deepcopy(custom_mapping.get(match, {})))
307-
filtered_index_lookup[match] = base
316+
filtered_index_lookup[match] = prune_mappings_of_unsupported_types(match, base, log)
308317
utils.combine_dicts(combined_mappings, deepcopy(base))
309318

310319
# Reduce the index lookup to only the matched indices (remote/Kibana schema validation source of truth)
@@ -403,7 +412,7 @@ def find_nested_multifields(mapping: dict[str, Any], path: str = "") -> list[Any
403412

404413

405414
def find_flattened_fields_with_subfields(mapping: dict[str, Any], path: str = "") -> list[str]:
406-
"""Recursively search for fields of type 'flattened' that have a 'fields' key in Elasticsearch mappings."""
415+
"""Recursively search for type 'flattened' that have a 'fields' or 'properties' key in Elasticsearch mappings."""
407416
flattened_fields_with_subfields: list[str] = []
408417

409418
for field, properties in mapping.items():
@@ -413,6 +422,9 @@ def find_flattened_fields_with_subfields(mapping: dict[str, Any], path: str = ""
413422
# Check if the field is of type 'flattened' and has a 'fields' key
414423
if properties.get("type") == "flattened" and "fields" in properties: # type: ignore[reportUnknownVariableType]
415424
flattened_fields_with_subfields.append(current_path) # type: ignore[reportUnknownVariableType]
425+
# Check if the field is of type 'flattened' and has a 'properties' key
426+
if properties.get("type") == "flattened" and "properties" in properties: # type: ignore[reportUnknownVariableType]
427+
flattened_fields_with_subfields.append(current_path) # type: ignore[reportUnknownVariableType]
416428

417429
# Recurse into subfields
418430
if "properties" in properties:
@@ -487,8 +499,7 @@ def prepare_mappings( # noqa: PLR0913
487499
# and also at a per index level as custom schemas can override non-ecs fields and/or indices
488500
non_ecs_schema = ecs.flatten(non_ecs_schema)
489501
non_ecs_schema = utils.convert_to_nested_schema(non_ecs_schema)
490-
non_ecs_schema = prune_mappings_of_unsupported_types("non-ecs", "non-ecs", non_ecs_schema, log)
491-
non_ecs_mapping = prune_mappings_of_unsupported_types("non-ecs", "non-ecs", non_ecs_mapping, log)
502+
non_ecs_schema = prune_mappings_of_unsupported_types("non-ecs", non_ecs_schema, log)
492503

493504
# Load custom schema and convert to index mapping format (nested schema)
494505
custom_mapping: dict[str, Any] = {}
@@ -498,15 +509,14 @@ def prepare_mappings( # noqa: PLR0913
498509
index_mapping = ecs.flatten(index_mapping)
499510
index_mapping = utils.convert_to_nested_schema(index_mapping)
500511
custom_mapping.update({index: index_mapping})
501-
custom_mapping = prune_mappings_of_unsupported_types("custom", "custom", custom_mapping, log)
502512

503513
# Load ECS in an index mapping format (nested schema)
504514
current_version = Version.parse(load_current_package_version(), optional_minor_and_patch=True)
505515
ecs_schema = get_ecs_schema_mappings(current_version)
506516

507517
# Filter combined mappings based on the provided indices
508518
combined_mappings, index_lookup = get_filtered_index_schema(
509-
indices, index_lookup, ecs_schema, non_ecs_mapping, custom_mapping
519+
indices, index_lookup, ecs_schema, non_ecs_mapping, custom_mapping, log
510520
)
511521

512522
index_lookup.update({"rule-ecs-index": ecs_schema})

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "detection_rules"
3-
version = "1.5.8"
3+
version = "1.5.9"
44
description = "Detection Rules is the home for rules used by Elastic Security. This repository is used for the development, maintenance, testing, validation, and release of rules for Elastic Security’s Detection Engine."
55
readme = "README.md"
66
requires-python = ">=3.12"

tests/test_rules_remote.py

Lines changed: 17 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -218,3 +218,20 @@ def test_esql_filtered_keep(self):
218218
"""
219219
with pytest.raises(EsqlSchemaError):
220220
_ = RuleCollection().load_dict(production_rule)
221+
222+
def test_esql_non_ecs_schema_conflict_resolution(self):
223+
"""Test an ESQL rule that has a known conflict between non_ecs and integrations for correct handling."""
224+
file_path = get_path(["tests", "data", "command_control_dummy_production_rule.toml"])
225+
original_production_rule = load_rule_contents(file_path)
226+
production_rule = deepcopy(original_production_rule)[0]
227+
production_rule["metadata"]["integration"] = ["azure", "o365"]
228+
production_rule["rule"]["query"] = """
229+
from logs-azure.signinlogs-* metadata _id, _version, _index
230+
| where @timestamp > now() - 30 minutes
231+
and event.dataset in ("azure.signinlogs")
232+
and event.outcome == "success"
233+
and azure.signinlogs.properties.user_id is not null
234+
| keep
235+
event.outcome
236+
"""
237+
_ = RuleCollection().load_dict(production_rule)

0 commit comments

Comments
 (0)