Skip to content

Commit 29d4aeb

Browse files
eric-forte-elastic, shashank-elastic, Mikaayenson
authored
[Bug] [DAC] Auto Gen Schema Fails on Certain Subqueries (#5256)
* Add alignment checking for sub-queries
* Allow field to be overwritten with original field
* Update rule prompt to allow for int 0 values
* Support custom schema index overwrite

---------

Co-authored-by: shashank-elastic <91139415+shashank-elastic@users.noreply.github.com>
Co-authored-by: Mika Ayenson, PhD <Mikaayenson@users.noreply.github.com>
1 parent 700443b commit 29d4aeb

File tree

6 files changed

+89
-50
lines changed

6 files changed

+89
-50
lines changed

detection_rules/cli_utils.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -92,11 +92,11 @@ def _convert_type(_val: Any) -> Any:
9292
)
9393

9494
while True:
95-
result = value or input(prompt) or default
95+
result = value if value is not None else input(prompt) or default
9696
if result == "n/a":
9797
result = None
9898

99-
if not result:
99+
if result is None:
100100
if is_required:
101101
value = None
102102
continue
@@ -318,7 +318,7 @@ def rule_prompt( # noqa: PLR0912, PLR0913, PLR0915
318318
contents[name] = threat_map
319319
continue
320320

321-
if kwargs.get(name):
321+
if name in kwargs:
322322
contents[name] = schema_prompt(name, value=kwargs.pop(name))
323323
continue
324324

detection_rules/etc/custom-consolidated-rules.ndjson

Lines changed: 11 additions & 10 deletions
Large diffs are not rendered by default.

detection_rules/index_mappings.py

Lines changed: 55 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,29 @@ def get_simulated_index_template_mappings(elastic_client: Elasticsearch, name: s
159159
return template["template"]["mappings"]["properties"]
160160

161161

162+
def prune_mappings_of_unsupported_types(
163+
integration: str, stream: str, stream_mappings: dict[str, Any], log: Callable[[str], None]
164+
) -> dict[str, Any]:
165+
"""Prune fields with unsupported types (ES|QL) from the provided mappings."""
166+
nested_multifields = find_nested_multifields(stream_mappings)
167+
for field in nested_multifields:
168+
field_name = str(field).split(".fields.")[0].replace(".", ".properties.") + ".fields"
169+
log(
170+
f"Warning: Nested multi-field `{field}` found in `{integration}-{stream}`. "
171+
f"Removing parent field from schema for ES|QL validation."
172+
)
173+
delete_nested_key_from_dict(stream_mappings, field_name)
174+
nested_flattened_fields = find_flattened_fields_with_subfields(stream_mappings)
175+
for field in nested_flattened_fields:
176+
field_name = str(field).split(".fields.")[0].replace(".", ".properties.") + ".fields"
177+
log(
178+
f"Warning: flattened field `{field}` found in `{integration}-{stream}` with sub fields. "
179+
f"Removing parent field from schema for ES|QL validation."
180+
)
181+
delete_nested_key_from_dict(stream_mappings, field_name)
182+
return stream_mappings
183+
184+
162185
def prepare_integration_mappings( # noqa: PLR0913
163186
rule_integrations: list[str],
164187
event_dataset_integrations: list[EventDataset],
@@ -199,22 +222,7 @@ def prepare_integration_mappings( # noqa: PLR0913
199222
for stream in package_schema:
200223
flat_schema = package_schema[stream]
201224
stream_mappings = flat_schema_to_index_mapping(flat_schema)
202-
nested_multifields = find_nested_multifields(stream_mappings)
203-
for field in nested_multifields:
204-
field_name = str(field).split(".fields.")[0].replace(".", ".properties.") + ".fields"
205-
log(
206-
f"Warning: Nested multi-field `{field}` found in `{integration}-{stream}`. "
207-
f"Removing parent field from schema for ES|QL validation."
208-
)
209-
delete_nested_key_from_dict(stream_mappings, field_name)
210-
nested_flattened_fields = find_flattened_fields_with_subfields(stream_mappings)
211-
for field in nested_flattened_fields:
212-
field_name = str(field).split(".fields.")[0].replace(".", ".properties.") + ".fields"
213-
log(
214-
f"Warning: flattened field `{field}` found in `{integration}-{stream}` with sub fields. "
215-
f"Removing parent field from schema for ES|QL validation."
216-
)
217-
delete_nested_key_from_dict(stream_mappings, field_name)
225+
stream_mappings = prune_mappings_of_unsupported_types(integration, stream, stream_mappings, log)
218226
utils.combine_dicts(integration_mappings, deepcopy(stream_mappings))
219227
index_lookup[f"{integration}-{stream}"] = stream_mappings
220228

@@ -285,14 +293,19 @@ def get_filtered_index_schema(
285293
filtered_index_lookup = {
286294
key.replace("logs-endpoint.", "logs-endpoint.events."): value for key, value in filtered_index_lookup.items()
287295
}
288-
filtered_index_lookup.update(non_ecs_mapping)
289-
filtered_index_lookup.update(custom_mapping)
290296

291297
# Reduce the combined mappings to only the matched indices (local schema validation source of truth)
298+
# Custom and non-ecs mappings are filtered before being sent to this function in prepare mappings
292299
combined_mappings: dict[str, Any] = {}
293300
utils.combine_dicts(combined_mappings, deepcopy(ecs_schema))
294301
for match in matches:
295-
utils.combine_dicts(combined_mappings, deepcopy(filtered_index_lookup.get(match, {})))
302+
base = filtered_index_lookup.get(match, {})
303+
# Update filtered index with non-ecs and custom mappings
304+
# Need to use a merge here to not overwrite existing fields
305+
utils.combine_dicts(base, deepcopy(non_ecs_mapping.get(match, {})))
306+
utils.combine_dicts(base, deepcopy(custom_mapping.get(match, {})))
307+
filtered_index_lookup[match] = base
308+
utils.combine_dicts(combined_mappings, deepcopy(base))
296309

297310
# Reduce the index lookup to only the matched indices (remote/Kibana schema validation source of truth)
298311
filtered_index_mapping: dict[str, Any] = {}
@@ -458,20 +471,34 @@ def prepare_mappings( # noqa: PLR0913
458471
index_lookup.update(integration_index_lookup)
459472

460473
# Load non-ecs schema and convert to index mapping format (nested schema)
474+
# For non_ecs we need both a mapping and a schema as custom schemas can override non-ecs fields
475+
# In these cases we need to accept the overwrite keep the original non-ecs field in the schema
476+
non_ecs_schema: dict[str, Any] = {}
461477
non_ecs_mapping: dict[str, Any] = {}
462478
non_ecs = ecs.get_non_ecs_schema()
463479
for index in indices:
464-
non_ecs_mapping.update(non_ecs.get(index, {}))
465-
non_ecs_mapping = ecs.flatten(non_ecs_mapping)
466-
non_ecs_mapping = utils.convert_to_nested_schema(non_ecs_mapping)
480+
index_mapping = non_ecs.get(index, {})
481+
non_ecs_schema.update(index_mapping)
482+
index_mapping = ecs.flatten(index_mapping)
483+
index_mapping = utils.convert_to_nested_schema(index_mapping)
484+
non_ecs_mapping.update({index: index_mapping})
485+
486+
# These need to be handled separately as we need to be able to validate non-ecs fields as a whole
487+
# and also at a per index level as custom schemas can override non-ecs fields and/or indices
488+
non_ecs_schema = ecs.flatten(non_ecs_schema)
489+
non_ecs_schema = utils.convert_to_nested_schema(non_ecs_schema)
490+
non_ecs_schema = prune_mappings_of_unsupported_types("non-ecs", "non-ecs", non_ecs_schema, log)
491+
non_ecs_mapping = prune_mappings_of_unsupported_types("non-ecs", "non-ecs", non_ecs_mapping, log)
467492

468493
# Load custom schema and convert to index mapping format (nested schema)
469494
custom_mapping: dict[str, Any] = {}
470495
custom_indices = ecs.get_custom_schemas()
471496
for index in indices:
472-
custom_mapping.update(custom_indices.get(index, {}))
473-
custom_mapping = ecs.flatten(custom_mapping)
474-
custom_mapping = utils.convert_to_nested_schema(custom_mapping)
497+
index_mapping = custom_indices.get(index, {})
498+
index_mapping = ecs.flatten(index_mapping)
499+
index_mapping = utils.convert_to_nested_schema(index_mapping)
500+
custom_mapping.update({index: index_mapping})
501+
custom_mapping = prune_mappings_of_unsupported_types("custom", "custom", custom_mapping, log)
475502

476503
# Load ECS in an index mapping format (nested schema)
477504
current_version = Version.parse(load_current_package_version(), optional_minor_and_patch=True)
@@ -484,8 +511,9 @@ def prepare_mappings( # noqa: PLR0913
484511

485512
index_lookup.update({"rule-ecs-index": ecs_schema})
486513

487-
if (not integration_mappings or existing_mappings) and not non_ecs_mapping and not ecs_schema:
514+
if (not integration_mappings or existing_mappings) and not non_ecs_schema and not ecs_schema:
488515
raise ValueError("No mappings found")
489-
index_lookup.update({"rule-non-ecs-index": non_ecs_mapping})
516+
index_lookup.update({"rule-non-ecs-index": non_ecs_schema})
517+
utils.combine_dicts(combined_mappings, deepcopy(non_ecs_schema))
490518

491519
return existing_mappings, index_lookup, combined_mappings

detection_rules/rule_validators.py

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -373,9 +373,13 @@ def text_fields(self, eql_schema: ecs.KqlSchema2Eql | endgame.EndgameSchema) ->
373373
def unique_fields(self) -> list[str]: # type: ignore[reportIncompatibleMethodOverride]
374374
return list({str(f) for f in self.ast if isinstance(f, eql.ast.Field)}) # type: ignore[reportUnknownVariableType]
375375

376-
def auto_add_field(self, validation_checks_error: eql.EqlParseError, index_or_dataview: str) -> None:
376+
def auto_add_field(
377+
self, validation_checks_error: eql.EqlParseError, index_or_dataview: str, field: str | None = None
378+
) -> None:
377379
"""Auto add a missing field to the schema."""
378-
field_name = extract_error_field(self.query, validation_checks_error)
380+
field_name = field
381+
if not field:
382+
field_name = extract_error_field(self.query, validation_checks_error)
379383
if not field_name:
380384
raise ValueError("No field name found")
381385
field_type = ecs.get_all_flattened_schema().get(field_name)
@@ -584,6 +588,8 @@ def add_stack_targets(query_text: str, include_endgame: bool) -> None:
584588

585589
def validate(self, data: "QueryRuleData", meta: RuleMeta, max_attempts: int = 10) -> None: # type: ignore[reportIncompatibleMethodOverride]
586590
"""Validate an EQL query using a unified plan of schema combinations."""
591+
# base field declaration
592+
field = None
587593
if meta.query_schema_validation is False or meta.maturity == "deprecated":
588594
return
589595

@@ -606,7 +612,7 @@ def validate(self, data: "QueryRuleData", meta: RuleMeta, max_attempts: int = 10
606612
)
607613
first_error: EQL_ERROR_TYPES | ValueError | None = None
608614
for t in ordered_targets:
609-
exc = self.validate_query_text_with_schema(
615+
exc, field = self.validate_query_text_with_schema(
610616
t.query_text,
611617
t.schema,
612618
err_trailer=t.err_trailer,
@@ -629,7 +635,7 @@ def validate(self, data: "QueryRuleData", meta: RuleMeta, max_attempts: int = 10
629635
and RULES_CONFIG.auto_gen_schema_file
630636
and data.index_or_dataview
631637
):
632-
self.auto_add_field(first_error, data.index_or_dataview[0]) # type: ignore[reportArgumentType]
638+
self.auto_add_field(first_error, data.index_or_dataview[0], field=field) # type: ignore[reportArgumentType]
633639
continue
634640

635641
# Raise the enriched parse error (includes target trailer + metadata)
@@ -645,7 +651,7 @@ def validate_query_text_with_schema( # noqa: PLR0913
645651
min_stack_version: str,
646652
beat_types: list[str] | None = None,
647653
integration_types: list[str] | None = None,
648-
) -> EQL_ERROR_TYPES | ValueError | None:
654+
) -> tuple[EQL_ERROR_TYPES | ValueError | None, str | None]:
649655
"""Validate the provided EQL query text against the schema (variant of validate_query_with_schema)."""
650656
try:
651657
config = set_eql_config(min_stack_version)
@@ -657,13 +663,16 @@ def validate_query_text_with_schema( # noqa: PLR0913
657663
# If the error is an unknown field and the field was referenced as optional (prefixed with '?'),
658664
# treat this target as non-fatal to honor EQL optional semantics.
659665

666+
# To support EQL sequence and sub query validation we need to return this field to overwrite
667+
# what would have been parsed via auto_add_field as the error message and query may be out of sync
668+
# depending on how the method is called.
660669
field = extract_error_field(query_text, exc)
661670
if (
662671
field
663672
and ("Unknown field" in message or "Field not recognized" in message)
664673
and f"?{field}" in self.query
665674
):
666-
return None
675+
return None, field
667676
if "Unknown field" in message and beat_types:
668677
trailer_parts.insert(0, "Try adding event.module or event.dataset to specify beats module")
669678
elif "Field not recognized" in message and isinstance(schema, ecs.KqlSchema2Eql):
@@ -691,10 +700,11 @@ def validate_query_text_with_schema( # noqa: PLR0913
691700
exc.source, # type: ignore[reportUnknownArgumentType]
692701
len(exc.caret.lstrip()),
693702
trailer=trailer,
694-
)
703+
), field
695704
except Exception as exc: # noqa: BLE001
696705
print(err_trailer)
697-
return exc # type: ignore[reportReturnType]
706+
return exc, None # type: ignore[reportReturnType]
707+
return None, None
698708

699709
def validate_rule_type_configurations(self, data: EQLRuleData, meta: RuleMeta) -> tuple[list[str], bool]:
700710
"""Validate EQL rule type configurations (timestamp_field, event_category_override, tiebreaker_field).

detection_rules/schemas/definitions.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -245,7 +245,7 @@ def validator_wrapper(value: Any) -> Any:
245245
list[NonEmptyStr], fields.List(NON_EMPTY_STRING_FIELD, validate=validate.Length(min=1, max=3))
246246
]
247247
PositiveInteger = Annotated[int, fields.Integer(validate=validate.Range(min=1))]
248-
RiskScore = Annotated[int, fields.Integer(validate=validate.Range(min=1, max=100))]
248+
RiskScore = Annotated[int, fields.Integer(validate=validate.Range(min=0, max=100))]
249249
RuleName = Annotated[str, fields.String(validate=elastic_rule_name_regexp(NAME_PATTERN))]
250250
SemVer = Annotated[str, fields.String(validate=validate.Regexp(VERSION_PATTERN))]
251251
SemVerMinorOnly = Annotated[str, fields.String(validate=validate.Regexp(MINOR_SEMVER))]

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "detection_rules"
3-
version = "1.5.7"
3+
version = "1.5.8"
44
description = "Detection Rules is the home for rules used by Elastic Security. This repository is used for the development, maintenance, testing, validation, and release of rules for Elastic Security’s Detection Engine."
55
readme = "README.md"
66
requires-python = ">=3.12"

0 commit comments

Comments
 (0)