[Bug] [DAC] Auto Gen Schema Fails on Certain Subqueries (#5256)

eric-forte-elastic · shashank-elastic · Mikaayenson · web-flow · commit 29d4aeb37aa3 · 2025-11-12T11:21:53.000-05:00
* Add alignment checking for sub-queries

* Allow field to be overwritten with original field

* Update rule prompt to allow for int 0 values

* Support custom schema index overwrite

---------

Co-authored-by: shashank-elastic <91139415+shashank-elastic@users.noreply.github.com>
Co-authored-by: Mika Ayenson, PhD <Mikaayenson@users.noreply.github.com>
diff --git a/detection_rules/cli_utils.py b/detection_rules/cli_utils.py
@@ -92,11 +92,11 @@ def _convert_type(_val: Any) -> Any:
     )
 
     while True:
-        result = value or input(prompt) or default
+        result = value if value is not None else input(prompt) or default
         if result == "n/a":
             result = None
 
-        if not result:
+        if result is None:
             if is_required:
                 value = None
                 continue
@@ -318,7 +318,7 @@ def rule_prompt(  # noqa: PLR0912, PLR0913, PLR0915
                 contents[name] = threat_map
             continue
 
-        if kwargs.get(name):
+        if name in kwargs:
             contents[name] = schema_prompt(name, value=kwargs.pop(name))
             continue
 
diff --git a/detection_rules/etc/custom-consolidated-rules.ndjson b/detection_rules/etc/custom-consolidated-rules.ndjson
diff --git a/detection_rules/index_mappings.py b/detection_rules/index_mappings.py
@@ -159,6 +159,29 @@ def get_simulated_index_template_mappings(elastic_client: Elasticsearch, name: s
     return template["template"]["mappings"]["properties"]
 
 
+def prune_mappings_of_unsupported_types(
+    integration: str, stream: str, stream_mappings: dict[str, Any], log: Callable[[str], None]
+) -> dict[str, Any]:
+    """Prune fields with unsupported types (ES|QL) from the provided mappings."""
+    nested_multifields = find_nested_multifields(stream_mappings)
+    for field in nested_multifields:
+        field_name = str(field).split(".fields.")[0].replace(".", ".properties.") + ".fields"
+        log(
+            f"Warning: Nested multi-field `{field}` found in `{integration}-{stream}`. "
+            f"Removing parent field from schema for ES|QL validation."
+        )
+        delete_nested_key_from_dict(stream_mappings, field_name)
+    nested_flattened_fields = find_flattened_fields_with_subfields(stream_mappings)
+    for field in nested_flattened_fields:
+        field_name = str(field).split(".fields.")[0].replace(".", ".properties.") + ".fields"
+        log(
+            f"Warning: flattened field `{field}` found in `{integration}-{stream}` with sub fields. "
+            f"Removing parent field from schema for ES|QL validation."
+        )
+        delete_nested_key_from_dict(stream_mappings, field_name)
+    return stream_mappings
+
+
 def prepare_integration_mappings(  # noqa: PLR0913
     rule_integrations: list[str],
     event_dataset_integrations: list[EventDataset],
@@ -199,22 +222,7 @@ def prepare_integration_mappings(  # noqa: PLR0913
         for stream in package_schema:
             flat_schema = package_schema[stream]
             stream_mappings = flat_schema_to_index_mapping(flat_schema)
-            nested_multifields = find_nested_multifields(stream_mappings)
-            for field in nested_multifields:
-                field_name = str(field).split(".fields.")[0].replace(".", ".properties.") + ".fields"
-                log(
-                    f"Warning: Nested multi-field `{field}` found in `{integration}-{stream}`. "
-                    f"Removing parent field from schema for ES|QL validation."
-                )
-                delete_nested_key_from_dict(stream_mappings, field_name)
-            nested_flattened_fields = find_flattened_fields_with_subfields(stream_mappings)
-            for field in nested_flattened_fields:
-                field_name = str(field).split(".fields.")[0].replace(".", ".properties.") + ".fields"
-                log(
-                    f"Warning: flattened field `{field}` found in `{integration}-{stream}` with sub fields. "
-                    f"Removing parent field from schema for ES|QL validation."
-                )
-                delete_nested_key_from_dict(stream_mappings, field_name)
+            stream_mappings = prune_mappings_of_unsupported_types(integration, stream, stream_mappings, log)
             utils.combine_dicts(integration_mappings, deepcopy(stream_mappings))
             index_lookup[f"{integration}-{stream}"] = stream_mappings
 
@@ -285,14 +293,19 @@ def get_filtered_index_schema(
     filtered_index_lookup = {
         key.replace("logs-endpoint.", "logs-endpoint.events."): value for key, value in filtered_index_lookup.items()
     }
-    filtered_index_lookup.update(non_ecs_mapping)
-    filtered_index_lookup.update(custom_mapping)
 
     # Reduce the combined mappings to only the matched indices (local schema validation source of truth)
+    # Custom and non-ecs mappings are filtered before being sent to this function in prepare mappings
     combined_mappings: dict[str, Any] = {}
     utils.combine_dicts(combined_mappings, deepcopy(ecs_schema))
     for match in matches:
-        utils.combine_dicts(combined_mappings, deepcopy(filtered_index_lookup.get(match, {})))
+        base = filtered_index_lookup.get(match, {})
+        # Update filtered index with non-ecs and custom mappings
+        # Need to use a merge here to not overwrite existing fields
+        utils.combine_dicts(base, deepcopy(non_ecs_mapping.get(match, {})))
+        utils.combine_dicts(base, deepcopy(custom_mapping.get(match, {})))
+        filtered_index_lookup[match] = base
+        utils.combine_dicts(combined_mappings, deepcopy(base))
 
     # Reduce the index lookup to only the matched indices (remote/Kibana schema validation source of truth)
     filtered_index_mapping: dict[str, Any] = {}
@@ -458,20 +471,34 @@ def prepare_mappings(  # noqa: PLR0913
     index_lookup.update(integration_index_lookup)
 
     # Load non-ecs schema and convert to index mapping format (nested schema)
+    # For non_ecs we need both a mapping and a schema as custom schemas can override non-ecs fields
+    # In these cases we need to accept the overwrite keep the original non-ecs field in the schema
+    non_ecs_schema: dict[str, Any] = {}
     non_ecs_mapping: dict[str, Any] = {}
     non_ecs = ecs.get_non_ecs_schema()
     for index in indices:
-        non_ecs_mapping.update(non_ecs.get(index, {}))
-    non_ecs_mapping = ecs.flatten(non_ecs_mapping)
-    non_ecs_mapping = utils.convert_to_nested_schema(non_ecs_mapping)
+        index_mapping = non_ecs.get(index, {})
+        non_ecs_schema.update(index_mapping)
+        index_mapping = ecs.flatten(index_mapping)
+        index_mapping = utils.convert_to_nested_schema(index_mapping)
+        non_ecs_mapping.update({index: index_mapping})
+
+    # These need to be handled separately as we need to be able to validate non-ecs fields as a whole
+    # and also at a per index level as custom schemas can override non-ecs fields and/or indices
+    non_ecs_schema = ecs.flatten(non_ecs_schema)
+    non_ecs_schema = utils.convert_to_nested_schema(non_ecs_schema)
+    non_ecs_schema = prune_mappings_of_unsupported_types("non-ecs", "non-ecs", non_ecs_schema, log)
+    non_ecs_mapping = prune_mappings_of_unsupported_types("non-ecs", "non-ecs", non_ecs_mapping, log)
 
     # Load custom schema and convert to index mapping format (nested schema)
     custom_mapping: dict[str, Any] = {}
     custom_indices = ecs.get_custom_schemas()
     for index in indices:
-        custom_mapping.update(custom_indices.get(index, {}))
-    custom_mapping = ecs.flatten(custom_mapping)
-    custom_mapping = utils.convert_to_nested_schema(custom_mapping)
+        index_mapping = custom_indices.get(index, {})
+        index_mapping = ecs.flatten(index_mapping)
+        index_mapping = utils.convert_to_nested_schema(index_mapping)
+        custom_mapping.update({index: index_mapping})
+    custom_mapping = prune_mappings_of_unsupported_types("custom", "custom", custom_mapping, log)
 
     # Load ECS in an index mapping format (nested schema)
     current_version = Version.parse(load_current_package_version(), optional_minor_and_patch=True)
@@ -484,8 +511,9 @@ def prepare_mappings(  # noqa: PLR0913
 
     index_lookup.update({"rule-ecs-index": ecs_schema})
 
-    if (not integration_mappings or existing_mappings) and not non_ecs_mapping and not ecs_schema:
+    if (not integration_mappings or existing_mappings) and not non_ecs_schema and not ecs_schema:
         raise ValueError("No mappings found")
-    index_lookup.update({"rule-non-ecs-index": non_ecs_mapping})
+    index_lookup.update({"rule-non-ecs-index": non_ecs_schema})
+    utils.combine_dicts(combined_mappings, deepcopy(non_ecs_schema))
 
     return existing_mappings, index_lookup, combined_mappings
diff --git a/detection_rules/rule_validators.py b/detection_rules/rule_validators.py
@@ -373,9 +373,13 @@ def text_fields(self, eql_schema: ecs.KqlSchema2Eql | endgame.EndgameSchema) ->
     def unique_fields(self) -> list[str]:  # type: ignore[reportIncompatibleMethodOverride]
         return list({str(f) for f in self.ast if isinstance(f, eql.ast.Field)})  # type: ignore[reportUnknownVariableType]
 
-    def auto_add_field(self, validation_checks_error: eql.EqlParseError, index_or_dataview: str) -> None:
+    def auto_add_field(
+        self, validation_checks_error: eql.EqlParseError, index_or_dataview: str, field: str | None = None
+    ) -> None:
         """Auto add a missing field to the schema."""
-        field_name = extract_error_field(self.query, validation_checks_error)
+        field_name = field
+        if not field:
+            field_name = extract_error_field(self.query, validation_checks_error)
         if not field_name:
             raise ValueError("No field name found")
         field_type = ecs.get_all_flattened_schema().get(field_name)
@@ -584,6 +588,8 @@ def add_stack_targets(query_text: str, include_endgame: bool) -> None:
 
     def validate(self, data: "QueryRuleData", meta: RuleMeta, max_attempts: int = 10) -> None:  # type: ignore[reportIncompatibleMethodOverride]
         """Validate an EQL query using a unified plan of schema combinations."""
+        # base field declaration
+        field = None
         if meta.query_schema_validation is False or meta.maturity == "deprecated":
             return
 
@@ -606,7 +612,7 @@ def validate(self, data: "QueryRuleData", meta: RuleMeta, max_attempts: int = 10
             )
             first_error: EQL_ERROR_TYPES | ValueError | None = None
             for t in ordered_targets:
-                exc = self.validate_query_text_with_schema(
+                exc, field = self.validate_query_text_with_schema(
                     t.query_text,
                     t.schema,
                     err_trailer=t.err_trailer,
@@ -629,7 +635,7 @@ def validate(self, data: "QueryRuleData", meta: RuleMeta, max_attempts: int = 10
                 and RULES_CONFIG.auto_gen_schema_file
                 and data.index_or_dataview
             ):
-                self.auto_add_field(first_error, data.index_or_dataview[0])  # type: ignore[reportArgumentType]
+                self.auto_add_field(first_error, data.index_or_dataview[0], field=field)  # type: ignore[reportArgumentType]
                 continue
 
             # Raise the enriched parse error (includes target trailer + metadata)
@@ -645,7 +651,7 @@ def validate_query_text_with_schema(  # noqa: PLR0913
         min_stack_version: str,
         beat_types: list[str] | None = None,
         integration_types: list[str] | None = None,
-    ) -> EQL_ERROR_TYPES | ValueError | None:
+    ) -> tuple[EQL_ERROR_TYPES | ValueError | None, str | None]:
         """Validate the provided EQL query text against the schema (variant of validate_query_with_schema)."""
         try:
             config = set_eql_config(min_stack_version)
@@ -657,13 +663,16 @@ def validate_query_text_with_schema(  # noqa: PLR0913
             # If the error is an unknown field and the field was referenced as optional (prefixed with '?'),
             # treat this target as non-fatal to honor EQL optional semantics.
 
+            # To support EQL sequence and sub query validation we need to return this field to overwrite
+            # what would have been parsed via auto_add_field as the error message and query may be out of sync
+            # depending on how the method is called.
             field = extract_error_field(query_text, exc)
             if (
                 field
                 and ("Unknown field" in message or "Field not recognized" in message)
                 and f"?{field}" in self.query
             ):
-                return None
+                return None, field
             if "Unknown field" in message and beat_types:
                 trailer_parts.insert(0, "Try adding event.module or event.dataset to specify beats module")
             elif "Field not recognized" in message and isinstance(schema, ecs.KqlSchema2Eql):
@@ -691,10 +700,11 @@ def validate_query_text_with_schema(  # noqa: PLR0913
                 exc.source,  # type: ignore[reportUnknownArgumentType]
                 len(exc.caret.lstrip()),
                 trailer=trailer,
-            )
+            ), field
         except Exception as exc:  # noqa: BLE001
             print(err_trailer)
-            return exc  # type: ignore[reportReturnType]
+            return exc, None  # type: ignore[reportReturnType]
+        return None, None
 
     def validate_rule_type_configurations(self, data: EQLRuleData, meta: RuleMeta) -> tuple[list[str], bool]:
         """Validate EQL rule type configurations (timestamp_field, event_category_override, tiebreaker_field).
diff --git a/detection_rules/schemas/definitions.py b/detection_rules/schemas/definitions.py
@@ -245,7 +245,7 @@ def validator_wrapper(value: Any) -> Any:
     list[NonEmptyStr], fields.List(NON_EMPTY_STRING_FIELD, validate=validate.Length(min=1, max=3))
 ]
 PositiveInteger = Annotated[int, fields.Integer(validate=validate.Range(min=1))]
-RiskScore = Annotated[int, fields.Integer(validate=validate.Range(min=1, max=100))]
+RiskScore = Annotated[int, fields.Integer(validate=validate.Range(min=0, max=100))]
 RuleName = Annotated[str, fields.String(validate=elastic_rule_name_regexp(NAME_PATTERN))]
 SemVer = Annotated[str, fields.String(validate=validate.Regexp(VERSION_PATTERN))]
 SemVerMinorOnly = Annotated[str, fields.String(validate=validate.Regexp(MINOR_SEMVER))]
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "detection_rules"
-version = "1.5.7"
+version = "1.5.8"
 description = "Detection Rules is the home for rules used by Elastic Security. This repository is used for the development, maintenance, testing, validation, and release of rules for Elastic Security’s Detection Engine."
 readme = "README.md"
 requires-python = ">=3.12"

Original file line number	Diff line number	Diff line change
`@@ -245,7 +245,7 @@ def validator_wrapper(value: Any) -> Any:`
`245`	`245`	`list[NonEmptyStr], fields.List(NON_EMPTY_STRING_FIELD, validate=validate.Length(min=1, max=3))`
`246`	`246`	`]`
`247`	`247`	`PositiveInteger = Annotated[int, fields.Integer(validate=validate.Range(min=1))]`
`248`		`-RiskScore = Annotated[int, fields.Integer(validate=validate.Range(min=1, max=100))]`
	`248`	`+RiskScore = Annotated[int, fields.Integer(validate=validate.Range(min=0, max=100))]`
`249`	`249`	`RuleName = Annotated[str, fields.String(validate=elastic_rule_name_regexp(NAME_PATTERN))]`
`250`	`250`	`SemVer = Annotated[str, fields.String(validate=validate.Regexp(VERSION_PATTERN))]`
`251`	`251`	`SemVerMinorOnly = Annotated[str, fields.String(validate=validate.Regexp(MINOR_SEMVER))]`