From b62c43dbc292113f95b5db820d1f2033f29c3b8b Mon Sep 17 00:00:00 2001 From: Jonas Kunz Date: Tue, 4 Nov 2025 14:23:52 +0100 Subject: [PATCH 1/2] Implement conversion from t-digest to exponential histograms --- .../mapper/ParsedHistogramConverter.java | 74 ++++++++++++++++++- .../mapper/ParsedHistogramConverterTests.java | 69 +++++++++++++++++ 2 files changed, 141 insertions(+), 2 deletions(-) diff --git a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/ParsedHistogramConverter.java b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/ParsedHistogramConverter.java index 200222b844bfd..0b0867ecd206b 100644 --- a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/ParsedHistogramConverter.java +++ b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/ParsedHistogramConverter.java @@ -12,6 +12,8 @@ import java.util.ArrayList; import java.util.List; +import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MAX_SCALE; + public class ParsedHistogramConverter { /** @@ -42,6 +44,68 @@ public static HistogramParser.ParsedHistogram exponentialToTDigest(ExponentialHi return new HistogramParser.ParsedHistogram(centroids, counts); } + /** + * Converts t-digest histograms to exponential histograms, trying to do the inverse + * of {@link #exponentialToTDigest(ExponentialHistogramParser.ParsedExponentialHistogram)} + * as accurately as possible. 
+ * + * @param tDigest the t-digest histogram to convert + * @return the resulting exponential histogram + */ + public static ExponentialHistogramParser.ParsedExponentialHistogram tDigestToExponential(HistogramParser.ParsedHistogram tDigest) { + List centroids = tDigest.values(); + List counts = tDigest.counts(); + + int numNegativeCentroids = 0; + while (numNegativeCentroids < centroids.size() && centroids.get(numNegativeCentroids) < 0) { + numNegativeCentroids++; + } + + // iterate negative centroids from closest to zero to furthest away, + // which corresponds to ascending exponential histogram bucket indices + int scale = MAX_SCALE; + List negativeBuckets = new ArrayList<>(); + for (int i = numNegativeCentroids - 1; i >= 0; i--) { + double centroid = centroids.get(i); + long count = counts.get(i); + assert centroid < 0; + appendCentroidWithCountAsBucket(centroid, count, scale, negativeBuckets); + } + + long zeroCount = 0; + int firstPositiveIndex = numNegativeCentroids; + if (firstPositiveIndex < centroids.size() && centroids.get(firstPositiveIndex) == 0) { + // we have a zero-centroid, which we'll map to the zero bucket + zeroCount = counts.get(firstPositiveIndex); + firstPositiveIndex++; + } + + List positiveBuckets = new ArrayList<>(); + for (int i = firstPositiveIndex; i < centroids.size(); i++) { + double centroid = centroids.get(i); + long count = counts.get(i); + assert centroid > 0; + appendCentroidWithCountAsBucket(centroid, count, scale, positiveBuckets); + } + + return new ExponentialHistogramParser.ParsedExponentialHistogram( + scale, + 0.0, + zeroCount, + negativeBuckets, + positiveBuckets, + null, // sum, min, max will be estimated + null, + null + ); + } + + private static void appendCentroidWithCountAsBucket(double centroid, long count, int scale, List outputBuckets) { + long index = ExponentialScaleUtils.computeIndex(centroid, scale); + assert outputBuckets.isEmpty() || outputBuckets.getLast().index() < index; + outputBuckets.add(new 
IndexWithCount(index, count)); + } + private static void appendBucketCentroid( List centroids, List counts, @@ -52,7 +116,13 @@ private static void appendBucketCentroid( double lowerBound = ExponentialScaleUtils.getLowerBucketBoundary(expHistoBucket.index(), scale); double upperBound = ExponentialScaleUtils.getUpperBucketBoundary(expHistoBucket.index(), scale); double center = sign * (lowerBound + upperBound) / 2.0; - centroids.add(center); - counts.add(expHistoBucket.count()); + // the index + scale representation is higher precision than the centroid representation, + // so we can have multiple exp histogram buckets map to the same centroid. + if (centroids.isEmpty() == false && centroids.getLast() == center) { + counts.add(counts.removeLast() + expHistoBucket.count()); + } else { + centroids.add(center); + counts.add(expHistoBucket.count()); + } } } diff --git a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/ParsedHistogramConverterTests.java b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/ParsedHistogramConverterTests.java index 94623e131cca9..a82469fd9cd24 100644 --- a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/ParsedHistogramConverterTests.java +++ b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/ParsedHistogramConverterTests.java @@ -16,6 +16,7 @@ import org.elasticsearch.exponentialhistogram.ExponentialHistogramMerger; import org.elasticsearch.exponentialhistogram.ExponentialHistogramTestUtils; import org.elasticsearch.exponentialhistogram.ExponentialHistogramXContent; +import org.elasticsearch.exponentialhistogram.ExponentialScaleUtils; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.xcontent.XContentBuilder; import org.elasticsearch.xcontent.XContentParser; @@ -26,13 +27,81 @@ import org.elasticsearch.xpack.oteldata.otlp.docbuilder.MappingHints; import java.io.IOException; +import java.util.List; import 
java.util.stream.LongStream; import static org.hamcrest.Matchers.closeTo; import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.lessThan; public class ParsedHistogramConverterTests extends ESTestCase { + public void testExponentialHistogramRoundTrip() { + ExponentialHistogram input = ExponentialHistogramTestUtils.randomHistogram(); + HistogramParser.ParsedHistogram tdigest = ParsedHistogramConverter.exponentialToTDigest(toParsed(input)); + ExponentialHistogramParser.ParsedExponentialHistogram output = ParsedHistogramConverter.tDigestToExponential(tdigest); + + // the conversion loses the width of the original buckets, but the bucket centers (arithmetic mean of boundaries) + // should be very close + + assertThat(output.zeroCount(), equalTo(input.zeroBucket().count())); + assertArithmeticBucketCentersClose(input.negativeBuckets().iterator(), output.negativeBuckets(), output.scale()); + assertArithmeticBucketCentersClose(input.positiveBuckets().iterator(), output.positiveBuckets(), output.scale()); + } + + private static void assertArithmeticBucketCentersClose( + BucketIterator originalBuckets, + List convertedBuckets, + int convertedScale + ) { + for (IndexWithCount convertedBucket : convertedBuckets) { + assertThat(originalBuckets.hasNext(), equalTo(true)); + + double originalCenter = (ExponentialScaleUtils.getLowerBucketBoundary(originalBuckets.peekIndex(), originalBuckets.scale()) + + ExponentialScaleUtils.getUpperBucketBoundary(originalBuckets.peekIndex(), originalBuckets.scale())) / 2.0; + double convertedCenter = (ExponentialScaleUtils.getLowerBucketBoundary(convertedBucket.index(), convertedScale) + + ExponentialScaleUtils.getUpperBucketBoundary(convertedBucket.index(), convertedScale)) / 2.0; + + double relativeError = Math.abs(convertedCenter - originalCenter) / Math.abs(originalCenter); + assertThat( + "original center=" + originalCenter + ", converted center=" + convertedCenter + ", relative error=" + relativeError, + 
relativeError, + closeTo(0, 0.0000001) + ); + + originalBuckets.advance(); + } + assertThat(originalBuckets.hasNext(), equalTo(false)); + } + + public void testToExponentialHistogramConversionWithCloseCentroids() { + // build a t-digest with two centroids very close to each other + List centroids = List.of(1.0, Math.nextAfter(1.0, 2)); + List counts = List.of(1L, 2L); + + HistogramParser.ParsedHistogram input = new HistogramParser.ParsedHistogram(centroids, counts); + ExponentialHistogramParser.ParsedExponentialHistogram converted = ParsedHistogramConverter.tDigestToExponential(input); + + assertThat(converted.zeroCount(), equalTo(0L)); + List posBuckets = converted.positiveBuckets(); + assertThat(posBuckets.size(), equalTo(2)); + assertThat(posBuckets.get(0).index(), lessThan(posBuckets.get(1).index())); + assertThat(posBuckets.get(0).count(), equalTo(1L)); + assertThat(posBuckets.get(1).count(), equalTo(2L)); + } + + public void testToTDigestConversionMergesCentroids() { + // build a histogram with two buckets very close to zero + ExponentialHistogram input = ExponentialHistogram.builder(ExponentialHistogram.MAX_SCALE, ExponentialHistogramCircuitBreaker.noop()) + .setPositiveBucket(ExponentialHistogram.MIN_INDEX, 1) + .setPositiveBucket(ExponentialHistogram.MIN_INDEX + 1, 2) + .build(); + // due to rounding errors they end up as the same centroid, but should have the count merged + HistogramParser.ParsedHistogram converted = ParsedHistogramConverter.exponentialToTDigest(toParsed(input)); + assertThat(converted.values(), equalTo(List.of(0.0))); + assertThat(converted.counts(), equalTo(List.of(3L))); + } + public void testSameConversionBehaviourAsOtlpMetricsEndpoint() { // our histograms are sparse, opentelemetry ones are dense. 
// to test against the OTLP conversion algorithm, we need to make our random histogram dense enough first From 9e1d8b48648b69c344a3908478f97d5a8a4bd195 Mon Sep 17 00:00:00 2001 From: Jonas Kunz Date: Mon, 10 Nov 2025 09:29:38 +0100 Subject: [PATCH 2/2] Update algorithm comment --- .../xpack/analytics/mapper/ParsedHistogramConverter.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/ParsedHistogramConverter.java b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/ParsedHistogramConverter.java index 0b0867ecd206b..1b024d7920405 100644 --- a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/ParsedHistogramConverter.java +++ b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/ParsedHistogramConverter.java @@ -48,6 +48,11 @@ public static HistogramParser.ParsedHistogram exponentialToTDigest(ExponentialHi * Converts t-digest histograms to exponential histograms, trying to do the inverse * of {@link #exponentialToTDigest(ExponentialHistogramParser.ParsedExponentialHistogram)} * as accurately as possible. + *
+ * On a round-trip conversion from exponential histogram to T-Digest and back, + * the bucket centers will be preserved; however, the bucket widths are lost. + * The conversion algorithm works by generating tiny buckets (scale set to MAX_SCALE) + * containing the T-Digest centroids. * * @param tDigest the t-digest histogram to convert + * @return the resulting exponential histogram