diff --git a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/ParsedHistogramConverter.java b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/ParsedHistogramConverter.java
index 200222b844bfd..1b024d7920405 100644
--- a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/ParsedHistogramConverter.java
+++ b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/ParsedHistogramConverter.java
@@ -12,6 +12,8 @@
import java.util.ArrayList;
import java.util.List;
+import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MAX_SCALE;
+
public class ParsedHistogramConverter {
/**
@@ -42,6 +44,73 @@ public static HistogramParser.ParsedHistogram exponentialToTDigest(ExponentialHi
return new HistogramParser.ParsedHistogram(centroids, counts);
}
+ /**
+ * Converts t-digest histograms to exponential histograms, trying to do the inverse
+ * of {@link #exponentialToTDigest(ExponentialHistogramParser.ParsedExponentialHistogram)}
+ * as accurately as possible.
+ *
+ * On a round-trip conversion from exponential histogram to t-digest and back,
+ * the bucket centers are preserved; the bucket widths, however, are lost.
+ * The conversion algorithm works by generating tiny buckets (scale set to MAX_SCALE)
+ * containing the t-digest centroids.
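+ * For example, a centroid at value {@code v} with count {@code c} becomes a single
+ * bucket with index {@code ExponentialScaleUtils.computeIndex(v, MAX_SCALE)} and
+ * count {@code c}; a centroid at exactly zero is mapped to the zero bucket instead.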
+ *
+ * @param tDigest the t-digest histogram to convert
+ * @return the resulting exponential histogram
+ */
+ public static ExponentialHistogramParser.ParsedExponentialHistogram tDigestToExponential(HistogramParser.ParsedHistogram tDigest) {
+ List<Double> centroids = tDigest.values();
+ List<Long> counts = tDigest.counts();
+
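+ // centroids are expected in ascending order, so any negative centroids come first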
+ int numNegativeCentroids = 0;
+ while (numNegativeCentroids < centroids.size() && centroids.get(numNegativeCentroids) < 0) {
+ numNegativeCentroids++;
+ }
+
+ // iterate negative centroids from closest to zero to furthest away,
+ // which corresponds to ascending exponential histogram bucket indices
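+ // use the finest possible scale so each centroid maps to the narrowest representable bucket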
+ int scale = MAX_SCALE;
+ List<IndexWithCount> negativeBuckets = new ArrayList<>();
+ for (int i = numNegativeCentroids - 1; i >= 0; i--) {
+ double centroid = centroids.get(i);
+ long count = counts.get(i);
+ assert centroid < 0;
+ appendCentroidWithCountAsBucket(centroid, count, scale, negativeBuckets);
+ }
+
+ long zeroCount = 0;
+ int firstPositiveIndex = numNegativeCentroids;
+ if (firstPositiveIndex < centroids.size() && centroids.get(firstPositiveIndex) == 0) {
+ // we have a zero-centroid, which we'll map to the zero bucket
+ zeroCount = counts.get(firstPositiveIndex);
+ firstPositiveIndex++;
+ }
+
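+ // the remaining centroids are strictly positive and yield ascending bucket indices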
+ List<IndexWithCount> positiveBuckets = new ArrayList<>();
+ for (int i = firstPositiveIndex; i < centroids.size(); i++) {
+ double centroid = centroids.get(i);
+ long count = counts.get(i);
+ assert centroid > 0;
+ appendCentroidWithCountAsBucket(centroid, count, scale, positiveBuckets);
+ }
+
+ return new ExponentialHistogramParser.ParsedExponentialHistogram(
+ scale,
+ 0.0, // zero bucket threshold
+ zeroCount,
+ negativeBuckets,
+ positiveBuckets,
+ null, // sum, min, max will be estimated
+ null,
+ null
+ );
+ }
+
+ private static void appendCentroidWithCountAsBucket(double centroid, long count, int scale, List<IndexWithCount> outputBuckets) {
+ long index = ExponentialScaleUtils.computeIndex(centroid, scale);
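+ // centroids are processed in ascending order, so bucket indices must be strictly increasing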
+ assert outputBuckets.isEmpty() || outputBuckets.getLast().index() < index;
+ outputBuckets.add(new IndexWithCount(index, count));
+ }
+
private static void appendBucketCentroid(
List<Double> centroids,
List<Long> counts,
@@ -52,7 +121,13 @@ private static void appendBucketCentroid(
double lowerBound = ExponentialScaleUtils.getLowerBucketBoundary(expHistoBucket.index(), scale);
double upperBound = ExponentialScaleUtils.getUpperBucketBoundary(expHistoBucket.index(), scale);
double center = sign * (lowerBound + upperBound) / 2.0;
- centroids.add(center);
- counts.add(expHistoBucket.count());
+ // the (index, scale) representation has higher precision than the double centroid representation,
+ // so multiple exponential histogram buckets can map to the same centroid.
+ if (centroids.isEmpty() == false && centroids.getLast() == center) {
+ counts.add(counts.removeLast() + expHistoBucket.count());
+ } else {
+ centroids.add(center);
+ counts.add(expHistoBucket.count());
+ }
}
}
diff --git a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/ParsedHistogramConverterTests.java b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/ParsedHistogramConverterTests.java
index 94623e131cca9..a82469fd9cd24 100644
--- a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/ParsedHistogramConverterTests.java
+++ b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/ParsedHistogramConverterTests.java
@@ -16,6 +16,7 @@
import org.elasticsearch.exponentialhistogram.ExponentialHistogramMerger;
import org.elasticsearch.exponentialhistogram.ExponentialHistogramTestUtils;
import org.elasticsearch.exponentialhistogram.ExponentialHistogramXContent;
+import org.elasticsearch.exponentialhistogram.ExponentialScaleUtils;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.xcontent.XContentBuilder;
import org.elasticsearch.xcontent.XContentParser;
@@ -26,13 +27,81 @@
import org.elasticsearch.xpack.oteldata.otlp.docbuilder.MappingHints;
import java.io.IOException;
+import java.util.List;
import java.util.stream.LongStream;
import static org.hamcrest.Matchers.closeTo;
import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.lessThan;
public class ParsedHistogramConverterTests extends ESTestCase {
+ public void testExponentialHistogramRoundTrip() {
+ ExponentialHistogram input = ExponentialHistogramTestUtils.randomHistogram();
+ HistogramParser.ParsedHistogram tdigest = ParsedHistogramConverter.exponentialToTDigest(toParsed(input));
+ ExponentialHistogramParser.ParsedExponentialHistogram output = ParsedHistogramConverter.tDigestToExponential(tdigest);
+
+ // the conversion loses the width of the original buckets, but the bucket centers (arithmetic mean of boundaries)
+ // should be very close
+
+ assertThat(output.zeroCount(), equalTo(input.zeroBucket().count()));
+ assertArithmeticBucketCentersClose(input.negativeBuckets().iterator(), output.negativeBuckets(), output.scale());
+ assertArithmeticBucketCentersClose(input.positiveBuckets().iterator(), output.positiveBuckets(), output.scale());
+ }
+
+ private static void assertArithmeticBucketCentersClose(
+ BucketIterator originalBuckets,
+ List<IndexWithCount> convertedBuckets,
+ int convertedScale
+ ) {
+ for (IndexWithCount convertedBucket : convertedBuckets) {
+ assertThat(originalBuckets.hasNext(), equalTo(true));
+
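+ // bucket center = arithmetic mean of the boundaries, mirroring how exponentialToTDigest derives centroids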
+ double originalCenter = (ExponentialScaleUtils.getLowerBucketBoundary(originalBuckets.peekIndex(), originalBuckets.scale())
+ + ExponentialScaleUtils.getUpperBucketBoundary(originalBuckets.peekIndex(), originalBuckets.scale())) / 2.0;
+ double convertedCenter = (ExponentialScaleUtils.getLowerBucketBoundary(convertedBucket.index(), convertedScale)
+ + ExponentialScaleUtils.getUpperBucketBoundary(convertedBucket.index(), convertedScale)) / 2.0;
+
+ double relativeError = Math.abs(convertedCenter - originalCenter) / Math.abs(originalCenter);
+ assertThat(
+ "original center=" + originalCenter + ", converted center=" + convertedCenter + ", relative error=" + relativeError,
+ relativeError,
+ closeTo(0, 0.0000001)
+ );
+
+ originalBuckets.advance();
+ }
+ assertThat(originalBuckets.hasNext(), equalTo(false));
+ }
+
+ public void testToExponentialHistogramConversionWithCloseCentroids() {
+ // build a t-digest with two centroids very close to each other
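+ // Math.nextAfter(1.0, 2) is the smallest representable double greater than 1.0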
+ List<Double> centroids = List.of(1.0, Math.nextAfter(1.0, 2));
+ List<Long> counts = List.of(1L, 2L);
+
+ HistogramParser.ParsedHistogram input = new HistogramParser.ParsedHistogram(centroids, counts);
+ ExponentialHistogramParser.ParsedExponentialHistogram converted = ParsedHistogramConverter.tDigestToExponential(input);
+
+ assertThat(converted.zeroCount(), equalTo(0L));
+ List<IndexWithCount> posBuckets = converted.positiveBuckets();
+ assertThat(posBuckets.size(), equalTo(2));
+ assertThat(posBuckets.get(0).index(), lessThan(posBuckets.get(1).index()));
+ assertThat(posBuckets.get(0).count(), equalTo(1L));
+ assertThat(posBuckets.get(1).count(), equalTo(2L));
+ }
+
+ public void testToTDigestConversionMergesCentroids() {
+ // build a histogram with two buckets very close to zero
+ ExponentialHistogram input = ExponentialHistogram.builder(ExponentialHistogram.MAX_SCALE, ExponentialHistogramCircuitBreaker.noop())
+ .setPositiveBucket(ExponentialHistogram.MIN_INDEX, 1)
+ .setPositiveBucket(ExponentialHistogram.MIN_INDEX + 1, 2)
+ .build();
+ // due to rounding errors they end up as the same centroid, but should have the count merged
+ HistogramParser.ParsedHistogram converted = ParsedHistogramConverter.exponentialToTDigest(toParsed(input));
+ assertThat(converted.values(), equalTo(List.of(0.0)));
+ assertThat(converted.counts(), equalTo(List.of(3L)));
+ }
+
public void testSameConversionBehaviourAsOtlpMetricsEndpoint() {
// our histograms are sparse, opentelemetry ones are dense.
// to test against the OTLP conversion algorithm, we need to make our random histogram dense enough first