ParsedHistogramConverter.java
@@ -12,6 +12,8 @@
import java.util.ArrayList;
import java.util.List;

import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MAX_SCALE;

public class ParsedHistogramConverter {

/**
@@ -42,6 +44,73 @@ public static HistogramParser.ParsedHistogram exponentialToTDigest(ExponentialHi
return new HistogramParser.ParsedHistogram(centroids, counts);
}

/**
* Converts t-digest histograms to exponential histograms, trying to do the inverse
* of {@link #exponentialToTDigest(ExponentialHistogramParser.ParsedExponentialHistogram)}
* as accurately as possible.
* <br>
* On a round-trip conversion from exponential histogram to T-Digest and back,
* the bucket centers are preserved, but the bucket widths are lost.
* The conversion algorithm works by generating tiny buckets (scale set to MAX_SCALE)
* containing the T-Digest centroids.
*
* @param tDigest the t-digest histogram to convert
* @return the resulting exponential histogram
*/
public static ExponentialHistogramParser.ParsedExponentialHistogram tDigestToExponential(HistogramParser.ParsedHistogram tDigest) {
List<Double> centroids = tDigest.values();
List<Long> counts = tDigest.counts();

int numNegativeCentroids = 0;
while (numNegativeCentroids < centroids.size() && centroids.get(numNegativeCentroids) < 0) {
numNegativeCentroids++;
}

// iterate negative centroids from closest to zero to furthest away,
// which corresponds to ascending exponential histogram bucket indices
int scale = MAX_SCALE;
List<IndexWithCount> negativeBuckets = new ArrayList<>();
for (int i = numNegativeCentroids - 1; i >= 0; i--) {
double centroid = centroids.get(i);
long count = counts.get(i);
assert centroid < 0;
appendCentroidWithCountAsBucket(centroid, count, scale, negativeBuckets);
}

long zeroCount = 0;
int firstPositiveIndex = numNegativeCentroids;
if (firstPositiveIndex < centroids.size() && centroids.get(firstPositiveIndex) == 0) {
// we have a zero-centroid, which we'll map to the zero bucket
zeroCount = counts.get(firstPositiveIndex);
firstPositiveIndex++;
}

List<IndexWithCount> positiveBuckets = new ArrayList<>();
for (int i = firstPositiveIndex; i < centroids.size(); i++) {
double centroid = centroids.get(i);
long count = counts.get(i);
assert centroid > 0;
appendCentroidWithCountAsBucket(centroid, count, scale, positiveBuckets);
}

return new ExponentialHistogramParser.ParsedExponentialHistogram(
scale,
0.0,
zeroCount,
negativeBuckets,
positiveBuckets,
null, // sum, min, max will be estimated
null,
null
);
}

private static void appendCentroidWithCountAsBucket(double centroid, long count, int scale, List<IndexWithCount> outputBuckets) {
long index = ExponentialScaleUtils.computeIndex(centroid, scale);
assert outputBuckets.isEmpty() || outputBuckets.getLast().index() < index;
outputBuckets.add(new IndexWithCount(index, count));
}

private static void appendBucketCentroid(
List<Double> centroids,
List<Long> counts,
@@ -52,7 +121,13 @@ private static void appendBucketCentroid(
double lowerBound = ExponentialScaleUtils.getLowerBucketBoundary(expHistoBucket.index(), scale);
double upperBound = ExponentialScaleUtils.getUpperBucketBoundary(expHistoBucket.index(), scale);
double center = sign * (lowerBound + upperBound) / 2.0;
centroids.add(center);
counts.add(expHistoBucket.count());
// the index + scale representation has higher precision than the centroid representation,
// so we can have multiple exp histogram buckets map to the same centroid.
if (centroids.isEmpty() == false && centroids.getLast() == center) {
counts.add(counts.removeLast() + expHistoBucket.count());
} else {
centroids.add(center);
counts.add(expHistoBucket.count());
}
}
}
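As an aside, here is a minimal sketch (not part of this change) of the round-trip idea described in the javadoc of tDigestToExponential: a positive centroid is placed into the narrowest possible bucket at MAX_SCALE, and converting back reports the arithmetic mean of that bucket's boundaries, which lands practically on top of the original centroid. It only relies on the ExponentialScaleUtils methods and the MAX_SCALE constant that already appear in the diff; the sketch's class and method names are invented for illustration.

import org.elasticsearch.exponentialhistogram.ExponentialScaleUtils;

import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MAX_SCALE;

// illustrative sketch only, not part of the PR
class CentroidRoundTripSketch {

    // maps a positive t-digest centroid to a MAX_SCALE bucket and back to a centroid,
    // matching the positive-bucket path of the converter above
    static double roundTrip(double centroid) {
        // t-digest -> exponential: index of the narrowest bucket containing the centroid
        long index = ExponentialScaleUtils.computeIndex(centroid, MAX_SCALE);
        // exponential -> t-digest: the reported centroid is the arithmetic mean of the bucket boundaries
        double lower = ExponentialScaleUtils.getLowerBucketBoundary(index, MAX_SCALE);
        double upper = ExponentialScaleUtils.getUpperBucketBoundary(index, MAX_SCALE);
        // buckets at MAX_SCALE are extremely narrow, so the result is expected to be ~= centroid
        return (lower + upper) / 2.0;
    }
}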
ParsedHistogramConverterTests.java
@@ -16,6 +16,7 @@
import org.elasticsearch.exponentialhistogram.ExponentialHistogramMerger;
import org.elasticsearch.exponentialhistogram.ExponentialHistogramTestUtils;
import org.elasticsearch.exponentialhistogram.ExponentialHistogramXContent;
import org.elasticsearch.exponentialhistogram.ExponentialScaleUtils;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.xcontent.XContentBuilder;
import org.elasticsearch.xcontent.XContentParser;
@@ -26,13 +27,81 @@
import org.elasticsearch.xpack.oteldata.otlp.docbuilder.MappingHints;

import java.io.IOException;
import java.util.List;
import java.util.stream.LongStream;

import static org.hamcrest.Matchers.closeTo;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.lessThan;

public class ParsedHistogramConverterTests extends ESTestCase {

public void testExponentialHistogramRoundTrip() {
ExponentialHistogram input = ExponentialHistogramTestUtils.randomHistogram();
HistogramParser.ParsedHistogram tdigest = ParsedHistogramConverter.exponentialToTDigest(toParsed(input));
ExponentialHistogramParser.ParsedExponentialHistogram output = ParsedHistogramConverter.tDigestToExponential(tdigest);

// the conversion loses the width of the original buckets, but the bucket centers (arithmetic mean of boundaries)
// should be very close

assertThat(output.zeroCount(), equalTo(input.zeroBucket().count()));
assertArithmeticBucketCentersClose(input.negativeBuckets().iterator(), output.negativeBuckets(), output.scale());
assertArithmeticBucketCentersClose(input.positiveBuckets().iterator(), output.positiveBuckets(), output.scale());
}

private static void assertArithmeticBucketCentersClose(
BucketIterator originalBuckets,
List<IndexWithCount> convertedBuckets,
int convertedScale
) {
for (IndexWithCount convertedBucket : convertedBuckets) {
assertThat(originalBuckets.hasNext(), equalTo(true));

double originalCenter = (ExponentialScaleUtils.getLowerBucketBoundary(originalBuckets.peekIndex(), originalBuckets.scale())
+ ExponentialScaleUtils.getUpperBucketBoundary(originalBuckets.peekIndex(), originalBuckets.scale())) / 2.0;
double convertedCenter = (ExponentialScaleUtils.getLowerBucketBoundary(convertedBucket.index(), convertedScale)
+ ExponentialScaleUtils.getUpperBucketBoundary(convertedBucket.index(), convertedScale)) / 2.0;

double relativeError = Math.abs(convertedCenter - originalCenter) / Math.abs(originalCenter);
assertThat(
"original center=" + originalCenter + ", converted center=" + convertedCenter + ", relative error=" + relativeError,
relativeError,
closeTo(0, 0.0000001)
);

originalBuckets.advance();
}
assertThat(originalBuckets.hasNext(), equalTo(false));
}

public void testToExponentialHistogramConversionWithCloseCentroids() {
// build a t-digest with two centroids very close to each other
List<Double> centroids = List.of(1.0, Math.nextAfter(1.0, 2));
List<Long> counts = List.of(1L, 2L);

HistogramParser.ParsedHistogram input = new HistogramParser.ParsedHistogram(centroids, counts);
ExponentialHistogramParser.ParsedExponentialHistogram converted = ParsedHistogramConverter.tDigestToExponential(input);

assertThat(converted.zeroCount(), equalTo(0L));
List<IndexWithCount> posBuckets = converted.positiveBuckets();
assertThat(posBuckets.size(), equalTo(2));
assertThat(posBuckets.get(0).index(), lessThan(posBuckets.get(1).index()));
assertThat(posBuckets.get(0).count(), equalTo(1L));
assertThat(posBuckets.get(1).count(), equalTo(2L));
}

public void testToTDigestConversionMergesCentroids() {
// build a histogram with two buckets very close to zero
ExponentialHistogram input = ExponentialHistogram.builder(ExponentialHistogram.MAX_SCALE, ExponentialHistogramCircuitBreaker.noop())
.setPositiveBucket(ExponentialHistogram.MIN_INDEX, 1)
.setPositiveBucket(ExponentialHistogram.MIN_INDEX + 1, 2)
.build();
// due to rounding errors they end up as the same centroid, but their counts should be merged
HistogramParser.ParsedHistogram converted = ParsedHistogramConverter.exponentialToTDigest(toParsed(input));
assertThat(converted.values(), equalTo(List.of(0.0)));
assertThat(converted.counts(), equalTo(List.of(3L)));
}

public void testSameConversionBehaviourAsOtlpMetricsEndpoint() {
// our histograms are sparse, while OpenTelemetry histograms are dense.
// to test against the OTLP conversion algorithm, we need to make our random histogram dense enough first
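For context, a hedged usage sketch of the two converter entry points exercised by the tests above; the centroid values, counts, and the wrapping method are invented for illustration, and the parser/converter imports are assumed to match the test file.

// illustrative only: build a tiny t-digest, convert it to an exponential histogram and back
static void convertBothWays() {
    HistogramParser.ParsedHistogram tdigest = new HistogramParser.ParsedHistogram(
        List.of(-2.5, 0.0, 1.5), // centroids, sorted ascending, with an explicit zero centroid
        List.of(3L, 4L, 5L)      // counts per centroid
    );
    // the zero centroid is mapped to the zero bucket, the others become single MAX_SCALE buckets
    ExponentialHistogramParser.ParsedExponentialHistogram exponential = ParsedHistogramConverter.tDigestToExponential(tdigest);
    // converting back should yield centroids very close to -2.5 and 1.5; only the original bucket widths are lost
    HistogramParser.ParsedHistogram roundTripped = ParsedHistogramConverter.exponentialToTDigest(exponential);
}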