blob: 750a9f4e0de4536cbceb363ff776377b1080f43c [file] [log] [blame]
// Copyright 2021 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <math.h>
#include <stdio.h>
#include <vector>
template <typename ValueType>
class Histogram
{
public:
Histogram()
{
num_entries_ = 0;
num_low_outliers_ = 0;
num_high_outliers_ = 0;
low_percentile_ = 0;
high_percentile_ = 0;
SetWindowSize(10000);
SetCumulativeDensityThresholds(0.01f, 0.99f);
}
void AddSample(ValueType s)
{
int N = static_cast<int>(samples_.size());
samples_[num_entries_ % N] = s;
num_entries_++;
}
void SetBucketCount(int bc)
{
buckets_.resize(bc);
}
void SetWindowSize(int s)
{
samples_.resize(s);
}
void SetCumulativeDensityThresholds(float low_cd, float high_cd)
{
low_cum_density_ = low_cd;
high_cum_density_ = high_cd;
}
void ComputeHistogram()
{
const int NS = static_cast<int>(samples_.size());
int N = NS;
if (num_entries_ < N)
{
N = num_entries_;
}
if (N <= 0)
{
return;
}
std::vector<ValueType> sorted;
if (N == NS)
sorted = samples_;
else
sorted.insert(sorted.end(), samples_.begin(), samples_.begin() + N);
sort(sorted.begin(), sorted.end());
int idx_low = static_cast<int>(N * low_cum_density_);
int idx_high = static_cast<int>(N * high_cum_density_);
if (idx_high - idx_low + 1 < 1)
{
return; // No entries can be shown, quit
}
max_bucket_height_ = 0;
ValueType value_low = sorted[idx_low];
ValueType value_high = sorted[idx_high];
low_percentile_ = value_low;
high_percentile_ = value_high;
const int NB = static_cast<int>(buckets_.size()); // Number of Bins
float bucket_width = (value_high - value_low) / NB;
// If all values are the same, manually extend the range to
// (value-1, value+1)
const float EPS = 1e-4;
if (fabs(value_high - value_low) <= EPS)
{
value_low = value_low - 1;
value_high = value_high + 1;
bucket_width = (value_high - value_low) / NB;
}
else
{}
buckets_.assign(NB, 0);
num_low_outliers_ = 0;
num_high_outliers_ = 0;
for (int i = idx_low; i <= idx_high; i++)
{
ValueType v = sorted[i];
ValueType dist = (v - value_low);
int bucket_idx = dist / bucket_width;
if (bucket_idx < 0)
{
num_low_outliers_++;
}
else if (bucket_idx >= NB)
{
num_high_outliers_++;
}
else
{
buckets_[bucket_idx]++;
max_bucket_height_ =
std::max(max_bucket_height_, buckets_[bucket_idx]);
}
}
}
int BucketHeight(int idx)
{
if (idx < 0 || idx >= static_cast<int>(buckets_.size()))
return 0;
return buckets_[idx];
}
void Assign(Histogram<ValueType>* out)
{
out->num_entries_ = num_entries_;
out->samples_ = samples_;
out->num_low_outliers_ = num_low_outliers_;
out->num_high_outliers_ = num_high_outliers_;
out->buckets_ = buckets_;
out->low_cum_density_ = low_cum_density_;
out->high_cum_density_ = high_cum_density_;
out->low_percentile_ = low_percentile_;
out->high_percentile_ = high_percentile_;
out->max_bucket_height_ = max_bucket_height_;
}
int MaxBucketHeight()
{
return max_bucket_height_;
}
ValueType LowPercentile()
{
return low_percentile_;
}
ValueType HighPercentile()
{
return high_percentile_;
}
float LowCumDensity()
{
return low_cum_density_;
}
float HighCumDensity()
{
return high_cum_density_;
}
bool Empty()
{
return num_entries_ == 0;
}
int num_entries_;
std::vector<ValueType> samples_;
int num_low_outliers_, num_high_outliers_;
std::vector<int> buckets_;
float low_cum_density_, high_cum_density_;
// "1% percentile" means "1% of the samples are below this value"
ValueType low_percentile_, high_percentile_;
int max_bucket_height_;
};