blob: 750a9f4e0de4536cbceb363ff776377b1080f43c [file] [log] [blame]
Adedeji Adebisi12c5f112021-07-22 18:07:52 +00001// Copyright 2021 Google LLC
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#pragma once
16#include <math.h>
17#include <stdio.h>
18
19#include <vector>
20template <typename ValueType>
21class Histogram
22{
23 public:
24 Histogram()
25 {
26 num_entries_ = 0;
27 num_low_outliers_ = 0;
28 num_high_outliers_ = 0;
29 low_percentile_ = 0;
30 high_percentile_ = 0;
31 SetWindowSize(10000);
32 SetCumulativeDensityThresholds(0.01f, 0.99f);
33 }
34
35 void AddSample(ValueType s)
36 {
37 int N = static_cast<int>(samples_.size());
38 samples_[num_entries_ % N] = s;
39 num_entries_++;
40 }
41
42 void SetBucketCount(int bc)
43 {
44 buckets_.resize(bc);
45 }
46
47 void SetWindowSize(int s)
48 {
49 samples_.resize(s);
50 }
51
52 void SetCumulativeDensityThresholds(float low_cd, float high_cd)
53 {
54 low_cum_density_ = low_cd;
55 high_cum_density_ = high_cd;
56 }
57
58 void ComputeHistogram()
59 {
60 const int NS = static_cast<int>(samples_.size());
61 int N = NS;
62 if (num_entries_ < N)
63 {
64 N = num_entries_;
65 }
66 if (N <= 0)
67 {
68 return;
69 }
70 std::vector<ValueType> sorted;
71 if (N == NS)
72 sorted = samples_;
73 else
74 sorted.insert(sorted.end(), samples_.begin(), samples_.begin() + N);
75 sort(sorted.begin(), sorted.end());
76 int idx_low = static_cast<int>(N * low_cum_density_);
77 int idx_high = static_cast<int>(N * high_cum_density_);
78 if (idx_high - idx_low + 1 < 1)
79 {
80 return; // No entries can be shown, quit
81 }
82 max_bucket_height_ = 0;
83 ValueType value_low = sorted[idx_low];
84 ValueType value_high = sorted[idx_high];
85 low_percentile_ = value_low;
86 high_percentile_ = value_high;
87 const int NB = static_cast<int>(buckets_.size()); // Number of Bins
88 float bucket_width = (value_high - value_low) / NB;
89 // If all values are the same, manually extend the range to
90 // (value-1, value+1)
91 const float EPS = 1e-4;
92 if (fabs(value_high - value_low) <= EPS)
93 {
94 value_low = value_low - 1;
95 value_high = value_high + 1;
96 bucket_width = (value_high - value_low) / NB;
97 }
98 else
99 {}
100 buckets_.assign(NB, 0);
101 num_low_outliers_ = 0;
102 num_high_outliers_ = 0;
103 for (int i = idx_low; i <= idx_high; i++)
104 {
105 ValueType v = sorted[i];
106 ValueType dist = (v - value_low);
107 int bucket_idx = dist / bucket_width;
108 if (bucket_idx < 0)
109 {
110 num_low_outliers_++;
111 }
112 else if (bucket_idx >= NB)
113 {
114 num_high_outliers_++;
115 }
116 else
117 {
118 buckets_[bucket_idx]++;
119 max_bucket_height_ =
120 std::max(max_bucket_height_, buckets_[bucket_idx]);
121 }
122 }
123 }
124
125 int BucketHeight(int idx)
126 {
127 if (idx < 0 || idx >= static_cast<int>(buckets_.size()))
128 return 0;
129 return buckets_[idx];
130 }
131
132 void Assign(Histogram<ValueType>* out)
133 {
134 out->num_entries_ = num_entries_;
135 out->samples_ = samples_;
136 out->num_low_outliers_ = num_low_outliers_;
137 out->num_high_outliers_ = num_high_outliers_;
138 out->buckets_ = buckets_;
139 out->low_cum_density_ = low_cum_density_;
140 out->high_cum_density_ = high_cum_density_;
141 out->low_percentile_ = low_percentile_;
142 out->high_percentile_ = high_percentile_;
143 out->max_bucket_height_ = max_bucket_height_;
144 }
145
146 int MaxBucketHeight()
147 {
148 return max_bucket_height_;
149 }
150
151 ValueType LowPercentile()
152 {
153 return low_percentile_;
154 }
155
156 ValueType HighPercentile()
157 {
158 return high_percentile_;
159 }
160
161 float LowCumDensity()
162 {
163 return low_cum_density_;
164 }
165
166 float HighCumDensity()
167 {
168 return high_cum_density_;
169 }
170
171 bool Empty()
172 {
173 return num_entries_ == 0;
174 }
175
176 int num_entries_;
177 std::vector<ValueType> samples_;
178 int num_low_outliers_, num_high_outliers_;
179 std::vector<int> buckets_;
180 float low_cum_density_, high_cum_density_;
181 // "1% percentile" means "1% of the samples are below this value"
182 ValueType low_percentile_, high_percentile_;
183 int max_bucket_height_;
184};