Commit | Line | Data |
---|---|---|
145ca25e PZ |
1 | /* |
2 | * Floating proportions | |
3 | * | |
4 | * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> | |
5 | * | |
6 | * Description: | |
7 | * | |
8 | * The floating proportion is a time derivative with an exponentially decaying | |
9 | * history: | |
10 | * | |
11 | * p_{j} = \Sum_{i=0} (dx_{j}/dt_{-i}) / 2^(1+i) | |
12 | * | |
13 | * Where j is an element from {prop_local}, x_{j} is j's number of events, | |
14 | * and i the time period over which the differential is taken. So d/dt_{-i} is | |
15 | * the differential over the i-th last period. | |
16 | * | |
17 | * The decaying history gives smooth transitions. The time differential carries | |
18 | * the notion of speed. | |
19 | * | |
20 | * The denominator is 2^(1+i) because we want the series to be normalised, ie. | |
21 | * | |
22 | * \Sum_{i=0} 1/2^(1+i) = 1 | |
23 | * | |
24 | * Further more, if we measure time (t) in the same events as x; so that: | |
25 | * | |
26 | * t = \Sum_{j} x_{j} | |
27 | * | |
28 | * we get that: | |
29 | * | |
30 | * \Sum_{j} p_{j} = 1 | |
31 | * | |
32 | * Writing this in an iterative fashion we get (dropping the 'd's): | |
33 | * | |
34 | * if (++x_{j}, ++t > period) | |
35 | * t /= 2; | |
36 | * for_each (j) | |
37 | * x_{j} /= 2; | |
38 | * | |
39 | * so that: | |
40 | * | |
41 | * p_{j} = x_{j} / t; | |
42 | * | |
43 | * We optimize away the '/= 2' for the global time delta by noting that: | |
44 | * | |
45 | * if (++t > period) t /= 2: | |
46 | * | |
47 | * Can be approximated by: | |
48 | * | |
49 | * period/2 + (++t % period/2) | |
50 | * | |
51 | * [ Furthermore, when we choose period to be 2^n it can be written in terms of | |
52 | * binary operations and wraparound artefacts disappear. ] | |
53 | * | |
54 | * Also note that this yields a natural counter of the elapsed periods: | |
55 | * | |
56 | * c = t / (period/2) | |
57 | * | |
58 | * [ Its monotonic increasing property can be applied to mitigate the wrap- | |
59 | * around issue. ] | |
60 | * | |
61 | * This allows us to do away with the loop over all prop_locals on each period | |
62 | * expiration. By remembering the period count under which it was last accessed | |
63 | * as c_{j}, we can obtain the number of 'missed' cycles from: | |
64 | * | |
65 | * c - c_{j} | |
66 | * | |
67 | * We can then lazily catch up to the global period count every time we are | |
68 | * going to use x_{j}, by doing: | |
69 | * | |
70 | * x_{j} /= 2^(c - c_{j}), c_{j} = c | |
71 | */ | |
72 | ||
73 | #include <linux/proportions.h> | |
74 | #include <linux/rcupdate.h> | |
75 | ||
76 | /* | |
77 | * Limit the time part in order to ensure there are some bits left for the | |
78 | * cycle counter. | |
79 | */ | |
80 | #define PROP_MAX_SHIFT (3*BITS_PER_LONG/4) | |
81 | ||
82 | int prop_descriptor_init(struct prop_descriptor *pd, int shift) | |
83 | { | |
84 | int err; | |
85 | ||
86 | if (shift > PROP_MAX_SHIFT) | |
87 | shift = PROP_MAX_SHIFT; | |
88 | ||
89 | pd->index = 0; | |
90 | pd->pg[0].shift = shift; | |
91 | mutex_init(&pd->mutex); | |
92 | err = percpu_counter_init_irq(&pd->pg[0].events, 0); | |
93 | if (err) | |
94 | goto out; | |
95 | ||
96 | err = percpu_counter_init_irq(&pd->pg[1].events, 0); | |
97 | if (err) | |
98 | percpu_counter_destroy(&pd->pg[0].events); | |
99 | ||
100 | out: | |
101 | return err; | |
102 | } | |
103 | ||
104 | /* | |
105 | * We have two copies, and flip between them to make it seem like an atomic | |
106 | * update. The update is not really atomic wrt the events counter, but | |
107 | * it is internally consistent with the bit layout depending on shift. | |
108 | * | |
109 | * We copy the events count, move the bits around and flip the index. | |
110 | */ | |
111 | void prop_change_shift(struct prop_descriptor *pd, int shift) | |
112 | { | |
113 | int index; | |
114 | int offset; | |
115 | u64 events; | |
116 | unsigned long flags; | |
117 | ||
118 | if (shift > PROP_MAX_SHIFT) | |
119 | shift = PROP_MAX_SHIFT; | |
120 | ||
121 | mutex_lock(&pd->mutex); | |
122 | ||
123 | index = pd->index ^ 1; | |
124 | offset = pd->pg[pd->index].shift - shift; | |
125 | if (!offset) | |
126 | goto out; | |
127 | ||
128 | pd->pg[index].shift = shift; | |
129 | ||
130 | local_irq_save(flags); | |
131 | events = percpu_counter_sum(&pd->pg[pd->index].events); | |
132 | if (offset < 0) | |
133 | events <<= -offset; | |
134 | else | |
135 | events >>= offset; | |
136 | percpu_counter_set(&pd->pg[index].events, events); | |
137 | ||
138 | /* | |
139 | * ensure the new pg is fully written before the switch | |
140 | */ | |
141 | smp_wmb(); | |
142 | pd->index = index; | |
143 | local_irq_restore(flags); | |
144 | ||
145 | synchronize_rcu(); | |
146 | ||
147 | out: | |
148 | mutex_unlock(&pd->mutex); | |
149 | } | |
150 | ||
151 | /* | |
152 | * wrap the access to the data in an rcu_read_lock() section; | |
153 | * this is used to track the active references. | |
154 | */ | |
155 | static struct prop_global *prop_get_global(struct prop_descriptor *pd) | |
156 | { | |
157 | int index; | |
158 | ||
159 | rcu_read_lock(); | |
160 | index = pd->index; | |
161 | /* | |
162 | * match the wmb from vcd_flip() | |
163 | */ | |
164 | smp_rmb(); | |
165 | return &pd->pg[index]; | |
166 | } | |
167 | ||
168 | static void prop_put_global(struct prop_descriptor *pd, struct prop_global *pg) | |
169 | { | |
170 | rcu_read_unlock(); | |
171 | } | |
172 | ||
173 | static void | |
174 | prop_adjust_shift(int *pl_shift, unsigned long *pl_period, int new_shift) | |
175 | { | |
176 | int offset = *pl_shift - new_shift; | |
177 | ||
178 | if (!offset) | |
179 | return; | |
180 | ||
181 | if (offset < 0) | |
182 | *pl_period <<= -offset; | |
183 | else | |
184 | *pl_period >>= offset; | |
185 | ||
186 | *pl_shift = new_shift; | |
187 | } | |
188 | ||
189 | /* | |
190 | * PERCPU | |
191 | */ | |
192 | ||
f16b34aa PZ |
193 | #define PROP_BATCH (8*(1+ilog2(nr_cpu_ids))) |
194 | ||
145ca25e PZ |
195 | int prop_local_init_percpu(struct prop_local_percpu *pl) |
196 | { | |
197 | spin_lock_init(&pl->lock); | |
198 | pl->shift = 0; | |
199 | pl->period = 0; | |
200 | return percpu_counter_init_irq(&pl->events, 0); | |
201 | } | |
202 | ||
203 | void prop_local_destroy_percpu(struct prop_local_percpu *pl) | |
204 | { | |
205 | percpu_counter_destroy(&pl->events); | |
206 | } | |
207 | ||
208 | /* | |
209 | * Catch up with missed period expirations. | |
210 | * | |
211 | * until (c_{j} == c) | |
212 | * x_{j} -= x_{j}/2; | |
213 | * c_{j}++; | |
214 | */ | |
215 | static | |
216 | void prop_norm_percpu(struct prop_global *pg, struct prop_local_percpu *pl) | |
217 | { | |
218 | unsigned long period = 1UL << (pg->shift - 1); | |
219 | unsigned long period_mask = ~(period - 1); | |
220 | unsigned long global_period; | |
221 | unsigned long flags; | |
222 | ||
223 | global_period = percpu_counter_read(&pg->events); | |
224 | global_period &= period_mask; | |
225 | ||
226 | /* | |
227 | * Fast path - check if the local and global period count still match | |
228 | * outside of the lock. | |
229 | */ | |
230 | if (pl->period == global_period) | |
231 | return; | |
232 | ||
233 | spin_lock_irqsave(&pl->lock, flags); | |
234 | prop_adjust_shift(&pl->shift, &pl->period, pg->shift); | |
f16b34aa | 235 | |
145ca25e PZ |
236 | /* |
237 | * For each missed period, we half the local counter. | |
238 | * basically: | |
239 | * pl->events >> (global_period - pl->period); | |
145ca25e | 240 | */ |
f16b34aa PZ |
241 | period = (global_period - pl->period) >> (pg->shift - 1); |
242 | if (period < BITS_PER_LONG) { | |
243 | s64 val = percpu_counter_read(&pl->events); | |
244 | ||
245 | if (val < (nr_cpu_ids * PROP_BATCH)) | |
246 | val = percpu_counter_sum(&pl->events); | |
247 | ||
248 | __percpu_counter_add(&pl->events, -val + (val >> period), | |
249 | PROP_BATCH); | |
250 | } else | |
251 | percpu_counter_set(&pl->events, 0); | |
252 | ||
145ca25e PZ |
253 | pl->period = global_period; |
254 | spin_unlock_irqrestore(&pl->lock, flags); | |
255 | } | |
256 | ||
257 | /* | |
258 | * ++x_{j}, ++t | |
259 | */ | |
260 | void __prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl) | |
261 | { | |
262 | struct prop_global *pg = prop_get_global(pd); | |
263 | ||
264 | prop_norm_percpu(pg, pl); | |
f16b34aa | 265 | __percpu_counter_add(&pl->events, 1, PROP_BATCH); |
145ca25e PZ |
266 | percpu_counter_add(&pg->events, 1); |
267 | prop_put_global(pd, pg); | |
268 | } | |
269 | ||
270 | /* | |
271 | * Obtain a fraction of this proportion | |
272 | * | |
273 | * p_{j} = x_{j} / (period/2 + t % period/2) | |
274 | */ | |
275 | void prop_fraction_percpu(struct prop_descriptor *pd, | |
276 | struct prop_local_percpu *pl, | |
277 | long *numerator, long *denominator) | |
278 | { | |
279 | struct prop_global *pg = prop_get_global(pd); | |
280 | unsigned long period_2 = 1UL << (pg->shift - 1); | |
281 | unsigned long counter_mask = period_2 - 1; | |
282 | unsigned long global_count; | |
283 | ||
284 | prop_norm_percpu(pg, pl); | |
285 | *numerator = percpu_counter_read_positive(&pl->events); | |
286 | ||
287 | global_count = percpu_counter_read(&pg->events); | |
288 | *denominator = period_2 + (global_count & counter_mask); | |
289 | ||
290 | prop_put_global(pd, pg); | |
291 | } | |
292 | ||
293 | /* | |
294 | * SINGLE | |
295 | */ | |
296 | ||
297 | int prop_local_init_single(struct prop_local_single *pl) | |
298 | { | |
299 | spin_lock_init(&pl->lock); | |
300 | pl->shift = 0; | |
301 | pl->period = 0; | |
302 | pl->events = 0; | |
303 | return 0; | |
304 | } | |
305 | ||
306 | void prop_local_destroy_single(struct prop_local_single *pl) | |
307 | { | |
308 | } | |
309 | ||
310 | /* | |
311 | * Catch up with missed period expirations. | |
312 | */ | |
313 | static | |
314 | void prop_norm_single(struct prop_global *pg, struct prop_local_single *pl) | |
315 | { | |
316 | unsigned long period = 1UL << (pg->shift - 1); | |
317 | unsigned long period_mask = ~(period - 1); | |
318 | unsigned long global_period; | |
319 | unsigned long flags; | |
320 | ||
321 | global_period = percpu_counter_read(&pg->events); | |
322 | global_period &= period_mask; | |
323 | ||
324 | /* | |
325 | * Fast path - check if the local and global period count still match | |
326 | * outside of the lock. | |
327 | */ | |
328 | if (pl->period == global_period) | |
329 | return; | |
330 | ||
331 | spin_lock_irqsave(&pl->lock, flags); | |
332 | prop_adjust_shift(&pl->shift, &pl->period, pg->shift); | |
333 | /* | |
334 | * For each missed period, we half the local counter. | |
335 | */ | |
336 | period = (global_period - pl->period) >> (pg->shift - 1); | |
337 | if (likely(period < BITS_PER_LONG)) | |
338 | pl->events >>= period; | |
339 | else | |
340 | pl->events = 0; | |
341 | pl->period = global_period; | |
342 | spin_unlock_irqrestore(&pl->lock, flags); | |
343 | } | |
344 | ||
345 | /* | |
346 | * ++x_{j}, ++t | |
347 | */ | |
348 | void __prop_inc_single(struct prop_descriptor *pd, struct prop_local_single *pl) | |
349 | { | |
350 | struct prop_global *pg = prop_get_global(pd); | |
351 | ||
352 | prop_norm_single(pg, pl); | |
353 | pl->events++; | |
354 | percpu_counter_add(&pg->events, 1); | |
355 | prop_put_global(pd, pg); | |
356 | } | |
357 | ||
358 | /* | |
359 | * Obtain a fraction of this proportion | |
360 | * | |
361 | * p_{j} = x_{j} / (period/2 + t % period/2) | |
362 | */ | |
363 | void prop_fraction_single(struct prop_descriptor *pd, | |
364 | struct prop_local_single *pl, | |
365 | long *numerator, long *denominator) | |
366 | { | |
367 | struct prop_global *pg = prop_get_global(pd); | |
368 | unsigned long period_2 = 1UL << (pg->shift - 1); | |
369 | unsigned long counter_mask = period_2 - 1; | |
370 | unsigned long global_count; | |
371 | ||
372 | prop_norm_single(pg, pl); | |
373 | *numerator = pl->events; | |
374 | ||
375 | global_count = percpu_counter_read(&pg->events); | |
376 | *denominator = period_2 + (global_count & counter_mask); | |
377 | ||
378 | prop_put_global(pd, pg); | |
379 | } |