]> bbs.cooldavid.org Git - net-next-2.6.git/blame - lib/proportions.c
ipv6: Check the hop limit setting in ancillary data.
[net-next-2.6.git] / lib / proportions.c
CommitLineData
145ca25e
PZ
1/*
2 * Floating proportions
3 *
4 * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
5 *
6 * Description:
7 *
8 * The floating proportion is a time derivative with an exponentially decaying
9 * history:
10 *
11 * p_{j} = \Sum_{i=0} (dx_{j}/dt_{-i}) / 2^(1+i)
12 *
13 * Where j is an element from {prop_local}, x_{j} is j's number of events,
14 * and i the time period over which the differential is taken. So d/dt_{-i} is
15 * the differential over the i-th last period.
16 *
17 * The decaying history gives smooth transitions. The time differential carries
18 * the notion of speed.
19 *
20 * The denominator is 2^(1+i) because we want the series to be normalised, ie.
21 *
22 * \Sum_{i=0} 1/2^(1+i) = 1
23 *
24 * Further more, if we measure time (t) in the same events as x; so that:
25 *
26 * t = \Sum_{j} x_{j}
27 *
28 * we get that:
29 *
30 * \Sum_{j} p_{j} = 1
31 *
32 * Writing this in an iterative fashion we get (dropping the 'd's):
33 *
34 * if (++x_{j}, ++t > period)
35 * t /= 2;
36 * for_each (j)
37 * x_{j} /= 2;
38 *
39 * so that:
40 *
41 * p_{j} = x_{j} / t;
42 *
43 * We optimize away the '/= 2' for the global time delta by noting that:
44 *
45 * if (++t > period) t /= 2:
46 *
47 * Can be approximated by:
48 *
49 * period/2 + (++t % period/2)
50 *
51 * [ Furthermore, when we choose period to be 2^n it can be written in terms of
52 * binary operations and wraparound artefacts disappear. ]
53 *
54 * Also note that this yields a natural counter of the elapsed periods:
55 *
56 * c = t / (period/2)
57 *
58 * [ Its monotonic increasing property can be applied to mitigate the wrap-
59 * around issue. ]
60 *
61 * This allows us to do away with the loop over all prop_locals on each period
62 * expiration. By remembering the period count under which it was last accessed
63 * as c_{j}, we can obtain the number of 'missed' cycles from:
64 *
65 * c - c_{j}
66 *
67 * We can then lazily catch up to the global period count every time we are
68 * going to use x_{j}, by doing:
69 *
70 * x_{j} /= 2^(c - c_{j}), c_{j} = c
71 */
72
73#include <linux/proportions.h>
74#include <linux/rcupdate.h>
75
145ca25e
PZ
76int prop_descriptor_init(struct prop_descriptor *pd, int shift)
77{
78 int err;
79
80 if (shift > PROP_MAX_SHIFT)
81 shift = PROP_MAX_SHIFT;
82
83 pd->index = 0;
84 pd->pg[0].shift = shift;
85 mutex_init(&pd->mutex);
86 err = percpu_counter_init_irq(&pd->pg[0].events, 0);
87 if (err)
88 goto out;
89
90 err = percpu_counter_init_irq(&pd->pg[1].events, 0);
91 if (err)
92 percpu_counter_destroy(&pd->pg[0].events);
93
94out:
95 return err;
96}
97
98/*
99 * We have two copies, and flip between them to make it seem like an atomic
100 * update. The update is not really atomic wrt the events counter, but
101 * it is internally consistent with the bit layout depending on shift.
102 *
103 * We copy the events count, move the bits around and flip the index.
104 */
105void prop_change_shift(struct prop_descriptor *pd, int shift)
106{
107 int index;
108 int offset;
109 u64 events;
110 unsigned long flags;
111
112 if (shift > PROP_MAX_SHIFT)
113 shift = PROP_MAX_SHIFT;
114
115 mutex_lock(&pd->mutex);
116
117 index = pd->index ^ 1;
118 offset = pd->pg[pd->index].shift - shift;
119 if (!offset)
120 goto out;
121
122 pd->pg[index].shift = shift;
123
124 local_irq_save(flags);
125 events = percpu_counter_sum(&pd->pg[pd->index].events);
126 if (offset < 0)
127 events <<= -offset;
128 else
129 events >>= offset;
130 percpu_counter_set(&pd->pg[index].events, events);
131
132 /*
133 * ensure the new pg is fully written before the switch
134 */
135 smp_wmb();
136 pd->index = index;
137 local_irq_restore(flags);
138
139 synchronize_rcu();
140
141out:
142 mutex_unlock(&pd->mutex);
143}
144
145/*
146 * wrap the access to the data in an rcu_read_lock() section;
147 * this is used to track the active references.
148 */
149static struct prop_global *prop_get_global(struct prop_descriptor *pd)
150{
151 int index;
152
153 rcu_read_lock();
154 index = pd->index;
155 /*
156 * match the wmb from vcd_flip()
157 */
158 smp_rmb();
159 return &pd->pg[index];
160}
161
162static void prop_put_global(struct prop_descriptor *pd, struct prop_global *pg)
163{
164 rcu_read_unlock();
165}
166
167static void
168prop_adjust_shift(int *pl_shift, unsigned long *pl_period, int new_shift)
169{
170 int offset = *pl_shift - new_shift;
171
172 if (!offset)
173 return;
174
175 if (offset < 0)
176 *pl_period <<= -offset;
177 else
178 *pl_period >>= offset;
179
180 *pl_shift = new_shift;
181}
182
183/*
184 * PERCPU
185 */
186
f16b34aa
PZ
187#define PROP_BATCH (8*(1+ilog2(nr_cpu_ids)))
188
145ca25e
PZ
189int prop_local_init_percpu(struct prop_local_percpu *pl)
190{
191 spin_lock_init(&pl->lock);
192 pl->shift = 0;
193 pl->period = 0;
194 return percpu_counter_init_irq(&pl->events, 0);
195}
196
197void prop_local_destroy_percpu(struct prop_local_percpu *pl)
198{
199 percpu_counter_destroy(&pl->events);
200}
201
202/*
203 * Catch up with missed period expirations.
204 *
205 * until (c_{j} == c)
206 * x_{j} -= x_{j}/2;
207 * c_{j}++;
208 */
209static
210void prop_norm_percpu(struct prop_global *pg, struct prop_local_percpu *pl)
211{
212 unsigned long period = 1UL << (pg->shift - 1);
213 unsigned long period_mask = ~(period - 1);
214 unsigned long global_period;
215 unsigned long flags;
216
217 global_period = percpu_counter_read(&pg->events);
218 global_period &= period_mask;
219
220 /*
221 * Fast path - check if the local and global period count still match
222 * outside of the lock.
223 */
224 if (pl->period == global_period)
225 return;
226
227 spin_lock_irqsave(&pl->lock, flags);
228 prop_adjust_shift(&pl->shift, &pl->period, pg->shift);
f16b34aa 229
145ca25e
PZ
230 /*
231 * For each missed period, we half the local counter.
232 * basically:
233 * pl->events >> (global_period - pl->period);
145ca25e 234 */
f16b34aa
PZ
235 period = (global_period - pl->period) >> (pg->shift - 1);
236 if (period < BITS_PER_LONG) {
237 s64 val = percpu_counter_read(&pl->events);
238
239 if (val < (nr_cpu_ids * PROP_BATCH))
240 val = percpu_counter_sum(&pl->events);
241
242 __percpu_counter_add(&pl->events, -val + (val >> period),
243 PROP_BATCH);
244 } else
245 percpu_counter_set(&pl->events, 0);
246
145ca25e
PZ
247 pl->period = global_period;
248 spin_unlock_irqrestore(&pl->lock, flags);
249}
250
251/*
252 * ++x_{j}, ++t
253 */
254void __prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl)
255{
256 struct prop_global *pg = prop_get_global(pd);
257
258 prop_norm_percpu(pg, pl);
f16b34aa 259 __percpu_counter_add(&pl->events, 1, PROP_BATCH);
145ca25e
PZ
260 percpu_counter_add(&pg->events, 1);
261 prop_put_global(pd, pg);
262}
263
a42dde04
PZ
264/*
265 * identical to __prop_inc_percpu, except that it limits this pl's fraction to
266 * @frac/PROP_FRAC_BASE by ignoring events when this limit has been exceeded.
267 */
268void __prop_inc_percpu_max(struct prop_descriptor *pd,
269 struct prop_local_percpu *pl, long frac)
270{
271 struct prop_global *pg = prop_get_global(pd);
272
273 prop_norm_percpu(pg, pl);
274
275 if (unlikely(frac != PROP_FRAC_BASE)) {
276 unsigned long period_2 = 1UL << (pg->shift - 1);
277 unsigned long counter_mask = period_2 - 1;
278 unsigned long global_count;
279 long numerator, denominator;
280
281 numerator = percpu_counter_read_positive(&pl->events);
282 global_count = percpu_counter_read(&pg->events);
283 denominator = period_2 + (global_count & counter_mask);
284
285 if (numerator > ((denominator * frac) >> PROP_FRAC_SHIFT))
286 goto out_put;
287 }
288
289 percpu_counter_add(&pl->events, 1);
290 percpu_counter_add(&pg->events, 1);
291
292out_put:
293 prop_put_global(pd, pg);
294}
295
145ca25e
PZ
296/*
297 * Obtain a fraction of this proportion
298 *
299 * p_{j} = x_{j} / (period/2 + t % period/2)
300 */
301void prop_fraction_percpu(struct prop_descriptor *pd,
302 struct prop_local_percpu *pl,
303 long *numerator, long *denominator)
304{
305 struct prop_global *pg = prop_get_global(pd);
306 unsigned long period_2 = 1UL << (pg->shift - 1);
307 unsigned long counter_mask = period_2 - 1;
308 unsigned long global_count;
309
310 prop_norm_percpu(pg, pl);
311 *numerator = percpu_counter_read_positive(&pl->events);
312
313 global_count = percpu_counter_read(&pg->events);
314 *denominator = period_2 + (global_count & counter_mask);
315
316 prop_put_global(pd, pg);
317}
318
319/*
320 * SINGLE
321 */
322
323int prop_local_init_single(struct prop_local_single *pl)
324{
325 spin_lock_init(&pl->lock);
326 pl->shift = 0;
327 pl->period = 0;
328 pl->events = 0;
329 return 0;
330}
331
332void prop_local_destroy_single(struct prop_local_single *pl)
333{
334}
335
336/*
337 * Catch up with missed period expirations.
338 */
339static
340void prop_norm_single(struct prop_global *pg, struct prop_local_single *pl)
341{
342 unsigned long period = 1UL << (pg->shift - 1);
343 unsigned long period_mask = ~(period - 1);
344 unsigned long global_period;
345 unsigned long flags;
346
347 global_period = percpu_counter_read(&pg->events);
348 global_period &= period_mask;
349
350 /*
351 * Fast path - check if the local and global period count still match
352 * outside of the lock.
353 */
354 if (pl->period == global_period)
355 return;
356
357 spin_lock_irqsave(&pl->lock, flags);
358 prop_adjust_shift(&pl->shift, &pl->period, pg->shift);
359 /*
360 * For each missed period, we half the local counter.
361 */
362 period = (global_period - pl->period) >> (pg->shift - 1);
363 if (likely(period < BITS_PER_LONG))
364 pl->events >>= period;
365 else
366 pl->events = 0;
367 pl->period = global_period;
368 spin_unlock_irqrestore(&pl->lock, flags);
369}
370
371/*
372 * ++x_{j}, ++t
373 */
374void __prop_inc_single(struct prop_descriptor *pd, struct prop_local_single *pl)
375{
376 struct prop_global *pg = prop_get_global(pd);
377
378 prop_norm_single(pg, pl);
379 pl->events++;
380 percpu_counter_add(&pg->events, 1);
381 prop_put_global(pd, pg);
382}
383
384/*
385 * Obtain a fraction of this proportion
386 *
387 * p_{j} = x_{j} / (period/2 + t % period/2)
388 */
389void prop_fraction_single(struct prop_descriptor *pd,
390 struct prop_local_single *pl,
391 long *numerator, long *denominator)
392{
393 struct prop_global *pg = prop_get_global(pd);
394 unsigned long period_2 = 1UL << (pg->shift - 1);
395 unsigned long counter_mask = period_2 - 1;
396 unsigned long global_count;
397
398 prop_norm_single(pg, pl);
399 *numerator = pl->events;
400
401 global_count = percpu_counter_read(&pg->events);
402 *denominator = period_2 + (global_count & counter_mask);
403
404 prop_put_global(pd, pg);
405}