[net-next-2.6.git] / lib / proportions.c

/*
 * Floating proportions
 *
 *  Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
 *
 * Description:
 *
 * The floating proportion is a time derivative with an exponentially decaying
 * history:
 *
 *   p_{j} = \Sum_{i=0} (dx_{j}/dt_{-i}) / 2^(1+i)
 *
 * Where j is an element from {prop_local}, x_{j} is j's number of events,
 * and i the time period over which the differential is taken. So d/dt_{-i} is
 * the differential over the i-th last period.
 *
 * The decaying history gives smooth transitions. The time differential carries
 * the notion of speed.
 *
 * The denominator is 2^(1+i) because we want the series to be normalised, ie.
 *
 *   \Sum_{i=0} 1/2^(1+i) = 1
 *
 * Further more, if we measure time (t) in the same events as x; so that:
 *
 *   t = \Sum_{j} x_{j}
 *
 * we get that:
 *
 *   \Sum_{j} p_{j} = 1
 *
 * Writing this in an iterative fashion we get (dropping the 'd's):
 *
 *   if (++x_{j}, ++t > period)
 *     t /= 2;
 *     for_each (j)
 *       x_{j} /= 2;
 *
 * so that:
 *
 *   p_{j} = x_{j} / t;
 *
 * We optimize away the '/= 2' for the global time delta by noting that:
 *
 *   if (++t > period) t /= 2:
 *
 * Can be approximated by:
 *
 *   period/2 + (++t % period/2)
 *
 * [ Furthermore, when we choose period to be 2^n it can be written in terms of
 *   binary operations and wraparound artefacts disappear. ]
 *
 * Also note that this yields a natural counter of the elapsed periods:
 *
 *   c = t / (period/2)
 *
 * [ Its monotonic increasing property can be applied to mitigate the wrap-
 *   around issue. ]
 *
 * This allows us to do away with the loop over all prop_locals on each period
 * expiration. By remembering the period count under which it was last accessed
 * as c_{j}, we can obtain the number of 'missed' cycles from:
 *
 *   c - c_{j}
 *
 * We can then lazily catch up to the global period count every time we are
 * going to use x_{j}, by doing:
 *
 *   x_{j} /= 2^(c - c_{j}), c_{j} = c
 */

#include <linux/proportions.h>
#include <linux/rcupdate.h>

/*
 * Limit the time part in order to ensure there are some bits left for the
 * cycle counter.
 */
#define PROP_MAX_SHIFT (3*BITS_PER_LONG/4)

int prop_descriptor_init(struct prop_descriptor *pd, int shift)
{
	int err;

	if (shift > PROP_MAX_SHIFT)
		shift = PROP_MAX_SHIFT;

	pd->index = 0;
	pd->pg[0].shift = shift;
	mutex_init(&pd->mutex);
	err = percpu_counter_init_irq(&pd->pg[0].events, 0);
	if (err)
		goto out;

	err = percpu_counter_init_irq(&pd->pg[1].events, 0);
	if (err)
		percpu_counter_destroy(&pd->pg[0].events);

out:
	return err;
}

/*
 * We have two copies, and flip between them to make it seem like an atomic
 * update. The update is not really atomic wrt the events counter, but
 * it is internally consistent with the bit layout depending on shift.
 *
 * We copy the events count, move the bits around and flip the index.
 */
void prop_change_shift(struct prop_descriptor *pd, int shift)
{
	int index;
	int offset;
	u64 events;
	unsigned long flags;

	if (shift > PROP_MAX_SHIFT)
		shift = PROP_MAX_SHIFT;

	mutex_lock(&pd->mutex);

	index = pd->index ^ 1;
	offset = pd->pg[pd->index].shift - shift;
	if (!offset)
		goto out;

	pd->pg[index].shift = shift;

	local_irq_save(flags);
	events = percpu_counter_sum(&pd->pg[pd->index].events);
	if (offset < 0)
		events <<= -offset;
	else
		events >>= offset;
	percpu_counter_set(&pd->pg[index].events, events);

	/*
	 * ensure the new pg is fully written before the switch
	 */
	smp_wmb();
	pd->index = index;
	local_irq_restore(flags);

	synchronize_rcu();

out:
	mutex_unlock(&pd->mutex);
}

/*
 * wrap the access to the data in an rcu_read_lock() section;
 * this is used to track the active references.
 */
static struct prop_global *prop_get_global(struct prop_descriptor *pd)
{
	int index;

	rcu_read_lock();
	index = pd->index;
	/*
	 * match the wmb from vcd_flip()
	 */
	smp_rmb();
	return &pd->pg[index];
}

static void prop_put_global(struct prop_descriptor *pd, struct prop_global *pg)
{
	rcu_read_unlock();
}

static void
prop_adjust_shift(int *pl_shift, unsigned long *pl_period, int new_shift)
{
	int offset = *pl_shift - new_shift;

	if (!offset)
		return;

	if (offset < 0)
		*pl_period <<= -offset;
	else
		*pl_period >>= offset;

	*pl_shift = new_shift;
}

/*
 * PERCPU
 */

int prop_local_init_percpu(struct prop_local_percpu *pl)
{
	spin_lock_init(&pl->lock);
	pl->shift = 0;
	pl->period = 0;
	return percpu_counter_init_irq(&pl->events, 0);
}

void prop_local_destroy_percpu(struct prop_local_percpu *pl)
{
	percpu_counter_destroy(&pl->events);
}

/*
 * Catch up with missed period expirations.
 *
 *   until (c_{j} == c)
 *     x_{j} -= x_{j}/2;
 *     c_{j}++;
 */
static
void prop_norm_percpu(struct prop_global *pg, struct prop_local_percpu *pl)
{
	unsigned long period = 1UL << (pg->shift - 1);
	unsigned long period_mask = ~(period - 1);
	unsigned long global_period;
	unsigned long flags;

	global_period = percpu_counter_read(&pg->events);
	global_period &= period_mask;

	/*
	 * Fast path - check if the local and global period count still match
	 * outside of the lock.
	 */
	if (pl->period == global_period)
		return;

	spin_lock_irqsave(&pl->lock, flags);
	prop_adjust_shift(&pl->shift, &pl->period, pg->shift);
	/*
	 * For each missed period, we half the local counter.
	 * basically:
	 *   pl->events >> (global_period - pl->period);
	 *
	 * but since the distributed nature of percpu counters make division
	 * rather hard, use a regular subtraction loop. This is safe, because
	 * the events will only every be incremented, hence the subtraction
	 * can never result in a negative number.
	 */
	while (pl->period != global_period) {
		unsigned long val = percpu_counter_read(&pl->events);
		unsigned long half = (val + 1) >> 1;

		/*
		 * Half of zero won't be much less, break out.
		 * This limits the loop to shift iterations, even
		 * if we missed a million.
		 */
		if (!val)
			break;

		percpu_counter_add(&pl->events, -half);
		pl->period += period;
	}
	pl->period = global_period;
	spin_unlock_irqrestore(&pl->lock, flags);
}

/*
 *   ++x_{j}, ++t
 */
void __prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl)
{
	struct prop_global *pg = prop_get_global(pd);

	prop_norm_percpu(pg, pl);
	percpu_counter_add(&pl->events, 1);
	percpu_counter_add(&pg->events, 1);
	prop_put_global(pd, pg);
}

/*
 * Obtain a fraction of this proportion
 *
 *   p_{j} = x_{j} / (period/2 + t % period/2)
 */
void prop_fraction_percpu(struct prop_descriptor *pd,
		struct prop_local_percpu *pl,
		long *numerator, long *denominator)
{
	struct prop_global *pg = prop_get_global(pd);
	unsigned long period_2 = 1UL << (pg->shift - 1);
	unsigned long counter_mask = period_2 - 1;
	unsigned long global_count;

	prop_norm_percpu(pg, pl);
	*numerator = percpu_counter_read_positive(&pl->events);

	global_count = percpu_counter_read(&pg->events);
	*denominator = period_2 + (global_count & counter_mask);

	prop_put_global(pd, pg);
}

/*
 * SINGLE
 */

int prop_local_init_single(struct prop_local_single *pl)
{
	spin_lock_init(&pl->lock);
	pl->shift = 0;
	pl->period = 0;
	pl->events = 0;
	return 0;
}

void prop_local_destroy_single(struct prop_local_single *pl)
{
}

/*
 * Catch up with missed period expirations.
 */
static
void prop_norm_single(struct prop_global *pg, struct prop_local_single *pl)
{
	unsigned long period = 1UL << (pg->shift - 1);
	unsigned long period_mask = ~(period - 1);
	unsigned long global_period;
	unsigned long flags;

	global_period = percpu_counter_read(&pg->events);
	global_period &= period_mask;

	/*
	 * Fast path - check if the local and global period count still match
	 * outside of the lock.
	 */
	if (pl->period == global_period)
		return;

	spin_lock_irqsave(&pl->lock, flags);
	prop_adjust_shift(&pl->shift, &pl->period, pg->shift);
	/*
	 * For each missed period, we half the local counter.
	 */
	period = (global_period - pl->period) >> (pg->shift - 1);
	if (likely(period < BITS_PER_LONG))
		pl->events >>= period;
	else
		pl->events = 0;
	pl->period = global_period;
	spin_unlock_irqrestore(&pl->lock, flags);
}

/*
 *   ++x_{j}, ++t
 */
void __prop_inc_single(struct prop_descriptor *pd, struct prop_local_single *pl)
{
	struct prop_global *pg = prop_get_global(pd);

	prop_norm_single(pg, pl);
	pl->events++;
	percpu_counter_add(&pg->events, 1);
	prop_put_global(pd, pg);
}

/*
 * Obtain a fraction of this proportion
 *
 *   p_{j} = x_{j} / (period/2 + t % period/2)
 */
void prop_fraction_single(struct prop_descriptor *pd,
	       	struct prop_local_single *pl,
		long *numerator, long *denominator)
{
	struct prop_global *pg = prop_get_global(pd);
	unsigned long period_2 = 1UL << (pg->shift - 1);
	unsigned long counter_mask = period_2 - 1;
	unsigned long global_count;

	prop_norm_single(pg, pl);
	*numerator = pl->events;

	global_count = percpu_counter_read(&pg->events);
	*denominator = period_2 + (global_count & counter_mask);

	prop_put_global(pd, pg);
}
Commit	Line	Data
145ca25e PZ	1	/*
	2	* Floating proportions
	3	*
	4	* Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
	5	*
	6	* Description:
	7	*
	8	* The floating proportion is a time derivative with an exponentially decaying
	9	* history:
	10	*
	11	* p_{j} = \Sum_{i=0} (dx_{j}/dt_{-i}) / 2^(1+i)
	12	*
	13	* Where j is an element from {prop_local}, x_{j} is j's number of events,
	14	* and i the time period over which the differential is taken. So d/dt_{-i} is
	15	* the differential over the i-th last period.
	16	*
	17	* The decaying history gives smooth transitions. The time differential carries
	18	* the notion of speed.
	19	*
	20	* The denominator is 2^(1+i) because we want the series to be normalised, ie.
	21	*
	22	* \Sum_{i=0} 1/2^(1+i) = 1
	23	*
	24	* Further more, if we measure time (t) in the same events as x; so that:
	25	*
	26	* t = \Sum_{j} x_{j}
	27	*
	28	* we get that:
	29	*
	30	* \Sum_{j} p_{j} = 1
	31	*
	32	* Writing this in an iterative fashion we get (dropping the 'd's):
	33	*
	34	* if (++x_{j}, ++t > period)
	35	* t /= 2;
	36	* for_each (j)
	37	* x_{j} /= 2;
	38	*
	39	* so that:
	40	*
	41	* p_{j} = x_{j} / t;
	42	*
	43	* We optimize away the '/= 2' for the global time delta by noting that:
	44	*
	45	* if (++t > period) t /= 2:
	46	*
	47	* Can be approximated by:
	48	*
	49	* period/2 + (++t % period/2)
	50	*
	51	* [ Furthermore, when we choose period to be 2^n it can be written in terms of
	52	* binary operations and wraparound artefacts disappear. ]
	53	*
	54	* Also note that this yields a natural counter of the elapsed periods:
	55	*
	56	* c = t / (period/2)
	57	*
	58	* [ Its monotonic increasing property can be applied to mitigate the wrap-
	59	* around issue. ]
	60	*
	61	* This allows us to do away with the loop over all prop_locals on each period
	62	* expiration. By remembering the period count under which it was last accessed
	63	* as c_{j}, we can obtain the number of 'missed' cycles from:
	64	*
65	* c - c_{j}
66	*
67	* We can then lazily catch up to the global period count every time we are
68	* going to use x_{j}, by doing:
69	*
70	* x_{j} /= 2^(c - c_{j}), c_{j} = c
71	*/
72
73	#include <linux/proportions.h>
74	#include <linux/rcupdate.h>
75
76	/*
77	* Limit the time part in order to ensure there are some bits left for the
78	* cycle counter.
79	*/
80	#define PROP_MAX_SHIFT (3*BITS_PER_LONG/4)
81
82	int prop_descriptor_init(struct prop_descriptor *pd, int shift)
83	{
84	int err;
85
86	if (shift > PROP_MAX_SHIFT)
87	shift = PROP_MAX_SHIFT;
88
89	pd->index = 0;
90	pd->pg[0].shift = shift;
91	mutex_init(&pd->mutex);
92	err = percpu_counter_init_irq(&pd->pg[0].events, 0);
93	if (err)
94	goto out;
95
96	err = percpu_counter_init_irq(&pd->pg[1].events, 0);
97	if (err)
98	percpu_counter_destroy(&pd->pg[0].events);
99
100	out:
101	return err;
102	}
103
104	/*
105	* We have two copies, and flip between them to make it seem like an atomic
106	* update. The update is not really atomic wrt the events counter, but
107	* it is internally consistent with the bit layout depending on shift.
108	*
109	* We copy the events count, move the bits around and flip the index.
110	*/
111	void prop_change_shift(struct prop_descriptor *pd, int shift)
112	{
113	int index;
114	int offset;
115	u64 events;
116	unsigned long flags;
117
118	if (shift > PROP_MAX_SHIFT)
119	shift = PROP_MAX_SHIFT;
120
121	mutex_lock(&pd->mutex);
122
123	index = pd->index ^ 1;
124	offset = pd->pg[pd->index].shift - shift;
125	if (!offset)
126	goto out;
127
128	pd->pg[index].shift = shift;
129
130	local_irq_save(flags);
131	events = percpu_counter_sum(&pd->pg[pd->index].events);
132	if (offset < 0)
133	events <<= -offset;
134	else
135	events >>= offset;
136	percpu_counter_set(&pd->pg[index].events, events);
137
138	/*
139	* ensure the new pg is fully written before the switch
140	*/
141	smp_wmb();
142	pd->index = index;
143	local_irq_restore(flags);
144
145	synchronize_rcu();
146
147	out:
148	mutex_unlock(&pd->mutex);
149	}
150
151	/*
152	* wrap the access to the data in an rcu_read_lock() section;
153	* this is used to track the active references.
154	*/
155	static struct prop_global prop_get_global(struct prop_descriptor pd)
156	{
157	int index;
158
159	rcu_read_lock();
160	index = pd->index;
161	/*
162	* match the wmb from vcd_flip()
163	*/
164	smp_rmb();
165	return &pd->pg[index];
166	}
167
168	static void prop_put_global(struct prop_descriptor pd, struct prop_global pg)
169	{
170	rcu_read_unlock();
171	}
172
173	static void
174	prop_adjust_shift(int pl_shift, unsigned long pl_period, int new_shift)
175	{
176	int offset = *pl_shift - new_shift;
177
178	if (!offset)
179	return;
180
181	if (offset < 0)
182	*pl_period <<= -offset;
183	else
184	*pl_period >>= offset;
185
186	*pl_shift = new_shift;
187	}
188
189	/*
190	* PERCPU
191	*/
192
193	int prop_local_init_percpu(struct prop_local_percpu *pl)
194	{
195	spin_lock_init(&pl->lock);
196	pl->shift = 0;
197	pl->period = 0;
198	return percpu_counter_init_irq(&pl->events, 0);
199	}
200
201	void prop_local_destroy_percpu(struct prop_local_percpu *pl)
202	{
203	percpu_counter_destroy(&pl->events);
204	}
205
206	/*
207	* Catch up with missed period expirations.
208	*
209	* until (c_{j} == c)
210	* x_{j} -= x_{j}/2;
211	* c_{j}++;
212	*/
213	static
214	void prop_norm_percpu(struct prop_global pg, struct prop_local_percpu pl)
215	{
216	unsigned long period = 1UL << (pg->shift - 1);
217	unsigned long period_mask = ~(period - 1);
218	unsigned long global_period;
219	unsigned long flags;
220
221	global_period = percpu_counter_read(&pg->events);
222	global_period &= period_mask;
223
224	/*
225	* Fast path - check if the local and global period count still match
226	* outside of the lock.
227	*/
228	if (pl->period == global_period)
229	return;
230
231	spin_lock_irqsave(&pl->lock, flags);
232	prop_adjust_shift(&pl->shift, &pl->period, pg->shift);
233	/*
234	* For each missed period, we half the local counter.
235	* basically:
236	* pl->events >> (global_period - pl->period);
237	*
238	* but since the distributed nature of percpu counters make division
239	* rather hard, use a regular subtraction loop. This is safe, because
240	* the events will only every be incremented, hence the subtraction
241	* can never result in a negative number.
242	*/
243	while (pl->period != global_period) {
244	unsigned long val = percpu_counter_read(&pl->events);
245	unsigned long half = (val + 1) >> 1;
246
247	/*
248	* Half of zero won't be much less, break out.
249	* This limits the loop to shift iterations, even
250	* if we missed a million.
251	*/
252	if (!val)
253	break;
254
255	percpu_counter_add(&pl->events, -half);
256	pl->period += period;
257	}
258	pl->period = global_period;
259	spin_unlock_irqrestore(&pl->lock, flags);
260	}
261
262	/*
263	* ++x_{j}, ++t
264	*/
265	void __prop_inc_percpu(struct prop_descriptor pd, struct prop_local_percpu pl)
266	{
267	struct prop_global *pg = prop_get_global(pd);
268
269	prop_norm_percpu(pg, pl);
270	percpu_counter_add(&pl->events, 1);
271	percpu_counter_add(&pg->events, 1);
272	prop_put_global(pd, pg);
273	}
274
275	/*
276	* Obtain a fraction of this proportion
277	*
278	* p_{j} = x_{j} / (period/2 + t % period/2)
279	*/
280	void prop_fraction_percpu(struct prop_descriptor *pd,
281	struct prop_local_percpu *pl,
282	long numerator, long denominator)
283	{
284	struct prop_global *pg = prop_get_global(pd);
285	unsigned long period_2 = 1UL << (pg->shift - 1);
286	unsigned long counter_mask = period_2 - 1;
287	unsigned long global_count;
288
289	prop_norm_percpu(pg, pl);
290	*numerator = percpu_counter_read_positive(&pl->events);
291
292	global_count = percpu_counter_read(&pg->events);
293	*denominator = period_2 + (global_count & counter_mask);
294
295	prop_put_global(pd, pg);
296	}
297
298	/*
299	* SINGLE
300	*/
301
302	int prop_local_init_single(struct prop_local_single *pl)
303	{
304	spin_lock_init(&pl->lock);
305	pl->shift = 0;
306	pl->period = 0;
307	pl->events = 0;
308	return 0;
309	}
310
311	void prop_local_destroy_single(struct prop_local_single *pl)
312	{
313	}
314
315	/*
316	* Catch up with missed period expirations.
317	*/
318	static
319	void prop_norm_single(struct prop_global pg, struct prop_local_single pl)
320	{
321	unsigned long period = 1UL << (pg->shift - 1);
322	unsigned long period_mask = ~(period - 1);
323	unsigned long global_period;
324	unsigned long flags;
325
326	global_period = percpu_counter_read(&pg->events);
327	global_period &= period_mask;
328
329	/*
330	* Fast path - check if the local and global period count still match
331	* outside of the lock.
332	*/
333	if (pl->period == global_period)
334	return;
335
336	spin_lock_irqsave(&pl->lock, flags);
337	prop_adjust_shift(&pl->shift, &pl->period, pg->shift);
338	/*
339	* For each missed period, we half the local counter.
340	*/
341	period = (global_period - pl->period) >> (pg->shift - 1);
342	if (likely(period < BITS_PER_LONG))
343	pl->events >>= period;
344	else
345	pl->events = 0;
346	pl->period = global_period;
347	spin_unlock_irqrestore(&pl->lock, flags);
348	}
349
350	/*
351	* ++x_{j}, ++t
352	*/
353	void __prop_inc_single(struct prop_descriptor pd, struct prop_local_single pl)
354	{
355	struct prop_global *pg = prop_get_global(pd);
356
357	prop_norm_single(pg, pl);
358	pl->events++;
359	percpu_counter_add(&pg->events, 1);
360	prop_put_global(pd, pg);
361	}
362
363	/*
364	* Obtain a fraction of this proportion
365	*
366	* p_{j} = x_{j} / (period/2 + t % period/2)
367	*/
368	void prop_fraction_single(struct prop_descriptor *pd,
369	struct prop_local_single *pl,
370	long numerator, long denominator)
371	{
372	struct prop_global *pg = prop_get_global(pd);
373	unsigned long period_2 = 1UL << (pg->shift - 1);
374	unsigned long counter_mask = period_2 - 1;
375	unsigned long global_count;
376
377	prop_norm_single(pg, pl);
378	*numerator = pl->events;
379
380	global_count = percpu_counter_read(&pg->events);
381	*denominator = period_2 + (global_count & counter_mask);
382
383	prop_put_global(pd, pg);
384	}