[net-next-2.6.git] / lib / proportions.c

/*
 * Floating proportions
 *
 *  Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
 *
 * Description:
 *
 * The floating proportion is a time derivative with an exponentially decaying
 * history:
 *
 *   p_{j} = \Sum_{i=0} (dx_{j}/dt_{-i}) / 2^(1+i)
 *
 * Where j is an element from {prop_local}, x_{j} is j's number of events,
 * and i the time period over which the differential is taken. So d/dt_{-i} is
 * the differential over the i-th last period.
 *
 * The decaying history gives smooth transitions. The time differential carries
 * the notion of speed.
 *
 * The denominator is 2^(1+i) because we want the series to be normalised, ie.
 *
 *   \Sum_{i=0} 1/2^(1+i) = 1
 *
 * Further more, if we measure time (t) in the same events as x; so that:
 *
 *   t = \Sum_{j} x_{j}
 *
 * we get that:
 *
 *   \Sum_{j} p_{j} = 1
 *
 * Writing this in an iterative fashion we get (dropping the 'd's):
 *
 *   if (++x_{j}, ++t > period)
 *     t /= 2;
 *     for_each (j)
 *       x_{j} /= 2;
 *
 * so that:
 *
 *   p_{j} = x_{j} / t;
 *
 * We optimize away the '/= 2' for the global time delta by noting that:
 *
 *   if (++t > period) t /= 2:
 *
 * Can be approximated by:
 *
 *   period/2 + (++t % period/2)
 *
 * [ Furthermore, when we choose period to be 2^n it can be written in terms of
 *   binary operations and wraparound artefacts disappear. ]
 *
 * Also note that this yields a natural counter of the elapsed periods:
 *
 *   c = t / (period/2)
 *
 * [ Its monotonic increasing property can be applied to mitigate the wrap-
 *   around issue. ]
 *
 * This allows us to do away with the loop over all prop_locals on each period
 * expiration. By remembering the period count under which it was last accessed
 * as c_{j}, we can obtain the number of 'missed' cycles from:
 *
 *   c - c_{j}
 *
 * We can then lazily catch up to the global period count every time we are
 * going to use x_{j}, by doing:
 *
 *   x_{j} /= 2^(c - c_{j}), c_{j} = c
 */

#include <linux/proportions.h>
#include <linux/rcupdate.h>

int prop_descriptor_init(struct prop_descriptor *pd, int shift)
{
	int err;

	if (shift > PROP_MAX_SHIFT)
		shift = PROP_MAX_SHIFT;

	pd->index = 0;
	pd->pg[0].shift = shift;
	mutex_init(&pd->mutex);
	err = percpu_counter_init_irq(&pd->pg[0].events, 0);
	if (err)
		goto out;

	err = percpu_counter_init_irq(&pd->pg[1].events, 0);
	if (err)
		percpu_counter_destroy(&pd->pg[0].events);

out:
	return err;
}

/*
 * We have two copies, and flip between them to make it seem like an atomic
 * update. The update is not really atomic wrt the events counter, but
 * it is internally consistent with the bit layout depending on shift.
 *
 * We copy the events count, move the bits around and flip the index.
 */
void prop_change_shift(struct prop_descriptor *pd, int shift)
{
	int index;
	int offset;
	u64 events;
	unsigned long flags;

	if (shift > PROP_MAX_SHIFT)
		shift = PROP_MAX_SHIFT;

	mutex_lock(&pd->mutex);

	index = pd->index ^ 1;
	offset = pd->pg[pd->index].shift - shift;
	if (!offset)
		goto out;

	pd->pg[index].shift = shift;

	local_irq_save(flags);
	events = percpu_counter_sum(&pd->pg[pd->index].events);
	if (offset < 0)
		events <<= -offset;
	else
		events >>= offset;
	percpu_counter_set(&pd->pg[index].events, events);

	/*
	 * ensure the new pg is fully written before the switch
	 */
	smp_wmb();
	pd->index = index;
	local_irq_restore(flags);

	synchronize_rcu();

out:
	mutex_unlock(&pd->mutex);
}

/*
 * wrap the access to the data in an rcu_read_lock() section;
 * this is used to track the active references.
 */
static struct prop_global *prop_get_global(struct prop_descriptor *pd)
{
	int index;

	rcu_read_lock();
	index = pd->index;
	/*
	 * match the wmb from vcd_flip()
	 */
	smp_rmb();
	return &pd->pg[index];
}

static void prop_put_global(struct prop_descriptor *pd, struct prop_global *pg)
{
	rcu_read_unlock();
}

static void
prop_adjust_shift(int *pl_shift, unsigned long *pl_period, int new_shift)
{
	int offset = *pl_shift - new_shift;

	if (!offset)
		return;

	if (offset < 0)
		*pl_period <<= -offset;
	else
		*pl_period >>= offset;

	*pl_shift = new_shift;
}

/*
 * PERCPU
 */

#define PROP_BATCH (8*(1+ilog2(nr_cpu_ids)))

int prop_local_init_percpu(struct prop_local_percpu *pl)
{
	spin_lock_init(&pl->lock);
	pl->shift = 0;
	pl->period = 0;
	return percpu_counter_init_irq(&pl->events, 0);
}

void prop_local_destroy_percpu(struct prop_local_percpu *pl)
{
	percpu_counter_destroy(&pl->events);
}

/*
 * Catch up with missed period expirations.
 *
 *   until (c_{j} == c)
 *     x_{j} -= x_{j}/2;
 *     c_{j}++;
 */
static
void prop_norm_percpu(struct prop_global *pg, struct prop_local_percpu *pl)
{
	unsigned long period = 1UL << (pg->shift - 1);
	unsigned long period_mask = ~(period - 1);
	unsigned long global_period;
	unsigned long flags;

	global_period = percpu_counter_read(&pg->events);
	global_period &= period_mask;

	/*
	 * Fast path - check if the local and global period count still match
	 * outside of the lock.
	 */
	if (pl->period == global_period)
		return;

	spin_lock_irqsave(&pl->lock, flags);
	prop_adjust_shift(&pl->shift, &pl->period, pg->shift);

	/*
	 * For each missed period, we half the local counter.
	 * basically:
	 *   pl->events >> (global_period - pl->period);
	 */
	period = (global_period - pl->period) >> (pg->shift - 1);
	if (period < BITS_PER_LONG) {
		s64 val = percpu_counter_read(&pl->events);

		if (val < (nr_cpu_ids * PROP_BATCH))
			val = percpu_counter_sum(&pl->events);

		__percpu_counter_add(&pl->events, -val + (val >> period),
					PROP_BATCH);
	} else
		percpu_counter_set(&pl->events, 0);

	pl->period = global_period;
	spin_unlock_irqrestore(&pl->lock, flags);
}

/*
 *   ++x_{j}, ++t
 */
void __prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl)
{
	struct prop_global *pg = prop_get_global(pd);

	prop_norm_percpu(pg, pl);
	__percpu_counter_add(&pl->events, 1, PROP_BATCH);
	percpu_counter_add(&pg->events, 1);
	prop_put_global(pd, pg);
}

/*
 * identical to __prop_inc_percpu, except that it limits this pl's fraction to
 * @frac/PROP_FRAC_BASE by ignoring events when this limit has been exceeded.
 */
void __prop_inc_percpu_max(struct prop_descriptor *pd,
			   struct prop_local_percpu *pl, long frac)
{
	struct prop_global *pg = prop_get_global(pd);

	prop_norm_percpu(pg, pl);

	if (unlikely(frac != PROP_FRAC_BASE)) {
		unsigned long period_2 = 1UL << (pg->shift - 1);
		unsigned long counter_mask = period_2 - 1;
		unsigned long global_count;
		long numerator, denominator;

		numerator = percpu_counter_read_positive(&pl->events);
		global_count = percpu_counter_read(&pg->events);
		denominator = period_2 + (global_count & counter_mask);

		if (numerator > ((denominator * frac) >> PROP_FRAC_SHIFT))
			goto out_put;
	}

	percpu_counter_add(&pl->events, 1);
	percpu_counter_add(&pg->events, 1);

out_put:
	prop_put_global(pd, pg);
}

/*
 * Obtain a fraction of this proportion
 *
 *   p_{j} = x_{j} / (period/2 + t % period/2)
 */
void prop_fraction_percpu(struct prop_descriptor *pd,
		struct prop_local_percpu *pl,
		long *numerator, long *denominator)
{
	struct prop_global *pg = prop_get_global(pd);
	unsigned long period_2 = 1UL << (pg->shift - 1);
	unsigned long counter_mask = period_2 - 1;
	unsigned long global_count;

	prop_norm_percpu(pg, pl);
	*numerator = percpu_counter_read_positive(&pl->events);

	global_count = percpu_counter_read(&pg->events);
	*denominator = period_2 + (global_count & counter_mask);

	prop_put_global(pd, pg);
}

/*
 * SINGLE
 */

int prop_local_init_single(struct prop_local_single *pl)
{
	spin_lock_init(&pl->lock);
	pl->shift = 0;
	pl->period = 0;
	pl->events = 0;
	return 0;
}

void prop_local_destroy_single(struct prop_local_single *pl)
{
}

/*
 * Catch up with missed period expirations.
 */
static
void prop_norm_single(struct prop_global *pg, struct prop_local_single *pl)
{
	unsigned long period = 1UL << (pg->shift - 1);
	unsigned long period_mask = ~(period - 1);
	unsigned long global_period;
	unsigned long flags;

	global_period = percpu_counter_read(&pg->events);
	global_period &= period_mask;

	/*
	 * Fast path - check if the local and global period count still match
	 * outside of the lock.
	 */
	if (pl->period == global_period)
		return;

	spin_lock_irqsave(&pl->lock, flags);
	prop_adjust_shift(&pl->shift, &pl->period, pg->shift);
	/*
	 * For each missed period, we half the local counter.
	 */
	period = (global_period - pl->period) >> (pg->shift - 1);
	if (likely(period < BITS_PER_LONG))
		pl->events >>= period;
	else
		pl->events = 0;
	pl->period = global_period;
	spin_unlock_irqrestore(&pl->lock, flags);
}

/*
 *   ++x_{j}, ++t
 */
void __prop_inc_single(struct prop_descriptor *pd, struct prop_local_single *pl)
{
	struct prop_global *pg = prop_get_global(pd);

	prop_norm_single(pg, pl);
	pl->events++;
	percpu_counter_add(&pg->events, 1);
	prop_put_global(pd, pg);
}

/*
 * Obtain a fraction of this proportion
 *
 *   p_{j} = x_{j} / (period/2 + t % period/2)
 */
void prop_fraction_single(struct prop_descriptor *pd,
	       	struct prop_local_single *pl,
		long *numerator, long *denominator)
{
	struct prop_global *pg = prop_get_global(pd);
	unsigned long period_2 = 1UL << (pg->shift - 1);
	unsigned long counter_mask = period_2 - 1;
	unsigned long global_count;

	prop_norm_single(pg, pl);
	*numerator = pl->events;

	global_count = percpu_counter_read(&pg->events);
	*denominator = period_2 + (global_count & counter_mask);

	prop_put_global(pd, pg);
}
Commit	Line	Data
145ca25e PZ	1	/*
	2	* Floating proportions
	3	*
	4	* Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
	5	*
	6	* Description:
	7	*
	8	* The floating proportion is a time derivative with an exponentially decaying
	9	* history:
	10	*
	11	* p_{j} = \Sum_{i=0} (dx_{j}/dt_{-i}) / 2^(1+i)
	12	*
	13	* Where j is an element from {prop_local}, x_{j} is j's number of events,
	14	* and i the time period over which the differential is taken. So d/dt_{-i} is
	15	* the differential over the i-th last period.
	16	*
	17	* The decaying history gives smooth transitions. The time differential carries
	18	* the notion of speed.
	19	*
	20	* The denominator is 2^(1+i) because we want the series to be normalised, ie.
	21	*
	22	* \Sum_{i=0} 1/2^(1+i) = 1
	23	*
	24	* Further more, if we measure time (t) in the same events as x; so that:
	25	*
	26	* t = \Sum_{j} x_{j}
	27	*
	28	* we get that:
	29	*
	30	* \Sum_{j} p_{j} = 1
	31	*
	32	* Writing this in an iterative fashion we get (dropping the 'd's):
	33	*
	34	* if (++x_{j}, ++t > period)
	35	* t /= 2;
	36	* for_each (j)
	37	* x_{j} /= 2;
	38	*
	39	* so that:
	40	*
	41	* p_{j} = x_{j} / t;
	42	*
	43	* We optimize away the '/= 2' for the global time delta by noting that:
	44	*
	45	* if (++t > period) t /= 2:
	46	*
	47	* Can be approximated by:
	48	*
	49	* period/2 + (++t % period/2)
	50	*
	51	* [ Furthermore, when we choose period to be 2^n it can be written in terms of
	52	* binary operations and wraparound artefacts disappear. ]
	53	*
	54	* Also note that this yields a natural counter of the elapsed periods:
	55	*
	56	* c = t / (period/2)
	57	*
	58	* [ Its monotonic increasing property can be applied to mitigate the wrap-
	59	* around issue. ]
	60	*
	61	* This allows us to do away with the loop over all prop_locals on each period
	62	* expiration. By remembering the period count under which it was last accessed
	63	* as c_{j}, we can obtain the number of 'missed' cycles from:
	64	*
65	* c - c_{j}
66	*
67	* We can then lazily catch up to the global period count every time we are
68	* going to use x_{j}, by doing:
69	*
70	* x_{j} /= 2^(c - c_{j}), c_{j} = c
71	*/
72
73	#include <linux/proportions.h>
74	#include <linux/rcupdate.h>
75
145ca25e PZ	76	int prop_descriptor_init(struct prop_descriptor *pd, int shift)
	77	{
	78	int err;
	79
	80	if (shift > PROP_MAX_SHIFT)
	81	shift = PROP_MAX_SHIFT;
	82
	83	pd->index = 0;
	84	pd->pg[0].shift = shift;
	85	mutex_init(&pd->mutex);
	86	err = percpu_counter_init_irq(&pd->pg[0].events, 0);
	87	if (err)
	88	goto out;
	89
	90	err = percpu_counter_init_irq(&pd->pg[1].events, 0);
	91	if (err)
	92	percpu_counter_destroy(&pd->pg[0].events);
	93
	94	out:
	95	return err;
	96	}
	97
	98	/*
	99	* We have two copies, and flip between them to make it seem like an atomic
	100	* update. The update is not really atomic wrt the events counter, but
	101	* it is internally consistent with the bit layout depending on shift.
	102	*
	103	* We copy the events count, move the bits around and flip the index.
	104	*/
	105	void prop_change_shift(struct prop_descriptor *pd, int shift)
	106	{
	107	int index;
	108	int offset;
	109	u64 events;
	110	unsigned long flags;
	111
	112	if (shift > PROP_MAX_SHIFT)
	113	shift = PROP_MAX_SHIFT;
	114
	115	mutex_lock(&pd->mutex);
	116
	117	index = pd->index ^ 1;
	118	offset = pd->pg[pd->index].shift - shift;
	119	if (!offset)
	120	goto out;
	121
	122	pd->pg[index].shift = shift;
	123
	124	local_irq_save(flags);
	125	events = percpu_counter_sum(&pd->pg[pd->index].events);
	126	if (offset < 0)
	127	events <<= -offset;
	128	else
	129	events >>= offset;
	130	percpu_counter_set(&pd->pg[index].events, events);
	131
	132	/*
	133	* ensure the new pg is fully written before the switch
	134	*/
	135	smp_wmb();
	136	pd->index = index;
	137	local_irq_restore(flags);
	138
	139	synchronize_rcu();
140
141	out:
142	mutex_unlock(&pd->mutex);
143	}
144
145	/*
146	* wrap the access to the data in an rcu_read_lock() section;
147	* this is used to track the active references.
148	*/
149	static struct prop_global prop_get_global(struct prop_descriptor pd)
150	{
151	int index;
152
153	rcu_read_lock();
154	index = pd->index;
155	/*
156	* match the wmb from vcd_flip()
157	*/
158	smp_rmb();
159	return &pd->pg[index];
160	}
161
162	static void prop_put_global(struct prop_descriptor pd, struct prop_global pg)
163	{
164	rcu_read_unlock();
165	}
166
167	static void
168	prop_adjust_shift(int pl_shift, unsigned long pl_period, int new_shift)
169	{
170	int offset = *pl_shift - new_shift;
171
172	if (!offset)
173	return;
174
175	if (offset < 0)
176	*pl_period <<= -offset;
177	else
178	*pl_period >>= offset;
179
180	*pl_shift = new_shift;
181	}
182
183	/*
184	* PERCPU
185	*/
186
f16b34aa PZ	187	#define PROP_BATCH (8*(1+ilog2(nr_cpu_ids)))
f16b34aa PZ	188
145ca25e PZ	189	int prop_local_init_percpu(struct prop_local_percpu *pl)
	190	{
	191	spin_lock_init(&pl->lock);
	192	pl->shift = 0;
	193	pl->period = 0;
	194	return percpu_counter_init_irq(&pl->events, 0);
	195	}
	196
	197	void prop_local_destroy_percpu(struct prop_local_percpu *pl)
	198	{
	199	percpu_counter_destroy(&pl->events);
	200	}
	201
	202	/*
	203	* Catch up with missed period expirations.
	204	*
	205	* until (c_{j} == c)
	206	* x_{j} -= x_{j}/2;
	207	* c_{j}++;
	208	*/
	209	static
	210	void prop_norm_percpu(struct prop_global pg, struct prop_local_percpu pl)
	211	{
	212	unsigned long period = 1UL << (pg->shift - 1);
	213	unsigned long period_mask = ~(period - 1);
	214	unsigned long global_period;
	215	unsigned long flags;
	216
	217	global_period = percpu_counter_read(&pg->events);
	218	global_period &= period_mask;
	219
	220	/*
	221	* Fast path - check if the local and global period count still match
	222	* outside of the lock.
	223	*/
	224	if (pl->period == global_period)
	225	return;
	226
	227	spin_lock_irqsave(&pl->lock, flags);
	228	prop_adjust_shift(&pl->shift, &pl->period, pg->shift);
f16b34aa	229
145ca25e PZ	230	/*
	231	* For each missed period, we half the local counter.
	232	* basically:
	233	* pl->events >> (global_period - pl->period);
145ca25e	234	*/
f16b34aa PZ	235	period = (global_period - pl->period) >> (pg->shift - 1);
	236	if (period < BITS_PER_LONG) {
	237	s64 val = percpu_counter_read(&pl->events);
	238
	239	if (val < (nr_cpu_ids * PROP_BATCH))
	240	val = percpu_counter_sum(&pl->events);
	241
	242	__percpu_counter_add(&pl->events, -val + (val >> period),
	243	PROP_BATCH);
	244	} else
	245	percpu_counter_set(&pl->events, 0);
	246
145ca25e PZ	247	pl->period = global_period;
	248	spin_unlock_irqrestore(&pl->lock, flags);
	249	}
	250
	251	/*
	252	* ++x_{j}, ++t
	253	*/
	254	void __prop_inc_percpu(struct prop_descriptor pd, struct prop_local_percpu pl)
	255	{
	256	struct prop_global *pg = prop_get_global(pd);
	257
	258	prop_norm_percpu(pg, pl);
f16b34aa	259	__percpu_counter_add(&pl->events, 1, PROP_BATCH);
145ca25e PZ	260	percpu_counter_add(&pg->events, 1);
	261	prop_put_global(pd, pg);
	262	}
	263
a42dde04 PZ	264	/*
	265	* identical to __prop_inc_percpu, except that it limits this pl's fraction to
	266	* @frac/PROP_FRAC_BASE by ignoring events when this limit has been exceeded.
	267	*/
	268	void __prop_inc_percpu_max(struct prop_descriptor *pd,
	269	struct prop_local_percpu *pl, long frac)
	270	{
	271	struct prop_global *pg = prop_get_global(pd);
	272
	273	prop_norm_percpu(pg, pl);
	274
	275	if (unlikely(frac != PROP_FRAC_BASE)) {
	276	unsigned long period_2 = 1UL << (pg->shift - 1);
	277	unsigned long counter_mask = period_2 - 1;
	278	unsigned long global_count;
	279	long numerator, denominator;
	280
	281	numerator = percpu_counter_read_positive(&pl->events);
	282	global_count = percpu_counter_read(&pg->events);
	283	denominator = period_2 + (global_count & counter_mask);
	284
	285	if (numerator > ((denominator * frac) >> PROP_FRAC_SHIFT))
	286	goto out_put;
	287	}
	288
	289	percpu_counter_add(&pl->events, 1);
	290	percpu_counter_add(&pg->events, 1);
	291
	292	out_put:
	293	prop_put_global(pd, pg);
	294	}
	295
145ca25e PZ	296	/*
	297	* Obtain a fraction of this proportion
	298	*
	299	* p_{j} = x_{j} / (period/2 + t % period/2)
	300	*/
	301	void prop_fraction_percpu(struct prop_descriptor *pd,
	302	struct prop_local_percpu *pl,
	303	long numerator, long denominator)
	304	{
	305	struct prop_global *pg = prop_get_global(pd);
	306	unsigned long period_2 = 1UL << (pg->shift - 1);
	307	unsigned long counter_mask = period_2 - 1;
	308	unsigned long global_count;
	309
	310	prop_norm_percpu(pg, pl);
	311	*numerator = percpu_counter_read_positive(&pl->events);
	312
	313	global_count = percpu_counter_read(&pg->events);
	314	*denominator = period_2 + (global_count & counter_mask);
	315
	316	prop_put_global(pd, pg);
	317	}
	318
	319	/*
	320	* SINGLE
	321	*/
	322
	323	int prop_local_init_single(struct prop_local_single *pl)
	324	{
	325	spin_lock_init(&pl->lock);
	326	pl->shift = 0;
	327	pl->period = 0;
	328	pl->events = 0;
	329	return 0;
	330	}
	331
	332	void prop_local_destroy_single(struct prop_local_single *pl)
	333	{
	334	}
	335
	336	/*
	337	* Catch up with missed period expirations.
	338	*/
	339	static
	340	void prop_norm_single(struct prop_global pg, struct prop_local_single pl)
	341	{
	342	unsigned long period = 1UL << (pg->shift - 1);
	343	unsigned long period_mask = ~(period - 1);
	344	unsigned long global_period;
	345	unsigned long flags;
	346
	347	global_period = percpu_counter_read(&pg->events);
	348	global_period &= period_mask;
	349
	350	/*
	351	* Fast path - check if the local and global period count still match
	352	* outside of the lock.
	353	*/
	354	if (pl->period == global_period)
	355	return;
	356
	357	spin_lock_irqsave(&pl->lock, flags);
	358	prop_adjust_shift(&pl->shift, &pl->period, pg->shift);
	359	/*
360	* For each missed period, we half the local counter.
361	*/
362	period = (global_period - pl->period) >> (pg->shift - 1);
363	if (likely(period < BITS_PER_LONG))
364	pl->events >>= period;
365	else
366	pl->events = 0;
367	pl->period = global_period;
368	spin_unlock_irqrestore(&pl->lock, flags);
369	}
370
371	/*
372	* ++x_{j}, ++t
373	*/
374	void __prop_inc_single(struct prop_descriptor pd, struct prop_local_single pl)
375	{
376	struct prop_global *pg = prop_get_global(pd);
377
378	prop_norm_single(pg, pl);
379	pl->events++;
380	percpu_counter_add(&pg->events, 1);
381	prop_put_global(pd, pg);
382	}
383
384	/*
385	* Obtain a fraction of this proportion
386	*
387	* p_{j} = x_{j} / (period/2 + t % period/2)
388	*/
389	void prop_fraction_single(struct prop_descriptor *pd,
390	struct prop_local_single *pl,
391	long numerator, long denominator)
392	{
393	struct prop_global *pg = prop_get_global(pd);
394	unsigned long period_2 = 1UL << (pg->shift - 1);
395	unsigned long counter_mask = period_2 - 1;
396	unsigned long global_count;
397
398	prop_norm_single(pg, pl);
399	*numerator = pl->events;
400
401	global_count = percpu_counter_read(&pg->events);
402	*denominator = period_2 + (global_count & counter_mask);
403
404	prop_put_global(pd, pg);
405	}