[net-next-2.6.git] / lib / proportions.c

/*
 * Floating proportions
 *
 *  Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
 *
 * Description:
 *
 * The floating proportion is a time derivative with an exponentially decaying
 * history:
 *
 *   p_{j} = \Sum_{i=0} (dx_{j}/dt_{-i}) / 2^(1+i)
 *
 * Where j is an element from {prop_local}, x_{j} is j's number of events,
 * and i the time period over which the differential is taken. So d/dt_{-i} is
 * the differential over the i-th last period.
 *
 * The decaying history gives smooth transitions. The time differential carries
 * the notion of speed.
 *
 * The denominator is 2^(1+i) because we want the series to be normalised, ie.
 *
 *   \Sum_{i=0} 1/2^(1+i) = 1
 *
 * Further more, if we measure time (t) in the same events as x; so that:
 *
 *   t = \Sum_{j} x_{j}
 *
 * we get that:
 *
 *   \Sum_{j} p_{j} = 1
 *
 * Writing this in an iterative fashion we get (dropping the 'd's):
 *
 *   if (++x_{j}, ++t > period)
 *     t /= 2;
 *     for_each (j)
 *       x_{j} /= 2;
 *
 * so that:
 *
 *   p_{j} = x_{j} / t;
 *
 * We optimize away the '/= 2' for the global time delta by noting that:
 *
 *   if (++t > period) t /= 2:
 *
 * Can be approximated by:
 *
 *   period/2 + (++t % period/2)
 *
 * [ Furthermore, when we choose period to be 2^n it can be written in terms of
 *   binary operations and wraparound artefacts disappear. ]
 *
 * Also note that this yields a natural counter of the elapsed periods:
 *
 *   c = t / (period/2)
 *
 * [ Its monotonic increasing property can be applied to mitigate the wrap-
 *   around issue. ]
 *
 * This allows us to do away with the loop over all prop_locals on each period
 * expiration. By remembering the period count under which it was last accessed
 * as c_{j}, we can obtain the number of 'missed' cycles from:
 *
 *   c - c_{j}
 *
 * We can then lazily catch up to the global period count every time we are
 * going to use x_{j}, by doing:
 *
 *   x_{j} /= 2^(c - c_{j}), c_{j} = c
 */

#include <linux/proportions.h>
#include <linux/rcupdate.h>

/*
 * Limit the time part in order to ensure there are some bits left for the
 * cycle counter.
 */
#define PROP_MAX_SHIFT (3*BITS_PER_LONG/4)

int prop_descriptor_init(struct prop_descriptor *pd, int shift)
{
	int err;

	if (shift > PROP_MAX_SHIFT)
		shift = PROP_MAX_SHIFT;

	pd->index = 0;
	pd->pg[0].shift = shift;
	mutex_init(&pd->mutex);
	err = percpu_counter_init_irq(&pd->pg[0].events, 0);
	if (err)
		goto out;

	err = percpu_counter_init_irq(&pd->pg[1].events, 0);
	if (err)
		percpu_counter_destroy(&pd->pg[0].events);

out:
	return err;
}

/*
 * We have two copies, and flip between them to make it seem like an atomic
 * update. The update is not really atomic wrt the events counter, but
 * it is internally consistent with the bit layout depending on shift.
 *
 * We copy the events count, move the bits around and flip the index.
 */
void prop_change_shift(struct prop_descriptor *pd, int shift)
{
	int index;
	int offset;
	u64 events;
	unsigned long flags;

	if (shift > PROP_MAX_SHIFT)
		shift = PROP_MAX_SHIFT;

	mutex_lock(&pd->mutex);

	index = pd->index ^ 1;
	offset = pd->pg[pd->index].shift - shift;
	if (!offset)
		goto out;

	pd->pg[index].shift = shift;

	local_irq_save(flags);
	events = percpu_counter_sum(&pd->pg[pd->index].events);
	if (offset < 0)
		events <<= -offset;
	else
		events >>= offset;
	percpu_counter_set(&pd->pg[index].events, events);

	/*
	 * ensure the new pg is fully written before the switch
	 */
	smp_wmb();
	pd->index = index;
	local_irq_restore(flags);

	synchronize_rcu();

out:
	mutex_unlock(&pd->mutex);
}

/*
 * wrap the access to the data in an rcu_read_lock() section;
 * this is used to track the active references.
 */
static struct prop_global *prop_get_global(struct prop_descriptor *pd)
{
	int index;

	rcu_read_lock();
	index = pd->index;
	/*
	 * match the wmb from vcd_flip()
	 */
	smp_rmb();
	return &pd->pg[index];
}

static void prop_put_global(struct prop_descriptor *pd, struct prop_global *pg)
{
	rcu_read_unlock();
}

static void
prop_adjust_shift(int *pl_shift, unsigned long *pl_period, int new_shift)
{
	int offset = *pl_shift - new_shift;

	if (!offset)
		return;

	if (offset < 0)
		*pl_period <<= -offset;
	else
		*pl_period >>= offset;

	*pl_shift = new_shift;
}

/*
 * PERCPU
 */

#define PROP_BATCH (8*(1+ilog2(nr_cpu_ids)))

int prop_local_init_percpu(struct prop_local_percpu *pl)
{
	spin_lock_init(&pl->lock);
	pl->shift = 0;
	pl->period = 0;
	return percpu_counter_init_irq(&pl->events, 0);
}

void prop_local_destroy_percpu(struct prop_local_percpu *pl)
{
	percpu_counter_destroy(&pl->events);
}

/*
 * Catch up with missed period expirations.
 *
 *   until (c_{j} == c)
 *     x_{j} -= x_{j}/2;
 *     c_{j}++;
 */
static
void prop_norm_percpu(struct prop_global *pg, struct prop_local_percpu *pl)
{
	unsigned long period = 1UL << (pg->shift - 1);
	unsigned long period_mask = ~(period - 1);
	unsigned long global_period;
	unsigned long flags;

	global_period = percpu_counter_read(&pg->events);
	global_period &= period_mask;

	/*
	 * Fast path - check if the local and global period count still match
	 * outside of the lock.
	 */
	if (pl->period == global_period)
		return;

	spin_lock_irqsave(&pl->lock, flags);
	prop_adjust_shift(&pl->shift, &pl->period, pg->shift);

	/*
	 * For each missed period, we half the local counter.
	 * basically:
	 *   pl->events >> (global_period - pl->period);
	 */
	period = (global_period - pl->period) >> (pg->shift - 1);
	if (period < BITS_PER_LONG) {
		s64 val = percpu_counter_read(&pl->events);

		if (val < (nr_cpu_ids * PROP_BATCH))
			val = percpu_counter_sum(&pl->events);

		__percpu_counter_add(&pl->events, -val + (val >> period),
					PROP_BATCH);
	} else
		percpu_counter_set(&pl->events, 0);

	pl->period = global_period;
	spin_unlock_irqrestore(&pl->lock, flags);
}

/*
 *   ++x_{j}, ++t
 */
void __prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl)
{
	struct prop_global *pg = prop_get_global(pd);

	prop_norm_percpu(pg, pl);
	__percpu_counter_add(&pl->events, 1, PROP_BATCH);
	percpu_counter_add(&pg->events, 1);
	prop_put_global(pd, pg);
}

/*
 * Obtain a fraction of this proportion
 *
 *   p_{j} = x_{j} / (period/2 + t % period/2)
 */
void prop_fraction_percpu(struct prop_descriptor *pd,
		struct prop_local_percpu *pl,
		long *numerator, long *denominator)
{
	struct prop_global *pg = prop_get_global(pd);
	unsigned long period_2 = 1UL << (pg->shift - 1);
	unsigned long counter_mask = period_2 - 1;
	unsigned long global_count;

	prop_norm_percpu(pg, pl);
	*numerator = percpu_counter_read_positive(&pl->events);

	global_count = percpu_counter_read(&pg->events);
	*denominator = period_2 + (global_count & counter_mask);

	prop_put_global(pd, pg);
}

/*
 * SINGLE
 */

int prop_local_init_single(struct prop_local_single *pl)
{
	spin_lock_init(&pl->lock);
	pl->shift = 0;
	pl->period = 0;
	pl->events = 0;
	return 0;
}

void prop_local_destroy_single(struct prop_local_single *pl)
{
}

/*
 * Catch up with missed period expirations.
 */
static
void prop_norm_single(struct prop_global *pg, struct prop_local_single *pl)
{
	unsigned long period = 1UL << (pg->shift - 1);
	unsigned long period_mask = ~(period - 1);
	unsigned long global_period;
	unsigned long flags;

	global_period = percpu_counter_read(&pg->events);
	global_period &= period_mask;

	/*
	 * Fast path - check if the local and global period count still match
	 * outside of the lock.
	 */
	if (pl->period == global_period)
		return;

	spin_lock_irqsave(&pl->lock, flags);
	prop_adjust_shift(&pl->shift, &pl->period, pg->shift);
	/*
	 * For each missed period, we half the local counter.
	 */
	period = (global_period - pl->period) >> (pg->shift - 1);
	if (likely(period < BITS_PER_LONG))
		pl->events >>= period;
	else
		pl->events = 0;
	pl->period = global_period;
	spin_unlock_irqrestore(&pl->lock, flags);
}

/*
 *   ++x_{j}, ++t
 */
void __prop_inc_single(struct prop_descriptor *pd, struct prop_local_single *pl)
{
	struct prop_global *pg = prop_get_global(pd);

	prop_norm_single(pg, pl);
	pl->events++;
	percpu_counter_add(&pg->events, 1);
	prop_put_global(pd, pg);
}

/*
 * Obtain a fraction of this proportion
 *
 *   p_{j} = x_{j} / (period/2 + t % period/2)
 */
void prop_fraction_single(struct prop_descriptor *pd,
	       	struct prop_local_single *pl,
		long *numerator, long *denominator)
{
	struct prop_global *pg = prop_get_global(pd);
	unsigned long period_2 = 1UL << (pg->shift - 1);
	unsigned long counter_mask = period_2 - 1;
	unsigned long global_count;

	prop_norm_single(pg, pl);
	*numerator = pl->events;

	global_count = percpu_counter_read(&pg->events);
	*denominator = period_2 + (global_count & counter_mask);

	prop_put_global(pd, pg);
}
Commit	Line	Data
145ca25e PZ	1	/*
	2	* Floating proportions
	3	*
	4	* Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
	5	*
	6	* Description:
	7	*
	8	* The floating proportion is a time derivative with an exponentially decaying
	9	* history:
	10	*
	11	* p_{j} = \Sum_{i=0} (dx_{j}/dt_{-i}) / 2^(1+i)
	12	*
	13	* Where j is an element from {prop_local}, x_{j} is j's number of events,
	14	* and i the time period over which the differential is taken. So d/dt_{-i} is
	15	* the differential over the i-th last period.
	16	*
	17	* The decaying history gives smooth transitions. The time differential carries
	18	* the notion of speed.
	19	*
	20	* The denominator is 2^(1+i) because we want the series to be normalised, ie.
	21	*
	22	* \Sum_{i=0} 1/2^(1+i) = 1
	23	*
	24	* Further more, if we measure time (t) in the same events as x; so that:
	25	*
	26	* t = \Sum_{j} x_{j}
	27	*
	28	* we get that:
	29	*
	30	* \Sum_{j} p_{j} = 1
	31	*
	32	* Writing this in an iterative fashion we get (dropping the 'd's):
	33	*
	34	* if (++x_{j}, ++t > period)
	35	* t /= 2;
	36	* for_each (j)
	37	* x_{j} /= 2;
	38	*
	39	* so that:
	40	*
	41	* p_{j} = x_{j} / t;
	42	*
	43	* We optimize away the '/= 2' for the global time delta by noting that:
	44	*
	45	* if (++t > period) t /= 2:
	46	*
	47	* Can be approximated by:
	48	*
	49	* period/2 + (++t % period/2)
	50	*
	51	* [ Furthermore, when we choose period to be 2^n it can be written in terms of
	52	* binary operations and wraparound artefacts disappear. ]
	53	*
	54	* Also note that this yields a natural counter of the elapsed periods:
	55	*
	56	* c = t / (period/2)
	57	*
	58	* [ Its monotonic increasing property can be applied to mitigate the wrap-
	59	* around issue. ]
	60	*
	61	* This allows us to do away with the loop over all prop_locals on each period
	62	* expiration. By remembering the period count under which it was last accessed
	63	* as c_{j}, we can obtain the number of 'missed' cycles from:
	64	*
65	* c - c_{j}
66	*
67	* We can then lazily catch up to the global period count every time we are
68	* going to use x_{j}, by doing:
69	*
70	* x_{j} /= 2^(c - c_{j}), c_{j} = c
71	*/
72
73	#include <linux/proportions.h>
74	#include <linux/rcupdate.h>
75
76	/*
77	* Limit the time part in order to ensure there are some bits left for the
78	* cycle counter.
79	*/
80	#define PROP_MAX_SHIFT (3*BITS_PER_LONG/4)
81
82	int prop_descriptor_init(struct prop_descriptor *pd, int shift)
83	{
84	int err;
85
86	if (shift > PROP_MAX_SHIFT)
87	shift = PROP_MAX_SHIFT;
88
89	pd->index = 0;
90	pd->pg[0].shift = shift;
91	mutex_init(&pd->mutex);
92	err = percpu_counter_init_irq(&pd->pg[0].events, 0);
93	if (err)
94	goto out;
95
96	err = percpu_counter_init_irq(&pd->pg[1].events, 0);
97	if (err)
98	percpu_counter_destroy(&pd->pg[0].events);
99
100	out:
101	return err;
102	}
103
104	/*
105	* We have two copies, and flip between them to make it seem like an atomic
106	* update. The update is not really atomic wrt the events counter, but
107	* it is internally consistent with the bit layout depending on shift.
108	*
109	* We copy the events count, move the bits around and flip the index.
110	*/
111	void prop_change_shift(struct prop_descriptor *pd, int shift)
112	{
113	int index;
114	int offset;
115	u64 events;
116	unsigned long flags;
117
118	if (shift > PROP_MAX_SHIFT)
119	shift = PROP_MAX_SHIFT;
120
121	mutex_lock(&pd->mutex);
122
123	index = pd->index ^ 1;
124	offset = pd->pg[pd->index].shift - shift;
125	if (!offset)
126	goto out;
127
128	pd->pg[index].shift = shift;
129
130	local_irq_save(flags);
131	events = percpu_counter_sum(&pd->pg[pd->index].events);
132	if (offset < 0)
133	events <<= -offset;
134	else
135	events >>= offset;
136	percpu_counter_set(&pd->pg[index].events, events);
137
138	/*
139	* ensure the new pg is fully written before the switch
140	*/
141	smp_wmb();
142	pd->index = index;
143	local_irq_restore(flags);
144
145	synchronize_rcu();
146
147	out:
148	mutex_unlock(&pd->mutex);
149	}
150
151	/*
152	* wrap the access to the data in an rcu_read_lock() section;
153	* this is used to track the active references.
154	*/
155	static struct prop_global prop_get_global(struct prop_descriptor pd)
156	{
157	int index;
158
159	rcu_read_lock();
160	index = pd->index;
161	/*
162	* match the wmb from vcd_flip()
163	*/
164	smp_rmb();
165	return &pd->pg[index];
166	}
167
168	static void prop_put_global(struct prop_descriptor pd, struct prop_global pg)
169	{
170	rcu_read_unlock();
171	}
172
173	static void
174	prop_adjust_shift(int pl_shift, unsigned long pl_period, int new_shift)
175	{
176	int offset = *pl_shift - new_shift;
177
178	if (!offset)
179	return;
180
181	if (offset < 0)
182	*pl_period <<= -offset;
183	else
184	*pl_period >>= offset;
185
186	*pl_shift = new_shift;
187	}
188
189	/*
190	* PERCPU
191	*/
192
f16b34aa PZ	193	#define PROP_BATCH (8*(1+ilog2(nr_cpu_ids)))
f16b34aa PZ	194
145ca25e PZ	195	int prop_local_init_percpu(struct prop_local_percpu *pl)
	196	{
	197	spin_lock_init(&pl->lock);
	198	pl->shift = 0;
	199	pl->period = 0;
	200	return percpu_counter_init_irq(&pl->events, 0);
	201	}
	202
	203	void prop_local_destroy_percpu(struct prop_local_percpu *pl)
	204	{
	205	percpu_counter_destroy(&pl->events);
	206	}
	207
	208	/*
	209	* Catch up with missed period expirations.
	210	*
	211	* until (c_{j} == c)
	212	* x_{j} -= x_{j}/2;
	213	* c_{j}++;
	214	*/
	215	static
	216	void prop_norm_percpu(struct prop_global pg, struct prop_local_percpu pl)
	217	{
	218	unsigned long period = 1UL << (pg->shift - 1);
	219	unsigned long period_mask = ~(period - 1);
	220	unsigned long global_period;
	221	unsigned long flags;
	222
	223	global_period = percpu_counter_read(&pg->events);
	224	global_period &= period_mask;
	225
	226	/*
	227	* Fast path - check if the local and global period count still match
	228	* outside of the lock.
	229	*/
	230	if (pl->period == global_period)
	231	return;
	232
	233	spin_lock_irqsave(&pl->lock, flags);
	234	prop_adjust_shift(&pl->shift, &pl->period, pg->shift);
f16b34aa	235
145ca25e PZ	236	/*
	237	* For each missed period, we half the local counter.
	238	* basically:
	239	* pl->events >> (global_period - pl->period);
145ca25e	240	*/
f16b34aa PZ	241	period = (global_period - pl->period) >> (pg->shift - 1);
	242	if (period < BITS_PER_LONG) {
	243	s64 val = percpu_counter_read(&pl->events);
	244
	245	if (val < (nr_cpu_ids * PROP_BATCH))
	246	val = percpu_counter_sum(&pl->events);
	247
	248	__percpu_counter_add(&pl->events, -val + (val >> period),
	249	PROP_BATCH);
	250	} else
	251	percpu_counter_set(&pl->events, 0);
	252
145ca25e PZ	253	pl->period = global_period;
	254	spin_unlock_irqrestore(&pl->lock, flags);
	255	}
	256
	257	/*
	258	* ++x_{j}, ++t
	259	*/
	260	void __prop_inc_percpu(struct prop_descriptor pd, struct prop_local_percpu pl)
	261	{
	262	struct prop_global *pg = prop_get_global(pd);
	263
	264	prop_norm_percpu(pg, pl);
f16b34aa	265	__percpu_counter_add(&pl->events, 1, PROP_BATCH);
145ca25e PZ	266	percpu_counter_add(&pg->events, 1);
	267	prop_put_global(pd, pg);
	268	}
	269
	270	/*
	271	* Obtain a fraction of this proportion
	272	*
	273	* p_{j} = x_{j} / (period/2 + t % period/2)
	274	*/
	275	void prop_fraction_percpu(struct prop_descriptor *pd,
	276	struct prop_local_percpu *pl,
	277	long numerator, long denominator)
	278	{
	279	struct prop_global *pg = prop_get_global(pd);
	280	unsigned long period_2 = 1UL << (pg->shift - 1);
	281	unsigned long counter_mask = period_2 - 1;
	282	unsigned long global_count;
	283
	284	prop_norm_percpu(pg, pl);
	285	*numerator = percpu_counter_read_positive(&pl->events);
	286
	287	global_count = percpu_counter_read(&pg->events);
	288	*denominator = period_2 + (global_count & counter_mask);
	289
	290	prop_put_global(pd, pg);
	291	}
	292
	293	/*
	294	* SINGLE
	295	*/
	296
	297	int prop_local_init_single(struct prop_local_single *pl)
	298	{
	299	spin_lock_init(&pl->lock);
	300	pl->shift = 0;
	301	pl->period = 0;
	302	pl->events = 0;
	303	return 0;
	304	}
	305
	306	void prop_local_destroy_single(struct prop_local_single *pl)
	307	{
	308	}
	309
	310	/*
	311	* Catch up with missed period expirations.
	312	*/
	313	static
	314	void prop_norm_single(struct prop_global pg, struct prop_local_single pl)
	315	{
	316	unsigned long period = 1UL << (pg->shift - 1);
	317	unsigned long period_mask = ~(period - 1);
	318	unsigned long global_period;
	319	unsigned long flags;
	320
	321	global_period = percpu_counter_read(&pg->events);
	322	global_period &= period_mask;
	323
	324	/*
	325	* Fast path - check if the local and global period count still match
	326	* outside of the lock.
	327	*/
	328	if (pl->period == global_period)
	329	return;
330
331	spin_lock_irqsave(&pl->lock, flags);
332	prop_adjust_shift(&pl->shift, &pl->period, pg->shift);
333	/*
334	* For each missed period, we half the local counter.
335	*/
336	period = (global_period - pl->period) >> (pg->shift - 1);
337	if (likely(period < BITS_PER_LONG))
338	pl->events >>= period;
339	else
340	pl->events = 0;
341	pl->period = global_period;
342	spin_unlock_irqrestore(&pl->lock, flags);
343	}
344
345	/*
346	* ++x_{j}, ++t
347	*/
348	void __prop_inc_single(struct prop_descriptor pd, struct prop_local_single pl)
349	{
350	struct prop_global *pg = prop_get_global(pd);
351
352	prop_norm_single(pg, pl);
353	pl->events++;
354	percpu_counter_add(&pg->events, 1);
355	prop_put_global(pd, pg);
356	}
357
358	/*
359	* Obtain a fraction of this proportion
360	*
361	* p_{j} = x_{j} / (period/2 + t % period/2)
362	*/
363	void prop_fraction_single(struct prop_descriptor *pd,
364	struct prop_local_single *pl,
365	long numerator, long denominator)
366	{
367	struct prop_global *pg = prop_get_global(pd);
368	unsigned long period_2 = 1UL << (pg->shift - 1);
369	unsigned long counter_mask = period_2 - 1;
370	unsigned long global_count;
371
372	prop_norm_single(pg, pl);
373	*numerator = pl->events;
374
375	global_count = percpu_counter_read(&pg->events);
376	*denominator = period_2 + (global_count & counter_mask);
377
378	prop_put_global(pd, pg);
379	}