/*
 * net/sched/sch_tbf.c	Token Bucket Filter queue.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *		Dmitry Torokhov <dtor@mail.ru> - allow attaching inner qdiscs -
 *		original idea by Martin Devera
 *
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>

/*	Simple Token Bucket Filter.
	=======================================

	SOURCE.
	-------

	None.

	Description.
	------------

	A data flow obeys TBF with rate R and depth B, if for any
	time interval t_i...t_f the number of transmitted bits
	does not exceed B + R*(t_f-t_i).

	Packetized version of this definition:
	The sequence of packets of sizes s_i served at moments t_i
	obeys TBF, if for any i<=k:

	s_i+....+s_k <= B + R*(t_k - t_i)

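	For example (illustrative numbers): with B = 10000 bits and
	R = 1000000 bit/s, any window of length 0.01 s may carry at most
	10000 + 1000000*0.01 = 20000 bits.
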
	Algorithm.
	----------

	Let N(t_i) be B/R initially and N(t) grow continuously with time as:

	N(t+delta) = min{B/R, N(t) + delta}

	If the first packet in the queue has length S, it may be
	transmitted only at the time t_* when S/R <= N(t_*),
	and in this case N(t) jumps:

	N(t_* + 0) = N(t_* - 0) - S/R.

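	Continuing the example above, B/R = 0.01 s of credit; a packet of
	S = 12000 bits would need S/R = 0.012 s, which even a full bucket
	never covers. Such oversized packets must be rejected at enqueue
	time (this is what max_size below enforces).
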
	Actually, QoS requires two TBFs to be applied to a data stream.
	One of them controls the steady-state burst size, and the other
	one, with rate P (peak rate) and depth M (equal to the link MTU),
	limits bursts on a smaller time scale.

	It is easy to see that P > R and B > M. If P is infinite, this
	double TBF is equivalent to a single one.

	When TBF works in reshaping mode, latency is estimated as:

	lat = max((L-B)/R, (L-M)/P)

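	Worked example (arbitrary values): with backlog L = 100000 bytes,
	B = 10000 bytes, R = 125000 bytes/s (1 Mbit/s), M = 1500 bytes and
	P = 1250000 bytes/s (10 Mbit/s):

	lat = max((100000-10000)/125000, (100000-1500)/1250000)
	    = max(0.72 s, 0.0788 s) = 0.72 s
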
	NOTES.
	------

	If TBF throttles, it starts a watchdog timer, which will wake it
	up when it is ready to transmit.
	Note that the minimal timer resolution is 1/HZ.
	If no new packets arrive during this period,
	or if the device is not awakened by an EOI for some previous
	packet, TBF can stop its activity for 1/HZ.

	This means that with depth B, the maximal rate is

	R_crit = B*HZ

	E.g. for 10 Mbit ethernet and HZ=100, the minimal allowed B is
	~10 Kbytes.

	Note that the peak rate TBF is much stricter: with MTU 1500,
	P_crit = 150 Kbytes/sec. So, if you need greater peak rates,
	use an Alpha with HZ=1000 :-)

	With classful TBF, limit is kept only for backwards compatibility.
	It is passed to the default bfifo qdisc; if the inner qdisc is
	changed, the limit is no longer effective.
*/
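
/*
 * Illustrative userspace configuration (assuming the iproute2 "tc"
 * utility; device name and values are arbitrary):
 *
 *	tc qdisc add dev eth0 root tbf rate 1mbit burst 10kb limit 100kb
 *
 * and, to engage the peak-rate (second) bucket as well:
 *
 *	tc qdisc add dev eth0 root tbf rate 1mbit burst 10kb limit 100kb \
 *		peakrate 10mbit mtu 1500
 */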

struct tbf_sched_data {
/* Parameters */
	u32		limit;		/* Maximal length of backlog: bytes */
	u32		buffer;		/* Token bucket depth/rate: MUST BE >= MTU/B */
	u32		mtu;
	u32		max_size;
	struct qdisc_rate_table	*R_tab;
	struct qdisc_rate_table	*P_tab;

/* Variables */
	long		tokens;		/* Current number of B tokens */
	long		ptokens;	/* Current number of P tokens */
	psched_time_t	t_c;		/* Time check-point */
	struct Qdisc	*qdisc;		/* Inner qdisc, default - bfifo queue */
	struct qdisc_watchdog watchdog;	/* Watchdog timer */
};

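/*
 * Note that tokens and ptokens are accounted in time units (psched
 * ticks), not bytes: elapsed time adds tokens, and the L2T macros
 * below charge each packet the time it takes to transmit at the
 * configured rate, as looked up in the rate table supplied by tc.
 */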
#define L2T(q, L)	qdisc_l2t((q)->R_tab, L)
#define L2T_P(q, L)	qdisc_l2t((q)->P_tab, L)

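/*
 * Enqueue into the inner qdisc. Packets larger than max_size could
 * never accumulate enough tokens (see the algorithm notes above), so
 * they are rejected up front via qdisc_reshape_fail().
 */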
static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct tbf_sched_data *q = qdisc_priv(sch);
	int ret;

	if (qdisc_pkt_len(skb) > q->max_size)
		return qdisc_reshape_fail(skb, sch);

	ret = qdisc_enqueue(skb, q->qdisc);
	if (ret != NET_XMIT_SUCCESS) {
		if (net_xmit_drop_count(ret))
			sch->qstats.drops++;
		return ret;
	}

	sch->q.qlen++;
	sch->bstats.bytes += qdisc_pkt_len(skb);
	sch->bstats.packets++;
	return NET_XMIT_SUCCESS;
}

static unsigned int tbf_drop(struct Qdisc *sch)
{
	struct tbf_sched_data *q = qdisc_priv(sch);
	unsigned int len = 0;

	if (q->qdisc->ops->drop && (len = q->qdisc->ops->drop(q->qdisc)) != 0) {
		sch->q.qlen--;
		sch->qstats.drops++;
	}
	return len;
}

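/*
 * Dequeue: peek at the head packet, convert the time elapsed since the
 * last checkpoint into tokens, and release the packet only if both
 * buckets stay non-negative after being charged for it. E.g. (made-up
 * numbers) if 5 ms have passed and the head packet costs 2 ms at rate
 * R and 0.2 ms at peak rate P, both buckets gain 5 ms, lose their
 * respective charge, and the packet goes out; otherwise the watchdog
 * is armed for the deficit, max(-toks, -ptoks).
 */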
static struct sk_buff *tbf_dequeue(struct Qdisc *sch)
{
	struct tbf_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;

	skb = q->qdisc->ops->peek(q->qdisc);

	if (skb) {
		psched_time_t now;
		long toks;
		long ptoks = 0;
		unsigned int len = qdisc_pkt_len(skb);

		now = psched_get_time();
		toks = psched_tdiff_bounded(now, q->t_c, q->buffer);

		if (q->P_tab) {
			ptoks = toks + q->ptokens;
			if (ptoks > (long)q->mtu)
				ptoks = q->mtu;
			ptoks -= L2T_P(q, len);
		}
		toks += q->tokens;
		if (toks > (long)q->buffer)
			toks = q->buffer;
		toks -= L2T(q, len);

		if ((toks|ptoks) >= 0) {
			skb = qdisc_dequeue_peeked(q->qdisc);
			if (unlikely(!skb))
				return NULL;

			q->t_c = now;
			q->tokens = toks;
			q->ptokens = ptoks;
			sch->q.qlen--;
			sch->flags &= ~TCQ_F_THROTTLED;
			return skb;
		}

		qdisc_watchdog_schedule(&q->watchdog,
					now + max_t(long, -toks, -ptoks));

		/* Maybe we have a shorter packet in the queue,
		   which can be sent now. That sounds tempting, but it
		   is wrong in principle: we MUST NOT reorder packets
		   under these circumstances.

		   Really, if we split the flow into independent
		   subflows, it would be a very good solution.
		   This is the main idea of all FQ algorithms
		   (cf. CSZ, HPFQ, HFSC).
		 */

		sch->qstats.overlimits++;
	}
	return NULL;
}

static void tbf_reset(struct Qdisc *sch)
{
	struct tbf_sched_data *q = qdisc_priv(sch);

	qdisc_reset(q->qdisc);
	sch->q.qlen = 0;
	q->t_c = psched_get_time();
	q->tokens = q->buffer;
	q->ptokens = q->mtu;
	qdisc_watchdog_cancel(&q->watchdog);
}

static const struct nla_policy tbf_policy[TCA_TBF_MAX + 1] = {
	[TCA_TBF_PARMS]	= { .len = sizeof(struct tc_tbf_qopt) },
	[TCA_TBF_RTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
	[TCA_TBF_PTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
};

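/*
 * (Re)configure from a netlink TCA_OPTIONS message: parse the
 * attributes against tbf_policy above, build the rate tables, and
 * derive max_size by scanning the 256-slot rate table for the largest
 * packet whose transmission time still fits in the configured buffer
 * (each table slot covers a 1 << cell_log range of packet sizes).
 */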
static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
{
	int err;
	struct tbf_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_TBF_PTAB + 1];
	struct tc_tbf_qopt *qopt;
	struct qdisc_rate_table *rtab = NULL;
	struct qdisc_rate_table *ptab = NULL;
	struct Qdisc *child = NULL;
	int max_size, n;

	err = nla_parse_nested(tb, TCA_TBF_PTAB, opt, tbf_policy);
	if (err < 0)
		return err;

	err = -EINVAL;
	if (tb[TCA_TBF_PARMS] == NULL)
		goto done;

	qopt = nla_data(tb[TCA_TBF_PARMS]);
	rtab = qdisc_get_rtab(&qopt->rate, tb[TCA_TBF_RTAB]);
	if (rtab == NULL)
		goto done;

	if (qopt->peakrate.rate) {
		if (qopt->peakrate.rate > qopt->rate.rate)
			ptab = qdisc_get_rtab(&qopt->peakrate, tb[TCA_TBF_PTAB]);
		if (ptab == NULL)
			goto done;
	}

	for (n = 0; n < 256; n++)
		if (rtab->data[n] > qopt->buffer)
			break;
	max_size = (n << qopt->rate.cell_log) - 1;
	if (ptab) {
		int size;

		for (n = 0; n < 256; n++)
			if (ptab->data[n] > qopt->mtu)
				break;
		size = (n << qopt->peakrate.cell_log) - 1;
		if (size < max_size)
			max_size = size;
	}
	if (max_size < 0)
		goto done;

	if (q->qdisc != &noop_qdisc) {
		err = fifo_set_limit(q->qdisc, qopt->limit);
		if (err)
			goto done;
	} else if (qopt->limit > 0) {
		child = fifo_create_dflt(sch, &bfifo_qdisc_ops, qopt->limit);
		if (IS_ERR(child)) {
			err = PTR_ERR(child);
			goto done;
		}
	}

	sch_tree_lock(sch);
	if (child) {
		qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen);
		qdisc_destroy(q->qdisc);
		q->qdisc = child;
	}
	q->limit = qopt->limit;
	q->mtu = qopt->mtu;
	q->max_size = max_size;
	q->buffer = qopt->buffer;
	q->tokens = q->buffer;
	q->ptokens = q->mtu;

	swap(q->R_tab, rtab);
	swap(q->P_tab, ptab);

	sch_tree_unlock(sch);
	err = 0;
done:
	if (rtab)
		qdisc_put_rtab(rtab);
	if (ptab)
		qdisc_put_rtab(ptab);
	return err;
}

static int tbf_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct tbf_sched_data *q = qdisc_priv(sch);

	if (opt == NULL)
		return -EINVAL;

	q->t_c = psched_get_time();
	qdisc_watchdog_init(&q->watchdog, sch);
	q->qdisc = &noop_qdisc;

	return tbf_change(sch, opt);
}

static void tbf_destroy(struct Qdisc *sch)
{
	struct tbf_sched_data *q = qdisc_priv(sch);

	qdisc_watchdog_cancel(&q->watchdog);

	if (q->P_tab)
		qdisc_put_rtab(q->P_tab);
	if (q->R_tab)
		qdisc_put_rtab(q->R_tab);

	qdisc_destroy(q->qdisc);
}

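/*
 * Dump the current configuration as a nested TCA_OPTIONS attribute,
 * mirroring the format that tbf_change() accepts.
 */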
static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct tbf_sched_data *q = qdisc_priv(sch);
	struct nlattr *nest;
	struct tc_tbf_qopt opt;

	nest = nla_nest_start(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;

	opt.limit = q->limit;
	opt.rate = q->R_tab->rate;
	if (q->P_tab)
		opt.peakrate = q->P_tab->rate;
	else
		memset(&opt.peakrate, 0, sizeof(opt.peakrate));
	opt.mtu = q->mtu;
	opt.buffer = q->buffer;
	NLA_PUT(skb, TCA_TBF_PARMS, sizeof(opt), &opt);

	nla_nest_end(skb, nest);
	return skb->len;

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
}

static int tbf_dump_class(struct Qdisc *sch, unsigned long cl,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	struct tbf_sched_data *q = qdisc_priv(sch);

	tcm->tcm_handle |= TC_H_MIN(1);
	tcm->tcm_info = q->qdisc->handle;

	return 0;
}

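/*
 * Replace the inner qdisc (e.g. via "tc qdisc replace ... parent X:1").
 * The old child is reset and handed back to the caller; the swap is
 * done under the qdisc tree lock.
 */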
static int tbf_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old)
{
	struct tbf_sched_data *q = qdisc_priv(sch);

	if (new == NULL)
		new = &noop_qdisc;

	sch_tree_lock(sch);
	*old = q->qdisc;
	q->qdisc = new;
	qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
	qdisc_reset(*old);
	sch_tree_unlock(sch);

	return 0;
}

static struct Qdisc *tbf_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct tbf_sched_data *q = qdisc_priv(sch);
	return q->qdisc;
}

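/*
 * TBF exposes exactly one class (the slot holding the inner qdisc),
 * so class handles are the constant 1 and get/put need no reference
 * counting.
 */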
static unsigned long tbf_get(struct Qdisc *sch, u32 classid)
{
	return 1;
}

static void tbf_put(struct Qdisc *sch, unsigned long arg)
{
}

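/*
 * Walk the single pseudo-class; the walker callback is invoked once
 * with class handle 1 unless the walker is still inside its skip
 * range.
 */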
static void tbf_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
	if (!walker->stop) {
		if (walker->count >= walker->skip)
			if (walker->fn(sch, 1, walker) < 0) {
				walker->stop = 1;
				return;
			}
		walker->count++;
	}
}

static const struct Qdisc_class_ops tbf_class_ops = {
	.graft		=	tbf_graft,
	.leaf		=	tbf_leaf,
	.get		=	tbf_get,
	.put		=	tbf_put,
	.walk		=	tbf_walk,
	.dump		=	tbf_dump_class,
};

static struct Qdisc_ops tbf_qdisc_ops __read_mostly = {
	.next		=	NULL,
	.cl_ops		=	&tbf_class_ops,
	.id		=	"tbf",
	.priv_size	=	sizeof(struct tbf_sched_data),
	.enqueue	=	tbf_enqueue,
	.dequeue	=	tbf_dequeue,
	.peek		=	qdisc_peek_dequeued,
	.drop		=	tbf_drop,
	.init		=	tbf_init,
	.reset		=	tbf_reset,
	.destroy	=	tbf_destroy,
	.change		=	tbf_change,
	.dump		=	tbf_dump,
	.owner		=	THIS_MODULE,
};

static int __init tbf_module_init(void)
{
	return register_qdisc(&tbf_qdisc_ops);
}

static void __exit tbf_module_exit(void)
{
	unregister_qdisc(&tbf_qdisc_ops);
}

module_init(tbf_module_init)
module_exit(tbf_module_exit)
MODULE_LICENSE("GPL");