[deliverable/linux.git] / net / sched / sch_tbf.c

/*
 * net/sched/sch_tbf.c	Token Bucket Filter queue.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *		Dmitry Torokhov <dtor@mail.ru> - allow attaching inner qdiscs -
 *						 original idea by Martin Devera
 *
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>


/*	Simple Token Bucket Filter.
	=======================================

	SOURCE.
	-------

	None.

	Description.
	------------

	A data flow obeys TBF with rate R and depth B, if for any
	time interval t_i...t_f the number of transmitted bits
	does not exceed B + R*(t_f-t_i).

	Packetized version of this definition:
	The sequence of packets of sizes s_i served at moments t_i
	obeys TBF, if for any i<=k:

	s_i+....+s_k <= B + R*(t_k - t_i)

	Algorithm.
	----------

	Let N(t_i) be B/R initially and N(t) grow continuously with time as:

	N(t+delta) = min{B/R, N(t) + delta}

	If the first packet in queue has length S, it may be
	transmitted only at the time t_* when S/R <= N(t_*),
	and in this case N(t) jumps:

	N(t_* + 0) = N(t_* - 0) - S/R.


	Actually, QoS requires two TBF to be applied to a data stream.
	One of them controls steady state burst size, another
	one with rate P (peak rate) and depth M (equal to link MTU)
	limits bursts at a smaller time scale.

	It is easy to see that P>R, and B>M. If P is infinity, this double
	TBF is equivalent to a single one.

	When TBF works in reshaping mode, latency is estimated as:

	lat = max ((L-B)/R, (L-M)/P)


	NOTES.
	------

	If TBF throttles, it starts a watchdog timer, which will wake it up
	when it is ready to transmit.
	Note that the minimal timer resolution is 1/HZ.
	If no new packets arrive during this period,
	or if the device is not awaken by EOI for some previous packet,
	TBF can stop its activity for 1/HZ.


	This means, that with depth B, the maximal rate is

	R_crit = B*HZ

	F.e. for 10Mbit ethernet and HZ=100 the minimal allowed B is ~10Kbytes.

	Note that the peak rate TBF is much more tough: with MTU 1500
	P_crit = 150Kbytes/sec. So, if you need greater peak
	rates, use alpha with HZ=1000 :-)

	With classful TBF, limit is just kept for backwards compatibility.
	It is passed to the default bfifo qdisc - if the inner qdisc is
	changed the limit is not effective anymore.
*/

struct tbf_sched_data
{
/* Parameters */
	u32		limit;		/* Maximal length of backlog: bytes */
	u32		buffer;		/* Token bucket depth/rate: MUST BE >= MTU/B */
	u32		mtu;
	u32		max_size;
	struct qdisc_rate_table	*R_tab;
	struct qdisc_rate_table	*P_tab;

/* Variables */
	long	tokens;			/* Current number of B tokens */
	long	ptokens;		/* Current number of P tokens */
	psched_time_t	t_c;		/* Time check-point */
	struct Qdisc	*qdisc;		/* Inner qdisc, default - bfifo queue */
	struct qdisc_watchdog watchdog;	/* Watchdog timer */
};

#define L2T(q,L)   qdisc_l2t((q)->R_tab,L)
#define L2T_P(q,L) qdisc_l2t((q)->P_tab,L)

static int tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch)
{
	struct tbf_sched_data *q = qdisc_priv(sch);
	int ret;

	if (qdisc_pkt_len(skb) > q->max_size)
		return qdisc_reshape_fail(skb, sch);

	ret = qdisc_enqueue(skb, q->qdisc);
	if (ret != 0) {
		if (net_xmit_drop_count(ret))
			sch->qstats.drops++;
		return ret;
	}

	sch->q.qlen++;
	sch->bstats.bytes += qdisc_pkt_len(skb);
	sch->bstats.packets++;
	return 0;
}

static int tbf_requeue(struct sk_buff *skb, struct Qdisc* sch)
{
	struct tbf_sched_data *q = qdisc_priv(sch);
	int ret;

	if ((ret = q->qdisc->ops->requeue(skb, q->qdisc)) == 0) {
		sch->q.qlen++;
		sch->qstats.requeues++;
	}

	return ret;
}

static unsigned int tbf_drop(struct Qdisc* sch)
{
	struct tbf_sched_data *q = qdisc_priv(sch);
	unsigned int len = 0;

	if (q->qdisc->ops->drop && (len = q->qdisc->ops->drop(q->qdisc)) != 0) {
		sch->q.qlen--;
		sch->qstats.drops++;
	}
	return len;
}

static struct sk_buff *tbf_dequeue(struct Qdisc* sch)
{
	struct tbf_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;

	skb = q->qdisc->ops->peek(q->qdisc);

	if (skb) {
		psched_time_t now;
		long toks;
		long ptoks = 0;
		unsigned int len = qdisc_pkt_len(skb);

		now = psched_get_time();
		toks = psched_tdiff_bounded(now, q->t_c, q->buffer);

		if (q->P_tab) {
			ptoks = toks + q->ptokens;
			if (ptoks > (long)q->mtu)
				ptoks = q->mtu;
			ptoks -= L2T_P(q, len);
		}
		toks += q->tokens;
		if (toks > (long)q->buffer)
			toks = q->buffer;
		toks -= L2T(q, len);

		if ((toks|ptoks) >= 0) {
			skb = q->qdisc->dequeue(q->qdisc);
			if (unlikely(!skb))
				return NULL;

			q->t_c = now;
			q->tokens = toks;
			q->ptokens = ptoks;
			sch->q.qlen--;
			sch->flags &= ~TCQ_F_THROTTLED;
			return skb;
		}

		qdisc_watchdog_schedule(&q->watchdog,
					now + max_t(long, -toks, -ptoks));

		/* Maybe we have a shorter packet in the queue,
		   which can be sent now. It sounds cool,
		   but, however, this is wrong in principle.
		   We MUST NOT reorder packets under these circumstances.

		   Really, if we split the flow into independent
		   subflows, it would be a very good solution.
		   This is the main idea of all FQ algorithms
		   (cf. CSZ, HPFQ, HFSC)
		 */

		sch->qstats.overlimits++;
	}
	return NULL;
}

static void tbf_reset(struct Qdisc* sch)
{
	struct tbf_sched_data *q = qdisc_priv(sch);

	qdisc_reset(q->qdisc);
	sch->q.qlen = 0;
	q->t_c = psched_get_time();
	q->tokens = q->buffer;
	q->ptokens = q->mtu;
	qdisc_watchdog_cancel(&q->watchdog);
}

static const struct nla_policy tbf_policy[TCA_TBF_MAX + 1] = {
	[TCA_TBF_PARMS]	= { .len = sizeof(struct tc_tbf_qopt) },
	[TCA_TBF_RTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
	[TCA_TBF_PTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
};

static int tbf_change(struct Qdisc* sch, struct nlattr *opt)
{
	int err;
	struct tbf_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_TBF_PTAB + 1];
	struct tc_tbf_qopt *qopt;
	struct qdisc_rate_table *rtab = NULL;
	struct qdisc_rate_table *ptab = NULL;
	struct Qdisc *child = NULL;
	int max_size,n;

	err = nla_parse_nested(tb, TCA_TBF_PTAB, opt, tbf_policy);
	if (err < 0)
		return err;

	err = -EINVAL;
	if (tb[TCA_TBF_PARMS] == NULL)
		goto done;

	qopt = nla_data(tb[TCA_TBF_PARMS]);
	rtab = qdisc_get_rtab(&qopt->rate, tb[TCA_TBF_RTAB]);
	if (rtab == NULL)
		goto done;

	if (qopt->peakrate.rate) {
		if (qopt->peakrate.rate > qopt->rate.rate)
			ptab = qdisc_get_rtab(&qopt->peakrate, tb[TCA_TBF_PTAB]);
		if (ptab == NULL)
			goto done;
	}

	for (n = 0; n < 256; n++)
		if (rtab->data[n] > qopt->buffer) break;
	max_size = (n << qopt->rate.cell_log)-1;
	if (ptab) {
		int size;

		for (n = 0; n < 256; n++)
			if (ptab->data[n] > qopt->mtu) break;
		size = (n << qopt->peakrate.cell_log)-1;
		if (size < max_size) max_size = size;
	}
	if (max_size < 0)
		goto done;

	if (qopt->limit > 0) {
		child = fifo_create_dflt(sch, &bfifo_qdisc_ops, qopt->limit);
		if (IS_ERR(child)) {
			err = PTR_ERR(child);
			goto done;
		}
	}

	sch_tree_lock(sch);
	if (child) {
		qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen);
		qdisc_destroy(xchg(&q->qdisc, child));
	}
	q->limit = qopt->limit;
	q->mtu = qopt->mtu;
	q->max_size = max_size;
	q->buffer = qopt->buffer;
	q->tokens = q->buffer;
	q->ptokens = q->mtu;
	rtab = xchg(&q->R_tab, rtab);
	ptab = xchg(&q->P_tab, ptab);
	sch_tree_unlock(sch);
	err = 0;
done:
	if (rtab)
		qdisc_put_rtab(rtab);
	if (ptab)
		qdisc_put_rtab(ptab);
	return err;
}

static int tbf_init(struct Qdisc* sch, struct nlattr *opt)
{
	struct tbf_sched_data *q = qdisc_priv(sch);

	if (opt == NULL)
		return -EINVAL;

	q->t_c = psched_get_time();
	qdisc_watchdog_init(&q->watchdog, sch);
	q->qdisc = &noop_qdisc;

	return tbf_change(sch, opt);
}

static void tbf_destroy(struct Qdisc *sch)
{
	struct tbf_sched_data *q = qdisc_priv(sch);

	qdisc_watchdog_cancel(&q->watchdog);

	if (q->P_tab)
		qdisc_put_rtab(q->P_tab);
	if (q->R_tab)
		qdisc_put_rtab(q->R_tab);

	qdisc_destroy(q->qdisc);
}

static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct tbf_sched_data *q = qdisc_priv(sch);
	struct nlattr *nest;
	struct tc_tbf_qopt opt;

	nest = nla_nest_start(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;

	opt.limit = q->limit;
	opt.rate = q->R_tab->rate;
	if (q->P_tab)
		opt.peakrate = q->P_tab->rate;
	else
		memset(&opt.peakrate, 0, sizeof(opt.peakrate));
	opt.mtu = q->mtu;
	opt.buffer = q->buffer;
	NLA_PUT(skb, TCA_TBF_PARMS, sizeof(opt), &opt);

	nla_nest_end(skb, nest);
	return skb->len;

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
}

static int tbf_dump_class(struct Qdisc *sch, unsigned long cl,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	struct tbf_sched_data *q = qdisc_priv(sch);

	if (cl != 1) 	/* only one class */
		return -ENOENT;

	tcm->tcm_handle |= TC_H_MIN(1);
	tcm->tcm_info = q->qdisc->handle;

	return 0;
}

static int tbf_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old)
{
	struct tbf_sched_data *q = qdisc_priv(sch);

	if (new == NULL)
		new = &noop_qdisc;

	sch_tree_lock(sch);
	*old = xchg(&q->qdisc, new);
	qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
	qdisc_reset(*old);
	sch_tree_unlock(sch);

	return 0;
}

static struct Qdisc *tbf_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct tbf_sched_data *q = qdisc_priv(sch);
	return q->qdisc;
}

static unsigned long tbf_get(struct Qdisc *sch, u32 classid)
{
	return 1;
}

static void tbf_put(struct Qdisc *sch, unsigned long arg)
{
}

static int tbf_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
			    struct nlattr **tca, unsigned long *arg)
{
	return -ENOSYS;
}

static int tbf_delete(struct Qdisc *sch, unsigned long arg)
{
	return -ENOSYS;
}

static void tbf_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
	if (!walker->stop) {
		if (walker->count >= walker->skip)
			if (walker->fn(sch, 1, walker) < 0) {
				walker->stop = 1;
				return;
			}
		walker->count++;
	}
}

static struct tcf_proto **tbf_find_tcf(struct Qdisc *sch, unsigned long cl)
{
	return NULL;
}

static const struct Qdisc_class_ops tbf_class_ops =
{
	.graft		=	tbf_graft,
	.leaf		=	tbf_leaf,
	.get		=	tbf_get,
	.put		=	tbf_put,
	.change		=	tbf_change_class,
	.delete		=	tbf_delete,
	.walk		=	tbf_walk,
	.tcf_chain	=	tbf_find_tcf,
	.dump		=	tbf_dump_class,
};

static struct Qdisc_ops tbf_qdisc_ops __read_mostly = {
	.next		=	NULL,
	.cl_ops		=	&tbf_class_ops,
	.id		=	"tbf",
	.priv_size	=	sizeof(struct tbf_sched_data),
	.enqueue	=	tbf_enqueue,
	.dequeue	=	tbf_dequeue,
	.requeue	=	tbf_requeue,
	.drop		=	tbf_drop,
	.init		=	tbf_init,
	.reset		=	tbf_reset,
	.destroy	=	tbf_destroy,
	.change		=	tbf_change,
	.dump		=	tbf_dump,
	.owner		=	THIS_MODULE,
};

static int __init tbf_module_init(void)
{
	return register_qdisc(&tbf_qdisc_ops);
}

static void __exit tbf_module_exit(void)
{
	unregister_qdisc(&tbf_qdisc_ops);
}
module_init(tbf_module_init)
module_exit(tbf_module_exit)
MODULE_LICENSE("GPL");
Commit	Line	Data
1da177e4 LT	1	/*
	2	* net/sched/sch_tbf.c Token Bucket Filter queue.
	3	*
	4	* This program is free software; you can redistribute it and/or
	5	* modify it under the terms of the GNU General Public License
	6	* as published by the Free Software Foundation; either version
	7	* 2 of the License, or (at your option) any later version.
	8	*
	9	* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
	10	* Dmitry Torokhov <dtor@mail.ru> - allow attaching inner qdiscs -
	11	* original idea by Martin Devera
	12	*
	13	*/
	14
1da177e4	15	#include <linux/module.h>
1da177e4 LT	16	#include <linux/types.h>
1da177e4 LT	17	#include <linux/kernel.h>
1da177e4	18	#include <linux/string.h>
1da177e4	19	#include <linux/errno.h>
1da177e4	20	#include <linux/skbuff.h>
0ba48053	21	#include <net/netlink.h>
1da177e4 LT	22	#include <net/pkt_sched.h>
	23
	24
	25	/* Simple Token Bucket Filter.
	26	=======================================
	27
	28	SOURCE.
	29	-------
	30
	31	None.
	32
	33	Description.
	34	------------
	35
	36	A data flow obeys TBF with rate R and depth B, if for any
	37	time interval t_i...t_f the number of transmitted bits
	38	does not exceed B + R*(t_f-t_i).
	39
	40	Packetized version of this definition:
	41	The sequence of packets of sizes s_i served at moments t_i
	42	obeys TBF, if for any i<=k:
	43
	44	s_i+....+s_k <= B + R*(t_k - t_i)
	45
	46	Algorithm.
	47	----------
	48
	49	Let N(t_i) be B/R initially and N(t) grow continuously with time as:
	50
	51	N(t+delta) = min{B/R, N(t) + delta}
	52
	53	If the first packet in queue has length S, it may be
	54	transmitted only at the time t_* when S/R <= N(t_*),
	55	and in this case N(t) jumps:
	56
	57	N(t_* + 0) = N(t_* - 0) - S/R.
	58
	59
	60
	61	Actually, QoS requires two TBF to be applied to a data stream.
	62	One of them controls steady state burst size, another
	63	one with rate P (peak rate) and depth M (equal to link MTU)
	64	limits bursts at a smaller time scale.
	65
	66	It is easy to see that P>R, and B>M. If P is infinity, this double
	67	TBF is equivalent to a single one.
	68
	69	When TBF works in reshaping mode, latency is estimated as:
	70
	71	lat = max ((L-B)/R, (L-M)/P)
	72
	73
	74	NOTES.
	75	------
	76
	77	If TBF throttles, it starts a watchdog timer, which will wake it up
	78	when it is ready to transmit.
	79	Note that the minimal timer resolution is 1/HZ.
	80	If no new packets arrive during this period,
	81	or if the device is not awaken by EOI for some previous packet,
	82	TBF can stop its activity for 1/HZ.
	83
	84
	85	This means, that with depth B, the maximal rate is
86
87	R_crit = B*HZ
88
89	F.e. for 10Mbit ethernet and HZ=100 the minimal allowed B is ~10Kbytes.
90
91	Note that the peak rate TBF is much more tough: with MTU 1500
92	P_crit = 150Kbytes/sec. So, if you need greater peak
93	rates, use alpha with HZ=1000 :-)
94
95	With classful TBF, limit is just kept for backwards compatibility.
96	It is passed to the default bfifo qdisc - if the inner qdisc is
97	changed the limit is not effective anymore.
98	*/
99
100	struct tbf_sched_data
101	{
102	/* Parameters */
103	u32 limit; /* Maximal length of backlog: bytes */
104	u32 buffer; /* Token bucket depth/rate: MUST BE >= MTU/B */
105	u32 mtu;
106	u32 max_size;
107	struct qdisc_rate_table *R_tab;
108	struct qdisc_rate_table *P_tab;
109
110	/* Variables */
111	long tokens; /* Current number of B tokens */
112	long ptokens; /* Current number of P tokens */
113	psched_time_t t_c; /* Time check-point */
1da177e4	114	struct Qdisc qdisc; / Inner qdisc, default - bfifo queue */
f7f593e3	115	struct qdisc_watchdog watchdog; /* Watchdog timer */
1da177e4 LT	116	};
1da177e4 LT	117
e9bef55d JDB	118	#define L2T(q,L) qdisc_l2t((q)->R_tab,L)
e9bef55d JDB	119	#define L2T_P(q,L) qdisc_l2t((q)->P_tab,L)
1da177e4 LT	120
	121	static int tbf_enqueue(struct sk_buff skb, struct Qdisc sch)
	122	{
	123	struct tbf_sched_data *q = qdisc_priv(sch);
	124	int ret;
	125
69747650 DM	126	if (qdisc_pkt_len(skb) > q->max_size)
69747650 DM	127	return qdisc_reshape_fail(skb, sch);
1da177e4	128
5f86173b JK	129	ret = qdisc_enqueue(skb, q->qdisc);
5f86173b JK	130	if (ret != 0) {
378a2f09 JP	131	if (net_xmit_drop_count(ret))
378a2f09 JP	132	sch->qstats.drops++;
1da177e4 LT	133	return ret;
	134	}
	135
	136	sch->q.qlen++;
0abf77e5	137	sch->bstats.bytes += qdisc_pkt_len(skb);
1da177e4 LT	138	sch->bstats.packets++;
	139	return 0;
	140	}
	141
	142	static int tbf_requeue(struct sk_buff skb, struct Qdisc sch)
	143	{
	144	struct tbf_sched_data *q = qdisc_priv(sch);
	145	int ret;
	146
	147	if ((ret = q->qdisc->ops->requeue(skb, q->qdisc)) == 0) {
	148	sch->q.qlen++;
	149	sch->qstats.requeues++;
	150	}
	151
	152	return ret;
	153	}
	154
	155	static unsigned int tbf_drop(struct Qdisc* sch)
	156	{
	157	struct tbf_sched_data *q = qdisc_priv(sch);
6d037a26	158	unsigned int len = 0;
1da177e4	159
6d037a26	160	if (q->qdisc->ops->drop && (len = q->qdisc->ops->drop(q->qdisc)) != 0) {
1da177e4 LT	161	sch->q.qlen--;
	162	sch->qstats.drops++;
	163	}
	164	return len;
	165	}
	166
1da177e4 LT	167	static struct sk_buff tbf_dequeue(struct Qdisc sch)
	168	{
	169	struct tbf_sched_data *q = qdisc_priv(sch);
	170	struct sk_buff *skb;
	171
03c05f0d	172	skb = q->qdisc->ops->peek(q->qdisc);
1da177e4 LT	173
	174	if (skb) {
	175	psched_time_t now;
f7f593e3	176	long toks;
1da177e4	177	long ptoks = 0;
0abf77e5	178	unsigned int len = qdisc_pkt_len(skb);
1da177e4	179
3bebcda2	180	now = psched_get_time();
03cc45c0	181	toks = psched_tdiff_bounded(now, q->t_c, q->buffer);
1da177e4 LT	182
	183	if (q->P_tab) {
	184	ptoks = toks + q->ptokens;
	185	if (ptoks > (long)q->mtu)
	186	ptoks = q->mtu;
	187	ptoks -= L2T_P(q, len);
	188	}
	189	toks += q->tokens;
	190	if (toks > (long)q->buffer)
	191	toks = q->buffer;
	192	toks -= L2T(q, len);
	193
	194	if ((toks\|ptoks) >= 0) {
03c05f0d JP	195	skb = q->qdisc->dequeue(q->qdisc);
	196	if (unlikely(!skb))
	197	return NULL;
	198
1da177e4 LT	199	q->t_c = now;
	200	q->tokens = toks;
	201	q->ptokens = ptoks;
	202	sch->q.qlen--;
	203	sch->flags &= ~TCQ_F_THROTTLED;
	204	return skb;
	205	}
	206
f7f593e3 PM	207	qdisc_watchdog_schedule(&q->watchdog,
f7f593e3 PM	208	now + max_t(long, -toks, -ptoks));
1da177e4 LT	209
	210	/* Maybe we have a shorter packet in the queue,
	211	which can be sent now. It sounds cool,
	212	but, however, this is wrong in principle.
	213	We MUST NOT reorder packets under these circumstances.
	214
	215	Really, if we split the flow into independent
	216	subflows, it would be a very good solution.
	217	This is the main idea of all FQ algorithms
	218	(cf. CSZ, HPFQ, HFSC)
	219	*/
	220
1da177e4 LT	221	sch->qstats.overlimits++;
	222	}
	223	return NULL;
	224	}
	225
	226	static void tbf_reset(struct Qdisc* sch)
	227	{
	228	struct tbf_sched_data *q = qdisc_priv(sch);
	229
	230	qdisc_reset(q->qdisc);
	231	sch->q.qlen = 0;
3bebcda2	232	q->t_c = psched_get_time();
1da177e4 LT	233	q->tokens = q->buffer;
1da177e4 LT	234	q->ptokens = q->mtu;
f7f593e3	235	qdisc_watchdog_cancel(&q->watchdog);
1da177e4 LT	236	}
1da177e4 LT	237
27a3421e PM	238	static const struct nla_policy tbf_policy[TCA_TBF_MAX + 1] = {
	239	[TCA_TBF_PARMS] = { .len = sizeof(struct tc_tbf_qopt) },
	240	[TCA_TBF_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
	241	[TCA_TBF_PTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
	242	};
	243
1e90474c	244	static int tbf_change(struct Qdisc* sch, struct nlattr *opt)
1da177e4	245	{
cee63723	246	int err;
1da177e4	247	struct tbf_sched_data *q = qdisc_priv(sch);
1e90474c	248	struct nlattr *tb[TCA_TBF_PTAB + 1];
1da177e4 LT	249	struct tc_tbf_qopt *qopt;
	250	struct qdisc_rate_table *rtab = NULL;
	251	struct qdisc_rate_table *ptab = NULL;
	252	struct Qdisc *child = NULL;
	253	int max_size,n;
	254
27a3421e	255	err = nla_parse_nested(tb, TCA_TBF_PTAB, opt, tbf_policy);
cee63723 PM	256	if (err < 0)
	257	return err;
	258
	259	err = -EINVAL;
27a3421e	260	if (tb[TCA_TBF_PARMS] == NULL)
1da177e4 LT	261	goto done;
1da177e4 LT	262
1e90474c PM	263	qopt = nla_data(tb[TCA_TBF_PARMS]);
1e90474c PM	264	rtab = qdisc_get_rtab(&qopt->rate, tb[TCA_TBF_RTAB]);
1da177e4 LT	265	if (rtab == NULL)
	266	goto done;
	267
	268	if (qopt->peakrate.rate) {
	269	if (qopt->peakrate.rate > qopt->rate.rate)
1e90474c	270	ptab = qdisc_get_rtab(&qopt->peakrate, tb[TCA_TBF_PTAB]);
1da177e4 LT	271	if (ptab == NULL)
	272	goto done;
	273	}
	274
	275	for (n = 0; n < 256; n++)
	276	if (rtab->data[n] > qopt->buffer) break;
	277	max_size = (n << qopt->rate.cell_log)-1;
	278	if (ptab) {
	279	int size;
	280
	281	for (n = 0; n < 256; n++)
	282	if (ptab->data[n] > qopt->mtu) break;
	283	size = (n << qopt->peakrate.cell_log)-1;
	284	if (size < max_size) max_size = size;
	285	}
	286	if (max_size < 0)
	287	goto done;
	288
053cfed7	289	if (qopt->limit > 0) {
fb0305ce PM	290	child = fifo_create_dflt(sch, &bfifo_qdisc_ops, qopt->limit);
	291	if (IS_ERR(child)) {
	292	err = PTR_ERR(child);
1da177e4	293	goto done;
fb0305ce	294	}
1da177e4 LT	295	}
	296
	297	sch_tree_lock(sch);
5e50da01 PM	298	if (child) {
5e50da01 PM	299	qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen);
053cfed7	300	qdisc_destroy(xchg(&q->qdisc, child));
5e50da01	301	}
1da177e4 LT	302	q->limit = qopt->limit;
	303	q->mtu = qopt->mtu;
	304	q->max_size = max_size;
	305	q->buffer = qopt->buffer;
	306	q->tokens = q->buffer;
	307	q->ptokens = q->mtu;
	308	rtab = xchg(&q->R_tab, rtab);
	309	ptab = xchg(&q->P_tab, ptab);
	310	sch_tree_unlock(sch);
	311	err = 0;
	312	done:
	313	if (rtab)
	314	qdisc_put_rtab(rtab);
	315	if (ptab)
	316	qdisc_put_rtab(ptab);
	317	return err;
	318	}
	319
1e90474c	320	static int tbf_init(struct Qdisc* sch, struct nlattr *opt)
1da177e4 LT	321	{
	322	struct tbf_sched_data *q = qdisc_priv(sch);
	323
	324	if (opt == NULL)
	325	return -EINVAL;
	326
3bebcda2	327	q->t_c = psched_get_time();
f7f593e3	328	qdisc_watchdog_init(&q->watchdog, sch);
1da177e4 LT	329	q->qdisc = &noop_qdisc;
	330
	331	return tbf_change(sch, opt);
	332	}
	333
	334	static void tbf_destroy(struct Qdisc *sch)
	335	{
	336	struct tbf_sched_data *q = qdisc_priv(sch);
	337
f7f593e3	338	qdisc_watchdog_cancel(&q->watchdog);
1da177e4 LT	339
	340	if (q->P_tab)
	341	qdisc_put_rtab(q->P_tab);
	342	if (q->R_tab)
	343	qdisc_put_rtab(q->R_tab);
	344
	345	qdisc_destroy(q->qdisc);
	346	}
	347
	348	static int tbf_dump(struct Qdisc sch, struct sk_buff skb)
	349	{
	350	struct tbf_sched_data *q = qdisc_priv(sch);
4b3550ef	351	struct nlattr *nest;
1da177e4 LT	352	struct tc_tbf_qopt opt;
1da177e4 LT	353
4b3550ef PM	354	nest = nla_nest_start(skb, TCA_OPTIONS);
	355	if (nest == NULL)
	356	goto nla_put_failure;
1da177e4 LT	357
	358	opt.limit = q->limit;
	359	opt.rate = q->R_tab->rate;
	360	if (q->P_tab)
	361	opt.peakrate = q->P_tab->rate;
	362	else
	363	memset(&opt.peakrate, 0, sizeof(opt.peakrate));
	364	opt.mtu = q->mtu;
	365	opt.buffer = q->buffer;
1e90474c	366	NLA_PUT(skb, TCA_TBF_PARMS, sizeof(opt), &opt);
1da177e4	367
4b3550ef	368	nla_nest_end(skb, nest);
1da177e4 LT	369	return skb->len;
1da177e4 LT	370
1e90474c	371	nla_put_failure:
4b3550ef	372	nla_nest_cancel(skb, nest);
1da177e4 LT	373	return -1;
	374	}
	375
	376	static int tbf_dump_class(struct Qdisc *sch, unsigned long cl,
	377	struct sk_buff skb, struct tcmsg tcm)
	378	{
	379	struct tbf_sched_data *q = qdisc_priv(sch);
	380
	381	if (cl != 1) /* only one class */
	382	return -ENOENT;
	383
	384	tcm->tcm_handle \|= TC_H_MIN(1);
	385	tcm->tcm_info = q->qdisc->handle;
	386
	387	return 0;
	388	}
	389
	390	static int tbf_graft(struct Qdisc sch, unsigned long arg, struct Qdisc new,
	391	struct Qdisc **old)
	392	{
	393	struct tbf_sched_data *q = qdisc_priv(sch);
	394
	395	if (new == NULL)
	396	new = &noop_qdisc;
	397
	398	sch_tree_lock(sch);
	399	*old = xchg(&q->qdisc, new);
5e50da01	400	qdisc_tree_decrease_qlen(old, (old)->q.qlen);
1da177e4	401	qdisc_reset(*old);
1da177e4 LT	402	sch_tree_unlock(sch);
	403
	404	return 0;
	405	}
	406
	407	static struct Qdisc tbf_leaf(struct Qdisc sch, unsigned long arg)
	408	{
	409	struct tbf_sched_data *q = qdisc_priv(sch);
	410	return q->qdisc;
	411	}
	412
	413	static unsigned long tbf_get(struct Qdisc *sch, u32 classid)
	414	{
	415	return 1;
	416	}
	417
	418	static void tbf_put(struct Qdisc *sch, unsigned long arg)
	419	{
	420	}
	421
10297b99	422	static int tbf_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
1e90474c	423	struct nlattr *tca, unsigned long arg)
1da177e4 LT	424	{
	425	return -ENOSYS;
	426	}
	427
	428	static int tbf_delete(struct Qdisc *sch, unsigned long arg)
	429	{
	430	return -ENOSYS;
	431	}
	432
	433	static void tbf_walk(struct Qdisc sch, struct qdisc_walker walker)
	434	{
	435	if (!walker->stop) {
	436	if (walker->count >= walker->skip)
	437	if (walker->fn(sch, 1, walker) < 0) {
	438	walker->stop = 1;
	439	return;
	440	}
	441	walker->count++;
	442	}
	443	}
	444
	445	static struct tcf_proto *tbf_find_tcf(struct Qdisc sch, unsigned long cl)
	446	{
	447	return NULL;
	448	}
	449
20fea08b	450	static const struct Qdisc_class_ops tbf_class_ops =
1da177e4 LT	451	{
	452	.graft = tbf_graft,
	453	.leaf = tbf_leaf,
	454	.get = tbf_get,
	455	.put = tbf_put,
	456	.change = tbf_change_class,
	457	.delete = tbf_delete,
	458	.walk = tbf_walk,
	459	.tcf_chain = tbf_find_tcf,
	460	.dump = tbf_dump_class,
	461	};
	462
20fea08b	463	static struct Qdisc_ops tbf_qdisc_ops __read_mostly = {
1da177e4 LT	464	.next = NULL,
	465	.cl_ops = &tbf_class_ops,
	466	.id = "tbf",
	467	.priv_size = sizeof(struct tbf_sched_data),
	468	.enqueue = tbf_enqueue,
	469	.dequeue = tbf_dequeue,
	470	.requeue = tbf_requeue,
	471	.drop = tbf_drop,
	472	.init = tbf_init,
	473	.reset = tbf_reset,
	474	.destroy = tbf_destroy,
	475	.change = tbf_change,
	476	.dump = tbf_dump,
	477	.owner = THIS_MODULE,
	478	};
	479
	480	static int __init tbf_module_init(void)
	481	{
	482	return register_qdisc(&tbf_qdisc_ops);
	483	}
	484
	485	static void __exit tbf_module_exit(void)
	486	{
	487	unregister_qdisc(&tbf_qdisc_ops);
	488	}
	489	module_init(tbf_module_init)
	490	module_exit(tbf_module_exit)
	491	MODULE_LICENSE("GPL");