pkt_sched: sch_generic: Add generic qdisc->ops->peek() implementation.
/*
 * net/sched/sch_api.c	Packet scheduler API.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Fixes:
 *
 * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
 * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
 */
17
18 #include <linux/module.h>
19 #include <linux/types.h>
20 #include <linux/kernel.h>
21 #include <linux/string.h>
22 #include <linux/errno.h>
23 #include <linux/skbuff.h>
24 #include <linux/init.h>
25 #include <linux/proc_fs.h>
26 #include <linux/seq_file.h>
27 #include <linux/kmod.h>
28 #include <linux/list.h>
29 #include <linux/hrtimer.h>
30 #include <linux/lockdep.h>
31
32 #include <net/net_namespace.h>
33 #include <net/sock.h>
34 #include <net/netlink.h>
35 #include <net/pkt_sched.h>
36
37 static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n, u32 clid,
38 struct Qdisc *old, struct Qdisc *new);
39 static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
40 struct Qdisc *q, unsigned long cl, int event);
41
/*

   Short review.
   -------------

   This file consists of two interrelated parts:

   1. The queueing discipline manager frontend.
   2. The traffic class manager frontend.

   Generally, a queueing discipline ("qdisc") is a black box
   that can enqueue packets and dequeue them (when the device
   is ready to send something), in an order and at times
   determined by the algorithm hidden inside it.

   qdiscs are divided into two categories:
   - "queues", which have no internal structure visible from outside.
   - "schedulers", which split all packets into "traffic classes",
     using "packet classifiers" (see cls_api.c).

   In turn, classes may have child qdiscs (as a rule, queues)
   attached to them, and so on recursively.

   The goal of the routines in this file is to translate the
   information supplied by the user in the form of handles into
   a form more intelligible to the kernel, to perform sanity
   checks and the parts of the work common to all qdiscs,
   and to provide rtnetlink notifications.

   All the real intelligent work is done inside the qdisc modules.



   Every discipline has two major routines: enqueue and dequeue.

   ---dequeue

   dequeue usually returns an skb to send. It is allowed to return NULL,
   but that does not mean the queue is empty; it just means that the
   discipline does not want to send anything at this time.
   The queue is really empty only if q->q.qlen == 0.
   For complicated disciplines with multiple queues, q->q is not the
   real packet queue, but q->q.qlen must nevertheless be valid.

   ---enqueue

   enqueue returns 0 if the packet was enqueued successfully.
   If a packet (this one or another one) was dropped, it returns
   a non-zero error code:
   NET_XMIT_DROP	- this packet was dropped.
     Expected action: do not back off, but wait until the queue clears.
   NET_XMIT_CN		- this packet was probably enqueued, but another one was dropped.
     Expected action: back off or ignore.
   NET_XMIT_POLICED	- dropped by police.
     Expected action: back off or report an error to real-time apps.

   Auxiliary routines:

   ---requeue

   requeues a previously dequeued packet. It is used for non-standard
   or simply buggy devices that can defer output even when
   netif_queue_stopped() == 0.

   ---peek

   like dequeue, but without removing the packet from the queue.

   ---reset

   returns the qdisc to its initial state: purge all buffers, clear all
   timers, counters (except statistics), etc.

   ---init

   initializes a newly created qdisc.

   ---destroy

   destroys the resources allocated by init and during the lifetime of the qdisc.

   ---change

   changes qdisc parameters.
 */

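/*
 * Editor's illustration (not part of the original file): a minimal
 * classless qdisc honouring the contract above, written against the
 * sch_generic.h helpers of this kernel. All "example_*" names are
 * hypothetical; ->requeue is left NULL so register_qdisc() below
 * fills in the noop default.
 */
static int example_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	/* FIFO with tail drop once the device queue length is reached. */
	if (likely(skb_queue_len(&sch->q) < qdisc_dev(sch)->tx_queue_len))
		return qdisc_enqueue_tail(skb, sch);	/* 0 == success */
	return qdisc_drop(skb, sch);			/* NET_XMIT_DROP */
}

static struct sk_buff *example_dequeue(struct Qdisc *sch)
{
	return qdisc_dequeue_head(sch);		/* NULL: nothing to send now */
}

static struct sk_buff *example_peek(struct Qdisc *sch)
{
	/* Like dequeue, but leaves the skb on the queue. */
	return skb_peek(&sch->q);
}

static struct Qdisc_ops example_qdisc_ops __read_mostly = {
	.id		= "example",
	.priv_size	= 0,
	.enqueue	= example_enqueue,
	.dequeue	= example_dequeue,
	.peek		= example_peek,
	.owner		= THIS_MODULE,
};
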
/* Protects the list of registered TC modules. It is a pure SMP lock. */
static DEFINE_RWLOCK(qdisc_mod_lock);


/************************************************
 *	Queueing disciplines manipulation.	*
 ************************************************/


/* The list of all installed queueing disciplines. */

static struct Qdisc_ops *qdisc_base;

/* Register/unregister queueing discipline */

int register_qdisc(struct Qdisc_ops *qops)
{
	struct Qdisc_ops *q, **qp;
	int rc = -EEXIST;

	write_lock(&qdisc_mod_lock);
	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
		if (!strcmp(qops->id, q->id))
			goto out;

	if (qops->enqueue == NULL)
		qops->enqueue = noop_qdisc_ops.enqueue;
	if (qops->requeue == NULL)
		qops->requeue = noop_qdisc_ops.requeue;
	if (qops->peek == NULL) {
		if (qops->dequeue == NULL) {
			qops->peek = noop_qdisc_ops.peek;
		} else {
			rc = -EINVAL;
			goto out;
		}
	}
	if (qops->dequeue == NULL)
		qops->dequeue = noop_qdisc_ops.dequeue;

	qops->next = NULL;
	*qp = qops;
	rc = 0;
out:
	write_unlock(&qdisc_mod_lock);
	return rc;
}
EXPORT_SYMBOL(register_qdisc);

int unregister_qdisc(struct Qdisc_ops *qops)
{
	struct Qdisc_ops *q, **qp;
	int err = -ENOENT;

	write_lock(&qdisc_mod_lock);
	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
		if (q == qops)
			break;
	if (q) {
		*qp = q->next;
		q->next = NULL;
		err = 0;
	}
	write_unlock(&qdisc_mod_lock);
	return err;
}
EXPORT_SYMBOL(unregister_qdisc);

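/*
 * Editor's illustration: registering/unregistering the hypothetical
 * ops above from a module. Note the invariant register_qdisc() now
 * enforces for the new ->peek() op: an ops that supplies ->dequeue
 * without ->peek is rejected with -EINVAL.
 */
static int __init example_module_init(void)
{
	return register_qdisc(&example_qdisc_ops);
}

static void __exit example_module_exit(void)
{
	unregister_qdisc(&example_qdisc_ops);
}

module_init(example_module_init);
module_exit(example_module_exit);
MODULE_LICENSE("GPL");
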
/* We know the handle. Find the qdisc among all qdiscs attached to the
   device (root qdisc, all its children, children of children, etc.)
 */

struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
{
	struct Qdisc *q;

	if (!(root->flags & TCQ_F_BUILTIN) &&
	    root->handle == handle)
		return root;

	list_for_each_entry(q, &root->list, list) {
		if (q->handle == handle)
			return q;
	}
	return NULL;
}

/*
 * This lock is needed until some qdiscs stop calling qdisc_tree_decrease_qlen()
 * without rtnl_lock(); currently hfsc_dequeue(), netem_dequeue(), tbf_dequeue()
 */
static DEFINE_SPINLOCK(qdisc_list_lock);

static void qdisc_list_add(struct Qdisc *q)
{
	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
		spin_lock_bh(&qdisc_list_lock);
		list_add_tail(&q->list, &qdisc_root_sleeping(q)->list);
		spin_unlock_bh(&qdisc_list_lock);
	}
}

void qdisc_list_del(struct Qdisc *q)
{
	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
		spin_lock_bh(&qdisc_list_lock);
		list_del(&q->list);
		spin_unlock_bh(&qdisc_list_lock);
	}
}
EXPORT_SYMBOL(qdisc_list_del);

struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
{
	unsigned int i;
	struct Qdisc *q;

	spin_lock_bh(&qdisc_list_lock);

	for (i = 0; i < dev->num_tx_queues; i++) {
		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
		struct Qdisc *txq_root = txq->qdisc_sleeping;

		q = qdisc_match_from_root(txq_root, handle);
		if (q)
			goto unlock;
	}

	q = qdisc_match_from_root(dev->rx_queue.qdisc_sleeping, handle);

unlock:
	spin_unlock_bh(&qdisc_list_lock);

	return q;
}

static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
{
	unsigned long cl;
	struct Qdisc *leaf;
	const struct Qdisc_class_ops *cops = p->ops->cl_ops;

	if (cops == NULL)
		return NULL;
	cl = cops->get(p, classid);

	if (cl == 0)
		return NULL;
	leaf = cops->leaf(p, cl);
	cops->put(p, cl);
	return leaf;
}

/* Find queueing discipline by name */

static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
{
	struct Qdisc_ops *q = NULL;

	if (kind) {
		read_lock(&qdisc_mod_lock);
		for (q = qdisc_base; q; q = q->next) {
			if (nla_strcmp(kind, q->id) == 0) {
				if (!try_module_get(q->owner))
					q = NULL;
				break;
			}
		}
		read_unlock(&qdisc_mod_lock);
	}
	return q;
}

static struct qdisc_rate_table *qdisc_rtab_list;

struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct nlattr *tab)
{
	struct qdisc_rate_table *rtab;

	for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
		if (memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) == 0) {
			rtab->refcnt++;
			return rtab;
		}
	}

	if (tab == NULL || r->rate == 0 || r->cell_log == 0 ||
	    nla_len(tab) != TC_RTAB_SIZE)
		return NULL;

	rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
	if (rtab) {
		rtab->rate = *r;
		rtab->refcnt = 1;
		memcpy(rtab->data, nla_data(tab), 1024);
		rtab->next = qdisc_rtab_list;
		qdisc_rtab_list = rtab;
	}
	return rtab;
}
EXPORT_SYMBOL(qdisc_get_rtab);

void qdisc_put_rtab(struct qdisc_rate_table *tab)
{
	struct qdisc_rate_table *rtab, **rtabp;

	if (!tab || --tab->refcnt)
		return;

	for (rtabp = &qdisc_rtab_list; (rtab = *rtabp) != NULL; rtabp = &rtab->next) {
		if (rtab == tab) {
			*rtabp = rtab->next;
			kfree(rtab);
			return;
		}
	}
}
EXPORT_SYMBOL(qdisc_put_rtab);

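/*
 * Editor's illustration: how a shaper typically consumes a rate table
 * obtained from qdisc_get_rtab(). The qdisc_l2t() helper from
 * sch_generic.h maps a packet length to its transmission time in
 * psched ticks via rtab->data[]; "example_tx_time" is hypothetical.
 */
static u32 example_tx_time(struct qdisc_rate_table *rtab, unsigned int pktlen)
{
	return qdisc_l2t(rtab, pktlen);	/* length -> ticks, per the userspace table */
}
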
static LIST_HEAD(qdisc_stab_list);
static DEFINE_SPINLOCK(qdisc_stab_lock);

static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
	[TCA_STAB_BASE]	= { .len = sizeof(struct tc_sizespec) },
	[TCA_STAB_DATA] = { .type = NLA_BINARY },
};

static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt)
{
	struct nlattr *tb[TCA_STAB_MAX + 1];
	struct qdisc_size_table *stab;
	struct tc_sizespec *s;
	unsigned int tsize = 0;
	u16 *tab = NULL;
	int err;

	err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy);
	if (err < 0)
		return ERR_PTR(err);
	if (!tb[TCA_STAB_BASE])
		return ERR_PTR(-EINVAL);

	s = nla_data(tb[TCA_STAB_BASE]);

	if (s->tsize > 0) {
		if (!tb[TCA_STAB_DATA])
			return ERR_PTR(-EINVAL);
		tab = nla_data(tb[TCA_STAB_DATA]);
		tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
	}

	if (!s || tsize != s->tsize || (!tab && tsize > 0))
		return ERR_PTR(-EINVAL);

	spin_lock(&qdisc_stab_lock);

	list_for_each_entry(stab, &qdisc_stab_list, list) {
		if (memcmp(&stab->szopts, s, sizeof(*s)))
			continue;
		if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16)))
			continue;
		stab->refcnt++;
		spin_unlock(&qdisc_stab_lock);
		return stab;
	}

	spin_unlock(&qdisc_stab_lock);

	stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL);
	if (!stab)
		return ERR_PTR(-ENOMEM);

	stab->refcnt = 1;
	stab->szopts = *s;
	if (tsize > 0)
		memcpy(stab->data, tab, tsize * sizeof(u16));

	spin_lock(&qdisc_stab_lock);
	list_add_tail(&stab->list, &qdisc_stab_list);
	spin_unlock(&qdisc_stab_lock);

	return stab;
}

void qdisc_put_stab(struct qdisc_size_table *tab)
{
	if (!tab)
		return;

	spin_lock(&qdisc_stab_lock);

	if (--tab->refcnt == 0) {
		list_del(&tab->list);
		kfree(tab);
	}

	spin_unlock(&qdisc_stab_lock);
}
EXPORT_SYMBOL(qdisc_put_stab);

static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab)
{
	struct nlattr *nest;

	nest = nla_nest_start(skb, TCA_STAB);
	NLA_PUT(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts);
	nla_nest_end(skb, nest);

	return skb->len;

nla_put_failure:
	return -1;
}

void qdisc_calculate_pkt_len(struct sk_buff *skb, struct qdisc_size_table *stab)
{
	int pkt_len, slot;

	pkt_len = skb->len + stab->szopts.overhead;
	if (unlikely(!stab->szopts.tsize))
		goto out;

	slot = pkt_len + stab->szopts.cell_align;
	if (unlikely(slot < 0))
		slot = 0;

	slot >>= stab->szopts.cell_log;
	if (likely(slot < stab->szopts.tsize))
		pkt_len = stab->data[slot];
	else
		pkt_len = stab->data[stab->szopts.tsize - 1] *
				(slot / stab->szopts.tsize) +
				stab->data[slot % stab->szopts.tsize];

	pkt_len <<= stab->szopts.size_log;
out:
	if (unlikely(pkt_len < 1))
		pkt_len = 1;
	qdisc_skb_cb(skb)->pkt_len = pkt_len;
}
EXPORT_SYMBOL(qdisc_calculate_pkt_len);

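/*
 * Editor's illustration (self-contained user-space C, hypothetical
 * numbers): the size-table arithmetic performed above. With overhead
 * 24, cell_log 6 (64-byte cells) and size_log 0, a 100-byte packet
 * lands in slot (100 + 24) >> 6 == 1 and is billed as data[1] bytes.
 */
#include <stdio.h>

int main(void)
{
	unsigned short data[4] = { 64, 128, 192, 256 };	/* stab->data */
	int tsize = 4, cell_log = 6, cell_align = 0;
	int overhead = 24, size_log = 0;
	int pkt_len = 100 + overhead;			/* skb->len + overhead */
	int slot = (pkt_len + cell_align) >> cell_log;

	if (slot < tsize)
		pkt_len = data[slot];
	else	/* beyond the table: extrapolate linearly, as above */
		pkt_len = data[tsize - 1] * (slot / tsize) + data[slot % tsize];

	printf("billed length: %d\n", pkt_len << size_log);	/* prints 128 */
	return 0;
}
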
static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
{
	struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
						 timer);

	wd->qdisc->flags &= ~TCQ_F_THROTTLED;
	smp_wmb();
	__netif_schedule(qdisc_root(wd->qdisc));

	return HRTIMER_NORESTART;
}

void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
{
	hrtimer_init(&wd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
	wd->timer.function = qdisc_watchdog;
	wd->qdisc = qdisc;
}
EXPORT_SYMBOL(qdisc_watchdog_init);

void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires)
{
	ktime_t time;

	if (test_bit(__QDISC_STATE_DEACTIVATED,
		     &qdisc_root_sleeping(wd->qdisc)->state))
		return;

	wd->qdisc->flags |= TCQ_F_THROTTLED;
	time = ktime_set(0, 0);
	time = ktime_add_ns(time, PSCHED_US2NS(expires));
	hrtimer_start(&wd->timer, time, HRTIMER_MODE_ABS);
}
EXPORT_SYMBOL(qdisc_watchdog_schedule);

void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
{
	hrtimer_cancel(&wd->timer);
	wd->qdisc->flags &= ~TCQ_F_THROTTLED;
}
EXPORT_SYMBOL(qdisc_watchdog_cancel);

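/*
 * Editor's illustration: the usual pattern for using the watchdog from
 * a rate-limiting qdisc's ->dequeue(), modelled loosely on tbf_dequeue().
 * "example_sched_data" and its next_send field are hypothetical.
 */
struct example_sched_data {
	struct qdisc_watchdog	watchdog;	/* set up via qdisc_watchdog_init() */
	psched_time_t		next_send;
};

static struct sk_buff *example_throttled_dequeue(struct Qdisc *sch)
{
	struct example_sched_data *q = qdisc_priv(sch);
	psched_time_t now = psched_get_time();

	if (now < q->next_send) {
		/* Too early to send: arm the hrtimer and return NULL
		 * while q->q.qlen stays non-zero (see the header comment
		 * on the dequeue contract). */
		qdisc_watchdog_schedule(&q->watchdog, q->next_send);
		return NULL;
	}
	return qdisc_dequeue_head(sch);
}
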
static struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
{
	unsigned int size = n * sizeof(struct hlist_head), i;
	struct hlist_head *h;

	if (size <= PAGE_SIZE)
		h = kmalloc(size, GFP_KERNEL);
	else
		h = (struct hlist_head *)
			__get_free_pages(GFP_KERNEL, get_order(size));

	if (h != NULL) {
		for (i = 0; i < n; i++)
			INIT_HLIST_HEAD(&h[i]);
	}
	return h;
}

static void qdisc_class_hash_free(struct hlist_head *h, unsigned int n)
{
	unsigned int size = n * sizeof(struct hlist_head);

	if (size <= PAGE_SIZE)
		kfree(h);
	else
		free_pages((unsigned long)h, get_order(size));
}

void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
{
	struct Qdisc_class_common *cl;
	struct hlist_node *n, *next;
	struct hlist_head *nhash, *ohash;
	unsigned int nsize, nmask, osize;
	unsigned int i, h;

	/* Rehash when load factor exceeds 0.75 */
	if (clhash->hashelems * 4 <= clhash->hashsize * 3)
		return;
	nsize = clhash->hashsize * 2;
	nmask = nsize - 1;
	nhash = qdisc_class_hash_alloc(nsize);
	if (nhash == NULL)
		return;

	ohash = clhash->hash;
	osize = clhash->hashsize;

	sch_tree_lock(sch);
	for (i = 0; i < osize; i++) {
		hlist_for_each_entry_safe(cl, n, next, &ohash[i], hnode) {
			h = qdisc_class_hash(cl->classid, nmask);
			hlist_add_head(&cl->hnode, &nhash[h]);
		}
	}
	clhash->hash = nhash;
	clhash->hashsize = nsize;
	clhash->hashmask = nmask;
	sch_tree_unlock(sch);

	qdisc_class_hash_free(ohash, osize);
}
EXPORT_SYMBOL(qdisc_class_hash_grow);

int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
{
	unsigned int size = 4;

	clhash->hash = qdisc_class_hash_alloc(size);
	if (clhash->hash == NULL)
		return -ENOMEM;
	clhash->hashsize = size;
	clhash->hashmask = size - 1;
	clhash->hashelems = 0;
	return 0;
}
EXPORT_SYMBOL(qdisc_class_hash_init);

void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
{
	qdisc_class_hash_free(clhash->hash, clhash->hashsize);
}
EXPORT_SYMBOL(qdisc_class_hash_destroy);

void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash,
			     struct Qdisc_class_common *cl)
{
	unsigned int h;

	INIT_HLIST_NODE(&cl->hnode);
	h = qdisc_class_hash(cl->classid, clhash->hashmask);
	hlist_add_head(&cl->hnode, &clhash->hash[h]);
	clhash->hashelems++;
}
EXPORT_SYMBOL(qdisc_class_hash_insert);

void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash,
			     struct Qdisc_class_common *cl)
{
	hlist_del(&cl->hnode);
	clhash->hashelems--;
}
EXPORT_SYMBOL(qdisc_class_hash_remove);

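/*
 * Editor's illustration: how a classful qdisc typically uses the class
 * hash helpers above (compare cbq/htb). "example_class" is hypothetical;
 * its embedded Qdisc_class_common carries the classid and hash linkage,
 * and clhash itself is set up once with qdisc_class_hash_init() in ->init().
 */
struct example_class {
	struct Qdisc_class_common common;
	/* ... per-class scheduler state ... */
};

static void example_attach_class(struct Qdisc *sch,
				 struct Qdisc_class_hash *clhash,
				 struct example_class *cl, u32 classid)
{
	cl->common.classid = classid;
	qdisc_class_hash_insert(clhash, &cl->common);
	/* Doubles the table once the load factor exceeds 3/4: */
	qdisc_class_hash_grow(sch, clhash);
}
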
/* Allocate a unique handle from the space managed by the kernel */

static u32 qdisc_alloc_handle(struct net_device *dev)
{
	int i = 0x10000;
	static u32 autohandle = TC_H_MAKE(0x80000000U, 0);

	do {
		autohandle += TC_H_MAKE(0x10000U, 0);
		if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
			autohandle = TC_H_MAKE(0x80000000U, 0);
	} while (qdisc_lookup(dev, autohandle) && --i > 0);

	return i > 0 ? autohandle : 0;
}

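/*
 * Editor's illustration (self-contained user-space C): the TC_H_*
 * handle encoding used above. A handle packs the qdisc major number
 * into the top 16 bits and the class minor into the bottom 16; the
 * allocator above hands out majors counting upward from 8001:0.
 */
#include <stdio.h>
#include <linux/rtnetlink.h>	/* TC_H_MAKE, TC_H_MAJ, TC_H_MIN */

int main(void)
{
	unsigned int h = TC_H_MAKE(0x80010000U, 0x10);	/* "8001:10" */

	printf("major %x, minor %x\n", TC_H_MAJ(h) >> 16, TC_H_MIN(h));
	return 0;
}
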
/* Attach toplevel qdisc to device queue. */

static struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue,
				     struct Qdisc *qdisc)
{
	struct Qdisc *oqdisc = dev_queue->qdisc_sleeping;
	spinlock_t *root_lock;

	root_lock = qdisc_lock(oqdisc);
	spin_lock_bh(root_lock);

	/* Prune old scheduler */
	if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1)
		qdisc_reset(oqdisc);

	/* ... and graft new one */
	if (qdisc == NULL)
		qdisc = &noop_qdisc;
	dev_queue->qdisc_sleeping = qdisc;
	rcu_assign_pointer(dev_queue->qdisc, &noop_qdisc);

	spin_unlock_bh(root_lock);

	return oqdisc;
}

void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
{
	const struct Qdisc_class_ops *cops;
	unsigned long cl;
	u32 parentid;

	if (n == 0)
		return;
	while ((parentid = sch->parent)) {
		if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
			return;

		sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
		if (sch == NULL) {
			WARN_ON(parentid != TC_H_ROOT);
			return;
		}
		cops = sch->ops->cl_ops;
		if (cops->qlen_notify) {
			cl = cops->get(sch, parentid);
			cops->qlen_notify(sch, cl);
			cops->put(sch, cl);
		}
		sch->q.qlen -= n;
	}
}
EXPORT_SYMBOL(qdisc_tree_decrease_qlen);

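/*
 * Editor's illustration: a qdisc that drops packets outside its normal
 * enqueue/dequeue paths (e.g. on a timer) must propagate the change to
 * its ancestors with qdisc_tree_decrease_qlen(), or their q.qlen
 * counters go stale. "example_drop_expired" is hypothetical.
 */
static void example_drop_expired(struct Qdisc *sch, unsigned int dropped)
{
	/* ... the skbs were already unlinked and freed, and this
	 * qdisc's own sch->q.qlen was decremented by its drop path ... */
	sch->qstats.drops += dropped;
	qdisc_tree_decrease_qlen(sch, dropped);	/* fix ancestors' qlen */
}
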
static void notify_and_destroy(struct sk_buff *skb, struct nlmsghdr *n, u32 clid,
			       struct Qdisc *old, struct Qdisc *new)
{
	if (new || old)
		qdisc_notify(skb, n, clid, old, new);

	if (old)
		qdisc_destroy(old);
}

/* Graft qdisc "new" to class "classid" of qdisc "parent" or
 * to device "dev".
 *
 * When appropriate, send a netlink notification using "skb"
 * and "n".
 *
 * On success, destroy the old qdisc.
 */

static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
		       struct sk_buff *skb, struct nlmsghdr *n, u32 classid,
		       struct Qdisc *new, struct Qdisc *old)
{
	struct Qdisc *q = old;
	int err = 0;

	if (parent == NULL) {
		unsigned int i, num_q, ingress;

		ingress = 0;
		num_q = dev->num_tx_queues;
		if ((q && q->flags & TCQ_F_INGRESS) ||
		    (new && new->flags & TCQ_F_INGRESS)) {
			num_q = 1;
			ingress = 1;
		}

		if (dev->flags & IFF_UP)
			dev_deactivate(dev);

		for (i = 0; i < num_q; i++) {
			struct netdev_queue *dev_queue = &dev->rx_queue;

			if (!ingress)
				dev_queue = netdev_get_tx_queue(dev, i);

			old = dev_graft_qdisc(dev_queue, new);
			if (new && i > 0)
				atomic_inc(&new->refcnt);

			notify_and_destroy(skb, n, classid, old, new);
		}

		if (dev->flags & IFF_UP)
			dev_activate(dev);
	} else {
		const struct Qdisc_class_ops *cops = parent->ops->cl_ops;

		err = -EINVAL;

		if (cops) {
			unsigned long cl = cops->get(parent, classid);
			if (cl) {
				err = cops->graft(parent, cl, new, &old);
				cops->put(parent, cl);
			}
		}
		if (!err)
			notify_and_destroy(skb, n, classid, old, new);
	}
	return err;
}

/* lockdep annotation is needed for ingress; egress gets it only for name */
static struct lock_class_key qdisc_tx_lock;
static struct lock_class_key qdisc_rx_lock;

/*
   Allocate and initialize a new qdisc.

   Parameters are passed via opt.
 */

static struct Qdisc *
qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
	     u32 parent, u32 handle, struct nlattr **tca, int *errp)
{
	int err;
	struct nlattr *kind = tca[TCA_KIND];
	struct Qdisc *sch;
	struct Qdisc_ops *ops;
	struct qdisc_size_table *stab;

	ops = qdisc_lookup_ops(kind);
#ifdef CONFIG_MODULES
	if (ops == NULL && kind != NULL) {
		char name[IFNAMSIZ];
		if (nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
			/* We dropped the RTNL semaphore in order to
			 * perform the module load. So, even if we
			 * succeeded in loading the module we have to
			 * tell the caller to replay the request. We
			 * indicate this using -EAGAIN.
			 * We replay the request because the device may
			 * go away in the meantime.
			 */
			rtnl_unlock();
			request_module("sch_%s", name);
			rtnl_lock();
			ops = qdisc_lookup_ops(kind);
			if (ops != NULL) {
				/* We will try qdisc_lookup_ops again,
				 * so don't keep a reference.
				 */
				module_put(ops->owner);
				err = -EAGAIN;
				goto err_out;
			}
		}
	}
#endif

	err = -ENOENT;
	if (ops == NULL)
		goto err_out;

	sch = qdisc_alloc(dev_queue, ops);
	if (IS_ERR(sch)) {
		err = PTR_ERR(sch);
		goto err_out2;
	}

	sch->parent = parent;

	if (handle == TC_H_INGRESS) {
		sch->flags |= TCQ_F_INGRESS;
		handle = TC_H_MAKE(TC_H_INGRESS, 0);
		lockdep_set_class(qdisc_lock(sch), &qdisc_rx_lock);
	} else {
		if (handle == 0) {
			handle = qdisc_alloc_handle(dev);
			err = -ENOMEM;
			if (handle == 0)
				goto err_out3;
		}
		lockdep_set_class(qdisc_lock(sch), &qdisc_tx_lock);
	}

	sch->handle = handle;

	if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) {
		if (tca[TCA_STAB]) {
			stab = qdisc_get_stab(tca[TCA_STAB]);
			if (IS_ERR(stab)) {
				err = PTR_ERR(stab);
				goto err_out3;
			}
			sch->stab = stab;
		}
		if (tca[TCA_RATE]) {
			spinlock_t *root_lock;

			if ((sch->parent != TC_H_ROOT) &&
			    !(sch->flags & TCQ_F_INGRESS))
				root_lock = qdisc_root_sleeping_lock(sch);
			else
				root_lock = qdisc_lock(sch);

			err = gen_new_estimator(&sch->bstats, &sch->rate_est,
						root_lock, tca[TCA_RATE]);
			if (err) {
				/*
				 * Any broken qdiscs that would require
				 * an ops->reset() here? The qdisc was never
				 * in action, so it shouldn't be necessary.
				 */
				if (ops->destroy)
					ops->destroy(sch);
				goto err_out3;
			}
		}

		qdisc_list_add(sch);

		return sch;
	}
err_out3:
	qdisc_put_stab(sch->stab);
	dev_put(dev);
	kfree((char *) sch - sch->padded);
err_out2:
	module_put(ops->owner);
err_out:
	*errp = err;
	return NULL;
}

static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
{
	struct qdisc_size_table *stab = NULL;
	int err = 0;

	if (tca[TCA_OPTIONS]) {
		if (sch->ops->change == NULL)
			return -EINVAL;
		err = sch->ops->change(sch, tca[TCA_OPTIONS]);
		if (err)
			return err;
	}

	if (tca[TCA_STAB]) {
		stab = qdisc_get_stab(tca[TCA_STAB]);
		if (IS_ERR(stab))
			return PTR_ERR(stab);
	}

	qdisc_put_stab(sch->stab);
	sch->stab = stab;

	if (tca[TCA_RATE])
		gen_replace_estimator(&sch->bstats, &sch->rate_est,
				      qdisc_root_sleeping_lock(sch),
				      tca[TCA_RATE]);
	return 0;
}

struct check_loop_arg
{
	struct qdisc_walker	w;
	struct Qdisc		*p;
	int			depth;
};

static int check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w);

static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
{
	struct check_loop_arg arg;

	if (q->ops->cl_ops == NULL)
		return 0;

	arg.w.stop = arg.w.skip = arg.w.count = 0;
	arg.w.fn = check_loop_fn;
	arg.depth = depth;
	arg.p = p;
	q->ops->cl_ops->walk(q, &arg.w);
	return arg.w.stop ? -ELOOP : 0;
}

static int
check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
{
	struct Qdisc *leaf;
	const struct Qdisc_class_ops *cops = q->ops->cl_ops;
	struct check_loop_arg *arg = (struct check_loop_arg *)w;

	leaf = cops->leaf(q, cl);
	if (leaf) {
		if (leaf == arg->p || arg->depth > 7)
			return -ELOOP;
		return check_loop(leaf, arg->p, arg->depth + 1);
	}
	return 0;
}

/*
 * Delete/get qdisc.
 */

static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm = NLMSG_DATA(n);
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	u32 clid = tcm->tcm_parent;
	struct Qdisc *q = NULL;
	struct Qdisc *p = NULL;
	int err;

	if (net != &init_net)
		return -EINVAL;

	if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
		return -ENODEV;

	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
	if (err < 0)
		return err;

	if (clid) {
		if (clid != TC_H_ROOT) {
			if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
				if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
					return -ENOENT;
				q = qdisc_leaf(p, clid);
			} else { /* ingress */
				q = dev->rx_queue.qdisc_sleeping;
			}
		} else {
			struct netdev_queue *dev_queue;
			dev_queue = netdev_get_tx_queue(dev, 0);
			q = dev_queue->qdisc_sleeping;
		}
		if (!q)
			return -ENOENT;

		if (tcm->tcm_handle && q->handle != tcm->tcm_handle)
			return -EINVAL;
	} else {
		if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
			return -ENOENT;
	}

	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
		return -EINVAL;

	if (n->nlmsg_type == RTM_DELQDISC) {
		if (!clid)
			return -EINVAL;
		if (q->handle == 0)
			return -ENOENT;
		if ((err = qdisc_graft(dev, p, skb, n, clid, NULL, q)) != 0)
			return err;
	} else {
		qdisc_notify(skb, n, clid, NULL, q);
	}
	return 0;
}

/*
 * Create/change qdisc.
 */

static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm;
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	u32 clid;
	struct Qdisc *q, *p;
	int err;

	if (net != &init_net)
		return -EINVAL;

replay:
	/* Reinit, just in case something touches this. */
	tcm = NLMSG_DATA(n);
	clid = tcm->tcm_parent;
	q = p = NULL;

	if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
		return -ENODEV;

	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
	if (err < 0)
		return err;

	if (clid) {
		if (clid != TC_H_ROOT) {
			if (clid != TC_H_INGRESS) {
				if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
					return -ENOENT;
				q = qdisc_leaf(p, clid);
			} else { /* ingress */
				q = dev->rx_queue.qdisc_sleeping;
			}
		} else {
			struct netdev_queue *dev_queue;
			dev_queue = netdev_get_tx_queue(dev, 0);
			q = dev_queue->qdisc_sleeping;
		}

		/* It may be the default qdisc; ignore it. */
		if (q && q->handle == 0)
			q = NULL;

		if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
			if (tcm->tcm_handle) {
				if (q && !(n->nlmsg_flags & NLM_F_REPLACE))
					return -EEXIST;
				if (TC_H_MIN(tcm->tcm_handle))
					return -EINVAL;
				if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
					goto create_n_graft;
				if (n->nlmsg_flags & NLM_F_EXCL)
					return -EEXIST;
				if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
					return -EINVAL;
				if (q == p ||
				    (p && check_loop(q, p, 0)))
					return -ELOOP;
				atomic_inc(&q->refcnt);
				goto graft;
			} else {
				if (q == NULL)
					goto create_n_graft;

				/* This magic test requires explanation.
				 *
				 * We know that some child q is already
				 * attached to this parent and have a choice:
				 * either to change it or to create/graft a
				 * new one.
				 *
				 * 1. We are allowed to create/graft only
				 * if both the CREATE and REPLACE flags are set.
				 *
				 * 2. If EXCL is set, the requestor meant
				 * that the qdisc tcm_handle is not expected
				 * to exist, so we choose create/graft too.
				 *
				 * 3. The last case is when no flags are set.
				 * Alas, it is a sort of hole in the API; we
				 * cannot decide what to do unambiguously.
				 * For now we select create/graft if the
				 * user gave a KIND that does not match the
				 * existing one.
				 */
				if ((n->nlmsg_flags & NLM_F_CREATE) &&
				    (n->nlmsg_flags & NLM_F_REPLACE) &&
				    ((n->nlmsg_flags & NLM_F_EXCL) ||
				     (tca[TCA_KIND] &&
				      nla_strcmp(tca[TCA_KIND], q->ops->id))))
					goto create_n_graft;
			}
		}
	} else {
		if (!tcm->tcm_handle)
			return -EINVAL;
		q = qdisc_lookup(dev, tcm->tcm_handle);
	}

	/* Change qdisc parameters */
	if (q == NULL)
		return -ENOENT;
	if (n->nlmsg_flags & NLM_F_EXCL)
		return -EEXIST;
	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
		return -EINVAL;
	err = qdisc_change(q, tca);
	if (err == 0)
		qdisc_notify(skb, n, clid, NULL, q);
	return err;

create_n_graft:
	if (!(n->nlmsg_flags & NLM_F_CREATE))
		return -ENOENT;
	if (clid == TC_H_INGRESS)
		q = qdisc_create(dev, &dev->rx_queue,
				 tcm->tcm_parent, tcm->tcm_parent,
				 tca, &err);
	else
		q = qdisc_create(dev, netdev_get_tx_queue(dev, 0),
				 tcm->tcm_parent, tcm->tcm_handle,
				 tca, &err);
	if (q == NULL) {
		if (err == -EAGAIN)
			goto replay;
		return err;
	}

graft:
	err = qdisc_graft(dev, p, skb, n, clid, q, NULL);
	if (err) {
		if (q)
			qdisc_destroy(q);
		return err;
	}

	return 0;
}

static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
			 u32 pid, u32 seq, u16 flags, int event)
{
	struct tcmsg *tcm;
	struct nlmsghdr *nlh;
	unsigned char *b = skb_tail_pointer(skb);
	struct gnet_dump d;

	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
	tcm = NLMSG_DATA(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
	tcm->tcm_parent = clid;
	tcm->tcm_handle = q->handle;
	tcm->tcm_info = atomic_read(&q->refcnt);
	NLA_PUT_STRING(skb, TCA_KIND, q->ops->id);
	if (q->ops->dump && q->ops->dump(q, skb) < 0)
		goto nla_put_failure;
	q->qstats.qlen = q->q.qlen;

	if (q->stab && qdisc_dump_stab(skb, q->stab) < 0)
		goto nla_put_failure;

	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
					 qdisc_root_sleeping_lock(q), &d) < 0)
		goto nla_put_failure;

	if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
		goto nla_put_failure;

	if (gnet_stats_copy_basic(&d, &q->bstats) < 0 ||
	    gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
	    gnet_stats_copy_queue(&d, &q->qstats) < 0)
		goto nla_put_failure;

	if (gnet_stats_finish_copy(&d) < 0)
		goto nla_put_failure;

	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

nlmsg_failure:
nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n,
			u32 clid, struct Qdisc *old, struct Qdisc *new)
{
	struct sk_buff *skb;
	u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (old && old->handle) {
		if (tc_fill_qdisc(skb, old, clid, pid, n->nlmsg_seq, 0, RTM_DELQDISC) < 0)
			goto err_out;
	}
	if (new) {
		if (tc_fill_qdisc(skb, new, clid, pid, n->nlmsg_seq, old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
			goto err_out;
	}

	if (skb->len)
		return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags & NLM_F_ECHO);

err_out:
	kfree_skb(skb);
	return -EINVAL;
}

static bool tc_qdisc_dump_ignore(struct Qdisc *q)
{
	return (q->flags & TCQ_F_BUILTIN) ? true : false;
}

static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
			      struct netlink_callback *cb,
			      int *q_idx_p, int s_q_idx)
{
	int ret = 0, q_idx = *q_idx_p;
	struct Qdisc *q;

	if (!root)
		return 0;

	q = root;
	if (q_idx < s_q_idx) {
		q_idx++;
	} else {
		if (!tc_qdisc_dump_ignore(q) &&
		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
				  cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
			goto done;
		q_idx++;
	}
	list_for_each_entry(q, &root->list, list) {
		if (q_idx < s_q_idx) {
			q_idx++;
			continue;
		}
		if (!tc_qdisc_dump_ignore(q) &&
		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
				  cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
			goto done;
		q_idx++;
	}

out:
	*q_idx_p = q_idx;
	return ret;
done:
	ret = -1;
	goto out;
}

static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int idx, q_idx;
	int s_idx, s_q_idx;
	struct net_device *dev;

	if (net != &init_net)
		return 0;

	s_idx = cb->args[0];
	s_q_idx = q_idx = cb->args[1];
	read_lock(&dev_base_lock);
	idx = 0;
	for_each_netdev(&init_net, dev) {
		struct netdev_queue *dev_queue;

		if (idx < s_idx)
			goto cont;
		if (idx > s_idx)
			s_q_idx = 0;
		q_idx = 0;

		dev_queue = netdev_get_tx_queue(dev, 0);
		if (tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb, &q_idx, s_q_idx) < 0)
			goto done;

		dev_queue = &dev->rx_queue;
		if (tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb, &q_idx, s_q_idx) < 0)
			goto done;

cont:
		idx++;
	}

done:
	read_unlock(&dev_base_lock);

	cb->args[0] = idx;
	cb->args[1] = q_idx;

	return skb->len;
}



/************************************************
 *	Traffic classes manipulation.		*
 ************************************************/


static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
{
	struct net *net = sock_net(skb->sk);
	struct netdev_queue *dev_queue;
	struct tcmsg *tcm = NLMSG_DATA(n);
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	struct Qdisc *q = NULL;
	const struct Qdisc_class_ops *cops;
	unsigned long cl = 0;
	unsigned long new_cl;
	u32 pid = tcm->tcm_parent;
	u32 clid = tcm->tcm_handle;
	u32 qid = TC_H_MAJ(clid);
	int err;

	if (net != &init_net)
		return -EINVAL;

	if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
		return -ENODEV;

	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
	if (err < 0)
		return err;

	/*
	   parent == TC_H_UNSPEC - unspecified parent.
	   parent == TC_H_ROOT   - class is root, which has no parent.
	   parent == X:0	 - parent is root class.
	   parent == X:Y	 - parent is a node in hierarchy.
	   parent == 0:Y	 - parent is X:Y, where X:0 is qdisc.

	   handle == 0:0	 - generate handle from kernel pool.
	   handle == 0:Y	 - class is X:Y, where X:0 is qdisc.
	   handle == X:Y	 - clear.
	   handle == X:0	 - root class.
	 */

	/* Step 1. Determine qdisc handle X:0 */

	dev_queue = netdev_get_tx_queue(dev, 0);
	if (pid != TC_H_ROOT) {
		u32 qid1 = TC_H_MAJ(pid);

		if (qid && qid1) {
			/* If both majors are known, they must be identical. */
			if (qid != qid1)
				return -EINVAL;
		} else if (qid1) {
			qid = qid1;
		} else if (qid == 0)
			qid = dev_queue->qdisc_sleeping->handle;

		/* Now qid is the genuine qdisc handle, consistent with
		   both parent and child.

		   TC_H_MAJ(pid) may still be unspecified; complete it now.
		 */
		if (pid)
			pid = TC_H_MAKE(qid, pid);
	} else {
		if (qid == 0)
			qid = dev_queue->qdisc_sleeping->handle;
	}

	/* OK. Locate qdisc */
	if ((q = qdisc_lookup(dev, qid)) == NULL)
		return -ENOENT;

	/* And check that it supports classes */
	cops = q->ops->cl_ops;
	if (cops == NULL)
		return -EINVAL;

	/* Now try to get class */
	if (clid == 0) {
		if (pid == TC_H_ROOT)
			clid = qid;
	} else
		clid = TC_H_MAKE(qid, clid);

	if (clid)
		cl = cops->get(q, clid);

	if (cl == 0) {
		err = -ENOENT;
		if (n->nlmsg_type != RTM_NEWTCLASS || !(n->nlmsg_flags & NLM_F_CREATE))
			goto out;
	} else {
		switch (n->nlmsg_type) {
		case RTM_NEWTCLASS:
			err = -EEXIST;
			if (n->nlmsg_flags & NLM_F_EXCL)
				goto out;
			break;
		case RTM_DELTCLASS:
			err = cops->delete(q, cl);
			if (err == 0)
				tclass_notify(skb, n, q, cl, RTM_DELTCLASS);
			goto out;
		case RTM_GETTCLASS:
			err = tclass_notify(skb, n, q, cl, RTM_NEWTCLASS);
			goto out;
		default:
			err = -EINVAL;
			goto out;
		}
	}

	new_cl = cl;
	err = cops->change(q, clid, pid, tca, &new_cl);
	if (err == 0)
		tclass_notify(skb, n, q, new_cl, RTM_NEWTCLASS);

out:
	if (cl)
		cops->put(q, cl);

	return err;
}


static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
			  unsigned long cl,
			  u32 pid, u32 seq, u16 flags, int event)
{
	struct tcmsg *tcm;
	struct nlmsghdr *nlh;
	unsigned char *b = skb_tail_pointer(skb);
	struct gnet_dump d;
	const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;

	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
	tcm = NLMSG_DATA(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
	tcm->tcm_parent = q->handle;
	tcm->tcm_handle = q->handle;
	tcm->tcm_info = 0;
	NLA_PUT_STRING(skb, TCA_KIND, q->ops->id);
	if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
		goto nla_put_failure;

	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
					 qdisc_root_sleeping_lock(q), &d) < 0)
		goto nla_put_failure;

	if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
		goto nla_put_failure;

	if (gnet_stats_finish_copy(&d) < 0)
		goto nla_put_failure;

	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

nlmsg_failure:
nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
			 struct Qdisc *q, unsigned long cl, int event)
{
	struct sk_buff *skb;
	u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (tc_fill_tclass(skb, q, cl, pid, n->nlmsg_seq, 0, event) < 0) {
		kfree_skb(skb);
		return -EINVAL;
	}

	return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags & NLM_F_ECHO);
}

struct qdisc_dump_args
{
	struct qdisc_walker	w;
	struct sk_buff		*skb;
	struct netlink_callback	*cb;
};

static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walker *arg)
{
	struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;

	return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).pid,
			      a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTCLASS);
}

static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
				struct tcmsg *tcm, struct netlink_callback *cb,
				int *t_p, int s_t)
{
	struct qdisc_dump_args arg;

	if (tc_qdisc_dump_ignore(q) ||
	    *t_p < s_t || !q->ops->cl_ops ||
	    (tcm->tcm_parent &&
	     TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
		(*t_p)++;
		return 0;
	}
	if (*t_p > s_t)
		memset(&cb->args[1], 0, sizeof(cb->args) - sizeof(cb->args[0]));
	arg.w.fn = qdisc_class_dump;
	arg.skb = skb;
	arg.cb = cb;
	arg.w.stop = 0;
	arg.w.skip = cb->args[1];
	arg.w.count = 0;
	q->ops->cl_ops->walk(q, &arg.w);
	cb->args[1] = arg.w.count;
	if (arg.w.stop)
		return -1;
	(*t_p)++;
	return 0;
}

static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
			       struct tcmsg *tcm, struct netlink_callback *cb,
			       int *t_p, int s_t)
{
	struct Qdisc *q;

	if (!root)
		return 0;

	if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
		return -1;

	list_for_each_entry(q, &root->list, list) {
		if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
			return -1;
	}

	return 0;
}

static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct tcmsg *tcm = (struct tcmsg *)NLMSG_DATA(cb->nlh);
	struct net *net = sock_net(skb->sk);
	struct netdev_queue *dev_queue;
	struct net_device *dev;
	int t, s_t;

	if (net != &init_net)
		return 0;

	if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
		return 0;
	if ((dev = dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
		return 0;

	s_t = cb->args[0];
	t = 0;

	dev_queue = netdev_get_tx_queue(dev, 0);
	if (tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb, &t, s_t) < 0)
		goto done;

	dev_queue = &dev->rx_queue;
	if (tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb, &t, s_t) < 0)
		goto done;

done:
	cb->args[0] = t;

	dev_put(dev);
	return skb->len;
}

/* Main classifier routine: scans the classifier chain attached
   to this qdisc, (optionally) tests for protocol, and asks
   specific classifiers.
 */
int tc_classify_compat(struct sk_buff *skb, struct tcf_proto *tp,
		       struct tcf_result *res)
{
	__be16 protocol = skb->protocol;
	int err = 0;

	for (; tp; tp = tp->next) {
		if ((tp->protocol == protocol ||
		     tp->protocol == htons(ETH_P_ALL)) &&
		    (err = tp->classify(skb, tp, res)) >= 0) {
#ifdef CONFIG_NET_CLS_ACT
			if (err != TC_ACT_RECLASSIFY && skb->tc_verd)
				skb->tc_verd = SET_TC_VERD(skb->tc_verd, 0);
#endif
			return err;
		}
	}
	return -1;
}
EXPORT_SYMBOL(tc_classify_compat);

int tc_classify(struct sk_buff *skb, struct tcf_proto *tp,
		struct tcf_result *res)
{
	int err = 0;
	__be16 protocol;
#ifdef CONFIG_NET_CLS_ACT
	struct tcf_proto *otp = tp;
reclassify:
#endif
	protocol = skb->protocol;

	err = tc_classify_compat(skb, tp, res);
#ifdef CONFIG_NET_CLS_ACT
	if (err == TC_ACT_RECLASSIFY) {
		u32 verd = G_TC_VERD(skb->tc_verd);
		tp = otp;

		if (verd++ >= MAX_REC_LOOP) {
			printk("rule prio %u protocol %02x reclassify loop, "
			       "packet dropped\n",
			       tp->prio & 0xffff, ntohs(tp->protocol));
			return TC_ACT_SHOT;
		}
		skb->tc_verd = SET_TC_VERD(skb->tc_verd, verd);
		goto reclassify;
	}
#endif
	return err;
}
EXPORT_SYMBOL(tc_classify);

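/*
 * Editor's illustration: how a classful qdisc's ->enqueue() typically
 * drives tc_classify() (compare prio/htb). "example_classify" and the
 * filter_list parameter are hypothetical; res.class carries whatever
 * the matching filter bound to the class.
 */
static struct example_class *example_classify(struct sk_buff *skb,
					      struct Qdisc *sch,
					      struct tcf_proto *filter_list)
{
	struct tcf_result res;
	int err = tc_classify(skb, filter_list, &res);

#ifdef CONFIG_NET_CLS_ACT
	switch (err) {
	case TC_ACT_SHOT:
	case TC_ACT_STOLEN:
	case TC_ACT_QUEUED:
		return NULL;	/* dropped or consumed by an action */
	}
#endif
	if (err < 0)
		return NULL;	/* no match: caller falls back to a default class */
	return (struct example_class *)res.class;
}
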
void tcf_destroy(struct tcf_proto *tp)
{
	tp->ops->destroy(tp);
	module_put(tp->ops->owner);
	kfree(tp);
}

void tcf_destroy_chain(struct tcf_proto **fl)
{
	struct tcf_proto *tp;

	while ((tp = *fl) != NULL) {
		*fl = tp->next;
		tcf_destroy(tp);
	}
}
EXPORT_SYMBOL(tcf_destroy_chain);

#ifdef CONFIG_PROC_FS
static int psched_show(struct seq_file *seq, void *v)
{
	struct timespec ts;

	hrtimer_get_res(CLOCK_MONOTONIC, &ts);
	seq_printf(seq, "%08x %08x %08x %08x\n",
		   (u32)NSEC_PER_USEC, (u32)PSCHED_US2NS(1),
		   1000000,
		   (u32)NSEC_PER_SEC / (u32)ktime_to_ns(timespec_to_ktime(ts)));

	return 0;
}

static int psched_open(struct inode *inode, struct file *file)
{
	return single_open(file, psched_show, PDE(inode)->data);
}

static const struct file_operations psched_fops = {
	.owner		= THIS_MODULE,
	.open		= psched_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};
#endif

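/*
 * Editor's illustration (self-contained user-space C): reading the
 * /proc/net/psched file produced by psched_show() above. Per that
 * function, the first two words are the nanosecond/psched-tick
 * conversion pair, the third is a constant 1000000, and the fourth is
 * the hrtimer clock resolution expressed in Hz.
 */
#include <stdio.h>

int main(void)
{
	unsigned int t2ns, ticks, c, hz;
	FILE *f = fopen("/proc/net/psched", "r");

	if (!f)
		return 1;
	if (fscanf(f, "%x %x %x %x", &t2ns, &ticks, &c, &hz) != 4) {
		fclose(f);
		return 1;
	}
	printf("tick pair %u/%u, clock resolution %u Hz\n", t2ns, ticks, hz);
	fclose(f);
	return 0;
}
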
static int __init pktsched_init(void)
{
	register_qdisc(&pfifo_qdisc_ops);
	register_qdisc(&bfifo_qdisc_ops);
	proc_net_fops_create(&init_net, "psched", 0, &psched_fops);

	rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL);
	rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL);
	rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc);
	rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL);
	rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL);
	rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass);

	return 0;
}

subsys_initcall(pktsched_init);