Commit | Line | Data |
---|---|---|
5bc1421e NH |
1 | /* |
2 | * net/core/netprio_cgroup.c Priority Control Group | |
3 | * | |
4 | * This program is free software; you can redistribute it and/or | |
5 | * modify it under the terms of the GNU General Public License | |
6 | * as published by the Free Software Foundation; either version | |
7 | * 2 of the License, or (at your option) any later version. | |
8 | * | |
9 | * Authors: Neil Horman <nhorman@tuxdriver.com> | |
10 | */ | |
11 | ||
e005d193 JP |
12 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
13 | ||
5bc1421e NH |
14 | #include <linux/module.h> |
15 | #include <linux/slab.h> | |
16 | #include <linux/types.h> | |
17 | #include <linux/string.h> | |
18 | #include <linux/errno.h> | |
19 | #include <linux/skbuff.h> | |
20 | #include <linux/cgroup.h> | |
21 | #include <linux/rcupdate.h> | |
22 | #include <linux/atomic.h> | |
23 | #include <net/rtnetlink.h> | |
24 | #include <net/pkt_cls.h> | |
25 | #include <net/sock.h> | |
26 | #include <net/netprio_cgroup.h> | |
27 | ||
406a3c63 JF |
28 | #include <linux/fdtable.h> |
29 | ||
/*
 * Starting allocation size, in bytes, used when sizing a priomap.
 * extend_netdev_table() begins here and doubles until the map is big
 * enough, so this is also the smallest priomap ever allocated.
 */
#define PRIOMAP_MIN_SZ		128
5bc1421e NH |
31 | |
32 | static inline struct cgroup_netprio_state *cgrp_netprio_state(struct cgroup *cgrp) | |
33 | { | |
34 | return container_of(cgroup_subsys_state(cgrp, net_prio_subsys_id), | |
35 | struct cgroup_netprio_state, css); | |
36 | } | |
37 | ||
4a6ee25c TH |
38 | /* |
39 | * Extend @dev->priomap so that it's large enough to accomodate | |
40 | * @target_idx. @dev->priomap.priomap_len > @target_idx after successful | |
41 | * return. Must be called under rtnl lock. | |
42 | */ | |
43 | static int extend_netdev_table(struct net_device *dev, u32 target_idx) | |
5bc1421e | 44 | { |
4a6ee25c TH |
45 | struct netprio_map *old, *new; |
46 | size_t new_sz, new_len; | |
5bc1421e | 47 | |
4a6ee25c | 48 | /* is the existing priomap large enough? */ |
52bca930 | 49 | old = rtnl_dereference(dev->priomap); |
4a6ee25c TH |
50 | if (old && old->priomap_len > target_idx) |
51 | return 0; | |
52 | ||
53 | /* | |
54 | * Determine the new size. Let's keep it power-of-two. We start | |
55 | * from PRIOMAP_MIN_SZ and double it until it's large enough to | |
56 | * accommodate @target_idx. | |
57 | */ | |
58 | new_sz = PRIOMAP_MIN_SZ; | |
59 | while (true) { | |
60 | new_len = (new_sz - offsetof(struct netprio_map, priomap)) / | |
61 | sizeof(new->priomap[0]); | |
62 | if (new_len > target_idx) | |
63 | break; | |
64 | new_sz *= 2; | |
65 | /* overflowed? */ | |
66 | if (WARN_ON(new_sz < PRIOMAP_MIN_SZ)) | |
67 | return -ENOSPC; | |
68 | } | |
5bc1421e | 69 | |
4a6ee25c TH |
70 | /* allocate & copy */ |
71 | new = kzalloc(new_sz, GFP_KERNEL); | |
62b5942a | 72 | if (!new) |
ef209f15 | 73 | return -ENOMEM; |
5bc1421e | 74 | |
52bca930 TH |
75 | if (old) |
76 | memcpy(new->priomap, old->priomap, | |
77 | old->priomap_len * sizeof(old->priomap[0])); | |
5bc1421e | 78 | |
52bca930 | 79 | new->priomap_len = new_len; |
5bc1421e | 80 | |
4a6ee25c | 81 | /* install the new priomap */ |
52bca930 TH |
82 | rcu_assign_pointer(dev->priomap, new); |
83 | if (old) | |
84 | kfree_rcu(old, rcu); | |
ef209f15 G |
85 | return 0; |
86 | } | |
87 | ||
666b0ebe TH |
88 | /** |
89 | * netprio_prio - return the effective netprio of a cgroup-net_device pair | |
90 | * @cgrp: cgroup part of the target pair | |
91 | * @dev: net_device part of the target pair | |
92 | * | |
93 | * Should be called under RCU read or rtnl lock. | |
94 | */ | |
95 | static u32 netprio_prio(struct cgroup *cgrp, struct net_device *dev) | |
96 | { | |
97 | struct netprio_map *map = rcu_dereference_rtnl(dev->priomap); | |
98 | ||
99 | if (map && cgrp->id < map->priomap_len) | |
100 | return map->priomap[cgrp->id]; | |
101 | return 0; | |
102 | } | |
103 | ||
104 | /** | |
105 | * netprio_set_prio - set netprio on a cgroup-net_device pair | |
106 | * @cgrp: cgroup part of the target pair | |
107 | * @dev: net_device part of the target pair | |
108 | * @prio: prio to set | |
109 | * | |
110 | * Set netprio to @prio on @cgrp-@dev pair. Should be called under rtnl | |
111 | * lock and may fail under memory pressure for non-zero @prio. | |
112 | */ | |
113 | static int netprio_set_prio(struct cgroup *cgrp, struct net_device *dev, | |
114 | u32 prio) | |
115 | { | |
116 | struct netprio_map *map; | |
117 | int ret; | |
118 | ||
119 | /* avoid extending priomap for zero writes */ | |
120 | map = rtnl_dereference(dev->priomap); | |
121 | if (!prio && (!map || map->priomap_len <= cgrp->id)) | |
122 | return 0; | |
123 | ||
124 | ret = extend_netdev_table(dev, cgrp->id); | |
125 | if (ret) | |
126 | return ret; | |
127 | ||
128 | map = rtnl_dereference(dev->priomap); | |
129 | map->priomap[cgrp->id] = prio; | |
130 | return 0; | |
131 | } | |
132 | ||
92fb9748 | 133 | static struct cgroup_subsys_state *cgrp_css_alloc(struct cgroup *cgrp) |
5bc1421e NH |
134 | { |
135 | struct cgroup_netprio_state *cs; | |
88d642fa | 136 | |
5bc1421e NH |
137 | cs = kzalloc(sizeof(*cs), GFP_KERNEL); |
138 | if (!cs) | |
139 | return ERR_PTR(-ENOMEM); | |
140 | ||
5bc1421e NH |
141 | return &cs->css; |
142 | } | |
143 | ||
811d8d6f | 144 | static int cgrp_css_online(struct cgroup *cgrp) |
5bc1421e | 145 | { |
811d8d6f | 146 | struct cgroup *parent = cgrp->parent; |
5bc1421e | 147 | struct net_device *dev; |
811d8d6f TH |
148 | int ret = 0; |
149 | ||
150 | if (!parent) | |
151 | return 0; | |
5bc1421e | 152 | |
5bc1421e | 153 | rtnl_lock(); |
811d8d6f TH |
154 | /* |
155 | * Inherit prios from the parent. As all prios are set during | |
156 | * onlining, there is no need to clear them on offline. | |
157 | */ | |
158 | for_each_netdev(&init_net, dev) { | |
159 | u32 prio = netprio_prio(parent, dev); | |
160 | ||
161 | ret = netprio_set_prio(cgrp, dev, prio); | |
162 | if (ret) | |
163 | break; | |
164 | } | |
5bc1421e | 165 | rtnl_unlock(); |
811d8d6f TH |
166 | return ret; |
167 | } | |
168 | ||
/* css_free callback: release the net_prio state belonging to @cgrp. */
static void cgrp_css_free(struct cgroup *cgrp)
{
	struct cgroup_netprio_state *state = cgrp_netprio_state(cgrp);

	kfree(state);
}
173 | ||
174 | static u64 read_prioidx(struct cgroup *cgrp, struct cftype *cft) | |
175 | { | |
88d642fa | 176 | return cgrp->id; |
5bc1421e NH |
177 | } |
178 | ||
179 | static int read_priomap(struct cgroup *cont, struct cftype *cft, | |
180 | struct cgroup_map_cb *cb) | |
181 | { | |
182 | struct net_device *dev; | |
5bc1421e NH |
183 | |
184 | rcu_read_lock(); | |
666b0ebe TH |
185 | for_each_netdev_rcu(&init_net, dev) |
186 | cb->fill(cb, dev->name, netprio_prio(cont, dev)); | |
5bc1421e NH |
187 | rcu_read_unlock(); |
188 | return 0; | |
189 | } | |
190 | ||
191 | static int write_priomap(struct cgroup *cgrp, struct cftype *cft, | |
192 | const char *buffer) | |
193 | { | |
6d5759dd | 194 | char devname[IFNAMSIZ + 1]; |
5bc1421e | 195 | struct net_device *dev; |
6d5759dd TH |
196 | u32 prio; |
197 | int ret; | |
5bc1421e | 198 | |
6d5759dd TH |
199 | if (sscanf(buffer, "%"__stringify(IFNAMSIZ)"s %u", devname, &prio) != 2) |
200 | return -EINVAL; | |
5bc1421e NH |
201 | |
202 | dev = dev_get_by_name(&init_net, devname); | |
203 | if (!dev) | |
6d5759dd | 204 | return -ENODEV; |
5bc1421e | 205 | |
476ad154 | 206 | rtnl_lock(); |
6d5759dd | 207 | |
666b0ebe | 208 | ret = netprio_set_prio(cgrp, dev, prio); |
ef209f15 | 209 | |
476ad154 | 210 | rtnl_unlock(); |
5bc1421e | 211 | dev_put(dev); |
5bc1421e NH |
212 | return ret; |
213 | } | |
214 | ||
c3c073f8 AV |
215 | static int update_netprio(const void *v, struct file *file, unsigned n) |
216 | { | |
217 | int err; | |
218 | struct socket *sock = sock_from_file(file, &err); | |
219 | if (sock) | |
220 | sock->sk->sk_cgrp_prioidx = (u32)(unsigned long)v; | |
221 | return 0; | |
222 | } | |
223 | ||
c658f19d | 224 | static void net_prio_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) |
406a3c63 JF |
225 | { |
226 | struct task_struct *p; | |
c3c073f8 | 227 | void *v; |
406a3c63 JF |
228 | |
229 | cgroup_taskset_for_each(p, cgrp, tset) { | |
406a3c63 | 230 | task_lock(p); |
c3c073f8 AV |
231 | v = (void *)(unsigned long)task_netprioidx(p); |
232 | iterate_fd(p->files, 0, update_netprio, v); | |
406a3c63 JF |
233 | task_unlock(p); |
234 | } | |
406a3c63 JF |
235 | } |
236 | ||
5bc1421e NH |
237 | static struct cftype ss_files[] = { |
238 | { | |
239 | .name = "prioidx", | |
240 | .read_u64 = read_prioidx, | |
241 | }, | |
242 | { | |
243 | .name = "ifpriomap", | |
244 | .read_map = read_priomap, | |
245 | .write_string = write_priomap, | |
246 | }, | |
4baf6e33 | 247 | { } /* terminate */ |
5bc1421e NH |
248 | }; |
249 | ||
676f7c8f TH |
250 | struct cgroup_subsys net_prio_subsys = { |
251 | .name = "net_prio", | |
92fb9748 | 252 | .css_alloc = cgrp_css_alloc, |
811d8d6f | 253 | .css_online = cgrp_css_online, |
92fb9748 | 254 | .css_free = cgrp_css_free, |
406a3c63 | 255 | .attach = net_prio_attach, |
676f7c8f | 256 | .subsys_id = net_prio_subsys_id, |
4baf6e33 | 257 | .base_cftypes = ss_files, |
8c7f6edb | 258 | .module = THIS_MODULE, |
676f7c8f | 259 | }; |
5bc1421e NH |
260 | |
261 | static int netprio_device_event(struct notifier_block *unused, | |
262 | unsigned long event, void *ptr) | |
263 | { | |
351638e7 | 264 | struct net_device *dev = netdev_notifier_info_to_dev(ptr); |
5bc1421e | 265 | struct netprio_map *old; |
5bc1421e NH |
266 | |
267 | /* | |
268 | * Note this is called with rtnl_lock held so we have update side | |
269 | * protection on our rcu assignments | |
270 | */ | |
271 | ||
272 | switch (event) { | |
5bc1421e NH |
273 | case NETDEV_UNREGISTER: |
274 | old = rtnl_dereference(dev->priomap); | |
2cfa5a04 | 275 | RCU_INIT_POINTER(dev->priomap, NULL); |
5bc1421e NH |
276 | if (old) |
277 | kfree_rcu(old, rcu); | |
278 | break; | |
279 | } | |
280 | return NOTIFY_DONE; | |
281 | } | |
282 | ||
283 | static struct notifier_block netprio_device_notifier = { | |
284 | .notifier_call = netprio_device_event | |
285 | }; | |
286 | ||
287 | static int __init init_cgroup_netprio(void) | |
288 | { | |
289 | int ret; | |
290 | ||
291 | ret = cgroup_load_subsys(&net_prio_subsys); | |
292 | if (ret) | |
293 | goto out; | |
5bc1421e NH |
294 | |
295 | register_netdevice_notifier(&netprio_device_notifier); | |
296 | ||
297 | out: | |
298 | return ret; | |
299 | } | |
300 | ||
301 | static void __exit exit_cgroup_netprio(void) | |
302 | { | |
303 | struct netprio_map *old; | |
304 | struct net_device *dev; | |
305 | ||
306 | unregister_netdevice_notifier(&netprio_device_notifier); | |
307 | ||
308 | cgroup_unload_subsys(&net_prio_subsys); | |
309 | ||
5bc1421e NH |
310 | rtnl_lock(); |
311 | for_each_netdev(&init_net, dev) { | |
312 | old = rtnl_dereference(dev->priomap); | |
2cfa5a04 | 313 | RCU_INIT_POINTER(dev->priomap, NULL); |
5bc1421e NH |
314 | if (old) |
315 | kfree_rcu(old, rcu); | |
316 | } | |
317 | rtnl_unlock(); | |
318 | } | |
319 | ||
320 | module_init(init_cgroup_netprio); | |
321 | module_exit(exit_cgroup_netprio); | |
322 | MODULE_LICENSE("GPL v2"); |