net: Allow userns root to control llc, netfilter, netlink, packet, and xfrm
[deliverable/linux.git] / net / netfilter / ipset / ip_set_core.c
1 /* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu>
2 * Patrick Schaaf <bof@bof.de>
3 * Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 */
9
10 /* Kernel module for IP set management */
11
12 #include <linux/init.h>
13 #include <linux/module.h>
14 #include <linux/moduleparam.h>
15 #include <linux/ip.h>
16 #include <linux/skbuff.h>
17 #include <linux/spinlock.h>
18 #include <linux/netlink.h>
19 #include <linux/rculist.h>
20 #include <net/netlink.h>
21
22 #include <linux/netfilter.h>
23 #include <linux/netfilter/x_tables.h>
24 #include <linux/netfilter/nfnetlink.h>
25 #include <linux/netfilter/ipset/ip_set.h>
26
27 static LIST_HEAD(ip_set_type_list); /* all registered set types */
28 static DEFINE_MUTEX(ip_set_type_mutex); /* protects ip_set_type_list */
29 static DEFINE_RWLOCK(ip_set_ref_lock); /* protects the set refs */
30
31 static struct ip_set **ip_set_list; /* all individual sets */
32 static ip_set_id_t ip_set_max = CONFIG_IP_SET_MAX; /* max number of sets */
33
34 #define STREQ(a, b) (strncmp(a, b, IPSET_MAXNAMELEN) == 0)
35
36 static unsigned int max_sets;
37
38 module_param(max_sets, int, 0600);
39 MODULE_PARM_DESC(max_sets, "maximal number of sets");
40 MODULE_LICENSE("GPL");
41 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
42 MODULE_DESCRIPTION("core IP set support");
43 MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET);
44
45 /*
46 * The set types are implemented in modules and registered set types
47 * can be found in ip_set_type_list. Adding/deleting types is
48 * serialized by ip_set_type_mutex.
49 */
50
51 static inline void
52 ip_set_type_lock(void)
53 {
54 mutex_lock(&ip_set_type_mutex);
55 }
56
57 static inline void
58 ip_set_type_unlock(void)
59 {
60 mutex_unlock(&ip_set_type_mutex);
61 }
62
63 /* Register and deregister settype */
64
65 static struct ip_set_type *
66 find_set_type(const char *name, u8 family, u8 revision)
67 {
68 struct ip_set_type *type;
69
70 list_for_each_entry_rcu(type, &ip_set_type_list, list)
71 if (STREQ(type->name, name) &&
72 (type->family == family ||
73 type->family == NFPROTO_UNSPEC) &&
74 revision >= type->revision_min &&
75 revision <= type->revision_max)
76 return type;
77 return NULL;
78 }
79
80 /* Unlock, try to load a set type module and lock again */
81 static bool
82 load_settype(const char *name)
83 {
84 nfnl_unlock();
85 pr_debug("try to load ip_set_%s\n", name);
86 if (request_module("ip_set_%s", name) < 0) {
87 pr_warning("Can't find ip_set type %s\n", name);
88 nfnl_lock();
89 return false;
90 }
91 nfnl_lock();
92 return true;
93 }
94
95 /* Find a set type and reference it */
96 #define find_set_type_get(name, family, revision, found) \
97 __find_set_type_get(name, family, revision, found, false)
98
99 static int
100 __find_set_type_get(const char *name, u8 family, u8 revision,
101 struct ip_set_type **found, bool retry)
102 {
103 struct ip_set_type *type;
104 int err;
105
106 if (retry && !load_settype(name))
107 return -IPSET_ERR_FIND_TYPE;
108
109 rcu_read_lock();
110 *found = find_set_type(name, family, revision);
111 if (*found) {
112 err = !try_module_get((*found)->me) ? -EFAULT : 0;
113 goto unlock;
114 }
115 /* Make sure the type is already loaded
116 * but we don't support the revision */
117 list_for_each_entry_rcu(type, &ip_set_type_list, list)
118 if (STREQ(type->name, name)) {
119 err = -IPSET_ERR_FIND_TYPE;
120 goto unlock;
121 }
122 rcu_read_unlock();
123
124 return retry ? -IPSET_ERR_FIND_TYPE :
125 __find_set_type_get(name, family, revision, found, true);
126
127 unlock:
128 rcu_read_unlock();
129 return err;
130 }
131
132 /* Find a given set type by name and family.
133 * If we succeeded, the supported minimal and maximum revisions are
134 * filled out.
135 */
136 #define find_set_type_minmax(name, family, min, max) \
137 __find_set_type_minmax(name, family, min, max, false)
138
139 static int
140 __find_set_type_minmax(const char *name, u8 family, u8 *min, u8 *max,
141 bool retry)
142 {
143 struct ip_set_type *type;
144 bool found = false;
145
146 if (retry && !load_settype(name))
147 return -IPSET_ERR_FIND_TYPE;
148
149 *min = 255; *max = 0;
150 rcu_read_lock();
151 list_for_each_entry_rcu(type, &ip_set_type_list, list)
152 if (STREQ(type->name, name) &&
153 (type->family == family ||
154 type->family == NFPROTO_UNSPEC)) {
155 found = true;
156 if (type->revision_min < *min)
157 *min = type->revision_min;
158 if (type->revision_max > *max)
159 *max = type->revision_max;
160 }
161 rcu_read_unlock();
162 if (found)
163 return 0;
164
165 return retry ? -IPSET_ERR_FIND_TYPE :
166 __find_set_type_minmax(name, family, min, max, true);
167 }
168
/* Printable family: "inet" for IPv4, "inet6" for IPv6, "any" otherwise. */
#define family_name(f)				\
	((f) == NFPROTO_IPV4 ? "inet" :		\
	 (f) == NFPROTO_IPV6 ? "inet6" : "any")
171
172 /* Register a set type structure. The type is identified by
173 * the unique triple of name, family and revision.
174 */
175 int
176 ip_set_type_register(struct ip_set_type *type)
177 {
178 int ret = 0;
179
180 if (type->protocol != IPSET_PROTOCOL) {
181 pr_warning("ip_set type %s, family %s, revision %u:%u uses "
182 "wrong protocol version %u (want %u)\n",
183 type->name, family_name(type->family),
184 type->revision_min, type->revision_max,
185 type->protocol, IPSET_PROTOCOL);
186 return -EINVAL;
187 }
188
189 ip_set_type_lock();
190 if (find_set_type(type->name, type->family, type->revision_min)) {
191 /* Duplicate! */
192 pr_warning("ip_set type %s, family %s with revision min %u "
193 "already registered!\n", type->name,
194 family_name(type->family), type->revision_min);
195 ret = -EINVAL;
196 goto unlock;
197 }
198 list_add_rcu(&type->list, &ip_set_type_list);
199 pr_debug("type %s, family %s, revision %u:%u registered.\n",
200 type->name, family_name(type->family),
201 type->revision_min, type->revision_max);
202 unlock:
203 ip_set_type_unlock();
204 return ret;
205 }
206 EXPORT_SYMBOL_GPL(ip_set_type_register);
207
208 /* Unregister a set type. There's a small race with ip_set_create */
209 void
210 ip_set_type_unregister(struct ip_set_type *type)
211 {
212 ip_set_type_lock();
213 if (!find_set_type(type->name, type->family, type->revision_min)) {
214 pr_warning("ip_set type %s, family %s with revision min %u "
215 "not registered\n", type->name,
216 family_name(type->family), type->revision_min);
217 goto unlock;
218 }
219 list_del_rcu(&type->list);
220 pr_debug("type %s, family %s with revision min %u unregistered.\n",
221 type->name, family_name(type->family), type->revision_min);
222 unlock:
223 ip_set_type_unlock();
224
225 synchronize_rcu();
226 }
227 EXPORT_SYMBOL_GPL(ip_set_type_unregister);
228
229 /* Utility functions */
230 void *
231 ip_set_alloc(size_t size)
232 {
233 void *members = NULL;
234
235 if (size < KMALLOC_MAX_SIZE)
236 members = kzalloc(size, GFP_KERNEL | __GFP_NOWARN);
237
238 if (members) {
239 pr_debug("%p: allocated with kmalloc\n", members);
240 return members;
241 }
242
243 members = vzalloc(size);
244 if (!members)
245 return NULL;
246 pr_debug("%p: allocated with vmalloc\n", members);
247
248 return members;
249 }
250 EXPORT_SYMBOL_GPL(ip_set_alloc);
251
252 void
253 ip_set_free(void *members)
254 {
255 pr_debug("%p: free with %s\n", members,
256 is_vmalloc_addr(members) ? "vfree" : "kfree");
257 if (is_vmalloc_addr(members))
258 vfree(members);
259 else
260 kfree(members);
261 }
262 EXPORT_SYMBOL_GPL(ip_set_free);
263
264 static inline bool
265 flag_nested(const struct nlattr *nla)
266 {
267 return nla->nla_type & NLA_F_NESTED;
268 }
269
270 static const struct nla_policy ipaddr_policy[IPSET_ATTR_IPADDR_MAX + 1] = {
271 [IPSET_ATTR_IPADDR_IPV4] = { .type = NLA_U32 },
272 [IPSET_ATTR_IPADDR_IPV6] = { .type = NLA_BINARY,
273 .len = sizeof(struct in6_addr) },
274 };
275
276 int
277 ip_set_get_ipaddr4(struct nlattr *nla, __be32 *ipaddr)
278 {
279 struct nlattr *tb[IPSET_ATTR_IPADDR_MAX+1];
280
281 if (unlikely(!flag_nested(nla)))
282 return -IPSET_ERR_PROTOCOL;
283 if (nla_parse_nested(tb, IPSET_ATTR_IPADDR_MAX, nla, ipaddr_policy))
284 return -IPSET_ERR_PROTOCOL;
285 if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_IPADDR_IPV4)))
286 return -IPSET_ERR_PROTOCOL;
287
288 *ipaddr = nla_get_be32(tb[IPSET_ATTR_IPADDR_IPV4]);
289 return 0;
290 }
291 EXPORT_SYMBOL_GPL(ip_set_get_ipaddr4);
292
293 int
294 ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr)
295 {
296 struct nlattr *tb[IPSET_ATTR_IPADDR_MAX+1];
297
298 if (unlikely(!flag_nested(nla)))
299 return -IPSET_ERR_PROTOCOL;
300
301 if (nla_parse_nested(tb, IPSET_ATTR_IPADDR_MAX, nla, ipaddr_policy))
302 return -IPSET_ERR_PROTOCOL;
303 if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_IPADDR_IPV6)))
304 return -IPSET_ERR_PROTOCOL;
305
306 memcpy(ipaddr, nla_data(tb[IPSET_ATTR_IPADDR_IPV6]),
307 sizeof(struct in6_addr));
308 return 0;
309 }
310 EXPORT_SYMBOL_GPL(ip_set_get_ipaddr6);
311
312 /*
313 * Creating/destroying/renaming/swapping affect the existence and
314 * the properties of a set. All of these can be executed from userspace
315 * only and serialized by the nfnl mutex indirectly from nfnetlink.
316 *
317 * Sets are identified by their index in ip_set_list and the index
318 * is used by the external references (set/SET netfilter modules).
319 *
320 * The set behind an index may change by swapping only, from userspace.
321 */
322
323 static inline void
324 __ip_set_get(ip_set_id_t index)
325 {
326 write_lock_bh(&ip_set_ref_lock);
327 ip_set_list[index]->ref++;
328 write_unlock_bh(&ip_set_ref_lock);
329 }
330
331 static inline void
332 __ip_set_put(ip_set_id_t index)
333 {
334 write_lock_bh(&ip_set_ref_lock);
335 BUG_ON(ip_set_list[index]->ref == 0);
336 ip_set_list[index]->ref--;
337 write_unlock_bh(&ip_set_ref_lock);
338 }
339
340 /*
341 * Add, del and test set entries from kernel.
342 *
343 * The set behind the index must exist and must be referenced
344 * so it can't be destroyed (or changed) under our foot.
345 */
346
347 int
348 ip_set_test(ip_set_id_t index, const struct sk_buff *skb,
349 const struct xt_action_param *par,
350 const struct ip_set_adt_opt *opt)
351 {
352 struct ip_set *set = ip_set_list[index];
353 int ret = 0;
354
355 BUG_ON(set == NULL);
356 pr_debug("set %s, index %u\n", set->name, index);
357
358 if (opt->dim < set->type->dimension ||
359 !(opt->family == set->family || set->family == NFPROTO_UNSPEC))
360 return 0;
361
362 read_lock_bh(&set->lock);
363 ret = set->variant->kadt(set, skb, par, IPSET_TEST, opt);
364 read_unlock_bh(&set->lock);
365
366 if (ret == -EAGAIN) {
367 /* Type requests element to be completed */
368 pr_debug("element must be competed, ADD is triggered\n");
369 write_lock_bh(&set->lock);
370 set->variant->kadt(set, skb, par, IPSET_ADD, opt);
371 write_unlock_bh(&set->lock);
372 ret = 1;
373 } else {
374 /* --return-nomatch: invert matched element */
375 if ((opt->flags & IPSET_RETURN_NOMATCH) &&
376 (set->type->features & IPSET_TYPE_NOMATCH) &&
377 (ret > 0 || ret == -ENOTEMPTY))
378 ret = -ret;
379 }
380
381 /* Convert error codes to nomatch */
382 return (ret < 0 ? 0 : ret);
383 }
384 EXPORT_SYMBOL_GPL(ip_set_test);
385
386 int
387 ip_set_add(ip_set_id_t index, const struct sk_buff *skb,
388 const struct xt_action_param *par,
389 const struct ip_set_adt_opt *opt)
390 {
391 struct ip_set *set = ip_set_list[index];
392 int ret;
393
394 BUG_ON(set == NULL);
395 pr_debug("set %s, index %u\n", set->name, index);
396
397 if (opt->dim < set->type->dimension ||
398 !(opt->family == set->family || set->family == NFPROTO_UNSPEC))
399 return 0;
400
401 write_lock_bh(&set->lock);
402 ret = set->variant->kadt(set, skb, par, IPSET_ADD, opt);
403 write_unlock_bh(&set->lock);
404
405 return ret;
406 }
407 EXPORT_SYMBOL_GPL(ip_set_add);
408
409 int
410 ip_set_del(ip_set_id_t index, const struct sk_buff *skb,
411 const struct xt_action_param *par,
412 const struct ip_set_adt_opt *opt)
413 {
414 struct ip_set *set = ip_set_list[index];
415 int ret = 0;
416
417 BUG_ON(set == NULL);
418 pr_debug("set %s, index %u\n", set->name, index);
419
420 if (opt->dim < set->type->dimension ||
421 !(opt->family == set->family || set->family == NFPROTO_UNSPEC))
422 return 0;
423
424 write_lock_bh(&set->lock);
425 ret = set->variant->kadt(set, skb, par, IPSET_DEL, opt);
426 write_unlock_bh(&set->lock);
427
428 return ret;
429 }
430 EXPORT_SYMBOL_GPL(ip_set_del);
431
432 /*
433 * Find set by name, reference it once. The reference makes sure the
434 * thing pointed to, does not go away under our feet.
435 *
436 */
437 ip_set_id_t
438 ip_set_get_byname(const char *name, struct ip_set **set)
439 {
440 ip_set_id_t i, index = IPSET_INVALID_ID;
441 struct ip_set *s;
442
443 for (i = 0; i < ip_set_max; i++) {
444 s = ip_set_list[i];
445 if (s != NULL && STREQ(s->name, name)) {
446 __ip_set_get(i);
447 index = i;
448 *set = s;
449 }
450 }
451
452 return index;
453 }
454 EXPORT_SYMBOL_GPL(ip_set_get_byname);
455
456 /*
457 * If the given set pointer points to a valid set, decrement
458 * reference count by 1. The caller shall not assume the index
459 * to be valid, after calling this function.
460 *
461 */
462 void
463 ip_set_put_byindex(ip_set_id_t index)
464 {
465 if (ip_set_list[index] != NULL)
466 __ip_set_put(index);
467 }
468 EXPORT_SYMBOL_GPL(ip_set_put_byindex);
469
470 /*
471 * Get the name of a set behind a set index.
472 * We assume the set is referenced, so it does exist and
473 * can't be destroyed. The set cannot be renamed due to
474 * the referencing either.
475 *
476 */
477 const char *
478 ip_set_name_byindex(ip_set_id_t index)
479 {
480 const struct ip_set *set = ip_set_list[index];
481
482 BUG_ON(set == NULL);
483 BUG_ON(set->ref == 0);
484
485 /* Referenced, so it's safe */
486 return set->name;
487 }
488 EXPORT_SYMBOL_GPL(ip_set_name_byindex);
489
490 /*
491 * Routines to call by external subsystems, which do not
492 * call nfnl_lock for us.
493 */
494
495 /*
496 * Find set by name, reference it once. The reference makes sure the
497 * thing pointed to, does not go away under our feet.
498 *
499 * The nfnl mutex is used in the function.
500 */
501 ip_set_id_t
502 ip_set_nfnl_get(const char *name)
503 {
504 struct ip_set *s;
505 ip_set_id_t index;
506
507 nfnl_lock();
508 index = ip_set_get_byname(name, &s);
509 nfnl_unlock();
510
511 return index;
512 }
513 EXPORT_SYMBOL_GPL(ip_set_nfnl_get);
514
515 /*
516 * Find set by index, reference it once. The reference makes sure the
517 * thing pointed to, does not go away under our feet.
518 *
519 * The nfnl mutex is used in the function.
520 */
521 ip_set_id_t
522 ip_set_nfnl_get_byindex(ip_set_id_t index)
523 {
524 if (index > ip_set_max)
525 return IPSET_INVALID_ID;
526
527 nfnl_lock();
528 if (ip_set_list[index])
529 __ip_set_get(index);
530 else
531 index = IPSET_INVALID_ID;
532 nfnl_unlock();
533
534 return index;
535 }
536 EXPORT_SYMBOL_GPL(ip_set_nfnl_get_byindex);
537
538 /*
539 * If the given set pointer points to a valid set, decrement
540 * reference count by 1. The caller shall not assume the index
541 * to be valid, after calling this function.
542 *
543 * The nfnl mutex is used in the function.
544 */
545 void
546 ip_set_nfnl_put(ip_set_id_t index)
547 {
548 nfnl_lock();
549 ip_set_put_byindex(index);
550 nfnl_unlock();
551 }
552 EXPORT_SYMBOL_GPL(ip_set_nfnl_put);
553
554 /*
555 * Communication protocol with userspace over netlink.
556 *
557 * The commands are serialized by the nfnl mutex.
558 */
559
560 static inline bool
561 protocol_failed(const struct nlattr * const tb[])
562 {
563 return !tb[IPSET_ATTR_PROTOCOL] ||
564 nla_get_u8(tb[IPSET_ATTR_PROTOCOL]) != IPSET_PROTOCOL;
565 }
566
567 static inline u32
568 flag_exist(const struct nlmsghdr *nlh)
569 {
570 return nlh->nlmsg_flags & NLM_F_EXCL ? 0 : IPSET_FLAG_EXIST;
571 }
572
573 static struct nlmsghdr *
574 start_msg(struct sk_buff *skb, u32 portid, u32 seq, unsigned int flags,
575 enum ipset_cmd cmd)
576 {
577 struct nlmsghdr *nlh;
578 struct nfgenmsg *nfmsg;
579
580 nlh = nlmsg_put(skb, portid, seq, cmd | (NFNL_SUBSYS_IPSET << 8),
581 sizeof(*nfmsg), flags);
582 if (nlh == NULL)
583 return NULL;
584
585 nfmsg = nlmsg_data(nlh);
586 nfmsg->nfgen_family = NFPROTO_IPV4;
587 nfmsg->version = NFNETLINK_V0;
588 nfmsg->res_id = 0;
589
590 return nlh;
591 }
592
593 /* Create a set */
594
595 static const struct nla_policy ip_set_create_policy[IPSET_ATTR_CMD_MAX + 1] = {
596 [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 },
597 [IPSET_ATTR_SETNAME] = { .type = NLA_NUL_STRING,
598 .len = IPSET_MAXNAMELEN - 1 },
599 [IPSET_ATTR_TYPENAME] = { .type = NLA_NUL_STRING,
600 .len = IPSET_MAXNAMELEN - 1},
601 [IPSET_ATTR_REVISION] = { .type = NLA_U8 },
602 [IPSET_ATTR_FAMILY] = { .type = NLA_U8 },
603 [IPSET_ATTR_DATA] = { .type = NLA_NESTED },
604 };
605
606 static ip_set_id_t
607 find_set_id(const char *name)
608 {
609 ip_set_id_t i, index = IPSET_INVALID_ID;
610 const struct ip_set *set;
611
612 for (i = 0; index == IPSET_INVALID_ID && i < ip_set_max; i++) {
613 set = ip_set_list[i];
614 if (set != NULL && STREQ(set->name, name))
615 index = i;
616 }
617 return index;
618 }
619
620 static inline struct ip_set *
621 find_set(const char *name)
622 {
623 ip_set_id_t index = find_set_id(name);
624
625 return index == IPSET_INVALID_ID ? NULL : ip_set_list[index];
626 }
627
628 static int
629 find_free_id(const char *name, ip_set_id_t *index, struct ip_set **set)
630 {
631 ip_set_id_t i;
632
633 *index = IPSET_INVALID_ID;
634 for (i = 0; i < ip_set_max; i++) {
635 if (ip_set_list[i] == NULL) {
636 if (*index == IPSET_INVALID_ID)
637 *index = i;
638 } else if (STREQ(name, ip_set_list[i]->name)) {
639 /* Name clash */
640 *set = ip_set_list[i];
641 return -EEXIST;
642 }
643 }
644 if (*index == IPSET_INVALID_ID)
645 /* No free slot remained */
646 return -IPSET_ERR_MAX_SETS;
647 return 0;
648 }
649
650 static int
651 ip_set_none(struct sock *ctnl, struct sk_buff *skb,
652 const struct nlmsghdr *nlh,
653 const struct nlattr * const attr[])
654 {
655 return -EOPNOTSUPP;
656 }
657
658 static int
659 ip_set_create(struct sock *ctnl, struct sk_buff *skb,
660 const struct nlmsghdr *nlh,
661 const struct nlattr * const attr[])
662 {
663 struct ip_set *set, *clash = NULL;
664 ip_set_id_t index = IPSET_INVALID_ID;
665 struct nlattr *tb[IPSET_ATTR_CREATE_MAX+1] = {};
666 const char *name, *typename;
667 u8 family, revision;
668 u32 flags = flag_exist(nlh);
669 int ret = 0;
670
671 if (unlikely(protocol_failed(attr) ||
672 attr[IPSET_ATTR_SETNAME] == NULL ||
673 attr[IPSET_ATTR_TYPENAME] == NULL ||
674 attr[IPSET_ATTR_REVISION] == NULL ||
675 attr[IPSET_ATTR_FAMILY] == NULL ||
676 (attr[IPSET_ATTR_DATA] != NULL &&
677 !flag_nested(attr[IPSET_ATTR_DATA]))))
678 return -IPSET_ERR_PROTOCOL;
679
680 name = nla_data(attr[IPSET_ATTR_SETNAME]);
681 typename = nla_data(attr[IPSET_ATTR_TYPENAME]);
682 family = nla_get_u8(attr[IPSET_ATTR_FAMILY]);
683 revision = nla_get_u8(attr[IPSET_ATTR_REVISION]);
684 pr_debug("setname: %s, typename: %s, family: %s, revision: %u\n",
685 name, typename, family_name(family), revision);
686
687 /*
688 * First, and without any locks, allocate and initialize
689 * a normal base set structure.
690 */
691 set = kzalloc(sizeof(struct ip_set), GFP_KERNEL);
692 if (!set)
693 return -ENOMEM;
694 rwlock_init(&set->lock);
695 strlcpy(set->name, name, IPSET_MAXNAMELEN);
696 set->family = family;
697 set->revision = revision;
698
699 /*
700 * Next, check that we know the type, and take
701 * a reference on the type, to make sure it stays available
702 * while constructing our new set.
703 *
704 * After referencing the type, we try to create the type
705 * specific part of the set without holding any locks.
706 */
707 ret = find_set_type_get(typename, family, revision, &(set->type));
708 if (ret)
709 goto out;
710
711 /*
712 * Without holding any locks, create private part.
713 */
714 if (attr[IPSET_ATTR_DATA] &&
715 nla_parse_nested(tb, IPSET_ATTR_CREATE_MAX, attr[IPSET_ATTR_DATA],
716 set->type->create_policy)) {
717 ret = -IPSET_ERR_PROTOCOL;
718 goto put_out;
719 }
720
721 ret = set->type->create(set, tb, flags);
722 if (ret != 0)
723 goto put_out;
724
725 /* BTW, ret==0 here. */
726
727 /*
728 * Here, we have a valid, constructed set and we are protected
729 * by the nfnl mutex. Find the first free index in ip_set_list
730 * and check clashing.
731 */
732 ret = find_free_id(set->name, &index, &clash);
733 if (ret != 0) {
734 /* If this is the same set and requested, ignore error */
735 if (ret == -EEXIST &&
736 (flags & IPSET_FLAG_EXIST) &&
737 STREQ(set->type->name, clash->type->name) &&
738 set->type->family == clash->type->family &&
739 set->type->revision_min == clash->type->revision_min &&
740 set->type->revision_max == clash->type->revision_max &&
741 set->variant->same_set(set, clash))
742 ret = 0;
743 goto cleanup;
744 }
745
746 /*
747 * Finally! Add our shiny new set to the list, and be done.
748 */
749 pr_debug("create: '%s' created with index %u!\n", set->name, index);
750 ip_set_list[index] = set;
751
752 return ret;
753
754 cleanup:
755 set->variant->destroy(set);
756 put_out:
757 module_put(set->type->me);
758 out:
759 kfree(set);
760 return ret;
761 }
762
763 /* Destroy sets */
764
765 static const struct nla_policy
766 ip_set_setname_policy[IPSET_ATTR_CMD_MAX + 1] = {
767 [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 },
768 [IPSET_ATTR_SETNAME] = { .type = NLA_NUL_STRING,
769 .len = IPSET_MAXNAMELEN - 1 },
770 };
771
772 static void
773 ip_set_destroy_set(ip_set_id_t index)
774 {
775 struct ip_set *set = ip_set_list[index];
776
777 pr_debug("set: %s\n", set->name);
778 ip_set_list[index] = NULL;
779
780 /* Must call it without holding any lock */
781 set->variant->destroy(set);
782 module_put(set->type->me);
783 kfree(set);
784 }
785
786 static int
787 ip_set_destroy(struct sock *ctnl, struct sk_buff *skb,
788 const struct nlmsghdr *nlh,
789 const struct nlattr * const attr[])
790 {
791 ip_set_id_t i;
792 int ret = 0;
793
794 if (unlikely(protocol_failed(attr)))
795 return -IPSET_ERR_PROTOCOL;
796
797 /* Commands are serialized and references are
798 * protected by the ip_set_ref_lock.
799 * External systems (i.e. xt_set) must call
800 * ip_set_put|get_nfnl_* functions, that way we
801 * can safely check references here.
802 *
803 * list:set timer can only decrement the reference
804 * counter, so if it's already zero, we can proceed
805 * without holding the lock.
806 */
807 read_lock_bh(&ip_set_ref_lock);
808 if (!attr[IPSET_ATTR_SETNAME]) {
809 for (i = 0; i < ip_set_max; i++) {
810 if (ip_set_list[i] != NULL && ip_set_list[i]->ref) {
811 ret = -IPSET_ERR_BUSY;
812 goto out;
813 }
814 }
815 read_unlock_bh(&ip_set_ref_lock);
816 for (i = 0; i < ip_set_max; i++) {
817 if (ip_set_list[i] != NULL)
818 ip_set_destroy_set(i);
819 }
820 } else {
821 i = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME]));
822 if (i == IPSET_INVALID_ID) {
823 ret = -ENOENT;
824 goto out;
825 } else if (ip_set_list[i]->ref) {
826 ret = -IPSET_ERR_BUSY;
827 goto out;
828 }
829 read_unlock_bh(&ip_set_ref_lock);
830
831 ip_set_destroy_set(i);
832 }
833 return 0;
834 out:
835 read_unlock_bh(&ip_set_ref_lock);
836 return ret;
837 }
838
839 /* Flush sets */
840
841 static void
842 ip_set_flush_set(struct ip_set *set)
843 {
844 pr_debug("set: %s\n", set->name);
845
846 write_lock_bh(&set->lock);
847 set->variant->flush(set);
848 write_unlock_bh(&set->lock);
849 }
850
851 static int
852 ip_set_flush(struct sock *ctnl, struct sk_buff *skb,
853 const struct nlmsghdr *nlh,
854 const struct nlattr * const attr[])
855 {
856 ip_set_id_t i;
857
858 if (unlikely(protocol_failed(attr)))
859 return -IPSET_ERR_PROTOCOL;
860
861 if (!attr[IPSET_ATTR_SETNAME]) {
862 for (i = 0; i < ip_set_max; i++)
863 if (ip_set_list[i] != NULL)
864 ip_set_flush_set(ip_set_list[i]);
865 } else {
866 i = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME]));
867 if (i == IPSET_INVALID_ID)
868 return -ENOENT;
869
870 ip_set_flush_set(ip_set_list[i]);
871 }
872
873 return 0;
874 }
875
876 /* Rename a set */
877
878 static const struct nla_policy
879 ip_set_setname2_policy[IPSET_ATTR_CMD_MAX + 1] = {
880 [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 },
881 [IPSET_ATTR_SETNAME] = { .type = NLA_NUL_STRING,
882 .len = IPSET_MAXNAMELEN - 1 },
883 [IPSET_ATTR_SETNAME2] = { .type = NLA_NUL_STRING,
884 .len = IPSET_MAXNAMELEN - 1 },
885 };
886
887 static int
888 ip_set_rename(struct sock *ctnl, struct sk_buff *skb,
889 const struct nlmsghdr *nlh,
890 const struct nlattr * const attr[])
891 {
892 struct ip_set *set;
893 const char *name2;
894 ip_set_id_t i;
895 int ret = 0;
896
897 if (unlikely(protocol_failed(attr) ||
898 attr[IPSET_ATTR_SETNAME] == NULL ||
899 attr[IPSET_ATTR_SETNAME2] == NULL))
900 return -IPSET_ERR_PROTOCOL;
901
902 set = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
903 if (set == NULL)
904 return -ENOENT;
905
906 read_lock_bh(&ip_set_ref_lock);
907 if (set->ref != 0) {
908 ret = -IPSET_ERR_REFERENCED;
909 goto out;
910 }
911
912 name2 = nla_data(attr[IPSET_ATTR_SETNAME2]);
913 for (i = 0; i < ip_set_max; i++) {
914 if (ip_set_list[i] != NULL &&
915 STREQ(ip_set_list[i]->name, name2)) {
916 ret = -IPSET_ERR_EXIST_SETNAME2;
917 goto out;
918 }
919 }
920 strncpy(set->name, name2, IPSET_MAXNAMELEN);
921
922 out:
923 read_unlock_bh(&ip_set_ref_lock);
924 return ret;
925 }
926
927 /* Swap two sets so that name/index points to the other.
928 * References and set names are also swapped.
929 *
930 * The commands are serialized by the nfnl mutex and references are
931 * protected by the ip_set_ref_lock. The kernel interfaces
932 * do not hold the mutex but the pointer settings are atomic
933 * so the ip_set_list always contains valid pointers to the sets.
934 */
935
936 static int
937 ip_set_swap(struct sock *ctnl, struct sk_buff *skb,
938 const struct nlmsghdr *nlh,
939 const struct nlattr * const attr[])
940 {
941 struct ip_set *from, *to;
942 ip_set_id_t from_id, to_id;
943 char from_name[IPSET_MAXNAMELEN];
944
945 if (unlikely(protocol_failed(attr) ||
946 attr[IPSET_ATTR_SETNAME] == NULL ||
947 attr[IPSET_ATTR_SETNAME2] == NULL))
948 return -IPSET_ERR_PROTOCOL;
949
950 from_id = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME]));
951 if (from_id == IPSET_INVALID_ID)
952 return -ENOENT;
953
954 to_id = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME2]));
955 if (to_id == IPSET_INVALID_ID)
956 return -IPSET_ERR_EXIST_SETNAME2;
957
958 from = ip_set_list[from_id];
959 to = ip_set_list[to_id];
960
961 /* Features must not change.
962 * Not an artificial restriction anymore, as we must prevent
963 * possible loops created by swapping in setlist type of sets. */
964 if (!(from->type->features == to->type->features &&
965 from->type->family == to->type->family))
966 return -IPSET_ERR_TYPE_MISMATCH;
967
968 strncpy(from_name, from->name, IPSET_MAXNAMELEN);
969 strncpy(from->name, to->name, IPSET_MAXNAMELEN);
970 strncpy(to->name, from_name, IPSET_MAXNAMELEN);
971
972 write_lock_bh(&ip_set_ref_lock);
973 swap(from->ref, to->ref);
974 ip_set_list[from_id] = to;
975 ip_set_list[to_id] = from;
976 write_unlock_bh(&ip_set_ref_lock);
977
978 return 0;
979 }
980
981 /* List/save set data */
982
/* Dump states kept in the low 16 bits of cb->args[0] */
#define DUMP_INIT	0
#define DUMP_ALL	1
#define DUMP_ONE	2
#define DUMP_LAST	3

/* Low 16 bits: dump type; upper bits: dump flags */
#define DUMP_TYPE(arg)		(((u32)(arg)) & 0x0000FFFF)
#define DUMP_FLAGS(arg)		(((u32)(arg)) >> 16)
990
991 static int
992 ip_set_dump_done(struct netlink_callback *cb)
993 {
994 if (cb->args[2]) {
995 pr_debug("release set %s\n", ip_set_list[cb->args[1]]->name);
996 ip_set_put_byindex((ip_set_id_t) cb->args[1]);
997 }
998 return 0;
999 }
1000
1001 static inline void
1002 dump_attrs(struct nlmsghdr *nlh)
1003 {
1004 const struct nlattr *attr;
1005 int rem;
1006
1007 pr_debug("dump nlmsg\n");
1008 nlmsg_for_each_attr(attr, nlh, sizeof(struct nfgenmsg), rem) {
1009 pr_debug("type: %u, len %u\n", nla_type(attr), attr->nla_len);
1010 }
1011 }
1012
1013 static int
1014 dump_init(struct netlink_callback *cb)
1015 {
1016 struct nlmsghdr *nlh = nlmsg_hdr(cb->skb);
1017 int min_len = NLMSG_SPACE(sizeof(struct nfgenmsg));
1018 struct nlattr *cda[IPSET_ATTR_CMD_MAX+1];
1019 struct nlattr *attr = (void *)nlh + min_len;
1020 u32 dump_type;
1021 ip_set_id_t index;
1022
1023 /* Second pass, so parser can't fail */
1024 nla_parse(cda, IPSET_ATTR_CMD_MAX,
1025 attr, nlh->nlmsg_len - min_len, ip_set_setname_policy);
1026
1027 /* cb->args[0] : dump single set/all sets
1028 * [1] : set index
1029 * [..]: type specific
1030 */
1031
1032 if (cda[IPSET_ATTR_SETNAME]) {
1033 index = find_set_id(nla_data(cda[IPSET_ATTR_SETNAME]));
1034 if (index == IPSET_INVALID_ID)
1035 return -ENOENT;
1036
1037 dump_type = DUMP_ONE;
1038 cb->args[1] = index;
1039 } else
1040 dump_type = DUMP_ALL;
1041
1042 if (cda[IPSET_ATTR_FLAGS]) {
1043 u32 f = ip_set_get_h32(cda[IPSET_ATTR_FLAGS]);
1044 dump_type |= (f << 16);
1045 }
1046 cb->args[0] = dump_type;
1047
1048 return 0;
1049 }
1050
1051 static int
1052 ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb)
1053 {
1054 ip_set_id_t index = IPSET_INVALID_ID, max;
1055 struct ip_set *set = NULL;
1056 struct nlmsghdr *nlh = NULL;
1057 unsigned int flags = NETLINK_CB(cb->skb).portid ? NLM_F_MULTI : 0;
1058 u32 dump_type, dump_flags;
1059 int ret = 0;
1060
1061 if (!cb->args[0]) {
1062 ret = dump_init(cb);
1063 if (ret < 0) {
1064 nlh = nlmsg_hdr(cb->skb);
1065 /* We have to create and send the error message
1066 * manually :-( */
1067 if (nlh->nlmsg_flags & NLM_F_ACK)
1068 netlink_ack(cb->skb, nlh, ret);
1069 return ret;
1070 }
1071 }
1072
1073 if (cb->args[1] >= ip_set_max)
1074 goto out;
1075
1076 dump_type = DUMP_TYPE(cb->args[0]);
1077 dump_flags = DUMP_FLAGS(cb->args[0]);
1078 max = dump_type == DUMP_ONE ? cb->args[1] + 1 : ip_set_max;
1079 dump_last:
1080 pr_debug("args[0]: %u %u args[1]: %ld\n",
1081 dump_type, dump_flags, cb->args[1]);
1082 for (; cb->args[1] < max; cb->args[1]++) {
1083 index = (ip_set_id_t) cb->args[1];
1084 set = ip_set_list[index];
1085 if (set == NULL) {
1086 if (dump_type == DUMP_ONE) {
1087 ret = -ENOENT;
1088 goto out;
1089 }
1090 continue;
1091 }
1092 /* When dumping all sets, we must dump "sorted"
1093 * so that lists (unions of sets) are dumped last.
1094 */
1095 if (dump_type != DUMP_ONE &&
1096 ((dump_type == DUMP_ALL) ==
1097 !!(set->type->features & IPSET_DUMP_LAST)))
1098 continue;
1099 pr_debug("List set: %s\n", set->name);
1100 if (!cb->args[2]) {
1101 /* Start listing: make sure set won't be destroyed */
1102 pr_debug("reference set\n");
1103 __ip_set_get(index);
1104 }
1105 nlh = start_msg(skb, NETLINK_CB(cb->skb).portid,
1106 cb->nlh->nlmsg_seq, flags,
1107 IPSET_CMD_LIST);
1108 if (!nlh) {
1109 ret = -EMSGSIZE;
1110 goto release_refcount;
1111 }
1112 if (nla_put_u8(skb, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL) ||
1113 nla_put_string(skb, IPSET_ATTR_SETNAME, set->name))
1114 goto nla_put_failure;
1115 if (dump_flags & IPSET_FLAG_LIST_SETNAME)
1116 goto next_set;
1117 switch (cb->args[2]) {
1118 case 0:
1119 /* Core header data */
1120 if (nla_put_string(skb, IPSET_ATTR_TYPENAME,
1121 set->type->name) ||
1122 nla_put_u8(skb, IPSET_ATTR_FAMILY,
1123 set->family) ||
1124 nla_put_u8(skb, IPSET_ATTR_REVISION,
1125 set->revision))
1126 goto nla_put_failure;
1127 ret = set->variant->head(set, skb);
1128 if (ret < 0)
1129 goto release_refcount;
1130 if (dump_flags & IPSET_FLAG_LIST_HEADER)
1131 goto next_set;
1132 /* Fall through and add elements */
1133 default:
1134 read_lock_bh(&set->lock);
1135 ret = set->variant->list(set, skb, cb);
1136 read_unlock_bh(&set->lock);
1137 if (!cb->args[2])
1138 /* Set is done, proceed with next one */
1139 goto next_set;
1140 goto release_refcount;
1141 }
1142 }
1143 /* If we dump all sets, continue with dumping last ones */
1144 if (dump_type == DUMP_ALL) {
1145 dump_type = DUMP_LAST;
1146 cb->args[0] = dump_type | (dump_flags << 16);
1147 cb->args[1] = 0;
1148 goto dump_last;
1149 }
1150 goto out;
1151
1152 nla_put_failure:
1153 ret = -EFAULT;
1154 next_set:
1155 if (dump_type == DUMP_ONE)
1156 cb->args[1] = IPSET_INVALID_ID;
1157 else
1158 cb->args[1]++;
1159 release_refcount:
1160 /* If there was an error or set is done, release set */
1161 if (ret || !cb->args[2]) {
1162 pr_debug("release set %s\n", ip_set_list[index]->name);
1163 ip_set_put_byindex(index);
1164 cb->args[2] = 0;
1165 }
1166 out:
1167 if (nlh) {
1168 nlmsg_end(skb, nlh);
1169 pr_debug("nlmsg_len: %u\n", nlh->nlmsg_len);
1170 dump_attrs(nlh);
1171 }
1172
1173 return ret < 0 ? ret : skb->len;
1174 }
1175
1176 static int
1177 ip_set_dump(struct sock *ctnl, struct sk_buff *skb,
1178 const struct nlmsghdr *nlh,
1179 const struct nlattr * const attr[])
1180 {
1181 if (unlikely(protocol_failed(attr)))
1182 return -IPSET_ERR_PROTOCOL;
1183
1184 {
1185 struct netlink_dump_control c = {
1186 .dump = ip_set_dump_start,
1187 .done = ip_set_dump_done,
1188 };
1189 return netlink_dump_start(ctnl, skb, nlh, &c);
1190 }
1191 }
1192
1193 /* Add, del and test */
1194
1195 static const struct nla_policy ip_set_adt_policy[IPSET_ATTR_CMD_MAX + 1] = {
1196 [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 },
1197 [IPSET_ATTR_SETNAME] = { .type = NLA_NUL_STRING,
1198 .len = IPSET_MAXNAMELEN - 1 },
1199 [IPSET_ATTR_LINENO] = { .type = NLA_U32 },
1200 [IPSET_ATTR_DATA] = { .type = NLA_NESTED },
1201 [IPSET_ATTR_ADT] = { .type = NLA_NESTED },
1202 };
1203
1204 static int
1205 call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set,
1206 struct nlattr *tb[], enum ipset_adt adt,
1207 u32 flags, bool use_lineno)
1208 {
1209 int ret;
1210 u32 lineno = 0;
1211 bool eexist = flags & IPSET_FLAG_EXIST, retried = false;
1212
1213 do {
1214 write_lock_bh(&set->lock);
1215 ret = set->variant->uadt(set, tb, adt, &lineno, flags, retried);
1216 write_unlock_bh(&set->lock);
1217 retried = true;
1218 } while (ret == -EAGAIN &&
1219 set->variant->resize &&
1220 (ret = set->variant->resize(set, retried)) == 0);
1221
1222 if (!ret || (ret == -IPSET_ERR_EXIST && eexist))
1223 return 0;
1224 if (lineno && use_lineno) {
1225 /* Error in restore/batch mode: send back lineno */
1226 struct nlmsghdr *rep, *nlh = nlmsg_hdr(skb);
1227 struct sk_buff *skb2;
1228 struct nlmsgerr *errmsg;
1229 size_t payload = sizeof(*errmsg) + nlmsg_len(nlh);
1230 int min_len = NLMSG_SPACE(sizeof(struct nfgenmsg));
1231 struct nlattr *cda[IPSET_ATTR_CMD_MAX+1];
1232 struct nlattr *cmdattr;
1233 u32 *errline;
1234
1235 skb2 = nlmsg_new(payload, GFP_KERNEL);
1236 if (skb2 == NULL)
1237 return -ENOMEM;
1238 rep = __nlmsg_put(skb2, NETLINK_CB(skb).portid,
1239 nlh->nlmsg_seq, NLMSG_ERROR, payload, 0);
1240 errmsg = nlmsg_data(rep);
1241 errmsg->error = ret;
1242 memcpy(&errmsg->msg, nlh, nlh->nlmsg_len);
1243 cmdattr = (void *)&errmsg->msg + min_len;
1244
1245 nla_parse(cda, IPSET_ATTR_CMD_MAX,
1246 cmdattr, nlh->nlmsg_len - min_len,
1247 ip_set_adt_policy);
1248
1249 errline = nla_data(cda[IPSET_ATTR_LINENO]);
1250
1251 *errline = lineno;
1252
1253 netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
1254 /* Signal netlink not to send its ACK/errmsg. */
1255 return -EINTR;
1256 }
1257
1258 return ret;
1259 }
1260
1261 static int
1262 ip_set_uadd(struct sock *ctnl, struct sk_buff *skb,
1263 const struct nlmsghdr *nlh,
1264 const struct nlattr * const attr[])
1265 {
1266 struct ip_set *set;
1267 struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {};
1268 const struct nlattr *nla;
1269 u32 flags = flag_exist(nlh);
1270 bool use_lineno;
1271 int ret = 0;
1272
1273 if (unlikely(protocol_failed(attr) ||
1274 attr[IPSET_ATTR_SETNAME] == NULL ||
1275 !((attr[IPSET_ATTR_DATA] != NULL) ^
1276 (attr[IPSET_ATTR_ADT] != NULL)) ||
1277 (attr[IPSET_ATTR_DATA] != NULL &&
1278 !flag_nested(attr[IPSET_ATTR_DATA])) ||
1279 (attr[IPSET_ATTR_ADT] != NULL &&
1280 (!flag_nested(attr[IPSET_ATTR_ADT]) ||
1281 attr[IPSET_ATTR_LINENO] == NULL))))
1282 return -IPSET_ERR_PROTOCOL;
1283
1284 set = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
1285 if (set == NULL)
1286 return -ENOENT;
1287
1288 use_lineno = !!attr[IPSET_ATTR_LINENO];
1289 if (attr[IPSET_ATTR_DATA]) {
1290 if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX,
1291 attr[IPSET_ATTR_DATA],
1292 set->type->adt_policy))
1293 return -IPSET_ERR_PROTOCOL;
1294 ret = call_ad(ctnl, skb, set, tb, IPSET_ADD, flags,
1295 use_lineno);
1296 } else {
1297 int nla_rem;
1298
1299 nla_for_each_nested(nla, attr[IPSET_ATTR_ADT], nla_rem) {
1300 memset(tb, 0, sizeof(tb));
1301 if (nla_type(nla) != IPSET_ATTR_DATA ||
1302 !flag_nested(nla) ||
1303 nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, nla,
1304 set->type->adt_policy))
1305 return -IPSET_ERR_PROTOCOL;
1306 ret = call_ad(ctnl, skb, set, tb, IPSET_ADD,
1307 flags, use_lineno);
1308 if (ret < 0)
1309 return ret;
1310 }
1311 }
1312 return ret;
1313 }
1314
1315 static int
1316 ip_set_udel(struct sock *ctnl, struct sk_buff *skb,
1317 const struct nlmsghdr *nlh,
1318 const struct nlattr * const attr[])
1319 {
1320 struct ip_set *set;
1321 struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {};
1322 const struct nlattr *nla;
1323 u32 flags = flag_exist(nlh);
1324 bool use_lineno;
1325 int ret = 0;
1326
1327 if (unlikely(protocol_failed(attr) ||
1328 attr[IPSET_ATTR_SETNAME] == NULL ||
1329 !((attr[IPSET_ATTR_DATA] != NULL) ^
1330 (attr[IPSET_ATTR_ADT] != NULL)) ||
1331 (attr[IPSET_ATTR_DATA] != NULL &&
1332 !flag_nested(attr[IPSET_ATTR_DATA])) ||
1333 (attr[IPSET_ATTR_ADT] != NULL &&
1334 (!flag_nested(attr[IPSET_ATTR_ADT]) ||
1335 attr[IPSET_ATTR_LINENO] == NULL))))
1336 return -IPSET_ERR_PROTOCOL;
1337
1338 set = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
1339 if (set == NULL)
1340 return -ENOENT;
1341
1342 use_lineno = !!attr[IPSET_ATTR_LINENO];
1343 if (attr[IPSET_ATTR_DATA]) {
1344 if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX,
1345 attr[IPSET_ATTR_DATA],
1346 set->type->adt_policy))
1347 return -IPSET_ERR_PROTOCOL;
1348 ret = call_ad(ctnl, skb, set, tb, IPSET_DEL, flags,
1349 use_lineno);
1350 } else {
1351 int nla_rem;
1352
1353 nla_for_each_nested(nla, attr[IPSET_ATTR_ADT], nla_rem) {
1354 memset(tb, 0, sizeof(*tb));
1355 if (nla_type(nla) != IPSET_ATTR_DATA ||
1356 !flag_nested(nla) ||
1357 nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, nla,
1358 set->type->adt_policy))
1359 return -IPSET_ERR_PROTOCOL;
1360 ret = call_ad(ctnl, skb, set, tb, IPSET_DEL,
1361 flags, use_lineno);
1362 if (ret < 0)
1363 return ret;
1364 }
1365 }
1366 return ret;
1367 }
1368
1369 static int
1370 ip_set_utest(struct sock *ctnl, struct sk_buff *skb,
1371 const struct nlmsghdr *nlh,
1372 const struct nlattr * const attr[])
1373 {
1374 struct ip_set *set;
1375 struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {};
1376 int ret = 0;
1377
1378 if (unlikely(protocol_failed(attr) ||
1379 attr[IPSET_ATTR_SETNAME] == NULL ||
1380 attr[IPSET_ATTR_DATA] == NULL ||
1381 !flag_nested(attr[IPSET_ATTR_DATA])))
1382 return -IPSET_ERR_PROTOCOL;
1383
1384 set = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
1385 if (set == NULL)
1386 return -ENOENT;
1387
1388 if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, attr[IPSET_ATTR_DATA],
1389 set->type->adt_policy))
1390 return -IPSET_ERR_PROTOCOL;
1391
1392 read_lock_bh(&set->lock);
1393 ret = set->variant->uadt(set, tb, IPSET_TEST, NULL, 0, 0);
1394 read_unlock_bh(&set->lock);
1395 /* Userspace can't trigger element to be re-added */
1396 if (ret == -EAGAIN)
1397 ret = 1;
1398
1399 return ret < 0 ? ret : ret > 0 ? 0 : -IPSET_ERR_EXIST;
1400 }
1401
/* Get header data of a set */
1403
1404 static int
1405 ip_set_header(struct sock *ctnl, struct sk_buff *skb,
1406 const struct nlmsghdr *nlh,
1407 const struct nlattr * const attr[])
1408 {
1409 const struct ip_set *set;
1410 struct sk_buff *skb2;
1411 struct nlmsghdr *nlh2;
1412 ip_set_id_t index;
1413 int ret = 0;
1414
1415 if (unlikely(protocol_failed(attr) ||
1416 attr[IPSET_ATTR_SETNAME] == NULL))
1417 return -IPSET_ERR_PROTOCOL;
1418
1419 index = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME]));
1420 if (index == IPSET_INVALID_ID)
1421 return -ENOENT;
1422 set = ip_set_list[index];
1423
1424 skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1425 if (skb2 == NULL)
1426 return -ENOMEM;
1427
1428 nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
1429 IPSET_CMD_HEADER);
1430 if (!nlh2)
1431 goto nlmsg_failure;
1432 if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL) ||
1433 nla_put_string(skb2, IPSET_ATTR_SETNAME, set->name) ||
1434 nla_put_string(skb2, IPSET_ATTR_TYPENAME, set->type->name) ||
1435 nla_put_u8(skb2, IPSET_ATTR_FAMILY, set->family) ||
1436 nla_put_u8(skb2, IPSET_ATTR_REVISION, set->revision))
1437 goto nla_put_failure;
1438 nlmsg_end(skb2, nlh2);
1439
1440 ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
1441 if (ret < 0)
1442 return ret;
1443
1444 return 0;
1445
1446 nla_put_failure:
1447 nlmsg_cancel(skb2, nlh2);
1448 nlmsg_failure:
1449 kfree_skb(skb2);
1450 return -EMSGSIZE;
1451 }
1452
1453 /* Get type data */
1454
1455 static const struct nla_policy ip_set_type_policy[IPSET_ATTR_CMD_MAX + 1] = {
1456 [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 },
1457 [IPSET_ATTR_TYPENAME] = { .type = NLA_NUL_STRING,
1458 .len = IPSET_MAXNAMELEN - 1 },
1459 [IPSET_ATTR_FAMILY] = { .type = NLA_U8 },
1460 };
1461
1462 static int
1463 ip_set_type(struct sock *ctnl, struct sk_buff *skb,
1464 const struct nlmsghdr *nlh,
1465 const struct nlattr * const attr[])
1466 {
1467 struct sk_buff *skb2;
1468 struct nlmsghdr *nlh2;
1469 u8 family, min, max;
1470 const char *typename;
1471 int ret = 0;
1472
1473 if (unlikely(protocol_failed(attr) ||
1474 attr[IPSET_ATTR_TYPENAME] == NULL ||
1475 attr[IPSET_ATTR_FAMILY] == NULL))
1476 return -IPSET_ERR_PROTOCOL;
1477
1478 family = nla_get_u8(attr[IPSET_ATTR_FAMILY]);
1479 typename = nla_data(attr[IPSET_ATTR_TYPENAME]);
1480 ret = find_set_type_minmax(typename, family, &min, &max);
1481 if (ret)
1482 return ret;
1483
1484 skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1485 if (skb2 == NULL)
1486 return -ENOMEM;
1487
1488 nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
1489 IPSET_CMD_TYPE);
1490 if (!nlh2)
1491 goto nlmsg_failure;
1492 if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL) ||
1493 nla_put_string(skb2, IPSET_ATTR_TYPENAME, typename) ||
1494 nla_put_u8(skb2, IPSET_ATTR_FAMILY, family) ||
1495 nla_put_u8(skb2, IPSET_ATTR_REVISION, max) ||
1496 nla_put_u8(skb2, IPSET_ATTR_REVISION_MIN, min))
1497 goto nla_put_failure;
1498 nlmsg_end(skb2, nlh2);
1499
1500 pr_debug("Send TYPE, nlmsg_len: %u\n", nlh2->nlmsg_len);
1501 ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
1502 if (ret < 0)
1503 return ret;
1504
1505 return 0;
1506
1507 nla_put_failure:
1508 nlmsg_cancel(skb2, nlh2);
1509 nlmsg_failure:
1510 kfree_skb(skb2);
1511 return -EMSGSIZE;
1512 }
1513
1514 /* Get protocol version */
1515
1516 static const struct nla_policy
1517 ip_set_protocol_policy[IPSET_ATTR_CMD_MAX + 1] = {
1518 [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 },
1519 };
1520
1521 static int
1522 ip_set_protocol(struct sock *ctnl, struct sk_buff *skb,
1523 const struct nlmsghdr *nlh,
1524 const struct nlattr * const attr[])
1525 {
1526 struct sk_buff *skb2;
1527 struct nlmsghdr *nlh2;
1528 int ret = 0;
1529
1530 if (unlikely(attr[IPSET_ATTR_PROTOCOL] == NULL))
1531 return -IPSET_ERR_PROTOCOL;
1532
1533 skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1534 if (skb2 == NULL)
1535 return -ENOMEM;
1536
1537 nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
1538 IPSET_CMD_PROTOCOL);
1539 if (!nlh2)
1540 goto nlmsg_failure;
1541 if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL))
1542 goto nla_put_failure;
1543 nlmsg_end(skb2, nlh2);
1544
1545 ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
1546 if (ret < 0)
1547 return ret;
1548
1549 return 0;
1550
1551 nla_put_failure:
1552 nlmsg_cancel(skb2, nlh2);
1553 nlmsg_failure:
1554 kfree_skb(skb2);
1555 return -EMSGSIZE;
1556 }
1557
1558 static const struct nfnl_callback ip_set_netlink_subsys_cb[IPSET_MSG_MAX] = {
1559 [IPSET_CMD_NONE] = {
1560 .call = ip_set_none,
1561 .attr_count = IPSET_ATTR_CMD_MAX,
1562 },
1563 [IPSET_CMD_CREATE] = {
1564 .call = ip_set_create,
1565 .attr_count = IPSET_ATTR_CMD_MAX,
1566 .policy = ip_set_create_policy,
1567 },
1568 [IPSET_CMD_DESTROY] = {
1569 .call = ip_set_destroy,
1570 .attr_count = IPSET_ATTR_CMD_MAX,
1571 .policy = ip_set_setname_policy,
1572 },
1573 [IPSET_CMD_FLUSH] = {
1574 .call = ip_set_flush,
1575 .attr_count = IPSET_ATTR_CMD_MAX,
1576 .policy = ip_set_setname_policy,
1577 },
1578 [IPSET_CMD_RENAME] = {
1579 .call = ip_set_rename,
1580 .attr_count = IPSET_ATTR_CMD_MAX,
1581 .policy = ip_set_setname2_policy,
1582 },
1583 [IPSET_CMD_SWAP] = {
1584 .call = ip_set_swap,
1585 .attr_count = IPSET_ATTR_CMD_MAX,
1586 .policy = ip_set_setname2_policy,
1587 },
1588 [IPSET_CMD_LIST] = {
1589 .call = ip_set_dump,
1590 .attr_count = IPSET_ATTR_CMD_MAX,
1591 .policy = ip_set_setname_policy,
1592 },
1593 [IPSET_CMD_SAVE] = {
1594 .call = ip_set_dump,
1595 .attr_count = IPSET_ATTR_CMD_MAX,
1596 .policy = ip_set_setname_policy,
1597 },
1598 [IPSET_CMD_ADD] = {
1599 .call = ip_set_uadd,
1600 .attr_count = IPSET_ATTR_CMD_MAX,
1601 .policy = ip_set_adt_policy,
1602 },
1603 [IPSET_CMD_DEL] = {
1604 .call = ip_set_udel,
1605 .attr_count = IPSET_ATTR_CMD_MAX,
1606 .policy = ip_set_adt_policy,
1607 },
1608 [IPSET_CMD_TEST] = {
1609 .call = ip_set_utest,
1610 .attr_count = IPSET_ATTR_CMD_MAX,
1611 .policy = ip_set_adt_policy,
1612 },
1613 [IPSET_CMD_HEADER] = {
1614 .call = ip_set_header,
1615 .attr_count = IPSET_ATTR_CMD_MAX,
1616 .policy = ip_set_setname_policy,
1617 },
1618 [IPSET_CMD_TYPE] = {
1619 .call = ip_set_type,
1620 .attr_count = IPSET_ATTR_CMD_MAX,
1621 .policy = ip_set_type_policy,
1622 },
1623 [IPSET_CMD_PROTOCOL] = {
1624 .call = ip_set_protocol,
1625 .attr_count = IPSET_ATTR_CMD_MAX,
1626 .policy = ip_set_protocol_policy,
1627 },
1628 };
1629
1630 static struct nfnetlink_subsystem ip_set_netlink_subsys __read_mostly = {
1631 .name = "ip_set",
1632 .subsys_id = NFNL_SUBSYS_IPSET,
1633 .cb_count = IPSET_MSG_MAX,
1634 .cb = ip_set_netlink_subsys_cb,
1635 };
1636
1637 /* Interface to iptables/ip6tables */
1638
1639 static int
1640 ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len)
1641 {
1642 unsigned int *op;
1643 void *data;
1644 int copylen = *len, ret = 0;
1645
1646 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
1647 return -EPERM;
1648 if (optval != SO_IP_SET)
1649 return -EBADF;
1650 if (*len < sizeof(unsigned int))
1651 return -EINVAL;
1652
1653 data = vmalloc(*len);
1654 if (!data)
1655 return -ENOMEM;
1656 if (copy_from_user(data, user, *len) != 0) {
1657 ret = -EFAULT;
1658 goto done;
1659 }
1660 op = (unsigned int *) data;
1661
1662 if (*op < IP_SET_OP_VERSION) {
1663 /* Check the version at the beginning of operations */
1664 struct ip_set_req_version *req_version = data;
1665 if (req_version->version != IPSET_PROTOCOL) {
1666 ret = -EPROTO;
1667 goto done;
1668 }
1669 }
1670
1671 switch (*op) {
1672 case IP_SET_OP_VERSION: {
1673 struct ip_set_req_version *req_version = data;
1674
1675 if (*len != sizeof(struct ip_set_req_version)) {
1676 ret = -EINVAL;
1677 goto done;
1678 }
1679
1680 req_version->version = IPSET_PROTOCOL;
1681 ret = copy_to_user(user, req_version,
1682 sizeof(struct ip_set_req_version));
1683 goto done;
1684 }
1685 case IP_SET_OP_GET_BYNAME: {
1686 struct ip_set_req_get_set *req_get = data;
1687
1688 if (*len != sizeof(struct ip_set_req_get_set)) {
1689 ret = -EINVAL;
1690 goto done;
1691 }
1692 req_get->set.name[IPSET_MAXNAMELEN - 1] = '\0';
1693 nfnl_lock();
1694 req_get->set.index = find_set_id(req_get->set.name);
1695 nfnl_unlock();
1696 goto copy;
1697 }
1698 case IP_SET_OP_GET_BYINDEX: {
1699 struct ip_set_req_get_set *req_get = data;
1700
1701 if (*len != sizeof(struct ip_set_req_get_set) ||
1702 req_get->set.index >= ip_set_max) {
1703 ret = -EINVAL;
1704 goto done;
1705 }
1706 nfnl_lock();
1707 strncpy(req_get->set.name,
1708 ip_set_list[req_get->set.index]
1709 ? ip_set_list[req_get->set.index]->name : "",
1710 IPSET_MAXNAMELEN);
1711 nfnl_unlock();
1712 goto copy;
1713 }
1714 default:
1715 ret = -EBADMSG;
1716 goto done;
1717 } /* end of switch(op) */
1718
1719 copy:
1720 ret = copy_to_user(user, data, copylen);
1721
1722 done:
1723 vfree(data);
1724 if (ret > 0)
1725 ret = 0;
1726 return ret;
1727 }
1728
1729 static struct nf_sockopt_ops so_set __read_mostly = {
1730 .pf = PF_INET,
1731 .get_optmin = SO_IP_SET,
1732 .get_optmax = SO_IP_SET + 1,
1733 .get = &ip_set_sockfn_get,
1734 .owner = THIS_MODULE,
1735 };
1736
1737 static int __init
1738 ip_set_init(void)
1739 {
1740 int ret;
1741
1742 if (max_sets)
1743 ip_set_max = max_sets;
1744 if (ip_set_max >= IPSET_INVALID_ID)
1745 ip_set_max = IPSET_INVALID_ID - 1;
1746
1747 ip_set_list = kzalloc(sizeof(struct ip_set *) * ip_set_max,
1748 GFP_KERNEL);
1749 if (!ip_set_list)
1750 return -ENOMEM;
1751
1752 ret = nfnetlink_subsys_register(&ip_set_netlink_subsys);
1753 if (ret != 0) {
1754 pr_err("ip_set: cannot register with nfnetlink.\n");
1755 kfree(ip_set_list);
1756 return ret;
1757 }
1758 ret = nf_register_sockopt(&so_set);
1759 if (ret != 0) {
1760 pr_err("SO_SET registry failed: %d\n", ret);
1761 nfnetlink_subsys_unregister(&ip_set_netlink_subsys);
1762 kfree(ip_set_list);
1763 return ret;
1764 }
1765
1766 pr_notice("ip_set: protocol %u\n", IPSET_PROTOCOL);
1767 return 0;
1768 }
1769
1770 static void __exit
1771 ip_set_fini(void)
1772 {
1773 /* There can't be any existing set */
1774 nf_unregister_sockopt(&so_set);
1775 nfnetlink_subsys_unregister(&ip_set_netlink_subsys);
1776 kfree(ip_set_list);
1777 pr_debug("these are the famous last words\n");
1778 }
1779
/* Register ip_set_init()/ip_set_fini() as the module's init and exit hooks */
module_init(ip_set_init);
module_exit(ip_set_fini);
This page took 0.108904 seconds and 5 git commands to generate.