net: Allow userns root to control llc, netfilter, netlink, packet, and xfrm
[deliverable/linux.git] / net / netfilter / ipset / ip_set_core.c
CommitLineData
a7b4f989
JK
1/* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu>
2 * Patrick Schaaf <bof@bof.de>
3 * Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 */
9
10/* Kernel module for IP set management */
11
12#include <linux/init.h>
13#include <linux/module.h>
14#include <linux/moduleparam.h>
15#include <linux/ip.h>
16#include <linux/skbuff.h>
17#include <linux/spinlock.h>
18#include <linux/netlink.h>
19#include <linux/rculist.h>
a7b4f989
JK
20#include <net/netlink.h>
21
22#include <linux/netfilter.h>
b66554cf 23#include <linux/netfilter/x_tables.h>
a7b4f989
JK
24#include <linux/netfilter/nfnetlink.h>
25#include <linux/netfilter/ipset/ip_set.h>
26
27static LIST_HEAD(ip_set_type_list); /* all registered set types */
28static DEFINE_MUTEX(ip_set_type_mutex); /* protects ip_set_type_list */
2f9f28b2 29static DEFINE_RWLOCK(ip_set_ref_lock); /* protects the set refs */
a7b4f989
JK
30
31static struct ip_set **ip_set_list; /* all individual sets */
32static ip_set_id_t ip_set_max = CONFIG_IP_SET_MAX; /* max number of sets */
33
34#define STREQ(a, b) (strncmp(a, b, IPSET_MAXNAMELEN) == 0)
35
36static unsigned int max_sets;
37
38module_param(max_sets, int, 0600);
39MODULE_PARM_DESC(max_sets, "maximal number of sets");
40MODULE_LICENSE("GPL");
41MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
42MODULE_DESCRIPTION("core IP set support");
43MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET);
44
45/*
46 * The set types are implemented in modules and registered set types
47 * can be found in ip_set_type_list. Adding/deleting types is
48 * serialized by ip_set_type_mutex.
49 */
50
51static inline void
52ip_set_type_lock(void)
53{
54 mutex_lock(&ip_set_type_mutex);
55}
56
57static inline void
58ip_set_type_unlock(void)
59{
60 mutex_unlock(&ip_set_type_mutex);
61}
62
63/* Register and deregister settype */
64
65static struct ip_set_type *
66find_set_type(const char *name, u8 family, u8 revision)
67{
68 struct ip_set_type *type;
69
70 list_for_each_entry_rcu(type, &ip_set_type_list, list)
71 if (STREQ(type->name, name) &&
3ace95c0
JK
72 (type->family == family ||
73 type->family == NFPROTO_UNSPEC) &&
f1e00b39
JK
74 revision >= type->revision_min &&
75 revision <= type->revision_max)
a7b4f989
JK
76 return type;
77 return NULL;
78}
79
80/* Unlock, try to load a set type module and lock again */
088067f4
JK
81static bool
82load_settype(const char *name)
a7b4f989
JK
83{
84 nfnl_unlock();
85 pr_debug("try to load ip_set_%s\n", name);
86 if (request_module("ip_set_%s", name) < 0) {
87 pr_warning("Can't find ip_set type %s\n", name);
88 nfnl_lock();
088067f4 89 return false;
a7b4f989
JK
90 }
91 nfnl_lock();
088067f4 92 return true;
a7b4f989
JK
93}
94
95/* Find a set type and reference it */
088067f4
JK
96#define find_set_type_get(name, family, revision, found) \
97 __find_set_type_get(name, family, revision, found, false)
98
a7b4f989 99static int
088067f4
JK
100__find_set_type_get(const char *name, u8 family, u8 revision,
101 struct ip_set_type **found, bool retry)
a7b4f989 102{
5c1aba46
JK
103 struct ip_set_type *type;
104 int err;
105
088067f4
JK
106 if (retry && !load_settype(name))
107 return -IPSET_ERR_FIND_TYPE;
108
a7b4f989
JK
109 rcu_read_lock();
110 *found = find_set_type(name, family, revision);
111 if (*found) {
5c1aba46
JK
112 err = !try_module_get((*found)->me) ? -EFAULT : 0;
113 goto unlock;
a7b4f989 114 }
088067f4
JK
115 /* Make sure the type is already loaded
116 * but we don't support the revision */
5c1aba46
JK
117 list_for_each_entry_rcu(type, &ip_set_type_list, list)
118 if (STREQ(type->name, name)) {
119 err = -IPSET_ERR_FIND_TYPE;
120 goto unlock;
121 }
a7b4f989
JK
122 rcu_read_unlock();
123
088067f4
JK
124 return retry ? -IPSET_ERR_FIND_TYPE :
125 __find_set_type_get(name, family, revision, found, true);
5c1aba46
JK
126
127unlock:
128 rcu_read_unlock();
129 return err;
a7b4f989
JK
130}
131
132/* Find a given set type by name and family.
133 * If we succeeded, the supported minimal and maximum revisions are
134 * filled out.
135 */
088067f4
JK
136#define find_set_type_minmax(name, family, min, max) \
137 __find_set_type_minmax(name, family, min, max, false)
138
a7b4f989 139static int
088067f4
JK
140__find_set_type_minmax(const char *name, u8 family, u8 *min, u8 *max,
141 bool retry)
a7b4f989
JK
142{
143 struct ip_set_type *type;
144 bool found = false;
145
088067f4
JK
146 if (retry && !load_settype(name))
147 return -IPSET_ERR_FIND_TYPE;
148
5c1aba46 149 *min = 255; *max = 0;
a7b4f989
JK
150 rcu_read_lock();
151 list_for_each_entry_rcu(type, &ip_set_type_list, list)
152 if (STREQ(type->name, name) &&
3ace95c0
JK
153 (type->family == family ||
154 type->family == NFPROTO_UNSPEC)) {
a7b4f989 155 found = true;
f1e00b39
JK
156 if (type->revision_min < *min)
157 *min = type->revision_min;
158 if (type->revision_max > *max)
159 *max = type->revision_max;
a7b4f989
JK
160 }
161 rcu_read_unlock();
162 if (found)
163 return 0;
164
088067f4
JK
165 return retry ? -IPSET_ERR_FIND_TYPE :
166 __find_set_type_minmax(name, family, min, max, true);
a7b4f989
JK
167}
168
c15f1c83
JE
/* Printable name of a protocol family, used in log messages */
#define family_name(f)					\
	((f) == NFPROTO_IPV4 ? "inet" :			\
	 ((f) == NFPROTO_IPV6 ? "inet6" : "any"))
a7b4f989
JK
171
172/* Register a set type structure. The type is identified by
173 * the unique triple of name, family and revision.
174 */
175int
176ip_set_type_register(struct ip_set_type *type)
177{
178 int ret = 0;
179
180 if (type->protocol != IPSET_PROTOCOL) {
f1e00b39 181 pr_warning("ip_set type %s, family %s, revision %u:%u uses "
a7b4f989
JK
182 "wrong protocol version %u (want %u)\n",
183 type->name, family_name(type->family),
f1e00b39
JK
184 type->revision_min, type->revision_max,
185 type->protocol, IPSET_PROTOCOL);
a7b4f989
JK
186 return -EINVAL;
187 }
188
189 ip_set_type_lock();
f1e00b39 190 if (find_set_type(type->name, type->family, type->revision_min)) {
a7b4f989 191 /* Duplicate! */
f1e00b39 192 pr_warning("ip_set type %s, family %s with revision min %u "
a7b4f989 193 "already registered!\n", type->name,
f1e00b39 194 family_name(type->family), type->revision_min);
a7b4f989
JK
195 ret = -EINVAL;
196 goto unlock;
197 }
198 list_add_rcu(&type->list, &ip_set_type_list);
f1e00b39
JK
199 pr_debug("type %s, family %s, revision %u:%u registered.\n",
200 type->name, family_name(type->family),
201 type->revision_min, type->revision_max);
a7b4f989
JK
202unlock:
203 ip_set_type_unlock();
204 return ret;
205}
206EXPORT_SYMBOL_GPL(ip_set_type_register);
207
208/* Unregister a set type. There's a small race with ip_set_create */
209void
210ip_set_type_unregister(struct ip_set_type *type)
211{
212 ip_set_type_lock();
f1e00b39
JK
213 if (!find_set_type(type->name, type->family, type->revision_min)) {
214 pr_warning("ip_set type %s, family %s with revision min %u "
a7b4f989 215 "not registered\n", type->name,
f1e00b39 216 family_name(type->family), type->revision_min);
a7b4f989
JK
217 goto unlock;
218 }
219 list_del_rcu(&type->list);
f1e00b39
JK
220 pr_debug("type %s, family %s with revision min %u unregistered.\n",
221 type->name, family_name(type->family), type->revision_min);
a7b4f989
JK
222unlock:
223 ip_set_type_unlock();
224
225 synchronize_rcu();
226}
227EXPORT_SYMBOL_GPL(ip_set_type_unregister);
228
229/* Utility functions */
230void *
231ip_set_alloc(size_t size)
232{
233 void *members = NULL;
234
235 if (size < KMALLOC_MAX_SIZE)
236 members = kzalloc(size, GFP_KERNEL | __GFP_NOWARN);
237
238 if (members) {
239 pr_debug("%p: allocated with kmalloc\n", members);
240 return members;
241 }
242
243 members = vzalloc(size);
244 if (!members)
245 return NULL;
246 pr_debug("%p: allocated with vmalloc\n", members);
247
248 return members;
249}
250EXPORT_SYMBOL_GPL(ip_set_alloc);
251
252void
253ip_set_free(void *members)
254{
255 pr_debug("%p: free with %s\n", members,
256 is_vmalloc_addr(members) ? "vfree" : "kfree");
257 if (is_vmalloc_addr(members))
258 vfree(members);
259 else
260 kfree(members);
261}
262EXPORT_SYMBOL_GPL(ip_set_free);
263
264static inline bool
265flag_nested(const struct nlattr *nla)
266{
267 return nla->nla_type & NLA_F_NESTED;
268}
269
270static const struct nla_policy ipaddr_policy[IPSET_ATTR_IPADDR_MAX + 1] = {
271 [IPSET_ATTR_IPADDR_IPV4] = { .type = NLA_U32 },
272 [IPSET_ATTR_IPADDR_IPV6] = { .type = NLA_BINARY,
273 .len = sizeof(struct in6_addr) },
274};
275
276int
277ip_set_get_ipaddr4(struct nlattr *nla, __be32 *ipaddr)
278{
279 struct nlattr *tb[IPSET_ATTR_IPADDR_MAX+1];
280
281 if (unlikely(!flag_nested(nla)))
282 return -IPSET_ERR_PROTOCOL;
8da560ce 283 if (nla_parse_nested(tb, IPSET_ATTR_IPADDR_MAX, nla, ipaddr_policy))
a7b4f989
JK
284 return -IPSET_ERR_PROTOCOL;
285 if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_IPADDR_IPV4)))
286 return -IPSET_ERR_PROTOCOL;
287
288 *ipaddr = nla_get_be32(tb[IPSET_ATTR_IPADDR_IPV4]);
289 return 0;
290}
291EXPORT_SYMBOL_GPL(ip_set_get_ipaddr4);
292
293int
294ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr)
295{
296 struct nlattr *tb[IPSET_ATTR_IPADDR_MAX+1];
297
298 if (unlikely(!flag_nested(nla)))
299 return -IPSET_ERR_PROTOCOL;
300
8da560ce 301 if (nla_parse_nested(tb, IPSET_ATTR_IPADDR_MAX, nla, ipaddr_policy))
a7b4f989
JK
302 return -IPSET_ERR_PROTOCOL;
303 if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_IPADDR_IPV6)))
304 return -IPSET_ERR_PROTOCOL;
305
306 memcpy(ipaddr, nla_data(tb[IPSET_ATTR_IPADDR_IPV6]),
307 sizeof(struct in6_addr));
308 return 0;
309}
310EXPORT_SYMBOL_GPL(ip_set_get_ipaddr6);
311
312/*
313 * Creating/destroying/renaming/swapping affect the existence and
314 * the properties of a set. All of these can be executed from userspace
315 * only and serialized by the nfnl mutex indirectly from nfnetlink.
316 *
317 * Sets are identified by their index in ip_set_list and the index
318 * is used by the external references (set/SET netfilter modules).
319 *
320 * The set behind an index may change by swapping only, from userspace.
321 */
322
323static inline void
324__ip_set_get(ip_set_id_t index)
325{
2f9f28b2
JK
326 write_lock_bh(&ip_set_ref_lock);
327 ip_set_list[index]->ref++;
328 write_unlock_bh(&ip_set_ref_lock);
a7b4f989
JK
329}
330
331static inline void
332__ip_set_put(ip_set_id_t index)
333{
2f9f28b2
JK
334 write_lock_bh(&ip_set_ref_lock);
335 BUG_ON(ip_set_list[index]->ref == 0);
336 ip_set_list[index]->ref--;
337 write_unlock_bh(&ip_set_ref_lock);
a7b4f989
JK
338}
339
340/*
341 * Add, del and test set entries from kernel.
342 *
343 * The set behind the index must exist and must be referenced
344 * so it can't be destroyed (or changed) under our feet.
345 */
346
347int
348ip_set_test(ip_set_id_t index, const struct sk_buff *skb,
b66554cf 349 const struct xt_action_param *par,
ac8cc925 350 const struct ip_set_adt_opt *opt)
a7b4f989
JK
351{
352 struct ip_set *set = ip_set_list[index];
353 int ret = 0;
354
2f9f28b2 355 BUG_ON(set == NULL);
a7b4f989
JK
356 pr_debug("set %s, index %u\n", set->name, index);
357
ac8cc925 358 if (opt->dim < set->type->dimension ||
c15f1c83 359 !(opt->family == set->family || set->family == NFPROTO_UNSPEC))
a7b4f989
JK
360 return 0;
361
362 read_lock_bh(&set->lock);
b66554cf 363 ret = set->variant->kadt(set, skb, par, IPSET_TEST, opt);
a7b4f989
JK
364 read_unlock_bh(&set->lock);
365
366 if (ret == -EAGAIN) {
367 /* Type requests element to be completed */
368 pr_debug("element must be competed, ADD is triggered\n");
369 write_lock_bh(&set->lock);
b66554cf 370 set->variant->kadt(set, skb, par, IPSET_ADD, opt);
a7b4f989
JK
371 write_unlock_bh(&set->lock);
372 ret = 1;
3e0304a5
JK
373 } else {
374 /* --return-nomatch: invert matched element */
375 if ((opt->flags & IPSET_RETURN_NOMATCH) &&
376 (set->type->features & IPSET_TYPE_NOMATCH) &&
377 (ret > 0 || ret == -ENOTEMPTY))
378 ret = -ret;
a7b4f989
JK
379 }
380
381 /* Convert error codes to nomatch */
382 return (ret < 0 ? 0 : ret);
383}
384EXPORT_SYMBOL_GPL(ip_set_test);
385
386int
387ip_set_add(ip_set_id_t index, const struct sk_buff *skb,
b66554cf 388 const struct xt_action_param *par,
ac8cc925 389 const struct ip_set_adt_opt *opt)
a7b4f989
JK
390{
391 struct ip_set *set = ip_set_list[index];
392 int ret;
393
2f9f28b2 394 BUG_ON(set == NULL);
a7b4f989
JK
395 pr_debug("set %s, index %u\n", set->name, index);
396
ac8cc925 397 if (opt->dim < set->type->dimension ||
c15f1c83 398 !(opt->family == set->family || set->family == NFPROTO_UNSPEC))
a7b4f989
JK
399 return 0;
400
401 write_lock_bh(&set->lock);
b66554cf 402 ret = set->variant->kadt(set, skb, par, IPSET_ADD, opt);
a7b4f989
JK
403 write_unlock_bh(&set->lock);
404
405 return ret;
406}
407EXPORT_SYMBOL_GPL(ip_set_add);
408
409int
410ip_set_del(ip_set_id_t index, const struct sk_buff *skb,
b66554cf 411 const struct xt_action_param *par,
ac8cc925 412 const struct ip_set_adt_opt *opt)
a7b4f989
JK
413{
414 struct ip_set *set = ip_set_list[index];
415 int ret = 0;
416
2f9f28b2 417 BUG_ON(set == NULL);
a7b4f989
JK
418 pr_debug("set %s, index %u\n", set->name, index);
419
ac8cc925 420 if (opt->dim < set->type->dimension ||
c15f1c83 421 !(opt->family == set->family || set->family == NFPROTO_UNSPEC))
a7b4f989
JK
422 return 0;
423
424 write_lock_bh(&set->lock);
b66554cf 425 ret = set->variant->kadt(set, skb, par, IPSET_DEL, opt);
a7b4f989
JK
426 write_unlock_bh(&set->lock);
427
428 return ret;
429}
430EXPORT_SYMBOL_GPL(ip_set_del);
431
432/*
433 * Find set by name, reference it once. The reference makes sure the
434 * thing pointed to, does not go away under our feet.
435 *
a7b4f989
JK
436 */
437ip_set_id_t
438ip_set_get_byname(const char *name, struct ip_set **set)
439{
440 ip_set_id_t i, index = IPSET_INVALID_ID;
441 struct ip_set *s;
442
443 for (i = 0; i < ip_set_max; i++) {
444 s = ip_set_list[i];
445 if (s != NULL && STREQ(s->name, name)) {
446 __ip_set_get(i);
447 index = i;
448 *set = s;
449 }
450 }
451
452 return index;
453}
454EXPORT_SYMBOL_GPL(ip_set_get_byname);
455
456/*
457 * If the given set pointer points to a valid set, decrement
458 * reference count by 1. The caller shall not assume the index
459 * to be valid, after calling this function.
460 *
a7b4f989
JK
461 */
462void
463ip_set_put_byindex(ip_set_id_t index)
464{
2f9f28b2 465 if (ip_set_list[index] != NULL)
a7b4f989 466 __ip_set_put(index);
a7b4f989
JK
467}
468EXPORT_SYMBOL_GPL(ip_set_put_byindex);
469
470/*
471 * Get the name of a set behind a set index.
472 * We assume the set is referenced, so it does exist and
473 * can't be destroyed. The set cannot be renamed due to
474 * the referencing either.
475 *
a7b4f989
JK
476 */
477const char *
478ip_set_name_byindex(ip_set_id_t index)
479{
480 const struct ip_set *set = ip_set_list[index];
481
482 BUG_ON(set == NULL);
2f9f28b2 483 BUG_ON(set->ref == 0);
a7b4f989
JK
484
485 /* Referenced, so it's safe */
486 return set->name;
487}
488EXPORT_SYMBOL_GPL(ip_set_name_byindex);
489
490/*
491 * Routines to call by external subsystems, which do not
492 * call nfnl_lock for us.
493 */
494
495/*
496 * Find set by name, reference it once. The reference makes sure the
497 * thing pointed to, does not go away under our feet.
498 *
499 * The nfnl mutex is used in the function.
500 */
501ip_set_id_t
502ip_set_nfnl_get(const char *name)
503{
504 struct ip_set *s;
505 ip_set_id_t index;
506
507 nfnl_lock();
508 index = ip_set_get_byname(name, &s);
509 nfnl_unlock();
510
511 return index;
512}
513EXPORT_SYMBOL_GPL(ip_set_nfnl_get);
514
515/*
516 * Find set by index, reference it once. The reference makes sure the
517 * thing pointed to, does not go away under our feet.
518 *
519 * The nfnl mutex is used in the function.
520 */
521ip_set_id_t
522ip_set_nfnl_get_byindex(ip_set_id_t index)
523{
524 if (index > ip_set_max)
525 return IPSET_INVALID_ID;
526
527 nfnl_lock();
528 if (ip_set_list[index])
529 __ip_set_get(index);
530 else
531 index = IPSET_INVALID_ID;
532 nfnl_unlock();
533
534 return index;
535}
536EXPORT_SYMBOL_GPL(ip_set_nfnl_get_byindex);
537
538/*
539 * If the given set pointer points to a valid set, decrement
540 * reference count by 1. The caller shall not assume the index
541 * to be valid, after calling this function.
542 *
543 * The nfnl mutex is used in the function.
544 */
545void
546ip_set_nfnl_put(ip_set_id_t index)
547{
548 nfnl_lock();
2f9f28b2 549 ip_set_put_byindex(index);
a7b4f989
JK
550 nfnl_unlock();
551}
552EXPORT_SYMBOL_GPL(ip_set_nfnl_put);
553
554/*
555 * Communication protocol with userspace over netlink.
556 *
2f9f28b2 557 * The commands are serialized by the nfnl mutex.
a7b4f989
JK
558 */
559
560static inline bool
561protocol_failed(const struct nlattr * const tb[])
562{
563 return !tb[IPSET_ATTR_PROTOCOL] ||
564 nla_get_u8(tb[IPSET_ATTR_PROTOCOL]) != IPSET_PROTOCOL;
565}
566
567static inline u32
568flag_exist(const struct nlmsghdr *nlh)
569{
570 return nlh->nlmsg_flags & NLM_F_EXCL ? 0 : IPSET_FLAG_EXIST;
571}
572
573static struct nlmsghdr *
15e47304 574start_msg(struct sk_buff *skb, u32 portid, u32 seq, unsigned int flags,
a7b4f989
JK
575 enum ipset_cmd cmd)
576{
577 struct nlmsghdr *nlh;
578 struct nfgenmsg *nfmsg;
579
15e47304 580 nlh = nlmsg_put(skb, portid, seq, cmd | (NFNL_SUBSYS_IPSET << 8),
a7b4f989
JK
581 sizeof(*nfmsg), flags);
582 if (nlh == NULL)
583 return NULL;
584
585 nfmsg = nlmsg_data(nlh);
c15f1c83 586 nfmsg->nfgen_family = NFPROTO_IPV4;
a7b4f989
JK
587 nfmsg->version = NFNETLINK_V0;
588 nfmsg->res_id = 0;
589
590 return nlh;
591}
592
593/* Create a set */
594
595static const struct nla_policy ip_set_create_policy[IPSET_ATTR_CMD_MAX + 1] = {
596 [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 },
597 [IPSET_ATTR_SETNAME] = { .type = NLA_NUL_STRING,
598 .len = IPSET_MAXNAMELEN - 1 },
599 [IPSET_ATTR_TYPENAME] = { .type = NLA_NUL_STRING,
600 .len = IPSET_MAXNAMELEN - 1},
601 [IPSET_ATTR_REVISION] = { .type = NLA_U8 },
602 [IPSET_ATTR_FAMILY] = { .type = NLA_U8 },
603 [IPSET_ATTR_DATA] = { .type = NLA_NESTED },
604};
605
606static ip_set_id_t
607find_set_id(const char *name)
608{
609 ip_set_id_t i, index = IPSET_INVALID_ID;
610 const struct ip_set *set;
611
612 for (i = 0; index == IPSET_INVALID_ID && i < ip_set_max; i++) {
613 set = ip_set_list[i];
614 if (set != NULL && STREQ(set->name, name))
615 index = i;
616 }
617 return index;
618}
619
620static inline struct ip_set *
621find_set(const char *name)
622{
623 ip_set_id_t index = find_set_id(name);
624
625 return index == IPSET_INVALID_ID ? NULL : ip_set_list[index];
626}
627
628static int
629find_free_id(const char *name, ip_set_id_t *index, struct ip_set **set)
630{
631 ip_set_id_t i;
632
633 *index = IPSET_INVALID_ID;
634 for (i = 0; i < ip_set_max; i++) {
635 if (ip_set_list[i] == NULL) {
636 if (*index == IPSET_INVALID_ID)
637 *index = i;
638 } else if (STREQ(name, ip_set_list[i]->name)) {
639 /* Name clash */
640 *set = ip_set_list[i];
641 return -EEXIST;
642 }
643 }
644 if (*index == IPSET_INVALID_ID)
645 /* No free slot remained */
646 return -IPSET_ERR_MAX_SETS;
647 return 0;
648}
649
d31f4d44
TB
650static int
651ip_set_none(struct sock *ctnl, struct sk_buff *skb,
652 const struct nlmsghdr *nlh,
653 const struct nlattr * const attr[])
654{
655 return -EOPNOTSUPP;
656}
657
a7b4f989
JK
658static int
659ip_set_create(struct sock *ctnl, struct sk_buff *skb,
660 const struct nlmsghdr *nlh,
661 const struct nlattr * const attr[])
662{
9846ada1 663 struct ip_set *set, *clash = NULL;
a7b4f989
JK
664 ip_set_id_t index = IPSET_INVALID_ID;
665 struct nlattr *tb[IPSET_ATTR_CREATE_MAX+1] = {};
666 const char *name, *typename;
667 u8 family, revision;
668 u32 flags = flag_exist(nlh);
669 int ret = 0;
670
671 if (unlikely(protocol_failed(attr) ||
672 attr[IPSET_ATTR_SETNAME] == NULL ||
673 attr[IPSET_ATTR_TYPENAME] == NULL ||
674 attr[IPSET_ATTR_REVISION] == NULL ||
675 attr[IPSET_ATTR_FAMILY] == NULL ||
676 (attr[IPSET_ATTR_DATA] != NULL &&
677 !flag_nested(attr[IPSET_ATTR_DATA]))))
678 return -IPSET_ERR_PROTOCOL;
679
680 name = nla_data(attr[IPSET_ATTR_SETNAME]);
681 typename = nla_data(attr[IPSET_ATTR_TYPENAME]);
682 family = nla_get_u8(attr[IPSET_ATTR_FAMILY]);
683 revision = nla_get_u8(attr[IPSET_ATTR_REVISION]);
684 pr_debug("setname: %s, typename: %s, family: %s, revision: %u\n",
685 name, typename, family_name(family), revision);
686
687 /*
688 * First, and without any locks, allocate and initialize
689 * a normal base set structure.
690 */
691 set = kzalloc(sizeof(struct ip_set), GFP_KERNEL);
692 if (!set)
693 return -ENOMEM;
694 rwlock_init(&set->lock);
695 strlcpy(set->name, name, IPSET_MAXNAMELEN);
a7b4f989 696 set->family = family;
f1e00b39 697 set->revision = revision;
a7b4f989
JK
698
699 /*
700 * Next, check that we know the type, and take
701 * a reference on the type, to make sure it stays available
702 * while constructing our new set.
703 *
704 * After referencing the type, we try to create the type
705 * specific part of the set without holding any locks.
706 */
707 ret = find_set_type_get(typename, family, revision, &(set->type));
708 if (ret)
709 goto out;
710
711 /*
712 * Without holding any locks, create private part.
713 */
714 if (attr[IPSET_ATTR_DATA] &&
8da560ce
PM
715 nla_parse_nested(tb, IPSET_ATTR_CREATE_MAX, attr[IPSET_ATTR_DATA],
716 set->type->create_policy)) {
15b4d93f
JK
717 ret = -IPSET_ERR_PROTOCOL;
718 goto put_out;
a7b4f989
JK
719 }
720
721 ret = set->type->create(set, tb, flags);
722 if (ret != 0)
723 goto put_out;
724
725 /* BTW, ret==0 here. */
726
727 /*
728 * Here, we have a valid, constructed set and we are protected
2f9f28b2
JK
729 * by the nfnl mutex. Find the first free index in ip_set_list
730 * and check clashing.
a7b4f989 731 */
3ace95c0
JK
732 ret = find_free_id(set->name, &index, &clash);
733 if (ret != 0) {
a7b4f989
JK
734 /* If this is the same set and requested, ignore error */
735 if (ret == -EEXIST &&
736 (flags & IPSET_FLAG_EXIST) &&
737 STREQ(set->type->name, clash->type->name) &&
738 set->type->family == clash->type->family &&
f1e00b39
JK
739 set->type->revision_min == clash->type->revision_min &&
740 set->type->revision_max == clash->type->revision_max &&
a7b4f989
JK
741 set->variant->same_set(set, clash))
742 ret = 0;
743 goto cleanup;
744 }
745
746 /*
747 * Finally! Add our shiny new set to the list, and be done.
748 */
749 pr_debug("create: '%s' created with index %u!\n", set->name, index);
750 ip_set_list[index] = set;
751
752 return ret;
753
754cleanup:
755 set->variant->destroy(set);
756put_out:
757 module_put(set->type->me);
758out:
759 kfree(set);
760 return ret;
761}
762
763/* Destroy sets */
764
765static const struct nla_policy
766ip_set_setname_policy[IPSET_ATTR_CMD_MAX + 1] = {
767 [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 },
768 [IPSET_ATTR_SETNAME] = { .type = NLA_NUL_STRING,
769 .len = IPSET_MAXNAMELEN - 1 },
770};
771
772static void
773ip_set_destroy_set(ip_set_id_t index)
774{
775 struct ip_set *set = ip_set_list[index];
776
777 pr_debug("set: %s\n", set->name);
778 ip_set_list[index] = NULL;
779
780 /* Must call it without holding any lock */
781 set->variant->destroy(set);
782 module_put(set->type->me);
783 kfree(set);
784}
785
786static int
787ip_set_destroy(struct sock *ctnl, struct sk_buff *skb,
788 const struct nlmsghdr *nlh,
789 const struct nlattr * const attr[])
790{
791 ip_set_id_t i;
2f9f28b2 792 int ret = 0;
a7b4f989
JK
793
794 if (unlikely(protocol_failed(attr)))
795 return -IPSET_ERR_PROTOCOL;
796
2f9f28b2
JK
797 /* Commands are serialized and references are
798 * protected by the ip_set_ref_lock.
799 * External systems (i.e. xt_set) must call
800 * ip_set_put|get_nfnl_* functions, that way we
801 * can safely check references here.
802 *
803 * list:set timer can only decrement the reference
804 * counter, so if it's already zero, we can proceed
805 * without holding the lock.
806 */
807 read_lock_bh(&ip_set_ref_lock);
a7b4f989
JK
808 if (!attr[IPSET_ATTR_SETNAME]) {
809 for (i = 0; i < ip_set_max; i++) {
2f9f28b2 810 if (ip_set_list[i] != NULL && ip_set_list[i]->ref) {
9d883232 811 ret = -IPSET_ERR_BUSY;
2f9f28b2
JK
812 goto out;
813 }
a7b4f989 814 }
2f9f28b2 815 read_unlock_bh(&ip_set_ref_lock);
a7b4f989
JK
816 for (i = 0; i < ip_set_max; i++) {
817 if (ip_set_list[i] != NULL)
818 ip_set_destroy_set(i);
819 }
820 } else {
821 i = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME]));
2f9f28b2
JK
822 if (i == IPSET_INVALID_ID) {
823 ret = -ENOENT;
824 goto out;
825 } else if (ip_set_list[i]->ref) {
826 ret = -IPSET_ERR_BUSY;
827 goto out;
828 }
829 read_unlock_bh(&ip_set_ref_lock);
a7b4f989
JK
830
831 ip_set_destroy_set(i);
832 }
833 return 0;
2f9f28b2
JK
834out:
835 read_unlock_bh(&ip_set_ref_lock);
836 return ret;
a7b4f989
JK
837}
838
839/* Flush sets */
840
841static void
842ip_set_flush_set(struct ip_set *set)
843{
844 pr_debug("set: %s\n", set->name);
845
846 write_lock_bh(&set->lock);
847 set->variant->flush(set);
848 write_unlock_bh(&set->lock);
849}
850
851static int
852ip_set_flush(struct sock *ctnl, struct sk_buff *skb,
853 const struct nlmsghdr *nlh,
854 const struct nlattr * const attr[])
855{
856 ip_set_id_t i;
857
858 if (unlikely(protocol_failed(attr)))
9184a9cb 859 return -IPSET_ERR_PROTOCOL;
a7b4f989
JK
860
861 if (!attr[IPSET_ATTR_SETNAME]) {
862 for (i = 0; i < ip_set_max; i++)
863 if (ip_set_list[i] != NULL)
864 ip_set_flush_set(ip_set_list[i]);
865 } else {
866 i = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME]));
867 if (i == IPSET_INVALID_ID)
868 return -ENOENT;
869
870 ip_set_flush_set(ip_set_list[i]);
871 }
872
873 return 0;
874}
875
876/* Rename a set */
877
878static const struct nla_policy
879ip_set_setname2_policy[IPSET_ATTR_CMD_MAX + 1] = {
880 [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 },
881 [IPSET_ATTR_SETNAME] = { .type = NLA_NUL_STRING,
882 .len = IPSET_MAXNAMELEN - 1 },
883 [IPSET_ATTR_SETNAME2] = { .type = NLA_NUL_STRING,
884 .len = IPSET_MAXNAMELEN - 1 },
885};
886
887static int
888ip_set_rename(struct sock *ctnl, struct sk_buff *skb,
889 const struct nlmsghdr *nlh,
890 const struct nlattr * const attr[])
891{
892 struct ip_set *set;
893 const char *name2;
894 ip_set_id_t i;
2f9f28b2 895 int ret = 0;
a7b4f989
JK
896
897 if (unlikely(protocol_failed(attr) ||
898 attr[IPSET_ATTR_SETNAME] == NULL ||
899 attr[IPSET_ATTR_SETNAME2] == NULL))
900 return -IPSET_ERR_PROTOCOL;
901
902 set = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
903 if (set == NULL)
904 return -ENOENT;
2f9f28b2
JK
905
906 read_lock_bh(&ip_set_ref_lock);
907 if (set->ref != 0) {
908 ret = -IPSET_ERR_REFERENCED;
909 goto out;
910 }
a7b4f989
JK
911
912 name2 = nla_data(attr[IPSET_ATTR_SETNAME2]);
913 for (i = 0; i < ip_set_max; i++) {
914 if (ip_set_list[i] != NULL &&
2f9f28b2
JK
915 STREQ(ip_set_list[i]->name, name2)) {
916 ret = -IPSET_ERR_EXIST_SETNAME2;
917 goto out;
918 }
a7b4f989
JK
919 }
920 strncpy(set->name, name2, IPSET_MAXNAMELEN);
921
2f9f28b2
JK
922out:
923 read_unlock_bh(&ip_set_ref_lock);
924 return ret;
a7b4f989
JK
925}
926
927/* Swap two sets so that name/index points to the other.
928 * References and set names are also swapped.
929 *
2f9f28b2
JK
930 * The commands are serialized by the nfnl mutex and references are
931 * protected by the ip_set_ref_lock. The kernel interfaces
a7b4f989
JK
932 * do not hold the mutex but the pointer settings are atomic
933 * so the ip_set_list always contains valid pointers to the sets.
934 */
935
936static int
937ip_set_swap(struct sock *ctnl, struct sk_buff *skb,
938 const struct nlmsghdr *nlh,
939 const struct nlattr * const attr[])
940{
941 struct ip_set *from, *to;
942 ip_set_id_t from_id, to_id;
943 char from_name[IPSET_MAXNAMELEN];
a7b4f989
JK
944
945 if (unlikely(protocol_failed(attr) ||
946 attr[IPSET_ATTR_SETNAME] == NULL ||
947 attr[IPSET_ATTR_SETNAME2] == NULL))
948 return -IPSET_ERR_PROTOCOL;
949
950 from_id = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME]));
951 if (from_id == IPSET_INVALID_ID)
952 return -ENOENT;
953
954 to_id = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME2]));
955 if (to_id == IPSET_INVALID_ID)
956 return -IPSET_ERR_EXIST_SETNAME2;
957
958 from = ip_set_list[from_id];
959 to = ip_set_list[to_id];
960
961 /* Features must not change.
25985edc 962 * Not an artificial restriction anymore, as we must prevent
a7b4f989
JK
963 * possible loops created by swapping in setlist type of sets. */
964 if (!(from->type->features == to->type->features &&
965 from->type->family == to->type->family))
966 return -IPSET_ERR_TYPE_MISMATCH;
967
a7b4f989 968 strncpy(from_name, from->name, IPSET_MAXNAMELEN);
a7b4f989 969 strncpy(from->name, to->name, IPSET_MAXNAMELEN);
a7b4f989 970 strncpy(to->name, from_name, IPSET_MAXNAMELEN);
a7b4f989 971
2f9f28b2
JK
972 write_lock_bh(&ip_set_ref_lock);
973 swap(from->ref, to->ref);
a7b4f989
JK
974 ip_set_list[from_id] = to;
975 ip_set_list[to_id] = from;
2f9f28b2 976 write_unlock_bh(&ip_set_ref_lock);
a7b4f989
JK
977
978 return 0;
979}
980
981/* List/save set data */
982
c1e2e043
JK
/* Dump states kept in the low 16 bits of cb->args[0] */
#define DUMP_INIT		0
#define DUMP_ALL		1
#define DUMP_ONE		2
#define DUMP_LAST		3

/* cb->args[0] layout: dump type in bits 0-15, dump flags from bit 16 up */
#define DUMP_TYPE(arg)		(((u32)(arg)) & 0x0000FFFF)
#define DUMP_FLAGS(arg)		(((u32)(arg)) >> 16)
a7b4f989
JK
990
991static int
992ip_set_dump_done(struct netlink_callback *cb)
993{
994 if (cb->args[2]) {
995 pr_debug("release set %s\n", ip_set_list[cb->args[1]]->name);
2f9f28b2 996 ip_set_put_byindex((ip_set_id_t) cb->args[1]);
a7b4f989
JK
997 }
998 return 0;
999}
1000
1001static inline void
1002dump_attrs(struct nlmsghdr *nlh)
1003{
1004 const struct nlattr *attr;
1005 int rem;
1006
1007 pr_debug("dump nlmsg\n");
1008 nlmsg_for_each_attr(attr, nlh, sizeof(struct nfgenmsg), rem) {
1009 pr_debug("type: %u, len %u\n", nla_type(attr), attr->nla_len);
1010 }
1011}
1012
1013static int
1014dump_init(struct netlink_callback *cb)
1015{
1016 struct nlmsghdr *nlh = nlmsg_hdr(cb->skb);
1017 int min_len = NLMSG_SPACE(sizeof(struct nfgenmsg));
1018 struct nlattr *cda[IPSET_ATTR_CMD_MAX+1];
1019 struct nlattr *attr = (void *)nlh + min_len;
c1e2e043 1020 u32 dump_type;
a7b4f989
JK
1021 ip_set_id_t index;
1022
1023 /* Second pass, so parser can't fail */
1024 nla_parse(cda, IPSET_ATTR_CMD_MAX,
1025 attr, nlh->nlmsg_len - min_len, ip_set_setname_policy);
1026
1027 /* cb->args[0] : dump single set/all sets
1028 * [1] : set index
1029 * [..]: type specific
1030 */
1031
c1e2e043
JK
1032 if (cda[IPSET_ATTR_SETNAME]) {
1033 index = find_set_id(nla_data(cda[IPSET_ATTR_SETNAME]));
1034 if (index == IPSET_INVALID_ID)
1035 return -ENOENT;
a7b4f989 1036
c1e2e043
JK
1037 dump_type = DUMP_ONE;
1038 cb->args[1] = index;
1039 } else
1040 dump_type = DUMP_ALL;
1041
1042 if (cda[IPSET_ATTR_FLAGS]) {
1043 u32 f = ip_set_get_h32(cda[IPSET_ATTR_FLAGS]);
1044 dump_type |= (f << 16);
1045 }
1046 cb->args[0] = dump_type;
a7b4f989 1047
a7b4f989
JK
1048 return 0;
1049}
1050
/*
 * Netlink dump callback (installed as .dump by ip_set_dump()).
 *
 * Every invocation emits at most one IPSET_CMD_LIST message into @skb: all
 * paths following start_msg() leave the loop through a goto.  Progress is
 * kept in @cb so the dump can be resumed on the next invocation:
 *   cb->args[0]  dump type in the low bits, dump flags shifted left by 16
 *   cb->args[1]  index of the set to dump next
 *   cb->args[2]  zero when no set listing is in progress; presumably
 *                maintained by the set type's list() callback - TODO confirm
 *
 * When all sets are dumped, two passes are made over ip_set_list: the first
 * (DUMP_ALL) skips set types flagged IPSET_DUMP_LAST, the second (DUMP_LAST)
 * dumps only those.
 *
 * Returns a negative error code on failure, otherwise skb->len.
 */
1051static int
1052ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb)
1053{
1054 ip_set_id_t index = IPSET_INVALID_ID, max;
1055 struct ip_set *set = NULL;
1056 struct nlmsghdr *nlh = NULL;
15e47304 1057 unsigned int flags = NETLINK_CB(cb->skb).portid ? NLM_F_MULTI : 0;
c1e2e043 1058 u32 dump_type, dump_flags;
a7b4f989
JK
1059 int ret = 0;
1060
	/* cb->args[0] not set yet: let dump_init() parse the request and
	 * fill in cb->args[0] (and cb->args[1] for a single named set). */
c1e2e043 1061 if (!cb->args[0]) {
a7b4f989
JK
1062 ret = dump_init(cb);
1063 if (ret < 0) {
1064 nlh = nlmsg_hdr(cb->skb);
1065 /* We have to create and send the error message
1066 * manually :-( */
1067 if (nlh->nlmsg_flags & NLM_F_ACK)
1068 netlink_ack(cb->skb, nlh, ret);
1069 return ret;
1070 }
1071 }
1072
	/* The stored index is past the end of the set array: nothing left. */
1073 if (cb->args[1] >= ip_set_max)
1074 goto out;
1075
c1e2e043
JK
1076 dump_type = DUMP_TYPE(cb->args[0]);
1077 dump_flags = DUMP_FLAGS(cb->args[0]);
	/* A single-set dump only visits the index stored in cb->args[1]. */
1078 max = dump_type == DUMP_ONE ? cb->args[1] + 1 : ip_set_max;
a8a8a093 1079dump_last:
c1e2e043
JK
1080 pr_debug("args[0]: %u %u args[1]: %ld\n",
1081 dump_type, dump_flags, cb->args[1]);
a7b4f989
JK
1082 for (; cb->args[1] < max; cb->args[1]++) {
1083 index = (ip_set_id_t) cb->args[1];
1084 set = ip_set_list[index];
1085 if (set == NULL) {
c1e2e043 1086 if (dump_type == DUMP_ONE) {
a7b4f989
JK
1087 ret = -ENOENT;
1088 goto out;
1089 }
1090 continue;
1091 }
1092 /* When dumping all sets, we must dump "sorted"
1093 * so that lists (unions of sets) are dumped last.
1094 */
c1e2e043
JK
1095 if (dump_type != DUMP_ONE &&
1096 ((dump_type == DUMP_ALL) ==
a8a8a093 1097 !!(set->type->features & IPSET_DUMP_LAST)))
a7b4f989
JK
1098 continue;
1099 pr_debug("List set: %s\n", set->name);
1100 if (!cb->args[2]) {
1101 /* Start listing: make sure set won't be destroyed */
1102 pr_debug("reference set\n");
1103 __ip_set_get(index);
1104 }
15e47304 1105 nlh = start_msg(skb, NETLINK_CB(cb->skb).portid,
a7b4f989
JK
1106 cb->nlh->nlmsg_seq, flags,
1107 IPSET_CMD_LIST);
1108 if (!nlh) {
1109 ret = -EMSGSIZE;
1110 goto release_refcount;
1111 }
7cf7899d
DM
1112 if (nla_put_u8(skb, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL) ||
1113 nla_put_string(skb, IPSET_ATTR_SETNAME, set->name))
1114 goto nla_put_failure;
	/* Names-only listing: the message is complete after the set name. */
c1e2e043
JK
1115 if (dump_flags & IPSET_FLAG_LIST_SETNAME)
1116 goto next_set;
a7b4f989
JK
1117 switch (cb->args[2]) {
1118 case 0:
1119 /* Core header data */
7cf7899d
DM
1120 if (nla_put_string(skb, IPSET_ATTR_TYPENAME,
1121 set->type->name) ||
1122 nla_put_u8(skb, IPSET_ATTR_FAMILY,
1123 set->family) ||
1124 nla_put_u8(skb, IPSET_ATTR_REVISION,
1125 set->revision))
1126 goto nla_put_failure;
a7b4f989
JK
1127 ret = set->variant->head(set, skb);
1128 if (ret < 0)
1129 goto release_refcount;
c1e2e043
JK
1130 if (dump_flags & IPSET_FLAG_LIST_HEADER)
1131 goto next_set;
a7b4f989
JK
1132 /* Fall through and add elements */
1133 default:
1134 read_lock_bh(&set->lock);
1135 ret = set->variant->list(set, skb, cb);
1136 read_unlock_bh(&set->lock);
c1e2e043 1137 if (!cb->args[2])
a7b4f989 1138 /* Set is done, proceed with next one */
c1e2e043 1139 goto next_set;
a7b4f989
JK
1140 goto release_refcount;
1141 }
1142 }
a8a8a093 1143 /* If we dump all sets, continue with dumping last ones */
c1e2e043
JK
1144 if (dump_type == DUMP_ALL) {
1145 dump_type = DUMP_LAST;
1146 cb->args[0] = dump_type | (dump_flags << 16);
a8a8a093
JK
1147 cb->args[1] = 0;
1148 goto dump_last;
1149 }
a7b4f989
JK
1150 goto out;
1151
	/* A failed attribute put reports -EFAULT and still advances the index. */
1152nla_put_failure:
1153 ret = -EFAULT;
c1e2e043
JK
1154next_set:
	/* IPSET_INVALID_ID makes the ip_set_max check above end a
	 * single-set dump on the next invocation. */
1155 if (dump_type == DUMP_ONE)
1156 cb->args[1] = IPSET_INVALID_ID;
1157 else
1158 cb->args[1]++;
a7b4f989
JK
1159release_refcount:
1160 /* If there was an error or set is done, release set */
1161 if (ret || !cb->args[2]) {
1162 pr_debug("release set %s\n", ip_set_list[index]->name);
2f9f28b2 1163 ip_set_put_byindex(index);
be94db9d 1164 cb->args[2] = 0;
a7b4f989 1165 }
a7b4f989
JK
1166out:
	/* Finalize the message started in this invocation, if any. */
1167 if (nlh) {
1168 nlmsg_end(skb, nlh);
1169 pr_debug("nlmsg_len: %u\n", nlh->nlmsg_len);
1170 dump_attrs(nlh);
1171 }
1172
1173 return ret < 0 ? ret : skb->len;
1174}
1175
1176static int
1177ip_set_dump(struct sock *ctnl, struct sk_buff *skb,
1178 const struct nlmsghdr *nlh,
1179 const struct nlattr * const attr[])
1180{
1181 if (unlikely(protocol_failed(attr)))
1182 return -IPSET_ERR_PROTOCOL;
1183
80d326fa
PNA
1184 {
1185 struct netlink_dump_control c = {
1186 .dump = ip_set_dump_start,
1187 .done = ip_set_dump_done,
1188 };
1189 return netlink_dump_start(ctnl, skb, nlh, &c);
1190 }
a7b4f989
JK
1191}
1192
1193/* Add, del and test */
1194
1195static const struct nla_policy ip_set_adt_policy[IPSET_ATTR_CMD_MAX + 1] = {
1196 [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 },
1197 [IPSET_ATTR_SETNAME] = { .type = NLA_NUL_STRING,
1198 .len = IPSET_MAXNAMELEN - 1 },
1199 [IPSET_ATTR_LINENO] = { .type = NLA_U32 },
1200 [IPSET_ATTR_DATA] = { .type = NLA_NESTED },
1201 [IPSET_ATTR_ADT] = { .type = NLA_NESTED },
1202};
1203
1204static int
5f52bc3c 1205call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set,
a7b4f989
JK
1206 struct nlattr *tb[], enum ipset_adt adt,
1207 u32 flags, bool use_lineno)
1208{
3d14b171 1209 int ret;
a7b4f989 1210 u32 lineno = 0;
3d14b171 1211 bool eexist = flags & IPSET_FLAG_EXIST, retried = false;
a7b4f989
JK
1212
1213 do {
1214 write_lock_bh(&set->lock);
3d14b171 1215 ret = set->variant->uadt(set, tb, adt, &lineno, flags, retried);
a7b4f989 1216 write_unlock_bh(&set->lock);
3d14b171 1217 retried = true;
a7b4f989
JK
1218 } while (ret == -EAGAIN &&
1219 set->variant->resize &&
3d14b171 1220 (ret = set->variant->resize(set, retried)) == 0);
a7b4f989
JK
1221
1222 if (!ret || (ret == -IPSET_ERR_EXIST && eexist))
1223 return 0;
1224 if (lineno && use_lineno) {
1225 /* Error in restore/batch mode: send back lineno */
5f52bc3c
JK
1226 struct nlmsghdr *rep, *nlh = nlmsg_hdr(skb);
1227 struct sk_buff *skb2;
1228 struct nlmsgerr *errmsg;
1229 size_t payload = sizeof(*errmsg) + nlmsg_len(nlh);
a7b4f989
JK
1230 int min_len = NLMSG_SPACE(sizeof(struct nfgenmsg));
1231 struct nlattr *cda[IPSET_ATTR_CMD_MAX+1];
5f52bc3c 1232 struct nlattr *cmdattr;
a7b4f989
JK
1233 u32 *errline;
1234
5f52bc3c
JK
1235 skb2 = nlmsg_new(payload, GFP_KERNEL);
1236 if (skb2 == NULL)
1237 return -ENOMEM;
15e47304 1238 rep = __nlmsg_put(skb2, NETLINK_CB(skb).portid,
5f52bc3c
JK
1239 nlh->nlmsg_seq, NLMSG_ERROR, payload, 0);
1240 errmsg = nlmsg_data(rep);
1241 errmsg->error = ret;
1242 memcpy(&errmsg->msg, nlh, nlh->nlmsg_len);
1243 cmdattr = (void *)&errmsg->msg + min_len;
1244
a7b4f989
JK
1245 nla_parse(cda, IPSET_ATTR_CMD_MAX,
1246 cmdattr, nlh->nlmsg_len - min_len,
1247 ip_set_adt_policy);
1248
1249 errline = nla_data(cda[IPSET_ATTR_LINENO]);
1250
1251 *errline = lineno;
5f52bc3c 1252
15e47304 1253 netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
5f52bc3c
JK
1254 /* Signal netlink not to send its ACK/errmsg. */
1255 return -EINTR;
a7b4f989
JK
1256 }
1257
1258 return ret;
1259}
1260
1261static int
1262ip_set_uadd(struct sock *ctnl, struct sk_buff *skb,
1263 const struct nlmsghdr *nlh,
1264 const struct nlattr * const attr[])
1265{
1266 struct ip_set *set;
1267 struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {};
1268 const struct nlattr *nla;
1269 u32 flags = flag_exist(nlh);
1270 bool use_lineno;
1271 int ret = 0;
1272
1273 if (unlikely(protocol_failed(attr) ||
1274 attr[IPSET_ATTR_SETNAME] == NULL ||
1275 !((attr[IPSET_ATTR_DATA] != NULL) ^
1276 (attr[IPSET_ATTR_ADT] != NULL)) ||
1277 (attr[IPSET_ATTR_DATA] != NULL &&
1278 !flag_nested(attr[IPSET_ATTR_DATA])) ||
1279 (attr[IPSET_ATTR_ADT] != NULL &&
1280 (!flag_nested(attr[IPSET_ATTR_ADT]) ||
1281 attr[IPSET_ATTR_LINENO] == NULL))))
1282 return -IPSET_ERR_PROTOCOL;
1283
1284 set = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
1285 if (set == NULL)
1286 return -ENOENT;
1287
1288 use_lineno = !!attr[IPSET_ATTR_LINENO];
1289 if (attr[IPSET_ATTR_DATA]) {
8da560ce
PM
1290 if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX,
1291 attr[IPSET_ATTR_DATA],
1292 set->type->adt_policy))
a7b4f989 1293 return -IPSET_ERR_PROTOCOL;
5f52bc3c
JK
1294 ret = call_ad(ctnl, skb, set, tb, IPSET_ADD, flags,
1295 use_lineno);
a7b4f989
JK
1296 } else {
1297 int nla_rem;
1298
1299 nla_for_each_nested(nla, attr[IPSET_ATTR_ADT], nla_rem) {
1300 memset(tb, 0, sizeof(tb));
1301 if (nla_type(nla) != IPSET_ATTR_DATA ||
1302 !flag_nested(nla) ||
8da560ce
PM
1303 nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, nla,
1304 set->type->adt_policy))
a7b4f989 1305 return -IPSET_ERR_PROTOCOL;
5f52bc3c 1306 ret = call_ad(ctnl, skb, set, tb, IPSET_ADD,
a7b4f989
JK
1307 flags, use_lineno);
1308 if (ret < 0)
1309 return ret;
1310 }
1311 }
1312 return ret;
1313}
1314
1315static int
1316ip_set_udel(struct sock *ctnl, struct sk_buff *skb,
1317 const struct nlmsghdr *nlh,
1318 const struct nlattr * const attr[])
1319{
1320 struct ip_set *set;
1321 struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {};
1322 const struct nlattr *nla;
1323 u32 flags = flag_exist(nlh);
1324 bool use_lineno;
1325 int ret = 0;
1326
1327 if (unlikely(protocol_failed(attr) ||
1328 attr[IPSET_ATTR_SETNAME] == NULL ||
1329 !((attr[IPSET_ATTR_DATA] != NULL) ^
1330 (attr[IPSET_ATTR_ADT] != NULL)) ||
1331 (attr[IPSET_ATTR_DATA] != NULL &&
1332 !flag_nested(attr[IPSET_ATTR_DATA])) ||
1333 (attr[IPSET_ATTR_ADT] != NULL &&
1334 (!flag_nested(attr[IPSET_ATTR_ADT]) ||
1335 attr[IPSET_ATTR_LINENO] == NULL))))
1336 return -IPSET_ERR_PROTOCOL;
1337
1338 set = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
1339 if (set == NULL)
1340 return -ENOENT;
1341
1342 use_lineno = !!attr[IPSET_ATTR_LINENO];
1343 if (attr[IPSET_ATTR_DATA]) {
8da560ce
PM
1344 if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX,
1345 attr[IPSET_ATTR_DATA],
1346 set->type->adt_policy))
a7b4f989 1347 return -IPSET_ERR_PROTOCOL;
5f52bc3c
JK
1348 ret = call_ad(ctnl, skb, set, tb, IPSET_DEL, flags,
1349 use_lineno);
a7b4f989
JK
1350 } else {
1351 int nla_rem;
1352
1353 nla_for_each_nested(nla, attr[IPSET_ATTR_ADT], nla_rem) {
1354 memset(tb, 0, sizeof(*tb));
1355 if (nla_type(nla) != IPSET_ATTR_DATA ||
1356 !flag_nested(nla) ||
8da560ce
PM
1357 nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, nla,
1358 set->type->adt_policy))
a7b4f989 1359 return -IPSET_ERR_PROTOCOL;
5f52bc3c 1360 ret = call_ad(ctnl, skb, set, tb, IPSET_DEL,
a7b4f989
JK
1361 flags, use_lineno);
1362 if (ret < 0)
1363 return ret;
1364 }
1365 }
1366 return ret;
1367}
1368
1369static int
1370ip_set_utest(struct sock *ctnl, struct sk_buff *skb,
1371 const struct nlmsghdr *nlh,
1372 const struct nlattr * const attr[])
1373{
1374 struct ip_set *set;
1375 struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {};
1376 int ret = 0;
1377
1378 if (unlikely(protocol_failed(attr) ||
1379 attr[IPSET_ATTR_SETNAME] == NULL ||
1380 attr[IPSET_ATTR_DATA] == NULL ||
1381 !flag_nested(attr[IPSET_ATTR_DATA])))
1382 return -IPSET_ERR_PROTOCOL;
1383
1384 set = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
1385 if (set == NULL)
1386 return -ENOENT;
1387
8da560ce
PM
1388 if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, attr[IPSET_ATTR_DATA],
1389 set->type->adt_policy))
a7b4f989
JK
1390 return -IPSET_ERR_PROTOCOL;
1391
1392 read_lock_bh(&set->lock);
3d14b171 1393 ret = set->variant->uadt(set, tb, IPSET_TEST, NULL, 0, 0);
a7b4f989
JK
1394 read_unlock_bh(&set->lock);
1395 /* Userspace can't trigger element to be re-added */
1396 if (ret == -EAGAIN)
1397 ret = 1;
1398
1399 return ret < 0 ? ret : ret > 0 ? 0 : -IPSET_ERR_EXIST;
1400}
1401
/* Get header data of a set */
1403
1404static int
1405ip_set_header(struct sock *ctnl, struct sk_buff *skb,
1406 const struct nlmsghdr *nlh,
1407 const struct nlattr * const attr[])
1408{
1409 const struct ip_set *set;
1410 struct sk_buff *skb2;
1411 struct nlmsghdr *nlh2;
1412 ip_set_id_t index;
1413 int ret = 0;
1414
1415 if (unlikely(protocol_failed(attr) ||
1416 attr[IPSET_ATTR_SETNAME] == NULL))
1417 return -IPSET_ERR_PROTOCOL;
1418
1419 index = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME]));
1420 if (index == IPSET_INVALID_ID)
1421 return -ENOENT;
1422 set = ip_set_list[index];
1423
1424 skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1425 if (skb2 == NULL)
1426 return -ENOMEM;
1427
15e47304 1428 nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
a7b4f989
JK
1429 IPSET_CMD_HEADER);
1430 if (!nlh2)
1431 goto nlmsg_failure;
7cf7899d
DM
1432 if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL) ||
1433 nla_put_string(skb2, IPSET_ATTR_SETNAME, set->name) ||
1434 nla_put_string(skb2, IPSET_ATTR_TYPENAME, set->type->name) ||
1435 nla_put_u8(skb2, IPSET_ATTR_FAMILY, set->family) ||
1436 nla_put_u8(skb2, IPSET_ATTR_REVISION, set->revision))
1437 goto nla_put_failure;
a7b4f989
JK
1438 nlmsg_end(skb2, nlh2);
1439
15e47304 1440 ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
a7b4f989
JK
1441 if (ret < 0)
1442 return ret;
1443
1444 return 0;
1445
1446nla_put_failure:
1447 nlmsg_cancel(skb2, nlh2);
1448nlmsg_failure:
1449 kfree_skb(skb2);
1450 return -EMSGSIZE;
1451}
1452
1453/* Get type data */
1454
1455static const struct nla_policy ip_set_type_policy[IPSET_ATTR_CMD_MAX + 1] = {
1456 [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 },
1457 [IPSET_ATTR_TYPENAME] = { .type = NLA_NUL_STRING,
1458 .len = IPSET_MAXNAMELEN - 1 },
1459 [IPSET_ATTR_FAMILY] = { .type = NLA_U8 },
1460};
1461
1462static int
1463ip_set_type(struct sock *ctnl, struct sk_buff *skb,
1464 const struct nlmsghdr *nlh,
1465 const struct nlattr * const attr[])
1466{
1467 struct sk_buff *skb2;
1468 struct nlmsghdr *nlh2;
1469 u8 family, min, max;
1470 const char *typename;
1471 int ret = 0;
1472
1473 if (unlikely(protocol_failed(attr) ||
1474 attr[IPSET_ATTR_TYPENAME] == NULL ||
1475 attr[IPSET_ATTR_FAMILY] == NULL))
1476 return -IPSET_ERR_PROTOCOL;
1477
1478 family = nla_get_u8(attr[IPSET_ATTR_FAMILY]);
1479 typename = nla_data(attr[IPSET_ATTR_TYPENAME]);
1480 ret = find_set_type_minmax(typename, family, &min, &max);
1481 if (ret)
1482 return ret;
1483
1484 skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1485 if (skb2 == NULL)
1486 return -ENOMEM;
1487
15e47304 1488 nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
a7b4f989
JK
1489 IPSET_CMD_TYPE);
1490 if (!nlh2)
1491 goto nlmsg_failure;
7cf7899d
DM
1492 if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL) ||
1493 nla_put_string(skb2, IPSET_ATTR_TYPENAME, typename) ||
1494 nla_put_u8(skb2, IPSET_ATTR_FAMILY, family) ||
1495 nla_put_u8(skb2, IPSET_ATTR_REVISION, max) ||
1496 nla_put_u8(skb2, IPSET_ATTR_REVISION_MIN, min))
1497 goto nla_put_failure;
a7b4f989
JK
1498 nlmsg_end(skb2, nlh2);
1499
1500 pr_debug("Send TYPE, nlmsg_len: %u\n", nlh2->nlmsg_len);
15e47304 1501 ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
a7b4f989
JK
1502 if (ret < 0)
1503 return ret;
1504
1505 return 0;
1506
1507nla_put_failure:
1508 nlmsg_cancel(skb2, nlh2);
1509nlmsg_failure:
1510 kfree_skb(skb2);
1511 return -EMSGSIZE;
1512}
1513
1514/* Get protocol version */
1515
1516static const struct nla_policy
1517ip_set_protocol_policy[IPSET_ATTR_CMD_MAX + 1] = {
1518 [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 },
1519};
1520
1521static int
1522ip_set_protocol(struct sock *ctnl, struct sk_buff *skb,
1523 const struct nlmsghdr *nlh,
1524 const struct nlattr * const attr[])
1525{
1526 struct sk_buff *skb2;
1527 struct nlmsghdr *nlh2;
1528 int ret = 0;
1529
1530 if (unlikely(attr[IPSET_ATTR_PROTOCOL] == NULL))
1531 return -IPSET_ERR_PROTOCOL;
1532
1533 skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1534 if (skb2 == NULL)
1535 return -ENOMEM;
1536
15e47304 1537 nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
a7b4f989
JK
1538 IPSET_CMD_PROTOCOL);
1539 if (!nlh2)
1540 goto nlmsg_failure;
7cf7899d
DM
1541 if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL))
1542 goto nla_put_failure;
a7b4f989
JK
1543 nlmsg_end(skb2, nlh2);
1544
15e47304 1545 ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
a7b4f989
JK
1546 if (ret < 0)
1547 return ret;
1548
1549 return 0;
1550
1551nla_put_failure:
1552 nlmsg_cancel(skb2, nlh2);
1553nlmsg_failure:
1554 kfree_skb(skb2);
1555 return -EMSGSIZE;
1556}
1557
1558static const struct nfnl_callback ip_set_netlink_subsys_cb[IPSET_MSG_MAX] = {
d31f4d44
TB
1559 [IPSET_CMD_NONE] = {
1560 .call = ip_set_none,
1561 .attr_count = IPSET_ATTR_CMD_MAX,
1562 },
a7b4f989
JK
1563 [IPSET_CMD_CREATE] = {
1564 .call = ip_set_create,
1565 .attr_count = IPSET_ATTR_CMD_MAX,
1566 .policy = ip_set_create_policy,
1567 },
1568 [IPSET_CMD_DESTROY] = {
1569 .call = ip_set_destroy,
1570 .attr_count = IPSET_ATTR_CMD_MAX,
1571 .policy = ip_set_setname_policy,
1572 },
1573 [IPSET_CMD_FLUSH] = {
1574 .call = ip_set_flush,
1575 .attr_count = IPSET_ATTR_CMD_MAX,
1576 .policy = ip_set_setname_policy,
1577 },
1578 [IPSET_CMD_RENAME] = {
1579 .call = ip_set_rename,
1580 .attr_count = IPSET_ATTR_CMD_MAX,
1581 .policy = ip_set_setname2_policy,
1582 },
1583 [IPSET_CMD_SWAP] = {
1584 .call = ip_set_swap,
1585 .attr_count = IPSET_ATTR_CMD_MAX,
1586 .policy = ip_set_setname2_policy,
1587 },
1588 [IPSET_CMD_LIST] = {
1589 .call = ip_set_dump,
1590 .attr_count = IPSET_ATTR_CMD_MAX,
1591 .policy = ip_set_setname_policy,
1592 },
1593 [IPSET_CMD_SAVE] = {
1594 .call = ip_set_dump,
1595 .attr_count = IPSET_ATTR_CMD_MAX,
1596 .policy = ip_set_setname_policy,
1597 },
1598 [IPSET_CMD_ADD] = {
1599 .call = ip_set_uadd,
1600 .attr_count = IPSET_ATTR_CMD_MAX,
1601 .policy = ip_set_adt_policy,
1602 },
1603 [IPSET_CMD_DEL] = {
1604 .call = ip_set_udel,
1605 .attr_count = IPSET_ATTR_CMD_MAX,
1606 .policy = ip_set_adt_policy,
1607 },
1608 [IPSET_CMD_TEST] = {
1609 .call = ip_set_utest,
1610 .attr_count = IPSET_ATTR_CMD_MAX,
1611 .policy = ip_set_adt_policy,
1612 },
1613 [IPSET_CMD_HEADER] = {
1614 .call = ip_set_header,
1615 .attr_count = IPSET_ATTR_CMD_MAX,
1616 .policy = ip_set_setname_policy,
1617 },
1618 [IPSET_CMD_TYPE] = {
1619 .call = ip_set_type,
1620 .attr_count = IPSET_ATTR_CMD_MAX,
1621 .policy = ip_set_type_policy,
1622 },
1623 [IPSET_CMD_PROTOCOL] = {
1624 .call = ip_set_protocol,
1625 .attr_count = IPSET_ATTR_CMD_MAX,
1626 .policy = ip_set_protocol_policy,
1627 },
1628};
1629
1630static struct nfnetlink_subsystem ip_set_netlink_subsys __read_mostly = {
1631 .name = "ip_set",
1632 .subsys_id = NFNL_SUBSYS_IPSET,
1633 .cb_count = IPSET_MSG_MAX,
1634 .cb = ip_set_netlink_subsys_cb,
1635};
1636
1637/* Interface to iptables/ip6tables */
1638
1639static int
1640ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len)
1641{
95c96174 1642 unsigned int *op;
a7b4f989
JK
1643 void *data;
1644 int copylen = *len, ret = 0;
1645
df008c91 1646 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
a7b4f989
JK
1647 return -EPERM;
1648 if (optval != SO_IP_SET)
1649 return -EBADF;
95c96174 1650 if (*len < sizeof(unsigned int))
a7b4f989
JK
1651 return -EINVAL;
1652
1653 data = vmalloc(*len);
1654 if (!data)
1655 return -ENOMEM;
1656 if (copy_from_user(data, user, *len) != 0) {
1657 ret = -EFAULT;
1658 goto done;
1659 }
95c96174 1660 op = (unsigned int *) data;
a7b4f989
JK
1661
1662 if (*op < IP_SET_OP_VERSION) {
1663 /* Check the version at the beginning of operations */
1664 struct ip_set_req_version *req_version = data;
1665 if (req_version->version != IPSET_PROTOCOL) {
1666 ret = -EPROTO;
1667 goto done;
1668 }
1669 }
1670
1671 switch (*op) {
1672 case IP_SET_OP_VERSION: {
1673 struct ip_set_req_version *req_version = data;
1674
1675 if (*len != sizeof(struct ip_set_req_version)) {
1676 ret = -EINVAL;
1677 goto done;
1678 }
1679
1680 req_version->version = IPSET_PROTOCOL;
1681 ret = copy_to_user(user, req_version,
1682 sizeof(struct ip_set_req_version));
1683 goto done;
1684 }
1685 case IP_SET_OP_GET_BYNAME: {
1686 struct ip_set_req_get_set *req_get = data;
1687
1688 if (*len != sizeof(struct ip_set_req_get_set)) {
1689 ret = -EINVAL;
1690 goto done;
1691 }
1692 req_get->set.name[IPSET_MAXNAMELEN - 1] = '\0';
1693 nfnl_lock();
1694 req_get->set.index = find_set_id(req_get->set.name);
1695 nfnl_unlock();
1696 goto copy;
1697 }
1698 case IP_SET_OP_GET_BYINDEX: {
1699 struct ip_set_req_get_set *req_get = data;
1700
1701 if (*len != sizeof(struct ip_set_req_get_set) ||
1702 req_get->set.index >= ip_set_max) {
1703 ret = -EINVAL;
1704 goto done;
1705 }
1706 nfnl_lock();
1707 strncpy(req_get->set.name,
1708 ip_set_list[req_get->set.index]
1709 ? ip_set_list[req_get->set.index]->name : "",
1710 IPSET_MAXNAMELEN);
1711 nfnl_unlock();
1712 goto copy;
1713 }
1714 default:
1715 ret = -EBADMSG;
1716 goto done;
1717 } /* end of switch(op) */
1718
1719copy:
1720 ret = copy_to_user(user, data, copylen);
1721
1722done:
1723 vfree(data);
1724 if (ret > 0)
1725 ret = 0;
1726 return ret;
1727}
1728
1729static struct nf_sockopt_ops so_set __read_mostly = {
1730 .pf = PF_INET,
1731 .get_optmin = SO_IP_SET,
1732 .get_optmax = SO_IP_SET + 1,
1733 .get = &ip_set_sockfn_get,
1734 .owner = THIS_MODULE,
1735};
1736
1737static int __init
1738ip_set_init(void)
1739{
1740 int ret;
1741
1742 if (max_sets)
1743 ip_set_max = max_sets;
1744 if (ip_set_max >= IPSET_INVALID_ID)
1745 ip_set_max = IPSET_INVALID_ID - 1;
1746
1747 ip_set_list = kzalloc(sizeof(struct ip_set *) * ip_set_max,
1748 GFP_KERNEL);
0a9ee813 1749 if (!ip_set_list)
a7b4f989 1750 return -ENOMEM;
a7b4f989
JK
1751
1752 ret = nfnetlink_subsys_register(&ip_set_netlink_subsys);
1753 if (ret != 0) {
1754 pr_err("ip_set: cannot register with nfnetlink.\n");
1755 kfree(ip_set_list);
1756 return ret;
1757 }
1758 ret = nf_register_sockopt(&so_set);
1759 if (ret != 0) {
1760 pr_err("SO_SET registry failed: %d\n", ret);
1761 nfnetlink_subsys_unregister(&ip_set_netlink_subsys);
1762 kfree(ip_set_list);
1763 return ret;
1764 }
1765
1766 pr_notice("ip_set: protocol %u\n", IPSET_PROTOCOL);
1767 return 0;
1768}
1769
1770static void __exit
1771ip_set_fini(void)
1772{
1773 /* There can't be any existing set */
1774 nf_unregister_sockopt(&so_set);
1775 nfnetlink_subsys_unregister(&ip_set_netlink_subsys);
1776 kfree(ip_set_list);
1777 pr_debug("these are the famous last words\n");
1778}
1779
/* Register ip_set_init() and ip_set_fini() as the module's init and exit
 * functions. */
1780module_init(ip_set_init);
1781module_exit(ip_set_fini);
This page took 0.321546 seconds and 5 git commands to generate.