ipvs: convert sched_lock to spin lock
[deliverable/linux.git] / net / netfilter / ipvs / ip_vs_ctl.c
CommitLineData
1da177e4
LT
1/*
2 * IPVS An implementation of the IP virtual server support for the
3 * LINUX operating system. IPVS is now implemented as a module
4 * over the NetFilter framework. IPVS can be used to build a
5 * high-performance and highly available server based on a
6 * cluster of servers.
7 *
1da177e4
LT
8 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
9 * Peter Kese <peter.kese@ijs.si>
10 * Julian Anastasov <ja@ssi.bg>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 *
17 * Changes:
18 *
19 */
20
9aada7ac
HE
21#define KMSG_COMPONENT "IPVS"
22#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
23
1da177e4
LT
24#include <linux/module.h>
25#include <linux/init.h>
26#include <linux/types.h>
4fc268d2 27#include <linux/capability.h>
1da177e4
LT
28#include <linux/fs.h>
29#include <linux/sysctl.h>
30#include <linux/proc_fs.h>
31#include <linux/workqueue.h>
32#include <linux/swap.h>
1da177e4 33#include <linux/seq_file.h>
5a0e3ad6 34#include <linux/slab.h>
1da177e4
LT
35
36#include <linux/netfilter.h>
37#include <linux/netfilter_ipv4.h>
14cc3e2b 38#include <linux/mutex.h>
1da177e4 39
457c4cbc 40#include <net/net_namespace.h>
9330419d 41#include <linux/nsproxy.h>
1da177e4 42#include <net/ip.h>
09571c7a
VB
43#ifdef CONFIG_IP_VS_IPV6
44#include <net/ipv6.h>
45#include <net/ip6_route.h>
46#endif
14c85021 47#include <net/route.h>
1da177e4 48#include <net/sock.h>
9a812198 49#include <net/genetlink.h>
1da177e4
LT
50
51#include <asm/uaccess.h>
52
53#include <net/ip_vs.h>
54
55/* mutex for IPVS sockopts. And, [gs]etsockopt may sleep. */
14cc3e2b 56static DEFINE_MUTEX(__ip_vs_mutex);
1da177e4
LT
57
58/* lock for service table */
59static DEFINE_RWLOCK(__ip_vs_svc_lock);
60
1da177e4 61/* sysctl variables */
1da177e4
LT
62
63#ifdef CONFIG_IP_VS_DEBUG
/* Debug verbosity level; a static object, so it starts out as zero. */
static int sysctl_ip_vs_debug_level;

/* Return the current value of sysctl_ip_vs_debug_level. */
int ip_vs_get_debug_level(void)
{
	return sysctl_ip_vs_debug_level;
}
70#endif
71
7a4f0761
HS
72
73/* Protos */
578bc3ef 74static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup);
7a4f0761
HS
75
76
09571c7a
VB
77#ifdef CONFIG_IP_VS_IPV6
78/* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
c24584c0
ED
79static bool __ip_vs_addr_is_local_v6(struct net *net,
80 const struct in6_addr *addr)
09571c7a 81{
4c9483b2
DM
82 struct flowi6 fl6 = {
83 .daddr = *addr,
09571c7a 84 };
c24584c0
ED
85 struct dst_entry *dst = ip6_route_output(net, NULL, &fl6);
86 bool is_local;
09571c7a 87
c24584c0 88 is_local = !dst->error && dst->dev && (dst->dev->flags & IFF_LOOPBACK);
09571c7a 89
c24584c0
ED
90 dst_release(dst);
91 return is_local;
09571c7a
VB
92}
93#endif
14e40546
SH
94
95#ifdef CONFIG_SYSCTL
1da177e4 96/*
af9debd4
JA
97 * update_defense_level is called from keventd and from sysctl,
98 * so it needs to protect itself from softirqs
1da177e4 99 */
9330419d 100static void update_defense_level(struct netns_ipvs *ipvs)
1da177e4
LT
101{
102 struct sysinfo i;
103 static int old_secure_tcp = 0;
104 int availmem;
105 int nomem;
106 int to_change = -1;
107
108 /* we only count free and buffered memory (in pages) */
109 si_meminfo(&i);
110 availmem = i.freeram + i.bufferram;
111 /* however in linux 2.5 the i.bufferram is total page cache size,
112 we need adjust it */
113 /* si_swapinfo(&i); */
114 /* availmem = availmem - (i.totalswap - i.freeswap); */
115
a0840e2e 116 nomem = (availmem < ipvs->sysctl_amemthresh);
1da177e4 117
af9debd4
JA
118 local_bh_disable();
119
1da177e4 120 /* drop_entry */
a0840e2e
HS
121 spin_lock(&ipvs->dropentry_lock);
122 switch (ipvs->sysctl_drop_entry) {
1da177e4 123 case 0:
a0840e2e 124 atomic_set(&ipvs->dropentry, 0);
1da177e4
LT
125 break;
126 case 1:
127 if (nomem) {
a0840e2e
HS
128 atomic_set(&ipvs->dropentry, 1);
129 ipvs->sysctl_drop_entry = 2;
1da177e4 130 } else {
a0840e2e 131 atomic_set(&ipvs->dropentry, 0);
1da177e4
LT
132 }
133 break;
134 case 2:
135 if (nomem) {
a0840e2e 136 atomic_set(&ipvs->dropentry, 1);
1da177e4 137 } else {
a0840e2e
HS
138 atomic_set(&ipvs->dropentry, 0);
139 ipvs->sysctl_drop_entry = 1;
1da177e4
LT
140 };
141 break;
142 case 3:
a0840e2e 143 atomic_set(&ipvs->dropentry, 1);
1da177e4
LT
144 break;
145 }
a0840e2e 146 spin_unlock(&ipvs->dropentry_lock);
1da177e4
LT
147
148 /* drop_packet */
a0840e2e
HS
149 spin_lock(&ipvs->droppacket_lock);
150 switch (ipvs->sysctl_drop_packet) {
1da177e4 151 case 0:
a0840e2e 152 ipvs->drop_rate = 0;
1da177e4
LT
153 break;
154 case 1:
155 if (nomem) {
a0840e2e
HS
156 ipvs->drop_rate = ipvs->drop_counter
157 = ipvs->sysctl_amemthresh /
158 (ipvs->sysctl_amemthresh-availmem);
159 ipvs->sysctl_drop_packet = 2;
1da177e4 160 } else {
a0840e2e 161 ipvs->drop_rate = 0;
1da177e4
LT
162 }
163 break;
164 case 2:
165 if (nomem) {
a0840e2e
HS
166 ipvs->drop_rate = ipvs->drop_counter
167 = ipvs->sysctl_amemthresh /
168 (ipvs->sysctl_amemthresh-availmem);
1da177e4 169 } else {
a0840e2e
HS
170 ipvs->drop_rate = 0;
171 ipvs->sysctl_drop_packet = 1;
1da177e4
LT
172 }
173 break;
174 case 3:
a0840e2e 175 ipvs->drop_rate = ipvs->sysctl_am_droprate;
1da177e4
LT
176 break;
177 }
a0840e2e 178 spin_unlock(&ipvs->droppacket_lock);
1da177e4
LT
179
180 /* secure_tcp */
a0840e2e
HS
181 spin_lock(&ipvs->securetcp_lock);
182 switch (ipvs->sysctl_secure_tcp) {
1da177e4
LT
183 case 0:
184 if (old_secure_tcp >= 2)
185 to_change = 0;
186 break;
187 case 1:
188 if (nomem) {
189 if (old_secure_tcp < 2)
190 to_change = 1;
a0840e2e 191 ipvs->sysctl_secure_tcp = 2;
1da177e4
LT
192 } else {
193 if (old_secure_tcp >= 2)
194 to_change = 0;
195 }
196 break;
197 case 2:
198 if (nomem) {
199 if (old_secure_tcp < 2)
200 to_change = 1;
201 } else {
202 if (old_secure_tcp >= 2)
203 to_change = 0;
a0840e2e 204 ipvs->sysctl_secure_tcp = 1;
1da177e4
LT
205 }
206 break;
207 case 3:
208 if (old_secure_tcp < 2)
209 to_change = 1;
210 break;
211 }
a0840e2e 212 old_secure_tcp = ipvs->sysctl_secure_tcp;
1da177e4 213 if (to_change >= 0)
9330419d 214 ip_vs_protocol_timeout_change(ipvs,
a0840e2e
HS
215 ipvs->sysctl_secure_tcp > 1);
216 spin_unlock(&ipvs->securetcp_lock);
af9debd4
JA
217
218 local_bh_enable();
1da177e4
LT
219}
220
221
222/*
223 * Timer for checking the defense
224 */
225#define DEFENSE_TIMER_PERIOD 1*HZ
1da177e4 226
c4028958 227static void defense_work_handler(struct work_struct *work)
1da177e4 228{
f6340ee0
HS
229 struct netns_ipvs *ipvs =
230 container_of(work, struct netns_ipvs, defense_work.work);
9330419d
HS
231
232 update_defense_level(ipvs);
a0840e2e 233 if (atomic_read(&ipvs->dropentry))
f6340ee0
HS
234 ip_vs_random_dropentry(ipvs->net);
235 schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
1da177e4 236}
14e40546 237#endif
1da177e4
LT
238
239int
240ip_vs_use_count_inc(void)
241{
242 return try_module_get(THIS_MODULE);
243}
244
245void
246ip_vs_use_count_dec(void)
247{
248 module_put(THIS_MODULE);
249}
250
251
252/*
253 * Hash table: for virtual service lookups
254 */
255#define IP_VS_SVC_TAB_BITS 8
256#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
257#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
258
259/* the service table hashed by <protocol, addr, port> */
260static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
261/* the service table hashed by fwmark */
262static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
263
1da177e4
LT
264
265/*
266 * Returns hash value for virtual service
267 */
95c96174
ED
268static inline unsigned int
269ip_vs_svc_hashkey(struct net *net, int af, unsigned int proto,
fc723250 270 const union nf_inet_addr *addr, __be16 port)
1da177e4 271{
95c96174 272 register unsigned int porth = ntohs(port);
b18610de 273 __be32 addr_fold = addr->ip;
e9836f24 274 __u32 ahash;
1da177e4 275
b18610de
JV
276#ifdef CONFIG_IP_VS_IPV6
277 if (af == AF_INET6)
278 addr_fold = addr->ip6[0]^addr->ip6[1]^
279 addr->ip6[2]^addr->ip6[3];
280#endif
e9836f24
JA
281 ahash = ntohl(addr_fold);
282 ahash ^= ((size_t) net >> 8);
b18610de 283
e9836f24
JA
284 return (proto ^ ahash ^ (porth >> IP_VS_SVC_TAB_BITS) ^ porth) &
285 IP_VS_SVC_TAB_MASK;
1da177e4
LT
286}
287
288/*
289 * Returns hash value of fwmark for virtual service lookup
290 */
95c96174 291static inline unsigned int ip_vs_svc_fwm_hashkey(struct net *net, __u32 fwmark)
1da177e4 292{
fc723250 293 return (((size_t)net>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK;
1da177e4
LT
294}
295
296/*
fc723250 297 * Hashes a service in the ip_vs_svc_table by <netns,proto,addr,port>
1da177e4
LT
298 * or in the ip_vs_svc_fwm_table by fwmark.
299 * Should be called with locked tables.
300 */
301static int ip_vs_svc_hash(struct ip_vs_service *svc)
302{
95c96174 303 unsigned int hash;
1da177e4
LT
304
305 if (svc->flags & IP_VS_SVC_F_HASHED) {
1e3e238e
HE
306 pr_err("%s(): request for already hashed, called from %pF\n",
307 __func__, __builtin_return_address(0));
1da177e4
LT
308 return 0;
309 }
310
311 if (svc->fwmark == 0) {
312 /*
fc723250 313 * Hash it by <netns,protocol,addr,port> in ip_vs_svc_table
1da177e4 314 */
fc723250
HS
315 hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol,
316 &svc->addr, svc->port);
1da177e4
LT
317 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
318 } else {
319 /*
fc723250 320 * Hash it by fwmark in svc_fwm_table
1da177e4 321 */
fc723250 322 hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark);
1da177e4
LT
323 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
324 }
325
326 svc->flags |= IP_VS_SVC_F_HASHED;
327 /* increase its refcnt because it is referenced by the svc table */
328 atomic_inc(&svc->refcnt);
329 return 1;
330}
331
332
333/*
fc723250 334 * Unhashes a service from svc_table / svc_fwm_table.
1da177e4
LT
335 * Should be called with locked tables.
336 */
337static int ip_vs_svc_unhash(struct ip_vs_service *svc)
338{
339 if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
1e3e238e
HE
340 pr_err("%s(): request for unhash flagged, called from %pF\n",
341 __func__, __builtin_return_address(0));
1da177e4
LT
342 return 0;
343 }
344
345 if (svc->fwmark == 0) {
fc723250 346 /* Remove it from the svc_table table */
1da177e4
LT
347 list_del(&svc->s_list);
348 } else {
fc723250 349 /* Remove it from the svc_fwm_table table */
1da177e4
LT
350 list_del(&svc->f_list);
351 }
352
353 svc->flags &= ~IP_VS_SVC_F_HASHED;
354 atomic_dec(&svc->refcnt);
355 return 1;
356}
357
358
359/*
fc723250 360 * Get service by {netns, proto,addr,port} in the service table.
1da177e4 361 */
b18610de 362static inline struct ip_vs_service *
fc723250
HS
363__ip_vs_service_find(struct net *net, int af, __u16 protocol,
364 const union nf_inet_addr *vaddr, __be16 vport)
1da177e4 365{
95c96174 366 unsigned int hash;
1da177e4
LT
367 struct ip_vs_service *svc;
368
369 /* Check for "full" addressed entries */
fc723250 370 hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport);
1da177e4
LT
371
372 list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
b18610de
JV
373 if ((svc->af == af)
374 && ip_vs_addr_equal(af, &svc->addr, vaddr)
1da177e4 375 && (svc->port == vport)
fc723250
HS
376 && (svc->protocol == protocol)
377 && net_eq(svc->net, net)) {
1da177e4 378 /* HIT */
1da177e4
LT
379 return svc;
380 }
381 }
382
383 return NULL;
384}
385
386
387/*
388 * Get service by {fwmark} in the service table.
389 */
b18610de 390static inline struct ip_vs_service *
fc723250 391__ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark)
1da177e4 392{
95c96174 393 unsigned int hash;
1da177e4
LT
394 struct ip_vs_service *svc;
395
396 /* Check for fwmark addressed entries */
fc723250 397 hash = ip_vs_svc_fwm_hashkey(net, fwmark);
1da177e4
LT
398
399 list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
fc723250
HS
400 if (svc->fwmark == fwmark && svc->af == af
401 && net_eq(svc->net, net)) {
1da177e4 402 /* HIT */
1da177e4
LT
403 return svc;
404 }
405 }
406
407 return NULL;
408}
409
410struct ip_vs_service *
fc723250 411ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
3c2e0505 412 const union nf_inet_addr *vaddr, __be16 vport)
1da177e4
LT
413{
414 struct ip_vs_service *svc;
763f8d0e 415 struct netns_ipvs *ipvs = net_ipvs(net);
3c2e0505 416
1da177e4
LT
417 read_lock(&__ip_vs_svc_lock);
418
419 /*
420 * Check the table hashed by fwmark first
421 */
097fc76a
JA
422 if (fwmark) {
423 svc = __ip_vs_svc_fwm_find(net, af, fwmark);
424 if (svc)
425 goto out;
426 }
1da177e4
LT
427
428 /*
429 * Check the table hashed by <protocol,addr,port>
430 * for "full" addressed entries
431 */
fc723250 432 svc = __ip_vs_service_find(net, af, protocol, vaddr, vport);
1da177e4
LT
433
434 if (svc == NULL
435 && protocol == IPPROTO_TCP
763f8d0e 436 && atomic_read(&ipvs->ftpsvc_counter)
1da177e4
LT
437 && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
438 /*
439 * Check if ftp service entry exists, the packet
440 * might belong to FTP data connections.
441 */
fc723250 442 svc = __ip_vs_service_find(net, af, protocol, vaddr, FTPPORT);
1da177e4
LT
443 }
444
445 if (svc == NULL
763f8d0e 446 && atomic_read(&ipvs->nullsvc_counter)) {
1da177e4
LT
447 /*
448 * Check if the catch-all port (port zero) exists
449 */
fc723250 450 svc = __ip_vs_service_find(net, af, protocol, vaddr, 0);
1da177e4
LT
451 }
452
453 out:
26c15cfd
JA
454 if (svc)
455 atomic_inc(&svc->usecnt);
1da177e4
LT
456 read_unlock(&__ip_vs_svc_lock);
457
3c2e0505
JV
458 IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
459 fwmark, ip_vs_proto_name(protocol),
460 IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
461 svc ? "hit" : "not hit");
1da177e4
LT
462
463 return svc;
464}
465
466
467static inline void
468__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
469{
470 atomic_inc(&svc->refcnt);
471 dest->svc = svc;
472}
473
26c15cfd 474static void
1da177e4
LT
475__ip_vs_unbind_svc(struct ip_vs_dest *dest)
476{
477 struct ip_vs_service *svc = dest->svc;
478
479 dest->svc = NULL;
26c15cfd
JA
480 if (atomic_dec_and_test(&svc->refcnt)) {
481 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
482 svc->fwmark,
483 IP_VS_DBG_ADDR(svc->af, &svc->addr),
484 ntohs(svc->port), atomic_read(&svc->usecnt));
b17fc996 485 free_percpu(svc->stats.cpustats);
1da177e4 486 kfree(svc);
26c15cfd 487 }
1da177e4
LT
488}
489
490
491/*
492 * Returns hash value for real service
493 */
95c96174 494static inline unsigned int ip_vs_rs_hashkey(int af,
7937df15
JV
495 const union nf_inet_addr *addr,
496 __be16 port)
1da177e4 497{
95c96174 498 register unsigned int porth = ntohs(port);
7937df15
JV
499 __be32 addr_fold = addr->ip;
500
501#ifdef CONFIG_IP_VS_IPV6
502 if (af == AF_INET6)
503 addr_fold = addr->ip6[0]^addr->ip6[1]^
504 addr->ip6[2]^addr->ip6[3];
505#endif
1da177e4 506
7937df15 507 return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
1da177e4
LT
508 & IP_VS_RTAB_MASK;
509}
510
276472ea
JA
511/* Hash ip_vs_dest in rs_table by <proto,addr,port>. */
512static void ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
1da177e4 513{
95c96174 514 unsigned int hash;
1da177e4 515
276472ea
JA
516 if (dest->in_rs_table)
517 return;
1da177e4
LT
518
519 /*
520 * Hash by proto,addr,port,
521 * which are the parameters of the real service.
522 */
7937df15
JV
523 hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
524
276472ea
JA
525 hlist_add_head_rcu(&dest->d_list, &ipvs->rs_table[hash]);
526 dest->in_rs_table = 1;
1da177e4
LT
527}
528
276472ea
JA
529/* Unhash ip_vs_dest from rs_table. */
530static void ip_vs_rs_unhash(struct ip_vs_dest *dest)
1da177e4
LT
531{
532 /*
fc723250 533 * Remove it from the rs_table table.
1da177e4 534 */
276472ea
JA
535 if (dest->in_rs_table) {
536 hlist_del_rcu(&dest->d_list);
537 dest->in_rs_table = 0;
1da177e4 538 }
1da177e4
LT
539}
540
276472ea
JA
541/* Check if real service by <proto,addr,port> is present */
542bool ip_vs_has_real_service(struct net *net, int af, __u16 protocol,
543 const union nf_inet_addr *daddr, __be16 dport)
1da177e4 544{
fc723250 545 struct netns_ipvs *ipvs = net_ipvs(net);
95c96174 546 unsigned int hash;
1da177e4
LT
547 struct ip_vs_dest *dest;
548
276472ea 549 /* Check for "full" addressed entries */
7937df15 550 hash = ip_vs_rs_hashkey(af, daddr, dport);
1da177e4 551
276472ea
JA
552 rcu_read_lock();
553 hlist_for_each_entry_rcu(dest, &ipvs->rs_table[hash], d_list) {
554 if (dest->port == dport &&
555 dest->af == af &&
556 ip_vs_addr_equal(af, &dest->addr, daddr) &&
557 (dest->protocol == protocol || dest->vfwmark)) {
1da177e4 558 /* HIT */
276472ea
JA
559 rcu_read_unlock();
560 return true;
1da177e4
LT
561 }
562 }
276472ea 563 rcu_read_unlock();
1da177e4 564
276472ea 565 return false;
1da177e4
LT
566}
567
568/*
569 * Lookup destination by {addr,port} in the given service
570 */
571static struct ip_vs_dest *
7937df15
JV
572ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
573 __be16 dport)
1da177e4
LT
574{
575 struct ip_vs_dest *dest;
576
577 /*
578 * Find the destination for the given service
579 */
580 list_for_each_entry(dest, &svc->destinations, n_list) {
7937df15
JV
581 if ((dest->af == svc->af)
582 && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
583 && (dest->port == dport)) {
1da177e4
LT
584 /* HIT */
585 return dest;
586 }
587 }
588
589 return NULL;
590}
591
1e356f9c
RB
592/*
593 * Find destination by {daddr,dport,vaddr,protocol}
594 * Cretaed to be used in ip_vs_process_message() in
595 * the backup synchronization daemon. It finds the
596 * destination to be bound to the received connection
597 * on the backup.
1e356f9c 598 */
fc723250
HS
599struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af,
600 const union nf_inet_addr *daddr,
7937df15
JV
601 __be16 dport,
602 const union nf_inet_addr *vaddr,
52793dbe
JA
603 __be16 vport, __u16 protocol, __u32 fwmark,
604 __u32 flags)
1e356f9c
RB
605{
606 struct ip_vs_dest *dest;
607 struct ip_vs_service *svc;
52793dbe 608 __be16 port = dport;
1e356f9c 609
fc723250 610 svc = ip_vs_service_get(net, af, fwmark, protocol, vaddr, vport);
1e356f9c
RB
611 if (!svc)
612 return NULL;
52793dbe
JA
613 if (fwmark && (flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ)
614 port = 0;
615 dest = ip_vs_lookup_dest(svc, daddr, port);
616 if (!dest)
617 dest = ip_vs_lookup_dest(svc, daddr, port ^ dport);
1e356f9c 618 if (dest)
fca9c20a 619 ip_vs_dest_hold(dest);
1e356f9c
RB
620 ip_vs_service_put(svc);
621 return dest;
622}
1da177e4 623
026ace06
JA
624void ip_vs_dest_dst_rcu_free(struct rcu_head *head)
625{
626 struct ip_vs_dest_dst *dest_dst = container_of(head,
627 struct ip_vs_dest_dst,
628 rcu_head);
629
630 dst_release(dest_dst->dst_cache);
631 kfree(dest_dst);
632}
633
634/* Release dest_dst and dst_cache for dest in user context */
d1deae4d
JA
635static void __ip_vs_dst_cache_reset(struct ip_vs_dest *dest)
636{
026ace06 637 struct ip_vs_dest_dst *old;
d1deae4d 638
026ace06
JA
639 old = rcu_dereference_protected(dest->dest_dst, 1);
640 if (old) {
641 RCU_INIT_POINTER(dest->dest_dst, NULL);
642 call_rcu(&old->rcu_head, ip_vs_dest_dst_rcu_free);
643 }
d1deae4d
JA
644}
645
1da177e4
LT
646/*
647 * Lookup dest by {svc,addr,port} in the destination trash.
648 * The destination trash is used to hold the destinations that are removed
649 * from the service table but are still referenced by some conn entries.
650 * The reason to add the destination trash is when the dest is temporary
651 * down (either by administrator or by monitor program), the dest can be
652 * picked back from the trash, the remaining connections to the dest can
653 * continue, and the counting information of the dest is also useful for
654 * scheduling.
655 */
656static struct ip_vs_dest *
7937df15
JV
657ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
658 __be16 dport)
1da177e4 659{
578bc3ef 660 struct ip_vs_dest *dest;
f2431e6e 661 struct netns_ipvs *ipvs = net_ipvs(svc->net);
1da177e4
LT
662
663 /*
664 * Find the destination in trash
665 */
578bc3ef
JA
666 spin_lock_bh(&ipvs->dest_trash_lock);
667 list_for_each_entry(dest, &ipvs->dest_trash, t_list) {
7937df15
JV
668 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
669 "dest->refcnt=%d\n",
670 dest->vfwmark,
671 IP_VS_DBG_ADDR(svc->af, &dest->addr),
672 ntohs(dest->port),
673 atomic_read(&dest->refcnt));
578bc3ef
JA
674 /* We can not reuse dest while in grace period
675 * because conns still can use dest->svc
676 */
677 if (test_bit(IP_VS_DEST_STATE_REMOVING, &dest->state))
678 continue;
7937df15
JV
679 if (dest->af == svc->af &&
680 ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
1da177e4
LT
681 dest->port == dport &&
682 dest->vfwmark == svc->fwmark &&
683 dest->protocol == svc->protocol &&
684 (svc->fwmark ||
7937df15 685 (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
1da177e4
LT
686 dest->vport == svc->port))) {
687 /* HIT */
578bc3ef
JA
688 list_del(&dest->t_list);
689 ip_vs_dest_hold(dest);
690 goto out;
1da177e4
LT
691 }
692 }
693
578bc3ef
JA
694 dest = NULL;
695
696out:
697 spin_unlock_bh(&ipvs->dest_trash_lock);
698
699 return dest;
1da177e4
LT
700}
701
578bc3ef
JA
702static void ip_vs_dest_free(struct ip_vs_dest *dest)
703{
704 __ip_vs_dst_cache_reset(dest);
705 __ip_vs_unbind_svc(dest);
706 free_percpu(dest->stats.cpustats);
707 kfree(dest);
708}
1da177e4
LT
709
710/*
711 * Clean up all the destinations in the trash
712 * Called by the ip_vs_control_cleanup()
713 *
714 * When the ip_vs_control_clearup is activated by ipvs module exit,
715 * the service tables must have been flushed and all the connections
716 * are expired, and the refcnt of each destination in the trash must
578bc3ef 717 * be 0, so we simply release them here.
1da177e4 718 */
f2431e6e 719static void ip_vs_trash_cleanup(struct net *net)
1da177e4
LT
720{
721 struct ip_vs_dest *dest, *nxt;
f2431e6e 722 struct netns_ipvs *ipvs = net_ipvs(net);
1da177e4 723
578bc3ef
JA
724 del_timer_sync(&ipvs->dest_trash_timer);
725 /* No need to use dest_trash_lock */
726 list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, t_list) {
727 list_del(&dest->t_list);
728 ip_vs_dest_free(dest);
1da177e4
LT
729 }
730}
731
55a3d4e1
JA
732static void
733ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
734{
735#define IP_VS_SHOW_STATS_COUNTER(c) dst->c = src->ustats.c - src->ustats0.c
55a3d4e1
JA
736
737 spin_lock_bh(&src->lock);
738
739 IP_VS_SHOW_STATS_COUNTER(conns);
740 IP_VS_SHOW_STATS_COUNTER(inpkts);
741 IP_VS_SHOW_STATS_COUNTER(outpkts);
742 IP_VS_SHOW_STATS_COUNTER(inbytes);
743 IP_VS_SHOW_STATS_COUNTER(outbytes);
744
ea9f22cc 745 ip_vs_read_estimator(dst, src);
55a3d4e1
JA
746
747 spin_unlock_bh(&src->lock);
748}
1da177e4
LT
749
750static void
751ip_vs_zero_stats(struct ip_vs_stats *stats)
752{
753 spin_lock_bh(&stats->lock);
e93615d0 754
55a3d4e1
JA
755 /* get current counters as zero point, rates are zeroed */
756
757#define IP_VS_ZERO_STATS_COUNTER(c) stats->ustats0.c = stats->ustats.c
55a3d4e1
JA
758
759 IP_VS_ZERO_STATS_COUNTER(conns);
760 IP_VS_ZERO_STATS_COUNTER(inpkts);
761 IP_VS_ZERO_STATS_COUNTER(outpkts);
762 IP_VS_ZERO_STATS_COUNTER(inbytes);
763 IP_VS_ZERO_STATS_COUNTER(outbytes);
764
1da177e4 765 ip_vs_zero_estimator(stats);
e93615d0 766
3a14a313 767 spin_unlock_bh(&stats->lock);
1da177e4
LT
768}
769
770/*
771 * Update a destination in the given service
772 */
773static void
26c15cfd
JA
774__ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
775 struct ip_vs_dest_user_kern *udest, int add)
1da177e4 776{
fc723250 777 struct netns_ipvs *ipvs = net_ipvs(svc->net);
1da177e4
LT
778 int conn_flags;
779
780 /* set the weight and the flags */
781 atomic_set(&dest->weight, udest->weight);
3575792e
JA
782 conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
783 conn_flags |= IP_VS_CONN_F_INACTIVE;
1da177e4 784
1da177e4 785 /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
3575792e 786 if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) {
1da177e4
LT
787 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
788 } else {
789 /*
fc723250 790 * Put the real service in rs_table if not present.
1da177e4
LT
791 * For now only for NAT!
792 */
fc723250 793 ip_vs_rs_hash(ipvs, dest);
1da177e4
LT
794 }
795 atomic_set(&dest->conn_flags, conn_flags);
796
797 /* bind the service */
798 if (!dest->svc) {
799 __ip_vs_bind_svc(dest, svc);
800 } else {
801 if (dest->svc != svc) {
802 __ip_vs_unbind_svc(dest);
803 ip_vs_zero_stats(&dest->stats);
804 __ip_vs_bind_svc(dest, svc);
805 }
806 }
807
808 /* set the dest status flags */
809 dest->flags |= IP_VS_DEST_F_AVAILABLE;
810
811 if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
812 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
813 dest->u_threshold = udest->u_threshold;
814 dest->l_threshold = udest->l_threshold;
26c15cfd 815
ff75f40f 816 spin_lock_bh(&dest->dst_lock);
d1deae4d 817 __ip_vs_dst_cache_reset(dest);
ff75f40f 818 spin_unlock_bh(&dest->dst_lock);
fc604767 819
26c15cfd 820 if (add)
6ef757f9 821 ip_vs_start_estimator(svc->net, &dest->stats);
26c15cfd
JA
822
823 write_lock_bh(&__ip_vs_svc_lock);
824
825 /* Wait until all other svc users go away */
826 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
827
828 if (add) {
829 list_add(&dest->n_list, &svc->destinations);
830 svc->num_dests++;
6b6df466
JA
831 if (svc->scheduler->add_dest)
832 svc->scheduler->add_dest(svc, dest);
833 } else {
834 if (svc->scheduler->upd_dest)
835 svc->scheduler->upd_dest(svc, dest);
26c15cfd
JA
836 }
837
838 /* call the update_service, because server weight may be changed */
839 if (svc->scheduler->update_service)
840 svc->scheduler->update_service(svc);
841
842 write_unlock_bh(&__ip_vs_svc_lock);
1da177e4
LT
843}
844
845
846/*
847 * Create a destination for the given service
848 */
849static int
c860c6b1 850ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
1da177e4
LT
851 struct ip_vs_dest **dest_p)
852{
853 struct ip_vs_dest *dest;
95c96174 854 unsigned int atype;
1da177e4
LT
855
856 EnterFunction(2);
857
09571c7a
VB
858#ifdef CONFIG_IP_VS_IPV6
859 if (svc->af == AF_INET6) {
860 atype = ipv6_addr_type(&udest->addr.in6);
3bfb92f4
SW
861 if ((!(atype & IPV6_ADDR_UNICAST) ||
862 atype & IPV6_ADDR_LINKLOCAL) &&
4a98480b 863 !__ip_vs_addr_is_local_v6(svc->net, &udest->addr.in6))
09571c7a
VB
864 return -EINVAL;
865 } else
866#endif
867 {
4a98480b 868 atype = inet_addr_type(svc->net, udest->addr.ip);
09571c7a
VB
869 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
870 return -EINVAL;
871 }
1da177e4 872
dee06e47 873 dest = kzalloc(sizeof(struct ip_vs_dest), GFP_KERNEL);
0a9ee813 874 if (dest == NULL)
1da177e4 875 return -ENOMEM;
0a9ee813 876
b17fc996 877 dest->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
0a9ee813 878 if (!dest->stats.cpustats)
b17fc996 879 goto err_alloc;
1da177e4 880
c860c6b1 881 dest->af = svc->af;
1da177e4 882 dest->protocol = svc->protocol;
c860c6b1 883 dest->vaddr = svc->addr;
1da177e4
LT
884 dest->vport = svc->port;
885 dest->vfwmark = svc->fwmark;
c860c6b1 886 ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
1da177e4
LT
887 dest->port = udest->port;
888
889 atomic_set(&dest->activeconns, 0);
890 atomic_set(&dest->inactconns, 0);
891 atomic_set(&dest->persistconns, 0);
26c15cfd 892 atomic_set(&dest->refcnt, 1);
1da177e4 893
276472ea 894 INIT_HLIST_NODE(&dest->d_list);
1da177e4
LT
895 spin_lock_init(&dest->dst_lock);
896 spin_lock_init(&dest->stats.lock);
26c15cfd 897 __ip_vs_update_dest(svc, dest, udest, 1);
1da177e4
LT
898
899 *dest_p = dest;
900
901 LeaveFunction(2);
902 return 0;
b17fc996
HS
903
904err_alloc:
905 kfree(dest);
906 return -ENOMEM;
1da177e4
LT
907}
908
909
910/*
911 * Add a destination into an existing service
912 */
913static int
c860c6b1 914ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
915{
916 struct ip_vs_dest *dest;
c860c6b1 917 union nf_inet_addr daddr;
014d730d 918 __be16 dport = udest->port;
1da177e4
LT
919 int ret;
920
921 EnterFunction(2);
922
923 if (udest->weight < 0) {
1e3e238e 924 pr_err("%s(): server weight less than zero\n", __func__);
1da177e4
LT
925 return -ERANGE;
926 }
927
928 if (udest->l_threshold > udest->u_threshold) {
1e3e238e
HE
929 pr_err("%s(): lower threshold is higher than upper threshold\n",
930 __func__);
1da177e4
LT
931 return -ERANGE;
932 }
933
c860c6b1
JV
934 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
935
1da177e4
LT
936 /*
937 * Check if the dest already exists in the list
938 */
7937df15
JV
939 dest = ip_vs_lookup_dest(svc, &daddr, dport);
940
1da177e4 941 if (dest != NULL) {
1e3e238e 942 IP_VS_DBG(1, "%s(): dest already exists\n", __func__);
1da177e4
LT
943 return -EEXIST;
944 }
945
946 /*
947 * Check if the dest already exists in the trash and
948 * is from the same service
949 */
7937df15
JV
950 dest = ip_vs_trash_get_dest(svc, &daddr, dport);
951
1da177e4 952 if (dest != NULL) {
cfc78c5a
JV
953 IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
954 "dest->refcnt=%d, service %u/%s:%u\n",
955 IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
956 atomic_read(&dest->refcnt),
957 dest->vfwmark,
958 IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
959 ntohs(dest->vport));
960
26c15cfd
JA
961 __ip_vs_update_dest(svc, dest, udest, 1);
962 ret = 0;
963 } else {
1da177e4 964 /*
26c15cfd 965 * Allocate and initialize the dest structure
1da177e4 966 */
26c15cfd 967 ret = ip_vs_new_dest(svc, udest, &dest);
1da177e4 968 }
1da177e4
LT
969 LeaveFunction(2);
970
26c15cfd 971 return ret;
1da177e4
LT
972}
973
974
975/*
976 * Edit a destination in the given service
977 */
978static int
c860c6b1 979ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
980{
981 struct ip_vs_dest *dest;
c860c6b1 982 union nf_inet_addr daddr;
014d730d 983 __be16 dport = udest->port;
1da177e4
LT
984
985 EnterFunction(2);
986
987 if (udest->weight < 0) {
1e3e238e 988 pr_err("%s(): server weight less than zero\n", __func__);
1da177e4
LT
989 return -ERANGE;
990 }
991
992 if (udest->l_threshold > udest->u_threshold) {
1e3e238e
HE
993 pr_err("%s(): lower threshold is higher than upper threshold\n",
994 __func__);
1da177e4
LT
995 return -ERANGE;
996 }
997
c860c6b1
JV
998 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
999
1da177e4
LT
1000 /*
1001 * Lookup the destination list
1002 */
7937df15
JV
1003 dest = ip_vs_lookup_dest(svc, &daddr, dport);
1004
1da177e4 1005 if (dest == NULL) {
1e3e238e 1006 IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__);
1da177e4
LT
1007 return -ENOENT;
1008 }
1009
26c15cfd 1010 __ip_vs_update_dest(svc, dest, udest, 0);
1da177e4
LT
1011 LeaveFunction(2);
1012
1013 return 0;
1014}
1015
578bc3ef
JA
1016static void ip_vs_dest_wait_readers(struct rcu_head *head)
1017{
1018 struct ip_vs_dest *dest = container_of(head, struct ip_vs_dest,
1019 rcu_head);
1020
1021 /* End of grace period after unlinking */
1022 clear_bit(IP_VS_DEST_STATE_REMOVING, &dest->state);
1023}
1024
1da177e4
LT
1025
1026/*
1027 * Delete a destination (must be already unlinked from the service)
1028 */
578bc3ef
JA
1029static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest,
1030 bool cleanup)
1da177e4 1031{
a0840e2e
HS
1032 struct netns_ipvs *ipvs = net_ipvs(net);
1033
6ef757f9 1034 ip_vs_stop_estimator(net, &dest->stats);
1da177e4
LT
1035
1036 /*
1037 * Remove it from the d-linked list with the real services.
1038 */
1da177e4 1039 ip_vs_rs_unhash(dest);
1da177e4 1040
578bc3ef
JA
1041 if (!cleanup) {
1042 set_bit(IP_VS_DEST_STATE_REMOVING, &dest->state);
1043 call_rcu(&dest->rcu_head, ip_vs_dest_wait_readers);
1da177e4 1044 }
578bc3ef
JA
1045
1046 spin_lock_bh(&ipvs->dest_trash_lock);
1047 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, dest->refcnt=%d\n",
1048 IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port),
1049 atomic_read(&dest->refcnt));
1050 if (list_empty(&ipvs->dest_trash) && !cleanup)
1051 mod_timer(&ipvs->dest_trash_timer,
1052 jiffies + IP_VS_DEST_TRASH_PERIOD);
1053 /* dest lives in trash without reference */
1054 list_add(&dest->t_list, &ipvs->dest_trash);
1055 spin_unlock_bh(&ipvs->dest_trash_lock);
1056 ip_vs_dest_put(dest);
1da177e4
LT
1057}
1058
1059
1060/*
1061 * Unlink a destination from the given service
1062 */
1063static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1064 struct ip_vs_dest *dest,
1065 int svcupd)
1066{
1067 dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1068
1069 /*
1070 * Remove it from the d-linked destination list.
1071 */
1072 list_del(&dest->n_list);
1073 svc->num_dests--;
82dfb6f3 1074
6b6df466
JA
1075 if (svcupd && svc->scheduler->del_dest)
1076 svc->scheduler->del_dest(svc, dest);
1077
82dfb6f3
SW
1078 /*
1079 * Call the update_service function of its scheduler
1080 */
1081 if (svcupd && svc->scheduler->update_service)
1082 svc->scheduler->update_service(svc);
1da177e4
LT
1083}
1084
1085
1086/*
1087 * Delete a destination server in the given service
1088 */
1089static int
c860c6b1 1090ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
1091{
1092 struct ip_vs_dest *dest;
014d730d 1093 __be16 dport = udest->port;
1da177e4
LT
1094
1095 EnterFunction(2);
1096
7937df15 1097 dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
c860c6b1 1098
1da177e4 1099 if (dest == NULL) {
1e3e238e 1100 IP_VS_DBG(1, "%s(): destination not found!\n", __func__);
1da177e4
LT
1101 return -ENOENT;
1102 }
1103
1104 write_lock_bh(&__ip_vs_svc_lock);
1105
1106 /*
1107 * Wait until all other svc users go away.
1108 */
26c15cfd 1109 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1da177e4
LT
1110
1111 /*
1112 * Unlink dest from the service
1113 */
1114 __ip_vs_unlink_dest(svc, dest, 1);
1115
1116 write_unlock_bh(&__ip_vs_svc_lock);
1117
1118 /*
1119 * Delete the destination
1120 */
578bc3ef 1121 __ip_vs_del_dest(svc->net, dest, false);
1da177e4
LT
1122
1123 LeaveFunction(2);
1124
1125 return 0;
1126}
1127
578bc3ef
JA
1128static void ip_vs_dest_trash_expire(unsigned long data)
1129{
1130 struct net *net = (struct net *) data;
1131 struct netns_ipvs *ipvs = net_ipvs(net);
1132 struct ip_vs_dest *dest, *next;
1133
1134 spin_lock(&ipvs->dest_trash_lock);
1135 list_for_each_entry_safe(dest, next, &ipvs->dest_trash, t_list) {
1136 /* Skip if dest is in grace period */
1137 if (test_bit(IP_VS_DEST_STATE_REMOVING, &dest->state))
1138 continue;
1139 if (atomic_read(&dest->refcnt) > 0)
1140 continue;
1141 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u from trash\n",
1142 dest->vfwmark,
1143 IP_VS_DBG_ADDR(dest->svc->af, &dest->addr),
1144 ntohs(dest->port));
1145 list_del(&dest->t_list);
1146 ip_vs_dest_free(dest);
1147 }
1148 if (!list_empty(&ipvs->dest_trash))
1149 mod_timer(&ipvs->dest_trash_timer,
1150 jiffies + IP_VS_DEST_TRASH_PERIOD);
1151 spin_unlock(&ipvs->dest_trash_lock);
1152}
1da177e4
LT
1153
1154/*
1155 * Add a service into the service hash table
1156 */
1157static int
fc723250 1158ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
c860c6b1 1159 struct ip_vs_service **svc_p)
1da177e4
LT
1160{
1161 int ret = 0;
1162 struct ip_vs_scheduler *sched = NULL;
0d1e71b0 1163 struct ip_vs_pe *pe = NULL;
1da177e4 1164 struct ip_vs_service *svc = NULL;
a0840e2e 1165 struct netns_ipvs *ipvs = net_ipvs(net);
1da177e4
LT
1166
1167 /* increase the module use count */
1168 ip_vs_use_count_inc();
1169
1170 /* Lookup the scheduler by 'u->sched_name' */
1171 sched = ip_vs_scheduler_get(u->sched_name);
1172 if (sched == NULL) {
1e3e238e 1173 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1da177e4 1174 ret = -ENOENT;
6e08bfb8 1175 goto out_err;
1da177e4
LT
1176 }
1177
0d1e71b0 1178 if (u->pe_name && *u->pe_name) {
e9e5eee8 1179 pe = ip_vs_pe_getbyname(u->pe_name);
0d1e71b0
SH
1180 if (pe == NULL) {
1181 pr_info("persistence engine module ip_vs_pe_%s "
1182 "not found\n", u->pe_name);
1183 ret = -ENOENT;
1184 goto out_err;
1185 }
1186 }
1187
f94fd041 1188#ifdef CONFIG_IP_VS_IPV6
48148938
JV
1189 if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1190 ret = -EINVAL;
1191 goto out_err;
f94fd041
JV
1192 }
1193#endif
1194
dee06e47 1195 svc = kzalloc(sizeof(struct ip_vs_service), GFP_KERNEL);
1da177e4 1196 if (svc == NULL) {
1e3e238e 1197 IP_VS_DBG(1, "%s(): no memory\n", __func__);
1da177e4
LT
1198 ret = -ENOMEM;
1199 goto out_err;
1200 }
b17fc996 1201 svc->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
0a54e939
JL
1202 if (!svc->stats.cpustats) {
1203 ret = -ENOMEM;
b17fc996 1204 goto out_err;
0a54e939 1205 }
1da177e4
LT
1206
1207 /* I'm the first user of the service */
26c15cfd 1208 atomic_set(&svc->usecnt, 0);
1da177e4
LT
1209 atomic_set(&svc->refcnt, 0);
1210
c860c6b1 1211 svc->af = u->af;
1da177e4 1212 svc->protocol = u->protocol;
c860c6b1 1213 ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
1da177e4
LT
1214 svc->port = u->port;
1215 svc->fwmark = u->fwmark;
1216 svc->flags = u->flags;
1217 svc->timeout = u->timeout * HZ;
1218 svc->netmask = u->netmask;
fc723250 1219 svc->net = net;
1da177e4
LT
1220
1221 INIT_LIST_HEAD(&svc->destinations);
ba3a3ce1 1222 spin_lock_init(&svc->sched_lock);
1da177e4
LT
1223 spin_lock_init(&svc->stats.lock);
1224
1225 /* Bind the scheduler */
1226 ret = ip_vs_bind_scheduler(svc, sched);
1227 if (ret)
1228 goto out_err;
1229 sched = NULL;
1230
0d1e71b0
SH
1231 /* Bind the ct retriever */
1232 ip_vs_bind_pe(svc, pe);
1233 pe = NULL;
1234
1da177e4
LT
1235 /* Update the virtual service counters */
1236 if (svc->port == FTPPORT)
763f8d0e 1237 atomic_inc(&ipvs->ftpsvc_counter);
1da177e4 1238 else if (svc->port == 0)
763f8d0e 1239 atomic_inc(&ipvs->nullsvc_counter);
1da177e4 1240
6ef757f9 1241 ip_vs_start_estimator(net, &svc->stats);
f94fd041
JV
1242
1243 /* Count only IPv4 services for old get/setsockopt interface */
1244 if (svc->af == AF_INET)
a0840e2e 1245 ipvs->num_services++;
1da177e4
LT
1246
1247 /* Hash the service into the service table */
1248 write_lock_bh(&__ip_vs_svc_lock);
1249 ip_vs_svc_hash(svc);
1250 write_unlock_bh(&__ip_vs_svc_lock);
1251
1252 *svc_p = svc;
7a4f0761
HS
1253 /* Now there is a service - full throttle */
1254 ipvs->enable = 1;
1da177e4
LT
1255 return 0;
1256
b17fc996 1257
6e08bfb8 1258 out_err:
1da177e4 1259 if (svc != NULL) {
2fabf35b 1260 ip_vs_unbind_scheduler(svc);
1da177e4
LT
1261 if (svc->inc) {
1262 local_bh_disable();
1263 ip_vs_app_inc_put(svc->inc);
1264 local_bh_enable();
1265 }
b17fc996
HS
1266 if (svc->stats.cpustats)
1267 free_percpu(svc->stats.cpustats);
1da177e4
LT
1268 kfree(svc);
1269 }
1270 ip_vs_scheduler_put(sched);
0d1e71b0 1271 ip_vs_pe_put(pe);
1da177e4 1272
1da177e4
LT
1273 /* decrease the module use count */
1274 ip_vs_use_count_dec();
1275
1276 return ret;
1277}
1278
1279
1280/*
1281 * Edit a service and bind it with a new scheduler
1282 */
1283static int
c860c6b1 1284ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1da177e4
LT
1285{
1286 struct ip_vs_scheduler *sched, *old_sched;
0d1e71b0 1287 struct ip_vs_pe *pe = NULL, *old_pe = NULL;
1da177e4
LT
1288 int ret = 0;
1289
1290 /*
1291 * Lookup the scheduler, by 'u->sched_name'
1292 */
1293 sched = ip_vs_scheduler_get(u->sched_name);
1294 if (sched == NULL) {
1e3e238e 1295 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1da177e4
LT
1296 return -ENOENT;
1297 }
1298 old_sched = sched;
1299
0d1e71b0 1300 if (u->pe_name && *u->pe_name) {
e9e5eee8 1301 pe = ip_vs_pe_getbyname(u->pe_name);
0d1e71b0
SH
1302 if (pe == NULL) {
1303 pr_info("persistence engine module ip_vs_pe_%s "
1304 "not found\n", u->pe_name);
1305 ret = -ENOENT;
1306 goto out;
1307 }
1308 old_pe = pe;
1309 }
1310
f94fd041 1311#ifdef CONFIG_IP_VS_IPV6
48148938
JV
1312 if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1313 ret = -EINVAL;
1314 goto out;
f94fd041
JV
1315 }
1316#endif
1317
1da177e4
LT
1318 write_lock_bh(&__ip_vs_svc_lock);
1319
1320 /*
1321 * Wait until all other svc users go away.
1322 */
26c15cfd 1323 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1da177e4
LT
1324
1325 /*
1326 * Set the flags and timeout value
1327 */
1328 svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1329 svc->timeout = u->timeout * HZ;
1330 svc->netmask = u->netmask;
1331
1332 old_sched = svc->scheduler;
1333 if (sched != old_sched) {
1334 /*
1335 * Unbind the old scheduler
1336 */
ed3ffc4e 1337 ip_vs_unbind_scheduler(svc);
1da177e4
LT
1338
1339 /*
1340 * Bind the new scheduler
1341 */
1342 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1343 /*
1344 * If ip_vs_bind_scheduler fails, restore the old
1345 * scheduler.
1346 * The main reason of failure is out of memory.
1347 *
1348 * The question is if the old scheduler can be
1349 * restored all the time. TODO: if it cannot be
1350 * restored some time, we must delete the service,
1351 * otherwise the system may crash.
1352 */
1353 ip_vs_bind_scheduler(svc, old_sched);
1354 old_sched = sched;
9e691ed6 1355 goto out_unlock;
1da177e4
LT
1356 }
1357 }
1358
0d1e71b0
SH
1359 old_pe = svc->pe;
1360 if (pe != old_pe) {
1361 ip_vs_unbind_pe(svc);
1362 ip_vs_bind_pe(svc, pe);
1363 }
1364
552ad65a 1365out_unlock:
1da177e4 1366 write_unlock_bh(&__ip_vs_svc_lock);
552ad65a 1367out:
6e08bfb8 1368 ip_vs_scheduler_put(old_sched);
0d1e71b0 1369 ip_vs_pe_put(old_pe);
1da177e4
LT
1370 return ret;
1371}
1372
1373
1374/*
1375 * Delete a service from the service list
1376 * - The service must be unlinked, unlocked and not referenced!
1377 * - We are called under _bh lock
1378 */
578bc3ef 1379static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup)
1da177e4
LT
1380{
1381 struct ip_vs_dest *dest, *nxt;
1382 struct ip_vs_scheduler *old_sched;
0d1e71b0 1383 struct ip_vs_pe *old_pe;
a0840e2e 1384 struct netns_ipvs *ipvs = net_ipvs(svc->net);
0d1e71b0
SH
1385
1386 pr_info("%s: enter\n", __func__);
1da177e4 1387
f94fd041
JV
1388 /* Count only IPv4 services for old get/setsockopt interface */
1389 if (svc->af == AF_INET)
a0840e2e 1390 ipvs->num_services--;
f94fd041 1391
6ef757f9 1392 ip_vs_stop_estimator(svc->net, &svc->stats);
1da177e4
LT
1393
1394 /* Unbind scheduler */
1395 old_sched = svc->scheduler;
1396 ip_vs_unbind_scheduler(svc);
6e08bfb8 1397 ip_vs_scheduler_put(old_sched);
1da177e4 1398
0d1e71b0
SH
1399 /* Unbind persistence engine */
1400 old_pe = svc->pe;
1401 ip_vs_unbind_pe(svc);
1402 ip_vs_pe_put(old_pe);
1403
1da177e4
LT
1404 /* Unbind app inc */
1405 if (svc->inc) {
1406 ip_vs_app_inc_put(svc->inc);
1407 svc->inc = NULL;
1408 }
1409
1410 /*
1411 * Unlink the whole destination list
1412 */
1413 list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1414 __ip_vs_unlink_dest(svc, dest, 0);
578bc3ef 1415 __ip_vs_del_dest(svc->net, dest, cleanup);
1da177e4
LT
1416 }
1417
1418 /*
1419 * Update the virtual service counters
1420 */
1421 if (svc->port == FTPPORT)
763f8d0e 1422 atomic_dec(&ipvs->ftpsvc_counter);
1da177e4 1423 else if (svc->port == 0)
763f8d0e 1424 atomic_dec(&ipvs->nullsvc_counter);
1da177e4
LT
1425
1426 /*
1427 * Free the service if nobody refers to it
1428 */
26c15cfd
JA
1429 if (atomic_read(&svc->refcnt) == 0) {
1430 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
1431 svc->fwmark,
1432 IP_VS_DBG_ADDR(svc->af, &svc->addr),
1433 ntohs(svc->port), atomic_read(&svc->usecnt));
b17fc996 1434 free_percpu(svc->stats.cpustats);
1da177e4 1435 kfree(svc);
26c15cfd 1436 }
1da177e4
LT
1437
1438 /* decrease the module use count */
1439 ip_vs_use_count_dec();
1440}
1441
1442/*
26c15cfd 1443 * Unlink a service from list and try to delete it if its refcnt reached 0
1da177e4 1444 */
578bc3ef 1445static void ip_vs_unlink_service(struct ip_vs_service *svc, bool cleanup)
1da177e4 1446{
1da177e4
LT
1447 /*
1448 * Unhash it from the service table
1449 */
1450 write_lock_bh(&__ip_vs_svc_lock);
1451
1452 ip_vs_svc_unhash(svc);
1453
1454 /*
1455 * Wait until all the svc users go away.
1456 */
26c15cfd 1457 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1da177e4 1458
578bc3ef 1459 __ip_vs_del_service(svc, cleanup);
1da177e4
LT
1460
1461 write_unlock_bh(&__ip_vs_svc_lock);
26c15cfd
JA
1462}
1463
1464/*
1465 * Delete a service from the service list
1466 */
1467static int ip_vs_del_service(struct ip_vs_service *svc)
1468{
1469 if (svc == NULL)
1470 return -EEXIST;
578bc3ef 1471 ip_vs_unlink_service(svc, false);
1da177e4
LT
1472
1473 return 0;
1474}
1475
1476
1477/*
1478 * Flush all the virtual services
1479 */
578bc3ef 1480static int ip_vs_flush(struct net *net, bool cleanup)
1da177e4
LT
1481{
1482 int idx;
1483 struct ip_vs_service *svc, *nxt;
1484
1485 /*
fc723250 1486 * Flush the service table hashed by <netns,protocol,addr,port>
1da177e4
LT
1487 */
1488 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
fc723250
HS
1489 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx],
1490 s_list) {
1491 if (net_eq(svc->net, net))
578bc3ef 1492 ip_vs_unlink_service(svc, cleanup);
1da177e4
LT
1493 }
1494 }
1495
1496 /*
1497 * Flush the service table hashed by fwmark
1498 */
1499 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1500 list_for_each_entry_safe(svc, nxt,
1501 &ip_vs_svc_fwm_table[idx], f_list) {
fc723250 1502 if (net_eq(svc->net, net))
578bc3ef 1503 ip_vs_unlink_service(svc, cleanup);
1da177e4
LT
1504 }
1505 }
1506
1507 return 0;
1508}
1509
7a4f0761
HS
1510/*
1511 * Delete service by {netns} in the service table.
1512 * Called by __ip_vs_cleanup()
1513 */
503cf15a 1514void ip_vs_service_net_cleanup(struct net *net)
7a4f0761
HS
1515{
1516 EnterFunction(2);
1517 /* Check for "full" addressed entries */
1518 mutex_lock(&__ip_vs_mutex);
578bc3ef 1519 ip_vs_flush(net, true);
7a4f0761
HS
1520 mutex_unlock(&__ip_vs_mutex);
1521 LeaveFunction(2);
1522}
d1deae4d
JA
1523
1524/* Put all references for device (dst_cache) */
7a4f0761 1525static inline void
d1deae4d 1526ip_vs_forget_dev(struct ip_vs_dest *dest, struct net_device *dev)
7a4f0761
HS
1527{
1528 spin_lock_bh(&dest->dst_lock);
026ace06 1529 if (dest->dest_dst && dest->dest_dst->dst_cache->dev == dev) {
7a4f0761
HS
1530 IP_VS_DBG_BUF(3, "Reset dev:%s dest %s:%u ,dest->refcnt=%d\n",
1531 dev->name,
1532 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1533 ntohs(dest->port),
1534 atomic_read(&dest->refcnt));
d1deae4d 1535 __ip_vs_dst_cache_reset(dest);
7a4f0761
HS
1536 }
1537 spin_unlock_bh(&dest->dst_lock);
1538
1539}
313eae63
JA
1540/* Netdev event receiver
1541 * Currently only NETDEV_DOWN is handled to release refs to cached dsts
7a4f0761
HS
1542 */
1543static int ip_vs_dst_event(struct notifier_block *this, unsigned long event,
1544 void *ptr)
1545{
1546 struct net_device *dev = ptr;
1547 struct net *net = dev_net(dev);
283283c4 1548 struct netns_ipvs *ipvs = net_ipvs(net);
7a4f0761
HS
1549 struct ip_vs_service *svc;
1550 struct ip_vs_dest *dest;
1551 unsigned int idx;
1552
313eae63 1553 if (event != NETDEV_DOWN || !ipvs)
7a4f0761
HS
1554 return NOTIFY_DONE;
1555 IP_VS_DBG(3, "%s() dev=%s\n", __func__, dev->name);
1556 EnterFunction(2);
1557 mutex_lock(&__ip_vs_mutex);
1558 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1559 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1560 if (net_eq(svc->net, net)) {
1561 list_for_each_entry(dest, &svc->destinations,
1562 n_list) {
d1deae4d 1563 ip_vs_forget_dev(dest, dev);
7a4f0761
HS
1564 }
1565 }
1566 }
1567
1568 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1569 if (net_eq(svc->net, net)) {
1570 list_for_each_entry(dest, &svc->destinations,
1571 n_list) {
d1deae4d 1572 ip_vs_forget_dev(dest, dev);
7a4f0761
HS
1573 }
1574 }
1575
1576 }
1577 }
1578
578bc3ef
JA
1579 spin_lock_bh(&ipvs->dest_trash_lock);
1580 list_for_each_entry(dest, &ipvs->dest_trash, t_list) {
d1deae4d 1581 ip_vs_forget_dev(dest, dev);
7a4f0761 1582 }
578bc3ef 1583 spin_unlock_bh(&ipvs->dest_trash_lock);
7a4f0761
HS
1584 mutex_unlock(&__ip_vs_mutex);
1585 LeaveFunction(2);
1586 return NOTIFY_DONE;
1587}
1da177e4
LT
1588
1589/*
1590 * Zero counters in a service or all services
1591 */
1592static int ip_vs_zero_service(struct ip_vs_service *svc)
1593{
1594 struct ip_vs_dest *dest;
1595
1596 write_lock_bh(&__ip_vs_svc_lock);
1597 list_for_each_entry(dest, &svc->destinations, n_list) {
1598 ip_vs_zero_stats(&dest->stats);
1599 }
1600 ip_vs_zero_stats(&svc->stats);
1601 write_unlock_bh(&__ip_vs_svc_lock);
1602 return 0;
1603}
1604
fc723250 1605static int ip_vs_zero_all(struct net *net)
1da177e4
LT
1606{
1607 int idx;
1608 struct ip_vs_service *svc;
1609
1610 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1611 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
fc723250
HS
1612 if (net_eq(svc->net, net))
1613 ip_vs_zero_service(svc);
1da177e4
LT
1614 }
1615 }
1616
1617 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1618 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
fc723250
HS
1619 if (net_eq(svc->net, net))
1620 ip_vs_zero_service(svc);
1da177e4
LT
1621 }
1622 }
1623
2a0751af 1624 ip_vs_zero_stats(&net_ipvs(net)->tot_stats);
1da177e4
LT
1625 return 0;
1626}
1627
14e40546 1628#ifdef CONFIG_SYSCTL
749c42b6
JA
1629
/* Lower/upper bounds referenced via .extra1/.extra2 by "sync_retries" */
static int zero;
static int three = 3;
1632
1da177e4 1633static int
8d65af78 1634proc_do_defense_mode(ctl_table *table, int write,
1da177e4
LT
1635 void __user *buffer, size_t *lenp, loff_t *ppos)
1636{
9330419d 1637 struct net *net = current->nsproxy->net_ns;
1da177e4
LT
1638 int *valp = table->data;
1639 int val = *valp;
1640 int rc;
1641
8d65af78 1642 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1da177e4
LT
1643 if (write && (*valp != val)) {
1644 if ((*valp < 0) || (*valp > 3)) {
1645 /* Restore the correct value */
1646 *valp = val;
1647 } else {
9330419d 1648 update_defense_level(net_ipvs(net));
1da177e4
LT
1649 }
1650 }
1651 return rc;
1652}
1653
1da177e4 1654static int
8d65af78 1655proc_do_sync_threshold(ctl_table *table, int write,
1da177e4
LT
1656 void __user *buffer, size_t *lenp, loff_t *ppos)
1657{
1658 int *valp = table->data;
1659 int val[2];
1660 int rc;
1661
1662 /* backup the value first */
1663 memcpy(val, valp, sizeof(val));
1664
8d65af78 1665 rc = proc_dointvec(table, write, buffer, lenp, ppos);
749c42b6
JA
1666 if (write && (valp[0] < 0 || valp[1] < 0 ||
1667 (valp[0] >= valp[1] && valp[1]))) {
1da177e4
LT
1668 /* Restore the correct value */
1669 memcpy(valp, val, sizeof(val));
1670 }
1671 return rc;
1672}
1673
b880c1f0
HS
1674static int
1675proc_do_sync_mode(ctl_table *table, int write,
1676 void __user *buffer, size_t *lenp, loff_t *ppos)
1677{
1678 int *valp = table->data;
1679 int val = *valp;
1680 int rc;
1681
1682 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1683 if (write && (*valp != val)) {
1684 if ((*valp < 0) || (*valp > 1)) {
1685 /* Restore the correct value */
1686 *valp = val;
f73181c8
PNA
1687 }
1688 }
1689 return rc;
1690}
1691
1692static int
1693proc_do_sync_ports(ctl_table *table, int write,
1694 void __user *buffer, size_t *lenp, loff_t *ppos)
1695{
1696 int *valp = table->data;
1697 int val = *valp;
1698 int rc;
1699
1700 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1701 if (write && (*valp != val)) {
1702 if (*valp < 1 || !is_power_of_2(*valp)) {
1703 /* Restore the correct value */
1704 *valp = val;
b880c1f0
HS
1705 }
1706 }
1707 return rc;
1708}
1da177e4
LT
1709
1710/*
1711 * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
a0840e2e 1712 * Do not change order or insert new entries without
503cf15a 1713 * align with netns init in ip_vs_control_net_init()
1da177e4
LT
1714 */
1715
1716static struct ctl_table vs_vars[] = {
1717 {
1da177e4 1718 .procname = "amemthresh",
1da177e4
LT
1719 .maxlen = sizeof(int),
1720 .mode = 0644,
6d9f239a 1721 .proc_handler = proc_dointvec,
1da177e4 1722 },
1da177e4 1723 {
1da177e4 1724 .procname = "am_droprate",
1da177e4
LT
1725 .maxlen = sizeof(int),
1726 .mode = 0644,
6d9f239a 1727 .proc_handler = proc_dointvec,
1da177e4
LT
1728 },
1729 {
1da177e4 1730 .procname = "drop_entry",
1da177e4
LT
1731 .maxlen = sizeof(int),
1732 .mode = 0644,
6d9f239a 1733 .proc_handler = proc_do_defense_mode,
1da177e4
LT
1734 },
1735 {
1da177e4 1736 .procname = "drop_packet",
1da177e4
LT
1737 .maxlen = sizeof(int),
1738 .mode = 0644,
6d9f239a 1739 .proc_handler = proc_do_defense_mode,
1da177e4 1740 },
f4bc17cd
JA
1741#ifdef CONFIG_IP_VS_NFCT
1742 {
1743 .procname = "conntrack",
f4bc17cd
JA
1744 .maxlen = sizeof(int),
1745 .mode = 0644,
1746 .proc_handler = &proc_dointvec,
1747 },
1748#endif
1da177e4 1749 {
1da177e4 1750 .procname = "secure_tcp",
1da177e4
LT
1751 .maxlen = sizeof(int),
1752 .mode = 0644,
6d9f239a 1753 .proc_handler = proc_do_defense_mode,
1da177e4 1754 },
8a803040
JA
1755 {
1756 .procname = "snat_reroute",
8a803040
JA
1757 .maxlen = sizeof(int),
1758 .mode = 0644,
1759 .proc_handler = &proc_dointvec,
1760 },
b880c1f0
HS
1761 {
1762 .procname = "sync_version",
b880c1f0
HS
1763 .maxlen = sizeof(int),
1764 .mode = 0644,
1765 .proc_handler = &proc_do_sync_mode,
1766 },
f73181c8
PNA
1767 {
1768 .procname = "sync_ports",
1769 .maxlen = sizeof(int),
1770 .mode = 0644,
1771 .proc_handler = &proc_do_sync_ports,
1772 },
1c003b15
PNA
1773 {
1774 .procname = "sync_qlen_max",
1775 .maxlen = sizeof(int),
1776 .mode = 0644,
1777 .proc_handler = proc_dointvec,
1778 },
1779 {
1780 .procname = "sync_sock_size",
1781 .maxlen = sizeof(int),
1782 .mode = 0644,
1783 .proc_handler = proc_dointvec,
1784 },
a0840e2e
HS
1785 {
1786 .procname = "cache_bypass",
1787 .maxlen = sizeof(int),
1788 .mode = 0644,
1789 .proc_handler = proc_dointvec,
1790 },
1791 {
1792 .procname = "expire_nodest_conn",
1793 .maxlen = sizeof(int),
1794 .mode = 0644,
1795 .proc_handler = proc_dointvec,
1796 },
1797 {
1798 .procname = "expire_quiescent_template",
1799 .maxlen = sizeof(int),
1800 .mode = 0644,
1801 .proc_handler = proc_dointvec,
1802 },
1803 {
1804 .procname = "sync_threshold",
1805 .maxlen =
1806 sizeof(((struct netns_ipvs *)0)->sysctl_sync_threshold),
1807 .mode = 0644,
1808 .proc_handler = proc_do_sync_threshold,
1809 },
749c42b6
JA
1810 {
1811 .procname = "sync_refresh_period",
1812 .maxlen = sizeof(int),
1813 .mode = 0644,
1814 .proc_handler = proc_dointvec_jiffies,
1815 },
1816 {
1817 .procname = "sync_retries",
1818 .maxlen = sizeof(int),
1819 .mode = 0644,
1820 .proc_handler = proc_dointvec_minmax,
1821 .extra1 = &zero,
1822 .extra2 = &three,
1823 },
a0840e2e
HS
1824 {
1825 .procname = "nat_icmp_send",
1826 .maxlen = sizeof(int),
1827 .mode = 0644,
1828 .proc_handler = proc_dointvec,
1829 },
3654e611
JA
1830 {
1831 .procname = "pmtu_disc",
1832 .maxlen = sizeof(int),
1833 .mode = 0644,
1834 .proc_handler = proc_dointvec,
1835 },
0c12582f
JA
1836 {
1837 .procname = "backup_only",
1838 .maxlen = sizeof(int),
1839 .mode = 0644,
1840 .proc_handler = proc_dointvec,
1841 },
a0840e2e
HS
1842#ifdef CONFIG_IP_VS_DEBUG
1843 {
1844 .procname = "debug_level",
1845 .data = &sysctl_ip_vs_debug_level,
1846 .maxlen = sizeof(int),
1847 .mode = 0644,
1848 .proc_handler = proc_dointvec,
1849 },
1850#endif
1da177e4
LT
1851#if 0
1852 {
1da177e4
LT
1853 .procname = "timeout_established",
1854 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1855 .maxlen = sizeof(int),
1856 .mode = 0644,
6d9f239a 1857 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1858 },
1859 {
1da177e4
LT
1860 .procname = "timeout_synsent",
1861 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1862 .maxlen = sizeof(int),
1863 .mode = 0644,
6d9f239a 1864 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1865 },
1866 {
1da177e4
LT
1867 .procname = "timeout_synrecv",
1868 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1869 .maxlen = sizeof(int),
1870 .mode = 0644,
6d9f239a 1871 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1872 },
1873 {
1da177e4
LT
1874 .procname = "timeout_finwait",
1875 .data = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1876 .maxlen = sizeof(int),
1877 .mode = 0644,
6d9f239a 1878 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1879 },
1880 {
1da177e4
LT
1881 .procname = "timeout_timewait",
1882 .data = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1883 .maxlen = sizeof(int),
1884 .mode = 0644,
6d9f239a 1885 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1886 },
1887 {
1da177e4
LT
1888 .procname = "timeout_close",
1889 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1890 .maxlen = sizeof(int),
1891 .mode = 0644,
6d9f239a 1892 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1893 },
1894 {
1da177e4
LT
1895 .procname = "timeout_closewait",
1896 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1897 .maxlen = sizeof(int),
1898 .mode = 0644,
6d9f239a 1899 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1900 },
1901 {
1da177e4
LT
1902 .procname = "timeout_lastack",
1903 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1904 .maxlen = sizeof(int),
1905 .mode = 0644,
6d9f239a 1906 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1907 },
1908 {
1da177e4
LT
1909 .procname = "timeout_listen",
1910 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1911 .maxlen = sizeof(int),
1912 .mode = 0644,
6d9f239a 1913 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1914 },
1915 {
1da177e4
LT
1916 .procname = "timeout_synack",
1917 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1918 .maxlen = sizeof(int),
1919 .mode = 0644,
6d9f239a 1920 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1921 },
1922 {
1da177e4
LT
1923 .procname = "timeout_udp",
1924 .data = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1925 .maxlen = sizeof(int),
1926 .mode = 0644,
6d9f239a 1927 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1928 },
1929 {
1da177e4
LT
1930 .procname = "timeout_icmp",
1931 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1932 .maxlen = sizeof(int),
1933 .mode = 0644,
6d9f239a 1934 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1935 },
1936#endif
f8572d8f 1937 { }
1da177e4
LT
1938};
1939
14e40546 1940#endif
1da177e4 1941
1da177e4
LT
1942#ifdef CONFIG_PROC_FS
1943
1944struct ip_vs_iter {
fc723250 1945 struct seq_net_private p; /* Do not move this, netns depends upon it*/
1da177e4
LT
1946 struct list_head *table;
1947 int bucket;
1948};
1949
1950/*
1951 * Write the contents of the VS rule table to a PROCfs file.
1952 * (It is kept just for backward compatibility)
1953 */
95c96174 1954static inline const char *ip_vs_fwd_name(unsigned int flags)
1da177e4
LT
1955{
1956 switch (flags & IP_VS_CONN_F_FWD_MASK) {
1957 case IP_VS_CONN_F_LOCALNODE:
1958 return "Local";
1959 case IP_VS_CONN_F_TUNNEL:
1960 return "Tunnel";
1961 case IP_VS_CONN_F_DROUTE:
1962 return "Route";
1963 default:
1964 return "Masq";
1965 }
1966}
1967
1968
1969/* Get the Nth entry in the two lists */
1970static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1971{
fc723250 1972 struct net *net = seq_file_net(seq);
1da177e4
LT
1973 struct ip_vs_iter *iter = seq->private;
1974 int idx;
1975 struct ip_vs_service *svc;
1976
1977 /* look in hash by protocol */
1978 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1979 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
fc723250 1980 if (net_eq(svc->net, net) && pos-- == 0) {
1da177e4
LT
1981 iter->table = ip_vs_svc_table;
1982 iter->bucket = idx;
1983 return svc;
1984 }
1985 }
1986 }
1987
1988 /* keep looking in fwmark */
1989 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1990 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
fc723250 1991 if (net_eq(svc->net, net) && pos-- == 0) {
1da177e4
LT
1992 iter->table = ip_vs_svc_fwm_table;
1993 iter->bucket = idx;
1994 return svc;
1995 }
1996 }
1997 }
1998
1999 return NULL;
2000}
2001
2002static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
563e94f0 2003__acquires(__ip_vs_svc_lock)
1da177e4
LT
2004{
2005
2006 read_lock_bh(&__ip_vs_svc_lock);
2007 return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
2008}
2009
2010
2011static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2012{
2013 struct list_head *e;
2014 struct ip_vs_iter *iter;
2015 struct ip_vs_service *svc;
2016
2017 ++*pos;
2018 if (v == SEQ_START_TOKEN)
2019 return ip_vs_info_array(seq,0);
2020
2021 svc = v;
2022 iter = seq->private;
2023
2024 if (iter->table == ip_vs_svc_table) {
2025 /* next service in table hashed by protocol */
2026 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
2027 return list_entry(e, struct ip_vs_service, s_list);
2028
2029
2030 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
2031 list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
2032 s_list) {
2033 return svc;
2034 }
2035 }
2036
2037 iter->table = ip_vs_svc_fwm_table;
2038 iter->bucket = -1;
2039 goto scan_fwmark;
2040 }
2041
2042 /* next service in hashed by fwmark */
2043 if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
2044 return list_entry(e, struct ip_vs_service, f_list);
2045
2046 scan_fwmark:
2047 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
2048 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
2049 f_list)
2050 return svc;
2051 }
2052
2053 return NULL;
2054}
2055
2056static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
563e94f0 2057__releases(__ip_vs_svc_lock)
1da177e4
LT
2058{
2059 read_unlock_bh(&__ip_vs_svc_lock);
2060}
2061
2062
2063static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
2064{
2065 if (v == SEQ_START_TOKEN) {
2066 seq_printf(seq,
2067 "IP Virtual Server version %d.%d.%d (size=%d)\n",
6f7edb48 2068 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
1da177e4
LT
2069 seq_puts(seq,
2070 "Prot LocalAddress:Port Scheduler Flags\n");
2071 seq_puts(seq,
2072 " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
2073 } else {
2074 const struct ip_vs_service *svc = v;
2075 const struct ip_vs_iter *iter = seq->private;
2076 const struct ip_vs_dest *dest;
2077
667a5f18
VB
2078 if (iter->table == ip_vs_svc_table) {
2079#ifdef CONFIG_IP_VS_IPV6
2080 if (svc->af == AF_INET6)
5b095d98 2081 seq_printf(seq, "%s [%pI6]:%04X %s ",
667a5f18 2082 ip_vs_proto_name(svc->protocol),
38ff4fa4 2083 &svc->addr.in6,
667a5f18
VB
2084 ntohs(svc->port),
2085 svc->scheduler->name);
2086 else
2087#endif
26ec037f 2088 seq_printf(seq, "%s %08X:%04X %s %s ",
667a5f18
VB
2089 ip_vs_proto_name(svc->protocol),
2090 ntohl(svc->addr.ip),
2091 ntohs(svc->port),
26ec037f
NC
2092 svc->scheduler->name,
2093 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
667a5f18 2094 } else {
26ec037f
NC
2095 seq_printf(seq, "FWM %08X %s %s",
2096 svc->fwmark, svc->scheduler->name,
2097 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
667a5f18 2098 }
1da177e4
LT
2099
2100 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
2101 seq_printf(seq, "persistent %d %08X\n",
2102 svc->timeout,
2103 ntohl(svc->netmask));
2104 else
2105 seq_putc(seq, '\n');
2106
2107 list_for_each_entry(dest, &svc->destinations, n_list) {
667a5f18
VB
2108#ifdef CONFIG_IP_VS_IPV6
2109 if (dest->af == AF_INET6)
2110 seq_printf(seq,
5b095d98 2111 " -> [%pI6]:%04X"
667a5f18 2112 " %-7s %-6d %-10d %-10d\n",
38ff4fa4 2113 &dest->addr.in6,
667a5f18
VB
2114 ntohs(dest->port),
2115 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
2116 atomic_read(&dest->weight),
2117 atomic_read(&dest->activeconns),
2118 atomic_read(&dest->inactconns));
2119 else
2120#endif
2121 seq_printf(seq,
2122 " -> %08X:%04X "
2123 "%-7s %-6d %-10d %-10d\n",
2124 ntohl(dest->addr.ip),
2125 ntohs(dest->port),
2126 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
2127 atomic_read(&dest->weight),
2128 atomic_read(&dest->activeconns),
2129 atomic_read(&dest->inactconns));
2130
1da177e4
LT
2131 }
2132 }
2133 return 0;
2134}
2135
56b3d975 2136static const struct seq_operations ip_vs_info_seq_ops = {
1da177e4
LT
2137 .start = ip_vs_info_seq_start,
2138 .next = ip_vs_info_seq_next,
2139 .stop = ip_vs_info_seq_stop,
2140 .show = ip_vs_info_seq_show,
2141};
2142
2143static int ip_vs_info_open(struct inode *inode, struct file *file)
2144{
fc723250 2145 return seq_open_net(inode, file, &ip_vs_info_seq_ops,
cf7732e4 2146 sizeof(struct ip_vs_iter));
1da177e4
LT
2147}
2148
9a32144e 2149static const struct file_operations ip_vs_info_fops = {
1da177e4
LT
2150 .owner = THIS_MODULE,
2151 .open = ip_vs_info_open,
2152 .read = seq_read,
2153 .llseek = seq_lseek,
0f08190f 2154 .release = seq_release_net,
1da177e4
LT
2155};
2156
1da177e4
LT
2157static int ip_vs_stats_show(struct seq_file *seq, void *v)
2158{
b17fc996 2159 struct net *net = seq_file_single_net(seq);
55a3d4e1 2160 struct ip_vs_stats_user show;
1da177e4
LT
2161
2162/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2163 seq_puts(seq,
2164 " Total Incoming Outgoing Incoming Outgoing\n");
2165 seq_printf(seq,
2166 " Conns Packets Packets Bytes Bytes\n");
2167
55a3d4e1
JA
2168 ip_vs_copy_stats(&show, &net_ipvs(net)->tot_stats);
2169 seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", show.conns,
2170 show.inpkts, show.outpkts,
2171 (unsigned long long) show.inbytes,
2172 (unsigned long long) show.outbytes);
1da177e4
LT
2173
2174/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2175 seq_puts(seq,
2176 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
55a3d4e1
JA
2177 seq_printf(seq, "%8X %8X %8X %16X %16X\n",
2178 show.cps, show.inpps, show.outpps,
2179 show.inbps, show.outbps);
1da177e4
LT
2180
2181 return 0;
2182}
2183
2184static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
2185{
fc723250 2186 return single_open_net(inode, file, ip_vs_stats_show);
1da177e4
LT
2187}
2188
9a32144e 2189static const struct file_operations ip_vs_stats_fops = {
1da177e4
LT
2190 .owner = THIS_MODULE,
2191 .open = ip_vs_stats_seq_open,
2192 .read = seq_read,
2193 .llseek = seq_lseek,
0f08190f 2194 .release = single_release_net,
1da177e4
LT
2195};
2196
b17fc996
HS
2197static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
2198{
2199 struct net *net = seq_file_single_net(seq);
2a0751af
JA
2200 struct ip_vs_stats *tot_stats = &net_ipvs(net)->tot_stats;
2201 struct ip_vs_cpu_stats *cpustats = tot_stats->cpustats;
ea9f22cc 2202 struct ip_vs_stats_user rates;
b17fc996
HS
2203 int i;
2204
2205/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2206 seq_puts(seq,
2207 " Total Incoming Outgoing Incoming Outgoing\n");
2208 seq_printf(seq,
2209 "CPU Conns Packets Packets Bytes Bytes\n");
2210
2211 for_each_possible_cpu(i) {
2a0751af
JA
2212 struct ip_vs_cpu_stats *u = per_cpu_ptr(cpustats, i);
2213 unsigned int start;
2214 __u64 inbytes, outbytes;
2215
2216 do {
2217 start = u64_stats_fetch_begin_bh(&u->syncp);
2218 inbytes = u->ustats.inbytes;
2219 outbytes = u->ustats.outbytes;
2220 } while (u64_stats_fetch_retry_bh(&u->syncp, start));
2221
b17fc996 2222 seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n",
2a0751af
JA
2223 i, u->ustats.conns, u->ustats.inpkts,
2224 u->ustats.outpkts, (__u64)inbytes,
2225 (__u64)outbytes);
b17fc996
HS
2226 }
2227
2228 spin_lock_bh(&tot_stats->lock);
ea9f22cc 2229
b17fc996
HS
2230 seq_printf(seq, " ~ %8X %8X %8X %16LX %16LX\n\n",
2231 tot_stats->ustats.conns, tot_stats->ustats.inpkts,
2232 tot_stats->ustats.outpkts,
2233 (unsigned long long) tot_stats->ustats.inbytes,
2234 (unsigned long long) tot_stats->ustats.outbytes);
2235
ea9f22cc
JA
2236 ip_vs_read_estimator(&rates, tot_stats);
2237
2238 spin_unlock_bh(&tot_stats->lock);
2239
b17fc996
HS
2240/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2241 seq_puts(seq,
2242 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
2243 seq_printf(seq, " %8X %8X %8X %16X %16X\n",
ea9f22cc
JA
2244 rates.cps,
2245 rates.inpps,
2246 rates.outpps,
2247 rates.inbps,
2248 rates.outbps);
b17fc996
HS
2249
2250 return 0;
2251}
2252
2253static int ip_vs_stats_percpu_seq_open(struct inode *inode, struct file *file)
2254{
2255 return single_open_net(inode, file, ip_vs_stats_percpu_show);
2256}
2257
2258static const struct file_operations ip_vs_stats_percpu_fops = {
2259 .owner = THIS_MODULE,
2260 .open = ip_vs_stats_percpu_seq_open,
2261 .read = seq_read,
2262 .llseek = seq_lseek,
0f08190f 2263 .release = single_release_net,
b17fc996 2264};
1da177e4
LT
2265#endif
2266
2267/*
2268 * Set timeout values for tcp tcpfin udp in the timeout_table.
2269 */
9330419d 2270static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u)
1da177e4 2271{
091bb34c 2272#if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
9330419d 2273 struct ip_vs_proto_data *pd;
091bb34c 2274#endif
9330419d 2275
1da177e4
LT
2276 IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
2277 u->tcp_timeout,
2278 u->tcp_fin_timeout,
2279 u->udp_timeout);
2280
2281#ifdef CONFIG_IP_VS_PROTO_TCP
2282 if (u->tcp_timeout) {
9330419d
HS
2283 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2284 pd->timeout_table[IP_VS_TCP_S_ESTABLISHED]
1da177e4
LT
2285 = u->tcp_timeout * HZ;
2286 }
2287
2288 if (u->tcp_fin_timeout) {
9330419d
HS
2289 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2290 pd->timeout_table[IP_VS_TCP_S_FIN_WAIT]
1da177e4
LT
2291 = u->tcp_fin_timeout * HZ;
2292 }
2293#endif
2294
2295#ifdef CONFIG_IP_VS_PROTO_UDP
2296 if (u->udp_timeout) {
9330419d
HS
2297 pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
2298 pd->timeout_table[IP_VS_UDP_S_NORMAL]
1da177e4
LT
2299 = u->udp_timeout * HZ;
2300 }
2301#endif
2302 return 0;
2303}
2304
2305
/*
 * Helpers for the setsockopt() argument checks.
 * SET_CMDID() turns a command number into a zero-based table index; its
 * argument is parenthesized so that any expression can be passed safely.
 * The *_ARG_LEN macros give the payload size expected for each command
 * family, and MAX_ARG_LEN is the largest of them.
 */
#define SET_CMDID(cmd)		((cmd) - IP_VS_BASE_CTL)
#define SERVICE_ARG_LEN		(sizeof(struct ip_vs_service_user))
#define SVCDEST_ARG_LEN		(sizeof(struct ip_vs_service_user) +	\
				 sizeof(struct ip_vs_dest_user))
#define TIMEOUT_ARG_LEN		(sizeof(struct ip_vs_timeout_user))
#define DAEMON_ARG_LEN		(sizeof(struct ip_vs_daemon_user))
#define MAX_ARG_LEN		SVCDEST_ARG_LEN
2313
9b5b5cff 2314static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
1da177e4
LT
2315 [SET_CMDID(IP_VS_SO_SET_ADD)] = SERVICE_ARG_LEN,
2316 [SET_CMDID(IP_VS_SO_SET_EDIT)] = SERVICE_ARG_LEN,
2317 [SET_CMDID(IP_VS_SO_SET_DEL)] = SERVICE_ARG_LEN,
2318 [SET_CMDID(IP_VS_SO_SET_FLUSH)] = 0,
2319 [SET_CMDID(IP_VS_SO_SET_ADDDEST)] = SVCDEST_ARG_LEN,
2320 [SET_CMDID(IP_VS_SO_SET_DELDEST)] = SVCDEST_ARG_LEN,
2321 [SET_CMDID(IP_VS_SO_SET_EDITDEST)] = SVCDEST_ARG_LEN,
2322 [SET_CMDID(IP_VS_SO_SET_TIMEOUT)] = TIMEOUT_ARG_LEN,
2323 [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)] = DAEMON_ARG_LEN,
2324 [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)] = DAEMON_ARG_LEN,
2325 [SET_CMDID(IP_VS_SO_SET_ZERO)] = SERVICE_ARG_LEN,
2326};
2327
c860c6b1
JV
2328static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2329 struct ip_vs_service_user *usvc_compat)
2330{
0d1e71b0
SH
2331 memset(usvc, 0, sizeof(*usvc));
2332
c860c6b1
JV
2333 usvc->af = AF_INET;
2334 usvc->protocol = usvc_compat->protocol;
2335 usvc->addr.ip = usvc_compat->addr;
2336 usvc->port = usvc_compat->port;
2337 usvc->fwmark = usvc_compat->fwmark;
2338
2339 /* Deep copy of sched_name is not needed here */
2340 usvc->sched_name = usvc_compat->sched_name;
2341
2342 usvc->flags = usvc_compat->flags;
2343 usvc->timeout = usvc_compat->timeout;
2344 usvc->netmask = usvc_compat->netmask;
2345}
2346
2347static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
2348 struct ip_vs_dest_user *udest_compat)
2349{
0d1e71b0
SH
2350 memset(udest, 0, sizeof(*udest));
2351
c860c6b1
JV
2352 udest->addr.ip = udest_compat->addr;
2353 udest->port = udest_compat->port;
2354 udest->conn_flags = udest_compat->conn_flags;
2355 udest->weight = udest_compat->weight;
2356 udest->u_threshold = udest_compat->u_threshold;
2357 udest->l_threshold = udest_compat->l_threshold;
2358}
2359
1da177e4
LT
2360static int
2361do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2362{
fc723250 2363 struct net *net = sock_net(sk);
1da177e4
LT
2364 int ret;
2365 unsigned char arg[MAX_ARG_LEN];
c860c6b1
JV
2366 struct ip_vs_service_user *usvc_compat;
2367 struct ip_vs_service_user_kern usvc;
1da177e4 2368 struct ip_vs_service *svc;
c860c6b1
JV
2369 struct ip_vs_dest_user *udest_compat;
2370 struct ip_vs_dest_user_kern udest;
ae1d48b2 2371 struct netns_ipvs *ipvs = net_ipvs(net);
1da177e4 2372
df008c91 2373 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
1da177e4
LT
2374 return -EPERM;
2375
04bcef2a
AV
2376 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_SET_MAX)
2377 return -EINVAL;
2378 if (len < 0 || len > MAX_ARG_LEN)
2379 return -EINVAL;
1da177e4 2380 if (len != set_arglen[SET_CMDID(cmd)]) {
1e3e238e
HE
2381 pr_err("set_ctl: len %u != %u\n",
2382 len, set_arglen[SET_CMDID(cmd)]);
1da177e4
LT
2383 return -EINVAL;
2384 }
2385
2386 if (copy_from_user(arg, user, len) != 0)
2387 return -EFAULT;
2388
2389 /* increase the module use count */
2390 ip_vs_use_count_inc();
2391
ae1d48b2
HS
2392 /* Handle daemons since they have another lock */
2393 if (cmd == IP_VS_SO_SET_STARTDAEMON ||
2394 cmd == IP_VS_SO_SET_STOPDAEMON) {
2395 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2396
2397 if (mutex_lock_interruptible(&ipvs->sync_mutex)) {
2398 ret = -ERESTARTSYS;
2399 goto out_dec;
2400 }
2401 if (cmd == IP_VS_SO_SET_STARTDAEMON)
2402 ret = start_sync_thread(net, dm->state, dm->mcast_ifn,
2403 dm->syncid);
2404 else
2405 ret = stop_sync_thread(net, dm->state);
2406 mutex_unlock(&ipvs->sync_mutex);
2407 goto out_dec;
2408 }
2409
14cc3e2b 2410 if (mutex_lock_interruptible(&__ip_vs_mutex)) {
1da177e4
LT
2411 ret = -ERESTARTSYS;
2412 goto out_dec;
2413 }
2414
2415 if (cmd == IP_VS_SO_SET_FLUSH) {
2416 /* Flush the virtual service */
578bc3ef 2417 ret = ip_vs_flush(net, false);
1da177e4
LT
2418 goto out_unlock;
2419 } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
2420 /* Set timeout values for (tcp tcpfin udp) */
9330419d 2421 ret = ip_vs_set_timeout(net, (struct ip_vs_timeout_user *)arg);
1da177e4 2422 goto out_unlock;
1da177e4
LT
2423 }
2424
c860c6b1
JV
2425 usvc_compat = (struct ip_vs_service_user *)arg;
2426 udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
2427
2428 /* We only use the new structs internally, so copy userspace compat
2429 * structs to extended internal versions */
2430 ip_vs_copy_usvc_compat(&usvc, usvc_compat);
2431 ip_vs_copy_udest_compat(&udest, udest_compat);
1da177e4
LT
2432
2433 if (cmd == IP_VS_SO_SET_ZERO) {
2434 /* if no service address is set, zero counters in all */
c860c6b1 2435 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
fc723250 2436 ret = ip_vs_zero_all(net);
1da177e4
LT
2437 goto out_unlock;
2438 }
2439 }
2440
2906f66a
VMR
2441 /* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */
2442 if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP &&
2443 usvc.protocol != IPPROTO_SCTP) {
1e3e238e
HE
2444 pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n",
2445 usvc.protocol, &usvc.addr.ip,
2446 ntohs(usvc.port), usvc.sched_name);
1da177e4
LT
2447 ret = -EFAULT;
2448 goto out_unlock;
2449 }
2450
2451 /* Lookup the exact service by <protocol, addr, port> or fwmark */
c860c6b1 2452 if (usvc.fwmark == 0)
fc723250 2453 svc = __ip_vs_service_find(net, usvc.af, usvc.protocol,
26c15cfd 2454 &usvc.addr, usvc.port);
1da177e4 2455 else
fc723250 2456 svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark);
1da177e4
LT
2457
2458 if (cmd != IP_VS_SO_SET_ADD
c860c6b1 2459 && (svc == NULL || svc->protocol != usvc.protocol)) {
1da177e4 2460 ret = -ESRCH;
26c15cfd 2461 goto out_unlock;
1da177e4
LT
2462 }
2463
2464 switch (cmd) {
2465 case IP_VS_SO_SET_ADD:
2466 if (svc != NULL)
2467 ret = -EEXIST;
2468 else
fc723250 2469 ret = ip_vs_add_service(net, &usvc, &svc);
1da177e4
LT
2470 break;
2471 case IP_VS_SO_SET_EDIT:
c860c6b1 2472 ret = ip_vs_edit_service(svc, &usvc);
1da177e4
LT
2473 break;
2474 case IP_VS_SO_SET_DEL:
2475 ret = ip_vs_del_service(svc);
2476 if (!ret)
2477 goto out_unlock;
2478 break;
2479 case IP_VS_SO_SET_ZERO:
2480 ret = ip_vs_zero_service(svc);
2481 break;
2482 case IP_VS_SO_SET_ADDDEST:
c860c6b1 2483 ret = ip_vs_add_dest(svc, &udest);
1da177e4
LT
2484 break;
2485 case IP_VS_SO_SET_EDITDEST:
c860c6b1 2486 ret = ip_vs_edit_dest(svc, &udest);
1da177e4
LT
2487 break;
2488 case IP_VS_SO_SET_DELDEST:
c860c6b1 2489 ret = ip_vs_del_dest(svc, &udest);
1da177e4
LT
2490 break;
2491 default:
2492 ret = -EINVAL;
2493 }
2494
1da177e4 2495 out_unlock:
14cc3e2b 2496 mutex_unlock(&__ip_vs_mutex);
1da177e4
LT
2497 out_dec:
2498 /* decrease the module use count */
2499 ip_vs_use_count_dec();
2500
2501 return ret;
2502}
2503
2504
1da177e4
LT
2505static void
2506ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2507{
2508 dst->protocol = src->protocol;
e7ade46a 2509 dst->addr = src->addr.ip;
1da177e4
LT
2510 dst->port = src->port;
2511 dst->fwmark = src->fwmark;
4da62fc7 2512 strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
1da177e4
LT
2513 dst->flags = src->flags;
2514 dst->timeout = src->timeout / HZ;
2515 dst->netmask = src->netmask;
2516 dst->num_dests = src->num_dests;
2517 ip_vs_copy_stats(&dst->stats, &src->stats);
2518}
2519
2520static inline int
fc723250
HS
2521__ip_vs_get_service_entries(struct net *net,
2522 const struct ip_vs_get_services *get,
1da177e4
LT
2523 struct ip_vs_get_services __user *uptr)
2524{
2525 int idx, count=0;
2526 struct ip_vs_service *svc;
2527 struct ip_vs_service_entry entry;
2528 int ret = 0;
2529
2530 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2531 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
f94fd041 2532 /* Only expose IPv4 entries to old interface */
fc723250 2533 if (svc->af != AF_INET || !net_eq(svc->net, net))
f94fd041
JV
2534 continue;
2535
1da177e4
LT
2536 if (count >= get->num_services)
2537 goto out;
4da62fc7 2538 memset(&entry, 0, sizeof(entry));
1da177e4
LT
2539 ip_vs_copy_service(&entry, svc);
2540 if (copy_to_user(&uptr->entrytable[count],
2541 &entry, sizeof(entry))) {
2542 ret = -EFAULT;
2543 goto out;
2544 }
2545 count++;
2546 }
2547 }
2548
2549 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2550 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
f94fd041 2551 /* Only expose IPv4 entries to old interface */
fc723250 2552 if (svc->af != AF_INET || !net_eq(svc->net, net))
f94fd041
JV
2553 continue;
2554
1da177e4
LT
2555 if (count >= get->num_services)
2556 goto out;
4da62fc7 2557 memset(&entry, 0, sizeof(entry));
1da177e4
LT
2558 ip_vs_copy_service(&entry, svc);
2559 if (copy_to_user(&uptr->entrytable[count],
2560 &entry, sizeof(entry))) {
2561 ret = -EFAULT;
2562 goto out;
2563 }
2564 count++;
2565 }
2566 }
552ad65a 2567out:
1da177e4
LT
2568 return ret;
2569}
2570
2571static inline int
fc723250 2572__ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
1da177e4
LT
2573 struct ip_vs_get_dests __user *uptr)
2574{
2575 struct ip_vs_service *svc;
b18610de 2576 union nf_inet_addr addr = { .ip = get->addr };
1da177e4
LT
2577 int ret = 0;
2578
2579 if (get->fwmark)
fc723250 2580 svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark);
1da177e4 2581 else
fc723250 2582 svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr,
26c15cfd 2583 get->port);
b18610de 2584
1da177e4
LT
2585 if (svc) {
2586 int count = 0;
2587 struct ip_vs_dest *dest;
2588 struct ip_vs_dest_entry entry;
2589
2590 list_for_each_entry(dest, &svc->destinations, n_list) {
2591 if (count >= get->num_dests)
2592 break;
2593
e7ade46a 2594 entry.addr = dest->addr.ip;
1da177e4
LT
2595 entry.port = dest->port;
2596 entry.conn_flags = atomic_read(&dest->conn_flags);
2597 entry.weight = atomic_read(&dest->weight);
2598 entry.u_threshold = dest->u_threshold;
2599 entry.l_threshold = dest->l_threshold;
2600 entry.activeconns = atomic_read(&dest->activeconns);
2601 entry.inactconns = atomic_read(&dest->inactconns);
2602 entry.persistconns = atomic_read(&dest->persistconns);
2603 ip_vs_copy_stats(&entry.stats, &dest->stats);
2604 if (copy_to_user(&uptr->entrytable[count],
2605 &entry, sizeof(entry))) {
2606 ret = -EFAULT;
2607 break;
2608 }
2609 count++;
2610 }
1da177e4
LT
2611 } else
2612 ret = -ESRCH;
2613 return ret;
2614}
2615
2616static inline void
9330419d 2617__ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u)
1da177e4 2618{
091bb34c 2619#if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
9330419d 2620 struct ip_vs_proto_data *pd;
091bb34c 2621#endif
9330419d 2622
b61a602e
AB
2623 memset(u, 0, sizeof (*u));
2624
1da177e4 2625#ifdef CONFIG_IP_VS_PROTO_TCP
9330419d
HS
2626 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2627 u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2628 u->tcp_fin_timeout = pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
1da177e4
LT
2629#endif
2630#ifdef CONFIG_IP_VS_PROTO_UDP
9330419d 2631 pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
1da177e4 2632 u->udp_timeout =
9330419d 2633 pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
1da177e4
LT
2634#endif
2635}
2636
2637
/*
 * Helpers for the getsockopt() argument checks.
 * GET_CMDID() turns a command number into a zero-based table index; its
 * argument is parenthesized so that any expression can be passed safely.
 * The GET_*_ARG_LEN macros give the fixed-size part of each request.
 */
#define GET_CMDID(cmd)		((cmd) - IP_VS_BASE_CTL)
#define GET_INFO_ARG_LEN	(sizeof(struct ip_vs_getinfo))
#define GET_SERVICES_ARG_LEN	(sizeof(struct ip_vs_get_services))
#define GET_SERVICE_ARG_LEN	(sizeof(struct ip_vs_service_entry))
#define GET_DESTS_ARG_LEN	(sizeof(struct ip_vs_get_dests))
#define GET_TIMEOUT_ARG_LEN	(sizeof(struct ip_vs_timeout_user))
#define GET_DAEMON_ARG_LEN	(sizeof(struct ip_vs_daemon_user) * 2)
2645
9b5b5cff 2646static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
1da177e4
LT
2647 [GET_CMDID(IP_VS_SO_GET_VERSION)] = 64,
2648 [GET_CMDID(IP_VS_SO_GET_INFO)] = GET_INFO_ARG_LEN,
2649 [GET_CMDID(IP_VS_SO_GET_SERVICES)] = GET_SERVICES_ARG_LEN,
2650 [GET_CMDID(IP_VS_SO_GET_SERVICE)] = GET_SERVICE_ARG_LEN,
2651 [GET_CMDID(IP_VS_SO_GET_DESTS)] = GET_DESTS_ARG_LEN,
2652 [GET_CMDID(IP_VS_SO_GET_TIMEOUT)] = GET_TIMEOUT_ARG_LEN,
2653 [GET_CMDID(IP_VS_SO_GET_DAEMON)] = GET_DAEMON_ARG_LEN,
2654};
2655
2656static int
2657do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2658{
2659 unsigned char arg[128];
2660 int ret = 0;
04bcef2a 2661 unsigned int copylen;
fc723250 2662 struct net *net = sock_net(sk);
f131315f 2663 struct netns_ipvs *ipvs = net_ipvs(net);
1da177e4 2664
fc723250 2665 BUG_ON(!net);
df008c91 2666 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
1da177e4
LT
2667 return -EPERM;
2668
04bcef2a
AV
2669 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_GET_MAX)
2670 return -EINVAL;
2671
1da177e4 2672 if (*len < get_arglen[GET_CMDID(cmd)]) {
1e3e238e
HE
2673 pr_err("get_ctl: len %u < %u\n",
2674 *len, get_arglen[GET_CMDID(cmd)]);
1da177e4
LT
2675 return -EINVAL;
2676 }
2677
04bcef2a
AV
2678 copylen = get_arglen[GET_CMDID(cmd)];
2679 if (copylen > 128)
2680 return -EINVAL;
2681
2682 if (copy_from_user(arg, user, copylen) != 0)
1da177e4 2683 return -EFAULT;
ae1d48b2
HS
2684 /*
2685 * Handle daemons first since it has its own locking
2686 */
2687 if (cmd == IP_VS_SO_GET_DAEMON) {
2688 struct ip_vs_daemon_user d[2];
2689
2690 memset(&d, 0, sizeof(d));
2691 if (mutex_lock_interruptible(&ipvs->sync_mutex))
2692 return -ERESTARTSYS;
2693
2694 if (ipvs->sync_state & IP_VS_STATE_MASTER) {
2695 d[0].state = IP_VS_STATE_MASTER;
2696 strlcpy(d[0].mcast_ifn, ipvs->master_mcast_ifn,
2697 sizeof(d[0].mcast_ifn));
2698 d[0].syncid = ipvs->master_syncid;
2699 }
2700 if (ipvs->sync_state & IP_VS_STATE_BACKUP) {
2701 d[1].state = IP_VS_STATE_BACKUP;
2702 strlcpy(d[1].mcast_ifn, ipvs->backup_mcast_ifn,
2703 sizeof(d[1].mcast_ifn));
2704 d[1].syncid = ipvs->backup_syncid;
2705 }
2706 if (copy_to_user(user, &d, sizeof(d)) != 0)
2707 ret = -EFAULT;
2708 mutex_unlock(&ipvs->sync_mutex);
2709 return ret;
2710 }
1da177e4 2711
14cc3e2b 2712 if (mutex_lock_interruptible(&__ip_vs_mutex))
1da177e4
LT
2713 return -ERESTARTSYS;
2714
2715 switch (cmd) {
2716 case IP_VS_SO_GET_VERSION:
2717 {
2718 char buf[64];
2719
2720 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
6f7edb48 2721 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
1da177e4
LT
2722 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2723 ret = -EFAULT;
2724 goto out;
2725 }
2726 *len = strlen(buf)+1;
2727 }
2728 break;
2729
2730 case IP_VS_SO_GET_INFO:
2731 {
2732 struct ip_vs_getinfo info;
2733 info.version = IP_VS_VERSION_CODE;
6f7edb48 2734 info.size = ip_vs_conn_tab_size;
a0840e2e 2735 info.num_services = ipvs->num_services;
1da177e4
LT
2736 if (copy_to_user(user, &info, sizeof(info)) != 0)
2737 ret = -EFAULT;
2738 }
2739 break;
2740
2741 case IP_VS_SO_GET_SERVICES:
2742 {
2743 struct ip_vs_get_services *get;
2744 int size;
2745
2746 get = (struct ip_vs_get_services *)arg;
2747 size = sizeof(*get) +
2748 sizeof(struct ip_vs_service_entry) * get->num_services;
2749 if (*len != size) {
1e3e238e 2750 pr_err("length: %u != %u\n", *len, size);
1da177e4
LT
2751 ret = -EINVAL;
2752 goto out;
2753 }
fc723250 2754 ret = __ip_vs_get_service_entries(net, get, user);
1da177e4
LT
2755 }
2756 break;
2757
2758 case IP_VS_SO_GET_SERVICE:
2759 {
2760 struct ip_vs_service_entry *entry;
2761 struct ip_vs_service *svc;
b18610de 2762 union nf_inet_addr addr;
1da177e4
LT
2763
2764 entry = (struct ip_vs_service_entry *)arg;
b18610de 2765 addr.ip = entry->addr;
1da177e4 2766 if (entry->fwmark)
fc723250 2767 svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark);
1da177e4 2768 else
fc723250
HS
2769 svc = __ip_vs_service_find(net, AF_INET,
2770 entry->protocol, &addr,
2771 entry->port);
1da177e4
LT
2772 if (svc) {
2773 ip_vs_copy_service(entry, svc);
2774 if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2775 ret = -EFAULT;
1da177e4
LT
2776 } else
2777 ret = -ESRCH;
2778 }
2779 break;
2780
2781 case IP_VS_SO_GET_DESTS:
2782 {
2783 struct ip_vs_get_dests *get;
2784 int size;
2785
2786 get = (struct ip_vs_get_dests *)arg;
2787 size = sizeof(*get) +
2788 sizeof(struct ip_vs_dest_entry) * get->num_dests;
2789 if (*len != size) {
1e3e238e 2790 pr_err("length: %u != %u\n", *len, size);
1da177e4
LT
2791 ret = -EINVAL;
2792 goto out;
2793 }
fc723250 2794 ret = __ip_vs_get_dest_entries(net, get, user);
1da177e4
LT
2795 }
2796 break;
2797
2798 case IP_VS_SO_GET_TIMEOUT:
2799 {
2800 struct ip_vs_timeout_user t;
2801
9330419d 2802 __ip_vs_get_timeouts(net, &t);
1da177e4
LT
2803 if (copy_to_user(user, &t, sizeof(t)) != 0)
2804 ret = -EFAULT;
2805 }
2806 break;
2807
1da177e4
LT
2808 default:
2809 ret = -EINVAL;
2810 }
2811
552ad65a 2812out:
14cc3e2b 2813 mutex_unlock(&__ip_vs_mutex);
1da177e4
LT
2814 return ret;
2815}
2816
2817
2818static struct nf_sockopt_ops ip_vs_sockopts = {
2819 .pf = PF_INET,
2820 .set_optmin = IP_VS_BASE_CTL,
2821 .set_optmax = IP_VS_SO_SET_MAX+1,
2822 .set = do_ip_vs_set_ctl,
2823 .get_optmin = IP_VS_BASE_CTL,
2824 .get_optmax = IP_VS_SO_GET_MAX+1,
2825 .get = do_ip_vs_get_ctl,
16fcec35 2826 .owner = THIS_MODULE,
1da177e4
LT
2827};
2828
9a812198
JV
2829/*
2830 * Generic Netlink interface
2831 */
2832
2833/* IPVS genetlink family */
2834static struct genl_family ip_vs_genl_family = {
2835 .id = GENL_ID_GENERATE,
2836 .hdrsize = 0,
2837 .name = IPVS_GENL_NAME,
2838 .version = IPVS_GENL_VERSION,
2839 .maxattr = IPVS_CMD_MAX,
c6d2d445 2840 .netnsok = true, /* Make ipvsadm to work on netns */
9a812198
JV
2841};
2842
2843/* Policy used for first-level command attributes */
2844static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
2845 [IPVS_CMD_ATTR_SERVICE] = { .type = NLA_NESTED },
2846 [IPVS_CMD_ATTR_DEST] = { .type = NLA_NESTED },
2847 [IPVS_CMD_ATTR_DAEMON] = { .type = NLA_NESTED },
2848 [IPVS_CMD_ATTR_TIMEOUT_TCP] = { .type = NLA_U32 },
2849 [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
2850 [IPVS_CMD_ATTR_TIMEOUT_UDP] = { .type = NLA_U32 },
2851};
2852
2853/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
2854static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
2855 [IPVS_DAEMON_ATTR_STATE] = { .type = NLA_U32 },
2856 [IPVS_DAEMON_ATTR_MCAST_IFN] = { .type = NLA_NUL_STRING,
2857 .len = IP_VS_IFNAME_MAXLEN },
2858 [IPVS_DAEMON_ATTR_SYNC_ID] = { .type = NLA_U32 },
2859};
2860
2861/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
2862static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2863 [IPVS_SVC_ATTR_AF] = { .type = NLA_U16 },
2864 [IPVS_SVC_ATTR_PROTOCOL] = { .type = NLA_U16 },
2865 [IPVS_SVC_ATTR_ADDR] = { .type = NLA_BINARY,
2866 .len = sizeof(union nf_inet_addr) },
2867 [IPVS_SVC_ATTR_PORT] = { .type = NLA_U16 },
2868 [IPVS_SVC_ATTR_FWMARK] = { .type = NLA_U32 },
2869 [IPVS_SVC_ATTR_SCHED_NAME] = { .type = NLA_NUL_STRING,
2870 .len = IP_VS_SCHEDNAME_MAXLEN },
0d1e71b0
SH
2871 [IPVS_SVC_ATTR_PE_NAME] = { .type = NLA_NUL_STRING,
2872 .len = IP_VS_PENAME_MAXLEN },
9a812198
JV
2873 [IPVS_SVC_ATTR_FLAGS] = { .type = NLA_BINARY,
2874 .len = sizeof(struct ip_vs_flags) },
2875 [IPVS_SVC_ATTR_TIMEOUT] = { .type = NLA_U32 },
2876 [IPVS_SVC_ATTR_NETMASK] = { .type = NLA_U32 },
2877 [IPVS_SVC_ATTR_STATS] = { .type = NLA_NESTED },
2878};
2879
2880/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
2881static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2882 [IPVS_DEST_ATTR_ADDR] = { .type = NLA_BINARY,
2883 .len = sizeof(union nf_inet_addr) },
2884 [IPVS_DEST_ATTR_PORT] = { .type = NLA_U16 },
2885 [IPVS_DEST_ATTR_FWD_METHOD] = { .type = NLA_U32 },
2886 [IPVS_DEST_ATTR_WEIGHT] = { .type = NLA_U32 },
2887 [IPVS_DEST_ATTR_U_THRESH] = { .type = NLA_U32 },
2888 [IPVS_DEST_ATTR_L_THRESH] = { .type = NLA_U32 },
2889 [IPVS_DEST_ATTR_ACTIVE_CONNS] = { .type = NLA_U32 },
2890 [IPVS_DEST_ATTR_INACT_CONNS] = { .type = NLA_U32 },
2891 [IPVS_DEST_ATTR_PERSIST_CONNS] = { .type = NLA_U32 },
2892 [IPVS_DEST_ATTR_STATS] = { .type = NLA_NESTED },
2893};
2894
2895static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2896 struct ip_vs_stats *stats)
2897{
55a3d4e1 2898 struct ip_vs_stats_user ustats;
9a812198
JV
2899 struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2900 if (!nl_stats)
2901 return -EMSGSIZE;
2902
55a3d4e1 2903 ip_vs_copy_stats(&ustats, stats);
9a812198 2904
969e8e25
DM
2905 if (nla_put_u32(skb, IPVS_STATS_ATTR_CONNS, ustats.conns) ||
2906 nla_put_u32(skb, IPVS_STATS_ATTR_INPKTS, ustats.inpkts) ||
2907 nla_put_u32(skb, IPVS_STATS_ATTR_OUTPKTS, ustats.outpkts) ||
2908 nla_put_u64(skb, IPVS_STATS_ATTR_INBYTES, ustats.inbytes) ||
2909 nla_put_u64(skb, IPVS_STATS_ATTR_OUTBYTES, ustats.outbytes) ||
2910 nla_put_u32(skb, IPVS_STATS_ATTR_CPS, ustats.cps) ||
2911 nla_put_u32(skb, IPVS_STATS_ATTR_INPPS, ustats.inpps) ||
2912 nla_put_u32(skb, IPVS_STATS_ATTR_OUTPPS, ustats.outpps) ||
2913 nla_put_u32(skb, IPVS_STATS_ATTR_INBPS, ustats.inbps) ||
2914 nla_put_u32(skb, IPVS_STATS_ATTR_OUTBPS, ustats.outbps))
2915 goto nla_put_failure;
9a812198
JV
2916 nla_nest_end(skb, nl_stats);
2917
2918 return 0;
2919
2920nla_put_failure:
9a812198
JV
2921 nla_nest_cancel(skb, nl_stats);
2922 return -EMSGSIZE;
2923}
2924
2925static int ip_vs_genl_fill_service(struct sk_buff *skb,
2926 struct ip_vs_service *svc)
2927{
2928 struct nlattr *nl_service;
2929 struct ip_vs_flags flags = { .flags = svc->flags,
2930 .mask = ~0 };
2931
2932 nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2933 if (!nl_service)
2934 return -EMSGSIZE;
2935
969e8e25
DM
2936 if (nla_put_u16(skb, IPVS_SVC_ATTR_AF, svc->af))
2937 goto nla_put_failure;
9a812198 2938 if (svc->fwmark) {
969e8e25
DM
2939 if (nla_put_u32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark))
2940 goto nla_put_failure;
9a812198 2941 } else {
969e8e25
DM
2942 if (nla_put_u16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol) ||
2943 nla_put(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr) ||
2944 nla_put_u16(skb, IPVS_SVC_ATTR_PORT, svc->port))
2945 goto nla_put_failure;
9a812198
JV
2946 }
2947
969e8e25
DM
2948 if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name) ||
2949 (svc->pe &&
2950 nla_put_string(skb, IPVS_SVC_ATTR_PE_NAME, svc->pe->name)) ||
2951 nla_put(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags) ||
2952 nla_put_u32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ) ||
2953 nla_put_u32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask))
2954 goto nla_put_failure;
9a812198
JV
2955 if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2956 goto nla_put_failure;
2957
2958 nla_nest_end(skb, nl_service);
2959
2960 return 0;
2961
2962nla_put_failure:
2963 nla_nest_cancel(skb, nl_service);
2964 return -EMSGSIZE;
2965}
2966
2967static int ip_vs_genl_dump_service(struct sk_buff *skb,
2968 struct ip_vs_service *svc,
2969 struct netlink_callback *cb)
2970{
2971 void *hdr;
2972
15e47304 2973 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
9a812198
JV
2974 &ip_vs_genl_family, NLM_F_MULTI,
2975 IPVS_CMD_NEW_SERVICE);
2976 if (!hdr)
2977 return -EMSGSIZE;
2978
2979 if (ip_vs_genl_fill_service(skb, svc) < 0)
2980 goto nla_put_failure;
2981
2982 return genlmsg_end(skb, hdr);
2983
2984nla_put_failure:
2985 genlmsg_cancel(skb, hdr);
2986 return -EMSGSIZE;
2987}
2988
2989static int ip_vs_genl_dump_services(struct sk_buff *skb,
2990 struct netlink_callback *cb)
2991{
2992 int idx = 0, i;
2993 int start = cb->args[0];
2994 struct ip_vs_service *svc;
fc723250 2995 struct net *net = skb_sknet(skb);
9a812198
JV
2996
2997 mutex_lock(&__ip_vs_mutex);
2998 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2999 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
fc723250 3000 if (++idx <= start || !net_eq(svc->net, net))
9a812198
JV
3001 continue;
3002 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
3003 idx--;
3004 goto nla_put_failure;
3005 }
3006 }
3007 }
3008
3009 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
3010 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
fc723250 3011 if (++idx <= start || !net_eq(svc->net, net))
9a812198
JV
3012 continue;
3013 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
3014 idx--;
3015 goto nla_put_failure;
3016 }
3017 }
3018 }
3019
3020nla_put_failure:
3021 mutex_unlock(&__ip_vs_mutex);
3022 cb->args[0] = idx;
3023
3024 return skb->len;
3025}
3026
fc723250
HS
3027static int ip_vs_genl_parse_service(struct net *net,
3028 struct ip_vs_service_user_kern *usvc,
26c15cfd
JA
3029 struct nlattr *nla, int full_entry,
3030 struct ip_vs_service **ret_svc)
9a812198
JV
3031{
3032 struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
3033 struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
26c15cfd 3034 struct ip_vs_service *svc;
9a812198
JV
3035
3036 /* Parse mandatory identifying service fields first */
3037 if (nla == NULL ||
3038 nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
3039 return -EINVAL;
3040
3041 nla_af = attrs[IPVS_SVC_ATTR_AF];
3042 nla_protocol = attrs[IPVS_SVC_ATTR_PROTOCOL];
3043 nla_addr = attrs[IPVS_SVC_ATTR_ADDR];
3044 nla_port = attrs[IPVS_SVC_ATTR_PORT];
3045 nla_fwmark = attrs[IPVS_SVC_ATTR_FWMARK];
3046
3047 if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
3048 return -EINVAL;
3049
258c8893
SH
3050 memset(usvc, 0, sizeof(*usvc));
3051
c860c6b1 3052 usvc->af = nla_get_u16(nla_af);
f94fd041
JV
3053#ifdef CONFIG_IP_VS_IPV6
3054 if (usvc->af != AF_INET && usvc->af != AF_INET6)
3055#else
3056 if (usvc->af != AF_INET)
3057#endif
9a812198
JV
3058 return -EAFNOSUPPORT;
3059
3060 if (nla_fwmark) {
3061 usvc->protocol = IPPROTO_TCP;
3062 usvc->fwmark = nla_get_u32(nla_fwmark);
3063 } else {
3064 usvc->protocol = nla_get_u16(nla_protocol);
3065 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
3066 usvc->port = nla_get_u16(nla_port);
3067 usvc->fwmark = 0;
3068 }
3069
26c15cfd 3070 if (usvc->fwmark)
fc723250 3071 svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark);
26c15cfd 3072 else
fc723250 3073 svc = __ip_vs_service_find(net, usvc->af, usvc->protocol,
26c15cfd
JA
3074 &usvc->addr, usvc->port);
3075 *ret_svc = svc;
3076
9a812198
JV
3077 /* If a full entry was requested, check for the additional fields */
3078 if (full_entry) {
0d1e71b0 3079 struct nlattr *nla_sched, *nla_flags, *nla_pe, *nla_timeout,
9a812198
JV
3080 *nla_netmask;
3081 struct ip_vs_flags flags;
9a812198
JV
3082
3083 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
0d1e71b0 3084 nla_pe = attrs[IPVS_SVC_ATTR_PE_NAME];
9a812198
JV
3085 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
3086 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
3087 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
3088
3089 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
3090 return -EINVAL;
3091
3092 nla_memcpy(&flags, nla_flags, sizeof(flags));
3093
3094 /* prefill flags from service if it already exists */
26c15cfd 3095 if (svc)
9a812198 3096 usvc->flags = svc->flags;
9a812198
JV
3097
3098 /* set new flags from userland */
3099 usvc->flags = (usvc->flags & ~flags.mask) |
3100 (flags.flags & flags.mask);
c860c6b1 3101 usvc->sched_name = nla_data(nla_sched);
0d1e71b0 3102 usvc->pe_name = nla_pe ? nla_data(nla_pe) : NULL;
9a812198
JV
3103 usvc->timeout = nla_get_u32(nla_timeout);
3104 usvc->netmask = nla_get_u32(nla_netmask);
3105 }
3106
3107 return 0;
3108}
3109
fc723250
HS
3110static struct ip_vs_service *ip_vs_genl_find_service(struct net *net,
3111 struct nlattr *nla)
9a812198 3112{
c860c6b1 3113 struct ip_vs_service_user_kern usvc;
26c15cfd 3114 struct ip_vs_service *svc;
9a812198
JV
3115 int ret;
3116
fc723250 3117 ret = ip_vs_genl_parse_service(net, &usvc, nla, 0, &svc);
26c15cfd 3118 return ret ? ERR_PTR(ret) : svc;
9a812198
JV
3119}
3120
3121static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
3122{
3123 struct nlattr *nl_dest;
3124
3125 nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
3126 if (!nl_dest)
3127 return -EMSGSIZE;
3128
969e8e25
DM
3129 if (nla_put(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr) ||
3130 nla_put_u16(skb, IPVS_DEST_ATTR_PORT, dest->port) ||
3131 nla_put_u32(skb, IPVS_DEST_ATTR_FWD_METHOD,
3132 (atomic_read(&dest->conn_flags) &
3133 IP_VS_CONN_F_FWD_MASK)) ||
3134 nla_put_u32(skb, IPVS_DEST_ATTR_WEIGHT,
3135 atomic_read(&dest->weight)) ||
3136 nla_put_u32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold) ||
3137 nla_put_u32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold) ||
3138 nla_put_u32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
3139 atomic_read(&dest->activeconns)) ||
3140 nla_put_u32(skb, IPVS_DEST_ATTR_INACT_CONNS,
3141 atomic_read(&dest->inactconns)) ||
3142 nla_put_u32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
3143 atomic_read(&dest->persistconns)))
3144 goto nla_put_failure;
9a812198
JV
3145 if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
3146 goto nla_put_failure;
3147
3148 nla_nest_end(skb, nl_dest);
3149
3150 return 0;
3151
3152nla_put_failure:
3153 nla_nest_cancel(skb, nl_dest);
3154 return -EMSGSIZE;
3155}
3156
3157static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
3158 struct netlink_callback *cb)
3159{
3160 void *hdr;
3161
15e47304 3162 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
9a812198
JV
3163 &ip_vs_genl_family, NLM_F_MULTI,
3164 IPVS_CMD_NEW_DEST);
3165 if (!hdr)
3166 return -EMSGSIZE;
3167
3168 if (ip_vs_genl_fill_dest(skb, dest) < 0)
3169 goto nla_put_failure;
3170
3171 return genlmsg_end(skb, hdr);
3172
3173nla_put_failure:
3174 genlmsg_cancel(skb, hdr);
3175 return -EMSGSIZE;
3176}
3177
3178static int ip_vs_genl_dump_dests(struct sk_buff *skb,
3179 struct netlink_callback *cb)
3180{
3181 int idx = 0;
3182 int start = cb->args[0];
3183 struct ip_vs_service *svc;
3184 struct ip_vs_dest *dest;
3185 struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
a0840e2e 3186 struct net *net = skb_sknet(skb);
9a812198
JV
3187
3188 mutex_lock(&__ip_vs_mutex);
3189
3190 /* Try to find the service for which to dump destinations */
3191 if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
3192 IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
3193 goto out_err;
3194
a0840e2e 3195
fc723250 3196 svc = ip_vs_genl_find_service(net, attrs[IPVS_CMD_ATTR_SERVICE]);
9a812198
JV
3197 if (IS_ERR(svc) || svc == NULL)
3198 goto out_err;
3199
3200 /* Dump the destinations */
3201 list_for_each_entry(dest, &svc->destinations, n_list) {
3202 if (++idx <= start)
3203 continue;
3204 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
3205 idx--;
3206 goto nla_put_failure;
3207 }
3208 }
3209
3210nla_put_failure:
3211 cb->args[0] = idx;
9a812198
JV
3212
3213out_err:
3214 mutex_unlock(&__ip_vs_mutex);
3215
3216 return skb->len;
3217}
3218
c860c6b1 3219static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
9a812198
JV
3220 struct nlattr *nla, int full_entry)
3221{
3222 struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
3223 struct nlattr *nla_addr, *nla_port;
3224
3225 /* Parse mandatory identifying destination fields first */
3226 if (nla == NULL ||
3227 nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
3228 return -EINVAL;
3229
3230 nla_addr = attrs[IPVS_DEST_ATTR_ADDR];
3231 nla_port = attrs[IPVS_DEST_ATTR_PORT];
3232
3233 if (!(nla_addr && nla_port))
3234 return -EINVAL;
3235
258c8893
SH
3236 memset(udest, 0, sizeof(*udest));
3237
9a812198
JV
3238 nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
3239 udest->port = nla_get_u16(nla_port);
3240
3241 /* If a full entry was requested, check for the additional fields */
3242 if (full_entry) {
3243 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
3244 *nla_l_thresh;
3245
3246 nla_fwd = attrs[IPVS_DEST_ATTR_FWD_METHOD];
3247 nla_weight = attrs[IPVS_DEST_ATTR_WEIGHT];
3248 nla_u_thresh = attrs[IPVS_DEST_ATTR_U_THRESH];
3249 nla_l_thresh = attrs[IPVS_DEST_ATTR_L_THRESH];
3250
3251 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
3252 return -EINVAL;
3253
3254 udest->conn_flags = nla_get_u32(nla_fwd)
3255 & IP_VS_CONN_F_FWD_MASK;
3256 udest->weight = nla_get_u32(nla_weight);
3257 udest->u_threshold = nla_get_u32(nla_u_thresh);
3258 udest->l_threshold = nla_get_u32(nla_l_thresh);
3259 }
3260
3261 return 0;
3262}
3263
3264static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
3265 const char *mcast_ifn, __be32 syncid)
3266{
3267 struct nlattr *nl_daemon;
3268
3269 nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
3270 if (!nl_daemon)
3271 return -EMSGSIZE;
3272
969e8e25
DM
3273 if (nla_put_u32(skb, IPVS_DAEMON_ATTR_STATE, state) ||
3274 nla_put_string(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn) ||
3275 nla_put_u32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid))
3276 goto nla_put_failure;
9a812198
JV
3277 nla_nest_end(skb, nl_daemon);
3278
3279 return 0;
3280
3281nla_put_failure:
3282 nla_nest_cancel(skb, nl_daemon);
3283 return -EMSGSIZE;
3284}
3285
3286static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
3287 const char *mcast_ifn, __be32 syncid,
3288 struct netlink_callback *cb)
3289{
3290 void *hdr;
15e47304 3291 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
9a812198
JV
3292 &ip_vs_genl_family, NLM_F_MULTI,
3293 IPVS_CMD_NEW_DAEMON);
3294 if (!hdr)
3295 return -EMSGSIZE;
3296
3297 if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
3298 goto nla_put_failure;
3299
3300 return genlmsg_end(skb, hdr);
3301
3302nla_put_failure:
3303 genlmsg_cancel(skb, hdr);
3304 return -EMSGSIZE;
3305}
3306
3307static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
3308 struct netlink_callback *cb)
3309{
a09d1977 3310 struct net *net = skb_sknet(skb);
f131315f
HS
3311 struct netns_ipvs *ipvs = net_ipvs(net);
3312
ae1d48b2 3313 mutex_lock(&ipvs->sync_mutex);
f131315f 3314 if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
9a812198 3315 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
f131315f
HS
3316 ipvs->master_mcast_ifn,
3317 ipvs->master_syncid, cb) < 0)
9a812198
JV
3318 goto nla_put_failure;
3319
3320 cb->args[0] = 1;
3321 }
3322
f131315f 3323 if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
9a812198 3324 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
f131315f
HS
3325 ipvs->backup_mcast_ifn,
3326 ipvs->backup_syncid, cb) < 0)
9a812198
JV
3327 goto nla_put_failure;
3328
3329 cb->args[1] = 1;
3330 }
3331
3332nla_put_failure:
ae1d48b2 3333 mutex_unlock(&ipvs->sync_mutex);
9a812198
JV
3334
3335 return skb->len;
3336}
3337
f131315f 3338static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs)
9a812198
JV
3339{
3340 if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
3341 attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
3342 attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
3343 return -EINVAL;
3344
f131315f
HS
3345 return start_sync_thread(net,
3346 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
9a812198
JV
3347 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
3348 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
3349}
3350
f131315f 3351static int ip_vs_genl_del_daemon(struct net *net, struct nlattr **attrs)
9a812198
JV
3352{
3353 if (!attrs[IPVS_DAEMON_ATTR_STATE])
3354 return -EINVAL;
3355
f131315f
HS
3356 return stop_sync_thread(net,
3357 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
9a812198
JV
3358}
3359
9330419d 3360static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs)
9a812198
JV
3361{
3362 struct ip_vs_timeout_user t;
3363
9330419d 3364 __ip_vs_get_timeouts(net, &t);
9a812198
JV
3365
3366 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
3367 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
3368
3369 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
3370 t.tcp_fin_timeout =
3371 nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
3372
3373 if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
3374 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
3375
9330419d 3376 return ip_vs_set_timeout(net, &t);
9a812198
JV
3377}
3378
ae1d48b2 3379static int ip_vs_genl_set_daemon(struct sk_buff *skb, struct genl_info *info)
9a812198 3380{
9a812198 3381 int ret = 0, cmd;
fc723250 3382 struct net *net;
a0840e2e 3383 struct netns_ipvs *ipvs;
9a812198 3384
fc723250 3385 net = skb_sknet(skb);
a0840e2e 3386 ipvs = net_ipvs(net);
9a812198
JV
3387 cmd = info->genlhdr->cmd;
3388
ae1d48b2 3389 if (cmd == IPVS_CMD_NEW_DAEMON || cmd == IPVS_CMD_DEL_DAEMON) {
9a812198
JV
3390 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
3391
ae1d48b2 3392 mutex_lock(&ipvs->sync_mutex);
9a812198
JV
3393 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
3394 nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
3395 info->attrs[IPVS_CMD_ATTR_DAEMON],
3396 ip_vs_daemon_policy)) {
3397 ret = -EINVAL;
3398 goto out;
3399 }
3400
3401 if (cmd == IPVS_CMD_NEW_DAEMON)
f131315f 3402 ret = ip_vs_genl_new_daemon(net, daemon_attrs);
9a812198 3403 else
f131315f 3404 ret = ip_vs_genl_del_daemon(net, daemon_attrs);
ae1d48b2
HS
3405out:
3406 mutex_unlock(&ipvs->sync_mutex);
3407 }
3408 return ret;
3409}
3410
3411static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3412{
3413 struct ip_vs_service *svc = NULL;
3414 struct ip_vs_service_user_kern usvc;
3415 struct ip_vs_dest_user_kern udest;
3416 int ret = 0, cmd;
3417 int need_full_svc = 0, need_full_dest = 0;
3418 struct net *net;
ae1d48b2
HS
3419
3420 net = skb_sknet(skb);
ae1d48b2
HS
3421 cmd = info->genlhdr->cmd;
3422
3423 mutex_lock(&__ip_vs_mutex);
3424
3425 if (cmd == IPVS_CMD_FLUSH) {
578bc3ef 3426 ret = ip_vs_flush(net, false);
ae1d48b2
HS
3427 goto out;
3428 } else if (cmd == IPVS_CMD_SET_CONFIG) {
3429 ret = ip_vs_genl_set_config(net, info->attrs);
9a812198
JV
3430 goto out;
3431 } else if (cmd == IPVS_CMD_ZERO &&
3432 !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
fc723250 3433 ret = ip_vs_zero_all(net);
9a812198
JV
3434 goto out;
3435 }
3436
3437 /* All following commands require a service argument, so check if we
3438 * received a valid one. We need a full service specification when
3439 * adding / editing a service. Only identifying members otherwise. */
3440 if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
3441 need_full_svc = 1;
3442
fc723250 3443 ret = ip_vs_genl_parse_service(net, &usvc,
9a812198 3444 info->attrs[IPVS_CMD_ATTR_SERVICE],
26c15cfd 3445 need_full_svc, &svc);
9a812198
JV
3446 if (ret)
3447 goto out;
3448
9a812198
JV
3449 /* Unless we're adding a new service, the service must already exist */
3450 if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
3451 ret = -ESRCH;
3452 goto out;
3453 }
3454
3455 /* Destination commands require a valid destination argument. For
3456 * adding / editing a destination, we need a full destination
3457 * specification. */
3458 if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
3459 cmd == IPVS_CMD_DEL_DEST) {
3460 if (cmd != IPVS_CMD_DEL_DEST)
3461 need_full_dest = 1;
3462
3463 ret = ip_vs_genl_parse_dest(&udest,
3464 info->attrs[IPVS_CMD_ATTR_DEST],
3465 need_full_dest);
3466 if (ret)
3467 goto out;
3468 }
3469
3470 switch (cmd) {
3471 case IPVS_CMD_NEW_SERVICE:
3472 if (svc == NULL)
fc723250 3473 ret = ip_vs_add_service(net, &usvc, &svc);
9a812198
JV
3474 else
3475 ret = -EEXIST;
3476 break;
3477 case IPVS_CMD_SET_SERVICE:
3478 ret = ip_vs_edit_service(svc, &usvc);
3479 break;
3480 case IPVS_CMD_DEL_SERVICE:
3481 ret = ip_vs_del_service(svc);
26c15cfd 3482 /* do not use svc, it can be freed */
9a812198
JV
3483 break;
3484 case IPVS_CMD_NEW_DEST:
3485 ret = ip_vs_add_dest(svc, &udest);
3486 break;
3487 case IPVS_CMD_SET_DEST:
3488 ret = ip_vs_edit_dest(svc, &udest);
3489 break;
3490 case IPVS_CMD_DEL_DEST:
3491 ret = ip_vs_del_dest(svc, &udest);
3492 break;
3493 case IPVS_CMD_ZERO:
3494 ret = ip_vs_zero_service(svc);
3495 break;
3496 default:
3497 ret = -EINVAL;
3498 }
3499
3500out:
9a812198
JV
3501 mutex_unlock(&__ip_vs_mutex);
3502
3503 return ret;
3504}
3505
3506static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3507{
3508 struct sk_buff *msg;
3509 void *reply;
3510 int ret, cmd, reply_cmd;
fc723250 3511 struct net *net;
9a812198 3512
fc723250 3513 net = skb_sknet(skb);
9a812198
JV
3514 cmd = info->genlhdr->cmd;
3515
3516 if (cmd == IPVS_CMD_GET_SERVICE)
3517 reply_cmd = IPVS_CMD_NEW_SERVICE;
3518 else if (cmd == IPVS_CMD_GET_INFO)
3519 reply_cmd = IPVS_CMD_SET_INFO;
3520 else if (cmd == IPVS_CMD_GET_CONFIG)
3521 reply_cmd = IPVS_CMD_SET_CONFIG;
3522 else {
1e3e238e 3523 pr_err("unknown Generic Netlink command\n");
9a812198
JV
3524 return -EINVAL;
3525 }
3526
3527 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
3528 if (!msg)
3529 return -ENOMEM;
3530
3531 mutex_lock(&__ip_vs_mutex);
3532
3533 reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3534 if (reply == NULL)
3535 goto nla_put_failure;
3536
3537 switch (cmd) {
3538 case IPVS_CMD_GET_SERVICE:
3539 {
3540 struct ip_vs_service *svc;
3541
fc723250
HS
3542 svc = ip_vs_genl_find_service(net,
3543 info->attrs[IPVS_CMD_ATTR_SERVICE]);
9a812198
JV
3544 if (IS_ERR(svc)) {
3545 ret = PTR_ERR(svc);
3546 goto out_err;
3547 } else if (svc) {
3548 ret = ip_vs_genl_fill_service(msg, svc);
9a812198
JV
3549 if (ret)
3550 goto nla_put_failure;
3551 } else {
3552 ret = -ESRCH;
3553 goto out_err;
3554 }
3555
3556 break;
3557 }
3558
3559 case IPVS_CMD_GET_CONFIG:
3560 {
3561 struct ip_vs_timeout_user t;
3562
9330419d 3563 __ip_vs_get_timeouts(net, &t);
9a812198 3564#ifdef CONFIG_IP_VS_PROTO_TCP
969e8e25
DM
3565 if (nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP,
3566 t.tcp_timeout) ||
3567 nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3568 t.tcp_fin_timeout))
3569 goto nla_put_failure;
9a812198
JV
3570#endif
3571#ifdef CONFIG_IP_VS_PROTO_UDP
969e8e25
DM
3572 if (nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout))
3573 goto nla_put_failure;
9a812198
JV
3574#endif
3575
3576 break;
3577 }
3578
3579 case IPVS_CMD_GET_INFO:
969e8e25
DM
3580 if (nla_put_u32(msg, IPVS_INFO_ATTR_VERSION,
3581 IP_VS_VERSION_CODE) ||
3582 nla_put_u32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
3583 ip_vs_conn_tab_size))
3584 goto nla_put_failure;
9a812198
JV
3585 break;
3586 }
3587
3588 genlmsg_end(msg, reply);
134e6375 3589 ret = genlmsg_reply(msg, info);
9a812198
JV
3590 goto out;
3591
3592nla_put_failure:
1e3e238e 3593 pr_err("not enough space in Netlink message\n");
9a812198
JV
3594 ret = -EMSGSIZE;
3595
3596out_err:
3597 nlmsg_free(msg);
3598out:
3599 mutex_unlock(&__ip_vs_mutex);
3600
3601 return ret;
3602}
3603
3604
3605static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
3606 {
3607 .cmd = IPVS_CMD_NEW_SERVICE,
3608 .flags = GENL_ADMIN_PERM,
3609 .policy = ip_vs_cmd_policy,
3610 .doit = ip_vs_genl_set_cmd,
3611 },
3612 {
3613 .cmd = IPVS_CMD_SET_SERVICE,
3614 .flags = GENL_ADMIN_PERM,
3615 .policy = ip_vs_cmd_policy,
3616 .doit = ip_vs_genl_set_cmd,
3617 },
3618 {
3619 .cmd = IPVS_CMD_DEL_SERVICE,
3620 .flags = GENL_ADMIN_PERM,
3621 .policy = ip_vs_cmd_policy,
3622 .doit = ip_vs_genl_set_cmd,
3623 },
3624 {
3625 .cmd = IPVS_CMD_GET_SERVICE,
3626 .flags = GENL_ADMIN_PERM,
3627 .doit = ip_vs_genl_get_cmd,
3628 .dumpit = ip_vs_genl_dump_services,
3629 .policy = ip_vs_cmd_policy,
3630 },
3631 {
3632 .cmd = IPVS_CMD_NEW_DEST,
3633 .flags = GENL_ADMIN_PERM,
3634 .policy = ip_vs_cmd_policy,
3635 .doit = ip_vs_genl_set_cmd,
3636 },
3637 {
3638 .cmd = IPVS_CMD_SET_DEST,
3639 .flags = GENL_ADMIN_PERM,
3640 .policy = ip_vs_cmd_policy,
3641 .doit = ip_vs_genl_set_cmd,
3642 },
3643 {
3644 .cmd = IPVS_CMD_DEL_DEST,
3645 .flags = GENL_ADMIN_PERM,
3646 .policy = ip_vs_cmd_policy,
3647 .doit = ip_vs_genl_set_cmd,
3648 },
3649 {
3650 .cmd = IPVS_CMD_GET_DEST,
3651 .flags = GENL_ADMIN_PERM,
3652 .policy = ip_vs_cmd_policy,
3653 .dumpit = ip_vs_genl_dump_dests,
3654 },
3655 {
3656 .cmd = IPVS_CMD_NEW_DAEMON,
3657 .flags = GENL_ADMIN_PERM,
3658 .policy = ip_vs_cmd_policy,
ae1d48b2 3659 .doit = ip_vs_genl_set_daemon,
9a812198
JV
3660 },
3661 {
3662 .cmd = IPVS_CMD_DEL_DAEMON,
3663 .flags = GENL_ADMIN_PERM,
3664 .policy = ip_vs_cmd_policy,
ae1d48b2 3665 .doit = ip_vs_genl_set_daemon,
9a812198
JV
3666 },
3667 {
3668 .cmd = IPVS_CMD_GET_DAEMON,
3669 .flags = GENL_ADMIN_PERM,
3670 .dumpit = ip_vs_genl_dump_daemons,
3671 },
3672 {
3673 .cmd = IPVS_CMD_SET_CONFIG,
3674 .flags = GENL_ADMIN_PERM,
3675 .policy = ip_vs_cmd_policy,
3676 .doit = ip_vs_genl_set_cmd,
3677 },
3678 {
3679 .cmd = IPVS_CMD_GET_CONFIG,
3680 .flags = GENL_ADMIN_PERM,
3681 .doit = ip_vs_genl_get_cmd,
3682 },
3683 {
3684 .cmd = IPVS_CMD_GET_INFO,
3685 .flags = GENL_ADMIN_PERM,
3686 .doit = ip_vs_genl_get_cmd,
3687 },
3688 {
3689 .cmd = IPVS_CMD_ZERO,
3690 .flags = GENL_ADMIN_PERM,
3691 .policy = ip_vs_cmd_policy,
3692 .doit = ip_vs_genl_set_cmd,
3693 },
3694 {
3695 .cmd = IPVS_CMD_FLUSH,
3696 .flags = GENL_ADMIN_PERM,
3697 .doit = ip_vs_genl_set_cmd,
3698 },
3699};
3700
3701static int __init ip_vs_genl_register(void)
3702{
8f698d54
MM
3703 return genl_register_family_with_ops(&ip_vs_genl_family,
3704 ip_vs_genl_ops, ARRAY_SIZE(ip_vs_genl_ops));
9a812198
JV
3705}
3706
3707static void ip_vs_genl_unregister(void)
3708{
3709 genl_unregister_family(&ip_vs_genl_family);
3710}
3711
3712/* End of Generic Netlink interface definitions */
3713
61b1ab45
HS
3714/*
3715 * per netns intit/exit func.
3716 */
14e40546 3717#ifdef CONFIG_SYSCTL
2b2d2808 3718static int __net_init ip_vs_control_net_init_sysctl(struct net *net)
61b1ab45 3719{
fc723250
HS
3720 int idx;
3721 struct netns_ipvs *ipvs = net_ipvs(net);
a0840e2e 3722 struct ctl_table *tbl;
fc723250 3723
a0840e2e
HS
3724 atomic_set(&ipvs->dropentry, 0);
3725 spin_lock_init(&ipvs->dropentry_lock);
3726 spin_lock_init(&ipvs->droppacket_lock);
3727 spin_lock_init(&ipvs->securetcp_lock);
a0840e2e
HS
3728
3729 if (!net_eq(net, &init_net)) {
3730 tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL);
3731 if (tbl == NULL)
14e40546 3732 return -ENOMEM;
464dc801
EB
3733
3734 /* Don't export sysctls to unprivileged users */
3735 if (net->user_ns != &init_user_ns)
3736 tbl[0].procname = NULL;
a0840e2e
HS
3737 } else
3738 tbl = vs_vars;
3739 /* Initialize sysctl defaults */
3740 idx = 0;
3741 ipvs->sysctl_amemthresh = 1024;
3742 tbl[idx++].data = &ipvs->sysctl_amemthresh;
3743 ipvs->sysctl_am_droprate = 10;
3744 tbl[idx++].data = &ipvs->sysctl_am_droprate;
3745 tbl[idx++].data = &ipvs->sysctl_drop_entry;
3746 tbl[idx++].data = &ipvs->sysctl_drop_packet;
3747#ifdef CONFIG_IP_VS_NFCT
3748 tbl[idx++].data = &ipvs->sysctl_conntrack;
3749#endif
3750 tbl[idx++].data = &ipvs->sysctl_secure_tcp;
3751 ipvs->sysctl_snat_reroute = 1;
3752 tbl[idx++].data = &ipvs->sysctl_snat_reroute;
3753 ipvs->sysctl_sync_ver = 1;
3754 tbl[idx++].data = &ipvs->sysctl_sync_ver;
f73181c8
PNA
3755 ipvs->sysctl_sync_ports = 1;
3756 tbl[idx++].data = &ipvs->sysctl_sync_ports;
1c003b15
PNA
3757 ipvs->sysctl_sync_qlen_max = nr_free_buffer_pages() / 32;
3758 tbl[idx++].data = &ipvs->sysctl_sync_qlen_max;
3759 ipvs->sysctl_sync_sock_size = 0;
3760 tbl[idx++].data = &ipvs->sysctl_sync_sock_size;
a0840e2e
HS
3761 tbl[idx++].data = &ipvs->sysctl_cache_bypass;
3762 tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
3763 tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template;
59e0350e
SH
3764 ipvs->sysctl_sync_threshold[0] = DEFAULT_SYNC_THRESHOLD;
3765 ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD;
a0840e2e
HS
3766 tbl[idx].data = &ipvs->sysctl_sync_threshold;
3767 tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
749c42b6
JA
3768 ipvs->sysctl_sync_refresh_period = DEFAULT_SYNC_REFRESH_PERIOD;
3769 tbl[idx++].data = &ipvs->sysctl_sync_refresh_period;
3770 ipvs->sysctl_sync_retries = clamp_t(int, DEFAULT_SYNC_RETRIES, 0, 3);
3771 tbl[idx++].data = &ipvs->sysctl_sync_retries;
a0840e2e 3772 tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;
3654e611
JA
3773 ipvs->sysctl_pmtu_disc = 1;
3774 tbl[idx++].data = &ipvs->sysctl_pmtu_disc;
0c12582f 3775 tbl[idx++].data = &ipvs->sysctl_backup_only;
a0840e2e
HS
3776
3777
ec8f23ce 3778 ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl);
0443929f
SH
3779 if (ipvs->sysctl_hdr == NULL) {
3780 if (!net_eq(net, &init_net))
3781 kfree(tbl);
14e40546 3782 return -ENOMEM;
0443929f 3783 }
6ef757f9 3784 ip_vs_start_estimator(net, &ipvs->tot_stats);
a0840e2e 3785 ipvs->sysctl_tbl = tbl;
f6340ee0
HS
3786 /* Schedule defense work */
3787 INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler);
3788 schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
61b1ab45 3789
61b1ab45 3790 return 0;
61b1ab45
HS
3791}
3792
2b2d2808 3793static void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net)
61b1ab45 3794{
b17fc996
HS
3795 struct netns_ipvs *ipvs = net_ipvs(net);
3796
f2431e6e
HS
3797 cancel_delayed_work_sync(&ipvs->defense_work);
3798 cancel_work_sync(&ipvs->defense_work.work);
a0840e2e 3799 unregister_net_sysctl_table(ipvs->sysctl_hdr);
14e40546
SH
3800}
3801
3802#else
3803
2b2d2808
CG
3804static int __net_init ip_vs_control_net_init_sysctl(struct net *net) { return 0; }
3805static void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net) { }
14e40546 3806
0443929f 3807#endif
14e40546 3808
7a4f0761
HS
3809static struct notifier_block ip_vs_dst_notifier = {
3810 .notifier_call = ip_vs_dst_event,
3811};
3812
503cf15a 3813int __net_init ip_vs_control_net_init(struct net *net)
14e40546
SH
3814{
3815 int idx;
3816 struct netns_ipvs *ipvs = net_ipvs(net);
3817
14e40546
SH
3818 /* Initialize rs_table */
3819 for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
276472ea 3820 INIT_HLIST_HEAD(&ipvs->rs_table[idx]);
14e40546
SH
3821
3822 INIT_LIST_HEAD(&ipvs->dest_trash);
578bc3ef
JA
3823 spin_lock_init(&ipvs->dest_trash_lock);
3824 setup_timer(&ipvs->dest_trash_timer, ip_vs_dest_trash_expire,
3825 (unsigned long) net);
14e40546
SH
3826 atomic_set(&ipvs->ftpsvc_counter, 0);
3827 atomic_set(&ipvs->nullsvc_counter, 0);
3828
3829 /* procfs stats */
3830 ipvs->tot_stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
0a9ee813 3831 if (!ipvs->tot_stats.cpustats)
14e40546 3832 return -ENOMEM;
0a9ee813 3833
14e40546
SH
3834 spin_lock_init(&ipvs->tot_stats.lock);
3835
d4beaa66
G
3836 proc_create("ip_vs", 0, net->proc_net, &ip_vs_info_fops);
3837 proc_create("ip_vs_stats", 0, net->proc_net, &ip_vs_stats_fops);
3838 proc_create("ip_vs_stats_percpu", 0, net->proc_net,
3839 &ip_vs_stats_percpu_fops);
14e40546 3840
503cf15a 3841 if (ip_vs_control_net_init_sysctl(net))
14e40546
SH
3842 goto err;
3843
3844 return 0;
3845
3846err:
2a0751af 3847 free_percpu(ipvs->tot_stats.cpustats);
61b1ab45
HS
3848 return -ENOMEM;
3849}
3850
503cf15a 3851void __net_exit ip_vs_control_net_cleanup(struct net *net)
61b1ab45 3852{
b17fc996
HS
3853 struct netns_ipvs *ipvs = net_ipvs(net);
3854
578bc3ef
JA
3855 /* Some dest can be in grace period even before cleanup, we have to
3856 * defer ip_vs_trash_cleanup until ip_vs_dest_wait_readers is called.
3857 */
3858 rcu_barrier();
f2431e6e 3859 ip_vs_trash_cleanup(net);
6ef757f9 3860 ip_vs_stop_estimator(net, &ipvs->tot_stats);
503cf15a 3861 ip_vs_control_net_cleanup_sysctl(net);
ece31ffd
G
3862 remove_proc_entry("ip_vs_stats_percpu", net->proc_net);
3863 remove_proc_entry("ip_vs_stats", net->proc_net);
3864 remove_proc_entry("ip_vs", net->proc_net);
2a0751af 3865 free_percpu(ipvs->tot_stats.cpustats);
61b1ab45
HS
3866}
3867
8537de8a 3868int __init ip_vs_register_nl_ioctl(void)
1da177e4 3869{
fc723250 3870 int ret;
1da177e4 3871
1da177e4
LT
3872 ret = nf_register_sockopt(&ip_vs_sockopts);
3873 if (ret) {
1e3e238e 3874 pr_err("cannot register sockopt.\n");
7a4f0761 3875 goto err_sock;
1da177e4
LT
3876 }
3877
9a812198
JV
3878 ret = ip_vs_genl_register();
3879 if (ret) {
1e3e238e 3880 pr_err("cannot register Generic Netlink interface.\n");
7a4f0761 3881 goto err_genl;
9a812198 3882 }
1da177e4 3883 return 0;
fc723250 3884
7a4f0761
HS
3885err_genl:
3886 nf_unregister_sockopt(&ip_vs_sockopts);
3887err_sock:
fc723250 3888 return ret;
1da177e4
LT
3889}
3890
8537de8a
HS
3891void ip_vs_unregister_nl_ioctl(void)
3892{
3893 ip_vs_genl_unregister();
3894 nf_unregister_sockopt(&ip_vs_sockopts);
3895}
3896
3897int __init ip_vs_control_init(void)
3898{
3899 int idx;
3900 int ret;
3901
3902 EnterFunction(2);
3903
276472ea 3904 /* Initialize svc_table, ip_vs_svc_fwm_table */
8537de8a
HS
3905 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
3906 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3907 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3908 }
3909
3910 smp_wmb(); /* Do we really need it now ? */
3911
3912 ret = register_netdevice_notifier(&ip_vs_dst_notifier);
3913 if (ret < 0)
3914 return ret;
3915
3916 LeaveFunction(2);
3917 return 0;
3918}
3919
1da177e4
LT
3920
3921void ip_vs_control_cleanup(void)
3922{
3923 EnterFunction(2);
7676e345 3924 unregister_netdevice_notifier(&ip_vs_dst_notifier);
1da177e4
LT
3925 LeaveFunction(2);
3926}
This page took 1.029899 seconds and 5 git commands to generate.