IPVS: netns, trash handling
[deliverable/linux.git] / net / netfilter / ipvs / ip_vs_ctl.c
CommitLineData
1da177e4
LT
1/*
2 * IPVS An implementation of the IP virtual server support for the
3 * LINUX operating system. IPVS is now implemented as a module
4 * over the NetFilter framework. IPVS can be used to build a
5 * high-performance and highly available server based on a
6 * cluster of servers.
7 *
1da177e4
LT
8 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
9 * Peter Kese <peter.kese@ijs.si>
10 * Julian Anastasov <ja@ssi.bg>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 *
17 * Changes:
18 *
19 */
20
9aada7ac
HE
21#define KMSG_COMPONENT "IPVS"
22#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
23
1da177e4
LT
24#include <linux/module.h>
25#include <linux/init.h>
26#include <linux/types.h>
4fc268d2 27#include <linux/capability.h>
1da177e4
LT
28#include <linux/fs.h>
29#include <linux/sysctl.h>
30#include <linux/proc_fs.h>
31#include <linux/workqueue.h>
32#include <linux/swap.h>
1da177e4 33#include <linux/seq_file.h>
5a0e3ad6 34#include <linux/slab.h>
1da177e4
LT
35
36#include <linux/netfilter.h>
37#include <linux/netfilter_ipv4.h>
14cc3e2b 38#include <linux/mutex.h>
1da177e4 39
457c4cbc 40#include <net/net_namespace.h>
9330419d 41#include <linux/nsproxy.h>
1da177e4 42#include <net/ip.h>
09571c7a
VB
43#ifdef CONFIG_IP_VS_IPV6
44#include <net/ipv6.h>
45#include <net/ip6_route.h>
46#endif
14c85021 47#include <net/route.h>
1da177e4 48#include <net/sock.h>
9a812198 49#include <net/genetlink.h>
1da177e4
LT
50
51#include <asm/uaccess.h>
52
53#include <net/ip_vs.h>
54
55/* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
14cc3e2b 56static DEFINE_MUTEX(__ip_vs_mutex);
1da177e4
LT
57
58/* lock for service table */
59static DEFINE_RWLOCK(__ip_vs_svc_lock);
60
1da177e4 61/* sysctl variables */
1da177e4
LT
62
63#ifdef CONFIG_IP_VS_DEBUG
/* Current IPVS debug verbosity; only built with CONFIG_IP_VS_DEBUG. */
static int sysctl_ip_vs_debug_level;

/*
 * Accessor for the IPVS debug level.
 *
 * Returns the current value of sysctl_ip_vs_debug_level.
 */
int ip_vs_get_debug_level(void)
{
	int level = sysctl_ip_vs_debug_level;

	return level;
}
70#endif
71
09571c7a
VB
72#ifdef CONFIG_IP_VS_IPV6
73/* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
74static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr)
75{
76 struct rt6_info *rt;
77 struct flowi fl = {
78 .oif = 0,
5811662b
CG
79 .fl6_dst = *addr,
80 .fl6_src = { .s6_addr32 = {0, 0, 0, 0} },
09571c7a
VB
81 };
82
83 rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
84 if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK))
85 return 1;
86
87 return 0;
88}
89#endif
1da177e4 90/*
af9debd4
JA
91 * update_defense_level is called from keventd and from sysctl,
92 * so it needs to protect itself from softirqs
1da177e4 93 */
9330419d 94static void update_defense_level(struct netns_ipvs *ipvs)
1da177e4
LT
95{
96 struct sysinfo i;
97 static int old_secure_tcp = 0;
98 int availmem;
99 int nomem;
100 int to_change = -1;
101
102 /* we only count free and buffered memory (in pages) */
103 si_meminfo(&i);
104 availmem = i.freeram + i.bufferram;
105 /* however in linux 2.5 the i.bufferram is total page cache size,
106 we need adjust it */
107 /* si_swapinfo(&i); */
108 /* availmem = availmem - (i.totalswap - i.freeswap); */
109
a0840e2e 110 nomem = (availmem < ipvs->sysctl_amemthresh);
1da177e4 111
af9debd4
JA
112 local_bh_disable();
113
1da177e4 114 /* drop_entry */
a0840e2e
HS
115 spin_lock(&ipvs->dropentry_lock);
116 switch (ipvs->sysctl_drop_entry) {
1da177e4 117 case 0:
a0840e2e 118 atomic_set(&ipvs->dropentry, 0);
1da177e4
LT
119 break;
120 case 1:
121 if (nomem) {
a0840e2e
HS
122 atomic_set(&ipvs->dropentry, 1);
123 ipvs->sysctl_drop_entry = 2;
1da177e4 124 } else {
a0840e2e 125 atomic_set(&ipvs->dropentry, 0);
1da177e4
LT
126 }
127 break;
128 case 2:
129 if (nomem) {
a0840e2e 130 atomic_set(&ipvs->dropentry, 1);
1da177e4 131 } else {
a0840e2e
HS
132 atomic_set(&ipvs->dropentry, 0);
133 ipvs->sysctl_drop_entry = 1;
1da177e4
LT
134 };
135 break;
136 case 3:
a0840e2e 137 atomic_set(&ipvs->dropentry, 1);
1da177e4
LT
138 break;
139 }
a0840e2e 140 spin_unlock(&ipvs->dropentry_lock);
1da177e4
LT
141
142 /* drop_packet */
a0840e2e
HS
143 spin_lock(&ipvs->droppacket_lock);
144 switch (ipvs->sysctl_drop_packet) {
1da177e4 145 case 0:
a0840e2e 146 ipvs->drop_rate = 0;
1da177e4
LT
147 break;
148 case 1:
149 if (nomem) {
a0840e2e
HS
150 ipvs->drop_rate = ipvs->drop_counter
151 = ipvs->sysctl_amemthresh /
152 (ipvs->sysctl_amemthresh-availmem);
153 ipvs->sysctl_drop_packet = 2;
1da177e4 154 } else {
a0840e2e 155 ipvs->drop_rate = 0;
1da177e4
LT
156 }
157 break;
158 case 2:
159 if (nomem) {
a0840e2e
HS
160 ipvs->drop_rate = ipvs->drop_counter
161 = ipvs->sysctl_amemthresh /
162 (ipvs->sysctl_amemthresh-availmem);
1da177e4 163 } else {
a0840e2e
HS
164 ipvs->drop_rate = 0;
165 ipvs->sysctl_drop_packet = 1;
1da177e4
LT
166 }
167 break;
168 case 3:
a0840e2e 169 ipvs->drop_rate = ipvs->sysctl_am_droprate;
1da177e4
LT
170 break;
171 }
a0840e2e 172 spin_unlock(&ipvs->droppacket_lock);
1da177e4
LT
173
174 /* secure_tcp */
a0840e2e
HS
175 spin_lock(&ipvs->securetcp_lock);
176 switch (ipvs->sysctl_secure_tcp) {
1da177e4
LT
177 case 0:
178 if (old_secure_tcp >= 2)
179 to_change = 0;
180 break;
181 case 1:
182 if (nomem) {
183 if (old_secure_tcp < 2)
184 to_change = 1;
a0840e2e 185 ipvs->sysctl_secure_tcp = 2;
1da177e4
LT
186 } else {
187 if (old_secure_tcp >= 2)
188 to_change = 0;
189 }
190 break;
191 case 2:
192 if (nomem) {
193 if (old_secure_tcp < 2)
194 to_change = 1;
195 } else {
196 if (old_secure_tcp >= 2)
197 to_change = 0;
a0840e2e 198 ipvs->sysctl_secure_tcp = 1;
1da177e4
LT
199 }
200 break;
201 case 3:
202 if (old_secure_tcp < 2)
203 to_change = 1;
204 break;
205 }
a0840e2e 206 old_secure_tcp = ipvs->sysctl_secure_tcp;
1da177e4 207 if (to_change >= 0)
9330419d 208 ip_vs_protocol_timeout_change(ipvs,
a0840e2e
HS
209 ipvs->sysctl_secure_tcp > 1);
210 spin_unlock(&ipvs->securetcp_lock);
af9debd4
JA
211
212 local_bh_enable();
1da177e4
LT
213}
214
215
216/*
217 * Timer for checking the defense
218 */
219#define DEFENSE_TIMER_PERIOD 1*HZ
1da177e4 220
c4028958 221static void defense_work_handler(struct work_struct *work)
1da177e4 222{
f6340ee0
HS
223 struct netns_ipvs *ipvs =
224 container_of(work, struct netns_ipvs, defense_work.work);
9330419d
HS
225
226 update_defense_level(ipvs);
a0840e2e 227 if (atomic_read(&ipvs->dropentry))
f6340ee0
HS
228 ip_vs_random_dropentry(ipvs->net);
229 schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
1da177e4
LT
230}
231
232int
233ip_vs_use_count_inc(void)
234{
235 return try_module_get(THIS_MODULE);
236}
237
238void
239ip_vs_use_count_dec(void)
240{
241 module_put(THIS_MODULE);
242}
243
244
245/*
246 * Hash table: for virtual service lookups
247 */
248#define IP_VS_SVC_TAB_BITS 8
249#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
250#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
251
252/* the service table hashed by <protocol, addr, port> */
253static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
254/* the service table hashed by fwmark */
255static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
256
1da177e4
LT
257/*
258 * FTP & NULL virtual service counters
259 */
260static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
261static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
262
263
264/*
265 * Returns hash value for virtual service
266 */
fc723250
HS
267static inline unsigned
268ip_vs_svc_hashkey(struct net *net, int af, unsigned proto,
269 const union nf_inet_addr *addr, __be16 port)
1da177e4
LT
270{
271 register unsigned porth = ntohs(port);
b18610de 272 __be32 addr_fold = addr->ip;
1da177e4 273
b18610de
JV
274#ifdef CONFIG_IP_VS_IPV6
275 if (af == AF_INET6)
276 addr_fold = addr->ip6[0]^addr->ip6[1]^
277 addr->ip6[2]^addr->ip6[3];
278#endif
fc723250 279 addr_fold ^= ((size_t)net>>8);
b18610de
JV
280
281 return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
1da177e4
LT
282 & IP_VS_SVC_TAB_MASK;
283}
284
285/*
286 * Returns hash value of fwmark for virtual service lookup
287 */
fc723250 288static inline unsigned ip_vs_svc_fwm_hashkey(struct net *net, __u32 fwmark)
1da177e4 289{
fc723250 290 return (((size_t)net>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK;
1da177e4
LT
291}
292
293/*
fc723250 294 * Hashes a service in the ip_vs_svc_table by <netns,proto,addr,port>
1da177e4
LT
295 * or in the ip_vs_svc_fwm_table by fwmark.
296 * Should be called with locked tables.
297 */
298static int ip_vs_svc_hash(struct ip_vs_service *svc)
299{
300 unsigned hash;
301
302 if (svc->flags & IP_VS_SVC_F_HASHED) {
1e3e238e
HE
303 pr_err("%s(): request for already hashed, called from %pF\n",
304 __func__, __builtin_return_address(0));
1da177e4
LT
305 return 0;
306 }
307
308 if (svc->fwmark == 0) {
309 /*
fc723250 310 * Hash it by <netns,protocol,addr,port> in ip_vs_svc_table
1da177e4 311 */
fc723250
HS
312 hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol,
313 &svc->addr, svc->port);
1da177e4
LT
314 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
315 } else {
316 /*
fc723250 317 * Hash it by fwmark in svc_fwm_table
1da177e4 318 */
fc723250 319 hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark);
1da177e4
LT
320 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
321 }
322
323 svc->flags |= IP_VS_SVC_F_HASHED;
324 /* increase its refcnt because it is referenced by the svc table */
325 atomic_inc(&svc->refcnt);
326 return 1;
327}
328
329
330/*
fc723250 331 * Unhashes a service from svc_table / svc_fwm_table.
1da177e4
LT
332 * Should be called with locked tables.
333 */
334static int ip_vs_svc_unhash(struct ip_vs_service *svc)
335{
336 if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
1e3e238e
HE
337 pr_err("%s(): request for unhash flagged, called from %pF\n",
338 __func__, __builtin_return_address(0));
1da177e4
LT
339 return 0;
340 }
341
342 if (svc->fwmark == 0) {
fc723250 343 /* Remove it from the svc_table table */
1da177e4
LT
344 list_del(&svc->s_list);
345 } else {
fc723250 346 /* Remove it from the svc_fwm_table table */
1da177e4
LT
347 list_del(&svc->f_list);
348 }
349
350 svc->flags &= ~IP_VS_SVC_F_HASHED;
351 atomic_dec(&svc->refcnt);
352 return 1;
353}
354
355
356/*
fc723250 357 * Get service by {netns, proto,addr,port} in the service table.
1da177e4 358 */
b18610de 359static inline struct ip_vs_service *
fc723250
HS
360__ip_vs_service_find(struct net *net, int af, __u16 protocol,
361 const union nf_inet_addr *vaddr, __be16 vport)
1da177e4
LT
362{
363 unsigned hash;
364 struct ip_vs_service *svc;
365
366 /* Check for "full" addressed entries */
fc723250 367 hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport);
1da177e4
LT
368
369 list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
b18610de
JV
370 if ((svc->af == af)
371 && ip_vs_addr_equal(af, &svc->addr, vaddr)
1da177e4 372 && (svc->port == vport)
fc723250
HS
373 && (svc->protocol == protocol)
374 && net_eq(svc->net, net)) {
1da177e4 375 /* HIT */
1da177e4
LT
376 return svc;
377 }
378 }
379
380 return NULL;
381}
382
383
384/*
385 * Get service by {fwmark} in the service table.
386 */
b18610de 387static inline struct ip_vs_service *
fc723250 388__ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark)
1da177e4
LT
389{
390 unsigned hash;
391 struct ip_vs_service *svc;
392
393 /* Check for fwmark addressed entries */
fc723250 394 hash = ip_vs_svc_fwm_hashkey(net, fwmark);
1da177e4
LT
395
396 list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
fc723250
HS
397 if (svc->fwmark == fwmark && svc->af == af
398 && net_eq(svc->net, net)) {
1da177e4 399 /* HIT */
1da177e4
LT
400 return svc;
401 }
402 }
403
404 return NULL;
405}
406
407struct ip_vs_service *
fc723250 408ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
3c2e0505 409 const union nf_inet_addr *vaddr, __be16 vport)
1da177e4
LT
410{
411 struct ip_vs_service *svc;
3c2e0505 412
1da177e4
LT
413 read_lock(&__ip_vs_svc_lock);
414
415 /*
416 * Check the table hashed by fwmark first
417 */
fc723250
HS
418 svc = __ip_vs_svc_fwm_find(net, af, fwmark);
419 if (fwmark && svc)
1da177e4
LT
420 goto out;
421
422 /*
423 * Check the table hashed by <protocol,addr,port>
424 * for "full" addressed entries
425 */
fc723250 426 svc = __ip_vs_service_find(net, af, protocol, vaddr, vport);
1da177e4
LT
427
428 if (svc == NULL
429 && protocol == IPPROTO_TCP
430 && atomic_read(&ip_vs_ftpsvc_counter)
431 && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
432 /*
433 * Check if ftp service entry exists, the packet
434 * might belong to FTP data connections.
435 */
fc723250 436 svc = __ip_vs_service_find(net, af, protocol, vaddr, FTPPORT);
1da177e4
LT
437 }
438
439 if (svc == NULL
440 && atomic_read(&ip_vs_nullsvc_counter)) {
441 /*
442 * Check if the catch-all port (port zero) exists
443 */
fc723250 444 svc = __ip_vs_service_find(net, af, protocol, vaddr, 0);
1da177e4
LT
445 }
446
447 out:
26c15cfd
JA
448 if (svc)
449 atomic_inc(&svc->usecnt);
1da177e4
LT
450 read_unlock(&__ip_vs_svc_lock);
451
3c2e0505
JV
452 IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
453 fwmark, ip_vs_proto_name(protocol),
454 IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
455 svc ? "hit" : "not hit");
1da177e4
LT
456
457 return svc;
458}
459
460
461static inline void
462__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
463{
464 atomic_inc(&svc->refcnt);
465 dest->svc = svc;
466}
467
26c15cfd 468static void
1da177e4
LT
469__ip_vs_unbind_svc(struct ip_vs_dest *dest)
470{
471 struct ip_vs_service *svc = dest->svc;
472
473 dest->svc = NULL;
26c15cfd
JA
474 if (atomic_dec_and_test(&svc->refcnt)) {
475 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
476 svc->fwmark,
477 IP_VS_DBG_ADDR(svc->af, &svc->addr),
478 ntohs(svc->port), atomic_read(&svc->usecnt));
b17fc996 479 free_percpu(svc->stats.cpustats);
1da177e4 480 kfree(svc);
26c15cfd 481 }
1da177e4
LT
482}
483
484
485/*
486 * Returns hash value for real service
487 */
7937df15
JV
488static inline unsigned ip_vs_rs_hashkey(int af,
489 const union nf_inet_addr *addr,
490 __be16 port)
1da177e4
LT
491{
492 register unsigned porth = ntohs(port);
7937df15
JV
493 __be32 addr_fold = addr->ip;
494
495#ifdef CONFIG_IP_VS_IPV6
496 if (af == AF_INET6)
497 addr_fold = addr->ip6[0]^addr->ip6[1]^
498 addr->ip6[2]^addr->ip6[3];
499#endif
1da177e4 500
7937df15 501 return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
1da177e4
LT
502 & IP_VS_RTAB_MASK;
503}
504
505/*
fc723250 506 * Hashes ip_vs_dest in rs_table by <proto,addr,port>.
1da177e4
LT
507 * should be called with locked tables.
508 */
fc723250 509static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
1da177e4
LT
510{
511 unsigned hash;
512
513 if (!list_empty(&dest->d_list)) {
514 return 0;
515 }
516
517 /*
518 * Hash by proto,addr,port,
519 * which are the parameters of the real service.
520 */
7937df15
JV
521 hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
522
fc723250 523 list_add(&dest->d_list, &ipvs->rs_table[hash]);
1da177e4
LT
524
525 return 1;
526}
527
528/*
fc723250 529 * UNhashes ip_vs_dest from rs_table.
1da177e4
LT
530 * should be called with locked tables.
531 */
532static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
533{
534 /*
fc723250 535 * Remove it from the rs_table table.
1da177e4
LT
536 */
537 if (!list_empty(&dest->d_list)) {
538 list_del(&dest->d_list);
539 INIT_LIST_HEAD(&dest->d_list);
540 }
541
542 return 1;
543}
544
545/*
546 * Lookup real service by <proto,addr,port> in the real service table.
547 */
548struct ip_vs_dest *
fc723250 549ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
7937df15
JV
550 const union nf_inet_addr *daddr,
551 __be16 dport)
1da177e4 552{
fc723250 553 struct netns_ipvs *ipvs = net_ipvs(net);
1da177e4
LT
554 unsigned hash;
555 struct ip_vs_dest *dest;
556
557 /*
558 * Check for "full" addressed entries
559 * Return the first found entry
560 */
7937df15 561 hash = ip_vs_rs_hashkey(af, daddr, dport);
1da177e4 562
a0840e2e 563 read_lock(&ipvs->rs_lock);
fc723250 564 list_for_each_entry(dest, &ipvs->rs_table[hash], d_list) {
7937df15
JV
565 if ((dest->af == af)
566 && ip_vs_addr_equal(af, &dest->addr, daddr)
1da177e4
LT
567 && (dest->port == dport)
568 && ((dest->protocol == protocol) ||
569 dest->vfwmark)) {
570 /* HIT */
a0840e2e 571 read_unlock(&ipvs->rs_lock);
1da177e4
LT
572 return dest;
573 }
574 }
a0840e2e 575 read_unlock(&ipvs->rs_lock);
1da177e4
LT
576
577 return NULL;
578}
579
580/*
581 * Lookup destination by {addr,port} in the given service
582 */
583static struct ip_vs_dest *
7937df15
JV
584ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
585 __be16 dport)
1da177e4
LT
586{
587 struct ip_vs_dest *dest;
588
589 /*
590 * Find the destination for the given service
591 */
592 list_for_each_entry(dest, &svc->destinations, n_list) {
7937df15
JV
593 if ((dest->af == svc->af)
594 && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
595 && (dest->port == dport)) {
1da177e4
LT
596 /* HIT */
597 return dest;
598 }
599 }
600
601 return NULL;
602}
603
1e356f9c
RB
604/*
605 * Find destination by {daddr,dport,vaddr,protocol}
606 * Cretaed to be used in ip_vs_process_message() in
607 * the backup synchronization daemon. It finds the
608 * destination to be bound to the received connection
609 * on the backup.
610 *
611 * ip_vs_lookup_real_service() looked promissing, but
612 * seems not working as expected.
613 */
fc723250
HS
614struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af,
615 const union nf_inet_addr *daddr,
7937df15
JV
616 __be16 dport,
617 const union nf_inet_addr *vaddr,
0e051e68 618 __be16 vport, __u16 protocol, __u32 fwmark)
1e356f9c
RB
619{
620 struct ip_vs_dest *dest;
621 struct ip_vs_service *svc;
622
fc723250 623 svc = ip_vs_service_get(net, af, fwmark, protocol, vaddr, vport);
1e356f9c
RB
624 if (!svc)
625 return NULL;
626 dest = ip_vs_lookup_dest(svc, daddr, dport);
627 if (dest)
628 atomic_inc(&dest->refcnt);
629 ip_vs_service_put(svc);
630 return dest;
631}
1da177e4
LT
632
633/*
634 * Lookup dest by {svc,addr,port} in the destination trash.
635 * The destination trash is used to hold the destinations that are removed
636 * from the service table but are still referenced by some conn entries.
637 * The reason to add the destination trash is when the dest is temporary
638 * down (either by administrator or by monitor program), the dest can be
639 * picked back from the trash, the remaining connections to the dest can
640 * continue, and the counting information of the dest is also useful for
641 * scheduling.
642 */
643static struct ip_vs_dest *
7937df15
JV
644ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
645 __be16 dport)
1da177e4
LT
646{
647 struct ip_vs_dest *dest, *nxt;
f2431e6e 648 struct netns_ipvs *ipvs = net_ipvs(svc->net);
1da177e4
LT
649
650 /*
651 * Find the destination in trash
652 */
f2431e6e 653 list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
7937df15
JV
654 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
655 "dest->refcnt=%d\n",
656 dest->vfwmark,
657 IP_VS_DBG_ADDR(svc->af, &dest->addr),
658 ntohs(dest->port),
659 atomic_read(&dest->refcnt));
660 if (dest->af == svc->af &&
661 ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
1da177e4
LT
662 dest->port == dport &&
663 dest->vfwmark == svc->fwmark &&
664 dest->protocol == svc->protocol &&
665 (svc->fwmark ||
7937df15 666 (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
1da177e4
LT
667 dest->vport == svc->port))) {
668 /* HIT */
669 return dest;
670 }
671
672 /*
673 * Try to purge the destination from trash if not referenced
674 */
675 if (atomic_read(&dest->refcnt) == 1) {
7937df15
JV
676 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
677 "from trash\n",
678 dest->vfwmark,
679 IP_VS_DBG_ADDR(svc->af, &dest->addr),
680 ntohs(dest->port));
1da177e4
LT
681 list_del(&dest->n_list);
682 ip_vs_dst_reset(dest);
683 __ip_vs_unbind_svc(dest);
b17fc996 684 free_percpu(dest->stats.cpustats);
1da177e4
LT
685 kfree(dest);
686 }
687 }
688
689 return NULL;
690}
691
692
693/*
694 * Clean up all the destinations in the trash
695 * Called by the ip_vs_control_cleanup()
696 *
697 * When the ip_vs_control_clearup is activated by ipvs module exit,
698 * the service tables must have been flushed and all the connections
699 * are expired, and the refcnt of each destination in the trash must
700 * be 1, so we simply release them here.
701 */
f2431e6e 702static void ip_vs_trash_cleanup(struct net *net)
1da177e4
LT
703{
704 struct ip_vs_dest *dest, *nxt;
f2431e6e 705 struct netns_ipvs *ipvs = net_ipvs(net);
1da177e4 706
f2431e6e 707 list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
1da177e4
LT
708 list_del(&dest->n_list);
709 ip_vs_dst_reset(dest);
710 __ip_vs_unbind_svc(dest);
b17fc996 711 free_percpu(dest->stats.cpustats);
1da177e4
LT
712 kfree(dest);
713 }
714}
715
716
717static void
718ip_vs_zero_stats(struct ip_vs_stats *stats)
719{
720 spin_lock_bh(&stats->lock);
e93615d0 721
e9c0ce23 722 memset(&stats->ustats, 0, sizeof(stats->ustats));
1da177e4 723 ip_vs_zero_estimator(stats);
e93615d0 724
3a14a313 725 spin_unlock_bh(&stats->lock);
1da177e4
LT
726}
727
728/*
729 * Update a destination in the given service
730 */
731static void
26c15cfd
JA
732__ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
733 struct ip_vs_dest_user_kern *udest, int add)
1da177e4 734{
fc723250 735 struct netns_ipvs *ipvs = net_ipvs(svc->net);
1da177e4
LT
736 int conn_flags;
737
738 /* set the weight and the flags */
739 atomic_set(&dest->weight, udest->weight);
3575792e
JA
740 conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
741 conn_flags |= IP_VS_CONN_F_INACTIVE;
1da177e4 742
1da177e4 743 /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
3575792e 744 if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) {
1da177e4
LT
745 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
746 } else {
747 /*
fc723250 748 * Put the real service in rs_table if not present.
1da177e4
LT
749 * For now only for NAT!
750 */
a0840e2e 751 write_lock_bh(&ipvs->rs_lock);
fc723250 752 ip_vs_rs_hash(ipvs, dest);
a0840e2e 753 write_unlock_bh(&ipvs->rs_lock);
1da177e4
LT
754 }
755 atomic_set(&dest->conn_flags, conn_flags);
756
757 /* bind the service */
758 if (!dest->svc) {
759 __ip_vs_bind_svc(dest, svc);
760 } else {
761 if (dest->svc != svc) {
762 __ip_vs_unbind_svc(dest);
763 ip_vs_zero_stats(&dest->stats);
764 __ip_vs_bind_svc(dest, svc);
765 }
766 }
767
768 /* set the dest status flags */
769 dest->flags |= IP_VS_DEST_F_AVAILABLE;
770
771 if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
772 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
773 dest->u_threshold = udest->u_threshold;
774 dest->l_threshold = udest->l_threshold;
26c15cfd 775
fc604767
JA
776 spin_lock(&dest->dst_lock);
777 ip_vs_dst_reset(dest);
778 spin_unlock(&dest->dst_lock);
779
26c15cfd 780 if (add)
29c2026f 781 ip_vs_new_estimator(svc->net, &dest->stats);
26c15cfd
JA
782
783 write_lock_bh(&__ip_vs_svc_lock);
784
785 /* Wait until all other svc users go away */
786 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
787
788 if (add) {
789 list_add(&dest->n_list, &svc->destinations);
790 svc->num_dests++;
791 }
792
793 /* call the update_service, because server weight may be changed */
794 if (svc->scheduler->update_service)
795 svc->scheduler->update_service(svc);
796
797 write_unlock_bh(&__ip_vs_svc_lock);
1da177e4
LT
798}
799
800
801/*
802 * Create a destination for the given service
803 */
804static int
c860c6b1 805ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
1da177e4
LT
806 struct ip_vs_dest **dest_p)
807{
808 struct ip_vs_dest *dest;
809 unsigned atype;
810
811 EnterFunction(2);
812
09571c7a
VB
813#ifdef CONFIG_IP_VS_IPV6
814 if (svc->af == AF_INET6) {
815 atype = ipv6_addr_type(&udest->addr.in6);
3bfb92f4
SW
816 if ((!(atype & IPV6_ADDR_UNICAST) ||
817 atype & IPV6_ADDR_LINKLOCAL) &&
09571c7a
VB
818 !__ip_vs_addr_is_local_v6(&udest->addr.in6))
819 return -EINVAL;
820 } else
821#endif
822 {
823 atype = inet_addr_type(&init_net, udest->addr.ip);
824 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
825 return -EINVAL;
826 }
1da177e4 827
dee06e47 828 dest = kzalloc(sizeof(struct ip_vs_dest), GFP_KERNEL);
1da177e4 829 if (dest == NULL) {
1e3e238e 830 pr_err("%s(): no memory.\n", __func__);
1da177e4
LT
831 return -ENOMEM;
832 }
b17fc996
HS
833 dest->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
834 if (!dest->stats.cpustats) {
835 pr_err("%s() alloc_percpu failed\n", __func__);
836 goto err_alloc;
837 }
1da177e4 838
c860c6b1 839 dest->af = svc->af;
1da177e4 840 dest->protocol = svc->protocol;
c860c6b1 841 dest->vaddr = svc->addr;
1da177e4
LT
842 dest->vport = svc->port;
843 dest->vfwmark = svc->fwmark;
c860c6b1 844 ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
1da177e4
LT
845 dest->port = udest->port;
846
847 atomic_set(&dest->activeconns, 0);
848 atomic_set(&dest->inactconns, 0);
849 atomic_set(&dest->persistconns, 0);
26c15cfd 850 atomic_set(&dest->refcnt, 1);
1da177e4
LT
851
852 INIT_LIST_HEAD(&dest->d_list);
853 spin_lock_init(&dest->dst_lock);
854 spin_lock_init(&dest->stats.lock);
26c15cfd 855 __ip_vs_update_dest(svc, dest, udest, 1);
1da177e4
LT
856
857 *dest_p = dest;
858
859 LeaveFunction(2);
860 return 0;
b17fc996
HS
861
862err_alloc:
863 kfree(dest);
864 return -ENOMEM;
1da177e4
LT
865}
866
867
868/*
869 * Add a destination into an existing service
870 */
871static int
c860c6b1 872ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
873{
874 struct ip_vs_dest *dest;
c860c6b1 875 union nf_inet_addr daddr;
014d730d 876 __be16 dport = udest->port;
1da177e4
LT
877 int ret;
878
879 EnterFunction(2);
880
881 if (udest->weight < 0) {
1e3e238e 882 pr_err("%s(): server weight less than zero\n", __func__);
1da177e4
LT
883 return -ERANGE;
884 }
885
886 if (udest->l_threshold > udest->u_threshold) {
1e3e238e
HE
887 pr_err("%s(): lower threshold is higher than upper threshold\n",
888 __func__);
1da177e4
LT
889 return -ERANGE;
890 }
891
c860c6b1
JV
892 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
893
1da177e4
LT
894 /*
895 * Check if the dest already exists in the list
896 */
7937df15
JV
897 dest = ip_vs_lookup_dest(svc, &daddr, dport);
898
1da177e4 899 if (dest != NULL) {
1e3e238e 900 IP_VS_DBG(1, "%s(): dest already exists\n", __func__);
1da177e4
LT
901 return -EEXIST;
902 }
903
904 /*
905 * Check if the dest already exists in the trash and
906 * is from the same service
907 */
7937df15
JV
908 dest = ip_vs_trash_get_dest(svc, &daddr, dport);
909
1da177e4 910 if (dest != NULL) {
cfc78c5a
JV
911 IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
912 "dest->refcnt=%d, service %u/%s:%u\n",
913 IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
914 atomic_read(&dest->refcnt),
915 dest->vfwmark,
916 IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
917 ntohs(dest->vport));
918
1da177e4
LT
919 /*
920 * Get the destination from the trash
921 */
922 list_del(&dest->n_list);
923
26c15cfd
JA
924 __ip_vs_update_dest(svc, dest, udest, 1);
925 ret = 0;
926 } else {
1da177e4 927 /*
26c15cfd 928 * Allocate and initialize the dest structure
1da177e4 929 */
26c15cfd 930 ret = ip_vs_new_dest(svc, udest, &dest);
1da177e4 931 }
1da177e4
LT
932 LeaveFunction(2);
933
26c15cfd 934 return ret;
1da177e4
LT
935}
936
937
938/*
939 * Edit a destination in the given service
940 */
941static int
c860c6b1 942ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
943{
944 struct ip_vs_dest *dest;
c860c6b1 945 union nf_inet_addr daddr;
014d730d 946 __be16 dport = udest->port;
1da177e4
LT
947
948 EnterFunction(2);
949
950 if (udest->weight < 0) {
1e3e238e 951 pr_err("%s(): server weight less than zero\n", __func__);
1da177e4
LT
952 return -ERANGE;
953 }
954
955 if (udest->l_threshold > udest->u_threshold) {
1e3e238e
HE
956 pr_err("%s(): lower threshold is higher than upper threshold\n",
957 __func__);
1da177e4
LT
958 return -ERANGE;
959 }
960
c860c6b1
JV
961 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
962
1da177e4
LT
963 /*
964 * Lookup the destination list
965 */
7937df15
JV
966 dest = ip_vs_lookup_dest(svc, &daddr, dport);
967
1da177e4 968 if (dest == NULL) {
1e3e238e 969 IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__);
1da177e4
LT
970 return -ENOENT;
971 }
972
26c15cfd 973 __ip_vs_update_dest(svc, dest, udest, 0);
1da177e4
LT
974 LeaveFunction(2);
975
976 return 0;
977}
978
979
980/*
981 * Delete a destination (must be already unlinked from the service)
982 */
29c2026f 983static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest)
1da177e4 984{
a0840e2e
HS
985 struct netns_ipvs *ipvs = net_ipvs(net);
986
29c2026f 987 ip_vs_kill_estimator(net, &dest->stats);
1da177e4
LT
988
989 /*
990 * Remove it from the d-linked list with the real services.
991 */
a0840e2e 992 write_lock_bh(&ipvs->rs_lock);
1da177e4 993 ip_vs_rs_unhash(dest);
a0840e2e 994 write_unlock_bh(&ipvs->rs_lock);
1da177e4
LT
995
996 /*
997 * Decrease the refcnt of the dest, and free the dest
998 * if nobody refers to it (refcnt=0). Otherwise, throw
999 * the destination into the trash.
1000 */
1001 if (atomic_dec_and_test(&dest->refcnt)) {
26c15cfd
JA
1002 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u\n",
1003 dest->vfwmark,
1004 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1005 ntohs(dest->port));
1da177e4
LT
1006 ip_vs_dst_reset(dest);
1007 /* simply decrease svc->refcnt here, let the caller check
1008 and release the service if nobody refers to it.
1009 Only user context can release destination and service,
1010 and only one user context can update virtual service at a
1011 time, so the operation here is OK */
1012 atomic_dec(&dest->svc->refcnt);
b17fc996 1013 free_percpu(dest->stats.cpustats);
1da177e4
LT
1014 kfree(dest);
1015 } else {
cfc78c5a
JV
1016 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
1017 "dest->refcnt=%d\n",
1018 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1019 ntohs(dest->port),
1020 atomic_read(&dest->refcnt));
f2431e6e 1021 list_add(&dest->n_list, &ipvs->dest_trash);
1da177e4
LT
1022 atomic_inc(&dest->refcnt);
1023 }
1024}
1025
1026
1027/*
1028 * Unlink a destination from the given service
1029 */
1030static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1031 struct ip_vs_dest *dest,
1032 int svcupd)
1033{
1034 dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1035
1036 /*
1037 * Remove it from the d-linked destination list.
1038 */
1039 list_del(&dest->n_list);
1040 svc->num_dests--;
82dfb6f3
SW
1041
1042 /*
1043 * Call the update_service function of its scheduler
1044 */
1045 if (svcupd && svc->scheduler->update_service)
1046 svc->scheduler->update_service(svc);
1da177e4
LT
1047}
1048
1049
1050/*
1051 * Delete a destination server in the given service
1052 */
1053static int
c860c6b1 1054ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
1055{
1056 struct ip_vs_dest *dest;
014d730d 1057 __be16 dport = udest->port;
1da177e4
LT
1058
1059 EnterFunction(2);
1060
7937df15 1061 dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
c860c6b1 1062
1da177e4 1063 if (dest == NULL) {
1e3e238e 1064 IP_VS_DBG(1, "%s(): destination not found!\n", __func__);
1da177e4
LT
1065 return -ENOENT;
1066 }
1067
1068 write_lock_bh(&__ip_vs_svc_lock);
1069
1070 /*
1071 * Wait until all other svc users go away.
1072 */
26c15cfd 1073 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1da177e4
LT
1074
1075 /*
1076 * Unlink dest from the service
1077 */
1078 __ip_vs_unlink_dest(svc, dest, 1);
1079
1080 write_unlock_bh(&__ip_vs_svc_lock);
1081
1082 /*
1083 * Delete the destination
1084 */
a0840e2e 1085 __ip_vs_del_dest(svc->net, dest);
1da177e4
LT
1086
1087 LeaveFunction(2);
1088
1089 return 0;
1090}
1091
1092
1093/*
1094 * Add a service into the service hash table
1095 */
1096static int
fc723250 1097ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
c860c6b1 1098 struct ip_vs_service **svc_p)
1da177e4
LT
1099{
1100 int ret = 0;
1101 struct ip_vs_scheduler *sched = NULL;
0d1e71b0 1102 struct ip_vs_pe *pe = NULL;
1da177e4 1103 struct ip_vs_service *svc = NULL;
a0840e2e 1104 struct netns_ipvs *ipvs = net_ipvs(net);
1da177e4
LT
1105
1106 /* increase the module use count */
1107 ip_vs_use_count_inc();
1108
1109 /* Lookup the scheduler by 'u->sched_name' */
1110 sched = ip_vs_scheduler_get(u->sched_name);
1111 if (sched == NULL) {
1e3e238e 1112 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1da177e4 1113 ret = -ENOENT;
6e08bfb8 1114 goto out_err;
1da177e4
LT
1115 }
1116
0d1e71b0 1117 if (u->pe_name && *u->pe_name) {
e9e5eee8 1118 pe = ip_vs_pe_getbyname(u->pe_name);
0d1e71b0
SH
1119 if (pe == NULL) {
1120 pr_info("persistence engine module ip_vs_pe_%s "
1121 "not found\n", u->pe_name);
1122 ret = -ENOENT;
1123 goto out_err;
1124 }
1125 }
1126
f94fd041 1127#ifdef CONFIG_IP_VS_IPV6
48148938
JV
1128 if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1129 ret = -EINVAL;
1130 goto out_err;
f94fd041
JV
1131 }
1132#endif
1133
dee06e47 1134 svc = kzalloc(sizeof(struct ip_vs_service), GFP_KERNEL);
1da177e4 1135 if (svc == NULL) {
1e3e238e 1136 IP_VS_DBG(1, "%s(): no memory\n", __func__);
1da177e4
LT
1137 ret = -ENOMEM;
1138 goto out_err;
1139 }
b17fc996
HS
1140 svc->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
1141 if (!svc->stats.cpustats) {
1142 pr_err("%s() alloc_percpu failed\n", __func__);
1143 goto out_err;
1144 }
1da177e4
LT
1145
1146 /* I'm the first user of the service */
26c15cfd 1147 atomic_set(&svc->usecnt, 0);
1da177e4
LT
1148 atomic_set(&svc->refcnt, 0);
1149
c860c6b1 1150 svc->af = u->af;
1da177e4 1151 svc->protocol = u->protocol;
c860c6b1 1152 ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
1da177e4
LT
1153 svc->port = u->port;
1154 svc->fwmark = u->fwmark;
1155 svc->flags = u->flags;
1156 svc->timeout = u->timeout * HZ;
1157 svc->netmask = u->netmask;
fc723250 1158 svc->net = net;
1da177e4
LT
1159
1160 INIT_LIST_HEAD(&svc->destinations);
1161 rwlock_init(&svc->sched_lock);
1162 spin_lock_init(&svc->stats.lock);
1163
1164 /* Bind the scheduler */
1165 ret = ip_vs_bind_scheduler(svc, sched);
1166 if (ret)
1167 goto out_err;
1168 sched = NULL;
1169
0d1e71b0
SH
1170 /* Bind the ct retriever */
1171 ip_vs_bind_pe(svc, pe);
1172 pe = NULL;
1173
1da177e4
LT
1174 /* Update the virtual service counters */
1175 if (svc->port == FTPPORT)
1176 atomic_inc(&ip_vs_ftpsvc_counter);
1177 else if (svc->port == 0)
1178 atomic_inc(&ip_vs_nullsvc_counter);
1179
29c2026f 1180 ip_vs_new_estimator(net, &svc->stats);
f94fd041
JV
1181
1182 /* Count only IPv4 services for old get/setsockopt interface */
1183 if (svc->af == AF_INET)
a0840e2e 1184 ipvs->num_services++;
1da177e4
LT
1185
1186 /* Hash the service into the service table */
1187 write_lock_bh(&__ip_vs_svc_lock);
1188 ip_vs_svc_hash(svc);
1189 write_unlock_bh(&__ip_vs_svc_lock);
1190
1191 *svc_p = svc;
1192 return 0;
1193
b17fc996 1194
6e08bfb8 1195 out_err:
1da177e4 1196 if (svc != NULL) {
2fabf35b 1197 ip_vs_unbind_scheduler(svc);
1da177e4
LT
1198 if (svc->inc) {
1199 local_bh_disable();
1200 ip_vs_app_inc_put(svc->inc);
1201 local_bh_enable();
1202 }
b17fc996
HS
1203 if (svc->stats.cpustats)
1204 free_percpu(svc->stats.cpustats);
1da177e4
LT
1205 kfree(svc);
1206 }
1207 ip_vs_scheduler_put(sched);
0d1e71b0 1208 ip_vs_pe_put(pe);
1da177e4 1209
1da177e4
LT
1210 /* decrease the module use count */
1211 ip_vs_use_count_dec();
1212
1213 return ret;
1214}
1215
1216
1217/*
1218 * Edit a service and bind it with a new scheduler
1219 */
1220static int
c860c6b1 1221ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1da177e4
LT
1222{
1223 struct ip_vs_scheduler *sched, *old_sched;
0d1e71b0 1224 struct ip_vs_pe *pe = NULL, *old_pe = NULL;
1da177e4
LT
1225 int ret = 0;
1226
1227 /*
1228 * Lookup the scheduler, by 'u->sched_name'
1229 */
1230 sched = ip_vs_scheduler_get(u->sched_name);
1231 if (sched == NULL) {
1e3e238e 1232 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1da177e4
LT
1233 return -ENOENT;
1234 }
1235 old_sched = sched;
1236
0d1e71b0 1237 if (u->pe_name && *u->pe_name) {
e9e5eee8 1238 pe = ip_vs_pe_getbyname(u->pe_name);
0d1e71b0
SH
1239 if (pe == NULL) {
1240 pr_info("persistence engine module ip_vs_pe_%s "
1241 "not found\n", u->pe_name);
1242 ret = -ENOENT;
1243 goto out;
1244 }
1245 old_pe = pe;
1246 }
1247
f94fd041 1248#ifdef CONFIG_IP_VS_IPV6
48148938
JV
1249 if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1250 ret = -EINVAL;
1251 goto out;
f94fd041
JV
1252 }
1253#endif
1254
1da177e4
LT
1255 write_lock_bh(&__ip_vs_svc_lock);
1256
1257 /*
1258 * Wait until all other svc users go away.
1259 */
26c15cfd 1260 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1da177e4
LT
1261
1262 /*
1263 * Set the flags and timeout value
1264 */
1265 svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1266 svc->timeout = u->timeout * HZ;
1267 svc->netmask = u->netmask;
1268
1269 old_sched = svc->scheduler;
1270 if (sched != old_sched) {
1271 /*
1272 * Unbind the old scheduler
1273 */
1274 if ((ret = ip_vs_unbind_scheduler(svc))) {
1275 old_sched = sched;
9e691ed6 1276 goto out_unlock;
1da177e4
LT
1277 }
1278
1279 /*
1280 * Bind the new scheduler
1281 */
1282 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1283 /*
1284 * If ip_vs_bind_scheduler fails, restore the old
1285 * scheduler.
1286 * The main reason of failure is out of memory.
1287 *
1288 * The question is if the old scheduler can be
1289 * restored all the time. TODO: if it cannot be
1290 * restored some time, we must delete the service,
1291 * otherwise the system may crash.
1292 */
1293 ip_vs_bind_scheduler(svc, old_sched);
1294 old_sched = sched;
9e691ed6 1295 goto out_unlock;
1da177e4
LT
1296 }
1297 }
1298
0d1e71b0
SH
1299 old_pe = svc->pe;
1300 if (pe != old_pe) {
1301 ip_vs_unbind_pe(svc);
1302 ip_vs_bind_pe(svc, pe);
1303 }
1304
9e691ed6 1305 out_unlock:
1da177e4 1306 write_unlock_bh(&__ip_vs_svc_lock);
9e691ed6 1307 out:
6e08bfb8 1308 ip_vs_scheduler_put(old_sched);
0d1e71b0 1309 ip_vs_pe_put(old_pe);
1da177e4
LT
1310 return ret;
1311}
1312
1313
1314/*
1315 * Delete a service from the service list
1316 * - The service must be unlinked, unlocked and not referenced!
1317 * - We are called under _bh lock
1318 */
1319static void __ip_vs_del_service(struct ip_vs_service *svc)
1320{
1321 struct ip_vs_dest *dest, *nxt;
1322 struct ip_vs_scheduler *old_sched;
0d1e71b0 1323 struct ip_vs_pe *old_pe;
a0840e2e 1324 struct netns_ipvs *ipvs = net_ipvs(svc->net);
0d1e71b0
SH
1325
1326 pr_info("%s: enter\n", __func__);
1da177e4 1327
f94fd041
JV
1328 /* Count only IPv4 services for old get/setsockopt interface */
1329 if (svc->af == AF_INET)
a0840e2e 1330 ipvs->num_services--;
f94fd041 1331
29c2026f 1332 ip_vs_kill_estimator(svc->net, &svc->stats);
1da177e4
LT
1333
1334 /* Unbind scheduler */
1335 old_sched = svc->scheduler;
1336 ip_vs_unbind_scheduler(svc);
6e08bfb8 1337 ip_vs_scheduler_put(old_sched);
1da177e4 1338
0d1e71b0
SH
1339 /* Unbind persistence engine */
1340 old_pe = svc->pe;
1341 ip_vs_unbind_pe(svc);
1342 ip_vs_pe_put(old_pe);
1343
1da177e4
LT
1344 /* Unbind app inc */
1345 if (svc->inc) {
1346 ip_vs_app_inc_put(svc->inc);
1347 svc->inc = NULL;
1348 }
1349
1350 /*
1351 * Unlink the whole destination list
1352 */
1353 list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1354 __ip_vs_unlink_dest(svc, dest, 0);
29c2026f 1355 __ip_vs_del_dest(svc->net, dest);
1da177e4
LT
1356 }
1357
1358 /*
1359 * Update the virtual service counters
1360 */
1361 if (svc->port == FTPPORT)
1362 atomic_dec(&ip_vs_ftpsvc_counter);
1363 else if (svc->port == 0)
1364 atomic_dec(&ip_vs_nullsvc_counter);
1365
1366 /*
1367 * Free the service if nobody refers to it
1368 */
26c15cfd
JA
1369 if (atomic_read(&svc->refcnt) == 0) {
1370 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
1371 svc->fwmark,
1372 IP_VS_DBG_ADDR(svc->af, &svc->addr),
1373 ntohs(svc->port), atomic_read(&svc->usecnt));
b17fc996 1374 free_percpu(svc->stats.cpustats);
1da177e4 1375 kfree(svc);
26c15cfd 1376 }
1da177e4
LT
1377
1378 /* decrease the module use count */
1379 ip_vs_use_count_dec();
1380}
1381
1382/*
26c15cfd 1383 * Unlink a service from list and try to delete it if its refcnt reached 0
1da177e4 1384 */
26c15cfd 1385static void ip_vs_unlink_service(struct ip_vs_service *svc)
1da177e4 1386{
1da177e4
LT
1387 /*
1388 * Unhash it from the service table
1389 */
1390 write_lock_bh(&__ip_vs_svc_lock);
1391
1392 ip_vs_svc_unhash(svc);
1393
1394 /*
1395 * Wait until all the svc users go away.
1396 */
26c15cfd 1397 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1da177e4
LT
1398
1399 __ip_vs_del_service(svc);
1400
1401 write_unlock_bh(&__ip_vs_svc_lock);
26c15cfd
JA
1402}
1403
1404/*
1405 * Delete a service from the service list
1406 */
1407static int ip_vs_del_service(struct ip_vs_service *svc)
1408{
1409 if (svc == NULL)
1410 return -EEXIST;
1411 ip_vs_unlink_service(svc);
1da177e4
LT
1412
1413 return 0;
1414}
1415
1416
1417/*
1418 * Flush all the virtual services
1419 */
fc723250 1420static int ip_vs_flush(struct net *net)
1da177e4
LT
1421{
1422 int idx;
1423 struct ip_vs_service *svc, *nxt;
1424
1425 /*
fc723250 1426 * Flush the service table hashed by <netns,protocol,addr,port>
1da177e4
LT
1427 */
1428 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
fc723250
HS
1429 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx],
1430 s_list) {
1431 if (net_eq(svc->net, net))
1432 ip_vs_unlink_service(svc);
1da177e4
LT
1433 }
1434 }
1435
1436 /*
1437 * Flush the service table hashed by fwmark
1438 */
1439 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1440 list_for_each_entry_safe(svc, nxt,
1441 &ip_vs_svc_fwm_table[idx], f_list) {
fc723250
HS
1442 if (net_eq(svc->net, net))
1443 ip_vs_unlink_service(svc);
1da177e4
LT
1444 }
1445 }
1446
1447 return 0;
1448}
1449
1450
1451/*
1452 * Zero counters in a service or all services
1453 */
1454static int ip_vs_zero_service(struct ip_vs_service *svc)
1455{
1456 struct ip_vs_dest *dest;
1457
1458 write_lock_bh(&__ip_vs_svc_lock);
1459 list_for_each_entry(dest, &svc->destinations, n_list) {
1460 ip_vs_zero_stats(&dest->stats);
1461 }
1462 ip_vs_zero_stats(&svc->stats);
1463 write_unlock_bh(&__ip_vs_svc_lock);
1464 return 0;
1465}
1466
fc723250 1467static int ip_vs_zero_all(struct net *net)
1da177e4
LT
1468{
1469 int idx;
1470 struct ip_vs_service *svc;
1471
1472 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1473 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
fc723250
HS
1474 if (net_eq(svc->net, net))
1475 ip_vs_zero_service(svc);
1da177e4
LT
1476 }
1477 }
1478
1479 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1480 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
fc723250
HS
1481 if (net_eq(svc->net, net))
1482 ip_vs_zero_service(svc);
1da177e4
LT
1483 }
1484 }
1485
b17fc996 1486 ip_vs_zero_stats(net_ipvs(net)->tot_stats);
1da177e4
LT
1487 return 0;
1488}
1489
1490
1491static int
8d65af78 1492proc_do_defense_mode(ctl_table *table, int write,
1da177e4
LT
1493 void __user *buffer, size_t *lenp, loff_t *ppos)
1494{
9330419d 1495 struct net *net = current->nsproxy->net_ns;
1da177e4
LT
1496 int *valp = table->data;
1497 int val = *valp;
1498 int rc;
1499
8d65af78 1500 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1da177e4
LT
1501 if (write && (*valp != val)) {
1502 if ((*valp < 0) || (*valp > 3)) {
1503 /* Restore the correct value */
1504 *valp = val;
1505 } else {
9330419d 1506 update_defense_level(net_ipvs(net));
1da177e4
LT
1507 }
1508 }
1509 return rc;
1510}
1511
1512
1513static int
8d65af78 1514proc_do_sync_threshold(ctl_table *table, int write,
1da177e4
LT
1515 void __user *buffer, size_t *lenp, loff_t *ppos)
1516{
1517 int *valp = table->data;
1518 int val[2];
1519 int rc;
1520
1521 /* backup the value first */
1522 memcpy(val, valp, sizeof(val));
1523
8d65af78 1524 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1da177e4
LT
1525 if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
1526 /* Restore the correct value */
1527 memcpy(valp, val, sizeof(val));
1528 }
1529 return rc;
1530}
1531
b880c1f0
HS
1532static int
1533proc_do_sync_mode(ctl_table *table, int write,
1534 void __user *buffer, size_t *lenp, loff_t *ppos)
1535{
1536 int *valp = table->data;
1537 int val = *valp;
1538 int rc;
1539
1540 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1541 if (write && (*valp != val)) {
1542 if ((*valp < 0) || (*valp > 1)) {
1543 /* Restore the correct value */
1544 *valp = val;
1545 } else {
f131315f
HS
1546 struct net *net = current->nsproxy->net_ns;
1547 ip_vs_sync_switch_mode(net, val);
b880c1f0
HS
1548 }
1549 }
1550 return rc;
1551}
1da177e4
LT
1552
1553/*
1554 * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
a0840e2e
HS
1555 * Do not change order or insert new entries without
1556 * align with netns init in __ip_vs_control_init()
1da177e4
LT
1557 */
1558
1559static struct ctl_table vs_vars[] = {
1560 {
1da177e4 1561 .procname = "amemthresh",
1da177e4
LT
1562 .maxlen = sizeof(int),
1563 .mode = 0644,
6d9f239a 1564 .proc_handler = proc_dointvec,
1da177e4 1565 },
1da177e4 1566 {
1da177e4 1567 .procname = "am_droprate",
1da177e4
LT
1568 .maxlen = sizeof(int),
1569 .mode = 0644,
6d9f239a 1570 .proc_handler = proc_dointvec,
1da177e4
LT
1571 },
1572 {
1da177e4 1573 .procname = "drop_entry",
1da177e4
LT
1574 .maxlen = sizeof(int),
1575 .mode = 0644,
6d9f239a 1576 .proc_handler = proc_do_defense_mode,
1da177e4
LT
1577 },
1578 {
1da177e4 1579 .procname = "drop_packet",
1da177e4
LT
1580 .maxlen = sizeof(int),
1581 .mode = 0644,
6d9f239a 1582 .proc_handler = proc_do_defense_mode,
1da177e4 1583 },
f4bc17cd
JA
1584#ifdef CONFIG_IP_VS_NFCT
1585 {
1586 .procname = "conntrack",
f4bc17cd
JA
1587 .maxlen = sizeof(int),
1588 .mode = 0644,
1589 .proc_handler = &proc_dointvec,
1590 },
1591#endif
1da177e4 1592 {
1da177e4 1593 .procname = "secure_tcp",
1da177e4
LT
1594 .maxlen = sizeof(int),
1595 .mode = 0644,
6d9f239a 1596 .proc_handler = proc_do_defense_mode,
1da177e4 1597 },
8a803040
JA
1598 {
1599 .procname = "snat_reroute",
8a803040
JA
1600 .maxlen = sizeof(int),
1601 .mode = 0644,
1602 .proc_handler = &proc_dointvec,
1603 },
b880c1f0
HS
1604 {
1605 .procname = "sync_version",
b880c1f0
HS
1606 .maxlen = sizeof(int),
1607 .mode = 0644,
1608 .proc_handler = &proc_do_sync_mode,
1609 },
a0840e2e
HS
1610 {
1611 .procname = "cache_bypass",
1612 .maxlen = sizeof(int),
1613 .mode = 0644,
1614 .proc_handler = proc_dointvec,
1615 },
1616 {
1617 .procname = "expire_nodest_conn",
1618 .maxlen = sizeof(int),
1619 .mode = 0644,
1620 .proc_handler = proc_dointvec,
1621 },
1622 {
1623 .procname = "expire_quiescent_template",
1624 .maxlen = sizeof(int),
1625 .mode = 0644,
1626 .proc_handler = proc_dointvec,
1627 },
1628 {
1629 .procname = "sync_threshold",
1630 .maxlen =
1631 sizeof(((struct netns_ipvs *)0)->sysctl_sync_threshold),
1632 .mode = 0644,
1633 .proc_handler = proc_do_sync_threshold,
1634 },
1635 {
1636 .procname = "nat_icmp_send",
1637 .maxlen = sizeof(int),
1638 .mode = 0644,
1639 .proc_handler = proc_dointvec,
1640 },
1641#ifdef CONFIG_IP_VS_DEBUG
1642 {
1643 .procname = "debug_level",
1644 .data = &sysctl_ip_vs_debug_level,
1645 .maxlen = sizeof(int),
1646 .mode = 0644,
1647 .proc_handler = proc_dointvec,
1648 },
1649#endif
1da177e4
LT
1650#if 0
1651 {
1da177e4
LT
1652 .procname = "timeout_established",
1653 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1654 .maxlen = sizeof(int),
1655 .mode = 0644,
6d9f239a 1656 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1657 },
1658 {
1da177e4
LT
1659 .procname = "timeout_synsent",
1660 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1661 .maxlen = sizeof(int),
1662 .mode = 0644,
6d9f239a 1663 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1664 },
1665 {
1da177e4
LT
1666 .procname = "timeout_synrecv",
1667 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1668 .maxlen = sizeof(int),
1669 .mode = 0644,
6d9f239a 1670 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1671 },
1672 {
1da177e4
LT
1673 .procname = "timeout_finwait",
1674 .data = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1675 .maxlen = sizeof(int),
1676 .mode = 0644,
6d9f239a 1677 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1678 },
1679 {
1da177e4
LT
1680 .procname = "timeout_timewait",
1681 .data = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1682 .maxlen = sizeof(int),
1683 .mode = 0644,
6d9f239a 1684 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1685 },
1686 {
1da177e4
LT
1687 .procname = "timeout_close",
1688 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1689 .maxlen = sizeof(int),
1690 .mode = 0644,
6d9f239a 1691 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1692 },
1693 {
1da177e4
LT
1694 .procname = "timeout_closewait",
1695 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1696 .maxlen = sizeof(int),
1697 .mode = 0644,
6d9f239a 1698 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1699 },
1700 {
1da177e4
LT
1701 .procname = "timeout_lastack",
1702 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1703 .maxlen = sizeof(int),
1704 .mode = 0644,
6d9f239a 1705 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1706 },
1707 {
1da177e4
LT
1708 .procname = "timeout_listen",
1709 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1710 .maxlen = sizeof(int),
1711 .mode = 0644,
6d9f239a 1712 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1713 },
1714 {
1da177e4
LT
1715 .procname = "timeout_synack",
1716 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1717 .maxlen = sizeof(int),
1718 .mode = 0644,
6d9f239a 1719 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1720 },
1721 {
1da177e4
LT
1722 .procname = "timeout_udp",
1723 .data = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1724 .maxlen = sizeof(int),
1725 .mode = 0644,
6d9f239a 1726 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1727 },
1728 {
1da177e4
LT
1729 .procname = "timeout_icmp",
1730 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1731 .maxlen = sizeof(int),
1732 .mode = 0644,
6d9f239a 1733 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1734 },
1735#endif
f8572d8f 1736 { }
1da177e4
LT
1737};
1738
5587da55 1739const struct ctl_path net_vs_ctl_path[] = {
f8572d8f
EB
1740 { .procname = "net", },
1741 { .procname = "ipv4", },
90754f8e
PE
1742 { .procname = "vs", },
1743 { }
1da177e4 1744};
90754f8e 1745EXPORT_SYMBOL_GPL(net_vs_ctl_path);
1da177e4 1746
1da177e4
LT
1747#ifdef CONFIG_PROC_FS
1748
1749struct ip_vs_iter {
fc723250 1750 struct seq_net_private p; /* Do not move this, netns depends upon it*/
1da177e4
LT
1751 struct list_head *table;
1752 int bucket;
1753};
1754
1755/*
1756 * Write the contents of the VS rule table to a PROCfs file.
1757 * (It is kept just for backward compatibility)
1758 */
1759static inline const char *ip_vs_fwd_name(unsigned flags)
1760{
1761 switch (flags & IP_VS_CONN_F_FWD_MASK) {
1762 case IP_VS_CONN_F_LOCALNODE:
1763 return "Local";
1764 case IP_VS_CONN_F_TUNNEL:
1765 return "Tunnel";
1766 case IP_VS_CONN_F_DROUTE:
1767 return "Route";
1768 default:
1769 return "Masq";
1770 }
1771}
1772
1773
1774/* Get the Nth entry in the two lists */
1775static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1776{
fc723250 1777 struct net *net = seq_file_net(seq);
1da177e4
LT
1778 struct ip_vs_iter *iter = seq->private;
1779 int idx;
1780 struct ip_vs_service *svc;
1781
1782 /* look in hash by protocol */
1783 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1784 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
fc723250 1785 if (net_eq(svc->net, net) && pos-- == 0) {
1da177e4
LT
1786 iter->table = ip_vs_svc_table;
1787 iter->bucket = idx;
1788 return svc;
1789 }
1790 }
1791 }
1792
1793 /* keep looking in fwmark */
1794 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1795 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
fc723250 1796 if (net_eq(svc->net, net) && pos-- == 0) {
1da177e4
LT
1797 iter->table = ip_vs_svc_fwm_table;
1798 iter->bucket = idx;
1799 return svc;
1800 }
1801 }
1802 }
1803
1804 return NULL;
1805}
1806
1807static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
563e94f0 1808__acquires(__ip_vs_svc_lock)
1da177e4
LT
1809{
1810
1811 read_lock_bh(&__ip_vs_svc_lock);
1812 return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
1813}
1814
1815
1816static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1817{
1818 struct list_head *e;
1819 struct ip_vs_iter *iter;
1820 struct ip_vs_service *svc;
1821
1822 ++*pos;
1823 if (v == SEQ_START_TOKEN)
1824 return ip_vs_info_array(seq,0);
1825
1826 svc = v;
1827 iter = seq->private;
1828
1829 if (iter->table == ip_vs_svc_table) {
1830 /* next service in table hashed by protocol */
1831 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
1832 return list_entry(e, struct ip_vs_service, s_list);
1833
1834
1835 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1836 list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
1837 s_list) {
1838 return svc;
1839 }
1840 }
1841
1842 iter->table = ip_vs_svc_fwm_table;
1843 iter->bucket = -1;
1844 goto scan_fwmark;
1845 }
1846
1847 /* next service in hashed by fwmark */
1848 if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
1849 return list_entry(e, struct ip_vs_service, f_list);
1850
1851 scan_fwmark:
1852 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1853 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
1854 f_list)
1855 return svc;
1856 }
1857
1858 return NULL;
1859}
1860
1861static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
563e94f0 1862__releases(__ip_vs_svc_lock)
1da177e4
LT
1863{
1864 read_unlock_bh(&__ip_vs_svc_lock);
1865}
1866
1867
1868static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
1869{
1870 if (v == SEQ_START_TOKEN) {
1871 seq_printf(seq,
1872 "IP Virtual Server version %d.%d.%d (size=%d)\n",
6f7edb48 1873 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
1da177e4
LT
1874 seq_puts(seq,
1875 "Prot LocalAddress:Port Scheduler Flags\n");
1876 seq_puts(seq,
1877 " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
1878 } else {
1879 const struct ip_vs_service *svc = v;
1880 const struct ip_vs_iter *iter = seq->private;
1881 const struct ip_vs_dest *dest;
1882
667a5f18
VB
1883 if (iter->table == ip_vs_svc_table) {
1884#ifdef CONFIG_IP_VS_IPV6
1885 if (svc->af == AF_INET6)
5b095d98 1886 seq_printf(seq, "%s [%pI6]:%04X %s ",
667a5f18 1887 ip_vs_proto_name(svc->protocol),
38ff4fa4 1888 &svc->addr.in6,
667a5f18
VB
1889 ntohs(svc->port),
1890 svc->scheduler->name);
1891 else
1892#endif
26ec037f 1893 seq_printf(seq, "%s %08X:%04X %s %s ",
667a5f18
VB
1894 ip_vs_proto_name(svc->protocol),
1895 ntohl(svc->addr.ip),
1896 ntohs(svc->port),
26ec037f
NC
1897 svc->scheduler->name,
1898 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
667a5f18 1899 } else {
26ec037f
NC
1900 seq_printf(seq, "FWM %08X %s %s",
1901 svc->fwmark, svc->scheduler->name,
1902 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
667a5f18 1903 }
1da177e4
LT
1904
1905 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
1906 seq_printf(seq, "persistent %d %08X\n",
1907 svc->timeout,
1908 ntohl(svc->netmask));
1909 else
1910 seq_putc(seq, '\n');
1911
1912 list_for_each_entry(dest, &svc->destinations, n_list) {
667a5f18
VB
1913#ifdef CONFIG_IP_VS_IPV6
1914 if (dest->af == AF_INET6)
1915 seq_printf(seq,
5b095d98 1916 " -> [%pI6]:%04X"
667a5f18 1917 " %-7s %-6d %-10d %-10d\n",
38ff4fa4 1918 &dest->addr.in6,
667a5f18
VB
1919 ntohs(dest->port),
1920 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1921 atomic_read(&dest->weight),
1922 atomic_read(&dest->activeconns),
1923 atomic_read(&dest->inactconns));
1924 else
1925#endif
1926 seq_printf(seq,
1927 " -> %08X:%04X "
1928 "%-7s %-6d %-10d %-10d\n",
1929 ntohl(dest->addr.ip),
1930 ntohs(dest->port),
1931 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1932 atomic_read(&dest->weight),
1933 atomic_read(&dest->activeconns),
1934 atomic_read(&dest->inactconns));
1935
1da177e4
LT
1936 }
1937 }
1938 return 0;
1939}
1940
56b3d975 1941static const struct seq_operations ip_vs_info_seq_ops = {
1da177e4
LT
1942 .start = ip_vs_info_seq_start,
1943 .next = ip_vs_info_seq_next,
1944 .stop = ip_vs_info_seq_stop,
1945 .show = ip_vs_info_seq_show,
1946};
1947
1948static int ip_vs_info_open(struct inode *inode, struct file *file)
1949{
fc723250 1950 return seq_open_net(inode, file, &ip_vs_info_seq_ops,
cf7732e4 1951 sizeof(struct ip_vs_iter));
1da177e4
LT
1952}
1953
9a32144e 1954static const struct file_operations ip_vs_info_fops = {
1da177e4
LT
1955 .owner = THIS_MODULE,
1956 .open = ip_vs_info_open,
1957 .read = seq_read,
1958 .llseek = seq_lseek,
1959 .release = seq_release_private,
1960};
1961
1962#endif
1963
1da177e4
LT
1964#ifdef CONFIG_PROC_FS
1965static int ip_vs_stats_show(struct seq_file *seq, void *v)
1966{
b17fc996
HS
1967 struct net *net = seq_file_single_net(seq);
1968 struct ip_vs_stats *tot_stats = net_ipvs(net)->tot_stats;
1da177e4
LT
1969
1970/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1971 seq_puts(seq,
1972 " Total Incoming Outgoing Incoming Outgoing\n");
1973 seq_printf(seq,
1974 " Conns Packets Packets Bytes Bytes\n");
1975
b17fc996
HS
1976 spin_lock_bh(&tot_stats->lock);
1977 seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", tot_stats->ustats.conns,
1978 tot_stats->ustats.inpkts, tot_stats->ustats.outpkts,
1979 (unsigned long long) tot_stats->ustats.inbytes,
1980 (unsigned long long) tot_stats->ustats.outbytes);
1da177e4
LT
1981
1982/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1983 seq_puts(seq,
1984 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
1985 seq_printf(seq,"%8X %8X %8X %16X %16X\n",
b17fc996
HS
1986 tot_stats->ustats.cps,
1987 tot_stats->ustats.inpps,
1988 tot_stats->ustats.outpps,
1989 tot_stats->ustats.inbps,
1990 tot_stats->ustats.outbps);
1991 spin_unlock_bh(&tot_stats->lock);
1da177e4
LT
1992
1993 return 0;
1994}
1995
1996static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
1997{
fc723250 1998 return single_open_net(inode, file, ip_vs_stats_show);
1da177e4
LT
1999}
2000
9a32144e 2001static const struct file_operations ip_vs_stats_fops = {
1da177e4
LT
2002 .owner = THIS_MODULE,
2003 .open = ip_vs_stats_seq_open,
2004 .read = seq_read,
2005 .llseek = seq_lseek,
2006 .release = single_release,
2007};
2008
b17fc996
HS
2009static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
2010{
2011 struct net *net = seq_file_single_net(seq);
2012 struct ip_vs_stats *tot_stats = net_ipvs(net)->tot_stats;
2013 int i;
2014
2015/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2016 seq_puts(seq,
2017 " Total Incoming Outgoing Incoming Outgoing\n");
2018 seq_printf(seq,
2019 "CPU Conns Packets Packets Bytes Bytes\n");
2020
2021 for_each_possible_cpu(i) {
2022 struct ip_vs_cpu_stats *u = per_cpu_ptr(net->ipvs->cpustats, i);
2023 seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n",
2024 i, u->ustats.conns, u->ustats.inpkts,
2025 u->ustats.outpkts, (__u64)u->ustats.inbytes,
2026 (__u64)u->ustats.outbytes);
2027 }
2028
2029 spin_lock_bh(&tot_stats->lock);
2030 seq_printf(seq, " ~ %8X %8X %8X %16LX %16LX\n\n",
2031 tot_stats->ustats.conns, tot_stats->ustats.inpkts,
2032 tot_stats->ustats.outpkts,
2033 (unsigned long long) tot_stats->ustats.inbytes,
2034 (unsigned long long) tot_stats->ustats.outbytes);
2035
2036/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2037 seq_puts(seq,
2038 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
2039 seq_printf(seq, " %8X %8X %8X %16X %16X\n",
2040 tot_stats->ustats.cps,
2041 tot_stats->ustats.inpps,
2042 tot_stats->ustats.outpps,
2043 tot_stats->ustats.inbps,
2044 tot_stats->ustats.outbps);
2045 spin_unlock_bh(&tot_stats->lock);
2046
2047 return 0;
2048}
2049
2050static int ip_vs_stats_percpu_seq_open(struct inode *inode, struct file *file)
2051{
2052 return single_open_net(inode, file, ip_vs_stats_percpu_show);
2053}
2054
2055static const struct file_operations ip_vs_stats_percpu_fops = {
2056 .owner = THIS_MODULE,
2057 .open = ip_vs_stats_percpu_seq_open,
2058 .read = seq_read,
2059 .llseek = seq_lseek,
2060 .release = single_release,
2061};
1da177e4
LT
2062#endif
2063
2064/*
2065 * Set timeout values for tcp tcpfin udp in the timeout_table.
2066 */
9330419d 2067static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u)
1da177e4 2068{
9330419d
HS
2069 struct ip_vs_proto_data *pd;
2070
1da177e4
LT
2071 IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
2072 u->tcp_timeout,
2073 u->tcp_fin_timeout,
2074 u->udp_timeout);
2075
2076#ifdef CONFIG_IP_VS_PROTO_TCP
2077 if (u->tcp_timeout) {
9330419d
HS
2078 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2079 pd->timeout_table[IP_VS_TCP_S_ESTABLISHED]
1da177e4
LT
2080 = u->tcp_timeout * HZ;
2081 }
2082
2083 if (u->tcp_fin_timeout) {
9330419d
HS
2084 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2085 pd->timeout_table[IP_VS_TCP_S_FIN_WAIT]
1da177e4
LT
2086 = u->tcp_fin_timeout * HZ;
2087 }
2088#endif
2089
2090#ifdef CONFIG_IP_VS_PROTO_UDP
2091 if (u->udp_timeout) {
9330419d
HS
2092 pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
2093 pd->timeout_table[IP_VS_UDP_S_NORMAL]
1da177e4
LT
2094 = u->udp_timeout * HZ;
2095 }
2096#endif
2097 return 0;
2098}
2099
2100
2101#define SET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
2102#define SERVICE_ARG_LEN (sizeof(struct ip_vs_service_user))
2103#define SVCDEST_ARG_LEN (sizeof(struct ip_vs_service_user) + \
2104 sizeof(struct ip_vs_dest_user))
2105#define TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
2106#define DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user))
2107#define MAX_ARG_LEN SVCDEST_ARG_LEN
2108
9b5b5cff 2109static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
1da177e4
LT
2110 [SET_CMDID(IP_VS_SO_SET_ADD)] = SERVICE_ARG_LEN,
2111 [SET_CMDID(IP_VS_SO_SET_EDIT)] = SERVICE_ARG_LEN,
2112 [SET_CMDID(IP_VS_SO_SET_DEL)] = SERVICE_ARG_LEN,
2113 [SET_CMDID(IP_VS_SO_SET_FLUSH)] = 0,
2114 [SET_CMDID(IP_VS_SO_SET_ADDDEST)] = SVCDEST_ARG_LEN,
2115 [SET_CMDID(IP_VS_SO_SET_DELDEST)] = SVCDEST_ARG_LEN,
2116 [SET_CMDID(IP_VS_SO_SET_EDITDEST)] = SVCDEST_ARG_LEN,
2117 [SET_CMDID(IP_VS_SO_SET_TIMEOUT)] = TIMEOUT_ARG_LEN,
2118 [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)] = DAEMON_ARG_LEN,
2119 [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)] = DAEMON_ARG_LEN,
2120 [SET_CMDID(IP_VS_SO_SET_ZERO)] = SERVICE_ARG_LEN,
2121};
2122
c860c6b1
JV
2123static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2124 struct ip_vs_service_user *usvc_compat)
2125{
0d1e71b0
SH
2126 memset(usvc, 0, sizeof(*usvc));
2127
c860c6b1
JV
2128 usvc->af = AF_INET;
2129 usvc->protocol = usvc_compat->protocol;
2130 usvc->addr.ip = usvc_compat->addr;
2131 usvc->port = usvc_compat->port;
2132 usvc->fwmark = usvc_compat->fwmark;
2133
2134 /* Deep copy of sched_name is not needed here */
2135 usvc->sched_name = usvc_compat->sched_name;
2136
2137 usvc->flags = usvc_compat->flags;
2138 usvc->timeout = usvc_compat->timeout;
2139 usvc->netmask = usvc_compat->netmask;
2140}
2141
2142static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
2143 struct ip_vs_dest_user *udest_compat)
2144{
0d1e71b0
SH
2145 memset(udest, 0, sizeof(*udest));
2146
c860c6b1
JV
2147 udest->addr.ip = udest_compat->addr;
2148 udest->port = udest_compat->port;
2149 udest->conn_flags = udest_compat->conn_flags;
2150 udest->weight = udest_compat->weight;
2151 udest->u_threshold = udest_compat->u_threshold;
2152 udest->l_threshold = udest_compat->l_threshold;
2153}
2154
1da177e4
LT
2155static int
2156do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2157{
fc723250 2158 struct net *net = sock_net(sk);
1da177e4
LT
2159 int ret;
2160 unsigned char arg[MAX_ARG_LEN];
c860c6b1
JV
2161 struct ip_vs_service_user *usvc_compat;
2162 struct ip_vs_service_user_kern usvc;
1da177e4 2163 struct ip_vs_service *svc;
c860c6b1
JV
2164 struct ip_vs_dest_user *udest_compat;
2165 struct ip_vs_dest_user_kern udest;
1da177e4
LT
2166
2167 if (!capable(CAP_NET_ADMIN))
2168 return -EPERM;
2169
04bcef2a
AV
2170 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_SET_MAX)
2171 return -EINVAL;
2172 if (len < 0 || len > MAX_ARG_LEN)
2173 return -EINVAL;
1da177e4 2174 if (len != set_arglen[SET_CMDID(cmd)]) {
1e3e238e
HE
2175 pr_err("set_ctl: len %u != %u\n",
2176 len, set_arglen[SET_CMDID(cmd)]);
1da177e4
LT
2177 return -EINVAL;
2178 }
2179
2180 if (copy_from_user(arg, user, len) != 0)
2181 return -EFAULT;
2182
2183 /* increase the module use count */
2184 ip_vs_use_count_inc();
2185
14cc3e2b 2186 if (mutex_lock_interruptible(&__ip_vs_mutex)) {
1da177e4
LT
2187 ret = -ERESTARTSYS;
2188 goto out_dec;
2189 }
2190
2191 if (cmd == IP_VS_SO_SET_FLUSH) {
2192 /* Flush the virtual service */
fc723250 2193 ret = ip_vs_flush(net);
1da177e4
LT
2194 goto out_unlock;
2195 } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
2196 /* Set timeout values for (tcp tcpfin udp) */
9330419d 2197 ret = ip_vs_set_timeout(net, (struct ip_vs_timeout_user *)arg);
1da177e4
LT
2198 goto out_unlock;
2199 } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
2200 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
f131315f
HS
2201 ret = start_sync_thread(net, dm->state, dm->mcast_ifn,
2202 dm->syncid);
1da177e4
LT
2203 goto out_unlock;
2204 } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
2205 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
f131315f 2206 ret = stop_sync_thread(net, dm->state);
1da177e4
LT
2207 goto out_unlock;
2208 }
2209
c860c6b1
JV
2210 usvc_compat = (struct ip_vs_service_user *)arg;
2211 udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
2212
2213 /* We only use the new structs internally, so copy userspace compat
2214 * structs to extended internal versions */
2215 ip_vs_copy_usvc_compat(&usvc, usvc_compat);
2216 ip_vs_copy_udest_compat(&udest, udest_compat);
1da177e4
LT
2217
2218 if (cmd == IP_VS_SO_SET_ZERO) {
2219 /* if no service address is set, zero counters in all */
c860c6b1 2220 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
fc723250 2221 ret = ip_vs_zero_all(net);
1da177e4
LT
2222 goto out_unlock;
2223 }
2224 }
2225
2906f66a
VMR
2226 /* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */
2227 if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP &&
2228 usvc.protocol != IPPROTO_SCTP) {
1e3e238e
HE
2229 pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n",
2230 usvc.protocol, &usvc.addr.ip,
2231 ntohs(usvc.port), usvc.sched_name);
1da177e4
LT
2232 ret = -EFAULT;
2233 goto out_unlock;
2234 }
2235
2236 /* Lookup the exact service by <protocol, addr, port> or fwmark */
c860c6b1 2237 if (usvc.fwmark == 0)
fc723250 2238 svc = __ip_vs_service_find(net, usvc.af, usvc.protocol,
26c15cfd 2239 &usvc.addr, usvc.port);
1da177e4 2240 else
fc723250 2241 svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark);
1da177e4
LT
2242
2243 if (cmd != IP_VS_SO_SET_ADD
c860c6b1 2244 && (svc == NULL || svc->protocol != usvc.protocol)) {
1da177e4 2245 ret = -ESRCH;
26c15cfd 2246 goto out_unlock;
1da177e4
LT
2247 }
2248
2249 switch (cmd) {
2250 case IP_VS_SO_SET_ADD:
2251 if (svc != NULL)
2252 ret = -EEXIST;
2253 else
fc723250 2254 ret = ip_vs_add_service(net, &usvc, &svc);
1da177e4
LT
2255 break;
2256 case IP_VS_SO_SET_EDIT:
c860c6b1 2257 ret = ip_vs_edit_service(svc, &usvc);
1da177e4
LT
2258 break;
2259 case IP_VS_SO_SET_DEL:
2260 ret = ip_vs_del_service(svc);
2261 if (!ret)
2262 goto out_unlock;
2263 break;
2264 case IP_VS_SO_SET_ZERO:
2265 ret = ip_vs_zero_service(svc);
2266 break;
2267 case IP_VS_SO_SET_ADDDEST:
c860c6b1 2268 ret = ip_vs_add_dest(svc, &udest);
1da177e4
LT
2269 break;
2270 case IP_VS_SO_SET_EDITDEST:
c860c6b1 2271 ret = ip_vs_edit_dest(svc, &udest);
1da177e4
LT
2272 break;
2273 case IP_VS_SO_SET_DELDEST:
c860c6b1 2274 ret = ip_vs_del_dest(svc, &udest);
1da177e4
LT
2275 break;
2276 default:
2277 ret = -EINVAL;
2278 }
2279
1da177e4 2280 out_unlock:
14cc3e2b 2281 mutex_unlock(&__ip_vs_mutex);
1da177e4
LT
2282 out_dec:
2283 /* decrease the module use count */
2284 ip_vs_use_count_dec();
2285
2286 return ret;
2287}
2288
2289
2290static void
2291ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
2292{
2293 spin_lock_bh(&src->lock);
e9c0ce23 2294 memcpy(dst, &src->ustats, sizeof(*dst));
1da177e4
LT
2295 spin_unlock_bh(&src->lock);
2296}
2297
2298static void
2299ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2300{
2301 dst->protocol = src->protocol;
e7ade46a 2302 dst->addr = src->addr.ip;
1da177e4
LT
2303 dst->port = src->port;
2304 dst->fwmark = src->fwmark;
4da62fc7 2305 strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
1da177e4
LT
2306 dst->flags = src->flags;
2307 dst->timeout = src->timeout / HZ;
2308 dst->netmask = src->netmask;
2309 dst->num_dests = src->num_dests;
2310 ip_vs_copy_stats(&dst->stats, &src->stats);
2311}
2312
2313static inline int
fc723250
HS
2314__ip_vs_get_service_entries(struct net *net,
2315 const struct ip_vs_get_services *get,
1da177e4
LT
2316 struct ip_vs_get_services __user *uptr)
2317{
2318 int idx, count=0;
2319 struct ip_vs_service *svc;
2320 struct ip_vs_service_entry entry;
2321 int ret = 0;
2322
2323 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2324 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
f94fd041 2325 /* Only expose IPv4 entries to old interface */
fc723250 2326 if (svc->af != AF_INET || !net_eq(svc->net, net))
f94fd041
JV
2327 continue;
2328
1da177e4
LT
2329 if (count >= get->num_services)
2330 goto out;
4da62fc7 2331 memset(&entry, 0, sizeof(entry));
1da177e4
LT
2332 ip_vs_copy_service(&entry, svc);
2333 if (copy_to_user(&uptr->entrytable[count],
2334 &entry, sizeof(entry))) {
2335 ret = -EFAULT;
2336 goto out;
2337 }
2338 count++;
2339 }
2340 }
2341
2342 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2343 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
f94fd041 2344 /* Only expose IPv4 entries to old interface */
fc723250 2345 if (svc->af != AF_INET || !net_eq(svc->net, net))
f94fd041
JV
2346 continue;
2347
1da177e4
LT
2348 if (count >= get->num_services)
2349 goto out;
4da62fc7 2350 memset(&entry, 0, sizeof(entry));
1da177e4
LT
2351 ip_vs_copy_service(&entry, svc);
2352 if (copy_to_user(&uptr->entrytable[count],
2353 &entry, sizeof(entry))) {
2354 ret = -EFAULT;
2355 goto out;
2356 }
2357 count++;
2358 }
2359 }
2360 out:
2361 return ret;
2362}
2363
2364static inline int
fc723250 2365__ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
1da177e4
LT
2366 struct ip_vs_get_dests __user *uptr)
2367{
2368 struct ip_vs_service *svc;
b18610de 2369 union nf_inet_addr addr = { .ip = get->addr };
1da177e4
LT
2370 int ret = 0;
2371
2372 if (get->fwmark)
fc723250 2373 svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark);
1da177e4 2374 else
fc723250 2375 svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr,
26c15cfd 2376 get->port);
b18610de 2377
1da177e4
LT
2378 if (svc) {
2379 int count = 0;
2380 struct ip_vs_dest *dest;
2381 struct ip_vs_dest_entry entry;
2382
2383 list_for_each_entry(dest, &svc->destinations, n_list) {
2384 if (count >= get->num_dests)
2385 break;
2386
e7ade46a 2387 entry.addr = dest->addr.ip;
1da177e4
LT
2388 entry.port = dest->port;
2389 entry.conn_flags = atomic_read(&dest->conn_flags);
2390 entry.weight = atomic_read(&dest->weight);
2391 entry.u_threshold = dest->u_threshold;
2392 entry.l_threshold = dest->l_threshold;
2393 entry.activeconns = atomic_read(&dest->activeconns);
2394 entry.inactconns = atomic_read(&dest->inactconns);
2395 entry.persistconns = atomic_read(&dest->persistconns);
2396 ip_vs_copy_stats(&entry.stats, &dest->stats);
2397 if (copy_to_user(&uptr->entrytable[count],
2398 &entry, sizeof(entry))) {
2399 ret = -EFAULT;
2400 break;
2401 }
2402 count++;
2403 }
1da177e4
LT
2404 } else
2405 ret = -ESRCH;
2406 return ret;
2407}
2408
2409static inline void
9330419d 2410__ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u)
1da177e4 2411{
9330419d
HS
2412 struct ip_vs_proto_data *pd;
2413
1da177e4 2414#ifdef CONFIG_IP_VS_PROTO_TCP
9330419d
HS
2415 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2416 u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2417 u->tcp_fin_timeout = pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
1da177e4
LT
2418#endif
2419#ifdef CONFIG_IP_VS_PROTO_UDP
9330419d 2420 pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
1da177e4 2421 u->udp_timeout =
9330419d 2422 pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
1da177e4
LT
2423#endif
2424}
2425
2426
2427#define GET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
2428#define GET_INFO_ARG_LEN (sizeof(struct ip_vs_getinfo))
2429#define GET_SERVICES_ARG_LEN (sizeof(struct ip_vs_get_services))
2430#define GET_SERVICE_ARG_LEN (sizeof(struct ip_vs_service_entry))
2431#define GET_DESTS_ARG_LEN (sizeof(struct ip_vs_get_dests))
2432#define GET_TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
2433#define GET_DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user) * 2)
2434
9b5b5cff 2435static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
1da177e4
LT
2436 [GET_CMDID(IP_VS_SO_GET_VERSION)] = 64,
2437 [GET_CMDID(IP_VS_SO_GET_INFO)] = GET_INFO_ARG_LEN,
2438 [GET_CMDID(IP_VS_SO_GET_SERVICES)] = GET_SERVICES_ARG_LEN,
2439 [GET_CMDID(IP_VS_SO_GET_SERVICE)] = GET_SERVICE_ARG_LEN,
2440 [GET_CMDID(IP_VS_SO_GET_DESTS)] = GET_DESTS_ARG_LEN,
2441 [GET_CMDID(IP_VS_SO_GET_TIMEOUT)] = GET_TIMEOUT_ARG_LEN,
2442 [GET_CMDID(IP_VS_SO_GET_DAEMON)] = GET_DAEMON_ARG_LEN,
2443};
2444
2445static int
2446do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2447{
2448 unsigned char arg[128];
2449 int ret = 0;
04bcef2a 2450 unsigned int copylen;
fc723250 2451 struct net *net = sock_net(sk);
f131315f 2452 struct netns_ipvs *ipvs = net_ipvs(net);
1da177e4 2453
fc723250 2454 BUG_ON(!net);
1da177e4
LT
2455 if (!capable(CAP_NET_ADMIN))
2456 return -EPERM;
2457
04bcef2a
AV
2458 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_GET_MAX)
2459 return -EINVAL;
2460
1da177e4 2461 if (*len < get_arglen[GET_CMDID(cmd)]) {
1e3e238e
HE
2462 pr_err("get_ctl: len %u < %u\n",
2463 *len, get_arglen[GET_CMDID(cmd)]);
1da177e4
LT
2464 return -EINVAL;
2465 }
2466
04bcef2a
AV
2467 copylen = get_arglen[GET_CMDID(cmd)];
2468 if (copylen > 128)
2469 return -EINVAL;
2470
2471 if (copy_from_user(arg, user, copylen) != 0)
1da177e4
LT
2472 return -EFAULT;
2473
14cc3e2b 2474 if (mutex_lock_interruptible(&__ip_vs_mutex))
1da177e4
LT
2475 return -ERESTARTSYS;
2476
2477 switch (cmd) {
2478 case IP_VS_SO_GET_VERSION:
2479 {
2480 char buf[64];
2481
2482 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
6f7edb48 2483 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
1da177e4
LT
2484 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2485 ret = -EFAULT;
2486 goto out;
2487 }
2488 *len = strlen(buf)+1;
2489 }
2490 break;
2491
2492 case IP_VS_SO_GET_INFO:
2493 {
2494 struct ip_vs_getinfo info;
2495 info.version = IP_VS_VERSION_CODE;
6f7edb48 2496 info.size = ip_vs_conn_tab_size;
a0840e2e 2497 info.num_services = ipvs->num_services;
1da177e4
LT
2498 if (copy_to_user(user, &info, sizeof(info)) != 0)
2499 ret = -EFAULT;
2500 }
2501 break;
2502
2503 case IP_VS_SO_GET_SERVICES:
2504 {
2505 struct ip_vs_get_services *get;
2506 int size;
2507
2508 get = (struct ip_vs_get_services *)arg;
2509 size = sizeof(*get) +
2510 sizeof(struct ip_vs_service_entry) * get->num_services;
2511 if (*len != size) {
1e3e238e 2512 pr_err("length: %u != %u\n", *len, size);
1da177e4
LT
2513 ret = -EINVAL;
2514 goto out;
2515 }
fc723250 2516 ret = __ip_vs_get_service_entries(net, get, user);
1da177e4
LT
2517 }
2518 break;
2519
2520 case IP_VS_SO_GET_SERVICE:
2521 {
2522 struct ip_vs_service_entry *entry;
2523 struct ip_vs_service *svc;
b18610de 2524 union nf_inet_addr addr;
1da177e4
LT
2525
2526 entry = (struct ip_vs_service_entry *)arg;
b18610de 2527 addr.ip = entry->addr;
1da177e4 2528 if (entry->fwmark)
fc723250 2529 svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark);
1da177e4 2530 else
fc723250
HS
2531 svc = __ip_vs_service_find(net, AF_INET,
2532 entry->protocol, &addr,
2533 entry->port);
1da177e4
LT
2534 if (svc) {
2535 ip_vs_copy_service(entry, svc);
2536 if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2537 ret = -EFAULT;
1da177e4
LT
2538 } else
2539 ret = -ESRCH;
2540 }
2541 break;
2542
2543 case IP_VS_SO_GET_DESTS:
2544 {
2545 struct ip_vs_get_dests *get;
2546 int size;
2547
2548 get = (struct ip_vs_get_dests *)arg;
2549 size = sizeof(*get) +
2550 sizeof(struct ip_vs_dest_entry) * get->num_dests;
2551 if (*len != size) {
1e3e238e 2552 pr_err("length: %u != %u\n", *len, size);
1da177e4
LT
2553 ret = -EINVAL;
2554 goto out;
2555 }
fc723250 2556 ret = __ip_vs_get_dest_entries(net, get, user);
1da177e4
LT
2557 }
2558 break;
2559
2560 case IP_VS_SO_GET_TIMEOUT:
2561 {
2562 struct ip_vs_timeout_user t;
2563
9330419d 2564 __ip_vs_get_timeouts(net, &t);
1da177e4
LT
2565 if (copy_to_user(user, &t, sizeof(t)) != 0)
2566 ret = -EFAULT;
2567 }
2568 break;
2569
2570 case IP_VS_SO_GET_DAEMON:
2571 {
2572 struct ip_vs_daemon_user d[2];
2573
2574 memset(&d, 0, sizeof(d));
f131315f 2575 if (ipvs->sync_state & IP_VS_STATE_MASTER) {
1da177e4 2576 d[0].state = IP_VS_STATE_MASTER;
f131315f
HS
2577 strlcpy(d[0].mcast_ifn, ipvs->master_mcast_ifn,
2578 sizeof(d[0].mcast_ifn));
2579 d[0].syncid = ipvs->master_syncid;
1da177e4 2580 }
f131315f 2581 if (ipvs->sync_state & IP_VS_STATE_BACKUP) {
1da177e4 2582 d[1].state = IP_VS_STATE_BACKUP;
f131315f
HS
2583 strlcpy(d[1].mcast_ifn, ipvs->backup_mcast_ifn,
2584 sizeof(d[1].mcast_ifn));
2585 d[1].syncid = ipvs->backup_syncid;
1da177e4
LT
2586 }
2587 if (copy_to_user(user, &d, sizeof(d)) != 0)
2588 ret = -EFAULT;
2589 }
2590 break;
2591
2592 default:
2593 ret = -EINVAL;
2594 }
2595
2596 out:
14cc3e2b 2597 mutex_unlock(&__ip_vs_mutex);
1da177e4
LT
2598 return ret;
2599}
2600
2601
2602static struct nf_sockopt_ops ip_vs_sockopts = {
2603 .pf = PF_INET,
2604 .set_optmin = IP_VS_BASE_CTL,
2605 .set_optmax = IP_VS_SO_SET_MAX+1,
2606 .set = do_ip_vs_set_ctl,
2607 .get_optmin = IP_VS_BASE_CTL,
2608 .get_optmax = IP_VS_SO_GET_MAX+1,
2609 .get = do_ip_vs_get_ctl,
16fcec35 2610 .owner = THIS_MODULE,
1da177e4
LT
2611};
2612
9a812198
JV
2613/*
2614 * Generic Netlink interface
2615 */
2616
2617/* IPVS genetlink family */
2618static struct genl_family ip_vs_genl_family = {
2619 .id = GENL_ID_GENERATE,
2620 .hdrsize = 0,
2621 .name = IPVS_GENL_NAME,
2622 .version = IPVS_GENL_VERSION,
2623 .maxattr = IPVS_CMD_MAX,
2624};
2625
2626/* Policy used for first-level command attributes */
2627static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
2628 [IPVS_CMD_ATTR_SERVICE] = { .type = NLA_NESTED },
2629 [IPVS_CMD_ATTR_DEST] = { .type = NLA_NESTED },
2630 [IPVS_CMD_ATTR_DAEMON] = { .type = NLA_NESTED },
2631 [IPVS_CMD_ATTR_TIMEOUT_TCP] = { .type = NLA_U32 },
2632 [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
2633 [IPVS_CMD_ATTR_TIMEOUT_UDP] = { .type = NLA_U32 },
2634};
2635
2636/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
2637static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
2638 [IPVS_DAEMON_ATTR_STATE] = { .type = NLA_U32 },
2639 [IPVS_DAEMON_ATTR_MCAST_IFN] = { .type = NLA_NUL_STRING,
2640 .len = IP_VS_IFNAME_MAXLEN },
2641 [IPVS_DAEMON_ATTR_SYNC_ID] = { .type = NLA_U32 },
2642};
2643
2644/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
2645static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2646 [IPVS_SVC_ATTR_AF] = { .type = NLA_U16 },
2647 [IPVS_SVC_ATTR_PROTOCOL] = { .type = NLA_U16 },
2648 [IPVS_SVC_ATTR_ADDR] = { .type = NLA_BINARY,
2649 .len = sizeof(union nf_inet_addr) },
2650 [IPVS_SVC_ATTR_PORT] = { .type = NLA_U16 },
2651 [IPVS_SVC_ATTR_FWMARK] = { .type = NLA_U32 },
2652 [IPVS_SVC_ATTR_SCHED_NAME] = { .type = NLA_NUL_STRING,
2653 .len = IP_VS_SCHEDNAME_MAXLEN },
0d1e71b0
SH
2654 [IPVS_SVC_ATTR_PE_NAME] = { .type = NLA_NUL_STRING,
2655 .len = IP_VS_PENAME_MAXLEN },
9a812198
JV
2656 [IPVS_SVC_ATTR_FLAGS] = { .type = NLA_BINARY,
2657 .len = sizeof(struct ip_vs_flags) },
2658 [IPVS_SVC_ATTR_TIMEOUT] = { .type = NLA_U32 },
2659 [IPVS_SVC_ATTR_NETMASK] = { .type = NLA_U32 },
2660 [IPVS_SVC_ATTR_STATS] = { .type = NLA_NESTED },
2661};
2662
2663/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
2664static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2665 [IPVS_DEST_ATTR_ADDR] = { .type = NLA_BINARY,
2666 .len = sizeof(union nf_inet_addr) },
2667 [IPVS_DEST_ATTR_PORT] = { .type = NLA_U16 },
2668 [IPVS_DEST_ATTR_FWD_METHOD] = { .type = NLA_U32 },
2669 [IPVS_DEST_ATTR_WEIGHT] = { .type = NLA_U32 },
2670 [IPVS_DEST_ATTR_U_THRESH] = { .type = NLA_U32 },
2671 [IPVS_DEST_ATTR_L_THRESH] = { .type = NLA_U32 },
2672 [IPVS_DEST_ATTR_ACTIVE_CONNS] = { .type = NLA_U32 },
2673 [IPVS_DEST_ATTR_INACT_CONNS] = { .type = NLA_U32 },
2674 [IPVS_DEST_ATTR_PERSIST_CONNS] = { .type = NLA_U32 },
2675 [IPVS_DEST_ATTR_STATS] = { .type = NLA_NESTED },
2676};
2677
2678static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2679 struct ip_vs_stats *stats)
2680{
2681 struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2682 if (!nl_stats)
2683 return -EMSGSIZE;
2684
2685 spin_lock_bh(&stats->lock);
2686
e9c0ce23
SW
2687 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->ustats.conns);
2688 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->ustats.inpkts);
2689 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->ustats.outpkts);
2690 NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->ustats.inbytes);
2691 NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->ustats.outbytes);
2692 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->ustats.cps);
2693 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->ustats.inpps);
2694 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->ustats.outpps);
2695 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->ustats.inbps);
2696 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->ustats.outbps);
9a812198
JV
2697
2698 spin_unlock_bh(&stats->lock);
2699
2700 nla_nest_end(skb, nl_stats);
2701
2702 return 0;
2703
2704nla_put_failure:
2705 spin_unlock_bh(&stats->lock);
2706 nla_nest_cancel(skb, nl_stats);
2707 return -EMSGSIZE;
2708}
2709
2710static int ip_vs_genl_fill_service(struct sk_buff *skb,
2711 struct ip_vs_service *svc)
2712{
2713 struct nlattr *nl_service;
2714 struct ip_vs_flags flags = { .flags = svc->flags,
2715 .mask = ~0 };
2716
2717 nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2718 if (!nl_service)
2719 return -EMSGSIZE;
2720
f94fd041 2721 NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, svc->af);
9a812198
JV
2722
2723 if (svc->fwmark) {
2724 NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
2725 } else {
2726 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
2727 NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
2728 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
2729 }
2730
2731 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
0d1e71b0
SH
2732 if (svc->pe)
2733 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_PE_NAME, svc->pe->name);
9a812198
JV
2734 NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
2735 NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
2736 NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
2737
2738 if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2739 goto nla_put_failure;
2740
2741 nla_nest_end(skb, nl_service);
2742
2743 return 0;
2744
2745nla_put_failure:
2746 nla_nest_cancel(skb, nl_service);
2747 return -EMSGSIZE;
2748}
2749
2750static int ip_vs_genl_dump_service(struct sk_buff *skb,
2751 struct ip_vs_service *svc,
2752 struct netlink_callback *cb)
2753{
2754 void *hdr;
2755
2756 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2757 &ip_vs_genl_family, NLM_F_MULTI,
2758 IPVS_CMD_NEW_SERVICE);
2759 if (!hdr)
2760 return -EMSGSIZE;
2761
2762 if (ip_vs_genl_fill_service(skb, svc) < 0)
2763 goto nla_put_failure;
2764
2765 return genlmsg_end(skb, hdr);
2766
2767nla_put_failure:
2768 genlmsg_cancel(skb, hdr);
2769 return -EMSGSIZE;
2770}
2771
2772static int ip_vs_genl_dump_services(struct sk_buff *skb,
2773 struct netlink_callback *cb)
2774{
2775 int idx = 0, i;
2776 int start = cb->args[0];
2777 struct ip_vs_service *svc;
fc723250 2778 struct net *net = skb_sknet(skb);
9a812198
JV
2779
2780 mutex_lock(&__ip_vs_mutex);
2781 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2782 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
fc723250 2783 if (++idx <= start || !net_eq(svc->net, net))
9a812198
JV
2784 continue;
2785 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2786 idx--;
2787 goto nla_put_failure;
2788 }
2789 }
2790 }
2791
2792 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2793 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
fc723250 2794 if (++idx <= start || !net_eq(svc->net, net))
9a812198
JV
2795 continue;
2796 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2797 idx--;
2798 goto nla_put_failure;
2799 }
2800 }
2801 }
2802
2803nla_put_failure:
2804 mutex_unlock(&__ip_vs_mutex);
2805 cb->args[0] = idx;
2806
2807 return skb->len;
2808}
2809
fc723250
HS
2810static int ip_vs_genl_parse_service(struct net *net,
2811 struct ip_vs_service_user_kern *usvc,
26c15cfd
JA
2812 struct nlattr *nla, int full_entry,
2813 struct ip_vs_service **ret_svc)
9a812198
JV
2814{
2815 struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
2816 struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
26c15cfd 2817 struct ip_vs_service *svc;
9a812198
JV
2818
2819 /* Parse mandatory identifying service fields first */
2820 if (nla == NULL ||
2821 nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
2822 return -EINVAL;
2823
2824 nla_af = attrs[IPVS_SVC_ATTR_AF];
2825 nla_protocol = attrs[IPVS_SVC_ATTR_PROTOCOL];
2826 nla_addr = attrs[IPVS_SVC_ATTR_ADDR];
2827 nla_port = attrs[IPVS_SVC_ATTR_PORT];
2828 nla_fwmark = attrs[IPVS_SVC_ATTR_FWMARK];
2829
2830 if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
2831 return -EINVAL;
2832
258c8893
SH
2833 memset(usvc, 0, sizeof(*usvc));
2834
c860c6b1 2835 usvc->af = nla_get_u16(nla_af);
f94fd041
JV
2836#ifdef CONFIG_IP_VS_IPV6
2837 if (usvc->af != AF_INET && usvc->af != AF_INET6)
2838#else
2839 if (usvc->af != AF_INET)
2840#endif
9a812198
JV
2841 return -EAFNOSUPPORT;
2842
2843 if (nla_fwmark) {
2844 usvc->protocol = IPPROTO_TCP;
2845 usvc->fwmark = nla_get_u32(nla_fwmark);
2846 } else {
2847 usvc->protocol = nla_get_u16(nla_protocol);
2848 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
2849 usvc->port = nla_get_u16(nla_port);
2850 usvc->fwmark = 0;
2851 }
2852
26c15cfd 2853 if (usvc->fwmark)
fc723250 2854 svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark);
26c15cfd 2855 else
fc723250 2856 svc = __ip_vs_service_find(net, usvc->af, usvc->protocol,
26c15cfd
JA
2857 &usvc->addr, usvc->port);
2858 *ret_svc = svc;
2859
9a812198
JV
2860 /* If a full entry was requested, check for the additional fields */
2861 if (full_entry) {
0d1e71b0 2862 struct nlattr *nla_sched, *nla_flags, *nla_pe, *nla_timeout,
9a812198
JV
2863 *nla_netmask;
2864 struct ip_vs_flags flags;
9a812198
JV
2865
2866 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
0d1e71b0 2867 nla_pe = attrs[IPVS_SVC_ATTR_PE_NAME];
9a812198
JV
2868 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
2869 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
2870 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
2871
2872 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
2873 return -EINVAL;
2874
2875 nla_memcpy(&flags, nla_flags, sizeof(flags));
2876
2877 /* prefill flags from service if it already exists */
26c15cfd 2878 if (svc)
9a812198 2879 usvc->flags = svc->flags;
9a812198
JV
2880
2881 /* set new flags from userland */
2882 usvc->flags = (usvc->flags & ~flags.mask) |
2883 (flags.flags & flags.mask);
c860c6b1 2884 usvc->sched_name = nla_data(nla_sched);
0d1e71b0 2885 usvc->pe_name = nla_pe ? nla_data(nla_pe) : NULL;
9a812198
JV
2886 usvc->timeout = nla_get_u32(nla_timeout);
2887 usvc->netmask = nla_get_u32(nla_netmask);
2888 }
2889
2890 return 0;
2891}
2892
fc723250
HS
2893static struct ip_vs_service *ip_vs_genl_find_service(struct net *net,
2894 struct nlattr *nla)
9a812198 2895{
c860c6b1 2896 struct ip_vs_service_user_kern usvc;
26c15cfd 2897 struct ip_vs_service *svc;
9a812198
JV
2898 int ret;
2899
fc723250 2900 ret = ip_vs_genl_parse_service(net, &usvc, nla, 0, &svc);
26c15cfd 2901 return ret ? ERR_PTR(ret) : svc;
9a812198
JV
2902}
2903
2904static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
2905{
2906 struct nlattr *nl_dest;
2907
2908 nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
2909 if (!nl_dest)
2910 return -EMSGSIZE;
2911
2912 NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
2913 NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
2914
2915 NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
2916 atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
2917 NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
2918 NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
2919 NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
2920 NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
2921 atomic_read(&dest->activeconns));
2922 NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
2923 atomic_read(&dest->inactconns));
2924 NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
2925 atomic_read(&dest->persistconns));
2926
2927 if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
2928 goto nla_put_failure;
2929
2930 nla_nest_end(skb, nl_dest);
2931
2932 return 0;
2933
2934nla_put_failure:
2935 nla_nest_cancel(skb, nl_dest);
2936 return -EMSGSIZE;
2937}
2938
2939static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
2940 struct netlink_callback *cb)
2941{
2942 void *hdr;
2943
2944 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2945 &ip_vs_genl_family, NLM_F_MULTI,
2946 IPVS_CMD_NEW_DEST);
2947 if (!hdr)
2948 return -EMSGSIZE;
2949
2950 if (ip_vs_genl_fill_dest(skb, dest) < 0)
2951 goto nla_put_failure;
2952
2953 return genlmsg_end(skb, hdr);
2954
2955nla_put_failure:
2956 genlmsg_cancel(skb, hdr);
2957 return -EMSGSIZE;
2958}
2959
2960static int ip_vs_genl_dump_dests(struct sk_buff *skb,
2961 struct netlink_callback *cb)
2962{
2963 int idx = 0;
2964 int start = cb->args[0];
2965 struct ip_vs_service *svc;
2966 struct ip_vs_dest *dest;
2967 struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
a0840e2e 2968 struct net *net = skb_sknet(skb);
9a812198
JV
2969
2970 mutex_lock(&__ip_vs_mutex);
2971
2972 /* Try to find the service for which to dump destinations */
2973 if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
2974 IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
2975 goto out_err;
2976
a0840e2e 2977
fc723250 2978 svc = ip_vs_genl_find_service(net, attrs[IPVS_CMD_ATTR_SERVICE]);
9a812198
JV
2979 if (IS_ERR(svc) || svc == NULL)
2980 goto out_err;
2981
2982 /* Dump the destinations */
2983 list_for_each_entry(dest, &svc->destinations, n_list) {
2984 if (++idx <= start)
2985 continue;
2986 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
2987 idx--;
2988 goto nla_put_failure;
2989 }
2990 }
2991
2992nla_put_failure:
2993 cb->args[0] = idx;
9a812198
JV
2994
2995out_err:
2996 mutex_unlock(&__ip_vs_mutex);
2997
2998 return skb->len;
2999}
3000
c860c6b1 3001static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
9a812198
JV
3002 struct nlattr *nla, int full_entry)
3003{
3004 struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
3005 struct nlattr *nla_addr, *nla_port;
3006
3007 /* Parse mandatory identifying destination fields first */
3008 if (nla == NULL ||
3009 nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
3010 return -EINVAL;
3011
3012 nla_addr = attrs[IPVS_DEST_ATTR_ADDR];
3013 nla_port = attrs[IPVS_DEST_ATTR_PORT];
3014
3015 if (!(nla_addr && nla_port))
3016 return -EINVAL;
3017
258c8893
SH
3018 memset(udest, 0, sizeof(*udest));
3019
9a812198
JV
3020 nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
3021 udest->port = nla_get_u16(nla_port);
3022
3023 /* If a full entry was requested, check for the additional fields */
3024 if (full_entry) {
3025 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
3026 *nla_l_thresh;
3027
3028 nla_fwd = attrs[IPVS_DEST_ATTR_FWD_METHOD];
3029 nla_weight = attrs[IPVS_DEST_ATTR_WEIGHT];
3030 nla_u_thresh = attrs[IPVS_DEST_ATTR_U_THRESH];
3031 nla_l_thresh = attrs[IPVS_DEST_ATTR_L_THRESH];
3032
3033 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
3034 return -EINVAL;
3035
3036 udest->conn_flags = nla_get_u32(nla_fwd)
3037 & IP_VS_CONN_F_FWD_MASK;
3038 udest->weight = nla_get_u32(nla_weight);
3039 udest->u_threshold = nla_get_u32(nla_u_thresh);
3040 udest->l_threshold = nla_get_u32(nla_l_thresh);
3041 }
3042
3043 return 0;
3044}
3045
3046static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
3047 const char *mcast_ifn, __be32 syncid)
3048{
3049 struct nlattr *nl_daemon;
3050
3051 nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
3052 if (!nl_daemon)
3053 return -EMSGSIZE;
3054
3055 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
3056 NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
3057 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
3058
3059 nla_nest_end(skb, nl_daemon);
3060
3061 return 0;
3062
3063nla_put_failure:
3064 nla_nest_cancel(skb, nl_daemon);
3065 return -EMSGSIZE;
3066}
3067
3068static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
3069 const char *mcast_ifn, __be32 syncid,
3070 struct netlink_callback *cb)
3071{
3072 void *hdr;
3073 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
3074 &ip_vs_genl_family, NLM_F_MULTI,
3075 IPVS_CMD_NEW_DAEMON);
3076 if (!hdr)
3077 return -EMSGSIZE;
3078
3079 if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
3080 goto nla_put_failure;
3081
3082 return genlmsg_end(skb, hdr);
3083
3084nla_put_failure:
3085 genlmsg_cancel(skb, hdr);
3086 return -EMSGSIZE;
3087}
3088
3089static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
3090 struct netlink_callback *cb)
3091{
f131315f
HS
3092 struct net *net = skb_net(skb);
3093 struct netns_ipvs *ipvs = net_ipvs(net);
3094
9a812198 3095 mutex_lock(&__ip_vs_mutex);
f131315f 3096 if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
9a812198 3097 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
f131315f
HS
3098 ipvs->master_mcast_ifn,
3099 ipvs->master_syncid, cb) < 0)
9a812198
JV
3100 goto nla_put_failure;
3101
3102 cb->args[0] = 1;
3103 }
3104
f131315f 3105 if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
9a812198 3106 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
f131315f
HS
3107 ipvs->backup_mcast_ifn,
3108 ipvs->backup_syncid, cb) < 0)
9a812198
JV
3109 goto nla_put_failure;
3110
3111 cb->args[1] = 1;
3112 }
3113
3114nla_put_failure:
3115 mutex_unlock(&__ip_vs_mutex);
3116
3117 return skb->len;
3118}
3119
f131315f 3120static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs)
9a812198
JV
3121{
3122 if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
3123 attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
3124 attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
3125 return -EINVAL;
3126
f131315f
HS
3127 return start_sync_thread(net,
3128 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
9a812198
JV
3129 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
3130 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
3131}
3132
f131315f 3133static int ip_vs_genl_del_daemon(struct net *net, struct nlattr **attrs)
9a812198
JV
3134{
3135 if (!attrs[IPVS_DAEMON_ATTR_STATE])
3136 return -EINVAL;
3137
f131315f
HS
3138 return stop_sync_thread(net,
3139 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
9a812198
JV
3140}
3141
9330419d 3142static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs)
9a812198
JV
3143{
3144 struct ip_vs_timeout_user t;
3145
9330419d 3146 __ip_vs_get_timeouts(net, &t);
9a812198
JV
3147
3148 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
3149 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
3150
3151 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
3152 t.tcp_fin_timeout =
3153 nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
3154
3155 if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
3156 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
3157
9330419d 3158 return ip_vs_set_timeout(net, &t);
9a812198
JV
3159}
3160
3161static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3162{
3163 struct ip_vs_service *svc = NULL;
c860c6b1
JV
3164 struct ip_vs_service_user_kern usvc;
3165 struct ip_vs_dest_user_kern udest;
9a812198
JV
3166 int ret = 0, cmd;
3167 int need_full_svc = 0, need_full_dest = 0;
fc723250 3168 struct net *net;
a0840e2e 3169 struct netns_ipvs *ipvs;
9a812198 3170
fc723250 3171 net = skb_sknet(skb);
a0840e2e 3172 ipvs = net_ipvs(net);
9a812198
JV
3173 cmd = info->genlhdr->cmd;
3174
3175 mutex_lock(&__ip_vs_mutex);
3176
3177 if (cmd == IPVS_CMD_FLUSH) {
fc723250 3178 ret = ip_vs_flush(net);
9a812198
JV
3179 goto out;
3180 } else if (cmd == IPVS_CMD_SET_CONFIG) {
9330419d 3181 ret = ip_vs_genl_set_config(net, info->attrs);
9a812198
JV
3182 goto out;
3183 } else if (cmd == IPVS_CMD_NEW_DAEMON ||
3184 cmd == IPVS_CMD_DEL_DAEMON) {
3185
3186 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
3187
3188 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
3189 nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
3190 info->attrs[IPVS_CMD_ATTR_DAEMON],
3191 ip_vs_daemon_policy)) {
3192 ret = -EINVAL;
3193 goto out;
3194 }
3195
3196 if (cmd == IPVS_CMD_NEW_DAEMON)
f131315f 3197 ret = ip_vs_genl_new_daemon(net, daemon_attrs);
9a812198 3198 else
f131315f 3199 ret = ip_vs_genl_del_daemon(net, daemon_attrs);
9a812198
JV
3200 goto out;
3201 } else if (cmd == IPVS_CMD_ZERO &&
3202 !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
fc723250 3203 ret = ip_vs_zero_all(net);
9a812198
JV
3204 goto out;
3205 }
3206
3207 /* All following commands require a service argument, so check if we
3208 * received a valid one. We need a full service specification when
3209 * adding / editing a service. Only identifying members otherwise. */
3210 if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
3211 need_full_svc = 1;
3212
fc723250 3213 ret = ip_vs_genl_parse_service(net, &usvc,
9a812198 3214 info->attrs[IPVS_CMD_ATTR_SERVICE],
26c15cfd 3215 need_full_svc, &svc);
9a812198
JV
3216 if (ret)
3217 goto out;
3218
9a812198
JV
3219 /* Unless we're adding a new service, the service must already exist */
3220 if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
3221 ret = -ESRCH;
3222 goto out;
3223 }
3224
3225 /* Destination commands require a valid destination argument. For
3226 * adding / editing a destination, we need a full destination
3227 * specification. */
3228 if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
3229 cmd == IPVS_CMD_DEL_DEST) {
3230 if (cmd != IPVS_CMD_DEL_DEST)
3231 need_full_dest = 1;
3232
3233 ret = ip_vs_genl_parse_dest(&udest,
3234 info->attrs[IPVS_CMD_ATTR_DEST],
3235 need_full_dest);
3236 if (ret)
3237 goto out;
3238 }
3239
3240 switch (cmd) {
3241 case IPVS_CMD_NEW_SERVICE:
3242 if (svc == NULL)
fc723250 3243 ret = ip_vs_add_service(net, &usvc, &svc);
9a812198
JV
3244 else
3245 ret = -EEXIST;
3246 break;
3247 case IPVS_CMD_SET_SERVICE:
3248 ret = ip_vs_edit_service(svc, &usvc);
3249 break;
3250 case IPVS_CMD_DEL_SERVICE:
3251 ret = ip_vs_del_service(svc);
26c15cfd 3252 /* do not use svc, it can be freed */
9a812198
JV
3253 break;
3254 case IPVS_CMD_NEW_DEST:
3255 ret = ip_vs_add_dest(svc, &udest);
3256 break;
3257 case IPVS_CMD_SET_DEST:
3258 ret = ip_vs_edit_dest(svc, &udest);
3259 break;
3260 case IPVS_CMD_DEL_DEST:
3261 ret = ip_vs_del_dest(svc, &udest);
3262 break;
3263 case IPVS_CMD_ZERO:
3264 ret = ip_vs_zero_service(svc);
3265 break;
3266 default:
3267 ret = -EINVAL;
3268 }
3269
3270out:
9a812198
JV
3271 mutex_unlock(&__ip_vs_mutex);
3272
3273 return ret;
3274}
3275
3276static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3277{
3278 struct sk_buff *msg;
3279 void *reply;
3280 int ret, cmd, reply_cmd;
fc723250 3281 struct net *net;
a0840e2e 3282 struct netns_ipvs *ipvs;
9a812198 3283
fc723250 3284 net = skb_sknet(skb);
a0840e2e 3285 ipvs = net_ipvs(net);
9a812198
JV
3286 cmd = info->genlhdr->cmd;
3287
3288 if (cmd == IPVS_CMD_GET_SERVICE)
3289 reply_cmd = IPVS_CMD_NEW_SERVICE;
3290 else if (cmd == IPVS_CMD_GET_INFO)
3291 reply_cmd = IPVS_CMD_SET_INFO;
3292 else if (cmd == IPVS_CMD_GET_CONFIG)
3293 reply_cmd = IPVS_CMD_SET_CONFIG;
3294 else {
1e3e238e 3295 pr_err("unknown Generic Netlink command\n");
9a812198
JV
3296 return -EINVAL;
3297 }
3298
3299 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
3300 if (!msg)
3301 return -ENOMEM;
3302
3303 mutex_lock(&__ip_vs_mutex);
3304
3305 reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3306 if (reply == NULL)
3307 goto nla_put_failure;
3308
3309 switch (cmd) {
3310 case IPVS_CMD_GET_SERVICE:
3311 {
3312 struct ip_vs_service *svc;
3313
fc723250
HS
3314 svc = ip_vs_genl_find_service(net,
3315 info->attrs[IPVS_CMD_ATTR_SERVICE]);
9a812198
JV
3316 if (IS_ERR(svc)) {
3317 ret = PTR_ERR(svc);
3318 goto out_err;
3319 } else if (svc) {
3320 ret = ip_vs_genl_fill_service(msg, svc);
9a812198
JV
3321 if (ret)
3322 goto nla_put_failure;
3323 } else {
3324 ret = -ESRCH;
3325 goto out_err;
3326 }
3327
3328 break;
3329 }
3330
3331 case IPVS_CMD_GET_CONFIG:
3332 {
3333 struct ip_vs_timeout_user t;
3334
9330419d 3335 __ip_vs_get_timeouts(net, &t);
9a812198
JV
3336#ifdef CONFIG_IP_VS_PROTO_TCP
3337 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
3338 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3339 t.tcp_fin_timeout);
3340#endif
3341#ifdef CONFIG_IP_VS_PROTO_UDP
3342 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
3343#endif
3344
3345 break;
3346 }
3347
3348 case IPVS_CMD_GET_INFO:
3349 NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
3350 NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
6f7edb48 3351 ip_vs_conn_tab_size);
9a812198
JV
3352 break;
3353 }
3354
3355 genlmsg_end(msg, reply);
134e6375 3356 ret = genlmsg_reply(msg, info);
9a812198
JV
3357 goto out;
3358
3359nla_put_failure:
1e3e238e 3360 pr_err("not enough space in Netlink message\n");
9a812198
JV
3361 ret = -EMSGSIZE;
3362
3363out_err:
3364 nlmsg_free(msg);
3365out:
3366 mutex_unlock(&__ip_vs_mutex);
3367
3368 return ret;
3369}
3370
3371
3372static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
3373 {
3374 .cmd = IPVS_CMD_NEW_SERVICE,
3375 .flags = GENL_ADMIN_PERM,
3376 .policy = ip_vs_cmd_policy,
3377 .doit = ip_vs_genl_set_cmd,
3378 },
3379 {
3380 .cmd = IPVS_CMD_SET_SERVICE,
3381 .flags = GENL_ADMIN_PERM,
3382 .policy = ip_vs_cmd_policy,
3383 .doit = ip_vs_genl_set_cmd,
3384 },
3385 {
3386 .cmd = IPVS_CMD_DEL_SERVICE,
3387 .flags = GENL_ADMIN_PERM,
3388 .policy = ip_vs_cmd_policy,
3389 .doit = ip_vs_genl_set_cmd,
3390 },
3391 {
3392 .cmd = IPVS_CMD_GET_SERVICE,
3393 .flags = GENL_ADMIN_PERM,
3394 .doit = ip_vs_genl_get_cmd,
3395 .dumpit = ip_vs_genl_dump_services,
3396 .policy = ip_vs_cmd_policy,
3397 },
3398 {
3399 .cmd = IPVS_CMD_NEW_DEST,
3400 .flags = GENL_ADMIN_PERM,
3401 .policy = ip_vs_cmd_policy,
3402 .doit = ip_vs_genl_set_cmd,
3403 },
3404 {
3405 .cmd = IPVS_CMD_SET_DEST,
3406 .flags = GENL_ADMIN_PERM,
3407 .policy = ip_vs_cmd_policy,
3408 .doit = ip_vs_genl_set_cmd,
3409 },
3410 {
3411 .cmd = IPVS_CMD_DEL_DEST,
3412 .flags = GENL_ADMIN_PERM,
3413 .policy = ip_vs_cmd_policy,
3414 .doit = ip_vs_genl_set_cmd,
3415 },
3416 {
3417 .cmd = IPVS_CMD_GET_DEST,
3418 .flags = GENL_ADMIN_PERM,
3419 .policy = ip_vs_cmd_policy,
3420 .dumpit = ip_vs_genl_dump_dests,
3421 },
3422 {
3423 .cmd = IPVS_CMD_NEW_DAEMON,
3424 .flags = GENL_ADMIN_PERM,
3425 .policy = ip_vs_cmd_policy,
3426 .doit = ip_vs_genl_set_cmd,
3427 },
3428 {
3429 .cmd = IPVS_CMD_DEL_DAEMON,
3430 .flags = GENL_ADMIN_PERM,
3431 .policy = ip_vs_cmd_policy,
3432 .doit = ip_vs_genl_set_cmd,
3433 },
3434 {
3435 .cmd = IPVS_CMD_GET_DAEMON,
3436 .flags = GENL_ADMIN_PERM,
3437 .dumpit = ip_vs_genl_dump_daemons,
3438 },
3439 {
3440 .cmd = IPVS_CMD_SET_CONFIG,
3441 .flags = GENL_ADMIN_PERM,
3442 .policy = ip_vs_cmd_policy,
3443 .doit = ip_vs_genl_set_cmd,
3444 },
3445 {
3446 .cmd = IPVS_CMD_GET_CONFIG,
3447 .flags = GENL_ADMIN_PERM,
3448 .doit = ip_vs_genl_get_cmd,
3449 },
3450 {
3451 .cmd = IPVS_CMD_GET_INFO,
3452 .flags = GENL_ADMIN_PERM,
3453 .doit = ip_vs_genl_get_cmd,
3454 },
3455 {
3456 .cmd = IPVS_CMD_ZERO,
3457 .flags = GENL_ADMIN_PERM,
3458 .policy = ip_vs_cmd_policy,
3459 .doit = ip_vs_genl_set_cmd,
3460 },
3461 {
3462 .cmd = IPVS_CMD_FLUSH,
3463 .flags = GENL_ADMIN_PERM,
3464 .doit = ip_vs_genl_set_cmd,
3465 },
3466};
3467
3468static int __init ip_vs_genl_register(void)
3469{
8f698d54
MM
3470 return genl_register_family_with_ops(&ip_vs_genl_family,
3471 ip_vs_genl_ops, ARRAY_SIZE(ip_vs_genl_ops));
9a812198
JV
3472}
3473
3474static void ip_vs_genl_unregister(void)
3475{
3476 genl_unregister_family(&ip_vs_genl_family);
3477}
3478
3479/* End of Generic Netlink interface definitions */
3480
61b1ab45
HS
3481/*
3482 * per netns intit/exit func.
3483 */
3484int __net_init __ip_vs_control_init(struct net *net)
3485{
fc723250
HS
3486 int idx;
3487 struct netns_ipvs *ipvs = net_ipvs(net);
a0840e2e 3488 struct ctl_table *tbl;
fc723250 3489
61b1ab45
HS
3490 if (!net_eq(net, &init_net)) /* netns not enabled yet */
3491 return -EPERM;
a0840e2e
HS
3492
3493 atomic_set(&ipvs->dropentry, 0);
3494 spin_lock_init(&ipvs->dropentry_lock);
3495 spin_lock_init(&ipvs->droppacket_lock);
3496 spin_lock_init(&ipvs->securetcp_lock);
3497 ipvs->rs_lock = __RW_LOCK_UNLOCKED(ipvs->rs_lock);
3498
3499 /* Initialize rs_table */
3500 for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
3501 INIT_LIST_HEAD(&ipvs->rs_table[idx]);
3502
f2431e6e
HS
3503 INIT_LIST_HEAD(&ipvs->dest_trash);
3504
b17fc996
HS
3505 /* procfs stats */
3506 ipvs->tot_stats = kzalloc(sizeof(struct ip_vs_stats), GFP_KERNEL);
3507 if (ipvs->tot_stats == NULL) {
3508 pr_err("%s(): no memory.\n", __func__);
3509 return -ENOMEM;
3510 }
3511 ipvs->cpustats = alloc_percpu(struct ip_vs_cpu_stats);
3512 if (!ipvs->cpustats) {
3513 pr_err("%s() alloc_percpu failed\n", __func__);
3514 goto err_alloc;
3515 }
3516 spin_lock_init(&ipvs->tot_stats->lock);
61b1ab45 3517
fc723250
HS
3518 for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
3519 INIT_LIST_HEAD(&ipvs->rs_table[idx]);
3520
61b1ab45
HS
3521 proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops);
3522 proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops);
b17fc996
HS
3523 proc_net_fops_create(net, "ip_vs_stats_percpu", 0,
3524 &ip_vs_stats_percpu_fops);
a0840e2e
HS
3525
3526 if (!net_eq(net, &init_net)) {
3527 tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL);
3528 if (tbl == NULL)
3529 goto err_dup;
3530 } else
3531 tbl = vs_vars;
3532 /* Initialize sysctl defaults */
3533 idx = 0;
3534 ipvs->sysctl_amemthresh = 1024;
3535 tbl[idx++].data = &ipvs->sysctl_amemthresh;
3536 ipvs->sysctl_am_droprate = 10;
3537 tbl[idx++].data = &ipvs->sysctl_am_droprate;
3538 tbl[idx++].data = &ipvs->sysctl_drop_entry;
3539 tbl[idx++].data = &ipvs->sysctl_drop_packet;
3540#ifdef CONFIG_IP_VS_NFCT
3541 tbl[idx++].data = &ipvs->sysctl_conntrack;
3542#endif
3543 tbl[idx++].data = &ipvs->sysctl_secure_tcp;
3544 ipvs->sysctl_snat_reroute = 1;
3545 tbl[idx++].data = &ipvs->sysctl_snat_reroute;
3546 ipvs->sysctl_sync_ver = 1;
3547 tbl[idx++].data = &ipvs->sysctl_sync_ver;
3548 tbl[idx++].data = &ipvs->sysctl_cache_bypass;
3549 tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
3550 tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template;
3551 ipvs->sysctl_sync_threshold[0] = 3;
3552 ipvs->sysctl_sync_threshold[1] = 50;
3553 tbl[idx].data = &ipvs->sysctl_sync_threshold;
3554 tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
3555 tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;
3556
3557
3558 ipvs->sysctl_hdr = register_net_sysctl_table(net, net_vs_ctl_path,
61b1ab45 3559 vs_vars);
a0840e2e 3560 if (ipvs->sysctl_hdr == NULL)
61b1ab45 3561 goto err_reg;
b17fc996 3562 ip_vs_new_estimator(net, ipvs->tot_stats);
a0840e2e 3563 ipvs->sysctl_tbl = tbl;
f6340ee0
HS
3564 /* Schedule defense work */
3565 INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler);
3566 schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
61b1ab45
HS
3567 return 0;
3568
3569err_reg:
a0840e2e
HS
3570 if (!net_eq(net, &init_net))
3571 kfree(tbl);
3572err_dup:
b17fc996
HS
3573 free_percpu(ipvs->cpustats);
3574err_alloc:
3575 kfree(ipvs->tot_stats);
61b1ab45
HS
3576 return -ENOMEM;
3577}
3578
3579static void __net_exit __ip_vs_control_cleanup(struct net *net)
3580{
b17fc996
HS
3581 struct netns_ipvs *ipvs = net_ipvs(net);
3582
61b1ab45
HS
3583 if (!net_eq(net, &init_net)) /* netns not enabled yet */
3584 return;
3585
f2431e6e 3586 ip_vs_trash_cleanup(net);
b17fc996 3587 ip_vs_kill_estimator(net, ipvs->tot_stats);
f2431e6e
HS
3588 cancel_delayed_work_sync(&ipvs->defense_work);
3589 cancel_work_sync(&ipvs->defense_work.work);
a0840e2e 3590 unregister_net_sysctl_table(ipvs->sysctl_hdr);
b17fc996 3591 proc_net_remove(net, "ip_vs_stats_percpu");
61b1ab45
HS
3592 proc_net_remove(net, "ip_vs_stats");
3593 proc_net_remove(net, "ip_vs");
b17fc996
HS
3594 free_percpu(ipvs->cpustats);
3595 kfree(ipvs->tot_stats);
61b1ab45
HS
3596}
3597
3598static struct pernet_operations ipvs_control_ops = {
3599 .init = __ip_vs_control_init,
3600 .exit = __ip_vs_control_cleanup,
3601};
1da177e4 3602
048cf48b 3603int __init ip_vs_control_init(void)
1da177e4 3604{
1da177e4 3605 int idx;
fc723250 3606 int ret;
1da177e4
LT
3607
3608 EnterFunction(2);
3609
fc723250 3610 /* Initialize svc_table, ip_vs_svc_fwm_table, rs_table */
d86bef73
EB
3611 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
3612 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3613 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3614 }
fc723250
HS
3615
3616 ret = register_pernet_subsys(&ipvs_control_ops);
3617 if (ret) {
3618 pr_err("cannot register namespace.\n");
3619 goto err;
d86bef73 3620 }
fc723250
HS
3621
3622 smp_wmb(); /* Do we really need it now ? */
d86bef73 3623
1da177e4
LT
3624 ret = nf_register_sockopt(&ip_vs_sockopts);
3625 if (ret) {
1e3e238e 3626 pr_err("cannot register sockopt.\n");
fc723250 3627 goto err_net;
1da177e4
LT
3628 }
3629
9a812198
JV
3630 ret = ip_vs_genl_register();
3631 if (ret) {
1e3e238e 3632 pr_err("cannot register Generic Netlink interface.\n");
9a812198 3633 nf_unregister_sockopt(&ip_vs_sockopts);
fc723250 3634 goto err_net;
9a812198
JV
3635 }
3636
1da177e4
LT
3637 LeaveFunction(2);
3638 return 0;
fc723250
HS
3639
3640err_net:
3641 unregister_pernet_subsys(&ipvs_control_ops);
3642err:
3643 return ret;
1da177e4
LT
3644}
3645
3646
3647void ip_vs_control_cleanup(void)
3648{
3649 EnterFunction(2);
61b1ab45 3650 unregister_pernet_subsys(&ipvs_control_ops);
9a812198 3651 ip_vs_genl_unregister();
1da177e4
LT
3652 nf_unregister_sockopt(&ip_vs_sockopts);
3653 LeaveFunction(2);
3654}
This page took 0.795099 seconds and 5 git commands to generate.