IPVS: netns, ip_vs_ctl local vars moved to ipvs struct.
[deliverable/linux.git] / net / netfilter / ipvs / ip_vs_ctl.c
CommitLineData
1da177e4
LT
1/*
2 * IPVS An implementation of the IP virtual server support for the
3 * LINUX operating system. IPVS is now implemented as a module
4 * over the NetFilter framework. IPVS can be used to build a
5 * high-performance and highly available server based on a
6 * cluster of servers.
7 *
1da177e4
LT
8 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
9 * Peter Kese <peter.kese@ijs.si>
10 * Julian Anastasov <ja@ssi.bg>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 *
17 * Changes:
18 *
19 */
20
9aada7ac
HE
21#define KMSG_COMPONENT "IPVS"
22#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
23
1da177e4
LT
24#include <linux/module.h>
25#include <linux/init.h>
26#include <linux/types.h>
4fc268d2 27#include <linux/capability.h>
1da177e4
LT
28#include <linux/fs.h>
29#include <linux/sysctl.h>
30#include <linux/proc_fs.h>
31#include <linux/workqueue.h>
32#include <linux/swap.h>
1da177e4 33#include <linux/seq_file.h>
5a0e3ad6 34#include <linux/slab.h>
1da177e4
LT
35
36#include <linux/netfilter.h>
37#include <linux/netfilter_ipv4.h>
14cc3e2b 38#include <linux/mutex.h>
1da177e4 39
457c4cbc 40#include <net/net_namespace.h>
9330419d 41#include <linux/nsproxy.h>
1da177e4 42#include <net/ip.h>
09571c7a
VB
43#ifdef CONFIG_IP_VS_IPV6
44#include <net/ipv6.h>
45#include <net/ip6_route.h>
46#endif
14c85021 47#include <net/route.h>
1da177e4 48#include <net/sock.h>
9a812198 49#include <net/genetlink.h>
1da177e4
LT
50
51#include <asm/uaccess.h>
52
53#include <net/ip_vs.h>
54
/* Mutex serializing the IPVS sockopts; [gs]etsockopt may sleep. */
static DEFINE_MUTEX(__ip_vs_mutex);

/* Reader/writer lock for the service table. */
static DEFINE_RWLOCK(__ip_vs_svc_lock);
60
1da177e4 61/* sysctl variables */
1da177e4
LT
62
63#ifdef CONFIG_IP_VS_DEBUG
/* Current debug level; statics start out as zero. */
static int sysctl_ip_vs_debug_level;

/*
 *	Return the current IPVS debug level.
 */
int
ip_vs_get_debug_level(void)
{
	return sysctl_ip_vs_debug_level;
}
70#endif
71
09571c7a
VB
72#ifdef CONFIG_IP_VS_IPV6
73/* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
74static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr)
75{
76 struct rt6_info *rt;
77 struct flowi fl = {
78 .oif = 0,
5811662b
CG
79 .fl6_dst = *addr,
80 .fl6_src = { .s6_addr32 = {0, 0, 0, 0} },
09571c7a
VB
81 };
82
83 rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
84 if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK))
85 return 1;
86
87 return 0;
88}
89#endif
1da177e4 90/*
af9debd4
JA
91 * update_defense_level is called from keventd and from sysctl,
92 * so it needs to protect itself from softirqs
1da177e4 93 */
9330419d 94static void update_defense_level(struct netns_ipvs *ipvs)
1da177e4
LT
95{
96 struct sysinfo i;
97 static int old_secure_tcp = 0;
98 int availmem;
99 int nomem;
100 int to_change = -1;
101
102 /* we only count free and buffered memory (in pages) */
103 si_meminfo(&i);
104 availmem = i.freeram + i.bufferram;
105 /* however in linux 2.5 the i.bufferram is total page cache size,
106 we need adjust it */
107 /* si_swapinfo(&i); */
108 /* availmem = availmem - (i.totalswap - i.freeswap); */
109
a0840e2e 110 nomem = (availmem < ipvs->sysctl_amemthresh);
1da177e4 111
af9debd4
JA
112 local_bh_disable();
113
1da177e4 114 /* drop_entry */
a0840e2e
HS
115 spin_lock(&ipvs->dropentry_lock);
116 switch (ipvs->sysctl_drop_entry) {
1da177e4 117 case 0:
a0840e2e 118 atomic_set(&ipvs->dropentry, 0);
1da177e4
LT
119 break;
120 case 1:
121 if (nomem) {
a0840e2e
HS
122 atomic_set(&ipvs->dropentry, 1);
123 ipvs->sysctl_drop_entry = 2;
1da177e4 124 } else {
a0840e2e 125 atomic_set(&ipvs->dropentry, 0);
1da177e4
LT
126 }
127 break;
128 case 2:
129 if (nomem) {
a0840e2e 130 atomic_set(&ipvs->dropentry, 1);
1da177e4 131 } else {
a0840e2e
HS
132 atomic_set(&ipvs->dropentry, 0);
133 ipvs->sysctl_drop_entry = 1;
1da177e4
LT
134 };
135 break;
136 case 3:
a0840e2e 137 atomic_set(&ipvs->dropentry, 1);
1da177e4
LT
138 break;
139 }
a0840e2e 140 spin_unlock(&ipvs->dropentry_lock);
1da177e4
LT
141
142 /* drop_packet */
a0840e2e
HS
143 spin_lock(&ipvs->droppacket_lock);
144 switch (ipvs->sysctl_drop_packet) {
1da177e4 145 case 0:
a0840e2e 146 ipvs->drop_rate = 0;
1da177e4
LT
147 break;
148 case 1:
149 if (nomem) {
a0840e2e
HS
150 ipvs->drop_rate = ipvs->drop_counter
151 = ipvs->sysctl_amemthresh /
152 (ipvs->sysctl_amemthresh-availmem);
153 ipvs->sysctl_drop_packet = 2;
1da177e4 154 } else {
a0840e2e 155 ipvs->drop_rate = 0;
1da177e4
LT
156 }
157 break;
158 case 2:
159 if (nomem) {
a0840e2e
HS
160 ipvs->drop_rate = ipvs->drop_counter
161 = ipvs->sysctl_amemthresh /
162 (ipvs->sysctl_amemthresh-availmem);
1da177e4 163 } else {
a0840e2e
HS
164 ipvs->drop_rate = 0;
165 ipvs->sysctl_drop_packet = 1;
1da177e4
LT
166 }
167 break;
168 case 3:
a0840e2e 169 ipvs->drop_rate = ipvs->sysctl_am_droprate;
1da177e4
LT
170 break;
171 }
a0840e2e 172 spin_unlock(&ipvs->droppacket_lock);
1da177e4
LT
173
174 /* secure_tcp */
a0840e2e
HS
175 spin_lock(&ipvs->securetcp_lock);
176 switch (ipvs->sysctl_secure_tcp) {
1da177e4
LT
177 case 0:
178 if (old_secure_tcp >= 2)
179 to_change = 0;
180 break;
181 case 1:
182 if (nomem) {
183 if (old_secure_tcp < 2)
184 to_change = 1;
a0840e2e 185 ipvs->sysctl_secure_tcp = 2;
1da177e4
LT
186 } else {
187 if (old_secure_tcp >= 2)
188 to_change = 0;
189 }
190 break;
191 case 2:
192 if (nomem) {
193 if (old_secure_tcp < 2)
194 to_change = 1;
195 } else {
196 if (old_secure_tcp >= 2)
197 to_change = 0;
a0840e2e 198 ipvs->sysctl_secure_tcp = 1;
1da177e4
LT
199 }
200 break;
201 case 3:
202 if (old_secure_tcp < 2)
203 to_change = 1;
204 break;
205 }
a0840e2e 206 old_secure_tcp = ipvs->sysctl_secure_tcp;
1da177e4 207 if (to_change >= 0)
9330419d 208 ip_vs_protocol_timeout_change(ipvs,
a0840e2e
HS
209 ipvs->sysctl_secure_tcp > 1);
210 spin_unlock(&ipvs->securetcp_lock);
af9debd4
JA
211
212 local_bh_enable();
1da177e4
LT
213}
214
215
216/*
217 * Timer for checking the defense
218 */
219#define DEFENSE_TIMER_PERIOD 1*HZ
c4028958
DH
220static void defense_work_handler(struct work_struct *work);
221static DECLARE_DELAYED_WORK(defense_work, defense_work_handler);
1da177e4 222
c4028958 223static void defense_work_handler(struct work_struct *work)
1da177e4 224{
b17fc996 225 struct netns_ipvs *ipvs = net_ipvs(&init_net);
9330419d
HS
226
227 update_defense_level(ipvs);
a0840e2e 228 if (atomic_read(&ipvs->dropentry))
1da177e4
LT
229 ip_vs_random_dropentry();
230
231 schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
232}
233
234int
235ip_vs_use_count_inc(void)
236{
237 return try_module_get(THIS_MODULE);
238}
239
240void
241ip_vs_use_count_dec(void)
242{
243 module_put(THIS_MODULE);
244}
245
246
247/*
248 * Hash table: for virtual service lookups
249 */
250#define IP_VS_SVC_TAB_BITS 8
251#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
252#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
253
254/* the service table hashed by <protocol, addr, port> */
255static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
256/* the service table hashed by fwmark */
257static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
258
1da177e4
LT
259/*
260 * Trash for destinations
261 */
262static LIST_HEAD(ip_vs_dest_trash);
263
264/*
265 * FTP & NULL virtual service counters
266 */
267static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
268static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
269
270
271/*
272 * Returns hash value for virtual service
273 */
fc723250
HS
274static inline unsigned
275ip_vs_svc_hashkey(struct net *net, int af, unsigned proto,
276 const union nf_inet_addr *addr, __be16 port)
1da177e4
LT
277{
278 register unsigned porth = ntohs(port);
b18610de 279 __be32 addr_fold = addr->ip;
1da177e4 280
b18610de
JV
281#ifdef CONFIG_IP_VS_IPV6
282 if (af == AF_INET6)
283 addr_fold = addr->ip6[0]^addr->ip6[1]^
284 addr->ip6[2]^addr->ip6[3];
285#endif
fc723250 286 addr_fold ^= ((size_t)net>>8);
b18610de
JV
287
288 return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
1da177e4
LT
289 & IP_VS_SVC_TAB_MASK;
290}
291
292/*
293 * Returns hash value of fwmark for virtual service lookup
294 */
fc723250 295static inline unsigned ip_vs_svc_fwm_hashkey(struct net *net, __u32 fwmark)
1da177e4 296{
fc723250 297 return (((size_t)net>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK;
1da177e4
LT
298}
299
300/*
fc723250 301 * Hashes a service in the ip_vs_svc_table by <netns,proto,addr,port>
1da177e4
LT
302 * or in the ip_vs_svc_fwm_table by fwmark.
303 * Should be called with locked tables.
304 */
305static int ip_vs_svc_hash(struct ip_vs_service *svc)
306{
307 unsigned hash;
308
309 if (svc->flags & IP_VS_SVC_F_HASHED) {
1e3e238e
HE
310 pr_err("%s(): request for already hashed, called from %pF\n",
311 __func__, __builtin_return_address(0));
1da177e4
LT
312 return 0;
313 }
314
315 if (svc->fwmark == 0) {
316 /*
fc723250 317 * Hash it by <netns,protocol,addr,port> in ip_vs_svc_table
1da177e4 318 */
fc723250
HS
319 hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol,
320 &svc->addr, svc->port);
1da177e4
LT
321 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
322 } else {
323 /*
fc723250 324 * Hash it by fwmark in svc_fwm_table
1da177e4 325 */
fc723250 326 hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark);
1da177e4
LT
327 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
328 }
329
330 svc->flags |= IP_VS_SVC_F_HASHED;
331 /* increase its refcnt because it is referenced by the svc table */
332 atomic_inc(&svc->refcnt);
333 return 1;
334}
335
336
337/*
fc723250 338 * Unhashes a service from svc_table / svc_fwm_table.
1da177e4
LT
339 * Should be called with locked tables.
340 */
341static int ip_vs_svc_unhash(struct ip_vs_service *svc)
342{
343 if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
1e3e238e
HE
344 pr_err("%s(): request for unhash flagged, called from %pF\n",
345 __func__, __builtin_return_address(0));
1da177e4
LT
346 return 0;
347 }
348
349 if (svc->fwmark == 0) {
fc723250 350 /* Remove it from the svc_table table */
1da177e4
LT
351 list_del(&svc->s_list);
352 } else {
fc723250 353 /* Remove it from the svc_fwm_table table */
1da177e4
LT
354 list_del(&svc->f_list);
355 }
356
357 svc->flags &= ~IP_VS_SVC_F_HASHED;
358 atomic_dec(&svc->refcnt);
359 return 1;
360}
361
362
363/*
fc723250 364 * Get service by {netns, proto,addr,port} in the service table.
1da177e4 365 */
b18610de 366static inline struct ip_vs_service *
fc723250
HS
367__ip_vs_service_find(struct net *net, int af, __u16 protocol,
368 const union nf_inet_addr *vaddr, __be16 vport)
1da177e4
LT
369{
370 unsigned hash;
371 struct ip_vs_service *svc;
372
373 /* Check for "full" addressed entries */
fc723250 374 hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport);
1da177e4
LT
375
376 list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
b18610de
JV
377 if ((svc->af == af)
378 && ip_vs_addr_equal(af, &svc->addr, vaddr)
1da177e4 379 && (svc->port == vport)
fc723250
HS
380 && (svc->protocol == protocol)
381 && net_eq(svc->net, net)) {
1da177e4 382 /* HIT */
1da177e4
LT
383 return svc;
384 }
385 }
386
387 return NULL;
388}
389
390
391/*
392 * Get service by {fwmark} in the service table.
393 */
b18610de 394static inline struct ip_vs_service *
fc723250 395__ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark)
1da177e4
LT
396{
397 unsigned hash;
398 struct ip_vs_service *svc;
399
400 /* Check for fwmark addressed entries */
fc723250 401 hash = ip_vs_svc_fwm_hashkey(net, fwmark);
1da177e4
LT
402
403 list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
fc723250
HS
404 if (svc->fwmark == fwmark && svc->af == af
405 && net_eq(svc->net, net)) {
1da177e4 406 /* HIT */
1da177e4
LT
407 return svc;
408 }
409 }
410
411 return NULL;
412}
413
414struct ip_vs_service *
fc723250 415ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
3c2e0505 416 const union nf_inet_addr *vaddr, __be16 vport)
1da177e4
LT
417{
418 struct ip_vs_service *svc;
3c2e0505 419
1da177e4
LT
420 read_lock(&__ip_vs_svc_lock);
421
422 /*
423 * Check the table hashed by fwmark first
424 */
fc723250
HS
425 svc = __ip_vs_svc_fwm_find(net, af, fwmark);
426 if (fwmark && svc)
1da177e4
LT
427 goto out;
428
429 /*
430 * Check the table hashed by <protocol,addr,port>
431 * for "full" addressed entries
432 */
fc723250 433 svc = __ip_vs_service_find(net, af, protocol, vaddr, vport);
1da177e4
LT
434
435 if (svc == NULL
436 && protocol == IPPROTO_TCP
437 && atomic_read(&ip_vs_ftpsvc_counter)
438 && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
439 /*
440 * Check if ftp service entry exists, the packet
441 * might belong to FTP data connections.
442 */
fc723250 443 svc = __ip_vs_service_find(net, af, protocol, vaddr, FTPPORT);
1da177e4
LT
444 }
445
446 if (svc == NULL
447 && atomic_read(&ip_vs_nullsvc_counter)) {
448 /*
449 * Check if the catch-all port (port zero) exists
450 */
fc723250 451 svc = __ip_vs_service_find(net, af, protocol, vaddr, 0);
1da177e4
LT
452 }
453
454 out:
26c15cfd
JA
455 if (svc)
456 atomic_inc(&svc->usecnt);
1da177e4
LT
457 read_unlock(&__ip_vs_svc_lock);
458
3c2e0505
JV
459 IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
460 fwmark, ip_vs_proto_name(protocol),
461 IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
462 svc ? "hit" : "not hit");
1da177e4
LT
463
464 return svc;
465}
466
467
468static inline void
469__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
470{
471 atomic_inc(&svc->refcnt);
472 dest->svc = svc;
473}
474
26c15cfd 475static void
1da177e4
LT
476__ip_vs_unbind_svc(struct ip_vs_dest *dest)
477{
478 struct ip_vs_service *svc = dest->svc;
479
480 dest->svc = NULL;
26c15cfd
JA
481 if (atomic_dec_and_test(&svc->refcnt)) {
482 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
483 svc->fwmark,
484 IP_VS_DBG_ADDR(svc->af, &svc->addr),
485 ntohs(svc->port), atomic_read(&svc->usecnt));
b17fc996 486 free_percpu(svc->stats.cpustats);
1da177e4 487 kfree(svc);
26c15cfd 488 }
1da177e4
LT
489}
490
491
492/*
493 * Returns hash value for real service
494 */
7937df15
JV
495static inline unsigned ip_vs_rs_hashkey(int af,
496 const union nf_inet_addr *addr,
497 __be16 port)
1da177e4
LT
498{
499 register unsigned porth = ntohs(port);
7937df15
JV
500 __be32 addr_fold = addr->ip;
501
502#ifdef CONFIG_IP_VS_IPV6
503 if (af == AF_INET6)
504 addr_fold = addr->ip6[0]^addr->ip6[1]^
505 addr->ip6[2]^addr->ip6[3];
506#endif
1da177e4 507
7937df15 508 return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
1da177e4
LT
509 & IP_VS_RTAB_MASK;
510}
511
512/*
fc723250 513 * Hashes ip_vs_dest in rs_table by <proto,addr,port>.
1da177e4
LT
514 * should be called with locked tables.
515 */
fc723250 516static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
1da177e4
LT
517{
518 unsigned hash;
519
520 if (!list_empty(&dest->d_list)) {
521 return 0;
522 }
523
524 /*
525 * Hash by proto,addr,port,
526 * which are the parameters of the real service.
527 */
7937df15
JV
528 hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
529
fc723250 530 list_add(&dest->d_list, &ipvs->rs_table[hash]);
1da177e4
LT
531
532 return 1;
533}
534
535/*
fc723250 536 * UNhashes ip_vs_dest from rs_table.
1da177e4
LT
537 * should be called with locked tables.
538 */
539static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
540{
541 /*
fc723250 542 * Remove it from the rs_table table.
1da177e4
LT
543 */
544 if (!list_empty(&dest->d_list)) {
545 list_del(&dest->d_list);
546 INIT_LIST_HEAD(&dest->d_list);
547 }
548
549 return 1;
550}
551
552/*
553 * Lookup real service by <proto,addr,port> in the real service table.
554 */
555struct ip_vs_dest *
fc723250 556ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
7937df15
JV
557 const union nf_inet_addr *daddr,
558 __be16 dport)
1da177e4 559{
fc723250 560 struct netns_ipvs *ipvs = net_ipvs(net);
1da177e4
LT
561 unsigned hash;
562 struct ip_vs_dest *dest;
563
564 /*
565 * Check for "full" addressed entries
566 * Return the first found entry
567 */
7937df15 568 hash = ip_vs_rs_hashkey(af, daddr, dport);
1da177e4 569
a0840e2e 570 read_lock(&ipvs->rs_lock);
fc723250 571 list_for_each_entry(dest, &ipvs->rs_table[hash], d_list) {
7937df15
JV
572 if ((dest->af == af)
573 && ip_vs_addr_equal(af, &dest->addr, daddr)
1da177e4
LT
574 && (dest->port == dport)
575 && ((dest->protocol == protocol) ||
576 dest->vfwmark)) {
577 /* HIT */
a0840e2e 578 read_unlock(&ipvs->rs_lock);
1da177e4
LT
579 return dest;
580 }
581 }
a0840e2e 582 read_unlock(&ipvs->rs_lock);
1da177e4
LT
583
584 return NULL;
585}
586
587/*
588 * Lookup destination by {addr,port} in the given service
589 */
590static struct ip_vs_dest *
7937df15
JV
591ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
592 __be16 dport)
1da177e4
LT
593{
594 struct ip_vs_dest *dest;
595
596 /*
597 * Find the destination for the given service
598 */
599 list_for_each_entry(dest, &svc->destinations, n_list) {
7937df15
JV
600 if ((dest->af == svc->af)
601 && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
602 && (dest->port == dport)) {
1da177e4
LT
603 /* HIT */
604 return dest;
605 }
606 }
607
608 return NULL;
609}
610
1e356f9c
RB
611/*
612 * Find destination by {daddr,dport,vaddr,protocol}
613 * Cretaed to be used in ip_vs_process_message() in
614 * the backup synchronization daemon. It finds the
615 * destination to be bound to the received connection
616 * on the backup.
617 *
618 * ip_vs_lookup_real_service() looked promissing, but
619 * seems not working as expected.
620 */
fc723250
HS
621struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af,
622 const union nf_inet_addr *daddr,
7937df15
JV
623 __be16 dport,
624 const union nf_inet_addr *vaddr,
0e051e68 625 __be16 vport, __u16 protocol, __u32 fwmark)
1e356f9c
RB
626{
627 struct ip_vs_dest *dest;
628 struct ip_vs_service *svc;
629
fc723250 630 svc = ip_vs_service_get(net, af, fwmark, protocol, vaddr, vport);
1e356f9c
RB
631 if (!svc)
632 return NULL;
633 dest = ip_vs_lookup_dest(svc, daddr, dport);
634 if (dest)
635 atomic_inc(&dest->refcnt);
636 ip_vs_service_put(svc);
637 return dest;
638}
1da177e4
LT
639
640/*
641 * Lookup dest by {svc,addr,port} in the destination trash.
642 * The destination trash is used to hold the destinations that are removed
643 * from the service table but are still referenced by some conn entries.
644 * The reason to add the destination trash is when the dest is temporary
645 * down (either by administrator or by monitor program), the dest can be
646 * picked back from the trash, the remaining connections to the dest can
647 * continue, and the counting information of the dest is also useful for
648 * scheduling.
649 */
650static struct ip_vs_dest *
7937df15
JV
651ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
652 __be16 dport)
1da177e4
LT
653{
654 struct ip_vs_dest *dest, *nxt;
655
656 /*
657 * Find the destination in trash
658 */
659 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
7937df15
JV
660 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
661 "dest->refcnt=%d\n",
662 dest->vfwmark,
663 IP_VS_DBG_ADDR(svc->af, &dest->addr),
664 ntohs(dest->port),
665 atomic_read(&dest->refcnt));
666 if (dest->af == svc->af &&
667 ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
1da177e4
LT
668 dest->port == dport &&
669 dest->vfwmark == svc->fwmark &&
670 dest->protocol == svc->protocol &&
671 (svc->fwmark ||
7937df15 672 (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
1da177e4
LT
673 dest->vport == svc->port))) {
674 /* HIT */
675 return dest;
676 }
677
678 /*
679 * Try to purge the destination from trash if not referenced
680 */
681 if (atomic_read(&dest->refcnt) == 1) {
7937df15
JV
682 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
683 "from trash\n",
684 dest->vfwmark,
685 IP_VS_DBG_ADDR(svc->af, &dest->addr),
686 ntohs(dest->port));
1da177e4
LT
687 list_del(&dest->n_list);
688 ip_vs_dst_reset(dest);
689 __ip_vs_unbind_svc(dest);
b17fc996 690 free_percpu(dest->stats.cpustats);
1da177e4
LT
691 kfree(dest);
692 }
693 }
694
695 return NULL;
696}
697
698
699/*
700 * Clean up all the destinations in the trash
701 * Called by the ip_vs_control_cleanup()
702 *
703 * When the ip_vs_control_clearup is activated by ipvs module exit,
704 * the service tables must have been flushed and all the connections
705 * are expired, and the refcnt of each destination in the trash must
706 * be 1, so we simply release them here.
707 */
708static void ip_vs_trash_cleanup(void)
709{
710 struct ip_vs_dest *dest, *nxt;
711
712 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
713 list_del(&dest->n_list);
714 ip_vs_dst_reset(dest);
715 __ip_vs_unbind_svc(dest);
b17fc996 716 free_percpu(dest->stats.cpustats);
1da177e4
LT
717 kfree(dest);
718 }
719}
720
721
722static void
723ip_vs_zero_stats(struct ip_vs_stats *stats)
724{
725 spin_lock_bh(&stats->lock);
e93615d0 726
e9c0ce23 727 memset(&stats->ustats, 0, sizeof(stats->ustats));
1da177e4 728 ip_vs_zero_estimator(stats);
e93615d0 729
3a14a313 730 spin_unlock_bh(&stats->lock);
1da177e4
LT
731}
732
733/*
734 * Update a destination in the given service
735 */
736static void
26c15cfd
JA
737__ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
738 struct ip_vs_dest_user_kern *udest, int add)
1da177e4 739{
fc723250 740 struct netns_ipvs *ipvs = net_ipvs(svc->net);
1da177e4
LT
741 int conn_flags;
742
743 /* set the weight and the flags */
744 atomic_set(&dest->weight, udest->weight);
3575792e
JA
745 conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
746 conn_flags |= IP_VS_CONN_F_INACTIVE;
1da177e4 747
1da177e4 748 /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
3575792e 749 if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) {
1da177e4
LT
750 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
751 } else {
752 /*
fc723250 753 * Put the real service in rs_table if not present.
1da177e4
LT
754 * For now only for NAT!
755 */
a0840e2e 756 write_lock_bh(&ipvs->rs_lock);
fc723250 757 ip_vs_rs_hash(ipvs, dest);
a0840e2e 758 write_unlock_bh(&ipvs->rs_lock);
1da177e4
LT
759 }
760 atomic_set(&dest->conn_flags, conn_flags);
761
762 /* bind the service */
763 if (!dest->svc) {
764 __ip_vs_bind_svc(dest, svc);
765 } else {
766 if (dest->svc != svc) {
767 __ip_vs_unbind_svc(dest);
768 ip_vs_zero_stats(&dest->stats);
769 __ip_vs_bind_svc(dest, svc);
770 }
771 }
772
773 /* set the dest status flags */
774 dest->flags |= IP_VS_DEST_F_AVAILABLE;
775
776 if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
777 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
778 dest->u_threshold = udest->u_threshold;
779 dest->l_threshold = udest->l_threshold;
26c15cfd 780
fc604767
JA
781 spin_lock(&dest->dst_lock);
782 ip_vs_dst_reset(dest);
783 spin_unlock(&dest->dst_lock);
784
26c15cfd 785 if (add)
29c2026f 786 ip_vs_new_estimator(svc->net, &dest->stats);
26c15cfd
JA
787
788 write_lock_bh(&__ip_vs_svc_lock);
789
790 /* Wait until all other svc users go away */
791 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
792
793 if (add) {
794 list_add(&dest->n_list, &svc->destinations);
795 svc->num_dests++;
796 }
797
798 /* call the update_service, because server weight may be changed */
799 if (svc->scheduler->update_service)
800 svc->scheduler->update_service(svc);
801
802 write_unlock_bh(&__ip_vs_svc_lock);
1da177e4
LT
803}
804
805
806/*
807 * Create a destination for the given service
808 */
809static int
c860c6b1 810ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
1da177e4
LT
811 struct ip_vs_dest **dest_p)
812{
813 struct ip_vs_dest *dest;
814 unsigned atype;
815
816 EnterFunction(2);
817
09571c7a
VB
818#ifdef CONFIG_IP_VS_IPV6
819 if (svc->af == AF_INET6) {
820 atype = ipv6_addr_type(&udest->addr.in6);
3bfb92f4
SW
821 if ((!(atype & IPV6_ADDR_UNICAST) ||
822 atype & IPV6_ADDR_LINKLOCAL) &&
09571c7a
VB
823 !__ip_vs_addr_is_local_v6(&udest->addr.in6))
824 return -EINVAL;
825 } else
826#endif
827 {
828 atype = inet_addr_type(&init_net, udest->addr.ip);
829 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
830 return -EINVAL;
831 }
1da177e4 832
dee06e47 833 dest = kzalloc(sizeof(struct ip_vs_dest), GFP_KERNEL);
1da177e4 834 if (dest == NULL) {
1e3e238e 835 pr_err("%s(): no memory.\n", __func__);
1da177e4
LT
836 return -ENOMEM;
837 }
b17fc996
HS
838 dest->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
839 if (!dest->stats.cpustats) {
840 pr_err("%s() alloc_percpu failed\n", __func__);
841 goto err_alloc;
842 }
1da177e4 843
c860c6b1 844 dest->af = svc->af;
1da177e4 845 dest->protocol = svc->protocol;
c860c6b1 846 dest->vaddr = svc->addr;
1da177e4
LT
847 dest->vport = svc->port;
848 dest->vfwmark = svc->fwmark;
c860c6b1 849 ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
1da177e4
LT
850 dest->port = udest->port;
851
852 atomic_set(&dest->activeconns, 0);
853 atomic_set(&dest->inactconns, 0);
854 atomic_set(&dest->persistconns, 0);
26c15cfd 855 atomic_set(&dest->refcnt, 1);
1da177e4
LT
856
857 INIT_LIST_HEAD(&dest->d_list);
858 spin_lock_init(&dest->dst_lock);
859 spin_lock_init(&dest->stats.lock);
26c15cfd 860 __ip_vs_update_dest(svc, dest, udest, 1);
1da177e4
LT
861
862 *dest_p = dest;
863
864 LeaveFunction(2);
865 return 0;
b17fc996
HS
866
867err_alloc:
868 kfree(dest);
869 return -ENOMEM;
1da177e4
LT
870}
871
872
873/*
874 * Add a destination into an existing service
875 */
876static int
c860c6b1 877ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
878{
879 struct ip_vs_dest *dest;
c860c6b1 880 union nf_inet_addr daddr;
014d730d 881 __be16 dport = udest->port;
1da177e4
LT
882 int ret;
883
884 EnterFunction(2);
885
886 if (udest->weight < 0) {
1e3e238e 887 pr_err("%s(): server weight less than zero\n", __func__);
1da177e4
LT
888 return -ERANGE;
889 }
890
891 if (udest->l_threshold > udest->u_threshold) {
1e3e238e
HE
892 pr_err("%s(): lower threshold is higher than upper threshold\n",
893 __func__);
1da177e4
LT
894 return -ERANGE;
895 }
896
c860c6b1
JV
897 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
898
1da177e4
LT
899 /*
900 * Check if the dest already exists in the list
901 */
7937df15
JV
902 dest = ip_vs_lookup_dest(svc, &daddr, dport);
903
1da177e4 904 if (dest != NULL) {
1e3e238e 905 IP_VS_DBG(1, "%s(): dest already exists\n", __func__);
1da177e4
LT
906 return -EEXIST;
907 }
908
909 /*
910 * Check if the dest already exists in the trash and
911 * is from the same service
912 */
7937df15
JV
913 dest = ip_vs_trash_get_dest(svc, &daddr, dport);
914
1da177e4 915 if (dest != NULL) {
cfc78c5a
JV
916 IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
917 "dest->refcnt=%d, service %u/%s:%u\n",
918 IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
919 atomic_read(&dest->refcnt),
920 dest->vfwmark,
921 IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
922 ntohs(dest->vport));
923
1da177e4
LT
924 /*
925 * Get the destination from the trash
926 */
927 list_del(&dest->n_list);
928
26c15cfd
JA
929 __ip_vs_update_dest(svc, dest, udest, 1);
930 ret = 0;
931 } else {
1da177e4 932 /*
26c15cfd 933 * Allocate and initialize the dest structure
1da177e4 934 */
26c15cfd 935 ret = ip_vs_new_dest(svc, udest, &dest);
1da177e4 936 }
1da177e4
LT
937 LeaveFunction(2);
938
26c15cfd 939 return ret;
1da177e4
LT
940}
941
942
943/*
944 * Edit a destination in the given service
945 */
946static int
c860c6b1 947ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
948{
949 struct ip_vs_dest *dest;
c860c6b1 950 union nf_inet_addr daddr;
014d730d 951 __be16 dport = udest->port;
1da177e4
LT
952
953 EnterFunction(2);
954
955 if (udest->weight < 0) {
1e3e238e 956 pr_err("%s(): server weight less than zero\n", __func__);
1da177e4
LT
957 return -ERANGE;
958 }
959
960 if (udest->l_threshold > udest->u_threshold) {
1e3e238e
HE
961 pr_err("%s(): lower threshold is higher than upper threshold\n",
962 __func__);
1da177e4
LT
963 return -ERANGE;
964 }
965
c860c6b1
JV
966 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
967
1da177e4
LT
968 /*
969 * Lookup the destination list
970 */
7937df15
JV
971 dest = ip_vs_lookup_dest(svc, &daddr, dport);
972
1da177e4 973 if (dest == NULL) {
1e3e238e 974 IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__);
1da177e4
LT
975 return -ENOENT;
976 }
977
26c15cfd 978 __ip_vs_update_dest(svc, dest, udest, 0);
1da177e4
LT
979 LeaveFunction(2);
980
981 return 0;
982}
983
984
985/*
986 * Delete a destination (must be already unlinked from the service)
987 */
29c2026f 988static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest)
1da177e4 989{
a0840e2e
HS
990 struct netns_ipvs *ipvs = net_ipvs(net);
991
29c2026f 992 ip_vs_kill_estimator(net, &dest->stats);
1da177e4
LT
993
994 /*
995 * Remove it from the d-linked list with the real services.
996 */
a0840e2e 997 write_lock_bh(&ipvs->rs_lock);
1da177e4 998 ip_vs_rs_unhash(dest);
a0840e2e 999 write_unlock_bh(&ipvs->rs_lock);
1da177e4
LT
1000
1001 /*
1002 * Decrease the refcnt of the dest, and free the dest
1003 * if nobody refers to it (refcnt=0). Otherwise, throw
1004 * the destination into the trash.
1005 */
1006 if (atomic_dec_and_test(&dest->refcnt)) {
26c15cfd
JA
1007 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u\n",
1008 dest->vfwmark,
1009 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1010 ntohs(dest->port));
1da177e4
LT
1011 ip_vs_dst_reset(dest);
1012 /* simply decrease svc->refcnt here, let the caller check
1013 and release the service if nobody refers to it.
1014 Only user context can release destination and service,
1015 and only one user context can update virtual service at a
1016 time, so the operation here is OK */
1017 atomic_dec(&dest->svc->refcnt);
b17fc996 1018 free_percpu(dest->stats.cpustats);
1da177e4
LT
1019 kfree(dest);
1020 } else {
cfc78c5a
JV
1021 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
1022 "dest->refcnt=%d\n",
1023 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1024 ntohs(dest->port),
1025 atomic_read(&dest->refcnt));
1da177e4
LT
1026 list_add(&dest->n_list, &ip_vs_dest_trash);
1027 atomic_inc(&dest->refcnt);
1028 }
1029}
1030
1031
1032/*
1033 * Unlink a destination from the given service
1034 */
1035static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1036 struct ip_vs_dest *dest,
1037 int svcupd)
1038{
1039 dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1040
1041 /*
1042 * Remove it from the d-linked destination list.
1043 */
1044 list_del(&dest->n_list);
1045 svc->num_dests--;
82dfb6f3
SW
1046
1047 /*
1048 * Call the update_service function of its scheduler
1049 */
1050 if (svcupd && svc->scheduler->update_service)
1051 svc->scheduler->update_service(svc);
1da177e4
LT
1052}
1053
1054
1055/*
1056 * Delete a destination server in the given service
1057 */
1058static int
c860c6b1 1059ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
1060{
1061 struct ip_vs_dest *dest;
014d730d 1062 __be16 dport = udest->port;
1da177e4
LT
1063
1064 EnterFunction(2);
1065
7937df15 1066 dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
c860c6b1 1067
1da177e4 1068 if (dest == NULL) {
1e3e238e 1069 IP_VS_DBG(1, "%s(): destination not found!\n", __func__);
1da177e4
LT
1070 return -ENOENT;
1071 }
1072
1073 write_lock_bh(&__ip_vs_svc_lock);
1074
1075 /*
1076 * Wait until all other svc users go away.
1077 */
26c15cfd 1078 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1da177e4
LT
1079
1080 /*
1081 * Unlink dest from the service
1082 */
1083 __ip_vs_unlink_dest(svc, dest, 1);
1084
1085 write_unlock_bh(&__ip_vs_svc_lock);
1086
1087 /*
1088 * Delete the destination
1089 */
a0840e2e 1090 __ip_vs_del_dest(svc->net, dest);
1da177e4
LT
1091
1092 LeaveFunction(2);
1093
1094 return 0;
1095}
1096
1097
1098/*
1099 * Add a service into the service hash table
1100 */
1101static int
fc723250 1102ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
c860c6b1 1103 struct ip_vs_service **svc_p)
1da177e4
LT
1104{
1105 int ret = 0;
1106 struct ip_vs_scheduler *sched = NULL;
0d1e71b0 1107 struct ip_vs_pe *pe = NULL;
1da177e4 1108 struct ip_vs_service *svc = NULL;
a0840e2e 1109 struct netns_ipvs *ipvs = net_ipvs(net);
1da177e4
LT
1110
1111 /* increase the module use count */
1112 ip_vs_use_count_inc();
1113
1114 /* Lookup the scheduler by 'u->sched_name' */
1115 sched = ip_vs_scheduler_get(u->sched_name);
1116 if (sched == NULL) {
1e3e238e 1117 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1da177e4 1118 ret = -ENOENT;
6e08bfb8 1119 goto out_err;
1da177e4
LT
1120 }
1121
0d1e71b0 1122 if (u->pe_name && *u->pe_name) {
e9e5eee8 1123 pe = ip_vs_pe_getbyname(u->pe_name);
0d1e71b0
SH
1124 if (pe == NULL) {
1125 pr_info("persistence engine module ip_vs_pe_%s "
1126 "not found\n", u->pe_name);
1127 ret = -ENOENT;
1128 goto out_err;
1129 }
1130 }
1131
f94fd041 1132#ifdef CONFIG_IP_VS_IPV6
48148938
JV
1133 if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1134 ret = -EINVAL;
1135 goto out_err;
f94fd041
JV
1136 }
1137#endif
1138
dee06e47 1139 svc = kzalloc(sizeof(struct ip_vs_service), GFP_KERNEL);
1da177e4 1140 if (svc == NULL) {
1e3e238e 1141 IP_VS_DBG(1, "%s(): no memory\n", __func__);
1da177e4
LT
1142 ret = -ENOMEM;
1143 goto out_err;
1144 }
b17fc996
HS
1145 svc->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
1146 if (!svc->stats.cpustats) {
1147 pr_err("%s() alloc_percpu failed\n", __func__);
1148 goto out_err;
1149 }
1da177e4
LT
1150
1151 /* I'm the first user of the service */
26c15cfd 1152 atomic_set(&svc->usecnt, 0);
1da177e4
LT
1153 atomic_set(&svc->refcnt, 0);
1154
c860c6b1 1155 svc->af = u->af;
1da177e4 1156 svc->protocol = u->protocol;
c860c6b1 1157 ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
1da177e4
LT
1158 svc->port = u->port;
1159 svc->fwmark = u->fwmark;
1160 svc->flags = u->flags;
1161 svc->timeout = u->timeout * HZ;
1162 svc->netmask = u->netmask;
fc723250 1163 svc->net = net;
1da177e4
LT
1164
1165 INIT_LIST_HEAD(&svc->destinations);
1166 rwlock_init(&svc->sched_lock);
1167 spin_lock_init(&svc->stats.lock);
1168
1169 /* Bind the scheduler */
1170 ret = ip_vs_bind_scheduler(svc, sched);
1171 if (ret)
1172 goto out_err;
1173 sched = NULL;
1174
0d1e71b0
SH
1175 /* Bind the ct retriever */
1176 ip_vs_bind_pe(svc, pe);
1177 pe = NULL;
1178
1da177e4
LT
1179 /* Update the virtual service counters */
1180 if (svc->port == FTPPORT)
1181 atomic_inc(&ip_vs_ftpsvc_counter);
1182 else if (svc->port == 0)
1183 atomic_inc(&ip_vs_nullsvc_counter);
1184
29c2026f 1185 ip_vs_new_estimator(net, &svc->stats);
f94fd041
JV
1186
1187 /* Count only IPv4 services for old get/setsockopt interface */
1188 if (svc->af == AF_INET)
a0840e2e 1189 ipvs->num_services++;
1da177e4
LT
1190
1191 /* Hash the service into the service table */
1192 write_lock_bh(&__ip_vs_svc_lock);
1193 ip_vs_svc_hash(svc);
1194 write_unlock_bh(&__ip_vs_svc_lock);
1195
1196 *svc_p = svc;
1197 return 0;
1198
b17fc996 1199
6e08bfb8 1200 out_err:
1da177e4 1201 if (svc != NULL) {
2fabf35b 1202 ip_vs_unbind_scheduler(svc);
1da177e4
LT
1203 if (svc->inc) {
1204 local_bh_disable();
1205 ip_vs_app_inc_put(svc->inc);
1206 local_bh_enable();
1207 }
b17fc996
HS
1208 if (svc->stats.cpustats)
1209 free_percpu(svc->stats.cpustats);
1da177e4
LT
1210 kfree(svc);
1211 }
1212 ip_vs_scheduler_put(sched);
0d1e71b0 1213 ip_vs_pe_put(pe);
1da177e4 1214
1da177e4
LT
1215 /* decrease the module use count */
1216 ip_vs_use_count_dec();
1217
1218 return ret;
1219}
1220
1221
1222/*
1223 * Edit a service and bind it with a new scheduler
1224 */
1225static int
c860c6b1 1226ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1da177e4
LT
1227{
1228 struct ip_vs_scheduler *sched, *old_sched;
0d1e71b0 1229 struct ip_vs_pe *pe = NULL, *old_pe = NULL;
1da177e4
LT
1230 int ret = 0;
1231
1232 /*
1233 * Lookup the scheduler, by 'u->sched_name'
1234 */
1235 sched = ip_vs_scheduler_get(u->sched_name);
1236 if (sched == NULL) {
1e3e238e 1237 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1da177e4
LT
1238 return -ENOENT;
1239 }
1240 old_sched = sched;
1241
0d1e71b0 1242 if (u->pe_name && *u->pe_name) {
e9e5eee8 1243 pe = ip_vs_pe_getbyname(u->pe_name);
0d1e71b0
SH
1244 if (pe == NULL) {
1245 pr_info("persistence engine module ip_vs_pe_%s "
1246 "not found\n", u->pe_name);
1247 ret = -ENOENT;
1248 goto out;
1249 }
1250 old_pe = pe;
1251 }
1252
f94fd041 1253#ifdef CONFIG_IP_VS_IPV6
48148938
JV
1254 if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1255 ret = -EINVAL;
1256 goto out;
f94fd041
JV
1257 }
1258#endif
1259
1da177e4
LT
1260 write_lock_bh(&__ip_vs_svc_lock);
1261
1262 /*
1263 * Wait until all other svc users go away.
1264 */
26c15cfd 1265 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1da177e4
LT
1266
1267 /*
1268 * Set the flags and timeout value
1269 */
1270 svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1271 svc->timeout = u->timeout * HZ;
1272 svc->netmask = u->netmask;
1273
1274 old_sched = svc->scheduler;
1275 if (sched != old_sched) {
1276 /*
1277 * Unbind the old scheduler
1278 */
1279 if ((ret = ip_vs_unbind_scheduler(svc))) {
1280 old_sched = sched;
9e691ed6 1281 goto out_unlock;
1da177e4
LT
1282 }
1283
1284 /*
1285 * Bind the new scheduler
1286 */
1287 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1288 /*
1289 * If ip_vs_bind_scheduler fails, restore the old
1290 * scheduler.
1291 * The main reason of failure is out of memory.
1292 *
1293 * The question is if the old scheduler can be
1294 * restored all the time. TODO: if it cannot be
1295 * restored some time, we must delete the service,
1296 * otherwise the system may crash.
1297 */
1298 ip_vs_bind_scheduler(svc, old_sched);
1299 old_sched = sched;
9e691ed6 1300 goto out_unlock;
1da177e4
LT
1301 }
1302 }
1303
0d1e71b0
SH
1304 old_pe = svc->pe;
1305 if (pe != old_pe) {
1306 ip_vs_unbind_pe(svc);
1307 ip_vs_bind_pe(svc, pe);
1308 }
1309
9e691ed6 1310 out_unlock:
1da177e4 1311 write_unlock_bh(&__ip_vs_svc_lock);
9e691ed6 1312 out:
6e08bfb8 1313 ip_vs_scheduler_put(old_sched);
0d1e71b0 1314 ip_vs_pe_put(old_pe);
1da177e4
LT
1315 return ret;
1316}
1317
1318
1319/*
1320 * Delete a service from the service list
1321 * - The service must be unlinked, unlocked and not referenced!
1322 * - We are called under _bh lock
1323 */
1324static void __ip_vs_del_service(struct ip_vs_service *svc)
1325{
1326 struct ip_vs_dest *dest, *nxt;
1327 struct ip_vs_scheduler *old_sched;
0d1e71b0 1328 struct ip_vs_pe *old_pe;
a0840e2e 1329 struct netns_ipvs *ipvs = net_ipvs(svc->net);
0d1e71b0
SH
1330
1331 pr_info("%s: enter\n", __func__);
1da177e4 1332
f94fd041
JV
1333 /* Count only IPv4 services for old get/setsockopt interface */
1334 if (svc->af == AF_INET)
a0840e2e 1335 ipvs->num_services--;
f94fd041 1336
29c2026f 1337 ip_vs_kill_estimator(svc->net, &svc->stats);
1da177e4
LT
1338
1339 /* Unbind scheduler */
1340 old_sched = svc->scheduler;
1341 ip_vs_unbind_scheduler(svc);
6e08bfb8 1342 ip_vs_scheduler_put(old_sched);
1da177e4 1343
0d1e71b0
SH
1344 /* Unbind persistence engine */
1345 old_pe = svc->pe;
1346 ip_vs_unbind_pe(svc);
1347 ip_vs_pe_put(old_pe);
1348
1da177e4
LT
1349 /* Unbind app inc */
1350 if (svc->inc) {
1351 ip_vs_app_inc_put(svc->inc);
1352 svc->inc = NULL;
1353 }
1354
1355 /*
1356 * Unlink the whole destination list
1357 */
1358 list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1359 __ip_vs_unlink_dest(svc, dest, 0);
29c2026f 1360 __ip_vs_del_dest(svc->net, dest);
1da177e4
LT
1361 }
1362
1363 /*
1364 * Update the virtual service counters
1365 */
1366 if (svc->port == FTPPORT)
1367 atomic_dec(&ip_vs_ftpsvc_counter);
1368 else if (svc->port == 0)
1369 atomic_dec(&ip_vs_nullsvc_counter);
1370
1371 /*
1372 * Free the service if nobody refers to it
1373 */
26c15cfd
JA
1374 if (atomic_read(&svc->refcnt) == 0) {
1375 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
1376 svc->fwmark,
1377 IP_VS_DBG_ADDR(svc->af, &svc->addr),
1378 ntohs(svc->port), atomic_read(&svc->usecnt));
b17fc996 1379 free_percpu(svc->stats.cpustats);
1da177e4 1380 kfree(svc);
26c15cfd 1381 }
1da177e4
LT
1382
1383 /* decrease the module use count */
1384 ip_vs_use_count_dec();
1385}
1386
1387/*
26c15cfd 1388 * Unlink a service from list and try to delete it if its refcnt reached 0
1da177e4 1389 */
26c15cfd 1390static void ip_vs_unlink_service(struct ip_vs_service *svc)
1da177e4 1391{
1da177e4
LT
1392 /*
1393 * Unhash it from the service table
1394 */
1395 write_lock_bh(&__ip_vs_svc_lock);
1396
1397 ip_vs_svc_unhash(svc);
1398
1399 /*
1400 * Wait until all the svc users go away.
1401 */
26c15cfd 1402 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1da177e4
LT
1403
1404 __ip_vs_del_service(svc);
1405
1406 write_unlock_bh(&__ip_vs_svc_lock);
26c15cfd
JA
1407}
1408
1409/*
1410 * Delete a service from the service list
1411 */
1412static int ip_vs_del_service(struct ip_vs_service *svc)
1413{
1414 if (svc == NULL)
1415 return -EEXIST;
1416 ip_vs_unlink_service(svc);
1da177e4
LT
1417
1418 return 0;
1419}
1420
1421
1422/*
1423 * Flush all the virtual services
1424 */
fc723250 1425static int ip_vs_flush(struct net *net)
1da177e4
LT
1426{
1427 int idx;
1428 struct ip_vs_service *svc, *nxt;
1429
1430 /*
fc723250 1431 * Flush the service table hashed by <netns,protocol,addr,port>
1da177e4
LT
1432 */
1433 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
fc723250
HS
1434 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx],
1435 s_list) {
1436 if (net_eq(svc->net, net))
1437 ip_vs_unlink_service(svc);
1da177e4
LT
1438 }
1439 }
1440
1441 /*
1442 * Flush the service table hashed by fwmark
1443 */
1444 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1445 list_for_each_entry_safe(svc, nxt,
1446 &ip_vs_svc_fwm_table[idx], f_list) {
fc723250
HS
1447 if (net_eq(svc->net, net))
1448 ip_vs_unlink_service(svc);
1da177e4
LT
1449 }
1450 }
1451
1452 return 0;
1453}
1454
1455
1456/*
1457 * Zero counters in a service or all services
1458 */
1459static int ip_vs_zero_service(struct ip_vs_service *svc)
1460{
1461 struct ip_vs_dest *dest;
1462
1463 write_lock_bh(&__ip_vs_svc_lock);
1464 list_for_each_entry(dest, &svc->destinations, n_list) {
1465 ip_vs_zero_stats(&dest->stats);
1466 }
1467 ip_vs_zero_stats(&svc->stats);
1468 write_unlock_bh(&__ip_vs_svc_lock);
1469 return 0;
1470}
1471
fc723250 1472static int ip_vs_zero_all(struct net *net)
1da177e4
LT
1473{
1474 int idx;
1475 struct ip_vs_service *svc;
1476
1477 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1478 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
fc723250
HS
1479 if (net_eq(svc->net, net))
1480 ip_vs_zero_service(svc);
1da177e4
LT
1481 }
1482 }
1483
1484 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1485 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
fc723250
HS
1486 if (net_eq(svc->net, net))
1487 ip_vs_zero_service(svc);
1da177e4
LT
1488 }
1489 }
1490
b17fc996 1491 ip_vs_zero_stats(net_ipvs(net)->tot_stats);
1da177e4
LT
1492 return 0;
1493}
1494
1495
1496static int
8d65af78 1497proc_do_defense_mode(ctl_table *table, int write,
1da177e4
LT
1498 void __user *buffer, size_t *lenp, loff_t *ppos)
1499{
9330419d 1500 struct net *net = current->nsproxy->net_ns;
1da177e4
LT
1501 int *valp = table->data;
1502 int val = *valp;
1503 int rc;
1504
8d65af78 1505 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1da177e4
LT
1506 if (write && (*valp != val)) {
1507 if ((*valp < 0) || (*valp > 3)) {
1508 /* Restore the correct value */
1509 *valp = val;
1510 } else {
9330419d 1511 update_defense_level(net_ipvs(net));
1da177e4
LT
1512 }
1513 }
1514 return rc;
1515}
1516
1517
1518static int
8d65af78 1519proc_do_sync_threshold(ctl_table *table, int write,
1da177e4
LT
1520 void __user *buffer, size_t *lenp, loff_t *ppos)
1521{
1522 int *valp = table->data;
1523 int val[2];
1524 int rc;
1525
1526 /* backup the value first */
1527 memcpy(val, valp, sizeof(val));
1528
8d65af78 1529 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1da177e4
LT
1530 if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
1531 /* Restore the correct value */
1532 memcpy(valp, val, sizeof(val));
1533 }
1534 return rc;
1535}
1536
b880c1f0
HS
1537static int
1538proc_do_sync_mode(ctl_table *table, int write,
1539 void __user *buffer, size_t *lenp, loff_t *ppos)
1540{
1541 int *valp = table->data;
1542 int val = *valp;
1543 int rc;
1544
1545 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1546 if (write && (*valp != val)) {
1547 if ((*valp < 0) || (*valp > 1)) {
1548 /* Restore the correct value */
1549 *valp = val;
1550 } else {
f131315f
HS
1551 struct net *net = current->nsproxy->net_ns;
1552 ip_vs_sync_switch_mode(net, val);
b880c1f0
HS
1553 }
1554 }
1555 return rc;
1556}
1da177e4
LT
1557
1558/*
1559 * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
a0840e2e
HS
1560 * Do not change order or insert new entries without
1561 * align with netns init in __ip_vs_control_init()
1da177e4
LT
1562 */
1563
1564static struct ctl_table vs_vars[] = {
1565 {
1da177e4 1566 .procname = "amemthresh",
1da177e4
LT
1567 .maxlen = sizeof(int),
1568 .mode = 0644,
6d9f239a 1569 .proc_handler = proc_dointvec,
1da177e4 1570 },
1da177e4 1571 {
1da177e4 1572 .procname = "am_droprate",
1da177e4
LT
1573 .maxlen = sizeof(int),
1574 .mode = 0644,
6d9f239a 1575 .proc_handler = proc_dointvec,
1da177e4
LT
1576 },
1577 {
1da177e4 1578 .procname = "drop_entry",
1da177e4
LT
1579 .maxlen = sizeof(int),
1580 .mode = 0644,
6d9f239a 1581 .proc_handler = proc_do_defense_mode,
1da177e4
LT
1582 },
1583 {
1da177e4 1584 .procname = "drop_packet",
1da177e4
LT
1585 .maxlen = sizeof(int),
1586 .mode = 0644,
6d9f239a 1587 .proc_handler = proc_do_defense_mode,
1da177e4 1588 },
f4bc17cd
JA
1589#ifdef CONFIG_IP_VS_NFCT
1590 {
1591 .procname = "conntrack",
f4bc17cd
JA
1592 .maxlen = sizeof(int),
1593 .mode = 0644,
1594 .proc_handler = &proc_dointvec,
1595 },
1596#endif
1da177e4 1597 {
1da177e4 1598 .procname = "secure_tcp",
1da177e4
LT
1599 .maxlen = sizeof(int),
1600 .mode = 0644,
6d9f239a 1601 .proc_handler = proc_do_defense_mode,
1da177e4 1602 },
8a803040
JA
1603 {
1604 .procname = "snat_reroute",
8a803040
JA
1605 .maxlen = sizeof(int),
1606 .mode = 0644,
1607 .proc_handler = &proc_dointvec,
1608 },
b880c1f0
HS
1609 {
1610 .procname = "sync_version",
b880c1f0
HS
1611 .maxlen = sizeof(int),
1612 .mode = 0644,
1613 .proc_handler = &proc_do_sync_mode,
1614 },
a0840e2e
HS
1615 {
1616 .procname = "cache_bypass",
1617 .maxlen = sizeof(int),
1618 .mode = 0644,
1619 .proc_handler = proc_dointvec,
1620 },
1621 {
1622 .procname = "expire_nodest_conn",
1623 .maxlen = sizeof(int),
1624 .mode = 0644,
1625 .proc_handler = proc_dointvec,
1626 },
1627 {
1628 .procname = "expire_quiescent_template",
1629 .maxlen = sizeof(int),
1630 .mode = 0644,
1631 .proc_handler = proc_dointvec,
1632 },
1633 {
1634 .procname = "sync_threshold",
1635 .maxlen =
1636 sizeof(((struct netns_ipvs *)0)->sysctl_sync_threshold),
1637 .mode = 0644,
1638 .proc_handler = proc_do_sync_threshold,
1639 },
1640 {
1641 .procname = "nat_icmp_send",
1642 .maxlen = sizeof(int),
1643 .mode = 0644,
1644 .proc_handler = proc_dointvec,
1645 },
1646#ifdef CONFIG_IP_VS_DEBUG
1647 {
1648 .procname = "debug_level",
1649 .data = &sysctl_ip_vs_debug_level,
1650 .maxlen = sizeof(int),
1651 .mode = 0644,
1652 .proc_handler = proc_dointvec,
1653 },
1654#endif
1da177e4
LT
1655#if 0
1656 {
1da177e4
LT
1657 .procname = "timeout_established",
1658 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1659 .maxlen = sizeof(int),
1660 .mode = 0644,
6d9f239a 1661 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1662 },
1663 {
1da177e4
LT
1664 .procname = "timeout_synsent",
1665 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1666 .maxlen = sizeof(int),
1667 .mode = 0644,
6d9f239a 1668 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1669 },
1670 {
1da177e4
LT
1671 .procname = "timeout_synrecv",
1672 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1673 .maxlen = sizeof(int),
1674 .mode = 0644,
6d9f239a 1675 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1676 },
1677 {
1da177e4
LT
1678 .procname = "timeout_finwait",
1679 .data = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1680 .maxlen = sizeof(int),
1681 .mode = 0644,
6d9f239a 1682 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1683 },
1684 {
1da177e4
LT
1685 .procname = "timeout_timewait",
1686 .data = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1687 .maxlen = sizeof(int),
1688 .mode = 0644,
6d9f239a 1689 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1690 },
1691 {
1da177e4
LT
1692 .procname = "timeout_close",
1693 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1694 .maxlen = sizeof(int),
1695 .mode = 0644,
6d9f239a 1696 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1697 },
1698 {
1da177e4
LT
1699 .procname = "timeout_closewait",
1700 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1701 .maxlen = sizeof(int),
1702 .mode = 0644,
6d9f239a 1703 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1704 },
1705 {
1da177e4
LT
1706 .procname = "timeout_lastack",
1707 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1708 .maxlen = sizeof(int),
1709 .mode = 0644,
6d9f239a 1710 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1711 },
1712 {
1da177e4
LT
1713 .procname = "timeout_listen",
1714 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1715 .maxlen = sizeof(int),
1716 .mode = 0644,
6d9f239a 1717 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1718 },
1719 {
1da177e4
LT
1720 .procname = "timeout_synack",
1721 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1722 .maxlen = sizeof(int),
1723 .mode = 0644,
6d9f239a 1724 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1725 },
1726 {
1da177e4
LT
1727 .procname = "timeout_udp",
1728 .data = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1729 .maxlen = sizeof(int),
1730 .mode = 0644,
6d9f239a 1731 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1732 },
1733 {
1da177e4
LT
1734 .procname = "timeout_icmp",
1735 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1736 .maxlen = sizeof(int),
1737 .mode = 0644,
6d9f239a 1738 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1739 },
1740#endif
f8572d8f 1741 { }
1da177e4
LT
1742};
1743
5587da55 1744const struct ctl_path net_vs_ctl_path[] = {
f8572d8f
EB
1745 { .procname = "net", },
1746 { .procname = "ipv4", },
90754f8e
PE
1747 { .procname = "vs", },
1748 { }
1da177e4 1749};
90754f8e 1750EXPORT_SYMBOL_GPL(net_vs_ctl_path);
1da177e4 1751
1da177e4
LT
1752#ifdef CONFIG_PROC_FS
1753
1754struct ip_vs_iter {
fc723250 1755 struct seq_net_private p; /* Do not move this, netns depends upon it*/
1da177e4
LT
1756 struct list_head *table;
1757 int bucket;
1758};
1759
1760/*
1761 * Write the contents of the VS rule table to a PROCfs file.
1762 * (It is kept just for backward compatibility)
1763 */
1764static inline const char *ip_vs_fwd_name(unsigned flags)
1765{
1766 switch (flags & IP_VS_CONN_F_FWD_MASK) {
1767 case IP_VS_CONN_F_LOCALNODE:
1768 return "Local";
1769 case IP_VS_CONN_F_TUNNEL:
1770 return "Tunnel";
1771 case IP_VS_CONN_F_DROUTE:
1772 return "Route";
1773 default:
1774 return "Masq";
1775 }
1776}
1777
1778
1779/* Get the Nth entry in the two lists */
1780static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1781{
fc723250 1782 struct net *net = seq_file_net(seq);
1da177e4
LT
1783 struct ip_vs_iter *iter = seq->private;
1784 int idx;
1785 struct ip_vs_service *svc;
1786
1787 /* look in hash by protocol */
1788 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1789 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
fc723250 1790 if (net_eq(svc->net, net) && pos-- == 0) {
1da177e4
LT
1791 iter->table = ip_vs_svc_table;
1792 iter->bucket = idx;
1793 return svc;
1794 }
1795 }
1796 }
1797
1798 /* keep looking in fwmark */
1799 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1800 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
fc723250 1801 if (net_eq(svc->net, net) && pos-- == 0) {
1da177e4
LT
1802 iter->table = ip_vs_svc_fwm_table;
1803 iter->bucket = idx;
1804 return svc;
1805 }
1806 }
1807 }
1808
1809 return NULL;
1810}
1811
1812static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
563e94f0 1813__acquires(__ip_vs_svc_lock)
1da177e4
LT
1814{
1815
1816 read_lock_bh(&__ip_vs_svc_lock);
1817 return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
1818}
1819
1820
1821static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1822{
1823 struct list_head *e;
1824 struct ip_vs_iter *iter;
1825 struct ip_vs_service *svc;
1826
1827 ++*pos;
1828 if (v == SEQ_START_TOKEN)
1829 return ip_vs_info_array(seq,0);
1830
1831 svc = v;
1832 iter = seq->private;
1833
1834 if (iter->table == ip_vs_svc_table) {
1835 /* next service in table hashed by protocol */
1836 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
1837 return list_entry(e, struct ip_vs_service, s_list);
1838
1839
1840 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1841 list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
1842 s_list) {
1843 return svc;
1844 }
1845 }
1846
1847 iter->table = ip_vs_svc_fwm_table;
1848 iter->bucket = -1;
1849 goto scan_fwmark;
1850 }
1851
1852 /* next service in hashed by fwmark */
1853 if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
1854 return list_entry(e, struct ip_vs_service, f_list);
1855
1856 scan_fwmark:
1857 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1858 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
1859 f_list)
1860 return svc;
1861 }
1862
1863 return NULL;
1864}
1865
1866static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
563e94f0 1867__releases(__ip_vs_svc_lock)
1da177e4
LT
1868{
1869 read_unlock_bh(&__ip_vs_svc_lock);
1870}
1871
1872
1873static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
1874{
1875 if (v == SEQ_START_TOKEN) {
1876 seq_printf(seq,
1877 "IP Virtual Server version %d.%d.%d (size=%d)\n",
6f7edb48 1878 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
1da177e4
LT
1879 seq_puts(seq,
1880 "Prot LocalAddress:Port Scheduler Flags\n");
1881 seq_puts(seq,
1882 " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
1883 } else {
1884 const struct ip_vs_service *svc = v;
1885 const struct ip_vs_iter *iter = seq->private;
1886 const struct ip_vs_dest *dest;
1887
667a5f18
VB
1888 if (iter->table == ip_vs_svc_table) {
1889#ifdef CONFIG_IP_VS_IPV6
1890 if (svc->af == AF_INET6)
5b095d98 1891 seq_printf(seq, "%s [%pI6]:%04X %s ",
667a5f18 1892 ip_vs_proto_name(svc->protocol),
38ff4fa4 1893 &svc->addr.in6,
667a5f18
VB
1894 ntohs(svc->port),
1895 svc->scheduler->name);
1896 else
1897#endif
26ec037f 1898 seq_printf(seq, "%s %08X:%04X %s %s ",
667a5f18
VB
1899 ip_vs_proto_name(svc->protocol),
1900 ntohl(svc->addr.ip),
1901 ntohs(svc->port),
26ec037f
NC
1902 svc->scheduler->name,
1903 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
667a5f18 1904 } else {
26ec037f
NC
1905 seq_printf(seq, "FWM %08X %s %s",
1906 svc->fwmark, svc->scheduler->name,
1907 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
667a5f18 1908 }
1da177e4
LT
1909
1910 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
1911 seq_printf(seq, "persistent %d %08X\n",
1912 svc->timeout,
1913 ntohl(svc->netmask));
1914 else
1915 seq_putc(seq, '\n');
1916
1917 list_for_each_entry(dest, &svc->destinations, n_list) {
667a5f18
VB
1918#ifdef CONFIG_IP_VS_IPV6
1919 if (dest->af == AF_INET6)
1920 seq_printf(seq,
5b095d98 1921 " -> [%pI6]:%04X"
667a5f18 1922 " %-7s %-6d %-10d %-10d\n",
38ff4fa4 1923 &dest->addr.in6,
667a5f18
VB
1924 ntohs(dest->port),
1925 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1926 atomic_read(&dest->weight),
1927 atomic_read(&dest->activeconns),
1928 atomic_read(&dest->inactconns));
1929 else
1930#endif
1931 seq_printf(seq,
1932 " -> %08X:%04X "
1933 "%-7s %-6d %-10d %-10d\n",
1934 ntohl(dest->addr.ip),
1935 ntohs(dest->port),
1936 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1937 atomic_read(&dest->weight),
1938 atomic_read(&dest->activeconns),
1939 atomic_read(&dest->inactconns));
1940
1da177e4
LT
1941 }
1942 }
1943 return 0;
1944}
1945
56b3d975 1946static const struct seq_operations ip_vs_info_seq_ops = {
1da177e4
LT
1947 .start = ip_vs_info_seq_start,
1948 .next = ip_vs_info_seq_next,
1949 .stop = ip_vs_info_seq_stop,
1950 .show = ip_vs_info_seq_show,
1951};
1952
1953static int ip_vs_info_open(struct inode *inode, struct file *file)
1954{
fc723250 1955 return seq_open_net(inode, file, &ip_vs_info_seq_ops,
cf7732e4 1956 sizeof(struct ip_vs_iter));
1da177e4
LT
1957}
1958
9a32144e 1959static const struct file_operations ip_vs_info_fops = {
1da177e4
LT
1960 .owner = THIS_MODULE,
1961 .open = ip_vs_info_open,
1962 .read = seq_read,
1963 .llseek = seq_lseek,
1964 .release = seq_release_private,
1965};
1966
1967#endif
1968
1da177e4
LT
1969#ifdef CONFIG_PROC_FS
1970static int ip_vs_stats_show(struct seq_file *seq, void *v)
1971{
b17fc996
HS
1972 struct net *net = seq_file_single_net(seq);
1973 struct ip_vs_stats *tot_stats = net_ipvs(net)->tot_stats;
1da177e4
LT
1974
1975/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1976 seq_puts(seq,
1977 " Total Incoming Outgoing Incoming Outgoing\n");
1978 seq_printf(seq,
1979 " Conns Packets Packets Bytes Bytes\n");
1980
b17fc996
HS
1981 spin_lock_bh(&tot_stats->lock);
1982 seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", tot_stats->ustats.conns,
1983 tot_stats->ustats.inpkts, tot_stats->ustats.outpkts,
1984 (unsigned long long) tot_stats->ustats.inbytes,
1985 (unsigned long long) tot_stats->ustats.outbytes);
1da177e4
LT
1986
1987/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1988 seq_puts(seq,
1989 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
1990 seq_printf(seq,"%8X %8X %8X %16X %16X\n",
b17fc996
HS
1991 tot_stats->ustats.cps,
1992 tot_stats->ustats.inpps,
1993 tot_stats->ustats.outpps,
1994 tot_stats->ustats.inbps,
1995 tot_stats->ustats.outbps);
1996 spin_unlock_bh(&tot_stats->lock);
1da177e4
LT
1997
1998 return 0;
1999}
2000
2001static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
2002{
fc723250 2003 return single_open_net(inode, file, ip_vs_stats_show);
1da177e4
LT
2004}
2005
9a32144e 2006static const struct file_operations ip_vs_stats_fops = {
1da177e4
LT
2007 .owner = THIS_MODULE,
2008 .open = ip_vs_stats_seq_open,
2009 .read = seq_read,
2010 .llseek = seq_lseek,
2011 .release = single_release,
2012};
2013
b17fc996
HS
2014static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
2015{
2016 struct net *net = seq_file_single_net(seq);
2017 struct ip_vs_stats *tot_stats = net_ipvs(net)->tot_stats;
2018 int i;
2019
2020/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2021 seq_puts(seq,
2022 " Total Incoming Outgoing Incoming Outgoing\n");
2023 seq_printf(seq,
2024 "CPU Conns Packets Packets Bytes Bytes\n");
2025
2026 for_each_possible_cpu(i) {
2027 struct ip_vs_cpu_stats *u = per_cpu_ptr(net->ipvs->cpustats, i);
2028 seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n",
2029 i, u->ustats.conns, u->ustats.inpkts,
2030 u->ustats.outpkts, (__u64)u->ustats.inbytes,
2031 (__u64)u->ustats.outbytes);
2032 }
2033
2034 spin_lock_bh(&tot_stats->lock);
2035 seq_printf(seq, " ~ %8X %8X %8X %16LX %16LX\n\n",
2036 tot_stats->ustats.conns, tot_stats->ustats.inpkts,
2037 tot_stats->ustats.outpkts,
2038 (unsigned long long) tot_stats->ustats.inbytes,
2039 (unsigned long long) tot_stats->ustats.outbytes);
2040
2041/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2042 seq_puts(seq,
2043 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
2044 seq_printf(seq, " %8X %8X %8X %16X %16X\n",
2045 tot_stats->ustats.cps,
2046 tot_stats->ustats.inpps,
2047 tot_stats->ustats.outpps,
2048 tot_stats->ustats.inbps,
2049 tot_stats->ustats.outbps);
2050 spin_unlock_bh(&tot_stats->lock);
2051
2052 return 0;
2053}
2054
2055static int ip_vs_stats_percpu_seq_open(struct inode *inode, struct file *file)
2056{
2057 return single_open_net(inode, file, ip_vs_stats_percpu_show);
2058}
2059
2060static const struct file_operations ip_vs_stats_percpu_fops = {
2061 .owner = THIS_MODULE,
2062 .open = ip_vs_stats_percpu_seq_open,
2063 .read = seq_read,
2064 .llseek = seq_lseek,
2065 .release = single_release,
2066};
1da177e4
LT
2067#endif
2068
2069/*
2070 * Set timeout values for tcp tcpfin udp in the timeout_table.
2071 */
9330419d 2072static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u)
1da177e4 2073{
9330419d
HS
2074 struct ip_vs_proto_data *pd;
2075
1da177e4
LT
2076 IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
2077 u->tcp_timeout,
2078 u->tcp_fin_timeout,
2079 u->udp_timeout);
2080
2081#ifdef CONFIG_IP_VS_PROTO_TCP
2082 if (u->tcp_timeout) {
9330419d
HS
2083 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2084 pd->timeout_table[IP_VS_TCP_S_ESTABLISHED]
1da177e4
LT
2085 = u->tcp_timeout * HZ;
2086 }
2087
2088 if (u->tcp_fin_timeout) {
9330419d
HS
2089 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2090 pd->timeout_table[IP_VS_TCP_S_FIN_WAIT]
1da177e4
LT
2091 = u->tcp_fin_timeout * HZ;
2092 }
2093#endif
2094
2095#ifdef CONFIG_IP_VS_PROTO_UDP
2096 if (u->udp_timeout) {
9330419d
HS
2097 pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
2098 pd->timeout_table[IP_VS_UDP_S_NORMAL]
1da177e4
LT
2099 = u->udp_timeout * HZ;
2100 }
2101#endif
2102 return 0;
2103}
2104
2105
/*
 * SET_CMDID() turns a set-sockopt command number into an index relative
 * to IP_VS_BASE_CTL. The argument is parenthesized so that an expression
 * argument is evaluated as a whole before the subtraction.
 * The *_ARG_LEN macros give the argument size, in bytes, of the
 * userspace structures used by the set commands; MAX_ARG_LEN is the
 * largest of them.
 */
#define SET_CMDID(cmd)		((cmd) - IP_VS_BASE_CTL)
#define SERVICE_ARG_LEN		(sizeof(struct ip_vs_service_user))
#define SVCDEST_ARG_LEN		(sizeof(struct ip_vs_service_user) +	\
				 sizeof(struct ip_vs_dest_user))
#define TIMEOUT_ARG_LEN		(sizeof(struct ip_vs_timeout_user))
#define DAEMON_ARG_LEN		(sizeof(struct ip_vs_daemon_user))
#define MAX_ARG_LEN		SVCDEST_ARG_LEN
2113
9b5b5cff 2114static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
1da177e4
LT
2115 [SET_CMDID(IP_VS_SO_SET_ADD)] = SERVICE_ARG_LEN,
2116 [SET_CMDID(IP_VS_SO_SET_EDIT)] = SERVICE_ARG_LEN,
2117 [SET_CMDID(IP_VS_SO_SET_DEL)] = SERVICE_ARG_LEN,
2118 [SET_CMDID(IP_VS_SO_SET_FLUSH)] = 0,
2119 [SET_CMDID(IP_VS_SO_SET_ADDDEST)] = SVCDEST_ARG_LEN,
2120 [SET_CMDID(IP_VS_SO_SET_DELDEST)] = SVCDEST_ARG_LEN,
2121 [SET_CMDID(IP_VS_SO_SET_EDITDEST)] = SVCDEST_ARG_LEN,
2122 [SET_CMDID(IP_VS_SO_SET_TIMEOUT)] = TIMEOUT_ARG_LEN,
2123 [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)] = DAEMON_ARG_LEN,
2124 [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)] = DAEMON_ARG_LEN,
2125 [SET_CMDID(IP_VS_SO_SET_ZERO)] = SERVICE_ARG_LEN,
2126};
2127
c860c6b1
JV
2128static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2129 struct ip_vs_service_user *usvc_compat)
2130{
0d1e71b0
SH
2131 memset(usvc, 0, sizeof(*usvc));
2132
c860c6b1
JV
2133 usvc->af = AF_INET;
2134 usvc->protocol = usvc_compat->protocol;
2135 usvc->addr.ip = usvc_compat->addr;
2136 usvc->port = usvc_compat->port;
2137 usvc->fwmark = usvc_compat->fwmark;
2138
2139 /* Deep copy of sched_name is not needed here */
2140 usvc->sched_name = usvc_compat->sched_name;
2141
2142 usvc->flags = usvc_compat->flags;
2143 usvc->timeout = usvc_compat->timeout;
2144 usvc->netmask = usvc_compat->netmask;
2145}
2146
2147static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
2148 struct ip_vs_dest_user *udest_compat)
2149{
0d1e71b0
SH
2150 memset(udest, 0, sizeof(*udest));
2151
c860c6b1
JV
2152 udest->addr.ip = udest_compat->addr;
2153 udest->port = udest_compat->port;
2154 udest->conn_flags = udest_compat->conn_flags;
2155 udest->weight = udest_compat->weight;
2156 udest->u_threshold = udest_compat->u_threshold;
2157 udest->l_threshold = udest_compat->l_threshold;
2158}
2159
1da177e4
LT
2160static int
2161do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2162{
fc723250 2163 struct net *net = sock_net(sk);
1da177e4
LT
2164 int ret;
2165 unsigned char arg[MAX_ARG_LEN];
c860c6b1
JV
2166 struct ip_vs_service_user *usvc_compat;
2167 struct ip_vs_service_user_kern usvc;
1da177e4 2168 struct ip_vs_service *svc;
c860c6b1
JV
2169 struct ip_vs_dest_user *udest_compat;
2170 struct ip_vs_dest_user_kern udest;
1da177e4
LT
2171
2172 if (!capable(CAP_NET_ADMIN))
2173 return -EPERM;
2174
04bcef2a
AV
2175 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_SET_MAX)
2176 return -EINVAL;
2177 if (len < 0 || len > MAX_ARG_LEN)
2178 return -EINVAL;
1da177e4 2179 if (len != set_arglen[SET_CMDID(cmd)]) {
1e3e238e
HE
2180 pr_err("set_ctl: len %u != %u\n",
2181 len, set_arglen[SET_CMDID(cmd)]);
1da177e4
LT
2182 return -EINVAL;
2183 }
2184
2185 if (copy_from_user(arg, user, len) != 0)
2186 return -EFAULT;
2187
2188 /* increase the module use count */
2189 ip_vs_use_count_inc();
2190
14cc3e2b 2191 if (mutex_lock_interruptible(&__ip_vs_mutex)) {
1da177e4
LT
2192 ret = -ERESTARTSYS;
2193 goto out_dec;
2194 }
2195
2196 if (cmd == IP_VS_SO_SET_FLUSH) {
2197 /* Flush the virtual service */
fc723250 2198 ret = ip_vs_flush(net);
1da177e4
LT
2199 goto out_unlock;
2200 } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
2201 /* Set timeout values for (tcp tcpfin udp) */
9330419d 2202 ret = ip_vs_set_timeout(net, (struct ip_vs_timeout_user *)arg);
1da177e4
LT
2203 goto out_unlock;
2204 } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
2205 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
f131315f
HS
2206 ret = start_sync_thread(net, dm->state, dm->mcast_ifn,
2207 dm->syncid);
1da177e4
LT
2208 goto out_unlock;
2209 } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
2210 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
f131315f 2211 ret = stop_sync_thread(net, dm->state);
1da177e4
LT
2212 goto out_unlock;
2213 }
2214
c860c6b1
JV
2215 usvc_compat = (struct ip_vs_service_user *)arg;
2216 udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
2217
2218 /* We only use the new structs internally, so copy userspace compat
2219 * structs to extended internal versions */
2220 ip_vs_copy_usvc_compat(&usvc, usvc_compat);
2221 ip_vs_copy_udest_compat(&udest, udest_compat);
1da177e4
LT
2222
2223 if (cmd == IP_VS_SO_SET_ZERO) {
2224 /* if no service address is set, zero counters in all */
c860c6b1 2225 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
fc723250 2226 ret = ip_vs_zero_all(net);
1da177e4
LT
2227 goto out_unlock;
2228 }
2229 }
2230
2906f66a
VMR
2231 /* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */
2232 if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP &&
2233 usvc.protocol != IPPROTO_SCTP) {
1e3e238e
HE
2234 pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n",
2235 usvc.protocol, &usvc.addr.ip,
2236 ntohs(usvc.port), usvc.sched_name);
1da177e4
LT
2237 ret = -EFAULT;
2238 goto out_unlock;
2239 }
2240
2241 /* Lookup the exact service by <protocol, addr, port> or fwmark */
c860c6b1 2242 if (usvc.fwmark == 0)
fc723250 2243 svc = __ip_vs_service_find(net, usvc.af, usvc.protocol,
26c15cfd 2244 &usvc.addr, usvc.port);
1da177e4 2245 else
fc723250 2246 svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark);
1da177e4
LT
2247
2248 if (cmd != IP_VS_SO_SET_ADD
c860c6b1 2249 && (svc == NULL || svc->protocol != usvc.protocol)) {
1da177e4 2250 ret = -ESRCH;
26c15cfd 2251 goto out_unlock;
1da177e4
LT
2252 }
2253
2254 switch (cmd) {
2255 case IP_VS_SO_SET_ADD:
2256 if (svc != NULL)
2257 ret = -EEXIST;
2258 else
fc723250 2259 ret = ip_vs_add_service(net, &usvc, &svc);
1da177e4
LT
2260 break;
2261 case IP_VS_SO_SET_EDIT:
c860c6b1 2262 ret = ip_vs_edit_service(svc, &usvc);
1da177e4
LT
2263 break;
2264 case IP_VS_SO_SET_DEL:
2265 ret = ip_vs_del_service(svc);
2266 if (!ret)
2267 goto out_unlock;
2268 break;
2269 case IP_VS_SO_SET_ZERO:
2270 ret = ip_vs_zero_service(svc);
2271 break;
2272 case IP_VS_SO_SET_ADDDEST:
c860c6b1 2273 ret = ip_vs_add_dest(svc, &udest);
1da177e4
LT
2274 break;
2275 case IP_VS_SO_SET_EDITDEST:
c860c6b1 2276 ret = ip_vs_edit_dest(svc, &udest);
1da177e4
LT
2277 break;
2278 case IP_VS_SO_SET_DELDEST:
c860c6b1 2279 ret = ip_vs_del_dest(svc, &udest);
1da177e4
LT
2280 break;
2281 default:
2282 ret = -EINVAL;
2283 }
2284
1da177e4 2285 out_unlock:
14cc3e2b 2286 mutex_unlock(&__ip_vs_mutex);
1da177e4
LT
2287 out_dec:
2288 /* decrease the module use count */
2289 ip_vs_use_count_dec();
2290
2291 return ret;
2292}
2293
2294
2295static void
2296ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
2297{
2298 spin_lock_bh(&src->lock);
e9c0ce23 2299 memcpy(dst, &src->ustats, sizeof(*dst));
1da177e4
LT
2300 spin_unlock_bh(&src->lock);
2301}
2302
2303static void
2304ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2305{
2306 dst->protocol = src->protocol;
e7ade46a 2307 dst->addr = src->addr.ip;
1da177e4
LT
2308 dst->port = src->port;
2309 dst->fwmark = src->fwmark;
4da62fc7 2310 strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
1da177e4
LT
2311 dst->flags = src->flags;
2312 dst->timeout = src->timeout / HZ;
2313 dst->netmask = src->netmask;
2314 dst->num_dests = src->num_dests;
2315 ip_vs_copy_stats(&dst->stats, &src->stats);
2316}
2317
2318static inline int
fc723250
HS
2319__ip_vs_get_service_entries(struct net *net,
2320 const struct ip_vs_get_services *get,
1da177e4
LT
2321 struct ip_vs_get_services __user *uptr)
2322{
2323 int idx, count=0;
2324 struct ip_vs_service *svc;
2325 struct ip_vs_service_entry entry;
2326 int ret = 0;
2327
2328 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2329 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
f94fd041 2330 /* Only expose IPv4 entries to old interface */
fc723250 2331 if (svc->af != AF_INET || !net_eq(svc->net, net))
f94fd041
JV
2332 continue;
2333
1da177e4
LT
2334 if (count >= get->num_services)
2335 goto out;
4da62fc7 2336 memset(&entry, 0, sizeof(entry));
1da177e4
LT
2337 ip_vs_copy_service(&entry, svc);
2338 if (copy_to_user(&uptr->entrytable[count],
2339 &entry, sizeof(entry))) {
2340 ret = -EFAULT;
2341 goto out;
2342 }
2343 count++;
2344 }
2345 }
2346
2347 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2348 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
f94fd041 2349 /* Only expose IPv4 entries to old interface */
fc723250 2350 if (svc->af != AF_INET || !net_eq(svc->net, net))
f94fd041
JV
2351 continue;
2352
1da177e4
LT
2353 if (count >= get->num_services)
2354 goto out;
4da62fc7 2355 memset(&entry, 0, sizeof(entry));
1da177e4
LT
2356 ip_vs_copy_service(&entry, svc);
2357 if (copy_to_user(&uptr->entrytable[count],
2358 &entry, sizeof(entry))) {
2359 ret = -EFAULT;
2360 goto out;
2361 }
2362 count++;
2363 }
2364 }
2365 out:
2366 return ret;
2367}
2368
2369static inline int
fc723250 2370__ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
1da177e4
LT
2371 struct ip_vs_get_dests __user *uptr)
2372{
2373 struct ip_vs_service *svc;
b18610de 2374 union nf_inet_addr addr = { .ip = get->addr };
1da177e4
LT
2375 int ret = 0;
2376
2377 if (get->fwmark)
fc723250 2378 svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark);
1da177e4 2379 else
fc723250 2380 svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr,
26c15cfd 2381 get->port);
b18610de 2382
1da177e4
LT
2383 if (svc) {
2384 int count = 0;
2385 struct ip_vs_dest *dest;
2386 struct ip_vs_dest_entry entry;
2387
2388 list_for_each_entry(dest, &svc->destinations, n_list) {
2389 if (count >= get->num_dests)
2390 break;
2391
e7ade46a 2392 entry.addr = dest->addr.ip;
1da177e4
LT
2393 entry.port = dest->port;
2394 entry.conn_flags = atomic_read(&dest->conn_flags);
2395 entry.weight = atomic_read(&dest->weight);
2396 entry.u_threshold = dest->u_threshold;
2397 entry.l_threshold = dest->l_threshold;
2398 entry.activeconns = atomic_read(&dest->activeconns);
2399 entry.inactconns = atomic_read(&dest->inactconns);
2400 entry.persistconns = atomic_read(&dest->persistconns);
2401 ip_vs_copy_stats(&entry.stats, &dest->stats);
2402 if (copy_to_user(&uptr->entrytable[count],
2403 &entry, sizeof(entry))) {
2404 ret = -EFAULT;
2405 break;
2406 }
2407 count++;
2408 }
1da177e4
LT
2409 } else
2410 ret = -ESRCH;
2411 return ret;
2412}
2413
2414static inline void
9330419d 2415__ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u)
1da177e4 2416{
9330419d
HS
2417 struct ip_vs_proto_data *pd;
2418
1da177e4 2419#ifdef CONFIG_IP_VS_PROTO_TCP
9330419d
HS
2420 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2421 u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2422 u->tcp_fin_timeout = pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
1da177e4
LT
2423#endif
2424#ifdef CONFIG_IP_VS_PROTO_UDP
9330419d 2425 pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
1da177e4 2426 u->udp_timeout =
9330419d 2427 pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
1da177e4
LT
2428#endif
2429}
2430
2431
2432#define GET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
2433#define GET_INFO_ARG_LEN (sizeof(struct ip_vs_getinfo))
2434#define GET_SERVICES_ARG_LEN (sizeof(struct ip_vs_get_services))
2435#define GET_SERVICE_ARG_LEN (sizeof(struct ip_vs_service_entry))
2436#define GET_DESTS_ARG_LEN (sizeof(struct ip_vs_get_dests))
2437#define GET_TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
2438#define GET_DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user) * 2)
2439
9b5b5cff 2440static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
1da177e4
LT
2441 [GET_CMDID(IP_VS_SO_GET_VERSION)] = 64,
2442 [GET_CMDID(IP_VS_SO_GET_INFO)] = GET_INFO_ARG_LEN,
2443 [GET_CMDID(IP_VS_SO_GET_SERVICES)] = GET_SERVICES_ARG_LEN,
2444 [GET_CMDID(IP_VS_SO_GET_SERVICE)] = GET_SERVICE_ARG_LEN,
2445 [GET_CMDID(IP_VS_SO_GET_DESTS)] = GET_DESTS_ARG_LEN,
2446 [GET_CMDID(IP_VS_SO_GET_TIMEOUT)] = GET_TIMEOUT_ARG_LEN,
2447 [GET_CMDID(IP_VS_SO_GET_DAEMON)] = GET_DAEMON_ARG_LEN,
2448};
2449
2450static int
2451do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2452{
2453 unsigned char arg[128];
2454 int ret = 0;
04bcef2a 2455 unsigned int copylen;
fc723250 2456 struct net *net = sock_net(sk);
f131315f 2457 struct netns_ipvs *ipvs = net_ipvs(net);
1da177e4 2458
fc723250 2459 BUG_ON(!net);
1da177e4
LT
2460 if (!capable(CAP_NET_ADMIN))
2461 return -EPERM;
2462
04bcef2a
AV
2463 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_GET_MAX)
2464 return -EINVAL;
2465
1da177e4 2466 if (*len < get_arglen[GET_CMDID(cmd)]) {
1e3e238e
HE
2467 pr_err("get_ctl: len %u < %u\n",
2468 *len, get_arglen[GET_CMDID(cmd)]);
1da177e4
LT
2469 return -EINVAL;
2470 }
2471
04bcef2a
AV
2472 copylen = get_arglen[GET_CMDID(cmd)];
2473 if (copylen > 128)
2474 return -EINVAL;
2475
2476 if (copy_from_user(arg, user, copylen) != 0)
1da177e4
LT
2477 return -EFAULT;
2478
14cc3e2b 2479 if (mutex_lock_interruptible(&__ip_vs_mutex))
1da177e4
LT
2480 return -ERESTARTSYS;
2481
2482 switch (cmd) {
2483 case IP_VS_SO_GET_VERSION:
2484 {
2485 char buf[64];
2486
2487 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
6f7edb48 2488 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
1da177e4
LT
2489 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2490 ret = -EFAULT;
2491 goto out;
2492 }
2493 *len = strlen(buf)+1;
2494 }
2495 break;
2496
2497 case IP_VS_SO_GET_INFO:
2498 {
2499 struct ip_vs_getinfo info;
2500 info.version = IP_VS_VERSION_CODE;
6f7edb48 2501 info.size = ip_vs_conn_tab_size;
a0840e2e 2502 info.num_services = ipvs->num_services;
1da177e4
LT
2503 if (copy_to_user(user, &info, sizeof(info)) != 0)
2504 ret = -EFAULT;
2505 }
2506 break;
2507
2508 case IP_VS_SO_GET_SERVICES:
2509 {
2510 struct ip_vs_get_services *get;
2511 int size;
2512
2513 get = (struct ip_vs_get_services *)arg;
2514 size = sizeof(*get) +
2515 sizeof(struct ip_vs_service_entry) * get->num_services;
2516 if (*len != size) {
1e3e238e 2517 pr_err("length: %u != %u\n", *len, size);
1da177e4
LT
2518 ret = -EINVAL;
2519 goto out;
2520 }
fc723250 2521 ret = __ip_vs_get_service_entries(net, get, user);
1da177e4
LT
2522 }
2523 break;
2524
2525 case IP_VS_SO_GET_SERVICE:
2526 {
2527 struct ip_vs_service_entry *entry;
2528 struct ip_vs_service *svc;
b18610de 2529 union nf_inet_addr addr;
1da177e4
LT
2530
2531 entry = (struct ip_vs_service_entry *)arg;
b18610de 2532 addr.ip = entry->addr;
1da177e4 2533 if (entry->fwmark)
fc723250 2534 svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark);
1da177e4 2535 else
fc723250
HS
2536 svc = __ip_vs_service_find(net, AF_INET,
2537 entry->protocol, &addr,
2538 entry->port);
1da177e4
LT
2539 if (svc) {
2540 ip_vs_copy_service(entry, svc);
2541 if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2542 ret = -EFAULT;
1da177e4
LT
2543 } else
2544 ret = -ESRCH;
2545 }
2546 break;
2547
2548 case IP_VS_SO_GET_DESTS:
2549 {
2550 struct ip_vs_get_dests *get;
2551 int size;
2552
2553 get = (struct ip_vs_get_dests *)arg;
2554 size = sizeof(*get) +
2555 sizeof(struct ip_vs_dest_entry) * get->num_dests;
2556 if (*len != size) {
1e3e238e 2557 pr_err("length: %u != %u\n", *len, size);
1da177e4
LT
2558 ret = -EINVAL;
2559 goto out;
2560 }
fc723250 2561 ret = __ip_vs_get_dest_entries(net, get, user);
1da177e4
LT
2562 }
2563 break;
2564
2565 case IP_VS_SO_GET_TIMEOUT:
2566 {
2567 struct ip_vs_timeout_user t;
2568
9330419d 2569 __ip_vs_get_timeouts(net, &t);
1da177e4
LT
2570 if (copy_to_user(user, &t, sizeof(t)) != 0)
2571 ret = -EFAULT;
2572 }
2573 break;
2574
2575 case IP_VS_SO_GET_DAEMON:
2576 {
2577 struct ip_vs_daemon_user d[2];
2578
2579 memset(&d, 0, sizeof(d));
f131315f 2580 if (ipvs->sync_state & IP_VS_STATE_MASTER) {
1da177e4 2581 d[0].state = IP_VS_STATE_MASTER;
f131315f
HS
2582 strlcpy(d[0].mcast_ifn, ipvs->master_mcast_ifn,
2583 sizeof(d[0].mcast_ifn));
2584 d[0].syncid = ipvs->master_syncid;
1da177e4 2585 }
f131315f 2586 if (ipvs->sync_state & IP_VS_STATE_BACKUP) {
1da177e4 2587 d[1].state = IP_VS_STATE_BACKUP;
f131315f
HS
2588 strlcpy(d[1].mcast_ifn, ipvs->backup_mcast_ifn,
2589 sizeof(d[1].mcast_ifn));
2590 d[1].syncid = ipvs->backup_syncid;
1da177e4
LT
2591 }
2592 if (copy_to_user(user, &d, sizeof(d)) != 0)
2593 ret = -EFAULT;
2594 }
2595 break;
2596
2597 default:
2598 ret = -EINVAL;
2599 }
2600
2601 out:
14cc3e2b 2602 mutex_unlock(&__ip_vs_mutex);
1da177e4
LT
2603 return ret;
2604}
2605
2606
2607static struct nf_sockopt_ops ip_vs_sockopts = {
2608 .pf = PF_INET,
2609 .set_optmin = IP_VS_BASE_CTL,
2610 .set_optmax = IP_VS_SO_SET_MAX+1,
2611 .set = do_ip_vs_set_ctl,
2612 .get_optmin = IP_VS_BASE_CTL,
2613 .get_optmax = IP_VS_SO_GET_MAX+1,
2614 .get = do_ip_vs_get_ctl,
16fcec35 2615 .owner = THIS_MODULE,
1da177e4
LT
2616};
2617
9a812198
JV
2618/*
2619 * Generic Netlink interface
2620 */
2621
2622/* IPVS genetlink family */
2623static struct genl_family ip_vs_genl_family = {
2624 .id = GENL_ID_GENERATE,
2625 .hdrsize = 0,
2626 .name = IPVS_GENL_NAME,
2627 .version = IPVS_GENL_VERSION,
2628 .maxattr = IPVS_CMD_MAX,
2629};
2630
2631/* Policy used for first-level command attributes */
2632static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
2633 [IPVS_CMD_ATTR_SERVICE] = { .type = NLA_NESTED },
2634 [IPVS_CMD_ATTR_DEST] = { .type = NLA_NESTED },
2635 [IPVS_CMD_ATTR_DAEMON] = { .type = NLA_NESTED },
2636 [IPVS_CMD_ATTR_TIMEOUT_TCP] = { .type = NLA_U32 },
2637 [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
2638 [IPVS_CMD_ATTR_TIMEOUT_UDP] = { .type = NLA_U32 },
2639};
2640
2641/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
2642static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
2643 [IPVS_DAEMON_ATTR_STATE] = { .type = NLA_U32 },
2644 [IPVS_DAEMON_ATTR_MCAST_IFN] = { .type = NLA_NUL_STRING,
2645 .len = IP_VS_IFNAME_MAXLEN },
2646 [IPVS_DAEMON_ATTR_SYNC_ID] = { .type = NLA_U32 },
2647};
2648
2649/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
2650static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2651 [IPVS_SVC_ATTR_AF] = { .type = NLA_U16 },
2652 [IPVS_SVC_ATTR_PROTOCOL] = { .type = NLA_U16 },
2653 [IPVS_SVC_ATTR_ADDR] = { .type = NLA_BINARY,
2654 .len = sizeof(union nf_inet_addr) },
2655 [IPVS_SVC_ATTR_PORT] = { .type = NLA_U16 },
2656 [IPVS_SVC_ATTR_FWMARK] = { .type = NLA_U32 },
2657 [IPVS_SVC_ATTR_SCHED_NAME] = { .type = NLA_NUL_STRING,
2658 .len = IP_VS_SCHEDNAME_MAXLEN },
0d1e71b0
SH
2659 [IPVS_SVC_ATTR_PE_NAME] = { .type = NLA_NUL_STRING,
2660 .len = IP_VS_PENAME_MAXLEN },
9a812198
JV
2661 [IPVS_SVC_ATTR_FLAGS] = { .type = NLA_BINARY,
2662 .len = sizeof(struct ip_vs_flags) },
2663 [IPVS_SVC_ATTR_TIMEOUT] = { .type = NLA_U32 },
2664 [IPVS_SVC_ATTR_NETMASK] = { .type = NLA_U32 },
2665 [IPVS_SVC_ATTR_STATS] = { .type = NLA_NESTED },
2666};
2667
2668/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
2669static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2670 [IPVS_DEST_ATTR_ADDR] = { .type = NLA_BINARY,
2671 .len = sizeof(union nf_inet_addr) },
2672 [IPVS_DEST_ATTR_PORT] = { .type = NLA_U16 },
2673 [IPVS_DEST_ATTR_FWD_METHOD] = { .type = NLA_U32 },
2674 [IPVS_DEST_ATTR_WEIGHT] = { .type = NLA_U32 },
2675 [IPVS_DEST_ATTR_U_THRESH] = { .type = NLA_U32 },
2676 [IPVS_DEST_ATTR_L_THRESH] = { .type = NLA_U32 },
2677 [IPVS_DEST_ATTR_ACTIVE_CONNS] = { .type = NLA_U32 },
2678 [IPVS_DEST_ATTR_INACT_CONNS] = { .type = NLA_U32 },
2679 [IPVS_DEST_ATTR_PERSIST_CONNS] = { .type = NLA_U32 },
2680 [IPVS_DEST_ATTR_STATS] = { .type = NLA_NESTED },
2681};
2682
2683static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2684 struct ip_vs_stats *stats)
2685{
2686 struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2687 if (!nl_stats)
2688 return -EMSGSIZE;
2689
2690 spin_lock_bh(&stats->lock);
2691
e9c0ce23
SW
2692 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->ustats.conns);
2693 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->ustats.inpkts);
2694 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->ustats.outpkts);
2695 NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->ustats.inbytes);
2696 NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->ustats.outbytes);
2697 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->ustats.cps);
2698 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->ustats.inpps);
2699 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->ustats.outpps);
2700 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->ustats.inbps);
2701 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->ustats.outbps);
9a812198
JV
2702
2703 spin_unlock_bh(&stats->lock);
2704
2705 nla_nest_end(skb, nl_stats);
2706
2707 return 0;
2708
2709nla_put_failure:
2710 spin_unlock_bh(&stats->lock);
2711 nla_nest_cancel(skb, nl_stats);
2712 return -EMSGSIZE;
2713}
2714
2715static int ip_vs_genl_fill_service(struct sk_buff *skb,
2716 struct ip_vs_service *svc)
2717{
2718 struct nlattr *nl_service;
2719 struct ip_vs_flags flags = { .flags = svc->flags,
2720 .mask = ~0 };
2721
2722 nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2723 if (!nl_service)
2724 return -EMSGSIZE;
2725
f94fd041 2726 NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, svc->af);
9a812198
JV
2727
2728 if (svc->fwmark) {
2729 NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
2730 } else {
2731 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
2732 NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
2733 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
2734 }
2735
2736 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
0d1e71b0
SH
2737 if (svc->pe)
2738 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_PE_NAME, svc->pe->name);
9a812198
JV
2739 NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
2740 NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
2741 NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
2742
2743 if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2744 goto nla_put_failure;
2745
2746 nla_nest_end(skb, nl_service);
2747
2748 return 0;
2749
2750nla_put_failure:
2751 nla_nest_cancel(skb, nl_service);
2752 return -EMSGSIZE;
2753}
2754
2755static int ip_vs_genl_dump_service(struct sk_buff *skb,
2756 struct ip_vs_service *svc,
2757 struct netlink_callback *cb)
2758{
2759 void *hdr;
2760
2761 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2762 &ip_vs_genl_family, NLM_F_MULTI,
2763 IPVS_CMD_NEW_SERVICE);
2764 if (!hdr)
2765 return -EMSGSIZE;
2766
2767 if (ip_vs_genl_fill_service(skb, svc) < 0)
2768 goto nla_put_failure;
2769
2770 return genlmsg_end(skb, hdr);
2771
2772nla_put_failure:
2773 genlmsg_cancel(skb, hdr);
2774 return -EMSGSIZE;
2775}
2776
2777static int ip_vs_genl_dump_services(struct sk_buff *skb,
2778 struct netlink_callback *cb)
2779{
2780 int idx = 0, i;
2781 int start = cb->args[0];
2782 struct ip_vs_service *svc;
fc723250 2783 struct net *net = skb_sknet(skb);
9a812198
JV
2784
2785 mutex_lock(&__ip_vs_mutex);
2786 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2787 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
fc723250 2788 if (++idx <= start || !net_eq(svc->net, net))
9a812198
JV
2789 continue;
2790 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2791 idx--;
2792 goto nla_put_failure;
2793 }
2794 }
2795 }
2796
2797 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2798 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
fc723250 2799 if (++idx <= start || !net_eq(svc->net, net))
9a812198
JV
2800 continue;
2801 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2802 idx--;
2803 goto nla_put_failure;
2804 }
2805 }
2806 }
2807
2808nla_put_failure:
2809 mutex_unlock(&__ip_vs_mutex);
2810 cb->args[0] = idx;
2811
2812 return skb->len;
2813}
2814
fc723250
HS
2815static int ip_vs_genl_parse_service(struct net *net,
2816 struct ip_vs_service_user_kern *usvc,
26c15cfd
JA
2817 struct nlattr *nla, int full_entry,
2818 struct ip_vs_service **ret_svc)
9a812198
JV
2819{
2820 struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
2821 struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
26c15cfd 2822 struct ip_vs_service *svc;
9a812198
JV
2823
2824 /* Parse mandatory identifying service fields first */
2825 if (nla == NULL ||
2826 nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
2827 return -EINVAL;
2828
2829 nla_af = attrs[IPVS_SVC_ATTR_AF];
2830 nla_protocol = attrs[IPVS_SVC_ATTR_PROTOCOL];
2831 nla_addr = attrs[IPVS_SVC_ATTR_ADDR];
2832 nla_port = attrs[IPVS_SVC_ATTR_PORT];
2833 nla_fwmark = attrs[IPVS_SVC_ATTR_FWMARK];
2834
2835 if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
2836 return -EINVAL;
2837
258c8893
SH
2838 memset(usvc, 0, sizeof(*usvc));
2839
c860c6b1 2840 usvc->af = nla_get_u16(nla_af);
f94fd041
JV
2841#ifdef CONFIG_IP_VS_IPV6
2842 if (usvc->af != AF_INET && usvc->af != AF_INET6)
2843#else
2844 if (usvc->af != AF_INET)
2845#endif
9a812198
JV
2846 return -EAFNOSUPPORT;
2847
2848 if (nla_fwmark) {
2849 usvc->protocol = IPPROTO_TCP;
2850 usvc->fwmark = nla_get_u32(nla_fwmark);
2851 } else {
2852 usvc->protocol = nla_get_u16(nla_protocol);
2853 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
2854 usvc->port = nla_get_u16(nla_port);
2855 usvc->fwmark = 0;
2856 }
2857
26c15cfd 2858 if (usvc->fwmark)
fc723250 2859 svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark);
26c15cfd 2860 else
fc723250 2861 svc = __ip_vs_service_find(net, usvc->af, usvc->protocol,
26c15cfd
JA
2862 &usvc->addr, usvc->port);
2863 *ret_svc = svc;
2864
9a812198
JV
2865 /* If a full entry was requested, check for the additional fields */
2866 if (full_entry) {
0d1e71b0 2867 struct nlattr *nla_sched, *nla_flags, *nla_pe, *nla_timeout,
9a812198
JV
2868 *nla_netmask;
2869 struct ip_vs_flags flags;
9a812198
JV
2870
2871 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
0d1e71b0 2872 nla_pe = attrs[IPVS_SVC_ATTR_PE_NAME];
9a812198
JV
2873 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
2874 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
2875 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
2876
2877 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
2878 return -EINVAL;
2879
2880 nla_memcpy(&flags, nla_flags, sizeof(flags));
2881
2882 /* prefill flags from service if it already exists */
26c15cfd 2883 if (svc)
9a812198 2884 usvc->flags = svc->flags;
9a812198
JV
2885
2886 /* set new flags from userland */
2887 usvc->flags = (usvc->flags & ~flags.mask) |
2888 (flags.flags & flags.mask);
c860c6b1 2889 usvc->sched_name = nla_data(nla_sched);
0d1e71b0 2890 usvc->pe_name = nla_pe ? nla_data(nla_pe) : NULL;
9a812198
JV
2891 usvc->timeout = nla_get_u32(nla_timeout);
2892 usvc->netmask = nla_get_u32(nla_netmask);
2893 }
2894
2895 return 0;
2896}
2897
fc723250
HS
2898static struct ip_vs_service *ip_vs_genl_find_service(struct net *net,
2899 struct nlattr *nla)
9a812198 2900{
c860c6b1 2901 struct ip_vs_service_user_kern usvc;
26c15cfd 2902 struct ip_vs_service *svc;
9a812198
JV
2903 int ret;
2904
fc723250 2905 ret = ip_vs_genl_parse_service(net, &usvc, nla, 0, &svc);
26c15cfd 2906 return ret ? ERR_PTR(ret) : svc;
9a812198
JV
2907}
2908
2909static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
2910{
2911 struct nlattr *nl_dest;
2912
2913 nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
2914 if (!nl_dest)
2915 return -EMSGSIZE;
2916
2917 NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
2918 NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
2919
2920 NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
2921 atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
2922 NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
2923 NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
2924 NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
2925 NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
2926 atomic_read(&dest->activeconns));
2927 NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
2928 atomic_read(&dest->inactconns));
2929 NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
2930 atomic_read(&dest->persistconns));
2931
2932 if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
2933 goto nla_put_failure;
2934
2935 nla_nest_end(skb, nl_dest);
2936
2937 return 0;
2938
2939nla_put_failure:
2940 nla_nest_cancel(skb, nl_dest);
2941 return -EMSGSIZE;
2942}
2943
2944static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
2945 struct netlink_callback *cb)
2946{
2947 void *hdr;
2948
2949 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2950 &ip_vs_genl_family, NLM_F_MULTI,
2951 IPVS_CMD_NEW_DEST);
2952 if (!hdr)
2953 return -EMSGSIZE;
2954
2955 if (ip_vs_genl_fill_dest(skb, dest) < 0)
2956 goto nla_put_failure;
2957
2958 return genlmsg_end(skb, hdr);
2959
2960nla_put_failure:
2961 genlmsg_cancel(skb, hdr);
2962 return -EMSGSIZE;
2963}
2964
2965static int ip_vs_genl_dump_dests(struct sk_buff *skb,
2966 struct netlink_callback *cb)
2967{
2968 int idx = 0;
2969 int start = cb->args[0];
2970 struct ip_vs_service *svc;
2971 struct ip_vs_dest *dest;
2972 struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
a0840e2e 2973 struct net *net = skb_sknet(skb);
9a812198
JV
2974
2975 mutex_lock(&__ip_vs_mutex);
2976
2977 /* Try to find the service for which to dump destinations */
2978 if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
2979 IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
2980 goto out_err;
2981
a0840e2e 2982
fc723250 2983 svc = ip_vs_genl_find_service(net, attrs[IPVS_CMD_ATTR_SERVICE]);
9a812198
JV
2984 if (IS_ERR(svc) || svc == NULL)
2985 goto out_err;
2986
2987 /* Dump the destinations */
2988 list_for_each_entry(dest, &svc->destinations, n_list) {
2989 if (++idx <= start)
2990 continue;
2991 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
2992 idx--;
2993 goto nla_put_failure;
2994 }
2995 }
2996
2997nla_put_failure:
2998 cb->args[0] = idx;
9a812198
JV
2999
3000out_err:
3001 mutex_unlock(&__ip_vs_mutex);
3002
3003 return skb->len;
3004}
3005
c860c6b1 3006static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
9a812198
JV
3007 struct nlattr *nla, int full_entry)
3008{
3009 struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
3010 struct nlattr *nla_addr, *nla_port;
3011
3012 /* Parse mandatory identifying destination fields first */
3013 if (nla == NULL ||
3014 nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
3015 return -EINVAL;
3016
3017 nla_addr = attrs[IPVS_DEST_ATTR_ADDR];
3018 nla_port = attrs[IPVS_DEST_ATTR_PORT];
3019
3020 if (!(nla_addr && nla_port))
3021 return -EINVAL;
3022
258c8893
SH
3023 memset(udest, 0, sizeof(*udest));
3024
9a812198
JV
3025 nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
3026 udest->port = nla_get_u16(nla_port);
3027
3028 /* If a full entry was requested, check for the additional fields */
3029 if (full_entry) {
3030 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
3031 *nla_l_thresh;
3032
3033 nla_fwd = attrs[IPVS_DEST_ATTR_FWD_METHOD];
3034 nla_weight = attrs[IPVS_DEST_ATTR_WEIGHT];
3035 nla_u_thresh = attrs[IPVS_DEST_ATTR_U_THRESH];
3036 nla_l_thresh = attrs[IPVS_DEST_ATTR_L_THRESH];
3037
3038 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
3039 return -EINVAL;
3040
3041 udest->conn_flags = nla_get_u32(nla_fwd)
3042 & IP_VS_CONN_F_FWD_MASK;
3043 udest->weight = nla_get_u32(nla_weight);
3044 udest->u_threshold = nla_get_u32(nla_u_thresh);
3045 udest->l_threshold = nla_get_u32(nla_l_thresh);
3046 }
3047
3048 return 0;
3049}
3050
3051static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
3052 const char *mcast_ifn, __be32 syncid)
3053{
3054 struct nlattr *nl_daemon;
3055
3056 nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
3057 if (!nl_daemon)
3058 return -EMSGSIZE;
3059
3060 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
3061 NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
3062 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
3063
3064 nla_nest_end(skb, nl_daemon);
3065
3066 return 0;
3067
3068nla_put_failure:
3069 nla_nest_cancel(skb, nl_daemon);
3070 return -EMSGSIZE;
3071}
3072
3073static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
3074 const char *mcast_ifn, __be32 syncid,
3075 struct netlink_callback *cb)
3076{
3077 void *hdr;
3078 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
3079 &ip_vs_genl_family, NLM_F_MULTI,
3080 IPVS_CMD_NEW_DAEMON);
3081 if (!hdr)
3082 return -EMSGSIZE;
3083
3084 if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
3085 goto nla_put_failure;
3086
3087 return genlmsg_end(skb, hdr);
3088
3089nla_put_failure:
3090 genlmsg_cancel(skb, hdr);
3091 return -EMSGSIZE;
3092}
3093
3094static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
3095 struct netlink_callback *cb)
3096{
f131315f
HS
3097 struct net *net = skb_net(skb);
3098 struct netns_ipvs *ipvs = net_ipvs(net);
3099
9a812198 3100 mutex_lock(&__ip_vs_mutex);
f131315f 3101 if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
9a812198 3102 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
f131315f
HS
3103 ipvs->master_mcast_ifn,
3104 ipvs->master_syncid, cb) < 0)
9a812198
JV
3105 goto nla_put_failure;
3106
3107 cb->args[0] = 1;
3108 }
3109
f131315f 3110 if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
9a812198 3111 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
f131315f
HS
3112 ipvs->backup_mcast_ifn,
3113 ipvs->backup_syncid, cb) < 0)
9a812198
JV
3114 goto nla_put_failure;
3115
3116 cb->args[1] = 1;
3117 }
3118
3119nla_put_failure:
3120 mutex_unlock(&__ip_vs_mutex);
3121
3122 return skb->len;
3123}
3124
f131315f 3125static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs)
9a812198
JV
3126{
3127 if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
3128 attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
3129 attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
3130 return -EINVAL;
3131
f131315f
HS
3132 return start_sync_thread(net,
3133 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
9a812198
JV
3134 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
3135 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
3136}
3137
f131315f 3138static int ip_vs_genl_del_daemon(struct net *net, struct nlattr **attrs)
9a812198
JV
3139{
3140 if (!attrs[IPVS_DAEMON_ATTR_STATE])
3141 return -EINVAL;
3142
f131315f
HS
3143 return stop_sync_thread(net,
3144 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
9a812198
JV
3145}
3146
9330419d 3147static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs)
9a812198
JV
3148{
3149 struct ip_vs_timeout_user t;
3150
9330419d 3151 __ip_vs_get_timeouts(net, &t);
9a812198
JV
3152
3153 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
3154 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
3155
3156 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
3157 t.tcp_fin_timeout =
3158 nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
3159
3160 if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
3161 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
3162
9330419d 3163 return ip_vs_set_timeout(net, &t);
9a812198
JV
3164}
3165
3166static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3167{
3168 struct ip_vs_service *svc = NULL;
c860c6b1
JV
3169 struct ip_vs_service_user_kern usvc;
3170 struct ip_vs_dest_user_kern udest;
9a812198
JV
3171 int ret = 0, cmd;
3172 int need_full_svc = 0, need_full_dest = 0;
fc723250 3173 struct net *net;
a0840e2e 3174 struct netns_ipvs *ipvs;
9a812198 3175
fc723250 3176 net = skb_sknet(skb);
a0840e2e 3177 ipvs = net_ipvs(net);
9a812198
JV
3178 cmd = info->genlhdr->cmd;
3179
3180 mutex_lock(&__ip_vs_mutex);
3181
3182 if (cmd == IPVS_CMD_FLUSH) {
fc723250 3183 ret = ip_vs_flush(net);
9a812198
JV
3184 goto out;
3185 } else if (cmd == IPVS_CMD_SET_CONFIG) {
9330419d 3186 ret = ip_vs_genl_set_config(net, info->attrs);
9a812198
JV
3187 goto out;
3188 } else if (cmd == IPVS_CMD_NEW_DAEMON ||
3189 cmd == IPVS_CMD_DEL_DAEMON) {
3190
3191 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
3192
3193 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
3194 nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
3195 info->attrs[IPVS_CMD_ATTR_DAEMON],
3196 ip_vs_daemon_policy)) {
3197 ret = -EINVAL;
3198 goto out;
3199 }
3200
3201 if (cmd == IPVS_CMD_NEW_DAEMON)
f131315f 3202 ret = ip_vs_genl_new_daemon(net, daemon_attrs);
9a812198 3203 else
f131315f 3204 ret = ip_vs_genl_del_daemon(net, daemon_attrs);
9a812198
JV
3205 goto out;
3206 } else if (cmd == IPVS_CMD_ZERO &&
3207 !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
fc723250 3208 ret = ip_vs_zero_all(net);
9a812198
JV
3209 goto out;
3210 }
3211
3212 /* All following commands require a service argument, so check if we
3213 * received a valid one. We need a full service specification when
3214 * adding / editing a service. Only identifying members otherwise. */
3215 if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
3216 need_full_svc = 1;
3217
fc723250 3218 ret = ip_vs_genl_parse_service(net, &usvc,
9a812198 3219 info->attrs[IPVS_CMD_ATTR_SERVICE],
26c15cfd 3220 need_full_svc, &svc);
9a812198
JV
3221 if (ret)
3222 goto out;
3223
9a812198
JV
3224 /* Unless we're adding a new service, the service must already exist */
3225 if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
3226 ret = -ESRCH;
3227 goto out;
3228 }
3229
3230 /* Destination commands require a valid destination argument. For
3231 * adding / editing a destination, we need a full destination
3232 * specification. */
3233 if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
3234 cmd == IPVS_CMD_DEL_DEST) {
3235 if (cmd != IPVS_CMD_DEL_DEST)
3236 need_full_dest = 1;
3237
3238 ret = ip_vs_genl_parse_dest(&udest,
3239 info->attrs[IPVS_CMD_ATTR_DEST],
3240 need_full_dest);
3241 if (ret)
3242 goto out;
3243 }
3244
3245 switch (cmd) {
3246 case IPVS_CMD_NEW_SERVICE:
3247 if (svc == NULL)
fc723250 3248 ret = ip_vs_add_service(net, &usvc, &svc);
9a812198
JV
3249 else
3250 ret = -EEXIST;
3251 break;
3252 case IPVS_CMD_SET_SERVICE:
3253 ret = ip_vs_edit_service(svc, &usvc);
3254 break;
3255 case IPVS_CMD_DEL_SERVICE:
3256 ret = ip_vs_del_service(svc);
26c15cfd 3257 /* do not use svc, it can be freed */
9a812198
JV
3258 break;
3259 case IPVS_CMD_NEW_DEST:
3260 ret = ip_vs_add_dest(svc, &udest);
3261 break;
3262 case IPVS_CMD_SET_DEST:
3263 ret = ip_vs_edit_dest(svc, &udest);
3264 break;
3265 case IPVS_CMD_DEL_DEST:
3266 ret = ip_vs_del_dest(svc, &udest);
3267 break;
3268 case IPVS_CMD_ZERO:
3269 ret = ip_vs_zero_service(svc);
3270 break;
3271 default:
3272 ret = -EINVAL;
3273 }
3274
3275out:
9a812198
JV
3276 mutex_unlock(&__ip_vs_mutex);
3277
3278 return ret;
3279}
3280
3281static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3282{
3283 struct sk_buff *msg;
3284 void *reply;
3285 int ret, cmd, reply_cmd;
fc723250 3286 struct net *net;
a0840e2e 3287 struct netns_ipvs *ipvs;
9a812198 3288
fc723250 3289 net = skb_sknet(skb);
a0840e2e 3290 ipvs = net_ipvs(net);
9a812198
JV
3291 cmd = info->genlhdr->cmd;
3292
3293 if (cmd == IPVS_CMD_GET_SERVICE)
3294 reply_cmd = IPVS_CMD_NEW_SERVICE;
3295 else if (cmd == IPVS_CMD_GET_INFO)
3296 reply_cmd = IPVS_CMD_SET_INFO;
3297 else if (cmd == IPVS_CMD_GET_CONFIG)
3298 reply_cmd = IPVS_CMD_SET_CONFIG;
3299 else {
1e3e238e 3300 pr_err("unknown Generic Netlink command\n");
9a812198
JV
3301 return -EINVAL;
3302 }
3303
3304 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
3305 if (!msg)
3306 return -ENOMEM;
3307
3308 mutex_lock(&__ip_vs_mutex);
3309
3310 reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3311 if (reply == NULL)
3312 goto nla_put_failure;
3313
3314 switch (cmd) {
3315 case IPVS_CMD_GET_SERVICE:
3316 {
3317 struct ip_vs_service *svc;
3318
fc723250
HS
3319 svc = ip_vs_genl_find_service(net,
3320 info->attrs[IPVS_CMD_ATTR_SERVICE]);
9a812198
JV
3321 if (IS_ERR(svc)) {
3322 ret = PTR_ERR(svc);
3323 goto out_err;
3324 } else if (svc) {
3325 ret = ip_vs_genl_fill_service(msg, svc);
9a812198
JV
3326 if (ret)
3327 goto nla_put_failure;
3328 } else {
3329 ret = -ESRCH;
3330 goto out_err;
3331 }
3332
3333 break;
3334 }
3335
3336 case IPVS_CMD_GET_CONFIG:
3337 {
3338 struct ip_vs_timeout_user t;
3339
9330419d 3340 __ip_vs_get_timeouts(net, &t);
9a812198
JV
3341#ifdef CONFIG_IP_VS_PROTO_TCP
3342 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
3343 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3344 t.tcp_fin_timeout);
3345#endif
3346#ifdef CONFIG_IP_VS_PROTO_UDP
3347 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
3348#endif
3349
3350 break;
3351 }
3352
3353 case IPVS_CMD_GET_INFO:
3354 NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
3355 NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
6f7edb48 3356 ip_vs_conn_tab_size);
9a812198
JV
3357 break;
3358 }
3359
3360 genlmsg_end(msg, reply);
134e6375 3361 ret = genlmsg_reply(msg, info);
9a812198
JV
3362 goto out;
3363
3364nla_put_failure:
1e3e238e 3365 pr_err("not enough space in Netlink message\n");
9a812198
JV
3366 ret = -EMSGSIZE;
3367
3368out_err:
3369 nlmsg_free(msg);
3370out:
3371 mutex_unlock(&__ip_vs_mutex);
3372
3373 return ret;
3374}
3375
3376
3377static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
3378 {
3379 .cmd = IPVS_CMD_NEW_SERVICE,
3380 .flags = GENL_ADMIN_PERM,
3381 .policy = ip_vs_cmd_policy,
3382 .doit = ip_vs_genl_set_cmd,
3383 },
3384 {
3385 .cmd = IPVS_CMD_SET_SERVICE,
3386 .flags = GENL_ADMIN_PERM,
3387 .policy = ip_vs_cmd_policy,
3388 .doit = ip_vs_genl_set_cmd,
3389 },
3390 {
3391 .cmd = IPVS_CMD_DEL_SERVICE,
3392 .flags = GENL_ADMIN_PERM,
3393 .policy = ip_vs_cmd_policy,
3394 .doit = ip_vs_genl_set_cmd,
3395 },
3396 {
3397 .cmd = IPVS_CMD_GET_SERVICE,
3398 .flags = GENL_ADMIN_PERM,
3399 .doit = ip_vs_genl_get_cmd,
3400 .dumpit = ip_vs_genl_dump_services,
3401 .policy = ip_vs_cmd_policy,
3402 },
3403 {
3404 .cmd = IPVS_CMD_NEW_DEST,
3405 .flags = GENL_ADMIN_PERM,
3406 .policy = ip_vs_cmd_policy,
3407 .doit = ip_vs_genl_set_cmd,
3408 },
3409 {
3410 .cmd = IPVS_CMD_SET_DEST,
3411 .flags = GENL_ADMIN_PERM,
3412 .policy = ip_vs_cmd_policy,
3413 .doit = ip_vs_genl_set_cmd,
3414 },
3415 {
3416 .cmd = IPVS_CMD_DEL_DEST,
3417 .flags = GENL_ADMIN_PERM,
3418 .policy = ip_vs_cmd_policy,
3419 .doit = ip_vs_genl_set_cmd,
3420 },
3421 {
3422 .cmd = IPVS_CMD_GET_DEST,
3423 .flags = GENL_ADMIN_PERM,
3424 .policy = ip_vs_cmd_policy,
3425 .dumpit = ip_vs_genl_dump_dests,
3426 },
3427 {
3428 .cmd = IPVS_CMD_NEW_DAEMON,
3429 .flags = GENL_ADMIN_PERM,
3430 .policy = ip_vs_cmd_policy,
3431 .doit = ip_vs_genl_set_cmd,
3432 },
3433 {
3434 .cmd = IPVS_CMD_DEL_DAEMON,
3435 .flags = GENL_ADMIN_PERM,
3436 .policy = ip_vs_cmd_policy,
3437 .doit = ip_vs_genl_set_cmd,
3438 },
3439 {
3440 .cmd = IPVS_CMD_GET_DAEMON,
3441 .flags = GENL_ADMIN_PERM,
3442 .dumpit = ip_vs_genl_dump_daemons,
3443 },
3444 {
3445 .cmd = IPVS_CMD_SET_CONFIG,
3446 .flags = GENL_ADMIN_PERM,
3447 .policy = ip_vs_cmd_policy,
3448 .doit = ip_vs_genl_set_cmd,
3449 },
3450 {
3451 .cmd = IPVS_CMD_GET_CONFIG,
3452 .flags = GENL_ADMIN_PERM,
3453 .doit = ip_vs_genl_get_cmd,
3454 },
3455 {
3456 .cmd = IPVS_CMD_GET_INFO,
3457 .flags = GENL_ADMIN_PERM,
3458 .doit = ip_vs_genl_get_cmd,
3459 },
3460 {
3461 .cmd = IPVS_CMD_ZERO,
3462 .flags = GENL_ADMIN_PERM,
3463 .policy = ip_vs_cmd_policy,
3464 .doit = ip_vs_genl_set_cmd,
3465 },
3466 {
3467 .cmd = IPVS_CMD_FLUSH,
3468 .flags = GENL_ADMIN_PERM,
3469 .doit = ip_vs_genl_set_cmd,
3470 },
3471};
3472
3473static int __init ip_vs_genl_register(void)
3474{
8f698d54
MM
3475 return genl_register_family_with_ops(&ip_vs_genl_family,
3476 ip_vs_genl_ops, ARRAY_SIZE(ip_vs_genl_ops));
9a812198
JV
3477}
3478
3479static void ip_vs_genl_unregister(void)
3480{
3481 genl_unregister_family(&ip_vs_genl_family);
3482}
3483
3484/* End of Generic Netlink interface definitions */
3485
61b1ab45
HS
3486/*
3487 * per netns init/exit func.
3488 */
3489int __net_init __ip_vs_control_init(struct net *net)
3490{
fc723250
HS
3491 int idx;
3492 struct netns_ipvs *ipvs = net_ipvs(net);
a0840e2e 3493 struct ctl_table *tbl;
fc723250 3494
61b1ab45
HS
3495 if (!net_eq(net, &init_net)) /* netns not enabled yet */
3496 return -EPERM;
a0840e2e
HS
3497
3498 atomic_set(&ipvs->dropentry, 0);
3499 spin_lock_init(&ipvs->dropentry_lock);
3500 spin_lock_init(&ipvs->droppacket_lock);
3501 spin_lock_init(&ipvs->securetcp_lock);
3502 ipvs->rs_lock = __RW_LOCK_UNLOCKED(ipvs->rs_lock);
3503
3504 /* Initialize rs_table */
3505 for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
3506 INIT_LIST_HEAD(&ipvs->rs_table[idx]);
3507
b17fc996
HS
3508 /* procfs stats */
3509 ipvs->tot_stats = kzalloc(sizeof(struct ip_vs_stats), GFP_KERNEL);
3510 if (ipvs->tot_stats == NULL) {
3511 pr_err("%s(): no memory.\n", __func__);
3512 return -ENOMEM;
3513 }
3514 ipvs->cpustats = alloc_percpu(struct ip_vs_cpu_stats);
3515 if (!ipvs->cpustats) {
3516 pr_err("%s() alloc_percpu failed\n", __func__);
3517 goto err_alloc;
3518 }
3519 spin_lock_init(&ipvs->tot_stats->lock);
61b1ab45 3520
fc723250
HS
3521 for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
3522 INIT_LIST_HEAD(&ipvs->rs_table[idx]);
3523
61b1ab45
HS
3524 proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops);
3525 proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops);
b17fc996
HS
3526 proc_net_fops_create(net, "ip_vs_stats_percpu", 0,
3527 &ip_vs_stats_percpu_fops);
a0840e2e
HS
3528
3529 if (!net_eq(net, &init_net)) {
3530 tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL);
3531 if (tbl == NULL)
3532 goto err_dup;
3533 } else
3534 tbl = vs_vars;
3535 /* Initialize sysctl defaults */
3536 idx = 0;
3537 ipvs->sysctl_amemthresh = 1024;
3538 tbl[idx++].data = &ipvs->sysctl_amemthresh;
3539 ipvs->sysctl_am_droprate = 10;
3540 tbl[idx++].data = &ipvs->sysctl_am_droprate;
3541 tbl[idx++].data = &ipvs->sysctl_drop_entry;
3542 tbl[idx++].data = &ipvs->sysctl_drop_packet;
3543#ifdef CONFIG_IP_VS_NFCT
3544 tbl[idx++].data = &ipvs->sysctl_conntrack;
3545#endif
3546 tbl[idx++].data = &ipvs->sysctl_secure_tcp;
3547 ipvs->sysctl_snat_reroute = 1;
3548 tbl[idx++].data = &ipvs->sysctl_snat_reroute;
3549 ipvs->sysctl_sync_ver = 1;
3550 tbl[idx++].data = &ipvs->sysctl_sync_ver;
3551 tbl[idx++].data = &ipvs->sysctl_cache_bypass;
3552 tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
3553 tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template;
3554 ipvs->sysctl_sync_threshold[0] = 3;
3555 ipvs->sysctl_sync_threshold[1] = 50;
3556 tbl[idx].data = &ipvs->sysctl_sync_threshold;
3557 tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
3558 tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;
3559
3560
3561 ipvs->sysctl_hdr = register_net_sysctl_table(net, net_vs_ctl_path,
61b1ab45 3562 vs_vars);
a0840e2e 3563 if (ipvs->sysctl_hdr == NULL)
61b1ab45 3564 goto err_reg;
b17fc996 3565 ip_vs_new_estimator(net, ipvs->tot_stats);
a0840e2e 3566 ipvs->sysctl_tbl = tbl;
61b1ab45
HS
3567 return 0;
3568
3569err_reg:
a0840e2e
HS
3570 if (!net_eq(net, &init_net))
3571 kfree(tbl);
3572err_dup:
b17fc996
HS
3573 free_percpu(ipvs->cpustats);
3574err_alloc:
3575 kfree(ipvs->tot_stats);
61b1ab45
HS
3576 return -ENOMEM;
3577}
3578
3579static void __net_exit __ip_vs_control_cleanup(struct net *net)
3580{
b17fc996
HS
3581 struct netns_ipvs *ipvs = net_ipvs(net);
3582
61b1ab45
HS
3583 if (!net_eq(net, &init_net)) /* netns not enabled yet */
3584 return;
3585
b17fc996 3586 ip_vs_kill_estimator(net, ipvs->tot_stats);
a0840e2e 3587 unregister_net_sysctl_table(ipvs->sysctl_hdr);
b17fc996 3588 proc_net_remove(net, "ip_vs_stats_percpu");
61b1ab45
HS
3589 proc_net_remove(net, "ip_vs_stats");
3590 proc_net_remove(net, "ip_vs");
b17fc996
HS
3591 free_percpu(ipvs->cpustats);
3592 kfree(ipvs->tot_stats);
61b1ab45
HS
3593}
3594
3595static struct pernet_operations ipvs_control_ops = {
3596 .init = __ip_vs_control_init,
3597 .exit = __ip_vs_control_cleanup,
3598};
1da177e4 3599
048cf48b 3600int __init ip_vs_control_init(void)
1da177e4 3601{
1da177e4 3602 int idx;
fc723250 3603 int ret;
1da177e4
LT
3604
3605 EnterFunction(2);
3606
fc723250 3607 /* Initialize svc_table, ip_vs_svc_fwm_table, rs_table */
d86bef73
EB
3608 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
3609 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3610 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3611 }
fc723250
HS
3612
3613 ret = register_pernet_subsys(&ipvs_control_ops);
3614 if (ret) {
3615 pr_err("cannot register namespace.\n");
3616 goto err;
d86bef73 3617 }
fc723250
HS
3618
3619 smp_wmb(); /* Do we really need it now ? */
d86bef73 3620
1da177e4
LT
3621 ret = nf_register_sockopt(&ip_vs_sockopts);
3622 if (ret) {
1e3e238e 3623 pr_err("cannot register sockopt.\n");
fc723250 3624 goto err_net;
1da177e4
LT
3625 }
3626
9a812198
JV
3627 ret = ip_vs_genl_register();
3628 if (ret) {
1e3e238e 3629 pr_err("cannot register Generic Netlink interface.\n");
9a812198 3630 nf_unregister_sockopt(&ip_vs_sockopts);
fc723250 3631 goto err_net;
9a812198
JV
3632 }
3633
1da177e4
LT
3634 /* Hook the defense timer */
3635 schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
3636
3637 LeaveFunction(2);
3638 return 0;
fc723250
HS
3639
3640err_net:
3641 unregister_pernet_subsys(&ipvs_control_ops);
3642err:
3643 return ret;
1da177e4
LT
3644}
3645
3646
3647void ip_vs_control_cleanup(void)
3648{
3649 EnterFunction(2);
3650 ip_vs_trash_cleanup();
afe2c511 3651 cancel_delayed_work_sync(&defense_work);
28e53bdd 3652 cancel_work_sync(&defense_work.work);
61b1ab45 3653 unregister_pernet_subsys(&ipvs_control_ops);
9a812198 3654 ip_vs_genl_unregister();
1da177e4
LT
3655 nf_unregister_sockopt(&ip_vs_sockopts);
3656 LeaveFunction(2);
3657}
This page took 0.973268 seconds and 5 git commands to generate.