netfilter: add a missing include in nf_conntrack_reasm.c
[deliverable/linux.git] / net / netfilter / ipvs / ip_vs_ctl.c
CommitLineData
1da177e4
LT
1/*
2 * IPVS An implementation of the IP virtual server support for the
3 * LINUX operating system. IPVS is now implemented as a module
4 * over the NetFilter framework. IPVS can be used to build a
5 * high-performance and highly available server based on a
6 * cluster of servers.
7 *
1da177e4
LT
8 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
9 * Peter Kese <peter.kese@ijs.si>
10 * Julian Anastasov <ja@ssi.bg>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 *
17 * Changes:
18 *
19 */
20
9aada7ac
HE
21#define KMSG_COMPONENT "IPVS"
22#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
23
1da177e4
LT
24#include <linux/module.h>
25#include <linux/init.h>
26#include <linux/types.h>
4fc268d2 27#include <linux/capability.h>
1da177e4
LT
28#include <linux/fs.h>
29#include <linux/sysctl.h>
30#include <linux/proc_fs.h>
31#include <linux/workqueue.h>
32#include <linux/swap.h>
1da177e4 33#include <linux/seq_file.h>
5a0e3ad6 34#include <linux/slab.h>
1da177e4
LT
35
36#include <linux/netfilter.h>
37#include <linux/netfilter_ipv4.h>
14cc3e2b 38#include <linux/mutex.h>
1da177e4 39
457c4cbc 40#include <net/net_namespace.h>
9330419d 41#include <linux/nsproxy.h>
1da177e4 42#include <net/ip.h>
09571c7a
VB
43#ifdef CONFIG_IP_VS_IPV6
44#include <net/ipv6.h>
45#include <net/ip6_route.h>
46#endif
14c85021 47#include <net/route.h>
1da177e4 48#include <net/sock.h>
9a812198 49#include <net/genetlink.h>
1da177e4
LT
50
51#include <asm/uaccess.h>
52
53#include <net/ip_vs.h>
54
55/* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
14cc3e2b 56static DEFINE_MUTEX(__ip_vs_mutex);
1da177e4
LT
57
58/* lock for service table */
59static DEFINE_RWLOCK(__ip_vs_svc_lock);
60
1da177e4 61/* sysctl variables */
1da177e4
LT
62
63#ifdef CONFIG_IP_VS_DEBUG
64static int sysctl_ip_vs_debug_level = 0;
65
66int ip_vs_get_debug_level(void)
67{
68 return sysctl_ip_vs_debug_level;
69}
70#endif
71
09571c7a
VB
72#ifdef CONFIG_IP_VS_IPV6
73/* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
4a98480b
HS
74static int __ip_vs_addr_is_local_v6(struct net *net,
75 const struct in6_addr *addr)
09571c7a
VB
76{
77 struct rt6_info *rt;
78 struct flowi fl = {
79 .oif = 0,
5811662b
CG
80 .fl6_dst = *addr,
81 .fl6_src = { .s6_addr32 = {0, 0, 0, 0} },
09571c7a
VB
82 };
83
4a98480b 84 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl);
09571c7a
VB
85 if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK))
86 return 1;
87
88 return 0;
89}
90#endif
1da177e4 91/*
af9debd4
JA
92 * update_defense_level is called from keventd and from sysctl,
93 * so it needs to protect itself from softirqs
1da177e4 94 */
9330419d 95static void update_defense_level(struct netns_ipvs *ipvs)
1da177e4
LT
96{
97 struct sysinfo i;
98 static int old_secure_tcp = 0;
99 int availmem;
100 int nomem;
101 int to_change = -1;
102
103 /* we only count free and buffered memory (in pages) */
104 si_meminfo(&i);
105 availmem = i.freeram + i.bufferram;
106 /* however in linux 2.5 the i.bufferram is total page cache size,
107 we need adjust it */
108 /* si_swapinfo(&i); */
109 /* availmem = availmem - (i.totalswap - i.freeswap); */
110
a0840e2e 111 nomem = (availmem < ipvs->sysctl_amemthresh);
1da177e4 112
af9debd4
JA
113 local_bh_disable();
114
1da177e4 115 /* drop_entry */
a0840e2e
HS
116 spin_lock(&ipvs->dropentry_lock);
117 switch (ipvs->sysctl_drop_entry) {
1da177e4 118 case 0:
a0840e2e 119 atomic_set(&ipvs->dropentry, 0);
1da177e4
LT
120 break;
121 case 1:
122 if (nomem) {
a0840e2e
HS
123 atomic_set(&ipvs->dropentry, 1);
124 ipvs->sysctl_drop_entry = 2;
1da177e4 125 } else {
a0840e2e 126 atomic_set(&ipvs->dropentry, 0);
1da177e4
LT
127 }
128 break;
129 case 2:
130 if (nomem) {
a0840e2e 131 atomic_set(&ipvs->dropentry, 1);
1da177e4 132 } else {
a0840e2e
HS
133 atomic_set(&ipvs->dropentry, 0);
134 ipvs->sysctl_drop_entry = 1;
1da177e4
LT
135 };
136 break;
137 case 3:
a0840e2e 138 atomic_set(&ipvs->dropentry, 1);
1da177e4
LT
139 break;
140 }
a0840e2e 141 spin_unlock(&ipvs->dropentry_lock);
1da177e4
LT
142
143 /* drop_packet */
a0840e2e
HS
144 spin_lock(&ipvs->droppacket_lock);
145 switch (ipvs->sysctl_drop_packet) {
1da177e4 146 case 0:
a0840e2e 147 ipvs->drop_rate = 0;
1da177e4
LT
148 break;
149 case 1:
150 if (nomem) {
a0840e2e
HS
151 ipvs->drop_rate = ipvs->drop_counter
152 = ipvs->sysctl_amemthresh /
153 (ipvs->sysctl_amemthresh-availmem);
154 ipvs->sysctl_drop_packet = 2;
1da177e4 155 } else {
a0840e2e 156 ipvs->drop_rate = 0;
1da177e4
LT
157 }
158 break;
159 case 2:
160 if (nomem) {
a0840e2e
HS
161 ipvs->drop_rate = ipvs->drop_counter
162 = ipvs->sysctl_amemthresh /
163 (ipvs->sysctl_amemthresh-availmem);
1da177e4 164 } else {
a0840e2e
HS
165 ipvs->drop_rate = 0;
166 ipvs->sysctl_drop_packet = 1;
1da177e4
LT
167 }
168 break;
169 case 3:
a0840e2e 170 ipvs->drop_rate = ipvs->sysctl_am_droprate;
1da177e4
LT
171 break;
172 }
a0840e2e 173 spin_unlock(&ipvs->droppacket_lock);
1da177e4
LT
174
175 /* secure_tcp */
a0840e2e
HS
176 spin_lock(&ipvs->securetcp_lock);
177 switch (ipvs->sysctl_secure_tcp) {
1da177e4
LT
178 case 0:
179 if (old_secure_tcp >= 2)
180 to_change = 0;
181 break;
182 case 1:
183 if (nomem) {
184 if (old_secure_tcp < 2)
185 to_change = 1;
a0840e2e 186 ipvs->sysctl_secure_tcp = 2;
1da177e4
LT
187 } else {
188 if (old_secure_tcp >= 2)
189 to_change = 0;
190 }
191 break;
192 case 2:
193 if (nomem) {
194 if (old_secure_tcp < 2)
195 to_change = 1;
196 } else {
197 if (old_secure_tcp >= 2)
198 to_change = 0;
a0840e2e 199 ipvs->sysctl_secure_tcp = 1;
1da177e4
LT
200 }
201 break;
202 case 3:
203 if (old_secure_tcp < 2)
204 to_change = 1;
205 break;
206 }
a0840e2e 207 old_secure_tcp = ipvs->sysctl_secure_tcp;
1da177e4 208 if (to_change >= 0)
9330419d 209 ip_vs_protocol_timeout_change(ipvs,
a0840e2e
HS
210 ipvs->sysctl_secure_tcp > 1);
211 spin_unlock(&ipvs->securetcp_lock);
af9debd4
JA
212
213 local_bh_enable();
1da177e4
LT
214}
215
216
217/*
218 * Timer for checking the defense
219 */
220#define DEFENSE_TIMER_PERIOD 1*HZ
1da177e4 221
c4028958 222static void defense_work_handler(struct work_struct *work)
1da177e4 223{
f6340ee0
HS
224 struct netns_ipvs *ipvs =
225 container_of(work, struct netns_ipvs, defense_work.work);
9330419d
HS
226
227 update_defense_level(ipvs);
a0840e2e 228 if (atomic_read(&ipvs->dropentry))
f6340ee0
HS
229 ip_vs_random_dropentry(ipvs->net);
230 schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
1da177e4
LT
231}
232
233int
234ip_vs_use_count_inc(void)
235{
236 return try_module_get(THIS_MODULE);
237}
238
239void
240ip_vs_use_count_dec(void)
241{
242 module_put(THIS_MODULE);
243}
244
245
246/*
247 * Hash table: for virtual service lookups
248 */
249#define IP_VS_SVC_TAB_BITS 8
250#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
251#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
252
253/* the service table hashed by <protocol, addr, port> */
254static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
255/* the service table hashed by fwmark */
256static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
257
1da177e4
LT
258
259/*
260 * Returns hash value for virtual service
261 */
fc723250
HS
262static inline unsigned
263ip_vs_svc_hashkey(struct net *net, int af, unsigned proto,
264 const union nf_inet_addr *addr, __be16 port)
1da177e4
LT
265{
266 register unsigned porth = ntohs(port);
b18610de 267 __be32 addr_fold = addr->ip;
1da177e4 268
b18610de
JV
269#ifdef CONFIG_IP_VS_IPV6
270 if (af == AF_INET6)
271 addr_fold = addr->ip6[0]^addr->ip6[1]^
272 addr->ip6[2]^addr->ip6[3];
273#endif
fc723250 274 addr_fold ^= ((size_t)net>>8);
b18610de
JV
275
276 return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
1da177e4
LT
277 & IP_VS_SVC_TAB_MASK;
278}
279
280/*
281 * Returns hash value of fwmark for virtual service lookup
282 */
fc723250 283static inline unsigned ip_vs_svc_fwm_hashkey(struct net *net, __u32 fwmark)
1da177e4 284{
fc723250 285 return (((size_t)net>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK;
1da177e4
LT
286}
287
288/*
fc723250 289 * Hashes a service in the ip_vs_svc_table by <netns,proto,addr,port>
1da177e4
LT
290 * or in the ip_vs_svc_fwm_table by fwmark.
291 * Should be called with locked tables.
292 */
293static int ip_vs_svc_hash(struct ip_vs_service *svc)
294{
295 unsigned hash;
296
297 if (svc->flags & IP_VS_SVC_F_HASHED) {
1e3e238e
HE
298 pr_err("%s(): request for already hashed, called from %pF\n",
299 __func__, __builtin_return_address(0));
1da177e4
LT
300 return 0;
301 }
302
303 if (svc->fwmark == 0) {
304 /*
fc723250 305 * Hash it by <netns,protocol,addr,port> in ip_vs_svc_table
1da177e4 306 */
fc723250
HS
307 hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol,
308 &svc->addr, svc->port);
1da177e4
LT
309 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
310 } else {
311 /*
fc723250 312 * Hash it by fwmark in svc_fwm_table
1da177e4 313 */
fc723250 314 hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark);
1da177e4
LT
315 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
316 }
317
318 svc->flags |= IP_VS_SVC_F_HASHED;
319 /* increase its refcnt because it is referenced by the svc table */
320 atomic_inc(&svc->refcnt);
321 return 1;
322}
323
324
325/*
fc723250 326 * Unhashes a service from svc_table / svc_fwm_table.
1da177e4
LT
327 * Should be called with locked tables.
328 */
329static int ip_vs_svc_unhash(struct ip_vs_service *svc)
330{
331 if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
1e3e238e
HE
332 pr_err("%s(): request for unhash flagged, called from %pF\n",
333 __func__, __builtin_return_address(0));
1da177e4
LT
334 return 0;
335 }
336
337 if (svc->fwmark == 0) {
fc723250 338 /* Remove it from the svc_table table */
1da177e4
LT
339 list_del(&svc->s_list);
340 } else {
fc723250 341 /* Remove it from the svc_fwm_table table */
1da177e4
LT
342 list_del(&svc->f_list);
343 }
344
345 svc->flags &= ~IP_VS_SVC_F_HASHED;
346 atomic_dec(&svc->refcnt);
347 return 1;
348}
349
350
351/*
fc723250 352 * Get service by {netns, proto,addr,port} in the service table.
1da177e4 353 */
b18610de 354static inline struct ip_vs_service *
fc723250
HS
355__ip_vs_service_find(struct net *net, int af, __u16 protocol,
356 const union nf_inet_addr *vaddr, __be16 vport)
1da177e4
LT
357{
358 unsigned hash;
359 struct ip_vs_service *svc;
360
361 /* Check for "full" addressed entries */
fc723250 362 hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport);
1da177e4
LT
363
364 list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
b18610de
JV
365 if ((svc->af == af)
366 && ip_vs_addr_equal(af, &svc->addr, vaddr)
1da177e4 367 && (svc->port == vport)
fc723250
HS
368 && (svc->protocol == protocol)
369 && net_eq(svc->net, net)) {
1da177e4 370 /* HIT */
1da177e4
LT
371 return svc;
372 }
373 }
374
375 return NULL;
376}
377
378
379/*
380 * Get service by {fwmark} in the service table.
381 */
b18610de 382static inline struct ip_vs_service *
fc723250 383__ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark)
1da177e4
LT
384{
385 unsigned hash;
386 struct ip_vs_service *svc;
387
388 /* Check for fwmark addressed entries */
fc723250 389 hash = ip_vs_svc_fwm_hashkey(net, fwmark);
1da177e4
LT
390
391 list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
fc723250
HS
392 if (svc->fwmark == fwmark && svc->af == af
393 && net_eq(svc->net, net)) {
1da177e4 394 /* HIT */
1da177e4
LT
395 return svc;
396 }
397 }
398
399 return NULL;
400}
401
402struct ip_vs_service *
fc723250 403ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
3c2e0505 404 const union nf_inet_addr *vaddr, __be16 vport)
1da177e4
LT
405{
406 struct ip_vs_service *svc;
763f8d0e 407 struct netns_ipvs *ipvs = net_ipvs(net);
3c2e0505 408
1da177e4
LT
409 read_lock(&__ip_vs_svc_lock);
410
411 /*
412 * Check the table hashed by fwmark first
413 */
fc723250
HS
414 svc = __ip_vs_svc_fwm_find(net, af, fwmark);
415 if (fwmark && svc)
1da177e4
LT
416 goto out;
417
418 /*
419 * Check the table hashed by <protocol,addr,port>
420 * for "full" addressed entries
421 */
fc723250 422 svc = __ip_vs_service_find(net, af, protocol, vaddr, vport);
1da177e4
LT
423
424 if (svc == NULL
425 && protocol == IPPROTO_TCP
763f8d0e 426 && atomic_read(&ipvs->ftpsvc_counter)
1da177e4
LT
427 && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
428 /*
429 * Check if ftp service entry exists, the packet
430 * might belong to FTP data connections.
431 */
fc723250 432 svc = __ip_vs_service_find(net, af, protocol, vaddr, FTPPORT);
1da177e4
LT
433 }
434
435 if (svc == NULL
763f8d0e 436 && atomic_read(&ipvs->nullsvc_counter)) {
1da177e4
LT
437 /*
438 * Check if the catch-all port (port zero) exists
439 */
fc723250 440 svc = __ip_vs_service_find(net, af, protocol, vaddr, 0);
1da177e4
LT
441 }
442
443 out:
26c15cfd
JA
444 if (svc)
445 atomic_inc(&svc->usecnt);
1da177e4
LT
446 read_unlock(&__ip_vs_svc_lock);
447
3c2e0505
JV
448 IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
449 fwmark, ip_vs_proto_name(protocol),
450 IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
451 svc ? "hit" : "not hit");
1da177e4
LT
452
453 return svc;
454}
455
456
457static inline void
458__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
459{
460 atomic_inc(&svc->refcnt);
461 dest->svc = svc;
462}
463
26c15cfd 464static void
1da177e4
LT
465__ip_vs_unbind_svc(struct ip_vs_dest *dest)
466{
467 struct ip_vs_service *svc = dest->svc;
468
469 dest->svc = NULL;
26c15cfd
JA
470 if (atomic_dec_and_test(&svc->refcnt)) {
471 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
472 svc->fwmark,
473 IP_VS_DBG_ADDR(svc->af, &svc->addr),
474 ntohs(svc->port), atomic_read(&svc->usecnt));
b17fc996 475 free_percpu(svc->stats.cpustats);
1da177e4 476 kfree(svc);
26c15cfd 477 }
1da177e4
LT
478}
479
480
481/*
482 * Returns hash value for real service
483 */
7937df15
JV
484static inline unsigned ip_vs_rs_hashkey(int af,
485 const union nf_inet_addr *addr,
486 __be16 port)
1da177e4
LT
487{
488 register unsigned porth = ntohs(port);
7937df15
JV
489 __be32 addr_fold = addr->ip;
490
491#ifdef CONFIG_IP_VS_IPV6
492 if (af == AF_INET6)
493 addr_fold = addr->ip6[0]^addr->ip6[1]^
494 addr->ip6[2]^addr->ip6[3];
495#endif
1da177e4 496
7937df15 497 return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
1da177e4
LT
498 & IP_VS_RTAB_MASK;
499}
500
501/*
fc723250 502 * Hashes ip_vs_dest in rs_table by <proto,addr,port>.
1da177e4
LT
503 * should be called with locked tables.
504 */
fc723250 505static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
1da177e4
LT
506{
507 unsigned hash;
508
509 if (!list_empty(&dest->d_list)) {
510 return 0;
511 }
512
513 /*
514 * Hash by proto,addr,port,
515 * which are the parameters of the real service.
516 */
7937df15
JV
517 hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
518
fc723250 519 list_add(&dest->d_list, &ipvs->rs_table[hash]);
1da177e4
LT
520
521 return 1;
522}
523
524/*
fc723250 525 * UNhashes ip_vs_dest from rs_table.
1da177e4
LT
526 * should be called with locked tables.
527 */
528static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
529{
530 /*
fc723250 531 * Remove it from the rs_table table.
1da177e4
LT
532 */
533 if (!list_empty(&dest->d_list)) {
534 list_del(&dest->d_list);
535 INIT_LIST_HEAD(&dest->d_list);
536 }
537
538 return 1;
539}
540
541/*
542 * Lookup real service by <proto,addr,port> in the real service table.
543 */
544struct ip_vs_dest *
fc723250 545ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
7937df15
JV
546 const union nf_inet_addr *daddr,
547 __be16 dport)
1da177e4 548{
fc723250 549 struct netns_ipvs *ipvs = net_ipvs(net);
1da177e4
LT
550 unsigned hash;
551 struct ip_vs_dest *dest;
552
553 /*
554 * Check for "full" addressed entries
555 * Return the first found entry
556 */
7937df15 557 hash = ip_vs_rs_hashkey(af, daddr, dport);
1da177e4 558
a0840e2e 559 read_lock(&ipvs->rs_lock);
fc723250 560 list_for_each_entry(dest, &ipvs->rs_table[hash], d_list) {
7937df15
JV
561 if ((dest->af == af)
562 && ip_vs_addr_equal(af, &dest->addr, daddr)
1da177e4
LT
563 && (dest->port == dport)
564 && ((dest->protocol == protocol) ||
565 dest->vfwmark)) {
566 /* HIT */
a0840e2e 567 read_unlock(&ipvs->rs_lock);
1da177e4
LT
568 return dest;
569 }
570 }
a0840e2e 571 read_unlock(&ipvs->rs_lock);
1da177e4
LT
572
573 return NULL;
574}
575
576/*
577 * Lookup destination by {addr,port} in the given service
578 */
579static struct ip_vs_dest *
7937df15
JV
580ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
581 __be16 dport)
1da177e4
LT
582{
583 struct ip_vs_dest *dest;
584
585 /*
586 * Find the destination for the given service
587 */
588 list_for_each_entry(dest, &svc->destinations, n_list) {
7937df15
JV
589 if ((dest->af == svc->af)
590 && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
591 && (dest->port == dport)) {
1da177e4
LT
592 /* HIT */
593 return dest;
594 }
595 }
596
597 return NULL;
598}
599
1e356f9c
RB
600/*
601 * Find destination by {daddr,dport,vaddr,protocol}
602 * Cretaed to be used in ip_vs_process_message() in
603 * the backup synchronization daemon. It finds the
604 * destination to be bound to the received connection
605 * on the backup.
606 *
607 * ip_vs_lookup_real_service() looked promissing, but
608 * seems not working as expected.
609 */
fc723250
HS
610struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af,
611 const union nf_inet_addr *daddr,
7937df15
JV
612 __be16 dport,
613 const union nf_inet_addr *vaddr,
0e051e68 614 __be16 vport, __u16 protocol, __u32 fwmark)
1e356f9c
RB
615{
616 struct ip_vs_dest *dest;
617 struct ip_vs_service *svc;
618
fc723250 619 svc = ip_vs_service_get(net, af, fwmark, protocol, vaddr, vport);
1e356f9c
RB
620 if (!svc)
621 return NULL;
622 dest = ip_vs_lookup_dest(svc, daddr, dport);
623 if (dest)
624 atomic_inc(&dest->refcnt);
625 ip_vs_service_put(svc);
626 return dest;
627}
1da177e4
LT
628
629/*
630 * Lookup dest by {svc,addr,port} in the destination trash.
631 * The destination trash is used to hold the destinations that are removed
632 * from the service table but are still referenced by some conn entries.
633 * The reason to add the destination trash is when the dest is temporary
634 * down (either by administrator or by monitor program), the dest can be
635 * picked back from the trash, the remaining connections to the dest can
636 * continue, and the counting information of the dest is also useful for
637 * scheduling.
638 */
639static struct ip_vs_dest *
7937df15
JV
640ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
641 __be16 dport)
1da177e4
LT
642{
643 struct ip_vs_dest *dest, *nxt;
f2431e6e 644 struct netns_ipvs *ipvs = net_ipvs(svc->net);
1da177e4
LT
645
646 /*
647 * Find the destination in trash
648 */
f2431e6e 649 list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
7937df15
JV
650 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
651 "dest->refcnt=%d\n",
652 dest->vfwmark,
653 IP_VS_DBG_ADDR(svc->af, &dest->addr),
654 ntohs(dest->port),
655 atomic_read(&dest->refcnt));
656 if (dest->af == svc->af &&
657 ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
1da177e4
LT
658 dest->port == dport &&
659 dest->vfwmark == svc->fwmark &&
660 dest->protocol == svc->protocol &&
661 (svc->fwmark ||
7937df15 662 (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
1da177e4
LT
663 dest->vport == svc->port))) {
664 /* HIT */
665 return dest;
666 }
667
668 /*
669 * Try to purge the destination from trash if not referenced
670 */
671 if (atomic_read(&dest->refcnt) == 1) {
7937df15
JV
672 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
673 "from trash\n",
674 dest->vfwmark,
675 IP_VS_DBG_ADDR(svc->af, &dest->addr),
676 ntohs(dest->port));
1da177e4
LT
677 list_del(&dest->n_list);
678 ip_vs_dst_reset(dest);
679 __ip_vs_unbind_svc(dest);
b17fc996 680 free_percpu(dest->stats.cpustats);
1da177e4
LT
681 kfree(dest);
682 }
683 }
684
685 return NULL;
686}
687
688
689/*
690 * Clean up all the destinations in the trash
691 * Called by the ip_vs_control_cleanup()
692 *
693 * When the ip_vs_control_clearup is activated by ipvs module exit,
694 * the service tables must have been flushed and all the connections
695 * are expired, and the refcnt of each destination in the trash must
696 * be 1, so we simply release them here.
697 */
f2431e6e 698static void ip_vs_trash_cleanup(struct net *net)
1da177e4
LT
699{
700 struct ip_vs_dest *dest, *nxt;
f2431e6e 701 struct netns_ipvs *ipvs = net_ipvs(net);
1da177e4 702
f2431e6e 703 list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
1da177e4
LT
704 list_del(&dest->n_list);
705 ip_vs_dst_reset(dest);
706 __ip_vs_unbind_svc(dest);
b17fc996 707 free_percpu(dest->stats.cpustats);
1da177e4
LT
708 kfree(dest);
709 }
710}
711
712
713static void
714ip_vs_zero_stats(struct ip_vs_stats *stats)
715{
716 spin_lock_bh(&stats->lock);
e93615d0 717
e9c0ce23 718 memset(&stats->ustats, 0, sizeof(stats->ustats));
1da177e4 719 ip_vs_zero_estimator(stats);
e93615d0 720
3a14a313 721 spin_unlock_bh(&stats->lock);
1da177e4
LT
722}
723
724/*
725 * Update a destination in the given service
726 */
727static void
26c15cfd
JA
728__ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
729 struct ip_vs_dest_user_kern *udest, int add)
1da177e4 730{
fc723250 731 struct netns_ipvs *ipvs = net_ipvs(svc->net);
1da177e4
LT
732 int conn_flags;
733
734 /* set the weight and the flags */
735 atomic_set(&dest->weight, udest->weight);
3575792e
JA
736 conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
737 conn_flags |= IP_VS_CONN_F_INACTIVE;
1da177e4 738
1da177e4 739 /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
3575792e 740 if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) {
1da177e4
LT
741 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
742 } else {
743 /*
fc723250 744 * Put the real service in rs_table if not present.
1da177e4
LT
745 * For now only for NAT!
746 */
a0840e2e 747 write_lock_bh(&ipvs->rs_lock);
fc723250 748 ip_vs_rs_hash(ipvs, dest);
a0840e2e 749 write_unlock_bh(&ipvs->rs_lock);
1da177e4
LT
750 }
751 atomic_set(&dest->conn_flags, conn_flags);
752
753 /* bind the service */
754 if (!dest->svc) {
755 __ip_vs_bind_svc(dest, svc);
756 } else {
757 if (dest->svc != svc) {
758 __ip_vs_unbind_svc(dest);
759 ip_vs_zero_stats(&dest->stats);
760 __ip_vs_bind_svc(dest, svc);
761 }
762 }
763
764 /* set the dest status flags */
765 dest->flags |= IP_VS_DEST_F_AVAILABLE;
766
767 if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
768 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
769 dest->u_threshold = udest->u_threshold;
770 dest->l_threshold = udest->l_threshold;
26c15cfd 771
fc604767
JA
772 spin_lock(&dest->dst_lock);
773 ip_vs_dst_reset(dest);
774 spin_unlock(&dest->dst_lock);
775
26c15cfd 776 if (add)
29c2026f 777 ip_vs_new_estimator(svc->net, &dest->stats);
26c15cfd
JA
778
779 write_lock_bh(&__ip_vs_svc_lock);
780
781 /* Wait until all other svc users go away */
782 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
783
784 if (add) {
785 list_add(&dest->n_list, &svc->destinations);
786 svc->num_dests++;
787 }
788
789 /* call the update_service, because server weight may be changed */
790 if (svc->scheduler->update_service)
791 svc->scheduler->update_service(svc);
792
793 write_unlock_bh(&__ip_vs_svc_lock);
1da177e4
LT
794}
795
796
797/*
798 * Create a destination for the given service
799 */
800static int
c860c6b1 801ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
1da177e4
LT
802 struct ip_vs_dest **dest_p)
803{
804 struct ip_vs_dest *dest;
805 unsigned atype;
806
807 EnterFunction(2);
808
09571c7a
VB
809#ifdef CONFIG_IP_VS_IPV6
810 if (svc->af == AF_INET6) {
811 atype = ipv6_addr_type(&udest->addr.in6);
3bfb92f4
SW
812 if ((!(atype & IPV6_ADDR_UNICAST) ||
813 atype & IPV6_ADDR_LINKLOCAL) &&
4a98480b 814 !__ip_vs_addr_is_local_v6(svc->net, &udest->addr.in6))
09571c7a
VB
815 return -EINVAL;
816 } else
817#endif
818 {
4a98480b 819 atype = inet_addr_type(svc->net, udest->addr.ip);
09571c7a
VB
820 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
821 return -EINVAL;
822 }
1da177e4 823
dee06e47 824 dest = kzalloc(sizeof(struct ip_vs_dest), GFP_KERNEL);
1da177e4 825 if (dest == NULL) {
1e3e238e 826 pr_err("%s(): no memory.\n", __func__);
1da177e4
LT
827 return -ENOMEM;
828 }
b17fc996
HS
829 dest->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
830 if (!dest->stats.cpustats) {
831 pr_err("%s() alloc_percpu failed\n", __func__);
832 goto err_alloc;
833 }
1da177e4 834
c860c6b1 835 dest->af = svc->af;
1da177e4 836 dest->protocol = svc->protocol;
c860c6b1 837 dest->vaddr = svc->addr;
1da177e4
LT
838 dest->vport = svc->port;
839 dest->vfwmark = svc->fwmark;
c860c6b1 840 ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
1da177e4
LT
841 dest->port = udest->port;
842
843 atomic_set(&dest->activeconns, 0);
844 atomic_set(&dest->inactconns, 0);
845 atomic_set(&dest->persistconns, 0);
26c15cfd 846 atomic_set(&dest->refcnt, 1);
1da177e4
LT
847
848 INIT_LIST_HEAD(&dest->d_list);
849 spin_lock_init(&dest->dst_lock);
850 spin_lock_init(&dest->stats.lock);
26c15cfd 851 __ip_vs_update_dest(svc, dest, udest, 1);
1da177e4
LT
852
853 *dest_p = dest;
854
855 LeaveFunction(2);
856 return 0;
b17fc996
HS
857
858err_alloc:
859 kfree(dest);
860 return -ENOMEM;
1da177e4
LT
861}
862
863
864/*
865 * Add a destination into an existing service
866 */
867static int
c860c6b1 868ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
869{
870 struct ip_vs_dest *dest;
c860c6b1 871 union nf_inet_addr daddr;
014d730d 872 __be16 dport = udest->port;
1da177e4
LT
873 int ret;
874
875 EnterFunction(2);
876
877 if (udest->weight < 0) {
1e3e238e 878 pr_err("%s(): server weight less than zero\n", __func__);
1da177e4
LT
879 return -ERANGE;
880 }
881
882 if (udest->l_threshold > udest->u_threshold) {
1e3e238e
HE
883 pr_err("%s(): lower threshold is higher than upper threshold\n",
884 __func__);
1da177e4
LT
885 return -ERANGE;
886 }
887
c860c6b1
JV
888 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
889
1da177e4
LT
890 /*
891 * Check if the dest already exists in the list
892 */
7937df15
JV
893 dest = ip_vs_lookup_dest(svc, &daddr, dport);
894
1da177e4 895 if (dest != NULL) {
1e3e238e 896 IP_VS_DBG(1, "%s(): dest already exists\n", __func__);
1da177e4
LT
897 return -EEXIST;
898 }
899
900 /*
901 * Check if the dest already exists in the trash and
902 * is from the same service
903 */
7937df15
JV
904 dest = ip_vs_trash_get_dest(svc, &daddr, dport);
905
1da177e4 906 if (dest != NULL) {
cfc78c5a
JV
907 IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
908 "dest->refcnt=%d, service %u/%s:%u\n",
909 IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
910 atomic_read(&dest->refcnt),
911 dest->vfwmark,
912 IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
913 ntohs(dest->vport));
914
1da177e4
LT
915 /*
916 * Get the destination from the trash
917 */
918 list_del(&dest->n_list);
919
26c15cfd
JA
920 __ip_vs_update_dest(svc, dest, udest, 1);
921 ret = 0;
922 } else {
1da177e4 923 /*
26c15cfd 924 * Allocate and initialize the dest structure
1da177e4 925 */
26c15cfd 926 ret = ip_vs_new_dest(svc, udest, &dest);
1da177e4 927 }
1da177e4
LT
928 LeaveFunction(2);
929
26c15cfd 930 return ret;
1da177e4
LT
931}
932
933
934/*
935 * Edit a destination in the given service
936 */
937static int
c860c6b1 938ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
939{
940 struct ip_vs_dest *dest;
c860c6b1 941 union nf_inet_addr daddr;
014d730d 942 __be16 dport = udest->port;
1da177e4
LT
943
944 EnterFunction(2);
945
946 if (udest->weight < 0) {
1e3e238e 947 pr_err("%s(): server weight less than zero\n", __func__);
1da177e4
LT
948 return -ERANGE;
949 }
950
951 if (udest->l_threshold > udest->u_threshold) {
1e3e238e
HE
952 pr_err("%s(): lower threshold is higher than upper threshold\n",
953 __func__);
1da177e4
LT
954 return -ERANGE;
955 }
956
c860c6b1
JV
957 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
958
1da177e4
LT
959 /*
960 * Lookup the destination list
961 */
7937df15
JV
962 dest = ip_vs_lookup_dest(svc, &daddr, dport);
963
1da177e4 964 if (dest == NULL) {
1e3e238e 965 IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__);
1da177e4
LT
966 return -ENOENT;
967 }
968
26c15cfd 969 __ip_vs_update_dest(svc, dest, udest, 0);
1da177e4
LT
970 LeaveFunction(2);
971
972 return 0;
973}
974
975
976/*
977 * Delete a destination (must be already unlinked from the service)
978 */
29c2026f 979static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest)
1da177e4 980{
a0840e2e
HS
981 struct netns_ipvs *ipvs = net_ipvs(net);
982
29c2026f 983 ip_vs_kill_estimator(net, &dest->stats);
1da177e4
LT
984
985 /*
986 * Remove it from the d-linked list with the real services.
987 */
a0840e2e 988 write_lock_bh(&ipvs->rs_lock);
1da177e4 989 ip_vs_rs_unhash(dest);
a0840e2e 990 write_unlock_bh(&ipvs->rs_lock);
1da177e4
LT
991
992 /*
993 * Decrease the refcnt of the dest, and free the dest
994 * if nobody refers to it (refcnt=0). Otherwise, throw
995 * the destination into the trash.
996 */
997 if (atomic_dec_and_test(&dest->refcnt)) {
26c15cfd
JA
998 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u\n",
999 dest->vfwmark,
1000 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1001 ntohs(dest->port));
1da177e4
LT
1002 ip_vs_dst_reset(dest);
1003 /* simply decrease svc->refcnt here, let the caller check
1004 and release the service if nobody refers to it.
1005 Only user context can release destination and service,
1006 and only one user context can update virtual service at a
1007 time, so the operation here is OK */
1008 atomic_dec(&dest->svc->refcnt);
b17fc996 1009 free_percpu(dest->stats.cpustats);
1da177e4
LT
1010 kfree(dest);
1011 } else {
cfc78c5a
JV
1012 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
1013 "dest->refcnt=%d\n",
1014 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1015 ntohs(dest->port),
1016 atomic_read(&dest->refcnt));
f2431e6e 1017 list_add(&dest->n_list, &ipvs->dest_trash);
1da177e4
LT
1018 atomic_inc(&dest->refcnt);
1019 }
1020}
1021
1022
1023/*
1024 * Unlink a destination from the given service
1025 */
1026static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1027 struct ip_vs_dest *dest,
1028 int svcupd)
1029{
1030 dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1031
1032 /*
1033 * Remove it from the d-linked destination list.
1034 */
1035 list_del(&dest->n_list);
1036 svc->num_dests--;
82dfb6f3
SW
1037
1038 /*
1039 * Call the update_service function of its scheduler
1040 */
1041 if (svcupd && svc->scheduler->update_service)
1042 svc->scheduler->update_service(svc);
1da177e4
LT
1043}
1044
1045
1046/*
1047 * Delete a destination server in the given service
1048 */
1049static int
c860c6b1 1050ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
1051{
1052 struct ip_vs_dest *dest;
014d730d 1053 __be16 dport = udest->port;
1da177e4
LT
1054
1055 EnterFunction(2);
1056
7937df15 1057 dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
c860c6b1 1058
1da177e4 1059 if (dest == NULL) {
1e3e238e 1060 IP_VS_DBG(1, "%s(): destination not found!\n", __func__);
1da177e4
LT
1061 return -ENOENT;
1062 }
1063
1064 write_lock_bh(&__ip_vs_svc_lock);
1065
1066 /*
1067 * Wait until all other svc users go away.
1068 */
26c15cfd 1069 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1da177e4
LT
1070
1071 /*
1072 * Unlink dest from the service
1073 */
1074 __ip_vs_unlink_dest(svc, dest, 1);
1075
1076 write_unlock_bh(&__ip_vs_svc_lock);
1077
1078 /*
1079 * Delete the destination
1080 */
a0840e2e 1081 __ip_vs_del_dest(svc->net, dest);
1da177e4
LT
1082
1083 LeaveFunction(2);
1084
1085 return 0;
1086}
1087
1088
1089/*
1090 * Add a service into the service hash table
1091 */
1092static int
fc723250 1093ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
c860c6b1 1094 struct ip_vs_service **svc_p)
1da177e4
LT
1095{
1096 int ret = 0;
1097 struct ip_vs_scheduler *sched = NULL;
0d1e71b0 1098 struct ip_vs_pe *pe = NULL;
1da177e4 1099 struct ip_vs_service *svc = NULL;
a0840e2e 1100 struct netns_ipvs *ipvs = net_ipvs(net);
1da177e4
LT
1101
1102 /* increase the module use count */
1103 ip_vs_use_count_inc();
1104
1105 /* Lookup the scheduler by 'u->sched_name' */
1106 sched = ip_vs_scheduler_get(u->sched_name);
1107 if (sched == NULL) {
1e3e238e 1108 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1da177e4 1109 ret = -ENOENT;
6e08bfb8 1110 goto out_err;
1da177e4
LT
1111 }
1112
0d1e71b0 1113 if (u->pe_name && *u->pe_name) {
e9e5eee8 1114 pe = ip_vs_pe_getbyname(u->pe_name);
0d1e71b0
SH
1115 if (pe == NULL) {
1116 pr_info("persistence engine module ip_vs_pe_%s "
1117 "not found\n", u->pe_name);
1118 ret = -ENOENT;
1119 goto out_err;
1120 }
1121 }
1122
f94fd041 1123#ifdef CONFIG_IP_VS_IPV6
48148938
JV
1124 if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1125 ret = -EINVAL;
1126 goto out_err;
f94fd041
JV
1127 }
1128#endif
1129
dee06e47 1130 svc = kzalloc(sizeof(struct ip_vs_service), GFP_KERNEL);
1da177e4 1131 if (svc == NULL) {
1e3e238e 1132 IP_VS_DBG(1, "%s(): no memory\n", __func__);
1da177e4
LT
1133 ret = -ENOMEM;
1134 goto out_err;
1135 }
b17fc996
HS
1136 svc->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
1137 if (!svc->stats.cpustats) {
1138 pr_err("%s() alloc_percpu failed\n", __func__);
1139 goto out_err;
1140 }
1da177e4
LT
1141
1142 /* I'm the first user of the service */
26c15cfd 1143 atomic_set(&svc->usecnt, 0);
1da177e4
LT
1144 atomic_set(&svc->refcnt, 0);
1145
c860c6b1 1146 svc->af = u->af;
1da177e4 1147 svc->protocol = u->protocol;
c860c6b1 1148 ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
1da177e4
LT
1149 svc->port = u->port;
1150 svc->fwmark = u->fwmark;
1151 svc->flags = u->flags;
1152 svc->timeout = u->timeout * HZ;
1153 svc->netmask = u->netmask;
fc723250 1154 svc->net = net;
1da177e4
LT
1155
1156 INIT_LIST_HEAD(&svc->destinations);
1157 rwlock_init(&svc->sched_lock);
1158 spin_lock_init(&svc->stats.lock);
1159
1160 /* Bind the scheduler */
1161 ret = ip_vs_bind_scheduler(svc, sched);
1162 if (ret)
1163 goto out_err;
1164 sched = NULL;
1165
0d1e71b0
SH
1166 /* Bind the ct retriever */
1167 ip_vs_bind_pe(svc, pe);
1168 pe = NULL;
1169
1da177e4
LT
1170 /* Update the virtual service counters */
1171 if (svc->port == FTPPORT)
763f8d0e 1172 atomic_inc(&ipvs->ftpsvc_counter);
1da177e4 1173 else if (svc->port == 0)
763f8d0e 1174 atomic_inc(&ipvs->nullsvc_counter);
1da177e4 1175
29c2026f 1176 ip_vs_new_estimator(net, &svc->stats);
f94fd041
JV
1177
1178 /* Count only IPv4 services for old get/setsockopt interface */
1179 if (svc->af == AF_INET)
a0840e2e 1180 ipvs->num_services++;
1da177e4
LT
1181
1182 /* Hash the service into the service table */
1183 write_lock_bh(&__ip_vs_svc_lock);
1184 ip_vs_svc_hash(svc);
1185 write_unlock_bh(&__ip_vs_svc_lock);
1186
1187 *svc_p = svc;
1188 return 0;
1189
b17fc996 1190
6e08bfb8 1191 out_err:
1da177e4 1192 if (svc != NULL) {
2fabf35b 1193 ip_vs_unbind_scheduler(svc);
1da177e4
LT
1194 if (svc->inc) {
1195 local_bh_disable();
1196 ip_vs_app_inc_put(svc->inc);
1197 local_bh_enable();
1198 }
b17fc996
HS
1199 if (svc->stats.cpustats)
1200 free_percpu(svc->stats.cpustats);
1da177e4
LT
1201 kfree(svc);
1202 }
1203 ip_vs_scheduler_put(sched);
0d1e71b0 1204 ip_vs_pe_put(pe);
1da177e4 1205
1da177e4
LT
1206 /* decrease the module use count */
1207 ip_vs_use_count_dec();
1208
1209 return ret;
1210}
1211
1212
1213/*
1214 * Edit a service and bind it with a new scheduler
1215 */
1216static int
c860c6b1 1217ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1da177e4
LT
1218{
1219 struct ip_vs_scheduler *sched, *old_sched;
0d1e71b0 1220 struct ip_vs_pe *pe = NULL, *old_pe = NULL;
1da177e4
LT
1221 int ret = 0;
1222
1223 /*
1224 * Lookup the scheduler, by 'u->sched_name'
1225 */
1226 sched = ip_vs_scheduler_get(u->sched_name);
1227 if (sched == NULL) {
1e3e238e 1228 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1da177e4
LT
1229 return -ENOENT;
1230 }
1231 old_sched = sched;
1232
0d1e71b0 1233 if (u->pe_name && *u->pe_name) {
e9e5eee8 1234 pe = ip_vs_pe_getbyname(u->pe_name);
0d1e71b0
SH
1235 if (pe == NULL) {
1236 pr_info("persistence engine module ip_vs_pe_%s "
1237 "not found\n", u->pe_name);
1238 ret = -ENOENT;
1239 goto out;
1240 }
1241 old_pe = pe;
1242 }
1243
f94fd041 1244#ifdef CONFIG_IP_VS_IPV6
48148938
JV
1245 if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1246 ret = -EINVAL;
1247 goto out;
f94fd041
JV
1248 }
1249#endif
1250
1da177e4
LT
1251 write_lock_bh(&__ip_vs_svc_lock);
1252
1253 /*
1254 * Wait until all other svc users go away.
1255 */
26c15cfd 1256 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1da177e4
LT
1257
1258 /*
1259 * Set the flags and timeout value
1260 */
1261 svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1262 svc->timeout = u->timeout * HZ;
1263 svc->netmask = u->netmask;
1264
1265 old_sched = svc->scheduler;
1266 if (sched != old_sched) {
1267 /*
1268 * Unbind the old scheduler
1269 */
1270 if ((ret = ip_vs_unbind_scheduler(svc))) {
1271 old_sched = sched;
9e691ed6 1272 goto out_unlock;
1da177e4
LT
1273 }
1274
1275 /*
1276 * Bind the new scheduler
1277 */
1278 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1279 /*
1280 * If ip_vs_bind_scheduler fails, restore the old
1281 * scheduler.
1282 * The main reason of failure is out of memory.
1283 *
1284 * The question is if the old scheduler can be
1285 * restored all the time. TODO: if it cannot be
1286 * restored some time, we must delete the service,
1287 * otherwise the system may crash.
1288 */
1289 ip_vs_bind_scheduler(svc, old_sched);
1290 old_sched = sched;
9e691ed6 1291 goto out_unlock;
1da177e4
LT
1292 }
1293 }
1294
0d1e71b0
SH
1295 old_pe = svc->pe;
1296 if (pe != old_pe) {
1297 ip_vs_unbind_pe(svc);
1298 ip_vs_bind_pe(svc, pe);
1299 }
1300
9e691ed6 1301 out_unlock:
1da177e4 1302 write_unlock_bh(&__ip_vs_svc_lock);
9e691ed6 1303 out:
6e08bfb8 1304 ip_vs_scheduler_put(old_sched);
0d1e71b0 1305 ip_vs_pe_put(old_pe);
1da177e4
LT
1306 return ret;
1307}
1308
1309
1310/*
1311 * Delete a service from the service list
1312 * - The service must be unlinked, unlocked and not referenced!
1313 * - We are called under _bh lock
1314 */
1315static void __ip_vs_del_service(struct ip_vs_service *svc)
1316{
1317 struct ip_vs_dest *dest, *nxt;
1318 struct ip_vs_scheduler *old_sched;
0d1e71b0 1319 struct ip_vs_pe *old_pe;
a0840e2e 1320 struct netns_ipvs *ipvs = net_ipvs(svc->net);
0d1e71b0
SH
1321
1322 pr_info("%s: enter\n", __func__);
1da177e4 1323
f94fd041
JV
1324 /* Count only IPv4 services for old get/setsockopt interface */
1325 if (svc->af == AF_INET)
a0840e2e 1326 ipvs->num_services--;
f94fd041 1327
29c2026f 1328 ip_vs_kill_estimator(svc->net, &svc->stats);
1da177e4
LT
1329
1330 /* Unbind scheduler */
1331 old_sched = svc->scheduler;
1332 ip_vs_unbind_scheduler(svc);
6e08bfb8 1333 ip_vs_scheduler_put(old_sched);
1da177e4 1334
0d1e71b0
SH
1335 /* Unbind persistence engine */
1336 old_pe = svc->pe;
1337 ip_vs_unbind_pe(svc);
1338 ip_vs_pe_put(old_pe);
1339
1da177e4
LT
1340 /* Unbind app inc */
1341 if (svc->inc) {
1342 ip_vs_app_inc_put(svc->inc);
1343 svc->inc = NULL;
1344 }
1345
1346 /*
1347 * Unlink the whole destination list
1348 */
1349 list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1350 __ip_vs_unlink_dest(svc, dest, 0);
29c2026f 1351 __ip_vs_del_dest(svc->net, dest);
1da177e4
LT
1352 }
1353
1354 /*
1355 * Update the virtual service counters
1356 */
1357 if (svc->port == FTPPORT)
763f8d0e 1358 atomic_dec(&ipvs->ftpsvc_counter);
1da177e4 1359 else if (svc->port == 0)
763f8d0e 1360 atomic_dec(&ipvs->nullsvc_counter);
1da177e4
LT
1361
1362 /*
1363 * Free the service if nobody refers to it
1364 */
26c15cfd
JA
1365 if (atomic_read(&svc->refcnt) == 0) {
1366 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
1367 svc->fwmark,
1368 IP_VS_DBG_ADDR(svc->af, &svc->addr),
1369 ntohs(svc->port), atomic_read(&svc->usecnt));
b17fc996 1370 free_percpu(svc->stats.cpustats);
1da177e4 1371 kfree(svc);
26c15cfd 1372 }
1da177e4
LT
1373
1374 /* decrease the module use count */
1375 ip_vs_use_count_dec();
1376}
1377
1378/*
26c15cfd 1379 * Unlink a service from list and try to delete it if its refcnt reached 0
1da177e4 1380 */
26c15cfd 1381static void ip_vs_unlink_service(struct ip_vs_service *svc)
1da177e4 1382{
1da177e4
LT
1383 /*
1384 * Unhash it from the service table
1385 */
1386 write_lock_bh(&__ip_vs_svc_lock);
1387
1388 ip_vs_svc_unhash(svc);
1389
1390 /*
1391 * Wait until all the svc users go away.
1392 */
26c15cfd 1393 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1da177e4
LT
1394
1395 __ip_vs_del_service(svc);
1396
1397 write_unlock_bh(&__ip_vs_svc_lock);
26c15cfd
JA
1398}
1399
1400/*
1401 * Delete a service from the service list
1402 */
1403static int ip_vs_del_service(struct ip_vs_service *svc)
1404{
1405 if (svc == NULL)
1406 return -EEXIST;
1407 ip_vs_unlink_service(svc);
1da177e4
LT
1408
1409 return 0;
1410}
1411
1412
1413/*
1414 * Flush all the virtual services
1415 */
fc723250 1416static int ip_vs_flush(struct net *net)
1da177e4
LT
1417{
1418 int idx;
1419 struct ip_vs_service *svc, *nxt;
1420
1421 /*
fc723250 1422 * Flush the service table hashed by <netns,protocol,addr,port>
1da177e4
LT
1423 */
1424 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
fc723250
HS
1425 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx],
1426 s_list) {
1427 if (net_eq(svc->net, net))
1428 ip_vs_unlink_service(svc);
1da177e4
LT
1429 }
1430 }
1431
1432 /*
1433 * Flush the service table hashed by fwmark
1434 */
1435 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1436 list_for_each_entry_safe(svc, nxt,
1437 &ip_vs_svc_fwm_table[idx], f_list) {
fc723250
HS
1438 if (net_eq(svc->net, net))
1439 ip_vs_unlink_service(svc);
1da177e4
LT
1440 }
1441 }
1442
1443 return 0;
1444}
1445
1446
1447/*
1448 * Zero counters in a service or all services
1449 */
1450static int ip_vs_zero_service(struct ip_vs_service *svc)
1451{
1452 struct ip_vs_dest *dest;
1453
1454 write_lock_bh(&__ip_vs_svc_lock);
1455 list_for_each_entry(dest, &svc->destinations, n_list) {
1456 ip_vs_zero_stats(&dest->stats);
1457 }
1458 ip_vs_zero_stats(&svc->stats);
1459 write_unlock_bh(&__ip_vs_svc_lock);
1460 return 0;
1461}
1462
fc723250 1463static int ip_vs_zero_all(struct net *net)
1da177e4
LT
1464{
1465 int idx;
1466 struct ip_vs_service *svc;
1467
1468 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1469 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
fc723250
HS
1470 if (net_eq(svc->net, net))
1471 ip_vs_zero_service(svc);
1da177e4
LT
1472 }
1473 }
1474
1475 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1476 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
fc723250
HS
1477 if (net_eq(svc->net, net))
1478 ip_vs_zero_service(svc);
1da177e4
LT
1479 }
1480 }
1481
b17fc996 1482 ip_vs_zero_stats(net_ipvs(net)->tot_stats);
1da177e4
LT
1483 return 0;
1484}
1485
1486
1487static int
8d65af78 1488proc_do_defense_mode(ctl_table *table, int write,
1da177e4
LT
1489 void __user *buffer, size_t *lenp, loff_t *ppos)
1490{
9330419d 1491 struct net *net = current->nsproxy->net_ns;
1da177e4
LT
1492 int *valp = table->data;
1493 int val = *valp;
1494 int rc;
1495
8d65af78 1496 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1da177e4
LT
1497 if (write && (*valp != val)) {
1498 if ((*valp < 0) || (*valp > 3)) {
1499 /* Restore the correct value */
1500 *valp = val;
1501 } else {
9330419d 1502 update_defense_level(net_ipvs(net));
1da177e4
LT
1503 }
1504 }
1505 return rc;
1506}
1507
1508
1509static int
8d65af78 1510proc_do_sync_threshold(ctl_table *table, int write,
1da177e4
LT
1511 void __user *buffer, size_t *lenp, loff_t *ppos)
1512{
1513 int *valp = table->data;
1514 int val[2];
1515 int rc;
1516
1517 /* backup the value first */
1518 memcpy(val, valp, sizeof(val));
1519
8d65af78 1520 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1da177e4
LT
1521 if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
1522 /* Restore the correct value */
1523 memcpy(valp, val, sizeof(val));
1524 }
1525 return rc;
1526}
1527
b880c1f0
HS
1528static int
1529proc_do_sync_mode(ctl_table *table, int write,
1530 void __user *buffer, size_t *lenp, loff_t *ppos)
1531{
1532 int *valp = table->data;
1533 int val = *valp;
1534 int rc;
1535
1536 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1537 if (write && (*valp != val)) {
1538 if ((*valp < 0) || (*valp > 1)) {
1539 /* Restore the correct value */
1540 *valp = val;
1541 } else {
f131315f
HS
1542 struct net *net = current->nsproxy->net_ns;
1543 ip_vs_sync_switch_mode(net, val);
b880c1f0
HS
1544 }
1545 }
1546 return rc;
1547}
1da177e4
LT
1548
1549/*
1550 * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
a0840e2e
HS
1551 * Do not change order or insert new entries without
1552 * align with netns init in __ip_vs_control_init()
1da177e4
LT
1553 */
1554
1555static struct ctl_table vs_vars[] = {
1556 {
1da177e4 1557 .procname = "amemthresh",
1da177e4
LT
1558 .maxlen = sizeof(int),
1559 .mode = 0644,
6d9f239a 1560 .proc_handler = proc_dointvec,
1da177e4 1561 },
1da177e4 1562 {
1da177e4 1563 .procname = "am_droprate",
1da177e4
LT
1564 .maxlen = sizeof(int),
1565 .mode = 0644,
6d9f239a 1566 .proc_handler = proc_dointvec,
1da177e4
LT
1567 },
1568 {
1da177e4 1569 .procname = "drop_entry",
1da177e4
LT
1570 .maxlen = sizeof(int),
1571 .mode = 0644,
6d9f239a 1572 .proc_handler = proc_do_defense_mode,
1da177e4
LT
1573 },
1574 {
1da177e4 1575 .procname = "drop_packet",
1da177e4
LT
1576 .maxlen = sizeof(int),
1577 .mode = 0644,
6d9f239a 1578 .proc_handler = proc_do_defense_mode,
1da177e4 1579 },
f4bc17cd
JA
1580#ifdef CONFIG_IP_VS_NFCT
1581 {
1582 .procname = "conntrack",
f4bc17cd
JA
1583 .maxlen = sizeof(int),
1584 .mode = 0644,
1585 .proc_handler = &proc_dointvec,
1586 },
1587#endif
1da177e4 1588 {
1da177e4 1589 .procname = "secure_tcp",
1da177e4
LT
1590 .maxlen = sizeof(int),
1591 .mode = 0644,
6d9f239a 1592 .proc_handler = proc_do_defense_mode,
1da177e4 1593 },
8a803040
JA
1594 {
1595 .procname = "snat_reroute",
8a803040
JA
1596 .maxlen = sizeof(int),
1597 .mode = 0644,
1598 .proc_handler = &proc_dointvec,
1599 },
b880c1f0
HS
1600 {
1601 .procname = "sync_version",
b880c1f0
HS
1602 .maxlen = sizeof(int),
1603 .mode = 0644,
1604 .proc_handler = &proc_do_sync_mode,
1605 },
a0840e2e
HS
1606 {
1607 .procname = "cache_bypass",
1608 .maxlen = sizeof(int),
1609 .mode = 0644,
1610 .proc_handler = proc_dointvec,
1611 },
1612 {
1613 .procname = "expire_nodest_conn",
1614 .maxlen = sizeof(int),
1615 .mode = 0644,
1616 .proc_handler = proc_dointvec,
1617 },
1618 {
1619 .procname = "expire_quiescent_template",
1620 .maxlen = sizeof(int),
1621 .mode = 0644,
1622 .proc_handler = proc_dointvec,
1623 },
1624 {
1625 .procname = "sync_threshold",
1626 .maxlen =
1627 sizeof(((struct netns_ipvs *)0)->sysctl_sync_threshold),
1628 .mode = 0644,
1629 .proc_handler = proc_do_sync_threshold,
1630 },
1631 {
1632 .procname = "nat_icmp_send",
1633 .maxlen = sizeof(int),
1634 .mode = 0644,
1635 .proc_handler = proc_dointvec,
1636 },
1637#ifdef CONFIG_IP_VS_DEBUG
1638 {
1639 .procname = "debug_level",
1640 .data = &sysctl_ip_vs_debug_level,
1641 .maxlen = sizeof(int),
1642 .mode = 0644,
1643 .proc_handler = proc_dointvec,
1644 },
1645#endif
1da177e4
LT
1646#if 0
1647 {
1da177e4
LT
1648 .procname = "timeout_established",
1649 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1650 .maxlen = sizeof(int),
1651 .mode = 0644,
6d9f239a 1652 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1653 },
1654 {
1da177e4
LT
1655 .procname = "timeout_synsent",
1656 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1657 .maxlen = sizeof(int),
1658 .mode = 0644,
6d9f239a 1659 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1660 },
1661 {
1da177e4
LT
1662 .procname = "timeout_synrecv",
1663 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1664 .maxlen = sizeof(int),
1665 .mode = 0644,
6d9f239a 1666 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1667 },
1668 {
1da177e4
LT
1669 .procname = "timeout_finwait",
1670 .data = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1671 .maxlen = sizeof(int),
1672 .mode = 0644,
6d9f239a 1673 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1674 },
1675 {
1da177e4
LT
1676 .procname = "timeout_timewait",
1677 .data = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1678 .maxlen = sizeof(int),
1679 .mode = 0644,
6d9f239a 1680 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1681 },
1682 {
1da177e4
LT
1683 .procname = "timeout_close",
1684 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1685 .maxlen = sizeof(int),
1686 .mode = 0644,
6d9f239a 1687 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1688 },
1689 {
1da177e4
LT
1690 .procname = "timeout_closewait",
1691 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1692 .maxlen = sizeof(int),
1693 .mode = 0644,
6d9f239a 1694 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1695 },
1696 {
1da177e4
LT
1697 .procname = "timeout_lastack",
1698 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1699 .maxlen = sizeof(int),
1700 .mode = 0644,
6d9f239a 1701 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1702 },
1703 {
1da177e4
LT
1704 .procname = "timeout_listen",
1705 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1706 .maxlen = sizeof(int),
1707 .mode = 0644,
6d9f239a 1708 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1709 },
1710 {
1da177e4
LT
1711 .procname = "timeout_synack",
1712 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1713 .maxlen = sizeof(int),
1714 .mode = 0644,
6d9f239a 1715 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1716 },
1717 {
1da177e4
LT
1718 .procname = "timeout_udp",
1719 .data = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1720 .maxlen = sizeof(int),
1721 .mode = 0644,
6d9f239a 1722 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1723 },
1724 {
1da177e4
LT
1725 .procname = "timeout_icmp",
1726 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1727 .maxlen = sizeof(int),
1728 .mode = 0644,
6d9f239a 1729 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1730 },
1731#endif
f8572d8f 1732 { }
1da177e4
LT
1733};
1734
5587da55 1735const struct ctl_path net_vs_ctl_path[] = {
f8572d8f
EB
1736 { .procname = "net", },
1737 { .procname = "ipv4", },
90754f8e
PE
1738 { .procname = "vs", },
1739 { }
1da177e4 1740};
90754f8e 1741EXPORT_SYMBOL_GPL(net_vs_ctl_path);
1da177e4 1742
1da177e4
LT
1743#ifdef CONFIG_PROC_FS
1744
1745struct ip_vs_iter {
fc723250 1746 struct seq_net_private p; /* Do not move this, netns depends upon it*/
1da177e4
LT
1747 struct list_head *table;
1748 int bucket;
1749};
1750
1751/*
1752 * Write the contents of the VS rule table to a PROCfs file.
1753 * (It is kept just for backward compatibility)
1754 */
1755static inline const char *ip_vs_fwd_name(unsigned flags)
1756{
1757 switch (flags & IP_VS_CONN_F_FWD_MASK) {
1758 case IP_VS_CONN_F_LOCALNODE:
1759 return "Local";
1760 case IP_VS_CONN_F_TUNNEL:
1761 return "Tunnel";
1762 case IP_VS_CONN_F_DROUTE:
1763 return "Route";
1764 default:
1765 return "Masq";
1766 }
1767}
1768
1769
1770/* Get the Nth entry in the two lists */
1771static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1772{
fc723250 1773 struct net *net = seq_file_net(seq);
1da177e4
LT
1774 struct ip_vs_iter *iter = seq->private;
1775 int idx;
1776 struct ip_vs_service *svc;
1777
1778 /* look in hash by protocol */
1779 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1780 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
fc723250 1781 if (net_eq(svc->net, net) && pos-- == 0) {
1da177e4
LT
1782 iter->table = ip_vs_svc_table;
1783 iter->bucket = idx;
1784 return svc;
1785 }
1786 }
1787 }
1788
1789 /* keep looking in fwmark */
1790 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1791 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
fc723250 1792 if (net_eq(svc->net, net) && pos-- == 0) {
1da177e4
LT
1793 iter->table = ip_vs_svc_fwm_table;
1794 iter->bucket = idx;
1795 return svc;
1796 }
1797 }
1798 }
1799
1800 return NULL;
1801}
1802
1803static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
563e94f0 1804__acquires(__ip_vs_svc_lock)
1da177e4
LT
1805{
1806
1807 read_lock_bh(&__ip_vs_svc_lock);
1808 return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
1809}
1810
1811
1812static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1813{
1814 struct list_head *e;
1815 struct ip_vs_iter *iter;
1816 struct ip_vs_service *svc;
1817
1818 ++*pos;
1819 if (v == SEQ_START_TOKEN)
1820 return ip_vs_info_array(seq,0);
1821
1822 svc = v;
1823 iter = seq->private;
1824
1825 if (iter->table == ip_vs_svc_table) {
1826 /* next service in table hashed by protocol */
1827 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
1828 return list_entry(e, struct ip_vs_service, s_list);
1829
1830
1831 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1832 list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
1833 s_list) {
1834 return svc;
1835 }
1836 }
1837
1838 iter->table = ip_vs_svc_fwm_table;
1839 iter->bucket = -1;
1840 goto scan_fwmark;
1841 }
1842
1843 /* next service in hashed by fwmark */
1844 if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
1845 return list_entry(e, struct ip_vs_service, f_list);
1846
1847 scan_fwmark:
1848 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1849 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
1850 f_list)
1851 return svc;
1852 }
1853
1854 return NULL;
1855}
1856
1857static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
563e94f0 1858__releases(__ip_vs_svc_lock)
1da177e4
LT
1859{
1860 read_unlock_bh(&__ip_vs_svc_lock);
1861}
1862
1863
1864static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
1865{
1866 if (v == SEQ_START_TOKEN) {
1867 seq_printf(seq,
1868 "IP Virtual Server version %d.%d.%d (size=%d)\n",
6f7edb48 1869 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
1da177e4
LT
1870 seq_puts(seq,
1871 "Prot LocalAddress:Port Scheduler Flags\n");
1872 seq_puts(seq,
1873 " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
1874 } else {
1875 const struct ip_vs_service *svc = v;
1876 const struct ip_vs_iter *iter = seq->private;
1877 const struct ip_vs_dest *dest;
1878
667a5f18
VB
1879 if (iter->table == ip_vs_svc_table) {
1880#ifdef CONFIG_IP_VS_IPV6
1881 if (svc->af == AF_INET6)
5b095d98 1882 seq_printf(seq, "%s [%pI6]:%04X %s ",
667a5f18 1883 ip_vs_proto_name(svc->protocol),
38ff4fa4 1884 &svc->addr.in6,
667a5f18
VB
1885 ntohs(svc->port),
1886 svc->scheduler->name);
1887 else
1888#endif
26ec037f 1889 seq_printf(seq, "%s %08X:%04X %s %s ",
667a5f18
VB
1890 ip_vs_proto_name(svc->protocol),
1891 ntohl(svc->addr.ip),
1892 ntohs(svc->port),
26ec037f
NC
1893 svc->scheduler->name,
1894 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
667a5f18 1895 } else {
26ec037f
NC
1896 seq_printf(seq, "FWM %08X %s %s",
1897 svc->fwmark, svc->scheduler->name,
1898 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
667a5f18 1899 }
1da177e4
LT
1900
1901 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
1902 seq_printf(seq, "persistent %d %08X\n",
1903 svc->timeout,
1904 ntohl(svc->netmask));
1905 else
1906 seq_putc(seq, '\n');
1907
1908 list_for_each_entry(dest, &svc->destinations, n_list) {
667a5f18
VB
1909#ifdef CONFIG_IP_VS_IPV6
1910 if (dest->af == AF_INET6)
1911 seq_printf(seq,
5b095d98 1912 " -> [%pI6]:%04X"
667a5f18 1913 " %-7s %-6d %-10d %-10d\n",
38ff4fa4 1914 &dest->addr.in6,
667a5f18
VB
1915 ntohs(dest->port),
1916 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1917 atomic_read(&dest->weight),
1918 atomic_read(&dest->activeconns),
1919 atomic_read(&dest->inactconns));
1920 else
1921#endif
1922 seq_printf(seq,
1923 " -> %08X:%04X "
1924 "%-7s %-6d %-10d %-10d\n",
1925 ntohl(dest->addr.ip),
1926 ntohs(dest->port),
1927 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1928 atomic_read(&dest->weight),
1929 atomic_read(&dest->activeconns),
1930 atomic_read(&dest->inactconns));
1931
1da177e4
LT
1932 }
1933 }
1934 return 0;
1935}
1936
56b3d975 1937static const struct seq_operations ip_vs_info_seq_ops = {
1da177e4
LT
1938 .start = ip_vs_info_seq_start,
1939 .next = ip_vs_info_seq_next,
1940 .stop = ip_vs_info_seq_stop,
1941 .show = ip_vs_info_seq_show,
1942};
1943
1944static int ip_vs_info_open(struct inode *inode, struct file *file)
1945{
fc723250 1946 return seq_open_net(inode, file, &ip_vs_info_seq_ops,
cf7732e4 1947 sizeof(struct ip_vs_iter));
1da177e4
LT
1948}
1949
9a32144e 1950static const struct file_operations ip_vs_info_fops = {
1da177e4
LT
1951 .owner = THIS_MODULE,
1952 .open = ip_vs_info_open,
1953 .read = seq_read,
1954 .llseek = seq_lseek,
1955 .release = seq_release_private,
1956};
1957
1958#endif
1959
1da177e4
LT
1960#ifdef CONFIG_PROC_FS
1961static int ip_vs_stats_show(struct seq_file *seq, void *v)
1962{
b17fc996
HS
1963 struct net *net = seq_file_single_net(seq);
1964 struct ip_vs_stats *tot_stats = net_ipvs(net)->tot_stats;
1da177e4
LT
1965
1966/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1967 seq_puts(seq,
1968 " Total Incoming Outgoing Incoming Outgoing\n");
1969 seq_printf(seq,
1970 " Conns Packets Packets Bytes Bytes\n");
1971
b17fc996
HS
1972 spin_lock_bh(&tot_stats->lock);
1973 seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", tot_stats->ustats.conns,
1974 tot_stats->ustats.inpkts, tot_stats->ustats.outpkts,
1975 (unsigned long long) tot_stats->ustats.inbytes,
1976 (unsigned long long) tot_stats->ustats.outbytes);
1da177e4
LT
1977
1978/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1979 seq_puts(seq,
1980 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
1981 seq_printf(seq,"%8X %8X %8X %16X %16X\n",
b17fc996
HS
1982 tot_stats->ustats.cps,
1983 tot_stats->ustats.inpps,
1984 tot_stats->ustats.outpps,
1985 tot_stats->ustats.inbps,
1986 tot_stats->ustats.outbps);
1987 spin_unlock_bh(&tot_stats->lock);
1da177e4
LT
1988
1989 return 0;
1990}
1991
1992static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
1993{
fc723250 1994 return single_open_net(inode, file, ip_vs_stats_show);
1da177e4
LT
1995}
1996
9a32144e 1997static const struct file_operations ip_vs_stats_fops = {
1da177e4
LT
1998 .owner = THIS_MODULE,
1999 .open = ip_vs_stats_seq_open,
2000 .read = seq_read,
2001 .llseek = seq_lseek,
2002 .release = single_release,
2003};
2004
b17fc996
HS
2005static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
2006{
2007 struct net *net = seq_file_single_net(seq);
2008 struct ip_vs_stats *tot_stats = net_ipvs(net)->tot_stats;
2009 int i;
2010
2011/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2012 seq_puts(seq,
2013 " Total Incoming Outgoing Incoming Outgoing\n");
2014 seq_printf(seq,
2015 "CPU Conns Packets Packets Bytes Bytes\n");
2016
2017 for_each_possible_cpu(i) {
2018 struct ip_vs_cpu_stats *u = per_cpu_ptr(net->ipvs->cpustats, i);
2019 seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n",
2020 i, u->ustats.conns, u->ustats.inpkts,
2021 u->ustats.outpkts, (__u64)u->ustats.inbytes,
2022 (__u64)u->ustats.outbytes);
2023 }
2024
2025 spin_lock_bh(&tot_stats->lock);
2026 seq_printf(seq, " ~ %8X %8X %8X %16LX %16LX\n\n",
2027 tot_stats->ustats.conns, tot_stats->ustats.inpkts,
2028 tot_stats->ustats.outpkts,
2029 (unsigned long long) tot_stats->ustats.inbytes,
2030 (unsigned long long) tot_stats->ustats.outbytes);
2031
2032/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2033 seq_puts(seq,
2034 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
2035 seq_printf(seq, " %8X %8X %8X %16X %16X\n",
2036 tot_stats->ustats.cps,
2037 tot_stats->ustats.inpps,
2038 tot_stats->ustats.outpps,
2039 tot_stats->ustats.inbps,
2040 tot_stats->ustats.outbps);
2041 spin_unlock_bh(&tot_stats->lock);
2042
2043 return 0;
2044}
2045
2046static int ip_vs_stats_percpu_seq_open(struct inode *inode, struct file *file)
2047{
2048 return single_open_net(inode, file, ip_vs_stats_percpu_show);
2049}
2050
2051static const struct file_operations ip_vs_stats_percpu_fops = {
2052 .owner = THIS_MODULE,
2053 .open = ip_vs_stats_percpu_seq_open,
2054 .read = seq_read,
2055 .llseek = seq_lseek,
2056 .release = single_release,
2057};
1da177e4
LT
2058#endif
2059
2060/*
2061 * Set timeout values for tcp tcpfin udp in the timeout_table.
2062 */
9330419d 2063static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u)
1da177e4 2064{
9330419d
HS
2065 struct ip_vs_proto_data *pd;
2066
1da177e4
LT
2067 IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
2068 u->tcp_timeout,
2069 u->tcp_fin_timeout,
2070 u->udp_timeout);
2071
2072#ifdef CONFIG_IP_VS_PROTO_TCP
2073 if (u->tcp_timeout) {
9330419d
HS
2074 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2075 pd->timeout_table[IP_VS_TCP_S_ESTABLISHED]
1da177e4
LT
2076 = u->tcp_timeout * HZ;
2077 }
2078
2079 if (u->tcp_fin_timeout) {
9330419d
HS
2080 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2081 pd->timeout_table[IP_VS_TCP_S_FIN_WAIT]
1da177e4
LT
2082 = u->tcp_fin_timeout * HZ;
2083 }
2084#endif
2085
2086#ifdef CONFIG_IP_VS_PROTO_UDP
2087 if (u->udp_timeout) {
9330419d
HS
2088 pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
2089 pd->timeout_table[IP_VS_UDP_S_NORMAL]
1da177e4
LT
2090 = u->udp_timeout * HZ;
2091 }
2092#endif
2093 return 0;
2094}
2095
2096
/*
 * SET_CMDID() turns a set-sockopt command number into a zero-based table
 * index.  The argument is parenthesised so that an expression argument
 * (e.g. a conditional) is subtracted from as a whole.
 * The *_ARG_LEN macros give the argument size expected for each group of
 * set commands; MAX_ARG_LEN is the largest of them.
 */
#define SET_CMDID(cmd)		((cmd) - IP_VS_BASE_CTL)
#define SERVICE_ARG_LEN		(sizeof(struct ip_vs_service_user))
#define SVCDEST_ARG_LEN		(sizeof(struct ip_vs_service_user) +	\
				 sizeof(struct ip_vs_dest_user))
#define TIMEOUT_ARG_LEN		(sizeof(struct ip_vs_timeout_user))
#define DAEMON_ARG_LEN		(sizeof(struct ip_vs_daemon_user))
#define MAX_ARG_LEN		SVCDEST_ARG_LEN
2104
9b5b5cff 2105static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
1da177e4
LT
2106 [SET_CMDID(IP_VS_SO_SET_ADD)] = SERVICE_ARG_LEN,
2107 [SET_CMDID(IP_VS_SO_SET_EDIT)] = SERVICE_ARG_LEN,
2108 [SET_CMDID(IP_VS_SO_SET_DEL)] = SERVICE_ARG_LEN,
2109 [SET_CMDID(IP_VS_SO_SET_FLUSH)] = 0,
2110 [SET_CMDID(IP_VS_SO_SET_ADDDEST)] = SVCDEST_ARG_LEN,
2111 [SET_CMDID(IP_VS_SO_SET_DELDEST)] = SVCDEST_ARG_LEN,
2112 [SET_CMDID(IP_VS_SO_SET_EDITDEST)] = SVCDEST_ARG_LEN,
2113 [SET_CMDID(IP_VS_SO_SET_TIMEOUT)] = TIMEOUT_ARG_LEN,
2114 [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)] = DAEMON_ARG_LEN,
2115 [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)] = DAEMON_ARG_LEN,
2116 [SET_CMDID(IP_VS_SO_SET_ZERO)] = SERVICE_ARG_LEN,
2117};
2118
c860c6b1
JV
2119static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2120 struct ip_vs_service_user *usvc_compat)
2121{
0d1e71b0
SH
2122 memset(usvc, 0, sizeof(*usvc));
2123
c860c6b1
JV
2124 usvc->af = AF_INET;
2125 usvc->protocol = usvc_compat->protocol;
2126 usvc->addr.ip = usvc_compat->addr;
2127 usvc->port = usvc_compat->port;
2128 usvc->fwmark = usvc_compat->fwmark;
2129
2130 /* Deep copy of sched_name is not needed here */
2131 usvc->sched_name = usvc_compat->sched_name;
2132
2133 usvc->flags = usvc_compat->flags;
2134 usvc->timeout = usvc_compat->timeout;
2135 usvc->netmask = usvc_compat->netmask;
2136}
2137
2138static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
2139 struct ip_vs_dest_user *udest_compat)
2140{
0d1e71b0
SH
2141 memset(udest, 0, sizeof(*udest));
2142
c860c6b1
JV
2143 udest->addr.ip = udest_compat->addr;
2144 udest->port = udest_compat->port;
2145 udest->conn_flags = udest_compat->conn_flags;
2146 udest->weight = udest_compat->weight;
2147 udest->u_threshold = udest_compat->u_threshold;
2148 udest->l_threshold = udest_compat->l_threshold;
2149}
2150
1da177e4
LT
2151static int
2152do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2153{
fc723250 2154 struct net *net = sock_net(sk);
1da177e4
LT
2155 int ret;
2156 unsigned char arg[MAX_ARG_LEN];
c860c6b1
JV
2157 struct ip_vs_service_user *usvc_compat;
2158 struct ip_vs_service_user_kern usvc;
1da177e4 2159 struct ip_vs_service *svc;
c860c6b1
JV
2160 struct ip_vs_dest_user *udest_compat;
2161 struct ip_vs_dest_user_kern udest;
1da177e4
LT
2162
2163 if (!capable(CAP_NET_ADMIN))
2164 return -EPERM;
2165
04bcef2a
AV
2166 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_SET_MAX)
2167 return -EINVAL;
2168 if (len < 0 || len > MAX_ARG_LEN)
2169 return -EINVAL;
1da177e4 2170 if (len != set_arglen[SET_CMDID(cmd)]) {
1e3e238e
HE
2171 pr_err("set_ctl: len %u != %u\n",
2172 len, set_arglen[SET_CMDID(cmd)]);
1da177e4
LT
2173 return -EINVAL;
2174 }
2175
2176 if (copy_from_user(arg, user, len) != 0)
2177 return -EFAULT;
2178
2179 /* increase the module use count */
2180 ip_vs_use_count_inc();
2181
14cc3e2b 2182 if (mutex_lock_interruptible(&__ip_vs_mutex)) {
1da177e4
LT
2183 ret = -ERESTARTSYS;
2184 goto out_dec;
2185 }
2186
2187 if (cmd == IP_VS_SO_SET_FLUSH) {
2188 /* Flush the virtual service */
fc723250 2189 ret = ip_vs_flush(net);
1da177e4
LT
2190 goto out_unlock;
2191 } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
2192 /* Set timeout values for (tcp tcpfin udp) */
9330419d 2193 ret = ip_vs_set_timeout(net, (struct ip_vs_timeout_user *)arg);
1da177e4
LT
2194 goto out_unlock;
2195 } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
2196 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
f131315f
HS
2197 ret = start_sync_thread(net, dm->state, dm->mcast_ifn,
2198 dm->syncid);
1da177e4
LT
2199 goto out_unlock;
2200 } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
2201 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
f131315f 2202 ret = stop_sync_thread(net, dm->state);
1da177e4
LT
2203 goto out_unlock;
2204 }
2205
c860c6b1
JV
2206 usvc_compat = (struct ip_vs_service_user *)arg;
2207 udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
2208
2209 /* We only use the new structs internally, so copy userspace compat
2210 * structs to extended internal versions */
2211 ip_vs_copy_usvc_compat(&usvc, usvc_compat);
2212 ip_vs_copy_udest_compat(&udest, udest_compat);
1da177e4
LT
2213
2214 if (cmd == IP_VS_SO_SET_ZERO) {
2215 /* if no service address is set, zero counters in all */
c860c6b1 2216 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
fc723250 2217 ret = ip_vs_zero_all(net);
1da177e4
LT
2218 goto out_unlock;
2219 }
2220 }
2221
2906f66a
VMR
2222 /* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */
2223 if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP &&
2224 usvc.protocol != IPPROTO_SCTP) {
1e3e238e
HE
2225 pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n",
2226 usvc.protocol, &usvc.addr.ip,
2227 ntohs(usvc.port), usvc.sched_name);
1da177e4
LT
2228 ret = -EFAULT;
2229 goto out_unlock;
2230 }
2231
2232 /* Lookup the exact service by <protocol, addr, port> or fwmark */
c860c6b1 2233 if (usvc.fwmark == 0)
fc723250 2234 svc = __ip_vs_service_find(net, usvc.af, usvc.protocol,
26c15cfd 2235 &usvc.addr, usvc.port);
1da177e4 2236 else
fc723250 2237 svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark);
1da177e4
LT
2238
2239 if (cmd != IP_VS_SO_SET_ADD
c860c6b1 2240 && (svc == NULL || svc->protocol != usvc.protocol)) {
1da177e4 2241 ret = -ESRCH;
26c15cfd 2242 goto out_unlock;
1da177e4
LT
2243 }
2244
2245 switch (cmd) {
2246 case IP_VS_SO_SET_ADD:
2247 if (svc != NULL)
2248 ret = -EEXIST;
2249 else
fc723250 2250 ret = ip_vs_add_service(net, &usvc, &svc);
1da177e4
LT
2251 break;
2252 case IP_VS_SO_SET_EDIT:
c860c6b1 2253 ret = ip_vs_edit_service(svc, &usvc);
1da177e4
LT
2254 break;
2255 case IP_VS_SO_SET_DEL:
2256 ret = ip_vs_del_service(svc);
2257 if (!ret)
2258 goto out_unlock;
2259 break;
2260 case IP_VS_SO_SET_ZERO:
2261 ret = ip_vs_zero_service(svc);
2262 break;
2263 case IP_VS_SO_SET_ADDDEST:
c860c6b1 2264 ret = ip_vs_add_dest(svc, &udest);
1da177e4
LT
2265 break;
2266 case IP_VS_SO_SET_EDITDEST:
c860c6b1 2267 ret = ip_vs_edit_dest(svc, &udest);
1da177e4
LT
2268 break;
2269 case IP_VS_SO_SET_DELDEST:
c860c6b1 2270 ret = ip_vs_del_dest(svc, &udest);
1da177e4
LT
2271 break;
2272 default:
2273 ret = -EINVAL;
2274 }
2275
1da177e4 2276 out_unlock:
14cc3e2b 2277 mutex_unlock(&__ip_vs_mutex);
1da177e4
LT
2278 out_dec:
2279 /* decrease the module use count */
2280 ip_vs_use_count_dec();
2281
2282 return ret;
2283}
2284
2285
2286static void
2287ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
2288{
2289 spin_lock_bh(&src->lock);
e9c0ce23 2290 memcpy(dst, &src->ustats, sizeof(*dst));
1da177e4
LT
2291 spin_unlock_bh(&src->lock);
2292}
2293
2294static void
2295ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2296{
2297 dst->protocol = src->protocol;
e7ade46a 2298 dst->addr = src->addr.ip;
1da177e4
LT
2299 dst->port = src->port;
2300 dst->fwmark = src->fwmark;
4da62fc7 2301 strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
1da177e4
LT
2302 dst->flags = src->flags;
2303 dst->timeout = src->timeout / HZ;
2304 dst->netmask = src->netmask;
2305 dst->num_dests = src->num_dests;
2306 ip_vs_copy_stats(&dst->stats, &src->stats);
2307}
2308
2309static inline int
fc723250
HS
2310__ip_vs_get_service_entries(struct net *net,
2311 const struct ip_vs_get_services *get,
1da177e4
LT
2312 struct ip_vs_get_services __user *uptr)
2313{
2314 int idx, count=0;
2315 struct ip_vs_service *svc;
2316 struct ip_vs_service_entry entry;
2317 int ret = 0;
2318
2319 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2320 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
f94fd041 2321 /* Only expose IPv4 entries to old interface */
fc723250 2322 if (svc->af != AF_INET || !net_eq(svc->net, net))
f94fd041
JV
2323 continue;
2324
1da177e4
LT
2325 if (count >= get->num_services)
2326 goto out;
4da62fc7 2327 memset(&entry, 0, sizeof(entry));
1da177e4
LT
2328 ip_vs_copy_service(&entry, svc);
2329 if (copy_to_user(&uptr->entrytable[count],
2330 &entry, sizeof(entry))) {
2331 ret = -EFAULT;
2332 goto out;
2333 }
2334 count++;
2335 }
2336 }
2337
2338 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2339 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
f94fd041 2340 /* Only expose IPv4 entries to old interface */
fc723250 2341 if (svc->af != AF_INET || !net_eq(svc->net, net))
f94fd041
JV
2342 continue;
2343
1da177e4
LT
2344 if (count >= get->num_services)
2345 goto out;
4da62fc7 2346 memset(&entry, 0, sizeof(entry));
1da177e4
LT
2347 ip_vs_copy_service(&entry, svc);
2348 if (copy_to_user(&uptr->entrytable[count],
2349 &entry, sizeof(entry))) {
2350 ret = -EFAULT;
2351 goto out;
2352 }
2353 count++;
2354 }
2355 }
2356 out:
2357 return ret;
2358}
2359
2360static inline int
fc723250 2361__ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
1da177e4
LT
2362 struct ip_vs_get_dests __user *uptr)
2363{
2364 struct ip_vs_service *svc;
b18610de 2365 union nf_inet_addr addr = { .ip = get->addr };
1da177e4
LT
2366 int ret = 0;
2367
2368 if (get->fwmark)
fc723250 2369 svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark);
1da177e4 2370 else
fc723250 2371 svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr,
26c15cfd 2372 get->port);
b18610de 2373
1da177e4
LT
2374 if (svc) {
2375 int count = 0;
2376 struct ip_vs_dest *dest;
2377 struct ip_vs_dest_entry entry;
2378
2379 list_for_each_entry(dest, &svc->destinations, n_list) {
2380 if (count >= get->num_dests)
2381 break;
2382
e7ade46a 2383 entry.addr = dest->addr.ip;
1da177e4
LT
2384 entry.port = dest->port;
2385 entry.conn_flags = atomic_read(&dest->conn_flags);
2386 entry.weight = atomic_read(&dest->weight);
2387 entry.u_threshold = dest->u_threshold;
2388 entry.l_threshold = dest->l_threshold;
2389 entry.activeconns = atomic_read(&dest->activeconns);
2390 entry.inactconns = atomic_read(&dest->inactconns);
2391 entry.persistconns = atomic_read(&dest->persistconns);
2392 ip_vs_copy_stats(&entry.stats, &dest->stats);
2393 if (copy_to_user(&uptr->entrytable[count],
2394 &entry, sizeof(entry))) {
2395 ret = -EFAULT;
2396 break;
2397 }
2398 count++;
2399 }
1da177e4
LT
2400 } else
2401 ret = -ESRCH;
2402 return ret;
2403}
2404
2405static inline void
9330419d 2406__ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u)
1da177e4 2407{
9330419d
HS
2408 struct ip_vs_proto_data *pd;
2409
1da177e4 2410#ifdef CONFIG_IP_VS_PROTO_TCP
9330419d
HS
2411 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2412 u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2413 u->tcp_fin_timeout = pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
1da177e4
LT
2414#endif
2415#ifdef CONFIG_IP_VS_PROTO_UDP
9330419d 2416 pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
1da177e4 2417 u->udp_timeout =
9330419d 2418 pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
1da177e4
LT
2419#endif
2420}
2421
2422
2423#define GET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
2424#define GET_INFO_ARG_LEN (sizeof(struct ip_vs_getinfo))
2425#define GET_SERVICES_ARG_LEN (sizeof(struct ip_vs_get_services))
2426#define GET_SERVICE_ARG_LEN (sizeof(struct ip_vs_service_entry))
2427#define GET_DESTS_ARG_LEN (sizeof(struct ip_vs_get_dests))
2428#define GET_TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
2429#define GET_DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user) * 2)
2430
9b5b5cff 2431static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
1da177e4
LT
2432 [GET_CMDID(IP_VS_SO_GET_VERSION)] = 64,
2433 [GET_CMDID(IP_VS_SO_GET_INFO)] = GET_INFO_ARG_LEN,
2434 [GET_CMDID(IP_VS_SO_GET_SERVICES)] = GET_SERVICES_ARG_LEN,
2435 [GET_CMDID(IP_VS_SO_GET_SERVICE)] = GET_SERVICE_ARG_LEN,
2436 [GET_CMDID(IP_VS_SO_GET_DESTS)] = GET_DESTS_ARG_LEN,
2437 [GET_CMDID(IP_VS_SO_GET_TIMEOUT)] = GET_TIMEOUT_ARG_LEN,
2438 [GET_CMDID(IP_VS_SO_GET_DAEMON)] = GET_DAEMON_ARG_LEN,
2439};
2440
2441static int
2442do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2443{
2444 unsigned char arg[128];
2445 int ret = 0;
04bcef2a 2446 unsigned int copylen;
fc723250 2447 struct net *net = sock_net(sk);
f131315f 2448 struct netns_ipvs *ipvs = net_ipvs(net);
1da177e4 2449
fc723250 2450 BUG_ON(!net);
1da177e4
LT
2451 if (!capable(CAP_NET_ADMIN))
2452 return -EPERM;
2453
04bcef2a
AV
2454 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_GET_MAX)
2455 return -EINVAL;
2456
1da177e4 2457 if (*len < get_arglen[GET_CMDID(cmd)]) {
1e3e238e
HE
2458 pr_err("get_ctl: len %u < %u\n",
2459 *len, get_arglen[GET_CMDID(cmd)]);
1da177e4
LT
2460 return -EINVAL;
2461 }
2462
04bcef2a
AV
2463 copylen = get_arglen[GET_CMDID(cmd)];
2464 if (copylen > 128)
2465 return -EINVAL;
2466
2467 if (copy_from_user(arg, user, copylen) != 0)
1da177e4
LT
2468 return -EFAULT;
2469
14cc3e2b 2470 if (mutex_lock_interruptible(&__ip_vs_mutex))
1da177e4
LT
2471 return -ERESTARTSYS;
2472
2473 switch (cmd) {
2474 case IP_VS_SO_GET_VERSION:
2475 {
2476 char buf[64];
2477
2478 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
6f7edb48 2479 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
1da177e4
LT
2480 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2481 ret = -EFAULT;
2482 goto out;
2483 }
2484 *len = strlen(buf)+1;
2485 }
2486 break;
2487
2488 case IP_VS_SO_GET_INFO:
2489 {
2490 struct ip_vs_getinfo info;
2491 info.version = IP_VS_VERSION_CODE;
6f7edb48 2492 info.size = ip_vs_conn_tab_size;
a0840e2e 2493 info.num_services = ipvs->num_services;
1da177e4
LT
2494 if (copy_to_user(user, &info, sizeof(info)) != 0)
2495 ret = -EFAULT;
2496 }
2497 break;
2498
2499 case IP_VS_SO_GET_SERVICES:
2500 {
2501 struct ip_vs_get_services *get;
2502 int size;
2503
2504 get = (struct ip_vs_get_services *)arg;
2505 size = sizeof(*get) +
2506 sizeof(struct ip_vs_service_entry) * get->num_services;
2507 if (*len != size) {
1e3e238e 2508 pr_err("length: %u != %u\n", *len, size);
1da177e4
LT
2509 ret = -EINVAL;
2510 goto out;
2511 }
fc723250 2512 ret = __ip_vs_get_service_entries(net, get, user);
1da177e4
LT
2513 }
2514 break;
2515
2516 case IP_VS_SO_GET_SERVICE:
2517 {
2518 struct ip_vs_service_entry *entry;
2519 struct ip_vs_service *svc;
b18610de 2520 union nf_inet_addr addr;
1da177e4
LT
2521
2522 entry = (struct ip_vs_service_entry *)arg;
b18610de 2523 addr.ip = entry->addr;
1da177e4 2524 if (entry->fwmark)
fc723250 2525 svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark);
1da177e4 2526 else
fc723250
HS
2527 svc = __ip_vs_service_find(net, AF_INET,
2528 entry->protocol, &addr,
2529 entry->port);
1da177e4
LT
2530 if (svc) {
2531 ip_vs_copy_service(entry, svc);
2532 if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2533 ret = -EFAULT;
1da177e4
LT
2534 } else
2535 ret = -ESRCH;
2536 }
2537 break;
2538
2539 case IP_VS_SO_GET_DESTS:
2540 {
2541 struct ip_vs_get_dests *get;
2542 int size;
2543
2544 get = (struct ip_vs_get_dests *)arg;
2545 size = sizeof(*get) +
2546 sizeof(struct ip_vs_dest_entry) * get->num_dests;
2547 if (*len != size) {
1e3e238e 2548 pr_err("length: %u != %u\n", *len, size);
1da177e4
LT
2549 ret = -EINVAL;
2550 goto out;
2551 }
fc723250 2552 ret = __ip_vs_get_dest_entries(net, get, user);
1da177e4
LT
2553 }
2554 break;
2555
2556 case IP_VS_SO_GET_TIMEOUT:
2557 {
2558 struct ip_vs_timeout_user t;
2559
9330419d 2560 __ip_vs_get_timeouts(net, &t);
1da177e4
LT
2561 if (copy_to_user(user, &t, sizeof(t)) != 0)
2562 ret = -EFAULT;
2563 }
2564 break;
2565
2566 case IP_VS_SO_GET_DAEMON:
2567 {
2568 struct ip_vs_daemon_user d[2];
2569
2570 memset(&d, 0, sizeof(d));
f131315f 2571 if (ipvs->sync_state & IP_VS_STATE_MASTER) {
1da177e4 2572 d[0].state = IP_VS_STATE_MASTER;
f131315f
HS
2573 strlcpy(d[0].mcast_ifn, ipvs->master_mcast_ifn,
2574 sizeof(d[0].mcast_ifn));
2575 d[0].syncid = ipvs->master_syncid;
1da177e4 2576 }
f131315f 2577 if (ipvs->sync_state & IP_VS_STATE_BACKUP) {
1da177e4 2578 d[1].state = IP_VS_STATE_BACKUP;
f131315f
HS
2579 strlcpy(d[1].mcast_ifn, ipvs->backup_mcast_ifn,
2580 sizeof(d[1].mcast_ifn));
2581 d[1].syncid = ipvs->backup_syncid;
1da177e4
LT
2582 }
2583 if (copy_to_user(user, &d, sizeof(d)) != 0)
2584 ret = -EFAULT;
2585 }
2586 break;
2587
2588 default:
2589 ret = -EINVAL;
2590 }
2591
2592 out:
14cc3e2b 2593 mutex_unlock(&__ip_vs_mutex);
1da177e4
LT
2594 return ret;
2595}
2596
2597
2598static struct nf_sockopt_ops ip_vs_sockopts = {
2599 .pf = PF_INET,
2600 .set_optmin = IP_VS_BASE_CTL,
2601 .set_optmax = IP_VS_SO_SET_MAX+1,
2602 .set = do_ip_vs_set_ctl,
2603 .get_optmin = IP_VS_BASE_CTL,
2604 .get_optmax = IP_VS_SO_GET_MAX+1,
2605 .get = do_ip_vs_get_ctl,
16fcec35 2606 .owner = THIS_MODULE,
1da177e4
LT
2607};
2608
9a812198
JV
2609/*
2610 * Generic Netlink interface
2611 */
2612
2613/* IPVS genetlink family */
2614static struct genl_family ip_vs_genl_family = {
2615 .id = GENL_ID_GENERATE,
2616 .hdrsize = 0,
2617 .name = IPVS_GENL_NAME,
2618 .version = IPVS_GENL_VERSION,
2619 .maxattr = IPVS_CMD_MAX,
c6d2d445 2620 .netnsok = true, /* Make ipvsadm to work on netns */
9a812198
JV
2621};
2622
2623/* Policy used for first-level command attributes */
2624static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
2625 [IPVS_CMD_ATTR_SERVICE] = { .type = NLA_NESTED },
2626 [IPVS_CMD_ATTR_DEST] = { .type = NLA_NESTED },
2627 [IPVS_CMD_ATTR_DAEMON] = { .type = NLA_NESTED },
2628 [IPVS_CMD_ATTR_TIMEOUT_TCP] = { .type = NLA_U32 },
2629 [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
2630 [IPVS_CMD_ATTR_TIMEOUT_UDP] = { .type = NLA_U32 },
2631};
2632
2633/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
2634static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
2635 [IPVS_DAEMON_ATTR_STATE] = { .type = NLA_U32 },
2636 [IPVS_DAEMON_ATTR_MCAST_IFN] = { .type = NLA_NUL_STRING,
2637 .len = IP_VS_IFNAME_MAXLEN },
2638 [IPVS_DAEMON_ATTR_SYNC_ID] = { .type = NLA_U32 },
2639};
2640
2641/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
2642static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2643 [IPVS_SVC_ATTR_AF] = { .type = NLA_U16 },
2644 [IPVS_SVC_ATTR_PROTOCOL] = { .type = NLA_U16 },
2645 [IPVS_SVC_ATTR_ADDR] = { .type = NLA_BINARY,
2646 .len = sizeof(union nf_inet_addr) },
2647 [IPVS_SVC_ATTR_PORT] = { .type = NLA_U16 },
2648 [IPVS_SVC_ATTR_FWMARK] = { .type = NLA_U32 },
2649 [IPVS_SVC_ATTR_SCHED_NAME] = { .type = NLA_NUL_STRING,
2650 .len = IP_VS_SCHEDNAME_MAXLEN },
0d1e71b0
SH
2651 [IPVS_SVC_ATTR_PE_NAME] = { .type = NLA_NUL_STRING,
2652 .len = IP_VS_PENAME_MAXLEN },
9a812198
JV
2653 [IPVS_SVC_ATTR_FLAGS] = { .type = NLA_BINARY,
2654 .len = sizeof(struct ip_vs_flags) },
2655 [IPVS_SVC_ATTR_TIMEOUT] = { .type = NLA_U32 },
2656 [IPVS_SVC_ATTR_NETMASK] = { .type = NLA_U32 },
2657 [IPVS_SVC_ATTR_STATS] = { .type = NLA_NESTED },
2658};
2659
2660/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
2661static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2662 [IPVS_DEST_ATTR_ADDR] = { .type = NLA_BINARY,
2663 .len = sizeof(union nf_inet_addr) },
2664 [IPVS_DEST_ATTR_PORT] = { .type = NLA_U16 },
2665 [IPVS_DEST_ATTR_FWD_METHOD] = { .type = NLA_U32 },
2666 [IPVS_DEST_ATTR_WEIGHT] = { .type = NLA_U32 },
2667 [IPVS_DEST_ATTR_U_THRESH] = { .type = NLA_U32 },
2668 [IPVS_DEST_ATTR_L_THRESH] = { .type = NLA_U32 },
2669 [IPVS_DEST_ATTR_ACTIVE_CONNS] = { .type = NLA_U32 },
2670 [IPVS_DEST_ATTR_INACT_CONNS] = { .type = NLA_U32 },
2671 [IPVS_DEST_ATTR_PERSIST_CONNS] = { .type = NLA_U32 },
2672 [IPVS_DEST_ATTR_STATS] = { .type = NLA_NESTED },
2673};
2674
2675static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2676 struct ip_vs_stats *stats)
2677{
2678 struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2679 if (!nl_stats)
2680 return -EMSGSIZE;
2681
2682 spin_lock_bh(&stats->lock);
2683
e9c0ce23
SW
2684 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->ustats.conns);
2685 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->ustats.inpkts);
2686 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->ustats.outpkts);
2687 NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->ustats.inbytes);
2688 NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->ustats.outbytes);
2689 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->ustats.cps);
2690 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->ustats.inpps);
2691 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->ustats.outpps);
2692 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->ustats.inbps);
2693 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->ustats.outbps);
9a812198
JV
2694
2695 spin_unlock_bh(&stats->lock);
2696
2697 nla_nest_end(skb, nl_stats);
2698
2699 return 0;
2700
2701nla_put_failure:
2702 spin_unlock_bh(&stats->lock);
2703 nla_nest_cancel(skb, nl_stats);
2704 return -EMSGSIZE;
2705}
2706
2707static int ip_vs_genl_fill_service(struct sk_buff *skb,
2708 struct ip_vs_service *svc)
2709{
2710 struct nlattr *nl_service;
2711 struct ip_vs_flags flags = { .flags = svc->flags,
2712 .mask = ~0 };
2713
2714 nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2715 if (!nl_service)
2716 return -EMSGSIZE;
2717
f94fd041 2718 NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, svc->af);
9a812198
JV
2719
2720 if (svc->fwmark) {
2721 NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
2722 } else {
2723 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
2724 NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
2725 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
2726 }
2727
2728 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
0d1e71b0
SH
2729 if (svc->pe)
2730 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_PE_NAME, svc->pe->name);
9a812198
JV
2731 NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
2732 NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
2733 NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
2734
2735 if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2736 goto nla_put_failure;
2737
2738 nla_nest_end(skb, nl_service);
2739
2740 return 0;
2741
2742nla_put_failure:
2743 nla_nest_cancel(skb, nl_service);
2744 return -EMSGSIZE;
2745}
2746
2747static int ip_vs_genl_dump_service(struct sk_buff *skb,
2748 struct ip_vs_service *svc,
2749 struct netlink_callback *cb)
2750{
2751 void *hdr;
2752
2753 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2754 &ip_vs_genl_family, NLM_F_MULTI,
2755 IPVS_CMD_NEW_SERVICE);
2756 if (!hdr)
2757 return -EMSGSIZE;
2758
2759 if (ip_vs_genl_fill_service(skb, svc) < 0)
2760 goto nla_put_failure;
2761
2762 return genlmsg_end(skb, hdr);
2763
2764nla_put_failure:
2765 genlmsg_cancel(skb, hdr);
2766 return -EMSGSIZE;
2767}
2768
2769static int ip_vs_genl_dump_services(struct sk_buff *skb,
2770 struct netlink_callback *cb)
2771{
2772 int idx = 0, i;
2773 int start = cb->args[0];
2774 struct ip_vs_service *svc;
fc723250 2775 struct net *net = skb_sknet(skb);
9a812198
JV
2776
2777 mutex_lock(&__ip_vs_mutex);
2778 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2779 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
fc723250 2780 if (++idx <= start || !net_eq(svc->net, net))
9a812198
JV
2781 continue;
2782 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2783 idx--;
2784 goto nla_put_failure;
2785 }
2786 }
2787 }
2788
2789 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2790 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
fc723250 2791 if (++idx <= start || !net_eq(svc->net, net))
9a812198
JV
2792 continue;
2793 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2794 idx--;
2795 goto nla_put_failure;
2796 }
2797 }
2798 }
2799
2800nla_put_failure:
2801 mutex_unlock(&__ip_vs_mutex);
2802 cb->args[0] = idx;
2803
2804 return skb->len;
2805}
2806
fc723250
HS
2807static int ip_vs_genl_parse_service(struct net *net,
2808 struct ip_vs_service_user_kern *usvc,
26c15cfd
JA
2809 struct nlattr *nla, int full_entry,
2810 struct ip_vs_service **ret_svc)
9a812198
JV
2811{
2812 struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
2813 struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
26c15cfd 2814 struct ip_vs_service *svc;
9a812198
JV
2815
2816 /* Parse mandatory identifying service fields first */
2817 if (nla == NULL ||
2818 nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
2819 return -EINVAL;
2820
2821 nla_af = attrs[IPVS_SVC_ATTR_AF];
2822 nla_protocol = attrs[IPVS_SVC_ATTR_PROTOCOL];
2823 nla_addr = attrs[IPVS_SVC_ATTR_ADDR];
2824 nla_port = attrs[IPVS_SVC_ATTR_PORT];
2825 nla_fwmark = attrs[IPVS_SVC_ATTR_FWMARK];
2826
2827 if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
2828 return -EINVAL;
2829
258c8893
SH
2830 memset(usvc, 0, sizeof(*usvc));
2831
c860c6b1 2832 usvc->af = nla_get_u16(nla_af);
f94fd041
JV
2833#ifdef CONFIG_IP_VS_IPV6
2834 if (usvc->af != AF_INET && usvc->af != AF_INET6)
2835#else
2836 if (usvc->af != AF_INET)
2837#endif
9a812198
JV
2838 return -EAFNOSUPPORT;
2839
2840 if (nla_fwmark) {
2841 usvc->protocol = IPPROTO_TCP;
2842 usvc->fwmark = nla_get_u32(nla_fwmark);
2843 } else {
2844 usvc->protocol = nla_get_u16(nla_protocol);
2845 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
2846 usvc->port = nla_get_u16(nla_port);
2847 usvc->fwmark = 0;
2848 }
2849
26c15cfd 2850 if (usvc->fwmark)
fc723250 2851 svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark);
26c15cfd 2852 else
fc723250 2853 svc = __ip_vs_service_find(net, usvc->af, usvc->protocol,
26c15cfd
JA
2854 &usvc->addr, usvc->port);
2855 *ret_svc = svc;
2856
9a812198
JV
2857 /* If a full entry was requested, check for the additional fields */
2858 if (full_entry) {
0d1e71b0 2859 struct nlattr *nla_sched, *nla_flags, *nla_pe, *nla_timeout,
9a812198
JV
2860 *nla_netmask;
2861 struct ip_vs_flags flags;
9a812198
JV
2862
2863 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
0d1e71b0 2864 nla_pe = attrs[IPVS_SVC_ATTR_PE_NAME];
9a812198
JV
2865 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
2866 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
2867 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
2868
2869 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
2870 return -EINVAL;
2871
2872 nla_memcpy(&flags, nla_flags, sizeof(flags));
2873
2874 /* prefill flags from service if it already exists */
26c15cfd 2875 if (svc)
9a812198 2876 usvc->flags = svc->flags;
9a812198
JV
2877
2878 /* set new flags from userland */
2879 usvc->flags = (usvc->flags & ~flags.mask) |
2880 (flags.flags & flags.mask);
c860c6b1 2881 usvc->sched_name = nla_data(nla_sched);
0d1e71b0 2882 usvc->pe_name = nla_pe ? nla_data(nla_pe) : NULL;
9a812198
JV
2883 usvc->timeout = nla_get_u32(nla_timeout);
2884 usvc->netmask = nla_get_u32(nla_netmask);
2885 }
2886
2887 return 0;
2888}
2889
fc723250
HS
2890static struct ip_vs_service *ip_vs_genl_find_service(struct net *net,
2891 struct nlattr *nla)
9a812198 2892{
c860c6b1 2893 struct ip_vs_service_user_kern usvc;
26c15cfd 2894 struct ip_vs_service *svc;
9a812198
JV
2895 int ret;
2896
fc723250 2897 ret = ip_vs_genl_parse_service(net, &usvc, nla, 0, &svc);
26c15cfd 2898 return ret ? ERR_PTR(ret) : svc;
9a812198
JV
2899}
2900
2901static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
2902{
2903 struct nlattr *nl_dest;
2904
2905 nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
2906 if (!nl_dest)
2907 return -EMSGSIZE;
2908
2909 NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
2910 NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
2911
2912 NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
2913 atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
2914 NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
2915 NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
2916 NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
2917 NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
2918 atomic_read(&dest->activeconns));
2919 NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
2920 atomic_read(&dest->inactconns));
2921 NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
2922 atomic_read(&dest->persistconns));
2923
2924 if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
2925 goto nla_put_failure;
2926
2927 nla_nest_end(skb, nl_dest);
2928
2929 return 0;
2930
2931nla_put_failure:
2932 nla_nest_cancel(skb, nl_dest);
2933 return -EMSGSIZE;
2934}
2935
2936static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
2937 struct netlink_callback *cb)
2938{
2939 void *hdr;
2940
2941 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2942 &ip_vs_genl_family, NLM_F_MULTI,
2943 IPVS_CMD_NEW_DEST);
2944 if (!hdr)
2945 return -EMSGSIZE;
2946
2947 if (ip_vs_genl_fill_dest(skb, dest) < 0)
2948 goto nla_put_failure;
2949
2950 return genlmsg_end(skb, hdr);
2951
2952nla_put_failure:
2953 genlmsg_cancel(skb, hdr);
2954 return -EMSGSIZE;
2955}
2956
2957static int ip_vs_genl_dump_dests(struct sk_buff *skb,
2958 struct netlink_callback *cb)
2959{
2960 int idx = 0;
2961 int start = cb->args[0];
2962 struct ip_vs_service *svc;
2963 struct ip_vs_dest *dest;
2964 struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
a0840e2e 2965 struct net *net = skb_sknet(skb);
9a812198
JV
2966
2967 mutex_lock(&__ip_vs_mutex);
2968
2969 /* Try to find the service for which to dump destinations */
2970 if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
2971 IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
2972 goto out_err;
2973
a0840e2e 2974
fc723250 2975 svc = ip_vs_genl_find_service(net, attrs[IPVS_CMD_ATTR_SERVICE]);
9a812198
JV
2976 if (IS_ERR(svc) || svc == NULL)
2977 goto out_err;
2978
2979 /* Dump the destinations */
2980 list_for_each_entry(dest, &svc->destinations, n_list) {
2981 if (++idx <= start)
2982 continue;
2983 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
2984 idx--;
2985 goto nla_put_failure;
2986 }
2987 }
2988
2989nla_put_failure:
2990 cb->args[0] = idx;
9a812198
JV
2991
2992out_err:
2993 mutex_unlock(&__ip_vs_mutex);
2994
2995 return skb->len;
2996}
2997
c860c6b1 2998static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
9a812198
JV
2999 struct nlattr *nla, int full_entry)
3000{
3001 struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
3002 struct nlattr *nla_addr, *nla_port;
3003
3004 /* Parse mandatory identifying destination fields first */
3005 if (nla == NULL ||
3006 nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
3007 return -EINVAL;
3008
3009 nla_addr = attrs[IPVS_DEST_ATTR_ADDR];
3010 nla_port = attrs[IPVS_DEST_ATTR_PORT];
3011
3012 if (!(nla_addr && nla_port))
3013 return -EINVAL;
3014
258c8893
SH
3015 memset(udest, 0, sizeof(*udest));
3016
9a812198
JV
3017 nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
3018 udest->port = nla_get_u16(nla_port);
3019
3020 /* If a full entry was requested, check for the additional fields */
3021 if (full_entry) {
3022 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
3023 *nla_l_thresh;
3024
3025 nla_fwd = attrs[IPVS_DEST_ATTR_FWD_METHOD];
3026 nla_weight = attrs[IPVS_DEST_ATTR_WEIGHT];
3027 nla_u_thresh = attrs[IPVS_DEST_ATTR_U_THRESH];
3028 nla_l_thresh = attrs[IPVS_DEST_ATTR_L_THRESH];
3029
3030 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
3031 return -EINVAL;
3032
3033 udest->conn_flags = nla_get_u32(nla_fwd)
3034 & IP_VS_CONN_F_FWD_MASK;
3035 udest->weight = nla_get_u32(nla_weight);
3036 udest->u_threshold = nla_get_u32(nla_u_thresh);
3037 udest->l_threshold = nla_get_u32(nla_l_thresh);
3038 }
3039
3040 return 0;
3041}
3042
3043static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
3044 const char *mcast_ifn, __be32 syncid)
3045{
3046 struct nlattr *nl_daemon;
3047
3048 nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
3049 if (!nl_daemon)
3050 return -EMSGSIZE;
3051
3052 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
3053 NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
3054 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
3055
3056 nla_nest_end(skb, nl_daemon);
3057
3058 return 0;
3059
3060nla_put_failure:
3061 nla_nest_cancel(skb, nl_daemon);
3062 return -EMSGSIZE;
3063}
3064
3065static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
3066 const char *mcast_ifn, __be32 syncid,
3067 struct netlink_callback *cb)
3068{
3069 void *hdr;
3070 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
3071 &ip_vs_genl_family, NLM_F_MULTI,
3072 IPVS_CMD_NEW_DAEMON);
3073 if (!hdr)
3074 return -EMSGSIZE;
3075
3076 if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
3077 goto nla_put_failure;
3078
3079 return genlmsg_end(skb, hdr);
3080
3081nla_put_failure:
3082 genlmsg_cancel(skb, hdr);
3083 return -EMSGSIZE;
3084}
3085
3086static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
3087 struct netlink_callback *cb)
3088{
f131315f
HS
3089 struct net *net = skb_net(skb);
3090 struct netns_ipvs *ipvs = net_ipvs(net);
3091
9a812198 3092 mutex_lock(&__ip_vs_mutex);
f131315f 3093 if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
9a812198 3094 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
f131315f
HS
3095 ipvs->master_mcast_ifn,
3096 ipvs->master_syncid, cb) < 0)
9a812198
JV
3097 goto nla_put_failure;
3098
3099 cb->args[0] = 1;
3100 }
3101
f131315f 3102 if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
9a812198 3103 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
f131315f
HS
3104 ipvs->backup_mcast_ifn,
3105 ipvs->backup_syncid, cb) < 0)
9a812198
JV
3106 goto nla_put_failure;
3107
3108 cb->args[1] = 1;
3109 }
3110
3111nla_put_failure:
3112 mutex_unlock(&__ip_vs_mutex);
3113
3114 return skb->len;
3115}
3116
f131315f 3117static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs)
9a812198
JV
3118{
3119 if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
3120 attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
3121 attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
3122 return -EINVAL;
3123
f131315f
HS
3124 return start_sync_thread(net,
3125 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
9a812198
JV
3126 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
3127 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
3128}
3129
f131315f 3130static int ip_vs_genl_del_daemon(struct net *net, struct nlattr **attrs)
9a812198
JV
3131{
3132 if (!attrs[IPVS_DAEMON_ATTR_STATE])
3133 return -EINVAL;
3134
f131315f
HS
3135 return stop_sync_thread(net,
3136 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
9a812198
JV
3137}
3138
9330419d 3139static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs)
9a812198
JV
3140{
3141 struct ip_vs_timeout_user t;
3142
9330419d 3143 __ip_vs_get_timeouts(net, &t);
9a812198
JV
3144
3145 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
3146 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
3147
3148 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
3149 t.tcp_fin_timeout =
3150 nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
3151
3152 if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
3153 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
3154
9330419d 3155 return ip_vs_set_timeout(net, &t);
9a812198
JV
3156}
3157
3158static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3159{
3160 struct ip_vs_service *svc = NULL;
c860c6b1
JV
3161 struct ip_vs_service_user_kern usvc;
3162 struct ip_vs_dest_user_kern udest;
9a812198
JV
3163 int ret = 0, cmd;
3164 int need_full_svc = 0, need_full_dest = 0;
fc723250 3165 struct net *net;
a0840e2e 3166 struct netns_ipvs *ipvs;
9a812198 3167
fc723250 3168 net = skb_sknet(skb);
a0840e2e 3169 ipvs = net_ipvs(net);
9a812198
JV
3170 cmd = info->genlhdr->cmd;
3171
3172 mutex_lock(&__ip_vs_mutex);
3173
3174 if (cmd == IPVS_CMD_FLUSH) {
fc723250 3175 ret = ip_vs_flush(net);
9a812198
JV
3176 goto out;
3177 } else if (cmd == IPVS_CMD_SET_CONFIG) {
9330419d 3178 ret = ip_vs_genl_set_config(net, info->attrs);
9a812198
JV
3179 goto out;
3180 } else if (cmd == IPVS_CMD_NEW_DAEMON ||
3181 cmd == IPVS_CMD_DEL_DAEMON) {
3182
3183 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
3184
3185 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
3186 nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
3187 info->attrs[IPVS_CMD_ATTR_DAEMON],
3188 ip_vs_daemon_policy)) {
3189 ret = -EINVAL;
3190 goto out;
3191 }
3192
3193 if (cmd == IPVS_CMD_NEW_DAEMON)
f131315f 3194 ret = ip_vs_genl_new_daemon(net, daemon_attrs);
9a812198 3195 else
f131315f 3196 ret = ip_vs_genl_del_daemon(net, daemon_attrs);
9a812198
JV
3197 goto out;
3198 } else if (cmd == IPVS_CMD_ZERO &&
3199 !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
fc723250 3200 ret = ip_vs_zero_all(net);
9a812198
JV
3201 goto out;
3202 }
3203
3204 /* All following commands require a service argument, so check if we
3205 * received a valid one. We need a full service specification when
3206 * adding / editing a service. Only identifying members otherwise. */
3207 if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
3208 need_full_svc = 1;
3209
fc723250 3210 ret = ip_vs_genl_parse_service(net, &usvc,
9a812198 3211 info->attrs[IPVS_CMD_ATTR_SERVICE],
26c15cfd 3212 need_full_svc, &svc);
9a812198
JV
3213 if (ret)
3214 goto out;
3215
9a812198
JV
3216 /* Unless we're adding a new service, the service must already exist */
3217 if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
3218 ret = -ESRCH;
3219 goto out;
3220 }
3221
3222 /* Destination commands require a valid destination argument. For
3223 * adding / editing a destination, we need a full destination
3224 * specification. */
3225 if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
3226 cmd == IPVS_CMD_DEL_DEST) {
3227 if (cmd != IPVS_CMD_DEL_DEST)
3228 need_full_dest = 1;
3229
3230 ret = ip_vs_genl_parse_dest(&udest,
3231 info->attrs[IPVS_CMD_ATTR_DEST],
3232 need_full_dest);
3233 if (ret)
3234 goto out;
3235 }
3236
3237 switch (cmd) {
3238 case IPVS_CMD_NEW_SERVICE:
3239 if (svc == NULL)
fc723250 3240 ret = ip_vs_add_service(net, &usvc, &svc);
9a812198
JV
3241 else
3242 ret = -EEXIST;
3243 break;
3244 case IPVS_CMD_SET_SERVICE:
3245 ret = ip_vs_edit_service(svc, &usvc);
3246 break;
3247 case IPVS_CMD_DEL_SERVICE:
3248 ret = ip_vs_del_service(svc);
26c15cfd 3249 /* do not use svc, it can be freed */
9a812198
JV
3250 break;
3251 case IPVS_CMD_NEW_DEST:
3252 ret = ip_vs_add_dest(svc, &udest);
3253 break;
3254 case IPVS_CMD_SET_DEST:
3255 ret = ip_vs_edit_dest(svc, &udest);
3256 break;
3257 case IPVS_CMD_DEL_DEST:
3258 ret = ip_vs_del_dest(svc, &udest);
3259 break;
3260 case IPVS_CMD_ZERO:
3261 ret = ip_vs_zero_service(svc);
3262 break;
3263 default:
3264 ret = -EINVAL;
3265 }
3266
3267out:
9a812198
JV
3268 mutex_unlock(&__ip_vs_mutex);
3269
3270 return ret;
3271}
3272
3273static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3274{
3275 struct sk_buff *msg;
3276 void *reply;
3277 int ret, cmd, reply_cmd;
fc723250 3278 struct net *net;
a0840e2e 3279 struct netns_ipvs *ipvs;
9a812198 3280
fc723250 3281 net = skb_sknet(skb);
a0840e2e 3282 ipvs = net_ipvs(net);
9a812198
JV
3283 cmd = info->genlhdr->cmd;
3284
3285 if (cmd == IPVS_CMD_GET_SERVICE)
3286 reply_cmd = IPVS_CMD_NEW_SERVICE;
3287 else if (cmd == IPVS_CMD_GET_INFO)
3288 reply_cmd = IPVS_CMD_SET_INFO;
3289 else if (cmd == IPVS_CMD_GET_CONFIG)
3290 reply_cmd = IPVS_CMD_SET_CONFIG;
3291 else {
1e3e238e 3292 pr_err("unknown Generic Netlink command\n");
9a812198
JV
3293 return -EINVAL;
3294 }
3295
3296 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
3297 if (!msg)
3298 return -ENOMEM;
3299
3300 mutex_lock(&__ip_vs_mutex);
3301
3302 reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3303 if (reply == NULL)
3304 goto nla_put_failure;
3305
3306 switch (cmd) {
3307 case IPVS_CMD_GET_SERVICE:
3308 {
3309 struct ip_vs_service *svc;
3310
fc723250
HS
3311 svc = ip_vs_genl_find_service(net,
3312 info->attrs[IPVS_CMD_ATTR_SERVICE]);
9a812198
JV
3313 if (IS_ERR(svc)) {
3314 ret = PTR_ERR(svc);
3315 goto out_err;
3316 } else if (svc) {
3317 ret = ip_vs_genl_fill_service(msg, svc);
9a812198
JV
3318 if (ret)
3319 goto nla_put_failure;
3320 } else {
3321 ret = -ESRCH;
3322 goto out_err;
3323 }
3324
3325 break;
3326 }
3327
3328 case IPVS_CMD_GET_CONFIG:
3329 {
3330 struct ip_vs_timeout_user t;
3331
9330419d 3332 __ip_vs_get_timeouts(net, &t);
9a812198
JV
3333#ifdef CONFIG_IP_VS_PROTO_TCP
3334 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
3335 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3336 t.tcp_fin_timeout);
3337#endif
3338#ifdef CONFIG_IP_VS_PROTO_UDP
3339 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
3340#endif
3341
3342 break;
3343 }
3344
3345 case IPVS_CMD_GET_INFO:
3346 NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
3347 NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
6f7edb48 3348 ip_vs_conn_tab_size);
9a812198
JV
3349 break;
3350 }
3351
3352 genlmsg_end(msg, reply);
134e6375 3353 ret = genlmsg_reply(msg, info);
9a812198
JV
3354 goto out;
3355
3356nla_put_failure:
1e3e238e 3357 pr_err("not enough space in Netlink message\n");
9a812198
JV
3358 ret = -EMSGSIZE;
3359
3360out_err:
3361 nlmsg_free(msg);
3362out:
3363 mutex_unlock(&__ip_vs_mutex);
3364
3365 return ret;
3366}
3367
3368
3369static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
3370 {
3371 .cmd = IPVS_CMD_NEW_SERVICE,
3372 .flags = GENL_ADMIN_PERM,
3373 .policy = ip_vs_cmd_policy,
3374 .doit = ip_vs_genl_set_cmd,
3375 },
3376 {
3377 .cmd = IPVS_CMD_SET_SERVICE,
3378 .flags = GENL_ADMIN_PERM,
3379 .policy = ip_vs_cmd_policy,
3380 .doit = ip_vs_genl_set_cmd,
3381 },
3382 {
3383 .cmd = IPVS_CMD_DEL_SERVICE,
3384 .flags = GENL_ADMIN_PERM,
3385 .policy = ip_vs_cmd_policy,
3386 .doit = ip_vs_genl_set_cmd,
3387 },
3388 {
3389 .cmd = IPVS_CMD_GET_SERVICE,
3390 .flags = GENL_ADMIN_PERM,
3391 .doit = ip_vs_genl_get_cmd,
3392 .dumpit = ip_vs_genl_dump_services,
3393 .policy = ip_vs_cmd_policy,
3394 },
3395 {
3396 .cmd = IPVS_CMD_NEW_DEST,
3397 .flags = GENL_ADMIN_PERM,
3398 .policy = ip_vs_cmd_policy,
3399 .doit = ip_vs_genl_set_cmd,
3400 },
3401 {
3402 .cmd = IPVS_CMD_SET_DEST,
3403 .flags = GENL_ADMIN_PERM,
3404 .policy = ip_vs_cmd_policy,
3405 .doit = ip_vs_genl_set_cmd,
3406 },
3407 {
3408 .cmd = IPVS_CMD_DEL_DEST,
3409 .flags = GENL_ADMIN_PERM,
3410 .policy = ip_vs_cmd_policy,
3411 .doit = ip_vs_genl_set_cmd,
3412 },
3413 {
3414 .cmd = IPVS_CMD_GET_DEST,
3415 .flags = GENL_ADMIN_PERM,
3416 .policy = ip_vs_cmd_policy,
3417 .dumpit = ip_vs_genl_dump_dests,
3418 },
3419 {
3420 .cmd = IPVS_CMD_NEW_DAEMON,
3421 .flags = GENL_ADMIN_PERM,
3422 .policy = ip_vs_cmd_policy,
3423 .doit = ip_vs_genl_set_cmd,
3424 },
3425 {
3426 .cmd = IPVS_CMD_DEL_DAEMON,
3427 .flags = GENL_ADMIN_PERM,
3428 .policy = ip_vs_cmd_policy,
3429 .doit = ip_vs_genl_set_cmd,
3430 },
3431 {
3432 .cmd = IPVS_CMD_GET_DAEMON,
3433 .flags = GENL_ADMIN_PERM,
3434 .dumpit = ip_vs_genl_dump_daemons,
3435 },
3436 {
3437 .cmd = IPVS_CMD_SET_CONFIG,
3438 .flags = GENL_ADMIN_PERM,
3439 .policy = ip_vs_cmd_policy,
3440 .doit = ip_vs_genl_set_cmd,
3441 },
3442 {
3443 .cmd = IPVS_CMD_GET_CONFIG,
3444 .flags = GENL_ADMIN_PERM,
3445 .doit = ip_vs_genl_get_cmd,
3446 },
3447 {
3448 .cmd = IPVS_CMD_GET_INFO,
3449 .flags = GENL_ADMIN_PERM,
3450 .doit = ip_vs_genl_get_cmd,
3451 },
3452 {
3453 .cmd = IPVS_CMD_ZERO,
3454 .flags = GENL_ADMIN_PERM,
3455 .policy = ip_vs_cmd_policy,
3456 .doit = ip_vs_genl_set_cmd,
3457 },
3458 {
3459 .cmd = IPVS_CMD_FLUSH,
3460 .flags = GENL_ADMIN_PERM,
3461 .doit = ip_vs_genl_set_cmd,
3462 },
3463};
3464
3465static int __init ip_vs_genl_register(void)
3466{
8f698d54
MM
3467 return genl_register_family_with_ops(&ip_vs_genl_family,
3468 ip_vs_genl_ops, ARRAY_SIZE(ip_vs_genl_ops));
9a812198
JV
3469}
3470
3471static void ip_vs_genl_unregister(void)
3472{
3473 genl_unregister_family(&ip_vs_genl_family);
3474}
3475
3476/* End of Generic Netlink interface definitions */
3477
61b1ab45
HS
3478/*
3479 * per netns intit/exit func.
3480 */
3481int __net_init __ip_vs_control_init(struct net *net)
3482{
fc723250
HS
3483 int idx;
3484 struct netns_ipvs *ipvs = net_ipvs(net);
a0840e2e 3485 struct ctl_table *tbl;
fc723250 3486
a0840e2e
HS
3487 atomic_set(&ipvs->dropentry, 0);
3488 spin_lock_init(&ipvs->dropentry_lock);
3489 spin_lock_init(&ipvs->droppacket_lock);
3490 spin_lock_init(&ipvs->securetcp_lock);
3491 ipvs->rs_lock = __RW_LOCK_UNLOCKED(ipvs->rs_lock);
3492
3493 /* Initialize rs_table */
3494 for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
3495 INIT_LIST_HEAD(&ipvs->rs_table[idx]);
3496
f2431e6e 3497 INIT_LIST_HEAD(&ipvs->dest_trash);
763f8d0e
HS
3498 atomic_set(&ipvs->ftpsvc_counter, 0);
3499 atomic_set(&ipvs->nullsvc_counter, 0);
f2431e6e 3500
b17fc996
HS
3501 /* procfs stats */
3502 ipvs->tot_stats = kzalloc(sizeof(struct ip_vs_stats), GFP_KERNEL);
3503 if (ipvs->tot_stats == NULL) {
3504 pr_err("%s(): no memory.\n", __func__);
3505 return -ENOMEM;
3506 }
3507 ipvs->cpustats = alloc_percpu(struct ip_vs_cpu_stats);
3508 if (!ipvs->cpustats) {
3509 pr_err("%s() alloc_percpu failed\n", __func__);
3510 goto err_alloc;
3511 }
3512 spin_lock_init(&ipvs->tot_stats->lock);
61b1ab45 3513
fc723250
HS
3514 for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
3515 INIT_LIST_HEAD(&ipvs->rs_table[idx]);
3516
61b1ab45
HS
3517 proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops);
3518 proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops);
b17fc996
HS
3519 proc_net_fops_create(net, "ip_vs_stats_percpu", 0,
3520 &ip_vs_stats_percpu_fops);
a0840e2e
HS
3521
3522 if (!net_eq(net, &init_net)) {
3523 tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL);
3524 if (tbl == NULL)
3525 goto err_dup;
3526 } else
3527 tbl = vs_vars;
3528 /* Initialize sysctl defaults */
3529 idx = 0;
3530 ipvs->sysctl_amemthresh = 1024;
3531 tbl[idx++].data = &ipvs->sysctl_amemthresh;
3532 ipvs->sysctl_am_droprate = 10;
3533 tbl[idx++].data = &ipvs->sysctl_am_droprate;
3534 tbl[idx++].data = &ipvs->sysctl_drop_entry;
3535 tbl[idx++].data = &ipvs->sysctl_drop_packet;
3536#ifdef CONFIG_IP_VS_NFCT
3537 tbl[idx++].data = &ipvs->sysctl_conntrack;
3538#endif
3539 tbl[idx++].data = &ipvs->sysctl_secure_tcp;
3540 ipvs->sysctl_snat_reroute = 1;
3541 tbl[idx++].data = &ipvs->sysctl_snat_reroute;
3542 ipvs->sysctl_sync_ver = 1;
3543 tbl[idx++].data = &ipvs->sysctl_sync_ver;
3544 tbl[idx++].data = &ipvs->sysctl_cache_bypass;
3545 tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
3546 tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template;
3547 ipvs->sysctl_sync_threshold[0] = 3;
3548 ipvs->sysctl_sync_threshold[1] = 50;
3549 tbl[idx].data = &ipvs->sysctl_sync_threshold;
3550 tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
3551 tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;
3552
3553
3554 ipvs->sysctl_hdr = register_net_sysctl_table(net, net_vs_ctl_path,
61b1ab45 3555 vs_vars);
a0840e2e 3556 if (ipvs->sysctl_hdr == NULL)
61b1ab45 3557 goto err_reg;
b17fc996 3558 ip_vs_new_estimator(net, ipvs->tot_stats);
a0840e2e 3559 ipvs->sysctl_tbl = tbl;
f6340ee0
HS
3560 /* Schedule defense work */
3561 INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler);
3562 schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
61b1ab45
HS
3563 return 0;
3564
3565err_reg:
a0840e2e
HS
3566 if (!net_eq(net, &init_net))
3567 kfree(tbl);
3568err_dup:
b17fc996
HS
3569 free_percpu(ipvs->cpustats);
3570err_alloc:
3571 kfree(ipvs->tot_stats);
61b1ab45
HS
3572 return -ENOMEM;
3573}
3574
3575static void __net_exit __ip_vs_control_cleanup(struct net *net)
3576{
b17fc996
HS
3577 struct netns_ipvs *ipvs = net_ipvs(net);
3578
f2431e6e 3579 ip_vs_trash_cleanup(net);
b17fc996 3580 ip_vs_kill_estimator(net, ipvs->tot_stats);
f2431e6e
HS
3581 cancel_delayed_work_sync(&ipvs->defense_work);
3582 cancel_work_sync(&ipvs->defense_work.work);
a0840e2e 3583 unregister_net_sysctl_table(ipvs->sysctl_hdr);
b17fc996 3584 proc_net_remove(net, "ip_vs_stats_percpu");
61b1ab45
HS
3585 proc_net_remove(net, "ip_vs_stats");
3586 proc_net_remove(net, "ip_vs");
b17fc996
HS
3587 free_percpu(ipvs->cpustats);
3588 kfree(ipvs->tot_stats);
61b1ab45
HS
3589}
3590
3591static struct pernet_operations ipvs_control_ops = {
3592 .init = __ip_vs_control_init,
3593 .exit = __ip_vs_control_cleanup,
3594};
1da177e4 3595
048cf48b 3596int __init ip_vs_control_init(void)
1da177e4 3597{
1da177e4 3598 int idx;
fc723250 3599 int ret;
1da177e4
LT
3600
3601 EnterFunction(2);
3602
fc723250 3603 /* Initialize svc_table, ip_vs_svc_fwm_table, rs_table */
d86bef73
EB
3604 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
3605 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3606 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3607 }
fc723250
HS
3608
3609 ret = register_pernet_subsys(&ipvs_control_ops);
3610 if (ret) {
3611 pr_err("cannot register namespace.\n");
3612 goto err;
d86bef73 3613 }
fc723250
HS
3614
3615 smp_wmb(); /* Do we really need it now ? */
d86bef73 3616
1da177e4
LT
3617 ret = nf_register_sockopt(&ip_vs_sockopts);
3618 if (ret) {
1e3e238e 3619 pr_err("cannot register sockopt.\n");
fc723250 3620 goto err_net;
1da177e4
LT
3621 }
3622
9a812198
JV
3623 ret = ip_vs_genl_register();
3624 if (ret) {
1e3e238e 3625 pr_err("cannot register Generic Netlink interface.\n");
9a812198 3626 nf_unregister_sockopt(&ip_vs_sockopts);
fc723250 3627 goto err_net;
9a812198
JV
3628 }
3629
1da177e4
LT
3630 LeaveFunction(2);
3631 return 0;
fc723250
HS
3632
3633err_net:
3634 unregister_pernet_subsys(&ipvs_control_ops);
3635err:
3636 return ret;
1da177e4
LT
3637}
3638
3639
3640void ip_vs_control_cleanup(void)
3641{
3642 EnterFunction(2);
61b1ab45 3643 unregister_pernet_subsys(&ipvs_control_ops);
9a812198 3644 ip_vs_genl_unregister();
1da177e4
LT
3645 nf_unregister_sockopt(&ip_vs_sockopts);
3646 LeaveFunction(2);
3647}
This page took 2.057159 seconds and 5 git commands to generate.