ipvs: consolidate all dst checks on transmit in one place
[deliverable/linux.git] / net / netfilter / ipvs / ip_vs_ctl.c
CommitLineData
1da177e4
LT
1/*
2 * IPVS An implementation of the IP virtual server support for the
3 * LINUX operating system. IPVS is now implemented as a module
4 * over the NetFilter framework. IPVS can be used to build a
5 * high-performance and highly available server based on a
6 * cluster of servers.
7 *
1da177e4
LT
8 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
9 * Peter Kese <peter.kese@ijs.si>
10 * Julian Anastasov <ja@ssi.bg>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 *
17 * Changes:
18 *
19 */
20
9aada7ac
HE
21#define KMSG_COMPONENT "IPVS"
22#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
23
1da177e4
LT
24#include <linux/module.h>
25#include <linux/init.h>
26#include <linux/types.h>
4fc268d2 27#include <linux/capability.h>
1da177e4
LT
28#include <linux/fs.h>
29#include <linux/sysctl.h>
30#include <linux/proc_fs.h>
31#include <linux/workqueue.h>
32#include <linux/swap.h>
1da177e4 33#include <linux/seq_file.h>
5a0e3ad6 34#include <linux/slab.h>
1da177e4
LT
35
36#include <linux/netfilter.h>
37#include <linux/netfilter_ipv4.h>
14cc3e2b 38#include <linux/mutex.h>
1da177e4 39
457c4cbc 40#include <net/net_namespace.h>
9330419d 41#include <linux/nsproxy.h>
1da177e4 42#include <net/ip.h>
09571c7a
VB
43#ifdef CONFIG_IP_VS_IPV6
44#include <net/ipv6.h>
45#include <net/ip6_route.h>
46#endif
14c85021 47#include <net/route.h>
1da177e4 48#include <net/sock.h>
9a812198 49#include <net/genetlink.h>
1da177e4
LT
50
51#include <asm/uaccess.h>
52
53#include <net/ip_vs.h>
54
55/* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
14cc3e2b 56static DEFINE_MUTEX(__ip_vs_mutex);
1da177e4
LT
57
58/* lock for service table */
59static DEFINE_RWLOCK(__ip_vs_svc_lock);
60
1da177e4 61/* sysctl variables */
1da177e4
LT
62
63#ifdef CONFIG_IP_VS_DEBUG
64static int sysctl_ip_vs_debug_level = 0;
65
66int ip_vs_get_debug_level(void)
67{
68 return sysctl_ip_vs_debug_level;
69}
70#endif
71
7a4f0761
HS
72
73/* Protos */
74static void __ip_vs_del_service(struct ip_vs_service *svc);
75
76
09571c7a
VB
77#ifdef CONFIG_IP_VS_IPV6
78/* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
c24584c0
ED
79static bool __ip_vs_addr_is_local_v6(struct net *net,
80 const struct in6_addr *addr)
09571c7a 81{
4c9483b2
DM
82 struct flowi6 fl6 = {
83 .daddr = *addr,
09571c7a 84 };
c24584c0
ED
85 struct dst_entry *dst = ip6_route_output(net, NULL, &fl6);
86 bool is_local;
09571c7a 87
c24584c0 88 is_local = !dst->error && dst->dev && (dst->dev->flags & IFF_LOOPBACK);
09571c7a 89
c24584c0
ED
90 dst_release(dst);
91 return is_local;
09571c7a
VB
92}
93#endif
14e40546
SH
94
95#ifdef CONFIG_SYSCTL
1da177e4 96/*
af9debd4
JA
97 * update_defense_level is called from keventd and from sysctl,
98 * so it needs to protect itself from softirqs
1da177e4 99 */
9330419d 100static void update_defense_level(struct netns_ipvs *ipvs)
1da177e4
LT
101{
102 struct sysinfo i;
103 static int old_secure_tcp = 0;
104 int availmem;
105 int nomem;
106 int to_change = -1;
107
108 /* we only count free and buffered memory (in pages) */
109 si_meminfo(&i);
110 availmem = i.freeram + i.bufferram;
111 /* however in linux 2.5 the i.bufferram is total page cache size,
112 we need adjust it */
113 /* si_swapinfo(&i); */
114 /* availmem = availmem - (i.totalswap - i.freeswap); */
115
a0840e2e 116 nomem = (availmem < ipvs->sysctl_amemthresh);
1da177e4 117
af9debd4
JA
118 local_bh_disable();
119
1da177e4 120 /* drop_entry */
a0840e2e
HS
121 spin_lock(&ipvs->dropentry_lock);
122 switch (ipvs->sysctl_drop_entry) {
1da177e4 123 case 0:
a0840e2e 124 atomic_set(&ipvs->dropentry, 0);
1da177e4
LT
125 break;
126 case 1:
127 if (nomem) {
a0840e2e
HS
128 atomic_set(&ipvs->dropentry, 1);
129 ipvs->sysctl_drop_entry = 2;
1da177e4 130 } else {
a0840e2e 131 atomic_set(&ipvs->dropentry, 0);
1da177e4
LT
132 }
133 break;
134 case 2:
135 if (nomem) {
a0840e2e 136 atomic_set(&ipvs->dropentry, 1);
1da177e4 137 } else {
a0840e2e
HS
138 atomic_set(&ipvs->dropentry, 0);
139 ipvs->sysctl_drop_entry = 1;
1da177e4
LT
140 };
141 break;
142 case 3:
a0840e2e 143 atomic_set(&ipvs->dropentry, 1);
1da177e4
LT
144 break;
145 }
a0840e2e 146 spin_unlock(&ipvs->dropentry_lock);
1da177e4
LT
147
148 /* drop_packet */
a0840e2e
HS
149 spin_lock(&ipvs->droppacket_lock);
150 switch (ipvs->sysctl_drop_packet) {
1da177e4 151 case 0:
a0840e2e 152 ipvs->drop_rate = 0;
1da177e4
LT
153 break;
154 case 1:
155 if (nomem) {
a0840e2e
HS
156 ipvs->drop_rate = ipvs->drop_counter
157 = ipvs->sysctl_amemthresh /
158 (ipvs->sysctl_amemthresh-availmem);
159 ipvs->sysctl_drop_packet = 2;
1da177e4 160 } else {
a0840e2e 161 ipvs->drop_rate = 0;
1da177e4
LT
162 }
163 break;
164 case 2:
165 if (nomem) {
a0840e2e
HS
166 ipvs->drop_rate = ipvs->drop_counter
167 = ipvs->sysctl_amemthresh /
168 (ipvs->sysctl_amemthresh-availmem);
1da177e4 169 } else {
a0840e2e
HS
170 ipvs->drop_rate = 0;
171 ipvs->sysctl_drop_packet = 1;
1da177e4
LT
172 }
173 break;
174 case 3:
a0840e2e 175 ipvs->drop_rate = ipvs->sysctl_am_droprate;
1da177e4
LT
176 break;
177 }
a0840e2e 178 spin_unlock(&ipvs->droppacket_lock);
1da177e4
LT
179
180 /* secure_tcp */
a0840e2e
HS
181 spin_lock(&ipvs->securetcp_lock);
182 switch (ipvs->sysctl_secure_tcp) {
1da177e4
LT
183 case 0:
184 if (old_secure_tcp >= 2)
185 to_change = 0;
186 break;
187 case 1:
188 if (nomem) {
189 if (old_secure_tcp < 2)
190 to_change = 1;
a0840e2e 191 ipvs->sysctl_secure_tcp = 2;
1da177e4
LT
192 } else {
193 if (old_secure_tcp >= 2)
194 to_change = 0;
195 }
196 break;
197 case 2:
198 if (nomem) {
199 if (old_secure_tcp < 2)
200 to_change = 1;
201 } else {
202 if (old_secure_tcp >= 2)
203 to_change = 0;
a0840e2e 204 ipvs->sysctl_secure_tcp = 1;
1da177e4
LT
205 }
206 break;
207 case 3:
208 if (old_secure_tcp < 2)
209 to_change = 1;
210 break;
211 }
a0840e2e 212 old_secure_tcp = ipvs->sysctl_secure_tcp;
1da177e4 213 if (to_change >= 0)
9330419d 214 ip_vs_protocol_timeout_change(ipvs,
a0840e2e
HS
215 ipvs->sysctl_secure_tcp > 1);
216 spin_unlock(&ipvs->securetcp_lock);
af9debd4
JA
217
218 local_bh_enable();
1da177e4
LT
219}
220
221
222/*
223 * Timer for checking the defense
224 */
225#define DEFENSE_TIMER_PERIOD 1*HZ
1da177e4 226
c4028958 227static void defense_work_handler(struct work_struct *work)
1da177e4 228{
f6340ee0
HS
229 struct netns_ipvs *ipvs =
230 container_of(work, struct netns_ipvs, defense_work.work);
9330419d
HS
231
232 update_defense_level(ipvs);
a0840e2e 233 if (atomic_read(&ipvs->dropentry))
f6340ee0
HS
234 ip_vs_random_dropentry(ipvs->net);
235 schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
1da177e4 236}
14e40546 237#endif
1da177e4
LT
238
239int
240ip_vs_use_count_inc(void)
241{
242 return try_module_get(THIS_MODULE);
243}
244
245void
246ip_vs_use_count_dec(void)
247{
248 module_put(THIS_MODULE);
249}
250
251
252/*
253 * Hash table: for virtual service lookups
254 */
255#define IP_VS_SVC_TAB_BITS 8
256#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
257#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
258
259/* the service table hashed by <protocol, addr, port> */
260static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
261/* the service table hashed by fwmark */
262static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
263
1da177e4
LT
264
265/*
266 * Returns hash value for virtual service
267 */
95c96174
ED
268static inline unsigned int
269ip_vs_svc_hashkey(struct net *net, int af, unsigned int proto,
fc723250 270 const union nf_inet_addr *addr, __be16 port)
1da177e4 271{
95c96174 272 register unsigned int porth = ntohs(port);
b18610de 273 __be32 addr_fold = addr->ip;
e9836f24 274 __u32 ahash;
1da177e4 275
b18610de
JV
276#ifdef CONFIG_IP_VS_IPV6
277 if (af == AF_INET6)
278 addr_fold = addr->ip6[0]^addr->ip6[1]^
279 addr->ip6[2]^addr->ip6[3];
280#endif
e9836f24
JA
281 ahash = ntohl(addr_fold);
282 ahash ^= ((size_t) net >> 8);
b18610de 283
e9836f24
JA
284 return (proto ^ ahash ^ (porth >> IP_VS_SVC_TAB_BITS) ^ porth) &
285 IP_VS_SVC_TAB_MASK;
1da177e4
LT
286}
287
288/*
289 * Returns hash value of fwmark for virtual service lookup
290 */
95c96174 291static inline unsigned int ip_vs_svc_fwm_hashkey(struct net *net, __u32 fwmark)
1da177e4 292{
fc723250 293 return (((size_t)net>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK;
1da177e4
LT
294}
295
296/*
fc723250 297 * Hashes a service in the ip_vs_svc_table by <netns,proto,addr,port>
1da177e4
LT
298 * or in the ip_vs_svc_fwm_table by fwmark.
299 * Should be called with locked tables.
300 */
301static int ip_vs_svc_hash(struct ip_vs_service *svc)
302{
95c96174 303 unsigned int hash;
1da177e4
LT
304
305 if (svc->flags & IP_VS_SVC_F_HASHED) {
1e3e238e
HE
306 pr_err("%s(): request for already hashed, called from %pF\n",
307 __func__, __builtin_return_address(0));
1da177e4
LT
308 return 0;
309 }
310
311 if (svc->fwmark == 0) {
312 /*
fc723250 313 * Hash it by <netns,protocol,addr,port> in ip_vs_svc_table
1da177e4 314 */
fc723250
HS
315 hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol,
316 &svc->addr, svc->port);
1da177e4
LT
317 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
318 } else {
319 /*
fc723250 320 * Hash it by fwmark in svc_fwm_table
1da177e4 321 */
fc723250 322 hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark);
1da177e4
LT
323 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
324 }
325
326 svc->flags |= IP_VS_SVC_F_HASHED;
327 /* increase its refcnt because it is referenced by the svc table */
328 atomic_inc(&svc->refcnt);
329 return 1;
330}
331
332
333/*
fc723250 334 * Unhashes a service from svc_table / svc_fwm_table.
1da177e4
LT
335 * Should be called with locked tables.
336 */
337static int ip_vs_svc_unhash(struct ip_vs_service *svc)
338{
339 if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
1e3e238e
HE
340 pr_err("%s(): request for unhash flagged, called from %pF\n",
341 __func__, __builtin_return_address(0));
1da177e4
LT
342 return 0;
343 }
344
345 if (svc->fwmark == 0) {
fc723250 346 /* Remove it from the svc_table table */
1da177e4
LT
347 list_del(&svc->s_list);
348 } else {
fc723250 349 /* Remove it from the svc_fwm_table table */
1da177e4
LT
350 list_del(&svc->f_list);
351 }
352
353 svc->flags &= ~IP_VS_SVC_F_HASHED;
354 atomic_dec(&svc->refcnt);
355 return 1;
356}
357
358
359/*
fc723250 360 * Get service by {netns, proto,addr,port} in the service table.
1da177e4 361 */
b18610de 362static inline struct ip_vs_service *
fc723250
HS
363__ip_vs_service_find(struct net *net, int af, __u16 protocol,
364 const union nf_inet_addr *vaddr, __be16 vport)
1da177e4 365{
95c96174 366 unsigned int hash;
1da177e4
LT
367 struct ip_vs_service *svc;
368
369 /* Check for "full" addressed entries */
fc723250 370 hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport);
1da177e4
LT
371
372 list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
b18610de
JV
373 if ((svc->af == af)
374 && ip_vs_addr_equal(af, &svc->addr, vaddr)
1da177e4 375 && (svc->port == vport)
fc723250
HS
376 && (svc->protocol == protocol)
377 && net_eq(svc->net, net)) {
1da177e4 378 /* HIT */
1da177e4
LT
379 return svc;
380 }
381 }
382
383 return NULL;
384}
385
386
387/*
388 * Get service by {fwmark} in the service table.
389 */
b18610de 390static inline struct ip_vs_service *
fc723250 391__ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark)
1da177e4 392{
95c96174 393 unsigned int hash;
1da177e4
LT
394 struct ip_vs_service *svc;
395
396 /* Check for fwmark addressed entries */
fc723250 397 hash = ip_vs_svc_fwm_hashkey(net, fwmark);
1da177e4
LT
398
399 list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
fc723250
HS
400 if (svc->fwmark == fwmark && svc->af == af
401 && net_eq(svc->net, net)) {
1da177e4 402 /* HIT */
1da177e4
LT
403 return svc;
404 }
405 }
406
407 return NULL;
408}
409
410struct ip_vs_service *
fc723250 411ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
3c2e0505 412 const union nf_inet_addr *vaddr, __be16 vport)
1da177e4
LT
413{
414 struct ip_vs_service *svc;
763f8d0e 415 struct netns_ipvs *ipvs = net_ipvs(net);
3c2e0505 416
1da177e4
LT
417 read_lock(&__ip_vs_svc_lock);
418
419 /*
420 * Check the table hashed by fwmark first
421 */
097fc76a
JA
422 if (fwmark) {
423 svc = __ip_vs_svc_fwm_find(net, af, fwmark);
424 if (svc)
425 goto out;
426 }
1da177e4
LT
427
428 /*
429 * Check the table hashed by <protocol,addr,port>
430 * for "full" addressed entries
431 */
fc723250 432 svc = __ip_vs_service_find(net, af, protocol, vaddr, vport);
1da177e4
LT
433
434 if (svc == NULL
435 && protocol == IPPROTO_TCP
763f8d0e 436 && atomic_read(&ipvs->ftpsvc_counter)
1da177e4
LT
437 && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
438 /*
439 * Check if ftp service entry exists, the packet
440 * might belong to FTP data connections.
441 */
fc723250 442 svc = __ip_vs_service_find(net, af, protocol, vaddr, FTPPORT);
1da177e4
LT
443 }
444
445 if (svc == NULL
763f8d0e 446 && atomic_read(&ipvs->nullsvc_counter)) {
1da177e4
LT
447 /*
448 * Check if the catch-all port (port zero) exists
449 */
fc723250 450 svc = __ip_vs_service_find(net, af, protocol, vaddr, 0);
1da177e4
LT
451 }
452
453 out:
26c15cfd
JA
454 if (svc)
455 atomic_inc(&svc->usecnt);
1da177e4
LT
456 read_unlock(&__ip_vs_svc_lock);
457
3c2e0505
JV
458 IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
459 fwmark, ip_vs_proto_name(protocol),
460 IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
461 svc ? "hit" : "not hit");
1da177e4
LT
462
463 return svc;
464}
465
466
467static inline void
468__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
469{
470 atomic_inc(&svc->refcnt);
471 dest->svc = svc;
472}
473
26c15cfd 474static void
1da177e4
LT
475__ip_vs_unbind_svc(struct ip_vs_dest *dest)
476{
477 struct ip_vs_service *svc = dest->svc;
478
479 dest->svc = NULL;
26c15cfd
JA
480 if (atomic_dec_and_test(&svc->refcnt)) {
481 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
482 svc->fwmark,
483 IP_VS_DBG_ADDR(svc->af, &svc->addr),
484 ntohs(svc->port), atomic_read(&svc->usecnt));
b17fc996 485 free_percpu(svc->stats.cpustats);
1da177e4 486 kfree(svc);
26c15cfd 487 }
1da177e4
LT
488}
489
490
491/*
492 * Returns hash value for real service
493 */
95c96174 494static inline unsigned int ip_vs_rs_hashkey(int af,
7937df15
JV
495 const union nf_inet_addr *addr,
496 __be16 port)
1da177e4 497{
95c96174 498 register unsigned int porth = ntohs(port);
7937df15
JV
499 __be32 addr_fold = addr->ip;
500
501#ifdef CONFIG_IP_VS_IPV6
502 if (af == AF_INET6)
503 addr_fold = addr->ip6[0]^addr->ip6[1]^
504 addr->ip6[2]^addr->ip6[3];
505#endif
1da177e4 506
7937df15 507 return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
1da177e4
LT
508 & IP_VS_RTAB_MASK;
509}
510
511/*
fc723250 512 * Hashes ip_vs_dest in rs_table by <proto,addr,port>.
1da177e4
LT
513 * should be called with locked tables.
514 */
fc723250 515static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
1da177e4 516{
95c96174 517 unsigned int hash;
1da177e4
LT
518
519 if (!list_empty(&dest->d_list)) {
520 return 0;
521 }
522
523 /*
524 * Hash by proto,addr,port,
525 * which are the parameters of the real service.
526 */
7937df15
JV
527 hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
528
fc723250 529 list_add(&dest->d_list, &ipvs->rs_table[hash]);
1da177e4
LT
530
531 return 1;
532}
533
534/*
fc723250 535 * UNhashes ip_vs_dest from rs_table.
1da177e4
LT
536 * should be called with locked tables.
537 */
538static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
539{
540 /*
fc723250 541 * Remove it from the rs_table table.
1da177e4
LT
542 */
543 if (!list_empty(&dest->d_list)) {
0edd9488 544 list_del_init(&dest->d_list);
1da177e4
LT
545 }
546
547 return 1;
548}
549
550/*
551 * Lookup real service by <proto,addr,port> in the real service table.
552 */
553struct ip_vs_dest *
fc723250 554ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
7937df15
JV
555 const union nf_inet_addr *daddr,
556 __be16 dport)
1da177e4 557{
fc723250 558 struct netns_ipvs *ipvs = net_ipvs(net);
95c96174 559 unsigned int hash;
1da177e4
LT
560 struct ip_vs_dest *dest;
561
562 /*
563 * Check for "full" addressed entries
564 * Return the first found entry
565 */
7937df15 566 hash = ip_vs_rs_hashkey(af, daddr, dport);
1da177e4 567
a0840e2e 568 read_lock(&ipvs->rs_lock);
fc723250 569 list_for_each_entry(dest, &ipvs->rs_table[hash], d_list) {
7937df15
JV
570 if ((dest->af == af)
571 && ip_vs_addr_equal(af, &dest->addr, daddr)
1da177e4
LT
572 && (dest->port == dport)
573 && ((dest->protocol == protocol) ||
574 dest->vfwmark)) {
575 /* HIT */
a0840e2e 576 read_unlock(&ipvs->rs_lock);
1da177e4
LT
577 return dest;
578 }
579 }
a0840e2e 580 read_unlock(&ipvs->rs_lock);
1da177e4
LT
581
582 return NULL;
583}
584
585/*
586 * Lookup destination by {addr,port} in the given service
587 */
588static struct ip_vs_dest *
7937df15
JV
589ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
590 __be16 dport)
1da177e4
LT
591{
592 struct ip_vs_dest *dest;
593
594 /*
595 * Find the destination for the given service
596 */
597 list_for_each_entry(dest, &svc->destinations, n_list) {
7937df15
JV
598 if ((dest->af == svc->af)
599 && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
600 && (dest->port == dport)) {
1da177e4
LT
601 /* HIT */
602 return dest;
603 }
604 }
605
606 return NULL;
607}
608
1e356f9c
RB
609/*
610 * Find destination by {daddr,dport,vaddr,protocol}
611 * Cretaed to be used in ip_vs_process_message() in
612 * the backup synchronization daemon. It finds the
613 * destination to be bound to the received connection
614 * on the backup.
615 *
616 * ip_vs_lookup_real_service() looked promissing, but
617 * seems not working as expected.
618 */
fc723250
HS
619struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af,
620 const union nf_inet_addr *daddr,
7937df15
JV
621 __be16 dport,
622 const union nf_inet_addr *vaddr,
52793dbe
JA
623 __be16 vport, __u16 protocol, __u32 fwmark,
624 __u32 flags)
1e356f9c
RB
625{
626 struct ip_vs_dest *dest;
627 struct ip_vs_service *svc;
52793dbe 628 __be16 port = dport;
1e356f9c 629
fc723250 630 svc = ip_vs_service_get(net, af, fwmark, protocol, vaddr, vport);
1e356f9c
RB
631 if (!svc)
632 return NULL;
52793dbe
JA
633 if (fwmark && (flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ)
634 port = 0;
635 dest = ip_vs_lookup_dest(svc, daddr, port);
636 if (!dest)
637 dest = ip_vs_lookup_dest(svc, daddr, port ^ dport);
1e356f9c
RB
638 if (dest)
639 atomic_inc(&dest->refcnt);
640 ip_vs_service_put(svc);
641 return dest;
642}
1da177e4 643
d1deae4d
JA
644/* Release dst_cache for dest in user context */
645static void __ip_vs_dst_cache_reset(struct ip_vs_dest *dest)
646{
647 struct dst_entry *old_dst;
648
649 old_dst = dest->dst_cache;
650 dest->dst_cache = NULL;
651 dst_release(old_dst);
652 dest->dst_saddr.ip = 0;
653}
654
1da177e4
LT
655/*
656 * Lookup dest by {svc,addr,port} in the destination trash.
657 * The destination trash is used to hold the destinations that are removed
658 * from the service table but are still referenced by some conn entries.
659 * The reason to add the destination trash is when the dest is temporary
660 * down (either by administrator or by monitor program), the dest can be
661 * picked back from the trash, the remaining connections to the dest can
662 * continue, and the counting information of the dest is also useful for
663 * scheduling.
664 */
665static struct ip_vs_dest *
7937df15
JV
666ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
667 __be16 dport)
1da177e4
LT
668{
669 struct ip_vs_dest *dest, *nxt;
f2431e6e 670 struct netns_ipvs *ipvs = net_ipvs(svc->net);
1da177e4
LT
671
672 /*
673 * Find the destination in trash
674 */
f2431e6e 675 list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
7937df15
JV
676 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
677 "dest->refcnt=%d\n",
678 dest->vfwmark,
679 IP_VS_DBG_ADDR(svc->af, &dest->addr),
680 ntohs(dest->port),
681 atomic_read(&dest->refcnt));
682 if (dest->af == svc->af &&
683 ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
1da177e4
LT
684 dest->port == dport &&
685 dest->vfwmark == svc->fwmark &&
686 dest->protocol == svc->protocol &&
687 (svc->fwmark ||
7937df15 688 (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
1da177e4
LT
689 dest->vport == svc->port))) {
690 /* HIT */
691 return dest;
692 }
693
694 /*
695 * Try to purge the destination from trash if not referenced
696 */
697 if (atomic_read(&dest->refcnt) == 1) {
7937df15
JV
698 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
699 "from trash\n",
700 dest->vfwmark,
701 IP_VS_DBG_ADDR(svc->af, &dest->addr),
702 ntohs(dest->port));
1da177e4 703 list_del(&dest->n_list);
d1deae4d 704 __ip_vs_dst_cache_reset(dest);
1da177e4 705 __ip_vs_unbind_svc(dest);
b17fc996 706 free_percpu(dest->stats.cpustats);
1da177e4
LT
707 kfree(dest);
708 }
709 }
710
711 return NULL;
712}
713
714
715/*
716 * Clean up all the destinations in the trash
717 * Called by the ip_vs_control_cleanup()
718 *
719 * When the ip_vs_control_clearup is activated by ipvs module exit,
720 * the service tables must have been flushed and all the connections
721 * are expired, and the refcnt of each destination in the trash must
722 * be 1, so we simply release them here.
723 */
f2431e6e 724static void ip_vs_trash_cleanup(struct net *net)
1da177e4
LT
725{
726 struct ip_vs_dest *dest, *nxt;
f2431e6e 727 struct netns_ipvs *ipvs = net_ipvs(net);
1da177e4 728
f2431e6e 729 list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
1da177e4 730 list_del(&dest->n_list);
d1deae4d 731 __ip_vs_dst_cache_reset(dest);
1da177e4 732 __ip_vs_unbind_svc(dest);
b17fc996 733 free_percpu(dest->stats.cpustats);
1da177e4
LT
734 kfree(dest);
735 }
736}
737
55a3d4e1
JA
738static void
739ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
740{
741#define IP_VS_SHOW_STATS_COUNTER(c) dst->c = src->ustats.c - src->ustats0.c
55a3d4e1
JA
742
743 spin_lock_bh(&src->lock);
744
745 IP_VS_SHOW_STATS_COUNTER(conns);
746 IP_VS_SHOW_STATS_COUNTER(inpkts);
747 IP_VS_SHOW_STATS_COUNTER(outpkts);
748 IP_VS_SHOW_STATS_COUNTER(inbytes);
749 IP_VS_SHOW_STATS_COUNTER(outbytes);
750
ea9f22cc 751 ip_vs_read_estimator(dst, src);
55a3d4e1
JA
752
753 spin_unlock_bh(&src->lock);
754}
1da177e4
LT
755
756static void
757ip_vs_zero_stats(struct ip_vs_stats *stats)
758{
759 spin_lock_bh(&stats->lock);
e93615d0 760
55a3d4e1
JA
761 /* get current counters as zero point, rates are zeroed */
762
763#define IP_VS_ZERO_STATS_COUNTER(c) stats->ustats0.c = stats->ustats.c
55a3d4e1
JA
764
765 IP_VS_ZERO_STATS_COUNTER(conns);
766 IP_VS_ZERO_STATS_COUNTER(inpkts);
767 IP_VS_ZERO_STATS_COUNTER(outpkts);
768 IP_VS_ZERO_STATS_COUNTER(inbytes);
769 IP_VS_ZERO_STATS_COUNTER(outbytes);
770
1da177e4 771 ip_vs_zero_estimator(stats);
e93615d0 772
3a14a313 773 spin_unlock_bh(&stats->lock);
1da177e4
LT
774}
775
776/*
777 * Update a destination in the given service
778 */
779static void
26c15cfd
JA
780__ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
781 struct ip_vs_dest_user_kern *udest, int add)
1da177e4 782{
fc723250 783 struct netns_ipvs *ipvs = net_ipvs(svc->net);
1da177e4
LT
784 int conn_flags;
785
786 /* set the weight and the flags */
787 atomic_set(&dest->weight, udest->weight);
3575792e
JA
788 conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
789 conn_flags |= IP_VS_CONN_F_INACTIVE;
1da177e4 790
1da177e4 791 /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
3575792e 792 if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) {
1da177e4
LT
793 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
794 } else {
795 /*
fc723250 796 * Put the real service in rs_table if not present.
1da177e4
LT
797 * For now only for NAT!
798 */
a0840e2e 799 write_lock_bh(&ipvs->rs_lock);
fc723250 800 ip_vs_rs_hash(ipvs, dest);
a0840e2e 801 write_unlock_bh(&ipvs->rs_lock);
1da177e4
LT
802 }
803 atomic_set(&dest->conn_flags, conn_flags);
804
805 /* bind the service */
806 if (!dest->svc) {
807 __ip_vs_bind_svc(dest, svc);
808 } else {
809 if (dest->svc != svc) {
810 __ip_vs_unbind_svc(dest);
811 ip_vs_zero_stats(&dest->stats);
812 __ip_vs_bind_svc(dest, svc);
813 }
814 }
815
816 /* set the dest status flags */
817 dest->flags |= IP_VS_DEST_F_AVAILABLE;
818
819 if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
820 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
821 dest->u_threshold = udest->u_threshold;
822 dest->l_threshold = udest->l_threshold;
26c15cfd 823
ff75f40f 824 spin_lock_bh(&dest->dst_lock);
d1deae4d 825 __ip_vs_dst_cache_reset(dest);
ff75f40f 826 spin_unlock_bh(&dest->dst_lock);
fc604767 827
26c15cfd 828 if (add)
6ef757f9 829 ip_vs_start_estimator(svc->net, &dest->stats);
26c15cfd
JA
830
831 write_lock_bh(&__ip_vs_svc_lock);
832
833 /* Wait until all other svc users go away */
834 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
835
836 if (add) {
837 list_add(&dest->n_list, &svc->destinations);
838 svc->num_dests++;
839 }
840
841 /* call the update_service, because server weight may be changed */
842 if (svc->scheduler->update_service)
843 svc->scheduler->update_service(svc);
844
845 write_unlock_bh(&__ip_vs_svc_lock);
1da177e4
LT
846}
847
848
849/*
850 * Create a destination for the given service
851 */
852static int
c860c6b1 853ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
1da177e4
LT
854 struct ip_vs_dest **dest_p)
855{
856 struct ip_vs_dest *dest;
95c96174 857 unsigned int atype;
1da177e4
LT
858
859 EnterFunction(2);
860
09571c7a
VB
861#ifdef CONFIG_IP_VS_IPV6
862 if (svc->af == AF_INET6) {
863 atype = ipv6_addr_type(&udest->addr.in6);
3bfb92f4
SW
864 if ((!(atype & IPV6_ADDR_UNICAST) ||
865 atype & IPV6_ADDR_LINKLOCAL) &&
4a98480b 866 !__ip_vs_addr_is_local_v6(svc->net, &udest->addr.in6))
09571c7a
VB
867 return -EINVAL;
868 } else
869#endif
870 {
4a98480b 871 atype = inet_addr_type(svc->net, udest->addr.ip);
09571c7a
VB
872 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
873 return -EINVAL;
874 }
1da177e4 875
dee06e47 876 dest = kzalloc(sizeof(struct ip_vs_dest), GFP_KERNEL);
0a9ee813 877 if (dest == NULL)
1da177e4 878 return -ENOMEM;
0a9ee813 879
b17fc996 880 dest->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
0a9ee813 881 if (!dest->stats.cpustats)
b17fc996 882 goto err_alloc;
1da177e4 883
c860c6b1 884 dest->af = svc->af;
1da177e4 885 dest->protocol = svc->protocol;
c860c6b1 886 dest->vaddr = svc->addr;
1da177e4
LT
887 dest->vport = svc->port;
888 dest->vfwmark = svc->fwmark;
c860c6b1 889 ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
1da177e4
LT
890 dest->port = udest->port;
891
892 atomic_set(&dest->activeconns, 0);
893 atomic_set(&dest->inactconns, 0);
894 atomic_set(&dest->persistconns, 0);
26c15cfd 895 atomic_set(&dest->refcnt, 1);
1da177e4
LT
896
897 INIT_LIST_HEAD(&dest->d_list);
898 spin_lock_init(&dest->dst_lock);
899 spin_lock_init(&dest->stats.lock);
26c15cfd 900 __ip_vs_update_dest(svc, dest, udest, 1);
1da177e4
LT
901
902 *dest_p = dest;
903
904 LeaveFunction(2);
905 return 0;
b17fc996
HS
906
907err_alloc:
908 kfree(dest);
909 return -ENOMEM;
1da177e4
LT
910}
911
912
913/*
914 * Add a destination into an existing service
915 */
916static int
c860c6b1 917ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
918{
919 struct ip_vs_dest *dest;
c860c6b1 920 union nf_inet_addr daddr;
014d730d 921 __be16 dport = udest->port;
1da177e4
LT
922 int ret;
923
924 EnterFunction(2);
925
926 if (udest->weight < 0) {
1e3e238e 927 pr_err("%s(): server weight less than zero\n", __func__);
1da177e4
LT
928 return -ERANGE;
929 }
930
931 if (udest->l_threshold > udest->u_threshold) {
1e3e238e
HE
932 pr_err("%s(): lower threshold is higher than upper threshold\n",
933 __func__);
1da177e4
LT
934 return -ERANGE;
935 }
936
c860c6b1
JV
937 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
938
1da177e4
LT
939 /*
940 * Check if the dest already exists in the list
941 */
7937df15
JV
942 dest = ip_vs_lookup_dest(svc, &daddr, dport);
943
1da177e4 944 if (dest != NULL) {
1e3e238e 945 IP_VS_DBG(1, "%s(): dest already exists\n", __func__);
1da177e4
LT
946 return -EEXIST;
947 }
948
949 /*
950 * Check if the dest already exists in the trash and
951 * is from the same service
952 */
7937df15
JV
953 dest = ip_vs_trash_get_dest(svc, &daddr, dport);
954
1da177e4 955 if (dest != NULL) {
cfc78c5a
JV
956 IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
957 "dest->refcnt=%d, service %u/%s:%u\n",
958 IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
959 atomic_read(&dest->refcnt),
960 dest->vfwmark,
961 IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
962 ntohs(dest->vport));
963
1da177e4
LT
964 /*
965 * Get the destination from the trash
966 */
967 list_del(&dest->n_list);
968
26c15cfd
JA
969 __ip_vs_update_dest(svc, dest, udest, 1);
970 ret = 0;
971 } else {
1da177e4 972 /*
26c15cfd 973 * Allocate and initialize the dest structure
1da177e4 974 */
26c15cfd 975 ret = ip_vs_new_dest(svc, udest, &dest);
1da177e4 976 }
1da177e4
LT
977 LeaveFunction(2);
978
26c15cfd 979 return ret;
1da177e4
LT
980}
981
982
983/*
984 * Edit a destination in the given service
985 */
986static int
c860c6b1 987ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
988{
989 struct ip_vs_dest *dest;
c860c6b1 990 union nf_inet_addr daddr;
014d730d 991 __be16 dport = udest->port;
1da177e4
LT
992
993 EnterFunction(2);
994
995 if (udest->weight < 0) {
1e3e238e 996 pr_err("%s(): server weight less than zero\n", __func__);
1da177e4
LT
997 return -ERANGE;
998 }
999
1000 if (udest->l_threshold > udest->u_threshold) {
1e3e238e
HE
1001 pr_err("%s(): lower threshold is higher than upper threshold\n",
1002 __func__);
1da177e4
LT
1003 return -ERANGE;
1004 }
1005
c860c6b1
JV
1006 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
1007
1da177e4
LT
1008 /*
1009 * Lookup the destination list
1010 */
7937df15
JV
1011 dest = ip_vs_lookup_dest(svc, &daddr, dport);
1012
1da177e4 1013 if (dest == NULL) {
1e3e238e 1014 IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__);
1da177e4
LT
1015 return -ENOENT;
1016 }
1017
26c15cfd 1018 __ip_vs_update_dest(svc, dest, udest, 0);
1da177e4
LT
1019 LeaveFunction(2);
1020
1021 return 0;
1022}
1023
1024
1025/*
1026 * Delete a destination (must be already unlinked from the service)
1027 */
29c2026f 1028static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest)
1da177e4 1029{
a0840e2e
HS
1030 struct netns_ipvs *ipvs = net_ipvs(net);
1031
6ef757f9 1032 ip_vs_stop_estimator(net, &dest->stats);
1da177e4
LT
1033
1034 /*
1035 * Remove it from the d-linked list with the real services.
1036 */
a0840e2e 1037 write_lock_bh(&ipvs->rs_lock);
1da177e4 1038 ip_vs_rs_unhash(dest);
a0840e2e 1039 write_unlock_bh(&ipvs->rs_lock);
1da177e4
LT
1040
1041 /*
1042 * Decrease the refcnt of the dest, and free the dest
1043 * if nobody refers to it (refcnt=0). Otherwise, throw
1044 * the destination into the trash.
1045 */
1046 if (atomic_dec_and_test(&dest->refcnt)) {
26c15cfd
JA
1047 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u\n",
1048 dest->vfwmark,
1049 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1050 ntohs(dest->port));
d1deae4d 1051 __ip_vs_dst_cache_reset(dest);
1da177e4
LT
1052 /* simply decrease svc->refcnt here, let the caller check
1053 and release the service if nobody refers to it.
1054 Only user context can release destination and service,
1055 and only one user context can update virtual service at a
1056 time, so the operation here is OK */
1057 atomic_dec(&dest->svc->refcnt);
b17fc996 1058 free_percpu(dest->stats.cpustats);
1da177e4
LT
1059 kfree(dest);
1060 } else {
cfc78c5a
JV
1061 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
1062 "dest->refcnt=%d\n",
1063 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1064 ntohs(dest->port),
1065 atomic_read(&dest->refcnt));
f2431e6e 1066 list_add(&dest->n_list, &ipvs->dest_trash);
1da177e4
LT
1067 atomic_inc(&dest->refcnt);
1068 }
1069}
1070
1071
1072/*
1073 * Unlink a destination from the given service
1074 */
1075static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1076 struct ip_vs_dest *dest,
1077 int svcupd)
1078{
1079 dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1080
1081 /*
1082 * Remove it from the d-linked destination list.
1083 */
1084 list_del(&dest->n_list);
1085 svc->num_dests--;
82dfb6f3
SW
1086
1087 /*
1088 * Call the update_service function of its scheduler
1089 */
1090 if (svcupd && svc->scheduler->update_service)
1091 svc->scheduler->update_service(svc);
1da177e4
LT
1092}
1093
1094
1095/*
1096 * Delete a destination server in the given service
1097 */
1098static int
c860c6b1 1099ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
1100{
1101 struct ip_vs_dest *dest;
014d730d 1102 __be16 dport = udest->port;
1da177e4
LT
1103
1104 EnterFunction(2);
1105
7937df15 1106 dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
c860c6b1 1107
1da177e4 1108 if (dest == NULL) {
1e3e238e 1109 IP_VS_DBG(1, "%s(): destination not found!\n", __func__);
1da177e4
LT
1110 return -ENOENT;
1111 }
1112
1113 write_lock_bh(&__ip_vs_svc_lock);
1114
1115 /*
1116 * Wait until all other svc users go away.
1117 */
26c15cfd 1118 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1da177e4
LT
1119
1120 /*
1121 * Unlink dest from the service
1122 */
1123 __ip_vs_unlink_dest(svc, dest, 1);
1124
1125 write_unlock_bh(&__ip_vs_svc_lock);
1126
1127 /*
1128 * Delete the destination
1129 */
a0840e2e 1130 __ip_vs_del_dest(svc->net, dest);
1da177e4
LT
1131
1132 LeaveFunction(2);
1133
1134 return 0;
1135}
1136
1137
1138/*
1139 * Add a service into the service hash table
1140 */
1141static int
fc723250 1142ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
c860c6b1 1143 struct ip_vs_service **svc_p)
1da177e4
LT
1144{
1145 int ret = 0;
1146 struct ip_vs_scheduler *sched = NULL;
0d1e71b0 1147 struct ip_vs_pe *pe = NULL;
1da177e4 1148 struct ip_vs_service *svc = NULL;
a0840e2e 1149 struct netns_ipvs *ipvs = net_ipvs(net);
1da177e4
LT
1150
1151 /* increase the module use count */
1152 ip_vs_use_count_inc();
1153
1154 /* Lookup the scheduler by 'u->sched_name' */
1155 sched = ip_vs_scheduler_get(u->sched_name);
1156 if (sched == NULL) {
1e3e238e 1157 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1da177e4 1158 ret = -ENOENT;
6e08bfb8 1159 goto out_err;
1da177e4
LT
1160 }
1161
0d1e71b0 1162 if (u->pe_name && *u->pe_name) {
e9e5eee8 1163 pe = ip_vs_pe_getbyname(u->pe_name);
0d1e71b0
SH
1164 if (pe == NULL) {
1165 pr_info("persistence engine module ip_vs_pe_%s "
1166 "not found\n", u->pe_name);
1167 ret = -ENOENT;
1168 goto out_err;
1169 }
1170 }
1171
f94fd041 1172#ifdef CONFIG_IP_VS_IPV6
48148938
JV
1173 if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1174 ret = -EINVAL;
1175 goto out_err;
f94fd041
JV
1176 }
1177#endif
1178
dee06e47 1179 svc = kzalloc(sizeof(struct ip_vs_service), GFP_KERNEL);
1da177e4 1180 if (svc == NULL) {
1e3e238e 1181 IP_VS_DBG(1, "%s(): no memory\n", __func__);
1da177e4
LT
1182 ret = -ENOMEM;
1183 goto out_err;
1184 }
b17fc996 1185 svc->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
0a54e939
JL
1186 if (!svc->stats.cpustats) {
1187 ret = -ENOMEM;
b17fc996 1188 goto out_err;
0a54e939 1189 }
1da177e4
LT
1190
1191 /* I'm the first user of the service */
26c15cfd 1192 atomic_set(&svc->usecnt, 0);
1da177e4
LT
1193 atomic_set(&svc->refcnt, 0);
1194
c860c6b1 1195 svc->af = u->af;
1da177e4 1196 svc->protocol = u->protocol;
c860c6b1 1197 ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
1da177e4
LT
1198 svc->port = u->port;
1199 svc->fwmark = u->fwmark;
1200 svc->flags = u->flags;
1201 svc->timeout = u->timeout * HZ;
1202 svc->netmask = u->netmask;
fc723250 1203 svc->net = net;
1da177e4
LT
1204
1205 INIT_LIST_HEAD(&svc->destinations);
1206 rwlock_init(&svc->sched_lock);
1207 spin_lock_init(&svc->stats.lock);
1208
1209 /* Bind the scheduler */
1210 ret = ip_vs_bind_scheduler(svc, sched);
1211 if (ret)
1212 goto out_err;
1213 sched = NULL;
1214
0d1e71b0
SH
1215 /* Bind the ct retriever */
1216 ip_vs_bind_pe(svc, pe);
1217 pe = NULL;
1218
1da177e4
LT
1219 /* Update the virtual service counters */
1220 if (svc->port == FTPPORT)
763f8d0e 1221 atomic_inc(&ipvs->ftpsvc_counter);
1da177e4 1222 else if (svc->port == 0)
763f8d0e 1223 atomic_inc(&ipvs->nullsvc_counter);
1da177e4 1224
6ef757f9 1225 ip_vs_start_estimator(net, &svc->stats);
f94fd041
JV
1226
1227 /* Count only IPv4 services for old get/setsockopt interface */
1228 if (svc->af == AF_INET)
a0840e2e 1229 ipvs->num_services++;
1da177e4
LT
1230
1231 /* Hash the service into the service table */
1232 write_lock_bh(&__ip_vs_svc_lock);
1233 ip_vs_svc_hash(svc);
1234 write_unlock_bh(&__ip_vs_svc_lock);
1235
1236 *svc_p = svc;
7a4f0761
HS
1237 /* Now there is a service - full throttle */
1238 ipvs->enable = 1;
1da177e4
LT
1239 return 0;
1240
b17fc996 1241
6e08bfb8 1242 out_err:
1da177e4 1243 if (svc != NULL) {
2fabf35b 1244 ip_vs_unbind_scheduler(svc);
1da177e4
LT
1245 if (svc->inc) {
1246 local_bh_disable();
1247 ip_vs_app_inc_put(svc->inc);
1248 local_bh_enable();
1249 }
b17fc996
HS
1250 if (svc->stats.cpustats)
1251 free_percpu(svc->stats.cpustats);
1da177e4
LT
1252 kfree(svc);
1253 }
1254 ip_vs_scheduler_put(sched);
0d1e71b0 1255 ip_vs_pe_put(pe);
1da177e4 1256
1da177e4
LT
1257 /* decrease the module use count */
1258 ip_vs_use_count_dec();
1259
1260 return ret;
1261}
1262
1263
1264/*
1265 * Edit a service and bind it with a new scheduler
1266 */
1267static int
c860c6b1 1268ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1da177e4
LT
1269{
1270 struct ip_vs_scheduler *sched, *old_sched;
0d1e71b0 1271 struct ip_vs_pe *pe = NULL, *old_pe = NULL;
1da177e4
LT
1272 int ret = 0;
1273
1274 /*
1275 * Lookup the scheduler, by 'u->sched_name'
1276 */
1277 sched = ip_vs_scheduler_get(u->sched_name);
1278 if (sched == NULL) {
1e3e238e 1279 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1da177e4
LT
1280 return -ENOENT;
1281 }
1282 old_sched = sched;
1283
0d1e71b0 1284 if (u->pe_name && *u->pe_name) {
e9e5eee8 1285 pe = ip_vs_pe_getbyname(u->pe_name);
0d1e71b0
SH
1286 if (pe == NULL) {
1287 pr_info("persistence engine module ip_vs_pe_%s "
1288 "not found\n", u->pe_name);
1289 ret = -ENOENT;
1290 goto out;
1291 }
1292 old_pe = pe;
1293 }
1294
f94fd041 1295#ifdef CONFIG_IP_VS_IPV6
48148938
JV
1296 if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1297 ret = -EINVAL;
1298 goto out;
f94fd041
JV
1299 }
1300#endif
1301
1da177e4
LT
1302 write_lock_bh(&__ip_vs_svc_lock);
1303
1304 /*
1305 * Wait until all other svc users go away.
1306 */
26c15cfd 1307 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1da177e4
LT
1308
1309 /*
1310 * Set the flags and timeout value
1311 */
1312 svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1313 svc->timeout = u->timeout * HZ;
1314 svc->netmask = u->netmask;
1315
1316 old_sched = svc->scheduler;
1317 if (sched != old_sched) {
1318 /*
1319 * Unbind the old scheduler
1320 */
1321 if ((ret = ip_vs_unbind_scheduler(svc))) {
1322 old_sched = sched;
9e691ed6 1323 goto out_unlock;
1da177e4
LT
1324 }
1325
1326 /*
1327 * Bind the new scheduler
1328 */
1329 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1330 /*
1331 * If ip_vs_bind_scheduler fails, restore the old
1332 * scheduler.
1333 * The main reason of failure is out of memory.
1334 *
1335 * The question is if the old scheduler can be
1336 * restored all the time. TODO: if it cannot be
1337 * restored some time, we must delete the service,
1338 * otherwise the system may crash.
1339 */
1340 ip_vs_bind_scheduler(svc, old_sched);
1341 old_sched = sched;
9e691ed6 1342 goto out_unlock;
1da177e4
LT
1343 }
1344 }
1345
0d1e71b0
SH
1346 old_pe = svc->pe;
1347 if (pe != old_pe) {
1348 ip_vs_unbind_pe(svc);
1349 ip_vs_bind_pe(svc, pe);
1350 }
1351
552ad65a 1352out_unlock:
1da177e4 1353 write_unlock_bh(&__ip_vs_svc_lock);
552ad65a 1354out:
6e08bfb8 1355 ip_vs_scheduler_put(old_sched);
0d1e71b0 1356 ip_vs_pe_put(old_pe);
1da177e4
LT
1357 return ret;
1358}
1359
1360
1361/*
1362 * Delete a service from the service list
1363 * - The service must be unlinked, unlocked and not referenced!
1364 * - We are called under _bh lock
1365 */
1366static void __ip_vs_del_service(struct ip_vs_service *svc)
1367{
1368 struct ip_vs_dest *dest, *nxt;
1369 struct ip_vs_scheduler *old_sched;
0d1e71b0 1370 struct ip_vs_pe *old_pe;
a0840e2e 1371 struct netns_ipvs *ipvs = net_ipvs(svc->net);
0d1e71b0
SH
1372
1373 pr_info("%s: enter\n", __func__);
1da177e4 1374
f94fd041
JV
1375 /* Count only IPv4 services for old get/setsockopt interface */
1376 if (svc->af == AF_INET)
a0840e2e 1377 ipvs->num_services--;
f94fd041 1378
6ef757f9 1379 ip_vs_stop_estimator(svc->net, &svc->stats);
1da177e4
LT
1380
1381 /* Unbind scheduler */
1382 old_sched = svc->scheduler;
1383 ip_vs_unbind_scheduler(svc);
6e08bfb8 1384 ip_vs_scheduler_put(old_sched);
1da177e4 1385
0d1e71b0
SH
1386 /* Unbind persistence engine */
1387 old_pe = svc->pe;
1388 ip_vs_unbind_pe(svc);
1389 ip_vs_pe_put(old_pe);
1390
1da177e4
LT
1391 /* Unbind app inc */
1392 if (svc->inc) {
1393 ip_vs_app_inc_put(svc->inc);
1394 svc->inc = NULL;
1395 }
1396
1397 /*
1398 * Unlink the whole destination list
1399 */
1400 list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1401 __ip_vs_unlink_dest(svc, dest, 0);
29c2026f 1402 __ip_vs_del_dest(svc->net, dest);
1da177e4
LT
1403 }
1404
1405 /*
1406 * Update the virtual service counters
1407 */
1408 if (svc->port == FTPPORT)
763f8d0e 1409 atomic_dec(&ipvs->ftpsvc_counter);
1da177e4 1410 else if (svc->port == 0)
763f8d0e 1411 atomic_dec(&ipvs->nullsvc_counter);
1da177e4
LT
1412
1413 /*
1414 * Free the service if nobody refers to it
1415 */
26c15cfd
JA
1416 if (atomic_read(&svc->refcnt) == 0) {
1417 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
1418 svc->fwmark,
1419 IP_VS_DBG_ADDR(svc->af, &svc->addr),
1420 ntohs(svc->port), atomic_read(&svc->usecnt));
b17fc996 1421 free_percpu(svc->stats.cpustats);
1da177e4 1422 kfree(svc);
26c15cfd 1423 }
1da177e4
LT
1424
1425 /* decrease the module use count */
1426 ip_vs_use_count_dec();
1427}
1428
/*
 *	Unlink a service from list and try to delete it if its refcnt reached 0
 *
 *	Everything happens under the write side of __ip_vs_svc_lock: the
 *	service is unhashed, we wait for svc->usecnt to drop to zero, and
 *	then __ip_vs_del_service() tears the service down.
 */
static void ip_vs_unlink_service(struct ip_vs_service *svc)
{
	/*
	 * Unhash it from the service table
	 */
	write_lock_bh(&__ip_vs_svc_lock);

	ip_vs_svc_unhash(svc);

	/*
	 * Wait until all the svc users go away.
	 */
	IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);

	__ip_vs_del_service(svc);

	write_unlock_bh(&__ip_vs_svc_lock);
}
1450
1451/*
1452 * Delete a service from the service list
1453 */
1454static int ip_vs_del_service(struct ip_vs_service *svc)
1455{
1456 if (svc == NULL)
1457 return -EEXIST;
1458 ip_vs_unlink_service(svc);
1da177e4
LT
1459
1460 return 0;
1461}
1462
1463
1464/*
1465 * Flush all the virtual services
1466 */
fc723250 1467static int ip_vs_flush(struct net *net)
1da177e4
LT
1468{
1469 int idx;
1470 struct ip_vs_service *svc, *nxt;
1471
1472 /*
fc723250 1473 * Flush the service table hashed by <netns,protocol,addr,port>
1da177e4
LT
1474 */
1475 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
fc723250
HS
1476 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx],
1477 s_list) {
1478 if (net_eq(svc->net, net))
1479 ip_vs_unlink_service(svc);
1da177e4
LT
1480 }
1481 }
1482
1483 /*
1484 * Flush the service table hashed by fwmark
1485 */
1486 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1487 list_for_each_entry_safe(svc, nxt,
1488 &ip_vs_svc_fwm_table[idx], f_list) {
fc723250
HS
1489 if (net_eq(svc->net, net))
1490 ip_vs_unlink_service(svc);
1da177e4
LT
1491 }
1492 }
1493
1494 return 0;
1495}
1496
7a4f0761
HS
/*
 *	Delete service by {netns} in the service table.
 *	Called by __ip_vs_cleanup()
 *
 *	Flushes every service of @net while holding __ip_vs_mutex.
 */
void ip_vs_service_net_cleanup(struct net *net)
{
	EnterFunction(2);
	/* Remove all services of this netns (both hash tables) */
	mutex_lock(&__ip_vs_mutex);
	ip_vs_flush(net);
	mutex_unlock(&__ip_vs_mutex);
	LeaveFunction(2);
}
d1deae4d
JA
1510
1511/* Put all references for device (dst_cache) */
7a4f0761 1512static inline void
d1deae4d 1513ip_vs_forget_dev(struct ip_vs_dest *dest, struct net_device *dev)
7a4f0761
HS
1514{
1515 spin_lock_bh(&dest->dst_lock);
1516 if (dest->dst_cache && dest->dst_cache->dev == dev) {
1517 IP_VS_DBG_BUF(3, "Reset dev:%s dest %s:%u ,dest->refcnt=%d\n",
1518 dev->name,
1519 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1520 ntohs(dest->port),
1521 atomic_read(&dest->refcnt));
d1deae4d 1522 __ip_vs_dst_cache_reset(dest);
7a4f0761
HS
1523 }
1524 spin_unlock_bh(&dest->dst_lock);
1525
1526}
313eae63
JA
1527/* Netdev event receiver
1528 * Currently only NETDEV_DOWN is handled to release refs to cached dsts
7a4f0761
HS
1529 */
1530static int ip_vs_dst_event(struct notifier_block *this, unsigned long event,
1531 void *ptr)
1532{
1533 struct net_device *dev = ptr;
1534 struct net *net = dev_net(dev);
283283c4 1535 struct netns_ipvs *ipvs = net_ipvs(net);
7a4f0761
HS
1536 struct ip_vs_service *svc;
1537 struct ip_vs_dest *dest;
1538 unsigned int idx;
1539
313eae63 1540 if (event != NETDEV_DOWN || !ipvs)
7a4f0761
HS
1541 return NOTIFY_DONE;
1542 IP_VS_DBG(3, "%s() dev=%s\n", __func__, dev->name);
1543 EnterFunction(2);
1544 mutex_lock(&__ip_vs_mutex);
1545 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1546 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1547 if (net_eq(svc->net, net)) {
1548 list_for_each_entry(dest, &svc->destinations,
1549 n_list) {
d1deae4d 1550 ip_vs_forget_dev(dest, dev);
7a4f0761
HS
1551 }
1552 }
1553 }
1554
1555 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1556 if (net_eq(svc->net, net)) {
1557 list_for_each_entry(dest, &svc->destinations,
1558 n_list) {
d1deae4d 1559 ip_vs_forget_dev(dest, dev);
7a4f0761
HS
1560 }
1561 }
1562
1563 }
1564 }
1565
283283c4 1566 list_for_each_entry(dest, &ipvs->dest_trash, n_list) {
d1deae4d 1567 ip_vs_forget_dev(dest, dev);
7a4f0761
HS
1568 }
1569 mutex_unlock(&__ip_vs_mutex);
1570 LeaveFunction(2);
1571 return NOTIFY_DONE;
1572}
1da177e4
LT
1573
1574/*
1575 * Zero counters in a service or all services
1576 */
1577static int ip_vs_zero_service(struct ip_vs_service *svc)
1578{
1579 struct ip_vs_dest *dest;
1580
1581 write_lock_bh(&__ip_vs_svc_lock);
1582 list_for_each_entry(dest, &svc->destinations, n_list) {
1583 ip_vs_zero_stats(&dest->stats);
1584 }
1585 ip_vs_zero_stats(&svc->stats);
1586 write_unlock_bh(&__ip_vs_svc_lock);
1587 return 0;
1588}
1589
fc723250 1590static int ip_vs_zero_all(struct net *net)
1da177e4
LT
1591{
1592 int idx;
1593 struct ip_vs_service *svc;
1594
1595 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1596 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
fc723250
HS
1597 if (net_eq(svc->net, net))
1598 ip_vs_zero_service(svc);
1da177e4
LT
1599 }
1600 }
1601
1602 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1603 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
fc723250
HS
1604 if (net_eq(svc->net, net))
1605 ip_vs_zero_service(svc);
1da177e4
LT
1606 }
1607 }
1608
2a0751af 1609 ip_vs_zero_stats(&net_ipvs(net)->tot_stats);
1da177e4
LT
1610 return 0;
1611}
1612
14e40546 1613#ifdef CONFIG_SYSCTL
749c42b6
JA
1614
/* Bounds handed to proc_dointvec_minmax via .extra1/.extra2 (sync_retries) */
static int zero;
static int three = 3;
1617
1da177e4 1618static int
8d65af78 1619proc_do_defense_mode(ctl_table *table, int write,
1da177e4
LT
1620 void __user *buffer, size_t *lenp, loff_t *ppos)
1621{
9330419d 1622 struct net *net = current->nsproxy->net_ns;
1da177e4
LT
1623 int *valp = table->data;
1624 int val = *valp;
1625 int rc;
1626
8d65af78 1627 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1da177e4
LT
1628 if (write && (*valp != val)) {
1629 if ((*valp < 0) || (*valp > 3)) {
1630 /* Restore the correct value */
1631 *valp = val;
1632 } else {
9330419d 1633 update_defense_level(net_ipvs(net));
1da177e4
LT
1634 }
1635 }
1636 return rc;
1637}
1638
1da177e4 1639static int
8d65af78 1640proc_do_sync_threshold(ctl_table *table, int write,
1da177e4
LT
1641 void __user *buffer, size_t *lenp, loff_t *ppos)
1642{
1643 int *valp = table->data;
1644 int val[2];
1645 int rc;
1646
1647 /* backup the value first */
1648 memcpy(val, valp, sizeof(val));
1649
8d65af78 1650 rc = proc_dointvec(table, write, buffer, lenp, ppos);
749c42b6
JA
1651 if (write && (valp[0] < 0 || valp[1] < 0 ||
1652 (valp[0] >= valp[1] && valp[1]))) {
1da177e4
LT
1653 /* Restore the correct value */
1654 memcpy(valp, val, sizeof(val));
1655 }
1656 return rc;
1657}
1658
b880c1f0
HS
1659static int
1660proc_do_sync_mode(ctl_table *table, int write,
1661 void __user *buffer, size_t *lenp, loff_t *ppos)
1662{
1663 int *valp = table->data;
1664 int val = *valp;
1665 int rc;
1666
1667 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1668 if (write && (*valp != val)) {
1669 if ((*valp < 0) || (*valp > 1)) {
1670 /* Restore the correct value */
1671 *valp = val;
f73181c8
PNA
1672 }
1673 }
1674 return rc;
1675}
1676
1677static int
1678proc_do_sync_ports(ctl_table *table, int write,
1679 void __user *buffer, size_t *lenp, loff_t *ppos)
1680{
1681 int *valp = table->data;
1682 int val = *valp;
1683 int rc;
1684
1685 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1686 if (write && (*valp != val)) {
1687 if (*valp < 1 || !is_power_of_2(*valp)) {
1688 /* Restore the correct value */
1689 *valp = val;
b880c1f0
HS
1690 }
1691 }
1692 return rc;
1693}
1da177e4
LT
1694
1695/*
1696 * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
a0840e2e 1697 * Do not change order or insert new entries without
503cf15a 1698 * align with netns init in ip_vs_control_net_init()
1da177e4
LT
1699 */
1700
1701static struct ctl_table vs_vars[] = {
1702 {
1da177e4 1703 .procname = "amemthresh",
1da177e4
LT
1704 .maxlen = sizeof(int),
1705 .mode = 0644,
6d9f239a 1706 .proc_handler = proc_dointvec,
1da177e4 1707 },
1da177e4 1708 {
1da177e4 1709 .procname = "am_droprate",
1da177e4
LT
1710 .maxlen = sizeof(int),
1711 .mode = 0644,
6d9f239a 1712 .proc_handler = proc_dointvec,
1da177e4
LT
1713 },
1714 {
1da177e4 1715 .procname = "drop_entry",
1da177e4
LT
1716 .maxlen = sizeof(int),
1717 .mode = 0644,
6d9f239a 1718 .proc_handler = proc_do_defense_mode,
1da177e4
LT
1719 },
1720 {
1da177e4 1721 .procname = "drop_packet",
1da177e4
LT
1722 .maxlen = sizeof(int),
1723 .mode = 0644,
6d9f239a 1724 .proc_handler = proc_do_defense_mode,
1da177e4 1725 },
f4bc17cd
JA
1726#ifdef CONFIG_IP_VS_NFCT
1727 {
1728 .procname = "conntrack",
f4bc17cd
JA
1729 .maxlen = sizeof(int),
1730 .mode = 0644,
1731 .proc_handler = &proc_dointvec,
1732 },
1733#endif
1da177e4 1734 {
1da177e4 1735 .procname = "secure_tcp",
1da177e4
LT
1736 .maxlen = sizeof(int),
1737 .mode = 0644,
6d9f239a 1738 .proc_handler = proc_do_defense_mode,
1da177e4 1739 },
8a803040
JA
1740 {
1741 .procname = "snat_reroute",
8a803040
JA
1742 .maxlen = sizeof(int),
1743 .mode = 0644,
1744 .proc_handler = &proc_dointvec,
1745 },
b880c1f0
HS
1746 {
1747 .procname = "sync_version",
b880c1f0
HS
1748 .maxlen = sizeof(int),
1749 .mode = 0644,
1750 .proc_handler = &proc_do_sync_mode,
1751 },
f73181c8
PNA
1752 {
1753 .procname = "sync_ports",
1754 .maxlen = sizeof(int),
1755 .mode = 0644,
1756 .proc_handler = &proc_do_sync_ports,
1757 },
1c003b15
PNA
1758 {
1759 .procname = "sync_qlen_max",
1760 .maxlen = sizeof(int),
1761 .mode = 0644,
1762 .proc_handler = proc_dointvec,
1763 },
1764 {
1765 .procname = "sync_sock_size",
1766 .maxlen = sizeof(int),
1767 .mode = 0644,
1768 .proc_handler = proc_dointvec,
1769 },
a0840e2e
HS
1770 {
1771 .procname = "cache_bypass",
1772 .maxlen = sizeof(int),
1773 .mode = 0644,
1774 .proc_handler = proc_dointvec,
1775 },
1776 {
1777 .procname = "expire_nodest_conn",
1778 .maxlen = sizeof(int),
1779 .mode = 0644,
1780 .proc_handler = proc_dointvec,
1781 },
1782 {
1783 .procname = "expire_quiescent_template",
1784 .maxlen = sizeof(int),
1785 .mode = 0644,
1786 .proc_handler = proc_dointvec,
1787 },
1788 {
1789 .procname = "sync_threshold",
1790 .maxlen =
1791 sizeof(((struct netns_ipvs *)0)->sysctl_sync_threshold),
1792 .mode = 0644,
1793 .proc_handler = proc_do_sync_threshold,
1794 },
749c42b6
JA
1795 {
1796 .procname = "sync_refresh_period",
1797 .maxlen = sizeof(int),
1798 .mode = 0644,
1799 .proc_handler = proc_dointvec_jiffies,
1800 },
1801 {
1802 .procname = "sync_retries",
1803 .maxlen = sizeof(int),
1804 .mode = 0644,
1805 .proc_handler = proc_dointvec_minmax,
1806 .extra1 = &zero,
1807 .extra2 = &three,
1808 },
a0840e2e
HS
1809 {
1810 .procname = "nat_icmp_send",
1811 .maxlen = sizeof(int),
1812 .mode = 0644,
1813 .proc_handler = proc_dointvec,
1814 },
3654e611
JA
1815 {
1816 .procname = "pmtu_disc",
1817 .maxlen = sizeof(int),
1818 .mode = 0644,
1819 .proc_handler = proc_dointvec,
1820 },
0c12582f
JA
1821 {
1822 .procname = "backup_only",
1823 .maxlen = sizeof(int),
1824 .mode = 0644,
1825 .proc_handler = proc_dointvec,
1826 },
a0840e2e
HS
1827#ifdef CONFIG_IP_VS_DEBUG
1828 {
1829 .procname = "debug_level",
1830 .data = &sysctl_ip_vs_debug_level,
1831 .maxlen = sizeof(int),
1832 .mode = 0644,
1833 .proc_handler = proc_dointvec,
1834 },
1835#endif
1da177e4
LT
1836#if 0
1837 {
1da177e4
LT
1838 .procname = "timeout_established",
1839 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1840 .maxlen = sizeof(int),
1841 .mode = 0644,
6d9f239a 1842 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1843 },
1844 {
1da177e4
LT
1845 .procname = "timeout_synsent",
1846 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1847 .maxlen = sizeof(int),
1848 .mode = 0644,
6d9f239a 1849 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1850 },
1851 {
1da177e4
LT
1852 .procname = "timeout_synrecv",
1853 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1854 .maxlen = sizeof(int),
1855 .mode = 0644,
6d9f239a 1856 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1857 },
1858 {
1da177e4
LT
1859 .procname = "timeout_finwait",
1860 .data = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1861 .maxlen = sizeof(int),
1862 .mode = 0644,
6d9f239a 1863 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1864 },
1865 {
1da177e4
LT
1866 .procname = "timeout_timewait",
1867 .data = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1868 .maxlen = sizeof(int),
1869 .mode = 0644,
6d9f239a 1870 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1871 },
1872 {
1da177e4
LT
1873 .procname = "timeout_close",
1874 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1875 .maxlen = sizeof(int),
1876 .mode = 0644,
6d9f239a 1877 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1878 },
1879 {
1da177e4
LT
1880 .procname = "timeout_closewait",
1881 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1882 .maxlen = sizeof(int),
1883 .mode = 0644,
6d9f239a 1884 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1885 },
1886 {
1da177e4
LT
1887 .procname = "timeout_lastack",
1888 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1889 .maxlen = sizeof(int),
1890 .mode = 0644,
6d9f239a 1891 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1892 },
1893 {
1da177e4
LT
1894 .procname = "timeout_listen",
1895 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1896 .maxlen = sizeof(int),
1897 .mode = 0644,
6d9f239a 1898 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1899 },
1900 {
1da177e4
LT
1901 .procname = "timeout_synack",
1902 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1903 .maxlen = sizeof(int),
1904 .mode = 0644,
6d9f239a 1905 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1906 },
1907 {
1da177e4
LT
1908 .procname = "timeout_udp",
1909 .data = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1910 .maxlen = sizeof(int),
1911 .mode = 0644,
6d9f239a 1912 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1913 },
1914 {
1da177e4
LT
1915 .procname = "timeout_icmp",
1916 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1917 .maxlen = sizeof(int),
1918 .mode = 0644,
6d9f239a 1919 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1920 },
1921#endif
f8572d8f 1922 { }
1da177e4
LT
1923};
1924
14e40546 1925#endif
1da177e4 1926
1da177e4
LT
1927#ifdef CONFIG_PROC_FS
1928
/* Iterator state kept in seq->private while walking the service tables */
struct ip_vs_iter {
	struct seq_net_private p;  /* Do not move this, netns depends upon it*/
	struct list_head *table;   /* hash table the current entry came from */
	int bucket;                /* index of the current bucket in ->table */
};
1934
1935/*
1936 * Write the contents of the VS rule table to a PROCfs file.
1937 * (It is kept just for backward compatibility)
1938 */
95c96174 1939static inline const char *ip_vs_fwd_name(unsigned int flags)
1da177e4
LT
1940{
1941 switch (flags & IP_VS_CONN_F_FWD_MASK) {
1942 case IP_VS_CONN_F_LOCALNODE:
1943 return "Local";
1944 case IP_VS_CONN_F_TUNNEL:
1945 return "Tunnel";
1946 case IP_VS_CONN_F_DROUTE:
1947 return "Route";
1948 default:
1949 return "Masq";
1950 }
1951}
1952
1953
1954/* Get the Nth entry in the two lists */
1955static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1956{
fc723250 1957 struct net *net = seq_file_net(seq);
1da177e4
LT
1958 struct ip_vs_iter *iter = seq->private;
1959 int idx;
1960 struct ip_vs_service *svc;
1961
1962 /* look in hash by protocol */
1963 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1964 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
fc723250 1965 if (net_eq(svc->net, net) && pos-- == 0) {
1da177e4
LT
1966 iter->table = ip_vs_svc_table;
1967 iter->bucket = idx;
1968 return svc;
1969 }
1970 }
1971 }
1972
1973 /* keep looking in fwmark */
1974 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1975 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
fc723250 1976 if (net_eq(svc->net, net) && pos-- == 0) {
1da177e4
LT
1977 iter->table = ip_vs_svc_fwm_table;
1978 iter->bucket = idx;
1979 return svc;
1980 }
1981 }
1982 }
1983
1984 return NULL;
1985}
1986
1987static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
563e94f0 1988__acquires(__ip_vs_svc_lock)
1da177e4
LT
1989{
1990
1991 read_lock_bh(&__ip_vs_svc_lock);
1992 return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
1993}
1994
1995
1996static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1997{
1998 struct list_head *e;
1999 struct ip_vs_iter *iter;
2000 struct ip_vs_service *svc;
2001
2002 ++*pos;
2003 if (v == SEQ_START_TOKEN)
2004 return ip_vs_info_array(seq,0);
2005
2006 svc = v;
2007 iter = seq->private;
2008
2009 if (iter->table == ip_vs_svc_table) {
2010 /* next service in table hashed by protocol */
2011 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
2012 return list_entry(e, struct ip_vs_service, s_list);
2013
2014
2015 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
2016 list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
2017 s_list) {
2018 return svc;
2019 }
2020 }
2021
2022 iter->table = ip_vs_svc_fwm_table;
2023 iter->bucket = -1;
2024 goto scan_fwmark;
2025 }
2026
2027 /* next service in hashed by fwmark */
2028 if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
2029 return list_entry(e, struct ip_vs_service, f_list);
2030
2031 scan_fwmark:
2032 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
2033 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
2034 f_list)
2035 return svc;
2036 }
2037
2038 return NULL;
2039}
2040
/* seq_file ->stop: drop the read lock taken in ip_vs_info_seq_start() */
static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
__releases(__ip_vs_svc_lock)
{
	read_unlock_bh(&__ip_vs_svc_lock);
}
2046
2047
/*
 * seq_file ->show: print either the table header (for the start token)
 * or one service line followed by one line per destination.
 * Always returns 0.
 */
static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN) {
		seq_printf(seq,
			"IP Virtual Server version %d.%d.%d (size=%d)\n",
			NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
		seq_puts(seq,
			 "Prot LocalAddress:Port Scheduler Flags\n");
		seq_puts(seq,
			 " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
	} else {
		const struct ip_vs_service *svc = v;
		const struct ip_vs_iter *iter = seq->private;
		const struct ip_vs_dest *dest;

		/* Services from the protocol table print addr:port,
		 * fwmark services print the mark instead. */
		if (iter->table == ip_vs_svc_table) {
#ifdef CONFIG_IP_VS_IPV6
			if (svc->af == AF_INET6)
				seq_printf(seq, "%s [%pI6]:%04X %s ",
					   ip_vs_proto_name(svc->protocol),
					   &svc->addr.in6,
					   ntohs(svc->port),
					   svc->scheduler->name);
			else
#endif
				/* note: only this branch prints the "ops" flag */
				seq_printf(seq, "%s %08X:%04X %s %s ",
					   ip_vs_proto_name(svc->protocol),
					   ntohl(svc->addr.ip),
					   ntohs(svc->port),
					   svc->scheduler->name,
					   (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
		} else {
			seq_printf(seq, "FWM %08X %s %s",
				   svc->fwmark, svc->scheduler->name,
				   (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
		}

		/* Persistent services also show timeout and netmask */
		if (svc->flags & IP_VS_SVC_F_PERSISTENT)
			seq_printf(seq, "persistent %d %08X\n",
				svc->timeout,
				ntohl(svc->netmask));
		else
			seq_putc(seq, '\n');

		/* One line per destination of this service */
		list_for_each_entry(dest, &svc->destinations, n_list) {
#ifdef CONFIG_IP_VS_IPV6
			if (dest->af == AF_INET6)
				seq_printf(seq,
					   " -> [%pI6]:%04X"
					   " %-7s %-6d %-10d %-10d\n",
					   &dest->addr.in6,
					   ntohs(dest->port),
					   ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
					   atomic_read(&dest->weight),
					   atomic_read(&dest->activeconns),
					   atomic_read(&dest->inactconns));
			else
#endif
				seq_printf(seq,
					   " -> %08X:%04X "
					   "%-7s %-6d %-10d %-10d\n",
					   ntohl(dest->addr.ip),
					   ntohs(dest->port),
					   ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
					   atomic_read(&dest->weight),
					   atomic_read(&dest->activeconns),
					   atomic_read(&dest->inactconns));

		}
	}
	return 0;
}
2120
/* seq_file callbacks for the service listing opened by ip_vs_info_open() */
static const struct seq_operations ip_vs_info_seq_ops = {
	.start = ip_vs_info_seq_start,
	.next  = ip_vs_info_seq_next,
	.stop  = ip_vs_info_seq_stop,
	.show  = ip_vs_info_seq_show,
};
2127
/*
 * open handler: set up a netns-aware seq_file with a struct ip_vs_iter
 * as its private iterator state.
 */
static int ip_vs_info_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ip_vs_info_seq_ops,
			sizeof(struct ip_vs_iter));
}
2133
/* File operations for the service listing (seq_file based, netns-aware) */
static const struct file_operations ip_vs_info_fops = {
	.owner	 = THIS_MODULE,
	.open    = ip_vs_info_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};
2141
1da177e4
LT
2142static int ip_vs_stats_show(struct seq_file *seq, void *v)
2143{
b17fc996 2144 struct net *net = seq_file_single_net(seq);
55a3d4e1 2145 struct ip_vs_stats_user show;
1da177e4
LT
2146
2147/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2148 seq_puts(seq,
2149 " Total Incoming Outgoing Incoming Outgoing\n");
2150 seq_printf(seq,
2151 " Conns Packets Packets Bytes Bytes\n");
2152
55a3d4e1
JA
2153 ip_vs_copy_stats(&show, &net_ipvs(net)->tot_stats);
2154 seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", show.conns,
2155 show.inpkts, show.outpkts,
2156 (unsigned long long) show.inbytes,
2157 (unsigned long long) show.outbytes);
1da177e4
LT
2158
2159/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2160 seq_puts(seq,
2161 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
55a3d4e1
JA
2162 seq_printf(seq, "%8X %8X %8X %16X %16X\n",
2163 show.cps, show.inpps, show.outpps,
2164 show.inbps, show.outbps);
1da177e4
LT
2165
2166 return 0;
2167}
2168
/* open handler: single-shot, netns-aware seq_file showing the totals */
static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
{
	return single_open_net(inode, file, ip_vs_stats_show);
}
2173
9a32144e 2174static const struct file_operations ip_vs_stats_fops = {
1da177e4
LT
2175 .owner = THIS_MODULE,
2176 .open = ip_vs_stats_seq_open,
2177 .read = seq_read,
2178 .llseek = seq_lseek,
0f08190f 2179 .release = single_release_net,
1da177e4
LT
2180};
2181
b17fc996
HS
2182static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
2183{
2184 struct net *net = seq_file_single_net(seq);
2a0751af
JA
2185 struct ip_vs_stats *tot_stats = &net_ipvs(net)->tot_stats;
2186 struct ip_vs_cpu_stats *cpustats = tot_stats->cpustats;
ea9f22cc 2187 struct ip_vs_stats_user rates;
b17fc996
HS
2188 int i;
2189
2190/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2191 seq_puts(seq,
2192 " Total Incoming Outgoing Incoming Outgoing\n");
2193 seq_printf(seq,
2194 "CPU Conns Packets Packets Bytes Bytes\n");
2195
2196 for_each_possible_cpu(i) {
2a0751af
JA
2197 struct ip_vs_cpu_stats *u = per_cpu_ptr(cpustats, i);
2198 unsigned int start;
2199 __u64 inbytes, outbytes;
2200
2201 do {
2202 start = u64_stats_fetch_begin_bh(&u->syncp);
2203 inbytes = u->ustats.inbytes;
2204 outbytes = u->ustats.outbytes;
2205 } while (u64_stats_fetch_retry_bh(&u->syncp, start));
2206
b17fc996 2207 seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n",
2a0751af
JA
2208 i, u->ustats.conns, u->ustats.inpkts,
2209 u->ustats.outpkts, (__u64)inbytes,
2210 (__u64)outbytes);
b17fc996
HS
2211 }
2212
2213 spin_lock_bh(&tot_stats->lock);
ea9f22cc 2214
b17fc996
HS
2215 seq_printf(seq, " ~ %8X %8X %8X %16LX %16LX\n\n",
2216 tot_stats->ustats.conns, tot_stats->ustats.inpkts,
2217 tot_stats->ustats.outpkts,
2218 (unsigned long long) tot_stats->ustats.inbytes,
2219 (unsigned long long) tot_stats->ustats.outbytes);
2220
ea9f22cc
JA
2221 ip_vs_read_estimator(&rates, tot_stats);
2222
2223 spin_unlock_bh(&tot_stats->lock);
2224
b17fc996
HS
2225/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2226 seq_puts(seq,
2227 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
2228 seq_printf(seq, " %8X %8X %8X %16X %16X\n",
ea9f22cc
JA
2229 rates.cps,
2230 rates.inpps,
2231 rates.outpps,
2232 rates.inbps,
2233 rates.outbps);
b17fc996
HS
2234
2235 return 0;
2236}
2237
2238static int ip_vs_stats_percpu_seq_open(struct inode *inode, struct file *file)
2239{
2240 return single_open_net(inode, file, ip_vs_stats_percpu_show);
2241}
2242
2243static const struct file_operations ip_vs_stats_percpu_fops = {
2244 .owner = THIS_MODULE,
2245 .open = ip_vs_stats_percpu_seq_open,
2246 .read = seq_read,
2247 .llseek = seq_lseek,
0f08190f 2248 .release = single_release_net,
b17fc996 2249};
1da177e4
LT
2250#endif
2251
2252/*
2253 * Set timeout values for tcp tcpfin udp in the timeout_table.
2254 */
9330419d 2255static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u)
1da177e4 2256{
091bb34c 2257#if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
9330419d 2258 struct ip_vs_proto_data *pd;
091bb34c 2259#endif
9330419d 2260
1da177e4
LT
2261 IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
2262 u->tcp_timeout,
2263 u->tcp_fin_timeout,
2264 u->udp_timeout);
2265
2266#ifdef CONFIG_IP_VS_PROTO_TCP
2267 if (u->tcp_timeout) {
9330419d
HS
2268 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2269 pd->timeout_table[IP_VS_TCP_S_ESTABLISHED]
1da177e4
LT
2270 = u->tcp_timeout * HZ;
2271 }
2272
2273 if (u->tcp_fin_timeout) {
9330419d
HS
2274 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2275 pd->timeout_table[IP_VS_TCP_S_FIN_WAIT]
1da177e4
LT
2276 = u->tcp_fin_timeout * HZ;
2277 }
2278#endif
2279
2280#ifdef CONFIG_IP_VS_PROTO_UDP
2281 if (u->udp_timeout) {
9330419d
HS
2282 pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
2283 pd->timeout_table[IP_VS_UDP_S_NORMAL]
1da177e4
LT
2284 = u->udp_timeout * HZ;
2285 }
2286#endif
2287 return 0;
2288}
2289
2290
/*
 * Helpers for the set_arglen[] table: SET_CMDID() turns a sockopt command
 * number into a zero-based table index, the *_ARG_LEN macros give the
 * exact argument size expected for each family of commands.
 *
 * The SET_CMDID() argument is parenthesized so that an expression argument
 * (e.g. a conditional) is evaluated as a unit before the subtraction.
 */
#define SET_CMDID(cmd)		((cmd) - IP_VS_BASE_CTL)
#define SERVICE_ARG_LEN		(sizeof(struct ip_vs_service_user))
#define SVCDEST_ARG_LEN		(sizeof(struct ip_vs_service_user) +	\
				 sizeof(struct ip_vs_dest_user))
#define TIMEOUT_ARG_LEN		(sizeof(struct ip_vs_timeout_user))
#define DAEMON_ARG_LEN		(sizeof(struct ip_vs_daemon_user))
#define MAX_ARG_LEN		SVCDEST_ARG_LEN
2298
9b5b5cff 2299static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
1da177e4
LT
2300 [SET_CMDID(IP_VS_SO_SET_ADD)] = SERVICE_ARG_LEN,
2301 [SET_CMDID(IP_VS_SO_SET_EDIT)] = SERVICE_ARG_LEN,
2302 [SET_CMDID(IP_VS_SO_SET_DEL)] = SERVICE_ARG_LEN,
2303 [SET_CMDID(IP_VS_SO_SET_FLUSH)] = 0,
2304 [SET_CMDID(IP_VS_SO_SET_ADDDEST)] = SVCDEST_ARG_LEN,
2305 [SET_CMDID(IP_VS_SO_SET_DELDEST)] = SVCDEST_ARG_LEN,
2306 [SET_CMDID(IP_VS_SO_SET_EDITDEST)] = SVCDEST_ARG_LEN,
2307 [SET_CMDID(IP_VS_SO_SET_TIMEOUT)] = TIMEOUT_ARG_LEN,
2308 [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)] = DAEMON_ARG_LEN,
2309 [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)] = DAEMON_ARG_LEN,
2310 [SET_CMDID(IP_VS_SO_SET_ZERO)] = SERVICE_ARG_LEN,
2311};
2312
c860c6b1
JV
2313static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2314 struct ip_vs_service_user *usvc_compat)
2315{
0d1e71b0
SH
2316 memset(usvc, 0, sizeof(*usvc));
2317
c860c6b1
JV
2318 usvc->af = AF_INET;
2319 usvc->protocol = usvc_compat->protocol;
2320 usvc->addr.ip = usvc_compat->addr;
2321 usvc->port = usvc_compat->port;
2322 usvc->fwmark = usvc_compat->fwmark;
2323
2324 /* Deep copy of sched_name is not needed here */
2325 usvc->sched_name = usvc_compat->sched_name;
2326
2327 usvc->flags = usvc_compat->flags;
2328 usvc->timeout = usvc_compat->timeout;
2329 usvc->netmask = usvc_compat->netmask;
2330}
2331
2332static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
2333 struct ip_vs_dest_user *udest_compat)
2334{
0d1e71b0
SH
2335 memset(udest, 0, sizeof(*udest));
2336
c860c6b1
JV
2337 udest->addr.ip = udest_compat->addr;
2338 udest->port = udest_compat->port;
2339 udest->conn_flags = udest_compat->conn_flags;
2340 udest->weight = udest_compat->weight;
2341 udest->u_threshold = udest_compat->u_threshold;
2342 udest->l_threshold = udest_compat->l_threshold;
2343}
2344
1da177e4
LT
2345static int
2346do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2347{
fc723250 2348 struct net *net = sock_net(sk);
1da177e4
LT
2349 int ret;
2350 unsigned char arg[MAX_ARG_LEN];
c860c6b1
JV
2351 struct ip_vs_service_user *usvc_compat;
2352 struct ip_vs_service_user_kern usvc;
1da177e4 2353 struct ip_vs_service *svc;
c860c6b1
JV
2354 struct ip_vs_dest_user *udest_compat;
2355 struct ip_vs_dest_user_kern udest;
ae1d48b2 2356 struct netns_ipvs *ipvs = net_ipvs(net);
1da177e4 2357
df008c91 2358 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
1da177e4
LT
2359 return -EPERM;
2360
04bcef2a
AV
2361 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_SET_MAX)
2362 return -EINVAL;
2363 if (len < 0 || len > MAX_ARG_LEN)
2364 return -EINVAL;
1da177e4 2365 if (len != set_arglen[SET_CMDID(cmd)]) {
1e3e238e
HE
2366 pr_err("set_ctl: len %u != %u\n",
2367 len, set_arglen[SET_CMDID(cmd)]);
1da177e4
LT
2368 return -EINVAL;
2369 }
2370
2371 if (copy_from_user(arg, user, len) != 0)
2372 return -EFAULT;
2373
2374 /* increase the module use count */
2375 ip_vs_use_count_inc();
2376
ae1d48b2
HS
2377 /* Handle daemons since they have another lock */
2378 if (cmd == IP_VS_SO_SET_STARTDAEMON ||
2379 cmd == IP_VS_SO_SET_STOPDAEMON) {
2380 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2381
2382 if (mutex_lock_interruptible(&ipvs->sync_mutex)) {
2383 ret = -ERESTARTSYS;
2384 goto out_dec;
2385 }
2386 if (cmd == IP_VS_SO_SET_STARTDAEMON)
2387 ret = start_sync_thread(net, dm->state, dm->mcast_ifn,
2388 dm->syncid);
2389 else
2390 ret = stop_sync_thread(net, dm->state);
2391 mutex_unlock(&ipvs->sync_mutex);
2392 goto out_dec;
2393 }
2394
14cc3e2b 2395 if (mutex_lock_interruptible(&__ip_vs_mutex)) {
1da177e4
LT
2396 ret = -ERESTARTSYS;
2397 goto out_dec;
2398 }
2399
2400 if (cmd == IP_VS_SO_SET_FLUSH) {
2401 /* Flush the virtual service */
fc723250 2402 ret = ip_vs_flush(net);
1da177e4
LT
2403 goto out_unlock;
2404 } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
2405 /* Set timeout values for (tcp tcpfin udp) */
9330419d 2406 ret = ip_vs_set_timeout(net, (struct ip_vs_timeout_user *)arg);
1da177e4 2407 goto out_unlock;
1da177e4
LT
2408 }
2409
c860c6b1
JV
2410 usvc_compat = (struct ip_vs_service_user *)arg;
2411 udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
2412
2413 /* We only use the new structs internally, so copy userspace compat
2414 * structs to extended internal versions */
2415 ip_vs_copy_usvc_compat(&usvc, usvc_compat);
2416 ip_vs_copy_udest_compat(&udest, udest_compat);
1da177e4
LT
2417
2418 if (cmd == IP_VS_SO_SET_ZERO) {
2419 /* if no service address is set, zero counters in all */
c860c6b1 2420 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
fc723250 2421 ret = ip_vs_zero_all(net);
1da177e4
LT
2422 goto out_unlock;
2423 }
2424 }
2425
2906f66a
VMR
2426 /* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */
2427 if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP &&
2428 usvc.protocol != IPPROTO_SCTP) {
1e3e238e
HE
2429 pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n",
2430 usvc.protocol, &usvc.addr.ip,
2431 ntohs(usvc.port), usvc.sched_name);
1da177e4
LT
2432 ret = -EFAULT;
2433 goto out_unlock;
2434 }
2435
2436 /* Lookup the exact service by <protocol, addr, port> or fwmark */
c860c6b1 2437 if (usvc.fwmark == 0)
fc723250 2438 svc = __ip_vs_service_find(net, usvc.af, usvc.protocol,
26c15cfd 2439 &usvc.addr, usvc.port);
1da177e4 2440 else
fc723250 2441 svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark);
1da177e4
LT
2442
2443 if (cmd != IP_VS_SO_SET_ADD
c860c6b1 2444 && (svc == NULL || svc->protocol != usvc.protocol)) {
1da177e4 2445 ret = -ESRCH;
26c15cfd 2446 goto out_unlock;
1da177e4
LT
2447 }
2448
2449 switch (cmd) {
2450 case IP_VS_SO_SET_ADD:
2451 if (svc != NULL)
2452 ret = -EEXIST;
2453 else
fc723250 2454 ret = ip_vs_add_service(net, &usvc, &svc);
1da177e4
LT
2455 break;
2456 case IP_VS_SO_SET_EDIT:
c860c6b1 2457 ret = ip_vs_edit_service(svc, &usvc);
1da177e4
LT
2458 break;
2459 case IP_VS_SO_SET_DEL:
2460 ret = ip_vs_del_service(svc);
2461 if (!ret)
2462 goto out_unlock;
2463 break;
2464 case IP_VS_SO_SET_ZERO:
2465 ret = ip_vs_zero_service(svc);
2466 break;
2467 case IP_VS_SO_SET_ADDDEST:
c860c6b1 2468 ret = ip_vs_add_dest(svc, &udest);
1da177e4
LT
2469 break;
2470 case IP_VS_SO_SET_EDITDEST:
c860c6b1 2471 ret = ip_vs_edit_dest(svc, &udest);
1da177e4
LT
2472 break;
2473 case IP_VS_SO_SET_DELDEST:
c860c6b1 2474 ret = ip_vs_del_dest(svc, &udest);
1da177e4
LT
2475 break;
2476 default:
2477 ret = -EINVAL;
2478 }
2479
1da177e4 2480 out_unlock:
14cc3e2b 2481 mutex_unlock(&__ip_vs_mutex);
1da177e4
LT
2482 out_dec:
2483 /* decrease the module use count */
2484 ip_vs_use_count_dec();
2485
2486 return ret;
2487}
2488
2489
1da177e4
LT
2490static void
2491ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2492{
2493 dst->protocol = src->protocol;
e7ade46a 2494 dst->addr = src->addr.ip;
1da177e4
LT
2495 dst->port = src->port;
2496 dst->fwmark = src->fwmark;
4da62fc7 2497 strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
1da177e4
LT
2498 dst->flags = src->flags;
2499 dst->timeout = src->timeout / HZ;
2500 dst->netmask = src->netmask;
2501 dst->num_dests = src->num_dests;
2502 ip_vs_copy_stats(&dst->stats, &src->stats);
2503}
2504
2505static inline int
fc723250
HS
2506__ip_vs_get_service_entries(struct net *net,
2507 const struct ip_vs_get_services *get,
1da177e4
LT
2508 struct ip_vs_get_services __user *uptr)
2509{
2510 int idx, count=0;
2511 struct ip_vs_service *svc;
2512 struct ip_vs_service_entry entry;
2513 int ret = 0;
2514
2515 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2516 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
f94fd041 2517 /* Only expose IPv4 entries to old interface */
fc723250 2518 if (svc->af != AF_INET || !net_eq(svc->net, net))
f94fd041
JV
2519 continue;
2520
1da177e4
LT
2521 if (count >= get->num_services)
2522 goto out;
4da62fc7 2523 memset(&entry, 0, sizeof(entry));
1da177e4
LT
2524 ip_vs_copy_service(&entry, svc);
2525 if (copy_to_user(&uptr->entrytable[count],
2526 &entry, sizeof(entry))) {
2527 ret = -EFAULT;
2528 goto out;
2529 }
2530 count++;
2531 }
2532 }
2533
2534 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2535 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
f94fd041 2536 /* Only expose IPv4 entries to old interface */
fc723250 2537 if (svc->af != AF_INET || !net_eq(svc->net, net))
f94fd041
JV
2538 continue;
2539
1da177e4
LT
2540 if (count >= get->num_services)
2541 goto out;
4da62fc7 2542 memset(&entry, 0, sizeof(entry));
1da177e4
LT
2543 ip_vs_copy_service(&entry, svc);
2544 if (copy_to_user(&uptr->entrytable[count],
2545 &entry, sizeof(entry))) {
2546 ret = -EFAULT;
2547 goto out;
2548 }
2549 count++;
2550 }
2551 }
552ad65a 2552out:
1da177e4
LT
2553 return ret;
2554}
2555
2556static inline int
fc723250 2557__ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
1da177e4
LT
2558 struct ip_vs_get_dests __user *uptr)
2559{
2560 struct ip_vs_service *svc;
b18610de 2561 union nf_inet_addr addr = { .ip = get->addr };
1da177e4
LT
2562 int ret = 0;
2563
2564 if (get->fwmark)
fc723250 2565 svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark);
1da177e4 2566 else
fc723250 2567 svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr,
26c15cfd 2568 get->port);
b18610de 2569
1da177e4
LT
2570 if (svc) {
2571 int count = 0;
2572 struct ip_vs_dest *dest;
2573 struct ip_vs_dest_entry entry;
2574
2575 list_for_each_entry(dest, &svc->destinations, n_list) {
2576 if (count >= get->num_dests)
2577 break;
2578
e7ade46a 2579 entry.addr = dest->addr.ip;
1da177e4
LT
2580 entry.port = dest->port;
2581 entry.conn_flags = atomic_read(&dest->conn_flags);
2582 entry.weight = atomic_read(&dest->weight);
2583 entry.u_threshold = dest->u_threshold;
2584 entry.l_threshold = dest->l_threshold;
2585 entry.activeconns = atomic_read(&dest->activeconns);
2586 entry.inactconns = atomic_read(&dest->inactconns);
2587 entry.persistconns = atomic_read(&dest->persistconns);
2588 ip_vs_copy_stats(&entry.stats, &dest->stats);
2589 if (copy_to_user(&uptr->entrytable[count],
2590 &entry, sizeof(entry))) {
2591 ret = -EFAULT;
2592 break;
2593 }
2594 count++;
2595 }
1da177e4
LT
2596 } else
2597 ret = -ESRCH;
2598 return ret;
2599}
2600
2601static inline void
9330419d 2602__ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u)
1da177e4 2603{
091bb34c 2604#if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
9330419d 2605 struct ip_vs_proto_data *pd;
091bb34c 2606#endif
9330419d 2607
b61a602e
AB
2608 memset(u, 0, sizeof (*u));
2609
1da177e4 2610#ifdef CONFIG_IP_VS_PROTO_TCP
9330419d
HS
2611 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2612 u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2613 u->tcp_fin_timeout = pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
1da177e4
LT
2614#endif
2615#ifdef CONFIG_IP_VS_PROTO_UDP
9330419d 2616 pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
1da177e4 2617 u->udp_timeout =
9330419d 2618 pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
1da177e4
LT
2619#endif
2620}
2621
2622
/*
 * Helpers for the get_arglen[] table: GET_CMDID() turns a sockopt command
 * number into a zero-based table index, the GET_*_ARG_LEN macros give the
 * minimum argument size for each command.
 *
 * The GET_CMDID() argument is parenthesized so that an expression argument
 * is evaluated as a unit before the subtraction.
 */
#define GET_CMDID(cmd)		((cmd) - IP_VS_BASE_CTL)
#define GET_INFO_ARG_LEN	(sizeof(struct ip_vs_getinfo))
#define GET_SERVICES_ARG_LEN	(sizeof(struct ip_vs_get_services))
#define GET_SERVICE_ARG_LEN	(sizeof(struct ip_vs_service_entry))
#define GET_DESTS_ARG_LEN	(sizeof(struct ip_vs_get_dests))
#define GET_TIMEOUT_ARG_LEN	(sizeof(struct ip_vs_timeout_user))
#define GET_DAEMON_ARG_LEN	(sizeof(struct ip_vs_daemon_user) * 2)
2630
9b5b5cff 2631static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
1da177e4
LT
2632 [GET_CMDID(IP_VS_SO_GET_VERSION)] = 64,
2633 [GET_CMDID(IP_VS_SO_GET_INFO)] = GET_INFO_ARG_LEN,
2634 [GET_CMDID(IP_VS_SO_GET_SERVICES)] = GET_SERVICES_ARG_LEN,
2635 [GET_CMDID(IP_VS_SO_GET_SERVICE)] = GET_SERVICE_ARG_LEN,
2636 [GET_CMDID(IP_VS_SO_GET_DESTS)] = GET_DESTS_ARG_LEN,
2637 [GET_CMDID(IP_VS_SO_GET_TIMEOUT)] = GET_TIMEOUT_ARG_LEN,
2638 [GET_CMDID(IP_VS_SO_GET_DAEMON)] = GET_DAEMON_ARG_LEN,
2639};
2640
2641static int
2642do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2643{
2644 unsigned char arg[128];
2645 int ret = 0;
04bcef2a 2646 unsigned int copylen;
fc723250 2647 struct net *net = sock_net(sk);
f131315f 2648 struct netns_ipvs *ipvs = net_ipvs(net);
1da177e4 2649
fc723250 2650 BUG_ON(!net);
df008c91 2651 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
1da177e4
LT
2652 return -EPERM;
2653
04bcef2a
AV
2654 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_GET_MAX)
2655 return -EINVAL;
2656
1da177e4 2657 if (*len < get_arglen[GET_CMDID(cmd)]) {
1e3e238e
HE
2658 pr_err("get_ctl: len %u < %u\n",
2659 *len, get_arglen[GET_CMDID(cmd)]);
1da177e4
LT
2660 return -EINVAL;
2661 }
2662
04bcef2a
AV
2663 copylen = get_arglen[GET_CMDID(cmd)];
2664 if (copylen > 128)
2665 return -EINVAL;
2666
2667 if (copy_from_user(arg, user, copylen) != 0)
1da177e4 2668 return -EFAULT;
ae1d48b2
HS
2669 /*
2670 * Handle daemons first since it has its own locking
2671 */
2672 if (cmd == IP_VS_SO_GET_DAEMON) {
2673 struct ip_vs_daemon_user d[2];
2674
2675 memset(&d, 0, sizeof(d));
2676 if (mutex_lock_interruptible(&ipvs->sync_mutex))
2677 return -ERESTARTSYS;
2678
2679 if (ipvs->sync_state & IP_VS_STATE_MASTER) {
2680 d[0].state = IP_VS_STATE_MASTER;
2681 strlcpy(d[0].mcast_ifn, ipvs->master_mcast_ifn,
2682 sizeof(d[0].mcast_ifn));
2683 d[0].syncid = ipvs->master_syncid;
2684 }
2685 if (ipvs->sync_state & IP_VS_STATE_BACKUP) {
2686 d[1].state = IP_VS_STATE_BACKUP;
2687 strlcpy(d[1].mcast_ifn, ipvs->backup_mcast_ifn,
2688 sizeof(d[1].mcast_ifn));
2689 d[1].syncid = ipvs->backup_syncid;
2690 }
2691 if (copy_to_user(user, &d, sizeof(d)) != 0)
2692 ret = -EFAULT;
2693 mutex_unlock(&ipvs->sync_mutex);
2694 return ret;
2695 }
1da177e4 2696
14cc3e2b 2697 if (mutex_lock_interruptible(&__ip_vs_mutex))
1da177e4
LT
2698 return -ERESTARTSYS;
2699
2700 switch (cmd) {
2701 case IP_VS_SO_GET_VERSION:
2702 {
2703 char buf[64];
2704
2705 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
6f7edb48 2706 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
1da177e4
LT
2707 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2708 ret = -EFAULT;
2709 goto out;
2710 }
2711 *len = strlen(buf)+1;
2712 }
2713 break;
2714
2715 case IP_VS_SO_GET_INFO:
2716 {
2717 struct ip_vs_getinfo info;
2718 info.version = IP_VS_VERSION_CODE;
6f7edb48 2719 info.size = ip_vs_conn_tab_size;
a0840e2e 2720 info.num_services = ipvs->num_services;
1da177e4
LT
2721 if (copy_to_user(user, &info, sizeof(info)) != 0)
2722 ret = -EFAULT;
2723 }
2724 break;
2725
2726 case IP_VS_SO_GET_SERVICES:
2727 {
2728 struct ip_vs_get_services *get;
2729 int size;
2730
2731 get = (struct ip_vs_get_services *)arg;
2732 size = sizeof(*get) +
2733 sizeof(struct ip_vs_service_entry) * get->num_services;
2734 if (*len != size) {
1e3e238e 2735 pr_err("length: %u != %u\n", *len, size);
1da177e4
LT
2736 ret = -EINVAL;
2737 goto out;
2738 }
fc723250 2739 ret = __ip_vs_get_service_entries(net, get, user);
1da177e4
LT
2740 }
2741 break;
2742
2743 case IP_VS_SO_GET_SERVICE:
2744 {
2745 struct ip_vs_service_entry *entry;
2746 struct ip_vs_service *svc;
b18610de 2747 union nf_inet_addr addr;
1da177e4
LT
2748
2749 entry = (struct ip_vs_service_entry *)arg;
b18610de 2750 addr.ip = entry->addr;
1da177e4 2751 if (entry->fwmark)
fc723250 2752 svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark);
1da177e4 2753 else
fc723250
HS
2754 svc = __ip_vs_service_find(net, AF_INET,
2755 entry->protocol, &addr,
2756 entry->port);
1da177e4
LT
2757 if (svc) {
2758 ip_vs_copy_service(entry, svc);
2759 if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2760 ret = -EFAULT;
1da177e4
LT
2761 } else
2762 ret = -ESRCH;
2763 }
2764 break;
2765
2766 case IP_VS_SO_GET_DESTS:
2767 {
2768 struct ip_vs_get_dests *get;
2769 int size;
2770
2771 get = (struct ip_vs_get_dests *)arg;
2772 size = sizeof(*get) +
2773 sizeof(struct ip_vs_dest_entry) * get->num_dests;
2774 if (*len != size) {
1e3e238e 2775 pr_err("length: %u != %u\n", *len, size);
1da177e4
LT
2776 ret = -EINVAL;
2777 goto out;
2778 }
fc723250 2779 ret = __ip_vs_get_dest_entries(net, get, user);
1da177e4
LT
2780 }
2781 break;
2782
2783 case IP_VS_SO_GET_TIMEOUT:
2784 {
2785 struct ip_vs_timeout_user t;
2786
9330419d 2787 __ip_vs_get_timeouts(net, &t);
1da177e4
LT
2788 if (copy_to_user(user, &t, sizeof(t)) != 0)
2789 ret = -EFAULT;
2790 }
2791 break;
2792
1da177e4
LT
2793 default:
2794 ret = -EINVAL;
2795 }
2796
552ad65a 2797out:
14cc3e2b 2798 mutex_unlock(&__ip_vs_mutex);
1da177e4
LT
2799 return ret;
2800}
2801
2802
2803static struct nf_sockopt_ops ip_vs_sockopts = {
2804 .pf = PF_INET,
2805 .set_optmin = IP_VS_BASE_CTL,
2806 .set_optmax = IP_VS_SO_SET_MAX+1,
2807 .set = do_ip_vs_set_ctl,
2808 .get_optmin = IP_VS_BASE_CTL,
2809 .get_optmax = IP_VS_SO_GET_MAX+1,
2810 .get = do_ip_vs_get_ctl,
16fcec35 2811 .owner = THIS_MODULE,
1da177e4
LT
2812};
2813
9a812198
JV
2814/*
2815 * Generic Netlink interface
2816 */
2817
2818/* IPVS genetlink family */
2819static struct genl_family ip_vs_genl_family = {
2820 .id = GENL_ID_GENERATE,
2821 .hdrsize = 0,
2822 .name = IPVS_GENL_NAME,
2823 .version = IPVS_GENL_VERSION,
2824 .maxattr = IPVS_CMD_MAX,
c6d2d445 2825 .netnsok = true, /* Make ipvsadm to work on netns */
9a812198
JV
2826};
2827
2828/* Policy used for first-level command attributes */
2829static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
2830 [IPVS_CMD_ATTR_SERVICE] = { .type = NLA_NESTED },
2831 [IPVS_CMD_ATTR_DEST] = { .type = NLA_NESTED },
2832 [IPVS_CMD_ATTR_DAEMON] = { .type = NLA_NESTED },
2833 [IPVS_CMD_ATTR_TIMEOUT_TCP] = { .type = NLA_U32 },
2834 [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
2835 [IPVS_CMD_ATTR_TIMEOUT_UDP] = { .type = NLA_U32 },
2836};
2837
2838/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
2839static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
2840 [IPVS_DAEMON_ATTR_STATE] = { .type = NLA_U32 },
2841 [IPVS_DAEMON_ATTR_MCAST_IFN] = { .type = NLA_NUL_STRING,
2842 .len = IP_VS_IFNAME_MAXLEN },
2843 [IPVS_DAEMON_ATTR_SYNC_ID] = { .type = NLA_U32 },
2844};
2845
2846/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
2847static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2848 [IPVS_SVC_ATTR_AF] = { .type = NLA_U16 },
2849 [IPVS_SVC_ATTR_PROTOCOL] = { .type = NLA_U16 },
2850 [IPVS_SVC_ATTR_ADDR] = { .type = NLA_BINARY,
2851 .len = sizeof(union nf_inet_addr) },
2852 [IPVS_SVC_ATTR_PORT] = { .type = NLA_U16 },
2853 [IPVS_SVC_ATTR_FWMARK] = { .type = NLA_U32 },
2854 [IPVS_SVC_ATTR_SCHED_NAME] = { .type = NLA_NUL_STRING,
2855 .len = IP_VS_SCHEDNAME_MAXLEN },
0d1e71b0
SH
2856 [IPVS_SVC_ATTR_PE_NAME] = { .type = NLA_NUL_STRING,
2857 .len = IP_VS_PENAME_MAXLEN },
9a812198
JV
2858 [IPVS_SVC_ATTR_FLAGS] = { .type = NLA_BINARY,
2859 .len = sizeof(struct ip_vs_flags) },
2860 [IPVS_SVC_ATTR_TIMEOUT] = { .type = NLA_U32 },
2861 [IPVS_SVC_ATTR_NETMASK] = { .type = NLA_U32 },
2862 [IPVS_SVC_ATTR_STATS] = { .type = NLA_NESTED },
2863};
2864
2865/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
2866static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2867 [IPVS_DEST_ATTR_ADDR] = { .type = NLA_BINARY,
2868 .len = sizeof(union nf_inet_addr) },
2869 [IPVS_DEST_ATTR_PORT] = { .type = NLA_U16 },
2870 [IPVS_DEST_ATTR_FWD_METHOD] = { .type = NLA_U32 },
2871 [IPVS_DEST_ATTR_WEIGHT] = { .type = NLA_U32 },
2872 [IPVS_DEST_ATTR_U_THRESH] = { .type = NLA_U32 },
2873 [IPVS_DEST_ATTR_L_THRESH] = { .type = NLA_U32 },
2874 [IPVS_DEST_ATTR_ACTIVE_CONNS] = { .type = NLA_U32 },
2875 [IPVS_DEST_ATTR_INACT_CONNS] = { .type = NLA_U32 },
2876 [IPVS_DEST_ATTR_PERSIST_CONNS] = { .type = NLA_U32 },
2877 [IPVS_DEST_ATTR_STATS] = { .type = NLA_NESTED },
2878};
2879
2880static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2881 struct ip_vs_stats *stats)
2882{
55a3d4e1 2883 struct ip_vs_stats_user ustats;
9a812198
JV
2884 struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2885 if (!nl_stats)
2886 return -EMSGSIZE;
2887
55a3d4e1 2888 ip_vs_copy_stats(&ustats, stats);
9a812198 2889
969e8e25
DM
2890 if (nla_put_u32(skb, IPVS_STATS_ATTR_CONNS, ustats.conns) ||
2891 nla_put_u32(skb, IPVS_STATS_ATTR_INPKTS, ustats.inpkts) ||
2892 nla_put_u32(skb, IPVS_STATS_ATTR_OUTPKTS, ustats.outpkts) ||
2893 nla_put_u64(skb, IPVS_STATS_ATTR_INBYTES, ustats.inbytes) ||
2894 nla_put_u64(skb, IPVS_STATS_ATTR_OUTBYTES, ustats.outbytes) ||
2895 nla_put_u32(skb, IPVS_STATS_ATTR_CPS, ustats.cps) ||
2896 nla_put_u32(skb, IPVS_STATS_ATTR_INPPS, ustats.inpps) ||
2897 nla_put_u32(skb, IPVS_STATS_ATTR_OUTPPS, ustats.outpps) ||
2898 nla_put_u32(skb, IPVS_STATS_ATTR_INBPS, ustats.inbps) ||
2899 nla_put_u32(skb, IPVS_STATS_ATTR_OUTBPS, ustats.outbps))
2900 goto nla_put_failure;
9a812198
JV
2901 nla_nest_end(skb, nl_stats);
2902
2903 return 0;
2904
2905nla_put_failure:
9a812198
JV
2906 nla_nest_cancel(skb, nl_stats);
2907 return -EMSGSIZE;
2908}
2909
2910static int ip_vs_genl_fill_service(struct sk_buff *skb,
2911 struct ip_vs_service *svc)
2912{
2913 struct nlattr *nl_service;
2914 struct ip_vs_flags flags = { .flags = svc->flags,
2915 .mask = ~0 };
2916
2917 nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2918 if (!nl_service)
2919 return -EMSGSIZE;
2920
969e8e25
DM
2921 if (nla_put_u16(skb, IPVS_SVC_ATTR_AF, svc->af))
2922 goto nla_put_failure;
9a812198 2923 if (svc->fwmark) {
969e8e25
DM
2924 if (nla_put_u32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark))
2925 goto nla_put_failure;
9a812198 2926 } else {
969e8e25
DM
2927 if (nla_put_u16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol) ||
2928 nla_put(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr) ||
2929 nla_put_u16(skb, IPVS_SVC_ATTR_PORT, svc->port))
2930 goto nla_put_failure;
9a812198
JV
2931 }
2932
969e8e25
DM
2933 if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name) ||
2934 (svc->pe &&
2935 nla_put_string(skb, IPVS_SVC_ATTR_PE_NAME, svc->pe->name)) ||
2936 nla_put(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags) ||
2937 nla_put_u32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ) ||
2938 nla_put_u32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask))
2939 goto nla_put_failure;
9a812198
JV
2940 if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2941 goto nla_put_failure;
2942
2943 nla_nest_end(skb, nl_service);
2944
2945 return 0;
2946
2947nla_put_failure:
2948 nla_nest_cancel(skb, nl_service);
2949 return -EMSGSIZE;
2950}
2951
2952static int ip_vs_genl_dump_service(struct sk_buff *skb,
2953 struct ip_vs_service *svc,
2954 struct netlink_callback *cb)
2955{
2956 void *hdr;
2957
15e47304 2958 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
9a812198
JV
2959 &ip_vs_genl_family, NLM_F_MULTI,
2960 IPVS_CMD_NEW_SERVICE);
2961 if (!hdr)
2962 return -EMSGSIZE;
2963
2964 if (ip_vs_genl_fill_service(skb, svc) < 0)
2965 goto nla_put_failure;
2966
2967 return genlmsg_end(skb, hdr);
2968
2969nla_put_failure:
2970 genlmsg_cancel(skb, hdr);
2971 return -EMSGSIZE;
2972}
2973
2974static int ip_vs_genl_dump_services(struct sk_buff *skb,
2975 struct netlink_callback *cb)
2976{
2977 int idx = 0, i;
2978 int start = cb->args[0];
2979 struct ip_vs_service *svc;
fc723250 2980 struct net *net = skb_sknet(skb);
9a812198
JV
2981
2982 mutex_lock(&__ip_vs_mutex);
2983 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2984 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
fc723250 2985 if (++idx <= start || !net_eq(svc->net, net))
9a812198
JV
2986 continue;
2987 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2988 idx--;
2989 goto nla_put_failure;
2990 }
2991 }
2992 }
2993
2994 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2995 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
fc723250 2996 if (++idx <= start || !net_eq(svc->net, net))
9a812198
JV
2997 continue;
2998 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2999 idx--;
3000 goto nla_put_failure;
3001 }
3002 }
3003 }
3004
3005nla_put_failure:
3006 mutex_unlock(&__ip_vs_mutex);
3007 cb->args[0] = idx;
3008
3009 return skb->len;
3010}
3011
fc723250
HS
3012static int ip_vs_genl_parse_service(struct net *net,
3013 struct ip_vs_service_user_kern *usvc,
26c15cfd
JA
3014 struct nlattr *nla, int full_entry,
3015 struct ip_vs_service **ret_svc)
9a812198
JV
3016{
3017 struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
3018 struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
26c15cfd 3019 struct ip_vs_service *svc;
9a812198
JV
3020
3021 /* Parse mandatory identifying service fields first */
3022 if (nla == NULL ||
3023 nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
3024 return -EINVAL;
3025
3026 nla_af = attrs[IPVS_SVC_ATTR_AF];
3027 nla_protocol = attrs[IPVS_SVC_ATTR_PROTOCOL];
3028 nla_addr = attrs[IPVS_SVC_ATTR_ADDR];
3029 nla_port = attrs[IPVS_SVC_ATTR_PORT];
3030 nla_fwmark = attrs[IPVS_SVC_ATTR_FWMARK];
3031
3032 if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
3033 return -EINVAL;
3034
258c8893
SH
3035 memset(usvc, 0, sizeof(*usvc));
3036
c860c6b1 3037 usvc->af = nla_get_u16(nla_af);
f94fd041
JV
3038#ifdef CONFIG_IP_VS_IPV6
3039 if (usvc->af != AF_INET && usvc->af != AF_INET6)
3040#else
3041 if (usvc->af != AF_INET)
3042#endif
9a812198
JV
3043 return -EAFNOSUPPORT;
3044
3045 if (nla_fwmark) {
3046 usvc->protocol = IPPROTO_TCP;
3047 usvc->fwmark = nla_get_u32(nla_fwmark);
3048 } else {
3049 usvc->protocol = nla_get_u16(nla_protocol);
3050 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
3051 usvc->port = nla_get_u16(nla_port);
3052 usvc->fwmark = 0;
3053 }
3054
26c15cfd 3055 if (usvc->fwmark)
fc723250 3056 svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark);
26c15cfd 3057 else
fc723250 3058 svc = __ip_vs_service_find(net, usvc->af, usvc->protocol,
26c15cfd
JA
3059 &usvc->addr, usvc->port);
3060 *ret_svc = svc;
3061
9a812198
JV
3062 /* If a full entry was requested, check for the additional fields */
3063 if (full_entry) {
0d1e71b0 3064 struct nlattr *nla_sched, *nla_flags, *nla_pe, *nla_timeout,
9a812198
JV
3065 *nla_netmask;
3066 struct ip_vs_flags flags;
9a812198
JV
3067
3068 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
0d1e71b0 3069 nla_pe = attrs[IPVS_SVC_ATTR_PE_NAME];
9a812198
JV
3070 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
3071 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
3072 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
3073
3074 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
3075 return -EINVAL;
3076
3077 nla_memcpy(&flags, nla_flags, sizeof(flags));
3078
3079 /* prefill flags from service if it already exists */
26c15cfd 3080 if (svc)
9a812198 3081 usvc->flags = svc->flags;
9a812198
JV
3082
3083 /* set new flags from userland */
3084 usvc->flags = (usvc->flags & ~flags.mask) |
3085 (flags.flags & flags.mask);
c860c6b1 3086 usvc->sched_name = nla_data(nla_sched);
0d1e71b0 3087 usvc->pe_name = nla_pe ? nla_data(nla_pe) : NULL;
9a812198
JV
3088 usvc->timeout = nla_get_u32(nla_timeout);
3089 usvc->netmask = nla_get_u32(nla_netmask);
3090 }
3091
3092 return 0;
3093}
3094
fc723250
HS
3095static struct ip_vs_service *ip_vs_genl_find_service(struct net *net,
3096 struct nlattr *nla)
9a812198 3097{
c860c6b1 3098 struct ip_vs_service_user_kern usvc;
26c15cfd 3099 struct ip_vs_service *svc;
9a812198
JV
3100 int ret;
3101
fc723250 3102 ret = ip_vs_genl_parse_service(net, &usvc, nla, 0, &svc);
26c15cfd 3103 return ret ? ERR_PTR(ret) : svc;
9a812198
JV
3104}
3105
3106static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
3107{
3108 struct nlattr *nl_dest;
3109
3110 nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
3111 if (!nl_dest)
3112 return -EMSGSIZE;
3113
969e8e25
DM
3114 if (nla_put(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr) ||
3115 nla_put_u16(skb, IPVS_DEST_ATTR_PORT, dest->port) ||
3116 nla_put_u32(skb, IPVS_DEST_ATTR_FWD_METHOD,
3117 (atomic_read(&dest->conn_flags) &
3118 IP_VS_CONN_F_FWD_MASK)) ||
3119 nla_put_u32(skb, IPVS_DEST_ATTR_WEIGHT,
3120 atomic_read(&dest->weight)) ||
3121 nla_put_u32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold) ||
3122 nla_put_u32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold) ||
3123 nla_put_u32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
3124 atomic_read(&dest->activeconns)) ||
3125 nla_put_u32(skb, IPVS_DEST_ATTR_INACT_CONNS,
3126 atomic_read(&dest->inactconns)) ||
3127 nla_put_u32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
3128 atomic_read(&dest->persistconns)))
3129 goto nla_put_failure;
9a812198
JV
3130 if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
3131 goto nla_put_failure;
3132
3133 nla_nest_end(skb, nl_dest);
3134
3135 return 0;
3136
3137nla_put_failure:
3138 nla_nest_cancel(skb, nl_dest);
3139 return -EMSGSIZE;
3140}
3141
3142static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
3143 struct netlink_callback *cb)
3144{
3145 void *hdr;
3146
15e47304 3147 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
9a812198
JV
3148 &ip_vs_genl_family, NLM_F_MULTI,
3149 IPVS_CMD_NEW_DEST);
3150 if (!hdr)
3151 return -EMSGSIZE;
3152
3153 if (ip_vs_genl_fill_dest(skb, dest) < 0)
3154 goto nla_put_failure;
3155
3156 return genlmsg_end(skb, hdr);
3157
3158nla_put_failure:
3159 genlmsg_cancel(skb, hdr);
3160 return -EMSGSIZE;
3161}
3162
3163static int ip_vs_genl_dump_dests(struct sk_buff *skb,
3164 struct netlink_callback *cb)
3165{
3166 int idx = 0;
3167 int start = cb->args[0];
3168 struct ip_vs_service *svc;
3169 struct ip_vs_dest *dest;
3170 struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
a0840e2e 3171 struct net *net = skb_sknet(skb);
9a812198
JV
3172
3173 mutex_lock(&__ip_vs_mutex);
3174
3175 /* Try to find the service for which to dump destinations */
3176 if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
3177 IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
3178 goto out_err;
3179
a0840e2e 3180
fc723250 3181 svc = ip_vs_genl_find_service(net, attrs[IPVS_CMD_ATTR_SERVICE]);
9a812198
JV
3182 if (IS_ERR(svc) || svc == NULL)
3183 goto out_err;
3184
3185 /* Dump the destinations */
3186 list_for_each_entry(dest, &svc->destinations, n_list) {
3187 if (++idx <= start)
3188 continue;
3189 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
3190 idx--;
3191 goto nla_put_failure;
3192 }
3193 }
3194
3195nla_put_failure:
3196 cb->args[0] = idx;
9a812198
JV
3197
3198out_err:
3199 mutex_unlock(&__ip_vs_mutex);
3200
3201 return skb->len;
3202}
3203
c860c6b1 3204static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
9a812198
JV
3205 struct nlattr *nla, int full_entry)
3206{
3207 struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
3208 struct nlattr *nla_addr, *nla_port;
3209
3210 /* Parse mandatory identifying destination fields first */
3211 if (nla == NULL ||
3212 nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
3213 return -EINVAL;
3214
3215 nla_addr = attrs[IPVS_DEST_ATTR_ADDR];
3216 nla_port = attrs[IPVS_DEST_ATTR_PORT];
3217
3218 if (!(nla_addr && nla_port))
3219 return -EINVAL;
3220
258c8893
SH
3221 memset(udest, 0, sizeof(*udest));
3222
9a812198
JV
3223 nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
3224 udest->port = nla_get_u16(nla_port);
3225
3226 /* If a full entry was requested, check for the additional fields */
3227 if (full_entry) {
3228 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
3229 *nla_l_thresh;
3230
3231 nla_fwd = attrs[IPVS_DEST_ATTR_FWD_METHOD];
3232 nla_weight = attrs[IPVS_DEST_ATTR_WEIGHT];
3233 nla_u_thresh = attrs[IPVS_DEST_ATTR_U_THRESH];
3234 nla_l_thresh = attrs[IPVS_DEST_ATTR_L_THRESH];
3235
3236 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
3237 return -EINVAL;
3238
3239 udest->conn_flags = nla_get_u32(nla_fwd)
3240 & IP_VS_CONN_F_FWD_MASK;
3241 udest->weight = nla_get_u32(nla_weight);
3242 udest->u_threshold = nla_get_u32(nla_u_thresh);
3243 udest->l_threshold = nla_get_u32(nla_l_thresh);
3244 }
3245
3246 return 0;
3247}
3248
3249static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
3250 const char *mcast_ifn, __be32 syncid)
3251{
3252 struct nlattr *nl_daemon;
3253
3254 nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
3255 if (!nl_daemon)
3256 return -EMSGSIZE;
3257
969e8e25
DM
3258 if (nla_put_u32(skb, IPVS_DAEMON_ATTR_STATE, state) ||
3259 nla_put_string(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn) ||
3260 nla_put_u32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid))
3261 goto nla_put_failure;
9a812198
JV
3262 nla_nest_end(skb, nl_daemon);
3263
3264 return 0;
3265
3266nla_put_failure:
3267 nla_nest_cancel(skb, nl_daemon);
3268 return -EMSGSIZE;
3269}
3270
3271static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
3272 const char *mcast_ifn, __be32 syncid,
3273 struct netlink_callback *cb)
3274{
3275 void *hdr;
15e47304 3276 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
9a812198
JV
3277 &ip_vs_genl_family, NLM_F_MULTI,
3278 IPVS_CMD_NEW_DAEMON);
3279 if (!hdr)
3280 return -EMSGSIZE;
3281
3282 if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
3283 goto nla_put_failure;
3284
3285 return genlmsg_end(skb, hdr);
3286
3287nla_put_failure:
3288 genlmsg_cancel(skb, hdr);
3289 return -EMSGSIZE;
3290}
3291
3292static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
3293 struct netlink_callback *cb)
3294{
a09d1977 3295 struct net *net = skb_sknet(skb);
f131315f
HS
3296 struct netns_ipvs *ipvs = net_ipvs(net);
3297
ae1d48b2 3298 mutex_lock(&ipvs->sync_mutex);
f131315f 3299 if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
9a812198 3300 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
f131315f
HS
3301 ipvs->master_mcast_ifn,
3302 ipvs->master_syncid, cb) < 0)
9a812198
JV
3303 goto nla_put_failure;
3304
3305 cb->args[0] = 1;
3306 }
3307
f131315f 3308 if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
9a812198 3309 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
f131315f
HS
3310 ipvs->backup_mcast_ifn,
3311 ipvs->backup_syncid, cb) < 0)
9a812198
JV
3312 goto nla_put_failure;
3313
3314 cb->args[1] = 1;
3315 }
3316
3317nla_put_failure:
ae1d48b2 3318 mutex_unlock(&ipvs->sync_mutex);
9a812198
JV
3319
3320 return skb->len;
3321}
3322
f131315f 3323static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs)
9a812198
JV
3324{
3325 if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
3326 attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
3327 attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
3328 return -EINVAL;
3329
f131315f
HS
3330 return start_sync_thread(net,
3331 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
9a812198
JV
3332 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
3333 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
3334}
3335
f131315f 3336static int ip_vs_genl_del_daemon(struct net *net, struct nlattr **attrs)
9a812198
JV
3337{
3338 if (!attrs[IPVS_DAEMON_ATTR_STATE])
3339 return -EINVAL;
3340
f131315f
HS
3341 return stop_sync_thread(net,
3342 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
9a812198
JV
3343}
3344
9330419d 3345static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs)
9a812198
JV
3346{
3347 struct ip_vs_timeout_user t;
3348
9330419d 3349 __ip_vs_get_timeouts(net, &t);
9a812198
JV
3350
3351 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
3352 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
3353
3354 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
3355 t.tcp_fin_timeout =
3356 nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
3357
3358 if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
3359 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
3360
9330419d 3361 return ip_vs_set_timeout(net, &t);
9a812198
JV
3362}
3363
ae1d48b2 3364static int ip_vs_genl_set_daemon(struct sk_buff *skb, struct genl_info *info)
9a812198 3365{
9a812198 3366 int ret = 0, cmd;
fc723250 3367 struct net *net;
a0840e2e 3368 struct netns_ipvs *ipvs;
9a812198 3369
fc723250 3370 net = skb_sknet(skb);
a0840e2e 3371 ipvs = net_ipvs(net);
9a812198
JV
3372 cmd = info->genlhdr->cmd;
3373
ae1d48b2 3374 if (cmd == IPVS_CMD_NEW_DAEMON || cmd == IPVS_CMD_DEL_DAEMON) {
9a812198
JV
3375 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
3376
ae1d48b2 3377 mutex_lock(&ipvs->sync_mutex);
9a812198
JV
3378 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
3379 nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
3380 info->attrs[IPVS_CMD_ATTR_DAEMON],
3381 ip_vs_daemon_policy)) {
3382 ret = -EINVAL;
3383 goto out;
3384 }
3385
3386 if (cmd == IPVS_CMD_NEW_DAEMON)
f131315f 3387 ret = ip_vs_genl_new_daemon(net, daemon_attrs);
9a812198 3388 else
f131315f 3389 ret = ip_vs_genl_del_daemon(net, daemon_attrs);
ae1d48b2
HS
3390out:
3391 mutex_unlock(&ipvs->sync_mutex);
3392 }
3393 return ret;
3394}
3395
3396static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3397{
3398 struct ip_vs_service *svc = NULL;
3399 struct ip_vs_service_user_kern usvc;
3400 struct ip_vs_dest_user_kern udest;
3401 int ret = 0, cmd;
3402 int need_full_svc = 0, need_full_dest = 0;
3403 struct net *net;
ae1d48b2
HS
3404
3405 net = skb_sknet(skb);
ae1d48b2
HS
3406 cmd = info->genlhdr->cmd;
3407
3408 mutex_lock(&__ip_vs_mutex);
3409
3410 if (cmd == IPVS_CMD_FLUSH) {
3411 ret = ip_vs_flush(net);
3412 goto out;
3413 } else if (cmd == IPVS_CMD_SET_CONFIG) {
3414 ret = ip_vs_genl_set_config(net, info->attrs);
9a812198
JV
3415 goto out;
3416 } else if (cmd == IPVS_CMD_ZERO &&
3417 !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
fc723250 3418 ret = ip_vs_zero_all(net);
9a812198
JV
3419 goto out;
3420 }
3421
3422 /* All following commands require a service argument, so check if we
3423 * received a valid one. We need a full service specification when
3424 * adding / editing a service. Only identifying members otherwise. */
3425 if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
3426 need_full_svc = 1;
3427
fc723250 3428 ret = ip_vs_genl_parse_service(net, &usvc,
9a812198 3429 info->attrs[IPVS_CMD_ATTR_SERVICE],
26c15cfd 3430 need_full_svc, &svc);
9a812198
JV
3431 if (ret)
3432 goto out;
3433
9a812198
JV
3434 /* Unless we're adding a new service, the service must already exist */
3435 if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
3436 ret = -ESRCH;
3437 goto out;
3438 }
3439
3440 /* Destination commands require a valid destination argument. For
3441 * adding / editing a destination, we need a full destination
3442 * specification. */
3443 if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
3444 cmd == IPVS_CMD_DEL_DEST) {
3445 if (cmd != IPVS_CMD_DEL_DEST)
3446 need_full_dest = 1;
3447
3448 ret = ip_vs_genl_parse_dest(&udest,
3449 info->attrs[IPVS_CMD_ATTR_DEST],
3450 need_full_dest);
3451 if (ret)
3452 goto out;
3453 }
3454
3455 switch (cmd) {
3456 case IPVS_CMD_NEW_SERVICE:
3457 if (svc == NULL)
fc723250 3458 ret = ip_vs_add_service(net, &usvc, &svc);
9a812198
JV
3459 else
3460 ret = -EEXIST;
3461 break;
3462 case IPVS_CMD_SET_SERVICE:
3463 ret = ip_vs_edit_service(svc, &usvc);
3464 break;
3465 case IPVS_CMD_DEL_SERVICE:
3466 ret = ip_vs_del_service(svc);
26c15cfd 3467 /* do not use svc, it can be freed */
9a812198
JV
3468 break;
3469 case IPVS_CMD_NEW_DEST:
3470 ret = ip_vs_add_dest(svc, &udest);
3471 break;
3472 case IPVS_CMD_SET_DEST:
3473 ret = ip_vs_edit_dest(svc, &udest);
3474 break;
3475 case IPVS_CMD_DEL_DEST:
3476 ret = ip_vs_del_dest(svc, &udest);
3477 break;
3478 case IPVS_CMD_ZERO:
3479 ret = ip_vs_zero_service(svc);
3480 break;
3481 default:
3482 ret = -EINVAL;
3483 }
3484
3485out:
9a812198
JV
3486 mutex_unlock(&__ip_vs_mutex);
3487
3488 return ret;
3489}
3490
3491static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3492{
3493 struct sk_buff *msg;
3494 void *reply;
3495 int ret, cmd, reply_cmd;
fc723250 3496 struct net *net;
9a812198 3497
fc723250 3498 net = skb_sknet(skb);
9a812198
JV
3499 cmd = info->genlhdr->cmd;
3500
3501 if (cmd == IPVS_CMD_GET_SERVICE)
3502 reply_cmd = IPVS_CMD_NEW_SERVICE;
3503 else if (cmd == IPVS_CMD_GET_INFO)
3504 reply_cmd = IPVS_CMD_SET_INFO;
3505 else if (cmd == IPVS_CMD_GET_CONFIG)
3506 reply_cmd = IPVS_CMD_SET_CONFIG;
3507 else {
1e3e238e 3508 pr_err("unknown Generic Netlink command\n");
9a812198
JV
3509 return -EINVAL;
3510 }
3511
3512 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
3513 if (!msg)
3514 return -ENOMEM;
3515
3516 mutex_lock(&__ip_vs_mutex);
3517
3518 reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3519 if (reply == NULL)
3520 goto nla_put_failure;
3521
3522 switch (cmd) {
3523 case IPVS_CMD_GET_SERVICE:
3524 {
3525 struct ip_vs_service *svc;
3526
fc723250
HS
3527 svc = ip_vs_genl_find_service(net,
3528 info->attrs[IPVS_CMD_ATTR_SERVICE]);
9a812198
JV
3529 if (IS_ERR(svc)) {
3530 ret = PTR_ERR(svc);
3531 goto out_err;
3532 } else if (svc) {
3533 ret = ip_vs_genl_fill_service(msg, svc);
9a812198
JV
3534 if (ret)
3535 goto nla_put_failure;
3536 } else {
3537 ret = -ESRCH;
3538 goto out_err;
3539 }
3540
3541 break;
3542 }
3543
3544 case IPVS_CMD_GET_CONFIG:
3545 {
3546 struct ip_vs_timeout_user t;
3547
9330419d 3548 __ip_vs_get_timeouts(net, &t);
9a812198 3549#ifdef CONFIG_IP_VS_PROTO_TCP
969e8e25
DM
3550 if (nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP,
3551 t.tcp_timeout) ||
3552 nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3553 t.tcp_fin_timeout))
3554 goto nla_put_failure;
9a812198
JV
3555#endif
3556#ifdef CONFIG_IP_VS_PROTO_UDP
969e8e25
DM
3557 if (nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout))
3558 goto nla_put_failure;
9a812198
JV
3559#endif
3560
3561 break;
3562 }
3563
3564 case IPVS_CMD_GET_INFO:
969e8e25
DM
3565 if (nla_put_u32(msg, IPVS_INFO_ATTR_VERSION,
3566 IP_VS_VERSION_CODE) ||
3567 nla_put_u32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
3568 ip_vs_conn_tab_size))
3569 goto nla_put_failure;
9a812198
JV
3570 break;
3571 }
3572
3573 genlmsg_end(msg, reply);
134e6375 3574 ret = genlmsg_reply(msg, info);
9a812198
JV
3575 goto out;
3576
3577nla_put_failure:
1e3e238e 3578 pr_err("not enough space in Netlink message\n");
9a812198
JV
3579 ret = -EMSGSIZE;
3580
3581out_err:
3582 nlmsg_free(msg);
3583out:
3584 mutex_unlock(&__ip_vs_mutex);
3585
3586 return ret;
3587}
3588
3589
3590static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
3591 {
3592 .cmd = IPVS_CMD_NEW_SERVICE,
3593 .flags = GENL_ADMIN_PERM,
3594 .policy = ip_vs_cmd_policy,
3595 .doit = ip_vs_genl_set_cmd,
3596 },
3597 {
3598 .cmd = IPVS_CMD_SET_SERVICE,
3599 .flags = GENL_ADMIN_PERM,
3600 .policy = ip_vs_cmd_policy,
3601 .doit = ip_vs_genl_set_cmd,
3602 },
3603 {
3604 .cmd = IPVS_CMD_DEL_SERVICE,
3605 .flags = GENL_ADMIN_PERM,
3606 .policy = ip_vs_cmd_policy,
3607 .doit = ip_vs_genl_set_cmd,
3608 },
3609 {
3610 .cmd = IPVS_CMD_GET_SERVICE,
3611 .flags = GENL_ADMIN_PERM,
3612 .doit = ip_vs_genl_get_cmd,
3613 .dumpit = ip_vs_genl_dump_services,
3614 .policy = ip_vs_cmd_policy,
3615 },
3616 {
3617 .cmd = IPVS_CMD_NEW_DEST,
3618 .flags = GENL_ADMIN_PERM,
3619 .policy = ip_vs_cmd_policy,
3620 .doit = ip_vs_genl_set_cmd,
3621 },
3622 {
3623 .cmd = IPVS_CMD_SET_DEST,
3624 .flags = GENL_ADMIN_PERM,
3625 .policy = ip_vs_cmd_policy,
3626 .doit = ip_vs_genl_set_cmd,
3627 },
3628 {
3629 .cmd = IPVS_CMD_DEL_DEST,
3630 .flags = GENL_ADMIN_PERM,
3631 .policy = ip_vs_cmd_policy,
3632 .doit = ip_vs_genl_set_cmd,
3633 },
3634 {
3635 .cmd = IPVS_CMD_GET_DEST,
3636 .flags = GENL_ADMIN_PERM,
3637 .policy = ip_vs_cmd_policy,
3638 .dumpit = ip_vs_genl_dump_dests,
3639 },
3640 {
3641 .cmd = IPVS_CMD_NEW_DAEMON,
3642 .flags = GENL_ADMIN_PERM,
3643 .policy = ip_vs_cmd_policy,
ae1d48b2 3644 .doit = ip_vs_genl_set_daemon,
9a812198
JV
3645 },
3646 {
3647 .cmd = IPVS_CMD_DEL_DAEMON,
3648 .flags = GENL_ADMIN_PERM,
3649 .policy = ip_vs_cmd_policy,
ae1d48b2 3650 .doit = ip_vs_genl_set_daemon,
9a812198
JV
3651 },
3652 {
3653 .cmd = IPVS_CMD_GET_DAEMON,
3654 .flags = GENL_ADMIN_PERM,
3655 .dumpit = ip_vs_genl_dump_daemons,
3656 },
3657 {
3658 .cmd = IPVS_CMD_SET_CONFIG,
3659 .flags = GENL_ADMIN_PERM,
3660 .policy = ip_vs_cmd_policy,
3661 .doit = ip_vs_genl_set_cmd,
3662 },
3663 {
3664 .cmd = IPVS_CMD_GET_CONFIG,
3665 .flags = GENL_ADMIN_PERM,
3666 .doit = ip_vs_genl_get_cmd,
3667 },
3668 {
3669 .cmd = IPVS_CMD_GET_INFO,
3670 .flags = GENL_ADMIN_PERM,
3671 .doit = ip_vs_genl_get_cmd,
3672 },
3673 {
3674 .cmd = IPVS_CMD_ZERO,
3675 .flags = GENL_ADMIN_PERM,
3676 .policy = ip_vs_cmd_policy,
3677 .doit = ip_vs_genl_set_cmd,
3678 },
3679 {
3680 .cmd = IPVS_CMD_FLUSH,
3681 .flags = GENL_ADMIN_PERM,
3682 .doit = ip_vs_genl_set_cmd,
3683 },
3684};
3685
3686static int __init ip_vs_genl_register(void)
3687{
8f698d54
MM
3688 return genl_register_family_with_ops(&ip_vs_genl_family,
3689 ip_vs_genl_ops, ARRAY_SIZE(ip_vs_genl_ops));
9a812198
JV
3690}
3691
3692static void ip_vs_genl_unregister(void)
3693{
3694 genl_unregister_family(&ip_vs_genl_family);
3695}
3696
3697/* End of Generic Netlink interface definitions */
3698
61b1ab45
HS
/*
 * per netns init/exit func.
 */
14e40546 3702#ifdef CONFIG_SYSCTL
2b2d2808 3703static int __net_init ip_vs_control_net_init_sysctl(struct net *net)
61b1ab45 3704{
fc723250
HS
3705 int idx;
3706 struct netns_ipvs *ipvs = net_ipvs(net);
a0840e2e 3707 struct ctl_table *tbl;
fc723250 3708
a0840e2e
HS
3709 atomic_set(&ipvs->dropentry, 0);
3710 spin_lock_init(&ipvs->dropentry_lock);
3711 spin_lock_init(&ipvs->droppacket_lock);
3712 spin_lock_init(&ipvs->securetcp_lock);
a0840e2e
HS
3713
3714 if (!net_eq(net, &init_net)) {
3715 tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL);
3716 if (tbl == NULL)
14e40546 3717 return -ENOMEM;
464dc801
EB
3718
3719 /* Don't export sysctls to unprivileged users */
3720 if (net->user_ns != &init_user_ns)
3721 tbl[0].procname = NULL;
a0840e2e
HS
3722 } else
3723 tbl = vs_vars;
3724 /* Initialize sysctl defaults */
3725 idx = 0;
3726 ipvs->sysctl_amemthresh = 1024;
3727 tbl[idx++].data = &ipvs->sysctl_amemthresh;
3728 ipvs->sysctl_am_droprate = 10;
3729 tbl[idx++].data = &ipvs->sysctl_am_droprate;
3730 tbl[idx++].data = &ipvs->sysctl_drop_entry;
3731 tbl[idx++].data = &ipvs->sysctl_drop_packet;
3732#ifdef CONFIG_IP_VS_NFCT
3733 tbl[idx++].data = &ipvs->sysctl_conntrack;
3734#endif
3735 tbl[idx++].data = &ipvs->sysctl_secure_tcp;
3736 ipvs->sysctl_snat_reroute = 1;
3737 tbl[idx++].data = &ipvs->sysctl_snat_reroute;
3738 ipvs->sysctl_sync_ver = 1;
3739 tbl[idx++].data = &ipvs->sysctl_sync_ver;
f73181c8
PNA
3740 ipvs->sysctl_sync_ports = 1;
3741 tbl[idx++].data = &ipvs->sysctl_sync_ports;
1c003b15
PNA
3742 ipvs->sysctl_sync_qlen_max = nr_free_buffer_pages() / 32;
3743 tbl[idx++].data = &ipvs->sysctl_sync_qlen_max;
3744 ipvs->sysctl_sync_sock_size = 0;
3745 tbl[idx++].data = &ipvs->sysctl_sync_sock_size;
a0840e2e
HS
3746 tbl[idx++].data = &ipvs->sysctl_cache_bypass;
3747 tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
3748 tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template;
59e0350e
SH
3749 ipvs->sysctl_sync_threshold[0] = DEFAULT_SYNC_THRESHOLD;
3750 ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD;
a0840e2e
HS
3751 tbl[idx].data = &ipvs->sysctl_sync_threshold;
3752 tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
749c42b6
JA
3753 ipvs->sysctl_sync_refresh_period = DEFAULT_SYNC_REFRESH_PERIOD;
3754 tbl[idx++].data = &ipvs->sysctl_sync_refresh_period;
3755 ipvs->sysctl_sync_retries = clamp_t(int, DEFAULT_SYNC_RETRIES, 0, 3);
3756 tbl[idx++].data = &ipvs->sysctl_sync_retries;
a0840e2e 3757 tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;
3654e611
JA
3758 ipvs->sysctl_pmtu_disc = 1;
3759 tbl[idx++].data = &ipvs->sysctl_pmtu_disc;
0c12582f 3760 tbl[idx++].data = &ipvs->sysctl_backup_only;
a0840e2e
HS
3761
3762
ec8f23ce 3763 ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl);
0443929f
SH
3764 if (ipvs->sysctl_hdr == NULL) {
3765 if (!net_eq(net, &init_net))
3766 kfree(tbl);
14e40546 3767 return -ENOMEM;
0443929f 3768 }
6ef757f9 3769 ip_vs_start_estimator(net, &ipvs->tot_stats);
a0840e2e 3770 ipvs->sysctl_tbl = tbl;
f6340ee0
HS
3771 /* Schedule defense work */
3772 INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler);
3773 schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
61b1ab45 3774
61b1ab45 3775 return 0;
61b1ab45
HS
3776}
3777
2b2d2808 3778static void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net)
61b1ab45 3779{
b17fc996
HS
3780 struct netns_ipvs *ipvs = net_ipvs(net);
3781
f2431e6e
HS
3782 cancel_delayed_work_sync(&ipvs->defense_work);
3783 cancel_work_sync(&ipvs->defense_work.work);
a0840e2e 3784 unregister_net_sysctl_table(ipvs->sysctl_hdr);
14e40546
SH
3785}
3786
3787#else
3788
2b2d2808
CG
3789static int __net_init ip_vs_control_net_init_sysctl(struct net *net) { return 0; }
3790static void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net) { }
14e40546 3791
0443929f 3792#endif
14e40546 3793
7a4f0761
HS
3794static struct notifier_block ip_vs_dst_notifier = {
3795 .notifier_call = ip_vs_dst_event,
3796};
3797
503cf15a 3798int __net_init ip_vs_control_net_init(struct net *net)
14e40546
SH
3799{
3800 int idx;
3801 struct netns_ipvs *ipvs = net_ipvs(net);
3802
3458e21c 3803 rwlock_init(&ipvs->rs_lock);
14e40546
SH
3804
3805 /* Initialize rs_table */
3806 for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
3807 INIT_LIST_HEAD(&ipvs->rs_table[idx]);
3808
3809 INIT_LIST_HEAD(&ipvs->dest_trash);
3810 atomic_set(&ipvs->ftpsvc_counter, 0);
3811 atomic_set(&ipvs->nullsvc_counter, 0);
3812
3813 /* procfs stats */
3814 ipvs->tot_stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
0a9ee813 3815 if (!ipvs->tot_stats.cpustats)
14e40546 3816 return -ENOMEM;
0a9ee813 3817
14e40546
SH
3818 spin_lock_init(&ipvs->tot_stats.lock);
3819
d4beaa66
G
3820 proc_create("ip_vs", 0, net->proc_net, &ip_vs_info_fops);
3821 proc_create("ip_vs_stats", 0, net->proc_net, &ip_vs_stats_fops);
3822 proc_create("ip_vs_stats_percpu", 0, net->proc_net,
3823 &ip_vs_stats_percpu_fops);
14e40546 3824
503cf15a 3825 if (ip_vs_control_net_init_sysctl(net))
14e40546
SH
3826 goto err;
3827
3828 return 0;
3829
3830err:
2a0751af 3831 free_percpu(ipvs->tot_stats.cpustats);
61b1ab45
HS
3832 return -ENOMEM;
3833}
3834
503cf15a 3835void __net_exit ip_vs_control_net_cleanup(struct net *net)
61b1ab45 3836{
b17fc996
HS
3837 struct netns_ipvs *ipvs = net_ipvs(net);
3838
f2431e6e 3839 ip_vs_trash_cleanup(net);
6ef757f9 3840 ip_vs_stop_estimator(net, &ipvs->tot_stats);
503cf15a 3841 ip_vs_control_net_cleanup_sysctl(net);
ece31ffd
G
3842 remove_proc_entry("ip_vs_stats_percpu", net->proc_net);
3843 remove_proc_entry("ip_vs_stats", net->proc_net);
3844 remove_proc_entry("ip_vs", net->proc_net);
2a0751af 3845 free_percpu(ipvs->tot_stats.cpustats);
61b1ab45
HS
3846}
3847
8537de8a 3848int __init ip_vs_register_nl_ioctl(void)
1da177e4 3849{
fc723250 3850 int ret;
1da177e4 3851
1da177e4
LT
3852 ret = nf_register_sockopt(&ip_vs_sockopts);
3853 if (ret) {
1e3e238e 3854 pr_err("cannot register sockopt.\n");
7a4f0761 3855 goto err_sock;
1da177e4
LT
3856 }
3857
9a812198
JV
3858 ret = ip_vs_genl_register();
3859 if (ret) {
1e3e238e 3860 pr_err("cannot register Generic Netlink interface.\n");
7a4f0761 3861 goto err_genl;
9a812198 3862 }
1da177e4 3863 return 0;
fc723250 3864
7a4f0761
HS
3865err_genl:
3866 nf_unregister_sockopt(&ip_vs_sockopts);
3867err_sock:
fc723250 3868 return ret;
1da177e4
LT
3869}
3870
8537de8a
HS
3871void ip_vs_unregister_nl_ioctl(void)
3872{
3873 ip_vs_genl_unregister();
3874 nf_unregister_sockopt(&ip_vs_sockopts);
3875}
3876
3877int __init ip_vs_control_init(void)
3878{
3879 int idx;
3880 int ret;
3881
3882 EnterFunction(2);
3883
3884 /* Initialize svc_table, ip_vs_svc_fwm_table, rs_table */
3885 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
3886 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3887 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3888 }
3889
3890 smp_wmb(); /* Do we really need it now ? */
3891
3892 ret = register_netdevice_notifier(&ip_vs_dst_notifier);
3893 if (ret < 0)
3894 return ret;
3895
3896 LeaveFunction(2);
3897 return 0;
3898}
3899
1da177e4
LT
3900
3901void ip_vs_control_cleanup(void)
3902{
3903 EnterFunction(2);
7676e345 3904 unregister_netdevice_notifier(&ip_vs_dst_notifier);
1da177e4
LT
3905 LeaveFunction(2);
3906}
This page took 1.290967 seconds and 5 git commands to generate.