ipvs: move struct netns_ipvs
[deliverable/linux.git] / net / netfilter / ipvs / ip_vs_ctl.c
CommitLineData
1da177e4
LT
1/*
2 * IPVS An implementation of the IP virtual server support for the
3 * LINUX operating system. IPVS is now implemented as a module
4 * over the NetFilter framework. IPVS can be used to build a
5 * high-performance and highly available server based on a
6 * cluster of servers.
7 *
1da177e4
LT
8 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
9 * Peter Kese <peter.kese@ijs.si>
10 * Julian Anastasov <ja@ssi.bg>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 *
17 * Changes:
18 *
19 */
20
9aada7ac
HE
21#define KMSG_COMPONENT "IPVS"
22#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
23
1da177e4
LT
24#include <linux/module.h>
25#include <linux/init.h>
26#include <linux/types.h>
4fc268d2 27#include <linux/capability.h>
1da177e4
LT
28#include <linux/fs.h>
29#include <linux/sysctl.h>
30#include <linux/proc_fs.h>
31#include <linux/workqueue.h>
32#include <linux/swap.h>
1da177e4 33#include <linux/seq_file.h>
5a0e3ad6 34#include <linux/slab.h>
1da177e4
LT
35
36#include <linux/netfilter.h>
37#include <linux/netfilter_ipv4.h>
14cc3e2b 38#include <linux/mutex.h>
1da177e4 39
457c4cbc 40#include <net/net_namespace.h>
9330419d 41#include <linux/nsproxy.h>
1da177e4 42#include <net/ip.h>
09571c7a
VB
43#ifdef CONFIG_IP_VS_IPV6
44#include <net/ipv6.h>
45#include <net/ip6_route.h>
46#endif
14c85021 47#include <net/route.h>
1da177e4 48#include <net/sock.h>
9a812198 49#include <net/genetlink.h>
1da177e4
LT
50
51#include <asm/uaccess.h>
52
53#include <net/ip_vs.h>
54
55/* mutex serializing IPVS sockopts; [gs]etsockopt may sleep. */
14cc3e2b 56static DEFINE_MUTEX(__ip_vs_mutex);
1da177e4
LT
57
58/* lock for service table */
59static DEFINE_RWLOCK(__ip_vs_svc_lock);
60
1da177e4 61/* sysctl variables */
1da177e4
LT
62
63#ifdef CONFIG_IP_VS_DEBUG
/* Current IPVS debug verbosity; statics start out as zero. */
static int sysctl_ip_vs_debug_level;

/* Accessor for the debug level, so other IPVS files need not see the variable. */
int ip_vs_get_debug_level(void)
{
	int level = sysctl_ip_vs_debug_level;

	return level;
}
70#endif
71
09571c7a
VB
72#ifdef CONFIG_IP_VS_IPV6
73/* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
4a98480b
HS
74static int __ip_vs_addr_is_local_v6(struct net *net,
75 const struct in6_addr *addr)
09571c7a
VB
76{
77 struct rt6_info *rt;
78 struct flowi fl = {
79 .oif = 0,
5811662b
CG
80 .fl6_dst = *addr,
81 .fl6_src = { .s6_addr32 = {0, 0, 0, 0} },
09571c7a
VB
82 };
83
4a98480b 84 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl);
09571c7a
VB
85 if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK))
86 return 1;
87
88 return 0;
89}
90#endif
1da177e4 91/*
af9debd4
JA
92 * update_defense_level is called from keventd and from sysctl,
93 * so it needs to protect itself from softirqs
1da177e4 94 */
9330419d 95static void update_defense_level(struct netns_ipvs *ipvs)
1da177e4
LT
96{
97 struct sysinfo i;
98 static int old_secure_tcp = 0;
99 int availmem;
100 int nomem;
101 int to_change = -1;
102
103 /* we only count free and buffered memory (in pages) */
104 si_meminfo(&i);
105 availmem = i.freeram + i.bufferram;
106 /* however in linux 2.5 the i.bufferram is total page cache size,
107 we need adjust it */
108 /* si_swapinfo(&i); */
109 /* availmem = availmem - (i.totalswap - i.freeswap); */
110
a0840e2e 111 nomem = (availmem < ipvs->sysctl_amemthresh);
1da177e4 112
af9debd4
JA
113 local_bh_disable();
114
1da177e4 115 /* drop_entry */
a0840e2e
HS
116 spin_lock(&ipvs->dropentry_lock);
117 switch (ipvs->sysctl_drop_entry) {
1da177e4 118 case 0:
a0840e2e 119 atomic_set(&ipvs->dropentry, 0);
1da177e4
LT
120 break;
121 case 1:
122 if (nomem) {
a0840e2e
HS
123 atomic_set(&ipvs->dropentry, 1);
124 ipvs->sysctl_drop_entry = 2;
1da177e4 125 } else {
a0840e2e 126 atomic_set(&ipvs->dropentry, 0);
1da177e4
LT
127 }
128 break;
129 case 2:
130 if (nomem) {
a0840e2e 131 atomic_set(&ipvs->dropentry, 1);
1da177e4 132 } else {
a0840e2e
HS
133 atomic_set(&ipvs->dropentry, 0);
134 ipvs->sysctl_drop_entry = 1;
1da177e4
LT
135 };
136 break;
137 case 3:
a0840e2e 138 atomic_set(&ipvs->dropentry, 1);
1da177e4
LT
139 break;
140 }
a0840e2e 141 spin_unlock(&ipvs->dropentry_lock);
1da177e4
LT
142
143 /* drop_packet */
a0840e2e
HS
144 spin_lock(&ipvs->droppacket_lock);
145 switch (ipvs->sysctl_drop_packet) {
1da177e4 146 case 0:
a0840e2e 147 ipvs->drop_rate = 0;
1da177e4
LT
148 break;
149 case 1:
150 if (nomem) {
a0840e2e
HS
151 ipvs->drop_rate = ipvs->drop_counter
152 = ipvs->sysctl_amemthresh /
153 (ipvs->sysctl_amemthresh-availmem);
154 ipvs->sysctl_drop_packet = 2;
1da177e4 155 } else {
a0840e2e 156 ipvs->drop_rate = 0;
1da177e4
LT
157 }
158 break;
159 case 2:
160 if (nomem) {
a0840e2e
HS
161 ipvs->drop_rate = ipvs->drop_counter
162 = ipvs->sysctl_amemthresh /
163 (ipvs->sysctl_amemthresh-availmem);
1da177e4 164 } else {
a0840e2e
HS
165 ipvs->drop_rate = 0;
166 ipvs->sysctl_drop_packet = 1;
1da177e4
LT
167 }
168 break;
169 case 3:
a0840e2e 170 ipvs->drop_rate = ipvs->sysctl_am_droprate;
1da177e4
LT
171 break;
172 }
a0840e2e 173 spin_unlock(&ipvs->droppacket_lock);
1da177e4
LT
174
175 /* secure_tcp */
a0840e2e
HS
176 spin_lock(&ipvs->securetcp_lock);
177 switch (ipvs->sysctl_secure_tcp) {
1da177e4
LT
178 case 0:
179 if (old_secure_tcp >= 2)
180 to_change = 0;
181 break;
182 case 1:
183 if (nomem) {
184 if (old_secure_tcp < 2)
185 to_change = 1;
a0840e2e 186 ipvs->sysctl_secure_tcp = 2;
1da177e4
LT
187 } else {
188 if (old_secure_tcp >= 2)
189 to_change = 0;
190 }
191 break;
192 case 2:
193 if (nomem) {
194 if (old_secure_tcp < 2)
195 to_change = 1;
196 } else {
197 if (old_secure_tcp >= 2)
198 to_change = 0;
a0840e2e 199 ipvs->sysctl_secure_tcp = 1;
1da177e4
LT
200 }
201 break;
202 case 3:
203 if (old_secure_tcp < 2)
204 to_change = 1;
205 break;
206 }
a0840e2e 207 old_secure_tcp = ipvs->sysctl_secure_tcp;
1da177e4 208 if (to_change >= 0)
9330419d 209 ip_vs_protocol_timeout_change(ipvs,
a0840e2e
HS
210 ipvs->sysctl_secure_tcp > 1);
211 spin_unlock(&ipvs->securetcp_lock);
af9debd4
JA
212
213 local_bh_enable();
1da177e4
LT
214}
215
216
217/*
218 * Timer for checking the defense
219 */
220#define DEFENSE_TIMER_PERIOD 1*HZ
1da177e4 221
c4028958 222static void defense_work_handler(struct work_struct *work)
1da177e4 223{
f6340ee0
HS
224 struct netns_ipvs *ipvs =
225 container_of(work, struct netns_ipvs, defense_work.work);
9330419d
HS
226
227 update_defense_level(ipvs);
a0840e2e 228 if (atomic_read(&ipvs->dropentry))
f6340ee0
HS
229 ip_vs_random_dropentry(ipvs->net);
230 schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
1da177e4
LT
231}
232
233int
234ip_vs_use_count_inc(void)
235{
236 return try_module_get(THIS_MODULE);
237}
238
239void
240ip_vs_use_count_dec(void)
241{
242 module_put(THIS_MODULE);
243}
244
245
246/*
247 * Hash table: for virtual service lookups
248 */
249#define IP_VS_SVC_TAB_BITS 8
250#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
251#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
252
253/* the service table hashed by <protocol, addr, port> */
254static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
255/* the service table hashed by fwmark */
256static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
257
1da177e4
LT
258
259/*
260 * Returns hash value for virtual service
261 */
fc723250
HS
262static inline unsigned
263ip_vs_svc_hashkey(struct net *net, int af, unsigned proto,
264 const union nf_inet_addr *addr, __be16 port)
1da177e4
LT
265{
266 register unsigned porth = ntohs(port);
b18610de 267 __be32 addr_fold = addr->ip;
1da177e4 268
b18610de
JV
269#ifdef CONFIG_IP_VS_IPV6
270 if (af == AF_INET6)
271 addr_fold = addr->ip6[0]^addr->ip6[1]^
272 addr->ip6[2]^addr->ip6[3];
273#endif
fc723250 274 addr_fold ^= ((size_t)net>>8);
b18610de
JV
275
276 return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
1da177e4
LT
277 & IP_VS_SVC_TAB_MASK;
278}
279
280/*
281 * Returns hash value of fwmark for virtual service lookup
282 */
fc723250 283static inline unsigned ip_vs_svc_fwm_hashkey(struct net *net, __u32 fwmark)
1da177e4 284{
fc723250 285 return (((size_t)net>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK;
1da177e4
LT
286}
287
288/*
fc723250 289 * Hashes a service in the ip_vs_svc_table by <netns,proto,addr,port>
1da177e4
LT
290 * or in the ip_vs_svc_fwm_table by fwmark.
291 * Should be called with locked tables.
292 */
293static int ip_vs_svc_hash(struct ip_vs_service *svc)
294{
295 unsigned hash;
296
297 if (svc->flags & IP_VS_SVC_F_HASHED) {
1e3e238e
HE
298 pr_err("%s(): request for already hashed, called from %pF\n",
299 __func__, __builtin_return_address(0));
1da177e4
LT
300 return 0;
301 }
302
303 if (svc->fwmark == 0) {
304 /*
fc723250 305 * Hash it by <netns,protocol,addr,port> in ip_vs_svc_table
1da177e4 306 */
fc723250
HS
307 hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol,
308 &svc->addr, svc->port);
1da177e4
LT
309 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
310 } else {
311 /*
fc723250 312 * Hash it by fwmark in svc_fwm_table
1da177e4 313 */
fc723250 314 hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark);
1da177e4
LT
315 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
316 }
317
318 svc->flags |= IP_VS_SVC_F_HASHED;
319 /* increase its refcnt because it is referenced by the svc table */
320 atomic_inc(&svc->refcnt);
321 return 1;
322}
323
324
325/*
fc723250 326 * Unhashes a service from svc_table / svc_fwm_table.
1da177e4
LT
327 * Should be called with locked tables.
328 */
329static int ip_vs_svc_unhash(struct ip_vs_service *svc)
330{
331 if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
1e3e238e
HE
332 pr_err("%s(): request for unhash flagged, called from %pF\n",
333 __func__, __builtin_return_address(0));
1da177e4
LT
334 return 0;
335 }
336
337 if (svc->fwmark == 0) {
fc723250 338 /* Remove it from the svc_table table */
1da177e4
LT
339 list_del(&svc->s_list);
340 } else {
fc723250 341 /* Remove it from the svc_fwm_table table */
1da177e4
LT
342 list_del(&svc->f_list);
343 }
344
345 svc->flags &= ~IP_VS_SVC_F_HASHED;
346 atomic_dec(&svc->refcnt);
347 return 1;
348}
349
350
351/*
fc723250 352 * Get service by {netns, proto,addr,port} in the service table.
1da177e4 353 */
b18610de 354static inline struct ip_vs_service *
fc723250
HS
355__ip_vs_service_find(struct net *net, int af, __u16 protocol,
356 const union nf_inet_addr *vaddr, __be16 vport)
1da177e4
LT
357{
358 unsigned hash;
359 struct ip_vs_service *svc;
360
361 /* Check for "full" addressed entries */
fc723250 362 hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport);
1da177e4
LT
363
364 list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
b18610de
JV
365 if ((svc->af == af)
366 && ip_vs_addr_equal(af, &svc->addr, vaddr)
1da177e4 367 && (svc->port == vport)
fc723250
HS
368 && (svc->protocol == protocol)
369 && net_eq(svc->net, net)) {
1da177e4 370 /* HIT */
1da177e4
LT
371 return svc;
372 }
373 }
374
375 return NULL;
376}
377
378
379/*
380 * Get service by {fwmark} in the service table.
381 */
b18610de 382static inline struct ip_vs_service *
fc723250 383__ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark)
1da177e4
LT
384{
385 unsigned hash;
386 struct ip_vs_service *svc;
387
388 /* Check for fwmark addressed entries */
fc723250 389 hash = ip_vs_svc_fwm_hashkey(net, fwmark);
1da177e4
LT
390
391 list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
fc723250
HS
392 if (svc->fwmark == fwmark && svc->af == af
393 && net_eq(svc->net, net)) {
1da177e4 394 /* HIT */
1da177e4
LT
395 return svc;
396 }
397 }
398
399 return NULL;
400}
401
402struct ip_vs_service *
fc723250 403ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
3c2e0505 404 const union nf_inet_addr *vaddr, __be16 vport)
1da177e4
LT
405{
406 struct ip_vs_service *svc;
763f8d0e 407 struct netns_ipvs *ipvs = net_ipvs(net);
3c2e0505 408
1da177e4
LT
409 read_lock(&__ip_vs_svc_lock);
410
411 /*
412 * Check the table hashed by fwmark first
413 */
097fc76a
JA
414 if (fwmark) {
415 svc = __ip_vs_svc_fwm_find(net, af, fwmark);
416 if (svc)
417 goto out;
418 }
1da177e4
LT
419
420 /*
421 * Check the table hashed by <protocol,addr,port>
422 * for "full" addressed entries
423 */
fc723250 424 svc = __ip_vs_service_find(net, af, protocol, vaddr, vport);
1da177e4
LT
425
426 if (svc == NULL
427 && protocol == IPPROTO_TCP
763f8d0e 428 && atomic_read(&ipvs->ftpsvc_counter)
1da177e4
LT
429 && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
430 /*
431 * Check if ftp service entry exists, the packet
432 * might belong to FTP data connections.
433 */
fc723250 434 svc = __ip_vs_service_find(net, af, protocol, vaddr, FTPPORT);
1da177e4
LT
435 }
436
437 if (svc == NULL
763f8d0e 438 && atomic_read(&ipvs->nullsvc_counter)) {
1da177e4
LT
439 /*
440 * Check if the catch-all port (port zero) exists
441 */
fc723250 442 svc = __ip_vs_service_find(net, af, protocol, vaddr, 0);
1da177e4
LT
443 }
444
445 out:
26c15cfd
JA
446 if (svc)
447 atomic_inc(&svc->usecnt);
1da177e4
LT
448 read_unlock(&__ip_vs_svc_lock);
449
3c2e0505
JV
450 IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
451 fwmark, ip_vs_proto_name(protocol),
452 IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
453 svc ? "hit" : "not hit");
1da177e4
LT
454
455 return svc;
456}
457
458
459static inline void
460__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
461{
462 atomic_inc(&svc->refcnt);
463 dest->svc = svc;
464}
465
26c15cfd 466static void
1da177e4
LT
467__ip_vs_unbind_svc(struct ip_vs_dest *dest)
468{
469 struct ip_vs_service *svc = dest->svc;
470
471 dest->svc = NULL;
26c15cfd
JA
472 if (atomic_dec_and_test(&svc->refcnt)) {
473 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
474 svc->fwmark,
475 IP_VS_DBG_ADDR(svc->af, &svc->addr),
476 ntohs(svc->port), atomic_read(&svc->usecnt));
b17fc996 477 free_percpu(svc->stats.cpustats);
1da177e4 478 kfree(svc);
26c15cfd 479 }
1da177e4
LT
480}
481
482
483/*
484 * Returns hash value for real service
485 */
7937df15
JV
486static inline unsigned ip_vs_rs_hashkey(int af,
487 const union nf_inet_addr *addr,
488 __be16 port)
1da177e4
LT
489{
490 register unsigned porth = ntohs(port);
7937df15
JV
491 __be32 addr_fold = addr->ip;
492
493#ifdef CONFIG_IP_VS_IPV6
494 if (af == AF_INET6)
495 addr_fold = addr->ip6[0]^addr->ip6[1]^
496 addr->ip6[2]^addr->ip6[3];
497#endif
1da177e4 498
7937df15 499 return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
1da177e4
LT
500 & IP_VS_RTAB_MASK;
501}
502
503/*
fc723250 504 * Hashes ip_vs_dest in rs_table by <proto,addr,port>.
1da177e4
LT
505 * should be called with locked tables.
506 */
fc723250 507static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
1da177e4
LT
508{
509 unsigned hash;
510
511 if (!list_empty(&dest->d_list)) {
512 return 0;
513 }
514
515 /*
516 * Hash by proto,addr,port,
517 * which are the parameters of the real service.
518 */
7937df15
JV
519 hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
520
fc723250 521 list_add(&dest->d_list, &ipvs->rs_table[hash]);
1da177e4
LT
522
523 return 1;
524}
525
526/*
fc723250 527 * UNhashes ip_vs_dest from rs_table.
1da177e4
LT
528 * should be called with locked tables.
529 */
530static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
531{
532 /*
fc723250 533 * Remove it from the rs_table table.
1da177e4
LT
534 */
535 if (!list_empty(&dest->d_list)) {
536 list_del(&dest->d_list);
537 INIT_LIST_HEAD(&dest->d_list);
538 }
539
540 return 1;
541}
542
543/*
544 * Lookup real service by <proto,addr,port> in the real service table.
545 */
546struct ip_vs_dest *
fc723250 547ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
7937df15
JV
548 const union nf_inet_addr *daddr,
549 __be16 dport)
1da177e4 550{
fc723250 551 struct netns_ipvs *ipvs = net_ipvs(net);
1da177e4
LT
552 unsigned hash;
553 struct ip_vs_dest *dest;
554
555 /*
556 * Check for "full" addressed entries
557 * Return the first found entry
558 */
7937df15 559 hash = ip_vs_rs_hashkey(af, daddr, dport);
1da177e4 560
a0840e2e 561 read_lock(&ipvs->rs_lock);
fc723250 562 list_for_each_entry(dest, &ipvs->rs_table[hash], d_list) {
7937df15
JV
563 if ((dest->af == af)
564 && ip_vs_addr_equal(af, &dest->addr, daddr)
1da177e4
LT
565 && (dest->port == dport)
566 && ((dest->protocol == protocol) ||
567 dest->vfwmark)) {
568 /* HIT */
a0840e2e 569 read_unlock(&ipvs->rs_lock);
1da177e4
LT
570 return dest;
571 }
572 }
a0840e2e 573 read_unlock(&ipvs->rs_lock);
1da177e4
LT
574
575 return NULL;
576}
577
578/*
579 * Lookup destination by {addr,port} in the given service
580 */
581static struct ip_vs_dest *
7937df15
JV
582ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
583 __be16 dport)
1da177e4
LT
584{
585 struct ip_vs_dest *dest;
586
587 /*
588 * Find the destination for the given service
589 */
590 list_for_each_entry(dest, &svc->destinations, n_list) {
7937df15
JV
591 if ((dest->af == svc->af)
592 && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
593 && (dest->port == dport)) {
1da177e4
LT
594 /* HIT */
595 return dest;
596 }
597 }
598
599 return NULL;
600}
601
1e356f9c
RB
602/*
603 * Find destination by {daddr,dport,vaddr,protocol}
604 * Cretaed to be used in ip_vs_process_message() in
605 * the backup synchronization daemon. It finds the
606 * destination to be bound to the received connection
607 * on the backup.
608 *
609 * ip_vs_lookup_real_service() looked promissing, but
610 * seems not working as expected.
611 */
fc723250
HS
612struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af,
613 const union nf_inet_addr *daddr,
7937df15
JV
614 __be16 dport,
615 const union nf_inet_addr *vaddr,
0e051e68 616 __be16 vport, __u16 protocol, __u32 fwmark)
1e356f9c
RB
617{
618 struct ip_vs_dest *dest;
619 struct ip_vs_service *svc;
620
fc723250 621 svc = ip_vs_service_get(net, af, fwmark, protocol, vaddr, vport);
1e356f9c
RB
622 if (!svc)
623 return NULL;
624 dest = ip_vs_lookup_dest(svc, daddr, dport);
625 if (dest)
626 atomic_inc(&dest->refcnt);
627 ip_vs_service_put(svc);
628 return dest;
629}
1da177e4
LT
630
631/*
632 * Lookup dest by {svc,addr,port} in the destination trash.
633 * The destination trash is used to hold the destinations that are removed
634 * from the service table but are still referenced by some conn entries.
635 * The reason to add the destination trash is when the dest is temporary
636 * down (either by administrator or by monitor program), the dest can be
637 * picked back from the trash, the remaining connections to the dest can
638 * continue, and the counting information of the dest is also useful for
639 * scheduling.
640 */
641static struct ip_vs_dest *
7937df15
JV
642ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
643 __be16 dport)
1da177e4
LT
644{
645 struct ip_vs_dest *dest, *nxt;
f2431e6e 646 struct netns_ipvs *ipvs = net_ipvs(svc->net);
1da177e4
LT
647
648 /*
649 * Find the destination in trash
650 */
f2431e6e 651 list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
7937df15
JV
652 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
653 "dest->refcnt=%d\n",
654 dest->vfwmark,
655 IP_VS_DBG_ADDR(svc->af, &dest->addr),
656 ntohs(dest->port),
657 atomic_read(&dest->refcnt));
658 if (dest->af == svc->af &&
659 ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
1da177e4
LT
660 dest->port == dport &&
661 dest->vfwmark == svc->fwmark &&
662 dest->protocol == svc->protocol &&
663 (svc->fwmark ||
7937df15 664 (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
1da177e4
LT
665 dest->vport == svc->port))) {
666 /* HIT */
667 return dest;
668 }
669
670 /*
671 * Try to purge the destination from trash if not referenced
672 */
673 if (atomic_read(&dest->refcnt) == 1) {
7937df15
JV
674 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
675 "from trash\n",
676 dest->vfwmark,
677 IP_VS_DBG_ADDR(svc->af, &dest->addr),
678 ntohs(dest->port));
1da177e4
LT
679 list_del(&dest->n_list);
680 ip_vs_dst_reset(dest);
681 __ip_vs_unbind_svc(dest);
b17fc996 682 free_percpu(dest->stats.cpustats);
1da177e4
LT
683 kfree(dest);
684 }
685 }
686
687 return NULL;
688}
689
690
691/*
692 * Clean up all the destinations in the trash
693 * Called by the ip_vs_control_cleanup()
694 *
695 * When the ip_vs_control_clearup is activated by ipvs module exit,
696 * the service tables must have been flushed and all the connections
697 * are expired, and the refcnt of each destination in the trash must
698 * be 1, so we simply release them here.
699 */
f2431e6e 700static void ip_vs_trash_cleanup(struct net *net)
1da177e4
LT
701{
702 struct ip_vs_dest *dest, *nxt;
f2431e6e 703 struct netns_ipvs *ipvs = net_ipvs(net);
1da177e4 704
f2431e6e 705 list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
1da177e4
LT
706 list_del(&dest->n_list);
707 ip_vs_dst_reset(dest);
708 __ip_vs_unbind_svc(dest);
b17fc996 709 free_percpu(dest->stats.cpustats);
1da177e4
LT
710 kfree(dest);
711 }
712}
713
714
715static void
716ip_vs_zero_stats(struct ip_vs_stats *stats)
717{
718 spin_lock_bh(&stats->lock);
e93615d0 719
e9c0ce23 720 memset(&stats->ustats, 0, sizeof(stats->ustats));
1da177e4 721 ip_vs_zero_estimator(stats);
e93615d0 722
3a14a313 723 spin_unlock_bh(&stats->lock);
1da177e4
LT
724}
725
726/*
727 * Update a destination in the given service
728 */
729static void
26c15cfd
JA
730__ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
731 struct ip_vs_dest_user_kern *udest, int add)
1da177e4 732{
fc723250 733 struct netns_ipvs *ipvs = net_ipvs(svc->net);
1da177e4
LT
734 int conn_flags;
735
736 /* set the weight and the flags */
737 atomic_set(&dest->weight, udest->weight);
3575792e
JA
738 conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
739 conn_flags |= IP_VS_CONN_F_INACTIVE;
1da177e4 740
1da177e4 741 /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
3575792e 742 if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) {
1da177e4
LT
743 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
744 } else {
745 /*
fc723250 746 * Put the real service in rs_table if not present.
1da177e4
LT
747 * For now only for NAT!
748 */
a0840e2e 749 write_lock_bh(&ipvs->rs_lock);
fc723250 750 ip_vs_rs_hash(ipvs, dest);
a0840e2e 751 write_unlock_bh(&ipvs->rs_lock);
1da177e4
LT
752 }
753 atomic_set(&dest->conn_flags, conn_flags);
754
755 /* bind the service */
756 if (!dest->svc) {
757 __ip_vs_bind_svc(dest, svc);
758 } else {
759 if (dest->svc != svc) {
760 __ip_vs_unbind_svc(dest);
761 ip_vs_zero_stats(&dest->stats);
762 __ip_vs_bind_svc(dest, svc);
763 }
764 }
765
766 /* set the dest status flags */
767 dest->flags |= IP_VS_DEST_F_AVAILABLE;
768
769 if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
770 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
771 dest->u_threshold = udest->u_threshold;
772 dest->l_threshold = udest->l_threshold;
26c15cfd 773
fc604767
JA
774 spin_lock(&dest->dst_lock);
775 ip_vs_dst_reset(dest);
776 spin_unlock(&dest->dst_lock);
777
26c15cfd 778 if (add)
29c2026f 779 ip_vs_new_estimator(svc->net, &dest->stats);
26c15cfd
JA
780
781 write_lock_bh(&__ip_vs_svc_lock);
782
783 /* Wait until all other svc users go away */
784 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
785
786 if (add) {
787 list_add(&dest->n_list, &svc->destinations);
788 svc->num_dests++;
789 }
790
791 /* call the update_service, because server weight may be changed */
792 if (svc->scheduler->update_service)
793 svc->scheduler->update_service(svc);
794
795 write_unlock_bh(&__ip_vs_svc_lock);
1da177e4
LT
796}
797
798
799/*
800 * Create a destination for the given service
801 */
802static int
c860c6b1 803ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
1da177e4
LT
804 struct ip_vs_dest **dest_p)
805{
806 struct ip_vs_dest *dest;
807 unsigned atype;
808
809 EnterFunction(2);
810
09571c7a
VB
811#ifdef CONFIG_IP_VS_IPV6
812 if (svc->af == AF_INET6) {
813 atype = ipv6_addr_type(&udest->addr.in6);
3bfb92f4
SW
814 if ((!(atype & IPV6_ADDR_UNICAST) ||
815 atype & IPV6_ADDR_LINKLOCAL) &&
4a98480b 816 !__ip_vs_addr_is_local_v6(svc->net, &udest->addr.in6))
09571c7a
VB
817 return -EINVAL;
818 } else
819#endif
820 {
4a98480b 821 atype = inet_addr_type(svc->net, udest->addr.ip);
09571c7a
VB
822 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
823 return -EINVAL;
824 }
1da177e4 825
dee06e47 826 dest = kzalloc(sizeof(struct ip_vs_dest), GFP_KERNEL);
1da177e4 827 if (dest == NULL) {
1e3e238e 828 pr_err("%s(): no memory.\n", __func__);
1da177e4
LT
829 return -ENOMEM;
830 }
b17fc996
HS
831 dest->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
832 if (!dest->stats.cpustats) {
833 pr_err("%s() alloc_percpu failed\n", __func__);
834 goto err_alloc;
835 }
1da177e4 836
c860c6b1 837 dest->af = svc->af;
1da177e4 838 dest->protocol = svc->protocol;
c860c6b1 839 dest->vaddr = svc->addr;
1da177e4
LT
840 dest->vport = svc->port;
841 dest->vfwmark = svc->fwmark;
c860c6b1 842 ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
1da177e4
LT
843 dest->port = udest->port;
844
845 atomic_set(&dest->activeconns, 0);
846 atomic_set(&dest->inactconns, 0);
847 atomic_set(&dest->persistconns, 0);
26c15cfd 848 atomic_set(&dest->refcnt, 1);
1da177e4
LT
849
850 INIT_LIST_HEAD(&dest->d_list);
851 spin_lock_init(&dest->dst_lock);
852 spin_lock_init(&dest->stats.lock);
26c15cfd 853 __ip_vs_update_dest(svc, dest, udest, 1);
1da177e4
LT
854
855 *dest_p = dest;
856
857 LeaveFunction(2);
858 return 0;
b17fc996
HS
859
860err_alloc:
861 kfree(dest);
862 return -ENOMEM;
1da177e4
LT
863}
864
865
866/*
867 * Add a destination into an existing service
868 */
869static int
c860c6b1 870ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
871{
872 struct ip_vs_dest *dest;
c860c6b1 873 union nf_inet_addr daddr;
014d730d 874 __be16 dport = udest->port;
1da177e4
LT
875 int ret;
876
877 EnterFunction(2);
878
879 if (udest->weight < 0) {
1e3e238e 880 pr_err("%s(): server weight less than zero\n", __func__);
1da177e4
LT
881 return -ERANGE;
882 }
883
884 if (udest->l_threshold > udest->u_threshold) {
1e3e238e
HE
885 pr_err("%s(): lower threshold is higher than upper threshold\n",
886 __func__);
1da177e4
LT
887 return -ERANGE;
888 }
889
c860c6b1
JV
890 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
891
1da177e4
LT
892 /*
893 * Check if the dest already exists in the list
894 */
7937df15
JV
895 dest = ip_vs_lookup_dest(svc, &daddr, dport);
896
1da177e4 897 if (dest != NULL) {
1e3e238e 898 IP_VS_DBG(1, "%s(): dest already exists\n", __func__);
1da177e4
LT
899 return -EEXIST;
900 }
901
902 /*
903 * Check if the dest already exists in the trash and
904 * is from the same service
905 */
7937df15
JV
906 dest = ip_vs_trash_get_dest(svc, &daddr, dport);
907
1da177e4 908 if (dest != NULL) {
cfc78c5a
JV
909 IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
910 "dest->refcnt=%d, service %u/%s:%u\n",
911 IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
912 atomic_read(&dest->refcnt),
913 dest->vfwmark,
914 IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
915 ntohs(dest->vport));
916
1da177e4
LT
917 /*
918 * Get the destination from the trash
919 */
920 list_del(&dest->n_list);
921
26c15cfd
JA
922 __ip_vs_update_dest(svc, dest, udest, 1);
923 ret = 0;
924 } else {
1da177e4 925 /*
26c15cfd 926 * Allocate and initialize the dest structure
1da177e4 927 */
26c15cfd 928 ret = ip_vs_new_dest(svc, udest, &dest);
1da177e4 929 }
1da177e4
LT
930 LeaveFunction(2);
931
26c15cfd 932 return ret;
1da177e4
LT
933}
934
935
936/*
937 * Edit a destination in the given service
938 */
939static int
c860c6b1 940ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
941{
942 struct ip_vs_dest *dest;
c860c6b1 943 union nf_inet_addr daddr;
014d730d 944 __be16 dport = udest->port;
1da177e4
LT
945
946 EnterFunction(2);
947
948 if (udest->weight < 0) {
1e3e238e 949 pr_err("%s(): server weight less than zero\n", __func__);
1da177e4
LT
950 return -ERANGE;
951 }
952
953 if (udest->l_threshold > udest->u_threshold) {
1e3e238e
HE
954 pr_err("%s(): lower threshold is higher than upper threshold\n",
955 __func__);
1da177e4
LT
956 return -ERANGE;
957 }
958
c860c6b1
JV
959 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
960
1da177e4
LT
961 /*
962 * Lookup the destination list
963 */
7937df15
JV
964 dest = ip_vs_lookup_dest(svc, &daddr, dport);
965
1da177e4 966 if (dest == NULL) {
1e3e238e 967 IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__);
1da177e4
LT
968 return -ENOENT;
969 }
970
26c15cfd 971 __ip_vs_update_dest(svc, dest, udest, 0);
1da177e4
LT
972 LeaveFunction(2);
973
974 return 0;
975}
976
977
978/*
979 * Delete a destination (must be already unlinked from the service)
980 */
29c2026f 981static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest)
1da177e4 982{
a0840e2e
HS
983 struct netns_ipvs *ipvs = net_ipvs(net);
984
29c2026f 985 ip_vs_kill_estimator(net, &dest->stats);
1da177e4
LT
986
987 /*
988 * Remove it from the d-linked list with the real services.
989 */
a0840e2e 990 write_lock_bh(&ipvs->rs_lock);
1da177e4 991 ip_vs_rs_unhash(dest);
a0840e2e 992 write_unlock_bh(&ipvs->rs_lock);
1da177e4
LT
993
994 /*
995 * Decrease the refcnt of the dest, and free the dest
996 * if nobody refers to it (refcnt=0). Otherwise, throw
997 * the destination into the trash.
998 */
999 if (atomic_dec_and_test(&dest->refcnt)) {
26c15cfd
JA
1000 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u\n",
1001 dest->vfwmark,
1002 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1003 ntohs(dest->port));
1da177e4
LT
1004 ip_vs_dst_reset(dest);
1005 /* simply decrease svc->refcnt here, let the caller check
1006 and release the service if nobody refers to it.
1007 Only user context can release destination and service,
1008 and only one user context can update virtual service at a
1009 time, so the operation here is OK */
1010 atomic_dec(&dest->svc->refcnt);
b17fc996 1011 free_percpu(dest->stats.cpustats);
1da177e4
LT
1012 kfree(dest);
1013 } else {
cfc78c5a
JV
1014 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
1015 "dest->refcnt=%d\n",
1016 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1017 ntohs(dest->port),
1018 atomic_read(&dest->refcnt));
f2431e6e 1019 list_add(&dest->n_list, &ipvs->dest_trash);
1da177e4
LT
1020 atomic_inc(&dest->refcnt);
1021 }
1022}
1023
1024
1025/*
1026 * Unlink a destination from the given service
1027 */
1028static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1029 struct ip_vs_dest *dest,
1030 int svcupd)
1031{
1032 dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1033
1034 /*
1035 * Remove it from the d-linked destination list.
1036 */
1037 list_del(&dest->n_list);
1038 svc->num_dests--;
82dfb6f3
SW
1039
1040 /*
1041 * Call the update_service function of its scheduler
1042 */
1043 if (svcupd && svc->scheduler->update_service)
1044 svc->scheduler->update_service(svc);
1da177e4
LT
1045}
1046
1047
1048/*
1049 * Delete a destination server in the given service
1050 */
1051static int
c860c6b1 1052ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
1053{
1054 struct ip_vs_dest *dest;
014d730d 1055 __be16 dport = udest->port;
1da177e4
LT
1056
1057 EnterFunction(2);
1058
7937df15 1059 dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
c860c6b1 1060
1da177e4 1061 if (dest == NULL) {
1e3e238e 1062 IP_VS_DBG(1, "%s(): destination not found!\n", __func__);
1da177e4
LT
1063 return -ENOENT;
1064 }
1065
1066 write_lock_bh(&__ip_vs_svc_lock);
1067
1068 /*
1069 * Wait until all other svc users go away.
1070 */
26c15cfd 1071 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1da177e4
LT
1072
1073 /*
1074 * Unlink dest from the service
1075 */
1076 __ip_vs_unlink_dest(svc, dest, 1);
1077
1078 write_unlock_bh(&__ip_vs_svc_lock);
1079
1080 /*
1081 * Delete the destination
1082 */
a0840e2e 1083 __ip_vs_del_dest(svc->net, dest);
1da177e4
LT
1084
1085 LeaveFunction(2);
1086
1087 return 0;
1088}
1089
1090
1091/*
1092 * Add a service into the service hash table
1093 */
1094static int
fc723250 1095ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
c860c6b1 1096 struct ip_vs_service **svc_p)
1da177e4
LT
1097{
1098 int ret = 0;
1099 struct ip_vs_scheduler *sched = NULL;
0d1e71b0 1100 struct ip_vs_pe *pe = NULL;
1da177e4 1101 struct ip_vs_service *svc = NULL;
a0840e2e 1102 struct netns_ipvs *ipvs = net_ipvs(net);
1da177e4
LT
1103
1104 /* increase the module use count */
1105 ip_vs_use_count_inc();
1106
1107 /* Lookup the scheduler by 'u->sched_name' */
1108 sched = ip_vs_scheduler_get(u->sched_name);
1109 if (sched == NULL) {
1e3e238e 1110 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1da177e4 1111 ret = -ENOENT;
6e08bfb8 1112 goto out_err;
1da177e4
LT
1113 }
1114
0d1e71b0 1115 if (u->pe_name && *u->pe_name) {
e9e5eee8 1116 pe = ip_vs_pe_getbyname(u->pe_name);
0d1e71b0
SH
1117 if (pe == NULL) {
1118 pr_info("persistence engine module ip_vs_pe_%s "
1119 "not found\n", u->pe_name);
1120 ret = -ENOENT;
1121 goto out_err;
1122 }
1123 }
1124
f94fd041 1125#ifdef CONFIG_IP_VS_IPV6
48148938
JV
1126 if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1127 ret = -EINVAL;
1128 goto out_err;
f94fd041
JV
1129 }
1130#endif
1131
dee06e47 1132 svc = kzalloc(sizeof(struct ip_vs_service), GFP_KERNEL);
1da177e4 1133 if (svc == NULL) {
1e3e238e 1134 IP_VS_DBG(1, "%s(): no memory\n", __func__);
1da177e4
LT
1135 ret = -ENOMEM;
1136 goto out_err;
1137 }
b17fc996
HS
1138 svc->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
1139 if (!svc->stats.cpustats) {
1140 pr_err("%s() alloc_percpu failed\n", __func__);
1141 goto out_err;
1142 }
1da177e4
LT
1143
1144 /* I'm the first user of the service */
26c15cfd 1145 atomic_set(&svc->usecnt, 0);
1da177e4
LT
1146 atomic_set(&svc->refcnt, 0);
1147
c860c6b1 1148 svc->af = u->af;
1da177e4 1149 svc->protocol = u->protocol;
c860c6b1 1150 ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
1da177e4
LT
1151 svc->port = u->port;
1152 svc->fwmark = u->fwmark;
1153 svc->flags = u->flags;
1154 svc->timeout = u->timeout * HZ;
1155 svc->netmask = u->netmask;
fc723250 1156 svc->net = net;
1da177e4
LT
1157
1158 INIT_LIST_HEAD(&svc->destinations);
1159 rwlock_init(&svc->sched_lock);
1160 spin_lock_init(&svc->stats.lock);
1161
1162 /* Bind the scheduler */
1163 ret = ip_vs_bind_scheduler(svc, sched);
1164 if (ret)
1165 goto out_err;
1166 sched = NULL;
1167
0d1e71b0
SH
1168 /* Bind the ct retriever */
1169 ip_vs_bind_pe(svc, pe);
1170 pe = NULL;
1171
1da177e4
LT
1172 /* Update the virtual service counters */
1173 if (svc->port == FTPPORT)
763f8d0e 1174 atomic_inc(&ipvs->ftpsvc_counter);
1da177e4 1175 else if (svc->port == 0)
763f8d0e 1176 atomic_inc(&ipvs->nullsvc_counter);
1da177e4 1177
29c2026f 1178 ip_vs_new_estimator(net, &svc->stats);
f94fd041
JV
1179
1180 /* Count only IPv4 services for old get/setsockopt interface */
1181 if (svc->af == AF_INET)
a0840e2e 1182 ipvs->num_services++;
1da177e4
LT
1183
1184 /* Hash the service into the service table */
1185 write_lock_bh(&__ip_vs_svc_lock);
1186 ip_vs_svc_hash(svc);
1187 write_unlock_bh(&__ip_vs_svc_lock);
1188
1189 *svc_p = svc;
1190 return 0;
1191
b17fc996 1192
6e08bfb8 1193 out_err:
1da177e4 1194 if (svc != NULL) {
2fabf35b 1195 ip_vs_unbind_scheduler(svc);
1da177e4
LT
1196 if (svc->inc) {
1197 local_bh_disable();
1198 ip_vs_app_inc_put(svc->inc);
1199 local_bh_enable();
1200 }
b17fc996
HS
1201 if (svc->stats.cpustats)
1202 free_percpu(svc->stats.cpustats);
1da177e4
LT
1203 kfree(svc);
1204 }
1205 ip_vs_scheduler_put(sched);
0d1e71b0 1206 ip_vs_pe_put(pe);
1da177e4 1207
1da177e4
LT
1208 /* decrease the module use count */
1209 ip_vs_use_count_dec();
1210
1211 return ret;
1212}
1213
1214
1215/*
1216 * Edit a service and bind it with a new scheduler
1217 */
1218static int
c860c6b1 1219ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1da177e4
LT
1220{
1221 struct ip_vs_scheduler *sched, *old_sched;
0d1e71b0 1222 struct ip_vs_pe *pe = NULL, *old_pe = NULL;
1da177e4
LT
1223 int ret = 0;
1224
1225 /*
1226 * Lookup the scheduler, by 'u->sched_name'
1227 */
1228 sched = ip_vs_scheduler_get(u->sched_name);
1229 if (sched == NULL) {
1e3e238e 1230 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1da177e4
LT
1231 return -ENOENT;
1232 }
1233 old_sched = sched;
1234
0d1e71b0 1235 if (u->pe_name && *u->pe_name) {
e9e5eee8 1236 pe = ip_vs_pe_getbyname(u->pe_name);
0d1e71b0
SH
1237 if (pe == NULL) {
1238 pr_info("persistence engine module ip_vs_pe_%s "
1239 "not found\n", u->pe_name);
1240 ret = -ENOENT;
1241 goto out;
1242 }
1243 old_pe = pe;
1244 }
1245
f94fd041 1246#ifdef CONFIG_IP_VS_IPV6
48148938
JV
1247 if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1248 ret = -EINVAL;
1249 goto out;
f94fd041
JV
1250 }
1251#endif
1252
1da177e4
LT
1253 write_lock_bh(&__ip_vs_svc_lock);
1254
1255 /*
1256 * Wait until all other svc users go away.
1257 */
26c15cfd 1258 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1da177e4
LT
1259
1260 /*
1261 * Set the flags and timeout value
1262 */
1263 svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1264 svc->timeout = u->timeout * HZ;
1265 svc->netmask = u->netmask;
1266
1267 old_sched = svc->scheduler;
1268 if (sched != old_sched) {
1269 /*
1270 * Unbind the old scheduler
1271 */
1272 if ((ret = ip_vs_unbind_scheduler(svc))) {
1273 old_sched = sched;
9e691ed6 1274 goto out_unlock;
1da177e4
LT
1275 }
1276
1277 /*
1278 * Bind the new scheduler
1279 */
1280 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1281 /*
1282 * If ip_vs_bind_scheduler fails, restore the old
1283 * scheduler.
1284 * The main reason of failure is out of memory.
1285 *
1286 * The question is if the old scheduler can be
1287 * restored all the time. TODO: if it cannot be
1288 * restored some time, we must delete the service,
1289 * otherwise the system may crash.
1290 */
1291 ip_vs_bind_scheduler(svc, old_sched);
1292 old_sched = sched;
9e691ed6 1293 goto out_unlock;
1da177e4
LT
1294 }
1295 }
1296
0d1e71b0
SH
1297 old_pe = svc->pe;
1298 if (pe != old_pe) {
1299 ip_vs_unbind_pe(svc);
1300 ip_vs_bind_pe(svc, pe);
1301 }
1302
9e691ed6 1303 out_unlock:
1da177e4 1304 write_unlock_bh(&__ip_vs_svc_lock);
9e691ed6 1305 out:
6e08bfb8 1306 ip_vs_scheduler_put(old_sched);
0d1e71b0 1307 ip_vs_pe_put(old_pe);
1da177e4
LT
1308 return ret;
1309}
1310
1311
1312/*
1313 * Delete a service from the service list
1314 * - The service must be unlinked, unlocked and not referenced!
1315 * - We are called under _bh lock
1316 */
1317static void __ip_vs_del_service(struct ip_vs_service *svc)
1318{
1319 struct ip_vs_dest *dest, *nxt;
1320 struct ip_vs_scheduler *old_sched;
0d1e71b0 1321 struct ip_vs_pe *old_pe;
a0840e2e 1322 struct netns_ipvs *ipvs = net_ipvs(svc->net);
0d1e71b0
SH
1323
1324 pr_info("%s: enter\n", __func__);
1da177e4 1325
f94fd041
JV
1326 /* Count only IPv4 services for old get/setsockopt interface */
1327 if (svc->af == AF_INET)
a0840e2e 1328 ipvs->num_services--;
f94fd041 1329
29c2026f 1330 ip_vs_kill_estimator(svc->net, &svc->stats);
1da177e4
LT
1331
1332 /* Unbind scheduler */
1333 old_sched = svc->scheduler;
1334 ip_vs_unbind_scheduler(svc);
6e08bfb8 1335 ip_vs_scheduler_put(old_sched);
1da177e4 1336
0d1e71b0
SH
1337 /* Unbind persistence engine */
1338 old_pe = svc->pe;
1339 ip_vs_unbind_pe(svc);
1340 ip_vs_pe_put(old_pe);
1341
1da177e4
LT
1342 /* Unbind app inc */
1343 if (svc->inc) {
1344 ip_vs_app_inc_put(svc->inc);
1345 svc->inc = NULL;
1346 }
1347
1348 /*
1349 * Unlink the whole destination list
1350 */
1351 list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1352 __ip_vs_unlink_dest(svc, dest, 0);
29c2026f 1353 __ip_vs_del_dest(svc->net, dest);
1da177e4
LT
1354 }
1355
1356 /*
1357 * Update the virtual service counters
1358 */
1359 if (svc->port == FTPPORT)
763f8d0e 1360 atomic_dec(&ipvs->ftpsvc_counter);
1da177e4 1361 else if (svc->port == 0)
763f8d0e 1362 atomic_dec(&ipvs->nullsvc_counter);
1da177e4
LT
1363
1364 /*
1365 * Free the service if nobody refers to it
1366 */
26c15cfd
JA
1367 if (atomic_read(&svc->refcnt) == 0) {
1368 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
1369 svc->fwmark,
1370 IP_VS_DBG_ADDR(svc->af, &svc->addr),
1371 ntohs(svc->port), atomic_read(&svc->usecnt));
b17fc996 1372 free_percpu(svc->stats.cpustats);
1da177e4 1373 kfree(svc);
26c15cfd 1374 }
1da177e4
LT
1375
1376 /* decrease the module use count */
1377 ip_vs_use_count_dec();
1378}
1379
1380/*
26c15cfd 1381 * Unlink a service from list and try to delete it if its refcnt reached 0
1da177e4 1382 */
26c15cfd 1383static void ip_vs_unlink_service(struct ip_vs_service *svc)
1da177e4 1384{
1da177e4
LT
1385 /*
1386 * Unhash it from the service table
1387 */
1388 write_lock_bh(&__ip_vs_svc_lock);
1389
1390 ip_vs_svc_unhash(svc);
1391
1392 /*
1393 * Wait until all the svc users go away.
1394 */
26c15cfd 1395 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1da177e4
LT
1396
1397 __ip_vs_del_service(svc);
1398
1399 write_unlock_bh(&__ip_vs_svc_lock);
26c15cfd
JA
1400}
1401
1402/*
1403 * Delete a service from the service list
1404 */
1405static int ip_vs_del_service(struct ip_vs_service *svc)
1406{
1407 if (svc == NULL)
1408 return -EEXIST;
1409 ip_vs_unlink_service(svc);
1da177e4
LT
1410
1411 return 0;
1412}
1413
1414
1415/*
1416 * Flush all the virtual services
1417 */
fc723250 1418static int ip_vs_flush(struct net *net)
1da177e4
LT
1419{
1420 int idx;
1421 struct ip_vs_service *svc, *nxt;
1422
1423 /*
fc723250 1424 * Flush the service table hashed by <netns,protocol,addr,port>
1da177e4
LT
1425 */
1426 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
fc723250
HS
1427 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx],
1428 s_list) {
1429 if (net_eq(svc->net, net))
1430 ip_vs_unlink_service(svc);
1da177e4
LT
1431 }
1432 }
1433
1434 /*
1435 * Flush the service table hashed by fwmark
1436 */
1437 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1438 list_for_each_entry_safe(svc, nxt,
1439 &ip_vs_svc_fwm_table[idx], f_list) {
fc723250
HS
1440 if (net_eq(svc->net, net))
1441 ip_vs_unlink_service(svc);
1da177e4
LT
1442 }
1443 }
1444
1445 return 0;
1446}
1447
1448
1449/*
1450 * Zero counters in a service or all services
1451 */
1452static int ip_vs_zero_service(struct ip_vs_service *svc)
1453{
1454 struct ip_vs_dest *dest;
1455
1456 write_lock_bh(&__ip_vs_svc_lock);
1457 list_for_each_entry(dest, &svc->destinations, n_list) {
1458 ip_vs_zero_stats(&dest->stats);
1459 }
1460 ip_vs_zero_stats(&svc->stats);
1461 write_unlock_bh(&__ip_vs_svc_lock);
1462 return 0;
1463}
1464
fc723250 1465static int ip_vs_zero_all(struct net *net)
1da177e4
LT
1466{
1467 int idx;
1468 struct ip_vs_service *svc;
1469
1470 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1471 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
fc723250
HS
1472 if (net_eq(svc->net, net))
1473 ip_vs_zero_service(svc);
1da177e4
LT
1474 }
1475 }
1476
1477 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1478 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
fc723250
HS
1479 if (net_eq(svc->net, net))
1480 ip_vs_zero_service(svc);
1da177e4
LT
1481 }
1482 }
1483
b17fc996 1484 ip_vs_zero_stats(net_ipvs(net)->tot_stats);
1da177e4
LT
1485 return 0;
1486}
1487
1488
1489static int
8d65af78 1490proc_do_defense_mode(ctl_table *table, int write,
1da177e4
LT
1491 void __user *buffer, size_t *lenp, loff_t *ppos)
1492{
9330419d 1493 struct net *net = current->nsproxy->net_ns;
1da177e4
LT
1494 int *valp = table->data;
1495 int val = *valp;
1496 int rc;
1497
8d65af78 1498 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1da177e4
LT
1499 if (write && (*valp != val)) {
1500 if ((*valp < 0) || (*valp > 3)) {
1501 /* Restore the correct value */
1502 *valp = val;
1503 } else {
9330419d 1504 update_defense_level(net_ipvs(net));
1da177e4
LT
1505 }
1506 }
1507 return rc;
1508}
1509
1510
1511static int
8d65af78 1512proc_do_sync_threshold(ctl_table *table, int write,
1da177e4
LT
1513 void __user *buffer, size_t *lenp, loff_t *ppos)
1514{
1515 int *valp = table->data;
1516 int val[2];
1517 int rc;
1518
1519 /* backup the value first */
1520 memcpy(val, valp, sizeof(val));
1521
8d65af78 1522 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1da177e4
LT
1523 if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
1524 /* Restore the correct value */
1525 memcpy(valp, val, sizeof(val));
1526 }
1527 return rc;
1528}
1529
b880c1f0
HS
1530static int
1531proc_do_sync_mode(ctl_table *table, int write,
1532 void __user *buffer, size_t *lenp, loff_t *ppos)
1533{
1534 int *valp = table->data;
1535 int val = *valp;
1536 int rc;
1537
1538 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1539 if (write && (*valp != val)) {
1540 if ((*valp < 0) || (*valp > 1)) {
1541 /* Restore the correct value */
1542 *valp = val;
1543 } else {
f131315f
HS
1544 struct net *net = current->nsproxy->net_ns;
1545 ip_vs_sync_switch_mode(net, val);
b880c1f0
HS
1546 }
1547 }
1548 return rc;
1549}
1da177e4
LT
1550
1551/*
1552 * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
a0840e2e
HS
1553 * Do not change order or insert new entries without
1554 * align with netns init in __ip_vs_control_init()
1da177e4
LT
1555 */
1556
1557static struct ctl_table vs_vars[] = {
1558 {
1da177e4 1559 .procname = "amemthresh",
1da177e4
LT
1560 .maxlen = sizeof(int),
1561 .mode = 0644,
6d9f239a 1562 .proc_handler = proc_dointvec,
1da177e4 1563 },
1da177e4 1564 {
1da177e4 1565 .procname = "am_droprate",
1da177e4
LT
1566 .maxlen = sizeof(int),
1567 .mode = 0644,
6d9f239a 1568 .proc_handler = proc_dointvec,
1da177e4
LT
1569 },
1570 {
1da177e4 1571 .procname = "drop_entry",
1da177e4
LT
1572 .maxlen = sizeof(int),
1573 .mode = 0644,
6d9f239a 1574 .proc_handler = proc_do_defense_mode,
1da177e4
LT
1575 },
1576 {
1da177e4 1577 .procname = "drop_packet",
1da177e4
LT
1578 .maxlen = sizeof(int),
1579 .mode = 0644,
6d9f239a 1580 .proc_handler = proc_do_defense_mode,
1da177e4 1581 },
f4bc17cd
JA
1582#ifdef CONFIG_IP_VS_NFCT
1583 {
1584 .procname = "conntrack",
f4bc17cd
JA
1585 .maxlen = sizeof(int),
1586 .mode = 0644,
1587 .proc_handler = &proc_dointvec,
1588 },
1589#endif
1da177e4 1590 {
1da177e4 1591 .procname = "secure_tcp",
1da177e4
LT
1592 .maxlen = sizeof(int),
1593 .mode = 0644,
6d9f239a 1594 .proc_handler = proc_do_defense_mode,
1da177e4 1595 },
8a803040
JA
1596 {
1597 .procname = "snat_reroute",
8a803040
JA
1598 .maxlen = sizeof(int),
1599 .mode = 0644,
1600 .proc_handler = &proc_dointvec,
1601 },
b880c1f0
HS
1602 {
1603 .procname = "sync_version",
b880c1f0
HS
1604 .maxlen = sizeof(int),
1605 .mode = 0644,
1606 .proc_handler = &proc_do_sync_mode,
1607 },
a0840e2e
HS
1608 {
1609 .procname = "cache_bypass",
1610 .maxlen = sizeof(int),
1611 .mode = 0644,
1612 .proc_handler = proc_dointvec,
1613 },
1614 {
1615 .procname = "expire_nodest_conn",
1616 .maxlen = sizeof(int),
1617 .mode = 0644,
1618 .proc_handler = proc_dointvec,
1619 },
1620 {
1621 .procname = "expire_quiescent_template",
1622 .maxlen = sizeof(int),
1623 .mode = 0644,
1624 .proc_handler = proc_dointvec,
1625 },
1626 {
1627 .procname = "sync_threshold",
1628 .maxlen =
1629 sizeof(((struct netns_ipvs *)0)->sysctl_sync_threshold),
1630 .mode = 0644,
1631 .proc_handler = proc_do_sync_threshold,
1632 },
1633 {
1634 .procname = "nat_icmp_send",
1635 .maxlen = sizeof(int),
1636 .mode = 0644,
1637 .proc_handler = proc_dointvec,
1638 },
1639#ifdef CONFIG_IP_VS_DEBUG
1640 {
1641 .procname = "debug_level",
1642 .data = &sysctl_ip_vs_debug_level,
1643 .maxlen = sizeof(int),
1644 .mode = 0644,
1645 .proc_handler = proc_dointvec,
1646 },
1647#endif
1da177e4
LT
1648#if 0
1649 {
1da177e4
LT
1650 .procname = "timeout_established",
1651 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1652 .maxlen = sizeof(int),
1653 .mode = 0644,
6d9f239a 1654 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1655 },
1656 {
1da177e4
LT
1657 .procname = "timeout_synsent",
1658 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1659 .maxlen = sizeof(int),
1660 .mode = 0644,
6d9f239a 1661 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1662 },
1663 {
1da177e4
LT
1664 .procname = "timeout_synrecv",
1665 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1666 .maxlen = sizeof(int),
1667 .mode = 0644,
6d9f239a 1668 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1669 },
1670 {
1da177e4
LT
1671 .procname = "timeout_finwait",
1672 .data = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1673 .maxlen = sizeof(int),
1674 .mode = 0644,
6d9f239a 1675 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1676 },
1677 {
1da177e4
LT
1678 .procname = "timeout_timewait",
1679 .data = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1680 .maxlen = sizeof(int),
1681 .mode = 0644,
6d9f239a 1682 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1683 },
1684 {
1da177e4
LT
1685 .procname = "timeout_close",
1686 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1687 .maxlen = sizeof(int),
1688 .mode = 0644,
6d9f239a 1689 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1690 },
1691 {
1da177e4
LT
1692 .procname = "timeout_closewait",
1693 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1694 .maxlen = sizeof(int),
1695 .mode = 0644,
6d9f239a 1696 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1697 },
1698 {
1da177e4
LT
1699 .procname = "timeout_lastack",
1700 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1701 .maxlen = sizeof(int),
1702 .mode = 0644,
6d9f239a 1703 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1704 },
1705 {
1da177e4
LT
1706 .procname = "timeout_listen",
1707 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1708 .maxlen = sizeof(int),
1709 .mode = 0644,
6d9f239a 1710 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1711 },
1712 {
1da177e4
LT
1713 .procname = "timeout_synack",
1714 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1715 .maxlen = sizeof(int),
1716 .mode = 0644,
6d9f239a 1717 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1718 },
1719 {
1da177e4
LT
1720 .procname = "timeout_udp",
1721 .data = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1722 .maxlen = sizeof(int),
1723 .mode = 0644,
6d9f239a 1724 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1725 },
1726 {
1da177e4
LT
1727 .procname = "timeout_icmp",
1728 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1729 .maxlen = sizeof(int),
1730 .mode = 0644,
6d9f239a 1731 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1732 },
1733#endif
f8572d8f 1734 { }
1da177e4
LT
1735};
1736
5587da55 1737const struct ctl_path net_vs_ctl_path[] = {
f8572d8f
EB
1738 { .procname = "net", },
1739 { .procname = "ipv4", },
90754f8e
PE
1740 { .procname = "vs", },
1741 { }
1da177e4 1742};
90754f8e 1743EXPORT_SYMBOL_GPL(net_vs_ctl_path);
1da177e4 1744
1da177e4
LT
1745#ifdef CONFIG_PROC_FS
1746
1747struct ip_vs_iter {
fc723250 1748 struct seq_net_private p; /* Do not move this, netns depends upon it*/
1da177e4
LT
1749 struct list_head *table;
1750 int bucket;
1751};
1752
1753/*
1754 * Write the contents of the VS rule table to a PROCfs file.
1755 * (It is kept just for backward compatibility)
1756 */
1757static inline const char *ip_vs_fwd_name(unsigned flags)
1758{
1759 switch (flags & IP_VS_CONN_F_FWD_MASK) {
1760 case IP_VS_CONN_F_LOCALNODE:
1761 return "Local";
1762 case IP_VS_CONN_F_TUNNEL:
1763 return "Tunnel";
1764 case IP_VS_CONN_F_DROUTE:
1765 return "Route";
1766 default:
1767 return "Masq";
1768 }
1769}
1770
1771
1772/* Get the Nth entry in the two lists */
1773static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1774{
fc723250 1775 struct net *net = seq_file_net(seq);
1da177e4
LT
1776 struct ip_vs_iter *iter = seq->private;
1777 int idx;
1778 struct ip_vs_service *svc;
1779
1780 /* look in hash by protocol */
1781 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1782 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
fc723250 1783 if (net_eq(svc->net, net) && pos-- == 0) {
1da177e4
LT
1784 iter->table = ip_vs_svc_table;
1785 iter->bucket = idx;
1786 return svc;
1787 }
1788 }
1789 }
1790
1791 /* keep looking in fwmark */
1792 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1793 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
fc723250 1794 if (net_eq(svc->net, net) && pos-- == 0) {
1da177e4
LT
1795 iter->table = ip_vs_svc_fwm_table;
1796 iter->bucket = idx;
1797 return svc;
1798 }
1799 }
1800 }
1801
1802 return NULL;
1803}
1804
1805static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
563e94f0 1806__acquires(__ip_vs_svc_lock)
1da177e4
LT
1807{
1808
1809 read_lock_bh(&__ip_vs_svc_lock);
1810 return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
1811}
1812
1813
1814static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1815{
1816 struct list_head *e;
1817 struct ip_vs_iter *iter;
1818 struct ip_vs_service *svc;
1819
1820 ++*pos;
1821 if (v == SEQ_START_TOKEN)
1822 return ip_vs_info_array(seq,0);
1823
1824 svc = v;
1825 iter = seq->private;
1826
1827 if (iter->table == ip_vs_svc_table) {
1828 /* next service in table hashed by protocol */
1829 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
1830 return list_entry(e, struct ip_vs_service, s_list);
1831
1832
1833 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1834 list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
1835 s_list) {
1836 return svc;
1837 }
1838 }
1839
1840 iter->table = ip_vs_svc_fwm_table;
1841 iter->bucket = -1;
1842 goto scan_fwmark;
1843 }
1844
1845 /* next service in hashed by fwmark */
1846 if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
1847 return list_entry(e, struct ip_vs_service, f_list);
1848
1849 scan_fwmark:
1850 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1851 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
1852 f_list)
1853 return svc;
1854 }
1855
1856 return NULL;
1857}
1858
1859static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
563e94f0 1860__releases(__ip_vs_svc_lock)
1da177e4
LT
1861{
1862 read_unlock_bh(&__ip_vs_svc_lock);
1863}
1864
1865
1866static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
1867{
1868 if (v == SEQ_START_TOKEN) {
1869 seq_printf(seq,
1870 "IP Virtual Server version %d.%d.%d (size=%d)\n",
6f7edb48 1871 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
1da177e4
LT
1872 seq_puts(seq,
1873 "Prot LocalAddress:Port Scheduler Flags\n");
1874 seq_puts(seq,
1875 " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
1876 } else {
1877 const struct ip_vs_service *svc = v;
1878 const struct ip_vs_iter *iter = seq->private;
1879 const struct ip_vs_dest *dest;
1880
667a5f18
VB
1881 if (iter->table == ip_vs_svc_table) {
1882#ifdef CONFIG_IP_VS_IPV6
1883 if (svc->af == AF_INET6)
5b095d98 1884 seq_printf(seq, "%s [%pI6]:%04X %s ",
667a5f18 1885 ip_vs_proto_name(svc->protocol),
38ff4fa4 1886 &svc->addr.in6,
667a5f18
VB
1887 ntohs(svc->port),
1888 svc->scheduler->name);
1889 else
1890#endif
26ec037f 1891 seq_printf(seq, "%s %08X:%04X %s %s ",
667a5f18
VB
1892 ip_vs_proto_name(svc->protocol),
1893 ntohl(svc->addr.ip),
1894 ntohs(svc->port),
26ec037f
NC
1895 svc->scheduler->name,
1896 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
667a5f18 1897 } else {
26ec037f
NC
1898 seq_printf(seq, "FWM %08X %s %s",
1899 svc->fwmark, svc->scheduler->name,
1900 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
667a5f18 1901 }
1da177e4
LT
1902
1903 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
1904 seq_printf(seq, "persistent %d %08X\n",
1905 svc->timeout,
1906 ntohl(svc->netmask));
1907 else
1908 seq_putc(seq, '\n');
1909
1910 list_for_each_entry(dest, &svc->destinations, n_list) {
667a5f18
VB
1911#ifdef CONFIG_IP_VS_IPV6
1912 if (dest->af == AF_INET6)
1913 seq_printf(seq,
5b095d98 1914 " -> [%pI6]:%04X"
667a5f18 1915 " %-7s %-6d %-10d %-10d\n",
38ff4fa4 1916 &dest->addr.in6,
667a5f18
VB
1917 ntohs(dest->port),
1918 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1919 atomic_read(&dest->weight),
1920 atomic_read(&dest->activeconns),
1921 atomic_read(&dest->inactconns));
1922 else
1923#endif
1924 seq_printf(seq,
1925 " -> %08X:%04X "
1926 "%-7s %-6d %-10d %-10d\n",
1927 ntohl(dest->addr.ip),
1928 ntohs(dest->port),
1929 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1930 atomic_read(&dest->weight),
1931 atomic_read(&dest->activeconns),
1932 atomic_read(&dest->inactconns));
1933
1da177e4
LT
1934 }
1935 }
1936 return 0;
1937}
1938
56b3d975 1939static const struct seq_operations ip_vs_info_seq_ops = {
1da177e4
LT
1940 .start = ip_vs_info_seq_start,
1941 .next = ip_vs_info_seq_next,
1942 .stop = ip_vs_info_seq_stop,
1943 .show = ip_vs_info_seq_show,
1944};
1945
1946static int ip_vs_info_open(struct inode *inode, struct file *file)
1947{
fc723250 1948 return seq_open_net(inode, file, &ip_vs_info_seq_ops,
cf7732e4 1949 sizeof(struct ip_vs_iter));
1da177e4
LT
1950}
1951
9a32144e 1952static const struct file_operations ip_vs_info_fops = {
1da177e4
LT
1953 .owner = THIS_MODULE,
1954 .open = ip_vs_info_open,
1955 .read = seq_read,
1956 .llseek = seq_lseek,
1957 .release = seq_release_private,
1958};
1959
1960#endif
1961
1da177e4
LT
1962#ifdef CONFIG_PROC_FS
1963static int ip_vs_stats_show(struct seq_file *seq, void *v)
1964{
b17fc996
HS
1965 struct net *net = seq_file_single_net(seq);
1966 struct ip_vs_stats *tot_stats = net_ipvs(net)->tot_stats;
1da177e4
LT
1967
1968/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1969 seq_puts(seq,
1970 " Total Incoming Outgoing Incoming Outgoing\n");
1971 seq_printf(seq,
1972 " Conns Packets Packets Bytes Bytes\n");
1973
b17fc996
HS
1974 spin_lock_bh(&tot_stats->lock);
1975 seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", tot_stats->ustats.conns,
1976 tot_stats->ustats.inpkts, tot_stats->ustats.outpkts,
1977 (unsigned long long) tot_stats->ustats.inbytes,
1978 (unsigned long long) tot_stats->ustats.outbytes);
1da177e4
LT
1979
1980/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1981 seq_puts(seq,
1982 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
1983 seq_printf(seq,"%8X %8X %8X %16X %16X\n",
b17fc996
HS
1984 tot_stats->ustats.cps,
1985 tot_stats->ustats.inpps,
1986 tot_stats->ustats.outpps,
1987 tot_stats->ustats.inbps,
1988 tot_stats->ustats.outbps);
1989 spin_unlock_bh(&tot_stats->lock);
1da177e4
LT
1990
1991 return 0;
1992}
1993
1994static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
1995{
fc723250 1996 return single_open_net(inode, file, ip_vs_stats_show);
1da177e4
LT
1997}
1998
9a32144e 1999static const struct file_operations ip_vs_stats_fops = {
1da177e4
LT
2000 .owner = THIS_MODULE,
2001 .open = ip_vs_stats_seq_open,
2002 .read = seq_read,
2003 .llseek = seq_lseek,
2004 .release = single_release,
2005};
2006
b17fc996
HS
2007static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
2008{
2009 struct net *net = seq_file_single_net(seq);
2010 struct ip_vs_stats *tot_stats = net_ipvs(net)->tot_stats;
2011 int i;
2012
2013/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2014 seq_puts(seq,
2015 " Total Incoming Outgoing Incoming Outgoing\n");
2016 seq_printf(seq,
2017 "CPU Conns Packets Packets Bytes Bytes\n");
2018
2019 for_each_possible_cpu(i) {
2020 struct ip_vs_cpu_stats *u = per_cpu_ptr(net->ipvs->cpustats, i);
2021 seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n",
2022 i, u->ustats.conns, u->ustats.inpkts,
2023 u->ustats.outpkts, (__u64)u->ustats.inbytes,
2024 (__u64)u->ustats.outbytes);
2025 }
2026
2027 spin_lock_bh(&tot_stats->lock);
2028 seq_printf(seq, " ~ %8X %8X %8X %16LX %16LX\n\n",
2029 tot_stats->ustats.conns, tot_stats->ustats.inpkts,
2030 tot_stats->ustats.outpkts,
2031 (unsigned long long) tot_stats->ustats.inbytes,
2032 (unsigned long long) tot_stats->ustats.outbytes);
2033
2034/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2035 seq_puts(seq,
2036 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
2037 seq_printf(seq, " %8X %8X %8X %16X %16X\n",
2038 tot_stats->ustats.cps,
2039 tot_stats->ustats.inpps,
2040 tot_stats->ustats.outpps,
2041 tot_stats->ustats.inbps,
2042 tot_stats->ustats.outbps);
2043 spin_unlock_bh(&tot_stats->lock);
2044
2045 return 0;
2046}
2047
2048static int ip_vs_stats_percpu_seq_open(struct inode *inode, struct file *file)
2049{
2050 return single_open_net(inode, file, ip_vs_stats_percpu_show);
2051}
2052
2053static const struct file_operations ip_vs_stats_percpu_fops = {
2054 .owner = THIS_MODULE,
2055 .open = ip_vs_stats_percpu_seq_open,
2056 .read = seq_read,
2057 .llseek = seq_lseek,
2058 .release = single_release,
2059};
1da177e4
LT
2060#endif
2061
2062/*
2063 * Set timeout values for tcp tcpfin udp in the timeout_table.
2064 */
9330419d 2065static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u)
1da177e4 2066{
091bb34c 2067#if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
9330419d 2068 struct ip_vs_proto_data *pd;
091bb34c 2069#endif
9330419d 2070
1da177e4
LT
2071 IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
2072 u->tcp_timeout,
2073 u->tcp_fin_timeout,
2074 u->udp_timeout);
2075
2076#ifdef CONFIG_IP_VS_PROTO_TCP
2077 if (u->tcp_timeout) {
9330419d
HS
2078 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2079 pd->timeout_table[IP_VS_TCP_S_ESTABLISHED]
1da177e4
LT
2080 = u->tcp_timeout * HZ;
2081 }
2082
2083 if (u->tcp_fin_timeout) {
9330419d
HS
2084 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2085 pd->timeout_table[IP_VS_TCP_S_FIN_WAIT]
1da177e4
LT
2086 = u->tcp_fin_timeout * HZ;
2087 }
2088#endif
2089
2090#ifdef CONFIG_IP_VS_PROTO_UDP
2091 if (u->udp_timeout) {
9330419d
HS
2092 pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
2093 pd->timeout_table[IP_VS_UDP_S_NORMAL]
1da177e4
LT
2094 = u->udp_timeout * HZ;
2095 }
2096#endif
2097 return 0;
2098}
2099
2100
/*
 * SET_CMDID() turns a setsockopt command number into an index relative to
 * IP_VS_BASE_CTL.  The argument is parenthesized so that expressions such as
 * "a ? b : c" or "x | y" are offset as a whole.
 * The *_ARG_LEN macros give the byte size of the user-space argument each
 * command family expects; MAX_ARG_LEN is the largest of them.
 */
#define SET_CMDID(cmd)		((cmd) - IP_VS_BASE_CTL)
#define SERVICE_ARG_LEN		(sizeof(struct ip_vs_service_user))
#define SVCDEST_ARG_LEN		(sizeof(struct ip_vs_service_user) +	\
				 sizeof(struct ip_vs_dest_user))
#define TIMEOUT_ARG_LEN		(sizeof(struct ip_vs_timeout_user))
#define DAEMON_ARG_LEN		(sizeof(struct ip_vs_daemon_user))
#define MAX_ARG_LEN		SVCDEST_ARG_LEN
2108
9b5b5cff 2109static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
1da177e4
LT
2110 [SET_CMDID(IP_VS_SO_SET_ADD)] = SERVICE_ARG_LEN,
2111 [SET_CMDID(IP_VS_SO_SET_EDIT)] = SERVICE_ARG_LEN,
2112 [SET_CMDID(IP_VS_SO_SET_DEL)] = SERVICE_ARG_LEN,
2113 [SET_CMDID(IP_VS_SO_SET_FLUSH)] = 0,
2114 [SET_CMDID(IP_VS_SO_SET_ADDDEST)] = SVCDEST_ARG_LEN,
2115 [SET_CMDID(IP_VS_SO_SET_DELDEST)] = SVCDEST_ARG_LEN,
2116 [SET_CMDID(IP_VS_SO_SET_EDITDEST)] = SVCDEST_ARG_LEN,
2117 [SET_CMDID(IP_VS_SO_SET_TIMEOUT)] = TIMEOUT_ARG_LEN,
2118 [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)] = DAEMON_ARG_LEN,
2119 [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)] = DAEMON_ARG_LEN,
2120 [SET_CMDID(IP_VS_SO_SET_ZERO)] = SERVICE_ARG_LEN,
2121};
2122
c860c6b1
JV
2123static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2124 struct ip_vs_service_user *usvc_compat)
2125{
0d1e71b0
SH
2126 memset(usvc, 0, sizeof(*usvc));
2127
c860c6b1
JV
2128 usvc->af = AF_INET;
2129 usvc->protocol = usvc_compat->protocol;
2130 usvc->addr.ip = usvc_compat->addr;
2131 usvc->port = usvc_compat->port;
2132 usvc->fwmark = usvc_compat->fwmark;
2133
2134 /* Deep copy of sched_name is not needed here */
2135 usvc->sched_name = usvc_compat->sched_name;
2136
2137 usvc->flags = usvc_compat->flags;
2138 usvc->timeout = usvc_compat->timeout;
2139 usvc->netmask = usvc_compat->netmask;
2140}
2141
2142static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
2143 struct ip_vs_dest_user *udest_compat)
2144{
0d1e71b0
SH
2145 memset(udest, 0, sizeof(*udest));
2146
c860c6b1
JV
2147 udest->addr.ip = udest_compat->addr;
2148 udest->port = udest_compat->port;
2149 udest->conn_flags = udest_compat->conn_flags;
2150 udest->weight = udest_compat->weight;
2151 udest->u_threshold = udest_compat->u_threshold;
2152 udest->l_threshold = udest_compat->l_threshold;
2153}
2154
1da177e4
LT
2155static int
2156do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2157{
fc723250 2158 struct net *net = sock_net(sk);
1da177e4
LT
2159 int ret;
2160 unsigned char arg[MAX_ARG_LEN];
c860c6b1
JV
2161 struct ip_vs_service_user *usvc_compat;
2162 struct ip_vs_service_user_kern usvc;
1da177e4 2163 struct ip_vs_service *svc;
c860c6b1
JV
2164 struct ip_vs_dest_user *udest_compat;
2165 struct ip_vs_dest_user_kern udest;
1da177e4
LT
2166
2167 if (!capable(CAP_NET_ADMIN))
2168 return -EPERM;
2169
04bcef2a
AV
2170 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_SET_MAX)
2171 return -EINVAL;
2172 if (len < 0 || len > MAX_ARG_LEN)
2173 return -EINVAL;
1da177e4 2174 if (len != set_arglen[SET_CMDID(cmd)]) {
1e3e238e
HE
2175 pr_err("set_ctl: len %u != %u\n",
2176 len, set_arglen[SET_CMDID(cmd)]);
1da177e4
LT
2177 return -EINVAL;
2178 }
2179
2180 if (copy_from_user(arg, user, len) != 0)
2181 return -EFAULT;
2182
2183 /* increase the module use count */
2184 ip_vs_use_count_inc();
2185
14cc3e2b 2186 if (mutex_lock_interruptible(&__ip_vs_mutex)) {
1da177e4
LT
2187 ret = -ERESTARTSYS;
2188 goto out_dec;
2189 }
2190
2191 if (cmd == IP_VS_SO_SET_FLUSH) {
2192 /* Flush the virtual service */
fc723250 2193 ret = ip_vs_flush(net);
1da177e4
LT
2194 goto out_unlock;
2195 } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
2196 /* Set timeout values for (tcp tcpfin udp) */
9330419d 2197 ret = ip_vs_set_timeout(net, (struct ip_vs_timeout_user *)arg);
1da177e4
LT
2198 goto out_unlock;
2199 } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
2200 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
f131315f
HS
2201 ret = start_sync_thread(net, dm->state, dm->mcast_ifn,
2202 dm->syncid);
1da177e4
LT
2203 goto out_unlock;
2204 } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
2205 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
f131315f 2206 ret = stop_sync_thread(net, dm->state);
1da177e4
LT
2207 goto out_unlock;
2208 }
2209
c860c6b1
JV
2210 usvc_compat = (struct ip_vs_service_user *)arg;
2211 udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
2212
2213 /* We only use the new structs internally, so copy userspace compat
2214 * structs to extended internal versions */
2215 ip_vs_copy_usvc_compat(&usvc, usvc_compat);
2216 ip_vs_copy_udest_compat(&udest, udest_compat);
1da177e4
LT
2217
2218 if (cmd == IP_VS_SO_SET_ZERO) {
2219 /* if no service address is set, zero counters in all */
c860c6b1 2220 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
fc723250 2221 ret = ip_vs_zero_all(net);
1da177e4
LT
2222 goto out_unlock;
2223 }
2224 }
2225
2906f66a
VMR
2226 /* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */
2227 if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP &&
2228 usvc.protocol != IPPROTO_SCTP) {
1e3e238e
HE
2229 pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n",
2230 usvc.protocol, &usvc.addr.ip,
2231 ntohs(usvc.port), usvc.sched_name);
1da177e4
LT
2232 ret = -EFAULT;
2233 goto out_unlock;
2234 }
2235
2236 /* Lookup the exact service by <protocol, addr, port> or fwmark */
c860c6b1 2237 if (usvc.fwmark == 0)
fc723250 2238 svc = __ip_vs_service_find(net, usvc.af, usvc.protocol,
26c15cfd 2239 &usvc.addr, usvc.port);
1da177e4 2240 else
fc723250 2241 svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark);
1da177e4
LT
2242
2243 if (cmd != IP_VS_SO_SET_ADD
c860c6b1 2244 && (svc == NULL || svc->protocol != usvc.protocol)) {
1da177e4 2245 ret = -ESRCH;
26c15cfd 2246 goto out_unlock;
1da177e4
LT
2247 }
2248
2249 switch (cmd) {
2250 case IP_VS_SO_SET_ADD:
2251 if (svc != NULL)
2252 ret = -EEXIST;
2253 else
fc723250 2254 ret = ip_vs_add_service(net, &usvc, &svc);
1da177e4
LT
2255 break;
2256 case IP_VS_SO_SET_EDIT:
c860c6b1 2257 ret = ip_vs_edit_service(svc, &usvc);
1da177e4
LT
2258 break;
2259 case IP_VS_SO_SET_DEL:
2260 ret = ip_vs_del_service(svc);
2261 if (!ret)
2262 goto out_unlock;
2263 break;
2264 case IP_VS_SO_SET_ZERO:
2265 ret = ip_vs_zero_service(svc);
2266 break;
2267 case IP_VS_SO_SET_ADDDEST:
c860c6b1 2268 ret = ip_vs_add_dest(svc, &udest);
1da177e4
LT
2269 break;
2270 case IP_VS_SO_SET_EDITDEST:
c860c6b1 2271 ret = ip_vs_edit_dest(svc, &udest);
1da177e4
LT
2272 break;
2273 case IP_VS_SO_SET_DELDEST:
c860c6b1 2274 ret = ip_vs_del_dest(svc, &udest);
1da177e4
LT
2275 break;
2276 default:
2277 ret = -EINVAL;
2278 }
2279
1da177e4 2280 out_unlock:
14cc3e2b 2281 mutex_unlock(&__ip_vs_mutex);
1da177e4
LT
2282 out_dec:
2283 /* decrease the module use count */
2284 ip_vs_use_count_dec();
2285
2286 return ret;
2287}
2288
2289
2290static void
2291ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
2292{
2293 spin_lock_bh(&src->lock);
e9c0ce23 2294 memcpy(dst, &src->ustats, sizeof(*dst));
1da177e4
LT
2295 spin_unlock_bh(&src->lock);
2296}
2297
2298static void
2299ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2300{
2301 dst->protocol = src->protocol;
e7ade46a 2302 dst->addr = src->addr.ip;
1da177e4
LT
2303 dst->port = src->port;
2304 dst->fwmark = src->fwmark;
4da62fc7 2305 strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
1da177e4
LT
2306 dst->flags = src->flags;
2307 dst->timeout = src->timeout / HZ;
2308 dst->netmask = src->netmask;
2309 dst->num_dests = src->num_dests;
2310 ip_vs_copy_stats(&dst->stats, &src->stats);
2311}
2312
2313static inline int
fc723250
HS
2314__ip_vs_get_service_entries(struct net *net,
2315 const struct ip_vs_get_services *get,
1da177e4
LT
2316 struct ip_vs_get_services __user *uptr)
2317{
2318 int idx, count=0;
2319 struct ip_vs_service *svc;
2320 struct ip_vs_service_entry entry;
2321 int ret = 0;
2322
2323 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2324 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
f94fd041 2325 /* Only expose IPv4 entries to old interface */
fc723250 2326 if (svc->af != AF_INET || !net_eq(svc->net, net))
f94fd041
JV
2327 continue;
2328
1da177e4
LT
2329 if (count >= get->num_services)
2330 goto out;
4da62fc7 2331 memset(&entry, 0, sizeof(entry));
1da177e4
LT
2332 ip_vs_copy_service(&entry, svc);
2333 if (copy_to_user(&uptr->entrytable[count],
2334 &entry, sizeof(entry))) {
2335 ret = -EFAULT;
2336 goto out;
2337 }
2338 count++;
2339 }
2340 }
2341
2342 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2343 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
f94fd041 2344 /* Only expose IPv4 entries to old interface */
fc723250 2345 if (svc->af != AF_INET || !net_eq(svc->net, net))
f94fd041
JV
2346 continue;
2347
1da177e4
LT
2348 if (count >= get->num_services)
2349 goto out;
4da62fc7 2350 memset(&entry, 0, sizeof(entry));
1da177e4
LT
2351 ip_vs_copy_service(&entry, svc);
2352 if (copy_to_user(&uptr->entrytable[count],
2353 &entry, sizeof(entry))) {
2354 ret = -EFAULT;
2355 goto out;
2356 }
2357 count++;
2358 }
2359 }
2360 out:
2361 return ret;
2362}
2363
2364static inline int
fc723250 2365__ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
1da177e4
LT
2366 struct ip_vs_get_dests __user *uptr)
2367{
2368 struct ip_vs_service *svc;
b18610de 2369 union nf_inet_addr addr = { .ip = get->addr };
1da177e4
LT
2370 int ret = 0;
2371
2372 if (get->fwmark)
fc723250 2373 svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark);
1da177e4 2374 else
fc723250 2375 svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr,
26c15cfd 2376 get->port);
b18610de 2377
1da177e4
LT
2378 if (svc) {
2379 int count = 0;
2380 struct ip_vs_dest *dest;
2381 struct ip_vs_dest_entry entry;
2382
2383 list_for_each_entry(dest, &svc->destinations, n_list) {
2384 if (count >= get->num_dests)
2385 break;
2386
e7ade46a 2387 entry.addr = dest->addr.ip;
1da177e4
LT
2388 entry.port = dest->port;
2389 entry.conn_flags = atomic_read(&dest->conn_flags);
2390 entry.weight = atomic_read(&dest->weight);
2391 entry.u_threshold = dest->u_threshold;
2392 entry.l_threshold = dest->l_threshold;
2393 entry.activeconns = atomic_read(&dest->activeconns);
2394 entry.inactconns = atomic_read(&dest->inactconns);
2395 entry.persistconns = atomic_read(&dest->persistconns);
2396 ip_vs_copy_stats(&entry.stats, &dest->stats);
2397 if (copy_to_user(&uptr->entrytable[count],
2398 &entry, sizeof(entry))) {
2399 ret = -EFAULT;
2400 break;
2401 }
2402 count++;
2403 }
1da177e4
LT
2404 } else
2405 ret = -ESRCH;
2406 return ret;
2407}
2408
2409static inline void
9330419d 2410__ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u)
1da177e4 2411{
091bb34c 2412#if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
9330419d 2413 struct ip_vs_proto_data *pd;
091bb34c 2414#endif
9330419d 2415
1da177e4 2416#ifdef CONFIG_IP_VS_PROTO_TCP
9330419d
HS
2417 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2418 u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2419 u->tcp_fin_timeout = pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
1da177e4
LT
2420#endif
2421#ifdef CONFIG_IP_VS_PROTO_UDP
9330419d 2422 pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
1da177e4 2423 u->udp_timeout =
9330419d 2424 pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
1da177e4
LT
2425#endif
2426}
2427
2428
2429#define GET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
2430#define GET_INFO_ARG_LEN (sizeof(struct ip_vs_getinfo))
2431#define GET_SERVICES_ARG_LEN (sizeof(struct ip_vs_get_services))
2432#define GET_SERVICE_ARG_LEN (sizeof(struct ip_vs_service_entry))
2433#define GET_DESTS_ARG_LEN (sizeof(struct ip_vs_get_dests))
2434#define GET_TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
2435#define GET_DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user) * 2)
2436
9b5b5cff 2437static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
1da177e4
LT
2438 [GET_CMDID(IP_VS_SO_GET_VERSION)] = 64,
2439 [GET_CMDID(IP_VS_SO_GET_INFO)] = GET_INFO_ARG_LEN,
2440 [GET_CMDID(IP_VS_SO_GET_SERVICES)] = GET_SERVICES_ARG_LEN,
2441 [GET_CMDID(IP_VS_SO_GET_SERVICE)] = GET_SERVICE_ARG_LEN,
2442 [GET_CMDID(IP_VS_SO_GET_DESTS)] = GET_DESTS_ARG_LEN,
2443 [GET_CMDID(IP_VS_SO_GET_TIMEOUT)] = GET_TIMEOUT_ARG_LEN,
2444 [GET_CMDID(IP_VS_SO_GET_DAEMON)] = GET_DAEMON_ARG_LEN,
2445};
2446
2447static int
2448do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2449{
2450 unsigned char arg[128];
2451 int ret = 0;
04bcef2a 2452 unsigned int copylen;
fc723250 2453 struct net *net = sock_net(sk);
f131315f 2454 struct netns_ipvs *ipvs = net_ipvs(net);
1da177e4 2455
fc723250 2456 BUG_ON(!net);
1da177e4
LT
2457 if (!capable(CAP_NET_ADMIN))
2458 return -EPERM;
2459
04bcef2a
AV
2460 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_GET_MAX)
2461 return -EINVAL;
2462
1da177e4 2463 if (*len < get_arglen[GET_CMDID(cmd)]) {
1e3e238e
HE
2464 pr_err("get_ctl: len %u < %u\n",
2465 *len, get_arglen[GET_CMDID(cmd)]);
1da177e4
LT
2466 return -EINVAL;
2467 }
2468
04bcef2a
AV
2469 copylen = get_arglen[GET_CMDID(cmd)];
2470 if (copylen > 128)
2471 return -EINVAL;
2472
2473 if (copy_from_user(arg, user, copylen) != 0)
1da177e4
LT
2474 return -EFAULT;
2475
14cc3e2b 2476 if (mutex_lock_interruptible(&__ip_vs_mutex))
1da177e4
LT
2477 return -ERESTARTSYS;
2478
2479 switch (cmd) {
2480 case IP_VS_SO_GET_VERSION:
2481 {
2482 char buf[64];
2483
2484 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
6f7edb48 2485 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
1da177e4
LT
2486 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2487 ret = -EFAULT;
2488 goto out;
2489 }
2490 *len = strlen(buf)+1;
2491 }
2492 break;
2493
2494 case IP_VS_SO_GET_INFO:
2495 {
2496 struct ip_vs_getinfo info;
2497 info.version = IP_VS_VERSION_CODE;
6f7edb48 2498 info.size = ip_vs_conn_tab_size;
a0840e2e 2499 info.num_services = ipvs->num_services;
1da177e4
LT
2500 if (copy_to_user(user, &info, sizeof(info)) != 0)
2501 ret = -EFAULT;
2502 }
2503 break;
2504
2505 case IP_VS_SO_GET_SERVICES:
2506 {
2507 struct ip_vs_get_services *get;
2508 int size;
2509
2510 get = (struct ip_vs_get_services *)arg;
2511 size = sizeof(*get) +
2512 sizeof(struct ip_vs_service_entry) * get->num_services;
2513 if (*len != size) {
1e3e238e 2514 pr_err("length: %u != %u\n", *len, size);
1da177e4
LT
2515 ret = -EINVAL;
2516 goto out;
2517 }
fc723250 2518 ret = __ip_vs_get_service_entries(net, get, user);
1da177e4
LT
2519 }
2520 break;
2521
2522 case IP_VS_SO_GET_SERVICE:
2523 {
2524 struct ip_vs_service_entry *entry;
2525 struct ip_vs_service *svc;
b18610de 2526 union nf_inet_addr addr;
1da177e4
LT
2527
2528 entry = (struct ip_vs_service_entry *)arg;
b18610de 2529 addr.ip = entry->addr;
1da177e4 2530 if (entry->fwmark)
fc723250 2531 svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark);
1da177e4 2532 else
fc723250
HS
2533 svc = __ip_vs_service_find(net, AF_INET,
2534 entry->protocol, &addr,
2535 entry->port);
1da177e4
LT
2536 if (svc) {
2537 ip_vs_copy_service(entry, svc);
2538 if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2539 ret = -EFAULT;
1da177e4
LT
2540 } else
2541 ret = -ESRCH;
2542 }
2543 break;
2544
2545 case IP_VS_SO_GET_DESTS:
2546 {
2547 struct ip_vs_get_dests *get;
2548 int size;
2549
2550 get = (struct ip_vs_get_dests *)arg;
2551 size = sizeof(*get) +
2552 sizeof(struct ip_vs_dest_entry) * get->num_dests;
2553 if (*len != size) {
1e3e238e 2554 pr_err("length: %u != %u\n", *len, size);
1da177e4
LT
2555 ret = -EINVAL;
2556 goto out;
2557 }
fc723250 2558 ret = __ip_vs_get_dest_entries(net, get, user);
1da177e4
LT
2559 }
2560 break;
2561
2562 case IP_VS_SO_GET_TIMEOUT:
2563 {
2564 struct ip_vs_timeout_user t;
2565
9330419d 2566 __ip_vs_get_timeouts(net, &t);
1da177e4
LT
2567 if (copy_to_user(user, &t, sizeof(t)) != 0)
2568 ret = -EFAULT;
2569 }
2570 break;
2571
2572 case IP_VS_SO_GET_DAEMON:
2573 {
2574 struct ip_vs_daemon_user d[2];
2575
2576 memset(&d, 0, sizeof(d));
f131315f 2577 if (ipvs->sync_state & IP_VS_STATE_MASTER) {
1da177e4 2578 d[0].state = IP_VS_STATE_MASTER;
f131315f
HS
2579 strlcpy(d[0].mcast_ifn, ipvs->master_mcast_ifn,
2580 sizeof(d[0].mcast_ifn));
2581 d[0].syncid = ipvs->master_syncid;
1da177e4 2582 }
f131315f 2583 if (ipvs->sync_state & IP_VS_STATE_BACKUP) {
1da177e4 2584 d[1].state = IP_VS_STATE_BACKUP;
f131315f
HS
2585 strlcpy(d[1].mcast_ifn, ipvs->backup_mcast_ifn,
2586 sizeof(d[1].mcast_ifn));
2587 d[1].syncid = ipvs->backup_syncid;
1da177e4
LT
2588 }
2589 if (copy_to_user(user, &d, sizeof(d)) != 0)
2590 ret = -EFAULT;
2591 }
2592 break;
2593
2594 default:
2595 ret = -EINVAL;
2596 }
2597
2598 out:
14cc3e2b 2599 mutex_unlock(&__ip_vs_mutex);
1da177e4
LT
2600 return ret;
2601}
2602
2603
2604static struct nf_sockopt_ops ip_vs_sockopts = {
2605 .pf = PF_INET,
2606 .set_optmin = IP_VS_BASE_CTL,
2607 .set_optmax = IP_VS_SO_SET_MAX+1,
2608 .set = do_ip_vs_set_ctl,
2609 .get_optmin = IP_VS_BASE_CTL,
2610 .get_optmax = IP_VS_SO_GET_MAX+1,
2611 .get = do_ip_vs_get_ctl,
16fcec35 2612 .owner = THIS_MODULE,
1da177e4
LT
2613};
2614
9a812198
JV
2615/*
2616 * Generic Netlink interface
2617 */
2618
2619/* IPVS genetlink family */
2620static struct genl_family ip_vs_genl_family = {
2621 .id = GENL_ID_GENERATE,
2622 .hdrsize = 0,
2623 .name = IPVS_GENL_NAME,
2624 .version = IPVS_GENL_VERSION,
2625 .maxattr = IPVS_CMD_MAX,
c6d2d445 2626 .netnsok = true, /* Make ipvsadm to work on netns */
9a812198
JV
2627};
2628
2629/* Policy used for first-level command attributes */
2630static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
2631 [IPVS_CMD_ATTR_SERVICE] = { .type = NLA_NESTED },
2632 [IPVS_CMD_ATTR_DEST] = { .type = NLA_NESTED },
2633 [IPVS_CMD_ATTR_DAEMON] = { .type = NLA_NESTED },
2634 [IPVS_CMD_ATTR_TIMEOUT_TCP] = { .type = NLA_U32 },
2635 [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
2636 [IPVS_CMD_ATTR_TIMEOUT_UDP] = { .type = NLA_U32 },
2637};
2638
2639/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
2640static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
2641 [IPVS_DAEMON_ATTR_STATE] = { .type = NLA_U32 },
2642 [IPVS_DAEMON_ATTR_MCAST_IFN] = { .type = NLA_NUL_STRING,
2643 .len = IP_VS_IFNAME_MAXLEN },
2644 [IPVS_DAEMON_ATTR_SYNC_ID] = { .type = NLA_U32 },
2645};
2646
2647/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
2648static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2649 [IPVS_SVC_ATTR_AF] = { .type = NLA_U16 },
2650 [IPVS_SVC_ATTR_PROTOCOL] = { .type = NLA_U16 },
2651 [IPVS_SVC_ATTR_ADDR] = { .type = NLA_BINARY,
2652 .len = sizeof(union nf_inet_addr) },
2653 [IPVS_SVC_ATTR_PORT] = { .type = NLA_U16 },
2654 [IPVS_SVC_ATTR_FWMARK] = { .type = NLA_U32 },
2655 [IPVS_SVC_ATTR_SCHED_NAME] = { .type = NLA_NUL_STRING,
2656 .len = IP_VS_SCHEDNAME_MAXLEN },
0d1e71b0
SH
2657 [IPVS_SVC_ATTR_PE_NAME] = { .type = NLA_NUL_STRING,
2658 .len = IP_VS_PENAME_MAXLEN },
9a812198
JV
2659 [IPVS_SVC_ATTR_FLAGS] = { .type = NLA_BINARY,
2660 .len = sizeof(struct ip_vs_flags) },
2661 [IPVS_SVC_ATTR_TIMEOUT] = { .type = NLA_U32 },
2662 [IPVS_SVC_ATTR_NETMASK] = { .type = NLA_U32 },
2663 [IPVS_SVC_ATTR_STATS] = { .type = NLA_NESTED },
2664};
2665
2666/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
2667static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2668 [IPVS_DEST_ATTR_ADDR] = { .type = NLA_BINARY,
2669 .len = sizeof(union nf_inet_addr) },
2670 [IPVS_DEST_ATTR_PORT] = { .type = NLA_U16 },
2671 [IPVS_DEST_ATTR_FWD_METHOD] = { .type = NLA_U32 },
2672 [IPVS_DEST_ATTR_WEIGHT] = { .type = NLA_U32 },
2673 [IPVS_DEST_ATTR_U_THRESH] = { .type = NLA_U32 },
2674 [IPVS_DEST_ATTR_L_THRESH] = { .type = NLA_U32 },
2675 [IPVS_DEST_ATTR_ACTIVE_CONNS] = { .type = NLA_U32 },
2676 [IPVS_DEST_ATTR_INACT_CONNS] = { .type = NLA_U32 },
2677 [IPVS_DEST_ATTR_PERSIST_CONNS] = { .type = NLA_U32 },
2678 [IPVS_DEST_ATTR_STATS] = { .type = NLA_NESTED },
2679};
2680
2681static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2682 struct ip_vs_stats *stats)
2683{
2684 struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2685 if (!nl_stats)
2686 return -EMSGSIZE;
2687
2688 spin_lock_bh(&stats->lock);
2689
e9c0ce23
SW
2690 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->ustats.conns);
2691 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->ustats.inpkts);
2692 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->ustats.outpkts);
2693 NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->ustats.inbytes);
2694 NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->ustats.outbytes);
2695 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->ustats.cps);
2696 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->ustats.inpps);
2697 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->ustats.outpps);
2698 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->ustats.inbps);
2699 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->ustats.outbps);
9a812198
JV
2700
2701 spin_unlock_bh(&stats->lock);
2702
2703 nla_nest_end(skb, nl_stats);
2704
2705 return 0;
2706
2707nla_put_failure:
2708 spin_unlock_bh(&stats->lock);
2709 nla_nest_cancel(skb, nl_stats);
2710 return -EMSGSIZE;
2711}
2712
2713static int ip_vs_genl_fill_service(struct sk_buff *skb,
2714 struct ip_vs_service *svc)
2715{
2716 struct nlattr *nl_service;
2717 struct ip_vs_flags flags = { .flags = svc->flags,
2718 .mask = ~0 };
2719
2720 nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2721 if (!nl_service)
2722 return -EMSGSIZE;
2723
f94fd041 2724 NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, svc->af);
9a812198
JV
2725
2726 if (svc->fwmark) {
2727 NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
2728 } else {
2729 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
2730 NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
2731 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
2732 }
2733
2734 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
0d1e71b0
SH
2735 if (svc->pe)
2736 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_PE_NAME, svc->pe->name);
9a812198
JV
2737 NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
2738 NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
2739 NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
2740
2741 if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2742 goto nla_put_failure;
2743
2744 nla_nest_end(skb, nl_service);
2745
2746 return 0;
2747
2748nla_put_failure:
2749 nla_nest_cancel(skb, nl_service);
2750 return -EMSGSIZE;
2751}
2752
2753static int ip_vs_genl_dump_service(struct sk_buff *skb,
2754 struct ip_vs_service *svc,
2755 struct netlink_callback *cb)
2756{
2757 void *hdr;
2758
2759 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2760 &ip_vs_genl_family, NLM_F_MULTI,
2761 IPVS_CMD_NEW_SERVICE);
2762 if (!hdr)
2763 return -EMSGSIZE;
2764
2765 if (ip_vs_genl_fill_service(skb, svc) < 0)
2766 goto nla_put_failure;
2767
2768 return genlmsg_end(skb, hdr);
2769
2770nla_put_failure:
2771 genlmsg_cancel(skb, hdr);
2772 return -EMSGSIZE;
2773}
2774
2775static int ip_vs_genl_dump_services(struct sk_buff *skb,
2776 struct netlink_callback *cb)
2777{
2778 int idx = 0, i;
2779 int start = cb->args[0];
2780 struct ip_vs_service *svc;
fc723250 2781 struct net *net = skb_sknet(skb);
9a812198
JV
2782
2783 mutex_lock(&__ip_vs_mutex);
2784 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2785 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
fc723250 2786 if (++idx <= start || !net_eq(svc->net, net))
9a812198
JV
2787 continue;
2788 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2789 idx--;
2790 goto nla_put_failure;
2791 }
2792 }
2793 }
2794
2795 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2796 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
fc723250 2797 if (++idx <= start || !net_eq(svc->net, net))
9a812198
JV
2798 continue;
2799 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2800 idx--;
2801 goto nla_put_failure;
2802 }
2803 }
2804 }
2805
2806nla_put_failure:
2807 mutex_unlock(&__ip_vs_mutex);
2808 cb->args[0] = idx;
2809
2810 return skb->len;
2811}
2812
fc723250
HS
2813static int ip_vs_genl_parse_service(struct net *net,
2814 struct ip_vs_service_user_kern *usvc,
26c15cfd
JA
2815 struct nlattr *nla, int full_entry,
2816 struct ip_vs_service **ret_svc)
9a812198
JV
2817{
2818 struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
2819 struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
26c15cfd 2820 struct ip_vs_service *svc;
9a812198
JV
2821
2822 /* Parse mandatory identifying service fields first */
2823 if (nla == NULL ||
2824 nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
2825 return -EINVAL;
2826
2827 nla_af = attrs[IPVS_SVC_ATTR_AF];
2828 nla_protocol = attrs[IPVS_SVC_ATTR_PROTOCOL];
2829 nla_addr = attrs[IPVS_SVC_ATTR_ADDR];
2830 nla_port = attrs[IPVS_SVC_ATTR_PORT];
2831 nla_fwmark = attrs[IPVS_SVC_ATTR_FWMARK];
2832
2833 if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
2834 return -EINVAL;
2835
258c8893
SH
2836 memset(usvc, 0, sizeof(*usvc));
2837
c860c6b1 2838 usvc->af = nla_get_u16(nla_af);
f94fd041
JV
2839#ifdef CONFIG_IP_VS_IPV6
2840 if (usvc->af != AF_INET && usvc->af != AF_INET6)
2841#else
2842 if (usvc->af != AF_INET)
2843#endif
9a812198
JV
2844 return -EAFNOSUPPORT;
2845
2846 if (nla_fwmark) {
2847 usvc->protocol = IPPROTO_TCP;
2848 usvc->fwmark = nla_get_u32(nla_fwmark);
2849 } else {
2850 usvc->protocol = nla_get_u16(nla_protocol);
2851 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
2852 usvc->port = nla_get_u16(nla_port);
2853 usvc->fwmark = 0;
2854 }
2855
26c15cfd 2856 if (usvc->fwmark)
fc723250 2857 svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark);
26c15cfd 2858 else
fc723250 2859 svc = __ip_vs_service_find(net, usvc->af, usvc->protocol,
26c15cfd
JA
2860 &usvc->addr, usvc->port);
2861 *ret_svc = svc;
2862
9a812198
JV
2863 /* If a full entry was requested, check for the additional fields */
2864 if (full_entry) {
0d1e71b0 2865 struct nlattr *nla_sched, *nla_flags, *nla_pe, *nla_timeout,
9a812198
JV
2866 *nla_netmask;
2867 struct ip_vs_flags flags;
9a812198
JV
2868
2869 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
0d1e71b0 2870 nla_pe = attrs[IPVS_SVC_ATTR_PE_NAME];
9a812198
JV
2871 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
2872 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
2873 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
2874
2875 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
2876 return -EINVAL;
2877
2878 nla_memcpy(&flags, nla_flags, sizeof(flags));
2879
2880 /* prefill flags from service if it already exists */
26c15cfd 2881 if (svc)
9a812198 2882 usvc->flags = svc->flags;
9a812198
JV
2883
2884 /* set new flags from userland */
2885 usvc->flags = (usvc->flags & ~flags.mask) |
2886 (flags.flags & flags.mask);
c860c6b1 2887 usvc->sched_name = nla_data(nla_sched);
0d1e71b0 2888 usvc->pe_name = nla_pe ? nla_data(nla_pe) : NULL;
9a812198
JV
2889 usvc->timeout = nla_get_u32(nla_timeout);
2890 usvc->netmask = nla_get_u32(nla_netmask);
2891 }
2892
2893 return 0;
2894}
2895
fc723250
HS
2896static struct ip_vs_service *ip_vs_genl_find_service(struct net *net,
2897 struct nlattr *nla)
9a812198 2898{
c860c6b1 2899 struct ip_vs_service_user_kern usvc;
26c15cfd 2900 struct ip_vs_service *svc;
9a812198
JV
2901 int ret;
2902
fc723250 2903 ret = ip_vs_genl_parse_service(net, &usvc, nla, 0, &svc);
26c15cfd 2904 return ret ? ERR_PTR(ret) : svc;
9a812198
JV
2905}
2906
2907static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
2908{
2909 struct nlattr *nl_dest;
2910
2911 nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
2912 if (!nl_dest)
2913 return -EMSGSIZE;
2914
2915 NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
2916 NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
2917
2918 NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
2919 atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
2920 NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
2921 NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
2922 NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
2923 NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
2924 atomic_read(&dest->activeconns));
2925 NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
2926 atomic_read(&dest->inactconns));
2927 NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
2928 atomic_read(&dest->persistconns));
2929
2930 if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
2931 goto nla_put_failure;
2932
2933 nla_nest_end(skb, nl_dest);
2934
2935 return 0;
2936
2937nla_put_failure:
2938 nla_nest_cancel(skb, nl_dest);
2939 return -EMSGSIZE;
2940}
2941
2942static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
2943 struct netlink_callback *cb)
2944{
2945 void *hdr;
2946
2947 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2948 &ip_vs_genl_family, NLM_F_MULTI,
2949 IPVS_CMD_NEW_DEST);
2950 if (!hdr)
2951 return -EMSGSIZE;
2952
2953 if (ip_vs_genl_fill_dest(skb, dest) < 0)
2954 goto nla_put_failure;
2955
2956 return genlmsg_end(skb, hdr);
2957
2958nla_put_failure:
2959 genlmsg_cancel(skb, hdr);
2960 return -EMSGSIZE;
2961}
2962
2963static int ip_vs_genl_dump_dests(struct sk_buff *skb,
2964 struct netlink_callback *cb)
2965{
2966 int idx = 0;
2967 int start = cb->args[0];
2968 struct ip_vs_service *svc;
2969 struct ip_vs_dest *dest;
2970 struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
a0840e2e 2971 struct net *net = skb_sknet(skb);
9a812198
JV
2972
2973 mutex_lock(&__ip_vs_mutex);
2974
2975 /* Try to find the service for which to dump destinations */
2976 if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
2977 IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
2978 goto out_err;
2979
a0840e2e 2980
fc723250 2981 svc = ip_vs_genl_find_service(net, attrs[IPVS_CMD_ATTR_SERVICE]);
9a812198
JV
2982 if (IS_ERR(svc) || svc == NULL)
2983 goto out_err;
2984
2985 /* Dump the destinations */
2986 list_for_each_entry(dest, &svc->destinations, n_list) {
2987 if (++idx <= start)
2988 continue;
2989 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
2990 idx--;
2991 goto nla_put_failure;
2992 }
2993 }
2994
2995nla_put_failure:
2996 cb->args[0] = idx;
9a812198
JV
2997
2998out_err:
2999 mutex_unlock(&__ip_vs_mutex);
3000
3001 return skb->len;
3002}
3003
c860c6b1 3004static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
9a812198
JV
3005 struct nlattr *nla, int full_entry)
3006{
3007 struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
3008 struct nlattr *nla_addr, *nla_port;
3009
3010 /* Parse mandatory identifying destination fields first */
3011 if (nla == NULL ||
3012 nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
3013 return -EINVAL;
3014
3015 nla_addr = attrs[IPVS_DEST_ATTR_ADDR];
3016 nla_port = attrs[IPVS_DEST_ATTR_PORT];
3017
3018 if (!(nla_addr && nla_port))
3019 return -EINVAL;
3020
258c8893
SH
3021 memset(udest, 0, sizeof(*udest));
3022
9a812198
JV
3023 nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
3024 udest->port = nla_get_u16(nla_port);
3025
3026 /* If a full entry was requested, check for the additional fields */
3027 if (full_entry) {
3028 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
3029 *nla_l_thresh;
3030
3031 nla_fwd = attrs[IPVS_DEST_ATTR_FWD_METHOD];
3032 nla_weight = attrs[IPVS_DEST_ATTR_WEIGHT];
3033 nla_u_thresh = attrs[IPVS_DEST_ATTR_U_THRESH];
3034 nla_l_thresh = attrs[IPVS_DEST_ATTR_L_THRESH];
3035
3036 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
3037 return -EINVAL;
3038
3039 udest->conn_flags = nla_get_u32(nla_fwd)
3040 & IP_VS_CONN_F_FWD_MASK;
3041 udest->weight = nla_get_u32(nla_weight);
3042 udest->u_threshold = nla_get_u32(nla_u_thresh);
3043 udest->l_threshold = nla_get_u32(nla_l_thresh);
3044 }
3045
3046 return 0;
3047}
3048
3049static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
3050 const char *mcast_ifn, __be32 syncid)
3051{
3052 struct nlattr *nl_daemon;
3053
3054 nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
3055 if (!nl_daemon)
3056 return -EMSGSIZE;
3057
3058 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
3059 NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
3060 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
3061
3062 nla_nest_end(skb, nl_daemon);
3063
3064 return 0;
3065
3066nla_put_failure:
3067 nla_nest_cancel(skb, nl_daemon);
3068 return -EMSGSIZE;
3069}
3070
3071static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
3072 const char *mcast_ifn, __be32 syncid,
3073 struct netlink_callback *cb)
3074{
3075 void *hdr;
3076 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
3077 &ip_vs_genl_family, NLM_F_MULTI,
3078 IPVS_CMD_NEW_DAEMON);
3079 if (!hdr)
3080 return -EMSGSIZE;
3081
3082 if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
3083 goto nla_put_failure;
3084
3085 return genlmsg_end(skb, hdr);
3086
3087nla_put_failure:
3088 genlmsg_cancel(skb, hdr);
3089 return -EMSGSIZE;
3090}
3091
3092static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
3093 struct netlink_callback *cb)
3094{
f131315f
HS
3095 struct net *net = skb_net(skb);
3096 struct netns_ipvs *ipvs = net_ipvs(net);
3097
9a812198 3098 mutex_lock(&__ip_vs_mutex);
f131315f 3099 if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
9a812198 3100 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
f131315f
HS
3101 ipvs->master_mcast_ifn,
3102 ipvs->master_syncid, cb) < 0)
9a812198
JV
3103 goto nla_put_failure;
3104
3105 cb->args[0] = 1;
3106 }
3107
f131315f 3108 if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
9a812198 3109 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
f131315f
HS
3110 ipvs->backup_mcast_ifn,
3111 ipvs->backup_syncid, cb) < 0)
9a812198
JV
3112 goto nla_put_failure;
3113
3114 cb->args[1] = 1;
3115 }
3116
3117nla_put_failure:
3118 mutex_unlock(&__ip_vs_mutex);
3119
3120 return skb->len;
3121}
3122
f131315f 3123static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs)
9a812198
JV
3124{
3125 if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
3126 attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
3127 attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
3128 return -EINVAL;
3129
f131315f
HS
3130 return start_sync_thread(net,
3131 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
9a812198
JV
3132 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
3133 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
3134}
3135
f131315f 3136static int ip_vs_genl_del_daemon(struct net *net, struct nlattr **attrs)
9a812198
JV
3137{
3138 if (!attrs[IPVS_DAEMON_ATTR_STATE])
3139 return -EINVAL;
3140
f131315f
HS
3141 return stop_sync_thread(net,
3142 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
9a812198
JV
3143}
3144
9330419d 3145static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs)
9a812198
JV
3146{
3147 struct ip_vs_timeout_user t;
3148
9330419d 3149 __ip_vs_get_timeouts(net, &t);
9a812198
JV
3150
3151 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
3152 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
3153
3154 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
3155 t.tcp_fin_timeout =
3156 nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
3157
3158 if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
3159 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
3160
9330419d 3161 return ip_vs_set_timeout(net, &t);
9a812198
JV
3162}
3163
3164static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3165{
3166 struct ip_vs_service *svc = NULL;
c860c6b1
JV
3167 struct ip_vs_service_user_kern usvc;
3168 struct ip_vs_dest_user_kern udest;
9a812198
JV
3169 int ret = 0, cmd;
3170 int need_full_svc = 0, need_full_dest = 0;
fc723250 3171 struct net *net;
a0840e2e 3172 struct netns_ipvs *ipvs;
9a812198 3173
fc723250 3174 net = skb_sknet(skb);
a0840e2e 3175 ipvs = net_ipvs(net);
9a812198
JV
3176 cmd = info->genlhdr->cmd;
3177
3178 mutex_lock(&__ip_vs_mutex);
3179
3180 if (cmd == IPVS_CMD_FLUSH) {
fc723250 3181 ret = ip_vs_flush(net);
9a812198
JV
3182 goto out;
3183 } else if (cmd == IPVS_CMD_SET_CONFIG) {
9330419d 3184 ret = ip_vs_genl_set_config(net, info->attrs);
9a812198
JV
3185 goto out;
3186 } else if (cmd == IPVS_CMD_NEW_DAEMON ||
3187 cmd == IPVS_CMD_DEL_DAEMON) {
3188
3189 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
3190
3191 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
3192 nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
3193 info->attrs[IPVS_CMD_ATTR_DAEMON],
3194 ip_vs_daemon_policy)) {
3195 ret = -EINVAL;
3196 goto out;
3197 }
3198
3199 if (cmd == IPVS_CMD_NEW_DAEMON)
f131315f 3200 ret = ip_vs_genl_new_daemon(net, daemon_attrs);
9a812198 3201 else
f131315f 3202 ret = ip_vs_genl_del_daemon(net, daemon_attrs);
9a812198
JV
3203 goto out;
3204 } else if (cmd == IPVS_CMD_ZERO &&
3205 !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
fc723250 3206 ret = ip_vs_zero_all(net);
9a812198
JV
3207 goto out;
3208 }
3209
3210 /* All following commands require a service argument, so check if we
3211 * received a valid one. We need a full service specification when
3212 * adding / editing a service. Only identifying members otherwise. */
3213 if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
3214 need_full_svc = 1;
3215
fc723250 3216 ret = ip_vs_genl_parse_service(net, &usvc,
9a812198 3217 info->attrs[IPVS_CMD_ATTR_SERVICE],
26c15cfd 3218 need_full_svc, &svc);
9a812198
JV
3219 if (ret)
3220 goto out;
3221
9a812198
JV
3222 /* Unless we're adding a new service, the service must already exist */
3223 if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
3224 ret = -ESRCH;
3225 goto out;
3226 }
3227
3228 /* Destination commands require a valid destination argument. For
3229 * adding / editing a destination, we need a full destination
3230 * specification. */
3231 if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
3232 cmd == IPVS_CMD_DEL_DEST) {
3233 if (cmd != IPVS_CMD_DEL_DEST)
3234 need_full_dest = 1;
3235
3236 ret = ip_vs_genl_parse_dest(&udest,
3237 info->attrs[IPVS_CMD_ATTR_DEST],
3238 need_full_dest);
3239 if (ret)
3240 goto out;
3241 }
3242
3243 switch (cmd) {
3244 case IPVS_CMD_NEW_SERVICE:
3245 if (svc == NULL)
fc723250 3246 ret = ip_vs_add_service(net, &usvc, &svc);
9a812198
JV
3247 else
3248 ret = -EEXIST;
3249 break;
3250 case IPVS_CMD_SET_SERVICE:
3251 ret = ip_vs_edit_service(svc, &usvc);
3252 break;
3253 case IPVS_CMD_DEL_SERVICE:
3254 ret = ip_vs_del_service(svc);
26c15cfd 3255 /* do not use svc, it can be freed */
9a812198
JV
3256 break;
3257 case IPVS_CMD_NEW_DEST:
3258 ret = ip_vs_add_dest(svc, &udest);
3259 break;
3260 case IPVS_CMD_SET_DEST:
3261 ret = ip_vs_edit_dest(svc, &udest);
3262 break;
3263 case IPVS_CMD_DEL_DEST:
3264 ret = ip_vs_del_dest(svc, &udest);
3265 break;
3266 case IPVS_CMD_ZERO:
3267 ret = ip_vs_zero_service(svc);
3268 break;
3269 default:
3270 ret = -EINVAL;
3271 }
3272
3273out:
9a812198
JV
3274 mutex_unlock(&__ip_vs_mutex);
3275
3276 return ret;
3277}
3278
3279static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3280{
3281 struct sk_buff *msg;
3282 void *reply;
3283 int ret, cmd, reply_cmd;
fc723250 3284 struct net *net;
a0840e2e 3285 struct netns_ipvs *ipvs;
9a812198 3286
fc723250 3287 net = skb_sknet(skb);
a0840e2e 3288 ipvs = net_ipvs(net);
9a812198
JV
3289 cmd = info->genlhdr->cmd;
3290
3291 if (cmd == IPVS_CMD_GET_SERVICE)
3292 reply_cmd = IPVS_CMD_NEW_SERVICE;
3293 else if (cmd == IPVS_CMD_GET_INFO)
3294 reply_cmd = IPVS_CMD_SET_INFO;
3295 else if (cmd == IPVS_CMD_GET_CONFIG)
3296 reply_cmd = IPVS_CMD_SET_CONFIG;
3297 else {
1e3e238e 3298 pr_err("unknown Generic Netlink command\n");
9a812198
JV
3299 return -EINVAL;
3300 }
3301
3302 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
3303 if (!msg)
3304 return -ENOMEM;
3305
3306 mutex_lock(&__ip_vs_mutex);
3307
3308 reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3309 if (reply == NULL)
3310 goto nla_put_failure;
3311
3312 switch (cmd) {
3313 case IPVS_CMD_GET_SERVICE:
3314 {
3315 struct ip_vs_service *svc;
3316
fc723250
HS
3317 svc = ip_vs_genl_find_service(net,
3318 info->attrs[IPVS_CMD_ATTR_SERVICE]);
9a812198
JV
3319 if (IS_ERR(svc)) {
3320 ret = PTR_ERR(svc);
3321 goto out_err;
3322 } else if (svc) {
3323 ret = ip_vs_genl_fill_service(msg, svc);
9a812198
JV
3324 if (ret)
3325 goto nla_put_failure;
3326 } else {
3327 ret = -ESRCH;
3328 goto out_err;
3329 }
3330
3331 break;
3332 }
3333
3334 case IPVS_CMD_GET_CONFIG:
3335 {
3336 struct ip_vs_timeout_user t;
3337
9330419d 3338 __ip_vs_get_timeouts(net, &t);
9a812198
JV
3339#ifdef CONFIG_IP_VS_PROTO_TCP
3340 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
3341 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3342 t.tcp_fin_timeout);
3343#endif
3344#ifdef CONFIG_IP_VS_PROTO_UDP
3345 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
3346#endif
3347
3348 break;
3349 }
3350
3351 case IPVS_CMD_GET_INFO:
3352 NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
3353 NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
6f7edb48 3354 ip_vs_conn_tab_size);
9a812198
JV
3355 break;
3356 }
3357
3358 genlmsg_end(msg, reply);
134e6375 3359 ret = genlmsg_reply(msg, info);
9a812198
JV
3360 goto out;
3361
3362nla_put_failure:
1e3e238e 3363 pr_err("not enough space in Netlink message\n");
9a812198
JV
3364 ret = -EMSGSIZE;
3365
3366out_err:
3367 nlmsg_free(msg);
3368out:
3369 mutex_unlock(&__ip_vs_mutex);
3370
3371 return ret;
3372}
3373
3374
3375static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
3376 {
3377 .cmd = IPVS_CMD_NEW_SERVICE,
3378 .flags = GENL_ADMIN_PERM,
3379 .policy = ip_vs_cmd_policy,
3380 .doit = ip_vs_genl_set_cmd,
3381 },
3382 {
3383 .cmd = IPVS_CMD_SET_SERVICE,
3384 .flags = GENL_ADMIN_PERM,
3385 .policy = ip_vs_cmd_policy,
3386 .doit = ip_vs_genl_set_cmd,
3387 },
3388 {
3389 .cmd = IPVS_CMD_DEL_SERVICE,
3390 .flags = GENL_ADMIN_PERM,
3391 .policy = ip_vs_cmd_policy,
3392 .doit = ip_vs_genl_set_cmd,
3393 },
3394 {
3395 .cmd = IPVS_CMD_GET_SERVICE,
3396 .flags = GENL_ADMIN_PERM,
3397 .doit = ip_vs_genl_get_cmd,
3398 .dumpit = ip_vs_genl_dump_services,
3399 .policy = ip_vs_cmd_policy,
3400 },
3401 {
3402 .cmd = IPVS_CMD_NEW_DEST,
3403 .flags = GENL_ADMIN_PERM,
3404 .policy = ip_vs_cmd_policy,
3405 .doit = ip_vs_genl_set_cmd,
3406 },
3407 {
3408 .cmd = IPVS_CMD_SET_DEST,
3409 .flags = GENL_ADMIN_PERM,
3410 .policy = ip_vs_cmd_policy,
3411 .doit = ip_vs_genl_set_cmd,
3412 },
3413 {
3414 .cmd = IPVS_CMD_DEL_DEST,
3415 .flags = GENL_ADMIN_PERM,
3416 .policy = ip_vs_cmd_policy,
3417 .doit = ip_vs_genl_set_cmd,
3418 },
3419 {
3420 .cmd = IPVS_CMD_GET_DEST,
3421 .flags = GENL_ADMIN_PERM,
3422 .policy = ip_vs_cmd_policy,
3423 .dumpit = ip_vs_genl_dump_dests,
3424 },
3425 {
3426 .cmd = IPVS_CMD_NEW_DAEMON,
3427 .flags = GENL_ADMIN_PERM,
3428 .policy = ip_vs_cmd_policy,
3429 .doit = ip_vs_genl_set_cmd,
3430 },
3431 {
3432 .cmd = IPVS_CMD_DEL_DAEMON,
3433 .flags = GENL_ADMIN_PERM,
3434 .policy = ip_vs_cmd_policy,
3435 .doit = ip_vs_genl_set_cmd,
3436 },
3437 {
3438 .cmd = IPVS_CMD_GET_DAEMON,
3439 .flags = GENL_ADMIN_PERM,
3440 .dumpit = ip_vs_genl_dump_daemons,
3441 },
3442 {
3443 .cmd = IPVS_CMD_SET_CONFIG,
3444 .flags = GENL_ADMIN_PERM,
3445 .policy = ip_vs_cmd_policy,
3446 .doit = ip_vs_genl_set_cmd,
3447 },
3448 {
3449 .cmd = IPVS_CMD_GET_CONFIG,
3450 .flags = GENL_ADMIN_PERM,
3451 .doit = ip_vs_genl_get_cmd,
3452 },
3453 {
3454 .cmd = IPVS_CMD_GET_INFO,
3455 .flags = GENL_ADMIN_PERM,
3456 .doit = ip_vs_genl_get_cmd,
3457 },
3458 {
3459 .cmd = IPVS_CMD_ZERO,
3460 .flags = GENL_ADMIN_PERM,
3461 .policy = ip_vs_cmd_policy,
3462 .doit = ip_vs_genl_set_cmd,
3463 },
3464 {
3465 .cmd = IPVS_CMD_FLUSH,
3466 .flags = GENL_ADMIN_PERM,
3467 .doit = ip_vs_genl_set_cmd,
3468 },
3469};
3470
3471static int __init ip_vs_genl_register(void)
3472{
8f698d54
MM
3473 return genl_register_family_with_ops(&ip_vs_genl_family,
3474 ip_vs_genl_ops, ARRAY_SIZE(ip_vs_genl_ops));
9a812198
JV
3475}
3476
3477static void ip_vs_genl_unregister(void)
3478{
3479 genl_unregister_family(&ip_vs_genl_family);
3480}
3481
3482/* End of Generic Netlink interface definitions */
3483
61b1ab45
HS
/*
 * Per-netns init/exit functions.
 */
3487int __net_init __ip_vs_control_init(struct net *net)
3488{
fc723250
HS
3489 int idx;
3490 struct netns_ipvs *ipvs = net_ipvs(net);
a0840e2e 3491 struct ctl_table *tbl;
fc723250 3492
a0840e2e
HS
3493 atomic_set(&ipvs->dropentry, 0);
3494 spin_lock_init(&ipvs->dropentry_lock);
3495 spin_lock_init(&ipvs->droppacket_lock);
3496 spin_lock_init(&ipvs->securetcp_lock);
3497 ipvs->rs_lock = __RW_LOCK_UNLOCKED(ipvs->rs_lock);
3498
3499 /* Initialize rs_table */
3500 for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
3501 INIT_LIST_HEAD(&ipvs->rs_table[idx]);
3502
f2431e6e 3503 INIT_LIST_HEAD(&ipvs->dest_trash);
763f8d0e
HS
3504 atomic_set(&ipvs->ftpsvc_counter, 0);
3505 atomic_set(&ipvs->nullsvc_counter, 0);
f2431e6e 3506
b17fc996
HS
3507 /* procfs stats */
3508 ipvs->tot_stats = kzalloc(sizeof(struct ip_vs_stats), GFP_KERNEL);
3509 if (ipvs->tot_stats == NULL) {
3510 pr_err("%s(): no memory.\n", __func__);
3511 return -ENOMEM;
3512 }
3513 ipvs->cpustats = alloc_percpu(struct ip_vs_cpu_stats);
3514 if (!ipvs->cpustats) {
3515 pr_err("%s() alloc_percpu failed\n", __func__);
3516 goto err_alloc;
3517 }
3518 spin_lock_init(&ipvs->tot_stats->lock);
61b1ab45
HS
3519
3520 proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops);
3521 proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops);
b17fc996
HS
3522 proc_net_fops_create(net, "ip_vs_stats_percpu", 0,
3523 &ip_vs_stats_percpu_fops);
a0840e2e
HS
3524
3525 if (!net_eq(net, &init_net)) {
3526 tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL);
3527 if (tbl == NULL)
3528 goto err_dup;
3529 } else
3530 tbl = vs_vars;
3531 /* Initialize sysctl defaults */
3532 idx = 0;
3533 ipvs->sysctl_amemthresh = 1024;
3534 tbl[idx++].data = &ipvs->sysctl_amemthresh;
3535 ipvs->sysctl_am_droprate = 10;
3536 tbl[idx++].data = &ipvs->sysctl_am_droprate;
3537 tbl[idx++].data = &ipvs->sysctl_drop_entry;
3538 tbl[idx++].data = &ipvs->sysctl_drop_packet;
3539#ifdef CONFIG_IP_VS_NFCT
3540 tbl[idx++].data = &ipvs->sysctl_conntrack;
3541#endif
3542 tbl[idx++].data = &ipvs->sysctl_secure_tcp;
3543 ipvs->sysctl_snat_reroute = 1;
3544 tbl[idx++].data = &ipvs->sysctl_snat_reroute;
3545 ipvs->sysctl_sync_ver = 1;
3546 tbl[idx++].data = &ipvs->sysctl_sync_ver;
3547 tbl[idx++].data = &ipvs->sysctl_cache_bypass;
3548 tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
3549 tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template;
3550 ipvs->sysctl_sync_threshold[0] = 3;
3551 ipvs->sysctl_sync_threshold[1] = 50;
3552 tbl[idx].data = &ipvs->sysctl_sync_threshold;
3553 tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
3554 tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;
3555
3556
0443929f 3557#ifdef CONFIG_SYSCTL
a0840e2e 3558 ipvs->sysctl_hdr = register_net_sysctl_table(net, net_vs_ctl_path,
07924709 3559 tbl);
0443929f
SH
3560 if (ipvs->sysctl_hdr == NULL) {
3561 if (!net_eq(net, &init_net))
3562 kfree(tbl);
3563 goto err_dup;
3564 }
3565#endif
b17fc996 3566 ip_vs_new_estimator(net, ipvs->tot_stats);
a0840e2e 3567 ipvs->sysctl_tbl = tbl;
f6340ee0
HS
3568 /* Schedule defense work */
3569 INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler);
3570 schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
61b1ab45
HS
3571 return 0;
3572
a0840e2e 3573err_dup:
b17fc996
HS
3574 free_percpu(ipvs->cpustats);
3575err_alloc:
3576 kfree(ipvs->tot_stats);
61b1ab45
HS
3577 return -ENOMEM;
3578}
3579
3580static void __net_exit __ip_vs_control_cleanup(struct net *net)
3581{
b17fc996
HS
3582 struct netns_ipvs *ipvs = net_ipvs(net);
3583
f2431e6e 3584 ip_vs_trash_cleanup(net);
b17fc996 3585 ip_vs_kill_estimator(net, ipvs->tot_stats);
f2431e6e
HS
3586 cancel_delayed_work_sync(&ipvs->defense_work);
3587 cancel_work_sync(&ipvs->defense_work.work);
0443929f 3588#ifdef CONFIG_SYSCTL
a0840e2e 3589 unregister_net_sysctl_table(ipvs->sysctl_hdr);
0443929f 3590#endif
b17fc996 3591 proc_net_remove(net, "ip_vs_stats_percpu");
61b1ab45
HS
3592 proc_net_remove(net, "ip_vs_stats");
3593 proc_net_remove(net, "ip_vs");
b17fc996
HS
3594 free_percpu(ipvs->cpustats);
3595 kfree(ipvs->tot_stats);
61b1ab45
HS
3596}
3597
3598static struct pernet_operations ipvs_control_ops = {
3599 .init = __ip_vs_control_init,
3600 .exit = __ip_vs_control_cleanup,
3601};
1da177e4 3602
048cf48b 3603int __init ip_vs_control_init(void)
1da177e4 3604{
1da177e4 3605 int idx;
fc723250 3606 int ret;
1da177e4
LT
3607
3608 EnterFunction(2);
3609
fc723250 3610 /* Initialize svc_table, ip_vs_svc_fwm_table, rs_table */
d86bef73
EB
3611 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
3612 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3613 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3614 }
fc723250
HS
3615
3616 ret = register_pernet_subsys(&ipvs_control_ops);
3617 if (ret) {
3618 pr_err("cannot register namespace.\n");
3619 goto err;
d86bef73 3620 }
fc723250
HS
3621
3622 smp_wmb(); /* Do we really need it now ? */
d86bef73 3623
1da177e4
LT
3624 ret = nf_register_sockopt(&ip_vs_sockopts);
3625 if (ret) {
1e3e238e 3626 pr_err("cannot register sockopt.\n");
fc723250 3627 goto err_net;
1da177e4
LT
3628 }
3629
9a812198
JV
3630 ret = ip_vs_genl_register();
3631 if (ret) {
1e3e238e 3632 pr_err("cannot register Generic Netlink interface.\n");
9a812198 3633 nf_unregister_sockopt(&ip_vs_sockopts);
fc723250 3634 goto err_net;
9a812198
JV
3635 }
3636
1da177e4
LT
3637 LeaveFunction(2);
3638 return 0;
fc723250
HS
3639
3640err_net:
3641 unregister_pernet_subsys(&ipvs_control_ops);
3642err:
3643 return ret;
1da177e4
LT
3644}
3645
3646
3647void ip_vs_control_cleanup(void)
3648{
3649 EnterFunction(2);
61b1ab45 3650 unregister_pernet_subsys(&ipvs_control_ops);
9a812198 3651 ip_vs_genl_unregister();
1da177e4
LT
3652 nf_unregister_sockopt(&ip_vs_sockopts);
3653 LeaveFunction(2);
3654}
This page took 0.819372 seconds and 5 git commands to generate.