Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* Cluster IP hashmark target |
2 | * (C) 2003-2004 by Harald Welte <laforge@netfilter.org> | |
3 | * based on ideas of Fabio Olive Leite <olive@unixforge.org> | |
4 | * | |
5 | * Development of this code funded by SuSE Linux AG, http://www.suse.com/ | |
6 | * | |
7 | * This program is free software; you can redistribute it and/or modify | |
8 | * it under the terms of the GNU General Public License version 2 as | |
9 | * published by the Free Software Foundation. | |
10 | * | |
11 | */ | |
12 | #include <linux/module.h> | |
13 | #include <linux/config.h> | |
14 | #include <linux/proc_fs.h> | |
15 | #include <linux/jhash.h> | |
16 | #include <linux/skbuff.h> | |
17 | #include <linux/ip.h> | |
18 | #include <linux/tcp.h> | |
19 | #include <linux/udp.h> | |
20 | #include <linux/icmp.h> | |
21 | #include <linux/if_arp.h> | |
22 | #include <linux/proc_fs.h> | |
23 | #include <linux/seq_file.h> | |
24 | ||
25 | #include <net/checksum.h> | |
26 | ||
27 | #include <linux/netfilter_arp.h> | |
28 | ||
29 | #include <linux/netfilter_ipv4/ip_tables.h> | |
30 | #include <linux/netfilter_ipv4/ipt_CLUSTERIP.h> | |
31 | #include <linux/netfilter_ipv4/ip_conntrack.h> | |
32 | #include <linux/netfilter_ipv4/lockhelp.h> | |
33 | ||
34 | #define CLUSTERIP_VERSION "0.6" | |
35 | ||
36 | #define DEBUG_CLUSTERIP | |
37 | ||
38 | #ifdef DEBUG_CLUSTERIP | |
39 | #define DEBUGP printk | |
40 | #else | |
41 | #define DEBUGP | |
42 | #endif | |
43 | ||
44 | MODULE_LICENSE("GPL"); | |
45 | MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); | |
46 | MODULE_DESCRIPTION("iptables target for CLUSTERIP"); | |
47 | ||
48 | struct clusterip_config { | |
49 | struct list_head list; /* list of all configs */ | |
50 | atomic_t refcount; /* reference count */ | |
51 | ||
52 | u_int32_t clusterip; /* the IP address */ | |
53 | u_int8_t clustermac[ETH_ALEN]; /* the MAC address */ | |
54 | struct net_device *dev; /* device */ | |
55 | u_int16_t num_total_nodes; /* total number of nodes */ | |
56 | u_int16_t num_local_nodes; /* number of local nodes */ | |
57 | u_int16_t local_nodes[CLUSTERIP_MAX_NODES]; /* node number array */ | |
58 | ||
59 | #ifdef CONFIG_PROC_FS | |
60 | struct proc_dir_entry *pde; /* proc dir entry */ | |
61 | #endif | |
62 | enum clusterip_hashmode hash_mode; /* which hashing mode */ | |
63 | u_int32_t hash_initval; /* hash initialization */ | |
64 | }; | |
65 | ||
/* every clusterip_config currently in existence */
static LIST_HEAD(clusterip_configs);

/* clusterip_lock protects the clusterip_configs list _AND_ the configurable
 * data within all structures (num_local_nodes, local_nodes[]) */
static DECLARE_RWLOCK(clusterip_lock);

#if defined(CONFIG_PROC_FS)
/* file operations of the per-config proc files; initialised further down */
static struct file_operations clusterip_proc_fops;
/* proc directory holding one file per cluster IP */
static struct proc_dir_entry *clusterip_procdir;
#endif
76 | ||
77 | static inline void | |
78 | clusterip_config_get(struct clusterip_config *c) { | |
79 | atomic_inc(&c->refcount); | |
80 | } | |
81 | ||
82 | static inline void | |
83 | clusterip_config_put(struct clusterip_config *c) { | |
84 | if (atomic_dec_and_test(&c->refcount)) { | |
85 | WRITE_LOCK(&clusterip_lock); | |
86 | list_del(&c->list); | |
87 | WRITE_UNLOCK(&clusterip_lock); | |
88 | dev_mc_delete(c->dev, c->clustermac, ETH_ALEN, 0); | |
89 | dev_put(c->dev); | |
90 | kfree(c); | |
91 | } | |
92 | } | |
93 | ||
94 | ||
95 | static struct clusterip_config * | |
96 | __clusterip_config_find(u_int32_t clusterip) | |
97 | { | |
98 | struct list_head *pos; | |
99 | ||
100 | MUST_BE_READ_LOCKED(&clusterip_lock); | |
101 | list_for_each(pos, &clusterip_configs) { | |
102 | struct clusterip_config *c = list_entry(pos, | |
103 | struct clusterip_config, list); | |
104 | if (c->clusterip == clusterip) { | |
105 | return c; | |
106 | } | |
107 | } | |
108 | ||
109 | return NULL; | |
110 | } | |
111 | ||
112 | static inline struct clusterip_config * | |
113 | clusterip_config_find_get(u_int32_t clusterip) | |
114 | { | |
115 | struct clusterip_config *c; | |
116 | ||
117 | READ_LOCK(&clusterip_lock); | |
118 | c = __clusterip_config_find(clusterip); | |
119 | if (!c) { | |
120 | READ_UNLOCK(&clusterip_lock); | |
121 | return NULL; | |
122 | } | |
123 | atomic_inc(&c->refcount); | |
124 | READ_UNLOCK(&clusterip_lock); | |
125 | ||
126 | return c; | |
127 | } | |
128 | ||
129 | static struct clusterip_config * | |
130 | clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip, | |
131 | struct net_device *dev) | |
132 | { | |
133 | struct clusterip_config *c; | |
134 | char buffer[16]; | |
135 | ||
136 | c = kmalloc(sizeof(*c), GFP_ATOMIC); | |
137 | if (!c) | |
138 | return NULL; | |
139 | ||
140 | memset(c, 0, sizeof(*c)); | |
141 | c->dev = dev; | |
142 | c->clusterip = ip; | |
143 | memcpy(&c->clustermac, &i->clustermac, ETH_ALEN); | |
144 | c->num_total_nodes = i->num_total_nodes; | |
145 | c->num_local_nodes = i->num_local_nodes; | |
146 | memcpy(&c->local_nodes, &i->local_nodes, sizeof(&c->local_nodes)); | |
147 | c->hash_mode = i->hash_mode; | |
148 | c->hash_initval = i->hash_initval; | |
149 | atomic_set(&c->refcount, 1); | |
150 | ||
151 | #ifdef CONFIG_PROC_FS | |
152 | /* create proc dir entry */ | |
153 | sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(ip)); | |
154 | c->pde = create_proc_entry(buffer, S_IWUSR|S_IRUSR, clusterip_procdir); | |
155 | if (!c->pde) { | |
156 | kfree(c); | |
157 | return NULL; | |
158 | } | |
159 | c->pde->proc_fops = &clusterip_proc_fops; | |
160 | c->pde->data = c; | |
161 | #endif | |
162 | ||
163 | WRITE_LOCK(&clusterip_lock); | |
164 | list_add(&c->list, &clusterip_configs); | |
165 | WRITE_UNLOCK(&clusterip_lock); | |
166 | ||
167 | return c; | |
168 | } | |
169 | ||
170 | static int | |
171 | clusterip_add_node(struct clusterip_config *c, u_int16_t nodenum) | |
172 | { | |
173 | int i; | |
174 | ||
175 | WRITE_LOCK(&clusterip_lock); | |
176 | ||
177 | if (c->num_local_nodes >= CLUSTERIP_MAX_NODES | |
178 | || nodenum > CLUSTERIP_MAX_NODES) { | |
179 | WRITE_UNLOCK(&clusterip_lock); | |
180 | return 1; | |
181 | } | |
182 | ||
183 | /* check if we alrady have this number in our array */ | |
184 | for (i = 0; i < c->num_local_nodes; i++) { | |
185 | if (c->local_nodes[i] == nodenum) { | |
186 | WRITE_UNLOCK(&clusterip_lock); | |
187 | return 1; | |
188 | } | |
189 | } | |
190 | ||
191 | c->local_nodes[c->num_local_nodes++] = nodenum; | |
192 | ||
193 | WRITE_UNLOCK(&clusterip_lock); | |
194 | return 0; | |
195 | } | |
196 | ||
197 | static int | |
198 | clusterip_del_node(struct clusterip_config *c, u_int16_t nodenum) | |
199 | { | |
200 | int i; | |
201 | ||
202 | WRITE_LOCK(&clusterip_lock); | |
203 | ||
204 | if (c->num_local_nodes <= 1 || nodenum > CLUSTERIP_MAX_NODES) { | |
205 | WRITE_UNLOCK(&clusterip_lock); | |
206 | return 1; | |
207 | } | |
208 | ||
209 | for (i = 0; i < c->num_local_nodes; i++) { | |
210 | if (c->local_nodes[i] == nodenum) { | |
211 | int size = sizeof(u_int16_t)*(c->num_local_nodes-(i+1)); | |
212 | memmove(&c->local_nodes[i], &c->local_nodes[i+1], size); | |
213 | c->num_local_nodes--; | |
214 | WRITE_UNLOCK(&clusterip_lock); | |
215 | return 0; | |
216 | } | |
217 | } | |
218 | ||
219 | WRITE_UNLOCK(&clusterip_lock); | |
220 | return 1; | |
221 | } | |
222 | ||
223 | static inline u_int32_t | |
224 | clusterip_hashfn(struct sk_buff *skb, struct clusterip_config *config) | |
225 | { | |
226 | struct iphdr *iph = skb->nh.iph; | |
227 | unsigned long hashval; | |
228 | u_int16_t sport, dport; | |
229 | struct tcphdr *th; | |
230 | struct udphdr *uh; | |
231 | struct icmphdr *ih; | |
232 | ||
233 | switch (iph->protocol) { | |
234 | case IPPROTO_TCP: | |
235 | th = (void *)iph+iph->ihl*4; | |
236 | sport = ntohs(th->source); | |
237 | dport = ntohs(th->dest); | |
238 | break; | |
239 | case IPPROTO_UDP: | |
240 | uh = (void *)iph+iph->ihl*4; | |
241 | sport = ntohs(uh->source); | |
242 | dport = ntohs(uh->dest); | |
243 | break; | |
244 | case IPPROTO_ICMP: | |
245 | ih = (void *)iph+iph->ihl*4; | |
246 | sport = ntohs(ih->un.echo.id); | |
247 | dport = (ih->type<<8)|ih->code; | |
248 | break; | |
249 | default: | |
250 | if (net_ratelimit()) { | |
251 | printk(KERN_NOTICE "CLUSTERIP: unknown protocol `%u'\n", | |
252 | iph->protocol); | |
253 | } | |
254 | sport = dport = 0; | |
255 | } | |
256 | ||
257 | switch (config->hash_mode) { | |
258 | case CLUSTERIP_HASHMODE_SIP: | |
259 | hashval = jhash_1word(ntohl(iph->saddr), | |
260 | config->hash_initval); | |
261 | break; | |
262 | case CLUSTERIP_HASHMODE_SIP_SPT: | |
263 | hashval = jhash_2words(ntohl(iph->saddr), sport, | |
264 | config->hash_initval); | |
265 | break; | |
266 | case CLUSTERIP_HASHMODE_SIP_SPT_DPT: | |
267 | hashval = jhash_3words(ntohl(iph->saddr), sport, dport, | |
268 | config->hash_initval); | |
269 | break; | |
270 | default: | |
271 | /* to make gcc happy */ | |
272 | hashval = 0; | |
273 | /* This cannot happen, unless the check function wasn't called | |
274 | * at rule load time */ | |
275 | printk("CLUSTERIP: unknown mode `%u'\n", config->hash_mode); | |
276 | BUG(); | |
277 | break; | |
278 | } | |
279 | ||
280 | /* node numbers are 1..n, not 0..n */ | |
281 | return ((hashval % config->num_total_nodes)+1); | |
282 | } | |
283 | ||
284 | static inline int | |
285 | clusterip_responsible(struct clusterip_config *config, u_int32_t hash) | |
286 | { | |
287 | int i; | |
288 | ||
289 | READ_LOCK(&clusterip_lock); | |
290 | ||
291 | if (config->num_local_nodes == 0) { | |
292 | READ_UNLOCK(&clusterip_lock); | |
293 | return 0; | |
294 | } | |
295 | ||
296 | for (i = 0; i < config->num_local_nodes; i++) { | |
297 | if (config->local_nodes[i] == hash) { | |
298 | READ_UNLOCK(&clusterip_lock); | |
299 | return 1; | |
300 | } | |
301 | } | |
302 | ||
303 | READ_UNLOCK(&clusterip_lock); | |
304 | ||
305 | return 0; | |
306 | } | |
307 | ||
308 | /*********************************************************************** | |
309 | * IPTABLES TARGET | |
310 | ***********************************************************************/ | |
311 | ||
312 | static unsigned int | |
313 | target(struct sk_buff **pskb, | |
314 | const struct net_device *in, | |
315 | const struct net_device *out, | |
316 | unsigned int hooknum, | |
317 | const void *targinfo, | |
318 | void *userinfo) | |
319 | { | |
320 | const struct ipt_clusterip_tgt_info *cipinfo = targinfo; | |
321 | enum ip_conntrack_info ctinfo; | |
322 | struct ip_conntrack *ct = ip_conntrack_get((*pskb), &ctinfo); | |
323 | u_int32_t hash; | |
324 | ||
325 | /* don't need to clusterip_config_get() here, since refcount | |
326 | * is only decremented by destroy() - and ip_tables guarantees | |
327 | * that the ->target() function isn't called after ->destroy() */ | |
328 | ||
329 | if (!ct) { | |
330 | printk(KERN_ERR "CLUSTERIP: no conntrack!\n"); | |
331 | /* FIXME: need to drop invalid ones, since replies | |
332 | * to outgoing connections of other nodes will be | |
333 | * marked as INVALID */ | |
334 | return NF_DROP; | |
335 | } | |
336 | ||
337 | /* special case: ICMP error handling. conntrack distinguishes between | |
338 | * error messages (RELATED) and information requests (see below) */ | |
339 | if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP | |
340 | && (ctinfo == IP_CT_RELATED | |
341 | || ctinfo == IP_CT_IS_REPLY+IP_CT_IS_REPLY)) | |
342 | return IPT_CONTINUE; | |
343 | ||
344 | /* ip_conntrack_icmp guarantees us that we only have ICMP_ECHO, | |
345 | * TIMESTAMP, INFO_REQUEST or ADDRESS type icmp packets from here | |
346 | * on, which all have an ID field [relevant for hashing]. */ | |
347 | ||
348 | hash = clusterip_hashfn(*pskb, cipinfo->config); | |
349 | ||
350 | switch (ctinfo) { | |
351 | case IP_CT_NEW: | |
352 | ct->mark = hash; | |
353 | break; | |
354 | case IP_CT_RELATED: | |
355 | case IP_CT_RELATED+IP_CT_IS_REPLY: | |
356 | /* FIXME: we don't handle expectations at the | |
357 | * moment. they can arrive on a different node than | |
358 | * the master connection (e.g. FTP passive mode) */ | |
359 | case IP_CT_ESTABLISHED: | |
360 | case IP_CT_ESTABLISHED+IP_CT_IS_REPLY: | |
361 | break; | |
362 | default: | |
363 | break; | |
364 | } | |
365 | ||
366 | #ifdef DEBUG_CLUSTERP | |
367 | DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); | |
368 | #endif | |
369 | DEBUGP("hash=%u ct_hash=%lu ", hash, ct->mark); | |
370 | if (!clusterip_responsible(cipinfo->config, hash)) { | |
371 | DEBUGP("not responsible\n"); | |
372 | return NF_DROP; | |
373 | } | |
374 | DEBUGP("responsible\n"); | |
375 | ||
376 | /* despite being received via linklayer multicast, this is | |
377 | * actually a unicast IP packet. TCP doesn't like PACKET_MULTICAST */ | |
378 | (*pskb)->pkt_type = PACKET_HOST; | |
379 | ||
380 | return IPT_CONTINUE; | |
381 | } | |
382 | ||
383 | static int | |
384 | checkentry(const char *tablename, | |
385 | const struct ipt_entry *e, | |
386 | void *targinfo, | |
387 | unsigned int targinfosize, | |
388 | unsigned int hook_mask) | |
389 | { | |
390 | struct ipt_clusterip_tgt_info *cipinfo = targinfo; | |
391 | ||
392 | struct clusterip_config *config; | |
393 | ||
394 | if (targinfosize != IPT_ALIGN(sizeof(struct ipt_clusterip_tgt_info))) { | |
395 | printk(KERN_WARNING "CLUSTERIP: targinfosize %u != %Zu\n", | |
396 | targinfosize, | |
397 | IPT_ALIGN(sizeof(struct ipt_clusterip_tgt_info))); | |
398 | return 0; | |
399 | } | |
400 | ||
401 | if (cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP && | |
402 | cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT && | |
403 | cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT_DPT) { | |
404 | printk(KERN_WARNING "CLUSTERIP: unknown mode `%u'\n", | |
405 | cipinfo->hash_mode); | |
406 | return 0; | |
407 | ||
408 | } | |
409 | if (e->ip.dmsk.s_addr != 0xffffffff | |
410 | || e->ip.dst.s_addr == 0) { | |
411 | printk(KERN_ERR "CLUSTERIP: Please specify destination IP\n"); | |
412 | return 0; | |
413 | } | |
414 | ||
415 | /* FIXME: further sanity checks */ | |
416 | ||
417 | config = clusterip_config_find_get(e->ip.dst.s_addr); | |
418 | if (!config) { | |
419 | if (!(cipinfo->flags & CLUSTERIP_FLAG_NEW)) { | |
420 | printk(KERN_WARNING "CLUSTERIP: no config found for %u.%u.%u.%u, need 'new'\n", NIPQUAD(e->ip.dst.s_addr)); | |
421 | return 0; | |
422 | } else { | |
423 | struct net_device *dev; | |
424 | ||
425 | if (e->ip.iniface[0] == '\0') { | |
426 | printk(KERN_WARNING "CLUSTERIP: Please specify an interface name\n"); | |
427 | return 0; | |
428 | } | |
429 | ||
430 | dev = dev_get_by_name(e->ip.iniface); | |
431 | if (!dev) { | |
432 | printk(KERN_WARNING "CLUSTERIP: no such interface %s\n", e->ip.iniface); | |
433 | return 0; | |
434 | } | |
435 | ||
436 | config = clusterip_config_init(cipinfo, | |
437 | e->ip.dst.s_addr, dev); | |
438 | if (!config) { | |
439 | printk(KERN_WARNING "CLUSTERIP: cannot allocate config\n"); | |
440 | dev_put(dev); | |
441 | return 0; | |
442 | } | |
443 | dev_mc_add(config->dev,config->clustermac, ETH_ALEN, 0); | |
444 | } | |
445 | } | |
446 | ||
447 | cipinfo->config = config; | |
448 | ||
449 | return 1; | |
450 | } | |
451 | ||
452 | /* drop reference count of cluster config when rule is deleted */ | |
453 | static void destroy(void *matchinfo, unsigned int matchinfosize) | |
454 | { | |
455 | struct ipt_clusterip_tgt_info *cipinfo = matchinfo; | |
456 | ||
457 | /* we first remove the proc entry and then drop the reference | |
458 | * count. In case anyone still accesses the file, the open/close | |
459 | * functions are also incrementing the refcount on their own */ | |
460 | #ifdef CONFIG_PROC_FS | |
461 | remove_proc_entry(cipinfo->config->pde->name, | |
462 | cipinfo->config->pde->parent); | |
463 | #endif | |
464 | clusterip_config_put(cipinfo->config); | |
465 | } | |
466 | ||
467 | static struct ipt_target clusterip_tgt = { | |
468 | .name = "CLUSTERIP", | |
469 | .target = &target, | |
470 | .checkentry = &checkentry, | |
471 | .destroy = &destroy, | |
472 | .me = THIS_MODULE | |
473 | }; | |
474 | ||
475 | ||
476 | /*********************************************************************** | |
477 | * ARP MANGLING CODE | |
478 | ***********************************************************************/ | |
479 | ||
480 | /* hardcoded for 48bit ethernet and 32bit ipv4 addresses */ | |
481 | struct arp_payload { | |
482 | u_int8_t src_hw[ETH_ALEN]; | |
483 | u_int32_t src_ip; | |
484 | u_int8_t dst_hw[ETH_ALEN]; | |
485 | u_int32_t dst_ip; | |
486 | } __attribute__ ((packed)); | |
487 | ||
#ifdef CLUSTERIP_DEBUG
/* Print sender IP, sender MAC (colon separated hex) and target IP of
 * an ARP payload. */
static void arp_print(struct arp_payload *payload)
{
#define HBUFFERLEN 30
	char hbuffer[HBUFFERLEN];
	const char hexbuf[]= "0123456789abcdef";
	int octet = 0;
	int len = 0;

	while (len < HBUFFERLEN-3 && octet < ETH_ALEN) {
		u_int8_t byte = payload->src_hw[octet];

		hbuffer[len++] = hexbuf[(byte>>4)&15];
		hbuffer[len++] = hexbuf[byte&15];
		hbuffer[len++] = ':';
		octet++;
	}
	/* the terminator replaces the trailing ':' */
	hbuffer[--len] = '\0';

	printk("src %u.%u.%u.%u@%s, dst %u.%u.%u.%u\n",
		NIPQUAD(payload->src_ip), hbuffer,
		NIPQUAD(payload->dst_ip));
}
#endif
508 | ||
509 | static unsigned int | |
510 | arp_mangle(unsigned int hook, | |
511 | struct sk_buff **pskb, | |
512 | const struct net_device *in, | |
513 | const struct net_device *out, | |
514 | int (*okfn)(struct sk_buff *)) | |
515 | { | |
516 | struct arphdr *arp = (*pskb)->nh.arph; | |
517 | struct arp_payload *payload; | |
518 | struct clusterip_config *c; | |
519 | ||
520 | /* we don't care about non-ethernet and non-ipv4 ARP */ | |
521 | if (arp->ar_hrd != htons(ARPHRD_ETHER) | |
522 | || arp->ar_pro != htons(ETH_P_IP) | |
523 | || arp->ar_pln != 4 || arp->ar_hln != ETH_ALEN) | |
524 | return NF_ACCEPT; | |
525 | ||
526 | /* we only want to mangle arp replies */ | |
527 | if (arp->ar_op != htons(ARPOP_REPLY)) | |
528 | return NF_ACCEPT; | |
529 | ||
530 | payload = (void *)(arp+1); | |
531 | ||
532 | /* if there is no clusterip configuration for the arp reply's | |
533 | * source ip, we don't want to mangle it */ | |
534 | c = clusterip_config_find_get(payload->src_ip); | |
535 | if (!c) | |
536 | return NF_ACCEPT; | |
537 | ||
538 | /* normally the linux kernel always replies to arp queries of | |
539 | * addresses on different interfacs. However, in the CLUSTERIP case | |
540 | * this wouldn't work, since we didn't subscribe the mcast group on | |
541 | * other interfaces */ | |
542 | if (c->dev != out) { | |
543 | DEBUGP("CLUSTERIP: not mangling arp reply on different " | |
544 | "interface: cip'%s'-skb'%s'\n", c->dev->name, out->name); | |
545 | clusterip_config_put(c); | |
546 | return NF_ACCEPT; | |
547 | } | |
548 | ||
549 | /* mangle reply hardware address */ | |
550 | memcpy(payload->src_hw, c->clustermac, arp->ar_hln); | |
551 | ||
552 | #ifdef CLUSTERIP_DEBUG | |
553 | DEBUGP(KERN_DEBUG "CLUSTERIP mangled arp reply: "); | |
554 | arp_print(payload); | |
555 | #endif | |
556 | ||
557 | clusterip_config_put(c); | |
558 | ||
559 | return NF_ACCEPT; | |
560 | } | |
561 | ||
562 | static struct nf_hook_ops cip_arp_ops = { | |
563 | .hook = arp_mangle, | |
564 | .pf = NF_ARP, | |
565 | .hooknum = NF_ARP_OUT, | |
566 | .priority = -1 | |
567 | }; | |
568 | ||
569 | /*********************************************************************** | |
570 | * PROC DIR HANDLING | |
571 | ***********************************************************************/ | |
572 | ||
573 | #ifdef CONFIG_PROC_FS | |
574 | ||
575 | static void *clusterip_seq_start(struct seq_file *s, loff_t *pos) | |
576 | { | |
577 | struct proc_dir_entry *pde = s->private; | |
578 | struct clusterip_config *c = pde->data; | |
579 | unsigned int *nodeidx; | |
580 | ||
581 | READ_LOCK(&clusterip_lock); | |
582 | if (*pos >= c->num_local_nodes) | |
583 | return NULL; | |
584 | ||
585 | nodeidx = kmalloc(sizeof(unsigned int), GFP_KERNEL); | |
586 | if (!nodeidx) | |
587 | return ERR_PTR(-ENOMEM); | |
588 | ||
589 | *nodeidx = *pos; | |
590 | return nodeidx; | |
591 | } | |
592 | ||
593 | static void *clusterip_seq_next(struct seq_file *s, void *v, loff_t *pos) | |
594 | { | |
595 | struct proc_dir_entry *pde = s->private; | |
596 | struct clusterip_config *c = pde->data; | |
597 | unsigned int *nodeidx = (unsigned int *)v; | |
598 | ||
599 | *pos = ++(*nodeidx); | |
600 | if (*pos >= c->num_local_nodes) { | |
601 | kfree(v); | |
602 | return NULL; | |
603 | } | |
604 | return nodeidx; | |
605 | } | |
606 | ||
607 | static void clusterip_seq_stop(struct seq_file *s, void *v) | |
608 | { | |
609 | kfree(v); | |
610 | ||
611 | READ_UNLOCK(&clusterip_lock); | |
612 | } | |
613 | ||
614 | static int clusterip_seq_show(struct seq_file *s, void *v) | |
615 | { | |
616 | struct proc_dir_entry *pde = s->private; | |
617 | struct clusterip_config *c = pde->data; | |
618 | unsigned int *nodeidx = (unsigned int *)v; | |
619 | ||
620 | if (*nodeidx != 0) | |
621 | seq_putc(s, ','); | |
622 | seq_printf(s, "%u", c->local_nodes[*nodeidx]); | |
623 | ||
624 | if (*nodeidx == c->num_local_nodes-1) | |
625 | seq_putc(s, '\n'); | |
626 | ||
627 | return 0; | |
628 | } | |
629 | ||
630 | static struct seq_operations clusterip_seq_ops = { | |
631 | .start = clusterip_seq_start, | |
632 | .next = clusterip_seq_next, | |
633 | .stop = clusterip_seq_stop, | |
634 | .show = clusterip_seq_show, | |
635 | }; | |
636 | ||
637 | static int clusterip_proc_open(struct inode *inode, struct file *file) | |
638 | { | |
639 | int ret = seq_open(file, &clusterip_seq_ops); | |
640 | ||
641 | if (!ret) { | |
642 | struct seq_file *sf = file->private_data; | |
643 | struct proc_dir_entry *pde = PDE(inode); | |
644 | struct clusterip_config *c = pde->data; | |
645 | ||
646 | sf->private = pde; | |
647 | ||
648 | clusterip_config_get(c); | |
649 | } | |
650 | ||
651 | return ret; | |
652 | } | |
653 | ||
654 | static int clusterip_proc_release(struct inode *inode, struct file *file) | |
655 | { | |
656 | struct proc_dir_entry *pde = PDE(inode); | |
657 | struct clusterip_config *c = pde->data; | |
658 | int ret; | |
659 | ||
660 | ret = seq_release(inode, file); | |
661 | ||
662 | if (!ret) | |
663 | clusterip_config_put(c); | |
664 | ||
665 | return ret; | |
666 | } | |
667 | ||
668 | static ssize_t clusterip_proc_write(struct file *file, const char __user *input, | |
669 | size_t size, loff_t *ofs) | |
670 | { | |
671 | #define PROC_WRITELEN 10 | |
672 | char buffer[PROC_WRITELEN+1]; | |
673 | struct proc_dir_entry *pde = PDE(file->f_dentry->d_inode); | |
674 | struct clusterip_config *c = pde->data; | |
675 | unsigned long nodenum; | |
676 | ||
677 | if (copy_from_user(buffer, input, PROC_WRITELEN)) | |
678 | return -EFAULT; | |
679 | ||
680 | if (*buffer == '+') { | |
681 | nodenum = simple_strtoul(buffer+1, NULL, 10); | |
682 | if (clusterip_add_node(c, nodenum)) | |
683 | return -ENOMEM; | |
684 | } else if (*buffer == '-') { | |
685 | nodenum = simple_strtoul(buffer+1, NULL,10); | |
686 | if (clusterip_del_node(c, nodenum)) | |
687 | return -ENOENT; | |
688 | } else | |
689 | return -EIO; | |
690 | ||
691 | return size; | |
692 | } | |
693 | ||
694 | static struct file_operations clusterip_proc_fops = { | |
695 | .owner = THIS_MODULE, | |
696 | .open = clusterip_proc_open, | |
697 | .read = seq_read, | |
698 | .write = clusterip_proc_write, | |
699 | .llseek = seq_lseek, | |
700 | .release = clusterip_proc_release, | |
701 | }; | |
702 | ||
703 | #endif /* CONFIG_PROC_FS */ | |
704 | ||
705 | static int init_or_cleanup(int fini) | |
706 | { | |
707 | int ret; | |
708 | ||
709 | if (fini) | |
710 | goto cleanup; | |
711 | ||
712 | if (ipt_register_target(&clusterip_tgt)) { | |
713 | ret = -EINVAL; | |
714 | goto cleanup_none; | |
715 | } | |
716 | ||
717 | if (nf_register_hook(&cip_arp_ops) < 0) { | |
718 | ret = -EINVAL; | |
719 | goto cleanup_target; | |
720 | } | |
721 | ||
722 | #ifdef CONFIG_PROC_FS | |
723 | clusterip_procdir = proc_mkdir("ipt_CLUSTERIP", proc_net); | |
724 | if (!clusterip_procdir) { | |
725 | printk(KERN_ERR "CLUSTERIP: Unable to proc dir entry\n"); | |
726 | ret = -ENOMEM; | |
727 | goto cleanup_hook; | |
728 | } | |
729 | #endif /* CONFIG_PROC_FS */ | |
730 | ||
731 | printk(KERN_NOTICE "ClusterIP Version %s loaded successfully\n", | |
732 | CLUSTERIP_VERSION); | |
733 | ||
734 | return 0; | |
735 | ||
736 | cleanup: | |
737 | printk(KERN_NOTICE "ClusterIP Version %s unloading\n", | |
738 | CLUSTERIP_VERSION); | |
739 | #ifdef CONFIG_PROC_FS | |
740 | remove_proc_entry(clusterip_procdir->name, clusterip_procdir->parent); | |
741 | #endif | |
742 | cleanup_hook: | |
743 | nf_unregister_hook(&cip_arp_ops); | |
744 | cleanup_target: | |
745 | ipt_unregister_target(&clusterip_tgt); | |
746 | cleanup_none: | |
747 | return -EINVAL; | |
748 | } | |
749 | ||
750 | static int __init init(void) | |
751 | { | |
752 | return init_or_cleanup(0); | |
753 | } | |
754 | ||
755 | static void __exit fini(void) | |
756 | { | |
757 | init_or_cleanup(1); | |
758 | } | |
759 | ||
760 | module_init(init); | |
761 | module_exit(fini); |