Commit | Line | Data |
---|---|---|
5324a040 ACM |
1 | /* |
2 | * INET An implementation of the TCP/IP protocol suite for the LINUX | |
3 | * operating system. INET is implemented using the BSD Socket | |
4 | * interface as the means of communication with the user level. | |
5 | * | |
6 | * Generic INET6 transport hashtables | |
7 | * | |
d8313f5c ACM |
8 | * Authors: Lotsa people, from code originally in tcp, generalised here |
9 | * by Arnaldo Carvalho de Melo <acme@mandriva.com> | |
5324a040 ACM |
10 | * |
11 | * This program is free software; you can redistribute it and/or | |
12 | * modify it under the terms of the GNU General Public License | |
13 | * as published by the Free Software Foundation; either version | |
14 | * 2 of the License, or (at your option) any later version. | |
15 | */ | |
16 | ||
5324a040 | 17 | #include <linux/module.h> |
d8313f5c | 18 | #include <linux/random.h> |
5324a040 ACM |
19 | |
20 | #include <net/inet_connection_sock.h> | |
21 | #include <net/inet_hashtables.h> | |
22 | #include <net/inet6_hashtables.h> | |
d8313f5c | 23 | #include <net/ip.h> |
5324a040 | 24 | |
b1a7ffcb DV |
25 | void __inet6_hash(struct inet_hashinfo *hashinfo, |
26 | struct sock *sk) | |
27 | { | |
28 | struct hlist_head *list; | |
29 | rwlock_t *lock; | |
30 | ||
31 | BUG_TRAP(sk_unhashed(sk)); | |
32 | ||
33 | if (sk->sk_state == TCP_LISTEN) { | |
34 | list = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; | |
35 | lock = &hashinfo->lhash_lock; | |
36 | inet_listen_wlock(hashinfo); | |
37 | } else { | |
38 | unsigned int hash; | |
39 | sk->sk_hash = hash = inet6_sk_ehashfn(sk); | |
40 | hash &= (hashinfo->ehash_size - 1); | |
41 | list = &hashinfo->ehash[hash].chain; | |
42 | lock = &hashinfo->ehash[hash].lock; | |
43 | write_lock(lock); | |
44 | } | |
45 | ||
46 | __sk_add_node(sk, list); | |
47 | sock_prot_inc_use(sk->sk_prot); | |
48 | write_unlock(lock); | |
49 | } | |
50 | EXPORT_SYMBOL(__inet6_hash); | |
51 | ||
52 | /* | |
53 | * Sockets in TCP_CLOSE state are _always_ taken out of the hash, so | |
54 | * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM | |
55 | * | |
56 | * The ehash bucket lock is taken as a reader by this function itself. | |
57 | */ | |
58 | struct sock *__inet6_lookup_established(struct inet_hashinfo *hashinfo, | |
59 | const struct in6_addr *saddr, | |
60 | const u16 sport, | |
61 | const struct in6_addr *daddr, | |
62 | const u16 hnum, | |
63 | const int dif) | |
64 | { | |
65 | struct sock *sk; | |
66 | const struct hlist_node *node; | |
4f765d84 | 67 | const __portpair ports = INET_COMBINED_PORTS(sport, hnum); |
b1a7ffcb DV |
68 | /* Optimize here for direct hit, only listening connections can |
69 | * have wildcards anyways. | |
70 | */ | |
71 | unsigned int hash = inet6_ehashfn(daddr, hnum, saddr, sport); | |
72 | struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash); | |
73 | ||
74 | prefetch(head->chain.first); | |
75 | read_lock(&head->lock); | |
76 | sk_for_each(sk, node, &head->chain) { | |
77 | /* For IPV6 do the cheaper port and family tests first. */ | |
78 | if (INET6_MATCH(sk, hash, saddr, daddr, ports, dif)) | |
79 | goto hit; /* You sunk my battleship! */ | |
80 | } | |
81 | /* Must check for a TIME_WAIT'er before going to listener hash. */ | |
82 | sk_for_each(sk, node, &(head + hashinfo->ehash_size)->chain) { | |
83 | const struct inet_timewait_sock *tw = inet_twsk(sk); | |
84 | ||
4f765d84 | 85 | if(*((__portpair *)&(tw->tw_dport)) == ports && |
b1a7ffcb DV |
86 | sk->sk_family == PF_INET6) { |
87 | const struct inet6_timewait_sock *tw6 = inet6_twsk(sk); | |
88 | ||
89 | if (ipv6_addr_equal(&tw6->tw_v6_daddr, saddr) && | |
90 | ipv6_addr_equal(&tw6->tw_v6_rcv_saddr, daddr) && | |
91 | (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif)) | |
92 | goto hit; | |
93 | } | |
94 | } | |
95 | read_unlock(&head->lock); | |
96 | return NULL; | |
97 | ||
98 | hit: | |
99 | sock_hold(sk); | |
100 | read_unlock(&head->lock); | |
101 | return sk; | |
102 | } | |
103 | EXPORT_SYMBOL(__inet6_lookup_established); | |
104 | ||
5324a040 ACM |
105 | struct sock *inet6_lookup_listener(struct inet_hashinfo *hashinfo, |
106 | const struct in6_addr *daddr, | |
107 | const unsigned short hnum, const int dif) | |
108 | { | |
109 | struct sock *sk; | |
110 | const struct hlist_node *node; | |
111 | struct sock *result = NULL; | |
112 | int score, hiscore = 0; | |
113 | ||
114 | read_lock(&hashinfo->lhash_lock); | |
115 | sk_for_each(sk, node, &hashinfo->listening_hash[inet_lhashfn(hnum)]) { | |
116 | if (inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) { | |
117 | const struct ipv6_pinfo *np = inet6_sk(sk); | |
118 | ||
119 | score = 1; | |
120 | if (!ipv6_addr_any(&np->rcv_saddr)) { | |
121 | if (!ipv6_addr_equal(&np->rcv_saddr, daddr)) | |
122 | continue; | |
123 | score++; | |
124 | } | |
125 | if (sk->sk_bound_dev_if) { | |
126 | if (sk->sk_bound_dev_if != dif) | |
127 | continue; | |
128 | score++; | |
129 | } | |
130 | if (score == 3) { | |
131 | result = sk; | |
132 | break; | |
133 | } | |
134 | if (score > hiscore) { | |
135 | hiscore = score; | |
136 | result = sk; | |
137 | } | |
138 | } | |
139 | } | |
140 | if (result) | |
141 | sock_hold(result); | |
142 | read_unlock(&hashinfo->lhash_lock); | |
143 | return result; | |
144 | } | |
145 | ||
146 | EXPORT_SYMBOL_GPL(inet6_lookup_listener); | |
147 | ||
148 | struct sock *inet6_lookup(struct inet_hashinfo *hashinfo, | |
149 | const struct in6_addr *saddr, const u16 sport, | |
150 | const struct in6_addr *daddr, const u16 dport, | |
151 | const int dif) | |
152 | { | |
153 | struct sock *sk; | |
154 | ||
155 | local_bh_disable(); | |
156 | sk = __inet6_lookup(hashinfo, saddr, sport, daddr, ntohs(dport), dif); | |
157 | local_bh_enable(); | |
158 | ||
159 | return sk; | |
160 | } | |
161 | ||
162 | EXPORT_SYMBOL_GPL(inet6_lookup); | |
d8313f5c ACM |
163 | |
164 | static int __inet6_check_established(struct inet_timewait_death_row *death_row, | |
165 | struct sock *sk, const __u16 lport, | |
166 | struct inet_timewait_sock **twp) | |
167 | { | |
168 | struct inet_hashinfo *hinfo = death_row->hashinfo; | |
3759fa9c | 169 | struct inet_sock *inet = inet_sk(sk); |
d8313f5c ACM |
170 | const struct ipv6_pinfo *np = inet6_sk(sk); |
171 | const struct in6_addr *daddr = &np->rcv_saddr; | |
172 | const struct in6_addr *saddr = &np->daddr; | |
173 | const int dif = sk->sk_bound_dev_if; | |
4f765d84 | 174 | const __portpair ports = INET_COMBINED_PORTS(inet->dport, lport); |
d8313f5c ACM |
175 | const unsigned int hash = inet6_ehashfn(daddr, inet->num, saddr, |
176 | inet->dport); | |
177 | struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); | |
178 | struct sock *sk2; | |
179 | const struct hlist_node *node; | |
180 | struct inet_timewait_sock *tw; | |
181 | ||
182 | prefetch(head->chain.first); | |
183 | write_lock(&head->lock); | |
184 | ||
185 | /* Check TIME-WAIT sockets first. */ | |
186 | sk_for_each(sk2, node, &(head + hinfo->ehash_size)->chain) { | |
187 | const struct inet6_timewait_sock *tw6 = inet6_twsk(sk2); | |
188 | ||
189 | tw = inet_twsk(sk2); | |
190 | ||
4f765d84 | 191 | if(*((__portpair *)&(tw->tw_dport)) == ports && |
d8313f5c ACM |
192 | sk2->sk_family == PF_INET6 && |
193 | ipv6_addr_equal(&tw6->tw_v6_daddr, saddr) && | |
194 | ipv6_addr_equal(&tw6->tw_v6_rcv_saddr, daddr) && | |
195 | sk2->sk_bound_dev_if == sk->sk_bound_dev_if) { | |
196 | if (twsk_unique(sk, sk2, twp)) | |
197 | goto unique; | |
198 | else | |
199 | goto not_unique; | |
200 | } | |
201 | } | |
202 | tw = NULL; | |
203 | ||
204 | /* And established part... */ | |
205 | sk_for_each(sk2, node, &head->chain) { | |
206 | if (INET6_MATCH(sk2, hash, saddr, daddr, ports, dif)) | |
207 | goto not_unique; | |
208 | } | |
209 | ||
210 | unique: | |
3759fa9c HX |
211 | /* Must record num and sport now. Otherwise we will see |
212 | * in hash table socket with a funny identity. */ | |
213 | inet->num = lport; | |
214 | inet->sport = htons(lport); | |
d8313f5c ACM |
215 | BUG_TRAP(sk_unhashed(sk)); |
216 | __sk_add_node(sk, &head->chain); | |
217 | sk->sk_hash = hash; | |
218 | sock_prot_inc_use(sk->sk_prot); | |
219 | write_unlock(&head->lock); | |
220 | ||
221 | if (twp != NULL) { | |
222 | *twp = tw; | |
223 | NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); | |
224 | } else if (tw != NULL) { | |
225 | /* Silly. Should hash-dance instead... */ | |
226 | inet_twsk_deschedule(tw, death_row); | |
227 | NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); | |
228 | ||
229 | inet_twsk_put(tw); | |
230 | } | |
231 | return 0; | |
232 | ||
233 | not_unique: | |
234 | write_unlock(&head->lock); | |
235 | return -EADDRNOTAVAIL; | |
236 | } | |
237 | ||
238 | static inline u32 inet6_sk_port_offset(const struct sock *sk) | |
239 | { | |
240 | const struct inet_sock *inet = inet_sk(sk); | |
241 | const struct ipv6_pinfo *np = inet6_sk(sk); | |
242 | return secure_ipv6_port_ephemeral(np->rcv_saddr.s6_addr32, | |
243 | np->daddr.s6_addr32, | |
244 | inet->dport); | |
245 | } | |
246 | ||
247 | int inet6_hash_connect(struct inet_timewait_death_row *death_row, | |
248 | struct sock *sk) | |
249 | { | |
250 | struct inet_hashinfo *hinfo = death_row->hashinfo; | |
251 | const unsigned short snum = inet_sk(sk)->num; | |
252 | struct inet_bind_hashbucket *head; | |
253 | struct inet_bind_bucket *tb; | |
254 | int ret; | |
255 | ||
256 | if (snum == 0) { | |
257 | const int low = sysctl_local_port_range[0]; | |
258 | const int high = sysctl_local_port_range[1]; | |
259 | const int range = high - low; | |
260 | int i, port; | |
261 | static u32 hint; | |
262 | const u32 offset = hint + inet6_sk_port_offset(sk); | |
263 | struct hlist_node *node; | |
264 | struct inet_timewait_sock *tw = NULL; | |
265 | ||
266 | local_bh_disable(); | |
267 | for (i = 1; i <= range; i++) { | |
268 | port = low + (i + offset) % range; | |
269 | head = &hinfo->bhash[inet_bhashfn(port, hinfo->bhash_size)]; | |
270 | spin_lock(&head->lock); | |
271 | ||
272 | /* Does not bother with rcv_saddr checks, | |
273 | * because the established check is already | |
274 | * unique enough. | |
275 | */ | |
276 | inet_bind_bucket_for_each(tb, node, &head->chain) { | |
277 | if (tb->port == port) { | |
278 | BUG_TRAP(!hlist_empty(&tb->owners)); | |
279 | if (tb->fastreuse >= 0) | |
280 | goto next_port; | |
281 | if (!__inet6_check_established(death_row, | |
282 | sk, port, | |
283 | &tw)) | |
284 | goto ok; | |
285 | goto next_port; | |
286 | } | |
287 | } | |
288 | ||
289 | tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, | |
290 | head, port); | |
291 | if (!tb) { | |
292 | spin_unlock(&head->lock); | |
293 | break; | |
294 | } | |
295 | tb->fastreuse = -1; | |
296 | goto ok; | |
297 | ||
298 | next_port: | |
299 | spin_unlock(&head->lock); | |
300 | } | |
301 | local_bh_enable(); | |
302 | ||
303 | return -EADDRNOTAVAIL; | |
304 | ||
305 | ok: | |
306 | hint += i; | |
307 | ||
308 | /* Head lock still held and bh's disabled */ | |
309 | inet_bind_hash(sk, tb, port); | |
310 | if (sk_unhashed(sk)) { | |
311 | inet_sk(sk)->sport = htons(port); | |
312 | __inet6_hash(hinfo, sk); | |
313 | } | |
314 | spin_unlock(&head->lock); | |
315 | ||
316 | if (tw) { | |
317 | inet_twsk_deschedule(tw, death_row); | |
318 | inet_twsk_put(tw); | |
319 | } | |
320 | ||
321 | ret = 0; | |
322 | goto out; | |
323 | } | |
324 | ||
325 | head = &hinfo->bhash[inet_bhashfn(snum, hinfo->bhash_size)]; | |
326 | tb = inet_csk(sk)->icsk_bind_hash; | |
327 | spin_lock_bh(&head->lock); | |
328 | ||
329 | if (sk_head(&tb->owners) == sk && sk->sk_bind_node.next == NULL) { | |
330 | __inet6_hash(hinfo, sk); | |
331 | spin_unlock_bh(&head->lock); | |
332 | return 0; | |
333 | } else { | |
334 | spin_unlock(&head->lock); | |
335 | /* No definite answer... Walk to established hash table */ | |
336 | ret = __inet6_check_established(death_row, sk, snum, NULL); | |
337 | out: | |
338 | local_bh_enable(); | |
339 | return ret; | |
340 | } | |
341 | } | |
342 | ||
343 | EXPORT_SYMBOL_GPL(inet6_hash_connect); |