Commit | Line | Data |
---|---|---|
5324a040 ACM |
1 | /* |
2 | * INET An implementation of the TCP/IP protocol suite for the LINUX | |
3 | * operating system. INET is implemented using the BSD Socket | |
4 | * interface as the means of communication with the user level. | |
5 | * | |
6 | * Generic INET6 transport hashtables | |
7 | * | |
d8313f5c ACM |
8 | * Authors: Lotsa people, from code originally in tcp, generalised here |
9 | * by Arnaldo Carvalho de Melo <acme@mandriva.com> | |
5324a040 ACM |
10 | * |
11 | * This program is free software; you can redistribute it and/or | |
12 | * modify it under the terms of the GNU General Public License | |
13 | * as published by the Free Software Foundation; either version | |
14 | * 2 of the License, or (at your option) any later version. | |
15 | */ | |
16 | ||
5324a040 | 17 | #include <linux/module.h> |
d8313f5c | 18 | #include <linux/random.h> |
5324a040 ACM |
19 | |
20 | #include <net/inet_connection_sock.h> | |
21 | #include <net/inet_hashtables.h> | |
22 | #include <net/inet6_hashtables.h> | |
d8313f5c | 23 | #include <net/ip.h> |
5324a040 | 24 | |
b1a7ffcb DV |
25 | void __inet6_hash(struct inet_hashinfo *hashinfo, |
26 | struct sock *sk) | |
27 | { | |
28 | struct hlist_head *list; | |
29 | rwlock_t *lock; | |
30 | ||
31 | BUG_TRAP(sk_unhashed(sk)); | |
32 | ||
33 | if (sk->sk_state == TCP_LISTEN) { | |
34 | list = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; | |
35 | lock = &hashinfo->lhash_lock; | |
36 | inet_listen_wlock(hashinfo); | |
37 | } else { | |
38 | unsigned int hash; | |
39 | sk->sk_hash = hash = inet6_sk_ehashfn(sk); | |
230140cf ED |
40 | list = &inet_ehash_bucket(hashinfo, hash)->chain; |
41 | lock = inet_ehash_lockp(hashinfo, hash); | |
b1a7ffcb DV |
42 | write_lock(lock); |
43 | } | |
44 | ||
45 | __sk_add_node(sk, list); | |
46 | sock_prot_inc_use(sk->sk_prot); | |
47 | write_unlock(lock); | |
48 | } | |
49 | EXPORT_SYMBOL(__inet6_hash); | |
50 | ||
51 | /* | |
52 | * Sockets in TCP_CLOSE state are _always_ taken out of the hash, so | |
53 | * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM | |
54 | * | |
55 | * The sockhash lock must be held as a reader here. | |
56 | */ | |
57 | struct sock *__inet6_lookup_established(struct inet_hashinfo *hashinfo, | |
58 | const struct in6_addr *saddr, | |
d2ecd9cc | 59 | const __be16 sport, |
b1a7ffcb DV |
60 | const struct in6_addr *daddr, |
61 | const u16 hnum, | |
62 | const int dif) | |
63 | { | |
64 | struct sock *sk; | |
65 | const struct hlist_node *node; | |
4f765d84 | 66 | const __portpair ports = INET_COMBINED_PORTS(sport, hnum); |
b1a7ffcb DV |
67 | /* Optimize here for direct hit, only listening connections can |
68 | * have wildcards anyways. | |
69 | */ | |
70 | unsigned int hash = inet6_ehashfn(daddr, hnum, saddr, sport); | |
71 | struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash); | |
230140cf | 72 | rwlock_t *lock = inet_ehash_lockp(hashinfo, hash); |
b1a7ffcb DV |
73 | |
74 | prefetch(head->chain.first); | |
230140cf | 75 | read_lock(lock); |
b1a7ffcb DV |
76 | sk_for_each(sk, node, &head->chain) { |
77 | /* For IPV6 do the cheaper port and family tests first. */ | |
78 | if (INET6_MATCH(sk, hash, saddr, daddr, ports, dif)) | |
79 | goto hit; /* You sunk my battleship! */ | |
80 | } | |
81 | /* Must check for a TIME_WAIT'er before going to listener hash. */ | |
dbca9b27 | 82 | sk_for_each(sk, node, &head->twchain) { |
b1a7ffcb DV |
83 | const struct inet_timewait_sock *tw = inet_twsk(sk); |
84 | ||
4f765d84 | 85 | if(*((__portpair *)&(tw->tw_dport)) == ports && |
b1a7ffcb DV |
86 | sk->sk_family == PF_INET6) { |
87 | const struct inet6_timewait_sock *tw6 = inet6_twsk(sk); | |
88 | ||
89 | if (ipv6_addr_equal(&tw6->tw_v6_daddr, saddr) && | |
90 | ipv6_addr_equal(&tw6->tw_v6_rcv_saddr, daddr) && | |
91 | (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif)) | |
92 | goto hit; | |
93 | } | |
94 | } | |
230140cf | 95 | read_unlock(lock); |
b1a7ffcb DV |
96 | return NULL; |
97 | ||
98 | hit: | |
99 | sock_hold(sk); | |
230140cf | 100 | read_unlock(lock); |
b1a7ffcb DV |
101 | return sk; |
102 | } | |
103 | EXPORT_SYMBOL(__inet6_lookup_established); | |
104 | ||
5324a040 ACM |
105 | struct sock *inet6_lookup_listener(struct inet_hashinfo *hashinfo, |
106 | const struct in6_addr *daddr, | |
107 | const unsigned short hnum, const int dif) | |
108 | { | |
109 | struct sock *sk; | |
110 | const struct hlist_node *node; | |
111 | struct sock *result = NULL; | |
112 | int score, hiscore = 0; | |
113 | ||
114 | read_lock(&hashinfo->lhash_lock); | |
115 | sk_for_each(sk, node, &hashinfo->listening_hash[inet_lhashfn(hnum)]) { | |
116 | if (inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) { | |
117 | const struct ipv6_pinfo *np = inet6_sk(sk); | |
1ab1457c | 118 | |
5324a040 ACM |
119 | score = 1; |
120 | if (!ipv6_addr_any(&np->rcv_saddr)) { | |
121 | if (!ipv6_addr_equal(&np->rcv_saddr, daddr)) | |
122 | continue; | |
123 | score++; | |
124 | } | |
125 | if (sk->sk_bound_dev_if) { | |
126 | if (sk->sk_bound_dev_if != dif) | |
127 | continue; | |
128 | score++; | |
129 | } | |
130 | if (score == 3) { | |
131 | result = sk; | |
132 | break; | |
133 | } | |
134 | if (score > hiscore) { | |
135 | hiscore = score; | |
136 | result = sk; | |
137 | } | |
138 | } | |
139 | } | |
140 | if (result) | |
141 | sock_hold(result); | |
142 | read_unlock(&hashinfo->lhash_lock); | |
143 | return result; | |
144 | } | |
145 | ||
146 | EXPORT_SYMBOL_GPL(inet6_lookup_listener); | |
147 | ||
148 | struct sock *inet6_lookup(struct inet_hashinfo *hashinfo, | |
d2ecd9cc AV |
149 | const struct in6_addr *saddr, const __be16 sport, |
150 | const struct in6_addr *daddr, const __be16 dport, | |
5324a040 ACM |
151 | const int dif) |
152 | { | |
153 | struct sock *sk; | |
154 | ||
155 | local_bh_disable(); | |
156 | sk = __inet6_lookup(hashinfo, saddr, sport, daddr, ntohs(dport), dif); | |
157 | local_bh_enable(); | |
158 | ||
159 | return sk; | |
160 | } | |
161 | ||
162 | EXPORT_SYMBOL_GPL(inet6_lookup); | |
d8313f5c ACM |
163 | |
164 | static int __inet6_check_established(struct inet_timewait_death_row *death_row, | |
165 | struct sock *sk, const __u16 lport, | |
166 | struct inet_timewait_sock **twp) | |
167 | { | |
168 | struct inet_hashinfo *hinfo = death_row->hashinfo; | |
3759fa9c | 169 | struct inet_sock *inet = inet_sk(sk); |
d8313f5c ACM |
170 | const struct ipv6_pinfo *np = inet6_sk(sk); |
171 | const struct in6_addr *daddr = &np->rcv_saddr; | |
172 | const struct in6_addr *saddr = &np->daddr; | |
173 | const int dif = sk->sk_bound_dev_if; | |
4f765d84 | 174 | const __portpair ports = INET_COMBINED_PORTS(inet->dport, lport); |
6e1d9d04 | 175 | const unsigned int hash = inet6_ehashfn(daddr, lport, saddr, |
d8313f5c ACM |
176 | inet->dport); |
177 | struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); | |
230140cf | 178 | rwlock_t *lock = inet_ehash_lockp(hinfo, hash); |
d8313f5c ACM |
179 | struct sock *sk2; |
180 | const struct hlist_node *node; | |
181 | struct inet_timewait_sock *tw; | |
182 | ||
183 | prefetch(head->chain.first); | |
230140cf | 184 | write_lock(lock); |
d8313f5c ACM |
185 | |
186 | /* Check TIME-WAIT sockets first. */ | |
dbca9b27 | 187 | sk_for_each(sk2, node, &head->twchain) { |
d8313f5c ACM |
188 | const struct inet6_timewait_sock *tw6 = inet6_twsk(sk2); |
189 | ||
190 | tw = inet_twsk(sk2); | |
191 | ||
4f765d84 | 192 | if(*((__portpair *)&(tw->tw_dport)) == ports && |
d8313f5c ACM |
193 | sk2->sk_family == PF_INET6 && |
194 | ipv6_addr_equal(&tw6->tw_v6_daddr, saddr) && | |
195 | ipv6_addr_equal(&tw6->tw_v6_rcv_saddr, daddr) && | |
196 | sk2->sk_bound_dev_if == sk->sk_bound_dev_if) { | |
197 | if (twsk_unique(sk, sk2, twp)) | |
198 | goto unique; | |
199 | else | |
200 | goto not_unique; | |
201 | } | |
202 | } | |
203 | tw = NULL; | |
204 | ||
205 | /* And established part... */ | |
206 | sk_for_each(sk2, node, &head->chain) { | |
207 | if (INET6_MATCH(sk2, hash, saddr, daddr, ports, dif)) | |
208 | goto not_unique; | |
209 | } | |
210 | ||
211 | unique: | |
3759fa9c HX |
212 | /* Must record num and sport now. Otherwise we will see |
213 | * in hash table socket with a funny identity. */ | |
214 | inet->num = lport; | |
215 | inet->sport = htons(lport); | |
d8313f5c ACM |
216 | BUG_TRAP(sk_unhashed(sk)); |
217 | __sk_add_node(sk, &head->chain); | |
218 | sk->sk_hash = hash; | |
219 | sock_prot_inc_use(sk->sk_prot); | |
230140cf | 220 | write_unlock(lock); |
d8313f5c ACM |
221 | |
222 | if (twp != NULL) { | |
223 | *twp = tw; | |
224 | NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); | |
225 | } else if (tw != NULL) { | |
226 | /* Silly. Should hash-dance instead... */ | |
227 | inet_twsk_deschedule(tw, death_row); | |
228 | NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); | |
229 | ||
230 | inet_twsk_put(tw); | |
231 | } | |
232 | return 0; | |
233 | ||
234 | not_unique: | |
230140cf | 235 | write_unlock(lock); |
d8313f5c ACM |
236 | return -EADDRNOTAVAIL; |
237 | } | |
238 | ||
239 | static inline u32 inet6_sk_port_offset(const struct sock *sk) | |
240 | { | |
241 | const struct inet_sock *inet = inet_sk(sk); | |
242 | const struct ipv6_pinfo *np = inet6_sk(sk); | |
243 | return secure_ipv6_port_ephemeral(np->rcv_saddr.s6_addr32, | |
244 | np->daddr.s6_addr32, | |
245 | inet->dport); | |
246 | } | |
247 | ||
248 | int inet6_hash_connect(struct inet_timewait_death_row *death_row, | |
249 | struct sock *sk) | |
250 | { | |
251 | struct inet_hashinfo *hinfo = death_row->hashinfo; | |
252 | const unsigned short snum = inet_sk(sk)->num; | |
1ab1457c YH |
253 | struct inet_bind_hashbucket *head; |
254 | struct inet_bind_bucket *tb; | |
d8313f5c ACM |
255 | int ret; |
256 | ||
1ab1457c | 257 | if (snum == 0) { |
227b60f5 | 258 | int i, port, low, high, remaining; |
d8313f5c ACM |
259 | static u32 hint; |
260 | const u32 offset = hint + inet6_sk_port_offset(sk); | |
261 | struct hlist_node *node; | |
1ab1457c | 262 | struct inet_timewait_sock *tw = NULL; |
d8313f5c | 263 | |
227b60f5 | 264 | inet_get_local_port_range(&low, &high); |
a25de534 | 265 | remaining = (high - low) + 1; |
227b60f5 | 266 | |
1ab1457c | 267 | local_bh_disable(); |
227b60f5 SH |
268 | for (i = 1; i <= remaining; i++) { |
269 | port = low + (i + offset) % remaining; | |
1ab1457c YH |
270 | head = &hinfo->bhash[inet_bhashfn(port, hinfo->bhash_size)]; |
271 | spin_lock(&head->lock); | |
d8313f5c | 272 | |
1ab1457c YH |
273 | /* Does not bother with rcv_saddr checks, |
274 | * because the established check is already | |
275 | * unique enough. | |
276 | */ | |
d8313f5c | 277 | inet_bind_bucket_for_each(tb, node, &head->chain) { |
1ab1457c YH |
278 | if (tb->port == port) { |
279 | BUG_TRAP(!hlist_empty(&tb->owners)); | |
280 | if (tb->fastreuse >= 0) | |
281 | goto next_port; | |
282 | if (!__inet6_check_established(death_row, | |
d8313f5c ACM |
283 | sk, port, |
284 | &tw)) | |
1ab1457c YH |
285 | goto ok; |
286 | goto next_port; | |
287 | } | |
288 | } | |
d8313f5c | 289 | |
1ab1457c | 290 | tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, |
d8313f5c | 291 | head, port); |
1ab1457c YH |
292 | if (!tb) { |
293 | spin_unlock(&head->lock); | |
294 | break; | |
295 | } | |
296 | tb->fastreuse = -1; | |
297 | goto ok; | |
d8313f5c | 298 | |
1ab1457c YH |
299 | next_port: |
300 | spin_unlock(&head->lock); | |
301 | } | |
302 | local_bh_enable(); | |
d8313f5c | 303 | |
1ab1457c | 304 | return -EADDRNOTAVAIL; |
d8313f5c ACM |
305 | |
306 | ok: | |
307 | hint += i; | |
308 | ||
1ab1457c YH |
309 | /* Head lock still held and bh's disabled */ |
310 | inet_bind_hash(sk, tb, port); | |
d8313f5c | 311 | if (sk_unhashed(sk)) { |
1ab1457c YH |
312 | inet_sk(sk)->sport = htons(port); |
313 | __inet6_hash(hinfo, sk); | |
314 | } | |
315 | spin_unlock(&head->lock); | |
d8313f5c | 316 | |
1ab1457c YH |
317 | if (tw) { |
318 | inet_twsk_deschedule(tw, death_row); | |
319 | inet_twsk_put(tw); | |
320 | } | |
d8313f5c ACM |
321 | |
322 | ret = 0; | |
323 | goto out; | |
1ab1457c | 324 | } |
d8313f5c | 325 | |
1ab1457c YH |
326 | head = &hinfo->bhash[inet_bhashfn(snum, hinfo->bhash_size)]; |
327 | tb = inet_csk(sk)->icsk_bind_hash; | |
d8313f5c ACM |
328 | spin_lock_bh(&head->lock); |
329 | ||
330 | if (sk_head(&tb->owners) == sk && sk->sk_bind_node.next == NULL) { | |
331 | __inet6_hash(hinfo, sk); | |
332 | spin_unlock_bh(&head->lock); | |
333 | return 0; | |
334 | } else { | |
335 | spin_unlock(&head->lock); | |
336 | /* No definite answer... Walk to established hash table */ | |
337 | ret = __inet6_check_established(death_row, sk, snum, NULL); | |
338 | out: | |
339 | local_bh_enable(); | |
340 | return ret; | |
341 | } | |
342 | } | |
343 | ||
344 | EXPORT_SYMBOL_GPL(inet6_hash_connect); |