Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* |
2 | * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. | |
2a1d9b7f RD |
3 | * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. |
4 | * Copyright (c) 2004 Voltaire, Inc. All rights reserved. | |
1da177e4 LT |
5 | * |
6 | * This software is available to you under a choice of one of two | |
7 | * licenses. You may choose to be licensed under the terms of the GNU | |
8 | * General Public License (GPL) Version 2, available from the file | |
9 | * COPYING in the main directory of this source tree, or the | |
10 | * OpenIB.org BSD license below: | |
11 | * | |
12 | * Redistribution and use in source and binary forms, with or | |
13 | * without modification, are permitted provided that the following | |
14 | * conditions are met: | |
15 | * | |
16 | * - Redistributions of source code must retain the above | |
17 | * copyright notice, this list of conditions and the following | |
18 | * disclaimer. | |
19 | * | |
20 | * - Redistributions in binary form must reproduce the above | |
21 | * copyright notice, this list of conditions and the following | |
22 | * disclaimer in the documentation and/or other materials | |
23 | * provided with the distribution. | |
24 | * | |
25 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
26 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |
27 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
28 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | |
29 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | |
30 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | |
31 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |
32 | * SOFTWARE. | |
1da177e4 LT |
33 | */ |
34 | ||
35 | #include <linux/skbuff.h> | |
36 | #include <linux/rtnetlink.h> | |
37 | #include <linux/ip.h> | |
38 | #include <linux/in.h> | |
39 | #include <linux/igmp.h> | |
40 | #include <linux/inetdevice.h> | |
41 | #include <linux/delay.h> | |
42 | #include <linux/completion.h> | |
43 | ||
14c85021 ACM |
44 | #include <net/dst.h> |
45 | ||
1da177e4 LT |
46 | #include "ipoib.h" |
47 | ||
#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
/* Multicast debug tracing knob; exposed as a module parameter (mode 0644). */
static int mcast_debug_level;

module_param(mcast_debug_level, int, 0644);
MODULE_PARM_DESC(mcast_debug_level,
		 "Enable multicast debug tracing if > 0");
#endif

/*
 * Held in this file around every test/clear of IPOIB_MCAST_RUN that is
 * paired with queueing or cancelling priv->mcast_task.
 */
static DEFINE_MUTEX(mcast_mutex);
1da177e4 | 57 | |
1da177e4 LT |
58 | struct ipoib_mcast_iter { |
59 | struct net_device *dev; | |
60 | union ib_gid mgid; | |
61 | unsigned long created; | |
62 | unsigned int queuelen; | |
63 | unsigned int complete; | |
64 | unsigned int send_only; | |
65 | }; | |
66 | ||
67 | static void ipoib_mcast_free(struct ipoib_mcast *mcast) | |
68 | { | |
69 | struct net_device *dev = mcast->dev; | |
70 | struct ipoib_dev_priv *priv = netdev_priv(dev); | |
71 | struct ipoib_neigh *neigh, *tmp; | |
b36f170b | 72 | int tx_dropped = 0; |
1da177e4 | 73 | |
5b095d98 | 74 | ipoib_dbg_mcast(netdev_priv(dev), "deleting multicast group %pI6\n", |
fcace2fe | 75 | mcast->mcmember.mgid.raw); |
1da177e4 | 76 | |
943c246e | 77 | spin_lock_irq(&priv->lock); |
1da177e4 LT |
78 | |
79 | list_for_each_entry_safe(neigh, tmp, &mcast->neigh_list, list) { | |
97460df3 EC |
80 | /* |
81 | * It's safe to call ipoib_put_ah() inside priv->lock | |
82 | * here, because we know that mcast->ah will always | |
83 | * hold one more reference, so ipoib_put_ah() will | |
84 | * never do more than decrement the ref count. | |
85 | */ | |
1da177e4 | 86 | if (neigh->ah) |
97460df3 | 87 | ipoib_put_ah(neigh->ah); |
2745b5b7 | 88 | ipoib_neigh_free(dev, neigh); |
1da177e4 LT |
89 | } |
90 | ||
943c246e | 91 | spin_unlock_irq(&priv->lock); |
1da177e4 | 92 | |
1da177e4 LT |
93 | if (mcast->ah) |
94 | ipoib_put_ah(mcast->ah); | |
95 | ||
b36f170b MT |
96 | while (!skb_queue_empty(&mcast->pkt_queue)) { |
97 | ++tx_dropped; | |
8c608a32 | 98 | dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue)); |
b36f170b MT |
99 | } |
100 | ||
943c246e | 101 | netif_tx_lock_bh(dev); |
de903512 | 102 | dev->stats.tx_dropped += tx_dropped; |
943c246e | 103 | netif_tx_unlock_bh(dev); |
1da177e4 LT |
104 | |
105 | kfree(mcast); | |
106 | } | |
107 | ||
108 | static struct ipoib_mcast *ipoib_mcast_alloc(struct net_device *dev, | |
109 | int can_sleep) | |
110 | { | |
111 | struct ipoib_mcast *mcast; | |
112 | ||
de6eb66b | 113 | mcast = kzalloc(sizeof *mcast, can_sleep ? GFP_KERNEL : GFP_ATOMIC); |
1da177e4 LT |
114 | if (!mcast) |
115 | return NULL; | |
116 | ||
1da177e4 LT |
117 | mcast->dev = dev; |
118 | mcast->created = jiffies; | |
ce5b65cc | 119 | mcast->backoff = 1; |
1da177e4 LT |
120 | |
121 | INIT_LIST_HEAD(&mcast->list); | |
122 | INIT_LIST_HEAD(&mcast->neigh_list); | |
123 | skb_queue_head_init(&mcast->pkt_queue); | |
124 | ||
1da177e4 LT |
125 | return mcast; |
126 | } | |
127 | ||
37c22a77 | 128 | static struct ipoib_mcast *__ipoib_mcast_find(struct net_device *dev, void *mgid) |
1da177e4 LT |
129 | { |
130 | struct ipoib_dev_priv *priv = netdev_priv(dev); | |
131 | struct rb_node *n = priv->multicast_tree.rb_node; | |
132 | ||
133 | while (n) { | |
134 | struct ipoib_mcast *mcast; | |
135 | int ret; | |
136 | ||
137 | mcast = rb_entry(n, struct ipoib_mcast, rb_node); | |
138 | ||
37c22a77 | 139 | ret = memcmp(mgid, mcast->mcmember.mgid.raw, |
1da177e4 LT |
140 | sizeof (union ib_gid)); |
141 | if (ret < 0) | |
142 | n = n->rb_left; | |
143 | else if (ret > 0) | |
144 | n = n->rb_right; | |
145 | else | |
146 | return mcast; | |
147 | } | |
148 | ||
149 | return NULL; | |
150 | } | |
151 | ||
152 | static int __ipoib_mcast_add(struct net_device *dev, struct ipoib_mcast *mcast) | |
153 | { | |
154 | struct ipoib_dev_priv *priv = netdev_priv(dev); | |
155 | struct rb_node **n = &priv->multicast_tree.rb_node, *pn = NULL; | |
156 | ||
157 | while (*n) { | |
158 | struct ipoib_mcast *tmcast; | |
159 | int ret; | |
160 | ||
161 | pn = *n; | |
162 | tmcast = rb_entry(pn, struct ipoib_mcast, rb_node); | |
163 | ||
164 | ret = memcmp(mcast->mcmember.mgid.raw, tmcast->mcmember.mgid.raw, | |
165 | sizeof (union ib_gid)); | |
166 | if (ret < 0) | |
167 | n = &pn->rb_left; | |
168 | else if (ret > 0) | |
169 | n = &pn->rb_right; | |
170 | else | |
171 | return -EEXIST; | |
172 | } | |
173 | ||
174 | rb_link_node(&mcast->rb_node, pn, n); | |
175 | rb_insert_color(&mcast->rb_node, &priv->multicast_tree); | |
176 | ||
177 | return 0; | |
178 | } | |
179 | ||
180 | static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast, | |
181 | struct ib_sa_mcmember_rec *mcmember) | |
182 | { | |
183 | struct net_device *dev = mcast->dev; | |
184 | struct ipoib_dev_priv *priv = netdev_priv(dev); | |
7343b231 | 185 | struct ipoib_ah *ah; |
1da177e4 | 186 | int ret; |
d0de1362 | 187 | int set_qkey = 0; |
1da177e4 LT |
188 | |
189 | mcast->mcmember = *mcmember; | |
190 | ||
191 | /* Set the cached Q_Key before we attach if it's the broadcast group */ | |
192 | if (!memcmp(mcast->mcmember.mgid.raw, priv->dev->broadcast + 4, | |
193 | sizeof (union ib_gid))) { | |
e1d50dce JM |
194 | spin_lock_irq(&priv->lock); |
195 | if (!priv->broadcast) { | |
196 | spin_unlock_irq(&priv->lock); | |
197 | return -EAGAIN; | |
198 | } | |
1da177e4 | 199 | priv->qkey = be32_to_cpu(priv->broadcast->mcmember.qkey); |
e1d50dce | 200 | spin_unlock_irq(&priv->lock); |
1da177e4 | 201 | priv->tx_wr.wr.ud.remote_qkey = priv->qkey; |
d0de1362 | 202 | set_qkey = 1; |
1da177e4 LT |
203 | } |
204 | ||
205 | if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) { | |
206 | if (test_and_set_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) { | |
5b095d98 | 207 | ipoib_warn(priv, "multicast group %pI6 already attached\n", |
fcace2fe | 208 | mcast->mcmember.mgid.raw); |
1da177e4 LT |
209 | |
210 | return 0; | |
211 | } | |
212 | ||
213 | ret = ipoib_mcast_attach(dev, be16_to_cpu(mcast->mcmember.mlid), | |
d0de1362 | 214 | &mcast->mcmember.mgid, set_qkey); |
1da177e4 | 215 | if (ret < 0) { |
5b095d98 | 216 | ipoib_warn(priv, "couldn't attach QP to multicast group %pI6\n", |
fcace2fe | 217 | mcast->mcmember.mgid.raw); |
1da177e4 LT |
218 | |
219 | clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags); | |
220 | return ret; | |
221 | } | |
222 | } | |
223 | ||
224 | { | |
225 | struct ib_ah_attr av = { | |
226 | .dlid = be16_to_cpu(mcast->mcmember.mlid), | |
227 | .port_num = priv->port, | |
228 | .sl = mcast->mcmember.sl, | |
229 | .ah_flags = IB_AH_GRH, | |
bf6a9e31 | 230 | .static_rate = mcast->mcmember.rate, |
1da177e4 LT |
231 | .grh = { |
232 | .flow_label = be32_to_cpu(mcast->mcmember.flow_label), | |
233 | .hop_limit = mcast->mcmember.hop_limit, | |
234 | .sgid_index = 0, | |
235 | .traffic_class = mcast->mcmember.traffic_class | |
236 | } | |
237 | }; | |
1da177e4 LT |
238 | av.grh.dgid = mcast->mcmember.mgid; |
239 | ||
7343b231 EC |
240 | ah = ipoib_create_ah(dev, priv->pd, &av); |
241 | if (!ah) { | |
1da177e4 LT |
242 | ipoib_warn(priv, "ib_address_create failed\n"); |
243 | } else { | |
624d01f8 OG |
244 | spin_lock_irq(&priv->lock); |
245 | mcast->ah = ah; | |
246 | spin_unlock_irq(&priv->lock); | |
247 | ||
5b095d98 | 248 | ipoib_dbg_mcast(priv, "MGID %pI6 AV %p, LID 0x%04x, SL %d\n", |
fcace2fe | 249 | mcast->mcmember.mgid.raw, |
1da177e4 LT |
250 | mcast->ah->ah, |
251 | be16_to_cpu(mcast->mcmember.mlid), | |
252 | mcast->mcmember.sl); | |
253 | } | |
254 | } | |
255 | ||
256 | /* actually send any queued packets */ | |
943c246e | 257 | netif_tx_lock_bh(dev); |
1da177e4 LT |
258 | while (!skb_queue_empty(&mcast->pkt_queue)) { |
259 | struct sk_buff *skb = skb_dequeue(&mcast->pkt_queue); | |
943c246e | 260 | netif_tx_unlock_bh(dev); |
1da177e4 LT |
261 | |
262 | skb->dev = dev; | |
263 | ||
264 | if (!skb->dst || !skb->dst->neighbour) { | |
265 | /* put pseudoheader back on for next time */ | |
266 | skb_push(skb, sizeof (struct ipoib_pseudoheader)); | |
267 | } | |
268 | ||
269 | if (dev_queue_xmit(skb)) | |
270 | ipoib_warn(priv, "dev_queue_xmit failed to requeue packet\n"); | |
943c246e | 271 | netif_tx_lock_bh(dev); |
1da177e4 | 272 | } |
943c246e | 273 | netif_tx_unlock_bh(dev); |
1da177e4 LT |
274 | |
275 | return 0; | |
276 | } | |
277 | ||
faec2f7b | 278 | static int |
1da177e4 | 279 | ipoib_mcast_sendonly_join_complete(int status, |
faec2f7b | 280 | struct ib_sa_multicast *multicast) |
1da177e4 | 281 | { |
faec2f7b | 282 | struct ipoib_mcast *mcast = multicast->context; |
1da177e4 LT |
283 | struct net_device *dev = mcast->dev; |
284 | ||
faec2f7b SH |
285 | /* We trap for port events ourselves. */ |
286 | if (status == -ENETRESET) | |
287 | return 0; | |
288 | ||
1da177e4 | 289 | if (!status) |
faec2f7b SH |
290 | status = ipoib_mcast_join_finish(mcast, &multicast->rec); |
291 | ||
292 | if (status) { | |
1da177e4 | 293 | if (mcast->logcount++ < 20) |
5b095d98 | 294 | ipoib_dbg_mcast(netdev_priv(dev), "multicast join failed for %pI6, status %d\n", |
fcace2fe | 295 | mcast->mcmember.mgid.raw, status); |
1da177e4 LT |
296 | |
297 | /* Flush out any queued packets */ | |
943c246e | 298 | netif_tx_lock_bh(dev); |
b36f170b | 299 | while (!skb_queue_empty(&mcast->pkt_queue)) { |
de903512 | 300 | ++dev->stats.tx_dropped; |
8c608a32 | 301 | dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue)); |
b36f170b | 302 | } |
943c246e | 303 | netif_tx_unlock_bh(dev); |
1da177e4 LT |
304 | |
305 | /* Clear the busy flag so we try again */ | |
faec2f7b SH |
306 | status = test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, |
307 | &mcast->flags); | |
1da177e4 | 308 | } |
faec2f7b | 309 | return status; |
1da177e4 LT |
310 | } |
311 | ||
312 | static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast) | |
313 | { | |
314 | struct net_device *dev = mcast->dev; | |
315 | struct ipoib_dev_priv *priv = netdev_priv(dev); | |
316 | struct ib_sa_mcmember_rec rec = { | |
317 | #if 0 /* Some SMs don't support send-only yet */ | |
318 | .join_state = 4 | |
319 | #else | |
320 | .join_state = 1 | |
321 | #endif | |
322 | }; | |
323 | int ret = 0; | |
324 | ||
325 | if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) { | |
326 | ipoib_dbg_mcast(priv, "device shutting down, no multicast joins\n"); | |
327 | return -ENODEV; | |
328 | } | |
329 | ||
330 | if (test_and_set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) { | |
331 | ipoib_dbg_mcast(priv, "multicast entry busy, skipping\n"); | |
332 | return -EBUSY; | |
333 | } | |
334 | ||
335 | rec.mgid = mcast->mcmember.mgid; | |
336 | rec.port_gid = priv->local_gid; | |
97f52eb4 | 337 | rec.pkey = cpu_to_be16(priv->pkey); |
1da177e4 | 338 | |
faec2f7b SH |
339 | mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, |
340 | priv->port, &rec, | |
341 | IB_SA_MCMEMBER_REC_MGID | | |
342 | IB_SA_MCMEMBER_REC_PORT_GID | | |
343 | IB_SA_MCMEMBER_REC_PKEY | | |
344 | IB_SA_MCMEMBER_REC_JOIN_STATE, | |
345 | GFP_ATOMIC, | |
346 | ipoib_mcast_sendonly_join_complete, | |
347 | mcast); | |
348 | if (IS_ERR(mcast->mc)) { | |
349 | ret = PTR_ERR(mcast->mc); | |
350 | clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); | |
351 | ipoib_warn(priv, "ib_sa_join_multicast failed (ret = %d)\n", | |
1da177e4 LT |
352 | ret); |
353 | } else { | |
5b095d98 | 354 | ipoib_dbg_mcast(priv, "no multicast record for %pI6, starting join\n", |
fcace2fe | 355 | mcast->mcmember.mgid.raw); |
1da177e4 LT |
356 | } |
357 | ||
358 | return ret; | |
359 | } | |
360 | ||
e8224e4b YE |
361 | void ipoib_mcast_carrier_on_task(struct work_struct *work) |
362 | { | |
363 | struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, | |
364 | carrier_on_task); | |
365 | ||
366 | /* | |
367 | * Take rtnl_lock to avoid racing with ipoib_stop() and | |
368 | * turning the carrier back on while a device is being | |
369 | * removed. | |
370 | */ | |
371 | rtnl_lock(); | |
372 | netif_carrier_on(priv->dev); | |
373 | rtnl_unlock(); | |
374 | } | |
375 | ||
faec2f7b SH |
376 | static int ipoib_mcast_join_complete(int status, |
377 | struct ib_sa_multicast *multicast) | |
1da177e4 | 378 | { |
faec2f7b | 379 | struct ipoib_mcast *mcast = multicast->context; |
1da177e4 LT |
380 | struct net_device *dev = mcast->dev; |
381 | struct ipoib_dev_priv *priv = netdev_priv(dev); | |
382 | ||
5b095d98 | 383 | ipoib_dbg_mcast(priv, "join completion for %pI6 (status %d)\n", |
fcace2fe | 384 | mcast->mcmember.mgid.raw, status); |
1da177e4 | 385 | |
faec2f7b SH |
386 | /* We trap for port events ourselves. */ |
387 | if (status == -ENETRESET) | |
388 | return 0; | |
389 | ||
390 | if (!status) | |
391 | status = ipoib_mcast_join_finish(mcast, &multicast->rec); | |
392 | ||
393 | if (!status) { | |
ce5b65cc | 394 | mcast->backoff = 1; |
95ed644f | 395 | mutex_lock(&mcast_mutex); |
1da177e4 | 396 | if (test_bit(IPOIB_MCAST_RUN, &priv->flags)) |
c4028958 DH |
397 | queue_delayed_work(ipoib_workqueue, |
398 | &priv->mcast_task, 0); | |
95ed644f | 399 | mutex_unlock(&mcast_mutex); |
55c9adde | 400 | |
e8224e4b YE |
401 | /* |
402 | * Defer carrier on work to ipoib_workqueue to avoid a | |
403 | * deadlock on rtnl_lock here. | |
404 | */ | |
405 | if (mcast == priv->broadcast) | |
406 | queue_work(ipoib_workqueue, &priv->carrier_on_task); | |
55c9adde | 407 | |
faec2f7b | 408 | return 0; |
1da177e4 LT |
409 | } |
410 | ||
faec2f7b SH |
411 | if (mcast->logcount++ < 20) { |
412 | if (status == -ETIMEDOUT) { | |
5b095d98 | 413 | ipoib_dbg_mcast(priv, "multicast join failed for %pI6, status %d\n", |
fcace2fe | 414 | mcast->mcmember.mgid.raw, status); |
1da177e4 | 415 | } else { |
5b095d98 | 416 | ipoib_warn(priv, "multicast join failed for %pI6, status %d\n", |
fcace2fe | 417 | mcast->mcmember.mgid.raw, status); |
1da177e4 LT |
418 | } |
419 | } | |
420 | ||
421 | mcast->backoff *= 2; | |
422 | if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS) | |
423 | mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS; | |
424 | ||
faec2f7b SH |
425 | /* Clear the busy flag so we try again */ |
426 | status = test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); | |
9acf6a85 | 427 | |
faec2f7b | 428 | mutex_lock(&mcast_mutex); |
9acf6a85 | 429 | spin_lock_irq(&priv->lock); |
faec2f7b SH |
430 | if (test_bit(IPOIB_MCAST_RUN, &priv->flags)) |
431 | queue_delayed_work(ipoib_workqueue, &priv->mcast_task, | |
432 | mcast->backoff * HZ); | |
9acf6a85 | 433 | spin_unlock_irq(&priv->lock); |
95ed644f | 434 | mutex_unlock(&mcast_mutex); |
1da177e4 | 435 | |
faec2f7b | 436 | return status; |
1da177e4 LT |
437 | } |
438 | ||
439 | static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast, | |
440 | int create) | |
441 | { | |
442 | struct ipoib_dev_priv *priv = netdev_priv(dev); | |
443 | struct ib_sa_mcmember_rec rec = { | |
444 | .join_state = 1 | |
445 | }; | |
446 | ib_sa_comp_mask comp_mask; | |
447 | int ret = 0; | |
448 | ||
5b095d98 | 449 | ipoib_dbg_mcast(priv, "joining MGID %pI6\n", mcast->mcmember.mgid.raw); |
1da177e4 LT |
450 | |
451 | rec.mgid = mcast->mcmember.mgid; | |
452 | rec.port_gid = priv->local_gid; | |
97f52eb4 | 453 | rec.pkey = cpu_to_be16(priv->pkey); |
1da177e4 LT |
454 | |
455 | comp_mask = | |
456 | IB_SA_MCMEMBER_REC_MGID | | |
457 | IB_SA_MCMEMBER_REC_PORT_GID | | |
458 | IB_SA_MCMEMBER_REC_PKEY | | |
459 | IB_SA_MCMEMBER_REC_JOIN_STATE; | |
460 | ||
461 | if (create) { | |
462 | comp_mask |= | |
d0df6d6d RD |
463 | IB_SA_MCMEMBER_REC_QKEY | |
464 | IB_SA_MCMEMBER_REC_MTU_SELECTOR | | |
465 | IB_SA_MCMEMBER_REC_MTU | | |
466 | IB_SA_MCMEMBER_REC_TRAFFIC_CLASS | | |
467 | IB_SA_MCMEMBER_REC_RATE_SELECTOR | | |
468 | IB_SA_MCMEMBER_REC_RATE | | |
469 | IB_SA_MCMEMBER_REC_SL | | |
470 | IB_SA_MCMEMBER_REC_FLOW_LABEL | | |
471 | IB_SA_MCMEMBER_REC_HOP_LIMIT; | |
1da177e4 LT |
472 | |
473 | rec.qkey = priv->broadcast->mcmember.qkey; | |
d0df6d6d RD |
474 | rec.mtu_selector = IB_SA_EQ; |
475 | rec.mtu = priv->broadcast->mcmember.mtu; | |
476 | rec.traffic_class = priv->broadcast->mcmember.traffic_class; | |
477 | rec.rate_selector = IB_SA_EQ; | |
478 | rec.rate = priv->broadcast->mcmember.rate; | |
1da177e4 LT |
479 | rec.sl = priv->broadcast->mcmember.sl; |
480 | rec.flow_label = priv->broadcast->mcmember.flow_label; | |
d0df6d6d | 481 | rec.hop_limit = priv->broadcast->mcmember.hop_limit; |
1da177e4 LT |
482 | } |
483 | ||
faec2f7b SH |
484 | set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); |
485 | mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, priv->port, | |
486 | &rec, comp_mask, GFP_KERNEL, | |
487 | ipoib_mcast_join_complete, mcast); | |
488 | if (IS_ERR(mcast->mc)) { | |
489 | clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); | |
490 | ret = PTR_ERR(mcast->mc); | |
491 | ipoib_warn(priv, "ib_sa_join_multicast failed, status %d\n", ret); | |
1da177e4 LT |
492 | |
493 | mcast->backoff *= 2; | |
494 | if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS) | |
495 | mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS; | |
496 | ||
95ed644f | 497 | mutex_lock(&mcast_mutex); |
1da177e4 LT |
498 | if (test_bit(IPOIB_MCAST_RUN, &priv->flags)) |
499 | queue_delayed_work(ipoib_workqueue, | |
500 | &priv->mcast_task, | |
ce5b65cc | 501 | mcast->backoff * HZ); |
95ed644f | 502 | mutex_unlock(&mcast_mutex); |
faec2f7b | 503 | } |
1da177e4 LT |
504 | } |
505 | ||
c4028958 | 506 | void ipoib_mcast_join_task(struct work_struct *work) |
1da177e4 | 507 | { |
c4028958 DH |
508 | struct ipoib_dev_priv *priv = |
509 | container_of(work, struct ipoib_dev_priv, mcast_task.work); | |
510 | struct net_device *dev = priv->dev; | |
1da177e4 LT |
511 | |
512 | if (!test_bit(IPOIB_MCAST_RUN, &priv->flags)) | |
513 | return; | |
514 | ||
515 | if (ib_query_gid(priv->ca, priv->port, 0, &priv->local_gid)) | |
24bd1e4e | 516 | ipoib_warn(priv, "ib_query_gid() failed\n"); |
1da177e4 LT |
517 | else |
518 | memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, sizeof (union ib_gid)); | |
519 | ||
520 | { | |
521 | struct ib_port_attr attr; | |
522 | ||
658bcef6 RD |
523 | if (!ib_query_port(priv->ca, priv->port, &attr)) |
524 | priv->local_lid = attr.lid; | |
525 | else | |
faec2f7b | 526 | ipoib_warn(priv, "ib_query_port failed\n"); |
1da177e4 LT |
527 | } |
528 | ||
529 | if (!priv->broadcast) { | |
20b83382 RD |
530 | struct ipoib_mcast *broadcast; |
531 | ||
532 | broadcast = ipoib_mcast_alloc(dev, 1); | |
533 | if (!broadcast) { | |
1da177e4 | 534 | ipoib_warn(priv, "failed to allocate broadcast group\n"); |
95ed644f | 535 | mutex_lock(&mcast_mutex); |
1da177e4 LT |
536 | if (test_bit(IPOIB_MCAST_RUN, &priv->flags)) |
537 | queue_delayed_work(ipoib_workqueue, | |
538 | &priv->mcast_task, HZ); | |
95ed644f | 539 | mutex_unlock(&mcast_mutex); |
1da177e4 LT |
540 | return; |
541 | } | |
542 | ||
20b83382 RD |
543 | spin_lock_irq(&priv->lock); |
544 | memcpy(broadcast->mcmember.mgid.raw, priv->dev->broadcast + 4, | |
1da177e4 | 545 | sizeof (union ib_gid)); |
20b83382 | 546 | priv->broadcast = broadcast; |
1da177e4 | 547 | |
1da177e4 LT |
548 | __ipoib_mcast_add(dev, priv->broadcast); |
549 | spin_unlock_irq(&priv->lock); | |
550 | } | |
551 | ||
552 | if (!test_bit(IPOIB_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) { | |
faec2f7b SH |
553 | if (!test_bit(IPOIB_MCAST_FLAG_BUSY, &priv->broadcast->flags)) |
554 | ipoib_mcast_join(dev, priv->broadcast, 0); | |
1da177e4 LT |
555 | return; |
556 | } | |
557 | ||
558 | while (1) { | |
559 | struct ipoib_mcast *mcast = NULL; | |
560 | ||
561 | spin_lock_irq(&priv->lock); | |
562 | list_for_each_entry(mcast, &priv->multicast_list, list) { | |
563 | if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) | |
564 | && !test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags) | |
565 | && !test_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) { | |
566 | /* Found the next unjoined group */ | |
567 | break; | |
568 | } | |
569 | } | |
570 | spin_unlock_irq(&priv->lock); | |
571 | ||
572 | if (&mcast->list == &priv->multicast_list) { | |
573 | /* All done */ | |
574 | break; | |
575 | } | |
576 | ||
577 | ipoib_mcast_join(dev, mcast, 1); | |
578 | return; | |
579 | } | |
580 | ||
bc7b3a36 | 581 | priv->mcast_mtu = IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu)); |
839fcaba | 582 | |
c8c2afe3 EC |
583 | if (!ipoib_cm_admin_enabled(dev)) { |
584 | rtnl_lock(); | |
bd360671 | 585 | dev_set_mtu(dev, min(priv->mcast_mtu, priv->admin_mtu)); |
c8c2afe3 EC |
586 | rtnl_unlock(); |
587 | } | |
1da177e4 LT |
588 | |
589 | ipoib_dbg_mcast(priv, "successfully joined all multicast groups\n"); | |
590 | ||
591 | clear_bit(IPOIB_MCAST_RUN, &priv->flags); | |
1da177e4 LT |
592 | } |
593 | ||
594 | int ipoib_mcast_start_thread(struct net_device *dev) | |
595 | { | |
596 | struct ipoib_dev_priv *priv = netdev_priv(dev); | |
597 | ||
598 | ipoib_dbg_mcast(priv, "starting multicast thread\n"); | |
599 | ||
95ed644f | 600 | mutex_lock(&mcast_mutex); |
1da177e4 | 601 | if (!test_and_set_bit(IPOIB_MCAST_RUN, &priv->flags)) |
c4028958 | 602 | queue_delayed_work(ipoib_workqueue, &priv->mcast_task, 0); |
95ed644f | 603 | mutex_unlock(&mcast_mutex); |
1da177e4 LT |
604 | |
605 | return 0; | |
606 | } | |
607 | ||
8d2cae06 | 608 | int ipoib_mcast_stop_thread(struct net_device *dev, int flush) |
1da177e4 LT |
609 | { |
610 | struct ipoib_dev_priv *priv = netdev_priv(dev); | |
1da177e4 LT |
611 | |
612 | ipoib_dbg_mcast(priv, "stopping multicast thread\n"); | |
613 | ||
95ed644f | 614 | mutex_lock(&mcast_mutex); |
1da177e4 LT |
615 | clear_bit(IPOIB_MCAST_RUN, &priv->flags); |
616 | cancel_delayed_work(&priv->mcast_task); | |
95ed644f | 617 | mutex_unlock(&mcast_mutex); |
1da177e4 | 618 | |
8d2cae06 RD |
619 | if (flush) |
620 | flush_workqueue(ipoib_workqueue); | |
1da177e4 | 621 | |
1da177e4 LT |
622 | return 0; |
623 | } | |
624 | ||
625 | static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast) | |
626 | { | |
627 | struct ipoib_dev_priv *priv = netdev_priv(dev); | |
1da177e4 LT |
628 | int ret = 0; |
629 | ||
e07832b6 SH |
630 | if (test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) |
631 | ib_sa_free_multicast(mcast->mc); | |
632 | ||
faec2f7b | 633 | if (test_and_clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) { |
5b095d98 | 634 | ipoib_dbg_mcast(priv, "leaving MGID %pI6\n", |
fcace2fe | 635 | mcast->mcmember.mgid.raw); |
1da177e4 | 636 | |
faec2f7b | 637 | /* Remove ourselves from the multicast group */ |
9eae554c RD |
638 | ret = ib_detach_mcast(priv->qp, &mcast->mcmember.mgid, |
639 | be16_to_cpu(mcast->mcmember.mlid)); | |
faec2f7b | 640 | if (ret) |
9eae554c | 641 | ipoib_warn(priv, "ib_detach_mcast failed (result = %d)\n", ret); |
faec2f7b | 642 | } |
1da177e4 | 643 | |
1da177e4 LT |
644 | return 0; |
645 | } | |
646 | ||
37c22a77 | 647 | void ipoib_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb) |
1da177e4 LT |
648 | { |
649 | struct ipoib_dev_priv *priv = netdev_priv(dev); | |
650 | struct ipoib_mcast *mcast; | |
943c246e | 651 | unsigned long flags; |
1da177e4 | 652 | |
943c246e | 653 | spin_lock_irqsave(&priv->lock, flags); |
1da177e4 | 654 | |
b3e2749b | 655 | if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags) || |
20b83382 RD |
656 | !priv->broadcast || |
657 | !test_bit(IPOIB_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) { | |
de903512 | 658 | ++dev->stats.tx_dropped; |
479a0796 MT |
659 | dev_kfree_skb_any(skb); |
660 | goto unlock; | |
661 | } | |
662 | ||
1da177e4 LT |
663 | mcast = __ipoib_mcast_find(dev, mgid); |
664 | if (!mcast) { | |
665 | /* Let's create a new send only group now */ | |
5b095d98 | 666 | ipoib_dbg_mcast(priv, "setting up send only multicast group for %pI6\n", |
fcace2fe | 667 | mgid); |
1da177e4 LT |
668 | |
669 | mcast = ipoib_mcast_alloc(dev, 0); | |
670 | if (!mcast) { | |
671 | ipoib_warn(priv, "unable to allocate memory for " | |
672 | "multicast structure\n"); | |
de903512 | 673 | ++dev->stats.tx_dropped; |
1da177e4 LT |
674 | dev_kfree_skb_any(skb); |
675 | goto out; | |
676 | } | |
677 | ||
678 | set_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags); | |
37c22a77 | 679 | memcpy(mcast->mcmember.mgid.raw, mgid, sizeof (union ib_gid)); |
1da177e4 LT |
680 | __ipoib_mcast_add(dev, mcast); |
681 | list_add_tail(&mcast->list, &priv->multicast_list); | |
682 | } | |
683 | ||
684 | if (!mcast->ah) { | |
685 | if (skb_queue_len(&mcast->pkt_queue) < IPOIB_MAX_MCAST_QUEUE) | |
686 | skb_queue_tail(&mcast->pkt_queue, skb); | |
b36f170b | 687 | else { |
de903512 | 688 | ++dev->stats.tx_dropped; |
1da177e4 | 689 | dev_kfree_skb_any(skb); |
b36f170b | 690 | } |
1da177e4 | 691 | |
faec2f7b | 692 | if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) |
1da177e4 LT |
693 | ipoib_dbg_mcast(priv, "no address vector, " |
694 | "but multicast join already started\n"); | |
695 | else if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) | |
696 | ipoib_mcast_sendonly_join(mcast); | |
697 | ||
698 | /* | |
699 | * If lookup completes between here and out:, don't | |
700 | * want to send packet twice. | |
701 | */ | |
702 | mcast = NULL; | |
703 | } | |
704 | ||
705 | out: | |
706 | if (mcast && mcast->ah) { | |
2337f809 | 707 | if (skb->dst && |
1da177e4 LT |
708 | skb->dst->neighbour && |
709 | !*to_ipoib_neigh(skb->dst->neighbour)) { | |
732a2170 MS |
710 | struct ipoib_neigh *neigh = ipoib_neigh_alloc(skb->dst->neighbour, |
711 | skb->dev); | |
1da177e4 LT |
712 | |
713 | if (neigh) { | |
714 | kref_get(&mcast->ah->ref); | |
2337f809 | 715 | neigh->ah = mcast->ah; |
1da177e4 LT |
716 | list_add_tail(&neigh->list, &mcast->neigh_list); |
717 | } | |
718 | } | |
719 | ||
720 | ipoib_send(dev, skb, mcast->ah, IB_MULTICAST_QPN); | |
721 | } | |
722 | ||
479a0796 | 723 | unlock: |
943c246e | 724 | spin_unlock_irqrestore(&priv->lock, flags); |
1da177e4 LT |
725 | } |
726 | ||
727 | void ipoib_mcast_dev_flush(struct net_device *dev) | |
728 | { | |
729 | struct ipoib_dev_priv *priv = netdev_priv(dev); | |
730 | LIST_HEAD(remove_list); | |
988bd503 | 731 | struct ipoib_mcast *mcast, *tmcast; |
1da177e4 LT |
732 | unsigned long flags; |
733 | ||
734 | ipoib_dbg_mcast(priv, "flushing multicast list\n"); | |
735 | ||
736 | spin_lock_irqsave(&priv->lock, flags); | |
1da177e4 | 737 | |
988bd503 EC |
738 | list_for_each_entry_safe(mcast, tmcast, &priv->multicast_list, list) { |
739 | list_del(&mcast->list); | |
740 | rb_erase(&mcast->rb_node, &priv->multicast_tree); | |
741 | list_add_tail(&mcast->list, &remove_list); | |
1da177e4 LT |
742 | } |
743 | ||
744 | if (priv->broadcast) { | |
3cd96564 | 745 | rb_erase(&priv->broadcast->rb_node, &priv->multicast_tree); |
988bd503 EC |
746 | list_add_tail(&priv->broadcast->list, &remove_list); |
747 | priv->broadcast = NULL; | |
1da177e4 LT |
748 | } |
749 | ||
750 | spin_unlock_irqrestore(&priv->lock, flags); | |
751 | ||
752 | list_for_each_entry_safe(mcast, tmcast, &remove_list, list) { | |
753 | ipoib_mcast_leave(dev, mcast); | |
754 | ipoib_mcast_free(mcast); | |
755 | } | |
756 | } | |
757 | ||
c4028958 | 758 | void ipoib_mcast_restart_task(struct work_struct *work) |
1da177e4 | 759 | { |
c4028958 DH |
760 | struct ipoib_dev_priv *priv = |
761 | container_of(work, struct ipoib_dev_priv, restart_task); | |
762 | struct net_device *dev = priv->dev; | |
1da177e4 LT |
763 | struct dev_mc_list *mclist; |
764 | struct ipoib_mcast *mcast, *tmcast; | |
765 | LIST_HEAD(remove_list); | |
766 | unsigned long flags; | |
335a64a5 | 767 | struct ib_sa_mcmember_rec rec; |
1da177e4 LT |
768 | |
769 | ipoib_dbg_mcast(priv, "restarting multicast task\n"); | |
770 | ||
8d2cae06 | 771 | ipoib_mcast_stop_thread(dev, 0); |
1da177e4 | 772 | |
932ff279 | 773 | local_irq_save(flags); |
e308a5d8 | 774 | netif_addr_lock(dev); |
78bfe0b5 | 775 | spin_lock(&priv->lock); |
1da177e4 LT |
776 | |
777 | /* | |
778 | * Unfortunately, the networking core only gives us a list of all of | |
779 | * the multicast hardware addresses. We need to figure out which ones | |
780 | * are new and which ones have been removed | |
781 | */ | |
782 | ||
783 | /* Clear out the found flag */ | |
784 | list_for_each_entry(mcast, &priv->multicast_list, list) | |
785 | clear_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags); | |
786 | ||
787 | /* Mark all of the entries that are found or don't exist */ | |
788 | for (mclist = dev->mc_list; mclist; mclist = mclist->next) { | |
789 | union ib_gid mgid; | |
790 | ||
791 | memcpy(mgid.raw, mclist->dmi_addr + 4, sizeof mgid); | |
792 | ||
1da177e4 LT |
793 | mcast = __ipoib_mcast_find(dev, &mgid); |
794 | if (!mcast || test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) { | |
795 | struct ipoib_mcast *nmcast; | |
796 | ||
335a64a5 OG |
797 | /* ignore group which is directly joined by userspace */ |
798 | if (test_bit(IPOIB_FLAG_UMCAST, &priv->flags) && | |
799 | !ib_sa_get_mcmember_rec(priv->ca, priv->port, &mgid, &rec)) { | |
5b095d98 | 800 | ipoib_dbg_mcast(priv, "ignoring multicast entry for mgid %pI6\n", |
fcace2fe | 801 | mgid.raw); |
335a64a5 OG |
802 | continue; |
803 | } | |
804 | ||
1da177e4 | 805 | /* Not found or send-only group, let's add a new entry */ |
5b095d98 | 806 | ipoib_dbg_mcast(priv, "adding multicast entry for mgid %pI6\n", |
fcace2fe | 807 | mgid.raw); |
1da177e4 LT |
808 | |
809 | nmcast = ipoib_mcast_alloc(dev, 0); | |
810 | if (!nmcast) { | |
811 | ipoib_warn(priv, "unable to allocate memory for multicast structure\n"); | |
812 | continue; | |
813 | } | |
814 | ||
815 | set_bit(IPOIB_MCAST_FLAG_FOUND, &nmcast->flags); | |
816 | ||
817 | nmcast->mcmember.mgid = mgid; | |
818 | ||
819 | if (mcast) { | |
820 | /* Destroy the send only entry */ | |
179e0917 | 821 | list_move_tail(&mcast->list, &remove_list); |
1da177e4 LT |
822 | |
823 | rb_replace_node(&mcast->rb_node, | |
824 | &nmcast->rb_node, | |
825 | &priv->multicast_tree); | |
826 | } else | |
827 | __ipoib_mcast_add(dev, nmcast); | |
828 | ||
829 | list_add_tail(&nmcast->list, &priv->multicast_list); | |
830 | } | |
831 | ||
832 | if (mcast) | |
833 | set_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags); | |
834 | } | |
835 | ||
836 | /* Remove all of the entries don't exist anymore */ | |
837 | list_for_each_entry_safe(mcast, tmcast, &priv->multicast_list, list) { | |
838 | if (!test_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags) && | |
839 | !test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) { | |
5b095d98 | 840 | ipoib_dbg_mcast(priv, "deleting multicast group %pI6\n", |
fcace2fe | 841 | mcast->mcmember.mgid.raw); |
1da177e4 LT |
842 | |
843 | rb_erase(&mcast->rb_node, &priv->multicast_tree); | |
844 | ||
845 | /* Move to the remove list */ | |
179e0917 | 846 | list_move_tail(&mcast->list, &remove_list); |
1da177e4 LT |
847 | } |
848 | } | |
78bfe0b5 MT |
849 | |
850 | spin_unlock(&priv->lock); | |
e308a5d8 | 851 | netif_addr_unlock(dev); |
932ff279 | 852 | local_irq_restore(flags); |
1da177e4 LT |
853 | |
854 | /* We have to cancel outside of the spinlock */ | |
855 | list_for_each_entry_safe(mcast, tmcast, &remove_list, list) { | |
856 | ipoib_mcast_leave(mcast->dev, mcast); | |
857 | ipoib_mcast_free(mcast); | |
858 | } | |
859 | ||
860 | if (test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) | |
861 | ipoib_mcast_start_thread(dev); | |
862 | } | |
863 | ||
8ae5a8a2 RD |
864 | #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG |
865 | ||
1da177e4 LT |
866 | struct ipoib_mcast_iter *ipoib_mcast_iter_init(struct net_device *dev) |
867 | { | |
868 | struct ipoib_mcast_iter *iter; | |
869 | ||
870 | iter = kmalloc(sizeof *iter, GFP_KERNEL); | |
871 | if (!iter) | |
872 | return NULL; | |
873 | ||
874 | iter->dev = dev; | |
1732b0ef | 875 | memset(iter->mgid.raw, 0, 16); |
1da177e4 LT |
876 | |
877 | if (ipoib_mcast_iter_next(iter)) { | |
1732b0ef | 878 | kfree(iter); |
1da177e4 LT |
879 | return NULL; |
880 | } | |
881 | ||
882 | return iter; | |
883 | } | |
884 | ||
1da177e4 LT |
885 | int ipoib_mcast_iter_next(struct ipoib_mcast_iter *iter) |
886 | { | |
887 | struct ipoib_dev_priv *priv = netdev_priv(iter->dev); | |
888 | struct rb_node *n; | |
889 | struct ipoib_mcast *mcast; | |
890 | int ret = 1; | |
891 | ||
892 | spin_lock_irq(&priv->lock); | |
893 | ||
894 | n = rb_first(&priv->multicast_tree); | |
895 | ||
896 | while (n) { | |
897 | mcast = rb_entry(n, struct ipoib_mcast, rb_node); | |
898 | ||
899 | if (memcmp(iter->mgid.raw, mcast->mcmember.mgid.raw, | |
900 | sizeof (union ib_gid)) < 0) { | |
901 | iter->mgid = mcast->mcmember.mgid; | |
902 | iter->created = mcast->created; | |
903 | iter->queuelen = skb_queue_len(&mcast->pkt_queue); | |
904 | iter->complete = !!mcast->ah; | |
905 | iter->send_only = !!(mcast->flags & (1 << IPOIB_MCAST_FLAG_SENDONLY)); | |
906 | ||
907 | ret = 0; | |
908 | ||
909 | break; | |
910 | } | |
911 | ||
912 | n = rb_next(n); | |
913 | } | |
914 | ||
915 | spin_unlock_irq(&priv->lock); | |
916 | ||
917 | return ret; | |
918 | } | |
919 | ||
920 | void ipoib_mcast_iter_read(struct ipoib_mcast_iter *iter, | |
921 | union ib_gid *mgid, | |
922 | unsigned long *created, | |
923 | unsigned int *queuelen, | |
924 | unsigned int *complete, | |
925 | unsigned int *send_only) | |
926 | { | |
927 | *mgid = iter->mgid; | |
928 | *created = iter->created; | |
929 | *queuelen = iter->queuelen; | |
930 | *complete = iter->complete; | |
931 | *send_only = iter->send_only; | |
932 | } | |
8ae5a8a2 RD |
933 | |
934 | #endif /* CONFIG_INFINIBAND_IPOIB_DEBUG */ |