Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* |
2 | * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. | |
2a1d9b7f RD |
3 | * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. |
4 | * Copyright (c) 2004 Voltaire, Inc. All rights reserved. | |
1da177e4 LT |
5 | * |
6 | * This software is available to you under a choice of one of two | |
7 | * licenses. You may choose to be licensed under the terms of the GNU | |
8 | * General Public License (GPL) Version 2, available from the file | |
9 | * COPYING in the main directory of this source tree, or the | |
10 | * OpenIB.org BSD license below: | |
11 | * | |
12 | * Redistribution and use in source and binary forms, with or | |
13 | * without modification, are permitted provided that the following | |
14 | * conditions are met: | |
15 | * | |
16 | * - Redistributions of source code must retain the above | |
17 | * copyright notice, this list of conditions and the following | |
18 | * disclaimer. | |
19 | * | |
20 | * - Redistributions in binary form must reproduce the above | |
21 | * copyright notice, this list of conditions and the following | |
22 | * disclaimer in the documentation and/or other materials | |
23 | * provided with the distribution. | |
24 | * | |
25 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
26 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |
27 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
28 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | |
29 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | |
30 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | |
31 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |
32 | * SOFTWARE. | |
33 | * | |
34 | * $Id: ipoib_multicast.c 1362 2004-12-18 15:56:29Z roland $ | |
35 | */ | |
36 | ||
37 | #include <linux/skbuff.h> | |
38 | #include <linux/rtnetlink.h> | |
39 | #include <linux/ip.h> | |
40 | #include <linux/in.h> | |
41 | #include <linux/igmp.h> | |
42 | #include <linux/inetdevice.h> | |
43 | #include <linux/delay.h> | |
44 | #include <linux/completion.h> | |
45 | ||
14c85021 ACM |
46 | #include <net/dst.h> |
47 | ||
1da177e4 LT |
48 | #include "ipoib.h" |
49 | ||
#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
/* Multicast debug tracing level, exposed as a module parameter (mode 0644). */
static int mcast_debug_level;

module_param(mcast_debug_level, int, 0644);
MODULE_PARM_DESC(mcast_debug_level, "Enable multicast debug tracing if > 0");
#endif

/*
 * Taken in this file around every test or clear of IPOIB_MCAST_RUN that
 * is paired with queueing or cancelling priv->mcast_task.
 */
static DEFINE_MUTEX(mcast_mutex);
1da177e4 LT |
59 | |
60 | /* Used for all multicast joins (broadcast, IPv4 mcast and IPv6 mcast) */ | |
61 | struct ipoib_mcast { | |
62 | struct ib_sa_mcmember_rec mcmember; | |
63 | struct ipoib_ah *ah; | |
64 | ||
65 | struct rb_node rb_node; | |
66 | struct list_head list; | |
67 | struct completion done; | |
68 | ||
69 | int query_id; | |
70 | struct ib_sa_query *query; | |
71 | ||
72 | unsigned long created; | |
73 | unsigned long backoff; | |
74 | ||
75 | unsigned long flags; | |
76 | unsigned char logcount; | |
77 | ||
78 | struct list_head neigh_list; | |
79 | ||
80 | struct sk_buff_head pkt_queue; | |
81 | ||
82 | struct net_device *dev; | |
83 | }; | |
84 | ||
85 | struct ipoib_mcast_iter { | |
86 | struct net_device *dev; | |
87 | union ib_gid mgid; | |
88 | unsigned long created; | |
89 | unsigned int queuelen; | |
90 | unsigned int complete; | |
91 | unsigned int send_only; | |
92 | }; | |
93 | ||
94 | static void ipoib_mcast_free(struct ipoib_mcast *mcast) | |
95 | { | |
96 | struct net_device *dev = mcast->dev; | |
97 | struct ipoib_dev_priv *priv = netdev_priv(dev); | |
98 | struct ipoib_neigh *neigh, *tmp; | |
99 | unsigned long flags; | |
b36f170b | 100 | int tx_dropped = 0; |
1da177e4 LT |
101 | |
102 | ipoib_dbg_mcast(netdev_priv(dev), | |
103 | "deleting multicast group " IPOIB_GID_FMT "\n", | |
104 | IPOIB_GID_ARG(mcast->mcmember.mgid)); | |
105 | ||
106 | spin_lock_irqsave(&priv->lock, flags); | |
107 | ||
108 | list_for_each_entry_safe(neigh, tmp, &mcast->neigh_list, list) { | |
97460df3 EC |
109 | /* |
110 | * It's safe to call ipoib_put_ah() inside priv->lock | |
111 | * here, because we know that mcast->ah will always | |
112 | * hold one more reference, so ipoib_put_ah() will | |
113 | * never do more than decrement the ref count. | |
114 | */ | |
1da177e4 | 115 | if (neigh->ah) |
97460df3 | 116 | ipoib_put_ah(neigh->ah); |
1da177e4 LT |
117 | *to_ipoib_neigh(neigh->neighbour) = NULL; |
118 | neigh->neighbour->ops->destructor = NULL; | |
119 | kfree(neigh); | |
120 | } | |
121 | ||
122 | spin_unlock_irqrestore(&priv->lock, flags); | |
123 | ||
1da177e4 LT |
124 | if (mcast->ah) |
125 | ipoib_put_ah(mcast->ah); | |
126 | ||
b36f170b MT |
127 | while (!skb_queue_empty(&mcast->pkt_queue)) { |
128 | ++tx_dropped; | |
8c608a32 | 129 | dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue)); |
b36f170b MT |
130 | } |
131 | ||
132 | spin_lock_irqsave(&priv->tx_lock, flags); | |
133 | priv->stats.tx_dropped += tx_dropped; | |
134 | spin_unlock_irqrestore(&priv->tx_lock, flags); | |
1da177e4 LT |
135 | |
136 | kfree(mcast); | |
137 | } | |
138 | ||
139 | static struct ipoib_mcast *ipoib_mcast_alloc(struct net_device *dev, | |
140 | int can_sleep) | |
141 | { | |
142 | struct ipoib_mcast *mcast; | |
143 | ||
de6eb66b | 144 | mcast = kzalloc(sizeof *mcast, can_sleep ? GFP_KERNEL : GFP_ATOMIC); |
1da177e4 LT |
145 | if (!mcast) |
146 | return NULL; | |
147 | ||
1da177e4 LT |
148 | mcast->dev = dev; |
149 | mcast->created = jiffies; | |
ce5b65cc | 150 | mcast->backoff = 1; |
1da177e4 LT |
151 | |
152 | INIT_LIST_HEAD(&mcast->list); | |
153 | INIT_LIST_HEAD(&mcast->neigh_list); | |
154 | skb_queue_head_init(&mcast->pkt_queue); | |
155 | ||
1da177e4 LT |
156 | return mcast; |
157 | } | |
158 | ||
159 | static struct ipoib_mcast *__ipoib_mcast_find(struct net_device *dev, union ib_gid *mgid) | |
160 | { | |
161 | struct ipoib_dev_priv *priv = netdev_priv(dev); | |
162 | struct rb_node *n = priv->multicast_tree.rb_node; | |
163 | ||
164 | while (n) { | |
165 | struct ipoib_mcast *mcast; | |
166 | int ret; | |
167 | ||
168 | mcast = rb_entry(n, struct ipoib_mcast, rb_node); | |
169 | ||
170 | ret = memcmp(mgid->raw, mcast->mcmember.mgid.raw, | |
171 | sizeof (union ib_gid)); | |
172 | if (ret < 0) | |
173 | n = n->rb_left; | |
174 | else if (ret > 0) | |
175 | n = n->rb_right; | |
176 | else | |
177 | return mcast; | |
178 | } | |
179 | ||
180 | return NULL; | |
181 | } | |
182 | ||
183 | static int __ipoib_mcast_add(struct net_device *dev, struct ipoib_mcast *mcast) | |
184 | { | |
185 | struct ipoib_dev_priv *priv = netdev_priv(dev); | |
186 | struct rb_node **n = &priv->multicast_tree.rb_node, *pn = NULL; | |
187 | ||
188 | while (*n) { | |
189 | struct ipoib_mcast *tmcast; | |
190 | int ret; | |
191 | ||
192 | pn = *n; | |
193 | tmcast = rb_entry(pn, struct ipoib_mcast, rb_node); | |
194 | ||
195 | ret = memcmp(mcast->mcmember.mgid.raw, tmcast->mcmember.mgid.raw, | |
196 | sizeof (union ib_gid)); | |
197 | if (ret < 0) | |
198 | n = &pn->rb_left; | |
199 | else if (ret > 0) | |
200 | n = &pn->rb_right; | |
201 | else | |
202 | return -EEXIST; | |
203 | } | |
204 | ||
205 | rb_link_node(&mcast->rb_node, pn, n); | |
206 | rb_insert_color(&mcast->rb_node, &priv->multicast_tree); | |
207 | ||
208 | return 0; | |
209 | } | |
210 | ||
211 | static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast, | |
212 | struct ib_sa_mcmember_rec *mcmember) | |
213 | { | |
214 | struct net_device *dev = mcast->dev; | |
215 | struct ipoib_dev_priv *priv = netdev_priv(dev); | |
216 | int ret; | |
217 | ||
218 | mcast->mcmember = *mcmember; | |
219 | ||
220 | /* Set the cached Q_Key before we attach if it's the broadcast group */ | |
221 | if (!memcmp(mcast->mcmember.mgid.raw, priv->dev->broadcast + 4, | |
222 | sizeof (union ib_gid))) { | |
223 | priv->qkey = be32_to_cpu(priv->broadcast->mcmember.qkey); | |
224 | priv->tx_wr.wr.ud.remote_qkey = priv->qkey; | |
225 | } | |
226 | ||
227 | if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) { | |
228 | if (test_and_set_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) { | |
229 | ipoib_warn(priv, "multicast group " IPOIB_GID_FMT | |
230 | " already attached\n", | |
231 | IPOIB_GID_ARG(mcast->mcmember.mgid)); | |
232 | ||
233 | return 0; | |
234 | } | |
235 | ||
236 | ret = ipoib_mcast_attach(dev, be16_to_cpu(mcast->mcmember.mlid), | |
237 | &mcast->mcmember.mgid); | |
238 | if (ret < 0) { | |
239 | ipoib_warn(priv, "couldn't attach QP to multicast group " | |
240 | IPOIB_GID_FMT "\n", | |
241 | IPOIB_GID_ARG(mcast->mcmember.mgid)); | |
242 | ||
243 | clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags); | |
244 | return ret; | |
245 | } | |
246 | } | |
247 | ||
248 | { | |
249 | struct ib_ah_attr av = { | |
250 | .dlid = be16_to_cpu(mcast->mcmember.mlid), | |
251 | .port_num = priv->port, | |
252 | .sl = mcast->mcmember.sl, | |
253 | .ah_flags = IB_AH_GRH, | |
254 | .grh = { | |
255 | .flow_label = be32_to_cpu(mcast->mcmember.flow_label), | |
256 | .hop_limit = mcast->mcmember.hop_limit, | |
257 | .sgid_index = 0, | |
258 | .traffic_class = mcast->mcmember.traffic_class | |
259 | } | |
260 | }; | |
e6ded99c | 261 | int path_rate = ib_sa_rate_enum_to_int(mcast->mcmember.rate); |
1da177e4 LT |
262 | |
263 | av.grh.dgid = mcast->mcmember.mgid; | |
264 | ||
e6ded99c RD |
265 | if (path_rate > 0 && priv->local_rate > path_rate) |
266 | av.static_rate = (priv->local_rate - 1) / path_rate; | |
1da177e4 LT |
267 | |
268 | ipoib_dbg_mcast(priv, "static_rate %d for local port %dX, mcmember %dX\n", | |
269 | av.static_rate, priv->local_rate, | |
270 | ib_sa_rate_enum_to_int(mcast->mcmember.rate)); | |
271 | ||
272 | mcast->ah = ipoib_create_ah(dev, priv->pd, &av); | |
273 | if (!mcast->ah) { | |
274 | ipoib_warn(priv, "ib_address_create failed\n"); | |
275 | } else { | |
276 | ipoib_dbg_mcast(priv, "MGID " IPOIB_GID_FMT | |
277 | " AV %p, LID 0x%04x, SL %d\n", | |
278 | IPOIB_GID_ARG(mcast->mcmember.mgid), | |
279 | mcast->ah->ah, | |
280 | be16_to_cpu(mcast->mcmember.mlid), | |
281 | mcast->mcmember.sl); | |
282 | } | |
283 | } | |
284 | ||
285 | /* actually send any queued packets */ | |
b36f170b | 286 | spin_lock_irq(&priv->tx_lock); |
1da177e4 LT |
287 | while (!skb_queue_empty(&mcast->pkt_queue)) { |
288 | struct sk_buff *skb = skb_dequeue(&mcast->pkt_queue); | |
b36f170b | 289 | spin_unlock_irq(&priv->tx_lock); |
1da177e4 LT |
290 | |
291 | skb->dev = dev; | |
292 | ||
293 | if (!skb->dst || !skb->dst->neighbour) { | |
294 | /* put pseudoheader back on for next time */ | |
295 | skb_push(skb, sizeof (struct ipoib_pseudoheader)); | |
296 | } | |
297 | ||
298 | if (dev_queue_xmit(skb)) | |
299 | ipoib_warn(priv, "dev_queue_xmit failed to requeue packet\n"); | |
b36f170b | 300 | spin_lock_irq(&priv->tx_lock); |
1da177e4 | 301 | } |
b36f170b | 302 | spin_unlock_irq(&priv->tx_lock); |
1da177e4 LT |
303 | |
304 | return 0; | |
305 | } | |
306 | ||
307 | static void | |
308 | ipoib_mcast_sendonly_join_complete(int status, | |
309 | struct ib_sa_mcmember_rec *mcmember, | |
310 | void *mcast_ptr) | |
311 | { | |
312 | struct ipoib_mcast *mcast = mcast_ptr; | |
313 | struct net_device *dev = mcast->dev; | |
b36f170b | 314 | struct ipoib_dev_priv *priv = netdev_priv(dev); |
1da177e4 LT |
315 | |
316 | if (!status) | |
317 | ipoib_mcast_join_finish(mcast, mcmember); | |
318 | else { | |
319 | if (mcast->logcount++ < 20) | |
320 | ipoib_dbg_mcast(netdev_priv(dev), "multicast join failed for " | |
321 | IPOIB_GID_FMT ", status %d\n", | |
322 | IPOIB_GID_ARG(mcast->mcmember.mgid), status); | |
323 | ||
324 | /* Flush out any queued packets */ | |
b36f170b MT |
325 | spin_lock_irq(&priv->tx_lock); |
326 | while (!skb_queue_empty(&mcast->pkt_queue)) { | |
327 | ++priv->stats.tx_dropped; | |
8c608a32 | 328 | dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue)); |
b36f170b MT |
329 | } |
330 | spin_unlock_irq(&priv->tx_lock); | |
1da177e4 LT |
331 | |
332 | /* Clear the busy flag so we try again */ | |
333 | clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); | |
334 | } | |
335 | ||
336 | complete(&mcast->done); | |
337 | } | |
338 | ||
339 | static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast) | |
340 | { | |
341 | struct net_device *dev = mcast->dev; | |
342 | struct ipoib_dev_priv *priv = netdev_priv(dev); | |
343 | struct ib_sa_mcmember_rec rec = { | |
344 | #if 0 /* Some SMs don't support send-only yet */ | |
345 | .join_state = 4 | |
346 | #else | |
347 | .join_state = 1 | |
348 | #endif | |
349 | }; | |
350 | int ret = 0; | |
351 | ||
352 | if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) { | |
353 | ipoib_dbg_mcast(priv, "device shutting down, no multicast joins\n"); | |
354 | return -ENODEV; | |
355 | } | |
356 | ||
357 | if (test_and_set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) { | |
358 | ipoib_dbg_mcast(priv, "multicast entry busy, skipping\n"); | |
359 | return -EBUSY; | |
360 | } | |
361 | ||
362 | rec.mgid = mcast->mcmember.mgid; | |
363 | rec.port_gid = priv->local_gid; | |
97f52eb4 | 364 | rec.pkey = cpu_to_be16(priv->pkey); |
1da177e4 | 365 | |
de922487 MT |
366 | init_completion(&mcast->done); |
367 | ||
1da177e4 LT |
368 | ret = ib_sa_mcmember_rec_set(priv->ca, priv->port, &rec, |
369 | IB_SA_MCMEMBER_REC_MGID | | |
370 | IB_SA_MCMEMBER_REC_PORT_GID | | |
371 | IB_SA_MCMEMBER_REC_PKEY | | |
372 | IB_SA_MCMEMBER_REC_JOIN_STATE, | |
373 | 1000, GFP_ATOMIC, | |
374 | ipoib_mcast_sendonly_join_complete, | |
375 | mcast, &mcast->query); | |
376 | if (ret < 0) { | |
377 | ipoib_warn(priv, "ib_sa_mcmember_rec_set failed (ret = %d)\n", | |
378 | ret); | |
379 | } else { | |
380 | ipoib_dbg_mcast(priv, "no multicast record for " IPOIB_GID_FMT | |
381 | ", starting join\n", | |
382 | IPOIB_GID_ARG(mcast->mcmember.mgid)); | |
383 | ||
384 | mcast->query_id = ret; | |
385 | } | |
386 | ||
387 | return ret; | |
388 | } | |
389 | ||
390 | static void ipoib_mcast_join_complete(int status, | |
391 | struct ib_sa_mcmember_rec *mcmember, | |
392 | void *mcast_ptr) | |
393 | { | |
394 | struct ipoib_mcast *mcast = mcast_ptr; | |
395 | struct net_device *dev = mcast->dev; | |
396 | struct ipoib_dev_priv *priv = netdev_priv(dev); | |
397 | ||
398 | ipoib_dbg_mcast(priv, "join completion for " IPOIB_GID_FMT | |
399 | " (status %d)\n", | |
400 | IPOIB_GID_ARG(mcast->mcmember.mgid), status); | |
401 | ||
402 | if (!status && !ipoib_mcast_join_finish(mcast, mcmember)) { | |
ce5b65cc | 403 | mcast->backoff = 1; |
95ed644f | 404 | mutex_lock(&mcast_mutex); |
1da177e4 LT |
405 | if (test_bit(IPOIB_MCAST_RUN, &priv->flags)) |
406 | queue_work(ipoib_workqueue, &priv->mcast_task); | |
95ed644f | 407 | mutex_unlock(&mcast_mutex); |
1da177e4 LT |
408 | complete(&mcast->done); |
409 | return; | |
410 | } | |
411 | ||
412 | if (status == -EINTR) { | |
413 | complete(&mcast->done); | |
414 | return; | |
415 | } | |
416 | ||
417 | if (status && mcast->logcount++ < 20) { | |
418 | if (status == -ETIMEDOUT || status == -EINTR) { | |
419 | ipoib_dbg_mcast(priv, "multicast join failed for " IPOIB_GID_FMT | |
420 | ", status %d\n", | |
421 | IPOIB_GID_ARG(mcast->mcmember.mgid), | |
422 | status); | |
423 | } else { | |
424 | ipoib_warn(priv, "multicast join failed for " | |
425 | IPOIB_GID_FMT ", status %d\n", | |
426 | IPOIB_GID_ARG(mcast->mcmember.mgid), | |
427 | status); | |
428 | } | |
429 | } | |
430 | ||
431 | mcast->backoff *= 2; | |
432 | if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS) | |
433 | mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS; | |
434 | ||
435 | mcast->query = NULL; | |
436 | ||
95ed644f | 437 | mutex_lock(&mcast_mutex); |
1da177e4 LT |
438 | if (test_bit(IPOIB_MCAST_RUN, &priv->flags)) { |
439 | if (status == -ETIMEDOUT) | |
440 | queue_work(ipoib_workqueue, &priv->mcast_task); | |
441 | else | |
442 | queue_delayed_work(ipoib_workqueue, &priv->mcast_task, | |
443 | mcast->backoff * HZ); | |
444 | } else | |
445 | complete(&mcast->done); | |
95ed644f | 446 | mutex_unlock(&mcast_mutex); |
1da177e4 LT |
447 | |
448 | return; | |
449 | } | |
450 | ||
451 | static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast, | |
452 | int create) | |
453 | { | |
454 | struct ipoib_dev_priv *priv = netdev_priv(dev); | |
455 | struct ib_sa_mcmember_rec rec = { | |
456 | .join_state = 1 | |
457 | }; | |
458 | ib_sa_comp_mask comp_mask; | |
459 | int ret = 0; | |
460 | ||
461 | ipoib_dbg_mcast(priv, "joining MGID " IPOIB_GID_FMT "\n", | |
462 | IPOIB_GID_ARG(mcast->mcmember.mgid)); | |
463 | ||
464 | rec.mgid = mcast->mcmember.mgid; | |
465 | rec.port_gid = priv->local_gid; | |
97f52eb4 | 466 | rec.pkey = cpu_to_be16(priv->pkey); |
1da177e4 LT |
467 | |
468 | comp_mask = | |
469 | IB_SA_MCMEMBER_REC_MGID | | |
470 | IB_SA_MCMEMBER_REC_PORT_GID | | |
471 | IB_SA_MCMEMBER_REC_PKEY | | |
472 | IB_SA_MCMEMBER_REC_JOIN_STATE; | |
473 | ||
474 | if (create) { | |
475 | comp_mask |= | |
476 | IB_SA_MCMEMBER_REC_QKEY | | |
477 | IB_SA_MCMEMBER_REC_SL | | |
478 | IB_SA_MCMEMBER_REC_FLOW_LABEL | | |
479 | IB_SA_MCMEMBER_REC_TRAFFIC_CLASS; | |
480 | ||
481 | rec.qkey = priv->broadcast->mcmember.qkey; | |
482 | rec.sl = priv->broadcast->mcmember.sl; | |
483 | rec.flow_label = priv->broadcast->mcmember.flow_label; | |
484 | rec.traffic_class = priv->broadcast->mcmember.traffic_class; | |
485 | } | |
486 | ||
de922487 MT |
487 | init_completion(&mcast->done); |
488 | ||
1da177e4 LT |
489 | ret = ib_sa_mcmember_rec_set(priv->ca, priv->port, &rec, comp_mask, |
490 | mcast->backoff * 1000, GFP_ATOMIC, | |
491 | ipoib_mcast_join_complete, | |
492 | mcast, &mcast->query); | |
493 | ||
494 | if (ret < 0) { | |
495 | ipoib_warn(priv, "ib_sa_mcmember_rec_set failed, status %d\n", ret); | |
496 | ||
497 | mcast->backoff *= 2; | |
498 | if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS) | |
499 | mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS; | |
500 | ||
95ed644f | 501 | mutex_lock(&mcast_mutex); |
1da177e4 LT |
502 | if (test_bit(IPOIB_MCAST_RUN, &priv->flags)) |
503 | queue_delayed_work(ipoib_workqueue, | |
504 | &priv->mcast_task, | |
ce5b65cc | 505 | mcast->backoff * HZ); |
95ed644f | 506 | mutex_unlock(&mcast_mutex); |
1da177e4 LT |
507 | } else |
508 | mcast->query_id = ret; | |
509 | } | |
510 | ||
511 | void ipoib_mcast_join_task(void *dev_ptr) | |
512 | { | |
513 | struct net_device *dev = dev_ptr; | |
514 | struct ipoib_dev_priv *priv = netdev_priv(dev); | |
515 | ||
516 | if (!test_bit(IPOIB_MCAST_RUN, &priv->flags)) | |
517 | return; | |
518 | ||
519 | if (ib_query_gid(priv->ca, priv->port, 0, &priv->local_gid)) | |
520 | ipoib_warn(priv, "ib_gid_entry_get() failed\n"); | |
521 | else | |
522 | memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, sizeof (union ib_gid)); | |
523 | ||
524 | { | |
525 | struct ib_port_attr attr; | |
526 | ||
527 | if (!ib_query_port(priv->ca, priv->port, &attr)) { | |
528 | priv->local_lid = attr.lid; | |
529 | priv->local_rate = attr.active_speed * | |
530 | ib_width_enum_to_int(attr.active_width); | |
531 | } else | |
532 | ipoib_warn(priv, "ib_query_port failed\n"); | |
533 | } | |
534 | ||
535 | if (!priv->broadcast) { | |
536 | priv->broadcast = ipoib_mcast_alloc(dev, 1); | |
537 | if (!priv->broadcast) { | |
538 | ipoib_warn(priv, "failed to allocate broadcast group\n"); | |
95ed644f | 539 | mutex_lock(&mcast_mutex); |
1da177e4 LT |
540 | if (test_bit(IPOIB_MCAST_RUN, &priv->flags)) |
541 | queue_delayed_work(ipoib_workqueue, | |
542 | &priv->mcast_task, HZ); | |
95ed644f | 543 | mutex_unlock(&mcast_mutex); |
1da177e4 LT |
544 | return; |
545 | } | |
546 | ||
547 | memcpy(priv->broadcast->mcmember.mgid.raw, priv->dev->broadcast + 4, | |
548 | sizeof (union ib_gid)); | |
549 | ||
550 | spin_lock_irq(&priv->lock); | |
551 | __ipoib_mcast_add(dev, priv->broadcast); | |
552 | spin_unlock_irq(&priv->lock); | |
553 | } | |
554 | ||
555 | if (!test_bit(IPOIB_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) { | |
556 | ipoib_mcast_join(dev, priv->broadcast, 0); | |
557 | return; | |
558 | } | |
559 | ||
560 | while (1) { | |
561 | struct ipoib_mcast *mcast = NULL; | |
562 | ||
563 | spin_lock_irq(&priv->lock); | |
564 | list_for_each_entry(mcast, &priv->multicast_list, list) { | |
565 | if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) | |
566 | && !test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags) | |
567 | && !test_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) { | |
568 | /* Found the next unjoined group */ | |
569 | break; | |
570 | } | |
571 | } | |
572 | spin_unlock_irq(&priv->lock); | |
573 | ||
574 | if (&mcast->list == &priv->multicast_list) { | |
575 | /* All done */ | |
576 | break; | |
577 | } | |
578 | ||
579 | ipoib_mcast_join(dev, mcast, 1); | |
580 | return; | |
581 | } | |
582 | ||
583 | priv->mcast_mtu = ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu) - | |
584 | IPOIB_ENCAP_LEN; | |
585 | dev->mtu = min(priv->mcast_mtu, priv->admin_mtu); | |
586 | ||
587 | ipoib_dbg_mcast(priv, "successfully joined all multicast groups\n"); | |
588 | ||
589 | clear_bit(IPOIB_MCAST_RUN, &priv->flags); | |
590 | netif_carrier_on(dev); | |
591 | } | |
592 | ||
593 | int ipoib_mcast_start_thread(struct net_device *dev) | |
594 | { | |
595 | struct ipoib_dev_priv *priv = netdev_priv(dev); | |
596 | ||
597 | ipoib_dbg_mcast(priv, "starting multicast thread\n"); | |
598 | ||
95ed644f | 599 | mutex_lock(&mcast_mutex); |
1da177e4 LT |
600 | if (!test_and_set_bit(IPOIB_MCAST_RUN, &priv->flags)) |
601 | queue_work(ipoib_workqueue, &priv->mcast_task); | |
95ed644f | 602 | mutex_unlock(&mcast_mutex); |
1da177e4 | 603 | |
479a0796 MT |
604 | spin_lock_irq(&priv->lock); |
605 | set_bit(IPOIB_MCAST_STARTED, &priv->flags); | |
606 | spin_unlock_irq(&priv->lock); | |
607 | ||
1da177e4 LT |
608 | return 0; |
609 | } | |
610 | ||
8d2cae06 | 611 | int ipoib_mcast_stop_thread(struct net_device *dev, int flush) |
1da177e4 LT |
612 | { |
613 | struct ipoib_dev_priv *priv = netdev_priv(dev); | |
614 | struct ipoib_mcast *mcast; | |
615 | ||
616 | ipoib_dbg_mcast(priv, "stopping multicast thread\n"); | |
617 | ||
479a0796 MT |
618 | spin_lock_irq(&priv->lock); |
619 | clear_bit(IPOIB_MCAST_STARTED, &priv->flags); | |
620 | spin_unlock_irq(&priv->lock); | |
621 | ||
95ed644f | 622 | mutex_lock(&mcast_mutex); |
1da177e4 LT |
623 | clear_bit(IPOIB_MCAST_RUN, &priv->flags); |
624 | cancel_delayed_work(&priv->mcast_task); | |
95ed644f | 625 | mutex_unlock(&mcast_mutex); |
1da177e4 | 626 | |
8d2cae06 RD |
627 | if (flush) |
628 | flush_workqueue(ipoib_workqueue); | |
1da177e4 LT |
629 | |
630 | if (priv->broadcast && priv->broadcast->query) { | |
631 | ib_sa_cancel_query(priv->broadcast->query_id, priv->broadcast->query); | |
632 | priv->broadcast->query = NULL; | |
633 | ipoib_dbg_mcast(priv, "waiting for bcast\n"); | |
634 | wait_for_completion(&priv->broadcast->done); | |
635 | } | |
636 | ||
637 | list_for_each_entry(mcast, &priv->multicast_list, list) { | |
638 | if (mcast->query) { | |
639 | ib_sa_cancel_query(mcast->query_id, mcast->query); | |
640 | mcast->query = NULL; | |
641 | ipoib_dbg_mcast(priv, "waiting for MGID " IPOIB_GID_FMT "\n", | |
642 | IPOIB_GID_ARG(mcast->mcmember.mgid)); | |
643 | wait_for_completion(&mcast->done); | |
644 | } | |
645 | } | |
646 | ||
647 | return 0; | |
648 | } | |
649 | ||
650 | static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast) | |
651 | { | |
652 | struct ipoib_dev_priv *priv = netdev_priv(dev); | |
653 | struct ib_sa_mcmember_rec rec = { | |
654 | .join_state = 1 | |
655 | }; | |
656 | int ret = 0; | |
657 | ||
658 | if (!test_and_clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) | |
659 | return 0; | |
660 | ||
661 | ipoib_dbg_mcast(priv, "leaving MGID " IPOIB_GID_FMT "\n", | |
662 | IPOIB_GID_ARG(mcast->mcmember.mgid)); | |
663 | ||
664 | rec.mgid = mcast->mcmember.mgid; | |
665 | rec.port_gid = priv->local_gid; | |
97f52eb4 | 666 | rec.pkey = cpu_to_be16(priv->pkey); |
1da177e4 LT |
667 | |
668 | /* Remove ourselves from the multicast group */ | |
669 | ret = ipoib_mcast_detach(dev, be16_to_cpu(mcast->mcmember.mlid), | |
670 | &mcast->mcmember.mgid); | |
671 | if (ret) | |
672 | ipoib_warn(priv, "ipoib_mcast_detach failed (result = %d)\n", ret); | |
673 | ||
674 | /* | |
675 | * Just make one shot at leaving and don't wait for a reply; | |
676 | * if we fail, too bad. | |
677 | */ | |
678 | ret = ib_sa_mcmember_rec_delete(priv->ca, priv->port, &rec, | |
679 | IB_SA_MCMEMBER_REC_MGID | | |
680 | IB_SA_MCMEMBER_REC_PORT_GID | | |
681 | IB_SA_MCMEMBER_REC_PKEY | | |
682 | IB_SA_MCMEMBER_REC_JOIN_STATE, | |
683 | 0, GFP_ATOMIC, NULL, | |
684 | mcast, &mcast->query); | |
685 | if (ret < 0) | |
686 | ipoib_warn(priv, "ib_sa_mcmember_rec_delete failed " | |
687 | "for leave (result = %d)\n", ret); | |
688 | ||
689 | return 0; | |
690 | } | |
691 | ||
692 | void ipoib_mcast_send(struct net_device *dev, union ib_gid *mgid, | |
693 | struct sk_buff *skb) | |
694 | { | |
695 | struct ipoib_dev_priv *priv = netdev_priv(dev); | |
696 | struct ipoib_mcast *mcast; | |
697 | ||
698 | /* | |
699 | * We can only be called from ipoib_start_xmit, so we're | |
700 | * inside tx_lock -- no need to save/restore flags. | |
701 | */ | |
702 | spin_lock(&priv->lock); | |
703 | ||
7bcb974e | 704 | if (!test_bit(IPOIB_MCAST_STARTED, &priv->flags) || !priv->broadcast) { |
479a0796 MT |
705 | ++priv->stats.tx_dropped; |
706 | dev_kfree_skb_any(skb); | |
707 | goto unlock; | |
708 | } | |
709 | ||
1da177e4 LT |
710 | mcast = __ipoib_mcast_find(dev, mgid); |
711 | if (!mcast) { | |
712 | /* Let's create a new send only group now */ | |
713 | ipoib_dbg_mcast(priv, "setting up send only multicast group for " | |
714 | IPOIB_GID_FMT "\n", IPOIB_GID_ARG(*mgid)); | |
715 | ||
716 | mcast = ipoib_mcast_alloc(dev, 0); | |
717 | if (!mcast) { | |
718 | ipoib_warn(priv, "unable to allocate memory for " | |
719 | "multicast structure\n"); | |
b36f170b | 720 | ++priv->stats.tx_dropped; |
1da177e4 LT |
721 | dev_kfree_skb_any(skb); |
722 | goto out; | |
723 | } | |
724 | ||
725 | set_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags); | |
726 | mcast->mcmember.mgid = *mgid; | |
727 | __ipoib_mcast_add(dev, mcast); | |
728 | list_add_tail(&mcast->list, &priv->multicast_list); | |
729 | } | |
730 | ||
731 | if (!mcast->ah) { | |
732 | if (skb_queue_len(&mcast->pkt_queue) < IPOIB_MAX_MCAST_QUEUE) | |
733 | skb_queue_tail(&mcast->pkt_queue, skb); | |
b36f170b MT |
734 | else { |
735 | ++priv->stats.tx_dropped; | |
1da177e4 | 736 | dev_kfree_skb_any(skb); |
b36f170b | 737 | } |
1da177e4 LT |
738 | |
739 | if (mcast->query) | |
740 | ipoib_dbg_mcast(priv, "no address vector, " | |
741 | "but multicast join already started\n"); | |
742 | else if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) | |
743 | ipoib_mcast_sendonly_join(mcast); | |
744 | ||
745 | /* | |
746 | * If lookup completes between here and out:, don't | |
747 | * want to send packet twice. | |
748 | */ | |
749 | mcast = NULL; | |
750 | } | |
751 | ||
752 | out: | |
753 | if (mcast && mcast->ah) { | |
754 | if (skb->dst && | |
755 | skb->dst->neighbour && | |
756 | !*to_ipoib_neigh(skb->dst->neighbour)) { | |
757 | struct ipoib_neigh *neigh = kmalloc(sizeof *neigh, GFP_ATOMIC); | |
758 | ||
759 | if (neigh) { | |
760 | kref_get(&mcast->ah->ref); | |
761 | neigh->ah = mcast->ah; | |
762 | neigh->neighbour = skb->dst->neighbour; | |
763 | *to_ipoib_neigh(skb->dst->neighbour) = neigh; | |
764 | list_add_tail(&neigh->list, &mcast->neigh_list); | |
765 | } | |
766 | } | |
767 | ||
768 | ipoib_send(dev, skb, mcast->ah, IB_MULTICAST_QPN); | |
769 | } | |
770 | ||
479a0796 | 771 | unlock: |
1da177e4 LT |
772 | spin_unlock(&priv->lock); |
773 | } | |
774 | ||
775 | void ipoib_mcast_dev_flush(struct net_device *dev) | |
776 | { | |
777 | struct ipoib_dev_priv *priv = netdev_priv(dev); | |
778 | LIST_HEAD(remove_list); | |
988bd503 | 779 | struct ipoib_mcast *mcast, *tmcast; |
1da177e4 LT |
780 | unsigned long flags; |
781 | ||
782 | ipoib_dbg_mcast(priv, "flushing multicast list\n"); | |
783 | ||
784 | spin_lock_irqsave(&priv->lock, flags); | |
1da177e4 | 785 | |
988bd503 EC |
786 | list_for_each_entry_safe(mcast, tmcast, &priv->multicast_list, list) { |
787 | list_del(&mcast->list); | |
788 | rb_erase(&mcast->rb_node, &priv->multicast_tree); | |
789 | list_add_tail(&mcast->list, &remove_list); | |
1da177e4 LT |
790 | } |
791 | ||
792 | if (priv->broadcast) { | |
988bd503 EC |
793 | rb_erase(&priv->broadcast->rb_node, &priv->multicast_tree); |
794 | list_add_tail(&priv->broadcast->list, &remove_list); | |
795 | priv->broadcast = NULL; | |
1da177e4 LT |
796 | } |
797 | ||
798 | spin_unlock_irqrestore(&priv->lock, flags); | |
799 | ||
800 | list_for_each_entry_safe(mcast, tmcast, &remove_list, list) { | |
801 | ipoib_mcast_leave(dev, mcast); | |
802 | ipoib_mcast_free(mcast); | |
803 | } | |
804 | } | |
805 | ||
1da177e4 LT |
806 | void ipoib_mcast_restart_task(void *dev_ptr) |
807 | { | |
808 | struct net_device *dev = dev_ptr; | |
809 | struct ipoib_dev_priv *priv = netdev_priv(dev); | |
810 | struct dev_mc_list *mclist; | |
811 | struct ipoib_mcast *mcast, *tmcast; | |
812 | LIST_HEAD(remove_list); | |
813 | unsigned long flags; | |
814 | ||
815 | ipoib_dbg_mcast(priv, "restarting multicast task\n"); | |
816 | ||
8d2cae06 | 817 | ipoib_mcast_stop_thread(dev, 0); |
1da177e4 | 818 | |
78bfe0b5 MT |
819 | spin_lock_irqsave(&dev->xmit_lock, flags); |
820 | spin_lock(&priv->lock); | |
1da177e4 LT |
821 | |
822 | /* | |
823 | * Unfortunately, the networking core only gives us a list of all of | |
824 | * the multicast hardware addresses. We need to figure out which ones | |
825 | * are new and which ones have been removed | |
826 | */ | |
827 | ||
828 | /* Clear out the found flag */ | |
829 | list_for_each_entry(mcast, &priv->multicast_list, list) | |
830 | clear_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags); | |
831 | ||
832 | /* Mark all of the entries that are found or don't exist */ | |
833 | for (mclist = dev->mc_list; mclist; mclist = mclist->next) { | |
834 | union ib_gid mgid; | |
835 | ||
836 | memcpy(mgid.raw, mclist->dmi_addr + 4, sizeof mgid); | |
837 | ||
838 | /* Add in the P_Key */ | |
839 | mgid.raw[4] = (priv->pkey >> 8) & 0xff; | |
840 | mgid.raw[5] = priv->pkey & 0xff; | |
841 | ||
842 | mcast = __ipoib_mcast_find(dev, &mgid); | |
843 | if (!mcast || test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) { | |
844 | struct ipoib_mcast *nmcast; | |
845 | ||
846 | /* Not found or send-only group, let's add a new entry */ | |
847 | ipoib_dbg_mcast(priv, "adding multicast entry for mgid " | |
848 | IPOIB_GID_FMT "\n", IPOIB_GID_ARG(mgid)); | |
849 | ||
850 | nmcast = ipoib_mcast_alloc(dev, 0); | |
851 | if (!nmcast) { | |
852 | ipoib_warn(priv, "unable to allocate memory for multicast structure\n"); | |
853 | continue; | |
854 | } | |
855 | ||
856 | set_bit(IPOIB_MCAST_FLAG_FOUND, &nmcast->flags); | |
857 | ||
858 | nmcast->mcmember.mgid = mgid; | |
859 | ||
860 | if (mcast) { | |
861 | /* Destroy the send only entry */ | |
862 | list_del(&mcast->list); | |
863 | list_add_tail(&mcast->list, &remove_list); | |
864 | ||
865 | rb_replace_node(&mcast->rb_node, | |
866 | &nmcast->rb_node, | |
867 | &priv->multicast_tree); | |
868 | } else | |
869 | __ipoib_mcast_add(dev, nmcast); | |
870 | ||
871 | list_add_tail(&nmcast->list, &priv->multicast_list); | |
872 | } | |
873 | ||
874 | if (mcast) | |
875 | set_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags); | |
876 | } | |
877 | ||
878 | /* Remove all of the entries don't exist anymore */ | |
879 | list_for_each_entry_safe(mcast, tmcast, &priv->multicast_list, list) { | |
880 | if (!test_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags) && | |
881 | !test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) { | |
882 | ipoib_dbg_mcast(priv, "deleting multicast group " IPOIB_GID_FMT "\n", | |
883 | IPOIB_GID_ARG(mcast->mcmember.mgid)); | |
884 | ||
885 | rb_erase(&mcast->rb_node, &priv->multicast_tree); | |
886 | ||
887 | /* Move to the remove list */ | |
888 | list_del(&mcast->list); | |
889 | list_add_tail(&mcast->list, &remove_list); | |
890 | } | |
891 | } | |
78bfe0b5 MT |
892 | |
893 | spin_unlock(&priv->lock); | |
894 | spin_unlock_irqrestore(&dev->xmit_lock, flags); | |
1da177e4 LT |
895 | |
896 | /* We have to cancel outside of the spinlock */ | |
897 | list_for_each_entry_safe(mcast, tmcast, &remove_list, list) { | |
898 | ipoib_mcast_leave(mcast->dev, mcast); | |
899 | ipoib_mcast_free(mcast); | |
900 | } | |
901 | ||
902 | if (test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) | |
903 | ipoib_mcast_start_thread(dev); | |
904 | } | |
905 | ||
8ae5a8a2 RD |
906 | #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG |
907 | ||
1da177e4 LT |
908 | struct ipoib_mcast_iter *ipoib_mcast_iter_init(struct net_device *dev) |
909 | { | |
910 | struct ipoib_mcast_iter *iter; | |
911 | ||
912 | iter = kmalloc(sizeof *iter, GFP_KERNEL); | |
913 | if (!iter) | |
914 | return NULL; | |
915 | ||
916 | iter->dev = dev; | |
1732b0ef | 917 | memset(iter->mgid.raw, 0, 16); |
1da177e4 LT |
918 | |
919 | if (ipoib_mcast_iter_next(iter)) { | |
1732b0ef | 920 | kfree(iter); |
1da177e4 LT |
921 | return NULL; |
922 | } | |
923 | ||
924 | return iter; | |
925 | } | |
926 | ||
1da177e4 LT |
927 | int ipoib_mcast_iter_next(struct ipoib_mcast_iter *iter) |
928 | { | |
929 | struct ipoib_dev_priv *priv = netdev_priv(iter->dev); | |
930 | struct rb_node *n; | |
931 | struct ipoib_mcast *mcast; | |
932 | int ret = 1; | |
933 | ||
934 | spin_lock_irq(&priv->lock); | |
935 | ||
936 | n = rb_first(&priv->multicast_tree); | |
937 | ||
938 | while (n) { | |
939 | mcast = rb_entry(n, struct ipoib_mcast, rb_node); | |
940 | ||
941 | if (memcmp(iter->mgid.raw, mcast->mcmember.mgid.raw, | |
942 | sizeof (union ib_gid)) < 0) { | |
943 | iter->mgid = mcast->mcmember.mgid; | |
944 | iter->created = mcast->created; | |
945 | iter->queuelen = skb_queue_len(&mcast->pkt_queue); | |
946 | iter->complete = !!mcast->ah; | |
947 | iter->send_only = !!(mcast->flags & (1 << IPOIB_MCAST_FLAG_SENDONLY)); | |
948 | ||
949 | ret = 0; | |
950 | ||
951 | break; | |
952 | } | |
953 | ||
954 | n = rb_next(n); | |
955 | } | |
956 | ||
957 | spin_unlock_irq(&priv->lock); | |
958 | ||
959 | return ret; | |
960 | } | |
961 | ||
962 | void ipoib_mcast_iter_read(struct ipoib_mcast_iter *iter, | |
963 | union ib_gid *mgid, | |
964 | unsigned long *created, | |
965 | unsigned int *queuelen, | |
966 | unsigned int *complete, | |
967 | unsigned int *send_only) | |
968 | { | |
969 | *mgid = iter->mgid; | |
970 | *created = iter->created; | |
971 | *queuelen = iter->queuelen; | |
972 | *complete = iter->complete; | |
973 | *send_only = iter->send_only; | |
974 | } | |
8ae5a8a2 RD |
975 | |
976 | #endif /* CONFIG_INFINIBAND_IPOIB_DEBUG */ |