Commit | Line | Data |
---|---|---|
d7e09d03 PT |
1 | /* |
2 | * GPL HEADER START | |
3 | * | |
4 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or modify | |
7 | * it under the terms of the GNU General Public License version 2 only, | |
8 | * as published by the Free Software Foundation. | |
9 | * | |
10 | * This program is distributed in the hope that it will be useful, but | |
11 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 | * General Public License version 2 for more details (a copy is included | |
14 | * in the LICENSE file that accompanied this code). | |
15 | * | |
16 | * You should have received a copy of the GNU General Public License | |
17 | * version 2 along with this program; If not, see | |
18 | * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf | |
19 | * | |
20 | * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, | |
21 | * CA 95054 USA or visit www.sun.com if you need additional information or | |
22 | * have any questions. | |
23 | * | |
24 | * GPL HEADER END | |
25 | */ | |
26 | /* | |
27 | * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. | |
28 | * Use is subject to license terms. | |
29 | * | |
1dc563a6 | 30 | * Copyright (c) 2011, 2015, Intel Corporation. |
d7e09d03 PT |
31 | */ |
32 | /* | |
33 | * This file is part of Lustre, http://www.lustre.org/ | |
34 | * Lustre is a trademark of Sun Microsystems, Inc. | |
35 | * | |
36 | * lnet/klnds/o2iblnd/o2iblnd.c | |
37 | * | |
38 | * Author: Eric Barton <eric@bartonsoftware.com> | |
39 | */ | |
40 | ||
5f43264c | 41 | #include <asm/div64.h> |
d664d1fd JH |
42 | #include <asm/page.h> |
43 | #include "o2iblnd.h" | |
d7e09d03 | 44 | |
439b4d45 | 45 | static lnd_t the_o2iblnd; |
d7e09d03 | 46 | |
ec3d17c0 | 47 | kib_data_t kiblnd_data; |
d7e09d03 | 48 | |
febe73bd | 49 | static __u32 kiblnd_cksum(void *ptr, int nob) |
d7e09d03 | 50 | { |
ec3d17c0 MS |
51 | char *c = ptr; |
52 | __u32 sum = 0; | |
d7e09d03 PT |
53 | |
54 | while (nob-- > 0) | |
55 | sum = ((sum << 1) | (sum >> 31)) + *c++; | |
56 | ||
57 | /* ensure I don't return 0 (== no checksum) */ | |
5fd88337 | 58 | return !sum ? 1 : sum; |
d7e09d03 PT |
59 | } |
60 | ||
febe73bd | 61 | static char *kiblnd_msgtype2str(int type) |
d7e09d03 PT |
62 | { |
63 | switch (type) { | |
64 | case IBLND_MSG_CONNREQ: | |
65 | return "CONNREQ"; | |
66 | ||
67 | case IBLND_MSG_CONNACK: | |
68 | return "CONNACK"; | |
69 | ||
70 | case IBLND_MSG_NOOP: | |
71 | return "NOOP"; | |
72 | ||
73 | case IBLND_MSG_IMMEDIATE: | |
74 | return "IMMEDIATE"; | |
75 | ||
76 | case IBLND_MSG_PUT_REQ: | |
77 | return "PUT_REQ"; | |
78 | ||
79 | case IBLND_MSG_PUT_NAK: | |
80 | return "PUT_NAK"; | |
81 | ||
82 | case IBLND_MSG_PUT_ACK: | |
83 | return "PUT_ACK"; | |
84 | ||
85 | case IBLND_MSG_PUT_DONE: | |
86 | return "PUT_DONE"; | |
87 | ||
88 | case IBLND_MSG_GET_REQ: | |
89 | return "GET_REQ"; | |
90 | ||
91 | case IBLND_MSG_GET_DONE: | |
92 | return "GET_DONE"; | |
93 | ||
94 | default: | |
95 | return "???"; | |
96 | } | |
97 | } | |
98 | ||
febe73bd | 99 | static int kiblnd_msgtype2size(int type) |
d7e09d03 PT |
100 | { |
101 | const int hdr_size = offsetof(kib_msg_t, ibm_u); | |
102 | ||
103 | switch (type) { | |
104 | case IBLND_MSG_CONNREQ: | |
105 | case IBLND_MSG_CONNACK: | |
106 | return hdr_size + sizeof(kib_connparams_t); | |
107 | ||
108 | case IBLND_MSG_NOOP: | |
109 | return hdr_size; | |
110 | ||
111 | case IBLND_MSG_IMMEDIATE: | |
112 | return offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[0]); | |
113 | ||
114 | case IBLND_MSG_PUT_REQ: | |
115 | return hdr_size + sizeof(kib_putreq_msg_t); | |
116 | ||
117 | case IBLND_MSG_PUT_ACK: | |
118 | return hdr_size + sizeof(kib_putack_msg_t); | |
119 | ||
120 | case IBLND_MSG_GET_REQ: | |
121 | return hdr_size + sizeof(kib_get_msg_t); | |
122 | ||
123 | case IBLND_MSG_PUT_NAK: | |
124 | case IBLND_MSG_PUT_DONE: | |
125 | case IBLND_MSG_GET_DONE: | |
126 | return hdr_size + sizeof(kib_completion_msg_t); | |
127 | default: | |
128 | return -1; | |
129 | } | |
130 | } | |
131 | ||
febe73bd | 132 | static int kiblnd_unpack_rd(kib_msg_t *msg, int flip) |
d7e09d03 | 133 | { |
ec3d17c0 MS |
134 | kib_rdma_desc_t *rd; |
135 | int nob; | |
136 | int n; | |
137 | int i; | |
d7e09d03 | 138 | |
febe73bd | 139 | LASSERT(msg->ibm_type == IBLND_MSG_GET_REQ || |
c314c319 | 140 | msg->ibm_type == IBLND_MSG_PUT_ACK); |
d7e09d03 PT |
141 | |
142 | rd = msg->ibm_type == IBLND_MSG_GET_REQ ? | |
143 | &msg->ibm_u.get.ibgm_rd : | |
144 | &msg->ibm_u.putack.ibpam_rd; | |
145 | ||
146 | if (flip) { | |
147 | __swab32s(&rd->rd_key); | |
148 | __swab32s(&rd->rd_nfrags); | |
149 | } | |
150 | ||
151 | n = rd->rd_nfrags; | |
152 | ||
153 | if (n <= 0 || n > IBLND_MAX_RDMA_FRAGS) { | |
154 | CERROR("Bad nfrags: %d, should be 0 < n <= %d\n", | |
155 | n, IBLND_MAX_RDMA_FRAGS); | |
156 | return 1; | |
157 | } | |
158 | ||
febe73bd | 159 | nob = offsetof(kib_msg_t, ibm_u) + |
d7e09d03 PT |
160 | kiblnd_rd_msg_size(rd, msg->ibm_type, n); |
161 | ||
162 | if (msg->ibm_nob < nob) { | |
163 | CERROR("Short %s: %d(%d)\n", | |
164 | kiblnd_msgtype2str(msg->ibm_type), msg->ibm_nob, nob); | |
165 | return 1; | |
166 | } | |
167 | ||
168 | if (!flip) | |
169 | return 0; | |
170 | ||
171 | for (i = 0; i < n; i++) { | |
172 | __swab32s(&rd->rd_frags[i].rf_nob); | |
173 | __swab64s(&rd->rd_frags[i].rf_addr); | |
174 | } | |
175 | ||
176 | return 0; | |
177 | } | |
178 | ||
febe73bd GM |
179 | void kiblnd_pack_msg(lnet_ni_t *ni, kib_msg_t *msg, int version, |
180 | int credits, lnet_nid_t dstnid, __u64 dststamp) | |
d7e09d03 PT |
181 | { |
182 | kib_net_t *net = ni->ni_data; | |
183 | ||
4420cfd3 JS |
184 | /* |
185 | * CAVEAT EMPTOR! all message fields not set here should have been | |
186 | * initialised previously. | |
187 | */ | |
d7e09d03 PT |
188 | msg->ibm_magic = IBLND_MSG_MAGIC; |
189 | msg->ibm_version = version; | |
190 | /* ibm_type */ | |
191 | msg->ibm_credits = credits; | |
192 | /* ibm_nob */ | |
193 | msg->ibm_cksum = 0; | |
194 | msg->ibm_srcnid = ni->ni_nid; | |
195 | msg->ibm_srcstamp = net->ibn_incarnation; | |
196 | msg->ibm_dstnid = dstnid; | |
197 | msg->ibm_dststamp = dststamp; | |
198 | ||
199 | if (*kiblnd_tunables.kib_cksum) { | |
200 | /* NB ibm_cksum zero while computing cksum */ | |
201 | msg->ibm_cksum = kiblnd_cksum(msg, msg->ibm_nob); | |
202 | } | |
203 | } | |
204 | ||
febe73bd | 205 | int kiblnd_unpack_msg(kib_msg_t *msg, int nob) |
d7e09d03 PT |
206 | { |
207 | const int hdr_size = offsetof(kib_msg_t, ibm_u); | |
ec3d17c0 MS |
208 | __u32 msg_cksum; |
209 | __u16 version; | |
210 | int msg_nob; | |
211 | int flip; | |
d7e09d03 PT |
212 | |
213 | /* 6 bytes are enough to have received magic + version */ | |
214 | if (nob < 6) { | |
215 | CERROR("Short message: %d\n", nob); | |
216 | return -EPROTO; | |
217 | } | |
218 | ||
219 | if (msg->ibm_magic == IBLND_MSG_MAGIC) { | |
220 | flip = 0; | |
221 | } else if (msg->ibm_magic == __swab32(IBLND_MSG_MAGIC)) { | |
222 | flip = 1; | |
223 | } else { | |
224 | CERROR("Bad magic: %08x\n", msg->ibm_magic); | |
225 | return -EPROTO; | |
226 | } | |
227 | ||
228 | version = flip ? __swab16(msg->ibm_version) : msg->ibm_version; | |
229 | if (version != IBLND_MSG_VERSION && | |
230 | version != IBLND_MSG_VERSION_1) { | |
231 | CERROR("Bad version: %x\n", version); | |
232 | return -EPROTO; | |
233 | } | |
234 | ||
235 | if (nob < hdr_size) { | |
236 | CERROR("Short message: %d\n", nob); | |
237 | return -EPROTO; | |
238 | } | |
239 | ||
240 | msg_nob = flip ? __swab32(msg->ibm_nob) : msg->ibm_nob; | |
241 | if (msg_nob > nob) { | |
242 | CERROR("Short message: got %d, wanted %d\n", nob, msg_nob); | |
243 | return -EPROTO; | |
244 | } | |
245 | ||
4420cfd3 JS |
246 | /* |
247 | * checksum must be computed with ibm_cksum zero and BEFORE anything | |
248 | * gets flipped | |
249 | */ | |
d7e09d03 PT |
250 | msg_cksum = flip ? __swab32(msg->ibm_cksum) : msg->ibm_cksum; |
251 | msg->ibm_cksum = 0; | |
5fd88337 | 252 | if (msg_cksum && |
d7e09d03 PT |
253 | msg_cksum != kiblnd_cksum(msg, msg_nob)) { |
254 | CERROR("Bad checksum\n"); | |
255 | return -EPROTO; | |
256 | } | |
257 | ||
258 | msg->ibm_cksum = msg_cksum; | |
259 | ||
260 | if (flip) { | |
261 | /* leave magic unflipped as a clue to peer endianness */ | |
262 | msg->ibm_version = version; | |
febe73bd GM |
263 | CLASSERT(sizeof(msg->ibm_type) == 1); |
264 | CLASSERT(sizeof(msg->ibm_credits) == 1); | |
d7e09d03 PT |
265 | msg->ibm_nob = msg_nob; |
266 | __swab64s(&msg->ibm_srcnid); | |
267 | __swab64s(&msg->ibm_srcstamp); | |
268 | __swab64s(&msg->ibm_dstnid); | |
269 | __swab64s(&msg->ibm_dststamp); | |
270 | } | |
271 | ||
272 | if (msg->ibm_srcnid == LNET_NID_ANY) { | |
273 | CERROR("Bad src nid: %s\n", libcfs_nid2str(msg->ibm_srcnid)); | |
274 | return -EPROTO; | |
275 | } | |
276 | ||
277 | if (msg_nob < kiblnd_msgtype2size(msg->ibm_type)) { | |
278 | CERROR("Short %s: %d(%d)\n", kiblnd_msgtype2str(msg->ibm_type), | |
279 | msg_nob, kiblnd_msgtype2size(msg->ibm_type)); | |
280 | return -EPROTO; | |
281 | } | |
282 | ||
283 | switch (msg->ibm_type) { | |
284 | default: | |
285 | CERROR("Unknown message type %x\n", msg->ibm_type); | |
286 | return -EPROTO; | |
287 | ||
288 | case IBLND_MSG_NOOP: | |
289 | case IBLND_MSG_IMMEDIATE: | |
290 | case IBLND_MSG_PUT_REQ: | |
291 | break; | |
292 | ||
293 | case IBLND_MSG_PUT_ACK: | |
294 | case IBLND_MSG_GET_REQ: | |
295 | if (kiblnd_unpack_rd(msg, flip)) | |
296 | return -EPROTO; | |
297 | break; | |
298 | ||
299 | case IBLND_MSG_PUT_NAK: | |
300 | case IBLND_MSG_PUT_DONE: | |
301 | case IBLND_MSG_GET_DONE: | |
302 | if (flip) | |
303 | __swab32s(&msg->ibm_u.completion.ibcm_status); | |
304 | break; | |
305 | ||
306 | case IBLND_MSG_CONNREQ: | |
307 | case IBLND_MSG_CONNACK: | |
308 | if (flip) { | |
309 | __swab16s(&msg->ibm_u.connparams.ibcp_queue_depth); | |
310 | __swab16s(&msg->ibm_u.connparams.ibcp_max_frags); | |
311 | __swab32s(&msg->ibm_u.connparams.ibcp_max_msg_size); | |
312 | } | |
313 | break; | |
314 | } | |
315 | return 0; | |
316 | } | |
317 | ||
febe73bd | 318 | int kiblnd_create_peer(lnet_ni_t *ni, kib_peer_t **peerp, lnet_nid_t nid) |
d7e09d03 | 319 | { |
ec3d17c0 MS |
320 | kib_peer_t *peer; |
321 | kib_net_t *net = ni->ni_data; | |
322 | int cpt = lnet_cpt_of_nid(nid); | |
323 | unsigned long flags; | |
d7e09d03 | 324 | |
06ace26e | 325 | LASSERT(net); |
d7e09d03 PT |
326 | LASSERT(nid != LNET_NID_ANY); |
327 | ||
328 | LIBCFS_CPT_ALLOC(peer, lnet_cpt_table(), cpt, sizeof(*peer)); | |
06ace26e | 329 | if (!peer) { |
d7e09d03 PT |
330 | CERROR("Cannot allocate peer\n"); |
331 | return -ENOMEM; | |
332 | } | |
333 | ||
d7e09d03 PT |
334 | peer->ibp_ni = ni; |
335 | peer->ibp_nid = nid; | |
336 | peer->ibp_error = 0; | |
337 | peer->ibp_last_alive = 0; | |
338 | atomic_set(&peer->ibp_refcount, 1); /* 1 ref for caller */ | |
339 | ||
340 | INIT_LIST_HEAD(&peer->ibp_list); /* not in the peer table yet */ | |
341 | INIT_LIST_HEAD(&peer->ibp_conns); | |
342 | INIT_LIST_HEAD(&peer->ibp_tx_queue); | |
343 | ||
344 | write_lock_irqsave(&kiblnd_data.kib_global_lock, flags); | |
345 | ||
346 | /* always called with a ref on ni, which prevents ni being shutdown */ | |
5fd88337 | 347 | LASSERT(!net->ibn_shutdown); |
d7e09d03 PT |
348 | |
349 | /* npeers only grows with the global lock held */ | |
350 | atomic_inc(&net->ibn_npeers); | |
351 | ||
352 | write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); | |
353 | ||
354 | *peerp = peer; | |
355 | return 0; | |
356 | } | |
357 | ||
febe73bd | 358 | void kiblnd_destroy_peer(kib_peer_t *peer) |
d7e09d03 PT |
359 | { |
360 | kib_net_t *net = peer->ibp_ni->ni_data; | |
361 | ||
06ace26e | 362 | LASSERT(net); |
5fd88337 | 363 | LASSERT(!atomic_read(&peer->ibp_refcount)); |
febe73bd | 364 | LASSERT(!kiblnd_peer_active(peer)); |
5fd88337 JS |
365 | LASSERT(!peer->ibp_connecting); |
366 | LASSERT(!peer->ibp_accepting); | |
febe73bd GM |
367 | LASSERT(list_empty(&peer->ibp_conns)); |
368 | LASSERT(list_empty(&peer->ibp_tx_queue)); | |
d7e09d03 PT |
369 | |
370 | LIBCFS_FREE(peer, sizeof(*peer)); | |
371 | ||
4420cfd3 JS |
372 | /* |
373 | * NB a peer's connections keep a reference on their peer until | |
d7e09d03 PT |
374 | * they are destroyed, so we can be assured that _all_ state to do |
375 | * with this peer has been cleaned up when its refcount drops to | |
4420cfd3 JS |
376 | * zero. |
377 | */ | |
d7e09d03 PT |
378 | atomic_dec(&net->ibn_npeers); |
379 | } | |
380 | ||
febe73bd | 381 | kib_peer_t *kiblnd_find_peer_locked(lnet_nid_t nid) |
d7e09d03 | 382 | { |
4420cfd3 JS |
383 | /* |
384 | * the caller is responsible for accounting the additional reference | |
385 | * that this creates | |
386 | */ | |
ec3d17c0 MS |
387 | struct list_head *peer_list = kiblnd_nid2peerlist(nid); |
388 | struct list_head *tmp; | |
389 | kib_peer_t *peer; | |
d7e09d03 | 390 | |
febe73bd | 391 | list_for_each(tmp, peer_list) { |
d7e09d03 PT |
392 | peer = list_entry(tmp, kib_peer_t, ibp_list); |
393 | ||
febe73bd | 394 | LASSERT(peer->ibp_connecting > 0 || /* creating conns */ |
d7e09d03 PT |
395 | peer->ibp_accepting > 0 || |
396 | !list_empty(&peer->ibp_conns)); /* active conn */ | |
397 | ||
398 | if (peer->ibp_nid != nid) | |
399 | continue; | |
400 | ||
401 | CDEBUG(D_NET, "got peer [%p] -> %s (%d) version: %x\n", | |
402 | peer, libcfs_nid2str(nid), | |
403 | atomic_read(&peer->ibp_refcount), | |
404 | peer->ibp_version); | |
405 | return peer; | |
406 | } | |
407 | return NULL; | |
408 | } | |
409 | ||
febe73bd | 410 | void kiblnd_unlink_peer_locked(kib_peer_t *peer) |
d7e09d03 | 411 | { |
febe73bd | 412 | LASSERT(list_empty(&peer->ibp_conns)); |
d7e09d03 | 413 | |
febe73bd | 414 | LASSERT(kiblnd_peer_active(peer)); |
d7e09d03 PT |
415 | list_del_init(&peer->ibp_list); |
416 | /* lose peerlist's ref */ | |
417 | kiblnd_peer_decref(peer); | |
418 | } | |
419 | ||
febe73bd GM |
420 | static int kiblnd_get_peer_info(lnet_ni_t *ni, int index, |
421 | lnet_nid_t *nidp, int *count) | |
d7e09d03 | 422 | { |
ec3d17c0 MS |
423 | kib_peer_t *peer; |
424 | struct list_head *ptmp; | |
425 | int i; | |
426 | unsigned long flags; | |
d7e09d03 PT |
427 | |
428 | read_lock_irqsave(&kiblnd_data.kib_global_lock, flags); | |
429 | ||
430 | for (i = 0; i < kiblnd_data.kib_peer_hash_size; i++) { | |
febe73bd | 431 | list_for_each(ptmp, &kiblnd_data.kib_peers[i]) { |
d7e09d03 | 432 | peer = list_entry(ptmp, kib_peer_t, ibp_list); |
febe73bd | 433 | LASSERT(peer->ibp_connecting > 0 || |
c314c319 JS |
434 | peer->ibp_accepting > 0 || |
435 | !list_empty(&peer->ibp_conns)); | |
d7e09d03 PT |
436 | |
437 | if (peer->ibp_ni != ni) | |
438 | continue; | |
439 | ||
440 | if (index-- > 0) | |
441 | continue; | |
442 | ||
443 | *nidp = peer->ibp_nid; | |
444 | *count = atomic_read(&peer->ibp_refcount); | |
445 | ||
446 | read_unlock_irqrestore(&kiblnd_data.kib_global_lock, | |
447 | flags); | |
448 | return 0; | |
449 | } | |
450 | } | |
451 | ||
452 | read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); | |
453 | return -ENOENT; | |
454 | } | |
455 | ||
febe73bd | 456 | static void kiblnd_del_peer_locked(kib_peer_t *peer) |
d7e09d03 | 457 | { |
ec3d17c0 MS |
458 | struct list_head *ctmp; |
459 | struct list_head *cnxt; | |
460 | kib_conn_t *conn; | |
d7e09d03 PT |
461 | |
462 | if (list_empty(&peer->ibp_conns)) { | |
463 | kiblnd_unlink_peer_locked(peer); | |
464 | } else { | |
febe73bd | 465 | list_for_each_safe(ctmp, cnxt, &peer->ibp_conns) { |
d7e09d03 PT |
466 | conn = list_entry(ctmp, kib_conn_t, ibc_list); |
467 | ||
468 | kiblnd_close_conn_locked(conn, 0); | |
469 | } | |
470 | /* NB closing peer's last conn unlinked it. */ | |
471 | } | |
4420cfd3 JS |
472 | /* |
473 | * NB peer now unlinked; might even be freed if the peer table had the | |
474 | * last ref on it. | |
475 | */ | |
d7e09d03 PT |
476 | } |
477 | ||
febe73bd | 478 | static int kiblnd_del_peer(lnet_ni_t *ni, lnet_nid_t nid) |
d7e09d03 | 479 | { |
febe73bd | 480 | LIST_HEAD(zombies); |
ec3d17c0 MS |
481 | struct list_head *ptmp; |
482 | struct list_head *pnxt; | |
483 | kib_peer_t *peer; | |
484 | int lo; | |
485 | int hi; | |
486 | int i; | |
487 | unsigned long flags; | |
488 | int rc = -ENOENT; | |
d7e09d03 PT |
489 | |
490 | write_lock_irqsave(&kiblnd_data.kib_global_lock, flags); | |
491 | ||
492 | if (nid != LNET_NID_ANY) { | |
d3d3d37a JS |
493 | lo = kiblnd_nid2peerlist(nid) - kiblnd_data.kib_peers; |
494 | hi = kiblnd_nid2peerlist(nid) - kiblnd_data.kib_peers; | |
d7e09d03 PT |
495 | } else { |
496 | lo = 0; | |
497 | hi = kiblnd_data.kib_peer_hash_size - 1; | |
498 | } | |
499 | ||
500 | for (i = lo; i <= hi; i++) { | |
febe73bd | 501 | list_for_each_safe(ptmp, pnxt, &kiblnd_data.kib_peers[i]) { |
d7e09d03 | 502 | peer = list_entry(ptmp, kib_peer_t, ibp_list); |
febe73bd | 503 | LASSERT(peer->ibp_connecting > 0 || |
c314c319 JS |
504 | peer->ibp_accepting > 0 || |
505 | !list_empty(&peer->ibp_conns)); | |
d7e09d03 PT |
506 | |
507 | if (peer->ibp_ni != ni) | |
508 | continue; | |
509 | ||
510 | if (!(nid == LNET_NID_ANY || peer->ibp_nid == nid)) | |
511 | continue; | |
512 | ||
513 | if (!list_empty(&peer->ibp_tx_queue)) { | |
febe73bd | 514 | LASSERT(list_empty(&peer->ibp_conns)); |
d7e09d03 PT |
515 | |
516 | list_splice_init(&peer->ibp_tx_queue, | |
c314c319 | 517 | &zombies); |
d7e09d03 PT |
518 | } |
519 | ||
520 | kiblnd_del_peer_locked(peer); | |
521 | rc = 0; /* matched something */ | |
522 | } | |
523 | } | |
524 | ||
525 | write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); | |
526 | ||
527 | kiblnd_txlist_done(ni, &zombies, -EIO); | |
528 | ||
529 | return rc; | |
530 | } | |
531 | ||
febe73bd | 532 | static kib_conn_t *kiblnd_get_conn_by_idx(lnet_ni_t *ni, int index) |
d7e09d03 | 533 | { |
ec3d17c0 MS |
534 | kib_peer_t *peer; |
535 | struct list_head *ptmp; | |
536 | kib_conn_t *conn; | |
537 | struct list_head *ctmp; | |
538 | int i; | |
539 | unsigned long flags; | |
d7e09d03 PT |
540 | |
541 | read_lock_irqsave(&kiblnd_data.kib_global_lock, flags); | |
542 | ||
543 | for (i = 0; i < kiblnd_data.kib_peer_hash_size; i++) { | |
febe73bd | 544 | list_for_each(ptmp, &kiblnd_data.kib_peers[i]) { |
d7e09d03 | 545 | peer = list_entry(ptmp, kib_peer_t, ibp_list); |
febe73bd | 546 | LASSERT(peer->ibp_connecting > 0 || |
c314c319 JS |
547 | peer->ibp_accepting > 0 || |
548 | !list_empty(&peer->ibp_conns)); | |
d7e09d03 PT |
549 | |
550 | if (peer->ibp_ni != ni) | |
551 | continue; | |
552 | ||
febe73bd | 553 | list_for_each(ctmp, &peer->ibp_conns) { |
d7e09d03 PT |
554 | if (index-- > 0) |
555 | continue; | |
556 | ||
557 | conn = list_entry(ctmp, kib_conn_t, | |
c314c319 | 558 | ibc_list); |
d7e09d03 | 559 | kiblnd_conn_addref(conn); |
7a3888a3 GM |
560 | read_unlock_irqrestore( |
561 | &kiblnd_data.kib_global_lock, | |
562 | flags); | |
d7e09d03 PT |
563 | return conn; |
564 | } | |
565 | } | |
566 | } | |
567 | ||
568 | read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); | |
569 | return NULL; | |
570 | } | |
571 | ||
febe73bd | 572 | int kiblnd_translate_mtu(int value) |
d7e09d03 PT |
573 | { |
574 | switch (value) { | |
575 | default: | |
576 | return -1; | |
577 | case 0: | |
578 | return 0; | |
579 | case 256: | |
580 | return IB_MTU_256; | |
581 | case 512: | |
582 | return IB_MTU_512; | |
583 | case 1024: | |
584 | return IB_MTU_1024; | |
585 | case 2048: | |
586 | return IB_MTU_2048; | |
587 | case 4096: | |
588 | return IB_MTU_4096; | |
589 | } | |
590 | } | |
591 | ||
febe73bd | 592 | static void kiblnd_setup_mtu_locked(struct rdma_cm_id *cmid) |
d7e09d03 | 593 | { |
ec3d17c0 | 594 | int mtu; |
d7e09d03 PT |
595 | |
596 | /* XXX There is no path record for iWARP, set by netdev->change_mtu? */ | |
06ace26e | 597 | if (!cmid->route.path_rec) |
d7e09d03 PT |
598 | return; |
599 | ||
600 | mtu = kiblnd_translate_mtu(*kiblnd_tunables.kib_ib_mtu); | |
febe73bd | 601 | LASSERT(mtu >= 0); |
5fd88337 | 602 | if (mtu) |
d7e09d03 PT |
603 | cmid->route.path_rec->mtu = mtu; |
604 | } | |
605 | ||
febe73bd | 606 | static int kiblnd_get_completion_vector(kib_conn_t *conn, int cpt) |
d7e09d03 | 607 | { |
ec3d17c0 MS |
608 | cpumask_t *mask; |
609 | int vectors; | |
610 | int off; | |
611 | int i; | |
612 | lnet_nid_t nid = conn->ibc_peer->ibp_nid; | |
d7e09d03 PT |
613 | |
614 | vectors = conn->ibc_cmid->device->num_comp_vectors; | |
615 | if (vectors <= 1) | |
616 | return 0; | |
617 | ||
618 | mask = cfs_cpt_cpumask(lnet_cpt_table(), cpt); | |
06ace26e | 619 | if (!mask) |
3867ea5a | 620 | return 0; |
d7e09d03 PT |
621 | |
622 | /* hash NID to CPU id in this partition... */ | |
4a316f79 OD |
623 | off = do_div(nid, cpumask_weight(mask)); |
624 | for_each_cpu(i, mask) { | |
5fd88337 | 625 | if (!off--) |
d7e09d03 PT |
626 | return i % vectors; |
627 | } | |
628 | ||
629 | LBUG(); | |
630 | return 1; | |
631 | } | |
632 | ||
febe73bd | 633 | kib_conn_t *kiblnd_create_conn(kib_peer_t *peer, struct rdma_cm_id *cmid, |
c314c319 | 634 | int state, int version) |
d7e09d03 | 635 | { |
4420cfd3 JS |
636 | /* |
637 | * CAVEAT EMPTOR: | |
d7e09d03 PT |
638 | * If the new conn is created successfully it takes over the caller's |
639 | * ref on 'peer'. It also "owns" 'cmid' and destroys it when it itself | |
640 | * is destroyed. On failure, the caller's ref on 'peer' remains and | |
641 | * she must dispose of 'cmid'. (Actually I'd block forever if I tried | |
642 | * to destroy 'cmid' here since I'm called from the CM which still has | |
4420cfd3 JS |
643 | * its ref on 'cmid'). |
644 | */ | |
ec3d17c0 MS |
645 | rwlock_t *glock = &kiblnd_data.kib_global_lock; |
646 | kib_net_t *net = peer->ibp_ni->ni_data; | |
647 | kib_dev_t *dev; | |
d7e09d03 | 648 | struct ib_qp_init_attr *init_qp_attr; |
ec3d17c0 | 649 | struct kib_sched_info *sched; |
23908db4 | 650 | struct ib_cq_init_attr cq_attr = {}; |
ec3d17c0 MS |
651 | kib_conn_t *conn; |
652 | struct ib_cq *cq; | |
653 | unsigned long flags; | |
654 | int cpt; | |
655 | int rc; | |
656 | int i; | |
d7e09d03 | 657 | |
06ace26e | 658 | LASSERT(net); |
d7e09d03 PT |
659 | LASSERT(!in_interrupt()); |
660 | ||
661 | dev = net->ibn_dev; | |
662 | ||
663 | cpt = lnet_cpt_of_nid(peer->ibp_nid); | |
664 | sched = kiblnd_data.kib_scheds[cpt]; | |
665 | ||
666 | LASSERT(sched->ibs_nthreads > 0); | |
667 | ||
668 | LIBCFS_CPT_ALLOC(init_qp_attr, lnet_cpt_table(), cpt, | |
669 | sizeof(*init_qp_attr)); | |
06ace26e | 670 | if (!init_qp_attr) { |
d7e09d03 PT |
671 | CERROR("Can't allocate qp_attr for %s\n", |
672 | libcfs_nid2str(peer->ibp_nid)); | |
673 | goto failed_0; | |
674 | } | |
675 | ||
676 | LIBCFS_CPT_ALLOC(conn, lnet_cpt_table(), cpt, sizeof(*conn)); | |
06ace26e | 677 | if (!conn) { |
d7e09d03 PT |
678 | CERROR("Can't allocate connection for %s\n", |
679 | libcfs_nid2str(peer->ibp_nid)); | |
680 | goto failed_1; | |
681 | } | |
682 | ||
683 | conn->ibc_state = IBLND_CONN_INIT; | |
684 | conn->ibc_version = version; | |
685 | conn->ibc_peer = peer; /* I take the caller's ref */ | |
686 | cmid->context = conn; /* for future CM callbacks */ | |
687 | conn->ibc_cmid = cmid; | |
688 | ||
689 | INIT_LIST_HEAD(&conn->ibc_early_rxs); | |
690 | INIT_LIST_HEAD(&conn->ibc_tx_noops); | |
691 | INIT_LIST_HEAD(&conn->ibc_tx_queue); | |
692 | INIT_LIST_HEAD(&conn->ibc_tx_queue_rsrvd); | |
693 | INIT_LIST_HEAD(&conn->ibc_tx_queue_nocred); | |
694 | INIT_LIST_HEAD(&conn->ibc_active_txs); | |
695 | spin_lock_init(&conn->ibc_lock); | |
696 | ||
697 | LIBCFS_CPT_ALLOC(conn->ibc_connvars, lnet_cpt_table(), cpt, | |
698 | sizeof(*conn->ibc_connvars)); | |
06ace26e | 699 | if (!conn->ibc_connvars) { |
d7e09d03 PT |
700 | CERROR("Can't allocate in-progress connection state\n"); |
701 | goto failed_2; | |
702 | } | |
703 | ||
704 | write_lock_irqsave(glock, flags); | |
705 | if (dev->ibd_failover) { | |
706 | write_unlock_irqrestore(glock, flags); | |
707 | CERROR("%s: failover in progress\n", dev->ibd_ifname); | |
708 | goto failed_2; | |
709 | } | |
710 | ||
711 | if (dev->ibd_hdev->ibh_ibdev != cmid->device) { | |
712 | /* wakeup failover thread and teardown connection */ | |
713 | if (kiblnd_dev_can_failover(dev)) { | |
714 | list_add_tail(&dev->ibd_fail_list, | |
715 | &kiblnd_data.kib_failed_devs); | |
716 | wake_up(&kiblnd_data.kib_failover_waitq); | |
717 | } | |
718 | ||
719 | write_unlock_irqrestore(glock, flags); | |
720 | CERROR("cmid HCA(%s), kib_dev(%s) need failover\n", | |
721 | cmid->device->name, dev->ibd_ifname); | |
722 | goto failed_2; | |
723 | } | |
724 | ||
725 | kiblnd_hdev_addref_locked(dev->ibd_hdev); | |
726 | conn->ibc_hdev = dev->ibd_hdev; | |
727 | ||
728 | kiblnd_setup_mtu_locked(cmid); | |
729 | ||
730 | write_unlock_irqrestore(glock, flags); | |
731 | ||
732 | LIBCFS_CPT_ALLOC(conn->ibc_rxs, lnet_cpt_table(), cpt, | |
733 | IBLND_RX_MSGS(version) * sizeof(kib_rx_t)); | |
06ace26e | 734 | if (!conn->ibc_rxs) { |
d7e09d03 PT |
735 | CERROR("Cannot allocate RX buffers\n"); |
736 | goto failed_2; | |
737 | } | |
738 | ||
739 | rc = kiblnd_alloc_pages(&conn->ibc_rx_pages, cpt, | |
740 | IBLND_RX_MSG_PAGES(version)); | |
5fd88337 | 741 | if (rc) |
d7e09d03 PT |
742 | goto failed_2; |
743 | ||
744 | kiblnd_map_rx_descs(conn); | |
745 | ||
8e37210b MB |
746 | cq_attr.cqe = IBLND_CQ_ENTRIES(version); |
747 | cq_attr.comp_vector = kiblnd_get_completion_vector(conn, cpt); | |
d7e09d03 PT |
748 | cq = ib_create_cq(cmid->device, |
749 | kiblnd_cq_completion, kiblnd_cq_event, conn, | |
8e37210b | 750 | &cq_attr); |
d7e09d03 PT |
751 | if (IS_ERR(cq)) { |
752 | CERROR("Can't create CQ: %ld, cqe: %d\n", | |
753 | PTR_ERR(cq), IBLND_CQ_ENTRIES(version)); | |
754 | goto failed_2; | |
755 | } | |
756 | ||
757 | conn->ibc_cq = cq; | |
758 | ||
759 | rc = ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); | |
5fd88337 | 760 | if (rc) { |
d7e09d03 PT |
761 | CERROR("Can't request completion notificiation: %d\n", rc); |
762 | goto failed_2; | |
763 | } | |
764 | ||
765 | init_qp_attr->event_handler = kiblnd_qp_event; | |
766 | init_qp_attr->qp_context = conn; | |
767 | init_qp_attr->cap.max_send_wr = IBLND_SEND_WRS(version); | |
768 | init_qp_attr->cap.max_recv_wr = IBLND_RECV_WRS(version); | |
769 | init_qp_attr->cap.max_send_sge = 1; | |
770 | init_qp_attr->cap.max_recv_sge = 1; | |
771 | init_qp_attr->sq_sig_type = IB_SIGNAL_REQ_WR; | |
772 | init_qp_attr->qp_type = IB_QPT_RC; | |
773 | init_qp_attr->send_cq = cq; | |
774 | init_qp_attr->recv_cq = cq; | |
775 | ||
776 | conn->ibc_sched = sched; | |
777 | ||
778 | rc = rdma_create_qp(cmid, conn->ibc_hdev->ibh_pd, init_qp_attr); | |
5fd88337 | 779 | if (rc) { |
d7e09d03 PT |
780 | CERROR("Can't create QP: %d, send_wr: %d, recv_wr: %d\n", |
781 | rc, init_qp_attr->cap.max_send_wr, | |
782 | init_qp_attr->cap.max_recv_wr); | |
783 | goto failed_2; | |
784 | } | |
785 | ||
786 | LIBCFS_FREE(init_qp_attr, sizeof(*init_qp_attr)); | |
787 | ||
788 | /* 1 ref for caller and each rxmsg */ | |
789 | atomic_set(&conn->ibc_refcount, 1 + IBLND_RX_MSGS(version)); | |
790 | conn->ibc_nrx = IBLND_RX_MSGS(version); | |
791 | ||
792 | /* post receives */ | |
793 | for (i = 0; i < IBLND_RX_MSGS(version); i++) { | |
794 | rc = kiblnd_post_rx(&conn->ibc_rxs[i], | |
795 | IBLND_POSTRX_NO_CREDIT); | |
5fd88337 | 796 | if (rc) { |
d7e09d03 PT |
797 | CERROR("Can't post rxmsg: %d\n", rc); |
798 | ||
799 | /* Make posted receives complete */ | |
800 | kiblnd_abort_receives(conn); | |
801 | ||
4420cfd3 JS |
802 | /* |
803 | * correct # of posted buffers | |
804 | * NB locking needed now I'm racing with completion | |
805 | */ | |
d7e09d03 PT |
806 | spin_lock_irqsave(&sched->ibs_lock, flags); |
807 | conn->ibc_nrx -= IBLND_RX_MSGS(version) - i; | |
808 | spin_unlock_irqrestore(&sched->ibs_lock, flags); | |
809 | ||
4420cfd3 JS |
810 | /* |
811 | * cmid will be destroyed by CM(ofed) after cm_callback | |
d7e09d03 | 812 | * returned, so we can't refer it anymore |
4420cfd3 JS |
813 | * (by kiblnd_connd()->kiblnd_destroy_conn) |
814 | */ | |
d7e09d03 PT |
815 | rdma_destroy_qp(conn->ibc_cmid); |
816 | conn->ibc_cmid = NULL; | |
817 | ||
818 | /* Drop my own and unused rxbuffer refcounts */ | |
819 | while (i++ <= IBLND_RX_MSGS(version)) | |
820 | kiblnd_conn_decref(conn); | |
821 | ||
822 | return NULL; | |
823 | } | |
824 | } | |
825 | ||
826 | /* Init successful! */ | |
febe73bd | 827 | LASSERT(state == IBLND_CONN_ACTIVE_CONNECT || |
c314c319 | 828 | state == IBLND_CONN_PASSIVE_WAIT); |
d7e09d03 PT |
829 | conn->ibc_state = state; |
830 | ||
831 | /* 1 more conn */ | |
832 | atomic_inc(&net->ibn_nconns); | |
833 | return conn; | |
834 | ||
835 | failed_2: | |
836 | kiblnd_destroy_conn(conn); | |
837 | failed_1: | |
838 | LIBCFS_FREE(init_qp_attr, sizeof(*init_qp_attr)); | |
839 | failed_0: | |
840 | return NULL; | |
841 | } | |
842 | ||
febe73bd | 843 | void kiblnd_destroy_conn(kib_conn_t *conn) |
d7e09d03 PT |
844 | { |
845 | struct rdma_cm_id *cmid = conn->ibc_cmid; | |
ec3d17c0 MS |
846 | kib_peer_t *peer = conn->ibc_peer; |
847 | int rc; | |
d7e09d03 | 848 | |
febe73bd | 849 | LASSERT(!in_interrupt()); |
5fd88337 | 850 | LASSERT(!atomic_read(&conn->ibc_refcount)); |
febe73bd GM |
851 | LASSERT(list_empty(&conn->ibc_early_rxs)); |
852 | LASSERT(list_empty(&conn->ibc_tx_noops)); | |
853 | LASSERT(list_empty(&conn->ibc_tx_queue)); | |
854 | LASSERT(list_empty(&conn->ibc_tx_queue_rsrvd)); | |
855 | LASSERT(list_empty(&conn->ibc_tx_queue_nocred)); | |
856 | LASSERT(list_empty(&conn->ibc_active_txs)); | |
5fd88337 JS |
857 | LASSERT(!conn->ibc_noops_posted); |
858 | LASSERT(!conn->ibc_nsends_posted); | |
d7e09d03 PT |
859 | |
860 | switch (conn->ibc_state) { | |
861 | default: | |
862 | /* conn must be completely disengaged from the network */ | |
863 | LBUG(); | |
864 | ||
865 | case IBLND_CONN_DISCONNECTED: | |
866 | /* connvars should have been freed already */ | |
06ace26e | 867 | LASSERT(!conn->ibc_connvars); |
d7e09d03 PT |
868 | break; |
869 | ||
870 | case IBLND_CONN_INIT: | |
871 | break; | |
872 | } | |
873 | ||
874 | /* conn->ibc_cmid might be destroyed by CM already */ | |
06ace26e | 875 | if (cmid && cmid->qp) |
d7e09d03 PT |
876 | rdma_destroy_qp(cmid); |
877 | ||
06ace26e | 878 | if (conn->ibc_cq) { |
d7e09d03 | 879 | rc = ib_destroy_cq(conn->ibc_cq); |
5fd88337 | 880 | if (rc) |
d7e09d03 PT |
881 | CWARN("Error destroying CQ: %d\n", rc); |
882 | } | |
883 | ||
06ace26e | 884 | if (conn->ibc_rx_pages) |
d7e09d03 PT |
885 | kiblnd_unmap_rx_descs(conn); |
886 | ||
06ace26e | 887 | if (conn->ibc_rxs) { |
d7e09d03 | 888 | LIBCFS_FREE(conn->ibc_rxs, |
7a3888a3 GM |
889 | IBLND_RX_MSGS(conn->ibc_version) |
890 | * sizeof(kib_rx_t)); | |
d7e09d03 PT |
891 | } |
892 | ||
06ace26e | 893 | if (conn->ibc_connvars) |
d7e09d03 PT |
894 | LIBCFS_FREE(conn->ibc_connvars, sizeof(*conn->ibc_connvars)); |
895 | ||
06ace26e | 896 | if (conn->ibc_hdev) |
d7e09d03 PT |
897 | kiblnd_hdev_decref(conn->ibc_hdev); |
898 | ||
899 | /* See CAVEAT EMPTOR above in kiblnd_create_conn */ | |
900 | if (conn->ibc_state != IBLND_CONN_INIT) { | |
901 | kib_net_t *net = peer->ibp_ni->ni_data; | |
902 | ||
903 | kiblnd_peer_decref(peer); | |
904 | rdma_destroy_id(cmid); | |
905 | atomic_dec(&net->ibn_nconns); | |
906 | } | |
907 | ||
908 | LIBCFS_FREE(conn, sizeof(*conn)); | |
909 | } | |
910 | ||
febe73bd | 911 | int kiblnd_close_peer_conns_locked(kib_peer_t *peer, int why) |
d7e09d03 | 912 | { |
ec3d17c0 MS |
913 | kib_conn_t *conn; |
914 | struct list_head *ctmp; | |
915 | struct list_head *cnxt; | |
916 | int count = 0; | |
d7e09d03 | 917 | |
febe73bd | 918 | list_for_each_safe(ctmp, cnxt, &peer->ibp_conns) { |
d7e09d03 PT |
919 | conn = list_entry(ctmp, kib_conn_t, ibc_list); |
920 | ||
2d00bd17 | 921 | CDEBUG(D_NET, "Closing conn -> %s, version: %x, reason: %d\n", |
d7e09d03 PT |
922 | libcfs_nid2str(peer->ibp_nid), |
923 | conn->ibc_version, why); | |
924 | ||
925 | kiblnd_close_conn_locked(conn, why); | |
926 | count++; | |
927 | } | |
928 | ||
929 | return count; | |
930 | } | |
931 | ||
febe73bd | 932 | int kiblnd_close_stale_conns_locked(kib_peer_t *peer, |
c314c319 | 933 | int version, __u64 incarnation) |
d7e09d03 | 934 | { |
ec3d17c0 MS |
935 | kib_conn_t *conn; |
936 | struct list_head *ctmp; | |
937 | struct list_head *cnxt; | |
938 | int count = 0; | |
d7e09d03 | 939 | |
febe73bd | 940 | list_for_each_safe(ctmp, cnxt, &peer->ibp_conns) { |
d7e09d03 PT |
941 | conn = list_entry(ctmp, kib_conn_t, ibc_list); |
942 | ||
943 | if (conn->ibc_version == version && | |
944 | conn->ibc_incarnation == incarnation) | |
945 | continue; | |
946 | ||
7a3888a3 GM |
947 | CDEBUG(D_NET, |
948 | "Closing stale conn -> %s version: %x, incarnation:%#llx(%x, %#llx)\n", | |
d7e09d03 PT |
949 | libcfs_nid2str(peer->ibp_nid), |
950 | conn->ibc_version, conn->ibc_incarnation, | |
951 | version, incarnation); | |
952 | ||
953 | kiblnd_close_conn_locked(conn, -ESTALE); | |
954 | count++; | |
955 | } | |
956 | ||
957 | return count; | |
958 | } | |
959 | ||
febe73bd | 960 | static int kiblnd_close_matching_conns(lnet_ni_t *ni, lnet_nid_t nid) |
d7e09d03 | 961 | { |
ec3d17c0 MS |
962 | kib_peer_t *peer; |
963 | struct list_head *ptmp; | |
964 | struct list_head *pnxt; | |
965 | int lo; | |
966 | int hi; | |
967 | int i; | |
968 | unsigned long flags; | |
969 | int count = 0; | |
d7e09d03 PT |
970 | |
971 | write_lock_irqsave(&kiblnd_data.kib_global_lock, flags); | |
972 | ||
d3d3d37a JS |
973 | if (nid != LNET_NID_ANY) { |
974 | lo = kiblnd_nid2peerlist(nid) - kiblnd_data.kib_peers; | |
975 | hi = kiblnd_nid2peerlist(nid) - kiblnd_data.kib_peers; | |
976 | } else { | |
d7e09d03 PT |
977 | lo = 0; |
978 | hi = kiblnd_data.kib_peer_hash_size - 1; | |
979 | } | |
980 | ||
981 | for (i = lo; i <= hi; i++) { | |
febe73bd | 982 | list_for_each_safe(ptmp, pnxt, &kiblnd_data.kib_peers[i]) { |
d7e09d03 | 983 | peer = list_entry(ptmp, kib_peer_t, ibp_list); |
febe73bd | 984 | LASSERT(peer->ibp_connecting > 0 || |
c314c319 JS |
985 | peer->ibp_accepting > 0 || |
986 | !list_empty(&peer->ibp_conns)); | |
d7e09d03 PT |
987 | |
988 | if (peer->ibp_ni != ni) | |
989 | continue; | |
990 | ||
991 | if (!(nid == LNET_NID_ANY || nid == peer->ibp_nid)) | |
992 | continue; | |
993 | ||
994 | count += kiblnd_close_peer_conns_locked(peer, 0); | |
995 | } | |
996 | } | |
997 | ||
998 | write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); | |
999 | ||
1000 | /* wildcards always succeed */ | |
1001 | if (nid == LNET_NID_ANY) | |
1002 | return 0; | |
1003 | ||
5fd88337 | 1004 | return !count ? -ENOENT : 0; |
d7e09d03 PT |
1005 | } |
1006 | ||
/*
 * LND ioctl entry point: dispatch libcfs ioctls for this NI.
 * Returns 0 on success, -EINVAL for unknown commands, or the
 * negative errno from the handler.
 */
static int kiblnd_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg)
{
	struct libcfs_ioctl_data *data = arg;
	int rc = -EINVAL;

	switch (cmd) {
	case IOC_LIBCFS_GET_PEER: {
		/* look up the ioc_count'th peer; report its nid and
		 * reference count back through the ioctl data */
		lnet_nid_t nid = 0;
		int count = 0;

		rc = kiblnd_get_peer_info(ni, data->ioc_count,
					  &nid, &count);
		data->ioc_nid = nid;
		data->ioc_count = count;
		break;
	}

	case IOC_LIBCFS_DEL_PEER: {
		rc = kiblnd_del_peer(ni, data->ioc_nid);
		break;
	}
	case IOC_LIBCFS_GET_CONN: {
		kib_conn_t *conn;

		rc = 0;
		/* takes a ref on the conn; dropped below */
		conn = kiblnd_get_conn_by_idx(ni, data->ioc_count);
		if (!conn) {
			rc = -ENOENT;
			break;
		}

		LASSERT(conn->ibc_cmid);
		data->ioc_nid = conn->ibc_peer->ibp_nid;
		if (!conn->ibc_cmid->route.path_rec)
			data->ioc_u32[0] = 0; /* iWarp has no path MTU */
		else
			data->ioc_u32[0] =
				ib_mtu_enum_to_int(conn->ibc_cmid->route.path_rec->mtu);
		kiblnd_conn_decref(conn);
		break;
	}
	case IOC_LIBCFS_CLOSE_CONNECTION: {
		rc = kiblnd_close_matching_conns(ni, data->ioc_nid);
		break;
	}

	default:
		break;
	}

	return rc;
}
1059 | ||
/*
 * LND "query" handler: report in *when the last time @nid was known
 * alive. If the peer is unknown, kick off peer creation/connection
 * establishment as a side effect so future queries have an answer.
 */
static void kiblnd_query(lnet_ni_t *ni, lnet_nid_t nid, unsigned long *when)
{
	unsigned long last_alive = 0;
	unsigned long now = cfs_time_current();
	rwlock_t *glock = &kiblnd_data.kib_global_lock;
	kib_peer_t *peer;
	unsigned long flags;

	read_lock_irqsave(glock, flags);

	peer = kiblnd_find_peer_locked(nid);
	if (peer) {
		LASSERT(peer->ibp_connecting > 0 || /* creating conns */
			peer->ibp_accepting > 0 ||
			!list_empty(&peer->ibp_conns)); /* active conn */
		/* snapshot under the lock; used after unlock */
		last_alive = peer->ibp_last_alive;
	}

	read_unlock_irqrestore(glock, flags);

	/* only overwrite *when if we actually have a timestamp */
	if (last_alive)
		*when = last_alive;

	/*
	 * peer is not persistent in hash, trigger peer creation
	 * and connection establishment with a NULL tx
	 */
	if (!peer)
		kiblnd_launch_tx(ni, NULL, nid);

	CDEBUG(D_NET, "Peer %s %p, alive %ld secs ago\n",
	       libcfs_nid2str(nid), peer,
	       last_alive ? cfs_duration_sec(now - last_alive) : -1);
}
1094 | ||
439b4d45 | 1095 | static void kiblnd_free_pages(kib_pages_t *p) |
d7e09d03 | 1096 | { |
ec3d17c0 MS |
1097 | int npages = p->ibp_npages; |
1098 | int i; | |
d7e09d03 PT |
1099 | |
1100 | for (i = 0; i < npages; i++) { | |
06ace26e | 1101 | if (p->ibp_pages[i]) |
d7e09d03 PT |
1102 | __free_page(p->ibp_pages[i]); |
1103 | } | |
1104 | ||
1105 | LIBCFS_FREE(p, offsetof(kib_pages_t, ibp_pages[npages])); | |
1106 | } | |
1107 | ||
/*
 * Allocate a kib_pages_t descriptor holding @npages pages, with the
 * pages spread on the node backing CPT @cpt.
 * On success *pp is set and 0 returned; on failure everything already
 * allocated is released and -ENOMEM returned.
 */
int kiblnd_alloc_pages(kib_pages_t **pp, int cpt, int npages)
{
	kib_pages_t *p;
	int i;

	/* descriptor uses a flexible page array; size it exactly */
	LIBCFS_CPT_ALLOC(p, lnet_cpt_table(), cpt,
			 offsetof(kib_pages_t, ibp_pages[npages]));
	if (!p) {
		CERROR("Can't allocate descriptor for %d pages\n", npages);
		return -ENOMEM;
	}

	/* zero so kiblnd_free_pages() can tell populated slots apart */
	memset(p, 0, offsetof(kib_pages_t, ibp_pages[npages]));
	p->ibp_npages = npages;

	for (i = 0; i < npages; i++) {
		p->ibp_pages[i] = alloc_pages_node(
				cfs_cpt_spread_node(lnet_cpt_table(), cpt),
				GFP_NOFS, 0);
		if (!p->ibp_pages[i]) {
			CERROR("Can't allocate page %d of %d\n", i, npages);
			/* frees the pages allocated so far plus p itself */
			kiblnd_free_pages(p);
			return -ENOMEM;
		}
	}

	*pp = p;
	return 0;
}
1137 | ||
/*
 * DMA-unmap all RX message buffers of @conn and release the pages
 * backing them. Inverse of kiblnd_map_rx_descs().
 */
void kiblnd_unmap_rx_descs(kib_conn_t *conn)
{
	kib_rx_t *rx;
	int i;

	LASSERT(conn->ibc_rxs);
	LASSERT(conn->ibc_hdev);

	for (i = 0; i < IBLND_RX_MSGS(conn->ibc_version); i++) {
		rx = &conn->ibc_rxs[i];

		LASSERT(rx->rx_nob >= 0); /* not posted */

		kiblnd_dma_unmap_single(conn->ibc_hdev->ibh_ibdev,
					KIBLND_UNMAP_ADDR(rx, rx_msgunmap,
							  rx->rx_msgaddr),
					IBLND_MSG_SIZE, DMA_FROM_DEVICE);
	}

	kiblnd_free_pages(conn->ibc_rx_pages);

	conn->ibc_rx_pages = NULL;
}
1161 | ||
/*
 * Carve the pre-allocated RX pages of @conn into IBLND_MSG_SIZE message
 * buffers and DMA-map each one for receive. Messages are packed
 * back-to-back within a page; IBLND_MSG_SIZE divides PAGE_SIZE.
 */
void kiblnd_map_rx_descs(kib_conn_t *conn)
{
	kib_rx_t *rx;
	struct page *pg;
	int pg_off;	/* byte offset of next message within pg */
	int ipg;	/* index of current page */
	int i;

	for (pg_off = ipg = i = 0; i < IBLND_RX_MSGS(conn->ibc_version); i++) {
		pg = conn->ibc_rx_pages->ibp_pages[ipg];
		rx = &conn->ibc_rxs[i];

		rx->rx_conn = conn;
		rx->rx_msg = (kib_msg_t *)(((char *)page_address(pg)) + pg_off);

		rx->rx_msgaddr = kiblnd_dma_map_single(conn->ibc_hdev->ibh_ibdev,
						       rx->rx_msg,
						       IBLND_MSG_SIZE,
						       DMA_FROM_DEVICE);
		LASSERT(!kiblnd_dma_mapping_error(conn->ibc_hdev->ibh_ibdev,
						  rx->rx_msgaddr));
		/* remember the mapping so it can be undone later */
		KIBLND_UNMAP_ADDR_SET(rx, rx_msgunmap, rx->rx_msgaddr);

		CDEBUG(D_NET, "rx %d: %p %#llx(%#llx)\n",
		       i, rx->rx_msg, rx->rx_msgaddr,
		       (__u64)(page_to_phys(pg) + pg_off));

		pg_off += IBLND_MSG_SIZE;
		LASSERT(pg_off <= PAGE_SIZE);

		/* page exhausted: move to the next one */
		if (pg_off == PAGE_SIZE) {
			pg_off = 0;
			ipg++;
			LASSERT(ipg <= IBLND_RX_MSG_PAGES(conn->ibc_version));
		}
	}
}
1199 | ||
/*
 * DMA-unmap every TX message buffer in @tpo and drop the pool's
 * reference on its HCA device. Safe to call when the pool was never
 * mapped (tpo_hdev NULL). Pool must have no outstanding allocations.
 */
static void kiblnd_unmap_tx_pool(kib_tx_pool_t *tpo)
{
	kib_hca_dev_t *hdev = tpo->tpo_hdev;
	kib_tx_t *tx;
	int i;

	LASSERT(!tpo->tpo_pool.po_allocated);

	if (!hdev)
		return;

	for (i = 0; i < tpo->tpo_pool.po_size; i++) {
		tx = &tpo->tpo_tx_descs[i];
		kiblnd_dma_unmap_single(hdev->ibh_ibdev,
					KIBLND_UNMAP_ADDR(tx, tx_msgunmap,
							  tx->tx_msgaddr),
					IBLND_MSG_SIZE, DMA_TO_DEVICE);
	}

	kiblnd_hdev_decref(hdev);
	tpo->tpo_hdev = NULL;
}
1222 | ||
/*
 * Return the current HCA device of @dev with a reference held,
 * waiting (polling with the lock dropped) while a failover is in
 * progress so we never hand out a device that is being replaced.
 */
static kib_hca_dev_t *kiblnd_current_hdev(kib_dev_t *dev)
{
	kib_hca_dev_t *hdev;
	unsigned long flags;
	int i = 0;

	read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
	while (dev->ibd_failover) {
		/* drop the lock so the failover thread can make progress */
		read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
		/* log roughly once per 50 polls to avoid console spam */
		if (!(i++ % 50))
			CDEBUG(D_NET, "%s: Wait for failover\n",
			       dev->ibd_ifname);
		schedule_timeout(cfs_time_seconds(1) / 100);

		read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
	}

	kiblnd_hdev_addref_locked(dev->ibd_hdev);
	hdev = dev->ibd_hdev;

	read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);

	return hdev;
}
1247 | ||
/*
 * Carve the pool's pre-allocated pages into TX message buffers,
 * DMA-map each for transmit, and stock the pool's free list.
 * Counterpart of kiblnd_unmap_tx_pool().
 */
static void kiblnd_map_tx_pool(kib_tx_pool_t *tpo)
{
	kib_pages_t *txpgs = tpo->tpo_tx_pages;
	kib_pool_t *pool = &tpo->tpo_pool;
	kib_net_t *net = pool->po_owner->ps_net;
	kib_dev_t *dev;
	struct page *page;
	kib_tx_t *tx;
	int page_offset;
	int ipage;
	int i;

	LASSERT(net);

	dev = net->ibn_dev;

	/* pre-mapped messages are not bigger than 1 page */
	CLASSERT(IBLND_MSG_SIZE <= PAGE_SIZE);

	/* No fancy arithmetic when we do the buffer calculations */
	CLASSERT(!(PAGE_SIZE % IBLND_MSG_SIZE));

	/* takes a ref on the hdev; released by kiblnd_unmap_tx_pool() */
	tpo->tpo_hdev = kiblnd_current_hdev(dev);

	for (ipage = page_offset = i = 0; i < pool->po_size; i++) {
		page = txpgs->ibp_pages[ipage];
		tx = &tpo->tpo_tx_descs[i];

		tx->tx_msg = (kib_msg_t *)(((char *)page_address(page)) +
					   page_offset);

		tx->tx_msgaddr = kiblnd_dma_map_single(
			tpo->tpo_hdev->ibh_ibdev, tx->tx_msg,
			IBLND_MSG_SIZE, DMA_TO_DEVICE);
		LASSERT(!kiblnd_dma_mapping_error(tpo->tpo_hdev->ibh_ibdev,
						  tx->tx_msgaddr));
		/* remember the mapping for later unmap */
		KIBLND_UNMAP_ADDR_SET(tx, tx_msgunmap, tx->tx_msgaddr);

		/* make the descriptor available for allocation */
		list_add(&tx->tx_list, &pool->po_free_list);

		page_offset += IBLND_MSG_SIZE;
		LASSERT(page_offset <= PAGE_SIZE);

		/* advance to the next page once this one is full */
		if (page_offset == PAGE_SIZE) {
			page_offset = 0;
			ipage++;
			LASSERT(ipage <= txpgs->ibp_npages);
		}
	}
}
1298 | ||
/*
 * Find the single memory region covering [addr, addr + size).
 * Returns the MR, or NULL if the range straddles an MR boundary.
 */
struct ib_mr *kiblnd_find_dma_mr(kib_hca_dev_t *hdev, __u64 addr, __u64 size)
{
	__u64 index;

	LASSERT(hdev->ibh_mrs[0]);

	/* one global MR covers everything */
	if (hdev->ibh_nmrs == 1)
		return hdev->ibh_mrs[0];

	index = addr >> hdev->ibh_mr_shift;

	/* both ends of the range must fall in the same MR */
	if (index < hdev->ibh_nmrs &&
	    index == ((addr + size - 1) >> hdev->ibh_mr_shift))
		return hdev->ibh_mrs[index];

	return NULL;
}
1316 | ||
/*
 * Find one memory region covering every fragment of the RDMA
 * descriptor @rd. Returns NULL when no single MR covers them all, or
 * when map-on-demand is configured and the fragment count reaches its
 * threshold (forcing FMR/PMR mapping instead).
 */
struct ib_mr *kiblnd_find_rd_dma_mr(kib_hca_dev_t *hdev, kib_rdma_desc_t *rd)
{
	struct ib_mr *prev_mr;
	struct ib_mr *mr;
	int i;

	LASSERT(hdev->ibh_mrs[0]);

	if (*kiblnd_tunables.kib_map_on_demand > 0 &&
	    *kiblnd_tunables.kib_map_on_demand <= rd->rd_nfrags)
		return NULL;

	/* one global MR covers everything */
	if (hdev->ibh_nmrs == 1)
		return hdev->ibh_mrs[0];

	for (i = 0, mr = prev_mr = NULL;
	     i < rd->rd_nfrags; i++) {
		mr = kiblnd_find_dma_mr(hdev,
					rd->rd_frags[i].rf_addr,
					rd->rd_frags[i].rf_nob);
		if (!prev_mr)
			prev_mr = mr;

		if (!mr || prev_mr != mr) {
			/* Can't be covered by one single MR */
			mr = NULL;
			break;
		}
	}

	return mr;
}
1349 | ||
/*
 * Destroy an idle FMR pool: release the underlying ib_fmr_pool, drop
 * the HCA device reference and free the descriptor. The pool must
 * have no outstanding mappings.
 */
static void kiblnd_destroy_fmr_pool(kib_fmr_pool_t *pool)
{
	LASSERT(!pool->fpo_map_count);

	if (pool->fpo_fmr_pool)
		ib_destroy_fmr_pool(pool->fpo_fmr_pool);

	if (pool->fpo_hdev)
		kiblnd_hdev_decref(pool->fpo_hdev);

	LIBCFS_FREE(pool, sizeof(*pool));
}
1362 | ||
febe73bd | 1363 | static void kiblnd_destroy_fmr_pool_list(struct list_head *head) |
d7e09d03 PT |
1364 | { |
1365 | kib_fmr_pool_t *pool; | |
1366 | ||
1367 | while (!list_empty(head)) { | |
1368 | pool = list_entry(head->next, kib_fmr_pool_t, fpo_list); | |
1369 | list_del(&pool->fpo_list); | |
1370 | kiblnd_destroy_fmr_pool(pool); | |
1371 | } | |
1372 | } | |
1373 | ||
1374 | static int kiblnd_fmr_pool_size(int ncpts) | |
1375 | { | |
1376 | int size = *kiblnd_tunables.kib_fmr_pool_size / ncpts; | |
1377 | ||
1378 | return max(IBLND_FMR_POOL, size); | |
1379 | } | |
1380 | ||
1381 | static int kiblnd_fmr_flush_trigger(int ncpts) | |
1382 | { | |
1383 | int size = *kiblnd_tunables.kib_fmr_flush_trigger / ncpts; | |
1384 | ||
1385 | return max(IBLND_FMR_POOL_FLUSH, size); | |
1386 | } | |
1387 | ||
/*
 * Allocate one FMR pool for poolset @fps and create its underlying
 * ib_fmr_pool on the current HCA device. On success *pp_fpo is set
 * and 0 returned; otherwise a negative errno.
 */
static int kiblnd_create_fmr_pool(kib_fmr_poolset_t *fps,
				  kib_fmr_pool_t **pp_fpo)
{
	/* FMR pool for RDMA */
	kib_dev_t *dev = fps->fps_net->ibn_dev;
	kib_fmr_pool_t *fpo;
	struct ib_fmr_pool_param param = {
		.max_pages_per_fmr = LNET_MAX_PAYLOAD / PAGE_SIZE,
		.page_shift        = PAGE_SHIFT,
		.access            = (IB_ACCESS_LOCAL_WRITE |
				      IB_ACCESS_REMOTE_WRITE),
		.pool_size         = fps->fps_pool_size,
		.dirty_watermark   = fps->fps_flush_trigger,
		.flush_function    = NULL,
		.flush_arg         = NULL,
		.cache             = !!*kiblnd_tunables.kib_fmr_cache};
	int rc;

	LIBCFS_CPT_ALLOC(fpo, lnet_cpt_table(), fps->fps_cpt, sizeof(*fpo));
	if (!fpo)
		return -ENOMEM;

	/* takes a ref on the hdev; dropped on destroy or error below */
	fpo->fpo_hdev = kiblnd_current_hdev(dev);

	fpo->fpo_fmr_pool = ib_create_fmr_pool(fpo->fpo_hdev->ibh_pd, &param);
	if (IS_ERR(fpo->fpo_fmr_pool)) {
		rc = PTR_ERR(fpo->fpo_fmr_pool);
		CERROR("Failed to create FMR pool: %d\n", rc);

		kiblnd_hdev_decref(fpo->fpo_hdev);
		LIBCFS_FREE(fpo, sizeof(*fpo));
		return rc;
	}

	/* fresh pool gets a full idle-retirement grace period */
	fpo->fpo_deadline = cfs_time_shift(IBLND_POOL_DEADLINE);
	fpo->fpo_owner = fps;
	*pp_fpo = fpo;

	return 0;
}
1428 | ||
/*
 * Mark every FMR pool in @fps failed. Idle pools move onto @zombies
 * for the caller to destroy; busy pools are parked on the failed list
 * until their last mapping is released.
 */
static void kiblnd_fail_fmr_poolset(kib_fmr_poolset_t *fps,
				    struct list_head *zombies)
{
	if (!fps->fps_net) /* initialized? */
		return;

	spin_lock(&fps->fps_lock);

	while (!list_empty(&fps->fps_pool_list)) {
		kib_fmr_pool_t *fpo = list_entry(fps->fps_pool_list.next,
						 kib_fmr_pool_t, fpo_list);
		fpo->fpo_failed = 1;
		list_del(&fpo->fpo_list);
		if (!fpo->fpo_map_count)
			list_add(&fpo->fpo_list, zombies);
		else
			list_add(&fpo->fpo_list, &fps->fps_failed_pool_list);
	}

	spin_unlock(&fps->fps_lock);
}
1450 | ||
febe73bd | 1451 | static void kiblnd_fini_fmr_poolset(kib_fmr_poolset_t *fps) |
d7e09d03 | 1452 | { |
06ace26e | 1453 | if (fps->fps_net) { /* initialized? */ |
d7e09d03 PT |
1454 | kiblnd_destroy_fmr_pool_list(&fps->fps_failed_pool_list); |
1455 | kiblnd_destroy_fmr_pool_list(&fps->fps_pool_list); | |
1456 | } | |
1457 | } | |
1458 | ||
/*
 * Initialize FMR poolset @fps for CPT @cpt and create its first
 * (persistent) pool. Returns 0 on success or a negative errno.
 */
static int kiblnd_init_fmr_poolset(kib_fmr_poolset_t *fps, int cpt,
				   kib_net_t *net, int pool_size,
				   int flush_trigger)
{
	kib_fmr_pool_t *fpo;
	int rc;

	memset(fps, 0, sizeof(*fps));

	fps->fps_net = net;	/* non-NULL fps_net marks "initialized" */
	fps->fps_cpt = cpt;
	fps->fps_pool_size = pool_size;
	fps->fps_flush_trigger = flush_trigger;
	spin_lock_init(&fps->fps_lock);
	INIT_LIST_HEAD(&fps->fps_pool_list);
	INIT_LIST_HEAD(&fps->fps_failed_pool_list);

	rc = kiblnd_create_fmr_pool(fps, &fpo);
	if (!rc)
		list_add_tail(&fpo->fpo_list, &fps->fps_pool_list);

	return rc;
}
1482 | ||
febe73bd | 1483 | static int kiblnd_fmr_pool_is_idle(kib_fmr_pool_t *fpo, unsigned long now) |
d7e09d03 | 1484 | { |
5fd88337 | 1485 | if (fpo->fpo_map_count) /* still in use */ |
d7e09d03 PT |
1486 | return 0; |
1487 | if (fpo->fpo_failed) | |
1488 | return 1; | |
1489 | return cfs_time_aftereq(now, fpo->fpo_deadline); | |
1490 | } | |
1491 | ||
/*
 * Release the FMR held by @fmr back to its pool. On error completion
 * (@status non-zero) the pool is flushed so the mapping is invalidated
 * immediately. Retires any pools (except the first, persistent one)
 * that have become idle.
 */
void kiblnd_fmr_pool_unmap(kib_fmr_t *fmr, int status)
{
	LIST_HEAD(zombies);
	kib_fmr_pool_t *fpo = fmr->fmr_pool;
	kib_fmr_poolset_t *fps = fpo->fpo_owner;
	unsigned long now = cfs_time_current();
	kib_fmr_pool_t *tmp;
	int rc;

	rc = ib_fmr_pool_unmap(fmr->fmr_pfmr);
	LASSERT(!rc);

	if (status) {
		/* transfer failed: force-flush so the FMR can't be reused
		 * with stale mappings */
		rc = ib_flush_fmr_pool(fpo->fpo_fmr_pool);
		LASSERT(!rc);
	}

	fmr->fmr_pool = NULL;
	fmr->fmr_pfmr = NULL;

	spin_lock(&fps->fps_lock);
	fpo->fpo_map_count--; /* decref the pool */

	list_for_each_entry_safe(fpo, tmp, &fps->fps_pool_list, fpo_list) {
		/* the first pool is persistent */
		if (fps->fps_pool_list.next == &fpo->fpo_list)
			continue;

		if (kiblnd_fmr_pool_is_idle(fpo, now)) {
			list_move(&fpo->fpo_list, &zombies);
			/* bump version so concurrent mappers restart */
			fps->fps_version++;
		}
	}
	spin_unlock(&fps->fps_lock);

	/* destroy retired pools outside the lock */
	if (!list_empty(&zombies))
		kiblnd_destroy_fmr_pool_list(&zombies);
}
1530 | ||
/*
 * Map the physical page list @pages/@npages at I/O virtual address
 * @iov through an FMR from poolset @fps, filling in @fmr.
 *
 * Tries each existing pool; on exhaustion either waits for a
 * concurrent grower, honours the failure back-off window, or grows
 * the poolset itself and retries. Returns 0 on success or a negative
 * errno.
 */
int kiblnd_fmr_pool_map(kib_fmr_poolset_t *fps, __u64 *pages, int npages,
			__u64 iov, kib_fmr_t *fmr)
{
	struct ib_pool_fmr *pfmr;
	kib_fmr_pool_t *fpo;
	__u64 version;
	int rc;

 again:
	spin_lock(&fps->fps_lock);
	version = fps->fps_version;
	list_for_each_entry(fpo, &fps->fps_pool_list, fpo_list) {
		/* refresh idle deadline and pin the pool while mapping
		 * with the lock dropped */
		fpo->fpo_deadline = cfs_time_shift(IBLND_POOL_DEADLINE);
		fpo->fpo_map_count++;
		spin_unlock(&fps->fps_lock);

		pfmr = ib_fmr_pool_map_phys(fpo->fpo_fmr_pool,
					    pages, npages, iov);
		if (likely(!IS_ERR(pfmr))) {
			/* pool ref (fpo_map_count) is kept until unmap */
			fmr->fmr_pool = fpo;
			fmr->fmr_pfmr = pfmr;
			return 0;
		}

		spin_lock(&fps->fps_lock);
		fpo->fpo_map_count--;
		if (PTR_ERR(pfmr) != -EAGAIN) {
			/* hard failure: propagate */
			spin_unlock(&fps->fps_lock);
			return PTR_ERR(pfmr);
		}

		/* EAGAIN and ... */
		if (version != fps->fps_version) {
			/* pool list changed under us; restart the scan */
			spin_unlock(&fps->fps_lock);
			goto again;
		}
	}

	if (fps->fps_increasing) {
		/* another thread is already growing the poolset; yield
		 * and retry */
		spin_unlock(&fps->fps_lock);
		CDEBUG(D_NET, "Another thread is allocating new FMR pool, waiting for her to complete\n");
		schedule();
		goto again;
	}

	if (time_before(cfs_time_current(), fps->fps_next_retry)) {
		/* someone failed recently */
		spin_unlock(&fps->fps_lock);
		return -EAGAIN;
	}

	fps->fps_increasing = 1;
	spin_unlock(&fps->fps_lock);

	CDEBUG(D_NET, "Allocate new FMR pool\n");
	rc = kiblnd_create_fmr_pool(fps, &fpo);
	spin_lock(&fps->fps_lock);
	fps->fps_increasing = 0;
	if (!rc) {
		fps->fps_version++;
		list_add_tail(&fpo->fpo_list, &fps->fps_pool_list);
	} else {
		/* back off further growth attempts for a while */
		fps->fps_next_retry = cfs_time_shift(IBLND_POOL_RETRY);
	}
	spin_unlock(&fps->fps_lock);

	goto again;
}
1599 | ||
/*
 * Sanity-check a pool before its owner frees it: it must be fully
 * drained (free list empty, nothing allocated out of it).
 */
static void kiblnd_fini_pool(kib_pool_t *pool)
{
	LASSERT(list_empty(&pool->po_free_list));
	LASSERT(!pool->po_allocated);

	CDEBUG(D_NET, "Finalize %s pool\n", pool->po_owner->ps_name);
}
1607 | ||
/*
 * Initialize the generic part of a pool: empty free list, fresh idle
 * deadline, back-pointer to its owning poolset and its element count.
 */
static void kiblnd_init_pool(kib_poolset_t *ps, kib_pool_t *pool, int size)
{
	CDEBUG(D_NET, "Initialize %s pool\n", ps->ps_name);

	memset(pool, 0, sizeof(*pool));
	INIT_LIST_HEAD(&pool->po_free_list);
	/* new pool gets a full idle-retirement grace period */
	pool->po_deadline = cfs_time_shift(IBLND_POOL_DEADLINE);
	pool->po_owner = ps;
	pool->po_size = size;
}
1618 | ||
febe73bd | 1619 | static void kiblnd_destroy_pool_list(struct list_head *head) |
d7e09d03 PT |
1620 | { |
1621 | kib_pool_t *pool; | |
1622 | ||
1623 | while (!list_empty(head)) { | |
1624 | pool = list_entry(head->next, kib_pool_t, po_list); | |
1625 | list_del(&pool->po_list); | |
1626 | ||
06ace26e | 1627 | LASSERT(pool->po_owner); |
d7e09d03 PT |
1628 | pool->po_owner->ps_pool_destroy(pool); |
1629 | } | |
1630 | } | |
1631 | ||
/*
 * Mark every pool in @ps failed. Idle pools move onto @zombies for
 * the caller to destroy; busy pools are parked on the failed list
 * until their last node is freed.
 */
static void kiblnd_fail_poolset(kib_poolset_t *ps, struct list_head *zombies)
{
	if (!ps->ps_net) /* initialized? */
		return;

	spin_lock(&ps->ps_lock);
	while (!list_empty(&ps->ps_pool_list)) {
		kib_pool_t *po = list_entry(ps->ps_pool_list.next,
					    kib_pool_t, po_list);
		po->po_failed = 1;
		list_del(&po->po_list);
		if (!po->po_allocated)
			list_add(&po->po_list, zombies);
		else
			list_add(&po->po_list, &ps->ps_failed_pool_list);
	}
	spin_unlock(&ps->ps_lock);
}
1650 | ||
febe73bd | 1651 | static void kiblnd_fini_poolset(kib_poolset_t *ps) |
d7e09d03 | 1652 | { |
06ace26e | 1653 | if (ps->ps_net) { /* initialized? */ |
d7e09d03 PT |
1654 | kiblnd_destroy_pool_list(&ps->ps_failed_pool_list); |
1655 | kiblnd_destroy_pool_list(&ps->ps_pool_list); | |
1656 | } | |
1657 | } | |
1658 | ||
/*
 * Initialize generic poolset @ps for CPT @cpt, wire in the per-type
 * create/destroy/node callbacks and create the first (persistent)
 * pool. Returns 0 on success, -E2BIG if @name doesn't fit, or the
 * create callback's error.
 */
static int kiblnd_init_poolset(kib_poolset_t *ps, int cpt,
			       kib_net_t *net, char *name, int size,
			       kib_ps_pool_create_t po_create,
			       kib_ps_pool_destroy_t po_destroy,
			       kib_ps_node_init_t nd_init,
			       kib_ps_node_fini_t nd_fini)
{
	kib_pool_t *pool;
	int rc;

	memset(ps, 0, sizeof(*ps));

	ps->ps_cpt = cpt;
	ps->ps_net = net;	/* non-NULL ps_net marks "initialized" */
	ps->ps_pool_create = po_create;
	ps->ps_pool_destroy = po_destroy;
	ps->ps_node_init = nd_init;
	ps->ps_node_fini = nd_fini;
	ps->ps_pool_size = size;
	/* reject names that would be silently truncated */
	if (strlcpy(ps->ps_name, name, sizeof(ps->ps_name))
	    >= sizeof(ps->ps_name))
		return -E2BIG;
	spin_lock_init(&ps->ps_lock);
	INIT_LIST_HEAD(&ps->ps_pool_list);
	INIT_LIST_HEAD(&ps->ps_failed_pool_list);

	rc = ps->ps_pool_create(ps, size, &pool);
	if (!rc)
		list_add(&pool->po_list, &ps->ps_pool_list);
	else
		CERROR("Failed to create the first pool for %s\n", ps->ps_name);

	return rc;
}
1693 | ||
febe73bd | 1694 | static int kiblnd_pool_is_idle(kib_pool_t *pool, unsigned long now) |
d7e09d03 | 1695 | { |
5fd88337 | 1696 | if (pool->po_allocated) /* still in use */ |
d7e09d03 PT |
1697 | return 0; |
1698 | if (pool->po_failed) | |
1699 | return 1; | |
1700 | return cfs_time_aftereq(now, pool->po_deadline); | |
1701 | } | |
1702 | ||
/*
 * Return @node to @pool's free list, running the poolset's node-fini
 * hook first. Retires any pools (except the first, persistent one)
 * that have become idle.
 */
void kiblnd_pool_free_node(kib_pool_t *pool, struct list_head *node)
{
	LIST_HEAD(zombies);
	kib_poolset_t *ps = pool->po_owner;
	kib_pool_t *tmp;
	unsigned long now = cfs_time_current();

	spin_lock(&ps->ps_lock);

	/* per-type teardown of the node, under the poolset lock */
	if (ps->ps_node_fini)
		ps->ps_node_fini(pool, node);

	LASSERT(pool->po_allocated > 0);
	list_add(node, &pool->po_free_list);
	pool->po_allocated--;

	list_for_each_entry_safe(pool, tmp, &ps->ps_pool_list, po_list) {
		/* the first pool is persistent */
		if (ps->ps_pool_list.next == &pool->po_list)
			continue;

		if (kiblnd_pool_is_idle(pool, now))
			list_move(&pool->po_list, &zombies);
	}
	spin_unlock(&ps->ps_lock);

	/* destroy retired pools outside the lock */
	if (!list_empty(&zombies))
		kiblnd_destroy_pool_list(&zombies);
}
1732 | ||
/*
 * Allocate a node from poolset @ps, growing the poolset when every
 * existing pool is exhausted. Waits for a concurrent grower, and
 * honours the failure back-off window (returning NULL inside it).
 * Returns the node, or NULL when a new pool cannot be created.
 */
struct list_head *kiblnd_pool_alloc_node(kib_poolset_t *ps)
{
	struct list_head *node;
	kib_pool_t *pool;
	int rc;

 again:
	spin_lock(&ps->ps_lock);
	list_for_each_entry(pool, &ps->ps_pool_list, po_list) {
		if (list_empty(&pool->po_free_list))
			continue;

		pool->po_allocated++;
		/* pool is in use: refresh its idle deadline */
		pool->po_deadline = cfs_time_shift(IBLND_POOL_DEADLINE);
		node = pool->po_free_list.next;
		list_del(node);

		if (ps->ps_node_init) {
			/* still hold the lock */
			ps->ps_node_init(pool, node);
		}
		spin_unlock(&ps->ps_lock);
		return node;
	}

	/* no available tx pool and ... */
	if (ps->ps_increasing) {
		/* another thread is allocating a new pool */
		spin_unlock(&ps->ps_lock);
		CDEBUG(D_NET, "Another thread is allocating new %s pool, waiting for her to complete\n",
		       ps->ps_name);
		schedule();
		goto again;
	}

	if (time_before(cfs_time_current(), ps->ps_next_retry)) {
		/* someone failed recently */
		spin_unlock(&ps->ps_lock);
		return NULL;
	}

	/* claim the grower role, then create the pool unlocked */
	ps->ps_increasing = 1;
	spin_unlock(&ps->ps_lock);

	CDEBUG(D_NET, "%s pool exhausted, allocate new pool\n", ps->ps_name);

	rc = ps->ps_pool_create(ps, ps->ps_pool_size, &pool);

	spin_lock(&ps->ps_lock);
	ps->ps_increasing = 0;
	if (!rc) {
		list_add_tail(&pool->po_list, &ps->ps_pool_list);
	} else {
		/* back off further growth attempts for a while */
		ps->ps_next_retry = cfs_time_shift(IBLND_POOL_RETRY);
		CERROR("Can't allocate new %s pool because out of memory\n",
		       ps->ps_name);
	}
	spin_unlock(&ps->ps_lock);

	goto again;
}
1794 | ||
/*
 * Destroy a TX pool: unmap and free its message pages, free each TX
 * descriptor's attached buffers (pages, frags, work requests, SGEs,
 * RDMA descriptor), then the descriptor array and the pool itself.
 * The pool must have no outstanding allocations.
 */
static void kiblnd_destroy_tx_pool(kib_pool_t *pool)
{
	kib_tx_pool_t *tpo = container_of(pool, kib_tx_pool_t, tpo_pool);
	int i;

	LASSERT(!pool->po_allocated);

	if (tpo->tpo_tx_pages) {
		kiblnd_unmap_tx_pool(tpo);
		kiblnd_free_pages(tpo->tpo_tx_pages);
	}

	/* descriptor array may be absent after a failed create */
	if (!tpo->tpo_tx_descs)
		goto out;

	for (i = 0; i < pool->po_size; i++) {
		kib_tx_t *tx = &tpo->tpo_tx_descs[i];

		list_del(&tx->tx_list);
		if (tx->tx_pages)
			LIBCFS_FREE(tx->tx_pages,
				    LNET_MAX_IOV *
				    sizeof(*tx->tx_pages));
		if (tx->tx_frags)
			LIBCFS_FREE(tx->tx_frags,
				    IBLND_MAX_RDMA_FRAGS *
				    sizeof(*tx->tx_frags));
		if (tx->tx_wrq)
			LIBCFS_FREE(tx->tx_wrq,
				    (1 + IBLND_MAX_RDMA_FRAGS) *
				    sizeof(*tx->tx_wrq));
		if (tx->tx_sge)
			LIBCFS_FREE(tx->tx_sge,
				    (1 + IBLND_MAX_RDMA_FRAGS) *
				    sizeof(*tx->tx_sge));
		if (tx->tx_rd)
			LIBCFS_FREE(tx->tx_rd,
				    offsetof(kib_rdma_desc_t,
					     rd_frags[IBLND_MAX_RDMA_FRAGS]));
	}

	LIBCFS_FREE(tpo->tpo_tx_descs,
		    pool->po_size * sizeof(kib_tx_t));
out:
	kiblnd_fini_pool(pool);
	LIBCFS_FREE(tpo, sizeof(*tpo));
}
1842 | ||
1843 | static int kiblnd_tx_pool_size(int ncpts) | |
1844 | { | |
1845 | int ntx = *kiblnd_tunables.kib_ntx / ncpts; | |
1846 | ||
1847 | return max(IBLND_TX_POOL, ntx); | |
1848 | } | |
1849 | ||
febe73bd GM |
1850 | static int kiblnd_create_tx_pool(kib_poolset_t *ps, int size, |
1851 | kib_pool_t **pp_po) | |
d7e09d03 | 1852 | { |
ec3d17c0 MS |
1853 | int i; |
1854 | int npg; | |
1855 | kib_pool_t *pool; | |
d7e09d03 PT |
1856 | kib_tx_pool_t *tpo; |
1857 | ||
1858 | LIBCFS_CPT_ALLOC(tpo, lnet_cpt_table(), ps->ps_cpt, sizeof(*tpo)); | |
06ace26e | 1859 | if (!tpo) { |
d7e09d03 PT |
1860 | CERROR("Failed to allocate TX pool\n"); |
1861 | return -ENOMEM; | |
1862 | } | |
1863 | ||
1864 | pool = &tpo->tpo_pool; | |
1865 | kiblnd_init_pool(ps, pool, size); | |
1866 | tpo->tpo_tx_descs = NULL; | |
1867 | tpo->tpo_tx_pages = NULL; | |
1868 | ||
1869 | npg = (size * IBLND_MSG_SIZE + PAGE_SIZE - 1) / PAGE_SIZE; | |
5fd88337 | 1870 | if (kiblnd_alloc_pages(&tpo->tpo_tx_pages, ps->ps_cpt, npg)) { |
d7e09d03 | 1871 | CERROR("Can't allocate tx pages: %d\n", npg); |
a4e872f7 | 1872 | LIBCFS_FREE(tpo, sizeof(*tpo)); |
d7e09d03 PT |
1873 | return -ENOMEM; |
1874 | } | |
1875 | ||
1876 | LIBCFS_CPT_ALLOC(tpo->tpo_tx_descs, lnet_cpt_table(), ps->ps_cpt, | |
1877 | size * sizeof(kib_tx_t)); | |
06ace26e | 1878 | if (!tpo->tpo_tx_descs) { |
d7e09d03 PT |
1879 | CERROR("Can't allocate %d tx descriptors\n", size); |
1880 | ps->ps_pool_destroy(pool); | |
1881 | return -ENOMEM; | |
1882 | } | |
1883 | ||
1884 | memset(tpo->tpo_tx_descs, 0, size * sizeof(kib_tx_t)); | |
1885 | ||
1886 | for (i = 0; i < size; i++) { | |
1887 | kib_tx_t *tx = &tpo->tpo_tx_descs[i]; | |
1888 | ||
1889 | tx->tx_pool = tpo; | |
06ace26e | 1890 | if (ps->ps_net->ibn_fmr_ps) { |
d7e09d03 PT |
1891 | LIBCFS_CPT_ALLOC(tx->tx_pages, |
1892 | lnet_cpt_table(), ps->ps_cpt, | |
1893 | LNET_MAX_IOV * sizeof(*tx->tx_pages)); | |
06ace26e | 1894 | if (!tx->tx_pages) |
d7e09d03 PT |
1895 | break; |
1896 | } | |
1897 | ||
1898 | LIBCFS_CPT_ALLOC(tx->tx_frags, lnet_cpt_table(), ps->ps_cpt, | |
1899 | IBLND_MAX_RDMA_FRAGS * sizeof(*tx->tx_frags)); | |
06ace26e | 1900 | if (!tx->tx_frags) |
d7e09d03 PT |
1901 | break; |
1902 | ||
1903 | sg_init_table(tx->tx_frags, IBLND_MAX_RDMA_FRAGS); | |
1904 | ||
1905 | LIBCFS_CPT_ALLOC(tx->tx_wrq, lnet_cpt_table(), ps->ps_cpt, | |
1906 | (1 + IBLND_MAX_RDMA_FRAGS) * | |
1907 | sizeof(*tx->tx_wrq)); | |
06ace26e | 1908 | if (!tx->tx_wrq) |
d7e09d03 PT |
1909 | break; |
1910 | ||
1911 | LIBCFS_CPT_ALLOC(tx->tx_sge, lnet_cpt_table(), ps->ps_cpt, | |
1912 | (1 + IBLND_MAX_RDMA_FRAGS) * | |
1913 | sizeof(*tx->tx_sge)); | |
06ace26e | 1914 | if (!tx->tx_sge) |
d7e09d03 PT |
1915 | break; |
1916 | ||
1917 | LIBCFS_CPT_ALLOC(tx->tx_rd, lnet_cpt_table(), ps->ps_cpt, | |
1918 | offsetof(kib_rdma_desc_t, | |
1919 | rd_frags[IBLND_MAX_RDMA_FRAGS])); | |
06ace26e | 1920 | if (!tx->tx_rd) |
d7e09d03 PT |
1921 | break; |
1922 | } | |
1923 | ||
1924 | if (i == size) { | |
1925 | kiblnd_map_tx_pool(tpo); | |
1926 | *pp_po = pool; | |
1927 | return 0; | |
1928 | } | |
1929 | ||
1930 | ps->ps_pool_destroy(pool); | |
1931 | return -ENOMEM; | |
1932 | } | |
1933 | ||
febe73bd | 1934 | static void kiblnd_tx_init(kib_pool_t *pool, struct list_head *node) |
d7e09d03 PT |
1935 | { |
1936 | kib_tx_poolset_t *tps = container_of(pool->po_owner, kib_tx_poolset_t, | |
1937 | tps_poolset); | |
ec3d17c0 | 1938 | kib_tx_t *tx = list_entry(node, kib_tx_t, tx_list); |
d7e09d03 | 1939 | |
74732797 | 1940 | tx->tx_cookie = tps->tps_next_tx_cookie++; |
d7e09d03 PT |
1941 | } |
1942 | ||
febe73bd | 1943 | static void kiblnd_net_fini_pools(kib_net_t *net) |
d7e09d03 | 1944 | { |
ec3d17c0 | 1945 | int i; |
d7e09d03 PT |
1946 | |
1947 | cfs_cpt_for_each(i, lnet_cpt_table()) { | |
ec3d17c0 MS |
1948 | kib_tx_poolset_t *tps; |
1949 | kib_fmr_poolset_t *fps; | |
d7e09d03 | 1950 | |
06ace26e | 1951 | if (net->ibn_tx_ps) { |
d7e09d03 PT |
1952 | tps = net->ibn_tx_ps[i]; |
1953 | kiblnd_fini_poolset(&tps->tps_poolset); | |
1954 | } | |
1955 | ||
06ace26e | 1956 | if (net->ibn_fmr_ps) { |
d7e09d03 PT |
1957 | fps = net->ibn_fmr_ps[i]; |
1958 | kiblnd_fini_fmr_poolset(fps); | |
1959 | } | |
d7e09d03 PT |
1960 | } |
1961 | ||
06ace26e | 1962 | if (net->ibn_tx_ps) { |
d7e09d03 PT |
1963 | cfs_percpt_free(net->ibn_tx_ps); |
1964 | net->ibn_tx_ps = NULL; | |
1965 | } | |
1966 | ||
06ace26e | 1967 | if (net->ibn_fmr_ps) { |
d7e09d03 PT |
1968 | cfs_percpt_free(net->ibn_fmr_ps); |
1969 | net->ibn_fmr_ps = NULL; | |
1970 | } | |
d7e09d03 PT |
1971 | } |
1972 | ||
febe73bd | 1973 | static int kiblnd_net_init_pools(kib_net_t *net, __u32 *cpts, int ncpts) |
d7e09d03 | 1974 | { |
ec3d17c0 MS |
1975 | unsigned long flags; |
1976 | int cpt; | |
415bcb5c | 1977 | int rc = 0; |
ec3d17c0 | 1978 | int i; |
d7e09d03 PT |
1979 | |
1980 | read_lock_irqsave(&kiblnd_data.kib_global_lock, flags); | |
5fd88337 | 1981 | if (!*kiblnd_tunables.kib_map_on_demand && |
d7e09d03 | 1982 | net->ibn_dev->ibd_hdev->ibh_nmrs == 1) { |
ec3d17c0 | 1983 | read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); |
d7e09d03 PT |
1984 | goto create_tx_pool; |
1985 | } | |
1986 | ||
1987 | read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); | |
1988 | ||
1989 | if (*kiblnd_tunables.kib_fmr_pool_size < | |
1990 | *kiblnd_tunables.kib_ntx / 4) { | |
1991 | CERROR("Can't set fmr pool size (%d) < ntx / 4(%d)\n", | |
1992 | *kiblnd_tunables.kib_fmr_pool_size, | |
1993 | *kiblnd_tunables.kib_ntx / 4); | |
1994 | rc = -EINVAL; | |
1995 | goto failed; | |
1996 | } | |
1997 | ||
415bcb5c OD |
1998 | /* |
1999 | * TX pool must be created later than FMR, see LU-2268 | |
2000 | * for details | |
2001 | */ | |
06ace26e | 2002 | LASSERT(!net->ibn_tx_ps); |
d7e09d03 | 2003 | |
415bcb5c OD |
2004 | /* |
2005 | * premapping can fail if ibd_nmr > 1, so we always create | |
2006 | * FMR pool and map-on-demand if premapping failed | |
2007 | */ | |
d7e09d03 PT |
2008 | |
2009 | net->ibn_fmr_ps = cfs_percpt_alloc(lnet_cpt_table(), | |
2010 | sizeof(kib_fmr_poolset_t)); | |
06ace26e | 2011 | if (!net->ibn_fmr_ps) { |
d7e09d03 PT |
2012 | CERROR("Failed to allocate FMR pool array\n"); |
2013 | rc = -ENOMEM; | |
2014 | goto failed; | |
2015 | } | |
2016 | ||
2017 | for (i = 0; i < ncpts; i++) { | |
06ace26e | 2018 | cpt = !cpts ? i : cpts[i]; |
d7e09d03 PT |
2019 | rc = kiblnd_init_fmr_poolset(net->ibn_fmr_ps[cpt], cpt, net, |
2020 | kiblnd_fmr_pool_size(ncpts), | |
2021 | kiblnd_fmr_flush_trigger(ncpts)); | |
5fd88337 | 2022 | if (rc == -ENOSYS && !i) /* no FMR */ |
415bcb5c | 2023 | break; |
d7e09d03 | 2024 | |
5fd88337 | 2025 | if (rc) { /* a real error */ |
d7e09d03 PT |
2026 | CERROR("Can't initialize FMR pool for CPT %d: %d\n", |
2027 | cpt, rc); | |
2028 | goto failed; | |
2029 | } | |
2030 | } | |
2031 | ||
2032 | if (i > 0) { | |
2033 | LASSERT(i == ncpts); | |
2034 | goto create_tx_pool; | |
2035 | } | |
2036 | ||
2037 | cfs_percpt_free(net->ibn_fmr_ps); | |
2038 | net->ibn_fmr_ps = NULL; | |
2039 | ||
415bcb5c | 2040 | CWARN("Device does not support FMR\n"); |
d7e09d03 | 2041 | goto failed; |
d7e09d03 PT |
2042 | |
2043 | create_tx_pool: | |
2044 | net->ibn_tx_ps = cfs_percpt_alloc(lnet_cpt_table(), | |
2045 | sizeof(kib_tx_poolset_t)); | |
06ace26e | 2046 | if (!net->ibn_tx_ps) { |
d7e09d03 PT |
2047 | CERROR("Failed to allocate tx pool array\n"); |
2048 | rc = -ENOMEM; | |
2049 | goto failed; | |
2050 | } | |
2051 | ||
2052 | for (i = 0; i < ncpts; i++) { | |
06ace26e | 2053 | cpt = !cpts ? i : cpts[i]; |
d7e09d03 PT |
2054 | rc = kiblnd_init_poolset(&net->ibn_tx_ps[cpt]->tps_poolset, |
2055 | cpt, net, "TX", | |
2056 | kiblnd_tx_pool_size(ncpts), | |
2057 | kiblnd_create_tx_pool, | |
2058 | kiblnd_destroy_tx_pool, | |
2059 | kiblnd_tx_init, NULL); | |
5fd88337 | 2060 | if (rc) { |
d7e09d03 PT |
2061 | CERROR("Can't initialize TX pool for CPT %d: %d\n", |
2062 | cpt, rc); | |
2063 | goto failed; | |
2064 | } | |
2065 | } | |
2066 | ||
2067 | return 0; | |
2068 | failed: | |
2069 | kiblnd_net_fini_pools(net); | |
5fd88337 | 2070 | LASSERT(rc); |
d7e09d03 PT |
2071 | return rc; |
2072 | } | |
2073 | ||
febe73bd | 2074 | static int kiblnd_hdev_get_attr(kib_hca_dev_t *hdev) |
d7e09d03 | 2075 | { |
4420cfd3 JS |
2076 | /* |
2077 | * It's safe to assume a HCA can handle a page size | |
2078 | * matching that of the native system | |
2079 | */ | |
d7e09d03 PT |
2080 | hdev->ibh_page_shift = PAGE_SHIFT; |
2081 | hdev->ibh_page_size = 1 << PAGE_SHIFT; | |
2082 | hdev->ibh_page_mask = ~((__u64)hdev->ibh_page_size - 1); | |
2083 | ||
cebfe5ca | 2084 | hdev->ibh_mr_size = hdev->ibh_ibdev->attrs.max_mr_size; |
d7e09d03 PT |
2085 | if (hdev->ibh_mr_size == ~0ULL) { |
2086 | hdev->ibh_mr_shift = 64; | |
2087 | return 0; | |
2088 | } | |
2089 | ||
2090 | for (hdev->ibh_mr_shift = 0; | |
74732797 | 2091 | hdev->ibh_mr_shift < 64; hdev->ibh_mr_shift++) { |
d7e09d03 PT |
2092 | if (hdev->ibh_mr_size == (1ULL << hdev->ibh_mr_shift) || |
2093 | hdev->ibh_mr_size == (1ULL << hdev->ibh_mr_shift) - 1) | |
2094 | return 0; | |
2095 | } | |
2096 | ||
55f5a824 | 2097 | CERROR("Invalid mr size: %#llx\n", hdev->ibh_mr_size); |
d7e09d03 PT |
2098 | return -EINVAL; |
2099 | } | |
2100 | ||
febe73bd | 2101 | static void kiblnd_hdev_cleanup_mrs(kib_hca_dev_t *hdev) |
d7e09d03 | 2102 | { |
ec3d17c0 | 2103 | int i; |
d7e09d03 | 2104 | |
5fd88337 | 2105 | if (!hdev->ibh_nmrs || !hdev->ibh_mrs) |
d7e09d03 PT |
2106 | return; |
2107 | ||
2108 | for (i = 0; i < hdev->ibh_nmrs; i++) { | |
06ace26e | 2109 | if (!hdev->ibh_mrs[i]) |
d7e09d03 PT |
2110 | break; |
2111 | ||
2112 | ib_dereg_mr(hdev->ibh_mrs[i]); | |
2113 | } | |
2114 | ||
2115 | LIBCFS_FREE(hdev->ibh_mrs, sizeof(*hdev->ibh_mrs) * hdev->ibh_nmrs); | |
2116 | hdev->ibh_mrs = NULL; | |
2117 | hdev->ibh_nmrs = 0; | |
2118 | } | |
2119 | ||
febe73bd | 2120 | void kiblnd_hdev_destroy(kib_hca_dev_t *hdev) |
d7e09d03 PT |
2121 | { |
2122 | kiblnd_hdev_cleanup_mrs(hdev); | |
2123 | ||
06ace26e | 2124 | if (hdev->ibh_pd) |
d7e09d03 PT |
2125 | ib_dealloc_pd(hdev->ibh_pd); |
2126 | ||
06ace26e | 2127 | if (hdev->ibh_cmid) |
d7e09d03 PT |
2128 | rdma_destroy_id(hdev->ibh_cmid); |
2129 | ||
2130 | LIBCFS_FREE(hdev, sizeof(*hdev)); | |
2131 | } | |
2132 | ||
febe73bd | 2133 | static int kiblnd_hdev_setup_mrs(kib_hca_dev_t *hdev) |
d7e09d03 PT |
2134 | { |
2135 | struct ib_mr *mr; | |
ec3d17c0 | 2136 | int rc; |
ec3d17c0 | 2137 | int acflags = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE; |
d7e09d03 PT |
2138 | |
2139 | rc = kiblnd_hdev_get_attr(hdev); | |
5fd88337 | 2140 | if (rc) |
d7e09d03 PT |
2141 | return rc; |
2142 | ||
01738448 | 2143 | LIBCFS_ALLOC(hdev->ibh_mrs, 1 * sizeof(*hdev->ibh_mrs)); |
06ace26e | 2144 | if (!hdev->ibh_mrs) { |
01738448 LB |
2145 | CERROR("Failed to allocate MRs table\n"); |
2146 | return -ENOMEM; | |
2147 | } | |
d7e09d03 | 2148 | |
01738448 LB |
2149 | hdev->ibh_mrs[0] = NULL; |
2150 | hdev->ibh_nmrs = 1; | |
d7e09d03 | 2151 | |
01738448 LB |
2152 | mr = ib_get_dma_mr(hdev->ibh_pd, acflags); |
2153 | if (IS_ERR(mr)) { | |
2154 | CERROR("Failed ib_get_dma_mr : %ld\n", PTR_ERR(mr)); | |
2155 | kiblnd_hdev_cleanup_mrs(hdev); | |
2156 | return PTR_ERR(mr); | |
2157 | } | |
d7e09d03 | 2158 | |
01738448 | 2159 | hdev->ibh_mrs[0] = mr; |
d7e09d03 | 2160 | |
d7e09d03 PT |
2161 | return 0; |
2162 | } | |
2163 | ||
febe73bd | 2164 | /* DUMMY */ |
7a3888a3 GM |
2165 | static int kiblnd_dummy_callback(struct rdma_cm_id *cmid, |
2166 | struct rdma_cm_event *event) | |
febe73bd | 2167 | { |
d7e09d03 PT |
2168 | return 0; |
2169 | } | |
2170 | ||
febe73bd | 2171 | static int kiblnd_dev_need_failover(kib_dev_t *dev) |
d7e09d03 | 2172 | { |
ec3d17c0 MS |
2173 | struct rdma_cm_id *cmid; |
2174 | struct sockaddr_in srcaddr; | |
2175 | struct sockaddr_in dstaddr; | |
2176 | int rc; | |
d7e09d03 | 2177 | |
06ace26e JS |
2178 | if (!dev->ibd_hdev || /* initializing */ |
2179 | !dev->ibd_hdev->ibh_cmid || /* listener is dead */ | |
d7e09d03 PT |
2180 | *kiblnd_tunables.kib_dev_failover > 1) /* debugging */ |
2181 | return 1; | |
2182 | ||
4420cfd3 JS |
2183 | /* |
2184 | * XXX: it's UGLY, but I don't have better way to find | |
d7e09d03 PT |
2185 | * ib-bonding HCA failover because: |
2186 | * | |
2187 | * a. no reliable CM event for HCA failover... | |
2188 | * b. no OFED API to get ib_device for current net_device... | |
2189 | * | |
2190 | * We have only two choices at this point: | |
2191 | * | |
2192 | * a. rdma_bind_addr(), it will conflict with listener cmid | |
4420cfd3 JS |
2193 | * b. rdma_resolve_addr() to zero addr |
2194 | */ | |
d7e09d03 PT |
2195 | cmid = kiblnd_rdma_create_id(kiblnd_dummy_callback, dev, RDMA_PS_TCP, |
2196 | IB_QPT_RC); | |
2197 | if (IS_ERR(cmid)) { | |
2198 | rc = PTR_ERR(cmid); | |
2199 | CERROR("Failed to create cmid for failover: %d\n", rc); | |
2200 | return rc; | |
2201 | } | |
2202 | ||
2203 | memset(&srcaddr, 0, sizeof(srcaddr)); | |
ec3d17c0 | 2204 | srcaddr.sin_family = AF_INET; |
d7e09d03 PT |
2205 | srcaddr.sin_addr.s_addr = (__force u32)htonl(dev->ibd_ifip); |
2206 | ||
2207 | memset(&dstaddr, 0, sizeof(dstaddr)); | |
2208 | dstaddr.sin_family = AF_INET; | |
2209 | rc = rdma_resolve_addr(cmid, (struct sockaddr *)&srcaddr, | |
2210 | (struct sockaddr *)&dstaddr, 1); | |
5fd88337 | 2211 | if (rc || !cmid->device) { |
5e8f6920 PT |
2212 | CERROR("Failed to bind %s:%pI4h to device(%p): %d\n", |
2213 | dev->ibd_ifname, &dev->ibd_ifip, | |
d7e09d03 PT |
2214 | cmid->device, rc); |
2215 | rdma_destroy_id(cmid); | |
2216 | return rc; | |
2217 | } | |
2218 | ||
199a0cc0 LZ |
2219 | rc = dev->ibd_hdev->ibh_ibdev != cmid->device; /* true for failover */ |
2220 | rdma_destroy_id(cmid); | |
d7e09d03 | 2221 | |
199a0cc0 | 2222 | return rc; |
d7e09d03 PT |
2223 | } |
2224 | ||
febe73bd | 2225 | int kiblnd_dev_failover(kib_dev_t *dev) |
d7e09d03 | 2226 | { |
febe73bd GM |
2227 | LIST_HEAD(zombie_tpo); |
2228 | LIST_HEAD(zombie_ppo); | |
2229 | LIST_HEAD(zombie_fpo); | |
ec3d17c0 MS |
2230 | struct rdma_cm_id *cmid = NULL; |
2231 | kib_hca_dev_t *hdev = NULL; | |
ec3d17c0 MS |
2232 | struct ib_pd *pd; |
2233 | kib_net_t *net; | |
2234 | struct sockaddr_in addr; | |
2235 | unsigned long flags; | |
2236 | int rc = 0; | |
2237 | int i; | |
d7e09d03 | 2238 | |
febe73bd | 2239 | LASSERT(*kiblnd_tunables.kib_dev_failover > 1 || |
06ace26e | 2240 | dev->ibd_can_failover || !dev->ibd_hdev); |
d7e09d03 PT |
2241 | |
2242 | rc = kiblnd_dev_need_failover(dev); | |
2243 | if (rc <= 0) | |
2244 | goto out; | |
2245 | ||
06ace26e JS |
2246 | if (dev->ibd_hdev && |
2247 | dev->ibd_hdev->ibh_cmid) { | |
4420cfd3 JS |
2248 | /* |
2249 | * XXX it's not good to close old listener at here, | |
d7e09d03 PT |
2250 | * because we can fail to create new listener. |
2251 | * But we have to close it now, otherwise rdma_bind_addr | |
4420cfd3 JS |
2252 | * will return EADDRINUSE... How crap! |
2253 | */ | |
d7e09d03 PT |
2254 | write_lock_irqsave(&kiblnd_data.kib_global_lock, flags); |
2255 | ||
2256 | cmid = dev->ibd_hdev->ibh_cmid; | |
4420cfd3 JS |
2257 | /* |
2258 | * make next schedule of kiblnd_dev_need_failover() | |
2259 | * return 1 for me | |
2260 | */ | |
d7e09d03 PT |
2261 | dev->ibd_hdev->ibh_cmid = NULL; |
2262 | write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); | |
2263 | ||
2264 | rdma_destroy_id(cmid); | |
2265 | } | |
2266 | ||
2267 | cmid = kiblnd_rdma_create_id(kiblnd_cm_callback, dev, RDMA_PS_TCP, | |
2268 | IB_QPT_RC); | |
2269 | if (IS_ERR(cmid)) { | |
2270 | rc = PTR_ERR(cmid); | |
2271 | CERROR("Failed to create cmid for failover: %d\n", rc); | |
2272 | goto out; | |
2273 | } | |
2274 | ||
2275 | memset(&addr, 0, sizeof(addr)); | |
2276 | addr.sin_family = AF_INET; | |
2277 | addr.sin_addr.s_addr = (__force u32)htonl(dev->ibd_ifip); | |
2278 | addr.sin_port = htons(*kiblnd_tunables.kib_service); | |
2279 | ||
2280 | /* Bind to failover device or port */ | |
2281 | rc = rdma_bind_addr(cmid, (struct sockaddr *)&addr); | |
5fd88337 | 2282 | if (rc || !cmid->device) { |
5e8f6920 PT |
2283 | CERROR("Failed to bind %s:%pI4h to device(%p): %d\n", |
2284 | dev->ibd_ifname, &dev->ibd_ifip, | |
d7e09d03 PT |
2285 | cmid->device, rc); |
2286 | rdma_destroy_id(cmid); | |
2287 | goto out; | |
2288 | } | |
2289 | ||
2290 | LIBCFS_ALLOC(hdev, sizeof(*hdev)); | |
06ace26e | 2291 | if (!hdev) { |
d7e09d03 PT |
2292 | CERROR("Failed to allocate kib_hca_dev\n"); |
2293 | rdma_destroy_id(cmid); | |
2294 | rc = -ENOMEM; | |
2295 | goto out; | |
2296 | } | |
2297 | ||
2298 | atomic_set(&hdev->ibh_ref, 1); | |
2299 | hdev->ibh_dev = dev; | |
2300 | hdev->ibh_cmid = cmid; | |
2301 | hdev->ibh_ibdev = cmid->device; | |
2302 | ||
2303 | pd = ib_alloc_pd(cmid->device); | |
2304 | if (IS_ERR(pd)) { | |
2305 | rc = PTR_ERR(pd); | |
2306 | CERROR("Can't allocate PD: %d\n", rc); | |
2307 | goto out; | |
2308 | } | |
2309 | ||
2310 | hdev->ibh_pd = pd; | |
2311 | ||
2312 | rc = rdma_listen(cmid, 0); | |
5fd88337 | 2313 | if (rc) { |
d7e09d03 PT |
2314 | CERROR("Can't start new listener: %d\n", rc); |
2315 | goto out; | |
2316 | } | |
2317 | ||
2318 | rc = kiblnd_hdev_setup_mrs(hdev); | |
5fd88337 | 2319 | if (rc) { |
d7e09d03 PT |
2320 | CERROR("Can't setup device: %d\n", rc); |
2321 | goto out; | |
2322 | } | |
2323 | ||
2324 | write_lock_irqsave(&kiblnd_data.kib_global_lock, flags); | |
2325 | ||
6d37b171 | 2326 | swap(dev->ibd_hdev, hdev); /* take over the refcount */ |
d7e09d03 PT |
2327 | |
2328 | list_for_each_entry(net, &dev->ibd_nets, ibn_list) { | |
2329 | cfs_cpt_for_each(i, lnet_cpt_table()) { | |
2330 | kiblnd_fail_poolset(&net->ibn_tx_ps[i]->tps_poolset, | |
2331 | &zombie_tpo); | |
2332 | ||
415bcb5c | 2333 | if (net->ibn_fmr_ps) |
d7e09d03 PT |
2334 | kiblnd_fail_fmr_poolset(net->ibn_fmr_ps[i], |
2335 | &zombie_fpo); | |
d7e09d03 PT |
2336 | } |
2337 | } | |
2338 | ||
2339 | write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); | |
2340 | out: | |
2341 | if (!list_empty(&zombie_tpo)) | |
2342 | kiblnd_destroy_pool_list(&zombie_tpo); | |
2343 | if (!list_empty(&zombie_ppo)) | |
2344 | kiblnd_destroy_pool_list(&zombie_ppo); | |
2345 | if (!list_empty(&zombie_fpo)) | |
2346 | kiblnd_destroy_fmr_pool_list(&zombie_fpo); | |
06ace26e | 2347 | if (hdev) |
d7e09d03 PT |
2348 | kiblnd_hdev_decref(hdev); |
2349 | ||
5fd88337 | 2350 | if (rc) |
d7e09d03 PT |
2351 | dev->ibd_failed_failover++; |
2352 | else | |
2353 | dev->ibd_failed_failover = 0; | |
2354 | ||
2355 | return rc; | |
2356 | } | |
2357 | ||
febe73bd | 2358 | void kiblnd_destroy_dev(kib_dev_t *dev) |
d7e09d03 | 2359 | { |
5fd88337 | 2360 | LASSERT(!dev->ibd_nnets); |
febe73bd | 2361 | LASSERT(list_empty(&dev->ibd_nets)); |
d7e09d03 PT |
2362 | |
2363 | list_del(&dev->ibd_fail_list); | |
2364 | list_del(&dev->ibd_list); | |
2365 | ||
06ace26e | 2366 | if (dev->ibd_hdev) |
d7e09d03 PT |
2367 | kiblnd_hdev_decref(dev->ibd_hdev); |
2368 | ||
2369 | LIBCFS_FREE(dev, sizeof(*dev)); | |
2370 | } | |
2371 | ||
febe73bd | 2372 | static kib_dev_t *kiblnd_create_dev(char *ifname) |
d7e09d03 PT |
2373 | { |
2374 | struct net_device *netdev; | |
ec3d17c0 MS |
2375 | kib_dev_t *dev; |
2376 | __u32 netmask; | |
2377 | __u32 ip; | |
2378 | int up; | |
2379 | int rc; | |
d7e09d03 | 2380 | |
1ad6a73e | 2381 | rc = lnet_ipif_query(ifname, &up, &ip, &netmask); |
5fd88337 | 2382 | if (rc) { |
d7e09d03 PT |
2383 | CERROR("Can't query IPoIB interface %s: %d\n", |
2384 | ifname, rc); | |
2385 | return NULL; | |
2386 | } | |
2387 | ||
2388 | if (!up) { | |
2389 | CERROR("Can't query IPoIB interface %s: it's down\n", ifname); | |
2390 | return NULL; | |
2391 | } | |
2392 | ||
2393 | LIBCFS_ALLOC(dev, sizeof(*dev)); | |
06ace26e | 2394 | if (!dev) |
d7e09d03 PT |
2395 | return NULL; |
2396 | ||
d7e09d03 | 2397 | netdev = dev_get_by_name(&init_net, ifname); |
06ace26e | 2398 | if (!netdev) { |
d7e09d03 PT |
2399 | dev->ibd_can_failover = 0; |
2400 | } else { | |
2401 | dev->ibd_can_failover = !!(netdev->flags & IFF_MASTER); | |
2402 | dev_put(netdev); | |
2403 | } | |
2404 | ||
2405 | INIT_LIST_HEAD(&dev->ibd_nets); | |
2406 | INIT_LIST_HEAD(&dev->ibd_list); /* not yet in kib_devs */ | |
2407 | INIT_LIST_HEAD(&dev->ibd_fail_list); | |
2408 | dev->ibd_ifip = ip; | |
2409 | strcpy(&dev->ibd_ifname[0], ifname); | |
2410 | ||
2411 | /* initialize the device */ | |
2412 | rc = kiblnd_dev_failover(dev); | |
5fd88337 | 2413 | if (rc) { |
d7e09d03 PT |
2414 | CERROR("Can't initialize device: %d\n", rc); |
2415 | LIBCFS_FREE(dev, sizeof(*dev)); | |
2416 | return NULL; | |
2417 | } | |
2418 | ||
c314c319 | 2419 | list_add_tail(&dev->ibd_list, &kiblnd_data.kib_devs); |
d7e09d03 PT |
2420 | return dev; |
2421 | } | |
2422 | ||
febe73bd | 2423 | static void kiblnd_base_shutdown(void) |
d7e09d03 | 2424 | { |
ec3d17c0 MS |
2425 | struct kib_sched_info *sched; |
2426 | int i; | |
d7e09d03 | 2427 | |
febe73bd | 2428 | LASSERT(list_empty(&kiblnd_data.kib_devs)); |
d7e09d03 | 2429 | |
d7e09d03 PT |
2430 | switch (kiblnd_data.kib_init) { |
2431 | default: | |
2432 | LBUG(); | |
2433 | ||
2434 | case IBLND_INIT_ALL: | |
2435 | case IBLND_INIT_DATA: | |
06ace26e | 2436 | LASSERT(kiblnd_data.kib_peers); |
7a3888a3 | 2437 | for (i = 0; i < kiblnd_data.kib_peer_hash_size; i++) |
febe73bd | 2438 | LASSERT(list_empty(&kiblnd_data.kib_peers[i])); |
febe73bd GM |
2439 | LASSERT(list_empty(&kiblnd_data.kib_connd_zombies)); |
2440 | LASSERT(list_empty(&kiblnd_data.kib_connd_conns)); | |
d7e09d03 PT |
2441 | |
2442 | /* flag threads to terminate; wake and wait for them to die */ | |
2443 | kiblnd_data.kib_shutdown = 1; | |
2444 | ||
4420cfd3 JS |
2445 | /* |
2446 | * NB: we really want to stop scheduler threads net by net | |
d7e09d03 | 2447 | * instead of the whole module, this should be improved |
4420cfd3 JS |
2448 | * with dynamic configuration LNet |
2449 | */ | |
d7e09d03 PT |
2450 | cfs_percpt_for_each(sched, i, kiblnd_data.kib_scheds) |
2451 | wake_up_all(&sched->ibs_waitq); | |
2452 | ||
2453 | wake_up_all(&kiblnd_data.kib_connd_waitq); | |
2454 | wake_up_all(&kiblnd_data.kib_failover_waitq); | |
2455 | ||
2456 | i = 2; | |
5fd88337 | 2457 | while (atomic_read(&kiblnd_data.kib_nthreads)) { |
d7e09d03 | 2458 | i++; |
7a3888a3 GM |
2459 | /* power of 2 ? */ |
2460 | CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, | |
d7e09d03 PT |
2461 | "Waiting for %d threads to terminate\n", |
2462 | atomic_read(&kiblnd_data.kib_nthreads)); | |
d3caf4d5 PT |
2463 | set_current_state(TASK_UNINTERRUPTIBLE); |
2464 | schedule_timeout(cfs_time_seconds(1)); | |
d7e09d03 PT |
2465 | } |
2466 | ||
2467 | /* fall through */ | |
2468 | ||
2469 | case IBLND_INIT_NOTHING: | |
2470 | break; | |
2471 | } | |
2472 | ||
06ace26e | 2473 | if (kiblnd_data.kib_peers) { |
d7e09d03 PT |
2474 | LIBCFS_FREE(kiblnd_data.kib_peers, |
2475 | sizeof(struct list_head) * | |
2476 | kiblnd_data.kib_peer_hash_size); | |
2477 | } | |
2478 | ||
06ace26e | 2479 | if (kiblnd_data.kib_scheds) |
d7e09d03 PT |
2480 | cfs_percpt_free(kiblnd_data.kib_scheds); |
2481 | ||
d7e09d03 PT |
2482 | kiblnd_data.kib_init = IBLND_INIT_NOTHING; |
2483 | module_put(THIS_MODULE); | |
2484 | } | |
2485 | ||
439b4d45 | 2486 | static void kiblnd_shutdown(lnet_ni_t *ni) |
d7e09d03 | 2487 | { |
ec3d17c0 MS |
2488 | kib_net_t *net = ni->ni_data; |
2489 | rwlock_t *g_lock = &kiblnd_data.kib_global_lock; | |
2490 | int i; | |
2491 | unsigned long flags; | |
d7e09d03 PT |
2492 | |
2493 | LASSERT(kiblnd_data.kib_init == IBLND_INIT_ALL); | |
2494 | ||
06ace26e | 2495 | if (!net) |
d7e09d03 PT |
2496 | goto out; |
2497 | ||
d7e09d03 PT |
2498 | write_lock_irqsave(g_lock, flags); |
2499 | net->ibn_shutdown = 1; | |
2500 | write_unlock_irqrestore(g_lock, flags); | |
2501 | ||
2502 | switch (net->ibn_init) { | |
2503 | default: | |
2504 | LBUG(); | |
2505 | ||
2506 | case IBLND_INIT_ALL: | |
2507 | /* nuke all existing peers within this net */ | |
2508 | kiblnd_del_peer(ni, LNET_NID_ANY); | |
2509 | ||
2510 | /* Wait for all peer state to clean up */ | |
2511 | i = 2; | |
5fd88337 | 2512 | while (atomic_read(&net->ibn_npeers)) { |
d7e09d03 PT |
2513 | i++; |
2514 | CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* 2**n? */ | |
2515 | "%s: waiting for %d peers to disconnect\n", | |
2516 | libcfs_nid2str(ni->ni_nid), | |
2517 | atomic_read(&net->ibn_npeers)); | |
d3caf4d5 PT |
2518 | set_current_state(TASK_UNINTERRUPTIBLE); |
2519 | schedule_timeout(cfs_time_seconds(1)); | |
d7e09d03 PT |
2520 | } |
2521 | ||
2522 | kiblnd_net_fini_pools(net); | |
2523 | ||
2524 | write_lock_irqsave(g_lock, flags); | |
2525 | LASSERT(net->ibn_dev->ibd_nnets > 0); | |
2526 | net->ibn_dev->ibd_nnets--; | |
2527 | list_del(&net->ibn_list); | |
2528 | write_unlock_irqrestore(g_lock, flags); | |
2529 | ||
2530 | /* fall through */ | |
2531 | ||
2532 | case IBLND_INIT_NOTHING: | |
5fd88337 | 2533 | LASSERT(!atomic_read(&net->ibn_nconns)); |
d7e09d03 | 2534 | |
5fd88337 | 2535 | if (net->ibn_dev && !net->ibn_dev->ibd_nnets) |
d7e09d03 PT |
2536 | kiblnd_destroy_dev(net->ibn_dev); |
2537 | ||
2538 | break; | |
2539 | } | |
2540 | ||
d7e09d03 PT |
2541 | net->ibn_init = IBLND_INIT_NOTHING; |
2542 | ni->ni_data = NULL; | |
2543 | ||
2544 | LIBCFS_FREE(net, sizeof(*net)); | |
2545 | ||
2546 | out: | |
2547 | if (list_empty(&kiblnd_data.kib_devs)) | |
2548 | kiblnd_base_shutdown(); | |
d7e09d03 PT |
2549 | } |
2550 | ||
febe73bd | 2551 | static int kiblnd_base_startup(void) |
d7e09d03 | 2552 | { |
ec3d17c0 MS |
2553 | struct kib_sched_info *sched; |
2554 | int rc; | |
2555 | int i; | |
d7e09d03 | 2556 | |
febe73bd | 2557 | LASSERT(kiblnd_data.kib_init == IBLND_INIT_NOTHING); |
d7e09d03 PT |
2558 | |
2559 | try_module_get(THIS_MODULE); | |
7a3888a3 GM |
2560 | /* zero pointers, flags etc */ |
2561 | memset(&kiblnd_data, 0, sizeof(kiblnd_data)); | |
d7e09d03 PT |
2562 | |
2563 | rwlock_init(&kiblnd_data.kib_global_lock); | |
2564 | ||
2565 | INIT_LIST_HEAD(&kiblnd_data.kib_devs); | |
2566 | INIT_LIST_HEAD(&kiblnd_data.kib_failed_devs); | |
2567 | ||
2568 | kiblnd_data.kib_peer_hash_size = IBLND_PEER_HASH_SIZE; | |
2569 | LIBCFS_ALLOC(kiblnd_data.kib_peers, | |
ec3d17c0 | 2570 | sizeof(struct list_head) * kiblnd_data.kib_peer_hash_size); |
06ace26e | 2571 | if (!kiblnd_data.kib_peers) |
d7e09d03 | 2572 | goto failed; |
d7e09d03 PT |
2573 | for (i = 0; i < kiblnd_data.kib_peer_hash_size; i++) |
2574 | INIT_LIST_HEAD(&kiblnd_data.kib_peers[i]); | |
2575 | ||
2576 | spin_lock_init(&kiblnd_data.kib_connd_lock); | |
2577 | INIT_LIST_HEAD(&kiblnd_data.kib_connd_conns); | |
2578 | INIT_LIST_HEAD(&kiblnd_data.kib_connd_zombies); | |
2579 | init_waitqueue_head(&kiblnd_data.kib_connd_waitq); | |
2580 | init_waitqueue_head(&kiblnd_data.kib_failover_waitq); | |
2581 | ||
2582 | kiblnd_data.kib_scheds = cfs_percpt_alloc(lnet_cpt_table(), | |
2583 | sizeof(*sched)); | |
06ace26e | 2584 | if (!kiblnd_data.kib_scheds) |
d7e09d03 PT |
2585 | goto failed; |
2586 | ||
2587 | cfs_percpt_for_each(sched, i, kiblnd_data.kib_scheds) { | |
ec3d17c0 | 2588 | int nthrs; |
d7e09d03 PT |
2589 | |
2590 | spin_lock_init(&sched->ibs_lock); | |
2591 | INIT_LIST_HEAD(&sched->ibs_conns); | |
2592 | init_waitqueue_head(&sched->ibs_waitq); | |
2593 | ||
2594 | nthrs = cfs_cpt_weight(lnet_cpt_table(), i); | |
2595 | if (*kiblnd_tunables.kib_nscheds > 0) { | |
2596 | nthrs = min(nthrs, *kiblnd_tunables.kib_nscheds); | |
2597 | } else { | |
4420cfd3 JS |
2598 | /* |
2599 | * max to half of CPUs, another half is reserved for | |
2600 | * upper layer modules | |
2601 | */ | |
d7e09d03 PT |
2602 | nthrs = min(max(IBLND_N_SCHED, nthrs >> 1), nthrs); |
2603 | } | |
2604 | ||
2605 | sched->ibs_nthreads_max = nthrs; | |
2606 | sched->ibs_cpt = i; | |
2607 | } | |
2608 | ||
2609 | kiblnd_data.kib_error_qpa.qp_state = IB_QPS_ERR; | |
2610 | ||
2611 | /* lists/ptrs/locks initialised */ | |
2612 | kiblnd_data.kib_init = IBLND_INIT_DATA; | |
2613 | /*****************************************************/ | |
2614 | ||
2615 | rc = kiblnd_thread_start(kiblnd_connd, NULL, "kiblnd_connd"); | |
5fd88337 | 2616 | if (rc) { |
d7e09d03 PT |
2617 | CERROR("Can't spawn o2iblnd connd: %d\n", rc); |
2618 | goto failed; | |
2619 | } | |
2620 | ||
5fd88337 | 2621 | if (*kiblnd_tunables.kib_dev_failover) |
d7e09d03 PT |
2622 | rc = kiblnd_thread_start(kiblnd_failover_thread, NULL, |
2623 | "kiblnd_failover"); | |
2624 | ||
5fd88337 | 2625 | if (rc) { |
d7e09d03 PT |
2626 | CERROR("Can't spawn o2iblnd failover thread: %d\n", rc); |
2627 | goto failed; | |
2628 | } | |
2629 | ||
2630 | /* flag everything initialised */ | |
2631 | kiblnd_data.kib_init = IBLND_INIT_ALL; | |
2632 | /*****************************************************/ | |
2633 | ||
2634 | return 0; | |
2635 | ||
2636 | failed: | |
2637 | kiblnd_base_shutdown(); | |
2638 | return -ENETDOWN; | |
2639 | } | |
2640 | ||
febe73bd | 2641 | static int kiblnd_start_schedulers(struct kib_sched_info *sched) |
d7e09d03 | 2642 | { |
ec3d17c0 MS |
2643 | int rc = 0; |
2644 | int nthrs; | |
2645 | int i; | |
d7e09d03 | 2646 | |
5fd88337 | 2647 | if (!sched->ibs_nthreads) { |
d7e09d03 PT |
2648 | if (*kiblnd_tunables.kib_nscheds > 0) { |
2649 | nthrs = sched->ibs_nthreads_max; | |
2650 | } else { | |
2651 | nthrs = cfs_cpt_weight(lnet_cpt_table(), | |
2652 | sched->ibs_cpt); | |
2653 | nthrs = min(max(IBLND_N_SCHED, nthrs >> 1), nthrs); | |
2654 | nthrs = min(IBLND_N_SCHED_HIGH, nthrs); | |
2655 | } | |
2656 | } else { | |
2657 | LASSERT(sched->ibs_nthreads <= sched->ibs_nthreads_max); | |
2658 | /* increase one thread if there is new interface */ | |
b6ee3824 | 2659 | nthrs = sched->ibs_nthreads < sched->ibs_nthreads_max; |
d7e09d03 PT |
2660 | } |
2661 | ||
2662 | for (i = 0; i < nthrs; i++) { | |
ec3d17c0 MS |
2663 | long id; |
2664 | char name[20]; | |
7a3888a3 | 2665 | |
d7e09d03 PT |
2666 | id = KIB_THREAD_ID(sched->ibs_cpt, sched->ibs_nthreads + i); |
2667 | snprintf(name, sizeof(name), "kiblnd_sd_%02ld_%02ld", | |
2668 | KIB_THREAD_CPT(id), KIB_THREAD_TID(id)); | |
2669 | rc = kiblnd_thread_start(kiblnd_scheduler, (void *)id, name); | |
5fd88337 | 2670 | if (!rc) |
d7e09d03 PT |
2671 | continue; |
2672 | ||
2673 | CERROR("Can't spawn thread %d for scheduler[%d]: %d\n", | |
2674 | sched->ibs_cpt, sched->ibs_nthreads + i, rc); | |
2675 | break; | |
2676 | } | |
2677 | ||
2678 | sched->ibs_nthreads += i; | |
2679 | return rc; | |
2680 | } | |
2681 | ||
7a3888a3 GM |
2682 | static int kiblnd_dev_start_threads(kib_dev_t *dev, int newdev, __u32 *cpts, |
2683 | int ncpts) | |
d7e09d03 | 2684 | { |
ec3d17c0 MS |
2685 | int cpt; |
2686 | int rc; | |
2687 | int i; | |
d7e09d03 PT |
2688 | |
2689 | for (i = 0; i < ncpts; i++) { | |
2690 | struct kib_sched_info *sched; | |
2691 | ||
06ace26e | 2692 | cpt = !cpts ? i : cpts[i]; |
d7e09d03 PT |
2693 | sched = kiblnd_data.kib_scheds[cpt]; |
2694 | ||
2695 | if (!newdev && sched->ibs_nthreads > 0) | |
2696 | continue; | |
2697 | ||
2698 | rc = kiblnd_start_schedulers(kiblnd_data.kib_scheds[cpt]); | |
5fd88337 | 2699 | if (rc) { |
d7e09d03 PT |
2700 | CERROR("Failed to start scheduler threads for %s\n", |
2701 | dev->ibd_ifname); | |
2702 | return rc; | |
2703 | } | |
2704 | } | |
2705 | return 0; | |
2706 | } | |
2707 | ||
/*
 * Look up an existing kib_dev_t by IPoIB interface name.
 *
 * An exact name match is returned immediately.  Failing that, the first
 * device whose base name (the part before any ':' alias suffix) equals
 * @ifname's base name is remembered and returned as an alias match, or
 * NULL if nothing matches at all.
 *
 * NOTE(review): the ':' in @ifname and in each candidate's name is
 * temporarily overwritten with '\0' for the base-name compare and then
 * restored, so both strings are briefly modified in place — presumably
 * callers serialize access to kib_devs while this runs; confirm.
 */
static kib_dev_t *kiblnd_dev_search(char *ifname)
{
	kib_dev_t *alias = NULL;
	kib_dev_t *dev;
	char *colon;
	char *colon2;

	colon = strchr(ifname, ':');
	list_for_each_entry(dev, &kiblnd_data.kib_devs, ibd_list) {
		/* exact match wins over any alias match */
		if (!strcmp(&dev->ibd_ifname[0], ifname))
			return dev;

		/* keep only the first alias-level match found */
		if (alias)
			continue;

		colon2 = strchr(dev->ibd_ifname, ':');
		/* strip alias suffixes for the base-name comparison */
		if (colon)
			*colon = 0;
		if (colon2)
			*colon2 = 0;

		if (!strcmp(&dev->ibd_ifname[0], ifname))
			alias = dev;

		/* restore the characters truncated above */
		if (colon)
			*colon = ':';
		if (colon2)
			*colon2 = ':';
	}
	return alias;
}
2739 | ||
/*
 * LNet lnd_startup handler: bring up an o2iblnd network interface (NI).
 *
 * Performs one-time LND-wide initialization if needed, allocates the
 * per-NI kib_net_t, resolves the IPoIB interface name (from
 * 'networks=' or the default tunable), finds or creates the matching
 * kib_dev_t, starts scheduler threads and pools, then links the net
 * onto the device under the global lock.
 *
 * Returns 0 on success; a base-startup error code, or -ENETDOWN after
 * shutting the NI back down, on failure.
 */
static int kiblnd_startup(lnet_ni_t *ni)
{
	char *ifname;
	kib_dev_t *ibdev = NULL;
	kib_net_t *net;
	struct timespec64 tv;
	unsigned long flags;
	int rc;
	int newdev;

	LASSERT(ni->ni_lnd == &the_o2iblnd);

	/* first NI: bring up the module-wide state */
	if (kiblnd_data.kib_init == IBLND_INIT_NOTHING) {
		rc = kiblnd_base_startup();
		if (rc)
			return rc;
	}

	LIBCFS_ALLOC(net, sizeof(*net));
	ni->ni_data = net;
	if (!net)
		goto net_failed;

	/* incarnation = wall-clock time in microseconds; distinguishes
	 * successive instantiations of this net to peers
	 */
	ktime_get_real_ts64(&tv);
	net->ibn_incarnation = tv.tv_sec * USEC_PER_SEC +
			       tv.tv_nsec / NSEC_PER_USEC;

	/* publish tunable credit/timeout settings on the NI */
	ni->ni_peertimeout = *kiblnd_tunables.kib_peertimeout;
	ni->ni_maxtxcredits = *kiblnd_tunables.kib_credits;
	ni->ni_peertxcredits = *kiblnd_tunables.kib_peertxcredits;
	ni->ni_peerrtrcredits = *kiblnd_tunables.kib_peerrtrcredits;

	if (ni->ni_interfaces[0]) {
		/* Use the IPoIB interface specified in 'networks=' */

		CLASSERT(LNET_MAX_INTERFACES > 1);
		if (ni->ni_interfaces[1]) {
			CERROR("Multiple interfaces not supported\n");
			goto failed;
		}

		ifname = ni->ni_interfaces[0];
	} else {
		ifname = *kiblnd_tunables.kib_default_ipif;
	}

	if (strlen(ifname) >= sizeof(ibdev->ibd_ifname)) {
		CERROR("IPoIB interface name too long: %s\n", ifname);
		goto failed;
	}

	ibdev = kiblnd_dev_search(ifname);

	newdev = !ibdev;
	/* hmm...create kib_dev even for alias */
	if (!ibdev || strcmp(&ibdev->ibd_ifname[0], ifname))
		ibdev = kiblnd_create_dev(ifname);

	if (!ibdev)
		goto failed;

	net->ibn_dev = ibdev;
	/* NID = network part from config + the interface's IP address */
	ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), ibdev->ibd_ifip);

	rc = kiblnd_dev_start_threads(ibdev, newdev,
				      ni->ni_cpts, ni->ni_ncpts);
	if (rc)
		goto failed;

	rc = kiblnd_net_init_pools(net, ni->ni_cpts, ni->ni_ncpts);
	if (rc) {
		CERROR("Failed to initialize NI pools: %d\n", rc);
		goto failed;
	}

	/* attach the net to its device under the global lock */
	write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
	ibdev->ibd_nnets++;
	list_add_tail(&net->ibn_list, &ibdev->ibd_nets);
	write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);

	net->ibn_init = IBLND_INIT_ALL;

	return 0;

failed:
	/* only destroy a device we created but never attached to the net */
	if (!net->ibn_dev && ibdev)
		kiblnd_destroy_dev(ibdev);

net_failed:
	kiblnd_shutdown(ni);

	CDEBUG(D_NET, "kiblnd_startup failed\n");
	return -ENETDOWN;
}
2834 | ||
/* LND callback table registered with LNet for the O2IBLND network type */
static lnd_t the_o2iblnd = {
	.lnd_type = O2IBLND,
	.lnd_startup = kiblnd_startup,
	.lnd_shutdown = kiblnd_shutdown,
	.lnd_ctl = kiblnd_ctl,
	.lnd_query = kiblnd_query,
	.lnd_send = kiblnd_send,
	.lnd_recv = kiblnd_recv,
};
2844 | ||
/* Module unload: detach this LND from LNet */
static void __exit ko2iblnd_exit(void)
{
	lnet_unregister_lnd(&the_o2iblnd);
}
2849 | ||
/*
 * Module load: sanity-check wire-message sizes at compile time,
 * initialize tunables, and register this LND with LNet.
 *
 * Returns 0 on success or the error from kiblnd_tunables_init().
 */
static int __init ko2iblnd_init(void)
{
	int rc;

	/* the message struct, including maximally-fragmented GET and
	 * PUT-ack payloads, must fit in a single IBLND_MSG_SIZE buffer
	 */
	CLASSERT(sizeof(kib_msg_t) <= IBLND_MSG_SIZE);
	CLASSERT(offsetof(kib_msg_t,
			  ibm_u.get.ibgm_rd.rd_frags[IBLND_MAX_RDMA_FRAGS])
			  <= IBLND_MSG_SIZE);
	CLASSERT(offsetof(kib_msg_t,
			  ibm_u.putack.ibpam_rd.rd_frags[IBLND_MAX_RDMA_FRAGS])
			  <= IBLND_MSG_SIZE);

	rc = kiblnd_tunables_init();
	if (rc)
		return rc;

	lnet_register_lnd(&the_o2iblnd);

	return 0;
}
2870 | ||
/* Module metadata and entry points */
MODULE_AUTHOR("OpenSFS, Inc. <http://www.lustre.org/>");
MODULE_DESCRIPTION("OpenIB gen2 LNet Network Driver");
MODULE_VERSION("2.7.0");
MODULE_LICENSE("GPL");

module_init(ko2iblnd_init);
module_exit(ko2iblnd_exit);