Commit | Line | Data |
---|---|---|
d7e09d03 PT |
1 | /* |
2 | * GPL HEADER START | |
3 | * | |
4 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or modify | |
7 | * it under the terms of the GNU General Public License version 2 only, | |
8 | * as published by the Free Software Foundation. | |
9 | * | |
10 | * This program is distributed in the hope that it will be useful, but | |
11 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 | * General Public License version 2 for more details (a copy is included | |
14 | * in the LICENSE file that accompanied this code). | |
15 | * | |
16 | * You should have received a copy of the GNU General Public License | |
17 | * version 2 along with this program; If not, see | |
4f3ca893 | 18 | * http://www.gnu.org/licenses/gpl-2.0.html |
d7e09d03 PT |
19 | * |
20 | * GPL HEADER END | |
21 | */ | |
22 | /* | |
23 | * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. | |
24 | * Use is subject to license terms. | |
25 | * | |
1dc563a6 | 26 | * Copyright (c) 2012, 2015, Intel Corporation. |
d7e09d03 PT |
27 | */ |
28 | /* | |
29 | * This file is part of Lustre, http://www.lustre.org/ | |
4f3ca893 | 30 | * Lustre is a trademark of Seagate, Inc. |
d7e09d03 PT |
31 | * |
32 | * lnet/include/lnet/lib-types.h | |
d7e09d03 PT |
33 | */ |
34 | ||
35 | #ifndef __LNET_LIB_TYPES_H__ | |
36 | #define __LNET_LIB_TYPES_H__ | |
37 | ||
db18b8e9 JS |
38 | #include <linux/kthread.h> |
39 | #include <linux/uio.h> | |
40 | #include <linux/types.h> | |
d7e09d03 | 41 | |
db18b8e9 | 42 | #include "types.h" |
d7e09d03 | 43 | |
db18b8e9 JS |
44 | /* Max payload size */ |
45 | #define LNET_MAX_PAYLOAD CONFIG_LNET_MAX_PAYLOAD | |
46 | #if (LNET_MAX_PAYLOAD < LNET_MTU) | |
47 | # error "LNET_MAX_PAYLOAD too small - error in configure --with-max-payload-mb" | |
48 | #elif (LNET_MAX_PAYLOAD > (PAGE_SIZE * LNET_MAX_IOV)) | |
188acc61 | 49 | # error "LNET_MAX_PAYLOAD too large - error in configure --with-max-payload-mb" |
db18b8e9 | 50 | #endif |
d7e09d03 PT |
51 | |
52 | /* forward refs */ | |
53 | struct lnet_libmd; | |
54 | ||
55 | typedef struct lnet_msg { | |
188acc61 JS |
56 | struct list_head msg_activelist; |
57 | struct list_head msg_list; /* Q for credits/MD */ | |
d7e09d03 | 58 | |
188acc61 | 59 | lnet_process_id_t msg_target; |
d7e09d03 PT |
60 | /* source NID of the message; used only when building the event */ |
61 | lnet_nid_t msg_from; | |
62 | __u32 msg_type; | |
63 | ||
253d50eb | 64 | /* committed for sending */ |
d7e09d03 PT |
65 | unsigned int msg_tx_committed:1; |
66 | /* CPT # this message committed for sending */ | |
67 | unsigned int msg_tx_cpt:15; | |
253d50eb | 68 | /* committed for receiving */ |
d7e09d03 PT |
69 | unsigned int msg_rx_committed:1; |
70 | /* CPT # this message committed for receiving */ | |
71 | unsigned int msg_rx_cpt:15; | |
72 | /* queued for tx credit */ | |
73 | unsigned int msg_tx_delayed:1; | |
74 | /* queued for RX buffer */ | |
75 | unsigned int msg_rx_delayed:1; | |
76 | /* ready for pending on RX delay list */ | |
77 | unsigned int msg_rx_ready_delay:1; | |
78 | ||
188acc61 JS |
79 | unsigned int msg_vmflush:1; /* VM trying to free memory */ |
80 | unsigned int msg_target_is_router:1; /* sending to a router */ | |
81 | unsigned int msg_routing:1; /* being forwarded */ | |
82 | unsigned int msg_ack:1; /* ack on finalize (PUT) */ | |
83 | unsigned int msg_sending:1; /* outgoing message */ | |
84 | unsigned int msg_receiving:1; /* being received */ | |
85 | unsigned int msg_txcredit:1; /* taken an NI send credit */ | |
86 | unsigned int msg_peertxcredit:1; /* taken a peer send credit */ | |
4420cfd3 | 87 | unsigned int msg_rtrcredit:1; /* taken a global router credit */ |
188acc61 JS |
88 | unsigned int msg_peerrtrcredit:1; /* taken a peer router credit */ |
89 | unsigned int msg_onactivelist:1; /* on the activelist */ | |
90 | ||
91 | struct lnet_peer *msg_txpeer; /* peer I'm sending to */ | |
92 | struct lnet_peer *msg_rxpeer; /* peer I received from */ | |
93 | ||
94 | void *msg_private; | |
95 | struct lnet_libmd *msg_md; | |
96 | ||
97 | unsigned int msg_len; | |
98 | unsigned int msg_wanted; | |
99 | unsigned int msg_offset; | |
100 | unsigned int msg_niov; | |
101 | struct kvec *msg_iov; | |
102 | lnet_kiov_t *msg_kiov; | |
103 | ||
104 | lnet_event_t msg_ev; | |
105 | lnet_hdr_t msg_hdr; | |
d7e09d03 PT |
106 | } lnet_msg_t; |
107 | ||
d7e09d03 | 108 | typedef struct lnet_libhandle { |
188acc61 JS |
109 | struct list_head lh_hash_chain; |
110 | __u64 lh_cookie; | |
d7e09d03 PT |
111 | } lnet_libhandle_t; |
112 | ||
113 | #define lh_entry(ptr, type, member) \ | |
51078e25 | 114 | ((type *)((char *)(ptr) - (char *)(&((type *)0)->member))) |
d7e09d03 PT |
115 | |
116 | typedef struct lnet_eq { | |
188acc61 JS |
117 | struct list_head eq_list; |
118 | lnet_libhandle_t eq_lh; | |
119 | lnet_seq_t eq_enq_seq; | |
120 | lnet_seq_t eq_deq_seq; | |
121 | unsigned int eq_size; | |
122 | lnet_eq_handler_t eq_callback; | |
123 | lnet_event_t *eq_events; | |
d7e09d03 PT |
124 | int **eq_refs; /* percpt refcount for EQ */ |
125 | } lnet_eq_t; | |
126 | ||
127 | typedef struct lnet_me { | |
188acc61 JS |
128 | struct list_head me_list; |
129 | lnet_libhandle_t me_lh; | |
130 | lnet_process_id_t me_match_id; | |
131 | unsigned int me_portal; | |
132 | unsigned int me_pos; /* hash offset in mt_mhash */ | |
133 | __u64 me_match_bits; | |
134 | __u64 me_ignore_bits; | |
135 | lnet_unlink_t me_unlink; | |
136 | struct lnet_libmd *me_md; | |
d7e09d03 PT |
137 | } lnet_me_t; |
138 | ||
139 | typedef struct lnet_libmd { | |
188acc61 JS |
140 | struct list_head md_list; |
141 | lnet_libhandle_t md_lh; | |
142 | lnet_me_t *md_me; | |
143 | char *md_start; | |
144 | unsigned int md_offset; | |
145 | unsigned int md_length; | |
146 | unsigned int md_max_size; | |
147 | int md_threshold; | |
148 | int md_refcount; | |
149 | unsigned int md_options; | |
150 | unsigned int md_flags; | |
151 | void *md_user_ptr; | |
152 | lnet_eq_t *md_eq; | |
153 | unsigned int md_niov; /* # frags */ | |
d7e09d03 | 154 | union { |
188acc61 JS |
155 | struct kvec iov[LNET_MAX_IOV]; |
156 | lnet_kiov_t kiov[LNET_MAX_IOV]; | |
d7e09d03 PT |
157 | } md_iov; |
158 | } lnet_libmd_t; | |
159 | ||
188acc61 JS |
160 | #define LNET_MD_FLAG_ZOMBIE (1 << 0) |
161 | #define LNET_MD_FLAG_AUTO_UNLINK (1 << 1) | |
162 | #define LNET_MD_FLAG_ABORTED (1 << 2) | |
d7e09d03 | 163 | |
d7e09d03 PT |
164 | typedef struct { |
165 | /* info about peers we are trying to fail */ | |
188acc61 JS |
166 | struct list_head tp_list; /* ln_test_peers */ |
167 | lnet_nid_t tp_nid; /* matching nid */ | |
168 | unsigned int tp_threshold; /* # failures to simulate */ | |
d7e09d03 PT |
169 | } lnet_test_peer_t; |
170 | ||
188acc61 JS |
171 | #define LNET_COOKIE_TYPE_MD 1 |
172 | #define LNET_COOKIE_TYPE_ME 2 | |
173 | #define LNET_COOKIE_TYPE_EQ 3 | |
174 | #define LNET_COOKIE_TYPE_BITS 2 | |
d7e09d03 PT |
175 | #define LNET_COOKIE_MASK ((1ULL << LNET_COOKIE_TYPE_BITS) - 1ULL) |
176 | ||
188acc61 | 177 | struct lnet_ni; /* forward ref */ |
d7e09d03 | 178 | |
3b77f472 | 179 | typedef struct lnet_lnd { |
d7e09d03 | 180 | /* fields managed by portals */ |
188acc61 JS |
181 | struct list_head lnd_list; /* stash in the LND table */ |
182 | int lnd_refcount; /* # active instances */ | |
d7e09d03 PT |
183 | |
184 | /* fields initialised by the LND */ | |
db18b8e9 | 185 | __u32 lnd_type; |
d7e09d03 | 186 | |
b11866b3 AO |
187 | int (*lnd_startup)(struct lnet_ni *ni); |
188 | void (*lnd_shutdown)(struct lnet_ni *ni); | |
d7e09d03 PT |
189 | int (*lnd_ctl)(struct lnet_ni *ni, unsigned int cmd, void *arg); |
190 | ||
4420cfd3 JS |
191 | /* |
192 | * In data movement APIs below, payload buffers are described as a set | |
d7e09d03 PT |
193 | * of 'niov' fragments which are... |
194 | * EITHER | |
195 | * in virtual memory (struct kvec *iov != NULL) | |
196 | * OR | |
197 | * in pages (kernel only: lnet_kiov_t *kiov != NULL). | |
198 | * The LND may NOT overwrite these fragment descriptors. | |
199 | * An 'offset' may specify a byte offset within the set of | |
200 | * fragments to start from | |
201 | */ | |
202 | ||
4420cfd3 JS |
203 | /* |
204 | * Start sending a preformatted message. 'private' is NULL for PUT and | |
d7e09d03 PT |
205 | * GET messages; otherwise this is a response to an incoming message |
206 | * and 'private' is the 'private' passed to lnet_parse(). Return | |
207 | * non-zero for immediate failure, otherwise complete later with | |
4420cfd3 JS |
208 | * lnet_finalize() |
209 | */ | |
d7e09d03 PT |
210 | int (*lnd_send)(struct lnet_ni *ni, void *private, lnet_msg_t *msg); |
211 | ||
4420cfd3 JS |
212 | /* |
213 | * Start receiving 'mlen' bytes of payload data, skipping the following | |
d7e09d03 | 214 | * 'rlen' - 'mlen' bytes. 'private' is the 'private' passed to |
d766b4b5 | 215 | * lnet_parse(). Return non-zero for immediate failure, otherwise |
d7e09d03 | 216 | * complete later with lnet_finalize(). This also gives back a receive |
4420cfd3 JS |
217 | * credit if the LND does flow control. |
218 | */ | |
d7e09d03 PT |
219 | int (*lnd_recv)(struct lnet_ni *ni, void *private, lnet_msg_t *msg, |
220 | int delayed, unsigned int niov, | |
f351bad2 | 221 | struct kvec *iov, lnet_kiov_t *kiov, |
188acc61 JS |
222 | unsigned int offset, unsigned int mlen, |
223 | unsigned int rlen); | |
d7e09d03 | 224 | |
4420cfd3 JS |
225 | /* |
226 | * lnet_parse() has had to delay processing of this message | |
d7e09d03 PT |
227 | * (e.g. waiting for a forwarding buffer or send credits). Give the |
228 | * LND a chance to free urgently needed resources. If called, return 0 | |
229 | * for success and do NOT give back a receive credit; that has to wait | |
230 | * until lnd_recv() gets called. On failure return < 0 and | |
4420cfd3 JS |
231 | * release resources; lnd_recv() will not be called. |
232 | */ | |
188acc61 JS |
233 | int (*lnd_eager_recv)(struct lnet_ni *ni, void *private, |
234 | lnet_msg_t *msg, void **new_privatep); | |
d7e09d03 PT |
235 | |
236 | /* notification of peer health */ | |
237 | void (*lnd_notify)(struct lnet_ni *ni, lnet_nid_t peer, int alive); | |
238 | ||
239 | /* query of peer aliveness */ | |
188acc61 JS |
240 | void (*lnd_query)(struct lnet_ni *ni, lnet_nid_t peer, |
241 | unsigned long *when); | |
d7e09d03 PT |
242 | |
243 | /* accept a new connection */ | |
e327dc88 | 244 | int (*lnd_accept)(struct lnet_ni *ni, struct socket *sock); |
d7e09d03 PT |
245 | } lnd_t; |
246 | ||
d7e09d03 PT |
247 | struct lnet_tx_queue { |
248 | int tq_credits; /* # tx credits free */ | |
249 | int tq_credits_min; /* lowest it's been */ | |
250 | int tq_credits_max; /* total # tx credits */ | |
188acc61 | 251 | struct list_head tq_delayed; /* delayed TXs */ |
d7e09d03 PT |
252 | }; |
253 | ||
d7e09d03 | 254 | typedef struct lnet_ni { |
188acc61 JS |
255 | spinlock_t ni_lock; |
256 | struct list_head ni_list; /* chain on ln_nis */ | |
257 | struct list_head ni_cptlist; /* chain on ln_nis_cpt */ | |
258 | int ni_maxtxcredits; /* # tx credits */ | |
d7e09d03 | 259 | /* # per-peer send credits */ |
188acc61 | 260 | int ni_peertxcredits; |
d7e09d03 | 261 | /* # per-peer router buffer credits */ |
188acc61 | 262 | int ni_peerrtrcredits; |
d7e09d03 | 263 | /* seconds to consider peer dead */ |
188acc61 JS |
264 | int ni_peertimeout; |
265 | int ni_ncpts; /* number of CPTs */ | |
266 | __u32 *ni_cpts; /* bond NI on some CPTs */ | |
267 | lnet_nid_t ni_nid; /* interface's NID */ | |
268 | void *ni_data; /* instance-specific data */ | |
269 | lnd_t *ni_lnd; /* procedural interface */ | |
d7e09d03 PT |
270 | struct lnet_tx_queue **ni_tx_queues; /* percpt TX queues */ |
271 | int **ni_refs; /* percpt reference count */ | |
ec0067d1 | 272 | time64_t ni_last_alive;/* when I was last alive */ |
188acc61 | 273 | lnet_ni_status_t *ni_status; /* my health status */ |
d7e09d03 | 274 | /* equivalent interfaces to use */ |
188acc61 | 275 | char *ni_interfaces[LNET_MAX_INTERFACES]; |
d7e09d03 PT |
276 | } lnet_ni_t; |
277 | ||
278 | #define LNET_PROTO_PING_MATCHBITS 0x8000000000000000LL | |
279 | ||
4420cfd3 JS |
280 | /* |
281 | * NB: the values of these features equal LNET_PROTO_PING_VERSION_x | |
282 | * of old LNet, so there shouldn't be any compatibility issue | |
283 | */ | |
d7e09d03 PT |
284 | #define LNET_PING_FEAT_INVAL (0) /* no feature */ |
285 | #define LNET_PING_FEAT_BASE (1 << 0) /* just a ping */ | |
286 | #define LNET_PING_FEAT_NI_STATUS (1 << 1) /* return NI status */ | |
86ef6250 | 287 | #define LNET_PING_FEAT_RTE_DISABLED (1 << 2) /* Routing disabled */ |
d7e09d03 PT |
288 | |
289 | #define LNET_PING_FEAT_MASK (LNET_PING_FEAT_BASE | \ | |
290 | LNET_PING_FEAT_NI_STATUS) | |
291 | ||
d7e09d03 PT |
292 | /* router checker data, per router */ |
293 | #define LNET_MAX_RTR_NIS 16 | |
294 | #define LNET_PINGINFO_SIZE offsetof(lnet_ping_info_t, pi_ni[LNET_MAX_RTR_NIS]) | |
295 | typedef struct { | |
296 | /* chain on the_lnet.ln_rcd_zombie or ln_rcd_deathrow */ | |
188acc61 JS |
297 | struct list_head rcd_list; |
298 | lnet_handle_md_t rcd_mdh; /* ping buffer MD */ | |
d7e09d03 PT |
299 | struct lnet_peer *rcd_gateway; /* reference to gateway */ |
300 | lnet_ping_info_t *rcd_pinginfo; /* ping buffer */ | |
301 | } lnet_rc_data_t; | |
302 | ||
303 | typedef struct lnet_peer { | |
188acc61 JS |
304 | struct list_head lp_hashlist; /* chain on peer hash */ |
305 | struct list_head lp_txq; /* messages blocking for | |
306 | tx credits */ | |
307 | struct list_head lp_rtrq; /* messages blocking for | |
308 | router credits */ | |
309 | struct list_head lp_rtr_list; /* chain on router list */ | |
310 | int lp_txcredits; /* # tx credits available */ | |
311 | int lp_mintxcredits; /* low water mark */ | |
312 | int lp_rtrcredits; /* # router credits */ | |
313 | int lp_minrtrcredits; /* low water mark */ | |
314 | unsigned int lp_alive:1; /* alive/dead? */ | |
315 | unsigned int lp_notify:1; /* notification outstanding? */ | |
316 | unsigned int lp_notifylnd:1;/* outstanding notification | |
317 | for LND? */ | |
318 | unsigned int lp_notifying:1; /* some thread is handling | |
319 | notification */ | |
320 | unsigned int lp_ping_notsent;/* SEND event outstanding | |
321 | from ping */ | |
322 | int lp_alive_count; /* # times router went | |
323 | dead<->alive */ | |
324 | long lp_txqnob; /* bytes queued for sending */ | |
325 | unsigned long lp_timestamp; /* time of last aliveness | |
326 | news */ | |
327 | unsigned long lp_ping_timestamp;/* time of last ping | |
328 | attempt */ | |
329 | unsigned long lp_ping_deadline; /* != 0 if ping reply | |
330 | expected */ | |
331 | unsigned long lp_last_alive; /* when I was last alive */ | |
332 | unsigned long lp_last_query; /* when lp_ni was queried | |
333 | last time */ | |
334 | lnet_ni_t *lp_ni; /* interface peer is on */ | |
335 | lnet_nid_t lp_nid; /* peer's NID */ | |
336 | int lp_refcount; /* # refs */ | |
337 | int lp_cpt; /* CPT this peer attached on */ | |
d7e09d03 | 338 | /* # refs from lnet_route_t::lr_gateway */ |
188acc61 | 339 | int lp_rtr_refcount; |
d7e09d03 | 340 | /* returned RC ping features */ |
188acc61 JS |
341 | unsigned int lp_ping_feats; |
342 | struct list_head lp_routes; /* routes on this peer */ | |
d7e09d03 PT |
343 | lnet_rc_data_t *lp_rcd; /* router checker state */ |
344 | } lnet_peer_t; | |
345 | ||
d7e09d03 | 346 | /* peer hash size */ |
188acc61 JS |
347 | #define LNET_PEER_HASH_BITS 9 |
348 | #define LNET_PEER_HASH_SIZE (1 << LNET_PEER_HASH_BITS) | |
d7e09d03 PT |
349 | |
350 | /* peer hash table */ | |
351 | struct lnet_peer_table { | |
188acc61 JS |
352 | int pt_version; /* /proc validity stamp */ |
353 | int pt_number; /* # peers extant */ | |
21602c7d AS |
354 | /* # zombies to go to deathrow (and not there yet) */ |
355 | int pt_zombies; | |
188acc61 JS |
356 | struct list_head pt_deathrow; /* zombie peers */ |
357 | struct list_head *pt_hash; /* NID->peer hash */ | |
d7e09d03 PT |
358 | }; |
359 | ||
4420cfd3 JS |
360 | /* |
361 | * peer aliveness is enabled only on routers for peers in a network where the | |
362 | * lnet_ni_t::ni_peertimeout has been set to a positive value | |
363 | */ | |
5fd88337 | 364 | #define lnet_peer_aliveness_enabled(lp) (the_lnet.ln_routing && \ |
d7e09d03 PT |
365 | (lp)->lp_ni->ni_peertimeout > 0) |
366 | ||
367 | typedef struct { | |
188acc61 JS |
368 | struct list_head lr_list; /* chain on net */ |
369 | struct list_head lr_gwlist; /* chain on gateway */ | |
d7e09d03 | 370 | lnet_peer_t *lr_gateway; /* router node */ |
188acc61 JS |
371 | __u32 lr_net; /* remote network number */ |
372 | int lr_seq; /* sequence for round-robin */ | |
373 | unsigned int lr_downis; /* number of down NIs */ | |
374 | unsigned int lr_hops; /* how far I am */ | |
375 | unsigned int lr_priority; /* route priority */ | |
d7e09d03 PT |
376 | } lnet_route_t; |
377 | ||
378 | #define LNET_REMOTE_NETS_HASH_DEFAULT (1U << 7) | |
379 | #define LNET_REMOTE_NETS_HASH_MAX (1U << 16) | |
380 | #define LNET_REMOTE_NETS_HASH_SIZE (1 << the_lnet.ln_remote_nets_hbits) | |
381 | ||
382 | typedef struct { | |
188acc61 JS |
383 | struct list_head lrn_list; /* chain on |
384 | ln_remote_nets_hash */ | |
385 | struct list_head lrn_routes; /* routes to me */ | |
386 | __u32 lrn_net; /* my net number */ | |
d7e09d03 PT |
387 | } lnet_remotenet_t; |
388 | ||
db18b8e9 JS |
389 | /** lnet message has credit and can be submitted to lnd for send/receive */ |
390 | #define LNET_CREDIT_OK 0 | |
391 | /** lnet message is waiting for credit */ | |
392 | #define LNET_CREDIT_WAIT 1 | |
393 | ||
d7e09d03 | 394 | typedef struct { |
188acc61 JS |
395 | struct list_head rbp_bufs; /* my free buffer pool */ |
396 | struct list_head rbp_msgs; /* messages blocking | |
397 | for a buffer */ | |
398 | int rbp_npages; /* # pages in each buffer */ | |
95fc2938 AS |
399 | /* requested number of buffers */ |
400 | int rbp_req_nbuffers; | |
401 | /* # buffers actually allocated */ | |
402 | int rbp_nbuffers; | |
188acc61 JS |
403 | int rbp_credits; /* # free buffers / |
404 | blocked messages */ | |
405 | int rbp_mincredits; /* low water mark */ | |
d7e09d03 PT |
406 | } lnet_rtrbufpool_t; |
407 | ||
408 | typedef struct { | |
188acc61 JS |
409 | struct list_head rb_list; /* chain on rbp_bufs */ |
410 | lnet_rtrbufpool_t *rb_pool; /* owning pool */ | |
411 | lnet_kiov_t rb_kiov[0]; /* the buffer space */ | |
d7e09d03 PT |
412 | } lnet_rtrbuf_t; |
413 | ||
188acc61 | 414 | #define LNET_PEER_HASHSIZE 503 /* prime! */ |
d7e09d03 | 415 | |
86ef6250 AS |
416 | #define LNET_TINY_BUF_IDX 0 |
417 | #define LNET_SMALL_BUF_IDX 1 | |
418 | #define LNET_LARGE_BUF_IDX 2 | |
419 | ||
420 | /* # different router buffer pools */ | |
421 | #define LNET_NRBPOOLS (LNET_LARGE_BUF_IDX + 1) | |
d7e09d03 PT |
422 | |
423 | enum { | |
424 | /* Didn't match anything */ | |
425 | LNET_MATCHMD_NONE = (1 << 0), | |
426 | /* Matched OK */ | |
427 | LNET_MATCHMD_OK = (1 << 1), | |
428 | /* Must be discarded */ | |
429 | LNET_MATCHMD_DROP = (1 << 2), | |
430 | /* match and buffer is exhausted */ | |
188acc61 | 431 | LNET_MATCHMD_EXHAUSTED = (1 << 3), |
d7e09d03 | 432 | /* match or drop */ |
188acc61 | 433 | LNET_MATCHMD_FINISH = (LNET_MATCHMD_OK | LNET_MATCHMD_DROP), |
d7e09d03 PT |
434 | }; |
435 | ||
436 | /* Options for lnet_portal_t::ptl_options */ | |
188acc61 JS |
437 | #define LNET_PTL_LAZY (1 << 0) |
438 | #define LNET_PTL_MATCH_UNIQUE (1 << 1) /* unique match, for RDMA */ | |
439 | #define LNET_PTL_MATCH_WILDCARD (1 << 2) /* wildcard match, | |
440 | request portal */ | |
d7e09d03 PT |
441 | |
442 | /* parameter for matching operations (GET, PUT) */ | |
443 | struct lnet_match_info { | |
444 | __u64 mi_mbits; | |
445 | lnet_process_id_t mi_id; | |
446 | unsigned int mi_opc; | |
447 | unsigned int mi_portal; | |
448 | unsigned int mi_rlength; | |
449 | unsigned int mi_roffset; | |
450 | }; | |
451 | ||
452 | /* ME hash of RDMA portal */ | |
453 | #define LNET_MT_HASH_BITS 8 | |
454 | #define LNET_MT_HASH_SIZE (1 << LNET_MT_HASH_BITS) | |
455 | #define LNET_MT_HASH_MASK (LNET_MT_HASH_SIZE - 1) | |
4420cfd3 JS |
456 | /* |
457 | * we allocate (LNET_MT_HASH_SIZE + 1) entries for lnet_match_table::mt_mhash, | |
458 | * the last entry is reserved for MEs with ignore-bits | |
459 | */ | |
d7e09d03 | 460 | #define LNET_MT_HASH_IGNORE LNET_MT_HASH_SIZE |
4420cfd3 JS |
461 | /* |
462 | * __u64 has 2^6 bits, so need 2^(LNET_MT_HASH_BITS - LNET_MT_BITS_U64) which | |
d7e09d03 | 463 | * is 4 __u64s as bit-map, and add an extra __u64 (only use one bit) for the |
4420cfd3 JS |
464 | * ME-list with ignore-bits, which is mtable::mt_mhash[LNET_MT_HASH_IGNORE] |
465 | */ | |
d7e09d03 PT |
466 | #define LNET_MT_BITS_U64 6 /* 2^6 bits */ |
467 | #define LNET_MT_EXHAUSTED_BITS (LNET_MT_HASH_BITS - LNET_MT_BITS_U64) | |
468 | #define LNET_MT_EXHAUSTED_BMAP ((1 << LNET_MT_EXHAUSTED_BITS) + 1) | |
469 | ||
470 | /* portal match table */ | |
471 | struct lnet_match_table { | |
472 | /* reserved for upcoming patches, CPU partition ID */ | |
188acc61 JS |
473 | unsigned int mt_cpt; |
474 | unsigned int mt_portal; /* portal index */ | |
4420cfd3 JS |
475 | /* |
476 | * match table is set as "enabled" if there's non-exhausted MD | |
477 | * attached on mt_mhash, it's only valid for wildcard portal | |
478 | */ | |
188acc61 | 479 | unsigned int mt_enabled; |
d7e09d03 | 480 | /* bitmap to flag whether MEs on mt_mhash are exhausted or not */ |
188acc61 JS |
481 | __u64 mt_exhausted[LNET_MT_EXHAUSTED_BMAP]; |
482 | struct list_head *mt_mhash; /* matching hash */ | |
d7e09d03 PT |
483 | }; |
484 | ||
485 | /* these are only useful for wildcard portal */ | |
486 | /* Turn off message rotor for wildcard portals */ | |
487 | #define LNET_PTL_ROTOR_OFF 0 | |
488 | /* round-robin dispatch all PUT messages for wildcard portals */ | |
489 | #define LNET_PTL_ROTOR_ON 1 | |
490 | /* round-robin dispatch routed PUT message for wildcard portals */ | |
491 | #define LNET_PTL_ROTOR_RR_RT 2 | |
492 | /* dispatch routed PUT message by hashing source NID for wildcard portals */ | |
493 | #define LNET_PTL_ROTOR_HASH_RT 3 | |
494 | ||
495 | typedef struct lnet_portal { | |
188acc61 JS |
496 | spinlock_t ptl_lock; |
497 | unsigned int ptl_index; /* portal ID, reserved */ | |
d7e09d03 | 498 | /* flags on this portal: lazy, unique... */ |
188acc61 | 499 | unsigned int ptl_options; |
2b284326 | 500 | /* list of messages which are stealing buffer */ |
188acc61 | 501 | struct list_head ptl_msg_stealing; |
d7e09d03 | 502 | /* messages blocking for MD */ |
188acc61 | 503 | struct list_head ptl_msg_delayed; |
d7e09d03 PT |
504 | /* Match table for each CPT */ |
505 | struct lnet_match_table **ptl_mtables; | |
506 | /* spread rotor of incoming "PUT" */ | |
188acc61 | 507 | unsigned int ptl_rotor; |
d7e09d03 | 508 | /* # active entries for this portal */ |
188acc61 | 509 | int ptl_mt_nmaps; |
d7e09d03 | 510 | /* array of active entries' cpu-partition-id */ |
188acc61 | 511 | int ptl_mt_maps[0]; |
d7e09d03 PT |
512 | } lnet_portal_t; |
513 | ||
514 | #define LNET_LH_HASH_BITS 12 | |
515 | #define LNET_LH_HASH_SIZE (1ULL << LNET_LH_HASH_BITS) | |
516 | #define LNET_LH_HASH_MASK (LNET_LH_HASH_SIZE - 1) | |
517 | ||
518 | /* resource container (ME, MD, EQ) */ | |
519 | struct lnet_res_container { | |
188acc61 JS |
520 | unsigned int rec_type; /* container type */ |
521 | __u64 rec_lh_cookie; /* cookie generator */ | |
522 | struct list_head rec_active; /* active resource list */ | |
523 | struct list_head *rec_lh_hash; /* handle hash */ | |
d7e09d03 PT |
524 | }; |
525 | ||
526 | /* message container */ | |
527 | struct lnet_msg_container { | |
188acc61 | 528 | int msc_init; /* initialized or not */ |
d7e09d03 | 529 | /* max # threads finalizing */ |
188acc61 | 530 | int msc_nfinalizers; |
d7e09d03 | 531 | /* msgs waiting to complete finalizing */ |
188acc61 JS |
532 | struct list_head msc_finalizing; |
533 | struct list_head msc_active; /* active message list */ | |
d7e09d03 PT |
534 | /* threads doing finalization */ |
535 | void **msc_finalizers; | |
d7e09d03 PT |
536 | }; |
537 | ||
538 | /* Router Checker states */ | |
539 | #define LNET_RC_STATE_SHUTDOWN 0 /* not started */ | |
540 | #define LNET_RC_STATE_RUNNING 1 /* started up OK */ | |
541 | #define LNET_RC_STATE_STOPPING 2 /* telling thread to stop */ | |
542 | ||
3b77f472 | 543 | typedef struct { |
d7e09d03 | 544 | /* CPU partition table of LNet */ |
188acc61 | 545 | struct cfs_cpt_table *ln_cpt_table; |
d7e09d03 | 546 | /* number of CPTs in ln_cpt_table */ |
188acc61 JS |
547 | unsigned int ln_cpt_number; |
548 | unsigned int ln_cpt_bits; | |
d7e09d03 PT |
549 | |
550 | /* protect LNet resources (ME/MD/EQ) */ | |
188acc61 | 551 | struct cfs_percpt_lock *ln_res_lock; |
d7e09d03 | 552 | /* # portals */ |
188acc61 | 553 | int ln_nportals; |
d7e09d03 PT |
554 | /* the vector of portals */ |
555 | lnet_portal_t **ln_portals; | |
556 | /* percpt ME containers */ | |
557 | struct lnet_res_container **ln_me_containers; | |
558 | /* percpt MD container */ | |
559 | struct lnet_res_container **ln_md_containers; | |
560 | ||
561 | /* Event Queue container */ | |
188acc61 JS |
562 | struct lnet_res_container ln_eq_container; |
563 | wait_queue_head_t ln_eq_waitq; | |
564 | spinlock_t ln_eq_wait_lock; | |
565 | unsigned int ln_remote_nets_hbits; | |
d7e09d03 PT |
566 | |
567 | /* protect NI, peer table, credits, routers, rtrbuf... */ | |
188acc61 | 568 | struct cfs_percpt_lock *ln_net_lock; |
d7e09d03 PT |
569 | /* percpt message containers for active/finalizing/freed message */ |
570 | struct lnet_msg_container **ln_msg_containers; | |
571 | lnet_counters_t **ln_counters; | |
572 | struct lnet_peer_table **ln_peer_tables; | |
573 | /* failure simulation */ | |
188acc61 | 574 | struct list_head ln_test_peers; |
d7e09d03 | 575 | |
188acc61 | 576 | struct list_head ln_nis; /* LND instances */ |
d7e09d03 | 577 | /* NIs bound to specific CPT(s) */ |
188acc61 | 578 | struct list_head ln_nis_cpt; |
d7e09d03 | 579 | /* dying LND instances */ |
188acc61 JS |
580 | struct list_head ln_nis_zombie; |
581 | lnet_ni_t *ln_loni; /* the loopback NI */ | |
d7e09d03 PT |
582 | |
583 | /* remote networks with routes to them */ | |
188acc61 | 584 | struct list_head *ln_remote_nets_hash; |
d7e09d03 | 585 | /* validity stamp */ |
188acc61 | 586 | __u64 ln_remote_nets_version; |
d7e09d03 | 587 | /* list of all known routers */ |
188acc61 | 588 | struct list_head ln_routers; |
d7e09d03 | 589 | /* validity stamp */ |
188acc61 | 590 | __u64 ln_routers_version; |
d7e09d03 PT |
591 | /* percpt router buffer pools */ |
592 | lnet_rtrbufpool_t **ln_rtrpools; | |
593 | ||
188acc61 JS |
594 | lnet_handle_md_t ln_ping_target_md; |
595 | lnet_handle_eq_t ln_ping_target_eq; | |
596 | lnet_ping_info_t *ln_ping_info; | |
d7e09d03 PT |
597 | |
598 | /* router checker startup/shutdown state */ | |
188acc61 | 599 | int ln_rc_state; |
d7e09d03 | 600 | /* router checker's event queue */ |
188acc61 | 601 | lnet_handle_eq_t ln_rc_eqh; |
d7e09d03 | 602 | /* rcd still pending on net */ |
188acc61 | 603 | struct list_head ln_rcd_deathrow; |
d7e09d03 | 604 | /* rcd ready for free */ |
188acc61 | 605 | struct list_head ln_rcd_zombie; |
d7e09d03 | 606 | /* serialise startup/shutdown */ |
188acc61 | 607 | struct semaphore ln_rc_signal; |
d7e09d03 | 608 | |
188acc61 JS |
609 | struct mutex ln_api_mutex; |
610 | struct mutex ln_lnd_mutex; | |
d7e09d03 | 611 | /* Have I called LNetNIInit myself? */ |
188acc61 | 612 | int ln_niinit_self; |
d7e09d03 | 613 | /* LNetNIInit/LNetNIFini counter */ |
188acc61 | 614 | int ln_refcount; |
d7e09d03 | 615 | /* shutdown in progress */ |
188acc61 | 616 | int ln_shutdown; |
d7e09d03 | 617 | |
188acc61 JS |
618 | int ln_routing; /* am I a router? */ |
619 | lnet_pid_t ln_pid; /* requested pid */ | |
d7e09d03 | 620 | /* uniquely identifies this ni in this epoch */ |
188acc61 | 621 | __u64 ln_interface_cookie; |
d7e09d03 | 622 | /* registered LNDs */ |
188acc61 | 623 | struct list_head ln_lnds; |
d7e09d03 | 624 | |
d7e09d03 | 625 | /* test protocol compatibility flags */ |
188acc61 | 626 | int ln_testprotocompat; |
d7e09d03 | 627 | |
edeb5d8c AS |
628 | /* |
629 | * 0 - load the NIs from the mod params | |
630 | * 1 - do not load the NIs from the mod params | |
631 | * Reverse logic to ensure that other calls to LNetNIInit | |
632 | * need no change | |
633 | */ | |
634 | bool ln_nis_from_mod_params; | |
635 | ||
7f8b70e0 AS |
636 | /* |
637 | * waitq for router checker. As long as there are no routes in | |
638 | * the list, the router checker will sleep on this queue. when | |
639 | * routes are added the thread will wake up | |
640 | */ | |
641 | wait_queue_head_t ln_rc_waitq; | |
642 | ||
d7e09d03 PT |
643 | } lnet_t; |
644 | ||
645 | #endif |