Commit | Line | Data |
---|---|---|
d7e09d03 PT |
1 | /* |
2 | * GPL HEADER START | |
3 | * | |
4 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or modify | |
7 | * it under the terms of the GNU General Public License version 2 only, | |
8 | * as published by the Free Software Foundation. | |
9 | * | |
10 | * This program is distributed in the hope that it will be useful, but | |
11 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 | * General Public License version 2 for more details (a copy is included | |
14 | * in the LICENSE file that accompanied this code). | |
15 | * | |
16 | * You should have received a copy of the GNU General Public License | |
17 | * version 2 along with this program; If not, see | |
4f3ca893 | 18 | * http://www.gnu.org/licenses/gpl-2.0.html |
d7e09d03 PT |
19 | * |
20 | * GPL HEADER END | |
21 | */ | |
22 | /* | |
23 | * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. | |
24 | * Use is subject to license terms. | |
25 | * | |
1dc563a6 | 26 | * Copyright (c) 2012, 2015, Intel Corporation. |
d7e09d03 PT |
27 | */ |
28 | /* | |
29 | * This file is part of Lustre, http://www.lustre.org/ | |
4f3ca893 | 30 | * Lustre is a trademark of Seagate, Inc. |
d7e09d03 PT |
31 | * |
32 | * lnet/include/lnet/lib-types.h | |
d7e09d03 PT |
33 | */ |
34 | ||
35 | #ifndef __LNET_LIB_TYPES_H__ | |
36 | #define __LNET_LIB_TYPES_H__ | |
37 | ||
db18b8e9 JS |
38 | #include <linux/kthread.h> |
39 | #include <linux/uio.h> | |
40 | #include <linux/types.h> | |
41 | #include <net/sock.h> | |
d7e09d03 | 42 | |
db18b8e9 | 43 | #include "types.h" |
d7e09d03 | 44 | |
db18b8e9 JS |
45 | /* Max payload size */ |
46 | #define LNET_MAX_PAYLOAD CONFIG_LNET_MAX_PAYLOAD | |
47 | #if (LNET_MAX_PAYLOAD < LNET_MTU) | |
48 | # error "LNET_MAX_PAYLOAD too small - error in configure --with-max-payload-mb" | |
49 | #elif (LNET_MAX_PAYLOAD > (PAGE_SIZE * LNET_MAX_IOV)) | |
188acc61 | 50 | # error "LNET_MAX_PAYLOAD too large - error in configure --with-max-payload-mb" |
db18b8e9 | 51 | #endif |
d7e09d03 PT |
52 | |
53 | /* forward refs */ | |
54 | struct lnet_libmd; | |
55 | ||
56 | typedef struct lnet_msg { | |
188acc61 JS |
57 | struct list_head msg_activelist; |
58 | struct list_head msg_list; /* Q for credits/MD */ | |
d7e09d03 | 59 | |
188acc61 | 60 | lnet_process_id_t msg_target; |
d7e09d03 PT |
61 | /* where the message came from; only used for building the event */ |
62 | lnet_nid_t msg_from; | |
63 | __u32 msg_type; | |
64 | ||
253d50eb | 65 | /* committed for sending */ |
d7e09d03 PT |
66 | unsigned int msg_tx_committed:1; |
67 | /* CPT # this message committed for sending */ | |
68 | unsigned int msg_tx_cpt:15; | |
253d50eb | 69 | /* committed for receiving */ |
d7e09d03 PT |
70 | unsigned int msg_rx_committed:1; |
71 | /* CPT # this message committed for receiving */ | |
72 | unsigned int msg_rx_cpt:15; | |
73 | /* queued for tx credit */ | |
74 | unsigned int msg_tx_delayed:1; | |
75 | /* queued for RX buffer */ | |
76 | unsigned int msg_rx_delayed:1; | |
77 | /* ready for pending on RX delay list */ | |
78 | unsigned int msg_rx_ready_delay:1; | |
79 | ||
188acc61 JS |
80 | unsigned int msg_vmflush:1; /* VM trying to free memory */ |
81 | unsigned int msg_target_is_router:1; /* sending to a router */ | |
82 | unsigned int msg_routing:1; /* being forwarded */ | |
83 | unsigned int msg_ack:1; /* ack on finalize (PUT) */ | |
84 | unsigned int msg_sending:1; /* outgoing message */ | |
85 | unsigned int msg_receiving:1; /* being received */ | |
86 | unsigned int msg_txcredit:1; /* taken an NI send credit */ | |
87 | unsigned int msg_peertxcredit:1; /* taken a peer send credit */ | |
4420cfd3 | 88 | unsigned int msg_rtrcredit:1; /* taken a global router credit */ |
188acc61 JS |
89 | unsigned int msg_peerrtrcredit:1; /* taken a peer router credit */ |
90 | unsigned int msg_onactivelist:1; /* on the activelist */ | |
91 | ||
92 | struct lnet_peer *msg_txpeer; /* peer I'm sending to */ | |
93 | struct lnet_peer *msg_rxpeer; /* peer I received from */ | |
94 | ||
95 | void *msg_private; | |
96 | struct lnet_libmd *msg_md; | |
97 | ||
98 | unsigned int msg_len; | |
99 | unsigned int msg_wanted; | |
100 | unsigned int msg_offset; | |
101 | unsigned int msg_niov; | |
102 | struct kvec *msg_iov; | |
103 | lnet_kiov_t *msg_kiov; | |
104 | ||
105 | lnet_event_t msg_ev; | |
106 | lnet_hdr_t msg_hdr; | |
d7e09d03 PT |
107 | } lnet_msg_t; |
108 | ||
d7e09d03 | 109 | typedef struct lnet_libhandle { |
188acc61 JS |
110 | struct list_head lh_hash_chain; |
111 | __u64 lh_cookie; | |
d7e09d03 PT |
112 | } lnet_libhandle_t; |
113 | ||
114 | #define lh_entry(ptr, type, member) \ | |
51078e25 | 115 | ((type *)((char *)(ptr) - (char *)(&((type *)0)->member))) |
d7e09d03 PT |
116 | |
117 | typedef struct lnet_eq { | |
188acc61 JS |
118 | struct list_head eq_list; |
119 | lnet_libhandle_t eq_lh; | |
120 | lnet_seq_t eq_enq_seq; | |
121 | lnet_seq_t eq_deq_seq; | |
122 | unsigned int eq_size; | |
123 | lnet_eq_handler_t eq_callback; | |
124 | lnet_event_t *eq_events; | |
d7e09d03 PT |
125 | int **eq_refs; /* percpt refcount for EQ */ |
126 | } lnet_eq_t; | |
127 | ||
128 | typedef struct lnet_me { | |
188acc61 JS |
129 | struct list_head me_list; |
130 | lnet_libhandle_t me_lh; | |
131 | lnet_process_id_t me_match_id; | |
132 | unsigned int me_portal; | |
133 | unsigned int me_pos; /* hash offset in mt_mhash */ | |
134 | __u64 me_match_bits; | |
135 | __u64 me_ignore_bits; | |
136 | lnet_unlink_t me_unlink; | |
137 | struct lnet_libmd *me_md; | |
d7e09d03 PT |
138 | } lnet_me_t; |
139 | ||
140 | typedef struct lnet_libmd { | |
188acc61 JS |
141 | struct list_head md_list; |
142 | lnet_libhandle_t md_lh; | |
143 | lnet_me_t *md_me; | |
144 | char *md_start; | |
145 | unsigned int md_offset; | |
146 | unsigned int md_length; | |
147 | unsigned int md_max_size; | |
148 | int md_threshold; | |
149 | int md_refcount; | |
150 | unsigned int md_options; | |
151 | unsigned int md_flags; | |
152 | void *md_user_ptr; | |
153 | lnet_eq_t *md_eq; | |
154 | unsigned int md_niov; /* # frags */ | |
d7e09d03 | 155 | union { |
188acc61 JS |
156 | struct kvec iov[LNET_MAX_IOV]; |
157 | lnet_kiov_t kiov[LNET_MAX_IOV]; | |
d7e09d03 PT |
158 | } md_iov; |
159 | } lnet_libmd_t; | |
160 | ||
188acc61 JS |
161 | #define LNET_MD_FLAG_ZOMBIE (1 << 0) |
162 | #define LNET_MD_FLAG_AUTO_UNLINK (1 << 1) | |
163 | #define LNET_MD_FLAG_ABORTED (1 << 2) | |
d7e09d03 | 164 | |
d7e09d03 PT |
165 | typedef struct { |
166 | /* info about peers we are trying to fail */ | |
188acc61 JS |
167 | struct list_head tp_list; /* ln_test_peers */ |
168 | lnet_nid_t tp_nid; /* matching nid */ | |
169 | unsigned int tp_threshold; /* # failures to simulate */ | |
d7e09d03 PT |
170 | } lnet_test_peer_t; |
171 | ||
188acc61 JS |
172 | #define LNET_COOKIE_TYPE_MD 1 |
173 | #define LNET_COOKIE_TYPE_ME 2 | |
174 | #define LNET_COOKIE_TYPE_EQ 3 | |
175 | #define LNET_COOKIE_TYPE_BITS 2 | |
d7e09d03 PT |
176 | #define LNET_COOKIE_MASK ((1ULL << LNET_COOKIE_TYPE_BITS) - 1ULL) |
177 | ||
188acc61 | 178 | struct lnet_ni; /* forward ref */ |
d7e09d03 | 179 | |
3b77f472 | 180 | typedef struct lnet_lnd { |
d7e09d03 | 181 | /* fields managed by portals */ |
188acc61 JS |
182 | struct list_head lnd_list; /* stash in the LND table */ |
183 | int lnd_refcount; /* # active instances */ | |
d7e09d03 PT |
184 | |
185 | /* fields initialised by the LND */ | |
db18b8e9 | 186 | __u32 lnd_type; |
d7e09d03 | 187 | |
b11866b3 AO |
188 | int (*lnd_startup)(struct lnet_ni *ni); |
189 | void (*lnd_shutdown)(struct lnet_ni *ni); | |
d7e09d03 PT |
190 | int (*lnd_ctl)(struct lnet_ni *ni, unsigned int cmd, void *arg); |
191 | ||
4420cfd3 JS |
192 | /* |
193 | * In data movement APIs below, payload buffers are described as a set | |
d7e09d03 PT |
194 | * of 'niov' fragments which are... |
195 | * EITHER | |
196 | * in virtual memory (struct kvec *iov != NULL) | |
197 | * OR | |
198 | * in pages (kernel only: lnet_kiov_t *kiov != NULL). | |
199 | * The LND may NOT overwrite these fragment descriptors. | |
200 | * An 'offset' may specify a byte offset within the set of | |
201 | * fragments to start from. | |
202 | */ | |
203 | ||
4420cfd3 JS |
204 | /* |
205 | * Start sending a preformatted message. 'private' is NULL for PUT and | |
d7e09d03 PT |
206 | * GET messages; otherwise this is a response to an incoming message |
207 | * and 'private' is the 'private' passed to lnet_parse(). Return | |
208 | * non-zero for immediate failure, otherwise complete later with | |
4420cfd3 JS |
209 | * lnet_finalize() |
210 | */ | |
d7e09d03 PT |
211 | int (*lnd_send)(struct lnet_ni *ni, void *private, lnet_msg_t *msg); |
212 | ||
4420cfd3 JS |
213 | /* |
214 | * Start receiving 'mlen' bytes of payload data, skipping the following | |
d7e09d03 | 215 | * 'rlen' - 'mlen' bytes. 'private' is the 'private' passed to |
d766b4b5 | 216 | * lnet_parse(). Return non-zero for immediate failure, otherwise |
d7e09d03 | 217 | * complete later with lnet_finalize(). This also gives back a receive |
4420cfd3 JS |
218 | * credit if the LND does flow control. |
219 | */ | |
d7e09d03 PT |
220 | int (*lnd_recv)(struct lnet_ni *ni, void *private, lnet_msg_t *msg, |
221 | int delayed, unsigned int niov, | |
f351bad2 | 222 | struct kvec *iov, lnet_kiov_t *kiov, |
188acc61 JS |
223 | unsigned int offset, unsigned int mlen, |
224 | unsigned int rlen); | |
d7e09d03 | 225 | |
4420cfd3 JS |
226 | /* |
227 | * lnet_parse() has had to delay processing of this message | |
d7e09d03 PT |
228 | * (e.g. waiting for a forwarding buffer or send credits). Give the |
229 | * LND a chance to free urgently needed resources. If called, return 0 | |
230 | * for success and do NOT give back a receive credit; that has to wait | |
231 | * until lnd_recv() gets called. On failure return < 0 and | |
4420cfd3 JS |
232 | * release resources; lnd_recv() will not be called. |
233 | */ | |
188acc61 JS |
234 | int (*lnd_eager_recv)(struct lnet_ni *ni, void *private, |
235 | lnet_msg_t *msg, void **new_privatep); | |
d7e09d03 PT |
236 | |
237 | /* notification of peer health */ | |
238 | void (*lnd_notify)(struct lnet_ni *ni, lnet_nid_t peer, int alive); | |
239 | ||
240 | /* query of peer aliveness */ | |
188acc61 JS |
241 | void (*lnd_query)(struct lnet_ni *ni, lnet_nid_t peer, |
242 | unsigned long *when); | |
d7e09d03 PT |
243 | |
244 | /* accept a new connection */ | |
e327dc88 | 245 | int (*lnd_accept)(struct lnet_ni *ni, struct socket *sock); |
d7e09d03 PT |
246 | } lnd_t; |
247 | ||
d7e09d03 PT |
248 | struct lnet_tx_queue { |
249 | int tq_credits; /* # tx credits free */ | |
250 | int tq_credits_min; /* lowest it's been */ | |
251 | int tq_credits_max; /* total # tx credits */ | |
188acc61 | 252 | struct list_head tq_delayed; /* delayed TXs */ |
d7e09d03 PT |
253 | }; |
254 | ||
d7e09d03 | 255 | typedef struct lnet_ni { |
188acc61 JS |
256 | spinlock_t ni_lock; |
257 | struct list_head ni_list; /* chain on ln_nis */ | |
258 | struct list_head ni_cptlist; /* chain on ln_nis_cpt */ | |
259 | int ni_maxtxcredits; /* # tx credits */ | |
d7e09d03 | 260 | /* # per-peer send credits */ |
188acc61 | 261 | int ni_peertxcredits; |
d7e09d03 | 262 | /* # per-peer router buffer credits */ |
188acc61 | 263 | int ni_peerrtrcredits; |
d7e09d03 | 264 | /* seconds to consider peer dead */ |
188acc61 JS |
265 | int ni_peertimeout; |
266 | int ni_ncpts; /* number of CPTs */ | |
267 | __u32 *ni_cpts; /* bond NI on some CPTs */ | |
268 | lnet_nid_t ni_nid; /* interface's NID */ | |
269 | void *ni_data; /* instance-specific data */ | |
270 | lnd_t *ni_lnd; /* procedural interface */ | |
d7e09d03 PT |
271 | struct lnet_tx_queue **ni_tx_queues; /* percpt TX queues */ |
272 | int **ni_refs; /* percpt reference count */ | |
ec0067d1 | 273 | time64_t ni_last_alive;/* when I was last alive */ |
188acc61 | 274 | lnet_ni_status_t *ni_status; /* my health status */ |
d7e09d03 | 275 | /* equivalent interfaces to use */ |
188acc61 | 276 | char *ni_interfaces[LNET_MAX_INTERFACES]; |
d7e09d03 PT |
277 | } lnet_ni_t; |
278 | ||
279 | #define LNET_PROTO_PING_MATCHBITS 0x8000000000000000LL | |
280 | ||
4420cfd3 JS |
281 | /* |
282 | * NB: the values of these features are equal to LNET_PROTO_PING_VERSION_x | |
283 | * of old LNet, so there shouldn't be any compatibility issue | |
284 | */ | |
d7e09d03 PT |
285 | #define LNET_PING_FEAT_INVAL (0) /* no feature */ |
286 | #define LNET_PING_FEAT_BASE (1 << 0) /* just a ping */ | |
287 | #define LNET_PING_FEAT_NI_STATUS (1 << 1) /* return NI status */ | |
86ef6250 | 288 | #define LNET_PING_FEAT_RTE_DISABLED (1 << 2) /* Routing disabled */ |
d7e09d03 PT |
289 | |
290 | #define LNET_PING_FEAT_MASK (LNET_PING_FEAT_BASE | \ | |
291 | LNET_PING_FEAT_NI_STATUS) | |
292 | ||
d7e09d03 PT |
293 | /* router checker data, per router */ |
294 | #define LNET_MAX_RTR_NIS 16 | |
295 | #define LNET_PINGINFO_SIZE offsetof(lnet_ping_info_t, pi_ni[LNET_MAX_RTR_NIS]) | |
296 | typedef struct { | |
297 | /* chain on the_lnet.ln_rcd_zombie or ln_rcd_deathrow */ | |
188acc61 JS |
298 | struct list_head rcd_list; |
299 | lnet_handle_md_t rcd_mdh; /* ping buffer MD */ | |
d7e09d03 PT |
300 | struct lnet_peer *rcd_gateway; /* reference to gateway */ |
301 | lnet_ping_info_t *rcd_pinginfo; /* ping buffer */ | |
302 | } lnet_rc_data_t; | |
303 | ||
304 | typedef struct lnet_peer { | |
188acc61 JS |
305 | struct list_head lp_hashlist; /* chain on peer hash */ |
306 | struct list_head lp_txq; /* messages blocking for | |
307 | tx credits */ | |
308 | struct list_head lp_rtrq; /* messages blocking for | |
309 | router credits */ | |
310 | struct list_head lp_rtr_list; /* chain on router list */ | |
311 | int lp_txcredits; /* # tx credits available */ | |
312 | int lp_mintxcredits; /* low water mark */ | |
313 | int lp_rtrcredits; /* # router credits */ | |
314 | int lp_minrtrcredits; /* low water mark */ | |
315 | unsigned int lp_alive:1; /* alive/dead? */ | |
316 | unsigned int lp_notify:1; /* notification outstanding? */ | |
317 | unsigned int lp_notifylnd:1;/* outstanding notification | |
318 | for LND? */ | |
319 | unsigned int lp_notifying:1; /* some thread is handling | |
320 | notification */ | |
321 | unsigned int lp_ping_notsent;/* SEND event outstanding | |
322 | from ping */ | |
323 | int lp_alive_count; /* # times router went | |
324 | dead<->alive */ | |
325 | long lp_txqnob; /* bytes queued for sending */ | |
326 | unsigned long lp_timestamp; /* time of last aliveness | |
327 | news */ | |
328 | unsigned long lp_ping_timestamp;/* time of last ping | |
329 | attempt */ | |
330 | unsigned long lp_ping_deadline; /* != 0 if ping reply | |
331 | expected */ | |
332 | unsigned long lp_last_alive; /* when I was last alive */ | |
333 | unsigned long lp_last_query; /* when lp_ni was queried | |
334 | last time */ | |
335 | lnet_ni_t *lp_ni; /* interface peer is on */ | |
336 | lnet_nid_t lp_nid; /* peer's NID */ | |
337 | int lp_refcount; /* # refs */ | |
338 | int lp_cpt; /* CPT this peer attached on */ | |
d7e09d03 | 339 | /* # refs from lnet_route_t::lr_gateway */ |
188acc61 | 340 | int lp_rtr_refcount; |
d7e09d03 | 341 | /* returned RC ping features */ |
188acc61 JS |
342 | unsigned int lp_ping_feats; |
343 | struct list_head lp_routes; /* routers on this peer */ | |
d7e09d03 PT |
344 | lnet_rc_data_t *lp_rcd; /* router checker state */ |
345 | } lnet_peer_t; | |
346 | ||
d7e09d03 | 347 | /* peer hash size */ |
188acc61 JS |
348 | #define LNET_PEER_HASH_BITS 9 |
349 | #define LNET_PEER_HASH_SIZE (1 << LNET_PEER_HASH_BITS) | |
d7e09d03 PT |
350 | |
351 | /* peer hash table */ | |
352 | struct lnet_peer_table { | |
188acc61 JS |
353 | int pt_version; /* /proc validity stamp */ |
354 | int pt_number; /* # peers extant */ | |
21602c7d AS |
355 | /* # zombies to go to deathrow (and not there yet) */ |
356 | int pt_zombies; | |
188acc61 JS |
357 | struct list_head pt_deathrow; /* zombie peers */ |
358 | struct list_head *pt_hash; /* NID->peer hash */ | |
d7e09d03 PT |
359 | }; |
360 | ||
4420cfd3 JS |
361 | /* |
362 | * peer aliveness is enabled only on routers for peers in a network where the | |
363 | * lnet_ni_t::ni_peertimeout has been set to a positive value | |
364 | */ | |
5fd88337 | 365 | #define lnet_peer_aliveness_enabled(lp) (the_lnet.ln_routing && \ |
d7e09d03 PT |
366 | (lp)->lp_ni->ni_peertimeout > 0) |
367 | ||
368 | typedef struct { | |
188acc61 JS |
369 | struct list_head lr_list; /* chain on net */ |
370 | struct list_head lr_gwlist; /* chain on gateway */ | |
d7e09d03 | 371 | lnet_peer_t *lr_gateway; /* router node */ |
188acc61 JS |
372 | __u32 lr_net; /* remote network number */ |
373 | int lr_seq; /* sequence for round-robin */ | |
374 | unsigned int lr_downis; /* number of down NIs */ | |
375 | unsigned int lr_hops; /* how far I am */ | |
376 | unsigned int lr_priority; /* route priority */ | |
d7e09d03 PT |
377 | } lnet_route_t; |
378 | ||
379 | #define LNET_REMOTE_NETS_HASH_DEFAULT (1U << 7) | |
380 | #define LNET_REMOTE_NETS_HASH_MAX (1U << 16) | |
381 | #define LNET_REMOTE_NETS_HASH_SIZE (1 << the_lnet.ln_remote_nets_hbits) | |
382 | ||
383 | typedef struct { | |
188acc61 JS |
384 | struct list_head lrn_list; /* chain on |
385 | ln_remote_nets_hash */ | |
386 | struct list_head lrn_routes; /* routes to me */ | |
387 | __u32 lrn_net; /* my net number */ | |
d7e09d03 PT |
388 | } lnet_remotenet_t; |
389 | ||
db18b8e9 JS |
390 | /** lnet message has credit and can be submitted to lnd for send/receive */ |
391 | #define LNET_CREDIT_OK 0 | |
392 | /** lnet message is waiting for credit */ | |
393 | #define LNET_CREDIT_WAIT 1 | |
394 | ||
d7e09d03 | 395 | typedef struct { |
188acc61 JS |
396 | struct list_head rbp_bufs; /* my free buffer pool */ |
397 | struct list_head rbp_msgs; /* messages blocking | |
398 | for a buffer */ | |
399 | int rbp_npages; /* # pages in each buffer */ | |
400 | int rbp_nbuffers; /* # buffers */ | |
401 | int rbp_credits; /* # free buffers / | |
402 | blocked messages */ | |
403 | int rbp_mincredits; /* low water mark */ | |
d7e09d03 PT |
404 | } lnet_rtrbufpool_t; |
405 | ||
406 | typedef struct { | |
188acc61 JS |
407 | struct list_head rb_list; /* chain on rbp_bufs */ |
408 | lnet_rtrbufpool_t *rb_pool; /* owning pool */ | |
409 | lnet_kiov_t rb_kiov[0]; /* the buffer space */ | |
d7e09d03 PT |
410 | } lnet_rtrbuf_t; |
411 | ||
188acc61 | 412 | #define LNET_PEER_HASHSIZE 503 /* prime! */ |
d7e09d03 | 413 | |
86ef6250 AS |
414 | #define LNET_TINY_BUF_IDX 0 |
415 | #define LNET_SMALL_BUF_IDX 1 | |
416 | #define LNET_LARGE_BUF_IDX 2 | |
417 | ||
418 | /* # different router buffer pools */ | |
419 | #define LNET_NRBPOOLS (LNET_LARGE_BUF_IDX + 1) | |
d7e09d03 PT |
420 | |
421 | enum { | |
422 | /* Didn't match anything */ | |
423 | LNET_MATCHMD_NONE = (1 << 0), | |
424 | /* Matched OK */ | |
425 | LNET_MATCHMD_OK = (1 << 1), | |
426 | /* Must be discarded */ | |
427 | LNET_MATCHMD_DROP = (1 << 2), | |
428 | /* match and buffer is exhausted */ | |
188acc61 | 429 | LNET_MATCHMD_EXHAUSTED = (1 << 3), |
d7e09d03 | 430 | /* match or drop */ |
188acc61 | 431 | LNET_MATCHMD_FINISH = (LNET_MATCHMD_OK | LNET_MATCHMD_DROP), |
d7e09d03 PT |
432 | }; |
433 | ||
434 | /* Options for lnet_portal_t::ptl_options */ | |
188acc61 JS |
435 | #define LNET_PTL_LAZY (1 << 0) |
436 | #define LNET_PTL_MATCH_UNIQUE (1 << 1) /* unique match, for RDMA */ | |
437 | #define LNET_PTL_MATCH_WILDCARD (1 << 2) /* wildcard match, | |
438 | request portal */ | |
d7e09d03 PT |
439 | |
440 | /* parameter for matching operations (GET, PUT) */ | |
441 | struct lnet_match_info { | |
442 | __u64 mi_mbits; | |
443 | lnet_process_id_t mi_id; | |
444 | unsigned int mi_opc; | |
445 | unsigned int mi_portal; | |
446 | unsigned int mi_rlength; | |
447 | unsigned int mi_roffset; | |
448 | }; | |
449 | ||
450 | /* ME hash of RDMA portal */ | |
451 | #define LNET_MT_HASH_BITS 8 | |
452 | #define LNET_MT_HASH_SIZE (1 << LNET_MT_HASH_BITS) | |
453 | #define LNET_MT_HASH_MASK (LNET_MT_HASH_SIZE - 1) | |
4420cfd3 JS |
454 | /* |
455 | * we allocate (LNET_MT_HASH_SIZE + 1) entries for lnet_match_table::mt_mhash, | |
456 | * the last entry is reserved for MEs with ignore-bits | |
457 | */ | |
d7e09d03 | 458 | #define LNET_MT_HASH_IGNORE LNET_MT_HASH_SIZE |
4420cfd3 JS |
459 | /* |
460 | * __u64 has 2^6 bits, so need 2^(LNET_MT_HASH_BITS - LNET_MT_BITS_U64) which | |
d7e09d03 | 461 | * is 4 __u64s as bit-map, and add an extra __u64 (only use one bit) for the |
4420cfd3 JS |
462 | * ME-list with ignore-bits, which is mtable::mt_mhash[LNET_MT_HASH_IGNORE] |
463 | */ | |
d7e09d03 PT |
464 | #define LNET_MT_BITS_U64 6 /* 2^6 bits */ |
465 | #define LNET_MT_EXHAUSTED_BITS (LNET_MT_HASH_BITS - LNET_MT_BITS_U64) | |
466 | #define LNET_MT_EXHAUSTED_BMAP ((1 << LNET_MT_EXHAUSTED_BITS) + 1) | |
467 | ||
468 | /* portal match table */ | |
469 | struct lnet_match_table { | |
470 | /* reserved for upcoming patches, CPU partition ID */ | |
188acc61 JS |
471 | unsigned int mt_cpt; |
472 | unsigned int mt_portal; /* portal index */ | |
4420cfd3 JS |
473 | /* |
474 | * match table is set as "enabled" if there's non-exhausted MD | |
475 | * attached on mt_mhash, it's only valid for wildcard portal | |
476 | */ | |
188acc61 | 477 | unsigned int mt_enabled; |
d7e09d03 | 478 | /* bitmap to flag whether MEs on mt_mhash are exhausted or not */ |
188acc61 JS |
479 | __u64 mt_exhausted[LNET_MT_EXHAUSTED_BMAP]; |
480 | struct list_head *mt_mhash; /* matching hash */ | |
d7e09d03 PT |
481 | }; |
482 | ||
483 | /* these are only useful for wildcard portal */ | |
484 | /* Turn off message rotor for wildcard portals */ | |
485 | #define LNET_PTL_ROTOR_OFF 0 | |
486 | /* round-robin dispatch all PUT messages for wildcard portals */ | |
487 | #define LNET_PTL_ROTOR_ON 1 | |
488 | /* round-robin dispatch routed PUT message for wildcard portals */ | |
489 | #define LNET_PTL_ROTOR_RR_RT 2 | |
490 | /* dispatch routed PUT message by hashing source NID for wildcard portals */ | |
491 | #define LNET_PTL_ROTOR_HASH_RT 3 | |
492 | ||
493 | typedef struct lnet_portal { | |
188acc61 JS |
494 | spinlock_t ptl_lock; |
495 | unsigned int ptl_index; /* portal ID, reserved */ | |
d7e09d03 | 496 | /* flags on this portal: lazy, unique... */ |
188acc61 | 497 | unsigned int ptl_options; |
2b284326 | 498 | /* list of messages which are stealing buffer */ |
188acc61 | 499 | struct list_head ptl_msg_stealing; |
d7e09d03 | 500 | /* messages blocking for MD */ |
188acc61 | 501 | struct list_head ptl_msg_delayed; |
d7e09d03 PT |
502 | /* Match table for each CPT */ |
503 | struct lnet_match_table **ptl_mtables; | |
504 | /* spread rotor of incoming "PUT" */ | |
188acc61 | 505 | unsigned int ptl_rotor; |
d7e09d03 | 506 | /* # active entries for this portal */ |
188acc61 | 507 | int ptl_mt_nmaps; |
d7e09d03 | 508 | /* array of active entries' cpu-partition-id */ |
188acc61 | 509 | int ptl_mt_maps[0]; |
d7e09d03 PT |
510 | } lnet_portal_t; |
511 | ||
512 | #define LNET_LH_HASH_BITS 12 | |
513 | #define LNET_LH_HASH_SIZE (1ULL << LNET_LH_HASH_BITS) | |
514 | #define LNET_LH_HASH_MASK (LNET_LH_HASH_SIZE - 1) | |
515 | ||
516 | /* resource container (ME, MD, EQ) */ | |
517 | struct lnet_res_container { | |
188acc61 JS |
518 | unsigned int rec_type; /* container type */ |
519 | __u64 rec_lh_cookie; /* cookie generator */ | |
520 | struct list_head rec_active; /* active resource list */ | |
521 | struct list_head *rec_lh_hash; /* handle hash */ | |
d7e09d03 PT |
522 | }; |
523 | ||
524 | /* message container */ | |
525 | struct lnet_msg_container { | |
188acc61 | 526 | int msc_init; /* initialized or not */ |
d7e09d03 | 527 | /* max # threads finalizing */ |
188acc61 | 528 | int msc_nfinalizers; |
d7e09d03 | 529 | /* msgs waiting to complete finalizing */ |
188acc61 JS |
530 | struct list_head msc_finalizing; |
531 | struct list_head msc_active; /* active message list */ | |
d7e09d03 PT |
532 | /* threads doing finalization */ |
533 | void **msc_finalizers; | |
d7e09d03 PT |
534 | }; |
535 | ||
536 | /* Router Checker states */ | |
537 | #define LNET_RC_STATE_SHUTDOWN 0 /* not started */ | |
538 | #define LNET_RC_STATE_RUNNING 1 /* started up OK */ | |
539 | #define LNET_RC_STATE_STOPPING 2 /* telling thread to stop */ | |
540 | ||
3b77f472 | 541 | typedef struct { |
d7e09d03 | 542 | /* CPU partition table of LNet */ |
188acc61 | 543 | struct cfs_cpt_table *ln_cpt_table; |
d7e09d03 | 544 | /* number of CPTs in ln_cpt_table */ |
188acc61 JS |
545 | unsigned int ln_cpt_number; |
546 | unsigned int ln_cpt_bits; | |
d7e09d03 PT |
547 | |
548 | /* protect LNet resources (ME/MD/EQ) */ | |
188acc61 | 549 | struct cfs_percpt_lock *ln_res_lock; |
d7e09d03 | 550 | /* # portals */ |
188acc61 | 551 | int ln_nportals; |
d7e09d03 PT |
552 | /* the vector of portals */ |
553 | lnet_portal_t **ln_portals; | |
554 | /* percpt ME containers */ | |
555 | struct lnet_res_container **ln_me_containers; | |
556 | /* percpt MD container */ | |
557 | struct lnet_res_container **ln_md_containers; | |
558 | ||
559 | /* Event Queue container */ | |
188acc61 JS |
560 | struct lnet_res_container ln_eq_container; |
561 | wait_queue_head_t ln_eq_waitq; | |
562 | spinlock_t ln_eq_wait_lock; | |
563 | unsigned int ln_remote_nets_hbits; | |
d7e09d03 PT |
564 | |
565 | /* protect NI, peer table, credits, routers, rtrbuf... */ | |
188acc61 | 566 | struct cfs_percpt_lock *ln_net_lock; |
d7e09d03 PT |
567 | /* percpt message containers for active/finalizing/freed message */ |
568 | struct lnet_msg_container **ln_msg_containers; | |
569 | lnet_counters_t **ln_counters; | |
570 | struct lnet_peer_table **ln_peer_tables; | |
571 | /* failure simulation */ | |
188acc61 | 572 | struct list_head ln_test_peers; |
d7e09d03 | 573 | |
188acc61 | 574 | struct list_head ln_nis; /* LND instances */ |
d7e09d03 | 575 | /* NIs bond on specific CPT(s) */ |
188acc61 | 576 | struct list_head ln_nis_cpt; |
d7e09d03 | 577 | /* dying LND instances */ |
188acc61 JS |
578 | struct list_head ln_nis_zombie; |
579 | lnet_ni_t *ln_loni; /* the loopback NI */ | |
d7e09d03 | 580 | /* NI to wait for events in */ |
188acc61 | 581 | lnet_ni_t *ln_eq_waitni; |
d7e09d03 PT |
582 | |
583 | /* remote networks with routes to them */ | |
188acc61 | 584 | struct list_head *ln_remote_nets_hash; |
d7e09d03 | 585 | /* validity stamp */ |
188acc61 | 586 | __u64 ln_remote_nets_version; |
d7e09d03 | 587 | /* list of all known routers */ |
188acc61 | 588 | struct list_head ln_routers; |
d7e09d03 | 589 | /* validity stamp */ |
188acc61 | 590 | __u64 ln_routers_version; |
d7e09d03 PT |
591 | /* percpt router buffer pools */ |
592 | lnet_rtrbufpool_t **ln_rtrpools; | |
593 | ||
188acc61 JS |
594 | lnet_handle_md_t ln_ping_target_md; |
595 | lnet_handle_eq_t ln_ping_target_eq; | |
596 | lnet_ping_info_t *ln_ping_info; | |
d7e09d03 PT |
597 | |
598 | /* router checker startup/shutdown state */ | |
188acc61 | 599 | int ln_rc_state; |
d7e09d03 | 600 | /* router checker's event queue */ |
188acc61 | 601 | lnet_handle_eq_t ln_rc_eqh; |
d7e09d03 | 602 | /* rcd still pending on net */ |
188acc61 | 603 | struct list_head ln_rcd_deathrow; |
d7e09d03 | 604 | /* rcd ready for free */ |
188acc61 | 605 | struct list_head ln_rcd_zombie; |
d7e09d03 | 606 | /* serialise startup/shutdown */ |
188acc61 | 607 | struct semaphore ln_rc_signal; |
d7e09d03 | 608 | |
188acc61 JS |
609 | struct mutex ln_api_mutex; |
610 | struct mutex ln_lnd_mutex; | |
611 | int ln_init; /* lnet_init() | |
612 | called? */ | |
d7e09d03 | 613 | /* Have I called LNetNIInit myself? */ |
188acc61 | 614 | int ln_niinit_self; |
d7e09d03 | 615 | /* LNetNIInit/LNetNIFini counter */ |
188acc61 | 616 | int ln_refcount; |
d7e09d03 | 617 | /* shutdown in progress */ |
188acc61 | 618 | int ln_shutdown; |
d7e09d03 | 619 | |
188acc61 JS |
620 | int ln_routing; /* am I a router? */ |
621 | lnet_pid_t ln_pid; /* requested pid */ | |
d7e09d03 | 622 | /* uniquely identifies this ni in this epoch */ |
188acc61 | 623 | __u64 ln_interface_cookie; |
d7e09d03 | 624 | /* registered LNDs */ |
188acc61 | 625 | struct list_head ln_lnds; |
d7e09d03 | 626 | |
d7e09d03 | 627 | /* test protocol compatibility flags */ |
188acc61 | 628 | int ln_testprotocompat; |
d7e09d03 | 629 | |
edeb5d8c AS |
630 | /* |
631 | * 0 - load the NIs from the mod params | |
632 | * 1 - do not load the NIs from the mod params | |
633 | * Reverse logic to ensure that other calls to LNetNIInit | |
634 | * need no change | |
635 | */ | |
636 | bool ln_nis_from_mod_params; | |
637 | ||
d7e09d03 PT |
638 | } lnet_t; |
639 | ||
640 | #endif |