Commit | Line | Data |
---|---|---|
7df20f2d SD |
1 | /* |
2 | * Intel MIC Platform Software Stack (MPSS) | |
3 | * | |
4 | * This file is provided under a dual BSD/GPLv2 license. When using or | |
5 | * redistributing this file, you may do so under either license. | |
6 | * | |
7 | * GPL LICENSE SUMMARY | |
8 | * | |
9 | * Copyright(c) 2014 Intel Corporation. | |
10 | * | |
11 | * This program is free software; you can redistribute it and/or modify | |
12 | * it under the terms of version 2 of the GNU General Public License as | |
13 | * published by the Free Software Foundation. | |
14 | * | |
15 | * This program is distributed in the hope that it will be useful, but | |
16 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
18 | * General Public License for more details. | |
19 | * | |
20 | * BSD LICENSE | |
21 | * | |
22 | * Copyright(c) 2014 Intel Corporation. | |
23 | * | |
24 | * Redistribution and use in source and binary forms, with or without | |
25 | * modification, are permitted provided that the following conditions | |
26 | * are met: | |
27 | * | |
28 | * * Redistributions of source code must retain the above copyright | |
29 | * notice, this list of conditions and the following disclaimer. | |
30 | * * Redistributions in binary form must reproduce the above copyright | |
31 | * notice, this list of conditions and the following disclaimer in | |
32 | * the documentation and/or other materials provided with the | |
33 | * distribution. | |
34 | * * Neither the name of Intel Corporation nor the names of its | |
35 | * contributors may be used to endorse or promote products derived | |
36 | * from this software without specific prior written permission. | |
37 | * | |
38 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
39 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
40 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
41 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
42 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
43 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
44 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
45 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
46 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
47 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
48 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
49 | * | |
50 | * Intel SCIF driver. | |
51 | * | |
52 | */ | |
53 | #ifndef __SCIF_H__ | |
54 | #define __SCIF_H__ | |
55 | ||
56 | #include <linux/types.h> | |
57 | #include <linux/poll.h> | |
d3d912eb | 58 | #include <linux/device.h> |
7df20f2d SD |
59 | #include <linux/scif_ioctl.h> |
60 | ||
61 | #define SCIF_ACCEPT_SYNC 1 /* scif_accept() flag: block until a connection request is presented */ | |
62 | #define SCIF_SEND_BLOCK 1 /* scif_send() flag: block until the entire message is sent */ | |
63 | #define SCIF_RECV_BLOCK 1 /* scif_recv() flag: block until the entire message is received */ | |
64 | ||
65 | enum { /* prot_flags bits for scif_register(); may be OR'ed together */ | |
66 | SCIF_PROT_READ = (1 << 0), /* allow read operations from the window */ | |
67 | SCIF_PROT_WRITE = (1 << 1) /* allow write operations to the window */ | |
68 | }; | |
69 | ||
70 | enum { /* map_flags values for scif_register() */ | |
71 | SCIF_MAP_FIXED = 0x10, /* place the window at exactly the requested, page-aligned offset */ | |
72 | SCIF_MAP_KERNEL = 0x20, /* NOTE(review): not described in the visible part of this header - confirm meaning */ | |
73 | }; | |
74 | ||
75 | enum { /* NOTE(review): presumably flags for the scif_fence_*() calls; their semantics are not documented in the visible part of this header - confirm */ | |
76 | SCIF_FENCE_INIT_SELF = (1 << 0), | |
77 | SCIF_FENCE_INIT_PEER = (1 << 1), | |
78 | SCIF_SIGNAL_LOCAL = (1 << 4), | |
79 | SCIF_SIGNAL_REMOTE = (1 << 5) | |
80 | }; | |
81 | ||
82 | enum { /* rma_flags bits for RMA transfers such as scif_readfrom(); zero or more may be OR'ed together */ | |
83 | SCIF_RMA_USECPU = (1 << 0), /* perform the transfer using the CPU, otherwise the DMA engine is used */ | |
84 | SCIF_RMA_USECACHE = (1 << 1), /* NOTE(review): not described in the visible part of this header - confirm meaning */ | |
85 | SCIF_RMA_SYNC = (1 << 2), /* perform the transfer synchronously, returning after it has completed */ | |
86 | SCIF_RMA_ORDERED = (1 << 3) /* last cacheline or partial cacheline becomes visible after all other transferred data */ | |
87 | }; | |
88 | ||
89 | /* End of SCIF Admin Reserved Ports: scif_bind() documents ports below 1024 as bindable only by privileged processes */ | |
90 | #define SCIF_ADMIN_PORT_END 1024 | |
91 | ||
92 | /* End of SCIF Reserved Ports: scif_bind() with pn == 0 assigns a port number >= SCIF_PORT_RSVD */ | |
93 | #define SCIF_PORT_RSVD 1088 | |
94 | ||
95 | typedef struct scif_endpt *scif_epd_t; /* endpoint descriptor: returned by scif_open() and passed to the other SCIF calls */ | |
96 | ||
b7f94441 AD |
97 | /** |
98 | * struct scif_pollepd - SCIF endpoint to be monitored via scif_poll() | |
99 | * @epd: SCIF endpoint descriptor to monitor | |
100 | * @events: requested events | |
101 | * @revents: returned events | |
102 | */ | |
103 | struct scif_pollepd { | |
104 | scif_epd_t epd; | |
105 | short events; | |
106 | short revents; | |
107 | }; | |
108 | ||
d3d912eb AD |
109 | /** |
110 | * struct scif_peer_dev - representation of a peer SCIF device | |
111 | * | |
112 | * Peer devices show up as PCIe devices for the mgmt node but not the cards. | |
113 | * The mgmt node discovers all the cards on the PCIe bus and informs the other | |
114 | * cards about their peers. Upon notification of a peer a node adds a peer | |
115 | * device to the peer bus to maintain symmetry in the way devices are | |
116 | * discovered across all nodes in the SCIF network. | |
117 | * | |
118 | * @dev: underlying device | |
119 | * @dnode: The destination node which this device will communicate with. | |
120 | */ | |
121 | struct scif_peer_dev { | |
122 | struct device dev; | |
123 | u8 dnode; | |
124 | }; | |
125 | ||
126 | /** | |
127 | * struct scif_client - representation of a SCIF client | |
128 | * @name: client name | |
129 | * @probe: client method called when a peer device is registered | |
130 | * @remove: client method called when a peer device is unregistered | |
131 | * @si: subsys_interface used internally for implementing SCIF clients | |
132 | */ | |
133 | struct scif_client { | |
134 | const char *name; | |
135 | void (*probe)(struct scif_peer_dev *spdev); | |
136 | void (*remove)(struct scif_peer_dev *spdev); | |
137 | struct subsys_interface si; | |
138 | }; | |
139 | ||
7df20f2d SD |
140 | #define SCIF_OPEN_FAILED ((scif_epd_t)-1) /* user-mode failure return of scif_open() */ |
141 | #define SCIF_REGISTER_FAILED ((off_t)-1) /* user-mode failure return of scif_register() */ | |
142 | #define SCIF_MMAP_FAILED ((void *)-1) /* NOTE(review): presumably the failure return of a SCIF mmap call - confirm */ | |
143 | ||
144 | /** | |
145 | * scif_open() - Create an endpoint | |
146 | * | |
147 | * Return: | |
148 | * Upon successful completion, scif_open() returns an endpoint descriptor to | |
149 | * be used in subsequent SCIF function calls to refer to that endpoint; | |
150 | * otherwise in user mode SCIF_OPEN_FAILED (that is ((scif_epd_t)-1)) is | |
151 | * returned and errno is set to indicate the error; in kernel mode a NULL | |
152 | * scif_epd_t is returned. | |
153 | * | |
154 | * Errors: | |
155 | * ENOMEM - Insufficient kernel memory was available | |
156 | */ | |
157 | scif_epd_t scif_open(void); | |
158 | ||
159 | /** | |
160 | * scif_bind() - Bind an endpoint to a port | |
161 | * @epd: endpoint descriptor | |
162 | * @pn: port number | |
163 | * | |
164 | * scif_bind() binds endpoint epd to port pn, where pn is a port number on the | |
165 | * local node. If pn is zero, a port number greater than or equal to | |
166 | * SCIF_PORT_RSVD is assigned and returned. Each endpoint may be bound to | |
167 | * exactly one local port. Ports less than 1024 when requested can only be bound | |
168 | * by system (or root) processes or by processes executed by privileged users. | |
169 | * | |
170 | * Return: | |
171 | * Upon successful completion, scif_bind() returns the port number to which epd | |
172 | * is bound; otherwise in user mode -1 is returned and errno is set to | |
173 | * indicate the error; in kernel mode the negative of one of the following | |
174 | * errors is returned. | |
175 | * | |
176 | * Errors: | |
177 | * EBADF, ENOTTY - epd is not a valid endpoint descriptor | |
178 | * EINVAL - the endpoint or the port is already bound | |
179 | * EISCONN - The endpoint is already connected | |
180 | * ENOSPC - No port number available for assignment | |
181 | * EACCES - The port requested is protected and the user is not the superuser | |
182 | */ | |
183 | int scif_bind(scif_epd_t epd, u16 pn); | |
184 | ||
185 | /** | |
186 | * scif_listen() - Listen for connections on an endpoint | |
187 | * @epd: endpoint descriptor | |
188 | * @backlog: maximum pending connection requests | |
189 | * | |
190 | * scif_listen() marks the endpoint epd as a listening endpoint - that is, as | |
191 | * an endpoint that will be used to accept incoming connection requests. Once | |
192 | * so marked, the endpoint is said to be in the listening state and may not be | |
193 | * used as the endpoint of a connection. | |
194 | * | |
195 | * The endpoint, epd, must have been bound to a port. | |
196 | * | |
197 | * The backlog argument defines the maximum length to which the queue of | |
198 | * pending connections for epd may grow. If a connection request arrives when | |
199 | * the queue is full, the client may receive an error with an indication that | |
200 | * the connection was refused. | |
201 | * | |
202 | * Return: | |
203 | * Upon successful completion, scif_listen() returns 0; otherwise in user mode | |
204 | * -1 is returned and errno is set to indicate the error; in kernel mode the | |
205 | * negative of one of the following errors is returned. | |
206 | * | |
207 | * Errors: | |
208 | * EBADF, ENOTTY - epd is not a valid endpoint descriptor | |
209 | * EINVAL - the endpoint is not bound to a port | |
210 | * EISCONN - The endpoint is already connected or listening | |
211 | */ | |
212 | int scif_listen(scif_epd_t epd, int backlog); | |
213 | ||
214 | /** | |
215 | * scif_connect() - Initiate a connection on a port | |
216 | * @epd: endpoint descriptor | |
217 | * @dst: global id of port to which to connect | |
218 | * | |
219 | * The scif_connect() function requests the connection of endpoint epd to remote | |
220 | * port dst. If the connection is successful, a peer endpoint, bound to dst, is | |
221 | * created on node dst.node. On successful return, the connection is complete. | |
222 | * | |
223 | * If the endpoint epd has not already been bound to a port, scif_connect() | |
224 | * will bind it to an unused local port. | |
225 | * | |
226 | * A connection is terminated when an endpoint of the connection is closed, | |
227 | * either explicitly by scif_close(), or when a process that owns one of the | |
228 | * endpoints of the connection is terminated. | |
229 | * | |
230 | * In user space, scif_connect() supports an asynchronous connection mode | |
231 | * if the application has set the O_NONBLOCK flag on the endpoint via the | |
232 | * fcntl() system call. Setting this flag will result in the calling process | |
233 | * not waiting during scif_connect(). | |
234 | * | |
235 | * Return: | |
236 | * Upon successful completion, scif_connect() returns the port ID to which the | |
237 | * endpoint, epd, is bound; otherwise in user mode -1 is returned and errno is | |
238 | * set to indicate the error; in kernel mode the negative of one of the | |
239 | * following errors is returned. | |
240 | * | |
241 | * Errors: | |
242 | * EBADF, ENOTTY - epd is not a valid endpoint descriptor | |
243 | * ECONNREFUSED - The destination was not listening for connections or refused | |
244 | * the connection request | |
245 | * EINVAL - dst.port is not a valid port ID | |
246 | * EISCONN - The endpoint is already connected | |
247 | * ENOMEM - No buffer space is available | |
248 | * ENODEV - The destination node does not exist, or the node is lost or existed, | |
249 | * but is not currently in the network since it may have crashed | |
250 | * ENOSPC - No port number available for assignment | |
251 | * EOPNOTSUPP - The endpoint is listening and cannot be connected | |
252 | */ | |
253 | int scif_connect(scif_epd_t epd, struct scif_port_id *dst); | |
254 | ||
255 | /** | |
256 | * scif_accept() - Accept a connection on an endpoint | |
257 | * @epd: endpoint descriptor | |
258 | * @peer: global id of port to which connected | |
259 | * @newepd: new connected endpoint descriptor | |
260 | * @flags: flags | |
261 | * | |
262 | * The scif_accept() call extracts the first connection request from the queue | |
263 | * of pending connections for the port on which epd is listening. scif_accept() | |
264 | * creates a new endpoint, bound to the same port as epd, and allocates a new | |
265 | * SCIF endpoint descriptor, returned in newepd, for the endpoint. The new | |
266 | * endpoint is connected to the endpoint through which the connection was | |
267 | * requested. epd is unaffected by this call, and remains in the listening | |
268 | * state. | |
269 | * | |
270 | * On successful return, peer holds the global port identifier (node id and | |
271 | * local port number) of the port which requested the connection. | |
272 | * | |
273 | * A connection is terminated when an endpoint of the connection is closed, | |
274 | * either explicitly by scif_close(), or when a process that owns one of the | |
275 | * endpoints of the connection is terminated. | |
276 | * | |
277 | * The number of connections that can (subsequently) be accepted on epd is only | |
278 | * limited by system resources (memory). | |
279 | * | |
280 | * The flags argument is formed by OR'ing together zero or more of the | |
281 | * following values. | |
282 | * SCIF_ACCEPT_SYNC - block until a connection request is presented. If | |
283 | * SCIF_ACCEPT_SYNC is not in flags, and no pending | |
284 | * connections are present on the queue, scif_accept() | |
285 | * fails with an EAGAIN error | |
286 | * | |
287 | * In user mode, the select() and poll() functions can be used to determine | |
288 | * when there is a connection request. In kernel mode, the scif_poll() | |
289 | * function may be used for this purpose. A readable event will be delivered | |
290 | * when a connection is requested. | |
291 | * | |
292 | * Return: | |
293 | * Upon successful completion, scif_accept() returns 0; otherwise in user mode | |
294 | * -1 is returned and errno is set to indicate the error; in kernel mode the | |
295 | * negative of one of the following errors is returned. | |
296 | * | |
297 | * Errors: | |
298 | * EAGAIN - SCIF_ACCEPT_SYNC is not set and no connections are present to be | |
299 | * accepted or SCIF_ACCEPT_SYNC is not set and remote node failed to complete | |
300 | * its connection request | |
301 | * EBADF, ENOTTY - epd is not a valid endpoint descriptor | |
302 | * EINTR - Interrupted function | |
303 | * EINVAL - epd is not a listening endpoint, or flags is invalid, or peer is | |
304 | * NULL, or newepd is NULL | |
305 | * ENODEV - The requesting node is lost or existed, but is not currently in the | |
306 | * network since it may have crashed | |
307 | * ENOMEM - Not enough space | |
308 | * ENOENT - Secondary part of epd registration failed | |
309 | */ | |
310 | int scif_accept(scif_epd_t epd, struct scif_port_id *peer, scif_epd_t | |
311 | *newepd, int flags); | |
312 | ||
313 | /** | |
314 | * scif_close() - Close an endpoint | |
315 | * @epd: endpoint descriptor | |
316 | * | |
317 | * scif_close() closes an endpoint and performs necessary teardown of | |
318 | * facilities associated with that endpoint. | |
319 | * | |
320 | * If epd is a listening endpoint then it will no longer accept connection | |
321 | * requests on the port to which it is bound. Any pending connection requests | |
322 | * are rejected. | |
323 | * | |
324 | * If epd is a connected endpoint, then its peer endpoint is also closed. RMAs | |
325 | * which are in-process through epd or its peer endpoint will complete before | |
326 | * scif_close() returns. Registered windows of the local and peer endpoints are | |
327 | * released as if scif_unregister() was called against each window. | |
328 | * | |
329 | * Closing a SCIF endpoint does not affect local registered memory mapped by | |
330 | * a SCIF endpoint on a remote node. The local memory remains mapped by the peer | |
331 | * SCIF endpoint until explicitly removed by calling munmap(..) by the peer. | |
332 | * | |
333 | * If the peer endpoint's receive queue is not empty at the time that epd is | |
334 | * closed, then the peer endpoint can be passed as the endpoint parameter to | |
335 | * scif_recv() until the receive queue is empty. | |
336 | * | |
337 | * epd is freed and may no longer be accessed. | |
338 | * | |
339 | * Return: | |
340 | * Upon successful completion, scif_close() returns 0; otherwise in user mode | |
341 | * -1 is returned and errno is set to indicate the error; in kernel mode the | |
342 | * negative of one of the following errors is returned. | |
343 | * | |
344 | * Errors: | |
345 | * EBADF, ENOTTY - epd is not a valid endpoint descriptor | |
346 | */ | |
347 | int scif_close(scif_epd_t epd); | |
348 | ||
349 | /** | |
350 | * scif_send() - Send a message | |
351 | * @epd: endpoint descriptor | |
352 | * @msg: message buffer address | |
353 | * @len: message length | |
354 | * @flags: blocking mode flags | |
355 | * | |
356 | * scif_send() sends data to the peer of endpoint epd. Up to len bytes of data | |
357 | * are copied from memory starting at address msg. On successful execution the | |
358 | * return value of scif_send() is the number of bytes that were sent, and is | |
359 | * zero if no bytes were sent because len was zero. scif_send() may be called | |
360 | * only when the endpoint is in a connected state. | |
361 | * | |
362 | * If a scif_send() call is non-blocking, then it sends only those bytes which | |
363 | * can be sent without waiting, up to a maximum of len bytes. | |
364 | * | |
365 | * If a scif_send() call is blocking, then it normally returns after sending | |
366 | * all len bytes. If a blocking call is interrupted or the connection is | |
367 | * reset, the call is considered successful if some bytes were sent or len is | |
368 | * zero, otherwise the call is considered unsuccessful. | |
369 | * | |
370 | * In user mode, the select() and poll() functions can be used to determine | |
371 | * when the send queue is not full. In kernel mode, the scif_poll() function | |
372 | * may be used for this purpose. | |
373 | * | |
374 | * It is recommended that scif_send()/scif_recv() only be used for short | |
375 | * control-type message communication between SCIF endpoints. The SCIF RMA | |
376 | * APIs are expected to provide better performance for transfer sizes of | |
377 | * 1024 bytes or longer for the current MIC hardware and software | |
378 | * implementation. | |
379 | * | |
380 | * scif_send() will block until the entire message is sent if SCIF_SEND_BLOCK | |
381 | * is passed as the flags argument. | |
382 | * | |
383 | * Return: | |
384 | * Upon successful completion, scif_send() returns the number of bytes sent; | |
385 | * otherwise in user mode -1 is returned and errno is set to indicate the | |
386 | * error; in kernel mode the negative of one of the following errors is | |
387 | * returned. | |
388 | * | |
389 | * Errors: | |
390 | * EBADF, ENOTTY - epd is not a valid endpoint descriptor | |
391 | * ECONNRESET - Connection reset by peer | |
392 | * EFAULT - An invalid address was specified for a parameter | |
393 | * EINVAL - flags is invalid, or len is negative | |
394 | * ENODEV - The remote node is lost or existed, but is not currently in the | |
395 | * network since it may have crashed | |
396 | * ENOMEM - Not enough space | |
397 | * ENOTCONN - The endpoint is not connected | |
398 | */ | |
399 | int scif_send(scif_epd_t epd, void *msg, int len, int flags); | |
400 | ||
401 | /** | |
402 | * scif_recv() - Receive a message | |
403 | * @epd: endpoint descriptor | |
404 | * @msg: message buffer address | |
405 | * @len: message buffer length | |
406 | * @flags: blocking mode flags | |
407 | * | |
408 | * scif_recv() receives data from the peer of endpoint epd. Up to len bytes of | |
409 | * data are copied to memory starting at address msg. On successful execution | |
410 | * the return value of scif_recv() is the number of bytes that were received, | |
411 | * and is zero if no bytes were received because len was zero. scif_recv() may | |
412 | * be called only when the endpoint is in a connected state. | |
413 | * | |
414 | * If a scif_recv() call is non-blocking, then it receives only those bytes | |
415 | * which can be received without waiting, up to a maximum of len bytes. | |
416 | * | |
417 | * If a scif_recv() call is blocking, then it normally returns after receiving | |
418 | * all len bytes. If the blocking call was interrupted due to a disconnection, | |
419 | * subsequent calls to scif_recv() will copy all bytes received up to the point | |
420 | * of disconnection. | |
421 | * | |
422 | * In user mode, the select() and poll() functions can be used to determine | |
423 | * when data is available to be received. In kernel mode, the scif_poll() | |
424 | * function may be used for this purpose. | |
425 | * | |
426 | * It is recommended that scif_send()/scif_recv() only be used for short | |
427 | * control-type message communication between SCIF endpoints. The SCIF RMA | |
428 | * APIs are expected to provide better performance for transfer sizes of | |
429 | * 1024 bytes or longer for the current MIC hardware and software | |
430 | * implementation. | |
431 | * | |
432 | * scif_recv() will block until the entire message is received if | |
433 | * SCIF_RECV_BLOCK is passed as the flags argument. | |
434 | * | |
435 | * Return: | |
436 | * Upon successful completion, scif_recv() returns the number of bytes | |
437 | * received; otherwise in user mode -1 is returned and errno is set to | |
438 | * indicate the error; in kernel mode the negative of one of the following | |
439 | * errors is returned. | |
440 | * | |
441 | * Errors: | |
442 | * EAGAIN - The destination node is returning from a low power state | |
443 | * EBADF, ENOTTY - epd is not a valid endpoint descriptor | |
444 | * ECONNRESET - Connection reset by peer | |
445 | * EFAULT - An invalid address was specified for a parameter | |
446 | * EINVAL - flags is invalid, or len is negative | |
447 | * ENODEV - The remote node is lost or existed, but is not currently in the | |
448 | * network since it may have crashed | |
449 | * ENOMEM - Not enough space | |
450 | * ENOTCONN - The endpoint is not connected | |
451 | */ | |
452 | int scif_recv(scif_epd_t epd, void *msg, int len, int flags); | |
453 | ||
454 | /** | |
455 | * scif_register() - Mark a memory region for remote access. | |
456 | * @epd: endpoint descriptor | |
457 | * @addr: starting virtual address | |
458 | * @len: length of range | |
459 | * @offset: offset of window | |
460 | * @prot_flags: read/write protection flags | |
461 | * @map_flags: mapping flags | |
462 | * | |
463 | * The scif_register() function opens a window, a range of whole pages of the | |
464 | * registered address space of the endpoint epd, starting at offset po and | |
465 | * continuing for len bytes. The value of po, further described below, is a | |
466 | * function of the parameters offset and len, and the value of map_flags. Each | |
467 | * page of the window represents the physical memory page which backs the | |
468 | * corresponding page of the range of virtual address pages starting at addr | |
469 | * and continuing for len bytes. addr and len are constrained to be multiples | |
470 | * of the page size. A successful scif_register() call returns po. | |
471 | * | |
472 | * When SCIF_MAP_FIXED is set in the map_flags argument, po will be offset | |
473 | * exactly, and offset is constrained to be a multiple of the page size. The | |
474 | * mapping established by scif_register() will not replace any existing | |
475 | * registration; an error is returned if any page within the range [offset, | |
476 | * offset + len - 1] intersects an existing window. | |
477 | * | |
478 | * When SCIF_MAP_FIXED is not set, the implementation uses offset in an | |
479 | * implementation-defined manner to arrive at po. The po value so chosen will | |
480 | * be an area of the registered address space that the implementation deems | |
481 | * suitable for a mapping of len bytes. An offset value of 0 is interpreted as | |
482 | * granting the implementation complete freedom in selecting po, subject to | |
483 | * constraints described below. A non-zero value of offset is taken to be a | |
484 | * suggestion of an offset near which the mapping should be placed. When the | |
485 | * implementation selects a value for po, it does not replace any extant | |
486 | * window. In all cases, po will be a multiple of the page size. | |
487 | * | |
488 | * The physical pages which are so represented by a window are available for | |
489 | * access in calls to mmap(), scif_readfrom(), scif_writeto(), | |
490 | * scif_vreadfrom(), and scif_vwriteto(). While a window is registered, the | |
491 | * physical pages represented by the window will not be reused by the memory | |
492 | * subsystem for any other purpose. Note that the same physical page may be | |
493 | * represented by multiple windows. | |
494 | * | |
495 | * Subsequent operations which change the memory pages to which virtual | |
496 | * addresses are mapped (such as mmap(), munmap()) have no effect on | |
497 | * existing windows. | |
498 | * | |
499 | * If the process will fork(), it is recommended that the registered | |
500 | * virtual address range be marked with MADV_DONTFORK. Doing so will prevent | |
501 | * problems due to copy-on-write semantics. | |
502 | * | |
503 | * The prot_flags argument is formed by OR'ing together one or more of the | |
504 | * following values. | |
505 | * SCIF_PROT_READ - allow read operations from the window | |
506 | * SCIF_PROT_WRITE - allow write operations to the window | |
507 | * | |
508 | * The map_flags argument can be set to SCIF_MAP_FIXED which interprets a | |
509 | * fixed offset. | |
510 | * | |
511 | * Return: | |
512 | * Upon successful completion, scif_register() returns the offset at which the | |
513 | * mapping was placed (po); otherwise in user mode SCIF_REGISTER_FAILED (that | |
514 | * is (off_t)-1) is returned and errno is set to indicate the error; in | |
515 | * kernel mode the negative of one of the following errors is returned. | |
516 | * | |
517 | * Errors: | |
518 | * EADDRINUSE - SCIF_MAP_FIXED is set in map_flags, and pages in the range | |
519 | * [offset, offset + len -1] are already registered | |
520 | * EAGAIN - The mapping could not be performed due to lack of resources | |
521 | * EBADF, ENOTTY - epd is not a valid endpoint descriptor | |
522 | * ECONNRESET - Connection reset by peer | |
523 | * EFAULT - Addresses in the range [addr, addr + len - 1] are invalid | |
524 | * EINVAL - map_flags is invalid, or prot_flags is invalid, or SCIF_MAP_FIXED is | |
525 | * set in map_flags, and offset is not a multiple of the page size, or addr is not a | |
526 | * multiple of the page size, or len is not a multiple of the page size, or is | |
527 | * 0, or offset is negative | |
528 | * ENODEV - The remote node is lost or existed, but is not currently in the | |
529 | * network since it may have crashed | |
530 | * ENOMEM - Not enough space | |
531 | * ENOTCONN - The endpoint is not connected | |
532 | */ | |
533 | off_t scif_register(scif_epd_t epd, void *addr, size_t len, off_t offset, | |
534 | int prot_flags, int map_flags); | |
535 | ||
536 | /** | |
537 | * scif_unregister() - Release a memory region registered for remote access. | |
538 | * @epd: endpoint descriptor | |
539 | * @offset: start of range to unregister | |
540 | * @len: length of range to unregister | |
541 | * | |
542 | * The scif_unregister() function closes those previously registered windows | |
543 | * which are entirely within the range [offset, offset + len - 1]. It is an | |
544 | * error to specify a range which intersects only a subrange of a window. | |
545 | * | |
546 | * On a successful return, pages within the window may no longer be specified | |
547 | * in calls to mmap(), scif_readfrom(), scif_writeto(), scif_vreadfrom(), | |
548 | * scif_vwriteto(), scif_get_pages(), and scif_fence_signal(). The window, | |
549 | * however, continues to exist until all previous references against it are | |
550 | * removed. A window is referenced if there is a mapping to it created by | |
551 | * mmap(), or if scif_get_pages() was called against the window | |
552 | * (and the pages have not been returned via scif_put_pages()). A window is | |
553 | * also referenced while an RMA, in which some range of the window is a source | |
554 | * or destination, is in progress. Finally a window is referenced while some | |
555 | * offset in that window was specified to scif_fence_signal(), and the RMAs | |
556 | * marked by that call to scif_fence_signal() have not completed. While a | |
557 | * window is in this state, its registered address space pages are not | |
558 | * available for use in a new registered window. | |
559 | * | |
560 | * When all such references to the window have been removed, its references to | |
561 | * all the physical pages which it represents are removed. Similarly, the | |
562 | * registered address space pages of the window become available for | |
563 | * registration in a new window. | |
564 | * | |
565 | * Return: | |
566 | * Upon successful completion, scif_unregister() returns 0; otherwise in user | |
567 | * mode -1 is returned and errno is set to indicate the error; in kernel mode | |
568 | * the negative of one of the following errors is returned. In the event of an | |
569 | * error, no windows are unregistered. | |
570 | * | |
571 | * Errors: | |
572 | * EBADF, ENOTTY - epd is not a valid endpoint descriptor | |
573 | * ECONNRESET - Connection reset by peer | |
574 | * EINVAL - the range [offset, offset + len - 1] intersects a subrange of a | |
575 | * window, or offset is negative | |
576 | * ENODEV - The remote node is lost or existed, but is not currently in the | |
577 | * network since it may have crashed | |
578 | * ENOTCONN - The endpoint is not connected | |
579 | * ENXIO - Offsets in the range [offset, offset + len - 1] are invalid for the | |
580 | * registered address space of epd | |
581 | */ | |
582 | int scif_unregister(scif_epd_t epd, off_t offset, size_t len); | |
583 | ||
584 | /** | |
585 | * scif_readfrom() - Copy from a remote address space | |
586 | * @epd: endpoint descriptor | |
587 | * @loffset: offset in local registered address space to | |
588 | * which to copy | |
589 | * @len: length of range to copy | |
590 | * @roffset: offset in remote registered address space | |
591 | * from which to copy | |
592 | * @rma_flags: transfer mode flags | |
593 | * | |
594 | * scif_readfrom() copies len bytes from the remote registered address space of | |
595 | * the peer of endpoint epd, starting at the offset roffset to the local | |
596 | * registered address space of epd, starting at the offset loffset. | |
597 | * | |
598 | * Each of the specified ranges [loffset, loffset + len - 1] and [roffset, | |
599 | * roffset + len - 1] must be within some registered window or windows of the | |
600 | * local and remote nodes. A range may intersect multiple registered windows, | |
601 | * but only if those windows are contiguous in the registered address space. | |
602 | * | |
603 | * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using | |
604 | * programmed read/writes. Otherwise the data is copied using DMA. If | |
605 | * rma_flags includes SCIF_RMA_SYNC, then scif_readfrom() will return after | |
606 | * the transfer is complete. Otherwise, the transfer may be performed | |
607 | * asynchronously. The order in which any two asynchronous RMA operations complete | |
608 | * is non-deterministic. The synchronization functions, scif_fence_mark()/ | |
609 | * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to | |
610 | * the completion of asynchronous RMA operations on the same endpoint. | |
611 | * | |
612 | * The DMA transfer of individual bytes is not guaranteed to complete in | |
613 | * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last | |
614 | * cacheline or partial cacheline of the source range will become visible on | |
615 | * the destination node after all other transferred data in the source | |
616 | * range has become visible on the destination node. | |
617 | * | |
618 | * The optimal DMA performance will likely be realized if both | |
619 | * loffset and roffset are cacheline aligned (are a multiple of 64). Lower | |
620 | * performance will likely be realized if loffset and roffset are not | |
621 | * cacheline aligned but are separated by some multiple of 64. The lowest level | |
622 | * of performance is likely if loffset and roffset are not separated by a | |
623 | * multiple of 64. | |
624 | * | |
625 | * The rma_flags argument is formed by ORing together zero or more of the | |
626 | * following values. | |
627 | * SCIF_RMA_USECPU - perform the transfer using the CPU, otherwise use the DMA | |
628 | * engine. | |
629 | * SCIF_RMA_SYNC - perform the transfer synchronously, returning after the | |
630 | * transfer has completed. Passing this flag results in the | |
631 | * current implementation busy waiting and consuming CPU cycles | |
632 | * while the DMA transfer is in progress for best performance by | |
633 | * avoiding the interrupt latency. | |
634 | * SCIF_RMA_ORDERED - ensure that the last cacheline or partial cacheline of | |
635 | * the source range becomes visible on the destination node | |
636 | * after all other transferred data in the source range has | |
637 | * become visible on the destination | |
638 | * | |
639 | * Return: | |
640 | * Upon successful completion, scif_readfrom() returns 0; otherwise in user | |
641 | * mode -1 is returned and errno is set to indicate the error; in kernel mode | |
642 | * the negative of one of the following errors is returned. | |
643 | * | |
644 | * Errors: | |
645 | * EACCES - Attempt to write to a read-only range | |
646 | * EBADF, ENOTTY - epd is not a valid endpoint descriptor | |
647 | * ECONNRESET - Connection reset by peer | |
648 | * EINVAL - rma_flags is invalid | |
649 | * ENODEV - The remote node is lost, or existed but is not currently in the | |
650 | * network since it may have crashed | |
651 | * ENOTCONN - The endpoint is not connected | |
652 | * ENXIO - The range [loffset, loffset + len - 1] is invalid for the registered | |
653 | * address space of epd, or the range [roffset, roffset + len - 1] is invalid | |
654 | * for the registered address space of the peer of epd, or loffset or roffset | |
655 | * is negative | |
656 | */ | |
657 | int scif_readfrom(scif_epd_t epd, off_t loffset, size_t len, off_t | |
658 | roffset, int rma_flags); | |
659 | ||
660 | /** | |
661 | * scif_writeto() - Copy to a remote address space | |
662 | * @epd: endpoint descriptor | |
663 | * @loffset: offset in local registered address space | |
664 | * from which to copy | |
665 | * @len: length of range to copy | |
666 | * @roffset: offset in remote registered address space to | |
667 | * which to copy | |
668 | * @rma_flags: transfer mode flags | |
669 | * | |
670 | * scif_writeto() copies len bytes from the local registered address space of | |
671 | * epd, starting at the offset loffset to the remote registered address space | |
672 | * of the peer of endpoint epd, starting at the offset roffset. | |
673 | * | |
674 | * Each of the specified ranges [loffset, loffset + len - 1] and [roffset, | |
675 | * roffset + len - 1] must be within some registered window or windows of the | |
676 | * local and remote nodes. A range may intersect multiple registered windows, | |
677 | * but only if those windows are contiguous in the registered address space. | |
678 | * | |
679 | * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using | |
680 | * programmed read/writes. Otherwise the data is copied using DMA. If rma_flags | |
681 | * includes SCIF_RMA_SYNC, then scif_writeto() will return after the transfer | |
682 | * is complete. Otherwise, the transfer may be performed asynchronously. The | |
683 | * order in which any two asynchronous RMA operations complete is | |
684 | * non-deterministic. The synchronization functions, scif_fence_mark()/ | |
685 | * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to | |
686 | * the completion of asynchronous RMA operations on the same endpoint. | |
687 | * | |
688 | * The DMA transfer of individual bytes is not guaranteed to complete in | |
689 | * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last | |
690 | * cacheline or partial cacheline of the source range will become visible on | |
691 | * the destination node after all other transferred data in the source | |
692 | * range has become visible on the destination node. | |
693 | * | |
694 | * The optimal DMA performance will likely be realized if both | |
695 | * loffset and roffset are cacheline aligned (are a multiple of 64). Lower | |
696 | * performance will likely be realized if loffset and roffset are not cacheline | |
697 | * aligned but are separated by some multiple of 64. The lowest level of | |
698 | * performance is likely if loffset and roffset are not separated by a multiple | |
699 | * of 64. | |
700 | * | |
701 | * The rma_flags argument is formed by ORing together zero or more of the | |
702 | * following values. | |
703 | * SCIF_RMA_USECPU - perform the transfer using the CPU, otherwise use the DMA | |
704 | * engine. | |
705 | * SCIF_RMA_SYNC - perform the transfer synchronously, returning after the | |
706 | * transfer has completed. Passing this flag results in the | |
707 | * current implementation busy waiting and consuming CPU cycles | |
708 | * while the DMA transfer is in progress for best performance by | |
709 | * avoiding the interrupt latency. | |
710 | * SCIF_RMA_ORDERED - ensure that the last cacheline or partial cacheline of | |
711 | * the source range becomes visible on the destination node | |
712 | * after all other transferred data in the source range has | |
713 | * become visible on the destination | |
714 | * | |
715 | * Return: | |
716 | * Upon successful completion, scif_writeto() returns 0; otherwise in user | |
717 | * mode -1 is returned and errno is set to indicate the error; in kernel mode | |
718 | * the negative of one of the following errors is returned. | |
719 | * | |
720 | * Errors: | |
721 | * EACCES - Attempt to write to a read-only range | |
722 | * EBADF, ENOTTY - epd is not a valid endpoint descriptor | |
723 | * ECONNRESET - Connection reset by peer | |
724 | * EINVAL - rma_flags is invalid | |
725 | * ENODEV - The remote node is lost, or existed but is not currently in the | |
726 | * network since it may have crashed | |
727 | * ENOTCONN - The endpoint is not connected | |
728 | * ENXIO - The range [loffset, loffset + len - 1] is invalid for the registered | |
729 | * address space of epd, or the range [roffset, roffset + len - 1] is invalid | |
730 | * for the registered address space of the peer of epd, or loffset or roffset | |
731 | * is negative | |
732 | */ | |
733 | int scif_writeto(scif_epd_t epd, off_t loffset, size_t len, off_t | |
734 | roffset, int rma_flags); | |
735 | ||
736 | /** | |
737 | * scif_vreadfrom() - Copy from a remote address space | |
738 | * @epd: endpoint descriptor | |
739 | * @addr: address to which to copy | |
740 | * @len: length of range to copy | |
741 | * @roffset: offset in remote registered address space | |
742 | * from which to copy | |
743 | * @rma_flags: transfer mode flags | |
744 | * | |
745 | * scif_vreadfrom() copies len bytes from the remote registered address | |
746 | * space of the peer of endpoint epd, starting at the offset roffset, to local | |
747 | * memory, starting at addr. | |
748 | * | |
749 | * The specified range [roffset, roffset + len - 1] must be within some | |
750 | * registered window or windows of the remote node. The range may | |
751 | * intersect multiple registered windows, but only if those windows are | |
752 | * contiguous in the registered address space. | |
753 | * | |
754 | * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using | |
755 | * programmed read/writes. Otherwise the data is copied using DMA. If rma_flags | |
756 | * includes SCIF_RMA_SYNC, then scif_vreadfrom() will return after the transfer | |
757 | * is complete. Otherwise, the transfer may be performed asynchronously. The | |
758 | * order in which any two asynchronous RMA operations complete is | |
759 | * non-deterministic. The synchronization functions, scif_fence_mark()/ | |
760 | * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to | |
761 | * the completion of asynchronous RMA operations on the same endpoint. | |
762 | * | |
763 | * The DMA transfer of individual bytes is not guaranteed to complete in | |
764 | * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last | |
765 | * cacheline or partial cacheline of the source range will become visible on | |
766 | * the destination node after all other transferred data in the source | |
767 | * range has become visible on the destination node. | |
768 | * | |
769 | * If rma_flags includes SCIF_RMA_USECACHE, then the physical pages which back | |
770 | * the specified local memory range may remain in a pinned state even after | |
771 | * the specified transfer completes. This may reduce overhead if some or all of | |
772 | * the same virtual address range is referenced in a subsequent call of | |
773 | * scif_vreadfrom() or scif_vwriteto(). | |
774 | * | |
775 | * The optimal DMA performance will likely be realized if both | |
776 | * addr and roffset are cacheline aligned (are a multiple of 64). Lower | |
777 | * performance will likely be realized if addr and roffset are not | |
778 | * cacheline aligned but are separated by some multiple of 64. The lowest level | |
779 | * of performance is likely if addr and roffset are not separated by a | |
780 | * multiple of 64. | |
781 | * | |
782 | * The rma_flags argument is formed by ORing together zero or more of the | |
783 | * following values. | |
784 | * SCIF_RMA_USECPU - perform the transfer using the CPU, otherwise use the DMA | |
785 | * engine. | |
786 | * SCIF_RMA_USECACHE - enable registration caching | |
787 | * SCIF_RMA_SYNC - perform the transfer synchronously, returning after the | |
788 | * transfer has completed. Passing this flag results in the | |
789 | * current implementation busy waiting and consuming CPU cycles | |
790 | * while the DMA transfer is in progress for best performance by | |
791 | * avoiding the interrupt latency. | |
792 | * SCIF_RMA_ORDERED - ensure that the last cacheline or partial cacheline of | |
793 | * the source range becomes visible on the destination node | |
794 | * after all other transferred data in the source range has | |
795 | * become visible on the destination | |
796 | * | |
797 | * Return: | |
798 | * Upon successful completion, scif_vreadfrom() returns 0; otherwise in user | |
799 | * mode -1 is returned and errno is set to indicate the error; in kernel mode | |
800 | * the negative of one of the following errors is returned. | |
801 | * | |
802 | * Errors: | |
803 | * EACCES - Attempt to write to a read-only range | |
804 | * EBADF, ENOTTY - epd is not a valid endpoint descriptor | |
805 | * ECONNRESET - Connection reset by peer | |
806 | * EFAULT - Addresses in the range [addr, addr + len - 1] are invalid | |
807 | * EINVAL - rma_flags is invalid | |
808 | * ENODEV - The remote node is lost, or existed but is not currently in the | |
809 | * network since it may have crashed | |
810 | * ENOTCONN - The endpoint is not connected | |
811 | * ENXIO - Offsets in the range [roffset, roffset + len - 1] are invalid for the | |
812 | * registered address space of the peer of epd | |
813 | */ | |
814 | int scif_vreadfrom(scif_epd_t epd, void *addr, size_t len, off_t roffset, | |
815 | int rma_flags); | |
816 | ||
817 | /** | |
818 | * scif_vwriteto() - Copy to a remote address space | |
819 | * @epd: endpoint descriptor | |
820 | * @addr: address from which to copy | |
821 | * @len: length of range to copy | |
822 | * @roffset: offset in remote registered address space to | |
823 | * which to copy | |
824 | * @rma_flags: transfer mode flags | |
825 | * | |
826 | * scif_vwriteto() copies len bytes from the local memory, starting at addr, to | |
827 | * the remote registered address space of the peer of endpoint epd, starting at | |
828 | * the offset roffset. | |
829 | * | |
830 | * The specified range [roffset, roffset + len - 1] must be within some | |
831 | * registered window or windows of the remote node. The range may intersect | |
832 | * multiple registered windows, but only if those windows are contiguous in the | |
833 | * registered address space. | |
834 | * | |
835 | * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using | |
836 | * programmed read/writes. Otherwise the data is copied using DMA. If rma_flags | |
837 | * includes SCIF_RMA_SYNC, then scif_vwriteto() will return after the transfer | |
838 | * is complete. Otherwise, the transfer may be performed asynchronously. The | |
839 | * order in which any two asynchronous RMA operations complete is | |
840 | * non-deterministic. The synchronization functions, scif_fence_mark()/ | |
841 | * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to | |
842 | * the completion of asynchronous RMA operations on the same endpoint. | |
843 | * | |
844 | * The DMA transfer of individual bytes is not guaranteed to complete in | |
845 | * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last | |
846 | * cacheline or partial cacheline of the source range will become visible on | |
847 | * the destination node after all other transferred data in the source | |
848 | * range has become visible on the destination node. | |
849 | * | |
850 | * If rma_flags includes SCIF_RMA_USECACHE, then the physical pages which back | |
851 | * the specified local memory range may remain in a pinned state even after | |
852 | * the specified transfer completes. This may reduce overhead if some or all of | |
853 | * the same virtual address range is referenced in a subsequent call of | |
854 | * scif_vreadfrom() or scif_vwriteto(). | |
855 | * | |
856 | * The optimal DMA performance will likely be realized if both | |
857 | * addr and roffset are cacheline aligned (are a multiple of 64). Lower | |
858 | * performance will likely be realized if addr and roffset are not cacheline | |
859 | * aligned but are separated by some multiple of 64. The lowest level of | |
860 | * performance is likely if addr and roffset are not separated by a multiple of | |
861 | * 64. | |
862 | * | |
863 | * The rma_flags argument is formed by ORing together zero or more of the | |
864 | * following values. | |
865 | * SCIF_RMA_USECPU - perform the transfer using the CPU, otherwise use the DMA | |
866 | * engine. | |
867 | * SCIF_RMA_USECACHE - allow registration caching | |
868 | * SCIF_RMA_SYNC - perform the transfer synchronously, returning after the | |
869 | * transfer has completed. Passing this flag results in the | |
870 | * current implementation busy waiting and consuming CPU cycles | |
871 | * while the DMA transfer is in progress for best performance by | |
872 | * avoiding the interrupt latency. | |
873 | * SCIF_RMA_ORDERED - ensure that the last cacheline or partial cacheline of | |
874 | * the source range becomes visible on the destination node | |
875 | * after all other transferred data in the source range has | |
876 | * become visible on the destination | |
877 | * | |
878 | * Return: | |
879 | * Upon successful completion, scif_vwriteto() returns 0; otherwise in user | |
880 | * mode -1 is returned and errno is set to indicate the error; in kernel mode | |
881 | * the negative of one of the following errors is returned. | |
882 | * | |
883 | * Errors: | |
884 | * EACCES - Attempt to write to a read-only range | |
885 | * EBADF, ENOTTY - epd is not a valid endpoint descriptor | |
886 | * ECONNRESET - Connection reset by peer | |
887 | * EFAULT - Addresses in the range [addr, addr + len - 1] are invalid | |
888 | * EINVAL - rma_flags is invalid | |
889 | * ENODEV - The remote node is lost, or existed but is not currently in the | |
890 | * network since it may have crashed | |
891 | * ENOTCONN - The endpoint is not connected | |
892 | * ENXIO - Offsets in the range [roffset, roffset + len - 1] are invalid for the | |
893 | * registered address space of the peer of epd | |
894 | */ | |
895 | int scif_vwriteto(scif_epd_t epd, void *addr, size_t len, off_t roffset, | |
896 | int rma_flags); | |
897 | ||
898 | /** | |
899 | * scif_fence_mark() - Mark previously issued RMAs | |
900 | * @epd: endpoint descriptor | |
901 | * @flags: control flags | |
902 | * @mark: marked value returned as output. | |
903 | * | |
904 | * scif_fence_mark() returns after marking the current set of all uncompleted | |
905 | * RMAs initiated through the endpoint epd or the current set of all | |
906 | * uncompleted RMAs initiated through the peer of endpoint epd. The RMAs are | |
907 | * marked with a value returned at mark. The application may subsequently call | |
908 | * scif_fence_wait(), passing the value returned at mark, to await completion | |
909 | * of all RMAs so marked. | |
910 | * | |
911 | * The flags argument has exactly one of the following values. | |
912 | * SCIF_FENCE_INIT_SELF - RMA operations initiated through endpoint | |
913 | * epd are marked | |
914 | * SCIF_FENCE_INIT_PEER - RMA operations initiated through the peer | |
915 | * of endpoint epd are marked | |
916 | * | |
917 | * Return: | |
918 | * Upon successful completion, scif_fence_mark() returns 0; otherwise in user | |
919 | * mode -1 is returned and errno is set to indicate the error; in kernel mode | |
920 | * the negative of one of the following errors is returned. | |
921 | * | |
922 | * Errors: | |
923 | * EBADF, ENOTTY - epd is not a valid endpoint descriptor | |
924 | * ECONNRESET - Connection reset by peer | |
925 | * EINVAL - flags is invalid | |
926 | * ENODEV - The remote node is lost, or existed but is not currently in the | |
927 | * network since it may have crashed | |
928 | * ENOTCONN - The endpoint is not connected | |
929 | * ENOMEM - Insufficient kernel memory was available | |
930 | */ | |
931 | int scif_fence_mark(scif_epd_t epd, int flags, int *mark); | |
932 | ||
933 | /** | |
934 | * scif_fence_wait() - Wait for completion of marked RMAs | |
935 | * @epd: endpoint descriptor | |
936 | * @mark: mark request | |
937 | * | |
938 | * scif_fence_wait() returns after all RMAs marked with mark have completed. | |
939 | * The value passed in mark must have been obtained in a previous call to | |
940 | * scif_fence_mark(). | |
941 | * | |
942 | * Return: | |
943 | * Upon successful completion, scif_fence_wait() returns 0; otherwise in user | |
944 | * mode -1 is returned and errno is set to indicate the error; in kernel mode | |
945 | * the negative of one of the following errors is returned. | |
946 | * | |
947 | * Errors: | |
948 | * EBADF, ENOTTY - epd is not a valid endpoint descriptor | |
949 | * ECONNRESET - Connection reset by peer | |
950 | * ENODEV - The remote node is lost, or existed but is not currently in the | |
951 | * network since it may have crashed | |
952 | * ENOTCONN - The endpoint is not connected | |
953 | * ENOMEM - Insufficient kernel memory was available | |
954 | */ | |
955 | int scif_fence_wait(scif_epd_t epd, int mark); | |
956 | ||
957 | /** | |
958 | * scif_fence_signal() - Request a memory update on completion of RMAs | |
959 | * @epd: endpoint descriptor | |
960 | * @loff: local offset | |
961 | * @lval: local value to write to loff | |
962 | * @roff: remote offset | |
963 | * @rval: remote value to write to roff | |
964 | * @flags: flags | |
965 | * | |
966 | * scif_fence_signal() returns after marking the current set of all uncompleted | |
967 | * RMAs initiated through the endpoint epd or marking the current set of all | |
968 | * uncompleted RMAs initiated through the peer of endpoint epd. | |
969 | * | |
970 | * If flags includes SCIF_SIGNAL_LOCAL, then on completion of the RMAs in the | |
971 | * marked set, lval is written to memory at the address corresponding to offset | |
972 | * loff in the local registered address space of epd. loff must be within a | |
973 | * registered window. If flags includes SCIF_SIGNAL_REMOTE, then on completion | |
974 | * of the RMAs in the marked set, rval is written to memory at the address | |
975 | * corresponding to offset roff in the remote registered address space of epd. | |
976 | * roff must be within a remote registered window of the peer of epd. Note | |
977 | * that any specified offset must be DWORD (4 byte / 32 bit) aligned. | |
978 | * | |
979 | * The flags argument is formed by OR'ing together the following. | |
980 | * Exactly one of the following values. | |
981 | * SCIF_FENCE_INIT_SELF - RMA operations initiated through endpoint | |
982 | * epd are marked | |
983 | * SCIF_FENCE_INIT_PEER - RMA operations initiated through the peer | |
984 | * of endpoint epd are marked | |
985 | * One or more of the following values. | |
986 | * SCIF_SIGNAL_LOCAL - On completion of the marked set of RMAs, write lval to | |
987 | * memory at the address corresponding to offset loff in the local | |
988 | * registered address space of epd. | |
989 | * SCIF_SIGNAL_REMOTE - On completion of the marked set of RMAs, write rval to | |
990 | * memory at the address corresponding to offset roff in the remote | |
991 | * registered address space of epd. | |
992 | * | |
993 | * Return: | |
994 | * Upon successful completion, scif_fence_signal() returns 0; otherwise in | |
995 | * user mode -1 is returned and errno is set to indicate the error; in kernel | |
996 | * mode the negative of one of the following errors is returned. | |
997 | * | |
998 | * Errors: | |
999 | * EBADF, ENOTTY - epd is not a valid endpoint descriptor | |
1000 | * ECONNRESET - Connection reset by peer | |
1001 | * EINVAL - flags is invalid, or loff or roff are not DWORD aligned | |
1002 | * ENODEV - The remote node is lost, or existed but is not currently in the | |
1003 | * network since it may have crashed | |
1004 | * ENOTCONN - The endpoint is not connected | |
1005 | * ENXIO - loff is invalid for the registered address space of epd, or roff is | |
1006 | * invalid for the registered address space of the peer of epd | |
1007 | */ | |
1008 | int scif_fence_signal(scif_epd_t epd, off_t loff, u64 lval, off_t roff, | |
1009 | u64 rval, int flags); | |
1010 | ||
1011 | /** | |
1012 | * scif_get_node_ids() - Return information about online nodes | |
1013 | * @nodes: array in which to return online node IDs | |
1014 | * @len: number of entries in the nodes array | |
1015 | * @self: address to place the node ID of the local node | |
1016 | * | |
1017 | * scif_get_node_ids() fills in the nodes array with up to len node IDs of the | |
1018 | * nodes in the SCIF network. If there is not enough space in nodes, as | |
1019 | * indicated by the len parameter, only len node IDs are returned in nodes. The | |
1020 | * return value of scif_get_node_ids() is the total number of nodes currently in | |
1021 | * the SCIF network. By checking the return value against the len parameter, | |
1022 | * the user may determine if enough space for nodes was allocated. | |
1023 | * | |
1024 | * The node ID of the local node is returned at self. | |
1025 | * | |
1026 | * Return: | |
1027 | * Upon successful completion, scif_get_node_ids() returns the actual number of | |
1028 | * online nodes in the SCIF network including 'self'; otherwise in user mode | |
1029 | * -1 is returned and errno is set to indicate the error; in kernel mode no | |
1030 | * errors are returned. | |
1031 | * | |
1032 | * Errors: | |
1033 | * EFAULT - Bad address | |
1034 | */ | |
1035 | int scif_get_node_ids(u16 *nodes, int len, u16 *self); | |
1036 | ||
b7f94441 AD |
1037 | /** |
1038 | * scif_poll() - Wait for some event on an endpoint | |
1039 | * @epds: Array of endpoint descriptors | |
1040 | * @nepds: Length of epds | |
1041 | * @timeout: Upper limit on time for which scif_poll() will block | |
1042 | * | |
1043 | * scif_poll() waits for one of a set of endpoints to become ready to perform | |
1044 | * an I/O operation. | |
1045 | * | |
1046 | * The epds argument specifies the endpoint descriptors to be examined and the | |
1047 | * events of interest for each endpoint descriptor. epds is a pointer to an | |
1048 | * array with one member for each open endpoint descriptor of interest. | |
1049 | * | |
1050 | * The number of items in the epds array is specified in nepds. The epd field | |
1051 | * of scif_pollepd is an endpoint descriptor of an open endpoint. The field | |
1052 | * events is a bitmask specifying the events which the application is | |
1053 | * interested in. The field revents is an output parameter, filled by the | |
1054 | * kernel with the events that actually occurred. The bits returned in revents | |
1055 | * can include any of those specified in events, or one of the values POLLERR, | |
1056 | * POLLHUP, or POLLNVAL. (These three bits are meaningless in the events | |
1057 | * field, and will be set in the revents field whenever the corresponding | |
1058 | * condition is true.) | |
1059 | * | |
1060 | * If none of the events requested (and no error) has occurred for any of the | |
1061 | * endpoint descriptors, then scif_poll() blocks until one of the events occurs. | |
1062 | * | |
1063 | * The timeout argument specifies an upper limit on the time for which | |
1064 | * scif_poll() will block, in milliseconds. Specifying a negative value in | |
1065 | * timeout means an infinite timeout. | |
1066 | * | |
1067 | * The following bits may be set in events and returned in revents. | |
1068 | * POLLIN - Data may be received without blocking. For a connected | |
1069 | * endpoint, this means that scif_recv() may be called without blocking. For a | |
1070 | * listening endpoint, this means that scif_accept() may be called without | |
1071 | * blocking. | |
1072 | * POLLOUT - Data may be sent without blocking. For a connected endpoint, this | |
1073 | * means that scif_send() may be called without blocking. POLLOUT may also be | |
1074 | * used to block waiting for a non-blocking connect to complete. This bit value | |
1075 | * has no meaning for a listening endpoint and is ignored if specified. | |
1076 | * | |
1077 | * The following bits are only returned in revents, and are ignored if set in | |
1078 | * events. | |
1079 | * POLLERR - An error occurred on the endpoint | |
1080 | * POLLHUP - The connection to the peer endpoint was disconnected | |
1081 | * POLLNVAL - The specified endpoint descriptor is invalid. | |
1082 | * | |
1083 | * Return: | |
1084 | * Upon successful completion, scif_poll() returns a non-negative value. A | |
1085 | * positive value indicates the total number of endpoint descriptors that have | |
1086 | * been selected (that is, endpoint descriptors for which the revents member is | |
1087 | * non-zero). A value of 0 indicates that the call timed out and no endpoint | |
1088 | * descriptors have been selected. Otherwise in user mode -1 is returned and | |
1089 | * errno is set to indicate the error; in kernel mode the negative of one of | |
1090 | * the following errors is returned. | |
1091 | * | |
1092 | * Errors: | |
1093 | * EINTR - A signal occurred before any requested event | |
1094 | * EINVAL - The nepds argument is greater than {OPEN_MAX} | |
1095 | * ENOMEM - There was no space to allocate file descriptor tables | |
1096 | */ | |
1097 | int scif_poll(struct scif_pollepd *epds, unsigned int nepds, long timeout); | |
1098 | ||
d3d912eb AD |
1099 | /** |
1100 | * scif_client_register() - Register a SCIF client | |
1101 | * @client: client to be registered | |
1102 | * | |
1103 | * scif_client_register() registers a SCIF client. The probe() method | |
1104 | * of the client is called when SCIF peer devices come online and the | |
1105 | * remove() method is called when the peer devices disappear. | |
1106 | * | |
1107 | * Return: | |
1108 | * Upon successful completion, scif_client_register() returns a non-negative | |
1109 | * value. Otherwise the return value is the same as subsys_interface_register() | |
1110 | * in the kernel. | |
1111 | */ | |
1112 | int scif_client_register(struct scif_client *client); | |
1113 | ||
1114 | /** | |
1115 | * scif_client_unregister() - Unregister a SCIF client | |
1116 | * @client: client to be unregistered | |
1117 | * | |
1118 | * scif_client_unregister() unregisters a SCIF client. | |
1119 | * | |
1120 | * Return: | |
1121 | * None | |
1122 | */ | |
1123 | void scif_client_unregister(struct scif_client *client); | |
1124 | ||
7df20f2d | 1125 | #endif /* __SCIF_H__ */ |