diff options
| author | Sudeep Dutt <sudeep.dutt@intel.com> | 2015-04-29 08:32:28 -0400 |
|---|---|---|
| committer | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2015-05-24 15:13:36 -0400 |
| commit | 7df20f2d893db42eaa1ea1e30a2573c971ec9238 (patch) | |
| tree | 372f796f0c48006754facac07edda9ea390b88c7 /include | |
| parent | 0d09f1a54d9710548c9af72dc1564c8291a5307c (diff) | |
misc: mic: SCIF header file and IOCTL interface
This patch introduces the SCIF documentation in the header file
and describes the IOCTL interface for user mode. mic_overview.txt
is updated with documentation on SCIF and a new document
describing SCIF in more details is available in scif_overview.txt.
Reviewed-by: Nikhil Rao <nikhil.rao@intel.com>
Reviewed-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
Signed-off-by: Sudeep Dutt <sudeep.dutt@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'include')
| -rw-r--r-- | include/linux/scif.h | 993 | ||||
| -rw-r--r-- | include/uapi/linux/Kbuild | 1 | ||||
| -rw-r--r-- | include/uapi/linux/scif_ioctl.h | 130 |
3 files changed, 1124 insertions, 0 deletions
diff --git a/include/linux/scif.h b/include/linux/scif.h new file mode 100644 index 000000000000..44f4f3898bbe --- /dev/null +++ b/include/linux/scif.h | |||
| @@ -0,0 +1,993 @@ | |||
| 1 | /* | ||
| 2 | * Intel MIC Platform Software Stack (MPSS) | ||
| 3 | * | ||
| 4 | * This file is provided under a dual BSD/GPLv2 license. When using or | ||
| 5 | * redistributing this file, you may do so under either license. | ||
| 6 | * | ||
| 7 | * GPL LICENSE SUMMARY | ||
| 8 | * | ||
| 9 | * Copyright(c) 2014 Intel Corporation. | ||
| 10 | * | ||
| 11 | * This program is free software; you can redistribute it and/or modify | ||
| 12 | * it under the terms of version 2 of the GNU General Public License as | ||
| 13 | * published by the Free Software Foundation. | ||
| 14 | * | ||
| 15 | * This program is distributed in the hope that it will be useful, but | ||
| 16 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 18 | * General Public License for more details. | ||
| 19 | * | ||
| 20 | * BSD LICENSE | ||
| 21 | * | ||
| 22 | * Copyright(c) 2014 Intel Corporation. | ||
| 23 | * | ||
| 24 | * Redistribution and use in source and binary forms, with or without | ||
| 25 | * modification, are permitted provided that the following conditions | ||
| 26 | * are met: | ||
| 27 | * | ||
| 28 | * * Redistributions of source code must retain the above copyright | ||
| 29 | * notice, this list of conditions and the following disclaimer. | ||
| 30 | * * Redistributions in binary form must reproduce the above copyright | ||
| 31 | * notice, this list of conditions and the following disclaimer in | ||
| 32 | * the documentation and/or other materials provided with the | ||
| 33 | * distribution. | ||
| 34 | * * Neither the name of Intel Corporation nor the names of its | ||
| 35 | * contributors may be used to endorse or promote products derived | ||
| 36 | * from this software without specific prior written permission. | ||
| 37 | * | ||
| 38 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
| 39 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
| 40 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
| 41 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
| 42 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
| 43 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
| 44 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
| 45 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
| 46 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
| 47 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
| 48 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
| 49 | * | ||
| 50 | * Intel SCIF driver. | ||
| 51 | * | ||
| 52 | */ | ||
| 53 | #ifndef __SCIF_H__ | ||
| 54 | #define __SCIF_H__ | ||
| 55 | |||
| 56 | #include <linux/types.h> | ||
| 57 | #include <linux/poll.h> | ||
| 58 | #include <linux/scif_ioctl.h> | ||
| 59 | |||
| 60 | #define SCIF_ACCEPT_SYNC 1 | ||
| 61 | #define SCIF_SEND_BLOCK 1 | ||
| 62 | #define SCIF_RECV_BLOCK 1 | ||
| 63 | |||
| 64 | enum { | ||
| 65 | SCIF_PROT_READ = (1 << 0), | ||
| 66 | SCIF_PROT_WRITE = (1 << 1) | ||
| 67 | }; | ||
| 68 | |||
| 69 | enum { | ||
| 70 | SCIF_MAP_FIXED = 0x10, | ||
| 71 | SCIF_MAP_KERNEL = 0x20, | ||
| 72 | }; | ||
| 73 | |||
| 74 | enum { | ||
| 75 | SCIF_FENCE_INIT_SELF = (1 << 0), | ||
| 76 | SCIF_FENCE_INIT_PEER = (1 << 1), | ||
| 77 | SCIF_SIGNAL_LOCAL = (1 << 4), | ||
| 78 | SCIF_SIGNAL_REMOTE = (1 << 5) | ||
| 79 | }; | ||
| 80 | |||
| 81 | enum { | ||
| 82 | SCIF_RMA_USECPU = (1 << 0), | ||
| 83 | SCIF_RMA_USECACHE = (1 << 1), | ||
| 84 | SCIF_RMA_SYNC = (1 << 2), | ||
| 85 | SCIF_RMA_ORDERED = (1 << 3) | ||
| 86 | }; | ||
| 87 | |||
| 88 | /* End of SCIF Admin Reserved Ports */ | ||
| 89 | #define SCIF_ADMIN_PORT_END 1024 | ||
| 90 | |||
| 91 | /* End of SCIF Reserved Ports */ | ||
| 92 | #define SCIF_PORT_RSVD 1088 | ||
| 93 | |||
| 94 | typedef struct scif_endpt *scif_epd_t; | ||
| 95 | |||
| 96 | #define SCIF_OPEN_FAILED ((scif_epd_t)-1) | ||
| 97 | #define SCIF_REGISTER_FAILED ((off_t)-1) | ||
| 98 | #define SCIF_MMAP_FAILED ((void *)-1) | ||
| 99 | |||
| 100 | /** | ||
| 101 | * scif_open() - Create an endpoint | ||
| 102 | * | ||
| 103 | * Return: | ||
| 104 | * Upon successful completion, scif_open() returns an endpoint descriptor to | ||
| 105 | * be used in subsequent SCIF function calls to refer to that endpoint; | ||
| 106 | * otherwise in user mode SCIF_OPEN_FAILED (that is ((scif_epd_t)-1)) is | ||
| 107 | * returned and errno is set to indicate the error; in kernel mode a NULL | ||
| 108 | * scif_epd_t is returned. | ||
| 109 | * | ||
| 110 | * Errors: | ||
| 111 | * ENOMEM - Insufficient kernel memory was available | ||
| 112 | */ | ||
| 113 | scif_epd_t scif_open(void); | ||
| 114 | |||
| 115 | /** | ||
| 116 | * scif_bind() - Bind an endpoint to a port | ||
| 117 | * @epd: endpoint descriptor | ||
| 118 | * @pn: port number | ||
| 119 | * | ||
| 120 | * scif_bind() binds endpoint epd to port pn, where pn is a port number on the | ||
| 121 | * local node. If pn is zero, a port number greater than or equal to | ||
| 122 | * SCIF_PORT_RSVD is assigned and returned. Each endpoint may be bound to | ||
| 123 | * exactly one local port. Ports less than 1024 when requested can only be bound | ||
| 124 | * by system (or root) processes or by processes executed by privileged users. | ||
| 125 | * | ||
| 126 | * Return: | ||
| 127 | * Upon successful completion, scif_bind() returns the port number to which epd | ||
| 128 | * is bound; otherwise in user mode -1 is returned and errno is set to | ||
| 129 | * indicate the error; in kernel mode the negative of one of the following | ||
| 130 | * errors is returned. | ||
| 131 | * | ||
| 132 | * Errors: | ||
| 133 | * EBADF, ENOTTY - epd is not a valid endpoint descriptor | ||
| 134 | * EINVAL - the endpoint or the port is already bound | ||
| 135 | * EISCONN - The endpoint is already connected | ||
| 136 | * ENOSPC - No port number available for assignment | ||
| 137 | * EACCES - The port requested is protected and the user is not the superuser | ||
| 138 | */ | ||
| 139 | int scif_bind(scif_epd_t epd, u16 pn); | ||
| 140 | |||
| 141 | /** | ||
| 142 | * scif_listen() - Listen for connections on an endpoint | ||
| 143 | * @epd: endpoint descriptor | ||
| 144 | * @backlog: maximum pending connection requests | ||
| 145 | * | ||
| 146 | * scif_listen() marks the endpoint epd as a listening endpoint - that is, as | ||
| 147 | * an endpoint that will be used to accept incoming connection requests. Once | ||
| 148 | * so marked, the endpoint is said to be in the listening state and may not be | ||
| 149 | * used as the endpoint of a connection. | ||
| 150 | * | ||
| 151 | * The endpoint, epd, must have been bound to a port. | ||
| 152 | * | ||
| 153 | * The backlog argument defines the maximum length to which the queue of | ||
| 154 | * pending connections for epd may grow. If a connection request arrives when | ||
| 155 | * the queue is full, the client may receive an error with an indication that | ||
| 156 | * the connection was refused. | ||
| 157 | * | ||
| 158 | * Return: | ||
| 159 | * Upon successful completion, scif_listen() returns 0; otherwise in user mode | ||
| 160 | * -1 is returned and errno is set to indicate the error; in kernel mode the | ||
| 161 | * negative of one of the following errors is returned. | ||
| 162 | * | ||
| 163 | * Errors: | ||
| 164 | * EBADF, ENOTTY - epd is not a valid endpoint descriptor | ||
| 165 | * EINVAL - the endpoint is not bound to a port | ||
| 166 | * EISCONN - The endpoint is already connected or listening | ||
| 167 | */ | ||
| 168 | int scif_listen(scif_epd_t epd, int backlog); | ||
| 169 | |||
| 170 | /** | ||
| 171 | * scif_connect() - Initiate a connection on a port | ||
| 172 | * @epd: endpoint descriptor | ||
| 173 | * @dst: global id of port to which to connect | ||
| 174 | * | ||
| 175 | * The scif_connect() function requests the connection of endpoint epd to remote | ||
| 176 | * port dst. If the connection is successful, a peer endpoint, bound to dst, is | ||
| 177 | * created on node dst.node. On successful return, the connection is complete. | ||
| 178 | * | ||
| 179 | * If the endpoint epd has not already been bound to a port, scif_connect() | ||
| 180 | * will bind it to an unused local port. | ||
| 181 | * | ||
| 182 | * A connection is terminated when an endpoint of the connection is closed, | ||
| 183 | * either explicitly by scif_close(), or when a process that owns one of the | ||
| 184 | * endpoints of the connection is terminated. | ||
| 185 | * | ||
| 186 | * In user space, scif_connect() supports an asynchronous connection mode | ||
| 187 | * if the application has set the O_NONBLOCK flag on the endpoint via the | ||
| 188 | * fcntl() system call. Setting this flag will result in the calling process | ||
| 189 | * not waiting during scif_connect(). | ||
| 190 | * | ||
| 191 | * Return: | ||
| 192 | * Upon successful completion, scif_connect() returns the port ID to which the | ||
| 193 | * endpoint, epd, is bound; otherwise in user mode -1 is returned and errno is | ||
| 194 | * set to indicate the error; in kernel mode the negative of one of the | ||
| 195 | * following errors is returned. | ||
| 196 | * | ||
| 197 | * Errors: | ||
| 198 | * EBADF, ENOTTY - epd is not a valid endpoint descriptor | ||
| 199 | * ECONNREFUSED - The destination was not listening for connections or refused | ||
| 200 | * the connection request | ||
| 201 | * EINVAL - dst.port is not a valid port ID | ||
| 202 | * EISCONN - The endpoint is already connected | ||
| 203 | * ENOMEM - No buffer space is available | ||
| 204 | * ENODEV - The destination node does not exist, or the node is lost, or it | ||
| 205 | * existed but is not currently in the network since it may have crashed | ||
| 206 | * ENOSPC - No port number available for assignment | ||
| 207 | * EOPNOTSUPP - The endpoint is listening and cannot be connected | ||
| 208 | */ | ||
| 209 | int scif_connect(scif_epd_t epd, struct scif_port_id *dst); | ||
| 210 | |||
| 211 | /** | ||
| 212 | * scif_accept() - Accept a connection on an endpoint | ||
| 213 | * @epd: endpoint descriptor | ||
| 214 | * @peer: global id of port to which connected | ||
| 215 | * @newepd: new connected endpoint descriptor | ||
| 216 | * @flags: flags | ||
| 217 | * | ||
| 218 | * The scif_accept() call extracts the first connection request from the queue | ||
| 219 | * of pending connections for the port on which epd is listening. scif_accept() | ||
| 220 | * creates a new endpoint, bound to the same port as epd, and allocates a new | ||
| 221 | * SCIF endpoint descriptor, returned in newepd, for the endpoint. The new | ||
| 222 | * endpoint is connected to the endpoint through which the connection was | ||
| 223 | * requested. epd is unaffected by this call, and remains in the listening | ||
| 224 | * state. | ||
| 225 | * | ||
| 226 | * On successful return, peer holds the global port identifier (node id and | ||
| 227 | * local port number) of the port which requested the connection. | ||
| 228 | * | ||
| 229 | * A connection is terminated when an endpoint of the connection is closed, | ||
| 230 | * either explicitly by scif_close(), or when a process that owns one of the | ||
| 231 | * endpoints of the connection is terminated. | ||
| 232 | * | ||
| 233 | * The number of connections that can (subsequently) be accepted on epd is only | ||
| 234 | * limited by system resources (memory). | ||
| 235 | * | ||
| 236 | * The flags argument is formed by OR'ing together zero or more of the | ||
| 237 | * following values. | ||
| 238 | * SCIF_ACCEPT_SYNC - block until a connection request is presented. If | ||
| 239 | * SCIF_ACCEPT_SYNC is not in flags, and no pending | ||
| 240 | * connections are present on the queue, scif_accept() | ||
| 241 | * fails with an EAGAIN error | ||
| 242 | * | ||
| 243 | * In user mode, the select() and poll() functions can be used to determine | ||
| 244 | * when there is a connection request. In kernel mode, the scif_poll() | ||
| 245 | * function may be used for this purpose. A readable event will be delivered | ||
| 246 | * when a connection is requested. | ||
| 247 | * | ||
| 248 | * Return: | ||
| 249 | * Upon successful completion, scif_accept() returns 0; otherwise in user mode | ||
| 250 | * -1 is returned and errno is set to indicate the error; in kernel mode the | ||
| 251 | * negative of one of the following errors is returned. | ||
| 252 | * | ||
| 253 | * Errors: | ||
| 254 | * EAGAIN - SCIF_ACCEPT_SYNC is not set and no connections are present to be | ||
| 255 | * accepted or SCIF_ACCEPT_SYNC is not set and remote node failed to complete | ||
| 256 | * its connection request | ||
| 257 | * EBADF, ENOTTY - epd is not a valid endpoint descriptor | ||
| 258 | * EINTR - Interrupted function | ||
| 259 | * EINVAL - epd is not a listening endpoint, or flags is invalid, or peer is | ||
| 260 | * NULL, or newepd is NULL | ||
| 261 | * ENODEV - The requesting node is lost, or it existed but is not currently in | ||
| 262 | * the network since it may have crashed | ||
| 263 | * ENOMEM - Not enough space | ||
| 264 | * ENOENT - Secondary part of epd registration failed | ||
| 265 | */ | ||
| 266 | int scif_accept(scif_epd_t epd, struct scif_port_id *peer, scif_epd_t | ||
| 267 | *newepd, int flags); | ||
| 268 | |||
| 269 | /** | ||
| 270 | * scif_close() - Close an endpoint | ||
| 271 | * @epd: endpoint descriptor | ||
| 272 | * | ||
| 273 | * scif_close() closes an endpoint and performs necessary teardown of | ||
| 274 | * facilities associated with that endpoint. | ||
| 275 | * | ||
| 276 | * If epd is a listening endpoint then it will no longer accept connection | ||
| 277 | * requests on the port to which it is bound. Any pending connection requests | ||
| 278 | * are rejected. | ||
| 279 | * | ||
| 280 | * If epd is a connected endpoint, then its peer endpoint is also closed. RMAs | ||
| 281 | * which are in-process through epd or its peer endpoint will complete before | ||
| 282 | * scif_close() returns. Registered windows of the local and peer endpoints are | ||
| 283 | * released as if scif_unregister() was called against each window. | ||
| 284 | * | ||
| 285 | * Closing a SCIF endpoint does not affect local registered memory mapped by | ||
| 286 | * a SCIF endpoint on a remote node. The local memory remains mapped by the peer | ||
| 287 | * SCIF endpoint until explicitly removed by calling munmap(..) by the peer. | ||
| 288 | * | ||
| 289 | * If the peer endpoint's receive queue is not empty at the time that epd is | ||
| 290 | * closed, then the peer endpoint can be passed as the endpoint parameter to | ||
| 291 | * scif_recv() until the receive queue is empty. | ||
| 292 | * | ||
| 293 | * epd is freed and may no longer be accessed. | ||
| 294 | * | ||
| 295 | * Return: | ||
| 296 | * Upon successful completion, scif_close() returns 0; otherwise in user mode | ||
| 297 | * -1 is returned and errno is set to indicate the error; in kernel mode the | ||
| 298 | * negative of one of the following errors is returned. | ||
| 299 | * | ||
| 300 | * Errors: | ||
| 301 | * EBADF, ENOTTY - epd is not a valid endpoint descriptor | ||
| 302 | */ | ||
| 303 | int scif_close(scif_epd_t epd); | ||
| 304 | |||
| 305 | /** | ||
| 306 | * scif_send() - Send a message | ||
| 307 | * @epd: endpoint descriptor | ||
| 308 | * @msg: message buffer address | ||
| 309 | * @len: message length | ||
| 310 | * @flags: blocking mode flags | ||
| 311 | * | ||
| 312 | * scif_send() sends data to the peer of endpoint epd. Up to len bytes of data | ||
| 313 | * are copied from memory starting at address msg. On successful execution the | ||
| 314 | * return value of scif_send() is the number of bytes that were sent, and is | ||
| 315 | * zero if no bytes were sent because len was zero. scif_send() may be called | ||
| 316 | * only when the endpoint is in a connected state. | ||
| 317 | * | ||
| 318 | * If a scif_send() call is non-blocking, then it sends only those bytes which | ||
| 319 | * can be sent without waiting, up to a maximum of len bytes. | ||
| 320 | * | ||
| 321 | * If a scif_send() call is blocking, then it normally returns after sending | ||
| 322 | * all len bytes. If a blocking call is interrupted or the connection is | ||
| 323 | * reset, the call is considered successful if some bytes were sent or len is | ||
| 324 | * zero, otherwise the call is considered unsuccessful. | ||
| 325 | * | ||
| 326 | * In user mode, the select() and poll() functions can be used to determine | ||
| 327 | * when the send queue is not full. In kernel mode, the scif_poll() function | ||
| 328 | * may be used for this purpose. | ||
| 329 | * | ||
| 330 | * It is recommended that scif_send()/scif_recv() only be used for short | ||
| 331 | * control-type message communication between SCIF endpoints. The SCIF RMA | ||
| 332 | * APIs are expected to provide better performance for transfer sizes of | ||
| 333 | * 1024 bytes or longer for the current MIC hardware and software | ||
| 334 | * implementation. | ||
| 335 | * | ||
| 336 | * scif_send() will block until the entire message is sent if SCIF_SEND_BLOCK | ||
| 337 | * is passed as the flags argument. | ||
| 338 | * | ||
| 339 | * Return: | ||
| 340 | * Upon successful completion, scif_send() returns the number of bytes sent; | ||
| 341 | * otherwise in user mode -1 is returned and errno is set to indicate the | ||
| 342 | * error; in kernel mode the negative of one of the following errors is | ||
| 343 | * returned. | ||
| 344 | * | ||
| 345 | * Errors: | ||
| 346 | * EBADF, ENOTTY - epd is not a valid endpoint descriptor | ||
| 347 | * ECONNRESET - Connection reset by peer | ||
| 348 | * EFAULT - An invalid address was specified for a parameter | ||
| 349 | * EINVAL - flags is invalid, or len is negative | ||
| 350 | * ENODEV - The remote node is lost, or it existed but is not currently in | ||
| 351 | * the network since it may have crashed | ||
| 352 | * ENOMEM - Not enough space | ||
| 353 | * ENOTCONN - The endpoint is not connected | ||
| 354 | */ | ||
| 355 | int scif_send(scif_epd_t epd, void *msg, int len, int flags); | ||
| 356 | |||
| 357 | /** | ||
| 358 | * scif_recv() - Receive a message | ||
| 359 | * @epd: endpoint descriptor | ||
| 360 | * @msg: message buffer address | ||
| 361 | * @len: message buffer length | ||
| 362 | * @flags: blocking mode flags | ||
| 363 | * | ||
| 364 | * scif_recv() receives data from the peer of endpoint epd. Up to len bytes of | ||
| 365 | * data are copied to memory starting at address msg. On successful execution | ||
| 366 | * the return value of scif_recv() is the number of bytes that were received, | ||
| 367 | * and is zero if no bytes were received because len was zero. scif_recv() may | ||
| 368 | * be called only when the endpoint is in a connected state. | ||
| 369 | * | ||
| 370 | * If a scif_recv() call is non-blocking, then it receives only those bytes | ||
| 371 | * which can be received without waiting, up to a maximum of len bytes. | ||
| 372 | * | ||
| 373 | * If a scif_recv() call is blocking, then it normally returns after receiving | ||
| 374 | * all len bytes. If the blocking call was interrupted due to a disconnection, | ||
| 375 | * subsequent calls to scif_recv() will copy all bytes received up to the point | ||
| 376 | * of disconnection. | ||
| 377 | * | ||
| 378 | * In user mode, the select() and poll() functions can be used to determine | ||
| 379 | * when data is available to be received. In kernel mode, the scif_poll() | ||
| 380 | * function may be used for this purpose. | ||
| 381 | * | ||
| 382 | * It is recommended that scif_send()/scif_recv() only be used for short | ||
| 383 | * control-type message communication between SCIF endpoints. The SCIF RMA | ||
| 384 | * APIs are expected to provide better performance for transfer sizes of | ||
| 385 | * 1024 bytes or longer for the current MIC hardware and software | ||
| 386 | * implementation. | ||
| 387 | * | ||
| 388 | * scif_recv() will block until the entire message is received if | ||
| 389 | * SCIF_RECV_BLOCK is passed as the flags argument. | ||
| 390 | * | ||
| 391 | * Return: | ||
| 392 | * Upon successful completion, scif_recv() returns the number of bytes | ||
| 393 | * received; otherwise in user mode -1 is returned and errno is set to | ||
| 394 | * indicate the error; in kernel mode the negative of one of the following | ||
| 395 | * errors is returned. | ||
| 396 | * | ||
| 397 | * Errors: | ||
| 398 | * EAGAIN - The destination node is returning from a low power state | ||
| 399 | * EBADF, ENOTTY - epd is not a valid endpoint descriptor | ||
| 400 | * ECONNRESET - Connection reset by peer | ||
| 401 | * EFAULT - An invalid address was specified for a parameter | ||
| 402 | * EINVAL - flags is invalid, or len is negative | ||
| 403 | * ENODEV - The remote node is lost, or it existed but is not currently in | ||
| 404 | * the network since it may have crashed | ||
| 405 | * ENOMEM - Not enough space | ||
| 406 | * ENOTCONN - The endpoint is not connected | ||
| 407 | */ | ||
| 408 | int scif_recv(scif_epd_t epd, void *msg, int len, int flags); | ||
| 409 | |||
| 410 | /** | ||
| 411 | * scif_register() - Mark a memory region for remote access. | ||
| 412 | * @epd: endpoint descriptor | ||
| 413 | * @addr: starting virtual address | ||
| 414 | * @len: length of range | ||
| 415 | * @offset: offset of window | ||
| 416 | * @prot_flags: read/write protection flags | ||
| 417 | * @map_flags: mapping flags | ||
| 418 | * | ||
| 419 | * The scif_register() function opens a window, a range of whole pages of the | ||
| 420 | * registered address space of the endpoint epd, starting at offset po and | ||
| 421 | * continuing for len bytes. The value of po, further described below, is a | ||
| 422 | * function of the parameters offset and len, and the value of map_flags. Each | ||
| 423 | * page of the window represents the physical memory page which backs the | ||
| 424 | * corresponding page of the range of virtual address pages starting at addr | ||
| 425 | * and continuing for len bytes. addr and len are constrained to be multiples | ||
| 426 | * of the page size. A successful scif_register() call returns po. | ||
| 427 | * | ||
| 428 | * When SCIF_MAP_FIXED is set in the map_flags argument, po will be offset | ||
| 429 | * exactly, and offset is constrained to be a multiple of the page size. The | ||
| 430 | * mapping established by scif_register() will not replace any existing | ||
| 431 | * registration; an error is returned if any page within the range [offset, | ||
| 432 | * offset + len - 1] intersects an existing window. | ||
| 433 | * | ||
| 434 | * When SCIF_MAP_FIXED is not set, the implementation uses offset in an | ||
| 435 | * implementation-defined manner to arrive at po. The po value so chosen will | ||
| 436 | * be an area of the registered address space that the implementation deems | ||
| 437 | * suitable for a mapping of len bytes. An offset value of 0 is interpreted as | ||
| 438 | * granting the implementation complete freedom in selecting po, subject to | ||
| 439 | * constraints described below. A non-zero value of offset is taken to be a | ||
| 440 | * suggestion of an offset near which the mapping should be placed. When the | ||
| 441 | * implementation selects a value for po, it does not replace any extant | ||
| 442 | * window. In all cases, po will be a multiple of the page size. | ||
| 443 | * | ||
| 444 | * The physical pages which are so represented by a window are available for | ||
| 445 | * access in calls to mmap(), scif_readfrom(), scif_writeto(), | ||
| 446 | * scif_vreadfrom(), and scif_vwriteto(). While a window is registered, the | ||
| 447 | * physical pages represented by the window will not be reused by the memory | ||
| 448 | * subsystem for any other purpose. Note that the same physical page may be | ||
| 449 | * represented by multiple windows. | ||
| 450 | * | ||
| 451 | * Subsequent operations which change the memory pages to which virtual | ||
| 452 | * addresses are mapped (such as mmap(), munmap()) have no effect on | ||
| 453 | * existing windows. | ||
| 454 | * | ||
| 455 | * If the process will fork(), it is recommended that the registered | ||
| 456 | * virtual address range be marked with MADV_DONTFORK. Doing so will prevent | ||
| 457 | * problems due to copy-on-write semantics. | ||
| 458 | * | ||
| 459 | * The prot_flags argument is formed by OR'ing together one or more of the | ||
| 460 | * following values. | ||
| 461 | * SCIF_PROT_READ - allow read operations from the window | ||
| 462 | * SCIF_PROT_WRITE - allow write operations to the window | ||
| 463 | * | ||
| 464 | * The map_flags argument can be set to SCIF_MAP_FIXED which interprets a | ||
| 465 | * fixed offset. | ||
| 466 | * | ||
| 467 | * Return: | ||
| 468 | * Upon successful completion, scif_register() returns the offset at which the | ||
| 469 | * mapping was placed (po); otherwise in user mode SCIF_REGISTER_FAILED (that | ||
| 470 | * is (off_t)-1) is returned and errno is set to indicate the error; in | ||
| 471 | * kernel mode the negative of one of the following errors is returned. | ||
| 472 | * | ||
| 473 | * Errors: | ||
| 474 | * EADDRINUSE - SCIF_MAP_FIXED is set in map_flags, and pages in the range | ||
| 475 | * [offset, offset + len -1] are already registered | ||
| 476 | * EAGAIN - The mapping could not be performed due to lack of resources | ||
| 477 | * EBADF, ENOTTY - epd is not a valid endpoint descriptor | ||
| 478 | * ECONNRESET - Connection reset by peer | ||
| 479 | * EFAULT - Addresses in the range [addr, addr + len - 1] are invalid | ||
| 480 | * EINVAL - map_flags is invalid, or prot_flags is invalid, or SCIF_MAP_FIXED is | ||
| 481 | * set in map_flags, and offset is not a multiple of the page size, or addr is not a | ||
| 482 | * multiple of the page size, or len is not a multiple of the page size, or is | ||
| 483 | * 0, or offset is negative | ||
| 484 | * ENODEV - The remote node is lost, or it existed but is not currently in | ||
| 485 | * the network since it may have crashed | ||
| 486 | * ENOMEM - Not enough space | ||
| 487 | * ENOTCONN - The endpoint is not connected | ||
| 488 | */ | ||
| 489 | off_t scif_register(scif_epd_t epd, void *addr, size_t len, off_t offset, | ||
| 490 | int prot_flags, int map_flags); | ||
| 491 | |||
| 492 | /** | ||
| 493 | * scif_unregister() - Remove a memory region from remote access. | ||
| 494 | * @epd: endpoint descriptor | ||
| 495 | * @offset: start of range to unregister | ||
| 496 | * @len: length of range to unregister | ||
| 497 | * | ||
| 498 | * The scif_unregister() function closes those previously registered windows | ||
| 499 | * which are entirely within the range [offset, offset + len - 1]. It is an | ||
| 500 | * error to specify a range which intersects only a subrange of a window. | ||
| 501 | * | ||
| 502 | * On a successful return, pages within the window may no longer be specified | ||
| 503 | * in calls to mmap(), scif_readfrom(), scif_writeto(), scif_vreadfrom(), | ||
| 504 | * scif_vwriteto(), scif_get_pages(), and scif_fence_signal(). The window, | ||
| 505 | * however, continues to exist until all previous references against it are | ||
| 506 | * removed. A window is referenced if there is a mapping to it created by | ||
| 507 | * mmap(), or if scif_get_pages() was called against the window | ||
| 508 | * (and the pages have not been returned via scif_put_pages()). A window is | ||
| 509 | * also referenced while an RMA, in which some range of the window is a source | ||
| 510 | * or destination, is in progress. Finally a window is referenced while some | ||
| 511 | * offset in that window was specified to scif_fence_signal(), and the RMAs | ||
| 512 | * marked by that call to scif_fence_signal() have not completed. While a | ||
| 513 | * window is in this state, its registered address space pages are not | ||
| 514 | * available for use in a new registered window. | ||
| 515 | * | ||
| 516 | * When all such references to the window have been removed, its references to | ||
| 517 | * all the physical pages which it represents are removed. Similarly, the | ||
| 518 | * registered address space pages of the window become available for | ||
| 519 | * registration in a new window. | ||
| 520 | * | ||
| 521 | * Return: | ||
| 522 | * Upon successful completion, scif_unregister() returns 0; otherwise in user | ||
| 523 | * mode -1 is returned and errno is set to indicate the error; in kernel mode | ||
| 524 | * the negative of one of the following errors is returned. In the event of an | ||
| 525 | * error, no windows are unregistered. | ||
| 526 | * | ||
| 527 | * Errors: | ||
| 528 | * EBADF, ENOTTY - epd is not a valid endpoint descriptor | ||
| 529 | * ECONNRESET - Connection reset by peer | ||
| 530 | * EINVAL - the range [offset, offset + len - 1] intersects a subrange of a | ||
| 531 | * window, or offset is negative | ||
| 532 | * ENODEV - The remote node is lost, or it existed but is not currently in | ||
| 533 | * the network since it may have crashed | ||
| 534 | * ENOTCONN - The endpoint is not connected | ||
| 535 | * ENXIO - Offsets in the range [offset, offset + len - 1] are invalid for the | ||
| 536 | * registered address space of epd | ||
| 537 | */ | ||
| 538 | int scif_unregister(scif_epd_t epd, off_t offset, size_t len); | ||
| 539 | |||
| 540 | /** | ||
| 541 | * scif_readfrom() - Copy from a remote address space | ||
| 542 | * @epd: endpoint descriptor | ||
| 543 | * @loffset: offset in local registered address space to | ||
| 544 | * which to copy | ||
| 545 | * @len: length of range to copy | ||
| 546 | * @roffset: offset in remote registered address space | ||
| 547 | * from which to copy | ||
| 548 | * @rma_flags: transfer mode flags | ||
| 549 | * | ||
| 550 | * scif_readfrom() copies len bytes from the remote registered address space of | ||
| 551 | * the peer of endpoint epd, starting at the offset roffset to the local | ||
| 552 | * registered address space of epd, starting at the offset loffset. | ||
| 553 | * | ||
| 554 | * Each of the specified ranges [loffset, loffset + len - 1] and [roffset, | ||
| 555 | * roffset + len - 1] must be within some registered window or windows of the | ||
| 556 | * local and remote nodes. A range may intersect multiple registered windows, | ||
| 557 | * but only if those windows are contiguous in the registered address space. | ||
| 558 | * | ||
| 559 | * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using | ||
| 560 | * programmed read/writes. Otherwise the data is copied using DMA. If rma_flags | ||
| 561 | * includes SCIF_RMA_SYNC, then scif_readfrom() will return after the transfer | ||
| 562 | * is complete. Otherwise, the transfer may be performed asynchronously. The | ||
| 563 | * order in which any two asynchronous RMA operations complete is | ||
| 564 | * non-deterministic. The synchronization functions, scif_fence_mark()/ | ||
| 565 | * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to | ||
| 566 | * the completion of asynchronous RMA operations on the same endpoint. | ||
| 567 | * | ||
| 568 | * The DMA transfer of individual bytes is not guaranteed to complete in | ||
| 569 | * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last | ||
| 570 | * cacheline or partial cacheline of the source range will become visible on | ||
| 571 | * the destination node after all other transferred data in the source | ||
| 572 | * range has become visible on the destination node. | ||
| 573 | * | ||
| 574 | * The optimal DMA performance will likely be realized if both | ||
| 575 | * loffset and roffset are cacheline aligned (are a multiple of 64). Lower | ||
| 576 | * performance will likely be realized if loffset and roffset are not | ||
| 577 | * cacheline aligned but are separated by some multiple of 64. The lowest level | ||
| 578 | * of performance is likely if loffset and roffset are not separated by a | ||
| 579 | * multiple of 64. | ||
| 580 | * | ||
| 581 | * The rma_flags argument is formed by ORing together zero or more of the | ||
| 582 | * following values. | ||
| 583 | * SCIF_RMA_USECPU - perform the transfer using the CPU, otherwise use the DMA | ||
| 584 | * engine. | ||
| 585 | * SCIF_RMA_SYNC - perform the transfer synchronously, returning after the | ||
| 586 | * transfer has completed. Passing this flag results in the | ||
| 587 | * current implementation busy waiting and consuming CPU cycles | ||
| 588 | * while the DMA transfer is in progress for best performance by | ||
| 589 | * avoiding the interrupt latency. | ||
| 590 | * SCIF_RMA_ORDERED - ensure that the last cacheline or partial cacheline of | ||
| 591 | * the source range becomes visible on the destination node | ||
| 592 | * after all other transferred data in the source range has | ||
| 593 | * become visible on the destination | ||
| 594 | * | ||
| 595 | * Return: | ||
| 596 | * Upon successful completion, scif_readfrom() returns 0; otherwise in user | ||
| 597 | * mode -1 is returned and errno is set to indicate the error; in kernel mode | ||
| 598 | * the negative of one of the following errors is returned. | ||
| 599 | * | ||
| 600 | * Errors: | ||
| 601 | * EACCES - Attempt to write to a read-only range | ||
| 602 | * EBADF, ENOTTY - epd is not a valid endpoint descriptor | ||
| 603 | * ECONNRESET - Connection reset by peer | ||
| 604 | * EINVAL - rma_flags is invalid | ||
| 605 | * ENODEV - The remote node is lost or existed, but is not currently in the | ||
| 606 | * network since it may have crashed | ||
| 607 | * ENOTCONN - The endpoint is not connected | ||
| 608 | * ENXIO - The range [loffset, loffset + len - 1] is invalid for the registered | ||
| 609 | * address space of epd, or, The range [roffset, roffset + len - 1] is invalid | ||
| 610 | * for the registered address space of the peer of epd, or loffset or roffset | ||
| 611 | * is negative | ||
| 612 | */ | ||
| 613 | int scif_readfrom(scif_epd_t epd, off_t loffset, size_t len, off_t | ||
| 614 | roffset, int rma_flags); | ||
| 615 | |||
| 616 | /** | ||
| 617 | * scif_writeto() - Copy to a remote address space | ||
| 618 | * @epd: endpoint descriptor | ||
| 619 | * @loffset: offset in local registered address space | ||
| 620 | * from which to copy | ||
| 621 | * @len: length of range to copy | ||
| 622 | * @roffset: offset in remote registered address space to | ||
| 623 | * which to copy | ||
| 624 | * @rma_flags: transfer mode flags | ||
| 625 | * | ||
| 626 | * scif_writeto() copies len bytes from the local registered address space of | ||
| 627 | * epd, starting at the offset loffset to the remote registered address space | ||
| 628 | * of the peer of endpoint epd, starting at the offset roffset. | ||
| 629 | * | ||
| 630 | * Each of the specified ranges [loffset, loffset + len - 1] and [roffset, | ||
| 631 | * roffset + len - 1] must be within some registered window or windows of the | ||
| 632 | * local and remote nodes. A range may intersect multiple registered windows, | ||
| 633 | * but only if those windows are contiguous in the registered address space. | ||
| 634 | * | ||
| 635 | * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using | ||
| 636 | * programmed read/writes. Otherwise the data is copied using DMA. If rma_flags | ||
| 637 | * includes SCIF_RMA_SYNC, then scif_writeto() will return after the transfer | ||
| 638 | * is complete. Otherwise, the transfer may be performed asynchronously. The | ||
| 639 | * order in which any two asynchronous RMA operations complete is | ||
| 640 | * non-deterministic. The synchronization functions, scif_fence_mark()/ | ||
| 641 | * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to | ||
| 642 | * the completion of asynchronous RMA operations on the same endpoint. | ||
| 643 | * | ||
| 644 | * The DMA transfer of individual bytes is not guaranteed to complete in | ||
| 645 | * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last | ||
| 646 | * cacheline or partial cacheline of the source range will become visible on | ||
| 647 | * the destination node after all other transferred data in the source | ||
| 648 | * range has become visible on the destination node. | ||
| 649 | * | ||
| 650 | * The optimal DMA performance will likely be realized if both | ||
| 651 | * loffset and roffset are cacheline aligned (are a multiple of 64). Lower | ||
| 652 | * performance will likely be realized if loffset and roffset are not cacheline | ||
| 653 | * aligned but are separated by some multiple of 64. The lowest level of | ||
| 654 | * performance is likely if loffset and roffset are not separated by a multiple | ||
| 655 | * of 64. | ||
| 656 | * | ||
| 657 | * The rma_flags argument is formed by ORing together zero or more of the | ||
| 658 | * following values. | ||
| 659 | * SCIF_RMA_USECPU - perform the transfer using the CPU, otherwise use the DMA | ||
| 660 | * engine. | ||
| 661 | * SCIF_RMA_SYNC - perform the transfer synchronously, returning after the | ||
| 662 | * transfer has completed. Passing this flag results in the | ||
| 663 | * current implementation busy waiting and consuming CPU cycles | ||
| 664 | * while the DMA transfer is in progress for best performance by | ||
| 665 | * avoiding the interrupt latency. | ||
| 666 | * SCIF_RMA_ORDERED - ensure that the last cacheline or partial cacheline of | ||
| 667 | * the source range becomes visible on the destination node | ||
| 668 | * after all other transferred data in the source range has | ||
| 669 | * become visible on the destination | ||
| 670 | * | ||
| 671 | * Return: | ||
| 672 | * Upon successful completion, scif_writeto() returns 0; otherwise in user | ||
| 673 | * mode -1 is returned and errno is set to indicate the error; in kernel mode | ||
| 674 | * the negative of one of the following errors is returned. | ||
| 675 | * | ||
| 676 | * Errors: | ||
| 677 | * EACCES - Attempt to write to a read-only range | ||
| 678 | * EBADF, ENOTTY - epd is not a valid endpoint descriptor | ||
| 679 | * ECONNRESET - Connection reset by peer | ||
| 680 | * EINVAL - rma_flags is invalid | ||
| 681 | * ENODEV - The remote node is lost or existed, but is not currently in the | ||
| 682 | * network since it may have crashed | ||
| 683 | * ENOTCONN - The endpoint is not connected | ||
| 684 | * ENXIO - The range [loffset, loffset + len - 1] is invalid for the registered | ||
| 685 | * address space of epd, or, The range [roffset, roffset + len - 1] is invalid | ||
| 686 | * for the registered address space of the peer of epd, or loffset or roffset | ||
| 687 | * is negative | ||
| 688 | */ | ||
| 689 | int scif_writeto(scif_epd_t epd, off_t loffset, size_t len, off_t | ||
| 690 | roffset, int rma_flags); | ||
| 691 | |||
| 692 | /** | ||
| 693 | * scif_vreadfrom() - Copy from a remote address space | ||
| 694 | * @epd: endpoint descriptor | ||
| 695 | * @addr: address to which to copy | ||
| 696 | * @len: length of range to copy | ||
| 697 | * @roffset: offset in remote registered address space | ||
| 698 | * from which to copy | ||
| 699 | * @rma_flags: transfer mode flags | ||
| 700 | * | ||
| 701 | * scif_vreadfrom() copies len bytes from the remote registered address | ||
| 702 | * space of the peer of endpoint epd, starting at the offset roffset, to local | ||
| 703 | * memory, starting at addr. | ||
| 704 | * | ||
| 705 | * The specified range [roffset, roffset + len - 1] must be within some | ||
| 706 | * registered window or windows of the remote nodes. The range may | ||
| 707 | * intersect multiple registered windows, but only if those windows are | ||
| 708 | * contiguous in the registered address space. | ||
| 709 | * | ||
| 710 | * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using | ||
| 711 | * programmed read/writes. Otherwise the data is copied using DMA. If rma_flags | ||
| 712 | * includes SCIF_RMA_SYNC, then scif_vreadfrom() will return after the transfer | ||
| 713 | * is complete. Otherwise, the transfer may be performed asynchronously. The | ||
| 714 | * order in which any two asynchronous RMA operations complete is | ||
| 715 | * non-deterministic. The synchronization functions, scif_fence_mark()/ | ||
| 716 | * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to | ||
| 717 | * the completion of asynchronous RMA operations on the same endpoint. | ||
| 718 | * | ||
| 719 | * The DMA transfer of individual bytes is not guaranteed to complete in | ||
| 720 | * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last | ||
| 721 | * cacheline or partial cacheline of the source range will become visible on | ||
| 722 | * the destination node after all other transferred data in the source | ||
| 723 | * range has become visible on the destination node. | ||
| 724 | * | ||
| 725 | * If rma_flags includes SCIF_RMA_USECACHE, then the physical pages which back | ||
| 726 | * the specified local memory range may remain in a pinned state even after | ||
| 727 | * the specified transfer completes. This may reduce overhead if some or all of | ||
| 728 | * the same virtual address range is referenced in a subsequent call of | ||
| 729 | * scif_vreadfrom() or scif_vwriteto(). | ||
| 730 | * | ||
| 731 | * The optimal DMA performance will likely be realized if both | ||
| 732 | * addr and roffset are cacheline aligned (are a multiple of 64). Lower | ||
| 733 | * performance will likely be realized if addr and roffset are not | ||
| 734 | * cacheline aligned but are separated by some multiple of 64. The lowest level | ||
| 735 | * of performance is likely if addr and roffset are not separated by a | ||
| 736 | * multiple of 64. | ||
| 737 | * | ||
| 738 | * The rma_flags argument is formed by ORing together zero or more of the | ||
| 739 | * following values. | ||
| 740 | * SCIF_RMA_USECPU - perform the transfer using the CPU, otherwise use the DMA | ||
| 741 | * engine. | ||
| 742 | * SCIF_RMA_USECACHE - enable registration caching | ||
| 743 | * SCIF_RMA_SYNC - perform the transfer synchronously, returning after the | ||
| 744 | * transfer has completed. Passing this flag results in the | ||
| 745 | * current implementation busy waiting and consuming CPU cycles | ||
| 746 | * while the DMA transfer is in progress for best performance by | ||
| 747 | * avoiding the interrupt latency. | ||
| 748 | * SCIF_RMA_ORDERED - ensure that the last cacheline or partial cacheline of | ||
| 749 | * the source range becomes visible on the destination node | ||
| 750 | * after all other transferred data in the source range has | ||
| 751 | * become visible on the destination | ||
| 752 | * | ||
| 753 | * Return: | ||
| 754 | * Upon successful completion, scif_vreadfrom() returns 0; otherwise in user | ||
| 755 | * mode -1 is returned and errno is set to indicate the error; in kernel mode | ||
| 756 | * the negative of one of the following errors is returned. | ||
| 757 | * | ||
| 758 | * Errors: | ||
| 759 | * EACCES - Attempt to write to a read-only range | ||
| 760 | * EBADF, ENOTTY - epd is not a valid endpoint descriptor | ||
| 761 | * ECONNRESET - Connection reset by peer | ||
| 762 | * EFAULT - Addresses in the range [addr, addr + len - 1] are invalid | ||
| 763 | * EINVAL - rma_flags is invalid | ||
| 764 | * ENODEV - The remote node is lost or existed, but is not currently in the | ||
| 765 | * network since it may have crashed | ||
| 766 | * ENOTCONN - The endpoint is not connected | ||
| 767 | * ENXIO - Offsets in the range [roffset, roffset + len - 1] are invalid for the | ||
| 768 | * registered address space of the peer of epd | ||
| 769 | */ | ||
| 770 | int scif_vreadfrom(scif_epd_t epd, void *addr, size_t len, off_t roffset, | ||
| 771 | int rma_flags); | ||
| 772 | |||
| 773 | /** | ||
| 774 | * scif_vwriteto() - Copy to a remote address space | ||
| 775 | * @epd: endpoint descriptor | ||
| 776 | * @addr: address from which to copy | ||
| 777 | * @len: length of range to copy | ||
| 778 | * @roffset: offset in remote registered address space to | ||
| 779 | * which to copy | ||
| 780 | * @rma_flags: transfer mode flags | ||
| 781 | * | ||
| 782 | * scif_vwriteto() copies len bytes from the local memory, starting at addr, to | ||
| 783 | * the remote registered address space of the peer of endpoint epd, starting at | ||
| 784 | * the offset roffset. | ||
| 785 | * | ||
| 786 | * The specified range [roffset, roffset + len - 1] must be within some | ||
| 787 | * registered window or windows of the remote nodes. The range may intersect | ||
| 788 | * multiple registered windows, but only if those windows are contiguous in the | ||
| 789 | * registered address space. | ||
| 790 | * | ||
| 791 | * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using | ||
| 792 | * programmed read/writes. Otherwise the data is copied using DMA. If rma_flags | ||
| 793 | * includes SCIF_RMA_SYNC, then scif_vwriteto() will return after the transfer | ||
| 794 | * is complete. Otherwise, the transfer may be performed asynchronously. The | ||
| 795 | * order in which any two asynchronous RMA operations complete is | ||
| 796 | * non-deterministic. The synchronization functions, scif_fence_mark()/ | ||
| 797 | * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to | ||
| 798 | * the completion of asynchronous RMA operations on the same endpoint. | ||
| 799 | * | ||
| 800 | * The DMA transfer of individual bytes is not guaranteed to complete in | ||
| 801 | * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last | ||
| 802 | * cacheline or partial cacheline of the source range will become visible on | ||
| 803 | * the destination node after all other transferred data in the source | ||
| 804 | * range has become visible on the destination node. | ||
| 805 | * | ||
| 806 | * If rma_flags includes SCIF_RMA_USECACHE, then the physical pages which back | ||
| 807 | * the specified local memory range may remain in a pinned state even after | ||
| 808 | * the specified transfer completes. This may reduce overhead if some or all of | ||
| 809 | * the same virtual address range is referenced in a subsequent call of | ||
| 810 | * scif_vreadfrom() or scif_vwriteto(). | ||
| 811 | * | ||
| 812 | * The optimal DMA performance will likely be realized if both | ||
| 813 | * addr and roffset are cacheline aligned (are a multiple of 64). Lower | ||
| 814 | * performance will likely be realized if addr and roffset are not cacheline | ||
| 815 | * aligned but are separated by some multiple of 64. The lowest level of | ||
| 816 | * performance is likely if addr and roffset are not separated by a multiple of | ||
| 817 | * 64. | ||
| 818 | * | ||
| 819 | * The rma_flags argument is formed by ORing together zero or more of the | ||
| 820 | * following values. | ||
| 821 | * SCIF_RMA_USECPU - perform the transfer using the CPU, otherwise use the DMA | ||
| 822 | * engine. | ||
| 823 | * SCIF_RMA_USECACHE - allow registration caching | ||
| 824 | * SCIF_RMA_SYNC - perform the transfer synchronously, returning after the | ||
| 825 | * transfer has completed. Passing this flag results in the | ||
| 826 | * current implementation busy waiting and consuming CPU cycles | ||
| 827 | * while the DMA transfer is in progress for best performance by | ||
| 828 | * avoiding the interrupt latency. | ||
| 829 | * SCIF_RMA_ORDERED - ensure that the last cacheline or partial cacheline of | ||
| 830 | * the source range becomes visible on the destination node | ||
| 831 | * after all other transferred data in the source range has | ||
| 832 | * become visible on the destination | ||
| 833 | * | ||
| 834 | * Return: | ||
| 835 | * Upon successful completion, scif_vwriteto() returns 0; otherwise in user | ||
| 836 | * mode -1 is returned and errno is set to indicate the error; in kernel mode | ||
| 837 | * the negative of one of the following errors is returned. | ||
| 838 | * | ||
| 839 | * Errors: | ||
| 840 | * EACCES - Attempt to write to a read-only range | ||
| 841 | * EBADF, ENOTTY - epd is not a valid endpoint descriptor | ||
| 842 | * ECONNRESET - Connection reset by peer | ||
| 843 | * EFAULT - Addresses in the range [addr, addr + len - 1] are invalid | ||
| 844 | * EINVAL - rma_flags is invalid | ||
| 845 | * ENODEV - The remote node is lost or existed, but is not currently in the | ||
| 846 | * network since it may have crashed | ||
| 847 | * ENOTCONN - The endpoint is not connected | ||
| 848 | * ENXIO - Offsets in the range [roffset, roffset + len - 1] are invalid for the | ||
| 849 | * registered address space of the peer of epd | ||
| 850 | */ | ||
| 851 | int scif_vwriteto(scif_epd_t epd, void *addr, size_t len, off_t roffset, | ||
| 852 | int rma_flags); | ||
| 853 | |||
| 854 | /** | ||
| 855 | * scif_fence_mark() - Mark previously issued RMAs | ||
| 856 | * @epd: endpoint descriptor | ||
| 857 | * @flags: control flags | ||
| 858 | * @mark: marked value returned as output. | ||
| 859 | * | ||
| 860 | * scif_fence_mark() returns after marking the current set of all uncompleted | ||
| 861 | * RMAs initiated through the endpoint epd or the current set of all | ||
| 862 | * uncompleted RMAs initiated through the peer of endpoint epd. The RMAs are | ||
| 863 | * marked with a value returned at mark. The application may subsequently call | ||
| 864 | * scif_fence_wait(), passing the value returned at mark, to await completion | ||
| 865 | * of all RMAs so marked. | ||
| 866 | * | ||
| 867 | * The flags argument has exactly one of the following values. | ||
| 868 | * SCIF_FENCE_INIT_SELF - RMA operations initiated through endpoint | ||
| 869 | * epd are marked | ||
| 870 | * SCIF_FENCE_INIT_PEER - RMA operations initiated through the peer | ||
| 871 | * of endpoint epd are marked | ||
| 872 | * | ||
| 873 | * Return: | ||
| 874 | * Upon successful completion, scif_fence_mark() returns 0; otherwise in user | ||
| 875 | * mode -1 is returned and errno is set to indicate the error; in kernel mode | ||
| 876 | * the negative of one of the following errors is returned. | ||
| 877 | * | ||
| 878 | * Errors: | ||
| 879 | * EBADF, ENOTTY - epd is not a valid endpoint descriptor | ||
| 880 | * ECONNRESET - Connection reset by peer | ||
| 881 | * EINVAL - flags is invalid | ||
| 882 | * ENODEV - The remote node is lost or existed, but is not currently in the | ||
| 883 | * network since it may have crashed | ||
| 884 | * ENOTCONN - The endpoint is not connected | ||
| 885 | * ENOMEM - Insufficient kernel memory was available | ||
| 886 | */ | ||
| 887 | int scif_fence_mark(scif_epd_t epd, int flags, int *mark); | ||
| 888 | |||
| 889 | /** | ||
| 890 | * scif_fence_wait() - Wait for completion of marked RMAs | ||
| 891 | * @epd: endpoint descriptor | ||
| 892 | * @mark: mark request | ||
| 893 | * | ||
| 894 | * scif_fence_wait() returns after all RMAs marked with mark have completed. | ||
| 895 | * The value passed in mark must have been obtained in a previous call to | ||
| 896 | * scif_fence_mark(). | ||
| 897 | * | ||
| 898 | * Return: | ||
| 899 | * Upon successful completion, scif_fence_wait() returns 0; otherwise in user | ||
| 900 | * mode -1 is returned and errno is set to indicate the error; in kernel mode | ||
| 901 | * the negative of one of the following errors is returned. | ||
| 902 | * | ||
| 903 | * Errors: | ||
| 904 | * EBADF, ENOTTY - epd is not a valid endpoint descriptor | ||
| 905 | * ECONNRESET - Connection reset by peer | ||
| 906 | * ENODEV - The remote node is lost or existed, but is not currently in the | ||
| 907 | * network since it may have crashed | ||
| 908 | * ENOTCONN - The endpoint is not connected | ||
| 909 | * ENOMEM - Insufficient kernel memory was available | ||
| 910 | */ | ||
| 911 | int scif_fence_wait(scif_epd_t epd, int mark); | ||
| 912 | |||
| 913 | /** | ||
| 914 | * scif_fence_signal() - Request a memory update on completion of RMAs | ||
| 915 | * @epd: endpoint descriptor | ||
| 916 | * @loff: local offset | ||
| 917 | * @lval: local value to write to loff | ||
| 918 | * @roff: remote offset | ||
| 919 | * @rval: remote value to write to roff | ||
| 920 | * @flags: flags | ||
| 921 | * | ||
| 922 | * scif_fence_signal() returns after marking the current set of all uncompleted | ||
| 923 | * RMAs initiated through the endpoint epd or marking the current set of all | ||
| 924 | * uncompleted RMAs initiated through the peer of endpoint epd. | ||
| 925 | * | ||
| 926 | * If flags includes SCIF_SIGNAL_LOCAL, then on completion of the RMAs in the | ||
| 927 | * marked set, lval is written to memory at the address corresponding to offset | ||
| 928 | * loff in the local registered address space of epd. loff must be within a | ||
| 929 | * registered window. If flags includes SCIF_SIGNAL_REMOTE, then on completion | ||
| 930 | * of the RMAs in the marked set, rval is written to memory at the address | ||
| 931 | * corresponding to offset roff in the remote registered address space of epd. | ||
| 932 | * roff must be within a remote registered window of the peer of epd. Note | ||
| 933 | * that any specified offset must be DWORD (4 byte / 32 bit) aligned. | ||
| 934 | * | ||
| 935 | * The flags argument is formed by OR'ing together the following. | ||
| 936 | * Exactly one of the following values. | ||
| 937 | * SCIF_FENCE_INIT_SELF - RMA operations initiated through endpoint | ||
| 938 | * epd are marked | ||
| 939 | * SCIF_FENCE_INIT_PEER - RMA operations initiated through the peer | ||
| 940 | * of endpoint epd are marked | ||
| 941 | * One or more of the following values. | ||
| 942 | * SCIF_SIGNAL_LOCAL - On completion of the marked set of RMAs, write lval to | ||
| 943 | * memory at the address corresponding to offset loff in the local | ||
| 944 | * registered address space of epd. | ||
| 945 | * SCIF_SIGNAL_REMOTE - On completion of the marked set of RMAs, write rval to | ||
| 946 | * memory at the address corresponding to offset roff in the remote | ||
| 947 | * registered address space of epd. | ||
| 948 | * | ||
| 949 | * Return: | ||
| 950 | * Upon successful completion, scif_fence_signal() returns 0; otherwise in | ||
| 951 | * user mode -1 is returned and errno is set to indicate the error; in kernel | ||
| 952 | * mode the negative of one of the following errors is returned. | ||
| 953 | * | ||
| 954 | * Errors: | ||
| 955 | * EBADF, ENOTTY - epd is not a valid endpoint descriptor | ||
| 956 | * ECONNRESET - Connection reset by peer | ||
| 957 | * EINVAL - flags is invalid, or loff or roff are not DWORD aligned | ||
| 958 | * ENODEV - The remote node is lost or existed, but is not currently in the | ||
| 959 | * network since it may have crashed | ||
| 960 | * ENOTCONN - The endpoint is not connected | ||
| 961 | * ENXIO - loff is invalid for the registered address space of epd, or roff is | ||
| 962 | * invalid for the registered address space of the peer of epd | ||
| 963 | */ | ||
| 964 | int scif_fence_signal(scif_epd_t epd, off_t loff, u64 lval, off_t roff, | ||
| 965 | u64 rval, int flags); | ||
| 966 | |||
| 967 | /** | ||
| 968 | * scif_get_node_ids() - Return information about online nodes | ||
| 969 | * @nodes: array in which to return online node IDs | ||
| 970 | * @len: number of entries in the nodes array | ||
| 971 | * @self: address to place the node ID of the local node | ||
| 972 | * | ||
| 973 | * scif_get_node_ids() fills in the nodes array with up to len node IDs of the | ||
| 974 | * nodes in the SCIF network. If there is not enough space in nodes, as | ||
| 975 | * indicated by the len parameter, only len node IDs are returned in nodes. The | ||
| 976 | * return value of scif_get_node_ids() is the total number of nodes currently in | ||
| 977 | * the SCIF network. By checking the return value against the len parameter, | ||
| 978 | * the user may determine if enough space for nodes was allocated. | ||
| 979 | * | ||
| 980 | * The node ID of the local node is returned at self. | ||
| 981 | * | ||
| 982 | * Return: | ||
| 983 | * Upon successful completion, scif_get_node_ids() returns the actual number of | ||
| 984 | * online nodes in the SCIF network including 'self'; otherwise in user mode | ||
| 985 | * -1 is returned and errno is set to indicate the error; in kernel mode no | ||
| 986 | * errors are returned. | ||
| 987 | * | ||
| 988 | * Errors: | ||
| 989 | * EFAULT - Bad address | ||
| 990 | */ | ||
| 991 | int scif_get_node_ids(u16 *nodes, int len, u16 *self); | ||
| 992 | |||
| 993 | #endif /* __SCIF_H__ */ | ||
diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index 1a0006a76b00..4ad65eebbff8 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild | |||
| @@ -352,6 +352,7 @@ header-y += rtc.h | |||
| 352 | header-y += rtnetlink.h | 352 | header-y += rtnetlink.h |
| 353 | header-y += scc.h | 353 | header-y += scc.h |
| 354 | header-y += sched.h | 354 | header-y += sched.h |
| 355 | header-y += scif_ioctl.h | ||
| 355 | header-y += screen_info.h | 356 | header-y += screen_info.h |
| 356 | header-y += sctp.h | 357 | header-y += sctp.h |
| 357 | header-y += sdla.h | 358 | header-y += sdla.h |
diff --git a/include/uapi/linux/scif_ioctl.h b/include/uapi/linux/scif_ioctl.h new file mode 100644 index 000000000000..4a94d917cf99 --- /dev/null +++ b/include/uapi/linux/scif_ioctl.h | |||
| @@ -0,0 +1,130 @@ | |||
| 1 | /* | ||
| 2 | * Intel MIC Platform Software Stack (MPSS) | ||
| 3 | * | ||
| 4 | * This file is provided under a dual BSD/GPLv2 license. When using or | ||
| 5 | * redistributing this file, you may do so under either license. | ||
| 6 | * | ||
| 7 | * GPL LICENSE SUMMARY | ||
| 8 | * | ||
| 9 | * Copyright(c) 2014 Intel Corporation. | ||
| 10 | * | ||
| 11 | * This program is free software; you can redistribute it and/or modify | ||
| 12 | * it under the terms of version 2 of the GNU General Public License as | ||
| 13 | * published by the Free Software Foundation. | ||
| 14 | * | ||
| 15 | * This program is distributed in the hope that it will be useful, but | ||
| 16 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 18 | * General Public License for more details. | ||
| 19 | * | ||
| 20 | * BSD LICENSE | ||
| 21 | * | ||
| 22 | * Copyright(c) 2014 Intel Corporation. | ||
| 23 | * | ||
| 24 | * Redistribution and use in source and binary forms, with or without | ||
| 25 | * modification, are permitted provided that the following conditions | ||
| 26 | * are met: | ||
| 27 | * | ||
| 28 | * * Redistributions of source code must retain the above copyright | ||
| 29 | * notice, this list of conditions and the following disclaimer. | ||
| 30 | * * Redistributions in binary form must reproduce the above copyright | ||
| 31 | * notice, this list of conditions and the following disclaimer in | ||
| 32 | * the documentation and/or other materials provided with the | ||
| 33 | * distribution. | ||
| 34 | * * Neither the name of Intel Corporation nor the names of its | ||
| 35 | * contributors may be used to endorse or promote products derived | ||
| 36 | * from this software without specific prior written permission. | ||
| 37 | * | ||
| 38 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
| 39 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
| 40 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
| 41 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
| 42 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
| 43 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
| 44 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
| 45 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
| 46 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
| 47 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
| 48 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
| 49 | * | ||
| 50 | * Intel SCIF driver. | ||
| 51 | * | ||
| 52 | */ | ||
| 53 | /* | ||
| 54 | * ----------------------------------------- | ||
| 55 | * SCIF IOCTL interface information | ||
| 56 | * ----------------------------------------- | ||
| 57 | */ | ||
| 58 | #ifndef SCIF_IOCTL_H | ||
| 59 | #define SCIF_IOCTL_H | ||
| 60 | |||
| 61 | #include <linux/types.h> | ||
| 62 | |||
| 63 | /** | ||
| 64 | * struct scif_port_id - SCIF port information | ||
| 65 | * @node: node on which port resides | ||
| 66 | * @port: local port number | ||
| 67 | */ | ||
| 68 | struct scif_port_id { | ||
| 69 | __u16 node; | ||
| 70 | __u16 port; | ||
| 71 | }; | ||
| 72 | |||
| 73 | /** | ||
| 74 | * struct scifioctl_connect - used for SCIF_CONNECT IOCTL | ||
| 75 | * @self: used to read back the assigned port_id | ||
| 76 | * @peer: destination node and port to connect to | ||
| 77 | */ | ||
| 78 | struct scifioctl_connect { | ||
| 79 | struct scif_port_id self; | ||
| 80 | struct scif_port_id peer; | ||
| 81 | }; | ||
| 82 | |||
| 83 | /** | ||
| 84 | * struct scifioctl_accept - used for SCIF_ACCEPTREQ IOCTL | ||
| 85 | * @flags: flags | ||
| 86 | * @peer: global id of peer endpoint | ||
| 87 | * @endpt: new connected endpoint descriptor | ||
| 88 | */ | ||
| 89 | struct scifioctl_accept { | ||
| 90 | __s32 flags; | ||
| 91 | struct scif_port_id peer; | ||
| 92 | __u64 endpt; | ||
| 93 | }; | ||
| 94 | |||
| 95 | /** | ||
| 96 | * struct scifioctl_msg - used for SCIF_SEND/SCIF_RECV IOCTL | ||
| 97 | * @msg: message buffer address | ||
| 98 | * @len: message length | ||
| 99 | * @flags: flags | ||
| 100 | * @out_len: number of bytes sent/received | ||
| 101 | */ | ||
| 102 | struct scifioctl_msg { | ||
| 103 | __u64 msg; | ||
| 104 | __s32 len; | ||
| 105 | __s32 flags; | ||
| 106 | __s32 out_len; | ||
| 107 | }; | ||
| 108 | |||
| 109 | /** | ||
| 110 | * struct scifioctl_node_ids - used for SCIF_GET_NODEIDS IOCTL | ||
| 111 | * @nodes: pointer to an array of node_ids | ||
| 112 | * @self: ID of the current node | ||
| 113 | * @len: length of array | ||
| 114 | */ | ||
| 115 | struct scifioctl_node_ids { | ||
| 116 | __u64 nodes; | ||
| 117 | __u64 self; | ||
| 118 | __s32 len; | ||
| 119 | }; | ||
| 120 | |||
| 121 | #define SCIF_BIND _IOWR('s', 1, __u64) | ||
| 122 | #define SCIF_LISTEN _IOW('s', 2, __s32) | ||
| 123 | #define SCIF_CONNECT _IOWR('s', 3, struct scifioctl_connect) | ||
| 124 | #define SCIF_ACCEPTREQ _IOWR('s', 4, struct scifioctl_accept) | ||
| 125 | #define SCIF_ACCEPTREG _IOWR('s', 5, __u64) | ||
| 126 | #define SCIF_SEND _IOWR('s', 6, struct scifioctl_msg) | ||
| 127 | #define SCIF_RECV _IOWR('s', 7, struct scifioctl_msg) | ||
| 128 | #define SCIF_GET_NODEIDS _IOWR('s', 14, struct scifioctl_node_ids) | ||
| 129 | |||
| 130 | #endif /* SCIF_IOCTL_H */ | ||
