aboutsummaryrefslogtreecommitdiffstats
path: root/include/linux/scif.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux/scif.h')
-rw-r--r--include/linux/scif.h366
1 files changed, 356 insertions, 10 deletions
diff --git a/include/linux/scif.h b/include/linux/scif.h
index 44f4f3898bbe..49a35d6edc94 100644
--- a/include/linux/scif.h
+++ b/include/linux/scif.h
@@ -55,6 +55,7 @@
55 55
56#include <linux/types.h> 56#include <linux/types.h>
57#include <linux/poll.h> 57#include <linux/poll.h>
58#include <linux/device.h>
58#include <linux/scif_ioctl.h> 59#include <linux/scif_ioctl.h>
59 60
60#define SCIF_ACCEPT_SYNC 1 61#define SCIF_ACCEPT_SYNC 1
@@ -92,6 +93,70 @@ enum {
92#define SCIF_PORT_RSVD 1088 93#define SCIF_PORT_RSVD 1088
93 94
94typedef struct scif_endpt *scif_epd_t; 95typedef struct scif_endpt *scif_epd_t;
96typedef struct scif_pinned_pages *scif_pinned_pages_t;
97
98/**
99 * struct scif_range - SCIF registered range used in kernel mode
100 * @cookie: cookie used internally by SCIF
101 * @nr_pages: number of pages of PAGE_SIZE
102 * @prot_flags: R/W protection
103 * @phys_addr: Array of bus addresses
104 * @va: Array of kernel virtual addresses backed by the pages in the phys_addr
105 * array. The va is populated only when called on the host for a remote
106 * SCIF connection on MIC. This is required to support the use case of DMA
107 * between MIC and another device which is not a SCIF node e.g., an IB or
108 * ethernet NIC.
109 */
110struct scif_range {
111 void *cookie;
112 int nr_pages;
113 int prot_flags;
114 dma_addr_t *phys_addr;
115 void __iomem **va;
116};
117
118/**
119 * struct scif_pollepd - SCIF endpoint to be monitored via scif_poll
120 * @epd: SCIF endpoint
121 * @events: requested events
122 * @revents: returned events
123 */
124struct scif_pollepd {
125 scif_epd_t epd;
126 short events;
127 short revents;
128};
129
130/**
131 * struct scif_peer_dev - representation of a peer SCIF device
132 *
133 * Peer devices show up as PCIe devices for the mgmt node but not the cards.
134 * The mgmt node discovers all the cards on the PCIe bus and informs the other
135 * cards about their peers. Upon notification of a peer a node adds a peer
136 * device to the peer bus to maintain symmetry in the way devices are
137 * discovered across all nodes in the SCIF network.
138 *
139 * @dev: underlying device
140 * @dnode: The destination node which this device will communicate with.
141 */
142struct scif_peer_dev {
143 struct device dev;
144 u8 dnode;
145};
146
147/**
148 * struct scif_client - representation of a SCIF client
149 * @name: client name
150 * @probe: client method called when a peer device is registered
151 * @remove: client method called when a peer device is unregistered
152 * @si: subsys_interface used internally for implementing SCIF clients
153 */
154struct scif_client {
155 const char *name;
156 void (*probe)(struct scif_peer_dev *spdev);
157 void (*remove)(struct scif_peer_dev *spdev);
158 struct subsys_interface si;
159};
95 160
96#define SCIF_OPEN_FAILED ((scif_epd_t)-1) 161#define SCIF_OPEN_FAILED ((scif_epd_t)-1)
97#define SCIF_REGISTER_FAILED ((off_t)-1) 162#define SCIF_REGISTER_FAILED ((off_t)-1)
@@ -345,7 +410,6 @@ int scif_close(scif_epd_t epd);
345 * Errors: 410 * Errors:
346 * EBADF, ENOTTY - epd is not a valid endpoint descriptor 411 * EBADF, ENOTTY - epd is not a valid endpoint descriptor
347 * ECONNRESET - Connection reset by peer 412 * ECONNRESET - Connection reset by peer
348 * EFAULT - An invalid address was specified for a parameter
349 * EINVAL - flags is invalid, or len is negative 413 * EINVAL - flags is invalid, or len is negative
350 * ENODEV - The remote node is lost or existed, but is not currently in the 414 * ENODEV - The remote node is lost or existed, but is not currently in the
351 * network since it may have crashed 415 * network since it may have crashed
@@ -398,7 +462,6 @@ int scif_send(scif_epd_t epd, void *msg, int len, int flags);
398 * EAGAIN - The destination node is returning from a low power state 462 * EAGAIN - The destination node is returning from a low power state
399 * EBADF, ENOTTY - epd is not a valid endpoint descriptor 463 * EBADF, ENOTTY - epd is not a valid endpoint descriptor
400 * ECONNRESET - Connection reset by peer 464 * ECONNRESET - Connection reset by peer
401 * EFAULT - An invalid address was specified for a parameter
402 * EINVAL - flags is invalid, or len is negative 465 * EINVAL - flags is invalid, or len is negative
403 * ENODEV - The remote node is lost or existed, but is not currently in the 466 * ENODEV - The remote node is lost or existed, but is not currently in the
404 * network since it may have crashed 467 * network since it may have crashed
@@ -461,9 +524,6 @@ int scif_recv(scif_epd_t epd, void *msg, int len, int flags);
461 * SCIF_PROT_READ - allow read operations from the window 524 * SCIF_PROT_READ - allow read operations from the window
462 * SCIF_PROT_WRITE - allow write operations to the window 525 * SCIF_PROT_WRITE - allow write operations to the window
463 * 526 *
464 * The map_flags argument can be set to SCIF_MAP_FIXED which interprets a
465 * fixed offset.
466 *
467 * Return: 527 * Return:
468 * Upon successful completion, scif_register() returns the offset at which the 528 * Upon successful completion, scif_register() returns the offset at which the
469 * mapping was placed (po); otherwise in user mode SCIF_REGISTER_FAILED (that 529 * mapping was placed (po); otherwise in user mode SCIF_REGISTER_FAILED (that
@@ -476,7 +536,6 @@ int scif_recv(scif_epd_t epd, void *msg, int len, int flags);
476 * EAGAIN - The mapping could not be performed due to lack of resources 536 * EAGAIN - The mapping could not be performed due to lack of resources
477 * EBADF, ENOTTY - epd is not a valid endpoint descriptor 537 * EBADF, ENOTTY - epd is not a valid endpoint descriptor
478 * ECONNRESET - Connection reset by peer 538 * ECONNRESET - Connection reset by peer
479 * EFAULT - Addresses in the range [addr, addr + len - 1] are invalid
480 * EINVAL - map_flags is invalid, or prot_flags is invalid, or SCIF_MAP_FIXED is 539 * EINVAL - map_flags is invalid, or prot_flags is invalid, or SCIF_MAP_FIXED is
481 * set in flags, and offset is not a multiple of the page size, or addr is not a 540 * set in flags, and offset is not a multiple of the page size, or addr is not a
482 * multiple of the page size, or len is not a multiple of the page size, or is 541 * multiple of the page size, or len is not a multiple of the page size, or is
@@ -759,7 +818,6 @@ int scif_writeto(scif_epd_t epd, off_t loffset, size_t len, off_t
759 * EACCES - Attempt to write to a read-only range 818 * EACCES - Attempt to write to a read-only range
760 * EBADF, ENOTTY - epd is not a valid endpoint descriptor 819 * EBADF, ENOTTY - epd is not a valid endpoint descriptor
761 * ECONNRESET - Connection reset by peer 820 * ECONNRESET - Connection reset by peer
762 * EFAULT - Addresses in the range [addr, addr + len - 1] are invalid
763 * EINVAL - rma_flags is invalid 821 * EINVAL - rma_flags is invalid
764 * ENODEV - The remote node is lost or existed, but is not currently in the 822 * ENODEV - The remote node is lost or existed, but is not currently in the
765 * network since it may have crashed 823 * network since it may have crashed
@@ -840,7 +898,6 @@ int scif_vreadfrom(scif_epd_t epd, void *addr, size_t len, off_t roffset,
840 * EACCES - Attempt to write to a read-only range 898 * EACCES - Attempt to write to a read-only range
841 * EBADF, ENOTTY - epd is not a valid endpoint descriptor 899 * EBADF, ENOTTY - epd is not a valid endpoint descriptor
842 * ECONNRESET - Connection reset by peer 900 * ECONNRESET - Connection reset by peer
843 * EFAULT - Addresses in the range [addr, addr + len - 1] are invalid
844 * EINVAL - rma_flags is invalid 901 * EINVAL - rma_flags is invalid
845 * ENODEV - The remote node is lost or existed, but is not currently in the 902 * ENODEV - The remote node is lost or existed, but is not currently in the
846 * network since it may have crashed 903 * network since it may have crashed
@@ -984,10 +1041,299 @@ int scif_fence_signal(scif_epd_t epd, off_t loff, u64 lval, off_t roff,
984 * online nodes in the SCIF network including 'self'; otherwise in user mode 1041 * online nodes in the SCIF network including 'self'; otherwise in user mode
985 * -1 is returned and errno is set to indicate the error; in kernel mode no 1042 * -1 is returned and errno is set to indicate the error; in kernel mode no
986 * errors are returned. 1043 * errors are returned.
1044 */
1045int scif_get_node_ids(u16 *nodes, int len, u16 *self);
1046
1047/**
1048 * scif_pin_pages() - Pin a set of pages
1049 * @addr: Virtual address of range to pin
1050 * @len: Length of range to pin
1051 * @prot_flags: Page protection flags
1052 * @map_flags: Page classification flags
1053 * @pinned_pages: Handle to pinned pages
1054 *
1055 * scif_pin_pages() pins (locks in physical memory) the physical pages which
1056 * back the range of virtual address pages starting at addr and continuing for
1057 * len bytes. addr and len are constrained to be multiples of the page size. A
1058 * successful scif_pin_pages() call returns a handle to pinned_pages which may
1059 * be used in subsequent calls to scif_register_pinned_pages().
1060 *
1061 * The pages will remain pinned as long as there is a reference against the
1062 * scif_pinned_pages_t value returned by scif_pin_pages() and until
1063 * scif_unpin_pages() is called, passing the scif_pinned_pages_t value. A
1064 * reference is added to a scif_pinned_pages_t value each time a window is
1065 * created by calling scif_register_pinned_pages() and passing the
1066 * scif_pinned_pages_t value. A reference is removed from a
1067 * scif_pinned_pages_t value each time such a window is deleted.
1068 *
1069 * Subsequent operations which change the memory pages to which virtual
1070 * addresses are mapped (such as mmap(), munmap()) have no effect on the
1071 * scif_pinned_pages_t value or windows created against it.
1072 *
1073 * If the process will fork(), it is recommended that the registered
1074 * virtual address range be marked with MADV_DONTFORK. Doing so will prevent
1075 * problems due to copy-on-write semantics.
1076 *
1077 * The prot_flags argument is formed by OR'ing together one or more of the
1078 * following values.
1079 * SCIF_PROT_READ - allow read operations against the pages
1080 * SCIF_PROT_WRITE - allow write operations against the pages
1081 * The map_flags argument can be set as SCIF_MAP_KERNEL to interpret addr as a
1082 * kernel space address. By default, addr is interpreted as a user space
1083 * address.
1084 *
1085 * Return:
1086 * Upon successful completion, scif_pin_pages() returns 0; otherwise the
1087 * negative of one of the following errors is returned.
987 * 1088 *
988 * Errors: 1089 * Errors:
989 * EFAULT - Bad address 1090 * EINVAL - prot_flags is invalid, map_flags is invalid, or addr or len is not a multiple of the page size
1091 * ENOMEM - Not enough space
990 */ 1092 */
991int scif_get_node_ids(u16 *nodes, int len, u16 *self); 1093int scif_pin_pages(void *addr, size_t len, int prot_flags, int map_flags,
1094 scif_pinned_pages_t *pinned_pages);
1095
1096/**
1097 * scif_unpin_pages() - Unpin a set of pages
1098 * @pinned_pages: Handle to pinned pages to be unpinned
1099 *
1100 * scif_unpin_pages() prevents scif_register_pinned_pages() from registering new
1101 * windows against pinned_pages. The physical pages represented by pinned_pages
1102 * will remain pinned until all windows previously registered against
1103 * pinned_pages are deleted, i.e. the window is scif_unregister()'d and all
1104 * references to the window are removed (see scif_unregister()).
1105 *
1106 * pinned_pages must have been obtained from a previous call to scif_pin_pages().
1107 * After calling scif_unpin_pages(), it is an error to pass pinned_pages to
1108 * scif_register_pinned_pages().
1109 *
1110 * Return:
1111 * Upon successful completion, scif_unpin_pages() returns 0; otherwise the
1112 * negative of one of the following errors is returned.
1113 *
1114 * Errors:
1115 * EINVAL - pinned_pages is not valid
1116 */
1117int scif_unpin_pages(scif_pinned_pages_t pinned_pages);
1118
1119/**
1120 * scif_register_pinned_pages() - Mark a memory region for remote access.
1121 * @epd: endpoint descriptor
1122 * @pinned_pages: Handle to pinned pages
1123 * @offset: Registered address space offset
1124 * @map_flags: Flags which control where pages are mapped
1125 *
1126 * The scif_register_pinned_pages() function opens a window, a range of whole
1127 * pages of the registered address space of the endpoint epd, starting at
1128 * offset po. The value of po, further described below, is a function of the
1129 * parameters offset and pinned_pages, and the value of map_flags. Each page of
1130 * the window represents a corresponding physical memory page of the range
1131 * represented by pinned_pages; the length of the window is the same as the
1132 * length of range represented by pinned_pages. A successful
1133 * scif_register_pinned_pages() call returns po as the return value.
1134 *
1135 * When SCIF_MAP_FIXED is set in the map_flags argument, po will be offset
1136 * exactly, and offset is constrained to be a multiple of the page size. The
1137 * mapping established by scif_register_pinned_pages() will not replace any
1138 * existing registration; an error is returned if any page of the new window
1139 * would intersect an existing window.
1140 *
1141 * When SCIF_MAP_FIXED is not set, the implementation uses offset in an
1142 * implementation-defined manner to arrive at po. The po so chosen will be an
1143 * area of the registered address space that the implementation deems suitable
1144 * for a mapping of the required size. An offset value of 0 is interpreted as
1145 * granting the implementation complete freedom in selecting po, subject to
1146 * constraints described below. A non-zero value of offset is taken to be a
1147 * suggestion of an offset near which the mapping should be placed. When the
1148 * implementation selects a value for po, it does not replace any extant
1149 * window. In all cases, po will be a multiple of the page size.
1150 *
1151 * The physical pages which are so represented by a window are available for
1152 * access in calls to scif_get_pages(), scif_readfrom(), scif_writeto(),
1153 * scif_vreadfrom(), and scif_vwriteto(). While a window is registered, the
1154 * physical pages represented by the window will not be reused by the memory
1155 * subsystem for any other purpose. Note that the same physical page may be
1156 * represented by multiple windows.
1157 *
1158 * Windows created by scif_register_pinned_pages() are unregistered by
1159 * scif_unregister().
1160 *
1161 * The map_flags argument can be set to SCIF_MAP_FIXED which interprets a
1162 * fixed offset.
1163 *
1164 * Return:
1165 * Upon successful completion, scif_register_pinned_pages() returns the offset
1166 * at which the mapping was placed (po); otherwise the negative of one of the
1167 * following errors is returned.
1168 *
1169 * Errors:
1170 * EADDRINUSE - SCIF_MAP_FIXED is set in map_flags and pages in the new window
1171 * would intersect an existing window
1172 * EAGAIN - The mapping could not be performed due to lack of resources
1173 * ECONNRESET - Connection reset by peer
1174 * EINVAL - map_flags is invalid, or SCIF_MAP_FIXED is set in map_flags, and
1175 * offset is not a multiple of the page size, or offset is negative
1176 * ENODEV - The remote node is lost or existed, but is not currently in the
1177 * network since it may have crashed
1178 * ENOMEM - Not enough space
1179 * ENOTCONN - The endpoint is not connected
1180 */
1181off_t scif_register_pinned_pages(scif_epd_t epd,
1182 scif_pinned_pages_t pinned_pages,
1183 off_t offset, int map_flags);
1184
1185/**
1186 * scif_get_pages() - Add references to remote registered pages
1187 * @epd: endpoint descriptor
1188 * @offset: remote registered offset
1189 * @len: length of range of pages
1190 * @pages: returned scif_range structure
1191 *
1192 * scif_get_pages() returns the addresses of the physical pages represented by
1193 * those pages of the registered address space of the peer of epd, starting at
1194 * offset and continuing for len bytes. offset and len are constrained to be
1195 * multiples of the page size.
1196 *
1197 * All of the pages in the specified range [offset, offset + len - 1] must be
1198 * within a single window of the registered address space of the peer of epd.
1199 *
1200 * The addresses are returned as a virtually contiguous array pointed to by the
1201 * phys_addr component of the scif_range structure whose address is returned in
1202 * pages. The nr_pages component of scif_range is the length of the array. The
1203 * prot_flags component of scif_range holds the protection flag value passed
1204 * when the pages were registered.
1205 *
1206 * Each physical page whose address is returned by scif_get_pages() remains
1207 * available and will not be released for reuse until the scif_range structure
1208 * is returned in a call to scif_put_pages(). The scif_range structure returned
1209 * by scif_get_pages() must be unmodified.
1210 *
1211 * It is an error to call scif_close() on an endpoint on which a scif_range
1212 * structure of that endpoint has not been returned to scif_put_pages().
1213 *
1214 * Return:
1215 * Upon successful completion, scif_get_pages() returns 0; otherwise the
1216 * negative of one of the following errors is returned.
1217 * Errors:
1218 * ECONNRESET - Connection reset by peer.
1219 * EINVAL - offset is not a multiple of the page size, or offset is negative, or
1220 * len is not a multiple of the page size
1221 * ENODEV - The remote node is lost or existed, but is not currently in the
1222 * network since it may have crashed
1223 * ENOTCONN - The endpoint is not connected
1224 * ENXIO - Offsets in the range [offset, offset + len - 1] are invalid
1225 * for the registered address space of the peer epd
1226 */
1227int scif_get_pages(scif_epd_t epd, off_t offset, size_t len,
1228 struct scif_range **pages);
1229
1230/**
1231 * scif_put_pages() - Remove references from remote registered pages
1232 * @pages: pages to be returned
1233 *
1234 * scif_put_pages() releases a scif_range structure previously obtained by
1235 * calling scif_get_pages(). The physical pages represented by pages may
1236 * be reused when the window which represented those pages is unregistered.
1237 * Therefore, those pages must not be accessed after calling scif_put_pages().
1238 *
1239 * Return:
1240 * Upon successful completion, scif_put_pages() returns 0; otherwise the
1241 * negative of one of the following errors is returned.
1242 * Errors:
1243 * EINVAL - pages does not point to a valid scif_range structure, or
1244 * the scif_range structure pointed to by pages was already returned
1245 * ENODEV - The remote node is lost or existed, but is not currently in the
1246 * network since it may have crashed
1247 * ENOTCONN - The endpoint is not connected
1248 */
1249int scif_put_pages(struct scif_range *pages);
1250
1251/**
1252 * scif_poll() - Wait for some event on an endpoint
1253 * @epds: Array of endpoint descriptors
1254 * @nepds: Length of epds
1255 * @timeout: Upper limit on time for which scif_poll() will block
1256 *
1257 * scif_poll() waits for one of a set of endpoints to become ready to perform
1258 * an I/O operation.
1259 *
1260 * The epds argument specifies the endpoint descriptors to be examined and the
1261 * events of interest for each endpoint descriptor. epds is a pointer to an
1262 * array with one member for each open endpoint descriptor of interest.
1263 *
1264 * The number of items in the epds array is specified in nepds. The epd field
1265 * of scif_pollepd is an endpoint descriptor of an open endpoint. The field
1266 * events is a bitmask specifying the events which the application is
1267 * interested in. The field revents is an output parameter, filled by the
1268 * kernel with the events that actually occurred. The bits returned in revents
1269 * can include any of those specified in events, or one of the values POLLERR,
1270 * POLLHUP, or POLLNVAL. (These three bits are meaningless in the events
1271 * field, and will be set in the revents field whenever the corresponding
1272 * condition is true.)
1273 *
1274 * If none of the events requested (and no error) has occurred for any of the
1275 * endpoint descriptors, then scif_poll() blocks until one of the events occurs.
1276 *
1277 * The timeout argument specifies an upper limit on the time for which
1278 * scif_poll() will block, in milliseconds. Specifying a negative value in
1279 * timeout means an infinite timeout.
1280 *
1281 * The following bits may be set in events and returned in revents.
1282 * POLLIN - Data may be received without blocking. For a connected
1283 * endpoint, this means that scif_recv() may be called without blocking. For a
1284 * listening endpoint, this means that scif_accept() may be called without
1285 * blocking.
1286 * POLLOUT - Data may be sent without blocking. For a connected endpoint, this
1287 * means that scif_send() may be called without blocking. POLLOUT may also be
1288 * used to block waiting for a non-blocking connect to complete. This bit value
1289 * has no meaning for a listening endpoint and is ignored if specified.
1290 *
1291 * The following bits are only returned in revents, and are ignored if set in
1292 * events.
1293 * POLLERR - An error occurred on the endpoint
1294 * POLLHUP - The connection to the peer endpoint was disconnected
1295 * POLLNVAL - The specified endpoint descriptor is invalid.
1296 *
1297 * Return:
1298 * Upon successful completion, scif_poll() returns a non-negative value. A
1299 * positive value indicates the total number of endpoint descriptors that have
1300 * been selected (that is, endpoint descriptors for which the revents member is
1301 * non-zero). A value of 0 indicates that the call timed out and no endpoint
1302 * descriptors have been selected. Otherwise in user mode -1 is returned and
1303 * errno is set to indicate the error; in kernel mode the negative of one of
1304 * the following errors is returned.
1305 *
1306 * Errors:
1307 * EINTR - A signal occurred before any requested event
1308 * EINVAL - The nepds argument is greater than {OPEN_MAX}
1309 * ENOMEM - There was no space to allocate file descriptor tables
1310 */
1311int scif_poll(struct scif_pollepd *epds, unsigned int nepds, long timeout);
1312
1313/**
1314 * scif_client_register() - Register a SCIF client
1315 * @client: client to be registered
1316 *
1317 * scif_client_register() registers a SCIF client. The probe() method
1318 * of the client is called when SCIF peer devices come online and the
1319 * remove() method is called when the peer devices disappear.
1320 *
1321 * Return:
1322 * Upon successful completion, scif_client_register() returns a non-negative
1323 * value. Otherwise the return value is the same as subsys_interface_register()
1324 * in the kernel.
1325 */
1326int scif_client_register(struct scif_client *client);
1327
1328/**
1329 * scif_client_unregister() - Unregister a SCIF client
1330 * @client: client to be unregistered
1331 *
1332 * scif_client_unregister() unregisters a SCIF client.
1333 *
1334 * Return:
1335 * None
1336 */
1337void scif_client_unregister(struct scif_client *client);
992 1338
993#endif /* __SCIF_H__ */ 1339#endif /* __SCIF_H__ */