diff options
| author | Dean Nelson <dcn@sgi.com> | 2005-03-23 22:50:00 -0500 |
|---|---|---|
| committer | Tony Luck <tony.luck@intel.com> | 2005-05-03 15:37:19 -0400 |
| commit | a2d974da0afe659cff98913184a97c0ee686d02b (patch) | |
| tree | 1a1479280445226104ddc062bed2e2806639352e | |
| parent | 89eb8eb927e324366c3ac0458998aaf9953fc5cd (diff) | |
[IA64-SGI] SGI Altix cross partition functionality [3/3]
This patch contains the cross partition pseudo-ethernet driver (XPNET)
functional support module.
Signed-off-by: Dean Nelson <dcn@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
| -rw-r--r-- | arch/ia64/sn/kernel/Makefile | 1 | ||||
| -rw-r--r-- | arch/ia64/sn/kernel/xpnet.c | 715 |
2 files changed, 716 insertions, 0 deletions
diff --git a/arch/ia64/sn/kernel/Makefile b/arch/ia64/sn/kernel/Makefile index 6959736eadea..4351c4ff9845 100644 --- a/arch/ia64/sn/kernel/Makefile +++ b/arch/ia64/sn/kernel/Makefile | |||
| @@ -15,3 +15,4 @@ obj-$(CONFIG_IA64_SGI_SN_XP) += xp.o | |||
| 15 | xp-y := xp_main.o xp_nofault.o | 15 | xp-y := xp_main.o xp_nofault.o |
| 16 | obj-$(CONFIG_IA64_SGI_SN_XP) += xpc.o | 16 | obj-$(CONFIG_IA64_SGI_SN_XP) += xpc.o |
| 17 | xpc-y := xpc_main.o xpc_channel.o xpc_partition.o | 17 | xpc-y := xpc_main.o xpc_channel.o xpc_partition.o |
| 18 | obj-$(CONFIG_IA64_SGI_SN_XP) += xpnet.o | ||
diff --git a/arch/ia64/sn/kernel/xpnet.c b/arch/ia64/sn/kernel/xpnet.c new file mode 100644 index 000000000000..78c13d676fa6 --- /dev/null +++ b/arch/ia64/sn/kernel/xpnet.c | |||
| @@ -0,0 +1,715 @@ | |||
| 1 | /* | ||
| 2 | * This file is subject to the terms and conditions of the GNU General Public | ||
| 3 | * License. See the file "COPYING" in the main directory of this archive | ||
| 4 | * for more details. | ||
| 5 | * | ||
| 6 | * Copyright (C) 1999,2001-2005 Silicon Graphics, Inc. All rights reserved. | ||
| 7 | */ | ||
| 8 | |||
| 9 | |||
| 10 | /* | ||
| 11 | * Cross Partition Network Interface (XPNET) support | ||
| 12 | * | ||
| 13 | * XPNET provides a virtual network layered on top of the Cross | ||
| 14 | * Partition communication layer. | ||
| 15 | * | ||
| 16 | * XPNET provides direct point-to-point and broadcast-like support | ||
| 17 | * for an ethernet-like device. The ethernet broadcast medium is | ||
| 18 | * replaced with a point-to-point message structure which passes | ||
| 19 | * pointers to a DMA-capable block that a remote partition should | ||
| 20 | * retrieve and pass to the upper level networking layer. | ||
| 21 | * | ||
| 22 | */ | ||
| 23 | |||
| 24 | |||
| 25 | #include <linux/config.h> | ||
| 26 | #include <linux/module.h> | ||
| 27 | #include <linux/kernel.h> | ||
| 28 | #include <linux/pci.h> | ||
| 29 | #include <linux/init.h> | ||
| 30 | #include <linux/ioport.h> | ||
| 31 | #include <linux/netdevice.h> | ||
| 32 | #include <linux/etherdevice.h> | ||
| 33 | #include <linux/delay.h> | ||
| 34 | #include <linux/ethtool.h> | ||
| 35 | #include <linux/mii.h> | ||
| 36 | #include <linux/smp.h> | ||
| 37 | #include <linux/string.h> | ||
| 38 | #include <asm/sn/bte.h> | ||
| 39 | #include <asm/sn/io.h> | ||
| 40 | #include <asm/sn/sn_sal.h> | ||
| 41 | #include <asm/types.h> | ||
| 42 | #include <asm/atomic.h> | ||
| 43 | #include <asm/sn/xp.h> | ||
| 44 | |||
| 45 | |||
| 46 | /* | ||
| 47 | * The message payload transferred by XPC. | ||
| 48 | * | ||
| 49 | * buf_pa is the physical address where the DMA should pull from. | ||
| 50 | * | ||
| 51 | * NOTE: for performance reasons, buf_pa should _ALWAYS_ begin on a | ||
| 52 | * cacheline boundary. To accomplish this, we record the number of | ||
| 53 | * bytes from the beginning of the first cacheline to the first useful | ||
| 54 | * byte of the skb (leadin_ignore) and the number of bytes from the | ||
| 55 | * last useful byte of the skb to the end of the last cacheline | ||
| 56 | * (tailout_ignore). | ||
| 57 | * | ||
| 58 | * size is the number of bytes to transfer which includes the skb->len | ||
| 59 | * (useful bytes of the senders skb) plus the leadin and tailout | ||
| 60 | */ | ||
| 61 | struct xpnet_message { | ||
| 62 | u16 version; /* Version for this message */ | ||
| 63 | u16 embedded_bytes; /* #of bytes embedded in XPC message */ | ||
| 64 | u32 magic; /* Special number indicating this is xpnet */ | ||
| 65 | u64 buf_pa; /* phys address of buffer to retrieve */ | ||
| 66 | u32 size; /* #of bytes in buffer */ | ||
| 67 | u8 leadin_ignore; /* #of bytes to ignore at the beginning */ | ||
| 68 | u8 tailout_ignore; /* #of bytes to ignore at the end */ | ||
| 69 | unsigned char data; /* body of small packets */ | ||
| 70 | }; | ||
| 71 | |||
/*
 * Determine the size of our message, the cacheline aligned size,
 * and then the number of messages we will request from XPC.
 *
 * XPC expects each message to exist in an individual cacheline.
 */
#define XPNET_MSG_SIZE		(L1_CACHE_BYTES - XPC_MSG_PAYLOAD_OFFSET)

/*
 * Bytes of packet data that fit inside one message: everything from the
 * 'data' member to the end of the message. offsetof() replaces the
 * hand-rolled null-pointer member-address computation.
 */
#define XPNET_MSG_DATA_MAX	\
		(XPNET_MSG_SIZE - (u64)offsetof(struct xpnet_message, data))
#define XPNET_MSG_ALIGNED_SIZE	(L1_CACHE_ALIGN(XPNET_MSG_SIZE))
#define XPNET_MSG_NENTRIES	(PAGE_SIZE / XPNET_MSG_ALIGNED_SIZE)


/* one more kthread than there are message entries */
#define XPNET_MAX_KTHREADS	(XPNET_MSG_NENTRIES + 1)
#define XPNET_MAX_IDLE_KTHREADS	(XPNET_MSG_NENTRIES + 1)
| 87 | |||
/*
 * Version number of XPNET implementation. XPNET can always talk to versions
 * with the same major #, and never talk to versions with a different major #.
 *
 * The major # lives in the bits above the low nibble, the minor # in the
 * low nibble.
 */
#define _XPNET_VERSION(_major, _minor)	(((_major) << 4) | (_minor))
#define XPNET_VERSION_MAJOR(_v)		((_v) >> 4)
#define XPNET_VERSION_MINOR(_v)		((_v) & 0xf)

#define XPNET_VERSION		_XPNET_VERSION(1,0)	/* version 1.0 */
#define XPNET_VERSION_EMBED	_XPNET_VERSION(1,1)	/* version 1.1 */
#define XPNET_MAGIC		0x88786984		/* "XNET" */

/*
 * Non-zero when the message pointed to by _m carries our major version
 * and the XPNET magic number.
 *
 * Both tests go through the macro argument (the magic test used to read a
 * variable literally named 'msg' from the caller's scope), and the argument
 * is parenthesized so any pointer expression may be passed. _m is evaluated
 * twice, so it must not have side effects.
 */
#define XPNET_VALID_MSG(_m)						\
	((XPNET_VERSION_MAJOR((_m)->version) ==				\
	  XPNET_VERSION_MAJOR(XPNET_VERSION)) &&			\
	 ((_m)->magic == XPNET_MAGIC))

#define XPNET_DEVICE_NAME		"xp0"
| 105 | |||
| 106 | |||
| 107 | /* | ||
| 108 | * When messages are queued with xpc_send_notify, a kmalloc'd buffer | ||
| 109 | * of the following type is passed as a notification cookie. When the | ||
| 110 | * notification function is called, we use the cookie to decide | ||
| 111 | * whether all outstanding message sends have completed. The skb can | ||
| 112 | * then be released. | ||
| 113 | */ | ||
| 114 | struct xpnet_pending_msg { | ||
| 115 | struct list_head free_list; | ||
| 116 | struct sk_buff *skb; | ||
| 117 | atomic_t use_count; | ||
| 118 | }; | ||
| 119 | |||
| 120 | /* driver specific structure pointed to by the device structure */ | ||
| 121 | struct xpnet_dev_private { | ||
| 122 | struct net_device_stats stats; | ||
| 123 | }; | ||
| 124 | |||
| 125 | struct net_device *xpnet_device; | ||
| 126 | |||
| 127 | /* | ||
| 128 | * When we are notified of other partitions activating, we add them to | ||
| 129 | * our bitmask of partitions to which we broadcast. | ||
| 130 | */ | ||
| 131 | static u64 xpnet_broadcast_partitions; | ||
| 132 | /* protect above */ | ||
| 133 | static spinlock_t xpnet_broadcast_lock = SPIN_LOCK_UNLOCKED; | ||
| 134 | |||
/*
 * Since the Block Transfer Engine (BTE) is being used for the transfer
 * and it relies upon cache-line size transfers, we need to reserve at
 * least one cache-line for head and tail alignment. The BTE is
 * limited to 8MB transfers, hence the maximum MTU of 8MB less one
 * cache-line.
 *
 * Testing has shown that changing MTU to greater than 64KB has no effect
 * on TCP as the two sides negotiate a Max Segment Size that is limited
 * to 64K. Other protocols may use packets greater than this.
 */
#define XPNET_MAX_MTU		((8UL << 20) - L1_CACHE_BYTES)
/* the default; 32KB has been determined to be the ideal */
#define XPNET_DEF_MTU		(32UL << 10)


/*
 * The partition id is encapsulated in the MAC address. The following
 * defines locate the octet the partid is in and the first octet of the
 * value derived from the partition serial number.
 */
#define XPNET_PARTID_OCTET	1
#define XPNET_LICENSE_OCTET	2
| 157 | |||
| 158 | |||
| 159 | /* | ||
| 160 | * Define the XPNET debug device structure that is to be used with dev_dbg(), | ||
| 161 | * dev_err(), dev_warn(), and dev_info(). | ||
| 162 | */ | ||
| 163 | struct device_driver xpnet_dbg_name = { | ||
| 164 | .name = "xpnet" | ||
| 165 | }; | ||
| 166 | |||
| 167 | struct device xpnet_dbg_subname = { | ||
| 168 | .bus_id = {0}, /* set to "" */ | ||
| 169 | .driver = &xpnet_dbg_name | ||
| 170 | }; | ||
| 171 | |||
| 172 | struct device *xpnet = &xpnet_dbg_subname; | ||
| 173 | |||
| 174 | /* | ||
| 175 | * Packet was recevied by XPC and forwarded to us. | ||
| 176 | */ | ||
| 177 | static void | ||
| 178 | xpnet_receive(partid_t partid, int channel, struct xpnet_message *msg) | ||
| 179 | { | ||
| 180 | struct sk_buff *skb; | ||
| 181 | bte_result_t bret; | ||
| 182 | struct xpnet_dev_private *priv = | ||
| 183 | (struct xpnet_dev_private *) xpnet_device->priv; | ||
| 184 | |||
| 185 | |||
| 186 | if (!XPNET_VALID_MSG(msg)) { | ||
| 187 | /* | ||
| 188 | * Packet with a different XPC version. Ignore. | ||
| 189 | */ | ||
| 190 | xpc_received(partid, channel, (void *) msg); | ||
| 191 | |||
| 192 | priv->stats.rx_errors++; | ||
| 193 | |||
| 194 | return; | ||
| 195 | } | ||
| 196 | dev_dbg(xpnet, "received 0x%lx, %d, %d, %d\n", msg->buf_pa, msg->size, | ||
| 197 | msg->leadin_ignore, msg->tailout_ignore); | ||
| 198 | |||
| 199 | |||
| 200 | /* reserve an extra cache line */ | ||
| 201 | skb = dev_alloc_skb(msg->size + L1_CACHE_BYTES); | ||
| 202 | if (!skb) { | ||
| 203 | dev_err(xpnet, "failed on dev_alloc_skb(%d)\n", | ||
| 204 | msg->size + L1_CACHE_BYTES); | ||
| 205 | |||
| 206 | xpc_received(partid, channel, (void *) msg); | ||
| 207 | |||
| 208 | priv->stats.rx_errors++; | ||
| 209 | |||
| 210 | return; | ||
| 211 | } | ||
| 212 | |||
| 213 | /* | ||
| 214 | * The allocated skb has some reserved space. | ||
| 215 | * In order to use bte_copy, we need to get the | ||
| 216 | * skb->data pointer moved forward. | ||
| 217 | */ | ||
| 218 | skb_reserve(skb, (L1_CACHE_BYTES - ((u64)skb->data & | ||
| 219 | (L1_CACHE_BYTES - 1)) + | ||
| 220 | msg->leadin_ignore)); | ||
| 221 | |||
| 222 | /* | ||
| 223 | * Update the tail pointer to indicate data actually | ||
| 224 | * transferred. | ||
| 225 | */ | ||
| 226 | skb_put(skb, (msg->size - msg->leadin_ignore - msg->tailout_ignore)); | ||
| 227 | |||
| 228 | /* | ||
| 229 | * Move the data over from the the other side. | ||
| 230 | */ | ||
| 231 | if ((XPNET_VERSION_MINOR(msg->version) == 1) && | ||
| 232 | (msg->embedded_bytes != 0)) { | ||
| 233 | dev_dbg(xpnet, "copying embedded message. memcpy(0x%p, 0x%p, " | ||
| 234 | "%lu)\n", skb->data, &msg->data, | ||
| 235 | (size_t) msg->embedded_bytes); | ||
| 236 | |||
| 237 | memcpy(skb->data, &msg->data, (size_t) msg->embedded_bytes); | ||
| 238 | } else { | ||
| 239 | dev_dbg(xpnet, "transferring buffer to the skb->data area;\n\t" | ||
| 240 | "bte_copy(0x%p, 0x%p, %hu)\n", (void *)msg->buf_pa, | ||
| 241 | (void *)__pa((u64)skb->data & ~(L1_CACHE_BYTES - 1)), | ||
| 242 | msg->size); | ||
| 243 | |||
| 244 | bret = bte_copy(msg->buf_pa, | ||
| 245 | __pa((u64)skb->data & ~(L1_CACHE_BYTES - 1)), | ||
| 246 | msg->size, (BTE_NOTIFY | BTE_WACQUIRE), NULL); | ||
| 247 | |||
| 248 | if (bret != BTE_SUCCESS) { | ||
| 249 | // >>> Need better way of cleaning skb. Currently skb | ||
| 250 | // >>> appears in_use and we can't just call | ||
| 251 | // >>> dev_kfree_skb. | ||
| 252 | dev_err(xpnet, "bte_copy(0x%p, 0x%p, 0x%hx) returned " | ||
| 253 | "error=0x%x\n", (void *)msg->buf_pa, | ||
| 254 | (void *)__pa((u64)skb->data & | ||
| 255 | ~(L1_CACHE_BYTES - 1)), | ||
| 256 | msg->size, bret); | ||
| 257 | |||
| 258 | xpc_received(partid, channel, (void *) msg); | ||
| 259 | |||
| 260 | priv->stats.rx_errors++; | ||
| 261 | |||
| 262 | return; | ||
| 263 | } | ||
| 264 | } | ||
| 265 | |||
| 266 | dev_dbg(xpnet, "<skb->head=0x%p skb->data=0x%p skb->tail=0x%p " | ||
| 267 | "skb->end=0x%p skb->len=%d\n", (void *) skb->head, | ||
| 268 | (void *) skb->data, (void *) skb->tail, (void *) skb->end, | ||
| 269 | skb->len); | ||
| 270 | |||
| 271 | skb->dev = xpnet_device; | ||
| 272 | skb->protocol = eth_type_trans(skb, xpnet_device); | ||
| 273 | skb->ip_summed = CHECKSUM_UNNECESSARY; | ||
| 274 | |||
| 275 | dev_dbg(xpnet, "passing skb to network layer; \n\tskb->head=0x%p " | ||
| 276 | "skb->data=0x%p skb->tail=0x%p skb->end=0x%p skb->len=%d\n", | ||
| 277 | (void *) skb->head, (void *) skb->data, (void *) skb->tail, | ||
| 278 | (void *) skb->end, skb->len); | ||
| 279 | |||
| 280 | |||
| 281 | xpnet_device->last_rx = jiffies; | ||
| 282 | priv->stats.rx_packets++; | ||
| 283 | priv->stats.rx_bytes += skb->len + ETH_HLEN; | ||
| 284 | |||
| 285 | netif_rx_ni(skb); | ||
| 286 | xpc_received(partid, channel, (void *) msg); | ||
| 287 | } | ||
| 288 | |||
| 289 | |||
| 290 | /* | ||
| 291 | * This is the handler which XPC calls during any sort of change in | ||
| 292 | * state or message reception on a connection. | ||
| 293 | */ | ||
| 294 | static void | ||
| 295 | xpnet_connection_activity(enum xpc_retval reason, partid_t partid, int channel, | ||
| 296 | void *data, void *key) | ||
| 297 | { | ||
| 298 | long bp; | ||
| 299 | |||
| 300 | |||
| 301 | DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS); | ||
| 302 | DBUG_ON(channel != XPC_NET_CHANNEL); | ||
| 303 | |||
| 304 | switch(reason) { | ||
| 305 | case xpcMsgReceived: /* message received */ | ||
| 306 | DBUG_ON(data == NULL); | ||
| 307 | |||
| 308 | xpnet_receive(partid, channel, (struct xpnet_message *) data); | ||
| 309 | break; | ||
| 310 | |||
| 311 | case xpcConnected: /* connection completed to a partition */ | ||
| 312 | spin_lock_bh(&xpnet_broadcast_lock); | ||
| 313 | xpnet_broadcast_partitions |= 1UL << (partid -1 ); | ||
| 314 | bp = xpnet_broadcast_partitions; | ||
| 315 | spin_unlock_bh(&xpnet_broadcast_lock); | ||
| 316 | |||
| 317 | netif_carrier_on(xpnet_device); | ||
| 318 | |||
| 319 | dev_dbg(xpnet, "%s connection created to partition %d; " | ||
| 320 | "xpnet_broadcast_partitions=0x%lx\n", | ||
| 321 | xpnet_device->name, partid, bp); | ||
| 322 | break; | ||
| 323 | |||
| 324 | default: | ||
| 325 | spin_lock_bh(&xpnet_broadcast_lock); | ||
| 326 | xpnet_broadcast_partitions &= ~(1UL << (partid -1 )); | ||
| 327 | bp = xpnet_broadcast_partitions; | ||
| 328 | spin_unlock_bh(&xpnet_broadcast_lock); | ||
| 329 | |||
| 330 | if (bp == 0) { | ||
| 331 | netif_carrier_off(xpnet_device); | ||
| 332 | } | ||
| 333 | |||
| 334 | dev_dbg(xpnet, "%s disconnected from partition %d; " | ||
| 335 | "xpnet_broadcast_partitions=0x%lx\n", | ||
| 336 | xpnet_device->name, partid, bp); | ||
| 337 | break; | ||
| 338 | |||
| 339 | } | ||
| 340 | } | ||
| 341 | |||
| 342 | |||
| 343 | static int | ||
| 344 | xpnet_dev_open(struct net_device *dev) | ||
| 345 | { | ||
| 346 | enum xpc_retval ret; | ||
| 347 | |||
| 348 | |||
| 349 | dev_dbg(xpnet, "calling xpc_connect(%d, 0x%p, NULL, %ld, %ld, %d, " | ||
| 350 | "%d)\n", XPC_NET_CHANNEL, xpnet_connection_activity, | ||
| 351 | XPNET_MSG_SIZE, XPNET_MSG_NENTRIES, XPNET_MAX_KTHREADS, | ||
| 352 | XPNET_MAX_IDLE_KTHREADS); | ||
| 353 | |||
| 354 | ret = xpc_connect(XPC_NET_CHANNEL, xpnet_connection_activity, NULL, | ||
| 355 | XPNET_MSG_SIZE, XPNET_MSG_NENTRIES, | ||
| 356 | XPNET_MAX_KTHREADS, XPNET_MAX_IDLE_KTHREADS); | ||
| 357 | if (ret != xpcSuccess) { | ||
| 358 | dev_err(xpnet, "ifconfig up of %s failed on XPC connect, " | ||
| 359 | "ret=%d\n", dev->name, ret); | ||
| 360 | |||
| 361 | return -ENOMEM; | ||
| 362 | } | ||
| 363 | |||
| 364 | dev_dbg(xpnet, "ifconfig up of %s; XPC connected\n", dev->name); | ||
| 365 | |||
| 366 | return 0; | ||
| 367 | } | ||
| 368 | |||
| 369 | |||
| 370 | static int | ||
| 371 | xpnet_dev_stop(struct net_device *dev) | ||
| 372 | { | ||
| 373 | xpc_disconnect(XPC_NET_CHANNEL); | ||
| 374 | |||
| 375 | dev_dbg(xpnet, "ifconfig down of %s; XPC disconnected\n", dev->name); | ||
| 376 | |||
| 377 | return 0; | ||
| 378 | } | ||
| 379 | |||
| 380 | |||
| 381 | static int | ||
| 382 | xpnet_dev_change_mtu(struct net_device *dev, int new_mtu) | ||
| 383 | { | ||
| 384 | /* 68 comes from min TCP+IP+MAC header */ | ||
| 385 | if ((new_mtu < 68) || (new_mtu > XPNET_MAX_MTU)) { | ||
| 386 | dev_err(xpnet, "ifconfig %s mtu %d failed; value must be " | ||
| 387 | "between 68 and %ld\n", dev->name, new_mtu, | ||
| 388 | XPNET_MAX_MTU); | ||
| 389 | return -EINVAL; | ||
| 390 | } | ||
| 391 | |||
| 392 | dev->mtu = new_mtu; | ||
| 393 | dev_dbg(xpnet, "ifconfig %s mtu set to %d\n", dev->name, new_mtu); | ||
| 394 | return 0; | ||
| 395 | } | ||
| 396 | |||
| 397 | |||
/*
 * Required for the net_device structure. The requested map is ignored
 * and success (0) is always reported.
 */
static int
xpnet_dev_set_config(struct net_device *dev, struct ifmap *new_map)
{
	return 0;
}
| 406 | |||
| 407 | |||
| 408 | /* | ||
| 409 | * Return statistics to the caller. | ||
| 410 | */ | ||
| 411 | static struct net_device_stats * | ||
| 412 | xpnet_dev_get_stats(struct net_device *dev) | ||
| 413 | { | ||
| 414 | struct xpnet_dev_private *priv; | ||
| 415 | |||
| 416 | |||
| 417 | priv = (struct xpnet_dev_private *) dev->priv; | ||
| 418 | |||
| 419 | return &priv->stats; | ||
| 420 | } | ||
| 421 | |||
| 422 | |||
| 423 | /* | ||
| 424 | * Notification that the other end has received the message and | ||
| 425 | * DMA'd the skb information. At this point, they are done with | ||
| 426 | * our side. When all recipients are done processing, we | ||
| 427 | * release the skb and then release our pending message structure. | ||
| 428 | */ | ||
| 429 | static void | ||
| 430 | xpnet_send_completed(enum xpc_retval reason, partid_t partid, int channel, | ||
| 431 | void *__qm) | ||
| 432 | { | ||
| 433 | struct xpnet_pending_msg *queued_msg = | ||
| 434 | (struct xpnet_pending_msg *) __qm; | ||
| 435 | |||
| 436 | |||
| 437 | DBUG_ON(queued_msg == NULL); | ||
| 438 | |||
| 439 | dev_dbg(xpnet, "message to %d notified with reason %d\n", | ||
| 440 | partid, reason); | ||
| 441 | |||
| 442 | if (atomic_dec_return(&queued_msg->use_count) == 0) { | ||
| 443 | dev_dbg(xpnet, "all acks for skb->head=-x%p\n", | ||
| 444 | (void *) queued_msg->skb->head); | ||
| 445 | |||
| 446 | dev_kfree_skb_any(queued_msg->skb); | ||
| 447 | kfree(queued_msg); | ||
| 448 | } | ||
| 449 | } | ||
| 450 | |||
| 451 | |||
| 452 | /* | ||
| 453 | * Network layer has formatted a packet (skb) and is ready to place it | ||
| 454 | * "on the wire". Prepare and send an xpnet_message to all partitions | ||
| 455 | * which have connected with us and are targets of this packet. | ||
| 456 | * | ||
| 457 | * MAC-NOTE: For the XPNET driver, the MAC address contains the | ||
| 458 | * destination partition_id. If the destination partition id word | ||
| 459 | * is 0xff, this packet is to broadcast to all partitions. | ||
| 460 | */ | ||
| 461 | static int | ||
| 462 | xpnet_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) | ||
| 463 | { | ||
| 464 | struct xpnet_pending_msg *queued_msg; | ||
| 465 | enum xpc_retval ret; | ||
| 466 | struct xpnet_message *msg; | ||
| 467 | u64 start_addr, end_addr; | ||
| 468 | long dp; | ||
| 469 | u8 second_mac_octet; | ||
| 470 | partid_t dest_partid; | ||
| 471 | struct xpnet_dev_private *priv; | ||
| 472 | u16 embedded_bytes; | ||
| 473 | |||
| 474 | |||
| 475 | priv = (struct xpnet_dev_private *) dev->priv; | ||
| 476 | |||
| 477 | |||
| 478 | dev_dbg(xpnet, ">skb->head=0x%p skb->data=0x%p skb->tail=0x%p " | ||
| 479 | "skb->end=0x%p skb->len=%d\n", (void *) skb->head, | ||
| 480 | (void *) skb->data, (void *) skb->tail, (void *) skb->end, | ||
| 481 | skb->len); | ||
| 482 | |||
| 483 | |||
| 484 | /* | ||
| 485 | * The xpnet_pending_msg tracks how many outstanding | ||
| 486 | * xpc_send_notifies are relying on this skb. When none | ||
| 487 | * remain, release the skb. | ||
| 488 | */ | ||
| 489 | queued_msg = kmalloc(sizeof(struct xpnet_pending_msg), GFP_ATOMIC); | ||
| 490 | if (queued_msg == NULL) { | ||
| 491 | dev_warn(xpnet, "failed to kmalloc %ld bytes; dropping " | ||
| 492 | "packet\n", sizeof(struct xpnet_pending_msg)); | ||
| 493 | |||
| 494 | priv->stats.tx_errors++; | ||
| 495 | |||
| 496 | return -ENOMEM; | ||
| 497 | } | ||
| 498 | |||
| 499 | |||
| 500 | /* get the beginning of the first cacheline and end of last */ | ||
| 501 | start_addr = ((u64) skb->data & ~(L1_CACHE_BYTES - 1)); | ||
| 502 | end_addr = L1_CACHE_ALIGN((u64) skb->tail); | ||
| 503 | |||
| 504 | /* calculate how many bytes to embed in the XPC message */ | ||
| 505 | embedded_bytes = 0; | ||
| 506 | if (unlikely(skb->len <= XPNET_MSG_DATA_MAX)) { | ||
| 507 | /* skb->data does fit so embed */ | ||
| 508 | embedded_bytes = skb->len; | ||
| 509 | } | ||
| 510 | |||
| 511 | |||
| 512 | /* | ||
| 513 | * Since the send occurs asynchronously, we set the count to one | ||
| 514 | * and begin sending. Any sends that happen to complete before | ||
| 515 | * we are done sending will not free the skb. We will be left | ||
| 516 | * with that task during exit. This also handles the case of | ||
| 517 | * a packet destined for a partition which is no longer up. | ||
| 518 | */ | ||
| 519 | atomic_set(&queued_msg->use_count, 1); | ||
| 520 | queued_msg->skb = skb; | ||
| 521 | |||
| 522 | |||
| 523 | second_mac_octet = skb->data[XPNET_PARTID_OCTET]; | ||
| 524 | if (second_mac_octet == 0xff) { | ||
| 525 | /* we are being asked to broadcast to all partitions */ | ||
| 526 | dp = xpnet_broadcast_partitions; | ||
| 527 | } else if (second_mac_octet != 0) { | ||
| 528 | dp = xpnet_broadcast_partitions & | ||
| 529 | (1UL << (second_mac_octet - 1)); | ||
| 530 | } else { | ||
| 531 | /* 0 is an invalid partid. Ignore */ | ||
| 532 | dp = 0; | ||
| 533 | } | ||
| 534 | dev_dbg(xpnet, "destination Partitions mask (dp) = 0x%lx\n", dp); | ||
| 535 | |||
| 536 | /* | ||
| 537 | * If we wanted to allow promiscous mode to work like an | ||
| 538 | * unswitched network, this would be a good point to OR in a | ||
| 539 | * mask of partitions which should be receiving all packets. | ||
| 540 | */ | ||
| 541 | |||
| 542 | /* | ||
| 543 | * Main send loop. | ||
| 544 | */ | ||
| 545 | for (dest_partid = 1; dp && dest_partid < XP_MAX_PARTITIONS; | ||
| 546 | dest_partid++) { | ||
| 547 | |||
| 548 | |||
| 549 | if (!(dp & (1UL << (dest_partid - 1)))) { | ||
| 550 | /* not destined for this partition */ | ||
| 551 | continue; | ||
| 552 | } | ||
| 553 | |||
| 554 | /* remove this partition from the destinations mask */ | ||
| 555 | dp &= ~(1UL << (dest_partid - 1)); | ||
| 556 | |||
| 557 | |||
| 558 | /* found a partition to send to */ | ||
| 559 | |||
| 560 | ret = xpc_allocate(dest_partid, XPC_NET_CHANNEL, | ||
| 561 | XPC_NOWAIT, (void **)&msg); | ||
| 562 | if (unlikely(ret != xpcSuccess)) { | ||
| 563 | continue; | ||
| 564 | } | ||
| 565 | |||
| 566 | msg->embedded_bytes = embedded_bytes; | ||
| 567 | if (unlikely(embedded_bytes != 0)) { | ||
| 568 | msg->version = XPNET_VERSION_EMBED; | ||
| 569 | dev_dbg(xpnet, "calling memcpy(0x%p, 0x%p, 0x%lx)\n", | ||
| 570 | &msg->data, skb->data, (size_t) embedded_bytes); | ||
| 571 | memcpy(&msg->data, skb->data, (size_t) embedded_bytes); | ||
| 572 | } else { | ||
| 573 | msg->version = XPNET_VERSION; | ||
| 574 | } | ||
| 575 | msg->magic = XPNET_MAGIC; | ||
| 576 | msg->size = end_addr - start_addr; | ||
| 577 | msg->leadin_ignore = (u64) skb->data - start_addr; | ||
| 578 | msg->tailout_ignore = end_addr - (u64) skb->tail; | ||
| 579 | msg->buf_pa = __pa(start_addr); | ||
| 580 | |||
| 581 | dev_dbg(xpnet, "sending XPC message to %d:%d\nmsg->buf_pa=" | ||
| 582 | "0x%lx, msg->size=%u, msg->leadin_ignore=%u, " | ||
| 583 | "msg->tailout_ignore=%u\n", dest_partid, | ||
| 584 | XPC_NET_CHANNEL, msg->buf_pa, msg->size, | ||
| 585 | msg->leadin_ignore, msg->tailout_ignore); | ||
| 586 | |||
| 587 | |||
| 588 | atomic_inc(&queued_msg->use_count); | ||
| 589 | |||
| 590 | ret = xpc_send_notify(dest_partid, XPC_NET_CHANNEL, msg, | ||
| 591 | xpnet_send_completed, queued_msg); | ||
| 592 | if (unlikely(ret != xpcSuccess)) { | ||
| 593 | atomic_dec(&queued_msg->use_count); | ||
| 594 | continue; | ||
| 595 | } | ||
| 596 | |||
| 597 | } | ||
| 598 | |||
| 599 | if (atomic_dec_return(&queued_msg->use_count) == 0) { | ||
| 600 | dev_dbg(xpnet, "no partitions to receive packet destined for " | ||
| 601 | "%d\n", dest_partid); | ||
| 602 | |||
| 603 | |||
| 604 | dev_kfree_skb(skb); | ||
| 605 | kfree(queued_msg); | ||
| 606 | } | ||
| 607 | |||
| 608 | priv->stats.tx_packets++; | ||
| 609 | priv->stats.tx_bytes += skb->len; | ||
| 610 | |||
| 611 | return 0; | ||
| 612 | } | ||
| 613 | |||
| 614 | |||
| 615 | /* | ||
| 616 | * Deal with transmit timeouts coming from the network layer. | ||
| 617 | */ | ||
| 618 | static void | ||
| 619 | xpnet_dev_tx_timeout (struct net_device *dev) | ||
| 620 | { | ||
| 621 | struct xpnet_dev_private *priv; | ||
| 622 | |||
| 623 | |||
| 624 | priv = (struct xpnet_dev_private *) dev->priv; | ||
| 625 | |||
| 626 | priv->stats.tx_errors++; | ||
| 627 | return; | ||
| 628 | } | ||
| 629 | |||
| 630 | |||
| 631 | static int __init | ||
| 632 | xpnet_init(void) | ||
| 633 | { | ||
| 634 | int i; | ||
| 635 | u32 license_num; | ||
| 636 | int result = -ENOMEM; | ||
| 637 | |||
| 638 | |||
| 639 | dev_info(xpnet, "registering network device %s\n", XPNET_DEVICE_NAME); | ||
| 640 | |||
| 641 | /* | ||
| 642 | * use ether_setup() to init the majority of our device | ||
| 643 | * structure and then override the necessary pieces. | ||
| 644 | */ | ||
| 645 | xpnet_device = alloc_netdev(sizeof(struct xpnet_dev_private), | ||
| 646 | XPNET_DEVICE_NAME, ether_setup); | ||
| 647 | if (xpnet_device == NULL) { | ||
| 648 | return -ENOMEM; | ||
| 649 | } | ||
| 650 | |||
| 651 | netif_carrier_off(xpnet_device); | ||
| 652 | |||
| 653 | xpnet_device->mtu = XPNET_DEF_MTU; | ||
| 654 | xpnet_device->change_mtu = xpnet_dev_change_mtu; | ||
| 655 | xpnet_device->open = xpnet_dev_open; | ||
| 656 | xpnet_device->get_stats = xpnet_dev_get_stats; | ||
| 657 | xpnet_device->stop = xpnet_dev_stop; | ||
| 658 | xpnet_device->hard_start_xmit = xpnet_dev_hard_start_xmit; | ||
| 659 | xpnet_device->tx_timeout = xpnet_dev_tx_timeout; | ||
| 660 | xpnet_device->set_config = xpnet_dev_set_config; | ||
| 661 | |||
| 662 | /* | ||
| 663 | * Multicast assumes the LSB of the first octet is set for multicast | ||
| 664 | * MAC addresses. We chose the first octet of the MAC to be unlikely | ||
| 665 | * to collide with any vendor's officially issued MAC. | ||
| 666 | */ | ||
| 667 | xpnet_device->dev_addr[0] = 0xfe; | ||
| 668 | xpnet_device->dev_addr[XPNET_PARTID_OCTET] = sn_partition_id; | ||
| 669 | license_num = sn_partition_serial_number_val(); | ||
| 670 | for (i = 3; i >= 0; i--) { | ||
| 671 | xpnet_device->dev_addr[XPNET_LICENSE_OCTET + i] = | ||
| 672 | license_num & 0xff; | ||
| 673 | license_num = license_num >> 8; | ||
| 674 | } | ||
| 675 | |||
| 676 | /* | ||
| 677 | * ether_setup() sets this to a multicast device. We are | ||
| 678 | * really not supporting multicast at this time. | ||
| 679 | */ | ||
| 680 | xpnet_device->flags &= ~IFF_MULTICAST; | ||
| 681 | |||
| 682 | /* | ||
| 683 | * No need to checksum as it is a DMA transfer. The BTE will | ||
| 684 | * report an error if the data is not retrievable and the | ||
| 685 | * packet will be dropped. | ||
| 686 | */ | ||
| 687 | xpnet_device->features = NETIF_F_NO_CSUM; | ||
| 688 | |||
| 689 | result = register_netdev(xpnet_device); | ||
| 690 | if (result != 0) { | ||
| 691 | free_netdev(xpnet_device); | ||
| 692 | } | ||
| 693 | |||
| 694 | return result; | ||
| 695 | } | ||
| 696 | module_init(xpnet_init); | ||
| 697 | |||
| 698 | |||
| 699 | static void __exit | ||
| 700 | xpnet_exit(void) | ||
| 701 | { | ||
| 702 | dev_info(xpnet, "unregistering network device %s\n", | ||
| 703 | xpnet_device[0].name); | ||
| 704 | |||
| 705 | unregister_netdev(xpnet_device); | ||
| 706 | |||
| 707 | free_netdev(xpnet_device); | ||
| 708 | } | ||
| 709 | module_exit(xpnet_exit); | ||
| 710 | |||
| 711 | |||
| 712 | MODULE_AUTHOR("Silicon Graphics, Inc."); | ||
| 713 | MODULE_DESCRIPTION("Cross Partition Network adapter (XPNET)"); | ||
| 714 | MODULE_LICENSE("GPL"); | ||
| 715 | |||
