Diffstat (limited to 'drivers')
-rw-r--r--  drivers/Makefile | 1
-rw-r--r--  drivers/acpi/processor_perflib.c | 21
-rw-r--r--  drivers/cdrom/gdrom.c | 4
-rw-r--r--  drivers/char/ipmi/ipmi_si_intf.c | 4
-rw-r--r--  drivers/char/mxser.c | 6
-rw-r--r--  drivers/cpufreq/cpufreq.c | 3
-rw-r--r--  drivers/firmware/iscsi_ibft_find.c | 1
-rw-r--r--  drivers/ide/ppc/pmac.c | 13
-rw-r--r--  drivers/input/keyboard/maple_keyb.c | 1
-rw-r--r--  drivers/md/dm-table.c | 29
-rw-r--r--  drivers/misc/Kconfig | 25
-rw-r--r--  drivers/misc/Makefile | 1
-rw-r--r--  drivers/misc/sgi-gru/Makefile | 3
-rw-r--r--  drivers/misc/sgi-gru/gru.h | 67
-rw-r--r--  drivers/misc/sgi-gru/gru_instructions.h | 669
-rw-r--r--  drivers/misc/sgi-gru/grufault.c | 633
-rw-r--r--  drivers/misc/sgi-gru/grufile.c | 485
-rw-r--r--  drivers/misc/sgi-gru/gruhandles.h | 663
-rw-r--r--  drivers/misc/sgi-gru/grukservices.c | 679
-rw-r--r--  drivers/misc/sgi-gru/grukservices.h | 134
-rw-r--r--  drivers/misc/sgi-gru/grulib.h | 97
-rw-r--r--  drivers/misc/sgi-gru/grumain.c | 802
-rw-r--r--  drivers/misc/sgi-gru/gruprocfs.c | 336
-rw-r--r--  drivers/misc/sgi-gru/grutables.h | 609
-rw-r--r--  drivers/misc/sgi-gru/grutlbpurge.c | 372
-rw-r--r--  drivers/misc/sgi-xp/Makefile | 10
-rw-r--r--  drivers/misc/sgi-xp/xp.h | 225
-rw-r--r--  drivers/misc/sgi-xp/xp_main.c | 131
-rw-r--r--  drivers/misc/sgi-xp/xp_sn2.c | 146
-rw-r--r--  drivers/misc/sgi-xp/xp_uv.c | 72
-rw-r--r--  drivers/misc/sgi-xp/xpc.h | 1200
-rw-r--r--  drivers/misc/sgi-xp/xpc_channel.c | 1585
-rw-r--r--  drivers/misc/sgi-xp/xpc_main.c | 974
-rw-r--r--  drivers/misc/sgi-xp/xpc_partition.c | 928
-rw-r--r--  drivers/misc/sgi-xp/xpc_sn2.c | 2404
-rw-r--r--  drivers/misc/sgi-xp/xpc_uv.c | 1443
-rw-r--r--  drivers/misc/sgi-xp/xpnet.c | 277
-rw-r--r--  drivers/mmc/card/block.c | 17
-rw-r--r--  drivers/mmc/card/mmc_test.c | 85
-rw-r--r--  drivers/mmc/core/core.c | 5
-rw-r--r--  drivers/mmc/host/au1xmmc.c | 8
-rw-r--r--  drivers/mmc/host/sdhci-pci.c | 3
-rw-r--r--  drivers/mmc/host/sdhci.c | 15
-rw-r--r--  drivers/mmc/host/sdhci.h | 2
-rw-r--r--  drivers/mtd/mtdsuper.c | 33
-rw-r--r--  drivers/net/niu.c | 2
-rw-r--r--  drivers/net/ps3_gelic_wireless.c | 12
-rw-r--r--  drivers/net/wireless/ath5k/base.c | 99
-rw-r--r--  drivers/net/wireless/ath5k/base.h | 2
-rw-r--r--  drivers/net/wireless/ath5k/hw.c | 4
-rw-r--r--  drivers/net/wireless/b43/main.c | 3
-rw-r--r--  drivers/net/wireless/b43/xmit.c | 2
-rw-r--r--  drivers/net/wireless/b43legacy/main.c | 5
-rw-r--r--  drivers/net/wireless/b43legacy/xmit.c | 2
-rw-r--r--  drivers/net/wireless/ipw2100.c | 3
-rw-r--r--  drivers/net/wireless/iwlwifi/iwl-3945.c | 2
-rw-r--r--  drivers/net/wireless/iwlwifi/iwl-core.c | 3
-rw-r--r--  drivers/net/wireless/iwlwifi/iwl-debug.h | 8
-rw-r--r--  drivers/net/wireless/iwlwifi/iwl-led.c | 4
-rw-r--r--  drivers/net/wireless/iwlwifi/iwl-scan.c | 2
-rw-r--r--  drivers/net/wireless/iwlwifi/iwl-tx.c | 2
-rw-r--r--  drivers/net/wireless/iwlwifi/iwl3945-base.c | 5
-rw-r--r--  drivers/net/wireless/libertas/persistcfg.c | 30
-rw-r--r--  drivers/net/wireless/mac80211_hwsim.c | 2
-rw-r--r--  drivers/net/wireless/rt2x00/rt2500usb.c | 13
-rw-r--r--  drivers/net/wireless/rt2x00/rt2x00.h | 6
-rw-r--r--  drivers/net/wireless/rt2x00/rt2x00config.c | 2
-rw-r--r--  drivers/net/wireless/rt2x00/rt2x00dev.c | 7
-rw-r--r--  drivers/net/wireless/rt2x00/rt2x00lib.h | 7
-rw-r--r--  drivers/net/wireless/rt2x00/rt2x00mac.c | 29
-rw-r--r--  drivers/net/wireless/rt2x00/rt2x00queue.c | 36
-rw-r--r--  drivers/net/wireless/rt2x00/rt2x00queue.h | 2
-rw-r--r--  drivers/net/wireless/rt2x00/rt2x00usb.c | 32
-rw-r--r--  drivers/net/wireless/rt2x00/rt2x00usb.h | 22
-rw-r--r--  drivers/net/wireless/rt2x00/rt61pci.c | 4
-rw-r--r--  drivers/net/wireless/rt2x00/rt73usb.c | 56
-rw-r--r--  drivers/net/wireless/rtl8187.h | 11
-rw-r--r--  drivers/net/wireless/rtl8187_dev.c | 93
-rw-r--r--  drivers/net/wireless/zd1211rw/zd_mac.c | 1
-rw-r--r--  drivers/power/Kconfig | 7
-rw-r--r--  drivers/power/Makefile | 1
-rw-r--r--  drivers/power/olpc_battery.c | 273
-rw-r--r--  drivers/power/power_supply_sysfs.c | 1
-rw-r--r--  drivers/power/tosa_battery.c | 486
-rw-r--r--  drivers/regulator/Kconfig | 59
-rw-r--r--  drivers/regulator/Makefile | 12
-rw-r--r--  drivers/regulator/bq24022.c | 167
-rw-r--r--  drivers/regulator/core.c | 1903
-rw-r--r--  drivers/regulator/fixed.c | 129
-rw-r--r--  drivers/regulator/virtual.c | 345
-rw-r--r--  drivers/rtc/interface.c | 10
-rw-r--r--  drivers/rtc/rtc-dev.c | 4
-rw-r--r--  drivers/s390/block/dasd_alias.c | 4
-rw-r--r--  drivers/s390/block/dasd_devmap.c | 16
-rw-r--r--  drivers/s390/block/dasd_eckd.c | 147
-rw-r--r--  drivers/s390/block/dasd_eckd.h | 184
-rw-r--r--  drivers/s390/block/dasd_int.h | 1
-rw-r--r--  drivers/s390/char/sclp.c | 6
-rw-r--r--  drivers/s390/char/sclp_cmd.c | 5
-rw-r--r--  drivers/s390/char/sclp_config.c | 13
-rw-r--r--  drivers/s390/cio/idset.c | 8
-rw-r--r--  drivers/s390/cio/qdio_main.c | 2
-rw-r--r--  drivers/s390/cio/qdio_perf.c | 2
-rw-r--r--  drivers/s390/cio/qdio_setup.c | 4
-rw-r--r--  drivers/s390/kvm/kvm_virtio.c | 2
-rw-r--r--  drivers/s390/net/qeth_core.h | 5
-rw-r--r--  drivers/s390/net/qeth_core_main.c | 63
-rw-r--r--  drivers/s390/net/qeth_l2_main.c | 50
-rw-r--r--  drivers/s390/net/qeth_l3_main.c | 51
-rw-r--r--  drivers/serial/8250.c | 4
-rw-r--r--  drivers/serial/Makefile | 1
-rw-r--r--  drivers/serial/cpm_uart/cpm_uart.h | 11
-rw-r--r--  drivers/serial/cpm_uart/cpm_uart_core.c | 66
-rw-r--r--  drivers/serial/sh-sci.h | 12
-rw-r--r--  drivers/serial/v850e_uart.c | 548
-rw-r--r--  drivers/sh/maple/maple.c | 265
-rw-r--r--  drivers/usb/gadget/m66592-udc.c | 2
-rw-r--r--  drivers/video/Makefile | 1
-rw-r--r--  drivers/video/arkfb.c | 9
-rw-r--r--  drivers/video/backlight/hp680_bl.c | 2
-rw-r--r--  drivers/video/backlight/platform_lcd.c | 4
-rw-r--r--  drivers/video/console/sticore.c | 30
-rw-r--r--  drivers/video/gbefb.c | 50
-rw-r--r--  drivers/video/hitfb.c | 2
-rw-r--r--  drivers/video/pvr2fb.c | 6
-rw-r--r--  drivers/video/vt8623fb.c | 9
-rw-r--r--  drivers/watchdog/hpwdt.c | 2
127 files changed, 15691 insertions, 5027 deletions
diff --git a/drivers/Makefile b/drivers/Makefile
index 54ec5e718c0e..a280ab3d0833 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -97,3 +97,4 @@ obj-$(CONFIG_PPC_PS3) += ps3/
 obj-$(CONFIG_OF)		+= of/
 obj-$(CONFIG_SSB)		+= ssb/
 obj-$(CONFIG_VIRTIO)		+= virtio/
+obj-$(CONFIG_REGULATOR)		+= regulator/
diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c
index b4749969c6b4..0133af49cf06 100644
--- a/drivers/acpi/processor_perflib.c
+++ b/drivers/acpi/processor_perflib.c
@@ -64,7 +64,13 @@ static DEFINE_MUTEX(performance_mutex);
  * policy is adjusted accordingly.
  */
 
-static unsigned int ignore_ppc = 0;
+/* ignore_ppc:
+ * -1 -> cpufreq low level drivers not initialized -> _PSS, etc. not called yet
+ *       ignore _PPC
+ *  0 -> cpufreq low level drivers initialized -> consider _PPC values
+ *  1 -> ignore _PPC totally -> forced by user through boot param
+ */
+static unsigned int ignore_ppc = -1;
 module_param(ignore_ppc, uint, 0644);
 MODULE_PARM_DESC(ignore_ppc, "If the frequency of your machine gets wrongly" \
 		 "limited by BIOS, this should help");
@@ -72,7 +78,7 @@ MODULE_PARM_DESC(ignore_ppc, "If the frequency of your machine gets wrongly" \
 #define PPC_REGISTERED   1
 #define PPC_IN_USE       2
 
-static int acpi_processor_ppc_status = 0;
+static int acpi_processor_ppc_status;
 
 static int acpi_processor_ppc_notifier(struct notifier_block *nb,
 				       unsigned long event, void *data)
@@ -81,13 +87,18 @@ static int acpi_processor_ppc_notifier(struct notifier_block *nb,
 	struct acpi_processor *pr;
 	unsigned int ppc = 0;
 
-	if (ignore_ppc)
+	if (event == CPUFREQ_START && ignore_ppc <= 0) {
+		ignore_ppc = 0;
 		return 0;
+	}
 
-	mutex_lock(&performance_mutex);
+	if (ignore_ppc)
+		return 0;
 
 	if (event != CPUFREQ_INCOMPATIBLE)
-		goto out;
+		return 0;
+
+	mutex_lock(&performance_mutex);
 
 	pr = per_cpu(processors, policy->cpu);
 	if (!pr || !pr->performance)
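
The tri-state above is subtle enough to warrant a distilled view. Below is a minimal standalone sketch of the decision logic (plain userspace C; the cpufreq event codes are stubbed out with assumed values, purely for illustration):

	#include <stdio.h>

	/* Stand-ins for the real cpufreq notifier event codes (assumed values). */
	enum { CPUFREQ_START = 100, CPUFREQ_INCOMPATIBLE = 101 };

	static int ignore_ppc = -1;	/* -1: not ready, 0: honor _PPC, 1: forced off */

	static void ppc_notifier(unsigned long event)
	{
		/* The first CPUFREQ_START arms the check: -1 becomes 0. */
		if (event == CPUFREQ_START && ignore_ppc <= 0) {
			ignore_ppc = 0;
			return;
		}
		if (ignore_ppc)		/* still -1, or forced to 1 by the user */
			return;
		if (event != CPUFREQ_INCOMPATIBLE)
			return;
		printf("would apply the _PPC limit here\n");
	}

	int main(void)
	{
		ppc_notifier(CPUFREQ_INCOMPATIBLE);	/* ignored: drivers not ready */
		ppc_notifier(CPUFREQ_START);		/* arms _PPC handling */
		ppc_notifier(CPUFREQ_INCOMPATIBLE);	/* now acts on _PPC */
		return 0;
	}
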
diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c
index 71ec426ecffc..1e0455bd6df9 100644
--- a/drivers/cdrom/gdrom.c
+++ b/drivers/cdrom/gdrom.c
@@ -39,8 +39,8 @@
 #include <asm/io.h>
 #include <asm/dma.h>
 #include <asm/delay.h>
-#include <asm/mach/dma.h>
-#include <asm/mach/sysasic.h>
+#include <mach/dma.h>
+#include <mach/sysasic.h>
 
 #define GDROM_DEV_NAME "gdrom"
 #define GD_SESSION_OFFSET 150
diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c
index 192688344ed2..f52931e1c16e 100644
--- a/drivers/char/ipmi/ipmi_si_intf.c
+++ b/drivers/char/ipmi/ipmi_si_intf.c
@@ -66,8 +66,8 @@
 #include <linux/ctype.h>
 
 #ifdef CONFIG_PPC_OF
-#include <asm/of_device.h>
-#include <asm/of_platform.h>
+#include <linux/of_device.h>
+#include <linux/of_platform.h>
 #endif
 
 #define PFX "ipmi_si: "
diff --git a/drivers/char/mxser.c b/drivers/char/mxser.c
index e30575e87648..b638403e8e9c 100644
--- a/drivers/char/mxser.c
+++ b/drivers/char/mxser.c
@@ -1612,8 +1612,10 @@ static int mxser_ioctl_special(unsigned int cmd, void __user *argp)
 
 	switch (cmd) {
 	case MOXA_GET_MAJOR:
-		printk(KERN_WARNING "mxser: '%s' uses deprecated ioctl %x, fix "
-				"your userspace\n", current->comm, cmd);
+		if (printk_ratelimit())
+			printk(KERN_WARNING "mxser: '%s' uses deprecated ioctl "
+					"%x (GET_MAJOR), fix your userspace\n",
+					current->comm, cmd);
 		return put_user(ttymajor, (int __user *)argp);
 
 	case MOXA_CHKPORTENABLE:
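
For reference, printk_ratelimit() returns nonzero only while printing is allowed under the global rate limit, so a per-ioctl warning like the one above cannot flood the log. The guard pattern, sketched for any kernel-side caller (the driver name is a placeholder):

	/* Sketch: rate-limited deprecation warning (kernel context assumed;
	 * needs <linux/kernel.h> and <linux/sched.h> for current->comm). */
	static void warn_deprecated_ioctl(const char *drv, unsigned int cmd)
	{
		if (printk_ratelimit())		/* nonzero only when not throttled */
			printk(KERN_WARNING "%s: '%s' uses deprecated ioctl %x\n",
			       drv, current->comm, cmd);
	}
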
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 8d6a3ff02672..8a67f16987db 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -825,6 +825,9 @@ static int cpufreq_add_dev(struct sys_device *sys_dev)
 	policy->user_policy.min = policy->cpuinfo.min_freq;
 	policy->user_policy.max = policy->cpuinfo.max_freq;
 
+	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
+				     CPUFREQ_START, policy);
+
 #ifdef CONFIG_SMP
 
 #ifdef CONFIG_HOTPLUG_CPU
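
CPUFREQ_START is delivered through the policy notifier chain, so any subscriber registered with CPUFREQ_POLICY_NOTIFIER (the ACPI _PPC code above is the motivating one) now sees one callback per CPU as its policy is brought up. A hedged sketch of such a subscriber:

	#include <linux/cpufreq.h>
	#include <linux/notifier.h>
	#include <linux/kernel.h>

	static int my_policy_notifier(struct notifier_block *nb,
				      unsigned long event, void *data)
	{
		struct cpufreq_policy *policy = data;

		if (event == CPUFREQ_START)
			printk(KERN_INFO "cpufreq starting on cpu %u\n",
			       policy->cpu);
		return 0;
	}

	static struct notifier_block my_policy_nb = {
		.notifier_call = my_policy_notifier,
	};

	/* From module init:
	 * cpufreq_register_notifier(&my_policy_nb, CPUFREQ_POLICY_NOTIFIER); */
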
diff --git a/drivers/firmware/iscsi_ibft_find.c b/drivers/firmware/iscsi_ibft_find.c
index 11f17440fea6..d53fbbfefa3e 100644
--- a/drivers/firmware/iscsi_ibft_find.c
+++ b/drivers/firmware/iscsi_ibft_find.c
@@ -81,4 +81,3 @@ void __init reserve_ibft_region(void)
 	if (ibft_addr)
 		reserve_bootmem(pos, PAGE_ALIGN(len), BOOTMEM_DEFAULT);
 }
-EXPORT_SYMBOL_GPL(reserve_ibft_region);
diff --git a/drivers/ide/ppc/pmac.c b/drivers/ide/ppc/pmac.c
index c521bf6e1bf2..fa2be26272d5 100644
--- a/drivers/ide/ppc/pmac.c
+++ b/drivers/ide/ppc/pmac.c
@@ -1086,6 +1086,11 @@ static int __devinit pmac_ide_setup_device(pmac_ide_hwif_t *pmif, hw_regs_t *hw)
 	/* Make sure we have sane timings */
 	sanitize_timings(pmif);
 
+	host = ide_host_alloc(&d, hws);
+	if (host == NULL)
+		return -ENOMEM;
+	hwif = host->ports[0];
+
 #ifndef CONFIG_PPC64
 	/* XXX FIXME: Media bay stuff need re-organizing */
 	if (np->parent && np->parent->name
@@ -1119,11 +1124,11 @@ static int __devinit pmac_ide_setup_device(pmac_ide_hwif_t *pmif, hw_regs_t *hw)
 		    pmif->mdev ? "macio" : "PCI", pmif->aapl_bus_id,
 		    pmif->mediabay ? " (mediabay)" : "", hw->irq);
 
-	rc = ide_host_add(&d, hws, &host);
-	if (rc)
+	rc = ide_host_register(host, &d, hws);
+	if (rc) {
+		ide_host_free(host);
 		return rc;
-
-	hwif = host->ports[0];
+	}
 
 	return 0;
 }
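
The point of splitting the old ide_host_add() into ide_host_alloc() plus ide_host_register() is that the driver can configure the port (the media-bay checks between the two calls above) before the host goes live, at the cost of an explicit free on the error path. The shape of the two-phase bring-up, as a sketch (setup_port() is a hypothetical stand-in for the per-port setup):

	host = ide_host_alloc(&d, hws);		/* allocate only; nothing registered */
	if (host == NULL)
		return -ENOMEM;
	setup_port(host->ports[0]);		/* safe: host is not visible yet */

	rc = ide_host_register(host, &d, hws);	/* make it live */
	if (rc) {
		ide_host_free(host);		/* alloc succeeded, so undo it */
		return rc;
	}
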
diff --git a/drivers/input/keyboard/maple_keyb.c b/drivers/input/keyboard/maple_keyb.c
index 2b404284c28a..7797ef6e5e64 100644
--- a/drivers/input/keyboard/maple_keyb.c
+++ b/drivers/input/keyboard/maple_keyb.c
@@ -27,7 +27,6 @@
 #include <linux/init.h>
 #include <linux/timer.h>
 #include <linux/maple.h>
-#include <asm/mach/maple.h>
 
 /* Very simple mutex to ensure proper cleanup */
 static DEFINE_MUTEX(maple_keyb_mutex);
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 798e468103b8..61f441409234 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -316,29 +316,12 @@ static inline int check_space(struct dm_table *t)
  */
 static int lookup_device(const char *path, dev_t *dev)
 {
-	int r;
-	struct nameidata nd;
-	struct inode *inode;
-
-	if ((r = path_lookup(path, LOOKUP_FOLLOW, &nd)))
-		return r;
-
-	inode = nd.path.dentry->d_inode;
-	if (!inode) {
-		r = -ENOENT;
-		goto out;
-	}
-
-	if (!S_ISBLK(inode->i_mode)) {
-		r = -ENOTBLK;
-		goto out;
-	}
-
-	*dev = inode->i_rdev;
-
- out:
-	path_put(&nd.path);
-	return r;
+	struct block_device *bdev = lookup_bdev(path);
+	if (IS_ERR(bdev))
+		return PTR_ERR(bdev);
+	*dev = bdev->bd_dev;
+	bdput(bdev);
+	return 0;
 }
 
 /*
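
The rewrite leans on lookup_bdev(), which resolves a path to a struct block_device (returning -ENOENT/-ENOTBLK style errors via ERR_PTR) and takes a reference that must be dropped with bdput(), preserving the old semantics in six lines. A usage sketch for the rewritten helper (the device path is illustrative):

	dev_t dev;

	if (!lookup_device("/dev/mapper/example", &dev))
		printk(KERN_INFO "dm: resolved to %u:%u\n",
		       MAJOR(dev), MINOR(dev));
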
diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index f5ade1904aad..fa50e9ede0e6 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -426,7 +426,7 @@ config ENCLOSURE_SERVICES
 
 config SGI_XP
 	tristate "Support communication between SGI SSIs"
-	depends on IA64_GENERIC || IA64_SGI_SN2
+	depends on IA64_GENERIC || IA64_SGI_SN2 || IA64_SGI_UV || (X86_64 && SMP)
 	select IA64_UNCACHED_ALLOCATOR if IA64_GENERIC || IA64_SGI_SN2
 	select GENERIC_ALLOCATOR if IA64_GENERIC || IA64_SGI_SN2
 	---help---
@@ -450,4 +450,27 @@ config HP_ILO
 	  To compile this driver as a module, choose M here: the
 	  module will be called hpilo.
 
+config SGI_GRU
+	tristate "SGI GRU driver"
+	depends on (X86_64 || IA64_SGI_UV || IA64_GENERIC) && SMP
+	default n
+	select MMU_NOTIFIER
+	---help---
+	The GRU is a hardware resource located in the system chipset. The GRU
+	contains memory that can be mmapped into the user address space. This
+	memory is used to communicate with the GRU to perform functions such
+	as load/store, scatter/gather, bcopy, AMOs, etc. The GRU is directly
+	accessed by user instructions using user virtual addresses. GRU
+	instructions (e.g., bcopy) use user virtual addresses for operands.
+
+	If you are not running on an SGI UV system, say N.
+
+config SGI_GRU_DEBUG
+	bool "SGI GRU driver debug"
+	depends on SGI_GRU
+	default n
+	---help---
+	This option enables additional debugging code for the SGI GRU driver.
+	If you are unsure, say N.
+
 endif # MISC_DEVICES
diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
index f5e273420c09..c6c13f60b452 100644
--- a/drivers/misc/Makefile
+++ b/drivers/misc/Makefile
 obj-$(CONFIG_ENCLOSURE_SERVICES) += enclosure.o
 obj-$(CONFIG_KGDB_TESTS)	+= kgdbts.o
 obj-$(CONFIG_SGI_XP)		+= sgi-xp/
+obj-$(CONFIG_SGI_GRU)		+= sgi-gru/
 obj-$(CONFIG_HP_ILO)		+= hpilo.o
diff --git a/drivers/misc/sgi-gru/Makefile b/drivers/misc/sgi-gru/Makefile
new file mode 100644
index 000000000000..d03597a521b0
--- /dev/null
+++ b/drivers/misc/sgi-gru/Makefile
@@ -0,0 +1,3 @@
+obj-$(CONFIG_SGI_GRU) := gru.o
+gru-y := grufile.o grumain.o grufault.o grutlbpurge.o gruprocfs.o grukservices.o
+
diff --git a/drivers/misc/sgi-gru/gru.h b/drivers/misc/sgi-gru/gru.h
new file mode 100644
index 000000000000..40df7cb3f0a5
--- /dev/null
+++ b/drivers/misc/sgi-gru/gru.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#ifndef __GRU_H__
+#define __GRU_H__
+
+/*
+ * GRU architectural definitions
+ */
+#define GRU_CACHE_LINE_BYTES	64
+#define GRU_HANDLE_STRIDE	256
+#define GRU_CB_BASE		0
+#define GRU_DS_BASE		0x20000
+
+/*
+ * Size used to map GRU GSeg
+ */
+#if defined CONFIG_IA64
+#define GRU_GSEG_PAGESIZE	(256 * 1024UL)
+#elif defined CONFIG_X86_64
+#define GRU_GSEG_PAGESIZE	(256 * 1024UL)	/* ZZZ 2MB ??? */
+#else
+#error "Unsupported architecture"
+#endif
+
+/*
+ * Structure for obtaining GRU resource information
+ */
+struct gru_chiplet_info {
+	int	node;
+	int	chiplet;
+	int	blade;
+	int	total_dsr_bytes;
+	int	total_cbr;
+	int	total_user_dsr_bytes;
+	int	total_user_cbr;
+	int	free_user_dsr_bytes;
+	int	free_user_cbr;
+};
+
+/* Flags for GRU options on the gru_create_context() call */
+/* Select one of the following 4 options to specify how TLB misses are handled */
+#define GRU_OPT_MISS_DEFAULT	0x0000	/* Use default mode */
+#define GRU_OPT_MISS_USER_POLL	0x0001	/* User will poll CB for faults */
+#define GRU_OPT_MISS_FMM_INTR	0x0002	/* Send interrupt to cpu to
+					   handle fault */
+#define GRU_OPT_MISS_FMM_POLL	0x0003	/* Use system polling thread */
+#define GRU_OPT_MISS_MASK	0x0003	/* Mask for TLB MISS option */
+
+
+
+#endif /* __GRU_H__ */
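
Note that the GRU_OPT_MISS_* values are a 2-bit enumeration, not independent flags, which is why GRU_OPT_MISS_MASK exists. A small sketch of decoding the mode from an option word (self-contained apart from the #defines above):

	/* Sketch: decode the TLB-miss handling mode selected at context creation. */
	static const char *gru_miss_mode_name(unsigned long opts)
	{
		switch (opts & GRU_OPT_MISS_MASK) {
		case GRU_OPT_MISS_USER_POLL:
			return "user polls CB for faults";
		case GRU_OPT_MISS_FMM_INTR:
			return "interrupt on fault";
		case GRU_OPT_MISS_FMM_POLL:
			return "system polling thread";
		default:
			return "default mode";
		}
	}
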
diff --git a/drivers/misc/sgi-gru/gru_instructions.h b/drivers/misc/sgi-gru/gru_instructions.h
new file mode 100644
index 000000000000..0dc36225c7c6
--- /dev/null
+++ b/drivers/misc/sgi-gru/gru_instructions.h
@@ -0,0 +1,669 @@
1/*
2 * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU Lesser General Public License as published by
6 * the Free Software Foundation; either version 2.1 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */
18
19#ifndef __GRU_INSTRUCTIONS_H__
20#define __GRU_INSTRUCTIONS_H__
21
22#define gru_flush_cache_hook(p)
23#define gru_emulator_wait_hook(p, w)
24
25/*
26 * Architecture dependent functions
27 */
28
29#if defined CONFIG_IA64
30#include <linux/compiler.h>
31#include <asm/intrinsics.h>
32#define __flush_cache(p) ia64_fc(p)
33/* Use volatile on IA64 to ensure ordering via st4.rel */
34#define gru_ordered_store_int(p,v) \
35 do { \
36 barrier(); \
37 *((volatile int *)(p)) = v; /* force st.rel */ \
38 } while (0)
39#elif defined CONFIG_X86_64
40#define __flush_cache(p) clflush(p)
41#define gru_ordered_store_int(p,v) \
42 do { \
43 barrier(); \
44 *(int *)p = v; \
45 } while (0)
46#else
47#error "Unsupported architecture"
48#endif
49
50/*
51 * Control block status and exception codes
52 */
53#define CBS_IDLE 0
54#define CBS_EXCEPTION 1
55#define CBS_ACTIVE 2
56#define CBS_CALL_OS 3
57
58/* CB substatus bitmasks */
59#define CBSS_MSG_QUEUE_MASK 7
60#define CBSS_IMPLICIT_ABORT_ACTIVE_MASK 8
61
62/* CB substatus message queue values (low 3 bits of substatus) */
63#define CBSS_NO_ERROR 0
64#define CBSS_LB_OVERFLOWED 1
65#define CBSS_QLIMIT_REACHED 2
66#define CBSS_PAGE_OVERFLOW 3
67#define CBSS_AMO_NACKED 4
68#define CBSS_PUT_NACKED 5
69
70/*
71 * Structure used to fetch exception detail for CBs that terminate with
72 * CBS_EXCEPTION
73 */
74struct control_block_extended_exc_detail {
75 unsigned long cb;
76 int opc;
77 int ecause;
78 int exopc;
79 long exceptdet0;
80 int exceptdet1;
81};
82
83/*
84 * Instruction formats
85 */
86
87/*
88 * Generic instruction format.
89 * This definition has precise bit field definitions.
90 */
91struct gru_instruction_bits {
92 /* DW 0 - low */
93 unsigned int icmd: 1;
94 unsigned char ima: 3; /* CB_DelRep, unmapped mode */
95 unsigned char reserved0: 4;
96 unsigned int xtype: 3;
97 unsigned int iaa0: 2;
98 unsigned int iaa1: 2;
99 unsigned char reserved1: 1;
100 unsigned char opc: 8; /* opcode */
101 unsigned char exopc: 8; /* extended opcode */
102 /* DW 0 - high */
103 unsigned int idef2: 22; /* TRi0 */
104 unsigned char reserved2: 2;
105 unsigned char istatus: 2;
106 unsigned char isubstatus:4;
107 unsigned char reserved3: 2;
108 /* DW 1 */
109 unsigned long idef4; /* 42 bits: TRi1, BufSize */
110 /* DW 2-6 */
111 unsigned long idef1; /* BAddr0 */
112 unsigned long idef5; /* Nelem */
113 unsigned long idef6; /* Stride, Operand1 */
114 unsigned long idef3; /* BAddr1, Value, Operand2 */
115 unsigned long reserved4;
116 /* DW 7 */
117 unsigned long avalue; /* AValue */
118};
119
120/*
121 * Generic instruction with friendlier names. This format is used
122 * for inline instructions.
123 */
124struct gru_instruction {
125 /* DW 0 */
126 unsigned int op32; /* icmd,xtype,iaa0,ima,opc */
127 unsigned int tri0;
128 unsigned long tri1_bufsize; /* DW 1 */
129 unsigned long baddr0; /* DW 2 */
130 unsigned long nelem; /* DW 3 */
131 unsigned long op1_stride; /* DW 4 */
132 unsigned long op2_value_baddr1; /* DW 5 */
133 unsigned long reserved0; /* DW 6 */
134 unsigned long avalue; /* DW 7 */
135};
136
137/* Some shifts and masks for the low 32 bits of a GRU command */
138#define GRU_CB_ICMD_SHFT 0
139#define GRU_CB_ICMD_MASK 0x1
140#define GRU_CB_XTYPE_SHFT 8
141#define GRU_CB_XTYPE_MASK 0x7
142#define GRU_CB_IAA0_SHFT 11
143#define GRU_CB_IAA0_MASK 0x3
144#define GRU_CB_IAA1_SHFT 13
145#define GRU_CB_IAA1_MASK 0x3
146#define GRU_CB_IMA_SHFT 1
147#define GRU_CB_IMA_MASK 0x3
148#define GRU_CB_OPC_SHFT 16
149#define GRU_CB_OPC_MASK 0xff
150#define GRU_CB_EXOPC_SHFT 24
151#define GRU_CB_EXOPC_MASK 0xff
152
153/* GRU instruction opcodes (opc field) */
154#define OP_NOP 0x00
155#define OP_BCOPY 0x01
156#define OP_VLOAD 0x02
157#define OP_IVLOAD 0x03
158#define OP_VSTORE 0x04
159#define OP_IVSTORE 0x05
160#define OP_VSET 0x06
161#define OP_IVSET 0x07
162#define OP_MESQ 0x08
163#define OP_GAMXR 0x09
164#define OP_GAMIR 0x0a
165#define OP_GAMIRR 0x0b
166#define OP_GAMER 0x0c
167#define OP_GAMERR 0x0d
168#define OP_BSTORE 0x0e
169#define OP_VFLUSH 0x0f
170
171
172/* Extended opcodes values (exopc field) */
173
174/* GAMIR - AMOs with implicit operands */
175#define EOP_IR_FETCH 0x01 /* Plain fetch of memory */
176#define EOP_IR_CLR 0x02 /* Fetch and clear */
177#define EOP_IR_INC 0x05 /* Fetch and increment */
178#define EOP_IR_DEC 0x07 /* Fetch and decrement */
179#define EOP_IR_QCHK1 0x0d /* Queue check, 64 byte msg */
180#define EOP_IR_QCHK2 0x0e /* Queue check, 128 byte msg */
181
182/* GAMIRR - Registered AMOs with implicit operands */
183#define EOP_IRR_FETCH 0x01 /* Registered fetch of memory */
184#define EOP_IRR_CLR 0x02 /* Registered fetch and clear */
185#define EOP_IRR_INC 0x05 /* Registered fetch and increment */
186#define EOP_IRR_DEC 0x07 /* Registered fetch and decrement */
187#define EOP_IRR_DECZ 0x0f /* Registered fetch and decrement, update on zero*/
188
189/* GAMER - AMOs with explicit operands */
190#define EOP_ER_SWAP 0x00 /* Exchange argument and memory */
191#define EOP_ER_OR 0x01 /* Logical OR with memory */
192#define EOP_ER_AND 0x02 /* Logical AND with memory */
193#define EOP_ER_XOR 0x03 /* Logical XOR with memory */
194#define EOP_ER_ADD 0x04 /* Add value to memory */
195#define EOP_ER_CSWAP 0x08 /* Compare with operand2, write operand1 if match*/
196#define EOP_ER_CADD 0x0c /* Queue check, operand1*64 byte msg */
197
198/* GAMERR - Registered AMOs with explicit operands */
199#define EOP_ERR_SWAP 0x00 /* Exchange argument and memory */
200#define EOP_ERR_OR 0x01 /* Logical OR with memory */
201#define EOP_ERR_AND 0x02 /* Logical AND with memory */
202#define EOP_ERR_XOR 0x03 /* Logical XOR with memory */
203#define EOP_ERR_ADD 0x04 /* Add value to memory */
204#define EOP_ERR_CSWAP 0x08 /* Compare with operand2, write operand1 if match*/
205#define EOP_ERR_EPOLL 0x09 /* Poll for equality */
206#define EOP_ERR_NPOLL 0x0a /* Poll for inequality */
207
208/* GAMXR - SGI Arithmetic unit */
209#define EOP_XR_CSWAP 0x0b /* Masked compare exchange */
210
211
212/* Transfer types (xtype field) */
213#define XTYPE_B 0x0 /* byte */
214#define XTYPE_S 0x1 /* short (2-byte) */
215#define XTYPE_W 0x2 /* word (4-byte) */
216#define XTYPE_DW 0x3 /* doubleword (8-byte) */
217#define XTYPE_CL 0x6 /* cacheline (64-byte) */
218
219
220/* Instruction access attributes (iaa0, iaa1 fields) */
221#define IAA_RAM 0x0 /* normal cached RAM access */
222#define IAA_NCRAM 0x2 /* noncoherent RAM access */
223#define IAA_MMIO 0x1 /* noncoherent memory-mapped I/O space */
224#define IAA_REGISTER 0x3 /* memory-mapped registers, etc. */
225
226
227/* Instruction mode attributes (ima field) */
228#define IMA_MAPPED 0x0 /* Virtual mode */
229#define IMA_CB_DELAY 0x1 /* hold read responses until status changes */
230#define IMA_UNMAPPED 0x2 /* bypass the TLBs (OS only) */
231#define IMA_INTERRUPT 0x4 /* Interrupt when instruction completes */
232
233/* CBE ecause bits */
234#define CBE_CAUSE_RI (1 << 0)
235#define CBE_CAUSE_INVALID_INSTRUCTION (1 << 1)
236#define CBE_CAUSE_UNMAPPED_MODE_FORBIDDEN (1 << 2)
237#define CBE_CAUSE_PE_CHECK_DATA_ERROR (1 << 3)
238#define CBE_CAUSE_IAA_GAA_MISMATCH (1 << 4)
239#define CBE_CAUSE_DATA_SEGMENT_LIMIT_EXCEPTION (1 << 5)
240#define CBE_CAUSE_OS_FATAL_TLB_FAULT (1 << 6)
241#define CBE_CAUSE_EXECUTION_HW_ERROR (1 << 7)
242#define CBE_CAUSE_TLBHW_ERROR (1 << 8)
243#define CBE_CAUSE_RA_REQUEST_TIMEOUT (1 << 9)
244#define CBE_CAUSE_HA_REQUEST_TIMEOUT (1 << 10)
245#define CBE_CAUSE_RA_RESPONSE_FATAL (1 << 11)
246#define CBE_CAUSE_RA_RESPONSE_NON_FATAL (1 << 12)
247#define CBE_CAUSE_HA_RESPONSE_FATAL (1 << 13)
248#define CBE_CAUSE_HA_RESPONSE_NON_FATAL (1 << 14)
249#define CBE_CAUSE_ADDRESS_SPACE_DECODE_ERROR (1 << 15)
250#define CBE_CAUSE_RESPONSE_DATA_ERROR (1 << 16)
251#define CBE_CAUSE_PROTOCOL_STATE_DATA_ERROR (1 << 17)
252
253/*
254 * Exceptions are retried for the following cases. If any OTHER bits are set
255 * in ecause, the exception is not retryable.
256 */
257#define EXCEPTION_RETRY_BITS (CBE_CAUSE_RESPONSE_DATA_ERROR | \
258 CBE_CAUSE_RA_REQUEST_TIMEOUT | \
259 CBE_CAUSE_TLBHW_ERROR | \
260 CBE_CAUSE_HA_REQUEST_TIMEOUT)
261
262/* Message queue head structure */
263union gru_mesqhead {
264 unsigned long val;
265 struct {
266 unsigned int head;
267 unsigned int limit;
268 };
269};
270
271
272/* Generate the low word of a GRU instruction */
273static inline unsigned int
274__opword(unsigned char opcode, unsigned char exopc, unsigned char xtype,
275 unsigned char iaa0, unsigned char iaa1,
276 unsigned char ima)
277{
278 return (1 << GRU_CB_ICMD_SHFT) |
279 (iaa0 << GRU_CB_IAA0_SHFT) |
280 (iaa1 << GRU_CB_IAA1_SHFT) |
281 (ima << GRU_CB_IMA_SHFT) |
282 (xtype << GRU_CB_XTYPE_SHFT) |
283 (opcode << GRU_CB_OPC_SHFT) |
284 (exopc << GRU_CB_EXOPC_SHFT);
285}
286
287/*
288 * Architecture specific intrinsics
289 */
290static inline void gru_flush_cache(void *p)
291{
292 __flush_cache(p);
293}
294
295/*
296 * Store the lower 32 bits of the command including the "start" bit. Then
297 * start the instruction executing.
298 */
299static inline void gru_start_instruction(struct gru_instruction *ins, int op32)
300{
301 gru_ordered_store_int(ins, op32);
302}
303
304
305/* Convert "hints" to IMA */
306#define CB_IMA(h) ((h) | IMA_UNMAPPED)
307
308/* Convert data segment cache line index into TRI0 / TRI1 value */
309#define GRU_DINDEX(i) ((i) * GRU_CACHE_LINE_BYTES)
310
311/* Inline functions for GRU instructions.
312 * Note:
313 * - nelem and stride are in elements
314 * - tri0/tri1 is in bytes for the beginning of the data segment.
315 */
316static inline void gru_vload(void *cb, unsigned long mem_addr,
317 unsigned int tri0, unsigned char xtype, unsigned long nelem,
318 unsigned long stride, unsigned long hints)
319{
320 struct gru_instruction *ins = (struct gru_instruction *)cb;
321
322 ins->baddr0 = (long)mem_addr;
323 ins->nelem = nelem;
324 ins->tri0 = tri0;
325 ins->op1_stride = stride;
326 gru_start_instruction(ins, __opword(OP_VLOAD, 0, xtype, IAA_RAM, 0,
327 CB_IMA(hints)));
328}
329
330static inline void gru_vstore(void *cb, unsigned long mem_addr,
331 unsigned int tri0, unsigned char xtype, unsigned long nelem,
332 unsigned long stride, unsigned long hints)
333{
334 struct gru_instruction *ins = (void *)cb;
335
336 ins->baddr0 = (long)mem_addr;
337 ins->nelem = nelem;
338 ins->tri0 = tri0;
339 ins->op1_stride = stride;
340 gru_start_instruction(ins, __opword(OP_VSTORE, 0, xtype, IAA_RAM, 0,
341 CB_IMA(hints)));
342}
343
344static inline void gru_ivload(void *cb, unsigned long mem_addr,
345 unsigned int tri0, unsigned int tri1, unsigned char xtype,
346 unsigned long nelem, unsigned long hints)
347{
348 struct gru_instruction *ins = (void *)cb;
349
350 ins->baddr0 = (long)mem_addr;
351 ins->nelem = nelem;
352 ins->tri0 = tri0;
353 ins->tri1_bufsize = tri1;
354 gru_start_instruction(ins, __opword(OP_IVLOAD, 0, xtype, IAA_RAM, 0,
355 CB_IMA(hints)));
356}
357
358static inline void gru_ivstore(void *cb, unsigned long mem_addr,
359 unsigned int tri0, unsigned int tri1,
360 unsigned char xtype, unsigned long nelem, unsigned long hints)
361{
362 struct gru_instruction *ins = (void *)cb;
363
364 ins->baddr0 = (long)mem_addr;
365 ins->nelem = nelem;
366 ins->tri0 = tri0;
367 ins->tri1_bufsize = tri1;
368 gru_start_instruction(ins, __opword(OP_IVSTORE, 0, xtype, IAA_RAM, 0,
369 CB_IMA(hints)));
370}
371
372static inline void gru_vset(void *cb, unsigned long mem_addr,
373 unsigned long value, unsigned char xtype, unsigned long nelem,
374 unsigned long stride, unsigned long hints)
375{
376 struct gru_instruction *ins = (void *)cb;
377
378 ins->baddr0 = (long)mem_addr;
379 ins->op2_value_baddr1 = value;
380 ins->nelem = nelem;
381 ins->op1_stride = stride;
382 gru_start_instruction(ins, __opword(OP_VSET, 0, xtype, IAA_RAM, 0,
383 CB_IMA(hints)));
384}
385
386static inline void gru_ivset(void *cb, unsigned long mem_addr,
387 unsigned int tri1, unsigned long value, unsigned char xtype,
388 unsigned long nelem, unsigned long hints)
389{
390 struct gru_instruction *ins = (void *)cb;
391
392 ins->baddr0 = (long)mem_addr;
393 ins->op2_value_baddr1 = value;
394 ins->nelem = nelem;
395 ins->tri1_bufsize = tri1;
396 gru_start_instruction(ins, __opword(OP_IVSET, 0, xtype, IAA_RAM, 0,
397 CB_IMA(hints)));
398}
399
400static inline void gru_vflush(void *cb, unsigned long mem_addr,
401 unsigned long nelem, unsigned char xtype, unsigned long stride,
402 unsigned long hints)
403{
404 struct gru_instruction *ins = (void *)cb;
405
406 ins->baddr0 = (long)mem_addr;
407 ins->op1_stride = stride;
408 ins->nelem = nelem;
409 gru_start_instruction(ins, __opword(OP_VFLUSH, 0, xtype, IAA_RAM, 0,
410 CB_IMA(hints)));
411}
412
413static inline void gru_nop(void *cb, int hints)
414{
415 struct gru_instruction *ins = (void *)cb;
416
417 gru_start_instruction(ins, __opword(OP_NOP, 0, 0, 0, 0, CB_IMA(hints)));
418}
419
420
421static inline void gru_bcopy(void *cb, const unsigned long src,
422 unsigned long dest,
423 unsigned int tri0, unsigned int xtype, unsigned long nelem,
424 unsigned int bufsize, unsigned long hints)
425{
426 struct gru_instruction *ins = (void *)cb;
427
428 ins->baddr0 = (long)src;
429 ins->op2_value_baddr1 = (long)dest;
430 ins->nelem = nelem;
431 ins->tri0 = tri0;
432 ins->tri1_bufsize = bufsize;
433 gru_start_instruction(ins, __opword(OP_BCOPY, 0, xtype, IAA_RAM,
434 IAA_RAM, CB_IMA(hints)));
435}
436
437static inline void gru_bstore(void *cb, const unsigned long src,
438 unsigned long dest, unsigned int tri0, unsigned int xtype,
439 unsigned long nelem, unsigned long hints)
440{
441 struct gru_instruction *ins = (void *)cb;
442
443 ins->baddr0 = (long)src;
444 ins->op2_value_baddr1 = (long)dest;
445 ins->nelem = nelem;
446 ins->tri0 = tri0;
447 gru_start_instruction(ins, __opword(OP_BSTORE, 0, xtype, 0, IAA_RAM,
448 CB_IMA(hints)));
449}
450
451static inline void gru_gamir(void *cb, int exopc, unsigned long src,
452 unsigned int xtype, unsigned long hints)
453{
454 struct gru_instruction *ins = (void *)cb;
455
456 ins->baddr0 = (long)src;
457 gru_start_instruction(ins, __opword(OP_GAMIR, exopc, xtype, IAA_RAM, 0,
458 CB_IMA(hints)));
459}
460
461static inline void gru_gamirr(void *cb, int exopc, unsigned long src,
462 unsigned int xtype, unsigned long hints)
463{
464 struct gru_instruction *ins = (void *)cb;
465
466 ins->baddr0 = (long)src;
467 gru_start_instruction(ins, __opword(OP_GAMIRR, exopc, xtype, IAA_RAM, 0,
468 CB_IMA(hints)));
469}
470
471static inline void gru_gamer(void *cb, int exopc, unsigned long src,
472 unsigned int xtype,
473 unsigned long operand1, unsigned long operand2,
474 unsigned long hints)
475{
476 struct gru_instruction *ins = (void *)cb;
477
478 ins->baddr0 = (long)src;
479 ins->op1_stride = operand1;
480 ins->op2_value_baddr1 = operand2;
481 gru_start_instruction(ins, __opword(OP_GAMER, exopc, xtype, IAA_RAM, 0,
482 CB_IMA(hints)));
483}
484
485static inline void gru_gamerr(void *cb, int exopc, unsigned long src,
486 unsigned int xtype, unsigned long operand1,
487 unsigned long operand2, unsigned long hints)
488{
489 struct gru_instruction *ins = (void *)cb;
490
491 ins->baddr0 = (long)src;
492 ins->op1_stride = operand1;
493 ins->op2_value_baddr1 = operand2;
494 gru_start_instruction(ins, __opword(OP_GAMERR, exopc, xtype, IAA_RAM, 0,
495 CB_IMA(hints)));
496}
497
498static inline void gru_gamxr(void *cb, unsigned long src,
499 unsigned int tri0, unsigned long hints)
500{
501 struct gru_instruction *ins = (void *)cb;
502
503 ins->baddr0 = (long)src;
504 ins->nelem = 4;
505 gru_start_instruction(ins, __opword(OP_GAMXR, EOP_XR_CSWAP, XTYPE_DW,
506 IAA_RAM, 0, CB_IMA(hints)));
507}
508
509static inline void gru_mesq(void *cb, unsigned long queue,
510 unsigned long tri0, unsigned long nelem,
511 unsigned long hints)
512{
513 struct gru_instruction *ins = (void *)cb;
514
515 ins->baddr0 = (long)queue;
516 ins->nelem = nelem;
517 ins->tri0 = tri0;
518 gru_start_instruction(ins, __opword(OP_MESQ, 0, XTYPE_CL, IAA_RAM, 0,
519 CB_IMA(hints)));
520}
521
522static inline unsigned long gru_get_amo_value(void *cb)
523{
524 struct gru_instruction *ins = (void *)cb;
525
526 return ins->avalue;
527}
528
529static inline int gru_get_amo_value_head(void *cb)
530{
531 struct gru_instruction *ins = (void *)cb;
532
533 return ins->avalue & 0xffffffff;
534}
535
536static inline int gru_get_amo_value_limit(void *cb)
537{
538 struct gru_instruction *ins = (void *)cb;
539
540 return ins->avalue >> 32;
541}
542
543static inline union gru_mesqhead gru_mesq_head(int head, int limit)
544{
545 union gru_mesqhead mqh;
546
547 mqh.head = head;
548 mqh.limit = limit;
549 return mqh;
550}
551
552/*
553 * Get struct control_block_extended_exc_detail for CB.
554 */
555extern int gru_get_cb_exception_detail(void *cb,
556 struct control_block_extended_exc_detail *excdet);
557
558#define GRU_EXC_STR_SIZE 256
559
560extern int gru_check_status_proc(void *cb);
561extern int gru_wait_proc(void *cb);
562extern void gru_wait_abort_proc(void *cb);
563
564/*
565 * Control block definition for checking status
566 */
567struct gru_control_block_status {
568 unsigned int icmd :1;
569 unsigned int unused1 :31;
570 unsigned int unused2 :24;
571 unsigned int istatus :2;
572 unsigned int isubstatus :4;
573 unsigned int inused3 :2;
574};
575
576/* Get CB status */
577static inline int gru_get_cb_status(void *cb)
578{
579 struct gru_control_block_status *cbs = (void *)cb;
580
581 return cbs->istatus;
582}
583
584/* Get CB message queue substatus */
585static inline int gru_get_cb_message_queue_substatus(void *cb)
586{
587 struct gru_control_block_status *cbs = (void *)cb;
588
589 return cbs->isubstatus & CBSS_MSG_QUEUE_MASK;
590}
591
592/* Get CB substatus */
593static inline int gru_get_cb_substatus(void *cb)
594{
595 struct gru_control_block_status *cbs = (void *)cb;
596
597 return cbs->isubstatus;
598}
599
600/* Check the status of a CB. If the CB is in UPM mode, call the
601 * OS to handle the UPM status.
602 * Returns the CB status field value (0 for normal completion)
603 */
604static inline int gru_check_status(void *cb)
605{
606 struct gru_control_block_status *cbs = (void *)cb;
607 int ret = cbs->istatus;
608
609 if (ret == CBS_CALL_OS)
610 ret = gru_check_status_proc(cb);
611 return ret;
612}
613
614/* Wait for CB to complete.
615 * Returns the CB status field value (0 for normal completion)
616 */
617static inline int gru_wait(void *cb)
618{
619 struct gru_control_block_status *cbs = (void *)cb;
620 int ret = cbs->istatus;;
621
622 if (ret != CBS_IDLE)
623 ret = gru_wait_proc(cb);
624 return ret;
625}
626
627/* Wait for CB to complete. Aborts program if error. (Note: error does NOT
628 * mean TLB mis - only fatal errors such as memory parity error or user
629 * bugs will cause termination.
630 */
631static inline void gru_wait_abort(void *cb)
632{
633 struct gru_control_block_status *cbs = (void *)cb;
634
635 if (cbs->istatus != CBS_IDLE)
636 gru_wait_abort_proc(cb);
637}
638
639
640/*
641 * Get a pointer to a control block
642 * gseg - GSeg address returned from gru_get_thread_gru_segment()
643 * index - index of desired CB
644 */
645static inline void *gru_get_cb_pointer(void *gseg,
646 int index)
647{
648 return gseg + GRU_CB_BASE + index * GRU_HANDLE_STRIDE;
649}
650
651/*
652 * Get a pointer to a cacheline in the data segment portion of a GSeg
653 * gseg - GSeg address returned from gru_get_thread_gru_segment()
654 * index - index of desired cache line
655 */
656static inline void *gru_get_data_pointer(void *gseg, int index)
657{
658 return gseg + GRU_DS_BASE + index * GRU_CACHE_LINE_BYTES;
659}
660
661/*
662 * Convert a vaddr into the tri index within the GSEG
663 * vaddr - virtual address of within gseg
664 */
665static inline int gru_get_tri(void *vaddr)
666{
667 return ((unsigned long)vaddr & (GRU_GSEG_PAGESIZE - 1)) - GRU_DS_BASE;
668}
669#endif /* __GRU_INSTRUCTIONS_H__ */
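
Putting the pieces together: a hedged sketch of how a client issues a bcopy through a control block and waits on it, using only helpers defined above. The gseg pointer is assumed to come from the user library's gru_get_thread_gru_segment() (referenced in the comments), and the bufsize and hint choices are illustrative rather than prescribed:

	/* Sketch: one bcopy of nelem cache lines, then wait for completion. */
	static int copy_via_gru(void *gseg, unsigned long src, unsigned long dst,
				unsigned long nelem)
	{
		void *cb = gru_get_cb_pointer(gseg, 0);	   /* CB 0 of this GSeg */
		void *dsr = gru_get_data_pointer(gseg, 0); /* staging line(s) in the DS */

		gru_bcopy(cb, src, dst, gru_get_tri(dsr), XTYPE_CL, nelem,
			  nelem, IMA_CB_DELAY);
		return gru_wait(cb);			   /* CBS_IDLE (0) on success */
	}
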
diff --git a/drivers/misc/sgi-gru/grufault.c b/drivers/misc/sgi-gru/grufault.c
new file mode 100644
index 000000000000..3d33015bbf31
--- /dev/null
+++ b/drivers/misc/sgi-gru/grufault.c
@@ -0,0 +1,633 @@
+/*
+ * SN Platform GRU Driver
+ *
+ *              FAULT HANDLER FOR GRU DETECTED TLB MISSES
+ *
+ * This file contains code that handles TLB misses within the GRU.
+ * These misses are reported either via interrupts or user polling of
+ * the user CB.
+ *
+ * Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/spinlock.h>
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+#include <linux/device.h>
+#include <linux/io.h>
+#include <linux/uaccess.h>
+#include <asm/pgtable.h>
+#include "gru.h"
+#include "grutables.h"
+#include "grulib.h"
+#include "gru_instructions.h"
+#include <asm/uv/uv_hub.h>
+
+/*
+ * Test if a physical address is a valid GRU GSEG address
+ */
+static inline int is_gru_paddr(unsigned long paddr)
+{
+	return paddr >= gru_start_paddr && paddr < gru_end_paddr;
+}
+
+/*
+ * Find the vma of a GRU segment. Caller must hold mmap_sem.
+ */
+struct vm_area_struct *gru_find_vma(unsigned long vaddr)
+{
+	struct vm_area_struct *vma;
+
+	vma = find_vma(current->mm, vaddr);
+	if (vma && vma->vm_start <= vaddr && vma->vm_ops == &gru_vm_ops)
+		return vma;
+	return NULL;
+}
+
+/*
+ * Find and lock the gts that contains the specified user vaddr.
+ *
+ * Returns:
+ *	- *gts with the mmap_sem locked for read and the GTS locked.
+ *	- NULL if vaddr invalid OR is not a valid GSEG vaddr.
+ */
+
+static struct gru_thread_state *gru_find_lock_gts(unsigned long vaddr)
+{
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+	struct gru_thread_state *gts = NULL;
+
+	down_read(&mm->mmap_sem);
+	vma = gru_find_vma(vaddr);
+	if (vma)
+		gts = gru_find_thread_state(vma, TSID(vaddr, vma));
+	if (gts)
+		mutex_lock(&gts->ts_ctxlock);
+	else
+		up_read(&mm->mmap_sem);
+	return gts;
+}
+
+static struct gru_thread_state *gru_alloc_locked_gts(unsigned long vaddr)
+{
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+	struct gru_thread_state *gts = NULL;
+
+	down_write(&mm->mmap_sem);
+	vma = gru_find_vma(vaddr);
+	if (vma)
+		gts = gru_alloc_thread_state(vma, TSID(vaddr, vma));
+	if (gts) {
+		mutex_lock(&gts->ts_ctxlock);
+		downgrade_write(&mm->mmap_sem);
+	} else {
+		up_write(&mm->mmap_sem);
+	}
+
+	return gts;
+}
+
+/*
+ * Unlock a GTS that was previously locked with gru_find_lock_gts().
+ */
+static void gru_unlock_gts(struct gru_thread_state *gts)
+{
+	mutex_unlock(&gts->ts_ctxlock);
+	up_read(&current->mm->mmap_sem);
+}
+
+/*
+ * Set a CB.istatus to active using a user virtual address. This must be done
+ * just prior to a TFH RESTART. The new cb.istatus is an in-cache status ONLY.
+ * If the line is evicted, the status may be lost. The in-cache update
+ * is necessary to prevent the user from seeing a stale cb.istatus that will
+ * change as soon as the TFH restart is complete. Races may cause an
+ * occasional failure to clear the cb.istatus, but that is ok.
+ *
+ * If the cb address is not valid (should not happen, but...), nothing
+ * bad will happen. The get_user()/put_user() will fail but there
+ * are no bad side-effects.
+ */
+static void gru_cb_set_istatus_active(unsigned long __user *cb)
+{
+	union {
+		struct gru_instruction_bits bits;
+		unsigned long dw;
+	} u;
+
+	if (cb) {
+		get_user(u.dw, cb);
+		u.bits.istatus = CBS_ACTIVE;
+		put_user(u.dw, cb);
+	}
+}
+
+/*
+ * Convert an interrupt IRQ to a pointer to the GRU GTS that caused the
+ * interrupt. Interrupts are always sent to a cpu on the blade that contains the
+ * GRU (except for headless blades which are not currently supported). A blade
+ * has N grus; a block of N consecutive IRQs is assigned to the GRUs. The IRQ
+ * number uniquely identifies the GRU chiplet on the local blade that caused the
+ * interrupt. Always called in interrupt context.
+ */
+static inline struct gru_state *irq_to_gru(int irq)
+{
+	return &gru_base[uv_numa_blade_id()]->bs_grus[irq - IRQ_GRU];
+}
+
+/*
+ * Read & clear a TFM
+ *
+ * The GRU has an array of fault maps. A map is private to a cpu.
+ * Only one cpu will be accessing a cpu's fault map.
+ *
+ * This function scans the cpu-private fault map & clears all bits that
+ * are set. The function returns a bitmap that indicates the bits that
+ * were cleared. Note that since the maps may be updated asynchronously by
+ * the GRU, atomic operations must be used to clear bits.
+ */
+static void get_clear_fault_map(struct gru_state *gru,
+				struct gru_tlb_fault_map *map)
+{
+	unsigned long i, k;
+	struct gru_tlb_fault_map *tfm;
+
+	tfm = get_tfm_for_cpu(gru, gru_cpu_fault_map_id());
+	prefetchw(tfm);		/* Helps on hardware, required for emulator */
+	for (i = 0; i < BITS_TO_LONGS(GRU_NUM_CBE); i++) {
+		k = tfm->fault_bits[i];
+		if (k)
+			k = xchg(&tfm->fault_bits[i], 0UL);
+		map->fault_bits[i] = k;
+	}
+
+	/*
+	 * Not functionally required but helps performance. (Required
+	 * on emulator)
+	 */
+	gru_flush_cache(tfm);
+}
+
+/*
+ * Atomic (interrupt context) & non-atomic (user context) functions to
+ * convert a vaddr into a physical address. The size of the page
+ * is returned in pageshift.
+ * 	returns:
+ * 		  0 - successful
+ * 		< 0 - error code
+ * 		  1 - (atomic only) try again in non-atomic context
+ */
+static int non_atomic_pte_lookup(struct vm_area_struct *vma,
+				 unsigned long vaddr, int write,
+				 unsigned long *paddr, int *pageshift)
+{
+	struct page *page;
+
+	/* ZZZ Need to handle HUGE pages */
+	if (is_vm_hugetlb_page(vma))
+		return -EFAULT;
+	*pageshift = PAGE_SHIFT;
+	if (get_user_pages
+	    (current, current->mm, vaddr, 1, write, 0, &page, NULL) <= 0)
+		return -EFAULT;
+	*paddr = page_to_phys(page);
+	put_page(page);
+	return 0;
+}
+
+/*
+ *
+ * atomic_pte_lookup
+ *
+ * Convert a user virtual address to a physical address
+ * Only supports Intel large pages (2MB only) on x86_64.
+ *	ZZZ - hugepage support is incomplete
+ */
+static int atomic_pte_lookup(struct vm_area_struct *vma, unsigned long vaddr,
+	int write, unsigned long *paddr, int *pageshift)
+{
+	pgd_t *pgdp;
+	pmd_t *pmdp;
+	pud_t *pudp;
+	pte_t pte;
+
+	WARN_ON(irqs_disabled());		/* ZZZ debug */
+
+	local_irq_disable();
+	pgdp = pgd_offset(vma->vm_mm, vaddr);
+	if (unlikely(pgd_none(*pgdp)))
+		goto err;
+
+	pudp = pud_offset(pgdp, vaddr);
+	if (unlikely(pud_none(*pudp)))
+		goto err;
+
+	pmdp = pmd_offset(pudp, vaddr);
+	if (unlikely(pmd_none(*pmdp)))
+		goto err;
+#ifdef CONFIG_X86_64
+	if (unlikely(pmd_large(*pmdp)))
+		pte = *(pte_t *) pmdp;
+	else
+#endif
+		pte = *pte_offset_kernel(pmdp, vaddr);
+
+	local_irq_enable();
+
+	if (unlikely(!pte_present(pte) ||
+		     (write && (!pte_write(pte) || !pte_dirty(pte)))))
+		return 1;
+
+	*paddr = pte_pfn(pte) << PAGE_SHIFT;
+	*pageshift = is_vm_hugetlb_page(vma) ? HPAGE_SHIFT : PAGE_SHIFT;
+	return 0;
+
+err:
+	local_irq_enable();
+	return 1;
+}
+
+/*
+ * Drop a TLB entry into the GRU. The fault is described by info in a TFH.
+ * Input:
+ *	cb	Address of user CBR. Null if not running in user context
+ * Return:
+ *	  0 = dropin, exception, or switch to UPM successful
+ *	  1 = range invalidate active
+ *	< 0 = error code
+ *
+ */
+static int gru_try_dropin(struct gru_thread_state *gts,
+			  struct gru_tlb_fault_handle *tfh,
+			  unsigned long __user *cb)
+{
+	struct mm_struct *mm = gts->ts_mm;
+	struct vm_area_struct *vma;
+	int pageshift, asid, write, ret;
+	unsigned long paddr, gpa, vaddr;
+
+	/*
+	 * NOTE: The GRU contains magic hardware that eliminates races between
+	 * TLB invalidates and TLB dropins. If an invalidate occurs
+	 * in the window between reading the TFH and the subsequent TLB dropin,
+	 * the dropin is ignored. This eliminates the need for additional locks.
+	 */
+
+	/*
+	 * Error if TFH state is IDLE or FMM mode & the user is issuing a UPM
+	 * call. Might be a hardware race OR a stupid user. Ignore FMM because
+	 * FMM is a transient state.
+	 */
+	if (tfh->state == TFHSTATE_IDLE)
+		goto failidle;
+	if (tfh->state == TFHSTATE_MISS_FMM && cb)
+		goto failfmm;
+
+	write = (tfh->cause & TFHCAUSE_TLB_MOD) != 0;
+	vaddr = tfh->missvaddr;
+	asid = tfh->missasid;
+	if (asid == 0)
+		goto failnoasid;
+
+	rmb();	/* TFH must be cache resident before reading ms_range_active */
+
+	/*
+	 * TFH is cache resident - at least briefly. Fail the dropin
+	 * if a range invalidate is active.
+	 */
+	if (atomic_read(&gts->ts_gms->ms_range_active))
+		goto failactive;
+
+	vma = find_vma(mm, vaddr);
+	if (!vma)
+		goto failinval;
+
+	/*
+	 * Atomic lookup is faster & usually works even if called in non-atomic
+	 * context.
+	 */
+	ret = atomic_pte_lookup(vma, vaddr, write, &paddr, &pageshift);
+	if (ret) {
+		if (!cb)
+			goto failupm;
+		if (non_atomic_pte_lookup(vma, vaddr, write, &paddr,
+					  &pageshift))
+			goto failinval;
+	}
+	if (is_gru_paddr(paddr))
+		goto failinval;
+
+	paddr = paddr & ~((1UL << pageshift) - 1);
+	gpa = uv_soc_phys_ram_to_gpa(paddr);
+	gru_cb_set_istatus_active(cb);
+	tfh_write_restart(tfh, gpa, GAA_RAM, vaddr, asid, write,
+			  GRU_PAGESIZE(pageshift));
+	STAT(tlb_dropin);
+	gru_dbg(grudev,
+		"%s: tfh 0x%p, vaddr 0x%lx, asid 0x%x, ps %d, gpa 0x%lx\n",
+		ret ? "non-atomic" : "atomic", tfh, vaddr, asid,
+		pageshift, gpa);
+	return 0;
+
+failnoasid:
+	/* No asid (delayed unload). */
+	STAT(tlb_dropin_fail_no_asid);
+	gru_dbg(grudev, "FAILED no_asid tfh: 0x%p, vaddr 0x%lx\n", tfh, vaddr);
+	if (!cb)
+		tfh_user_polling_mode(tfh);
+	else
+		gru_flush_cache(tfh);
+	return -EAGAIN;
+
+failupm:
+	/* Atomic failure; switch CBR to UPM */
+	tfh_user_polling_mode(tfh);
+	STAT(tlb_dropin_fail_upm);
+	gru_dbg(grudev, "FAILED upm tfh: 0x%p, vaddr 0x%lx\n", tfh, vaddr);
+	return 1;
+
+failfmm:
+	/* FMM state on UPM call */
+	STAT(tlb_dropin_fail_fmm);
+	gru_dbg(grudev, "FAILED fmm tfh: 0x%p, state %d\n", tfh, tfh->state);
+	return 0;
+
+failidle:
+	/* TFH was idle - no miss pending */
+	gru_flush_cache(tfh);
+	if (cb)
+		gru_flush_cache(cb);
+	STAT(tlb_dropin_fail_idle);
+	gru_dbg(grudev, "FAILED idle tfh: 0x%p, state %d\n", tfh, tfh->state);
+	return 0;
+
+failinval:
+	/* All errors (atomic & non-atomic) switch CBR to EXCEPTION state */
+	tfh_exception(tfh);
+	STAT(tlb_dropin_fail_invalid);
+	gru_dbg(grudev, "FAILED inval tfh: 0x%p, vaddr 0x%lx\n", tfh, vaddr);
+	return -EFAULT;
+
+failactive:
+	/* Range invalidate active. Switch to UPM iff atomic */
+	if (!cb)
+		tfh_user_polling_mode(tfh);
+	else
+		gru_flush_cache(tfh);
+	STAT(tlb_dropin_fail_range_active);
+	gru_dbg(grudev, "FAILED range active: tfh 0x%p, vaddr 0x%lx\n",
+		tfh, vaddr);
+	return 1;
+}
+
+/*
+ * Process an external interrupt from the GRU. This interrupt is
+ * caused by a TLB miss.
+ * Note that this is the interrupt handler that is registered with the
+ * Linux interrupt subsystem.
+ */
+irqreturn_t gru_intr(int irq, void *dev_id)
+{
+	struct gru_state *gru;
+	struct gru_tlb_fault_map map;
+	struct gru_thread_state *gts;
+	struct gru_tlb_fault_handle *tfh = NULL;
+	int cbrnum, ctxnum;
+
+	STAT(intr);
+
+	gru = irq_to_gru(irq);
+	if (!gru) {
+		dev_err(grudev, "GRU: invalid interrupt: cpu %d, irq %d\n",
+			raw_smp_processor_id(), irq);
+		return IRQ_NONE;
+	}
+	get_clear_fault_map(gru, &map);
+	gru_dbg(grudev, "irq %d, gru %x, map 0x%lx\n", irq, gru->gs_gid,
+		map.fault_bits[0]);
+
+	for_each_cbr_in_tfm(cbrnum, map.fault_bits) {
+		tfh = get_tfh_by_index(gru, cbrnum);
+		prefetchw(tfh);	/* Helps on hardware, required for emulator */
+
+		/*
+		 * When hardware sets a bit in the faultmap, it implicitly
+		 * locks the GRU context so that it cannot be unloaded.
+		 * The gts cannot change until a TFH start/writestart command
+		 * is issued.
+		 */
+		ctxnum = tfh->ctxnum;
+		gts = gru->gs_gts[ctxnum];
+
+		/*
+		 * This is running in interrupt context. Trylock the mmap_sem.
+		 * If it fails, retry the fault in user context.
+		 */
+		if (down_read_trylock(&gts->ts_mm->mmap_sem)) {
+			gru_try_dropin(gts, tfh, NULL);
+			up_read(&gts->ts_mm->mmap_sem);
+		} else {
+			tfh_user_polling_mode(tfh);
+		}
+	}
+	return IRQ_HANDLED;
+}
+
+
+static int gru_user_dropin(struct gru_thread_state *gts,
+			   struct gru_tlb_fault_handle *tfh,
+			   unsigned long __user *cb)
+{
+	struct gru_mm_struct *gms = gts->ts_gms;
+	int ret;
+
+	while (1) {
+		wait_event(gms->ms_wait_queue,
+			   atomic_read(&gms->ms_range_active) == 0);
+		prefetchw(tfh);	/* Helps on hardware, required for emulator */
+		ret = gru_try_dropin(gts, tfh, cb);
+		if (ret <= 0)
+			return ret;
+		STAT(call_os_wait_queue);
+	}
+}
+
+/*
+ * This interface is called as a result of a user detecting a "call OS" bit
+ * in a user CB. This normally means that a TLB fault has occurred.
+ * 	cb - user virtual address of the CB
+ */
+int gru_handle_user_call_os(unsigned long cb)
+{
+	struct gru_tlb_fault_handle *tfh;
+	struct gru_thread_state *gts;
+	unsigned long __user *cbp;
+	int ucbnum, cbrnum, ret = -EINVAL;
+
+	STAT(call_os);
+	gru_dbg(grudev, "address 0x%lx\n", cb);
+
+	/* sanity check the cb pointer */
+	ucbnum = get_cb_number((void *)cb);
+	if ((cb & (GRU_HANDLE_STRIDE - 1)) || ucbnum >= GRU_NUM_CB)
+		return -EINVAL;
+	cbp = (unsigned long *)cb;
+
+	gts = gru_find_lock_gts(cb);
+	if (!gts)
+		return -EINVAL;
+
+	if (ucbnum >= gts->ts_cbr_au_count * GRU_CBR_AU_SIZE) {
+		ret = -EINVAL;
+		goto exit;
+	}
+
+	/*
+	 * If force_unload is set, the UPM TLB fault is phony. The task
+	 * has migrated to another node and the GSEG must be moved. Just
+	 * unload the context. The task will page fault and assign a new
+	 * context.
+	 */
+	ret = -EAGAIN;
+	cbrnum = thread_cbr_number(gts, ucbnum);
+	if (gts->ts_force_unload) {
+		gru_unload_context(gts, 1);
+	} else if (gts->ts_gru) {
+		tfh = get_tfh_by_index(gts->ts_gru, cbrnum);
+		ret = gru_user_dropin(gts, tfh, cbp);
+	}
+exit:
+	gru_unlock_gts(gts);
+	return ret;
+}
+
+/*
+ * Fetch the exception detail information for a CB that terminated with
+ * an exception.
+ */
+int gru_get_exception_detail(unsigned long arg)
+{
+	struct control_block_extended_exc_detail excdet;
+	struct gru_control_block_extended *cbe;
+	struct gru_thread_state *gts;
+	int ucbnum, cbrnum, ret;
+
+	STAT(user_exception);
+	if (copy_from_user(&excdet, (void __user *)arg, sizeof(excdet)))
+		return -EFAULT;
+
+	gru_dbg(grudev, "address 0x%lx\n", excdet.cb);
+	gts = gru_find_lock_gts(excdet.cb);
+	if (!gts)
+		return -EINVAL;
+
+	if (gts->ts_gru) {
+		ucbnum = get_cb_number((void *)excdet.cb);
+		cbrnum = thread_cbr_number(gts, ucbnum);
+		cbe = get_cbe_by_index(gts->ts_gru, cbrnum);
+		excdet.opc = cbe->opccpy;
+		excdet.exopc = cbe->exopccpy;
+		excdet.ecause = cbe->ecause;
+		excdet.exceptdet0 = cbe->idef1upd;
+		excdet.exceptdet1 = cbe->idef3upd;
+		ret = 0;
+	} else {
+		ret = -EAGAIN;
+	}
+	gru_unlock_gts(gts);
+
+	gru_dbg(grudev, "address 0x%lx, ecause 0x%x\n", excdet.cb,
+		excdet.ecause);
+	if (!ret && copy_to_user((void __user *)arg, &excdet, sizeof(excdet)))
+		ret = -EFAULT;
+	return ret;
+}
+
+/*
+ * User request to unload a context. Content is saved for possible reload.
+ */
+int gru_user_unload_context(unsigned long arg)
+{
+	struct gru_thread_state *gts;
+	struct gru_unload_context_req req;
+
+	STAT(user_unload_context);
+	if (copy_from_user(&req, (void __user *)arg, sizeof(req)))
+		return -EFAULT;
+
+	gru_dbg(grudev, "gseg 0x%lx\n", req.gseg);
+
+	gts = gru_find_lock_gts(req.gseg);
+	if (!gts)
+		return -EINVAL;
+
+	if (gts->ts_gru)
+		gru_unload_context(gts, 1);
+	gru_unlock_gts(gts);
+
+	return 0;
+}
+
+/*
+ * User request to flush a range of virtual addresses from the GRU TLB
+ * (Mainly for testing).
+ */
+int gru_user_flush_tlb(unsigned long arg)
+{
+	struct gru_thread_state *gts;
+	struct gru_flush_tlb_req req;
+
+	STAT(user_flush_tlb);
+	if (copy_from_user(&req, (void __user *)arg, sizeof(req)))
+		return -EFAULT;
+
+	gru_dbg(grudev, "gseg 0x%lx, vaddr 0x%lx, len 0x%lx\n", req.gseg,
+		req.vaddr, req.len);
+
+	gts = gru_find_lock_gts(req.gseg);
+	if (!gts)
+		return -EINVAL;
+
+	gru_flush_tlb_range(gts->ts_gms, req.vaddr, req.vaddr + req.len);
+	gru_unlock_gts(gts);
+
+	return 0;
+}
+
+/*
+ * Register the current task as the user of the GSEG slice.
+ * Needed for TLB fault interrupt targeting.
+ */
+int gru_set_task_slice(long address)
+{
+	struct gru_thread_state *gts;
+
+	STAT(set_task_slice);
+	gru_dbg(grudev, "address 0x%lx\n", address);
+	gts = gru_alloc_locked_gts(address);
+	if (!gts)
+		return -EINVAL;
+
+	gts->ts_tgid_owner = current->tgid;
+	gru_unlock_gts(gts);
+
+	return 0;
+}
diff --git a/drivers/misc/sgi-gru/grufile.c b/drivers/misc/sgi-gru/grufile.c
new file mode 100644
index 000000000000..23c91f5f6b61
--- /dev/null
+++ b/drivers/misc/sgi-gru/grufile.c
@@ -0,0 +1,485 @@
1/*
2 * SN Platform GRU Driver
3 *
4 * FILE OPERATIONS & DRIVER INITIALIZATION
5 *
6 * This file supports the user system call for file open, close, mmap, etc.
7 * This also includes the driver initialization code.
8 *
9 * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 */
25
26#include <linux/module.h>
27#include <linux/kernel.h>
28#include <linux/errno.h>
29#include <linux/slab.h>
30#include <linux/mm.h>
31#include <linux/io.h>
32#include <linux/smp_lock.h>
33#include <linux/spinlock.h>
34#include <linux/device.h>
35#include <linux/miscdevice.h>
36#include <linux/interrupt.h>
37#include <linux/proc_fs.h>
38#include <linux/uaccess.h>
39#include "gru.h"
40#include "grulib.h"
41#include "grutables.h"
42
43#if defined CONFIG_X86_64
44#include <asm/genapic.h>
45#include <asm/irq.h>
46#define IS_UV() is_uv_system()
47#elif defined CONFIG_IA64
48#include <asm/system.h>
49#include <asm/sn/simulator.h>
50/* temp support for running on hardware simulator */
51#define IS_UV()		(IS_MEDUSA() || ia64_platform_is("uv"))
52#else
53#define IS_UV() 0
54#endif
55
56#include <asm/uv/uv_hub.h>
57#include <asm/uv/uv_mmrs.h>
58
59struct gru_blade_state *gru_base[GRU_MAX_BLADES] __read_mostly;
60unsigned long gru_start_paddr __read_mostly, gru_end_paddr __read_mostly;
61struct gru_stats_s gru_stats;
62
63/* Guaranteed user available resources on each node */
64static int max_user_cbrs, max_user_dsr_bytes;
65
66static struct file_operations gru_fops;
67static struct miscdevice gru_miscdev;
68
69
70/*
71 * gru_vma_close
72 *
73 * Called when unmapping a device mapping. Frees all gru resources
74 * and tables belonging to the vma.
75 */
76static void gru_vma_close(struct vm_area_struct *vma)
77{
78 struct gru_vma_data *vdata;
79 struct gru_thread_state *gts;
80 struct list_head *entry, *next;
81
82 if (!vma->vm_private_data)
83 return;
84
85 vdata = vma->vm_private_data;
86 vma->vm_private_data = NULL;
87 gru_dbg(grudev, "vma %p, file %p, vdata %p\n", vma, vma->vm_file,
88 vdata);
89 list_for_each_safe(entry, next, &vdata->vd_head) {
90 gts =
91 list_entry(entry, struct gru_thread_state, ts_next);
92 list_del(&gts->ts_next);
93 mutex_lock(&gts->ts_ctxlock);
94 if (gts->ts_gru)
95 gru_unload_context(gts, 0);
96 mutex_unlock(&gts->ts_ctxlock);
97 gts_drop(gts);
98 }
99 kfree(vdata);
100 STAT(vdata_free);
101}
102
103/*
104 * gru_file_mmap
105 *
106 * Called when mmapping the device. Initializes the vma with a fault handler
107 * and private data structure necessary to allocate, track, and free the
108 * underlying pages.
109 */
110static int gru_file_mmap(struct file *file, struct vm_area_struct *vma)
111{
112 if ((vma->vm_flags & (VM_SHARED | VM_WRITE)) != (VM_SHARED | VM_WRITE))
113 return -EPERM;
114
115 if (vma->vm_start & (GRU_GSEG_PAGESIZE - 1) ||
116 vma->vm_end & (GRU_GSEG_PAGESIZE - 1))
117 return -EINVAL;
118
119 vma->vm_flags |=
120 (VM_IO | VM_DONTCOPY | VM_LOCKED | VM_DONTEXPAND | VM_PFNMAP |
121 VM_RESERVED);
122 vma->vm_page_prot = PAGE_SHARED;
123 vma->vm_ops = &gru_vm_ops;
124
125 vma->vm_private_data = gru_alloc_vma_data(vma, 0);
126 if (!vma->vm_private_data)
127 return -ENOMEM;
128
129 gru_dbg(grudev, "file %p, vaddr 0x%lx, vma %p, vdata %p\n",
130 file, vma->vm_start, vma, vma->vm_private_data);
131 return 0;
132}
133
134/*
135 * Create a new GRU context
136 */
137static int gru_create_new_context(unsigned long arg)
138{
139 struct gru_create_context_req req;
140 struct vm_area_struct *vma;
141 struct gru_vma_data *vdata;
142 int ret = -EINVAL;
143
144
145 if (copy_from_user(&req, (void __user *)arg, sizeof(req)))
146 return -EFAULT;
147
148 if (req.data_segment_bytes == 0 ||
149 req.data_segment_bytes > max_user_dsr_bytes)
150 return -EINVAL;
151 if (!req.control_blocks || !req.maximum_thread_count ||
152 req.control_blocks > max_user_cbrs)
153 return -EINVAL;
154
155 if (!(req.options & GRU_OPT_MISS_MASK))
156 req.options |= GRU_OPT_MISS_FMM_INTR;
157
158 down_write(&current->mm->mmap_sem);
159 vma = gru_find_vma(req.gseg);
160 if (vma) {
161 vdata = vma->vm_private_data;
162 vdata->vd_user_options = req.options;
163 vdata->vd_dsr_au_count =
164 GRU_DS_BYTES_TO_AU(req.data_segment_bytes);
165 vdata->vd_cbr_au_count = GRU_CB_COUNT_TO_AU(req.control_blocks);
166 ret = 0;
167 }
168 up_write(&current->mm->mmap_sem);
169
170 return ret;
171}
172
173/*
174 * Get GRU configuration info (temp - for emulator testing)
175 */
176static long gru_get_config_info(unsigned long arg)
177{
178 struct gru_config_info info;
179 int nodesperblade;
180
181 if (num_online_nodes() > 1 &&
182 (uv_node_to_blade_id(1) == uv_node_to_blade_id(0)))
183 nodesperblade = 2;
184 else
185 nodesperblade = 1;
186 info.cpus = num_online_cpus();
187 info.nodes = num_online_nodes();
188 info.blades = info.nodes / nodesperblade;
189 info.chiplets = GRU_CHIPLETS_PER_BLADE * info.blades;
190
191 if (copy_to_user((void __user *)arg, &info, sizeof(info)))
192 return -EFAULT;
193 return 0;
194}
195
196/*
197 * Get GRU chiplet status
198 */
199static long gru_get_chiplet_status(unsigned long arg)
200{
201 struct gru_state *gru;
202 struct gru_chiplet_info info;
203
204 if (copy_from_user(&info, (void __user *)arg, sizeof(info)))
205 return -EFAULT;
206
207 if (info.node == -1)
208 info.node = numa_node_id();
209 if (info.node >= num_possible_nodes() ||
210 info.chiplet >= GRU_CHIPLETS_PER_HUB ||
211 info.node < 0 || info.chiplet < 0)
212 return -EINVAL;
213
214 info.blade = uv_node_to_blade_id(info.node);
215 gru = get_gru(info.blade, info.chiplet);
216
217 info.total_dsr_bytes = GRU_NUM_DSR_BYTES;
218 info.total_cbr = GRU_NUM_CB;
219 info.total_user_dsr_bytes = GRU_NUM_DSR_BYTES -
220 gru->gs_reserved_dsr_bytes;
221 info.total_user_cbr = GRU_NUM_CB - gru->gs_reserved_cbrs;
222 info.free_user_dsr_bytes = hweight64(gru->gs_dsr_map) *
223 GRU_DSR_AU_BYTES;
224 info.free_user_cbr = hweight64(gru->gs_cbr_map) * GRU_CBR_AU_SIZE;
225
226 if (copy_to_user((void __user *)arg, &info, sizeof(info)))
227 return -EFAULT;
228 return 0;
229}
230
231/*
232 * gru_file_unlocked_ioctl
233 *
234 * Called to update file attributes via IOCTL calls.
235 */
236static long gru_file_unlocked_ioctl(struct file *file, unsigned int req,
237 unsigned long arg)
238{
239 int err = -EBADRQC;
240
241 gru_dbg(grudev, "file %p\n", file);
242
243 switch (req) {
244 case GRU_CREATE_CONTEXT:
245 err = gru_create_new_context(arg);
246 break;
247 case GRU_SET_TASK_SLICE:
248 err = gru_set_task_slice(arg);
249 break;
250 case GRU_USER_GET_EXCEPTION_DETAIL:
251 err = gru_get_exception_detail(arg);
252 break;
253 case GRU_USER_UNLOAD_CONTEXT:
254 err = gru_user_unload_context(arg);
255 break;
256 case GRU_GET_CHIPLET_STATUS:
257 err = gru_get_chiplet_status(arg);
258 break;
259 case GRU_USER_FLUSH_TLB:
260 err = gru_user_flush_tlb(arg);
261 break;
262 case GRU_USER_CALL_OS:
263 err = gru_handle_user_call_os(arg);
264 break;
265 case GRU_GET_CONFIG_INFO:
266 err = gru_get_config_info(arg);
267 break;
268 }
269 return err;
270}
271
272/*
273 * Called at init time to build tables for all GRUs that are present in the
274 * system.
275 */
276static void gru_init_chiplet(struct gru_state *gru, unsigned long paddr,
277 void *vaddr, int nid, int bid, int grunum)
278{
279 spin_lock_init(&gru->gs_lock);
280 spin_lock_init(&gru->gs_asid_lock);
281 gru->gs_gru_base_paddr = paddr;
282 gru->gs_gru_base_vaddr = vaddr;
283 gru->gs_gid = bid * GRU_CHIPLETS_PER_BLADE + grunum;
284 gru->gs_blade = gru_base[bid];
285 gru->gs_blade_id = bid;
286 gru->gs_cbr_map = (GRU_CBR_AU == 64) ? ~0 : (1UL << GRU_CBR_AU) - 1;
287 gru->gs_dsr_map = (1UL << GRU_DSR_AU) - 1;
288 gru_tgh_flush_init(gru);
289 gru_dbg(grudev, "bid %d, nid %d, gru %x, vaddr %p (0x%lx)\n",
290 bid, nid, gru->gs_gid, gru->gs_gru_base_vaddr,
291 gru->gs_gru_base_paddr);
292 gru_kservices_init(gru);
293}
294
295static int gru_init_tables(unsigned long gru_base_paddr, void *gru_base_vaddr)
296{
297 int pnode, nid, bid, chip;
298 int cbrs, dsrbytes, n;
299 int order = get_order(sizeof(struct gru_blade_state));
300 struct page *page;
301 struct gru_state *gru;
302 unsigned long paddr;
303 void *vaddr;
304
305 max_user_cbrs = GRU_NUM_CB;
306 max_user_dsr_bytes = GRU_NUM_DSR_BYTES;
307 for_each_online_node(nid) {
308 bid = uv_node_to_blade_id(nid);
309 pnode = uv_node_to_pnode(nid);
310 if (gru_base[bid])
311 continue;
312 page = alloc_pages_node(nid, GFP_KERNEL, order);
313 if (!page)
314 goto fail;
315 gru_base[bid] = page_address(page);
316 memset(gru_base[bid], 0, sizeof(struct gru_blade_state));
317 gru_base[bid]->bs_lru_gru = &gru_base[bid]->bs_grus[0];
318 spin_lock_init(&gru_base[bid]->bs_lock);
319
320 dsrbytes = 0;
321 cbrs = 0;
322 for (gru = gru_base[bid]->bs_grus, chip = 0;
323 chip < GRU_CHIPLETS_PER_BLADE;
324 chip++, gru++) {
325 paddr = gru_chiplet_paddr(gru_base_paddr, pnode, chip);
326 vaddr = gru_chiplet_vaddr(gru_base_vaddr, pnode, chip);
327			gru_init_chiplet(gru, paddr, vaddr, nid, bid, chip);
328 n = hweight64(gru->gs_cbr_map) * GRU_CBR_AU_SIZE;
329 cbrs = max(cbrs, n);
330 n = hweight64(gru->gs_dsr_map) * GRU_DSR_AU_BYTES;
331 dsrbytes = max(dsrbytes, n);
332 }
333 max_user_cbrs = min(max_user_cbrs, cbrs);
334 max_user_dsr_bytes = min(max_user_dsr_bytes, dsrbytes);
335 }
336
337 return 0;
338
339fail:
340	for (bid = 0; bid < GRU_MAX_BLADES; bid++)
341		free_pages((unsigned long)gru_base[bid], order);
342 return -ENOMEM;
343}
344
345#ifdef CONFIG_IA64
346
347static int get_base_irq(void)
348{
349 return IRQ_GRU;
350}
351
352#elif defined CONFIG_X86_64
353
354static void noop(unsigned int irq)
355{
356}
357
358static struct irq_chip gru_chip = {
359 .name = "gru",
360 .mask = noop,
361 .unmask = noop,
362 .ack = noop,
363};
364
365static int get_base_irq(void)
366{
367 set_irq_chip(IRQ_GRU, &gru_chip);
368 set_irq_chip(IRQ_GRU + 1, &gru_chip);
369 return IRQ_GRU;
370}
371#endif
372
373/*
374 * gru_init
375 *
376 * Called at boot or module load time to initialize the GRUs.
377 */
378static int __init gru_init(void)
379{
380 int ret, irq, chip;
381	static char id[] = "gru";
382 void *gru_start_vaddr;
383
384 if (!IS_UV())
385 return 0;
386
387#if defined CONFIG_IA64
388 gru_start_paddr = 0xd000000000UL; /* ZZZZZZZZZZZZZZZZZZZ fixme */
389#else
390 gru_start_paddr = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR) &
391 0x7fffffffffffUL;
392
393#endif
394 gru_start_vaddr = __va(gru_start_paddr);
395 gru_end_paddr = gru_start_paddr + MAX_NUMNODES * GRU_SIZE;
396 printk(KERN_INFO "GRU space: 0x%lx - 0x%lx\n",
397 gru_start_paddr, gru_end_paddr);
398 irq = get_base_irq();
399 for (chip = 0; chip < GRU_CHIPLETS_PER_BLADE; chip++) {
400 ret = request_irq(irq + chip, gru_intr, 0, id, NULL);
401 if (ret) {
402 printk(KERN_ERR "%s: request_irq failed\n",
403 GRU_DRIVER_ID_STR);
404 goto exit1;
405 }
406 }
407
408 ret = misc_register(&gru_miscdev);
409 if (ret) {
410 printk(KERN_ERR "%s: misc_register failed\n",
411 GRU_DRIVER_ID_STR);
412 goto exit1;
413 }
414
415 ret = gru_proc_init();
416 if (ret) {
417 printk(KERN_ERR "%s: proc init failed\n", GRU_DRIVER_ID_STR);
418 goto exit2;
419 }
420
421 ret = gru_init_tables(gru_start_paddr, gru_start_vaddr);
422 if (ret) {
423 printk(KERN_ERR "%s: init tables failed\n", GRU_DRIVER_ID_STR);
424 goto exit3;
425 }
426
427 printk(KERN_INFO "%s: v%s\n", GRU_DRIVER_ID_STR,
428 GRU_DRIVER_VERSION_STR);
429 return 0;
430
431exit3:
432 gru_proc_exit();
433exit2:
434 misc_deregister(&gru_miscdev);
435exit1:
436 for (--chip; chip >= 0; chip--)
437 free_irq(irq + chip, NULL);
438 return ret;
439
440}
441
442static void __exit gru_exit(void)
443{
444 int i, bid;
445	/* must match the allocation order used in gru_init_tables() */
446	int order = get_order(sizeof(struct gru_blade_state));
447
448 for (i = 0; i < GRU_CHIPLETS_PER_BLADE; i++)
449 free_irq(IRQ_GRU + i, NULL);
450
451 for (bid = 0; bid < GRU_MAX_BLADES; bid++)
452 free_pages((unsigned long)gru_base[bid], order);
453
454 misc_deregister(&gru_miscdev);
455 gru_proc_exit();
456}
457
458static struct file_operations gru_fops = {
459 .owner = THIS_MODULE,
460 .unlocked_ioctl = gru_file_unlocked_ioctl,
461 .mmap = gru_file_mmap,
462};
463
464static struct miscdevice gru_miscdev = {
465 .minor = MISC_DYNAMIC_MINOR,
466 .name = "gru",
467 .fops = &gru_fops,
468};
469
470struct vm_operations_struct gru_vm_ops = {
471 .close = gru_vma_close,
472 .fault = gru_fault,
473};
474
475module_init(gru_init);
476module_exit(gru_exit);
477
478module_param(gru_options, ulong, 0644);
479MODULE_PARM_DESC(gru_options, "Various debug options");
480
481MODULE_AUTHOR("Silicon Graphics, Inc.");
482MODULE_LICENSE("GPL");
483MODULE_DESCRIPTION(GRU_DRIVER_ID_STR GRU_DRIVER_VERSION_STR);
484MODULE_VERSION(GRU_DRIVER_VERSION_STR);
485
diff --git a/drivers/misc/sgi-gru/gruhandles.h b/drivers/misc/sgi-gru/gruhandles.h
new file mode 100644
index 000000000000..d16031d62673
--- /dev/null
+++ b/drivers/misc/sgi-gru/gruhandles.h
@@ -0,0 +1,663 @@
1/*
2 * SN Platform GRU Driver
3 *
4 * GRU HANDLE DEFINITION
5 *
6 * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 */
22
23#ifndef __GRUHANDLES_H__
24#define __GRUHANDLES_H__
25#include "gru_instructions.h"
26
27/*
28 * Manifest constants for GRU Memory Map
29 */
30#define GRU_GSEG0_BASE 0
31#define GRU_MCS_BASE (64 * 1024 * 1024)
32#define GRU_SIZE (128UL * 1024 * 1024)
33
34/* Handle & resource counts */
35#define GRU_NUM_CB 128
36#define GRU_NUM_DSR_BYTES (32 * 1024)
37#define GRU_NUM_TFM 16
38#define GRU_NUM_TGH 24
39#define GRU_NUM_CBE 128
40#define GRU_NUM_TFH 128
41#define GRU_NUM_CCH 16
42#define GRU_NUM_GSH 1
43
44/* Maximum resource counts that can be reserved by user programs */
45#define GRU_NUM_USER_CBR GRU_NUM_CBE
46#define GRU_NUM_USER_DSR_BYTES GRU_NUM_DSR_BYTES
47
48/* Bytes per handle & handle stride. Code assumes all cb, tfh, cbe handles
49 * are the same */
50#define GRU_HANDLE_BYTES 64
51#define GRU_HANDLE_STRIDE 256
52
53/* Base addresses of handles */
54#define GRU_TFM_BASE (GRU_MCS_BASE + 0x00000)
55#define GRU_TGH_BASE (GRU_MCS_BASE + 0x08000)
56#define GRU_CBE_BASE (GRU_MCS_BASE + 0x10000)
57#define GRU_TFH_BASE (GRU_MCS_BASE + 0x18000)
58#define GRU_CCH_BASE (GRU_MCS_BASE + 0x20000)
59#define GRU_GSH_BASE (GRU_MCS_BASE + 0x30000)
60
61/* User gseg constants */
62#define GRU_GSEG_STRIDE (4 * 1024 * 1024)
63#define GSEG_BASE(a) ((a) & ~(GRU_GSEG_PAGESIZE - 1))
64
65/* Data segment constants */
66#define GRU_DSR_AU_BYTES 1024
67#define GRU_DSR_CL (GRU_NUM_DSR_BYTES / GRU_CACHE_LINE_BYTES)
68#define GRU_DSR_AU_CL (GRU_DSR_AU_BYTES / GRU_CACHE_LINE_BYTES)
69#define GRU_DSR_AU (GRU_NUM_DSR_BYTES / GRU_DSR_AU_BYTES)
70
71/* Control block constants */
72#define GRU_CBR_AU_SIZE 2
73#define GRU_CBR_AU (GRU_NUM_CBE / GRU_CBR_AU_SIZE)
74
75/* Convert resource counts to the number of AU */
76#define GRU_DS_BYTES_TO_AU(n) DIV_ROUND_UP(n, GRU_DSR_AU_BYTES)
77#define GRU_CB_COUNT_TO_AU(n) DIV_ROUND_UP(n, GRU_CBR_AU_SIZE)
78
79/* UV limits */
80#define GRU_CHIPLETS_PER_HUB 2
81#define GRU_HUBS_PER_BLADE 1
82#define GRU_CHIPLETS_PER_BLADE (GRU_HUBS_PER_BLADE * GRU_CHIPLETS_PER_HUB)
83
84/* User GRU Gseg offsets */
85#define GRU_CB_BASE 0
86#define GRU_CB_LIMIT (GRU_CB_BASE + GRU_HANDLE_STRIDE * GRU_NUM_CBE)
87#define GRU_DS_BASE 0x20000
88#define GRU_DS_LIMIT (GRU_DS_BASE + GRU_NUM_DSR_BYTES)
89
90/* Convert a GRU physical address to the chiplet offset */
91#define GSEGPOFF(h) ((h) & (GRU_SIZE - 1))
92
93/* Convert an arbitrary handle address to the beginning of the GRU segment */
94#ifndef __PLUGIN__
95#define GRUBASE(h) ((void *)((unsigned long)(h) & ~(GRU_SIZE - 1)))
96#else
97extern void *gmu_grubase(void *h);
98#define GRUBASE(h) gmu_grubase(h)
99#endif
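
/*
 * Worked example of the masking above (illustrative values only):
 * with GRU_SIZE = 0x8000000 (128MB), a handle address of
 * 0xd0000123456 yields
 *	GRUBASE(h)  = 0xd0000000000	(start of the chiplet's GRU space)
 *	GSEGPOFF(h) = 0x123456		(offset within the chiplet)
 */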
100
101/* General addressing macros. */
102static inline void *get_gseg_base_address(void *base, int ctxnum)
103{
104 return (void *)(base + GRU_GSEG0_BASE + GRU_GSEG_STRIDE * ctxnum);
105}
106
107static inline void *get_gseg_base_address_cb(void *base, int ctxnum, int line)
108{
109 return (void *)(get_gseg_base_address(base, ctxnum) +
110 GRU_CB_BASE + GRU_HANDLE_STRIDE * line);
111}
112
113static inline void *get_gseg_base_address_ds(void *base, int ctxnum, int line)
114{
115 return (void *)(get_gseg_base_address(base, ctxnum) + GRU_DS_BASE +
116 GRU_CACHE_LINE_BYTES * line);
117}
118
119static inline struct gru_tlb_fault_map *get_tfm(void *base, int ctxnum)
120{
121 return (struct gru_tlb_fault_map *)(base + GRU_TFM_BASE +
122 ctxnum * GRU_HANDLE_STRIDE);
123}
124
125static inline struct gru_tlb_global_handle *get_tgh(void *base, int ctxnum)
126{
127 return (struct gru_tlb_global_handle *)(base + GRU_TGH_BASE +
128 ctxnum * GRU_HANDLE_STRIDE);
129}
130
131static inline struct gru_control_block_extended *get_cbe(void *base, int ctxnum)
132{
133 return (struct gru_control_block_extended *)(base + GRU_CBE_BASE +
134 ctxnum * GRU_HANDLE_STRIDE);
135}
136
137static inline struct gru_tlb_fault_handle *get_tfh(void *base, int ctxnum)
138{
139 return (struct gru_tlb_fault_handle *)(base + GRU_TFH_BASE +
140 ctxnum * GRU_HANDLE_STRIDE);
141}
142
143static inline struct gru_context_configuration_handle *get_cch(void *base,
144 int ctxnum)
145{
146 return (struct gru_context_configuration_handle *)(base +
147 GRU_CCH_BASE + ctxnum * GRU_HANDLE_STRIDE);
148}
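
/*
 * Example of the handle addressing above (follows directly from the
 * constants; shown for illustration): for ctxnum 3,
 *	get_tfh(base, 3) = base + GRU_TFH_BASE + 3 * GRU_HANDLE_STRIDE
 *			 = base + 0x4018000 + 0x300 = base + 0x4018300
 */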
149
150static inline unsigned long get_cb_number(void *cb)
151{
152 return (((unsigned long)cb - GRU_CB_BASE) % GRU_GSEG_PAGESIZE) /
153 GRU_HANDLE_STRIDE;
154}
155
156/* byte offset to a specific GRU chiplet (p = pnode, c = chiplet (0 or 1)) */
157static inline unsigned long gru_chiplet_paddr(unsigned long paddr, int pnode,
158 int chiplet)
159{
160 return paddr + GRU_SIZE * (2 * pnode + chiplet);
161}
162
163static inline void *gru_chiplet_vaddr(void *vaddr, int pnode, int chiplet)
164{
165 return vaddr + GRU_SIZE * (2 * pnode + chiplet);
166}
167
168
169
170/*
171 * Global TLB Fault Map
172 * Bitmap of outstanding TLB misses needing interrupt/polling service.
173 *
174 */
175struct gru_tlb_fault_map {
176 unsigned long fault_bits[BITS_TO_LONGS(GRU_NUM_CBE)];
177 unsigned long fill0[2];
178 unsigned long done_bits[BITS_TO_LONGS(GRU_NUM_CBE)];
179 unsigned long fill1[2];
180};
181
182/*
183 * TGH - TLB Global Handle
184 * Used for TLB flushing.
185 *
186 */
187struct gru_tlb_global_handle {
188 unsigned int cmd:1; /* DW 0 */
189 unsigned int delresp:1;
190 unsigned int opc:1;
191 unsigned int fill1:5;
192
193 unsigned int fill2:8;
194
195 unsigned int status:2;
196 unsigned long fill3:2;
197 unsigned int state:3;
198 unsigned long fill4:1;
199
200 unsigned int cause:3;
201 unsigned long fill5:37;
202
203 unsigned long vaddr:64; /* DW 1 */
204
205 unsigned int asid:24; /* DW 2 */
206 unsigned int fill6:8;
207
208 unsigned int pagesize:5;
209 unsigned int fill7:11;
210
211 unsigned int global:1;
212 unsigned int fill8:15;
213
214 unsigned long vaddrmask:39; /* DW 3 */
215 unsigned int fill9:9;
216 unsigned int n:10;
217 unsigned int fill10:6;
218
219 unsigned int ctxbitmap:16; /* DW4 */
220 unsigned long fill11[3];
221};
222
223enum gru_tgh_cmd {
224 TGHCMD_START
225};
226
227enum gru_tgh_opc {
228 TGHOP_TLBNOP,
229 TGHOP_TLBINV
230};
231
232enum gru_tgh_status {
233 TGHSTATUS_IDLE,
234 TGHSTATUS_EXCEPTION,
235 TGHSTATUS_ACTIVE
236};
237
238enum gru_tgh_state {
239 TGHSTATE_IDLE,
240 TGHSTATE_PE_INVAL,
241 TGHSTATE_INTERRUPT_INVAL,
242 TGHSTATE_WAITDONE,
243 TGHSTATE_RESTART_CTX,
244};
245
246/*
247 * TFH - TLB Fault Handle
248 * Used for TLB dropins into the GRU TLB.
249 *
250 */
251struct gru_tlb_fault_handle {
252 unsigned int cmd:1; /* DW 0 - low 32*/
253 unsigned int delresp:1;
254 unsigned int fill0:2;
255 unsigned int opc:3;
256 unsigned int fill1:9;
257
258 unsigned int status:2;
259 unsigned int fill2:1;
260 unsigned int color:1;
261 unsigned int state:3;
262 unsigned int fill3:1;
263
264 unsigned int cause:7; /* DW 0 - high 32 */
265 unsigned int fill4:1;
266
267 unsigned int indexway:12;
268 unsigned int fill5:4;
269
270 unsigned int ctxnum:4;
271 unsigned int fill6:12;
272
273 unsigned long missvaddr:64; /* DW 1 */
274
275 unsigned int missasid:24; /* DW 2 */
276 unsigned int fill7:8;
277 unsigned int fillasid:24;
278 unsigned int dirty:1;
279 unsigned int gaa:2;
280 unsigned long fill8:5;
281
282 unsigned long pfn:41; /* DW 3 */
283 unsigned int fill9:7;
284 unsigned int pagesize:5;
285 unsigned int fill10:11;
286
287 unsigned long fillvaddr:64; /* DW 4 */
288
289 unsigned long fill11[3];
290};
291
292enum gru_tfh_opc {
293 TFHOP_NOOP,
294 TFHOP_RESTART,
295 TFHOP_WRITE_ONLY,
296 TFHOP_WRITE_RESTART,
297 TFHOP_EXCEPTION,
298 TFHOP_USER_POLLING_MODE = 7,
299};
300
301enum tfh_status {
302 TFHSTATUS_IDLE,
303 TFHSTATUS_EXCEPTION,
304 TFHSTATUS_ACTIVE,
305};
306
307enum tfh_state {
308 TFHSTATE_INACTIVE,
309 TFHSTATE_IDLE,
310 TFHSTATE_MISS_UPM,
311 TFHSTATE_MISS_FMM,
312 TFHSTATE_HW_ERR,
313 TFHSTATE_WRITE_TLB,
314 TFHSTATE_RESTART_CBR,
315};
316
317/* TFH cause bits */
318enum tfh_cause {
319 TFHCAUSE_NONE,
320 TFHCAUSE_TLB_MISS,
321 TFHCAUSE_TLB_MOD,
322 TFHCAUSE_HW_ERROR_RR,
323 TFHCAUSE_HW_ERROR_MAIN_ARRAY,
324 TFHCAUSE_HW_ERROR_VALID,
325 TFHCAUSE_HW_ERROR_PAGESIZE,
326 TFHCAUSE_INSTRUCTION_EXCEPTION,
327 TFHCAUSE_UNCORRECTIBLE_ERROR,
328};
329
330/* GAA values */
331#define GAA_RAM 0x0
332#define GAA_NCRAM 0x2
333#define GAA_MMIO 0x1
334#define GAA_REGISTER 0x3
335
336/* GRU paddr shift for pfn. (NOTE: shift is NOT by actual pagesize) */
337#define GRU_PADDR_SHIFT 12
338
339/*
340 * Context Configuration handle
341 * Used to allocate resources to a GSEG context.
342 *
343 */
344struct gru_context_configuration_handle {
345 unsigned int cmd:1; /* DW0 */
346 unsigned int delresp:1;
347 unsigned int opc:3;
348 unsigned int unmap_enable:1;
349 unsigned int req_slice_set_enable:1;
350 unsigned int req_slice:2;
351 unsigned int cb_int_enable:1;
352 unsigned int tlb_int_enable:1;
353 unsigned int tfm_fault_bit_enable:1;
354 unsigned int tlb_int_select:4;
355
356 unsigned int status:2;
357 unsigned int state:2;
358 unsigned int reserved2:4;
359
360 unsigned int cause:4;
361 unsigned int tfm_done_bit_enable:1;
362 unsigned int unused:3;
363
364 unsigned int dsr_allocation_map;
365
366 unsigned long cbr_allocation_map; /* DW1 */
367
368 unsigned int asid[8]; /* DW 2 - 5 */
369 unsigned short sizeavail[8]; /* DW 6 - 7 */
370} __attribute__ ((packed));
371
372enum gru_cch_opc {
373 CCHOP_START = 1,
374 CCHOP_ALLOCATE,
375 CCHOP_INTERRUPT,
376 CCHOP_DEALLOCATE,
377 CCHOP_INTERRUPT_SYNC,
378};
379
380enum gru_cch_status {
381 CCHSTATUS_IDLE,
382 CCHSTATUS_EXCEPTION,
383 CCHSTATUS_ACTIVE,
384};
385
386enum gru_cch_state {
387 CCHSTATE_INACTIVE,
388 CCHSTATE_MAPPED,
389 CCHSTATE_ACTIVE,
390 CCHSTATE_INTERRUPTED,
391};
392
393/* CCH Exception cause */
394enum gru_cch_cause {
395 CCHCAUSE_REGION_REGISTER_WRITE_ERROR = 1,
396 CCHCAUSE_ILLEGAL_OPCODE = 2,
397 CCHCAUSE_INVALID_START_REQUEST = 3,
398 CCHCAUSE_INVALID_ALLOCATION_REQUEST = 4,
399 CCHCAUSE_INVALID_DEALLOCATION_REQUEST = 5,
400 CCHCAUSE_INVALID_INTERRUPT_REQUEST = 6,
401 CCHCAUSE_CCH_BUSY = 7,
402 CCHCAUSE_NO_CBRS_TO_ALLOCATE = 8,
403 CCHCAUSE_BAD_TFM_CONFIG = 9,
404 CCHCAUSE_CBR_RESOURCES_OVERSUBSCRIPED = 10,
405 CCHCAUSE_DSR_RESOURCES_OVERSUBSCRIPED = 11,
406 CCHCAUSE_CBR_DEALLOCATION_ERROR = 12,
407};
408/*
409 * CBE - Control Block Extended
410 * Maintains internal GRU state for active CBs.
411 *
412 */
413struct gru_control_block_extended {
414 unsigned int reserved0:1; /* DW 0 - low */
415 unsigned int imacpy:3;
416 unsigned int reserved1:4;
417 unsigned int xtypecpy:3;
418 unsigned int iaa0cpy:2;
419 unsigned int iaa1cpy:2;
420 unsigned int reserved2:1;
421 unsigned int opccpy:8;
422 unsigned int exopccpy:8;
423
424 unsigned int idef2cpy:22; /* DW 0 - high */
425 unsigned int reserved3:10;
426
427 unsigned int idef4cpy:22; /* DW 1 */
428 unsigned int reserved4:10;
429 unsigned int idef4upd:22;
430 unsigned int reserved5:10;
431
432 unsigned long idef1upd:64; /* DW 2 */
433
434 unsigned long idef5cpy:64; /* DW 3 */
435
436 unsigned long idef6cpy:64; /* DW 4 */
437
438 unsigned long idef3upd:64; /* DW 5 */
439
440 unsigned long idef5upd:64; /* DW 6 */
441
442 unsigned int idef2upd:22; /* DW 7 */
443 unsigned int reserved6:10;
444
445 unsigned int ecause:20;
446 unsigned int cbrstate:4;
447 unsigned int cbrexecstatus:8;
448};
449
450enum gru_cbr_state {
451 CBRSTATE_INACTIVE,
452 CBRSTATE_IDLE,
453 CBRSTATE_PE_CHECK,
454 CBRSTATE_QUEUED,
455 CBRSTATE_WAIT_RESPONSE,
456 CBRSTATE_INTERRUPTED,
457 CBRSTATE_INTERRUPTED_MISS_FMM,
458 CBRSTATE_BUSY_INTERRUPT_MISS_FMM,
459 CBRSTATE_INTERRUPTED_MISS_UPM,
460 CBRSTATE_BUSY_INTERRUPTED_MISS_UPM,
461 CBRSTATE_REQUEST_ISSUE,
462 CBRSTATE_BUSY_INTERRUPT,
463};
464
465/* CBE cbrexecstatus bits */
466#define CBR_EXS_ABORT_OCC_BIT 0
467#define CBR_EXS_INT_OCC_BIT 1
468#define CBR_EXS_PENDING_BIT 2
469#define CBR_EXS_QUEUED_BIT 3
470#define CBR_EXS_TLBHW_BIT 4
471#define CBR_EXS_EXCEPTION_BIT 5
472
473#define CBR_EXS_ABORT_OCC (1 << CBR_EXS_ABORT_OCC_BIT)
474#define CBR_EXS_INT_OCC (1 << CBR_EXS_INT_OCC_BIT)
475#define CBR_EXS_PENDING (1 << CBR_EXS_PENDING_BIT)
476#define CBR_EXS_QUEUED (1 << CBR_EXS_QUEUED_BIT)
477#define CBR_EXS_TLBHW (1 << CBR_EXS_TLBHW_BIT)
478#define CBR_EXS_EXCEPTION (1 << CBR_EXS_EXCEPTION_BIT)
479
480/* CBE ecause bits - defined in gru_instructions.h */
481
482/*
483 * Convert a processor pagesize into the strange encoded pagesize used by the
484 * GRU. Processor pagesize is encoded as log of bytes per page. (or PAGE_SHIFT)
485 * pagesize log pagesize grupagesize
486 * 4k 12 0
487 * 16k 14 1
488 * 64k 16 2
489 * 256k 18 3
490 * 1m 20 4
491 * 2m 21 5
492 * 4m 22 6
493 * 16m 24 7
494 * 64m 26 8
495 * ...
496 */
497#define GRU_PAGESIZE(sh) ((((sh) > 20 ? (sh) + 2 : (sh)) >> 1) - 6)
498#define GRU_SIZEAVAIL(sh) (1UL << GRU_PAGESIZE(sh))
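
/*
 * Worked examples (follow directly from the macro above):
 *	GRU_PAGESIZE(12)  = ((12 >> 1) - 6)       = 0	(4k pages)
 *	GRU_PAGESIZE(21)  = (((21 + 2) >> 1) - 6) = 5	(2m pages)
 *	GRU_SIZEAVAIL(21) = 1UL << 5 = 0x20
 */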
499
500/* minimum TLB purge count to ensure a full purge */
501#define GRUMAXINVAL 1024UL
502
503
504/* Extract the status field from a kernel handle */
505#define GET_MSEG_HANDLE_STATUS(h) (((*(unsigned long *)(h)) >> 16) & 3)
506
507static inline void start_instruction(void *h)
508{
509 unsigned long *w0 = h;
510
511 wmb(); /* setting CMD bit must be last */
512 *w0 = *w0 | 1;
513 gru_flush_cache(h);
514}
515
516static inline int wait_instruction_complete(void *h)
517{
518 int status;
519
520 do {
521 cpu_relax();
522 barrier();
523 status = GET_MSEG_HANDLE_STATUS(h);
524 } while (status == CCHSTATUS_ACTIVE);
525 return status;
526}
527
528#if defined CONFIG_IA64
529static inline void cch_allocate_set_asids(
530 struct gru_context_configuration_handle *cch, int asidval)
531{
532 int i;
533
534 for (i = 0; i <= RGN_HPAGE; i++) { /* assume HPAGE is last region */
535 cch->asid[i] = (asidval++);
536#if 0
537 /* ZZZ hugepages not supported yet */
538 if (i == RGN_HPAGE)
539 cch->sizeavail[i] = GRU_SIZEAVAIL(hpage_shift);
540 else
541#endif
542 cch->sizeavail[i] = GRU_SIZEAVAIL(PAGE_SHIFT);
543 }
544}
545#elif defined CONFIG_X86_64
546static inline void cch_allocate_set_asids(
547 struct gru_context_configuration_handle *cch, int asidval)
548{
549 int i;
550
551 for (i = 0; i < 8; i++) {
552 cch->asid[i] = asidval++;
553 cch->sizeavail[i] = GRU_SIZEAVAIL(PAGE_SHIFT) |
554 GRU_SIZEAVAIL(21);
555 }
556}
557#endif
558
559static inline int cch_allocate(struct gru_context_configuration_handle *cch,
560 int asidval, unsigned long cbrmap,
561 unsigned long dsrmap)
562{
563 cch_allocate_set_asids(cch, asidval);
564 cch->dsr_allocation_map = dsrmap;
565 cch->cbr_allocation_map = cbrmap;
566 cch->opc = CCHOP_ALLOCATE;
567 start_instruction(cch);
568 return wait_instruction_complete(cch);
569}
570
571static inline int cch_start(struct gru_context_configuration_handle *cch)
572{
573 cch->opc = CCHOP_START;
574 start_instruction(cch);
575 return wait_instruction_complete(cch);
576}
577
578static inline int cch_interrupt(struct gru_context_configuration_handle *cch)
579{
580 cch->opc = CCHOP_INTERRUPT;
581 start_instruction(cch);
582 return wait_instruction_complete(cch);
583}
584
585static inline int cch_deallocate(struct gru_context_configuration_handle *cch)
586{
587 cch->opc = CCHOP_DEALLOCATE;
588 start_instruction(cch);
589 return wait_instruction_complete(cch);
590}
591
592static inline int cch_interrupt_sync(struct gru_context_configuration_handle
593 *cch)
594{
595 cch->opc = CCHOP_INTERRUPT_SYNC;
596 start_instruction(cch);
597 return wait_instruction_complete(cch);
598}
599
600static inline int tgh_invalidate(struct gru_tlb_global_handle *tgh,
601 unsigned long vaddr, unsigned long vaddrmask,
602 int asid, int pagesize, int global, int n,
603 unsigned short ctxbitmap)
604{
605 tgh->vaddr = vaddr;
606 tgh->asid = asid;
607 tgh->pagesize = pagesize;
608 tgh->n = n;
609 tgh->global = global;
610 tgh->vaddrmask = vaddrmask;
611 tgh->ctxbitmap = ctxbitmap;
612 tgh->opc = TGHOP_TLBINV;
613 start_instruction(tgh);
614 return wait_instruction_complete(tgh);
615}
616
617static inline void tfh_write_only(struct gru_tlb_fault_handle *tfh,
618 unsigned long pfn, unsigned long vaddr,
619 int asid, int dirty, int pagesize)
620{
621 tfh->fillasid = asid;
622 tfh->fillvaddr = vaddr;
623 tfh->pfn = pfn;
624 tfh->dirty = dirty;
625 tfh->pagesize = pagesize;
626 tfh->opc = TFHOP_WRITE_ONLY;
627 start_instruction(tfh);
628}
629
630static inline void tfh_write_restart(struct gru_tlb_fault_handle *tfh,
631 unsigned long paddr, int gaa,
632 unsigned long vaddr, int asid, int dirty,
633 int pagesize)
634{
635 tfh->fillasid = asid;
636 tfh->fillvaddr = vaddr;
637 tfh->pfn = paddr >> GRU_PADDR_SHIFT;
638 tfh->gaa = gaa;
639 tfh->dirty = dirty;
640 tfh->pagesize = pagesize;
641 tfh->opc = TFHOP_WRITE_RESTART;
642 start_instruction(tfh);
643}
644
645static inline void tfh_restart(struct gru_tlb_fault_handle *tfh)
646{
647 tfh->opc = TFHOP_RESTART;
648 start_instruction(tfh);
649}
650
651static inline void tfh_user_polling_mode(struct gru_tlb_fault_handle *tfh)
652{
653 tfh->opc = TFHOP_USER_POLLING_MODE;
654 start_instruction(tfh);
655}
656
657static inline void tfh_exception(struct gru_tlb_fault_handle *tfh)
658{
659 tfh->opc = TFHOP_EXCEPTION;
660 start_instruction(tfh);
661}
662
663#endif /* __GRUHANDLES_H__ */
diff --git a/drivers/misc/sgi-gru/grukservices.c b/drivers/misc/sgi-gru/grukservices.c
new file mode 100644
index 000000000000..dfd49af0fe18
--- /dev/null
+++ b/drivers/misc/sgi-gru/grukservices.c
@@ -0,0 +1,679 @@
1/*
2 * SN Platform GRU Driver
3 *
4 * KERNEL SERVICES THAT USE THE GRU
5 *
6 * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 */
22
23#include <linux/kernel.h>
24#include <linux/errno.h>
25#include <linux/slab.h>
26#include <linux/mm.h>
27#include <linux/smp_lock.h>
28#include <linux/spinlock.h>
29#include <linux/device.h>
30#include <linux/miscdevice.h>
31#include <linux/proc_fs.h>
32#include <linux/interrupt.h>
33#include <linux/uaccess.h>
34#include "gru.h"
35#include "grulib.h"
36#include "grutables.h"
37#include "grukservices.h"
38#include "gru_instructions.h"
39#include <asm/uv/uv_hub.h>
40
41/*
42 * Kernel GRU Usage
43 *
44 * The following is an interim algorithm for management of kernel GRU
45 * resources. This will likely be replaced when we better understand the
46 * kernel/user requirements.
47 *
48 * At boot time, the kernel permanently reserves a fixed number of
49 * CBRs/DSRs for each cpu to use. The resources are all taken from
50 * the GRU chiplet 1 on the blade. This leaves the full set of resources
51 * of chiplet 0 available to be allocated to a single user.
52 */
53
54/* Blade percpu resources PERMANENTLY reserved for kernel use */
55#define GRU_NUM_KERNEL_CBR 1
56#define GRU_NUM_KERNEL_DSR_BYTES 256
57#define KERNEL_CTXNUM 15
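
/*
 * Sizing example (assumes 16 possible cpus on a blade; the cpu count
 * is illustrative, not a driver constant). gru_kservices_init() then
 * reserves:
 *	16 * GRU_NUM_KERNEL_CBR = 16 CBRs
 *		-> GRU_CB_COUNT_TO_AU(16) = 8 AU
 *	16 * GRU_NUM_KERNEL_DSR_BYTES = 4096 bytes
 *		-> GRU_DS_BYTES_TO_AU(4096) = 4 AU
 */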
58
59/* GRU instruction attributes for all instructions */
60#define IMA IMA_CB_DELAY
61
62/* GRU cacheline size is always 64 bytes - even on arches with 128 byte lines */
63#define __gru_cacheline_aligned__ \
64 __attribute__((__aligned__(GRU_CACHE_LINE_BYTES)))
65
66#define MAGIC 0x1234567887654321UL
67
68/* Default retry count for GRU errors on kernel instructions */
69#define EXCEPTION_RETRY_LIMIT 3
70
71/* Status of message queue sections */
72#define MQS_EMPTY 0
73#define MQS_FULL 1
74#define MQS_NOOP 2
75
76/*----------------- RESOURCE MANAGEMENT -------------------------------------*/
77/* optimized for x86_64 */
78struct message_queue {
79 union gru_mesqhead head __gru_cacheline_aligned__; /* CL 0 */
80 int qlines; /* DW 1 */
81 long hstatus[2];
82 void *next __gru_cacheline_aligned__;/* CL 1 */
83 void *limit;
84 void *start;
85 void *start2;
86 char data ____cacheline_aligned; /* CL 2 */
87};
88
89/* First word in every message - used by mesq interface */
90struct message_header {
91 char present;
92 char present2;
93 char lines;
94 char fill;
95};
96
97#define QLINES(mq) ((mq) + offsetof(struct message_queue, qlines))
98#define HSTATUS(mq, h) ((mq) + offsetof(struct message_queue, hstatus[h]))
99
100static int gru_get_cpu_resources(int dsr_bytes, void **cb, void **dsr)
101{
102 struct gru_blade_state *bs;
103 int lcpu;
104
105 BUG_ON(dsr_bytes > GRU_NUM_KERNEL_DSR_BYTES);
106 preempt_disable();
107 bs = gru_base[uv_numa_blade_id()];
108 lcpu = uv_blade_processor_id();
109 *cb = bs->kernel_cb + lcpu * GRU_HANDLE_STRIDE;
110 *dsr = bs->kernel_dsr + lcpu * GRU_NUM_KERNEL_DSR_BYTES;
111 return 0;
112}
113
114static void gru_free_cpu_resources(void *cb, void *dsr)
115{
116 preempt_enable();
117}
118
119int gru_get_cb_exception_detail(void *cb,
120 struct control_block_extended_exc_detail *excdet)
121{
122 struct gru_control_block_extended *cbe;
123
124 cbe = get_cbe(GRUBASE(cb), get_cb_number(cb));
125 excdet->opc = cbe->opccpy;
126 excdet->exopc = cbe->exopccpy;
127 excdet->ecause = cbe->ecause;
128 excdet->exceptdet0 = cbe->idef1upd;
129 excdet->exceptdet1 = cbe->idef3upd;
130 return 0;
131}
132
133char *gru_get_cb_exception_detail_str(int ret, void *cb,
134 char *buf, int size)
135{
136 struct gru_control_block_status *gen = (void *)cb;
137 struct control_block_extended_exc_detail excdet;
138
139 if (ret > 0 && gen->istatus == CBS_EXCEPTION) {
140 gru_get_cb_exception_detail(cb, &excdet);
141 snprintf(buf, size,
142 "GRU exception: cb %p, opc %d, exopc %d, ecause 0x%x,"
143 "excdet0 0x%lx, excdet1 0x%x",
144 gen, excdet.opc, excdet.exopc, excdet.ecause,
145 excdet.exceptdet0, excdet.exceptdet1);
146 } else {
147 snprintf(buf, size, "No exception");
148 }
149 return buf;
150}
151
152static int gru_wait_idle_or_exception(struct gru_control_block_status *gen)
153{
154 while (gen->istatus >= CBS_ACTIVE) {
155 cpu_relax();
156 barrier();
157 }
158 return gen->istatus;
159}
160
161static int gru_retry_exception(void *cb)
162{
163 struct gru_control_block_status *gen = (void *)cb;
164 struct control_block_extended_exc_detail excdet;
165 int retry = EXCEPTION_RETRY_LIMIT;
166
167 while (1) {
168 if (gru_get_cb_message_queue_substatus(cb))
169 break;
170 if (gru_wait_idle_or_exception(gen) == CBS_IDLE)
171 return CBS_IDLE;
172
173 gru_get_cb_exception_detail(cb, &excdet);
174 if (excdet.ecause & ~EXCEPTION_RETRY_BITS)
175 break;
176 if (retry-- == 0)
177 break;
178 gen->icmd = 1;
179 gru_flush_cache(gen);
180 }
181 return CBS_EXCEPTION;
182}
183
184int gru_check_status_proc(void *cb)
185{
186 struct gru_control_block_status *gen = (void *)cb;
187 int ret;
188
189 ret = gen->istatus;
190 if (ret != CBS_EXCEPTION)
191 return ret;
192 return gru_retry_exception(cb);
193
194}
195
196int gru_wait_proc(void *cb)
197{
198 struct gru_control_block_status *gen = (void *)cb;
199 int ret;
200
201 ret = gru_wait_idle_or_exception(gen);
202 if (ret == CBS_EXCEPTION)
203 ret = gru_retry_exception(cb);
204
205 return ret;
206}
207
208void gru_abort(int ret, void *cb, char *str)
209{
210 char buf[GRU_EXC_STR_SIZE];
211
212 panic("GRU FATAL ERROR: %s - %s\n", str,
213 gru_get_cb_exception_detail_str(ret, cb, buf, sizeof(buf)));
214}
215
216void gru_wait_abort_proc(void *cb)
217{
218 int ret;
219
220 ret = gru_wait_proc(cb);
221 if (ret)
222 gru_abort(ret, cb, "gru_wait_abort");
223}
224
225
226/*------------------------------ MESSAGE QUEUES -----------------------------*/
227
228/* Internal status codes. These are NOT returned to the user. */
229#define MQIE_AGAIN -1 /* try again */
230
231
232/*
233 * Save/restore the "present" flag that is in the second line of 2-line
234 * messages
235 */
236static inline int get_present2(void *p)
237{
238 struct message_header *mhdr = p + GRU_CACHE_LINE_BYTES;
239 return mhdr->present;
240}
241
242static inline void restore_present2(void *p, int val)
243{
244 struct message_header *mhdr = p + GRU_CACHE_LINE_BYTES;
245 mhdr->present = val;
246}
247
248/*
249 * Create a message queue.
250 * bytes - message queue size in bytes. Includes the 2-cacheline header.
251 */
252int gru_create_message_queue(void *p, unsigned int bytes)
253{
254 struct message_queue *mq = p;
255 unsigned int qlines;
256
257 qlines = bytes / GRU_CACHE_LINE_BYTES - 2;
258 memset(mq, 0, bytes);
259 mq->start = &mq->data;
260 mq->start2 = &mq->data + (qlines / 2 - 1) * GRU_CACHE_LINE_BYTES;
261 mq->next = &mq->data;
262 mq->limit = &mq->data + (qlines - 2) * GRU_CACHE_LINE_BYTES;
263 mq->qlines = qlines;
264 mq->hstatus[0] = 0;
265 mq->hstatus[1] = 1;
266 mq->head = gru_mesq_head(2, qlines / 2 + 1);
267 return 0;
268}
269EXPORT_SYMBOL_GPL(gru_create_message_queue);
270
271/*
272 * Send a NOOP message to a message queue
273 * Returns:
274 * 0 - if queue is full after the send. This is the normal case
275 * but various races can change this.
276 * -1 - if mesq sent successfully but queue not full
277 * >0 - unexpected error. MQE_xxx returned
278 */
279static int send_noop_message(void *cb,
280 unsigned long mq, void *mesg)
281{
282 const struct message_header noop_header = {
283 .present = MQS_NOOP, .lines = 1};
284 unsigned long m;
285 int substatus, ret;
286 struct message_header save_mhdr, *mhdr = mesg;
287
288 STAT(mesq_noop);
289 save_mhdr = *mhdr;
290 *mhdr = noop_header;
291 gru_mesq(cb, mq, gru_get_tri(mhdr), 1, IMA);
292 ret = gru_wait(cb);
293
294 if (ret) {
295 substatus = gru_get_cb_message_queue_substatus(cb);
296 switch (substatus) {
297 case CBSS_NO_ERROR:
298 STAT(mesq_noop_unexpected_error);
299 ret = MQE_UNEXPECTED_CB_ERR;
300 break;
301 case CBSS_LB_OVERFLOWED:
302 STAT(mesq_noop_lb_overflow);
303 ret = MQE_CONGESTION;
304 break;
305 case CBSS_QLIMIT_REACHED:
306 STAT(mesq_noop_qlimit_reached);
307 ret = 0;
308 break;
309 case CBSS_AMO_NACKED:
310 STAT(mesq_noop_amo_nacked);
311 ret = MQE_CONGESTION;
312 break;
313 case CBSS_PUT_NACKED:
314 STAT(mesq_noop_put_nacked);
315 m = mq + (gru_get_amo_value_head(cb) << 6);
316 gru_vstore(cb, m, gru_get_tri(mesg), XTYPE_CL, 1, 1,
317 IMA);
318 if (gru_wait(cb) == CBS_IDLE)
319 ret = MQIE_AGAIN;
320 else
321 ret = MQE_UNEXPECTED_CB_ERR;
322 break;
323 case CBSS_PAGE_OVERFLOW:
324 default:
325 BUG();
326 }
327 }
328 *mhdr = save_mhdr;
329 return ret;
330}
331
332/*
333 * Handle a gru_mesq full.
334 */
335static int send_message_queue_full(void *cb,
336 unsigned long mq, void *mesg, int lines)
337{
338 union gru_mesqhead mqh;
339 unsigned int limit, head;
340 unsigned long avalue;
341 int half, qlines, save;
342
343 /* Determine if switching to first/second half of q */
344 avalue = gru_get_amo_value(cb);
345 head = gru_get_amo_value_head(cb);
346 limit = gru_get_amo_value_limit(cb);
347
348 /*
349 * Fetch "qlines" from the queue header. Since the queue may be
350 * in memory that can't be accessed using socket addresses, use
351 * the GRU to access the data. Use DSR space from the message.
352 */
353 save = *(int *)mesg;
354 gru_vload(cb, QLINES(mq), gru_get_tri(mesg), XTYPE_W, 1, 1, IMA);
355 if (gru_wait(cb) != CBS_IDLE)
356 goto cberr;
357 qlines = *(int *)mesg;
358 *(int *)mesg = save;
359 half = (limit != qlines);
360
361 if (half)
362 mqh = gru_mesq_head(qlines / 2 + 1, qlines);
363 else
364 mqh = gru_mesq_head(2, qlines / 2 + 1);
365
366 /* Try to get lock for switching head pointer */
367 gru_gamir(cb, EOP_IR_CLR, HSTATUS(mq, half), XTYPE_DW, IMA);
368 if (gru_wait(cb) != CBS_IDLE)
369 goto cberr;
370 if (!gru_get_amo_value(cb)) {
371 STAT(mesq_qf_locked);
372 return MQE_QUEUE_FULL;
373 }
374
375	/* Got the lock. Send an optional NOOP message if the queue is not full. */
376 if (head != limit) {
377 if (send_noop_message(cb, mq, mesg)) {
378 gru_gamir(cb, EOP_IR_INC, HSTATUS(mq, half),
379 XTYPE_DW, IMA);
380 if (gru_wait(cb) != CBS_IDLE)
381 goto cberr;
382 STAT(mesq_qf_noop_not_full);
383 return MQIE_AGAIN;
384 }
385 avalue++;
386 }
387
388 /* Then flip queuehead to other half of queue. */
389 gru_gamer(cb, EOP_ERR_CSWAP, mq, XTYPE_DW, mqh.val, avalue, IMA);
390 if (gru_wait(cb) != CBS_IDLE)
391 goto cberr;
392
393 /* If not successfully in swapping queue head, clear the hstatus lock */
394 if (gru_get_amo_value(cb) != avalue) {
395 STAT(mesq_qf_switch_head_failed);
396 gru_gamir(cb, EOP_IR_INC, HSTATUS(mq, half), XTYPE_DW, IMA);
397 if (gru_wait(cb) != CBS_IDLE)
398 goto cberr;
399 }
400 return MQIE_AGAIN;
401cberr:
402 STAT(mesq_qf_unexpected_error);
403 return MQE_UNEXPECTED_CB_ERR;
404}
405
406
407/*
408 * Handle a gru_mesq failure. Some of these failures are software recoverable
409 * or retryable.
410 */
411static int send_message_failure(void *cb,
412 unsigned long mq,
413 void *mesg,
414 int lines)
415{
416 int substatus, ret = 0;
417 unsigned long m;
418
419 substatus = gru_get_cb_message_queue_substatus(cb);
420 switch (substatus) {
421 case CBSS_NO_ERROR:
422 STAT(mesq_send_unexpected_error);
423 ret = MQE_UNEXPECTED_CB_ERR;
424 break;
425 case CBSS_LB_OVERFLOWED:
426 STAT(mesq_send_lb_overflow);
427 ret = MQE_CONGESTION;
428 break;
429 case CBSS_QLIMIT_REACHED:
430 STAT(mesq_send_qlimit_reached);
431 ret = send_message_queue_full(cb, mq, mesg, lines);
432 break;
433 case CBSS_AMO_NACKED:
434 STAT(mesq_send_amo_nacked);
435 ret = MQE_CONGESTION;
436 break;
437 case CBSS_PUT_NACKED:
438 STAT(mesq_send_put_nacked);
439		m = mq + (gru_get_amo_value_head(cb) << 6);
440 gru_vstore(cb, m, gru_get_tri(mesg), XTYPE_CL, lines, 1, IMA);
441 if (gru_wait(cb) == CBS_IDLE)
442 ret = MQE_OK;
443 else
444 ret = MQE_UNEXPECTED_CB_ERR;
445 break;
446 default:
447 BUG();
448 }
449 return ret;
450}
451
452/*
453 * Send a message to a message queue. A kernel CB/DSR pair is
454 * allocated internally to perform the send.
455 *	mq	message queue
456 *	mesg	message. Must be a vaddr within a GSEG
457 *	bytes	message size (<= 2 CL)
458 */
459int gru_send_message_gpa(unsigned long mq, void *mesg, unsigned int bytes)
460{
461 struct message_header *mhdr;
462 void *cb;
463 void *dsr;
464 int istatus, clines, ret;
465
466 STAT(mesq_send);
467 BUG_ON(bytes < sizeof(int) || bytes > 2 * GRU_CACHE_LINE_BYTES);
468
469 clines = (bytes + GRU_CACHE_LINE_BYTES - 1) / GRU_CACHE_LINE_BYTES;
470 if (gru_get_cpu_resources(bytes, &cb, &dsr))
471 return MQE_BUG_NO_RESOURCES;
472 memcpy(dsr, mesg, bytes);
473 mhdr = dsr;
474 mhdr->present = MQS_FULL;
475 mhdr->lines = clines;
476 if (clines == 2) {
477 mhdr->present2 = get_present2(mhdr);
478 restore_present2(mhdr, MQS_FULL);
479 }
480
481 do {
482 ret = MQE_OK;
483 gru_mesq(cb, mq, gru_get_tri(mhdr), clines, IMA);
484 istatus = gru_wait(cb);
485 if (istatus != CBS_IDLE)
486 ret = send_message_failure(cb, mq, dsr, clines);
487 } while (ret == MQIE_AGAIN);
488 gru_free_cpu_resources(cb, dsr);
489
490 if (ret)
491 STAT(mesq_send_failed);
492 return ret;
493}
494EXPORT_SYMBOL_GPL(gru_send_message_gpa);
495
496/*
497 * Advance the receive pointer for the queue to the next message.
498 */
499void gru_free_message(void *rmq, void *mesg)
500{
501 struct message_queue *mq = rmq;
502 struct message_header *mhdr = mq->next;
503 void *next, *pnext;
504 int half = -1;
505 int lines = mhdr->lines;
506
507 if (lines == 2)
508 restore_present2(mhdr, MQS_EMPTY);
509 mhdr->present = MQS_EMPTY;
510
511 pnext = mq->next;
512 next = pnext + GRU_CACHE_LINE_BYTES * lines;
513 if (next == mq->limit) {
514 next = mq->start;
515 half = 1;
516 } else if (pnext < mq->start2 && next >= mq->start2) {
517 half = 0;
518 }
519
520 if (half >= 0)
521 mq->hstatus[half] = 1;
522 mq->next = next;
523}
524EXPORT_SYMBOL_GPL(gru_free_message);
525
526/*
527 * Get next message from message queue. Return NULL if no message
528 * present. User must call gru_free_message() to move to next message.
529 * rmq message queue
530 */
531void *gru_get_next_message(void *rmq)
532{
533 struct message_queue *mq = rmq;
534 struct message_header *mhdr = mq->next;
535 int present = mhdr->present;
536
537 /* skip NOOP messages */
538 STAT(mesq_receive);
539 while (present == MQS_NOOP) {
540 gru_free_message(rmq, mhdr);
541 mhdr = mq->next;
542 present = mhdr->present;
543 }
544
545 /* Wait for both halves of 2 line messages */
546 if (present == MQS_FULL && mhdr->lines == 2 &&
547 get_present2(mhdr) == MQS_EMPTY)
548 present = MQS_EMPTY;
549
550 if (!present) {
551 STAT(mesq_receive_none);
552 return NULL;
553 }
554
555 if (mhdr->lines == 2)
556 restore_present2(mhdr, mhdr->present2);
557
558 return mhdr;
559}
560EXPORT_SYMBOL_GPL(gru_get_next_message);
561
562/* ---------------------- GRU DATA COPY FUNCTIONS ---------------------------*/
563
564/*
565 * Copy a block of data using the GRU resources
566 */
567int gru_copy_gpa(unsigned long dest_gpa, unsigned long src_gpa,
568 unsigned int bytes)
569{
570 void *cb;
571 void *dsr;
572 int ret;
573
574 STAT(copy_gpa);
575 if (gru_get_cpu_resources(GRU_NUM_KERNEL_DSR_BYTES, &cb, &dsr))
576 return MQE_BUG_NO_RESOURCES;
577 gru_bcopy(cb, src_gpa, dest_gpa, gru_get_tri(dsr),
578 XTYPE_B, bytes, GRU_NUM_KERNEL_DSR_BYTES, IMA);
579 ret = gru_wait(cb);
580 gru_free_cpu_resources(cb, dsr);
581 return ret;
582}
583EXPORT_SYMBOL_GPL(gru_copy_gpa);
584
585/* ------------------- KERNEL QUICKTESTS RUN AT STARTUP ----------------*/
586/* Temp - will delete after we gain confidence in the GRU */
587static __cacheline_aligned unsigned long word0;
588static __cacheline_aligned unsigned long word1;
589
590static int quicktest(struct gru_state *gru)
591{
592 void *cb;
593 void *ds;
594 unsigned long *p;
595
596 cb = get_gseg_base_address_cb(gru->gs_gru_base_vaddr, KERNEL_CTXNUM, 0);
597 ds = get_gseg_base_address_ds(gru->gs_gru_base_vaddr, KERNEL_CTXNUM, 0);
598 p = ds;
599 word0 = MAGIC;
600
601 gru_vload(cb, uv_gpa(&word0), 0, XTYPE_DW, 1, 1, IMA);
602 if (gru_wait(cb) != CBS_IDLE)
603 BUG();
604
605 if (*(unsigned long *)ds != MAGIC)
606 BUG();
607 gru_vstore(cb, uv_gpa(&word1), 0, XTYPE_DW, 1, 1, IMA);
608 if (gru_wait(cb) != CBS_IDLE)
609 BUG();
610
611 if (word0 != word1 || word0 != MAGIC) {
612 printk
613 ("GRU quicktest err: gru %d, found 0x%lx, expected 0x%lx\n",
614 gru->gs_gid, word1, MAGIC);
615 BUG(); /* ZZZ should not be fatal */
616 }
617
618 return 0;
619}
620
621
622int gru_kservices_init(struct gru_state *gru)
623{
624 struct gru_blade_state *bs;
625 struct gru_context_configuration_handle *cch;
626 unsigned long cbr_map, dsr_map;
627 int err, num, cpus_possible;
628
629 /*
630 * Currently, resources are reserved ONLY on the second chiplet
631 * on each blade. This leaves ALL resources on chiplet 0 available
632 * for user code.
633 */
634 bs = gru->gs_blade;
635 if (gru != &bs->bs_grus[1])
636 return 0;
637
638 cpus_possible = uv_blade_nr_possible_cpus(gru->gs_blade_id);
639
640 num = GRU_NUM_KERNEL_CBR * cpus_possible;
641 cbr_map = gru_reserve_cb_resources(gru, GRU_CB_COUNT_TO_AU(num), NULL);
642 gru->gs_reserved_cbrs += num;
643
644 num = GRU_NUM_KERNEL_DSR_BYTES * cpus_possible;
645 dsr_map = gru_reserve_ds_resources(gru, GRU_DS_BYTES_TO_AU(num), NULL);
646 gru->gs_reserved_dsr_bytes += num;
647
648 gru->gs_active_contexts++;
649 __set_bit(KERNEL_CTXNUM, &gru->gs_context_map);
650 cch = get_cch(gru->gs_gru_base_vaddr, KERNEL_CTXNUM);
651
652 bs->kernel_cb = get_gseg_base_address_cb(gru->gs_gru_base_vaddr,
653 KERNEL_CTXNUM, 0);
654 bs->kernel_dsr = get_gseg_base_address_ds(gru->gs_gru_base_vaddr,
655 KERNEL_CTXNUM, 0);
656
657 lock_cch_handle(cch);
658 cch->tfm_fault_bit_enable = 0;
659 cch->tlb_int_enable = 0;
660 cch->tfm_done_bit_enable = 0;
661 cch->unmap_enable = 1;
662 err = cch_allocate(cch, 0, cbr_map, dsr_map);
663 if (err) {
664 gru_dbg(grudev,
665 "Unable to allocate kernel CCH: gru %d, err %d\n",
666 gru->gs_gid, err);
667 BUG();
668 }
669	if ((err = cch_start(cch))) {
670 gru_dbg(grudev, "Unable to start kernel CCH: gru %d, err %d\n",
671 gru->gs_gid, err);
672 BUG();
673 }
674 unlock_cch_handle(cch);
675
676 if (gru_options & GRU_QUICKLOOK)
677 quicktest(gru);
678 return 0;
679}
diff --git a/drivers/misc/sgi-gru/grukservices.h b/drivers/misc/sgi-gru/grukservices.h
new file mode 100644
index 000000000000..eb17e0a3ac61
--- /dev/null
+++ b/drivers/misc/sgi-gru/grukservices.h
@@ -0,0 +1,134 @@
1
2/*
3 * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */
19#ifndef __GRU_KSERVICES_H_
20#define __GRU_KSERVICES_H_
21
22
23/*
24 * Message queues using the GRU to send/receive messages.
25 *
26 * These functions allow the user to create a message queue for
27 * sending/receiving 1 or 2 cacheline messages using the GRU.
28 *
29 * Processes SENDING messages will use a kernel CBR/DSR to send
30 * the message. This is transparent to the caller.
31 *
32 * The receiver does not use any GRU resources.
33 *
34 * The functions support:
35 * - single receiver
36 * - multiple senders
37 *	- cross-partition messages
38 *
39 * Missing features ZZZ:
40 * - user options for dealing with timeouts, queue full, etc.
41 * - gru_create_message_queue() needs interrupt vector info
42 */
43
44/*
45 * Initialize a user allocated chunk of memory to be used as
46 * a message queue. The caller must ensure that the queue is
47 * in contiguous physical memory and is cacheline aligned.
48 *
49 * Message queue size is the total number of bytes allocated
50 * to the queue including a 2 cacheline header that is used
51 * to manage the queue.
52 *
53 * Input:
54 * p pointer to user allocated memory.
55 * bytes size of message queue in bytes
56 *
57 * Errors:
58 * 0 OK
59 * >0 error
60 */
61extern int gru_create_message_queue(void *p, unsigned int bytes);
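
/*
 * Usage sketch (not part of the driver; assumes <linux/slab.h> and that
 * the power-of-2 kmalloc() allocation is cacheline aligned):
 *
 *	void *p = kmalloc(4096, GFP_KERNEL);
 *	if (p && gru_create_message_queue(p, 4096) == 0)
 *		... queue ready; senders use uv_gpa(p) as the queue address ...
 */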
62
63/*
64 * Send a message to a message queue.
65 *
66 * Note: The message queue transport mechanism uses the first 32
67 * bits of the message. Users should avoid using these bits.
68 *
69 *
70 * Input:
71 * xmq message queue - must be a UV global physical address
72 * mesg pointer to message. Must be 64-bit aligned
73 * bytes size of message in bytes
74 *
75 * Output:
76 * 0 message sent
77 * >0 Send failure - see error codes below
78 *
79 */
80extern int gru_send_message_gpa(unsigned long mq_gpa, void *mesg,
81 unsigned int bytes);
82
83/* Status values for gru_send_message() */
84#define MQE_OK 0 /* message sent successfully */
85#define MQE_CONGESTION 1 /* temporary congestion, try again */
86#define MQE_QUEUE_FULL 2 /* queue is full */
87#define MQE_UNEXPECTED_CB_ERR 3 /* unexpected CB error */
88#define MQE_PAGE_OVERFLOW 10 /* BUG - queue overflowed a page */
89#define MQE_BUG_NO_RESOURCES 11 /* BUG - could not alloc GRU cb/dsr */
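
/*
 * Send sketch (illustrative; "mq_gpa" is uv_gpa() of the queue memory,
 * "msg" a 64-bit aligned buffer of one or two cachelines):
 *
 *	int ret;
 *	do {
 *		ret = gru_send_message_gpa(mq_gpa, msg, 64);
 *	} while (ret == MQE_CONGESTION);	(possibly with a delay)
 *	if (ret != MQE_OK)
 *		... queue full or unexpected CB error ...
 */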
90
91/*
92 * Advance the receive pointer for the message queue to the next message.
93 * Note: current API requires messages to be gotten & freed in order. Future
94 * API extensions may allow for out-of-order freeing.
95 *
96 * Input
97 * mq message queue
98 * mesq message being freed
99 */
100extern void gru_free_message(void *mq, void *mesq);
101
102/*
103 * Get next message from message queue. Returns pointer to
104 * message OR NULL if no message present.
105 * User must call gru_free_message() after message is processed
106 * in order to move the queue pointers to next message.
107 *
108 * Input
109 * mq message queue
110 *
111 * Output:
112 * p pointer to message
113 * NULL no message available
114 */
115extern void *gru_get_next_message(void *mq);
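
/*
 * Receive sketch (illustrative; "mq" is the queue's virtual address):
 *
 *	void *m;
 *	while ((m = gru_get_next_message(mq)) != NULL) {
 *		... process the message at m ...
 *		gru_free_message(mq, m);
 *	}
 */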
116
117
118/*
119 * Copy data using the GRU. Source or destination can be located in a remote
120 * partition.
121 *
122 * Input:
123 * dest_gpa destination global physical address
124 * src_gpa source global physical address
125 * bytes number of bytes to copy
126 *
127 * Output:
128 * 0 OK
129 * >0 error
130 */
131extern int gru_copy_gpa(unsigned long dest_gpa, unsigned long src_gpa,
132 unsigned int bytes);
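
/*
 * Copy sketch (illustrative; "dst" and "src" must each be physically
 * contiguous so uv_gpa() of the start addresses covers the whole range):
 *
 *	if (gru_copy_gpa(uv_gpa(dst), uv_gpa(src), bytes) != 0)
 *		... the GRU reported a CB error ...
 */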
133
134#endif /* __GRU_KSERVICES_H_ */
diff --git a/drivers/misc/sgi-gru/grulib.h b/drivers/misc/sgi-gru/grulib.h
new file mode 100644
index 000000000000..e56e196a6998
--- /dev/null
+++ b/drivers/misc/sgi-gru/grulib.h
@@ -0,0 +1,97 @@
1/*
2 * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU Lesser General Public License as published by
6 * the Free Software Foundation; either version 2.1 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */
18
19#ifndef __GRULIB_H__
20#define __GRULIB_H__
21
22#define GRU_BASENAME "gru"
23#define GRU_FULLNAME "/dev/gru"
24#define GRU_IOCTL_NUM 'G'
25
26/*
27 * Maximum number of GRU segments that a user can have open
28 * ZZZ temp - set high for testing. Revisit.
29 */
30#define GRU_MAX_OPEN_CONTEXTS 32
31
32/* Set Number of Request Blocks */
33#define GRU_CREATE_CONTEXT _IOWR(GRU_IOCTL_NUM, 1, void *)
34
35/* Register task as using the slice */
36#define GRU_SET_TASK_SLICE _IOWR(GRU_IOCTL_NUM, 5, void *)
37
38/* Fetch exception detail */
39#define GRU_USER_GET_EXCEPTION_DETAIL _IOWR(GRU_IOCTL_NUM, 6, void *)
40
41/* For user call_os handling - normally a TLB fault */
42#define GRU_USER_CALL_OS _IOWR(GRU_IOCTL_NUM, 8, void *)
43
44/* For user unload context */
45#define GRU_USER_UNLOAD_CONTEXT _IOWR(GRU_IOCTL_NUM, 9, void *)
46
47/* For fetching GRU chiplet status */
48#define GRU_GET_CHIPLET_STATUS _IOWR(GRU_IOCTL_NUM, 10, void *)
49
50/* For user TLB flushing (primarily for tests) */
51#define GRU_USER_FLUSH_TLB _IOWR(GRU_IOCTL_NUM, 50, void *)
52
53/* Get some config options (primarily for tests & emulator) */
54#define GRU_GET_CONFIG_INFO _IOWR(GRU_IOCTL_NUM, 51, void *)
55
56#define CONTEXT_WINDOW_BYTES(th) (GRU_GSEG_PAGESIZE * (th))
57#define THREAD_POINTER(p, th) (p + GRU_GSEG_PAGESIZE * (th))
58
59/*
60 * Structure used to pass context creation parameters to the driver
61 */
62struct gru_create_context_req {
63 unsigned long gseg;
64 unsigned int data_segment_bytes;
65 unsigned int control_blocks;
66 unsigned int maximum_thread_count;
67 unsigned int options;
68};
69
70/*
71 * Structure used to pass unload context parameters to the driver
72 */
73struct gru_unload_context_req {
74 unsigned long gseg;
75};
76
77/*
78 * Structure used to pass TLB flush parameters to the driver
79 */
80struct gru_flush_tlb_req {
81 unsigned long gseg;
82 unsigned long vaddr;
83 size_t len;
84};
85
86/*
87 * GRU configuration info (temp - for testing)
88 */
89struct gru_config_info {
90 int cpus;
91 int blades;
92 int nodes;
93 int chiplets;
94 int fill[16];
95};
96
97#endif /* __GRULIB_H__ */
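
A rough user-space sketch of driving these ioctls; the field values are illustrative assumptions, and the real driver may impose constraints not shown here:

        #include <fcntl.h>
        #include <sys/ioctl.h>

        int fd = open(GRU_FULLNAME, O_RDWR);    /* "/dev/gru" */
        struct gru_create_context_req req = {
                .data_segment_bytes = 8192,     /* illustrative sizes */
                .control_blocks = 16,
                .maximum_thread_count = 1,
                .options = 0,
        };

        if (fd >= 0 && ioctl(fd, GRU_CREATE_CONTEXT, &req) == 0)
                ;       /* context created; mmap the GSEG next */
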
diff --git a/drivers/misc/sgi-gru/grumain.c b/drivers/misc/sgi-gru/grumain.c
new file mode 100644
index 000000000000..0eeb8dddd2f5
--- /dev/null
+++ b/drivers/misc/sgi-gru/grumain.c
@@ -0,0 +1,802 @@
1/*
2 * SN Platform GRU Driver
3 *
4 * DRIVER TABLE MANAGER + GRU CONTEXT LOAD/UNLOAD
5 *
6 * This file is subject to the terms and conditions of the GNU General Public
7 * License. See the file "COPYING" in the main directory of this archive
8 * for more details.
9 *
10 * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
11 */
12
13#include <linux/kernel.h>
14#include <linux/slab.h>
15#include <linux/mm.h>
16#include <linux/spinlock.h>
17#include <linux/sched.h>
18#include <linux/device.h>
19#include <linux/list.h>
20#include <asm/uv/uv_hub.h>
21#include "gru.h"
22#include "grutables.h"
23#include "gruhandles.h"
24
25unsigned long gru_options __read_mostly;
26
27static struct device_driver gru_driver = {
28 .name = "gru"
29};
30
31static struct device gru_device = {
32 .bus_id = {0},
33 .driver = &gru_driver,
34};
35
36struct device *grudev = &gru_device;
37
38/*
39 * Select a gru fault map to be used by the current cpu. Note that
40 * multiple cpus may be using the same map.
41 * ZZZ should "shift" be used?? Depends on HT cpu numbering
42 * ZZZ should be inline but did not work on emulator
43 */
44int gru_cpu_fault_map_id(void)
45{
46 return uv_blade_processor_id() % GRU_NUM_TFM;
47}
48
49/*--------- ASID Management -------------------------------------------
50 *
51 * Initially, assign asids sequentially from MIN_ASID .. MAX_ASID.
52 * Once MAX is reached, flush the TLB & start over. However,
53 * some asids may still be in use. There won't be many (percentage wise) still
54 * in use. Search active contexts & determine the value of the first
55 * asid in use ("x"s below). Set "limit" to this value.
56 * This defines a block of assignable asids.
57 *
58 * When "limit" is reached, search forward from limit+1 and determine the
59 * next block of assignable asids.
60 *
61 * Repeat until MAX_ASID is reached, then start over again.
62 *
63 * Each time MAX_ASID is reached, increment the asid generation. Since
64 * the search for in-use asids only checks contexts with GRUs currently
65 * assigned, asids in some contexts will be missed. Prior to loading
66 * a context, the asid generation of the GTS asid is rechecked. If it
67 * doesn't match the current generation, a new asid will be assigned.
68 *
69 * 0---------------x------------x---------------------x----|
70 * ^-next ^-limit ^-MAX_ASID
71 *
72 * All asid manipulation & context loading/unloading is protected by the
73 * gs_lock.
74 */
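
A hypothetical walk-through of the scheme: with MIN_ASID = 8 and ASID_INC = 8, suppose loaded contexts still hold asids 24 and 48. The first scan finds 24 as the lowest in-use asid, so "limit" is set to 24 and the values below it are assignable. Once assignment reaches 24, the scan resumes beyond it and sets limit = 48. When MAX_ASID is finally reached, the generation is bumped, the TLB is flushed, and assignment restarts at MIN_ASID.
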
75
76/* Hit the asid limit. Start over */
77static int gru_wrap_asid(struct gru_state *gru)
78{
79 gru_dbg(grudev, "gru %p\n", gru);
80 STAT(asid_wrap);
81 gru->gs_asid_gen++;
82 gru_flush_all_tlb(gru);
83 return MIN_ASID;
84}
85
86/* Find the next chunk of unused asids */
87static int gru_reset_asid_limit(struct gru_state *gru, int asid)
88{
89 int i, gid, inuse_asid, limit;
90
91 gru_dbg(grudev, "gru %p, asid 0x%x\n", gru, asid);
92 STAT(asid_next);
93 limit = MAX_ASID;
94 if (asid >= limit)
95 asid = gru_wrap_asid(gru);
96 gid = gru->gs_gid;
97again:
98 for (i = 0; i < GRU_NUM_CCH; i++) {
99 if (!gru->gs_gts[i])
100 continue;
101 inuse_asid = gru->gs_gts[i]->ts_gms->ms_asids[gid].mt_asid;
102 gru_dbg(grudev, "gru %p, inuse_asid 0x%x, cxtnum %d, gts %p\n",
103 gru, inuse_asid, i, gru->gs_gts[i]);
104 if (inuse_asid == asid) {
105 asid += ASID_INC;
106 if (asid >= limit) {
107 /*
108 * empty range: reset the range limit and
109 * start over
110 */
111 limit = MAX_ASID;
112 if (asid >= MAX_ASID)
113 asid = gru_wrap_asid(gru);
114 goto again;
115 }
116 }
117
118 if ((inuse_asid > asid) && (inuse_asid < limit))
119 limit = inuse_asid;
120 }
121 gru->gs_asid_limit = limit;
122 gru->gs_asid = asid;
123 gru_dbg(grudev, "gru %p, new asid 0x%x, new_limit 0x%x\n", gru, asid,
124 limit);
125 return asid;
126}
127
128/* Assign a new ASID to a thread context. */
129static int gru_assign_asid(struct gru_state *gru)
130{
131 int asid;
132
133 spin_lock(&gru->gs_asid_lock);
134 gru->gs_asid += ASID_INC;
135 asid = gru->gs_asid;
136 if (asid >= gru->gs_asid_limit)
137 asid = gru_reset_asid_limit(gru, asid);
138 spin_unlock(&gru->gs_asid_lock);
139
140 gru_dbg(grudev, "gru %p, asid 0x%x\n", gru, asid);
141 return asid;
142}
143
144/*
145 * Clear n bits in a word. Return a word indicating the bits that were cleared.
146 * Optionally, build an array of chars that contain the bit numbers allocated.
147 */
148static unsigned long reserve_resources(unsigned long *p, int n, int mmax,
149 char *idx)
150{
151 unsigned long bits = 0;
152 int i;
153
154 do {
155 i = find_first_bit(p, mmax);
156 if (i == mmax)
157 BUG();
158 __clear_bit(i, p);
159 __set_bit(i, &bits);
160 if (idx)
161 *idx++ = i;
162 } while (--n);
163 return bits;
164}
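
The allocator is easy to model outside the kernel. Here is a user-space analogue (a sketch only: __builtin_ctzl stands in for find_first_bit, and the BUG() path for an exhausted map is omitted):

        #include <stdio.h>

        static unsigned long take_bits(unsigned long *avail, int n, char *idx)
        {
                unsigned long taken = 0;

                while (n--) {
                        int i = __builtin_ctzl(*avail); /* lowest set bit */

                        *avail &= ~(1UL << i);          /* remove from free map */
                        taken |= 1UL << i;              /* record the grant */
                        if (idx)
                                *idx++ = i;
                }
                return taken;
        }

        int main(void)
        {
                unsigned long avail = 0xff;     /* 8 free allocation units */
                char idx[3];
                unsigned long m = take_bits(&avail, 3, idx);

                printf("taken 0x%lx, free 0x%lx\n", m, avail); /* 0x7, 0xf8 */
                return 0;
        }
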
165
166unsigned long gru_reserve_cb_resources(struct gru_state *gru, int cbr_au_count,
167 char *cbmap)
168{
169 return reserve_resources(&gru->gs_cbr_map, cbr_au_count, GRU_CBR_AU,
170 cbmap);
171}
172
173unsigned long gru_reserve_ds_resources(struct gru_state *gru, int dsr_au_count,
174 char *dsmap)
175{
176 return reserve_resources(&gru->gs_dsr_map, dsr_au_count, GRU_DSR_AU,
177 dsmap);
178}
179
180static void reserve_gru_resources(struct gru_state *gru,
181 struct gru_thread_state *gts)
182{
183 gru->gs_active_contexts++;
184 gts->ts_cbr_map =
185 gru_reserve_cb_resources(gru, gts->ts_cbr_au_count,
186 gts->ts_cbr_idx);
187 gts->ts_dsr_map =
188 gru_reserve_ds_resources(gru, gts->ts_dsr_au_count, NULL);
189}
190
191static void free_gru_resources(struct gru_state *gru,
192 struct gru_thread_state *gts)
193{
194 gru->gs_active_contexts--;
195 gru->gs_cbr_map |= gts->ts_cbr_map;
196 gru->gs_dsr_map |= gts->ts_dsr_map;
197}
198
199/*
200 * Check if a GRU has sufficient free resources to satisfy an allocation
201 * request. Note: GRU locks may or may not be held when this is called. If
202 * not held, recheck after acquiring the appropriate locks.
203 *
204 * Returns 1 if sufficient resources, 0 if not
205 */
206static int check_gru_resources(struct gru_state *gru, int cbr_au_count,
207 int dsr_au_count, int max_active_contexts)
208{
209 return hweight64(gru->gs_cbr_map) >= cbr_au_count
210 && hweight64(gru->gs_dsr_map) >= dsr_au_count
211 && gru->gs_active_contexts < max_active_contexts;
212}
213
214/*
215 * TLB management requires tracking all GRU chiplets that have loaded a GSEG
216 * context.
217 */
218static int gru_load_mm_tracker(struct gru_state *gru, struct gru_mm_struct *gms,
219 int ctxnum)
220{
221 struct gru_mm_tracker *asids = &gms->ms_asids[gru->gs_gid];
222 unsigned short ctxbitmap = (1 << ctxnum);
223 int asid;
224
225 spin_lock(&gms->ms_asid_lock);
226 asid = asids->mt_asid;
227
228 if (asid == 0 || asids->mt_asid_gen != gru->gs_asid_gen) {
229 asid = gru_assign_asid(gru);
230 asids->mt_asid = asid;
231 asids->mt_asid_gen = gru->gs_asid_gen;
232 STAT(asid_new);
233 } else {
234 STAT(asid_reuse);
235 }
236
237 BUG_ON(asids->mt_ctxbitmap & ctxbitmap);
238 asids->mt_ctxbitmap |= ctxbitmap;
239 if (!test_bit(gru->gs_gid, gms->ms_asidmap))
240 __set_bit(gru->gs_gid, gms->ms_asidmap);
241 spin_unlock(&gms->ms_asid_lock);
242
243 gru_dbg(grudev,
244		"gru %x, gms %p, ctxnum %d, asid 0x%x, asidmap 0x%lx\n",
245 gru->gs_gid, gms, ctxnum, asid, gms->ms_asidmap[0]);
246 return asid;
247}
248
249static void gru_unload_mm_tracker(struct gru_state *gru,
250 struct gru_mm_struct *gms, int ctxnum)
251{
252 struct gru_mm_tracker *asids;
253 unsigned short ctxbitmap;
254
255 asids = &gms->ms_asids[gru->gs_gid];
256 ctxbitmap = (1 << ctxnum);
257 spin_lock(&gms->ms_asid_lock);
258 BUG_ON((asids->mt_ctxbitmap & ctxbitmap) != ctxbitmap);
259 asids->mt_ctxbitmap ^= ctxbitmap;
260	gru_dbg(grudev, "gru %x, gms %p, ctxnum %d, asidmap 0x%lx\n",
261 gru->gs_gid, gms, ctxnum, gms->ms_asidmap[0]);
262 spin_unlock(&gms->ms_asid_lock);
263}
264
265/*
266 * Decrement the reference count on a GTS structure. Free the structure
267 * if the reference count goes to zero.
268 */
269void gts_drop(struct gru_thread_state *gts)
270{
271 if (gts && atomic_dec_return(&gts->ts_refcnt) == 0) {
272 gru_drop_mmu_notifier(gts->ts_gms);
273 kfree(gts);
274 STAT(gts_free);
275 }
276}
277
278/*
279 * Locate the GTS structure for the current thread.
280 */
281static struct gru_thread_state *gru_find_current_gts_nolock(struct gru_vma_data
282 *vdata, int tsid)
283{
284 struct gru_thread_state *gts;
285
286 list_for_each_entry(gts, &vdata->vd_head, ts_next)
287 if (gts->ts_tsid == tsid)
288 return gts;
289 return NULL;
290}
291
292/*
293 * Allocate a thread state structure.
294 */
295static struct gru_thread_state *gru_alloc_gts(struct vm_area_struct *vma,
296 struct gru_vma_data *vdata,
297 int tsid)
298{
299 struct gru_thread_state *gts;
300 int bytes;
301
302 bytes = DSR_BYTES(vdata->vd_dsr_au_count) +
303 CBR_BYTES(vdata->vd_cbr_au_count);
304 bytes += sizeof(struct gru_thread_state);
305 gts = kzalloc(bytes, GFP_KERNEL);
306 if (!gts)
307 return NULL;
308
309 STAT(gts_alloc);
310 atomic_set(&gts->ts_refcnt, 1);
311 mutex_init(&gts->ts_ctxlock);
312 gts->ts_cbr_au_count = vdata->vd_cbr_au_count;
313 gts->ts_dsr_au_count = vdata->vd_dsr_au_count;
314 gts->ts_user_options = vdata->vd_user_options;
315 gts->ts_tsid = tsid;
317 gts->ts_ctxnum = NULLCTX;
318 gts->ts_mm = current->mm;
319 gts->ts_vma = vma;
320 gts->ts_tlb_int_select = -1;
321 gts->ts_gms = gru_register_mmu_notifier();
322 if (!gts->ts_gms)
323 goto err;
324
325 gru_dbg(grudev, "alloc vdata %p, new gts %p\n", vdata, gts);
326 return gts;
327
328err:
329 gts_drop(gts);
330 return NULL;
331}
332
333/*
334 * Allocate a vma private data structure.
335 */
336struct gru_vma_data *gru_alloc_vma_data(struct vm_area_struct *vma, int tsid)
337{
338 struct gru_vma_data *vdata = NULL;
339
340 vdata = kmalloc(sizeof(*vdata), GFP_KERNEL);
341 if (!vdata)
342 return NULL;
343
344 INIT_LIST_HEAD(&vdata->vd_head);
345 spin_lock_init(&vdata->vd_lock);
346 gru_dbg(grudev, "alloc vdata %p\n", vdata);
347 return vdata;
348}
349
350/*
351 * Find the thread state structure for the current thread.
352 */
353struct gru_thread_state *gru_find_thread_state(struct vm_area_struct *vma,
354 int tsid)
355{
356 struct gru_vma_data *vdata = vma->vm_private_data;
357 struct gru_thread_state *gts;
358
359 spin_lock(&vdata->vd_lock);
360 gts = gru_find_current_gts_nolock(vdata, tsid);
361 spin_unlock(&vdata->vd_lock);
362 gru_dbg(grudev, "vma %p, gts %p\n", vma, gts);
363 return gts;
364}
365
366/*
367 * Allocate a new thread state for a GSEG. Note that races may allow
368 * another thread to race to create a gts.
369 */
370struct gru_thread_state *gru_alloc_thread_state(struct vm_area_struct *vma,
371 int tsid)
372{
373 struct gru_vma_data *vdata = vma->vm_private_data;
374 struct gru_thread_state *gts, *ngts;
375
376 gts = gru_alloc_gts(vma, vdata, tsid);
377 if (!gts)
378 return NULL;
379
380 spin_lock(&vdata->vd_lock);
381 ngts = gru_find_current_gts_nolock(vdata, tsid);
382 if (ngts) {
383 gts_drop(gts);
384 gts = ngts;
385 STAT(gts_double_allocate);
386 } else {
387 list_add(&gts->ts_next, &vdata->vd_head);
388 }
389 spin_unlock(&vdata->vd_lock);
390 gru_dbg(grudev, "vma %p, gts %p\n", vma, gts);
391 return gts;
392}
393
394/*
395 * Free the GRU context assigned to the thread state.
396 */
397static void gru_free_gru_context(struct gru_thread_state *gts)
398{
399 struct gru_state *gru;
400
401 gru = gts->ts_gru;
402 gru_dbg(grudev, "gts %p, gru %p\n", gts, gru);
403
404 spin_lock(&gru->gs_lock);
405 gru->gs_gts[gts->ts_ctxnum] = NULL;
406 free_gru_resources(gru, gts);
407 BUG_ON(test_bit(gts->ts_ctxnum, &gru->gs_context_map) == 0);
408 __clear_bit(gts->ts_ctxnum, &gru->gs_context_map);
409 gts->ts_ctxnum = NULLCTX;
410 gts->ts_gru = NULL;
411 spin_unlock(&gru->gs_lock);
412
413 gts_drop(gts);
414 STAT(free_context);
415}
416
417/*
418 * Prefetching cachelines helps hardware performance.
419 * (Strictly a performance enhancement. Not functionally required).
420 */
421static void prefetch_data(void *p, int num, int stride)
422{
423 while (num-- > 0) {
424 prefetchw(p);
425 p += stride;
426 }
427}
428
429static inline long gru_copy_handle(void *d, void *s)
430{
431 memcpy(d, s, GRU_HANDLE_BYTES);
432 return GRU_HANDLE_BYTES;
433}
434
435/* rewrite in assembly & use lots of prefetch */
436static void gru_load_context_data(void *save, void *grubase, int ctxnum,
437 unsigned long cbrmap, unsigned long dsrmap)
438{
439 void *gseg, *cb, *cbe;
440 unsigned long length;
441 int i, scr;
442
443 gseg = grubase + ctxnum * GRU_GSEG_STRIDE;
444 length = hweight64(dsrmap) * GRU_DSR_AU_BYTES;
445 prefetch_data(gseg + GRU_DS_BASE, length / GRU_CACHE_LINE_BYTES,
446 GRU_CACHE_LINE_BYTES);
447
448 cb = gseg + GRU_CB_BASE;
449 cbe = grubase + GRU_CBE_BASE;
450 for_each_cbr_in_allocation_map(i, &cbrmap, scr) {
451 prefetch_data(cb, 1, GRU_CACHE_LINE_BYTES);
452 prefetch_data(cbe + i * GRU_HANDLE_STRIDE, 1,
453 GRU_CACHE_LINE_BYTES);
454 cb += GRU_HANDLE_STRIDE;
455 }
456
457 cb = gseg + GRU_CB_BASE;
458 for_each_cbr_in_allocation_map(i, &cbrmap, scr) {
459 save += gru_copy_handle(cb, save);
460 save += gru_copy_handle(cbe + i * GRU_HANDLE_STRIDE, save);
461 cb += GRU_HANDLE_STRIDE;
462 }
463
464 memcpy(gseg + GRU_DS_BASE, save, length);
465}
466
467static void gru_unload_context_data(void *save, void *grubase, int ctxnum,
468 unsigned long cbrmap, unsigned long dsrmap)
469{
470 void *gseg, *cb, *cbe;
471 unsigned long length;
472 int i, scr;
473
474 gseg = grubase + ctxnum * GRU_GSEG_STRIDE;
475
476 cb = gseg + GRU_CB_BASE;
477 cbe = grubase + GRU_CBE_BASE;
478 for_each_cbr_in_allocation_map(i, &cbrmap, scr) {
479 save += gru_copy_handle(save, cb);
480 save += gru_copy_handle(save, cbe + i * GRU_HANDLE_STRIDE);
481 cb += GRU_HANDLE_STRIDE;
482 }
483 length = hweight64(dsrmap) * GRU_DSR_AU_BYTES;
484 memcpy(save, gseg + GRU_DS_BASE, length);
485}
486
487void gru_unload_context(struct gru_thread_state *gts, int savestate)
488{
489 struct gru_state *gru = gts->ts_gru;
490 struct gru_context_configuration_handle *cch;
491 int ctxnum = gts->ts_ctxnum;
492
493 zap_vma_ptes(gts->ts_vma, UGRUADDR(gts), GRU_GSEG_PAGESIZE);
494 cch = get_cch(gru->gs_gru_base_vaddr, ctxnum);
495
496 lock_cch_handle(cch);
497 if (cch_interrupt_sync(cch))
498 BUG();
499 gru_dbg(grudev, "gts %p\n", gts);
500
501 gru_unload_mm_tracker(gru, gts->ts_gms, gts->ts_ctxnum);
502 if (savestate)
503 gru_unload_context_data(gts->ts_gdata, gru->gs_gru_base_vaddr,
504 ctxnum, gts->ts_cbr_map,
505 gts->ts_dsr_map);
506
507 if (cch_deallocate(cch))
508 BUG();
509 gts->ts_force_unload = 0; /* ts_force_unload locked by CCH lock */
510 unlock_cch_handle(cch);
511
512 gru_free_gru_context(gts);
513 STAT(unload_context);
514}
515
516/*
517 * Load a GRU context by copying it from the thread data structure in memory
518 * to the GRU.
519 */
520static void gru_load_context(struct gru_thread_state *gts)
521{
522 struct gru_state *gru = gts->ts_gru;
523 struct gru_context_configuration_handle *cch;
524 int err, asid, ctxnum = gts->ts_ctxnum;
525
526 gru_dbg(grudev, "gts %p\n", gts);
527 cch = get_cch(gru->gs_gru_base_vaddr, ctxnum);
528
529 lock_cch_handle(cch);
530 asid = gru_load_mm_tracker(gru, gts->ts_gms, gts->ts_ctxnum);
531 cch->tfm_fault_bit_enable =
532 (gts->ts_user_options == GRU_OPT_MISS_FMM_POLL
533 || gts->ts_user_options == GRU_OPT_MISS_FMM_INTR);
534 cch->tlb_int_enable = (gts->ts_user_options == GRU_OPT_MISS_FMM_INTR);
535 if (cch->tlb_int_enable) {
536 gts->ts_tlb_int_select = gru_cpu_fault_map_id();
537 cch->tlb_int_select = gts->ts_tlb_int_select;
538 }
539 cch->tfm_done_bit_enable = 0;
540 err = cch_allocate(cch, asid, gts->ts_cbr_map, gts->ts_dsr_map);
541 if (err) {
542 gru_dbg(grudev,
543 "err %d: cch %p, gts %p, cbr 0x%lx, dsr 0x%lx\n",
544 err, cch, gts, gts->ts_cbr_map, gts->ts_dsr_map);
545 BUG();
546 }
547
548 gru_load_context_data(gts->ts_gdata, gru->gs_gru_base_vaddr, ctxnum,
549 gts->ts_cbr_map, gts->ts_dsr_map);
550
551 if (cch_start(cch))
552 BUG();
553 unlock_cch_handle(cch);
554
555 STAT(load_context);
556}
557
558/*
559 * Update fields in an active CCH:
560 * - retarget interrupts on local blade
561 * - force a delayed context unload by clearing the CCH asids. This
562 * forces TLB misses for new GRU instructions. The context is unloaded
563 * when the next TLB miss occurs.
564 */
565static int gru_update_cch(struct gru_thread_state *gts, int int_select)
566{
567 struct gru_context_configuration_handle *cch;
568 struct gru_state *gru = gts->ts_gru;
569 int i, ctxnum = gts->ts_ctxnum, ret = 0;
570
571 cch = get_cch(gru->gs_gru_base_vaddr, ctxnum);
572
573 lock_cch_handle(cch);
574 if (cch->state == CCHSTATE_ACTIVE) {
575 if (gru->gs_gts[gts->ts_ctxnum] != gts)
576 goto exit;
577 if (cch_interrupt(cch))
578 BUG();
579 if (int_select >= 0) {
580 gts->ts_tlb_int_select = int_select;
581 cch->tlb_int_select = int_select;
582 } else {
583 for (i = 0; i < 8; i++)
584 cch->asid[i] = 0;
585 cch->tfm_fault_bit_enable = 0;
586 cch->tlb_int_enable = 0;
587 gts->ts_force_unload = 1;
588 }
589 if (cch_start(cch))
590 BUG();
591 ret = 1;
592 }
593exit:
594 unlock_cch_handle(cch);
595 return ret;
596}
597
598/*
599 * Update CCH tlb interrupt select. Required when all the following is true:
600 * - task's GRU context is loaded into a GRU
601 * - task is using interrupt notification for TLB faults
602 * - task has migrated to a different cpu on the same blade where
603 * it was previously running.
604 */
605static int gru_retarget_intr(struct gru_thread_state *gts)
606{
607 if (gts->ts_tlb_int_select < 0
608 || gts->ts_tlb_int_select == gru_cpu_fault_map_id())
609 return 0;
610
611 gru_dbg(grudev, "retarget from %d to %d\n", gts->ts_tlb_int_select,
612 gru_cpu_fault_map_id());
613 return gru_update_cch(gts, gru_cpu_fault_map_id());
614}
615
616
617/*
618 * Insufficient GRU resources available on the local blade. Steal a context from
619 * a process. This is a hack until a _real_ resource scheduler is written....
620 */
621#define next_ctxnum(n) ((n) < GRU_NUM_CCH - 2 ? (n) + 1 : 0)
622#define next_gru(b, g) (((g) < &(b)->bs_grus[GRU_CHIPLETS_PER_BLADE - 1]) ? \
623 ((g)+1) : &(b)->bs_grus[0])
624
625static void gru_steal_context(struct gru_thread_state *gts)
626{
627 struct gru_blade_state *blade;
628 struct gru_state *gru, *gru0;
629 struct gru_thread_state *ngts = NULL;
630 int ctxnum, ctxnum0, flag = 0, cbr, dsr;
631
632 cbr = gts->ts_cbr_au_count;
633 dsr = gts->ts_dsr_au_count;
634
635 preempt_disable();
636 blade = gru_base[uv_numa_blade_id()];
637 spin_lock(&blade->bs_lock);
638
639 ctxnum = next_ctxnum(blade->bs_lru_ctxnum);
640 gru = blade->bs_lru_gru;
641 if (ctxnum == 0)
642 gru = next_gru(blade, gru);
643 ctxnum0 = ctxnum;
644 gru0 = gru;
645 while (1) {
646 if (check_gru_resources(gru, cbr, dsr, GRU_NUM_CCH))
647 break;
648 spin_lock(&gru->gs_lock);
649 for (; ctxnum < GRU_NUM_CCH; ctxnum++) {
650 if (flag && gru == gru0 && ctxnum == ctxnum0)
651 break;
652 ngts = gru->gs_gts[ctxnum];
653 /*
654 * We are grabbing locks out of order, so trylock is
655 * needed. GTSs are usually not locked, so the odds of
656 * success are high. If trylock fails, try to steal a
657 * different GSEG.
658 */
659 if (ngts && mutex_trylock(&ngts->ts_ctxlock))
660 break;
661 ngts = NULL;
662 flag = 1;
663 }
664 spin_unlock(&gru->gs_lock);
665 if (ngts || (flag && gru == gru0 && ctxnum == ctxnum0))
666 break;
667 ctxnum = 0;
668 gru = next_gru(blade, gru);
669 }
670 blade->bs_lru_gru = gru;
671 blade->bs_lru_ctxnum = ctxnum;
672 spin_unlock(&blade->bs_lock);
673 preempt_enable();
674
675 if (ngts) {
676 STAT(steal_context);
677 ngts->ts_steal_jiffies = jiffies;
678 gru_unload_context(ngts, 1);
679 mutex_unlock(&ngts->ts_ctxlock);
680 } else {
681 STAT(steal_context_failed);
682 }
683 gru_dbg(grudev,
684 "stole gru %x, ctxnum %d from gts %p. Need cb %d, ds %d;"
685 " avail cb %ld, ds %ld\n",
686 gru->gs_gid, ctxnum, ngts, cbr, dsr, hweight64(gru->gs_cbr_map),
687 hweight64(gru->gs_dsr_map));
688}
689
690/*
691 * Scan the GRUs on the local blade & assign a GRU context.
692 */
693static struct gru_state *gru_assign_gru_context(struct gru_thread_state *gts)
694{
695 struct gru_state *gru, *grux;
696 int i, max_active_contexts;
697
698 preempt_disable();
699
700again:
701 gru = NULL;
702 max_active_contexts = GRU_NUM_CCH;
703 for_each_gru_on_blade(grux, uv_numa_blade_id(), i) {
704 if (check_gru_resources(grux, gts->ts_cbr_au_count,
705 gts->ts_dsr_au_count,
706 max_active_contexts)) {
707 gru = grux;
708 max_active_contexts = grux->gs_active_contexts;
709 if (max_active_contexts == 0)
710 break;
711 }
712 }
713
714 if (gru) {
715 spin_lock(&gru->gs_lock);
716 if (!check_gru_resources(gru, gts->ts_cbr_au_count,
717 gts->ts_dsr_au_count, GRU_NUM_CCH)) {
718 spin_unlock(&gru->gs_lock);
719 goto again;
720 }
721 reserve_gru_resources(gru, gts);
722 gts->ts_gru = gru;
723 gts->ts_ctxnum =
724 find_first_zero_bit(&gru->gs_context_map, GRU_NUM_CCH);
725 BUG_ON(gts->ts_ctxnum == GRU_NUM_CCH);
726 atomic_inc(&gts->ts_refcnt);
727 gru->gs_gts[gts->ts_ctxnum] = gts;
728 __set_bit(gts->ts_ctxnum, &gru->gs_context_map);
729 spin_unlock(&gru->gs_lock);
730
731 STAT(assign_context);
732 gru_dbg(grudev,
733 "gseg %p, gts %p, gru %x, ctx %d, cbr %d, dsr %d\n",
734 gseg_virtual_address(gts->ts_gru, gts->ts_ctxnum), gts,
735 gts->ts_gru->gs_gid, gts->ts_ctxnum,
736 gts->ts_cbr_au_count, gts->ts_dsr_au_count);
737 } else {
738 gru_dbg(grudev, "failed to allocate a GTS %s\n", "");
739 STAT(assign_context_failed);
740 }
741
742 preempt_enable();
743 return gru;
744}
745
746/*
747 * gru_fault
748 *
749 * Map the user's GRU segment
750 *
751 * Note: gru segments are always mmapped on GRU_GSEG_PAGESIZE boundaries.
752 */
753int gru_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
754{
755 struct gru_thread_state *gts;
756 unsigned long paddr, vaddr;
757
758 vaddr = (unsigned long)vmf->virtual_address;
759 gru_dbg(grudev, "vma %p, vaddr 0x%lx (0x%lx)\n",
760 vma, vaddr, GSEG_BASE(vaddr));
761 STAT(nopfn);
762
763 /* The following check ensures vaddr is a valid address in the VMA */
764 gts = gru_find_thread_state(vma, TSID(vaddr, vma));
765 if (!gts)
766 return VM_FAULT_SIGBUS;
767
768again:
769 preempt_disable();
770 mutex_lock(&gts->ts_ctxlock);
771 if (gts->ts_gru) {
772 if (gts->ts_gru->gs_blade_id != uv_numa_blade_id()) {
773 STAT(migrated_nopfn_unload);
774 gru_unload_context(gts, 1);
775 } else {
776 if (gru_retarget_intr(gts))
777 STAT(migrated_nopfn_retarget);
778 }
779 }
780
781 if (!gts->ts_gru) {
782 if (!gru_assign_gru_context(gts)) {
783 mutex_unlock(&gts->ts_ctxlock);
784 preempt_enable();
785 schedule_timeout(GRU_ASSIGN_DELAY); /* true hack ZZZ */
786 if (gts->ts_steal_jiffies + GRU_STEAL_DELAY < jiffies)
787 gru_steal_context(gts);
788 goto again;
789 }
790 gru_load_context(gts);
791 paddr = gseg_physical_address(gts->ts_gru, gts->ts_ctxnum);
792 remap_pfn_range(vma, vaddr & ~(GRU_GSEG_PAGESIZE - 1),
793 paddr >> PAGE_SHIFT, GRU_GSEG_PAGESIZE,
794 vma->vm_page_prot);
795 }
796
797 mutex_unlock(&gts->ts_ctxlock);
798 preempt_enable();
799
800 return VM_FAULT_NOPAGE;
801}
802
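
For orientation, a hypothetical user-space counterpart to the fault path above; fd comes from opening /dev/gru and creating a context, and GRU_GSEG_PAGESIZE stands in for the real segment size:

        #include <sys/mman.h>

        void *gseg = mmap(NULL, GRU_GSEG_PAGESIZE, PROT_READ | PROT_WRITE,
                          MAP_SHARED, fd, 0);

        if (gseg != MAP_FAILED)
                *(volatile char *)gseg; /* first touch -> gru_fault():
                                           assign, load, remap_pfn_range() */
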
diff --git a/drivers/misc/sgi-gru/gruprocfs.c b/drivers/misc/sgi-gru/gruprocfs.c
new file mode 100644
index 000000000000..533923f83f1a
--- /dev/null
+++ b/drivers/misc/sgi-gru/gruprocfs.c
@@ -0,0 +1,336 @@
1/*
2 * SN Platform GRU Driver
3 *
4 * PROC INTERFACES
5 *
6 * This file supports the /proc interfaces for the GRU driver
7 *
8 * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 */
24
25#include <linux/proc_fs.h>
26#include <linux/device.h>
27#include <linux/seq_file.h>
28#include <linux/uaccess.h>
29#include "gru.h"
30#include "grulib.h"
31#include "grutables.h"
32
33#define printstat(s, f) printstat_val(s, &gru_stats.f, #f)
34
35static void printstat_val(struct seq_file *s, atomic_long_t *v, char *id)
36{
37 unsigned long val = atomic_long_read(v);
38
39 if (val)
40 seq_printf(s, "%16lu %s\n", val, id);
41}
42
43static int statistics_show(struct seq_file *s, void *p)
44{
45 printstat(s, vdata_alloc);
46 printstat(s, vdata_free);
47 printstat(s, gts_alloc);
48 printstat(s, gts_free);
49 printstat(s, vdata_double_alloc);
50 printstat(s, gts_double_allocate);
51 printstat(s, assign_context);
52 printstat(s, assign_context_failed);
53 printstat(s, free_context);
54 printstat(s, load_context);
55 printstat(s, unload_context);
56 printstat(s, steal_context);
57 printstat(s, steal_context_failed);
58 printstat(s, nopfn);
59 printstat(s, break_cow);
60 printstat(s, asid_new);
61 printstat(s, asid_next);
62 printstat(s, asid_wrap);
63 printstat(s, asid_reuse);
64 printstat(s, intr);
65 printstat(s, call_os);
66 printstat(s, call_os_check_for_bug);
67 printstat(s, call_os_wait_queue);
68 printstat(s, user_flush_tlb);
69 printstat(s, user_unload_context);
70 printstat(s, user_exception);
71 printstat(s, set_task_slice);
72 printstat(s, migrate_check);
73 printstat(s, migrated_retarget);
74 printstat(s, migrated_unload);
75 printstat(s, migrated_unload_delay);
76 printstat(s, migrated_nopfn_retarget);
77 printstat(s, migrated_nopfn_unload);
78 printstat(s, tlb_dropin);
79 printstat(s, tlb_dropin_fail_no_asid);
80 printstat(s, tlb_dropin_fail_upm);
81 printstat(s, tlb_dropin_fail_invalid);
82 printstat(s, tlb_dropin_fail_range_active);
83 printstat(s, tlb_dropin_fail_idle);
84 printstat(s, tlb_dropin_fail_fmm);
85 printstat(s, mmu_invalidate_range);
86 printstat(s, mmu_invalidate_page);
87 printstat(s, mmu_clear_flush_young);
88 printstat(s, flush_tlb);
89 printstat(s, flush_tlb_gru);
90 printstat(s, flush_tlb_gru_tgh);
91 printstat(s, flush_tlb_gru_zero_asid);
92 printstat(s, copy_gpa);
93 printstat(s, mesq_receive);
94 printstat(s, mesq_receive_none);
95 printstat(s, mesq_send);
96 printstat(s, mesq_send_failed);
97 printstat(s, mesq_noop);
98 printstat(s, mesq_send_unexpected_error);
99 printstat(s, mesq_send_lb_overflow);
100 printstat(s, mesq_send_qlimit_reached);
101 printstat(s, mesq_send_amo_nacked);
102 printstat(s, mesq_send_put_nacked);
103 printstat(s, mesq_qf_not_full);
104 printstat(s, mesq_qf_locked);
105 printstat(s, mesq_qf_noop_not_full);
106 printstat(s, mesq_qf_switch_head_failed);
107 printstat(s, mesq_qf_unexpected_error);
108 printstat(s, mesq_noop_unexpected_error);
109 printstat(s, mesq_noop_lb_overflow);
110 printstat(s, mesq_noop_qlimit_reached);
111 printstat(s, mesq_noop_amo_nacked);
112 printstat(s, mesq_noop_put_nacked);
113 return 0;
114}
115
116static ssize_t statistics_write(struct file *file, const char __user *userbuf,
117 size_t count, loff_t *data)
118{
119 memset(&gru_stats, 0, sizeof(gru_stats));
120 return count;
121}
122
123static int options_show(struct seq_file *s, void *p)
124{
125 seq_printf(s, "0x%lx\n", gru_options);
126 return 0;
127}
128
129static ssize_t options_write(struct file *file, const char __user *userbuf,
130 size_t count, loff_t *data)
131{
132 unsigned long val;
133	char buf[80] = "";	/* zero-filled so the copied text stays NUL-terminated */
134
135	if (copy_from_user(buf, userbuf,
136			   count < sizeof(buf) - 1 ? count : sizeof(buf) - 1))
137 return -EFAULT;
138	if (!strict_strtoul(buf, 0, &val))
139 gru_options = val;
140
141 return count;
142}
143
144static int cch_seq_show(struct seq_file *file, void *data)
145{
146 long gid = *(long *)data;
147 int i;
148 struct gru_state *gru = GID_TO_GRU(gid);
149 struct gru_thread_state *ts;
150 const char *mode[] = { "??", "UPM", "INTR", "OS_POLL" };
151
152 if (gid == 0)
153 seq_printf(file, "#%5s%5s%6s%9s%6s%8s%8s\n", "gid", "bid",
154 "ctx#", "pid", "cbrs", "dsbytes", "mode");
155 if (gru)
156 for (i = 0; i < GRU_NUM_CCH; i++) {
157 ts = gru->gs_gts[i];
158 if (!ts)
159 continue;
160 seq_printf(file, " %5d%5d%6d%9d%6d%8d%8s\n",
161 gru->gs_gid, gru->gs_blade_id, i,
162 ts->ts_tgid_owner,
163 ts->ts_cbr_au_count * GRU_CBR_AU_SIZE,
164				   ts->ts_dsr_au_count * GRU_DSR_AU_BYTES,
165 mode[ts->ts_user_options &
166 GRU_OPT_MISS_MASK]);
167 }
168
169 return 0;
170}
171
172static int gru_seq_show(struct seq_file *file, void *data)
173{
174 long gid = *(long *)data, ctxfree, cbrfree, dsrfree;
175 struct gru_state *gru = GID_TO_GRU(gid);
176
177 if (gid == 0) {
178 seq_printf(file, "#%5s%5s%7s%6s%6s%8s%6s%6s\n", "gid", "nid",
179 "ctx", "cbr", "dsr", "ctx", "cbr", "dsr");
180 seq_printf(file, "#%5s%5s%7s%6s%6s%8s%6s%6s\n", "", "", "busy",
181 "busy", "busy", "free", "free", "free");
182 }
183 if (gru) {
184 ctxfree = GRU_NUM_CCH - gru->gs_active_contexts;
185 cbrfree = hweight64(gru->gs_cbr_map) * GRU_CBR_AU_SIZE;
186 dsrfree = hweight64(gru->gs_dsr_map) * GRU_DSR_AU_BYTES;
187 seq_printf(file, " %5d%5d%7ld%6ld%6ld%8ld%6ld%6ld\n",
188 gru->gs_gid, gru->gs_blade_id, GRU_NUM_CCH - ctxfree,
189 GRU_NUM_CBE - cbrfree, GRU_NUM_DSR_BYTES - dsrfree,
190 ctxfree, cbrfree, dsrfree);
191 }
192
193 return 0;
194}
195
196static void seq_stop(struct seq_file *file, void *data)
197{
198}
199
200static void *seq_start(struct seq_file *file, loff_t *gid)
201{
202 if (*gid < GRU_MAX_GRUS)
203 return gid;
204 return NULL;
205}
206
207static void *seq_next(struct seq_file *file, void *data, loff_t *gid)
208{
209 (*gid)++;
210 if (*gid < GRU_MAX_GRUS)
211 return gid;
212 return NULL;
213}
214
215static const struct seq_operations cch_seq_ops = {
216 .start = seq_start,
217 .next = seq_next,
218 .stop = seq_stop,
219 .show = cch_seq_show
220};
221
222static const struct seq_operations gru_seq_ops = {
223 .start = seq_start,
224 .next = seq_next,
225 .stop = seq_stop,
226 .show = gru_seq_show
227};
228
229static int statistics_open(struct inode *inode, struct file *file)
230{
231 return single_open(file, statistics_show, NULL);
232}
233
234static int options_open(struct inode *inode, struct file *file)
235{
236 return single_open(file, options_show, NULL);
237}
238
239static int cch_open(struct inode *inode, struct file *file)
240{
241 return seq_open(file, &cch_seq_ops);
242}
243
244static int gru_open(struct inode *inode, struct file *file)
245{
246 return seq_open(file, &gru_seq_ops);
247}
248
249/* *INDENT-OFF* */
250static const struct file_operations statistics_fops = {
251 .open = statistics_open,
252 .read = seq_read,
253 .write = statistics_write,
254 .llseek = seq_lseek,
255 .release = single_release,
256};
257
258static const struct file_operations options_fops = {
259 .open = options_open,
260 .read = seq_read,
261 .write = options_write,
262 .llseek = seq_lseek,
263 .release = single_release,
264};
265
266static const struct file_operations cch_fops = {
267 .open = cch_open,
268 .read = seq_read,
269 .llseek = seq_lseek,
270 .release = seq_release,
271};
272static const struct file_operations gru_fops = {
273 .open = gru_open,
274 .read = seq_read,
275 .llseek = seq_lseek,
276 .release = seq_release,
277};
278
279static struct proc_entry {
280 char *name;
281 int mode;
282 const struct file_operations *fops;
283 struct proc_dir_entry *entry;
284} proc_files[] = {
285 {"statistics", 0644, &statistics_fops},
286 {"debug_options", 0644, &options_fops},
287 {"cch_status", 0444, &cch_fops},
288 {"gru_status", 0444, &gru_fops},
289 {NULL}
290};
291/* *INDENT-ON* */
292
293static struct proc_dir_entry *proc_gru __read_mostly;
294
295static int create_proc_file(struct proc_entry *p)
296{
297 p->entry = create_proc_entry(p->name, p->mode, proc_gru);
298 if (!p->entry)
299 return -1;
300 p->entry->proc_fops = p->fops;
301 return 0;
302}
303
304static void delete_proc_files(void)
305{
306 struct proc_entry *p;
307
308 if (proc_gru) {
309 for (p = proc_files; p->name; p++)
310 if (p->entry)
311 remove_proc_entry(p->name, proc_gru);
312 remove_proc_entry("gru", NULL);
313 }
314}
315
316int gru_proc_init(void)
317{
318 struct proc_entry *p;
319
320 proc_mkdir("sgi_uv", NULL);
321 proc_gru = proc_mkdir("sgi_uv/gru", NULL);
322
323 for (p = proc_files; p->name; p++)
324 if (create_proc_file(p))
325 goto err;
326 return 0;
327
328err:
329 delete_proc_files();
330 return -1;
331}
332
333void gru_proc_exit(void)
334{
335 delete_proc_files();
336}
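
Given the directories created in gru_proc_init(), the files land under /proc/sgi_uv/gru. A small reader sketch (note that writing anything to the statistics file resets the counters, per statistics_write()):

        #include <stdio.h>

        int main(void)
        {
                char line[128];
                FILE *f = fopen("/proc/sgi_uv/gru/statistics", "r");

                if (!f)
                        return 1;
                while (fgets(line, sizeof(line), f))
                        fputs(line, stdout);    /* nonzero counters only */
                fclose(f);
                return 0;
        }
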
diff --git a/drivers/misc/sgi-gru/grutables.h b/drivers/misc/sgi-gru/grutables.h
new file mode 100644
index 000000000000..4251018f70ff
--- /dev/null
+++ b/drivers/misc/sgi-gru/grutables.h
@@ -0,0 +1,609 @@
1/*
2 * SN Platform GRU Driver
3 *
4 * GRU DRIVER TABLES, MACROS, externs, etc
5 *
6 * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 */
22
23#ifndef __GRUTABLES_H__
24#define __GRUTABLES_H__
25
26/*
27 * GRU Chiplet:
28 * The GRU is a user addressable memory accelerator. It provides
29 * several forms of load, store, memset, bcopy instructions. In addition, it
30 * contains special instructions for AMOs, sending messages to message
31 * queues, etc.
32 *
33 * The GRU is an integral part of the node controller. It connects
34 * directly to the cpu socket. In its current implementation, there are 2
35 * GRU chiplets in the node controller on each blade (~node).
36 *
37 * The entire GRU memory space is fully coherent and cacheable by the cpus.
38 *
39 * Each GRU chiplet has a physical memory map that looks like the following:
40 *
41 * +-----------------+
42 * |/////////////////|
43 * |/////////////////|
44 * |/////////////////|
45 * |/////////////////|
46 * |/////////////////|
47 * |/////////////////|
48 * |/////////////////|
49 * |/////////////////|
50 * +-----------------+
51 * | system control |
52 * +-----------------+ _______ +-------------+
53 * |/////////////////| / | |
54 * |/////////////////| / | |
55 * |/////////////////| / | instructions|
56 * |/////////////////| / | |
57 * |/////////////////| / | |
58 * |/////////////////| / |-------------|
59 * |/////////////////| / | |
60 * +-----------------+ | |
61 * | context 15 | | data |
62 * +-----------------+ | |
63 * | ...... | \ | |
64 * +-----------------+ \____________ +-------------+
65 * | context 1 |
66 * +-----------------+
67 * | context 0 |
68 * +-----------------+
69 *
70 * Each of the "contexts" is a chunk of memory that can be mmaped into user
71 * space. The context consists of 2 parts:
72 *
73 * - an instruction space that can be directly accessed by the user
74 * to issue GRU instructions and to check instruction status.
75 *
76 * - a data area that acts as normal RAM.
77 *
78 * User instructions contain virtual addresses of data to be accessed by the
79 * GRU. The GRU contains a TLB that is used to convert these user virtual
80 * addresses to physical addresses.
81 *
82 * The "system control" area of the GRU chiplet is used by the kernel driver
83 * to manage user contexts and to perform functions such as TLB dropin and
84 * purging.
85 *
86 * One context may be reserved for the kernel and used for cross-partition
87 * communication. The GRU will also be used to asynchronously zero out
88 * large blocks of memory (not currently implemented).
89 *
90 *
91 * Tables:
92 *
93 * VDATA-VMA Data - Holds a few parameters. Head of linked list of
94 * GTS tables for threads using the GSEG
95 * GTS - Gru Thread State - contains info for managing a GSEG context. A
96 * GTS is allocated for each thread accessing a
97 * GSEG.
98 * GTD - GRU Thread Data - contains shadow copy of GRU data when GSEG is
99 * not loaded into a GRU
100 * GMS - GRU Memory Struct - Used to manage TLB shootdowns. Tracks GRUs
101 * where a GSEG has been loaded. Similar to
102 * an mm_struct but for GRU.
103 *
104 * GS - GRU State - Used to manage the state of a GRU chiplet
105 * BS - Blade State - Used to manage state of all GRU chiplets
106 * on a blade
107 *
108 *
109 * Normal task tables for task using GRU.
110 * - 2 threads in process
111 * - 2 GSEGs open in process
112 * - GSEG1 is being used by both threads
113 * - GSEG2 is used only by thread 2
114 *
115 * task -->|
116 * task ---+---> mm ->------ (notifier) -------+-> gms
117 * | |
118 * |--> vma -> vdata ---> gts--->| GSEG1 (thread1)
119 * | | |
120 * | +-> gts--->| GSEG1 (thread2)
121 * | |
122 * |--> vma -> vdata ---> gts--->| GSEG2 (thread2)
123 * .
124 * .
125 *
126 * GSEGs are marked DONTCOPY on fork
127 *
128 * At open
129 * file.private_data -> NULL
130 *
131 * At mmap,
132 * vma -> vdata
133 *
134 * After gseg reference
135 * vma -> vdata ->gts
136 *
137 * After fork
138 * parent
139 * vma -> vdata -> gts
140 * child
141 * (vma is not copied)
142 *
143 */
144
145#include <linux/rmap.h>
146#include <linux/interrupt.h>
147#include <linux/mutex.h>
148#include <linux/wait.h>
149#include <linux/mmu_notifier.h>
150#include "gru.h"
151#include "gruhandles.h"
152
153extern struct gru_stats_s gru_stats;
154extern struct gru_blade_state *gru_base[];
155extern unsigned long gru_start_paddr, gru_end_paddr;
156
157#define GRU_MAX_BLADES MAX_NUMNODES
158#define GRU_MAX_GRUS (GRU_MAX_BLADES * GRU_CHIPLETS_PER_BLADE)
159
160#define GRU_DRIVER_ID_STR "SGI GRU Device Driver"
161#define GRU_DRIVER_VERSION_STR "0.80"
162
163/*
164 * GRU statistics.
165 */
166struct gru_stats_s {
167 atomic_long_t vdata_alloc;
168 atomic_long_t vdata_free;
169 atomic_long_t gts_alloc;
170 atomic_long_t gts_free;
171 atomic_long_t vdata_double_alloc;
172 atomic_long_t gts_double_allocate;
173 atomic_long_t assign_context;
174 atomic_long_t assign_context_failed;
175 atomic_long_t free_context;
176 atomic_long_t load_context;
177 atomic_long_t unload_context;
178 atomic_long_t steal_context;
179 atomic_long_t steal_context_failed;
180 atomic_long_t nopfn;
181 atomic_long_t break_cow;
182 atomic_long_t asid_new;
183 atomic_long_t asid_next;
184 atomic_long_t asid_wrap;
185 atomic_long_t asid_reuse;
186 atomic_long_t intr;
187 atomic_long_t call_os;
188 atomic_long_t call_os_check_for_bug;
189 atomic_long_t call_os_wait_queue;
190 atomic_long_t user_flush_tlb;
191 atomic_long_t user_unload_context;
192 atomic_long_t user_exception;
193 atomic_long_t set_task_slice;
194 atomic_long_t migrate_check;
195 atomic_long_t migrated_retarget;
196 atomic_long_t migrated_unload;
197 atomic_long_t migrated_unload_delay;
198 atomic_long_t migrated_nopfn_retarget;
199 atomic_long_t migrated_nopfn_unload;
200 atomic_long_t tlb_dropin;
201 atomic_long_t tlb_dropin_fail_no_asid;
202 atomic_long_t tlb_dropin_fail_upm;
203 atomic_long_t tlb_dropin_fail_invalid;
204 atomic_long_t tlb_dropin_fail_range_active;
205 atomic_long_t tlb_dropin_fail_idle;
206 atomic_long_t tlb_dropin_fail_fmm;
207 atomic_long_t mmu_invalidate_range;
208 atomic_long_t mmu_invalidate_page;
209 atomic_long_t mmu_clear_flush_young;
210 atomic_long_t flush_tlb;
211 atomic_long_t flush_tlb_gru;
212 atomic_long_t flush_tlb_gru_tgh;
213 atomic_long_t flush_tlb_gru_zero_asid;
214
215 atomic_long_t copy_gpa;
216
217 atomic_long_t mesq_receive;
218 atomic_long_t mesq_receive_none;
219 atomic_long_t mesq_send;
220 atomic_long_t mesq_send_failed;
221 atomic_long_t mesq_noop;
222 atomic_long_t mesq_send_unexpected_error;
223 atomic_long_t mesq_send_lb_overflow;
224 atomic_long_t mesq_send_qlimit_reached;
225 atomic_long_t mesq_send_amo_nacked;
226 atomic_long_t mesq_send_put_nacked;
227 atomic_long_t mesq_qf_not_full;
228 atomic_long_t mesq_qf_locked;
229 atomic_long_t mesq_qf_noop_not_full;
230 atomic_long_t mesq_qf_switch_head_failed;
231 atomic_long_t mesq_qf_unexpected_error;
232 atomic_long_t mesq_noop_unexpected_error;
233 atomic_long_t mesq_noop_lb_overflow;
234 atomic_long_t mesq_noop_qlimit_reached;
235 atomic_long_t mesq_noop_amo_nacked;
236 atomic_long_t mesq_noop_put_nacked;
237
238};
239
240#define OPT_DPRINT 1
241#define OPT_STATS 2
242#define GRU_QUICKLOOK 4
243
244
245#define IRQ_GRU 110 /* Starting IRQ number for interrupts */
246
247/* Delay in jiffies between attempts to assign a GRU context */
248#define GRU_ASSIGN_DELAY ((HZ * 20) / 1000)
249
250/*
251 * If a process has its context stolen, min delay in jiffies before trying to
252 * steal a context from another process.
253 */
254#define GRU_STEAL_DELAY ((HZ * 200) / 1000)
255
256#define STAT(id) do { \
257 if (gru_options & OPT_STATS) \
258 atomic_long_inc(&gru_stats.id); \
259 } while (0)
260
261#ifdef CONFIG_SGI_GRU_DEBUG
262#define gru_dbg(dev, fmt, x...) \
263 do { \
264 if (gru_options & OPT_DPRINT) \
265 dev_dbg(dev, "%s: " fmt, __func__, x); \
266 } while (0)
267#else
268#define gru_dbg(x...)
269#endif
270
271/*-----------------------------------------------------------------------------
272 * ASID management
273 */
274#define MAX_ASID 0xfffff0
275#define MIN_ASID 8
276#define ASID_INC 8 /* number of regions */
277
278/* Generate a GRU asid value from a GRU base asid & a virtual address. */
279#if defined CONFIG_IA64
280#define VADDR_HI_BIT 64
281#define GRUREGION(addr) ((addr) >> (VADDR_HI_BIT - 3) & 3)
282#elif defined __x86_64
283#define VADDR_HI_BIT 48
284#define GRUREGION(addr) (0) /* ZZZ could do better */
285#else
286#error "Unsupported architecture"
287#endif
288#define GRUASID(asid, addr) ((asid) + GRUREGION(addr))
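
For example, on IA64 (VADDR_HI_BIT = 64) an address such as 0x4000000000000000 gives GRUREGION(addr) = (addr >> 61) & 3 = 2, so GRUASID(asid, addr) yields asid + 2; on x86_64 every address currently falls in region 0.
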
289
290/*------------------------------------------------------------------------------
291 * File & VMS Tables
292 */
293
294struct gru_state;
295
296/*
297 * This structure is pointed to from the mmstruct via the notifier pointer.
298 * There is one of these per address space.
299 */
300struct gru_mm_tracker {
301 unsigned int mt_asid_gen; /* ASID wrap count */
302 int mt_asid; /* current base ASID for gru */
303 unsigned short mt_ctxbitmap; /* bitmap of contexts using
304 asid */
305};
306
307struct gru_mm_struct {
308 struct mmu_notifier ms_notifier;
309 atomic_t ms_refcnt;
310 spinlock_t ms_asid_lock; /* protects ASID assignment */
311 atomic_t ms_range_active;/* num range_invals active */
312 char ms_released;
313 wait_queue_head_t ms_wait_queue;
314 DECLARE_BITMAP(ms_asidmap, GRU_MAX_GRUS);
315 struct gru_mm_tracker ms_asids[GRU_MAX_GRUS];
316};
317
318/*
319 * One of these structures is allocated when a GSEG is mmaped. The
320 * structure is pointed to by the vma->vm_private_data field in the vma struct.
321 */
322struct gru_vma_data {
323 spinlock_t vd_lock; /* Serialize access to vma */
324 struct list_head vd_head; /* head of linked list of gts */
325 long vd_user_options;/* misc user option flags */
326 int vd_cbr_au_count;
327 int vd_dsr_au_count;
328};
329
330/*
331 * One of these is allocated for each thread accessing a mmaped GRU. A linked
332 * list of these structures is hung off the struct gru_vma_data in the mm_struct.
333 */
334struct gru_thread_state {
335 struct list_head ts_next; /* list - head at vma-private */
336 struct mutex ts_ctxlock; /* load/unload CTX lock */
337 struct mm_struct *ts_mm; /* mm currently mapped to
338 context */
339 struct vm_area_struct *ts_vma; /* vma of GRU context */
340 struct gru_state *ts_gru; /* GRU where the context is
341 loaded */
342 struct gru_mm_struct *ts_gms; /* asid & ioproc struct */
343 unsigned long ts_cbr_map; /* map of allocated CBRs */
344 unsigned long ts_dsr_map; /* map of allocated DATA
345 resources */
346 unsigned long ts_steal_jiffies;/* jiffies when context last
347 stolen */
348 long ts_user_options;/* misc user option flags */
349 pid_t ts_tgid_owner; /* task that is using the
350 context - for migration */
351 int ts_tsid; /* thread that owns the
352 structure */
353 int ts_tlb_int_select;/* target cpu if interrupts
354 enabled */
355 int ts_ctxnum; /* context number where the
356 context is loaded */
357 atomic_t ts_refcnt; /* reference count GTS */
358 unsigned char ts_dsr_au_count;/* Number of DSR resources
359							  required for context */
360 unsigned char ts_cbr_au_count;/* Number of CBR resources
361							  required for context */
362 char ts_force_unload;/* force context to be unloaded
363 after migration */
364 char ts_cbr_idx[GRU_CBR_AU];/* CBR numbers of each
365 allocated CB */
366 unsigned long ts_gdata[0]; /* save area for GRU data (CB,
367 DS, CBE) */
368};
369
370/*
371 * Threaded programs actually allocate an array of GSEGs when a context is
372 * created. Each thread uses a separate GSEG. TSID is the index into the GSEG
373 * array.
374 */
375#define TSID(a, v) (((a) - (v)->vm_start) / GRU_GSEG_PAGESIZE)
376#define UGRUADDR(gts) ((gts)->ts_vma->vm_start + \
377 (gts)->ts_tsid * GRU_GSEG_PAGESIZE)
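
For instance, a thread using TSID 2 has its GSEG mapped at vm_start + 2 * GRU_GSEG_PAGESIZE, and TSID(a, v) recovers 2 for any address a within that segment.
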
378
379#define NULLCTX (-1) /* if context not loaded into GRU */
380
381/*-----------------------------------------------------------------------------
382 * GRU State Tables
383 */
384
385/*
386 * One of these exists for each GRU chiplet.
387 */
388struct gru_state {
389 struct gru_blade_state *gs_blade; /* GRU state for entire
390 blade */
391 unsigned long gs_gru_base_paddr; /* Physical address of
392 gru segments (64) */
393 void *gs_gru_base_vaddr; /* Virtual address of
394 gru segments (64) */
395 unsigned char gs_gid; /* unique GRU number */
396 unsigned char gs_tgh_local_shift; /* used to pick TGH for
397 local flush */
398 unsigned char gs_tgh_first_remote; /* starting TGH# for
399 remote flush */
400 unsigned short gs_blade_id; /* blade of GRU */
401 spinlock_t gs_asid_lock; /* lock used for
402 assigning asids */
403 spinlock_t gs_lock; /* lock used for
404 assigning contexts */
405
406 /* -- the following are protected by the gs_asid_lock spinlock ---- */
407	unsigned int		gs_asid;		/* Next available ASID */
408 unsigned int gs_asid_limit; /* Limit of available
409 ASIDs */
410 unsigned int gs_asid_gen; /* asid generation.
411 Inc on wrap */
412
413 /* --- the following fields are protected by the gs_lock spinlock --- */
414 unsigned long gs_context_map; /* bitmap to manage
415 contexts in use */
416 unsigned long gs_cbr_map; /* bitmap to manage CB
417 resources */
418 unsigned long gs_dsr_map; /* bitmap used to manage
419 DATA resources */
420 unsigned int gs_reserved_cbrs; /* Number of kernel-
421 reserved cbrs */
422 unsigned int gs_reserved_dsr_bytes; /* Bytes of kernel-
423 reserved dsrs */
424 unsigned short gs_active_contexts; /* number of contexts
425 in use */
426 struct gru_thread_state *gs_gts[GRU_NUM_CCH]; /* GTS currently using
427 the context */
428};
429
430/*
431 * This structure contains the GRU state for all the GRUs on a blade.
432 */
433struct gru_blade_state {
434 void *kernel_cb; /* First kernel
435 reserved cb */
436 void *kernel_dsr; /* First kernel
437 reserved DSR */
438 /* ---- the following are protected by the bs_lock spinlock ---- */
439 spinlock_t bs_lock; /* lock used for
440 stealing contexts */
441 int bs_lru_ctxnum; /* STEAL - last context
442 stolen */
443 struct gru_state *bs_lru_gru; /* STEAL - last gru
444 stolen */
445
446 struct gru_state bs_grus[GRU_CHIPLETS_PER_BLADE];
447};
448
449/*-----------------------------------------------------------------------------
450 * Address Primitives
451 */
452#define get_tfm_for_cpu(g, c) \
453 ((struct gru_tlb_fault_map *)get_tfm((g)->gs_gru_base_vaddr, (c)))
454#define get_tfh_by_index(g, i) \
455 ((struct gru_tlb_fault_handle *)get_tfh((g)->gs_gru_base_vaddr, (i)))
456#define get_tgh_by_index(g, i) \
457 ((struct gru_tlb_global_handle *)get_tgh((g)->gs_gru_base_vaddr, (i)))
458#define get_cbe_by_index(g, i) \
459 ((struct gru_control_block_extended *)get_cbe((g)->gs_gru_base_vaddr,\
460 (i)))
461
462/*-----------------------------------------------------------------------------
463 * Useful Macros
464 */
465
466/* Given a blade# & chiplet#, get a pointer to the GRU */
467#define get_gru(b, c) (&gru_base[b]->bs_grus[c])
468
469/* Number of bytes to save/restore when unloading/loading GRU contexts */
470#define DSR_BYTES(dsr) ((dsr) * GRU_DSR_AU_BYTES)
471#define CBR_BYTES(cbr) ((cbr) * GRU_HANDLE_BYTES * GRU_CBR_AU_SIZE * 2)
472
473/* Convert a user CB number to the actual CBRNUM */
474#define thread_cbr_number(gts, n) ((gts)->ts_cbr_idx[(n) / GRU_CBR_AU_SIZE] \
475 * GRU_CBR_AU_SIZE + (n) % GRU_CBR_AU_SIZE)
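
As a hypothetical example, if GRU_CBR_AU_SIZE were 2 and ts_cbr_idx[] began {3, 7}, user CB 1 would map to actual CBR 3 * 2 + 1 = 7 and user CB 2 to CBR 7 * 2 + 0 = 14.
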
476
477/* Convert a gid to a pointer to the GRU */
478#define GID_TO_GRU(gid) \
479 (gru_base[(gid) / GRU_CHIPLETS_PER_BLADE] ? \
480 (&gru_base[(gid) / GRU_CHIPLETS_PER_BLADE]-> \
481 bs_grus[(gid) % GRU_CHIPLETS_PER_BLADE]) : \
482 NULL)
483
484/* Scan all active GRUs in a GRU bitmap */
485#define for_each_gru_in_bitmap(gid, map) \
486 for ((gid) = find_first_bit((map), GRU_MAX_GRUS); (gid) < GRU_MAX_GRUS;\
487 (gid)++, (gid) = find_next_bit((map), GRU_MAX_GRUS, (gid)))
488
489/* Scan all active GRUs on a specific blade */
490#define for_each_gru_on_blade(gru, nid, i) \
491 for ((gru) = gru_base[nid]->bs_grus, (i) = 0; \
492 (i) < GRU_CHIPLETS_PER_BLADE; \
493 (i)++, (gru)++)
494
495/* Scan all active GTSs on a gru. Note: must hold gs_lock to use this macro. */
496#define for_each_gts_on_gru(gts, gru, ctxnum) \
497 for ((ctxnum) = 0; (ctxnum) < GRU_NUM_CCH; (ctxnum)++) \
498 if (((gts) = (gru)->gs_gts[ctxnum]))
499
500/* Scan each CBR whose bit is set in a TFM (or copy of) */
501#define for_each_cbr_in_tfm(i, map) \
502 for ((i) = find_first_bit(map, GRU_NUM_CBE); \
503 (i) < GRU_NUM_CBE; \
504 (i)++, (i) = find_next_bit(map, GRU_NUM_CBE, i))
505
506/* Scan each CBR in a CBR bitmap. Note: multiple CBRs in an allocation unit */
507#define for_each_cbr_in_allocation_map(i, map, k) \
508 for ((k) = find_first_bit(map, GRU_CBR_AU); (k) < GRU_CBR_AU; \
509 (k) = find_next_bit(map, GRU_CBR_AU, (k) + 1)) \
510 for ((i) = (k)*GRU_CBR_AU_SIZE; \
511 (i) < ((k) + 1) * GRU_CBR_AU_SIZE; (i)++)
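
E.g., with GRU_CBR_AU_SIZE hypothetically 2 and allocation-map bits 0 and 2 set, the nested loops visit CBRs 0, 1, 4, and 5.
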
512
513/* Scan each DSR in a DSR bitmap. Note: multiple DSRs in an allocation unit */
514#define for_each_dsr_in_allocation_map(i, map, k) \
515 for ((k) = find_first_bit((const unsigned long *)map, GRU_DSR_AU);\
516 (k) < GRU_DSR_AU; \
517 (k) = find_next_bit((const unsigned long *)map, \
518 GRU_DSR_AU, (k) + 1)) \
519 for ((i) = (k) * GRU_DSR_AU_CL; \
520 (i) < ((k) + 1) * GRU_DSR_AU_CL; (i)++)
521
522#define gseg_physical_address(gru, ctxnum) \
523 ((gru)->gs_gru_base_paddr + ctxnum * GRU_GSEG_STRIDE)
524#define gseg_virtual_address(gru, ctxnum) \
525 ((gru)->gs_gru_base_vaddr + ctxnum * GRU_GSEG_STRIDE)
526
527/*-----------------------------------------------------------------------------
528 * Lock / Unlock GRU handles
529 * Use the "delresp" bit in the handle as a "lock" bit.
530 */
531
532/* Lock hierarchy checking enabled only in emulator */
533
534static inline void __lock_handle(void *h)
535{
536 while (test_and_set_bit(1, h))
537 cpu_relax();
538}
539
540static inline void __unlock_handle(void *h)
541{
542 clear_bit(1, h);
543}
544
545static inline void lock_cch_handle(struct gru_context_configuration_handle *cch)
546{
547 __lock_handle(cch);
548}
549
550static inline void unlock_cch_handle(struct gru_context_configuration_handle
551 *cch)
552{
553 __unlock_handle(cch);
554}
555
556static inline void lock_tgh_handle(struct gru_tlb_global_handle *tgh)
557{
558 __lock_handle(tgh);
559}
560
561static inline void unlock_tgh_handle(struct gru_tlb_global_handle *tgh)
562{
563 __unlock_handle(tgh);
564}
565
566/*-----------------------------------------------------------------------------
567 * Function prototypes & externs
568 */
569struct gru_unload_context_req;
570
571extern struct vm_operations_struct gru_vm_ops;
572extern struct device *grudev;
573
574extern struct gru_vma_data *gru_alloc_vma_data(struct vm_area_struct *vma,
575 int tsid);
576extern struct gru_thread_state *gru_find_thread_state(struct vm_area_struct
577 *vma, int tsid);
578extern struct gru_thread_state *gru_alloc_thread_state(struct vm_area_struct
579 *vma, int tsid);
580extern void gru_unload_context(struct gru_thread_state *gts, int savestate);
581extern void gts_drop(struct gru_thread_state *gts);
582extern void gru_tgh_flush_init(struct gru_state *gru);
583extern int gru_kservices_init(struct gru_state *gru);
584extern irqreturn_t gru_intr(int irq, void *dev_id);
585extern int gru_handle_user_call_os(unsigned long address);
586extern int gru_user_flush_tlb(unsigned long arg);
587extern int gru_user_unload_context(unsigned long arg);
588extern int gru_get_exception_detail(unsigned long arg);
589extern int gru_set_task_slice(long address);
590extern int gru_cpu_fault_map_id(void);
591extern struct vm_area_struct *gru_find_vma(unsigned long vaddr);
592extern void gru_flush_all_tlb(struct gru_state *gru);
593extern int gru_proc_init(void);
594extern void gru_proc_exit(void);
595
596extern unsigned long gru_reserve_cb_resources(struct gru_state *gru,
597 int cbr_au_count, char *cbmap);
598extern unsigned long gru_reserve_ds_resources(struct gru_state *gru,
599 int dsr_au_count, char *dsmap);
600extern int gru_fault(struct vm_area_struct *, struct vm_fault *vmf);
601extern struct gru_mm_struct *gru_register_mmu_notifier(void);
602extern void gru_drop_mmu_notifier(struct gru_mm_struct *gms);
603
604extern void gru_flush_tlb_range(struct gru_mm_struct *gms, unsigned long start,
605 unsigned long len);
606
607extern unsigned long gru_options;
608
609#endif /* __GRUTABLES_H__ */
diff --git a/drivers/misc/sgi-gru/grutlbpurge.c b/drivers/misc/sgi-gru/grutlbpurge.c
new file mode 100644
index 000000000000..bcfd5425e2e6
--- /dev/null
+++ b/drivers/misc/sgi-gru/grutlbpurge.c
@@ -0,0 +1,372 @@
1/*
2 * SN Platform GRU Driver
3 *
4 * MMUOPS callbacks + TLB flushing
5 *
 6 * This file handles mmu notifier callbacks from the core kernel. The callbacks
7 * are used to update the TLB in the GRU as a result of changes in the
8 * state of a process address space. This file also handles TLB invalidates
9 * from the GRU driver.
10 *
11 * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
12 *
13 * This program is free software; you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation; either version 2 of the License, or
16 * (at your option) any later version.
17 *
18 * This program is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with this program; if not, write to the Free Software
25 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26 */
27
28#include <linux/kernel.h>
29#include <linux/list.h>
30#include <linux/spinlock.h>
31#include <linux/mm.h>
32#include <linux/slab.h>
33#include <linux/device.h>
34#include <linux/hugetlb.h>
35#include <linux/delay.h>
36#include <linux/timex.h>
38#include <linux/srcu.h>
39#include <asm/processor.h>
40#include "gru.h"
41#include "grutables.h"
42#include <asm/uv/uv_hub.h>
43
44#define gru_random() get_cycles()
45
46/* ---------------------------------- TLB Invalidation functions --------
47 * get_tgh_handle
48 *
49 * Find a TGH to use for issuing a TLB invalidate. For GRUs that are on the
50 * local blade, use a fixed TGH that is a function of the blade-local cpu
51 * number. Normally, this TGH is private to the cpu & no contention occurs for
52 * the TGH. For offblade GRUs, select a random TGH in the range above the
53 * private TGHs. A spinlock is required to access this TGH & the lock must be
 54 * released when the invalidate completes. This sucks, but it is the best we
55 * can do.
56 *
57 * Note that the spinlock is IN the TGH handle so locking does not involve
58 * additional cache lines.
59 *
60 */
61static inline int get_off_blade_tgh(struct gru_state *gru)
62{
63 int n;
64
65 n = GRU_NUM_TGH - gru->gs_tgh_first_remote;
66 n = gru_random() % n;
67 n += gru->gs_tgh_first_remote;
68 return n;
69}
70
71static inline int get_on_blade_tgh(struct gru_state *gru)
72{
73 return uv_blade_processor_id() >> gru->gs_tgh_local_shift;
74}
75
76static struct gru_tlb_global_handle *get_lock_tgh_handle(struct gru_state
77 *gru)
78{
79 struct gru_tlb_global_handle *tgh;
80 int n;
81
82 preempt_disable();
83 if (uv_numa_blade_id() == gru->gs_blade_id)
84 n = get_on_blade_tgh(gru);
85 else
86 n = get_off_blade_tgh(gru);
87 tgh = get_tgh_by_index(gru, n);
88 lock_tgh_handle(tgh);
89
90 return tgh;
91}
92
93static void get_unlock_tgh_handle(struct gru_tlb_global_handle *tgh)
94{
95 unlock_tgh_handle(tgh);
96 preempt_enable();
97}
98
99/*
100 * gru_flush_tlb_range
101 *
102 * General purpose TLB invalidation function. This function scans every GRU in
103 * the ENTIRE system (partition) looking for GRUs where the specified MM has
104 * been accessed by the GRU. For each GRU found, the TLB must be invalidated OR
105 * the ASID invalidated. Invalidating an ASID causes a new ASID to be assigned
106 * on the next fault. This effectively flushes the ENTIRE TLB for the MM at the
 107 * cost of (possibly) a large number of future TLB misses.
108 *
109 * The current algorithm is optimized based on the following (somewhat true)
110 * assumptions:
111 * - GRU contexts are not loaded into a GRU unless a reference is made to
112 * the data segment or control block (this is true, not an assumption).
113 * If a DS/CB is referenced, the user will also issue instructions that
 114 * cause TLB misses. It is not necessary to optimize for the case where
115 * contexts are loaded but no instructions cause TLB misses. (I know
116 * this will happen but I'm not optimizing for it).
117 * - GRU instructions to invalidate TLB entries are SLOOOOWWW - normally
118 * a few usec but in unusual cases, it could be longer. Avoid if
119 * possible.
 120 * - intrablade process migration between cpus is not frequent but it
 121 * does occur.
122 * - a GRU context is not typically migrated to a different GRU on the
123 * blade because of intrablade migration
124 * - interblade migration is rare. Processes migrate their GRU context to
125 * the new blade.
126 * - if interblade migration occurs, migration back to the original blade
 127 * is very rare (i.e., no optimization for this case)
 128 * - most GRU instructions operate on a subset of the user REGIONS. Code
129 * & shared library regions are not likely targets of GRU instructions.
130 *
131 * To help improve the efficiency of TLB invalidation, the GMS data
132 * structure is maintained for EACH address space (MM struct). The GMS is
133 * also the structure that contains the pointer to the mmu callout
134 * functions. This structure is linked to the mm_struct for the address space
135 * using the mmu "register" function. The mmu interfaces are used to
136 * provide the callbacks for TLB invalidation. The GMS contains:
137 *
138 * - asid[maxgrus] array. ASIDs are assigned to a GRU when a context is
139 * loaded into the GRU.
140 * - asidmap[maxgrus]. bitmap to make it easier to find non-zero asids in
141 * the above array
142 * - ctxbitmap[maxgrus]. Indicates the contexts that are currently active
143 * in the GRU for the address space. This bitmap must be passed to the
144 * GRU to do an invalidate.
145 *
146 * The current algorithm for invalidating TLBs is:
 147 * - scan the asidmap for GRUs where the context has been loaded, i.e.,
148 * asid is non-zero.
149 * - for each gru found:
 150 * - if the ctxbitmap is non-zero, there are active contexts in the
151 * GRU. TLB invalidate instructions must be issued to the GRU.
 152 * - if the ctxbitmap is zero, no context is active. Set the ASID to
153 * zero to force a full TLB invalidation. This is fast but will
154 * cause a lot of TLB misses if the context is reloaded onto the
155 * GRU
156 *
157 */
158
159void gru_flush_tlb_range(struct gru_mm_struct *gms, unsigned long start,
160 unsigned long len)
161{
162 struct gru_state *gru;
163 struct gru_mm_tracker *asids;
164 struct gru_tlb_global_handle *tgh;
165 unsigned long num;
166 int grupagesize, pagesize, pageshift, gid, asid;
167
168 /* ZZZ TODO - handle huge pages */
169 pageshift = PAGE_SHIFT;
170 pagesize = (1UL << pageshift);
171 grupagesize = GRU_PAGESIZE(pageshift);
172 num = min(((len + pagesize - 1) >> pageshift), GRUMAXINVAL);
173
174 STAT(flush_tlb);
175 gru_dbg(grudev, "gms %p, start 0x%lx, len 0x%lx, asidmap 0x%lx\n", gms,
176 start, len, gms->ms_asidmap[0]);
177
178 spin_lock(&gms->ms_asid_lock);
179 for_each_gru_in_bitmap(gid, gms->ms_asidmap) {
180 STAT(flush_tlb_gru);
181 gru = GID_TO_GRU(gid);
182 asids = gms->ms_asids + gid;
183 asid = asids->mt_asid;
184 if (asids->mt_ctxbitmap && asid) {
185 STAT(flush_tlb_gru_tgh);
186 asid = GRUASID(asid, start);
187 gru_dbg(grudev,
188 " FLUSH gruid %d, asid 0x%x, num %ld, cbmap 0x%x\n",
189 gid, asid, num, asids->mt_ctxbitmap);
190 tgh = get_lock_tgh_handle(gru);
191 tgh_invalidate(tgh, start, 0, asid, grupagesize, 0,
192 num - 1, asids->mt_ctxbitmap);
193 get_unlock_tgh_handle(tgh);
194 } else {
195 STAT(flush_tlb_gru_zero_asid);
196 asids->mt_asid = 0;
197 __clear_bit(gru->gs_gid, gms->ms_asidmap);
198 gru_dbg(grudev,
199 " CLEARASID gruid %d, asid 0x%x, cbtmap 0x%x, asidmap 0x%lx\n",
200 gid, asid, asids->mt_ctxbitmap,
201 gms->ms_asidmap[0]);
202 }
203 }
204 spin_unlock(&gms->ms_asid_lock);
205}
206
207/*
208 * Flush the entire TLB on a chiplet.
209 */
210void gru_flush_all_tlb(struct gru_state *gru)
211{
212 struct gru_tlb_global_handle *tgh;
213
214 gru_dbg(grudev, "gru %p, gid %d\n", gru, gru->gs_gid);
215 tgh = get_lock_tgh_handle(gru);
216 tgh_invalidate(tgh, 0, ~0, 0, 1, 1, GRUMAXINVAL - 1, 0);
217 get_unlock_tgh_handle(tgh);
 218}
220
221/*
222 * MMUOPS notifier callout functions
223 */
224static void gru_invalidate_range_start(struct mmu_notifier *mn,
225 struct mm_struct *mm,
226 unsigned long start, unsigned long end)
227{
228 struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct,
229 ms_notifier);
230
231 STAT(mmu_invalidate_range);
232 atomic_inc(&gms->ms_range_active);
233 gru_dbg(grudev, "gms %p, start 0x%lx, end 0x%lx, act %d\n", gms,
234 start, end, atomic_read(&gms->ms_range_active));
235 gru_flush_tlb_range(gms, start, end - start);
236}
237
238static void gru_invalidate_range_end(struct mmu_notifier *mn,
239 struct mm_struct *mm, unsigned long start,
240 unsigned long end)
241{
242 struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct,
243 ms_notifier);
244
245 /* ..._and_test() provides needed barrier */
246 (void)atomic_dec_and_test(&gms->ms_range_active);
247
248 wake_up_all(&gms->ms_wait_queue);
249 gru_dbg(grudev, "gms %p, start 0x%lx, end 0x%lx\n", gms, start, end);
250}
251
252static void gru_invalidate_page(struct mmu_notifier *mn, struct mm_struct *mm,
253 unsigned long address)
254{
255 struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct,
256 ms_notifier);
257
258 STAT(mmu_invalidate_page);
259 gru_flush_tlb_range(gms, address, PAGE_SIZE);
260 gru_dbg(grudev, "gms %p, address 0x%lx\n", gms, address);
261}
262
263static void gru_release(struct mmu_notifier *mn, struct mm_struct *mm)
264{
265 struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct,
266 ms_notifier);
267
268 gms->ms_released = 1;
269 gru_dbg(grudev, "gms %p\n", gms);
270}
271
272
273static const struct mmu_notifier_ops gru_mmuops = {
274 .invalidate_page = gru_invalidate_page,
275 .invalidate_range_start = gru_invalidate_range_start,
276 .invalidate_range_end = gru_invalidate_range_end,
277 .release = gru_release,
278};
279
280/* Move this to the basic mmu_notifier file. But for now... */
281static struct mmu_notifier *mmu_find_ops(struct mm_struct *mm,
282 const struct mmu_notifier_ops *ops)
283{
284 struct mmu_notifier *mn, *gru_mn = NULL;
285 struct hlist_node *n;
286
287 if (mm->mmu_notifier_mm) {
288 rcu_read_lock();
289 hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list,
290 hlist)
291 if (mn->ops == ops) {
292 gru_mn = mn;
293 break;
294 }
295 rcu_read_unlock();
296 }
297 return gru_mn;
298}
299
300struct gru_mm_struct *gru_register_mmu_notifier(void)
301{
302 struct gru_mm_struct *gms;
303 struct mmu_notifier *mn;
304
305 mn = mmu_find_ops(current->mm, &gru_mmuops);
306 if (mn) {
307 gms = container_of(mn, struct gru_mm_struct, ms_notifier);
308 atomic_inc(&gms->ms_refcnt);
309 } else {
310 gms = kzalloc(sizeof(*gms), GFP_KERNEL);
311 if (gms) {
312 spin_lock_init(&gms->ms_asid_lock);
313 gms->ms_notifier.ops = &gru_mmuops;
314 atomic_set(&gms->ms_refcnt, 1);
315 init_waitqueue_head(&gms->ms_wait_queue);
316 __mmu_notifier_register(&gms->ms_notifier, current->mm);
317 }
318 }
 319 if (gms)
 320 gru_dbg(grudev, "gms %p, refcnt %d\n", gms, atomic_read(&gms->ms_refcnt));
321 return gms;
322}
323
324void gru_drop_mmu_notifier(struct gru_mm_struct *gms)
325{
326 gru_dbg(grudev, "gms %p, refcnt %d, released %d\n", gms,
327 atomic_read(&gms->ms_refcnt), gms->ms_released);
328 if (atomic_dec_return(&gms->ms_refcnt) == 0) {
329 if (!gms->ms_released)
330 mmu_notifier_unregister(&gms->ms_notifier, current->mm);
331 kfree(gms);
332 }
333}
334
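A minimal caller sketch of the register/drop pairing above (process context,
error handling elided; everything but the two exported calls is illustrative):

	struct gru_mm_struct *gms;

	gms = gru_register_mmu_notifier();  /* new GMS, or a ref on an existing one */
	if (!gms)
		return -ENOMEM;
	/* ... GRU contexts attach to this mm; shootdowns arrive via gru_mmuops ... */
	gru_drop_mmu_notifier(gms);         /* unregisters and frees on last ref */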
335/*
336 * Setup TGH parameters. There are:
337 * - 24 TGH handles per GRU chiplet
 338 * - a portion (MAX_LOCAL_TGH) of the handles is reserved for
339 * use by blade-local cpus
340 * - the rest are used by off-blade cpus. This usage is
341 * less frequent than blade-local usage.
342 *
343 * For now, use 16 handles for local flushes, 8 for remote flushes. If the blade
 344 * has 16 or fewer cpus, each cpu has a unique handle that it can
345 * use.
346 */
347#define MAX_LOCAL_TGH 16
348
349void gru_tgh_flush_init(struct gru_state *gru)
350{
351 int cpus, shift = 0, n;
352
353 cpus = uv_blade_nr_possible_cpus(gru->gs_blade_id);
354
355 /* n = cpus rounded up to next power of 2 */
356 if (cpus) {
357 n = 1 << fls(cpus - 1);
358
359 /*
360 * shift count for converting local cpu# to TGH index
361 * 0 if cpus <= MAX_LOCAL_TGH,
362 * 1 if cpus <= 2*MAX_LOCAL_TGH,
363 * etc
364 */
365 shift = max(0, fls(n - 1) - fls(MAX_LOCAL_TGH - 1));
366 }
367 gru->gs_tgh_local_shift = shift;
368
 369 /* first TGH index to use for remote purges */
370 gru->gs_tgh_first_remote = (cpus + (1 << shift) - 1) >> shift;
371
372}
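A worked example of the computation above, assuming MAX_LOCAL_TGH = 16 and a
blade with 24 possible cpus:

	n     = 1 << fls(24 - 1)                  = 32   /* round up to power of 2 */
	shift = max(0, fls(32 - 1) - fls(16 - 1)) = 5 - 4 = 1

so gs_tgh_local_shift is 1 (cpu pairs 0/1, 2/3, ... share TGHs 0..11) and
gs_tgh_first_remote is (24 + 1) >> 1 = 12, leaving TGHs 12..23 of the 24 per
chiplet for the randomly selected off-blade flushes.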
diff --git a/drivers/misc/sgi-xp/Makefile b/drivers/misc/sgi-xp/Makefile
index b6e40a7958ce..35ce28578075 100644
--- a/drivers/misc/sgi-xp/Makefile
+++ b/drivers/misc/sgi-xp/Makefile
@@ -3,9 +3,17 @@
3# 3#
4 4
5obj-$(CONFIG_SGI_XP) += xp.o 5obj-$(CONFIG_SGI_XP) += xp.o
6xp-y := xp_main.o xp_nofault.o 6xp-y := xp_main.o
7xp-$(CONFIG_IA64_SGI_SN2) += xp_sn2.o xp_nofault.o
8xp-$(CONFIG_IA64_GENERIC) += xp_sn2.o xp_nofault.o xp_uv.o
9xp-$(CONFIG_IA64_SGI_UV) += xp_uv.o
10xp-$(CONFIG_X86_64) += xp_uv.o
7 11
8obj-$(CONFIG_SGI_XP) += xpc.o 12obj-$(CONFIG_SGI_XP) += xpc.o
9xpc-y := xpc_main.o xpc_channel.o xpc_partition.o 13xpc-y := xpc_main.o xpc_channel.o xpc_partition.o
14xpc-$(CONFIG_IA64_SGI_SN2) += xpc_sn2.o
15xpc-$(CONFIG_IA64_GENERIC) += xpc_sn2.o xpc_uv.o
16xpc-$(CONFIG_IA64_SGI_UV) += xpc_uv.o
17xpc-$(CONFIG_X86_64) += xpc_uv.o
10 18
11obj-$(CONFIG_SGI_XP) += xpnet.o 19obj-$(CONFIG_SGI_XP) += xpnet.o
diff --git a/drivers/misc/sgi-xp/xp.h b/drivers/misc/sgi-xp/xp.h
index 03a87a307e32..859a5281c61b 100644
--- a/drivers/misc/sgi-xp/xp.h
+++ b/drivers/misc/sgi-xp/xp.h
@@ -13,11 +13,34 @@
13#ifndef _DRIVERS_MISC_SGIXP_XP_H 13#ifndef _DRIVERS_MISC_SGIXP_XP_H
14#define _DRIVERS_MISC_SGIXP_XP_H 14#define _DRIVERS_MISC_SGIXP_XP_H
15 15
16#include <linux/cache.h>
17#include <linux/hardirq.h>
18#include <linux/mutex.h> 16#include <linux/mutex.h>
19#include <asm/sn/types.h> 17
20#include <asm/sn/bte.h> 18#ifdef CONFIG_IA64
19#include <asm/system.h>
20#include <asm/sn/arch.h> /* defines is_shub1() and is_shub2() */
21#define is_shub() ia64_platform_is("sn2")
22#define is_uv() ia64_platform_is("uv")
23#endif
24#ifdef CONFIG_X86_64
25#include <asm/genapic.h>
26#define is_uv() is_uv_system()
27#endif
28
29#ifndef is_shub1
30#define is_shub1() 0
31#endif
32
33#ifndef is_shub2
34#define is_shub2() 0
35#endif
36
37#ifndef is_shub
38#define is_shub() 0
39#endif
40
41#ifndef is_uv
42#define is_uv() 0
43#endif
21 44
22#ifdef USE_DBUG_ON 45#ifdef USE_DBUG_ON
23#define DBUG_ON(condition) BUG_ON(condition) 46#define DBUG_ON(condition) BUG_ON(condition)
@@ -26,133 +49,56 @@
26#endif 49#endif
27 50
28/* 51/*
29 * Define the maximum number of logically defined partitions the system 52 * Define the maximum number of partitions the system can possibly support.
30 * can support. It is constrained by the maximum number of hardware 53 * It is based on the maximum number of hardware partitionable regions. The
31 * partitionable regions. The term 'region' in this context refers to the 54 * term 'region' in this context refers to the minimum number of nodes that
32 * minimum number of nodes that can comprise an access protection grouping. 55 * can comprise an access protection grouping. The access protection is in
33 * The access protection is in regards to memory, IPI and IOI. 56 * regards to memory, IPI and IOI.
34 * 57 *
35 * The maximum number of hardware partitionable regions is equal to the 58 * The maximum number of hardware partitionable regions is equal to the
36 * maximum number of nodes in the entire system divided by the minimum number 59 * maximum number of nodes in the entire system divided by the minimum number
37 * of nodes that comprise an access protection grouping. 60 * of nodes that comprise an access protection grouping.
38 */ 61 */
39#define XP_MAX_PARTITIONS 64 62#define XP_MAX_NPARTITIONS_SN2 64
40 63#define XP_MAX_NPARTITIONS_UV 256
41/*
42 * Define the number of u64s required to represent all the C-brick nasids
43 * as a bitmap. The cross-partition kernel modules deal only with
44 * C-brick nasids, thus the need for bitmaps which don't account for
45 * odd-numbered (non C-brick) nasids.
46 */
47#define XP_MAX_PHYSNODE_ID (MAX_NUMALINK_NODES / 2)
48#define XP_NASID_MASK_BYTES ((XP_MAX_PHYSNODE_ID + 7) / 8)
49#define XP_NASID_MASK_WORDS ((XP_MAX_PHYSNODE_ID + 63) / 64)
50
51/*
52 * Wrapper for bte_copy() that should it return a failure status will retry
53 * the bte_copy() once in the hope that the failure was due to a temporary
54 * aberration (i.e., the link going down temporarily).
55 *
56 * src - physical address of the source of the transfer.
57 * vdst - virtual address of the destination of the transfer.
58 * len - number of bytes to transfer from source to destination.
59 * mode - see bte_copy() for definition.
60 * notification - see bte_copy() for definition.
61 *
62 * Note: xp_bte_copy() should never be called while holding a spinlock.
63 */
64static inline bte_result_t
65xp_bte_copy(u64 src, u64 vdst, u64 len, u64 mode, void *notification)
66{
67 bte_result_t ret;
68 u64 pdst = ia64_tpa(vdst);
69
70 /*
71 * Ensure that the physically mapped memory is contiguous.
72 *
73 * We do this by ensuring that the memory is from region 7 only.
74 * If the need should arise to use memory from one of the other
75 * regions, then modify the BUG_ON() statement to ensure that the
76 * memory from that region is always physically contiguous.
77 */
78 BUG_ON(REGION_NUMBER(vdst) != RGN_KERNEL);
79
80 ret = bte_copy(src, pdst, len, mode, notification);
81 if ((ret != BTE_SUCCESS) && BTE_ERROR_RETRY(ret)) {
82 if (!in_interrupt())
83 cond_resched();
84
85 ret = bte_copy(src, pdst, len, mode, notification);
86 }
87
88 return ret;
89}
90 64
91/* 65/*
92 * XPC establishes channel connections between the local partition and any 66 * XPC establishes channel connections between the local partition and any
93 * other partition that is currently up. Over these channels, kernel-level 67 * other partition that is currently up. Over these channels, kernel-level
94 * `users' can communicate with their counterparts on the other partitions. 68 * `users' can communicate with their counterparts on the other partitions.
95 * 69 *
96 * The maxinum number of channels is limited to eight. For performance reasons,
97 * the internal cross partition structures require sixteen bytes per channel,
98 * and eight allows all of this interface-shared info to fit in one cache line.
99 *
100 * XPC_NCHANNELS reflects the total number of channels currently defined.
101 * If the need for additional channels arises, one can simply increase 70 * If the need for additional channels arises, one can simply increase
102 * XPC_NCHANNELS accordingly. If the day should come where that number 71 * XPC_MAX_NCHANNELS accordingly. If the day should come where that number
103 * exceeds the MAXIMUM number of channels allowed (eight), then one will need 72 * exceeds the absolute MAXIMUM number of channels possible (eight), then one
104 * to make changes to the XPC code to allow for this. 73 * will need to make changes to the XPC code to accommodate this.
74 *
75 * The absolute maximum number of channels possible is limited to eight for
76 * performance reasons on sn2 hardware. The internal cross partition structures
77 * require sixteen bytes per channel, and eight allows all of this
78 * interface-shared info to fit in one 128-byte cacheline.
105 */ 79 */
106#define XPC_MEM_CHANNEL 0 /* memory channel number */ 80#define XPC_MEM_CHANNEL 0 /* memory channel number */
107#define XPC_NET_CHANNEL 1 /* network channel number */ 81#define XPC_NET_CHANNEL 1 /* network channel number */
108 82
109#define XPC_NCHANNELS 2 /* #of defined channels */ 83#define XPC_MAX_NCHANNELS 2 /* max #of channels allowed */
110#define XPC_MAX_NCHANNELS 8 /* max #of channels allowed */
111 84
112#if XPC_NCHANNELS > XPC_MAX_NCHANNELS 85#if XPC_MAX_NCHANNELS > 8
113#error XPC_NCHANNELS exceeds MAXIMUM allowed. 86#error XPC_MAX_NCHANNELS exceeds absolute MAXIMUM possible.
114#endif 87#endif
115 88
116/* 89/*
117 * The format of an XPC message is as follows: 90 * The macro XPC_MSG_SIZE() is provided for the user
118 *
119 * +-------+--------------------------------+
120 * | flags |////////////////////////////////|
121 * +-------+--------------------------------+
122 * | message # |
123 * +----------------------------------------+
124 * | payload (user-defined message) |
125 * | |
126 * :
127 * | |
128 * +----------------------------------------+
129 *
130 * The size of the payload is defined by the user via xpc_connect(). A user-
131 * defined message resides in the payload area.
132 *
133 * The user should have no dealings with the message header, but only the
134 * message's payload. When a message entry is allocated (via xpc_allocate())
135 * a pointer to the payload area is returned and not the actual beginning of
136 * the XPC message. The user then constructs a message in the payload area
137 * and passes that pointer as an argument on xpc_send() or xpc_send_notify().
138 *
139 * The size of a message entry (within a message queue) must be a cacheline
140 * sized multiple in order to facilitate the BTE transfer of messages from one
141 * message queue to another. A macro, XPC_MSG_SIZE(), is provided for the user
142 * that wants to fit as many msg entries as possible in a given memory size 91 * that wants to fit as many msg entries as possible in a given memory size
143 * (e.g. a memory page). 92 * (e.g. a memory page).
144 */ 93 */
145struct xpc_msg { 94#define XPC_MSG_MAX_SIZE 128
146 u8 flags; /* FOR XPC INTERNAL USE ONLY */ 95#define XPC_MSG_HDR_MAX_SIZE 16
147 u8 reserved[7]; /* FOR XPC INTERNAL USE ONLY */ 96#define XPC_MSG_PAYLOAD_MAX_SIZE (XPC_MSG_MAX_SIZE - XPC_MSG_HDR_MAX_SIZE)
148 s64 number; /* FOR XPC INTERNAL USE ONLY */
149
150 u64 payload; /* user defined portion of message */
151};
152 97
153#define XPC_MSG_PAYLOAD_OFFSET (u64) (&((struct xpc_msg *)0)->payload)
154#define XPC_MSG_SIZE(_payload_size) \ 98#define XPC_MSG_SIZE(_payload_size) \
155 L1_CACHE_ALIGN(XPC_MSG_PAYLOAD_OFFSET + (_payload_size)) 99 ALIGN(XPC_MSG_HDR_MAX_SIZE + (_payload_size), \
100 is_uv() ? 64 : 128)
101
156 102
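A worked example of the new sizing (the values follow directly from the macros
above): for a 40-byte payload, XPC_MSG_SIZE(40) = ALIGN(16 + 40, 64) = 64 on
uv, but ALIGN(16 + 40, 128) = 128 on sn2. The largest payload that still fits
is XPC_MSG_PAYLOAD_MAX_SIZE = 112, since XPC_MSG_SIZE(112) = 128 =
XPC_MSG_MAX_SIZE on both platforms; anything bigger is rejected by
xpc_connect() with xpPayloadTooBig.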
157/* 103/*
158 * Define the return values and values passed to user's callout functions. 104 * Define the return values and values passed to user's callout functions.
@@ -233,8 +179,20 @@ enum xp_retval {
233 xpDisconnected, /* 51: channel disconnected (closed) */ 179 xpDisconnected, /* 51: channel disconnected (closed) */
234 180
235 xpBteCopyError, /* 52: bte_copy() returned error */ 181 xpBteCopyError, /* 52: bte_copy() returned error */
182 xpSalError, /* 53: sn SAL error */
183 xpRsvdPageNotSet, /* 54: the reserved page is not set up */
184 xpPayloadTooBig, /* 55: payload too large for message slot */
185
186 xpUnsupported, /* 56: unsupported functionality or resource */
187 xpNeedMoreInfo, /* 57: more info is needed by SAL */
236 188
237 xpUnknownReason /* 53: unknown reason - must be last in enum */ 189 xpGruCopyError, /* 58: gru_copy_gpa() returned error */
190 xpGruSendMqError, /* 59: gru send message queue related error */
191
192 xpBadChannelNumber, /* 60: invalid channel number */
 193 xpBadMsgType, /* 61: invalid message type */
194
 195 xpUnknownReason /* 62: unknown reason - must be last in enum */
238}; 196};
239 197
240/* 198/*
@@ -285,6 +243,9 @@ typedef void (*xpc_channel_func) (enum xp_retval reason, short partid,
285 * calling xpc_received(). 243 * calling xpc_received().
286 * 244 *
287 * All other reason codes indicate failure. 245 * All other reason codes indicate failure.
246 *
247 * NOTE: The user defined function must be callable by an interrupt handler
248 * and thus cannot block.
288 */ 249 */
289typedef void (*xpc_notify_func) (enum xp_retval reason, short partid, 250typedef void (*xpc_notify_func) (enum xp_retval reason, short partid,
290 int ch_number, void *key); 251 int ch_number, void *key);
@@ -308,23 +269,22 @@ struct xpc_registration {
308 xpc_channel_func func; /* function to call */ 269 xpc_channel_func func; /* function to call */
309 void *key; /* pointer to user's key */ 270 void *key; /* pointer to user's key */
310 u16 nentries; /* #of msg entries in local msg queue */ 271 u16 nentries; /* #of msg entries in local msg queue */
311 u16 msg_size; /* message queue's message size */ 272 u16 entry_size; /* message queue's message entry size */
312 u32 assigned_limit; /* limit on #of assigned kthreads */ 273 u32 assigned_limit; /* limit on #of assigned kthreads */
313 u32 idle_limit; /* limit on #of idle kthreads */ 274 u32 idle_limit; /* limit on #of idle kthreads */
314} ____cacheline_aligned; 275} ____cacheline_aligned;
315 276
316#define XPC_CHANNEL_REGISTERED(_c) (xpc_registrations[_c].func != NULL) 277#define XPC_CHANNEL_REGISTERED(_c) (xpc_registrations[_c].func != NULL)
317 278
318/* the following are valid xpc_allocate() flags */ 279/* the following are valid xpc_send() or xpc_send_notify() flags */
319#define XPC_WAIT 0 /* wait flag */ 280#define XPC_WAIT 0 /* wait flag */
320#define XPC_NOWAIT 1 /* no wait flag */ 281#define XPC_NOWAIT 1 /* no wait flag */
321 282
322struct xpc_interface { 283struct xpc_interface {
323 void (*connect) (int); 284 void (*connect) (int);
324 void (*disconnect) (int); 285 void (*disconnect) (int);
325 enum xp_retval (*allocate) (short, int, u32, void **); 286 enum xp_retval (*send) (short, int, u32, void *, u16);
326 enum xp_retval (*send) (short, int, void *); 287 enum xp_retval (*send_notify) (short, int, u32, void *, u16,
327 enum xp_retval (*send_notify) (short, int, void *,
328 xpc_notify_func, void *); 288 xpc_notify_func, void *);
329 void (*received) (short, int, void *); 289 void (*received) (short, int, void *);
330 enum xp_retval (*partid_to_nasids) (short, void *); 290 enum xp_retval (*partid_to_nasids) (short, void *);
@@ -334,10 +294,9 @@ extern struct xpc_interface xpc_interface;
334 294
335extern void xpc_set_interface(void (*)(int), 295extern void xpc_set_interface(void (*)(int),
336 void (*)(int), 296 void (*)(int),
337 enum xp_retval (*)(short, int, u32, void **), 297 enum xp_retval (*)(short, int, u32, void *, u16),
338 enum xp_retval (*)(short, int, void *), 298 enum xp_retval (*)(short, int, u32, void *, u16,
339 enum xp_retval (*)(short, int, void *, 299 xpc_notify_func, void *),
340 xpc_notify_func, void *),
341 void (*)(short, int, void *), 300 void (*)(short, int, void *),
342 enum xp_retval (*)(short, void *)); 301 enum xp_retval (*)(short, void *));
343extern void xpc_clear_interface(void); 302extern void xpc_clear_interface(void);
@@ -347,22 +306,19 @@ extern enum xp_retval xpc_connect(int, xpc_channel_func, void *, u16,
347extern void xpc_disconnect(int); 306extern void xpc_disconnect(int);
348 307
349static inline enum xp_retval 308static inline enum xp_retval
350xpc_allocate(short partid, int ch_number, u32 flags, void **payload) 309xpc_send(short partid, int ch_number, u32 flags, void *payload,
351{ 310 u16 payload_size)
352 return xpc_interface.allocate(partid, ch_number, flags, payload);
353}
354
355static inline enum xp_retval
356xpc_send(short partid, int ch_number, void *payload)
357{ 311{
358 return xpc_interface.send(partid, ch_number, payload); 312 return xpc_interface.send(partid, ch_number, flags, payload,
313 payload_size);
359} 314}
360 315
361static inline enum xp_retval 316static inline enum xp_retval
362xpc_send_notify(short partid, int ch_number, void *payload, 317xpc_send_notify(short partid, int ch_number, u32 flags, void *payload,
363 xpc_notify_func func, void *key) 318 u16 payload_size, xpc_notify_func func, void *key)
364{ 319{
365 return xpc_interface.send_notify(partid, ch_number, payload, func, key); 320 return xpc_interface.send_notify(partid, ch_number, flags, payload,
321 payload_size, func, key);
366} 322}
367 323
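A hypothetical caller sketch of the reworked send path; my_msg, dest_partid
and my_notify_cb are illustrative, not part of this patch:

	struct my_msg {
		u64 cmd;
		u64 args[3];
	};

	struct my_msg msg = { .cmd = 1 };
	enum xp_retval ret;

	/* don't block waiting for a free message slot */
	ret = xpc_send(dest_partid, XPC_NET_CHANNEL, XPC_NOWAIT,
		       &msg, sizeof(msg));

	/* or ask for delivery notification; per the NOTE above, my_notify_cb()
	   must not block since it can be called from an interrupt handler */
	ret = xpc_send_notify(dest_partid, XPC_NET_CHANNEL, XPC_WAIT,
			      &msg, sizeof(msg), my_notify_cb, NULL);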
368static inline void 324static inline void
@@ -377,8 +333,23 @@ xpc_partid_to_nasids(short partid, void *nasids)
377 return xpc_interface.partid_to_nasids(partid, nasids); 333 return xpc_interface.partid_to_nasids(partid, nasids);
378} 334}
379 335
336extern short xp_max_npartitions;
337extern short xp_partition_id;
338extern u8 xp_region_size;
339
340extern unsigned long (*xp_pa) (void *);
341extern enum xp_retval (*xp_remote_memcpy) (unsigned long, const unsigned long,
342 size_t);
343extern int (*xp_cpu_to_nasid) (int);
344
380extern u64 xp_nofault_PIOR_target; 345extern u64 xp_nofault_PIOR_target;
381extern int xp_nofault_PIOR(void *); 346extern int xp_nofault_PIOR(void *);
382extern int xp_error_PIOR(void); 347extern int xp_error_PIOR(void);
383 348
349extern struct device *xp;
350extern enum xp_retval xp_init_sn2(void);
351extern enum xp_retval xp_init_uv(void);
352extern void xp_exit_sn2(void);
353extern void xp_exit_uv(void);
354
384#endif /* _DRIVERS_MISC_SGIXP_XP_H */ 355#endif /* _DRIVERS_MISC_SGIXP_XP_H */
diff --git a/drivers/misc/sgi-xp/xp_main.c b/drivers/misc/sgi-xp/xp_main.c
index 196480b691a1..66a1d19e08ad 100644
--- a/drivers/misc/sgi-xp/xp_main.c
+++ b/drivers/misc/sgi-xp/xp_main.c
@@ -14,29 +14,48 @@
14 * 14 *
15 */ 15 */
16 16
17#include <linux/kernel.h>
18#include <linux/interrupt.h>
19#include <linux/module.h> 17#include <linux/module.h>
20#include <linux/mutex.h> 18#include <linux/device.h>
21#include <asm/sn/intr.h>
22#include <asm/sn/sn_sal.h>
23#include "xp.h" 19#include "xp.h"
24 20
25/* 21/* define the XP debug device structures to be used with dev_dbg() et al */
26 * The export of xp_nofault_PIOR needs to happen here since it is defined 22
27 * in drivers/misc/sgi-xp/xp_nofault.S. The target of the nofault read is 23struct device_driver xp_dbg_name = {
28 * defined here. 24 .name = "xp"
29 */ 25};
30EXPORT_SYMBOL_GPL(xp_nofault_PIOR); 26
27struct device xp_dbg_subname = {
28 .bus_id = {0}, /* set to "" */
29 .driver = &xp_dbg_name
30};
31
32struct device *xp = &xp_dbg_subname;
33
34/* max #of partitions possible */
35short xp_max_npartitions;
36EXPORT_SYMBOL_GPL(xp_max_npartitions);
37
38short xp_partition_id;
39EXPORT_SYMBOL_GPL(xp_partition_id);
40
41u8 xp_region_size;
42EXPORT_SYMBOL_GPL(xp_region_size);
43
44unsigned long (*xp_pa) (void *addr);
45EXPORT_SYMBOL_GPL(xp_pa);
46
47enum xp_retval (*xp_remote_memcpy) (unsigned long dst_gpa,
48 const unsigned long src_gpa, size_t len);
49EXPORT_SYMBOL_GPL(xp_remote_memcpy);
31 50
32u64 xp_nofault_PIOR_target; 51int (*xp_cpu_to_nasid) (int cpuid);
33EXPORT_SYMBOL_GPL(xp_nofault_PIOR_target); 52EXPORT_SYMBOL_GPL(xp_cpu_to_nasid);
34 53
35/* 54/*
36 * xpc_registrations[] keeps track of xpc_connect()'s done by the kernel-level 55 * xpc_registrations[] keeps track of xpc_connect()'s done by the kernel-level
37 * users of XPC. 56 * users of XPC.
38 */ 57 */
39struct xpc_registration xpc_registrations[XPC_NCHANNELS]; 58struct xpc_registration xpc_registrations[XPC_MAX_NCHANNELS];
40EXPORT_SYMBOL_GPL(xpc_registrations); 59EXPORT_SYMBOL_GPL(xpc_registrations);
41 60
42/* 61/*
@@ -51,10 +70,9 @@ xpc_notloaded(void)
51struct xpc_interface xpc_interface = { 70struct xpc_interface xpc_interface = {
52 (void (*)(int))xpc_notloaded, 71 (void (*)(int))xpc_notloaded,
53 (void (*)(int))xpc_notloaded, 72 (void (*)(int))xpc_notloaded,
54 (enum xp_retval(*)(short, int, u32, void **))xpc_notloaded, 73 (enum xp_retval(*)(short, int, u32, void *, u16))xpc_notloaded,
55 (enum xp_retval(*)(short, int, void *))xpc_notloaded, 74 (enum xp_retval(*)(short, int, u32, void *, u16, xpc_notify_func,
56 (enum xp_retval(*)(short, int, void *, xpc_notify_func, void *)) 75 void *))xpc_notloaded,
57 xpc_notloaded,
58 (void (*)(short, int, void *))xpc_notloaded, 76 (void (*)(short, int, void *))xpc_notloaded,
59 (enum xp_retval(*)(short, void *))xpc_notloaded 77 (enum xp_retval(*)(short, void *))xpc_notloaded
60}; 78};
@@ -66,16 +84,14 @@ EXPORT_SYMBOL_GPL(xpc_interface);
66void 84void
67xpc_set_interface(void (*connect) (int), 85xpc_set_interface(void (*connect) (int),
68 void (*disconnect) (int), 86 void (*disconnect) (int),
69 enum xp_retval (*allocate) (short, int, u32, void **), 87 enum xp_retval (*send) (short, int, u32, void *, u16),
70 enum xp_retval (*send) (short, int, void *), 88 enum xp_retval (*send_notify) (short, int, u32, void *, u16,
71 enum xp_retval (*send_notify) (short, int, void *,
72 xpc_notify_func, void *), 89 xpc_notify_func, void *),
73 void (*received) (short, int, void *), 90 void (*received) (short, int, void *),
74 enum xp_retval (*partid_to_nasids) (short, void *)) 91 enum xp_retval (*partid_to_nasids) (short, void *))
75{ 92{
76 xpc_interface.connect = connect; 93 xpc_interface.connect = connect;
77 xpc_interface.disconnect = disconnect; 94 xpc_interface.disconnect = disconnect;
78 xpc_interface.allocate = allocate;
79 xpc_interface.send = send; 95 xpc_interface.send = send;
80 xpc_interface.send_notify = send_notify; 96 xpc_interface.send_notify = send_notify;
81 xpc_interface.received = received; 97 xpc_interface.received = received;
@@ -91,13 +107,11 @@ xpc_clear_interface(void)
91{ 107{
92 xpc_interface.connect = (void (*)(int))xpc_notloaded; 108 xpc_interface.connect = (void (*)(int))xpc_notloaded;
93 xpc_interface.disconnect = (void (*)(int))xpc_notloaded; 109 xpc_interface.disconnect = (void (*)(int))xpc_notloaded;
94 xpc_interface.allocate = (enum xp_retval(*)(short, int, u32, 110 xpc_interface.send = (enum xp_retval(*)(short, int, u32, void *, u16))
95 void **))xpc_notloaded;
96 xpc_interface.send = (enum xp_retval(*)(short, int, void *))
97 xpc_notloaded; 111 xpc_notloaded;
98 xpc_interface.send_notify = (enum xp_retval(*)(short, int, void *, 112 xpc_interface.send_notify = (enum xp_retval(*)(short, int, u32, void *,
99 xpc_notify_func, 113 u16, xpc_notify_func,
100 void *))xpc_notloaded; 114 void *))xpc_notloaded;
101 xpc_interface.received = (void (*)(short, int, void *)) 115 xpc_interface.received = (void (*)(short, int, void *))
102 xpc_notloaded; 116 xpc_notloaded;
103 xpc_interface.partid_to_nasids = (enum xp_retval(*)(short, void *)) 117 xpc_interface.partid_to_nasids = (enum xp_retval(*)(short, void *))
@@ -135,11 +149,14 @@ xpc_connect(int ch_number, xpc_channel_func func, void *key, u16 payload_size,
135{ 149{
136 struct xpc_registration *registration; 150 struct xpc_registration *registration;
137 151
138 DBUG_ON(ch_number < 0 || ch_number >= XPC_NCHANNELS); 152 DBUG_ON(ch_number < 0 || ch_number >= XPC_MAX_NCHANNELS);
139 DBUG_ON(payload_size == 0 || nentries == 0); 153 DBUG_ON(payload_size == 0 || nentries == 0);
140 DBUG_ON(func == NULL); 154 DBUG_ON(func == NULL);
141 DBUG_ON(assigned_limit == 0 || idle_limit > assigned_limit); 155 DBUG_ON(assigned_limit == 0 || idle_limit > assigned_limit);
142 156
157 if (XPC_MSG_SIZE(payload_size) > XPC_MSG_MAX_SIZE)
158 return xpPayloadTooBig;
159
143 registration = &xpc_registrations[ch_number]; 160 registration = &xpc_registrations[ch_number];
144 161
145 if (mutex_lock_interruptible(&registration->mutex) != 0) 162 if (mutex_lock_interruptible(&registration->mutex) != 0)
@@ -152,7 +169,7 @@ xpc_connect(int ch_number, xpc_channel_func func, void *key, u16 payload_size,
152 } 169 }
153 170
154 /* register the channel for connection */ 171 /* register the channel for connection */
155 registration->msg_size = XPC_MSG_SIZE(payload_size); 172 registration->entry_size = XPC_MSG_SIZE(payload_size);
156 registration->nentries = nentries; 173 registration->nentries = nentries;
157 registration->assigned_limit = assigned_limit; 174 registration->assigned_limit = assigned_limit;
158 registration->idle_limit = idle_limit; 175 registration->idle_limit = idle_limit;
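For illustration, the new XPC_MSG_SIZE() check above refuses any registration
whose messages would not fit in one XPC_MSG_MAX_SIZE slot (my_channel_func and
the limit values are illustrative):

	/* XPC_MSG_SIZE(96) = 128 <= XPC_MSG_MAX_SIZE: accepted */
	ret = xpc_connect(XPC_NET_CHANNEL, my_channel_func, NULL, 96,
			  128, 4, 2);

	/* XPC_MSG_SIZE(256) > XPC_MSG_MAX_SIZE on either platform:
	   returns xpPayloadTooBig */
	ret = xpc_connect(XPC_NET_CHANNEL, my_channel_func, NULL, 256,
			  128, 4, 2);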
@@ -185,7 +202,7 @@ xpc_disconnect(int ch_number)
185{ 202{
186 struct xpc_registration *registration; 203 struct xpc_registration *registration;
187 204
188 DBUG_ON(ch_number < 0 || ch_number >= XPC_NCHANNELS); 205 DBUG_ON(ch_number < 0 || ch_number >= XPC_MAX_NCHANNELS);
189 206
190 registration = &xpc_registrations[ch_number]; 207 registration = &xpc_registrations[ch_number];
191 208
@@ -206,7 +223,7 @@ xpc_disconnect(int ch_number)
206 registration->func = NULL; 223 registration->func = NULL;
207 registration->key = NULL; 224 registration->key = NULL;
208 registration->nentries = 0; 225 registration->nentries = 0;
209 registration->msg_size = 0; 226 registration->entry_size = 0;
210 registration->assigned_limit = 0; 227 registration->assigned_limit = 0;
211 registration->idle_limit = 0; 228 registration->idle_limit = 0;
212 229
@@ -221,39 +238,21 @@ EXPORT_SYMBOL_GPL(xpc_disconnect);
221int __init 238int __init
222xp_init(void) 239xp_init(void)
223{ 240{
224 int ret, ch_number; 241 enum xp_retval ret;
225 u64 func_addr = *(u64 *)xp_nofault_PIOR; 242 int ch_number;
226 u64 err_func_addr = *(u64 *)xp_error_PIOR;
227
228 if (!ia64_platform_is("sn2"))
229 return -ENODEV;
230 243
231 /* 244 if (is_shub())
232 * Register a nofault code region which performs a cross-partition 245 ret = xp_init_sn2();
233 * PIO read. If the PIO read times out, the MCA handler will consume 246 else if (is_uv())
234 * the error and return to a kernel-provided instruction to indicate 247 ret = xp_init_uv();
235 * an error. This PIO read exists because it is guaranteed to timeout
236 * if the destination is down (AMO operations do not timeout on at
237 * least some CPUs on Shubs <= v1.2, which unfortunately we have to
238 * work around).
239 */
240 ret = sn_register_nofault_code(func_addr, err_func_addr, err_func_addr,
241 1, 1);
242 if (ret != 0) {
243 printk(KERN_ERR "XP: can't register nofault code, error=%d\n",
244 ret);
245 }
246 /*
247 * Setup the nofault PIO read target. (There is no special reason why
248 * SH_IPI_ACCESS was selected.)
249 */
250 if (is_shub2())
251 xp_nofault_PIOR_target = SH2_IPI_ACCESS0;
252 else 248 else
253 xp_nofault_PIOR_target = SH1_IPI_ACCESS; 249 ret = xpUnsupported;
250
251 if (ret != xpSuccess)
252 return -ENODEV;
254 253
255 /* initialize the connection registration mutex */ 254 /* initialize the connection registration mutex */
256 for (ch_number = 0; ch_number < XPC_NCHANNELS; ch_number++) 255 for (ch_number = 0; ch_number < XPC_MAX_NCHANNELS; ch_number++)
257 mutex_init(&xpc_registrations[ch_number].mutex); 256 mutex_init(&xpc_registrations[ch_number].mutex);
258 257
259 return 0; 258 return 0;
@@ -264,12 +263,10 @@ module_init(xp_init);
264void __exit 263void __exit
265xp_exit(void) 264xp_exit(void)
266{ 265{
267 u64 func_addr = *(u64 *)xp_nofault_PIOR; 266 if (is_shub())
268 u64 err_func_addr = *(u64 *)xp_error_PIOR; 267 xp_exit_sn2();
269 268 else if (is_uv())
270 /* unregister the PIO read nofault code region */ 269 xp_exit_uv();
271 (void)sn_register_nofault_code(func_addr, err_func_addr,
272 err_func_addr, 1, 0);
273} 270}
274 271
275module_exit(xp_exit); 272module_exit(xp_exit);
diff --git a/drivers/misc/sgi-xp/xp_sn2.c b/drivers/misc/sgi-xp/xp_sn2.c
new file mode 100644
index 000000000000..1440134caf31
--- /dev/null
+++ b/drivers/misc/sgi-xp/xp_sn2.c
@@ -0,0 +1,146 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
7 */
8
9/*
10 * Cross Partition (XP) sn2-based functions.
11 *
12 * Architecture specific implementation of common functions.
13 */
14
15#include <linux/module.h>
16#include <linux/device.h>
17#include <asm/sn/bte.h>
18#include <asm/sn/sn_sal.h>
19#include "xp.h"
20
21/*
22 * The export of xp_nofault_PIOR needs to happen here since it is defined
23 * in drivers/misc/sgi-xp/xp_nofault.S. The target of the nofault read is
24 * defined here.
25 */
26EXPORT_SYMBOL_GPL(xp_nofault_PIOR);
27
28u64 xp_nofault_PIOR_target;
29EXPORT_SYMBOL_GPL(xp_nofault_PIOR_target);
30
31/*
32 * Register a nofault code region which performs a cross-partition PIO read.
33 * If the PIO read times out, the MCA handler will consume the error and
34 * return to a kernel-provided instruction to indicate an error. This PIO read
35 * exists because it is guaranteed to timeout if the destination is down
36 * (amo operations do not timeout on at least some CPUs on Shubs <= v1.2,
37 * which unfortunately we have to work around).
38 */
39static enum xp_retval
40xp_register_nofault_code_sn2(void)
41{
42 int ret;
43 u64 func_addr;
44 u64 err_func_addr;
45
46 func_addr = *(u64 *)xp_nofault_PIOR;
47 err_func_addr = *(u64 *)xp_error_PIOR;
48 ret = sn_register_nofault_code(func_addr, err_func_addr, err_func_addr,
49 1, 1);
50 if (ret != 0) {
51 dev_err(xp, "can't register nofault code, error=%d\n", ret);
52 return xpSalError;
53 }
54 /*
55 * Setup the nofault PIO read target. (There is no special reason why
56 * SH_IPI_ACCESS was selected.)
57 */
58 if (is_shub1())
59 xp_nofault_PIOR_target = SH1_IPI_ACCESS;
60 else if (is_shub2())
61 xp_nofault_PIOR_target = SH2_IPI_ACCESS0;
62
63 return xpSuccess;
64}
65
66static void
67xp_unregister_nofault_code_sn2(void)
68{
69 u64 func_addr = *(u64 *)xp_nofault_PIOR;
70 u64 err_func_addr = *(u64 *)xp_error_PIOR;
71
72 /* unregister the PIO read nofault code region */
73 (void)sn_register_nofault_code(func_addr, err_func_addr,
74 err_func_addr, 1, 0);
75}
76
77/*
78 * Convert a virtual memory address to a physical memory address.
79 */
80static unsigned long
81xp_pa_sn2(void *addr)
82{
83 return __pa(addr);
84}
85
86/*
87 * Wrapper for bte_copy().
88 *
89 * dst_pa - physical address of the destination of the transfer.
90 * src_pa - physical address of the source of the transfer.
91 * len - number of bytes to transfer from source to destination.
92 *
93 * Note: xp_remote_memcpy_sn2() should never be called while holding a spinlock.
94 */
95static enum xp_retval
96xp_remote_memcpy_sn2(unsigned long dst_pa, const unsigned long src_pa,
97 size_t len)
98{
99 bte_result_t ret;
100
101 ret = bte_copy(src_pa, dst_pa, len, (BTE_NOTIFY | BTE_WACQUIRE), NULL);
102 if (ret == BTE_SUCCESS)
103 return xpSuccess;
104
105 if (is_shub2()) {
106 dev_err(xp, "bte_copy() on shub2 failed, error=0x%x dst_pa="
107 "0x%016lx src_pa=0x%016lx len=%ld\\n", ret, dst_pa,
108 src_pa, len);
109 } else {
110 dev_err(xp, "bte_copy() failed, error=%d dst_pa=0x%016lx "
111 "src_pa=0x%016lx len=%ld\\n", ret, dst_pa, src_pa, len);
112 }
113
114 return xpBteCopyError;
115}
116
117static int
118xp_cpu_to_nasid_sn2(int cpuid)
119{
120 return cpuid_to_nasid(cpuid);
121}
122
123enum xp_retval
124xp_init_sn2(void)
125{
126 BUG_ON(!is_shub());
127
128 xp_max_npartitions = XP_MAX_NPARTITIONS_SN2;
129 xp_partition_id = sn_partition_id;
130 xp_region_size = sn_region_size;
131
132 xp_pa = xp_pa_sn2;
133 xp_remote_memcpy = xp_remote_memcpy_sn2;
134 xp_cpu_to_nasid = xp_cpu_to_nasid_sn2;
135
136 return xp_register_nofault_code_sn2();
137}
138
139void
140xp_exit_sn2(void)
141{
142 BUG_ON(!is_shub());
143
144 xp_unregister_nofault_code_sn2();
145}
146
diff --git a/drivers/misc/sgi-xp/xp_uv.c b/drivers/misc/sgi-xp/xp_uv.c
new file mode 100644
index 000000000000..d9f7ce2510bc
--- /dev/null
+++ b/drivers/misc/sgi-xp/xp_uv.c
@@ -0,0 +1,72 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
7 */
8
9/*
10 * Cross Partition (XP) uv-based functions.
11 *
12 * Architecture specific implementation of common functions.
13 *
14 */
15
16#include <linux/device.h>
17#include <asm/uv/uv_hub.h>
18#include "../sgi-gru/grukservices.h"
19#include "xp.h"
20
21/*
22 * Convert a virtual memory address to a physical memory address.
23 */
24static unsigned long
25xp_pa_uv(void *addr)
26{
27 return uv_gpa(addr);
28}
29
30static enum xp_retval
31xp_remote_memcpy_uv(unsigned long dst_gpa, const unsigned long src_gpa,
32 size_t len)
33{
34 int ret;
35
36 ret = gru_copy_gpa(dst_gpa, src_gpa, len);
37 if (ret == 0)
38 return xpSuccess;
39
40 dev_err(xp, "gru_copy_gpa() failed, dst_gpa=0x%016lx src_gpa=0x%016lx "
41 "len=%ld\n", dst_gpa, src_gpa, len);
42 return xpGruCopyError;
43}
44
45static int
46xp_cpu_to_nasid_uv(int cpuid)
47{
48 /* ??? Is this same as sn2 nasid in mach/part bitmaps set up by SAL? */
49 return UV_PNODE_TO_NASID(uv_cpu_to_pnode(cpuid));
50}
51
52enum xp_retval
53xp_init_uv(void)
54{
55 BUG_ON(!is_uv());
56
57 xp_max_npartitions = XP_MAX_NPARTITIONS_UV;
58 xp_partition_id = 0; /* !!! not correct value */
59 xp_region_size = 0; /* !!! not correct value */
60
61 xp_pa = xp_pa_uv;
62 xp_remote_memcpy = xp_remote_memcpy_uv;
63 xp_cpu_to_nasid = xp_cpu_to_nasid_uv;
64
65 return xpSuccess;
66}
67
68void
69xp_exit_uv(void)
70{
71 BUG_ON(!is_uv());
72}
diff --git a/drivers/misc/sgi-xp/xpc.h b/drivers/misc/sgi-xp/xpc.h
index 11ac267ed68f..619208d61862 100644
--- a/drivers/misc/sgi-xp/xpc.h
+++ b/drivers/misc/sgi-xp/xpc.h
@@ -13,18 +13,10 @@
13#ifndef _DRIVERS_MISC_SGIXP_XPC_H 13#ifndef _DRIVERS_MISC_SGIXP_XPC_H
14#define _DRIVERS_MISC_SGIXP_XPC_H 14#define _DRIVERS_MISC_SGIXP_XPC_H
15 15
16#include <linux/interrupt.h> 16#include <linux/wait.h>
17#include <linux/sysctl.h>
18#include <linux/device.h>
19#include <linux/mutex.h>
20#include <linux/completion.h> 17#include <linux/completion.h>
21#include <asm/pgtable.h> 18#include <linux/timer.h>
22#include <asm/processor.h> 19#include <linux/sched.h>
23#include <asm/sn/bte.h>
24#include <asm/sn/clksupport.h>
25#include <asm/sn/addrs.h>
26#include <asm/sn/mspec.h>
27#include <asm/sn/shub_mmr.h>
28#include "xp.h" 20#include "xp.h"
29 21
30/* 22/*
@@ -36,23 +28,7 @@
36#define XPC_VERSION_MAJOR(_v) ((_v) >> 4) 28#define XPC_VERSION_MAJOR(_v) ((_v) >> 4)
37#define XPC_VERSION_MINOR(_v) ((_v) & 0xf) 29#define XPC_VERSION_MINOR(_v) ((_v) & 0xf)
38 30
39/* 31/* define the frequency of the heartbeat and how often it's checked */
40 * The next macros define word or bit representations for given
41 * C-brick nasid in either the SAL provided bit array representing
42 * nasids in the partition/machine or the AMO_t array used for
43 * inter-partition initiation communications.
44 *
45 * For SN2 machines, C-Bricks are alway even numbered NASIDs. As
46 * such, some space will be saved by insisting that nasid information
47 * passed from SAL always be packed for C-Bricks and the
48 * cross-partition interrupts use the same packing scheme.
49 */
50#define XPC_NASID_W_INDEX(_n) (((_n) / 64) / 2)
51#define XPC_NASID_B_INDEX(_n) (((_n) / 2) & (64 - 1))
52#define XPC_NASID_IN_ARRAY(_n, _p) ((_p)[XPC_NASID_W_INDEX(_n)] & \
53 (1UL << XPC_NASID_B_INDEX(_n)))
54#define XPC_NASID_FROM_W_B(_w, _b) (((_w) * 64 + (_b)) * 2)
55
56#define XPC_HB_DEFAULT_INTERVAL 5 /* incr HB every x secs */ 32#define XPC_HB_DEFAULT_INTERVAL 5 /* incr HB every x secs */
57#define XPC_HB_CHECK_DEFAULT_INTERVAL 20 /* check HB every x secs */ 33#define XPC_HB_CHECK_DEFAULT_INTERVAL 20 /* check HB every x secs */
58 34
@@ -72,11 +48,11 @@
72 * 48 *
73 * reserved page header 49 * reserved page header
74 * 50 *
75 * The first cacheline of the reserved page contains the header 51 * The first two 64-byte cachelines of the reserved page contain the
76 * (struct xpc_rsvd_page). Before SAL initialization has completed, 52 * header (struct xpc_rsvd_page). Before SAL initialization has completed,
77 * SAL has set up the following fields of the reserved page header: 53 * SAL has set up the following fields of the reserved page header:
78 * SAL_signature, SAL_version, partid, and nasids_size. The other 54 * SAL_signature, SAL_version, SAL_partid, and SAL_nasids_size. The
79 * fields are set up by XPC. (xpc_rsvd_page points to the local 55 * other fields are set up by XPC. (xpc_rsvd_page points to the local
80 * partition's reserved page.) 56 * partition's reserved page.)
81 * 57 *
82 * part_nasids mask 58 * part_nasids mask
@@ -87,14 +63,16 @@
87 * the actual nasids in the entire machine (mach_nasids). We're only 63 * the actual nasids in the entire machine (mach_nasids). We're only
88 * interested in the even numbered nasids (which contain the processors 64 * interested in the even numbered nasids (which contain the processors
89 * and/or memory), so we only need half as many bits to represent the 65 * and/or memory), so we only need half as many bits to represent the
90 * nasids. The part_nasids mask is located starting at the first cacheline 66 * nasids. When mapping nasid to bit in a mask (or bit to nasid) be sure
91 * following the reserved page header. The mach_nasids mask follows right 67 * to either divide or multiply by 2. The part_nasids mask is located
92 * after the part_nasids mask. The size in bytes of each mask is reflected 68 * starting at the first cacheline following the reserved page header. The
93 * by the reserved page header field 'nasids_size'. (Local partition's 69 * mach_nasids mask follows right after the part_nasids mask. The size in
94 * mask pointers are xpc_part_nasids and xpc_mach_nasids.) 70 * bytes of each mask is reflected by the reserved page header field
71 * 'SAL_nasids_size'. (Local partition's mask pointers are xpc_part_nasids
72 * and xpc_mach_nasids.)
95 * 73 *
96 * vars 74 * vars (ia64-sn2 only)
97 * vars part 75 * vars part (ia64-sn2 only)
98 * 76 *
99 * Immediately following the mach_nasids mask are the XPC variables 77 * Immediately following the mach_nasids mask are the XPC variables
100 * required by other partitions. First are those that are generic to all 78 * required by other partitions. First are those that are generic to all
@@ -102,43 +80,26 @@
102 * which are partition specific (vars part). These are setup by XPC. 80 * which are partition specific (vars part). These are setup by XPC.
103 * (Local partition's vars pointers are xpc_vars and xpc_vars_part.) 81 * (Local partition's vars pointers are xpc_vars and xpc_vars_part.)
104 * 82 *
105 * Note: Until vars_pa is set, the partition XPC code has not been initialized. 83 * Note: Until 'ts_jiffies' is set non-zero, the partition XPC code has not been
84 * initialized.
106 */ 85 */
107struct xpc_rsvd_page { 86struct xpc_rsvd_page {
108 u64 SAL_signature; /* SAL: unique signature */ 87 u64 SAL_signature; /* SAL: unique signature */
109 u64 SAL_version; /* SAL: version */ 88 u64 SAL_version; /* SAL: version */
110 u8 partid; /* SAL: partition ID */ 89 short SAL_partid; /* SAL: partition ID */
90 short max_npartitions; /* value of XPC_MAX_PARTITIONS */
111 u8 version; 91 u8 version;
112 u8 pad1[6]; /* align to next u64 in cacheline */ 92 u8 pad1[3]; /* align to next u64 in 1st 64-byte cacheline */
113 u64 vars_pa; /* physical address of struct xpc_vars */ 93 union {
114 struct timespec stamp; /* time when reserved page was setup by XPC */ 94 unsigned long vars_pa; /* phys address of struct xpc_vars */
115 u64 pad2[9]; /* align to last u64 in cacheline */ 95 unsigned long activate_mq_gpa; /* gru phy addr of activate_mq */
116 u64 nasids_size; /* SAL: size of each nasid mask in bytes */ 96 } sn;
97 unsigned long ts_jiffies; /* timestamp when rsvd pg was setup by XPC */
98 u64 pad2[10]; /* align to last u64 in 2nd 64-byte cacheline */
99 u64 SAL_nasids_size; /* SAL: size of each nasid mask in bytes */
117}; 100};
118 101
119#define XPC_RP_VERSION _XPC_VERSION(1, 1) /* version 1.1 of the reserved page */ 102#define XPC_RP_VERSION _XPC_VERSION(2, 0) /* version 2.0 of the reserved page */
120
121#define XPC_SUPPORTS_RP_STAMP(_version) \
122 (_version >= _XPC_VERSION(1, 1))
123
124/*
125 * compare stamps - the return value is:
126 *
127 * < 0, if stamp1 < stamp2
128 * = 0, if stamp1 == stamp2
129 * > 0, if stamp1 > stamp2
130 */
131static inline int
132xpc_compare_stamps(struct timespec *stamp1, struct timespec *stamp2)
133{
134 int ret;
135
136 ret = stamp1->tv_sec - stamp2->tv_sec;
137 if (ret == 0)
138 ret = stamp1->tv_nsec - stamp2->tv_nsec;
139
140 return ret;
141}
142 103
143/* 104/*
144 * Define the structures by which XPC variables can be exported to other 105 * Define the structures by which XPC variables can be exported to other
@@ -154,85 +115,40 @@ xpc_compare_stamps(struct timespec *stamp1, struct timespec *stamp2)
154 * reflected by incrementing either the major or minor version numbers 115 * reflected by incrementing either the major or minor version numbers
155 * of struct xpc_vars. 116 * of struct xpc_vars.
156 */ 117 */
157struct xpc_vars { 118struct xpc_vars_sn2 {
158 u8 version; 119 u8 version;
159 u64 heartbeat; 120 u64 heartbeat;
160 u64 heartbeating_to_mask; 121 DECLARE_BITMAP(heartbeating_to_mask, XP_MAX_NPARTITIONS_SN2);
161 u64 heartbeat_offline; /* if 0, heartbeat should be changing */ 122 u64 heartbeat_offline; /* if 0, heartbeat should be changing */
162 int act_nasid; 123 int activate_IRQ_nasid;
163 int act_phys_cpuid; 124 int activate_IRQ_phys_cpuid;
164 u64 vars_part_pa; 125 unsigned long vars_part_pa;
165 u64 amos_page_pa; /* paddr of page of AMOs from MSPEC driver */ 126 unsigned long amos_page_pa;/* paddr of page of amos from MSPEC driver */
166 AMO_t *amos_page; /* vaddr of page of AMOs from MSPEC driver */ 127 struct amo *amos_page; /* vaddr of page of amos from MSPEC driver */
167}; 128};
168 129
169#define XPC_V_VERSION _XPC_VERSION(3, 1) /* version 3.1 of the cross vars */ 130#define XPC_V_VERSION _XPC_VERSION(3, 1) /* version 3.1 of the cross vars */
170 131
171#define XPC_SUPPORTS_DISENGAGE_REQUEST(_version) \
172 (_version >= _XPC_VERSION(3, 1))
173
174static inline int
175xpc_hb_allowed(short partid, struct xpc_vars *vars)
176{
177 return ((vars->heartbeating_to_mask & (1UL << partid)) != 0);
178}
179
180static inline void
181xpc_allow_hb(short partid, struct xpc_vars *vars)
182{
183 u64 old_mask, new_mask;
184
185 do {
186 old_mask = vars->heartbeating_to_mask;
187 new_mask = (old_mask | (1UL << partid));
188 } while (cmpxchg(&vars->heartbeating_to_mask, old_mask, new_mask) !=
189 old_mask);
190}
191
192static inline void
193xpc_disallow_hb(short partid, struct xpc_vars *vars)
194{
195 u64 old_mask, new_mask;
196
197 do {
198 old_mask = vars->heartbeating_to_mask;
199 new_mask = (old_mask & ~(1UL << partid));
200 } while (cmpxchg(&vars->heartbeating_to_mask, old_mask, new_mask) !=
201 old_mask);
202}
203
204/*
205 * The AMOs page consists of a number of AMO variables which are divided into
206 * four groups, The first two groups are used to identify an IRQ's sender.
207 * These two groups consist of 64 and 128 AMO variables respectively. The last
208 * two groups, consisting of just one AMO variable each, are used to identify
209 * the remote partitions that are currently engaged (from the viewpoint of
210 * the XPC running on the remote partition).
211 */
212#define XPC_NOTIFY_IRQ_AMOS 0
213#define XPC_ACTIVATE_IRQ_AMOS (XPC_NOTIFY_IRQ_AMOS + XP_MAX_PARTITIONS)
214#define XPC_ENGAGED_PARTITIONS_AMO (XPC_ACTIVATE_IRQ_AMOS + XP_NASID_MASK_WORDS)
215#define XPC_DISENGAGE_REQUEST_AMO (XPC_ENGAGED_PARTITIONS_AMO + 1)
216
217/* 132/*
218 * The following structure describes the per partition specific variables. 133 * The following structure describes the per partition specific variables.
219 * 134 *
220 * An array of these structures, one per partition, will be defined. As a 135 * An array of these structures, one per partition, will be defined. As a
221 * partition becomes active XPC will copy the array entry corresponding to 136 * partition becomes active XPC will copy the array entry corresponding to
222 * itself from that partition. It is desirable that the size of this 137 * itself from that partition. It is desirable that the size of this structure
223 * structure evenly divide into a cacheline, such that none of the entries 138 * evenly divides into a 128-byte cacheline, such that none of the entries in
224 * in this array crosses a cacheline boundary. As it is now, each entry 139 * this array crosses a 128-byte cacheline boundary. As it is now, each entry
225 * occupies half a cacheline. 140 * occupies 64-bytes.
226 */ 141 */
227struct xpc_vars_part { 142struct xpc_vars_part_sn2 {
228 u64 magic; 143 u64 magic;
229 144
230 u64 openclose_args_pa; /* physical address of open and close args */ 145 unsigned long openclose_args_pa; /* phys addr of open and close args */
231 u64 GPs_pa; /* physical address of Get/Put values */ 146 unsigned long GPs_pa; /* physical address of Get/Put values */
147
148 unsigned long chctl_amo_pa; /* physical address of chctl flags' amo */
232 149
233 u64 IPI_amo_pa; /* physical address of IPI AMO_t structure */ 150 int notify_IRQ_nasid; /* nasid of where to send notify IRQs */
234 int IPI_nasid; /* nasid of where to send IPIs */ 151 int notify_IRQ_phys_cpuid; /* CPUID of where to send notify IRQs */
235 int IPI_phys_cpuid; /* physical CPU ID of where to send IPIs */
236 152
237 u8 nchannels; /* #of defined channels supported */ 153 u8 nchannels; /* #of defined channels supported */
238 154
@@ -248,20 +164,95 @@ struct xpc_vars_part {
248 * MAGIC2 indicates that this partition has pulled the remote partition's 164 * MAGIC2 indicates that this partition has pulled the remote partition's
249 * per-partition variables that pertain to this partition. 165 * per-partition variables that pertain to this partition.
250 */ 166 */
251#define XPC_VP_MAGIC1 0x0053524156435058L /* 'XPCVARS\0'L (little endian) */ 167#define XPC_VP_MAGIC1_SN2 0x0053524156435058L /* 'XPCVARS\0'L (little endian) */
252#define XPC_VP_MAGIC2 0x0073726176435058L /* 'XPCvars\0'L (little endian) */ 168#define XPC_VP_MAGIC2_SN2 0x0073726176435058L /* 'XPCvars\0'L (little endian) */
253 169
254/* the reserved page sizes and offsets */ 170/* the reserved page sizes and offsets */
255 171
256#define XPC_RP_HEADER_SIZE L1_CACHE_ALIGN(sizeof(struct xpc_rsvd_page)) 172#define XPC_RP_HEADER_SIZE L1_CACHE_ALIGN(sizeof(struct xpc_rsvd_page))
257#define XPC_RP_VARS_SIZE L1_CACHE_ALIGN(sizeof(struct xpc_vars)) 173#define XPC_RP_VARS_SIZE L1_CACHE_ALIGN(sizeof(struct xpc_vars_sn2))
258 174
259#define XPC_RP_PART_NASIDS(_rp) ((u64 *)((u8 *)(_rp) + XPC_RP_HEADER_SIZE)) 175#define XPC_RP_PART_NASIDS(_rp) ((unsigned long *)((u8 *)(_rp) + \
260#define XPC_RP_MACH_NASIDS(_rp) (XPC_RP_PART_NASIDS(_rp) + xp_nasid_mask_words) 176 XPC_RP_HEADER_SIZE))
261#define XPC_RP_VARS(_rp) ((struct xpc_vars *)(XPC_RP_MACH_NASIDS(_rp) + \ 177#define XPC_RP_MACH_NASIDS(_rp) (XPC_RP_PART_NASIDS(_rp) + \
262 xp_nasid_mask_words)) 178 xpc_nasid_mask_nlongs)
263#define XPC_RP_VARS_PART(_rp) ((struct xpc_vars_part *) \ 179#define XPC_RP_VARS(_rp) ((struct xpc_vars_sn2 *) \
264 ((u8 *)XPC_RP_VARS(_rp) + XPC_RP_VARS_SIZE)) 180 (XPC_RP_MACH_NASIDS(_rp) + \
181 xpc_nasid_mask_nlongs))
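
Taken together, these macros imply the reserved-page layout sketched below (derived only from the definitions above):

/*
 * Reserved page layout (sketch):
 *
 *	+----------------------------+ <- _rp
 *	| struct xpc_rsvd_page       |	XPC_RP_HEADER_SIZE
 *	+----------------------------+ <- XPC_RP_PART_NASIDS(_rp)
 *	| partition's nasid mask     |	xpc_nasid_mask_nlongs longs
 *	+----------------------------+ <- XPC_RP_MACH_NASIDS(_rp)
 *	| machine's nasid mask       |	xpc_nasid_mask_nlongs longs
 *	+----------------------------+ <- XPC_RP_VARS(_rp)
 *	| struct xpc_vars_sn2        |	XPC_RP_VARS_SIZE
 *	+----------------------------+
 */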
182
183/*
184 * The activate_mq is used to send/receive GRU messages that affect XPC's
185 * heartbeat, partition active state, and channel state. This is UV only.
186 */
187struct xpc_activate_mq_msghdr_uv {
188 short partid; /* sender's partid */
189 u8 act_state; /* sender's act_state at time msg sent */
190 u8 type; /* message's type */
191 unsigned long rp_ts_jiffies; /* timestamp of sender's rp setup by XPC */
192};
193
194/* activate_mq defined message types */
195#define XPC_ACTIVATE_MQ_MSG_SYNC_ACT_STATE_UV 0
196#define XPC_ACTIVATE_MQ_MSG_INC_HEARTBEAT_UV 1
197#define XPC_ACTIVATE_MQ_MSG_OFFLINE_HEARTBEAT_UV 2
198#define XPC_ACTIVATE_MQ_MSG_ONLINE_HEARTBEAT_UV 3
199
200#define XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV 4
201#define XPC_ACTIVATE_MQ_MSG_DEACTIVATE_REQ_UV 5
202
203#define XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREQUEST_UV 6
204#define XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREPLY_UV 7
205#define XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREQUEST_UV 8
206#define XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREPLY_UV 9
207
208#define XPC_ACTIVATE_MQ_MSG_MARK_ENGAGED_UV 10
209#define XPC_ACTIVATE_MQ_MSG_MARK_DISENGAGED_UV 11
210
211struct xpc_activate_mq_msg_uv {
212 struct xpc_activate_mq_msghdr_uv hdr;
213};
214
215struct xpc_activate_mq_msg_heartbeat_req_uv {
216 struct xpc_activate_mq_msghdr_uv hdr;
217 u64 heartbeat;
218};
219
220struct xpc_activate_mq_msg_activate_req_uv {
221 struct xpc_activate_mq_msghdr_uv hdr;
222 unsigned long rp_gpa;
223 unsigned long activate_mq_gpa;
224};
225
226struct xpc_activate_mq_msg_deactivate_req_uv {
227 struct xpc_activate_mq_msghdr_uv hdr;
228 enum xp_retval reason;
229};
230
231struct xpc_activate_mq_msg_chctl_closerequest_uv {
232 struct xpc_activate_mq_msghdr_uv hdr;
233 short ch_number;
234 enum xp_retval reason;
235};
236
237struct xpc_activate_mq_msg_chctl_closereply_uv {
238 struct xpc_activate_mq_msghdr_uv hdr;
239 short ch_number;
240};
241
242struct xpc_activate_mq_msg_chctl_openrequest_uv {
243 struct xpc_activate_mq_msghdr_uv hdr;
244 short ch_number;
245 short entry_size; /* size of notify_mq's GRU messages */
246 short local_nentries; /* ??? Is this needed? What is? */
247};
248
249struct xpc_activate_mq_msg_chctl_openreply_uv {
250 struct xpc_activate_mq_msghdr_uv hdr;
251 short ch_number;
252 short remote_nentries; /* ??? Is this needed? What is? */
253 short local_nentries; /* ??? Is this needed? What is? */
254 unsigned long local_notify_mq_gpa;
255};
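
As a sketch of how these messages are composed (the helper name is hypothetical; the real send path lives in xpc_uv.c), a sender fills in the common header plus any type-specific fields before the message is queued on the remote partition's activate_mq:

static void
xpc_compose_heartbeat_msg_example(
	struct xpc_activate_mq_msg_heartbeat_req_uv *msg,
	short my_partid, u8 my_act_state,
	unsigned long my_rp_ts_jiffies, u64 heartbeat)
{
	/* common header, examined first by the receiving partition */
	msg->hdr.partid = my_partid;
	msg->hdr.act_state = my_act_state;
	msg->hdr.type = XPC_ACTIVATE_MQ_MSG_INC_HEARTBEAT_UV;
	msg->hdr.rp_ts_jiffies = my_rp_ts_jiffies;

	/* type-specific payload */
	msg->heartbeat = heartbeat;
}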
265 256
266/* 257/*
267 * Functions registered by add_timer() or called by kernel_thread() only 258 * Functions registered by add_timer() or called by kernel_thread() only
@@ -270,22 +261,22 @@ struct xpc_vars_part {
270 * the passed argument. 261 * the passed argument.
271 */ 262 */
272#define XPC_PACK_ARGS(_arg1, _arg2) \ 263#define XPC_PACK_ARGS(_arg1, _arg2) \
273 ((((u64) _arg1) & 0xffffffff) | \ 264 ((((u64)_arg1) & 0xffffffff) | \
274 ((((u64) _arg2) & 0xffffffff) << 32)) 265 ((((u64)_arg2) & 0xffffffff) << 32))
275 266
276#define XPC_UNPACK_ARG1(_args) (((u64) _args) & 0xffffffff) 267#define XPC_UNPACK_ARG1(_args) (((u64)_args) & 0xffffffff)
277#define XPC_UNPACK_ARG2(_args) ((((u64) _args) >> 32) & 0xffffffff) 268#define XPC_UNPACK_ARG2(_args) ((((u64)_args) >> 32) & 0xffffffff)
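
A minimal usage sketch (variable names illustrative): the two 32-bit halves survive the round trip through a single u64, which is what lets a partid/channel pair ride in the one argument add_timer() and kernel_thread() provide:

	/* pack a partid/channel pair into the single allowed argument */
	u64 args = XPC_PACK_ARGS(partid, ch_number);

	/* ...inside the timer or kthread function, recover both halves... */
	short partid = (short)XPC_UNPACK_ARG1(args);	/* low 32 bits */
	int ch_number = (int)XPC_UNPACK_ARG2(args);	/* high 32 bits */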
278 269
279/* 270/*
280 * Define a Get/Put value pair (pointers) used with a message queue. 271 * Define a Get/Put value pair (pointers) used with a message queue.
281 */ 272 */
282struct xpc_gp { 273struct xpc_gp_sn2 {
283 s64 get; /* Get value */ 274 s64 get; /* Get value */
284 s64 put; /* Put value */ 275 s64 put; /* Put value */
285}; 276};
286 277
287#define XPC_GP_SIZE \ 278#define XPC_GP_SIZE \
288 L1_CACHE_ALIGN(sizeof(struct xpc_gp) * XPC_NCHANNELS) 279 L1_CACHE_ALIGN(sizeof(struct xpc_gp_sn2) * XPC_MAX_NCHANNELS)
289 280
290/* 281/*
291 * Define a structure that contains arguments associated with opening and 282 * Define a structure that contains arguments associated with opening and
@@ -293,31 +284,89 @@ struct xpc_gp {
293 */ 284 */
294struct xpc_openclose_args { 285struct xpc_openclose_args {
295 u16 reason; /* reason why channel is closing */ 286 u16 reason; /* reason why channel is closing */
296 u16 msg_size; /* sizeof each message entry */ 287 u16 entry_size; /* sizeof each message entry */
297 u16 remote_nentries; /* #of message entries in remote msg queue */ 288 u16 remote_nentries; /* #of message entries in remote msg queue */
298 u16 local_nentries; /* #of message entries in local msg queue */ 289 u16 local_nentries; /* #of message entries in local msg queue */
299 u64 local_msgqueue_pa; /* physical address of local message queue */ 290 unsigned long local_msgqueue_pa; /* phys addr of local message queue */
300}; 291};
301 292
302#define XPC_OPENCLOSE_ARGS_SIZE \ 293#define XPC_OPENCLOSE_ARGS_SIZE \
303 L1_CACHE_ALIGN(sizeof(struct xpc_openclose_args) * XPC_NCHANNELS) 294 L1_CACHE_ALIGN(sizeof(struct xpc_openclose_args) * \
295 XPC_MAX_NCHANNELS)
304 296
305/* struct xpc_msg flags */
306 297
307#define XPC_M_DONE 0x01 /* msg has been received/consumed */ 298/*
308#define XPC_M_READY 0x02 /* msg is ready to be sent */ 299 * Structures to define a singly-linked FIFO list.
309#define XPC_M_INTERRUPT 0x04 /* send interrupt when msg consumed */ 300 */
310 301
311#define XPC_MSG_ADDRESS(_payload) \ 302struct xpc_fifo_entry_uv {
312 ((struct xpc_msg *)((u8 *)(_payload) - XPC_MSG_PAYLOAD_OFFSET)) 303 struct xpc_fifo_entry_uv *next;
304};
305
306struct xpc_fifo_head_uv {
307 struct xpc_fifo_entry_uv *first;
308 struct xpc_fifo_entry_uv *last;
309 spinlock_t lock;
310 int n_entries;
311};
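
A sketch of how such a list might be manipulated (helper names hypothetical; the real helpers live in xpc_uv.c and may differ):

static void
xpc_fifo_put_example(struct xpc_fifo_head_uv *head,
		     struct xpc_fifo_entry_uv *entry)
{
	unsigned long irq_flags;

	entry->next = NULL;
	spin_lock_irqsave(&head->lock, irq_flags);
	if (head->last != NULL)
		head->last->next = entry;	/* append to tail */
	else
		head->first = entry;		/* list was empty */
	head->last = entry;
	head->n_entries++;
	spin_unlock_irqrestore(&head->lock, irq_flags);
}

static struct xpc_fifo_entry_uv *
xpc_fifo_get_example(struct xpc_fifo_head_uv *head)
{
	struct xpc_fifo_entry_uv *entry;
	unsigned long irq_flags;

	spin_lock_irqsave(&head->lock, irq_flags);
	entry = head->first;			/* dequeue from head */
	if (entry != NULL) {
		head->first = entry->next;
		if (head->first == NULL)
			head->last = NULL;
		head->n_entries--;
	}
	spin_unlock_irqrestore(&head->lock, irq_flags);
	return entry;
}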
313 312
314/* 313/*
315 * Defines notify entry. 314 * Define a sn2 styled message.
315 *
316 * A user-defined message resides in the payload area. The max size of the
317 * payload is defined by the user via xpc_connect().
318 *
319 * The size of a message entry (within a message queue) must be a 128-byte
320 * cacheline sized multiple in order to facilitate the BTE transfer of messages
321 * from one message queue to another.
322 */
323struct xpc_msg_sn2 {
324 u8 flags; /* FOR XPC INTERNAL USE ONLY */
325 u8 reserved[7]; /* FOR XPC INTERNAL USE ONLY */
326 s64 number; /* FOR XPC INTERNAL USE ONLY */
327
328 u64 payload; /* user defined portion of message */
329};
330
331/* struct xpc_msg_sn2 flags */
332
333#define XPC_M_SN2_DONE 0x01 /* msg has been received/consumed */
334#define XPC_M_SN2_READY 0x02 /* msg is ready to be sent */
335#define XPC_M_SN2_INTERRUPT 0x04 /* send interrupt when msg consumed */
336
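To make the 128-byte sizing rule above concrete, a message entry's size could be derived along the following lines (a sketch; XPC's actual sizing macro may differ):

/* hypothetical: round header + payload up to a cacheline multiple */
static inline u16
xpc_msg_entry_size_example(u16 payload_size)
{
	/* the header is everything preceding the user-defined payload */
	return L1_CACHE_ALIGN(offsetof(struct xpc_msg_sn2, payload) +
			      payload_size);
}
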
337/*
338 * The format of a uv XPC notify_mq GRU message is as follows:
339 *
340 * A user-defined message resides in the payload area. The max size of the
341 * payload is defined by the user via xpc_connect().
342 *
343 * The size of a message (payload and header) sent via the GRU must be either 1
344 * or 2 GRU_CACHE_LINE_BYTES in length.
345 */
346
347struct xpc_notify_mq_msghdr_uv {
348 union {
349 unsigned int gru_msg_hdr; /* FOR GRU INTERNAL USE ONLY */
350 struct xpc_fifo_entry_uv next; /* FOR XPC INTERNAL USE ONLY */
351 } u;
352 short partid; /* FOR XPC INTERNAL USE ONLY */
353 u8 ch_number; /* FOR XPC INTERNAL USE ONLY */
354 u8 size; /* FOR XPC INTERNAL USE ONLY */
355 unsigned int msg_slot_number; /* FOR XPC INTERNAL USE ONLY */
356};
357
358struct xpc_notify_mq_msg_uv {
359 struct xpc_notify_mq_msghdr_uv hdr;
360 unsigned long payload;
361};
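
An illustrative check of the one-or-two-cacheline rule stated above (this helper is hypothetical; GRU_CACHE_LINE_BYTES comes from the GRU driver headers, as the comment implies):

static inline int
xpc_notify_mq_msg_size_ok_example(size_t msg_size)
{
	return msg_size == GRU_CACHE_LINE_BYTES ||
	       msg_size == 2 * GRU_CACHE_LINE_BYTES;
}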
362
363/*
364 * Define sn2's notify entry.
316 * 365 *
317 * This is used to notify a message's sender that their message was received 366 * This is used to notify a message's sender that their message was received
318 * and consumed by the intended recipient. 367 * and consumed by the intended recipient.
319 */ 368 */
320struct xpc_notify { 369struct xpc_notify_sn2 {
321 u8 type; /* type of notification */ 370 u8 type; /* type of notification */
322 371
323 /* the following two fields are only used if type == XPC_N_CALL */ 372 /* the following two fields are only used if type == XPC_N_CALL */
@@ -325,9 +374,20 @@ struct xpc_notify {
325 void *key; /* pointer to user's key */ 374 void *key; /* pointer to user's key */
326}; 375};
327 376
328/* struct xpc_notify type of notification */ 377/* struct xpc_notify_sn2 type of notification */
378
379#define XPC_N_CALL 0x01 /* notify function provided by user */
329 380
330#define XPC_N_CALL 0x01 /* notify function provided by user */ 381/*
382 * Define uv's version of the notify entry. It is additionally used to allocate
383 * a msg slot on the remote partition into which a sent message is copied.
384 */
385struct xpc_send_msg_slot_uv {
386 struct xpc_fifo_entry_uv next;
387 unsigned int msg_slot_number;
388 xpc_notify_func func; /* user's notify function */
389 void *key; /* pointer to user's key */
390};
331 391
332/* 392/*
333 * Define the structure that manages all the stuff required by a channel. In 393 * Define the structure that manages all the stuff required by a channel. In
@@ -339,8 +399,12 @@ struct xpc_notify {
339 * There is an array of these structures for each remote partition. It is 399 * There is an array of these structures for each remote partition. It is
340 * allocated at the time a partition becomes active. The array contains one 400 * allocated at the time a partition becomes active. The array contains one
341 * of these structures for each potential channel connection to that partition. 401 * of these structures for each potential channel connection to that partition.
402 */
403
404/*
405 * The following is sn2 only.
342 * 406 *
343 * Each of these structures manages two message queues (circular buffers). 407 * Each channel structure manages two message queues (circular buffers).
344 * They are allocated at the time a channel connection is made. One of 408 * They are allocated at the time a channel connection is made. One of
345 * these message queues (local_msgqueue) holds the locally created messages 409 * these message queues (local_msgqueue) holds the locally created messages
346 * that are destined for the remote partition. The other of these message 410 * that are destined for the remote partition. The other of these message
@@ -407,58 +471,72 @@ struct xpc_notify {
407 * new messages, by the clearing of the message flags of the acknowledged 471 * new messages, by the clearing of the message flags of the acknowledged
408 * messages. 472 * messages.
409 */ 473 */
474
475struct xpc_channel_sn2 {
476 struct xpc_openclose_args *local_openclose_args; /* args passed on */
477 /* opening or closing of channel */
478
479 void *local_msgqueue_base; /* base address of kmalloc'd space */
480 struct xpc_msg_sn2 *local_msgqueue; /* local message queue */
481 void *remote_msgqueue_base; /* base address of kmalloc'd space */
482 struct xpc_msg_sn2 *remote_msgqueue; /* cached copy of remote */
483 /* partition's local message queue */
484 unsigned long remote_msgqueue_pa; /* phys addr of remote partition's */
485 /* local message queue */
486
487 struct xpc_notify_sn2 *notify_queue;/* notify queue for messages sent */
488
489 /* various flavors of local and remote Get/Put values */
490
491 struct xpc_gp_sn2 *local_GP; /* local Get/Put values */
492 struct xpc_gp_sn2 remote_GP; /* remote Get/Put values */
493 struct xpc_gp_sn2 w_local_GP; /* working local Get/Put values */
494 struct xpc_gp_sn2 w_remote_GP; /* working remote Get/Put values */
495 s64 next_msg_to_pull; /* Put value of next msg to pull */
496
497 struct mutex msg_to_pull_mutex; /* next msg to pull serialization */
498};
499
500struct xpc_channel_uv {
501 unsigned long remote_notify_mq_gpa; /* gru phys address of remote */
502 /* partition's notify mq */
503
504 struct xpc_send_msg_slot_uv *send_msg_slots;
505 struct xpc_notify_mq_msg_uv *recv_msg_slots;
506
507 struct xpc_fifo_head_uv msg_slot_free_list;
508 struct xpc_fifo_head_uv recv_msg_list; /* deliverable payloads */
509};
510
410struct xpc_channel { 511struct xpc_channel {
411 short partid; /* ID of remote partition connected */ 512 short partid; /* ID of remote partition connected */
412 spinlock_t lock; /* lock for updating this structure */ 513 spinlock_t lock; /* lock for updating this structure */
413 u32 flags; /* general flags */ 514 unsigned int flags; /* general flags */
414 515
415 enum xp_retval reason; /* reason why channel is disconnect'g */ 516 enum xp_retval reason; /* reason why channel is disconnect'g */
416 int reason_line; /* line# disconnect initiated from */ 517 int reason_line; /* line# disconnect initiated from */
417 518
418 u16 number; /* channel # */ 519 u16 number; /* channel # */
419 520
420 u16 msg_size; /* sizeof each msg entry */ 521 u16 entry_size; /* sizeof each msg entry */
421 u16 local_nentries; /* #of msg entries in local msg queue */ 522 u16 local_nentries; /* #of msg entries in local msg queue */
422 u16 remote_nentries; /* #of msg entries in remote msg queue */ 523 u16 remote_nentries; /* #of msg entries in remote msg queue */
423 524
424 void *local_msgqueue_base; /* base address of kmalloc'd space */
425 struct xpc_msg *local_msgqueue; /* local message queue */
426 void *remote_msgqueue_base; /* base address of kmalloc'd space */
427 struct xpc_msg *remote_msgqueue; /* cached copy of remote partition's */
428 /* local message queue */
429 u64 remote_msgqueue_pa; /* phys addr of remote partition's */
430 /* local message queue */
431
432 atomic_t references; /* #of external references to queues */ 525 atomic_t references; /* #of external references to queues */
433 526
434 atomic_t n_on_msg_allocate_wq; /* #on msg allocation wait queue */ 527 atomic_t n_on_msg_allocate_wq; /* #on msg allocation wait queue */
435 wait_queue_head_t msg_allocate_wq; /* msg allocation wait queue */ 528 wait_queue_head_t msg_allocate_wq; /* msg allocation wait queue */
436 529
437 u8 delayed_IPI_flags; /* IPI flags received, but delayed */ 530 u8 delayed_chctl_flags; /* chctl flags received, but delayed */
438 /* action until channel disconnected */ 531 /* action until channel disconnected */
439 532
440 /* queue of msg senders who want to be notified when msg received */
441
442 atomic_t n_to_notify; /* #of msg senders to notify */ 533 atomic_t n_to_notify; /* #of msg senders to notify */
443 struct xpc_notify *notify_queue; /* notify queue for messages sent */
444 534
445 xpc_channel_func func; /* user's channel function */ 535 xpc_channel_func func; /* user's channel function */
446 void *key; /* pointer to user's key */ 536 void *key; /* pointer to user's key */
447 537
448 struct mutex msg_to_pull_mutex; /* next msg to pull serialization */
449 struct completion wdisconnect_wait; /* wait for channel disconnect */ 538 struct completion wdisconnect_wait; /* wait for channel disconnect */
450 539
451 struct xpc_openclose_args *local_openclose_args; /* args passed on */
452 /* opening or closing of channel */
453
454 /* various flavors of local and remote Get/Put values */
455
456 struct xpc_gp *local_GP; /* local Get/Put values */
457 struct xpc_gp remote_GP; /* remote Get/Put values */
458 struct xpc_gp w_local_GP; /* working local Get/Put values */
459 struct xpc_gp w_remote_GP; /* working remote Get/Put values */
460 s64 next_msg_to_pull; /* Put value of next msg to pull */
461
462 /* kthread management related fields */ 540 /* kthread management related fields */
463 541
464 atomic_t kthreads_assigned; /* #of kthreads assigned to channel */ 542 atomic_t kthreads_assigned; /* #of kthreads assigned to channel */
@@ -469,6 +547,11 @@ struct xpc_channel {
469 547
470 wait_queue_head_t idle_wq; /* idle kthread wait queue */ 548 wait_queue_head_t idle_wq; /* idle kthread wait queue */
471 549
550 union {
551 struct xpc_channel_sn2 sn2;
552 struct xpc_channel_uv uv;
553 } sn;
554
472} ____cacheline_aligned; 555} ____cacheline_aligned;
473 556
474/* struct xpc_channel flags */ 557/* struct xpc_channel flags */
@@ -501,33 +584,128 @@ struct xpc_channel {
501#define XPC_C_WDISCONNECT 0x00040000 /* waiting for channel disconnect */ 584#define XPC_C_WDISCONNECT 0x00040000 /* waiting for channel disconnect */
502 585
503/* 586/*
504 * Manages channels on a partition basis. There is one of these structures 587 * The channel control flags (chctl) union consists of a 64-bit variable which
588 * is divided up into eight bytes, ordered from right to left. Byte zero
589 * pertains to channel 0, byte one to channel 1, and so on. Each channel's byte
590 * can have one or more of the chctl flags set in it.
591 */
592
593union xpc_channel_ctl_flags {
594 u64 all_flags;
595 u8 flags[XPC_MAX_NCHANNELS];
596};
597
598/* chctl flags */
599#define XPC_CHCTL_CLOSEREQUEST 0x01
600#define XPC_CHCTL_CLOSEREPLY 0x02
601#define XPC_CHCTL_OPENREQUEST 0x04
602#define XPC_CHCTL_OPENREPLY 0x08
603#define XPC_CHCTL_MSGREQUEST 0x10
604
605#define XPC_OPENCLOSE_CHCTL_FLAGS \
606 (XPC_CHCTL_CLOSEREQUEST | XPC_CHCTL_CLOSEREPLY | \
607 XPC_CHCTL_OPENREQUEST | XPC_CHCTL_OPENREPLY)
608#define XPC_MSG_CHCTL_FLAGS XPC_CHCTL_MSGREQUEST
609
610static inline int
611xpc_any_openclose_chctl_flags_set(union xpc_channel_ctl_flags *chctl)
612{
613 int ch_number;
614
615 for (ch_number = 0; ch_number < XPC_MAX_NCHANNELS; ch_number++) {
616 if (chctl->flags[ch_number] & XPC_OPENCLOSE_CHCTL_FLAGS)
617 return 1;
618 }
619 return 0;
620}
621
622static inline int
623xpc_any_msg_chctl_flags_set(union xpc_channel_ctl_flags *chctl)
624{
625 int ch_number;
626
627 for (ch_number = 0; ch_number < XPC_MAX_NCHANNELS; ch_number++) {
628 if (chctl->flags[ch_number] & XPC_MSG_CHCTL_FLAGS)
629 return 1;
630 }
631 return 0;
632}
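
By way of example, a receiver might fold one channel's incoming flag byte into the partition's pending set like this (a sketch; part->chctl and part->chctl_lock are declared below in struct xpc_partition):

	unsigned long irq_flags;

	spin_lock_irqsave(&part->chctl_lock, irq_flags);
	part->chctl.flags[ch_number] |= XPC_CHCTL_OPENREQUEST;
	spin_unlock_irqrestore(&part->chctl_lock, irq_flags);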
633
634/*
635 * Manage channels on a partition basis. There is one of these structures
505 * for each partition (a partition will never utilize the structure that 636 * for each partition (a partition will never utilize the structure that
506 * represents itself). 637 * represents itself).
507 */ 638 */
639
640struct xpc_partition_sn2 {
641 unsigned long remote_amos_page_pa; /* paddr of partition's amos page */
642 int activate_IRQ_nasid; /* active partition's act/deact nasid */
643 int activate_IRQ_phys_cpuid; /* active part's act/deact phys cpuid */
644
645 unsigned long remote_vars_pa; /* phys addr of partition's vars */
646 unsigned long remote_vars_part_pa; /* paddr of partition's vars part */
647 u8 remote_vars_version; /* version# of partition's vars */
648
649 void *local_GPs_base; /* base address of kmalloc'd space */
650 struct xpc_gp_sn2 *local_GPs; /* local Get/Put values */
651 void *remote_GPs_base; /* base address of kmalloc'd space */
652 struct xpc_gp_sn2 *remote_GPs; /* copy of remote partition's local */
653 /* Get/Put values */
654 unsigned long remote_GPs_pa; /* phys addr of remote partition's local */
655 /* Get/Put values */
656
657 void *local_openclose_args_base; /* base address of kmalloc'd space */
658 struct xpc_openclose_args *local_openclose_args; /* local's args */
659 unsigned long remote_openclose_args_pa; /* phys addr of remote's args */
660
661 int notify_IRQ_nasid; /* nasid of where to send notify IRQs */
662 int notify_IRQ_phys_cpuid; /* CPUID of where to send notify IRQs */
663 char notify_IRQ_owner[8]; /* notify IRQ's owner's name */
664
665 struct amo *remote_chctl_amo_va; /* addr of remote chctl flags' amo */
666 struct amo *local_chctl_amo_va; /* address of chctl flags' amo */
667
668 struct timer_list dropped_notify_IRQ_timer; /* dropped IRQ timer */
669};
670
671struct xpc_partition_uv {
672 unsigned long remote_activate_mq_gpa; /* gru phys address of remote */
673 /* partition's activate mq */
674 spinlock_t flags_lock; /* protect updating of flags */
675 unsigned int flags; /* general flags */
676 u8 remote_act_state; /* remote partition's act_state */
677 u8 act_state_req; /* act_state request from remote partition */
678 enum xp_retval reason; /* reason for deactivate act_state request */
679 u64 heartbeat; /* incremented by remote partition */
680};
681
682/* struct xpc_partition_uv flags */
683
684#define XPC_P_HEARTBEAT_OFFLINE_UV 0x00000001
685#define XPC_P_ENGAGED_UV 0x00000002
686
687/* struct xpc_partition_uv act_state change requests */
688
689#define XPC_P_ASR_ACTIVATE_UV 0x01
690#define XPC_P_ASR_REACTIVATE_UV 0x02
691#define XPC_P_ASR_DEACTIVATE_UV 0x03
692
508struct xpc_partition { 693struct xpc_partition {
509 694
510 /* XPC HB infrastructure */ 695 /* XPC HB infrastructure */
511 696
512 u8 remote_rp_version; /* version# of partition's rsvd pg */ 697 u8 remote_rp_version; /* version# of partition's rsvd pg */
513 struct timespec remote_rp_stamp; /* time when rsvd pg was initialized */ 698 unsigned long remote_rp_ts_jiffies; /* timestamp when rsvd pg setup */
514 u64 remote_rp_pa; /* phys addr of partition's rsvd pg */ 699 unsigned long remote_rp_pa; /* phys addr of partition's rsvd pg */
515 u64 remote_vars_pa; /* phys addr of partition's vars */
516 u64 remote_vars_part_pa; /* phys addr of partition's vars part */
517 u64 last_heartbeat; /* HB at last read */ 700 u64 last_heartbeat; /* HB at last read */
518 u64 remote_amos_page_pa; /* phys addr of partition's amos page */ 701 u32 activate_IRQ_rcvd; /* IRQs since activation */
519 int remote_act_nasid; /* active part's act/deact nasid */
520 int remote_act_phys_cpuid; /* active part's act/deact phys cpuid */
521 u32 act_IRQ_rcvd; /* IRQs since activation */
522 spinlock_t act_lock; /* protect updating of act_state */ 702 spinlock_t act_lock; /* protect updating of act_state */
523 u8 act_state; /* from XPC HB viewpoint */ 703 u8 act_state; /* from XPC HB viewpoint */
524 u8 remote_vars_version; /* version# of partition's vars */
525 enum xp_retval reason; /* reason partition is deactivating */ 704 enum xp_retval reason; /* reason partition is deactivating */
526 int reason_line; /* line# deactivation initiated from */ 705 int reason_line; /* line# deactivation initiated from */
527 int reactivate_nasid; /* nasid in partition to reactivate */
528 706
529 unsigned long disengage_request_timeout; /* timeout in jiffies */ 707 unsigned long disengage_timeout; /* timeout in jiffies */
530 struct timer_list disengage_request_timer; 708 struct timer_list disengage_timer;
531 709
532 /* XPC infrastructure referencing and teardown control */ 710 /* XPC infrastructure referencing and teardown control */
533 711
@@ -535,85 +713,63 @@ struct xpc_partition {
535 wait_queue_head_t teardown_wq; /* kthread waiting to teardown infra */ 713 wait_queue_head_t teardown_wq; /* kthread waiting to teardown infra */
536 atomic_t references; /* #of references to infrastructure */ 714 atomic_t references; /* #of references to infrastructure */
537 715
538 /*
539 * NONE OF THE PRECEDING FIELDS OF THIS STRUCTURE WILL BE CLEARED WHEN
540 * XPC SETS UP THE NECESSARY INFRASTRUCTURE TO SUPPORT CROSS PARTITION
541 * COMMUNICATION. ALL OF THE FOLLOWING FIELDS WILL BE CLEARED. (THE
542 * 'nchannels' FIELD MUST BE THE FIRST OF THE FIELDS TO BE CLEARED.)
543 */
544
545 u8 nchannels; /* #of defined channels supported */ 716 u8 nchannels; /* #of defined channels supported */
546 atomic_t nchannels_active; /* #of channels that are not DISCONNECTED */ 717 atomic_t nchannels_active; /* #of channels that are not DISCONNECTED */
547 atomic_t nchannels_engaged; /* #of channels engaged with remote part */ 718 atomic_t nchannels_engaged; /* #of channels engaged with remote part */
548 struct xpc_channel *channels; /* array of channel structures */ 719 struct xpc_channel *channels; /* array of channel structures */
549 720
550 void *local_GPs_base; /* base address of kmalloc'd space */ 721 /* fields used for managing channel availability and activity */
551 struct xpc_gp *local_GPs; /* local Get/Put values */
552 void *remote_GPs_base; /* base address of kmalloc'd space */
553 struct xpc_gp *remote_GPs; /* copy of remote partition's local */
554 /* Get/Put values */
555 u64 remote_GPs_pa; /* phys address of remote partition's local */
556 /* Get/Put values */
557 722
558 /* fields used to pass args when opening or closing a channel */ 723 union xpc_channel_ctl_flags chctl; /* chctl flags yet to be processed */
724 spinlock_t chctl_lock; /* chctl flags lock */
559 725
560 void *local_openclose_args_base; /* base address of kmalloc'd space */
561 struct xpc_openclose_args *local_openclose_args; /* local's args */
562 void *remote_openclose_args_base; /* base address of kmalloc'd space */ 726 void *remote_openclose_args_base; /* base address of kmalloc'd space */
563 struct xpc_openclose_args *remote_openclose_args; /* copy of remote's */ 727 struct xpc_openclose_args *remote_openclose_args; /* copy of remote's */
564 /* args */ 728 /* args */
565 u64 remote_openclose_args_pa; /* phys addr of remote's args */
566
567 /* IPI sending, receiving and handling related fields */
568
569 int remote_IPI_nasid; /* nasid of where to send IPIs */
570 int remote_IPI_phys_cpuid; /* phys CPU ID of where to send IPIs */
571 AMO_t *remote_IPI_amo_va; /* address of remote IPI AMO_t structure */
572
573 AMO_t *local_IPI_amo_va; /* address of IPI AMO_t structure */
574 u64 local_IPI_amo; /* IPI amo flags yet to be handled */
575 char IPI_owner[8]; /* IPI owner's name */
576 struct timer_list dropped_IPI_timer; /* dropped IPI timer */
577
578 spinlock_t IPI_lock; /* IPI handler lock */
579 729
580 /* channel manager related fields */ 730 /* channel manager related fields */
581 731
582 atomic_t channel_mgr_requests; /* #of requests to activate chan mgr */ 732 atomic_t channel_mgr_requests; /* #of requests to activate chan mgr */
583 wait_queue_head_t channel_mgr_wq; /* channel mgr's wait queue */ 733 wait_queue_head_t channel_mgr_wq; /* channel mgr's wait queue */
584 734
735 union {
736 struct xpc_partition_sn2 sn2;
737 struct xpc_partition_uv uv;
738 } sn;
739
585} ____cacheline_aligned; 740} ____cacheline_aligned;
586 741
587/* struct xpc_partition act_state values (for XPC HB) */ 742/* struct xpc_partition act_state values (for XPC HB) */
588 743
589#define XPC_P_INACTIVE 0x00 /* partition is not active */ 744#define XPC_P_AS_INACTIVE 0x00 /* partition is not active */
590#define XPC_P_ACTIVATION_REQ 0x01 /* created thread to activate */ 745#define XPC_P_AS_ACTIVATION_REQ 0x01 /* created thread to activate */
591#define XPC_P_ACTIVATING 0x02 /* activation thread started */ 746#define XPC_P_AS_ACTIVATING 0x02 /* activation thread started */
592#define XPC_P_ACTIVE 0x03 /* xpc_partition_up() was called */ 747#define XPC_P_AS_ACTIVE 0x03 /* xpc_partition_up() was called */
593#define XPC_P_DEACTIVATING 0x04 /* partition deactivation initiated */ 748#define XPC_P_AS_DEACTIVATING 0x04 /* partition deactivation initiated */
594 749
595#define XPC_DEACTIVATE_PARTITION(_p, _reason) \ 750#define XPC_DEACTIVATE_PARTITION(_p, _reason) \
596 xpc_deactivate_partition(__LINE__, (_p), (_reason)) 751 xpc_deactivate_partition(__LINE__, (_p), (_reason))
597 752
598/* struct xpc_partition setup_state values */ 753/* struct xpc_partition setup_state values */
599 754
600#define XPC_P_UNSET 0x00 /* infrastructure was never setup */ 755#define XPC_P_SS_UNSET 0x00 /* infrastructure was never setup */
601#define XPC_P_SETUP 0x01 /* infrastructure is setup */ 756#define XPC_P_SS_SETUP 0x01 /* infrastructure is setup */
602#define XPC_P_WTEARDOWN 0x02 /* waiting to teardown infrastructure */ 757#define XPC_P_SS_WTEARDOWN 0x02 /* waiting to teardown infrastructure */
603#define XPC_P_TORNDOWN 0x03 /* infrastructure is torn down */ 758#define XPC_P_SS_TORNDOWN 0x03 /* infrastructure is torn down */
604 759
605/* 760/*
606 * struct xpc_partition IPI_timer #of seconds to wait before checking for 761 * struct xpc_partition_sn2's dropped notify IRQ timer is set to wait the
607 * dropped IPIs. These occur whenever an IPI amo write doesn't complete until 762 * following number of seconds before checking for dropped notify IRQs.
608 * after the IPI was received. 763 * These can occur whenever an IRQ's associated amo write doesn't complete
764 * until after the IRQ was received.
609 */ 765 */
610#define XPC_P_DROPPED_IPI_WAIT (0.25 * HZ) 766#define XPC_DROPPED_NOTIFY_IRQ_WAIT_INTERVAL (0.25 * HZ)
611 767
612/* number of seconds to wait for other partitions to disengage */ 768/* number of seconds to wait for other partitions to disengage */
613#define XPC_DISENGAGE_REQUEST_DEFAULT_TIMELIMIT 90 769#define XPC_DISENGAGE_DEFAULT_TIMELIMIT 90
614 770
615/* interval in seconds to print 'waiting disengagement' messages */ 771/* interval in seconds to print 'waiting deactivation' messages */
616#define XPC_DISENGAGE_PRINTMSG_INTERVAL 10 772#define XPC_DEACTIVATE_PRINTMSG_INTERVAL 10
617 773
618#define XPC_PARTID(_p) ((short)((_p) - &xpc_partitions[0])) 774#define XPC_PARTID(_p) ((short)((_p) - &xpc_partitions[0]))
619 775
@@ -623,33 +779,92 @@ extern struct xpc_registration xpc_registrations[];
623/* found in xpc_main.c */ 779/* found in xpc_main.c */
624extern struct device *xpc_part; 780extern struct device *xpc_part;
625extern struct device *xpc_chan; 781extern struct device *xpc_chan;
626extern int xpc_disengage_request_timelimit; 782extern int xpc_disengage_timelimit;
627extern int xpc_disengage_request_timedout; 783extern int xpc_disengage_timedout;
628extern irqreturn_t xpc_notify_IRQ_handler(int, void *); 784extern int xpc_activate_IRQ_rcvd;
629extern void xpc_dropped_IPI_check(struct xpc_partition *); 785extern spinlock_t xpc_activate_IRQ_rcvd_lock;
786extern wait_queue_head_t xpc_activate_IRQ_wq;
787extern void *xpc_heartbeating_to_mask;
788extern void *xpc_kzalloc_cacheline_aligned(size_t, gfp_t, void **);
630extern void xpc_activate_partition(struct xpc_partition *); 789extern void xpc_activate_partition(struct xpc_partition *);
631extern void xpc_activate_kthreads(struct xpc_channel *, int); 790extern void xpc_activate_kthreads(struct xpc_channel *, int);
632extern void xpc_create_kthreads(struct xpc_channel *, int, int); 791extern void xpc_create_kthreads(struct xpc_channel *, int, int);
633extern void xpc_disconnect_wait(int); 792extern void xpc_disconnect_wait(int);
793extern int (*xpc_setup_partitions_sn) (void);
794extern enum xp_retval (*xpc_get_partition_rsvd_page_pa) (void *, u64 *,
795 unsigned long *,
796 size_t *);
797extern int (*xpc_setup_rsvd_page_sn) (struct xpc_rsvd_page *);
798extern void (*xpc_heartbeat_init) (void);
799extern void (*xpc_heartbeat_exit) (void);
800extern void (*xpc_increment_heartbeat) (void);
801extern void (*xpc_offline_heartbeat) (void);
802extern void (*xpc_online_heartbeat) (void);
803extern enum xp_retval (*xpc_get_remote_heartbeat) (struct xpc_partition *);
804extern enum xp_retval (*xpc_make_first_contact) (struct xpc_partition *);
805extern u64 (*xpc_get_chctl_all_flags) (struct xpc_partition *);
806extern enum xp_retval (*xpc_setup_msg_structures) (struct xpc_channel *);
807extern void (*xpc_teardown_msg_structures) (struct xpc_channel *);
808extern void (*xpc_notify_senders_of_disconnect) (struct xpc_channel *);
809extern void (*xpc_process_msg_chctl_flags) (struct xpc_partition *, int);
810extern int (*xpc_n_of_deliverable_payloads) (struct xpc_channel *);
811extern void *(*xpc_get_deliverable_payload) (struct xpc_channel *);
812extern void (*xpc_request_partition_activation) (struct xpc_rsvd_page *,
813 unsigned long, int);
814extern void (*xpc_request_partition_reactivation) (struct xpc_partition *);
815extern void (*xpc_request_partition_deactivation) (struct xpc_partition *);
816extern void (*xpc_cancel_partition_deactivation_request) (
817 struct xpc_partition *);
818extern void (*xpc_process_activate_IRQ_rcvd) (void);
819extern enum xp_retval (*xpc_setup_ch_structures_sn) (struct xpc_partition *);
820extern void (*xpc_teardown_ch_structures_sn) (struct xpc_partition *);
821
822extern void (*xpc_indicate_partition_engaged) (struct xpc_partition *);
823extern int (*xpc_partition_engaged) (short);
824extern int (*xpc_any_partition_engaged) (void);
825extern void (*xpc_indicate_partition_disengaged) (struct xpc_partition *);
826extern void (*xpc_assume_partition_disengaged) (short);
827
828extern void (*xpc_send_chctl_closerequest) (struct xpc_channel *,
829 unsigned long *);
830extern void (*xpc_send_chctl_closereply) (struct xpc_channel *,
831 unsigned long *);
832extern void (*xpc_send_chctl_openrequest) (struct xpc_channel *,
833 unsigned long *);
834extern void (*xpc_send_chctl_openreply) (struct xpc_channel *, unsigned long *);
835
836extern void (*xpc_save_remote_msgqueue_pa) (struct xpc_channel *,
837 unsigned long);
838
839extern enum xp_retval (*xpc_send_payload) (struct xpc_channel *, u32, void *,
840 u16, u8, xpc_notify_func, void *);
841extern void (*xpc_received_payload) (struct xpc_channel *, void *);
842
843/* found in xpc_sn2.c */
844extern int xpc_init_sn2(void);
845extern void xpc_exit_sn2(void);
846
847/* found in xpc_uv.c */
848extern int xpc_init_uv(void);
849extern void xpc_exit_uv(void);
634 850
635/* found in xpc_partition.c */ 851/* found in xpc_partition.c */
636extern int xpc_exiting; 852extern int xpc_exiting;
637extern struct xpc_vars *xpc_vars; 853extern int xpc_nasid_mask_nlongs;
638extern struct xpc_rsvd_page *xpc_rsvd_page; 854extern struct xpc_rsvd_page *xpc_rsvd_page;
639extern struct xpc_vars_part *xpc_vars_part; 855extern unsigned long *xpc_mach_nasids;
640extern struct xpc_partition xpc_partitions[XP_MAX_PARTITIONS + 1]; 856extern struct xpc_partition *xpc_partitions;
641extern char *xpc_remote_copy_buffer;
642extern void *xpc_remote_copy_buffer_base;
643extern void *xpc_kmalloc_cacheline_aligned(size_t, gfp_t, void **); 857extern void *xpc_kmalloc_cacheline_aligned(size_t, gfp_t, void **);
644extern struct xpc_rsvd_page *xpc_rsvd_page_init(void); 858extern int xpc_setup_rsvd_page(void);
645extern void xpc_allow_IPI_ops(void); 859extern void xpc_teardown_rsvd_page(void);
646extern void xpc_restrict_IPI_ops(void); 860extern int xpc_identify_activate_IRQ_sender(void);
647extern int xpc_identify_act_IRQ_sender(void);
648extern int xpc_partition_disengaged(struct xpc_partition *); 861extern int xpc_partition_disengaged(struct xpc_partition *);
649extern enum xp_retval xpc_mark_partition_active(struct xpc_partition *); 862extern enum xp_retval xpc_mark_partition_active(struct xpc_partition *);
650extern void xpc_mark_partition_inactive(struct xpc_partition *); 863extern void xpc_mark_partition_inactive(struct xpc_partition *);
651extern void xpc_discovery(void); 864extern void xpc_discovery(void);
652extern void xpc_check_remote_hb(void); 865extern enum xp_retval xpc_get_remote_rp(int, unsigned long *,
866 struct xpc_rsvd_page *,
867 unsigned long *);
653extern void xpc_deactivate_partition(const int, struct xpc_partition *, 868extern void xpc_deactivate_partition(const int, struct xpc_partition *,
654 enum xp_retval); 869 enum xp_retval);
655extern enum xp_retval xpc_initiate_partid_to_nasids(short, void *); 870extern enum xp_retval xpc_initiate_partid_to_nasids(short, void *);
@@ -657,21 +872,52 @@ extern enum xp_retval xpc_initiate_partid_to_nasids(short, void *);
657/* found in xpc_channel.c */ 872/* found in xpc_channel.c */
658extern void xpc_initiate_connect(int); 873extern void xpc_initiate_connect(int);
659extern void xpc_initiate_disconnect(int); 874extern void xpc_initiate_disconnect(int);
660extern enum xp_retval xpc_initiate_allocate(short, int, u32, void **); 875extern enum xp_retval xpc_allocate_msg_wait(struct xpc_channel *);
661extern enum xp_retval xpc_initiate_send(short, int, void *); 876extern enum xp_retval xpc_initiate_send(short, int, u32, void *, u16);
662extern enum xp_retval xpc_initiate_send_notify(short, int, void *, 877extern enum xp_retval xpc_initiate_send_notify(short, int, u32, void *, u16,
663 xpc_notify_func, void *); 878 xpc_notify_func, void *);
664extern void xpc_initiate_received(short, int, void *); 879extern void xpc_initiate_received(short, int, void *);
665extern enum xp_retval xpc_setup_infrastructure(struct xpc_partition *); 880extern void xpc_process_sent_chctl_flags(struct xpc_partition *);
666extern enum xp_retval xpc_pull_remote_vars_part(struct xpc_partition *);
667extern void xpc_process_channel_activity(struct xpc_partition *);
668extern void xpc_connected_callout(struct xpc_channel *); 881extern void xpc_connected_callout(struct xpc_channel *);
669extern void xpc_deliver_msg(struct xpc_channel *); 882extern void xpc_deliver_payload(struct xpc_channel *);
670extern void xpc_disconnect_channel(const int, struct xpc_channel *, 883extern void xpc_disconnect_channel(const int, struct xpc_channel *,
671 enum xp_retval, unsigned long *); 884 enum xp_retval, unsigned long *);
672extern void xpc_disconnect_callout(struct xpc_channel *, enum xp_retval); 885extern void xpc_disconnect_callout(struct xpc_channel *, enum xp_retval);
673extern void xpc_partition_going_down(struct xpc_partition *, enum xp_retval); 886extern void xpc_partition_going_down(struct xpc_partition *, enum xp_retval);
674extern void xpc_teardown_infrastructure(struct xpc_partition *); 887
888static inline int
889xpc_hb_allowed(short partid, void *heartbeating_to_mask)
890{
891 return test_bit(partid, heartbeating_to_mask);
892}
893
894static inline int
895xpc_any_hbs_allowed(void)
896{
897 DBUG_ON(xpc_heartbeating_to_mask == NULL);
898 return !bitmap_empty(xpc_heartbeating_to_mask, xp_max_npartitions);
899}
900
901static inline void
902xpc_allow_hb(short partid)
903{
904 DBUG_ON(xpc_heartbeating_to_mask == NULL);
905 set_bit(partid, xpc_heartbeating_to_mask);
906}
907
908static inline void
909xpc_disallow_hb(short partid)
910{
911 DBUG_ON(xpc_heartbeating_to_mask == NULL);
912 clear_bit(partid, xpc_heartbeating_to_mask);
913}
914
915static inline void
916xpc_disallow_all_hbs(void)
917{
918 DBUG_ON(xpc_heartbeating_to_mask == NULL);
919 bitmap_zero(xpc_heartbeating_to_mask, xp_max_npartitions);
920}
675 921
676static inline void 922static inline void
677xpc_wakeup_channel_mgr(struct xpc_partition *part) 923xpc_wakeup_channel_mgr(struct xpc_partition *part)
@@ -713,7 +959,7 @@ xpc_part_deref(struct xpc_partition *part)
713 s32 refs = atomic_dec_return(&part->references); 959 s32 refs = atomic_dec_return(&part->references);
714 960
715 DBUG_ON(refs < 0); 961 DBUG_ON(refs < 0);
716 if (refs == 0 && part->setup_state == XPC_P_WTEARDOWN) 962 if (refs == 0 && part->setup_state == XPC_P_SS_WTEARDOWN)
717 wake_up(&part->teardown_wq); 963 wake_up(&part->teardown_wq);
718} 964}
719 965
@@ -723,7 +969,7 @@ xpc_part_ref(struct xpc_partition *part)
723 int setup; 969 int setup;
724 970
725 atomic_inc(&part->references); 971 atomic_inc(&part->references);
726 setup = (part->setup_state == XPC_P_SETUP); 972 setup = (part->setup_state == XPC_P_SS_SETUP);
727 if (!setup) 973 if (!setup)
728 xpc_part_deref(part); 974 xpc_part_deref(part);
729 975
@@ -741,416 +987,4 @@ xpc_part_ref(struct xpc_partition *part)
741 (_p)->reason_line = _line; \ 987 (_p)->reason_line = _line; \
742 } 988 }
743 989
744/*
745 * This next set of inlines is used to keep track of when a partition is
746 * potentially engaged in accessing memory belonging to another partition.
747 */
748
749static inline void
750xpc_mark_partition_engaged(struct xpc_partition *part)
751{
752 unsigned long irq_flags;
753 AMO_t *amo = (AMO_t *)__va(part->remote_amos_page_pa +
754 (XPC_ENGAGED_PARTITIONS_AMO *
755 sizeof(AMO_t)));
756
757 local_irq_save(irq_flags);
758
759 /* set bit corresponding to our partid in remote partition's AMO */
760 FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_OR,
761 (1UL << sn_partition_id));
762 /*
763 * We must always use the nofault function regardless of whether we
764 * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
765 * didn't, we'd never know that the other partition is down and would
766 * keep sending IPIs and AMOs to it until the heartbeat times out.
767 */
768 (void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo->
769 variable),
770 xp_nofault_PIOR_target));
771
772 local_irq_restore(irq_flags);
773}
774
775static inline void
776xpc_mark_partition_disengaged(struct xpc_partition *part)
777{
778 unsigned long irq_flags;
779 AMO_t *amo = (AMO_t *)__va(part->remote_amos_page_pa +
780 (XPC_ENGAGED_PARTITIONS_AMO *
781 sizeof(AMO_t)));
782
783 local_irq_save(irq_flags);
784
785 /* clear bit corresponding to our partid in remote partition's AMO */
786 FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_AND,
787 ~(1UL << sn_partition_id));
788 /*
789 * We must always use the nofault function regardless of whether we
790 * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
791 * didn't, we'd never know that the other partition is down and would
792 * keep sending IPIs and AMOs to it until the heartbeat times out.
793 */
794 (void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo->
795 variable),
796 xp_nofault_PIOR_target));
797
798 local_irq_restore(irq_flags);
799}
800
801static inline void
802xpc_request_partition_disengage(struct xpc_partition *part)
803{
804 unsigned long irq_flags;
805 AMO_t *amo = (AMO_t *)__va(part->remote_amos_page_pa +
806 (XPC_DISENGAGE_REQUEST_AMO * sizeof(AMO_t)));
807
808 local_irq_save(irq_flags);
809
810 /* set bit corresponding to our partid in remote partition's AMO */
811 FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_OR,
812 (1UL << sn_partition_id));
813 /*
814 * We must always use the nofault function regardless of whether we
815 * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
816 * didn't, we'd never know that the other partition is down and would
817 * keep sending IPIs and AMOs to it until the heartbeat times out.
818 */
819 (void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo->
820 variable),
821 xp_nofault_PIOR_target));
822
823 local_irq_restore(irq_flags);
824}
825
826static inline void
827xpc_cancel_partition_disengage_request(struct xpc_partition *part)
828{
829 unsigned long irq_flags;
830 AMO_t *amo = (AMO_t *)__va(part->remote_amos_page_pa +
831 (XPC_DISENGAGE_REQUEST_AMO * sizeof(AMO_t)));
832
833 local_irq_save(irq_flags);
834
835 /* clear bit corresponding to our partid in remote partition's AMO */
836 FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_AND,
837 ~(1UL << sn_partition_id));
838 /*
839 * We must always use the nofault function regardless of whether we
840 * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
841 * didn't, we'd never know that the other partition is down and would
842 * keep sending IPIs and AMOs to it until the heartbeat times out.
843 */
844 (void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo->
845 variable),
846 xp_nofault_PIOR_target));
847
848 local_irq_restore(irq_flags);
849}
850
851static inline u64
852xpc_partition_engaged(u64 partid_mask)
853{
854 AMO_t *amo = xpc_vars->amos_page + XPC_ENGAGED_PARTITIONS_AMO;
855
856 /* return our partition's AMO variable ANDed with partid_mask */
857 return (FETCHOP_LOAD_OP(TO_AMO((u64)&amo->variable), FETCHOP_LOAD) &
858 partid_mask);
859}
860
861static inline u64
862xpc_partition_disengage_requested(u64 partid_mask)
863{
864 AMO_t *amo = xpc_vars->amos_page + XPC_DISENGAGE_REQUEST_AMO;
865
866 /* return our partition's AMO variable ANDed with partid_mask */
867 return (FETCHOP_LOAD_OP(TO_AMO((u64)&amo->variable), FETCHOP_LOAD) &
868 partid_mask);
869}
870
871static inline void
872xpc_clear_partition_engaged(u64 partid_mask)
873{
874 AMO_t *amo = xpc_vars->amos_page + XPC_ENGAGED_PARTITIONS_AMO;
875
876 /* clear bit(s) based on partid_mask in our partition's AMO */
877 FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_AND,
878 ~partid_mask);
879}
880
881static inline void
882xpc_clear_partition_disengage_request(u64 partid_mask)
883{
884 AMO_t *amo = xpc_vars->amos_page + XPC_DISENGAGE_REQUEST_AMO;
885
886 /* clear bit(s) based on partid_mask in our partition's AMO */
887 FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_AND,
888 ~partid_mask);
889}
890
891/*
892 * The following set of macros and inlines are used for the sending and
893 * receiving of IPIs (also known as IRQs). There are two flavors of IPIs,
894 * one that is associated with partition activity (SGI_XPC_ACTIVATE) and
895 * the other that is associated with channel activity (SGI_XPC_NOTIFY).
896 */
897
898static inline u64
899xpc_IPI_receive(AMO_t *amo)
900{
901 return FETCHOP_LOAD_OP(TO_AMO((u64)&amo->variable), FETCHOP_CLEAR);
902}
903
904static inline enum xp_retval
905xpc_IPI_send(AMO_t *amo, u64 flag, int nasid, int phys_cpuid, int vector)
906{
907 int ret = 0;
908 unsigned long irq_flags;
909
910 local_irq_save(irq_flags);
911
912 FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_OR, flag);
913 sn_send_IPI_phys(nasid, phys_cpuid, vector, 0);
914
915 /*
916 * We must always use the nofault function regardless of whether we
917 * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
918 * didn't, we'd never know that the other partition is down and would
919 * keep sending IPIs and AMOs to it until the heartbeat times out.
920 */
921 ret = xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo->variable),
922 xp_nofault_PIOR_target));
923
924 local_irq_restore(irq_flags);
925
926 return ((ret == 0) ? xpSuccess : xpPioReadError);
927}
928
929/*
930 * IPIs associated with SGI_XPC_ACTIVATE IRQ.
931 */
932
933/*
934 * Flag the appropriate AMO variable and send an IPI to the specified node.
935 */
936static inline void
937xpc_activate_IRQ_send(u64 amos_page_pa, int from_nasid, int to_nasid,
938 int to_phys_cpuid)
939{
940 int w_index = XPC_NASID_W_INDEX(from_nasid);
941 int b_index = XPC_NASID_B_INDEX(from_nasid);
942 AMO_t *amos = (AMO_t *)__va(amos_page_pa +
943 (XPC_ACTIVATE_IRQ_AMOS * sizeof(AMO_t)));
944
945 (void)xpc_IPI_send(&amos[w_index], (1UL << b_index), to_nasid,
946 to_phys_cpuid, SGI_XPC_ACTIVATE);
947}
948
949static inline void
950xpc_IPI_send_activate(struct xpc_vars *vars)
951{
952 xpc_activate_IRQ_send(vars->amos_page_pa, cnodeid_to_nasid(0),
953 vars->act_nasid, vars->act_phys_cpuid);
954}
955
956static inline void
957xpc_IPI_send_activated(struct xpc_partition *part)
958{
959 xpc_activate_IRQ_send(part->remote_amos_page_pa, cnodeid_to_nasid(0),
960 part->remote_act_nasid,
961 part->remote_act_phys_cpuid);
962}
963
964static inline void
965xpc_IPI_send_reactivate(struct xpc_partition *part)
966{
967 xpc_activate_IRQ_send(xpc_vars->amos_page_pa, part->reactivate_nasid,
968 xpc_vars->act_nasid, xpc_vars->act_phys_cpuid);
969}
970
971static inline void
972xpc_IPI_send_disengage(struct xpc_partition *part)
973{
974 xpc_activate_IRQ_send(part->remote_amos_page_pa, cnodeid_to_nasid(0),
975 part->remote_act_nasid,
976 part->remote_act_phys_cpuid);
977}
978
979/*
980 * IPIs associated with SGI_XPC_NOTIFY IRQ.
981 */
982
983/*
984 * Send an IPI to the remote partition that is associated with the
985 * specified channel.
986 */
987#define XPC_NOTIFY_IRQ_SEND(_ch, _ipi_f, _irq_f) \
988 xpc_notify_IRQ_send(_ch, _ipi_f, #_ipi_f, _irq_f)
989
990static inline void
991xpc_notify_IRQ_send(struct xpc_channel *ch, u8 ipi_flag, char *ipi_flag_string,
992 unsigned long *irq_flags)
993{
994 struct xpc_partition *part = &xpc_partitions[ch->partid];
995 enum xp_retval ret;
996
997 if (likely(part->act_state != XPC_P_DEACTIVATING)) {
998 ret = xpc_IPI_send(part->remote_IPI_amo_va,
999 (u64)ipi_flag << (ch->number * 8),
1000 part->remote_IPI_nasid,
1001 part->remote_IPI_phys_cpuid, SGI_XPC_NOTIFY);
1002 dev_dbg(xpc_chan, "%s sent to partid=%d, channel=%d, ret=%d\n",
1003 ipi_flag_string, ch->partid, ch->number, ret);
1004 if (unlikely(ret != xpSuccess)) {
1005 if (irq_flags != NULL)
1006 spin_unlock_irqrestore(&ch->lock, *irq_flags);
1007 XPC_DEACTIVATE_PARTITION(part, ret);
1008 if (irq_flags != NULL)
1009 spin_lock_irqsave(&ch->lock, *irq_flags);
1010 }
1011 }
1012}
1013
1014/*
1015 * Make it look like the remote partition, which is associated with the
1016 * specified channel, sent us an IPI. This faked IPI will be handled
1017 * by xpc_dropped_IPI_check().
1018 */
1019#define XPC_NOTIFY_IRQ_SEND_LOCAL(_ch, _ipi_f) \
1020 xpc_notify_IRQ_send_local(_ch, _ipi_f, #_ipi_f)
1021
1022static inline void
1023xpc_notify_IRQ_send_local(struct xpc_channel *ch, u8 ipi_flag,
1024 char *ipi_flag_string)
1025{
1026 struct xpc_partition *part = &xpc_partitions[ch->partid];
1027
1028 FETCHOP_STORE_OP(TO_AMO((u64)&part->local_IPI_amo_va->variable),
1029 FETCHOP_OR, ((u64)ipi_flag << (ch->number * 8)));
1030 dev_dbg(xpc_chan, "%s sent local from partid=%d, channel=%d\n",
1031 ipi_flag_string, ch->partid, ch->number);
1032}
1033
1034/*
1035 * The sending and receiving of IPIs includes the setting of an AMO variable
1036 * to indicate the reason the IPI was sent. The 64-bit variable is divided
1037 * up into eight bytes, ordered from right to left. Byte zero pertains to
1038 * channel 0, byte one to channel 1, and so on. Each byte is described by
1039 * the following IPI flags.
1040 */
1041
1042#define XPC_IPI_CLOSEREQUEST 0x01
1043#define XPC_IPI_CLOSEREPLY 0x02
1044#define XPC_IPI_OPENREQUEST 0x04
1045#define XPC_IPI_OPENREPLY 0x08
1046#define XPC_IPI_MSGREQUEST 0x10
1047
1048/* given an AMO variable and a channel#, get its associated IPI flags */
1049#define XPC_GET_IPI_FLAGS(_amo, _c) ((u8) (((_amo) >> ((_c) * 8)) & 0xff))
1050#define XPC_SET_IPI_FLAGS(_amo, _c, _f) (_amo) |= ((u64) (_f) << ((_c) * 8))
1051
1052#define XPC_ANY_OPENCLOSE_IPI_FLAGS_SET(_amo) ((_amo) & 0x0f0f0f0f0f0f0f0fUL)
1053#define XPC_ANY_MSG_IPI_FLAGS_SET(_amo) ((_amo) & 0x1010101010101010UL)
1054
1055static inline void
1056xpc_IPI_send_closerequest(struct xpc_channel *ch, unsigned long *irq_flags)
1057{
1058 struct xpc_openclose_args *args = ch->local_openclose_args;
1059
1060 args->reason = ch->reason;
1061
1062 XPC_NOTIFY_IRQ_SEND(ch, XPC_IPI_CLOSEREQUEST, irq_flags);
1063}
1064
1065static inline void
1066xpc_IPI_send_closereply(struct xpc_channel *ch, unsigned long *irq_flags)
1067{
1068 XPC_NOTIFY_IRQ_SEND(ch, XPC_IPI_CLOSEREPLY, irq_flags);
1069}
1070
1071static inline void
1072xpc_IPI_send_openrequest(struct xpc_channel *ch, unsigned long *irq_flags)
1073{
1074 struct xpc_openclose_args *args = ch->local_openclose_args;
1075
1076 args->msg_size = ch->msg_size;
1077 args->local_nentries = ch->local_nentries;
1078
1079 XPC_NOTIFY_IRQ_SEND(ch, XPC_IPI_OPENREQUEST, irq_flags);
1080}
1081
1082static inline void
1083xpc_IPI_send_openreply(struct xpc_channel *ch, unsigned long *irq_flags)
1084{
1085 struct xpc_openclose_args *args = ch->local_openclose_args;
1086
1087 args->remote_nentries = ch->remote_nentries;
1088 args->local_nentries = ch->local_nentries;
1089 args->local_msgqueue_pa = __pa(ch->local_msgqueue);
1090
1091 XPC_NOTIFY_IRQ_SEND(ch, XPC_IPI_OPENREPLY, irq_flags);
1092}
1093
1094static inline void
1095xpc_IPI_send_msgrequest(struct xpc_channel *ch)
1096{
1097 XPC_NOTIFY_IRQ_SEND(ch, XPC_IPI_MSGREQUEST, NULL);
1098}
1099
1100static inline void
1101xpc_IPI_send_local_msgrequest(struct xpc_channel *ch)
1102{
1103 XPC_NOTIFY_IRQ_SEND_LOCAL(ch, XPC_IPI_MSGREQUEST);
1104}
1105
1106/*
1107 * Memory for XPC's AMO variables is allocated by the MSPEC driver. These
1108 * pages are located in the lowest granule. The lowest granule uses 4k pages
1109 * for cached references and an alternate TLB handler to never provide a
1110 * cacheable mapping for the entire region. This will prevent speculative
1111 * reading of cached copies of our lines from being issued which will cause
1112 * a PI FSB Protocol error to be generated by the SHUB. For XPC, we need 64
1113 * AMO variables (based on XP_MAX_PARTITIONS) for message notification and an
1114 * additional 128 AMO variables (based on XP_NASID_MASK_WORDS) for partition
1115 * activation and 2 AMO variables for partition deactivation.
1116 */
1117static inline AMO_t *
1118xpc_IPI_init(int index)
1119{
1120 AMO_t *amo = xpc_vars->amos_page + index;
1121
1122 (void)xpc_IPI_receive(amo); /* clear AMO variable */
1123 return amo;
1124}
1125
1126static inline enum xp_retval
1127xpc_map_bte_errors(bte_result_t error)
1128{
1129 return ((error == BTE_SUCCESS) ? xpSuccess : xpBteCopyError);
1130}
1131
1132/*
1133 * Check to see if there is any channel activity to/from the specified
1134 * partition.
1135 */
1136static inline void
1137xpc_check_for_channel_activity(struct xpc_partition *part)
1138{
1139 u64 IPI_amo;
1140 unsigned long irq_flags;
1141
1142 IPI_amo = xpc_IPI_receive(part->local_IPI_amo_va);
1143 if (IPI_amo == 0)
1144 return;
1145
1146 spin_lock_irqsave(&part->IPI_lock, irq_flags);
1147 part->local_IPI_amo |= IPI_amo;
1148 spin_unlock_irqrestore(&part->IPI_lock, irq_flags);
1149
1150 dev_dbg(xpc_chan, "received IPI from partid=%d, IPI_amo=0x%lx\n",
1151 XPC_PARTID(part), IPI_amo);
1152
1153 xpc_wakeup_channel_mgr(part);
1154}
1155
1156#endif /* _DRIVERS_MISC_SGIXP_XPC_H */ 990#endif /* _DRIVERS_MISC_SGIXP_XPC_H */
diff --git a/drivers/misc/sgi-xp/xpc_channel.c b/drivers/misc/sgi-xp/xpc_channel.c
index 9c90c2d55c08..9cd2ebe2a3b6 100644
--- a/drivers/misc/sgi-xp/xpc_channel.c
+++ b/drivers/misc/sgi-xp/xpc_channel.c
@@ -14,536 +14,10 @@
14 * 14 *
15 */ 15 */
16 16
17#include <linux/kernel.h> 17#include <linux/device.h>
18#include <linux/init.h>
19#include <linux/sched.h>
20#include <linux/cache.h>
21#include <linux/interrupt.h>
22#include <linux/mutex.h>
23#include <linux/completion.h>
24#include <asm/sn/bte.h>
25#include <asm/sn/sn_sal.h>
26#include "xpc.h" 18#include "xpc.h"
27 19
28/* 20/*
29 * Guarantee that the kzalloc'd memory is cacheline aligned.
30 */
31static void *
32xpc_kzalloc_cacheline_aligned(size_t size, gfp_t flags, void **base)
33{
34 /* see if kzalloc will give us cacheline-aligned memory by default */
35 *base = kzalloc(size, flags);
36 if (*base == NULL)
37 return NULL;
38
39 if ((u64)*base == L1_CACHE_ALIGN((u64)*base))
40 return *base;
41
42 kfree(*base);
43
44 /* nope, we'll have to do it ourselves */
45 *base = kzalloc(size + L1_CACHE_BYTES, flags);
46 if (*base == NULL)
47 return NULL;
48
49 return (void *)L1_CACHE_ALIGN((u64)*base);
50}
51
52/*
53 * Set up the initial values for the XPartition Communication channels.
54 */
55static void
56xpc_initialize_channels(struct xpc_partition *part, short partid)
57{
58 int ch_number;
59 struct xpc_channel *ch;
60
61 for (ch_number = 0; ch_number < part->nchannels; ch_number++) {
62 ch = &part->channels[ch_number];
63
64 ch->partid = partid;
65 ch->number = ch_number;
66 ch->flags = XPC_C_DISCONNECTED;
67
68 ch->local_GP = &part->local_GPs[ch_number];
69 ch->local_openclose_args =
70 &part->local_openclose_args[ch_number];
71
72 atomic_set(&ch->kthreads_assigned, 0);
73 atomic_set(&ch->kthreads_idle, 0);
74 atomic_set(&ch->kthreads_active, 0);
75
76 atomic_set(&ch->references, 0);
77 atomic_set(&ch->n_to_notify, 0);
78
79 spin_lock_init(&ch->lock);
80 mutex_init(&ch->msg_to_pull_mutex);
81 init_completion(&ch->wdisconnect_wait);
82
83 atomic_set(&ch->n_on_msg_allocate_wq, 0);
84 init_waitqueue_head(&ch->msg_allocate_wq);
85 init_waitqueue_head(&ch->idle_wq);
86 }
87}
88
89/*
90 * Setup the infrastructure necessary to support XPartition Communication
91 * between the specified remote partition and the local one.
92 */
93enum xp_retval
94xpc_setup_infrastructure(struct xpc_partition *part)
95{
96 int ret, cpuid;
97 struct timer_list *timer;
98 short partid = XPC_PARTID(part);
99
100 /*
101 * Zero out MOST of the entry for this partition. Only the fields
102 * starting with `nchannels' will be zeroed. The preceding fields must
103 * remain `viable' across partition ups and downs, since they may be
104 * referenced during this memset() operation.
105 */
106 memset(&part->nchannels, 0, sizeof(struct xpc_partition) -
107 offsetof(struct xpc_partition, nchannels));
108
109 /*
110 * Allocate all of the channel structures as a contiguous chunk of
111 * memory.
112 */
113 part->channels = kzalloc(sizeof(struct xpc_channel) * XPC_NCHANNELS,
114 GFP_KERNEL);
115 if (part->channels == NULL) {
116 dev_err(xpc_chan, "can't get memory for channels\n");
117 return xpNoMemory;
118 }
119
120 part->nchannels = XPC_NCHANNELS;
121
122 /* allocate all the required GET/PUT values */
123
124 part->local_GPs = xpc_kzalloc_cacheline_aligned(XPC_GP_SIZE,
125 GFP_KERNEL,
126 &part->local_GPs_base);
127 if (part->local_GPs == NULL) {
128 kfree(part->channels);
129 part->channels = NULL;
130 dev_err(xpc_chan, "can't get memory for local get/put "
131 "values\n");
132 return xpNoMemory;
133 }
134
135 part->remote_GPs = xpc_kzalloc_cacheline_aligned(XPC_GP_SIZE,
136 GFP_KERNEL,
137 &part->
138 remote_GPs_base);
139 if (part->remote_GPs == NULL) {
140 dev_err(xpc_chan, "can't get memory for remote get/put "
141 "values\n");
142 kfree(part->local_GPs_base);
143 part->local_GPs = NULL;
144 kfree(part->channels);
145 part->channels = NULL;
146 return xpNoMemory;
147 }
148
149 /* allocate all the required open and close args */
150
151 part->local_openclose_args =
152 xpc_kzalloc_cacheline_aligned(XPC_OPENCLOSE_ARGS_SIZE, GFP_KERNEL,
153 &part->local_openclose_args_base);
154 if (part->local_openclose_args == NULL) {
155 dev_err(xpc_chan, "can't get memory for local connect args\n");
156 kfree(part->remote_GPs_base);
157 part->remote_GPs = NULL;
158 kfree(part->local_GPs_base);
159 part->local_GPs = NULL;
160 kfree(part->channels);
161 part->channels = NULL;
162 return xpNoMemory;
163 }
164
165 part->remote_openclose_args =
166 xpc_kzalloc_cacheline_aligned(XPC_OPENCLOSE_ARGS_SIZE, GFP_KERNEL,
167 &part->remote_openclose_args_base);
168 if (part->remote_openclose_args == NULL) {
169 dev_err(xpc_chan, "can't get memory for remote connect args\n");
170 kfree(part->local_openclose_args_base);
171 part->local_openclose_args = NULL;
172 kfree(part->remote_GPs_base);
173 part->remote_GPs = NULL;
174 kfree(part->local_GPs_base);
175 part->local_GPs = NULL;
176 kfree(part->channels);
177 part->channels = NULL;
178 return xpNoMemory;
179 }
180
181 xpc_initialize_channels(part, partid);
182
183 atomic_set(&part->nchannels_active, 0);
184 atomic_set(&part->nchannels_engaged, 0);
185
186 /* local_IPI_amo were set to 0 by an earlier memset() */
187
188	/* Initialize this partition's AMO_t structure */
189 part->local_IPI_amo_va = xpc_IPI_init(partid);
190
191 spin_lock_init(&part->IPI_lock);
192
193 atomic_set(&part->channel_mgr_requests, 1);
194 init_waitqueue_head(&part->channel_mgr_wq);
195
196 sprintf(part->IPI_owner, "xpc%02d", partid);
197 ret = request_irq(SGI_XPC_NOTIFY, xpc_notify_IRQ_handler, IRQF_SHARED,
198 part->IPI_owner, (void *)(u64)partid);
199 if (ret != 0) {
200 dev_err(xpc_chan, "can't register NOTIFY IRQ handler, "
201 "errno=%d\n", -ret);
202 kfree(part->remote_openclose_args_base);
203 part->remote_openclose_args = NULL;
204 kfree(part->local_openclose_args_base);
205 part->local_openclose_args = NULL;
206 kfree(part->remote_GPs_base);
207 part->remote_GPs = NULL;
208 kfree(part->local_GPs_base);
209 part->local_GPs = NULL;
210 kfree(part->channels);
211 part->channels = NULL;
212 return xpLackOfResources;
213 }
214
215 /* Setup a timer to check for dropped IPIs */
216 timer = &part->dropped_IPI_timer;
217 init_timer(timer);
218 timer->function = (void (*)(unsigned long))xpc_dropped_IPI_check;
219 timer->data = (unsigned long)part;
220 timer->expires = jiffies + XPC_P_DROPPED_IPI_WAIT;
221 add_timer(timer);
222
223 /*
224 * With the setting of the partition setup_state to XPC_P_SETUP, we're
225 * declaring that this partition is ready to go.
226 */
227 part->setup_state = XPC_P_SETUP;
228
229 /*
230 * Setup the per partition specific variables required by the
231 * remote partition to establish channel connections with us.
232 *
233 * The setting of the magic # indicates that these per partition
234 * specific variables are ready to be used.
235 */
236 xpc_vars_part[partid].GPs_pa = __pa(part->local_GPs);
237 xpc_vars_part[partid].openclose_args_pa =
238 __pa(part->local_openclose_args);
239 xpc_vars_part[partid].IPI_amo_pa = __pa(part->local_IPI_amo_va);
240 cpuid = raw_smp_processor_id(); /* any CPU in this partition will do */
241 xpc_vars_part[partid].IPI_nasid = cpuid_to_nasid(cpuid);
242 xpc_vars_part[partid].IPI_phys_cpuid = cpu_physical_id(cpuid);
243 xpc_vars_part[partid].nchannels = part->nchannels;
244 xpc_vars_part[partid].magic = XPC_VP_MAGIC1;
245
246 return xpSuccess;
247}
248
249/*
250 * Create a wrapper that hides the underlying mechanism for pulling a cacheline
251 * (or multiple cachelines) from a remote partition.
252 *
253 * src must be a cacheline aligned physical address on the remote partition.
254 * dst must be a cacheline aligned virtual address on this partition.
255 * cnt must be a cacheline-sized multiple.
256 */
257static enum xp_retval
258xpc_pull_remote_cachelines(struct xpc_partition *part, void *dst,
259 const void *src, size_t cnt)
260{
261 bte_result_t bte_ret;
262
263 DBUG_ON((u64)src != L1_CACHE_ALIGN((u64)src));
264 DBUG_ON((u64)dst != L1_CACHE_ALIGN((u64)dst));
265 DBUG_ON(cnt != L1_CACHE_ALIGN(cnt));
266
267 if (part->act_state == XPC_P_DEACTIVATING)
268 return part->reason;
269
270 bte_ret = xp_bte_copy((u64)src, (u64)dst, (u64)cnt,
271 (BTE_NORMAL | BTE_WACQUIRE), NULL);
272 if (bte_ret == BTE_SUCCESS)
273 return xpSuccess;
274
275 dev_dbg(xpc_chan, "xp_bte_copy() from partition %d failed, ret=%d\n",
276 XPC_PARTID(part), bte_ret);
277
278 return xpc_map_bte_errors(bte_ret);
279}
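
xpc_pull_remote_vars_part() below copies a whole cacheline even though each
xpc_vars_part entry is only half a line, then re-applies the entry's offset
within that line. A sketch of the address arithmetic, with hypothetical names
and a fixed 128-byte line:

#include <stdint.h>

#define CLINE 128UL

/* given the entry's remote physical address and the local buffer holding
 * the pulled cacheline, return where the entry sits inside that buffer */
static void *entry_in_pulled_line(uint64_t remote_entry_pa, void *pulled_line)
{
	uint64_t line_pa = remote_entry_pa & ~(CLINE - 1);	/* line start */
	uint64_t offset = remote_entry_pa & (CLINE - 1);	/* within line */

	(void)line_pa;	/* this is the source address the BTE copy would use */
	return (char *)pulled_line + offset;
}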
280
281/*
282 * Pull the remote per partition specific variables from the specified
283 * partition.
284 */
285enum xp_retval
286xpc_pull_remote_vars_part(struct xpc_partition *part)
287{
288 u8 buffer[L1_CACHE_BYTES * 2];
289 struct xpc_vars_part *pulled_entry_cacheline =
290 (struct xpc_vars_part *)L1_CACHE_ALIGN((u64)buffer);
291 struct xpc_vars_part *pulled_entry;
292 u64 remote_entry_cacheline_pa, remote_entry_pa;
293 short partid = XPC_PARTID(part);
294 enum xp_retval ret;
295
296 /* pull the cacheline that contains the variables we're interested in */
297
298 DBUG_ON(part->remote_vars_part_pa !=
299 L1_CACHE_ALIGN(part->remote_vars_part_pa));
300 DBUG_ON(sizeof(struct xpc_vars_part) != L1_CACHE_BYTES / 2);
301
302 remote_entry_pa = part->remote_vars_part_pa +
303 sn_partition_id * sizeof(struct xpc_vars_part);
304
305 remote_entry_cacheline_pa = (remote_entry_pa & ~(L1_CACHE_BYTES - 1));
306
307 pulled_entry = (struct xpc_vars_part *)((u64)pulled_entry_cacheline +
308 (remote_entry_pa &
309 (L1_CACHE_BYTES - 1)));
310
311 ret = xpc_pull_remote_cachelines(part, pulled_entry_cacheline,
312 (void *)remote_entry_cacheline_pa,
313 L1_CACHE_BYTES);
314 if (ret != xpSuccess) {
315 dev_dbg(xpc_chan, "failed to pull XPC vars_part from "
316 "partition %d, ret=%d\n", partid, ret);
317 return ret;
318 }
319
320 /* see if they've been set up yet */
321
322 if (pulled_entry->magic != XPC_VP_MAGIC1 &&
323 pulled_entry->magic != XPC_VP_MAGIC2) {
324
325 if (pulled_entry->magic != 0) {
326 dev_dbg(xpc_chan, "partition %d's XPC vars_part for "
327 "partition %d has bad magic value (=0x%lx)\n",
328 partid, sn_partition_id, pulled_entry->magic);
329 return xpBadMagic;
330 }
331
332 /* they've not been initialized yet */
333 return xpRetry;
334 }
335
336 if (xpc_vars_part[partid].magic == XPC_VP_MAGIC1) {
337
338 /* validate the variables */
339
340 if (pulled_entry->GPs_pa == 0 ||
341 pulled_entry->openclose_args_pa == 0 ||
342 pulled_entry->IPI_amo_pa == 0) {
343
344 dev_err(xpc_chan, "partition %d's XPC vars_part for "
345 "partition %d are not valid\n", partid,
346 sn_partition_id);
347 return xpInvalidAddress;
348 }
349
350 /* the variables we imported look to be valid */
351
352 part->remote_GPs_pa = pulled_entry->GPs_pa;
353 part->remote_openclose_args_pa =
354 pulled_entry->openclose_args_pa;
355 part->remote_IPI_amo_va =
356 (AMO_t *)__va(pulled_entry->IPI_amo_pa);
357 part->remote_IPI_nasid = pulled_entry->IPI_nasid;
358 part->remote_IPI_phys_cpuid = pulled_entry->IPI_phys_cpuid;
359
360 if (part->nchannels > pulled_entry->nchannels)
361 part->nchannels = pulled_entry->nchannels;
362
363 /* let the other side know that we've pulled their variables */
364
365 xpc_vars_part[partid].magic = XPC_VP_MAGIC2;
366 }
367
368 if (pulled_entry->magic == XPC_VP_MAGIC1)
369 return xpRetry;
370
371 return xpSuccess;
372}
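
The magic numbers implement a two-phase handshake: MAGIC1 says "my
per-partition variables are initialized", MAGIC2 adds "and I have also pulled
yours". A compressed sketch of that state logic, using simplified enums in
place of the driver's 64-bit magic constants:

enum vp_magic { VP_UNSET, VP_MAGIC1, VP_MAGIC2 };
enum pull_result { PULL_OK, PULL_RETRY, PULL_BAD_MAGIC };

static enum pull_result
check_handshake(enum vp_magic theirs, enum vp_magic *ours)
{
	if (theirs != VP_MAGIC1 && theirs != VP_MAGIC2)
		return (theirs == VP_UNSET) ? PULL_RETRY : PULL_BAD_MAGIC;

	if (*ours == VP_MAGIC1)
		*ours = VP_MAGIC2;	/* tell them we've pulled their vars */

	/* not usable until they have pulled our variables as well */
	return (theirs == VP_MAGIC1) ? PULL_RETRY : PULL_OK;
}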
373
374/*
375 * Get the IPI flags and pull the openclose args and/or remote GPs as needed.
376 */
377static u64
378xpc_get_IPI_flags(struct xpc_partition *part)
379{
380 unsigned long irq_flags;
381 u64 IPI_amo;
382 enum xp_retval ret;
383
384 /*
385 * See if there are any IPI flags to be handled.
386 */
387
388 spin_lock_irqsave(&part->IPI_lock, irq_flags);
389 IPI_amo = part->local_IPI_amo;
390 if (IPI_amo != 0)
391 part->local_IPI_amo = 0;
392
393 spin_unlock_irqrestore(&part->IPI_lock, irq_flags);
394
395 if (XPC_ANY_OPENCLOSE_IPI_FLAGS_SET(IPI_amo)) {
396 ret = xpc_pull_remote_cachelines(part,
397 part->remote_openclose_args,
398 (void *)part->
399 remote_openclose_args_pa,
400 XPC_OPENCLOSE_ARGS_SIZE);
401 if (ret != xpSuccess) {
402 XPC_DEACTIVATE_PARTITION(part, ret);
403
404 dev_dbg(xpc_chan, "failed to pull openclose args from "
405 "partition %d, ret=%d\n", XPC_PARTID(part),
406 ret);
407
408 /* don't bother processing IPIs anymore */
409 IPI_amo = 0;
410 }
411 }
412
413 if (XPC_ANY_MSG_IPI_FLAGS_SET(IPI_amo)) {
414 ret = xpc_pull_remote_cachelines(part, part->remote_GPs,
415 (void *)part->remote_GPs_pa,
416 XPC_GP_SIZE);
417 if (ret != xpSuccess) {
418 XPC_DEACTIVATE_PARTITION(part, ret);
419
420 dev_dbg(xpc_chan, "failed to pull GPs from partition "
421 "%d, ret=%d\n", XPC_PARTID(part), ret);
422
423 /* don't bother processing IPIs anymore */
424 IPI_amo = 0;
425 }
426 }
427
428 return IPI_amo;
429}
430
431/*
432 * Allocate the local message queue and the notify queue.
433 */
434static enum xp_retval
435xpc_allocate_local_msgqueue(struct xpc_channel *ch)
436{
437 unsigned long irq_flags;
438 int nentries;
439 size_t nbytes;
440
441 for (nentries = ch->local_nentries; nentries > 0; nentries--) {
442
443 nbytes = nentries * ch->msg_size;
444 ch->local_msgqueue = xpc_kzalloc_cacheline_aligned(nbytes,
445 GFP_KERNEL,
446 &ch->local_msgqueue_base);
447 if (ch->local_msgqueue == NULL)
448 continue;
449
450 nbytes = nentries * sizeof(struct xpc_notify);
451 ch->notify_queue = kzalloc(nbytes, GFP_KERNEL);
452 if (ch->notify_queue == NULL) {
453 kfree(ch->local_msgqueue_base);
454 ch->local_msgqueue = NULL;
455 continue;
456 }
457
458 spin_lock_irqsave(&ch->lock, irq_flags);
459 if (nentries < ch->local_nentries) {
460 dev_dbg(xpc_chan, "nentries=%d local_nentries=%d, "
461 "partid=%d, channel=%d\n", nentries,
462 ch->local_nentries, ch->partid, ch->number);
463
464 ch->local_nentries = nentries;
465 }
466 spin_unlock_irqrestore(&ch->lock, irq_flags);
467 return xpSuccess;
468 }
469
470 dev_dbg(xpc_chan, "can't get memory for local message queue and notify "
471 "queue, partid=%d, channel=%d\n", ch->partid, ch->number);
472 return xpNoMemory;
473}
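
Both queue allocators share a degrade-and-retry shape: start at the requested
entry count and shrink one entry at a time until an allocation succeeds,
recording the count actually obtained. The pattern in isolation, as a
hypothetical userspace helper:

#include <stdlib.h>

static void *alloc_msgqueue(int *nentries, size_t entry_size)
{
	int n;
	void *q;

	for (n = *nentries; n > 0; n--) {
		q = calloc((size_t)n, entry_size);
		if (q != NULL) {
			*nentries = n;	/* may be fewer than requested */
			return q;
		}
	}
	return NULL;	/* not even a single entry could be allocated */
}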
474
475/*
476 * Allocate the cached remote message queue.
477 */
478static enum xp_retval
479xpc_allocate_remote_msgqueue(struct xpc_channel *ch)
480{
481 unsigned long irq_flags;
482 int nentries;
483 size_t nbytes;
484
485 DBUG_ON(ch->remote_nentries <= 0);
486
487 for (nentries = ch->remote_nentries; nentries > 0; nentries--) {
488
489 nbytes = nentries * ch->msg_size;
490 ch->remote_msgqueue = xpc_kzalloc_cacheline_aligned(nbytes,
491 GFP_KERNEL,
492 &ch->remote_msgqueue_base);
493 if (ch->remote_msgqueue == NULL)
494 continue;
495
496 spin_lock_irqsave(&ch->lock, irq_flags);
497 if (nentries < ch->remote_nentries) {
498 dev_dbg(xpc_chan, "nentries=%d remote_nentries=%d, "
499 "partid=%d, channel=%d\n", nentries,
500 ch->remote_nentries, ch->partid, ch->number);
501
502 ch->remote_nentries = nentries;
503 }
504 spin_unlock_irqrestore(&ch->lock, irq_flags);
505 return xpSuccess;
506 }
507
508 dev_dbg(xpc_chan, "can't get memory for cached remote message queue, "
509 "partid=%d, channel=%d\n", ch->partid, ch->number);
510 return xpNoMemory;
511}
512
513/*
514 * Allocate message queues and other stuff associated with a channel.
515 *
516 * Note: Assumes all of the channel sizes are filled in.
517 */
518static enum xp_retval
519xpc_allocate_msgqueues(struct xpc_channel *ch)
520{
521 unsigned long irq_flags;
522 enum xp_retval ret;
523
524 DBUG_ON(ch->flags & XPC_C_SETUP);
525
526 ret = xpc_allocate_local_msgqueue(ch);
527 if (ret != xpSuccess)
528 return ret;
529
530 ret = xpc_allocate_remote_msgqueue(ch);
531 if (ret != xpSuccess) {
532 kfree(ch->local_msgqueue_base);
533 ch->local_msgqueue = NULL;
534 kfree(ch->notify_queue);
535 ch->notify_queue = NULL;
536 return ret;
537 }
538
539 spin_lock_irqsave(&ch->lock, irq_flags);
540 ch->flags |= XPC_C_SETUP;
541 spin_unlock_irqrestore(&ch->lock, irq_flags);
542
543 return xpSuccess;
544}
545
546/*
547 * Process a connect message from a remote partition.
548 *
549 * Note: xpc_process_connect() is expecting to be called with the
@@ -565,30 +39,29 @@ xpc_process_connect(struct xpc_channel *ch, unsigned long *irq_flags)
565
566	if (!(ch->flags & XPC_C_SETUP)) {
567		spin_unlock_irqrestore(&ch->lock, *irq_flags);
568		ret = xpc_allocate_msgqueues(ch);
42		ret = xpc_setup_msg_structures(ch);
569		spin_lock_irqsave(&ch->lock, *irq_flags);
570
571		if (ret != xpSuccess)
572			XPC_DISCONNECT_CHANNEL(ch, ret, irq_flags);
573
48		ch->flags |= XPC_C_SETUP;
49
574		if (ch->flags & (XPC_C_CONNECTED | XPC_C_DISCONNECTING))
575			return;
576
577		DBUG_ON(!(ch->flags & XPC_C_SETUP));
578		DBUG_ON(ch->local_msgqueue == NULL);
579		DBUG_ON(ch->remote_msgqueue == NULL);
580	}
581
582	if (!(ch->flags & XPC_C_OPENREPLY)) {
583		ch->flags |= XPC_C_OPENREPLY;
584		xpc_IPI_send_openreply(ch, irq_flags);
59		xpc_send_chctl_openreply(ch, irq_flags);
585	}
586
587	if (!(ch->flags & XPC_C_ROPENREPLY))
588		return;
589
590	DBUG_ON(ch->remote_msgqueue_pa == 0);
591
592	ch->flags = (XPC_C_CONNECTED | XPC_C_SETUP);	/* clear all else */
593
594	dev_info(xpc_chan, "channel %d to partition %d connected\n",
@@ -600,99 +73,6 @@ xpc_process_connect(struct xpc_channel *ch, unsigned long *irq_flags)
600}
601
602/*
603 * Notify those who wanted to be notified upon delivery of their message.
604 */
605static void
606xpc_notify_senders(struct xpc_channel *ch, enum xp_retval reason, s64 put)
607{
608 struct xpc_notify *notify;
609 u8 notify_type;
610 s64 get = ch->w_remote_GP.get - 1;
611
612 while (++get < put && atomic_read(&ch->n_to_notify) > 0) {
613
614 notify = &ch->notify_queue[get % ch->local_nentries];
615
616 /*
617 * See if the notify entry indicates it was associated with
618		 * a message whose sender wants to be notified. It is possible
619 * that it is, but someone else is doing or has done the
620 * notification.
621 */
622 notify_type = notify->type;
623 if (notify_type == 0 ||
624 cmpxchg(&notify->type, notify_type, 0) != notify_type) {
625 continue;
626 }
627
628 DBUG_ON(notify_type != XPC_N_CALL);
629
630 atomic_dec(&ch->n_to_notify);
631
632 if (notify->func != NULL) {
633 dev_dbg(xpc_chan, "notify->func() called, notify=0x%p, "
634 "msg_number=%ld, partid=%d, channel=%d\n",
635 (void *)notify, get, ch->partid, ch->number);
636
637 notify->func(reason, ch->partid, ch->number,
638 notify->key);
639
640 dev_dbg(xpc_chan, "notify->func() returned, "
641 "notify=0x%p, msg_number=%ld, partid=%d, "
642 "channel=%d\n", (void *)notify, get,
643 ch->partid, ch->number);
644 }
645 }
646}
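
The cmpxchg on notify->type is what makes the callout race-free: an entry is
"owned" by whoever atomically swings type from its nonzero value to zero, and
a loser simply skips the entry. A userspace model of that claim idiom,
assuming C11 atomics in place of the ia64 cmpxchg:

#include <stdatomic.h>

struct notify_entry {
	_Atomic unsigned char type;	/* 0 == claimed or never armed */
	void (*func)(void *key);
	void *key;
};

static void try_notify(struct notify_entry *n)
{
	unsigned char t = atomic_load(&n->type);

	/* claim the entry; bail if someone else already did or is doing it */
	if (t == 0 || !atomic_compare_exchange_strong(&n->type, &t, 0))
		return;

	if (n->func != NULL)
		n->func(n->key);	/* safe: we won the claim */
}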
647
648/*
649 * Free up message queues and other stuff that were allocated for the specified
650 * channel.
651 *
652 * Note: ch->reason and ch->reason_line are left set for debugging purposes,
653 * they're cleared when XPC_C_DISCONNECTED is cleared.
654 */
655static void
656xpc_free_msgqueues(struct xpc_channel *ch)
657{
658 DBUG_ON(!spin_is_locked(&ch->lock));
659 DBUG_ON(atomic_read(&ch->n_to_notify) != 0);
660
661 ch->remote_msgqueue_pa = 0;
662 ch->func = NULL;
663 ch->key = NULL;
664 ch->msg_size = 0;
665 ch->local_nentries = 0;
666 ch->remote_nentries = 0;
667 ch->kthreads_assigned_limit = 0;
668 ch->kthreads_idle_limit = 0;
669
670 ch->local_GP->get = 0;
671 ch->local_GP->put = 0;
672 ch->remote_GP.get = 0;
673 ch->remote_GP.put = 0;
674 ch->w_local_GP.get = 0;
675 ch->w_local_GP.put = 0;
676 ch->w_remote_GP.get = 0;
677 ch->w_remote_GP.put = 0;
678 ch->next_msg_to_pull = 0;
679
680 if (ch->flags & XPC_C_SETUP) {
681 ch->flags &= ~XPC_C_SETUP;
682
683 dev_dbg(xpc_chan, "ch->flags=0x%x, partid=%d, channel=%d\n",
684 ch->flags, ch->partid, ch->number);
685
686 kfree(ch->local_msgqueue_base);
687 ch->local_msgqueue = NULL;
688 kfree(ch->remote_msgqueue_base);
689 ch->remote_msgqueue = NULL;
690 kfree(ch->notify_queue);
691 ch->notify_queue = NULL;
692 }
693}
694
695/*
696 * spin_lock_irqsave() is expected to be held on entry.
697 */
698static void
@@ -717,9 +97,9 @@ xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags)
717	DBUG_ON((ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) &&
718		!(ch->flags & XPC_C_DISCONNECTINGCALLOUT_MADE));
719
720	if (part->act_state == XPC_P_DEACTIVATING) {
100	if (part->act_state == XPC_P_AS_DEACTIVATING) {
721		/* can't proceed until the other side disengages from us */
722		if (xpc_partition_engaged(1UL << ch->partid))
102		if (xpc_partition_engaged(ch->partid))
723			return;
724
725	} else {
@@ -731,7 +111,7 @@ xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags)
731
732	if (!(ch->flags & XPC_C_CLOSEREPLY)) {
733		ch->flags |= XPC_C_CLOSEREPLY;
734		xpc_IPI_send_closereply(ch, irq_flags);
114		xpc_send_chctl_closereply(ch, irq_flags);
735	}
736
737	if (!(ch->flags & XPC_C_RCLOSEREPLY))
@@ -740,8 +120,8 @@ xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags)
740
741	/* wake those waiting for notify completion */
742	if (atomic_read(&ch->n_to_notify) > 0) {
743		/* >>> we do callout while holding ch->lock */
123		/* we do callout while holding ch->lock, callout can't block */
744		xpc_notify_senders(ch, ch->reason, ch->w_local_GP.put);
124		xpc_notify_senders_of_disconnect(ch);
745	}
746
747	/* both sides are disconnected now */
@@ -752,10 +132,24 @@ xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags)
752		spin_lock_irqsave(&ch->lock, *irq_flags);
753	}
754
135	DBUG_ON(atomic_read(&ch->n_to_notify) != 0);
136
755	/* it's now safe to free the channel's message queues */
756	xpc_free_msgqueues(ch);
138	xpc_teardown_msg_structures(ch);
757
758	/* mark disconnected, clear all other flags except XPC_C_WDISCONNECT */
140	ch->func = NULL;
141	ch->key = NULL;
142	ch->entry_size = 0;
143	ch->local_nentries = 0;
144	ch->remote_nentries = 0;
145	ch->kthreads_assigned_limit = 0;
146	ch->kthreads_idle_limit = 0;
147
148	/*
149	 * Mark the channel disconnected and clear all other flags, including
150	 * XPC_C_SETUP (because of call to xpc_teardown_msg_structures()) but
151	 * not including XPC_C_WDISCONNECT (if it was set).
152	 */
759	ch->flags = (XPC_C_DISCONNECTED | (ch->flags & XPC_C_WDISCONNECT));
760
761	atomic_dec(&part->nchannels_active);
@@ -768,15 +162,15 @@ xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags)
768	if (ch->flags & XPC_C_WDISCONNECT) {
769		/* we won't lose the CPU since we're holding ch->lock */
770		complete(&ch->wdisconnect_wait);
771	} else if (ch->delayed_IPI_flags) {
165	} else if (ch->delayed_chctl_flags) {
772		if (part->act_state != XPC_P_DEACTIVATING) {
166		if (part->act_state != XPC_P_AS_DEACTIVATING) {
773			/* time to take action on any delayed IPI flags */
167			/* time to take action on any delayed chctl flags */
774			spin_lock(&part->IPI_lock);
168			spin_lock(&part->chctl_lock);
775			XPC_SET_IPI_FLAGS(part->local_IPI_amo, ch->number,
776					  ch->delayed_IPI_flags);
169			part->chctl.flags[ch->number] |=
170			    ch->delayed_chctl_flags;
777			spin_unlock(&part->IPI_lock);
171			spin_unlock(&part->chctl_lock);
778		}
779		ch->delayed_IPI_flags = 0;
173		ch->delayed_chctl_flags = 0;
780	}
781}
782
@@ -784,8 +178,8 @@ xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags)
784 * Process a change in the channel's remote connection state.
785 */
786static void
787xpc_process_openclose_IPI(struct xpc_partition *part, int ch_number,
788			  u8 IPI_flags)
181xpc_process_openclose_chctl_flags(struct xpc_partition *part, int ch_number,
182				  u8 chctl_flags)
789{
790	unsigned long irq_flags;
791	struct xpc_openclose_args *args =
@@ -800,24 +194,24 @@ again:
800	if ((ch->flags & XPC_C_DISCONNECTED) &&
801	    (ch->flags & XPC_C_WDISCONNECT)) {
802		/*
803		 * Delay processing IPI flags until thread waiting disconnect
197		 * Delay processing chctl flags until thread waiting disconnect
804		 * has had a chance to see that the channel is disconnected.
805		 */
806		ch->delayed_IPI_flags |= IPI_flags;
200		ch->delayed_chctl_flags |= chctl_flags;
807		spin_unlock_irqrestore(&ch->lock, irq_flags);
808		return;
809	}
810
811	if (IPI_flags & XPC_IPI_CLOSEREQUEST) {
205	if (chctl_flags & XPC_CHCTL_CLOSEREQUEST) {
812
813		dev_dbg(xpc_chan, "XPC_IPI_CLOSEREQUEST (reason=%d) received "
207		dev_dbg(xpc_chan, "XPC_CHCTL_CLOSEREQUEST (reason=%d) received "
814			"from partid=%d, channel=%d\n", args->reason,
815			ch->partid, ch->number);
816
817		/*
818		 * If RCLOSEREQUEST is set, we're probably waiting for
819		 * RCLOSEREPLY. We should find it and a ROPENREQUEST packed
820		 * with this RCLOSEREQUEST in the IPI_flags.
214		 * with this RCLOSEREQUEST in the chctl_flags.
821		 */
822
823		if (ch->flags & XPC_C_RCLOSEREQUEST) {
@@ -826,8 +220,8 @@ again:
826			DBUG_ON(!(ch->flags & XPC_C_CLOSEREPLY));
827			DBUG_ON(ch->flags & XPC_C_RCLOSEREPLY);
828
829			DBUG_ON(!(IPI_flags & XPC_IPI_CLOSEREPLY));
223			DBUG_ON(!(chctl_flags & XPC_CHCTL_CLOSEREPLY));
830			IPI_flags &= ~XPC_IPI_CLOSEREPLY;
224			chctl_flags &= ~XPC_CHCTL_CLOSEREPLY;
831			ch->flags |= XPC_C_RCLOSEREPLY;
832
833			/* both sides have finished disconnecting */
@@ -837,17 +231,15 @@ again:
837		}
838
839	if (ch->flags & XPC_C_DISCONNECTED) {
840		if (!(IPI_flags & XPC_IPI_OPENREQUEST)) {
841			if ((XPC_GET_IPI_FLAGS(part->local_IPI_amo,
842					       ch_number) &
843			     XPC_IPI_OPENREQUEST)) {
844
845				DBUG_ON(ch->delayed_IPI_flags != 0);
846				spin_lock(&part->IPI_lock);
847				XPC_SET_IPI_FLAGS(part->local_IPI_amo,
848						  ch_number,
849						  XPC_IPI_CLOSEREQUEST);
850				spin_unlock(&part->IPI_lock);
234		if (!(chctl_flags & XPC_CHCTL_OPENREQUEST)) {
235			if (part->chctl.flags[ch_number] &
236			    XPC_CHCTL_OPENREQUEST) {
237
238				DBUG_ON(ch->delayed_chctl_flags != 0);
239				spin_lock(&part->chctl_lock);
240				part->chctl.flags[ch_number] |=
241				    XPC_CHCTL_CLOSEREQUEST;
242				spin_unlock(&part->chctl_lock);
851			}
852			spin_unlock_irqrestore(&ch->lock, irq_flags);
853			return;
@@ -860,7 +252,7 @@ again:
860		ch->flags |= (XPC_C_CONNECTING | XPC_C_ROPENREQUEST);
861	}
862
863	IPI_flags &= ~(XPC_IPI_OPENREQUEST | XPC_IPI_OPENREPLY);
255	chctl_flags &= ~(XPC_CHCTL_OPENREQUEST | XPC_CHCTL_OPENREPLY);
864
865	/*
866	 * The meaningful CLOSEREQUEST connection state fields are:
@@ -878,7 +270,7 @@ again:
878
879			XPC_DISCONNECT_CHANNEL(ch, reason, &irq_flags);
880
881			DBUG_ON(IPI_flags & XPC_IPI_CLOSEREPLY);
273			DBUG_ON(chctl_flags & XPC_CHCTL_CLOSEREPLY);
882			spin_unlock_irqrestore(&ch->lock, irq_flags);
883			return;
884		}
@@ -886,13 +278,13 @@ again:
886		xpc_process_disconnect(ch, &irq_flags);
887	}
888
889	if (IPI_flags & XPC_IPI_CLOSEREPLY) {
281	if (chctl_flags & XPC_CHCTL_CLOSEREPLY) {
890
891		dev_dbg(xpc_chan, "XPC_IPI_CLOSEREPLY received from partid=%d,"
892			" channel=%d\n", ch->partid, ch->number);
283		dev_dbg(xpc_chan, "XPC_CHCTL_CLOSEREPLY received from partid="
284			"%d, channel=%d\n", ch->partid, ch->number);
893
894		if (ch->flags & XPC_C_DISCONNECTED) {
895			DBUG_ON(part->act_state != XPC_P_DEACTIVATING);
287			DBUG_ON(part->act_state != XPC_P_AS_DEACTIVATING);
896			spin_unlock_irqrestore(&ch->lock, irq_flags);
897			return;
898		}
@@ -900,15 +292,14 @@ again:
900		DBUG_ON(!(ch->flags & XPC_C_CLOSEREQUEST));
901
902		if (!(ch->flags & XPC_C_RCLOSEREQUEST)) {
903			if ((XPC_GET_IPI_FLAGS(part->local_IPI_amo, ch_number)
904			     & XPC_IPI_CLOSEREQUEST)) {
905
906				DBUG_ON(ch->delayed_IPI_flags != 0);
907				spin_lock(&part->IPI_lock);
908				XPC_SET_IPI_FLAGS(part->local_IPI_amo,
909						  ch_number,
910						  XPC_IPI_CLOSEREPLY);
911				spin_unlock(&part->IPI_lock);
295			if (part->chctl.flags[ch_number] &
296			    XPC_CHCTL_CLOSEREQUEST) {
297
298				DBUG_ON(ch->delayed_chctl_flags != 0);
299				spin_lock(&part->chctl_lock);
300				part->chctl.flags[ch_number] |=
301				    XPC_CHCTL_CLOSEREPLY;
302				spin_unlock(&part->chctl_lock);
912			}
913			spin_unlock_irqrestore(&ch->lock, irq_flags);
914			return;
@@ -922,21 +313,21 @@ again:
922		}
923	}
924
925	if (IPI_flags & XPC_IPI_OPENREQUEST) {
316	if (chctl_flags & XPC_CHCTL_OPENREQUEST) {
926
927		dev_dbg(xpc_chan, "XPC_IPI_OPENREQUEST (msg_size=%d, "
318		dev_dbg(xpc_chan, "XPC_CHCTL_OPENREQUEST (entry_size=%d, "
928			"local_nentries=%d) received from partid=%d, "
929			"channel=%d\n", args->msg_size, args->local_nentries,
320			"channel=%d\n", args->entry_size, args->local_nentries,
930			ch->partid, ch->number);
931
932		if (part->act_state == XPC_P_DEACTIVATING ||
323		if (part->act_state == XPC_P_AS_DEACTIVATING ||
933		    (ch->flags & XPC_C_ROPENREQUEST)) {
934			spin_unlock_irqrestore(&ch->lock, irq_flags);
935			return;
936		}
937
938		if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_WDISCONNECT)) {
939			ch->delayed_IPI_flags |= XPC_IPI_OPENREQUEST;
330			ch->delayed_chctl_flags |= XPC_CHCTL_OPENREQUEST;
940			spin_unlock_irqrestore(&ch->lock, irq_flags);
941			return;
942		}
@@ -947,10 +338,10 @@ again:
947
948		/*
949		 * The meaningful OPENREQUEST connection state fields are:
950		 *      msg_size = size of channel's messages in bytes
341		 *      entry_size = size of channel's messages in bytes
951		 *      local_nentries = remote partition's local_nentries
952		 */
953		if (args->msg_size == 0 || args->local_nentries == 0) {
344		if (args->entry_size == 0 || args->local_nentries == 0) {
954			/* assume OPENREQUEST was delayed by mistake */
955			spin_unlock_irqrestore(&ch->lock, irq_flags);
956			return;
@@ -960,14 +351,14 @@ again:
960		ch->remote_nentries = args->local_nentries;
961
962		if (ch->flags & XPC_C_OPENREQUEST) {
963			if (args->msg_size != ch->msg_size) {
354			if (args->entry_size != ch->entry_size) {
964				XPC_DISCONNECT_CHANNEL(ch, xpUnequalMsgSizes,
965						       &irq_flags);
966				spin_unlock_irqrestore(&ch->lock, irq_flags);
967				return;
968			}
969		} else {
970			ch->msg_size = args->msg_size;
361			ch->entry_size = args->entry_size;
971
972			XPC_SET_REASON(ch, 0, 0);
973			ch->flags &= ~XPC_C_DISCONNECTED;
@@ -978,13 +369,13 @@ again:
978		xpc_process_connect(ch, &irq_flags);
979	}
980
981	if (IPI_flags & XPC_IPI_OPENREPLY) {
372	if (chctl_flags & XPC_CHCTL_OPENREPLY) {
982
983		dev_dbg(xpc_chan, "XPC_IPI_OPENREPLY (local_msgqueue_pa=0x%lx, "
984			"local_nentries=%d, remote_nentries=%d) received from "
985			"partid=%d, channel=%d\n", args->local_msgqueue_pa,
986			args->local_nentries, args->remote_nentries,
987			ch->partid, ch->number);
374		dev_dbg(xpc_chan, "XPC_CHCTL_OPENREPLY (local_msgqueue_pa="
375			"0x%lx, local_nentries=%d, remote_nentries=%d) "
376			"received from partid=%d, channel=%d\n",
377			args->local_msgqueue_pa, args->local_nentries,
378			args->remote_nentries, ch->partid, ch->number);
988
989		if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_DISCONNECTED)) {
990			spin_unlock_irqrestore(&ch->lock, irq_flags);
@@ -1012,10 +403,10 @@ again:
1012		DBUG_ON(args->remote_nentries == 0);
1013
1014		ch->flags |= XPC_C_ROPENREPLY;
1015		ch->remote_msgqueue_pa = args->local_msgqueue_pa;
406		xpc_save_remote_msgqueue_pa(ch, args->local_msgqueue_pa);
1016
1017		if (args->local_nentries < ch->remote_nentries) {
1018			dev_dbg(xpc_chan, "XPC_IPI_OPENREPLY: new "
409			dev_dbg(xpc_chan, "XPC_CHCTL_OPENREPLY: new "
1019				"remote_nentries=%d, old remote_nentries=%d, "
1020				"partid=%d, channel=%d\n",
1021				args->local_nentries, ch->remote_nentries,
@@ -1024,7 +415,7 @@ again:
1024			ch->remote_nentries = args->local_nentries;
1025		}
1026		if (args->remote_nentries < ch->local_nentries) {
1027			dev_dbg(xpc_chan, "XPC_IPI_OPENREPLY: new "
418			dev_dbg(xpc_chan, "XPC_CHCTL_OPENREPLY: new "
1028				"local_nentries=%d, old local_nentries=%d, "
1029				"partid=%d, channel=%d\n",
1030				args->remote_nentries, ch->local_nentries,
@@ -1082,7 +473,7 @@ xpc_connect_channel(struct xpc_channel *ch)
1082	ch->local_nentries = registration->nentries;
1083
1084	if (ch->flags & XPC_C_ROPENREQUEST) {
1085		if (registration->msg_size != ch->msg_size) {
476		if (registration->entry_size != ch->entry_size) {
1086			/* the local and remote sides aren't the same */
1087
1088			/*
@@ -1101,7 +492,7 @@ xpc_connect_channel(struct xpc_channel *ch)
1101			return xpUnequalMsgSizes;
1102		}
1103	} else {
1104		ch->msg_size = registration->msg_size;
495		ch->entry_size = registration->entry_size;
1105
1106		XPC_SET_REASON(ch, 0, 0);
1107		ch->flags &= ~XPC_C_DISCONNECTED;
@@ -1114,7 +505,7 @@ xpc_connect_channel(struct xpc_channel *ch)
1114	/* initiate the connection */
1115
1116	ch->flags |= (XPC_C_OPENREQUEST | XPC_C_CONNECTING);
1117	xpc_IPI_send_openrequest(ch, &irq_flags);
508	xpc_send_chctl_openrequest(ch, &irq_flags);
1118
1119	xpc_process_connect(ch, &irq_flags);
1120
@@ -1123,152 +514,16 @@ xpc_connect_channel(struct xpc_channel *ch)
1123	return xpSuccess;
1124}
1125
1126/*
1127 * Clear some of the msg flags in the local message queue.
1128 */
1129static inline void
1130xpc_clear_local_msgqueue_flags(struct xpc_channel *ch)
1131{
1132 struct xpc_msg *msg;
1133 s64 get;
1134
1135 get = ch->w_remote_GP.get;
1136 do {
1137 msg = (struct xpc_msg *)((u64)ch->local_msgqueue +
1138 (get % ch->local_nentries) *
1139 ch->msg_size);
1140 msg->flags = 0;
1141 } while (++get < ch->remote_GP.get);
1142}
1143
1144/*
1145 * Clear some of the msg flags in the remote message queue.
1146 */
1147static inline void
1148xpc_clear_remote_msgqueue_flags(struct xpc_channel *ch)
1149{
1150 struct xpc_msg *msg;
1151 s64 put;
1152
1153 put = ch->w_remote_GP.put;
1154 do {
1155 msg = (struct xpc_msg *)((u64)ch->remote_msgqueue +
1156 (put % ch->remote_nentries) *
1157 ch->msg_size);
1158 msg->flags = 0;
1159 } while (++put < ch->remote_GP.put);
1160}
1161
1162static void
1163xpc_process_msg_IPI(struct xpc_partition *part, int ch_number)
1164{
1165 struct xpc_channel *ch = &part->channels[ch_number];
1166 int nmsgs_sent;
1167
1168 ch->remote_GP = part->remote_GPs[ch_number];
1169
1170 /* See what, if anything, has changed for each connected channel */
1171
1172 xpc_msgqueue_ref(ch);
1173
1174 if (ch->w_remote_GP.get == ch->remote_GP.get &&
1175 ch->w_remote_GP.put == ch->remote_GP.put) {
1176 /* nothing changed since GPs were last pulled */
1177 xpc_msgqueue_deref(ch);
1178 return;
1179 }
1180
1181 if (!(ch->flags & XPC_C_CONNECTED)) {
1182 xpc_msgqueue_deref(ch);
1183 return;
1184 }
1185
1186 /*
1187 * First check to see if messages recently sent by us have been
1188 * received by the other side. (The remote GET value will have
1189 * changed since we last looked at it.)
1190 */
1191
1192 if (ch->w_remote_GP.get != ch->remote_GP.get) {
1193
1194 /*
1195 * We need to notify any senders that want to be notified
1196 * that their sent messages have been received by their
1197 * intended recipients. We need to do this before updating
1198 * w_remote_GP.get so that we don't allocate the same message
1199 * queue entries prematurely (see xpc_allocate_msg()).
1200 */
1201 if (atomic_read(&ch->n_to_notify) > 0) {
1202 /*
1203 * Notify senders that messages sent have been
1204 * received and delivered by the other side.
1205 */
1206 xpc_notify_senders(ch, xpMsgDelivered,
1207 ch->remote_GP.get);
1208 }
1209
1210 /*
1211 * Clear msg->flags in previously sent messages, so that
1212 * they're ready for xpc_allocate_msg().
1213 */
1214 xpc_clear_local_msgqueue_flags(ch);
1215
1216 ch->w_remote_GP.get = ch->remote_GP.get;
1217
1218 dev_dbg(xpc_chan, "w_remote_GP.get changed to %ld, partid=%d, "
1219 "channel=%d\n", ch->w_remote_GP.get, ch->partid,
1220 ch->number);
1221
1222 /*
1223 * If anyone was waiting for message queue entries to become
1224 * available, wake them up.
1225 */
1226 if (atomic_read(&ch->n_on_msg_allocate_wq) > 0)
1227 wake_up(&ch->msg_allocate_wq);
1228 }
1229
1230 /*
1231 * Now check for newly sent messages by the other side. (The remote
1232 * PUT value will have changed since we last looked at it.)
1233 */
1234
1235 if (ch->w_remote_GP.put != ch->remote_GP.put) {
1236 /*
1237 * Clear msg->flags in previously received messages, so that
1238 * they're ready for xpc_get_deliverable_msg().
1239 */
1240 xpc_clear_remote_msgqueue_flags(ch);
1241
1242 ch->w_remote_GP.put = ch->remote_GP.put;
1243
1244 dev_dbg(xpc_chan, "w_remote_GP.put changed to %ld, partid=%d, "
1245 "channel=%d\n", ch->w_remote_GP.put, ch->partid,
1246 ch->number);
1247
1248 nmsgs_sent = ch->w_remote_GP.put - ch->w_local_GP.get;
1249 if (nmsgs_sent > 0) {
1250 dev_dbg(xpc_chan, "msgs waiting to be copied and "
1251 "delivered=%d, partid=%d, channel=%d\n",
1252 nmsgs_sent, ch->partid, ch->number);
1253
1254 if (ch->flags & XPC_C_CONNECTEDCALLOUT_MADE)
1255 xpc_activate_kthreads(ch, nmsgs_sent);
1256 }
1257 }
1258
1259 xpc_msgqueue_deref(ch);
1260}
1261
1262void
1263xpc_process_channel_activity(struct xpc_partition *part)
518xpc_process_sent_chctl_flags(struct xpc_partition *part)
1264{
1265	unsigned long irq_flags;
1266	u64 IPI_amo, IPI_flags;
521	union xpc_channel_ctl_flags chctl;
1267	struct xpc_channel *ch;
1268	int ch_number;
1269	u32 ch_flags;
1270
1271	IPI_amo = xpc_get_IPI_flags(part);
526	chctl.all_flags = xpc_get_chctl_all_flags(part);
1272
1273	/*
1274	 * Initiate channel connections for registered channels.
@@ -1281,14 +536,14 @@ xpc_process_channel_activity(struct xpc_partition *part)
1281		ch = &part->channels[ch_number];
1282
1283		/*
1284		 * Process any open or close related IPI flags, and then deal
539		 * Process any open or close related chctl flags, and then deal
1285		 * with connecting or disconnecting the channel as required.
1286		 */
1287
1288		IPI_flags = XPC_GET_IPI_FLAGS(IPI_amo, ch_number);
1289
1290		if (XPC_ANY_OPENCLOSE_IPI_FLAGS_SET(IPI_flags))
1291			xpc_process_openclose_IPI(part, ch_number, IPI_flags);
543		if (chctl.flags[ch_number] & XPC_OPENCLOSE_CHCTL_FLAGS) {
544			xpc_process_openclose_chctl_flags(part, ch_number,
545							  chctl.flags[ch_number]);
546		}
1292
1293		ch_flags = ch->flags;	/* need an atomic snapshot of flags */
1294
@@ -1299,7 +554,7 @@ xpc_process_channel_activity(struct xpc_partition *part)
1299			continue;
1300		}
1301
1302		if (part->act_state == XPC_P_DEACTIVATING)
557		if (part->act_state == XPC_P_AS_DEACTIVATING)
1303			continue;
1304
1305		if (!(ch_flags & XPC_C_CONNECTED)) {
@@ -1315,13 +570,13 @@ xpc_process_channel_activity(struct xpc_partition *part)
1315		}
1316
1317		/*
1318		 * Process any message related IPI flags, this may involve the
1319		 * activation of kthreads to deliver any pending messages sent
1320		 * from the other partition.
573		 * Process any message related chctl flags, this may involve
574		 * the activation of kthreads to deliver any pending messages
575		 * sent from the other partition.
1321		 */
1322
1323		if (XPC_ANY_MSG_IPI_FLAGS_SET(IPI_flags))
1324			xpc_process_msg_IPI(part, ch_number);
578		if (chctl.flags[ch_number] & XPC_MSG_CHCTL_FLAGS)
579			xpc_process_msg_chctl_flags(part, ch_number);
1325	}
1326}
1327
@@ -1369,59 +624,6 @@ xpc_partition_going_down(struct xpc_partition *part, enum xp_retval reason)
1369}
1370
1371/*
1372 * Teardown the infrastructure necessary to support XPartition Communication
1373 * between the specified remote partition and the local one.
1374 */
1375void
1376xpc_teardown_infrastructure(struct xpc_partition *part)
1377{
1378 short partid = XPC_PARTID(part);
1379
1380 /*
1381 * We start off by making this partition inaccessible to local
1382 * processes by marking it as no longer setup. Then we make it
1383 * inaccessible to remote processes by clearing the XPC per partition
1384 * specific variable's magic # (which indicates that these variables
1385 * are no longer valid) and by ignoring all XPC notify IPIs sent to
1386 * this partition.
1387 */
1388
1389 DBUG_ON(atomic_read(&part->nchannels_engaged) != 0);
1390 DBUG_ON(atomic_read(&part->nchannels_active) != 0);
1391 DBUG_ON(part->setup_state != XPC_P_SETUP);
1392 part->setup_state = XPC_P_WTEARDOWN;
1393
1394 xpc_vars_part[partid].magic = 0;
1395
1396 free_irq(SGI_XPC_NOTIFY, (void *)(u64)partid);
1397
1398 /*
1399 * Before proceeding with the teardown we have to wait until all
1400 * existing references cease.
1401 */
1402 wait_event(part->teardown_wq, (atomic_read(&part->references) == 0));
1403
1404 /* now we can begin tearing down the infrastructure */
1405
1406 part->setup_state = XPC_P_TORNDOWN;
1407
1408 /* in case we've still got outstanding timers registered... */
1409 del_timer_sync(&part->dropped_IPI_timer);
1410
1411 kfree(part->remote_openclose_args_base);
1412 part->remote_openclose_args = NULL;
1413 kfree(part->local_openclose_args_base);
1414 part->local_openclose_args = NULL;
1415 kfree(part->remote_GPs_base);
1416 part->remote_GPs = NULL;
1417 kfree(part->local_GPs_base);
1418 part->local_GPs = NULL;
1419 kfree(part->channels);
1420 part->channels = NULL;
1421 part->local_IPI_amo_va = NULL;
1422}
1423
1424/*
1425 * Called by XP at the time of channel connection registration to cause
1426 * XPC to establish connections to all currently active partitions.
1427 */
@@ -1432,9 +634,9 @@ xpc_initiate_connect(int ch_number)
1432	struct xpc_partition *part;
1433	struct xpc_channel *ch;
1434
1435	DBUG_ON(ch_number < 0 || ch_number >= XPC_NCHANNELS);
637	DBUG_ON(ch_number < 0 || ch_number >= XPC_MAX_NCHANNELS);
1436
1437	for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
639	for (partid = 0; partid < xp_max_npartitions; partid++) {
1438		part = &xpc_partitions[partid];
1439
1440		if (xpc_part_ref(part)) {
@@ -1488,10 +690,10 @@ xpc_initiate_disconnect(int ch_number)
1488	struct xpc_partition *part;
1489	struct xpc_channel *ch;
1490
1491	DBUG_ON(ch_number < 0 || ch_number >= XPC_NCHANNELS);
693	DBUG_ON(ch_number < 0 || ch_number >= XPC_MAX_NCHANNELS);
1492
1493	/* initiate the channel disconnect for every active partition */
1494	for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
696	for (partid = 0; partid < xp_max_npartitions; partid++) {
1495		part = &xpc_partitions[partid];
1496
1497		if (xpc_part_ref(part)) {
@@ -1550,7 +752,7 @@ xpc_disconnect_channel(const int line, struct xpc_channel *ch,
1550			       XPC_C_ROPENREQUEST | XPC_C_ROPENREPLY |
1551			       XPC_C_CONNECTING | XPC_C_CONNECTED);
1552
1553	xpc_IPI_send_closerequest(ch, irq_flags);
755	xpc_send_chctl_closerequest(ch, irq_flags);
1554
1555	if (channel_was_connected)
1556		ch->flags |= XPC_C_WASCONNECTED;
@@ -1598,7 +800,7 @@ xpc_disconnect_callout(struct xpc_channel *ch, enum xp_retval reason)
1598 * Wait for a message entry to become available for the specified channel,
1599 * but don't wait any longer than 1 jiffy.
1600 */
1601static enum xp_retval
803enum xp_retval
1602xpc_allocate_msg_wait(struct xpc_channel *ch)
1603{
1604	enum xp_retval ret;
@@ -1625,315 +827,54 @@ xpc_allocate_msg_wait(struct xpc_channel *ch)
1625}
1626
1627/*
1628 * Allocate an entry for a message from the message queue associated with the
1629 * specified channel.
830 * Send a message that contains the user's payload on the specified channel
831 * connected to the specified partition.
1630 */
1631static enum xp_retval
1632xpc_allocate_msg(struct xpc_channel *ch, u32 flags,
1633 struct xpc_msg **address_of_msg)
1634{
1635 struct xpc_msg *msg;
1636 enum xp_retval ret;
1637 s64 put;
1638
1639 /* this reference will be dropped in xpc_send_msg() */
1640 xpc_msgqueue_ref(ch);
1641
1642 if (ch->flags & XPC_C_DISCONNECTING) {
1643 xpc_msgqueue_deref(ch);
1644 return ch->reason;
1645 }
1646 if (!(ch->flags & XPC_C_CONNECTED)) {
1647 xpc_msgqueue_deref(ch);
1648 return xpNotConnected;
1649 }
1650
1651 /*
1652 * Get the next available message entry from the local message queue.
1653 * If none are available, we'll make sure that we grab the latest
1654 * GP values.
1655 */
1656 ret = xpTimeout;
1657
1658 while (1) {
1659
1660 put = ch->w_local_GP.put;
1661 rmb(); /* guarantee that .put loads before .get */
1662 if (put - ch->w_remote_GP.get < ch->local_nentries) {
1663
1664 /* There are available message entries. We need to try
1665 * to secure one for ourselves. We'll do this by trying
1666 * to increment w_local_GP.put as long as someone else
1667 * doesn't beat us to it. If they do, we'll have to
1668 * try again.
1669 */
1670 if (cmpxchg(&ch->w_local_GP.put, put, put + 1) == put) {
1671 /* we got the entry referenced by put */
1672 break;
1673 }
1674 continue; /* try again */
1675 }
1676
1677 /*
1678 * There aren't any available msg entries at this time.
1679 *
1680 * In waiting for a message entry to become available,
1681 * we set a timeout in case the other side is not
1682 * sending completion IPIs. This lets us fake an IPI
1683 * that will cause the IPI handler to fetch the latest
1684 * GP values as if an IPI was sent by the other side.
1685 */
1686 if (ret == xpTimeout)
1687 xpc_IPI_send_local_msgrequest(ch);
1688
1689 if (flags & XPC_NOWAIT) {
1690 xpc_msgqueue_deref(ch);
1691 return xpNoWait;
1692 }
1693
1694 ret = xpc_allocate_msg_wait(ch);
1695 if (ret != xpInterrupted && ret != xpTimeout) {
1696 xpc_msgqueue_deref(ch);
1697 return ret;
1698 }
1699 }
1700
1701 /* get the message's address and initialize it */
1702 msg = (struct xpc_msg *)((u64)ch->local_msgqueue +
1703 (put % ch->local_nentries) * ch->msg_size);
1704
1705 DBUG_ON(msg->flags != 0);
1706 msg->number = put;
1707
1708 dev_dbg(xpc_chan, "w_local_GP.put changed to %ld; msg=0x%p, "
1709 "msg_number=%ld, partid=%d, channel=%d\n", put + 1,
1710 (void *)msg, msg->number, ch->partid, ch->number);
1711
1712 *address_of_msg = msg;
1713
1714 return xpSuccess;
1715}
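
The reservation loop above is a lock-free ring-buffer "ticket grab": a slot
exists whenever put - get < nentries, and a producer owns slot put by
advancing the put counter with cmpxchg. A userspace model, assuming C11
atomics in place of the ia64 cmpxchg:

#include <stdatomic.h>
#include <stdint.h>

struct ring {
	_Atomic int64_t put;	/* next slot to hand out */
	_Atomic int64_t get;	/* consumer position, updated elsewhere */
	int nentries;
};

/* returns the reserved slot number, or -1 if the ring is full */
static int64_t reserve_slot(struct ring *r)
{
	int64_t put;

	for (;;) {
		put = atomic_load(&r->put);
		if (put - atomic_load(&r->get) >= r->nentries)
			return -1;	/* full: caller waits or gives up */
		if (atomic_compare_exchange_weak(&r->put, &put, put + 1))
			return put;	/* we own slot put % nentries */
	}
}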
1716
1717/*
1718 * Allocate an entry for a message from the message queue associated with the
1719 * specified channel. NOTE that this routine can sleep waiting for a message
1720 * entry to become available. To not sleep, pass in the XPC_NOWAIT flag.
1721 *
1722 * Arguments:
1723 *
1724 * partid - ID of partition to which the channel is connected.
1725 * ch_number - channel #.
832 *
833 * NOTE that this routine can sleep waiting for a message entry to become
834 * available. To not sleep, pass in the XPC_NOWAIT flag.
835 *
836 * Once sent, this routine will not wait for the message to be received, nor
837 * will notification be given when it does happen.
1726 * flags - see xpc.h for valid flags.
1727 * payload - address of the allocated payload area pointer (filled in on
1728 * return) in which the user-defined message is constructed.
1729 */
1730enum xp_retval
1731xpc_initiate_allocate(short partid, int ch_number, u32 flags, void **payload)
1732{
1733 struct xpc_partition *part = &xpc_partitions[partid];
1734 enum xp_retval ret = xpUnknownReason;
1735 struct xpc_msg *msg = NULL;
1736
1737 DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS);
1738 DBUG_ON(ch_number < 0 || ch_number >= part->nchannels);
1739
1740 *payload = NULL;
1741
1742 if (xpc_part_ref(part)) {
1743 ret = xpc_allocate_msg(&part->channels[ch_number], flags, &msg);
1744 xpc_part_deref(part);
1745
1746 if (msg != NULL)
1747 *payload = &msg->payload;
1748 }
1749
1750 return ret;
1751}
1752
1753/*
1754 * Now we actually send the messages that are ready to be sent by advancing
1755 * the local message queue's Put value and then send an IPI to the recipient
1756 * partition.
1757 */
1758static void
1759xpc_send_msgs(struct xpc_channel *ch, s64 initial_put)
1760{
1761 struct xpc_msg *msg;
1762 s64 put = initial_put + 1;
1763 int send_IPI = 0;
1764
1765 while (1) {
1766
1767 while (1) {
1768 if (put == ch->w_local_GP.put)
1769 break;
1770
1771 msg = (struct xpc_msg *)((u64)ch->local_msgqueue +
1772 (put % ch->local_nentries) *
1773 ch->msg_size);
1774
1775 if (!(msg->flags & XPC_M_READY))
1776 break;
1777
1778 put++;
1779 }
1780
1781 if (put == initial_put) {
1782 /* nothing's changed */
1783 break;
1784 }
1785
1786 if (cmpxchg_rel(&ch->local_GP->put, initial_put, put) !=
1787 initial_put) {
1788 /* someone else beat us to it */
1789 DBUG_ON(ch->local_GP->put < initial_put);
1790 break;
1791 }
1792
1793 /* we just set the new value of local_GP->put */
1794
1795 dev_dbg(xpc_chan, "local_GP->put changed to %ld, partid=%d, "
1796 "channel=%d\n", put, ch->partid, ch->number);
1797
1798 send_IPI = 1;
1799
1800 /*
1801 * We need to ensure that the message referenced by
1802 * local_GP->put is not XPC_M_READY or that local_GP->put
1803 * equals w_local_GP.put, so we'll go have a look.
1804 */
1805 initial_put = put;
1806 }
1807
1808 if (send_IPI)
1809 xpc_IPI_send_msgrequest(ch);
1810}
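
xpc_send_msgs() publishes in order: it walks the shared put pointer forward
over every consecutively READY slot, and the cmpxchg guarantees that only one
of several racing senders performs each advance. A single-attempt userspace
sketch of that walk (the real loop re-scans after a lost race):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

#define NENTRIES 64

static _Atomic int64_t shared_put;	/* what the remote side reads */
static _Atomic int64_t reserved_put;	/* w_local_GP.put analogue */
static _Atomic unsigned char ready[NENTRIES];

/* returns true when the advance succeeded and an IPI should be sent */
static bool publish_ready_msgs(int64_t initial_put)
{
	int64_t put = initial_put;

	/* advance over every slot already marked READY */
	while (put < atomic_load(&reserved_put) &&
	       atomic_load(&ready[put % NENTRIES]))
		put++;

	if (put == initial_put)
		return false;	/* nothing new to publish */

	/* only one racing sender wins the pointer advance */
	return atomic_compare_exchange_strong(&shared_put, &initial_put, put);
}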
1811
1812/*
1813 * Common code that does the actual sending of the message by advancing the
1814 * local message queue's Put value and sends an IPI to the partition the
1815 * message is being sent to.
1816 */
1817static enum xp_retval
1818xpc_send_msg(struct xpc_channel *ch, struct xpc_msg *msg, u8 notify_type,
1819 xpc_notify_func func, void *key)
1820{
1821 enum xp_retval ret = xpSuccess;
1822 struct xpc_notify *notify = notify;
1823 s64 put, msg_number = msg->number;
1824
1825 DBUG_ON(notify_type == XPC_N_CALL && func == NULL);
1826 DBUG_ON((((u64)msg - (u64)ch->local_msgqueue) / ch->msg_size) !=
1827 msg_number % ch->local_nentries);
1828 DBUG_ON(msg->flags & XPC_M_READY);
1829
1830 if (ch->flags & XPC_C_DISCONNECTING) {
1831 /* drop the reference grabbed in xpc_allocate_msg() */
1832 xpc_msgqueue_deref(ch);
1833 return ch->reason;
1834 }
1835
1836 if (notify_type != 0) {
1837 /*
1838 * Tell the remote side to send an ACK interrupt when the
1839 * message has been delivered.
1840 */
1841 msg->flags |= XPC_M_INTERRUPT;
1842
1843 atomic_inc(&ch->n_to_notify);
1844
1845 notify = &ch->notify_queue[msg_number % ch->local_nentries];
1846 notify->func = func;
1847 notify->key = key;
1848 notify->type = notify_type;
1849
1850 /* >>> is a mb() needed here? */
1851
1852 if (ch->flags & XPC_C_DISCONNECTING) {
1853 /*
1854 * An error occurred between our last error check and
1855 * this one. We will try to clear the type field from
1856 * the notify entry. If we succeed then
1857 * xpc_disconnect_channel() didn't already process
1858 * the notify entry.
1859 */
1860 if (cmpxchg(&notify->type, notify_type, 0) ==
1861 notify_type) {
1862 atomic_dec(&ch->n_to_notify);
1863 ret = ch->reason;
1864 }
1865
1866 /* drop the reference grabbed in xpc_allocate_msg() */
1867 xpc_msgqueue_deref(ch);
1868 return ret;
1869 }
1870 }
1871
1872 msg->flags |= XPC_M_READY;
1873
1874 /*
1875 * The preceding store of msg->flags must occur before the following
1876 * load of ch->local_GP->put.
1877 */
1878 mb();
1879
1880 /* see if the message is next in line to be sent, if so send it */
1881
1882 put = ch->local_GP->put;
1883 if (put == msg_number)
1884 xpc_send_msgs(ch, put);
1885
1886 /* drop the reference grabbed in xpc_allocate_msg() */
1887 xpc_msgqueue_deref(ch);
1888 return ret;
1889}
1890
1891/*
1892 * Send a message previously allocated using xpc_initiate_allocate() on the
1893 * specified channel connected to the specified partition.
1894 *
1895 * This routine will not wait for the message to be received, nor will
1896 * notification be given when it does happen. Once this routine has returned
1897 * the message entry allocated via xpc_initiate_allocate() is no longer
1898 * accessible to the caller.
1899 *
1900 * This routine, although called by users, does not call xpc_part_ref() to
1901 * ensure that the partition infrastructure is in place. It relies on the
1902 * fact that we called xpc_msgqueue_ref() in xpc_allocate_msg().
1903 * 838 *
1904 * Arguments: 839 * Arguments:
1905 * 840 *
1906 * partid - ID of partition to which the channel is connected. 841 * partid - ID of partition to which the channel is connected.
1907 * ch_number - channel # to send message on. 842 * ch_number - channel # to send message on.
1908 * payload - pointer to the payload area allocated via 843 * flags - see xp.h for valid flags.
1909 * xpc_initiate_allocate(). 844 * payload - pointer to the payload which is to be sent.
845 * payload_size - size of the payload in bytes.
1910 */ 846 */
1911enum xp_retval 847enum xp_retval
1912xpc_initiate_send(short partid, int ch_number, void *payload) 848xpc_initiate_send(short partid, int ch_number, u32 flags, void *payload,
849 u16 payload_size)
1913{ 850{
1914 struct xpc_partition *part = &xpc_partitions[partid]; 851 struct xpc_partition *part = &xpc_partitions[partid];
1915 struct xpc_msg *msg = XPC_MSG_ADDRESS(payload); 852 enum xp_retval ret = xpUnknownReason;
1916 enum xp_retval ret;
1917 853
1918 dev_dbg(xpc_chan, "msg=0x%p, partid=%d, channel=%d\n", (void *)msg, 854 dev_dbg(xpc_chan, "payload=0x%p, partid=%d, channel=%d\n", payload,
1919 partid, ch_number); 855 partid, ch_number);
1920 856
1921 DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS); 857 DBUG_ON(partid < 0 || partid >= xp_max_npartitions);
1922 DBUG_ON(ch_number < 0 || ch_number >= part->nchannels); 858 DBUG_ON(ch_number < 0 || ch_number >= part->nchannels);
1923 DBUG_ON(msg == NULL); 859 DBUG_ON(payload == NULL);
1924 860
1925 ret = xpc_send_msg(&part->channels[ch_number], msg, 0, NULL, NULL); 861 if (xpc_part_ref(part)) {
862 ret = xpc_send_payload(&part->channels[ch_number], flags,
863 payload, payload_size, 0, NULL, NULL);
864 xpc_part_deref(part);
865 }
1926 866
1927 return ret; 867 return ret;
1928} 868}
1929 869
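For illustration of the new payload-based interface just documented, here is a minimal sketch of a caller; the struct, channel number, and all "my_" names are assumptions made up for this example, not taken from the driver:

        /* hypothetical client code */
        static enum xp_retval
        my_send_hello(short partid)
        {
                struct my_msg {
                        u64 seq;
                        char text[32];
                } msg = { .seq = 1, .text = "hello" };

                /* XPC_NOWAIT (declared in xp.h) asks XPC not to sleep
                 * waiting for a free message entry */
                return xpc_initiate_send(partid, 0 /* ch_number */, XPC_NOWAIT,
                                         &msg, sizeof(msg));
        }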
1930/* 870/*
1931 * Send a message previously allocated using xpc_initiate_allocate on the 871 * Send a message that contains the user's payload on the specified channel
1932 * specified channel connected to the specified partition. 872 * connected to the specified partition.
1933 * 873 *
1934 * This routine will not wait for the message to be sent. Once this routine 874 * NOTE that this routine can sleep waiting for a message entry to become
1935 * has returned the message entry allocated via xpc_initiate_allocate() is no 875 * available. To not sleep, pass in the XPC_NOWAIT flag.
1936 * longer accessible to the caller. 876 *
877 * This routine will not wait for the message to be sent or received.
1937 * 878 *
1938 * Once the remote end of the channel has received the message, the function 879 * Once the remote end of the channel has received the message, the function
1939 * passed as an argument to xpc_initiate_send_notify() will be called. This 880 * passed as an argument to xpc_initiate_send_notify() will be called. This
@@ -1943,158 +884,51 @@ xpc_initiate_send(short partid, int ch_number, void *payload)
1943 * 884 *
1944 * If this routine returns an error, the caller's function will NOT be called. 885 * If this routine returns an error, the caller's function will NOT be called.
1945 * 886 *
1946 * This routine, although called by users, does not call xpc_part_ref() to
1947 * ensure that the partition infrastructure is in place. It relies on the
1948 * fact that we called xpc_msgqueue_ref() in xpc_allocate_msg().
1949 *
1950 * Arguments: 887 * Arguments:
1951 * 888 *
1952 * partid - ID of partition to which the channel is connected. 889 * partid - ID of partition to which the channel is connected.
1953 * ch_number - channel # to send message on. 890 * ch_number - channel # to send message on.
1954 * payload - pointer to the payload area allocated via 891 * flags - see xp.h for valid flags.
1955 * xpc_initiate_allocate(). 892 * payload - pointer to the payload which is to be sent.
893 * payload_size - size of the payload in bytes.
1956 * func - function to call with asynchronous notification of message 894 * func - function to call with asynchronous notification of message
1957 * receipt. THIS FUNCTION MUST BE NON-BLOCKING. 895 * receipt. THIS FUNCTION MUST BE NON-BLOCKING.
1958 * key - user-defined key to be passed to the function when it's called. 896 * key - user-defined key to be passed to the function when it's called.
1959 */ 897 */
1960enum xp_retval 898enum xp_retval
1961xpc_initiate_send_notify(short partid, int ch_number, void *payload, 899xpc_initiate_send_notify(short partid, int ch_number, u32 flags, void *payload,
1962 xpc_notify_func func, void *key) 900 u16 payload_size, xpc_notify_func func, void *key)
1963{ 901{
1964 struct xpc_partition *part = &xpc_partitions[partid]; 902 struct xpc_partition *part = &xpc_partitions[partid];
1965 struct xpc_msg *msg = XPC_MSG_ADDRESS(payload); 903 enum xp_retval ret = xpUnknownReason;
1966 enum xp_retval ret;
1967 904
1968 dev_dbg(xpc_chan, "msg=0x%p, partid=%d, channel=%d\n", (void *)msg, 905 dev_dbg(xpc_chan, "payload=0x%p, partid=%d, channel=%d\n", payload,
1969 partid, ch_number); 906 partid, ch_number);
1970 907
1971 DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS); 908 DBUG_ON(partid < 0 || partid >= xp_max_npartitions);
1972 DBUG_ON(ch_number < 0 || ch_number >= part->nchannels); 909 DBUG_ON(ch_number < 0 || ch_number >= part->nchannels);
1973 DBUG_ON(msg == NULL); 910 DBUG_ON(payload == NULL);
1974 DBUG_ON(func == NULL); 911 DBUG_ON(func == NULL);
1975 912
1976 ret = xpc_send_msg(&part->channels[ch_number], msg, XPC_N_CALL, 913 if (xpc_part_ref(part)) {
1977 func, key); 914 ret = xpc_send_payload(&part->channels[ch_number], flags,
1978 return ret; 915 payload, payload_size, XPC_N_CALL, func,
1979} 916 key);
1980 917 xpc_part_deref(part);
1981static struct xpc_msg *
1982xpc_pull_remote_msg(struct xpc_channel *ch, s64 get)
1983{
1984 struct xpc_partition *part = &xpc_partitions[ch->partid];
1985 struct xpc_msg *remote_msg, *msg;
1986 u32 msg_index, nmsgs;
1987 u64 msg_offset;
1988 enum xp_retval ret;
1989
1990 if (mutex_lock_interruptible(&ch->msg_to_pull_mutex) != 0) {
1991 /* we were interrupted by a signal */
1992 return NULL;
1993 }
1994
1995 while (get >= ch->next_msg_to_pull) {
1996
1997 /* pull as many messages as are ready and able to be pulled */
1998
1999 msg_index = ch->next_msg_to_pull % ch->remote_nentries;
2000
2001 DBUG_ON(ch->next_msg_to_pull >= ch->w_remote_GP.put);
2002 nmsgs = ch->w_remote_GP.put - ch->next_msg_to_pull;
2003 if (msg_index + nmsgs > ch->remote_nentries) {
2004 /* ignore the ones that wrap the msg queue for now */
2005 nmsgs = ch->remote_nentries - msg_index;
2006 }
2007
2008 msg_offset = msg_index * ch->msg_size;
2009 msg = (struct xpc_msg *)((u64)ch->remote_msgqueue + msg_offset);
2010 remote_msg = (struct xpc_msg *)(ch->remote_msgqueue_pa +
2011 msg_offset);
2012
2013 ret = xpc_pull_remote_cachelines(part, msg, remote_msg,
2014 nmsgs * ch->msg_size);
2015 if (ret != xpSuccess) {
2016
2017 dev_dbg(xpc_chan, "failed to pull %d msgs starting with"
2018 " msg %ld from partition %d, channel=%d, "
2019 "ret=%d\n", nmsgs, ch->next_msg_to_pull,
2020 ch->partid, ch->number, ret);
2021
2022 XPC_DEACTIVATE_PARTITION(part, ret);
2023
2024 mutex_unlock(&ch->msg_to_pull_mutex);
2025 return NULL;
2026 }
2027
2028 ch->next_msg_to_pull += nmsgs;
2029 } 918 }
2030 919 return ret;
2031 mutex_unlock(&ch->msg_to_pull_mutex);
2032
2033 /* return the message we were looking for */
2034 msg_offset = (get % ch->remote_nentries) * ch->msg_size;
2035 msg = (struct xpc_msg *)((u64)ch->remote_msgqueue + msg_offset);
2036
2037 return msg;
2038}
2039
2040/*
2041 * Get a message to be delivered.
2042 */
2043static struct xpc_msg *
2044xpc_get_deliverable_msg(struct xpc_channel *ch)
2045{
2046 struct xpc_msg *msg = NULL;
2047 s64 get;
2048
2049 do {
2050 if (ch->flags & XPC_C_DISCONNECTING)
2051 break;
2052
2053 get = ch->w_local_GP.get;
2054 rmb(); /* guarantee that .get loads before .put */
2055 if (get == ch->w_remote_GP.put)
2056 break;
2057
2058 /* There are messages waiting to be pulled and delivered.
2059 * We need to try to secure one for ourselves. We'll do this
2060 * by trying to increment w_local_GP.get and hope that no one
2061 * else beats us to it. If they do, we'll simply have
2062 * to try again for the next one.
2063 */
2064
2065 if (cmpxchg(&ch->w_local_GP.get, get, get + 1) == get) {
2066 /* we got the entry referenced by get */
2067
2068 dev_dbg(xpc_chan, "w_local_GP.get changed to %ld, "
2069 "partid=%d, channel=%d\n", get + 1,
2070 ch->partid, ch->number);
2071
2072 /* pull the message from the remote partition */
2073
2074 msg = xpc_pull_remote_msg(ch, get);
2075
2076 DBUG_ON(msg != NULL && msg->number != get);
2077 DBUG_ON(msg != NULL && (msg->flags & XPC_M_DONE));
2078 DBUG_ON(msg != NULL && !(msg->flags & XPC_M_READY));
2079
2080 break;
2081 }
2082
2083 } while (1);
2084
2085 return msg;
2086} 920}
2087 921
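Since the notify function must be non-blocking, a completion is a natural fit for callers that want to wait elsewhere. A sketch, assuming the xpc_notify_func signature from xp.h and an invented "my_send_ctx" context:

        struct my_send_ctx {
                struct completion done;
                enum xp_retval result;
        };

        /* runs in XPC's context: keep it short and non-blocking */
        static void
        my_notify(enum xp_retval reason, short partid, int ch_number, void *key)
        {
                struct my_send_ctx *ctx = key;

                ctx->result = reason;   /* e.g. xpMsgDelivered on success */
                complete(&ctx->done);   /* wake the thread that sent the msg */
        }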
2088/* 922/*
2089 * Deliver a message to its intended recipient. 923 * Deliver a message's payload to its intended recipient.
2090 */ 924 */
2091void 925void
2092xpc_deliver_msg(struct xpc_channel *ch) 926xpc_deliver_payload(struct xpc_channel *ch)
2093{ 927{
2094 struct xpc_msg *msg; 928 void *payload;
2095 929
2096 msg = xpc_get_deliverable_msg(ch); 930 payload = xpc_get_deliverable_payload(ch);
2097 if (msg != NULL) { 931 if (payload != NULL) {
2098 932
2099 /* 933 /*
2100 * This ref is taken to protect the payload itself from being 934 * This ref is taken to protect the payload itself from being
@@ -2106,18 +940,16 @@ xpc_deliver_msg(struct xpc_channel *ch)
2106 atomic_inc(&ch->kthreads_active); 940 atomic_inc(&ch->kthreads_active);
2107 941
2108 if (ch->func != NULL) { 942 if (ch->func != NULL) {
2109 dev_dbg(xpc_chan, "ch->func() called, msg=0x%p, " 943 dev_dbg(xpc_chan, "ch->func() called, payload=0x%p "
2110 "msg_number=%ld, partid=%d, channel=%d\n", 944 "partid=%d channel=%d\n", payload, ch->partid,
2111 (void *)msg, msg->number, ch->partid,
2112 ch->number); 945 ch->number);
2113 946
2114 /* deliver the message to its intended recipient */ 947 /* deliver the message to its intended recipient */
2115 ch->func(xpMsgReceived, ch->partid, ch->number, 948 ch->func(xpMsgReceived, ch->partid, ch->number, payload,
2116 &msg->payload, ch->key); 949 ch->key);
2117 950
2118 dev_dbg(xpc_chan, "ch->func() returned, msg=0x%p, " 951 dev_dbg(xpc_chan, "ch->func() returned, payload=0x%p "
2119 "msg_number=%ld, partid=%d, channel=%d\n", 952 "partid=%d channel=%d\n", payload, ch->partid,
2120 (void *)msg, msg->number, ch->partid,
2121 ch->number); 953 ch->number);
2122 } 954 }
2123 955
@@ -2126,118 +958,31 @@ xpc_deliver_msg(struct xpc_channel *ch)
2126} 958}
2127 959
2128/* 960/*
2129 * Now we actually acknowledge the messages that have been delivered and ack'd 961 * Acknowledge receipt of a delivered message's payload.
2130 * by advancing the cached remote message queue's Get value and if requested
2131 * send an IPI to the message sender's partition.
2132 */
2133static void
2134xpc_acknowledge_msgs(struct xpc_channel *ch, s64 initial_get, u8 msg_flags)
2135{
2136 struct xpc_msg *msg;
2137 s64 get = initial_get + 1;
2138 int send_IPI = 0;
2139
2140 while (1) {
2141
2142 while (1) {
2143 if (get == ch->w_local_GP.get)
2144 break;
2145
2146 msg = (struct xpc_msg *)((u64)ch->remote_msgqueue +
2147 (get % ch->remote_nentries) *
2148 ch->msg_size);
2149
2150 if (!(msg->flags & XPC_M_DONE))
2151 break;
2152
2153 msg_flags |= msg->flags;
2154 get++;
2155 }
2156
2157 if (get == initial_get) {
2158 /* nothing's changed */
2159 break;
2160 }
2161
2162 if (cmpxchg_rel(&ch->local_GP->get, initial_get, get) !=
2163 initial_get) {
2164 /* someone else beat us to it */
2165 DBUG_ON(ch->local_GP->get <= initial_get);
2166 break;
2167 }
2168
2169 /* we just set the new value of local_GP->get */
2170
2171 dev_dbg(xpc_chan, "local_GP->get changed to %ld, partid=%d, "
2172 "channel=%d\n", get, ch->partid, ch->number);
2173
2174 send_IPI = (msg_flags & XPC_M_INTERRUPT);
2175
2176 /*
2177 * We need to ensure that the message referenced by
2178 * local_GP->get is not XPC_M_DONE or that local_GP->get
2179 * equals w_local_GP.get, so we'll go have a look.
2180 */
2181 initial_get = get;
2182 }
2183
2184 if (send_IPI)
2185 xpc_IPI_send_msgrequest(ch);
2186}
2187
2188/*
2189 * Acknowledge receipt of a delivered message.
2190 *
2191 * If a message has XPC_M_INTERRUPT set, send an interrupt to the partition
2192 * that sent the message.
2193 * 962 *
2194 * This function, although called by users, does not call xpc_part_ref() to 963 * This function, although called by users, does not call xpc_part_ref() to
2195 * ensure that the partition infrastructure is in place. It relies on the 964 * ensure that the partition infrastructure is in place. It relies on the
2196 * fact that we called xpc_msgqueue_ref() in xpc_deliver_msg(). 965 * fact that we called xpc_msgqueue_ref() in xpc_deliver_payload().
2197 * 966 *
2198 * Arguments: 967 * Arguments:
2199 * 968 *
2200 * partid - ID of partition to which the channel is connected. 969 * partid - ID of partition to which the channel is connected.
2201 * ch_number - channel # message received on. 970 * ch_number - channel # message received on.
2202 * payload - pointer to the payload area allocated via 971 * payload - pointer to the payload area allocated via
2203 * xpc_initiate_allocate(). 972 * xpc_initiate_send() or xpc_initiate_send_notify().
2204 */ 973 */
2205void 974void
2206xpc_initiate_received(short partid, int ch_number, void *payload) 975xpc_initiate_received(short partid, int ch_number, void *payload)
2207{ 976{
2208 struct xpc_partition *part = &xpc_partitions[partid]; 977 struct xpc_partition *part = &xpc_partitions[partid];
2209 struct xpc_channel *ch; 978 struct xpc_channel *ch;
2210 struct xpc_msg *msg = XPC_MSG_ADDRESS(payload);
2211 s64 get, msg_number = msg->number;
2212 979
2213 DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS); 980 DBUG_ON(partid < 0 || partid >= xp_max_npartitions);
2214 DBUG_ON(ch_number < 0 || ch_number >= part->nchannels); 981 DBUG_ON(ch_number < 0 || ch_number >= part->nchannels);
2215 982
2216 ch = &part->channels[ch_number]; 983 ch = &part->channels[ch_number];
984 xpc_received_payload(ch, payload);
2217 985
2218 dev_dbg(xpc_chan, "msg=0x%p, msg_number=%ld, partid=%d, channel=%d\n", 986 /* the call to xpc_msgqueue_ref() was done by xpc_deliver_payload() */
2219 (void *)msg, msg_number, ch->partid, ch->number);
2220
2221 DBUG_ON((((u64)msg - (u64)ch->remote_msgqueue) / ch->msg_size) !=
2222 msg_number % ch->remote_nentries);
2223 DBUG_ON(msg->flags & XPC_M_DONE);
2224
2225 msg->flags |= XPC_M_DONE;
2226
2227 /*
2228 * The preceding store of msg->flags must occur before the following
2229 * load of ch->local_GP->get.
2230 */
2231 mb();
2232
2233 /*
2234 * See if this message is next in line to be acknowledged as having
2235 * been delivered.
2236 */
2237 get = ch->local_GP->get;
2238 if (get == msg_number)
2239 xpc_acknowledge_msgs(ch, get, msg->flags);
2240
2241 /* the call to xpc_msgqueue_ref() was done by xpc_deliver_msg() */
2242 xpc_msgqueue_deref(ch); 987 xpc_msgqueue_deref(ch);
2243} 988}
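Tying the receive path together: payloads are handed to the channel function that was registered via xpc_connect(), and the recipient returns each one with xpc_initiate_received() when done. A sketch under those assumptions (the "my_" names are invented):

        /* channel function invoked from xpc_deliver_payload() */
        static void
        my_channel_func(enum xp_retval reason, short partid, int ch_number,
                        void *data, void *key)
        {
                if (reason != xpMsgReceived)
                        return;         /* connect/disconnect notifications */

                my_consume(data);       /* hypothetical payload handler */

                /* hand the message entry back to XPC for reuse */
                xpc_initiate_received(partid, ch_number, data);
        }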
diff --git a/drivers/misc/sgi-xp/xpc_main.c b/drivers/misc/sgi-xp/xpc_main.c
index c3b4227f48a5..46325fc84811 100644
--- a/drivers/misc/sgi-xp/xpc_main.c
+++ b/drivers/misc/sgi-xp/xpc_main.c
@@ -25,37 +25,31 @@
25 * 25 *
26 * Caveats: 26 * Caveats:
27 * 27 *
28 * . We currently have no way to determine which nasid an IPI came 28 * . Currently on sn2, we have no way to determine which nasid an IRQ
29 * from. Thus, xpc_IPI_send() does a remote AMO write followed by 29 * came from. Thus, xpc_send_IRQ_sn2() does a remote amo write
30 * an IPI. The AMO indicates where data is to be pulled from, so 30 * followed by an IPI. The amo indicates where data is to be pulled
31 * after the IPI arrives, the remote partition checks the AMO word. 31 * from, so after the IPI arrives, the remote partition checks the amo
32 * The IPI can actually arrive before the AMO however, so other code 32 * word. The IPI can actually arrive before the amo however, so other
33 * must periodically check for this case. Also, remote AMO operations 33 * code must periodically check for this case. Also, remote amo
34 * do not reliably time out. Thus we do a remote PIO read solely to 34 * operations do not reliably time out. Thus we do a remote PIO read
35 * know whether the remote partition is down and whether we should 35 * solely to know whether the remote partition is down and whether we
36 * stop sending IPIs to it. This remote PIO read operation is set up 36 * should stop sending IPIs to it. This remote PIO read operation is
37 * in a special nofault region so SAL knows to ignore (and cleanup) 37 * set up in a special nofault region so SAL knows to ignore (and
38 * any errors due to the remote AMO write, PIO read, and/or PIO 38 * cleanup) any errors due to the remote amo write, PIO read, and/or
39 * write operations. 39 * PIO write operations.
40 * 40 *
41 * If/when new hardware solves this IPI problem, we should abandon 41 * If/when new hardware solves this IPI problem, we should abandon
42 * the current approach. 42 * the current approach.
43 * 43 *
44 */ 44 */
45 45
46#include <linux/kernel.h>
47#include <linux/module.h> 46#include <linux/module.h>
48#include <linux/init.h> 47#include <linux/sysctl.h>
49#include <linux/cache.h> 48#include <linux/device.h>
50#include <linux/interrupt.h>
51#include <linux/delay.h> 49#include <linux/delay.h>
52#include <linux/reboot.h> 50#include <linux/reboot.h>
53#include <linux/completion.h>
54#include <linux/kdebug.h> 51#include <linux/kdebug.h>
55#include <linux/kthread.h> 52#include <linux/kthread.h>
56#include <linux/uaccess.h>
57#include <asm/sn/intr.h>
58#include <asm/sn/sn_sal.h>
59#include "xpc.h" 53#include "xpc.h"
60 54
61/* define two XPC debug device structures to be used with dev_dbg() et al */ 55/* define two XPC debug device structures to be used with dev_dbg() et al */
@@ -89,9 +83,9 @@ static int xpc_hb_check_interval = XPC_HB_CHECK_DEFAULT_INTERVAL;
89static int xpc_hb_check_min_interval = 10; 83static int xpc_hb_check_min_interval = 10;
90static int xpc_hb_check_max_interval = 120; 84static int xpc_hb_check_max_interval = 120;
91 85
92int xpc_disengage_request_timelimit = XPC_DISENGAGE_REQUEST_DEFAULT_TIMELIMIT; 86int xpc_disengage_timelimit = XPC_DISENGAGE_DEFAULT_TIMELIMIT;
93static int xpc_disengage_request_min_timelimit; /* = 0 */ 87static int xpc_disengage_min_timelimit; /* = 0 */
94static int xpc_disengage_request_max_timelimit = 120; 88static int xpc_disengage_max_timelimit = 120;
95 89
96static ctl_table xpc_sys_xpc_hb_dir[] = { 90static ctl_table xpc_sys_xpc_hb_dir[] = {
97 { 91 {
@@ -124,14 +118,14 @@ static ctl_table xpc_sys_xpc_dir[] = {
124 .child = xpc_sys_xpc_hb_dir}, 118 .child = xpc_sys_xpc_hb_dir},
125 { 119 {
126 .ctl_name = CTL_UNNUMBERED, 120 .ctl_name = CTL_UNNUMBERED,
127 .procname = "disengage_request_timelimit", 121 .procname = "disengage_timelimit",
128 .data = &xpc_disengage_request_timelimit, 122 .data = &xpc_disengage_timelimit,
129 .maxlen = sizeof(int), 123 .maxlen = sizeof(int),
130 .mode = 0644, 124 .mode = 0644,
131 .proc_handler = &proc_dointvec_minmax, 125 .proc_handler = &proc_dointvec_minmax,
132 .strategy = &sysctl_intvec, 126 .strategy = &sysctl_intvec,
133 .extra1 = &xpc_disengage_request_min_timelimit, 127 .extra1 = &xpc_disengage_min_timelimit,
134 .extra2 = &xpc_disengage_request_max_timelimit}, 128 .extra2 = &xpc_disengage_max_timelimit},
135 {} 129 {}
136}; 130};
137static ctl_table xpc_sys_dir[] = { 131static ctl_table xpc_sys_dir[] = {
@@ -144,16 +138,19 @@ static ctl_table xpc_sys_dir[] = {
144}; 138};
145static struct ctl_table_header *xpc_sysctl; 139static struct ctl_table_header *xpc_sysctl;
146 140
147/* non-zero if any remote partition disengage request was timed out */ 141/* non-zero if any remote partition disengage was timed out */
148int xpc_disengage_request_timedout; 142int xpc_disengage_timedout;
149 143
150/* #of IRQs received */ 144/* #of activate IRQs received and not yet processed */
151static atomic_t xpc_act_IRQ_rcvd; 145int xpc_activate_IRQ_rcvd;
146DEFINE_SPINLOCK(xpc_activate_IRQ_rcvd_lock);
152 147
153/* IRQ handler notifies this wait queue on receipt of an IRQ */ 148/* IRQ handler notifies this wait queue on receipt of an IRQ */
154static DECLARE_WAIT_QUEUE_HEAD(xpc_act_IRQ_wq); 149DECLARE_WAIT_QUEUE_HEAD(xpc_activate_IRQ_wq);
155 150
156static unsigned long xpc_hb_check_timeout; 151static unsigned long xpc_hb_check_timeout;
152static struct timer_list xpc_hb_timer;
153void *xpc_heartbeating_to_mask;
157 154
158/* notification that the xpc_hb_checker thread has exited */ 155/* notification that the xpc_hb_checker thread has exited */
159static DECLARE_COMPLETION(xpc_hb_checker_exited); 156static DECLARE_COMPLETION(xpc_hb_checker_exited);
@@ -161,8 +158,6 @@ static DECLARE_COMPLETION(xpc_hb_checker_exited);
161/* notification that the xpc_discovery thread has exited */ 158/* notification that the xpc_discovery thread has exited */
162static DECLARE_COMPLETION(xpc_discovery_exited); 159static DECLARE_COMPLETION(xpc_discovery_exited);
163 160
164static struct timer_list xpc_hb_timer;
165
166static void xpc_kthread_waitmsgs(struct xpc_partition *, struct xpc_channel *); 161static void xpc_kthread_waitmsgs(struct xpc_partition *, struct xpc_channel *);
167 162
168static int xpc_system_reboot(struct notifier_block *, unsigned long, void *); 163static int xpc_system_reboot(struct notifier_block *, unsigned long, void *);
@@ -175,31 +170,76 @@ static struct notifier_block xpc_die_notifier = {
175 .notifier_call = xpc_system_die, 170 .notifier_call = xpc_system_die,
176}; 171};
177 172
173int (*xpc_setup_partitions_sn) (void);
174enum xp_retval (*xpc_get_partition_rsvd_page_pa) (void *buf, u64 *cookie,
175 unsigned long *rp_pa,
176 size_t *len);
177int (*xpc_setup_rsvd_page_sn) (struct xpc_rsvd_page *rp);
178void (*xpc_heartbeat_init) (void);
179void (*xpc_heartbeat_exit) (void);
180void (*xpc_increment_heartbeat) (void);
181void (*xpc_offline_heartbeat) (void);
182void (*xpc_online_heartbeat) (void);
183enum xp_retval (*xpc_get_remote_heartbeat) (struct xpc_partition *part);
184
185enum xp_retval (*xpc_make_first_contact) (struct xpc_partition *part);
186void (*xpc_notify_senders_of_disconnect) (struct xpc_channel *ch);
187u64 (*xpc_get_chctl_all_flags) (struct xpc_partition *part);
188enum xp_retval (*xpc_setup_msg_structures) (struct xpc_channel *ch);
189void (*xpc_teardown_msg_structures) (struct xpc_channel *ch);
190void (*xpc_process_msg_chctl_flags) (struct xpc_partition *part, int ch_number);
191int (*xpc_n_of_deliverable_payloads) (struct xpc_channel *ch);
192void *(*xpc_get_deliverable_payload) (struct xpc_channel *ch);
193
194void (*xpc_request_partition_activation) (struct xpc_rsvd_page *remote_rp,
195 unsigned long remote_rp_pa,
196 int nasid);
197void (*xpc_request_partition_reactivation) (struct xpc_partition *part);
198void (*xpc_request_partition_deactivation) (struct xpc_partition *part);
199void (*xpc_cancel_partition_deactivation_request) (struct xpc_partition *part);
200
201void (*xpc_process_activate_IRQ_rcvd) (void);
202enum xp_retval (*xpc_setup_ch_structures_sn) (struct xpc_partition *part);
203void (*xpc_teardown_ch_structures_sn) (struct xpc_partition *part);
204
205void (*xpc_indicate_partition_engaged) (struct xpc_partition *part);
206int (*xpc_partition_engaged) (short partid);
207int (*xpc_any_partition_engaged) (void);
208void (*xpc_indicate_partition_disengaged) (struct xpc_partition *part);
209void (*xpc_assume_partition_disengaged) (short partid);
210
211void (*xpc_send_chctl_closerequest) (struct xpc_channel *ch,
212 unsigned long *irq_flags);
213void (*xpc_send_chctl_closereply) (struct xpc_channel *ch,
214 unsigned long *irq_flags);
215void (*xpc_send_chctl_openrequest) (struct xpc_channel *ch,
216 unsigned long *irq_flags);
217void (*xpc_send_chctl_openreply) (struct xpc_channel *ch,
218 unsigned long *irq_flags);
219
220void (*xpc_save_remote_msgqueue_pa) (struct xpc_channel *ch,
221 unsigned long msgqueue_pa);
222
223enum xp_retval (*xpc_send_payload) (struct xpc_channel *ch, u32 flags,
224 void *payload, u16 payload_size,
225 u8 notify_type, xpc_notify_func func,
226 void *key);
227void (*xpc_received_payload) (struct xpc_channel *ch, void *payload);
228
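The function pointers declared above form the new indirection layer that lets the generic XPC core drive either sn2 or uv hardware. A sketch of how a per-arch init routine might wire a few of them up; the *_sn2 names are assumptions patterned on the naming in this patch, and the real assignments live in xpc_sn2.c/xpc_uv.c, which this hunk does not show:

        /* illustrative only */
        int
        xpc_init_sn2(void)
        {
                xpc_increment_heartbeat = xpc_increment_heartbeat_sn2;
                xpc_get_deliverable_payload = xpc_get_deliverable_payload_sn2;
                xpc_send_payload = xpc_send_payload_sn2;
                xpc_received_payload = xpc_received_payload_sn2;
                /* ...and so on for the remaining ops... */
                return 0;
        }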
178/* 229/*
179 * Timer function to enforce the timelimit on the partition disengage request. 230 * Timer function to enforce the timelimit on the partition disengage.
180 */ 231 */
181static void 232static void
182xpc_timeout_partition_disengage_request(unsigned long data) 233xpc_timeout_partition_disengage(unsigned long data)
183{ 234{
184 struct xpc_partition *part = (struct xpc_partition *)data; 235 struct xpc_partition *part = (struct xpc_partition *)data;
185 236
186 DBUG_ON(time_before(jiffies, part->disengage_request_timeout)); 237 DBUG_ON(time_is_after_jiffies(part->disengage_timeout));
187 238
188 (void)xpc_partition_disengaged(part); 239 (void)xpc_partition_disengaged(part);
189 240
190 DBUG_ON(part->disengage_request_timeout != 0); 241 DBUG_ON(part->disengage_timeout != 0);
191 DBUG_ON(xpc_partition_engaged(1UL << XPC_PARTID(part)) != 0); 242 DBUG_ON(xpc_partition_engaged(XPC_PARTID(part)));
192}
193
194/*
195 * Notify the heartbeat check thread that an IRQ has been received.
196 */
197static irqreturn_t
198xpc_act_IRQ_handler(int irq, void *dev_id)
199{
200 atomic_inc(&xpc_act_IRQ_rcvd);
201 wake_up_interruptible(&xpc_act_IRQ_wq);
202 return IRQ_HANDLED;
203} 243}
204 244
205/* 245/*
@@ -210,15 +250,63 @@ xpc_act_IRQ_handler(int irq, void *dev_id)
210static void 250static void
211xpc_hb_beater(unsigned long dummy) 251xpc_hb_beater(unsigned long dummy)
212{ 252{
213 xpc_vars->heartbeat++; 253 xpc_increment_heartbeat();
214 254
215 if (time_after_eq(jiffies, xpc_hb_check_timeout)) 255 if (time_is_before_eq_jiffies(xpc_hb_check_timeout))
216 wake_up_interruptible(&xpc_act_IRQ_wq); 256 wake_up_interruptible(&xpc_activate_IRQ_wq);
217 257
218 xpc_hb_timer.expires = jiffies + (xpc_hb_interval * HZ); 258 xpc_hb_timer.expires = jiffies + (xpc_hb_interval * HZ);
219 add_timer(&xpc_hb_timer); 259 add_timer(&xpc_hb_timer);
220} 260}
221 261
262static void
263xpc_start_hb_beater(void)
264{
265 xpc_heartbeat_init();
266 init_timer(&xpc_hb_timer);
267 xpc_hb_timer.function = xpc_hb_beater;
268 xpc_hb_beater(0);
269}
270
271static void
272xpc_stop_hb_beater(void)
273{
274 del_timer_sync(&xpc_hb_timer);
275 xpc_heartbeat_exit();
276}
277
278/*
279 * At periodic intervals, scan through all active partitions and ensure
280 * their heartbeat is still active. If not, the partition is deactivated.
281 */
282static void
283xpc_check_remote_hb(void)
284{
285 struct xpc_partition *part;
286 short partid;
287 enum xp_retval ret;
288
289 for (partid = 0; partid < xp_max_npartitions; partid++) {
290
291 if (xpc_exiting)
292 break;
293
294 if (partid == xp_partition_id)
295 continue;
296
297 part = &xpc_partitions[partid];
298
299 if (part->act_state == XPC_P_AS_INACTIVE ||
300 part->act_state == XPC_P_AS_DEACTIVATING) {
301 continue;
302 }
303
304 ret = xpc_get_remote_heartbeat(part);
305 if (ret != xpSuccess)
306 XPC_DEACTIVATE_PARTITION(part, ret);
307 }
308}
309
222/* 310/*
223 * This thread is responsible for nearly all of the partition 311 * This thread is responsible for nearly all of the partition
224 * activation/deactivation. 312 * activation/deactivation.
@@ -226,8 +314,6 @@ xpc_hb_beater(unsigned long dummy)
226static int 314static int
227xpc_hb_checker(void *ignore) 315xpc_hb_checker(void *ignore)
228{ 316{
229 int last_IRQ_count = 0;
230 int new_IRQ_count;
231 int force_IRQ = 0; 317 int force_IRQ = 0;
232 318
233 /* this thread was marked active by xpc_hb_init() */ 319 /* this thread was marked active by xpc_hb_init() */
@@ -236,56 +322,49 @@ xpc_hb_checker(void *ignore)
236 322
237 /* set our heartbeating to other partitions into motion */ 323 /* set our heartbeating to other partitions into motion */
238 xpc_hb_check_timeout = jiffies + (xpc_hb_check_interval * HZ); 324 xpc_hb_check_timeout = jiffies + (xpc_hb_check_interval * HZ);
239 xpc_hb_beater(0); 325 xpc_start_hb_beater();
240 326
241 while (!xpc_exiting) { 327 while (!xpc_exiting) {
242 328
243 dev_dbg(xpc_part, "woke up with %d ticks rem; %d IRQs have " 329 dev_dbg(xpc_part, "woke up with %d ticks rem; %d IRQs have "
244 "been received\n", 330 "been received\n",
245 (int)(xpc_hb_check_timeout - jiffies), 331 (int)(xpc_hb_check_timeout - jiffies),
246 atomic_read(&xpc_act_IRQ_rcvd) - last_IRQ_count); 332 xpc_activate_IRQ_rcvd);
247 333
248 /* checking of remote heartbeats is skewed by IRQ handling */ 334 /* checking of remote heartbeats is skewed by IRQ handling */
249 if (time_after_eq(jiffies, xpc_hb_check_timeout)) { 335 if (time_is_before_eq_jiffies(xpc_hb_check_timeout)) {
336 xpc_hb_check_timeout = jiffies +
337 (xpc_hb_check_interval * HZ);
338
250 dev_dbg(xpc_part, "checking remote heartbeats\n"); 339 dev_dbg(xpc_part, "checking remote heartbeats\n");
251 xpc_check_remote_hb(); 340 xpc_check_remote_hb();
252 341
253 /* 342 /*
254 * We need to periodically recheck to ensure no 343 * On sn2 we need to periodically recheck to ensure no
255 * IPI/AMO pairs have been missed. That check 344 * IRQ/amo pairs have been missed.
256 * must always reset xpc_hb_check_timeout.
257 */ 345 */
258 force_IRQ = 1; 346 if (is_shub())
347 force_IRQ = 1;
259 } 348 }
260 349
261 /* check for outstanding IRQs */ 350 /* check for outstanding IRQs */
262 new_IRQ_count = atomic_read(&xpc_act_IRQ_rcvd); 351 if (xpc_activate_IRQ_rcvd > 0 || force_IRQ != 0) {
263 if (last_IRQ_count < new_IRQ_count || force_IRQ != 0) {
264 force_IRQ = 0; 352 force_IRQ = 0;
265 353 dev_dbg(xpc_part, "processing activate IRQs "
266 dev_dbg(xpc_part, "found an IRQ to process; will be " 354 "received\n");
267 "resetting xpc_hb_check_timeout\n"); 355 xpc_process_activate_IRQ_rcvd();
268
269 last_IRQ_count += xpc_identify_act_IRQ_sender();
270 if (last_IRQ_count < new_IRQ_count) {
271 /* retry once to help avoid missing AMO */
272 (void)xpc_identify_act_IRQ_sender();
273 }
274 last_IRQ_count = new_IRQ_count;
275
276 xpc_hb_check_timeout = jiffies +
277 (xpc_hb_check_interval * HZ);
278 } 356 }
279 357
280 /* wait for IRQ or timeout */ 358 /* wait for IRQ or timeout */
281 (void)wait_event_interruptible(xpc_act_IRQ_wq, 359 (void)wait_event_interruptible(xpc_activate_IRQ_wq,
282 (last_IRQ_count < 360 (time_is_before_eq_jiffies(
283 atomic_read(&xpc_act_IRQ_rcvd) 361 xpc_hb_check_timeout) ||
284 || time_after_eq(jiffies, 362 xpc_activate_IRQ_rcvd > 0 ||
285 xpc_hb_check_timeout) ||
286 xpc_exiting)); 363 xpc_exiting));
287 } 364 }
288 365
366 xpc_stop_hb_beater();
367
289 dev_dbg(xpc_part, "heartbeat checker is exiting\n"); 368 dev_dbg(xpc_part, "heartbeat checker is exiting\n");
290 369
291 /* mark this thread as having exited */ 370 /* mark this thread as having exited */
@@ -311,37 +390,8 @@ xpc_initiate_discovery(void *ignore)
311} 390}
312 391
313/* 392/*
314 * Establish first contact with the remote partition. This involves pulling
315 * the XPC per partition variables from the remote partition and waiting for
316 * the remote partition to pull ours.
317 */
318static enum xp_retval
319xpc_make_first_contact(struct xpc_partition *part)
320{
321 enum xp_retval ret;
322
323 while ((ret = xpc_pull_remote_vars_part(part)) != xpSuccess) {
324 if (ret != xpRetry) {
325 XPC_DEACTIVATE_PARTITION(part, ret);
326 return ret;
327 }
328
329 dev_dbg(xpc_chan, "waiting to make first contact with "
330 "partition %d\n", XPC_PARTID(part));
331
332 /* wait a 1/4 of a second or so */
333 (void)msleep_interruptible(250);
334
335 if (part->act_state == XPC_P_DEACTIVATING)
336 return part->reason;
337 }
338
339 return xpc_mark_partition_active(part);
340}
341
342/*
343 * The first kthread assigned to a newly activated partition is the one 393 * The first kthread assigned to a newly activated partition is the one
344 * created by XPC HB with which it calls xpc_partition_up(). XPC hangs on to 394 * created by XPC HB with which it calls xpc_activating(). XPC hangs on to
345 * that kthread until the partition is brought down, at which time that kthread 395 * that kthread until the partition is brought down, at which time that kthread
346 * returns back to XPC HB. (The return of that kthread will signify to XPC HB 396 * returns back to XPC HB. (The return of that kthread will signify to XPC HB
347 * that XPC has dismantled all communication infrastructure for the associated 397 * that XPC has dismantled all communication infrastructure for the associated
@@ -354,11 +404,11 @@ xpc_make_first_contact(struct xpc_partition *part)
354static void 404static void
355xpc_channel_mgr(struct xpc_partition *part) 405xpc_channel_mgr(struct xpc_partition *part)
356{ 406{
357 while (part->act_state != XPC_P_DEACTIVATING || 407 while (part->act_state != XPC_P_AS_DEACTIVATING ||
358 atomic_read(&part->nchannels_active) > 0 || 408 atomic_read(&part->nchannels_active) > 0 ||
359 !xpc_partition_disengaged(part)) { 409 !xpc_partition_disengaged(part)) {
360 410
361 xpc_process_channel_activity(part); 411 xpc_process_sent_chctl_flags(part);
362 412
363 /* 413 /*
364 * Wait until we've been requested to activate kthreads or 414 * Wait until we've been requested to activate kthreads or
@@ -376,8 +426,8 @@ xpc_channel_mgr(struct xpc_partition *part)
376 atomic_dec(&part->channel_mgr_requests); 426 atomic_dec(&part->channel_mgr_requests);
377 (void)wait_event_interruptible(part->channel_mgr_wq, 427 (void)wait_event_interruptible(part->channel_mgr_wq,
378 (atomic_read(&part->channel_mgr_requests) > 0 || 428 (atomic_read(&part->channel_mgr_requests) > 0 ||
379 part->local_IPI_amo != 0 || 429 part->chctl.all_flags != 0 ||
380 (part->act_state == XPC_P_DEACTIVATING && 430 (part->act_state == XPC_P_AS_DEACTIVATING &&
381 atomic_read(&part->nchannels_active) == 0 && 431 atomic_read(&part->nchannels_active) == 0 &&
382 xpc_partition_disengaged(part)))); 432 xpc_partition_disengaged(part))));
383 atomic_set(&part->channel_mgr_requests, 1); 433 atomic_set(&part->channel_mgr_requests, 1);
@@ -385,47 +435,163 @@ xpc_channel_mgr(struct xpc_partition *part)
385} 435}
386 436
387/* 437/*
388 * When XPC HB determines that a partition has come up, it will create a new 438 * Guarantee that the kzalloc'd memory is cacheline aligned.
389 * kthread and that kthread will call this function to attempt to set up the
390 * basic infrastructure used for Cross Partition Communication with the newly
391 * upped partition.
392 *
393 * The kthread that was created by XPC HB and which setup the XPC
394 * infrastructure will remain assigned to the partition until the partition
395 * goes down. At which time the kthread will teardown the XPC infrastructure
396 * and then exit.
397 *
398 * XPC HB will put the remote partition's XPC per partition specific variables
399 * physical address into xpc_partitions[partid].remote_vars_part_pa prior to
400 * calling xpc_partition_up().
401 */ 439 */
402static void 440void *
403xpc_partition_up(struct xpc_partition *part) 441xpc_kzalloc_cacheline_aligned(size_t size, gfp_t flags, void **base)
442{
443 /* see if kzalloc will give us cacheline aligned memory by default */
444 *base = kzalloc(size, flags);
445 if (*base == NULL)
446 return NULL;
447
448 if ((u64)*base == L1_CACHE_ALIGN((u64)*base))
449 return *base;
450
451 kfree(*base);
452
453 /* nope, we'll have to do it ourselves */
454 *base = kzalloc(size + L1_CACHE_BYTES, flags);
455 if (*base == NULL)
456 return NULL;
457
458 return (void *)L1_CACHE_ALIGN((u64)*base);
459}
460
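Note the helper's contract: the returned pointer is L1-cacheline aligned and is what callers use, while *base receives the raw allocation that must be handed to kfree() later (exactly how remote_openclose_args_base is used below). A minimal sketch with an invented buffer:

        void *buf_base; /* raw pointer, kept only so it can be kfree'd */
        void *buf = xpc_kzalloc_cacheline_aligned(1024, GFP_KERNEL, &buf_base);

        if (buf != NULL) {
                /* ... use the cacheline-aligned buf ... */
                kfree(buf_base);        /* always free via the base pointer */
        }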
461/*
462 * Setup the channel structures necessary to support XPartition Communication
463 * between the specified remote partition and the local one.
464 */
465static enum xp_retval
466xpc_setup_ch_structures(struct xpc_partition *part)
404{ 467{
468 enum xp_retval ret;
469 int ch_number;
470 struct xpc_channel *ch;
471 short partid = XPC_PARTID(part);
472
473 /*
474 * Allocate all of the channel structures as a contiguous chunk of
475 * memory.
476 */
405 DBUG_ON(part->channels != NULL); 477 DBUG_ON(part->channels != NULL);
478 part->channels = kzalloc(sizeof(struct xpc_channel) * XPC_MAX_NCHANNELS,
479 GFP_KERNEL);
480 if (part->channels == NULL) {
481 dev_err(xpc_chan, "can't get memory for channels\n");
482 return xpNoMemory;
483 }
406 484
407 dev_dbg(xpc_chan, "activating partition %d\n", XPC_PARTID(part)); 485 /* allocate the remote open and close args */
408 486
409 if (xpc_setup_infrastructure(part) != xpSuccess) 487 part->remote_openclose_args =
410 return; 488 xpc_kzalloc_cacheline_aligned(XPC_OPENCLOSE_ARGS_SIZE,
489 GFP_KERNEL, &part->
490 remote_openclose_args_base);
491 if (part->remote_openclose_args == NULL) {
492 dev_err(xpc_chan, "can't get memory for remote connect args\n");
493 ret = xpNoMemory;
494 goto out_1;
495 }
496
497 part->chctl.all_flags = 0;
498 spin_lock_init(&part->chctl_lock);
499
500 atomic_set(&part->channel_mgr_requests, 1);
501 init_waitqueue_head(&part->channel_mgr_wq);
502
503 part->nchannels = XPC_MAX_NCHANNELS;
504
505 atomic_set(&part->nchannels_active, 0);
506 atomic_set(&part->nchannels_engaged, 0);
507
508 for (ch_number = 0; ch_number < part->nchannels; ch_number++) {
509 ch = &part->channels[ch_number];
510
511 ch->partid = partid;
512 ch->number = ch_number;
513 ch->flags = XPC_C_DISCONNECTED;
514
515 atomic_set(&ch->kthreads_assigned, 0);
516 atomic_set(&ch->kthreads_idle, 0);
517 atomic_set(&ch->kthreads_active, 0);
518
519 atomic_set(&ch->references, 0);
520 atomic_set(&ch->n_to_notify, 0);
521
522 spin_lock_init(&ch->lock);
523 init_completion(&ch->wdisconnect_wait);
524
525 atomic_set(&ch->n_on_msg_allocate_wq, 0);
526 init_waitqueue_head(&ch->msg_allocate_wq);
527 init_waitqueue_head(&ch->idle_wq);
528 }
529
530 ret = xpc_setup_ch_structures_sn(part);
531 if (ret != xpSuccess)
532 goto out_2;
533
534 /*
535 * With the setting of the partition setup_state to XPC_P_SS_SETUP,
536 * we're declaring that this partition is ready to go.
537 */
538 part->setup_state = XPC_P_SS_SETUP;
539
540 return xpSuccess;
541
542 /* setup of ch structures failed */
543out_2:
544 kfree(part->remote_openclose_args_base);
545 part->remote_openclose_args = NULL;
546out_1:
547 kfree(part->channels);
548 part->channels = NULL;
549 return ret;
550}
551
552/*
553 * Tear down the channel structures necessary to support XPartition Communication
554 * between the specified remote partition and the local one.
555 */
556static void
557xpc_teardown_ch_structures(struct xpc_partition *part)
558{
559 DBUG_ON(atomic_read(&part->nchannels_engaged) != 0);
560 DBUG_ON(atomic_read(&part->nchannels_active) != 0);
411 561
412 /* 562 /*
413 * The kthread that XPC HB called us with will become the 563 * Make this partition inaccessible to local processes by marking it
414 * channel manager for this partition. It will not return 564 * as no longer setup. Then wait before proceeding with the teardown
415 * back to XPC HB until the partition's XPC infrastructure 565 * until all existing references cease.
416 * has been dismantled.
417 */ 566 */
567 DBUG_ON(part->setup_state != XPC_P_SS_SETUP);
568 part->setup_state = XPC_P_SS_WTEARDOWN;
418 569
419 (void)xpc_part_ref(part); /* this will always succeed */ 570 wait_event(part->teardown_wq, (atomic_read(&part->references) == 0));
420 571
421 if (xpc_make_first_contact(part) == xpSuccess) 572 /* now we can begin tearing down the infrastructure */
422 xpc_channel_mgr(part);
423 573
424 xpc_part_deref(part); 574 xpc_teardown_ch_structures_sn(part);
425 575
426 xpc_teardown_infrastructure(part); 576 kfree(part->remote_openclose_args_base);
577 part->remote_openclose_args = NULL;
578 kfree(part->channels);
579 part->channels = NULL;
580
581 part->setup_state = XPC_P_SS_TORNDOWN;
427} 582}
428 583
584/*
585 * When XPC HB determines that a partition has come up, it will create a new
586 * kthread and that kthread will call this function to attempt to set up the
587 * basic infrastructure used for Cross Partition Communication with the newly
588 * upped partition.
589 *
590 * The kthread that was created by XPC HB and which set up the XPC
591 * infrastructure will remain assigned to the partition, becoming the channel
592 * manager for that partition, until the partition is deactivating, at which
593 * time the kthread will tear down the XPC infrastructure and then exit.
594 */
429static int 595static int
430xpc_activating(void *__partid) 596xpc_activating(void *__partid)
431{ 597{
@@ -433,64 +599,47 @@ xpc_activating(void *__partid)
433 struct xpc_partition *part = &xpc_partitions[partid]; 599 struct xpc_partition *part = &xpc_partitions[partid];
434 unsigned long irq_flags; 600 unsigned long irq_flags;
435 601
436 DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS); 602 DBUG_ON(partid < 0 || partid >= xp_max_npartitions);
437 603
438 spin_lock_irqsave(&part->act_lock, irq_flags); 604 spin_lock_irqsave(&part->act_lock, irq_flags);
439 605
440 if (part->act_state == XPC_P_DEACTIVATING) { 606 if (part->act_state == XPC_P_AS_DEACTIVATING) {
441 part->act_state = XPC_P_INACTIVE; 607 part->act_state = XPC_P_AS_INACTIVE;
442 spin_unlock_irqrestore(&part->act_lock, irq_flags); 608 spin_unlock_irqrestore(&part->act_lock, irq_flags);
443 part->remote_rp_pa = 0; 609 part->remote_rp_pa = 0;
444 return 0; 610 return 0;
445 } 611 }
446 612
447 /* indicate the thread is activating */ 613 /* indicate the thread is activating */
448 DBUG_ON(part->act_state != XPC_P_ACTIVATION_REQ); 614 DBUG_ON(part->act_state != XPC_P_AS_ACTIVATION_REQ);
449 part->act_state = XPC_P_ACTIVATING; 615 part->act_state = XPC_P_AS_ACTIVATING;
450 616
451 XPC_SET_REASON(part, 0, 0); 617 XPC_SET_REASON(part, 0, 0);
452 spin_unlock_irqrestore(&part->act_lock, irq_flags); 618 spin_unlock_irqrestore(&part->act_lock, irq_flags);
453 619
454 dev_dbg(xpc_part, "bringing partition %d up\n", partid); 620 dev_dbg(xpc_part, "activating partition %d\n", partid);
455 621
456 /* 622 xpc_allow_hb(partid);
457 * Register the remote partition's AMOs with SAL so it can handle
458 * and cleanup errors within that address range should the remote
459 * partition go down. We don't unregister this range because it is
460 * difficult to tell when outstanding writes to the remote partition
461 * are finished and thus when it is safe to unregister. This should
462 * not result in wasted space in the SAL xp_addr_region table because
463 * we should get the same page for remote_amos_page_pa after module
464 * reloads and system reboots.
465 */
466 if (sn_register_xp_addr_region(part->remote_amos_page_pa,
467 PAGE_SIZE, 1) < 0) {
468 dev_warn(xpc_part, "xpc_partition_up(%d) failed to register "
469 "xp_addr region\n", partid);
470 623
471 spin_lock_irqsave(&part->act_lock, irq_flags); 624 if (xpc_setup_ch_structures(part) == xpSuccess) {
472 part->act_state = XPC_P_INACTIVE; 625 (void)xpc_part_ref(part); /* this will always succeed */
473 XPC_SET_REASON(part, xpPhysAddrRegFailed, __LINE__);
474 spin_unlock_irqrestore(&part->act_lock, irq_flags);
475 part->remote_rp_pa = 0;
476 return 0;
477 }
478 626
479 xpc_allow_hb(partid, xpc_vars); 627 if (xpc_make_first_contact(part) == xpSuccess) {
480 xpc_IPI_send_activated(part); 628 xpc_mark_partition_active(part);
629 xpc_channel_mgr(part);
630 /* won't return until partition is deactivating */
631 }
481 632
482 /* 633 xpc_part_deref(part);
483 * xpc_partition_up() holds this thread and marks this partition as 634 xpc_teardown_ch_structures(part);
484 * XPC_P_ACTIVE by calling xpc_hb_mark_active(). 635 }
485 */
486 (void)xpc_partition_up(part);
487 636
488 xpc_disallow_hb(partid, xpc_vars); 637 xpc_disallow_hb(partid);
489 xpc_mark_partition_inactive(part); 638 xpc_mark_partition_inactive(part);
490 639
491 if (part->reason == xpReactivating) { 640 if (part->reason == xpReactivating) {
492 /* interrupting ourselves results in activating partition */ 641 /* interrupting ourselves results in activating partition */
493 xpc_IPI_send_reactivate(part); 642 xpc_request_partition_reactivation(part);
494 } 643 }
495 644
496 return 0; 645 return 0;
@@ -505,9 +654,9 @@ xpc_activate_partition(struct xpc_partition *part)
505 654
506 spin_lock_irqsave(&part->act_lock, irq_flags); 655 spin_lock_irqsave(&part->act_lock, irq_flags);
507 656
508 DBUG_ON(part->act_state != XPC_P_INACTIVE); 657 DBUG_ON(part->act_state != XPC_P_AS_INACTIVE);
509 658
510 part->act_state = XPC_P_ACTIVATION_REQ; 659 part->act_state = XPC_P_AS_ACTIVATION_REQ;
511 XPC_SET_REASON(part, xpCloneKThread, __LINE__); 660 XPC_SET_REASON(part, xpCloneKThread, __LINE__);
512 661
513 spin_unlock_irqrestore(&part->act_lock, irq_flags); 662 spin_unlock_irqrestore(&part->act_lock, irq_flags);
@@ -516,62 +665,12 @@ xpc_activate_partition(struct xpc_partition *part)
516 partid); 665 partid);
517 if (IS_ERR(kthread)) { 666 if (IS_ERR(kthread)) {
518 spin_lock_irqsave(&part->act_lock, irq_flags); 667 spin_lock_irqsave(&part->act_lock, irq_flags);
519 part->act_state = XPC_P_INACTIVE; 668 part->act_state = XPC_P_AS_INACTIVE;
520 XPC_SET_REASON(part, xpCloneKThreadFailed, __LINE__); 669 XPC_SET_REASON(part, xpCloneKThreadFailed, __LINE__);
521 spin_unlock_irqrestore(&part->act_lock, irq_flags); 670 spin_unlock_irqrestore(&part->act_lock, irq_flags);
522 } 671 }
523} 672}
524 673
525/*
526 * Handle the receipt of a SGI_XPC_NOTIFY IRQ by seeing whether the specified
527 * partition actually sent it. Since SGI_XPC_NOTIFY IRQs may be shared by more
528 * than one partition, we use an AMO_t structure per partition to indicate
529 * whether a partition has sent an IPI or not. If it has, then wake up the
530 * associated kthread to handle it.
531 *
532 * All SGI_XPC_NOTIFY IRQs received by XPC are the result of IPIs sent by XPC
533 * running on other partitions.
534 *
535 * Noteworthy Arguments:
536 *
537 * irq - Interrupt ReQuest number. NOT USED.
538 *
539 * dev_id - partid of IPI's potential sender.
540 */
541irqreturn_t
542xpc_notify_IRQ_handler(int irq, void *dev_id)
543{
544 short partid = (short)(u64)dev_id;
545 struct xpc_partition *part = &xpc_partitions[partid];
546
547 DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS);
548
549 if (xpc_part_ref(part)) {
550 xpc_check_for_channel_activity(part);
551
552 xpc_part_deref(part);
553 }
554 return IRQ_HANDLED;
555}
556
557/*
558 * Check to see if xpc_notify_IRQ_handler() dropped any IPIs on the floor
559 * because the write to their associated IPI amo completed after the IRQ/IPI
560 * was received.
561 */
562void
563xpc_dropped_IPI_check(struct xpc_partition *part)
564{
565 if (xpc_part_ref(part)) {
566 xpc_check_for_channel_activity(part);
567
568 part->dropped_IPI_timer.expires = jiffies +
569 XPC_P_DROPPED_IPI_WAIT;
570 add_timer(&part->dropped_IPI_timer);
571 xpc_part_deref(part);
572 }
573}
574
575void 674void
576xpc_activate_kthreads(struct xpc_channel *ch, int needed) 675xpc_activate_kthreads(struct xpc_channel *ch, int needed)
577{ 676{
@@ -616,9 +715,9 @@ xpc_kthread_waitmsgs(struct xpc_partition *part, struct xpc_channel *ch)
616 do { 715 do {
617 /* deliver messages to their intended recipients */ 716 /* deliver messages to their intended recipients */
618 717
619 while (ch->w_local_GP.get < ch->w_remote_GP.put && 718 while (xpc_n_of_deliverable_payloads(ch) > 0 &&
620 !(ch->flags & XPC_C_DISCONNECTING)) { 719 !(ch->flags & XPC_C_DISCONNECTING)) {
621 xpc_deliver_msg(ch); 720 xpc_deliver_payload(ch);
622 } 721 }
623 722
624 if (atomic_inc_return(&ch->kthreads_idle) > 723 if (atomic_inc_return(&ch->kthreads_idle) >
@@ -632,7 +731,7 @@ xpc_kthread_waitmsgs(struct xpc_partition *part, struct xpc_channel *ch)
632 "wait_event_interruptible_exclusive()\n"); 731 "wait_event_interruptible_exclusive()\n");
633 732
634 (void)wait_event_interruptible_exclusive(ch->idle_wq, 733 (void)wait_event_interruptible_exclusive(ch->idle_wq,
635 (ch->w_local_GP.get < ch->w_remote_GP.put || 734 (xpc_n_of_deliverable_payloads(ch) > 0 ||
636 (ch->flags & XPC_C_DISCONNECTING))); 735 (ch->flags & XPC_C_DISCONNECTING)));
637 736
638 atomic_dec(&ch->kthreads_idle); 737 atomic_dec(&ch->kthreads_idle);
@@ -677,7 +776,7 @@ xpc_kthread_start(void *args)
677 * additional kthreads to help deliver them. We only 776 * additional kthreads to help deliver them. We only
678 * need one less than total #of messages to deliver. 777 * need one less than total #of messages to deliver.
679 */ 778 */
680 n_needed = ch->w_remote_GP.put - ch->w_local_GP.get - 1; 779 n_needed = xpc_n_of_deliverable_payloads(ch) - 1;
681 if (n_needed > 0 && !(ch->flags & XPC_C_DISCONNECTING)) 780 if (n_needed > 0 && !(ch->flags & XPC_C_DISCONNECTING))
682 xpc_activate_kthreads(ch, n_needed); 781 xpc_activate_kthreads(ch, n_needed);
683 782
@@ -703,11 +802,9 @@ xpc_kthread_start(void *args)
703 } 802 }
704 spin_unlock_irqrestore(&ch->lock, irq_flags); 803 spin_unlock_irqrestore(&ch->lock, irq_flags);
705 804
706 if (atomic_dec_return(&ch->kthreads_assigned) == 0) { 805 if (atomic_dec_return(&ch->kthreads_assigned) == 0 &&
707 if (atomic_dec_return(&part->nchannels_engaged) == 0) { 806 atomic_dec_return(&part->nchannels_engaged) == 0) {
708 xpc_mark_partition_disengaged(part); 807 xpc_indicate_partition_disengaged(part);
709 xpc_IPI_send_disengage(part);
710 }
711 } 808 }
712 809
713 xpc_msgqueue_deref(ch); 810 xpc_msgqueue_deref(ch);
@@ -758,9 +855,9 @@ xpc_create_kthreads(struct xpc_channel *ch, int needed,
758 } else if (ch->flags & XPC_C_DISCONNECTING) { 855 } else if (ch->flags & XPC_C_DISCONNECTING) {
759 break; 856 break;
760 857
761 } else if (atomic_inc_return(&ch->kthreads_assigned) == 1) { 858 } else if (atomic_inc_return(&ch->kthreads_assigned) == 1 &&
762 if (atomic_inc_return(&part->nchannels_engaged) == 1) 859 atomic_inc_return(&part->nchannels_engaged) == 1) {
763 xpc_mark_partition_engaged(part); 860 xpc_indicate_partition_engaged(part);
764 } 861 }
765 (void)xpc_part_ref(part); 862 (void)xpc_part_ref(part);
766 xpc_msgqueue_ref(ch); 863 xpc_msgqueue_ref(ch);
@@ -782,8 +879,7 @@ xpc_create_kthreads(struct xpc_channel *ch, int needed,
782 879
783 if (atomic_dec_return(&ch->kthreads_assigned) == 0 && 880 if (atomic_dec_return(&ch->kthreads_assigned) == 0 &&
784 atomic_dec_return(&part->nchannels_engaged) == 0) { 881 atomic_dec_return(&part->nchannels_engaged) == 0) {
785 xpc_mark_partition_disengaged(part); 882 xpc_indicate_partition_disengaged(part);
786 xpc_IPI_send_disengage(part);
787 } 883 }
788 xpc_msgqueue_deref(ch); 884 xpc_msgqueue_deref(ch);
789 xpc_part_deref(part); 885 xpc_part_deref(part);
@@ -815,7 +911,7 @@ xpc_disconnect_wait(int ch_number)
815 int wakeup_channel_mgr; 911 int wakeup_channel_mgr;
816 912
817 /* now wait for all callouts to the caller's function to cease */ 913 /* now wait for all callouts to the caller's function to cease */
818 for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) { 914 for (partid = 0; partid < xp_max_npartitions; partid++) {
819 part = &xpc_partitions[partid]; 915 part = &xpc_partitions[partid];
820 916
821 if (!xpc_part_ref(part)) 917 if (!xpc_part_ref(part))
@@ -834,16 +930,15 @@ xpc_disconnect_wait(int ch_number)
834 DBUG_ON(!(ch->flags & XPC_C_DISCONNECTED)); 930 DBUG_ON(!(ch->flags & XPC_C_DISCONNECTED));
835 wakeup_channel_mgr = 0; 931 wakeup_channel_mgr = 0;
836 932
837 if (ch->delayed_IPI_flags) { 933 if (ch->delayed_chctl_flags) {
838 if (part->act_state != XPC_P_DEACTIVATING) { 934 if (part->act_state != XPC_P_AS_DEACTIVATING) {
839 spin_lock(&part->IPI_lock); 935 spin_lock(&part->chctl_lock);
840 XPC_SET_IPI_FLAGS(part->local_IPI_amo, 936 part->chctl.flags[ch->number] |=
841 ch->number, 937 ch->delayed_chctl_flags;
842 ch->delayed_IPI_flags); 938 spin_unlock(&part->chctl_lock);
843 spin_unlock(&part->IPI_lock);
844 wakeup_channel_mgr = 1; 939 wakeup_channel_mgr = 1;
845 } 940 }
846 ch->delayed_IPI_flags = 0; 941 ch->delayed_chctl_flags = 0;
847 } 942 }
848 943
849 ch->flags &= ~XPC_C_WDISCONNECT; 944 ch->flags &= ~XPC_C_WDISCONNECT;
@@ -856,13 +951,63 @@ xpc_disconnect_wait(int ch_number)
856 } 951 }
857} 952}
858 953
954static int
955xpc_setup_partitions(void)
956{
957 short partid;
958 struct xpc_partition *part;
959
960 xpc_partitions = kzalloc(sizeof(struct xpc_partition) *
961 xp_max_npartitions, GFP_KERNEL);
962 if (xpc_partitions == NULL) {
963 dev_err(xpc_part, "can't get memory for partition structure\n");
964 return -ENOMEM;
965 }
966
967 /*
968 * The first few fields of each entry of xpc_partitions[] need to
969 * be initialized now so that calls to xpc_connect() and
970 * xpc_disconnect() can be made prior to the activation of any remote
971 * partition. NOTE THAT NONE OF THE OTHER FIELDS BELONGING TO THESE
972 * ENTRIES ARE MEANINGFUL UNTIL AFTER AN ENTRY'S CORRESPONDING
973 * PARTITION HAS BEEN ACTIVATED.
974 */
975 for (partid = 0; partid < xp_max_npartitions; partid++) {
976 part = &xpc_partitions[partid];
977
978 DBUG_ON((u64)part != L1_CACHE_ALIGN((u64)part));
979
980 part->activate_IRQ_rcvd = 0;
981 spin_lock_init(&part->act_lock);
982 part->act_state = XPC_P_AS_INACTIVE;
983 XPC_SET_REASON(part, 0, 0);
984
985 init_timer(&part->disengage_timer);
986 part->disengage_timer.function =
987 xpc_timeout_partition_disengage;
988 part->disengage_timer.data = (unsigned long)part;
989
990 part->setup_state = XPC_P_SS_UNSET;
991 init_waitqueue_head(&part->teardown_wq);
992 atomic_set(&part->references, 0);
993 }
994
995 return xpc_setup_partitions_sn();
996}
997
998static void
999xpc_teardown_partitions(void)
1000{
1001 kfree(xpc_partitions);
1002}
1003
859static void 1004static void
860xpc_do_exit(enum xp_retval reason) 1005xpc_do_exit(enum xp_retval reason)
861{ 1006{
862 short partid; 1007 short partid;
863 int active_part_count, printed_waiting_msg = 0; 1008 int active_part_count, printed_waiting_msg = 0;
864 struct xpc_partition *part; 1009 struct xpc_partition *part;
865 unsigned long printmsg_time, disengage_request_timeout = 0; 1010 unsigned long printmsg_time, disengage_timeout = 0;
866 1011
867 /* a 'rmmod XPC' and a 'reboot' cannot both end up here together */ 1012 /* a 'rmmod XPC' and a 'reboot' cannot both end up here together */
868 DBUG_ON(xpc_exiting == 1); 1013 DBUG_ON(xpc_exiting == 1);
@@ -873,10 +1018,7 @@ xpc_do_exit(enum xp_retval reason)
873 * the heartbeat checker thread in case it's sleeping. 1018 * the heartbeat checker thread in case it's sleeping.
874 */ 1019 */
875 xpc_exiting = 1; 1020 xpc_exiting = 1;
876 wake_up_interruptible(&xpc_act_IRQ_wq); 1021 wake_up_interruptible(&xpc_activate_IRQ_wq);
877
878 /* ignore all incoming interrupts */
879 free_irq(SGI_XPC_ACTIVATE, NULL);
880 1022
881 /* wait for the discovery thread to exit */ 1023 /* wait for the discovery thread to exit */
882 wait_for_completion(&xpc_discovery_exited); 1024 wait_for_completion(&xpc_discovery_exited);
@@ -889,17 +1031,17 @@ xpc_do_exit(enum xp_retval reason)
889 1031
890 /* wait for all partitions to become inactive */ 1032 /* wait for all partitions to become inactive */
891 1033
892 printmsg_time = jiffies + (XPC_DISENGAGE_PRINTMSG_INTERVAL * HZ); 1034 printmsg_time = jiffies + (XPC_DEACTIVATE_PRINTMSG_INTERVAL * HZ);
893 xpc_disengage_request_timedout = 0; 1035 xpc_disengage_timedout = 0;
894 1036
895 do { 1037 do {
896 active_part_count = 0; 1038 active_part_count = 0;
897 1039
898 for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) { 1040 for (partid = 0; partid < xp_max_npartitions; partid++) {
899 part = &xpc_partitions[partid]; 1041 part = &xpc_partitions[partid];
900 1042
901 if (xpc_partition_disengaged(part) && 1043 if (xpc_partition_disengaged(part) &&
902 part->act_state == XPC_P_INACTIVE) { 1044 part->act_state == XPC_P_AS_INACTIVE) {
903 continue; 1045 continue;
904 } 1046 }
905 1047
@@ -907,36 +1049,32 @@ xpc_do_exit(enum xp_retval reason)
907 1049
908 XPC_DEACTIVATE_PARTITION(part, reason); 1050 XPC_DEACTIVATE_PARTITION(part, reason);
909 1051
910 if (part->disengage_request_timeout > 1052 if (part->disengage_timeout > disengage_timeout)
911 disengage_request_timeout) { 1053 disengage_timeout = part->disengage_timeout;
912 disengage_request_timeout =
913 part->disengage_request_timeout;
914 }
915 } 1054 }
916 1055
917 if (xpc_partition_engaged(-1UL)) { 1056 if (xpc_any_partition_engaged()) {
918 if (time_after(jiffies, printmsg_time)) { 1057 if (time_is_before_jiffies(printmsg_time)) {
919 dev_info(xpc_part, "waiting for remote " 1058 dev_info(xpc_part, "waiting for remote "
920 "partitions to disengage, timeout in " 1059 "partitions to deactivate, timeout in "
921 "%ld seconds\n", 1060 "%ld seconds\n", (disengage_timeout -
922 (disengage_request_timeout - jiffies) 1061 jiffies) / HZ);
923 / HZ);
924 printmsg_time = jiffies + 1062 printmsg_time = jiffies +
925 (XPC_DISENGAGE_PRINTMSG_INTERVAL * HZ); 1063 (XPC_DEACTIVATE_PRINTMSG_INTERVAL * HZ);
926 printed_waiting_msg = 1; 1064 printed_waiting_msg = 1;
927 } 1065 }
928 1066
929 } else if (active_part_count > 0) { 1067 } else if (active_part_count > 0) {
930 if (printed_waiting_msg) { 1068 if (printed_waiting_msg) {
931 dev_info(xpc_part, "waiting for local partition" 1069 dev_info(xpc_part, "waiting for local partition"
932 " to disengage\n"); 1070 " to deactivate\n");
933 printed_waiting_msg = 0; 1071 printed_waiting_msg = 0;
934 } 1072 }
935 1073
936 } else { 1074 } else {
937 if (!xpc_disengage_request_timedout) { 1075 if (!xpc_disengage_timedout) {
938 dev_info(xpc_part, "all partitions have " 1076 dev_info(xpc_part, "all partitions have "
939 "disengaged\n"); 1077 "deactivated\n");
940 } 1078 }
941 break; 1079 break;
942 } 1080 }
@@ -946,33 +1084,28 @@ xpc_do_exit(enum xp_retval reason)
946 1084
947 } while (1); 1085 } while (1);
948 1086
949 DBUG_ON(xpc_partition_engaged(-1UL)); 1087 DBUG_ON(xpc_any_partition_engaged());
1088 DBUG_ON(xpc_any_hbs_allowed() != 0);
950 1089
951 /* indicate to others that our reserved page is uninitialized */ 1090 xpc_teardown_rsvd_page();
952 xpc_rsvd_page->vars_pa = 0;
953
954 /* now it's time to eliminate our heartbeat */
955 del_timer_sync(&xpc_hb_timer);
956 DBUG_ON(xpc_vars->heartbeating_to_mask != 0);
957 1091
958 if (reason == xpUnloading) { 1092 if (reason == xpUnloading) {
959 /* take ourselves off of the reboot_notifier_list */
960 (void)unregister_reboot_notifier(&xpc_reboot_notifier);
961
962 /* take ourselves off of the die_notifier list */
963 (void)unregister_die_notifier(&xpc_die_notifier); 1093 (void)unregister_die_notifier(&xpc_die_notifier);
1094 (void)unregister_reboot_notifier(&xpc_reboot_notifier);
964 } 1095 }
965 1096
966 /* close down protections for IPI operations */
967 xpc_restrict_IPI_ops();
968
969 /* clear the interface to XPC's functions */ 1097 /* clear the interface to XPC's functions */
970 xpc_clear_interface(); 1098 xpc_clear_interface();
971 1099
972 if (xpc_sysctl) 1100 if (xpc_sysctl)
973 unregister_sysctl_table(xpc_sysctl); 1101 unregister_sysctl_table(xpc_sysctl);
974 1102
975 kfree(xpc_remote_copy_buffer_base); 1103 xpc_teardown_partitions();
1104
1105 if (is_shub())
1106 xpc_exit_sn2();
1107 else
1108 xpc_exit_uv();
976} 1109}
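
The do/while above is a stock shutdown pattern: keep polling until every remote partition has disengaged, and refresh a progress message whenever printmsg_time passes so an operator can see the unload is still waiting. A runnable userspace sketch of the same shape, substituting wall-clock seconds for jiffies (all names here are hypothetical, not driver symbols):

    #include <stdio.h>
    #include <time.h>
    #include <unistd.h>

    #define PRINTMSG_INTERVAL 10	/* seconds between progress messages */

    /* stand-in for xpc_any_partition_engaged(); here we just wait out a timer */
    static int any_engaged(time_t deadline)
    {
    	return time(NULL) < deadline;
    }

    int main(void)
    {
    	time_t deadline = time(NULL) + 25;
    	time_t printmsg_time = time(NULL) + PRINTMSG_INTERVAL;

    	do {
    		if (!any_engaged(deadline)) {
    			printf("all partitions have deactivated\n");
    			break;
    		}
    		if (time(NULL) >= printmsg_time) {
    			printf("waiting, timeout in %ld seconds\n",
    			       (long)(deadline - time(NULL)));
    			printmsg_time = time(NULL) + PRINTMSG_INTERVAL;
    		}
    		sleep(1);	/* the driver also sleeps between polls */
    	} while (1);
    	return 0;
    }
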
977 1110
978/* 1111/*
@@ -1002,60 +1135,57 @@ xpc_system_reboot(struct notifier_block *nb, unsigned long event, void *unused)
1002} 1135}
1003 1136
1004/* 1137/*
1005 * Notify other partitions to disengage from all references to our memory. 1138 * Notify other partitions to deactivate from us by first disengaging from all
1139 * references to our memory.
1006 */ 1140 */
1007static void 1141static void
1008xpc_die_disengage(void) 1142xpc_die_deactivate(void)
1009{ 1143{
1010 struct xpc_partition *part; 1144 struct xpc_partition *part;
1011 short partid; 1145 short partid;
1012 unsigned long engaged; 1146 int any_engaged;
1013 long time, printmsg_time, disengage_request_timeout; 1147 long keep_waiting;
1148 long wait_to_print;
1014 1149
1015 /* keep xpc_hb_checker thread from doing anything (just in case) */ 1150 /* keep xpc_hb_checker thread from doing anything (just in case) */
1016 xpc_exiting = 1; 1151 xpc_exiting = 1;
1017 1152
1018 	xpc_vars->heartbeating_to_mask = 0;  /* indicate we're deactivated */	1153 	xpc_disallow_all_hbs();	/* indicate we're deactivated */
1019 1154
1020 for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) { 1155 for (partid = 0; partid < xp_max_npartitions; partid++) {
1021 part = &xpc_partitions[partid]; 1156 part = &xpc_partitions[partid];
1022 1157
1023 if (!XPC_SUPPORTS_DISENGAGE_REQUEST(part-> 1158 if (xpc_partition_engaged(partid) ||
1024 remote_vars_version)) { 1159 part->act_state != XPC_P_AS_INACTIVE) {
1025 1160 xpc_request_partition_deactivation(part);
1026 /* just in case it was left set by an earlier XPC */ 1161 xpc_indicate_partition_disengaged(part);
1027 xpc_clear_partition_engaged(1UL << partid);
1028 continue;
1029 }
1030
1031 if (xpc_partition_engaged(1UL << partid) ||
1032 part->act_state != XPC_P_INACTIVE) {
1033 xpc_request_partition_disengage(part);
1034 xpc_mark_partition_disengaged(part);
1035 xpc_IPI_send_disengage(part);
1036 } 1162 }
1037 } 1163 }
1038 1164
1039 time = rtc_time(); 1165 /*
1040 printmsg_time = time + 1166 * Though we requested that all other partitions deactivate from us,
1041 (XPC_DISENGAGE_PRINTMSG_INTERVAL * sn_rtc_cycles_per_second); 1167 * we only wait until they've all disengaged or we've reached the
1042 disengage_request_timeout = time + 1168 * defined timelimit.
1043 (xpc_disengage_request_timelimit * sn_rtc_cycles_per_second); 1169 *
1044 1170 * Given that one iteration through the following while-loop takes
1045 /* wait for all other partitions to disengage from us */ 1171 * approximately 200 microseconds, calculate the #of loops to take
1172 * before bailing and the #of loops before printing a waiting message.
1173 */
1174 keep_waiting = xpc_disengage_timelimit * 1000 * 5;
1175 wait_to_print = XPC_DEACTIVATE_PRINTMSG_INTERVAL * 1000 * 5;
1046 1176
1047 while (1) { 1177 while (1) {
1048 engaged = xpc_partition_engaged(-1UL); 1178 any_engaged = xpc_any_partition_engaged();
1049 if (!engaged) { 1179 if (!any_engaged) {
1050 dev_info(xpc_part, "all partitions have disengaged\n"); 1180 dev_info(xpc_part, "all partitions have deactivated\n");
1051 break; 1181 break;
1052 } 1182 }
1053 1183
1054 time = rtc_time(); 1184 if (!keep_waiting--) {
1055 if (time >= disengage_request_timeout) { 1185 for (partid = 0; partid < xp_max_npartitions;
1056 for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) { 1186 partid++) {
1057 if (engaged & (1UL << partid)) { 1187 if (xpc_partition_engaged(partid)) {
1058 dev_info(xpc_part, "disengage from " 1188 dev_info(xpc_part, "deactivate from "
1059 "remote partition %d timed " 1189 "remote partition %d timed "
1060 "out\n", partid); 1190 "out\n", partid);
1061 } 1191 }
@@ -1063,15 +1193,15 @@ xpc_die_disengage(void)
1063 break; 1193 break;
1064 } 1194 }
1065 1195
1066 if (time >= printmsg_time) { 1196 if (!wait_to_print--) {
1067 dev_info(xpc_part, "waiting for remote partitions to " 1197 dev_info(xpc_part, "waiting for remote partitions to "
1068 "disengage, timeout in %ld seconds\n", 1198 "deactivate, timeout in %ld seconds\n",
1069 (disengage_request_timeout - time) / 1199 keep_waiting / (1000 * 5));
1070 sn_rtc_cycles_per_second); 1200 wait_to_print = XPC_DEACTIVATE_PRINTMSG_INTERVAL *
1071 printmsg_time = time + 1201 1000 * 5;
1072 (XPC_DISENGAGE_PRINTMSG_INTERVAL *
1073 sn_rtc_cycles_per_second);
1074 } 1202 }
1203
1204 udelay(200);
1075 } 1205 }
1076} 1206}
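
The `* 1000 * 5` factors in xpc_die_deactivate() deserve a second look: with one polling pass costing roughly 200 microseconds, one second is 1000000 / 200 = 5000 = 1000 * 5 iterations, so multiplying a seconds-based timelimit by 1000 * 5 yields a loop count, and dividing by the same factor converts back for the "timeout in %ld seconds" message. A standalone sketch of the conversion (helper names are illustrative, not driver functions):

    #include <stdio.h>

    /* one polling pass costs ~200us, so each second is 1000000/200 = 5000 passes */
    #define LOOPS_PER_SECOND	(1000 * 5)

    static long seconds_to_polling_loops(long seconds)
    {
    	return seconds * LOOPS_PER_SECOND;
    }

    int main(void)
    {
    	long timelimit = 90;	/* seconds, as the module parameter would supply */
    	long loops = seconds_to_polling_loops(timelimit);

    	printf("%ld second timelimit -> %ld polling loops\n", timelimit, loops);
    	/* converting back, as the driver does when printing the waiting message */
    	printf("remaining seconds = %ld\n", loops / LOOPS_PER_SECOND);
    	return 0;
    }
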
1077 1207
@@ -1086,10 +1216,11 @@ xpc_die_disengage(void)
1086static int 1216static int
1087xpc_system_die(struct notifier_block *nb, unsigned long event, void *unused) 1217xpc_system_die(struct notifier_block *nb, unsigned long event, void *unused)
1088{ 1218{
1219#ifdef CONFIG_IA64 /* !!! temporary kludge */
1089 switch (event) { 1220 switch (event) {
1090 case DIE_MACHINE_RESTART: 1221 case DIE_MACHINE_RESTART:
1091 case DIE_MACHINE_HALT: 1222 case DIE_MACHINE_HALT:
1092 xpc_die_disengage(); 1223 xpc_die_deactivate();
1093 break; 1224 break;
1094 1225
1095 case DIE_KDEBUG_ENTER: 1226 case DIE_KDEBUG_ENTER:
@@ -1100,8 +1231,7 @@ xpc_system_die(struct notifier_block *nb, unsigned long event, void *unused)
1100 /* fall through */ 1231 /* fall through */
1101 case DIE_MCA_MONARCH_ENTER: 1232 case DIE_MCA_MONARCH_ENTER:
1102 case DIE_INIT_MONARCH_ENTER: 1233 case DIE_INIT_MONARCH_ENTER:
1103 xpc_vars->heartbeat++; 1234 xpc_offline_heartbeat();
1104 xpc_vars->heartbeat_offline = 1;
1105 break; 1235 break;
1106 1236
1107 case DIE_KDEBUG_LEAVE: 1237 case DIE_KDEBUG_LEAVE:
@@ -1112,10 +1242,12 @@ xpc_system_die(struct notifier_block *nb, unsigned long event, void *unused)
1112 /* fall through */ 1242 /* fall through */
1113 case DIE_MCA_MONARCH_LEAVE: 1243 case DIE_MCA_MONARCH_LEAVE:
1114 case DIE_INIT_MONARCH_LEAVE: 1244 case DIE_INIT_MONARCH_LEAVE:
1115 xpc_vars->heartbeat++; 1245 xpc_online_heartbeat();
1116 xpc_vars->heartbeat_offline = 0;
1117 break; 1246 break;
1118 } 1247 }
1248#else
1249 xpc_die_deactivate();
1250#endif
1119 1251
1120 return NOTIFY_DONE; 1252 return NOTIFY_DONE;
1121} 1253}
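
xpc_system_die() is invoked through the kernel's notifier-chain machinery; xpc_init() below wires it (and xpc_system_reboot()) in via register_die_notifier() and register_reboot_notifier(). For reference, a hedged sketch of a minimal module using the more portable reboot notifier; the die notifier follows the same struct notifier_block pattern:

    #include <linux/module.h>
    #include <linux/notifier.h>
    #include <linux/reboot.h>	/* register_reboot_notifier() */

    static int demo_reboot_event(struct notifier_block *nb, unsigned long event,
    			     void *unused)
    {
    	pr_info("reboot notifier fired, event=%lu\n", event);
    	return NOTIFY_DONE;	/* let the remaining notifiers run */
    }

    static struct notifier_block demo_reboot_nb = {
    	.notifier_call = demo_reboot_event,
    };

    static int __init demo_init(void)
    {
    	return register_reboot_notifier(&demo_reboot_nb);
    }

    static void __exit demo_exit(void)
    {
    	(void)unregister_reboot_notifier(&demo_reboot_nb);
    }

    module_init(demo_init);
    module_exit(demo_exit);
    MODULE_LICENSE("GPL");
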
@@ -1124,105 +1256,52 @@ int __init
1124xpc_init(void) 1256xpc_init(void)
1125{ 1257{
1126 int ret; 1258 int ret;
1127 short partid;
1128 struct xpc_partition *part;
1129 struct task_struct *kthread; 1259 struct task_struct *kthread;
1130 size_t buf_size;
1131
1132 if (!ia64_platform_is("sn2"))
1133 return -ENODEV;
1134
1135 buf_size = max(XPC_RP_VARS_SIZE,
1136 XPC_RP_HEADER_SIZE + XP_NASID_MASK_BYTES);
1137 xpc_remote_copy_buffer = xpc_kmalloc_cacheline_aligned(buf_size,
1138 GFP_KERNEL,
1139 &xpc_remote_copy_buffer_base);
1140 if (xpc_remote_copy_buffer == NULL)
1141 return -ENOMEM;
1142 1260
1143 snprintf(xpc_part->bus_id, BUS_ID_SIZE, "part"); 1261 snprintf(xpc_part->bus_id, BUS_ID_SIZE, "part");
1144 snprintf(xpc_chan->bus_id, BUS_ID_SIZE, "chan"); 1262 snprintf(xpc_chan->bus_id, BUS_ID_SIZE, "chan");
1145 1263
1146 xpc_sysctl = register_sysctl_table(xpc_sys_dir); 1264 if (is_shub()) {
1147 1265 /*
1148 /* 1266 * The ia64-sn2 architecture supports at most 64 partitions.
1149 * The first few fields of each entry of xpc_partitions[] need to 1267 * And the inability to unregister remote amos restricts us
1150 * be initialized now so that calls to xpc_connect() and 1268 * further to only support exactly 64 partitions on this
1151 * xpc_disconnect() can be made prior to the activation of any remote 1269 * architecture, no less.
1152 * partition. NOTE THAT NONE OF THE OTHER FIELDS BELONGING TO THESE 1270 */
1153 * ENTRIES ARE MEANINGFUL UNTIL AFTER AN ENTRY'S CORRESPONDING 1271 if (xp_max_npartitions != 64) {
1154 * PARTITION HAS BEEN ACTIVATED. 1272 dev_err(xpc_part, "max #of partitions not set to 64\n");
1155 */ 1273 ret = -EINVAL;
1156 for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) { 1274 } else {
1157 part = &xpc_partitions[partid]; 1275 ret = xpc_init_sn2();
1158 1276 }
1159 DBUG_ON((u64)part != L1_CACHE_ALIGN((u64)part));
1160
1161 part->act_IRQ_rcvd = 0;
1162 spin_lock_init(&part->act_lock);
1163 part->act_state = XPC_P_INACTIVE;
1164 XPC_SET_REASON(part, 0, 0);
1165 1277
1166 init_timer(&part->disengage_request_timer); 1278 } else if (is_uv()) {
1167 part->disengage_request_timer.function = 1279 ret = xpc_init_uv();
1168 xpc_timeout_partition_disengage_request;
1169 part->disengage_request_timer.data = (unsigned long)part;
1170 1280
1171 part->setup_state = XPC_P_UNSET; 1281 } else {
1172 init_waitqueue_head(&part->teardown_wq); 1282 ret = -ENODEV;
1173 atomic_set(&part->references, 0);
1174 } 1283 }
1175 1284
1176 /* 1285 if (ret != 0)
1177 * Open up protections for IPI operations (and AMO operations on 1286 return ret;
1178 * Shub 1.1 systems).
1179 */
1180 xpc_allow_IPI_ops();
1181
1182 /*
1183 * Interrupts being processed will increment this atomic variable and
1184 * awaken the heartbeat thread which will process the interrupts.
1185 */
1186 atomic_set(&xpc_act_IRQ_rcvd, 0);
1187 1287
1188 /* 1288 ret = xpc_setup_partitions();
1189 * This is safe to do before the xpc_hb_checker thread has started
1190 * because the handler releases a wait queue. If an interrupt is
1191 * received before the thread is waiting, it will not go to sleep,
1192 * but rather immediately process the interrupt.
1193 */
1194 ret = request_irq(SGI_XPC_ACTIVATE, xpc_act_IRQ_handler, 0,
1195 "xpc hb", NULL);
1196 if (ret != 0) { 1289 if (ret != 0) {
1197 dev_err(xpc_part, "can't register ACTIVATE IRQ handler, " 1290 dev_err(xpc_part, "can't get memory for partition structure\n");
1198 "errno=%d\n", -ret); 1291 goto out_1;
1199
1200 xpc_restrict_IPI_ops();
1201
1202 if (xpc_sysctl)
1203 unregister_sysctl_table(xpc_sysctl);
1204
1205 kfree(xpc_remote_copy_buffer_base);
1206 return -EBUSY;
1207 } 1292 }
1208 1293
1294 xpc_sysctl = register_sysctl_table(xpc_sys_dir);
1295
1209 /* 1296 /*
1210 * Fill the partition reserved page with the information needed by 1297 * Fill the partition reserved page with the information needed by
1211 * other partitions to discover we are alive and establish initial 1298 * other partitions to discover we are alive and establish initial
1212 * communications. 1299 * communications.
1213 */ 1300 */
1214 xpc_rsvd_page = xpc_rsvd_page_init(); 1301 ret = xpc_setup_rsvd_page();
1215 if (xpc_rsvd_page == NULL) { 1302 if (ret != 0) {
1216 dev_err(xpc_part, "could not setup our reserved page\n"); 1303 dev_err(xpc_part, "can't setup our reserved page\n");
1217 1304 goto out_2;
1218 free_irq(SGI_XPC_ACTIVATE, NULL);
1219 xpc_restrict_IPI_ops();
1220
1221 if (xpc_sysctl)
1222 unregister_sysctl_table(xpc_sysctl);
1223
1224 kfree(xpc_remote_copy_buffer_base);
1225 return -EBUSY;
1226 } 1305 }
1227 1306
1228 /* add ourselves to the reboot_notifier_list */ 1307 /* add ourselves to the reboot_notifier_list */
@@ -1235,9 +1314,6 @@ xpc_init(void)
1235 if (ret != 0) 1314 if (ret != 0)
1236 dev_warn(xpc_part, "can't register die notifier\n"); 1315 dev_warn(xpc_part, "can't register die notifier\n");
1237 1316
1238 init_timer(&xpc_hb_timer);
1239 xpc_hb_timer.function = xpc_hb_beater;
1240
1241 /* 1317 /*
1242 * The real work-horse behind xpc. This processes incoming 1318 * The real work-horse behind xpc. This processes incoming
1243 * interrupts and monitors remote heartbeats. 1319 * interrupts and monitors remote heartbeats.
@@ -1245,25 +1321,8 @@ xpc_init(void)
1245 kthread = kthread_run(xpc_hb_checker, NULL, XPC_HB_CHECK_THREAD_NAME); 1321 kthread = kthread_run(xpc_hb_checker, NULL, XPC_HB_CHECK_THREAD_NAME);
1246 if (IS_ERR(kthread)) { 1322 if (IS_ERR(kthread)) {
1247 dev_err(xpc_part, "failed while forking hb check thread\n"); 1323 dev_err(xpc_part, "failed while forking hb check thread\n");
1248 1324 ret = -EBUSY;
1249 /* indicate to others that our reserved page is uninitialized */ 1325 goto out_3;
1250 xpc_rsvd_page->vars_pa = 0;
1251
1252 /* take ourselves off of the reboot_notifier_list */
1253 (void)unregister_reboot_notifier(&xpc_reboot_notifier);
1254
1255 /* take ourselves off of the die_notifier list */
1256 (void)unregister_die_notifier(&xpc_die_notifier);
1257
1258 del_timer_sync(&xpc_hb_timer);
1259 free_irq(SGI_XPC_ACTIVATE, NULL);
1260 xpc_restrict_IPI_ops();
1261
1262 if (xpc_sysctl)
1263 unregister_sysctl_table(xpc_sysctl);
1264
1265 kfree(xpc_remote_copy_buffer_base);
1266 return -EBUSY;
1267 } 1326 }
1268 1327
1269 /* 1328 /*
@@ -1285,11 +1344,28 @@ xpc_init(void)
1285 1344
1286 /* set the interface to point at XPC's functions */ 1345 /* set the interface to point at XPC's functions */
1287 xpc_set_interface(xpc_initiate_connect, xpc_initiate_disconnect, 1346 xpc_set_interface(xpc_initiate_connect, xpc_initiate_disconnect,
1288 xpc_initiate_allocate, xpc_initiate_send, 1347 xpc_initiate_send, xpc_initiate_send_notify,
1289 xpc_initiate_send_notify, xpc_initiate_received, 1348 xpc_initiate_received, xpc_initiate_partid_to_nasids);
1290 xpc_initiate_partid_to_nasids);
1291 1349
1292 return 0; 1350 return 0;
1351
1352 /* initialization was not successful */
1353out_3:
1354 xpc_teardown_rsvd_page();
1355
1356 (void)unregister_die_notifier(&xpc_die_notifier);
1357 (void)unregister_reboot_notifier(&xpc_reboot_notifier);
1358out_2:
1359 if (xpc_sysctl)
1360 unregister_sysctl_table(xpc_sysctl);
1361
1362 xpc_teardown_partitions();
1363out_1:
1364 if (is_shub())
1365 xpc_exit_sn2();
1366 else
1367 xpc_exit_uv();
1368 return ret;
1293} 1369}
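
The rewritten xpc_init() replaces the old copy-pasted cleanup blocks with the kernel's usual goto-unwind idiom: each setup step gains a matching label, and a failure jumps to the label that undoes exactly the steps already completed, in reverse order. A self-contained sketch of the idiom (setup_*/teardown_* are illustrative stand-ins, not driver functions):

    #include <stdio.h>

    static int setup_a(void) { return 0; }
    static int setup_b(void) { return 0; }
    static int setup_c(void) { return -1; }	/* force the unwind path */
    static void teardown_b(void) { puts("undo b"); }
    static void teardown_a(void) { puts("undo a"); }

    static int example_init(void)
    {
    	int ret;

    	ret = setup_a();
    	if (ret != 0)
    		return ret;	/* nothing to undo yet */

    	ret = setup_b();
    	if (ret != 0)
    		goto out_1;

    	ret = setup_c();
    	if (ret != 0)
    		goto out_2;

    	return 0;

    	/* initialization was not successful: unwind in reverse order */
    out_2:
    	teardown_b();
    out_1:
    	teardown_a();
    	return ret;
    }

    int main(void)
    {
    	printf("example_init() = %d\n", example_init());
    	return 0;
    }
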
1294 1370
1295module_init(xpc_init); 1371module_init(xpc_init);
@@ -1314,9 +1390,9 @@ module_param(xpc_hb_check_interval, int, 0);
1314MODULE_PARM_DESC(xpc_hb_check_interval, "Number of seconds between " 1390MODULE_PARM_DESC(xpc_hb_check_interval, "Number of seconds between "
1315 "heartbeat checks."); 1391 "heartbeat checks.");
1316 1392
1317module_param(xpc_disengage_request_timelimit, int, 0); 1393module_param(xpc_disengage_timelimit, int, 0);
1318MODULE_PARM_DESC(xpc_disengage_request_timelimit, "Number of seconds to wait " 1394MODULE_PARM_DESC(xpc_disengage_timelimit, "Number of seconds to wait "
1319 "for disengage request to complete."); 1395 "for disengage to complete.");
1320 1396
1321module_param(xpc_kdebug_ignore, int, 0); 1397module_param(xpc_kdebug_ignore, int, 0);
1322MODULE_PARM_DESC(xpc_kdebug_ignore, "Should lack of heartbeat be ignored by " 1398MODULE_PARM_DESC(xpc_kdebug_ignore, "Should lack of heartbeat be ignored by "
diff --git a/drivers/misc/sgi-xp/xpc_partition.c b/drivers/misc/sgi-xp/xpc_partition.c
index 7dd4b5812c42..6722f6fe4dc7 100644
--- a/drivers/misc/sgi-xp/xpc_partition.c
+++ b/drivers/misc/sgi-xp/xpc_partition.c
@@ -15,57 +15,22 @@
15 * 15 *
16 */ 16 */
17 17
18#include <linux/kernel.h> 18#include <linux/device.h>
19#include <linux/sysctl.h> 19#include <linux/hardirq.h>
20#include <linux/cache.h>
21#include <linux/mmzone.h>
22#include <linux/nodemask.h>
23#include <asm/uncached.h>
24#include <asm/sn/bte.h>
25#include <asm/sn/intr.h>
26#include <asm/sn/sn_sal.h>
27#include <asm/sn/nodepda.h>
28#include <asm/sn/addrs.h>
29#include "xpc.h" 20#include "xpc.h"
30 21
31/* XPC is exiting flag */ 22/* XPC is exiting flag */
32int xpc_exiting; 23int xpc_exiting;
33 24
34/* SH_IPI_ACCESS shub register value on startup */
35static u64 xpc_sh1_IPI_access;
36static u64 xpc_sh2_IPI_access0;
37static u64 xpc_sh2_IPI_access1;
38static u64 xpc_sh2_IPI_access2;
39static u64 xpc_sh2_IPI_access3;
40
41/* original protection values for each node */
42u64 xpc_prot_vec[MAX_NUMNODES];
43
44/* this partition's reserved page pointers */ 25/* this partition's reserved page pointers */
45struct xpc_rsvd_page *xpc_rsvd_page; 26struct xpc_rsvd_page *xpc_rsvd_page;
46static u64 *xpc_part_nasids; 27static unsigned long *xpc_part_nasids;
47static u64 *xpc_mach_nasids; 28unsigned long *xpc_mach_nasids;
48struct xpc_vars *xpc_vars;
49struct xpc_vars_part *xpc_vars_part;
50 29
51static int xp_nasid_mask_bytes; /* actual size in bytes of nasid mask */ 30static int xpc_nasid_mask_nbytes; /* #of bytes in nasid mask */
52static int xp_nasid_mask_words; /* actual size in words of nasid mask */ 31int xpc_nasid_mask_nlongs; /* #of longs in nasid mask */
53
54/*
55 * For performance reasons, each entry of xpc_partitions[] is cacheline
56 * aligned. And xpc_partitions[] is padded with an additional entry at the
57 * end so that the last legitimate entry doesn't share its cacheline with
58 * another variable.
59 */
60struct xpc_partition xpc_partitions[XP_MAX_PARTITIONS + 1];
61 32
62/* 33struct xpc_partition *xpc_partitions;
63 * Generic buffer used to store a local copy of portions of a remote
64 * partition's reserved page (either its header and part_nasids mask,
65 * or its vars).
66 */
67char *xpc_remote_copy_buffer;
68void *xpc_remote_copy_buffer_base;
69 34
70/* 35/*
71 * Guarantee that the kmalloc'd memory is cacheline aligned. 36 * Guarantee that the kmalloc'd memory is cacheline aligned.
@@ -95,56 +60,59 @@ xpc_kmalloc_cacheline_aligned(size_t size, gfp_t flags, void **base)
95 * Given a nasid, get the physical address of the partition's reserved page 60 * Given a nasid, get the physical address of the partition's reserved page
96 * for that nasid. This function returns 0 on any error. 61 * for that nasid. This function returns 0 on any error.
97 */ 62 */
98static u64 63static unsigned long
99xpc_get_rsvd_page_pa(int nasid) 64xpc_get_rsvd_page_pa(int nasid)
100{ 65{
101 bte_result_t bte_res; 66 enum xp_retval ret;
102 s64 status;
103 u64 cookie = 0; 67 u64 cookie = 0;
104 u64 rp_pa = nasid; /* seed with nasid */ 68 unsigned long rp_pa = nasid; /* seed with nasid */
105 u64 len = 0; 69 size_t len = 0;
106 u64 buf = buf; 70 size_t buf_len = 0;
107 u64 buf_len = 0; 71 void *buf = buf;
108 void *buf_base = NULL; 72 void *buf_base = NULL;
109 73
110 while (1) { 74 while (1) {
111 75
112 status = sn_partition_reserved_page_pa(buf, &cookie, &rp_pa, 76 /* !!! rp_pa will need to be _gpa on UV.
113 &len); 77 * ??? So do we save it into the architecture specific parts
78 * ??? of the xpc_partition structure? Do we rename this
79 * ??? function or have two versions? Rename rp_pa for UV to
80 * ??? rp_gpa?
81 */
82 ret = xpc_get_partition_rsvd_page_pa(buf, &cookie, &rp_pa,
83 &len);
114 84
115 dev_dbg(xpc_part, "SAL returned with status=%li, cookie=" 85 dev_dbg(xpc_part, "SAL returned with ret=%d, cookie=0x%016lx, "
116 "0x%016lx, address=0x%016lx, len=0x%016lx\n", 86 "address=0x%016lx, len=0x%016lx\n", ret,
117 status, cookie, rp_pa, len); 87 (unsigned long)cookie, rp_pa, len);
118 88
119 if (status != SALRET_MORE_PASSES) 89 if (ret != xpNeedMoreInfo)
120 break; 90 break;
121 91
92 /* !!! L1_CACHE_ALIGN() is only a sn2-bte_copy requirement */
122 if (L1_CACHE_ALIGN(len) > buf_len) { 93 if (L1_CACHE_ALIGN(len) > buf_len) {
123 kfree(buf_base); 94 kfree(buf_base);
124 buf_len = L1_CACHE_ALIGN(len); 95 buf_len = L1_CACHE_ALIGN(len);
125 buf = (u64)xpc_kmalloc_cacheline_aligned(buf_len, 96 buf = xpc_kmalloc_cacheline_aligned(buf_len, GFP_KERNEL,
126 GFP_KERNEL, 97 &buf_base);
127 &buf_base);
128 if (buf_base == NULL) { 98 if (buf_base == NULL) {
129 dev_err(xpc_part, "unable to kmalloc " 99 dev_err(xpc_part, "unable to kmalloc "
130 "len=0x%016lx\n", buf_len); 100 "len=0x%016lx\n", buf_len);
131 status = SALRET_ERROR; 101 ret = xpNoMemory;
132 break; 102 break;
133 } 103 }
134 } 104 }
135 105
136 bte_res = xp_bte_copy(rp_pa, buf, buf_len, 106 ret = xp_remote_memcpy(xp_pa(buf), rp_pa, buf_len);
137 (BTE_NOTIFY | BTE_WACQUIRE), NULL); 107 if (ret != xpSuccess) {
138 if (bte_res != BTE_SUCCESS) { 108 dev_dbg(xpc_part, "xp_remote_memcpy failed %d\n", ret);
139 dev_dbg(xpc_part, "xp_bte_copy failed %i\n", bte_res);
140 status = SALRET_ERROR;
141 break; 109 break;
142 } 110 }
143 } 111 }
144 112
145 kfree(buf_base); 113 kfree(buf_base);
146 114
147 if (status != SALRET_OK) 115 if (ret != xpSuccess)
148 rp_pa = 0; 116 rp_pa = 0;
149 117
150 dev_dbg(xpc_part, "reserved page at phys address 0x%016lx\n", rp_pa); 118 dev_dbg(xpc_part, "reserved page at phys address 0x%016lx\n", rp_pa);
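
xpc_get_rsvd_page_pa() follows a common firmware-query shape: call with an opaque cookie, and when the interface answers xpNeedMoreInfo, grow the bounce buffer to the reported length and call again until the query succeeds or errors out. A runnable sketch of that control flow, with fake_query() standing in for xpc_get_partition_rsvd_page_pa():

    #include <stdlib.h>
    #include <string.h>

    enum ret { OK, NEED_MORE, ERROR };

    /* fake firmware call: demands a 256-byte buffer before it will finish */
    static enum ret fake_query(void *buf, unsigned long *cookie,
    			   unsigned long *pa, size_t *len)
    {
    	if (*cookie == 0) {
    		*cookie = 1;
    		*len = 256;	/* tell the caller how much room is needed */
    		return NEED_MORE;
    	}
    	if (buf == NULL)
    		return ERROR;
    	memset(buf, 0, *len);
    	*pa = 0x1000;		/* pretend result */
    	return OK;
    }

    static enum ret fetch_with_retry(unsigned long *pa)
    {
    	unsigned long cookie = 0;
    	size_t len = 0, buf_len = 0;
    	void *buf = NULL;
    	enum ret r;

    	while ((r = fake_query(buf, &cookie, pa, &len)) == NEED_MORE) {
    		if (len > buf_len) {	/* grow the bounce buffer and retry */
    			free(buf);
    			buf_len = len;
    			buf = malloc(buf_len);
    			if (buf == NULL) {
    				r = ERROR;
    				break;
    			}
    		}
    	}
    	free(buf);
    	return r;
    }

    int main(void)
    {
    	unsigned long pa = 0;
    	return fetch_with_retry(&pa) == OK ? 0 : 1;
    }
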
@@ -156,300 +124,77 @@ xpc_get_rsvd_page_pa(int nasid)
156 * other partitions to discover we are alive and establish initial 124 * other partitions to discover we are alive and establish initial
157 * communications. 125 * communications.
158 */ 126 */
159struct xpc_rsvd_page * 127int
160xpc_rsvd_page_init(void) 128xpc_setup_rsvd_page(void)
161{ 129{
130 int ret;
162 struct xpc_rsvd_page *rp; 131 struct xpc_rsvd_page *rp;
163 AMO_t *amos_page; 132 unsigned long rp_pa;
164 u64 rp_pa, nasid_array = 0; 133 unsigned long new_ts_jiffies;
165 int i, ret;
166 134
167 /* get the local reserved page's address */ 135 /* get the local reserved page's address */
168 136
169 preempt_disable(); 137 preempt_disable();
170 rp_pa = xpc_get_rsvd_page_pa(cpuid_to_nasid(smp_processor_id())); 138 rp_pa = xpc_get_rsvd_page_pa(xp_cpu_to_nasid(smp_processor_id()));
171 preempt_enable(); 139 preempt_enable();
172 if (rp_pa == 0) { 140 if (rp_pa == 0) {
173 dev_err(xpc_part, "SAL failed to locate the reserved page\n"); 141 dev_err(xpc_part, "SAL failed to locate the reserved page\n");
174 return NULL; 142 return -ESRCH;
175 } 143 }
176 rp = (struct xpc_rsvd_page *)__va(rp_pa); 144 rp = (struct xpc_rsvd_page *)__va(rp_pa);
177 145
178 if (rp->partid != sn_partition_id) { 146 if (rp->SAL_version < 3) {
179 dev_err(xpc_part, "the reserved page's partid of %d should be " 147 /* SAL_versions < 3 had a SAL_partid defined as a u8 */
180 "%d\n", rp->partid, sn_partition_id); 148 rp->SAL_partid &= 0xff;
181 return NULL; 149 }
150 BUG_ON(rp->SAL_partid != xp_partition_id);
151
152 if (rp->SAL_partid < 0 || rp->SAL_partid >= xp_max_npartitions) {
153 dev_err(xpc_part, "the reserved page's partid of %d is outside "
154 "supported range (< 0 || >= %d)\n", rp->SAL_partid,
155 xp_max_npartitions);
156 return -EINVAL;
182 } 157 }
183 158
184 rp->version = XPC_RP_VERSION; 159 rp->version = XPC_RP_VERSION;
160 rp->max_npartitions = xp_max_npartitions;
185 161
186 /* establish the actual sizes of the nasid masks */ 162 /* establish the actual sizes of the nasid masks */
187 if (rp->SAL_version == 1) { 163 if (rp->SAL_version == 1) {
188 /* SAL_version 1 didn't set the nasids_size field */ 164 /* SAL_version 1 didn't set the nasids_size field */
189 rp->nasids_size = 128; 165 rp->SAL_nasids_size = 128;
190 } 166 }
191 xp_nasid_mask_bytes = rp->nasids_size; 167 xpc_nasid_mask_nbytes = rp->SAL_nasids_size;
192 xp_nasid_mask_words = xp_nasid_mask_bytes / 8; 168 xpc_nasid_mask_nlongs = BITS_TO_LONGS(rp->SAL_nasids_size *
169 BITS_PER_BYTE);
193 170
194 /* setup the pointers to the various items in the reserved page */ 171 /* setup the pointers to the various items in the reserved page */
195 xpc_part_nasids = XPC_RP_PART_NASIDS(rp); 172 xpc_part_nasids = XPC_RP_PART_NASIDS(rp);
196 xpc_mach_nasids = XPC_RP_MACH_NASIDS(rp); 173 xpc_mach_nasids = XPC_RP_MACH_NASIDS(rp);
197 xpc_vars = XPC_RP_VARS(rp);
198 xpc_vars_part = XPC_RP_VARS_PART(rp);
199
200 /*
201 * Before clearing xpc_vars, see if a page of AMOs had been previously
202 * allocated. If not we'll need to allocate one and set permissions
203 * so that cross-partition AMOs are allowed.
204 *
205 * The allocated AMO page needs MCA reporting to remain disabled after
206 * XPC has unloaded. To make this work, we keep a copy of the pointer
207 * to this page (i.e., amos_page) in the struct xpc_vars structure,
208 * which is pointed to by the reserved page, and re-use that saved copy
209 * on subsequent loads of XPC. This AMO page is never freed, and its
210 * memory protections are never restricted.
211 */
212 amos_page = xpc_vars->amos_page;
213 if (amos_page == NULL) {
214 amos_page = (AMO_t *)TO_AMO(uncached_alloc_page(0, 1));
215 if (amos_page == NULL) {
216 dev_err(xpc_part, "can't allocate page of AMOs\n");
217 return NULL;
218 }
219
220 /*
221 * Open up AMO-R/W to cpu. This is done for Shub 1.1 systems
222 * when xpc_allow_IPI_ops() is called via xpc_hb_init().
223 */
224 if (!enable_shub_wars_1_1()) {
225 ret = sn_change_memprotect(ia64_tpa((u64)amos_page),
226 PAGE_SIZE,
227 SN_MEMPROT_ACCESS_CLASS_1,
228 &nasid_array);
229 if (ret != 0) {
230 dev_err(xpc_part, "can't change memory "
231 "protections\n");
232 uncached_free_page(__IA64_UNCACHED_OFFSET |
233 TO_PHYS((u64)amos_page), 1);
234 return NULL;
235 }
236 }
237 } else if (!IS_AMO_ADDRESS((u64)amos_page)) {
238 /*
239 * EFI's XPBOOT can also set amos_page in the reserved page,
240 * but it happens to leave it as an uncached physical address
241 * and we need it to be an uncached virtual, so we'll have to
242 * convert it.
243 */
244 if (!IS_AMO_PHYS_ADDRESS((u64)amos_page)) {
245 dev_err(xpc_part, "previously used amos_page address "
246 "is bad = 0x%p\n", (void *)amos_page);
247 return NULL;
248 }
249 amos_page = (AMO_t *)TO_AMO((u64)amos_page);
250 }
251
252 /* clear xpc_vars */
253 memset(xpc_vars, 0, sizeof(struct xpc_vars));
254
255 xpc_vars->version = XPC_V_VERSION;
256 xpc_vars->act_nasid = cpuid_to_nasid(0);
257 xpc_vars->act_phys_cpuid = cpu_physical_id(0);
258 xpc_vars->vars_part_pa = __pa(xpc_vars_part);
259 xpc_vars->amos_page_pa = ia64_tpa((u64)amos_page);
260 xpc_vars->amos_page = amos_page; /* save for next load of XPC */
261
262 /* clear xpc_vars_part */
263 memset((u64 *)xpc_vars_part, 0, sizeof(struct xpc_vars_part) *
264 XP_MAX_PARTITIONS);
265
266 /* initialize the activate IRQ related AMO variables */
267 for (i = 0; i < xp_nasid_mask_words; i++)
268 (void)xpc_IPI_init(XPC_ACTIVATE_IRQ_AMOS + i);
269
270 /* initialize the engaged remote partitions related AMO variables */
271 (void)xpc_IPI_init(XPC_ENGAGED_PARTITIONS_AMO);
272 (void)xpc_IPI_init(XPC_DISENGAGE_REQUEST_AMO);
273 174
274 /* timestamp of when reserved page was setup by XPC */ 175 ret = xpc_setup_rsvd_page_sn(rp);
275 rp->stamp = CURRENT_TIME; 176 if (ret != 0)
177 return ret;
276 178
277 /* 179 /*
180 * Set timestamp of when reserved page was setup by XPC.
278 * This signifies to the remote partition that our reserved 181 * This signifies to the remote partition that our reserved
279 * page is initialized. 182 * page is initialized.
280 */ 183 */
281 rp->vars_pa = __pa(xpc_vars); 184 new_ts_jiffies = jiffies;
185 if (new_ts_jiffies == 0 || new_ts_jiffies == rp->ts_jiffies)
186 new_ts_jiffies++;
187 rp->ts_jiffies = new_ts_jiffies;
282 188
283 return rp; 189 xpc_rsvd_page = rp;
190 return 0;
284} 191}
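
Note the double duty of rp->ts_jiffies in the function above: zero means "reserved page not initialized" (which is why xpc_teardown_rsvd_page() below simply stores 0), and any change in value tells remote partitions that this side restarted, so the stamp is nudged past both 0 and the previously published value. A tiny sketch of that invariant, mirroring the driver's check:

    #include <assert.h>

    /* the new stamp must be nonzero (zero means "not initialized") and must
     * differ from the stamp already published, so peers notice a restart */
    static unsigned long next_stamp(unsigned long now, unsigned long published)
    {
    	if (now == 0 || now == published)
    		now++;
    	return now;
    }

    int main(void)
    {
    	assert(next_stamp(0, 5) == 1);	/* never publish zero */
    	assert(next_stamp(5, 5) == 6);	/* never republish the old stamp */
    	assert(next_stamp(7, 5) == 7);	/* otherwise use the time as-is */
    	return 0;
    }
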
285 192
286/*
287 * Change protections to allow IPI operations (and AMO operations on
288 * Shub 1.1 systems).
289 */
290void 193void
291xpc_allow_IPI_ops(void) 194xpc_teardown_rsvd_page(void)
292{ 195{
293 int node; 196 /* a zero timestamp indicates our rsvd page is not initialized */
294 int nasid; 197 xpc_rsvd_page->ts_jiffies = 0;
295
296 /* >>> Change SH_IPI_ACCESS code to use SAL call once it is available */
297
298 if (is_shub2()) {
299 xpc_sh2_IPI_access0 =
300 (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS0));
301 xpc_sh2_IPI_access1 =
302 (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS1));
303 xpc_sh2_IPI_access2 =
304 (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS2));
305 xpc_sh2_IPI_access3 =
306 (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS3));
307
308 for_each_online_node(node) {
309 nasid = cnodeid_to_nasid(node);
310 HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0),
311 -1UL);
312 HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1),
313 -1UL);
314 HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2),
315 -1UL);
316 HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3),
317 -1UL);
318 }
319
320 } else {
321 xpc_sh1_IPI_access =
322 (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH1_IPI_ACCESS));
323
324 for_each_online_node(node) {
325 nasid = cnodeid_to_nasid(node);
326 HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS),
327 -1UL);
328
329 /*
330 * Since the BIST collides with memory operations on
331 * SHUB 1.1 sn_change_memprotect() cannot be used.
332 */
333 if (enable_shub_wars_1_1()) {
334 /* open up everything */
335 xpc_prot_vec[node] = (u64)HUB_L((u64 *)
336 GLOBAL_MMR_ADDR
337 (nasid,
338 SH1_MD_DQLP_MMR_DIR_PRIVEC0));
339 HUB_S((u64 *)
340 GLOBAL_MMR_ADDR(nasid,
341 SH1_MD_DQLP_MMR_DIR_PRIVEC0),
342 -1UL);
343 HUB_S((u64 *)
344 GLOBAL_MMR_ADDR(nasid,
345 SH1_MD_DQRP_MMR_DIR_PRIVEC0),
346 -1UL);
347 }
348 }
349 }
350}
351
352/*
353 * Restrict protections to disallow IPI operations (and AMO operations on
354 * Shub 1.1 systems).
355 */
356void
357xpc_restrict_IPI_ops(void)
358{
359 int node;
360 int nasid;
361
362 /* >>> Change SH_IPI_ACCESS code to use SAL call once it is available */
363
364 if (is_shub2()) {
365
366 for_each_online_node(node) {
367 nasid = cnodeid_to_nasid(node);
368 HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0),
369 xpc_sh2_IPI_access0);
370 HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1),
371 xpc_sh2_IPI_access1);
372 HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2),
373 xpc_sh2_IPI_access2);
374 HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3),
375 xpc_sh2_IPI_access3);
376 }
377
378 } else {
379
380 for_each_online_node(node) {
381 nasid = cnodeid_to_nasid(node);
382 HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS),
383 xpc_sh1_IPI_access);
384
385 if (enable_shub_wars_1_1()) {
386 HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid,
387 SH1_MD_DQLP_MMR_DIR_PRIVEC0),
388 xpc_prot_vec[node]);
389 HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid,
390 SH1_MD_DQRP_MMR_DIR_PRIVEC0),
391 xpc_prot_vec[node]);
392 }
393 }
394 }
395}
396
397/*
398 * At periodic intervals, scan through all active partitions and ensure
399 * their heartbeat is still active. If not, the partition is deactivated.
400 */
401void
402xpc_check_remote_hb(void)
403{
404 struct xpc_vars *remote_vars;
405 struct xpc_partition *part;
406 short partid;
407 bte_result_t bres;
408
409 remote_vars = (struct xpc_vars *)xpc_remote_copy_buffer;
410
411 for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
412
413 if (xpc_exiting)
414 break;
415
416 if (partid == sn_partition_id)
417 continue;
418
419 part = &xpc_partitions[partid];
420
421 if (part->act_state == XPC_P_INACTIVE ||
422 part->act_state == XPC_P_DEACTIVATING) {
423 continue;
424 }
425
426 /* pull the remote_hb cache line */
427 bres = xp_bte_copy(part->remote_vars_pa,
428 (u64)remote_vars,
429 XPC_RP_VARS_SIZE,
430 (BTE_NOTIFY | BTE_WACQUIRE), NULL);
431 if (bres != BTE_SUCCESS) {
432 XPC_DEACTIVATE_PARTITION(part,
433 xpc_map_bte_errors(bres));
434 continue;
435 }
436
437 dev_dbg(xpc_part, "partid = %d, heartbeat = %ld, last_heartbeat"
438 " = %ld, heartbeat_offline = %ld, HB_mask = 0x%lx\n",
439 partid, remote_vars->heartbeat, part->last_heartbeat,
440 remote_vars->heartbeat_offline,
441 remote_vars->heartbeating_to_mask);
442
443 if (((remote_vars->heartbeat == part->last_heartbeat) &&
444 (remote_vars->heartbeat_offline == 0)) ||
445 !xpc_hb_allowed(sn_partition_id, remote_vars)) {
446
447 XPC_DEACTIVATE_PARTITION(part, xpNoHeartbeat);
448 continue;
449 }
450
451 part->last_heartbeat = remote_vars->heartbeat;
452 }
453} 198}
454 199
455/* 200/*
@@ -459,11 +204,12 @@ xpc_check_remote_hb(void)
459 * is large enough to contain a copy of their reserved page header and 204 * is large enough to contain a copy of their reserved page header and
460 * part_nasids mask. 205 * part_nasids mask.
461 */ 206 */
462static enum xp_retval 207enum xp_retval
463xpc_get_remote_rp(int nasid, u64 *discovered_nasids, 208xpc_get_remote_rp(int nasid, unsigned long *discovered_nasids,
464 struct xpc_rsvd_page *remote_rp, u64 *remote_rp_pa) 209 struct xpc_rsvd_page *remote_rp, unsigned long *remote_rp_pa)
465{ 210{
466 int bres, i; 211 int l;
212 enum xp_retval ret;
467 213
468 /* get the reserved page's physical address */ 214 /* get the reserved page's physical address */
469 215
@@ -472,355 +218,45 @@ xpc_get_remote_rp(int nasid, u64 *discovered_nasids,
472 return xpNoRsvdPageAddr; 218 return xpNoRsvdPageAddr;
473 219
474 /* pull over the reserved page header and part_nasids mask */ 220 /* pull over the reserved page header and part_nasids mask */
475 bres = xp_bte_copy(*remote_rp_pa, (u64)remote_rp, 221 ret = xp_remote_memcpy(xp_pa(remote_rp), *remote_rp_pa,
476 XPC_RP_HEADER_SIZE + xp_nasid_mask_bytes, 222 XPC_RP_HEADER_SIZE + xpc_nasid_mask_nbytes);
477 (BTE_NOTIFY | BTE_WACQUIRE), NULL); 223 if (ret != xpSuccess)
478 if (bres != BTE_SUCCESS) 224 return ret;
479 return xpc_map_bte_errors(bres);
480 225
481 if (discovered_nasids != NULL) { 226 if (discovered_nasids != NULL) {
482 u64 *remote_part_nasids = XPC_RP_PART_NASIDS(remote_rp); 227 unsigned long *remote_part_nasids =
483 228 XPC_RP_PART_NASIDS(remote_rp);
484 for (i = 0; i < xp_nasid_mask_words; i++)
485 discovered_nasids[i] |= remote_part_nasids[i];
486 }
487
488 /* check that the partid is for another partition */
489 229
490 if (remote_rp->partid < 1 || 230 for (l = 0; l < xpc_nasid_mask_nlongs; l++)
491 remote_rp->partid > (XP_MAX_PARTITIONS - 1)) { 231 discovered_nasids[l] |= remote_part_nasids[l];
492 return xpInvalidPartid;
493 } 232 }
494 233
495 if (remote_rp->partid == sn_partition_id) 234 /* zero timestamp indicates the reserved page has not been setup */
496 return xpLocalPartid; 235 if (remote_rp->ts_jiffies == 0)
236 return xpRsvdPageNotSet;
497 237
498 if (XPC_VERSION_MAJOR(remote_rp->version) != 238 if (XPC_VERSION_MAJOR(remote_rp->version) !=
499 XPC_VERSION_MAJOR(XPC_RP_VERSION)) { 239 XPC_VERSION_MAJOR(XPC_RP_VERSION)) {
500 return xpBadVersion; 240 return xpBadVersion;
501 } 241 }
502 242
503 return xpSuccess; 243 /* check that both remote and local partids are valid for each side */
504} 244 if (remote_rp->SAL_partid < 0 ||
505 245 remote_rp->SAL_partid >= xp_max_npartitions ||
506/* 246 remote_rp->max_npartitions <= xp_partition_id) {
507 * Get a copy of the remote partition's XPC variables from the reserved page. 247 return xpInvalidPartid;
508 *
509 * remote_vars points to a buffer that is cacheline aligned for BTE copies and
510 * assumed to be of size XPC_RP_VARS_SIZE.
511 */
512static enum xp_retval
513xpc_get_remote_vars(u64 remote_vars_pa, struct xpc_vars *remote_vars)
514{
515 int bres;
516
517 if (remote_vars_pa == 0)
518 return xpVarsNotSet;
519
520 /* pull over the cross partition variables */
521 bres = xp_bte_copy(remote_vars_pa, (u64)remote_vars, XPC_RP_VARS_SIZE,
522 (BTE_NOTIFY | BTE_WACQUIRE), NULL);
523 if (bres != BTE_SUCCESS)
524 return xpc_map_bte_errors(bres);
525
526 if (XPC_VERSION_MAJOR(remote_vars->version) !=
527 XPC_VERSION_MAJOR(XPC_V_VERSION)) {
528 return xpBadVersion;
529 }
530
531 return xpSuccess;
532}
533
534/*
535 * Update the remote partition's info.
536 */
537static void
538xpc_update_partition_info(struct xpc_partition *part, u8 remote_rp_version,
539 struct timespec *remote_rp_stamp, u64 remote_rp_pa,
540 u64 remote_vars_pa, struct xpc_vars *remote_vars)
541{
542 part->remote_rp_version = remote_rp_version;
543 dev_dbg(xpc_part, " remote_rp_version = 0x%016x\n",
544 part->remote_rp_version);
545
546 part->remote_rp_stamp = *remote_rp_stamp;
547 dev_dbg(xpc_part, " remote_rp_stamp (tv_sec = 0x%lx tv_nsec = 0x%lx\n",
548 part->remote_rp_stamp.tv_sec, part->remote_rp_stamp.tv_nsec);
549
550 part->remote_rp_pa = remote_rp_pa;
551 dev_dbg(xpc_part, " remote_rp_pa = 0x%016lx\n", part->remote_rp_pa);
552
553 part->remote_vars_pa = remote_vars_pa;
554 dev_dbg(xpc_part, " remote_vars_pa = 0x%016lx\n",
555 part->remote_vars_pa);
556
557 part->last_heartbeat = remote_vars->heartbeat;
558 dev_dbg(xpc_part, " last_heartbeat = 0x%016lx\n",
559 part->last_heartbeat);
560
561 part->remote_vars_part_pa = remote_vars->vars_part_pa;
562 dev_dbg(xpc_part, " remote_vars_part_pa = 0x%016lx\n",
563 part->remote_vars_part_pa);
564
565 part->remote_act_nasid = remote_vars->act_nasid;
566 dev_dbg(xpc_part, " remote_act_nasid = 0x%x\n",
567 part->remote_act_nasid);
568
569 part->remote_act_phys_cpuid = remote_vars->act_phys_cpuid;
570 dev_dbg(xpc_part, " remote_act_phys_cpuid = 0x%x\n",
571 part->remote_act_phys_cpuid);
572
573 part->remote_amos_page_pa = remote_vars->amos_page_pa;
574 dev_dbg(xpc_part, " remote_amos_page_pa = 0x%lx\n",
575 part->remote_amos_page_pa);
576
577 part->remote_vars_version = remote_vars->version;
578 dev_dbg(xpc_part, " remote_vars_version = 0x%x\n",
579 part->remote_vars_version);
580}
581
582/*
583 * Prior code has determined the nasid which generated an IPI. Inspect
584 * that nasid to determine if its partition needs to be activated or
585 * deactivated.
586 *
 587 * A partition is considered "awaiting activation" if our partition

588 * flags indicate it is not active and it has a heartbeat. A
589 * partition is considered "awaiting deactivation" if our partition
590 * flags indicate it is active but it has no heartbeat or it is not
591 * sending its heartbeat to us.
592 *
593 * To determine the heartbeat, the remote nasid must have a properly
594 * initialized reserved page.
595 */
596static void
597xpc_identify_act_IRQ_req(int nasid)
598{
599 struct xpc_rsvd_page *remote_rp;
600 struct xpc_vars *remote_vars;
601 u64 remote_rp_pa;
602 u64 remote_vars_pa;
603 int remote_rp_version;
604 int reactivate = 0;
605 int stamp_diff;
606 struct timespec remote_rp_stamp = { 0, 0 };
607 short partid;
608 struct xpc_partition *part;
609 enum xp_retval ret;
610
611 /* pull over the reserved page structure */
612
613 remote_rp = (struct xpc_rsvd_page *)xpc_remote_copy_buffer;
614
615 ret = xpc_get_remote_rp(nasid, NULL, remote_rp, &remote_rp_pa);
616 if (ret != xpSuccess) {
617 dev_warn(xpc_part, "unable to get reserved page from nasid %d, "
618 "which sent interrupt, reason=%d\n", nasid, ret);
619 return;
620 }
621
622 remote_vars_pa = remote_rp->vars_pa;
623 remote_rp_version = remote_rp->version;
624 if (XPC_SUPPORTS_RP_STAMP(remote_rp_version))
625 remote_rp_stamp = remote_rp->stamp;
626
627 partid = remote_rp->partid;
628 part = &xpc_partitions[partid];
629
630 /* pull over the cross partition variables */
631
632 remote_vars = (struct xpc_vars *)xpc_remote_copy_buffer;
633
634 ret = xpc_get_remote_vars(remote_vars_pa, remote_vars);
635 if (ret != xpSuccess) {
636
637 dev_warn(xpc_part, "unable to get XPC variables from nasid %d, "
638 "which sent interrupt, reason=%d\n", nasid, ret);
639
640 XPC_DEACTIVATE_PARTITION(part, ret);
641 return;
642 }
643
644 part->act_IRQ_rcvd++;
645
646 dev_dbg(xpc_part, "partid for nasid %d is %d; IRQs = %d; HB = "
647 "%ld:0x%lx\n", (int)nasid, (int)partid, part->act_IRQ_rcvd,
648 remote_vars->heartbeat, remote_vars->heartbeating_to_mask);
649
650 if (xpc_partition_disengaged(part) &&
651 part->act_state == XPC_P_INACTIVE) {
652
653 xpc_update_partition_info(part, remote_rp_version,
654 &remote_rp_stamp, remote_rp_pa,
655 remote_vars_pa, remote_vars);
656
657 if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)) {
658 if (xpc_partition_disengage_requested(1UL << partid)) {
659 /*
660 * Other side is waiting on us to disengage,
661 * even though we already have.
662 */
663 return;
664 }
665 } else {
666 /* other side doesn't support disengage requests */
667 xpc_clear_partition_disengage_request(1UL << partid);
668 }
669
670 xpc_activate_partition(part);
671 return;
672 }
673
674 DBUG_ON(part->remote_rp_version == 0);
675 DBUG_ON(part->remote_vars_version == 0);
676
677 if (!XPC_SUPPORTS_RP_STAMP(part->remote_rp_version)) {
678 DBUG_ON(XPC_SUPPORTS_DISENGAGE_REQUEST(part->
679 remote_vars_version));
680
681 if (!XPC_SUPPORTS_RP_STAMP(remote_rp_version)) {
682 DBUG_ON(XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->
683 version));
684 /* see if the other side rebooted */
685 if (part->remote_amos_page_pa ==
686 remote_vars->amos_page_pa &&
687 xpc_hb_allowed(sn_partition_id, remote_vars)) {
688 /* doesn't look that way, so ignore the IPI */
689 return;
690 }
691 }
692
693 /*
694 * Other side rebooted and previous XPC didn't support the
695 * disengage request, so we don't need to do anything special.
696 */
697
698 xpc_update_partition_info(part, remote_rp_version,
699 &remote_rp_stamp, remote_rp_pa,
700 remote_vars_pa, remote_vars);
701 part->reactivate_nasid = nasid;
702 XPC_DEACTIVATE_PARTITION(part, xpReactivating);
703 return;
704 }
705
706 DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version));
707
708 if (!XPC_SUPPORTS_RP_STAMP(remote_rp_version)) {
709 DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->version));
710
711 /*
712 * Other side rebooted and previous XPC did support the
713 * disengage request, but the new one doesn't.
714 */
715
716 xpc_clear_partition_engaged(1UL << partid);
717 xpc_clear_partition_disengage_request(1UL << partid);
718
719 xpc_update_partition_info(part, remote_rp_version,
720 &remote_rp_stamp, remote_rp_pa,
721 remote_vars_pa, remote_vars);
722 reactivate = 1;
723
724 } else {
725 DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->version));
726
727 stamp_diff = xpc_compare_stamps(&part->remote_rp_stamp,
728 &remote_rp_stamp);
729 if (stamp_diff != 0) {
730 DBUG_ON(stamp_diff >= 0);
731
732 /*
733 * Other side rebooted and the previous XPC did support
734 * the disengage request, as does the new one.
735 */
736
737 DBUG_ON(xpc_partition_engaged(1UL << partid));
738 DBUG_ON(xpc_partition_disengage_requested(1UL <<
739 partid));
740
741 xpc_update_partition_info(part, remote_rp_version,
742 &remote_rp_stamp,
743 remote_rp_pa, remote_vars_pa,
744 remote_vars);
745 reactivate = 1;
746 }
747 }
748
749 if (part->disengage_request_timeout > 0 &&
750 !xpc_partition_disengaged(part)) {
751 /* still waiting on other side to disengage from us */
752 return;
753 }
754
755 if (reactivate) {
756 part->reactivate_nasid = nasid;
757 XPC_DEACTIVATE_PARTITION(part, xpReactivating);
758
759 } else if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version) &&
760 xpc_partition_disengage_requested(1UL << partid)) {
761 XPC_DEACTIVATE_PARTITION(part, xpOtherGoingDown);
762 } 248 }
763}
764 249
765/* 250 if (remote_rp->SAL_partid == xp_partition_id)
766 * Loop through the activation AMO variables and process any bits 251 return xpLocalPartid;
767 * which are set. Each bit indicates a nasid sending a partition
768 * activation or deactivation request.
769 *
770 * Return #of IRQs detected.
771 */
772int
773xpc_identify_act_IRQ_sender(void)
774{
775 int word, bit;
776 u64 nasid_mask;
777 u64 nasid; /* remote nasid */
778 int n_IRQs_detected = 0;
779 AMO_t *act_amos;
780
781 act_amos = xpc_vars->amos_page + XPC_ACTIVATE_IRQ_AMOS;
782
783 /* scan through act AMO variable looking for non-zero entries */
784 for (word = 0; word < xp_nasid_mask_words; word++) {
785
786 if (xpc_exiting)
787 break;
788
789 nasid_mask = xpc_IPI_receive(&act_amos[word]);
790 if (nasid_mask == 0) {
791 /* no IRQs from nasids in this variable */
792 continue;
793 }
794
795 dev_dbg(xpc_part, "AMO[%d] gave back 0x%lx\n", word,
796 nasid_mask);
797
798 /*
799 * If this nasid has been added to the machine since
800 * our partition was reset, this will retain the
801 * remote nasid in our reserved pages machine mask.
802 * This is used in the event of module reload.
803 */
804 xpc_mach_nasids[word] |= nasid_mask;
805
806 /* locate the nasid(s) which sent interrupts */
807 252
808 for (bit = 0; bit < (8 * sizeof(u64)); bit++) { 253 return xpSuccess;
809 if (nasid_mask & (1UL << bit)) {
810 n_IRQs_detected++;
811 nasid = XPC_NASID_FROM_W_B(word, bit);
812 dev_dbg(xpc_part, "interrupt from nasid %ld\n",
813 nasid);
814 xpc_identify_act_IRQ_req(nasid);
815 }
816 }
817 }
818 return n_IRQs_detected;
819} 254}
820 255
821/* 256/*
822 * See if the other side has responded to a partition disengage request 257 * See if the other side has responded to a partition deactivate request
823 * from us. 258 * from us. Though we requested the remote partition to deactivate with regard
259 * to us, we really only need to wait for the other side to disengage from us.
824 */ 260 */
825int 261int
826xpc_partition_disengaged(struct xpc_partition *part) 262xpc_partition_disengaged(struct xpc_partition *part)
@@ -828,41 +264,37 @@ xpc_partition_disengaged(struct xpc_partition *part)
828 short partid = XPC_PARTID(part); 264 short partid = XPC_PARTID(part);
829 int disengaged; 265 int disengaged;
830 266
831 disengaged = (xpc_partition_engaged(1UL << partid) == 0); 267 disengaged = !xpc_partition_engaged(partid);
832 if (part->disengage_request_timeout) { 268 if (part->disengage_timeout) {
833 if (!disengaged) { 269 if (!disengaged) {
834 if (time_before(jiffies, 270 if (time_is_after_jiffies(part->disengage_timeout)) {
835 part->disengage_request_timeout)) {
836 /* timelimit hasn't been reached yet */ 271 /* timelimit hasn't been reached yet */
837 return 0; 272 return 0;
838 } 273 }
839 274
840 /* 275 /*
841 * Other side hasn't responded to our disengage 276 * Other side hasn't responded to our deactivate
842 * request in a timely fashion, so assume it's dead. 277 * request in a timely fashion, so assume it's dead.
843 */ 278 */
844 279
845 dev_info(xpc_part, "disengage from remote partition %d " 280 dev_info(xpc_part, "deactivate request to remote "
846 "timed out\n", partid); 281 "partition %d timed out\n", partid);
847 xpc_disengage_request_timedout = 1; 282 xpc_disengage_timedout = 1;
848 xpc_clear_partition_engaged(1UL << partid); 283 xpc_assume_partition_disengaged(partid);
849 disengaged = 1; 284 disengaged = 1;
850 } 285 }
851 part->disengage_request_timeout = 0; 286 part->disengage_timeout = 0;
852 287
853 /* cancel the timer function, provided it's not us */ 288 /* cancel the timer function, provided it's not us */
854 if (!in_interrupt()) { 289 if (!in_interrupt())
855 del_singleshot_timer_sync(&part-> 290 del_singleshot_timer_sync(&part->disengage_timer);
856 disengage_request_timer);
857 }
858 291
859 DBUG_ON(part->act_state != XPC_P_DEACTIVATING && 292 DBUG_ON(part->act_state != XPC_P_AS_DEACTIVATING &&
860 part->act_state != XPC_P_INACTIVE); 293 part->act_state != XPC_P_AS_INACTIVE);
861 if (part->act_state != XPC_P_INACTIVE) 294 if (part->act_state != XPC_P_AS_INACTIVE)
862 xpc_wakeup_channel_mgr(part); 295 xpc_wakeup_channel_mgr(part);
863 296
864 if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)) 297 xpc_cancel_partition_deactivation_request(part);
865 xpc_cancel_partition_disengage_request(part);
866 } 298 }
867 return disengaged; 299 return disengaged;
868} 300}
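
The switch from time_before(jiffies, part->disengage_timeout) to time_is_after_jiffies(part->disengage_timeout) in the function above is equivalence-preserving; to the best of my recollection the macros in <linux/jiffies.h> reduce as follows (typecheck() guards omitted):

    /* paraphrased from <linux/jiffies.h>: the comparison is a signed
     * subtraction, which stays correct across jiffies wraparound, and the
     * time_is_*_jiffies() forms simply pin one operand to jiffies */
    #define time_after(a, b)		((long)((b) - (a)) < 0)
    #define time_before(a, b)		time_after(b, a)
    #define time_is_after_jiffies(a)	time_before(jiffies, a)
    #define time_is_before_jiffies(a)	time_after(jiffies, a)
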
@@ -879,8 +311,8 @@ xpc_mark_partition_active(struct xpc_partition *part)
879 dev_dbg(xpc_part, "setting partition %d to ACTIVE\n", XPC_PARTID(part)); 311 dev_dbg(xpc_part, "setting partition %d to ACTIVE\n", XPC_PARTID(part));
880 312
881 spin_lock_irqsave(&part->act_lock, irq_flags); 313 spin_lock_irqsave(&part->act_lock, irq_flags);
882 if (part->act_state == XPC_P_ACTIVATING) { 314 if (part->act_state == XPC_P_AS_ACTIVATING) {
883 part->act_state = XPC_P_ACTIVE; 315 part->act_state = XPC_P_AS_ACTIVE;
884 ret = xpSuccess; 316 ret = xpSuccess;
885 } else { 317 } else {
886 DBUG_ON(part->reason == xpSuccess); 318 DBUG_ON(part->reason == xpSuccess);
@@ -892,7 +324,7 @@ xpc_mark_partition_active(struct xpc_partition *part)
892} 324}
893 325
894/* 326/*
895 * Notify XPC that the partition is down. 327 * Start the process of deactivating the specified partition.
896 */ 328 */
897void 329void
898xpc_deactivate_partition(const int line, struct xpc_partition *part, 330xpc_deactivate_partition(const int line, struct xpc_partition *part,
@@ -902,16 +334,16 @@ xpc_deactivate_partition(const int line, struct xpc_partition *part,
902 334
903 spin_lock_irqsave(&part->act_lock, irq_flags); 335 spin_lock_irqsave(&part->act_lock, irq_flags);
904 336
905 if (part->act_state == XPC_P_INACTIVE) { 337 if (part->act_state == XPC_P_AS_INACTIVE) {
906 XPC_SET_REASON(part, reason, line); 338 XPC_SET_REASON(part, reason, line);
907 spin_unlock_irqrestore(&part->act_lock, irq_flags); 339 spin_unlock_irqrestore(&part->act_lock, irq_flags);
908 if (reason == xpReactivating) { 340 if (reason == xpReactivating) {
909 /* we interrupt ourselves to reactivate partition */ 341 /* we interrupt ourselves to reactivate partition */
910 xpc_IPI_send_reactivate(part); 342 xpc_request_partition_reactivation(part);
911 } 343 }
912 return; 344 return;
913 } 345 }
914 if (part->act_state == XPC_P_DEACTIVATING) { 346 if (part->act_state == XPC_P_AS_DEACTIVATING) {
915 if ((part->reason == xpUnloading && reason != xpUnloading) || 347 if ((part->reason == xpUnloading && reason != xpUnloading) ||
916 reason == xpReactivating) { 348 reason == xpReactivating) {
917 XPC_SET_REASON(part, reason, line); 349 XPC_SET_REASON(part, reason, line);
@@ -920,22 +352,18 @@ xpc_deactivate_partition(const int line, struct xpc_partition *part,
920 return; 352 return;
921 } 353 }
922 354
923 part->act_state = XPC_P_DEACTIVATING; 355 part->act_state = XPC_P_AS_DEACTIVATING;
924 XPC_SET_REASON(part, reason, line); 356 XPC_SET_REASON(part, reason, line);
925 357
926 spin_unlock_irqrestore(&part->act_lock, irq_flags); 358 spin_unlock_irqrestore(&part->act_lock, irq_flags);
927 359
928 if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)) { 360 /* ask remote partition to deactivate with regard to us */
929 xpc_request_partition_disengage(part); 361 xpc_request_partition_deactivation(part);
930 xpc_IPI_send_disengage(part);
931 362
932 /* set a timelimit on the disengage request */ 363 /* set a timelimit on the disengage phase of the deactivation request */
933 part->disengage_request_timeout = jiffies + 364 part->disengage_timeout = jiffies + (xpc_disengage_timelimit * HZ);
934 (xpc_disengage_request_timelimit * HZ); 365 part->disengage_timer.expires = part->disengage_timeout;
935 part->disengage_request_timer.expires = 366 add_timer(&part->disengage_timer);
936 part->disengage_request_timeout;
937 add_timer(&part->disengage_request_timer);
938 }
939 367
940 dev_dbg(xpc_part, "bringing partition %d down, reason = %d\n", 368 dev_dbg(xpc_part, "bringing partition %d down, reason = %d\n",
941 XPC_PARTID(part), reason); 369 XPC_PARTID(part), reason);
@@ -955,7 +383,7 @@ xpc_mark_partition_inactive(struct xpc_partition *part)
955 XPC_PARTID(part)); 383 XPC_PARTID(part));
956 384
957 spin_lock_irqsave(&part->act_lock, irq_flags); 385 spin_lock_irqsave(&part->act_lock, irq_flags);
958 part->act_state = XPC_P_INACTIVE; 386 part->act_state = XPC_P_AS_INACTIVE;
959 spin_unlock_irqrestore(&part->act_lock, irq_flags); 387 spin_unlock_irqrestore(&part->act_lock, irq_flags);
960 part->remote_rp_pa = 0; 388 part->remote_rp_pa = 0;
961} 389}
@@ -974,28 +402,22 @@ xpc_discovery(void)
974{ 402{
975 void *remote_rp_base; 403 void *remote_rp_base;
976 struct xpc_rsvd_page *remote_rp; 404 struct xpc_rsvd_page *remote_rp;
977 struct xpc_vars *remote_vars; 405 unsigned long remote_rp_pa;
978 u64 remote_rp_pa;
979 u64 remote_vars_pa;
980 int region; 406 int region;
981 int region_size; 407 int region_size;
982 int max_regions; 408 int max_regions;
983 int nasid; 409 int nasid;
984 struct xpc_rsvd_page *rp; 410 struct xpc_rsvd_page *rp;
985 short partid; 411 unsigned long *discovered_nasids;
986 struct xpc_partition *part;
987 u64 *discovered_nasids;
988 enum xp_retval ret; 412 enum xp_retval ret;
989 413
990 remote_rp = xpc_kmalloc_cacheline_aligned(XPC_RP_HEADER_SIZE + 414 remote_rp = xpc_kmalloc_cacheline_aligned(XPC_RP_HEADER_SIZE +
991 xp_nasid_mask_bytes, 415 xpc_nasid_mask_nbytes,
992 GFP_KERNEL, &remote_rp_base); 416 GFP_KERNEL, &remote_rp_base);
993 if (remote_rp == NULL) 417 if (remote_rp == NULL)
994 return; 418 return;
995 419
996 remote_vars = (struct xpc_vars *)remote_rp; 420 discovered_nasids = kzalloc(sizeof(long) * xpc_nasid_mask_nlongs,
997
998 discovered_nasids = kzalloc(sizeof(u64) * xp_nasid_mask_words,
999 GFP_KERNEL); 421 GFP_KERNEL);
1000 if (discovered_nasids == NULL) { 422 if (discovered_nasids == NULL) {
1001 kfree(remote_rp_base); 423 kfree(remote_rp_base);
@@ -1010,7 +432,7 @@ xpc_discovery(void)
1010 * protection is in regards to memory, IOI and IPI. 432 * protection is in regards to memory, IOI and IPI.
1011 */ 433 */
1012 max_regions = 64; 434 max_regions = 64;
1013 region_size = sn_region_size; 435 region_size = xp_region_size;
1014 436
1015 switch (region_size) { 437 switch (region_size) {
1016 case 128: 438 case 128:
@@ -1038,28 +460,28 @@ xpc_discovery(void)
1038 460
1039 dev_dbg(xpc_part, "checking nasid %d\n", nasid); 461 dev_dbg(xpc_part, "checking nasid %d\n", nasid);
1040 462
1041 if (XPC_NASID_IN_ARRAY(nasid, xpc_part_nasids)) { 463 if (test_bit(nasid / 2, xpc_part_nasids)) {
1042 dev_dbg(xpc_part, "PROM indicates Nasid %d is " 464 dev_dbg(xpc_part, "PROM indicates Nasid %d is "
1043 "part of the local partition; skipping " 465 "part of the local partition; skipping "
1044 "region\n", nasid); 466 "region\n", nasid);
1045 break; 467 break;
1046 } 468 }
1047 469
1048 if (!(XPC_NASID_IN_ARRAY(nasid, xpc_mach_nasids))) { 470 if (!(test_bit(nasid / 2, xpc_mach_nasids))) {
1049 dev_dbg(xpc_part, "PROM indicates Nasid %d was " 471 dev_dbg(xpc_part, "PROM indicates Nasid %d was "
1050 "not on Numa-Link network at reset\n", 472 "not on Numa-Link network at reset\n",
1051 nasid); 473 nasid);
1052 continue; 474 continue;
1053 } 475 }
1054 476
1055 if (XPC_NASID_IN_ARRAY(nasid, discovered_nasids)) { 477 if (test_bit(nasid / 2, discovered_nasids)) {
1056 dev_dbg(xpc_part, "Nasid %d is part of a " 478 dev_dbg(xpc_part, "Nasid %d is part of a "
1057 "partition which was previously " 479 "partition which was previously "
1058 "discovered\n", nasid); 480 "discovered\n", nasid);
1059 continue; 481 continue;
1060 } 482 }
1061 483
1062 /* pull over the reserved page structure */ 484 /* pull over the rsvd page header & part_nasids mask */
1063 485
1064 ret = xpc_get_remote_rp(nasid, discovered_nasids, 486 ret = xpc_get_remote_rp(nasid, discovered_nasids,
1065 remote_rp, &remote_rp_pa); 487 remote_rp, &remote_rp_pa);
@@ -1074,72 +496,8 @@ xpc_discovery(void)
1074 continue; 496 continue;
1075 } 497 }
1076 498
1077 remote_vars_pa = remote_rp->vars_pa; 499 xpc_request_partition_activation(remote_rp,
1078 500 remote_rp_pa, nasid);
1079 partid = remote_rp->partid;
1080 part = &xpc_partitions[partid];
1081
1082 /* pull over the cross partition variables */
1083
1084 ret = xpc_get_remote_vars(remote_vars_pa, remote_vars);
1085 if (ret != xpSuccess) {
1086 dev_dbg(xpc_part, "unable to get XPC variables "
1087 "from nasid %d, reason=%d\n", nasid,
1088 ret);
1089
1090 XPC_DEACTIVATE_PARTITION(part, ret);
1091 continue;
1092 }
1093
1094 if (part->act_state != XPC_P_INACTIVE) {
1095 dev_dbg(xpc_part, "partition %d on nasid %d is "
1096 "already activating\n", partid, nasid);
1097 break;
1098 }
1099
1100 /*
1101 * Register the remote partition's AMOs with SAL so it
1102 * can handle and cleanup errors within that address
1103 * range should the remote partition go down. We don't
1104 * unregister this range because it is difficult to
1105 * tell when outstanding writes to the remote partition
1106 * are finished and thus when it is safe to
1107 * unregister. This should not result in wasted space
1108 * in the SAL xp_addr_region table because we should
1109 * get the same page for remote_act_amos_pa after
1110 * module reloads and system reboots.
1111 */
1112 if (sn_register_xp_addr_region
1113 (remote_vars->amos_page_pa, PAGE_SIZE, 1) < 0) {
1114 dev_dbg(xpc_part,
1115 "partition %d failed to "
1116 "register xp_addr region 0x%016lx\n",
1117 partid, remote_vars->amos_page_pa);
1118
1119 XPC_SET_REASON(part, xpPhysAddrRegFailed,
1120 __LINE__);
1121 break;
1122 }
1123
1124 /*
1125 * The remote nasid is valid and available.
1126 * Send an interrupt to that nasid to notify
1127 * it that we are ready to begin activation.
1128 */
1129 dev_dbg(xpc_part, "sending an interrupt to AMO 0x%lx, "
1130 "nasid %d, phys_cpuid 0x%x\n",
1131 remote_vars->amos_page_pa,
1132 remote_vars->act_nasid,
1133 remote_vars->act_phys_cpuid);
1134
1135 if (XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->
1136 version)) {
1137 part->remote_amos_page_pa =
1138 remote_vars->amos_page_pa;
1139 xpc_mark_partition_disengaged(part);
1140 xpc_cancel_partition_disengage_request(part);
1141 }
1142 xpc_IPI_send_activate(remote_vars);
1143 } 501 }
1144 } 502 }
1145 503
@@ -1155,20 +513,16 @@ enum xp_retval
1155xpc_initiate_partid_to_nasids(short partid, void *nasid_mask) 513xpc_initiate_partid_to_nasids(short partid, void *nasid_mask)
1156{ 514{
1157 struct xpc_partition *part; 515 struct xpc_partition *part;
1158 u64 part_nasid_pa; 516 unsigned long part_nasid_pa;
1159 int bte_res;
1160 517
1161 part = &xpc_partitions[partid]; 518 part = &xpc_partitions[partid];
1162 if (part->remote_rp_pa == 0) 519 if (part->remote_rp_pa == 0)
1163 return xpPartitionDown; 520 return xpPartitionDown;
1164 521
1165 memset(nasid_mask, 0, XP_NASID_MASK_BYTES); 522 memset(nasid_mask, 0, xpc_nasid_mask_nbytes);
1166
1167 part_nasid_pa = (u64)XPC_RP_PART_NASIDS(part->remote_rp_pa);
1168 523
1169 bte_res = xp_bte_copy(part_nasid_pa, (u64)nasid_mask, 524 part_nasid_pa = (unsigned long)XPC_RP_PART_NASIDS(part->remote_rp_pa);
1170 xp_nasid_mask_bytes, (BTE_NOTIFY | BTE_WACQUIRE),
1171 NULL);
1172 525
1173 return xpc_map_bte_errors(bte_res); 526 return xp_remote_memcpy(xp_pa(nasid_mask), part_nasid_pa,
527 xpc_nasid_mask_nbytes);
1174} 528}
diff --git a/drivers/misc/sgi-xp/xpc_sn2.c b/drivers/misc/sgi-xp/xpc_sn2.c
new file mode 100644
index 000000000000..b4882ccf6344
--- /dev/null
+++ b/drivers/misc/sgi-xp/xpc_sn2.c
@@ -0,0 +1,2404 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
7 */
8
9/*
10 * Cross Partition Communication (XPC) sn2-based functions.
11 *
12 * Architecture specific implementation of common functions.
13 *
14 */
15
16#include <linux/delay.h>
17#include <asm/uncached.h>
18#include <asm/sn/mspec.h>
19#include <asm/sn/sn_sal.h>
20#include "xpc.h"
21
22/*
23 * Define the number of u64s required to represent all the C-brick nasids
24 * as a bitmap. The cross-partition kernel modules deal only with
25 * C-brick nasids, thus the need for bitmaps which don't account for
26 * odd-numbered (non C-brick) nasids.
27 */
28#define XPC_MAX_PHYSNODES_SN2 (MAX_NUMALINK_NODES / 2)
29#define XP_NASID_MASK_BYTES_SN2 ((XPC_MAX_PHYSNODES_SN2 + 7) / 8)
30#define XP_NASID_MASK_WORDS_SN2 ((XPC_MAX_PHYSNODES_SN2 + 63) / 64)
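
For concreteness: nasid N occupies bit N / 2 of these masks (C-brick nasids are even), and bit b of mask word l maps back to nasid (l * BITS_PER_LONG + b) * 2. A minimal sketch of hypothetical helpers capturing the arithmetic this file open-codes:

	/* Hypothetical helpers; not part of the driver. */
	static inline int xpc_nasid_to_bit(int nasid)
	{
		return nasid / 2;	/* C-brick nasids are even */
	}

	static inline int xpc_bit_to_nasid(int l, int b)
	{
		return (l * BITS_PER_LONG + b) * 2;
	}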
31
32/*
33 * Memory for XPC's amo variables is allocated by the MSPEC driver. These
34 * pages are located in the lowest granule. The lowest granule uses 4k pages
35 * for cached references and an alternate TLB handler to never provide a
36 * cacheable mapping for the entire region. This prevents speculative
37 * reads of cached copies of our lines, which would cause the SHUB to
38 * generate a PI FSB Protocol error. For XPC, we need 64
39 * amo variables (based on XP_MAX_NPARTITIONS_SN2) to identify the senders of
40 * NOTIFY IRQs, 128 amo variables (based on XP_NASID_MASK_WORDS_SN2) to identify
41 * the senders of ACTIVATE IRQs, 1 amo variable to identify which remote
42 * partitions (i.e., XPCs) consider themselves currently engaged with the
43 * local XPC and 1 amo variable to request partition deactivation.
44 */
45#define XPC_NOTIFY_IRQ_AMOS_SN2 0
46#define XPC_ACTIVATE_IRQ_AMOS_SN2 (XPC_NOTIFY_IRQ_AMOS_SN2 + \
47 XP_MAX_NPARTITIONS_SN2)
48#define XPC_ENGAGED_PARTITIONS_AMO_SN2 (XPC_ACTIVATE_IRQ_AMOS_SN2 + \
49 XP_NASID_MASK_WORDS_SN2)
50#define XPC_DEACTIVATE_REQUEST_AMO_SN2 (XPC_ENGAGED_PARTITIONS_AMO_SN2 + 1)
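
Put together, and assuming the counts given in the comment above (64 partitions, 128 nasid-mask words), these defines carve the single uncached page into four regions; a sketch of the resulting index layout:

	/*
	 * amos_page[  0.. 63]  notify IRQ amos, one per partition
	 * amos_page[ 64..191]  activate IRQ amos, one per nasid-mask word
	 * amos_page[192]       engaged-partitions amo (one bit per partid)
	 * amos_page[193]       deactivate-request amo (one bit per partid)
	 */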
51
52/*
53 * Buffer used to store a local copy of portions of a remote partition's
54 * reserved page (either its header and part_nasids mask, or its vars).
55 */
56static void *xpc_remote_copy_buffer_base_sn2;
57static char *xpc_remote_copy_buffer_sn2;
58
59static struct xpc_vars_sn2 *xpc_vars_sn2;
60static struct xpc_vars_part_sn2 *xpc_vars_part_sn2;
61
62static int
63xpc_setup_partitions_sn_sn2(void)
64{
65 /* nothing needs to be done */
66 return 0;
67}
68
69/* SH_IPI_ACCESS shub register value on startup */
70static u64 xpc_sh1_IPI_access_sn2;
71static u64 xpc_sh2_IPI_access0_sn2;
72static u64 xpc_sh2_IPI_access1_sn2;
73static u64 xpc_sh2_IPI_access2_sn2;
74static u64 xpc_sh2_IPI_access3_sn2;
75
76/*
77 * Change protections to allow IPI operations.
78 */
79static void
80xpc_allow_IPI_ops_sn2(void)
81{
82 int node;
83 int nasid;
84
85 /* !!! The following should get moved into SAL. */
86 if (is_shub2()) {
87 xpc_sh2_IPI_access0_sn2 =
88 (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS0));
89 xpc_sh2_IPI_access1_sn2 =
90 (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS1));
91 xpc_sh2_IPI_access2_sn2 =
92 (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS2));
93 xpc_sh2_IPI_access3_sn2 =
94 (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS3));
95
96 for_each_online_node(node) {
97 nasid = cnodeid_to_nasid(node);
98 HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0),
99 -1UL);
100 HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1),
101 -1UL);
102 HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2),
103 -1UL);
104 HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3),
105 -1UL);
106 }
107 } else {
108 xpc_sh1_IPI_access_sn2 =
109 (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH1_IPI_ACCESS));
110
111 for_each_online_node(node) {
112 nasid = cnodeid_to_nasid(node);
113 HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS),
114 -1UL);
115 }
116 }
117}
118
119/*
120 * Restrict protections to disallow IPI operations.
121 */
122static void
123xpc_disallow_IPI_ops_sn2(void)
124{
125 int node;
126 int nasid;
127
128 /* !!! The following should get moved into SAL. */
129 if (is_shub2()) {
130 for_each_online_node(node) {
131 nasid = cnodeid_to_nasid(node);
132 HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0),
133 xpc_sh2_IPI_access0_sn2);
134 HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1),
135 xpc_sh2_IPI_access1_sn2);
136 HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2),
137 xpc_sh2_IPI_access2_sn2);
138 HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3),
139 xpc_sh2_IPI_access3_sn2);
140 }
141 } else {
142 for_each_online_node(node) {
143 nasid = cnodeid_to_nasid(node);
144 HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS),
145 xpc_sh1_IPI_access_sn2);
146 }
147 }
148}
149
150/*
151 * The following set of functions is used for the sending and receiving of
152 * IRQs (also known as IPIs). There are two flavors of IRQs, one that is
153 * associated with partition activity (SGI_XPC_ACTIVATE) and the other that
154 * is associated with channel activity (SGI_XPC_NOTIFY).
155 */
156
157static u64
158xpc_receive_IRQ_amo_sn2(struct amo *amo)
159{
160 return FETCHOP_LOAD_OP(TO_AMO((u64)&amo->variable), FETCHOP_CLEAR);
161}
162
163static enum xp_retval
164xpc_send_IRQ_sn2(struct amo *amo, u64 flag, int nasid, int phys_cpuid,
165 int vector)
166{
167 int ret = 0;
168 unsigned long irq_flags;
169
170 local_irq_save(irq_flags);
171
172 FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_OR, flag);
173 sn_send_IPI_phys(nasid, phys_cpuid, vector, 0);
174
175 /*
176 * We must always use the nofault function regardless of whether we
177 * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
178 * didn't, we'd never know that the other partition is down and would
179 * keep sending IRQs and amos to it until the heartbeat times out.
180 */
181 ret = xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo->variable),
182 xp_nofault_PIOR_target));
183
184 local_irq_restore(irq_flags);
185
186 return (ret == 0) ? xpSuccess : xpPioReadError;
187}
188
189static struct amo *
190xpc_init_IRQ_amo_sn2(int index)
191{
192 struct amo *amo = xpc_vars_sn2->amos_page + index;
193
194 (void)xpc_receive_IRQ_amo_sn2(amo); /* clear amo variable */
195 return amo;
196}
197
198/*
199 * Functions associated with SGI_XPC_ACTIVATE IRQ.
200 */
201
202/*
203 * Notify the heartbeat check thread that an activate IRQ has been received.
204 */
205static irqreturn_t
206xpc_handle_activate_IRQ_sn2(int irq, void *dev_id)
207{
208 unsigned long irq_flags;
209
210 spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
211 xpc_activate_IRQ_rcvd++;
212 spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
213
214 wake_up_interruptible(&xpc_activate_IRQ_wq);
215 return IRQ_HANDLED;
216}
217
218/*
219 * Flag the appropriate amo variable and send an IRQ to the specified node.
220 */
221static void
222xpc_send_activate_IRQ_sn2(unsigned long amos_page_pa, int from_nasid,
223 int to_nasid, int to_phys_cpuid)
224{
225 struct amo *amos = (struct amo *)__va(amos_page_pa +
226 (XPC_ACTIVATE_IRQ_AMOS_SN2 *
227 sizeof(struct amo)));
228
229 (void)xpc_send_IRQ_sn2(&amos[BIT_WORD(from_nasid / 2)],
230 BIT_MASK(from_nasid / 2), to_nasid,
231 to_phys_cpuid, SGI_XPC_ACTIVATE);
232}
233
234static void
235xpc_send_local_activate_IRQ_sn2(int from_nasid)
236{
237 unsigned long irq_flags;
238 struct amo *amos = (struct amo *)__va(xpc_vars_sn2->amos_page_pa +
239 (XPC_ACTIVATE_IRQ_AMOS_SN2 *
240 sizeof(struct amo)));
241
242 /* fake the sending and receipt of an activate IRQ from remote nasid */
243 FETCHOP_STORE_OP(TO_AMO((u64)&amos[BIT_WORD(from_nasid / 2)].variable),
244 FETCHOP_OR, BIT_MASK(from_nasid / 2));
245
246 spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
247 xpc_activate_IRQ_rcvd++;
248 spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
249
250 wake_up_interruptible(&xpc_activate_IRQ_wq);
251}
252
253/*
254 * Functions associated with SGI_XPC_NOTIFY IRQ.
255 */
256
257/*
258 * Check to see if any chctl flags were sent from the specified partition.
259 */
260static void
261xpc_check_for_sent_chctl_flags_sn2(struct xpc_partition *part)
262{
263 union xpc_channel_ctl_flags chctl;
264 unsigned long irq_flags;
265
266 chctl.all_flags = xpc_receive_IRQ_amo_sn2(part->sn.sn2.
267 local_chctl_amo_va);
268 if (chctl.all_flags == 0)
269 return;
270
271 spin_lock_irqsave(&part->chctl_lock, irq_flags);
272 part->chctl.all_flags |= chctl.all_flags;
273 spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
274
275 dev_dbg(xpc_chan, "received notify IRQ from partid=%d, chctl.all_flags="
276 "0x%lx\n", XPC_PARTID(part), chctl.all_flags);
277
278 xpc_wakeup_channel_mgr(part);
279}
280
281/*
282 * Handle the receipt of an SGI_XPC_NOTIFY IRQ by seeing whether the specified
283 * partition actually sent it. Since SGI_XPC_NOTIFY IRQs may be shared by more
284 * than one partition, we use an amo structure per partition to indicate
285 * whether a partition has sent an IRQ or not. If it has, then wake up the
286 * associated kthread to handle it.
287 *
288 * All SGI_XPC_NOTIFY IRQs received by XPC are the result of IRQs sent by XPC
289 * running on other partitions.
290 *
291 * Noteworthy Arguments:
292 *
293 * irq - Interrupt ReQuest number. NOT USED.
294 *
295 * dev_id - partid of IRQ's potential sender.
296 */
297static irqreturn_t
298xpc_handle_notify_IRQ_sn2(int irq, void *dev_id)
299{
300 short partid = (short)(u64)dev_id;
301 struct xpc_partition *part = &xpc_partitions[partid];
302
303 DBUG_ON(partid < 0 || partid >= XP_MAX_NPARTITIONS_SN2);
304
305 if (xpc_part_ref(part)) {
306 xpc_check_for_sent_chctl_flags_sn2(part);
307
308 xpc_part_deref(part);
309 }
310 return IRQ_HANDLED;
311}
312
313/*
314 * Check to see if xpc_handle_notify_IRQ_sn2() dropped any IRQs on the floor
315 * because the write to their associated amo variable completed after the IRQ
316 * was received.
317 */
318static void
319xpc_check_for_dropped_notify_IRQ_sn2(struct xpc_partition *part)
320{
321 struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;
322
323 if (xpc_part_ref(part)) {
324 xpc_check_for_sent_chctl_flags_sn2(part);
325
326 part_sn2->dropped_notify_IRQ_timer.expires = jiffies +
327 XPC_DROPPED_NOTIFY_IRQ_WAIT_INTERVAL;
328 add_timer(&part_sn2->dropped_notify_IRQ_timer);
329 xpc_part_deref(part);
330 }
331}
332
333/*
334 * Send a notify IRQ to the remote partition that is associated with the
335 * specified channel.
336 */
337static void
338xpc_send_notify_IRQ_sn2(struct xpc_channel *ch, u8 chctl_flag,
339 char *chctl_flag_string, unsigned long *irq_flags)
340{
341 struct xpc_partition *part = &xpc_partitions[ch->partid];
342 struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;
343 union xpc_channel_ctl_flags chctl = { 0 };
344 enum xp_retval ret;
345
346 if (likely(part->act_state != XPC_P_AS_DEACTIVATING)) {
347 chctl.flags[ch->number] = chctl_flag;
348 ret = xpc_send_IRQ_sn2(part_sn2->remote_chctl_amo_va,
349 chctl.all_flags,
350 part_sn2->notify_IRQ_nasid,
351 part_sn2->notify_IRQ_phys_cpuid,
352 SGI_XPC_NOTIFY);
353 dev_dbg(xpc_chan, "%s sent to partid=%d, channel=%d, ret=%d\n",
354 chctl_flag_string, ch->partid, ch->number, ret);
355 if (unlikely(ret != xpSuccess)) {
356 if (irq_flags != NULL)
357 spin_unlock_irqrestore(&ch->lock, *irq_flags);
358 XPC_DEACTIVATE_PARTITION(part, ret);
359 if (irq_flags != NULL)
360 spin_lock_irqsave(&ch->lock, *irq_flags);
361 }
362 }
363}
364
365#define XPC_SEND_NOTIFY_IRQ_SN2(_ch, _ipi_f, _irq_f) \
366 xpc_send_notify_IRQ_sn2(_ch, _ipi_f, #_ipi_f, _irq_f)
367
368/*
369 * Make it look like the remote partition, which is associated with the
370 * specified channel, sent us a notify IRQ. This faked IRQ will be handled
371 * by xpc_check_for_dropped_notify_IRQ_sn2().
372 */
373static void
374xpc_send_local_notify_IRQ_sn2(struct xpc_channel *ch, u8 chctl_flag,
375 char *chctl_flag_string)
376{
377 struct xpc_partition *part = &xpc_partitions[ch->partid];
378 union xpc_channel_ctl_flags chctl = { 0 };
379
380 chctl.flags[ch->number] = chctl_flag;
381 FETCHOP_STORE_OP(TO_AMO((u64)&part->sn.sn2.local_chctl_amo_va->
382 variable), FETCHOP_OR, chctl.all_flags);
383 dev_dbg(xpc_chan, "%s sent local from partid=%d, channel=%d\n",
384 chctl_flag_string, ch->partid, ch->number);
385}
386
387#define XPC_SEND_LOCAL_NOTIFY_IRQ_SN2(_ch, _ipi_f) \
388 xpc_send_local_notify_IRQ_sn2(_ch, _ipi_f, #_ipi_f)
389
390static void
391xpc_send_chctl_closerequest_sn2(struct xpc_channel *ch,
392 unsigned long *irq_flags)
393{
394 struct xpc_openclose_args *args = ch->sn.sn2.local_openclose_args;
395
396 args->reason = ch->reason;
397 XPC_SEND_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_CLOSEREQUEST, irq_flags);
398}
399
400static void
401xpc_send_chctl_closereply_sn2(struct xpc_channel *ch, unsigned long *irq_flags)
402{
403 XPC_SEND_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_CLOSEREPLY, irq_flags);
404}
405
406static void
407xpc_send_chctl_openrequest_sn2(struct xpc_channel *ch, unsigned long *irq_flags)
408{
409 struct xpc_openclose_args *args = ch->sn.sn2.local_openclose_args;
410
411 args->entry_size = ch->entry_size;
412 args->local_nentries = ch->local_nentries;
413 XPC_SEND_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_OPENREQUEST, irq_flags);
414}
415
416static void
417xpc_send_chctl_openreply_sn2(struct xpc_channel *ch, unsigned long *irq_flags)
418{
419 struct xpc_openclose_args *args = ch->sn.sn2.local_openclose_args;
420
421 args->remote_nentries = ch->remote_nentries;
422 args->local_nentries = ch->local_nentries;
423 args->local_msgqueue_pa = xp_pa(ch->sn.sn2.local_msgqueue);
424 XPC_SEND_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_OPENREPLY, irq_flags);
425}
426
427static void
428xpc_send_chctl_msgrequest_sn2(struct xpc_channel *ch)
429{
430 XPC_SEND_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_MSGREQUEST, NULL);
431}
432
433static void
434xpc_send_chctl_local_msgrequest_sn2(struct xpc_channel *ch)
435{
436 XPC_SEND_LOCAL_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_MSGREQUEST);
437}
438
439static void
440xpc_save_remote_msgqueue_pa_sn2(struct xpc_channel *ch,
441 unsigned long msgqueue_pa)
442{
443 ch->sn.sn2.remote_msgqueue_pa = msgqueue_pa;
444}
445
446/*
447 * This next set of functions is used to keep track of when a partition is
448 * potentially engaged in accessing memory belonging to another partition.
449 */
450
451static void
452xpc_indicate_partition_engaged_sn2(struct xpc_partition *part)
453{
454 unsigned long irq_flags;
455 struct amo *amo = (struct amo *)__va(part->sn.sn2.remote_amos_page_pa +
456 (XPC_ENGAGED_PARTITIONS_AMO_SN2 *
457 sizeof(struct amo)));
458
459 local_irq_save(irq_flags);
460
461 /* set bit corresponding to our partid in remote partition's amo */
462 FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_OR,
463 BIT(sn_partition_id));
464
465 /*
466 * We must always use the nofault function regardless of whether we
467 * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
468 * didn't, we'd never know that the other partition is down and would
469 * keep sending IRQs and amos to it until the heartbeat times out.
470 */
471 (void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo->
472 variable),
473 xp_nofault_PIOR_target));
474
475 local_irq_restore(irq_flags);
476}
477
478static void
479xpc_indicate_partition_disengaged_sn2(struct xpc_partition *part)
480{
481 struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;
482 unsigned long irq_flags;
483 struct amo *amo = (struct amo *)__va(part_sn2->remote_amos_page_pa +
484 (XPC_ENGAGED_PARTITIONS_AMO_SN2 *
485 sizeof(struct amo)));
486
487 local_irq_save(irq_flags);
488
489 /* clear bit corresponding to our partid in remote partition's amo */
490 FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_AND,
491 ~BIT(sn_partition_id));
492
493 /*
494 * We must always use the nofault function regardless of whether we
495 * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
496 * didn't, we'd never know that the other partition is down and would
497 * keep sending IRQs and amos to it until the heartbeat times out.
498 */
499 (void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo->
500 variable),
501 xp_nofault_PIOR_target));
502
503 local_irq_restore(irq_flags);
504
505 /*
506 * Send activate IRQ to get other side to see that we've cleared our
507 * bit in their engaged partitions amo.
508 */
509 xpc_send_activate_IRQ_sn2(part_sn2->remote_amos_page_pa,
510 cnodeid_to_nasid(0),
511 part_sn2->activate_IRQ_nasid,
512 part_sn2->activate_IRQ_phys_cpuid);
513}
514
515static void
516xpc_assume_partition_disengaged_sn2(short partid)
517{
518 struct amo *amo = xpc_vars_sn2->amos_page +
519 XPC_ENGAGED_PARTITIONS_AMO_SN2;
520
521 /* clear bit(s) based on partid mask in our partition's amo */
522 FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_AND,
523 ~BIT(partid));
524}
525
526static int
527xpc_partition_engaged_sn2(short partid)
528{
529 struct amo *amo = xpc_vars_sn2->amos_page +
530 XPC_ENGAGED_PARTITIONS_AMO_SN2;
531
532 /* our partition's amo variable ANDed with partid mask */
533 return (FETCHOP_LOAD_OP(TO_AMO((u64)&amo->variable), FETCHOP_LOAD) &
534 BIT(partid)) != 0;
535}
536
537static int
538xpc_any_partition_engaged_sn2(void)
539{
540 struct amo *amo = xpc_vars_sn2->amos_page +
541 XPC_ENGAGED_PARTITIONS_AMO_SN2;
542
543 /* our partition's amo variable */
544 return FETCHOP_LOAD_OP(TO_AMO((u64)&amo->variable), FETCHOP_LOAD) != 0;
545}
546
547/* original protection values for each node */
548static u64 xpc_prot_vec_sn2[MAX_NUMNODES];
549
550/*
551 * Change protections to allow amo operations on non-Shub 1.1 systems.
552 */
553static enum xp_retval
554xpc_allow_amo_ops_sn2(struct amo *amos_page)
555{
556 u64 nasid_array = 0;
557 int ret;
558
559 /*
560 * On SHUB 1.1, we cannot call sn_change_memprotect() since the BIST
561 * collides with memory operations. On those systems we call
562 * xpc_allow_amo_ops_shub_wars_1_1_sn2() instead.
563 */
564 if (!enable_shub_wars_1_1()) {
565 ret = sn_change_memprotect(ia64_tpa((u64)amos_page), PAGE_SIZE,
566 SN_MEMPROT_ACCESS_CLASS_1,
567 &nasid_array);
568 if (ret != 0)
569 return xpSalError;
570 }
571 return xpSuccess;
572}
573
574/*
575 * Change protections to allow amo operations on Shub 1.1 systems.
576 */
577static void
578xpc_allow_amo_ops_shub_wars_1_1_sn2(void)
579{
580 int node;
581 int nasid;
582
583 if (!enable_shub_wars_1_1())
584 return;
585
586 for_each_online_node(node) {
587 nasid = cnodeid_to_nasid(node);
588 /* save current protection values */
589 xpc_prot_vec_sn2[node] =
590 (u64)HUB_L((u64 *)GLOBAL_MMR_ADDR(nasid,
591 SH1_MD_DQLP_MMR_DIR_PRIVEC0));
592 /* open up everything */
593 HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid,
594 SH1_MD_DQLP_MMR_DIR_PRIVEC0),
595 -1UL);
596 HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid,
597 SH1_MD_DQRP_MMR_DIR_PRIVEC0),
598 -1UL);
599 }
600}
601
602static enum xp_retval
603xpc_get_partition_rsvd_page_pa_sn2(void *buf, u64 *cookie, unsigned long *rp_pa,
604 size_t *len)
605{
606 s64 status;
607 enum xp_retval ret;
608
609 status = sn_partition_reserved_page_pa((u64)buf, cookie, rp_pa, len);
610 if (status == SALRET_OK)
611 ret = xpSuccess;
612 else if (status == SALRET_MORE_PASSES)
613 ret = xpNeedMoreInfo;
614 else
615 ret = xpSalError;
616
617 return ret;
618}
619
620
621static int
622xpc_setup_rsvd_page_sn_sn2(struct xpc_rsvd_page *rp)
623{
624 struct amo *amos_page;
625 int i;
626 int ret;
627
628 xpc_vars_sn2 = XPC_RP_VARS(rp);
629
630 rp->sn.vars_pa = xp_pa(xpc_vars_sn2);
631
632 /* vars_part array follows immediately after vars */
633 xpc_vars_part_sn2 = (struct xpc_vars_part_sn2 *)((u8 *)XPC_RP_VARS(rp) +
634 XPC_RP_VARS_SIZE);
635
636 /*
637 * Before clearing xpc_vars_sn2, see if a page of amos had been
638 * previously allocated. If not we'll need to allocate one and set
639 * permissions so that cross-partition amos are allowed.
640 *
641 * The allocated amo page needs MCA reporting to remain disabled after
642 * XPC has unloaded. To make this work, we keep a copy of the pointer
643 * to this page (i.e., amos_page) in the struct xpc_vars_sn2 structure,
644 * which is pointed to by the reserved page, and re-use that saved copy
645 * on subsequent loads of XPC. This amo page is never freed, and its
646 * memory protections are never restricted.
647 */
648 amos_page = xpc_vars_sn2->amos_page;
649 if (amos_page == NULL) {
650 amos_page = (struct amo *)TO_AMO(uncached_alloc_page(0, 1));
651 if (amos_page == NULL) {
652 dev_err(xpc_part, "can't allocate page of amos\n");
653 return -ENOMEM;
654 }
655
656 /*
657 * Open up amo-R/W to cpu. This is done on Shub 1.1 systems
658 * when xpc_allow_amo_ops_shub_wars_1_1_sn2() is called.
659 */
660 ret = xpc_allow_amo_ops_sn2(amos_page);
661 if (ret != xpSuccess) {
662 dev_err(xpc_part, "can't allow amo operations\n");
663 uncached_free_page(__IA64_UNCACHED_OFFSET |
664 TO_PHYS((u64)amos_page), 1);
665 return -EPERM;
666 }
667 }
668
669 /* clear xpc_vars_sn2 */
670 memset(xpc_vars_sn2, 0, sizeof(struct xpc_vars_sn2));
671
672 xpc_vars_sn2->version = XPC_V_VERSION;
673 xpc_vars_sn2->activate_IRQ_nasid = cpuid_to_nasid(0);
674 xpc_vars_sn2->activate_IRQ_phys_cpuid = cpu_physical_id(0);
675 xpc_vars_sn2->vars_part_pa = xp_pa(xpc_vars_part_sn2);
676 xpc_vars_sn2->amos_page_pa = ia64_tpa((u64)amos_page);
677 xpc_vars_sn2->amos_page = amos_page; /* save for next load of XPC */
678
679 /* clear xpc_vars_part_sn2 */
680 memset((u64 *)xpc_vars_part_sn2, 0, sizeof(struct xpc_vars_part_sn2) *
681 XP_MAX_NPARTITIONS_SN2);
682
683 /* initialize the activate IRQ related amo variables */
684 for (i = 0; i < xpc_nasid_mask_nlongs; i++)
685 (void)xpc_init_IRQ_amo_sn2(XPC_ACTIVATE_IRQ_AMOS_SN2 + i);
686
687 /* initialize the engaged remote partitions related amo variables */
688 (void)xpc_init_IRQ_amo_sn2(XPC_ENGAGED_PARTITIONS_AMO_SN2);
689 (void)xpc_init_IRQ_amo_sn2(XPC_DEACTIVATE_REQUEST_AMO_SN2);
690
691 return 0;
692}
693
694static void
695xpc_increment_heartbeat_sn2(void)
696{
697 xpc_vars_sn2->heartbeat++;
698}
699
700static void
701xpc_offline_heartbeat_sn2(void)
702{
703 xpc_increment_heartbeat_sn2();
704 xpc_vars_sn2->heartbeat_offline = 1;
705}
706
707static void
708xpc_online_heartbeat_sn2(void)
709{
710 xpc_increment_heartbeat_sn2();
711 xpc_vars_sn2->heartbeat_offline = 0;
712}
713
714static void
715xpc_heartbeat_init_sn2(void)
716{
717 DBUG_ON(xpc_vars_sn2 == NULL);
718
719 bitmap_zero(xpc_vars_sn2->heartbeating_to_mask, XP_MAX_NPARTITIONS_SN2);
720 xpc_heartbeating_to_mask = &xpc_vars_sn2->heartbeating_to_mask[0];
721 xpc_online_heartbeat_sn2();
722}
723
724static void
725xpc_heartbeat_exit_sn2(void)
726{
727 xpc_offline_heartbeat_sn2();
728}
729
730static enum xp_retval
731xpc_get_remote_heartbeat_sn2(struct xpc_partition *part)
732{
733 struct xpc_vars_sn2 *remote_vars;
734 enum xp_retval ret;
735
736 remote_vars = (struct xpc_vars_sn2 *)xpc_remote_copy_buffer_sn2;
737
738 /* pull the remote vars structure that contains the heartbeat */
739 ret = xp_remote_memcpy(xp_pa(remote_vars),
740 part->sn.sn2.remote_vars_pa,
741 XPC_RP_VARS_SIZE);
742 if (ret != xpSuccess)
743 return ret;
744
745 dev_dbg(xpc_part, "partid=%d, heartbeat=%ld, last_heartbeat=%ld, "
746 "heartbeat_offline=%ld, HB_mask[0]=0x%lx\n", XPC_PARTID(part),
747 remote_vars->heartbeat, part->last_heartbeat,
748 remote_vars->heartbeat_offline,
749 remote_vars->heartbeating_to_mask[0]);
750
751 if ((remote_vars->heartbeat == part->last_heartbeat &&
752 remote_vars->heartbeat_offline == 0) ||
753 !xpc_hb_allowed(sn_partition_id,
754 &remote_vars->heartbeating_to_mask)) {
755 ret = xpNoHeartbeat;
756 } else {
757 part->last_heartbeat = remote_vars->heartbeat;
758 }
759
760 return ret;
761}
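
Read as a predicate, the remote partition is reported dead (xpNoHeartbeat) when its counter has not advanced since our last look while it does not claim to be offline, or when it has stopped heartbeating to us. A condensed sketch of that test, using the names from this function (the helper itself is hypothetical):

	static int xpc_remote_heartbeat_stalled(struct xpc_vars_sn2 *remote_vars,
						u64 last_heartbeat)
	{
		return (remote_vars->heartbeat == last_heartbeat &&
			remote_vars->heartbeat_offline == 0) ||
		       !xpc_hb_allowed(sn_partition_id,
				       &remote_vars->heartbeating_to_mask);
	}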
762
763/*
764 * Get a copy of the remote partition's XPC variables from the reserved page.
765 *
766 * remote_vars points to a buffer that is cacheline aligned for BTE copies and
767 * assumed to be of size XPC_RP_VARS_SIZE.
768 */
769static enum xp_retval
770xpc_get_remote_vars_sn2(unsigned long remote_vars_pa,
771 struct xpc_vars_sn2 *remote_vars)
772{
773 enum xp_retval ret;
774
775 if (remote_vars_pa == 0)
776 return xpVarsNotSet;
777
778 /* pull over the cross partition variables */
779 ret = xp_remote_memcpy(xp_pa(remote_vars), remote_vars_pa,
780 XPC_RP_VARS_SIZE);
781 if (ret != xpSuccess)
782 return ret;
783
784 if (XPC_VERSION_MAJOR(remote_vars->version) !=
785 XPC_VERSION_MAJOR(XPC_V_VERSION)) {
786 return xpBadVersion;
787 }
788
789 return xpSuccess;
790}
791
792static void
793xpc_request_partition_activation_sn2(struct xpc_rsvd_page *remote_rp,
794 unsigned long remote_rp_pa, int nasid)
795{
796 xpc_send_local_activate_IRQ_sn2(nasid);
797}
798
799static void
800xpc_request_partition_reactivation_sn2(struct xpc_partition *part)
801{
802 xpc_send_local_activate_IRQ_sn2(part->sn.sn2.activate_IRQ_nasid);
803}
804
805static void
806xpc_request_partition_deactivation_sn2(struct xpc_partition *part)
807{
808 struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;
809 unsigned long irq_flags;
810 struct amo *amo = (struct amo *)__va(part_sn2->remote_amos_page_pa +
811 (XPC_DEACTIVATE_REQUEST_AMO_SN2 *
812 sizeof(struct amo)));
813
814 local_irq_save(irq_flags);
815
816 /* set bit corresponding to our partid in remote partition's amo */
817 FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_OR,
818 BIT(sn_partition_id));
819
820 /*
821 * We must always use the nofault function regardless of whether we
822 * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
823 * didn't, we'd never know that the other partition is down and would
824 * keep sending IRQs and amos to it until the heartbeat times out.
825 */
826 (void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo->
827 variable),
828 xp_nofault_PIOR_target));
829
830 local_irq_restore(irq_flags);
831
832 /*
833 * Send activate IRQ to get other side to see that we've set our
834 * bit in their deactivate request amo.
835 */
836 xpc_send_activate_IRQ_sn2(part_sn2->remote_amos_page_pa,
837 cnodeid_to_nasid(0),
838 part_sn2->activate_IRQ_nasid,
839 part_sn2->activate_IRQ_phys_cpuid);
840}
841
842static void
843xpc_cancel_partition_deactivation_request_sn2(struct xpc_partition *part)
844{
845 unsigned long irq_flags;
846 struct amo *amo = (struct amo *)__va(part->sn.sn2.remote_amos_page_pa +
847 (XPC_DEACTIVATE_REQUEST_AMO_SN2 *
848 sizeof(struct amo)));
849
850 local_irq_save(irq_flags);
851
852 /* clear bit corresponding to our partid in remote partition's amo */
853 FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_AND,
854 ~BIT(sn_partition_id));
855
856 /*
857 * We must always use the nofault function regardless of whether we
858 * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
859 * didn't, we'd never know that the other partition is down and would
860 * keep sending IRQs and amos to it until the heartbeat times out.
861 */
862 (void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo->
863 variable),
864 xp_nofault_PIOR_target));
865
866 local_irq_restore(irq_flags);
867}
868
869static int
870xpc_partition_deactivation_requested_sn2(short partid)
871{
872 struct amo *amo = xpc_vars_sn2->amos_page +
873 XPC_DEACTIVATE_REQUEST_AMO_SN2;
874
875 /* our partition's amo variable ANDed with partid mask */
876 return (FETCHOP_LOAD_OP(TO_AMO((u64)&amo->variable), FETCHOP_LOAD) &
877 BIT(partid)) != 0;
878}
879
880/*
881 * Update the remote partition's info.
882 */
883static void
884xpc_update_partition_info_sn2(struct xpc_partition *part, u8 remote_rp_version,
885 unsigned long *remote_rp_ts_jiffies,
886 unsigned long remote_rp_pa,
887 unsigned long remote_vars_pa,
888 struct xpc_vars_sn2 *remote_vars)
889{
890 struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;
891
892 part->remote_rp_version = remote_rp_version;
893 dev_dbg(xpc_part, " remote_rp_version = 0x%016x\n",
894 part->remote_rp_version);
895
896 part->remote_rp_ts_jiffies = *remote_rp_ts_jiffies;
897 dev_dbg(xpc_part, " remote_rp_ts_jiffies = 0x%016lx\n",
898 part->remote_rp_ts_jiffies);
899
900 part->remote_rp_pa = remote_rp_pa;
901 dev_dbg(xpc_part, " remote_rp_pa = 0x%016lx\n", part->remote_rp_pa);
902
903 part_sn2->remote_vars_pa = remote_vars_pa;
904 dev_dbg(xpc_part, " remote_vars_pa = 0x%016lx\n",
905 part_sn2->remote_vars_pa);
906
907 part->last_heartbeat = remote_vars->heartbeat;
908 dev_dbg(xpc_part, " last_heartbeat = 0x%016lx\n",
909 part->last_heartbeat);
910
911 part_sn2->remote_vars_part_pa = remote_vars->vars_part_pa;
912 dev_dbg(xpc_part, " remote_vars_part_pa = 0x%016lx\n",
913 part_sn2->remote_vars_part_pa);
914
915 part_sn2->activate_IRQ_nasid = remote_vars->activate_IRQ_nasid;
916 dev_dbg(xpc_part, " activate_IRQ_nasid = 0x%x\n",
917 part_sn2->activate_IRQ_nasid);
918
919 part_sn2->activate_IRQ_phys_cpuid =
920 remote_vars->activate_IRQ_phys_cpuid;
921 dev_dbg(xpc_part, " activate_IRQ_phys_cpuid = 0x%x\n",
922 part_sn2->activate_IRQ_phys_cpuid);
923
924 part_sn2->remote_amos_page_pa = remote_vars->amos_page_pa;
925 dev_dbg(xpc_part, " remote_amos_page_pa = 0x%lx\n",
926 part_sn2->remote_amos_page_pa);
927
928 part_sn2->remote_vars_version = remote_vars->version;
929 dev_dbg(xpc_part, " remote_vars_version = 0x%x\n",
930 part_sn2->remote_vars_version);
931}
932
933/*
934 * Prior code has determined the nasid which generated an activate IRQ.
935 * Inspect that nasid to determine if its partition needs to be activated
936 * or deactivated.
937 *
938 * A partition is considered "awaiting activation" if our partition
939 * flags indicate it is not active and it has a heartbeat. A
940 * partition is considered "awaiting deactivation" if our partition
941 * flags indicate it is active but it has no heartbeat or it is not
942 * sending its heartbeat to us.
943 *
944 * To determine the heartbeat, the remote nasid must have a properly
945 * initialized reserved page.
946 */
947static void
948xpc_identify_activate_IRQ_req_sn2(int nasid)
949{
950 struct xpc_rsvd_page *remote_rp;
951 struct xpc_vars_sn2 *remote_vars;
952 unsigned long remote_rp_pa;
953 unsigned long remote_vars_pa;
954 int remote_rp_version;
955 int reactivate = 0;
956 unsigned long remote_rp_ts_jiffies = 0;
957 short partid;
958 struct xpc_partition *part;
959 struct xpc_partition_sn2 *part_sn2;
960 enum xp_retval ret;
961
962 /* pull over the reserved page structure */
963
964 remote_rp = (struct xpc_rsvd_page *)xpc_remote_copy_buffer_sn2;
965
966 ret = xpc_get_remote_rp(nasid, NULL, remote_rp, &remote_rp_pa);
967 if (ret != xpSuccess) {
968 dev_warn(xpc_part, "unable to get reserved page from nasid %d, "
969 "which sent interrupt, reason=%d\n", nasid, ret);
970 return;
971 }
972
973 remote_vars_pa = remote_rp->sn.vars_pa;
974 remote_rp_version = remote_rp->version;
975 remote_rp_ts_jiffies = remote_rp->ts_jiffies;
976
977 partid = remote_rp->SAL_partid;
978 part = &xpc_partitions[partid];
979 part_sn2 = &part->sn.sn2;
980
981 /* pull over the cross partition variables */
982
983 remote_vars = (struct xpc_vars_sn2 *)xpc_remote_copy_buffer_sn2;
984
985 ret = xpc_get_remote_vars_sn2(remote_vars_pa, remote_vars);
986 if (ret != xpSuccess) {
987 dev_warn(xpc_part, "unable to get XPC variables from nasid %d, "
988 "which sent interrupt, reason=%d\n", nasid, ret);
989
990 XPC_DEACTIVATE_PARTITION(part, ret);
991 return;
992 }
993
994 part->activate_IRQ_rcvd++;
995
996 dev_dbg(xpc_part, "partid for nasid %d is %d; IRQs = %d; HB = "
997 "%ld:0x%lx\n", (int)nasid, (int)partid, part->activate_IRQ_rcvd,
998 remote_vars->heartbeat, remote_vars->heartbeating_to_mask[0]);
999
1000 if (xpc_partition_disengaged(part) &&
1001 part->act_state == XPC_P_AS_INACTIVE) {
1002
1003 xpc_update_partition_info_sn2(part, remote_rp_version,
1004 &remote_rp_ts_jiffies,
1005 remote_rp_pa, remote_vars_pa,
1006 remote_vars);
1007
1008 if (xpc_partition_deactivation_requested_sn2(partid)) {
1009 /*
1010 * Other side is waiting on us to deactivate even though
1011 * we already have.
1012 */
1013 return;
1014 }
1015
1016 xpc_activate_partition(part);
1017 return;
1018 }
1019
1020 DBUG_ON(part->remote_rp_version == 0);
1021 DBUG_ON(part_sn2->remote_vars_version == 0);
1022
1023 if (remote_rp_ts_jiffies != part->remote_rp_ts_jiffies) {
1024
1025 /* the other side rebooted */
1026
1027 DBUG_ON(xpc_partition_engaged_sn2(partid));
1028 DBUG_ON(xpc_partition_deactivation_requested_sn2(partid));
1029
1030 xpc_update_partition_info_sn2(part, remote_rp_version,
1031 &remote_rp_ts_jiffies,
1032 remote_rp_pa, remote_vars_pa,
1033 remote_vars);
1034 reactivate = 1;
1035 }
1036
1037 if (part->disengage_timeout > 0 && !xpc_partition_disengaged(part)) {
1038 /* still waiting on other side to disengage from us */
1039 return;
1040 }
1041
1042 if (reactivate)
1043 XPC_DEACTIVATE_PARTITION(part, xpReactivating);
1044 else if (xpc_partition_deactivation_requested_sn2(partid))
1045 XPC_DEACTIVATE_PARTITION(part, xpOtherGoingDown);
1046}
1047
1048/*
1049 * Loop through the activation amo variables and process any bits
1050 * which are set. Each bit indicates a nasid sending a partition
1051 * activation or deactivation request.
1052 *
1053 * Return #of IRQs detected.
1054 */
1055int
1056xpc_identify_activate_IRQ_sender_sn2(void)
1057{
1058 int l;
1059 int b;
1060 unsigned long nasid_mask_long;
1061 u64 nasid; /* remote nasid */
1062 int n_IRQs_detected = 0;
1063 struct amo *act_amos;
1064
1065 act_amos = xpc_vars_sn2->amos_page + XPC_ACTIVATE_IRQ_AMOS_SN2;
1066
1067 /* scan through activate amo variables looking for non-zero entries */
1068 for (l = 0; l < xpc_nasid_mask_nlongs; l++) {
1069
1070 if (xpc_exiting)
1071 break;
1072
1073 nasid_mask_long = xpc_receive_IRQ_amo_sn2(&act_amos[l]);
1074
1075 b = find_first_bit(&nasid_mask_long, BITS_PER_LONG);
1076 if (b >= BITS_PER_LONG) {
1077 /* no IRQs from nasids in this amo variable */
1078 continue;
1079 }
1080
1081 dev_dbg(xpc_part, "amo[%d] gave back 0x%lx\n", l,
1082 nasid_mask_long);
1083
1084 /*
1085 * If this nasid has been added to the machine since
1086 * our partition was reset, this will retain the
1087 * remote nasid in our reserved page's machine mask.
1088 * This is used in the event of module reload.
1089 */
1090 xpc_mach_nasids[l] |= nasid_mask_long;
1091
1092 /* locate the nasid(s) which sent interrupts */
1093
1094 do {
1095 n_IRQs_detected++;
1096 nasid = (l * BITS_PER_LONG + b) * 2;
1097 dev_dbg(xpc_part, "interrupt from nasid %ld\n", nasid);
1098 xpc_identify_activate_IRQ_req_sn2(nasid);
1099
1100 b = find_next_bit(&nasid_mask_long, BITS_PER_LONG,
1101 b + 1);
1102 } while (b < BITS_PER_LONG);
1103 }
1104 return n_IRQs_detected;
1105}
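
As a worked example (hypothetical values; BITS_PER_LONG == 64 on ia64): if the amo word at index l == 1 reads back 0x9, bits 0 and 3 are set, so nasids (1 * 64 + 0) * 2 = 128 and (1 * 64 + 3) * 2 = 134 are each handed to xpc_identify_activate_IRQ_req_sn2(). A self-contained sketch of that walk:

	unsigned long nasid_mask_long = 0x9;	/* hypothetical: bits 0 and 3 */
	int l = 1;
	int b;

	for (b = find_first_bit(&nasid_mask_long, BITS_PER_LONG);
	     b < BITS_PER_LONG;
	     b = find_next_bit(&nasid_mask_long, BITS_PER_LONG, b + 1))
		dev_dbg(xpc_part, "interrupt from nasid %d\n",
			(l * BITS_PER_LONG + b) * 2);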
1106
1107static void
1108xpc_process_activate_IRQ_rcvd_sn2(void)
1109{
1110 unsigned long irq_flags;
1111 int n_IRQs_expected;
1112 int n_IRQs_detected;
1113
1114 DBUG_ON(xpc_activate_IRQ_rcvd == 0);
1115
1116 spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
1117 n_IRQs_expected = xpc_activate_IRQ_rcvd;
1118 xpc_activate_IRQ_rcvd = 0;
1119 spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
1120
1121 n_IRQs_detected = xpc_identify_activate_IRQ_sender_sn2();
1122 if (n_IRQs_detected < n_IRQs_expected) {
1123 /* retry once to help avoid missing amo */
1124 (void)xpc_identify_activate_IRQ_sender_sn2();
1125 }
1126}
1127
1128/*
1129 * Setup the channel structures that are sn2 specific.
1130 */
1131static enum xp_retval
1132xpc_setup_ch_structures_sn_sn2(struct xpc_partition *part)
1133{
1134 struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;
1135 struct xpc_channel_sn2 *ch_sn2;
1136 enum xp_retval retval;
1137 int ret;
1138 int cpuid;
1139 int ch_number;
1140 struct timer_list *timer;
1141 short partid = XPC_PARTID(part);
1142
1143 /* allocate all the required GET/PUT values */
1144
1145 part_sn2->local_GPs =
1146 xpc_kzalloc_cacheline_aligned(XPC_GP_SIZE, GFP_KERNEL,
1147 &part_sn2->local_GPs_base);
1148 if (part_sn2->local_GPs == NULL) {
1149 dev_err(xpc_chan, "can't get memory for local get/put "
1150 "values\n");
1151 return xpNoMemory;
1152 }
1153
1154 part_sn2->remote_GPs =
1155 xpc_kzalloc_cacheline_aligned(XPC_GP_SIZE, GFP_KERNEL,
1156 &part_sn2->remote_GPs_base);
1157 if (part_sn2->remote_GPs == NULL) {
1158 dev_err(xpc_chan, "can't get memory for remote get/put "
1159 "values\n");
1160 retval = xpNoMemory;
1161 goto out_1;
1162 }
1163
1164 part_sn2->remote_GPs_pa = 0;
1165
1166 /* allocate all the required open and close args */
1167
1168 part_sn2->local_openclose_args =
1169 xpc_kzalloc_cacheline_aligned(XPC_OPENCLOSE_ARGS_SIZE,
1170 GFP_KERNEL, &part_sn2->
1171 local_openclose_args_base);
1172 if (part_sn2->local_openclose_args == NULL) {
1173 dev_err(xpc_chan, "can't get memory for local connect args\n");
1174 retval = xpNoMemory;
1175 goto out_2;
1176 }
1177
1178 part_sn2->remote_openclose_args_pa = 0;
1179
1180 part_sn2->local_chctl_amo_va = xpc_init_IRQ_amo_sn2(partid);
1181
1182 part_sn2->notify_IRQ_nasid = 0;
1183 part_sn2->notify_IRQ_phys_cpuid = 0;
1184 part_sn2->remote_chctl_amo_va = NULL;
1185
1186 sprintf(part_sn2->notify_IRQ_owner, "xpc%02d", partid);
1187 ret = request_irq(SGI_XPC_NOTIFY, xpc_handle_notify_IRQ_sn2,
1188 IRQF_SHARED, part_sn2->notify_IRQ_owner,
1189 (void *)(u64)partid);
1190 if (ret != 0) {
1191 dev_err(xpc_chan, "can't register NOTIFY IRQ handler, "
1192 "errno=%d\n", -ret);
1193 retval = xpLackOfResources;
1194 goto out_3;
1195 }
1196
1197 /* Setup a timer to check for dropped notify IRQs */
1198 timer = &part_sn2->dropped_notify_IRQ_timer;
1199 init_timer(timer);
1200 timer->function =
1201 (void (*)(unsigned long))xpc_check_for_dropped_notify_IRQ_sn2;
1202 timer->data = (unsigned long)part;
1203 timer->expires = jiffies + XPC_DROPPED_NOTIFY_IRQ_WAIT_INTERVAL;
1204 add_timer(timer);
1205
1206 for (ch_number = 0; ch_number < part->nchannels; ch_number++) {
1207 ch_sn2 = &part->channels[ch_number].sn.sn2;
1208
1209 ch_sn2->local_GP = &part_sn2->local_GPs[ch_number];
1210 ch_sn2->local_openclose_args =
1211 &part_sn2->local_openclose_args[ch_number];
1212
1213 mutex_init(&ch_sn2->msg_to_pull_mutex);
1214 }
1215
1216 /*
1217 * Setup the per partition specific variables required by the
1218 * remote partition to establish channel connections with us.
1219 *
1220 * The setting of the magic # indicates that these per partition
1221 * specific variables are ready to be used.
1222 */
1223 xpc_vars_part_sn2[partid].GPs_pa = xp_pa(part_sn2->local_GPs);
1224 xpc_vars_part_sn2[partid].openclose_args_pa =
1225 xp_pa(part_sn2->local_openclose_args);
1226 xpc_vars_part_sn2[partid].chctl_amo_pa =
1227 xp_pa(part_sn2->local_chctl_amo_va);
1228 cpuid = raw_smp_processor_id(); /* any CPU in this partition will do */
1229 xpc_vars_part_sn2[partid].notify_IRQ_nasid = cpuid_to_nasid(cpuid);
1230 xpc_vars_part_sn2[partid].notify_IRQ_phys_cpuid =
1231 cpu_physical_id(cpuid);
1232 xpc_vars_part_sn2[partid].nchannels = part->nchannels;
1233 xpc_vars_part_sn2[partid].magic = XPC_VP_MAGIC1_SN2;
1234
1235 return xpSuccess;
1236
1237 /* setup of ch structures failed */
1238out_3:
1239 kfree(part_sn2->local_openclose_args_base);
1240 part_sn2->local_openclose_args = NULL;
1241out_2:
1242 kfree(part_sn2->remote_GPs_base);
1243 part_sn2->remote_GPs = NULL;
1244out_1:
1245 kfree(part_sn2->local_GPs_base);
1246 part_sn2->local_GPs = NULL;
1247 return retval;
1248}
1249
1250/*
1251 * Teardown the channel structures that are sn2 specific.
1252 */
1253static void
1254xpc_teardown_ch_structures_sn_sn2(struct xpc_partition *part)
1255{
1256 struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;
1257 short partid = XPC_PARTID(part);
1258
1259 /*
1260 * Indicate that the variables specific to the remote partition are no
1261 * longer available for its use.
1262 */
1263 xpc_vars_part_sn2[partid].magic = 0;
1264
1265 /* in case we've still got outstanding timers registered... */
1266 del_timer_sync(&part_sn2->dropped_notify_IRQ_timer);
1267 free_irq(SGI_XPC_NOTIFY, (void *)(u64)partid);
1268
1269 kfree(part_sn2->local_openclose_args_base);
1270 part_sn2->local_openclose_args = NULL;
1271 kfree(part_sn2->remote_GPs_base);
1272 part_sn2->remote_GPs = NULL;
1273 kfree(part_sn2->local_GPs_base);
1274 part_sn2->local_GPs = NULL;
1275 part_sn2->local_chctl_amo_va = NULL;
1276}
1277
1278/*
1279 * Create a wrapper that hides the underlying mechanism for pulling a cacheline
1280 * (or multiple cachelines) from a remote partition.
1281 *
1282 * src_pa must be a cacheline aligned physical address on the remote partition.
1283 * dst must be a cacheline aligned virtual address on this partition.
1284 * cnt must be cacheline sized
1285 */
1286/* ??? Replace this function by call to xp_remote_memcpy() or bte_copy()? */
1287static enum xp_retval
1288xpc_pull_remote_cachelines_sn2(struct xpc_partition *part, void *dst,
1289 const unsigned long src_pa, size_t cnt)
1290{
1291 enum xp_retval ret;
1292
1293 DBUG_ON(src_pa != L1_CACHE_ALIGN(src_pa));
1294 DBUG_ON((unsigned long)dst != L1_CACHE_ALIGN((unsigned long)dst));
1295 DBUG_ON(cnt != L1_CACHE_ALIGN(cnt));
1296
1297 if (part->act_state == XPC_P_AS_DEACTIVATING)
1298 return part->reason;
1299
1300 ret = xp_remote_memcpy(xp_pa(dst), src_pa, cnt);
1301 if (ret != xpSuccess) {
1302 dev_dbg(xpc_chan, "xp_remote_memcpy() from partition %d failed,"
1303 " ret=%d\n", XPC_PARTID(part), ret);
1304 }
1305 return ret;
1306}
1307
1308/*
1309 * Pull the remote per partition specific variables from the specified
1310 * partition.
1311 */
1312static enum xp_retval
1313xpc_pull_remote_vars_part_sn2(struct xpc_partition *part)
1314{
1315 struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;
1316 u8 buffer[L1_CACHE_BYTES * 2];
1317 struct xpc_vars_part_sn2 *pulled_entry_cacheline =
1318 (struct xpc_vars_part_sn2 *)L1_CACHE_ALIGN((u64)buffer);
1319 struct xpc_vars_part_sn2 *pulled_entry;
1320 unsigned long remote_entry_cacheline_pa;
1321 unsigned long remote_entry_pa;
1322 short partid = XPC_PARTID(part);
1323 enum xp_retval ret;
1324
1325 /* pull the cacheline that contains the variables we're interested in */
1326
1327 DBUG_ON(part_sn2->remote_vars_part_pa !=
1328 L1_CACHE_ALIGN(part_sn2->remote_vars_part_pa));
1329 DBUG_ON(sizeof(struct xpc_vars_part_sn2) != L1_CACHE_BYTES / 2);
1330
1331 remote_entry_pa = part_sn2->remote_vars_part_pa +
1332 sn_partition_id * sizeof(struct xpc_vars_part_sn2);
1333
1334 remote_entry_cacheline_pa = (remote_entry_pa & ~(L1_CACHE_BYTES - 1));
1335
1336 pulled_entry = (struct xpc_vars_part_sn2 *)((u64)pulled_entry_cacheline
1337 + (remote_entry_pa &
1338 (L1_CACHE_BYTES - 1)));
1339
1340 ret = xpc_pull_remote_cachelines_sn2(part, pulled_entry_cacheline,
1341 remote_entry_cacheline_pa,
1342 L1_CACHE_BYTES);
1343 if (ret != xpSuccess) {
1344 dev_dbg(xpc_chan, "failed to pull XPC vars_part from "
1345 "partition %d, ret=%d\n", partid, ret);
1346 return ret;
1347 }
1348
1349 /* see if they've been set up yet */
1350
1351 if (pulled_entry->magic != XPC_VP_MAGIC1_SN2 &&
1352 pulled_entry->magic != XPC_VP_MAGIC2_SN2) {
1353
1354 if (pulled_entry->magic != 0) {
1355 dev_dbg(xpc_chan, "partition %d's XPC vars_part for "
1356 "partition %d has bad magic value (=0x%lx)\n",
1357 partid, sn_partition_id, pulled_entry->magic);
1358 return xpBadMagic;
1359 }
1360
1361 /* they've not been initialized yet */
1362 return xpRetry;
1363 }
1364
1365 if (xpc_vars_part_sn2[partid].magic == XPC_VP_MAGIC1_SN2) {
1366
1367 /* validate the variables */
1368
1369 if (pulled_entry->GPs_pa == 0 ||
1370 pulled_entry->openclose_args_pa == 0 ||
1371 pulled_entry->chctl_amo_pa == 0) {
1372
1373 dev_err(xpc_chan, "partition %d's XPC vars_part for "
1374 "partition %d are not valid\n", partid,
1375 sn_partition_id);
1376 return xpInvalidAddress;
1377 }
1378
1379 /* the variables we imported look to be valid */
1380
1381 part_sn2->remote_GPs_pa = pulled_entry->GPs_pa;
1382 part_sn2->remote_openclose_args_pa =
1383 pulled_entry->openclose_args_pa;
1384 part_sn2->remote_chctl_amo_va =
1385 (struct amo *)__va(pulled_entry->chctl_amo_pa);
1386 part_sn2->notify_IRQ_nasid = pulled_entry->notify_IRQ_nasid;
1387 part_sn2->notify_IRQ_phys_cpuid =
1388 pulled_entry->notify_IRQ_phys_cpuid;
1389
1390 if (part->nchannels > pulled_entry->nchannels)
1391 part->nchannels = pulled_entry->nchannels;
1392
1393 /* let the other side know that we've pulled their variables */
1394
1395 xpc_vars_part_sn2[partid].magic = XPC_VP_MAGIC2_SN2;
1396 }
1397
1398 if (pulled_entry->magic == XPC_VP_MAGIC1_SN2)
1399 return xpRetry;
1400
1401 return xpSuccess;
1402}
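
The address arithmetic above is the standard align-down-and-reoffset idiom: round the entry's physical address down to a cacheline boundary, pull the whole line into an aligned local buffer, then re-apply the intra-line offset to locate the entry in the copy. The idiom in isolation (all names hypothetical):

	unsigned long entry_pa = remote_base_pa + idx * entry_size;
	unsigned long line_pa = entry_pa & ~(L1_CACHE_BYTES - 1); /* align down */
	void *entry = (u8 *)aligned_buf + (entry_pa & (L1_CACHE_BYTES - 1));

	ret = xp_remote_memcpy(xp_pa(aligned_buf), line_pa, L1_CACHE_BYTES);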
1403
1404/*
1405 * Establish first contact with the remote partition. This involves pulling
1406 * the XPC per partition variables from the remote partition and waiting for
1407 * the remote partition to pull ours.
1408 */
1409static enum xp_retval
1410xpc_make_first_contact_sn2(struct xpc_partition *part)
1411{
1412 struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;
1413 enum xp_retval ret;
1414
1415 /*
1416 * Register the remote partition's amos with SAL so it can handle
1417 * and cleanup errors within that address range should the remote
1418 * partition go down. We don't unregister this range because it is
1419 * difficult to tell when outstanding writes to the remote partition
1420 * are finished and thus when it is safe to unregister. This should
1421 * not result in wasted space in the SAL xp_addr_region table because
1422 * we should get the same page for remote_amos_page_pa after module
1423 * reloads and system reboots.
1424 */
1425 if (sn_register_xp_addr_region(part_sn2->remote_amos_page_pa,
1426 PAGE_SIZE, 1) < 0) {
1427 dev_warn(xpc_part, "xpc_activating(%d) failed to register "
1428 "xp_addr region\n", XPC_PARTID(part));
1429
1430 ret = xpPhysAddrRegFailed;
1431 XPC_DEACTIVATE_PARTITION(part, ret);
1432 return ret;
1433 }
1434
1435 /*
1436 * Send activate IRQ to get other side to activate if they've not
1437 * already begun to do so.
1438 */
1439 xpc_send_activate_IRQ_sn2(part_sn2->remote_amos_page_pa,
1440 cnodeid_to_nasid(0),
1441 part_sn2->activate_IRQ_nasid,
1442 part_sn2->activate_IRQ_phys_cpuid);
1443
1444 while ((ret = xpc_pull_remote_vars_part_sn2(part)) != xpSuccess) {
1445 if (ret != xpRetry) {
1446 XPC_DEACTIVATE_PARTITION(part, ret);
1447 return ret;
1448 }
1449
1450 dev_dbg(xpc_part, "waiting to make first contact with "
1451 "partition %d\n", XPC_PARTID(part));
1452
1453 /* wait a 1/4 of a second or so */
1454 (void)msleep_interruptible(250);
1455
1456 if (part->act_state == XPC_P_AS_DEACTIVATING)
1457 return part->reason;
1458 }
1459
1460 return xpSuccess;
1461}
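
Seen end to end, first contact is a two-phase magic-number handshake: each side publishes its vars_part entry under XPC_VP_MAGIC1_SN2, advances it to XPC_VP_MAGIC2_SN2 once it has successfully pulled and validated its peer's entry, and declares success only when the entry it pulls already carries MAGIC2. A condensed sketch of the progression, as implemented by xpc_pull_remote_vars_part_sn2() above:

	/*
	 * A: xpc_vars_part_sn2[B].magic = MAGIC1    (entry ready to pull)
	 * B: xpc_vars_part_sn2[A].magic = MAGIC1
	 * A: pulls B's entry for A, sees MAGIC1, validates it,
	 *    sets its own entry to MAGIC2           -> returns xpRetry
	 * B: pulls A's entry for B, sees MAGIC2,
	 *    sets its own entry to MAGIC2           -> returns xpSuccess
	 * A: pulls again, now sees MAGIC2           -> returns xpSuccess
	 */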
1462
1463/*
1464 * Get the chctl flags and pull the openclose args and/or remote GPs as needed.
1465 */
1466static u64
1467xpc_get_chctl_all_flags_sn2(struct xpc_partition *part)
1468{
1469 struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;
1470 unsigned long irq_flags;
1471 union xpc_channel_ctl_flags chctl;
1472 enum xp_retval ret;
1473
1474 /*
1475 * See if there are any chctl flags to be handled.
1476 */
1477
1478 spin_lock_irqsave(&part->chctl_lock, irq_flags);
1479 chctl = part->chctl;
1480 if (chctl.all_flags != 0)
1481 part->chctl.all_flags = 0;
1482
1483 spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
1484
1485 if (xpc_any_openclose_chctl_flags_set(&chctl)) {
1486 ret = xpc_pull_remote_cachelines_sn2(part, part->
1487 remote_openclose_args,
1488 part_sn2->
1489 remote_openclose_args_pa,
1490 XPC_OPENCLOSE_ARGS_SIZE);
1491 if (ret != xpSuccess) {
1492 XPC_DEACTIVATE_PARTITION(part, ret);
1493
1494 dev_dbg(xpc_chan, "failed to pull openclose args from "
1495 "partition %d, ret=%d\n", XPC_PARTID(part),
1496 ret);
1497
1498 /* don't bother processing chctl flags anymore */
1499 chctl.all_flags = 0;
1500 }
1501 }
1502
1503 if (xpc_any_msg_chctl_flags_set(&chctl)) {
1504 ret = xpc_pull_remote_cachelines_sn2(part, part_sn2->remote_GPs,
1505 part_sn2->remote_GPs_pa,
1506 XPC_GP_SIZE);
1507 if (ret != xpSuccess) {
1508 XPC_DEACTIVATE_PARTITION(part, ret);
1509
1510 dev_dbg(xpc_chan, "failed to pull GPs from partition "
1511 "%d, ret=%d\n", XPC_PARTID(part), ret);
1512
1513 /* don't bother processing chctl flags anymore */
1514 chctl.all_flags = 0;
1515 }
1516 }
1517
1518 return chctl.all_flags;
1519}
1520
1521/*
1522 * Allocate the local message queue and the notify queue.
1523 */
1524static enum xp_retval
1525xpc_allocate_local_msgqueue_sn2(struct xpc_channel *ch)
1526{
1527 struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
1528 unsigned long irq_flags;
1529 int nentries;
1530 size_t nbytes;
1531
1532 for (nentries = ch->local_nentries; nentries > 0; nentries--) {
1533
1534 nbytes = nentries * ch->entry_size;
1535 ch_sn2->local_msgqueue =
1536 xpc_kzalloc_cacheline_aligned(nbytes, GFP_KERNEL,
1537 &ch_sn2->local_msgqueue_base);
1538 if (ch_sn2->local_msgqueue == NULL)
1539 continue;
1540
1541 nbytes = nentries * sizeof(struct xpc_notify_sn2);
1542 ch_sn2->notify_queue = kzalloc(nbytes, GFP_KERNEL);
1543 if (ch_sn2->notify_queue == NULL) {
1544 kfree(ch_sn2->local_msgqueue_base);
1545 ch_sn2->local_msgqueue = NULL;
1546 continue;
1547 }
1548
1549 spin_lock_irqsave(&ch->lock, irq_flags);
1550 if (nentries < ch->local_nentries) {
1551 dev_dbg(xpc_chan, "nentries=%d local_nentries=%d, "
1552 "partid=%d, channel=%d\n", nentries,
1553 ch->local_nentries, ch->partid, ch->number);
1554
1555 ch->local_nentries = nentries;
1556 }
1557 spin_unlock_irqrestore(&ch->lock, irq_flags);
1558 return xpSuccess;
1559 }
1560
1561 dev_dbg(xpc_chan, "can't get memory for local message queue and notify "
1562 "queue, partid=%d, channel=%d\n", ch->partid, ch->number);
1563 return xpNoMemory;
1564}
1565
1566/*
1567 * Allocate the cached remote message queue.
1568 */
1569static enum xp_retval
1570xpc_allocate_remote_msgqueue_sn2(struct xpc_channel *ch)
1571{
1572 struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
1573 unsigned long irq_flags;
1574 int nentries;
1575 size_t nbytes;
1576
1577 DBUG_ON(ch->remote_nentries <= 0);
1578
1579 for (nentries = ch->remote_nentries; nentries > 0; nentries--) {
1580
1581 nbytes = nentries * ch->entry_size;
1582 ch_sn2->remote_msgqueue =
1583 xpc_kzalloc_cacheline_aligned(nbytes, GFP_KERNEL, &ch_sn2->
1584 remote_msgqueue_base);
1585 if (ch_sn2->remote_msgqueue == NULL)
1586 continue;
1587
1588 spin_lock_irqsave(&ch->lock, irq_flags);
1589 if (nentries < ch->remote_nentries) {
1590 dev_dbg(xpc_chan, "nentries=%d remote_nentries=%d, "
1591 "partid=%d, channel=%d\n", nentries,
1592 ch->remote_nentries, ch->partid, ch->number);
1593
1594 ch->remote_nentries = nentries;
1595 }
1596 spin_unlock_irqrestore(&ch->lock, irq_flags);
1597 return xpSuccess;
1598 }
1599
1600 dev_dbg(xpc_chan, "can't get memory for cached remote message queue, "
1601 "partid=%d, channel=%d\n", ch->partid, ch->number);
1602 return xpNoMemory;
1603}
1604
1605/*
1606 * Allocate message queues and other stuff associated with a channel.
1607 *
1608 * Note: Assumes all of the channel sizes are filled in.
1609 */
1610static enum xp_retval
1611xpc_setup_msg_structures_sn2(struct xpc_channel *ch)
1612{
1613 struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
1614 enum xp_retval ret;
1615
1616 DBUG_ON(ch->flags & XPC_C_SETUP);
1617
1618 ret = xpc_allocate_local_msgqueue_sn2(ch);
1619 if (ret == xpSuccess) {
1620
1621 ret = xpc_allocate_remote_msgqueue_sn2(ch);
1622 if (ret != xpSuccess) {
1623 kfree(ch_sn2->local_msgqueue_base);
1624 ch_sn2->local_msgqueue = NULL;
1625 kfree(ch_sn2->notify_queue);
1626 ch_sn2->notify_queue = NULL;
1627 }
1628 }
1629 return ret;
1630}
1631
1632/*
1633 * Free up message queues and other stuff that were allocated for the specified
1634 * channel.
1635 */
1636static void
1637xpc_teardown_msg_structures_sn2(struct xpc_channel *ch)
1638{
1639 struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
1640
1641 DBUG_ON(!spin_is_locked(&ch->lock));
1642
1643 ch_sn2->remote_msgqueue_pa = 0;
1644
1645 ch_sn2->local_GP->get = 0;
1646 ch_sn2->local_GP->put = 0;
1647 ch_sn2->remote_GP.get = 0;
1648 ch_sn2->remote_GP.put = 0;
1649 ch_sn2->w_local_GP.get = 0;
1650 ch_sn2->w_local_GP.put = 0;
1651 ch_sn2->w_remote_GP.get = 0;
1652 ch_sn2->w_remote_GP.put = 0;
1653 ch_sn2->next_msg_to_pull = 0;
1654
1655 if (ch->flags & XPC_C_SETUP) {
1656 dev_dbg(xpc_chan, "ch->flags=0x%x, partid=%d, channel=%d\n",
1657 ch->flags, ch->partid, ch->number);
1658
1659 kfree(ch_sn2->local_msgqueue_base);
1660 ch_sn2->local_msgqueue = NULL;
1661 kfree(ch_sn2->remote_msgqueue_base);
1662 ch_sn2->remote_msgqueue = NULL;
1663 kfree(ch_sn2->notify_queue);
1664 ch_sn2->notify_queue = NULL;
1665 }
1666}
1667
1668/*
1669 * Notify those who wanted to be notified upon delivery of their message.
1670 */
1671static void
1672xpc_notify_senders_sn2(struct xpc_channel *ch, enum xp_retval reason, s64 put)
1673{
1674 struct xpc_notify_sn2 *notify;
1675 u8 notify_type;
1676 s64 get = ch->sn.sn2.w_remote_GP.get - 1;
1677
1678 while (++get < put && atomic_read(&ch->n_to_notify) > 0) {
1679
1680 notify = &ch->sn.sn2.notify_queue[get % ch->local_nentries];
1681
1682 /*
1683 * See if the notify entry indicates it was associated with
1684 * a message whose sender wants to be notified. It is possible
1685 * that it is, but someone else is doing or has done the
1686 * notification.
1687 */
1688 notify_type = notify->type;
1689 if (notify_type == 0 ||
1690 cmpxchg(&notify->type, notify_type, 0) != notify_type) {
1691 continue;
1692 }
1693
1694 DBUG_ON(notify_type != XPC_N_CALL);
1695
1696 atomic_dec(&ch->n_to_notify);
1697
1698 if (notify->func != NULL) {
1699 dev_dbg(xpc_chan, "notify->func() called, notify=0x%p "
1700 "msg_number=%ld partid=%d channel=%d\n",
1701 (void *)notify, get, ch->partid, ch->number);
1702
1703 notify->func(reason, ch->partid, ch->number,
1704 notify->key);
1705
1706 dev_dbg(xpc_chan, "notify->func() returned, notify=0x%p"
1707 " msg_number=%ld partid=%d channel=%d\n",
1708 (void *)notify, get, ch->partid, ch->number);
1709 }
1710 }
1711}
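
The cmpxchg() above implements a claim-before-notify pattern: whichever context successfully swaps notify->type from the non-zero value it observed down to 0 owns the callback, so a sender is notified exactly once even if delivery and disconnect race. The same pattern in isolation (a hypothetical helper using this file's types):

	static void xpc_claim_and_notify(struct xpc_notify_sn2 *notify,
					 enum xp_retval reason,
					 short partid, int ch_number)
	{
		u8 type = notify->type;

		if (type == 0 || cmpxchg(&notify->type, type, 0) != type)
			return;		/* another context claimed it */

		if (notify->func != NULL)
			notify->func(reason, partid, ch_number, notify->key);
	}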
1712
1713static void
1714xpc_notify_senders_of_disconnect_sn2(struct xpc_channel *ch)
1715{
1716 xpc_notify_senders_sn2(ch, ch->reason, ch->sn.sn2.w_local_GP.put);
1717}
1718
1719/*
1720 * Clear some of the msg flags in the local message queue.
1721 */
1722static inline void
1723xpc_clear_local_msgqueue_flags_sn2(struct xpc_channel *ch)
1724{
1725 struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
1726 struct xpc_msg_sn2 *msg;
1727 s64 get;
1728
1729 get = ch_sn2->w_remote_GP.get;
1730 do {
1731 msg = (struct xpc_msg_sn2 *)((u64)ch_sn2->local_msgqueue +
1732 (get % ch->local_nentries) *
1733 ch->entry_size);
1734 msg->flags = 0;
1735 } while (++get < ch_sn2->remote_GP.get);
1736}
1737
1738/*
1739 * Clear some of the msg flags in the remote message queue.
1740 */
1741static inline void
1742xpc_clear_remote_msgqueue_flags_sn2(struct xpc_channel *ch)
1743{
1744 struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
1745 struct xpc_msg_sn2 *msg;
1746 s64 put;
1747
1748 put = ch_sn2->w_remote_GP.put;
1749 do {
1750 msg = (struct xpc_msg_sn2 *)((u64)ch_sn2->remote_msgqueue +
1751 (put % ch->remote_nentries) *
1752 ch->entry_size);
1753 msg->flags = 0;
1754 } while (++put < ch_sn2->remote_GP.put);
1755}
1756
1757static int
1758xpc_n_of_deliverable_payloads_sn2(struct xpc_channel *ch)
1759{
1760 return ch->sn.sn2.w_remote_GP.put - ch->sn.sn2.w_local_GP.get;
1761}
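
Because the GET and PUT values are monotonically increasing 64-bit counters rather than wrapping indices, the deliverable count above is a plain subtraction, and a counter is converted to a ring slot with a modulo. A minimal sketch of the accounting (variable names match this file; the snippet itself is illustrative):

	s64 put = ch_sn2->w_remote_GP.put;	/* next slot the sender fills */
	s64 get = ch_sn2->w_local_GP.get;	/* next slot we deliver */
	int nready = put - get;			/* payloads awaiting delivery */
	struct xpc_msg_sn2 *msg = (struct xpc_msg_sn2 *)
	    ((u64)ch_sn2->remote_msgqueue +
	     (get % ch->remote_nentries) * ch->entry_size);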
1762
1763static void
1764xpc_process_msg_chctl_flags_sn2(struct xpc_partition *part, int ch_number)
1765{
1766 struct xpc_channel *ch = &part->channels[ch_number];
1767 struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
1768 int npayloads_sent;
1769
1770 ch_sn2->remote_GP = part->sn.sn2.remote_GPs[ch_number];
1771
1772 /* See what, if anything, has changed for each connected channel */
1773
1774 xpc_msgqueue_ref(ch);
1775
1776 if (ch_sn2->w_remote_GP.get == ch_sn2->remote_GP.get &&
1777 ch_sn2->w_remote_GP.put == ch_sn2->remote_GP.put) {
1778 /* nothing changed since GPs were last pulled */
1779 xpc_msgqueue_deref(ch);
1780 return;
1781 }
1782
1783 if (!(ch->flags & XPC_C_CONNECTED)) {
1784 xpc_msgqueue_deref(ch);
1785 return;
1786 }
1787
1788 /*
1789 * First check to see if messages recently sent by us have been
1790 * received by the other side. (The remote GET value will have
1791 * changed since we last looked at it.)
1792 */
1793
1794 if (ch_sn2->w_remote_GP.get != ch_sn2->remote_GP.get) {
1795
1796 /*
1797 * We need to notify any senders that want to be notified
1798 * that their sent messages have been received by their
1799 * intended recipients. We need to do this before updating
1800 * w_remote_GP.get so that we don't allocate the same message
1801 * queue entries prematurely (see xpc_allocate_msg_sn2()).
1802 */
1803 if (atomic_read(&ch->n_to_notify) > 0) {
1804 /*
1805 * Notify senders that messages sent have been
1806 * received and delivered by the other side.
1807 */
1808 xpc_notify_senders_sn2(ch, xpMsgDelivered,
1809 ch_sn2->remote_GP.get);
1810 }
1811
1812 /*
1813 * Clear msg->flags in previously sent messages, so that
1814 * they're ready for xpc_allocate_msg_sn2().
1815 */
1816 xpc_clear_local_msgqueue_flags_sn2(ch);
1817
1818 ch_sn2->w_remote_GP.get = ch_sn2->remote_GP.get;
1819
1820 dev_dbg(xpc_chan, "w_remote_GP.get changed to %ld, partid=%d, "
1821 "channel=%d\n", ch_sn2->w_remote_GP.get, ch->partid,
1822 ch->number);
1823
1824 /*
1825 * If anyone was waiting for message queue entries to become
1826 * available, wake them up.
1827 */
1828 if (atomic_read(&ch->n_on_msg_allocate_wq) > 0)
1829 wake_up(&ch->msg_allocate_wq);
1830 }
1831
1832 /*
1833 * Now check for newly sent messages by the other side. (The remote
1834 * PUT value will have changed since we last looked at it.)
1835 */
1836
1837 if (ch_sn2->w_remote_GP.put != ch_sn2->remote_GP.put) {
1838 /*
1839 * Clear msg->flags in previously received messages, so that
1840 * they're ready for xpc_get_deliverable_payload_sn2().
1841 */
1842 xpc_clear_remote_msgqueue_flags_sn2(ch);
1843
1844 ch_sn2->w_remote_GP.put = ch_sn2->remote_GP.put;
1845
1846 dev_dbg(xpc_chan, "w_remote_GP.put changed to %ld, partid=%d, "
1847 "channel=%d\n", ch_sn2->w_remote_GP.put, ch->partid,
1848 ch->number);
1849
1850 npayloads_sent = xpc_n_of_deliverable_payloads_sn2(ch);
1851 if (npayloads_sent > 0) {
1852 dev_dbg(xpc_chan, "msgs waiting to be copied and "
1853 "delivered=%d, partid=%d, channel=%d\n",
1854 npayloads_sent, ch->partid, ch->number);
1855
1856 if (ch->flags & XPC_C_CONNECTEDCALLOUT_MADE)
1857 xpc_activate_kthreads(ch, npayloads_sent);
1858 }
1859 }
1860
1861 xpc_msgqueue_deref(ch);
1862}
1863
1864static struct xpc_msg_sn2 *
1865xpc_pull_remote_msg_sn2(struct xpc_channel *ch, s64 get)
1866{
1867 struct xpc_partition *part = &xpc_partitions[ch->partid];
1868 struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
1869 unsigned long remote_msg_pa;
1870 struct xpc_msg_sn2 *msg;
1871 u32 msg_index;
1872 u32 nmsgs;
1873 u64 msg_offset;
1874 enum xp_retval ret;
1875
1876 if (mutex_lock_interruptible(&ch_sn2->msg_to_pull_mutex) != 0) {
1877 /* we were interrupted by a signal */
1878 return NULL;
1879 }
1880
1881 while (get >= ch_sn2->next_msg_to_pull) {
1882
1883 /* pull as many messages as are ready and able to be pulled */
1884
1885 msg_index = ch_sn2->next_msg_to_pull % ch->remote_nentries;
1886
1887 DBUG_ON(ch_sn2->next_msg_to_pull >= ch_sn2->w_remote_GP.put);
1888 nmsgs = ch_sn2->w_remote_GP.put - ch_sn2->next_msg_to_pull;
1889 if (msg_index + nmsgs > ch->remote_nentries) {
1890 /* ignore the ones that wrap the msg queue for now */
1891 nmsgs = ch->remote_nentries - msg_index;
1892 }
1893
1894 msg_offset = msg_index * ch->entry_size;
1895 msg = (struct xpc_msg_sn2 *)((u64)ch_sn2->remote_msgqueue +
1896 msg_offset);
1897 remote_msg_pa = ch_sn2->remote_msgqueue_pa + msg_offset;
1898
1899 ret = xpc_pull_remote_cachelines_sn2(part, msg, remote_msg_pa,
1900 nmsgs * ch->entry_size);
1901 if (ret != xpSuccess) {
1902
1903 dev_dbg(xpc_chan, "failed to pull %d msgs starting with"
1904 " msg %ld from partition %d, channel=%d, "
1905 "ret=%d\n", nmsgs, ch_sn2->next_msg_to_pull,
1906 ch->partid, ch->number, ret);
1907
1908 XPC_DEACTIVATE_PARTITION(part, ret);
1909
1910 mutex_unlock(&ch_sn2->msg_to_pull_mutex);
1911 return NULL;
1912 }
1913
1914 ch_sn2->next_msg_to_pull += nmsgs;
1915 }
1916
1917 mutex_unlock(&ch_sn2->msg_to_pull_mutex);
1918
1919 /* return the message we were looking for */
1920 msg_offset = (get % ch->remote_nentries) * ch->entry_size;
1921 msg = (struct xpc_msg_sn2 *)((u64)ch_sn2->remote_msgqueue + msg_offset);
1922
1923 return msg;
1924}
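/*
 * Editor's note (illustrative): the "ignore the ones that wrap" clamp
 * above splits a pull that would cross the end of the circular queue into
 * successive passes of the while loop. E.g. with remote_nentries = 8:
 *
 *	msg_index = next_msg_to_pull % 8;		// say 6
 *	nmsgs = w_remote_GP.put - next_msg_to_pull;	// say 5
 *	if (msg_index + nmsgs > 8)			// 6 + 5 > 8
 *		nmsgs = 8 - msg_index;			// pull slots 6..7 now;
 *							// slots 0..2 next pass
 */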
1925
1926/*
1927 * Get the next deliverable message's payload.
1928 */
1929static void *
1930xpc_get_deliverable_payload_sn2(struct xpc_channel *ch)
1931{
1932 struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
1933 struct xpc_msg_sn2 *msg;
1934 void *payload = NULL;
1935 s64 get;
1936
1937 do {
1938 if (ch->flags & XPC_C_DISCONNECTING)
1939 break;
1940
1941 get = ch_sn2->w_local_GP.get;
1942 rmb(); /* guarantee that .get loads before .put */
1943 if (get == ch_sn2->w_remote_GP.put)
1944 break;
1945
1946 /* There are messages waiting to be pulled and delivered.
1947 * We need to try to secure one for ourselves. We'll do this
1948 * by trying to increment w_local_GP.get and hope that no one
1949 * else beats us to it. If they do, we'll simply have
1950 * to try again for the next one.
1951 */
1952
1953 if (cmpxchg(&ch_sn2->w_local_GP.get, get, get + 1) == get) {
1954 /* we got the entry referenced by get */
1955
1956 dev_dbg(xpc_chan, "w_local_GP.get changed to %ld, "
1957 "partid=%d, channel=%d\n", get + 1,
1958 ch->partid, ch->number);
1959
1960 /* pull the message from the remote partition */
1961
1962 msg = xpc_pull_remote_msg_sn2(ch, get);
1963
1964 if (msg == NULL)
1965 break; /* pull failed; the partition is being deactivated */
1966 DBUG_ON(msg->number != get);
1967 DBUG_ON(msg->flags & XPC_M_SN2_DONE);
1968 DBUG_ON(!(msg->flags & XPC_M_SN2_READY));
1969 payload = &msg->payload;
1970 break;
1971 }
1972 } while (1);
1973
1974 return payload;
1975}
1976
1977/*
1978 * Now we actually send the messages that are ready to be sent by advancing
1979 * the local message queue's Put value and then sending a chctl msgrequest to
1980 * the recipient partition.
1981 */
1982static void
1983xpc_send_msgs_sn2(struct xpc_channel *ch, s64 initial_put)
1984{
1985 struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
1986 struct xpc_msg_sn2 *msg;
1987 s64 put = initial_put + 1;
1988 int send_msgrequest = 0;
1989
1990 while (1) {
1991
1992 while (1) {
1993 if (put == ch_sn2->w_local_GP.put)
1994 break;
1995
1996 msg = (struct xpc_msg_sn2 *)((u64)ch_sn2->
1997 local_msgqueue + (put %
1998 ch->local_nentries) *
1999 ch->entry_size);
2000
2001 if (!(msg->flags & XPC_M_SN2_READY))
2002 break;
2003
2004 put++;
2005 }
2006
2007 if (put == initial_put) {
2008 /* nothing's changed */
2009 break;
2010 }
2011
2012 if (cmpxchg_rel(&ch_sn2->local_GP->put, initial_put, put) !=
2013 initial_put) {
2014 /* someone else beat us to it */
2015 DBUG_ON(ch_sn2->local_GP->put < initial_put);
2016 break;
2017 }
2018
2019 /* we just set the new value of local_GP->put */
2020
2021 dev_dbg(xpc_chan, "local_GP->put changed to %ld, partid=%d, "
2022 "channel=%d\n", put, ch->partid, ch->number);
2023
2024 send_msgrequest = 1;
2025
2026 /*
2027 * We need to ensure that the message referenced by
2028 * local_GP->put is not XPC_M_SN2_READY or that local_GP->put
2029 * equals w_local_GP.put, so we'll go have a look.
2030 */
2031 initial_put = put;
2032 }
2033
2034 if (send_msgrequest)
2035 xpc_send_chctl_msgrequest_sn2(ch);
2036}
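/*
 * Editor's note: cmpxchg_rel() above is ia64's compare-and-exchange with
 * release semantics -- all earlier stores (the copied payloads and their
 * XPC_M_SN2_READY flags) become globally visible before the new Put value
 * does. The publish step in isolation (sketch):
 *
 *	if (cmpxchg_rel(&local_GP->put, old_put, new_put) == old_put) {
 *		/* Put advanced; every msg in (old_put, new_put] is */
 *		/* now safely observable by the remote partition */
 *	}
 */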
2037
2038/*
2039 * Allocate an entry for a message from the message queue associated with the
2040 * specified channel.
2041 */
2042static enum xp_retval
2043xpc_allocate_msg_sn2(struct xpc_channel *ch, u32 flags,
2044 struct xpc_msg_sn2 **address_of_msg)
2045{
2046 struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
2047 struct xpc_msg_sn2 *msg;
2048 enum xp_retval ret;
2049 s64 put;
2050
2051 /*
2052 * Get the next available message entry from the local message queue.
2053 * If none are available, we'll make sure that we grab the latest
2054 * GP values.
2055 */
2056 ret = xpTimeout;
2057
2058 while (1) {
2059
2060 put = ch_sn2->w_local_GP.put;
2061 rmb(); /* guarantee that .put loads before .get */
2062 if (put - ch_sn2->w_remote_GP.get < ch->local_nentries) {
2063
2064 /* There are available message entries. We need to try
2065 * to secure one for ourselves. We'll do this by trying
2066 * to increment w_local_GP.put as long as someone else
2067 * doesn't beat us to it. If they do, we'll have to
2068 * try again.
2069 */
2070 if (cmpxchg(&ch_sn2->w_local_GP.put, put, put + 1) ==
2071 put) {
2072 /* we got the entry referenced by put */
2073 break;
2074 }
2075 continue; /* try again */
2076 }
2077
2078 /*
2079 * There aren't any available msg entries at this time.
2080 *
2081 * In waiting for a message entry to become available,
2082 * we set a timeout in case the other side is not sending
2083 * completion interrupts. This lets us fake a notify IRQ
2084 * that will cause the notify IRQ handler to fetch the latest
2085 * GP values as if an interrupt was sent by the other side.
2086 */
2087 if (ret == xpTimeout)
2088 xpc_send_chctl_local_msgrequest_sn2(ch);
2089
2090 if (flags & XPC_NOWAIT)
2091 return xpNoWait;
2092
2093 ret = xpc_allocate_msg_wait(ch);
2094 if (ret != xpInterrupted && ret != xpTimeout)
2095 return ret;
2096 }
2097
2098 /* get the message's address and initialize it */
2099 msg = (struct xpc_msg_sn2 *)((u64)ch_sn2->local_msgqueue +
2100 (put % ch->local_nentries) *
2101 ch->entry_size);
2102
2103 DBUG_ON(msg->flags != 0);
2104 msg->number = put;
2105
2106 dev_dbg(xpc_chan, "w_local_GP.put changed to %ld; msg=0x%p, "
2107 "msg_number=%ld, partid=%d, channel=%d\n", put + 1,
2108 (void *)msg, msg->number, ch->partid, ch->number);
2109
2110 *address_of_msg = msg;
2111 return xpSuccess;
2112}
2113
2114/*
2115 * Common code that does the actual sending of the message by advancing the
2116 * local message queue's Put value and sending a chctl msgrequest to the
2117 * partition the message is being sent to.
2118 */
2119static enum xp_retval
2120xpc_send_payload_sn2(struct xpc_channel *ch, u32 flags, void *payload,
2121 u16 payload_size, u8 notify_type, xpc_notify_func func,
2122 void *key)
2123{
2124 enum xp_retval ret = xpSuccess;
2125 struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
2126 struct xpc_msg_sn2 *msg = msg; /* self-init quiets 'uninitialized' warning */
2127 struct xpc_notify_sn2 *notify = notify; /* likewise */
2128 s64 msg_number;
2129 s64 put;
2130
2131 DBUG_ON(notify_type == XPC_N_CALL && func == NULL);
2132
2133 if (XPC_MSG_SIZE(payload_size) > ch->entry_size)
2134 return xpPayloadTooBig;
2135
2136 xpc_msgqueue_ref(ch);
2137
2138 if (ch->flags & XPC_C_DISCONNECTING) {
2139 ret = ch->reason;
2140 goto out_1;
2141 }
2142 if (!(ch->flags & XPC_C_CONNECTED)) {
2143 ret = xpNotConnected;
2144 goto out_1;
2145 }
2146
2147 ret = xpc_allocate_msg_sn2(ch, flags, &msg);
2148 if (ret != xpSuccess)
2149 goto out_1;
2150
2151 msg_number = msg->number;
2152
2153 if (notify_type != 0) {
2154 /*
2155 * Tell the remote side to send an ACK interrupt when the
2156 * message has been delivered.
2157 */
2158 msg->flags |= XPC_M_SN2_INTERRUPT;
2159
2160 atomic_inc(&ch->n_to_notify);
2161
2162 notify = &ch_sn2->notify_queue[msg_number % ch->local_nentries];
2163 notify->func = func;
2164 notify->key = key;
2165 notify->type = notify_type;
2166
2167 /* ??? Is a mb() needed here? */
2168
2169 if (ch->flags & XPC_C_DISCONNECTING) {
2170 /*
2171 * An error occurred between our last error check and
2172 * this one. We will try to clear the type field from
2173 * the notify entry. If we succeed then
2174 * xpc_disconnect_channel() didn't already process
2175 * the notify entry.
2176 */
2177 if (cmpxchg(&notify->type, notify_type, 0) ==
2178 notify_type) {
2179 atomic_dec(&ch->n_to_notify);
2180 ret = ch->reason;
2181 }
2182 goto out_1;
2183 }
2184 }
2185
2186 memcpy(&msg->payload, payload, payload_size);
2187
2188 msg->flags |= XPC_M_SN2_READY;
2189
2190 /*
2191 * The preceding store of msg->flags must occur before the following
2192 * load of local_GP->put.
2193 */
2194 mb();
2195
2196 /* see if the message is next in line to be sent, if so send it */
2197
2198 put = ch_sn2->local_GP->put;
2199 if (put == msg_number)
2200 xpc_send_msgs_sn2(ch, put);
2201
2202out_1:
2203 xpc_msgqueue_deref(ch);
2204 return ret;
2205}
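/*
 * Editor's note: xpc_send_payload_sn2() is not called directly by XPC
 * users; it is reached through the xpc_send_payload hook installed by
 * xpc_init_sn2() below. A hedged usage sketch, assuming the
 * xpc_initiate_send() wrapper from xpc_channel.c in this series and a
 * hypothetical payload struct:
 *
 *	struct my_msg { int cmd; } m = { .cmd = 1 };	/* hypothetical */
 *	enum xp_retval ret;
 *
 *	ret = xpc_initiate_send(partid, ch_number, XPC_WAIT, &m, sizeof(m));
 *	if (ret != xpSuccess)
 *		dev_dbg(xpc_chan, "send failed, ret=%d\n", ret);
 */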
2206
2207/*
2208 * Now we actually acknowledge the messages that have been delivered and ack'd
2209 * by advancing the cached remote message queue's Get value and, if requested,
2210 * sending a chctl msgrequest to the message sender's partition.
2211 *
2212 * If a message has XPC_M_SN2_INTERRUPT set, send an interrupt to the partition
2213 * that sent the message.
2214 */
2215static void
2216xpc_acknowledge_msgs_sn2(struct xpc_channel *ch, s64 initial_get, u8 msg_flags)
2217{
2218 struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
2219 struct xpc_msg_sn2 *msg;
2220 s64 get = initial_get + 1;
2221 int send_msgrequest = 0;
2222
2223 while (1) {
2224
2225 while (1) {
2226 if (get == ch_sn2->w_local_GP.get)
2227 break;
2228
2229 msg = (struct xpc_msg_sn2 *)((u64)ch_sn2->
2230 remote_msgqueue + (get %
2231 ch->remote_nentries) *
2232 ch->entry_size);
2233
2234 if (!(msg->flags & XPC_M_SN2_DONE))
2235 break;
2236
2237 msg_flags |= msg->flags;
2238 get++;
2239 }
2240
2241 if (get == initial_get) {
2242 /* nothing's changed */
2243 break;
2244 }
2245
2246 if (cmpxchg_rel(&ch_sn2->local_GP->get, initial_get, get) !=
2247 initial_get) {
2248 /* someone else beat us to it */
2249 DBUG_ON(ch_sn2->local_GP->get <= initial_get);
2250 break;
2251 }
2252
2253 /* we just set the new value of local_GP->get */
2254
2255 dev_dbg(xpc_chan, "local_GP->get changed to %ld, partid=%d, "
2256 "channel=%d\n", get, ch->partid, ch->number);
2257
2258 send_msgrequest = (msg_flags & XPC_M_SN2_INTERRUPT);
2259
2260 /*
2261 * We need to ensure that the message referenced by
2262 * local_GP->get is not XPC_M_SN2_DONE or that local_GP->get
2263 * equals w_local_GP.get, so we'll go have a look.
2264 */
2265 initial_get = get;
2266 }
2267
2268 if (send_msgrequest)
2269 xpc_send_chctl_msgrequest_sn2(ch);
2270}
2271
2272static void
2273xpc_received_payload_sn2(struct xpc_channel *ch, void *payload)
2274{
2275 struct xpc_msg_sn2 *msg;
2276 s64 msg_number;
2277 s64 get;
2278
2279 msg = container_of(payload, struct xpc_msg_sn2, payload);
2280 msg_number = msg->number;
2281
2282 dev_dbg(xpc_chan, "msg=0x%p, msg_number=%ld, partid=%d, channel=%d\n",
2283 (void *)msg, msg_number, ch->partid, ch->number);
2284
2285 DBUG_ON((((u64)msg - (u64)ch->remote_msgqueue) / ch->entry_size) !=
2286 msg_number % ch->remote_nentries);
2287 DBUG_ON(msg->flags & XPC_M_SN2_DONE);
2288
2289 msg->flags |= XPC_M_SN2_DONE;
2290
2291 /*
2292 * The preceding store of msg->flags must occur before the following
2293 * load of local_GP->get.
2294 */
2295 mb();
2296
2297 /*
2298 * See if this message is next in line to be acknowledged as having
2299 * been delivered.
2300 */
2301 get = ch->sn.sn2.local_GP->get;
2302 if (get == msg_number)
2303 xpc_acknowledge_msgs_sn2(ch, get, msg->flags);
2304}
2305
2306int
2307xpc_init_sn2(void)
2308{
2309 int ret;
2310 size_t buf_size;
2311
2312 xpc_setup_partitions_sn = xpc_setup_partitions_sn_sn2;
2313 xpc_get_partition_rsvd_page_pa = xpc_get_partition_rsvd_page_pa_sn2;
2314 xpc_setup_rsvd_page_sn = xpc_setup_rsvd_page_sn_sn2;
2315 xpc_increment_heartbeat = xpc_increment_heartbeat_sn2;
2316 xpc_offline_heartbeat = xpc_offline_heartbeat_sn2;
2317 xpc_online_heartbeat = xpc_online_heartbeat_sn2;
2318 xpc_heartbeat_init = xpc_heartbeat_init_sn2;
2319 xpc_heartbeat_exit = xpc_heartbeat_exit_sn2;
2320 xpc_get_remote_heartbeat = xpc_get_remote_heartbeat_sn2;
2321
2322 xpc_request_partition_activation = xpc_request_partition_activation_sn2;
2323 xpc_request_partition_reactivation =
2324 xpc_request_partition_reactivation_sn2;
2325 xpc_request_partition_deactivation =
2326 xpc_request_partition_deactivation_sn2;
2327 xpc_cancel_partition_deactivation_request =
2328 xpc_cancel_partition_deactivation_request_sn2;
2329
2330 xpc_process_activate_IRQ_rcvd = xpc_process_activate_IRQ_rcvd_sn2;
2331 xpc_setup_ch_structures_sn = xpc_setup_ch_structures_sn_sn2;
2332 xpc_teardown_ch_structures_sn = xpc_teardown_ch_structures_sn_sn2;
2333 xpc_make_first_contact = xpc_make_first_contact_sn2;
2334
2335 xpc_get_chctl_all_flags = xpc_get_chctl_all_flags_sn2;
2336 xpc_send_chctl_closerequest = xpc_send_chctl_closerequest_sn2;
2337 xpc_send_chctl_closereply = xpc_send_chctl_closereply_sn2;
2338 xpc_send_chctl_openrequest = xpc_send_chctl_openrequest_sn2;
2339 xpc_send_chctl_openreply = xpc_send_chctl_openreply_sn2;
2340
2341 xpc_save_remote_msgqueue_pa = xpc_save_remote_msgqueue_pa_sn2;
2342
2343 xpc_setup_msg_structures = xpc_setup_msg_structures_sn2;
2344 xpc_teardown_msg_structures = xpc_teardown_msg_structures_sn2;
2345
2346 xpc_notify_senders_of_disconnect = xpc_notify_senders_of_disconnect_sn2;
2347 xpc_process_msg_chctl_flags = xpc_process_msg_chctl_flags_sn2;
2348 xpc_n_of_deliverable_payloads = xpc_n_of_deliverable_payloads_sn2;
2349 xpc_get_deliverable_payload = xpc_get_deliverable_payload_sn2;
2350
2351 xpc_indicate_partition_engaged = xpc_indicate_partition_engaged_sn2;
2352 xpc_indicate_partition_disengaged =
2353 xpc_indicate_partition_disengaged_sn2;
2354 xpc_partition_engaged = xpc_partition_engaged_sn2;
2355 xpc_any_partition_engaged = xpc_any_partition_engaged_sn2;
2356 xpc_assume_partition_disengaged = xpc_assume_partition_disengaged_sn2;
2357
2358 xpc_send_payload = xpc_send_payload_sn2;
2359 xpc_received_payload = xpc_received_payload_sn2;
2360
2361 if (offsetof(struct xpc_msg_sn2, payload) > XPC_MSG_HDR_MAX_SIZE) {
2362 dev_err(xpc_part, "header portion of struct xpc_msg_sn2 is "
2363 "larger than %d\n", XPC_MSG_HDR_MAX_SIZE);
2364 return -E2BIG;
2365 }
2366
2367 buf_size = max(XPC_RP_VARS_SIZE,
2368 XPC_RP_HEADER_SIZE + XP_NASID_MASK_BYTES_SN2);
2369 xpc_remote_copy_buffer_sn2 = xpc_kmalloc_cacheline_aligned(buf_size,
2370 GFP_KERNEL,
2371 &xpc_remote_copy_buffer_base_sn2);
2372 if (xpc_remote_copy_buffer_sn2 == NULL) {
2373 dev_err(xpc_part, "can't get memory for remote copy buffer\n");
2374 return -ENOMEM;
2375 }
2376
2377 /* open up protections for IPI and [potentially] amo operations */
2378 xpc_allow_IPI_ops_sn2();
2379 xpc_allow_amo_ops_shub_wars_1_1_sn2();
2380
2381 /*
2382 * This is safe to do before the xpc_hb_checker thread has started
2383 * because the handler releases a wait queue. If an interrupt is
2384 * received before the thread is waiting, it will not go to sleep,
2385 * but rather immediately process the interrupt.
2386 */
2387 ret = request_irq(SGI_XPC_ACTIVATE, xpc_handle_activate_IRQ_sn2, 0,
2388 "xpc hb", NULL);
2389 if (ret != 0) {
2390 dev_err(xpc_part, "can't register ACTIVATE IRQ handler, "
2391 "errno=%d\n", -ret);
2392 xpc_disallow_IPI_ops_sn2();
2393 kfree(xpc_remote_copy_buffer_base_sn2);
2394 }
2395 return ret;
2396}
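/*
 * Editor's note: the assignment block in xpc_init_sn2() fills in XPC's
 * hand-rolled ops vector -- the generic code in xpc_main.c and
 * xpc_channel.c calls only through these globals, so a single xpc module
 * can bind to either the sn2 or the uv implementation at init time.
 * Dispatch is plain C through a function pointer, e.g. (sketch):
 *
 *	/* declared once in xpc.h, assigned above, called generically: */
 *	ret = xpc_send_payload(ch, flags, payload, payload_size,
 *			       XPC_N_CALL, func, key);
 */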
2397
2398void
2399xpc_exit_sn2(void)
2400{
2401 free_irq(SGI_XPC_ACTIVATE, NULL);
2402 xpc_disallow_IPI_ops_sn2();
2403 kfree(xpc_remote_copy_buffer_base_sn2);
2404}
diff --git a/drivers/misc/sgi-xp/xpc_uv.c b/drivers/misc/sgi-xp/xpc_uv.c
new file mode 100644
index 000000000000..1ac694c01623
--- /dev/null
+++ b/drivers/misc/sgi-xp/xpc_uv.c
@@ -0,0 +1,1443 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
7 */
8
9/*
10 * Cross Partition Communication (XPC) uv-based functions.
11 *
12 * Architecture specific implementation of common functions.
13 *
14 */
15
16#include <linux/kernel.h>
17#include <linux/mm.h>
18#include <linux/interrupt.h>
19#include <linux/delay.h>
20#include <linux/device.h>
21#include <asm/uv/uv_hub.h>
22#include "../sgi-gru/gru.h"
23#include "../sgi-gru/grukservices.h"
24#include "xpc.h"
25
26static atomic64_t xpc_heartbeat_uv;
27static DECLARE_BITMAP(xpc_heartbeating_to_mask_uv, XP_MAX_NPARTITIONS_UV);
28
29#define XPC_ACTIVATE_MSG_SIZE_UV (1 * GRU_CACHE_LINE_BYTES)
30#define XPC_NOTIFY_MSG_SIZE_UV (2 * GRU_CACHE_LINE_BYTES)
31
32#define XPC_ACTIVATE_MQ_SIZE_UV (4 * XP_MAX_NPARTITIONS_UV * \
33 XPC_ACTIVATE_MSG_SIZE_UV)
34#define XPC_NOTIFY_MQ_SIZE_UV (4 * XP_MAX_NPARTITIONS_UV * \
35 XPC_NOTIFY_MSG_SIZE_UV)
36
37static void *xpc_activate_mq_uv;
38static void *xpc_notify_mq_uv;
39
40static int
41xpc_setup_partitions_sn_uv(void)
42{
43 short partid;
44 struct xpc_partition_uv *part_uv;
45
46 for (partid = 0; partid < XP_MAX_NPARTITIONS_UV; partid++) {
47 part_uv = &xpc_partitions[partid].sn.uv;
48
49 spin_lock_init(&part_uv->flags_lock);
50 part_uv->remote_act_state = XPC_P_AS_INACTIVE;
51 }
52 return 0;
53}
54
55static void *
56xpc_create_gru_mq_uv(unsigned int mq_size, int cpuid, unsigned int irq,
57 irq_handler_t irq_handler)
58{
59 int ret;
60 int nid;
61 int mq_order;
62 struct page *page;
63 void *mq;
64
65 nid = cpu_to_node(cpuid);
66 mq_order = get_order(mq_size);
67 page = alloc_pages_node(nid, GFP_KERNEL | __GFP_ZERO | GFP_THISNODE,
68 mq_order);
69 if (page == NULL) {
70 dev_err(xpc_part, "xpc_create_gru_mq_uv() failed to alloc %d "
71 "bytes of memory on nid=%d for GRU mq\n", mq_size, nid);
72 return NULL;
73 }
74
75 mq = page_address(page);
76 ret = gru_create_message_queue(mq, mq_size);
77 if (ret != 0) {
78 dev_err(xpc_part, "gru_create_message_queue() returned "
79 "error=%d\n", ret);
80 free_pages((unsigned long)mq, mq_order);
81 return NULL;
82 }
83
84 /* !!! Need to do some other things to set up IRQ */
85
86 ret = request_irq(irq, irq_handler, 0, "xpc", NULL);
87 if (ret != 0) {
88 dev_err(xpc_part, "request_irq(irq=%d) returned error=%d\n",
89 irq, ret);
90 free_pages((unsigned long)mq, mq_order);
91 return NULL;
92 }
93
94 /* !!! enable generation of irq when GRU mq op occurs to this mq */
95
96 /* ??? allow other partitions to access GRU mq? */
97
98 return mq;
99}
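/*
 * Editor's note (illustrative, assuming 4 KiB pages): get_order() rounds
 * a byte count up to a power-of-two number of pages:
 *
 *	get_order(4096)  == 0		// 1 page
 *	get_order(8192)  == 1		// 2 pages
 *	get_order(12000) == 2		// rounds up to 4 pages
 *
 * alloc_pages_node(nid, ..., mq_order) then allocates 2^mq_order
 * contiguous pages on the node owning 'cpuid', keeping the GRU message
 * queue node-local to the CPU that will service its interrupts.
 */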
100
101static void
102xpc_destroy_gru_mq_uv(void *mq, unsigned int mq_size, unsigned int irq)
103{
104 /* ??? disallow other partitions to access GRU mq? */
105
106 /* !!! disable generation of irq when GRU mq op occurs to this mq */
107
108 free_irq(irq, NULL);
109
110 free_pages((unsigned long)mq, get_order(mq_size));
111}
112
113static enum xp_retval
114xpc_send_gru_msg(unsigned long mq_gpa, void *msg, size_t msg_size)
115{
116 enum xp_retval xp_ret;
117 int ret;
118
119 while (1) {
120 ret = gru_send_message_gpa(mq_gpa, msg, msg_size);
121 if (ret == MQE_OK) {
122 xp_ret = xpSuccess;
123 break;
124 }
125
126 if (ret == MQE_QUEUE_FULL) {
127 dev_dbg(xpc_chan, "gru_send_message_gpa() returned "
128 "error=MQE_QUEUE_FULL\n");
129 /* !!! handle QLimit reached; delay & try again */
130 /* ??? Do we add a limit to the number of retries? */
131 (void)msleep_interruptible(10);
132 } else if (ret == MQE_CONGESTION) {
133 dev_dbg(xpc_chan, "gru_send_message_gpa() returned "
134 "error=MQE_CONGESTION\n");
135 /* !!! handle LB Overflow; simply try again */
136 /* ??? Do we add a limit to the number of retries? */
137 } else {
138 /* !!! Currently this is MQE_UNEXPECTED_CB_ERR */
139 dev_err(xpc_chan, "gru_send_message_gpa() returned "
140 "error=%d\n", ret);
141 xp_ret = xpGruSendMqError;
142 break;
143 }
144 }
145 return xp_ret;
146}
147
148static void
149xpc_process_activate_IRQ_rcvd_uv(void)
150{
151 unsigned long irq_flags;
152 short partid;
153 struct xpc_partition *part;
154 u8 act_state_req;
155
156 DBUG_ON(xpc_activate_IRQ_rcvd == 0);
157
158 spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
159 for (partid = 0; partid < XP_MAX_NPARTITIONS_UV; partid++) {
160 part = &xpc_partitions[partid];
161
162 if (part->sn.uv.act_state_req == 0)
163 continue;
164
165 xpc_activate_IRQ_rcvd--;
166 BUG_ON(xpc_activate_IRQ_rcvd < 0);
167
168 act_state_req = part->sn.uv.act_state_req;
169 part->sn.uv.act_state_req = 0;
170 spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
171
172 if (act_state_req == XPC_P_ASR_ACTIVATE_UV) {
173 if (part->act_state == XPC_P_AS_INACTIVE)
174 xpc_activate_partition(part);
175 else if (part->act_state == XPC_P_AS_DEACTIVATING)
176 XPC_DEACTIVATE_PARTITION(part, xpReactivating);
177
178 } else if (act_state_req == XPC_P_ASR_REACTIVATE_UV) {
179 if (part->act_state == XPC_P_AS_INACTIVE)
180 xpc_activate_partition(part);
181 else
182 XPC_DEACTIVATE_PARTITION(part, xpReactivating);
183
184 } else if (act_state_req == XPC_P_ASR_DEACTIVATE_UV) {
185 XPC_DEACTIVATE_PARTITION(part, part->sn.uv.reason);
186
187 } else {
188 BUG();
189 }
190
191 spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
192 if (xpc_activate_IRQ_rcvd == 0)
193 break;
194 }
195 spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
197}
198
199static void
200xpc_handle_activate_mq_msg_uv(struct xpc_partition *part,
201 struct xpc_activate_mq_msghdr_uv *msg_hdr,
202 int *wakeup_hb_checker)
203{
204 unsigned long irq_flags;
205 struct xpc_partition_uv *part_uv = &part->sn.uv;
206 struct xpc_openclose_args *args;
207
208 part_uv->remote_act_state = msg_hdr->act_state;
209
210 switch (msg_hdr->type) {
211 case XPC_ACTIVATE_MQ_MSG_SYNC_ACT_STATE_UV:
212 /* syncing of remote_act_state was just done above */
213 break;
214
215 case XPC_ACTIVATE_MQ_MSG_INC_HEARTBEAT_UV: {
216 struct xpc_activate_mq_msg_heartbeat_req_uv *msg;
217
218 msg = container_of(msg_hdr,
219 struct xpc_activate_mq_msg_heartbeat_req_uv,
220 hdr);
221 part_uv->heartbeat = msg->heartbeat;
222 break;
223 }
224 case XPC_ACTIVATE_MQ_MSG_OFFLINE_HEARTBEAT_UV: {
225 struct xpc_activate_mq_msg_heartbeat_req_uv *msg;
226
227 msg = container_of(msg_hdr,
228 struct xpc_activate_mq_msg_heartbeat_req_uv,
229 hdr);
230 part_uv->heartbeat = msg->heartbeat;
231
232 spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
233 part_uv->flags |= XPC_P_HEARTBEAT_OFFLINE_UV;
234 spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags);
235 break;
236 }
237 case XPC_ACTIVATE_MQ_MSG_ONLINE_HEARTBEAT_UV: {
238 struct xpc_activate_mq_msg_heartbeat_req_uv *msg;
239
240 msg = container_of(msg_hdr,
241 struct xpc_activate_mq_msg_heartbeat_req_uv,
242 hdr);
243 part_uv->heartbeat = msg->heartbeat;
244
245 spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
246 part_uv->flags &= ~XPC_P_HEARTBEAT_OFFLINE_UV;
247 spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags);
248 break;
249 }
250 case XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV: {
251 struct xpc_activate_mq_msg_activate_req_uv *msg;
252
253 /*
254 * ??? Do we deal here with ts_jiffies being different
255 * ??? if act_state != XPC_P_AS_INACTIVE instead of
256 * ??? below?
257 */
258 msg = container_of(msg_hdr, struct
259 xpc_activate_mq_msg_activate_req_uv, hdr);
260
261 spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
262 if (part_uv->act_state_req == 0)
263 xpc_activate_IRQ_rcvd++;
264 part_uv->act_state_req = XPC_P_ASR_ACTIVATE_UV;
265 part->remote_rp_pa = msg->rp_gpa; /* !!! _pa is _gpa */
266 part->remote_rp_ts_jiffies = msg_hdr->rp_ts_jiffies;
267 part_uv->remote_activate_mq_gpa = msg->activate_mq_gpa;
268 spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
269
270 (*wakeup_hb_checker)++;
271 break;
272 }
273 case XPC_ACTIVATE_MQ_MSG_DEACTIVATE_REQ_UV: {
274 struct xpc_activate_mq_msg_deactivate_req_uv *msg;
275
276 msg = container_of(msg_hdr, struct
277 xpc_activate_mq_msg_deactivate_req_uv, hdr);
278
279 spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
280 if (part_uv->act_state_req == 0)
281 xpc_activate_IRQ_rcvd++;
282 part_uv->act_state_req = XPC_P_ASR_DEACTIVATE_UV;
283 part_uv->reason = msg->reason;
284 spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
285
286 (*wakeup_hb_checker)++;
287 return;
288 }
289 case XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREQUEST_UV: {
290 struct xpc_activate_mq_msg_chctl_closerequest_uv *msg;
291
292 msg = container_of(msg_hdr, struct
293 xpc_activate_mq_msg_chctl_closerequest_uv,
294 hdr);
295 args = &part->remote_openclose_args[msg->ch_number];
296 args->reason = msg->reason;
297
298 spin_lock_irqsave(&part->chctl_lock, irq_flags);
299 part->chctl.flags[msg->ch_number] |= XPC_CHCTL_CLOSEREQUEST;
300 spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
301
302 xpc_wakeup_channel_mgr(part);
303 break;
304 }
305 case XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREPLY_UV: {
306 struct xpc_activate_mq_msg_chctl_closereply_uv *msg;
307
308 msg = container_of(msg_hdr, struct
309 xpc_activate_mq_msg_chctl_closereply_uv,
310 hdr);
311
312 spin_lock_irqsave(&part->chctl_lock, irq_flags);
313 part->chctl.flags[msg->ch_number] |= XPC_CHCTL_CLOSEREPLY;
314 spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
315
316 xpc_wakeup_channel_mgr(part);
317 break;
318 }
319 case XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREQUEST_UV: {
320 struct xpc_activate_mq_msg_chctl_openrequest_uv *msg;
321
322 msg = container_of(msg_hdr, struct
323 xpc_activate_mq_msg_chctl_openrequest_uv,
324 hdr);
325 args = &part->remote_openclose_args[msg->ch_number];
326 args->entry_size = msg->entry_size;
327 args->local_nentries = msg->local_nentries;
328
329 spin_lock_irqsave(&part->chctl_lock, irq_flags);
330 part->chctl.flags[msg->ch_number] |= XPC_CHCTL_OPENREQUEST;
331 spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
332
333 xpc_wakeup_channel_mgr(part);
334 break;
335 }
336 case XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREPLY_UV: {
337 struct xpc_activate_mq_msg_chctl_openreply_uv *msg;
338
339 msg = container_of(msg_hdr, struct
340 xpc_activate_mq_msg_chctl_openreply_uv, hdr);
341 args = &part->remote_openclose_args[msg->ch_number];
342 args->remote_nentries = msg->remote_nentries;
343 args->local_nentries = msg->local_nentries;
344 args->local_msgqueue_pa = msg->local_notify_mq_gpa;
345
346 spin_lock_irqsave(&part->chctl_lock, irq_flags);
347 part->chctl.flags[msg->ch_number] |= XPC_CHCTL_OPENREPLY;
348 spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
349
350 xpc_wakeup_channel_mgr(part);
351 break;
352 }
353 case XPC_ACTIVATE_MQ_MSG_MARK_ENGAGED_UV:
354 spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
355 part_uv->flags |= XPC_P_ENGAGED_UV;
356 spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags);
357 break;
358
359 case XPC_ACTIVATE_MQ_MSG_MARK_DISENGAGED_UV:
360 spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
361 part_uv->flags &= ~XPC_P_ENGAGED_UV;
362 spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags);
363 break;
364
365 default:
366 dev_err(xpc_part, "received unknown activate_mq msg type=%d "
367 "from partition=%d\n", msg_hdr->type, XPC_PARTID(part));
368
369 /* get hb checker to deactivate from the remote partition */
370 spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
371 if (part_uv->act_state_req == 0)
372 xpc_activate_IRQ_rcvd++;
373 part_uv->act_state_req = XPC_P_ASR_DEACTIVATE_UV;
374 part_uv->reason = xpBadMsgType;
375 spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
376
377 (*wakeup_hb_checker)++;
378 return;
379 }
380
381 if (msg_hdr->rp_ts_jiffies != part->remote_rp_ts_jiffies &&
382 part->remote_rp_ts_jiffies != 0) {
383 /*
384 * ??? Does what we do here need to be sensitive to
385 * ??? act_state or remote_act_state?
386 */
387 spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
388 if (part_uv->act_state_req == 0)
389 xpc_activate_IRQ_rcvd++;
390 part_uv->act_state_req = XPC_P_ASR_REACTIVATE_UV;
391 spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
392
393 (*wakeup_hb_checker)++;
394 }
395}
396
397static irqreturn_t
398xpc_handle_activate_IRQ_uv(int irq, void *dev_id)
399{
400 struct xpc_activate_mq_msghdr_uv *msg_hdr;
401 short partid;
402 struct xpc_partition *part;
403 int wakeup_hb_checker = 0;
404
405 while ((msg_hdr = gru_get_next_message(xpc_activate_mq_uv)) != NULL) {
406
407 partid = msg_hdr->partid;
408 if (partid < 0 || partid >= XP_MAX_NPARTITIONS_UV) {
409 dev_err(xpc_part, "xpc_handle_activate_IRQ_uv() "
410 "received invalid partid=0x%x in message\n",
411 partid);
412 } else {
413 part = &xpc_partitions[partid];
414 if (xpc_part_ref(part)) {
415 xpc_handle_activate_mq_msg_uv(part, msg_hdr,
416 &wakeup_hb_checker);
417 xpc_part_deref(part);
418 }
419 }
420
421 gru_free_message(xpc_activate_mq_uv, msg_hdr);
422 }
423
424 if (wakeup_hb_checker)
425 wake_up_interruptible(&xpc_activate_IRQ_wq);
426
427 return IRQ_HANDLED;
428}
429
430static enum xp_retval
431xpc_send_activate_IRQ_uv(struct xpc_partition *part, void *msg, size_t msg_size,
432 int msg_type)
433{
434 struct xpc_activate_mq_msghdr_uv *msg_hdr = msg;
435
436 DBUG_ON(msg_size > XPC_ACTIVATE_MSG_SIZE_UV);
437
438 msg_hdr->type = msg_type;
439 msg_hdr->partid = XPC_PARTID(part);
440 msg_hdr->act_state = part->act_state;
441 msg_hdr->rp_ts_jiffies = xpc_rsvd_page->ts_jiffies;
442
443 /* ??? Is holding a spin_lock (ch->lock) during this call a bad idea? */
444 return xpc_send_gru_msg(part->sn.uv.remote_activate_mq_gpa, msg,
445 msg_size);
446}
447
448static void
449xpc_send_activate_IRQ_part_uv(struct xpc_partition *part, void *msg,
450 size_t msg_size, int msg_type)
451{
452 enum xp_retval ret;
453
454 ret = xpc_send_activate_IRQ_uv(part, msg, msg_size, msg_type);
455 if (unlikely(ret != xpSuccess))
456 XPC_DEACTIVATE_PARTITION(part, ret);
457}
458
459static void
460xpc_send_activate_IRQ_ch_uv(struct xpc_channel *ch, unsigned long *irq_flags,
461 void *msg, size_t msg_size, int msg_type)
462{
463 struct xpc_partition *part = &xpc_partitions[ch->partid];
464 enum xp_retval ret;
465
466 ret = xpc_send_activate_IRQ_uv(part, msg, msg_size, msg_type);
467 if (unlikely(ret != xpSuccess)) {
468 if (irq_flags != NULL)
469 spin_unlock_irqrestore(&ch->lock, *irq_flags);
470
471 XPC_DEACTIVATE_PARTITION(part, ret);
472
473 if (irq_flags != NULL)
474 spin_lock_irqsave(&ch->lock, *irq_flags);
475 }
476}
477
478static void
479xpc_send_local_activate_IRQ_uv(struct xpc_partition *part, int act_state_req)
480{
481 unsigned long irq_flags;
482 struct xpc_partition_uv *part_uv = &part->sn.uv;
483
484 /*
485 * !!! Make our side think that the remote partition sent an activate
486 * !!! message our way by doing what the activate IRQ handler would
487 * !!! do had one really been sent.
488 */
489
490 spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
491 if (part_uv->act_state_req == 0)
492 xpc_activate_IRQ_rcvd++;
493 part_uv->act_state_req = act_state_req;
494 spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
495
496 wake_up_interruptible(&xpc_activate_IRQ_wq);
497}
498
499static enum xp_retval
500xpc_get_partition_rsvd_page_pa_uv(void *buf, u64 *cookie, unsigned long *rp_pa,
501 size_t *len)
502{
503 /* !!! call the UV version of sn_partition_reserved_page_pa() */
504 return xpUnsupported;
505}
506
507static int
508xpc_setup_rsvd_page_sn_uv(struct xpc_rsvd_page *rp)
509{
510 rp->sn.activate_mq_gpa = uv_gpa(xpc_activate_mq_uv);
511 return 0;
512}
513
514static void
515xpc_send_heartbeat_uv(int msg_type)
516{
517 short partid;
518 struct xpc_partition *part;
519 struct xpc_activate_mq_msg_heartbeat_req_uv msg;
520
521 /*
522 * !!! On uv we're broadcasting a heartbeat message every 5 seconds.
523 * !!! Whereas on sn2 we're bte_copy'ng the heartbeat info every 20
524 * !!! seconds. This is an increase in numalink traffic.
525 * ??? Is this good?
526 */
527
528 msg.heartbeat = atomic64_inc_return(&xpc_heartbeat_uv);
529
530 partid = find_first_bit(xpc_heartbeating_to_mask_uv,
531 XP_MAX_NPARTITIONS_UV);
532
533 while (partid < XP_MAX_NPARTITIONS_UV) {
534 part = &xpc_partitions[partid];
535
536 xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg),
537 msg_type);
538
539 partid = find_next_bit(xpc_heartbeating_to_mask_uv,
540 XP_MAX_NPARTITIONS_UV, partid + 1);
541 }
542}
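/*
 * Editor's note: the find_first_bit()/find_next_bit() pair above is the
 * standard open-coded walk of a sparse bitmap; each set bit's position is
 * the partid of a partition we are heartbeating to. The loop shape in
 * isolation (sketch):
 *
 *	for (bit = find_first_bit(mask, nbits);
 *	     bit < nbits;
 *	     bit = find_next_bit(mask, nbits, bit + 1))
 *		visit(bit);	/* 'visit' is a stand-in for the send */
 */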
543
544static void
545xpc_increment_heartbeat_uv(void)
546{
547 xpc_send_heartbeat_uv(XPC_ACTIVATE_MQ_MSG_INC_HEARTBEAT_UV);
548}
549
550static void
551xpc_offline_heartbeat_uv(void)
552{
553 xpc_send_heartbeat_uv(XPC_ACTIVATE_MQ_MSG_OFFLINE_HEARTBEAT_UV);
554}
555
556static void
557xpc_online_heartbeat_uv(void)
558{
559 xpc_send_heartbeat_uv(XPC_ACTIVATE_MQ_MSG_ONLINE_HEARTBEAT_UV);
560}
561
562static void
563xpc_heartbeat_init_uv(void)
564{
565 atomic64_set(&xpc_heartbeat_uv, 0);
566 bitmap_zero(xpc_heartbeating_to_mask_uv, XP_MAX_NPARTITIONS_UV);
567 xpc_heartbeating_to_mask = &xpc_heartbeating_to_mask_uv[0];
568}
569
570static void
571xpc_heartbeat_exit_uv(void)
572{
573 xpc_send_heartbeat_uv(XPC_ACTIVATE_MQ_MSG_OFFLINE_HEARTBEAT_UV);
574}
575
576static enum xp_retval
577xpc_get_remote_heartbeat_uv(struct xpc_partition *part)
578{
579 struct xpc_partition_uv *part_uv = &part->sn.uv;
580 enum xp_retval ret = xpNoHeartbeat;
581
582 if (part_uv->remote_act_state != XPC_P_AS_INACTIVE &&
583 part_uv->remote_act_state != XPC_P_AS_DEACTIVATING) {
584
585 if (part_uv->heartbeat != part->last_heartbeat ||
586 (part_uv->flags & XPC_P_HEARTBEAT_OFFLINE_UV)) {
587
588 part->last_heartbeat = part_uv->heartbeat;
589 ret = xpSuccess;
590 }
591 }
592 return ret;
593}
594
595static void
596xpc_request_partition_activation_uv(struct xpc_rsvd_page *remote_rp,
597 unsigned long remote_rp_gpa, int nasid)
598{
599 short partid = remote_rp->SAL_partid;
600 struct xpc_partition *part = &xpc_partitions[partid];
601 struct xpc_activate_mq_msg_activate_req_uv msg;
602
603 part->remote_rp_pa = remote_rp_gpa; /* !!! _pa here is really _gpa */
604 part->remote_rp_ts_jiffies = remote_rp->ts_jiffies;
605 part->sn.uv.remote_activate_mq_gpa = remote_rp->sn.activate_mq_gpa;
606
607 /*
608 * ??? Is it a good idea to make this conditional on what is
609 * ??? potentially stale state information?
610 */
611 if (part->sn.uv.remote_act_state == XPC_P_AS_INACTIVE) {
612 msg.rp_gpa = uv_gpa(xpc_rsvd_page);
613 msg.activate_mq_gpa = xpc_rsvd_page->sn.activate_mq_gpa;
614 xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg),
615 XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV);
616 }
617
618 if (part->act_state == XPC_P_AS_INACTIVE)
619 xpc_send_local_activate_IRQ_uv(part, XPC_P_ASR_ACTIVATE_UV);
620}
621
622static void
623xpc_request_partition_reactivation_uv(struct xpc_partition *part)
624{
625 xpc_send_local_activate_IRQ_uv(part, XPC_P_ASR_ACTIVATE_UV);
626}
627
628static void
629xpc_request_partition_deactivation_uv(struct xpc_partition *part)
630{
631 struct xpc_activate_mq_msg_deactivate_req_uv msg;
632
633 /*
634 * ??? Is it a good idea to make this conditional on what is
635 * ??? potentially stale state information?
636 */
637 if (part->sn.uv.remote_act_state != XPC_P_AS_DEACTIVATING &&
638 part->sn.uv.remote_act_state != XPC_P_AS_INACTIVE) {
639
640 msg.reason = part->reason;
641 xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg),
642 XPC_ACTIVATE_MQ_MSG_DEACTIVATE_REQ_UV);
643 }
644}
645
646static void
647xpc_cancel_partition_deactivation_request_uv(struct xpc_partition *part)
648{
649 /* nothing needs to be done */
650 return;
651}
652
653static void
654xpc_init_fifo_uv(struct xpc_fifo_head_uv *head)
655{
656 head->first = NULL;
657 head->last = NULL;
658 spin_lock_init(&head->lock);
659 head->n_entries = 0;
660}
661
662static void *
663xpc_get_fifo_entry_uv(struct xpc_fifo_head_uv *head)
664{
665 unsigned long irq_flags;
666 struct xpc_fifo_entry_uv *first;
667
668 spin_lock_irqsave(&head->lock, irq_flags);
669 first = head->first;
670 if (head->first != NULL) {
671 head->first = first->next;
672 if (head->first == NULL)
673 head->last = NULL;
674 head->n_entries--;
675 first->next = NULL;
676 }
677 spin_unlock_irqrestore(&head->lock, irq_flags);
678 return first;
679}
680
681static void
682xpc_put_fifo_entry_uv(struct xpc_fifo_head_uv *head,
683 struct xpc_fifo_entry_uv *last)
684{
685 unsigned long irq_flags;
686
687 last->next = NULL;
688 spin_lock_irqsave(&head->lock, irq_flags);
689 if (head->last != NULL)
690 head->last->next = last;
691 else
692 head->first = last;
693 head->last = last;
694 head->n_entries++;
695 BUG_ON(head->n_entries <= 0);
696 spin_unlock_irqrestore(&head->lock, irq_flags);
697}
698
699static int
700xpc_n_of_fifo_entries_uv(struct xpc_fifo_head_uv *head)
701{
702 return head->n_entries;
703}
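/*
 * Editor's note: this fifo never allocates anything itself -- callers
 * embed a struct xpc_fifo_entry_uv inside their own structure and recover
 * the container with container_of(), exactly as xpc_allocate_msg_slot_uv()
 * does below. In isolation (sketch):
 *
 *	struct xpc_fifo_entry_uv *e;
 *	struct xpc_send_msg_slot_uv *slot;
 *
 *	e = xpc_get_fifo_entry_uv(&ch->sn.uv.msg_slot_free_list);
 *	if (e != NULL)
 *		slot = container_of(e, struct xpc_send_msg_slot_uv, next);
 */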
704
705/*
706 * Set up the channel structures that are uv specific.
707 */
708static enum xp_retval
709xpc_setup_ch_structures_sn_uv(struct xpc_partition *part)
710{
711 struct xpc_channel_uv *ch_uv;
712 int ch_number;
713
714 for (ch_number = 0; ch_number < part->nchannels; ch_number++) {
715 ch_uv = &part->channels[ch_number].sn.uv;
716
717 xpc_init_fifo_uv(&ch_uv->msg_slot_free_list);
718 xpc_init_fifo_uv(&ch_uv->recv_msg_list);
719 }
720
721 return xpSuccess;
722}
723
724/*
725 * Tear down the channel structures that are uv specific.
726 */
727static void
728xpc_teardown_ch_structures_sn_uv(struct xpc_partition *part)
729{
730 /* nothing needs to be done */
731 return;
732}
733
734static enum xp_retval
735xpc_make_first_contact_uv(struct xpc_partition *part)
736{
737 struct xpc_activate_mq_msg_uv msg;
738
739 /*
740 * We send a sync msg to get the remote partition's remote_act_state
741 * updated to our current act_state which at this point should
742 * be XPC_P_AS_ACTIVATING.
743 */
744 xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg),
745 XPC_ACTIVATE_MQ_MSG_SYNC_ACT_STATE_UV);
746
747 while (part->sn.uv.remote_act_state != XPC_P_AS_ACTIVATING) {
748
749 dev_dbg(xpc_part, "waiting to make first contact with "
750 "partition %d\n", XPC_PARTID(part));
751
752 /* wait a 1/4 of a second or so */
753 (void)msleep_interruptible(250);
754
755 if (part->act_state == XPC_P_AS_DEACTIVATING)
756 return part->reason;
757 }
758
759 return xpSuccess;
760}
761
762static u64
763xpc_get_chctl_all_flags_uv(struct xpc_partition *part)
764{
765 unsigned long irq_flags;
766 union xpc_channel_ctl_flags chctl;
767
768 spin_lock_irqsave(&part->chctl_lock, irq_flags);
769 chctl = part->chctl;
770 if (chctl.all_flags != 0)
771 part->chctl.all_flags = 0;
772
773 spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
774 return chctl.all_flags;
775}
776
777static enum xp_retval
778xpc_allocate_send_msg_slot_uv(struct xpc_channel *ch)
779{
780 struct xpc_channel_uv *ch_uv = &ch->sn.uv;
781 struct xpc_send_msg_slot_uv *msg_slot;
782 unsigned long irq_flags;
783 int nentries;
784 int entry;
785 size_t nbytes;
786
787 for (nentries = ch->local_nentries; nentries > 0; nentries--) {
788 nbytes = nentries * sizeof(struct xpc_send_msg_slot_uv);
789 ch_uv->send_msg_slots = kzalloc(nbytes, GFP_KERNEL);
790 if (ch_uv->send_msg_slots == NULL)
791 continue;
792
793 for (entry = 0; entry < nentries; entry++) {
794 msg_slot = &ch_uv->send_msg_slots[entry];
795
796 msg_slot->msg_slot_number = entry;
797 xpc_put_fifo_entry_uv(&ch_uv->msg_slot_free_list,
798 &msg_slot->next);
799 }
800
801 spin_lock_irqsave(&ch->lock, irq_flags);
802 if (nentries < ch->local_nentries)
803 ch->local_nentries = nentries;
804 spin_unlock_irqrestore(&ch->lock, irq_flags);
805 return xpSuccess;
806 }
807
808 return xpNoMemory;
809}
810
811static enum xp_retval
812xpc_allocate_recv_msg_slot_uv(struct xpc_channel *ch)
813{
814 struct xpc_channel_uv *ch_uv = &ch->sn.uv;
815 struct xpc_notify_mq_msg_uv *msg_slot;
816 unsigned long irq_flags;
817 int nentries;
818 int entry;
819 size_t nbytes;
820
821 for (nentries = ch->remote_nentries; nentries > 0; nentries--) {
822 nbytes = nentries * ch->entry_size;
823 ch_uv->recv_msg_slots = kzalloc(nbytes, GFP_KERNEL);
824 if (ch_uv->recv_msg_slots == NULL)
825 continue;
826
827 for (entry = 0; entry < nentries; entry++) {
828 msg_slot = ch_uv->recv_msg_slots + entry *
829 ch->entry_size;
830
831 msg_slot->hdr.msg_slot_number = entry;
832 }
833
834 spin_lock_irqsave(&ch->lock, irq_flags);
835 if (nentries < ch->remote_nentries)
836 ch->remote_nentries = nentries;
837 spin_unlock_irqrestore(&ch->lock, irq_flags);
838 return xpSuccess;
839 }
840
841 return xpNoMemory;
842}
843
844/*
845 * Allocate msg_slots associated with the channel.
846 */
847static enum xp_retval
848xpc_setup_msg_structures_uv(struct xpc_channel *ch)
849{
850 enum xp_retval ret;
851 struct xpc_channel_uv *ch_uv = &ch->sn.uv;
852
853 DBUG_ON(ch->flags & XPC_C_SETUP);
854
855 ret = xpc_allocate_send_msg_slot_uv(ch);
856 if (ret == xpSuccess) {
857
858 ret = xpc_allocate_recv_msg_slot_uv(ch);
859 if (ret != xpSuccess) {
860 kfree(ch_uv->send_msg_slots);
861 xpc_init_fifo_uv(&ch_uv->msg_slot_free_list);
862 }
863 }
864 return ret;
865}
866
867/*
868 * Free up msg_slots and clear other state that was set up for the specified
869 * channel.
870 */
871static void
872xpc_teardown_msg_structures_uv(struct xpc_channel *ch)
873{
874 struct xpc_channel_uv *ch_uv = &ch->sn.uv;
875
876 DBUG_ON(!spin_is_locked(&ch->lock));
877
878 ch_uv->remote_notify_mq_gpa = 0;
879
880 if (ch->flags & XPC_C_SETUP) {
881 xpc_init_fifo_uv(&ch_uv->msg_slot_free_list);
882 kfree(ch_uv->send_msg_slots);
883 xpc_init_fifo_uv(&ch_uv->recv_msg_list);
884 kfree(ch_uv->recv_msg_slots);
885 }
886}
887
888static void
889xpc_send_chctl_closerequest_uv(struct xpc_channel *ch, unsigned long *irq_flags)
890{
891 struct xpc_activate_mq_msg_chctl_closerequest_uv msg;
892
893 msg.ch_number = ch->number;
894 msg.reason = ch->reason;
895 xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg),
896 XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREQUEST_UV);
897}
898
899static void
900xpc_send_chctl_closereply_uv(struct xpc_channel *ch, unsigned long *irq_flags)
901{
902 struct xpc_activate_mq_msg_chctl_closereply_uv msg;
903
904 msg.ch_number = ch->number;
905 xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg),
906 XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREPLY_UV);
907}
908
909static void
910xpc_send_chctl_openrequest_uv(struct xpc_channel *ch, unsigned long *irq_flags)
911{
912 struct xpc_activate_mq_msg_chctl_openrequest_uv msg;
913
914 msg.ch_number = ch->number;
915 msg.entry_size = ch->entry_size;
916 msg.local_nentries = ch->local_nentries;
917 xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg),
918 XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREQUEST_UV);
919}
920
921static void
922xpc_send_chctl_openreply_uv(struct xpc_channel *ch, unsigned long *irq_flags)
923{
924 struct xpc_activate_mq_msg_chctl_openreply_uv msg;
925
926 msg.ch_number = ch->number;
927 msg.local_nentries = ch->local_nentries;
928 msg.remote_nentries = ch->remote_nentries;
929 msg.local_notify_mq_gpa = uv_gpa(xpc_notify_mq_uv);
930 xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg),
931 XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREPLY_UV);
932}
933
934static void
935xpc_send_chctl_local_msgrequest_uv(struct xpc_partition *part, int ch_number)
936{
937 unsigned long irq_flags;
938
939 spin_lock_irqsave(&part->chctl_lock, irq_flags);
940 part->chctl.flags[ch_number] |= XPC_CHCTL_MSGREQUEST;
941 spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
942
943 xpc_wakeup_channel_mgr(part);
944}
945
946static void
947xpc_save_remote_msgqueue_pa_uv(struct xpc_channel *ch,
948 unsigned long msgqueue_pa)
949{
950 ch->sn.uv.remote_notify_mq_gpa = msgqueue_pa;
951}
952
953static void
954xpc_indicate_partition_engaged_uv(struct xpc_partition *part)
955{
956 struct xpc_activate_mq_msg_uv msg;
957
958 xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg),
959 XPC_ACTIVATE_MQ_MSG_MARK_ENGAGED_UV);
960}
961
962static void
963xpc_indicate_partition_disengaged_uv(struct xpc_partition *part)
964{
965 struct xpc_activate_mq_msg_uv msg;
966
967 xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg),
968 XPC_ACTIVATE_MQ_MSG_MARK_DISENGAGED_UV);
969}
970
971static void
972xpc_assume_partition_disengaged_uv(short partid)
973{
974 struct xpc_partition_uv *part_uv = &xpc_partitions[partid].sn.uv;
975 unsigned long irq_flags;
976
977 spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
978 part_uv->flags &= ~XPC_P_ENGAGED_UV;
979 spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags);
980}
981
982static int
983xpc_partition_engaged_uv(short partid)
984{
985 return (xpc_partitions[partid].sn.uv.flags & XPC_P_ENGAGED_UV) != 0;
986}
987
988static int
989xpc_any_partition_engaged_uv(void)
990{
991 struct xpc_partition_uv *part_uv;
992 short partid;
993
994 for (partid = 0; partid < XP_MAX_NPARTITIONS_UV; partid++) {
995 part_uv = &xpc_partitions[partid].sn.uv;
996 if ((part_uv->flags & XPC_P_ENGAGED_UV) != 0)
997 return 1;
998 }
999 return 0;
1000}
1001
1002static enum xp_retval
1003xpc_allocate_msg_slot_uv(struct xpc_channel *ch, u32 flags,
1004 struct xpc_send_msg_slot_uv **address_of_msg_slot)
1005{
1006 enum xp_retval ret;
1007 struct xpc_send_msg_slot_uv *msg_slot;
1008 struct xpc_fifo_entry_uv *entry;
1009
1010 while (1) {
1011 entry = xpc_get_fifo_entry_uv(&ch->sn.uv.msg_slot_free_list);
1012 if (entry != NULL)
1013 break;
1014
1015 if (flags & XPC_NOWAIT)
1016 return xpNoWait;
1017
1018 ret = xpc_allocate_msg_wait(ch);
1019 if (ret != xpInterrupted && ret != xpTimeout)
1020 return ret;
1021 }
1022
1023 msg_slot = container_of(entry, struct xpc_send_msg_slot_uv, next);
1024 *address_of_msg_slot = msg_slot;
1025 return xpSuccess;
1026}
1027
1028static void
1029xpc_free_msg_slot_uv(struct xpc_channel *ch,
1030 struct xpc_send_msg_slot_uv *msg_slot)
1031{
1032 xpc_put_fifo_entry_uv(&ch->sn.uv.msg_slot_free_list, &msg_slot->next);
1033
1034 /* wakeup anyone waiting for a free msg slot */
1035 if (atomic_read(&ch->n_on_msg_allocate_wq) > 0)
1036 wake_up(&ch->msg_allocate_wq);
1037}
1038
1039static void
1040xpc_notify_sender_uv(struct xpc_channel *ch,
1041 struct xpc_send_msg_slot_uv *msg_slot,
1042 enum xp_retval reason)
1043{
1044 xpc_notify_func func = msg_slot->func;
1045
1046 if (func != NULL && cmpxchg(&msg_slot->func, func, NULL) == func) {
1047
1048 atomic_dec(&ch->n_to_notify);
1049
1050 dev_dbg(xpc_chan, "msg_slot->func() called, msg_slot=0x%p "
1051 "msg_slot_number=%d partid=%d channel=%d\n", msg_slot,
1052 msg_slot->msg_slot_number, ch->partid, ch->number);
1053
1054 func(reason, ch->partid, ch->number, msg_slot->key);
1055
1056 dev_dbg(xpc_chan, "msg_slot->func() returned, msg_slot=0x%p "
1057 "msg_slot_number=%d partid=%d channel=%d\n", msg_slot,
1058 msg_slot->msg_slot_number, ch->partid, ch->number);
1059 }
1060}
1061
1062static void
1063xpc_handle_notify_mq_ack_uv(struct xpc_channel *ch,
1064 struct xpc_notify_mq_msg_uv *msg)
1065{
1066 struct xpc_send_msg_slot_uv *msg_slot;
1067 int entry = msg->hdr.msg_slot_number % ch->local_nentries;
1068
1069 msg_slot = &ch->sn.uv.send_msg_slots[entry];
1070
1071 BUG_ON(msg_slot->msg_slot_number != msg->hdr.msg_slot_number);
1072 msg_slot->msg_slot_number += ch->local_nentries;
1073
1074 if (msg_slot->func != NULL)
1075 xpc_notify_sender_uv(ch, msg_slot, xpMsgDelivered);
1076
1077 xpc_free_msg_slot_uv(ch, msg_slot);
1078}
1079
1080static void
1081xpc_handle_notify_mq_msg_uv(struct xpc_partition *part,
1082 struct xpc_notify_mq_msg_uv *msg)
1083{
1084 struct xpc_partition_uv *part_uv = &part->sn.uv;
1085 struct xpc_channel *ch;
1086 struct xpc_channel_uv *ch_uv;
1087 struct xpc_notify_mq_msg_uv *msg_slot;
1088 unsigned long irq_flags;
1089 int ch_number = msg->hdr.ch_number;
1090
1091 if (unlikely(ch_number >= part->nchannels)) {
1092 dev_err(xpc_part, "xpc_handle_notify_IRQ_uv() received invalid "
1093 "channel number=0x%x in message from partid=%d\n",
1094 ch_number, XPC_PARTID(part));
1095
1096 /* get hb checker to deactivate from the remote partition */
1097 spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
1098 if (part_uv->act_state_req == 0)
1099 xpc_activate_IRQ_rcvd++;
1100 part_uv->act_state_req = XPC_P_ASR_DEACTIVATE_UV;
1101 part_uv->reason = xpBadChannelNumber;
1102 spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
1103
1104 wake_up_interruptible(&xpc_activate_IRQ_wq);
1105 return;
1106 }
1107
1108 ch = &part->channels[ch_number];
1109 xpc_msgqueue_ref(ch);
1110
1111 if (!(ch->flags & XPC_C_CONNECTED)) {
1112 xpc_msgqueue_deref(ch);
1113 return;
1114 }
1115
1116 /* see if we're really dealing with an ACK for a previously sent msg */
1117 if (msg->hdr.size == 0) {
1118 xpc_handle_notify_mq_ack_uv(ch, msg);
1119 xpc_msgqueue_deref(ch);
1120 return;
1121 }
1122
1123 /* we're dealing with a normal message sent via the notify_mq */
1124 ch_uv = &ch->sn.uv;
1125
1126 msg_slot = (struct xpc_notify_mq_msg_uv *)((u64)ch_uv->recv_msg_slots +
1127 (msg->hdr.msg_slot_number % ch->remote_nentries) *
1128 ch->entry_size);
1129
1130 BUG_ON(msg->hdr.msg_slot_number != msg_slot->hdr.msg_slot_number);
1131 BUG_ON(msg_slot->hdr.size != 0);
1132
1133 memcpy(msg_slot, msg, msg->hdr.size);
1134
1135 xpc_put_fifo_entry_uv(&ch_uv->recv_msg_list, &msg_slot->hdr.u.next);
1136
1137 if (ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) {
1138 /*
1139 * If there is an existing idle kthread get it to deliver
1140 * the payload, otherwise we'll have to get the channel mgr
1141 * for this partition to create a kthread to do the delivery.
1142 */
1143 if (atomic_read(&ch->kthreads_idle) > 0)
1144 wake_up_nr(&ch->idle_wq, 1);
1145 else
1146 xpc_send_chctl_local_msgrequest_uv(part, ch->number);
1147 }
1148 xpc_msgqueue_deref(ch);
1149}
1150
1151static irqreturn_t
1152xpc_handle_notify_IRQ_uv(int irq, void *dev_id)
1153{
1154 struct xpc_notify_mq_msg_uv *msg;
1155 short partid;
1156 struct xpc_partition *part;
1157
1158 while ((msg = gru_get_next_message(xpc_notify_mq_uv)) != NULL) {
1159
1160 partid = msg->hdr.partid;
1161 if (partid < 0 || partid >= XP_MAX_NPARTITIONS_UV) {
1162 dev_err(xpc_part, "xpc_handle_notify_IRQ_uv() received "
1163 "invalid partid=0x%x in message\n", partid);
1164 } else {
1165 part = &xpc_partitions[partid];
1166
1167 if (xpc_part_ref(part)) {
1168 xpc_handle_notify_mq_msg_uv(part, msg);
1169 xpc_part_deref(part);
1170 }
1171 }
1172
1173 gru_free_message(xpc_notify_mq_uv, msg);
1174 }
1175
1176 return IRQ_HANDLED;
1177}
1178
1179static int
1180xpc_n_of_deliverable_payloads_uv(struct xpc_channel *ch)
1181{
1182 return xpc_n_of_fifo_entries_uv(&ch->sn.uv.recv_msg_list);
1183}
1184
1185static void
1186xpc_process_msg_chctl_flags_uv(struct xpc_partition *part, int ch_number)
1187{
1188 struct xpc_channel *ch = &part->channels[ch_number];
1189 int ndeliverable_payloads;
1190
1191 xpc_msgqueue_ref(ch);
1192
1193 ndeliverable_payloads = xpc_n_of_deliverable_payloads_uv(ch);
1194
1195 if (ndeliverable_payloads > 0 &&
1196 (ch->flags & XPC_C_CONNECTED) &&
1197 (ch->flags & XPC_C_CONNECTEDCALLOUT_MADE)) {
1198
1199 xpc_activate_kthreads(ch, ndeliverable_payloads);
1200 }
1201
1202 xpc_msgqueue_deref(ch);
1203}
1204
1205static enum xp_retval
1206xpc_send_payload_uv(struct xpc_channel *ch, u32 flags, void *payload,
1207 u16 payload_size, u8 notify_type, xpc_notify_func func,
1208 void *key)
1209{
1210 enum xp_retval ret = xpSuccess;
1211 struct xpc_send_msg_slot_uv *msg_slot = NULL;
1212 struct xpc_notify_mq_msg_uv *msg;
1213 u8 msg_buffer[XPC_NOTIFY_MSG_SIZE_UV];
1214 size_t msg_size;
1215
1216 DBUG_ON(notify_type != XPC_N_CALL);
1217
1218 msg_size = sizeof(struct xpc_notify_mq_msghdr_uv) + payload_size;
1219 if (msg_size > ch->entry_size)
1220 return xpPayloadTooBig;
1221
1222 xpc_msgqueue_ref(ch);
1223
1224 if (ch->flags & XPC_C_DISCONNECTING) {
1225 ret = ch->reason;
1226 goto out_1;
1227 }
1228 if (!(ch->flags & XPC_C_CONNECTED)) {
1229 ret = xpNotConnected;
1230 goto out_1;
1231 }
1232
1233 ret = xpc_allocate_msg_slot_uv(ch, flags, &msg_slot);
1234 if (ret != xpSuccess)
1235 goto out_1;
1236
1237 if (func != NULL) {
1238 atomic_inc(&ch->n_to_notify);
1239
1240 msg_slot->key = key;
1241 wmb(); /* a non-NULL func must hit memory after the key */
1242 msg_slot->func = func;
1243
1244 if (ch->flags & XPC_C_DISCONNECTING) {
1245 ret = ch->reason;
1246 goto out_2;
1247 }
1248 }
1249
1250 msg = (struct xpc_notify_mq_msg_uv *)&msg_buffer;
1251 msg->hdr.partid = xp_partition_id;
1252 msg->hdr.ch_number = ch->number;
1253 msg->hdr.size = msg_size;
1254 msg->hdr.msg_slot_number = msg_slot->msg_slot_number;
1255 memcpy(&msg->payload, payload, payload_size);
1256
1257 ret = xpc_send_gru_msg(ch->sn.uv.remote_notify_mq_gpa, msg, msg_size);
1258 if (ret == xpSuccess)
1259 goto out_1;
1260
1261 XPC_DEACTIVATE_PARTITION(&xpc_partitions[ch->partid], ret);
1262out_2:
1263 if (func != NULL) {
1264 /*
1265 * Try to NULL the msg_slot's func field. If we fail, then
1266 * xpc_notify_senders_of_disconnect_uv() beat us to it, in which
1267 * case we need to pretend we succeeded in sending the message,
1268 * since the user will get a callout for the disconnect error
1269 * from xpc_notify_senders_of_disconnect_uv(); getting an error
1270 * returned here as well would only confuse them. Additionally,
1271 * since in this case the channel is being disconnected we don't
1272 * need to put the msg_slot back on the free list.
1273 */
1274 if (cmpxchg(&msg_slot->func, func, NULL) != func) {
1275 ret = xpSuccess;
1276 goto out_1;
1277 }
1278
1279 msg_slot->key = NULL;
1280 atomic_dec(&ch->n_to_notify);
1281 }
1282 xpc_free_msg_slot_uv(ch, msg_slot);
1283out_1:
1284 xpc_msgqueue_deref(ch);
1285 return ret;
1286}
1287
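The out_2 comment above describes a handoff: the send path and
xpc_notify_senders_of_disconnect_uv() race to NULL the slot's func pointer
with cmpxchg(), and only the winner reports to the user. Below is a minimal
userspace sketch of that pattern using C11 atomics; struct slot,
try_claim_notification() and user_callout() are illustrative names, not
driver API.

#include <stdatomic.h>
#include <stdio.h>

typedef void (*notify_fn)(int reason);

struct slot {
    _Atomic notify_fn func;
};

/* Returns 1 if the caller won the race and owns the one callout. */
static int try_claim_notification(struct slot *s, notify_fn expected)
{
    notify_fn null_fn = NULL;

    return atomic_compare_exchange_strong(&s->func, &expected, null_fn);
}

static void user_callout(int reason) { printf("callout: %d\n", reason); }

int main(void)
{
    struct slot s = { .func = user_callout };

    if (try_claim_notification(&s, user_callout))
        printf("send path won; report the send error itself\n");
    else
        printf("disconnect path won; pretend the send succeeded\n");
    return 0;
}
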
1288/*
1289 * Tell the callers of xpc_send_notify() that the status of their payloads
1290 * is unknown because the channel is now disconnecting.
1291 *
1292 * We don't worry about putting these msg_slots on the free list since the
1293 * msg_slots themselves are about to be kfree'd.
1294 */
1295static void
1296xpc_notify_senders_of_disconnect_uv(struct xpc_channel *ch)
1297{
1298 struct xpc_send_msg_slot_uv *msg_slot;
1299 int entry;
1300
1301 DBUG_ON(!(ch->flags & XPC_C_DISCONNECTING));
1302
1303 for (entry = 0; entry < ch->local_nentries; entry++) {
1304
1305 if (atomic_read(&ch->n_to_notify) == 0)
1306 break;
1307
1308 msg_slot = &ch->sn.uv.send_msg_slots[entry];
1309 if (msg_slot->func != NULL)
1310 xpc_notify_sender_uv(ch, msg_slot, ch->reason);
1311 }
1312}
1313
1314/*
1315 * Get the next deliverable message's payload.
1316 */
1317static void *
1318xpc_get_deliverable_payload_uv(struct xpc_channel *ch)
1319{
1320 struct xpc_fifo_entry_uv *entry;
1321 struct xpc_notify_mq_msg_uv *msg;
1322 void *payload = NULL;
1323
1324 if (!(ch->flags & XPC_C_DISCONNECTING)) {
1325 entry = xpc_get_fifo_entry_uv(&ch->sn.uv.recv_msg_list);
1326 if (entry != NULL) {
1327 msg = container_of(entry, struct xpc_notify_mq_msg_uv,
1328 hdr.u.next);
1329 payload = &msg->payload;
1330 }
1331 }
1332 return payload;
1333}
1334
1335static void
1336xpc_received_payload_uv(struct xpc_channel *ch, void *payload)
1337{
1338 struct xpc_notify_mq_msg_uv *msg;
1339 enum xp_retval ret;
1340
1341 msg = container_of(payload, struct xpc_notify_mq_msg_uv, payload);
1342
1343 /* return an ACK to the sender of this message */
1344
1345 msg->hdr.partid = xp_partition_id;
1346 msg->hdr.size = 0; /* size of zero indicates this is an ACK */
1347
1348 ret = xpc_send_gru_msg(ch->sn.uv.remote_notify_mq_gpa, msg,
1349 sizeof(struct xpc_notify_mq_msghdr_uv));
1350 if (ret != xpSuccess)
1351 XPC_DEACTIVATE_PARTITION(&xpc_partitions[ch->partid], ret);
1352
1353 msg->hdr.msg_slot_number += ch->remote_nentries;
1354}
1355
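xpc_received_payload_uv() above acknowledges a message by echoing only the
header with size set to 0, then bumps its local msg_slot_number by the ring
depth so the same physical slot can be told apart across reuse. A simplified
sketch of that convention follows; struct hdr and the helper names are
assumptions, not the driver's types.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct hdr {
    uint16_t size;            /* 0 means "header-only ACK" */
    uint16_t msg_slot_number; /* ring index plus generation offset */
};

static bool is_ack(const struct hdr *h) { return h->size == 0; }

/* Map a (possibly advanced) slot number back onto the ring. */
static unsigned slot_index(const struct hdr *h, unsigned nentries)
{
    return h->msg_slot_number % nentries;
}

int main(void)
{
    struct hdr h = { .size = 64, .msg_slot_number = 3 };
    unsigned remote_nentries = 8;

    h.size = 0;                           /* echo the header: that's the ACK */
    h.msg_slot_number += remote_nentries; /* mark the next generation locally */
    printf("ack=%d index=%u\n", is_ack(&h),
           slot_index(&h, remote_nentries));
    return 0;
}
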
1356int
1357xpc_init_uv(void)
1358{
1359 xpc_setup_partitions_sn = xpc_setup_partitions_sn_uv;
1360 xpc_process_activate_IRQ_rcvd = xpc_process_activate_IRQ_rcvd_uv;
1361 xpc_get_partition_rsvd_page_pa = xpc_get_partition_rsvd_page_pa_uv;
1362 xpc_setup_rsvd_page_sn = xpc_setup_rsvd_page_sn_uv;
1363 xpc_increment_heartbeat = xpc_increment_heartbeat_uv;
1364 xpc_offline_heartbeat = xpc_offline_heartbeat_uv;
1365 xpc_online_heartbeat = xpc_online_heartbeat_uv;
1366 xpc_heartbeat_init = xpc_heartbeat_init_uv;
1367 xpc_heartbeat_exit = xpc_heartbeat_exit_uv;
1368 xpc_get_remote_heartbeat = xpc_get_remote_heartbeat_uv;
1369
1370 xpc_request_partition_activation = xpc_request_partition_activation_uv;
1371 xpc_request_partition_reactivation =
1372 xpc_request_partition_reactivation_uv;
1373 xpc_request_partition_deactivation =
1374 xpc_request_partition_deactivation_uv;
1375 xpc_cancel_partition_deactivation_request =
1376 xpc_cancel_partition_deactivation_request_uv;
1377
1378 xpc_setup_ch_structures_sn = xpc_setup_ch_structures_sn_uv;
1379 xpc_teardown_ch_structures_sn = xpc_teardown_ch_structures_sn_uv;
1380
1381 xpc_make_first_contact = xpc_make_first_contact_uv;
1382
1383 xpc_get_chctl_all_flags = xpc_get_chctl_all_flags_uv;
1384 xpc_send_chctl_closerequest = xpc_send_chctl_closerequest_uv;
1385 xpc_send_chctl_closereply = xpc_send_chctl_closereply_uv;
1386 xpc_send_chctl_openrequest = xpc_send_chctl_openrequest_uv;
1387 xpc_send_chctl_openreply = xpc_send_chctl_openreply_uv;
1388
1389 xpc_save_remote_msgqueue_pa = xpc_save_remote_msgqueue_pa_uv;
1390
1391 xpc_setup_msg_structures = xpc_setup_msg_structures_uv;
1392 xpc_teardown_msg_structures = xpc_teardown_msg_structures_uv;
1393
1394 xpc_indicate_partition_engaged = xpc_indicate_partition_engaged_uv;
1395 xpc_indicate_partition_disengaged =
1396 xpc_indicate_partition_disengaged_uv;
1397 xpc_assume_partition_disengaged = xpc_assume_partition_disengaged_uv;
1398 xpc_partition_engaged = xpc_partition_engaged_uv;
1399 xpc_any_partition_engaged = xpc_any_partition_engaged_uv;
1400
1401 xpc_n_of_deliverable_payloads = xpc_n_of_deliverable_payloads_uv;
1402 xpc_process_msg_chctl_flags = xpc_process_msg_chctl_flags_uv;
1403 xpc_send_payload = xpc_send_payload_uv;
1404 xpc_notify_senders_of_disconnect = xpc_notify_senders_of_disconnect_uv;
1405 xpc_get_deliverable_payload = xpc_get_deliverable_payload_uv;
1406 xpc_received_payload = xpc_received_payload_uv;
1407
1408 if (sizeof(struct xpc_notify_mq_msghdr_uv) > XPC_MSG_HDR_MAX_SIZE) {
1409 dev_err(xpc_part, "xpc_notify_mq_msghdr_uv is larger than %d\n",
1410 XPC_MSG_HDR_MAX_SIZE);
1411 return -E2BIG;
1412 }
1413
1414 /* ??? The cpuid argument's value is 0, is that what we want? */
1415 /* !!! The irq argument's value isn't correct. */
1416 xpc_activate_mq_uv = xpc_create_gru_mq_uv(XPC_ACTIVATE_MQ_SIZE_UV, 0, 0,
1417 xpc_handle_activate_IRQ_uv);
1418 if (xpc_activate_mq_uv == NULL)
1419 return -ENOMEM;
1420
1421 /* ??? The cpuid argument's value is 0, is that what we want? */
1422 /* !!! The irq argument's value isn't correct. */
1423 xpc_notify_mq_uv = xpc_create_gru_mq_uv(XPC_NOTIFY_MQ_SIZE_UV, 0, 0,
1424 xpc_handle_notify_IRQ_uv);
1425 if (xpc_notify_mq_uv == NULL) {
1426 /* !!! The irq argument's value isn't correct. */
1427 xpc_destroy_gru_mq_uv(xpc_activate_mq_uv,
1428 XPC_ACTIVATE_MQ_SIZE_UV, 0);
1429 return -ENOMEM;
1430 }
1431
1432 return 0;
1433}
1434
1435void
1436xpc_exit_uv(void)
1437{
1438 /* !!! The irq argument's value isn't correct. */
1439 xpc_destroy_gru_mq_uv(xpc_notify_mq_uv, XPC_NOTIFY_MQ_SIZE_UV, 0);
1440
1441 /* !!! The irq argument's value isn't correct. */
1442 xpc_destroy_gru_mq_uv(xpc_activate_mq_uv, XPC_ACTIVATE_MQ_SIZE_UV, 0);
1443}
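
xpc_init_uv() consists almost entirely of assignments into global function
pointers: the generic XPC core only ever calls through those hooks, so the
sn2 and UV back ends can be selected at init time. The shape of that
dispatch in a standalone sketch (struct xpc_ops and the uv_* functions are
invented for illustration):

#include <stdio.h>

/* Generic hook table; the core only ever calls through these. */
struct xpc_ops {
    void (*increment_heartbeat)(void);
    int  (*send_payload)(const void *payload, unsigned size);
};

static void uv_increment_heartbeat(void) { puts("uv heartbeat"); }

static int uv_send_payload(const void *p, unsigned n)
{
    (void)p;
    printf("uv send %u bytes\n", n);
    return 0;
}

static struct xpc_ops xpc_ops;

/* Back-end init: point every hook at this back end's implementation. */
static int xpc_init_backend(void)
{
    xpc_ops.increment_heartbeat = uv_increment_heartbeat;
    xpc_ops.send_payload = uv_send_payload;
    return 0;
}

int main(void)
{
    xpc_init_backend();
    xpc_ops.increment_heartbeat();      /* generic caller, uv behavior */
    return xpc_ops.send_payload("hi", 2);
}
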
diff --git a/drivers/misc/sgi-xp/xpnet.c b/drivers/misc/sgi-xp/xpnet.c
index 822dc8e8d7f0..71513b3af708 100644
--- a/drivers/misc/sgi-xp/xpnet.c
+++ b/drivers/misc/sgi-xp/xpnet.c
@@ -21,21 +21,8 @@
21 */ 21 */
22 22
23#include <linux/module.h> 23#include <linux/module.h>
24#include <linux/types.h>
25#include <linux/kernel.h>
26#include <linux/init.h>
27#include <linux/ioport.h>
28#include <linux/netdevice.h> 24#include <linux/netdevice.h>
29#include <linux/etherdevice.h> 25#include <linux/etherdevice.h>
30#include <linux/delay.h>
31#include <linux/ethtool.h>
32#include <linux/mii.h>
33#include <linux/smp.h>
34#include <linux/string.h>
35#include <asm/sn/bte.h>
36#include <asm/sn/io.h>
37#include <asm/sn/sn_sal.h>
38#include <asm/atomic.h>
39#include "xp.h" 26#include "xp.h"
40 27
41/* 28/*
@@ -57,7 +44,7 @@ struct xpnet_message {
57 u16 version; /* Version for this message */ 44 u16 version; /* Version for this message */
58 u16 embedded_bytes; /* #of bytes embedded in XPC message */ 45 u16 embedded_bytes; /* #of bytes embedded in XPC message */
59 u32 magic; /* Special number indicating this is xpnet */ 46 u32 magic; /* Special number indicating this is xpnet */
60 u64 buf_pa; /* phys address of buffer to retrieve */ 47 unsigned long buf_pa; /* phys address of buffer to retrieve */
61 u32 size; /* #of bytes in buffer */ 48 u32 size; /* #of bytes in buffer */
62 u8 leadin_ignore; /* #of bytes to ignore at the beginning */ 49 u8 leadin_ignore; /* #of bytes to ignore at the beginning */
63 u8 tailout_ignore; /* #of bytes to ignore at the end */ 50 u8 tailout_ignore; /* #of bytes to ignore at the end */
@@ -70,11 +57,10 @@ struct xpnet_message {
70 * 57 *
71 * XPC expects each message to exist in an individual cacheline. 58 * XPC expects each message to exist in an individual cacheline.
72 */ 59 */
73#define XPNET_MSG_SIZE (L1_CACHE_BYTES - XPC_MSG_PAYLOAD_OFFSET) 60#define XPNET_MSG_SIZE XPC_MSG_PAYLOAD_MAX_SIZE
74#define XPNET_MSG_DATA_MAX \ 61#define XPNET_MSG_DATA_MAX \
75 (XPNET_MSG_SIZE - (u64)(&((struct xpnet_message *)0)->data)) 62 (XPNET_MSG_SIZE - offsetof(struct xpnet_message, data))
76#define XPNET_MSG_ALIGNED_SIZE (L1_CACHE_ALIGN(XPNET_MSG_SIZE)) 63#define XPNET_MSG_NENTRIES (PAGE_SIZE / XPC_MSG_MAX_SIZE)
77#define XPNET_MSG_NENTRIES (PAGE_SIZE / XPNET_MSG_ALIGNED_SIZE)
78 64
79#define XPNET_MAX_KTHREADS (XPNET_MSG_NENTRIES + 1) 65#define XPNET_MAX_KTHREADS (XPNET_MSG_NENTRIES + 1)
80#define XPNET_MAX_IDLE_KTHREADS (XPNET_MSG_NENTRIES + 1) 66#define XPNET_MAX_IDLE_KTHREADS (XPNET_MSG_NENTRIES + 1)
@@ -105,7 +91,6 @@ struct xpnet_message {
105 * then be released. 91 * then be released.
106 */ 92 */
107struct xpnet_pending_msg { 93struct xpnet_pending_msg {
108 struct list_head free_list;
109 struct sk_buff *skb; 94 struct sk_buff *skb;
110 atomic_t use_count; 95 atomic_t use_count;
111}; 96};
@@ -121,7 +106,7 @@ struct net_device *xpnet_device;
121 * When we are notified of other partitions activating, we add them to 106 * When we are notified of other partitions activating, we add them to
122 * our bitmask of partitions to which we broadcast. 107 * our bitmask of partitions to which we broadcast.
123 */ 108 */
124static u64 xpnet_broadcast_partitions; 109static unsigned long *xpnet_broadcast_partitions;
125/* protect above */ 110/* protect above */
126static DEFINE_SPINLOCK(xpnet_broadcast_lock); 111static DEFINE_SPINLOCK(xpnet_broadcast_lock);
127 112
@@ -141,16 +126,13 @@ static DEFINE_SPINLOCK(xpnet_broadcast_lock);
141#define XPNET_DEF_MTU (0x8000UL) 126#define XPNET_DEF_MTU (0x8000UL)
142 127
143/* 128/*
 144 * The partition id is encapsulated in the MAC address. The following 129 * The two-octet partid is encapsulated in the MAC address, beginning
 145 * define locates the octet the partid is in. 130 * at the octet defined below.
146 */ 131 */
147#define XPNET_PARTID_OCTET 1 132#define XPNET_PARTID_OCTET 2
148#define XPNET_LICENSE_OCTET 2 133
134/* Define the XPNET debug device structures to be used with dev_dbg() et al */
149 135
150/*
151 * Define the XPNET debug device structure that is to be used with dev_dbg(),
152 * dev_err(), dev_warn(), and dev_info().
153 */
154struct device_driver xpnet_dbg_name = { 136struct device_driver xpnet_dbg_name = {
155 .name = "xpnet" 137 .name = "xpnet"
156}; 138};
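
As the rewritten comment above says, the partid now takes two MAC octets
starting at octet 2, stored high byte first; xpnet_init() later in this diff
fixes octet 0 at the locally-administered value 0x02. The encode/decode
arithmetic in isolation, as a sketch mirroring the shifts used in the hunks
below:

#include <stdint.h>
#include <stdio.h>

#define XPNET_PARTID_OCTET 2    /* partid occupies octets 2 and 3 */

static void mac_set_partid(uint8_t mac[6], int16_t partid)
{
    mac[0] = 0x02;  /* locally administered, no OUI */
    mac[XPNET_PARTID_OCTET + 0] = (uint8_t)(partid >> 8);
    mac[XPNET_PARTID_OCTET + 1] = (uint8_t)partid;
}

static int16_t mac_get_partid(const uint8_t mac[6])
{
    return (int16_t)((mac[XPNET_PARTID_OCTET + 0] << 8) |
                     mac[XPNET_PARTID_OCTET + 1]);
}

int main(void)
{
    uint8_t mac[6] = { 0 };

    mac_set_partid(mac, 0x0123);
    printf("partid=0x%04x\n", (unsigned)(uint16_t)mac_get_partid(mac));
    return 0;
}
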
@@ -169,7 +151,8 @@ static void
169xpnet_receive(short partid, int channel, struct xpnet_message *msg) 151xpnet_receive(short partid, int channel, struct xpnet_message *msg)
170{ 152{
171 struct sk_buff *skb; 153 struct sk_buff *skb;
172 bte_result_t bret; 154 void *dst;
155 enum xp_retval ret;
173 struct xpnet_dev_private *priv = 156 struct xpnet_dev_private *priv =
174 (struct xpnet_dev_private *)xpnet_device->priv; 157 (struct xpnet_dev_private *)xpnet_device->priv;
175 158
@@ -201,7 +184,7 @@ xpnet_receive(short partid, int channel, struct xpnet_message *msg)
201 184
202 /* 185 /*
203 * The allocated skb has some reserved space. 186 * The allocated skb has some reserved space.
204 * In order to use bte_copy, we need to get the 187 * In order to use xp_remote_memcpy(), we need to get the
205 * skb->data pointer moved forward. 188 * skb->data pointer moved forward.
206 */ 189 */
207 skb_reserve(skb, (L1_CACHE_BYTES - ((u64)skb->data & 190 skb_reserve(skb, (L1_CACHE_BYTES - ((u64)skb->data &
@@ -226,26 +209,21 @@ xpnet_receive(short partid, int channel, struct xpnet_message *msg)
226 skb_copy_to_linear_data(skb, &msg->data, 209 skb_copy_to_linear_data(skb, &msg->data,
227 (size_t)msg->embedded_bytes); 210 (size_t)msg->embedded_bytes);
228 } else { 211 } else {
212 dst = (void *)((u64)skb->data & ~(L1_CACHE_BYTES - 1));
229 dev_dbg(xpnet, "transferring buffer to the skb->data area;\n\t" 213 dev_dbg(xpnet, "transferring buffer to the skb->data area;\n\t"
230 "bte_copy(0x%p, 0x%p, %hu)\n", (void *)msg->buf_pa, 214 "xp_remote_memcpy(0x%p, 0x%p, %hu)\n", dst,
231 (void *)__pa((u64)skb->data & ~(L1_CACHE_BYTES - 1)), 215 (void *)msg->buf_pa, msg->size);
232 msg->size);
233
234 bret = bte_copy(msg->buf_pa,
235 __pa((u64)skb->data & ~(L1_CACHE_BYTES - 1)),
236 msg->size, (BTE_NOTIFY | BTE_WACQUIRE), NULL);
237 216
238 if (bret != BTE_SUCCESS) { 217 ret = xp_remote_memcpy(xp_pa(dst), msg->buf_pa, msg->size);
218 if (ret != xpSuccess) {
239 /* 219 /*
240 * >>> Need better way of cleaning skb. Currently skb 220 * !!! Need better way of cleaning skb. Currently skb
241 * >>> appears in_use and we can't just call 221 * !!! appears in_use and we can't just call
242 * >>> dev_kfree_skb. 222 * !!! dev_kfree_skb.
243 */ 223 */
244 dev_err(xpnet, "bte_copy(0x%p, 0x%p, 0x%hx) returned " 224 dev_err(xpnet, "xp_remote_memcpy(0x%p, 0x%p, 0x%hx) "
245 "error=0x%x\n", (void *)msg->buf_pa, 225 "returned error=0x%x\n", dst,
246 (void *)__pa((u64)skb->data & 226 (void *)msg->buf_pa, msg->size, ret);
247 ~(L1_CACHE_BYTES - 1)),
248 msg->size, bret);
249 227
250 xpc_received(partid, channel, (void *)msg); 228 xpc_received(partid, channel, (void *)msg);
251 229
@@ -285,9 +263,7 @@ static void
285xpnet_connection_activity(enum xp_retval reason, short partid, int channel, 263xpnet_connection_activity(enum xp_retval reason, short partid, int channel,
286 void *data, void *key) 264 void *data, void *key)
287{ 265{
288 long bp; 266 DBUG_ON(partid < 0 || partid >= xp_max_npartitions);
289
290 DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS);
291 DBUG_ON(channel != XPC_NET_CHANNEL); 267 DBUG_ON(channel != XPC_NET_CHANNEL);
292 268
293 switch (reason) { 269 switch (reason) {
@@ -299,31 +275,28 @@ xpnet_connection_activity(enum xp_retval reason, short partid, int channel,
299 275
300 case xpConnected: /* connection completed to a partition */ 276 case xpConnected: /* connection completed to a partition */
301 spin_lock_bh(&xpnet_broadcast_lock); 277 spin_lock_bh(&xpnet_broadcast_lock);
302 xpnet_broadcast_partitions |= 1UL << (partid - 1); 278 __set_bit(partid, xpnet_broadcast_partitions);
303 bp = xpnet_broadcast_partitions;
304 spin_unlock_bh(&xpnet_broadcast_lock); 279 spin_unlock_bh(&xpnet_broadcast_lock);
305 280
306 netif_carrier_on(xpnet_device); 281 netif_carrier_on(xpnet_device);
307 282
308 dev_dbg(xpnet, "%s connection created to partition %d; " 283 dev_dbg(xpnet, "%s connected to partition %d\n",
309 "xpnet_broadcast_partitions=0x%lx\n", 284 xpnet_device->name, partid);
310 xpnet_device->name, partid, bp);
311 break; 285 break;
312 286
313 default: 287 default:
314 spin_lock_bh(&xpnet_broadcast_lock); 288 spin_lock_bh(&xpnet_broadcast_lock);
315 xpnet_broadcast_partitions &= ~(1UL << (partid - 1)); 289 __clear_bit(partid, xpnet_broadcast_partitions);
316 bp = xpnet_broadcast_partitions;
317 spin_unlock_bh(&xpnet_broadcast_lock); 290 spin_unlock_bh(&xpnet_broadcast_lock);
318 291
 319 if (bp == 0) 292 if (bitmap_empty(xpnet_broadcast_partitions,
293 xp_max_npartitions)) {
320 netif_carrier_off(xpnet_device); 294 netif_carrier_off(xpnet_device);
295 }
321 296
322 dev_dbg(xpnet, "%s disconnected from partition %d; " 297 dev_dbg(xpnet, "%s disconnected from partition %d\n",
323 "xpnet_broadcast_partitions=0x%lx\n", 298 xpnet_device->name, partid);
324 xpnet_device->name, partid, bp);
325 break; 299 break;
326
327 } 300 }
328} 301}
329 302
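
The hunk above replaces the old u64 mask with a kernel bitmap sized at
runtime, so the partition count is no longer capped at 64. The same
set/clear/test/empty pattern in plain C, as a simplified userspace rendition
of the bitmap helpers (not the kernel's implementation):

#include <limits.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

#define BITS_PER_LONG (CHAR_BIT * sizeof(unsigned long))
#define BITS_TO_LONGS(n) (((n) + BITS_PER_LONG - 1) / BITS_PER_LONG)

static void set_bit_ul(int nr, unsigned long *map)
{ map[nr / BITS_PER_LONG] |= 1UL << (nr % BITS_PER_LONG); }

static void clear_bit_ul(int nr, unsigned long *map)
{ map[nr / BITS_PER_LONG] &= ~(1UL << (nr % BITS_PER_LONG)); }

static bool test_bit_ul(int nr, const unsigned long *map)
{ return map[nr / BITS_PER_LONG] & (1UL << (nr % BITS_PER_LONG)); }

/* Safe here because bits beyond nbits are never set. */
static bool bitmap_empty_ul(const unsigned long *map, int nbits)
{
    for (int i = 0; i < (int)BITS_TO_LONGS(nbits); i++)
        if (map[i])
            return false;
    return true;
}

int main(void)
{
    int max_npartitions = 256;  /* runtime-sized, unlike a u64 mask */
    unsigned long *bcast = calloc(BITS_TO_LONGS(max_npartitions),
                                  sizeof(*bcast));

    if (!bcast)
        return 1;
    set_bit_ul(70, bcast);                     /* partition connected */
    printf("70 in set: %d\n", test_bit_ul(70, bcast));
    clear_bit_ul(70, bcast);                   /* disconnected */
    printf("empty: %d\n", bitmap_empty_ul(bcast, max_npartitions));
    free(bcast);
    return 0;
}
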
@@ -334,8 +307,10 @@ xpnet_dev_open(struct net_device *dev)
334 307
335 dev_dbg(xpnet, "calling xpc_connect(%d, 0x%p, NULL, %ld, %ld, %ld, " 308 dev_dbg(xpnet, "calling xpc_connect(%d, 0x%p, NULL, %ld, %ld, %ld, "
336 "%ld)\n", XPC_NET_CHANNEL, xpnet_connection_activity, 309 "%ld)\n", XPC_NET_CHANNEL, xpnet_connection_activity,
337 XPNET_MSG_SIZE, XPNET_MSG_NENTRIES, XPNET_MAX_KTHREADS, 310 (unsigned long)XPNET_MSG_SIZE,
338 XPNET_MAX_IDLE_KTHREADS); 311 (unsigned long)XPNET_MSG_NENTRIES,
312 (unsigned long)XPNET_MAX_KTHREADS,
313 (unsigned long)XPNET_MAX_IDLE_KTHREADS);
339 314
340 ret = xpc_connect(XPC_NET_CHANNEL, xpnet_connection_activity, NULL, 315 ret = xpc_connect(XPC_NET_CHANNEL, xpnet_connection_activity, NULL,
341 XPNET_MSG_SIZE, XPNET_MSG_NENTRIES, 316 XPNET_MSG_SIZE, XPNET_MSG_NENTRIES,
@@ -426,35 +401,74 @@ xpnet_send_completed(enum xp_retval reason, short partid, int channel,
426 } 401 }
427} 402}
428 403
404static void
405xpnet_send(struct sk_buff *skb, struct xpnet_pending_msg *queued_msg,
406 u64 start_addr, u64 end_addr, u16 embedded_bytes, int dest_partid)
407{
408 u8 msg_buffer[XPNET_MSG_SIZE];
409 struct xpnet_message *msg = (struct xpnet_message *)&msg_buffer;
410 u16 msg_size = sizeof(struct xpnet_message);
411 enum xp_retval ret;
412
413 msg->embedded_bytes = embedded_bytes;
414 if (unlikely(embedded_bytes != 0)) {
415 msg->version = XPNET_VERSION_EMBED;
416 dev_dbg(xpnet, "calling memcpy(0x%p, 0x%p, 0x%lx)\n",
417 &msg->data, skb->data, (size_t)embedded_bytes);
418 skb_copy_from_linear_data(skb, &msg->data,
419 (size_t)embedded_bytes);
420 msg_size += embedded_bytes - 1;
421 } else {
422 msg->version = XPNET_VERSION;
423 }
424 msg->magic = XPNET_MAGIC;
425 msg->size = end_addr - start_addr;
426 msg->leadin_ignore = (u64)skb->data - start_addr;
427 msg->tailout_ignore = end_addr - (u64)skb_tail_pointer(skb);
428 msg->buf_pa = xp_pa((void *)start_addr);
429
430 dev_dbg(xpnet, "sending XPC message to %d:%d\n"
431 KERN_DEBUG "msg->buf_pa=0x%lx, msg->size=%u, "
432 "msg->leadin_ignore=%u, msg->tailout_ignore=%u\n",
433 dest_partid, XPC_NET_CHANNEL, msg->buf_pa, msg->size,
434 msg->leadin_ignore, msg->tailout_ignore);
435
436 atomic_inc(&queued_msg->use_count);
437
438 ret = xpc_send_notify(dest_partid, XPC_NET_CHANNEL, XPC_NOWAIT, msg,
439 msg_size, xpnet_send_completed, queued_msg);
440 if (unlikely(ret != xpSuccess))
441 atomic_dec(&queued_msg->use_count);
442}
443
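For packets too big to embed, xpnet_send() above describes the skb's
cache-aligned footprint so the receiver can fetch it with one remote copy:
start and end are rounded out to L1 cachelines, and leadin_ignore and
tailout_ignore record the padding bytes on each side. Just that arithmetic,
as a sketch; the 128-byte cacheline and the sample addresses are made up:

#include <stdint.h>
#include <stdio.h>

#define L1_CACHE_BYTES 128UL  /* assumed cacheline size for the example */

struct span {
    uint64_t buf_start;      /* cache-aligned start actually transferred */
    uint32_t size;           /* total aligned bytes */
    uint8_t leadin_ignore;   /* junk bytes before the real data */
    uint8_t tailout_ignore;  /* junk bytes after the real data */
};

static struct span describe(uint64_t data, uint64_t tail)
{
    uint64_t start = data & ~(L1_CACHE_BYTES - 1);
    uint64_t end = (tail + L1_CACHE_BYTES - 1) & ~(L1_CACHE_BYTES - 1);

    return (struct span){
        .buf_start = start,
        .size = (uint32_t)(end - start),
        .leadin_ignore = (uint8_t)(data - start),
        .tailout_ignore = (uint8_t)(end - tail),
    };
}

int main(void)
{
    /* pretend skb->data is at ...40 and the skb's tail at ...1f2 */
    struct span s = describe(0x1040, 0x11f2);

    printf("start=0x%llx size=%u leadin=%u tailout=%u\n",
           (unsigned long long)s.buf_start, s.size,
           (unsigned)s.leadin_ignore, (unsigned)s.tailout_ignore);
    return 0;
}
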
429/* 444/*
430 * Network layer has formatted a packet (skb) and is ready to place it 445 * Network layer has formatted a packet (skb) and is ready to place it
431 * "on the wire". Prepare and send an xpnet_message to all partitions 446 * "on the wire". Prepare and send an xpnet_message to all partitions
432 * which have connected with us and are targets of this packet. 447 * which have connected with us and are targets of this packet.
433 * 448 *
434 * MAC-NOTE: For the XPNET driver, the MAC address contains the 449 * MAC-NOTE: For the XPNET driver, the MAC address contains the
435 * destination partition_id. If the destination partition id word 450 * destination partid. If the destination partid octets are 0xffff,
436 * is 0xff, this packet is to broadcast to all partitions. 451 * this packet is to be broadcast to all connected partitions.
437 */ 452 */
438static int 453static int
439xpnet_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) 454xpnet_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
440{ 455{
441 struct xpnet_pending_msg *queued_msg; 456 struct xpnet_pending_msg *queued_msg;
442 enum xp_retval ret;
443 struct xpnet_message *msg;
444 u64 start_addr, end_addr; 457 u64 start_addr, end_addr;
445 long dp;
446 u8 second_mac_octet;
447 short dest_partid; 458 short dest_partid;
448 struct xpnet_dev_private *priv; 459 struct xpnet_dev_private *priv = (struct xpnet_dev_private *)dev->priv;
449 u16 embedded_bytes; 460 u16 embedded_bytes = 0;
450
451 priv = (struct xpnet_dev_private *)dev->priv;
452 461
453 dev_dbg(xpnet, ">skb->head=0x%p skb->data=0x%p skb->tail=0x%p " 462 dev_dbg(xpnet, ">skb->head=0x%p skb->data=0x%p skb->tail=0x%p "
454 "skb->end=0x%p skb->len=%d\n", (void *)skb->head, 463 "skb->end=0x%p skb->len=%d\n", (void *)skb->head,
455 (void *)skb->data, skb_tail_pointer(skb), skb_end_pointer(skb), 464 (void *)skb->data, skb_tail_pointer(skb), skb_end_pointer(skb),
456 skb->len); 465 skb->len);
457 466
467 if (skb->data[0] == 0x33) {
468 dev_kfree_skb(skb);
 469 return 0; /* nothing needs to be done */
470 }
471
458 /* 472 /*
459 * The xpnet_pending_msg tracks how many outstanding 473 * The xpnet_pending_msg tracks how many outstanding
460 * xpc_send_notifies are relying on this skb. When none 474 * xpc_send_notifies are relying on this skb. When none
@@ -466,7 +480,6 @@ xpnet_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
466 "packet\n", sizeof(struct xpnet_pending_msg)); 480 "packet\n", sizeof(struct xpnet_pending_msg));
467 481
468 priv->stats.tx_errors++; 482 priv->stats.tx_errors++;
469
470 return -ENOMEM; 483 return -ENOMEM;
471 } 484 }
472 485
@@ -475,7 +488,6 @@ xpnet_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
475 end_addr = L1_CACHE_ALIGN((u64)skb_tail_pointer(skb)); 488 end_addr = L1_CACHE_ALIGN((u64)skb_tail_pointer(skb));
476 489
477 /* calculate how many bytes to embed in the XPC message */ 490 /* calculate how many bytes to embed in the XPC message */
478 embedded_bytes = 0;
479 if (unlikely(skb->len <= XPNET_MSG_DATA_MAX)) { 491 if (unlikely(skb->len <= XPNET_MSG_DATA_MAX)) {
480 /* skb->data does fit so embed */ 492 /* skb->data does fit so embed */
481 embedded_bytes = skb->len; 493 embedded_bytes = skb->len;
@@ -491,82 +503,28 @@ xpnet_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
491 atomic_set(&queued_msg->use_count, 1); 503 atomic_set(&queued_msg->use_count, 1);
492 queued_msg->skb = skb; 504 queued_msg->skb = skb;
493 505
494 second_mac_octet = skb->data[XPNET_PARTID_OCTET]; 506 if (skb->data[0] == 0xff) {
495 if (second_mac_octet == 0xff) {
496 /* we are being asked to broadcast to all partitions */ 507 /* we are being asked to broadcast to all partitions */
497 dp = xpnet_broadcast_partitions; 508 for_each_bit(dest_partid, xpnet_broadcast_partitions,
498 } else if (second_mac_octet != 0) { 509 xp_max_npartitions) {
499 dp = xpnet_broadcast_partitions &
500 (1UL << (second_mac_octet - 1));
501 } else {
502 /* 0 is an invalid partid. Ignore */
503 dp = 0;
504 }
505 dev_dbg(xpnet, "destination Partitions mask (dp) = 0x%lx\n", dp);
506
507 /*
508 * If we wanted to allow promiscuous mode to work like an
509 * unswitched network, this would be a good point to OR in a
510 * mask of partitions which should be receiving all packets.
511 */
512
513 /*
514 * Main send loop.
515 */
516 for (dest_partid = 1; dp && dest_partid < XP_MAX_PARTITIONS;
517 dest_partid++) {
518 510
519 if (!(dp & (1UL << (dest_partid - 1)))) { 511 xpnet_send(skb, queued_msg, start_addr, end_addr,
520 /* not destined for this partition */ 512 embedded_bytes, dest_partid);
521 continue;
522 } 513 }
514 } else {
515 dest_partid = (short)skb->data[XPNET_PARTID_OCTET + 1];
516 dest_partid |= (short)skb->data[XPNET_PARTID_OCTET + 0] << 8;
523 517
524 /* remove this partition from the destinations mask */ 518 if (dest_partid >= 0 &&
525 dp &= ~(1UL << (dest_partid - 1)); 519 dest_partid < xp_max_npartitions &&
526 520 test_bit(dest_partid, xpnet_broadcast_partitions) != 0) {
527 /* found a partition to send to */ 521
528 522 xpnet_send(skb, queued_msg, start_addr, end_addr,
529 ret = xpc_allocate(dest_partid, XPC_NET_CHANNEL, 523 embedded_bytes, dest_partid);
530 XPC_NOWAIT, (void **)&msg);
531 if (unlikely(ret != xpSuccess))
532 continue;
533
534 msg->embedded_bytes = embedded_bytes;
535 if (unlikely(embedded_bytes != 0)) {
536 msg->version = XPNET_VERSION_EMBED;
537 dev_dbg(xpnet, "calling memcpy(0x%p, 0x%p, 0x%lx)\n",
538 &msg->data, skb->data, (size_t)embedded_bytes);
539 skb_copy_from_linear_data(skb, &msg->data,
540 (size_t)embedded_bytes);
541 } else {
542 msg->version = XPNET_VERSION;
543 }
544 msg->magic = XPNET_MAGIC;
545 msg->size = end_addr - start_addr;
546 msg->leadin_ignore = (u64)skb->data - start_addr;
547 msg->tailout_ignore = end_addr - (u64)skb_tail_pointer(skb);
548 msg->buf_pa = __pa(start_addr);
549
550 dev_dbg(xpnet, "sending XPC message to %d:%d\n"
551 KERN_DEBUG "msg->buf_pa=0x%lx, msg->size=%u, "
552 "msg->leadin_ignore=%u, msg->tailout_ignore=%u\n",
553 dest_partid, XPC_NET_CHANNEL, msg->buf_pa, msg->size,
554 msg->leadin_ignore, msg->tailout_ignore);
555
556 atomic_inc(&queued_msg->use_count);
557
558 ret = xpc_send_notify(dest_partid, XPC_NET_CHANNEL, msg,
559 xpnet_send_completed, queued_msg);
560 if (unlikely(ret != xpSuccess)) {
561 atomic_dec(&queued_msg->use_count);
562 continue;
563 } 524 }
564 } 525 }
565 526
566 if (atomic_dec_return(&queued_msg->use_count) == 0) { 527 if (atomic_dec_return(&queued_msg->use_count) == 0) {
567 dev_dbg(xpnet, "no partitions to receive packet destined for "
568 "%d\n", dest_partid);
569
570 dev_kfree_skb(skb); 528 dev_kfree_skb(skb);
571 kfree(queued_msg); 529 kfree(queued_msg);
572 } 530 }
@@ -594,23 +552,28 @@ xpnet_dev_tx_timeout(struct net_device *dev)
594static int __init 552static int __init
595xpnet_init(void) 553xpnet_init(void)
596{ 554{
597 int i; 555 int result;
598 u32 license_num;
599 int result = -ENOMEM;
600 556
601 if (!ia64_platform_is("sn2")) 557 if (!is_shub() && !is_uv())
602 return -ENODEV; 558 return -ENODEV;
603 559
604 dev_info(xpnet, "registering network device %s\n", XPNET_DEVICE_NAME); 560 dev_info(xpnet, "registering network device %s\n", XPNET_DEVICE_NAME);
605 561
562 xpnet_broadcast_partitions = kzalloc(BITS_TO_LONGS(xp_max_npartitions) *
563 sizeof(long), GFP_KERNEL);
564 if (xpnet_broadcast_partitions == NULL)
565 return -ENOMEM;
566
606 /* 567 /*
607 * use ether_setup() to init the majority of our device 568 * use ether_setup() to init the majority of our device
608 * structure and then override the necessary pieces. 569 * structure and then override the necessary pieces.
609 */ 570 */
610 xpnet_device = alloc_netdev(sizeof(struct xpnet_dev_private), 571 xpnet_device = alloc_netdev(sizeof(struct xpnet_dev_private),
611 XPNET_DEVICE_NAME, ether_setup); 572 XPNET_DEVICE_NAME, ether_setup);
612 if (xpnet_device == NULL) 573 if (xpnet_device == NULL) {
574 kfree(xpnet_broadcast_partitions);
613 return -ENOMEM; 575 return -ENOMEM;
576 }
614 577
615 netif_carrier_off(xpnet_device); 578 netif_carrier_off(xpnet_device);
616 579
@@ -628,14 +591,10 @@ xpnet_init(void)
628 * MAC addresses. We chose the first octet of the MAC to be unlikely 591 * MAC addresses. We chose the first octet of the MAC to be unlikely
629 * to collide with any vendor's officially issued MAC. 592 * to collide with any vendor's officially issued MAC.
630 */ 593 */
631 xpnet_device->dev_addr[0] = 0xfe; 594 xpnet_device->dev_addr[0] = 0x02; /* locally administered, no OUI */
632 xpnet_device->dev_addr[XPNET_PARTID_OCTET] = sn_partition_id; 595
633 license_num = sn_partition_serial_number_val(); 596 xpnet_device->dev_addr[XPNET_PARTID_OCTET + 1] = xp_partition_id;
634 for (i = 3; i >= 0; i--) { 597 xpnet_device->dev_addr[XPNET_PARTID_OCTET + 0] = (xp_partition_id >> 8);
635 xpnet_device->dev_addr[XPNET_LICENSE_OCTET + i] =
636 license_num & 0xff;
637 license_num = license_num >> 8;
638 }
639 598
640 /* 599 /*
641 * ether_setup() sets this to a multicast device. We are 600 * ether_setup() sets this to a multicast device. We are
@@ -651,8 +610,10 @@ xpnet_init(void)
651 xpnet_device->features = NETIF_F_NO_CSUM; 610 xpnet_device->features = NETIF_F_NO_CSUM;
652 611
653 result = register_netdev(xpnet_device); 612 result = register_netdev(xpnet_device);
654 if (result != 0) 613 if (result != 0) {
655 free_netdev(xpnet_device); 614 free_netdev(xpnet_device);
615 kfree(xpnet_broadcast_partitions);
616 }
656 617
657 return result; 618 return result;
658} 619}
@@ -666,8 +627,8 @@ xpnet_exit(void)
666 xpnet_device[0].name); 627 xpnet_device[0].name);
667 628
668 unregister_netdev(xpnet_device); 629 unregister_netdev(xpnet_device);
669
670 free_netdev(xpnet_device); 630 free_netdev(xpnet_device);
631 kfree(xpnet_broadcast_partitions);
671} 632}
672 633
673module_exit(xpnet_exit); 634module_exit(xpnet_exit);
diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
index 66e5a5487c20..86dbb366415a 100644
--- a/drivers/mmc/card/block.c
+++ b/drivers/mmc/card/block.c
@@ -213,7 +213,8 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req)
213 struct mmc_blk_data *md = mq->data; 213 struct mmc_blk_data *md = mq->data;
214 struct mmc_card *card = md->queue.card; 214 struct mmc_card *card = md->queue.card;
215 struct mmc_blk_request brq; 215 struct mmc_blk_request brq;
216 int ret = 1, sg_pos, data_size; 216 int ret = 1, data_size, i;
217 struct scatterlist *sg;
217 218
218 mmc_claim_host(card->host); 219 mmc_claim_host(card->host);
219 220
@@ -267,18 +268,22 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req)
267 268
268 mmc_queue_bounce_pre(mq); 269 mmc_queue_bounce_pre(mq);
269 270
271 /*
272 * Adjust the sg list so it is the same size as the
273 * request.
274 */
270 if (brq.data.blocks != 275 if (brq.data.blocks !=
271 (req->nr_sectors >> (md->block_bits - 9))) { 276 (req->nr_sectors >> (md->block_bits - 9))) {
272 data_size = brq.data.blocks * brq.data.blksz; 277 data_size = brq.data.blocks * brq.data.blksz;
273 for (sg_pos = 0; sg_pos < brq.data.sg_len; sg_pos++) { 278 for_each_sg(brq.data.sg, sg, brq.data.sg_len, i) {
274 data_size -= mq->sg[sg_pos].length; 279 data_size -= sg->length;
275 if (data_size <= 0) { 280 if (data_size <= 0) {
276 mq->sg[sg_pos].length += data_size; 281 sg->length += data_size;
277 sg_pos++; 282 i++;
278 break; 283 break;
279 } 284 }
280 } 285 }
281 brq.data.sg_len = sg_pos; 286 brq.data.sg_len = i;
282 } 287 }
283 288
284 mmc_wait_for_req(card->host, &brq.mrq); 289 mmc_wait_for_req(card->host, &brq.mrq);
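
The block.c hunk above shortens the scatterlist with for_each_sg() until the
summed segment lengths equal the request size, truncating the final segment.
The same trimming over a plain array of segments, as a sketch:

#include <stdio.h>

struct seg {
    unsigned int length;
};

/* Trim segments so their total equals budget; returns new segment count. */
static int trim_segments(struct seg *sg, int nsegs, int budget)
{
    int i;

    for (i = 0; i < nsegs; i++) {
        budget -= sg[i].length;
        if (budget <= 0) {
            /* budget is now <= 0: shrink this final segment */
            sg[i].length += budget;
            i++;
            break;
        }
    }
    return i;
}

int main(void)
{
    struct seg sg[] = { { 4096 }, { 4096 }, { 4096 } };
    int n = trim_segments(sg, 3, 6144);  /* request covers 1.5 segments */

    printf("segs=%d last_len=%u\n", n, sg[n - 1].length);  /* 2, 2048 */
    return 0;
}
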
diff --git a/drivers/mmc/card/mmc_test.c b/drivers/mmc/card/mmc_test.c
index a067fe436301..f26b01d811ae 100644
--- a/drivers/mmc/card/mmc_test.c
+++ b/drivers/mmc/card/mmc_test.c
@@ -388,16 +388,14 @@ static int mmc_test_transfer(struct mmc_test_card *test,
388 int ret, i; 388 int ret, i;
389 unsigned long flags; 389 unsigned long flags;
390 390
391 BUG_ON(blocks * blksz > BUFFER_SIZE);
392
393 if (write) { 391 if (write) {
394 for (i = 0;i < blocks * blksz;i++) 392 for (i = 0;i < blocks * blksz;i++)
395 test->scratch[i] = i; 393 test->scratch[i] = i;
396 } else { 394 } else {
397 memset(test->scratch, 0, blocks * blksz); 395 memset(test->scratch, 0, BUFFER_SIZE);
398 } 396 }
399 local_irq_save(flags); 397 local_irq_save(flags);
400 sg_copy_from_buffer(sg, sg_len, test->scratch, blocks * blksz); 398 sg_copy_from_buffer(sg, sg_len, test->scratch, BUFFER_SIZE);
401 local_irq_restore(flags); 399 local_irq_restore(flags);
402 400
403 ret = mmc_test_set_blksize(test, blksz); 401 ret = mmc_test_set_blksize(test, blksz);
@@ -444,7 +442,7 @@ static int mmc_test_transfer(struct mmc_test_card *test,
444 } 442 }
445 } else { 443 } else {
446 local_irq_save(flags); 444 local_irq_save(flags);
447 sg_copy_to_buffer(sg, sg_len, test->scratch, blocks * blksz); 445 sg_copy_to_buffer(sg, sg_len, test->scratch, BUFFER_SIZE);
448 local_irq_restore(flags); 446 local_irq_restore(flags);
449 for (i = 0;i < blocks * blksz;i++) { 447 for (i = 0;i < blocks * blksz;i++) {
450 if (test->scratch[i] != (u8)i) 448 if (test->scratch[i] != (u8)i)
@@ -805,69 +803,6 @@ static int mmc_test_multi_xfersize_read(struct mmc_test_card *test)
805 return 0; 803 return 0;
806} 804}
807 805
808static int mmc_test_bigsg_write(struct mmc_test_card *test)
809{
810 int ret;
811 unsigned int size;
812 struct scatterlist sg;
813
814 if (test->card->host->max_blk_count == 1)
815 return RESULT_UNSUP_HOST;
816
817 size = PAGE_SIZE * 2;
818 size = min(size, test->card->host->max_req_size);
819 size = min(size, test->card->host->max_seg_size);
820 size = min(size, test->card->host->max_blk_count * 512);
821
822 memset(test->buffer, 0, BUFFER_SIZE);
823
824 if (size < 1024)
825 return RESULT_UNSUP_HOST;
826
827 sg_init_table(&sg, 1);
828 sg_init_one(&sg, test->buffer, BUFFER_SIZE);
829
830 ret = mmc_test_transfer(test, &sg, 1, 0, size/512, 512, 1);
831 if (ret)
832 return ret;
833
834 return 0;
835}
836
837static int mmc_test_bigsg_read(struct mmc_test_card *test)
838{
839 int ret, i;
840 unsigned int size;
841 struct scatterlist sg;
842
843 if (test->card->host->max_blk_count == 1)
844 return RESULT_UNSUP_HOST;
845
846 size = PAGE_SIZE * 2;
847 size = min(size, test->card->host->max_req_size);
848 size = min(size, test->card->host->max_seg_size);
849 size = min(size, test->card->host->max_blk_count * 512);
850
851 if (size < 1024)
852 return RESULT_UNSUP_HOST;
853
854 memset(test->buffer, 0xCD, BUFFER_SIZE);
855
856 sg_init_table(&sg, 1);
857 sg_init_one(&sg, test->buffer, BUFFER_SIZE);
858 ret = mmc_test_transfer(test, &sg, 1, 0, size/512, 512, 0);
859 if (ret)
860 return ret;
861
862 /* mmc_test_transfer() doesn't check for read overflows */
863 for (i = size;i < BUFFER_SIZE;i++) {
864 if (test->buffer[i] != 0xCD)
865 return RESULT_FAIL;
866 }
867
868 return 0;
869}
870
871#ifdef CONFIG_HIGHMEM 806#ifdef CONFIG_HIGHMEM
872 807
873static int mmc_test_write_high(struct mmc_test_card *test) 808static int mmc_test_write_high(struct mmc_test_card *test)
@@ -1071,20 +1006,6 @@ static const struct mmc_test_case mmc_test_cases[] = {
1071 .run = mmc_test_multi_xfersize_read, 1006 .run = mmc_test_multi_xfersize_read,
1072 }, 1007 },
1073 1008
1074 {
1075 .name = "Over-sized SG list write",
1076 .prepare = mmc_test_prepare_write,
1077 .run = mmc_test_bigsg_write,
1078 .cleanup = mmc_test_cleanup,
1079 },
1080
1081 {
1082 .name = "Over-sized SG list read",
1083 .prepare = mmc_test_prepare_read,
1084 .run = mmc_test_bigsg_read,
1085 .cleanup = mmc_test_cleanup,
1086 },
1087
1088#ifdef CONFIG_HIGHMEM 1009#ifdef CONFIG_HIGHMEM
1089 1010
1090 { 1011 {
diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index 3ee5b8c3b5ce..044d84eeed7c 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -121,6 +121,7 @@ mmc_start_request(struct mmc_host *host, struct mmc_request *mrq)
121{ 121{
122#ifdef CONFIG_MMC_DEBUG 122#ifdef CONFIG_MMC_DEBUG
123 unsigned int i, sz; 123 unsigned int i, sz;
124 struct scatterlist *sg;
124#endif 125#endif
125 126
126 pr_debug("%s: starting CMD%u arg %08x flags %08x\n", 127 pr_debug("%s: starting CMD%u arg %08x flags %08x\n",
@@ -156,8 +157,8 @@ mmc_start_request(struct mmc_host *host, struct mmc_request *mrq)
156 157
157#ifdef CONFIG_MMC_DEBUG 158#ifdef CONFIG_MMC_DEBUG
158 sz = 0; 159 sz = 0;
159 for (i = 0;i < mrq->data->sg_len;i++) 160 for_each_sg(mrq->data->sg, sg, mrq->data->sg_len, i)
160 sz += mrq->data->sg[i].length; 161 sz += sg->length;
161 BUG_ON(sz != mrq->data->blocks * mrq->data->blksz); 162 BUG_ON(sz != mrq->data->blocks * mrq->data->blksz);
162#endif 163#endif
163 164
diff --git a/drivers/mmc/host/au1xmmc.c b/drivers/mmc/host/au1xmmc.c
index 99b20917cc0f..d3f55615c099 100644
--- a/drivers/mmc/host/au1xmmc.c
+++ b/drivers/mmc/host/au1xmmc.c
@@ -61,7 +61,13 @@
61 61
62/* Hardware definitions */ 62/* Hardware definitions */
63#define AU1XMMC_DESCRIPTOR_COUNT 1 63#define AU1XMMC_DESCRIPTOR_COUNT 1
64#define AU1XMMC_DESCRIPTOR_SIZE 2048 64
65/* max DMA seg size: 64KB on Au1100, 4MB on Au1200 */
66#ifdef CONFIG_SOC_AU1100
67#define AU1XMMC_DESCRIPTOR_SIZE 0x0000ffff
68#else /* Au1200 */
69#define AU1XMMC_DESCRIPTOR_SIZE 0x003fffff
70#endif
65 71
66#define AU1XMMC_OCR (MMC_VDD_27_28 | MMC_VDD_28_29 | MMC_VDD_29_30 | \ 72#define AU1XMMC_OCR (MMC_VDD_27_28 | MMC_VDD_28_29 | MMC_VDD_29_30 | \
67 MMC_VDD_30_31 | MMC_VDD_31_32 | MMC_VDD_32_33 | \ 73 MMC_VDD_30_31 | MMC_VDD_31_32 | MMC_VDD_32_33 | \
diff --git a/drivers/mmc/host/sdhci-pci.c b/drivers/mmc/host/sdhci-pci.c
index deb607c52c0d..fcb14c2346cc 100644
--- a/drivers/mmc/host/sdhci-pci.c
+++ b/drivers/mmc/host/sdhci-pci.c
@@ -143,7 +143,8 @@ static int jmicron_probe(struct sdhci_pci_chip *chip)
143 chip->quirks |= SDHCI_QUIRK_32BIT_DMA_ADDR | 143 chip->quirks |= SDHCI_QUIRK_32BIT_DMA_ADDR |
144 SDHCI_QUIRK_32BIT_DMA_SIZE | 144 SDHCI_QUIRK_32BIT_DMA_SIZE |
145 SDHCI_QUIRK_32BIT_ADMA_SIZE | 145 SDHCI_QUIRK_32BIT_ADMA_SIZE |
146 SDHCI_QUIRK_RESET_AFTER_REQUEST; 146 SDHCI_QUIRK_RESET_AFTER_REQUEST |
147 SDHCI_QUIRK_BROKEN_SMALL_PIO;
147 } 148 }
148 149
149 /* 150 /*
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 5f95e10229b5..e3a8133560a2 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -278,6 +278,15 @@ static void sdhci_transfer_pio(struct sdhci_host *host)
278 else 278 else
279 mask = SDHCI_SPACE_AVAILABLE; 279 mask = SDHCI_SPACE_AVAILABLE;
280 280
281 /*
282 * Some controllers (JMicron JMB38x) mess up the buffer bits
283 * for transfers < 4 bytes. As long as it is just one block,
284 * we can ignore the bits.
285 */
286 if ((host->quirks & SDHCI_QUIRK_BROKEN_SMALL_PIO) &&
287 (host->data->blocks == 1))
288 mask = ~0;
289
281 while (readl(host->ioaddr + SDHCI_PRESENT_STATE) & mask) { 290 while (readl(host->ioaddr + SDHCI_PRESENT_STATE) & mask) {
282 if (host->data->flags & MMC_DATA_READ) 291 if (host->data->flags & MMC_DATA_READ)
283 sdhci_read_block_pio(host); 292 sdhci_read_block_pio(host);
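
A controller quirk is just a bit in a flags word consulted at the point
where the broken behavior matters; the hunk above widens the PIO ready mask
to ~0 when SDHCI_QUIRK_BROKEN_SMALL_PIO is set and the transfer is a single
block. A minimal sketch of that gating, with illustrative names:

#include <stdint.h>
#include <stdio.h>

#define QUIRK_BROKEN_SMALL_PIO (1u << 13)

struct host {
    unsigned int quirks;
    unsigned int blocks;
};

/* Pick the status mask to poll, working around broken buffer bits. */
static uint32_t pio_ready_mask(const struct host *h, uint32_t normal_mask)
{
    if ((h->quirks & QUIRK_BROKEN_SMALL_PIO) && h->blocks == 1)
        return ~0u;     /* ignore the unreliable bits */
    return normal_mask;
}

int main(void)
{
    struct host h = { .quirks = QUIRK_BROKEN_SMALL_PIO, .blocks = 1 };

    printf("mask=0x%x\n", pio_ready_mask(&h, 0x400));
    return 0;
}
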
@@ -439,7 +448,7 @@ static int sdhci_adma_table_pre(struct sdhci_host *host,
439 448
440 host->adma_addr = dma_map_single(mmc_dev(host->mmc), 449 host->adma_addr = dma_map_single(mmc_dev(host->mmc),
441 host->adma_desc, (128 * 2 + 1) * 4, DMA_TO_DEVICE); 450 host->adma_desc, (128 * 2 + 1) * 4, DMA_TO_DEVICE);
442 if (dma_mapping_error(mmc_dev(host->mmc), host->align_addr)) 451 if (dma_mapping_error(mmc_dev(host->mmc), host->adma_addr))
443 goto unmap_entries; 452 goto unmap_entries;
444 BUG_ON(host->adma_addr & 0x3); 453 BUG_ON(host->adma_addr & 0x3);
445 454
@@ -645,7 +654,7 @@ static void sdhci_prepare_data(struct sdhci_host *host, struct mmc_data *data)
645 * us an invalid request. 654 * us an invalid request.
646 */ 655 */
647 WARN_ON(1); 656 WARN_ON(1);
648 host->flags &= ~SDHCI_USE_DMA; 657 host->flags &= ~SDHCI_REQ_USE_DMA;
649 } else { 658 } else {
650 writel(host->adma_addr, 659 writel(host->adma_addr,
651 host->ioaddr + SDHCI_ADMA_ADDRESS); 660 host->ioaddr + SDHCI_ADMA_ADDRESS);
@@ -664,7 +673,7 @@ static void sdhci_prepare_data(struct sdhci_host *host, struct mmc_data *data)
664 * us an invalid request. 673 * us an invalid request.
665 */ 674 */
666 WARN_ON(1); 675 WARN_ON(1);
667 host->flags &= ~SDHCI_USE_DMA; 676 host->flags &= ~SDHCI_REQ_USE_DMA;
668 } else { 677 } else {
669 WARN_ON(sg_cnt != 1); 678 WARN_ON(sg_cnt != 1);
670 writel(sg_dma_address(data->sg), 679 writel(sg_dma_address(data->sg),
diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h
index e354faee5df0..197d4a05f4ae 100644
--- a/drivers/mmc/host/sdhci.h
+++ b/drivers/mmc/host/sdhci.h
@@ -206,6 +206,8 @@ struct sdhci_host {
206#define SDHCI_QUIRK_NO_SIMULT_VDD_AND_POWER (1<<11) 206#define SDHCI_QUIRK_NO_SIMULT_VDD_AND_POWER (1<<11)
207/* Controller provides an incorrect timeout value for transfers */ 207/* Controller provides an incorrect timeout value for transfers */
208#define SDHCI_QUIRK_BROKEN_TIMEOUT_VAL (1<<12) 208#define SDHCI_QUIRK_BROKEN_TIMEOUT_VAL (1<<12)
209/* Controller has an issue with buffer bits for small transfers */
210#define SDHCI_QUIRK_BROKEN_SMALL_PIO (1<<13)
209 211
210 int irq; /* Device IRQ */ 212 int irq; /* Device IRQ */
211 void __iomem * ioaddr; /* Mapped address */ 213 void __iomem * ioaddr; /* Mapped address */
diff --git a/drivers/mtd/mtdsuper.c b/drivers/mtd/mtdsuper.c
index 28cc6787a800..9b6af7e74a65 100644
--- a/drivers/mtd/mtdsuper.c
+++ b/drivers/mtd/mtdsuper.c
@@ -125,7 +125,7 @@ int get_sb_mtd(struct file_system_type *fs_type, int flags,
125 int (*fill_super)(struct super_block *, void *, int), 125 int (*fill_super)(struct super_block *, void *, int),
126 struct vfsmount *mnt) 126 struct vfsmount *mnt)
127{ 127{
128 struct nameidata nd; 128 struct block_device *bdev;
129 int mtdnr, ret; 129 int mtdnr, ret;
130 130
131 if (!dev_name) 131 if (!dev_name)
@@ -181,29 +181,20 @@ int get_sb_mtd(struct file_system_type *fs_type, int flags,
181 /* try the old way - the hack where we allowed users to mount 181 /* try the old way - the hack where we allowed users to mount
182 * /dev/mtdblock$(n) but didn't actually _use_ the blockdev 182 * /dev/mtdblock$(n) but didn't actually _use_ the blockdev
183 */ 183 */
184 ret = path_lookup(dev_name, LOOKUP_FOLLOW, &nd); 184 bdev = lookup_bdev(dev_name);
185 185 if (IS_ERR(bdev)) {
186 DEBUG(1, "MTDSB: path_lookup() returned %d, inode %p\n", 186 ret = PTR_ERR(bdev);
187 ret, nd.path.dentry ? nd.path.dentry->d_inode : NULL); 187 DEBUG(1, "MTDSB: lookup_bdev() returned %d\n", ret);
188
189 if (ret)
190 return ret; 188 return ret;
191
192 ret = -EINVAL;
193
194 if (!S_ISBLK(nd.path.dentry->d_inode->i_mode))
195 goto out;
196
197 if (nd.path.mnt->mnt_flags & MNT_NODEV) {
198 ret = -EACCES;
199 goto out;
200 } 189 }
190 DEBUG(1, "MTDSB: lookup_bdev() returned 0\n");
201 191
202 if (imajor(nd.path.dentry->d_inode) != MTD_BLOCK_MAJOR) 192 ret = -EINVAL;
193 if (MAJOR(bdev->bd_dev) != MTD_BLOCK_MAJOR)
203 goto not_an_MTD_device; 194 goto not_an_MTD_device;
204 195
205 mtdnr = iminor(nd.path.dentry->d_inode); 196 mtdnr = MINOR(bdev->bd_dev);
206 path_put(&nd.path); 197 bdput(bdev);
207 198
208 return get_sb_mtd_nr(fs_type, flags, dev_name, data, mtdnr, fill_super, 199 return get_sb_mtd_nr(fs_type, flags, dev_name, data, mtdnr, fill_super,
209 mnt); 200 mnt);
@@ -213,10 +204,8 @@ not_an_MTD_device:
213 printk(KERN_NOTICE 204 printk(KERN_NOTICE
214 "MTD: Attempt to mount non-MTD device \"%s\"\n", 205 "MTD: Attempt to mount non-MTD device \"%s\"\n",
215 dev_name); 206 dev_name);
216out: 207 bdput(bdev);
217 path_put(&nd.path);
218 return ret; 208 return ret;
219
220} 209}
221 210
222EXPORT_SYMBOL_GPL(get_sb_mtd); 211EXPORT_SYMBOL_GPL(get_sb_mtd);
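
The mtdsuper.c change resolves the device name straight to a block_device
and reads MAJOR/MINOR from bd_dev instead of walking a path. A userspace
analogue of that device-number check using stat(2); MTD_BLOCK_MAJOR's value
of 31 is taken from the kernel's major.h and should be treated as an
assumption here:

#include <stdio.h>
#include <sys/stat.h>
#include <sys/sysmacros.h>

#define MTD_BLOCK_MAJOR 31  /* assumed; see include/linux/major.h */

/* Return the mtd number for an mtdblock node, or -1 if it isn't one. */
static int mtdblock_minor(const char *dev_name)
{
    struct stat st;

    if (stat(dev_name, &st) != 0 || !S_ISBLK(st.st_mode))
        return -1;
    if (major(st.st_rdev) != MTD_BLOCK_MAJOR)
        return -1;
    return (int)minor(st.st_rdev);
}

int main(int argc, char **argv)
{
    if (argc > 1)
        printf("mtdnr=%d\n", mtdblock_minor(argv[1]));
    return 0;
}
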
diff --git a/drivers/net/niu.c b/drivers/net/niu.c
index 8ee7d7bb951b..e4765b713aba 100644
--- a/drivers/net/niu.c
+++ b/drivers/net/niu.c
@@ -6417,7 +6417,7 @@ static int niu_ethflow_to_class(int flow_type, u64 *class)
6417 *class = CLASS_CODE_SCTP_IPV6; 6417 *class = CLASS_CODE_SCTP_IPV6;
6418 break; 6418 break;
6419 default: 6419 default:
6420 return -1; 6420 return 0;
6421 } 6421 }
6422 6422
6423 return 1; 6423 return 1;
diff --git a/drivers/net/ps3_gelic_wireless.c b/drivers/net/ps3_gelic_wireless.c
index 6b2dee0cf3a9..a834b52a6a2c 100644
--- a/drivers/net/ps3_gelic_wireless.c
+++ b/drivers/net/ps3_gelic_wireless.c
@@ -1024,7 +1024,7 @@ static int gelic_wl_set_encode(struct net_device *netdev,
1024 struct gelic_wl_info *wl = port_wl(netdev_priv(netdev)); 1024 struct gelic_wl_info *wl = port_wl(netdev_priv(netdev));
1025 struct iw_point *enc = &data->encoding; 1025 struct iw_point *enc = &data->encoding;
1026 __u16 flags; 1026 __u16 flags;
1027 unsigned int irqflag; 1027 unsigned long irqflag;
1028 int key_index, index_specified; 1028 int key_index, index_specified;
1029 int ret = 0; 1029 int ret = 0;
1030 1030
@@ -1097,7 +1097,7 @@ static int gelic_wl_get_encode(struct net_device *netdev,
1097{ 1097{
1098 struct gelic_wl_info *wl = port_wl(netdev_priv(netdev)); 1098 struct gelic_wl_info *wl = port_wl(netdev_priv(netdev));
1099 struct iw_point *enc = &data->encoding; 1099 struct iw_point *enc = &data->encoding;
1100 unsigned int irqflag; 1100 unsigned long irqflag;
1101 unsigned int key_index, index_specified; 1101 unsigned int key_index, index_specified;
1102 int ret = 0; 1102 int ret = 0;
1103 1103
@@ -1215,7 +1215,7 @@ static int gelic_wl_set_encodeext(struct net_device *netdev,
1215 struct iw_encode_ext *ext = (struct iw_encode_ext *)extra; 1215 struct iw_encode_ext *ext = (struct iw_encode_ext *)extra;
1216 __u16 alg; 1216 __u16 alg;
1217 __u16 flags; 1217 __u16 flags;
1218 unsigned int irqflag; 1218 unsigned long irqflag;
1219 int key_index; 1219 int key_index;
1220 int ret = 0; 1220 int ret = 0;
1221 1221
@@ -1303,7 +1303,7 @@ static int gelic_wl_get_encodeext(struct net_device *netdev,
1303 struct gelic_wl_info *wl = port_wl(netdev_priv(netdev)); 1303 struct gelic_wl_info *wl = port_wl(netdev_priv(netdev));
1304 struct iw_point *enc = &data->encoding; 1304 struct iw_point *enc = &data->encoding;
1305 struct iw_encode_ext *ext = (struct iw_encode_ext *)extra; 1305 struct iw_encode_ext *ext = (struct iw_encode_ext *)extra;
1306 unsigned int irqflag; 1306 unsigned long irqflag;
1307 int key_index; 1307 int key_index;
1308 int ret = 0; 1308 int ret = 0;
1309 int max_key_len; 1309 int max_key_len;
@@ -1426,7 +1426,7 @@ static int gelic_wl_priv_set_psk(struct net_device *net_dev,
1426{ 1426{
1427 struct gelic_wl_info *wl = port_wl(netdev_priv(net_dev)); 1427 struct gelic_wl_info *wl = port_wl(netdev_priv(net_dev));
1428 unsigned int len; 1428 unsigned int len;
1429 unsigned int irqflag; 1429 unsigned long irqflag;
1430 int ret = 0; 1430 int ret = 0;
1431 1431
1432 pr_debug("%s:<- len=%d\n", __func__, data->data.length); 1432 pr_debug("%s:<- len=%d\n", __func__, data->data.length);
@@ -1467,7 +1467,7 @@ static int gelic_wl_priv_get_psk(struct net_device *net_dev,
1467{ 1467{
1468 struct gelic_wl_info *wl = port_wl(netdev_priv(net_dev)); 1468 struct gelic_wl_info *wl = port_wl(netdev_priv(net_dev));
1469 char *p; 1469 char *p;
1470 unsigned int irqflag; 1470 unsigned long irqflag;
1471 unsigned int i; 1471 unsigned int i;
1472 1472
1473 pr_debug("%s:<-\n", __func__); 1473 pr_debug("%s:<-\n", __func__);
diff --git a/drivers/net/wireless/ath5k/base.c b/drivers/net/wireless/ath5k/base.c
index d9769c527346..ff3fad794b61 100644
--- a/drivers/net/wireless/ath5k/base.c
+++ b/drivers/net/wireless/ath5k/base.c
@@ -43,7 +43,9 @@
43#include <linux/version.h> 43#include <linux/version.h>
44#include <linux/module.h> 44#include <linux/module.h>
45#include <linux/delay.h> 45#include <linux/delay.h>
46#include <linux/hardirq.h>
46#include <linux/if.h> 47#include <linux/if.h>
48#include <linux/io.h>
47#include <linux/netdevice.h> 49#include <linux/netdevice.h>
48#include <linux/cache.h> 50#include <linux/cache.h>
49#include <linux/pci.h> 51#include <linux/pci.h>
@@ -471,9 +473,6 @@ ath5k_pci_probe(struct pci_dev *pdev,
471 /* Set private data */ 473 /* Set private data */
472 pci_set_drvdata(pdev, hw); 474 pci_set_drvdata(pdev, hw);
473 475
474 /* Enable msi for devices that support it */
475 pci_enable_msi(pdev);
476
477 /* Setup interrupt handler */ 476 /* Setup interrupt handler */
478 ret = request_irq(pdev->irq, ath5k_intr, IRQF_SHARED, "ath", sc); 477 ret = request_irq(pdev->irq, ath5k_intr, IRQF_SHARED, "ath", sc);
479 if (ret) { 478 if (ret) {
@@ -551,7 +550,6 @@ err_ah:
551err_irq: 550err_irq:
552 free_irq(pdev->irq, sc); 551 free_irq(pdev->irq, sc);
553err_free: 552err_free:
554 pci_disable_msi(pdev);
555 ieee80211_free_hw(hw); 553 ieee80211_free_hw(hw);
556err_map: 554err_map:
557 pci_iounmap(pdev, mem); 555 pci_iounmap(pdev, mem);
@@ -573,7 +571,6 @@ ath5k_pci_remove(struct pci_dev *pdev)
573 ath5k_detach(pdev, hw); 571 ath5k_detach(pdev, hw);
574 ath5k_hw_detach(sc->ah); 572 ath5k_hw_detach(sc->ah);
575 free_irq(pdev->irq, sc); 573 free_irq(pdev->irq, sc);
576 pci_disable_msi(pdev);
577 pci_iounmap(pdev, sc->iobase); 574 pci_iounmap(pdev, sc->iobase);
578 pci_release_region(pdev, 0); 575 pci_release_region(pdev, 0);
579 pci_disable_device(pdev); 576 pci_disable_device(pdev);
@@ -590,6 +587,9 @@ ath5k_pci_suspend(struct pci_dev *pdev, pm_message_t state)
590 ath5k_led_off(sc); 587 ath5k_led_off(sc);
591 588
592 ath5k_stop_hw(sc); 589 ath5k_stop_hw(sc);
590
591 free_irq(pdev->irq, sc);
592 pci_disable_msi(pdev);
593 pci_save_state(pdev); 593 pci_save_state(pdev);
594 pci_disable_device(pdev); 594 pci_disable_device(pdev);
595 pci_set_power_state(pdev, PCI_D3hot); 595 pci_set_power_state(pdev, PCI_D3hot);
@@ -605,15 +605,12 @@ ath5k_pci_resume(struct pci_dev *pdev)
605 struct ath5k_hw *ah = sc->ah; 605 struct ath5k_hw *ah = sc->ah;
606 int i, err; 606 int i, err;
607 607
608 err = pci_set_power_state(pdev, PCI_D0); 608 pci_restore_state(pdev);
609 if (err)
610 return err;
611 609
612 err = pci_enable_device(pdev); 610 err = pci_enable_device(pdev);
613 if (err) 611 if (err)
614 return err; 612 return err;
615 613
616 pci_restore_state(pdev);
617 /* 614 /*
618 * Suspend/Resume resets the PCI configuration space, so we have to 615 * Suspend/Resume resets the PCI configuration space, so we have to
619 * re-disable the RETRY_TIMEOUT register (0x41) to keep 616 * re-disable the RETRY_TIMEOUT register (0x41) to keep
@@ -621,7 +618,17 @@ ath5k_pci_resume(struct pci_dev *pdev)
621 */ 618 */
622 pci_write_config_byte(pdev, 0x41, 0); 619 pci_write_config_byte(pdev, 0x41, 0);
623 620
624 ath5k_init(sc); 621 pci_enable_msi(pdev);
622
623 err = request_irq(pdev->irq, ath5k_intr, IRQF_SHARED, "ath", sc);
624 if (err) {
625 ATH5K_ERR(sc, "request_irq failed\n");
626 goto err_msi;
627 }
628
629 err = ath5k_init(sc);
630 if (err)
631 goto err_irq;
625 ath5k_led_enable(sc); 632 ath5k_led_enable(sc);
626 633
627 /* 634 /*
@@ -635,6 +642,12 @@ ath5k_pci_resume(struct pci_dev *pdev)
635 ath5k_hw_reset_key(ah, i); 642 ath5k_hw_reset_key(ah, i);
636 643
637 return 0; 644 return 0;
645err_irq:
646 free_irq(pdev->irq, sc);
647err_msi:
648 pci_disable_msi(pdev);
649 pci_disable_device(pdev);
650 return err;
638} 651}
639#endif /* CONFIG_PM */ 652#endif /* CONFIG_PM */
640 653
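The new resume path acquires MSI, then the IRQ handler, then runs
ath5k_init(), and the added err_irq/err_msi labels undo those steps in
reverse on failure. That unwind idiom in isolation, with placeholder
acquire/release functions:

#include <stdio.h>

static int acquire_a(void) { puts("A up"); return 0; }
static int acquire_b(void) { puts("B up"); return 0; }
static int acquire_c(void) { puts("C fails"); return -1; }
static void release_b(void) { puts("B down"); }
static void release_a(void) { puts("A down"); }

/* Acquire A, B, C; on failure unwind whatever already succeeded. */
static int bring_up(void)
{
    int err;

    err = acquire_a();
    if (err)
        return err;
    err = acquire_b();
    if (err)
        goto err_a;
    err = acquire_c();
    if (err)
        goto err_b;
    return 0;

err_b:
    release_b();
err_a:
    release_a();
    return err;
}

int main(void) { return bring_up() ? 1 : 0; }
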
@@ -1224,7 +1237,7 @@ ath5k_txbuf_setup(struct ath5k_softc *sc, struct ath5k_buf *bf)
1224 1237
1225 pktlen = skb->len; 1238 pktlen = skb->len;
1226 1239
1227 if (!(info->flags & IEEE80211_TX_CTL_DO_NOT_ENCRYPT)) { 1240 if (info->control.hw_key) {
1228 keyidx = info->control.hw_key->hw_key_idx; 1241 keyidx = info->control.hw_key->hw_key_idx;
1229 pktlen += info->control.icv_len; 1242 pktlen += info->control.icv_len;
1230 } 1243 }
@@ -1249,6 +1262,7 @@ ath5k_txbuf_setup(struct ath5k_softc *sc, struct ath5k_buf *bf)
1249 1262
1250 txq->link = &ds->ds_link; 1263 txq->link = &ds->ds_link;
1251 ath5k_hw_tx_start(ah, txq->qnum); 1264 ath5k_hw_tx_start(ah, txq->qnum);
1265 mmiowb();
1252 spin_unlock_bh(&txq->lock); 1266 spin_unlock_bh(&txq->lock);
1253 1267
1254 return 0; 1268 return 0;
@@ -1583,7 +1597,6 @@ ath5k_rx_stop(struct ath5k_softc *sc)
1583 ath5k_hw_stop_pcu_recv(ah); /* disable PCU */ 1597 ath5k_hw_stop_pcu_recv(ah); /* disable PCU */
1584 ath5k_hw_set_rx_filter(ah, 0); /* clear recv filter */ 1598 ath5k_hw_set_rx_filter(ah, 0); /* clear recv filter */
1585 ath5k_hw_stop_rx_dma(ah); /* disable DMA engine */ 1599 ath5k_hw_stop_rx_dma(ah); /* disable DMA engine */
1586 mdelay(3); /* 3ms is long enough for 1 frame */
1587 1600
1588 ath5k_debug_printrxbuffs(sc, ah); 1601 ath5k_debug_printrxbuffs(sc, ah);
1589 1602
@@ -1682,31 +1695,44 @@ ath5k_tasklet_rx(unsigned long data)
1682 struct ath5k_rx_status rs = {}; 1695 struct ath5k_rx_status rs = {};
1683 struct sk_buff *skb; 1696 struct sk_buff *skb;
1684 struct ath5k_softc *sc = (void *)data; 1697 struct ath5k_softc *sc = (void *)data;
1685 struct ath5k_buf *bf; 1698 struct ath5k_buf *bf, *bf_last;
1686 struct ath5k_desc *ds; 1699 struct ath5k_desc *ds;
1687 int ret; 1700 int ret;
1688 int hdrlen; 1701 int hdrlen;
1689 int pad; 1702 int pad;
1690 1703
1691 spin_lock(&sc->rxbuflock); 1704 spin_lock(&sc->rxbuflock);
1705 if (list_empty(&sc->rxbuf)) {
1706 ATH5K_WARN(sc, "empty rx buf pool\n");
1707 goto unlock;
1708 }
1709 bf_last = list_entry(sc->rxbuf.prev, struct ath5k_buf, list);
1692 do { 1710 do {
1693 rxs.flag = 0; 1711 rxs.flag = 0;
1694 1712
1695 if (unlikely(list_empty(&sc->rxbuf))) {
1696 ATH5K_WARN(sc, "empty rx buf pool\n");
1697 break;
1698 }
1699 bf = list_first_entry(&sc->rxbuf, struct ath5k_buf, list); 1713 bf = list_first_entry(&sc->rxbuf, struct ath5k_buf, list);
1700 BUG_ON(bf->skb == NULL); 1714 BUG_ON(bf->skb == NULL);
1701 skb = bf->skb; 1715 skb = bf->skb;
1702 ds = bf->desc; 1716 ds = bf->desc;
1703 1717
1704 /* TODO only one segment */ 1718 /*
 1705 pci_dma_sync_single_for_cpu(sc->pdev, sc->desc_daddr, 1719 * The last buffer must not be freed, to ensure the hardware
 1706 sc->desc_len, PCI_DMA_FROMDEVICE); 1720 * keeps functioning. Once the hardware has also finished the
 1707 1721 * packet next to it, we know it no longer uses it and can go on.
1708 if (unlikely(ds->ds_link == bf->daddr)) /* this is the end */ 1722 */
1709 break; 1723 if (bf_last == bf)
1724 bf->flags |= 1;
1725 if (bf->flags) {
1726 struct ath5k_buf *bf_next = list_entry(bf->list.next,
1727 struct ath5k_buf, list);
1728 ret = sc->ah->ah_proc_rx_desc(sc->ah, bf_next->desc,
1729 &rs);
1730 if (ret)
1731 break;
1732 bf->flags &= ~1;
 1733 /* skip the overwritten one (even its status is martian) */
1734 goto next;
1735 }
1710 1736
1711 ret = sc->ah->ah_proc_rx_desc(sc->ah, ds, &rs); 1737 ret = sc->ah->ah_proc_rx_desc(sc->ah, ds, &rs);
1712 if (unlikely(ret == -EINPROGRESS)) 1738 if (unlikely(ret == -EINPROGRESS))
@@ -1752,8 +1778,6 @@ ath5k_tasklet_rx(unsigned long data)
1752 goto next; 1778 goto next;
1753 } 1779 }
1754accept: 1780accept:
1755 pci_dma_sync_single_for_cpu(sc->pdev, bf->skbaddr,
1756 rs.rs_datalen, PCI_DMA_FROMDEVICE);
1757 pci_unmap_single(sc->pdev, bf->skbaddr, sc->rxbufsize, 1781 pci_unmap_single(sc->pdev, bf->skbaddr, sc->rxbufsize,
1758 PCI_DMA_FROMDEVICE); 1782 PCI_DMA_FROMDEVICE);
1759 bf->skb = NULL; 1783 bf->skb = NULL;
@@ -1816,6 +1840,7 @@ accept:
1816next: 1840next:
1817 list_move_tail(&bf->list, &sc->rxbuf); 1841 list_move_tail(&bf->list, &sc->rxbuf);
1818 } while (ath5k_rxbuf_setup(sc, bf) == 0); 1842 } while (ath5k_rxbuf_setup(sc, bf) == 0);
1843unlock:
1819 spin_unlock(&sc->rxbuflock); 1844 spin_unlock(&sc->rxbuflock);
1820} 1845}
1821 1846
@@ -1840,9 +1865,6 @@ ath5k_tx_processq(struct ath5k_softc *sc, struct ath5k_txq *txq)
1840 list_for_each_entry_safe(bf, bf0, &txq->q, list) { 1865 list_for_each_entry_safe(bf, bf0, &txq->q, list) {
1841 ds = bf->desc; 1866 ds = bf->desc;
1842 1867
1843 /* TODO only one segment */
1844 pci_dma_sync_single_for_cpu(sc->pdev, sc->desc_daddr,
1845 sc->desc_len, PCI_DMA_FROMDEVICE);
1846 ret = sc->ah->ah_proc_tx_desc(sc->ah, ds, &ts); 1868 ret = sc->ah->ah_proc_tx_desc(sc->ah, ds, &ts);
1847 if (unlikely(ret == -EINPROGRESS)) 1869 if (unlikely(ret == -EINPROGRESS))
1848 break; 1870 break;
@@ -2015,8 +2037,6 @@ ath5k_beacon_send(struct ath5k_softc *sc)
2015 ATH5K_WARN(sc, "beacon queue %u didn't stop?\n", sc->bhalq); 2037 ATH5K_WARN(sc, "beacon queue %u didn't stop?\n", sc->bhalq);
2016 /* NB: hw still stops DMA, so proceed */ 2038 /* NB: hw still stops DMA, so proceed */
2017 } 2039 }
2018 pci_dma_sync_single_for_cpu(sc->pdev, bf->skbaddr, bf->skb->len,
2019 PCI_DMA_TODEVICE);
2020 2040
2021 ath5k_hw_put_tx_buf(ah, sc->bhalq, bf->daddr); 2041 ath5k_hw_put_tx_buf(ah, sc->bhalq, bf->daddr);
2022 ath5k_hw_tx_start(ah, sc->bhalq); 2042 ath5k_hw_tx_start(ah, sc->bhalq);
@@ -2240,6 +2260,7 @@ ath5k_init(struct ath5k_softc *sc)
2240 2260
2241 ret = 0; 2261 ret = 0;
2242done: 2262done:
2263 mmiowb();
2243 mutex_unlock(&sc->lock); 2264 mutex_unlock(&sc->lock);
2244 return ret; 2265 return ret;
2245} 2266}
@@ -2272,6 +2293,7 @@ ath5k_stop_locked(struct ath5k_softc *sc)
2272 if (!test_bit(ATH_STAT_INVALID, sc->status)) { 2293 if (!test_bit(ATH_STAT_INVALID, sc->status)) {
2273 ath5k_led_off(sc); 2294 ath5k_led_off(sc);
2274 ath5k_hw_set_intr(ah, 0); 2295 ath5k_hw_set_intr(ah, 0);
2296 synchronize_irq(sc->pdev->irq);
2275 } 2297 }
2276 ath5k_txq_cleanup(sc); 2298 ath5k_txq_cleanup(sc);
2277 if (!test_bit(ATH_STAT_INVALID, sc->status)) { 2299 if (!test_bit(ATH_STAT_INVALID, sc->status)) {
@@ -2321,9 +2343,13 @@ ath5k_stop_hw(struct ath5k_softc *sc)
2321 } 2343 }
2322 } 2344 }
2323 ath5k_txbuf_free(sc, sc->bbuf); 2345 ath5k_txbuf_free(sc, sc->bbuf);
2346 mmiowb();
2324 mutex_unlock(&sc->lock); 2347 mutex_unlock(&sc->lock);
2325 2348
2326 del_timer_sync(&sc->calib_tim); 2349 del_timer_sync(&sc->calib_tim);
2350 tasklet_kill(&sc->rxtq);
2351 tasklet_kill(&sc->txtq);
2352 tasklet_kill(&sc->restq);
2327 2353
2328 return ret; 2354 return ret;
2329} 2355}
@@ -2550,8 +2576,6 @@ ath5k_init_leds(struct ath5k_softc *sc)
2550 struct pci_dev *pdev = sc->pdev; 2576 struct pci_dev *pdev = sc->pdev;
2551 char name[ATH5K_LED_MAX_NAME_LEN + 1]; 2577 char name[ATH5K_LED_MAX_NAME_LEN + 1];
2552 2578
2553 sc->led_on = 0; /* active low */
2554
2555 /* 2579 /*
2556 * Auto-enable soft led processing for IBM cards and for 2580 * Auto-enable soft led processing for IBM cards and for
2557 * 5211 minipci cards. 2581 * 5211 minipci cards.
@@ -2560,11 +2584,13 @@ ath5k_init_leds(struct ath5k_softc *sc)
2560 pdev->device == PCI_DEVICE_ID_ATHEROS_AR5211) { 2584 pdev->device == PCI_DEVICE_ID_ATHEROS_AR5211) {
2561 __set_bit(ATH_STAT_LEDSOFT, sc->status); 2585 __set_bit(ATH_STAT_LEDSOFT, sc->status);
2562 sc->led_pin = 0; 2586 sc->led_pin = 0;
2587 sc->led_on = 0; /* active low */
2563 } 2588 }
2564 /* Enable softled on PIN1 on HP Compaq nc6xx, nc4000 & nx5000 laptops */ 2589 /* Enable softled on PIN1 on HP Compaq nc6xx, nc4000 & nx5000 laptops */
2565 if (pdev->subsystem_vendor == PCI_VENDOR_ID_COMPAQ) { 2590 if (pdev->subsystem_vendor == PCI_VENDOR_ID_COMPAQ) {
2566 __set_bit(ATH_STAT_LEDSOFT, sc->status); 2591 __set_bit(ATH_STAT_LEDSOFT, sc->status);
2567 sc->led_pin = 1; 2592 sc->led_pin = 1;
2593 sc->led_on = 1; /* active high */
2568 } 2594 }
2569 if (!test_bit(ATH_STAT_LEDSOFT, sc->status)) 2595 if (!test_bit(ATH_STAT_LEDSOFT, sc->status))
2570 goto out; 2596 goto out;
@@ -2783,6 +2809,7 @@ ath5k_config_interface(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
2783 /* XXX: assoc id is set to 0 for now, mac80211 doesn't have 2809 /* XXX: assoc id is set to 0 for now, mac80211 doesn't have
2784 * a clean way of letting us retrieve this yet. */ 2810 * a clean way of letting us retrieve this yet. */
2785 ath5k_hw_set_associd(ah, ah->ah_bssid, 0); 2811 ath5k_hw_set_associd(ah, ah->ah_bssid, 0);
2812 mmiowb();
2786 } 2813 }
2787 2814
2788 if (conf->changed & IEEE80211_IFCC_BEACON && 2815 if (conf->changed & IEEE80211_IFCC_BEACON &&
@@ -2971,6 +2998,7 @@ ath5k_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd,
2971 } 2998 }
2972 2999
2973unlock: 3000unlock:
3001 mmiowb();
2974 mutex_unlock(&sc->lock); 3002 mutex_unlock(&sc->lock);
2975 return ret; 3003 return ret;
2976} 3004}
@@ -3032,8 +3060,6 @@ ath5k_beacon_update(struct ieee80211_hw *hw, struct sk_buff *skb)
3032 3060
3033 ath5k_debug_dump_skb(sc, skb, "BC ", 1); 3061 ath5k_debug_dump_skb(sc, skb, "BC ", 1);
3034 3062
3035 mutex_lock(&sc->lock);
3036
3037 if (sc->opmode != IEEE80211_IF_TYPE_IBSS) { 3063 if (sc->opmode != IEEE80211_IF_TYPE_IBSS) {
3038 ret = -EIO; 3064 ret = -EIO;
3039 goto end; 3065 goto end;
@@ -3044,11 +3070,12 @@ ath5k_beacon_update(struct ieee80211_hw *hw, struct sk_buff *skb)
3044 ret = ath5k_beacon_setup(sc, sc->bbuf); 3070 ret = ath5k_beacon_setup(sc, sc->bbuf);
3045 if (ret) 3071 if (ret)
3046 sc->bbuf->skb = NULL; 3072 sc->bbuf->skb = NULL;
3047 else 3073 else {
3048 ath5k_beacon_config(sc); 3074 ath5k_beacon_config(sc);
3075 mmiowb();
3076 }
3049 3077
3050end: 3078end:
3051 mutex_unlock(&sc->lock);
3052 return ret; 3079 return ret;
3053} 3080}
3054 3081
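[Editor's note: the ath5k hunks above consistently add mmiowb() between the last register write and mutex_unlock(). Below is a minimal kernel-style sketch of that pattern, not code from the patch; ath5k_example_op and do_register_writes are hypothetical names.]

	/*
	 * Sketch, assuming a kernel build environment: every path that
	 * writes device registers under sc->lock issues mmiowb() before
	 * dropping the lock.
	 */
	static int ath5k_example_op(struct ath5k_softc *sc)
	{
		int ret;

		mutex_lock(&sc->lock);
		ret = do_register_writes(sc);	/* hypothetical: MMIO writes */
		mmiowb();	/* order MMIO writes ahead of the unlock */
		mutex_unlock(&sc->lock);
		return ret;
	}

On some architectures, MMIO writes from two CPUs that take the lock in turn can still reach the device out of order; mmiowb() before the unlock closes that window.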
diff --git a/drivers/net/wireless/ath5k/base.h b/drivers/net/wireless/ath5k/base.h
index 47f414b09e67..d7e03e6b8271 100644
--- a/drivers/net/wireless/ath5k/base.h
+++ b/drivers/net/wireless/ath5k/base.h
@@ -56,7 +56,7 @@
56 56
57struct ath5k_buf { 57struct ath5k_buf {
58 struct list_head list; 58 struct list_head list;
59 unsigned int flags; /* tx descriptor flags */ 59 unsigned int flags; /* rx descriptor flags */
60 struct ath5k_desc *desc; /* virtual addr of desc */ 60 struct ath5k_desc *desc; /* virtual addr of desc */
61 dma_addr_t daddr; /* physical addr of desc */ 61 dma_addr_t daddr; /* physical addr of desc */
62 struct sk_buff *skb; /* skbuff for buf */ 62 struct sk_buff *skb; /* skbuff for buf */
diff --git a/drivers/net/wireless/ath5k/hw.c b/drivers/net/wireless/ath5k/hw.c
index c6d12c53bda4..7ca87a557312 100644
--- a/drivers/net/wireless/ath5k/hw.c
+++ b/drivers/net/wireless/ath5k/hw.c
@@ -1440,6 +1440,7 @@ int ath5k_hw_stop_tx_dma(struct ath5k_hw *ah, unsigned int queue)
1440 1440
1441 /* Stop queue */ 1441 /* Stop queue */
1442 ath5k_hw_reg_write(ah, tx_queue, AR5K_CR); 1442 ath5k_hw_reg_write(ah, tx_queue, AR5K_CR);
1443 ath5k_hw_reg_read(ah, AR5K_CR);
1443 } else { 1444 } else {
1444 /* 1445 /*
1445 * Schedule TX disable and wait until queue is empty 1446 * Schedule TX disable and wait until queue is empty
@@ -1456,6 +1457,8 @@ int ath5k_hw_stop_tx_dma(struct ath5k_hw *ah, unsigned int queue)
1456 1457
1457 /* Clear register */ 1458 /* Clear register */
1458 ath5k_hw_reg_write(ah, 0, AR5K_QCU_TXD); 1459 ath5k_hw_reg_write(ah, 0, AR5K_QCU_TXD);
1460 if (pending)
1461 return -EBUSY;
1459 } 1462 }
1460 1463
1461 /* TODO: Check for success else return error */ 1464 /* TODO: Check for success else return error */
@@ -1716,6 +1719,7 @@ enum ath5k_int ath5k_hw_set_intr(struct ath5k_hw *ah, enum ath5k_int new_mask)
1716 1719
1717 /* ..re-enable interrupts */ 1720 /* ..re-enable interrupts */
1718 ath5k_hw_reg_write(ah, AR5K_IER_ENABLE, AR5K_IER); 1721 ath5k_hw_reg_write(ah, AR5K_IER_ENABLE, AR5K_IER);
1722 ath5k_hw_reg_read(ah, AR5K_IER);
1719 1723
1720 return old_mask; 1724 return old_mask;
1721} 1725}
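[Editor's note: the hw.c hunks follow each critical register write (AR5K_CR, AR5K_IER) with a dummy read of the same register. A hedged sketch of the pattern using generic MMIO accessors; the register offset is an assumption for illustration.]

	/* PCI writes are posted; reading back from the same device
	 * forces the preceding write out to the bus. */
	static void example_flush_write(void __iomem *regs)
	{
		iowrite32(0x1, regs + 0x0008);	/* post the write */
		ioread32(regs + 0x0008);	/* flush it with a readback */
	}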
diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c
index e78319aa47c1..3bf3a869361f 100644
--- a/drivers/net/wireless/b43/main.c
+++ b/drivers/net/wireless/b43/main.c
@@ -4645,8 +4645,7 @@ static int b43_wireless_init(struct ssb_device *dev)
4645 } 4645 }
4646 4646
4647 /* fill hw info */ 4647 /* fill hw info */
4648 hw->flags = IEEE80211_HW_HOST_GEN_BEACON_TEMPLATE | 4648 hw->flags = IEEE80211_HW_RX_INCLUDES_FCS |
4649 IEEE80211_HW_RX_INCLUDES_FCS |
4650 IEEE80211_HW_SIGNAL_DBM | 4649 IEEE80211_HW_SIGNAL_DBM |
4651 IEEE80211_HW_NOISE_DBM; 4650 IEEE80211_HW_NOISE_DBM;
4652 4651
diff --git a/drivers/net/wireless/b43/xmit.c b/drivers/net/wireless/b43/xmit.c
index 8d54502222a6..9dda8169f7cc 100644
--- a/drivers/net/wireless/b43/xmit.c
+++ b/drivers/net/wireless/b43/xmit.c
@@ -192,7 +192,7 @@ int b43_generate_txhdr(struct b43_wldev *dev,
192 const struct b43_phy *phy = &dev->phy; 192 const struct b43_phy *phy = &dev->phy;
193 const struct ieee80211_hdr *wlhdr = 193 const struct ieee80211_hdr *wlhdr =
194 (const struct ieee80211_hdr *)fragment_data; 194 (const struct ieee80211_hdr *)fragment_data;
195 int use_encryption = (!(info->flags & IEEE80211_TX_CTL_DO_NOT_ENCRYPT)); 195 int use_encryption = !!info->control.hw_key;
196 __le16 fctl = wlhdr->frame_control; 196 __le16 fctl = wlhdr->frame_control;
197 struct ieee80211_rate *fbrate; 197 struct ieee80211_rate *fbrate;
198 u8 rate, rate_fb; 198 u8 rate, rate_fb;
diff --git a/drivers/net/wireless/b43legacy/main.c b/drivers/net/wireless/b43legacy/main.c
index a1b8bf3ee732..2541c81932f0 100644
--- a/drivers/net/wireless/b43legacy/main.c
+++ b/drivers/net/wireless/b43legacy/main.c
@@ -3702,8 +3702,7 @@ static int b43legacy_wireless_init(struct ssb_device *dev)
3702 } 3702 }
3703 3703
3704 /* fill hw info */ 3704 /* fill hw info */
3705 hw->flags = IEEE80211_HW_HOST_GEN_BEACON_TEMPLATE | 3705 hw->flags = IEEE80211_HW_RX_INCLUDES_FCS |
3706 IEEE80211_HW_RX_INCLUDES_FCS |
3707 IEEE80211_HW_SIGNAL_DBM | 3706 IEEE80211_HW_SIGNAL_DBM |
3708 IEEE80211_HW_NOISE_DBM; 3707 IEEE80211_HW_NOISE_DBM;
3709 hw->queues = 1; /* FIXME: hardware has more queues */ 3708 hw->queues = 1; /* FIXME: hardware has more queues */
@@ -3846,10 +3845,10 @@ static int b43legacy_resume(struct ssb_device *dev)
3846 goto out; 3845 goto out;
3847 } 3846 }
3848 } 3847 }
3849 mutex_unlock(&wl->mutex);
3850 3848
3851 b43legacydbg(wl, "Device resumed.\n"); 3849 b43legacydbg(wl, "Device resumed.\n");
3852out: 3850out:
3851 mutex_unlock(&wl->mutex);
3853 return err; 3852 return err;
3854} 3853}
3855 3854
diff --git a/drivers/net/wireless/b43legacy/xmit.c b/drivers/net/wireless/b43legacy/xmit.c
index e969ed8d412d..68e1f8c78727 100644
--- a/drivers/net/wireless/b43legacy/xmit.c
+++ b/drivers/net/wireless/b43legacy/xmit.c
@@ -192,7 +192,7 @@ static int generate_txhdr_fw3(struct b43legacy_wldev *dev,
192 u16 cookie) 192 u16 cookie)
193{ 193{
194 const struct ieee80211_hdr *wlhdr; 194 const struct ieee80211_hdr *wlhdr;
195 int use_encryption = (!(info->flags & IEEE80211_TX_CTL_DO_NOT_ENCRYPT)); 195 int use_encryption = !!info->control.hw_key;
196 u16 fctl; 196 u16 fctl;
197 u8 rate; 197 u8 rate;
198 struct ieee80211_rate *rate_fb; 198 struct ieee80211_rate *rate_fb;
diff --git a/drivers/net/wireless/ipw2100.c b/drivers/net/wireless/ipw2100.c
index 5bf9e00b070c..c6f886ec08a3 100644
--- a/drivers/net/wireless/ipw2100.c
+++ b/drivers/net/wireless/ipw2100.c
@@ -6442,6 +6442,7 @@ static int ipw2100_resume(struct pci_dev *pci_dev)
6442 if (err) { 6442 if (err) {
6443 printk(KERN_ERR "%s: pci_enable_device failed on resume\n", 6443 printk(KERN_ERR "%s: pci_enable_device failed on resume\n",
6444 dev->name); 6444 dev->name);
6445 mutex_unlock(&priv->action_mutex);
6445 return err; 6446 return err;
6446 } 6447 }
6447 pci_restore_state(pci_dev); 6448 pci_restore_state(pci_dev);
@@ -7146,7 +7147,7 @@ static int ipw2100_wx_get_rate(struct net_device *dev,
7146 err = ipw2100_get_ordinal(priv, IPW_ORD_CURRENT_TX_RATE, &val, &len); 7147 err = ipw2100_get_ordinal(priv, IPW_ORD_CURRENT_TX_RATE, &val, &len);
7147 if (err) { 7148 if (err) {
7148 IPW_DEBUG_WX("failed querying ordinals.\n"); 7149 IPW_DEBUG_WX("failed querying ordinals.\n");
7149 return err; 7150 goto done;
7150 } 7151 }
7151 7152
7152 switch (val & TX_RATE_MASK) { 7153 switch (val & TX_RATE_MASK) {
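[Editor's note: the b43legacy resume fix above and these ipw2100 changes converge on the same shape: a single exit label that releases the lock, so error paths cannot leak the mutex. A kernel-style sketch with hypothetical names.]

	static int example_resume(struct example_priv *priv)
	{
		int err;

		mutex_lock(&priv->mutex);
		err = example_hw_init(priv);	/* hypothetical helper */
		if (err)
			goto out;	/* error path still unlocks below */
		err = example_start(priv);	/* hypothetical helper */
	out:
		mutex_unlock(&priv->mutex);
		return err;
	}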
diff --git a/drivers/net/wireless/iwlwifi/iwl-3945.c b/drivers/net/wireless/iwlwifi/iwl-3945.c
index c2a76785b665..a51e0eaa1334 100644
--- a/drivers/net/wireless/iwlwifi/iwl-3945.c
+++ b/drivers/net/wireless/iwlwifi/iwl-3945.c
@@ -630,7 +630,9 @@ static void iwl3945_pass_packet_to_mac80211(struct iwl3945_priv *priv,
630 struct ieee80211_rx_status *stats) 630 struct ieee80211_rx_status *stats)
631{ 631{
632 struct iwl3945_rx_packet *pkt = (struct iwl3945_rx_packet *)rxb->skb->data; 632 struct iwl3945_rx_packet *pkt = (struct iwl3945_rx_packet *)rxb->skb->data;
633#ifdef CONFIG_IWL3945_LEDS
633 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)IWL_RX_DATA(pkt); 634 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)IWL_RX_DATA(pkt);
635#endif
634 struct iwl3945_rx_frame_hdr *rx_hdr = IWL_RX_HDR(pkt); 636 struct iwl3945_rx_frame_hdr *rx_hdr = IWL_RX_HDR(pkt);
635 struct iwl3945_rx_frame_end *rx_end = IWL_RX_END(pkt); 637 struct iwl3945_rx_frame_end *rx_end = IWL_RX_END(pkt);
636 short len = le16_to_cpu(rx_hdr->len); 638 short len = le16_to_cpu(rx_hdr->len);
diff --git a/drivers/net/wireless/iwlwifi/iwl-core.c b/drivers/net/wireless/iwlwifi/iwl-core.c
index a44188bf4459..e3427c205ccf 100644
--- a/drivers/net/wireless/iwlwifi/iwl-core.c
+++ b/drivers/net/wireless/iwlwifi/iwl-core.c
@@ -818,8 +818,7 @@ int iwl_setup_mac(struct iwl_priv *priv)
818 hw->rate_control_algorithm = "iwl-4965-rs"; 818 hw->rate_control_algorithm = "iwl-4965-rs";
819 819
820 /* Tell mac80211 our characteristics */ 820 /* Tell mac80211 our characteristics */
821 hw->flags = IEEE80211_HW_HOST_GEN_BEACON_TEMPLATE | 821 hw->flags = IEEE80211_HW_SIGNAL_DBM |
822 IEEE80211_HW_SIGNAL_DBM |
823 IEEE80211_HW_NOISE_DBM; 822 IEEE80211_HW_NOISE_DBM;
824 /* Default value; 4 EDCA QOS priorities */ 823 /* Default value; 4 EDCA QOS priorities */
825 hw->queues = 4; 824 hw->queues = 4;
diff --git a/drivers/net/wireless/iwlwifi/iwl-debug.h b/drivers/net/wireless/iwlwifi/iwl-debug.h
index 58384805a494..d6d729e86bdb 100644
--- a/drivers/net/wireless/iwlwifi/iwl-debug.h
+++ b/drivers/net/wireless/iwlwifi/iwl-debug.h
@@ -68,12 +68,8 @@ void iwl_dbgfs_unregister(struct iwl_priv *priv);
68#endif 68#endif
69 69
70#else 70#else
71static inline void IWL_DEBUG(int level, const char *fmt, ...) 71#define IWL_DEBUG(level, fmt, args...)
72{ 72#define IWL_DEBUG_LIMIT(level, fmt, args...)
73}
74static inline void IWL_DEBUG_LIMIT(int level, const char *fmt, ...)
75{
76}
77#endif /* CONFIG_IWLWIFI_DEBUG */ 73#endif /* CONFIG_IWLWIFI_DEBUG */
78 74
79 75
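[Editor's note: the iwl-debug.h hunk replaces empty static-inline stubs with empty variadic macros, so debug statements and their arguments vanish entirely from non-debug builds. A minimal userspace model; DEBUG stands in for CONFIG_IWLWIFI_DEBUG.]

	#include <stdio.h>

	/* With the macro form, the format string and arguments are not
	 * even evaluated when DEBUG is off. The trade-off is that the
	 * inline form type-checked arguments in all builds. */
	#ifdef DEBUG
	#define DBG(fmt, args...) printf(fmt, ##args)
	#else
	#define DBG(fmt, args...)	/* expands to nothing */
	#endif

	int main(void)
	{
		DBG("debug value: %d\n", 42);	/* compiled out unless -DDEBUG */
		return 0;
	}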
diff --git a/drivers/net/wireless/iwlwifi/iwl-led.c b/drivers/net/wireless/iwlwifi/iwl-led.c
index 899d7a2567a8..61250e6a7d1b 100644
--- a/drivers/net/wireless/iwlwifi/iwl-led.c
+++ b/drivers/net/wireless/iwlwifi/iwl-led.c
@@ -268,7 +268,9 @@ static int iwl_get_blink_rate(struct iwl_priv *priv)
268 if (tpt < 0) /* wrapparound */ 268 if (tpt < 0) /* wrapparound */
269 tpt = -tpt; 269 tpt = -tpt;
270 270
271 IWL_DEBUG_LED("tpt %lld current_tpt %lld\n", tpt, current_tpt); 271 IWL_DEBUG_LED("tpt %lld current_tpt %llu\n",
272 (long long)tpt,
273 (unsigned long long)current_tpt);
272 priv->led_tpt = current_tpt; 274 priv->led_tpt = current_tpt;
273 275
274 if (!priv->allow_blinking) 276 if (!priv->allow_blinking)
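[Editor's note: the change above silences 64-bit format warnings by casting to long long / unsigned long long. A runnable userspace model of the same fix; the values are illustrative.]

	#include <stdio.h>
	#include <stdint.h>

	/* s64/u64 may be long or long long depending on the architecture,
	 * so explicit casts keep %lld/%llu correct everywhere. */
	int main(void)
	{
		int64_t tpt = -5;
		uint64_t current_tpt = 12345;

		if (tpt < 0)	/* wraparound */
			tpt = -tpt;
		printf("tpt %lld current_tpt %llu\n",
		       (long long)tpt, (unsigned long long)current_tpt);
		return 0;
	}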
diff --git a/drivers/net/wireless/iwlwifi/iwl-scan.c b/drivers/net/wireless/iwlwifi/iwl-scan.c
index efc750d2fc5c..5a00ac23e2d0 100644
--- a/drivers/net/wireless/iwlwifi/iwl-scan.c
+++ b/drivers/net/wireless/iwlwifi/iwl-scan.c
@@ -270,6 +270,7 @@ static void iwl_rx_scan_results_notif(struct iwl_priv *priv,
270static void iwl_rx_scan_complete_notif(struct iwl_priv *priv, 270static void iwl_rx_scan_complete_notif(struct iwl_priv *priv,
271 struct iwl_rx_mem_buffer *rxb) 271 struct iwl_rx_mem_buffer *rxb)
272{ 272{
273#ifdef CONFIG_IWLWIFI_DEBUG
273 struct iwl_rx_packet *pkt = (struct iwl_rx_packet *)rxb->skb->data; 274 struct iwl_rx_packet *pkt = (struct iwl_rx_packet *)rxb->skb->data;
274 struct iwl_scancomplete_notification *scan_notif = (void *)pkt->u.raw; 275 struct iwl_scancomplete_notification *scan_notif = (void *)pkt->u.raw;
275 276
@@ -277,6 +278,7 @@ static void iwl_rx_scan_complete_notif(struct iwl_priv *priv,
277 scan_notif->scanned_channels, 278 scan_notif->scanned_channels,
278 scan_notif->tsf_low, 279 scan_notif->tsf_low,
279 scan_notif->tsf_high, scan_notif->status); 280 scan_notif->tsf_high, scan_notif->status);
281#endif
280 282
281 /* The HW is no longer scanning */ 283 /* The HW is no longer scanning */
282 clear_bit(STATUS_SCAN_HW, &priv->status); 284 clear_bit(STATUS_SCAN_HW, &priv->status);
diff --git a/drivers/net/wireless/iwlwifi/iwl-tx.c b/drivers/net/wireless/iwlwifi/iwl-tx.c
index 9b50b1052b09..f72cd0bf6aa3 100644
--- a/drivers/net/wireless/iwlwifi/iwl-tx.c
+++ b/drivers/net/wireless/iwlwifi/iwl-tx.c
@@ -906,7 +906,7 @@ int iwl_tx_skb(struct iwl_priv *priv, struct sk_buff *skb)
906 * first entry */ 906 * first entry */
907 iwl_hw_txq_attach_buf_to_tfd(priv, tfd, txcmd_phys, len); 907 iwl_hw_txq_attach_buf_to_tfd(priv, tfd, txcmd_phys, len);
908 908
909 if (!(info->flags & IEEE80211_TX_CTL_DO_NOT_ENCRYPT)) 909 if (info->control.hw_key)
910 iwl_tx_cmd_build_hwcrypto(priv, info, tx_cmd, skb, sta_id); 910 iwl_tx_cmd_build_hwcrypto(priv, info, tx_cmd, skb, sta_id);
911 911
912 /* Set up TFD's 2nd entry to point directly to remainder of skb, 912 /* Set up TFD's 2nd entry to point directly to remainder of skb,
diff --git a/drivers/net/wireless/iwlwifi/iwl3945-base.c b/drivers/net/wireless/iwlwifi/iwl3945-base.c
index 4a22d3fba75b..7c82ecfa30a4 100644
--- a/drivers/net/wireless/iwlwifi/iwl3945-base.c
+++ b/drivers/net/wireless/iwlwifi/iwl3945-base.c
@@ -2667,7 +2667,7 @@ static int iwl3945_tx_skb(struct iwl3945_priv *priv, struct sk_buff *skb)
2667 * first entry */ 2667 * first entry */
2668 iwl3945_hw_txq_attach_buf_to_tfd(priv, tfd, txcmd_phys, len); 2668 iwl3945_hw_txq_attach_buf_to_tfd(priv, tfd, txcmd_phys, len);
2669 2669
2670 if (!(info->flags & IEEE80211_TX_CTL_DO_NOT_ENCRYPT)) 2670 if (info->control.hw_key)
2671 iwl3945_build_tx_cmd_hwcrypto(priv, info, out_cmd, skb, 0); 2671 iwl3945_build_tx_cmd_hwcrypto(priv, info, out_cmd, skb, 0);
2672 2672
2673 /* Set up TFD's 2nd entry to point directly to remainder of skb, 2673 /* Set up TFD's 2nd entry to point directly to remainder of skb,
@@ -7899,8 +7899,7 @@ static int iwl3945_pci_probe(struct pci_dev *pdev, const struct pci_device_id *e
7899 priv->ibss_beacon = NULL; 7899 priv->ibss_beacon = NULL;
7900 7900
7901 /* Tell mac80211 our characteristics */ 7901 /* Tell mac80211 our characteristics */
7902 hw->flags = IEEE80211_HW_HOST_GEN_BEACON_TEMPLATE | 7902 hw->flags = IEEE80211_HW_SIGNAL_DBM |
7903 IEEE80211_HW_SIGNAL_DBM |
7904 IEEE80211_HW_NOISE_DBM; 7903 IEEE80211_HW_NOISE_DBM;
7905 7904
7906 /* 4 EDCA QOS priorities */ 7905 /* 4 EDCA QOS priorities */
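[Editor's note: b43, b43legacy, iwl-tx and iwl3945 all switch from testing the removed IEEE80211_TX_CTL_DO_NOT_ENCRYPT flag to testing info->control.hw_key, which mac80211 sets only when the frame should be hardware-encrypted. A kernel-style sketch; example_build_hwcrypto is a hypothetical helper name.]

	static void example_tx_crypto(struct example_priv *priv,
				      struct sk_buff *skb)
	{
		struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);

		if (info->control.hw_key)	/* non-NULL: encrypt in hardware */
			example_build_hwcrypto(priv, info, skb);
	}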
diff --git a/drivers/net/wireless/libertas/persistcfg.c b/drivers/net/wireless/libertas/persistcfg.c
index 6d0ff8decaf7..3309a9c3cfef 100644
--- a/drivers/net/wireless/libertas/persistcfg.c
+++ b/drivers/net/wireless/libertas/persistcfg.c
@@ -48,7 +48,7 @@ static ssize_t bootflag_get(struct device *dev,
48 if (ret) 48 if (ret)
49 return ret; 49 return ret;
50 50
51 return snprintf(buf, 12, "0x%x\n", le32_to_cpu(defs.bootflag)); 51 return snprintf(buf, 12, "%d\n", le32_to_cpu(defs.bootflag));
52} 52}
53 53
54/** 54/**
@@ -63,8 +63,8 @@ static ssize_t bootflag_set(struct device *dev, struct device_attribute *attr,
63 int ret; 63 int ret;
64 64
65 memset(&cmd, 0, sizeof(cmd)); 65 memset(&cmd, 0, sizeof(cmd));
66 ret = sscanf(buf, "%x", &datum); 66 ret = sscanf(buf, "%d", &datum);
67 if (ret != 1) 67 if ((ret != 1) || (datum > 1))
68 return -EINVAL; 68 return -EINVAL;
69 69
70 *((__le32 *)&cmd.data[0]) = cpu_to_le32(!!datum); 70 *((__le32 *)&cmd.data[0]) = cpu_to_le32(!!datum);
@@ -91,7 +91,7 @@ static ssize_t boottime_get(struct device *dev,
91 if (ret) 91 if (ret)
92 return ret; 92 return ret;
93 93
94 return snprintf(buf, 12, "0x%x\n", defs.boottime); 94 return snprintf(buf, 12, "%d\n", defs.boottime);
95} 95}
96 96
97/** 97/**
@@ -106,8 +106,8 @@ static ssize_t boottime_set(struct device *dev,
106 int ret; 106 int ret;
107 107
108 memset(&cmd, 0, sizeof(cmd)); 108 memset(&cmd, 0, sizeof(cmd));
109 ret = sscanf(buf, "%x", &datum); 109 ret = sscanf(buf, "%d", &datum);
110 if (ret != 1) 110 if ((ret != 1) || (datum > 255))
111 return -EINVAL; 111 return -EINVAL;
112 112
113 /* A too small boot time will result in the device booting into 113 /* A too small boot time will result in the device booting into
@@ -143,7 +143,7 @@ static ssize_t channel_get(struct device *dev,
143 if (ret) 143 if (ret)
144 return ret; 144 return ret;
145 145
146 return snprintf(buf, 12, "0x%x\n", le16_to_cpu(defs.channel)); 146 return snprintf(buf, 12, "%d\n", le16_to_cpu(defs.channel));
147} 147}
148 148
149/** 149/**
@@ -154,11 +154,11 @@ static ssize_t channel_set(struct device *dev, struct device_attribute *attr,
154{ 154{
155 struct lbs_private *priv = to_net_dev(dev)->priv; 155 struct lbs_private *priv = to_net_dev(dev)->priv;
156 struct cmd_ds_mesh_config cmd; 156 struct cmd_ds_mesh_config cmd;
157 uint16_t datum; 157 uint32_t datum;
158 int ret; 158 int ret;
159 159
160 memset(&cmd, 0, sizeof(cmd)); 160 memset(&cmd, 0, sizeof(cmd));
161 ret = sscanf(buf, "%hx", &datum); 161 ret = sscanf(buf, "%d", &datum);
162 if (ret != 1 || datum < 1 || datum > 11) 162 if (ret != 1 || datum < 1 || datum > 11)
163 return -EINVAL; 163 return -EINVAL;
164 164
@@ -274,8 +274,8 @@ static ssize_t protocol_id_set(struct device *dev,
274 int ret; 274 int ret;
275 275
276 memset(&cmd, 0, sizeof(cmd)); 276 memset(&cmd, 0, sizeof(cmd));
277 ret = sscanf(buf, "%x", &datum); 277 ret = sscanf(buf, "%d", &datum);
278 if (ret != 1) 278 if ((ret != 1) || (datum > 255))
279 return -EINVAL; 279 return -EINVAL;
280 280
281 /* fetch all other Information Element parameters */ 281 /* fetch all other Information Element parameters */
@@ -328,8 +328,8 @@ static ssize_t metric_id_set(struct device *dev, struct device_attribute *attr,
328 int ret; 328 int ret;
329 329
330 memset(&cmd, 0, sizeof(cmd)); 330 memset(&cmd, 0, sizeof(cmd));
331 ret = sscanf(buf, "%x", &datum); 331 ret = sscanf(buf, "%d", &datum);
332 if (ret != 1) 332 if ((ret != 1) || (datum > 255))
333 return -EINVAL; 333 return -EINVAL;
334 334
335 /* fetch all other Information Element parameters */ 335 /* fetch all other Information Element parameters */
@@ -382,8 +382,8 @@ static ssize_t capability_set(struct device *dev, struct device_attribute *attr,
382 int ret; 382 int ret;
383 383
384 memset(&cmd, 0, sizeof(cmd)); 384 memset(&cmd, 0, sizeof(cmd));
385 ret = sscanf(buf, "%x", &datum); 385 ret = sscanf(buf, "%d", &datum);
386 if (ret != 1) 386 if ((ret != 1) || (datum > 255))
387 return -EINVAL; 387 return -EINVAL;
388 388
389 /* fetch all other Information Element parameters */ 389 /* fetch all other Information Element parameters */
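[Editor's note: the persistcfg changes switch the sysfs attributes from hex to decimal parsing and reject out-of-range values before any firmware command is issued. A runnable userspace model of that parse-and-validate step; parse_u8 is a hypothetical name and the kernel code returns -EINVAL where this returns -1.]

	#include <stdio.h>

	static int parse_u8(const char *buf, unsigned int max, unsigned int *out)
	{
		unsigned int datum;

		if (sscanf(buf, "%u", &datum) != 1 || datum > max)
			return -1;	/* reject junk and out-of-range input */
		*out = datum;
		return 0;
	}

	int main(void)
	{
		unsigned int v;

		printf("\"7\"   -> %d\n", parse_u8("7", 255, &v));	/* accepted */
		printf("\"300\" -> %d\n", parse_u8("300", 255, &v));	/* rejected */
		return 0;
	}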
diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index 5816230d58f8..248d31a7aa33 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -500,7 +500,7 @@ failed_hw:
500 device_unregister(data->dev); 500 device_unregister(data->dev);
501failed_drvdata: 501failed_drvdata:
502 ieee80211_free_hw(hw); 502 ieee80211_free_hw(hw);
503 hwsim_radios[i] = 0; 503 hwsim_radios[i] = NULL;
504failed: 504failed:
505 mac80211_hwsim_free(); 505 mac80211_hwsim_free();
506 return err; 506 return err;
diff --git a/drivers/net/wireless/rt2x00/rt2500usb.c b/drivers/net/wireless/rt2x00/rt2500usb.c
index 3558cb210747..3078417b326b 100644
--- a/drivers/net/wireless/rt2x00/rt2500usb.c
+++ b/drivers/net/wireless/rt2x00/rt2500usb.c
@@ -1121,6 +1121,7 @@ static void rt2500usb_write_beacon(struct queue_entry *entry)
1121 int pipe = usb_sndbulkpipe(usb_dev, 1); 1121 int pipe = usb_sndbulkpipe(usb_dev, 1);
1122 int length; 1122 int length;
1123 u16 reg; 1123 u16 reg;
1124 u32 word, len;
1124 1125
1125 /* 1126 /*
1126 * Add the descriptor in front of the skb. 1127 * Add the descriptor in front of the skb.
@@ -1130,6 +1131,17 @@ static void rt2500usb_write_beacon(struct queue_entry *entry)
1130 skbdesc->desc = entry->skb->data; 1131 skbdesc->desc = entry->skb->data;
1131 1132
1132 /* 1133 /*
1134 * Adjust the beacon databyte count. The current number is
1135 * calculated before this function gets called, but falsely
1136 * assumes that the descriptor was already present in the SKB.
1137 */
1138 rt2x00_desc_read(skbdesc->desc, 0, &word);
1139 len = rt2x00_get_field32(word, TXD_W0_DATABYTE_COUNT);
1140 len += skbdesc->desc_len;
1141 rt2x00_set_field32(&word, TXD_W0_DATABYTE_COUNT, len);
1142 rt2x00_desc_write(skbdesc->desc, 0, word);
1143
1144 /*
1133 * Disable beaconing while we are reloading the beacon data, 1145 * Disable beaconing while we are reloading the beacon data,
1134 * otherwise we might be sending out invalid data. 1146 * otherwise we might be sending out invalid data.
1135 */ 1147 */
@@ -1650,7 +1662,6 @@ static void rt2500usb_probe_hw_mode(struct rt2x00_dev *rt2x00dev)
1650 * Initialize all hw fields. 1662 * Initialize all hw fields.
1651 */ 1663 */
1652 rt2x00dev->hw->flags = 1664 rt2x00dev->hw->flags =
1653 IEEE80211_HW_HOST_GEN_BEACON_TEMPLATE |
1654 IEEE80211_HW_RX_INCLUDES_FCS | 1665 IEEE80211_HW_RX_INCLUDES_FCS |
1655 IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING | 1666 IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING |
1656 IEEE80211_HW_SIGNAL_DBM; 1667 IEEE80211_HW_SIGNAL_DBM;
diff --git a/drivers/net/wireless/rt2x00/rt2x00.h b/drivers/net/wireless/rt2x00/rt2x00.h
index 07b03b3c7ef1..db2dc976d831 100644
--- a/drivers/net/wireless/rt2x00/rt2x00.h
+++ b/drivers/net/wireless/rt2x00/rt2x00.h
@@ -108,7 +108,10 @@
108#define SHORT_PIFS ( SIFS + SHORT_SLOT_TIME ) 108#define SHORT_PIFS ( SIFS + SHORT_SLOT_TIME )
109#define DIFS ( PIFS + SLOT_TIME ) 109#define DIFS ( PIFS + SLOT_TIME )
110#define SHORT_DIFS ( SHORT_PIFS + SHORT_SLOT_TIME ) 110#define SHORT_DIFS ( SHORT_PIFS + SHORT_SLOT_TIME )
111#define EIFS ( SIFS + (8 * (IEEE80211_HEADER + ACK_SIZE)) ) 111#define EIFS ( SIFS + DIFS + \
112 (8 * (IEEE80211_HEADER + ACK_SIZE)) )
113#define SHORT_EIFS ( SIFS + SHORT_DIFS + \
114 (8 * (IEEE80211_HEADER + ACK_SIZE)) )
112 115
113/* 116/*
114 * Chipset identification 117 * Chipset identification
@@ -597,6 +600,7 @@ enum rt2x00_flags {
597 DEVICE_STARTED_SUSPEND, 600 DEVICE_STARTED_SUSPEND,
598 DEVICE_ENABLED_RADIO, 601 DEVICE_ENABLED_RADIO,
599 DEVICE_DISABLED_RADIO_HW, 602 DEVICE_DISABLED_RADIO_HW,
603 DEVICE_DIRTY_CONFIG,
600 604
601 /* 605 /*
602 * Driver features 606 * Driver features
diff --git a/drivers/net/wireless/rt2x00/rt2x00config.c b/drivers/net/wireless/rt2x00/rt2x00config.c
index f20ca712504f..3f89516e8332 100644
--- a/drivers/net/wireless/rt2x00/rt2x00config.c
+++ b/drivers/net/wireless/rt2x00/rt2x00config.c
@@ -271,7 +271,7 @@ config:
271 libconf.sifs = SIFS; 271 libconf.sifs = SIFS;
272 libconf.pifs = short_slot_time ? SHORT_PIFS : PIFS; 272 libconf.pifs = short_slot_time ? SHORT_PIFS : PIFS;
273 libconf.difs = short_slot_time ? SHORT_DIFS : DIFS; 273 libconf.difs = short_slot_time ? SHORT_DIFS : DIFS;
274 libconf.eifs = EIFS; 274 libconf.eifs = short_slot_time ? SHORT_EIFS : EIFS;
275 } 275 }
276 276
277 libconf.conf = conf; 277 libconf.conf = conf;
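[Editor's note: the rt2x00 change corrects EIFS to include DIFS and adds a short-slot variant; the 8 * (header + ACK) term models the time to transmit an ACK at 1 Mbit/s (8 µs per byte). A runnable sketch of the arithmetic; the timing constants below are standard-ish illustrative values, not necessarily the driver's.]

	#include <stdio.h>

	#define SIFS		10	/* us, assumed */
	#define SLOT_TIME	20	/* us, assumed */
	#define SHORT_SLOT_TIME	9	/* us, assumed */
	#define PIFS		(SIFS + SLOT_TIME)
	#define SHORT_PIFS	(SIFS + SHORT_SLOT_TIME)
	#define DIFS		(PIFS + SLOT_TIME)
	#define SHORT_DIFS	(SHORT_PIFS + SHORT_SLOT_TIME)
	#define IEEE80211_HEADER 24	/* bytes */
	#define ACK_SIZE	14	/* bytes */

	#define EIFS		(SIFS + DIFS + \
				 8 * (IEEE80211_HEADER + ACK_SIZE))
	#define SHORT_EIFS	(SIFS + SHORT_DIFS + \
				 8 * (IEEE80211_HEADER + ACK_SIZE))

	int main(void)
	{
		/* prints EIFS = 364 us, SHORT_EIFS = 342 us with these inputs */
		printf("EIFS = %d us, SHORT_EIFS = %d us\n", EIFS, SHORT_EIFS);
		return 0;
	}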
diff --git a/drivers/net/wireless/rt2x00/rt2x00dev.c b/drivers/net/wireless/rt2x00/rt2x00dev.c
index 8c93eb8353b0..f42283ad7b02 100644
--- a/drivers/net/wireless/rt2x00/rt2x00dev.c
+++ b/drivers/net/wireless/rt2x00/rt2x00dev.c
@@ -1013,6 +1013,7 @@ int rt2x00lib_start(struct rt2x00_dev *rt2x00dev)
1013 rt2x00dev->intf_associated = 0; 1013 rt2x00dev->intf_associated = 0;
1014 1014
1015 __set_bit(DEVICE_STARTED, &rt2x00dev->flags); 1015 __set_bit(DEVICE_STARTED, &rt2x00dev->flags);
1016 __set_bit(DEVICE_DIRTY_CONFIG, &rt2x00dev->flags);
1016 1017
1017 return 0; 1018 return 0;
1018} 1019}
@@ -1237,9 +1238,9 @@ int rt2x00lib_resume(struct rt2x00_dev *rt2x00dev)
1237 /* 1238 /*
1238 * Reconfigure device. 1239 * Reconfigure device.
1239 */ 1240 */
1240 rt2x00lib_config(rt2x00dev, &rt2x00dev->hw->conf, 1); 1241 retval = rt2x00mac_config(rt2x00dev->hw, &rt2x00dev->hw->conf);
1241 if (!rt2x00dev->hw->conf.radio_enabled) 1242 if (retval)
1242 rt2x00lib_disable_radio(rt2x00dev); 1243 goto exit;
1243 1244
1244 /* 1245 /*
1245 * Iterator over each active interface to 1246 * Iterator over each active interface to
diff --git a/drivers/net/wireless/rt2x00/rt2x00lib.h b/drivers/net/wireless/rt2x00/rt2x00lib.h
index f2c9b0e79b5f..c5fb3a72cf37 100644
--- a/drivers/net/wireless/rt2x00/rt2x00lib.h
+++ b/drivers/net/wireless/rt2x00/rt2x00lib.h
@@ -125,13 +125,6 @@ void rt2x00queue_unmap_skb(struct rt2x00_dev *rt2x00dev, struct sk_buff *skb);
125void rt2x00queue_free_skb(struct rt2x00_dev *rt2x00dev, struct sk_buff *skb); 125void rt2x00queue_free_skb(struct rt2x00_dev *rt2x00dev, struct sk_buff *skb);
126 126
127/** 127/**
128 * rt2x00queue_free_skb - free a skb
129 * @rt2x00dev: Pointer to &struct rt2x00_dev.
130 * @skb: The skb to free.
131 */
132void rt2x00queue_free_skb(struct rt2x00_dev *rt2x00dev, struct sk_buff *skb);
133
134/**
135 * rt2x00queue_write_tx_frame - Write TX frame to hardware 128 * rt2x00queue_write_tx_frame - Write TX frame to hardware
136 * @queue: Queue over which the frame should be send 129 * @queue: Queue over which the frame should be send
137 * @skb: The skb to send 130 * @skb: The skb to send
diff --git a/drivers/net/wireless/rt2x00/rt2x00mac.c b/drivers/net/wireless/rt2x00/rt2x00mac.c
index f1dcbaa80c3c..c3ee4ecba792 100644
--- a/drivers/net/wireless/rt2x00/rt2x00mac.c
+++ b/drivers/net/wireless/rt2x00/rt2x00mac.c
@@ -63,7 +63,7 @@ static int rt2x00mac_tx_rts_cts(struct rt2x00_dev *rt2x00dev,
63 */ 63 */
64 memcpy(skb->cb, frag_skb->cb, sizeof(skb->cb)); 64 memcpy(skb->cb, frag_skb->cb, sizeof(skb->cb));
65 rts_info = IEEE80211_SKB_CB(skb); 65 rts_info = IEEE80211_SKB_CB(skb);
66 rts_info->flags |= IEEE80211_TX_CTL_DO_NOT_ENCRYPT; 66 rts_info->control.hw_key = NULL;
67 rts_info->flags &= ~IEEE80211_TX_CTL_USE_RTS_CTS; 67 rts_info->flags &= ~IEEE80211_TX_CTL_USE_RTS_CTS;
68 rts_info->flags &= ~IEEE80211_TX_CTL_USE_CTS_PROTECT; 68 rts_info->flags &= ~IEEE80211_TX_CTL_USE_CTS_PROTECT;
69 rts_info->flags &= ~IEEE80211_TX_CTL_REQ_TX_STATUS; 69 rts_info->flags &= ~IEEE80211_TX_CTL_REQ_TX_STATUS;
@@ -83,6 +83,7 @@ static int rt2x00mac_tx_rts_cts(struct rt2x00_dev *rt2x00dev,
83 (struct ieee80211_rts *)(skb->data)); 83 (struct ieee80211_rts *)(skb->data));
84 84
85 if (rt2x00queue_write_tx_frame(queue, skb)) { 85 if (rt2x00queue_write_tx_frame(queue, skb)) {
86 dev_kfree_skb_any(skb);
86 WARNING(rt2x00dev, "Failed to send RTS/CTS frame.\n"); 87 WARNING(rt2x00dev, "Failed to send RTS/CTS frame.\n");
87 return NETDEV_TX_BUSY; 88 return NETDEV_TX_BUSY;
88 } 89 }
@@ -96,7 +97,6 @@ int rt2x00mac_tx(struct ieee80211_hw *hw, struct sk_buff *skb)
96 struct ieee80211_tx_info *tx_info = IEEE80211_SKB_CB(skb); 97 struct ieee80211_tx_info *tx_info = IEEE80211_SKB_CB(skb);
97 struct ieee80211_hdr *ieee80211hdr = (struct ieee80211_hdr *)skb->data; 98 struct ieee80211_hdr *ieee80211hdr = (struct ieee80211_hdr *)skb->data;
98 enum data_queue_qid qid = skb_get_queue_mapping(skb); 99 enum data_queue_qid qid = skb_get_queue_mapping(skb);
99 struct rt2x00_intf *intf = vif_to_intf(tx_info->control.vif);
100 struct data_queue *queue; 100 struct data_queue *queue;
101 u16 frame_control; 101 u16 frame_control;
102 102
@@ -152,18 +152,6 @@ int rt2x00mac_tx(struct ieee80211_hw *hw, struct sk_buff *skb)
152 } 152 }
153 } 153 }
154 154
155 /*
156 * XXX: This is as wrong as the old mac80211 code was,
157 * due to beacons not getting sequence numbers assigned
158 * properly.
159 */
160 if (tx_info->flags & IEEE80211_TX_CTL_ASSIGN_SEQ) {
161 if (tx_info->flags & IEEE80211_TX_CTL_FIRST_FRAGMENT)
162 intf->seqno += 0x10;
163 ieee80211hdr->seq_ctrl &= cpu_to_le16(IEEE80211_SCTL_FRAG);
164 ieee80211hdr->seq_ctrl |= cpu_to_le16(intf->seqno);
165 }
166
167 if (rt2x00queue_write_tx_frame(queue, skb)) { 155 if (rt2x00queue_write_tx_frame(queue, skb)) {
168 ieee80211_stop_queue(rt2x00dev->hw, qid); 156 ieee80211_stop_queue(rt2x00dev->hw, qid);
169 return NETDEV_TX_BUSY; 157 return NETDEV_TX_BUSY;
@@ -322,6 +310,7 @@ EXPORT_SYMBOL_GPL(rt2x00mac_remove_interface);
322int rt2x00mac_config(struct ieee80211_hw *hw, struct ieee80211_conf *conf) 310int rt2x00mac_config(struct ieee80211_hw *hw, struct ieee80211_conf *conf)
323{ 311{
324 struct rt2x00_dev *rt2x00dev = hw->priv; 312 struct rt2x00_dev *rt2x00dev = hw->priv;
313 int force_reconfig;
325 314
326 /* 315 /*
327 * Mac80211 might be calling this function while we are trying 316 * Mac80211 might be calling this function while we are trying
@@ -341,7 +330,17 @@ int rt2x00mac_config(struct ieee80211_hw *hw, struct ieee80211_conf *conf)
341 rt2x00lib_toggle_rx(rt2x00dev, STATE_RADIO_RX_OFF); 330 rt2x00lib_toggle_rx(rt2x00dev, STATE_RADIO_RX_OFF);
342 } 331 }
343 332
344 rt2x00lib_config(rt2x00dev, conf, 0); 333 /*
334 * When the DEVICE_DIRTY_CONFIG flag is set, the device has recently
335 * been started and the configuration must be forced upon the hardware.
 336 * Otherwise essential registers are never
 337 * initialized correctly, which leaves the
 338 * hardware in a non-working state.
339 */
340 force_reconfig =
341 __test_and_clear_bit(DEVICE_DIRTY_CONFIG, &rt2x00dev->flags);
342
343 rt2x00lib_config(rt2x00dev, conf, force_reconfig);
345 344
346 /* 345 /*
347 * Reenable RX only if the radio should be on. 346 * Reenable RX only if the radio should be on.
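[Editor's note: the hunk above makes the first config call after start force a full reconfiguration, consuming DEVICE_DIRTY_CONFIG with __test_and_clear_bit so it fires exactly once. A runnable userspace model of the one-shot flag; test_and_clear stands in for the kernel bitop.]

	#include <stdio.h>

	static int test_and_clear(unsigned long *flags, unsigned long bit)
	{
		int old = !!(*flags & (1UL << bit));

		*flags &= ~(1UL << bit);	/* read old value, clear in one step */
		return old;
	}

	#define DEVICE_DIRTY_CONFIG 0

	int main(void)
	{
		unsigned long flags = 1UL << DEVICE_DIRTY_CONFIG;	/* set at start */

		printf("first config: force=%d\n",
		       test_and_clear(&flags, DEVICE_DIRTY_CONFIG));	/* 1 */
		printf("second config: force=%d\n",
		       test_and_clear(&flags, DEVICE_DIRTY_CONFIG));	/* 0 */
		return 0;
	}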
diff --git a/drivers/net/wireless/rt2x00/rt2x00queue.c b/drivers/net/wireless/rt2x00/rt2x00queue.c
index 7f442030f5ad..3b27f6aa860c 100644
--- a/drivers/net/wireless/rt2x00/rt2x00queue.c
+++ b/drivers/net/wireless/rt2x00/rt2x00queue.c
@@ -120,6 +120,7 @@ static void rt2x00queue_create_tx_descriptor(struct queue_entry *entry,
120{ 120{
121 struct rt2x00_dev *rt2x00dev = entry->queue->rt2x00dev; 121 struct rt2x00_dev *rt2x00dev = entry->queue->rt2x00dev;
122 struct ieee80211_tx_info *tx_info = IEEE80211_SKB_CB(entry->skb); 122 struct ieee80211_tx_info *tx_info = IEEE80211_SKB_CB(entry->skb);
123 struct rt2x00_intf *intf = vif_to_intf(tx_info->control.vif);
123 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)entry->skb->data; 124 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)entry->skb->data;
124 struct ieee80211_rate *rate = 125 struct ieee80211_rate *rate =
125 ieee80211_get_tx_rate(rt2x00dev->hw, tx_info); 126 ieee80211_get_tx_rate(rt2x00dev->hw, tx_info);
@@ -200,6 +201,31 @@ static void rt2x00queue_create_tx_descriptor(struct queue_entry *entry,
200 } 201 }
201 202
202 /* 203 /*
 204 * Hardware should insert the sequence counter.
205 * FIXME: We insert a software sequence counter first for
206 * hardware that doesn't support hardware sequence counting.
207 *
208 * This is wrong because beacons are not getting sequence
209 * numbers assigned properly.
210 *
211 * A secondary problem exists for drivers that cannot toggle
212 * sequence counting per-frame, since those will override the
213 * sequence counter given by mac80211.
214 */
215 if (tx_info->flags & IEEE80211_TX_CTL_ASSIGN_SEQ) {
216 spin_lock(&intf->lock);
217
218 if (test_bit(ENTRY_TXD_FIRST_FRAGMENT, &txdesc->flags))
219 intf->seqno += 0x10;
220 hdr->seq_ctrl &= cpu_to_le16(IEEE80211_SCTL_FRAG);
221 hdr->seq_ctrl |= cpu_to_le16(intf->seqno);
222
223 spin_unlock(&intf->lock);
224
225 __set_bit(ENTRY_TXD_GENERATE_SEQ, &txdesc->flags);
226 }
227
228 /*
203 * PLCP setup 229 * PLCP setup
204 * Length calculation depends on OFDM/CCK rate. 230 * Length calculation depends on OFDM/CCK rate.
205 */ 231 */
@@ -466,9 +492,12 @@ void rt2x00queue_init_rx(struct rt2x00_dev *rt2x00dev)
466 if (!rt2x00dev->ops->lib->init_rxentry) 492 if (!rt2x00dev->ops->lib->init_rxentry)
467 return; 493 return;
468 494
469 for (i = 0; i < queue->limit; i++) 495 for (i = 0; i < queue->limit; i++) {
496 queue->entries[i].flags = 0;
497
470 rt2x00dev->ops->lib->init_rxentry(rt2x00dev, 498 rt2x00dev->ops->lib->init_rxentry(rt2x00dev,
471 &queue->entries[i]); 499 &queue->entries[i]);
500 }
472} 501}
473 502
474void rt2x00queue_init_tx(struct rt2x00_dev *rt2x00dev) 503void rt2x00queue_init_tx(struct rt2x00_dev *rt2x00dev)
@@ -482,9 +511,12 @@ void rt2x00queue_init_tx(struct rt2x00_dev *rt2x00dev)
482 if (!rt2x00dev->ops->lib->init_txentry) 511 if (!rt2x00dev->ops->lib->init_txentry)
483 continue; 512 continue;
484 513
485 for (i = 0; i < queue->limit; i++) 514 for (i = 0; i < queue->limit; i++) {
515 queue->entries[i].flags = 0;
516
486 rt2x00dev->ops->lib->init_txentry(rt2x00dev, 517 rt2x00dev->ops->lib->init_txentry(rt2x00dev,
487 &queue->entries[i]); 518 &queue->entries[i]);
519 }
488 } 520 }
489} 521}
490 522
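[Editor's note: the sequence-number code moved into rt2x00queue_create_tx_descriptor advances the counter by 0x10 per first fragment because the sequence number occupies bits 4-15 of seq_ctrl, with the fragment number in bits 0-3. A runnable userspace model; byte order is ignored here, where the driver uses cpu_to_le16.]

	#include <stdio.h>
	#include <stdint.h>

	#define IEEE80211_SCTL_FRAG 0x000F	/* fragment number: bits 0-3 */

	static uint16_t assign_seq(uint16_t seq_ctrl, uint16_t *seqno,
				   int first_frag)
	{
		if (first_frag)
			*seqno += 0x10;	/* +1 in the sequence field (bits 4-15) */
		seq_ctrl &= IEEE80211_SCTL_FRAG;	/* keep fragment bits */
		seq_ctrl |= *seqno;			/* install sequence number */
		return seq_ctrl;
	}

	int main(void)
	{
		uint16_t seqno = 0;

		printf("frag 0: 0x%04x\n", assign_seq(0x0000, &seqno, 1)); /* 0x0010 */
		printf("frag 1: 0x%04x\n", assign_seq(0x0001, &seqno, 0)); /* 0x0011 */
		printf("next:   0x%04x\n", assign_seq(0x0000, &seqno, 1)); /* 0x0020 */
		return 0;
	}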
diff --git a/drivers/net/wireless/rt2x00/rt2x00queue.h b/drivers/net/wireless/rt2x00/rt2x00queue.h
index 8945945c892e..a4a8c57004db 100644
--- a/drivers/net/wireless/rt2x00/rt2x00queue.h
+++ b/drivers/net/wireless/rt2x00/rt2x00queue.h
@@ -199,6 +199,7 @@ struct txdone_entry_desc {
199 * @ENTRY_TXD_RTS_FRAME: This frame is a RTS frame. 199 * @ENTRY_TXD_RTS_FRAME: This frame is a RTS frame.
200 * @ENTRY_TXD_CTS_FRAME: This frame is a CTS-to-self frame. 200 * @ENTRY_TXD_CTS_FRAME: This frame is a CTS-to-self frame.
201 * @ENTRY_TXD_OFDM_RATE: This frame is send out with an OFDM rate. 201 * @ENTRY_TXD_OFDM_RATE: This frame is send out with an OFDM rate.
 202 * @ENTRY_TXD_GENERATE_SEQ: This frame requires a sequence counter.
202 * @ENTRY_TXD_FIRST_FRAGMENT: This is the first frame. 203 * @ENTRY_TXD_FIRST_FRAGMENT: This is the first frame.
203 * @ENTRY_TXD_MORE_FRAG: This frame is followed by another fragment. 204 * @ENTRY_TXD_MORE_FRAG: This frame is followed by another fragment.
204 * @ENTRY_TXD_REQ_TIMESTAMP: Require timestamp to be inserted. 205 * @ENTRY_TXD_REQ_TIMESTAMP: Require timestamp to be inserted.
@@ -210,6 +211,7 @@ enum txentry_desc_flags {
210 ENTRY_TXD_RTS_FRAME, 211 ENTRY_TXD_RTS_FRAME,
211 ENTRY_TXD_CTS_FRAME, 212 ENTRY_TXD_CTS_FRAME,
212 ENTRY_TXD_OFDM_RATE, 213 ENTRY_TXD_OFDM_RATE,
214 ENTRY_TXD_GENERATE_SEQ,
213 ENTRY_TXD_FIRST_FRAGMENT, 215 ENTRY_TXD_FIRST_FRAGMENT,
214 ENTRY_TXD_MORE_FRAG, 216 ENTRY_TXD_MORE_FRAG,
215 ENTRY_TXD_REQ_TIMESTAMP, 217 ENTRY_TXD_REQ_TIMESTAMP,
diff --git a/drivers/net/wireless/rt2x00/rt2x00usb.c b/drivers/net/wireless/rt2x00/rt2x00usb.c
index 83862e7f7aec..933e6cc9359d 100644
--- a/drivers/net/wireless/rt2x00/rt2x00usb.c
+++ b/drivers/net/wireless/rt2x00/rt2x00usb.c
@@ -122,6 +122,38 @@ int rt2x00usb_vendor_request_buff(struct rt2x00_dev *rt2x00dev,
122} 122}
123EXPORT_SYMBOL_GPL(rt2x00usb_vendor_request_buff); 123EXPORT_SYMBOL_GPL(rt2x00usb_vendor_request_buff);
124 124
125int rt2x00usb_vendor_request_large_buff(struct rt2x00_dev *rt2x00dev,
126 const u8 request, const u8 requesttype,
127 const u16 offset, void *buffer,
128 const u16 buffer_length,
129 const int timeout)
130{
131 int status = 0;
132 unsigned char *tb;
133 u16 off, len, bsize;
134
135 mutex_lock(&rt2x00dev->usb_cache_mutex);
136
137 tb = buffer;
138 off = offset;
139 len = buffer_length;
140 while (len && !status) {
141 bsize = min_t(u16, CSR_CACHE_SIZE, len);
142 status = rt2x00usb_vendor_req_buff_lock(rt2x00dev, request,
143 requesttype, off, tb,
144 bsize, timeout);
145
146 tb += bsize;
147 len -= bsize;
148 off += bsize;
149 }
150
151 mutex_unlock(&rt2x00dev->usb_cache_mutex);
152
153 return status;
154}
155EXPORT_SYMBOL_GPL(rt2x00usb_vendor_request_large_buff);
156
125/* 157/*
126 * TX data handlers. 158 * TX data handlers.
127 */ 159 */
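[Editor's note: rt2x00usb_vendor_request_large_buff above splits a large buffer into CSR_CACHE_SIZE pieces under usb_cache_mutex. A runnable userspace model of just the chunking loop; mock_transfer stands in for rt2x00usb_vendor_req_buff_lock().]

	#include <stdio.h>
	#include <string.h>

	#define CSR_CACHE_SIZE 64

	static int mock_transfer(unsigned int off, const unsigned char *buf,
				 unsigned int len)
	{
		printf("transfer %u bytes at offset 0x%04x\n", len, off);
		return 0;
	}

	static int large_transfer(unsigned int offset, const unsigned char *buffer,
				  unsigned int buffer_length)
	{
		int status = 0;

		while (buffer_length && !status) {
			unsigned int bsize = buffer_length < CSR_CACHE_SIZE ?
					     buffer_length : CSR_CACHE_SIZE;

			status = mock_transfer(offset, buffer, bsize);
			buffer += bsize;	/* advance through the buffer */
			offset += bsize;	/* and the register window */
			buffer_length -= bsize;
		}
		return status;
	}

	int main(void)
	{
		unsigned char firmware[150];

		memset(firmware, 0xAA, sizeof(firmware));
		/* 150 bytes -> chunks of 64, 64, 22 */
		return large_transfer(0x0800, firmware, sizeof(firmware));
	}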
diff --git a/drivers/net/wireless/rt2x00/rt2x00usb.h b/drivers/net/wireless/rt2x00/rt2x00usb.h
index aad794adf52c..ee3875f894aa 100644
--- a/drivers/net/wireless/rt2x00/rt2x00usb.h
+++ b/drivers/net/wireless/rt2x00/rt2x00usb.h
@@ -70,8 +70,7 @@
70/* 70/*
71 * Cache size 71 * Cache size
72 */ 72 */
73#define CSR_CACHE_SIZE 8 73#define CSR_CACHE_SIZE 64
74#define CSR_CACHE_SIZE_FIRMWARE 64
75 74
76/* 75/*
77 * USB request types. 76 * USB request types.
@@ -172,6 +171,25 @@ int rt2x00usb_vendor_req_buff_lock(struct rt2x00_dev *rt2x00dev,
172 const u16 buffer_length, const int timeout); 171 const u16 buffer_length, const int timeout);
173 172
174/** 173/**
174 * rt2x00usb_vendor_request_large_buff - Send register command to device (buffered)
175 * @rt2x00dev: Pointer to &struct rt2x00_dev
176 * @request: USB vendor command (See &enum rt2x00usb_vendor_request)
177 * @requesttype: Request type &USB_VENDOR_REQUEST_*
178 * @offset: Register start offset to perform action on
179 * @buffer: Buffer where information will be read/written to by device
180 * @buffer_length: Size of &buffer
181 * @timeout: Operation timeout
182 *
183 * This function is used to transfer register data in blocks larger
 184 * than CSR_CACHE_SIZE. Used for firmware upload, keys and beacons.
185 */
186int rt2x00usb_vendor_request_large_buff(struct rt2x00_dev *rt2x00dev,
187 const u8 request, const u8 requesttype,
188 const u16 offset, void *buffer,
189 const u16 buffer_length,
190 const int timeout);
191
192/**
175 * rt2x00usb_vendor_request_sw - Send single register command to device 193 * rt2x00usb_vendor_request_sw - Send single register command to device
176 * @rt2x00dev: Pointer to &struct rt2x00_dev 194 * @rt2x00dev: Pointer to &struct rt2x00_dev
177 * @request: USB vendor command (See &enum rt2x00usb_vendor_request) 195 * @request: USB vendor command (See &enum rt2x00usb_vendor_request)
diff --git a/drivers/net/wireless/rt2x00/rt61pci.c b/drivers/net/wireless/rt2x00/rt61pci.c
index f7c1f92c1448..fbe2a652e014 100644
--- a/drivers/net/wireless/rt2x00/rt61pci.c
+++ b/drivers/net/wireless/rt2x00/rt61pci.c
@@ -1544,7 +1544,8 @@ static void rt61pci_write_tx_desc(struct rt2x00_dev *rt2x00dev,
1544 rt2x00_set_field32(&word, TXD_W1_CWMIN, txdesc->cw_min); 1544 rt2x00_set_field32(&word, TXD_W1_CWMIN, txdesc->cw_min);
1545 rt2x00_set_field32(&word, TXD_W1_CWMAX, txdesc->cw_max); 1545 rt2x00_set_field32(&word, TXD_W1_CWMAX, txdesc->cw_max);
1546 rt2x00_set_field32(&word, TXD_W1_IV_OFFSET, IEEE80211_HEADER); 1546 rt2x00_set_field32(&word, TXD_W1_IV_OFFSET, IEEE80211_HEADER);
1547 rt2x00_set_field32(&word, TXD_W1_HW_SEQUENCE, 1); 1547 rt2x00_set_field32(&word, TXD_W1_HW_SEQUENCE,
1548 test_bit(ENTRY_TXD_GENERATE_SEQ, &txdesc->flags));
1548 rt2x00_set_field32(&word, TXD_W1_BUFFER_COUNT, 1); 1549 rt2x00_set_field32(&word, TXD_W1_BUFFER_COUNT, 1);
1549 rt2x00_desc_write(txd, 1, word); 1550 rt2x00_desc_write(txd, 1, word);
1550 1551
@@ -2278,7 +2279,6 @@ static void rt61pci_probe_hw_mode(struct rt2x00_dev *rt2x00dev)
2278 * Initialize all hw fields. 2279 * Initialize all hw fields.
2279 */ 2280 */
2280 rt2x00dev->hw->flags = 2281 rt2x00dev->hw->flags =
2281 IEEE80211_HW_HOST_GEN_BEACON_TEMPLATE |
2282 IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING | 2282 IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING |
2283 IEEE80211_HW_SIGNAL_DBM; 2283 IEEE80211_HW_SIGNAL_DBM;
2284 rt2x00dev->hw->extra_tx_headroom = 0; 2284 rt2x00dev->hw->extra_tx_headroom = 0;
diff --git a/drivers/net/wireless/rt2x00/rt73usb.c b/drivers/net/wireless/rt2x00/rt73usb.c
index d383735ab8f2..9761eaaa08be 100644
--- a/drivers/net/wireless/rt2x00/rt73usb.c
+++ b/drivers/net/wireless/rt2x00/rt73usb.c
@@ -890,9 +890,6 @@ static int rt73usb_load_firmware(struct rt2x00_dev *rt2x00dev, const void *data,
890 unsigned int i; 890 unsigned int i;
891 int status; 891 int status;
892 u32 reg; 892 u32 reg;
893 const char *ptr = data;
894 char *cache;
895 int buflen;
896 893
897 /* 894 /*
898 * Wait for stable hardware. 895 * Wait for stable hardware.
@@ -911,31 +908,12 @@ static int rt73usb_load_firmware(struct rt2x00_dev *rt2x00dev, const void *data,
911 908
912 /* 909 /*
913 * Write firmware to device. 910 * Write firmware to device.
914 * We setup a seperate cache for this action,
915 * since we are going to write larger chunks of data
916 * then normally used cache size.
917 */ 911 */
918 cache = kmalloc(CSR_CACHE_SIZE_FIRMWARE, GFP_KERNEL); 912 rt2x00usb_vendor_request_large_buff(rt2x00dev, USB_MULTI_WRITE,
919 if (!cache) { 913 USB_VENDOR_REQUEST_OUT,
920 ERROR(rt2x00dev, "Failed to allocate firmware cache.\n"); 914 FIRMWARE_IMAGE_BASE,
921 return -ENOMEM; 915 data, len,
922 } 916 REGISTER_TIMEOUT32(len));
923
924 for (i = 0; i < len; i += CSR_CACHE_SIZE_FIRMWARE) {
925 buflen = min_t(int, len - i, CSR_CACHE_SIZE_FIRMWARE);
926
927 memcpy(cache, ptr, buflen);
928
929 rt2x00usb_vendor_request(rt2x00dev, USB_MULTI_WRITE,
930 USB_VENDOR_REQUEST_OUT,
931 FIRMWARE_IMAGE_BASE + i, 0,
932 cache, buflen,
933 REGISTER_TIMEOUT32(buflen));
934
935 ptr += buflen;
936 }
937
938 kfree(cache);
939 917
940 /* 918 /*
941 * Send firmware request to device to load firmware, 919 * Send firmware request to device to load firmware,
@@ -1303,7 +1281,8 @@ static void rt73usb_write_tx_desc(struct rt2x00_dev *rt2x00dev,
1303 rt2x00_set_field32(&word, TXD_W1_CWMIN, txdesc->cw_min); 1281 rt2x00_set_field32(&word, TXD_W1_CWMIN, txdesc->cw_min);
1304 rt2x00_set_field32(&word, TXD_W1_CWMAX, txdesc->cw_max); 1282 rt2x00_set_field32(&word, TXD_W1_CWMAX, txdesc->cw_max);
1305 rt2x00_set_field32(&word, TXD_W1_IV_OFFSET, IEEE80211_HEADER); 1283 rt2x00_set_field32(&word, TXD_W1_IV_OFFSET, IEEE80211_HEADER);
1306 rt2x00_set_field32(&word, TXD_W1_HW_SEQUENCE, 1); 1284 rt2x00_set_field32(&word, TXD_W1_HW_SEQUENCE,
1285 test_bit(ENTRY_TXD_GENERATE_SEQ, &txdesc->flags));
1307 rt2x00_desc_write(txd, 1, word); 1286 rt2x00_desc_write(txd, 1, word);
1308 1287
1309 rt2x00_desc_read(txd, 2, &word); 1288 rt2x00_desc_read(txd, 2, &word);
@@ -1352,6 +1331,7 @@ static void rt73usb_write_beacon(struct queue_entry *entry)
1352 struct skb_frame_desc *skbdesc = get_skb_frame_desc(entry->skb); 1331 struct skb_frame_desc *skbdesc = get_skb_frame_desc(entry->skb);
1353 unsigned int beacon_base; 1332 unsigned int beacon_base;
1354 u32 reg; 1333 u32 reg;
1334 u32 word, len;
1355 1335
1356 /* 1336 /*
1357 * Add the descriptor in front of the skb. 1337 * Add the descriptor in front of the skb.
@@ -1361,6 +1341,17 @@ static void rt73usb_write_beacon(struct queue_entry *entry)
1361 skbdesc->desc = entry->skb->data; 1341 skbdesc->desc = entry->skb->data;
1362 1342
1363 /* 1343 /*
1344 * Adjust the beacon databyte count. The current number is
1345 * calculated before this function gets called, but falsely
1346 * assumes that the descriptor was already present in the SKB.
1347 */
1348 rt2x00_desc_read(skbdesc->desc, 0, &word);
1349 len = rt2x00_get_field32(word, TXD_W0_DATABYTE_COUNT);
1350 len += skbdesc->desc_len;
1351 rt2x00_set_field32(&word, TXD_W0_DATABYTE_COUNT, len);
1352 rt2x00_desc_write(skbdesc->desc, 0, word);
1353
1354 /*
1364 * Disable beaconing while we are reloading the beacon data, 1355 * Disable beaconing while we are reloading the beacon data,
1365 * otherwise we might be sending out invalid data. 1356 * otherwise we might be sending out invalid data.
1366 */ 1357 */
@@ -1374,10 +1365,10 @@ static void rt73usb_write_beacon(struct queue_entry *entry)
1374 * Write entire beacon with descriptor to register. 1365 * Write entire beacon with descriptor to register.
1375 */ 1366 */
1376 beacon_base = HW_BEACON_OFFSET(entry->entry_idx); 1367 beacon_base = HW_BEACON_OFFSET(entry->entry_idx);
1377 rt2x00usb_vendor_request(rt2x00dev, USB_MULTI_WRITE, 1368 rt2x00usb_vendor_request_large_buff(rt2x00dev, USB_MULTI_WRITE,
1378 USB_VENDOR_REQUEST_OUT, beacon_base, 0, 1369 USB_VENDOR_REQUEST_OUT, beacon_base,
1379 entry->skb->data, entry->skb->len, 1370 entry->skb->data, entry->skb->len,
1380 REGISTER_TIMEOUT32(entry->skb->len)); 1371 REGISTER_TIMEOUT32(entry->skb->len));
1381 1372
1382 /* 1373 /*
1383 * Clean up the beacon skb. 1374 * Clean up the beacon skb.
@@ -1871,7 +1862,6 @@ static void rt73usb_probe_hw_mode(struct rt2x00_dev *rt2x00dev)
1871 * Initialize all hw fields. 1862 * Initialize all hw fields.
1872 */ 1863 */
1873 rt2x00dev->hw->flags = 1864 rt2x00dev->hw->flags =
1874 IEEE80211_HW_HOST_GEN_BEACON_TEMPLATE |
1875 IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING | 1865 IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING |
1876 IEEE80211_HW_SIGNAL_DBM; 1866 IEEE80211_HW_SIGNAL_DBM;
1877 rt2x00dev->hw->extra_tx_headroom = TXD_DESC_SIZE; 1867 rt2x00dev->hw->extra_tx_headroom = TXD_DESC_SIZE;
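[Editor's note: both write_beacon hunks adjust TXD_W0_DATABYTE_COUNT with a read-modify-write of descriptor word 0. A runnable userspace model of that style of field accessor; the mask value and the 24-byte descriptor length are assumptions for illustration, and __builtin_ctz is a GCC builtin.]

	#include <stdio.h>
	#include <stdint.h>

	#define DATABYTE_COUNT_MASK 0x0fff0000u	/* assumed field position */

	static uint32_t get_field32(uint32_t word, uint32_t mask)
	{
		return (word & mask) >> __builtin_ctz(mask);
	}

	static void set_field32(uint32_t *word, uint32_t mask, uint32_t value)
	{
		*word = (*word & ~mask) | ((value << __builtin_ctz(mask)) & mask);
	}

	int main(void)
	{
		uint32_t word = 0;
		uint32_t len;

		set_field32(&word, DATABYTE_COUNT_MASK, 100);	/* payload only */
		len = get_field32(word, DATABYTE_COUNT_MASK);
		len += 24;	/* add the descriptor length, as the hunks do */
		set_field32(&word, DATABYTE_COUNT_MASK, len);
		printf("databyte count: %u\n",	/* prints 124 */
		       get_field32(word, DATABYTE_COUNT_MASK));
		return 0;
	}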
diff --git a/drivers/net/wireless/rtl8187.h b/drivers/net/wireless/rtl8187.h
index 3afb49f8866a..1b0d750f6623 100644
--- a/drivers/net/wireless/rtl8187.h
+++ b/drivers/net/wireless/rtl8187.h
@@ -47,11 +47,13 @@ struct rtl8187_rx_hdr {
47struct rtl8187b_rx_hdr { 47struct rtl8187b_rx_hdr {
48 __le32 flags; 48 __le32 flags;
49 __le64 mac_time; 49 __le64 mac_time;
50 u8 noise; 50 u8 sq;
51 u8 signal; 51 u8 rssi;
52 u8 agc; 52 u8 agc;
53 u8 reserved; 53 u8 flags2;
54 __le32 unused; 54 __le16 snr_long2end;
55 s8 pwdb_g12;
56 u8 fot;
55} __attribute__((packed)); 57} __attribute__((packed));
56 58
57/* {rtl8187,rtl8187b}_tx_info is in skb */ 59/* {rtl8187,rtl8187b}_tx_info is in skb */
@@ -100,6 +102,7 @@ struct rtl8187_priv {
100 struct usb_device *udev; 102 struct usb_device *udev;
101 u32 rx_conf; 103 u32 rx_conf;
102 u16 txpwr_base; 104 u16 txpwr_base;
105 u16 seqno;
103 u8 asic_rev; 106 u8 asic_rev;
104 u8 is_rtl8187b; 107 u8 is_rtl8187b;
105 enum { 108 enum {
diff --git a/drivers/net/wireless/rtl8187_dev.c b/drivers/net/wireless/rtl8187_dev.c
index d3067b1216ca..177988efd660 100644
--- a/drivers/net/wireless/rtl8187_dev.c
+++ b/drivers/net/wireless/rtl8187_dev.c
@@ -169,6 +169,7 @@ static int rtl8187_tx(struct ieee80211_hw *dev, struct sk_buff *skb)
169{ 169{
170 struct rtl8187_priv *priv = dev->priv; 170 struct rtl8187_priv *priv = dev->priv;
171 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); 171 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
172 struct ieee80211_hdr *ieee80211hdr = (struct ieee80211_hdr *)skb->data;
172 unsigned int ep; 173 unsigned int ep;
173 void *buf; 174 void *buf;
174 struct urb *urb; 175 struct urb *urb;
@@ -234,6 +235,20 @@ static int rtl8187_tx(struct ieee80211_hw *dev, struct sk_buff *skb)
234 ep = epmap[skb_get_queue_mapping(skb)]; 235 ep = epmap[skb_get_queue_mapping(skb)];
235 } 236 }
236 237
238 /* FIXME: The sequence that follows is needed for this driver to
239 * work with mac80211 since "mac80211: fix TX sequence numbers".
240 * As with the temporary code in rt2x00, changes will be needed
241 * to get proper sequence numbers on beacons. In addition, this
242 * patch places the sequence number in the hardware state, which
243 * limits us to a single virtual state.
244 */
245 if (info->flags & IEEE80211_TX_CTL_ASSIGN_SEQ) {
246 if (info->flags & IEEE80211_TX_CTL_FIRST_FRAGMENT)
247 priv->seqno += 0x10;
248 ieee80211hdr->seq_ctrl &= cpu_to_le16(IEEE80211_SCTL_FRAG);
249 ieee80211hdr->seq_ctrl |= cpu_to_le16(priv->seqno);
250 }
251
237 info->driver_data[0] = dev; 252 info->driver_data[0] = dev;
238 info->driver_data[1] = urb; 253 info->driver_data[1] = urb;
239 254
@@ -257,6 +272,7 @@ static void rtl8187_rx_cb(struct urb *urb)
257 struct ieee80211_rx_status rx_status = { 0 }; 272 struct ieee80211_rx_status rx_status = { 0 };
258 int rate, signal; 273 int rate, signal;
259 u32 flags; 274 u32 flags;
275 u32 quality;
260 276
261 spin_lock(&priv->rx_queue.lock); 277 spin_lock(&priv->rx_queue.lock);
262 if (skb->next) 278 if (skb->next)
@@ -280,44 +296,57 @@ static void rtl8187_rx_cb(struct urb *urb)
280 flags = le32_to_cpu(hdr->flags); 296 flags = le32_to_cpu(hdr->flags);
281 signal = hdr->signal & 0x7f; 297 signal = hdr->signal & 0x7f;
282 rx_status.antenna = (hdr->signal >> 7) & 1; 298 rx_status.antenna = (hdr->signal >> 7) & 1;
283 rx_status.signal = signal;
284 rx_status.noise = hdr->noise; 299 rx_status.noise = hdr->noise;
285 rx_status.mactime = le64_to_cpu(hdr->mac_time); 300 rx_status.mactime = le64_to_cpu(hdr->mac_time);
286 priv->signal = signal;
287 priv->quality = signal; 301 priv->quality = signal;
302 rx_status.qual = priv->quality;
288 priv->noise = hdr->noise; 303 priv->noise = hdr->noise;
304 rate = (flags >> 20) & 0xF;
305 if (rate > 3) { /* OFDM rate */
306 if (signal > 90)
307 signal = 90;
308 else if (signal < 25)
309 signal = 25;
310 signal = 90 - signal;
311 } else { /* CCK rate */
312 if (signal > 95)
313 signal = 95;
314 else if (signal < 30)
315 signal = 30;
316 signal = 95 - signal;
317 }
318 rx_status.signal = signal;
319 priv->signal = signal;
289 } else { 320 } else {
290 struct rtl8187b_rx_hdr *hdr = 321 struct rtl8187b_rx_hdr *hdr =
291 (typeof(hdr))(skb_tail_pointer(skb) - sizeof(*hdr)); 322 (typeof(hdr))(skb_tail_pointer(skb) - sizeof(*hdr));
323 /* The Realtek datasheet for the RTL8187B shows that the RX
324 * header contains the following quantities: signal quality,
325 * RSSI, AGC, the received power in dB, and the measured SNR.
326 * In testing, none of these quantities show qualitative
327 * agreement with AP signal strength, except for the AGC,
328 * which is inversely proportional to the strength of the
329 * signal. In the following, the quality and signal strength
330 * are derived from the AGC. The arbitrary scaling constants
331 * are chosen to make the results close to the values obtained
332 * for a BCM4312 using b43 as the driver. The noise is ignored
333 * for now.
334 */
292 flags = le32_to_cpu(hdr->flags); 335 flags = le32_to_cpu(hdr->flags);
293 signal = hdr->agc >> 1; 336 quality = 170 - hdr->agc;
294 rx_status.antenna = (hdr->signal >> 7) & 1; 337 if (quality > 100)
295 rx_status.signal = 64 - min(hdr->noise, (u8)64); 338 quality = 100;
296 rx_status.noise = hdr->noise; 339 signal = 14 - hdr->agc / 2;
340 rx_status.qual = quality;
341 priv->quality = quality;
342 rx_status.signal = signal;
343 priv->signal = signal;
344 rx_status.antenna = (hdr->rssi >> 7) & 1;
297 rx_status.mactime = le64_to_cpu(hdr->mac_time); 345 rx_status.mactime = le64_to_cpu(hdr->mac_time);
298 priv->signal = hdr->signal; 346 rate = (flags >> 20) & 0xF;
299 priv->quality = hdr->agc >> 1;
300 priv->noise = hdr->noise;
301 } 347 }
302 348
303 skb_trim(skb, flags & 0x0FFF); 349 skb_trim(skb, flags & 0x0FFF);
304 rate = (flags >> 20) & 0xF;
305 if (rate > 3) { /* OFDM rate */
306 if (signal > 90)
307 signal = 90;
308 else if (signal < 25)
309 signal = 25;
310 signal = 90 - signal;
311 } else { /* CCK rate */
312 if (signal > 95)
313 signal = 95;
314 else if (signal < 30)
315 signal = 30;
316 signal = 95 - signal;
317 }
318
319 rx_status.qual = priv->quality;
320 rx_status.signal = signal;
321 rx_status.rate_idx = rate; 350 rx_status.rate_idx = rate;
322 rx_status.freq = dev->conf.channel->center_freq; 351 rx_status.freq = dev->conf.channel->center_freq;
323 rx_status.band = dev->conf.channel->band; 352 rx_status.band = dev->conf.channel->band;
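[Editor's note: the RTL8187B branch above derives both link quality and signal strength from the AGC byte alone: quality = 170 - agc clamped to 100, signal = 14 - agc/2 as a rough dBm estimate. A runnable userspace model of that mapping; the scaling constants are the patch's own empirical choices.]

	#include <stdio.h>

	static void agc_to_signal(unsigned int agc, unsigned int *quality,
				  int *signal)
	{
		unsigned int q = 170 - agc;

		if (q > 100)	/* clamp (also catches unsigned wrap for agc > 170) */
			q = 100;
		*quality = q;
		*signal = 14 - (int)agc / 2;	/* rough dBm estimate */
	}

	int main(void)
	{
		unsigned int quality;
		int signal;

		agc_to_signal(120, &quality, &signal);
		/* prints quality=50 signal=-46 dBm */
		printf("agc=120 -> quality=%u signal=%d dBm\n", quality, signal);
		return 0;
	}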
@@ -1015,9 +1044,7 @@ static int __devinit rtl8187_probe(struct usb_interface *intf,
1015 1044
1016 priv->mode = IEEE80211_IF_TYPE_MNTR; 1045 priv->mode = IEEE80211_IF_TYPE_MNTR;
1017 dev->flags = IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING | 1046 dev->flags = IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING |
1018 IEEE80211_HW_RX_INCLUDES_FCS | 1047 IEEE80211_HW_RX_INCLUDES_FCS;
1019 IEEE80211_HW_SIGNAL_UNSPEC;
1020 dev->max_signal = 65;
1021 1048
1022 eeprom.data = dev; 1049 eeprom.data = dev;
1023 eeprom.register_read = rtl8187_eeprom_register_read; 1050 eeprom.register_read = rtl8187_eeprom_register_read;
@@ -1132,10 +1159,16 @@ static int __devinit rtl8187_probe(struct usb_interface *intf,
1132 (*channel++).hw_value = txpwr >> 8; 1159 (*channel++).hw_value = txpwr >> 8;
1133 } 1160 }
1134 1161
1135 if (priv->is_rtl8187b) 1162 if (priv->is_rtl8187b) {
1136 printk(KERN_WARNING "rtl8187: 8187B chip detected. Support " 1163 printk(KERN_WARNING "rtl8187: 8187B chip detected. Support "
1137 "is EXPERIMENTAL, and could damage your\n" 1164 "is EXPERIMENTAL, and could damage your\n"
1138 " hardware, use at your own risk\n"); 1165 " hardware, use at your own risk\n");
1166 dev->flags |= IEEE80211_HW_SIGNAL_DBM;
1167 } else {
1168 dev->flags |= IEEE80211_HW_SIGNAL_UNSPEC;
1169 dev->max_signal = 65;
1170 }
1171
1139 if ((id->driver_info == DEVICE_RTL8187) && priv->is_rtl8187b) 1172 if ((id->driver_info == DEVICE_RTL8187) && priv->is_rtl8187b)
1140 printk(KERN_INFO "rtl8187: inconsistency between id with OEM" 1173 printk(KERN_INFO "rtl8187: inconsistency between id with OEM"
1141 " info!\n"); 1174 " info!\n");
diff --git a/drivers/net/wireless/zd1211rw/zd_mac.c b/drivers/net/wireless/zd1211rw/zd_mac.c
index fcc532bb6a7e..4d7b98b05030 100644
--- a/drivers/net/wireless/zd1211rw/zd_mac.c
+++ b/drivers/net/wireless/zd1211rw/zd_mac.c
@@ -935,7 +935,6 @@ struct ieee80211_hw *zd_mac_alloc_hw(struct usb_interface *intf)
935 hw->wiphy->bands[IEEE80211_BAND_2GHZ] = &mac->band; 935 hw->wiphy->bands[IEEE80211_BAND_2GHZ] = &mac->band;
936 936
937 hw->flags = IEEE80211_HW_RX_INCLUDES_FCS | 937 hw->flags = IEEE80211_HW_RX_INCLUDES_FCS |
938 IEEE80211_HW_HOST_GEN_BEACON_TEMPLATE |
939 IEEE80211_HW_SIGNAL_DB; 938 IEEE80211_HW_SIGNAL_DB;
940 939
941 hw->max_signal = 100; 940 hw->max_signal = 100;
diff --git a/drivers/power/Kconfig b/drivers/power/Kconfig
index 4d17d384578d..9ce55850271a 100644
--- a/drivers/power/Kconfig
+++ b/drivers/power/Kconfig
@@ -49,6 +49,13 @@ config BATTERY_OLPC
49 help 49 help
50 Say Y to enable support for the battery on the OLPC laptop. 50 Say Y to enable support for the battery on the OLPC laptop.
51 51
52config BATTERY_TOSA
53 tristate "Sharp SL-6000 (tosa) battery"
54 depends on MACH_TOSA && MFD_TC6393XB
55 help
56 Say Y to enable support for the battery on the Sharp Zaurus
57 SL-6000 (tosa) models.
58
52config BATTERY_PALMTX 59config BATTERY_PALMTX
53 tristate "Palm T|X battery" 60 tristate "Palm T|X battery"
54 depends on MACH_PALMTX 61 depends on MACH_PALMTX
diff --git a/drivers/power/Makefile b/drivers/power/Makefile
index 6f43a54ee420..4706bf8ff459 100644
--- a/drivers/power/Makefile
+++ b/drivers/power/Makefile
@@ -20,4 +20,5 @@ obj-$(CONFIG_APM_POWER) += apm_power.o
20obj-$(CONFIG_BATTERY_DS2760) += ds2760_battery.o 20obj-$(CONFIG_BATTERY_DS2760) += ds2760_battery.o
21obj-$(CONFIG_BATTERY_PMU) += pmu_battery.o 21obj-$(CONFIG_BATTERY_PMU) += pmu_battery.o
22obj-$(CONFIG_BATTERY_OLPC) += olpc_battery.o 22obj-$(CONFIG_BATTERY_OLPC) += olpc_battery.o
23obj-$(CONFIG_BATTERY_TOSA) += tosa_battery.o
23obj-$(CONFIG_BATTERY_PALMTX) += palmtx_battery.o 24obj-$(CONFIG_BATTERY_PALMTX) += palmtx_battery.o
diff --git a/drivers/power/olpc_battery.c b/drivers/power/olpc_battery.c
index ab1e8289f07f..32570af3c5c9 100644
--- a/drivers/power/olpc_battery.c
+++ b/drivers/power/olpc_battery.c
@@ -19,7 +19,7 @@
19 19
20#define EC_BAT_VOLTAGE 0x10 /* uint16_t, *9.76/32, mV */ 20#define EC_BAT_VOLTAGE 0x10 /* uint16_t, *9.76/32, mV */
21#define EC_BAT_CURRENT 0x11 /* int16_t, *15.625/120, mA */ 21#define EC_BAT_CURRENT 0x11 /* int16_t, *15.625/120, mA */
22#define EC_BAT_ACR 0x12 22#define EC_BAT_ACR 0x12 /* int16_t, *6250/15, µAh */
23#define EC_BAT_TEMP 0x13 /* uint16_t, *100/256, °C */ 23#define EC_BAT_TEMP 0x13 /* uint16_t, *100/256, °C */
24#define EC_AMB_TEMP 0x14 /* uint16_t, *100/256, °C */ 24#define EC_AMB_TEMP 0x14 /* uint16_t, *100/256, °C */
25#define EC_BAT_STATUS 0x15 /* uint8_t, bitmask */ 25#define EC_BAT_STATUS 0x15 /* uint8_t, bitmask */
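[Editor's note: the comment added above documents the EC_BAT_ACR scaling as raw * 6250 / 15 µAh. A runnable userspace check of that conversion; the raw reading is an illustrative value.]

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		int16_t raw = 1200;	/* example raw accumulated-current reading */
		int32_t uah = (int32_t)raw * 6250 / 15;	/* widen before scaling */

		/* prints 500000 uAh, i.e. 0.5 Ah */
		printf("ACR raw %d -> %d uAh\n", raw, uah);
		return 0;
	}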
@@ -84,6 +84,119 @@ static struct power_supply olpc_ac = {
 	.get_property = olpc_ac_get_prop,
 };
 
+static char bat_serial[17]; /* Ick */
+
+static int olpc_bat_get_status(union power_supply_propval *val, uint8_t ec_byte)
+{
+	if (olpc_platform_info.ecver > 0x44) {
+		if (ec_byte & BAT_STAT_CHARGING)
+			val->intval = POWER_SUPPLY_STATUS_CHARGING;
+		else if (ec_byte & BAT_STAT_DISCHARGING)
+			val->intval = POWER_SUPPLY_STATUS_DISCHARGING;
+		else if (ec_byte & BAT_STAT_FULL)
+			val->intval = POWER_SUPPLY_STATUS_FULL;
+		else /* er,... */
+			val->intval = POWER_SUPPLY_STATUS_NOT_CHARGING;
+	} else {
+		/* Older EC didn't report charge/discharge bits */
+		if (!(ec_byte & BAT_STAT_AC)) /* No AC means discharging */
+			val->intval = POWER_SUPPLY_STATUS_DISCHARGING;
+		else if (ec_byte & BAT_STAT_FULL)
+			val->intval = POWER_SUPPLY_STATUS_FULL;
+		else /* Not _necessarily_ true but EC doesn't tell all yet */
+			val->intval = POWER_SUPPLY_STATUS_CHARGING;
+	}
+
+	return 0;
+}
+
+static int olpc_bat_get_health(union power_supply_propval *val)
+{
+	uint8_t ec_byte;
+	int ret;
+
+	ret = olpc_ec_cmd(EC_BAT_ERRCODE, NULL, 0, &ec_byte, 1);
+	if (ret)
+		return ret;
+
+	switch (ec_byte) {
+	case 0:
+		val->intval = POWER_SUPPLY_HEALTH_GOOD;
+		break;
+
+	case BAT_ERR_OVERTEMP:
+		val->intval = POWER_SUPPLY_HEALTH_OVERHEAT;
+		break;
+
+	case BAT_ERR_OVERVOLTAGE:
+		val->intval = POWER_SUPPLY_HEALTH_OVERVOLTAGE;
+		break;
+
+	case BAT_ERR_INFOFAIL:
+	case BAT_ERR_OUT_OF_CONTROL:
+	case BAT_ERR_ID_FAIL:
+	case BAT_ERR_ACR_FAIL:
+		val->intval = POWER_SUPPLY_HEALTH_UNSPEC_FAILURE;
+		break;
+
+	default:
+		/* Eep. We don't know this failure code */
+		ret = -EIO;
+	}
+
+	return ret;
+}
+
+static int olpc_bat_get_mfr(union power_supply_propval *val)
+{
+	uint8_t ec_byte;
+	int ret;
+
+	ec_byte = BAT_ADDR_MFR_TYPE;
+	ret = olpc_ec_cmd(EC_BAT_EEPROM, &ec_byte, 1, &ec_byte, 1);
+	if (ret)
+		return ret;
+
+	switch (ec_byte >> 4) {
+	case 1:
+		val->strval = "Gold Peak";
+		break;
+	case 2:
+		val->strval = "BYD";
+		break;
+	default:
+		val->strval = "Unknown";
+		break;
+	}
+
+	return ret;
+}
+
+static int olpc_bat_get_tech(union power_supply_propval *val)
+{
+	uint8_t ec_byte;
+	int ret;
+
+	ec_byte = BAT_ADDR_MFR_TYPE;
+	ret = olpc_ec_cmd(EC_BAT_EEPROM, &ec_byte, 1, &ec_byte, 1);
+	if (ret)
+		return ret;
+
+	switch (ec_byte & 0xf) {
+	case 1:
+		val->intval = POWER_SUPPLY_TECHNOLOGY_NiMH;
+		break;
+	case 2:
+		val->intval = POWER_SUPPLY_TECHNOLOGY_LiFe;
+		break;
+	default:
+		val->intval = POWER_SUPPLY_TECHNOLOGY_UNKNOWN;
+		break;
+	}
+
+	return ret;
+}
+
 /*********************************************************************
  *		Battery properties
  *********************************************************************/
@@ -94,6 +207,7 @@ static int olpc_bat_get_property(struct power_supply *psy,
 	int ret = 0;
 	int16_t ec_word;
 	uint8_t ec_byte;
+	uint64_t ser_buf;
 
 	ret = olpc_ec_cmd(EC_BAT_STATUS, NULL, 0, &ec_byte, 1);
 	if (ret)
@@ -110,25 +224,10 @@ static int olpc_bat_get_property(struct power_supply *psy,
 
 	switch (psp) {
 	case POWER_SUPPLY_PROP_STATUS:
-		if (olpc_platform_info.ecver > 0x44) {
-			if (ec_byte & BAT_STAT_CHARGING)
-				val->intval = POWER_SUPPLY_STATUS_CHARGING;
-			else if (ec_byte & BAT_STAT_DISCHARGING)
-				val->intval = POWER_SUPPLY_STATUS_DISCHARGING;
-			else if (ec_byte & BAT_STAT_FULL)
-				val->intval = POWER_SUPPLY_STATUS_FULL;
-			else /* er,... */
-				val->intval = POWER_SUPPLY_STATUS_NOT_CHARGING;
-		} else {
-			/* Older EC didn't report charge/discharge bits */
-			if (!(ec_byte & BAT_STAT_AC)) /* No AC means discharging */
-				val->intval = POWER_SUPPLY_STATUS_DISCHARGING;
-			else if (ec_byte & BAT_STAT_FULL)
-				val->intval = POWER_SUPPLY_STATUS_FULL;
-			else /* Not _necessarily_ true but EC doesn't tell all yet */
-				val->intval = POWER_SUPPLY_STATUS_CHARGING;
-			break;
-		}
+		ret = olpc_bat_get_status(val, ec_byte);
+		if (ret)
+			return ret;
+		break;
 	case POWER_SUPPLY_PROP_PRESENT:
 		val->intval = !!(ec_byte & BAT_STAT_PRESENT);
 		break;
@@ -137,72 +236,21 @@ static int olpc_bat_get_property(struct power_supply *psy,
 		if (ec_byte & BAT_STAT_DESTROY)
 			val->intval = POWER_SUPPLY_HEALTH_DEAD;
 		else {
-			ret = olpc_ec_cmd(EC_BAT_ERRCODE, NULL, 0, &ec_byte, 1);
+			ret = olpc_bat_get_health(val);
 			if (ret)
 				return ret;
-
-			switch (ec_byte) {
-			case 0:
-				val->intval = POWER_SUPPLY_HEALTH_GOOD;
-				break;
-
-			case BAT_ERR_OVERTEMP:
-				val->intval = POWER_SUPPLY_HEALTH_OVERHEAT;
-				break;
-
-			case BAT_ERR_OVERVOLTAGE:
-				val->intval = POWER_SUPPLY_HEALTH_OVERVOLTAGE;
-				break;
-
-			case BAT_ERR_INFOFAIL:
-			case BAT_ERR_OUT_OF_CONTROL:
-			case BAT_ERR_ID_FAIL:
-			case BAT_ERR_ACR_FAIL:
-				val->intval = POWER_SUPPLY_HEALTH_UNSPEC_FAILURE;
-				break;
-
-			default:
-				/* Eep. We don't know this failure code */
-				return -EIO;
-			}
 		}
 		break;
 
 	case POWER_SUPPLY_PROP_MANUFACTURER:
-		ec_byte = BAT_ADDR_MFR_TYPE;
-		ret = olpc_ec_cmd(EC_BAT_EEPROM, &ec_byte, 1, &ec_byte, 1);
+		ret = olpc_bat_get_mfr(val);
 		if (ret)
 			return ret;
-
-		switch (ec_byte >> 4) {
-		case 1:
-			val->strval = "Gold Peak";
-			break;
-		case 2:
-			val->strval = "BYD";
-			break;
-		default:
-			val->strval = "Unknown";
-			break;
-		}
 		break;
 	case POWER_SUPPLY_PROP_TECHNOLOGY:
-		ec_byte = BAT_ADDR_MFR_TYPE;
-		ret = olpc_ec_cmd(EC_BAT_EEPROM, &ec_byte, 1, &ec_byte, 1);
+		ret = olpc_bat_get_tech(val);
 		if (ret)
 			return ret;
-
-		switch (ec_byte & 0xf) {
-		case 1:
-			val->intval = POWER_SUPPLY_TECHNOLOGY_NiMH;
-			break;
-		case 2:
-			val->intval = POWER_SUPPLY_TECHNOLOGY_LiFe;
-			break;
-		default:
-			val->intval = POWER_SUPPLY_TECHNOLOGY_UNKNOWN;
-			break;
-		}
 		break;
 	case POWER_SUPPLY_PROP_VOLTAGE_AVG:
 		ret = olpc_ec_cmd(EC_BAT_VOLTAGE, NULL, 0, (void *)&ec_word, 2);
@@ -241,6 +289,22 @@ static int olpc_bat_get_property(struct power_supply *psy,
 		ec_word = be16_to_cpu(ec_word);
 		val->intval = ec_word * 100 / 256;
 		break;
+	case POWER_SUPPLY_PROP_CHARGE_COUNTER:
+		ret = olpc_ec_cmd(EC_BAT_ACR, NULL, 0, (void *)&ec_word, 2);
+		if (ret)
+			return ret;
+
+		ec_word = be16_to_cpu(ec_word);
+		val->intval = ec_word * 6250 / 15;
+		break;
+	case POWER_SUPPLY_PROP_SERIAL_NUMBER:
+		ret = olpc_ec_cmd(EC_BAT_SERIAL, NULL, 0, (void *)&ser_buf, 8);
+		if (ret)
+			return ret;
+
+		sprintf(bat_serial, "%016llx", (long long)be64_to_cpu(ser_buf));
+		val->strval = bat_serial;
+		break;
 	default:
 		ret = -EINVAL;
 		break;
@@ -260,6 +324,50 @@ static enum power_supply_property olpc_bat_props[] = {
 	POWER_SUPPLY_PROP_TEMP,
 	POWER_SUPPLY_PROP_TEMP_AMBIENT,
 	POWER_SUPPLY_PROP_MANUFACTURER,
+	POWER_SUPPLY_PROP_SERIAL_NUMBER,
+	POWER_SUPPLY_PROP_CHARGE_COUNTER,
+};
+
+/* EEPROM reading goes completely around the power_supply API, sadly */
+
+#define EEPROM_START	0x20
+#define EEPROM_END	0x80
+#define EEPROM_SIZE	(EEPROM_END - EEPROM_START)
+
+static ssize_t olpc_bat_eeprom_read(struct kobject *kobj,
+		struct bin_attribute *attr, char *buf, loff_t off, size_t count)
+{
+	uint8_t ec_byte;
+	int ret, end;
+
+	if (off >= EEPROM_SIZE)
+		return 0;
+	if (off + count > EEPROM_SIZE)
+		count = EEPROM_SIZE - off;
+
+	end = EEPROM_START + off + count;
+	for (ec_byte = EEPROM_START + off; ec_byte < end; ec_byte++) {
+		ret = olpc_ec_cmd(EC_BAT_EEPROM, &ec_byte, 1,
+				&buf[ec_byte - EEPROM_START], 1);
+		if (ret) {
+			printk(KERN_ERR "olpc-battery: EC command "
+					"EC_BAT_EEPROM @ 0x%x failed -"
+					" %d!\n", ec_byte, ret);
+			return -EIO;
+		}
+	}
+
+	return count;
+}
+
+static struct bin_attribute olpc_bat_eeprom = {
+	.attr = {
+		.name = "eeprom",
+		.mode = S_IRUGO,
+		.owner = THIS_MODULE,
+	},
+	.size = 0,
+	.read = olpc_bat_eeprom_read,
 };
 
 /*********************************************************************
@@ -290,8 +398,14 @@ static int __init olpc_bat_init(void)
 
 	if (!olpc_platform_info.ecver)
 		return -ENXIO;
-	if (olpc_platform_info.ecver < 0x43) {
-		printk(KERN_NOTICE "OLPC EC version 0x%02x too old for battery driver.\n", olpc_platform_info.ecver);
+
+	/*
+	 * We've seen a number of EC protocol changes; this driver requires
+	 * the latest EC protocol, supported by 0x44 and above.
+	 */
+	if (olpc_platform_info.ecver < 0x44) {
+		printk(KERN_NOTICE "OLPC EC version 0x%02x too old for "
+			"battery driver.\n", olpc_platform_info.ecver);
 		return -ENXIO;
 	}
 
@@ -315,8 +429,14 @@ static int __init olpc_bat_init(void)
 	if (ret)
 		goto battery_failed;
 
+	ret = device_create_bin_file(olpc_bat.dev, &olpc_bat_eeprom);
+	if (ret)
+		goto eeprom_failed;
+
 	goto success;
 
+eeprom_failed:
+	power_supply_unregister(&olpc_bat);
 battery_failed:
 	power_supply_unregister(&olpc_ac);
 ac_failed:
@@ -327,6 +447,7 @@ success:
 
 static void __exit olpc_bat_exit(void)
 {
+	device_remove_bin_file(olpc_bat.dev, &olpc_bat_eeprom);
 	power_supply_unregister(&olpc_bat);
 	power_supply_unregister(&olpc_ac);
 	platform_device_unregister(bat_pdev);
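
For reference, the new CHARGE_COUNTER path above is pure fixed-point scaling: the EC returns a big-endian signed 16-bit ACR count that the driver converts into µAh with the *6250/15 factor from the register comment. A minimal user-space sketch of that arithmetic, assuming a little-endian host and a made-up raw reading (nothing here is part of the driver):

#include <stdio.h>
#include <stdint.h>

/* Convert a raw big-endian EC_BAT_ACR word to microampere-hours. */
static int acr_to_uAh(uint16_t be_word)
{
	/* swap to host byte order; assumes a little-endian host */
	int16_t acr = (int16_t)((be_word >> 8) | (be_word << 8));

	return acr * 6250 / 15;	/* scale factor from the register comment */
}

int main(void)
{
	uint16_t raw = 0x0102;	/* hypothetical EC reading */

	printf("charge counter: %d uAh\n", acr_to_uAh(raw));
	return 0;
}
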
diff --git a/drivers/power/power_supply_sysfs.c b/drivers/power/power_supply_sysfs.c
index 49215da5249b..fe2aeb11939b 100644
--- a/drivers/power/power_supply_sysfs.c
+++ b/drivers/power/power_supply_sysfs.c
@@ -99,6 +99,7 @@ static struct device_attribute power_supply_attrs[] = {
 	POWER_SUPPLY_ATTR(charge_empty),
 	POWER_SUPPLY_ATTR(charge_now),
 	POWER_SUPPLY_ATTR(charge_avg),
+	POWER_SUPPLY_ATTR(charge_counter),
 	POWER_SUPPLY_ATTR(energy_full_design),
 	POWER_SUPPLY_ATTR(energy_empty_design),
 	POWER_SUPPLY_ATTR(energy_full),
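
Once charge_counter is in the attribute table, the property surfaces as a plain sysfs file. A small sketch of reading it from user space; the supply name in the path is an assumption for illustration, not something this hunk defines:

#include <stdio.h>

int main(void)
{
	/* path assumes the supply registered under the name "olpc-battery" */
	FILE *f = fopen("/sys/class/power_supply/olpc-battery/charge_counter", "r");
	long uah;

	if (!f) {
		perror("open charge_counter");
		return 1;
	}
	if (fscanf(f, "%ld", &uah) != 1) {
		fclose(f);
		fprintf(stderr, "unexpected charge_counter format\n");
		return 1;
	}
	fclose(f);

	printf("accumulated charge: %ld uAh\n", uah);
	return 0;
}
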
diff --git a/drivers/power/tosa_battery.c b/drivers/power/tosa_battery.c
new file mode 100644
index 000000000000..bf664fbd6610
--- /dev/null
+++ b/drivers/power/tosa_battery.c
@@ -0,0 +1,486 @@
+/*
+ * Battery and Power Management code for the Sharp SL-6000x
+ *
+ * Copyright (c) 2005 Dirk Opfer
+ * Copyright (c) 2008 Dmitry Baryshkov
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/power_supply.h>
+#include <linux/wm97xx.h>
+#include <linux/delay.h>
+#include <linux/spinlock.h>
+#include <linux/interrupt.h>
+#include <linux/gpio.h>
+
+#include <asm/mach-types.h>
+#include <asm/arch/tosa.h>
+
+static DEFINE_MUTEX(bat_lock); /* protects gpio pins */
+static struct work_struct bat_work;
+
+struct tosa_bat {
+	int status;
+	struct power_supply psy;
+	int full_chrg;
+
+	struct mutex work_lock; /* protects data */
+
+	bool (*is_present)(struct tosa_bat *bat);
+	int gpio_full;
+	int gpio_charge_off;
+
+	int technology;
+
+	int gpio_bat;
+	int adc_bat;
+	int adc_bat_divider;
+	int bat_max;
+	int bat_min;
+
+	int gpio_temp;
+	int adc_temp;
+	int adc_temp_divider;
+};
+
+static struct tosa_bat tosa_bat_main;
+static struct tosa_bat tosa_bat_jacket;
+
+static unsigned long tosa_read_bat(struct tosa_bat *bat)
+{
+	unsigned long value = 0;
+
+	if (bat->gpio_bat < 0 || bat->adc_bat < 0)
+		return 0;
+
+	mutex_lock(&bat_lock);
+	gpio_set_value(bat->gpio_bat, 1);
+	msleep(5);
+	value = wm97xx_read_aux_adc(bat->psy.dev->parent->driver_data,
+			bat->adc_bat);
+	gpio_set_value(bat->gpio_bat, 0);
+	mutex_unlock(&bat_lock);
+
+	value = value * 1000000 / bat->adc_bat_divider;
+
+	return value;
+}
+
+static unsigned long tosa_read_temp(struct tosa_bat *bat)
+{
+	unsigned long value = 0;
+
+	if (bat->gpio_temp < 0 || bat->adc_temp < 0)
+		return 0;
+
+	mutex_lock(&bat_lock);
+	gpio_set_value(bat->gpio_temp, 1);
+	msleep(5);
+	value = wm97xx_read_aux_adc(bat->psy.dev->parent->driver_data,
+			bat->adc_temp);
+	gpio_set_value(bat->gpio_temp, 0);
+	mutex_unlock(&bat_lock);
+
+	value = value * 10000 / bat->adc_temp_divider;
+
+	return value;
+}
+
+static int tosa_bat_get_property(struct power_supply *psy,
+			    enum power_supply_property psp,
+			    union power_supply_propval *val)
+{
+	int ret = 0;
+	struct tosa_bat *bat = container_of(psy, struct tosa_bat, psy);
+
+	if (bat->is_present && !bat->is_present(bat)
+			&& psp != POWER_SUPPLY_PROP_PRESENT) {
+		return -ENODEV;
+	}
+
+	switch (psp) {
+	case POWER_SUPPLY_PROP_STATUS:
+		val->intval = bat->status;
+		break;
+	case POWER_SUPPLY_PROP_TECHNOLOGY:
+		val->intval = bat->technology;
+		break;
+	case POWER_SUPPLY_PROP_VOLTAGE_NOW:
+		val->intval = tosa_read_bat(bat);
+		break;
+	case POWER_SUPPLY_PROP_VOLTAGE_MAX:
+		if (bat->full_chrg == -1)
+			val->intval = bat->bat_max;
+		else
+			val->intval = bat->full_chrg;
+		break;
+	case POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN:
+		val->intval = bat->bat_max;
+		break;
+	case POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN:
+		val->intval = bat->bat_min;
+		break;
+	case POWER_SUPPLY_PROP_TEMP:
+		val->intval = tosa_read_temp(bat);
+		break;
+	case POWER_SUPPLY_PROP_PRESENT:
+		val->intval = bat->is_present ? bat->is_present(bat) : 1;
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+	return ret;
+}
+
+static bool tosa_jacket_bat_is_present(struct tosa_bat *bat)
+{
+	return gpio_get_value(TOSA_GPIO_JACKET_DETECT) == 0;
+}
+
+static void tosa_bat_external_power_changed(struct power_supply *psy)
+{
+	schedule_work(&bat_work);
+}
+
+static irqreturn_t tosa_bat_gpio_isr(int irq, void *data)
+{
+	pr_info("tosa_bat_gpio irq: %d\n", gpio_get_value(irq_to_gpio(irq)));
+	schedule_work(&bat_work);
+	return IRQ_HANDLED;
+}
+
+static void tosa_bat_update(struct tosa_bat *bat)
+{
+	int old;
+	struct power_supply *psy = &bat->psy;
+
+	mutex_lock(&bat->work_lock);
+
+	old = bat->status;
+
+	if (bat->is_present && !bat->is_present(bat)) {
+		printk(KERN_NOTICE "%s not present\n", psy->name);
+		bat->status = POWER_SUPPLY_STATUS_UNKNOWN;
+		bat->full_chrg = -1;
+	} else if (power_supply_am_i_supplied(psy)) {
+		if (bat->status == POWER_SUPPLY_STATUS_DISCHARGING) {
+			gpio_set_value(bat->gpio_charge_off, 0);
+			mdelay(15);
+		}
+
+		if (gpio_get_value(bat->gpio_full)) {
+			if (old == POWER_SUPPLY_STATUS_CHARGING ||
+					bat->full_chrg == -1)
+				bat->full_chrg = tosa_read_bat(bat);
+
+			gpio_set_value(bat->gpio_charge_off, 1);
+			bat->status = POWER_SUPPLY_STATUS_FULL;
+		} else {
+			gpio_set_value(bat->gpio_charge_off, 0);
+			bat->status = POWER_SUPPLY_STATUS_CHARGING;
+		}
+	} else {
+		gpio_set_value(bat->gpio_charge_off, 1);
+		bat->status = POWER_SUPPLY_STATUS_DISCHARGING;
+	}
+
+	if (old != bat->status)
+		power_supply_changed(psy);
+
+	mutex_unlock(&bat->work_lock);
+}
+
+static void tosa_bat_work(struct work_struct *work)
+{
+	tosa_bat_update(&tosa_bat_main);
+	tosa_bat_update(&tosa_bat_jacket);
+}
+
+
+static enum power_supply_property tosa_bat_main_props[] = {
+	POWER_SUPPLY_PROP_STATUS,
+	POWER_SUPPLY_PROP_TECHNOLOGY,
+	POWER_SUPPLY_PROP_VOLTAGE_NOW,
+	POWER_SUPPLY_PROP_VOLTAGE_MAX,
+	POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN,
+	POWER_SUPPLY_PROP_TEMP,
+	POWER_SUPPLY_PROP_PRESENT,
+};
+
+static enum power_supply_property tosa_bat_bu_props[] = {
+	POWER_SUPPLY_PROP_STATUS,
+	POWER_SUPPLY_PROP_TECHNOLOGY,
+	POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN,
+	POWER_SUPPLY_PROP_VOLTAGE_NOW,
+	POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN,
+	POWER_SUPPLY_PROP_PRESENT,
+};
+
+static struct tosa_bat tosa_bat_main = {
+	.status = POWER_SUPPLY_STATUS_DISCHARGING,
+	.full_chrg = -1,
+	.psy = {
+		.name = "main-battery",
+		.type = POWER_SUPPLY_TYPE_BATTERY,
+		.properties = tosa_bat_main_props,
+		.num_properties = ARRAY_SIZE(tosa_bat_main_props),
+		.get_property = tosa_bat_get_property,
+		.external_power_changed = tosa_bat_external_power_changed,
+		.use_for_apm = 1,
+	},
+
+	.gpio_full = TOSA_GPIO_BAT0_CRG,
+	.gpio_charge_off = TOSA_GPIO_CHARGE_OFF,
+
+	.technology = POWER_SUPPLY_TECHNOLOGY_LIPO,
+
+	.gpio_bat = TOSA_GPIO_BAT0_V_ON,
+	.adc_bat = WM97XX_AUX_ID3,
+	.adc_bat_divider = 414,
+	.bat_max = 4310000,
+	.bat_min = 1551 * 1000000 / 414,
+
+	.gpio_temp = TOSA_GPIO_BAT1_TH_ON,
+	.adc_temp = WM97XX_AUX_ID2,
+	.adc_temp_divider = 10000,
+};
+
+static struct tosa_bat tosa_bat_jacket = {
+	.status = POWER_SUPPLY_STATUS_DISCHARGING,
+	.full_chrg = -1,
+	.psy = {
+		.name = "jacket-battery",
+		.type = POWER_SUPPLY_TYPE_BATTERY,
+		.properties = tosa_bat_main_props,
+		.num_properties = ARRAY_SIZE(tosa_bat_main_props),
+		.get_property = tosa_bat_get_property,
+		.external_power_changed = tosa_bat_external_power_changed,
+	},
+
+	.is_present = tosa_jacket_bat_is_present,
+	.gpio_full = TOSA_GPIO_BAT1_CRG,
+	.gpio_charge_off = TOSA_GPIO_CHARGE_OFF_JC,
+
+	.technology = POWER_SUPPLY_TECHNOLOGY_LIPO,
+
+	.gpio_bat = TOSA_GPIO_BAT1_V_ON,
+	.adc_bat = WM97XX_AUX_ID3,
+	.adc_bat_divider = 414,
+	.bat_max = 4310000,
+	.bat_min = 1551 * 1000000 / 414,
+
+	.gpio_temp = TOSA_GPIO_BAT0_TH_ON,
+	.adc_temp = WM97XX_AUX_ID2,
+	.adc_temp_divider = 10000,
+};
+
+static struct tosa_bat tosa_bat_bu = {
+	.status = POWER_SUPPLY_STATUS_UNKNOWN,
+	.full_chrg = -1,
+
+	.psy = {
+		.name = "backup-battery",
+		.type = POWER_SUPPLY_TYPE_BATTERY,
+		.properties = tosa_bat_bu_props,
+		.num_properties = ARRAY_SIZE(tosa_bat_bu_props),
+		.get_property = tosa_bat_get_property,
+		.external_power_changed = tosa_bat_external_power_changed,
+	},
+
+	.gpio_full = -1,
+	.gpio_charge_off = -1,
+
+	.technology = POWER_SUPPLY_TECHNOLOGY_LiMn,
+
+	.gpio_bat = TOSA_GPIO_BU_CHRG_ON,
+	.adc_bat = WM97XX_AUX_ID4,
+	.adc_bat_divider = 1266,
+
+	.gpio_temp = -1,
+	.adc_temp = -1,
+	.adc_temp_divider = -1,
+};
+
+static struct {
+	int gpio;
+	char *name;
+	bool output;
+	int value;
+} gpios[] = {
+	{ TOSA_GPIO_CHARGE_OFF,		"main charge off",	1, 1 },
+	{ TOSA_GPIO_CHARGE_OFF_JC,	"jacket charge off",	1, 1 },
+	{ TOSA_GPIO_BAT_SW_ON,		"battery switch",	1, 0 },
+	{ TOSA_GPIO_BAT0_V_ON,		"main battery",		1, 0 },
+	{ TOSA_GPIO_BAT1_V_ON,		"jacket battery",	1, 0 },
+	{ TOSA_GPIO_BAT1_TH_ON,		"main battery temp",	1, 0 },
+	{ TOSA_GPIO_BAT0_TH_ON,		"jacket battery temp",	1, 0 },
+	{ TOSA_GPIO_BU_CHRG_ON,		"backup battery",	1, 0 },
+	{ TOSA_GPIO_BAT0_CRG,		"main battery full",	0, 0 },
+	{ TOSA_GPIO_BAT1_CRG,		"jacket battery full",	0, 0 },
+	{ TOSA_GPIO_BAT0_LOW,		"main battery low",	0, 0 },
+	{ TOSA_GPIO_BAT1_LOW,		"jacket battery low",	0, 0 },
+	{ TOSA_GPIO_JACKET_DETECT,	"jacket detect",	0, 0 },
+};
+
+#ifdef CONFIG_PM
+static int tosa_bat_suspend(struct platform_device *dev, pm_message_t state)
+{
+	/* flush all pending status updates */
+	flush_scheduled_work();
+	return 0;
+}
+
+static int tosa_bat_resume(struct platform_device *dev)
+{
+	/* things may have changed while we were away */
+	schedule_work(&bat_work);
+	return 0;
+}
+#else
+#define tosa_bat_suspend NULL
+#define tosa_bat_resume NULL
+#endif
+
+static int __devinit tosa_bat_probe(struct platform_device *dev)
+{
+	int ret;
+	int i;
+
+	if (!machine_is_tosa())
+		return -ENODEV;
+
+	for (i = 0; i < ARRAY_SIZE(gpios); i++) {
+		ret = gpio_request(gpios[i].gpio, gpios[i].name);
+		if (ret) {
+			i--;
+			goto err_gpio;
+		}
+
+		if (gpios[i].output)
+			ret = gpio_direction_output(gpios[i].gpio,
+					gpios[i].value);
+		else
+			ret = gpio_direction_input(gpios[i].gpio);
+
+		if (ret)
+			goto err_gpio;
+	}
+
+	mutex_init(&tosa_bat_main.work_lock);
+	mutex_init(&tosa_bat_jacket.work_lock);
+
+	INIT_WORK(&bat_work, tosa_bat_work);
+
+	ret = power_supply_register(&dev->dev, &tosa_bat_main.psy);
+	if (ret)
+		goto err_psy_reg_main;
+	ret = power_supply_register(&dev->dev, &tosa_bat_jacket.psy);
+	if (ret)
+		goto err_psy_reg_jacket;
+	ret = power_supply_register(&dev->dev, &tosa_bat_bu.psy);
+	if (ret)
+		goto err_psy_reg_bu;
+
+	ret = request_irq(gpio_to_irq(TOSA_GPIO_BAT0_CRG),
+				tosa_bat_gpio_isr,
+				IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING,
+				"main full", &tosa_bat_main);
+	if (ret)
+		goto err_req_main;
+
+	ret = request_irq(gpio_to_irq(TOSA_GPIO_BAT1_CRG),
+				tosa_bat_gpio_isr,
+				IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING,
+				"jacket full", &tosa_bat_jacket);
+	if (ret)
+		goto err_req_jacket;
+
+	ret = request_irq(gpio_to_irq(TOSA_GPIO_JACKET_DETECT),
+				tosa_bat_gpio_isr,
+				IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING,
+				"jacket detect", &tosa_bat_jacket);
+	if (!ret) {
+		schedule_work(&bat_work);
+		return 0;
+	}
+
+	free_irq(gpio_to_irq(TOSA_GPIO_BAT1_CRG), &tosa_bat_jacket);
+err_req_jacket:
+	free_irq(gpio_to_irq(TOSA_GPIO_BAT0_CRG), &tosa_bat_main);
+err_req_main:
+	power_supply_unregister(&tosa_bat_bu.psy);
+err_psy_reg_bu:
+	power_supply_unregister(&tosa_bat_jacket.psy);
+err_psy_reg_jacket:
+	power_supply_unregister(&tosa_bat_main.psy);
+err_psy_reg_main:
+
+	/* see comment in tosa_bat_remove */
+	flush_scheduled_work();
+
+	i--;
+err_gpio:
+	for (; i >= 0; i--)
+		gpio_free(gpios[i].gpio);
+
+	return ret;
+}
+
+static int __devexit tosa_bat_remove(struct platform_device *dev)
+{
+	int i;
+
+	free_irq(gpio_to_irq(TOSA_GPIO_JACKET_DETECT), &tosa_bat_jacket);
+	free_irq(gpio_to_irq(TOSA_GPIO_BAT1_CRG), &tosa_bat_jacket);
+	free_irq(gpio_to_irq(TOSA_GPIO_BAT0_CRG), &tosa_bat_main);
+
+	power_supply_unregister(&tosa_bat_bu.psy);
+	power_supply_unregister(&tosa_bat_jacket.psy);
+	power_supply_unregister(&tosa_bat_main.psy);
+
+	/*
+	 * now flush all pending work.
+	 * we won't get any more schedules, since all
+	 * sources (isr and external_power_changed)
+	 * are unregistered now.
+	 */
+	flush_scheduled_work();
+
+	for (i = ARRAY_SIZE(gpios) - 1; i >= 0; i--)
+		gpio_free(gpios[i].gpio);
+
+	return 0;
+}
+
+static struct platform_driver tosa_bat_driver = {
+	.driver.name = "wm97xx-battery",
+	.driver.owner = THIS_MODULE,
+	.probe = tosa_bat_probe,
+	.remove = __devexit_p(tosa_bat_remove),
+	.suspend = tosa_bat_suspend,
+	.resume = tosa_bat_resume,
+};
+
+static int __init tosa_bat_init(void)
+{
+	return platform_driver_register(&tosa_bat_driver);
+}
+
+static void __exit tosa_bat_exit(void)
+{
+	platform_driver_unregister(&tosa_bat_driver);
+}
+
+module_init(tosa_bat_init);
+module_exit(tosa_bat_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Dmitry Baryshkov");
+MODULE_DESCRIPTION("Tosa battery driver");
+MODULE_ALIAS("platform:wm97xx-battery");
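
The ADC paths in tosa_read_bat()/tosa_read_temp() above are plain linear scaling: raw counts * 1000000 (or 10000) / divider. A stand-alone sketch of the battery-voltage case, using the main-cell divider (414) from tosa_bat_main and a hypothetical raw count:

#include <stdio.h>

int main(void)
{
	unsigned long raw = 1551;	/* hypothetical wm97xx AUX ADC count */
	unsigned long divider = 414;	/* adc_bat_divider for the main cell */
	unsigned long uV = raw * 1000000 / divider;

	/* 1551 counts is the driver's bat_min: ~3746376 uV */
	printf("%lu counts -> %lu uV\n", raw, uV);
	return 0;
}
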
diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig
new file mode 100644
index 000000000000..a656128f1fdd
--- /dev/null
+++ b/drivers/regulator/Kconfig
@@ -0,0 +1,59 @@
+menu "Voltage and Current regulators"
+
+config REGULATOR
+	bool "Voltage and Current Regulator Support"
+	default n
+	help
+	  Generic Voltage and Current Regulator support.
+
+	  This framework is designed to provide a generic interface to voltage
+	  and current regulators within the Linux kernel. It's intended to
+	  provide voltage and current control to client or consumer drivers and
+	  also provide status information to user space applications through a
+	  sysfs interface.
+
+	  The intention is to allow systems to dynamically control regulator
+	  output in order to save power and prolong battery life. This applies
+	  to both voltage regulators (where voltage output is controllable) and
+	  current sinks (where current output is controllable).
+
+	  This framework safely compiles out if not selected so that client
+	  drivers can still be used in systems with no software controllable
+	  regulators.
+
+	  If unsure, say no.
+
+config REGULATOR_DEBUG
+	bool "Regulator debug support"
+	depends on REGULATOR
+	help
+	  Say yes here to enable debugging support.
+
+config REGULATOR_FIXED_VOLTAGE
+	tristate
+	default n
+	select REGULATOR
+
+config REGULATOR_VIRTUAL_CONSUMER
+	tristate "Virtual regulator consumer support"
+	default n
+	select REGULATOR
+	help
+	  This driver provides a virtual consumer for the voltage and
+	  current regulator API which provides sysfs controls for
+	  configuring the supplies requested. This is mainly useful
+	  for test purposes.
+
+	  If unsure, say no.
+
+config REGULATOR_BQ24022
+	tristate "TI bq24022 Dual Input 1-Cell Li-Ion Charger IC"
+	default n
+	select REGULATOR
+	help
+	  This driver controls a TI bq24022 Charger attached via
+	  GPIOs. The provided current regulator can enable/disable
+	  charging and select between 100 mA and 500 mA charging
+	  current limits.
+
+endmenu
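
The help text above describes the consumer side of the framework. A hedged sketch of a client driver using only the calls defined in the core.c diff later in this patch (regulator_get, regulator_enable, regulator_disable, regulator_put); the supply name "Vcc" and the surrounding function are illustrative:

#include <linux/device.h>
#include <linux/err.h>
#include <linux/regulator/consumer.h>

static int example_power_on(struct device *dev)
{
	struct regulator *reg;
	int ret;

	reg = regulator_get(dev, "Vcc");	/* look up our supply */
	if (IS_ERR(reg))
		return PTR_ERR(reg);

	ret = regulator_enable(reg);		/* must be balanced by disable */
	if (ret) {
		regulator_put(reg);
		return ret;
	}

	/* ... use the supplied device ... */

	regulator_disable(reg);
	regulator_put(reg);
	return 0;
}
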
diff --git a/drivers/regulator/Makefile b/drivers/regulator/Makefile
new file mode 100644
index 000000000000..ac2c64efe65c
--- /dev/null
+++ b/drivers/regulator/Makefile
@@ -0,0 +1,12 @@
+#
+# Makefile for regulator drivers.
+#
+
+
+obj-$(CONFIG_REGULATOR) += core.o
+obj-$(CONFIG_REGULATOR_FIXED_VOLTAGE) += fixed.o
+obj-$(CONFIG_REGULATOR_VIRTUAL_CONSUMER) += virtual.o
+
+obj-$(CONFIG_REGULATOR_BQ24022) += bq24022.o
+
+ccflags-$(CONFIG_REGULATOR_DEBUG) += -DDEBUG
diff --git a/drivers/regulator/bq24022.c b/drivers/regulator/bq24022.c
new file mode 100644
index 000000000000..263699d6152d
--- /dev/null
+++ b/drivers/regulator/bq24022.c
@@ -0,0 +1,167 @@
+/*
+ * Support for TI bq24022 (bqTINY-II) Dual Input (USB/AC Adapter)
+ * 1-Cell Li-Ion Charger connected via GPIOs.
+ *
+ * Copyright (c) 2008 Philipp Zabel
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/err.h>
+#include <linux/gpio.h>
+#include <linux/regulator/bq24022.h>
+#include <linux/regulator/driver.h>
+
+static int bq24022_set_current_limit(struct regulator_dev *rdev,
+					int min_uA, int max_uA)
+{
+	struct platform_device *pdev = rdev_get_drvdata(rdev);
+	struct bq24022_mach_info *pdata = pdev->dev.platform_data;
+
+	dev_dbg(&pdev->dev, "setting current limit to %s mA\n",
+		max_uA >= 500000 ? "500" : "100");
+
+	/* REVISIT: maybe return error if min_uA != 0 ? */
+	gpio_set_value(pdata->gpio_iset2, max_uA >= 500000);
+	return 0;
+}
+
+static int bq24022_get_current_limit(struct regulator_dev *rdev)
+{
+	struct platform_device *pdev = rdev_get_drvdata(rdev);
+	struct bq24022_mach_info *pdata = pdev->dev.platform_data;
+
+	return gpio_get_value(pdata->gpio_iset2) ? 500000 : 100000;
+}
+
+static int bq24022_enable(struct regulator_dev *rdev)
+{
+	struct platform_device *pdev = rdev_get_drvdata(rdev);
+	struct bq24022_mach_info *pdata = pdev->dev.platform_data;
+
+	dev_dbg(&pdev->dev, "enabling charger\n");
+
+	gpio_set_value(pdata->gpio_nce, 0);
+	return 0;
+}
+
+static int bq24022_disable(struct regulator_dev *rdev)
+{
+	struct platform_device *pdev = rdev_get_drvdata(rdev);
+	struct bq24022_mach_info *pdata = pdev->dev.platform_data;
+
+	dev_dbg(&pdev->dev, "disabling charger\n");
+
+	gpio_set_value(pdata->gpio_nce, 1);
+	return 0;
+}
+
+static int bq24022_is_enabled(struct regulator_dev *rdev)
+{
+	struct platform_device *pdev = rdev_get_drvdata(rdev);
+	struct bq24022_mach_info *pdata = pdev->dev.platform_data;
+
+	return !gpio_get_value(pdata->gpio_nce);
+}
+
+static struct regulator_ops bq24022_ops = {
+	.set_current_limit = bq24022_set_current_limit,
+	.get_current_limit = bq24022_get_current_limit,
+	.enable = bq24022_enable,
+	.disable = bq24022_disable,
+	.is_enabled = bq24022_is_enabled,
+};
+
+static struct regulator_desc bq24022_desc = {
+	.name = "bq24022",
+	.ops = &bq24022_ops,
+	.type = REGULATOR_CURRENT,
+};
+
+static int __init bq24022_probe(struct platform_device *pdev)
+{
+	struct bq24022_mach_info *pdata = pdev->dev.platform_data;
+	struct regulator_dev *bq24022;
+	int ret;
+
+	if (!pdata || !pdata->gpio_nce || !pdata->gpio_iset2)
+		return -EINVAL;
+
+	ret = gpio_request(pdata->gpio_nce, "ncharge_en");
+	if (ret) {
+		dev_dbg(&pdev->dev, "couldn't request nCE GPIO: %d\n",
+			pdata->gpio_nce);
+		goto err_ce;
+	}
+	ret = gpio_request(pdata->gpio_iset2, "charge_mode");
+	if (ret) {
+		dev_dbg(&pdev->dev, "couldn't request ISET2 GPIO: %d\n",
+			pdata->gpio_iset2);
+		goto err_iset2;
+	}
+	ret = gpio_direction_output(pdata->gpio_iset2, 0);
+	ret = gpio_direction_output(pdata->gpio_nce, 1);
+
+	bq24022 = regulator_register(&bq24022_desc, pdev);
+	if (IS_ERR(bq24022)) {
+		dev_dbg(&pdev->dev, "couldn't register regulator\n");
+		ret = PTR_ERR(bq24022);
+		goto err_reg;
+	}
+	platform_set_drvdata(pdev, bq24022);
+	dev_dbg(&pdev->dev, "registered regulator\n");
+
+	return 0;
+err_reg:
+	gpio_free(pdata->gpio_iset2);
+err_iset2:
+	gpio_free(pdata->gpio_nce);
+err_ce:
+	return ret;
+}
+
+static int __devexit bq24022_remove(struct platform_device *pdev)
+{
+	struct bq24022_mach_info *pdata = pdev->dev.platform_data;
+	struct regulator_dev *bq24022 = platform_get_drvdata(pdev);
+
+	regulator_unregister(bq24022);
+	gpio_free(pdata->gpio_iset2);
+	gpio_free(pdata->gpio_nce);
+
+	return 0;
+}
+
+static struct platform_driver bq24022_driver = {
+	.driver = {
+		.name = "bq24022",
+	},
+	.remove = __devexit_p(bq24022_remove),
+};
+
+static int __init bq24022_init(void)
+{
+	return platform_driver_probe(&bq24022_driver, bq24022_probe);
+}
+
+static void __exit bq24022_exit(void)
+{
+	platform_driver_unregister(&bq24022_driver);
+}
+
+/*
+ * make sure this is probed before gpio_vbus and pda_power,
+ * but after asic3 or other GPIO expander drivers.
+ */
+subsys_initcall(bq24022_init);
+module_exit(bq24022_exit);
+
+MODULE_AUTHOR("Philipp Zabel");
+MODULE_DESCRIPTION("TI bq24022 Li-Ion Charger driver");
+MODULE_LICENSE("GPL");
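
The probe above only works if board code supplies a platform device named "bq24022" whose platform_data is a struct bq24022_mach_info carrying the two GPIO lines. A sketch of that board-level glue; the GPIO numbers are placeholders, not real wiring:

#include <linux/init.h>
#include <linux/platform_device.h>
#include <linux/regulator/bq24022.h>

static struct bq24022_mach_info bq24022_info = {
	.gpio_nce = 42,		/* hypothetical nCE GPIO */
	.gpio_iset2 = 43,	/* hypothetical ISET2 GPIO */
};

static struct platform_device bq24022_device = {
	.name = "bq24022",
	.id = -1,
	.dev = {
		.platform_data = &bq24022_info,
	},
};

static int __init board_add_charger(void)
{
	/* register early enough that subsys_initcall probing finds it */
	return platform_device_register(&bq24022_device);
}
arch_initcall(board_add_charger);
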
diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
new file mode 100644
index 000000000000..9c7986261568
--- /dev/null
+++ b/drivers/regulator/core.c
@@ -0,0 +1,1903 @@
+/*
+ * core.c  --  Voltage/Current Regulator framework.
+ *
+ * Copyright 2007, 2008 Wolfson Microelectronics PLC.
+ *
+ * Author: Liam Girdwood <liam.girdwood@wolfsonmicro.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/mutex.h>
+#include <linux/suspend.h>
+#include <linux/regulator/consumer.h>
+#include <linux/regulator/driver.h>
+#include <linux/regulator/machine.h>
+
+#define REGULATOR_VERSION "0.5"
+
+static DEFINE_MUTEX(regulator_list_mutex);
+static LIST_HEAD(regulator_list);
+static LIST_HEAD(regulator_map_list);
+
+/**
+ * struct regulator_dev
+ *
+ * Voltage / Current regulator class device. One for each regulator.
+ */
+struct regulator_dev {
+	struct regulator_desc *desc;
+	int use_count;
+
+	/* lists we belong to */
+	struct list_head list; /* list of all regulators */
+	struct list_head slist; /* list of supplied regulators */
+
+	/* lists we own */
+	struct list_head consumer_list; /* consumers we supply */
+	struct list_head supply_list; /* regulators we supply */
+
+	struct blocking_notifier_head notifier;
+	struct mutex mutex; /* consumer lock */
+	struct module *owner;
+	struct device dev;
+	struct regulation_constraints *constraints;
+	struct regulator_dev *supply; /* for tree */
+
+	void *reg_data; /* regulator_dev data */
+};
+
+/**
+ * struct regulator_map
+ *
+ * Used to provide symbolic supply names to devices.
+ */
+struct regulator_map {
+	struct list_head list;
+	struct device *dev;
+	const char *supply;
+	const char *regulator;
+};
+
+static inline struct regulator_dev *to_rdev(struct device *d)
+{
+	return container_of(d, struct regulator_dev, dev);
+}
+
+/*
+ * struct regulator
+ *
+ * One for each consumer device.
+ */
+struct regulator {
+	struct device *dev;
+	struct list_head list;
+	int uA_load;
+	int min_uV;
+	int max_uV;
+	int enabled; /* client has called enable() */
+	char *supply_name;
+	struct device_attribute dev_attr;
+	struct regulator_dev *rdev;
+};
+
+static int _regulator_is_enabled(struct regulator_dev *rdev);
+static int _regulator_disable(struct regulator_dev *rdev);
+static int _regulator_get_voltage(struct regulator_dev *rdev);
+static int _regulator_get_current_limit(struct regulator_dev *rdev);
+static unsigned int _regulator_get_mode(struct regulator_dev *rdev);
+static void _notifier_call_chain(struct regulator_dev *rdev,
+				 unsigned long event, void *data);
+
+/* gets the regulator for a given consumer device */
+static struct regulator *get_device_regulator(struct device *dev)
+{
+	struct regulator *regulator = NULL;
+	struct regulator_dev *rdev;
+
+	mutex_lock(&regulator_list_mutex);
+	list_for_each_entry(rdev, &regulator_list, list) {
+		mutex_lock(&rdev->mutex);
+		list_for_each_entry(regulator, &rdev->consumer_list, list) {
+			if (regulator->dev == dev) {
+				mutex_unlock(&rdev->mutex);
+				mutex_unlock(&regulator_list_mutex);
+				return regulator;
+			}
+		}
+		mutex_unlock(&rdev->mutex);
+	}
+	mutex_unlock(&regulator_list_mutex);
+	return NULL;
+}
+
+/* Platform voltage constraint check */
+static int regulator_check_voltage(struct regulator_dev *rdev,
+				   int *min_uV, int *max_uV)
+{
+	BUG_ON(*min_uV > *max_uV);
+
+	if (!rdev->constraints) {
+		printk(KERN_ERR "%s: no constraints for %s\n", __func__,
+		       rdev->desc->name);
+		return -ENODEV;
+	}
+	if (!(rdev->constraints->valid_ops_mask & REGULATOR_CHANGE_VOLTAGE)) {
+		printk(KERN_ERR "%s: operation not allowed for %s\n",
+		       __func__, rdev->desc->name);
+		return -EPERM;
+	}
+
+	if (*max_uV > rdev->constraints->max_uV)
+		*max_uV = rdev->constraints->max_uV;
+	if (*min_uV < rdev->constraints->min_uV)
+		*min_uV = rdev->constraints->min_uV;
+
+	if (*min_uV > *max_uV)
+		return -EINVAL;
+
+	return 0;
+}
+
+/* current constraint check */
+static int regulator_check_current_limit(struct regulator_dev *rdev,
+					 int *min_uA, int *max_uA)
+{
+	BUG_ON(*min_uA > *max_uA);
+
+	if (!rdev->constraints) {
+		printk(KERN_ERR "%s: no constraints for %s\n", __func__,
+		       rdev->desc->name);
+		return -ENODEV;
+	}
+	if (!(rdev->constraints->valid_ops_mask & REGULATOR_CHANGE_CURRENT)) {
+		printk(KERN_ERR "%s: operation not allowed for %s\n",
+		       __func__, rdev->desc->name);
+		return -EPERM;
+	}
+
+	if (*max_uA > rdev->constraints->max_uA)
+		*max_uA = rdev->constraints->max_uA;
+	if (*min_uA < rdev->constraints->min_uA)
+		*min_uA = rdev->constraints->min_uA;
+
+	if (*min_uA > *max_uA)
+		return -EINVAL;
+
+	return 0;
+}
+
+/* operating mode constraint check */
+static int regulator_check_mode(struct regulator_dev *rdev, int mode)
+{
+	if (!rdev->constraints) {
+		printk(KERN_ERR "%s: no constraints for %s\n", __func__,
+		       rdev->desc->name);
+		return -ENODEV;
+	}
+	if (!(rdev->constraints->valid_ops_mask & REGULATOR_CHANGE_MODE)) {
+		printk(KERN_ERR "%s: operation not allowed for %s\n",
+		       __func__, rdev->desc->name);
+		return -EPERM;
+	}
+	if (!(rdev->constraints->valid_modes_mask & mode)) {
+		printk(KERN_ERR "%s: invalid mode %x for %s\n",
+		       __func__, mode, rdev->desc->name);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/* dynamic regulator mode switching constraint check */
+static int regulator_check_drms(struct regulator_dev *rdev)
+{
+	if (!rdev->constraints) {
+		printk(KERN_ERR "%s: no constraints for %s\n", __func__,
+		       rdev->desc->name);
+		return -ENODEV;
+	}
+	if (!(rdev->constraints->valid_ops_mask & REGULATOR_CHANGE_DRMS)) {
+		printk(KERN_ERR "%s: operation not allowed for %s\n",
+		       __func__, rdev->desc->name);
+		return -EPERM;
+	}
+	return 0;
+}
+
+static ssize_t device_requested_uA_show(struct device *dev,
+			struct device_attribute *attr, char *buf)
+{
+	struct regulator *regulator;
+
+	regulator = get_device_regulator(dev);
+	if (regulator == NULL)
+		return 0;
+
+	return sprintf(buf, "%d\n", regulator->uA_load);
+}
+
+static ssize_t regulator_uV_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct regulator_dev *rdev = to_rdev(dev);
+	ssize_t ret;
+
+	mutex_lock(&rdev->mutex);
+	ret = sprintf(buf, "%d\n", _regulator_get_voltage(rdev));
+	mutex_unlock(&rdev->mutex);
+
+	return ret;
+}
+
+static ssize_t regulator_uA_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct regulator_dev *rdev = to_rdev(dev);
+
+	return sprintf(buf, "%d\n", _regulator_get_current_limit(rdev));
+}
+
+static ssize_t regulator_opmode_show(struct device *dev,
+				    struct device_attribute *attr, char *buf)
+{
+	struct regulator_dev *rdev = to_rdev(dev);
+	int mode = _regulator_get_mode(rdev);
+
+	switch (mode) {
+	case REGULATOR_MODE_FAST:
+		return sprintf(buf, "fast\n");
+	case REGULATOR_MODE_NORMAL:
+		return sprintf(buf, "normal\n");
+	case REGULATOR_MODE_IDLE:
+		return sprintf(buf, "idle\n");
+	case REGULATOR_MODE_STANDBY:
+		return sprintf(buf, "standby\n");
+	}
+	return sprintf(buf, "unknown\n");
+}
+
+static ssize_t regulator_state_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct regulator_dev *rdev = to_rdev(dev);
+	int state = _regulator_is_enabled(rdev);
+
+	if (state > 0)
+		return sprintf(buf, "enabled\n");
+	else if (state == 0)
+		return sprintf(buf, "disabled\n");
+	else
+		return sprintf(buf, "unknown\n");
+}
+
+static ssize_t regulator_min_uA_show(struct device *dev,
+				    struct device_attribute *attr, char *buf)
+{
+	struct regulator_dev *rdev = to_rdev(dev);
+
+	if (!rdev->constraints)
+		return sprintf(buf, "constraint not defined\n");
+
+	return sprintf(buf, "%d\n", rdev->constraints->min_uA);
+}
+
+static ssize_t regulator_max_uA_show(struct device *dev,
+				    struct device_attribute *attr, char *buf)
+{
+	struct regulator_dev *rdev = to_rdev(dev);
+
+	if (!rdev->constraints)
+		return sprintf(buf, "constraint not defined\n");
+
+	return sprintf(buf, "%d\n", rdev->constraints->max_uA);
+}
+
+static ssize_t regulator_min_uV_show(struct device *dev,
+				    struct device_attribute *attr, char *buf)
+{
+	struct regulator_dev *rdev = to_rdev(dev);
+
+	if (!rdev->constraints)
+		return sprintf(buf, "constraint not defined\n");
+
+	return sprintf(buf, "%d\n", rdev->constraints->min_uV);
+}
+
+static ssize_t regulator_max_uV_show(struct device *dev,
+				    struct device_attribute *attr, char *buf)
+{
+	struct regulator_dev *rdev = to_rdev(dev);
+
+	if (!rdev->constraints)
+		return sprintf(buf, "constraint not defined\n");
+
+	return sprintf(buf, "%d\n", rdev->constraints->max_uV);
+}
+
+static ssize_t regulator_total_uA_show(struct device *dev,
+				      struct device_attribute *attr, char *buf)
+{
+	struct regulator_dev *rdev = to_rdev(dev);
+	struct regulator *regulator;
+	int uA = 0;
+
+	mutex_lock(&rdev->mutex);
+	list_for_each_entry(regulator, &rdev->consumer_list, list)
+		uA += regulator->uA_load;
+	mutex_unlock(&rdev->mutex);
+	return sprintf(buf, "%d\n", uA);
+}
+
+static ssize_t regulator_num_users_show(struct device *dev,
+				      struct device_attribute *attr, char *buf)
+{
+	struct regulator_dev *rdev = to_rdev(dev);
+	return sprintf(buf, "%d\n", rdev->use_count);
+}
+
+static ssize_t regulator_type_show(struct device *dev,
+				  struct device_attribute *attr, char *buf)
+{
+	struct regulator_dev *rdev = to_rdev(dev);
+
+	switch (rdev->desc->type) {
+	case REGULATOR_VOLTAGE:
+		return sprintf(buf, "voltage\n");
+	case REGULATOR_CURRENT:
+		return sprintf(buf, "current\n");
+	}
+	return sprintf(buf, "unknown\n");
+}
+
+static ssize_t regulator_suspend_mem_uV_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct regulator_dev *rdev = to_rdev(dev);
+
+	if (!rdev->constraints)
+		return sprintf(buf, "not defined\n");
+	return sprintf(buf, "%d\n", rdev->constraints->state_mem.uV);
+}
+
+static ssize_t regulator_suspend_disk_uV_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct regulator_dev *rdev = to_rdev(dev);
+
+	if (!rdev->constraints)
+		return sprintf(buf, "not defined\n");
+	return sprintf(buf, "%d\n", rdev->constraints->state_disk.uV);
+}
+
+static ssize_t regulator_suspend_standby_uV_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct regulator_dev *rdev = to_rdev(dev);
+
+	if (!rdev->constraints)
+		return sprintf(buf, "not defined\n");
+	return sprintf(buf, "%d\n", rdev->constraints->state_standby.uV);
+}
+
+static ssize_t suspend_opmode_show(struct regulator_dev *rdev,
+	unsigned int mode, char *buf)
+{
+	switch (mode) {
+	case REGULATOR_MODE_FAST:
+		return sprintf(buf, "fast\n");
+	case REGULATOR_MODE_NORMAL:
+		return sprintf(buf, "normal\n");
+	case REGULATOR_MODE_IDLE:
+		return sprintf(buf, "idle\n");
+	case REGULATOR_MODE_STANDBY:
+		return sprintf(buf, "standby\n");
+	}
+	return sprintf(buf, "unknown\n");
+}
+
+static ssize_t regulator_suspend_mem_mode_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct regulator_dev *rdev = to_rdev(dev);
+
+	if (!rdev->constraints)
+		return sprintf(buf, "not defined\n");
+	return suspend_opmode_show(rdev,
+		rdev->constraints->state_mem.mode, buf);
+}
+
+static ssize_t regulator_suspend_disk_mode_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct regulator_dev *rdev = to_rdev(dev);
+
+	if (!rdev->constraints)
+		return sprintf(buf, "not defined\n");
+	return suspend_opmode_show(rdev,
+		rdev->constraints->state_disk.mode, buf);
+}
+
+static ssize_t regulator_suspend_standby_mode_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct regulator_dev *rdev = to_rdev(dev);
+
+	if (!rdev->constraints)
+		return sprintf(buf, "not defined\n");
+	return suspend_opmode_show(rdev,
+		rdev->constraints->state_standby.mode, buf);
+}
+
+static ssize_t regulator_suspend_mem_state_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct regulator_dev *rdev = to_rdev(dev);
+
+	if (!rdev->constraints)
+		return sprintf(buf, "not defined\n");
+
+	if (rdev->constraints->state_mem.enabled)
+		return sprintf(buf, "enabled\n");
+	else
+		return sprintf(buf, "disabled\n");
+}
+
+static ssize_t regulator_suspend_disk_state_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct regulator_dev *rdev = to_rdev(dev);
+
+	if (!rdev->constraints)
+		return sprintf(buf, "not defined\n");
+
+	if (rdev->constraints->state_disk.enabled)
+		return sprintf(buf, "enabled\n");
+	else
+		return sprintf(buf, "disabled\n");
+}
+
+static ssize_t regulator_suspend_standby_state_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct regulator_dev *rdev = to_rdev(dev);
+
+	if (!rdev->constraints)
+		return sprintf(buf, "not defined\n");
+
+	if (rdev->constraints->state_standby.enabled)
+		return sprintf(buf, "enabled\n");
+	else
+		return sprintf(buf, "disabled\n");
+}
+static struct device_attribute regulator_dev_attrs[] = {
+	__ATTR(microvolts, 0444, regulator_uV_show, NULL),
+	__ATTR(microamps, 0444, regulator_uA_show, NULL),
+	__ATTR(opmode, 0444, regulator_opmode_show, NULL),
+	__ATTR(state, 0444, regulator_state_show, NULL),
+	__ATTR(min_microvolts, 0444, regulator_min_uV_show, NULL),
+	__ATTR(min_microamps, 0444, regulator_min_uA_show, NULL),
+	__ATTR(max_microvolts, 0444, regulator_max_uV_show, NULL),
+	__ATTR(max_microamps, 0444, regulator_max_uA_show, NULL),
+	__ATTR(requested_microamps, 0444, regulator_total_uA_show, NULL),
+	__ATTR(num_users, 0444, regulator_num_users_show, NULL),
+	__ATTR(type, 0444, regulator_type_show, NULL),
+	__ATTR(suspend_mem_microvolts, 0444,
+		regulator_suspend_mem_uV_show, NULL),
+	__ATTR(suspend_disk_microvolts, 0444,
+		regulator_suspend_disk_uV_show, NULL),
+	__ATTR(suspend_standby_microvolts, 0444,
+		regulator_suspend_standby_uV_show, NULL),
+	__ATTR(suspend_mem_mode, 0444,
+		regulator_suspend_mem_mode_show, NULL),
+	__ATTR(suspend_disk_mode, 0444,
+		regulator_suspend_disk_mode_show, NULL),
+	__ATTR(suspend_standby_mode, 0444,
+		regulator_suspend_standby_mode_show, NULL),
+	__ATTR(suspend_mem_state, 0444,
+		regulator_suspend_mem_state_show, NULL),
+	__ATTR(suspend_disk_state, 0444,
+		regulator_suspend_disk_state_show, NULL),
+	__ATTR(suspend_standby_state, 0444,
+		regulator_suspend_standby_state_show, NULL),
+	__ATTR_NULL,
+};
+
+static void regulator_dev_release(struct device *dev)
+{
+	struct regulator_dev *rdev = to_rdev(dev);
+	kfree(rdev);
+}
+
+static struct class regulator_class = {
+	.name = "regulator",
+	.dev_release = regulator_dev_release,
+	.dev_attrs = regulator_dev_attrs,
+};
+
+/* Calculate the new optimum regulator operating mode based on the new total
+ * consumer load. All locks held by caller */
+static void drms_uA_update(struct regulator_dev *rdev)
+{
+	struct regulator *sibling;
+	int current_uA = 0, output_uV, input_uV, err;
+	unsigned int mode;
+
+	err = regulator_check_drms(rdev);
+	if (err < 0 || !rdev->desc->ops->get_optimum_mode ||
+	    !rdev->desc->ops->get_voltage || !rdev->desc->ops->set_mode)
+		return;
+
+	/* get output voltage */
+	output_uV = rdev->desc->ops->get_voltage(rdev);
+	if (output_uV <= 0)
+		return;
+
+	/* get input voltage */
+	if (rdev->supply && rdev->supply->desc->ops->get_voltage)
+		input_uV = rdev->supply->desc->ops->get_voltage(rdev->supply);
+	else
+		input_uV = rdev->constraints->input_uV;
+	if (input_uV <= 0)
+		return;
+
+	/* calc total requested load */
+	list_for_each_entry(sibling, &rdev->consumer_list, list)
+		current_uA += sibling->uA_load;
+
+	/* now get the optimum mode for our new total regulator load */
+	mode = rdev->desc->ops->get_optimum_mode(rdev, input_uV,
+						 output_uV, current_uA);
+
+	/* check the new mode is allowed */
+	err = regulator_check_mode(rdev, mode);
+	if (err == 0)
+		rdev->desc->ops->set_mode(rdev, mode);
+}
+
+static int suspend_set_state(struct regulator_dev *rdev,
+	struct regulator_state *rstate)
+{
+	int ret = 0;
+
+	/* enable & disable are mandatory for suspend control */
+	if (!rdev->desc->ops->set_suspend_enable ||
+		!rdev->desc->ops->set_suspend_disable)
+		return -EINVAL;
+
+	if (rstate->enabled)
+		ret = rdev->desc->ops->set_suspend_enable(rdev);
+	else
+		ret = rdev->desc->ops->set_suspend_disable(rdev);
+	if (ret < 0) {
+		printk(KERN_ERR "%s: failed to enable/disable\n", __func__);
581 return ret;
582 }
583
584 if (rdev->desc->ops->set_suspend_voltage && rstate->uV > 0) {
585 ret = rdev->desc->ops->set_suspend_voltage(rdev, rstate->uV);
586 if (ret < 0) {
587 printk(KERN_ERR "%s: failed to set voltage\n",
588 __func__);
589 return ret;
590 }
591 }
592
593 if (rdev->desc->ops->set_suspend_mode && rstate->mode > 0) {
594 ret = rdev->desc->ops->set_suspend_mode(rdev, rstate->mode);
595 if (ret < 0) {
596 printk(KERN_ERR "%s: failed to set mode\n", __func__);
597 return ret;
598 }
599 }
600 return ret;
601}
602
603/* locks held by caller */
604static int suspend_prepare(struct regulator_dev *rdev, suspend_state_t state)
605{
606 if (!rdev->constraints)
607 return -EINVAL;
608
609 switch (state) {
610 case PM_SUSPEND_STANDBY:
611 return suspend_set_state(rdev,
612 &rdev->constraints->state_standby);
613 case PM_SUSPEND_MEM:
614 return suspend_set_state(rdev,
615 &rdev->constraints->state_mem);
616 case PM_SUSPEND_MAX:
617 return suspend_set_state(rdev,
618 &rdev->constraints->state_disk);
619 default:
620 return -EINVAL;
621 }
622}
623
624static void print_constraints(struct regulator_dev *rdev)
625{
626 struct regulation_constraints *constraints = rdev->constraints;
627 char buf[80];
628 int count;
629
630 if (rdev->desc->type == REGULATOR_VOLTAGE) {
631 if (constraints->min_uV == constraints->max_uV)
632 count = sprintf(buf, "%d mV ",
633 constraints->min_uV / 1000);
634 else
635 count = sprintf(buf, "%d <--> %d mV ",
636 constraints->min_uV / 1000,
637 constraints->max_uV / 1000);
638 } else {
639 if (constraints->min_uA == constraints->max_uA)
640 count = sprintf(buf, "%d mA ",
641 constraints->min_uA / 1000);
642 else
643 count = sprintf(buf, "%d <--> %d mA ",
644 constraints->min_uA / 1000,
645 constraints->max_uA / 1000);
646 }
647 if (constraints->valid_modes_mask & REGULATOR_MODE_FAST)
648 count += sprintf(buf + count, "fast ");
649 if (constraints->valid_modes_mask & REGULATOR_MODE_NORMAL)
650 count += sprintf(buf + count, "normal ");
651 if (constraints->valid_modes_mask & REGULATOR_MODE_IDLE)
652 count += sprintf(buf + count, "idle ");
653 if (constraints->valid_modes_mask & REGULATOR_MODE_STANDBY)
654 count += sprintf(buf + count, "standby");
655
656 printk(KERN_INFO "regulator: %s: %s\n", rdev->desc->name, buf);
657}
658
659#define REG_STR_SIZE 32
660
661static struct regulator *create_regulator(struct regulator_dev *rdev,
662 struct device *dev,
663 const char *supply_name)
664{
665 struct regulator *regulator;
666 char buf[REG_STR_SIZE];
667 int err, size;
668
669 regulator = kzalloc(sizeof(*regulator), GFP_KERNEL);
670 if (regulator == NULL)
671 return NULL;
672
673 mutex_lock(&rdev->mutex);
674 regulator->rdev = rdev;
675 list_add(&regulator->list, &rdev->consumer_list);
676
677 if (dev) {
678 /* create a 'requested_microamps_name' sysfs entry */
679 size = scnprintf(buf, REG_STR_SIZE, "microamps_requested_%s",
680 supply_name);
681 if (size >= REG_STR_SIZE)
682 goto overflow_err;
683
684 regulator->dev = dev;
685 regulator->dev_attr.attr.name = kstrdup(buf, GFP_KERNEL);
686 if (regulator->dev_attr.attr.name == NULL)
687 goto attr_name_err;
688
689 regulator->dev_attr.attr.owner = THIS_MODULE;
690 regulator->dev_attr.attr.mode = 0444;
691 regulator->dev_attr.show = device_requested_uA_show;
692 err = device_create_file(dev, &regulator->dev_attr);
693 if (err < 0) {
694 printk(KERN_WARNING "%s: could not add regulator_dev"
695 " load sysfs\n", __func__);
696 goto attr_name_err;
697 }
698
699 /* also add a link to the device sysfs entry */
700 size = scnprintf(buf, REG_STR_SIZE, "%s-%s",
701 dev->kobj.name, supply_name);
702 if (size >= REG_STR_SIZE)
703 goto attr_err;
704
705 regulator->supply_name = kstrdup(buf, GFP_KERNEL);
706 if (regulator->supply_name == NULL)
707 goto attr_err;
708
709 err = sysfs_create_link(&rdev->dev.kobj, &dev->kobj,
710 buf);
711 if (err) {
712 printk(KERN_WARNING
713 "%s: could not add device link %s err %d\n",
714 __func__, dev->kobj.name, err);
715 device_remove_file(dev, &regulator->dev_attr);
716 goto link_name_err;
717 }
718 }
719 mutex_unlock(&rdev->mutex);
720 return regulator;
721link_name_err:
722 kfree(regulator->supply_name);
723attr_err:
724 device_remove_file(regulator->dev, &regulator->dev_attr);
725attr_name_err:
726 kfree(regulator->dev_attr.attr.name);
727overflow_err:
728 list_del(&regulator->list);
729 kfree(regulator);
730 mutex_unlock(&rdev->mutex);
731 return NULL;
732}
733
734/**
735 * regulator_get - lookup and obtain a reference to a regulator.
736 * @dev: device for regulator "consumer"
737 * @id: Supply name or regulator ID.
738 *
739 * Returns a struct regulator corresponding to the regulator producer,
740 * or IS_ERR() condition containing errno. Use of supply names
741 * configured via regulator_set_device_supply() is strongly
742 * encouraged.
743 */
744struct regulator *regulator_get(struct device *dev, const char *id)
745{
746 struct regulator_dev *rdev;
747 struct regulator_map *map;
748 struct regulator *regulator = ERR_PTR(-ENODEV);
749 const char *supply = id;
750
751 if (id == NULL) {
752 printk(KERN_ERR "regulator: get() with no identifier\n");
753 return regulator;
754 }
755
756 mutex_lock(&regulator_list_mutex);
757
758 list_for_each_entry(map, &regulator_map_list, list) {
759 if (dev == map->dev &&
760 strcmp(map->supply, id) == 0) {
761 supply = map->regulator;
762 break;
763 }
764 }
765
766 list_for_each_entry(rdev, &regulator_list, list) {
767 if (strcmp(supply, rdev->desc->name) == 0 &&
768 try_module_get(rdev->owner))
769 goto found;
770 }
771 printk(KERN_ERR "regulator: Unable to get requested regulator: %s\n",
772 id);
773 mutex_unlock(&regulator_list_mutex);
774 return regulator;
775
776found:
777 regulator = create_regulator(rdev, dev, id);
778 if (regulator == NULL) {
779 regulator = ERR_PTR(-ENOMEM);
780 module_put(rdev->owner);
781 }
782
783 mutex_unlock(&regulator_list_mutex);
784 return regulator;
785}
786EXPORT_SYMBOL_GPL(regulator_get);
787
788/**
789 * regulator_put - "free" the regulator source
790 * @regulator: regulator source
791 *
792 * Note: drivers must ensure that all regulator_enable calls made on this
793 * regulator source are balanced by regulator_disable calls prior to calling
794 * this function.
795 */
796void regulator_put(struct regulator *regulator)
797{
798 struct regulator_dev *rdev;
799
800 if (regulator == NULL || IS_ERR(regulator))
801 return;
802
803 if (regulator->enabled) {
804 printk(KERN_WARNING "Releasing supply %s while enabled\n",
805 regulator->supply_name);
806 WARN_ON(regulator->enabled);
807 regulator_disable(regulator);
808 }
809
810 mutex_lock(&regulator_list_mutex);
811 rdev = regulator->rdev;
812
813 /* remove any sysfs entries */
814 if (regulator->dev) {
815 sysfs_remove_link(&rdev->dev.kobj, regulator->supply_name);
816 kfree(regulator->supply_name);
817 device_remove_file(regulator->dev, &regulator->dev_attr);
818 kfree(regulator->dev_attr.attr.name);
819 }
820 list_del(&regulator->list);
821 kfree(regulator);
822
823 module_put(rdev->owner);
824 mutex_unlock(&regulator_list_mutex);
825}
826EXPORT_SYMBOL_GPL(regulator_put);
827
828/* locks held by regulator_enable() */
829static int _regulator_enable(struct regulator_dev *rdev)
830{
831 int ret = -EINVAL;
832
833 if (!rdev->constraints) {
834 printk(KERN_ERR "%s: %s has no constraints\n",
835 __func__, rdev->desc->name);
836 return ret;
837 }
838
839 /* do we need to enable the supply regulator first */
840 if (rdev->supply) {
841 ret = _regulator_enable(rdev->supply);
842 if (ret < 0) {
843 printk(KERN_ERR "%s: failed to enable %s: %d\n",
844 __func__, rdev->desc->name, ret);
845 return ret;
846 }
847 }
848
849 /* check voltage and requested load before enabling */
850 if (rdev->desc->ops->enable) {
851
852 if (rdev->constraints &&
853 (rdev->constraints->valid_ops_mask &
854 REGULATOR_CHANGE_DRMS))
855 drms_uA_update(rdev);
856
857 ret = rdev->desc->ops->enable(rdev);
858 if (ret < 0) {
859 printk(KERN_ERR "%s: failed to enable %s: %d\n",
860 __func__, rdev->desc->name, ret);
861 return ret;
862 }
863 rdev->use_count++;
864 return ret;
865 }
866
867 return ret;
868}
869
870/**
871 * regulator_enable - enable regulator output
872 * @regulator: regulator source
873 *
874 * Enable the regulator output at the predefined voltage or current value.
875 * NOTE: the output value can be set by other drivers, boot loader or may be
876 * hardwired in the regulator.
877 * NOTE: calls to regulator_enable() must be balanced with calls to
878 * regulator_disable().
879 */
880int regulator_enable(struct regulator *regulator)
881{
882 int ret;
883
884 if (regulator->enabled) {
885 printk(KERN_CRIT "Regulator %s already enabled\n",
886 regulator->supply_name);
887 WARN_ON(regulator->enabled);
888 return 0;
889 }
890
891 mutex_lock(&regulator->rdev->mutex);
892 regulator->enabled = 1;
893 ret = _regulator_enable(regulator->rdev);
894 if (ret != 0)
895 regulator->enabled = 0;
896 mutex_unlock(&regulator->rdev->mutex);
897 return ret;
898}
899EXPORT_SYMBOL_GPL(regulator_enable);
900
901/* locks held by regulator_disable() */
902static int _regulator_disable(struct regulator_dev *rdev)
903{
904 int ret = 0;
905
906 /* are we the last user and permitted to disable ? */
907 if (rdev->use_count == 1 && !rdev->constraints->always_on) {
908
909 /* we are last user */
910 if (rdev->desc->ops->disable) {
911 ret = rdev->desc->ops->disable(rdev);
912 if (ret < 0) {
913 printk(KERN_ERR "%s: failed to disable %s\n",
914 __func__, rdev->desc->name);
915 return ret;
916 }
917 }
918
919 /* decrease our supplies ref count and disable if required */
920 if (rdev->supply)
921 _regulator_disable(rdev->supply);
922
923 rdev->use_count = 0;
924 } else if (rdev->use_count > 1) {
925
926 if (rdev->constraints &&
927 (rdev->constraints->valid_ops_mask &
928 REGULATOR_CHANGE_DRMS))
929 drms_uA_update(rdev);
930
931 rdev->use_count--;
932 }
933 return ret;
934}
935
936/**
937 * regulator_disable - disable regulator output
938 * @regulator: regulator source
939 *
940 * Disable the regulator output voltage or current.
941 * NOTE: this will only disable the regulator output if no other consumer
942 * devices have it enabled.
943 * NOTE: calls to regulator_enable() must be balanced with calls to
944 * regulator_disable().
945 */
946int regulator_disable(struct regulator *regulator)
947{
948 int ret;
949
950 if (!regulator->enabled) {
951 printk(KERN_ERR "%s: not in use by this consumer\n",
952 __func__);
953 return 0;
954 }
955
956 mutex_lock(&regulator->rdev->mutex);
957 regulator->enabled = 0;
958 regulator->uA_load = 0;
959 ret = _regulator_disable(regulator->rdev);
960 mutex_unlock(&regulator->rdev->mutex);
961 return ret;
962}
963EXPORT_SYMBOL_GPL(regulator_disable);
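
/*
 * Editorial sketch (not part of this patch): enable/disable calls must
 * balance, as the comments above require. "vcc" is assumed to come from
 * regulator_get() as in the earlier sketch.
 */
static int example_power_cycle(struct regulator *vcc)
{
	int ret;

	ret = regulator_enable(vcc);	/* may also enable a supply parent */
	if (ret != 0)
		return ret;

	/* ... device is powered here ... */

	return regulator_disable(vcc);	/* only cuts power if we were the
					 * last user and not always_on */
}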
964
965/* locks held by regulator_force_disable() */
966static int _regulator_force_disable(struct regulator_dev *rdev)
967{
968 int ret = 0;
969
970 /* force disable */
971 if (rdev->desc->ops->disable) {
972 /* ah well, who wants to live forever... */
973 ret = rdev->desc->ops->disable(rdev);
974 if (ret < 0) {
975 printk(KERN_ERR "%s: failed to force disable %s\n",
976 __func__, rdev->desc->name);
977 return ret;
978 }
979 /* notify other consumers that power has been forced off */
980 _notifier_call_chain(rdev, REGULATOR_EVENT_FORCE_DISABLE,
981 NULL);
982 }
983
984 /* decrease our supplies ref count and disable if required */
985 if (rdev->supply)
986 _regulator_disable(rdev->supply);
987
988 rdev->use_count = 0;
989 return ret;
990}
991
992/**
993 * regulator_force_disable - force disable regulator output
994 * @regulator: regulator source
995 *
996 * Forcibly disable the regulator output voltage or current.
997 * NOTE: this *will* disable the regulator output even if other consumer
998 * devices have it enabled. This should be used for situations when device
999 * damage will likely occur if the regulator is not disabled (e.g. over temp).
1000 */
1001int regulator_force_disable(struct regulator *regulator)
1002{
1003 int ret;
1004
1005 mutex_lock(&regulator->rdev->mutex);
1006 regulator->enabled = 0;
1007 regulator->uA_load = 0;
1008 ret = _regulator_force_disable(regulator->rdev);
1009 mutex_unlock(&regulator->rdev->mutex);
1010 return ret;
1011}
1012EXPORT_SYMBOL_GPL(regulator_force_disable);
1013
1014static int _regulator_is_enabled(struct regulator_dev *rdev)
1015{
1016 int ret;
1017
1018 mutex_lock(&rdev->mutex);
1019
1020 /* sanity check */
1021 if (!rdev->desc->ops->is_enabled) {
1022 ret = -EINVAL;
1023 goto out;
1024 }
1025
1026 ret = rdev->desc->ops->is_enabled(rdev);
1027out:
1028 mutex_unlock(&rdev->mutex);
1029 return ret;
1030}
1031
1032/**
1033 * regulator_is_enabled - is the regulator output enabled
1034 * @regulator: regulator source
1035 *
1036 * Returns zero when disabled, a positive value when enabled, or a negative errno on failure.
1037 */
1038int regulator_is_enabled(struct regulator *regulator)
1039{
1040 return _regulator_is_enabled(regulator->rdev);
1041}
1042EXPORT_SYMBOL_GPL(regulator_is_enabled);
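
/*
 * Editorial sketch (not part of this patch): is_enabled reports the
 * hardware state, not this consumer's own reference, so it is better
 * suited to diagnostics than to enable/disable bookkeeping.
 */
static void example_report(struct regulator *vcc)
{
	if (regulator_is_enabled(vcc) > 0)
		printk(KERN_INFO "supply is on at %d uV\n",
		       regulator_get_voltage(vcc));
}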
1043
1044/**
1045 * regulator_set_voltage - set regulator output voltage
1046 * @regulator: regulator source
1047 * @min_uV: Minimum required voltage in uV
1048 * @max_uV: Maximum acceptable voltage in uV
1049 *
1050 * Sets a voltage regulator to the desired output voltage. This can be set
1051 * during any regulator state. IOW, regulator can be disabled or enabled.
1052 *
1053 * If the regulator is enabled then the voltage will change to the new value
1054 * immediately otherwise if the regulator is disabled the regulator will
1055 * output at the new voltage when enabled.
1056 *
1057 * NOTE: If the regulator is shared between several devices then the lowest
1058 * request voltage that meets the system constraints will be used.
1059 * NOTE: Regulator system constraints must be set for this regulator before
1060 * calling this function otherwise this call will fail.
1061 */
1062int regulator_set_voltage(struct regulator *regulator, int min_uV, int max_uV)
1063{
1064 struct regulator_dev *rdev = regulator->rdev;
1065 int ret;
1066
1067 mutex_lock(&rdev->mutex);
1068
1069 /* sanity check */
1070 if (!rdev->desc->ops->set_voltage) {
1071 ret = -EINVAL;
1072 goto out;
1073 }
1074
1075 /* constraints check */
1076 ret = regulator_check_voltage(rdev, &min_uV, &max_uV);
1077 if (ret < 0)
1078 goto out;
1079 regulator->min_uV = min_uV;
1080 regulator->max_uV = max_uV;
1081 ret = rdev->desc->ops->set_voltage(rdev, min_uV, max_uV);
1082
1083out:
1084 mutex_unlock(&rdev->mutex);
1085 return ret;
1086}
1087EXPORT_SYMBOL_GPL(regulator_set_voltage);
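
/*
 * Editorial sketch (not part of this patch): consumers request a
 * voltage window rather than an exact value, so the core can reconcile
 * several consumers. The 3.3V +/- 5% figures are invented.
 */
static int example_set_3v3(struct regulator *vcc)
{
	/* any voltage in [3135000, 3465000] uV satisfies this consumer */
	return regulator_set_voltage(vcc, 3135000, 3465000);
}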
1088
1089static int _regulator_get_voltage(struct regulator_dev *rdev)
1090{
1091 /* sanity check */
1092 if (rdev->desc->ops->get_voltage)
1093 return rdev->desc->ops->get_voltage(rdev);
1094 else
1095 return -EINVAL;
1096}
1097
1098/**
1099 * regulator_get_voltage - get regulator output voltage
1100 * @regulator: regulator source
1101 *
1102 * This returns the current regulator voltage in uV.
1103 *
1104 * NOTE: If the regulator is disabled it will still return the configured
1105 * voltage; this function should not be used to determine regulator state.
1106 */
1107int regulator_get_voltage(struct regulator *regulator)
1108{
1109 int ret;
1110
1111 mutex_lock(&regulator->rdev->mutex);
1112
1113 ret = _regulator_get_voltage(regulator->rdev);
1114
1115 mutex_unlock(&regulator->rdev->mutex);
1116
1117 return ret;
1118}
1119EXPORT_SYMBOL_GPL(regulator_get_voltage);
1120
1121/**
1122 * regulator_set_current_limit - set regulator output current limit
1123 * @regulator: regulator source
1124 * @min_uA: Minimum supported current in uA
1125 * @max_uA: Maximum supported current in uA
1126 *
1127 * Sets current sink to the desired output current. This can be set during
1128 * any regulator state. IOW, regulator can be disabled or enabled.
1129 *
1130 * If the regulator is enabled then the current will change to the new value
1131 * immediately otherwise if the regulator is disabled the regulator will
1132 * output at the new current when enabled.
1133 *
1134 * NOTE: Regulator system constraints must be set for this regulator before
1135 * calling this function otherwise this call will fail.
1136 */
1137int regulator_set_current_limit(struct regulator *regulator,
1138 int min_uA, int max_uA)
1139{
1140 struct regulator_dev *rdev = regulator->rdev;
1141 int ret;
1142
1143 mutex_lock(&rdev->mutex);
1144
1145 /* sanity check */
1146 if (!rdev->desc->ops->set_current_limit) {
1147 ret = -EINVAL;
1148 goto out;
1149 }
1150
1151 /* constraints check */
1152 ret = regulator_check_current_limit(rdev, &min_uA, &max_uA);
1153 if (ret < 0)
1154 goto out;
1155
1156 ret = rdev->desc->ops->set_current_limit(rdev, min_uA, max_uA);
1157out:
1158 mutex_unlock(&rdev->mutex);
1159 return ret;
1160}
1161EXPORT_SYMBOL_GPL(regulator_set_current_limit);
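
/*
 * Editorial sketch (not part of this patch): bounding a current sink,
 * e.g. a charger supply; the 500 mA ceiling is an invented figure.
 */
static int example_limit_current(struct regulator *charger)
{
	return regulator_set_current_limit(charger, 0, 500000);	/* uA */
}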
1162
1163static int _regulator_get_current_limit(struct regulator_dev *rdev)
1164{
1165 int ret;
1166
1167 mutex_lock(&rdev->mutex);
1168
1169 /* sanity check */
1170 if (!rdev->desc->ops->get_current_limit) {
1171 ret = -EINVAL;
1172 goto out;
1173 }
1174
1175 ret = rdev->desc->ops->get_current_limit(rdev);
1176out:
1177 mutex_unlock(&rdev->mutex);
1178 return ret;
1179}
1180
1181/**
1182 * regulator_get_current_limit - get regulator output current
1183 * @regulator: regulator source
1184 *
1185 * This returns the current supplied by the specified current sink in uA.
1186 *
1187 * NOTE: If the regulator is disabled it will still return the configured
1188 * current limit; this function should not be used to determine regulator state.
1189 */
1190int regulator_get_current_limit(struct regulator *regulator)
1191{
1192 return _regulator_get_current_limit(regulator->rdev);
1193}
1194EXPORT_SYMBOL_GPL(regulator_get_current_limit);
1195
1196/**
1197 * regulator_set_mode - set regulator operating mode
1198 * @regulator: regulator source
1199 * @mode: operating mode - one of the REGULATOR_MODE constants
1200 *
1201 * Set regulator operating mode to increase regulator efficiency or improve
1202 * regulation performance.
1203 *
1204 * NOTE: Regulator system constraints must be set for this regulator before
1205 * calling this function otherwise this call will fail.
1206 */
1207int regulator_set_mode(struct regulator *regulator, unsigned int mode)
1208{
1209 struct regulator_dev *rdev = regulator->rdev;
1210 int ret;
1211
1212 mutex_lock(&rdev->mutex);
1213
1214 /* sanity check */
1215 if (!rdev->desc->ops->set_mode) {
1216 ret = -EINVAL;
1217 goto out;
1218 }
1219
1220 /* constraints check */
1221 ret = regulator_check_mode(rdev, mode);
1222 if (ret < 0)
1223 goto out;
1224
1225 ret = rdev->desc->ops->set_mode(rdev, mode);
1226out:
1227 mutex_unlock(&rdev->mutex);
1228 return ret;
1229}
1230EXPORT_SYMBOL_GPL(regulator_set_mode);
1231
1232static unsigned int _regulator_get_mode(struct regulator_dev *rdev)
1233{
1234 int ret;
1235
1236 mutex_lock(&rdev->mutex);
1237
1238 /* sanity check */
1239 if (!rdev->desc->ops->get_mode) {
1240 ret = -EINVAL;
1241 goto out;
1242 }
1243
1244 ret = rdev->desc->ops->get_mode(rdev);
1245out:
1246 mutex_unlock(&rdev->mutex);
1247 return ret;
1248}
1249
1250/**
1251 * regulator_get_mode - get regulator operating mode
1252 * @regulator: regulator source
1253 *
1254 * Get the current regulator operating mode.
1255 */
1256unsigned int regulator_get_mode(struct regulator *regulator)
1257{
1258 return _regulator_get_mode(regulator->rdev);
1259}
1260EXPORT_SYMBOL_GPL(regulator_get_mode);
1261
1262/**
1263 * regulator_set_optimum_mode - set regulator optimum operating mode
1264 * @regulator: regulator source
1265 * @uA_load: load current
1266 *
1267 * Notifies the regulator core of a new device load. This is then used by
1268 * DRMS (if enabled by constraints) to set the most efficient regulator
1269 * operating mode for the new regulator loading.
1270 *
1271 * Consumer devices notify their supply regulator of the maximum power
1272 * they will require (can be taken from device datasheet in the power
1273 * consumption tables) when they change operational status and hence power
1274 * state. Examples of operational state changes that can affect power
1275 * consumption are :-
1276 *
1277 * o Device is opened / closed.
1278 * o Device I/O is about to begin or has just finished.
1279 * o Device is idling in between work.
1280 *
1281 * This information is also exported via sysfs to userspace.
1282 *
1283 * DRMS will sum the total requested load on the regulator and change
1284 * to the most efficient operating mode if platform constraints allow.
1285 *
1286 * Returns the new regulator mode or error.
1287 */
1288int regulator_set_optimum_mode(struct regulator *regulator, int uA_load)
1289{
1290 struct regulator_dev *rdev = regulator->rdev;
1291 struct regulator *consumer;
1292 int ret, output_uV, input_uV, total_uA_load = 0;
1293 unsigned int mode;
1294
1295 mutex_lock(&rdev->mutex);
1296
1297 regulator->uA_load = uA_load;
1298 ret = regulator_check_drms(rdev);
1299 if (ret < 0)
1300 goto out;
1301 ret = -EINVAL;
1302
1303 /* sanity check */
1304 if (!rdev->desc->ops->get_optimum_mode)
1305 goto out;
1306
1307 /* get output voltage */
1308 output_uV = rdev->desc->ops->get_voltage(rdev);
1309 if (output_uV <= 0) {
1310 printk(KERN_ERR "%s: invalid output voltage found for %s\n",
1311 __func__, rdev->desc->name);
1312 goto out;
1313 }
1314
1315 /* get input voltage */
1316 if (rdev->supply && rdev->supply->desc->ops->get_voltage)
1317 input_uV = rdev->supply->desc->ops->get_voltage(rdev->supply);
1318 else
1319 input_uV = rdev->constraints->input_uV;
1320 if (input_uV <= 0) {
1321 printk(KERN_ERR "%s: invalid input voltage found for %s\n",
1322 __func__, rdev->desc->name);
1323 goto out;
1324 }
1325
1326 /* calc total requested load for this regulator */
1327 list_for_each_entry(consumer, &rdev->consumer_list, list)
1328 total_uA_load += consumer->uA_load;
1329
1330 mode = rdev->desc->ops->get_optimum_mode(rdev,
1331 input_uV, output_uV,
1332 total_uA_load);
1333 if (mode == 0) {
1334 printk(KERN_ERR "%s: failed to get optimum mode for %s @"
1335 " %d uA %d -> %d uV\n", __func__, rdev->desc->name,
1336 total_uA_load, input_uV, output_uV);
1337 goto out;
1338 }
1339
1340 ret = rdev->desc->ops->set_mode(rdev, mode);
1341 if (ret < 0) {
1342 printk(KERN_ERR "%s: failed to set optimum mode %x for %s\n",
1343 __func__, mode, rdev->desc->name);
1344 goto out;
1345 }
1346 ret = mode;
1347out:
1348 mutex_unlock(&rdev->mutex);
1349 return ret;
1350}
1351EXPORT_SYMBOL_GPL(regulator_set_optimum_mode);
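
/*
 * Editorial sketch (not part of this patch): a consumer reporting its
 * expected load around an I/O burst so DRMS can pick an efficient
 * operating mode. The 100 mA / 1 mA figures stand in for datasheet
 * numbers; the returned mode is ignored here for brevity.
 */
static void example_io_burst(struct regulator *vcc)
{
	regulator_set_optimum_mode(vcc, 100000);	/* ~100 mA while busy */
	/* ... perform I/O ... */
	regulator_set_optimum_mode(vcc, 1000);		/* ~1 mA when idle */
}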
1352
1353/**
1354 * regulator_register_notifier - register regulator event notifier
1355 * @regulator: regulator source
1356 * @notifier_block: notifier block
1357 *
1358 * Register notifier block to receive regulator events.
1359 */
1360int regulator_register_notifier(struct regulator *regulator,
1361 struct notifier_block *nb)
1362{
1363 return blocking_notifier_chain_register(&regulator->rdev->notifier,
1364 nb);
1365}
1366EXPORT_SYMBOL_GPL(regulator_register_notifier);
1367
1368/**
1369 * regulator_unregister_notifier - unregister regulator event notifier
1370 * @regulator: regulator source
1371 * @notifier_block: notifier block
1372 *
1373 * Unregister regulator event notifier block.
1374 */
1375int regulator_unregister_notifier(struct regulator *regulator,
1376 struct notifier_block *nb)
1377{
1378 return blocking_notifier_chain_unregister(&regulator->rdev->notifier,
1379 nb);
1380}
1381EXPORT_SYMBOL_GPL(regulator_unregister_notifier);
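
/*
 * Editorial sketch (not part of this patch): receiving regulator
 * events, e.g. REGULATOR_EVENT_FORCE_DISABLE, through the notifier
 * chain registered above; the handler body is a placeholder.
 */
#include <linux/notifier.h>

static int example_event(struct notifier_block *nb, unsigned long event,
			 void *data)
{
	if (event == REGULATOR_EVENT_FORCE_DISABLE) {
		/* power was cut for safety: quiesce the device here */
	}
	return NOTIFY_OK;
}

static struct notifier_block example_nb = {
	.notifier_call = example_event,
};

/* registered with: regulator_register_notifier(vcc, &example_nb); */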
1382
1383/* notify regulator consumers and downstream regulator consumers */
1384static void _notifier_call_chain(struct regulator_dev *rdev,
1385 unsigned long event, void *data)
1386{
1387 struct regulator_dev *_rdev;
1388
1389 /* call rdev chain first */
1390 mutex_lock(&rdev->mutex);
1391 blocking_notifier_call_chain(&rdev->notifier, event, data);
1392 mutex_unlock(&rdev->mutex);
1393
1394 /* now notify regulator we supply */
1395 list_for_each_entry(_rdev, &rdev->supply_list, slist)
1396 _notifier_call_chain(_rdev, event, data);
1397}
1398
1399/**
1400 * regulator_bulk_get - get multiple regulator consumers
1401 *
1402 * @dev: Device to supply
1403 * @num_consumers: Number of consumers to register
1404 * @consumers: Configuration of consumers; clients are stored here.
1405 *
1406 * @return 0 on success, an errno on failure.
1407 *
1408 * This helper function allows drivers to get several regulator
1409 * consumers in one operation. If any of the regulators cannot be
1410 * acquired then any regulators that were allocated will be freed
1411 * before returning to the caller.
1412 */
1413int regulator_bulk_get(struct device *dev, int num_consumers,
1414 struct regulator_bulk_data *consumers)
1415{
1416 int i;
1417 int ret;
1418
1419 for (i = 0; i < num_consumers; i++)
1420 consumers[i].consumer = NULL;
1421
1422 for (i = 0; i < num_consumers; i++) {
1423 consumers[i].consumer = regulator_get(dev,
1424 consumers[i].supply);
1425 if (IS_ERR(consumers[i].consumer)) {
1426 dev_err(dev, "Failed to get supply '%s'\n",
1427 consumers[i].supply);
1428 ret = PTR_ERR(consumers[i].consumer);
1429 consumers[i].consumer = NULL;
1430 goto err;
1431 }
1432 }
1433
1434 return 0;
1435
1436err:
1437 for (i = 0; i < num_consumers && consumers[i].consumer; i++)
1438 regulator_put(consumers[i].consumer);
1439
1440 return ret;
1441}
1442EXPORT_SYMBOL_GPL(regulator_bulk_get);
1443
1444/**
1445 * regulator_bulk_enable - enable multiple regulator consumers
1446 *
1447 * @num_consumers: Number of consumers
1448 * @consumers: Consumer data; clients are stored here.
1449 * @return 0 on success, an errno on failure
1450 *
1451 * This convenience API allows consumers to enable multiple regulator
1452 * clients in a single API call. If any consumers cannot be enabled
1453 * then any others that were enabled will be disabled again prior to
1454 * return.
1455 */
1456int regulator_bulk_enable(int num_consumers,
1457 struct regulator_bulk_data *consumers)
1458{
1459 int i;
1460 int ret;
1461
1462 for (i = 0; i < num_consumers; i++) {
1463 ret = regulator_enable(consumers[i].consumer);
1464 if (ret != 0)
1465 goto err;
1466 }
1467
1468 return 0;
1469
1470err:
1471 printk(KERN_ERR "Failed to enable %s\n", consumers[i].supply);
1472 while (--i >= 0)
1473 regulator_disable(consumers[i].consumer);
1474
1475 return ret;
1476}
1477EXPORT_SYMBOL_GPL(regulator_bulk_enable);
1478
1479/**
1480 * regulator_bulk_disable - disable multiple regulator consumers
1481 *
1482 * @num_consumers: Number of consumers
1483 * @consumers: Consumer data; clients are stored here.
1484 * @return 0 on success, an errno on failure
1485 *
1486 * This convenience API allows consumers to disable multiple regulator
1487 * clients in a single API call. If any consumers cannot be disabled
1488 * then any others that were disabled will be re-enabled prior to
1489 * return.
1490 */
1491int regulator_bulk_disable(int num_consumers,
1492 struct regulator_bulk_data *consumers)
1493{
1494 int i;
1495 int ret;
1496
1497 for (i = 0; i < num_consumers; i++) {
1498 ret = regulator_disable(consumers[i].consumer);
1499 if (ret != 0)
1500 goto err;
1501 }
1502
1503 return 0;
1504
1505err:
1506 printk(KERN_ERR "Failed to disable %s\n", consumers[i].supply);
1507 while (--i >= 0)
1508 regulator_enable(consumers[i].consumer);
1509
1510 return ret;
1511}
1512EXPORT_SYMBOL_GPL(regulator_bulk_disable);
1513
1514/**
1515 * regulator_bulk_free - free multiple regulator consumers
1516 *
1517 * @num_consumers: Number of consumers
1518 * @consumers: Consumer data; clients are stored here.
1519 *
1520 * This convenience API allows consumers to free multiple regulator
1521 * clients in a single API call.
1522 */
1523void regulator_bulk_free(int num_consumers,
1524 struct regulator_bulk_data *consumers)
1525{
1526 int i;
1527
1528 for (i = 0; i < num_consumers; i++) {
1529 regulator_put(consumers[i].consumer);
1530 consumers[i].consumer = NULL;
1531 }
1532}
1533EXPORT_SYMBOL_GPL(regulator_bulk_free);
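
/*
 * Editorial sketch (not part of this patch): the four bulk calls above
 * used together; the two supply names are hypothetical.
 */
static struct regulator_bulk_data example_supplies[] = {
	{ .supply = "Vcc" },
	{ .supply = "Vio" },
};

static int example_bulk(struct device *dev)
{
	int ret;

	ret = regulator_bulk_get(dev, ARRAY_SIZE(example_supplies),
				 example_supplies);
	if (ret != 0)
		return ret;

	ret = regulator_bulk_enable(ARRAY_SIZE(example_supplies),
				    example_supplies);
	if (ret != 0)
		goto err_free;

	/* ... both supplies are now on ... */

	regulator_bulk_disable(ARRAY_SIZE(example_supplies), example_supplies);
err_free:
	regulator_bulk_free(ARRAY_SIZE(example_supplies), example_supplies);
	return ret;
}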
1534
1535/**
1536 * regulator_notifier_call_chain - call regulator event notifier
1537 * @rdev: regulator source
1538 * @event: event to notify consumers of
1539 * @data: optional event-specific data passed to the notifier callbacks
1540 *
1541 * Called by regulator drivers to notify clients a regulator event has
1542 * occurred. We also notify regulator clients downstream.
1543 */
1544int regulator_notifier_call_chain(struct regulator_dev *rdev,
1545 unsigned long event, void *data)
1546{
1547 _notifier_call_chain(rdev, event, data);
1548 return NOTIFY_DONE;
1549
1550}
1551EXPORT_SYMBOL_GPL(regulator_notifier_call_chain);
1552
1553/**
1554 * regulator_register - register regulator
1555 * @regulator_desc: regulator description
1556 * @reg_data: private regulator data
1557 *
1558 * Called by regulator drivers to register a regulator.
1559 * Returns a valid pointer on success, otherwise an ERR_PTR() value.
1560 */
1561struct regulator_dev *regulator_register(struct regulator_desc *regulator_desc,
1562 void *reg_data)
1563{
1564 static atomic_t regulator_no = ATOMIC_INIT(0);
1565 struct regulator_dev *rdev;
1566 int ret;
1567
1568 if (regulator_desc == NULL)
1569 return ERR_PTR(-EINVAL);
1570
1571 if (regulator_desc->name == NULL || regulator_desc->ops == NULL)
1572 return ERR_PTR(-EINVAL);
1573
1574 if (regulator_desc->type != REGULATOR_VOLTAGE &&
1575 regulator_desc->type != REGULATOR_CURRENT)
1576 return ERR_PTR(-EINVAL);
1577
1578 rdev = kzalloc(sizeof(struct regulator_dev), GFP_KERNEL);
1579 if (rdev == NULL)
1580 return ERR_PTR(-ENOMEM);
1581
1582 mutex_lock(&regulator_list_mutex);
1583
1584 mutex_init(&rdev->mutex);
1585 rdev->reg_data = reg_data;
1586 rdev->owner = regulator_desc->owner;
1587 rdev->desc = regulator_desc;
1588 INIT_LIST_HEAD(&rdev->consumer_list);
1589 INIT_LIST_HEAD(&rdev->supply_list);
1590 INIT_LIST_HEAD(&rdev->list);
1591 INIT_LIST_HEAD(&rdev->slist);
1592 BLOCKING_INIT_NOTIFIER_HEAD(&rdev->notifier);
1593
1594 rdev->dev.class = &regulator_class;
1595 device_initialize(&rdev->dev);
1596 snprintf(rdev->dev.bus_id, sizeof(rdev->dev.bus_id),
1597 "regulator_%ld_%s",
1598 (unsigned long)atomic_inc_return(&regulator_no) - 1,
1599 regulator_desc->name);
1600
1601 ret = device_add(&rdev->dev);
1602 if (ret == 0)
1603 list_add(&rdev->list, &regulator_list);
1604 else {
1605 kfree(rdev);
1606 rdev = ERR_PTR(ret);
1607 }
1608 mutex_unlock(&regulator_list_mutex);
1609 return rdev;
1610}
1611EXPORT_SYMBOL_GPL(regulator_register);
1612
1613/**
1614 * regulator_unregister - unregister regulator
1615 * @rdev: regulator source
1616 *
1617 * Called by regulator drivers to unregister a regulator.
1618 */
1619void regulator_unregister(struct regulator_dev *rdev)
1620{
1621 if (rdev == NULL)
1622 return;
1623
1624 mutex_lock(&regulator_list_mutex);
1625 list_del(&rdev->list);
1626 if (rdev->supply)
1627 sysfs_remove_link(&rdev->dev.kobj, "supply");
1628 device_unregister(&rdev->dev);
1629 mutex_unlock(&regulator_list_mutex);
1630}
1631EXPORT_SYMBOL_GPL(regulator_unregister);
1632
1633/**
1634 * regulator_set_supply - set regulator supply regulator
1635 * @regulator: regulator name
1636 * @supply: supply regulator name
1637 *
1638 * Called by platform initialisation code to set the supply regulator for this
1639 * regulator. This ensures that a regulators supply will also be enabled by the
1640 * core if it's child is enabled.
1641 */
1642int regulator_set_supply(const char *regulator, const char *supply)
1643{
1644 struct regulator_dev *rdev, *supply_rdev;
1645 int err;
1646
1647 if (regulator == NULL || supply == NULL)
1648 return -EINVAL;
1649
1650 mutex_lock(&regulator_list_mutex);
1651
1652 list_for_each_entry(rdev, &regulator_list, list) {
1653 if (!strcmp(rdev->desc->name, regulator))
1654 goto found_regulator;
1655 }
1656 mutex_unlock(&regulator_list_mutex);
1657 return -ENODEV;
1658
1659found_regulator:
1660 list_for_each_entry(supply_rdev, &regulator_list, list) {
1661 if (!strcmp(supply_rdev->desc->name, supply))
1662 goto found_supply;
1663 }
1664 mutex_unlock(&regulator_list_mutex);
1665 return -ENODEV;
1666
1667found_supply:
1668 err = sysfs_create_link(&rdev->dev.kobj, &supply_rdev->dev.kobj,
1669 "supply");
1670 if (err) {
1671 printk(KERN_ERR
1672 "%s: could not add device link %s err %d\n",
1673 __func__, supply_rdev->dev.kobj.name, err);
1674 goto out;
1675 }
1676 rdev->supply = supply_rdev;
1677 list_add(&rdev->slist, &supply_rdev->supply_list);
1678out:
1679 mutex_unlock(&regulator_list_mutex);
1680 return err;
1681}
1682EXPORT_SYMBOL_GPL(regulator_set_supply);
1683
1684/**
1685 * regulator_get_supply - get regulator supply regulator
1686 * @regulator: regulator name
1687 *
1688 * Returns the supply regulator name or NULL if no supply regulator
1689 * exists (i.e. the regulator is supplied directly from USB, Line, Battery, etc)
1690 */
1691const char *regulator_get_supply(const char *regulator)
1692{
1693 struct regulator_dev *rdev;
1694
1695 if (regulator == NULL)
1696 return NULL;
1697
1698 mutex_lock(&regulator_list_mutex);
1699 list_for_each_entry(rdev, &regulator_list, list) {
1700 if (!strcmp(rdev->desc->name, regulator))
1701 goto found;
1702 }
1703 mutex_unlock(&regulator_list_mutex);
1704 return NULL;
1705
1706found:
1707 mutex_unlock(&regulator_list_mutex);
1708 if (rdev->supply)
1709 return rdev->supply->desc->name;
1710 else
1711 return NULL;
1712}
1713EXPORT_SYMBOL_GPL(regulator_get_supply);
1714
1715/**
1716 * regulator_set_machine_constraints - sets regulator constraints
1717 * @regulator_name: regulator name
1718 * @constraints: constraints to apply
1719 * Allows platform initialisation code to define and constrain
1720 * regulator circuits e.g. valid voltage/current ranges, etc. NOTE:
1721 * Constraints *must* be set by platform code in order for some
1722 * regulator operations to proceed i.e. set_voltage, set_current_limit,
1723 * set_mode.
1724 */
1725int regulator_set_machine_constraints(const char *regulator_name,
1726 struct regulation_constraints *constraints)
1727{
1728 struct regulator_dev *rdev;
1729 int ret = 0;
1730
1731 if (regulator_name == NULL)
1732 return -EINVAL;
1733
1734 mutex_lock(&regulator_list_mutex);
1735
1736 list_for_each_entry(rdev, &regulator_list, list) {
1737 if (!strcmp(regulator_name, rdev->desc->name))
1738 goto found;
1739 }
1740 ret = -ENODEV;
1741 goto out;
1742
1743found:
1744 mutex_lock(&rdev->mutex);
1745 rdev->constraints = constraints;
1746
1747 /* do we need to apply the constraint voltage */
1748 if (rdev->constraints->apply_uV &&
1749 rdev->constraints->min_uV == rdev->constraints->max_uV &&
1750 rdev->desc->ops->set_voltage) {
1751 ret = rdev->desc->ops->set_voltage(rdev,
1752 rdev->constraints->min_uV, rdev->constraints->max_uV);
1753 if (ret < 0) {
1754 printk(KERN_ERR "%s: failed to apply %duV"
1755 " constraint\n", __func__,
1756 rdev->constraints->min_uV);
1757 rdev->constraints = NULL;
1758 goto out;
1759 }
1760 }
1761
1762 /* are we enabled at boot time by firmware / bootloader */
1763 if (rdev->constraints->boot_on)
1764 rdev->use_count = 1;
1765
1766 /* do we need to setup our suspend state */
1767 if (constraints->initial_state)
1768 ret = suspend_prepare(rdev, constraints->initial_state);
1769
1770 print_constraints(rdev);
1771 mutex_unlock(&rdev->mutex);
1772
1773out:
1774 mutex_unlock(&regulator_list_mutex);
1775 return ret;
1776}
1777EXPORT_SYMBOL_GPL(regulator_set_machine_constraints);
1778
1779
1780/**
1781 * regulator_set_device_supply - Bind a regulator to a symbolic supply
1782 * @regulator: regulator source
1783 * @dev: device the supply applies to
1784 * @supply: symbolic name for supply
1785 *
1786 * Allows platform initialisation code to map physical regulator
1787 * sources to symbolic names for supplies for use by devices. Devices
1788 * should use these symbolic names to request regulators, avoiding the
1789 * need to provide board-specific regulator names as platform data.
1790 */
1791int regulator_set_device_supply(const char *regulator, struct device *dev,
1792 const char *supply)
1793{
1794 struct regulator_map *node;
1795
1796 if (regulator == NULL || supply == NULL)
1797 return -EINVAL;
1798
1799 node = kmalloc(sizeof(struct regulator_map), GFP_KERNEL);
1800 if (node == NULL)
1801 return -ENOMEM;
1802
1803 node->regulator = regulator;
1804 node->dev = dev;
1805 node->supply = supply;
1806
1807 mutex_lock(&regulator_list_mutex);
1808 list_add(&node->list, &regulator_map_list);
1809 mutex_unlock(&regulator_list_mutex);
1810 return 0;
1811}
1812EXPORT_SYMBOL_GPL(regulator_set_device_supply);
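
/*
 * Editorial sketch (not part of this patch): board initialisation
 * using the machine-level calls above to name a supply parent, apply
 * constraints and bind a device to a symbolic supply. All regulator
 * names and limits are invented for illustration.
 */
#include <linux/regulator/machine.h>

static struct regulation_constraints example_constraints = {
	.min_uV		= 3135000,
	.max_uV		= 3465000,
	.valid_ops_mask	= REGULATOR_CHANGE_VOLTAGE,
};

static void example_board_init_regulators(struct device *consumer_dev)
{
	/* LDO2 is supplied by DCDC1 */
	regulator_set_supply("LDO2", "DCDC1");

	/* constrain LDO2 to the board's safe operating window */
	regulator_set_machine_constraints("LDO2", &example_constraints);

	/* consumers on this device can now regulator_get(dev, "Vcc") */
	regulator_set_device_supply("LDO2", consumer_dev, "Vcc");
}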
1813
1814/**
1815 * regulator_suspend_prepare - prepare regulators for system wide suspend
1816 * @state: system suspend state
1817 *
1818 * Configure each regulator with its suspend operating parameters for state.
1819 * This will usually be called by machine suspend code prior to suspending.
1820 */
1821int regulator_suspend_prepare(suspend_state_t state)
1822{
1823 struct regulator_dev *rdev;
1824 int ret = 0;
1825
1826 /* ON is handled by regulator active state */
1827 if (state == PM_SUSPEND_ON)
1828 return -EINVAL;
1829
1830 mutex_lock(&regulator_list_mutex);
1831 list_for_each_entry(rdev, &regulator_list, list) {
1832
1833 mutex_lock(&rdev->mutex);
1834 ret = suspend_prepare(rdev, state);
1835 mutex_unlock(&rdev->mutex);
1836
1837 if (ret < 0) {
1838 printk(KERN_ERR "%s: failed to prepare %s\n",
1839 __func__, rdev->desc->name);
1840 goto out;
1841 }
1842 }
1843out:
1844 mutex_unlock(&regulator_list_mutex);
1845 return ret;
1846}
1847EXPORT_SYMBOL_GPL(regulator_suspend_prepare);
1848
1849/**
1850 * rdev_get_drvdata - get rdev regulator driver data
1851 * @rdev: regulator device
1852 *
1853 * Get rdev regulator driver private data. This call can be used in the
1854 * regulator driver context.
1855 */
1856void *rdev_get_drvdata(struct regulator_dev *rdev)
1857{
1858 return rdev->reg_data;
1859}
1860EXPORT_SYMBOL_GPL(rdev_get_drvdata);
1861
1862/**
1863 * regulator_get_drvdata - get regulator driver data
1864 * @regulator: regulator
1865 *
1866 * Get regulator driver private data. This call can be used in the consumer
1867 * driver context when non-API, regulator-specific functions need to be called.
1868 */
1869void *regulator_get_drvdata(struct regulator *regulator)
1870{
1871 return regulator->rdev->reg_data;
1872}
1873EXPORT_SYMBOL_GPL(regulator_get_drvdata);
1874
1875/**
1876 * regulator_set_drvdata - set regulator driver data
1877 * @regulator: regulator
1878 * @data: data
1879 */
1880void regulator_set_drvdata(struct regulator *regulator, void *data)
1881{
1882 regulator->rdev->reg_data = data;
1883}
1884EXPORT_SYMBOL_GPL(regulator_set_drvdata);
1885
1886/**
1887 * rdev_get_id - get regulator ID
1888 * @rdev: regulator device
1889 */
1890int rdev_get_id(struct regulator_dev *rdev)
1891{
1892 return rdev->desc->id;
1893}
1894EXPORT_SYMBOL_GPL(rdev_get_id);
1895
1896static int __init regulator_init(void)
1897{
1898 printk(KERN_INFO "regulator: core version %s\n", REGULATOR_VERSION);
1899 return class_register(&regulator_class);
1900}
1901
1902/* init early to allow our consumers to complete system booting */
1903core_initcall(regulator_init);
diff --git a/drivers/regulator/fixed.c b/drivers/regulator/fixed.c
new file mode 100644
index 000000000000..d31db3e14913
--- /dev/null
+++ b/drivers/regulator/fixed.c
@@ -0,0 +1,129 @@
1/*
2 * fixed.c
3 *
4 * Copyright 2008 Wolfson Microelectronics PLC.
5 *
6 * Author: Mark Brown <broonie@opensource.wolfsonmicro.com>
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License as
10 * published by the Free Software Foundation; either version 2 of the
11 * License, or (at your option) any later version.
12 *
13 * This is useful for systems with mixed controllable and
14 * non-controllable regulators, as well as for allowing testing on
15 * systems with no controllable regulators.
16 */
17
18#include <linux/err.h>
19#include <linux/mutex.h>
20#include <linux/platform_device.h>
21#include <linux/regulator/driver.h>
22#include <linux/regulator/fixed.h>
23
24struct fixed_voltage_data {
25 struct regulator_desc desc;
26 struct regulator_dev *dev;
27 int microvolts;
28};
29
30static int fixed_voltage_is_enabled(struct regulator_dev *dev)
31{
32 return 1;
33}
34
35static int fixed_voltage_enable(struct regulator_dev *dev)
36{
37 return 0;
38}
39
40static int fixed_voltage_get_voltage(struct regulator_dev *dev)
41{
42 struct fixed_voltage_data *data = rdev_get_drvdata(dev);
43
44 return data->microvolts;
45}
46
47static struct regulator_ops fixed_voltage_ops = {
48 .is_enabled = fixed_voltage_is_enabled,
49 .enable = fixed_voltage_enable,
50 .get_voltage = fixed_voltage_get_voltage,
51};
52
53static int regulator_fixed_voltage_probe(struct platform_device *pdev)
54{
55 struct fixed_voltage_config *config = pdev->dev.platform_data;
56 struct fixed_voltage_data *drvdata;
57 int ret;
58
59 drvdata = kzalloc(sizeof(struct fixed_voltage_data), GFP_KERNEL);
60 if (drvdata == NULL) {
61 ret = -ENOMEM;
62 goto err;
63 }
64
65 drvdata->desc.name = kstrdup(config->supply_name, GFP_KERNEL);
66 if (drvdata->desc.name == NULL) {
67 ret = -ENOMEM;
68 goto err;
69 }
70 drvdata->desc.type = REGULATOR_VOLTAGE;
71 drvdata->desc.owner = THIS_MODULE;
72 drvdata->desc.ops = &fixed_voltage_ops;
73
74 drvdata->microvolts = config->microvolts;
75
76 drvdata->dev = regulator_register(&drvdata->desc, drvdata);
77 if (IS_ERR(drvdata->dev)) {
78 ret = PTR_ERR(drvdata->dev);
79 goto err_name;
80 }
81
82 platform_set_drvdata(pdev, drvdata);
83
84 dev_dbg(&pdev->dev, "%s supplying %duV\n", drvdata->desc.name,
85 drvdata->microvolts);
86
87 return 0;
88
89err_name:
90 kfree(drvdata->desc.name);
91err:
92 kfree(drvdata);
93 return ret;
94}
95
96static int regulator_fixed_voltage_remove(struct platform_device *pdev)
97{
98 struct fixed_voltage_data *drvdata = platform_get_drvdata(pdev);
99
100 regulator_unregister(drvdata->dev);
101 kfree(drvdata->desc.name);
102 kfree(drvdata);
103
104 return 0;
105}
106
107static struct platform_driver regulator_fixed_voltage_driver = {
108 .probe = regulator_fixed_voltage_probe,
109 .remove = regulator_fixed_voltage_remove,
110 .driver = {
111 .name = "reg-fixed-voltage",
112 },
113};
114
115static int __init regulator_fixed_voltage_init(void)
116{
117 return platform_driver_register(&regulator_fixed_voltage_driver);
118}
119module_init(regulator_fixed_voltage_init);
120
121static void __exit regulator_fixed_voltage_exit(void)
122{
123 platform_driver_unregister(&regulator_fixed_voltage_driver);
124}
125module_exit(regulator_fixed_voltage_exit);
126
127MODULE_AUTHOR("Mark Brown <broonie@opensource.wolfsonmicro.com>");
128MODULE_DESCRIPTION("Fixed voltage regulator");
129MODULE_LICENSE("GPL");
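
/*
 * Editorial sketch (not part of this patch): how board code might
 * instantiate the fixed regulator above. The supply name, voltage and
 * device id are invented; fixed_voltage_config is the platform data
 * type from <linux/regulator/fixed.h>.
 */
#include <linux/platform_device.h>
#include <linux/regulator/fixed.h>

static struct fixed_voltage_config example_fixed_config = {
	.supply_name	= "board-3v3",
	.microvolts	= 3300000,
};

static struct platform_device example_fixed_device = {
	.name	= "reg-fixed-voltage",	/* matches the driver name above */
	.id	= 0,
	.dev	= {
		.platform_data = &example_fixed_config,
	},
};

/* board init would call platform_device_register(&example_fixed_device); */
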
diff --git a/drivers/regulator/virtual.c b/drivers/regulator/virtual.c
new file mode 100644
index 000000000000..5ddb464b1c3f
--- /dev/null
+++ b/drivers/regulator/virtual.c
@@ -0,0 +1,345 @@
1/*
2 * virtual.c
3 *
4 * Copyright 2008 Wolfson Microelectronics PLC.
5 *
6 * Author: Mark Brown <broonie@opensource.wolfsonmicro.com>
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License as
10 * published by the Free Software Foundation; either version 2 of the
11 * License, or (at your option) any later version.
12 */
13
14#include <linux/err.h>
15#include <linux/mutex.h>
16#include <linux/platform_device.h>
17#include <linux/regulator/consumer.h>
18
19struct virtual_consumer_data {
20 struct mutex lock;
21 struct regulator *regulator;
22 int enabled;
23 int min_uV;
24 int max_uV;
25 int min_uA;
26 int max_uA;
27 unsigned int mode;
28};
29
30static void update_voltage_constraints(struct virtual_consumer_data *data)
31{
32 int ret;
33
34 if (data->min_uV && data->max_uV
35 && data->min_uV <= data->max_uV) {
36 ret = regulator_set_voltage(data->regulator,
37 data->min_uV, data->max_uV);
38 if (ret != 0) {
39 printk(KERN_ERR "regulator_set_voltage() failed: %d\n",
40 ret);
41 return;
42 }
43 }
44
45 if (data->min_uV && data->max_uV && !data->enabled) {
46 ret = regulator_enable(data->regulator);
47 if (ret == 0)
48 data->enabled = 1;
49 else
50 printk(KERN_ERR "regulator_enable() failed: %d\n",
51 ret);
52 }
53
54 if (!(data->min_uV && data->max_uV) && data->enabled) {
55 ret = regulator_disable(data->regulator);
56 if (ret == 0)
57 data->enabled = 0;
58 else
59 printk(KERN_ERR "regulator_disable() failed: %d\n",
60 ret);
61 }
62}
63
64static void update_current_limit_constraints(struct virtual_consumer_data
65 *data)
66{
67 int ret;
68
69 if (data->max_uA
70 && data->min_uA <= data->max_uA) {
71 ret = regulator_set_current_limit(data->regulator,
72 data->min_uA, data->max_uA);
73 if (ret != 0) {
74 pr_err("regulator_set_current_limit() failed: %d\n",
75 ret);
76 return;
77 }
78 }
79
80 if (data->max_uA && !data->enabled) {
81 ret = regulator_enable(data->regulator);
82 if (ret == 0)
83 data->enabled = 1;
84 else
85 printk(KERN_ERR "regulator_enable() failed: %d\n",
86 ret);
87 }
88
89 if (!(data->min_uA && data->max_uA) && data->enabled) {
90 ret = regulator_disable(data->regulator);
91 if (ret == 0)
92 data->enabled = 0;
93 else
94 printk(KERN_ERR "regulator_disable() failed: %d\n",
95 ret);
96 }
97}
98
99static ssize_t show_min_uV(struct device *dev,
100 struct device_attribute *attr, char *buf)
101{
102 struct virtual_consumer_data *data = dev_get_drvdata(dev);
103 return sprintf(buf, "%d\n", data->min_uV);
104}
105
106static ssize_t set_min_uV(struct device *dev, struct device_attribute *attr,
107 const char *buf, size_t count)
108{
109 struct virtual_consumer_data *data = dev_get_drvdata(dev);
110 long val;
111
112 if (strict_strtol(buf, 10, &val) != 0)
113 return count;
114
115 mutex_lock(&data->lock);
116
117 data->min_uV = val;
118 update_voltage_constraints(data);
119
120 mutex_unlock(&data->lock);
121
122 return count;
123}
124
125static ssize_t show_max_uV(struct device *dev,
126 struct device_attribute *attr, char *buf)
127{
128 struct virtual_consumer_data *data = dev_get_drvdata(dev);
129 return sprintf(buf, "%d\n", data->max_uV);
130}
131
132static ssize_t set_max_uV(struct device *dev, struct device_attribute *attr,
133 const char *buf, size_t count)
134{
135 struct virtual_consumer_data *data = dev_get_drvdata(dev);
136 long val;
137
138 if (strict_strtol(buf, 10, &val) != 0)
139 return count;
140
141 mutex_lock(&data->lock);
142
143 data->max_uV = val;
144 update_voltage_constraints(data);
145
146 mutex_unlock(&data->lock);
147
148 return count;
149}
150
151static ssize_t show_min_uA(struct device *dev,
152 struct device_attribute *attr, char *buf)
153{
154 struct virtual_consumer_data *data = dev_get_drvdata(dev);
155 return sprintf(buf, "%d\n", data->min_uA);
156}
157
158static ssize_t set_min_uA(struct device *dev, struct device_attribute *attr,
159 const char *buf, size_t count)
160{
161 struct virtual_consumer_data *data = dev_get_drvdata(dev);
162 long val;
163
164 if (strict_strtol(buf, 10, &val) != 0)
165 return count;
166
167 mutex_lock(&data->lock);
168
169 data->min_uA = val;
170 update_current_limit_constraints(data);
171
172 mutex_unlock(&data->lock);
173
174 return count;
175}
176
177static ssize_t show_max_uA(struct device *dev,
178 struct device_attribute *attr, char *buf)
179{
180 struct virtual_consumer_data *data = dev_get_drvdata(dev);
181 return sprintf(buf, "%d\n", data->max_uA);
182}
183
184static ssize_t set_max_uA(struct device *dev, struct device_attribute *attr,
185 const char *buf, size_t count)
186{
187 struct virtual_consumer_data *data = dev_get_drvdata(dev);
188 long val;
189
190 if (strict_strtol(buf, 10, &val) != 0)
191 return count;
192
193 mutex_lock(&data->lock);
194
195 data->max_uA = val;
196 update_current_limit_constraints(data);
197
198 mutex_unlock(&data->lock);
199
200 return count;
201}
202
203static ssize_t show_mode(struct device *dev,
204 struct device_attribute *attr, char *buf)
205{
206 struct virtual_consumer_data *data = dev_get_drvdata(dev);
207
208 switch (data->mode) {
209 case REGULATOR_MODE_FAST:
210 return sprintf(buf, "fast\n");
211 case REGULATOR_MODE_NORMAL:
212 return sprintf(buf, "normal\n");
213 case REGULATOR_MODE_IDLE:
214 return sprintf(buf, "idle\n");
215 case REGULATOR_MODE_STANDBY:
216 return sprintf(buf, "standby\n");
217 default:
218 return sprintf(buf, "unknown\n");
219 }
220}
221
222static ssize_t set_mode(struct device *dev, struct device_attribute *attr,
223 const char *buf, size_t count)
224{
225 struct virtual_consumer_data *data = dev_get_drvdata(dev);
226 unsigned int mode;
227 int ret;
228
229 if (strncmp(buf, "fast", strlen("fast")) == 0)
230 mode = REGULATOR_MODE_FAST;
231 else if (strncmp(buf, "normal", strlen("normal")) == 0)
232 mode = REGULATOR_MODE_NORMAL;
233 else if (strncmp(buf, "idle", strlen("idle")) == 0)
234 mode = REGULATOR_MODE_IDLE;
235 else if (strncmp(buf, "standby", strlen("standby")) == 0)
236 mode = REGULATOR_MODE_STANDBY;
237 else {
238 dev_err(dev, "Configuring invalid mode\n");
239 return count;
240 }
241
242 mutex_lock(&data->lock);
243 ret = regulator_set_mode(data->regulator, mode);
244 if (ret == 0)
245 data->mode = mode;
246 else
247 dev_err(dev, "Failed to configure mode: %d\n", ret);
248 mutex_unlock(&data->lock);
249
250 return count;
251}
252
253static DEVICE_ATTR(min_microvolts, 0666, show_min_uV, set_min_uV);
254static DEVICE_ATTR(max_microvolts, 0666, show_max_uV, set_max_uV);
255static DEVICE_ATTR(min_microamps, 0666, show_min_uA, set_min_uA);
256static DEVICE_ATTR(max_microamps, 0666, show_max_uA, set_max_uA);
257static DEVICE_ATTR(mode, 0666, show_mode, set_mode);
258
259static struct device_attribute *attributes[] = {
260 &dev_attr_min_microvolts,
261 &dev_attr_max_microvolts,
262 &dev_attr_min_microamps,
263 &dev_attr_max_microamps,
264 &dev_attr_mode,
265};
266
267static int regulator_virtual_consumer_probe(struct platform_device *pdev)
268{
269 char *reg_id = pdev->dev.platform_data;
270 struct virtual_consumer_data *drvdata;
271 int ret, i;
272
273 drvdata = kzalloc(sizeof(struct virtual_consumer_data), GFP_KERNEL);
274 if (drvdata == NULL) {
275 ret = -ENOMEM;
276 goto err;
277 }
278
279 mutex_init(&drvdata->lock);
280
281 drvdata->regulator = regulator_get(&pdev->dev, reg_id);
282 if (IS_ERR(drvdata->regulator)) {
283 ret = PTR_ERR(drvdata->regulator);
284 goto err;
285 }
286
287 for (i = 0; i < ARRAY_SIZE(attributes); i++) {
288 ret = device_create_file(&pdev->dev, attributes[i]);
289 if (ret != 0)
290 goto err;
291 }
292
293 drvdata->mode = regulator_get_mode(drvdata->regulator);
294
295 platform_set_drvdata(pdev, drvdata);
296
297 return 0;
298
299err:
300 for (i = 0; i < ARRAY_SIZE(attributes); i++)
301 device_remove_file(&pdev->dev, attributes[i]);
302 kfree(drvdata);
303 return ret;
304}
305
306static int regulator_virtual_consumer_remove(struct platform_device *pdev)
307{
308 struct virtual_consumer_data *drvdata = platform_get_drvdata(pdev);
309 int i;
310
311 for (i = 0; i < ARRAY_SIZE(attributes); i++)
312 device_remove_file(&pdev->dev, attributes[i]);
313 if (drvdata->enabled)
314 regulator_disable(drvdata->regulator);
315 regulator_put(drvdata->regulator);
316
317 kfree(drvdata);
318
319 return 0;
320}
321
322static struct platform_driver regulator_virtual_consumer_driver = {
323 .probe = regulator_virtual_consumer_probe,
324 .remove = regulator_virtual_consumer_remove,
325 .driver = {
326 .name = "reg-virt-consumer",
327 },
328};
329
330
331static int __init regulator_virtual_consumer_init(void)
332{
333 return platform_driver_register(&regulator_virtual_consumer_driver);
334}
335module_init(regulator_virtual_consumer_init);
336
337static void __exit regulator_virtual_consumer_exit(void)
338{
339 platform_driver_unregister(&regulator_virtual_consumer_driver);
340}
341module_exit(regulator_virtual_consumer_exit);
342
343MODULE_AUTHOR("Mark Brown <broonie@opensource.wolfsonmicro.com>");
344MODULE_DESCRIPTION("Virtual regulator consumer");
345MODULE_LICENSE("GPL");
diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c
index d397fa5f3a91..7af60b98d8a4 100644
--- a/drivers/rtc/interface.c
+++ b/drivers/rtc/interface.c
@@ -20,7 +20,7 @@ int rtc_read_time(struct rtc_device *rtc, struct rtc_time *tm)
 
 	err = mutex_lock_interruptible(&rtc->ops_lock);
 	if (err)
-		return -EBUSY;
+		return err;
 
 	if (!rtc->ops)
 		err = -ENODEV;
@@ -46,7 +46,7 @@ int rtc_set_time(struct rtc_device *rtc, struct rtc_time *tm)
 
 	err = mutex_lock_interruptible(&rtc->ops_lock);
 	if (err)
-		return -EBUSY;
+		return err;
 
 	if (!rtc->ops)
 		err = -ENODEV;
@@ -66,7 +66,7 @@ int rtc_set_mmss(struct rtc_device *rtc, unsigned long secs)
 
 	err = mutex_lock_interruptible(&rtc->ops_lock);
 	if (err)
-		return -EBUSY;
+		return err;
 
 	if (!rtc->ops)
 		err = -ENODEV;
@@ -106,7 +106,7 @@ static int rtc_read_alarm_internal(struct rtc_device *rtc, struct rtc_wkalrm *al
 
 	err = mutex_lock_interruptible(&rtc->ops_lock);
 	if (err)
-		return -EBUSY;
+		return err;
 
 	if (rtc->ops == NULL)
 		err = -ENODEV;
@@ -293,7 +293,7 @@ int rtc_set_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
 
 	err = mutex_lock_interruptible(&rtc->ops_lock);
 	if (err)
-		return -EBUSY;
+		return err;
 
 	if (!rtc->ops)
 		err = -ENODEV;
diff --git a/drivers/rtc/rtc-dev.c b/drivers/rtc/rtc-dev.c
index 0a870b7e5c32..856cc1af40df 100644
--- a/drivers/rtc/rtc-dev.c
+++ b/drivers/rtc/rtc-dev.c
@@ -221,7 +221,7 @@ static long rtc_dev_ioctl(struct file *file,
 
 	err = mutex_lock_interruptible(&rtc->ops_lock);
 	if (err)
-		return -EBUSY;
+		return err;
 
 	/* check that the calling task has appropriate permissions
 	 * for certain ioctls. doing this check here is useful
@@ -432,6 +432,8 @@ static int rtc_dev_release(struct inode *inode, struct file *file)
 #ifdef CONFIG_RTC_INTF_DEV_UIE_EMUL
 	clear_uie(rtc);
 #endif
+	rtc_irq_set_state(rtc, NULL, 0);
+
 	if (rtc->ops->release)
 		rtc->ops->release(rtc->dev.parent);
 
diff --git a/drivers/s390/block/dasd_alias.c b/drivers/s390/block/dasd_alias.c
index 2d8df0b30538..20676cdef4a5 100644
--- a/drivers/s390/block/dasd_alias.c
+++ b/drivers/s390/block/dasd_alias.c
@@ -91,7 +91,8 @@ static struct alias_pav_group *_find_group(struct alias_lcu *lcu,
 	else
 		search_unit_addr = uid->base_unit_addr;
 	list_for_each_entry(pos, &lcu->grouplist, group) {
-		if (pos->uid.base_unit_addr == search_unit_addr)
+		if (pos->uid.base_unit_addr == search_unit_addr &&
+		    !strncmp(pos->uid.vduit, uid->vduit, sizeof(uid->vduit)))
 			return pos;
 	};
 	return NULL;
@@ -332,6 +333,7 @@ static int _add_device_to_lcu(struct alias_lcu *lcu,
 		group->uid.base_unit_addr = uid->real_unit_addr;
 	else
 		group->uid.base_unit_addr = uid->base_unit_addr;
+	memcpy(group->uid.vduit, uid->vduit, sizeof(uid->vduit));
 	INIT_LIST_HEAD(&group->group);
 	INIT_LIST_HEAD(&group->baselist);
 	INIT_LIST_HEAD(&group->aliaslist);
diff --git a/drivers/s390/block/dasd_devmap.c b/drivers/s390/block/dasd_devmap.c
index d774e79476fe..cd3335c1c307 100644
--- a/drivers/s390/block/dasd_devmap.c
+++ b/drivers/s390/block/dasd_devmap.c
@@ -913,7 +913,8 @@ dasd_vendor_show(struct device *dev, struct device_attribute *attr, char *buf)
 static DEVICE_ATTR(vendor, 0444, dasd_vendor_show, NULL);
 
 #define UID_STRLEN ( /* vendor */ 3 + 1 + /* serial */ 14 + 1 +\
-		     /* SSID */ 4 + 1 + /* unit addr */ 2 + 1)
+		     /* SSID */ 4 + 1 + /* unit addr */ 2 + 1 +\
+		     /* vduit */ 32 + 1)
 
 static ssize_t
 dasd_uid_show(struct device *dev, struct device_attribute *attr, char *buf)
@@ -945,8 +946,17 @@ dasd_uid_show(struct device *dev, struct device_attribute *attr, char *buf)
 		sprintf(ua_string, "%02x", uid->real_unit_addr);
 		break;
 	}
-	snprintf(uid_string, sizeof(uid_string), "%s.%s.%04x.%s",
-		 uid->vendor, uid->serial, uid->ssid, ua_string);
+	if (strlen(uid->vduit) > 0)
+		snprintf(uid_string, sizeof(uid_string),
+			 "%s.%s.%04x.%s.%s",
+			 uid->vendor, uid->serial,
+			 uid->ssid, ua_string,
+			 uid->vduit);
+	else
+		snprintf(uid_string, sizeof(uid_string),
+			 "%s.%s.%04x.%s",
+			 uid->vendor, uid->serial,
+			 uid->ssid, ua_string);
 	spin_unlock(&dasd_devmap_lock);
 	return snprintf(buf, PAGE_SIZE, "%s\n", uid_string);
 }
diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c
index 3590fdb5b2fd..773b3fe275b2 100644
--- a/drivers/s390/block/dasd_eckd.c
+++ b/drivers/s390/block/dasd_eckd.c
@@ -313,8 +313,8 @@ static int prefix(struct ccw1 *ccw, struct PFX_eckd_data *pfxdata, int trk,
313 memset(pfxdata, 0, sizeof(*pfxdata)); 313 memset(pfxdata, 0, sizeof(*pfxdata));
314 /* prefix data */ 314 /* prefix data */
315 pfxdata->format = 0; 315 pfxdata->format = 0;
316 pfxdata->base_address = basepriv->conf_data.ned1.unit_addr; 316 pfxdata->base_address = basepriv->ned->unit_addr;
317 pfxdata->base_lss = basepriv->conf_data.ned1.ID; 317 pfxdata->base_lss = basepriv->ned->ID;
318 pfxdata->validity.define_extend = 1; 318 pfxdata->validity.define_extend = 1;
319 319
320 /* private uid is kept up to date, conf_data may be outdated */ 320 /* private uid is kept up to date, conf_data may be outdated */
@@ -536,36 +536,40 @@ dasd_eckd_cdl_reclen(int recid)
536/* 536/*
537 * Generate device unique id that specifies the physical device. 537 * Generate device unique id that specifies the physical device.
538 */ 538 */
539static int 539static int dasd_eckd_generate_uid(struct dasd_device *device,
540dasd_eckd_generate_uid(struct dasd_device *device, struct dasd_uid *uid) 540 struct dasd_uid *uid)
541{ 541{
542 struct dasd_eckd_private *private; 542 struct dasd_eckd_private *private;
543 struct dasd_eckd_confdata *confdata; 543 int count;
544 544
545 private = (struct dasd_eckd_private *) device->private; 545 private = (struct dasd_eckd_private *) device->private;
546 if (!private) 546 if (!private)
547 return -ENODEV; 547 return -ENODEV;
548 confdata = &private->conf_data; 548 if (!private->ned || !private->gneq)
549 if (!confdata)
550 return -ENODEV; 549 return -ENODEV;
551 550
552 memset(uid, 0, sizeof(struct dasd_uid)); 551 memset(uid, 0, sizeof(struct dasd_uid));
553 memcpy(uid->vendor, confdata->ned1.HDA_manufacturer, 552 memcpy(uid->vendor, private->ned->HDA_manufacturer,
554 sizeof(uid->vendor) - 1); 553 sizeof(uid->vendor) - 1);
555 EBCASC(uid->vendor, sizeof(uid->vendor) - 1); 554 EBCASC(uid->vendor, sizeof(uid->vendor) - 1);
556 memcpy(uid->serial, confdata->ned1.HDA_location, 555 memcpy(uid->serial, private->ned->HDA_location,
557 sizeof(uid->serial) - 1); 556 sizeof(uid->serial) - 1);
558 EBCASC(uid->serial, sizeof(uid->serial) - 1); 557 EBCASC(uid->serial, sizeof(uid->serial) - 1);
559 uid->ssid = confdata->neq.subsystemID; 558 uid->ssid = private->gneq->subsystemID;
560 uid->real_unit_addr = confdata->ned1.unit_addr; 559 uid->real_unit_addr = private->ned->unit_addr;;
561 if (confdata->ned2.sneq.flags == 0x40 && 560 if (private->sneq) {
562 confdata->ned2.sneq.format == 0x0001) { 561 uid->type = private->sneq->sua_flags;
563 uid->type = confdata->ned2.sneq.sua_flags;
564 if (uid->type == UA_BASE_PAV_ALIAS) 562 if (uid->type == UA_BASE_PAV_ALIAS)
565 uid->base_unit_addr = confdata->ned2.sneq.base_unit_addr; 563 uid->base_unit_addr = private->sneq->base_unit_addr;
566 } else { 564 } else {
567 uid->type = UA_BASE_DEVICE; 565 uid->type = UA_BASE_DEVICE;
568 } 566 }
567 if (private->vdsneq) {
568 for (count = 0; count < 16; count++) {
569 sprintf(uid->vduit+2*count, "%02x",
570 private->vdsneq->uit[count]);
571 }
572 }
569 return 0; 573 return 0;
570} 574}
571 575
@@ -623,6 +627,15 @@ static int dasd_eckd_read_conf_lpm(struct dasd_device *device,
623 ret = -ENOMEM; 627 ret = -ENOMEM;
624 goto out_error; 628 goto out_error;
625 } 629 }
630
631 /*
632 * buffer has to start with EBCDIC "V1.0" to show
633 * support for virtual device SNEQ
634 */
635 rcd_buf[0] = 0xE5;
636 rcd_buf[1] = 0xF1;
637 rcd_buf[2] = 0x4B;
638 rcd_buf[3] = 0xF0;
626 cqr = dasd_eckd_build_rcd_lpm(device, rcd_buf, ciw, lpm); 639 cqr = dasd_eckd_build_rcd_lpm(device, rcd_buf, ciw, lpm);
627 if (IS_ERR(cqr)) { 640 if (IS_ERR(cqr)) {
628 ret = PTR_ERR(cqr); 641 ret = PTR_ERR(cqr);
@@ -646,8 +659,62 @@ out_error:
646 return ret; 659 return ret;
647} 660}
648 661
649static int 662static int dasd_eckd_identify_conf_parts(struct dasd_eckd_private *private)
650dasd_eckd_read_conf(struct dasd_device *device) 663{
664
665 struct dasd_sneq *sneq;
666 int i, count;
667
668 private->ned = NULL;
669 private->sneq = NULL;
670 private->vdsneq = NULL;
671 private->gneq = NULL;
672 count = private->conf_len / sizeof(struct dasd_sneq);
673 sneq = (struct dasd_sneq *)private->conf_data;
674 for (i = 0; i < count; ++i) {
675 if (sneq->flags.identifier == 1 && sneq->format == 1)
676 private->sneq = sneq;
677 else if (sneq->flags.identifier == 1 && sneq->format == 4)
678 private->vdsneq = (struct vd_sneq *)sneq;
679 else if (sneq->flags.identifier == 2)
680 private->gneq = (struct dasd_gneq *)sneq;
681 else if (sneq->flags.identifier == 3 && sneq->res1 == 1)
682 private->ned = (struct dasd_ned *)sneq;
683 sneq++;
684 }
685 if (!private->ned || !private->gneq) {
686 private->ned = NULL;
687 private->sneq = NULL;
688 private->vdsneq = NULL;
689 private->gneq = NULL;
690 return -EINVAL;
691 }
692 return 0;
693
694};
695
696static unsigned char dasd_eckd_path_access(void *conf_data, int conf_len)
697{
698 struct dasd_gneq *gneq;
699 int i, count, found;
700
701 count = conf_len / sizeof(*gneq);
702 gneq = (struct dasd_gneq *)conf_data;
703 found = 0;
704 for (i = 0; i < count; ++i) {
705 if (gneq->flags.identifier == 2) {
706 found = 1;
707 break;
708 }
709 gneq++;
710 }
711 if (found)
712 return ((char *)gneq)[18] & 0x07;
713 else
714 return 0;
715}
716
717static int dasd_eckd_read_conf(struct dasd_device *device)
651{ 718{
652 void *conf_data; 719 void *conf_data;
653 int conf_len, conf_data_saved; 720 int conf_len, conf_data_saved;
@@ -661,7 +728,6 @@ dasd_eckd_read_conf(struct dasd_device *device)
661 path_data->opm = ccw_device_get_path_mask(device->cdev); 728 path_data->opm = ccw_device_get_path_mask(device->cdev);
662 lpm = 0x80; 729 lpm = 0x80;
663 conf_data_saved = 0; 730 conf_data_saved = 0;
664
665 /* get configuration data per operational path */ 731 /* get configuration data per operational path */
666 for (lpm = 0x80; lpm; lpm>>= 1) { 732 for (lpm = 0x80; lpm; lpm>>= 1) {
667 if (lpm & path_data->opm){ 733 if (lpm & path_data->opm){
@@ -678,22 +744,20 @@ dasd_eckd_read_conf(struct dasd_device *device)
678 "data retrieved"); 744 "data retrieved");
679 continue; /* no error */ 745 continue; /* no error */
680 } 746 }
681 if (conf_len != sizeof(struct dasd_eckd_confdata)) {
682 MESSAGE(KERN_WARNING,
683 "sizes of configuration data mismatch"
684 "%d (read) vs %ld (expected)",
685 conf_len,
686 sizeof(struct dasd_eckd_confdata));
687 kfree(conf_data);
688 continue; /* no error */
689 }
690 /* save first valid configuration data */ 747 /* save first valid configuration data */
691 if (!conf_data_saved){ 748 if (!conf_data_saved) {
692 memcpy(&private->conf_data, conf_data, 749 kfree(private->conf_data);
693 sizeof(struct dasd_eckd_confdata)); 750 private->conf_data = conf_data;
751 private->conf_len = conf_len;
752 if (dasd_eckd_identify_conf_parts(private)) {
753 private->conf_data = NULL;
754 private->conf_len = 0;
755 kfree(conf_data);
756 continue;
757 }
694 conf_data_saved++; 758 conf_data_saved++;
695 } 759 }
696 switch (((char *)conf_data)[242] & 0x07){ 760 switch (dasd_eckd_path_access(conf_data, conf_len)) {
697 case 0x02: 761 case 0x02:
698 path_data->npm |= lpm; 762 path_data->npm |= lpm;
699 break; 763 break;
@@ -701,7 +765,8 @@ dasd_eckd_read_conf(struct dasd_device *device)
701 path_data->ppm |= lpm; 765 path_data->ppm |= lpm;
702 break; 766 break;
703 } 767 }
704 kfree(conf_data); 768 if (conf_data != private->conf_data)
769 kfree(conf_data);
705 } 770 }
706 } 771 }
707 return 0; 772 return 0;
@@ -952,6 +1017,7 @@ out_err2:
952 dasd_free_block(device->block); 1017 dasd_free_block(device->block);
953 device->block = NULL; 1018 device->block = NULL;
954out_err1: 1019out_err1:
1020 kfree(private->conf_data);
955 kfree(device->private); 1021 kfree(device->private);
956 device->private = NULL; 1022 device->private = NULL;
957 return rc; 1023 return rc;
@@ -959,7 +1025,17 @@ out_err1:
959 1025
960static void dasd_eckd_uncheck_device(struct dasd_device *device) 1026static void dasd_eckd_uncheck_device(struct dasd_device *device)
961{ 1027{
1028 struct dasd_eckd_private *private;
1029
1030 private = (struct dasd_eckd_private *) device->private;
962 dasd_alias_disconnect_device_from_lcu(device); 1031 dasd_alias_disconnect_device_from_lcu(device);
1032 private->ned = NULL;
1033 private->sneq = NULL;
1034 private->vdsneq = NULL;
1035 private->gneq = NULL;
1036 private->conf_len = 0;
1037 kfree(private->conf_data);
1038 private->conf_data = NULL;
963} 1039}
964 1040
965static struct dasd_ccw_req * 1041static struct dasd_ccw_req *
@@ -1746,9 +1822,10 @@ dasd_eckd_fill_info(struct dasd_device * device,
1746 info->characteristics_size = sizeof(struct dasd_eckd_characteristics); 1822 info->characteristics_size = sizeof(struct dasd_eckd_characteristics);
1747 memcpy(info->characteristics, &private->rdc_data, 1823 memcpy(info->characteristics, &private->rdc_data,
1748 sizeof(struct dasd_eckd_characteristics)); 1824 sizeof(struct dasd_eckd_characteristics));
1749 info->confdata_size = sizeof(struct dasd_eckd_confdata); 1825 info->confdata_size = min((unsigned long)private->conf_len,
1750 memcpy(info->configuration_data, &private->conf_data, 1826 sizeof(info->configuration_data));
1751 sizeof(struct dasd_eckd_confdata)); 1827 memcpy(info->configuration_data, private->conf_data,
1828 info->confdata_size);
1752 return 0; 1829 return 0;
1753} 1830}
1754 1831
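The new dasd_eckd_identify_conf_parts() above relies on the configuration data being a flat array of 32-byte records that are told apart by their identifier bits. A minimal sketch of that walk in isolation (find_gneq() is an illustrative name, not part of the patch):

static struct dasd_gneq *find_gneq(void *conf_data, int conf_len)
{
	struct dasd_sneq *rec = conf_data;
	int i, count = conf_len / sizeof(*rec);	/* 32-byte records */

	for (i = 0; i < count; i++, rec++)
		if (rec->flags.identifier == 2)	/* generic NEQ */
			return (struct dasd_gneq *)rec;
	return NULL;
}
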
diff --git a/drivers/s390/block/dasd_eckd.h b/drivers/s390/block/dasd_eckd.h
index fc2509c939bc..4bf0aa5112c1 100644
--- a/drivers/s390/block/dasd_eckd.h
+++ b/drivers/s390/block/dasd_eckd.h
@@ -231,133 +231,62 @@ struct dasd_eckd_characteristics {
231 __u8 reserved3[10]; 231 __u8 reserved3[10];
232} __attribute__ ((packed)); 232} __attribute__ ((packed));
233 233
234struct dasd_eckd_confdata { 234/* elements of the configuration data */
235struct dasd_ned {
235 struct { 236 struct {
236 struct { 237 __u8 identifier:2;
237 unsigned char identifier:2; 238 __u8 token_id:1;
238 unsigned char token_id:1; 239 __u8 sno_valid:1;
239 unsigned char sno_valid:1; 240 __u8 subst_sno:1;
240 unsigned char subst_sno:1; 241 __u8 recNED:1;
241 unsigned char recNED:1; 242 __u8 emuNED:1;
242 unsigned char emuNED:1; 243 __u8 reserved:1;
243 unsigned char reserved:1; 244 } __attribute__ ((packed)) flags;
244 } __attribute__ ((packed)) flags; 245 __u8 descriptor;
245 __u8 descriptor; 246 __u8 dev_class;
246 __u8 dev_class; 247 __u8 reserved;
247 __u8 reserved; 248 __u8 dev_type[6];
248 unsigned char dev_type[6]; 249 __u8 dev_model[3];
249 unsigned char dev_model[3]; 250 __u8 HDA_manufacturer[3];
250 unsigned char HDA_manufacturer[3]; 251 __u8 HDA_location[2];
251 unsigned char HDA_location[2]; 252 __u8 HDA_seqno[12];
252 unsigned char HDA_seqno[12]; 253 __u8 ID;
253 __u8 ID; 254 __u8 unit_addr;
254 __u8 unit_addr; 255} __attribute__ ((packed));
255 } __attribute__ ((packed)) ned1; 256
256 union { 257struct dasd_sneq {
257 struct {
258 struct {
259 unsigned char identifier:2;
260 unsigned char token_id:1;
261 unsigned char sno_valid:1;
262 unsigned char subst_sno:1;
263 unsigned char recNED:1;
264 unsigned char emuNED:1;
265 unsigned char reserved:1;
266 } __attribute__ ((packed)) flags;
267 __u8 descriptor;
268 __u8 reserved[2];
269 unsigned char dev_type[6];
270 unsigned char dev_model[3];
271 unsigned char DASD_manufacturer[3];
272 unsigned char DASD_location[2];
273 unsigned char DASD_seqno[12];
274 __u16 ID;
275 } __attribute__ ((packed)) ned;
276 struct {
277 unsigned char flags; /* byte 0 */
278 unsigned char res1; /* byte 1 */
279 __u16 format; /* byte 2-3 */
280 unsigned char res2[4]; /* byte 4-7 */
281 unsigned char sua_flags; /* byte 8 */
282 __u8 base_unit_addr; /* byte 9 */
283 unsigned char res3[22]; /* byte 10-31 */
284 } __attribute__ ((packed)) sneq;
285 } __attribute__ ((packed)) ned2;
286 struct { 258 struct {
287 struct { 259 __u8 identifier:2;
288 unsigned char identifier:2; 260 __u8 reserved:6;
289 unsigned char token_id:1; 261 } __attribute__ ((packed)) flags;
290 unsigned char sno_valid:1; 262 __u8 res1;
291 unsigned char subst_sno:1; 263 __u16 format;
292 unsigned char recNED:1; 264 __u8 res2[4]; /* byte 4- 7 */
293 unsigned char emuNED:1; 265 __u8 sua_flags; /* byte 8 */
294 unsigned char reserved:1; 266 __u8 base_unit_addr; /* byte 9 */
295 } __attribute__ ((packed)) flags; 267 __u8 res3[22]; /* byte 10-31 */
296 __u8 descriptor; 268} __attribute__ ((packed));
297 __u8 reserved[2]; 269
298 unsigned char cont_type[6]; 270struct vd_sneq {
299 unsigned char cont_model[3];
300 unsigned char cont_manufacturer[3];
301 unsigned char cont_location[2];
302 unsigned char cont_seqno[12];
303 __u16 ID;
304 } __attribute__ ((packed)) ned3;
305 struct { 271 struct {
306 struct { 272 __u8 identifier:2;
307 unsigned char identifier:2; 273 __u8 reserved:6;
308 unsigned char token_id:1; 274 } __attribute__ ((packed)) flags;
309 unsigned char sno_valid:1; 275 __u8 res1;
310 unsigned char subst_sno:1; 276 __u16 format;
311 unsigned char recNED:1; 277 __u8 res2[4]; /* byte 4- 7 */
312 unsigned char emuNED:1; 278 __u8 uit[16]; /* byte 8-23 */
313 unsigned char reserved:1; 279 __u8 res3[8]; /* byte 24-31 */
314 } __attribute__ ((packed)) flags; 280} __attribute__ ((packed));
315 __u8 descriptor; 281
316 __u8 reserved[2]; 282struct dasd_gneq {
317 unsigned char cont_type[6];
318 unsigned char empty[3];
319 unsigned char cont_manufacturer[3];
320 unsigned char cont_location[2];
321 unsigned char cont_seqno[12];
322 __u16 ID;
323 } __attribute__ ((packed)) ned4;
324 unsigned char ned5[32];
325 unsigned char ned6[32];
326 unsigned char ned7[32];
327 struct { 283 struct {
328 struct { 284 __u8 identifier:2;
329 unsigned char identifier:2; 285 __u8 reserved:6;
330 unsigned char reserved:6; 286 } __attribute__ ((packed)) flags;
331 } __attribute__ ((packed)) flags; 287 __u8 reserved[7];
332 __u8 selector; 288 __u16 subsystemID;
333 __u16 interfaceID; 289 __u8 reserved2[22];
334 __u32 reserved;
335 __u16 subsystemID;
336 struct {
337 unsigned char sp0:1;
338 unsigned char sp1:1;
339 unsigned char reserved:5;
340 unsigned char scluster:1;
341 } __attribute__ ((packed)) spathID;
342 __u8 unit_address;
343 __u8 dev_ID;
344 __u8 dev_address;
345 __u8 adapterID;
346 __u16 link_address;
347 struct {
348 unsigned char parallel:1;
349 unsigned char escon:1;
350 unsigned char reserved:1;
351 unsigned char ficon:1;
352 unsigned char reserved2:4;
353 } __attribute__ ((packed)) protocol_type;
354 struct {
355 unsigned char PID_in_236:1;
356 unsigned char reserved:7;
357 } __attribute__ ((packed)) format_flags;
358 __u8 log_dev_address;
359 unsigned char reserved2[12];
360 } __attribute__ ((packed)) neq;
361} __attribute__ ((packed)); 290} __attribute__ ((packed));
362 291
363struct dasd_eckd_path { 292struct dasd_eckd_path {
@@ -463,7 +392,14 @@ struct alias_pav_group {
463 392
464struct dasd_eckd_private { 393struct dasd_eckd_private {
465 struct dasd_eckd_characteristics rdc_data; 394 struct dasd_eckd_characteristics rdc_data;
466 struct dasd_eckd_confdata conf_data; 395 u8 *conf_data;
396 int conf_len;
397 /* pointers to specific parts in the conf_data */
398 struct dasd_ned *ned;
399 struct dasd_sneq *sneq;
400 struct vd_sneq *vdsneq;
401 struct dasd_gneq *gneq;
402
467 struct dasd_eckd_path path_data; 403 struct dasd_eckd_path path_data;
468 struct eckd_count count_area[5]; 404 struct eckd_count count_area[5];
469 int init_cqr_status; 405 int init_cqr_status;
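The record walk in dasd_eckd_identify_conf_parts() is only sound if every element of the configuration data really occupies 32 bytes, which the packed structures above are laid out to guarantee. A hypothetical compile-time check (not in the patch) that would pin that layout down:

static inline void dasd_eckd_check_conf_layout(void)
{
	BUILD_BUG_ON(sizeof(struct dasd_ned) != 32);
	BUILD_BUG_ON(sizeof(struct dasd_sneq) != 32);
	BUILD_BUG_ON(sizeof(struct vd_sneq) != 32);
	BUILD_BUG_ON(sizeof(struct dasd_gneq) != 32);
}
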
diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h
index fb2f931cf844..31ecaa4a40e4 100644
--- a/drivers/s390/block/dasd_int.h
+++ b/drivers/s390/block/dasd_int.h
@@ -307,6 +307,7 @@ struct dasd_uid {
307 __u16 ssid; 307 __u16 ssid;
308 __u8 real_unit_addr; 308 __u8 real_unit_addr;
309 __u8 base_unit_addr; 309 __u8 base_unit_addr;
310 char vduit[33];
310}; 311};
311 312
312/* 313/*
diff --git a/drivers/s390/char/sclp.c b/drivers/s390/char/sclp.c
index 3c8b25e6c345..1fd8f2193ed8 100644
--- a/drivers/s390/char/sclp.c
+++ b/drivers/s390/char/sclp.c
@@ -399,6 +399,7 @@ sclp_tod_from_jiffies(unsigned long jiffies)
399void 399void
400sclp_sync_wait(void) 400sclp_sync_wait(void)
401{ 401{
402 unsigned long long old_tick;
402 unsigned long flags; 403 unsigned long flags;
403 unsigned long cr0, cr0_sync; 404 unsigned long cr0, cr0_sync;
404 u64 timeout; 405 u64 timeout;
@@ -419,11 +420,12 @@ sclp_sync_wait(void)
419 if (!irq_context) 420 if (!irq_context)
420 local_bh_disable(); 421 local_bh_disable();
421 /* Enable service-signal interruption, disable timer interrupts */ 422 /* Enable service-signal interruption, disable timer interrupts */
423 old_tick = local_tick_disable();
422 trace_hardirqs_on(); 424 trace_hardirqs_on();
423 __ctl_store(cr0, 0, 0); 425 __ctl_store(cr0, 0, 0);
424 cr0_sync = cr0; 426 cr0_sync = cr0;
427 cr0_sync &= 0xffff00a0;
425 cr0_sync |= 0x00000200; 428 cr0_sync |= 0x00000200;
426 cr0_sync &= 0xFFFFF3AC;
427 __ctl_load(cr0_sync, 0, 0); 429 __ctl_load(cr0_sync, 0, 0);
428 __raw_local_irq_stosm(0x01); 430 __raw_local_irq_stosm(0x01);
429 /* Loop until driver state indicates finished request */ 431 /* Loop until driver state indicates finished request */
@@ -439,9 +441,9 @@ sclp_sync_wait(void)
439 __ctl_load(cr0, 0, 0); 441 __ctl_load(cr0, 0, 0);
440 if (!irq_context) 442 if (!irq_context)
441 _local_bh_enable(); 443 _local_bh_enable();
444 local_tick_enable(old_tick);
442 local_irq_restore(flags); 445 local_irq_restore(flags);
443} 446}
444
445EXPORT_SYMBOL(sclp_sync_wait); 447EXPORT_SYMBOL(sclp_sync_wait);
446 448
447/* Dispatch changes in send and receive mask to registered listeners. */ 449/* Dispatch changes in send and receive mask to registered listeners. */
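sclp_sync_wait() busy-waits with external interrupts enabled, so the change parks the CPU timer tick for the duration and restores it afterwards. The save/disable/restore pattern in isolation (sketch only; poll() stands in for the real request-polling loop):

static void poll_with_tick_stopped(void (*poll)(void))
{
	unsigned long long old_tick;

	old_tick = local_tick_disable();	/* stop CPU timer ticks */
	poll();					/* busy-wait for completion */
	local_tick_enable(old_tick);		/* restore saved comparator */
}
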
diff --git a/drivers/s390/char/sclp_cmd.c b/drivers/s390/char/sclp_cmd.c
index 0c2b77493db4..eb5f1b8bc57f 100644
--- a/drivers/s390/char/sclp_cmd.c
+++ b/drivers/s390/char/sclp_cmd.c
@@ -427,6 +427,8 @@ static int sclp_mem_notifier(struct notifier_block *nb,
427 sclp_attach_storage(id); 427 sclp_attach_storage(id);
428 switch (action) { 428 switch (action) {
429 case MEM_ONLINE: 429 case MEM_ONLINE:
430 case MEM_GOING_OFFLINE:
431 case MEM_CANCEL_OFFLINE:
430 break; 432 break;
431 case MEM_GOING_ONLINE: 433 case MEM_GOING_ONLINE:
432 rc = sclp_mem_change_state(start, size, 1); 434 rc = sclp_mem_change_state(start, size, 1);
@@ -434,6 +436,9 @@ static int sclp_mem_notifier(struct notifier_block *nb,
434 case MEM_CANCEL_ONLINE: 436 case MEM_CANCEL_ONLINE:
435 sclp_mem_change_state(start, size, 0); 437 sclp_mem_change_state(start, size, 0);
436 break; 438 break;
439 case MEM_OFFLINE:
440 sclp_mem_change_state(start, size, 0);
441 break;
437 default: 442 default:
438 rc = -EINVAL; 443 rc = -EINVAL;
439 break; 444 break;
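The two added no-op cases (MEM_GOING_OFFLINE, MEM_CANCEL_OFFLINE) plus the new MEM_OFFLINE handler mean every action the memory hotplug core can send is now matched, while unknown actions still fail. The resulting notifier shape, reduced to a sketch (mem_notify() and the start/size derivation are illustrative, locking omitted):

static int mem_notify(struct notifier_block *nb, unsigned long action,
		      void *data)
{
	struct memory_notify *arg = data;
	unsigned long start = arg->start_pfn << PAGE_SHIFT;
	unsigned long size = arg->nr_pages << PAGE_SHIFT;
	int rc = 0;

	switch (action) {
	case MEM_ONLINE:
	case MEM_GOING_OFFLINE:
	case MEM_CANCEL_OFFLINE:
		break;					/* acknowledged, nothing to do */
	case MEM_GOING_ONLINE:
		rc = sclp_mem_change_state(start, size, 1);	/* assign storage */
		break;
	case MEM_CANCEL_ONLINE:
	case MEM_OFFLINE:
		sclp_mem_change_state(start, size, 0);		/* unassign storage */
		break;
	default:
		rc = -EINVAL;
		break;
	}
	return rc ? NOTIFY_BAD : NOTIFY_OK;
}
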
diff --git a/drivers/s390/char/sclp_config.c b/drivers/s390/char/sclp_config.c
index fff4ff485d9b..4cebd6ee6d27 100644
--- a/drivers/s390/char/sclp_config.c
+++ b/drivers/s390/char/sclp_config.c
@@ -8,7 +8,6 @@
8#include <linux/init.h> 8#include <linux/init.h>
9#include <linux/errno.h> 9#include <linux/errno.h>
10#include <linux/cpu.h> 10#include <linux/cpu.h>
11#include <linux/kthread.h>
12#include <linux/sysdev.h> 11#include <linux/sysdev.h>
13#include <linux/workqueue.h> 12#include <linux/workqueue.h>
14#include <asm/smp.h> 13#include <asm/smp.h>
@@ -41,19 +40,9 @@ static void sclp_cpu_capability_notify(struct work_struct *work)
41 put_online_cpus(); 40 put_online_cpus();
42} 41}
43 42
44static int sclp_cpu_kthread(void *data)
45{
46 smp_rescan_cpus();
47 return 0;
48}
49
50static void __ref sclp_cpu_change_notify(struct work_struct *work) 43static void __ref sclp_cpu_change_notify(struct work_struct *work)
51{ 44{
52 /* Can't call smp_rescan_cpus() from workqueue context since it may 45 smp_rescan_cpus();
53 * deadlock in case of cpu hotplug. So we have to create a kernel
54 * thread in order to call it.
55 */
56 kthread_run(sclp_cpu_kthread, NULL, "cpu_rescan");
57} 46}
58 47
59static void sclp_conf_receiver_fn(struct evbuf_header *evbuf) 48static void sclp_conf_receiver_fn(struct evbuf_header *evbuf)
diff --git a/drivers/s390/cio/idset.c b/drivers/s390/cio/idset.c
index ef7bc0a125ef..cf8f24a4b5eb 100644
--- a/drivers/s390/cio/idset.c
+++ b/drivers/s390/cio/idset.c
@@ -5,7 +5,7 @@
5 * Author(s): Peter Oberparleiter <peter.oberparleiter@de.ibm.com> 5 * Author(s): Peter Oberparleiter <peter.oberparleiter@de.ibm.com>
6 */ 6 */
7 7
8#include <linux/slab.h> 8#include <linux/vmalloc.h>
9#include <linux/bitops.h> 9#include <linux/bitops.h>
10#include "idset.h" 10#include "idset.h"
11#include "css.h" 11#include "css.h"
@@ -25,18 +25,18 @@ static struct idset *idset_new(int num_ssid, int num_id)
25{ 25{
26 struct idset *set; 26 struct idset *set;
27 27
28 set = kzalloc(sizeof(struct idset) + bitmap_size(num_ssid, num_id), 28 set = vmalloc(sizeof(struct idset) + bitmap_size(num_ssid, num_id));
29 GFP_KERNEL);
30 if (set) { 29 if (set) {
31 set->num_ssid = num_ssid; 30 set->num_ssid = num_ssid;
32 set->num_id = num_id; 31 set->num_id = num_id;
32 memset(set->bitmap, 0, bitmap_size(num_ssid, num_id));
33 } 33 }
34 return set; 34 return set;
35} 35}
36 36
37void idset_free(struct idset *set) 37void idset_free(struct idset *set)
38{ 38{
39 kfree(set); 39 vfree(set);
40} 40}
41 41
42void idset_clear(struct idset *set) 42void idset_clear(struct idset *set)
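Switching idset_new() to vmalloc() matters because the bitmap scales with the full subchannel-id space: four subchannel sets times 65536 ids is already 32 KiB of bitmap, which a fragmented kmalloc() can fail to provide as physically contiguous memory. A hypothetical helper making the size explicit (approximating bitmap_size(), which rounds to whole longs):

static size_t idset_bytes(int num_ssid, int num_id)
{
	/* header plus one bit per (ssid, id) pair, rounded up to bytes */
	return sizeof(struct idset) + DIV_ROUND_UP(num_ssid * num_id, 8);
}
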
diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c
index d10c73cc1688..d15648514a0f 100644
--- a/drivers/s390/cio/qdio_main.c
+++ b/drivers/s390/cio/qdio_main.c
@@ -1355,7 +1355,7 @@ int qdio_allocate(struct qdio_initialize *init_data)
1355 goto out_rel; 1355 goto out_rel;
1356 1356
1357 /* qdr is used in ccw1.cda which is u32 */ 1357 /* qdr is used in ccw1.cda which is u32 */
1358 irq_ptr->qdr = kzalloc(sizeof(struct qdr), GFP_KERNEL | GFP_DMA); 1358 irq_ptr->qdr = (struct qdr *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1359 if (!irq_ptr->qdr) 1359 if (!irq_ptr->qdr)
1360 goto out_rel; 1360 goto out_rel;
1361 WARN_ON((unsigned long)irq_ptr->qdr & 0xfff); 1361 WARN_ON((unsigned long)irq_ptr->qdr & 0xfff);
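The qdr descriptor is handed to the hardware through ccw1.cda, which only holds a 31-bit address, and the WARN_ON after the allocation insists on 4 KiB alignment. get_zeroed_page() satisfies both by construction, where kzalloc() only guaranteed the GFP_DMA zone. Sketched as a pair of hypothetical helpers:

static struct qdr *qdr_alloc(void)
{
	/* a full zeroed page: 4 KiB aligned and, with GFP_DMA,
	 * addressable through the 31-bit ccw1.cda */
	return (struct qdr *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
}

static void qdr_free(struct qdr *qdr)
{
	free_page((unsigned long) qdr);
}
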
diff --git a/drivers/s390/cio/qdio_perf.c b/drivers/s390/cio/qdio_perf.c
index ea01b85b1cc9..ec5c4a414235 100644
--- a/drivers/s390/cio/qdio_perf.c
+++ b/drivers/s390/cio/qdio_perf.c
@@ -142,7 +142,7 @@ int __init qdio_setup_perf_stats(void)
142 return 0; 142 return 0;
143} 143}
144 144
145void __exit qdio_remove_perf_stats(void) 145void qdio_remove_perf_stats(void)
146{ 146{
147#ifdef CONFIG_PROC_FS 147#ifdef CONFIG_PROC_FS
148 remove_proc_entry("qdio_perf", NULL); 148 remove_proc_entry("qdio_perf", NULL);
diff --git a/drivers/s390/cio/qdio_setup.c b/drivers/s390/cio/qdio_setup.c
index f0923a8aceda..1bd2a208db28 100644
--- a/drivers/s390/cio/qdio_setup.c
+++ b/drivers/s390/cio/qdio_setup.c
@@ -325,7 +325,7 @@ void qdio_release_memory(struct qdio_irq *irq_ptr)
325 kmem_cache_free(qdio_q_cache, q); 325 kmem_cache_free(qdio_q_cache, q);
326 } 326 }
327 } 327 }
328 kfree(irq_ptr->qdr); 328 free_page((unsigned long) irq_ptr->qdr);
329 free_page(irq_ptr->chsc_page); 329 free_page(irq_ptr->chsc_page);
330 free_page((unsigned long) irq_ptr); 330 free_page((unsigned long) irq_ptr);
331} 331}
@@ -515,7 +515,7 @@ int __init qdio_setup_init(void)
515 return 0; 515 return 0;
516} 516}
517 517
518void __exit qdio_setup_exit(void) 518void qdio_setup_exit(void)
519{ 519{
520 kmem_cache_destroy(qdio_q_cache); 520 kmem_cache_destroy(qdio_q_cache);
521} 521}
diff --git a/drivers/s390/kvm/kvm_virtio.c b/drivers/s390/kvm/kvm_virtio.c
index 79954bd6bfa5..292b60da6dc7 100644
--- a/drivers/s390/kvm/kvm_virtio.c
+++ b/drivers/s390/kvm/kvm_virtio.c
@@ -352,7 +352,7 @@ static __init int early_put_chars(u32 vtermno, const char *buf, int count)
352 return len; 352 return len;
353} 353}
354 354
355void s390_virtio_console_init(void) 355void __init s390_virtio_console_init(void)
356{ 356{
357 virtio_cons_early_init(early_put_chars); 357 virtio_cons_early_init(early_put_chars);
358} 358}
diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h
index 1895dbb553cd..80971c21ea1a 100644
--- a/drivers/s390/net/qeth_core.h
+++ b/drivers/s390/net/qeth_core.h
@@ -419,6 +419,7 @@ struct qeth_qdio_out_buffer {
419 int next_element_to_fill; 419 int next_element_to_fill;
420 struct sk_buff_head skb_list; 420 struct sk_buff_head skb_list;
421 struct list_head ctx_list; 421 struct list_head ctx_list;
422 int is_header[16];
422}; 423};
423 424
424struct qeth_card; 425struct qeth_card;
@@ -785,7 +786,7 @@ void qeth_core_remove_osn_attributes(struct device *);
785 786
786/* exports for qeth discipline device drivers */ 787/* exports for qeth discipline device drivers */
787extern struct qeth_card_list_struct qeth_core_card_list; 788extern struct qeth_card_list_struct qeth_core_card_list;
788 789extern struct kmem_cache *qeth_core_header_cache;
789extern struct qeth_dbf_info qeth_dbf[QETH_DBF_INFOS]; 790extern struct qeth_dbf_info qeth_dbf[QETH_DBF_INFOS];
790 791
791void qeth_set_allowed_threads(struct qeth_card *, unsigned long , int); 792void qeth_set_allowed_threads(struct qeth_card *, unsigned long , int);
@@ -843,7 +844,7 @@ int qeth_get_priority_queue(struct qeth_card *, struct sk_buff *, int, int);
843int qeth_get_elements_no(struct qeth_card *, void *, struct sk_buff *, int); 844int qeth_get_elements_no(struct qeth_card *, void *, struct sk_buff *, int);
844int qeth_do_send_packet_fast(struct qeth_card *, struct qeth_qdio_out_q *, 845int qeth_do_send_packet_fast(struct qeth_card *, struct qeth_qdio_out_q *,
845 struct sk_buff *, struct qeth_hdr *, int, 846 struct sk_buff *, struct qeth_hdr *, int,
846 struct qeth_eddp_context *); 847 struct qeth_eddp_context *, int, int);
847int qeth_do_send_packet(struct qeth_card *, struct qeth_qdio_out_q *, 848int qeth_do_send_packet(struct qeth_card *, struct qeth_qdio_out_q *,
848 struct sk_buff *, struct qeth_hdr *, 849 struct sk_buff *, struct qeth_hdr *,
849 int, struct qeth_eddp_context *); 850 int, struct qeth_eddp_context *);
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index cebb25e36e82..bd420d1b9a0d 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -19,8 +19,8 @@
19#include <linux/mii.h> 19#include <linux/mii.h>
20#include <linux/kthread.h> 20#include <linux/kthread.h>
21 21
22#include <asm-s390/ebcdic.h> 22#include <asm/ebcdic.h>
23#include <asm-s390/io.h> 23#include <asm/io.h>
24#include <asm/s390_rdev.h> 24#include <asm/s390_rdev.h>
25 25
26#include "qeth_core.h" 26#include "qeth_core.h"
@@ -48,6 +48,8 @@ EXPORT_SYMBOL_GPL(qeth_dbf);
48 48
49struct qeth_card_list_struct qeth_core_card_list; 49struct qeth_card_list_struct qeth_core_card_list;
50EXPORT_SYMBOL_GPL(qeth_core_card_list); 50EXPORT_SYMBOL_GPL(qeth_core_card_list);
51struct kmem_cache *qeth_core_header_cache;
52EXPORT_SYMBOL_GPL(qeth_core_header_cache);
51 53
52static struct device *qeth_core_root_dev; 54static struct device *qeth_core_root_dev;
53static unsigned int known_devices[][10] = QETH_MODELLIST_ARRAY; 55static unsigned int known_devices[][10] = QETH_MODELLIST_ARRAY;
@@ -933,6 +935,10 @@ static void qeth_clear_output_buffer(struct qeth_qdio_out_q *queue,
933 } 935 }
934 qeth_eddp_buf_release_contexts(buf); 936 qeth_eddp_buf_release_contexts(buf);
935 for (i = 0; i < QETH_MAX_BUFFER_ELEMENTS(queue->card); ++i) { 937 for (i = 0; i < QETH_MAX_BUFFER_ELEMENTS(queue->card); ++i) {
938 if (buf->buffer->element[i].addr && buf->is_header[i])
939 kmem_cache_free(qeth_core_header_cache,
940 buf->buffer->element[i].addr);
941 buf->is_header[i] = 0;
936 buf->buffer->element[i].length = 0; 942 buf->buffer->element[i].length = 0;
937 buf->buffer->element[i].addr = NULL; 943 buf->buffer->element[i].addr = NULL;
938 buf->buffer->element[i].flags = 0; 944 buf->buffer->element[i].flags = 0;
@@ -3002,8 +3008,8 @@ int qeth_get_elements_no(struct qeth_card *card, void *hdr,
3002 if (skb_shinfo(skb)->nr_frags > 0) 3008 if (skb_shinfo(skb)->nr_frags > 0)
3003 elements_needed = (skb_shinfo(skb)->nr_frags + 1); 3009 elements_needed = (skb_shinfo(skb)->nr_frags + 1);
3004 if (elements_needed == 0) 3010 if (elements_needed == 0)
3005 elements_needed = 1 + (((((unsigned long) hdr) % PAGE_SIZE) 3011 elements_needed = 1 + (((((unsigned long) skb->data) %
3006 + skb->len) >> PAGE_SHIFT); 3012 PAGE_SIZE) + skb->len) >> PAGE_SHIFT);
3007 if ((elements_needed + elems) > QETH_MAX_BUFFER_ELEMENTS(card)) { 3013 if ((elements_needed + elems) > QETH_MAX_BUFFER_ELEMENTS(card)) {
3008 QETH_DBF_MESSAGE(2, "Invalid size of IP packet " 3014 QETH_DBF_MESSAGE(2, "Invalid size of IP packet "
3009 "(Number=%d / Length=%d). Discarded.\n", 3015 "(Number=%d / Length=%d). Discarded.\n",
@@ -3015,7 +3021,8 @@ int qeth_get_elements_no(struct qeth_card *card, void *hdr,
3015EXPORT_SYMBOL_GPL(qeth_get_elements_no); 3021EXPORT_SYMBOL_GPL(qeth_get_elements_no);
3016 3022
3017static inline void __qeth_fill_buffer(struct sk_buff *skb, 3023static inline void __qeth_fill_buffer(struct sk_buff *skb,
3018 struct qdio_buffer *buffer, int is_tso, int *next_element_to_fill) 3024 struct qdio_buffer *buffer, int is_tso, int *next_element_to_fill,
3025 int offset)
3019{ 3026{
3020 int length = skb->len; 3027 int length = skb->len;
3021 int length_here; 3028 int length_here;
@@ -3027,6 +3034,11 @@ static inline void __qeth_fill_buffer(struct sk_buff *skb,
3027 data = skb->data; 3034 data = skb->data;
3028 first_lap = (is_tso == 0 ? 1 : 0); 3035 first_lap = (is_tso == 0 ? 1 : 0);
3029 3036
3037 if (offset >= 0) {
3038 data = skb->data + offset;
3039 first_lap = 0;
3040 }
3041
3030 while (length > 0) { 3042 while (length > 0) {
3031 /* length_here is the remaining amount of data in this page */ 3043 /* length_here is the remaining amount of data in this page */
3032 length_here = PAGE_SIZE - ((unsigned long) data % PAGE_SIZE); 3044 length_here = PAGE_SIZE - ((unsigned long) data % PAGE_SIZE);
@@ -3058,22 +3070,22 @@ static inline void __qeth_fill_buffer(struct sk_buff *skb,
3058} 3070}
3059 3071
3060static inline int qeth_fill_buffer(struct qeth_qdio_out_q *queue, 3072static inline int qeth_fill_buffer(struct qeth_qdio_out_q *queue,
3061 struct qeth_qdio_out_buffer *buf, struct sk_buff *skb) 3073 struct qeth_qdio_out_buffer *buf, struct sk_buff *skb,
3074 struct qeth_hdr *hdr, int offset, int hd_len)
3062{ 3075{
3063 struct qdio_buffer *buffer; 3076 struct qdio_buffer *buffer;
3064 struct qeth_hdr_tso *hdr;
3065 int flush_cnt = 0, hdr_len, large_send = 0; 3077 int flush_cnt = 0, hdr_len, large_send = 0;
3066 3078
3067 buffer = buf->buffer; 3079 buffer = buf->buffer;
3068 atomic_inc(&skb->users); 3080 atomic_inc(&skb->users);
3069 skb_queue_tail(&buf->skb_list, skb); 3081 skb_queue_tail(&buf->skb_list, skb);
3070 3082
3071 hdr = (struct qeth_hdr_tso *) skb->data;
3072 /* check first on TSO ... */ 3083 /* check first on TSO ... */

3073 if (hdr->hdr.hdr.l3.id == QETH_HEADER_TYPE_TSO) { 3084 if (hdr->hdr.l3.id == QETH_HEADER_TYPE_TSO) {
3074 int element = buf->next_element_to_fill; 3085 int element = buf->next_element_to_fill;
3075 3086
3076 hdr_len = sizeof(struct qeth_hdr_tso) + hdr->ext.dg_hdr_len; 3087 hdr_len = sizeof(struct qeth_hdr_tso) +
3088 ((struct qeth_hdr_tso *)hdr)->ext.dg_hdr_len;
3077 /*fill first buffer entry only with header information */ 3089 /*fill first buffer entry only with header information */
3078 buffer->element[element].addr = skb->data; 3090 buffer->element[element].addr = skb->data;
3079 buffer->element[element].length = hdr_len; 3091 buffer->element[element].length = hdr_len;
@@ -3083,9 +3095,20 @@ static inline int qeth_fill_buffer(struct qeth_qdio_out_q *queue,
3083 skb->len -= hdr_len; 3095 skb->len -= hdr_len;
3084 large_send = 1; 3096 large_send = 1;
3085 } 3097 }
3098
3099 if (offset >= 0) {
3100 int element = buf->next_element_to_fill;
3101 buffer->element[element].addr = hdr;
3102 buffer->element[element].length = sizeof(struct qeth_hdr) +
3103 hd_len;
3104 buffer->element[element].flags = SBAL_FLAGS_FIRST_FRAG;
3105 buf->is_header[element] = 1;
3106 buf->next_element_to_fill++;
3107 }
3108
3086 if (skb_shinfo(skb)->nr_frags == 0) 3109 if (skb_shinfo(skb)->nr_frags == 0)
3087 __qeth_fill_buffer(skb, buffer, large_send, 3110 __qeth_fill_buffer(skb, buffer, large_send,
3088 (int *)&buf->next_element_to_fill); 3111 (int *)&buf->next_element_to_fill, offset);
3089 else 3112 else
3090 __qeth_fill_buffer_frag(skb, buffer, large_send, 3113 __qeth_fill_buffer_frag(skb, buffer, large_send,
3091 (int *)&buf->next_element_to_fill); 3114 (int *)&buf->next_element_to_fill);
@@ -3115,7 +3138,7 @@ static inline int qeth_fill_buffer(struct qeth_qdio_out_q *queue,
3115int qeth_do_send_packet_fast(struct qeth_card *card, 3138int qeth_do_send_packet_fast(struct qeth_card *card,
3116 struct qeth_qdio_out_q *queue, struct sk_buff *skb, 3139 struct qeth_qdio_out_q *queue, struct sk_buff *skb,
3117 struct qeth_hdr *hdr, int elements_needed, 3140 struct qeth_hdr *hdr, int elements_needed,
3118 struct qeth_eddp_context *ctx) 3141 struct qeth_eddp_context *ctx, int offset, int hd_len)
3119{ 3142{
3120 struct qeth_qdio_out_buffer *buffer; 3143 struct qeth_qdio_out_buffer *buffer;
3121 int buffers_needed = 0; 3144 int buffers_needed = 0;
@@ -3148,7 +3171,7 @@ int qeth_do_send_packet_fast(struct qeth_card *card,
3148 } 3171 }
3149 atomic_set(&queue->state, QETH_OUT_Q_UNLOCKED); 3172 atomic_set(&queue->state, QETH_OUT_Q_UNLOCKED);
3150 if (ctx == NULL) { 3173 if (ctx == NULL) {
3151 qeth_fill_buffer(queue, buffer, skb); 3174 qeth_fill_buffer(queue, buffer, skb, hdr, offset, hd_len);
3152 qeth_flush_buffers(queue, index, 1); 3175 qeth_flush_buffers(queue, index, 1);
3153 } else { 3176 } else {
3154 flush_cnt = qeth_eddp_fill_buffer(queue, ctx, index); 3177 flush_cnt = qeth_eddp_fill_buffer(queue, ctx, index);
@@ -3224,7 +3247,7 @@ int qeth_do_send_packet(struct qeth_card *card, struct qeth_qdio_out_q *queue,
3224 } 3247 }
3225 } 3248 }
3226 if (ctx == NULL) 3249 if (ctx == NULL)
3227 tmp = qeth_fill_buffer(queue, buffer, skb); 3250 tmp = qeth_fill_buffer(queue, buffer, skb, hdr, -1, 0);
3228 else { 3251 else {
3229 tmp = qeth_eddp_fill_buffer(queue, ctx, 3252 tmp = qeth_eddp_fill_buffer(queue, ctx,
3230 queue->next_buf_to_fill); 3253 queue->next_buf_to_fill);
@@ -4443,8 +4466,17 @@ static int __init qeth_core_init(void)
4443 rc = IS_ERR(qeth_core_root_dev) ? PTR_ERR(qeth_core_root_dev) : 0; 4466 rc = IS_ERR(qeth_core_root_dev) ? PTR_ERR(qeth_core_root_dev) : 0;
4444 if (rc) 4467 if (rc)
4445 goto register_err; 4468 goto register_err;
4446 return 0;
4447 4469
4470 qeth_core_header_cache = kmem_cache_create("qeth_hdr",
4471 sizeof(struct qeth_hdr) + ETH_HLEN, 64, 0, NULL);
4472 if (!qeth_core_header_cache) {
4473 rc = -ENOMEM;
4474 goto slab_err;
4475 }
4476
4477 return 0;
4478slab_err:
4479 s390_root_dev_unregister(qeth_core_root_dev);
4448register_err: 4480register_err:
4449 driver_remove_file(&qeth_core_ccwgroup_driver.driver, 4481 driver_remove_file(&qeth_core_ccwgroup_driver.driver,
4450 &driver_attr_group); 4482 &driver_attr_group);
@@ -4466,6 +4498,7 @@ static void __exit qeth_core_exit(void)
4466 &driver_attr_group); 4498 &driver_attr_group);
4467 ccwgroup_driver_unregister(&qeth_core_ccwgroup_driver); 4499 ccwgroup_driver_unregister(&qeth_core_ccwgroup_driver);
4468 ccw_driver_unregister(&qeth_ccw_driver); 4500 ccw_driver_unregister(&qeth_ccw_driver);
4501 kmem_cache_destroy(qeth_core_header_cache);
4469 qeth_unregister_dbf_views(); 4502 qeth_unregister_dbf_views();
4470 PRINT_INFO("core functions removed\n"); 4503 PRINT_INFO("core functions removed\n");
4471} 4504}
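The new qeth_hdr slab backs the IQD transmit fast path added below in the l2/l3 drivers: a header object is carved out per packet with GFP_ATOMIC and returned when the output buffer completes, tracked through the is_header[] flags. Its lifecycle, reduced to a self-contained sketch (demo names only):

static int qeth_hdr_cache_demo(void)
{
	struct kmem_cache *cache;
	struct qeth_hdr *hdr;

	/* object = qeth header plus copied Ethernet header,
	 * 64-byte aligned for the hardware */
	cache = kmem_cache_create("qeth_hdr_demo",
				  sizeof(struct qeth_hdr) + ETH_HLEN,
				  64, 0, NULL);
	if (!cache)
		return -ENOMEM;
	hdr = kmem_cache_alloc(cache, GFP_ATOMIC);	/* per packet, xmit path */
	if (hdr)
		kmem_cache_free(cache, hdr);		/* on buffer completion */
	kmem_cache_destroy(cache);			/* module exit */
	return 0;
}
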
diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index a8b069cd9a4c..b3cee032f578 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -243,8 +243,7 @@ static void qeth_l2_get_packet_type(struct qeth_card *card,
243static void qeth_l2_fill_header(struct qeth_card *card, struct qeth_hdr *hdr, 243static void qeth_l2_fill_header(struct qeth_card *card, struct qeth_hdr *hdr,
244 struct sk_buff *skb, int ipv, int cast_type) 244 struct sk_buff *skb, int ipv, int cast_type)
245{ 245{
246 struct vlan_ethhdr *veth = (struct vlan_ethhdr *)((skb->data) + 246 struct vlan_ethhdr *veth = (struct vlan_ethhdr *)skb_mac_header(skb);
247 QETH_HEADER_SIZE);
248 247
249 memset(hdr, 0, sizeof(struct qeth_hdr)); 248 memset(hdr, 0, sizeof(struct qeth_hdr));
250 hdr->hdr.l2.id = QETH_HEADER_TYPE_LAYER2; 249 hdr->hdr.l2.id = QETH_HEADER_TYPE_LAYER2;
@@ -621,6 +620,9 @@ static int qeth_l2_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
621 int tx_bytes = skb->len; 620 int tx_bytes = skb->len;
622 enum qeth_large_send_types large_send = QETH_LARGE_SEND_NO; 621 enum qeth_large_send_types large_send = QETH_LARGE_SEND_NO;
623 struct qeth_eddp_context *ctx = NULL; 622 struct qeth_eddp_context *ctx = NULL;
623 int data_offset = -1;
624 int elements_needed = 0;
625 int hd_len = 0;
624 626
625 if ((card->state != CARD_STATE_UP) || !card->lan_online) { 627 if ((card->state != CARD_STATE_UP) || !card->lan_online) {
626 card->stats.tx_carrier_errors++; 628 card->stats.tx_carrier_errors++;
@@ -643,13 +645,32 @@ static int qeth_l2_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
643 if (card->info.type == QETH_CARD_TYPE_OSN) 645 if (card->info.type == QETH_CARD_TYPE_OSN)
644 hdr = (struct qeth_hdr *)skb->data; 646 hdr = (struct qeth_hdr *)skb->data;
645 else { 647 else {
646 /* create a clone with writeable headroom */ 648 if ((card->info.type == QETH_CARD_TYPE_IQD) && (!large_send) &&
647 new_skb = skb_realloc_headroom(skb, sizeof(struct qeth_hdr)); 649 (skb_shinfo(skb)->nr_frags == 0)) {
648 if (!new_skb) 650 new_skb = skb;
649 goto tx_drop; 651 data_offset = ETH_HLEN;
650 hdr = (struct qeth_hdr *)skb_push(new_skb, 652 hd_len = ETH_HLEN;
653 hdr = kmem_cache_alloc(qeth_core_header_cache,
654 GFP_ATOMIC);
655 if (!hdr)
656 goto tx_drop;
657 elements_needed++;
658 skb_reset_mac_header(new_skb);
659 qeth_l2_fill_header(card, hdr, new_skb, ipv, cast_type);
660 hdr->hdr.l2.pkt_length = new_skb->len;
661 memcpy(((char *)hdr) + sizeof(struct qeth_hdr),
662 skb_mac_header(new_skb), ETH_HLEN);
663 } else {
664 /* create a clone with writeable headroom */
665 new_skb = skb_realloc_headroom(skb,
666 sizeof(struct qeth_hdr));
667 if (!new_skb)
668 goto tx_drop;
669 hdr = (struct qeth_hdr *)skb_push(new_skb,
651 sizeof(struct qeth_hdr)); 670 sizeof(struct qeth_hdr));
652 qeth_l2_fill_header(card, hdr, new_skb, ipv, cast_type); 671 skb_set_mac_header(new_skb, sizeof(struct qeth_hdr));
672 qeth_l2_fill_header(card, hdr, new_skb, ipv, cast_type);
673 }
653 } 674 }
654 675
655 if (large_send == QETH_LARGE_SEND_EDDP) { 676 if (large_send == QETH_LARGE_SEND_EDDP) {
@@ -660,9 +681,13 @@ static int qeth_l2_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
660 goto tx_drop; 681 goto tx_drop;
661 } 682 }
662 } else { 683 } else {
663 elements = qeth_get_elements_no(card, (void *)hdr, new_skb, 0); 684 elements = qeth_get_elements_no(card, (void *)hdr, new_skb,
664 if (!elements) 685 elements_needed);
686 if (!elements) {
687 if (data_offset >= 0)
688 kmem_cache_free(qeth_core_header_cache, hdr);
665 goto tx_drop; 689 goto tx_drop;
690 }
666 } 691 }
667 692
668 if ((large_send == QETH_LARGE_SEND_NO) && 693 if ((large_send == QETH_LARGE_SEND_NO) &&
@@ -674,7 +699,7 @@ static int qeth_l2_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
674 elements, ctx); 699 elements, ctx);
675 else 700 else
676 rc = qeth_do_send_packet_fast(card, queue, new_skb, hdr, 701 rc = qeth_do_send_packet_fast(card, queue, new_skb, hdr,
677 elements, ctx); 702 elements, ctx, data_offset, hd_len);
678 if (!rc) { 703 if (!rc) {
679 card->stats.tx_packets++; 704 card->stats.tx_packets++;
680 card->stats.tx_bytes += tx_bytes; 705 card->stats.tx_bytes += tx_bytes;
@@ -701,6 +726,9 @@ static int qeth_l2_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
701 if (ctx != NULL) 726 if (ctx != NULL)
702 qeth_eddp_put_context(ctx); 727 qeth_eddp_put_context(ctx);
703 728
729 if (data_offset >= 0)
730 kmem_cache_free(qeth_core_header_cache, hdr);
731
704 if (rc == -EBUSY) { 732 if (rc == -EBUSY) {
705 if (new_skb != skb) 733 if (new_skb != skb)
706 dev_kfree_skb_any(new_skb); 734 dev_kfree_skb_any(new_skb);
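The rewritten xmit path hinges on one predicate: on HiperSockets (IQD) devices a linear, non-TSO skb no longer needs writable headroom, because the qeth header comes from the slab and is mapped as its own buffer element (data_offset and hd_len both ETH_HLEN). A hypothetical helper naming that test:

static bool qeth_l2_use_hdr_cache(struct qeth_card *card,
				  struct sk_buff *skb,
				  enum qeth_large_send_types large_send)
{
	return card->info.type == QETH_CARD_TYPE_IQD &&
	       large_send == QETH_LARGE_SEND_NO &&	/* no TSO/EDDP */
	       skb_shinfo(skb)->nr_frags == 0;		/* linear data only */
}
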
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index 3e1d13857350..dd72c3c20165 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -2604,6 +2604,7 @@ static int qeth_l3_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
2604 int tx_bytes = skb->len; 2604 int tx_bytes = skb->len;
2605 enum qeth_large_send_types large_send = QETH_LARGE_SEND_NO; 2605 enum qeth_large_send_types large_send = QETH_LARGE_SEND_NO;
2606 struct qeth_eddp_context *ctx = NULL; 2606 struct qeth_eddp_context *ctx = NULL;
2607 int data_offset = -1;
2607 2608
2608 if ((card->info.type == QETH_CARD_TYPE_IQD) && 2609 if ((card->info.type == QETH_CARD_TYPE_IQD) &&
2609 (skb->protocol != htons(ETH_P_IPV6)) && 2610 (skb->protocol != htons(ETH_P_IPV6)) &&
@@ -2624,14 +2625,28 @@ static int qeth_l3_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
2624 card->perf_stats.outbound_start_time = qeth_get_micros(); 2625 card->perf_stats.outbound_start_time = qeth_get_micros();
2625 } 2626 }
2626 2627
2627 /* create a clone with writeable headroom */ 2628 if (skb_is_gso(skb))
2628 new_skb = skb_realloc_headroom(skb, sizeof(struct qeth_hdr_tso) + 2629 large_send = card->options.large_send;
2629 VLAN_HLEN); 2630
2630 if (!new_skb) 2631 if ((card->info.type == QETH_CARD_TYPE_IQD) && (!large_send) &&
2631 goto tx_drop; 2632 (skb_shinfo(skb)->nr_frags == 0)) {
2633 new_skb = skb;
2634 data_offset = ETH_HLEN;
2635 hdr = kmem_cache_alloc(qeth_core_header_cache, GFP_ATOMIC);
2636 if (!hdr)
2637 goto tx_drop;
2638 elements_needed++;
2639 } else {
2640 /* create a clone with writeable headroom */
2641 new_skb = skb_realloc_headroom(skb, sizeof(struct qeth_hdr_tso)
2642 + VLAN_HLEN);
2643 if (!new_skb)
2644 goto tx_drop;
2645 }
2632 2646
2633 if (card->info.type == QETH_CARD_TYPE_IQD) { 2647 if (card->info.type == QETH_CARD_TYPE_IQD) {
2634 skb_pull(new_skb, ETH_HLEN); 2648 if (data_offset < 0)
2649 skb_pull(new_skb, ETH_HLEN);
2635 } else { 2650 } else {
2636 if (new_skb->protocol == htons(ETH_P_IP)) { 2651 if (new_skb->protocol == htons(ETH_P_IP)) {
2637 if (card->dev->type == ARPHRD_IEEE802_TR) 2652 if (card->dev->type == ARPHRD_IEEE802_TR)
@@ -2657,9 +2672,6 @@ static int qeth_l3_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
2657 2672
2658 netif_stop_queue(dev); 2673 netif_stop_queue(dev);
2659 2674
2660 if (skb_is_gso(new_skb))
2661 large_send = card->options.large_send;
2662
2663 /* fix hardware limitation: as long as we do not have sbal 2675 /* fix hardware limitation: as long as we do not have sbal
2664 * chaining we can not send long frag lists so we temporary 2676 * chaining we can not send long frag lists so we temporary
2665 * switch to EDDP 2677 * switch to EDDP
@@ -2677,9 +2689,16 @@ static int qeth_l3_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
2677 qeth_tso_fill_header(card, hdr, new_skb); 2689 qeth_tso_fill_header(card, hdr, new_skb);
2678 elements_needed++; 2690 elements_needed++;
2679 } else { 2691 } else {
2680 hdr = (struct qeth_hdr *)skb_push(new_skb, 2692 if (data_offset < 0) {
2693 hdr = (struct qeth_hdr *)skb_push(new_skb,
2681 sizeof(struct qeth_hdr)); 2694 sizeof(struct qeth_hdr));
2682 qeth_l3_fill_header(card, hdr, new_skb, ipv, cast_type); 2695 qeth_l3_fill_header(card, hdr, new_skb, ipv,
2696 cast_type);
2697 } else {
2698 qeth_l3_fill_header(card, hdr, new_skb, ipv,
2699 cast_type);
2700 hdr->hdr.l3.length = new_skb->len - data_offset;
2701 }
2683 } 2702 }
2684 2703
2685 if (large_send == QETH_LARGE_SEND_EDDP) { 2704 if (large_send == QETH_LARGE_SEND_EDDP) {
@@ -2695,8 +2714,11 @@ static int qeth_l3_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
2695 } else { 2714 } else {
2696 int elems = qeth_get_elements_no(card, (void *)hdr, new_skb, 2715 int elems = qeth_get_elements_no(card, (void *)hdr, new_skb,
2697 elements_needed); 2716 elements_needed);
2698 if (!elems) 2717 if (!elems) {
2718 if (data_offset >= 0)
2719 kmem_cache_free(qeth_core_header_cache, hdr);
2699 goto tx_drop; 2720 goto tx_drop;
2721 }
2700 elements_needed += elems; 2722 elements_needed += elems;
2701 } 2723 }
2702 2724
@@ -2709,7 +2731,7 @@ static int qeth_l3_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
2709 elements_needed, ctx); 2731 elements_needed, ctx);
2710 else 2732 else
2711 rc = qeth_do_send_packet_fast(card, queue, new_skb, hdr, 2733 rc = qeth_do_send_packet_fast(card, queue, new_skb, hdr,
2712 elements_needed, ctx); 2734 elements_needed, ctx, data_offset, 0);
2713 2735
2714 if (!rc) { 2736 if (!rc) {
2715 card->stats.tx_packets++; 2737 card->stats.tx_packets++;
@@ -2737,6 +2759,9 @@ static int qeth_l3_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
2737 if (ctx != NULL) 2759 if (ctx != NULL)
2738 qeth_eddp_put_context(ctx); 2760 qeth_eddp_put_context(ctx);
2739 2761
2762 if (data_offset >= 0)
2763 kmem_cache_free(qeth_core_header_cache, hdr);
2764
2740 if (rc == -EBUSY) { 2765 if (rc == -EBUSY) {
2741 if (new_skb != skb) 2766 if (new_skb != skb)
2742 dev_kfree_skb_any(new_skb); 2767 dev_kfree_skb_any(new_skb);
diff --git a/drivers/serial/8250.c b/drivers/serial/8250.c
index a97f1ae11f78..342e12fb1c25 100644
--- a/drivers/serial/8250.c
+++ b/drivers/serial/8250.c
@@ -1885,7 +1885,7 @@ static int serial8250_startup(struct uart_port *port)
1885 * the interrupt is enabled. Delays are necessary to 1885 * the interrupt is enabled. Delays are necessary to
1886 * allow register changes to become visible. 1886 * allow register changes to become visible.
1887 */ 1887 */
1888 spin_lock(&up->port.lock); 1888 spin_lock_irqsave(&up->port.lock, flags);
1889 if (up->port.flags & UPF_SHARE_IRQ) 1889 if (up->port.flags & UPF_SHARE_IRQ)
1890 disable_irq_nosync(up->port.irq); 1890 disable_irq_nosync(up->port.irq);
1891 1891
@@ -1901,7 +1901,7 @@ static int serial8250_startup(struct uart_port *port)
1901 1901
1902 if (up->port.flags & UPF_SHARE_IRQ) 1902 if (up->port.flags & UPF_SHARE_IRQ)
1903 enable_irq(up->port.irq); 1903 enable_irq(up->port.irq);
1904 spin_unlock(&up->port.lock); 1904 spin_unlock_irqrestore(&up->port.lock, flags);
1905 1905
1906 /* 1906 /*
1907 * If the interrupt is not reasserted, setup a timer to 1907 * If the interrupt is not reasserted, setup a timer to
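The 8250 fix replaces a plain spin_lock() with spin_lock_irqsave() because serial8250_startup() runs with interrupts enabled while the same port lock is taken from the UART's interrupt handler; an interrupt arriving on the same CPU inside the critical section would self-deadlock. The pattern in isolation (sketch; struct uart_8250_port is the driver-internal type):

static void poke_registers_locked(struct uart_8250_port *up)
{
	unsigned long flags;

	spin_lock_irqsave(&up->port.lock, flags);	/* masks local IRQs, saves state */
	/* ... register changes with the handler locked out ... */
	spin_unlock_irqrestore(&up->port.lock, flags);	/* restores prior IRQ state */
}
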
diff --git a/drivers/serial/Makefile b/drivers/serial/Makefile
index 3a0bbbe17aa3..7e7383e890d8 100644
--- a/drivers/serial/Makefile
+++ b/drivers/serial/Makefile
@@ -42,7 +42,6 @@ obj-$(CONFIG_SERIAL_68328) += 68328serial.o
42obj-$(CONFIG_SERIAL_68360) += 68360serial.o 42obj-$(CONFIG_SERIAL_68360) += 68360serial.o
43obj-$(CONFIG_SERIAL_COLDFIRE) += mcfserial.o 43obj-$(CONFIG_SERIAL_COLDFIRE) += mcfserial.o
44obj-$(CONFIG_SERIAL_MCF) += mcf.o 44obj-$(CONFIG_SERIAL_MCF) += mcf.o
45obj-$(CONFIG_V850E_UART) += v850e_uart.o
46obj-$(CONFIG_SERIAL_PMACZILOG) += pmac_zilog.o 45obj-$(CONFIG_SERIAL_PMACZILOG) += pmac_zilog.o
47obj-$(CONFIG_SERIAL_LH7A40X) += serial_lh7a40x.o 46obj-$(CONFIG_SERIAL_LH7A40X) += serial_lh7a40x.o
48obj-$(CONFIG_SERIAL_DZ) += dz.o 47obj-$(CONFIG_SERIAL_DZ) += dz.o
diff --git a/drivers/serial/cpm_uart/cpm_uart.h b/drivers/serial/cpm_uart/cpm_uart.h
index 5c76e0ae0582..7274b527a3c1 100644
--- a/drivers/serial/cpm_uart/cpm_uart.h
+++ b/drivers/serial/cpm_uart/cpm_uart.h
@@ -50,6 +50,15 @@
50 50
51#define SCC_WAIT_CLOSING 100 51#define SCC_WAIT_CLOSING 100
52 52
53#define GPIO_CTS 0
54#define GPIO_RTS 1
55#define GPIO_DCD 2
56#define GPIO_DSR 3
57#define GPIO_DTR 4
58#define GPIO_RI 5
59
60#define NUM_GPIOS (GPIO_RI+1)
61
53struct uart_cpm_port { 62struct uart_cpm_port {
54 struct uart_port port; 63 struct uart_port port;
55 u16 rx_nrfifos; 64 u16 rx_nrfifos;
@@ -68,6 +77,7 @@ struct uart_cpm_port {
68 unsigned char *rx_buf; 77 unsigned char *rx_buf;
69 u32 flags; 78 u32 flags;
70 void (*set_lineif)(struct uart_cpm_port *); 79 void (*set_lineif)(struct uart_cpm_port *);
80 struct clk *clk;
71 u8 brg; 81 u8 brg;
72 uint dp_addr; 82 uint dp_addr;
73 void *mem_addr; 83 void *mem_addr;
@@ -82,6 +92,7 @@ struct uart_cpm_port {
82 int wait_closing; 92 int wait_closing;
83 /* value to combine with opcode to form cpm command */ 93 /* value to combine with opcode to form cpm command */
84 u32 command; 94 u32 command;
95 int gpios[NUM_GPIOS];
85}; 96};
86 97
87extern int cpm_uart_nr; 98extern int cpm_uart_nr;
diff --git a/drivers/serial/cpm_uart/cpm_uart_core.c b/drivers/serial/cpm_uart/cpm_uart_core.c
index a4f86927a74b..25efca5a7a1f 100644
--- a/drivers/serial/cpm_uart/cpm_uart_core.c
+++ b/drivers/serial/cpm_uart/cpm_uart_core.c
@@ -43,6 +43,9 @@
43#include <linux/dma-mapping.h> 43#include <linux/dma-mapping.h>
44#include <linux/fs_uart_pd.h> 44#include <linux/fs_uart_pd.h>
45#include <linux/of_platform.h> 45#include <linux/of_platform.h>
46#include <linux/gpio.h>
47#include <linux/of_gpio.h>
48#include <linux/clk.h>
46 49
47#include <asm/io.h> 50#include <asm/io.h>
48#include <asm/irq.h> 51#include <asm/irq.h>
@@ -96,13 +99,41 @@ static unsigned int cpm_uart_tx_empty(struct uart_port *port)
96 99
97static void cpm_uart_set_mctrl(struct uart_port *port, unsigned int mctrl) 100static void cpm_uart_set_mctrl(struct uart_port *port, unsigned int mctrl)
98{ 101{
99 /* Whee. Do nothing. */ 102 struct uart_cpm_port *pinfo = (struct uart_cpm_port *)port;
103
104 if (pinfo->gpios[GPIO_RTS] >= 0)
105 gpio_set_value(pinfo->gpios[GPIO_RTS], !(mctrl & TIOCM_RTS));
106
107 if (pinfo->gpios[GPIO_DTR] >= 0)
108 gpio_set_value(pinfo->gpios[GPIO_DTR], !(mctrl & TIOCM_DTR));
100} 109}
101 110
102static unsigned int cpm_uart_get_mctrl(struct uart_port *port) 111static unsigned int cpm_uart_get_mctrl(struct uart_port *port)
103{ 112{
104 /* Whee. Do nothing. */ 113 struct uart_cpm_port *pinfo = (struct uart_cpm_port *)port;
105 return TIOCM_CAR | TIOCM_DSR | TIOCM_CTS; 114 unsigned int mctrl = TIOCM_CTS | TIOCM_DSR | TIOCM_CAR;
115
116 if (pinfo->gpios[GPIO_CTS] >= 0) {
117 if (gpio_get_value(pinfo->gpios[GPIO_CTS]))
118 mctrl &= ~TIOCM_CTS;
119 }
120
121 if (pinfo->gpios[GPIO_DSR] >= 0) {
122 if (gpio_get_value(pinfo->gpios[GPIO_DSR]))
123 mctrl &= ~TIOCM_DSR;
124 }
125
126 if (pinfo->gpios[GPIO_DCD] >= 0) {
127 if (gpio_get_value(pinfo->gpios[GPIO_DCD]))
128 mctrl &= ~TIOCM_CAR;
129 }
130
131 if (pinfo->gpios[GPIO_RI] >= 0) {
132 if (!gpio_get_value(pinfo->gpios[GPIO_RI]))
133 mctrl |= TIOCM_RNG;
134 }
135
136 return mctrl;
106} 137}
107 138
108/* 139/*
@@ -566,7 +597,10 @@ static void cpm_uart_set_termios(struct uart_port *port,
566 out_be16(&sccp->scc_psmr, (sbits << 12) | scval); 597 out_be16(&sccp->scc_psmr, (sbits << 12) | scval);
567 } 598 }
568 599
569 cpm_set_brg(pinfo->brg - 1, baud); 600 if (pinfo->clk)
601 clk_set_rate(pinfo->clk, baud);
602 else
603 cpm_set_brg(pinfo->brg - 1, baud);
570 spin_unlock_irqrestore(&port->lock, flags); 604 spin_unlock_irqrestore(&port->lock, flags);
571} 605}
572 606
@@ -991,14 +1025,23 @@ static int cpm_uart_init_port(struct device_node *np,
991 void __iomem *mem, *pram; 1025 void __iomem *mem, *pram;
992 int len; 1026 int len;
993 int ret; 1027 int ret;
1028 int i;
994 1029
995 data = of_get_property(np, "fsl,cpm-brg", &len); 1030 data = of_get_property(np, "clock", NULL);
996 if (!data || len != 4) { 1031 if (data) {
997 printk(KERN_ERR "CPM UART %s has no/invalid " 1032 struct clk *clk = clk_get(NULL, (const char*)data);
998 "fsl,cpm-brg property.\n", np->name); 1033 if (!IS_ERR(clk))
999 return -EINVAL; 1034 pinfo->clk = clk;
1035 }
1036 if (!pinfo->clk) {
1037 data = of_get_property(np, "fsl,cpm-brg", &len);
1038 if (!data || len != 4) {
1039 printk(KERN_ERR "CPM UART %s has no/invalid "
1040 "fsl,cpm-brg property.\n", np->name);
1041 return -EINVAL;
1042 }
1043 pinfo->brg = *data;
1000 } 1044 }
1001 pinfo->brg = *data;
1002 1045
1003 data = of_get_property(np, "fsl,cpm-command", &len); 1046 data = of_get_property(np, "fsl,cpm-command", &len);
1004 if (!data || len != 4) { 1047 if (!data || len != 4) {
@@ -1050,6 +1093,9 @@ static int cpm_uart_init_port(struct device_node *np,
1050 goto out_pram; 1093 goto out_pram;
1051 } 1094 }
1052 1095
1096 for (i = 0; i < NUM_GPIOS; i++)
1097 pinfo->gpios[i] = of_get_gpio(np, i);
1098
1053 return cpm_uart_request_port(&pinfo->port); 1099 return cpm_uart_request_port(&pinfo->port);
1054 1100
1055out_pram: 1101out_pram:
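The GPIO-backed modem control added above treats all six lines as active-low: a 0 on the wire means the signal is asserted, which is why set_mctrl() writes the negated TIOCM bit and get_mctrl() clears a flag when it reads 1. A hypothetical helper capturing that convention:

static bool cpm_uart_gpio_asserted(struct uart_cpm_port *pinfo, int line)
{
	if (pinfo->gpios[line] < 0)		/* not wired on this board */
		return false;
	return gpio_get_value(pinfo->gpios[line]) == 0;	/* active low */
}
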
diff --git a/drivers/serial/sh-sci.h b/drivers/serial/sh-sci.h
index cd728df6a01a..8a0749e34ca3 100644
--- a/drivers/serial/sh-sci.h
+++ b/drivers/serial/sh-sci.h
@@ -451,19 +451,21 @@ SCIx_FNS(SCxSR, 0x08, 8, 0x10, 8, 0x08, 16, 0x10, 16, 0x04, 8)
451SCIx_FNS(SCxRDR, 0x0a, 8, 0x14, 8, 0x0A, 8, 0x14, 8, 0x05, 8) 451SCIx_FNS(SCxRDR, 0x0a, 8, 0x14, 8, 0x0A, 8, 0x14, 8, 0x05, 8)
452SCIF_FNS(SCFCR, 0x0c, 8, 0x18, 16) 452SCIF_FNS(SCFCR, 0x0c, 8, 0x18, 16)
453#if defined(CONFIG_CPU_SUBTYPE_SH7760) || \ 453#if defined(CONFIG_CPU_SUBTYPE_SH7760) || \
454 defined(CONFIG_CPU_SUBTYPE_SH7763) || \
455 defined(CONFIG_CPU_SUBTYPE_SH7780) || \ 454 defined(CONFIG_CPU_SUBTYPE_SH7780) || \
456 defined(CONFIG_CPU_SUBTYPE_SH7785) 455 defined(CONFIG_CPU_SUBTYPE_SH7785)
456SCIF_FNS(SCFDR, 0x0e, 16, 0x1C, 16)
457SCIF_FNS(SCTFDR, 0x0e, 16, 0x1C, 16) 457SCIF_FNS(SCTFDR, 0x0e, 16, 0x1C, 16)
458SCIF_FNS(SCRFDR, 0x0e, 16, 0x20, 16) 458SCIF_FNS(SCRFDR, 0x0e, 16, 0x20, 16)
459SCIF_FNS(SCSPTR, 0, 0, 0x24, 16) 459SCIF_FNS(SCSPTR, 0, 0, 0x24, 16)
460SCIF_FNS(SCLSR, 0, 0, 0x28, 16) 460SCIF_FNS(SCLSR, 0, 0, 0x28, 16)
461#if defined(CONFIG_CPU_SUBTYPE_SH7763) 461#elif defined(CONFIG_CPU_SUBTYPE_SH7763)
462/* SH7763 SCIF2 */
463SCIF_FNS(SCFDR, 0, 0, 0x1C, 16) 462SCIF_FNS(SCFDR, 0, 0, 0x1C, 16)
464SCIF_FNS(SCSPTR2, 0, 0, 0x20, 16) 463SCIF_FNS(SCSPTR2, 0, 0, 0x20, 16)
465SCIF_FNS(SCLSR2, 0, 0, 0x24, 16) 464SCIF_FNS(SCLSR2, 0, 0, 0x24, 16)
466#endif /* CONFIG_CPU_SUBTYPE_SH7763 */ 465SCIF_FNS(SCTFDR, 0x0e, 16, 0x1C, 16)
466SCIF_FNS(SCRFDR, 0x0e, 16, 0x20, 16)
467SCIF_FNS(SCSPTR, 0, 0, 0x24, 16)
468SCIF_FNS(SCLSR, 0, 0, 0x28, 16)
467#else 469#else
468SCIF_FNS(SCFDR, 0x0e, 16, 0x1C, 16) 470SCIF_FNS(SCFDR, 0x0e, 16, 0x1C, 16)
469#if defined(CONFIG_CPU_SUBTYPE_SH7722) 471#if defined(CONFIG_CPU_SUBTYPE_SH7722)
diff --git a/drivers/serial/v850e_uart.c b/drivers/serial/v850e_uart.c
deleted file mode 100644
index 5acf061b6cd2..000000000000
--- a/drivers/serial/v850e_uart.c
+++ /dev/null
@@ -1,548 +0,0 @@
1/*
2 * drivers/serial/v850e_uart.c -- Serial I/O using V850E on-chip UART or UARTB
3 *
4 * Copyright (C) 2001,02,03 NEC Electronics Corporation
5 * Copyright (C) 2001,02,03 Miles Bader <miles@gnu.org>
6 *
7 * This file is subject to the terms and conditions of the GNU General
8 * Public License. See the file COPYING in the main directory of this
9 * archive for more details.
10 *
11 * Written by Miles Bader <miles@gnu.org>
12 */
13
14/* This driver supports both the original V850E UART interface (called
15 merely `UART' in the docs) and the newer `UARTB' interface, which is
16 roughly a superset of the first one. The selection is made at
17 configure time -- if CONFIG_V850E_UARTB is defined, then UARTB is
18 presumed, otherwise the old UART -- as these are on-CPU UARTS, a system
19 can never have both.
20
21 The UARTB interface also has a 16-entry FIFO mode, which is not
22 yet supported by this driver. */
23
24#include <linux/kernel.h>
25#include <linux/init.h>
26#include <linux/module.h>
27#include <linux/console.h>
28#include <linux/tty.h>
29#include <linux/tty_flip.h>
30#include <linux/serial.h>
31#include <linux/serial_core.h>
32
33#include <asm/v850e_uart.h>
34
35/* Initial UART state. This may be overridden by machine-dependent headers. */
36#ifndef V850E_UART_INIT_BAUD
37#define V850E_UART_INIT_BAUD 115200
38#endif
39#ifndef V850E_UART_INIT_CFLAGS
40#define V850E_UART_INIT_CFLAGS (B115200 | CS8 | CREAD)
41#endif
42
43/* A string used for prefixing printed descriptions; since the same UART
44 macro is actually used on other chips than the V850E. This must be a
45 constant string. */
46#ifndef V850E_UART_CHIP_NAME
47#define V850E_UART_CHIP_NAME "V850E"
48#endif
49
50#define V850E_UART_MINOR_BASE 64 /* First tty minor number */
51
52
53/* Low-level UART functions. */
54
55/* Configure and turn on uart channel CHAN, using the termios `control
56 modes' bits in CFLAGS, and a baud-rate of BAUD. */
57void v850e_uart_configure (unsigned chan, unsigned cflags, unsigned baud)
58{
59 int flags;
60 v850e_uart_speed_t old_speed;
61 v850e_uart_config_t old_config;
62 v850e_uart_speed_t new_speed = v850e_uart_calc_speed (baud);
63 v850e_uart_config_t new_config = v850e_uart_calc_config (cflags);
64
65 /* Disable interrupts while we're twiddling the hardware. */
66 local_irq_save (flags);
67
68#ifdef V850E_UART_PRE_CONFIGURE
69 V850E_UART_PRE_CONFIGURE (chan, cflags, baud);
70#endif
71
72 old_config = V850E_UART_CONFIG (chan);
73 old_speed = v850e_uart_speed (chan);
74
75 if (! v850e_uart_speed_eq (old_speed, new_speed)) {
76 /* The baud rate has changed. First, disable the UART. */
77 V850E_UART_CONFIG (chan) = V850E_UART_CONFIG_FINI;
78 old_config = 0; /* Force the uart to be re-initialized. */
79
80 /* Reprogram the baud-rate generator. */
81 v850e_uart_set_speed (chan, new_speed);
82 }
83
84 if (! (old_config & V850E_UART_CONFIG_ENABLED)) {
85 /* If we are using the uart for the first time, start by
86 enabling it, which must be done before turning on any
87 other bits. */
88 V850E_UART_CONFIG (chan) = V850E_UART_CONFIG_INIT;
89 /* See the initial state. */
90 old_config = V850E_UART_CONFIG (chan);
91 }
92
93 if (new_config != old_config) {
94 /* Which of the TXE/RXE bits we'll temporarily turn off
95 before changing other control bits. */
96 unsigned temp_disable = 0;
97 /* Which of the TXE/RXE bits will be enabled. */
98 unsigned enable = 0;
99 unsigned changed_bits = new_config ^ old_config;
100
101 /* Which of RX/TX will be enabled in the new configuration. */
102 if (new_config & V850E_UART_CONFIG_RX_BITS)
103 enable |= (new_config & V850E_UART_CONFIG_RX_ENABLE);
104 if (new_config & V850E_UART_CONFIG_TX_BITS)
105 enable |= (new_config & V850E_UART_CONFIG_TX_ENABLE);
106
107 /* Figure out which of RX/TX needs to be disabled; note
108 that this will only happen if they're not already
109 disabled. */
110 if (changed_bits & V850E_UART_CONFIG_RX_BITS)
111 temp_disable
112 |= (old_config & V850E_UART_CONFIG_RX_ENABLE);
113 if (changed_bits & V850E_UART_CONFIG_TX_BITS)
114 temp_disable
115 |= (old_config & V850E_UART_CONFIG_TX_ENABLE);
116
117 /* We have to turn off RX and/or TX mode before changing
118 any associated control bits. */
119 if (temp_disable)
120 V850E_UART_CONFIG (chan) = old_config & ~temp_disable;
121
122 /* Write the new control bits, while RX/TX are disabled. */
123 if (changed_bits & ~enable)
124 V850E_UART_CONFIG (chan) = new_config & ~enable;
125
126 v850e_uart_config_delay (new_config, new_speed);
127
128 /* Write the final version, with enable bits turned on. */
129 V850E_UART_CONFIG (chan) = new_config;
130 }
131
132 local_irq_restore (flags);
133}
134
135
136/* Low-level console. */
137
138#ifdef CONFIG_V850E_UART_CONSOLE
139
140static void v850e_uart_cons_write (struct console *co,
141 const char *s, unsigned count)
142{
143 if (count > 0) {
144 unsigned chan = co->index;
145 unsigned irq = V850E_UART_TX_IRQ (chan);
146 int irq_was_enabled, irq_was_pending, flags;
147
148 /* We don't want to get `transmission completed'
149 interrupts, since we're busy-waiting, so we disable them
150 while sending (we don't disable interrupts entirely
151 because sending over a serial line is really slow). We
152 save the status of the tx interrupt and restore it when
153 we're done so that using printk doesn't interfere with
154 normal serial transmission (other than interleaving the
155 output, of course!). This should work correctly even if
156 this function is interrupted and the interrupt printks
157 something. */
158
159 /* Disable interrupts while fiddling with tx interrupt. */
160 local_irq_save (flags);
161 /* Get current tx interrupt status. */
162 irq_was_enabled = v850e_intc_irq_enabled (irq);
163 irq_was_pending = v850e_intc_irq_pending (irq);
164 /* Disable tx interrupt if necessary. */
165 if (irq_was_enabled)
166 v850e_intc_disable_irq (irq);
167 /* Turn interrupts back on. */
168 local_irq_restore (flags);
169
170 /* Send characters. */
171 while (count > 0) {
172 int ch = *s++;
173
174 if (ch == '\n') {
175 /* We don't have the benefit of a tty
176 driver, so translate NL into CR LF. */
177 v850e_uart_wait_for_xmit_ok (chan);
178 v850e_uart_putc (chan, '\r');
179 }
180
181 v850e_uart_wait_for_xmit_ok (chan);
182 v850e_uart_putc (chan, ch);
183
184 count--;
185 }
186
187 /* Restore saved tx interrupt status. */
188 if (irq_was_enabled) {
189 /* Wait for the last character we sent to be
190 completely transmitted (as we'll get an
191 interrupt at that point). */
192 v850e_uart_wait_for_xmit_done (chan);
193 /* Clear pending interrupts received due
194 to our transmission, unless there was already
195 one pending, in which case we want the
196 handler to be called. */
197 if (! irq_was_pending)
198 v850e_intc_clear_pending_irq (irq);
199 /* ... and then turn back on handling. */
200 v850e_intc_enable_irq (irq);
201 }
202 }
203}
204
205extern struct uart_driver v850e_uart_driver;
206static struct console v850e_uart_cons =
207{
208 .name = "ttyS",
209 .write = v850e_uart_cons_write,
210 .device = uart_console_device,
211 .flags = CON_PRINTBUFFER,
212 .cflag = V850E_UART_INIT_CFLAGS,
213 .index = -1,
214 .data = &v850e_uart_driver,
215};
216
217void v850e_uart_cons_init (unsigned chan)
218{
219 v850e_uart_configure (chan, V850E_UART_INIT_CFLAGS,
220 V850E_UART_INIT_BAUD);
221 v850e_uart_cons.index = chan;
222 register_console (&v850e_uart_cons);
223 printk ("Console: %s on-chip UART channel %d\n",
224 V850E_UART_CHIP_NAME, chan);
225}
226
227/* This is what the init code actually calls. */
228static int v850e_uart_console_init (void)
229{
230 v850e_uart_cons_init (V850E_UART_CONSOLE_CHANNEL);
231 return 0;
232}
233console_initcall(v850e_uart_console_init);
234
235#define V850E_UART_CONSOLE &v850e_uart_cons
236
237#else /* !CONFIG_V850E_UART_CONSOLE */
238#define V850E_UART_CONSOLE 0
239#endif /* CONFIG_V850E_UART_CONSOLE */
240
241/* TX/RX interrupt handlers. */
242
243static void v850e_uart_stop_tx (struct uart_port *port);
244
245void v850e_uart_tx (struct uart_port *port)
246{
247 struct circ_buf *xmit = &port->info->xmit;
248 int stopped = uart_tx_stopped (port);
249
250 if (v850e_uart_xmit_ok (port->line)) {
251 int tx_ch;
252
253 if (port->x_char) {
254 tx_ch = port->x_char;
255 port->x_char = 0;
256 } else if (!uart_circ_empty (xmit) && !stopped) {
257 tx_ch = xmit->buf[xmit->tail];
258 xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
259 } else
260 goto no_xmit;
261
262 v850e_uart_putc (port->line, tx_ch);
263 port->icount.tx++;
264
265 if (uart_circ_chars_pending (xmit) < WAKEUP_CHARS)
266 uart_write_wakeup (port);
267 }
268
269 no_xmit:
270 if (uart_circ_empty (xmit) || stopped)
271 v850e_uart_stop_tx (port, stopped);
272}
273
274static irqreturn_t v850e_uart_tx_irq(int irq, void *data)
275{
276 struct uart_port *port = data;
277 v850e_uart_tx (port);
278 return IRQ_HANDLED;
279}
280
281static irqreturn_t v850e_uart_rx_irq(int irq, void *data)
282{
283 struct uart_port *port = data;
284 unsigned ch_stat = TTY_NORMAL;
285 unsigned ch = v850e_uart_getc (port->line);
286 unsigned err = v850e_uart_err (port->line);
287
288 if (err) {
289 if (err & V850E_UART_ERR_OVERRUN) {
290 ch_stat = TTY_OVERRUN;
291 port->icount.overrun++;
292 } else if (err & V850E_UART_ERR_FRAME) {
293 ch_stat = TTY_FRAME;
294 port->icount.frame++;
295 } else if (err & V850E_UART_ERR_PARITY) {
296 ch_stat = TTY_PARITY;
297 port->icount.parity++;
298 }
299 }
300
301 port->icount.rx++;
302
303 tty_insert_flip_char (port->info->port.tty, ch, ch_stat);
304 tty_schedule_flip (port->info->port.tty);
305
306 return IRQ_HANDLED;
307}
308
309
310/* Control functions for the serial framework. */
311
312static void v850e_uart_nop (struct uart_port *port) { }
313static int v850e_uart_success (struct uart_port *port) { return 0; }
314
315static unsigned v850e_uart_tx_empty (struct uart_port *port)
316{
317 return TIOCSER_TEMT; /* Can't detect. */
318}
319
320static void v850e_uart_set_mctrl (struct uart_port *port, unsigned mctrl)
321{
322#ifdef V850E_UART_SET_RTS
323 V850E_UART_SET_RTS (port->line, (mctrl & TIOCM_RTS));
324#endif
325}
326
327static unsigned v850e_uart_get_mctrl (struct uart_port *port)
328{
329 /* We don't support DCD or DSR, so consider them permanently active. */
330 int mctrl = TIOCM_CAR | TIOCM_DSR;
331
332 /* We may support CTS. */
333#ifdef V850E_UART_CTS
334 mctrl |= V850E_UART_CTS(port->line) ? TIOCM_CTS : 0;
335#else
336 mctrl |= TIOCM_CTS;
337#endif
338
339 return mctrl;
340}
341
342static void v850e_uart_start_tx (struct uart_port *port)
343{
344 v850e_intc_disable_irq (V850E_UART_TX_IRQ (port->line));
345 v850e_uart_tx (port);
346 v850e_intc_enable_irq (V850E_UART_TX_IRQ (port->line));
347}
348
349static void v850e_uart_stop_tx (struct uart_port *port)
350{
351 v850e_intc_disable_irq (V850E_UART_TX_IRQ (port->line));
352}
353
354static void v850e_uart_start_rx (struct uart_port *port)
355{
356 v850e_intc_enable_irq (V850E_UART_RX_IRQ (port->line));
357}
358
359static void v850e_uart_stop_rx (struct uart_port *port)
360{
361 v850e_intc_disable_irq (V850E_UART_RX_IRQ (port->line));
362}
363
364static void v850e_uart_break_ctl (struct uart_port *port, int break_ctl)
365{
366 /* Umm, do this later. */
367}
368
369static int v850e_uart_startup (struct uart_port *port)
370{
371 int err;
372
373 /* Alloc RX irq. */
374 err = request_irq (V850E_UART_RX_IRQ (port->line), v850e_uart_rx_irq,
375 IRQF_DISABLED, "v850e_uart", port);
376 if (err)
377 return err;
378
379 /* Alloc TX irq. */
380 err = request_irq (V850E_UART_TX_IRQ (port->line), v850e_uart_tx_irq,
381 IRQF_DISABLED, "v850e_uart", port);
382 if (err) {
383 free_irq (V850E_UART_RX_IRQ (port->line), port);
384 return err;
385 }
386
387 v850e_uart_start_rx (port);
388
389 return 0;
390}
391
392static void v850e_uart_shutdown (struct uart_port *port)
393{
394 /* Disable port interrupts. */
395 free_irq (V850E_UART_TX_IRQ (port->line), port);
396 free_irq (V850E_UART_RX_IRQ (port->line), port);
397
398 /* Turn off xmit/recv enable bits. */
399 V850E_UART_CONFIG (port->line)
400 &= ~(V850E_UART_CONFIG_TX_ENABLE
401 | V850E_UART_CONFIG_RX_ENABLE);
402 /* Then reset the channel. */
403 V850E_UART_CONFIG (port->line) = 0;
404}
405
406static void
407v850e_uart_set_termios (struct uart_port *port, struct ktermios *termios,
408 struct ktermios *old)
409{
410 unsigned cflags = termios->c_cflag;
411
412 /* Restrict flags to legal values. */
413 if ((cflags & CSIZE) != CS7 && (cflags & CSIZE) != CS8)
414 /* The new value of CSIZE is invalid, use the old value. */
415 cflags = (cflags & ~CSIZE)
416 | (old ? (old->c_cflag & CSIZE) : CS8);
417
418 termios->c_cflag = cflags;
419
420 v850e_uart_configure (port->line, cflags,
421 uart_get_baud_rate (port, termios, old,
422 v850e_uart_min_baud(),
423 v850e_uart_max_baud()));
424}
425
426static const char *v850e_uart_type (struct uart_port *port)
427{
428 return port->type == PORT_V850E_UART ? "v850e_uart" : 0;
429}
430
431static void v850e_uart_config_port (struct uart_port *port, int flags)
432{
433 if (flags & UART_CONFIG_TYPE)
434 port->type = PORT_V850E_UART;
435}
436
437static int
438v850e_uart_verify_port (struct uart_port *port, struct serial_struct *ser)
439{
440 if (ser->type != PORT_UNKNOWN && ser->type != PORT_V850E_UART)
441 return -EINVAL;
442 if (ser->irq != V850E_UART_TX_IRQ (port->line))
443 return -EINVAL;
444 return 0;
445}
446
447static struct uart_ops v850e_uart_ops = {
448 .tx_empty = v850e_uart_tx_empty,
449 .get_mctrl = v850e_uart_get_mctrl,
450 .set_mctrl = v850e_uart_set_mctrl,
451 .start_tx = v850e_uart_start_tx,
452 .stop_tx = v850e_uart_stop_tx,
453 .stop_rx = v850e_uart_stop_rx,
454 .enable_ms = v850e_uart_nop,
455 .break_ctl = v850e_uart_break_ctl,
456 .startup = v850e_uart_startup,
457 .shutdown = v850e_uart_shutdown,
458 .set_termios = v850e_uart_set_termios,
459 .type = v850e_uart_type,
460 .release_port = v850e_uart_nop,
461 .request_port = v850e_uart_success,
462 .config_port = v850e_uart_config_port,
463 .verify_port = v850e_uart_verify_port,
464};
465
466/* Initialization and cleanup. */
467
468static struct uart_driver v850e_uart_driver = {
469 .owner = THIS_MODULE,
470 .driver_name = "v850e_uart",
471 .dev_name = "ttyS",
472 .major = TTY_MAJOR,
473 .minor = V850E_UART_MINOR_BASE,
474 .nr = V850E_UART_NUM_CHANNELS,
475 .cons = V850E_UART_CONSOLE,
476};
477
478
479static struct uart_port v850e_uart_ports[V850E_UART_NUM_CHANNELS];
480
481static int __init v850e_uart_init (void)
482{
483 int rval;
484
485 printk (KERN_INFO "%s on-chip UART\n", V850E_UART_CHIP_NAME);
486
487 rval = uart_register_driver (&v850e_uart_driver);
488 if (rval == 0) {
489 unsigned chan;
490
491 for (chan = 0; chan < V850E_UART_NUM_CHANNELS; chan++) {
492 struct uart_port *port = &v850e_uart_ports[chan];
493
494 memset (port, 0, sizeof *port);
495
496 port->ops = &v850e_uart_ops;
497 port->line = chan;
498 port->iotype = UPIO_MEM;
499 port->flags = UPF_BOOT_AUTOCONF;
500
501 /* We actually use multiple IRQs, but the serial
502 framework seems to mainly use this for
503 informational purposes anyway. Here we use the TX
504 irq. */
505 port->irq = V850E_UART_TX_IRQ (chan);
506
507 /* The serial framework doesn't really use these
508 membase/mapbase fields for anything useful, but
509 it requires that they be something non-zero to
510 consider the port `valid', and also uses them
511 for informational purposes. */
512 port->membase = (void *)V850E_UART_BASE_ADDR (chan);
513 port->mapbase = V850E_UART_BASE_ADDR (chan);
514
515 /* The framework insists on knowing the uart's master
516 clock freq, though it doesn't seem to do anything
517 useful for us with it. We must make it at least
518 higher than (the maximum baud rate * 16), otherwise
519 the framework will puke during its internal
520 calculations, and force the baud rate to be 9600.
521 To be accurate though, just repeat the calculation
522 we use when actually setting the speed. */
523 port->uartclk = v850e_uart_max_clock() * 16;
524
525 uart_add_one_port (&v850e_uart_driver, port);
526 }
527 }
528
529 return rval;
530}
531
532static void __exit v850e_uart_exit (void)
533{
534 unsigned chan;
535
536 for (chan = 0; chan < V850E_UART_NUM_CHANNELS; chan++)
537 uart_remove_one_port (&v850e_uart_driver,
538 &v850e_uart_ports[chan]);
539
540 uart_unregister_driver (&v850e_uart_driver);
541}
542
543module_init (v850e_uart_init);
544module_exit (v850e_uart_exit);
545
546MODULE_AUTHOR ("Miles Bader");
547MODULE_DESCRIPTION ("NEC " V850E_UART_CHIP_NAME " on-chip UART");
548MODULE_LICENSE ("GPL");
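
The console-write path above is the classic polled-output pattern: save and mask the TX interrupt, busy-wait on the transmitter between characters, and expand NL into CR LF by hand because no tty discipline sits underneath. A minimal, self-contained sketch of that loop follows; uart_tx_ready() and uart_tx_byte() are hypothetical stand-ins for the v850e register accessors, not part of the driver:

#include <stdio.h>

/* Stub "hardware" accessors; a real port would poll a status register
   and write a transmit register instead. */
static int  uart_tx_ready(void) { return 1; }	/* always ready here */
static void uart_tx_byte(char c) { putchar(c); }

static void polled_console_write(const char *s, unsigned count)
{
	while (count--) {
		char ch = *s++;

		if (ch == '\n') {
			/* No tty driver below us, so expand NL to CR LF. */
			while (!uart_tx_ready())
				;
			uart_tx_byte('\r');
		}
		while (!uart_tx_ready())
			;
		uart_tx_byte(ch);
	}
}

int main(void)
{
	polled_console_write("hello\n", 6);
	return 0;
}
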
diff --git a/drivers/sh/maple/maple.c b/drivers/sh/maple/maple.c
index 617efb1640b1..be97789fa5fd 100644
--- a/drivers/sh/maple/maple.c
+++ b/drivers/sh/maple/maple.c
@@ -24,13 +24,12 @@
 #include <linux/slab.h>
 #include <linux/maple.h>
 #include <linux/dma-mapping.h>
+#include <linux/delay.h>
 #include <asm/cacheflush.h>
 #include <asm/dma.h>
 #include <asm/io.h>
-#include <asm/mach/dma.h>
-#include <asm/mach/sysasic.h>
-#include <asm/mach/maple.h>
-#include <linux/delay.h>
+#include <mach/dma.h>
+#include <mach/sysasic.h>
 
 MODULE_AUTHOR("Yaegshi Takeshi, Paul Mundt, M.R. Brown, Adrian McMenamin");
 MODULE_DESCRIPTION("Maple bus driver for Dreamcast");
@@ -46,14 +45,15 @@ static DECLARE_WORK(maple_vblank_process, maple_vblank_handler);
 static LIST_HEAD(maple_waitq);
 static LIST_HEAD(maple_sentq);
 
-static DEFINE_MUTEX(maple_list_lock);
+/* mutex to protect queue of waiting packets */
+static DEFINE_MUTEX(maple_wlist_lock);
 
 static struct maple_driver maple_dummy_driver;
 static struct device maple_bus;
 static int subdevice_map[MAPLE_PORTS];
 static unsigned long *maple_sendbuf, *maple_sendptr, *maple_lastptr;
 static unsigned long maple_pnp_time;
-static int started, scanning, liststatus, fullscan;
+static int started, scanning, fullscan;
 static struct kmem_cache *maple_queue_cache;
 
 struct maple_device_specify {
@@ -129,35 +129,124 @@ static void maple_release_device(struct device *dev)
 	kfree(mdev);
 }
 
-/**
+/*
  * maple_add_packet - add a single instruction to the queue
- * @mq: instruction to add to waiting queue
+ * @mdev - maple device
+ * @function - function on device being queried
+ * @command - maple command to add
+ * @length - length of command string (in 32 bit words)
+ * @data - remainder of command string
  */
-void maple_add_packet(struct mapleq *mq)
+int maple_add_packet(struct maple_device *mdev, u32 function, u32 command,
+	size_t length, void *data)
 {
-	mutex_lock(&maple_list_lock);
-	list_add(&mq->list, &maple_waitq);
-	mutex_unlock(&maple_list_lock);
+	int locking, ret = 0;
+	void *sendbuf = NULL;
+
+	mutex_lock(&maple_wlist_lock);
+	/* bounce if device already locked */
+	locking = mutex_is_locked(&mdev->mq->mutex);
+	if (locking) {
+		ret = -EBUSY;
+		goto out;
+	}
+
+	mutex_lock(&mdev->mq->mutex);
+
+	if (length) {
+		sendbuf = kmalloc(length * 4, GFP_KERNEL);
+		if (!sendbuf) {
+			mutex_unlock(&mdev->mq->mutex);
+			ret = -ENOMEM;
+			goto out;
+		}
+		((__be32 *)sendbuf)[0] = cpu_to_be32(function);
+	}
+
+	mdev->mq->command = command;
+	mdev->mq->length = length;
+	if (length > 1)
+		memcpy(sendbuf + 4, data, (length - 1) * 4);
+	mdev->mq->sendbuf = sendbuf;
+
+	list_add(&mdev->mq->list, &maple_waitq);
+out:
+	mutex_unlock(&maple_wlist_lock);
+	return ret;
 }
 EXPORT_SYMBOL_GPL(maple_add_packet);
 
+/*
+ * maple_add_packet_sleeps - add a single instruction to the queue
+ *  - waits for lock to be free
+ * @mdev - maple device
+ * @function - function on device being queried
+ * @command - maple command to add
+ * @length - length of command string (in 32 bit words)
+ * @data - remainder of command string
+ */
+int maple_add_packet_sleeps(struct maple_device *mdev, u32 function,
+	u32 command, size_t length, void *data)
+{
+	int locking, ret = 0;
+	void *sendbuf = NULL;
+
+	locking = mutex_lock_interruptible(&mdev->mq->mutex);
+	if (locking) {
+		ret = -EIO;
+		goto out;
+	}
+
+	if (length) {
+		sendbuf = kmalloc(length * 4, GFP_KERNEL);
+		if (!sendbuf) {
+			mutex_unlock(&mdev->mq->mutex);
+			ret = -ENOMEM;
+			goto out;
+		}
+		((__be32 *)sendbuf)[0] = cpu_to_be32(function);
+	}
+
+	mdev->mq->command = command;
+	mdev->mq->length = length;
+	if (length > 1)
+		memcpy(sendbuf + 4, data, (length - 1) * 4);
+	mdev->mq->sendbuf = sendbuf;
+
+	mutex_lock(&maple_wlist_lock);
+	list_add(&mdev->mq->list, &maple_waitq);
+	mutex_unlock(&maple_wlist_lock);
+out:
+	return ret;
+}
+EXPORT_SYMBOL_GPL(maple_add_packet_sleeps);
+
 static struct mapleq *maple_allocq(struct maple_device *mdev)
 {
 	struct mapleq *mq;
 
 	mq = kmalloc(sizeof(*mq), GFP_KERNEL);
 	if (!mq)
-		return NULL;
+		goto failed_nomem;
 
 	mq->dev = mdev;
 	mq->recvbufdcsp = kmem_cache_zalloc(maple_queue_cache, GFP_KERNEL);
 	mq->recvbuf = (void *) P2SEGADDR(mq->recvbufdcsp);
-	if (!mq->recvbuf) {
-		kfree(mq);
-		return NULL;
-	}
+	if (!mq->recvbuf)
+		goto failed_p2;
+	/*
+	 * most devices do not need the mutex - but
+	 * anything that injects block reads or writes
+	 * will rely on it
	 */
+	mutex_init(&mq->mutex);
 
 	return mq;
+
+failed_p2:
+	kfree(mq);
+failed_nomem:
+	return NULL;
 }
 
 static struct maple_device *maple_alloc_dev(int port, int unit)
@@ -178,7 +267,6 @@ static struct maple_device *maple_alloc_dev(int port, int unit)
 	}
 	mdev->dev.bus = &maple_bus_type;
 	mdev->dev.parent = &maple_bus;
-	mdev->function = 0;
 	return mdev;
 }
 
@@ -216,7 +304,6 @@ static void maple_build_block(struct mapleq *mq)
 	*maple_sendptr++ = PHYSADDR(mq->recvbuf);
 	*maple_sendptr++ =
 	    mq->command | (to << 8) | (from << 16) | (len << 24);
-
 	while (len-- > 0)
 		*maple_sendptr++ = *lsendbuf++;
 }
@@ -224,22 +311,27 @@ static void maple_build_block(struct mapleq *mq)
 /* build up command queue */
 static void maple_send(void)
 {
-	int i;
-	int maple_packets;
+	int i, maple_packets = 0;
 	struct mapleq *mq, *nmq;
 
 	if (!list_empty(&maple_sentq))
 		return;
-	if (list_empty(&maple_waitq) || !maple_dma_done())
+	mutex_lock(&maple_wlist_lock);
+	if (list_empty(&maple_waitq) || !maple_dma_done()) {
+		mutex_unlock(&maple_wlist_lock);
 		return;
-	maple_packets = 0;
-	maple_sendptr = maple_lastptr = maple_sendbuf;
+	}
+	mutex_unlock(&maple_wlist_lock);
+	maple_lastptr = maple_sendbuf;
+	maple_sendptr = maple_sendbuf;
+	mutex_lock(&maple_wlist_lock);
 	list_for_each_entry_safe(mq, nmq, &maple_waitq, list) {
 		maple_build_block(mq);
 		list_move(&mq->list, &maple_sentq);
 		if (maple_packets++ > MAPLE_MAXPACKETS)
 			break;
 	}
+	mutex_unlock(&maple_wlist_lock);
 	if (maple_packets > 0) {
 		for (i = 0; i < (1 << MAPLE_DMA_PAGES); i++)
 			dma_cache_sync(0, maple_sendbuf + i * PAGE_SIZE,
@@ -247,7 +339,8 @@ static void maple_send(void)
 	}
 }
 
-static int attach_matching_maple_driver(struct device_driver *driver,
+/* check if there is a driver registered likely to match this device */
+static int check_matching_maple_driver(struct device_driver *driver,
 			void *devptr)
 {
 	struct maple_driver *maple_drv;
@@ -255,12 +348,8 @@ static int attach_matching_maple_driver(struct device_driver *driver,
 
 	mdev = devptr;
 	maple_drv = to_maple_driver(driver);
-	if (mdev->devinfo.function & be32_to_cpu(maple_drv->function)) {
-		if (maple_drv->connect(mdev) == 0) {
-			mdev->driver = maple_drv;
-			return 1;
-		}
-	}
+	if (mdev->devinfo.function & cpu_to_be32(maple_drv->function))
+		return 1;
 	return 0;
 }
 
@@ -268,11 +357,6 @@ static void maple_detach_driver(struct maple_device *mdev)
 {
 	if (!mdev)
 		return;
-	if (mdev->driver) {
-		if (mdev->driver->disconnect)
-			mdev->driver->disconnect(mdev);
-	}
-	mdev->driver = NULL;
 	device_unregister(&mdev->dev);
 	mdev = NULL;
 }
@@ -328,8 +412,8 @@ static void maple_attach_driver(struct maple_device *mdev)
 		mdev->port, mdev->unit, function);
 
 	matched =
 	    bus_for_each_drv(&maple_bus_type, NULL, mdev,
-		     attach_matching_maple_driver);
+		     check_matching_maple_driver);
 
 	if (matched == 0) {
 		/* Driver does not exist yet */
@@ -373,45 +457,48 @@ static int detach_maple_device(struct device *device, void *portptr)
 
 static int setup_maple_commands(struct device *device, void *ignored)
 {
+	int add;
 	struct maple_device *maple_dev = to_maple_dev(device);
 
 	if ((maple_dev->interval > 0)
 	    && time_after(jiffies, maple_dev->when)) {
-		maple_dev->when = jiffies + maple_dev->interval;
-		maple_dev->mq->command = MAPLE_COMMAND_GETCOND;
-		maple_dev->mq->sendbuf = &maple_dev->function;
-		maple_dev->mq->length = 1;
-		maple_add_packet(maple_dev->mq);
-		liststatus++;
+		/* bounce if we cannot lock */
+		add = maple_add_packet(maple_dev,
+			be32_to_cpu(maple_dev->devinfo.function),
+			MAPLE_COMMAND_GETCOND, 1, NULL);
+		if (!add)
+			maple_dev->when = jiffies + maple_dev->interval;
 	} else {
-		if (time_after(jiffies, maple_pnp_time)) {
-			maple_dev->mq->command = MAPLE_COMMAND_DEVINFO;
-			maple_dev->mq->length = 0;
-			maple_add_packet(maple_dev->mq);
-			liststatus++;
-		}
+		if (time_after(jiffies, maple_pnp_time))
+			/* This will also bounce */
+			maple_add_packet(maple_dev, 0,
+				MAPLE_COMMAND_DEVINFO, 0, NULL);
 	}
-
 	return 0;
 }
 
 /* VBLANK bottom half - implemented via workqueue */
 static void maple_vblank_handler(struct work_struct *work)
 {
-	if (!maple_dma_done())
-		return;
-	if (!list_empty(&maple_sentq))
+	if (!list_empty(&maple_sentq) || !maple_dma_done())
 		return;
+
 	ctrl_outl(0, MAPLE_ENABLE);
-	liststatus = 0;
+
 	bus_for_each_dev(&maple_bus_type, NULL, NULL,
 			 setup_maple_commands);
+
 	if (time_after(jiffies, maple_pnp_time))
 		maple_pnp_time = jiffies + MAPLE_PNP_INTERVAL;
-	if (liststatus && list_empty(&maple_sentq)) {
-		INIT_LIST_HEAD(&maple_sentq);
+
+	mutex_lock(&maple_wlist_lock);
+	if (!list_empty(&maple_waitq) && list_empty(&maple_sentq)) {
+		mutex_unlock(&maple_wlist_lock);
 		maple_send();
+	} else {
+		mutex_unlock(&maple_wlist_lock);
 	}
+
 	maplebus_dma_reset();
 }
 
@@ -422,8 +509,8 @@ static void maple_map_subunits(struct maple_device *mdev, int submask)
 	struct maple_device *mdev_add;
 	struct maple_device_specify ds;
 
+	ds.port = mdev->port;
 	for (k = 0; k < 5; k++) {
-		ds.port = mdev->port;
 		ds.unit = k + 1;
 		retval =
 		    bus_for_each_dev(&maple_bus_type, NULL, &ds,
@@ -437,9 +524,9 @@ static void maple_map_subunits(struct maple_device *mdev, int submask)
 		mdev_add = maple_alloc_dev(mdev->port, k + 1);
 		if (!mdev_add)
 			return;
-		mdev_add->mq->command = MAPLE_COMMAND_DEVINFO;
-		mdev_add->mq->length = 0;
-		maple_add_packet(mdev_add->mq);
+		maple_add_packet(mdev_add, 0, MAPLE_COMMAND_DEVINFO,
+			0, NULL);
+		/* mark that we are checking sub devices */
 		scanning = 1;
 	}
 	submask = submask >> 1;
@@ -505,6 +592,28 @@ static void maple_response_devinfo(struct maple_device *mdev,
 	}
 }
 
+static void maple_port_rescan(void)
+{
+	int i;
+	struct maple_device *mdev;
+
+	fullscan = 1;
+	for (i = 0; i < MAPLE_PORTS; i++) {
+		if (checked[i] == false) {
+			fullscan = 0;
+			mdev = baseunits[i];
+			/*
+			 * test lock in case scan has failed
+			 * but device is still locked
+			 */
+			if (mutex_is_locked(&mdev->mq->mutex))
+				mutex_unlock(&mdev->mq->mutex);
+			maple_add_packet(mdev, 0, MAPLE_COMMAND_DEVINFO,
+				0, NULL);
+		}
+	}
+}
+
 /* maple dma end bottom half - implemented via workqueue */
 static void maple_dma_handler(struct work_struct *work)
 {
@@ -512,7 +621,6 @@ static void maple_dma_handler(struct work_struct *work)
 	struct maple_device *dev;
 	char *recvbuf;
 	enum maple_code code;
-	int i;
 
 	if (!maple_dma_done())
 		return;
@@ -522,6 +630,10 @@ static void maple_dma_handler(struct work_struct *work)
 			recvbuf = mq->recvbuf;
 			code = recvbuf[0];
 			dev = mq->dev;
+			kfree(mq->sendbuf);
+			mutex_unlock(&mq->mutex);
+			list_del_init(&mq->list);
+
 			switch (code) {
 			case MAPLE_RESPONSE_NONE:
 				maple_response_none(dev, mq);
@@ -558,26 +670,16 @@ static void maple_dma_handler(struct work_struct *work)
 				break;
 			}
 		}
-		INIT_LIST_HEAD(&maple_sentq);
+		/* if scanning is 1 then we have subdevices to check */
 		if (scanning == 1) {
 			maple_send();
 			scanning = 2;
 		} else
 			scanning = 0;
-
-		if (!fullscan) {
-			fullscan = 1;
-			for (i = 0; i < MAPLE_PORTS; i++) {
-				if (checked[i] == false) {
-					fullscan = 0;
-					dev = baseunits[i];
-					dev->mq->command =
-						MAPLE_COMMAND_DEVINFO;
-					dev->mq->length = 0;
-					maple_add_packet(dev->mq);
-				}
-			}
-		}
+		/*check if we have actually tested all ports yet */
+		if (!fullscan)
+			maple_port_rescan();
+		/* mark that we have been through the first scan */
 		if (started == 0)
 			started = 1;
 	}
@@ -631,7 +733,7 @@ static int match_maple_bus_driver(struct device *devptr,
 	if (maple_dev->devinfo.function == 0xFFFFFFFF)
 		return 0;
 	else if (maple_dev->devinfo.function &
-		 be32_to_cpu(maple_drv->function))
+		 cpu_to_be32(maple_drv->function))
 		return 1;
 	return 0;
 }
@@ -713,6 +815,9 @@ static int __init maple_bus_init(void)
 	if (!maple_queue_cache)
 		goto cleanup_bothirqs;
 
+	INIT_LIST_HEAD(&maple_waitq);
+	INIT_LIST_HEAD(&maple_sentq);
+
 	/* setup maple ports */
 	for (i = 0; i < MAPLE_PORTS; i++) {
 		checked[i] = false;
@@ -723,9 +828,7 @@ static int __init maple_bus_init(void)
 			maple_free_dev(mdev[i]);
 			goto cleanup_cache;
 		}
-		mdev[i]->mq->command = MAPLE_COMMAND_DEVINFO;
-		mdev[i]->mq->length = 0;
-		maple_add_packet(mdev[i]->mq);
+		maple_add_packet(mdev[i], 0, MAPLE_COMMAND_DEVINFO, 0, NULL);
 		subdevice_map[i] = 0;
 	}
 
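
The rework above turns maple_add_packet() from a void append of a caller-built struct mapleq into a call that assembles the send buffer itself and reports failure: -EBUSY when the device's queue entry is still locked by an in-flight request (maple_add_packet_sleeps() waits on the mutex instead) and -ENOMEM when the buffer allocation fails. A hedged sketch of a caller of the new interface; the polling function and its bounce-and-retry policy are illustrative, not part of this patch:

#include <linux/maple.h>
#include <linux/errno.h>

/* Hypothetical periodic poll from a maple bus client. */
static void example_poll(struct maple_device *mdev)
{
	int ret;

	ret = maple_add_packet(mdev, be32_to_cpu(mdev->devinfo.function),
			       MAPLE_COMMAND_GETCOND, 1, NULL);
	if (ret == -EBUSY)
		return;	/* previous request still in flight; retry next pass */
}
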
diff --git a/drivers/usb/gadget/m66592-udc.c b/drivers/usb/gadget/m66592-udc.c
index 8da7535c0c70..77b44fb48f0a 100644
--- a/drivers/usb/gadget/m66592-udc.c
+++ b/drivers/usb/gadget/m66592-udc.c
@@ -1593,7 +1593,7 @@ static int __init m66592_probe(struct platform_device *pdev)
 
 	m66592->gadget.ops = &m66592_gadget_ops;
 	device_initialize(&m66592->gadget.dev);
-	dev_set_name(&m66592->gadget, "gadget");
+	dev_set_name(&m66592->gadget.dev, "gadget");
 	m66592->gadget.is_dualspeed = 1;
 	m66592->gadget.dev.parent = &pdev->dev;
 	m66592->gadget.dev.dma_mask = pdev->dev.dma_mask;
diff --git a/drivers/video/Makefile b/drivers/video/Makefile
index 0ebc1bfd2514..a6b55297a7fb 100644
--- a/drivers/video/Makefile
+++ b/drivers/video/Makefile
@@ -118,7 +118,6 @@ obj-$(CONFIG_FB_PS3) += ps3fb.o
 obj-$(CONFIG_FB_SM501)            += sm501fb.o
 obj-$(CONFIG_FB_XILINX)           += xilinxfb.o
 obj-$(CONFIG_FB_SH_MOBILE_LCDC)   += sh_mobile_lcdcfb.o
-obj-$(CONFIG_FB_SH7343VOU)        += sh7343_voufb.o
 obj-$(CONFIG_FB_OMAP)             += omap/
 obj-$(CONFIG_XEN_FBDEV_FRONTEND)  += xen-fbfront.o
 obj-$(CONFIG_FB_CARMINE)          += carminefb.o
diff --git a/drivers/video/arkfb.c b/drivers/video/arkfb.c
index 5001bd4ef466..38a1e8308c83 100644
--- a/drivers/video/arkfb.c
+++ b/drivers/video/arkfb.c
@@ -1126,11 +1126,8 @@ static int ark_pci_resume (struct pci_dev* dev)
 	acquire_console_sem();
 	mutex_lock(&(par->open_lock));
 
-	if (par->ref_count == 0) {
-		mutex_unlock(&(par->open_lock));
-		release_console_sem();
-		return 0;
-	}
+	if (par->ref_count == 0)
+		goto fail;
 
 	pci_set_power_state(dev, PCI_D0);
 	pci_restore_state(dev);
@@ -1143,8 +1140,8 @@ static int ark_pci_resume (struct pci_dev* dev)
 	arkfb_set_par(info);
 	fb_set_suspend(info, 0);
 
-	mutex_unlock(&(par->open_lock));
 fail:
+	mutex_unlock(&(par->open_lock));
 	release_console_sem();
 	return 0;
 }
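
This hunk and the matching vt8623fb change further down fold the early-return unlock into the function's existing fail label, so the open_lock mutex and the console semaphore are each released on exactly one exit path. The shape of the fix as a standalone sketch, with a pthread mutex standing in for the framebuffer locking:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t open_lock = PTHREAD_MUTEX_INITIALIZER;
static int ref_count;

/* Single-exit pattern: both the early-out and the normal path leave
   through `fail', which performs the one unlock. */
static int resume(void)
{
	pthread_mutex_lock(&open_lock);

	if (ref_count == 0)
		goto fail;	/* nothing to restore, still unlock once */

	printf("restoring hardware state\n");
fail:
	pthread_mutex_unlock(&open_lock);
	return 0;
}

int main(void)
{
	resume();
	return 0;
}
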
diff --git a/drivers/video/backlight/hp680_bl.c b/drivers/video/backlight/hp680_bl.c
index fbea2bd129c7..6fa0b9d5559a 100644
--- a/drivers/video/backlight/hp680_bl.c
+++ b/drivers/video/backlight/hp680_bl.c
@@ -18,7 +18,7 @@
 #include <linux/fb.h>
 #include <linux/backlight.h>
 
-#include <asm/cpu/dac.h>
+#include <cpu/dac.h>
 #include <asm/hp6xx.h>
 #include <asm/hd64461.h>
 
diff --git a/drivers/video/backlight/platform_lcd.c b/drivers/video/backlight/platform_lcd.c
index 72d44dbfce82..738694d23889 100644
--- a/drivers/video/backlight/platform_lcd.c
+++ b/drivers/video/backlight/platform_lcd.c
@@ -92,7 +92,7 @@ static int __devinit platform_lcd_probe(struct platform_device *pdev)
 
 	plcd->us = dev;
 	plcd->pdata = pdata;
-	plcd->lcd = lcd_device_register("platform-lcd", dev,
+	plcd->lcd = lcd_device_register(dev_name(dev), dev,
 					plcd, &platform_lcd_ops);
 	if (IS_ERR(plcd->lcd)) {
 		dev_err(dev, "cannot register lcd device\n");
@@ -101,6 +101,8 @@ static int __devinit platform_lcd_probe(struct platform_device *pdev)
 	}
 
 	platform_set_drvdata(pdev, plcd);
+	platform_lcd_set_power(plcd->lcd, FB_BLANK_NORMAL);
+
 	return 0;
 
  err_mem:
diff --git a/drivers/video/console/sticore.c b/drivers/video/console/sticore.c
index d7822af0e00a..ef7870f5ea08 100644
--- a/drivers/video/console/sticore.c
+++ b/drivers/video/console/sticore.c
@@ -24,6 +24,7 @@
 #include <asm/hardware.h>
 #include <asm/parisc-device.h>
 #include <asm/cacheflush.h>
+#include <asm/grfioctl.h>
 
 #include "../sticore.h"
 
@@ -725,6 +726,7 @@ static int __devinit sti_read_rom(int wordmode, struct sti_struct *sti,
 {
 	struct sti_cooked_rom *cooked;
 	struct sti_rom *raw = NULL;
+	unsigned long revno;
 
 	cooked = kmalloc(sizeof *cooked, GFP_KERNEL);
 	if (!cooked)
@@ -767,9 +769,35 @@ static int __devinit sti_read_rom(int wordmode, struct sti_struct *sti,
 	sti->graphics_id[1] = raw->graphics_id[1];
 
 	sti_dump_rom(raw);
 
+	/* check if the ROM routines in this card are compatible */
+	if (wordmode || sti->graphics_id[1] != 0x09A02587)
+		goto ok;
+
+	revno = (raw->revno[0] << 8) | raw->revno[1];
+
+	switch (sti->graphics_id[0]) {
+	case S9000_ID_HCRX:
+		/* HyperA or HyperB ? */
+		if (revno == 0x8408 || revno == 0x840b)
+			goto msg_not_supported;
+		break;
+	case CRT_ID_THUNDER:
+		if (revno == 0x8509)
+			goto msg_not_supported;
+		break;
+	case CRT_ID_THUNDER2:
+		if (revno == 0x850c)
+			goto msg_not_supported;
+	}
+ok:
 	return 1;
 
+msg_not_supported:
+	printk(KERN_ERR "Sorry, this GSC/STI card is not yet supported.\n");
+	printk(KERN_ERR "Please see http://parisc-linux.org/faq/"
+			"graphics-howto.html for more info.\n");
+	/* fall through */
 out_err:
 	kfree(raw);
 	kfree(cooked);
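
The new compatibility check packs the two ROM revision bytes into one 16-bit number and compares it against known-unsupported revisions. The same packing, as a standalone illustration (the sample bytes are one of the gated HCRX revisions from the hunk above):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint8_t revno_bytes[2] = { 0x84, 0x08 };
	unsigned long revno = (revno_bytes[0] << 8) | revno_bytes[1];

	if (revno == 0x8408 || revno == 0x840b)
		printf("revision %#lx: not yet supported\n", revno);
	return 0;
}
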
diff --git a/drivers/video/gbefb.c b/drivers/video/gbefb.c
index 2e552d5bbb5d..f89c3cce1e0c 100644
--- a/drivers/video/gbefb.c
+++ b/drivers/video/gbefb.c
@@ -87,6 +87,8 @@ static int gbe_revision;
 static int ypan, ywrap;
 
 static uint32_t pseudo_palette[16];
+static uint32_t gbe_cmap[256];
+static int gbe_turned_on; /* 0 turned off, 1 turned on */
 
 static char *mode_option __initdata = NULL;
 
@@ -208,6 +210,8 @@ void gbe_turn_off(void)
 	int i;
 	unsigned int val, x, y, vpixen_off;
 
+	gbe_turned_on = 0;
+
 	/* check if pixel counter is on */
 	val = gbe->vt_xy;
 	if (GET_GBE_FIELD(VT_XY, FREEZE, val) == 1)
@@ -371,6 +375,22 @@ static void gbe_turn_on(void)
 	}
 	if (i == 10000)
 		printk(KERN_ERR "gbefb: turn on DMA timed out\n");
+
+	gbe_turned_on = 1;
+}
+
+static void gbe_loadcmap(void)
+{
+	int i, j;
+
+	for (i = 0; i < 256; i++) {
+		for (j = 0; j < 1000 && gbe->cm_fifo >= 63; j++)
+			udelay(10);
+		if (j == 1000)
+			printk(KERN_ERR "gbefb: cmap FIFO timeout\n");
+
+		gbe->cmap[i] = gbe_cmap[i];
+	}
 }
 
 /*
@@ -382,6 +402,7 @@ static int gbefb_blank(int blank, struct fb_info *info)
 	switch (blank) {
 	case FB_BLANK_UNBLANK:		/* unblank */
 		gbe_turn_on();
+		gbe_loadcmap();
 		break;
 
 	case FB_BLANK_NORMAL:		/* blank */
@@ -796,16 +817,10 @@ static int gbefb_set_par(struct fb_info *info)
 		gbe->gmap[i] = (i << 24) | (i << 16) | (i << 8);
 
 	/* Initialize the color map */
-	for (i = 0; i < 256; i++) {
-		int j;
-
-		for (j = 0; j < 1000 && gbe->cm_fifo >= 63; j++)
-			udelay(10);
-		if (j == 1000)
-			printk(KERN_ERR "gbefb: cmap FIFO timeout\n");
+	for (i = 0; i < 256; i++)
+		gbe_cmap[i] = (i << 8) | (i << 16) | (i << 24);
 
-		gbe->cmap[i] = (i << 8) | (i << 16) | (i << 24);
-	}
+	gbe_loadcmap();
 
 	return 0;
 }
@@ -855,14 +870,17 @@ static int gbefb_setcolreg(unsigned regno, unsigned red, unsigned green,
 	blue >>= 8;
 
 	if (info->var.bits_per_pixel <= 8) {
-		/* wait for the color map FIFO to have a free entry */
-		for (i = 0; i < 1000 && gbe->cm_fifo >= 63; i++)
-			udelay(10);
-		if (i == 1000) {
-			printk(KERN_ERR "gbefb: cmap FIFO timeout\n");
-			return 1;
+		gbe_cmap[regno] = (red << 24) | (green << 16) | (blue << 8);
+		if (gbe_turned_on) {
+			/* wait for the color map FIFO to have a free entry */
+			for (i = 0; i < 1000 && gbe->cm_fifo >= 63; i++)
+				udelay(10);
+			if (i == 1000) {
+				printk(KERN_ERR "gbefb: cmap FIFO timeout\n");
+				return 1;
+			}
+			gbe->cmap[regno] = gbe_cmap[regno];
 		}
-		gbe->cmap[regno] = (red << 24) | (green << 16) | (blue << 8);
 	} else if (regno < 16) {
 		switch (info->var.bits_per_pixel) {
 		case 15:
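
The gbefb change introduces a software shadow of the colour map: gbefb_setcolreg() always records the colour in gbe_cmap[], touches the hardware FIFO only while the pipeline is turned on, and gbe_loadcmap() replays the whole shadow on unblank. A standalone sketch of the pattern, with printf standing in for the gbe->cmap register write:

#include <stdio.h>
#include <stdint.h>

static uint32_t shadow_cmap[256];
static int hw_on;	/* mirrors gbe_turned_on */

static void hw_write_cmap(int i, uint32_t v)
{
	printf("cmap[%d] = %#x\n", i, (unsigned)v);
}

static void setcolreg(int regno, uint32_t rgb)
{
	shadow_cmap[regno] = rgb;	/* always buffered in software */
	if (hw_on)
		hw_write_cmap(regno, rgb);	/* safe: FIFO is draining */
}

static void unblank(void)
{
	int i;

	hw_on = 1;
	for (i = 0; i < 256; i++)	/* replay the shadow copy */
		hw_write_cmap(i, shadow_cmap[i]);
}

int main(void)
{
	setcolreg(1, 0x00ff0000);	/* buffered while the pipe is off */
	unblank();			/* flushed to hardware here */
	return 0;
}
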
diff --git a/drivers/video/hitfb.c b/drivers/video/hitfb.c
index 392a8be6aa76..e6467cf9f19f 100644
--- a/drivers/video/hitfb.c
+++ b/drivers/video/hitfb.c
@@ -27,7 +27,7 @@
 #include <asm/pgtable.h>
 #include <asm/io.h>
 #include <asm/hd64461.h>
-#include <asm/cpu/dac.h>
+#include <cpu/dac.h>
 
 #define WIDTH 640
 
diff --git a/drivers/video/pvr2fb.c b/drivers/video/pvr2fb.c
index 8c863a7f654b..0a0fd48a8566 100644
--- a/drivers/video/pvr2fb.c
+++ b/drivers/video/pvr2fb.c
@@ -58,18 +58,18 @@
 
 #ifdef CONFIG_SH_DREAMCAST
 #include <asm/machvec.h>
-#include <asm/mach/sysasic.h>
+#include <mach-dreamcast/mach/sysasic.h>
 #endif
 
 #ifdef CONFIG_SH_DMA
 #include <linux/pagemap.h>
-#include <asm/mach/dma.h>
+#include <mach/dma.h>
 #include <asm/dma.h>
 #endif
 
 #ifdef CONFIG_SH_STORE_QUEUES
 #include <linux/uaccess.h>
-#include <asm/cpu/sq.h>
+#include <cpu/sq.h>
 #endif
 
 #ifndef PCI_DEVICE_ID_NEC_NEON250
diff --git a/drivers/video/vt8623fb.c b/drivers/video/vt8623fb.c
index 536ab11623f0..4a484ee98f8a 100644
--- a/drivers/video/vt8623fb.c
+++ b/drivers/video/vt8623fb.c
@@ -853,11 +853,8 @@ static int vt8623_pci_resume(struct pci_dev* dev)
 	acquire_console_sem();
 	mutex_lock(&(par->open_lock));
 
-	if (par->ref_count == 0) {
-		mutex_unlock(&(par->open_lock));
-		release_console_sem();
-		return 0;
-	}
+	if (par->ref_count == 0)
+		goto fail;
 
 	pci_set_power_state(dev, PCI_D0);
 	pci_restore_state(dev);
@@ -870,8 +867,8 @@ static int vt8623_pci_resume(struct pci_dev* dev)
 	vt8623fb_set_par(info);
 	fb_set_suspend(info, 0);
 
-	mutex_unlock(&(par->open_lock));
 fail:
+	mutex_unlock(&(par->open_lock));
 	release_console_sem();
 
 	return 0;
diff --git a/drivers/watchdog/hpwdt.c b/drivers/watchdog/hpwdt.c
index eaa3f2a79ff5..ccd6c530782d 100644
--- a/drivers/watchdog/hpwdt.c
+++ b/drivers/watchdog/hpwdt.c
@@ -420,7 +420,7 @@ static int __devinit detect_cru_service(void)
 static int hpwdt_pretimeout(struct notifier_block *nb, unsigned long ulReason,
 			    void *data)
 {
-	static unsigned long rom_pl;
+	unsigned long rom_pl;
 	static int die_nmi_called;
 
 	if (ulReason != DIE_NMI && ulReason != DIE_NMI_IPI)