path: root/drivers/misc
Diffstat (limited to 'drivers/misc')
-rw-r--r--  drivers/misc/Kconfig                 |   66
-rw-r--r--  drivers/misc/Makefile                |    6
-rw-r--r--  drivers/misc/atmel_tclib.c           |  161
-rw-r--r--  drivers/misc/eeepc-laptop.c          |  666
-rw-r--r--  drivers/misc/enclosure.c             |  202
-rw-r--r--  drivers/misc/intel_menlow.c          |   30
-rw-r--r--  drivers/misc/kgdbts.c                | 1090
-rw-r--r--  drivers/misc/sgi-xp/Makefile         |   11
-rw-r--r--  drivers/misc/sgi-xp/xp.h             |  463
-rw-r--r--  drivers/misc/sgi-xp/xp_main.c        |  279
-rw-r--r--  drivers/misc/sgi-xp/xp_nofault.S     |   35
-rw-r--r--  drivers/misc/sgi-xp/xpc.h            | 1187
-rw-r--r--  drivers/misc/sgi-xp/xpc_channel.c    | 2243
-rw-r--r--  drivers/misc/sgi-xp/xpc_main.c       | 1323
-rw-r--r--  drivers/misc/sgi-xp/xpc_partition.c  | 1174
-rw-r--r--  drivers/misc/sgi-xp/xpnet.c          |  677
-rw-r--r--  drivers/misc/thinkpad_acpi.c         |  765
17 files changed, 10088 insertions, 290 deletions
diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index 962817e49fba..636af2862308 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -22,6 +22,39 @@ config ATMEL_PWM
 	  purposes including software controlled power-efficent backlights
 	  on LCD displays, motor control, and waveform generation.
 
+config ATMEL_TCLIB
+	bool "Atmel AT32/AT91 Timer/Counter Library"
+	depends on (AVR32 || ARCH_AT91)
+	help
+	  Select this if you want a library to allocate the Timer/Counter
+	  blocks found on many Atmel processors. This facilitates using
+	  these blocks by different drivers despite processor differences.
+
+config ATMEL_TCB_CLKSRC
+	bool "TC Block Clocksource"
+	depends on ATMEL_TCLIB && GENERIC_TIME
+	default y
+	help
+	  Select this to get a high precision clocksource based on a
+	  TC block with a 5+ MHz base clock rate. Two timer channels
+	  are combined to make a single 32-bit timer.
+
+	  When GENERIC_CLOCKEVENTS is defined, the third timer channel
+	  may be used as a clock event device supporting oneshot mode
+	  (delays of up to two seconds) based on the 32 KiHz clock.
+
+config ATMEL_TCB_CLKSRC_BLOCK
+	int
+	depends on ATMEL_TCB_CLKSRC
+	prompt "TC Block" if ARCH_AT91RM9200 || ARCH_AT91SAM9260 || CPU_AT32AP700X
+	default 0
+	range 0 1
+	help
+	  Some chips provide more than one TC block, so you have the
+	  choice of which one to use for the clock framework. The other
+	  TC can be used for other purposes, such as PWM generation and
+	  interval timing.
+
 config IBM_ASM
 	tristate "Device driver for IBM RSA service processor"
 	depends on X86 && PCI && INPUT && EXPERIMENTAL
@@ -107,6 +140,7 @@ config ACER_WMI
 	depends on EXPERIMENTAL
 	depends on ACPI
 	depends on LEDS_CLASS
+	depends on NEW_LEDS
 	depends on BACKLIGHT_CLASS_DEVICE
 	depends on SERIO_I8042
 	select ACPI_WMI
@@ -127,6 +161,7 @@ config ASUS_LAPTOP
 	depends on ACPI
 	depends on EXPERIMENTAL && !ACPI_ASUS
 	depends on LEDS_CLASS
+	depends on NEW_LEDS
 	depends on BACKLIGHT_CLASS_DEVICE
 	---help---
 	  This is the new Linux driver for Asus laptops. It may also support some
@@ -208,10 +243,13 @@ config SONYPI_COMPAT
 config THINKPAD_ACPI
 	tristate "ThinkPad ACPI Laptop Extras"
 	depends on X86 && ACPI
+	select BACKLIGHT_LCD_SUPPORT
 	select BACKLIGHT_CLASS_DEVICE
 	select HWMON
 	select NVRAM
-	depends on INPUT
+	select INPUT
+	select NEW_LEDS
+	select LEDS_CLASS
 	---help---
 	  This is a driver for the IBM and Lenovo ThinkPad laptops. It adds
 	  support for Fn-Fx key combinations, Bluetooth control, video
@@ -311,6 +349,7 @@ config ATMEL_SSC
 config INTEL_MENLOW
 	tristate "Thermal Management driver for Intel menlow platform"
 	depends on ACPI_THERMAL
+	select THERMAL
 	depends on X86
 	---help---
 	  ACPI thermal management enhancement driver on
@@ -318,6 +357,19 @@ config INTEL_MENLOW
 
 	  If unsure, say N.
 
+config EEEPC_LAPTOP
+	tristate "Eee PC Hotkey Driver (EXPERIMENTAL)"
+	depends on X86
+	depends on ACPI
+	depends on BACKLIGHT_CLASS_DEVICE
+	depends on HWMON
+	depends on EXPERIMENTAL
+	---help---
+	  This driver supports the Fn-Fx keys on Eee PC laptops.
+	  It also adds the ability to switch camera/wlan on/off.
+
+	  If you have an Eee PC laptop, say Y or M here.
+
 config ENCLOSURE_SERVICES
 	tristate "Enclosure Services"
 	default n
@@ -327,4 +379,16 @@ config ENCLOSURE_SERVICES
 	  driver (SCSI/ATA) which supports enclosures
 	  or a SCSI enclosure device (SES) to use these services.
 
+config SGI_XP
+	tristate "Support communication between SGI SSIs"
+	depends on IA64_GENERIC || IA64_SGI_SN2
+	select IA64_UNCACHED_ALLOCATOR if IA64_GENERIC || IA64_SGI_SN2
+	select GENERIC_ALLOCATOR if IA64_GENERIC || IA64_SGI_SN2
+	---help---
+	  An SGI machine can be divided into multiple Single System
+	  Images which act independently of each other and have
+	  hardware based memory protection from the others. Enabling
+	  this feature will allow for direct communication between SSIs
+	  based on a network adapter and DMA messaging.
+
 endif # MISC_DEVICES
diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
index 3b12f5da8562..1952875a272e 100644
--- a/drivers/misc/Makefile
+++ b/drivers/misc/Makefile
@@ -7,9 +7,11 @@ obj-$(CONFIG_IBM_ASM) += ibmasm/
 obj-$(CONFIG_HDPU_FEATURES)	+= hdpuftrs/
 obj-$(CONFIG_MSI_LAPTOP)	+= msi-laptop.o
 obj-$(CONFIG_ACER_WMI)		+= acer-wmi.o
 obj-$(CONFIG_ASUS_LAPTOP)	+= asus-laptop.o
+obj-$(CONFIG_EEEPC_LAPTOP)	+= eeepc-laptop.o
 obj-$(CONFIG_ATMEL_PWM)		+= atmel_pwm.o
 obj-$(CONFIG_ATMEL_SSC)		+= atmel-ssc.o
+obj-$(CONFIG_ATMEL_TCLIB)	+= atmel_tclib.o
 obj-$(CONFIG_TC1100_WMI)	+= tc1100-wmi.o
 obj-$(CONFIG_LKDTM)		+= lkdtm.o
 obj-$(CONFIG_TIFM_CORE)		+= tifm_core.o
@@ -22,3 +24,5 @@ obj-$(CONFIG_FUJITSU_LAPTOP)	+= fujitsu-laptop.o
 obj-$(CONFIG_EEPROM_93CX6)	+= eeprom_93cx6.o
 obj-$(CONFIG_INTEL_MENLOW)	+= intel_menlow.o
 obj-$(CONFIG_ENCLOSURE_SERVICES) += enclosure.o
+obj-$(CONFIG_KGDB_TESTS)	+= kgdbts.o
+obj-$(CONFIG_SGI_XP)		+= sgi-xp/
diff --git a/drivers/misc/atmel_tclib.c b/drivers/misc/atmel_tclib.c
new file mode 100644
index 000000000000..05dc8a31f280
--- /dev/null
+++ b/drivers/misc/atmel_tclib.c
@@ -0,0 +1,161 @@
1#include <linux/atmel_tc.h>
2#include <linux/clk.h>
3#include <linux/err.h>
4#include <linux/init.h>
5#include <linux/io.h>
6#include <linux/ioport.h>
7#include <linux/kernel.h>
8#include <linux/platform_device.h>
9
10/* Number of bytes to reserve for the iomem resource */
11#define ATMEL_TC_IOMEM_SIZE 256
12
13
14/*
15 * This is a thin library to solve the problem of how to portably allocate
16 * one of the TC blocks. For simplicity, it doesn't currently expect to
17 * share individual timers between different drivers.
18 */
19
20#if defined(CONFIG_AVR32)
21/* AVR32 has these divide PBB */
22const u8 atmel_tc_divisors[5] = { 0, 4, 8, 16, 32, };
23EXPORT_SYMBOL(atmel_tc_divisors);
24
25#elif defined(CONFIG_ARCH_AT91)
26/* AT91 has these divide MCK */
27const u8 atmel_tc_divisors[5] = { 2, 8, 32, 128, 0, };
28EXPORT_SYMBOL(atmel_tc_divisors);
29
30#endif
31
32static DEFINE_SPINLOCK(tc_list_lock);
33static LIST_HEAD(tc_list);
34
35/**
36 * atmel_tc_alloc - allocate a specified TC block
37 * @block: which block to allocate
38 * @name: name to be associated with the iomem resource
39 *
40 * Caller allocates a block. If it is available, a pointer to a
41 * pre-initialized struct atmel_tc is returned. The caller can access
42 * the registers directly through the "regs" field.
43 */
44struct atmel_tc *atmel_tc_alloc(unsigned block, const char *name)
45{
46 struct atmel_tc *tc;
47 struct platform_device *pdev = NULL;
48 struct resource *r;
49
50 spin_lock(&tc_list_lock);
51 list_for_each_entry(tc, &tc_list, node) {
52 if (tc->pdev->id == block) {
53 pdev = tc->pdev;
54 break;
55 }
56 }
57
58 if (!pdev || tc->iomem)
59 goto fail;
60
61 r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
62 r = request_mem_region(r->start, ATMEL_TC_IOMEM_SIZE, name);
63 if (!r)
64 goto fail;
65
66 tc->regs = ioremap(r->start, ATMEL_TC_IOMEM_SIZE);
67 if (!tc->regs)
68 goto fail_ioremap;
69
70 tc->iomem = r;
71
72out:
73 spin_unlock(&tc_list_lock);
74 return tc;
75
76fail_ioremap:
77 release_resource(r);
78fail:
79 tc = NULL;
80 goto out;
81}
82EXPORT_SYMBOL_GPL(atmel_tc_alloc);
83
84/**
85 * atmel_tc_free - release a specified TC block
86 * @tc: Timer/counter block that was returned by atmel_tc_alloc()
87 *
88 * This reverses the effect of atmel_tc_alloc(), unmapping the I/O
89 * registers, invalidating the resource returned by that routine and
90 * making the TC available to other drivers.
91 */
92void atmel_tc_free(struct atmel_tc *tc)
93{
94 spin_lock(&tc_list_lock);
95 if (tc->regs) {
96 iounmap(tc->regs);
97 release_resource(tc->iomem);
98 tc->regs = NULL;
99 tc->iomem = NULL;
100 }
101 spin_unlock(&tc_list_lock);
102}
103EXPORT_SYMBOL_GPL(atmel_tc_free);
104
105static int __init tc_probe(struct platform_device *pdev)
106{
107 struct atmel_tc *tc;
108 struct clk *clk;
109 int irq;
110
111 if (!platform_get_resource(pdev, IORESOURCE_MEM, 0))
112 return -EINVAL;
113
114 irq = platform_get_irq(pdev, 0);
115 if (irq < 0)
116 return -EINVAL;
117
118 tc = kzalloc(sizeof(struct atmel_tc), GFP_KERNEL);
119 if (!tc)
120 return -ENOMEM;
121
122 tc->pdev = pdev;
123
124 clk = clk_get(&pdev->dev, "t0_clk");
125 if (IS_ERR(clk)) {
126 kfree(tc);
127 return -EINVAL;
128 }
129
130 tc->clk[0] = clk;
131 tc->clk[1] = clk_get(&pdev->dev, "t1_clk");
132 if (IS_ERR(tc->clk[1]))
133 tc->clk[1] = clk;
134 tc->clk[2] = clk_get(&pdev->dev, "t2_clk");
135 if (IS_ERR(tc->clk[2]))
136 tc->clk[2] = clk;
137
138 tc->irq[0] = irq;
139 tc->irq[1] = platform_get_irq(pdev, 1);
140 if (tc->irq[1] < 0)
141 tc->irq[1] = irq;
142 tc->irq[2] = platform_get_irq(pdev, 2);
143 if (tc->irq[2] < 0)
144 tc->irq[2] = irq;
145
146 spin_lock(&tc_list_lock);
147 list_add_tail(&tc->node, &tc_list);
148 spin_unlock(&tc_list_lock);
149
150 return 0;
151}
152
153static struct platform_driver tc_driver = {
154 .driver.name = "atmel_tcb",
155};
156
157static int __init tc_init(void)
158{
159 return platform_driver_probe(&tc_driver, tc_probe);
160}
161arch_initcall(tc_init);
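
The kernel-doc above describes how consumers are expected to use this library: claim a whole TC block with atmel_tc_alloc(), program its channels through tc->regs, and hand the block back with atmel_tc_free(). A minimal usage sketch follows, assuming a hypothetical consumer driver; the block number, resource name and the my_tc_user_* identifiers are illustrative only and are not part of this patch.

/* Hypothetical consumer of the TC library (illustrative sketch only). */
#include <linux/atmel_tc.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/module.h>

static struct atmel_tc *my_tc;

static int __init my_tc_user_init(void)
{
	/* Claim TC block 0; NULL means the block is absent or already taken. */
	my_tc = atmel_tc_alloc(0, "my_tc_user");
	if (!my_tc)
		return -EBUSY;

	/* Channel registers are now mapped and reachable via my_tc->regs. */
	return 0;
}

static void __exit my_tc_user_exit(void)
{
	/* Unmap the registers and make the block available to other drivers. */
	atmel_tc_free(my_tc);
}

module_init(my_tc_user_init);
module_exit(my_tc_user_exit);
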
diff --git a/drivers/misc/eeepc-laptop.c b/drivers/misc/eeepc-laptop.c
new file mode 100644
index 000000000000..6d727609097f
--- /dev/null
+++ b/drivers/misc/eeepc-laptop.c
@@ -0,0 +1,666 @@
1/*
2 * eepc-laptop.c - Asus Eee PC extras
3 *
4 * Based on asus_acpi.c as patched for the Eee PC by Asus:
5 * ftp://ftp.asus.com/pub/ASUS/EeePC/701/ASUS_ACPI_071126.rar
6 * Based on eee.c from eeepc-linux
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 */
18
19#include <linux/kernel.h>
20#include <linux/module.h>
21#include <linux/init.h>
22#include <linux/types.h>
23#include <linux/platform_device.h>
24#include <linux/backlight.h>
25#include <linux/fb.h>
26#include <linux/hwmon.h>
27#include <linux/hwmon-sysfs.h>
28#include <acpi/acpi_drivers.h>
29#include <acpi/acpi_bus.h>
30#include <linux/uaccess.h>
31
32#define EEEPC_LAPTOP_VERSION "0.1"
33
34#define EEEPC_HOTK_NAME "Eee PC Hotkey Driver"
35#define EEEPC_HOTK_FILE "eeepc"
36#define EEEPC_HOTK_CLASS "hotkey"
37#define EEEPC_HOTK_DEVICE_NAME "Hotkey"
38#define EEEPC_HOTK_HID "ASUS010"
39
40#define EEEPC_LOG EEEPC_HOTK_FILE ": "
41#define EEEPC_ERR KERN_ERR EEEPC_LOG
42#define EEEPC_WARNING KERN_WARNING EEEPC_LOG
43#define EEEPC_NOTICE KERN_NOTICE EEEPC_LOG
44#define EEEPC_INFO KERN_INFO EEEPC_LOG
45
46/*
47 * Definitions for Asus EeePC
48 */
49#define NOTIFY_WLAN_ON 0x10
50#define NOTIFY_BRN_MIN 0x20
51#define NOTIFY_BRN_MAX 0x2f
52
53enum {
54 DISABLE_ASL_WLAN = 0x0001,
55 DISABLE_ASL_BLUETOOTH = 0x0002,
56 DISABLE_ASL_IRDA = 0x0004,
57 DISABLE_ASL_CAMERA = 0x0008,
58 DISABLE_ASL_TV = 0x0010,
59 DISABLE_ASL_GPS = 0x0020,
60 DISABLE_ASL_DISPLAYSWITCH = 0x0040,
61 DISABLE_ASL_MODEM = 0x0080,
62 DISABLE_ASL_CARDREADER = 0x0100
63};
64
65enum {
66 CM_ASL_WLAN = 0,
67 CM_ASL_BLUETOOTH,
68 CM_ASL_IRDA,
69 CM_ASL_1394,
70 CM_ASL_CAMERA,
71 CM_ASL_TV,
72 CM_ASL_GPS,
73 CM_ASL_DVDROM,
74 CM_ASL_DISPLAYSWITCH,
75 CM_ASL_PANELBRIGHT,
76 CM_ASL_BIOSFLASH,
77 CM_ASL_ACPIFLASH,
78 CM_ASL_CPUFV,
79 CM_ASL_CPUTEMPERATURE,
80 CM_ASL_FANCPU,
81 CM_ASL_FANCHASSIS,
82 CM_ASL_USBPORT1,
83 CM_ASL_USBPORT2,
84 CM_ASL_USBPORT3,
85 CM_ASL_MODEM,
86 CM_ASL_CARDREADER,
87 CM_ASL_LID
88};
89
90const char *cm_getv[] = {
91 "WLDG", NULL, NULL, NULL,
92 "CAMG", NULL, NULL, NULL,
93 NULL, "PBLG", NULL, NULL,
94 "CFVG", NULL, NULL, NULL,
95 "USBG", NULL, NULL, "MODG",
96 "CRDG", "LIDG"
97};
98
99const char *cm_setv[] = {
100 "WLDS", NULL, NULL, NULL,
101 "CAMS", NULL, NULL, NULL,
102 "SDSP", "PBLS", "HDPS", NULL,
103 "CFVS", NULL, NULL, NULL,
104 "USBG", NULL, NULL, "MODS",
105 "CRDS", NULL
106};
107
108#define EEEPC_EC "\\_SB.PCI0.SBRG.EC0."
109
110#define EEEPC_EC_FAN_PWM EEEPC_EC "SC02" /* Fan PWM duty cycle (%) */
111#define EEEPC_EC_SC02 0x63
112#define EEEPC_EC_FAN_HRPM EEEPC_EC "SC05" /* High byte, fan speed (RPM) */
113#define EEEPC_EC_FAN_LRPM EEEPC_EC "SC06" /* Low byte, fan speed (RPM) */
114#define EEEPC_EC_FAN_CTRL EEEPC_EC "SFB3" /* Byte containing SF25 */
115#define EEEPC_EC_SFB3 0xD3
116
117/*
118 * This is the main structure, we can use it to store useful information
119 * about the hotk device
120 */
121struct eeepc_hotk {
122 struct acpi_device *device; /* the device we are in */
123 acpi_handle handle; /* the handle of the hotk device */
124 u32 cm_supported; /* the control methods supported
125 by this BIOS */
126 uint init_flag; /* Init flags */
127 u16 event_count[128]; /* count for each event */
128};
129
130/* The actual device the driver binds to */
131static struct eeepc_hotk *ehotk;
132
133/* Platform device/driver */
134static struct platform_driver platform_driver = {
135 .driver = {
136 .name = EEEPC_HOTK_FILE,
137 .owner = THIS_MODULE,
138 }
139};
140
141static struct platform_device *platform_device;
142
143/*
144 * The hotkey driver declaration
145 */
146static int eeepc_hotk_add(struct acpi_device *device);
147static int eeepc_hotk_remove(struct acpi_device *device, int type);
148
149static const struct acpi_device_id eeepc_device_ids[] = {
150 {EEEPC_HOTK_HID, 0},
151 {"", 0},
152};
153MODULE_DEVICE_TABLE(acpi, eeepc_device_ids);
154
155static struct acpi_driver eeepc_hotk_driver = {
156 .name = EEEPC_HOTK_NAME,
157 .class = EEEPC_HOTK_CLASS,
158 .ids = eeepc_device_ids,
159 .ops = {
160 .add = eeepc_hotk_add,
161 .remove = eeepc_hotk_remove,
162 },
163};
164
165/* The backlight device /sys/class/backlight */
166static struct backlight_device *eeepc_backlight_device;
167
168/* The hwmon device */
169static struct device *eeepc_hwmon_device;
170
171/*
172 * The backlight class declaration
173 */
174static int read_brightness(struct backlight_device *bd);
175static int update_bl_status(struct backlight_device *bd);
176static struct backlight_ops eeepcbl_ops = {
177 .get_brightness = read_brightness,
178 .update_status = update_bl_status,
179};
180
181MODULE_AUTHOR("Corentin Chary, Eric Cooper");
182MODULE_DESCRIPTION(EEEPC_HOTK_NAME);
183MODULE_LICENSE("GPL");
184
185/*
186 * ACPI Helpers
187 */
188static int write_acpi_int(acpi_handle handle, const char *method, int val,
189 struct acpi_buffer *output)
190{
191 struct acpi_object_list params;
192 union acpi_object in_obj;
193 acpi_status status;
194
195 params.count = 1;
196 params.pointer = &in_obj;
197 in_obj.type = ACPI_TYPE_INTEGER;
198 in_obj.integer.value = val;
199
200 status = acpi_evaluate_object(handle, (char *)method, &params, output);
201 return (status == AE_OK ? 0 : -1);
202}
203
204static int read_acpi_int(acpi_handle handle, const char *method, int *val)
205{
206 acpi_status status;
207 ulong result;
208
209 status = acpi_evaluate_integer(handle, (char *)method, NULL, &result);
210 if (ACPI_FAILURE(status)) {
211 *val = -1;
212 return -1;
213 } else {
214 *val = result;
215 return 0;
216 }
217}
218
219static int set_acpi(int cm, int value)
220{
221 if (ehotk->cm_supported & (0x1 << cm)) {
222 const char *method = cm_setv[cm];
223 if (method == NULL)
224 return -ENODEV;
225 if (write_acpi_int(ehotk->handle, method, value, NULL))
226 printk(EEEPC_WARNING "Error writing %s\n", method);
227 }
228 return 0;
229}
230
231static int get_acpi(int cm)
232{
233 int value = -1;
234 if ((ehotk->cm_supported & (0x1 << cm))) {
235 const char *method = cm_getv[cm];
236 if (method == NULL)
237 return -ENODEV;
238 if (read_acpi_int(ehotk->handle, method, &value))
239 printk(EEEPC_WARNING "Error reading %s\n", method);
240 }
241 return value;
242}
243
244/*
245 * Backlight
246 */
247static int read_brightness(struct backlight_device *bd)
248{
249 return get_acpi(CM_ASL_PANELBRIGHT);
250}
251
252static int set_brightness(struct backlight_device *bd, int value)
253{
254 value = max(0, min(15, value));
255 return set_acpi(CM_ASL_PANELBRIGHT, value);
256}
257
258static int update_bl_status(struct backlight_device *bd)
259{
260 return set_brightness(bd, bd->props.brightness);
261}
262
263/*
264 * Sys helpers
265 */
266static int parse_arg(const char *buf, unsigned long count, int *val)
267{
268 if (!count)
269 return 0;
270 if (sscanf(buf, "%i", val) != 1)
271 return -EINVAL;
272 return count;
273}
274
275static ssize_t store_sys_acpi(int cm, const char *buf, size_t count)
276{
277 int rv, value;
278
279 rv = parse_arg(buf, count, &value);
280 if (rv > 0)
281 set_acpi(cm, value);
282 return rv;
283}
284
285static ssize_t show_sys_acpi(int cm, char *buf)
286{
287 return sprintf(buf, "%d\n", get_acpi(cm));
288}
289
290#define EEEPC_CREATE_DEVICE_ATTR(_name, _cm) \
291 static ssize_t show_##_name(struct device *dev, \
292 struct device_attribute *attr, \
293 char *buf) \
294 { \
295 return show_sys_acpi(_cm, buf); \
296 } \
297 static ssize_t store_##_name(struct device *dev, \
298 struct device_attribute *attr, \
299 const char *buf, size_t count) \
300 { \
301 return store_sys_acpi(_cm, buf, count); \
302 } \
303 static struct device_attribute dev_attr_##_name = { \
304 .attr = { \
305 .name = __stringify(_name), \
306 .mode = 0644 }, \
307 .show = show_##_name, \
308 .store = store_##_name, \
309 }
310
311EEEPC_CREATE_DEVICE_ATTR(camera, CM_ASL_CAMERA);
312EEEPC_CREATE_DEVICE_ATTR(cardr, CM_ASL_CARDREADER);
313EEEPC_CREATE_DEVICE_ATTR(disp, CM_ASL_DISPLAYSWITCH);
314EEEPC_CREATE_DEVICE_ATTR(wlan, CM_ASL_WLAN);
315
316static struct attribute *platform_attributes[] = {
317 &dev_attr_camera.attr,
318 &dev_attr_cardr.attr,
319 &dev_attr_disp.attr,
320 &dev_attr_wlan.attr,
321 NULL
322};
323
324static struct attribute_group platform_attribute_group = {
325 .attrs = platform_attributes
326};
327
328/*
329 * Hotkey functions
330 */
331static int eeepc_hotk_check(void)
332{
333 struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
334 int result;
335
336 result = acpi_bus_get_status(ehotk->device);
337 if (result)
338 return result;
339 if (ehotk->device->status.present) {
340 if (write_acpi_int(ehotk->handle, "INIT", ehotk->init_flag,
341 &buffer)) {
342 printk(EEEPC_ERR "Hotkey initialization failed\n");
343 return -ENODEV;
344 } else {
345 printk(EEEPC_NOTICE "Hotkey init flags 0x%x\n",
346 ehotk->init_flag);
347 }
348 /* get control methods supported */
349 if (read_acpi_int(ehotk->handle, "CMSG"
350 , &ehotk->cm_supported)) {
351 printk(EEEPC_ERR
352 "Get control methods supported failed\n");
353 return -ENODEV;
354 } else {
355 printk(EEEPC_INFO
356 "Get control methods supported: 0x%x\n",
357 ehotk->cm_supported);
358 }
359 } else {
360 printk(EEEPC_ERR "Hotkey device not present, aborting\n");
361 return -EINVAL;
362 }
363 return 0;
364}
365
366static void notify_wlan(u32 *event)
367{
368 /* if DISABLE_ASL_WLAN is set, the notify code for fn+f2
369 will always be 0x10 */
370 if (ehotk->cm_supported & (0x1 << CM_ASL_WLAN)) {
371 const char *method = cm_getv[CM_ASL_WLAN];
372 int value;
373 if (read_acpi_int(ehotk->handle, method, &value))
374 printk(EEEPC_WARNING "Error reading %s\n",
375 method);
376 else if (value == 1)
377 *event = 0x11;
378 }
379}
380
381static void notify_brn(void)
382{
383 struct backlight_device *bd = eeepc_backlight_device;
384 bd->props.brightness = read_brightness(bd);
385}
386
387static void eeepc_hotk_notify(acpi_handle handle, u32 event, void *data)
388{
389 if (!ehotk)
390 return;
391 if (event == NOTIFY_WLAN_ON && (DISABLE_ASL_WLAN & ehotk->init_flag))
392 notify_wlan(&event);
393 if (event >= NOTIFY_BRN_MIN && event <= NOTIFY_BRN_MAX)
394 notify_brn();
395 acpi_bus_generate_proc_event(ehotk->device, event,
396 ehotk->event_count[event % 128]++);
397}
398
399static int eeepc_hotk_add(struct acpi_device *device)
400{
401 acpi_status status = AE_OK;
402 int result;
403
404 if (!device)
405 return -EINVAL;
406 printk(EEEPC_NOTICE EEEPC_HOTK_NAME "\n");
407 ehotk = kzalloc(sizeof(struct eeepc_hotk), GFP_KERNEL);
408 if (!ehotk)
409 return -ENOMEM;
410 ehotk->init_flag = DISABLE_ASL_WLAN | DISABLE_ASL_DISPLAYSWITCH;
411 ehotk->handle = device->handle;
412 strcpy(acpi_device_name(device), EEEPC_HOTK_DEVICE_NAME);
413 strcpy(acpi_device_class(device), EEEPC_HOTK_CLASS);
414 acpi_driver_data(device) = ehotk;
415 ehotk->device = device;
416 result = eeepc_hotk_check();
417 if (result)
418 goto end;
419 status = acpi_install_notify_handler(ehotk->handle, ACPI_SYSTEM_NOTIFY,
420 eeepc_hotk_notify, ehotk);
421 if (ACPI_FAILURE(status))
422 printk(EEEPC_ERR "Error installing notify handler\n");
423 end:
424 if (result) {
425 kfree(ehotk);
426 ehotk = NULL;
427 }
428 return result;
429}
430
431static int eeepc_hotk_remove(struct acpi_device *device, int type)
432{
433 acpi_status status = 0;
434
435 if (!device || !acpi_driver_data(device))
436 return -EINVAL;
437 status = acpi_remove_notify_handler(ehotk->handle, ACPI_SYSTEM_NOTIFY,
438 eeepc_hotk_notify);
439 if (ACPI_FAILURE(status))
440 printk(EEEPC_ERR "Error removing notify handler\n");
441 kfree(ehotk);
442 return 0;
443}
444
445/*
446 * Hwmon
447 */
448static int eeepc_get_fan_pwm(void)
449{
450 int value = 0;
451
452 read_acpi_int(NULL, EEEPC_EC_FAN_PWM, &value);
453 return (value);
454}
455
456static void eeepc_set_fan_pwm(int value)
457{
458 value = SENSORS_LIMIT(value, 0, 100);
459 ec_write(EEEPC_EC_SC02, value);
460}
461
462static int eeepc_get_fan_rpm(void)
463{
464 int high = 0;
465 int low = 0;
466
467 read_acpi_int(NULL, EEEPC_EC_FAN_HRPM, &high);
468 read_acpi_int(NULL, EEEPC_EC_FAN_LRPM, &low);
469 return (high << 8 | low);
470}
471
472static int eeepc_get_fan_ctrl(void)
473{
474 int value = 0;
475
476 read_acpi_int(NULL, EEEPC_EC_FAN_CTRL, &value);
477 return ((value & 0x02 ? 1 : 0));
478}
479
480static void eeepc_set_fan_ctrl(int manual)
481{
482 int value = 0;
483
484 read_acpi_int(NULL, EEEPC_EC_FAN_CTRL, &value);
485 if (manual)
486 value |= 0x02;
487 else
488 value &= ~0x02;
489 ec_write(EEEPC_EC_SFB3, value);
490}
491
492static ssize_t store_sys_hwmon(void (*set)(int), const char *buf, size_t count)
493{
494 int rv, value;
495
496 rv = parse_arg(buf, count, &value);
497 if (rv > 0)
498 set(value);
499 return rv;
500}
501
502static ssize_t show_sys_hwmon(int (*get)(void), char *buf)
503{
504 return sprintf(buf, "%d\n", get());
505}
506
507#define EEEPC_CREATE_SENSOR_ATTR(_name, _mode, _set, _get) \
508 static ssize_t show_##_name(struct device *dev, \
509 struct device_attribute *attr, \
510 char *buf) \
511 { \
512 return show_sys_hwmon(_set, buf); \
513 } \
514 static ssize_t store_##_name(struct device *dev, \
515 struct device_attribute *attr, \
516 const char *buf, size_t count) \
517 { \
518 return store_sys_hwmon(_get, buf, count); \
519 } \
520 static SENSOR_DEVICE_ATTR(_name, _mode, show_##_name, store_##_name, 0);
521
522EEEPC_CREATE_SENSOR_ATTR(fan1_input, S_IRUGO, eeepc_get_fan_rpm, NULL);
523EEEPC_CREATE_SENSOR_ATTR(fan1_pwm, S_IRUGO | S_IWUSR,
524 eeepc_get_fan_pwm, eeepc_set_fan_pwm);
525EEEPC_CREATE_SENSOR_ATTR(pwm1_enable, S_IRUGO | S_IWUSR,
526 eeepc_get_fan_ctrl, eeepc_set_fan_ctrl);
527
528static struct attribute *hwmon_attributes[] = {
529 &sensor_dev_attr_fan1_pwm.dev_attr.attr,
530 &sensor_dev_attr_fan1_input.dev_attr.attr,
531 &sensor_dev_attr_pwm1_enable.dev_attr.attr,
532 NULL
533};
534
535static struct attribute_group hwmon_attribute_group = {
536 .attrs = hwmon_attributes
537};
538
539/*
540 * exit/init
541 */
542static void eeepc_backlight_exit(void)
543{
544 if (eeepc_backlight_device)
545 backlight_device_unregister(eeepc_backlight_device);
546 eeepc_backlight_device = NULL;
547}
548
549static void eeepc_hwmon_exit(void)
550{
551 struct device *hwmon;
552
553 hwmon = eeepc_hwmon_device;
554 if (!hwmon)
555 return ;
556 hwmon_device_unregister(hwmon);
557 sysfs_remove_group(&hwmon->kobj,
558 &hwmon_attribute_group);
559 eeepc_hwmon_device = NULL;
560}
561
562static void __exit eeepc_laptop_exit(void)
563{
564 eeepc_backlight_exit();
565 eeepc_hwmon_exit();
566 acpi_bus_unregister_driver(&eeepc_hotk_driver);
567 sysfs_remove_group(&platform_device->dev.kobj,
568 &platform_attribute_group);
569 platform_device_unregister(platform_device);
570 platform_driver_unregister(&platform_driver);
571}
572
573static int eeepc_backlight_init(struct device *dev)
574{
575 struct backlight_device *bd;
576
577 bd = backlight_device_register(EEEPC_HOTK_FILE, dev,
578 NULL, &eeepcbl_ops);
579 if (IS_ERR(bd)) {
580 printk(EEEPC_ERR
581 "Could not register eeepc backlight device\n");
582 eeepc_backlight_device = NULL;
583 return PTR_ERR(bd);
584 }
585 eeepc_backlight_device = bd;
586 bd->props.max_brightness = 15;
587 bd->props.brightness = read_brightness(NULL);
588 bd->props.power = FB_BLANK_UNBLANK;
589 backlight_update_status(bd);
590 return 0;
591}
592
593static int eeepc_hwmon_init(struct device *dev)
594{
595 struct device *hwmon;
596 int result;
597
598 hwmon = hwmon_device_register(dev);
599 if (IS_ERR(hwmon)) {
600 printk(EEEPC_ERR
601 "Could not register eeepc hwmon device\n");
602 eeepc_hwmon_device = NULL;
603 return PTR_ERR(hwmon);
604 }
605 eeepc_hwmon_device = hwmon;
606 result = sysfs_create_group(&hwmon->kobj,
607 &hwmon_attribute_group);
608 if (result)
609 eeepc_hwmon_exit();
610 return result;
611}
612
613static int __init eeepc_laptop_init(void)
614{
615 struct device *dev;
616 int result;
617
618 if (acpi_disabled)
619 return -ENODEV;
620 result = acpi_bus_register_driver(&eeepc_hotk_driver);
621 if (result < 0)
622 return result;
623 if (!ehotk) {
624 acpi_bus_unregister_driver(&eeepc_hotk_driver);
625 return -ENODEV;
626 }
627 dev = acpi_get_physical_device(ehotk->device->handle);
628 result = eeepc_backlight_init(dev);
629 if (result)
630 goto fail_backlight;
631 result = eeepc_hwmon_init(dev);
632 if (result)
633 goto fail_hwmon;
634 /* Register platform stuff */
635 result = platform_driver_register(&platform_driver);
636 if (result)
637 goto fail_platform_driver;
638 platform_device = platform_device_alloc(EEEPC_HOTK_FILE, -1);
639 if (!platform_device) {
640 result = -ENOMEM;
641 goto fail_platform_device1;
642 }
643 result = platform_device_add(platform_device);
644 if (result)
645 goto fail_platform_device2;
646 result = sysfs_create_group(&platform_device->dev.kobj,
647 &platform_attribute_group);
648 if (result)
649 goto fail_sysfs;
650 return 0;
651fail_sysfs:
652 platform_device_del(platform_device);
653fail_platform_device2:
654 platform_device_put(platform_device);
655fail_platform_device1:
656 platform_driver_unregister(&platform_driver);
657fail_platform_driver:
658 eeepc_hwmon_exit();
659fail_hwmon:
660 eeepc_backlight_exit();
661fail_backlight:
662 return result;
663}
664
665module_init(eeepc_laptop_init);
666module_exit(eeepc_laptop_exit);
diff --git a/drivers/misc/enclosure.c b/drivers/misc/enclosure.c
index 6fcb0e96adf4..0736cff9d97a 100644
--- a/drivers/misc/enclosure.c
+++ b/drivers/misc/enclosure.c
@@ -31,7 +31,6 @@
31static LIST_HEAD(container_list); 31static LIST_HEAD(container_list);
32static DEFINE_MUTEX(container_list_lock); 32static DEFINE_MUTEX(container_list_lock);
33static struct class enclosure_class; 33static struct class enclosure_class;
34static struct class enclosure_component_class;
35 34
36/** 35/**
37 * enclosure_find - find an enclosure given a device 36 * enclosure_find - find an enclosure given a device
@@ -40,16 +39,16 @@ static struct class enclosure_component_class;
40 * Looks through the list of registered enclosures to see 39 * Looks through the list of registered enclosures to see
41 * if it can find a match for a device. Returns NULL if no 40 * if it can find a match for a device. Returns NULL if no
42 * enclosure is found. Obtains a reference to the enclosure class 41 * enclosure is found. Obtains a reference to the enclosure class
43 * device which must be released with class_device_put(). 42 * device which must be released with device_put().
44 */ 43 */
45struct enclosure_device *enclosure_find(struct device *dev) 44struct enclosure_device *enclosure_find(struct device *dev)
46{ 45{
47 struct enclosure_device *edev = NULL; 46 struct enclosure_device *edev;
48 47
49 mutex_lock(&container_list_lock); 48 mutex_lock(&container_list_lock);
50 list_for_each_entry(edev, &container_list, node) { 49 list_for_each_entry(edev, &container_list, node) {
51 if (edev->cdev.dev == dev) { 50 if (edev->edev.parent == dev) {
52 class_device_get(&edev->cdev); 51 get_device(&edev->edev);
53 mutex_unlock(&container_list_lock); 52 mutex_unlock(&container_list_lock);
54 return edev; 53 return edev;
55 } 54 }
@@ -117,11 +116,11 @@ enclosure_register(struct device *dev, const char *name, int components,
117 116
118 edev->components = components; 117 edev->components = components;
119 118
120 edev->cdev.class = &enclosure_class; 119 edev->edev.class = &enclosure_class;
121 edev->cdev.dev = get_device(dev); 120 edev->edev.parent = get_device(dev);
122 edev->cb = cb; 121 edev->cb = cb;
123 snprintf(edev->cdev.class_id, BUS_ID_SIZE, "%s", name); 122 snprintf(edev->edev.bus_id, BUS_ID_SIZE, "%s", name);
124 err = class_device_register(&edev->cdev); 123 err = device_register(&edev->edev);
125 if (err) 124 if (err)
126 goto err; 125 goto err;
127 126
@@ -135,7 +134,7 @@ enclosure_register(struct device *dev, const char *name, int components,
135 return edev; 134 return edev;
136 135
137 err: 136 err:
138 put_device(edev->cdev.dev); 137 put_device(edev->edev.parent);
139 kfree(edev); 138 kfree(edev);
140 return ERR_PTR(err); 139 return ERR_PTR(err);
141} 140}
@@ -158,29 +157,69 @@ void enclosure_unregister(struct enclosure_device *edev)
158 157
159 for (i = 0; i < edev->components; i++) 158 for (i = 0; i < edev->components; i++)
160 if (edev->component[i].number != -1) 159 if (edev->component[i].number != -1)
161 class_device_unregister(&edev->component[i].cdev); 160 device_unregister(&edev->component[i].cdev);
162 161
163 /* prevent any callbacks into service user */ 162 /* prevent any callbacks into service user */
164 edev->cb = &enclosure_null_callbacks; 163 edev->cb = &enclosure_null_callbacks;
165 class_device_unregister(&edev->cdev); 164 device_unregister(&edev->edev);
166} 165}
167EXPORT_SYMBOL_GPL(enclosure_unregister); 166EXPORT_SYMBOL_GPL(enclosure_unregister);
168 167
169static void enclosure_release(struct class_device *cdev) 168#define ENCLOSURE_NAME_SIZE 64
169
170static void enclosure_link_name(struct enclosure_component *cdev, char *name)
171{
172 strcpy(name, "enclosure_device:");
173 strcat(name, cdev->cdev.bus_id);
174}
175
176static void enclosure_remove_links(struct enclosure_component *cdev)
177{
178 char name[ENCLOSURE_NAME_SIZE];
179
180 enclosure_link_name(cdev, name);
181 sysfs_remove_link(&cdev->dev->kobj, name);
182 sysfs_remove_link(&cdev->cdev.kobj, "device");
183}
184
185static int enclosure_add_links(struct enclosure_component *cdev)
186{
187 int error;
188 char name[ENCLOSURE_NAME_SIZE];
189
190 error = sysfs_create_link(&cdev->cdev.kobj, &cdev->dev->kobj, "device");
191 if (error)
192 return error;
193
194 enclosure_link_name(cdev, name);
195 error = sysfs_create_link(&cdev->dev->kobj, &cdev->cdev.kobj, name);
196 if (error)
197 sysfs_remove_link(&cdev->cdev.kobj, "device");
198
199 return error;
200}
201
202static void enclosure_release(struct device *cdev)
170{ 203{
171 struct enclosure_device *edev = to_enclosure_device(cdev); 204 struct enclosure_device *edev = to_enclosure_device(cdev);
172 205
173 put_device(cdev->dev); 206 put_device(cdev->parent);
174 kfree(edev); 207 kfree(edev);
175} 208}
176 209
177static void enclosure_component_release(struct class_device *cdev) 210static void enclosure_component_release(struct device *dev)
178{ 211{
179 if (cdev->dev) 212 struct enclosure_component *cdev = to_enclosure_component(dev);
213
214 if (cdev->dev) {
215 enclosure_remove_links(cdev);
180 put_device(cdev->dev); 216 put_device(cdev->dev);
181 class_device_put(cdev->parent); 217 }
218 put_device(dev->parent);
182} 219}
183 220
221static struct attribute_group *enclosure_groups[];
222
184/** 223/**
185 * enclosure_component_register - add a particular component to an enclosure 224 * enclosure_component_register - add a particular component to an enclosure
186 * @edev: the enclosure to add the component 225 * @edev: the enclosure to add the component
@@ -201,7 +240,7 @@ enclosure_component_register(struct enclosure_device *edev,
201 const char *name) 240 const char *name)
202{ 241{
203 struct enclosure_component *ecomp; 242 struct enclosure_component *ecomp;
204 struct class_device *cdev; 243 struct device *cdev;
205 int err; 244 int err;
206 245
207 if (number >= edev->components) 246 if (number >= edev->components)
@@ -215,14 +254,16 @@ enclosure_component_register(struct enclosure_device *edev,
215 ecomp->type = type; 254 ecomp->type = type;
216 ecomp->number = number; 255 ecomp->number = number;
217 cdev = &ecomp->cdev; 256 cdev = &ecomp->cdev;
218 cdev->parent = class_device_get(&edev->cdev); 257 cdev->parent = get_device(&edev->edev);
219 cdev->class = &enclosure_component_class;
220 if (name) 258 if (name)
221 snprintf(cdev->class_id, BUS_ID_SIZE, "%s", name); 259 snprintf(cdev->bus_id, BUS_ID_SIZE, "%s", name);
222 else 260 else
223 snprintf(cdev->class_id, BUS_ID_SIZE, "%u", number); 261 snprintf(cdev->bus_id, BUS_ID_SIZE, "%u", number);
224 262
225 err = class_device_register(cdev); 263 cdev->release = enclosure_component_release;
264 cdev->groups = enclosure_groups;
265
266 err = device_register(cdev);
226 if (err) 267 if (err)
227 ERR_PTR(err); 268 ERR_PTR(err);
228 269
@@ -247,18 +288,19 @@ EXPORT_SYMBOL_GPL(enclosure_component_register);
247int enclosure_add_device(struct enclosure_device *edev, int component, 288int enclosure_add_device(struct enclosure_device *edev, int component,
248 struct device *dev) 289 struct device *dev)
249{ 290{
250 struct class_device *cdev; 291 struct enclosure_component *cdev;
251 292
252 if (!edev || component >= edev->components) 293 if (!edev || component >= edev->components)
253 return -EINVAL; 294 return -EINVAL;
254 295
255 cdev = &edev->component[component].cdev; 296 cdev = &edev->component[component];
256 297
257 class_device_del(cdev);
258 if (cdev->dev) 298 if (cdev->dev)
259 put_device(cdev->dev); 299 enclosure_remove_links(cdev);
300
301 put_device(cdev->dev);
260 cdev->dev = get_device(dev); 302 cdev->dev = get_device(dev);
261 return class_device_add(cdev); 303 return enclosure_add_links(cdev);
262} 304}
263EXPORT_SYMBOL_GPL(enclosure_add_device); 305EXPORT_SYMBOL_GPL(enclosure_add_device);
264 306
@@ -272,18 +314,17 @@ EXPORT_SYMBOL_GPL(enclosure_add_device);
272 */ 314 */
273int enclosure_remove_device(struct enclosure_device *edev, int component) 315int enclosure_remove_device(struct enclosure_device *edev, int component)
274{ 316{
275 struct class_device *cdev; 317 struct enclosure_component *cdev;
276 318
277 if (!edev || component >= edev->components) 319 if (!edev || component >= edev->components)
278 return -EINVAL; 320 return -EINVAL;
279 321
280 cdev = &edev->component[component].cdev; 322 cdev = &edev->component[component];
281 323
282 class_device_del(cdev); 324 device_del(&cdev->cdev);
283 if (cdev->dev) 325 put_device(cdev->dev);
284 put_device(cdev->dev);
285 cdev->dev = NULL; 326 cdev->dev = NULL;
286 return class_device_add(cdev); 327 return device_add(&cdev->cdev);
287} 328}
288EXPORT_SYMBOL_GPL(enclosure_remove_device); 329EXPORT_SYMBOL_GPL(enclosure_remove_device);
289 330
@@ -291,14 +332,16 @@ EXPORT_SYMBOL_GPL(enclosure_remove_device);
291 * sysfs pieces below 332 * sysfs pieces below
292 */ 333 */
293 334
294static ssize_t enclosure_show_components(struct class_device *cdev, char *buf) 335static ssize_t enclosure_show_components(struct device *cdev,
336 struct device_attribute *attr,
337 char *buf)
295{ 338{
296 struct enclosure_device *edev = to_enclosure_device(cdev); 339 struct enclosure_device *edev = to_enclosure_device(cdev);
297 340
298 return snprintf(buf, 40, "%d\n", edev->components); 341 return snprintf(buf, 40, "%d\n", edev->components);
299} 342}
300 343
301static struct class_device_attribute enclosure_attrs[] = { 344static struct device_attribute enclosure_attrs[] = {
302 __ATTR(components, S_IRUGO, enclosure_show_components, NULL), 345 __ATTR(components, S_IRUGO, enclosure_show_components, NULL),
303 __ATTR_NULL 346 __ATTR_NULL
304}; 347};
@@ -306,8 +349,8 @@ static struct class_device_attribute enclosure_attrs[] = {
306static struct class enclosure_class = { 349static struct class enclosure_class = {
307 .name = "enclosure", 350 .name = "enclosure",
308 .owner = THIS_MODULE, 351 .owner = THIS_MODULE,
309 .release = enclosure_release, 352 .dev_release = enclosure_release,
310 .class_dev_attrs = enclosure_attrs, 353 .dev_attrs = enclosure_attrs,
311}; 354};
312 355
313static const char *const enclosure_status [] = { 356static const char *const enclosure_status [] = {
@@ -326,7 +369,8 @@ static const char *const enclosure_type [] = {
326 [ENCLOSURE_COMPONENT_ARRAY_DEVICE] = "array device", 369 [ENCLOSURE_COMPONENT_ARRAY_DEVICE] = "array device",
327}; 370};
328 371
329static ssize_t get_component_fault(struct class_device *cdev, char *buf) 372static ssize_t get_component_fault(struct device *cdev,
373 struct device_attribute *attr, char *buf)
330{ 374{
331 struct enclosure_device *edev = to_enclosure_device(cdev->parent); 375 struct enclosure_device *edev = to_enclosure_device(cdev->parent);
332 struct enclosure_component *ecomp = to_enclosure_component(cdev); 376 struct enclosure_component *ecomp = to_enclosure_component(cdev);
@@ -336,8 +380,9 @@ static ssize_t get_component_fault(struct class_device *cdev, char *buf)
336 return snprintf(buf, 40, "%d\n", ecomp->fault); 380 return snprintf(buf, 40, "%d\n", ecomp->fault);
337} 381}
338 382
339static ssize_t set_component_fault(struct class_device *cdev, const char *buf, 383static ssize_t set_component_fault(struct device *cdev,
340 size_t count) 384 struct device_attribute *attr,
385 const char *buf, size_t count)
341{ 386{
342 struct enclosure_device *edev = to_enclosure_device(cdev->parent); 387 struct enclosure_device *edev = to_enclosure_device(cdev->parent);
343 struct enclosure_component *ecomp = to_enclosure_component(cdev); 388 struct enclosure_component *ecomp = to_enclosure_component(cdev);
@@ -348,7 +393,8 @@ static ssize_t set_component_fault(struct class_device *cdev, const char *buf,
348 return count; 393 return count;
349} 394}
350 395
351static ssize_t get_component_status(struct class_device *cdev, char *buf) 396static ssize_t get_component_status(struct device *cdev,
397 struct device_attribute *attr,char *buf)
352{ 398{
353 struct enclosure_device *edev = to_enclosure_device(cdev->parent); 399 struct enclosure_device *edev = to_enclosure_device(cdev->parent);
354 struct enclosure_component *ecomp = to_enclosure_component(cdev); 400 struct enclosure_component *ecomp = to_enclosure_component(cdev);
@@ -358,8 +404,9 @@ static ssize_t get_component_status(struct class_device *cdev, char *buf)
358 return snprintf(buf, 40, "%s\n", enclosure_status[ecomp->status]); 404 return snprintf(buf, 40, "%s\n", enclosure_status[ecomp->status]);
359} 405}
360 406
361static ssize_t set_component_status(struct class_device *cdev, const char *buf, 407static ssize_t set_component_status(struct device *cdev,
362 size_t count) 408 struct device_attribute *attr,
409 const char *buf, size_t count)
363{ 410{
364 struct enclosure_device *edev = to_enclosure_device(cdev->parent); 411 struct enclosure_device *edev = to_enclosure_device(cdev->parent);
365 struct enclosure_component *ecomp = to_enclosure_component(cdev); 412 struct enclosure_component *ecomp = to_enclosure_component(cdev);
@@ -380,7 +427,8 @@ static ssize_t set_component_status(struct class_device *cdev, const char *buf,
380 return -EINVAL; 427 return -EINVAL;
381} 428}
382 429
383static ssize_t get_component_active(struct class_device *cdev, char *buf) 430static ssize_t get_component_active(struct device *cdev,
431 struct device_attribute *attr, char *buf)
384{ 432{
385 struct enclosure_device *edev = to_enclosure_device(cdev->parent); 433 struct enclosure_device *edev = to_enclosure_device(cdev->parent);
386 struct enclosure_component *ecomp = to_enclosure_component(cdev); 434 struct enclosure_component *ecomp = to_enclosure_component(cdev);
@@ -390,8 +438,9 @@ static ssize_t get_component_active(struct class_device *cdev, char *buf)
390 return snprintf(buf, 40, "%d\n", ecomp->active); 438 return snprintf(buf, 40, "%d\n", ecomp->active);
391} 439}
392 440
393static ssize_t set_component_active(struct class_device *cdev, const char *buf, 441static ssize_t set_component_active(struct device *cdev,
394 size_t count) 442 struct device_attribute *attr,
443 const char *buf, size_t count)
395{ 444{
396 struct enclosure_device *edev = to_enclosure_device(cdev->parent); 445 struct enclosure_device *edev = to_enclosure_device(cdev->parent);
397 struct enclosure_component *ecomp = to_enclosure_component(cdev); 446 struct enclosure_component *ecomp = to_enclosure_component(cdev);
@@ -402,7 +451,8 @@ static ssize_t set_component_active(struct class_device *cdev, const char *buf,
402 return count; 451 return count;
403} 452}
404 453
405static ssize_t get_component_locate(struct class_device *cdev, char *buf) 454static ssize_t get_component_locate(struct device *cdev,
455 struct device_attribute *attr, char *buf)
406{ 456{
407 struct enclosure_device *edev = to_enclosure_device(cdev->parent); 457 struct enclosure_device *edev = to_enclosure_device(cdev->parent);
408 struct enclosure_component *ecomp = to_enclosure_component(cdev); 458 struct enclosure_component *ecomp = to_enclosure_component(cdev);
@@ -412,8 +462,9 @@ static ssize_t get_component_locate(struct class_device *cdev, char *buf)
412 return snprintf(buf, 40, "%d\n", ecomp->locate); 462 return snprintf(buf, 40, "%d\n", ecomp->locate);
413} 463}
414 464
415static ssize_t set_component_locate(struct class_device *cdev, const char *buf, 465static ssize_t set_component_locate(struct device *cdev,
416 size_t count) 466 struct device_attribute *attr,
467 const char *buf, size_t count)
417{ 468{
418 struct enclosure_device *edev = to_enclosure_device(cdev->parent); 469 struct enclosure_device *edev = to_enclosure_device(cdev->parent);
419 struct enclosure_component *ecomp = to_enclosure_component(cdev); 470 struct enclosure_component *ecomp = to_enclosure_component(cdev);
@@ -424,7 +475,8 @@ static ssize_t set_component_locate(struct class_device *cdev, const char *buf,
424 return count; 475 return count;
425} 476}
426 477
427static ssize_t get_component_type(struct class_device *cdev, char *buf) 478static ssize_t get_component_type(struct device *cdev,
479 struct device_attribute *attr, char *buf)
428{ 480{
429 struct enclosure_component *ecomp = to_enclosure_component(cdev); 481 struct enclosure_component *ecomp = to_enclosure_component(cdev);
430 482
@@ -432,24 +484,32 @@ static ssize_t get_component_type(struct class_device *cdev, char *buf)
432} 484}
433 485
434 486
435static struct class_device_attribute enclosure_component_attrs[] = { 487static DEVICE_ATTR(fault, S_IRUGO | S_IWUSR, get_component_fault,
436 __ATTR(fault, S_IRUGO | S_IWUSR, get_component_fault, 488 set_component_fault);
437 set_component_fault), 489static DEVICE_ATTR(status, S_IRUGO | S_IWUSR, get_component_status,
438 __ATTR(status, S_IRUGO | S_IWUSR, get_component_status, 490 set_component_status);
439 set_component_status), 491static DEVICE_ATTR(active, S_IRUGO | S_IWUSR, get_component_active,
440 __ATTR(active, S_IRUGO | S_IWUSR, get_component_active, 492 set_component_active);
441 set_component_active), 493static DEVICE_ATTR(locate, S_IRUGO | S_IWUSR, get_component_locate,
442 __ATTR(locate, S_IRUGO | S_IWUSR, get_component_locate, 494 set_component_locate);
443 set_component_locate), 495static DEVICE_ATTR(type, S_IRUGO, get_component_type, NULL);
444 __ATTR(type, S_IRUGO, get_component_type, NULL), 496
445 __ATTR_NULL 497static struct attribute *enclosure_component_attrs[] = {
498 &dev_attr_fault.attr,
499 &dev_attr_status.attr,
500 &dev_attr_active.attr,
501 &dev_attr_locate.attr,
502 &dev_attr_type.attr,
503 NULL
446}; 504};
447 505
448static struct class enclosure_component_class = { 506static struct attribute_group enclosure_group = {
449 .name = "enclosure_component", 507 .attrs = enclosure_component_attrs,
450 .owner = THIS_MODULE, 508};
451 .class_dev_attrs = enclosure_component_attrs, 509
452 .release = enclosure_component_release, 510static struct attribute_group *enclosure_groups[] = {
511 &enclosure_group,
512 NULL
453}; 513};
454 514
455static int __init enclosure_init(void) 515static int __init enclosure_init(void)
@@ -459,20 +519,12 @@ static int __init enclosure_init(void)
459 err = class_register(&enclosure_class); 519 err = class_register(&enclosure_class);
460 if (err) 520 if (err)
461 return err; 521 return err;
462 err = class_register(&enclosure_component_class);
463 if (err)
464 goto err_out;
465 522
466 return 0; 523 return 0;
467 err_out:
468 class_unregister(&enclosure_class);
469
470 return err;
471} 524}
472 525
473static void __exit enclosure_exit(void) 526static void __exit enclosure_exit(void)
474{ 527{
475 class_unregister(&enclosure_component_class);
476 class_unregister(&enclosure_class); 528 class_unregister(&enclosure_class);
477} 529}
478 530
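
The conversion above keeps the exported enclosure API in place while switching it from class_device to struct device. A minimal caller sketch, assuming a hypothetical transport driver: my_attach_disk and its two device arguments are placeholders, and the slot number is arbitrary. Per the updated kernel-doc, the reference taken by enclosure_find() is dropped with put_device() on the embedded struct device.

/* Hypothetical caller of the enclosure services API (illustrative sketch only). */
#include <linux/device.h>
#include <linux/enclosure.h>
#include <linux/errno.h>

static int my_attach_disk(struct device *enclosure_parent, struct device *disk)
{
	struct enclosure_device *edev;
	int err;

	/* Look up the enclosure registered against this parent device. */
	edev = enclosure_find(enclosure_parent);
	if (!edev)
		return -ENODEV;

	/* Bind the disk to slot 3; this creates the sysfs cross-links. */
	err = enclosure_add_device(edev, 3, disk);

	put_device(&edev->edev);	/* drop the reference from enclosure_find() */
	return err;
}
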
diff --git a/drivers/misc/intel_menlow.c b/drivers/misc/intel_menlow.c
index de16e88eb8d3..5bb8816c9126 100644
--- a/drivers/misc/intel_menlow.c
+++ b/drivers/misc/intel_menlow.c
@@ -175,28 +175,18 @@ static int intel_menlow_memory_add(struct acpi_device *device)
 		goto end;
 	}
 
-	if (cdev) {
-		acpi_driver_data(device) = cdev;
-		result = sysfs_create_link(&device->dev.kobj,
-					&cdev->device.kobj, "thermal_cooling");
-		if (result)
-			goto unregister;
-
-		result = sysfs_create_link(&cdev->device.kobj,
-					&device->dev.kobj, "device");
-		if (result) {
-			sysfs_remove_link(&device->dev.kobj, "thermal_cooling");
-			goto unregister;
-		}
-	}
+	acpi_driver_data(device) = cdev;
+	result = sysfs_create_link(&device->dev.kobj,
+				&cdev->device.kobj, "thermal_cooling");
+	if (result)
+		printk(KERN_ERR PREFIX "Create sysfs link\n");
+	result = sysfs_create_link(&cdev->device.kobj,
+				&device->dev.kobj, "device");
+	if (result)
+		printk(KERN_ERR PREFIX "Create sysfs link\n");
 
  end:
 	return result;
-
- unregister:
-	thermal_cooling_device_unregister(cdev);
-	return result;
-
 }
 
 static int intel_menlow_memory_remove(struct acpi_device *device, int type)
@@ -213,7 +203,7 @@ static int intel_menlow_memory_remove(struct acpi_device *device, int type)
 	return 0;
 }
 
-const static struct acpi_device_id intel_menlow_memory_ids[] = {
+static const struct acpi_device_id intel_menlow_memory_ids[] = {
 	{"INT0002", 0},
 	{"", 0},
 };
diff --git a/drivers/misc/kgdbts.c b/drivers/misc/kgdbts.c
new file mode 100644
index 000000000000..6d6286c4eeac
--- /dev/null
+++ b/drivers/misc/kgdbts.c
@@ -0,0 +1,1090 @@
1/*
2 * kgdbts is a test suite for kgdb for the sole purpose of validating
3 * that key pieces of the kgdb internals are working properly such as
4 * HW/SW breakpoints, single stepping, and NMI.
5 *
6 * Created by: Jason Wessel <jason.wessel@windriver.com>
7 *
8 * Copyright (c) 2008 Wind River Systems, Inc.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License version 2 as
12 * published by the Free Software Foundation.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
17 * See the GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 */
23/* Information about the kgdb test suite.
24 * -------------------------------------
25 *
26 * The kgdb test suite is designed as a KGDB I/O module which
27 * simulates the communications that a debugger would have with kgdb.
28 * The tests are broken up in to a line by line and referenced here as
29 * a "get" which is kgdb requesting input and "put" which is kgdb
30 * sending a response.
31 *
32 * The kgdb suite can be invoked from the kernel command line
33 * arguments system or executed dynamically at run time. The test
34 * suite uses the variable "kgdbts" to obtain the information about
35 * which tests to run and to configure the verbosity level. The
36 * following are the various characters you can use with the kgdbts=
37 * line:
38 *
39 * When using the "kgdbts=" you only choose one of the following core
40 * test types:
41 * A = Run all the core tests silently
42 * V1 = Run all the core tests with minimal output
43 * V2 = Run all the core tests in debug mode
44 *
45 * You can also specify optional tests:
46 * N## = Go to sleep with interrupts of for ## seconds
47 * to test the HW NMI watchdog
48 * F## = Break at do_fork for ## iterations
49 * S## = Break at sys_open for ## iterations
50 *
51 * NOTE: that the do_fork and sys_open tests are mutually exclusive.
52 *
53 * To invoke the kgdb test suite from boot you use a kernel start
54 * argument as follows:
55 * kgdbts=V1 kgdbwait
56 * Or if you wanted to perform the NMI test for 6 seconds and do_fork
57 * test for 100 forks, you could use:
58 * kgdbts=V1N6F100 kgdbwait
59 *
60 * The test suite can also be invoked at run time with:
61 * echo kgdbts=V1N6F100 > /sys/module/kgdbts/parameters/kgdbts
62 * Or as another example:
63 * echo kgdbts=V2 > /sys/module/kgdbts/parameters/kgdbts
64 *
65 * When developing a new kgdb arch specific implementation or
66 * using these tests for the purpose of regression testing,
67 * several invocations are required.
68 *
69 * 1) Boot with the test suite enabled by using the kernel arguments
70 * "kgdbts=V1F100 kgdbwait"
71 * ## If kgdb arch specific implementation has NMI use
72 * "kgdbts=V1N6F100
73 *
74 * 2) After the system boot run the basic test.
75 * echo kgdbts=V1 > /sys/module/kgdbts/parameters/kgdbts
76 *
77 * 3) Run the concurrency tests. It is best to use n+1
78 * while loops where n is the number of cpus you have
79 * in your system. The example below uses only two
80 * loops.
81 *
82 * ## This tests break points on sys_open
83 * while [ 1 ] ; do find / > /dev/null 2>&1 ; done &
84 * while [ 1 ] ; do find / > /dev/null 2>&1 ; done &
85 * echo kgdbts=V1S10000 > /sys/module/kgdbts/parameters/kgdbts
86 * fg # and hit control-c
87 * fg # and hit control-c
88 * ## This tests break points on do_fork
89 * while [ 1 ] ; do date > /dev/null ; done &
90 * while [ 1 ] ; do date > /dev/null ; done &
91 * echo kgdbts=V1F1000 > /sys/module/kgdbts/parameters/kgdbts
92 * fg # and hit control-c
93 *
94 */
95
96#include <linux/kernel.h>
97#include <linux/kgdb.h>
98#include <linux/ctype.h>
99#include <linux/uaccess.h>
100#include <linux/syscalls.h>
101#include <linux/nmi.h>
102#include <linux/delay.h>
103#include <linux/kthread.h>
104#include <linux/delay.h>
105
106#define v1printk(a...) do { \
107 if (verbose) \
108 printk(KERN_INFO a); \
109 } while (0)
110#define v2printk(a...) do { \
111 if (verbose > 1) \
112 printk(KERN_INFO a); \
113 touch_nmi_watchdog(); \
114 } while (0)
115#define eprintk(a...) do { \
116 printk(KERN_ERR a); \
117 WARN_ON(1); \
118 } while (0)
119#define MAX_CONFIG_LEN 40
120
121static const char hexchars[] = "0123456789abcdef";
122static struct kgdb_io kgdbts_io_ops;
123static char get_buf[BUFMAX];
124static int get_buf_cnt;
125static char put_buf[BUFMAX];
126static int put_buf_cnt;
127static char scratch_buf[BUFMAX];
128static int verbose;
129static int repeat_test;
130static int test_complete;
131static int send_ack;
132static int final_ack;
133static int hw_break_val;
134static int hw_break_val2;
135#if defined(CONFIG_ARM) || defined(CONFIG_MIPS)
136static int arch_needs_sstep_emulation = 1;
137#else
138static int arch_needs_sstep_emulation;
139#endif
140static unsigned long sstep_addr;
141static int sstep_state;
142
143/* Storage for the registers, in GDB format. */
144static unsigned long kgdbts_gdb_regs[(NUMREGBYTES +
145 sizeof(unsigned long) - 1) /
146 sizeof(unsigned long)];
147static struct pt_regs kgdbts_regs;
148
149/* -1 = init not run yet, 0 = unconfigured, 1 = configured. */
150static int configured = -1;
151
152#ifdef CONFIG_KGDB_TESTS_BOOT_STRING
153static char config[MAX_CONFIG_LEN] = CONFIG_KGDB_TESTS_BOOT_STRING;
154#else
155static char config[MAX_CONFIG_LEN];
156#endif
157static struct kparam_string kps = {
158 .string = config,
159 .maxlen = MAX_CONFIG_LEN,
160};
161
162static void fill_get_buf(char *buf);
163
164struct test_struct {
165 char *get;
166 char *put;
167 void (*get_handler)(char *);
168 int (*put_handler)(char *, char *);
169};
170
171struct test_state {
172 char *name;
173 struct test_struct *tst;
174 int idx;
175 int (*run_test) (int, int);
176 int (*validate_put) (char *);
177};
178
179static struct test_state ts;
180
181static int kgdbts_unreg_thread(void *ptr)
182{
183 /* Wait until the tests are complete and then ungresiter the I/O
184 * driver.
185 */
186 while (!final_ack)
187 msleep_interruptible(1500);
188
189 if (configured)
190 kgdb_unregister_io_module(&kgdbts_io_ops);
191 configured = 0;
192
193 return 0;
194}
195
196/* This is marked noinline so that there is a single, well-known location
197 * at which to place a breakpoint.
198 */
199static noinline void kgdbts_break_test(void)
200{
201 v2printk("kgdbts: breakpoint complete\n");
202}
203
204/* Lookup symbol info in the kernel */
205static unsigned long lookup_addr(char *arg)
206{
207 unsigned long addr = 0;
208
209 if (!strcmp(arg, "kgdbts_break_test"))
210 addr = (unsigned long)kgdbts_break_test;
211 else if (!strcmp(arg, "sys_open"))
212 addr = (unsigned long)sys_open;
213 else if (!strcmp(arg, "do_fork"))
214 addr = (unsigned long)do_fork;
215 else if (!strcmp(arg, "hw_break_val"))
216 addr = (unsigned long)&hw_break_val;
217 return addr;
218}
219
220static void break_helper(char *bp_type, char *arg, unsigned long vaddr)
221{
222 unsigned long addr;
223
224 if (arg)
225 addr = lookup_addr(arg);
226 else
227 addr = vaddr;
228
229 sprintf(scratch_buf, "%s,%lx,%i", bp_type, addr,
230 BREAK_INSTR_SIZE);
231 fill_get_buf(scratch_buf);
232}
233
234static void sw_break(char *arg)
235{
236 break_helper("Z0", arg, 0);
237}
238
239static void sw_rem_break(char *arg)
240{
241 break_helper("z0", arg, 0);
242}
243
244static void hw_break(char *arg)
245{
246 break_helper("Z1", arg, 0);
247}
248
249static void hw_rem_break(char *arg)
250{
251 break_helper("z1", arg, 0);
252}
253
254static void hw_write_break(char *arg)
255{
256 break_helper("Z2", arg, 0);
257}
258
259static void hw_rem_write_break(char *arg)
260{
261 break_helper("z2", arg, 0);
262}
263
264static void hw_access_break(char *arg)
265{
266 break_helper("Z4", arg, 0);
267}
268
269static void hw_rem_access_break(char *arg)
270{
271 break_helper("z4", arg, 0);
272}
273
274static void hw_break_val_access(void)
275{
276 hw_break_val2 = hw_break_val;
277}
278
279static void hw_break_val_write(void)
280{
281 hw_break_val++;
282}
283
284static int check_and_rewind_pc(char *put_str, char *arg)
285{
286 unsigned long addr = lookup_addr(arg);
287 int offset = 0;
288
289 kgdb_hex2mem(&put_str[1], (char *)kgdbts_gdb_regs,
290 NUMREGBYTES);
291 gdb_regs_to_pt_regs(kgdbts_gdb_regs, &kgdbts_regs);
292 v2printk("Stopped at IP: %lx\n", instruction_pointer(&kgdbts_regs));
293#ifdef CONFIG_X86
294 /* On x86 a breakpoint stop requires it to be decremented */
295 if (addr + 1 == kgdbts_regs.ip)
296 offset = -1;
297#endif
298 if (strcmp(arg, "silent") &&
299 instruction_pointer(&kgdbts_regs) + offset != addr) {
300 eprintk("kgdbts: BP mismatch %lx expected %lx\n",
301 instruction_pointer(&kgdbts_regs) + offset, addr);
302 return 1;
303 }
304#ifdef CONFIG_X86
305 /* On x86 adjust the instruction pointer if needed */
306 kgdbts_regs.ip += offset;
307#endif
308 return 0;
309}
310
311static int check_single_step(char *put_str, char *arg)
312{
313 unsigned long addr = lookup_addr(arg);
314 /*
315 * From an arch-independent point of view, the instruction pointer
316 * should now be on a different instruction.
317 */
318 kgdb_hex2mem(&put_str[1], (char *)kgdbts_gdb_regs,
319 NUMREGBYTES);
320 gdb_regs_to_pt_regs(kgdbts_gdb_regs, &kgdbts_regs);
321 v2printk("Singlestep stopped at IP: %lx\n",
322 instruction_pointer(&kgdbts_regs));
323 if (instruction_pointer(&kgdbts_regs) == addr) {
324 eprintk("kgdbts: SingleStep failed at %lx\n",
325 instruction_pointer(&kgdbts_regs));
326 return 1;
327 }
328
329 return 0;
330}
331
332static void write_regs(char *arg)
333{
334 memset(scratch_buf, 0, sizeof(scratch_buf));
335 scratch_buf[0] = 'G';
336 pt_regs_to_gdb_regs(kgdbts_gdb_regs, &kgdbts_regs);
337 kgdb_mem2hex((char *)kgdbts_gdb_regs, &scratch_buf[1], NUMREGBYTES);
338 fill_get_buf(scratch_buf);
339}
340
341static void skip_back_repeat_test(char *arg)
342{
343 int go_back = simple_strtol(arg, NULL, 10);
344
345 repeat_test--;
346 if (repeat_test <= 0)
347 ts.idx++;
348 else
349 ts.idx -= go_back;
350 fill_get_buf(ts.tst[ts.idx].get);
351}
352
353static int got_break(char *put_str, char *arg)
354{
355 test_complete = 1;
356 if (!strncmp(put_str+1, arg, 2)) {
357 if (!strncmp(arg, "T0", 2))
358 test_complete = 2;
359 return 0;
360 }
361 return 1;
362}
363
364static void emul_sstep_get(char *arg)
365{
366 if (!arch_needs_sstep_emulation) {
367 fill_get_buf(arg);
368 return;
369 }
370 switch (sstep_state) {
371 case 0:
372 v2printk("Emulate single step\n");
373 /* Start by looking at the current PC */
374 fill_get_buf("g");
375 break;
376 case 1:
377 /* set breakpoint */
378 break_helper("Z0", 0, sstep_addr);
379 break;
380 case 2:
381 /* Continue */
382 fill_get_buf("c");
383 break;
384 case 3:
385 /* Clear breakpoint */
386 break_helper("z0", 0, sstep_addr);
387 break;
388 default:
389 eprintk("kgdbts: ERROR failed sstep get emulation\n");
390 }
391 sstep_state++;
392}
393
394static int emul_sstep_put(char *put_str, char *arg)
395{
396 if (!arch_needs_sstep_emulation) {
397 if (!strncmp(put_str+1, arg, 2))
398 return 0;
399 return 1;
400 }
401 switch (sstep_state) {
402 case 1:
403 /* validate the "g" packet to get the IP */
404 kgdb_hex2mem(&put_str[1], (char *)kgdbts_gdb_regs,
405 NUMREGBYTES);
406 gdb_regs_to_pt_regs(kgdbts_gdb_regs, &kgdbts_regs);
407 v2printk("Stopped at IP: %lx\n",
408 instruction_pointer(&kgdbts_regs));
409 /* Want to stop at IP + break instruction size by default */
410 sstep_addr = instruction_pointer(&kgdbts_regs) +
411 BREAK_INSTR_SIZE;
412 break;
413 case 2:
414 if (strncmp(put_str, "$OK", 3)) {
415 eprintk("kgdbts: failed sstep break set\n");
416 return 1;
417 }
418 break;
419 case 3:
420 if (strncmp(put_str, "$T0", 3)) {
421 eprintk("kgdbts: failed continue sstep\n");
422 return 1;
423 }
424 break;
425 case 4:
426 if (strncmp(put_str, "$OK", 3)) {
427 eprintk("kgdbts: failed sstep break unset\n");
428 return 1;
429 }
430 /* Single step is complete so continue on! */
431 sstep_state = 0;
432 return 0;
433 default:
434 eprintk("kgdbts: ERROR failed sstep put emulation\n");
435 }
436
437 /* Continue on the same test line until emulation is complete */
438 ts.idx--;
439 return 0;
440}
441
442static int final_ack_set(char *put_str, char *arg)
443{
444 if (strncmp(put_str+1, arg, 2))
445 return 1;
446 final_ack = 1;
447 return 0;
448}
449/*
450 * Test to plant a breakpoint and detach, which should clear out the
451 * breakpoint and restore the original instruction.
452 */
453static struct test_struct plant_and_detach_test[] = {
454 { "?", "S0*" }, /* Clear break points */
455 { "kgdbts_break_test", "OK", sw_break, }, /* set sw breakpoint */
456 { "D", "OK" }, /* Detach */
457 { "", "" },
458};
459
460/*
461 * Simple test to write in a software breakpoint, check for the
462 * correct stop location and detach.
463 */
464static struct test_struct sw_breakpoint_test[] = {
465 { "?", "S0*" }, /* Clear break points */
466 { "kgdbts_break_test", "OK", sw_break, }, /* set sw breakpoint */
467 { "c", "T0*", }, /* Continue */
468 { "g", "kgdbts_break_test", 0, check_and_rewind_pc },
469 { "write", "OK", write_regs },
470 { "kgdbts_break_test", "OK", sw_rem_break }, /*remove breakpoint */
471 { "D", "OK" }, /* Detach */
472 { "D", "OK", 0, got_break }, /* If the test worked we made it here */
473 { "", "" },
474};
475
476/*
477 * Test a known bad memory read location to test the fault handler and
478 * read bytes 1-8 at the bad address
479 */
480static struct test_struct bad_read_test[] = {
481 { "?", "S0*" }, /* Clear break points */
482 { "m0,1", "E*" }, /* read 1 byte at address 0 */
483 { "m0,2", "E*" }, /* read 2 bytes at address 0 */
484 { "m0,3", "E*" }, /* read 3 bytes at address 0 */
485 { "m0,4", "E*" }, /* read 4 bytes at address 0 */
486 { "m0,5", "E*" }, /* read 5 bytes at address 0 */
487 { "m0,6", "E*" }, /* read 6 bytes at address 0 */
488 { "m0,7", "E*" }, /* read 7 bytes at address 0 */
489 { "m0,8", "E*" }, /* read 8 bytes at address 0 */
490 { "D", "OK" }, /* Detach which removes all breakpoints and continues */
491 { "", "" },
492};
493
494/*
495 * Test for hitting a breakpoint, remove it, single step, plant it
496 * again and detach.
497 */
498static struct test_struct singlestep_break_test[] = {
499 { "?", "S0*" }, /* Clear break points */
500 { "kgdbts_break_test", "OK", sw_break, }, /* set sw breakpoint */
501 { "c", "T0*", }, /* Continue */
502 { "g", "kgdbts_break_test", 0, check_and_rewind_pc },
503 { "write", "OK", write_regs }, /* Write registers */
504 { "kgdbts_break_test", "OK", sw_rem_break }, /*remove breakpoint */
505 { "s", "T0*", emul_sstep_get, emul_sstep_put }, /* Single step */
506 { "g", "kgdbts_break_test", 0, check_single_step },
507 { "kgdbts_break_test", "OK", sw_break, }, /* set sw breakpoint */
508 { "c", "T0*", }, /* Continue */
509 { "g", "kgdbts_break_test", 0, check_and_rewind_pc },
510 { "write", "OK", write_regs }, /* Write registers */
511 { "D", "OK" }, /* Remove all breakpoints and continues */
512 { "", "" },
513};
514
515/*
516 * Test for hitting a breakpoint at do_fork for however many
517 * iterations are specified by the variable repeat_test.
518 */
519static struct test_struct do_fork_test[] = {
520 { "?", "S0*" }, /* Clear break points */
521 { "do_fork", "OK", sw_break, }, /* set sw breakpoint */
522 { "c", "T0*", }, /* Continue */
523 { "g", "do_fork", 0, check_and_rewind_pc }, /* check location */
524 { "write", "OK", write_regs }, /* Write registers */
525 { "do_fork", "OK", sw_rem_break }, /*remove breakpoint */
526 { "s", "T0*", emul_sstep_get, emul_sstep_put }, /* Single step */
527 { "g", "do_fork", 0, check_single_step },
528 { "do_fork", "OK", sw_break, }, /* set sw breakpoint */
529 { "7", "T0*", skip_back_repeat_test }, /* Loop based on repeat_test */
530 { "D", "OK", 0, final_ack_set }, /* detach and unregister I/O */
531 { "", "" },
532};
533
534/* Test for hitting a breakpoint at sys_open for however many
535 * iterations are specified by the variable repeat_test.
536 */
537static struct test_struct sys_open_test[] = {
538 { "?", "S0*" }, /* Clear break points */
539 { "sys_open", "OK", sw_break, }, /* set sw breakpoint */
540 { "c", "T0*", }, /* Continue */
541 { "g", "sys_open", 0, check_and_rewind_pc }, /* check location */
542 { "write", "OK", write_regs }, /* Write registers */
543 { "sys_open", "OK", sw_rem_break }, /*remove breakpoint */
544 { "s", "T0*", emul_sstep_get, emul_sstep_put }, /* Single step */
545 { "g", "sys_open", 0, check_single_step },
546 { "sys_open", "OK", sw_break, }, /* set sw breakpoint */
547 { "7", "T0*", skip_back_repeat_test }, /* Loop based on repeat_test */
548 { "D", "OK", 0, final_ack_set }, /* detach and unregister I/O */
549 { "", "" },
550};
551
552/*
553 * Test for hitting a simple hw breakpoint
554 */
555static struct test_struct hw_breakpoint_test[] = {
556 { "?", "S0*" }, /* Clear break points */
557 { "kgdbts_break_test", "OK", hw_break, }, /* set hw breakpoint */
558 { "c", "T0*", }, /* Continue */
559 { "g", "kgdbts_break_test", 0, check_and_rewind_pc },
560 { "write", "OK", write_regs },
561 { "kgdbts_break_test", "OK", hw_rem_break }, /*remove breakpoint */
562 { "D", "OK" }, /* Detach */
563 { "D", "OK", 0, got_break }, /* If the test worked we made it here */
564 { "", "" },
565};
566
567/*
568 * Test for hitting a hw write breakpoint
569 */
570static struct test_struct hw_write_break_test[] = {
571 { "?", "S0*" }, /* Clear break points */
572 { "hw_break_val", "OK", hw_write_break, }, /* set hw breakpoint */
573 { "c", "T0*", 0, got_break }, /* Continue */
574 { "g", "silent", 0, check_and_rewind_pc },
575 { "write", "OK", write_regs },
576 { "hw_break_val", "OK", hw_rem_write_break }, /*remove breakpoint */
577 { "D", "OK" }, /* Detach */
578 { "D", "OK", 0, got_break }, /* If the test worked we made it here */
579 { "", "" },
580};
581
582/*
583 * Test for hitting a hw access breakpoint
584 */
585static struct test_struct hw_access_break_test[] = {
586 { "?", "S0*" }, /* Clear break points */
587 { "hw_break_val", "OK", hw_access_break, }, /* set hw breakpoint */
588 { "c", "T0*", 0, got_break }, /* Continue */
589 { "g", "silent", 0, check_and_rewind_pc },
590 { "write", "OK", write_regs },
591 { "hw_break_val", "OK", hw_rem_access_break }, /*remove breakpoint */
592 { "D", "OK" }, /* Detach */
593 { "D", "OK", 0, got_break }, /* If the test worked we made it here */
594 { "", "" },
595};
596
597/*
598 * Test entering the debugger via NMI while sleeping with interrupts off
599 */
600static struct test_struct nmi_sleep_test[] = {
601 { "?", "S0*" }, /* Clear break points */
602 { "c", "T0*", 0, got_break }, /* Continue */
603 { "D", "OK" }, /* Detach */
604 { "D", "OK", 0, got_break }, /* If the test worked we made it here */
605 { "", "" },
606};
607
608static void fill_get_buf(char *buf)
609{
610 unsigned char checksum = 0;
611 int count = 0;
612 char ch;
613
614 strcpy(get_buf, "$");
615 strcat(get_buf, buf);
616 while ((ch = buf[count])) {
617 checksum += ch;
618 count++;
619 }
620 strcat(get_buf, "#");
621 get_buf[count + 2] = hexchars[checksum >> 4];
622 get_buf[count + 3] = hexchars[checksum & 0xf];
623 get_buf[count + 4] = '\0';
624 v2printk("get%i: %s\n", ts.idx, get_buf);
625}
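/*
 * Illustrative sketch only (not part of the test suite): fill_get_buf()
 * above frames packets per the GDB remote serial protocol, i.e.
 * "$<payload>#<checksum>" where the checksum is the modulo-256 sum of the
 * payload bytes, printed as two lowercase hex digits.  A stand-alone helper
 * that computes just the checksum could look like this; "gdb_csum" is a
 * hypothetical name.
 */
static unsigned char gdb_csum(const char *payload)
{
	unsigned char sum = 0;

	while (*payload)
		sum += *payload++;
	return sum;	/* e.g. gdb_csum("g") == 0x67, giving the packet "$g#67" */
}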
626
627static int validate_simple_test(char *put_str)
628{
629 char *chk_str;
630
631 if (ts.tst[ts.idx].put_handler)
632 return ts.tst[ts.idx].put_handler(put_str,
633 ts.tst[ts.idx].put);
634
635 chk_str = ts.tst[ts.idx].put;
636 if (*put_str == '$')
637 put_str++;
638
639 while (*chk_str != '\0' && *put_str != '\0') {
640 /* If someone does a * to match the rest of the string, allow
641 * it, or stop if the received string is complete.
642 */
643 if (*put_str == '#' || *chk_str == '*')
644 return 0;
645 if (*put_str != *chk_str)
646 return 1;
647
648 chk_str++;
649 put_str++;
650 }
651 if (*chk_str == '\0' && (*put_str == '\0' || *put_str == '#'))
652 return 0;
653
654 return 1;
655}
656
657static int run_simple_test(int is_get_char, int chr)
658{
659 int ret = 0;
660 if (is_get_char) {
661 /* Send an ACK on the get if a prior put completed and set the
662 * send ack variable
663 */
664 if (send_ack) {
665 send_ack = 0;
666 return '+';
667 }
668 /* On the first get char, fill the transmit buffer and then
669 * take from the get_string.
670 */
671 if (get_buf_cnt == 0) {
672 if (ts.tst[ts.idx].get_handler)
673 ts.tst[ts.idx].get_handler(ts.tst[ts.idx].get);
674 else
675 fill_get_buf(ts.tst[ts.idx].get);
676 }
677
678 if (get_buf[get_buf_cnt] == '\0') {
679 eprintk("kgdbts: ERROR GET: EOB on '%s' at %i\n",
680 ts.name, ts.idx);
681 get_buf_cnt = 0;
682 fill_get_buf("D");
683 }
684 ret = get_buf[get_buf_cnt];
685 get_buf_cnt++;
686 return ret;
687 }
688
689 /* This is the put_char callback, invoked when kgdb sends data to
690 * this I/O module.
691 */
692 if (ts.tst[ts.idx].get[0] == '\0' &&
693 ts.tst[ts.idx].put[0] == '\0') {
694 eprintk("kgdbts: ERROR: beyond end of test on"
695 " '%s' line %i\n", ts.name, ts.idx);
696 return 0;
697 }
698
699 if (put_buf_cnt >= BUFMAX) {
700 eprintk("kgdbts: ERROR: put buffer overflow on"
701 " '%s' line %i\n", ts.name, ts.idx);
702 put_buf_cnt = 0;
703 return 0;
704 }
705 /* Ignore everything until the first valid packet start '$' */
706 if (put_buf_cnt == 0 && chr != '$')
707 return 0;
708
709 put_buf[put_buf_cnt] = chr;
710 put_buf_cnt++;
711
712 /* End of packet == #XX so look for the '#' */
713 if (put_buf_cnt > 3 && put_buf[put_buf_cnt - 3] == '#') {
714 put_buf[put_buf_cnt] = '\0';
715 v2printk("put%i: %s\n", ts.idx, put_buf);
716 /* Trigger check here */
717 if (ts.validate_put && ts.validate_put(put_buf)) {
718 eprintk("kgdbts: ERROR PUT: end of test "
719 "buffer on '%s' line %i expected %s got %s\n",
720 ts.name, ts.idx, ts.tst[ts.idx].put, put_buf);
721 }
722 ts.idx++;
723 put_buf_cnt = 0;
724 get_buf_cnt = 0;
725 send_ack = 1;
726 }
727 return 0;
728}
729
730static void init_simple_test(void)
731{
732 memset(&ts, 0, sizeof(ts));
733 ts.run_test = run_simple_test;
734 ts.validate_put = validate_simple_test;
735}
736
737static void run_plant_and_detach_test(int is_early)
738{
739 char before[BREAK_INSTR_SIZE];
740 char after[BREAK_INSTR_SIZE];
741
742 probe_kernel_read(before, (char *)kgdbts_break_test,
743 BREAK_INSTR_SIZE);
744 init_simple_test();
745 ts.tst = plant_and_detach_test;
746 ts.name = "plant_and_detach_test";
747 /* Activate test with initial breakpoint */
748 if (!is_early)
749 kgdb_breakpoint();
750 probe_kernel_read(after, (char *)kgdbts_break_test,
751 BREAK_INSTR_SIZE);
752 if (memcmp(before, after, BREAK_INSTR_SIZE)) {
753 printk(KERN_CRIT "kgdbts: ERROR kgdb corrupted memory\n");
754 panic("kgdb memory corruption");
755 }
756
757 /* complete the detach test */
758 if (!is_early)
759 kgdbts_break_test();
760}
761
762static void run_breakpoint_test(int is_hw_breakpoint)
763{
764 test_complete = 0;
765 init_simple_test();
766 if (is_hw_breakpoint) {
767 ts.tst = hw_breakpoint_test;
768 ts.name = "hw_breakpoint_test";
769 } else {
770 ts.tst = sw_breakpoint_test;
771 ts.name = "sw_breakpoint_test";
772 }
773 /* Activate test with initial breakpoint */
774 kgdb_breakpoint();
775 /* run code with the break point in it */
776 kgdbts_break_test();
777 kgdb_breakpoint();
778
779 if (test_complete)
780 return;
781
782 eprintk("kgdbts: ERROR %s test failed\n", ts.name);
783}
784
785static void run_hw_break_test(int is_write_test)
786{
787 test_complete = 0;
788 init_simple_test();
789 if (is_write_test) {
790 ts.tst = hw_write_break_test;
791 ts.name = "hw_write_break_test";
792 } else {
793 ts.tst = hw_access_break_test;
794 ts.name = "hw_access_break_test";
795 }
796 /* Activate test with initial breakpoint */
797 kgdb_breakpoint();
798 hw_break_val_access();
799 if (is_write_test) {
800 if (test_complete == 2)
801 eprintk("kgdbts: ERROR %s broke on access\n",
802 ts.name);
803 hw_break_val_write();
804 }
805 kgdb_breakpoint();
806
807 if (test_complete == 1)
808 return;
809
810 eprintk("kgdbts: ERROR %s test failed\n", ts.name);
811}
812
813static void run_nmi_sleep_test(int nmi_sleep)
814{
815 unsigned long flags;
816
817 init_simple_test();
818 ts.tst = nmi_sleep_test;
819 ts.name = "nmi_sleep_test";
820 /* Activate test with initial breakpoint */
821 kgdb_breakpoint();
822 local_irq_save(flags);
823 mdelay(nmi_sleep*1000);
824 touch_nmi_watchdog();
825 local_irq_restore(flags);
826 if (test_complete != 2)
827 eprintk("kgdbts: ERROR nmi_test did not hit nmi\n");
828 kgdb_breakpoint();
829 if (test_complete == 1)
830 return;
831
832 eprintk("kgdbts: ERROR %s test failed\n", ts.name);
833}
834
835static void run_bad_read_test(void)
836{
837 init_simple_test();
838 ts.tst = bad_read_test;
839 ts.name = "bad_read_test";
840 /* Activate test with initial breakpoint */
841 kgdb_breakpoint();
842}
843
844static void run_do_fork_test(void)
845{
846 init_simple_test();
847 ts.tst = do_fork_test;
848 ts.name = "do_fork_test";
849 /* Activate test with initial breakpoint */
850 kgdb_breakpoint();
851}
852
853static void run_sys_open_test(void)
854{
855 init_simple_test();
856 ts.tst = sys_open_test;
857 ts.name = "sys_open_test";
858 /* Activate test with initial breakpoint */
859 kgdb_breakpoint();
860}
861
862static void run_singlestep_break_test(void)
863{
864 init_simple_test();
865 ts.tst = singlestep_break_test;
866 ts.name = "singlestep_breakpoint_test";
867 /* Activate test with initial breakpoint */
868 kgdb_breakpoint();
869 kgdbts_break_test();
870 kgdbts_break_test();
871}
872
873static void kgdbts_run_tests(void)
874{
875 char *ptr;
876 int fork_test = 0;
877 int sys_open_test = 0;
878 int nmi_sleep = 0;
879
880 ptr = strstr(config, "F");
881 if (ptr)
882 fork_test = simple_strtol(ptr+1, NULL, 10);
883 ptr = strstr(config, "S");
884 if (ptr)
885 sys_open_test = simple_strtol(ptr+1, NULL, 10);
886 ptr = strstr(config, "N");
887 if (ptr)
888 nmi_sleep = simple_strtol(ptr+1, NULL, 10);
889
890 /* required internal KGDB tests */
891 v1printk("kgdbts:RUN plant and detach test\n");
892 run_plant_and_detach_test(0);
893 v1printk("kgdbts:RUN sw breakpoint test\n");
894 run_breakpoint_test(0);
895 v1printk("kgdbts:RUN bad memory access test\n");
896 run_bad_read_test();
897 v1printk("kgdbts:RUN singlestep breakpoint test\n");
898 run_singlestep_break_test();
899
900 /* ===Optional tests=== */
901
902 /* All HW break point tests */
903 if (arch_kgdb_ops.flags & KGDB_HW_BREAKPOINT) {
904 v1printk("kgdbts:RUN hw breakpoint test\n");
905 run_breakpoint_test(1);
906 v1printk("kgdbts:RUN hw write breakpoint test\n");
907 run_hw_break_test(1);
908 v1printk("kgdbts:RUN access write breakpoint test\n");
909 run_hw_break_test(0);
910 }
911
912 if (nmi_sleep) {
913 v1printk("kgdbts:RUN NMI sleep %i seconds test\n", nmi_sleep);
914 run_nmi_sleep_test(nmi_sleep);
915 }
916
917 /* If the do_fork test is run it will be the last test that is
918 * executed because a kernel thread will be spawned at the very
919 * end to unregister the debug hooks.
920 */
921 if (fork_test) {
922 repeat_test = fork_test;
923 printk(KERN_INFO "kgdbts:RUN do_fork for %i breakpoints\n",
924 repeat_test);
925 kthread_run(kgdbts_unreg_thread, 0, "kgdbts_unreg");
926 run_do_fork_test();
927 return;
928 }
929
930 /* If the sys_open test is run it will be the last test that is
931 * executed because a kernel thread will be spawned at the very
932 * end to unregister the debug hooks.
933 */
934 if (sys_open_test) {
935 repeat_test = sys_open_test;
936 printk(KERN_INFO "kgdbts:RUN sys_open for %i breakpoints\n",
937 repeat_test);
938 kthread_run(kgdbts_unreg_thread, 0, "kgdbts_unreg");
939 run_sys_open_test();
940 return;
941 }
942 /* Shutdown and unregister */
943 kgdb_unregister_io_module(&kgdbts_io_ops);
944 configured = 0;
945}
946
947static int kgdbts_option_setup(char *opt)
948{
949 if (strlen(opt) > MAX_CONFIG_LEN) {
950 printk(KERN_ERR "kgdbts: config string too long\n");
951 return -ENOSPC;
952 }
953 strcpy(config, opt);
954
955 verbose = 0;
956 if (strstr(config, "V1"))
957 verbose = 1;
958 if (strstr(config, "V2"))
959 verbose = 2;
960
961 return 0;
962}
963
964__setup("kgdbts=", kgdbts_option_setup);
965
966static int configure_kgdbts(void)
967{
968 int err = 0;
969
970 if (!strlen(config) || isspace(config[0]))
971 goto noconfig;
972 err = kgdbts_option_setup(config);
973 if (err)
974 goto noconfig;
975
976 final_ack = 0;
977 run_plant_and_detach_test(1);
978
979 err = kgdb_register_io_module(&kgdbts_io_ops);
980 if (err) {
981 configured = 0;
982 return err;
983 }
984 configured = 1;
985 kgdbts_run_tests();
986
987 return err;
988
989noconfig:
990 config[0] = 0;
991 configured = 0;
992
993 return err;
994}
995
996static int __init init_kgdbts(void)
997{
998 /* Already configured? */
999 if (configured == 1)
1000 return 0;
1001
1002 return configure_kgdbts();
1003}
1004
1005static void cleanup_kgdbts(void)
1006{
1007 if (configured == 1)
1008 kgdb_unregister_io_module(&kgdbts_io_ops);
1009}
1010
1011static int kgdbts_get_char(void)
1012{
1013 int val = 0;
1014
1015 if (ts.run_test)
1016 val = ts.run_test(1, 0);
1017
1018 return val;
1019}
1020
1021static void kgdbts_put_char(u8 chr)
1022{
1023 if (ts.run_test)
1024 ts.run_test(0, chr);
1025}
1026
1027static int param_set_kgdbts_var(const char *kmessage, struct kernel_param *kp)
1028{
1029 int len = strlen(kmessage);
1030
1031 if (len >= MAX_CONFIG_LEN) {
1032 printk(KERN_ERR "kgdbts: config string too long\n");
1033 return -ENOSPC;
1034 }
1035
1036 /* Only copy in the string if the init function has not run yet */
1037 if (configured < 0) {
1038 strcpy(config, kmessage);
1039 return 0;
1040 }
1041
1042 if (kgdb_connected) {
1043 printk(KERN_ERR
1044 "kgdbts: Cannot reconfigure while KGDB is connected.\n");
1045
1046 return -EBUSY;
1047 }
1048
1049 strcpy(config, kmessage);
1050 /* Chop out \n char as a result of echo */
1051 if (config[len - 1] == '\n')
1052 config[len - 1] = '\0';
1053
1054 if (configured == 1)
1055 cleanup_kgdbts();
1056
1057 /* Go and configure with the new params. */
1058 return configure_kgdbts();
1059}
1060
1061static void kgdbts_pre_exp_handler(void)
1062{
1063 /* Increment the module count when the debugger is active */
1064 if (!kgdb_connected)
1065 try_module_get(THIS_MODULE);
1066}
1067
1068static void kgdbts_post_exp_handler(void)
1069{
1070 /* decrement the module count when the debugger detaches */
1071 if (!kgdb_connected)
1072 module_put(THIS_MODULE);
1073}
1074
1075static struct kgdb_io kgdbts_io_ops = {
1076 .name = "kgdbts",
1077 .read_char = kgdbts_get_char,
1078 .write_char = kgdbts_put_char,
1079 .pre_exception = kgdbts_pre_exp_handler,
1080 .post_exception = kgdbts_post_exp_handler,
1081};
1082
1083module_init(init_kgdbts);
1084module_exit(cleanup_kgdbts);
1085module_param_call(kgdbts, param_set_kgdbts_var, param_get_string, &kps, 0644);
1086MODULE_PARM_DESC(kgdbts, "<A|V1|V2>[F#|S#][N#]");
1087MODULE_DESCRIPTION("KGDB Test Suite");
1088MODULE_LICENSE("GPL");
1089MODULE_AUTHOR("Wind River Systems, Inc.");
1090
diff --git a/drivers/misc/sgi-xp/Makefile b/drivers/misc/sgi-xp/Makefile
new file mode 100644
index 000000000000..b6e40a7958ce
--- /dev/null
+++ b/drivers/misc/sgi-xp/Makefile
@@ -0,0 +1,11 @@
1#
2# Makefile for SGI's XP devices.
3#
4
5obj-$(CONFIG_SGI_XP) += xp.o
6xp-y := xp_main.o xp_nofault.o
7
8obj-$(CONFIG_SGI_XP) += xpc.o
9xpc-y := xpc_main.o xpc_channel.o xpc_partition.o
10
11obj-$(CONFIG_SGI_XP) += xpnet.o
diff --git a/drivers/misc/sgi-xp/xp.h b/drivers/misc/sgi-xp/xp.h
new file mode 100644
index 000000000000..5515234be86a
--- /dev/null
+++ b/drivers/misc/sgi-xp/xp.h
@@ -0,0 +1,463 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * Copyright (C) 2004-2008 Silicon Graphics, Inc. All rights reserved.
7 */
8
9/*
10 * External Cross Partition (XP) structures and defines.
11 */
12
13#ifndef _DRIVERS_MISC_SGIXP_XP_H
14#define _DRIVERS_MISC_SGIXP_XP_H
15
16#include <linux/cache.h>
17#include <linux/hardirq.h>
18#include <linux/mutex.h>
19#include <asm/sn/types.h>
20#include <asm/sn/bte.h>
21
22#ifdef USE_DBUG_ON
23#define DBUG_ON(condition) BUG_ON(condition)
24#else
25#define DBUG_ON(condition)
26#endif
27
28/*
29 * Define the maximum number of logically defined partitions the system
30 * can support. It is constrained by the maximum number of hardware
31 * partitionable regions. The term 'region' in this context refers to the
32 * minimum number of nodes that can comprise an access protection grouping.
33 * The access protection is in regard to memory, IPI and IOI.
34 *
35 * The maximum number of hardware partitionable regions is equal to the
36 * maximum number of nodes in the entire system divided by the minimum number
37 * of nodes that comprise an access protection grouping.
38 */
39#define XP_MAX_PARTITIONS 64
40
41/*
42 * Define the number of u64s required to represent all the C-brick nasids
43 * as a bitmap. The cross-partition kernel modules deal only with
44 * C-brick nasids, thus the need for bitmaps which don't account for
45 * odd-numbered (non C-brick) nasids.
46 */
47#define XP_MAX_PHYSNODE_ID (MAX_NUMALINK_NODES / 2)
48#define XP_NASID_MASK_BYTES ((XP_MAX_PHYSNODE_ID + 7) / 8)
49#define XP_NASID_MASK_WORDS ((XP_MAX_PHYSNODE_ID + 63) / 64)
50
51/*
52 * Wrapper for bte_copy() that, should it return a failure status, will retry
53 * the bte_copy() once in the hope that the failure was due to a temporary
54 * aberration (e.g., the link going down momentarily).
55 *
56 * src - physical address of the source of the transfer.
57 * vdst - virtual address of the destination of the transfer.
58 * len - number of bytes to transfer from source to destination.
59 * mode - see bte_copy() for definition.
60 * notification - see bte_copy() for definition.
61 *
62 * Note: xp_bte_copy() should never be called while holding a spinlock.
63 */
64static inline bte_result_t
65xp_bte_copy(u64 src, u64 vdst, u64 len, u64 mode, void *notification)
66{
67 bte_result_t ret;
68 u64 pdst = ia64_tpa(vdst);
69
70 /*
71 * Ensure that the physically mapped memory is contiguous.
72 *
73 * We do this by ensuring that the memory is from region 7 only.
74 * If the need should arise to use memory from one of the other
75 * regions, then modify the BUG_ON() statement to ensure that the
76 * memory from that region is always physically contiguous.
77 */
78 BUG_ON(REGION_NUMBER(vdst) != RGN_KERNEL);
79
80 ret = bte_copy(src, pdst, len, mode, notification);
81 if ((ret != BTE_SUCCESS) && BTE_ERROR_RETRY(ret)) {
82 if (!in_interrupt())
83 cond_resched();
84
85 ret = bte_copy(src, pdst, len, mode, notification);
86 }
87
88 return ret;
89}
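/*
 * Hypothetical usage sketch (not part of this interface): a caller pulling
 * one cacheline from a remote physical address into kernel (region 7)
 * memory.  "example_pull_cacheline", "remote_pa", "local_buf" and "mode"
 * are illustrative names; mode takes the same flag values as bte_copy().
 */
static inline bte_result_t
example_pull_cacheline(u64 remote_pa, void *local_buf, u64 mode)
{
	/* local_buf must satisfy the region 7 BUG_ON() check made above */
	return xp_bte_copy(remote_pa, (u64)local_buf, L1_CACHE_BYTES, mode,
			   NULL);
}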
90
91/*
92 * XPC establishes channel connections between the local partition and any
93 * other partition that is currently up. Over these channels, kernel-level
94 * `users' can communicate with their counterparts on the other partitions.
95 *
96 * The maximum number of channels is limited to eight. For performance reasons,
97 * the internal cross partition structures require sixteen bytes per channel,
98 * and eight allows all of this interface-shared info to fit in one cache line.
99 *
100 * XPC_NCHANNELS reflects the total number of channels currently defined.
101 * If the need for additional channels arises, one can simply increase
102 * XPC_NCHANNELS accordingly. If the day should come where that number
103 * exceeds the MAXIMUM number of channels allowed (eight), then one will need
104 * to make changes to the XPC code to allow for this.
105 */
106#define XPC_MEM_CHANNEL 0 /* memory channel number */
107#define XPC_NET_CHANNEL 1 /* network channel number */
108
109#define XPC_NCHANNELS 2 /* #of defined channels */
110#define XPC_MAX_NCHANNELS 8 /* max #of channels allowed */
111
112#if XPC_NCHANNELS > XPC_MAX_NCHANNELS
113#error XPC_NCHANNELS exceeds MAXIMUM allowed.
114#endif
115
116/*
117 * The format of an XPC message is as follows:
118 *
119 * +-------+--------------------------------+
120 * | flags |////////////////////////////////|
121 * +-------+--------------------------------+
122 * | message # |
123 * +----------------------------------------+
124 * | payload (user-defined message) |
125 * | |
126 * :
127 * | |
128 * +----------------------------------------+
129 *
130 * The size of the payload is defined by the user via xpc_connect(). A user-
131 * defined message resides in the payload area.
132 *
133 * The user should have no dealings with the message header, but only the
134 * message's payload. When a message entry is allocated (via xpc_allocate())
135 * a pointer to the payload area is returned and not the actual beginning of
136 * the XPC message. The user then constructs a message in the payload area
137 * and passes that pointer as an argument on xpc_send() or xpc_send_notify().
138 *
139 * The size of a message entry (within a message queue) must be a cacheline
140 * sized multiple in order to facilitate the BTE transfer of messages from one
141 * message queue to another. A macro, XPC_MSG_SIZE(), is provided for the user
142 * that wants to fit as many msg entries as possible in a given memory size
143 * (e.g. a memory page).
144 */
145struct xpc_msg {
146 u8 flags; /* FOR XPC INTERNAL USE ONLY */
147 u8 reserved[7]; /* FOR XPC INTERNAL USE ONLY */
148 s64 number; /* FOR XPC INTERNAL USE ONLY */
149
150 u64 payload; /* user defined portion of message */
151};
152
153#define XPC_MSG_PAYLOAD_OFFSET (u64) (&((struct xpc_msg *)0)->payload)
154#define XPC_MSG_SIZE(_payload_size) \
155 L1_CACHE_ALIGN(XPC_MSG_PAYLOAD_OFFSET + (_payload_size))
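/*
 * Illustrative only (names and the availability of PAGE_SIZE here are
 * assumptions): how a hypothetical user might size a message queue so that
 * whole cacheline-aligned entries fill one page, given a 64-byte payload.
 */
static inline int
example_nentries_per_page(void)
{
	return PAGE_SIZE / XPC_MSG_SIZE(64);
}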
156
157/*
158 * Define the return values and values passed to user's callout functions.
159 * (It is important to add new value codes at the end just preceding
160 * xpcUnknownReason, which must have the highest numerical value.)
161 */
162enum xpc_retval {
163 xpcSuccess = 0,
164
165 xpcNotConnected, /* 1: channel is not connected */
166 xpcConnected, /* 2: channel connected (opened) */
167 xpcRETIRED1, /* 3: (formerly xpcDisconnected) */
168
169 xpcMsgReceived, /* 4: message received */
170 xpcMsgDelivered, /* 5: message delivered and acknowledged */
171
172 xpcRETIRED2, /* 6: (formerly xpcTransferFailed) */
173
174 xpcNoWait, /* 7: operation would require wait */
175 xpcRetry, /* 8: retry operation */
176 xpcTimeout, /* 9: timeout in xpc_allocate_msg_wait() */
177 xpcInterrupted, /* 10: interrupted wait */
178
179 xpcUnequalMsgSizes, /* 11: message size disparity between sides */
180 xpcInvalidAddress, /* 12: invalid address */
181
182 xpcNoMemory, /* 13: no memory available for XPC structures */
183 xpcLackOfResources, /* 14: insufficient resources for operation */
184 xpcUnregistered, /* 15: channel is not registered */
185 xpcAlreadyRegistered, /* 16: channel is already registered */
186
187 xpcPartitionDown, /* 17: remote partition is down */
188 xpcNotLoaded, /* 18: XPC module is not loaded */
189 xpcUnloading, /* 19: this side is unloading XPC module */
190
191 xpcBadMagic, /* 20: XPC MAGIC string not found */
192
193 xpcReactivating, /* 21: remote partition was reactivated */
194
195 xpcUnregistering, /* 22: this side is unregistering channel */
196 xpcOtherUnregistering, /* 23: other side is unregistering channel */
197
198 xpcCloneKThread, /* 24: cloning kernel thread */
199 xpcCloneKThreadFailed, /* 25: cloning kernel thread failed */
200
201 xpcNoHeartbeat, /* 26: remote partition has no heartbeat */
202
203 xpcPioReadError, /* 27: PIO read error */
204 xpcPhysAddrRegFailed, /* 28: registration of phys addr range failed */
205
206 xpcBteDirectoryError, /* 29: maps to BTEFAIL_DIR */
207 xpcBtePoisonError, /* 30: maps to BTEFAIL_POISON */
208 xpcBteWriteError, /* 31: maps to BTEFAIL_WERR */
209 xpcBteAccessError, /* 32: maps to BTEFAIL_ACCESS */
210 xpcBtePWriteError, /* 33: maps to BTEFAIL_PWERR */
211 xpcBtePReadError, /* 34: maps to BTEFAIL_PRERR */
212 xpcBteTimeOutError, /* 35: maps to BTEFAIL_TOUT */
213 xpcBteXtalkError, /* 36: maps to BTEFAIL_XTERR */
214 xpcBteNotAvailable, /* 37: maps to BTEFAIL_NOTAVAIL */
215 xpcBteUnmappedError, /* 38: unmapped BTEFAIL_ error */
216
217 xpcBadVersion, /* 39: bad version number */
218 xpcVarsNotSet, /* 40: the XPC variables are not set up */
219 xpcNoRsvdPageAddr, /* 41: unable to get rsvd page's phys addr */
220 xpcInvalidPartid, /* 42: invalid partition ID */
221 xpcLocalPartid, /* 43: local partition ID */
222
223 xpcOtherGoingDown, /* 44: other side going down, reason unknown */
224 xpcSystemGoingDown, /* 45: system is going down, reason unknown */
225 xpcSystemHalt, /* 46: system is being halted */
226 xpcSystemReboot, /* 47: system is being rebooted */
227 xpcSystemPoweroff, /* 48: system is being powered off */
228
229 xpcDisconnecting, /* 49: channel disconnecting (closing) */
230
231 xpcOpenCloseError, /* 50: channel open/close protocol error */
232
233 xpcDisconnected, /* 51: channel disconnected (closed) */
234
235 xpcBteSh2Start, /* 52: BTE CRB timeout */
236
237 /* 53: 0x1 BTE Error Response Short */
238 xpcBteSh2RspShort = xpcBteSh2Start + BTEFAIL_SH2_RESP_SHORT,
239
240 /* 54: 0x2 BTE Error Response Long */
241 xpcBteSh2RspLong = xpcBteSh2Start + BTEFAIL_SH2_RESP_LONG,
242
243 /* 56: 0x4 BTE Error Response DSB */
244 xpcBteSh2RspDSB = xpcBteSh2Start + BTEFAIL_SH2_RESP_DSP,
245
246 /* 60: 0x8 BTE Error Response Access */
247 xpcBteSh2RspAccess = xpcBteSh2Start + BTEFAIL_SH2_RESP_ACCESS,
248
249 /* 68: 0x10 BTE Error CRB timeout */
250 xpcBteSh2CRBTO = xpcBteSh2Start + BTEFAIL_SH2_CRB_TO,
251
252 /* 84: 0x20 BTE Error NACK limit */
253 xpcBteSh2NACKLimit = xpcBteSh2Start + BTEFAIL_SH2_NACK_LIMIT,
254
255 /* 115: BTE end */
256 xpcBteSh2End = xpcBteSh2Start + BTEFAIL_SH2_ALL,
257
258 xpcUnknownReason /* 116: unknown reason - must be last in enum */
259};
260
261/*
262 * Define the callout function types used by XPC to update the user on
263 * connection activity and state changes (via the user function registered by
264 * xpc_connect()) and to notify them of messages received and delivered (via
265 * the user function registered by xpc_send_notify()).
266 *
267 * The two function types are xpc_channel_func and xpc_notify_func and
268 * both share the following arguments, with the exception of "data", which
269 * only xpc_channel_func has.
270 *
271 * Arguments:
272 *
273 * reason - reason code. (See following table.)
274 * partid - partition ID associated with condition.
275 * ch_number - channel # associated with condition.
276 * data - pointer to optional data. (See following table.)
277 * key - pointer to optional user-defined value provided as the "key"
278 * argument to xpc_connect() or xpc_send_notify().
279 *
280 * In the following table the "Optional Data" column applies to callouts made
281 * to functions registered by xpc_connect(). A "NA" in that column indicates
282 * that this reason code can be passed to functions registered by
283 * xpc_send_notify() (i.e. they don't have data arguments).
284 *
285 * Also, the first three reason codes in the following table indicate
286 * success, whereas the others indicate failure. When a failure reason code
287 * is received, one can assume that the channel is not connected.
288 *
289 *
290 * Reason Code | Cause | Optional Data
291 * =====================+================================+=====================
292 * xpcConnected | connection has been established| max #of entries
293 * | to the specified partition on | allowed in message
294 * | the specified channel | queue
295 * ---------------------+--------------------------------+---------------------
296 * xpcMsgReceived | an XPC message arrived from | address of payload
297 * | the specified partition on the |
298 * | specified channel | [the user must call
299 * | | xpc_received() when
300 * | | finished with the
301 * | | payload]
302 * ---------------------+--------------------------------+---------------------
303 * xpcMsgDelivered | notification that the message | NA
304 * | was delivered to the intended |
305 * | recipient and that they have |
306 * | acknowledged its receipt by |
307 * | calling xpc_received() |
308 * =====================+================================+=====================
309 * xpcUnequalMsgSizes | can't connect to the specified | NULL
310 * | partition on the specified |
311 * | channel because of mismatched |
312 * | message sizes |
313 * ---------------------+--------------------------------+---------------------
314 * xpcNoMemory | insufficient memory available | NULL
315 * | to allocate message queue |
316 * ---------------------+--------------------------------+---------------------
317 * xpcLackOfResources | lack of resources to create | NULL
318 * | the necessary kthreads to |
319 * | support the channel |
320 * ---------------------+--------------------------------+---------------------
321 * xpcUnregistering | this side's user has | NULL or NA
322 * | unregistered by calling |
323 * | xpc_disconnect() |
324 * ---------------------+--------------------------------+---------------------
325 * xpcOtherUnregistering| the other side's user has | NULL or NA
326 * | unregistered by calling |
327 * | xpc_disconnect() |
328 * ---------------------+--------------------------------+---------------------
329 * xpcNoHeartbeat | the other side's XPC is no | NULL or NA
330 * | longer heartbeating |
331 * | |
332 * ---------------------+--------------------------------+---------------------
333 * xpcUnloading | this side's XPC module is | NULL or NA
334 * | being unloaded |
335 * | |
336 * ---------------------+--------------------------------+---------------------
337 * xpcOtherUnloading | the other side's XPC module is | NULL or NA
338 * | being unloaded |
339 * | |
340 * ---------------------+--------------------------------+---------------------
341 * xpcPioReadError | xp_nofault_PIOR() returned an | NULL or NA
342 * | error while sending an IPI |
343 * | |
344 * ---------------------+--------------------------------+---------------------
345 * xpcInvalidAddress | the address either received or | NULL or NA
346 * | sent by the specified partition|
347 * | is invalid |
348 * ---------------------+--------------------------------+---------------------
349 * xpcBteNotAvailable | attempt to pull data from the | NULL or NA
350 * xpcBtePoisonError | specified partition over the |
351 * xpcBteWriteError | specified channel via a |
352 * xpcBteAccessError | bte_copy() failed |
353 * xpcBteTimeOutError | |
354 * xpcBteXtalkError | |
355 * xpcBteDirectoryError | |
356 * xpcBteGenericError | |
357 * xpcBteUnmappedError | |
358 * ---------------------+--------------------------------+---------------------
359 * xpcUnknownReason | the specified channel to the | NULL or NA
360 * | specified partition was |
361 * | unavailable for unknown reasons|
362 * =====================+================================+=====================
363 */
364
365typedef void (*xpc_channel_func) (enum xpc_retval reason, partid_t partid,
366 int ch_number, void *data, void *key);
367
368typedef void (*xpc_notify_func) (enum xpc_retval reason, partid_t partid,
369 int ch_number, void *key);
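/*
 * Purely illustrative skeleton (not part of this interface): the shape of a
 * channel callout a user might register via xpc_connect().  The function
 * name is hypothetical; the reason codes and the meaning of "data" follow
 * the table above.
 */
static inline void
example_channel_func(enum xpc_retval reason, partid_t partid, int ch_number,
		     void *data, void *key)
{
	switch (reason) {
	case xpcConnected:
		/* data = max #of entries allowed in the message queue */
		break;
	case xpcMsgReceived:
		/* data = address of payload; call xpc_received() when done */
		break;
	default:
		/* any other reason code means the channel is not connected */
		break;
	}
}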
370
371/*
372 * The following is a registration entry. There is a global array of these,
373 * one per channel. It is used to record the connection registration made
374 * by the users of XPC. As long as a registration entry exists, for any
375 * partition that comes up, XPC will attempt to establish a connection on
376 * that channel. Notification that a connection has been made will occur via
377 * the xpc_channel_func function.
378 *
379 * The 'func' field points to the function to call when asynchronous
380 * notification is required for such events as: a connection established/lost,
381 * or an incoming message received, or an error condition encountered. A
382 * non-NULL 'func' field indicates that there is an active registration for
383 * the channel.
384 */
385struct xpc_registration {
386 struct mutex mutex;
387 xpc_channel_func func; /* function to call */
388 void *key; /* pointer to user's key */
389 u16 nentries; /* #of msg entries in local msg queue */
390 u16 msg_size; /* message queue's message size */
391 u32 assigned_limit; /* limit on #of assigned kthreads */
392 u32 idle_limit; /* limit on #of idle kthreads */
393} ____cacheline_aligned;
394
395#define XPC_CHANNEL_REGISTERED(_c) (xpc_registrations[_c].func != NULL)
396
397/* the following are valid xpc_allocate() flags */
398#define XPC_WAIT 0 /* wait flag */
399#define XPC_NOWAIT 1 /* no wait flag */
400
401struct xpc_interface {
402 void (*connect) (int);
403 void (*disconnect) (int);
404 enum xpc_retval (*allocate) (partid_t, int, u32, void **);
405 enum xpc_retval (*send) (partid_t, int, void *);
406 enum xpc_retval (*send_notify) (partid_t, int, void *,
407 xpc_notify_func, void *);
408 void (*received) (partid_t, int, void *);
409 enum xpc_retval (*partid_to_nasids) (partid_t, void *);
410};
411
412extern struct xpc_interface xpc_interface;
413
414extern void xpc_set_interface(void (*)(int),
415 void (*)(int),
416 enum xpc_retval (*)(partid_t, int, u32, void **),
417 enum xpc_retval (*)(partid_t, int, void *),
418 enum xpc_retval (*)(partid_t, int, void *,
419 xpc_notify_func, void *),
420 void (*)(partid_t, int, void *),
421 enum xpc_retval (*)(partid_t, void *));
422extern void xpc_clear_interface(void);
423
424extern enum xpc_retval xpc_connect(int, xpc_channel_func, void *, u16,
425 u16, u32, u32);
426extern void xpc_disconnect(int);
427
428static inline enum xpc_retval
429xpc_allocate(partid_t partid, int ch_number, u32 flags, void **payload)
430{
431 return xpc_interface.allocate(partid, ch_number, flags, payload);
432}
433
434static inline enum xpc_retval
435xpc_send(partid_t partid, int ch_number, void *payload)
436{
437 return xpc_interface.send(partid, ch_number, payload);
438}
439
440static inline enum xpc_retval
441xpc_send_notify(partid_t partid, int ch_number, void *payload,
442 xpc_notify_func func, void *key)
443{
444 return xpc_interface.send_notify(partid, ch_number, payload, func, key);
445}
446
447static inline void
448xpc_received(partid_t partid, int ch_number, void *payload)
449{
450 return xpc_interface.received(partid, ch_number, payload);
451}
452
453static inline enum xpc_retval
454xpc_partid_to_nasids(partid_t partid, void *nasids)
455{
456 return xpc_interface.partid_to_nasids(partid, nasids);
457}
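/*
 * Illustrative only (hypothetical helper name): a sender that allocates a
 * message entry, fills in a u64 payload and sends it on XPC_NET_CHANNEL,
 * with error handling reduced to the bare minimum for the sketch.
 */
static inline enum xpc_retval
example_send_u64(partid_t partid, u64 value)
{
	void *payload;
	enum xpc_retval ret;

	ret = xpc_allocate(partid, XPC_NET_CHANNEL, XPC_WAIT, &payload);
	if (ret != xpcSuccess)
		return ret;

	*(u64 *)payload = value;
	return xpc_send(partid, XPC_NET_CHANNEL, payload);
}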
458
459extern u64 xp_nofault_PIOR_target;
460extern int xp_nofault_PIOR(void *);
461extern int xp_error_PIOR(void);
462
463#endif /* _DRIVERS_MISC_SGIXP_XP_H */
diff --git a/drivers/misc/sgi-xp/xp_main.c b/drivers/misc/sgi-xp/xp_main.c
new file mode 100644
index 000000000000..1fbf99bae963
--- /dev/null
+++ b/drivers/misc/sgi-xp/xp_main.c
@@ -0,0 +1,279 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * Copyright (c) 2004-2008 Silicon Graphics, Inc. All Rights Reserved.
7 */
8
9/*
10 * Cross Partition (XP) base.
11 *
12 * XP provides a base from which its users can interact
13 * with XPC, yet not be dependent on XPC.
14 *
15 */
16
17#include <linux/kernel.h>
18#include <linux/interrupt.h>
19#include <linux/module.h>
20#include <linux/mutex.h>
21#include <asm/sn/intr.h>
22#include <asm/sn/sn_sal.h>
23#include "xp.h"
24
25/*
26 * The export of xp_nofault_PIOR needs to happen here since it is defined
27 * in drivers/misc/sgi-xp/xp_nofault.S. The target of the nofault read is
28 * defined here.
29 */
30EXPORT_SYMBOL_GPL(xp_nofault_PIOR);
31
32u64 xp_nofault_PIOR_target;
33EXPORT_SYMBOL_GPL(xp_nofault_PIOR_target);
34
35/*
36 * xpc_registrations[] keeps track of xpc_connect()'s done by the kernel-level
37 * users of XPC.
38 */
39struct xpc_registration xpc_registrations[XPC_NCHANNELS];
40EXPORT_SYMBOL_GPL(xpc_registrations);
41
42/*
43 * Initialize the XPC interface to indicate that XPC isn't loaded.
44 */
45static enum xpc_retval
46xpc_notloaded(void)
47{
48 return xpcNotLoaded;
49}
50
51struct xpc_interface xpc_interface = {
52 (void (*)(int))xpc_notloaded,
53 (void (*)(int))xpc_notloaded,
54 (enum xpc_retval(*)(partid_t, int, u32, void **))xpc_notloaded,
55 (enum xpc_retval(*)(partid_t, int, void *))xpc_notloaded,
56 (enum xpc_retval(*)(partid_t, int, void *, xpc_notify_func, void *))
57 xpc_notloaded,
58 (void (*)(partid_t, int, void *))xpc_notloaded,
59 (enum xpc_retval(*)(partid_t, void *))xpc_notloaded
60};
61EXPORT_SYMBOL_GPL(xpc_interface);
62
63/*
64 * XPC calls this when it (the XPC module) has been loaded.
65 */
66void
67xpc_set_interface(void (*connect) (int),
68 void (*disconnect) (int),
69 enum xpc_retval (*allocate) (partid_t, int, u32, void **),
70 enum xpc_retval (*send) (partid_t, int, void *),
71 enum xpc_retval (*send_notify) (partid_t, int, void *,
72 xpc_notify_func, void *),
73 void (*received) (partid_t, int, void *),
74 enum xpc_retval (*partid_to_nasids) (partid_t, void *))
75{
76 xpc_interface.connect = connect;
77 xpc_interface.disconnect = disconnect;
78 xpc_interface.allocate = allocate;
79 xpc_interface.send = send;
80 xpc_interface.send_notify = send_notify;
81 xpc_interface.received = received;
82 xpc_interface.partid_to_nasids = partid_to_nasids;
83}
84EXPORT_SYMBOL_GPL(xpc_set_interface);
85
86/*
87 * XPC calls this when it (the XPC module) is being unloaded.
88 */
89void
90xpc_clear_interface(void)
91{
92 xpc_interface.connect = (void (*)(int))xpc_notloaded;
93 xpc_interface.disconnect = (void (*)(int))xpc_notloaded;
94 xpc_interface.allocate = (enum xpc_retval(*)(partid_t, int, u32,
95 void **))xpc_notloaded;
96 xpc_interface.send = (enum xpc_retval(*)(partid_t, int, void *))
97 xpc_notloaded;
98 xpc_interface.send_notify = (enum xpc_retval(*)(partid_t, int, void *,
99 xpc_notify_func,
100 void *))xpc_notloaded;
101 xpc_interface.received = (void (*)(partid_t, int, void *))
102 xpc_notloaded;
103 xpc_interface.partid_to_nasids = (enum xpc_retval(*)(partid_t, void *))
104 xpc_notloaded;
105}
106EXPORT_SYMBOL_GPL(xpc_clear_interface);
107
108/*
109 * Register for automatic establishment of a channel connection whenever
110 * a partition comes up.
111 *
112 * Arguments:
113 *
114 * ch_number - channel # to register for connection.
115 * func - function to call for asynchronous notification of channel
116 * state changes (i.e., connection, disconnection, error) and
117 * the arrival of incoming messages.
118 * key - pointer to optional user-defined value that gets passed back
119 * to the user on any callouts made to func.
120 * payload_size - size in bytes of the XPC message's payload area which
121 * contains a user-defined message. The user should make
122 * this large enough to hold their largest message.
123 * nentries - max #of XPC message entries a message queue can contain.
124 * The actual number, which is determined when a connection
125 * is established and may be less than requested, will be
126 * passed to the user via the xpcConnected callout.
127 * assigned_limit - max number of kthreads allowed to be processing
128 * messages (per connection) at any given instant.
129 * idle_limit - max number of kthreads allowed to be idle at any given
130 * instant.
131 */
132enum xpc_retval
133xpc_connect(int ch_number, xpc_channel_func func, void *key, u16 payload_size,
134 u16 nentries, u32 assigned_limit, u32 idle_limit)
135{
136 struct xpc_registration *registration;
137
138 DBUG_ON(ch_number < 0 || ch_number >= XPC_NCHANNELS);
139 DBUG_ON(payload_size == 0 || nentries == 0);
140 DBUG_ON(func == NULL);
141 DBUG_ON(assigned_limit == 0 || idle_limit > assigned_limit);
142
143 registration = &xpc_registrations[ch_number];
144
145 if (mutex_lock_interruptible(&registration->mutex) != 0)
146 return xpcInterrupted;
147
148 /* if XPC_CHANNEL_REGISTERED(ch_number) */
149 if (registration->func != NULL) {
150 mutex_unlock(&registration->mutex);
151 return xpcAlreadyRegistered;
152 }
153
154 /* register the channel for connection */
155 registration->msg_size = XPC_MSG_SIZE(payload_size);
156 registration->nentries = nentries;
157 registration->assigned_limit = assigned_limit;
158 registration->idle_limit = idle_limit;
159 registration->key = key;
160 registration->func = func;
161
162 mutex_unlock(&registration->mutex);
163
164 xpc_interface.connect(ch_number);
165
166 return xpcSuccess;
167}
168EXPORT_SYMBOL_GPL(xpc_connect);
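/*
 * Hypothetical usage sketch (not part of this file): a kernel-level user
 * registering for connections on XPC_NET_CHANNEL with a 128-byte payload,
 * 256 message entries and modest kthread limits.  "example_register" is a
 * made-up name and "example_channel_func" stands in for the caller's real
 * callout (of the xpc_channel_func type).
 */
static int example_register(void)
{
	enum xpc_retval ret;

	ret = xpc_connect(XPC_NET_CHANNEL, example_channel_func, NULL,
			  128, 256, 4, 2);
	return (ret == xpcSuccess) ? 0 : -ENODEV;
}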
169
170/*
171 * Remove the registration for automatic connection of the specified channel
172 * when a partition comes up.
173 *
174 * Before returning, xpc_disconnect() will wait until all connections on the
175 * specified channel have been closed/torn down. The caller can therefore be
176 * assured that they will not receive any more callouts from XPC to the
177 * function they registered via xpc_connect().
178 *
179 * Arguments:
180 *
181 * ch_number - channel # to unregister.
182 */
183void
184xpc_disconnect(int ch_number)
185{
186 struct xpc_registration *registration;
187
188 DBUG_ON(ch_number < 0 || ch_number >= XPC_NCHANNELS);
189
190 registration = &xpc_registrations[ch_number];
191
192 /*
193 * We've decided not to make this a down_interruptible(), since we
194 * figured XPC's users will just turn around and call xpc_disconnect()
195 * again anyway, so we might as well wait, if need be.
196 */
197 mutex_lock(&registration->mutex);
198
199 /* if !XPC_CHANNEL_REGISTERED(ch_number) */
200 if (registration->func == NULL) {
201 mutex_unlock(&registration->mutex);
202 return;
203 }
204
205 /* remove the connection registration for the specified channel */
206 registration->func = NULL;
207 registration->key = NULL;
208 registration->nentries = 0;
209 registration->msg_size = 0;
210 registration->assigned_limit = 0;
211 registration->idle_limit = 0;
212
213 xpc_interface.disconnect(ch_number);
214
215 mutex_unlock(&registration->mutex);
216
217 return;
218}
219EXPORT_SYMBOL_GPL(xpc_disconnect);
220
221int __init
222xp_init(void)
223{
224 int ret, ch_number;
225 u64 func_addr = *(u64 *)xp_nofault_PIOR;
226 u64 err_func_addr = *(u64 *)xp_error_PIOR;
227
228 if (!ia64_platform_is("sn2"))
229 return -ENODEV;
230
231 /*
232 * Register a nofault code region which performs a cross-partition
233 * PIO read. If the PIO read times out, the MCA handler will consume
234 * the error and return to a kernel-provided instruction to indicate
235 * an error. This PIO read exists because it is guaranteed to time out
236 * if the destination is down (AMO operations do not time out on at
237 * least some CPUs on Shubs <= v1.2, which unfortunately we have to
238 * work around).
239 */
240 ret = sn_register_nofault_code(func_addr, err_func_addr, err_func_addr,
241 1, 1);
242 if (ret != 0) {
243 printk(KERN_ERR "XP: can't register nofault code, error=%d\n",
244 ret);
245 }
246 /*
247 * Setup the nofault PIO read target. (There is no special reason why
248 * SH_IPI_ACCESS was selected.)
249 */
250 if (is_shub2())
251 xp_nofault_PIOR_target = SH2_IPI_ACCESS0;
252 else
253 xp_nofault_PIOR_target = SH1_IPI_ACCESS;
254
255 /* initialize the connection registration mutex */
256 for (ch_number = 0; ch_number < XPC_NCHANNELS; ch_number++)
257 mutex_init(&xpc_registrations[ch_number].mutex);
258
259 return 0;
260}
261
262module_init(xp_init);
263
264void __exit
265xp_exit(void)
266{
267 u64 func_addr = *(u64 *)xp_nofault_PIOR;
268 u64 err_func_addr = *(u64 *)xp_error_PIOR;
269
270 /* unregister the PIO read nofault code region */
271 (void)sn_register_nofault_code(func_addr, err_func_addr,
272 err_func_addr, 1, 0);
273}
274
275module_exit(xp_exit);
276
277MODULE_AUTHOR("Silicon Graphics, Inc.");
278MODULE_DESCRIPTION("Cross Partition (XP) base");
279MODULE_LICENSE("GPL");
diff --git a/drivers/misc/sgi-xp/xp_nofault.S b/drivers/misc/sgi-xp/xp_nofault.S
new file mode 100644
index 000000000000..e38d43319429
--- /dev/null
+++ b/drivers/misc/sgi-xp/xp_nofault.S
@@ -0,0 +1,35 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * Copyright (c) 2004-2008 Silicon Graphics, Inc. All Rights Reserved.
7 */
8
9/*
10 * The xp_nofault_PIOR function takes a pointer to a remote PIO register
11 * and attempts to load and consume a value from it. This function
12 * will be registered as a nofault code block. In the event that the
13 * PIO read fails, the MCA handler will force the error to look
14 * corrected and vector to the xp_error_PIOR which will return an error.
15 *
16 * The definition of "consumption" and the time it takes for an MCA
17 * to surface is processor implementation specific. This code
18 * is sufficient on Itanium through the Montvale processor family.
19 * It may need to be adjusted for future processor implementations.
20 *
21 * extern int xp_nofault_PIOR(void *remote_register);
22 */
23
24 .global xp_nofault_PIOR
25xp_nofault_PIOR:
26 mov r8=r0 // Stage a success return value
27 ld8.acq r9=[r32];; // PIO Read the specified register
28 adds r9=1,r9;; // Add to force consumption
29 srlz.i;; // Allow time for MCA to surface
30 br.ret.sptk.many b0;; // Return success
31
32 .global xp_error_PIOR
33xp_error_PIOR:
34 mov r8=1 // Return value of 1
35 br.ret.sptk.many b0;; // Return failure
diff --git a/drivers/misc/sgi-xp/xpc.h b/drivers/misc/sgi-xp/xpc.h
new file mode 100644
index 000000000000..9eb6d4a3269c
--- /dev/null
+++ b/drivers/misc/sgi-xp/xpc.h
@@ -0,0 +1,1187 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * Copyright (c) 2004-2008 Silicon Graphics, Inc. All Rights Reserved.
7 */
8
9/*
10 * Cross Partition Communication (XPC) structures and macros.
11 */
12
13#ifndef _DRIVERS_MISC_SGIXP_XPC_H
14#define _DRIVERS_MISC_SGIXP_XPC_H
15
16#include <linux/interrupt.h>
17#include <linux/sysctl.h>
18#include <linux/device.h>
19#include <linux/mutex.h>
20#include <linux/completion.h>
21#include <asm/pgtable.h>
22#include <asm/processor.h>
23#include <asm/sn/bte.h>
24#include <asm/sn/clksupport.h>
25#include <asm/sn/addrs.h>
26#include <asm/sn/mspec.h>
27#include <asm/sn/shub_mmr.h>
28#include "xp.h"
29
30/*
31 * XPC Version numbers consist of a major and minor number. XPC can always
32 * talk to versions with same major #, and never talk to versions with a
33 * different major #.
34 */
35#define _XPC_VERSION(_maj, _min) (((_maj) << 4) | ((_min) & 0xf))
36#define XPC_VERSION_MAJOR(_v) ((_v) >> 4)
37#define XPC_VERSION_MINOR(_v) ((_v) & 0xf)
38
39/*
40 * The next macros define word or bit representations for given
41 * C-brick nasid in either the SAL provided bit array representing
42 * nasids in the partition/machine or the AMO_t array used for
43 * inter-partition initiation communications.
44 *
45 * For SN2 machines, C-Bricks are always even-numbered NASIDs. As
46 * such, some space will be saved by insisting that nasid information
47 * passed from SAL always be packed for C-Bricks and the
48 * cross-partition interrupts use the same packing scheme.
49 */
50#define XPC_NASID_W_INDEX(_n) (((_n) / 64) / 2)
51#define XPC_NASID_B_INDEX(_n) (((_n) / 2) & (64 - 1))
52#define XPC_NASID_IN_ARRAY(_n, _p) ((_p)[XPC_NASID_W_INDEX(_n)] & \
53 (1UL << XPC_NASID_B_INDEX(_n)))
54#define XPC_NASID_FROM_W_B(_w, _b) (((_w) * 64 + (_b)) * 2)
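/*
 * Worked example (illustrative only, not part of the original source):
 * for the even C-brick nasid 130, XPC_NASID_W_INDEX(130) == (130 / 64) / 2
 * == 1 and XPC_NASID_B_INDEX(130) == (130 / 2) & 63 == 1, so the nasid is
 * represented by bit 1 of word 1 of the packed array, and
 * XPC_NASID_FROM_W_B(1, 1) == (1 * 64 + 1) * 2 == 130 recovers it.
 */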
55
56#define XPC_HB_DEFAULT_INTERVAL 5 /* incr HB every x secs */
57#define XPC_HB_CHECK_DEFAULT_INTERVAL 20 /* check HB every x secs */
58
59/* define the process name of the HB checker and the CPU it is pinned to */
60#define XPC_HB_CHECK_THREAD_NAME "xpc_hb"
61#define XPC_HB_CHECK_CPU 0
62
63/* define the process name of the discovery thread */
64#define XPC_DISCOVERY_THREAD_NAME "xpc_discovery"
65
66/*
67 * the reserved page
68 *
69 * SAL reserves one page of memory per partition for XPC. Though a full page
70 * in length (16384 bytes), its starting address is not page aligned, but it
71 * is cacheline aligned. The reserved page consists of the following:
72 *
73 * reserved page header
74 *
75 * The first cacheline of the reserved page contains the header
76 * (struct xpc_rsvd_page). Before SAL initialization has completed,
77 * SAL has set up the following fields of the reserved page header:
78 * SAL_signature, SAL_version, partid, and nasids_size. The other
79 * fields are set up by XPC. (xpc_rsvd_page points to the local
80 * partition's reserved page.)
81 *
82 * part_nasids mask
83 * mach_nasids mask
84 *
85 * SAL also sets up two bitmaps (or masks), one that reflects the actual
86 * nasids in this partition (part_nasids), and the other that reflects
87 * the actual nasids in the entire machine (mach_nasids). We're only
88 * interested in the even numbered nasids (which contain the processors
89 * and/or memory), so we only need half as many bits to represent the
90 * nasids. The part_nasids mask is located starting at the first cacheline
91 * following the reserved page header. The mach_nasids mask follows right
92 * after the part_nasids mask. The size in bytes of each mask is reflected
93 * by the reserved page header field 'nasids_size'. (Local partition's
94 * mask pointers are xpc_part_nasids and xpc_mach_nasids.)
95 *
96 * vars
97 * vars part
98 *
99 * Immediately following the mach_nasids mask are the XPC variables
100 * required by other partitions. First are those that are generic to all
101 * partitions (vars), followed on the next available cacheline by those
102 * which are partition specific (vars part). These are set up by XPC.
103 * (Local partition's vars pointers are xpc_vars and xpc_vars_part.)
104 *
105 * Note: Until vars_pa is set, the partition XPC code has not been initialized.
106 */
107struct xpc_rsvd_page {
108 u64 SAL_signature; /* SAL: unique signature */
109 u64 SAL_version; /* SAL: version */
110 u8 partid; /* SAL: partition ID */
111 u8 version;
112 u8 pad1[6]; /* align to next u64 in cacheline */
113 u64 vars_pa; /* physical address of struct xpc_vars */
114 struct timespec stamp; /* time when reserved page was setup by XPC */
115 u64 pad2[9]; /* align to last u64 in cacheline */
116 u64 nasids_size; /* SAL: size of each nasid mask in bytes */
117};
118
119#define XPC_RP_VERSION _XPC_VERSION(1, 1) /* version 1.1 of the reserved page */
120
121#define XPC_SUPPORTS_RP_STAMP(_version) \
122 (_version >= _XPC_VERSION(1, 1))
123
124/*
125 * compare stamps - the return value is:
126 *
127 * < 0, if stamp1 < stamp2
128 * = 0, if stamp1 == stamp2
129 * > 0, if stamp1 > stamp2
130 */
131static inline int
132xpc_compare_stamps(struct timespec *stamp1, struct timespec *stamp2)
133{
134 int ret;
135
136 ret = stamp1->tv_sec - stamp2->tv_sec;
137 if (ret == 0)
138 ret = stamp1->tv_nsec - stamp2->tv_nsec;
139
140 return ret;
141}
142
143/*
144 * Define the structures by which XPC variables can be exported to other
145 * partitions. (There are two: struct xpc_vars and struct xpc_vars_part)
146 */
147
148/*
149 * The following structure describes the partition generic variables
150 * needed by other partitions in order to properly initialize.
151 *
152 * struct xpc_vars version number also applies to struct xpc_vars_part.
153 * Changes to either structure and/or related functionality should be
154 * reflected by incrementing either the major or minor version numbers
155 * of struct xpc_vars.
156 */
157struct xpc_vars {
158 u8 version;
159 u64 heartbeat;
160 u64 heartbeating_to_mask;
161 u64 heartbeat_offline; /* if 0, heartbeat should be changing */
162 int act_nasid;
163 int act_phys_cpuid;
164 u64 vars_part_pa;
165 u64 amos_page_pa; /* paddr of page of AMOs from MSPEC driver */
166 AMO_t *amos_page; /* vaddr of page of AMOs from MSPEC driver */
167};
168
169#define XPC_V_VERSION _XPC_VERSION(3, 1) /* version 3.1 of the cross vars */
170
171#define XPC_SUPPORTS_DISENGAGE_REQUEST(_version) \
172 (_version >= _XPC_VERSION(3, 1))
173
174static inline int
175xpc_hb_allowed(partid_t partid, struct xpc_vars *vars)
176{
177 return ((vars->heartbeating_to_mask & (1UL << partid)) != 0);
178}
179
180static inline void
181xpc_allow_hb(partid_t partid, struct xpc_vars *vars)
182{
183 u64 old_mask, new_mask;
184
185 do {
186 old_mask = vars->heartbeating_to_mask;
187 new_mask = (old_mask | (1UL << partid));
188 } while (cmpxchg(&vars->heartbeating_to_mask, old_mask, new_mask) !=
189 old_mask);
190}
191
192static inline void
193xpc_disallow_hb(partid_t partid, struct xpc_vars *vars)
194{
195 u64 old_mask, new_mask;
196
197 do {
198 old_mask = vars->heartbeating_to_mask;
199 new_mask = (old_mask & ~(1UL << partid));
200 } while (cmpxchg(&vars->heartbeating_to_mask, old_mask, new_mask) !=
201 old_mask);
202}
203
204/*
205 * The AMOs page consists of a number of AMO variables which are divided into
206 * four groups. The first two groups are used to identify an IRQ's sender.
207 * These two groups consist of 64 and 128 AMO variables respectively. The last
208 * two groups, consisting of just one AMO variable each, are used to identify
209 * the remote partitions that are currently engaged (from the viewpoint of
210 * the XPC running on the remote partition).
211 */
212#define XPC_NOTIFY_IRQ_AMOS 0
213#define XPC_ACTIVATE_IRQ_AMOS (XPC_NOTIFY_IRQ_AMOS + XP_MAX_PARTITIONS)
214#define XPC_ENGAGED_PARTITIONS_AMO (XPC_ACTIVATE_IRQ_AMOS + XP_NASID_MASK_WORDS)
215#define XPC_DISENGAGE_REQUEST_AMO (XPC_ENGAGED_PARTITIONS_AMO + 1)
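/*
 * Resulting AMO page layout, using the group sizes given in the comment
 * above (64 notify and 128 activate variables): indices 0-63 are the
 * notify IRQ AMOs (one per possible partition), 64-191 are the activate
 * IRQ AMOs (one per word of the packed nasid mask), 192 is the
 * engaged-partitions AMO and 193 the disengage-request AMO.
 */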
216
217/*
218 * The following structure describes the per partition specific variables.
219 *
220 * An array of these structures, one per partition, will be defined. As a
221 * partition becomes active, XPC will copy the array entry corresponding to
222 * itself from that partition. It is desirable that the size of this
223 * structure evenly divide into a cacheline, such that none of the entries
224 * in this array crosses a cacheline boundary. As it is now, each entry
225 * occupies half a cacheline.
226 */
227struct xpc_vars_part {
228 u64 magic;
229
230 u64 openclose_args_pa; /* physical address of open and close args */
231 u64 GPs_pa; /* physical address of Get/Put values */
232
233 u64 IPI_amo_pa; /* physical address of IPI AMO_t structure */
234 int IPI_nasid; /* nasid of where to send IPIs */
235 int IPI_phys_cpuid; /* physical CPU ID of where to send IPIs */
236
237 u8 nchannels; /* #of defined channels supported */
238
239 u8 reserved[23]; /* pad to a full 64 bytes */
240};
241
242/*
243 * The vars_part MAGIC numbers play a part in the first contact protocol.
244 *
245 * MAGIC1 indicates that the per partition specific variables for a remote
246 * partition have been initialized by this partition.
247 *
248 * MAGIC2 indicates that this partition has pulled the remote partition's
249 * per partition variables that pertain to this partition.
250 */
251#define XPC_VP_MAGIC1 0x0053524156435058L /* 'XPCVARS\0'L (little endian) */
252#define XPC_VP_MAGIC2 0x0073726176435058L /* 'XPCvars\0'L (little endian) */
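/*
 * Rough sketch of the first contact sequence implied by these magic values
 * (see xpc_setup_infrastructure() and xpc_pull_remote_vars_part() below):
 * each side fills in the xpc_vars_part[] entry it exports for the other
 * partition and stamps it with XPC_VP_MAGIC1; once a side has pulled the
 * other's entry and recorded the addresses found there, it advances its own
 * entry to XPC_VP_MAGIC2; a pull that still sees XPC_VP_MAGIC1 on the
 * remote side is retried (xpcRetry) until the remote has pulled in turn.
 */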
253
254/* the reserved page sizes and offsets */
255
256#define XPC_RP_HEADER_SIZE L1_CACHE_ALIGN(sizeof(struct xpc_rsvd_page))
257#define XPC_RP_VARS_SIZE L1_CACHE_ALIGN(sizeof(struct xpc_vars))
258
259#define XPC_RP_PART_NASIDS(_rp) ((u64 *)((u8 *)(_rp) + XPC_RP_HEADER_SIZE))
260#define XPC_RP_MACH_NASIDS(_rp) (XPC_RP_PART_NASIDS(_rp) + xp_nasid_mask_words)
261#define XPC_RP_VARS(_rp) ((struct xpc_vars *)(XPC_RP_MACH_NASIDS(_rp) + \
262 xp_nasid_mask_words))
263#define XPC_RP_VARS_PART(_rp) ((struct xpc_vars_part *) \
264 ((u8 *)XPC_RP_VARS(_rp) + XPC_RP_VARS_SIZE))
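/*
 * Taken together, these macros describe the reserved page layout in order:
 * the cacheline-aligned header (XPC_RP_HEADER_SIZE), the part_nasids mask
 * (xp_nasid_mask_words u64 words), the mach_nasids mask (same size), the
 * cross-partition vars (XPC_RP_VARS_SIZE) and finally the per-partition
 * vars_part array.
 */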
265
266/*
267 * Functions registered by add_timer() or called by kernel_thread() only
268 * allow for a single 64-bit argument. The following macros can be used to
269 * pack and unpack two (32-bit, 16-bit or 8-bit) arguments into or out from
270 * the passed argument.
271 */
272#define XPC_PACK_ARGS(_arg1, _arg2) \
273 ((((u64) _arg1) & 0xffffffff) | \
274 ((((u64) _arg2) & 0xffffffff) << 32))
275
276#define XPC_UNPACK_ARG1(_args) (((u64) _args) & 0xffffffff)
277#define XPC_UNPACK_ARG2(_args) ((((u64) _args) >> 32) & 0xffffffff)
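/*
 * Worked example (illustrative only, not part of the original source):
 * XPC_PACK_ARGS(0x1234, 0x5678) yields 0x0000567800001234, from which
 * XPC_UNPACK_ARG1() returns 0x1234 and XPC_UNPACK_ARG2() returns 0x5678.
 */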
278
279/*
280 * Define a Get/Put value pair (pointers) used with a message queue.
281 */
282struct xpc_gp {
283 s64 get; /* Get value */
284 s64 put; /* Put value */
285};
286
287#define XPC_GP_SIZE \
288 L1_CACHE_ALIGN(sizeof(struct xpc_gp) * XPC_NCHANNELS)
289
290/*
291 * Define a structure that contains arguments associated with opening and
292 * closing a channel.
293 */
294struct xpc_openclose_args {
295 u16 reason; /* reason why channel is closing */
296 u16 msg_size; /* sizeof each message entry */
297 u16 remote_nentries; /* #of message entries in remote msg queue */
298 u16 local_nentries; /* #of message entries in local msg queue */
299 u64 local_msgqueue_pa; /* physical address of local message queue */
300};
301
302#define XPC_OPENCLOSE_ARGS_SIZE \
303 L1_CACHE_ALIGN(sizeof(struct xpc_openclose_args) * XPC_NCHANNELS)
304
305/* struct xpc_msg flags */
306
307#define XPC_M_DONE 0x01 /* msg has been received/consumed */
308#define XPC_M_READY 0x02 /* msg is ready to be sent */
309#define XPC_M_INTERRUPT 0x04 /* send interrupt when msg consumed */
310
311#define XPC_MSG_ADDRESS(_payload) \
312 ((struct xpc_msg *)((u8 *)(_payload) - XPC_MSG_PAYLOAD_OFFSET))
313
314/*
315 * Defines notify entry.
316 *
317 * This is used to notify a message's sender that their message was received
318 * and consumed by the intended recipient.
319 */
320struct xpc_notify {
321 u8 type; /* type of notification */
322
323 /* the following two fields are only used if type == XPC_N_CALL */
324 xpc_notify_func func; /* user's notify function */
325 void *key; /* pointer to user's key */
326};
327
328/* struct xpc_notify type of notification */
329
330#define XPC_N_CALL 0x01 /* notify function provided by user */
331
332/*
333 * Define the structure that manages all the stuff required by a channel. In
334 * particular, it is used to manage the messages sent across the channel.
335 *
336 * This structure is private to a partition, and is NOT shared across the
337 * partition boundary.
338 *
339 * There is an array of these structures for each remote partition. It is
340 * allocated at the time a partition becomes active. The array contains one
341 * of these structures for each potential channel connection to that partition.
342 *
343 * Each of these structures manages two message queues (circular buffers).
344 * They are allocated at the time a channel connection is made. One of
345 * these message queues (local_msgqueue) holds the locally created messages
346 * that are destined for the remote partition. The other of these message
347 * queues (remote_msgqueue) is a locally cached copy of the remote partition's
348 * own local_msgqueue.
349 *
350 * The following is a description of the Get/Put pointers used to manage these
351 * two message queues. Consider the local_msgqueue to be on one partition
352 * and the remote_msgqueue to be its cached copy on another partition. A
353 * description of what each of the lettered areas contains is included.
354 *
355 *
356 * local_msgqueue remote_msgqueue
357 *
358 * |/////////| |/////////|
359 * w_remote_GP.get --> +---------+ |/////////|
360 * | F | |/////////|
361 * remote_GP.get --> +---------+ +---------+ <-- local_GP->get
362 * | | | |
363 * | | | E |
364 * | | | |
365 * | | +---------+ <-- w_local_GP.get
366 * | B | |/////////|
367 * | | |////D////|
368 * | | |/////////|
369 * | | +---------+ <-- w_remote_GP.put
370 * | | |////C////|
371 * local_GP->put --> +---------+ +---------+ <-- remote_GP.put
372 * | | |/////////|
373 * | A | |/////////|
374 * | | |/////////|
375 * w_local_GP.put --> +---------+ |/////////|
376 * |/////////| |/////////|
377 *
378 *
379 * ( remote_GP.[get|put] are cached copies of the remote
380 * partition's local_GP->[get|put], and thus their values can
381 * lag behind their counterparts on the remote partition. )
382 *
383 *
384 * A - Messages that have been allocated, but have not yet been sent to the
385 * remote partition.
386 *
387 * B - Messages that have been sent, but have not yet been acknowledged by the
388 * remote partition as having been received.
389 *
390 * C - Area that needs to be prepared for the copying of sent messages, by
391 * the clearing of the message flags of any previously received messages.
392 *
393 * D - Area into which sent messages are to be copied from the remote
394 * partition's local_msgqueue and then delivered to their intended
395 * recipients. [ To allow for a multi-message copy, another pointer
396 * (next_msg_to_pull) has been added to keep track of the next message
397 * number needing to be copied (pulled). It chases after w_remote_GP.put.
398 * Any messages lying between w_local_GP.get and next_msg_to_pull have
399 * been copied and are ready to be delivered. ]
400 *
401 * E - Messages that have been copied and delivered, but have not yet been
402 * acknowledged by the recipient as having been received.
403 *
404 * F - Messages that have been acknowledged, but XPC has not yet notified the
405 * sender that the message was received by its intended recipient.
406 * This is also an area that needs to be prepared for the allocating of
407 * new messages, by the clearing of the message flags of the acknowledged
408 * messages.
409 */
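/*
 * One way to read the diagram above (an illustrative reading, not part of
 * the original comment): the Get/Put values increase monotonically and
 * index a circular buffer, so a value maps to queue slot (value % nentries)
 * and the region sizes are simple differences, roughly:
 *
 *	allocated but not yet sent (A)     = w_local_GP.put - local_GP->put
 *	sent but not yet acknowledged (B)  = local_GP->put - remote_GP.get
 *	delivered but not acknowledged (E) = w_local_GP.get - local_GP->get
 *
 * and a sender may allocate another entry only while
 * (w_local_GP.put - w_remote_GP.get) < local_nentries.
 */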
410struct xpc_channel {
411 partid_t partid; /* ID of remote partition connected */
412 spinlock_t lock; /* lock for updating this structure */
413 u32 flags; /* general flags */
414
415 enum xpc_retval reason; /* reason why channel is disconnect'g */
416 int reason_line; /* line# disconnect initiated from */
417
418 u16 number; /* channel # */
419
420 u16 msg_size; /* sizeof each msg entry */
421 u16 local_nentries; /* #of msg entries in local msg queue */
422 u16 remote_nentries; /* #of msg entries in remote msg queue */
423
424 void *local_msgqueue_base; /* base address of kmalloc'd space */
425 struct xpc_msg *local_msgqueue; /* local message queue */
426 void *remote_msgqueue_base; /* base address of kmalloc'd space */
427 struct xpc_msg *remote_msgqueue; /* cached copy of remote partition's */
428 /* local message queue */
429 u64 remote_msgqueue_pa; /* phys addr of remote partition's */
430 /* local message queue */
431
432 atomic_t references; /* #of external references to queues */
433
434 atomic_t n_on_msg_allocate_wq; /* #on msg allocation wait queue */
435 wait_queue_head_t msg_allocate_wq; /* msg allocation wait queue */
436
437 u8 delayed_IPI_flags; /* IPI flags received, but delayed */
438 /* action until channel disconnected */
439
440 /* queue of msg senders who want to be notified when msg received */
441
442 atomic_t n_to_notify; /* #of msg senders to notify */
443 struct xpc_notify *notify_queue; /* notify queue for messages sent */
444
445 xpc_channel_func func; /* user's channel function */
446 void *key; /* pointer to user's key */
447
448 struct mutex msg_to_pull_mutex; /* next msg to pull serialization */
449 struct completion wdisconnect_wait; /* wait for channel disconnect */
450
451 struct xpc_openclose_args *local_openclose_args; /* args passed on */
452 /* opening or closing of channel */
453
454 /* various flavors of local and remote Get/Put values */
455
456 struct xpc_gp *local_GP; /* local Get/Put values */
457 struct xpc_gp remote_GP; /* remote Get/Put values */
458 struct xpc_gp w_local_GP; /* working local Get/Put values */
459 struct xpc_gp w_remote_GP; /* working remote Get/Put values */
460 s64 next_msg_to_pull; /* Put value of next msg to pull */
461
462 /* kthread management related fields */
463
464 atomic_t kthreads_assigned; /* #of kthreads assigned to channel */
465 u32 kthreads_assigned_limit; /* limit on #of kthreads assigned */
466 atomic_t kthreads_idle; /* #of kthreads idle waiting for work */
467 u32 kthreads_idle_limit; /* limit on #of kthreads idle */
468 atomic_t kthreads_active; /* #of kthreads actively working */
469
470 wait_queue_head_t idle_wq; /* idle kthread wait queue */
471
472} ____cacheline_aligned;
473
474/* struct xpc_channel flags */
475
476#define XPC_C_WASCONNECTED 0x00000001 /* channel was connected */
477
478#define XPC_C_ROPENREPLY 0x00000002 /* remote open channel reply */
479#define XPC_C_OPENREPLY 0x00000004 /* local open channel reply */
480#define XPC_C_ROPENREQUEST 0x00000008 /* remote open channel request */
481#define XPC_C_OPENREQUEST 0x00000010 /* local open channel request */
482
483#define XPC_C_SETUP 0x00000020 /* channel's msgqueues are alloc'd */
484#define XPC_C_CONNECTEDCALLOUT 0x00000040 /* connected callout initiated */
485#define XPC_C_CONNECTEDCALLOUT_MADE \
486 0x00000080 /* connected callout completed */
487#define XPC_C_CONNECTED 0x00000100 /* local channel is connected */
488#define XPC_C_CONNECTING 0x00000200 /* channel is being connected */
489
490#define XPC_C_RCLOSEREPLY 0x00000400 /* remote close channel reply */
491#define XPC_C_CLOSEREPLY 0x00000800 /* local close channel reply */
492#define XPC_C_RCLOSEREQUEST 0x00001000 /* remote close channel request */
493#define XPC_C_CLOSEREQUEST 0x00002000 /* local close channel request */
494
495#define XPC_C_DISCONNECTED 0x00004000 /* channel is disconnected */
496#define XPC_C_DISCONNECTING 0x00008000 /* channel is being disconnected */
497#define XPC_C_DISCONNECTINGCALLOUT \
498 0x00010000 /* disconnecting callout initiated */
499#define XPC_C_DISCONNECTINGCALLOUT_MADE \
500 0x00020000 /* disconnecting callout completed */
501#define XPC_C_WDISCONNECT 0x00040000 /* waiting for channel disconnect */
502
503/*
504 * Manages channels on a partition basis. There is one of these structures
505 * for each partition (a partition will never utilize the structure that
506 * represents itself).
507 */
508struct xpc_partition {
509
510 /* XPC HB infrastructure */
511
512 u8 remote_rp_version; /* version# of partition's rsvd pg */
513 struct timespec remote_rp_stamp; /* time when rsvd pg was initialized */
514 u64 remote_rp_pa; /* phys addr of partition's rsvd pg */
515 u64 remote_vars_pa; /* phys addr of partition's vars */
516 u64 remote_vars_part_pa; /* phys addr of partition's vars part */
517 u64 last_heartbeat; /* HB at last read */
518 u64 remote_amos_page_pa; /* phys addr of partition's amos page */
519 int remote_act_nasid; /* active part's act/deact nasid */
520 int remote_act_phys_cpuid; /* active part's act/deact phys cpuid */
521 u32 act_IRQ_rcvd; /* IRQs since activation */
522 spinlock_t act_lock; /* protect updating of act_state */
523 u8 act_state; /* from XPC HB viewpoint */
524 u8 remote_vars_version; /* version# of partition's vars */
525 enum xpc_retval reason; /* reason partition is deactivating */
526 int reason_line; /* line# deactivation initiated from */
527 int reactivate_nasid; /* nasid in partition to reactivate */
528
529 unsigned long disengage_request_timeout; /* timeout in jiffies */
530 struct timer_list disengage_request_timer;
531
532 /* XPC infrastructure referencing and teardown control */
533
534 u8 setup_state; /* infrastructure setup state */
535 wait_queue_head_t teardown_wq; /* kthread waiting to teardown infra */
536 atomic_t references; /* #of references to infrastructure */
537
538 /*
539 * NONE OF THE PRECEDING FIELDS OF THIS STRUCTURE WILL BE CLEARED WHEN
540 * XPC SETS UP THE NECESSARY INFRASTRUCTURE TO SUPPORT CROSS PARTITION
541 * COMMUNICATION. ALL OF THE FOLLOWING FIELDS WILL BE CLEARED. (THE
542 * 'nchannels' FIELD MUST BE THE FIRST OF THE FIELDS TO BE CLEARED.)
543 */
544
545 u8 nchannels; /* #of defined channels supported */
546 atomic_t nchannels_active; /* #of channels that are not DISCONNECTED */
547 atomic_t nchannels_engaged; /* #of channels engaged with remote part */
548 struct xpc_channel *channels; /* array of channel structures */
549
550 void *local_GPs_base; /* base address of kmalloc'd space */
551 struct xpc_gp *local_GPs; /* local Get/Put values */
552 void *remote_GPs_base; /* base address of kmalloc'd space */
553 struct xpc_gp *remote_GPs; /* copy of remote partition's local */
554 /* Get/Put values */
555 u64 remote_GPs_pa; /* phys address of remote partition's local */
556 /* Get/Put values */
557
558 /* fields used to pass args when opening or closing a channel */
559
560 void *local_openclose_args_base; /* base address of kmalloc'd space */
561 struct xpc_openclose_args *local_openclose_args; /* local's args */
562 void *remote_openclose_args_base; /* base address of kmalloc'd space */
563 struct xpc_openclose_args *remote_openclose_args; /* copy of remote's */
564 /* args */
565 u64 remote_openclose_args_pa; /* phys addr of remote's args */
566
567 /* IPI sending, receiving and handling related fields */
568
569 int remote_IPI_nasid; /* nasid of where to send IPIs */
570 int remote_IPI_phys_cpuid; /* phys CPU ID of where to send IPIs */
571 AMO_t *remote_IPI_amo_va; /* address of remote IPI AMO_t structure */
572
573 AMO_t *local_IPI_amo_va; /* address of IPI AMO_t structure */
574 u64 local_IPI_amo; /* IPI amo flags yet to be handled */
575 char IPI_owner[8]; /* IPI owner's name */
576 struct timer_list dropped_IPI_timer; /* dropped IPI timer */
577
578 spinlock_t IPI_lock; /* IPI handler lock */
579
580 /* channel manager related fields */
581
582 atomic_t channel_mgr_requests; /* #of requests to activate chan mgr */
583 wait_queue_head_t channel_mgr_wq; /* channel mgr's wait queue */
584
585} ____cacheline_aligned;
586
587/* struct xpc_partition act_state values (for XPC HB) */
588
589#define XPC_P_INACTIVE 0x00 /* partition is not active */
590#define XPC_P_ACTIVATION_REQ 0x01 /* created thread to activate */
591#define XPC_P_ACTIVATING 0x02 /* activation thread started */
592#define XPC_P_ACTIVE 0x03 /* xpc_partition_up() was called */
593#define XPC_P_DEACTIVATING 0x04 /* partition deactivation initiated */
594
595#define XPC_DEACTIVATE_PARTITION(_p, _reason) \
596 xpc_deactivate_partition(__LINE__, (_p), (_reason))
597
598/* struct xpc_partition setup_state values */
599
600#define XPC_P_UNSET 0x00 /* infrastructure was never setup */
601#define XPC_P_SETUP 0x01 /* infrastructure is setup */
602#define XPC_P_WTEARDOWN 0x02 /* waiting to teardown infrastructure */
603#define XPC_P_TORNDOWN 0x03 /* infrastructure is torndown */
604
605/*
606 * struct xpc_partition IPI_timer #of seconds to wait before checking for
607 * dropped IPIs. These occur whenever an IPI amo write doesn't complete until
608 * after the IPI was received.
609 */
610#define XPC_P_DROPPED_IPI_WAIT (0.25 * HZ)
611
612/* number of seconds to wait for other partitions to disengage */
613#define XPC_DISENGAGE_REQUEST_DEFAULT_TIMELIMIT 90
614
615/* interval in seconds to print 'waiting disengagement' messages */
616#define XPC_DISENGAGE_PRINTMSG_INTERVAL 10
617
618#define XPC_PARTID(_p) ((partid_t) ((_p) - &xpc_partitions[0]))
619
620/* found in xp_main.c */
621extern struct xpc_registration xpc_registrations[];
622
623/* found in xpc_main.c */
624extern struct device *xpc_part;
625extern struct device *xpc_chan;
626extern int xpc_disengage_request_timelimit;
627extern int xpc_disengage_request_timedout;
628extern irqreturn_t xpc_notify_IRQ_handler(int, void *);
629extern void xpc_dropped_IPI_check(struct xpc_partition *);
630extern void xpc_activate_partition(struct xpc_partition *);
631extern void xpc_activate_kthreads(struct xpc_channel *, int);
632extern void xpc_create_kthreads(struct xpc_channel *, int, int);
633extern void xpc_disconnect_wait(int);
634
635/* found in xpc_partition.c */
636extern int xpc_exiting;
637extern struct xpc_vars *xpc_vars;
638extern struct xpc_rsvd_page *xpc_rsvd_page;
639extern struct xpc_vars_part *xpc_vars_part;
640extern struct xpc_partition xpc_partitions[XP_MAX_PARTITIONS + 1];
641extern char *xpc_remote_copy_buffer;
642extern void *xpc_remote_copy_buffer_base;
643extern void *xpc_kmalloc_cacheline_aligned(size_t, gfp_t, void **);
644extern struct xpc_rsvd_page *xpc_rsvd_page_init(void);
645extern void xpc_allow_IPI_ops(void);
646extern void xpc_restrict_IPI_ops(void);
647extern int xpc_identify_act_IRQ_sender(void);
648extern int xpc_partition_disengaged(struct xpc_partition *);
649extern enum xpc_retval xpc_mark_partition_active(struct xpc_partition *);
650extern void xpc_mark_partition_inactive(struct xpc_partition *);
651extern void xpc_discovery(void);
652extern void xpc_check_remote_hb(void);
653extern void xpc_deactivate_partition(const int, struct xpc_partition *,
654 enum xpc_retval);
655extern enum xpc_retval xpc_initiate_partid_to_nasids(partid_t, void *);
656
657/* found in xpc_channel.c */
658extern void xpc_initiate_connect(int);
659extern void xpc_initiate_disconnect(int);
660extern enum xpc_retval xpc_initiate_allocate(partid_t, int, u32, void **);
661extern enum xpc_retval xpc_initiate_send(partid_t, int, void *);
662extern enum xpc_retval xpc_initiate_send_notify(partid_t, int, void *,
663 xpc_notify_func, void *);
664extern void xpc_initiate_received(partid_t, int, void *);
665extern enum xpc_retval xpc_setup_infrastructure(struct xpc_partition *);
666extern enum xpc_retval xpc_pull_remote_vars_part(struct xpc_partition *);
667extern void xpc_process_channel_activity(struct xpc_partition *);
668extern void xpc_connected_callout(struct xpc_channel *);
669extern void xpc_deliver_msg(struct xpc_channel *);
670extern void xpc_disconnect_channel(const int, struct xpc_channel *,
671 enum xpc_retval, unsigned long *);
672extern void xpc_disconnect_callout(struct xpc_channel *, enum xpc_retval);
673extern void xpc_partition_going_down(struct xpc_partition *, enum xpc_retval);
674extern void xpc_teardown_infrastructure(struct xpc_partition *);
675
676static inline void
677xpc_wakeup_channel_mgr(struct xpc_partition *part)
678{
679 if (atomic_inc_return(&part->channel_mgr_requests) == 1)
680 wake_up(&part->channel_mgr_wq);
681}
682
683/*
684 * These next two inlines are used to keep us from tearing down a channel's
685 * msg queues while a thread may be referencing them.
686 */
687static inline void
688xpc_msgqueue_ref(struct xpc_channel *ch)
689{
690 atomic_inc(&ch->references);
691}
692
693static inline void
694xpc_msgqueue_deref(struct xpc_channel *ch)
695{
696 s32 refs = atomic_dec_return(&ch->references);
697
698 DBUG_ON(refs < 0);
699 if (refs == 0)
700 xpc_wakeup_channel_mgr(&xpc_partitions[ch->partid]);
701}
702
703#define XPC_DISCONNECT_CHANNEL(_ch, _reason, _irqflgs) \
704 xpc_disconnect_channel(__LINE__, _ch, _reason, _irqflgs)
705
706/*
707 * These two inlines are used to keep us from tearing down a partition's
708 * setup infrastructure while a thread may be referencing it.
709 */
710static inline void
711xpc_part_deref(struct xpc_partition *part)
712{
713 s32 refs = atomic_dec_return(&part->references);
714
715 DBUG_ON(refs < 0);
716 if (refs == 0 && part->setup_state == XPC_P_WTEARDOWN)
717 wake_up(&part->teardown_wq);
718}
719
720static inline int
721xpc_part_ref(struct xpc_partition *part)
722{
723 int setup;
724
725 atomic_inc(&part->references);
726 setup = (part->setup_state == XPC_P_SETUP);
727 if (!setup)
728 xpc_part_deref(part);
729
730 return setup;
731}
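/*
 * Typical usage (a sketch, not lifted from the callers): a thread that
 * needs the partition's infrastructure brackets its access with
 *
 *	if (xpc_part_ref(part)) {
 *		... use part->channels, GPs, openclose args, etc. ...
 *		xpc_part_deref(part);
 *	}
 *
 * so the teardown path can wait on teardown_wq for the last reference
 * to be dropped.
 */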
732
733/*
734 * The following macro is to be used for the setting of the reason and
735 * reason_line fields in both the struct xpc_channel and struct xpc_partition
736 * structures.
737 */
738#define XPC_SET_REASON(_p, _reason, _line) \
739 { \
740 (_p)->reason = _reason; \
741 (_p)->reason_line = _line; \
742 }
743
744/*
745 * This next set of inlines are used to keep track of when a partition is
746 * potentially engaged in accessing memory belonging to another partition.
747 */
748
749static inline void
750xpc_mark_partition_engaged(struct xpc_partition *part)
751{
752 unsigned long irq_flags;
753 AMO_t *amo = (AMO_t *)__va(part->remote_amos_page_pa +
754 (XPC_ENGAGED_PARTITIONS_AMO *
755 sizeof(AMO_t)));
756
757 local_irq_save(irq_flags);
758
759 /* set bit corresponding to our partid in remote partition's AMO */
760 FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_OR,
761 (1UL << sn_partition_id));
762 /*
763 * We must always use the nofault function regardless of whether we
764 * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
765 * didn't, we'd never know that the other partition is down and would
766 * keep sending IPIs and AMOs to it until the heartbeat times out.
767 */
768 (void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo->
769 variable),
770 xp_nofault_PIOR_target));
771
772 local_irq_restore(irq_flags);
773}
774
775static inline void
776xpc_mark_partition_disengaged(struct xpc_partition *part)
777{
778 unsigned long irq_flags;
779 AMO_t *amo = (AMO_t *)__va(part->remote_amos_page_pa +
780 (XPC_ENGAGED_PARTITIONS_AMO *
781 sizeof(AMO_t)));
782
783 local_irq_save(irq_flags);
784
785 /* clear bit corresponding to our partid in remote partition's AMO */
786 FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_AND,
787 ~(1UL << sn_partition_id));
788 /*
789 * We must always use the nofault function regardless of whether we
790 * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
791 * didn't, we'd never know that the other partition is down and would
792 * keep sending IPIs and AMOs to it until the heartbeat times out.
793 */
794 (void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo->
795 variable),
796 xp_nofault_PIOR_target));
797
798 local_irq_restore(irq_flags);
799}
800
801static inline void
802xpc_request_partition_disengage(struct xpc_partition *part)
803{
804 unsigned long irq_flags;
805 AMO_t *amo = (AMO_t *)__va(part->remote_amos_page_pa +
806 (XPC_DISENGAGE_REQUEST_AMO * sizeof(AMO_t)));
807
808 local_irq_save(irq_flags);
809
810 /* set bit corresponding to our partid in remote partition's AMO */
811 FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_OR,
812 (1UL << sn_partition_id));
813 /*
814 * We must always use the nofault function regardless of whether we
815 * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
816 * didn't, we'd never know that the other partition is down and would
817 * keep sending IPIs and AMOs to it until the heartbeat times out.
818 */
819 (void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo->
820 variable),
821 xp_nofault_PIOR_target));
822
823 local_irq_restore(irq_flags);
824}
825
826static inline void
827xpc_cancel_partition_disengage_request(struct xpc_partition *part)
828{
829 unsigned long irq_flags;
830 AMO_t *amo = (AMO_t *)__va(part->remote_amos_page_pa +
831 (XPC_DISENGAGE_REQUEST_AMO * sizeof(AMO_t)));
832
833 local_irq_save(irq_flags);
834
835 /* clear bit corresponding to our partid in remote partition's AMO */
836 FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_AND,
837 ~(1UL << sn_partition_id));
838 /*
839 * We must always use the nofault function regardless of whether we
840 * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
841 * didn't, we'd never know that the other partition is down and would
842 * keep sending IPIs and AMOs to it until the heartbeat times out.
843 */
844 (void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo->
845 variable),
846 xp_nofault_PIOR_target));
847
848 local_irq_restore(irq_flags);
849}
850
851static inline u64
852xpc_partition_engaged(u64 partid_mask)
853{
854 AMO_t *amo = xpc_vars->amos_page + XPC_ENGAGED_PARTITIONS_AMO;
855
856 /* return our partition's AMO variable ANDed with partid_mask */
857 return (FETCHOP_LOAD_OP(TO_AMO((u64)&amo->variable), FETCHOP_LOAD) &
858 partid_mask);
859}
860
861static inline u64
862xpc_partition_disengage_requested(u64 partid_mask)
863{
864 AMO_t *amo = xpc_vars->amos_page + XPC_DISENGAGE_REQUEST_AMO;
865
866 /* return our partition's AMO variable ANDed with partid_mask */
867 return (FETCHOP_LOAD_OP(TO_AMO((u64)&amo->variable), FETCHOP_LOAD) &
868 partid_mask);
869}
870
871static inline void
872xpc_clear_partition_engaged(u64 partid_mask)
873{
874 AMO_t *amo = xpc_vars->amos_page + XPC_ENGAGED_PARTITIONS_AMO;
875
876 /* clear bit(s) based on partid_mask in our partition's AMO */
877 FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_AND,
878 ~partid_mask);
879}
880
881static inline void
882xpc_clear_partition_disengage_request(u64 partid_mask)
883{
884 AMO_t *amo = xpc_vars->amos_page + XPC_DISENGAGE_REQUEST_AMO;
885
886 /* clear bit(s) based on partid_mask in our partition's AMO */
887 FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_AND,
888 ~partid_mask);
889}
890
891/*
892 * The following set of macros and inlines are used for the sending and
893 * receiving of IPIs (also known as IRQs). There are two flavors of IPIs,
894 * one that is associated with partition activity (SGI_XPC_ACTIVATE) and
895 * the other that is associated with channel activity (SGI_XPC_NOTIFY).
896 */
897
898static inline u64
899xpc_IPI_receive(AMO_t *amo)
900{
901 return FETCHOP_LOAD_OP(TO_AMO((u64)&amo->variable), FETCHOP_CLEAR);
902}
903
904static inline enum xpc_retval
905xpc_IPI_send(AMO_t *amo, u64 flag, int nasid, int phys_cpuid, int vector)
906{
907 int ret = 0;
908 unsigned long irq_flags;
909
910 local_irq_save(irq_flags);
911
912 FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_OR, flag);
913 sn_send_IPI_phys(nasid, phys_cpuid, vector, 0);
914
915 /*
916 * We must always use the nofault function regardless of whether we
917 * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
918 * didn't, we'd never know that the other partition is down and would
919 * keep sending IPIs and AMOs to it until the heartbeat times out.
920 */
921 ret = xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo->variable),
922 xp_nofault_PIOR_target));
923
924 local_irq_restore(irq_flags);
925
926 return ((ret == 0) ? xpcSuccess : xpcPioReadError);
927}
928
929/*
930 * IPIs associated with SGI_XPC_ACTIVATE IRQ.
931 */
932
933/*
934 * Flag the appropriate AMO variable and send an IPI to the specified node.
935 */
936static inline void
937xpc_activate_IRQ_send(u64 amos_page_pa, int from_nasid, int to_nasid,
938 int to_phys_cpuid)
939{
940 int w_index = XPC_NASID_W_INDEX(from_nasid);
941 int b_index = XPC_NASID_B_INDEX(from_nasid);
942 AMO_t *amos = (AMO_t *)__va(amos_page_pa +
943 (XPC_ACTIVATE_IRQ_AMOS * sizeof(AMO_t)));
944
945 (void)xpc_IPI_send(&amos[w_index], (1UL << b_index), to_nasid,
946 to_phys_cpuid, SGI_XPC_ACTIVATE);
947}
948
949static inline void
950xpc_IPI_send_activate(struct xpc_vars *vars)
951{
952 xpc_activate_IRQ_send(vars->amos_page_pa, cnodeid_to_nasid(0),
953 vars->act_nasid, vars->act_phys_cpuid);
954}
955
956static inline void
957xpc_IPI_send_activated(struct xpc_partition *part)
958{
959 xpc_activate_IRQ_send(part->remote_amos_page_pa, cnodeid_to_nasid(0),
960 part->remote_act_nasid,
961 part->remote_act_phys_cpuid);
962}
963
964static inline void
965xpc_IPI_send_reactivate(struct xpc_partition *part)
966{
967 xpc_activate_IRQ_send(xpc_vars->amos_page_pa, part->reactivate_nasid,
968 xpc_vars->act_nasid, xpc_vars->act_phys_cpuid);
969}
970
971static inline void
972xpc_IPI_send_disengage(struct xpc_partition *part)
973{
974 xpc_activate_IRQ_send(part->remote_amos_page_pa, cnodeid_to_nasid(0),
975 part->remote_act_nasid,
976 part->remote_act_phys_cpuid);
977}
978
979/*
980 * IPIs associated with SGI_XPC_NOTIFY IRQ.
981 */
982
983/*
984 * Send an IPI to the remote partition that is associated with the
985 * specified channel.
986 */
987#define XPC_NOTIFY_IRQ_SEND(_ch, _ipi_f, _irq_f) \
988 xpc_notify_IRQ_send(_ch, _ipi_f, #_ipi_f, _irq_f)
989
990static inline void
991xpc_notify_IRQ_send(struct xpc_channel *ch, u8 ipi_flag, char *ipi_flag_string,
992 unsigned long *irq_flags)
993{
994 struct xpc_partition *part = &xpc_partitions[ch->partid];
995 enum xpc_retval ret;
996
997 if (likely(part->act_state != XPC_P_DEACTIVATING)) {
998 ret = xpc_IPI_send(part->remote_IPI_amo_va,
999 (u64)ipi_flag << (ch->number * 8),
1000 part->remote_IPI_nasid,
1001 part->remote_IPI_phys_cpuid, SGI_XPC_NOTIFY);
1002 dev_dbg(xpc_chan, "%s sent to partid=%d, channel=%d, ret=%d\n",
1003 ipi_flag_string, ch->partid, ch->number, ret);
1004 if (unlikely(ret != xpcSuccess)) {
1005 if (irq_flags != NULL)
1006 spin_unlock_irqrestore(&ch->lock, *irq_flags);
1007 XPC_DEACTIVATE_PARTITION(part, ret);
1008 if (irq_flags != NULL)
1009 spin_lock_irqsave(&ch->lock, *irq_flags);
1010 }
1011 }
1012}
1013
1014/*
1015 * Make it look like the remote partition, which is associated with the
1016 * specified channel, sent us an IPI. This faked IPI will be handled
1017 * by xpc_dropped_IPI_check().
1018 */
1019#define XPC_NOTIFY_IRQ_SEND_LOCAL(_ch, _ipi_f) \
1020 xpc_notify_IRQ_send_local(_ch, _ipi_f, #_ipi_f)
1021
1022static inline void
1023xpc_notify_IRQ_send_local(struct xpc_channel *ch, u8 ipi_flag,
1024 char *ipi_flag_string)
1025{
1026 struct xpc_partition *part = &xpc_partitions[ch->partid];
1027
1028 FETCHOP_STORE_OP(TO_AMO((u64)&part->local_IPI_amo_va->variable),
1029 FETCHOP_OR, ((u64)ipi_flag << (ch->number * 8)));
1030 dev_dbg(xpc_chan, "%s sent local from partid=%d, channel=%d\n",
1031 ipi_flag_string, ch->partid, ch->number);
1032}
1033
1034/*
1035 * The sending and receiving of IPIs includes the setting of an AMO variable
1036 * to indicate the reason the IPI was sent. The 64-bit variable is divided
1037 * up into eight bytes, ordered from right to left. Byte zero pertains to
1038 * channel 0, byte one to channel 1, and so on. Each byte is described by
1039 * the following IPI flags.
1040 */
1041
1042#define XPC_IPI_CLOSEREQUEST 0x01
1043#define XPC_IPI_CLOSEREPLY 0x02
1044#define XPC_IPI_OPENREQUEST 0x04
1045#define XPC_IPI_OPENREPLY 0x08
1046#define XPC_IPI_MSGREQUEST 0x10
1047
1048/* given an AMO variable and a channel#, get its associated IPI flags */
1049#define XPC_GET_IPI_FLAGS(_amo, _c) ((u8) (((_amo) >> ((_c) * 8)) & 0xff))
1050#define XPC_SET_IPI_FLAGS(_amo, _c, _f) (_amo) |= ((u64) (_f) << ((_c) * 8))
1051
1052#define XPC_ANY_OPENCLOSE_IPI_FLAGS_SET(_amo) ((_amo) & 0x0f0f0f0f0f0f0f0fUL)
1053#define XPC_ANY_MSG_IPI_FLAGS_SET(_amo) ((_amo) & 0x1010101010101010UL)
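/*
 * Worked example (illustrative only, not part of the original source):
 * flagging XPC_IPI_OPENREQUEST for channel 2 ORs 0x04 << 16 == 0x040000
 * into the AMO variable, and XPC_GET_IPI_FLAGS(0x040000, 2) returns 0x04.
 */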
1054
1055static inline void
1056xpc_IPI_send_closerequest(struct xpc_channel *ch, unsigned long *irq_flags)
1057{
1058 struct xpc_openclose_args *args = ch->local_openclose_args;
1059
1060 args->reason = ch->reason;
1061
1062 XPC_NOTIFY_IRQ_SEND(ch, XPC_IPI_CLOSEREQUEST, irq_flags);
1063}
1064
1065static inline void
1066xpc_IPI_send_closereply(struct xpc_channel *ch, unsigned long *irq_flags)
1067{
1068 XPC_NOTIFY_IRQ_SEND(ch, XPC_IPI_CLOSEREPLY, irq_flags);
1069}
1070
1071static inline void
1072xpc_IPI_send_openrequest(struct xpc_channel *ch, unsigned long *irq_flags)
1073{
1074 struct xpc_openclose_args *args = ch->local_openclose_args;
1075
1076 args->msg_size = ch->msg_size;
1077 args->local_nentries = ch->local_nentries;
1078
1079 XPC_NOTIFY_IRQ_SEND(ch, XPC_IPI_OPENREQUEST, irq_flags);
1080}
1081
1082static inline void
1083xpc_IPI_send_openreply(struct xpc_channel *ch, unsigned long *irq_flags)
1084{
1085 struct xpc_openclose_args *args = ch->local_openclose_args;
1086
1087 args->remote_nentries = ch->remote_nentries;
1088 args->local_nentries = ch->local_nentries;
1089 args->local_msgqueue_pa = __pa(ch->local_msgqueue);
1090
1091 XPC_NOTIFY_IRQ_SEND(ch, XPC_IPI_OPENREPLY, irq_flags);
1092}
1093
1094static inline void
1095xpc_IPI_send_msgrequest(struct xpc_channel *ch)
1096{
1097 XPC_NOTIFY_IRQ_SEND(ch, XPC_IPI_MSGREQUEST, NULL);
1098}
1099
1100static inline void
1101xpc_IPI_send_local_msgrequest(struct xpc_channel *ch)
1102{
1103 XPC_NOTIFY_IRQ_SEND_LOCAL(ch, XPC_IPI_MSGREQUEST);
1104}
1105
1106/*
1107 * Memory for XPC's AMO variables is allocated by the MSPEC driver. These
1108 * pages are located in the lowest granule. The lowest granule uses 4k pages
1109 * for cached references and an alternate TLB handler to never provide a
1110 * cacheable mapping for the entire region. This will prevent speculative
1111 * reading of cached copies of our lines from being issued which will cause
1112 * a PI FSB Protocol error to be generated by the SHUB. For XPC, we need 64
1113 * AMO variables (based on XP_MAX_PARTITIONS) for message notification and an
1114 * additional 128 AMO variables (based on XP_NASID_MASK_WORDS) for partition
1115 * activation and 2 AMO variables for partition deactivation.
1116 */
1117static inline AMO_t *
1118xpc_IPI_init(int index)
1119{
1120 AMO_t *amo = xpc_vars->amos_page + index;
1121
1122 (void)xpc_IPI_receive(amo); /* clear AMO variable */
1123 return amo;
1124}
1125
1126static inline enum xpc_retval
1127xpc_map_bte_errors(bte_result_t error)
1128{
1129 if (error == BTE_SUCCESS)
1130 return xpcSuccess;
1131
1132 if (is_shub2()) {
1133 if (BTE_VALID_SH2_ERROR(error))
1134 return xpcBteSh2Start + error;
1135 return xpcBteUnmappedError;
1136 }
1137 switch (error) {
1138 case BTE_SUCCESS:
1139 return xpcSuccess;
1140 case BTEFAIL_DIR:
1141 return xpcBteDirectoryError;
1142 case BTEFAIL_POISON:
1143 return xpcBtePoisonError;
1144 case BTEFAIL_WERR:
1145 return xpcBteWriteError;
1146 case BTEFAIL_ACCESS:
1147 return xpcBteAccessError;
1148 case BTEFAIL_PWERR:
1149 return xpcBtePWriteError;
1150 case BTEFAIL_PRERR:
1151 return xpcBtePReadError;
1152 case BTEFAIL_TOUT:
1153 return xpcBteTimeOutError;
1154 case BTEFAIL_XTERR:
1155 return xpcBteXtalkError;
1156 case BTEFAIL_NOTAVAIL:
1157 return xpcBteNotAvailable;
1158 default:
1159 return xpcBteUnmappedError;
1160 }
1161}
1162
1163/*
1164 * Check to see if there is any channel activity to/from the specified
1165 * partition.
1166 */
1167static inline void
1168xpc_check_for_channel_activity(struct xpc_partition *part)
1169{
1170 u64 IPI_amo;
1171 unsigned long irq_flags;
1172
1173 IPI_amo = xpc_IPI_receive(part->local_IPI_amo_va);
1174 if (IPI_amo == 0)
1175 return;
1176
1177 spin_lock_irqsave(&part->IPI_lock, irq_flags);
1178 part->local_IPI_amo |= IPI_amo;
1179 spin_unlock_irqrestore(&part->IPI_lock, irq_flags);
1180
1181 dev_dbg(xpc_chan, "received IPI from partid=%d, IPI_amo=0x%lx\n",
1182 XPC_PARTID(part), IPI_amo);
1183
1184 xpc_wakeup_channel_mgr(part);
1185}
1186
1187#endif /* _DRIVERS_MISC_SGIXP_XPC_H */
diff --git a/drivers/misc/sgi-xp/xpc_channel.c b/drivers/misc/sgi-xp/xpc_channel.c
new file mode 100644
index 000000000000..bfcb9ea968e9
--- /dev/null
+++ b/drivers/misc/sgi-xp/xpc_channel.c
@@ -0,0 +1,2243 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * Copyright (c) 2004-2008 Silicon Graphics, Inc. All Rights Reserved.
7 */
8
9/*
10 * Cross Partition Communication (XPC) channel support.
11 *
12 * This is the part of XPC that manages the channels and
13 * sends/receives messages across them to/from other partitions.
14 *
15 */
16
17#include <linux/kernel.h>
18#include <linux/init.h>
19#include <linux/sched.h>
20#include <linux/cache.h>
21#include <linux/interrupt.h>
22#include <linux/mutex.h>
23#include <linux/completion.h>
24#include <asm/sn/bte.h>
25#include <asm/sn/sn_sal.h>
26#include "xpc.h"
27
28/*
29 * Guarantee that the kzalloc'd memory is cacheline aligned.
30 */
31static void *
32xpc_kzalloc_cacheline_aligned(size_t size, gfp_t flags, void **base)
33{
34 /* see if kzalloc will give us cacheline aligned memory by default */
35 *base = kzalloc(size, flags);
36 if (*base == NULL)
37 return NULL;
38
39 if ((u64)*base == L1_CACHE_ALIGN((u64)*base))
40 return *base;
41
42 kfree(*base);
43
44 /* nope, we'll have to do it ourselves */
45 *base = kzalloc(size + L1_CACHE_BYTES, flags);
46 if (*base == NULL)
47 return NULL;
48
49 return (void *)L1_CACHE_ALIGN((u64)*base);
50}
51
52/*
53 * Set up the initial values for the XPartition Communication channels.
54 */
55static void
56xpc_initialize_channels(struct xpc_partition *part, partid_t partid)
57{
58 int ch_number;
59 struct xpc_channel *ch;
60
61 for (ch_number = 0; ch_number < part->nchannels; ch_number++) {
62 ch = &part->channels[ch_number];
63
64 ch->partid = partid;
65 ch->number = ch_number;
66 ch->flags = XPC_C_DISCONNECTED;
67
68 ch->local_GP = &part->local_GPs[ch_number];
69 ch->local_openclose_args =
70 &part->local_openclose_args[ch_number];
71
72 atomic_set(&ch->kthreads_assigned, 0);
73 atomic_set(&ch->kthreads_idle, 0);
74 atomic_set(&ch->kthreads_active, 0);
75
76 atomic_set(&ch->references, 0);
77 atomic_set(&ch->n_to_notify, 0);
78
79 spin_lock_init(&ch->lock);
80 mutex_init(&ch->msg_to_pull_mutex);
81 init_completion(&ch->wdisconnect_wait);
82
83 atomic_set(&ch->n_on_msg_allocate_wq, 0);
84 init_waitqueue_head(&ch->msg_allocate_wq);
85 init_waitqueue_head(&ch->idle_wq);
86 }
87}
88
89/*
90 * Setup the infrastructure necessary to support XPartition Communication
91 * between the specified remote partition and the local one.
92 */
93enum xpc_retval
94xpc_setup_infrastructure(struct xpc_partition *part)
95{
96 int ret, cpuid;
97 struct timer_list *timer;
98 partid_t partid = XPC_PARTID(part);
99
100 /*
101 * Zero out MOST of the entry for this partition. Only the fields
102 * starting with `nchannels' will be zeroed. The preceding fields must
103 * remain `viable' across partition ups and downs, since they may be
104 * referenced during this memset() operation.
105 */
106 memset(&part->nchannels, 0, sizeof(struct xpc_partition) -
107 offsetof(struct xpc_partition, nchannels));
108
109 /*
110 * Allocate all of the channel structures as a contiguous chunk of
111 * memory.
112 */
113 part->channels = kzalloc(sizeof(struct xpc_channel) * XPC_NCHANNELS,
114 GFP_KERNEL);
115 if (part->channels == NULL) {
116 dev_err(xpc_chan, "can't get memory for channels\n");
117 return xpcNoMemory;
118 }
119
120 part->nchannels = XPC_NCHANNELS;
121
122 /* allocate all the required GET/PUT values */
123
124 part->local_GPs = xpc_kzalloc_cacheline_aligned(XPC_GP_SIZE,
125 GFP_KERNEL,
126 &part->local_GPs_base);
127 if (part->local_GPs == NULL) {
128 kfree(part->channels);
129 part->channels = NULL;
130 dev_err(xpc_chan, "can't get memory for local get/put "
131 "values\n");
132 return xpcNoMemory;
133 }
134
135 part->remote_GPs = xpc_kzalloc_cacheline_aligned(XPC_GP_SIZE,
136 GFP_KERNEL,
137 &part->
138 remote_GPs_base);
139 if (part->remote_GPs == NULL) {
140 dev_err(xpc_chan, "can't get memory for remote get/put "
141 "values\n");
142 kfree(part->local_GPs_base);
143 part->local_GPs = NULL;
144 kfree(part->channels);
145 part->channels = NULL;
146 return xpcNoMemory;
147 }
148
149 /* allocate all the required open and close args */
150
151 part->local_openclose_args =
152 xpc_kzalloc_cacheline_aligned(XPC_OPENCLOSE_ARGS_SIZE, GFP_KERNEL,
153 &part->local_openclose_args_base);
154 if (part->local_openclose_args == NULL) {
155 dev_err(xpc_chan, "can't get memory for local connect args\n");
156 kfree(part->remote_GPs_base);
157 part->remote_GPs = NULL;
158 kfree(part->local_GPs_base);
159 part->local_GPs = NULL;
160 kfree(part->channels);
161 part->channels = NULL;
162 return xpcNoMemory;
163 }
164
165 part->remote_openclose_args =
166 xpc_kzalloc_cacheline_aligned(XPC_OPENCLOSE_ARGS_SIZE, GFP_KERNEL,
167 &part->remote_openclose_args_base);
168 if (part->remote_openclose_args == NULL) {
169 dev_err(xpc_chan, "can't get memory for remote connect args\n");
170 kfree(part->local_openclose_args_base);
171 part->local_openclose_args = NULL;
172 kfree(part->remote_GPs_base);
173 part->remote_GPs = NULL;
174 kfree(part->local_GPs_base);
175 part->local_GPs = NULL;
176 kfree(part->channels);
177 part->channels = NULL;
178 return xpcNoMemory;
179 }
180
181 xpc_initialize_channels(part, partid);
182
183 atomic_set(&part->nchannels_active, 0);
184 atomic_set(&part->nchannels_engaged, 0);
185
186 /* local_IPI_amo was set to 0 by an earlier memset() */
187
188 /* Initialize this partition's AMO_t structure */
189 part->local_IPI_amo_va = xpc_IPI_init(partid);
190
191 spin_lock_init(&part->IPI_lock);
192
193 atomic_set(&part->channel_mgr_requests, 1);
194 init_waitqueue_head(&part->channel_mgr_wq);
195
196 sprintf(part->IPI_owner, "xpc%02d", partid);
197 ret = request_irq(SGI_XPC_NOTIFY, xpc_notify_IRQ_handler, IRQF_SHARED,
198 part->IPI_owner, (void *)(u64)partid);
199 if (ret != 0) {
200 dev_err(xpc_chan, "can't register NOTIFY IRQ handler, "
201 "errno=%d\n", -ret);
202 kfree(part->remote_openclose_args_base);
203 part->remote_openclose_args = NULL;
204 kfree(part->local_openclose_args_base);
205 part->local_openclose_args = NULL;
206 kfree(part->remote_GPs_base);
207 part->remote_GPs = NULL;
208 kfree(part->local_GPs_base);
209 part->local_GPs = NULL;
210 kfree(part->channels);
211 part->channels = NULL;
212 return xpcLackOfResources;
213 }
214
215 /* Setup a timer to check for dropped IPIs */
216 timer = &part->dropped_IPI_timer;
217 init_timer(timer);
218 timer->function = (void (*)(unsigned long))xpc_dropped_IPI_check;
219 timer->data = (unsigned long)part;
220 timer->expires = jiffies + XPC_P_DROPPED_IPI_WAIT;
221 add_timer(timer);
222
223 /*
224 * With the setting of the partition setup_state to XPC_P_SETUP, we're
225 * declaring that this partition is ready to go.
226 */
227 part->setup_state = XPC_P_SETUP;
228
229 /*
230 * Setup the per partition specific variables required by the
231 * remote partition to establish channel connections with us.
232 *
233 * The setting of the magic # indicates that these per partition
234 * specific variables are ready to be used.
235 */
236 xpc_vars_part[partid].GPs_pa = __pa(part->local_GPs);
237 xpc_vars_part[partid].openclose_args_pa =
238 __pa(part->local_openclose_args);
239 xpc_vars_part[partid].IPI_amo_pa = __pa(part->local_IPI_amo_va);
240 cpuid = raw_smp_processor_id(); /* any CPU in this partition will do */
241 xpc_vars_part[partid].IPI_nasid = cpuid_to_nasid(cpuid);
242 xpc_vars_part[partid].IPI_phys_cpuid = cpu_physical_id(cpuid);
243 xpc_vars_part[partid].nchannels = part->nchannels;
244 xpc_vars_part[partid].magic = XPC_VP_MAGIC1;
245
246 return xpcSuccess;
247}
248
249/*
250 * Create a wrapper that hides the underlying mechanism for pulling a cacheline
251 * (or multiple cachelines) from a remote partition.
252 *
253 * src must be a cacheline aligned physical address on the remote partition.
254 * dst must be a cacheline aligned virtual address on this partition.
255 * cnt must be a cacheline-sized multiple (a multiple of L1_CACHE_BYTES).
256 */
257static enum xpc_retval
258xpc_pull_remote_cachelines(struct xpc_partition *part, void *dst,
259 const void *src, size_t cnt)
260{
261 bte_result_t bte_ret;
262
263 DBUG_ON((u64)src != L1_CACHE_ALIGN((u64)src));
264 DBUG_ON((u64)dst != L1_CACHE_ALIGN((u64)dst));
265 DBUG_ON(cnt != L1_CACHE_ALIGN(cnt));
266
267 if (part->act_state == XPC_P_DEACTIVATING)
268 return part->reason;
269
270 bte_ret = xp_bte_copy((u64)src, (u64)dst, (u64)cnt,
271 (BTE_NORMAL | BTE_WACQUIRE), NULL);
272 if (bte_ret == BTE_SUCCESS)
273 return xpcSuccess;
274
275 dev_dbg(xpc_chan, "xp_bte_copy() from partition %d failed, ret=%d\n",
276 XPC_PARTID(part), bte_ret);
277
278 return xpc_map_bte_errors(bte_ret);
279}
280
281/*
282 * Pull the remote per partition specific variables from the specified
283 * partition.
284 */
285enum xpc_retval
286xpc_pull_remote_vars_part(struct xpc_partition *part)
287{
288 u8 buffer[L1_CACHE_BYTES * 2];
289 struct xpc_vars_part *pulled_entry_cacheline =
290 (struct xpc_vars_part *)L1_CACHE_ALIGN((u64)buffer);
291 struct xpc_vars_part *pulled_entry;
292 u64 remote_entry_cacheline_pa, remote_entry_pa;
293 partid_t partid = XPC_PARTID(part);
294 enum xpc_retval ret;
295
296 /* pull the cacheline that contains the variables we're interested in */
297
298 DBUG_ON(part->remote_vars_part_pa !=
299 L1_CACHE_ALIGN(part->remote_vars_part_pa));
300 DBUG_ON(sizeof(struct xpc_vars_part) != L1_CACHE_BYTES / 2);
301
302 remote_entry_pa = part->remote_vars_part_pa +
303 sn_partition_id * sizeof(struct xpc_vars_part);
304
305 remote_entry_cacheline_pa = (remote_entry_pa & ~(L1_CACHE_BYTES - 1));
306
307 pulled_entry = (struct xpc_vars_part *)((u64)pulled_entry_cacheline +
308 (remote_entry_pa &
309 (L1_CACHE_BYTES - 1)));
310
311 ret = xpc_pull_remote_cachelines(part, pulled_entry_cacheline,
312 (void *)remote_entry_cacheline_pa,
313 L1_CACHE_BYTES);
314 if (ret != xpcSuccess) {
315 dev_dbg(xpc_chan, "failed to pull XPC vars_part from "
316 "partition %d, ret=%d\n", partid, ret);
317 return ret;
318 }
319
320 /* see if they've been set up yet */
321
322 if (pulled_entry->magic != XPC_VP_MAGIC1 &&
323 pulled_entry->magic != XPC_VP_MAGIC2) {
324
325 if (pulled_entry->magic != 0) {
326 dev_dbg(xpc_chan, "partition %d's XPC vars_part for "
327 "partition %d has bad magic value (=0x%lx)\n",
328 partid, sn_partition_id, pulled_entry->magic);
329 return xpcBadMagic;
330 }
331
332 /* they've not been initialized yet */
333 return xpcRetry;
334 }
335
336 if (xpc_vars_part[partid].magic == XPC_VP_MAGIC1) {
337
338 /* validate the variables */
339
340 if (pulled_entry->GPs_pa == 0 ||
341 pulled_entry->openclose_args_pa == 0 ||
342 pulled_entry->IPI_amo_pa == 0) {
343
344 dev_err(xpc_chan, "partition %d's XPC vars_part for "
345 "partition %d are not valid\n", partid,
346 sn_partition_id);
347 return xpcInvalidAddress;
348 }
349
350 /* the variables we imported look to be valid */
351
352 part->remote_GPs_pa = pulled_entry->GPs_pa;
353 part->remote_openclose_args_pa =
354 pulled_entry->openclose_args_pa;
355 part->remote_IPI_amo_va =
356 (AMO_t *)__va(pulled_entry->IPI_amo_pa);
357 part->remote_IPI_nasid = pulled_entry->IPI_nasid;
358 part->remote_IPI_phys_cpuid = pulled_entry->IPI_phys_cpuid;
359
360 if (part->nchannels > pulled_entry->nchannels)
361 part->nchannels = pulled_entry->nchannels;
362
363 /* let the other side know that we've pulled their variables */
364
365 xpc_vars_part[partid].magic = XPC_VP_MAGIC2;
366 }
367
368 if (pulled_entry->magic == XPC_VP_MAGIC1)
369 return xpcRetry;
370
371 return xpcSuccess;
372}
373
374/*
375 * Get the IPI flags and pull the openclose args and/or remote GPs as needed.
376 */
377static u64
378xpc_get_IPI_flags(struct xpc_partition *part)
379{
380 unsigned long irq_flags;
381 u64 IPI_amo;
382 enum xpc_retval ret;
383
384 /*
385 * See if there are any IPI flags to be handled.
386 */
387
388 spin_lock_irqsave(&part->IPI_lock, irq_flags);
389 IPI_amo = part->local_IPI_amo;
390 if (IPI_amo != 0)
391 part->local_IPI_amo = 0;
392
393 spin_unlock_irqrestore(&part->IPI_lock, irq_flags);
394
395 if (XPC_ANY_OPENCLOSE_IPI_FLAGS_SET(IPI_amo)) {
396 ret = xpc_pull_remote_cachelines(part,
397 part->remote_openclose_args,
398 (void *)part->
399 remote_openclose_args_pa,
400 XPC_OPENCLOSE_ARGS_SIZE);
401 if (ret != xpcSuccess) {
402 XPC_DEACTIVATE_PARTITION(part, ret);
403
404 dev_dbg(xpc_chan, "failed to pull openclose args from "
405 "partition %d, ret=%d\n", XPC_PARTID(part),
406 ret);
407
408 /* don't bother processing IPIs anymore */
409 IPI_amo = 0;
410 }
411 }
412
413 if (XPC_ANY_MSG_IPI_FLAGS_SET(IPI_amo)) {
414 ret = xpc_pull_remote_cachelines(part, part->remote_GPs,
415 (void *)part->remote_GPs_pa,
416 XPC_GP_SIZE);
417 if (ret != xpcSuccess) {
418 XPC_DEACTIVATE_PARTITION(part, ret);
419
420 dev_dbg(xpc_chan, "failed to pull GPs from partition "
421 "%d, ret=%d\n", XPC_PARTID(part), ret);
422
423 /* don't bother processing IPIs anymore */
424 IPI_amo = 0;
425 }
426 }
427
428 return IPI_amo;
429}
430
431/*
432 * Allocate the local message queue and the notify queue.
433 */
434static enum xpc_retval
435xpc_allocate_local_msgqueue(struct xpc_channel *ch)
436{
437 unsigned long irq_flags;
438 int nentries;
439 size_t nbytes;
440
441 for (nentries = ch->local_nentries; nentries > 0; nentries--) {
442
443 nbytes = nentries * ch->msg_size;
444 ch->local_msgqueue = xpc_kzalloc_cacheline_aligned(nbytes,
445 GFP_KERNEL,
446 &ch->local_msgqueue_base);
447 if (ch->local_msgqueue == NULL)
448 continue;
449
450 nbytes = nentries * sizeof(struct xpc_notify);
451 ch->notify_queue = kzalloc(nbytes, GFP_KERNEL);
452 if (ch->notify_queue == NULL) {
453 kfree(ch->local_msgqueue_base);
454 ch->local_msgqueue = NULL;
455 continue;
456 }
457
458 spin_lock_irqsave(&ch->lock, irq_flags);
459 if (nentries < ch->local_nentries) {
460 dev_dbg(xpc_chan, "nentries=%d local_nentries=%d, "
461 "partid=%d, channel=%d\n", nentries,
462 ch->local_nentries, ch->partid, ch->number);
463
464 ch->local_nentries = nentries;
465 }
466 spin_unlock_irqrestore(&ch->lock, irq_flags);
467 return xpcSuccess;
468 }
469
470 dev_dbg(xpc_chan, "can't get memory for local message queue and notify "
471 "queue, partid=%d, channel=%d\n", ch->partid, ch->number);
472 return xpcNoMemory;
473}
474
475/*
476 * Allocate the cached remote message queue.
477 */
478static enum xpc_retval
479xpc_allocate_remote_msgqueue(struct xpc_channel *ch)
480{
481 unsigned long irq_flags;
482 int nentries;
483 size_t nbytes;
484
485 DBUG_ON(ch->remote_nentries <= 0);
486
487 for (nentries = ch->remote_nentries; nentries > 0; nentries--) {
488
489 nbytes = nentries * ch->msg_size;
490 ch->remote_msgqueue = xpc_kzalloc_cacheline_aligned(nbytes,
491 GFP_KERNEL,
492 &ch->remote_msgqueue_base);
493 if (ch->remote_msgqueue == NULL)
494 continue;
495
496 spin_lock_irqsave(&ch->lock, irq_flags);
497 if (nentries < ch->remote_nentries) {
498 dev_dbg(xpc_chan, "nentries=%d remote_nentries=%d, "
499 "partid=%d, channel=%d\n", nentries,
500 ch->remote_nentries, ch->partid, ch->number);
501
502 ch->remote_nentries = nentries;
503 }
504 spin_unlock_irqrestore(&ch->lock, irq_flags);
505 return xpcSuccess;
506 }
507
508 dev_dbg(xpc_chan, "can't get memory for cached remote message queue, "
509 "partid=%d, channel=%d\n", ch->partid, ch->number);
510 return xpcNoMemory;
511}
512
513/*
514 * Allocate message queues and other stuff associated with a channel.
515 *
516 * Note: Assumes all of the channel sizes are filled in.
517 */
518static enum xpc_retval
519xpc_allocate_msgqueues(struct xpc_channel *ch)
520{
521 unsigned long irq_flags;
522 enum xpc_retval ret;
523
524 DBUG_ON(ch->flags & XPC_C_SETUP);
525
526 ret = xpc_allocate_local_msgqueue(ch);
527 if (ret != xpcSuccess)
528 return ret;
529
530 ret = xpc_allocate_remote_msgqueue(ch);
531 if (ret != xpcSuccess) {
532 kfree(ch->local_msgqueue_base);
533 ch->local_msgqueue = NULL;
534 kfree(ch->notify_queue);
535 ch->notify_queue = NULL;
536 return ret;
537 }
538
539 spin_lock_irqsave(&ch->lock, irq_flags);
540 ch->flags |= XPC_C_SETUP;
541 spin_unlock_irqrestore(&ch->lock, irq_flags);
542
543 return xpcSuccess;
544}
545
546/*
547 * Process a connect message from a remote partition.
548 *
549 * Note: xpc_process_connect() is expecting to be called with the
550 * spin_lock_irqsave held and will leave it locked upon return.
551 */
552static void
553xpc_process_connect(struct xpc_channel *ch, unsigned long *irq_flags)
554{
555 enum xpc_retval ret;
556
557 DBUG_ON(!spin_is_locked(&ch->lock));
558
559 if (!(ch->flags & XPC_C_OPENREQUEST) ||
560 !(ch->flags & XPC_C_ROPENREQUEST)) {
561 /* nothing more to do for now */
562 return;
563 }
564 DBUG_ON(!(ch->flags & XPC_C_CONNECTING));
565
566 if (!(ch->flags & XPC_C_SETUP)) {
567 spin_unlock_irqrestore(&ch->lock, *irq_flags);
568 ret = xpc_allocate_msgqueues(ch);
569 spin_lock_irqsave(&ch->lock, *irq_flags);
570
571 if (ret != xpcSuccess)
572 XPC_DISCONNECT_CHANNEL(ch, ret, irq_flags);
573
574 if (ch->flags & (XPC_C_CONNECTED | XPC_C_DISCONNECTING))
575 return;
576
577 DBUG_ON(!(ch->flags & XPC_C_SETUP));
578 DBUG_ON(ch->local_msgqueue == NULL);
579 DBUG_ON(ch->remote_msgqueue == NULL);
580 }
581
582 if (!(ch->flags & XPC_C_OPENREPLY)) {
583 ch->flags |= XPC_C_OPENREPLY;
584 xpc_IPI_send_openreply(ch, irq_flags);
585 }
586
587 if (!(ch->flags & XPC_C_ROPENREPLY))
588 return;
589
590 DBUG_ON(ch->remote_msgqueue_pa == 0);
591
592 ch->flags = (XPC_C_CONNECTED | XPC_C_SETUP); /* clear all else */
593
594 dev_info(xpc_chan, "channel %d to partition %d connected\n",
595 ch->number, ch->partid);
596
597 spin_unlock_irqrestore(&ch->lock, *irq_flags);
598 xpc_create_kthreads(ch, 1, 0);
599 spin_lock_irqsave(&ch->lock, *irq_flags);
600}
601
602/*
603 * Notify those who wanted to be notified upon delivery of their message.
604 */
605static void
606xpc_notify_senders(struct xpc_channel *ch, enum xpc_retval reason, s64 put)
607{
608 struct xpc_notify *notify;
609 u8 notify_type;
610 s64 get = ch->w_remote_GP.get - 1;
611
612 while (++get < put && atomic_read(&ch->n_to_notify) > 0) {
613
614 notify = &ch->notify_queue[get % ch->local_nentries];
615
616 /*
617 * See if the notify entry indicates it was associated with
 618		 * a message whose sender wants to be notified. It is possible
619 * that it is, but someone else is doing or has done the
620 * notification.
621 */
622 notify_type = notify->type;
623 if (notify_type == 0 ||
624 cmpxchg(&notify->type, notify_type, 0) != notify_type) {
625 continue;
626 }
627
628 DBUG_ON(notify_type != XPC_N_CALL);
629
630 atomic_dec(&ch->n_to_notify);
631
632 if (notify->func != NULL) {
633 dev_dbg(xpc_chan, "notify->func() called, notify=0x%p, "
634 "msg_number=%ld, partid=%d, channel=%d\n",
635 (void *)notify, get, ch->partid, ch->number);
636
637 notify->func(reason, ch->partid, ch->number,
638 notify->key);
639
640 dev_dbg(xpc_chan, "notify->func() returned, "
641 "notify=0x%p, msg_number=%ld, partid=%d, "
642 "channel=%d\n", (void *)notify, get,
643 ch->partid, ch->number);
644 }
645 }
646}
647
648/*
649 * Free up message queues and other stuff that were allocated for the specified
650 * channel.
651 *
652 * Note: ch->reason and ch->reason_line are left set for debugging purposes,
653 * they're cleared when XPC_C_DISCONNECTED is cleared.
654 */
655static void
656xpc_free_msgqueues(struct xpc_channel *ch)
657{
658 DBUG_ON(!spin_is_locked(&ch->lock));
659 DBUG_ON(atomic_read(&ch->n_to_notify) != 0);
660
661 ch->remote_msgqueue_pa = 0;
662 ch->func = NULL;
663 ch->key = NULL;
664 ch->msg_size = 0;
665 ch->local_nentries = 0;
666 ch->remote_nentries = 0;
667 ch->kthreads_assigned_limit = 0;
668 ch->kthreads_idle_limit = 0;
669
670 ch->local_GP->get = 0;
671 ch->local_GP->put = 0;
672 ch->remote_GP.get = 0;
673 ch->remote_GP.put = 0;
674 ch->w_local_GP.get = 0;
675 ch->w_local_GP.put = 0;
676 ch->w_remote_GP.get = 0;
677 ch->w_remote_GP.put = 0;
678 ch->next_msg_to_pull = 0;
679
680 if (ch->flags & XPC_C_SETUP) {
681 ch->flags &= ~XPC_C_SETUP;
682
683 dev_dbg(xpc_chan, "ch->flags=0x%x, partid=%d, channel=%d\n",
684 ch->flags, ch->partid, ch->number);
685
686 kfree(ch->local_msgqueue_base);
687 ch->local_msgqueue = NULL;
688 kfree(ch->remote_msgqueue_base);
689 ch->remote_msgqueue = NULL;
690 kfree(ch->notify_queue);
691 ch->notify_queue = NULL;
692 }
693}
694
695/*
696 * spin_lock_irqsave() is expected to be held on entry.
697 */
698static void
699xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags)
700{
701 struct xpc_partition *part = &xpc_partitions[ch->partid];
702 u32 channel_was_connected = (ch->flags & XPC_C_WASCONNECTED);
703
704 DBUG_ON(!spin_is_locked(&ch->lock));
705
706 if (!(ch->flags & XPC_C_DISCONNECTING))
707 return;
708
709 DBUG_ON(!(ch->flags & XPC_C_CLOSEREQUEST));
710
711 /* make sure all activity has settled down first */
712
713 if (atomic_read(&ch->kthreads_assigned) > 0 ||
714 atomic_read(&ch->references) > 0) {
715 return;
716 }
717 DBUG_ON((ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) &&
718 !(ch->flags & XPC_C_DISCONNECTINGCALLOUT_MADE));
719
720 if (part->act_state == XPC_P_DEACTIVATING) {
721 /* can't proceed until the other side disengages from us */
722 if (xpc_partition_engaged(1UL << ch->partid))
723 return;
724
725 } else {
726
 727		/* as long as the other side is up, do the full protocol */
728
729 if (!(ch->flags & XPC_C_RCLOSEREQUEST))
730 return;
731
732 if (!(ch->flags & XPC_C_CLOSEREPLY)) {
733 ch->flags |= XPC_C_CLOSEREPLY;
734 xpc_IPI_send_closereply(ch, irq_flags);
735 }
736
737 if (!(ch->flags & XPC_C_RCLOSEREPLY))
738 return;
739 }
740
741 /* wake those waiting for notify completion */
742 if (atomic_read(&ch->n_to_notify) > 0) {
743 /* >>> we do callout while holding ch->lock */
744 xpc_notify_senders(ch, ch->reason, ch->w_local_GP.put);
745 }
746
747 /* both sides are disconnected now */
748
749 if (ch->flags & XPC_C_DISCONNECTINGCALLOUT_MADE) {
750 spin_unlock_irqrestore(&ch->lock, *irq_flags);
751 xpc_disconnect_callout(ch, xpcDisconnected);
752 spin_lock_irqsave(&ch->lock, *irq_flags);
753 }
754
755 /* it's now safe to free the channel's message queues */
756 xpc_free_msgqueues(ch);
757
758 /* mark disconnected, clear all other flags except XPC_C_WDISCONNECT */
759 ch->flags = (XPC_C_DISCONNECTED | (ch->flags & XPC_C_WDISCONNECT));
760
761 atomic_dec(&part->nchannels_active);
762
763 if (channel_was_connected) {
764 dev_info(xpc_chan, "channel %d to partition %d disconnected, "
765 "reason=%d\n", ch->number, ch->partid, ch->reason);
766 }
767
768 if (ch->flags & XPC_C_WDISCONNECT) {
769 /* we won't lose the CPU since we're holding ch->lock */
770 complete(&ch->wdisconnect_wait);
771 } else if (ch->delayed_IPI_flags) {
772 if (part->act_state != XPC_P_DEACTIVATING) {
773 /* time to take action on any delayed IPI flags */
774 spin_lock(&part->IPI_lock);
775 XPC_SET_IPI_FLAGS(part->local_IPI_amo, ch->number,
776 ch->delayed_IPI_flags);
777 spin_unlock(&part->IPI_lock);
778 }
779 ch->delayed_IPI_flags = 0;
780 }
781}
782
783/*
784 * Process a change in the channel's remote connection state.
785 */
786static void
787xpc_process_openclose_IPI(struct xpc_partition *part, int ch_number,
788 u8 IPI_flags)
789{
790 unsigned long irq_flags;
791 struct xpc_openclose_args *args =
792 &part->remote_openclose_args[ch_number];
793 struct xpc_channel *ch = &part->channels[ch_number];
794 enum xpc_retval reason;
795
796 spin_lock_irqsave(&ch->lock, irq_flags);
797
798again:
799
800 if ((ch->flags & XPC_C_DISCONNECTED) &&
801 (ch->flags & XPC_C_WDISCONNECT)) {
802 /*
 803		 * Delay processing IPI flags until the thread waiting for the
 804		 * disconnect has had a chance to see that the channel is disconnected.
805 */
806 ch->delayed_IPI_flags |= IPI_flags;
807 spin_unlock_irqrestore(&ch->lock, irq_flags);
808 return;
809 }
810
811 if (IPI_flags & XPC_IPI_CLOSEREQUEST) {
812
813 dev_dbg(xpc_chan, "XPC_IPI_CLOSEREQUEST (reason=%d) received "
814 "from partid=%d, channel=%d\n", args->reason,
815 ch->partid, ch->number);
816
817 /*
818 * If RCLOSEREQUEST is set, we're probably waiting for
819 * RCLOSEREPLY. We should find it and a ROPENREQUEST packed
820 * with this RCLOSEREQUEST in the IPI_flags.
821 */
822
823 if (ch->flags & XPC_C_RCLOSEREQUEST) {
824 DBUG_ON(!(ch->flags & XPC_C_DISCONNECTING));
825 DBUG_ON(!(ch->flags & XPC_C_CLOSEREQUEST));
826 DBUG_ON(!(ch->flags & XPC_C_CLOSEREPLY));
827 DBUG_ON(ch->flags & XPC_C_RCLOSEREPLY);
828
829 DBUG_ON(!(IPI_flags & XPC_IPI_CLOSEREPLY));
830 IPI_flags &= ~XPC_IPI_CLOSEREPLY;
831 ch->flags |= XPC_C_RCLOSEREPLY;
832
833 /* both sides have finished disconnecting */
834 xpc_process_disconnect(ch, &irq_flags);
835 DBUG_ON(!(ch->flags & XPC_C_DISCONNECTED));
836 goto again;
837 }
838
839 if (ch->flags & XPC_C_DISCONNECTED) {
840 if (!(IPI_flags & XPC_IPI_OPENREQUEST)) {
841 if ((XPC_GET_IPI_FLAGS(part->local_IPI_amo,
842 ch_number) &
843 XPC_IPI_OPENREQUEST)) {
844
845 DBUG_ON(ch->delayed_IPI_flags != 0);
846 spin_lock(&part->IPI_lock);
847 XPC_SET_IPI_FLAGS(part->local_IPI_amo,
848 ch_number,
849 XPC_IPI_CLOSEREQUEST);
850 spin_unlock(&part->IPI_lock);
851 }
852 spin_unlock_irqrestore(&ch->lock, irq_flags);
853 return;
854 }
855
856 XPC_SET_REASON(ch, 0, 0);
857 ch->flags &= ~XPC_C_DISCONNECTED;
858
859 atomic_inc(&part->nchannels_active);
860 ch->flags |= (XPC_C_CONNECTING | XPC_C_ROPENREQUEST);
861 }
862
863 IPI_flags &= ~(XPC_IPI_OPENREQUEST | XPC_IPI_OPENREPLY);
864
865 /*
866 * The meaningful CLOSEREQUEST connection state fields are:
867 * reason = reason connection is to be closed
868 */
869
870 ch->flags |= XPC_C_RCLOSEREQUEST;
871
872 if (!(ch->flags & XPC_C_DISCONNECTING)) {
873 reason = args->reason;
874 if (reason <= xpcSuccess || reason > xpcUnknownReason)
875 reason = xpcUnknownReason;
876 else if (reason == xpcUnregistering)
877 reason = xpcOtherUnregistering;
878
879 XPC_DISCONNECT_CHANNEL(ch, reason, &irq_flags);
880
881 DBUG_ON(IPI_flags & XPC_IPI_CLOSEREPLY);
882 spin_unlock_irqrestore(&ch->lock, irq_flags);
883 return;
884 }
885
886 xpc_process_disconnect(ch, &irq_flags);
887 }
888
889 if (IPI_flags & XPC_IPI_CLOSEREPLY) {
890
891 dev_dbg(xpc_chan, "XPC_IPI_CLOSEREPLY received from partid=%d,"
892 " channel=%d\n", ch->partid, ch->number);
893
894 if (ch->flags & XPC_C_DISCONNECTED) {
895 DBUG_ON(part->act_state != XPC_P_DEACTIVATING);
896 spin_unlock_irqrestore(&ch->lock, irq_flags);
897 return;
898 }
899
900 DBUG_ON(!(ch->flags & XPC_C_CLOSEREQUEST));
901
902 if (!(ch->flags & XPC_C_RCLOSEREQUEST)) {
903 if ((XPC_GET_IPI_FLAGS(part->local_IPI_amo, ch_number)
904 & XPC_IPI_CLOSEREQUEST)) {
905
906 DBUG_ON(ch->delayed_IPI_flags != 0);
907 spin_lock(&part->IPI_lock);
908 XPC_SET_IPI_FLAGS(part->local_IPI_amo,
909 ch_number,
910 XPC_IPI_CLOSEREPLY);
911 spin_unlock(&part->IPI_lock);
912 }
913 spin_unlock_irqrestore(&ch->lock, irq_flags);
914 return;
915 }
916
917 ch->flags |= XPC_C_RCLOSEREPLY;
918
919 if (ch->flags & XPC_C_CLOSEREPLY) {
920 /* both sides have finished disconnecting */
921 xpc_process_disconnect(ch, &irq_flags);
922 }
923 }
924
925 if (IPI_flags & XPC_IPI_OPENREQUEST) {
926
927 dev_dbg(xpc_chan, "XPC_IPI_OPENREQUEST (msg_size=%d, "
928 "local_nentries=%d) received from partid=%d, "
929 "channel=%d\n", args->msg_size, args->local_nentries,
930 ch->partid, ch->number);
931
932 if (part->act_state == XPC_P_DEACTIVATING ||
933 (ch->flags & XPC_C_ROPENREQUEST)) {
934 spin_unlock_irqrestore(&ch->lock, irq_flags);
935 return;
936 }
937
938 if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_WDISCONNECT)) {
939 ch->delayed_IPI_flags |= XPC_IPI_OPENREQUEST;
940 spin_unlock_irqrestore(&ch->lock, irq_flags);
941 return;
942 }
943 DBUG_ON(!(ch->flags & (XPC_C_DISCONNECTED |
944 XPC_C_OPENREQUEST)));
945 DBUG_ON(ch->flags & (XPC_C_ROPENREQUEST | XPC_C_ROPENREPLY |
946 XPC_C_OPENREPLY | XPC_C_CONNECTED));
947
948 /*
949 * The meaningful OPENREQUEST connection state fields are:
950 * msg_size = size of channel's messages in bytes
951 * local_nentries = remote partition's local_nentries
952 */
953 if (args->msg_size == 0 || args->local_nentries == 0) {
954 /* assume OPENREQUEST was delayed by mistake */
955 spin_unlock_irqrestore(&ch->lock, irq_flags);
956 return;
957 }
958
959 ch->flags |= (XPC_C_ROPENREQUEST | XPC_C_CONNECTING);
960 ch->remote_nentries = args->local_nentries;
961
962 if (ch->flags & XPC_C_OPENREQUEST) {
963 if (args->msg_size != ch->msg_size) {
964 XPC_DISCONNECT_CHANNEL(ch, xpcUnequalMsgSizes,
965 &irq_flags);
966 spin_unlock_irqrestore(&ch->lock, irq_flags);
967 return;
968 }
969 } else {
970 ch->msg_size = args->msg_size;
971
972 XPC_SET_REASON(ch, 0, 0);
973 ch->flags &= ~XPC_C_DISCONNECTED;
974
975 atomic_inc(&part->nchannels_active);
976 }
977
978 xpc_process_connect(ch, &irq_flags);
979 }
980
981 if (IPI_flags & XPC_IPI_OPENREPLY) {
982
983 dev_dbg(xpc_chan, "XPC_IPI_OPENREPLY (local_msgqueue_pa=0x%lx, "
984 "local_nentries=%d, remote_nentries=%d) received from "
985 "partid=%d, channel=%d\n", args->local_msgqueue_pa,
986 args->local_nentries, args->remote_nentries,
987 ch->partid, ch->number);
988
989 if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_DISCONNECTED)) {
990 spin_unlock_irqrestore(&ch->lock, irq_flags);
991 return;
992 }
993 if (!(ch->flags & XPC_C_OPENREQUEST)) {
994 XPC_DISCONNECT_CHANNEL(ch, xpcOpenCloseError,
995 &irq_flags);
996 spin_unlock_irqrestore(&ch->lock, irq_flags);
997 return;
998 }
999
1000 DBUG_ON(!(ch->flags & XPC_C_ROPENREQUEST));
1001 DBUG_ON(ch->flags & XPC_C_CONNECTED);
1002
1003 /*
1004 * The meaningful OPENREPLY connection state fields are:
1005 * local_msgqueue_pa = physical address of remote
1006 * partition's local_msgqueue
1007 * local_nentries = remote partition's local_nentries
1008 * remote_nentries = remote partition's remote_nentries
1009 */
1010 DBUG_ON(args->local_msgqueue_pa == 0);
1011 DBUG_ON(args->local_nentries == 0);
1012 DBUG_ON(args->remote_nentries == 0);
1013
1014 ch->flags |= XPC_C_ROPENREPLY;
1015 ch->remote_msgqueue_pa = args->local_msgqueue_pa;
1016
1017 if (args->local_nentries < ch->remote_nentries) {
1018 dev_dbg(xpc_chan, "XPC_IPI_OPENREPLY: new "
1019 "remote_nentries=%d, old remote_nentries=%d, "
1020 "partid=%d, channel=%d\n",
1021 args->local_nentries, ch->remote_nentries,
1022 ch->partid, ch->number);
1023
1024 ch->remote_nentries = args->local_nentries;
1025 }
1026 if (args->remote_nentries < ch->local_nentries) {
1027 dev_dbg(xpc_chan, "XPC_IPI_OPENREPLY: new "
1028 "local_nentries=%d, old local_nentries=%d, "
1029 "partid=%d, channel=%d\n",
1030 args->remote_nentries, ch->local_nentries,
1031 ch->partid, ch->number);
1032
1033 ch->local_nentries = args->remote_nentries;
1034 }
1035
1036 xpc_process_connect(ch, &irq_flags);
1037 }
1038
1039 spin_unlock_irqrestore(&ch->lock, irq_flags);
1040}
1041
1042/*
1043 * Attempt to establish a channel connection to a remote partition.
1044 */
1045static enum xpc_retval
1046xpc_connect_channel(struct xpc_channel *ch)
1047{
1048 unsigned long irq_flags;
1049 struct xpc_registration *registration = &xpc_registrations[ch->number];
1050
1051 if (mutex_trylock(&registration->mutex) == 0)
1052 return xpcRetry;
1053
1054 if (!XPC_CHANNEL_REGISTERED(ch->number)) {
1055 mutex_unlock(&registration->mutex);
1056 return xpcUnregistered;
1057 }
1058
1059 spin_lock_irqsave(&ch->lock, irq_flags);
1060
1061 DBUG_ON(ch->flags & XPC_C_CONNECTED);
1062 DBUG_ON(ch->flags & XPC_C_OPENREQUEST);
1063
1064 if (ch->flags & XPC_C_DISCONNECTING) {
1065 spin_unlock_irqrestore(&ch->lock, irq_flags);
1066 mutex_unlock(&registration->mutex);
1067 return ch->reason;
1068 }
1069
1070 /* add info from the channel connect registration to the channel */
1071
1072 ch->kthreads_assigned_limit = registration->assigned_limit;
1073 ch->kthreads_idle_limit = registration->idle_limit;
1074 DBUG_ON(atomic_read(&ch->kthreads_assigned) != 0);
1075 DBUG_ON(atomic_read(&ch->kthreads_idle) != 0);
1076 DBUG_ON(atomic_read(&ch->kthreads_active) != 0);
1077
1078 ch->func = registration->func;
1079 DBUG_ON(registration->func == NULL);
1080 ch->key = registration->key;
1081
1082 ch->local_nentries = registration->nentries;
1083
1084 if (ch->flags & XPC_C_ROPENREQUEST) {
1085 if (registration->msg_size != ch->msg_size) {
1086 /* the local and remote sides aren't the same */
1087
1088 /*
1089 * Because XPC_DISCONNECT_CHANNEL() can block we're
1090 * forced to up the registration sema before we unlock
1091 * the channel lock. But that's okay here because we're
1092 * done with the part that required the registration
1093 * sema. XPC_DISCONNECT_CHANNEL() requires that the
1094 * channel lock be locked and will unlock and relock
1095 * the channel lock as needed.
1096 */
1097 mutex_unlock(&registration->mutex);
1098 XPC_DISCONNECT_CHANNEL(ch, xpcUnequalMsgSizes,
1099 &irq_flags);
1100 spin_unlock_irqrestore(&ch->lock, irq_flags);
1101 return xpcUnequalMsgSizes;
1102 }
1103 } else {
1104 ch->msg_size = registration->msg_size;
1105
1106 XPC_SET_REASON(ch, 0, 0);
1107 ch->flags &= ~XPC_C_DISCONNECTED;
1108
1109 atomic_inc(&xpc_partitions[ch->partid].nchannels_active);
1110 }
1111
1112 mutex_unlock(&registration->mutex);
1113
1114 /* initiate the connection */
1115
1116 ch->flags |= (XPC_C_OPENREQUEST | XPC_C_CONNECTING);
1117 xpc_IPI_send_openrequest(ch, &irq_flags);
1118
1119 xpc_process_connect(ch, &irq_flags);
1120
1121 spin_unlock_irqrestore(&ch->lock, irq_flags);
1122
1123 return xpcSuccess;
1124}
1125
1126/*
1127 * Clear some of the msg flags in the local message queue.
1128 */
1129static inline void
1130xpc_clear_local_msgqueue_flags(struct xpc_channel *ch)
1131{
1132 struct xpc_msg *msg;
1133 s64 get;
1134
1135 get = ch->w_remote_GP.get;
1136 do {
1137 msg = (struct xpc_msg *)((u64)ch->local_msgqueue +
1138 (get % ch->local_nentries) *
1139 ch->msg_size);
1140 msg->flags = 0;
1141 } while (++get < ch->remote_GP.get);
1142}
1143
1144/*
1145 * Clear some of the msg flags in the remote message queue.
1146 */
1147static inline void
1148xpc_clear_remote_msgqueue_flags(struct xpc_channel *ch)
1149{
1150 struct xpc_msg *msg;
1151 s64 put;
1152
1153 put = ch->w_remote_GP.put;
1154 do {
1155 msg = (struct xpc_msg *)((u64)ch->remote_msgqueue +
1156 (put % ch->remote_nentries) *
1157 ch->msg_size);
1158 msg->flags = 0;
1159 } while (++put < ch->remote_GP.put);
1160}
1161
1162static void
1163xpc_process_msg_IPI(struct xpc_partition *part, int ch_number)
1164{
1165 struct xpc_channel *ch = &part->channels[ch_number];
1166 int nmsgs_sent;
1167
1168 ch->remote_GP = part->remote_GPs[ch_number];
1169
1170 /* See what, if anything, has changed for each connected channel */
1171
1172 xpc_msgqueue_ref(ch);
1173
1174 if (ch->w_remote_GP.get == ch->remote_GP.get &&
1175 ch->w_remote_GP.put == ch->remote_GP.put) {
1176 /* nothing changed since GPs were last pulled */
1177 xpc_msgqueue_deref(ch);
1178 return;
1179 }
1180
1181 if (!(ch->flags & XPC_C_CONNECTED)) {
1182 xpc_msgqueue_deref(ch);
1183 return;
1184 }
1185
1186 /*
1187 * First check to see if messages recently sent by us have been
1188 * received by the other side. (The remote GET value will have
1189 * changed since we last looked at it.)
1190 */
1191
1192 if (ch->w_remote_GP.get != ch->remote_GP.get) {
1193
1194 /*
1195 * We need to notify any senders that want to be notified
1196 * that their sent messages have been received by their
1197 * intended recipients. We need to do this before updating
1198 * w_remote_GP.get so that we don't allocate the same message
1199 * queue entries prematurely (see xpc_allocate_msg()).
1200 */
1201 if (atomic_read(&ch->n_to_notify) > 0) {
1202 /*
1203 * Notify senders that messages sent have been
1204 * received and delivered by the other side.
1205 */
1206 xpc_notify_senders(ch, xpcMsgDelivered,
1207 ch->remote_GP.get);
1208 }
1209
1210 /*
1211 * Clear msg->flags in previously sent messages, so that
1212 * they're ready for xpc_allocate_msg().
1213 */
1214 xpc_clear_local_msgqueue_flags(ch);
1215
1216 ch->w_remote_GP.get = ch->remote_GP.get;
1217
1218 dev_dbg(xpc_chan, "w_remote_GP.get changed to %ld, partid=%d, "
1219 "channel=%d\n", ch->w_remote_GP.get, ch->partid,
1220 ch->number);
1221
1222 /*
1223 * If anyone was waiting for message queue entries to become
1224 * available, wake them up.
1225 */
1226 if (atomic_read(&ch->n_on_msg_allocate_wq) > 0)
1227 wake_up(&ch->msg_allocate_wq);
1228 }
1229
1230 /*
1231 * Now check for newly sent messages by the other side. (The remote
1232 * PUT value will have changed since we last looked at it.)
1233 */
1234
1235 if (ch->w_remote_GP.put != ch->remote_GP.put) {
1236 /*
1237 * Clear msg->flags in previously received messages, so that
1238 * they're ready for xpc_get_deliverable_msg().
1239 */
1240 xpc_clear_remote_msgqueue_flags(ch);
1241
1242 ch->w_remote_GP.put = ch->remote_GP.put;
1243
1244 dev_dbg(xpc_chan, "w_remote_GP.put changed to %ld, partid=%d, "
1245 "channel=%d\n", ch->w_remote_GP.put, ch->partid,
1246 ch->number);
1247
1248 nmsgs_sent = ch->w_remote_GP.put - ch->w_local_GP.get;
1249 if (nmsgs_sent > 0) {
1250 dev_dbg(xpc_chan, "msgs waiting to be copied and "
1251 "delivered=%d, partid=%d, channel=%d\n",
1252 nmsgs_sent, ch->partid, ch->number);
1253
1254 if (ch->flags & XPC_C_CONNECTEDCALLOUT_MADE)
1255 xpc_activate_kthreads(ch, nmsgs_sent);
1256 }
1257 }
1258
1259 xpc_msgqueue_deref(ch);
1260}
1261
1262void
1263xpc_process_channel_activity(struct xpc_partition *part)
1264{
1265 unsigned long irq_flags;
1266 u64 IPI_amo, IPI_flags;
1267 struct xpc_channel *ch;
1268 int ch_number;
1269 u32 ch_flags;
1270
1271 IPI_amo = xpc_get_IPI_flags(part);
1272
1273 /*
1274 * Initiate channel connections for registered channels.
1275 *
1276 * For each connected channel that has pending messages activate idle
1277 * kthreads and/or create new kthreads as needed.
1278 */
1279
1280 for (ch_number = 0; ch_number < part->nchannels; ch_number++) {
1281 ch = &part->channels[ch_number];
1282
1283 /*
1284 * Process any open or close related IPI flags, and then deal
1285 * with connecting or disconnecting the channel as required.
1286 */
1287
1288 IPI_flags = XPC_GET_IPI_FLAGS(IPI_amo, ch_number);
1289
1290 if (XPC_ANY_OPENCLOSE_IPI_FLAGS_SET(IPI_flags))
1291 xpc_process_openclose_IPI(part, ch_number, IPI_flags);
1292
1293 ch_flags = ch->flags; /* need an atomic snapshot of flags */
1294
1295 if (ch_flags & XPC_C_DISCONNECTING) {
1296 spin_lock_irqsave(&ch->lock, irq_flags);
1297 xpc_process_disconnect(ch, &irq_flags);
1298 spin_unlock_irqrestore(&ch->lock, irq_flags);
1299 continue;
1300 }
1301
1302 if (part->act_state == XPC_P_DEACTIVATING)
1303 continue;
1304
1305 if (!(ch_flags & XPC_C_CONNECTED)) {
1306 if (!(ch_flags & XPC_C_OPENREQUEST)) {
1307 DBUG_ON(ch_flags & XPC_C_SETUP);
1308 (void)xpc_connect_channel(ch);
1309 } else {
1310 spin_lock_irqsave(&ch->lock, irq_flags);
1311 xpc_process_connect(ch, &irq_flags);
1312 spin_unlock_irqrestore(&ch->lock, irq_flags);
1313 }
1314 continue;
1315 }
1316
1317 /*
1318 * Process any message related IPI flags, this may involve the
1319 * activation of kthreads to deliver any pending messages sent
1320 * from the other partition.
1321 */
1322
1323 if (XPC_ANY_MSG_IPI_FLAGS_SET(IPI_flags))
1324 xpc_process_msg_IPI(part, ch_number);
1325 }
1326}
1327
1328/*
1329 * XPC's heartbeat code calls this function to inform XPC that a partition is
1330 * going down. XPC responds by tearing down the XPartition Communication
1331 * infrastructure used for the just downed partition.
1332 *
1333 * XPC's heartbeat code will never call this function and xpc_partition_up()
1334 * at the same time. Nor will it ever make multiple calls to either function
1335 * at the same time.
1336 */
1337void
1338xpc_partition_going_down(struct xpc_partition *part, enum xpc_retval reason)
1339{
1340 unsigned long irq_flags;
1341 int ch_number;
1342 struct xpc_channel *ch;
1343
1344 dev_dbg(xpc_chan, "deactivating partition %d, reason=%d\n",
1345 XPC_PARTID(part), reason);
1346
1347 if (!xpc_part_ref(part)) {
1348 /* infrastructure for this partition isn't currently set up */
1349 return;
1350 }
1351
1352 /* disconnect channels associated with the partition going down */
1353
1354 for (ch_number = 0; ch_number < part->nchannels; ch_number++) {
1355 ch = &part->channels[ch_number];
1356
1357 xpc_msgqueue_ref(ch);
1358 spin_lock_irqsave(&ch->lock, irq_flags);
1359
1360 XPC_DISCONNECT_CHANNEL(ch, reason, &irq_flags);
1361
1362 spin_unlock_irqrestore(&ch->lock, irq_flags);
1363 xpc_msgqueue_deref(ch);
1364 }
1365
1366 xpc_wakeup_channel_mgr(part);
1367
1368 xpc_part_deref(part);
1369}
1370
1371/*
1372 * Tear down the infrastructure necessary to support XPartition Communication
1373 * between the specified remote partition and the local one.
1374 */
1375void
1376xpc_teardown_infrastructure(struct xpc_partition *part)
1377{
1378 partid_t partid = XPC_PARTID(part);
1379
1380 /*
1381 * We start off by making this partition inaccessible to local
1382 * processes by marking it as no longer setup. Then we make it
1383 * inaccessible to remote processes by clearing the XPC per partition
1384 * specific variable's magic # (which indicates that these variables
1385 * are no longer valid) and by ignoring all XPC notify IPIs sent to
1386 * this partition.
1387 */
1388
1389 DBUG_ON(atomic_read(&part->nchannels_engaged) != 0);
1390 DBUG_ON(atomic_read(&part->nchannels_active) != 0);
1391 DBUG_ON(part->setup_state != XPC_P_SETUP);
1392 part->setup_state = XPC_P_WTEARDOWN;
1393
1394 xpc_vars_part[partid].magic = 0;
1395
1396 free_irq(SGI_XPC_NOTIFY, (void *)(u64)partid);
1397
1398 /*
1399 * Before proceeding with the teardown we have to wait until all
1400 * existing references cease.
1401 */
1402 wait_event(part->teardown_wq, (atomic_read(&part->references) == 0));
1403
1404 /* now we can begin tearing down the infrastructure */
1405
1406 part->setup_state = XPC_P_TORNDOWN;
1407
1408 /* in case we've still got outstanding timers registered... */
1409 del_timer_sync(&part->dropped_IPI_timer);
1410
1411 kfree(part->remote_openclose_args_base);
1412 part->remote_openclose_args = NULL;
1413 kfree(part->local_openclose_args_base);
1414 part->local_openclose_args = NULL;
1415 kfree(part->remote_GPs_base);
1416 part->remote_GPs = NULL;
1417 kfree(part->local_GPs_base);
1418 part->local_GPs = NULL;
1419 kfree(part->channels);
1420 part->channels = NULL;
1421 part->local_IPI_amo_va = NULL;
1422}
1423
1424/*
1425 * Called by XP at the time of channel connection registration to cause
1426 * XPC to establish connections to all currently active partitions.
1427 */
1428void
1429xpc_initiate_connect(int ch_number)
1430{
1431 partid_t partid;
1432 struct xpc_partition *part;
1433 struct xpc_channel *ch;
1434
1435 DBUG_ON(ch_number < 0 || ch_number >= XPC_NCHANNELS);
1436
1437 for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
1438 part = &xpc_partitions[partid];
1439
1440 if (xpc_part_ref(part)) {
1441 ch = &part->channels[ch_number];
1442
1443 /*
1444 * Initiate the establishment of a connection on the
1445 * newly registered channel to the remote partition.
1446 */
1447 xpc_wakeup_channel_mgr(part);
1448 xpc_part_deref(part);
1449 }
1450 }
1451}
1452
1453void
1454xpc_connected_callout(struct xpc_channel *ch)
1455{
1456 /* let the registerer know that a connection has been established */
1457
1458 if (ch->func != NULL) {
1459 dev_dbg(xpc_chan, "ch->func() called, reason=xpcConnected, "
1460 "partid=%d, channel=%d\n", ch->partid, ch->number);
1461
1462 ch->func(xpcConnected, ch->partid, ch->number,
1463 (void *)(u64)ch->local_nentries, ch->key);
1464
1465 dev_dbg(xpc_chan, "ch->func() returned, reason=xpcConnected, "
1466 "partid=%d, channel=%d\n", ch->partid, ch->number);
1467 }
1468}
1469
1470/*
1471 * Called by XP at the time of channel connection unregistration to cause
1472 * XPC to teardown all current connections for the specified channel.
1473 *
1474 * Before returning xpc_initiate_disconnect() will wait until all connections
1475 * on the specified channel have been closed/torn down. So the caller can be
1476 * assured that they will not be receiving any more callouts from XPC to the
1477 * function they registered via xpc_connect().
1478 *
1479 * Arguments:
1480 *
1481 * ch_number - channel # to unregister.
1482 */
1483void
1484xpc_initiate_disconnect(int ch_number)
1485{
1486 unsigned long irq_flags;
1487 partid_t partid;
1488 struct xpc_partition *part;
1489 struct xpc_channel *ch;
1490
1491 DBUG_ON(ch_number < 0 || ch_number >= XPC_NCHANNELS);
1492
1493 /* initiate the channel disconnect for every active partition */
1494 for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
1495 part = &xpc_partitions[partid];
1496
1497 if (xpc_part_ref(part)) {
1498 ch = &part->channels[ch_number];
1499 xpc_msgqueue_ref(ch);
1500
1501 spin_lock_irqsave(&ch->lock, irq_flags);
1502
1503 if (!(ch->flags & XPC_C_DISCONNECTED)) {
1504 ch->flags |= XPC_C_WDISCONNECT;
1505
1506 XPC_DISCONNECT_CHANNEL(ch, xpcUnregistering,
1507 &irq_flags);
1508 }
1509
1510 spin_unlock_irqrestore(&ch->lock, irq_flags);
1511
1512 xpc_msgqueue_deref(ch);
1513 xpc_part_deref(part);
1514 }
1515 }
1516
1517 xpc_disconnect_wait(ch_number);
1518}
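
A minimal registration sketch may help tie the two entry points above to their callers. It assumes the xpc_connect()/xpc_disconnect() wrappers exported by the XP layer (channel number, channel function, key, payload size, queue entries, and two kthread limits); MY_CH, the placeholder sizes and limits, and my_channel_func() (sketched after xpc_initiate_received() below) are illustrative, not part of this driver:

#define MY_CH	0	/* illustrative channel number */

static int __init my_xp_user_init(void)
{
	enum xpc_retval ret;

	/*
	 * Registering the channel function is what leads XPC to call
	 * xpc_initiate_connect(MY_CH).  The xpc_connect() prototype and
	 * the payload size, nentries and kthread limits below are assumed
	 * placeholder values for illustration only.
	 */
	ret = xpc_connect(MY_CH, my_channel_func, NULL, 64, 128, 4, 2);
	return (ret == xpcSuccess) ? 0 : -EAGAIN;
}

static void __exit my_xp_user_exit(void)
{
	/* unregistering leads XPC to call xpc_initiate_disconnect(MY_CH) */
	xpc_disconnect(MY_CH);
}
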
1519
1520/*
1521 * To disconnect a channel and reflect it back to all who may be waiting.
1522 *
1523 * An OPEN is not allowed until XPC_C_DISCONNECTING is cleared by
1524 * xpc_process_disconnect(), and if set, XPC_C_WDISCONNECT is cleared by
1525 * xpc_disconnect_wait().
1526 *
1527 * THE CHANNEL IS TO BE LOCKED BY THE CALLER AND WILL REMAIN LOCKED UPON RETURN.
1528 */
1529void
1530xpc_disconnect_channel(const int line, struct xpc_channel *ch,
1531 enum xpc_retval reason, unsigned long *irq_flags)
1532{
1533 u32 channel_was_connected = (ch->flags & XPC_C_CONNECTED);
1534
1535 DBUG_ON(!spin_is_locked(&ch->lock));
1536
1537 if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_DISCONNECTED))
1538 return;
1539
1540 DBUG_ON(!(ch->flags & (XPC_C_CONNECTING | XPC_C_CONNECTED)));
1541
1542 dev_dbg(xpc_chan, "reason=%d, line=%d, partid=%d, channel=%d\n",
1543 reason, line, ch->partid, ch->number);
1544
1545 XPC_SET_REASON(ch, reason, line);
1546
1547 ch->flags |= (XPC_C_CLOSEREQUEST | XPC_C_DISCONNECTING);
1548 /* some of these may not have been set */
1549 ch->flags &= ~(XPC_C_OPENREQUEST | XPC_C_OPENREPLY |
1550 XPC_C_ROPENREQUEST | XPC_C_ROPENREPLY |
1551 XPC_C_CONNECTING | XPC_C_CONNECTED);
1552
1553 xpc_IPI_send_closerequest(ch, irq_flags);
1554
1555 if (channel_was_connected)
1556 ch->flags |= XPC_C_WASCONNECTED;
1557
1558 spin_unlock_irqrestore(&ch->lock, *irq_flags);
1559
1560 /* wake all idle kthreads so they can exit */
1561 if (atomic_read(&ch->kthreads_idle) > 0) {
1562 wake_up_all(&ch->idle_wq);
1563
1564 } else if ((ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) &&
1565 !(ch->flags & XPC_C_DISCONNECTINGCALLOUT)) {
1566 /* start a kthread that will do the xpcDisconnecting callout */
1567 xpc_create_kthreads(ch, 1, 1);
1568 }
1569
1570 /* wake those waiting to allocate an entry from the local msg queue */
1571 if (atomic_read(&ch->n_on_msg_allocate_wq) > 0)
1572 wake_up(&ch->msg_allocate_wq);
1573
1574 spin_lock_irqsave(&ch->lock, *irq_flags);
1575}
1576
1577void
1578xpc_disconnect_callout(struct xpc_channel *ch, enum xpc_retval reason)
1579{
1580 /*
1581 * Let the channel's registerer know that the channel is being
1582 * disconnected. We don't want to do this if the registerer was never
1583 * informed of a connection being made.
1584 */
1585
1586 if (ch->func != NULL) {
1587 dev_dbg(xpc_chan, "ch->func() called, reason=%d, partid=%d, "
1588 "channel=%d\n", reason, ch->partid, ch->number);
1589
1590 ch->func(reason, ch->partid, ch->number, NULL, ch->key);
1591
1592 dev_dbg(xpc_chan, "ch->func() returned, reason=%d, partid=%d, "
1593 "channel=%d\n", reason, ch->partid, ch->number);
1594 }
1595}
1596
1597/*
1598 * Wait for a message entry to become available for the specified channel,
1599 * but don't wait any longer than 1 jiffy.
1600 */
1601static enum xpc_retval
1602xpc_allocate_msg_wait(struct xpc_channel *ch)
1603{
1604 enum xpc_retval ret;
1605
1606 if (ch->flags & XPC_C_DISCONNECTING) {
1607 DBUG_ON(ch->reason == xpcInterrupted);
1608 return ch->reason;
1609 }
1610
1611 atomic_inc(&ch->n_on_msg_allocate_wq);
1612 ret = interruptible_sleep_on_timeout(&ch->msg_allocate_wq, 1);
1613 atomic_dec(&ch->n_on_msg_allocate_wq);
1614
1615 if (ch->flags & XPC_C_DISCONNECTING) {
1616 ret = ch->reason;
1617 DBUG_ON(ch->reason == xpcInterrupted);
1618 } else if (ret == 0) {
1619 ret = xpcTimeout;
1620 } else {
1621 ret = xpcInterrupted;
1622 }
1623
1624 return ret;
1625}
1626
1627/*
1628 * Allocate an entry for a message from the message queue associated with the
1629 * specified channel.
1630 */
1631static enum xpc_retval
1632xpc_allocate_msg(struct xpc_channel *ch, u32 flags,
1633 struct xpc_msg **address_of_msg)
1634{
1635 struct xpc_msg *msg;
1636 enum xpc_retval ret;
1637 s64 put;
1638
1639 /* this reference will be dropped in xpc_send_msg() */
1640 xpc_msgqueue_ref(ch);
1641
1642 if (ch->flags & XPC_C_DISCONNECTING) {
1643 xpc_msgqueue_deref(ch);
1644 return ch->reason;
1645 }
1646 if (!(ch->flags & XPC_C_CONNECTED)) {
1647 xpc_msgqueue_deref(ch);
1648 return xpcNotConnected;
1649 }
1650
1651 /*
1652 * Get the next available message entry from the local message queue.
1653 * If none are available, we'll make sure that we grab the latest
1654 * GP values.
1655 */
1656 ret = xpcTimeout;
1657
1658 while (1) {
1659
1660 put = ch->w_local_GP.put;
1661 rmb(); /* guarantee that .put loads before .get */
1662 if (put - ch->w_remote_GP.get < ch->local_nentries) {
1663
1664 /* There are available message entries. We need to try
1665 * to secure one for ourselves. We'll do this by trying
1666 * to increment w_local_GP.put as long as someone else
1667 * doesn't beat us to it. If they do, we'll have to
1668 * try again.
1669 */
1670 if (cmpxchg(&ch->w_local_GP.put, put, put + 1) == put) {
1671 /* we got the entry referenced by put */
1672 break;
1673 }
1674 continue; /* try again */
1675 }
1676
1677 /*
1678 * There aren't any available msg entries at this time.
1679 *
1680 * In waiting for a message entry to become available,
1681 * we set a timeout in case the other side is not
1682 * sending completion IPIs. This lets us fake an IPI
1683 * that will cause the IPI handler to fetch the latest
1684 * GP values as if an IPI was sent by the other side.
1685 */
1686 if (ret == xpcTimeout)
1687 xpc_IPI_send_local_msgrequest(ch);
1688
1689 if (flags & XPC_NOWAIT) {
1690 xpc_msgqueue_deref(ch);
1691 return xpcNoWait;
1692 }
1693
1694 ret = xpc_allocate_msg_wait(ch);
1695 if (ret != xpcInterrupted && ret != xpcTimeout) {
1696 xpc_msgqueue_deref(ch);
1697 return ret;
1698 }
1699 }
1700
1701 /* get the message's address and initialize it */
1702 msg = (struct xpc_msg *)((u64)ch->local_msgqueue +
1703 (put % ch->local_nentries) * ch->msg_size);
1704
1705 DBUG_ON(msg->flags != 0);
1706 msg->number = put;
1707
1708 dev_dbg(xpc_chan, "w_local_GP.put changed to %ld; msg=0x%p, "
1709 "msg_number=%ld, partid=%d, channel=%d\n", put + 1,
1710 (void *)msg, msg->number, ch->partid, ch->number);
1711
1712 *address_of_msg = msg;
1713
1714 return xpcSuccess;
1715}
1716
1717/*
1718 * Allocate an entry for a message from the message queue associated with the
1719 * specified channel. NOTE that this routine can sleep waiting for a message
1720 * entry to become available. To not sleep, pass in the XPC_NOWAIT flag.
1721 *
1722 * Arguments:
1723 *
1724 * partid - ID of partition to which the channel is connected.
1725 * ch_number - channel #.
1726 * flags - see xpc.h for valid flags.
1727 * payload - address of the allocated payload area pointer (filled in on
1728 * return) in which the user-defined message is constructed.
1729 */
1730enum xpc_retval
1731xpc_initiate_allocate(partid_t partid, int ch_number, u32 flags, void **payload)
1732{
1733 struct xpc_partition *part = &xpc_partitions[partid];
1734 enum xpc_retval ret = xpcUnknownReason;
1735 struct xpc_msg *msg = NULL;
1736
1737 DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS);
1738 DBUG_ON(ch_number < 0 || ch_number >= part->nchannels);
1739
1740 *payload = NULL;
1741
1742 if (xpc_part_ref(part)) {
1743 ret = xpc_allocate_msg(&part->channels[ch_number], flags, &msg);
1744 xpc_part_deref(part);
1745
1746 if (msg != NULL)
1747 *payload = &msg->payload;
1748 }
1749
1750 return ret;
1751}
1752
1753/*
1754 * Now we actually send the messages that are ready to be sent by advancing
1755 * the local message queue's Put value and then send an IPI to the recipient
1756 * partition.
1757 */
1758static void
1759xpc_send_msgs(struct xpc_channel *ch, s64 initial_put)
1760{
1761 struct xpc_msg *msg;
1762 s64 put = initial_put + 1;
1763 int send_IPI = 0;
1764
1765 while (1) {
1766
1767 while (1) {
1768 if (put == ch->w_local_GP.put)
1769 break;
1770
1771 msg = (struct xpc_msg *)((u64)ch->local_msgqueue +
1772 (put % ch->local_nentries) *
1773 ch->msg_size);
1774
1775 if (!(msg->flags & XPC_M_READY))
1776 break;
1777
1778 put++;
1779 }
1780
1781 if (put == initial_put) {
1782 /* nothing's changed */
1783 break;
1784 }
1785
1786 if (cmpxchg_rel(&ch->local_GP->put, initial_put, put) !=
1787 initial_put) {
1788 /* someone else beat us to it */
1789 DBUG_ON(ch->local_GP->put < initial_put);
1790 break;
1791 }
1792
1793 /* we just set the new value of local_GP->put */
1794
1795 dev_dbg(xpc_chan, "local_GP->put changed to %ld, partid=%d, "
1796 "channel=%d\n", put, ch->partid, ch->number);
1797
1798 send_IPI = 1;
1799
1800 /*
1801 * We need to ensure that the message referenced by
1802 * local_GP->put is not XPC_M_READY or that local_GP->put
1803 * equals w_local_GP.put, so we'll go have a look.
1804 */
1805 initial_put = put;
1806 }
1807
1808 if (send_IPI)
1809 xpc_IPI_send_msgrequest(ch);
1810}
1811
1812/*
1813 * Common code that does the actual sending of the message by advancing the
1814 * local message queue's Put value and sends an IPI to the partition the
1815 * message is being sent to.
1816 */
1817static enum xpc_retval
1818xpc_send_msg(struct xpc_channel *ch, struct xpc_msg *msg, u8 notify_type,
1819 xpc_notify_func func, void *key)
1820{
1821 enum xpc_retval ret = xpcSuccess;
1822 struct xpc_notify *notify = notify;
1823 s64 put, msg_number = msg->number;
1824
1825 DBUG_ON(notify_type == XPC_N_CALL && func == NULL);
1826 DBUG_ON((((u64)msg - (u64)ch->local_msgqueue) / ch->msg_size) !=
1827 msg_number % ch->local_nentries);
1828 DBUG_ON(msg->flags & XPC_M_READY);
1829
1830 if (ch->flags & XPC_C_DISCONNECTING) {
1831 /* drop the reference grabbed in xpc_allocate_msg() */
1832 xpc_msgqueue_deref(ch);
1833 return ch->reason;
1834 }
1835
1836 if (notify_type != 0) {
1837 /*
1838 * Tell the remote side to send an ACK interrupt when the
1839 * message has been delivered.
1840 */
1841 msg->flags |= XPC_M_INTERRUPT;
1842
1843 atomic_inc(&ch->n_to_notify);
1844
1845 notify = &ch->notify_queue[msg_number % ch->local_nentries];
1846 notify->func = func;
1847 notify->key = key;
1848 notify->type = notify_type;
1849
1850 /* >>> is a mb() needed here? */
1851
1852 if (ch->flags & XPC_C_DISCONNECTING) {
1853 /*
1854 * An error occurred between our last error check and
1855 * this one. We will try to clear the type field from
1856 * the notify entry. If we succeed then
1857 * xpc_disconnect_channel() didn't already process
1858 * the notify entry.
1859 */
1860 if (cmpxchg(&notify->type, notify_type, 0) ==
1861 notify_type) {
1862 atomic_dec(&ch->n_to_notify);
1863 ret = ch->reason;
1864 }
1865
1866 /* drop the reference grabbed in xpc_allocate_msg() */
1867 xpc_msgqueue_deref(ch);
1868 return ret;
1869 }
1870 }
1871
1872 msg->flags |= XPC_M_READY;
1873
1874 /*
1875 * The preceding store of msg->flags must occur before the following
1876 * load of ch->local_GP->put.
1877 */
1878 mb();
1879
1880 /* see if the message is next in line to be sent, if so send it */
1881
1882 put = ch->local_GP->put;
1883 if (put == msg_number)
1884 xpc_send_msgs(ch, put);
1885
1886 /* drop the reference grabbed in xpc_allocate_msg() */
1887 xpc_msgqueue_deref(ch);
1888 return ret;
1889}
1890
1891/*
1892 * Send a message previously allocated using xpc_initiate_allocate() on the
1893 * specified channel connected to the specified partition.
1894 *
1895 * This routine will not wait for the message to be received, nor will
1896 * notification be given when it does happen. Once this routine has returned,
1897 * the message entry allocated via xpc_initiate_allocate() is no longer
1898 * accessible to the caller.
1899 *
1900 * This routine, although called by users, does not call xpc_part_ref() to
1901 * ensure that the partition infrastructure is in place. It relies on the
1902 * fact that we called xpc_msgqueue_ref() in xpc_allocate_msg().
1903 *
1904 * Arguments:
1905 *
1906 * partid - ID of partition to which the channel is connected.
1907 * ch_number - channel # to send message on.
1908 * payload - pointer to the payload area allocated via
1909 * xpc_initiate_allocate().
1910 */
1911enum xpc_retval
1912xpc_initiate_send(partid_t partid, int ch_number, void *payload)
1913{
1914 struct xpc_partition *part = &xpc_partitions[partid];
1915 struct xpc_msg *msg = XPC_MSG_ADDRESS(payload);
1916 enum xpc_retval ret;
1917
1918 dev_dbg(xpc_chan, "msg=0x%p, partid=%d, channel=%d\n", (void *)msg,
1919 partid, ch_number);
1920
1921 DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS);
1922 DBUG_ON(ch_number < 0 || ch_number >= part->nchannels);
1923 DBUG_ON(msg == NULL);
1924
1925 ret = xpc_send_msg(&part->channels[ch_number], msg, 0, NULL, NULL);
1926
1927 return ret;
1928}
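
Putting xpc_initiate_allocate() and xpc_initiate_send() together gives the basic transmit path the two comments above describe. A minimal sketch, assuming the illustrative MY_CH channel from the earlier registration sketch and a made-up struct my_msg payload layout:

/* illustrative payload layout; must fit the registered payload size */
struct my_msg {
	u64 cookie;
	u64 value;
};

static enum xpc_retval my_send_value(partid_t partid, u64 value)
{
	void *payload;
	struct my_msg *m;
	enum xpc_retval ret;

	/* reserve a message entry; XPC_NOWAIT avoids sleeping here */
	ret = xpc_initiate_allocate(partid, MY_CH, XPC_NOWAIT, &payload);
	if (ret != xpcSuccess)
		return ret;	/* e.g. xpcNoWait or xpcNotConnected */

	m = payload;
	m->cookie = 0x5a5a;
	m->value = value;

	/* queue it; the entry is no longer ours once this returns */
	return xpc_initiate_send(partid, MY_CH, payload);
}
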
1929
1930/*
1931 * Send a message previously allocated using xpc_initiate_allocate() on the
1932 * specified channel connected to the specified partition.
1933 *
1934 * This routine will not wait for the message to be sent. Once this routine
1935 * has returned, the message entry allocated via xpc_initiate_allocate() is no
1936 * longer accessible to the caller.
1937 *
1938 * Once the remote end of the channel has received the message, the function
1939 * passed as an argument to xpc_initiate_send_notify() will be called. This
1940 * allows the sender to free up or re-use any buffers referenced by the
1941 * message, but does NOT mean the message has been processed at the remote
1942 * end by a receiver.
1943 *
1944 * If this routine returns an error, the caller's function will NOT be called.
1945 *
1946 * This routine, although called by users, does not call xpc_part_ref() to
1947 * ensure that the partition infrastructure is in place. It relies on the
1948 * fact that we called xpc_msgqueue_ref() in xpc_allocate_msg().
1949 *
1950 * Arguments:
1951 *
1952 * partid - ID of partition to which the channel is connected.
1953 * ch_number - channel # to send message on.
1954 * payload - pointer to the payload area allocated via
1955 * xpc_initiate_allocate().
1956 * func - function to call with asynchronous notification of message
1957 * receipt. THIS FUNCTION MUST BE NON-BLOCKING.
1958 * key - user-defined key to be passed to the function when it's called.
1959 */
1960enum xpc_retval
1961xpc_initiate_send_notify(partid_t partid, int ch_number, void *payload,
1962 xpc_notify_func func, void *key)
1963{
1964 struct xpc_partition *part = &xpc_partitions[partid];
1965 struct xpc_msg *msg = XPC_MSG_ADDRESS(payload);
1966 enum xpc_retval ret;
1967
1968 dev_dbg(xpc_chan, "msg=0x%p, partid=%d, channel=%d\n", (void *)msg,
1969 partid, ch_number);
1970
1971 DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS);
1972 DBUG_ON(ch_number < 0 || ch_number >= part->nchannels);
1973 DBUG_ON(msg == NULL);
1974 DBUG_ON(func == NULL);
1975
1976 ret = xpc_send_msg(&part->channels[ch_number], msg, XPC_N_CALL,
1977 func, key);
1978 return ret;
1979}
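
As the comment above stresses, the notify function must not block. A minimal sketch of a callback with the signature xpc_send_msg() uses (reason, partid, channel, key), assuming the caller chose to pass a struct completion as the key; my_delivered() and that key convention are illustrative only:

static void my_delivered(enum xpc_retval reason, partid_t partid,
			 int ch_number, void *key)
{
	struct completion *done = key;	/* assumed key type */

	/*
	 * reason is xpcMsgDelivered once the remote side has received the
	 * message, or an error value if the channel came down first; either
	 * way, just wake the waiter (non-blocking).
	 */
	complete(done);
}

A sender would then pass my_delivered and &done as the func/key arguments to xpc_initiate_send_notify() and wait on the completion elsewhere.
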
1980
1981static struct xpc_msg *
1982xpc_pull_remote_msg(struct xpc_channel *ch, s64 get)
1983{
1984 struct xpc_partition *part = &xpc_partitions[ch->partid];
1985 struct xpc_msg *remote_msg, *msg;
1986 u32 msg_index, nmsgs;
1987 u64 msg_offset;
1988 enum xpc_retval ret;
1989
1990 if (mutex_lock_interruptible(&ch->msg_to_pull_mutex) != 0) {
1991 /* we were interrupted by a signal */
1992 return NULL;
1993 }
1994
1995 while (get >= ch->next_msg_to_pull) {
1996
1997 /* pull as many messages as are ready and able to be pulled */
1998
1999 msg_index = ch->next_msg_to_pull % ch->remote_nentries;
2000
2001 DBUG_ON(ch->next_msg_to_pull >= ch->w_remote_GP.put);
2002 nmsgs = ch->w_remote_GP.put - ch->next_msg_to_pull;
2003 if (msg_index + nmsgs > ch->remote_nentries) {
2004 /* ignore the ones that wrap the msg queue for now */
2005 nmsgs = ch->remote_nentries - msg_index;
2006 }
2007
2008 msg_offset = msg_index * ch->msg_size;
2009 msg = (struct xpc_msg *)((u64)ch->remote_msgqueue + msg_offset);
2010 remote_msg = (struct xpc_msg *)(ch->remote_msgqueue_pa +
2011 msg_offset);
2012
2013 ret = xpc_pull_remote_cachelines(part, msg, remote_msg,
2014 nmsgs * ch->msg_size);
2015 if (ret != xpcSuccess) {
2016
2017 dev_dbg(xpc_chan, "failed to pull %d msgs starting with"
2018 " msg %ld from partition %d, channel=%d, "
2019 "ret=%d\n", nmsgs, ch->next_msg_to_pull,
2020 ch->partid, ch->number, ret);
2021
2022 XPC_DEACTIVATE_PARTITION(part, ret);
2023
2024 mutex_unlock(&ch->msg_to_pull_mutex);
2025 return NULL;
2026 }
2027
2028 ch->next_msg_to_pull += nmsgs;
2029 }
2030
2031 mutex_unlock(&ch->msg_to_pull_mutex);
2032
2033 /* return the message we were looking for */
2034 msg_offset = (get % ch->remote_nentries) * ch->msg_size;
2035 msg = (struct xpc_msg *)((u64)ch->remote_msgqueue + msg_offset);
2036
2037 return msg;
2038}
2039
2040/*
2041 * Get a message to be delivered.
2042 */
2043static struct xpc_msg *
2044xpc_get_deliverable_msg(struct xpc_channel *ch)
2045{
2046 struct xpc_msg *msg = NULL;
2047 s64 get;
2048
2049 do {
2050 if (ch->flags & XPC_C_DISCONNECTING)
2051 break;
2052
2053 get = ch->w_local_GP.get;
2054 rmb(); /* guarantee that .get loads before .put */
2055 if (get == ch->w_remote_GP.put)
2056 break;
2057
2058 /* There are messages waiting to be pulled and delivered.
2059 * We need to try to secure one for ourselves. We'll do this
2060		 * by trying to increment w_local_GP.get and hoping that no one
2061		 * else beats us to it. If they do, we'll simply have
2062 * to try again for the next one.
2063 */
2064
2065 if (cmpxchg(&ch->w_local_GP.get, get, get + 1) == get) {
2066 /* we got the entry referenced by get */
2067
2068 dev_dbg(xpc_chan, "w_local_GP.get changed to %ld, "
2069 "partid=%d, channel=%d\n", get + 1,
2070 ch->partid, ch->number);
2071
2072 /* pull the message from the remote partition */
2073
2074 msg = xpc_pull_remote_msg(ch, get);
2075
2076 DBUG_ON(msg != NULL && msg->number != get);
2077 DBUG_ON(msg != NULL && (msg->flags & XPC_M_DONE));
2078 DBUG_ON(msg != NULL && !(msg->flags & XPC_M_READY));
2079
2080 break;
2081 }
2082
2083 } while (1);
2084
2085 return msg;
2086}
2087
2088/*
2089 * Deliver a message to its intended recipient.
2090 */
2091void
2092xpc_deliver_msg(struct xpc_channel *ch)
2093{
2094 struct xpc_msg *msg;
2095
2096 msg = xpc_get_deliverable_msg(ch);
2097 if (msg != NULL) {
2098
2099 /*
2100 * This ref is taken to protect the payload itself from being
2101 * freed before the user is finished with it, which the user
2102 * indicates by calling xpc_initiate_received().
2103 */
2104 xpc_msgqueue_ref(ch);
2105
2106 atomic_inc(&ch->kthreads_active);
2107
2108 if (ch->func != NULL) {
2109 dev_dbg(xpc_chan, "ch->func() called, msg=0x%p, "
2110 "msg_number=%ld, partid=%d, channel=%d\n",
2111 (void *)msg, msg->number, ch->partid,
2112 ch->number);
2113
2114 /* deliver the message to its intended recipient */
2115 ch->func(xpcMsgReceived, ch->partid, ch->number,
2116 &msg->payload, ch->key);
2117
2118 dev_dbg(xpc_chan, "ch->func() returned, msg=0x%p, "
2119 "msg_number=%ld, partid=%d, channel=%d\n",
2120 (void *)msg, msg->number, ch->partid,
2121 ch->number);
2122 }
2123
2124 atomic_dec(&ch->kthreads_active);
2125 }
2126}
2127
2128/*
2129 * Now we actually acknowledge the messages that have been delivered and ack'd
2130 * by advancing the cached remote message queue's Get value and if requested
2131 * send an IPI to the message sender's partition.
2132 */
2133static void
2134xpc_acknowledge_msgs(struct xpc_channel *ch, s64 initial_get, u8 msg_flags)
2135{
2136 struct xpc_msg *msg;
2137 s64 get = initial_get + 1;
2138 int send_IPI = 0;
2139
2140 while (1) {
2141
2142 while (1) {
2143 if (get == ch->w_local_GP.get)
2144 break;
2145
2146 msg = (struct xpc_msg *)((u64)ch->remote_msgqueue +
2147 (get % ch->remote_nentries) *
2148 ch->msg_size);
2149
2150 if (!(msg->flags & XPC_M_DONE))
2151 break;
2152
2153 msg_flags |= msg->flags;
2154 get++;
2155 }
2156
2157 if (get == initial_get) {
2158 /* nothing's changed */
2159 break;
2160 }
2161
2162 if (cmpxchg_rel(&ch->local_GP->get, initial_get, get) !=
2163 initial_get) {
2164 /* someone else beat us to it */
2165 DBUG_ON(ch->local_GP->get <= initial_get);
2166 break;
2167 }
2168
2169 /* we just set the new value of local_GP->get */
2170
2171 dev_dbg(xpc_chan, "local_GP->get changed to %ld, partid=%d, "
2172 "channel=%d\n", get, ch->partid, ch->number);
2173
2174 send_IPI = (msg_flags & XPC_M_INTERRUPT);
2175
2176 /*
2177 * We need to ensure that the message referenced by
2178 * local_GP->get is not XPC_M_DONE or that local_GP->get
2179 * equals w_local_GP.get, so we'll go have a look.
2180 */
2181 initial_get = get;
2182 }
2183
2184 if (send_IPI)
2185 xpc_IPI_send_msgrequest(ch);
2186}
2187
2188/*
2189 * Acknowledge receipt of a delivered message.
2190 *
2191 * If a message has XPC_M_INTERRUPT set, send an interrupt to the partition
2192 * that sent the message.
2193 *
2194 * This function, although called by users, does not call xpc_part_ref() to
2195 * ensure that the partition infrastructure is in place. It relies on the
2196 * fact that we called xpc_msgqueue_ref() in xpc_deliver_msg().
2197 *
2198 * Arguments:
2199 *
2200 * partid - ID of partition to which the channel is connected.
2201 * ch_number - channel # message received on.
2202 * payload - pointer to the payload area allocated via
2203 * xpc_initiate_allocate().
2204 */
2205void
2206xpc_initiate_received(partid_t partid, int ch_number, void *payload)
2207{
2208 struct xpc_partition *part = &xpc_partitions[partid];
2209 struct xpc_channel *ch;
2210 struct xpc_msg *msg = XPC_MSG_ADDRESS(payload);
2211 s64 get, msg_number = msg->number;
2212
2213 DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS);
2214 DBUG_ON(ch_number < 0 || ch_number >= part->nchannels);
2215
2216 ch = &part->channels[ch_number];
2217
2218 dev_dbg(xpc_chan, "msg=0x%p, msg_number=%ld, partid=%d, channel=%d\n",
2219 (void *)msg, msg_number, ch->partid, ch->number);
2220
2221 DBUG_ON((((u64)msg - (u64)ch->remote_msgqueue) / ch->msg_size) !=
2222 msg_number % ch->remote_nentries);
2223 DBUG_ON(msg->flags & XPC_M_DONE);
2224
2225 msg->flags |= XPC_M_DONE;
2226
2227 /*
2228 * The preceding store of msg->flags must occur before the following
2229 * load of ch->local_GP->get.
2230 */
2231 mb();
2232
2233 /*
2234 * See if this message is next in line to be acknowledged as having
2235 * been delivered.
2236 */
2237 get = ch->local_GP->get;
2238 if (get == msg_number)
2239 xpc_acknowledge_msgs(ch, get, msg->flags);
2240
2241 /* the call to xpc_msgqueue_ref() was done by xpc_deliver_msg() */
2242 xpc_msgqueue_deref(ch);
2243}
diff --git a/drivers/misc/sgi-xp/xpc_main.c b/drivers/misc/sgi-xp/xpc_main.c
new file mode 100644
index 000000000000..f673ba90eb0e
--- /dev/null
+++ b/drivers/misc/sgi-xp/xpc_main.c
@@ -0,0 +1,1323 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * Copyright (c) 2004-2008 Silicon Graphics, Inc. All Rights Reserved.
7 */
8
9/*
10 * Cross Partition Communication (XPC) support - standard version.
11 *
12 * XPC provides a message passing capability that crosses partition
13 * boundaries. This module is made up of two parts:
14 *
15 * partition This part detects the presence/absence of other
16 * partitions. It provides a heartbeat and monitors
17 * the heartbeats of other partitions.
18 *
19 * channel This part manages the channels and sends/receives
20 * messages across them to/from other partitions.
21 *
22 * There are a couple of additional functions residing in XP, which
23 * provide an interface to XPC for its users.
24 *
25 *
26 * Caveats:
27 *
28 * . We currently have no way to determine which nasid an IPI came
29 * from. Thus, xpc_IPI_send() does a remote AMO write followed by
30 * an IPI. The AMO indicates where data is to be pulled from, so
31 * after the IPI arrives, the remote partition checks the AMO word.
32 * The IPI can actually arrive before the AMO however, so other code
33 * must periodically check for this case. Also, remote AMO operations
34 * do not reliably time out. Thus we do a remote PIO read solely to
35 * know whether the remote partition is down and whether we should
36 * stop sending IPIs to it. This remote PIO read operation is set up
37 * in a special nofault region so SAL knows to ignore (and cleanup)
38 * any errors due to the remote AMO write, PIO read, and/or PIO
39 * write operations.
40 *
41 * If/when new hardware solves this IPI problem, we should abandon
42 * the current approach.
43 *
44 */
45
46#include <linux/kernel.h>
47#include <linux/module.h>
48#include <linux/init.h>
49#include <linux/cache.h>
50#include <linux/interrupt.h>
51#include <linux/delay.h>
52#include <linux/reboot.h>
53#include <linux/completion.h>
54#include <linux/kdebug.h>
55#include <linux/kthread.h>
56#include <linux/uaccess.h>
57#include <asm/sn/intr.h>
58#include <asm/sn/sn_sal.h>
59#include "xpc.h"
60
61/* define two XPC debug device structures to be used with dev_dbg() et al */
62
63struct device_driver xpc_dbg_name = {
64 .name = "xpc"
65};
66
67struct device xpc_part_dbg_subname = {
68 .bus_id = {0}, /* set to "part" at xpc_init() time */
69 .driver = &xpc_dbg_name
70};
71
72struct device xpc_chan_dbg_subname = {
73 .bus_id = {0}, /* set to "chan" at xpc_init() time */
74 .driver = &xpc_dbg_name
75};
76
77struct device *xpc_part = &xpc_part_dbg_subname;
78struct device *xpc_chan = &xpc_chan_dbg_subname;
79
80static int xpc_kdebug_ignore;
81
82/* systune related variables for /proc/sys directories */
83
84static int xpc_hb_interval = XPC_HB_DEFAULT_INTERVAL;
85static int xpc_hb_min_interval = 1;
86static int xpc_hb_max_interval = 10;
87
88static int xpc_hb_check_interval = XPC_HB_CHECK_DEFAULT_INTERVAL;
89static int xpc_hb_check_min_interval = 10;
90static int xpc_hb_check_max_interval = 120;
91
92int xpc_disengage_request_timelimit = XPC_DISENGAGE_REQUEST_DEFAULT_TIMELIMIT;
93static int xpc_disengage_request_min_timelimit; /* = 0 */
94static int xpc_disengage_request_max_timelimit = 120;
95
96static ctl_table xpc_sys_xpc_hb_dir[] = {
97 {
98 .ctl_name = CTL_UNNUMBERED,
99 .procname = "hb_interval",
100 .data = &xpc_hb_interval,
101 .maxlen = sizeof(int),
102 .mode = 0644,
103 .proc_handler = &proc_dointvec_minmax,
104 .strategy = &sysctl_intvec,
105 .extra1 = &xpc_hb_min_interval,
106 .extra2 = &xpc_hb_max_interval},
107 {
108 .ctl_name = CTL_UNNUMBERED,
109 .procname = "hb_check_interval",
110 .data = &xpc_hb_check_interval,
111 .maxlen = sizeof(int),
112 .mode = 0644,
113 .proc_handler = &proc_dointvec_minmax,
114 .strategy = &sysctl_intvec,
115 .extra1 = &xpc_hb_check_min_interval,
116 .extra2 = &xpc_hb_check_max_interval},
117 {}
118};
119static ctl_table xpc_sys_xpc_dir[] = {
120 {
121 .ctl_name = CTL_UNNUMBERED,
122 .procname = "hb",
123 .mode = 0555,
124 .child = xpc_sys_xpc_hb_dir},
125 {
126 .ctl_name = CTL_UNNUMBERED,
127 .procname = "disengage_request_timelimit",
128 .data = &xpc_disengage_request_timelimit,
129 .maxlen = sizeof(int),
130 .mode = 0644,
131 .proc_handler = &proc_dointvec_minmax,
132 .strategy = &sysctl_intvec,
133 .extra1 = &xpc_disengage_request_min_timelimit,
134 .extra2 = &xpc_disengage_request_max_timelimit},
135 {}
136};
137static ctl_table xpc_sys_dir[] = {
138 {
139 .ctl_name = CTL_UNNUMBERED,
140 .procname = "xpc",
141 .mode = 0555,
142 .child = xpc_sys_xpc_dir},
143 {}
144};
145static struct ctl_table_header *xpc_sysctl;
146
147/* non-zero if any remote partition disengage request was timed out */
148int xpc_disengage_request_timedout;
149
150/* #of IRQs received */
151static atomic_t xpc_act_IRQ_rcvd;
152
153/* IRQ handler notifies this wait queue on receipt of an IRQ */
154static DECLARE_WAIT_QUEUE_HEAD(xpc_act_IRQ_wq);
155
156static unsigned long xpc_hb_check_timeout;
157
158/* notification that the xpc_hb_checker thread has exited */
159static DECLARE_COMPLETION(xpc_hb_checker_exited);
160
161/* notification that the xpc_discovery thread has exited */
162static DECLARE_COMPLETION(xpc_discovery_exited);
163
164static struct timer_list xpc_hb_timer;
165
166static void xpc_kthread_waitmsgs(struct xpc_partition *, struct xpc_channel *);
167
168static int xpc_system_reboot(struct notifier_block *, unsigned long, void *);
169static struct notifier_block xpc_reboot_notifier = {
170 .notifier_call = xpc_system_reboot,
171};
172
173static int xpc_system_die(struct notifier_block *, unsigned long, void *);
174static struct notifier_block xpc_die_notifier = {
175 .notifier_call = xpc_system_die,
176};
177
178/*
179 * Timer function to enforce the timelimit on the partition disengage request.
180 */
181static void
182xpc_timeout_partition_disengage_request(unsigned long data)
183{
184 struct xpc_partition *part = (struct xpc_partition *)data;
185
186 DBUG_ON(time_before(jiffies, part->disengage_request_timeout));
187
188 (void)xpc_partition_disengaged(part);
189
190 DBUG_ON(part->disengage_request_timeout != 0);
191 DBUG_ON(xpc_partition_engaged(1UL << XPC_PARTID(part)) != 0);
192}
193
194/*
195 * Notify the heartbeat check thread that an IRQ has been received.
196 */
197static irqreturn_t
198xpc_act_IRQ_handler(int irq, void *dev_id)
199{
200 atomic_inc(&xpc_act_IRQ_rcvd);
201 wake_up_interruptible(&xpc_act_IRQ_wq);
202 return IRQ_HANDLED;
203}
204
205/*
206 * Timer to produce the heartbeat. The timer structure's function is
207 * already set when this is initially called. A tunable is used to
208 * specify when the next timeout should occur.
209 */
210static void
211xpc_hb_beater(unsigned long dummy)
212{
213 xpc_vars->heartbeat++;
214
215 if (time_after_eq(jiffies, xpc_hb_check_timeout))
216 wake_up_interruptible(&xpc_act_IRQ_wq);
217
218 xpc_hb_timer.expires = jiffies + (xpc_hb_interval * HZ);
219 add_timer(&xpc_hb_timer);
220}
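/*
 * [Editor's note, not part of this patch] A minimal sketch of the
 * self-rearming timer idiom xpc_hb_beater() uses, written against the
 * timer API of this kernel era (init_timer/add_timer/del_timer_sync).
 * The names my_hb_timer/my_beat/my_beat_start are hypothetical and only
 * illustrate the pattern: the callback does its work, then re-queues the
 * same timer_list for the next interval.
 */
static struct timer_list my_hb_timer;

static void my_beat(unsigned long unused)
{
	/* ... do the periodic work here ... */

	/* re-arm: expires is an absolute time in jiffies */
	my_hb_timer.expires = jiffies + (5 * HZ);
	add_timer(&my_hb_timer);
}

static void my_beat_start(void)
{
	init_timer(&my_hb_timer);
	my_hb_timer.function = my_beat;
	my_beat(0);		/* prime the first beat, as xpc_hb_checker() does */
}
/* teardown would use del_timer_sync(&my_hb_timer), as xpc_do_exit() does */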
221
222/*
223 * This thread is responsible for nearly all of the partition
224 * activation/deactivation.
225 */
226static int
227xpc_hb_checker(void *ignore)
228{
229 int last_IRQ_count = 0;
230 int new_IRQ_count;
231 int force_IRQ = 0;
232
233 /* this thread was marked active by xpc_hb_init() */
234
235 set_cpus_allowed(current, cpumask_of_cpu(XPC_HB_CHECK_CPU));
236
237 /* set our heartbeating to other partitions into motion */
238 xpc_hb_check_timeout = jiffies + (xpc_hb_check_interval * HZ);
239 xpc_hb_beater(0);
240
241 while (!xpc_exiting) {
242
243 dev_dbg(xpc_part, "woke up with %d ticks rem; %d IRQs have "
244 "been received\n",
245 (int)(xpc_hb_check_timeout - jiffies),
246 atomic_read(&xpc_act_IRQ_rcvd) - last_IRQ_count);
247
248 /* checking of remote heartbeats is skewed by IRQ handling */
249 if (time_after_eq(jiffies, xpc_hb_check_timeout)) {
250 dev_dbg(xpc_part, "checking remote heartbeats\n");
251 xpc_check_remote_hb();
252
253 /*
254 * We need to periodically recheck to ensure no
255 * IPI/AMO pairs have been missed. That check
256 * must always reset xpc_hb_check_timeout.
257 */
258 force_IRQ = 1;
259 }
260
261 /* check for outstanding IRQs */
262 new_IRQ_count = atomic_read(&xpc_act_IRQ_rcvd);
263 if (last_IRQ_count < new_IRQ_count || force_IRQ != 0) {
264 force_IRQ = 0;
265
266 dev_dbg(xpc_part, "found an IRQ to process; will be "
267 "resetting xpc_hb_check_timeout\n");
268
269 last_IRQ_count += xpc_identify_act_IRQ_sender();
270 if (last_IRQ_count < new_IRQ_count) {
271 /* retry once to help avoid missing AMO */
272 (void)xpc_identify_act_IRQ_sender();
273 }
274 last_IRQ_count = new_IRQ_count;
275
276 xpc_hb_check_timeout = jiffies +
277 (xpc_hb_check_interval * HZ);
278 }
279
280 /* wait for IRQ or timeout */
281 (void)wait_event_interruptible(xpc_act_IRQ_wq,
282 (last_IRQ_count <
283 atomic_read(&xpc_act_IRQ_rcvd)
284 || time_after_eq(jiffies,
285 xpc_hb_check_timeout) ||
286 xpc_exiting));
287 }
288
289 dev_dbg(xpc_part, "heartbeat checker is exiting\n");
290
291 /* mark this thread as having exited */
292 complete(&xpc_hb_checker_exited);
293 return 0;
294}
295
296/*
297 * This thread will attempt to discover other partitions to activate
298 * based on info provided by SAL. This new thread is short lived and
299 * will exit once discovery is complete.
300 */
301static int
302xpc_initiate_discovery(void *ignore)
303{
304 xpc_discovery();
305
306 dev_dbg(xpc_part, "discovery thread is exiting\n");
307
308 /* mark this thread as having exited */
309 complete(&xpc_discovery_exited);
310 return 0;
311}
312
313/*
314 * Establish first contact with the remote partition. This involves pulling
315 * the XPC per partition variables from the remote partition and waiting for
316 * the remote partition to pull ours.
317 */
318static enum xpc_retval
319xpc_make_first_contact(struct xpc_partition *part)
320{
321 enum xpc_retval ret;
322
323 while ((ret = xpc_pull_remote_vars_part(part)) != xpcSuccess) {
324 if (ret != xpcRetry) {
325 XPC_DEACTIVATE_PARTITION(part, ret);
326 return ret;
327 }
328
329 dev_dbg(xpc_chan, "waiting to make first contact with "
330 "partition %d\n", XPC_PARTID(part));
331
332 /* wait a 1/4 of a second or so */
333 (void)msleep_interruptible(250);
334
335 if (part->act_state == XPC_P_DEACTIVATING)
336 return part->reason;
337 }
338
339 return xpc_mark_partition_active(part);
340}
341
342/*
343 * The first kthread assigned to a newly activated partition is the one
344 * created by XPC HB with which it calls xpc_partition_up(). XPC hangs on to
345 * that kthread until the partition is brought down, at which time that kthread
346 * returns back to XPC HB. (The return of that kthread will signify to XPC HB
347 * that XPC has dismantled all communication infrastructure for the associated
348 * partition.) This kthread becomes the channel manager for that partition.
349 *
350 * Each active partition has a channel manager, who, besides connecting and
351 * disconnecting channels, will ensure that each of the partition's connected
352 * channels has the required number of assigned kthreads to get the work done.
353 */
354static void
355xpc_channel_mgr(struct xpc_partition *part)
356{
357 while (part->act_state != XPC_P_DEACTIVATING ||
358 atomic_read(&part->nchannels_active) > 0 ||
359 !xpc_partition_disengaged(part)) {
360
361 xpc_process_channel_activity(part);
362
363 /*
364 * Wait until we've been requested to activate kthreads or
365 * all of the channel's message queues have been torn down or
366 * a signal is pending.
367 *
368 * The channel_mgr_requests is set to 1 after being awakened.
369 * This is done to prevent the channel mgr from making one pass
370 * through the loop for each request, since he will
371 * be servicing all the requests in one pass. The reason it's
372 * set to 1 instead of 0 is so that other kthreads will know
373 * that the channel mgr is running and won't bother trying to
374 * wake him up.
375 */
376 atomic_dec(&part->channel_mgr_requests);
377 (void)wait_event_interruptible(part->channel_mgr_wq,
378 (atomic_read(&part->channel_mgr_requests) > 0 ||
379 part->local_IPI_amo != 0 ||
380 (part->act_state == XPC_P_DEACTIVATING &&
381 atomic_read(&part->nchannels_active) == 0 &&
382 xpc_partition_disengaged(part))));
383 atomic_set(&part->channel_mgr_requests, 1);
384 }
385}
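/*
 * [Editor's note, not part of this patch] Sketch of the request-batching
 * idiom the channel manager uses above: wakers bump a counter and wake the
 * manager; the manager services everything in one pass, sleeps until the
 * counter is positive again, and then resets it to 1 so concurrent wakers
 * see it as "already running" and skip redundant wake-ups. The names
 * mgr_requests/mgr_wq/request_service/manager_loop/service_all are
 * hypothetical.
 */
static void service_all(void)
{
	/* placeholder: process all pending work in a single pass */
}

static atomic_t mgr_requests = ATOMIC_INIT(1);
static DECLARE_WAIT_QUEUE_HEAD(mgr_wq);

static void request_service(void)
{
	atomic_inc(&mgr_requests);
	wake_up(&mgr_wq);
}

static void manager_loop(void)
{
	while (1) {
		service_all();

		atomic_dec(&mgr_requests);
		(void)wait_event_interruptible(mgr_wq,
				atomic_read(&mgr_requests) > 0);
		atomic_set(&mgr_requests, 1);
	}
}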
386
387/*
388 * When XPC HB determines that a partition has come up, it will create a new
389 * kthread and that kthread will call this function to attempt to set up the
390 * basic infrastructure used for Cross Partition Communication with the newly
391 * upped partition.
392 *
393 * The kthread that was created by XPC HB and which set up the XPC
394 * infrastructure will remain assigned to the partition until the partition
395 * goes down, at which time the kthread will tear down the XPC infrastructure
396 * and then exit.
397 *
398 * XPC HB will put the remote partition's XPC per partition specific variables
399 * physical address into xpc_partitions[partid].remote_vars_part_pa prior to
400 * calling xpc_partition_up().
401 */
402static void
403xpc_partition_up(struct xpc_partition *part)
404{
405 DBUG_ON(part->channels != NULL);
406
407 dev_dbg(xpc_chan, "activating partition %d\n", XPC_PARTID(part));
408
409 if (xpc_setup_infrastructure(part) != xpcSuccess)
410 return;
411
412 /*
413 * The kthread that XPC HB called us with will become the
414 * channel manager for this partition. It will not return
415 * back to XPC HB until the partition's XPC infrastructure
416 * has been dismantled.
417 */
418
419 (void)xpc_part_ref(part); /* this will always succeed */
420
421 if (xpc_make_first_contact(part) == xpcSuccess)
422 xpc_channel_mgr(part);
423
424 xpc_part_deref(part);
425
426 xpc_teardown_infrastructure(part);
427}
428
429static int
430xpc_activating(void *__partid)
431{
432 partid_t partid = (u64)__partid;
433 struct xpc_partition *part = &xpc_partitions[partid];
434 unsigned long irq_flags;
435
436 DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS);
437
438 spin_lock_irqsave(&part->act_lock, irq_flags);
439
440 if (part->act_state == XPC_P_DEACTIVATING) {
441 part->act_state = XPC_P_INACTIVE;
442 spin_unlock_irqrestore(&part->act_lock, irq_flags);
443 part->remote_rp_pa = 0;
444 return 0;
445 }
446
447 /* indicate the thread is activating */
448 DBUG_ON(part->act_state != XPC_P_ACTIVATION_REQ);
449 part->act_state = XPC_P_ACTIVATING;
450
451 XPC_SET_REASON(part, 0, 0);
452 spin_unlock_irqrestore(&part->act_lock, irq_flags);
453
454 dev_dbg(xpc_part, "bringing partition %d up\n", partid);
455
456 /*
457 * Register the remote partition's AMOs with SAL so it can handle
458 * and clean up errors within that address range should the remote
459 * partition go down. We don't unregister this range because it is
460 * difficult to tell when outstanding writes to the remote partition
461 * are finished and thus when it is safe to unregister. This should
462 * not result in wasted space in the SAL xp_addr_region table because
463 * we should get the same page for remote_amos_page_pa after module
464 * reloads and system reboots.
465 */
466 if (sn_register_xp_addr_region(part->remote_amos_page_pa,
467 PAGE_SIZE, 1) < 0) {
468 dev_warn(xpc_part, "xpc_partition_up(%d) failed to register "
469 "xp_addr region\n", partid);
470
471 spin_lock_irqsave(&part->act_lock, irq_flags);
472 part->act_state = XPC_P_INACTIVE;
473 XPC_SET_REASON(part, xpcPhysAddrRegFailed, __LINE__);
474 spin_unlock_irqrestore(&part->act_lock, irq_flags);
475 part->remote_rp_pa = 0;
476 return 0;
477 }
478
479 xpc_allow_hb(partid, xpc_vars);
480 xpc_IPI_send_activated(part);
481
482 /*
483 * xpc_partition_up() holds this thread and marks this partition as
484 * XPC_P_ACTIVE by calling xpc_hb_mark_active().
485 */
486 (void)xpc_partition_up(part);
487
488 xpc_disallow_hb(partid, xpc_vars);
489 xpc_mark_partition_inactive(part);
490
491 if (part->reason == xpcReactivating) {
492 /* interrupting ourselves results in activating partition */
493 xpc_IPI_send_reactivate(part);
494 }
495
496 return 0;
497}
498
499void
500xpc_activate_partition(struct xpc_partition *part)
501{
502 partid_t partid = XPC_PARTID(part);
503 unsigned long irq_flags;
504 struct task_struct *kthread;
505
506 spin_lock_irqsave(&part->act_lock, irq_flags);
507
508 DBUG_ON(part->act_state != XPC_P_INACTIVE);
509
510 part->act_state = XPC_P_ACTIVATION_REQ;
511 XPC_SET_REASON(part, xpcCloneKThread, __LINE__);
512
513 spin_unlock_irqrestore(&part->act_lock, irq_flags);
514
515 kthread = kthread_run(xpc_activating, (void *)((u64)partid), "xpc%02d",
516 partid);
517 if (IS_ERR(kthread)) {
518 spin_lock_irqsave(&part->act_lock, irq_flags);
519 part->act_state = XPC_P_INACTIVE;
520 XPC_SET_REASON(part, xpcCloneKThreadFailed, __LINE__);
521 spin_unlock_irqrestore(&part->act_lock, irq_flags);
522 }
523}
524
525/*
526 * Handle the receipt of a SGI_XPC_NOTIFY IRQ by seeing whether the specified
527 * partition actually sent it. Since SGI_XPC_NOTIFY IRQs may be shared by more
528 * than one partition, we use an AMO_t structure per partition to indicate
529 * whether a partition has sent an IPI or not. If it has, then wake up the
530 * associated kthread to handle it.
531 *
532 * All SGI_XPC_NOTIFY IRQs received by XPC are the result of IPIs sent by XPC
533 * running on other partitions.
534 *
535 * Noteworthy Arguments:
536 *
537 * irq - Interrupt ReQuest number. NOT USED.
538 *
539 * dev_id - partid of IPI's potential sender.
540 */
541irqreturn_t
542xpc_notify_IRQ_handler(int irq, void *dev_id)
543{
544 partid_t partid = (partid_t) (u64)dev_id;
545 struct xpc_partition *part = &xpc_partitions[partid];
546
547 DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS);
548
549 if (xpc_part_ref(part)) {
550 xpc_check_for_channel_activity(part);
551
552 xpc_part_deref(part);
553 }
554 return IRQ_HANDLED;
555}
556
557/*
558 * Check to see if xpc_notify_IRQ_handler() dropped any IPIs on the floor
559 * because the write to their associated IPI amo completed after the IRQ/IPI
560 * was received.
561 */
562void
563xpc_dropped_IPI_check(struct xpc_partition *part)
564{
565 if (xpc_part_ref(part)) {
566 xpc_check_for_channel_activity(part);
567
568 part->dropped_IPI_timer.expires = jiffies +
569 XPC_P_DROPPED_IPI_WAIT;
570 add_timer(&part->dropped_IPI_timer);
571 xpc_part_deref(part);
572 }
573}
574
575void
576xpc_activate_kthreads(struct xpc_channel *ch, int needed)
577{
578 int idle = atomic_read(&ch->kthreads_idle);
579 int assigned = atomic_read(&ch->kthreads_assigned);
580 int wakeup;
581
582 DBUG_ON(needed <= 0);
583
584 if (idle > 0) {
585 wakeup = (needed > idle) ? idle : needed;
586 needed -= wakeup;
587
588 dev_dbg(xpc_chan, "wakeup %d idle kthreads, partid=%d, "
589 "channel=%d\n", wakeup, ch->partid, ch->number);
590
591 /* only wakeup the requested number of kthreads */
592 wake_up_nr(&ch->idle_wq, wakeup);
593 }
594
595 if (needed <= 0)
596 return;
597
598 if (needed + assigned > ch->kthreads_assigned_limit) {
599 needed = ch->kthreads_assigned_limit - assigned;
600 if (needed <= 0)
601 return;
602 }
603
604 dev_dbg(xpc_chan, "create %d new kthreads, partid=%d, channel=%d\n",
605 needed, ch->partid, ch->number);
606
607 xpc_create_kthreads(ch, needed, 0);
608}
609
610/*
611 * This function is where XPC's kthreads wait for messages to deliver.
612 */
613static void
614xpc_kthread_waitmsgs(struct xpc_partition *part, struct xpc_channel *ch)
615{
616 do {
617 /* deliver messages to their intended recipients */
618
619 while (ch->w_local_GP.get < ch->w_remote_GP.put &&
620 !(ch->flags & XPC_C_DISCONNECTING)) {
621 xpc_deliver_msg(ch);
622 }
623
624 if (atomic_inc_return(&ch->kthreads_idle) >
625 ch->kthreads_idle_limit) {
626 /* too many idle kthreads on this channel */
627 atomic_dec(&ch->kthreads_idle);
628 break;
629 }
630
631 dev_dbg(xpc_chan, "idle kthread calling "
632 "wait_event_interruptible_exclusive()\n");
633
634 (void)wait_event_interruptible_exclusive(ch->idle_wq,
635 (ch->w_local_GP.get < ch->w_remote_GP.put ||
636 (ch->flags & XPC_C_DISCONNECTING)));
637
638 atomic_dec(&ch->kthreads_idle);
639
640 } while (!(ch->flags & XPC_C_DISCONNECTING));
641}
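/*
 * [Editor's note, not part of this patch] The pairing above is
 * wait_event_interruptible_exclusive() on the sleeping side with
 * wake_up_nr() on the producing side (see xpc_activate_kthreads()):
 * exclusive waiters are queued so that wake_up_nr(wq, n) wakes at most n
 * of them instead of the whole herd. A hypothetical, self-contained
 * sketch of that pattern (names idle_wq/work_items/worker_wait/post_work
 * are illustrative only):
 */
static DECLARE_WAIT_QUEUE_HEAD(idle_wq);
static atomic_t work_items = ATOMIC_INIT(0);

static void worker_wait(void)
{
	/* sleep as an exclusive waiter until there is work to pick up */
	(void)wait_event_interruptible_exclusive(idle_wq,
				atomic_read(&work_items) > 0);
}

static void post_work(int nr)
{
	atomic_add(nr, &work_items);
	wake_up_nr(&idle_wq, nr);	/* wake at most nr idle workers */
}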
642
643static int
644xpc_kthread_start(void *args)
645{
646 partid_t partid = XPC_UNPACK_ARG1(args);
647 u16 ch_number = XPC_UNPACK_ARG2(args);
648 struct xpc_partition *part = &xpc_partitions[partid];
649 struct xpc_channel *ch;
650 int n_needed;
651 unsigned long irq_flags;
652
653 dev_dbg(xpc_chan, "kthread starting, partid=%d, channel=%d\n",
654 partid, ch_number);
655
656 ch = &part->channels[ch_number];
657
658 if (!(ch->flags & XPC_C_DISCONNECTING)) {
659
660 /* let registerer know that connection has been established */
661
662 spin_lock_irqsave(&ch->lock, irq_flags);
663 if (!(ch->flags & XPC_C_CONNECTEDCALLOUT)) {
664 ch->flags |= XPC_C_CONNECTEDCALLOUT;
665 spin_unlock_irqrestore(&ch->lock, irq_flags);
666
667 xpc_connected_callout(ch);
668
669 spin_lock_irqsave(&ch->lock, irq_flags);
670 ch->flags |= XPC_C_CONNECTEDCALLOUT_MADE;
671 spin_unlock_irqrestore(&ch->lock, irq_flags);
672
673 /*
674 * It is possible that while the callout was being
675 * made that the remote partition sent some messages.
676 * If that is the case, we may need to activate
677 * additional kthreads to help deliver them. We only
678 * need one less than total #of messages to deliver.
679 */
680 n_needed = ch->w_remote_GP.put - ch->w_local_GP.get - 1;
681 if (n_needed > 0 && !(ch->flags & XPC_C_DISCONNECTING))
682 xpc_activate_kthreads(ch, n_needed);
683
684 } else {
685 spin_unlock_irqrestore(&ch->lock, irq_flags);
686 }
687
688 xpc_kthread_waitmsgs(part, ch);
689 }
690
691 /* let registerer know that connection is disconnecting */
692
693 spin_lock_irqsave(&ch->lock, irq_flags);
694 if ((ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) &&
695 !(ch->flags & XPC_C_DISCONNECTINGCALLOUT)) {
696 ch->flags |= XPC_C_DISCONNECTINGCALLOUT;
697 spin_unlock_irqrestore(&ch->lock, irq_flags);
698
699 xpc_disconnect_callout(ch, xpcDisconnecting);
700
701 spin_lock_irqsave(&ch->lock, irq_flags);
702 ch->flags |= XPC_C_DISCONNECTINGCALLOUT_MADE;
703 }
704 spin_unlock_irqrestore(&ch->lock, irq_flags);
705
706 if (atomic_dec_return(&ch->kthreads_assigned) == 0) {
707 if (atomic_dec_return(&part->nchannels_engaged) == 0) {
708 xpc_mark_partition_disengaged(part);
709 xpc_IPI_send_disengage(part);
710 }
711 }
712
713 xpc_msgqueue_deref(ch);
714
715 dev_dbg(xpc_chan, "kthread exiting, partid=%d, channel=%d\n",
716 partid, ch_number);
717
718 xpc_part_deref(part);
719 return 0;
720}
721
722/*
723 * For each partition that XPC has established communications with, there is
724 * a minimum of one kernel thread assigned to perform any operation that
725 * may potentially sleep or block (basically the callouts to the asynchronous
726 * functions registered via xpc_connect()).
727 *
728 * Additional kthreads are created and destroyed by XPC as the workload
729 * demands.
730 *
731 * A kthread is assigned to one of the active channels that exists for a given
732 * partition.
733 */
734void
735xpc_create_kthreads(struct xpc_channel *ch, int needed,
736 int ignore_disconnecting)
737{
738 unsigned long irq_flags;
739 u64 args = XPC_PACK_ARGS(ch->partid, ch->number);
740 struct xpc_partition *part = &xpc_partitions[ch->partid];
741 struct task_struct *kthread;
742
743 while (needed-- > 0) {
744
745 /*
746 * The following is done on behalf of the newly created
747 * kthread. That kthread is responsible for doing the
748 * counterpart to the following before it exits.
749 */
750 if (ignore_disconnecting) {
751 if (!atomic_inc_not_zero(&ch->kthreads_assigned)) {
752 /* kthreads assigned had gone to zero */
753 BUG_ON(!(ch->flags &
754 XPC_C_DISCONNECTINGCALLOUT_MADE));
755 break;
756 }
757
758 } else if (ch->flags & XPC_C_DISCONNECTING) {
759 break;
760
761 } else if (atomic_inc_return(&ch->kthreads_assigned) == 1) {
762 if (atomic_inc_return(&part->nchannels_engaged) == 1)
763 xpc_mark_partition_engaged(part);
764 }
765 (void)xpc_part_ref(part);
766 xpc_msgqueue_ref(ch);
767
768 kthread = kthread_run(xpc_kthread_start, (void *)args,
769 "xpc%02dc%d", ch->partid, ch->number);
770 if (IS_ERR(kthread)) {
771 /* the fork failed */
772
773 /*
774 * NOTE: if (ignore_disconnecting &&
775 * !(ch->flags & XPC_C_DISCONNECTINGCALLOUT)) is true,
776 * then we'll deadlock if all other kthreads assigned
777 * to this channel are blocked in the channel's
778 * registerer, because the only thing that will unblock
779 * them is the xpcDisconnecting callout that this
780 * failed kthread_run() would have made.
781 */
782
783 if (atomic_dec_return(&ch->kthreads_assigned) == 0 &&
784 atomic_dec_return(&part->nchannels_engaged) == 0) {
785 xpc_mark_partition_disengaged(part);
786 xpc_IPI_send_disengage(part);
787 }
788 xpc_msgqueue_deref(ch);
789 xpc_part_deref(part);
790
791 if (atomic_read(&ch->kthreads_assigned) <
792 ch->kthreads_idle_limit) {
793 /*
794 * Flag this as an error only if we have an
795 * insufficient #of kthreads for the channel
796 * to function.
797 */
798 spin_lock_irqsave(&ch->lock, irq_flags);
799 XPC_DISCONNECT_CHANNEL(ch, xpcLackOfResources,
800 &irq_flags);
801 spin_unlock_irqrestore(&ch->lock, irq_flags);
802 }
803 break;
804 }
805 }
806}
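/*
 * [Editor's note, not part of this patch] The error handling above follows
 * the standard kthread_run() idiom: on failure kthread_run() returns an
 * ERR_PTR() rather than NULL, so the caller must test with IS_ERR() and
 * unwind any references it took on behalf of the thread that never ran.
 * Minimal hypothetical sketch (my_thread_fn/spawn_worker are illustrative
 * names only):
 */
static int my_thread_fn(void *data)
{
	/* ... thread body: runs until it returns or is stopped ... */
	return 0;
}

static int spawn_worker(void *data)
{
	struct task_struct *kthread;

	kthread = kthread_run(my_thread_fn, data, "myworker%d", 0);
	if (IS_ERR(kthread)) {
		/* undo any refs taken on the unborn thread's behalf here */
		return PTR_ERR(kthread);
	}
	return 0;
}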
807
808void
809xpc_disconnect_wait(int ch_number)
810{
811 unsigned long irq_flags;
812 partid_t partid;
813 struct xpc_partition *part;
814 struct xpc_channel *ch;
815 int wakeup_channel_mgr;
816
817 /* now wait for all callouts to the caller's function to cease */
818 for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
819 part = &xpc_partitions[partid];
820
821 if (!xpc_part_ref(part))
822 continue;
823
824 ch = &part->channels[ch_number];
825
826 if (!(ch->flags & XPC_C_WDISCONNECT)) {
827 xpc_part_deref(part);
828 continue;
829 }
830
831 wait_for_completion(&ch->wdisconnect_wait);
832
833 spin_lock_irqsave(&ch->lock, irq_flags);
834 DBUG_ON(!(ch->flags & XPC_C_DISCONNECTED));
835 wakeup_channel_mgr = 0;
836
837 if (ch->delayed_IPI_flags) {
838 if (part->act_state != XPC_P_DEACTIVATING) {
839 spin_lock(&part->IPI_lock);
840 XPC_SET_IPI_FLAGS(part->local_IPI_amo,
841 ch->number,
842 ch->delayed_IPI_flags);
843 spin_unlock(&part->IPI_lock);
844 wakeup_channel_mgr = 1;
845 }
846 ch->delayed_IPI_flags = 0;
847 }
848
849 ch->flags &= ~XPC_C_WDISCONNECT;
850 spin_unlock_irqrestore(&ch->lock, irq_flags);
851
852 if (wakeup_channel_mgr)
853 xpc_wakeup_channel_mgr(part);
854
855 xpc_part_deref(part);
856 }
857}
858
859static void
860xpc_do_exit(enum xpc_retval reason)
861{
862 partid_t partid;
863 int active_part_count, printed_waiting_msg = 0;
864 struct xpc_partition *part;
865 unsigned long printmsg_time, disengage_request_timeout = 0;
866
867 /* a 'rmmod XPC' and a 'reboot' cannot both end up here together */
868 DBUG_ON(xpc_exiting == 1);
869
870 /*
871 * Let the heartbeat checker thread and the discovery thread
872 * (if one is running) know that they should exit. Also wake up
873 * the heartbeat checker thread in case it's sleeping.
874 */
875 xpc_exiting = 1;
876 wake_up_interruptible(&xpc_act_IRQ_wq);
877
878 /* ignore all incoming interrupts */
879 free_irq(SGI_XPC_ACTIVATE, NULL);
880
881 /* wait for the discovery thread to exit */
882 wait_for_completion(&xpc_discovery_exited);
883
884 /* wait for the heartbeat checker thread to exit */
885 wait_for_completion(&xpc_hb_checker_exited);
886
887 /* sleep for a 1/3 of a second or so */
888 (void)msleep_interruptible(300);
889
890 /* wait for all partitions to become inactive */
891
892 printmsg_time = jiffies + (XPC_DISENGAGE_PRINTMSG_INTERVAL * HZ);
893 xpc_disengage_request_timedout = 0;
894
895 do {
896 active_part_count = 0;
897
898 for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
899 part = &xpc_partitions[partid];
900
901 if (xpc_partition_disengaged(part) &&
902 part->act_state == XPC_P_INACTIVE) {
903 continue;
904 }
905
906 active_part_count++;
907
908 XPC_DEACTIVATE_PARTITION(part, reason);
909
910 if (part->disengage_request_timeout >
911 disengage_request_timeout) {
912 disengage_request_timeout =
913 part->disengage_request_timeout;
914 }
915 }
916
917 if (xpc_partition_engaged(-1UL)) {
918 if (time_after(jiffies, printmsg_time)) {
919 dev_info(xpc_part, "waiting for remote "
920 "partitions to disengage, timeout in "
921 "%ld seconds\n",
922 (disengage_request_timeout - jiffies)
923 / HZ);
924 printmsg_time = jiffies +
925 (XPC_DISENGAGE_PRINTMSG_INTERVAL * HZ);
926 printed_waiting_msg = 1;
927 }
928
929 } else if (active_part_count > 0) {
930 if (printed_waiting_msg) {
931 dev_info(xpc_part, "waiting for local partition"
932 " to disengage\n");
933 printed_waiting_msg = 0;
934 }
935
936 } else {
937 if (!xpc_disengage_request_timedout) {
938 dev_info(xpc_part, "all partitions have "
939 "disengaged\n");
940 }
941 break;
942 }
943
944 /* sleep for a 1/3 of a second or so */
945 (void)msleep_interruptible(300);
946
947 } while (1);
948
949 DBUG_ON(xpc_partition_engaged(-1UL));
950
951 /* indicate to others that our reserved page is uninitialized */
952 xpc_rsvd_page->vars_pa = 0;
953
954 /* now it's time to eliminate our heartbeat */
955 del_timer_sync(&xpc_hb_timer);
956 DBUG_ON(xpc_vars->heartbeating_to_mask != 0);
957
958 if (reason == xpcUnloading) {
959 /* take ourselves off of the reboot_notifier_list */
960 (void)unregister_reboot_notifier(&xpc_reboot_notifier);
961
962 /* take ourselves off of the die_notifier list */
963 (void)unregister_die_notifier(&xpc_die_notifier);
964 }
965
966 /* close down protections for IPI operations */
967 xpc_restrict_IPI_ops();
968
969 /* clear the interface to XPC's functions */
970 xpc_clear_interface();
971
972 if (xpc_sysctl)
973 unregister_sysctl_table(xpc_sysctl);
974
975 kfree(xpc_remote_copy_buffer_base);
976}
977
978/*
979 * This function is called when the system is being rebooted.
980 */
981static int
982xpc_system_reboot(struct notifier_block *nb, unsigned long event, void *unused)
983{
984 enum xpc_retval reason;
985
986 switch (event) {
987 case SYS_RESTART:
988 reason = xpcSystemReboot;
989 break;
990 case SYS_HALT:
991 reason = xpcSystemHalt;
992 break;
993 case SYS_POWER_OFF:
994 reason = xpcSystemPoweroff;
995 break;
996 default:
997 reason = xpcSystemGoingDown;
998 }
999
1000 xpc_do_exit(reason);
1001 return NOTIFY_DONE;
1002}
1003
1004/*
1005 * Notify other partitions to disengage from all references to our memory.
1006 */
1007static void
1008xpc_die_disengage(void)
1009{
1010 struct xpc_partition *part;
1011 partid_t partid;
1012 unsigned long engaged;
1013 long time, printmsg_time, disengage_request_timeout;
1014
1015 /* keep xpc_hb_checker thread from doing anything (just in case) */
1016 xpc_exiting = 1;
1017
1018 xpc_vars->heartbeating_to_mask = 0; /* indicate we're deactivated */
1019
1020 for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
1021 part = &xpc_partitions[partid];
1022
1023 if (!XPC_SUPPORTS_DISENGAGE_REQUEST(part->
1024 remote_vars_version)) {
1025
1026 /* just in case it was left set by an earlier XPC */
1027 xpc_clear_partition_engaged(1UL << partid);
1028 continue;
1029 }
1030
1031 if (xpc_partition_engaged(1UL << partid) ||
1032 part->act_state != XPC_P_INACTIVE) {
1033 xpc_request_partition_disengage(part);
1034 xpc_mark_partition_disengaged(part);
1035 xpc_IPI_send_disengage(part);
1036 }
1037 }
1038
1039 time = rtc_time();
1040 printmsg_time = time +
1041 (XPC_DISENGAGE_PRINTMSG_INTERVAL * sn_rtc_cycles_per_second);
1042 disengage_request_timeout = time +
1043 (xpc_disengage_request_timelimit * sn_rtc_cycles_per_second);
1044
1045 /* wait for all other partitions to disengage from us */
1046
1047 while (1) {
1048 engaged = xpc_partition_engaged(-1UL);
1049 if (!engaged) {
1050 dev_info(xpc_part, "all partitions have disengaged\n");
1051 break;
1052 }
1053
1054 time = rtc_time();
1055 if (time >= disengage_request_timeout) {
1056 for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
1057 if (engaged & (1UL << partid)) {
1058 dev_info(xpc_part, "disengage from "
1059 "remote partition %d timed "
1060 "out\n", partid);
1061 }
1062 }
1063 break;
1064 }
1065
1066 if (time >= printmsg_time) {
1067 dev_info(xpc_part, "waiting for remote partitions to "
1068 "disengage, timeout in %ld seconds\n",
1069 (disengage_request_timeout - time) /
1070 sn_rtc_cycles_per_second);
1071 printmsg_time = time +
1072 (XPC_DISENGAGE_PRINTMSG_INTERVAL *
1073 sn_rtc_cycles_per_second);
1074 }
1075 }
1076}
1077
1078/*
1079 * This function is called when the system is being restarted or halted due
1080 * to some sort of system failure. If this is the case we need to notify the
1081 * other partitions to disengage from all references to our memory.
1082 * This function can also be called when our heartbeater could be offlined
1083 * for a time. In this case we need to notify other partitions to not worry
1084 * about the lack of a heartbeat.
1085 */
1086static int
1087xpc_system_die(struct notifier_block *nb, unsigned long event, void *unused)
1088{
1089 switch (event) {
1090 case DIE_MACHINE_RESTART:
1091 case DIE_MACHINE_HALT:
1092 xpc_die_disengage();
1093 break;
1094
1095 case DIE_KDEBUG_ENTER:
1096 /* Should lack of heartbeat be ignored by other partitions? */
1097 if (!xpc_kdebug_ignore)
1098 break;
1099
1100 /* fall through */
1101 case DIE_MCA_MONARCH_ENTER:
1102 case DIE_INIT_MONARCH_ENTER:
1103 xpc_vars->heartbeat++;
1104 xpc_vars->heartbeat_offline = 1;
1105 break;
1106
1107 case DIE_KDEBUG_LEAVE:
1108 /* Is lack of heartbeat being ignored by other partitions? */
1109 if (!xpc_kdebug_ignore)
1110 break;
1111
1112 /* fall through */
1113 case DIE_MCA_MONARCH_LEAVE:
1114 case DIE_INIT_MONARCH_LEAVE:
1115 xpc_vars->heartbeat++;
1116 xpc_vars->heartbeat_offline = 0;
1117 break;
1118 }
1119
1120 return NOTIFY_DONE;
1121}
1122
1123int __init
1124xpc_init(void)
1125{
1126 int ret;
1127 partid_t partid;
1128 struct xpc_partition *part;
1129 struct task_struct *kthread;
1130 size_t buf_size;
1131
1132 if (!ia64_platform_is("sn2"))
1133 return -ENODEV;
1134
1135 buf_size = max(XPC_RP_VARS_SIZE,
1136 XPC_RP_HEADER_SIZE + XP_NASID_MASK_BYTES);
1137 xpc_remote_copy_buffer = xpc_kmalloc_cacheline_aligned(buf_size,
1138 GFP_KERNEL,
1139 &xpc_remote_copy_buffer_base);
1140 if (xpc_remote_copy_buffer == NULL)
1141 return -ENOMEM;
1142
1143 snprintf(xpc_part->bus_id, BUS_ID_SIZE, "part");
1144 snprintf(xpc_chan->bus_id, BUS_ID_SIZE, "chan");
1145
1146 xpc_sysctl = register_sysctl_table(xpc_sys_dir);
1147
1148 /*
1149 * The first few fields of each entry of xpc_partitions[] need to
1150 * be initialized now so that calls to xpc_connect() and
1151 * xpc_disconnect() can be made prior to the activation of any remote
1152 * partition. NOTE THAT NONE OF THE OTHER FIELDS BELONGING TO THESE
1153 * ENTRIES ARE MEANINGFUL UNTIL AFTER AN ENTRY'S CORRESPONDING
1154 * PARTITION HAS BEEN ACTIVATED.
1155 */
1156 for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
1157 part = &xpc_partitions[partid];
1158
1159 DBUG_ON((u64)part != L1_CACHE_ALIGN((u64)part));
1160
1161 part->act_IRQ_rcvd = 0;
1162 spin_lock_init(&part->act_lock);
1163 part->act_state = XPC_P_INACTIVE;
1164 XPC_SET_REASON(part, 0, 0);
1165
1166 init_timer(&part->disengage_request_timer);
1167 part->disengage_request_timer.function =
1168 xpc_timeout_partition_disengage_request;
1169 part->disengage_request_timer.data = (unsigned long)part;
1170
1171 part->setup_state = XPC_P_UNSET;
1172 init_waitqueue_head(&part->teardown_wq);
1173 atomic_set(&part->references, 0);
1174 }
1175
1176 /*
1177 * Open up protections for IPI operations (and AMO operations on
1178 * Shub 1.1 systems).
1179 */
1180 xpc_allow_IPI_ops();
1181
1182 /*
1183 * Interrupts being processed will increment this atomic variable and
1184 * awaken the heartbeat thread which will process the interrupts.
1185 */
1186 atomic_set(&xpc_act_IRQ_rcvd, 0);
1187
1188 /*
1189 * This is safe to do before the xpc_hb_checker thread has started
1190 * because the handler releases a wait queue. If an interrupt is
1191 * received before the thread is waiting, it will not go to sleep,
1192 * but rather immediately process the interrupt.
1193 */
1194 ret = request_irq(SGI_XPC_ACTIVATE, xpc_act_IRQ_handler, 0,
1195 "xpc hb", NULL);
1196 if (ret != 0) {
1197 dev_err(xpc_part, "can't register ACTIVATE IRQ handler, "
1198 "errno=%d\n", -ret);
1199
1200 xpc_restrict_IPI_ops();
1201
1202 if (xpc_sysctl)
1203 unregister_sysctl_table(xpc_sysctl);
1204
1205 kfree(xpc_remote_copy_buffer_base);
1206 return -EBUSY;
1207 }
1208
1209 /*
1210 * Fill the partition reserved page with the information needed by
1211 * other partitions to discover we are alive and establish initial
1212 * communications.
1213 */
1214 xpc_rsvd_page = xpc_rsvd_page_init();
1215 if (xpc_rsvd_page == NULL) {
1216 dev_err(xpc_part, "could not setup our reserved page\n");
1217
1218 free_irq(SGI_XPC_ACTIVATE, NULL);
1219 xpc_restrict_IPI_ops();
1220
1221 if (xpc_sysctl)
1222 unregister_sysctl_table(xpc_sysctl);
1223
1224 kfree(xpc_remote_copy_buffer_base);
1225 return -EBUSY;
1226 }
1227
1228 /* add ourselves to the reboot_notifier_list */
1229 ret = register_reboot_notifier(&xpc_reboot_notifier);
1230 if (ret != 0)
1231 dev_warn(xpc_part, "can't register reboot notifier\n");
1232
1233 /* add ourselves to the die_notifier list */
1234 ret = register_die_notifier(&xpc_die_notifier);
1235 if (ret != 0)
1236 dev_warn(xpc_part, "can't register die notifier\n");
1237
1238 init_timer(&xpc_hb_timer);
1239 xpc_hb_timer.function = xpc_hb_beater;
1240
1241 /*
1242 * The real work-horse behind xpc. This processes incoming
1243 * interrupts and monitors remote heartbeats.
1244 */
1245 kthread = kthread_run(xpc_hb_checker, NULL, XPC_HB_CHECK_THREAD_NAME);
1246 if (IS_ERR(kthread)) {
1247 dev_err(xpc_part, "failed while forking hb check thread\n");
1248
1249 /* indicate to others that our reserved page is uninitialized */
1250 xpc_rsvd_page->vars_pa = 0;
1251
1252 /* take ourselves off of the reboot_notifier_list */
1253 (void)unregister_reboot_notifier(&xpc_reboot_notifier);
1254
1255 /* take ourselves off of the die_notifier list */
1256 (void)unregister_die_notifier(&xpc_die_notifier);
1257
1258 del_timer_sync(&xpc_hb_timer);
1259 free_irq(SGI_XPC_ACTIVATE, NULL);
1260 xpc_restrict_IPI_ops();
1261
1262 if (xpc_sysctl)
1263 unregister_sysctl_table(xpc_sysctl);
1264
1265 kfree(xpc_remote_copy_buffer_base);
1266 return -EBUSY;
1267 }
1268
1269 /*
1270 * Startup a thread that will attempt to discover other partitions to
1271 * activate based on info provided by SAL. This new thread is short
1272 * lived and will exit once discovery is complete.
1273 */
1274 kthread = kthread_run(xpc_initiate_discovery, NULL,
1275 XPC_DISCOVERY_THREAD_NAME);
1276 if (IS_ERR(kthread)) {
1277 dev_err(xpc_part, "failed while forking discovery thread\n");
1278
1279 /* mark this new thread as a non-starter */
1280 complete(&xpc_discovery_exited);
1281
1282 xpc_do_exit(xpcUnloading);
1283 return -EBUSY;
1284 }
1285
1286 /* set the interface to point at XPC's functions */
1287 xpc_set_interface(xpc_initiate_connect, xpc_initiate_disconnect,
1288 xpc_initiate_allocate, xpc_initiate_send,
1289 xpc_initiate_send_notify, xpc_initiate_received,
1290 xpc_initiate_partid_to_nasids);
1291
1292 return 0;
1293}
1294
1295module_init(xpc_init);
1296
1297void __exit
1298xpc_exit(void)
1299{
1300 xpc_do_exit(xpcUnloading);
1301}
1302
1303module_exit(xpc_exit);
1304
1305MODULE_AUTHOR("Silicon Graphics, Inc.");
1306MODULE_DESCRIPTION("Cross Partition Communication (XPC) support");
1307MODULE_LICENSE("GPL");
1308
1309module_param(xpc_hb_interval, int, 0);
1310MODULE_PARM_DESC(xpc_hb_interval, "Number of seconds between "
1311 "heartbeat increments.");
1312
1313module_param(xpc_hb_check_interval, int, 0);
1314MODULE_PARM_DESC(xpc_hb_check_interval, "Number of seconds between "
1315 "heartbeat checks.");
1316
1317module_param(xpc_disengage_request_timelimit, int, 0);
1318MODULE_PARM_DESC(xpc_disengage_request_timelimit, "Number of seconds to wait "
1319 "for disengage request to complete.");
1320
1321module_param(xpc_kdebug_ignore, int, 0);
1322MODULE_PARM_DESC(xpc_kdebug_ignore, "Should lack of heartbeat be ignored by "
1323 "other partitions when dropping into kdebug.");
diff --git a/drivers/misc/sgi-xp/xpc_partition.c b/drivers/misc/sgi-xp/xpc_partition.c
new file mode 100644
index 000000000000..27e200ec5826
--- /dev/null
+++ b/drivers/misc/sgi-xp/xpc_partition.c
@@ -0,0 +1,1174 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * Copyright (c) 2004-2008 Silicon Graphics, Inc. All Rights Reserved.
7 */
8
9/*
10 * Cross Partition Communication (XPC) partition support.
11 *
12 * This is the part of XPC that detects the presence/absence of
13 * other partitions. It provides a heartbeat and monitors the
14 * heartbeats of other partitions.
15 *
16 */
17
18#include <linux/kernel.h>
19#include <linux/sysctl.h>
20#include <linux/cache.h>
21#include <linux/mmzone.h>
22#include <linux/nodemask.h>
23#include <asm/uncached.h>
24#include <asm/sn/bte.h>
25#include <asm/sn/intr.h>
26#include <asm/sn/sn_sal.h>
27#include <asm/sn/nodepda.h>
28#include <asm/sn/addrs.h>
29#include "xpc.h"
30
31/* XPC is exiting flag */
32int xpc_exiting;
33
34/* SH_IPI_ACCESS shub register value on startup */
35static u64 xpc_sh1_IPI_access;
36static u64 xpc_sh2_IPI_access0;
37static u64 xpc_sh2_IPI_access1;
38static u64 xpc_sh2_IPI_access2;
39static u64 xpc_sh2_IPI_access3;
40
41/* original protection values for each node */
42u64 xpc_prot_vec[MAX_NUMNODES];
43
44/* this partition's reserved page pointers */
45struct xpc_rsvd_page *xpc_rsvd_page;
46static u64 *xpc_part_nasids;
47static u64 *xpc_mach_nasids;
48struct xpc_vars *xpc_vars;
49struct xpc_vars_part *xpc_vars_part;
50
51static int xp_nasid_mask_bytes; /* actual size in bytes of nasid mask */
52static int xp_nasid_mask_words; /* actual size in words of nasid mask */
53
54/*
55 * For performance reasons, each entry of xpc_partitions[] is cacheline
56 * aligned. And xpc_partitions[] is padded with an additional entry at the
57 * end so that the last legitimate entry doesn't share its cacheline with
58 * another variable.
59 */
60struct xpc_partition xpc_partitions[XP_MAX_PARTITIONS + 1];
61
62/*
63 * Generic buffer used to store a local copy of portions of a remote
64 * partition's reserved page (either its header and part_nasids mask,
65 * or its vars).
66 */
67char *xpc_remote_copy_buffer;
68void *xpc_remote_copy_buffer_base;
69
70/*
71 * Guarantee that the kmalloc'd memory is cacheline aligned.
72 */
73void *
74xpc_kmalloc_cacheline_aligned(size_t size, gfp_t flags, void **base)
75{
76 /* see if kmalloc will give us cacheline aligned memory by default */
77 *base = kmalloc(size, flags);
78 if (*base == NULL)
79 return NULL;
80
81 if ((u64)*base == L1_CACHE_ALIGN((u64)*base))
82 return *base;
83
84 kfree(*base);
85
86 /* nope, we'll have to do it ourselves */
87 *base = kmalloc(size + L1_CACHE_BYTES, flags);
88 if (*base == NULL)
89 return NULL;
90
91 return (void *)L1_CACHE_ALIGN((u64)*base);
92}
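/*
 * [Editor's note, not part of this patch] Typical use of the helper above:
 * the caller keeps the "base" pointer for kfree() and uses the returned,
 * possibly offset, pointer as the cacheline-aligned buffer itself (compare
 * xpc_remote_copy_buffer/xpc_remote_copy_buffer_base in xpc_init()). The
 * function name example_aligned_alloc and the 1024-byte size are
 * hypothetical:
 */
static void example_aligned_alloc(void)
{
	void *base;
	char *buf;

	buf = xpc_kmalloc_cacheline_aligned(1024, GFP_KERNEL, &base);
	if (buf == NULL)
		return;			/* allocation failed */

	/* ... use buf, which starts on an L1 cacheline boundary ... */

	kfree(base);			/* always free the original pointer */
}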
93
94/*
95 * Given a nasid, get the physical address of the partition's reserved page
96 * for that nasid. This function returns 0 on any error.
97 */
98static u64
99xpc_get_rsvd_page_pa(int nasid)
100{
101 bte_result_t bte_res;
102 s64 status;
103 u64 cookie = 0;
104 u64 rp_pa = nasid; /* seed with nasid */
105 u64 len = 0;
106 u64 buf = buf;
107 u64 buf_len = 0;
108 void *buf_base = NULL;
109
110 while (1) {
111
112 status = sn_partition_reserved_page_pa(buf, &cookie, &rp_pa,
113 &len);
114
115 dev_dbg(xpc_part, "SAL returned with status=%li, cookie="
116 "0x%016lx, address=0x%016lx, len=0x%016lx\n",
117 status, cookie, rp_pa, len);
118
119 if (status != SALRET_MORE_PASSES)
120 break;
121
122 if (L1_CACHE_ALIGN(len) > buf_len) {
123 kfree(buf_base);
124 buf_len = L1_CACHE_ALIGN(len);
125 buf = (u64)xpc_kmalloc_cacheline_aligned(buf_len,
126 GFP_KERNEL,
127 &buf_base);
128 if (buf_base == NULL) {
129 dev_err(xpc_part, "unable to kmalloc "
130 "len=0x%016lx\n", buf_len);
131 status = SALRET_ERROR;
132 break;
133 }
134 }
135
136 bte_res = xp_bte_copy(rp_pa, buf, buf_len,
137 (BTE_NOTIFY | BTE_WACQUIRE), NULL);
138 if (bte_res != BTE_SUCCESS) {
139 dev_dbg(xpc_part, "xp_bte_copy failed %i\n", bte_res);
140 status = SALRET_ERROR;
141 break;
142 }
143 }
144
145 kfree(buf_base);
146
147 if (status != SALRET_OK)
148 rp_pa = 0;
149
150 dev_dbg(xpc_part, "reserved page at phys address 0x%016lx\n", rp_pa);
151 return rp_pa;
152}
153
154/*
155 * Fill the partition reserved page with the information needed by
156 * other partitions to discover we are alive and establish initial
157 * communications.
158 */
159struct xpc_rsvd_page *
160xpc_rsvd_page_init(void)
161{
162 struct xpc_rsvd_page *rp;
163 AMO_t *amos_page;
164 u64 rp_pa, nasid_array = 0;
165 int i, ret;
166
167 /* get the local reserved page's address */
168
169 preempt_disable();
170 rp_pa = xpc_get_rsvd_page_pa(cpuid_to_nasid(smp_processor_id()));
171 preempt_enable();
172 if (rp_pa == 0) {
173 dev_err(xpc_part, "SAL failed to locate the reserved page\n");
174 return NULL;
175 }
176 rp = (struct xpc_rsvd_page *)__va(rp_pa);
177
178 if (rp->partid != sn_partition_id) {
179 dev_err(xpc_part, "the reserved page's partid of %d should be "
180 "%d\n", rp->partid, sn_partition_id);
181 return NULL;
182 }
183
184 rp->version = XPC_RP_VERSION;
185
186 /* establish the actual sizes of the nasid masks */
187 if (rp->SAL_version == 1) {
188 /* SAL_version 1 didn't set the nasids_size field */
189 rp->nasids_size = 128;
190 }
191 xp_nasid_mask_bytes = rp->nasids_size;
192 xp_nasid_mask_words = xp_nasid_mask_bytes / 8;
193
194 /* setup the pointers to the various items in the reserved page */
195 xpc_part_nasids = XPC_RP_PART_NASIDS(rp);
196 xpc_mach_nasids = XPC_RP_MACH_NASIDS(rp);
197 xpc_vars = XPC_RP_VARS(rp);
198 xpc_vars_part = XPC_RP_VARS_PART(rp);
199
200 /*
201 * Before clearing xpc_vars, see if a page of AMOs had been previously
202 * allocated. If not we'll need to allocate one and set permissions
203 * so that cross-partition AMOs are allowed.
204 *
205 * The allocated AMO page needs MCA reporting to remain disabled after
206 * XPC has unloaded. To make this work, we keep a copy of the pointer
207 * to this page (i.e., amos_page) in the struct xpc_vars structure,
208 * which is pointed to by the reserved page, and re-use that saved copy
209 * on subsequent loads of XPC. This AMO page is never freed, and its
210 * memory protections are never restricted.
211 */
212 amos_page = xpc_vars->amos_page;
213 if (amos_page == NULL) {
214 amos_page = (AMO_t *)TO_AMO(uncached_alloc_page(0));
215 if (amos_page == NULL) {
216 dev_err(xpc_part, "can't allocate page of AMOs\n");
217 return NULL;
218 }
219
220 /*
221 * Open up AMO-R/W to cpu. This is done for Shub 1.1 systems
222 * when xpc_allow_IPI_ops() is called via xpc_hb_init().
223 */
224 if (!enable_shub_wars_1_1()) {
225 ret = sn_change_memprotect(ia64_tpa((u64)amos_page),
226 PAGE_SIZE,
227 SN_MEMPROT_ACCESS_CLASS_1,
228 &nasid_array);
229 if (ret != 0) {
230 dev_err(xpc_part, "can't change memory "
231 "protections\n");
232 uncached_free_page(__IA64_UNCACHED_OFFSET |
233 TO_PHYS((u64)amos_page));
234 return NULL;
235 }
236 }
237 } else if (!IS_AMO_ADDRESS((u64)amos_page)) {
238 /*
239 * EFI's XPBOOT can also set amos_page in the reserved page,
240 * but it happens to leave it as an uncached physical address
241 * and we need it to be an uncached virtual, so we'll have to
242 * convert it.
243 */
244 if (!IS_AMO_PHYS_ADDRESS((u64)amos_page)) {
245 dev_err(xpc_part, "previously used amos_page address "
246 "is bad = 0x%p\n", (void *)amos_page);
247 return NULL;
248 }
249 amos_page = (AMO_t *)TO_AMO((u64)amos_page);
250 }
251
252 /* clear xpc_vars */
253 memset(xpc_vars, 0, sizeof(struct xpc_vars));
254
255 xpc_vars->version = XPC_V_VERSION;
256 xpc_vars->act_nasid = cpuid_to_nasid(0);
257 xpc_vars->act_phys_cpuid = cpu_physical_id(0);
258 xpc_vars->vars_part_pa = __pa(xpc_vars_part);
259 xpc_vars->amos_page_pa = ia64_tpa((u64)amos_page);
260 xpc_vars->amos_page = amos_page; /* save for next load of XPC */
261
262 /* clear xpc_vars_part */
263 memset((u64 *)xpc_vars_part, 0, sizeof(struct xpc_vars_part) *
264 XP_MAX_PARTITIONS);
265
266 /* initialize the activate IRQ related AMO variables */
267 for (i = 0; i < xp_nasid_mask_words; i++)
268 (void)xpc_IPI_init(XPC_ACTIVATE_IRQ_AMOS + i);
269
270 /* initialize the engaged remote partitions related AMO variables */
271 (void)xpc_IPI_init(XPC_ENGAGED_PARTITIONS_AMO);
272 (void)xpc_IPI_init(XPC_DISENGAGE_REQUEST_AMO);
273
274 /* timestamp of when reserved page was setup by XPC */
275 rp->stamp = CURRENT_TIME;
276
277 /*
278 * This signifies to the remote partition that our reserved
279 * page is initialized.
280 */
281 rp->vars_pa = __pa(xpc_vars);
282
283 return rp;
284}
285
286/*
287 * Change protections to allow IPI operations (and AMO operations on
288 * Shub 1.1 systems).
289 */
290void
291xpc_allow_IPI_ops(void)
292{
293 int node;
294 int nasid;
295
296 /* >>> Change SH_IPI_ACCESS code to use SAL call once it is available */
297
298 if (is_shub2()) {
299 xpc_sh2_IPI_access0 =
300 (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS0));
301 xpc_sh2_IPI_access1 =
302 (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS1));
303 xpc_sh2_IPI_access2 =
304 (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS2));
305 xpc_sh2_IPI_access3 =
306 (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS3));
307
308 for_each_online_node(node) {
309 nasid = cnodeid_to_nasid(node);
310 HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0),
311 -1UL);
312 HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1),
313 -1UL);
314 HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2),
315 -1UL);
316 HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3),
317 -1UL);
318 }
319
320 } else {
321 xpc_sh1_IPI_access =
322 (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH1_IPI_ACCESS));
323
324 for_each_online_node(node) {
325 nasid = cnodeid_to_nasid(node);
326 HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS),
327 -1UL);
328
329 /*
330 * Since the BIST collides with memory operations on
331 * SHUB 1.1 sn_change_memprotect() cannot be used.
332 */
333 if (enable_shub_wars_1_1()) {
334 /* open up everything */
335 xpc_prot_vec[node] = (u64)HUB_L((u64 *)
336 GLOBAL_MMR_ADDR
337 (nasid,
338 SH1_MD_DQLP_MMR_DIR_PRIVEC0));
339 HUB_S((u64 *)
340 GLOBAL_MMR_ADDR(nasid,
341 SH1_MD_DQLP_MMR_DIR_PRIVEC0),
342 -1UL);
343 HUB_S((u64 *)
344 GLOBAL_MMR_ADDR(nasid,
345 SH1_MD_DQRP_MMR_DIR_PRIVEC0),
346 -1UL);
347 }
348 }
349 }
350}
351
352/*
353 * Restrict protections to disallow IPI operations (and AMO operations on
354 * Shub 1.1 systems).
355 */
356void
357xpc_restrict_IPI_ops(void)
358{
359 int node;
360 int nasid;
361
362 /* >>> Change SH_IPI_ACCESS code to use SAL call once it is available */
363
364 if (is_shub2()) {
365
366 for_each_online_node(node) {
367 nasid = cnodeid_to_nasid(node);
368 HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0),
369 xpc_sh2_IPI_access0);
370 HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1),
371 xpc_sh2_IPI_access1);
372 HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2),
373 xpc_sh2_IPI_access2);
374 HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3),
375 xpc_sh2_IPI_access3);
376 }
377
378 } else {
379
380 for_each_online_node(node) {
381 nasid = cnodeid_to_nasid(node);
382 HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS),
383 xpc_sh1_IPI_access);
384
385 if (enable_shub_wars_1_1()) {
386 HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid,
387 SH1_MD_DQLP_MMR_DIR_PRIVEC0),
388 xpc_prot_vec[node]);
389 HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid,
390 SH1_MD_DQRP_MMR_DIR_PRIVEC0),
391 xpc_prot_vec[node]);
392 }
393 }
394 }
395}
396
397/*
398 * At periodic intervals, scan through all active partitions and ensure
399 * their heartbeat is still active. If not, the partition is deactivated.
400 */
401void
402xpc_check_remote_hb(void)
403{
404 struct xpc_vars *remote_vars;
405 struct xpc_partition *part;
406 partid_t partid;
407 bte_result_t bres;
408
409 remote_vars = (struct xpc_vars *)xpc_remote_copy_buffer;
410
411 for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
412
413 if (xpc_exiting)
414 break;
415
416 if (partid == sn_partition_id)
417 continue;
418
419 part = &xpc_partitions[partid];
420
421 if (part->act_state == XPC_P_INACTIVE ||
422 part->act_state == XPC_P_DEACTIVATING) {
423 continue;
424 }
425
426 /* pull the remote_hb cache line */
427 bres = xp_bte_copy(part->remote_vars_pa,
428 (u64)remote_vars,
429 XPC_RP_VARS_SIZE,
430 (BTE_NOTIFY | BTE_WACQUIRE), NULL);
431 if (bres != BTE_SUCCESS) {
432 XPC_DEACTIVATE_PARTITION(part,
433 xpc_map_bte_errors(bres));
434 continue;
435 }
436
437 dev_dbg(xpc_part, "partid = %d, heartbeat = %ld, last_heartbeat"
438 " = %ld, heartbeat_offline = %ld, HB_mask = 0x%lx\n",
439 partid, remote_vars->heartbeat, part->last_heartbeat,
440 remote_vars->heartbeat_offline,
441 remote_vars->heartbeating_to_mask);
442
443 if (((remote_vars->heartbeat == part->last_heartbeat) &&
444 (remote_vars->heartbeat_offline == 0)) ||
445 !xpc_hb_allowed(sn_partition_id, remote_vars)) {
446
447 XPC_DEACTIVATE_PARTITION(part, xpcNoHeartbeat);
448 continue;
449 }
450
451 part->last_heartbeat = remote_vars->heartbeat;
452 }
453}
454
455/*
456 * Get a copy of a portion of the remote partition's rsvd page.
457 *
458 * remote_rp points to a buffer that is cacheline aligned for BTE copies and
459 * is large enough to contain a copy of their reserved page header and
460 * part_nasids mask.
461 */
462static enum xpc_retval
463xpc_get_remote_rp(int nasid, u64 *discovered_nasids,
464 struct xpc_rsvd_page *remote_rp, u64 *remote_rp_pa)
465{
466 int bres, i;
467
468 /* get the reserved page's physical address */
469
470 *remote_rp_pa = xpc_get_rsvd_page_pa(nasid);
471 if (*remote_rp_pa == 0)
472 return xpcNoRsvdPageAddr;
473
474 /* pull over the reserved page header and part_nasids mask */
475 bres = xp_bte_copy(*remote_rp_pa, (u64)remote_rp,
476 XPC_RP_HEADER_SIZE + xp_nasid_mask_bytes,
477 (BTE_NOTIFY | BTE_WACQUIRE), NULL);
478 if (bres != BTE_SUCCESS)
479 return xpc_map_bte_errors(bres);
480
481 if (discovered_nasids != NULL) {
482 u64 *remote_part_nasids = XPC_RP_PART_NASIDS(remote_rp);
483
484 for (i = 0; i < xp_nasid_mask_words; i++)
485 discovered_nasids[i] |= remote_part_nasids[i];
486 }
487
488 /* check that the partid is for another partition */
489
490 if (remote_rp->partid < 1 ||
491 remote_rp->partid > (XP_MAX_PARTITIONS - 1)) {
492 return xpcInvalidPartid;
493 }
494
495 if (remote_rp->partid == sn_partition_id)
496 return xpcLocalPartid;
497
498 if (XPC_VERSION_MAJOR(remote_rp->version) !=
499 XPC_VERSION_MAJOR(XPC_RP_VERSION)) {
500 return xpcBadVersion;
501 }
502
503 return xpcSuccess;
504}
505
506/*
507 * Get a copy of the remote partition's XPC variables from the reserved page.
508 *
509 * remote_vars points to a buffer that is cacheline aligned for BTE copies and
510 * assumed to be of size XPC_RP_VARS_SIZE.
511 */
512static enum xpc_retval
513xpc_get_remote_vars(u64 remote_vars_pa, struct xpc_vars *remote_vars)
514{
515 int bres;
516
517 if (remote_vars_pa == 0)
518 return xpcVarsNotSet;
519
520 /* pull over the cross partition variables */
521 bres = xp_bte_copy(remote_vars_pa, (u64)remote_vars, XPC_RP_VARS_SIZE,
522 (BTE_NOTIFY | BTE_WACQUIRE), NULL);
523 if (bres != BTE_SUCCESS)
524 return xpc_map_bte_errors(bres);
525
526 if (XPC_VERSION_MAJOR(remote_vars->version) !=
527 XPC_VERSION_MAJOR(XPC_V_VERSION)) {
528 return xpcBadVersion;
529 }
530
531 return xpcSuccess;
532}
533
534/*
535 * Update the remote partition's info.
536 */
537static void
538xpc_update_partition_info(struct xpc_partition *part, u8 remote_rp_version,
539 struct timespec *remote_rp_stamp, u64 remote_rp_pa,
540 u64 remote_vars_pa, struct xpc_vars *remote_vars)
541{
542 part->remote_rp_version = remote_rp_version;
543 dev_dbg(xpc_part, " remote_rp_version = 0x%016x\n",
544 part->remote_rp_version);
545
546 part->remote_rp_stamp = *remote_rp_stamp;
547 dev_dbg(xpc_part, " remote_rp_stamp (tv_sec = 0x%lx tv_nsec = 0x%lx\n",
548 part->remote_rp_stamp.tv_sec, part->remote_rp_stamp.tv_nsec);
549
550 part->remote_rp_pa = remote_rp_pa;
551 dev_dbg(xpc_part, " remote_rp_pa = 0x%016lx\n", part->remote_rp_pa);
552
553 part->remote_vars_pa = remote_vars_pa;
554 dev_dbg(xpc_part, " remote_vars_pa = 0x%016lx\n",
555 part->remote_vars_pa);
556
557 part->last_heartbeat = remote_vars->heartbeat;
558 dev_dbg(xpc_part, " last_heartbeat = 0x%016lx\n",
559 part->last_heartbeat);
560
561 part->remote_vars_part_pa = remote_vars->vars_part_pa;
562 dev_dbg(xpc_part, " remote_vars_part_pa = 0x%016lx\n",
563 part->remote_vars_part_pa);
564
565 part->remote_act_nasid = remote_vars->act_nasid;
566 dev_dbg(xpc_part, " remote_act_nasid = 0x%x\n",
567 part->remote_act_nasid);
568
569 part->remote_act_phys_cpuid = remote_vars->act_phys_cpuid;
570 dev_dbg(xpc_part, " remote_act_phys_cpuid = 0x%x\n",
571 part->remote_act_phys_cpuid);
572
573 part->remote_amos_page_pa = remote_vars->amos_page_pa;
574 dev_dbg(xpc_part, " remote_amos_page_pa = 0x%lx\n",
575 part->remote_amos_page_pa);
576
577 part->remote_vars_version = remote_vars->version;
578 dev_dbg(xpc_part, " remote_vars_version = 0x%x\n",
579 part->remote_vars_version);
580}
581
582/*
583 * Prior code has determined the nasid which generated an IPI. Inspect
584 * that nasid to determine if its partition needs to be activated or
585 * deactivated.
586 *
587 * A partition is considered "awaiting activation" if our partition
588 * flags indicate it is not active and it has a heartbeat. A
589 * partition is considered "awaiting deactivation" if our partition
590 * flags indicate it is active but it has no heartbeat or it is not
591 * sending its heartbeat to us.
592 *
593 * To determine the heartbeat, the remote nasid must have a properly
594 * initialized reserved page.
595 */
596static void
597xpc_identify_act_IRQ_req(int nasid)
598{
599 struct xpc_rsvd_page *remote_rp;
600 struct xpc_vars *remote_vars;
601 u64 remote_rp_pa;
602 u64 remote_vars_pa;
603 int remote_rp_version;
604 int reactivate = 0;
605 int stamp_diff;
606 struct timespec remote_rp_stamp = { 0, 0 };
607 partid_t partid;
608 struct xpc_partition *part;
609 enum xpc_retval ret;
610
611 /* pull over the reserved page structure */
612
613 remote_rp = (struct xpc_rsvd_page *)xpc_remote_copy_buffer;
614
615 ret = xpc_get_remote_rp(nasid, NULL, remote_rp, &remote_rp_pa);
616 if (ret != xpcSuccess) {
617 dev_warn(xpc_part, "unable to get reserved page from nasid %d, "
618 "which sent interrupt, reason=%d\n", nasid, ret);
619 return;
620 }
621
622 remote_vars_pa = remote_rp->vars_pa;
623 remote_rp_version = remote_rp->version;
624 if (XPC_SUPPORTS_RP_STAMP(remote_rp_version))
625 remote_rp_stamp = remote_rp->stamp;
626
627 partid = remote_rp->partid;
628 part = &xpc_partitions[partid];
629
630 /* pull over the cross partition variables */
631
632 remote_vars = (struct xpc_vars *)xpc_remote_copy_buffer;
633
634 ret = xpc_get_remote_vars(remote_vars_pa, remote_vars);
635 if (ret != xpcSuccess) {
636
637 dev_warn(xpc_part, "unable to get XPC variables from nasid %d, "
638 "which sent interrupt, reason=%d\n", nasid, ret);
639
640 XPC_DEACTIVATE_PARTITION(part, ret);
641 return;
642 }
643
644 part->act_IRQ_rcvd++;
645
646 dev_dbg(xpc_part, "partid for nasid %d is %d; IRQs = %d; HB = "
647 "%ld:0x%lx\n", (int)nasid, (int)partid, part->act_IRQ_rcvd,
648 remote_vars->heartbeat, remote_vars->heartbeating_to_mask);
649
650 if (xpc_partition_disengaged(part) &&
651 part->act_state == XPC_P_INACTIVE) {
652
653 xpc_update_partition_info(part, remote_rp_version,
654 &remote_rp_stamp, remote_rp_pa,
655 remote_vars_pa, remote_vars);
656
657 if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)) {
658 if (xpc_partition_disengage_requested(1UL << partid)) {
659 /*
660 * Other side is waiting on us to disengage,
661 * even though we already have.
662 */
663 return;
664 }
665 } else {
666 /* other side doesn't support disengage requests */
667 xpc_clear_partition_disengage_request(1UL << partid);
668 }
669
670 xpc_activate_partition(part);
671 return;
672 }
673
674 DBUG_ON(part->remote_rp_version == 0);
675 DBUG_ON(part->remote_vars_version == 0);
676
677 if (!XPC_SUPPORTS_RP_STAMP(part->remote_rp_version)) {
678 DBUG_ON(XPC_SUPPORTS_DISENGAGE_REQUEST(part->
679 remote_vars_version));
680
681 if (!XPC_SUPPORTS_RP_STAMP(remote_rp_version)) {
682 DBUG_ON(XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->
683 version));
684 /* see if the other side rebooted */
685 if (part->remote_amos_page_pa ==
686 remote_vars->amos_page_pa &&
687 xpc_hb_allowed(sn_partition_id, remote_vars)) {
688 /* doesn't look that way, so ignore the IPI */
689 return;
690 }
691 }
692
693 /*
694 * Other side rebooted and previous XPC didn't support the
695 * disengage request, so we don't need to do anything special.
696 */
697
698 xpc_update_partition_info(part, remote_rp_version,
699 &remote_rp_stamp, remote_rp_pa,
700 remote_vars_pa, remote_vars);
701 part->reactivate_nasid = nasid;
702 XPC_DEACTIVATE_PARTITION(part, xpcReactivating);
703 return;
704 }
705
706 DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version));
707
708 if (!XPC_SUPPORTS_RP_STAMP(remote_rp_version)) {
709 DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->version));
710
711 /*
712 * Other side rebooted and previous XPC did support the
713 * disengage request, but the new one doesn't.
714 */
715
716 xpc_clear_partition_engaged(1UL << partid);
717 xpc_clear_partition_disengage_request(1UL << partid);
718
719 xpc_update_partition_info(part, remote_rp_version,
720 &remote_rp_stamp, remote_rp_pa,
721 remote_vars_pa, remote_vars);
722 reactivate = 1;
723
724 } else {
725 DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->version));
726
727 stamp_diff = xpc_compare_stamps(&part->remote_rp_stamp,
728 &remote_rp_stamp);
729 if (stamp_diff != 0) {
730 DBUG_ON(stamp_diff >= 0);
731
732 /*
733 * Other side rebooted and the previous XPC did support
734 * the disengage request, as does the new one.
735 */
736
737 DBUG_ON(xpc_partition_engaged(1UL << partid));
738 DBUG_ON(xpc_partition_disengage_requested(1UL <<
739 partid));
740
741 xpc_update_partition_info(part, remote_rp_version,
742 &remote_rp_stamp,
743 remote_rp_pa, remote_vars_pa,
744 remote_vars);
745 reactivate = 1;
746 }
747 }
748
749 if (part->disengage_request_timeout > 0 &&
750 !xpc_partition_disengaged(part)) {
751 /* still waiting on other side to disengage from us */
752 return;
753 }
754
755 if (reactivate) {
756 part->reactivate_nasid = nasid;
757 XPC_DEACTIVATE_PARTITION(part, xpcReactivating);
758
759 } else if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version) &&
760 xpc_partition_disengage_requested(1UL << partid)) {
761 XPC_DEACTIVATE_PARTITION(part, xpcOtherGoingDown);
762 }
763}
764
765/*
766 * Loop through the activation AMO variables and process any bits
767 * which are set. Each bit indicates a nasid sending a partition
768 * activation or deactivation request.
769 *
770 * Return #of IRQs detected.
771 */
772int
773xpc_identify_act_IRQ_sender(void)
774{
775 int word, bit;
776 u64 nasid_mask;
777 u64 nasid; /* remote nasid */
778 int n_IRQs_detected = 0;
779 AMO_t *act_amos;
780
781 act_amos = xpc_vars->amos_page + XPC_ACTIVATE_IRQ_AMOS;
782
783 /* scan through act AMO variable looking for non-zero entries */
784 for (word = 0; word < xp_nasid_mask_words; word++) {
785
786 if (xpc_exiting)
787 break;
788
789 nasid_mask = xpc_IPI_receive(&act_amos[word]);
790 if (nasid_mask == 0) {
791 /* no IRQs from nasids in this variable */
792 continue;
793 }
794
795 dev_dbg(xpc_part, "AMO[%d] gave back 0x%lx\n", word,
796 nasid_mask);
797
798 /*
799 * If this nasid has been added to the machine since
800 * our partition was reset, this will retain the
801 * remote nasid in our reserved page's machine mask.
802 * This is used in the event of module reload.
803 */
804 xpc_mach_nasids[word] |= nasid_mask;
805
806 /* locate the nasid(s) which sent interrupts */
807
808 for (bit = 0; bit < (8 * sizeof(u64)); bit++) {
809 if (nasid_mask & (1UL << bit)) {
810 n_IRQs_detected++;
811 nasid = XPC_NASID_FROM_W_B(word, bit);
812 dev_dbg(xpc_part, "interrupt from nasid %ld\n",
813 nasid);
814 xpc_identify_act_IRQ_req(nasid);
815 }
816 }
817 }
818 return n_IRQs_detected;
819}
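/*
 * Editor's illustrative note (not part of the original file): assuming
 * XPC_NASID_FROM_W_B() packs one even nasid per AMO mask bit, 64 bits
 * per mask word, the word/bit pair found by the scan above decodes as
 *
 *	nasid = ((word * 64) + bit) * 2;
 *
 * so, for example, word 1 / bit 3 would identify nasid (64 + 3) * 2 = 134
 * as the sender of the activate IRQ.
 */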
820
821/*
822 * See if the other side has responded to a partition disengage request
823 * from us.
824 */
825int
826xpc_partition_disengaged(struct xpc_partition *part)
827{
828 partid_t partid = XPC_PARTID(part);
829 int disengaged;
830
831 disengaged = (xpc_partition_engaged(1UL << partid) == 0);
832 if (part->disengage_request_timeout) {
833 if (!disengaged) {
834 if (time_before(jiffies,
835 part->disengage_request_timeout)) {
836 /* timelimit hasn't been reached yet */
837 return 0;
838 }
839
840 /*
841 * Other side hasn't responded to our disengage
842 * request in a timely fashion, so assume it's dead.
843 */
844
845 dev_info(xpc_part, "disengage from remote partition %d "
846 "timed out\n", partid);
847 xpc_disengage_request_timedout = 1;
848 xpc_clear_partition_engaged(1UL << partid);
849 disengaged = 1;
850 }
851 part->disengage_request_timeout = 0;
852
853 /* cancel the timer function, provided it's not us */
854 if (!in_interrupt()) {
855 del_singleshot_timer_sync(&part->
856 disengage_request_timer);
857 }
858
859 DBUG_ON(part->act_state != XPC_P_DEACTIVATING &&
860 part->act_state != XPC_P_INACTIVE);
861 if (part->act_state != XPC_P_INACTIVE)
862 xpc_wakeup_channel_mgr(part);
863
864 if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version))
865 xpc_cancel_partition_disengage_request(part);
866 }
867 return disengaged;
868}
869
870/*
871 * Mark specified partition as active.
872 */
873enum xpc_retval
874xpc_mark_partition_active(struct xpc_partition *part)
875{
876 unsigned long irq_flags;
877 enum xpc_retval ret;
878
879 dev_dbg(xpc_part, "setting partition %d to ACTIVE\n", XPC_PARTID(part));
880
881 spin_lock_irqsave(&part->act_lock, irq_flags);
882 if (part->act_state == XPC_P_ACTIVATING) {
883 part->act_state = XPC_P_ACTIVE;
884 ret = xpcSuccess;
885 } else {
886 DBUG_ON(part->reason == xpcSuccess);
887 ret = part->reason;
888 }
889 spin_unlock_irqrestore(&part->act_lock, irq_flags);
890
891 return ret;
892}
893
894/*
895 * Notify XPC that the partition is down.
896 */
897void
898xpc_deactivate_partition(const int line, struct xpc_partition *part,
899 enum xpc_retval reason)
900{
901 unsigned long irq_flags;
902
903 spin_lock_irqsave(&part->act_lock, irq_flags);
904
905 if (part->act_state == XPC_P_INACTIVE) {
906 XPC_SET_REASON(part, reason, line);
907 spin_unlock_irqrestore(&part->act_lock, irq_flags);
908 if (reason == xpcReactivating) {
909 /* we interrupt ourselves to reactivate partition */
910 xpc_IPI_send_reactivate(part);
911 }
912 return;
913 }
914 if (part->act_state == XPC_P_DEACTIVATING) {
915 if ((part->reason == xpcUnloading && reason != xpcUnloading) ||
916 reason == xpcReactivating) {
917 XPC_SET_REASON(part, reason, line);
918 }
919 spin_unlock_irqrestore(&part->act_lock, irq_flags);
920 return;
921 }
922
923 part->act_state = XPC_P_DEACTIVATING;
924 XPC_SET_REASON(part, reason, line);
925
926 spin_unlock_irqrestore(&part->act_lock, irq_flags);
927
928 if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)) {
929 xpc_request_partition_disengage(part);
930 xpc_IPI_send_disengage(part);
931
932 /* set a timelimit on the disengage request */
933 part->disengage_request_timeout = jiffies +
934 (xpc_disengage_request_timelimit * HZ);
935 part->disengage_request_timer.expires =
936 part->disengage_request_timeout;
937 add_timer(&part->disengage_request_timer);
938 }
939
940 dev_dbg(xpc_part, "bringing partition %d down, reason = %d\n",
941 XPC_PARTID(part), reason);
942
943 xpc_partition_going_down(part, reason);
944}
945
946/*
947 * Mark specified partition as inactive.
948 */
949void
950xpc_mark_partition_inactive(struct xpc_partition *part)
951{
952 unsigned long irq_flags;
953
954 dev_dbg(xpc_part, "setting partition %d to INACTIVE\n",
955 XPC_PARTID(part));
956
957 spin_lock_irqsave(&part->act_lock, irq_flags);
958 part->act_state = XPC_P_INACTIVE;
959 spin_unlock_irqrestore(&part->act_lock, irq_flags);
960 part->remote_rp_pa = 0;
961}
962
963/*
964 * SAL has provided a partition and machine mask. The partition mask
965 * contains a bit for each even nasid in our partition. The machine
966 * mask contains a bit for each even nasid in the entire machine.
967 *
968 * Using those two bit arrays, we can determine which nasids are
969 * known in the machine. Each should also have a reserved page
970 * initialized if it is available for partitioning.
971 */
972void
973xpc_discovery(void)
974{
975 void *remote_rp_base;
976 struct xpc_rsvd_page *remote_rp;
977 struct xpc_vars *remote_vars;
978 u64 remote_rp_pa;
979 u64 remote_vars_pa;
980 int region;
981 int region_size;
982 int max_regions;
983 int nasid;
984 struct xpc_rsvd_page *rp;
985 partid_t partid;
986 struct xpc_partition *part;
987 u64 *discovered_nasids;
988 enum xpc_retval ret;
989
990 remote_rp = xpc_kmalloc_cacheline_aligned(XPC_RP_HEADER_SIZE +
991 xp_nasid_mask_bytes,
992 GFP_KERNEL, &remote_rp_base);
993 if (remote_rp == NULL)
994 return;
995
996 remote_vars = (struct xpc_vars *)remote_rp;
997
998 discovered_nasids = kzalloc(sizeof(u64) * xp_nasid_mask_words,
999 GFP_KERNEL);
1000 if (discovered_nasids == NULL) {
1001 kfree(remote_rp_base);
1002 return;
1003 }
1004
1005 rp = (struct xpc_rsvd_page *)xpc_rsvd_page;
1006
1007 /*
1008 * The term 'region' in this context refers to the minimum number of
1009 * nodes that can comprise an access protection grouping. The access
1010 * protection is in regard to memory, IOI and IPI.
1011 */
1012 max_regions = 64;
1013 region_size = sn_region_size;
1014
1015 switch (region_size) {
1016 case 128:
1017 max_regions *= 2;
1018 case 64:
1019 max_regions *= 2;
1020 case 32:
1021 max_regions *= 2;
1022 region_size = 16;
1023 DBUG_ON(!is_shub2());
1024 }
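/*
 * Editor's note (illustrative arithmetic, not in the original source):
 * max_regions starts at 64 and is doubled once per fall-through case
 * above, so e.g. a sn_region_size of 32 yields 64 * 2 = 128 regions
 * with an effective region_size of 16.  The nested loops below then
 * walk only the even nasids of each region, i.e. region 0 covers
 * nasids 0, 2, ..., 30 and region 1 covers nasids 32, 34, ..., 62.
 */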
1025
1026 for (region = 0; region < max_regions; region++) {
1027
1028 if (xpc_exiting)
1029 break;
1030
1031 dev_dbg(xpc_part, "searching region %d\n", region);
1032
1033 for (nasid = (region * region_size * 2);
1034 nasid < ((region + 1) * region_size * 2); nasid += 2) {
1035
1036 if (xpc_exiting)
1037 break;
1038
1039 dev_dbg(xpc_part, "checking nasid %d\n", nasid);
1040
1041 if (XPC_NASID_IN_ARRAY(nasid, xpc_part_nasids)) {
1042 dev_dbg(xpc_part, "PROM indicates Nasid %d is "
1043 "part of the local partition; skipping "
1044 "region\n", nasid);
1045 break;
1046 }
1047
1048 if (!(XPC_NASID_IN_ARRAY(nasid, xpc_mach_nasids))) {
1049 dev_dbg(xpc_part, "PROM indicates Nasid %d was "
1050 "not on Numa-Link network at reset\n",
1051 nasid);
1052 continue;
1053 }
1054
1055 if (XPC_NASID_IN_ARRAY(nasid, discovered_nasids)) {
1056 dev_dbg(xpc_part, "Nasid %d is part of a "
1057 "partition which was previously "
1058 "discovered\n", nasid);
1059 continue;
1060 }
1061
1062 /* pull over the reserved page structure */
1063
1064 ret = xpc_get_remote_rp(nasid, discovered_nasids,
1065 remote_rp, &remote_rp_pa);
1066 if (ret != xpcSuccess) {
1067 dev_dbg(xpc_part, "unable to get reserved page "
1068 "from nasid %d, reason=%d\n", nasid,
1069 ret);
1070
1071 if (ret == xpcLocalPartid)
1072 break;
1073
1074 continue;
1075 }
1076
1077 remote_vars_pa = remote_rp->vars_pa;
1078
1079 partid = remote_rp->partid;
1080 part = &xpc_partitions[partid];
1081
1082 /* pull over the cross partition variables */
1083
1084 ret = xpc_get_remote_vars(remote_vars_pa, remote_vars);
1085 if (ret != xpcSuccess) {
1086 dev_dbg(xpc_part, "unable to get XPC variables "
1087 "from nasid %d, reason=%d\n", nasid,
1088 ret);
1089
1090 XPC_DEACTIVATE_PARTITION(part, ret);
1091 continue;
1092 }
1093
1094 if (part->act_state != XPC_P_INACTIVE) {
1095 dev_dbg(xpc_part, "partition %d on nasid %d is "
1096 "already activating\n", partid, nasid);
1097 break;
1098 }
1099
1100 /*
1101 * Register the remote partition's AMOs with SAL so it
1102 * can handle and cleanup errors within that address
1103 * range should the remote partition go down. We don't
1104 * unregister this range because it is difficult to
1105 * tell when outstanding writes to the remote partition
1106 * are finished and thus when it is safe to
1107 * unregister. This should not result in wasted space
1108 * in the SAL xp_addr_region table because we should
1109 * get the same page for remote_act_amos_pa after
1110 * module reloads and system reboots.
1111 */
1112 if (sn_register_xp_addr_region
1113 (remote_vars->amos_page_pa, PAGE_SIZE, 1) < 0) {
1114 dev_dbg(xpc_part,
1115 "partition %d failed to "
1116 "register xp_addr region 0x%016lx\n",
1117 partid, remote_vars->amos_page_pa);
1118
1119 XPC_SET_REASON(part, xpcPhysAddrRegFailed,
1120 __LINE__);
1121 break;
1122 }
1123
1124 /*
1125 * The remote nasid is valid and available.
1126 * Send an interrupt to that nasid to notify
1127 * it that we are ready to begin activation.
1128 */
1129 dev_dbg(xpc_part, "sending an interrupt to AMO 0x%lx, "
1130 "nasid %d, phys_cpuid 0x%x\n",
1131 remote_vars->amos_page_pa,
1132 remote_vars->act_nasid,
1133 remote_vars->act_phys_cpuid);
1134
1135 if (XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->
1136 version)) {
1137 part->remote_amos_page_pa =
1138 remote_vars->amos_page_pa;
1139 xpc_mark_partition_disengaged(part);
1140 xpc_cancel_partition_disengage_request(part);
1141 }
1142 xpc_IPI_send_activate(remote_vars);
1143 }
1144 }
1145
1146 kfree(discovered_nasids);
1147 kfree(remote_rp_base);
1148}
1149
1150/*
1151 * Given a partid, get the nasids owned by that partition from the
1152 * remote partition's reserved page.
1153 */
1154enum xpc_retval
1155xpc_initiate_partid_to_nasids(partid_t partid, void *nasid_mask)
1156{
1157 struct xpc_partition *part;
1158 u64 part_nasid_pa;
1159 int bte_res;
1160
1161 part = &xpc_partitions[partid];
1162 if (part->remote_rp_pa == 0)
1163 return xpcPartitionDown;
1164
1165 memset(nasid_mask, 0, XP_NASID_MASK_BYTES);
1166
1167 part_nasid_pa = (u64)XPC_RP_PART_NASIDS(part->remote_rp_pa);
1168
1169 bte_res = xp_bte_copy(part_nasid_pa, (u64)nasid_mask,
1170 xp_nasid_mask_bytes, (BTE_NOTIFY | BTE_WACQUIRE),
1171 NULL);
1172
1173 return xpc_map_bte_errors(bte_res);
1174}
diff --git a/drivers/misc/sgi-xp/xpnet.c b/drivers/misc/sgi-xp/xpnet.c
new file mode 100644
index 000000000000..a9543c65814d
--- /dev/null
+++ b/drivers/misc/sgi-xp/xpnet.c
@@ -0,0 +1,677 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * Copyright (C) 1999-2008 Silicon Graphics, Inc. All rights reserved.
7 */
8
9/*
10 * Cross Partition Network Interface (XPNET) support
11 *
12 * XPNET provides a virtual network layered on top of the Cross
13 * Partition communication layer.
14 *
15 * XPNET provides direct point-to-point and broadcast-like support
16 * for an ethernet-like device. The ethernet broadcast medium is
17 * replaced with a point-to-point message structure which passes
18 * pointers to a DMA-capable block that a remote partition should
19 * retrieve and pass to the upper level networking layer.
20 *
21 */
22
23#include <linux/module.h>
24#include <linux/types.h>
25#include <linux/kernel.h>
26#include <linux/init.h>
27#include <linux/ioport.h>
28#include <linux/netdevice.h>
29#include <linux/etherdevice.h>
30#include <linux/delay.h>
31#include <linux/ethtool.h>
32#include <linux/mii.h>
33#include <linux/smp.h>
34#include <linux/string.h>
35#include <asm/sn/bte.h>
36#include <asm/sn/io.h>
37#include <asm/sn/sn_sal.h>
38#include <asm/atomic.h>
39#include "xp.h"
40
41/*
42 * The message payload transferred by XPC.
43 *
44 * buf_pa is the physical address where the DMA should pull from.
45 *
46 * NOTE: for performance reasons, buf_pa should _ALWAYS_ begin on a
47 * cacheline boundary. To accomplish this, we record the number of
48 * bytes from the beginning of the first cacheline to the first useful
49 * byte of the skb (leadin_ignore) and the number of bytes from the
50 * last useful byte of the skb to the end of the last cacheline
51 * (tailout_ignore).
52 *
53 * size is the number of bytes to transfer which includes the skb->len
54 * (useful bytes of the sender's skb) plus the leadin and tailout
55 */
56struct xpnet_message {
57 u16 version; /* Version for this message */
58 u16 embedded_bytes; /* #of bytes embedded in XPC message */
59 u32 magic; /* Special number indicating this is xpnet */
60 u64 buf_pa; /* phys address of buffer to retrieve */
61 u32 size; /* #of bytes in buffer */
62 u8 leadin_ignore; /* #of bytes to ignore at the beginning */
63 u8 tailout_ignore; /* #of bytes to ignore at the end */
64 unsigned char data; /* body of small packets */
65};
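/*
 * Editor's worked example (illustrative only; assumes the sn2 cacheline
 * size of 128 bytes): for an skb with skb->data == ...0x1050 and
 * skb->len == 300, the sender rounds out to whole cachelines:
 *
 *	start_addr     = 0x1000			(0x1050 & ~127)
 *	end_addr       = 0x1180			(0x1050 + 300 = 0x117c, rounded up)
 *	size           = 0x180 (384)		(end_addr - start_addr)
 *	leadin_ignore  = 0x50 (80)		(0x1050 - start_addr)
 *	tailout_ignore = 4			(end_addr - 0x117c)
 *
 * so size == leadin_ignore + skb->len + tailout_ignore, and buf_pa is
 * the physical address of start_addr.
 */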
66
67/*
68 * Determine the size of our message, the cacheline aligned size,
69 * and then the number of messages we will request from XPC.
70 *
71 * XPC expects each message to exist in an individual cacheline.
72 */
73#define XPNET_MSG_SIZE (L1_CACHE_BYTES - XPC_MSG_PAYLOAD_OFFSET)
74#define XPNET_MSG_DATA_MAX \
75 (XPNET_MSG_SIZE - (u64)(&((struct xpnet_message *)0)->data))
76#define XPNET_MSG_ALIGNED_SIZE (L1_CACHE_ALIGN(XPNET_MSG_SIZE))
77#define XPNET_MSG_NENTRIES (PAGE_SIZE / XPNET_MSG_ALIGNED_SIZE)
78
79#define XPNET_MAX_KTHREADS (XPNET_MSG_NENTRIES + 1)
80#define XPNET_MAX_IDLE_KTHREADS (XPNET_MSG_NENTRIES + 1)
81
82/*
83 * Version number of XPNET implementation. XPNET can always talk to versions
84 * with the same major #, and never to versions with a different major #.
85 */
86#define _XPNET_VERSION(_major, _minor) (((_major) << 4) | (_minor))
87#define XPNET_VERSION_MAJOR(_v) ((_v) >> 4)
88#define XPNET_VERSION_MINOR(_v) ((_v) & 0xf)
89
90#define XPNET_VERSION _XPNET_VERSION(1, 0) /* version 1.0 */
91#define XPNET_VERSION_EMBED _XPNET_VERSION(1, 1) /* version 1.1 */
92#define XPNET_MAGIC 0x88786984 /* "XNET" */
93
94#define XPNET_VALID_MSG(_m) \
95 ((XPNET_VERSION_MAJOR((_m)->version) == XPNET_VERSION_MAJOR(XPNET_VERSION)) \
96 && ((_m)->magic == XPNET_MAGIC))
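/*
 * Editor's note (illustrative): _XPNET_VERSION(1, 1) packs to 0x11, so
 * XPNET_VERSION_MAJOR(0x11) == 1 and XPNET_VERSION_MINOR(0x11) == 1.
 * XPNET_VALID_MSG() compares only the major number, which is why a
 * version 1.0 sender can still talk to a 1.1 receiver; the 1.1
 * "embedded data" feature is gated separately on the minor number in
 * xpnet_receive().
 */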
97
98#define XPNET_DEVICE_NAME "xp0"
99
100/*
101 * When messages are queued with xpc_send_notify, a kmalloc'd buffer
102 * of the following type is passed as a notification cookie. When the
103 * notification function is called, we use the cookie to decide
104 * whether all outstanding message sends have completed. The skb can
105 * then be released.
106 */
107struct xpnet_pending_msg {
108 struct list_head free_list;
109 struct sk_buff *skb;
110 atomic_t use_count;
111};
112
113/* driver specific structure pointed to by the device structure */
114struct xpnet_dev_private {
115 struct net_device_stats stats;
116};
117
118struct net_device *xpnet_device;
119
120/*
121 * When we are notified of other partitions activating, we add them to
122 * our bitmask of partitions to which we broadcast.
123 */
124static u64 xpnet_broadcast_partitions;
125/* protect above */
126static DEFINE_SPINLOCK(xpnet_broadcast_lock);
127
128/*
129 * Since the Block Transfer Engine (BTE) is being used for the transfer
130 * and it relies upon cache-line size transfers, we need to reserve at
131 * least one cache-line for head and tail alignment. The BTE is
132 * limited to 8MB transfers.
133 *
134 * Testing has shown that changing MTU to greater than 64KB has no effect
135 * on TCP as the two sides negotiate a Max Segment Size that is limited
136 * to 64K. Other protocols may use packets greater than this, but for
137 * now, the default is 64KB.
138 */
139#define XPNET_MAX_MTU (0x800000UL - L1_CACHE_BYTES)
140/* 32KB has been determined to be the ideal */
141#define XPNET_DEF_MTU (0x8000UL)
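/*
 * Editor's note (illustrative, assuming a 128-byte cacheline): the
 * definitions above work out to XPNET_MAX_MTU = 0x800000 - 128 =
 * 8388480 bytes (one maximal 8MB BTE transfer less a cacheline of
 * head/tail slack) and XPNET_DEF_MTU = 0x8000 = 32768 bytes.
 */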
142
143/*
144 * The partition id is encapsulated in the MAC address. The following
145 * defines locate the octets where the partid and license number are stored.
146 */
147#define XPNET_PARTID_OCTET 1
148#define XPNET_LICENSE_OCTET 2
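/*
 * Editor's sketch (illustrative only): with the dev_addr layout built
 * in xpnet_init() below, a partition with sn_partition_id 3 and a
 * license serial number of 0x12345678 would advertise the MAC address
 *
 *	fe:03:12:34:56:78
 *
 * and the transmit path recovers the destination partition from the
 * MAC of the outgoing frame:
 *
 *	u8 dest = skb->data[XPNET_PARTID_OCTET];  /* 0x03 -> partition 3,
 *						     0xff -> broadcast */
 */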
149
150/*
151 * Define the XPNET debug device structure that is to be used with dev_dbg(),
152 * dev_err(), dev_warn(), and dev_info().
153 */
154struct device_driver xpnet_dbg_name = {
155 .name = "xpnet"
156};
157
158struct device xpnet_dbg_subname = {
159 .bus_id = {0}, /* set to "" */
160 .driver = &xpnet_dbg_name
161};
162
163struct device *xpnet = &xpnet_dbg_subname;
164
165/*
166 * Packet was received by XPC and forwarded to us.
167 */
168static void
169xpnet_receive(partid_t partid, int channel, struct xpnet_message *msg)
170{
171 struct sk_buff *skb;
172 bte_result_t bret;
173 struct xpnet_dev_private *priv =
174 (struct xpnet_dev_private *)xpnet_device->priv;
175
176 if (!XPNET_VALID_MSG(msg)) {
177 /*
178 * Packet with a different XPC version. Ignore.
179 */
180 xpc_received(partid, channel, (void *)msg);
181
182 priv->stats.rx_errors++;
183
184 return;
185 }
186 dev_dbg(xpnet, "received 0x%lx, %d, %d, %d\n", msg->buf_pa, msg->size,
187 msg->leadin_ignore, msg->tailout_ignore);
188
189 /* reserve an extra cache line */
190 skb = dev_alloc_skb(msg->size + L1_CACHE_BYTES);
191 if (!skb) {
192 dev_err(xpnet, "failed on dev_alloc_skb(%d)\n",
193 msg->size + L1_CACHE_BYTES);
194
195 xpc_received(partid, channel, (void *)msg);
196
197 priv->stats.rx_errors++;
198
199 return;
200 }
201
202 /*
203 * The allocated skb has some reserved space.
204 * In order to use bte_copy, we need to get the
205 * skb->data pointer moved forward.
206 */
207 skb_reserve(skb, (L1_CACHE_BYTES - ((u64)skb->data &
208 (L1_CACHE_BYTES - 1)) +
209 msg->leadin_ignore));
210
211 /*
212 * Update the tail pointer to indicate data actually
213 * transferred.
214 */
215 skb_put(skb, (msg->size - msg->leadin_ignore - msg->tailout_ignore));
216
217 /*
218 * Move the data over from the other side.
219 */
220 if ((XPNET_VERSION_MINOR(msg->version) == 1) &&
221 (msg->embedded_bytes != 0)) {
222 dev_dbg(xpnet, "copying embedded message. memcpy(0x%p, 0x%p, "
223 "%lu)\n", skb->data, &msg->data,
224 (size_t)msg->embedded_bytes);
225
226 skb_copy_to_linear_data(skb, &msg->data,
227 (size_t)msg->embedded_bytes);
228 } else {
229 dev_dbg(xpnet, "transferring buffer to the skb->data area;\n\t"
230 "bte_copy(0x%p, 0x%p, %hu)\n", (void *)msg->buf_pa,
231 (void *)__pa((u64)skb->data & ~(L1_CACHE_BYTES - 1)),
232 msg->size);
233
234 bret = bte_copy(msg->buf_pa,
235 __pa((u64)skb->data & ~(L1_CACHE_BYTES - 1)),
236 msg->size, (BTE_NOTIFY | BTE_WACQUIRE), NULL);
237
238 if (bret != BTE_SUCCESS) {
239 /*
240 * >>> Need better way of cleaning skb. Currently skb
241 * >>> appears in_use and we can't just call
242 * >>> dev_kfree_skb.
243 */
244 dev_err(xpnet, "bte_copy(0x%p, 0x%p, 0x%hx) returned "
245 "error=0x%x\n", (void *)msg->buf_pa,
246 (void *)__pa((u64)skb->data &
247 ~(L1_CACHE_BYTES - 1)),
248 msg->size, bret);
249
250 xpc_received(partid, channel, (void *)msg);
251
252 priv->stats.rx_errors++;
253
254 return;
255 }
256 }
257
258 dev_dbg(xpnet, "<skb->head=0x%p skb->data=0x%p skb->tail=0x%p "
259 "skb->end=0x%p skb->len=%d\n", (void *)skb->head,
260 (void *)skb->data, skb_tail_pointer(skb), skb_end_pointer(skb),
261 skb->len);
262
263 skb->protocol = eth_type_trans(skb, xpnet_device);
264 skb->ip_summed = CHECKSUM_UNNECESSARY;
265
266 dev_dbg(xpnet, "passing skb to network layer\n"
267 KERN_DEBUG "\tskb->head=0x%p skb->data=0x%p skb->tail=0x%p "
268 "skb->end=0x%p skb->len=%d\n",
269 (void *)skb->head, (void *)skb->data, skb_tail_pointer(skb),
270 skb_end_pointer(skb), skb->len);
271
272 xpnet_device->last_rx = jiffies;
273 priv->stats.rx_packets++;
274 priv->stats.rx_bytes += skb->len + ETH_HLEN;
275
276 netif_rx_ni(skb);
277 xpc_received(partid, channel, (void *)msg);
278}
279
280/*
281 * This is the handler which XPC calls during any sort of change in
282 * state or message reception on a connection.
283 */
284static void
285xpnet_connection_activity(enum xpc_retval reason, partid_t partid, int channel,
286 void *data, void *key)
287{
288 long bp;
289
290 DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS);
291 DBUG_ON(channel != XPC_NET_CHANNEL);
292
293 switch (reason) {
294 case xpcMsgReceived: /* message received */
295 DBUG_ON(data == NULL);
296
297 xpnet_receive(partid, channel, (struct xpnet_message *)data);
298 break;
299
300 case xpcConnected: /* connection completed to a partition */
301 spin_lock_bh(&xpnet_broadcast_lock);
302 xpnet_broadcast_partitions |= 1UL << (partid - 1);
303 bp = xpnet_broadcast_partitions;
304 spin_unlock_bh(&xpnet_broadcast_lock);
305
306 netif_carrier_on(xpnet_device);
307
308 dev_dbg(xpnet, "%s connection created to partition %d; "
309 "xpnet_broadcast_partitions=0x%lx\n",
310 xpnet_device->name, partid, bp);
311 break;
312
313 default:
314 spin_lock_bh(&xpnet_broadcast_lock);
315 xpnet_broadcast_partitions &= ~(1UL << (partid - 1));
316 bp = xpnet_broadcast_partitions;
317 spin_unlock_bh(&xpnet_broadcast_lock);
318
319 if (bp == 0)
320 netif_carrier_off(xpnet_device);
321
322 dev_dbg(xpnet, "%s disconnected from partition %d; "
323 "xpnet_broadcast_partitions=0x%lx\n",
324 xpnet_device->name, partid, bp);
325 break;
326
327 }
328}
329
330static int
331xpnet_dev_open(struct net_device *dev)
332{
333 enum xpc_retval ret;
334
335 dev_dbg(xpnet, "calling xpc_connect(%d, 0x%p, NULL, %ld, %ld, %ld, "
336 "%ld)\n", XPC_NET_CHANNEL, xpnet_connection_activity,
337 XPNET_MSG_SIZE, XPNET_MSG_NENTRIES, XPNET_MAX_KTHREADS,
338 XPNET_MAX_IDLE_KTHREADS);
339
340 ret = xpc_connect(XPC_NET_CHANNEL, xpnet_connection_activity, NULL,
341 XPNET_MSG_SIZE, XPNET_MSG_NENTRIES,
342 XPNET_MAX_KTHREADS, XPNET_MAX_IDLE_KTHREADS);
343 if (ret != xpcSuccess) {
344 dev_err(xpnet, "ifconfig up of %s failed on XPC connect, "
345 "ret=%d\n", dev->name, ret);
346
347 return -ENOMEM;
348 }
349
350 dev_dbg(xpnet, "ifconfig up of %s; XPC connected\n", dev->name);
351
352 return 0;
353}
354
355static int
356xpnet_dev_stop(struct net_device *dev)
357{
358 xpc_disconnect(XPC_NET_CHANNEL);
359
360 dev_dbg(xpnet, "ifconfig down of %s; XPC disconnected\n", dev->name);
361
362 return 0;
363}
364
365static int
366xpnet_dev_change_mtu(struct net_device *dev, int new_mtu)
367{
368 /* 68 comes from min TCP+IP+MAC header */
369 if ((new_mtu < 68) || (new_mtu > XPNET_MAX_MTU)) {
370 dev_err(xpnet, "ifconfig %s mtu %d failed; value must be "
371 "between 68 and %ld\n", dev->name, new_mtu,
372 XPNET_MAX_MTU);
373 return -EINVAL;
374 }
375
376 dev->mtu = new_mtu;
377 dev_dbg(xpnet, "ifconfig %s mtu set to %d\n", dev->name, new_mtu);
378 return 0;
379}
380
381/*
382 * Required for the net_device structure.
383 */
384static int
385xpnet_dev_set_config(struct net_device *dev, struct ifmap *new_map)
386{
387 return 0;
388}
389
390/*
391 * Return statistics to the caller.
392 */
393static struct net_device_stats *
394xpnet_dev_get_stats(struct net_device *dev)
395{
396 struct xpnet_dev_private *priv;
397
398 priv = (struct xpnet_dev_private *)dev->priv;
399
400 return &priv->stats;
401}
402
403/*
404 * Notification that the other end has received the message and
405 * DMA'd the skb information. At this point, they are done with
406 * our side. When all recipients are done processing, we
407 * release the skb and then release our pending message structure.
408 */
409static void
410xpnet_send_completed(enum xpc_retval reason, partid_t partid, int channel,
411 void *__qm)
412{
413 struct xpnet_pending_msg *queued_msg = (struct xpnet_pending_msg *)__qm;
414
415 DBUG_ON(queued_msg == NULL);
416
417 dev_dbg(xpnet, "message to %d notified with reason %d\n",
418 partid, reason);
419
420 if (atomic_dec_return(&queued_msg->use_count) == 0) {
421 dev_dbg(xpnet, "all acks for skb->head=-x%p\n",
422 (void *)queued_msg->skb->head);
423
424 dev_kfree_skb_any(queued_msg->skb);
425 kfree(queued_msg);
426 }
427}
428
429/*
430 * Network layer has formatted a packet (skb) and is ready to place it
431 * "on the wire". Prepare and send an xpnet_message to all partitions
432 * which have connected with us and are targets of this packet.
433 *
434 * MAC-NOTE: For the XPNET driver, the MAC address contains the
435 * destination partition_id. If the destination partition id word
436 * is 0xff, this packet is to broadcast to all partitions.
437 */
438static int
439xpnet_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
440{
441 struct xpnet_pending_msg *queued_msg;
442 enum xpc_retval ret;
443 struct xpnet_message *msg;
444 u64 start_addr, end_addr;
445 long dp;
446 u8 second_mac_octet;
447 partid_t dest_partid;
448 struct xpnet_dev_private *priv;
449 u16 embedded_bytes;
450
451 priv = (struct xpnet_dev_private *)dev->priv;
452
453 dev_dbg(xpnet, ">skb->head=0x%p skb->data=0x%p skb->tail=0x%p "
454 "skb->end=0x%p skb->len=%d\n", (void *)skb->head,
455 (void *)skb->data, skb_tail_pointer(skb), skb_end_pointer(skb),
456 skb->len);
457
458 /*
459 * The xpnet_pending_msg tracks how many outstanding
460 * xpc_send_notifies are relying on this skb. When none
461 * remain, release the skb.
462 */
463 queued_msg = kmalloc(sizeof(struct xpnet_pending_msg), GFP_ATOMIC);
464 if (queued_msg == NULL) {
465 dev_warn(xpnet, "failed to kmalloc %ld bytes; dropping "
466 "packet\n", sizeof(struct xpnet_pending_msg));
467
468 priv->stats.tx_errors++;
469
470 return -ENOMEM;
471 }
472
473 /* get the beginning of the first cacheline and end of last */
474 start_addr = ((u64)skb->data & ~(L1_CACHE_BYTES - 1));
475 end_addr = L1_CACHE_ALIGN((u64)skb_tail_pointer(skb));
476
477 /* calculate how many bytes to embed in the XPC message */
478 embedded_bytes = 0;
479 if (unlikely(skb->len <= XPNET_MSG_DATA_MAX)) {
480 /* skb->data does fit so embed */
481 embedded_bytes = skb->len;
482 }
483
484 /*
485 * Since the send occurs asynchronously, we set the count to one
486 * and begin sending. Any sends that happen to complete before
487 * we are done sending will not free the skb. We will be left
488 * with that task during exit. This also handles the case of
489 * a packet destined for a partition which is no longer up.
490 */
491 atomic_set(&queued_msg->use_count, 1);
492 queued_msg->skb = skb;
493
494 second_mac_octet = skb->data[XPNET_PARTID_OCTET];
495 if (second_mac_octet == 0xff) {
496 /* we are being asked to broadcast to all partitions */
497 dp = xpnet_broadcast_partitions;
498 } else if (second_mac_octet != 0) {
499 dp = xpnet_broadcast_partitions &
500 (1UL << (second_mac_octet - 1));
501 } else {
502 /* 0 is an invalid partid. Ignore */
503 dp = 0;
504 }
505 dev_dbg(xpnet, "destination Partitions mask (dp) = 0x%lx\n", dp);
506
507 /*
508 * If we wanted to allow promiscuous mode to work like an
509 * unswitched network, this would be a good point to OR in a
510 * mask of partitions which should be receiving all packets.
511 */
512
513 /*
514 * Main send loop.
515 */
516 for (dest_partid = 1; dp && dest_partid < XP_MAX_PARTITIONS;
517 dest_partid++) {
518
519 if (!(dp & (1UL << (dest_partid - 1)))) {
520 /* not destined for this partition */
521 continue;
522 }
523
524 /* remove this partition from the destinations mask */
525 dp &= ~(1UL << (dest_partid - 1));
526
527 /* found a partition to send to */
528
529 ret = xpc_allocate(dest_partid, XPC_NET_CHANNEL,
530 XPC_NOWAIT, (void **)&msg);
531 if (unlikely(ret != xpcSuccess))
532 continue;
533
534 msg->embedded_bytes = embedded_bytes;
535 if (unlikely(embedded_bytes != 0)) {
536 msg->version = XPNET_VERSION_EMBED;
537 dev_dbg(xpnet, "calling memcpy(0x%p, 0x%p, 0x%lx)\n",
538 &msg->data, skb->data, (size_t)embedded_bytes);
539 skb_copy_from_linear_data(skb, &msg->data,
540 (size_t)embedded_bytes);
541 } else {
542 msg->version = XPNET_VERSION;
543 }
544 msg->magic = XPNET_MAGIC;
545 msg->size = end_addr - start_addr;
546 msg->leadin_ignore = (u64)skb->data - start_addr;
547 msg->tailout_ignore = end_addr - (u64)skb_tail_pointer(skb);
548 msg->buf_pa = __pa(start_addr);
549
550 dev_dbg(xpnet, "sending XPC message to %d:%d\n"
551 KERN_DEBUG "msg->buf_pa=0x%lx, msg->size=%u, "
552 "msg->leadin_ignore=%u, msg->tailout_ignore=%u\n",
553 dest_partid, XPC_NET_CHANNEL, msg->buf_pa, msg->size,
554 msg->leadin_ignore, msg->tailout_ignore);
555
556 atomic_inc(&queued_msg->use_count);
557
558 ret = xpc_send_notify(dest_partid, XPC_NET_CHANNEL, msg,
559 xpnet_send_completed, queued_msg);
560 if (unlikely(ret != xpcSuccess)) {
561 atomic_dec(&queued_msg->use_count);
562 continue;
563 }
564 }
565
566 if (atomic_dec_return(&queued_msg->use_count) == 0) {
567 dev_dbg(xpnet, "no partitions to receive packet destined for "
568 "%d\n", dest_partid);
569
570 dev_kfree_skb(skb);
571 kfree(queued_msg);
572 }
573
574 priv->stats.tx_packets++;
575 priv->stats.tx_bytes += skb->len;
576
577 return 0;
578}
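/*
 * Editor's note (illustrative summary of the refcounting above, not in
 * the original source): queued_msg->use_count starts at 1, gains one
 * reference per successful xpc_send_notify(), and loses one in
 * xpnet_send_completed() as each destination acknowledges the DMA.
 * The final atomic_dec_return() at the end of hard_start_xmit() drops
 * the initial reference, so whichever path brings the count to zero
 * frees the skb and the xpnet_pending_msg.
 */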
579
580/*
581 * Deal with transmit timeouts coming from the network layer.
582 */
583static void
584xpnet_dev_tx_timeout(struct net_device *dev)
585{
586 struct xpnet_dev_private *priv;
587
588 priv = (struct xpnet_dev_private *)dev->priv;
589
590 priv->stats.tx_errors++;
591 return;
592}
593
594static int __init
595xpnet_init(void)
596{
597 int i;
598 u32 license_num;
599 int result = -ENOMEM;
600
601 if (!ia64_platform_is("sn2"))
602 return -ENODEV;
603
604 dev_info(xpnet, "registering network device %s\n", XPNET_DEVICE_NAME);
605
606 /*
607 * use ether_setup() to init the majority of our device
608 * structure and then override the necessary pieces.
609 */
610 xpnet_device = alloc_netdev(sizeof(struct xpnet_dev_private),
611 XPNET_DEVICE_NAME, ether_setup);
612 if (xpnet_device == NULL)
613 return -ENOMEM;
614
615 netif_carrier_off(xpnet_device);
616
617 xpnet_device->mtu = XPNET_DEF_MTU;
618 xpnet_device->change_mtu = xpnet_dev_change_mtu;
619 xpnet_device->open = xpnet_dev_open;
620 xpnet_device->get_stats = xpnet_dev_get_stats;
621 xpnet_device->stop = xpnet_dev_stop;
622 xpnet_device->hard_start_xmit = xpnet_dev_hard_start_xmit;
623 xpnet_device->tx_timeout = xpnet_dev_tx_timeout;
624 xpnet_device->set_config = xpnet_dev_set_config;
625
626 /*
627 * Multicast assumes the LSB of the first octet is set for multicast
628 * MAC addresses. We chose the first octet of the MAC to be unlikely
629 * to collide with any vendor's officially issued MAC.
630 */
631 xpnet_device->dev_addr[0] = 0xfe;
632 xpnet_device->dev_addr[XPNET_PARTID_OCTET] = sn_partition_id;
633 license_num = sn_partition_serial_number_val();
634 for (i = 3; i >= 0; i--) {
635 xpnet_device->dev_addr[XPNET_LICENSE_OCTET + i] =
636 license_num & 0xff;
637 license_num = license_num >> 8;
638 }
639
640 /*
641 * ether_setup() sets this to a multicast device. We are
642 * really not supporting multicast at this time.
643 */
644 xpnet_device->flags &= ~IFF_MULTICAST;
645
646 /*
647 * No need to checksum as it is a DMA transfer. The BTE will
648 * report an error if the data is not retrievable and the
649 * packet will be dropped.
650 */
651 xpnet_device->features = NETIF_F_NO_CSUM;
652
653 result = register_netdev(xpnet_device);
654 if (result != 0)
655 free_netdev(xpnet_device);
656
657 return result;
658}
659
660module_init(xpnet_init);
661
662static void __exit
663xpnet_exit(void)
664{
665 dev_info(xpnet, "unregistering network device %s\n",
666 xpnet_device[0].name);
667
668 unregister_netdev(xpnet_device);
669
670 free_netdev(xpnet_device);
671}
672
673module_exit(xpnet_exit);
674
675MODULE_AUTHOR("Silicon Graphics, Inc.");
676MODULE_DESCRIPTION("Cross Partition Network adapter (XPNET)");
677MODULE_LICENSE("GPL");
diff --git a/drivers/misc/thinkpad_acpi.c b/drivers/misc/thinkpad_acpi.c
index 6cb781262f94..3f28f6eabdbf 100644
--- a/drivers/misc/thinkpad_acpi.c
+++ b/drivers/misc/thinkpad_acpi.c
@@ -21,7 +21,7 @@
21 * 02110-1301, USA. 21 * 02110-1301, USA.
22 */ 22 */
23 23
24#define TPACPI_VERSION "0.19" 24#define TPACPI_VERSION "0.20"
25#define TPACPI_SYSFS_VERSION 0x020200 25#define TPACPI_SYSFS_VERSION 0x020200
26 26
27/* 27/*
@@ -67,6 +67,7 @@
67#include <linux/hwmon.h> 67#include <linux/hwmon.h>
68#include <linux/hwmon-sysfs.h> 68#include <linux/hwmon-sysfs.h>
69#include <linux/input.h> 69#include <linux/input.h>
70#include <linux/leds.h>
70#include <asm/uaccess.h> 71#include <asm/uaccess.h>
71 72
72#include <linux/dmi.h> 73#include <linux/dmi.h>
@@ -85,6 +86,8 @@
85#define TP_CMOS_VOLUME_MUTE 2 86#define TP_CMOS_VOLUME_MUTE 2
86#define TP_CMOS_BRIGHTNESS_UP 4 87#define TP_CMOS_BRIGHTNESS_UP 4
87#define TP_CMOS_BRIGHTNESS_DOWN 5 88#define TP_CMOS_BRIGHTNESS_DOWN 5
89#define TP_CMOS_THINKLIGHT_ON 12
90#define TP_CMOS_THINKLIGHT_OFF 13
88 91
89/* NVRAM Addresses */ 92/* NVRAM Addresses */
90enum tp_nvram_addr { 93enum tp_nvram_addr {
@@ -133,8 +136,12 @@ enum {
133#define TPACPI_PROC_DIR "ibm" 136#define TPACPI_PROC_DIR "ibm"
134#define TPACPI_ACPI_EVENT_PREFIX "ibm" 137#define TPACPI_ACPI_EVENT_PREFIX "ibm"
135#define TPACPI_DRVR_NAME TPACPI_FILE 138#define TPACPI_DRVR_NAME TPACPI_FILE
139#define TPACPI_DRVR_SHORTNAME "tpacpi"
136#define TPACPI_HWMON_DRVR_NAME TPACPI_NAME "_hwmon" 140#define TPACPI_HWMON_DRVR_NAME TPACPI_NAME "_hwmon"
137 141
142#define TPACPI_NVRAM_KTHREAD_NAME "ktpacpi_nvramd"
143#define TPACPI_WORKQUEUE_NAME "ktpacpid"
144
138#define TPACPI_MAX_ACPI_ARGS 3 145#define TPACPI_MAX_ACPI_ARGS 3
139 146
140/* Debugging */ 147/* Debugging */
@@ -225,6 +232,7 @@ static struct {
225 u32 light:1; 232 u32 light:1;
226 u32 light_status:1; 233 u32 light_status:1;
227 u32 bright_16levels:1; 234 u32 bright_16levels:1;
235 u32 bright_acpimode:1;
228 u32 wan:1; 236 u32 wan:1;
229 u32 fan_ctrl_status_undef:1; 237 u32 fan_ctrl_status_undef:1;
230 u32 input_device_registered:1; 238 u32 input_device_registered:1;
@@ -236,6 +244,11 @@ static struct {
236 u32 hotkey_poll_active:1; 244 u32 hotkey_poll_active:1;
237} tp_features; 245} tp_features;
238 246
247static struct {
248 u16 hotkey_mask_ff:1;
249 u16 bright_cmos_ec_unsync:1;
250} tp_warned;
251
239struct thinkpad_id_data { 252struct thinkpad_id_data {
240 unsigned int vendor; /* ThinkPad vendor: 253 unsigned int vendor; /* ThinkPad vendor:
241 * PCI_VENDOR_ID_IBM/PCI_VENDOR_ID_LENOVO */ 254 * PCI_VENDOR_ID_IBM/PCI_VENDOR_ID_LENOVO */
@@ -246,7 +259,8 @@ struct thinkpad_id_data {
246 u16 bios_model; /* Big Endian, TP-1Y = 0x5931, 0 = unknown */ 259 u16 bios_model; /* Big Endian, TP-1Y = 0x5931, 0 = unknown */
247 u16 ec_model; 260 u16 ec_model;
248 261
249 char *model_str; 262 char *model_str; /* ThinkPad T43 */
263 char *nummodel_str; /* 9384A9C for a 9384-A9C model */
250}; 264};
251static struct thinkpad_id_data thinkpad_id; 265static struct thinkpad_id_data thinkpad_id;
252 266
@@ -259,6 +273,16 @@ static enum {
259static int experimental; 273static int experimental;
260static u32 dbg_level; 274static u32 dbg_level;
261 275
276static struct workqueue_struct *tpacpi_wq;
277
278/* Special LED class that can defer work */
279struct tpacpi_led_classdev {
280 struct led_classdev led_classdev;
281 struct work_struct work;
282 enum led_brightness new_brightness;
283 unsigned int led;
284};
285
262/**************************************************************************** 286/****************************************************************************
263 **************************************************************************** 287 ****************************************************************************
264 * 288 *
@@ -807,6 +831,80 @@ static int parse_strtoul(const char *buf,
807 return 0; 831 return 0;
808} 832}
809 833
834static int __init tpacpi_query_bcl_levels(acpi_handle handle)
835{
836 struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
837 union acpi_object *obj;
838 int rc;
839
840 if (ACPI_SUCCESS(acpi_evaluate_object(handle, NULL, NULL, &buffer))) {
841 obj = (union acpi_object *)buffer.pointer;
842 if (!obj || (obj->type != ACPI_TYPE_PACKAGE)) {
843 printk(TPACPI_ERR "Unknown _BCL data, "
844 "please report this to %s\n", TPACPI_MAIL);
845 rc = 0;
846 } else {
847 rc = obj->package.count;
848 }
849 } else {
850 return 0;
851 }
852
853 kfree(buffer.pointer);
854 return rc;
855}
856
857static acpi_status __init tpacpi_acpi_walk_find_bcl(acpi_handle handle,
858 u32 lvl, void *context, void **rv)
859{
860 char name[ACPI_PATH_SEGMENT_LENGTH];
861 struct acpi_buffer buffer = { sizeof(name), &name };
862
863 if (ACPI_SUCCESS(acpi_get_name(handle, ACPI_SINGLE_NAME, &buffer)) &&
864 !strncmp("_BCL", name, sizeof(name) - 1)) {
865 BUG_ON(!rv || !*rv);
866 **(int **)rv = tpacpi_query_bcl_levels(handle);
867 return AE_CTRL_TERMINATE;
868 } else {
869 return AE_OK;
870 }
871}
872
873/*
874 * Returns 0 (no ACPI _BCL or _BCL invalid), or size of brightness map
875 */
876static int __init tpacpi_check_std_acpi_brightness_support(void)
877{
878 int status;
879 int bcl_levels = 0;
880 void *bcl_ptr = &bcl_levels;
881
882 if (!vid_handle) {
883 TPACPI_ACPIHANDLE_INIT(vid);
884 }
885 if (!vid_handle)
886 return 0;
887
888 /*
889 * Search for a _BCL method, and execute it. This is safe on all
890 * ThinkPads, and as a side-effect, _BCL will place a Lenovo Vista
891 * BIOS in ACPI backlight control mode. We do NOT have to care
892 * about calling the _BCL method in an enabled video device, any
893 * will do for our purposes.
894 */
895
896 status = acpi_walk_namespace(ACPI_TYPE_METHOD, vid_handle, 3,
897 tpacpi_acpi_walk_find_bcl, NULL,
898 &bcl_ptr);
899
900 if (ACPI_SUCCESS(status) && bcl_levels > 2) {
901 tp_features.bright_acpimode = 1;
902 return (bcl_levels - 2);
903 }
904
905 return 0;
906}
907
810/************************************************************************* 908/*************************************************************************
811 * thinkpad-acpi driver attributes 909 * thinkpad-acpi driver attributes
812 */ 910 */
@@ -909,12 +1007,14 @@ static int __init thinkpad_acpi_driver_init(struct ibm_init_struct *iibm)
909 thinkpad_id.ec_version_str : "unknown"); 1007 thinkpad_id.ec_version_str : "unknown");
910 1008
911 if (thinkpad_id.vendor && thinkpad_id.model_str) 1009 if (thinkpad_id.vendor && thinkpad_id.model_str)
912 printk(TPACPI_INFO "%s %s\n", 1010 printk(TPACPI_INFO "%s %s, model %s\n",
913 (thinkpad_id.vendor == PCI_VENDOR_ID_IBM) ? 1011 (thinkpad_id.vendor == PCI_VENDOR_ID_IBM) ?
914 "IBM" : ((thinkpad_id.vendor == 1012 "IBM" : ((thinkpad_id.vendor ==
915 PCI_VENDOR_ID_LENOVO) ? 1013 PCI_VENDOR_ID_LENOVO) ?
916 "Lenovo" : "Unknown vendor"), 1014 "Lenovo" : "Unknown vendor"),
917 thinkpad_id.model_str); 1015 thinkpad_id.model_str,
1016 (thinkpad_id.nummodel_str) ?
1017 thinkpad_id.nummodel_str : "unknown");
918 1018
919 return 0; 1019 return 0;
920} 1020}
@@ -1107,6 +1207,19 @@ static int hotkey_mask_set(u32 mask)
1107 int rc = 0; 1207 int rc = 0;
1108 1208
1109 if (tp_features.hotkey_mask) { 1209 if (tp_features.hotkey_mask) {
1210 if (!tp_warned.hotkey_mask_ff &&
1211 (mask == 0xffff || mask == 0xffffff ||
1212 mask == 0xffffffff)) {
1213 tp_warned.hotkey_mask_ff = 1;
1214 printk(TPACPI_NOTICE
1215 "setting the hotkey mask to 0x%08x is likely "
1216 "not the best way to go about it\n", mask);
1217 printk(TPACPI_NOTICE
1218 "please consider using the driver defaults, "
1219 "and refer to up-to-date thinkpad-acpi "
1220 "documentation\n");
1221 }
1222
1110 HOTKEY_CONFIG_CRITICAL_START 1223 HOTKEY_CONFIG_CRITICAL_START
1111 for (i = 0; i < 32; i++) { 1224 for (i = 0; i < 32; i++) {
1112 u32 m = 1 << i; 1225 u32 m = 1 << i;
@@ -1427,8 +1540,7 @@ static void hotkey_poll_setup(int may_warn)
1427 (tpacpi_inputdev->users > 0 || hotkey_report_mode < 2)) { 1540 (tpacpi_inputdev->users > 0 || hotkey_report_mode < 2)) {
1428 if (!tpacpi_hotkey_task) { 1541 if (!tpacpi_hotkey_task) {
1429 tpacpi_hotkey_task = kthread_run(hotkey_kthread, 1542 tpacpi_hotkey_task = kthread_run(hotkey_kthread,
1430 NULL, 1543 NULL, TPACPI_NVRAM_KTHREAD_NAME);
1431 TPACPI_FILE "d");
1432 if (IS_ERR(tpacpi_hotkey_task)) { 1544 if (IS_ERR(tpacpi_hotkey_task)) {
1433 tpacpi_hotkey_task = NULL; 1545 tpacpi_hotkey_task = NULL;
1434 printk(TPACPI_ERR 1546 printk(TPACPI_ERR
@@ -1887,6 +1999,9 @@ static int __init hotkey_init(struct ibm_init_struct *iibm)
1887 KEY_UNKNOWN, /* 0x0D: FN+INSERT */ 1999 KEY_UNKNOWN, /* 0x0D: FN+INSERT */
1888 KEY_UNKNOWN, /* 0x0E: FN+DELETE */ 2000 KEY_UNKNOWN, /* 0x0E: FN+DELETE */
1889 2001
2002 /* These either have to go through ACPI video, or
2003 * act like in the IBM ThinkPads, so don't ever
2004 * enable them by default */
1890 KEY_RESERVED, /* 0x0F: FN+HOME (brightness up) */ 2005 KEY_RESERVED, /* 0x0F: FN+HOME (brightness up) */
1891 KEY_RESERVED, /* 0x10: FN+END (brightness down) */ 2006 KEY_RESERVED, /* 0x10: FN+END (brightness down) */
1892 2007
@@ -2091,6 +2206,32 @@ static int __init hotkey_init(struct ibm_init_struct *iibm)
2091 set_bit(SW_TABLET_MODE, tpacpi_inputdev->swbit); 2206 set_bit(SW_TABLET_MODE, tpacpi_inputdev->swbit);
2092 } 2207 }
2093 2208
2209 /* Do not issue duplicate brightness change events to
2210 * userspace */
2211 if (!tp_features.bright_acpimode)
2212 /* update bright_acpimode... */
2213 tpacpi_check_std_acpi_brightness_support();
2214
2215 if (tp_features.bright_acpimode) {
2216 printk(TPACPI_INFO
2217 "This ThinkPad has standard ACPI backlight "
2218 "brightness control, supported by the ACPI "
2219 "video driver\n");
2220 printk(TPACPI_NOTICE
2221 "Disabling thinkpad-acpi brightness events "
2222 "by default...\n");
2223
2224 /* The hotkey_reserved_mask change below is not
2225 * necessary while the keys are at KEY_RESERVED in the
2226 * default map, but better safe than sorry, leave it
2227 * here as a marker of what we have to do, especially
2228 * when we finally become able to set this at runtime
2229 * on response to X.org requests */
2230 hotkey_reserved_mask |=
2231 (1 << TP_ACPI_HOTKEYSCAN_FNHOME)
2232 | (1 << TP_ACPI_HOTKEYSCAN_FNEND);
2233 }
2234
2094 dbg_printk(TPACPI_DBG_INIT, 2235 dbg_printk(TPACPI_DBG_INIT,
2095 "enabling hot key handling\n"); 2236 "enabling hot key handling\n");
2096 res = hotkey_status_set(1); 2237 res = hotkey_status_set(1);
@@ -3110,13 +3251,82 @@ static struct ibm_struct video_driver_data = {
3110TPACPI_HANDLE(lght, root, "\\LGHT"); /* A21e, A2xm/p, T20-22, X20-21 */ 3251TPACPI_HANDLE(lght, root, "\\LGHT"); /* A21e, A2xm/p, T20-22, X20-21 */
3111TPACPI_HANDLE(ledb, ec, "LEDB"); /* G4x */ 3252TPACPI_HANDLE(ledb, ec, "LEDB"); /* G4x */
3112 3253
3254static int light_get_status(void)
3255{
3256 int status = 0;
3257
3258 if (tp_features.light_status) {
3259 if (!acpi_evalf(ec_handle, &status, "KBLT", "d"))
3260 return -EIO;
3261 return (!!status);
3262 }
3263
3264 return -ENXIO;
3265}
3266
3267static int light_set_status(int status)
3268{
3269 int rc;
3270
3271 if (tp_features.light) {
3272 if (cmos_handle) {
3273 rc = acpi_evalf(cmos_handle, NULL, NULL, "vd",
3274 (status)?
3275 TP_CMOS_THINKLIGHT_ON :
3276 TP_CMOS_THINKLIGHT_OFF);
3277 } else {
3278 rc = acpi_evalf(lght_handle, NULL, NULL, "vd",
3279 (status)? 1 : 0);
3280 }
3281 return (rc)? 0 : -EIO;
3282 }
3283
3284 return -ENXIO;
3285}
3286
3287static void light_set_status_worker(struct work_struct *work)
3288{
3289 struct tpacpi_led_classdev *data =
3290 container_of(work, struct tpacpi_led_classdev, work);
3291
3292 if (likely(tpacpi_lifecycle == TPACPI_LIFE_RUNNING))
3293 light_set_status((data->new_brightness != LED_OFF));
3294}
3295
3296static void light_sysfs_set(struct led_classdev *led_cdev,
3297 enum led_brightness brightness)
3298{
3299 struct tpacpi_led_classdev *data =
3300 container_of(led_cdev,
3301 struct tpacpi_led_classdev,
3302 led_classdev);
3303 data->new_brightness = brightness;
3304 queue_work(tpacpi_wq, &data->work);
3305}
3306
3307static enum led_brightness light_sysfs_get(struct led_classdev *led_cdev)
3308{
3309 return (light_get_status() == 1)? LED_FULL : LED_OFF;
3310}
3311
3312static struct tpacpi_led_classdev tpacpi_led_thinklight = {
3313 .led_classdev = {
3314 .name = "tpacpi::thinklight",
3315 .brightness_set = &light_sysfs_set,
3316 .brightness_get = &light_sysfs_get,
3317 }
3318};
3319
3113static int __init light_init(struct ibm_init_struct *iibm) 3320static int __init light_init(struct ibm_init_struct *iibm)
3114{ 3321{
3322 int rc = 0;
3323
3115 vdbg_printk(TPACPI_DBG_INIT, "initializing light subdriver\n"); 3324 vdbg_printk(TPACPI_DBG_INIT, "initializing light subdriver\n");
3116 3325
3117 TPACPI_ACPIHANDLE_INIT(ledb); 3326 TPACPI_ACPIHANDLE_INIT(ledb);
3118 TPACPI_ACPIHANDLE_INIT(lght); 3327 TPACPI_ACPIHANDLE_INIT(lght);
3119 TPACPI_ACPIHANDLE_INIT(cmos); 3328 TPACPI_ACPIHANDLE_INIT(cmos);
3329 INIT_WORK(&tpacpi_led_thinklight.work, light_set_status_worker);
3120 3330
3121 /* light not supported on 570, 600e/x, 770e, 770x, G4x, R30, R31 */ 3331 /* light not supported on 570, 600e/x, 770e, 770x, G4x, R30, R31 */
3122 tp_features.light = (cmos_handle || lght_handle) && !ledb_handle; 3332 tp_features.light = (cmos_handle || lght_handle) && !ledb_handle;
@@ -3130,13 +3340,31 @@ static int __init light_init(struct ibm_init_struct *iibm)
3130 vdbg_printk(TPACPI_DBG_INIT, "light is %s\n", 3340 vdbg_printk(TPACPI_DBG_INIT, "light is %s\n",
3131 str_supported(tp_features.light)); 3341 str_supported(tp_features.light));
3132 3342
3133 return (tp_features.light)? 0 : 1; 3343 if (tp_features.light) {
3344 rc = led_classdev_register(&tpacpi_pdev->dev,
3345 &tpacpi_led_thinklight.led_classdev);
3346 }
3347
3348 if (rc < 0) {
3349 tp_features.light = 0;
3350 tp_features.light_status = 0;
3351 } else {
3352 rc = (tp_features.light)? 0 : 1;
3353 }
3354 return rc;
3355}
3356
3357static void light_exit(void)
3358{
3359 led_classdev_unregister(&tpacpi_led_thinklight.led_classdev);
3360 if (work_pending(&tpacpi_led_thinklight.work))
3361 flush_workqueue(tpacpi_wq);
3134} 3362}
3135 3363
3136static int light_read(char *p) 3364static int light_read(char *p)
3137{ 3365{
3138 int len = 0; 3366 int len = 0;
3139 int status = 0; 3367 int status;
3140 3368
3141 if (!tp_features.light) { 3369 if (!tp_features.light) {
3142 len += sprintf(p + len, "status:\t\tnot supported\n"); 3370 len += sprintf(p + len, "status:\t\tnot supported\n");
@@ -3144,8 +3372,9 @@ static int light_read(char *p)
3144 len += sprintf(p + len, "status:\t\tunknown\n"); 3372 len += sprintf(p + len, "status:\t\tunknown\n");
3145 len += sprintf(p + len, "commands:\ton, off\n"); 3373 len += sprintf(p + len, "commands:\ton, off\n");
3146 } else { 3374 } else {
3147 if (!acpi_evalf(ec_handle, &status, "KBLT", "d")) 3375 status = light_get_status();
3148 return -EIO; 3376 if (status < 0)
3377 return status;
3149 len += sprintf(p + len, "status:\t\t%s\n", onoff(status, 0)); 3378 len += sprintf(p + len, "status:\t\t%s\n", onoff(status, 0));
3150 len += sprintf(p + len, "commands:\ton, off\n"); 3379 len += sprintf(p + len, "commands:\ton, off\n");
3151 } 3380 }
@@ -3155,37 +3384,29 @@ static int light_read(char *p)
3155 3384
3156static int light_write(char *buf) 3385static int light_write(char *buf)
3157{ 3386{
3158 int cmos_cmd, lght_cmd;
3159 char *cmd; 3387 char *cmd;
3160 int success; 3388 int newstatus = 0;
3161 3389
3162 if (!tp_features.light) 3390 if (!tp_features.light)
3163 return -ENODEV; 3391 return -ENODEV;
3164 3392
3165 while ((cmd = next_cmd(&buf))) { 3393 while ((cmd = next_cmd(&buf))) {
3166 if (strlencmp(cmd, "on") == 0) { 3394 if (strlencmp(cmd, "on") == 0) {
3167 cmos_cmd = 0x0c; 3395 newstatus = 1;
3168 lght_cmd = 1;
3169 } else if (strlencmp(cmd, "off") == 0) { 3396 } else if (strlencmp(cmd, "off") == 0) {
3170 cmos_cmd = 0x0d; 3397 newstatus = 0;
3171 lght_cmd = 0;
3172 } else 3398 } else
3173 return -EINVAL; 3399 return -EINVAL;
3174
3175 success = cmos_handle ?
3176 acpi_evalf(cmos_handle, NULL, NULL, "vd", cmos_cmd) :
3177 acpi_evalf(lght_handle, NULL, NULL, "vd", lght_cmd);
3178 if (!success)
3179 return -EIO;
3180 } 3400 }
3181 3401
3182 return 0; 3402 return light_set_status(newstatus);
3183} 3403}
3184 3404
3185static struct ibm_struct light_driver_data = { 3405static struct ibm_struct light_driver_data = {
3186 .name = "light", 3406 .name = "light",
3187 .read = light_read, 3407 .read = light_read,
3188 .write = light_write, 3408 .write = light_write,
3409 .exit = light_exit,
3189}; 3410};
3190 3411
3191/************************************************************************* 3412/*************************************************************************
@@ -3583,6 +3804,12 @@ enum { /* For TPACPI_LED_OLD */
3583 TPACPI_LED_EC_HLMS = 0x0e, /* EC reg to select led to command */ 3804 TPACPI_LED_EC_HLMS = 0x0e, /* EC reg to select led to command */
3584}; 3805};
3585 3806
3807enum led_status_t {
3808 TPACPI_LED_OFF = 0,
3809 TPACPI_LED_ON,
3810 TPACPI_LED_BLINK,
3811};
3812
3586static enum led_access_mode led_supported; 3813static enum led_access_mode led_supported;
3587 3814
3588TPACPI_HANDLE(led, ec, "SLED", /* 570 */ 3815TPACPI_HANDLE(led, ec, "SLED", /* 570 */
@@ -3591,8 +3818,174 @@ TPACPI_HANDLE(led, ec, "SLED", /* 570 */
3591 "LED", /* all others */ 3818 "LED", /* all others */
3592 ); /* R30, R31 */ 3819 ); /* R30, R31 */
3593 3820
3821#define TPACPI_LED_NUMLEDS 8
3822static struct tpacpi_led_classdev *tpacpi_leds;
3823static enum led_status_t tpacpi_led_state_cache[TPACPI_LED_NUMLEDS];
3824static const char * const tpacpi_led_names[TPACPI_LED_NUMLEDS] = {
3825 /* there's a limit of 19 chars + NULL before 2.6.26 */
3826 "tpacpi::power",
3827 "tpacpi:orange:batt",
3828 "tpacpi:green:batt",
3829 "tpacpi::dock_active",
3830 "tpacpi::bay_active",
3831 "tpacpi::dock_batt",
3832 "tpacpi::unknown_led",
3833 "tpacpi::standby",
3834};
3835
3836static int led_get_status(unsigned int led)
3837{
3838 int status;
3839 enum led_status_t led_s;
3840
3841 switch (led_supported) {
3842 case TPACPI_LED_570:
3843 if (!acpi_evalf(ec_handle,
3844 &status, "GLED", "dd", 1 << led))
3845 return -EIO;
3846 led_s = (status == 0)?
3847 TPACPI_LED_OFF :
3848 ((status == 1)?
3849 TPACPI_LED_ON :
3850 TPACPI_LED_BLINK);
3851 tpacpi_led_state_cache[led] = led_s;
3852 return led_s;
3853 default:
3854 return -ENXIO;
3855 }
3856
3857 /* not reached */
3858}
3859
3860static int led_set_status(unsigned int led, enum led_status_t ledstatus)
3861{
3862 /* off, on, blink. Index is led_status_t */
3863 static const int led_sled_arg1[] = { 0, 1, 3 };
3864 static const int led_exp_hlbl[] = { 0, 0, 1 }; /* led# * */
3865 static const int led_exp_hlcl[] = { 0, 1, 1 }; /* led# * */
3866 static const int led_led_arg1[] = { 0, 0x80, 0xc0 };
3867
3868 int rc = 0;
3869
3870 switch (led_supported) {
3871 case TPACPI_LED_570:
3872 /* 570 */
3873 led = 1 << led;
3874 if (!acpi_evalf(led_handle, NULL, NULL, "vdd",
3875 led, led_sled_arg1[ledstatus]))
3876 rc = -EIO;
3877 break;
3878 case TPACPI_LED_OLD:
3879 /* 600e/x, 770e, 770x, A21e, A2xm/p, T20-22, X20 */
3880 led = 1 << led;
3881 rc = ec_write(TPACPI_LED_EC_HLMS, led);
3882 if (rc >= 0)
3883 rc = ec_write(TPACPI_LED_EC_HLBL,
3884 led * led_exp_hlbl[ledstatus]);
3885 if (rc >= 0)
3886 rc = ec_write(TPACPI_LED_EC_HLCL,
3887 led * led_exp_hlcl[ledstatus]);
3888 break;
3889 case TPACPI_LED_NEW:
3890 /* all others */
3891 if (!acpi_evalf(led_handle, NULL, NULL, "vdd",
3892 led, led_led_arg1[ledstatus]))
3893 rc = -EIO;
3894 break;
3895 default:
3896 rc = -ENXIO;
3897 }
3898
3899 if (!rc)
3900 tpacpi_led_state_cache[led] = ledstatus;
3901
3902 return rc;
3903}
3904
3905static void led_sysfs_set_status(unsigned int led,
3906 enum led_brightness brightness)
3907{
3908 led_set_status(led,
3909 (brightness == LED_OFF) ?
3910 TPACPI_LED_OFF :
3911 (tpacpi_led_state_cache[led] == TPACPI_LED_BLINK) ?
3912 TPACPI_LED_BLINK : TPACPI_LED_ON);
3913}
3914
3915static void led_set_status_worker(struct work_struct *work)
3916{
3917 struct tpacpi_led_classdev *data =
3918 container_of(work, struct tpacpi_led_classdev, work);
3919
3920 if (likely(tpacpi_lifecycle == TPACPI_LIFE_RUNNING))
3921 led_sysfs_set_status(data->led, data->new_brightness);
3922}
3923
3924static void led_sysfs_set(struct led_classdev *led_cdev,
3925 enum led_brightness brightness)
3926{
3927 struct tpacpi_led_classdev *data = container_of(led_cdev,
3928 struct tpacpi_led_classdev, led_classdev);
3929
3930 data->new_brightness = brightness;
3931 queue_work(tpacpi_wq, &data->work);
3932}
3933
3934static int led_sysfs_blink_set(struct led_classdev *led_cdev,
3935 unsigned long *delay_on, unsigned long *delay_off)
3936{
3937 struct tpacpi_led_classdev *data = container_of(led_cdev,
3938 struct tpacpi_led_classdev, led_classdev);
3939
3940 /* Can we choose the flash rate? */
3941 if (*delay_on == 0 && *delay_off == 0) {
3942 /* yes. set them to the hardware blink rate (1 Hz) */
3943 *delay_on = 500; /* ms */
3944 *delay_off = 500; /* ms */
3945 } else if ((*delay_on != 500) || (*delay_off != 500))
3946 return -EINVAL;
3947
3948 data->new_brightness = TPACPI_LED_BLINK;
3949 queue_work(tpacpi_wq, &data->work);
3950
3951 return 0;
3952}
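Since this handler only accepts the fixed 500ms/500ms hardware rate, blinking is normally driven from userspace through the LED class timer trigger. A minimal userspace sketch, assuming the standard /sys/class/leds layout, the tpacpi::power LED, and a kernel with the timer trigger built in:

/* illustration only, not driver code */
#include <stdio.h>

static int write_str(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");
	if (!f)
		return -1;
	fputs(val, f);
	return fclose(f);
}

int main(void)
{
	const char *base = "/sys/class/leds/tpacpi::power";
	char path[128];

	snprintf(path, sizeof(path), "%s/trigger", base);
	if (write_str(path, "timer"))
		return 1;
	snprintf(path, sizeof(path), "%s/delay_on", base);
	write_str(path, "500");
	snprintf(path, sizeof(path), "%s/delay_off", base);
	write_str(path, "500");
	return 0;
}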
3953
3954static enum led_brightness led_sysfs_get(struct led_classdev *led_cdev)
3955{
3956 int rc;
3957
3958 struct tpacpi_led_classdev *data = container_of(led_cdev,
3959 struct tpacpi_led_classdev, led_classdev);
3960
3961 rc = led_get_status(data->led);
3962
3963 if (rc == TPACPI_LED_OFF || rc < 0)
3964 rc = LED_OFF; /* no error handling in led class :( */
3965 else
3966 rc = LED_FULL;
3967
3968 return rc;
3969}
3970
3971static void led_exit(void)
3972{
3973 unsigned int i;
3974
3975 for (i = 0; i < TPACPI_LED_NUMLEDS; i++) {
3976 if (tpacpi_leds[i].led_classdev.name)
3977 led_classdev_unregister(&tpacpi_leds[i].led_classdev);
3978 }
3979
3980 kfree(tpacpi_leds);
3981 tpacpi_leds = NULL;
3982}
3983
3594static int __init led_init(struct ibm_init_struct *iibm) 3984static int __init led_init(struct ibm_init_struct *iibm)
3595{ 3985{
3986 unsigned int i;
3987 int rc;
3988
3596 vdbg_printk(TPACPI_DBG_INIT, "initializing LED subdriver\n"); 3989 vdbg_printk(TPACPI_DBG_INIT, "initializing LED subdriver\n");
3597 3990
3598 TPACPI_ACPIHANDLE_INIT(led); 3991 TPACPI_ACPIHANDLE_INIT(led);
@@ -3613,10 +4006,41 @@ static int __init led_init(struct ibm_init_struct *iibm)
3613 vdbg_printk(TPACPI_DBG_INIT, "LED commands are %s, mode %d\n", 4006 vdbg_printk(TPACPI_DBG_INIT, "LED commands are %s, mode %d\n",
3614 str_supported(led_supported), led_supported); 4007 str_supported(led_supported), led_supported);
3615 4008
4009 tpacpi_leds = kzalloc(sizeof(*tpacpi_leds) * TPACPI_LED_NUMLEDS,
4010 GFP_KERNEL);
4011 if (!tpacpi_leds) {
4012 printk(TPACPI_ERR "Out of memory for LED data\n");
4013 return -ENOMEM;
4014 }
4015
4016 for (i = 0; i < TPACPI_LED_NUMLEDS; i++) {
4017 tpacpi_leds[i].led = i;
4018
4019 tpacpi_leds[i].led_classdev.brightness_set = &led_sysfs_set;
4020 tpacpi_leds[i].led_classdev.blink_set = &led_sysfs_blink_set;
4021 if (led_supported == TPACPI_LED_570)
4022 tpacpi_leds[i].led_classdev.brightness_get =
4023 &led_sysfs_get;
4024
4025 tpacpi_leds[i].led_classdev.name = tpacpi_led_names[i];
4026
4027 INIT_WORK(&tpacpi_leds[i].work, led_set_status_worker);
4028
4029 rc = led_classdev_register(&tpacpi_pdev->dev,
4030 &tpacpi_leds[i].led_classdev);
4031 if (rc < 0) {
4032 tpacpi_leds[i].led_classdev.name = NULL;
4033 led_exit();
4034 return rc;
4035 }
4036 }
4037
3616 return (led_supported != TPACPI_LED_NONE)? 0 : 1; 4038 return (led_supported != TPACPI_LED_NONE)? 0 : 1;
3617} 4039}
3618 4040
3619#define led_status(s) ((s) == 0 ? "off" : ((s) == 1 ? "on" : "blinking")) 4041#define str_led_status(s) \
4042 ((s) == TPACPI_LED_OFF ? "off" : \
4043 ((s) == TPACPI_LED_ON ? "on" : "blinking"))
3620 4044
3621static int led_read(char *p) 4045static int led_read(char *p)
3622{ 4046{
@@ -3632,11 +4056,11 @@ static int led_read(char *p)
3632 /* 570 */ 4056 /* 570 */
3633 int i, status; 4057 int i, status;
3634 for (i = 0; i < 8; i++) { 4058 for (i = 0; i < 8; i++) {
3635 if (!acpi_evalf(ec_handle, 4059 status = led_get_status(i);
3636 &status, "GLED", "dd", 1 << i)) 4060 if (status < 0)
3637 return -EIO; 4061 return -EIO;
3638 len += sprintf(p + len, "%d:\t\t%s\n", 4062 len += sprintf(p + len, "%d:\t\t%s\n",
3639 i, led_status(status)); 4063 i, str_led_status(status));
3640 } 4064 }
3641 } 4065 }
3642 4066
@@ -3646,16 +4070,11 @@ static int led_read(char *p)
3646 return len; 4070 return len;
3647} 4071}
3648 4072
3649/* off, on, blink */
3650static const int led_sled_arg1[] = { 0, 1, 3 };
3651static const int led_exp_hlbl[] = { 0, 0, 1 }; /* led# * */
3652static const int led_exp_hlcl[] = { 0, 1, 1 }; /* led# * */
3653static const int led_led_arg1[] = { 0, 0x80, 0xc0 };
3654
3655static int led_write(char *buf) 4073static int led_write(char *buf)
3656{ 4074{
3657 char *cmd; 4075 char *cmd;
3658 int led, ind, ret; 4076 int led, rc;
4077 enum led_status_t s;
3659 4078
3660 if (!led_supported) 4079 if (!led_supported)
3661 return -ENODEV; 4080 return -ENODEV;
@@ -3665,38 +4084,18 @@ static int led_write(char *buf)
3665 return -EINVAL; 4084 return -EINVAL;
3666 4085
3667 if (strstr(cmd, "off")) { 4086 if (strstr(cmd, "off")) {
3668 ind = 0; 4087 s = TPACPI_LED_OFF;
3669 } else if (strstr(cmd, "on")) { 4088 } else if (strstr(cmd, "on")) {
3670 ind = 1; 4089 s = TPACPI_LED_ON;
3671 } else if (strstr(cmd, "blink")) { 4090 } else if (strstr(cmd, "blink")) {
3672 ind = 2; 4091 s = TPACPI_LED_BLINK;
3673 } else
3674 return -EINVAL;
3675
3676 if (led_supported == TPACPI_LED_570) {
3677 /* 570 */
3678 led = 1 << led;
3679 if (!acpi_evalf(led_handle, NULL, NULL, "vdd",
3680 led, led_sled_arg1[ind]))
3681 return -EIO;
3682 } else if (led_supported == TPACPI_LED_OLD) {
3683 /* 600e/x, 770e, 770x, A21e, A2xm/p, T20-22, X20 */
3684 led = 1 << led;
3685 ret = ec_write(TPACPI_LED_EC_HLMS, led);
3686 if (ret >= 0)
3687 ret = ec_write(TPACPI_LED_EC_HLBL,
3688 led * led_exp_hlbl[ind]);
3689 if (ret >= 0)
3690 ret = ec_write(TPACPI_LED_EC_HLCL,
3691 led * led_exp_hlcl[ind]);
3692 if (ret < 0)
3693 return ret;
3694 } else { 4092 } else {
3695 /* all others */ 4093 return -EINVAL;
3696 if (!acpi_evalf(led_handle, NULL, NULL, "vdd",
3697 led, led_led_arg1[ind]))
3698 return -EIO;
3699 } 4094 }
4095
4096 rc = led_set_status(led, s);
4097 if (rc < 0)
4098 return rc;
3700 } 4099 }
3701 4100
3702 return 0; 4101 return 0;
@@ -3706,6 +4105,7 @@ static struct ibm_struct led_driver_data = {
3706 .name = "led", 4105 .name = "led",
3707 .read = led_read, 4106 .read = led_read,
3708 .write = led_write, 4107 .write = led_write,
4108 .exit = led_exit,
3709}; 4109};
3710 4110
3711/************************************************************************* 4111/*************************************************************************
@@ -4170,8 +4570,16 @@ static struct ibm_struct ecdump_driver_data = {
4170 4570
4171#define TPACPI_BACKLIGHT_DEV_NAME "thinkpad_screen" 4571#define TPACPI_BACKLIGHT_DEV_NAME "thinkpad_screen"
4172 4572
4573enum {
4574 TP_EC_BACKLIGHT = 0x31,
4575
4576 /* TP_EC_BACKLIGHT bitmasks */
4577 TP_EC_BACKLIGHT_LVLMSK = 0x1F,
4578 TP_EC_BACKLIGHT_CMDMSK = 0xE0,
4579 TP_EC_BACKLIGHT_MAPSW = 0x20,
4580};
4581
4173static struct backlight_device *ibm_backlight_device; 4582static struct backlight_device *ibm_backlight_device;
4174static int brightness_offset = 0x31;
4175static int brightness_mode; 4583static int brightness_mode;
4176static unsigned int brightness_enable = 2; /* 2 = auto, 0 = no, 1 = yes */ 4584static unsigned int brightness_enable = 2; /* 2 = auto, 0 = no, 1 = yes */
4177 4585
@@ -4180,16 +4588,24 @@ static struct mutex brightness_mutex;
4180/* 4588/*
4181 * ThinkPads can read brightness from two places: EC 0x31, or 4589 * ThinkPads can read brightness from two places: EC 0x31, or
4182 * CMOS NVRAM byte 0x5E, bits 0-3. 4590 * CMOS NVRAM byte 0x5E, bits 0-3.
4591 *
4592 * EC 0x31 has the following layout
4593 * Bit 7: unknown function
4594 * Bit 6: unknown function
4595 * Bit 5: Z: honour scale changes, NZ: ignore scale changes
4596 * Bit 4: must be set to zero to avoid problems
4597 * Bit 3-0: backlight brightness level
4598 *
4599 * brightness_get_raw returns status data in the EC 0x31 layout
4183 */ 4600 */
4184static int brightness_get(struct backlight_device *bd) 4601static int brightness_get_raw(int *status)
4185{ 4602{
4186 u8 lec = 0, lcmos = 0, level = 0; 4603 u8 lec = 0, lcmos = 0, level = 0;
4187 4604
4188 if (brightness_mode & 1) { 4605 if (brightness_mode & 1) {
4189 if (!acpi_ec_read(brightness_offset, &lec)) 4606 if (!acpi_ec_read(TP_EC_BACKLIGHT, &lec))
4190 return -EIO; 4607 return -EIO;
4191 lec &= (tp_features.bright_16levels)? 0x0f : 0x07; 4608 level = lec & TP_EC_BACKLIGHT_LVLMSK;
4192 level = lec;
4193	} 4609
4194 if (brightness_mode & 2) { 4610 if (brightness_mode & 2) {
4195 lcmos = (nvram_read_byte(TP_NVRAM_ADDR_BRIGHTNESS) 4611 lcmos = (nvram_read_byte(TP_NVRAM_ADDR_BRIGHTNESS)
@@ -4199,16 +4615,27 @@ static int brightness_get(struct backlight_device *bd)
4199 level = lcmos; 4615 level = lcmos;
4200 } 4616 }
4201 4617
4202 if (brightness_mode == 3 && lec != lcmos) { 4618 if (brightness_mode == 3) {
4203 printk(TPACPI_ERR 4619 *status = lec; /* Prefer EC, CMOS is just a backing store */
4204 "CMOS NVRAM (%u) and EC (%u) do not agree " 4620 lec &= TP_EC_BACKLIGHT_LVLMSK;
4205 "on display brightness level\n", 4621 if (lec == lcmos)
4206 (unsigned int) lcmos, 4622 tp_warned.bright_cmos_ec_unsync = 0;
4207 (unsigned int) lec); 4623 else {
4208 return -EIO; 4624 if (!tp_warned.bright_cmos_ec_unsync) {
4625 printk(TPACPI_ERR
4626 "CMOS NVRAM (%u) and EC (%u) do not "
4627 "agree on display brightness level\n",
4628 (unsigned int) lcmos,
4629 (unsigned int) lec);
4630 tp_warned.bright_cmos_ec_unsync = 1;
4631 }
4632 return -EIO;
4633 }
4634 } else {
4635 *status = level;
4209 } 4636 }
4210 4637
4211 return level; 4638 return 0;
4212} 4639}
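A small standalone sketch (not driver code) of how a raw EC 0x31 byte splits into level and command bits under the masks above; the example value is arbitrary:

/* illustration only, not driver code */
#include <stdio.h>

#define LVLMSK	0x1F	/* TP_EC_BACKLIGHT_LVLMSK */
#define CMDMSK	0xE0	/* TP_EC_BACKLIGHT_CMDMSK */
#define MAPSW	0x20	/* TP_EC_BACKLIGHT_MAPSW */

int main(void)
{
	unsigned int raw = 0x27;	/* arbitrary example byte */

	printf("level=%u cmd=0x%02x mapsw=%u\n",
	       raw & LVLMSK, raw & CMDMSK, (raw & MAPSW) ? 1u : 0u);
	return 0;
}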
4213 4640
4214/* May return EINTR which can always be mapped to ERESTARTSYS */ 4641/* May return EINTR which can always be mapped to ERESTARTSYS */
@@ -4216,19 +4643,22 @@ static int brightness_set(int value)
4216{ 4643{
4217 int cmos_cmd, inc, i, res; 4644 int cmos_cmd, inc, i, res;
4218 int current_value; 4645 int current_value;
4646 int command_bits;
4219 4647
4220 if (value > ((tp_features.bright_16levels)? 15 : 7)) 4648 if (value > ((tp_features.bright_16levels)? 15 : 7) ||
4649 value < 0)
4221 return -EINVAL; 4650 return -EINVAL;
4222 4651
4223 res = mutex_lock_interruptible(&brightness_mutex); 4652 res = mutex_lock_interruptible(&brightness_mutex);
4224 if (res < 0) 4653 if (res < 0)
4225 return res; 4654 return res;
4226 4655
4227 current_value = brightness_get(NULL); 4656 res = brightness_get_raw(&current_value);
4228 if (current_value < 0) { 4657 if (res < 0)
4229 res = current_value;
4230 goto errout; 4658 goto errout;
4231 } 4659
4660 command_bits = current_value & TP_EC_BACKLIGHT_CMDMSK;
4661 current_value &= TP_EC_BACKLIGHT_LVLMSK;
4232 4662
4233 cmos_cmd = value > current_value ? 4663 cmos_cmd = value > current_value ?
4234 TP_CMOS_BRIGHTNESS_UP : 4664 TP_CMOS_BRIGHTNESS_UP :
@@ -4243,7 +4673,8 @@ static int brightness_set(int value)
4243 goto errout; 4673 goto errout;
4244 } 4674 }
4245 if ((brightness_mode & 1) && 4675 if ((brightness_mode & 1) &&
4246 !acpi_ec_write(brightness_offset, i + inc)) { 4676 !acpi_ec_write(TP_EC_BACKLIGHT,
4677 (i + inc) | command_bits)) {
4247 res = -EIO; 4678 res = -EIO;
4248	goto errout; 4679
4249 } 4680 }
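The loop above steps one brightness level at a time, issuing a CMOS up/down command per step and mirroring each intermediate level (with the preserved command bits) to the EC. A standalone sketch of that stepping order, with example values only:

/* illustration only, not driver code */
#include <stdio.h>

int main(void)
{
	int current_level = 3, target = 7;	/* example values */
	int inc = (target > current_level) ? 1 : -1;
	int i;

	for (i = current_level; i != target; i += inc)
		printf("CMOS %s, then EC level %d | command_bits\n",
		       (inc > 0) ? "brightness-up" : "brightness-down", i + inc);
	return 0;
}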
@@ -4266,106 +4697,23 @@ static int brightness_update_status(struct backlight_device *bd)
4266 bd->props.brightness : 0); 4697 bd->props.brightness : 0);
4267} 4698}
4268 4699
4269static struct backlight_ops ibm_backlight_data = { 4700static int brightness_get(struct backlight_device *bd)
4270 .get_brightness = brightness_get,
4271 .update_status = brightness_update_status,
4272};
4273
4274/* --------------------------------------------------------------------- */
4275
4276static int __init tpacpi_query_bcll_levels(acpi_handle handle)
4277{
4278 struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
4279 union acpi_object *obj;
4280 int rc;
4281
4282 if (ACPI_SUCCESS(acpi_evaluate_object(handle, NULL, NULL, &buffer))) {
4283 obj = (union acpi_object *)buffer.pointer;
4284 if (!obj || (obj->type != ACPI_TYPE_PACKAGE)) {
4285 printk(TPACPI_ERR "Unknown BCLL data, "
4286 "please report this to %s\n", TPACPI_MAIL);
4287 rc = 0;
4288 } else {
4289 rc = obj->package.count;
4290 }
4291 } else {
4292 return 0;
4293 }
4294
4295 kfree(buffer.pointer);
4296 return rc;
4297}
4298
4299static acpi_status __init brightness_find_bcll(acpi_handle handle, u32 lvl,
4300 void *context, void **rv)
4301{
4302 char name[ACPI_PATH_SEGMENT_LENGTH];
4303 struct acpi_buffer buffer = { sizeof(name), &name };
4304
4305 if (ACPI_SUCCESS(acpi_get_name(handle, ACPI_SINGLE_NAME, &buffer)) &&
4306 !strncmp("BCLL", name, sizeof(name) - 1)) {
4307 if (tpacpi_query_bcll_levels(handle) == 16) {
4308 *rv = handle;
4309 return AE_CTRL_TERMINATE;
4310 } else {
4311 return AE_OK;
4312 }
4313 } else {
4314 return AE_OK;
4315 }
4316}
4317
4318static int __init brightness_check_levels(void)
4319{ 4701{
4320 int status; 4702 int status, res;
4321 void *found_node = NULL;
4322 4703
4323 if (!vid_handle) { 4704 res = brightness_get_raw(&status);
4324 TPACPI_ACPIHANDLE_INIT(vid); 4705 if (res < 0)
4325 } 4706 return 0; /* FIXME: teach backlight about error handling */
4326 if (!vid_handle)
4327 return 0;
4328
4329 /* Search for a BCLL package with 16 levels */
4330 status = acpi_walk_namespace(ACPI_TYPE_PACKAGE, vid_handle, 3,
4331 brightness_find_bcll, NULL,
4332 &found_node);
4333
4334 return (ACPI_SUCCESS(status) && found_node != NULL);
4335}
4336
4337static acpi_status __init brightness_find_bcl(acpi_handle handle, u32 lvl,
4338 void *context, void **rv)
4339{
4340 char name[ACPI_PATH_SEGMENT_LENGTH];
4341 struct acpi_buffer buffer = { sizeof(name), &name };
4342 4707
4343 if (ACPI_SUCCESS(acpi_get_name(handle, ACPI_SINGLE_NAME, &buffer)) && 4708 return status & TP_EC_BACKLIGHT_LVLMSK;
4344 !strncmp("_BCL", name, sizeof(name) - 1)) {
4345 *rv = handle;
4346 return AE_CTRL_TERMINATE;
4347 } else {
4348 return AE_OK;
4349 }
4350} 4709}
4351 4710
4352static int __init brightness_check_std_acpi_support(void) 4711static struct backlight_ops ibm_backlight_data = {
4353{ 4712 .get_brightness = brightness_get,
4354 int status; 4713 .update_status = brightness_update_status,
4355 void *found_node = NULL; 4714};
4356
4357 if (!vid_handle) {
4358 TPACPI_ACPIHANDLE_INIT(vid);
4359 }
4360 if (!vid_handle)
4361 return 0;
4362
4363 /* Search for a _BCL method, but don't execute it */
4364 status = acpi_walk_namespace(ACPI_TYPE_METHOD, vid_handle, 3,
4365 brightness_find_bcl, NULL, &found_node);
4366 4715
4367 return (ACPI_SUCCESS(status) && found_node != NULL); 4716/* --------------------------------------------------------------------- */
4368}
4369 4717
4370static int __init brightness_init(struct ibm_init_struct *iibm) 4718static int __init brightness_init(struct ibm_init_struct *iibm)
4371{ 4719{
@@ -4375,13 +4723,19 @@ static int __init brightness_init(struct ibm_init_struct *iibm)
4375 4723
4376 mutex_init(&brightness_mutex); 4724 mutex_init(&brightness_mutex);
4377 4725
4378 if (!brightness_enable) { 4726 /*
4379 dbg_printk(TPACPI_DBG_INIT, 4727 * We always attempt to detect acpi support, so as to switch
4380 "brightness support disabled by " 4728 * Lenovo Vista BIOS to ACPI brightness mode even if we are not
4381 "module parameter\n"); 4729 * going to publish a backlight interface
4382 return 1; 4730 */
4383 } else if (brightness_enable > 1) { 4731 b = tpacpi_check_std_acpi_brightness_support();
4384 if (brightness_check_std_acpi_support()) { 4732 if (b > 0) {
4733 if (thinkpad_id.vendor == PCI_VENDOR_ID_LENOVO) {
4734 printk(TPACPI_NOTICE
4735 "Lenovo BIOS switched to ACPI backlight "
4736 "control mode\n");
4737 }
4738 if (brightness_enable > 1) {
4385 printk(TPACPI_NOTICE 4739 printk(TPACPI_NOTICE
4386 "standard ACPI backlight interface " 4740 "standard ACPI backlight interface "
4387 "available, not loading native one...\n"); 4741 "available, not loading native one...\n");
@@ -4389,6 +4743,22 @@ static int __init brightness_init(struct ibm_init_struct *iibm)
4389 } 4743 }
4390 } 4744 }
4391 4745
4746 if (!brightness_enable) {
4747 dbg_printk(TPACPI_DBG_INIT,
4748 "brightness support disabled by "
4749 "module parameter\n");
4750 return 1;
4751 }
4752
4753 if (b > 16) {
4754 printk(TPACPI_ERR
4755 "Unsupported brightness interface, "
4756 "please contact %s\n", TPACPI_MAIL);
4757 return 1;
4758 }
4759 if (b == 16)
4760 tp_features.bright_16levels = 1;
4761
4392 if (!brightness_mode) { 4762 if (!brightness_mode) {
4393 if (thinkpad_id.vendor == PCI_VENDOR_ID_LENOVO) 4763 if (thinkpad_id.vendor == PCI_VENDOR_ID_LENOVO)
4394 brightness_mode = 2; 4764 brightness_mode = 2;
@@ -4402,12 +4772,7 @@ static int __init brightness_init(struct ibm_init_struct *iibm)
4402 if (brightness_mode > 3) 4772 if (brightness_mode > 3)
4403 return -EINVAL; 4773 return -EINVAL;
4404 4774
4405 tp_features.bright_16levels = 4775 if (brightness_get_raw(&b) < 0)
4406 thinkpad_id.vendor == PCI_VENDOR_ID_LENOVO &&
4407 brightness_check_levels();
4408
4409 b = brightness_get(NULL);
4410 if (b < 0)
4411 return 1; 4776 return 1;
4412 4777
4413 if (tp_features.bright_16levels) 4778 if (tp_features.bright_16levels)
@@ -4425,7 +4790,7 @@ static int __init brightness_init(struct ibm_init_struct *iibm)
4425 4790
4426 ibm_backlight_device->props.max_brightness = 4791 ibm_backlight_device->props.max_brightness =
4427 (tp_features.bright_16levels)? 15 : 7; 4792 (tp_features.bright_16levels)? 15 : 7;
4428 ibm_backlight_device->props.brightness = b; 4793 ibm_backlight_device->props.brightness = b & TP_EC_BACKLIGHT_LVLMSK;
4429 backlight_update_status(ibm_backlight_device); 4794 backlight_update_status(ibm_backlight_device);
4430 4795
4431 return 0; 4796 return 0;
@@ -5046,11 +5411,11 @@ static void fan_watchdog_reset(void)
5046 if (fan_watchdog_maxinterval > 0 && 5411 if (fan_watchdog_maxinterval > 0 &&
5047 tpacpi_lifecycle != TPACPI_LIFE_EXITING) { 5412 tpacpi_lifecycle != TPACPI_LIFE_EXITING) {
5048 fan_watchdog_active = 1; 5413 fan_watchdog_active = 1;
5049 if (!schedule_delayed_work(&fan_watchdog_task, 5414 if (!queue_delayed_work(tpacpi_wq, &fan_watchdog_task,
5050 msecs_to_jiffies(fan_watchdog_maxinterval 5415 msecs_to_jiffies(fan_watchdog_maxinterval
5051 * 1000))) { 5416 * 1000))) {
5052 printk(TPACPI_ERR 5417 printk(TPACPI_ERR
5053 "failed to schedule the fan watchdog, " 5418 "failed to queue the fan watchdog, "
5054 "watchdog will not trigger\n"); 5419 "watchdog will not trigger\n");
5055 } 5420 }
5056 } else 5421 } else
@@ -5420,7 +5785,7 @@ static void fan_exit(void)
5420 &driver_attr_fan_watchdog); 5785 &driver_attr_fan_watchdog);
5421 5786
5422 cancel_delayed_work(&fan_watchdog_task); 5787 cancel_delayed_work(&fan_watchdog_task);
5423 flush_scheduled_work(); 5788 flush_workqueue(tpacpi_wq);
5424} 5789}
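The fan watchdog now runs on the driver's private tpacpi_wq instead of the shared kernel workqueue, so the exit path only has to flush this driver's own work. A self-contained sketch of that pattern, with invented names, roughly matching the workqueue API of this kernel era:

/* illustration only; names are invented, not driver code */
#include <linux/module.h>
#include <linux/workqueue.h>
#include <linux/jiffies.h>

static struct workqueue_struct *example_wq;
static struct delayed_work example_task;

static void example_fn(struct work_struct *work)
{
	printk(KERN_INFO "example watchdog fired\n");
}

static int __init example_init(void)
{
	example_wq = create_singlethread_workqueue("example_wq");
	if (!example_wq)
		return -ENOMEM;
	INIT_DELAYED_WORK(&example_task, example_fn);
	queue_delayed_work(example_wq, &example_task, msecs_to_jiffies(5000));
	return 0;
}

static void __exit example_exit(void)
{
	cancel_delayed_work(&example_task);
	flush_workqueue(example_wq);
	destroy_workqueue(example_wq);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");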
5425 5790
5426static int fan_read(char *p) 5791static int fan_read(char *p)
@@ -5826,10 +6191,13 @@ static void __init get_thinkpad_model_data(struct thinkpad_id_data *tp)
5826 6191
5827 tp->model_str = kstrdup(dmi_get_system_info(DMI_PRODUCT_VERSION), 6192 tp->model_str = kstrdup(dmi_get_system_info(DMI_PRODUCT_VERSION),
5828 GFP_KERNEL); 6193 GFP_KERNEL);
5829 if (strnicmp(tp->model_str, "ThinkPad", 8) != 0) { 6194 if (tp->model_str && strnicmp(tp->model_str, "ThinkPad", 8) != 0) {
5830 kfree(tp->model_str); 6195 kfree(tp->model_str);
5831 tp->model_str = NULL; 6196 tp->model_str = NULL;
5832 } 6197 }
6198
6199 tp->nummodel_str = kstrdup(dmi_get_system_info(DMI_PRODUCT_NAME),
6200 GFP_KERNEL);
5833} 6201}
5834 6202
5835static int __init probe_for_thinkpad(void) 6203static int __init probe_for_thinkpad(void)
@@ -6071,6 +6439,9 @@ static void thinkpad_acpi_module_exit(void)
6071 if (proc_dir) 6439 if (proc_dir)
6072 remove_proc_entry(TPACPI_PROC_DIR, acpi_root_dir); 6440 remove_proc_entry(TPACPI_PROC_DIR, acpi_root_dir);
6073 6441
6442 if (tpacpi_wq)
6443 destroy_workqueue(tpacpi_wq);
6444
6074 kfree(thinkpad_id.bios_version_str); 6445 kfree(thinkpad_id.bios_version_str);
6075 kfree(thinkpad_id.ec_version_str); 6446 kfree(thinkpad_id.ec_version_str);
6076 kfree(thinkpad_id.model_str); 6447 kfree(thinkpad_id.model_str);
@@ -6101,6 +6472,12 @@ static int __init thinkpad_acpi_module_init(void)
6101 TPACPI_ACPIHANDLE_INIT(ecrd); 6472 TPACPI_ACPIHANDLE_INIT(ecrd);
6102 TPACPI_ACPIHANDLE_INIT(ecwr); 6473 TPACPI_ACPIHANDLE_INIT(ecwr);
6103 6474
6475 tpacpi_wq = create_singlethread_workqueue(TPACPI_WORKQUEUE_NAME);
6476 if (!tpacpi_wq) {
6477 thinkpad_acpi_module_exit();
6478 return -ENOMEM;
6479 }
6480
6104 proc_dir = proc_mkdir(TPACPI_PROC_DIR, acpi_root_dir); 6481 proc_dir = proc_mkdir(TPACPI_PROC_DIR, acpi_root_dir);
6105 if (!proc_dir) { 6482 if (!proc_dir) {
6106 printk(TPACPI_ERR 6483 printk(TPACPI_ERR
@@ -6223,6 +6600,8 @@ static int __init thinkpad_acpi_module_init(void)
6223/* Please remove this in year 2009 */ 6600/* Please remove this in year 2009 */
6224MODULE_ALIAS("ibm_acpi"); 6601MODULE_ALIAS("ibm_acpi");
6225 6602
6603MODULE_ALIAS(TPACPI_DRVR_SHORTNAME);
6604
6226/* 6605/*
6227 * DMI matching for module autoloading 6606 * DMI matching for module autoloading
6228 * 6607 *