Diffstat (limited to 'arch/ia64')
-rw-r--r--	arch/ia64/Kconfig                    |   23
-rw-r--r--	arch/ia64/configs/tiger_defconfig    |   96
-rw-r--r--	arch/ia64/hp/common/sba_iommu.c      |   34
-rw-r--r--	arch/ia64/ia32/ia32_ioctl.c          |    1
-rw-r--r--	arch/ia64/ia32/sys_ia32.c            |    2
-rw-r--r--	arch/ia64/kernel/acpi.c              |   23
-rw-r--r--	arch/ia64/kernel/entry.S             |    6
-rw-r--r--	arch/ia64/kernel/fsys.S              |    4
-rw-r--r--	arch/ia64/kernel/mca.c               |    8
-rw-r--r--	arch/ia64/kernel/mca_drv.c           |    4
-rw-r--r--	arch/ia64/kernel/mca_drv_asm.S       |   18
-rw-r--r--	arch/ia64/kernel/minstate.h          |    3
-rw-r--r--	arch/ia64/kernel/module.c            |   10
-rw-r--r--	arch/ia64/kernel/perfmon.c           |  218
-rw-r--r--	arch/ia64/kernel/process.c           |   55
-rw-r--r--	arch/ia64/kernel/ptrace.c            |   47
-rw-r--r--	arch/ia64/kernel/setup.c             |    3
-rw-r--r--	arch/ia64/kernel/signal.c            |    3
-rw-r--r--	arch/ia64/kernel/smpboot.c           |    2
-rw-r--r--	arch/ia64/kernel/sys_ia64.c          |    7
-rw-r--r--	arch/ia64/kernel/traps.c             |   29
-rw-r--r--	arch/ia64/lib/flush.S                |    6
-rw-r--r--	arch/ia64/lib/memcpy_mck.S           |    2
-rw-r--r--	arch/ia64/lib/memset.S               |    2
-rw-r--r--	arch/ia64/mm/init.c                  |   19
-rw-r--r--	arch/ia64/sn/kernel/Makefile         |    7
-rw-r--r--	arch/ia64/sn/kernel/io_init.c        |   10
-rw-r--r--	arch/ia64/sn/kernel/mca.c            |   34
-rw-r--r--	arch/ia64/sn/kernel/setup.c          |   46
-rw-r--r--	arch/ia64/sn/kernel/tiocx.c          |   60
-rw-r--r--	arch/ia64/sn/kernel/xp_main.c        |  289
-rw-r--r--	arch/ia64/sn/kernel/xp_nofault.S     |   31
-rw-r--r--	arch/ia64/sn/kernel/xpc.h            |  991
-rw-r--r--	arch/ia64/sn/kernel/xpc_channel.c    | 2297
-rw-r--r--	arch/ia64/sn/kernel/xpc_main.c       | 1064
-rw-r--r--	arch/ia64/sn/kernel/xpc_partition.c  |  984
-rw-r--r--	arch/ia64/sn/kernel/xpnet.c          |  715
-rw-r--r--	arch/ia64/sn/pci/pcibr/pcibr_dma.c   |    2
-rw-r--r--	arch/ia64/sn/pci/tioca_provider.c    |    2
39 files changed, 6909 insertions, 248 deletions
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 468dbe8a6b9c..3ad2c4af099c 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -46,6 +46,10 @@ config GENERIC_IOMAP
 	bool
 	default y
 
+config SCHED_NO_NO_OMIT_FRAME_POINTER
+	bool
+	default y
+
 choice
 	prompt "System type"
 	default IA64_GENERIC
@@ -217,6 +221,16 @@ config IA64_SGI_SN_SIM
 	  If you are compiling a kernel that will run under SGI's IA-64
 	  simulator (Medusa) then say Y, otherwise say N.
 
+config IA64_SGI_SN_XP
+	tristate "Support communication between SGI SSIs"
+	depends on MSPEC
+	help
+	  An SGI machine can be divided into multiple Single System
+	  Images which act independently of each other and have
+	  hardware based memory protection from the others.  Enabling
+	  this feature will allow for direct communication between SSIs
+	  based on a network adapter and DMA messaging.
+
 config FORCE_MAX_ZONEORDER
 	int
 	default "18"
@@ -261,6 +275,15 @@ config HOTPLUG_CPU
 	  can be controlled through /sys/devices/system/cpu/cpu#.
 	  Say N if you want to disable CPU hotplug.
 
+config SCHED_SMT
+	bool "SMT scheduler support"
+	depends on SMP
+	default off
+	help
+	  Improves the CPU scheduler's decision making when dealing with
+	  Intel IA64 chips with MultiThreading at a cost of slightly increased
+	  overhead in some places. If unsure say N here.
+
 config PREEMPT
 	bool "Preemptible Kernel"
 	help
diff --git a/arch/ia64/configs/tiger_defconfig b/arch/ia64/configs/tiger_defconfig
index 99830e8fc9ba..9086b789f6ac 100644
--- a/arch/ia64/configs/tiger_defconfig
+++ b/arch/ia64/configs/tiger_defconfig
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.11-rc2
-# Sat Jan 22 11:17:02 2005
+# Linux kernel version: 2.6.12-rc3
+# Tue May 3 15:55:04 2005
 #
 
 #
@@ -10,6 +10,7 @@
 CONFIG_EXPERIMENTAL=y
 CONFIG_CLEAN_COMPILE=y
 CONFIG_LOCK_KERNEL=y
+CONFIG_INIT_ENV_ARG_LIMIT=32
 
 #
 # General setup
@@ -21,24 +22,27 @@ CONFIG_POSIX_MQUEUE=y
 # CONFIG_BSD_PROCESS_ACCT is not set
 CONFIG_SYSCTL=y
 # CONFIG_AUDIT is not set
-CONFIG_LOG_BUF_SHIFT=20
 CONFIG_HOTPLUG=y
 CONFIG_KOBJECT_UEVENT=y
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
+# CONFIG_CPUSETS is not set
 # CONFIG_EMBEDDED is not set
 CONFIG_KALLSYMS=y
 CONFIG_KALLSYMS_ALL=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
+CONFIG_PRINTK=y
+CONFIG_BUG=y
+CONFIG_BASE_FULL=y
 CONFIG_FUTEX=y
 CONFIG_EPOLL=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
 CONFIG_SHMEM=y
 CONFIG_CC_ALIGN_FUNCTIONS=0
 CONFIG_CC_ALIGN_LABELS=0
 CONFIG_CC_ALIGN_LOOPS=0
 CONFIG_CC_ALIGN_JUMPS=0
 # CONFIG_TINY_SHMEM is not set
+CONFIG_BASE_SMALL=0
 
 #
 # Loadable module support
@@ -85,6 +89,7 @@ CONFIG_FORCE_MAX_ZONEORDER=18
 CONFIG_SMP=y
 CONFIG_NR_CPUS=4
 CONFIG_HOTPLUG_CPU=y
+# CONFIG_SCHED_SMT is not set
 # CONFIG_PREEMPT is not set
 CONFIG_HAVE_DEC_LOCK=y
 CONFIG_IA32_SUPPORT=y
@@ -135,6 +140,7 @@ CONFIG_PCI_DOMAINS=y
 # CONFIG_PCI_MSI is not set
 CONFIG_PCI_LEGACY_PROC=y
 CONFIG_PCI_NAMES=y
+# CONFIG_PCI_DEBUG is not set
 
 #
 # PCI Hotplug Support
@@ -152,10 +158,6 @@ CONFIG_HOTPLUG_PCI_ACPI=m
 # CONFIG_PCCARD is not set
 
 #
-# PC-card bridges
-#
-
-#
 # Device Drivers
 #
 
@@ -195,9 +197,10 @@ CONFIG_BLK_DEV_CRYPTOLOOP=m
 CONFIG_BLK_DEV_NBD=m
 # CONFIG_BLK_DEV_SX8 is not set
 # CONFIG_BLK_DEV_UB is not set
-CONFIG_BLK_DEV_RAM=m
+CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_COUNT=16
 CONFIG_BLK_DEV_RAM_SIZE=4096
+CONFIG_BLK_DEV_INITRD=y
 CONFIG_INITRAMFS_SOURCE=""
 # CONFIG_CDROM_PKTCDVD is not set
 
@@ -313,7 +316,6 @@ CONFIG_SCSI_FC_ATTRS=y
 # CONFIG_SCSI_BUSLOGIC is not set
 # CONFIG_SCSI_DMX3191D is not set
 # CONFIG_SCSI_EATA is not set
-# CONFIG_SCSI_EATA_PIO is not set
 # CONFIG_SCSI_FUTURE_DOMAIN is not set
 # CONFIG_SCSI_GDTH is not set
 # CONFIG_SCSI_IPS is not set
@@ -325,7 +327,6 @@ CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16
 CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64
 # CONFIG_SCSI_SYM53C8XX_IOMAPPED is not set
 # CONFIG_SCSI_IPR is not set
-# CONFIG_SCSI_QLOGIC_ISP is not set
 CONFIG_SCSI_QLOGIC_FC=y
 # CONFIG_SCSI_QLOGIC_FC_FIRMWARE is not set
 CONFIG_SCSI_QLOGIC_1280=y
@@ -336,6 +337,7 @@ CONFIG_SCSI_QLA22XX=m
 CONFIG_SCSI_QLA2300=m
 CONFIG_SCSI_QLA2322=m
 # CONFIG_SCSI_QLA6312 is not set
+# CONFIG_SCSI_LPFC is not set
 # CONFIG_SCSI_DC395x is not set
 # CONFIG_SCSI_DC390T is not set
 # CONFIG_SCSI_DEBUG is not set
@@ -358,6 +360,7 @@ CONFIG_DM_CRYPT=m
 CONFIG_DM_SNAPSHOT=m
 CONFIG_DM_MIRROR=m
 CONFIG_DM_ZERO=m
+# CONFIG_DM_MULTIPATH is not set
 
 #
 # Fusion MPT device support
@@ -386,7 +389,6 @@ CONFIG_NET=y
 #
 CONFIG_PACKET=y
 # CONFIG_PACKET_MMAP is not set
-CONFIG_NETLINK_DEV=y
 CONFIG_UNIX=y
 # CONFIG_NET_KEY is not set
 CONFIG_INET=y
@@ -446,7 +448,6 @@ CONFIG_DUMMY=m
 # CONFIG_BONDING is not set
 # CONFIG_EQUALIZER is not set
 # CONFIG_TUN is not set
-# CONFIG_ETHERTAP is not set
 
 #
 # ARCnet devices
@@ -484,7 +485,6 @@ CONFIG_NET_PCI=y
 # CONFIG_DGRS is not set
 CONFIG_EEPRO100=m
 CONFIG_E100=m
-# CONFIG_E100_NAPI is not set
 # CONFIG_FEALNX is not set
 # CONFIG_NATSEMI is not set
 # CONFIG_NE2K_PCI is not set
@@ -566,25 +566,6 @@ CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768
 # CONFIG_INPUT_EVBUG is not set
 
 #
-# Input I/O drivers
-#
-CONFIG_GAMEPORT=m
-CONFIG_SOUND_GAMEPORT=m
-# CONFIG_GAMEPORT_NS558 is not set
-# CONFIG_GAMEPORT_L4 is not set
-# CONFIG_GAMEPORT_EMU10K1 is not set
-# CONFIG_GAMEPORT_VORTEX is not set
-# CONFIG_GAMEPORT_FM801 is not set
-# CONFIG_GAMEPORT_CS461X is not set
-CONFIG_SERIO=y
-CONFIG_SERIO_I8042=y
-# CONFIG_SERIO_SERPORT is not set
-# CONFIG_SERIO_CT82C710 is not set
-# CONFIG_SERIO_PCIPS2 is not set
-CONFIG_SERIO_LIBPS2=y
-# CONFIG_SERIO_RAW is not set
-
-#
 # Input Device Drivers
 #
 CONFIG_INPUT_KEYBOARD=y
@@ -602,6 +583,24 @@ CONFIG_MOUSE_PS2=y
 # CONFIG_INPUT_MISC is not set
 
 #
+# Hardware I/O ports
+#
+CONFIG_SERIO=y
+CONFIG_SERIO_I8042=y
+# CONFIG_SERIO_SERPORT is not set
+# CONFIG_SERIO_PCIPS2 is not set
+CONFIG_SERIO_LIBPS2=y
+# CONFIG_SERIO_RAW is not set
+CONFIG_GAMEPORT=m
+# CONFIG_GAMEPORT_NS558 is not set
+# CONFIG_GAMEPORT_L4 is not set
+# CONFIG_GAMEPORT_EMU10K1 is not set
+# CONFIG_GAMEPORT_VORTEX is not set
+# CONFIG_GAMEPORT_FM801 is not set
+# CONFIG_GAMEPORT_CS461X is not set
+CONFIG_SOUND_GAMEPORT=m
+
+#
 # Character devices
 #
 CONFIG_VT=y
@@ -615,6 +614,8 @@ CONFIG_SERIAL_NONSTANDARD=y
 # CONFIG_SYNCLINK is not set
 # CONFIG_SYNCLINKMP is not set
 # CONFIG_N_HDLC is not set
+# CONFIG_SPECIALIX is not set
+# CONFIG_SX is not set
 # CONFIG_STALDRV is not set
 
 #
@@ -635,6 +636,7 @@ CONFIG_SERIAL_8250_SHARE_IRQ=y
 #
 CONFIG_SERIAL_CORE=y
 CONFIG_SERIAL_CORE_CONSOLE=y
+# CONFIG_SERIAL_JSM is not set
 CONFIG_UNIX98_PTYS=y
 CONFIG_LEGACY_PTYS=y
 CONFIG_LEGACY_PTY_COUNT=256
@@ -670,6 +672,12 @@ CONFIG_HPET=y
 # CONFIG_HPET_RTC_IRQ is not set
 CONFIG_HPET_MMAP=y
 CONFIG_MAX_RAW_DEVS=256
+# CONFIG_HANGCHECK_TIMER is not set
+
+#
+# TPM devices
+#
+# CONFIG_TCG_TPM is not set
 
 #
 # I2C support
@@ -705,7 +713,6 @@ CONFIG_MAX_RAW_DEVS=256
 #
 CONFIG_VGA_CONSOLE=y
 CONFIG_DUMMY_CONSOLE=y
-# CONFIG_BACKLIGHT_LCD_SUPPORT is not set
 
 #
 # Sound
@@ -715,6 +722,8 @@ CONFIG_DUMMY_CONSOLE=y
 #
 # USB support
 #
+CONFIG_USB_ARCH_HAS_HCD=y
+CONFIG_USB_ARCH_HAS_OHCI=y
 CONFIG_USB=y
 # CONFIG_USB_DEBUG is not set
 
@@ -726,8 +735,6 @@ CONFIG_USB_DEVICEFS=y
 # CONFIG_USB_DYNAMIC_MINORS is not set
 # CONFIG_USB_SUSPEND is not set
 # CONFIG_USB_OTG is not set
-CONFIG_USB_ARCH_HAS_HCD=y
-CONFIG_USB_ARCH_HAS_OHCI=y
 
 #
 # USB Host Controller Drivers
@@ -736,6 +743,8 @@ CONFIG_USB_EHCI_HCD=m
 # CONFIG_USB_EHCI_SPLIT_ISO is not set
 # CONFIG_USB_EHCI_ROOT_HUB_TT is not set
 CONFIG_USB_OHCI_HCD=m
+# CONFIG_USB_OHCI_BIG_ENDIAN is not set
+CONFIG_USB_OHCI_LITTLE_ENDIAN=y
 CONFIG_USB_UHCI_HCD=y
 # CONFIG_USB_SL811_HCD is not set
 
@@ -751,12 +760,11 @@ CONFIG_USB_UHCI_HCD=y
 #
 CONFIG_USB_STORAGE=m
 # CONFIG_USB_STORAGE_DEBUG is not set
-# CONFIG_USB_STORAGE_RW_DETECT is not set
 # CONFIG_USB_STORAGE_DATAFAB is not set
 # CONFIG_USB_STORAGE_FREECOM is not set
 # CONFIG_USB_STORAGE_ISD200 is not set
 # CONFIG_USB_STORAGE_DPCM is not set
-# CONFIG_USB_STORAGE_HP8200e is not set
+# CONFIG_USB_STORAGE_USBAT is not set
 # CONFIG_USB_STORAGE_SDDR09 is not set
 # CONFIG_USB_STORAGE_SDDR55 is not set
 # CONFIG_USB_STORAGE_JUMPSHOT is not set
@@ -800,6 +808,7 @@ CONFIG_USB_HIDINPUT=y
 # CONFIG_USB_PEGASUS is not set
 # CONFIG_USB_RTL8150 is not set
 # CONFIG_USB_USBNET is not set
+# CONFIG_USB_MON is not set
 
 #
 # USB port drivers
@@ -824,6 +833,7 @@ CONFIG_USB_HIDINPUT=y
 # CONFIG_USB_PHIDGETKIT is not set
 # CONFIG_USB_PHIDGETSERVO is not set
 # CONFIG_USB_IDMOUSE is not set
+# CONFIG_USB_SISUSBVGA is not set
 # CONFIG_USB_TEST is not set
 
 #
@@ -867,7 +877,12 @@ CONFIG_REISERFS_FS_POSIX_ACL=y
 CONFIG_REISERFS_FS_SECURITY=y
 # CONFIG_JFS_FS is not set
 CONFIG_FS_POSIX_ACL=y
+
+#
+# XFS support
+#
 CONFIG_XFS_FS=y
+CONFIG_XFS_EXPORT=y
 # CONFIG_XFS_RT is not set
 # CONFIG_XFS_QUOTA is not set
 # CONFIG_XFS_SECURITY is not set
@@ -945,7 +960,7 @@ CONFIG_NFSD_V4=y
 CONFIG_NFSD_TCP=y
 CONFIG_LOCKD=m
 CONFIG_LOCKD_V4=y
-CONFIG_EXPORTFS=m
+CONFIG_EXPORTFS=y
 CONFIG_SUNRPC=m
 CONFIG_SUNRPC_GSS=m
 CONFIG_RPCSEC_GSS_KRB5=m
@@ -1042,8 +1057,10 @@ CONFIG_GENERIC_IRQ_PROBE=y
 #
 # Kernel hacking
 #
+# CONFIG_PRINTK_TIME is not set
 CONFIG_DEBUG_KERNEL=y
 CONFIG_MAGIC_SYSRQ=y
+CONFIG_LOG_BUF_SHIFT=20
 # CONFIG_SCHEDSTATS is not set
 # CONFIG_DEBUG_SLAB is not set
 # CONFIG_DEBUG_SPINLOCK is not set
@@ -1077,6 +1094,7 @@ CONFIG_CRYPTO_MD5=m
 # CONFIG_CRYPTO_SHA256 is not set
 # CONFIG_CRYPTO_SHA512 is not set
 # CONFIG_CRYPTO_WP512 is not set
+# CONFIG_CRYPTO_TGR192 is not set
 CONFIG_CRYPTO_DES=m
 # CONFIG_CRYPTO_BLOWFISH is not set
 # CONFIG_CRYPTO_TWOFISH is not set
diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c
index 6a8fcba7a853..b8db6e3e5e81 100644
--- a/arch/ia64/hp/common/sba_iommu.c
+++ b/arch/ia64/hp/common/sba_iommu.c
@@ -1944,43 +1944,17 @@ sba_connect_bus(struct pci_bus *bus)
 static void __init
 sba_map_ioc_to_node(struct ioc *ioc, acpi_handle handle)
 {
-	struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL};
-	union acpi_object *obj;
-	acpi_handle phandle;
 	unsigned int node;
+	int pxm;
 
 	ioc->node = MAX_NUMNODES;
 
-	/*
-	 * Check for a _PXM on this node first.  We don't typically see
-	 * one here, so we'll end up getting it from the parent.
-	 */
-	if (ACPI_FAILURE(acpi_evaluate_object(handle, "_PXM", NULL, &buffer))) {
-		if (ACPI_FAILURE(acpi_get_parent(handle, &phandle)))
-			return;
-
-		/* Reset the acpi buffer */
-		buffer.length = ACPI_ALLOCATE_BUFFER;
-		buffer.pointer = NULL;
-
-		if (ACPI_FAILURE(acpi_evaluate_object(phandle, "_PXM", NULL,
-						      &buffer)))
-			return;
-	}
+	pxm = acpi_get_pxm(handle);
 
-	if (!buffer.length || !buffer.pointer)
+	if (pxm < 0)
 		return;
 
-	obj = buffer.pointer;
-
-	if (obj->type != ACPI_TYPE_INTEGER ||
-	    obj->integer.value >= MAX_PXM_DOMAINS) {
-		acpi_os_free(buffer.pointer);
-		return;
-	}
-
-	node = pxm_to_nid_map[obj->integer.value];
-	acpi_os_free(buffer.pointer);
+	node = pxm_to_nid_map[pxm];
 
 	if (node >= MAX_NUMNODES || !node_online(node))
 		return;
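This hunk, and the arch/ia64/kernel/acpi.c hunk further below, collapse the same open-coded _PXM evaluation into one call to acpi_get_pxm(). A minimal sketch of the resulting lookup pattern -- assuming the 2.6.12-era interfaces, where acpi_get_pxm() walks up the ACPI namespace evaluating _PXM and returns the proximity domain or a negative value, and pxm_to_nid_map[] translates proximity domains to node ids; sketch_handle_to_node() is a hypothetical helper, not part of the patch:

	/* Sketch only: resolve an ACPI handle to an online NUMA node, or -1. */
	static int sketch_handle_to_node(acpi_handle handle)
	{
		int pxm = acpi_get_pxm(handle);		/* < 0 if no _PXM found */
		unsigned int node;

		if (pxm < 0)
			return -1;
		node = pxm_to_nid_map[pxm];		/* proximity domain -> nid */
		if (node >= MAX_NUMNODES || !node_online(node))
			return -1;
		return node;
	}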
diff --git a/arch/ia64/ia32/ia32_ioctl.c b/arch/ia64/ia32/ia32_ioctl.c
index 9845dabe2613..164b211f4174 100644
--- a/arch/ia64/ia32/ia32_ioctl.c
+++ b/arch/ia64/ia32/ia32_ioctl.c
@@ -13,7 +13,6 @@
 
 #define INCLUDES
 #include "compat_ioctl.c"
-#include <asm/ioctl32.h>
 
 #define IOCTL_NR(a)	((a) & ~(_IOC_SIZEMASK << _IOC_SIZESHIFT))
 
diff --git a/arch/ia64/ia32/sys_ia32.c b/arch/ia64/ia32/sys_ia32.c
index 247a21c64aea..c1e20d65dd6c 100644
--- a/arch/ia64/ia32/sys_ia32.c
+++ b/arch/ia64/ia32/sys_ia32.c
@@ -2427,7 +2427,7 @@ sys32_epoll_wait(int epfd, struct epoll_event32 __user * events, int maxevents,
 {
 	struct epoll_event *events64 = NULL;
 	mm_segment_t old_fs = get_fs();
-	int error, numevents, size;
+	int numevents, size;
 	int evt_idx;
 	int do_free_pages = 0;
 
diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c
index a8e99c56a768..72dfd9e7de0f 100644
--- a/arch/ia64/kernel/acpi.c
+++ b/arch/ia64/kernel/acpi.c
@@ -779,7 +779,7 @@ acpi_map_iosapic (acpi_handle handle, u32 depth, void *context, void **ret)
 	union acpi_object *obj;
 	struct acpi_table_iosapic *iosapic;
 	unsigned int gsi_base;
-	int node;
+	int pxm, node;
 
 	/* Only care about objects w/ a method that returns the MADT */
 	if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer)))
@@ -805,29 +805,16 @@ acpi_map_iosapic (acpi_handle handle, u32 depth, void *context, void **ret)
 	gsi_base = iosapic->global_irq_base;
 
 	acpi_os_free(buffer.pointer);
-	buffer.length = ACPI_ALLOCATE_BUFFER;
-	buffer.pointer = NULL;
 
 	/*
-	 * OK, it's an IOSAPIC MADT entry, look for a _PXM method to tell
+	 * OK, it's an IOSAPIC MADT entry, look for a _PXM value to tell
 	 * us which node to associate this with.
 	 */
-	if (ACPI_FAILURE(acpi_evaluate_object(handle, "_PXM", NULL, &buffer)))
-		return AE_OK;
-
-	if (!buffer.length || !buffer.pointer)
-		return AE_OK;
-
-	obj = buffer.pointer;
-
-	if (obj->type != ACPI_TYPE_INTEGER ||
-	    obj->integer.value >= MAX_PXM_DOMAINS) {
-		acpi_os_free(buffer.pointer);
+	pxm = acpi_get_pxm(handle);
+	if (pxm < 0)
 		return AE_OK;
-	}
 
-	node = pxm_to_nid_map[obj->integer.value];
-	acpi_os_free(buffer.pointer);
+	node = pxm_to_nid_map[pxm];
 
 	if (node >= MAX_NUMNODES || !node_online(node) ||
 	    cpus_empty(node_to_cpumask(node)))
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index 9353adc18956..8ebfcc0c813a 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -777,7 +777,7 @@ GLOBAL_ENTRY(ia64_ret_from_ia32_execve)
 	st8.spill [r2]=r8	// store return value in slot for r8 and set unat bit
 	.mem.offset 8,0
 	st8.spill [r3]=r0	// clear error indication in slot for r10 and set unat bit
-END(ia64_ret_from_ia32_execve_syscall)
+END(ia64_ret_from_ia32_execve)
 	// fall through
 #endif /* CONFIG_IA32_SUPPORT */
 GLOBAL_ENTRY(ia64_leave_kernel)
@@ -1176,7 +1176,7 @@ ENTRY(notify_resume_user)
 	;;
 (pNonSys) mov out2=0				// out2==0 => not a syscall
 	.fframe 16
-	.spillpsp ar.unat, 16			// (note that offset is relative to psp+0x10!)
+	.spillsp ar.unat, 16
 	st8 [sp]=r9,-16				// allocate space for ar.unat and save it
 	st8 [out1]=loc1,-8			// save ar.pfs, out1=&sigscratch
 	.body
@@ -1202,7 +1202,7 @@ GLOBAL_ENTRY(sys_rt_sigsuspend)
 	adds out2=8,sp				// out2=&sigscratch->ar_pfs
 	;;
 	.fframe 16
-	.spillpsp ar.unat, 16			// (note that offset is relative to psp+0x10!)
+	.spillsp ar.unat, 16
 	st8 [sp]=r9,-16				// allocate space for ar.unat and save it
 	st8 [out2]=loc1,-8			// save ar.pfs, out2=&sigscratch
 	.body
diff --git a/arch/ia64/kernel/fsys.S b/arch/ia64/kernel/fsys.S
index f566ff43a389..7d7684a369d3 100644
--- a/arch/ia64/kernel/fsys.S
+++ b/arch/ia64/kernel/fsys.S
@@ -460,9 +460,9 @@ EX(.fail_efault, ld8 r14=[r33])			// r14 <- *set
 	;;
 
 	st8 [r2]=r14				// update current->blocked with new mask
-	cmpxchg4.acq r14=[r9],r18,ar.ccv	// current->thread_info->flags <- r18
+	cmpxchg4.acq r8=[r9],r18,ar.ccv		// current->thread_info->flags <- r18
 	;;
-	cmp.ne p6,p0=r17,r14			// update failed?
+	cmp.ne p6,p0=r17,r8			// update failed?
 (p6)	br.cond.spnt.few 1b			// yes -> retry
 
 #ifdef CONFIG_SMP
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
index 4d6c7b8f667b..736e328b5e61 100644
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c
@@ -1103,8 +1103,6 @@ ia64_mca_cpe_int_caller(int cpe_irq, void *arg, struct pt_regs *ptregs)
 	return IRQ_HANDLED;
 }
 
-#endif /* CONFIG_ACPI */
-
 /*
  * ia64_mca_cpe_poll
  *
@@ -1122,6 +1120,8 @@ ia64_mca_cpe_poll (unsigned long dummy)
 	platform_send_ipi(first_cpu(cpu_online_map), IA64_CPEP_VECTOR, IA64_IPI_DM_INT, 0);
 }
 
+#endif /* CONFIG_ACPI */
+
 /*
  * C portion of the OS INIT handler
  *
@@ -1390,8 +1390,7 @@ ia64_mca_init(void)
 	register_percpu_irq(IA64_MCA_WAKEUP_VECTOR, &mca_wkup_irqaction);
 
 #ifdef CONFIG_ACPI
-	/* Setup the CPEI/P vector and handler */
-	cpe_vector = acpi_request_vector(ACPI_INTERRUPT_CPEI);
+	/* Setup the CPEI/P handler */
 	register_percpu_irq(IA64_CPEP_VECTOR, &mca_cpep_irqaction);
 #endif
 
@@ -1436,6 +1435,7 @@ ia64_mca_late_init(void)
 
 #ifdef CONFIG_ACPI
 	/* Setup the CPEI/P vector and handler */
+	cpe_vector = acpi_request_vector(ACPI_INTERRUPT_CPEI);
 	init_timer(&cpe_poll_timer);
 	cpe_poll_timer.function = ia64_mca_cpe_poll;
 
diff --git a/arch/ia64/kernel/mca_drv.c b/arch/ia64/kernel/mca_drv.c
index ab478172c349..abc0113a821d 100644
--- a/arch/ia64/kernel/mca_drv.c
+++ b/arch/ia64/kernel/mca_drv.c
@@ -132,8 +132,7 @@ mca_handler_bh(unsigned long paddr)
 	spin_unlock(&mca_bh_lock);
 
 	/* This process is about to be killed itself */
-	force_sig(SIGKILL, current);
-	schedule();
+	do_exit(SIGKILL);
 }
 
 /**
@@ -439,6 +438,7 @@ recover_from_read_error(slidx_table_t *slidx, peidx_table_t *peidx, pal_bus_chec
 		psr2 = (struct ia64_psr *)&pmsa->pmsa_ipsr;
 		psr2->cpl = 0;
 		psr2->ri = 0;
+		psr2->i = 0;
 
 		return 1;
 	}
diff --git a/arch/ia64/kernel/mca_drv_asm.S b/arch/ia64/kernel/mca_drv_asm.S
index bcfa05acc561..2d7e0217638d 100644
--- a/arch/ia64/kernel/mca_drv_asm.S
+++ b/arch/ia64/kernel/mca_drv_asm.S
@@ -10,6 +10,7 @@
 
 #include <asm/asmmacro.h>
 #include <asm/processor.h>
+#include <asm/ptrace.h>
 
 GLOBAL_ENTRY(mca_handler_bhhook)
 	invala				// clear RSE ?
@@ -20,12 +21,21 @@ GLOBAL_ENTRY(mca_handler_bhhook)
 	;;
 	alloc	r16=ar.pfs,0,2,1,0	// make a new frame
 	;;
+	mov	ar.rsc=0
+	;;
 	mov	r13=IA64_KR(CURRENT)	// current task pointer
 	;;
-	adds	r12=IA64_TASK_THREAD_KSP_OFFSET,r13
+	mov	r2=r13
+	;;
+	addl	r22=IA64_RBS_OFFSET,r2
+	;;
+	mov	ar.bspstore=r22
 	;;
-	ld8	r12=[r12]		// stack pointer
+	addl	sp=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r2
 	;;
+	adds	r2=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13
+	;;
+	st1	[r2]=r0			// clear current->thread.on_ustack flag
 	mov	loc0=r16
 	movl	loc1=mca_handler_bh	// recovery C function
 	;;
@@ -34,7 +44,9 @@ GLOBAL_ENTRY(mca_handler_bhhook)
 	;;
 	mov	loc1=rp
 	;;
-	br.call.sptk.many rp=b6		// not return ...
+	ssm	psr.i
+	;;
+	br.call.sptk.many rp=b6		// does not return ...
 	;;
 	mov	ar.pfs=loc0
 	mov	rp=loc1
diff --git a/arch/ia64/kernel/minstate.h b/arch/ia64/kernel/minstate.h
index 1dbc7b2497c9..f6d8a010d99b 100644
--- a/arch/ia64/kernel/minstate.h
+++ b/arch/ia64/kernel/minstate.h
@@ -41,7 +41,7 @@
 (pKStk) addl r3=THIS_CPU(ia64_mca_data),r3;;				\
 (pKStk) ld8 r3 = [r3];;							\
 (pKStk) addl r3=IA64_MCA_CPU_INIT_STACK_OFFSET,r3;;			\
-(pKStk) addl sp=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r3;			\
+(pKStk) addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r3;			\
 (pUStk)	mov ar.rsc=0;	/* set enforced lazy mode, pl 0, little-endian, loadrs=0 */ \
 (pUStk)	addl r22=IA64_RBS_OFFSET,r1;	/* compute base of register backing store */ \
 	;;								\
@@ -50,7 +50,6 @@
 (pUStk)	mov r23=ar.bspstore;		/* save ar.bspstore */		\
 (pUStk)	dep r22=-1,r22,61,3;	/* compute kernel virtual addr of RBS */ \
 	;;								\
-(pKStk) addl r1=-IA64_PT_REGS_SIZE,r1;	/* if in kernel mode, use sp (r12) */ \
 (pUStk)	mov ar.bspstore=r22;	/* switch to kernel RBS */		\
 	;;								\
 (pUStk)	mov r18=ar.bsp;							\
diff --git a/arch/ia64/kernel/module.c b/arch/ia64/kernel/module.c
index febc091c2f02..f1aca7cffd12 100644
--- a/arch/ia64/kernel/module.c
+++ b/arch/ia64/kernel/module.c
@@ -825,14 +825,16 @@ apply_relocate_add (Elf64_Shdr *sechdrs, const char *strtab, unsigned int symind
 		 * XXX Should have an arch-hook for running this after final section
 		 *     addresses have been selected...
 		 */
-		/* See if gp can cover the entire core module: */
-		uint64_t gp = (uint64_t) mod->module_core + MAX_LTOFF / 2;
-		if (mod->core_size >= MAX_LTOFF)
+		uint64_t gp;
+		if (mod->core_size > MAX_LTOFF)
 			/*
 			 * This takes advantage of fact that SHF_ARCH_SMALL gets allocated
 			 * at the end of the module.
 			 */
-			gp = (uint64_t) mod->module_core + mod->core_size - MAX_LTOFF / 2;
+			gp = mod->core_size - MAX_LTOFF / 2;
+		else
+			gp = mod->core_size / 2;
+		gp = (uint64_t) mod->module_core + ((gp + 7) & -8);
 		mod->arch.gp = gp;
 		DEBUGP("%s: placing gp at 0x%lx\n", __FUNCTION__, gp);
 	}
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index 376fcbc3f8da..6407bff6bfd7 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -11,7 +11,7 @@
  * Version Perfmon-2.x is a rewrite of perfmon-1.x
  * by Stephane Eranian, Hewlett Packard Co.
  *
- * Copyright (C) 1999-2003, 2005 Hewlett Packard Co
+ * Copyright (C) 1999-2005 Hewlett Packard Co
  *               Stephane Eranian <eranian@hpl.hp.com>
  *               David Mosberger-Tang <davidm@hpl.hp.com>
  *
@@ -497,6 +497,9 @@ typedef struct {
 static pfm_stats_t		pfm_stats[NR_CPUS];
 static pfm_session_t		pfm_sessions;	/* global sessions information */
 
+static spinlock_t pfm_alt_install_check = SPIN_LOCK_UNLOCKED;
+static pfm_intr_handler_desc_t  *pfm_alt_intr_handler;
+
 static struct proc_dir_entry 	*perfmon_dir;
 static pfm_uuid_t		pfm_null_uuid = {0,};
 
@@ -606,6 +609,7 @@ DEFINE_PER_CPU(unsigned long, pfm_syst_info);
 DEFINE_PER_CPU(struct task_struct *, pmu_owner);
 DEFINE_PER_CPU(pfm_context_t *, pmu_ctx);
 DEFINE_PER_CPU(unsigned long, pmu_activation_number);
+EXPORT_PER_CPU_SYMBOL_GPL(pfm_syst_info);
 
 
 /* forward declaration */
@@ -1265,6 +1269,8 @@ out:
 }
 EXPORT_SYMBOL(pfm_unregister_buffer_fmt);
 
+extern void update_pal_halt_status(int);
+
 static int
 pfm_reserve_session(struct task_struct *task, int is_syswide, unsigned int cpu)
 {
@@ -1311,6 +1317,11 @@ pfm_reserve_session(struct task_struct *task, int is_syswide, unsigned int cpu)
 		is_syswide,
 		cpu));
 
+	/*
+	 * disable default_idle() to go to PAL_HALT
+	 */
+	update_pal_halt_status(0);
+
 	UNLOCK_PFS(flags);
 
 	return 0;
@@ -1318,7 +1329,7 @@ pfm_reserve_session(struct task_struct *task, int is_syswide, unsigned int cpu)
 error_conflict:
 	DPRINT(("system wide not possible, conflicting session [%d] on CPU%d\n",
 		pfm_sessions.pfs_sys_session[cpu]->pid,
-		smp_processor_id()));
+		cpu));
 abort:
 	UNLOCK_PFS(flags);
 
@@ -1366,6 +1377,12 @@ pfm_unreserve_session(pfm_context_t *ctx, int is_syswide, unsigned int cpu)
 		is_syswide,
 		cpu));
 
+	/*
+	 * if possible, enable default_idle() to go into PAL_HALT
+	 */
+	if (pfm_sessions.pfs_task_sessions == 0 && pfm_sessions.pfs_sys_sessions == 0)
+		update_pal_halt_status(1);
+
 	UNLOCK_PFS(flags);
 
 	return 0;
@@ -4202,7 +4219,7 @@ pfm_context_load(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
 		DPRINT(("cannot load to [%d], invalid ctx_state=%d\n",
 			req->load_pid,
 			ctx->ctx_state));
-		return -EINVAL;
+		return -EBUSY;
 	}
 
 	DPRINT(("load_pid [%d] using_dbreg=%d\n", req->load_pid, ctx->ctx_fl_using_dbreg));
@@ -4704,16 +4721,26 @@ recheck:
 	if (task == current || ctx->ctx_fl_system) return 0;
 
 	/*
-	 * if context is UNLOADED we are safe to go
+	 * we are monitoring another thread
 	 */
-	if (state == PFM_CTX_UNLOADED) return 0;
-
-	/*
-	 * no command can operate on a zombie context
-	 */
-	if (state == PFM_CTX_ZOMBIE) {
-		DPRINT(("cmd %d state zombie cannot operate on context\n", cmd));
-		return -EINVAL;
+	switch(state) {
+		case PFM_CTX_UNLOADED:
+			/*
+			 * if context is UNLOADED we are safe to go
+			 */
+			return 0;
+		case PFM_CTX_ZOMBIE:
+			/*
+			 * no command can operate on a zombie context
+			 */
+			DPRINT(("cmd %d state zombie cannot operate on context\n", cmd));
+			return -EINVAL;
+		case PFM_CTX_MASKED:
+			/*
+			 * PMU state has been saved to software even though
+			 * the thread may still be running.
+			 */
+			if (cmd != PFM_UNLOAD_CONTEXT) return 0;
 	}
 
 	/*
@@ -5532,26 +5559,32 @@ pfm_interrupt_handler(int irq, void *arg, struct pt_regs *regs)
 	int ret;
 
 	this_cpu = get_cpu();
-	min = pfm_stats[this_cpu].pfm_ovfl_intr_cycles_min;
-	max = pfm_stats[this_cpu].pfm_ovfl_intr_cycles_max;
+	if (likely(!pfm_alt_intr_handler)) {
+		min = pfm_stats[this_cpu].pfm_ovfl_intr_cycles_min;
+		max = pfm_stats[this_cpu].pfm_ovfl_intr_cycles_max;
 
-	start_cycles = ia64_get_itc();
+		start_cycles = ia64_get_itc();
 
-	ret = pfm_do_interrupt_handler(irq, arg, regs);
+		ret = pfm_do_interrupt_handler(irq, arg, regs);
 
-	total_cycles = ia64_get_itc();
+		total_cycles = ia64_get_itc();
 
-	/*
-	 * don't measure spurious interrupts
-	 */
-	if (likely(ret == 0)) {
-		total_cycles -= start_cycles;
+		/*
+		 * don't measure spurious interrupts
+		 */
+		if (likely(ret == 0)) {
+			total_cycles -= start_cycles;
 
-		if (total_cycles < min) pfm_stats[this_cpu].pfm_ovfl_intr_cycles_min = total_cycles;
-		if (total_cycles > max) pfm_stats[this_cpu].pfm_ovfl_intr_cycles_max = total_cycles;
+			if (total_cycles < min) pfm_stats[this_cpu].pfm_ovfl_intr_cycles_min = total_cycles;
+			if (total_cycles > max) pfm_stats[this_cpu].pfm_ovfl_intr_cycles_max = total_cycles;
 
-		pfm_stats[this_cpu].pfm_ovfl_intr_cycles += total_cycles;
+			pfm_stats[this_cpu].pfm_ovfl_intr_cycles += total_cycles;
+		}
 	}
+	else {
+		(*pfm_alt_intr_handler->handler)(irq, arg, regs);
+	}
+
 	put_cpu_no_resched();
 	return IRQ_HANDLED;
 }
@@ -6402,6 +6435,141 @@ static struct irqaction perfmon_irqaction = {
 	.name     = "perfmon"
 };
 
+static void
+pfm_alt_save_pmu_state(void *data)
+{
+	struct pt_regs *regs;
+
+	regs = ia64_task_regs(current);
+
+	DPRINT(("called\n"));
+
+	/*
+	 * should not be necessary but
+	 * let's take not risk
+	 */
+	pfm_clear_psr_up();
+	pfm_clear_psr_pp();
+	ia64_psr(regs)->pp = 0;
+
+	/*
+	 * This call is required
+	 * May cause a spurious interrupt on some processors
+	 */
+	pfm_freeze_pmu();
+
+	ia64_srlz_d();
+}
+
+void
+pfm_alt_restore_pmu_state(void *data)
+{
+	struct pt_regs *regs;
+
+	regs = ia64_task_regs(current);
+
+	DPRINT(("called\n"));
+
+	/*
+	 * put PMU back in state expected
+	 * by perfmon
+	 */
+	pfm_clear_psr_up();
+	pfm_clear_psr_pp();
+	ia64_psr(regs)->pp = 0;
+
+	/*
+	 * perfmon runs with PMU unfrozen at all times
+	 */
+	pfm_unfreeze_pmu();
+
+	ia64_srlz_d();
+}
+
+int
+pfm_install_alt_pmu_interrupt(pfm_intr_handler_desc_t *hdl)
+{
+	int ret, i;
+	int reserve_cpu;
+
+	/* some sanity checks */
+	if (hdl == NULL || hdl->handler == NULL) return -EINVAL;
+
+	/* do the easy test first */
+	if (pfm_alt_intr_handler) return -EBUSY;
+
+	/* one at a time in the install or remove, just fail the others */
+	if (!spin_trylock(&pfm_alt_install_check)) {
+		return -EBUSY;
+	}
+
+	/* reserve our session */
+	for_each_online_cpu(reserve_cpu) {
+		ret = pfm_reserve_session(NULL, 1, reserve_cpu);
+		if (ret) goto cleanup_reserve;
+	}
+
+	/* save the current system wide pmu states */
+	ret = on_each_cpu(pfm_alt_save_pmu_state, NULL, 0, 1);
+	if (ret) {
+		DPRINT(("on_each_cpu() failed: %d\n", ret));
+		goto cleanup_reserve;
+	}
+
+	/* officially change to the alternate interrupt handler */
+	pfm_alt_intr_handler = hdl;
+
+	spin_unlock(&pfm_alt_install_check);
+
+	return 0;
+
+cleanup_reserve:
+	for_each_online_cpu(i) {
+		/* don't unreserve more than we reserved */
+		if (i >= reserve_cpu) break;
+
+		pfm_unreserve_session(NULL, 1, i);
+	}
+
+	spin_unlock(&pfm_alt_install_check);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(pfm_install_alt_pmu_interrupt);
+
+int
+pfm_remove_alt_pmu_interrupt(pfm_intr_handler_desc_t *hdl)
+{
+	int i;
+	int ret;
+
+	if (hdl == NULL) return -EINVAL;
+
+	/* cannot remove someone else's handler! */
+	if (pfm_alt_intr_handler != hdl) return -EINVAL;
+
+	/* one at a time in the install or remove, just fail the others */
+	if (!spin_trylock(&pfm_alt_install_check)) {
+		return -EBUSY;
+	}
+
+	pfm_alt_intr_handler = NULL;
+
+	ret = on_each_cpu(pfm_alt_restore_pmu_state, NULL, 0, 1);
+	if (ret) {
+		DPRINT(("on_each_cpu() failed: %d\n", ret));
+	}
+
+	for_each_online_cpu(i) {
+		pfm_unreserve_session(NULL, 1, i);
+	}
+
+	spin_unlock(&pfm_alt_install_check);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(pfm_remove_alt_pmu_interrupt);
+
 /*
  * perfmon initialization routine, called from the initcall() table
  */
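For orientation, the pfm_install_alt_pmu_interrupt()/pfm_remove_alt_pmu_interrupt() pair added above lets another kernel subsystem borrow the PMU overflow interrupt: install reserves a system-wide perfmon session on every online CPU, saves and freezes the PMU state, and swaps in the caller's handler. A hedged usage sketch -- the client names below are hypothetical, and the int-returning handler signature is only inferred from the call (*pfm_alt_intr_handler->handler)(irq, arg, regs) in the hunk above:

	/* Hypothetical client module; only the pfm_* names come from the patch. */
	static int my_pmu_handler(int irq, void *arg, struct pt_regs *regs)
	{
		/* field and process the PMU overflow interrupt here */
		return 0;
	}

	static pfm_intr_handler_desc_t my_desc = {
		.handler = my_pmu_handler,	/* assumed descriptor layout */
	};

	static int __init my_init(void)
	{
		/* fails with -EBUSY if perfmon sessions are already active */
		return pfm_install_alt_pmu_interrupt(&my_desc);
	}

	static void __exit my_exit(void)
	{
		pfm_remove_alt_pmu_interrupt(&my_desc);
	}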
diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c
index 7c43aea5f7f7..ebb71f3d6d19 100644
--- a/arch/ia64/kernel/process.c
+++ b/arch/ia64/kernel/process.c
@@ -50,7 +50,7 @@
 #include "sigframe.h"
 
 void (*ia64_mark_idle)(int);
-static cpumask_t cpu_idle_map;
+static DEFINE_PER_CPU(unsigned int, cpu_idle_state);
 
 unsigned long boot_option_idle_override = 0;
 EXPORT_SYMBOL(boot_option_idle_override);
@@ -173,7 +173,9 @@ do_notify_resume_user (sigset_t *oldset, struct sigscratch *scr, long in_syscall
 		ia64_do_signal(oldset, scr, in_syscall);
 }
 
 static int pal_halt = 1;
+static int can_do_pal_halt = 1;
+
 static int __init nohalt_setup(char * str)
 {
 	pal_halt = 0;
@@ -181,16 +183,20 @@ static int __init nohalt_setup(char * str)
 }
 __setup("nohalt", nohalt_setup);
 
+void
+update_pal_halt_status(int status)
+{
+	can_do_pal_halt = pal_halt && status;
+}
+
 /*
  * We use this if we don't have any better idle routine..
  */
 void
 default_idle (void)
 {
-	unsigned long pmu_active = ia64_getreg(_IA64_REG_PSR) & (IA64_PSR_PP | IA64_PSR_UP);
-
 	while (!need_resched())
-		if (pal_halt && !pmu_active)
+		if (can_do_pal_halt)
 			safe_halt();
 		else
 			cpu_relax();
@@ -223,20 +229,31 @@ static inline void play_dead(void)
 }
 #endif /* CONFIG_HOTPLUG_CPU */
 
-
 void cpu_idle_wait(void)
 {
-	int cpu;
+	unsigned int cpu, this_cpu = get_cpu();
 	cpumask_t map;
 
-	for_each_online_cpu(cpu)
-		cpu_set(cpu, cpu_idle_map);
+	set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
+	put_cpu();
 
-	wmb();
-	do {
-		ssleep(1);
-		cpus_and(map, cpu_idle_map, cpu_online_map);
-	} while (!cpus_empty(map));
+	cpus_clear(map);
+	for_each_online_cpu(cpu) {
+		per_cpu(cpu_idle_state, cpu) = 1;
+		cpu_set(cpu, map);
+	}
+
+	__get_cpu_var(cpu_idle_state) = 0;
+
+	wmb();
+	do {
+		ssleep(1);
+		for_each_online_cpu(cpu) {
+			if (cpu_isset(cpu, map) && !per_cpu(cpu_idle_state, cpu))
+				cpu_clear(cpu, map);
+		}
+		cpus_and(map, map, cpu_online_map);
+	} while (!cpus_empty(map));
 }
 EXPORT_SYMBOL_GPL(cpu_idle_wait);
 
@@ -244,7 +261,6 @@ void __attribute__((noreturn))
 cpu_idle (void)
 {
 	void (*mark_idle)(int) = ia64_mark_idle;
-	int cpu = smp_processor_id();
 
 	/* endless idle loop with no priority at all */
 	while (1) {
@@ -255,12 +271,13 @@ cpu_idle (void)
 		while (!need_resched()) {
 			void (*idle)(void);
 
+			if (__get_cpu_var(cpu_idle_state))
+				__get_cpu_var(cpu_idle_state) = 0;
+
+			rmb();
 			if (mark_idle)
 				(*mark_idle)(1);
 
-			if (cpu_isset(cpu, cpu_idle_map))
-				cpu_clear(cpu, cpu_idle_map);
-			rmb();
 			idle = pm_idle;
 			if (!idle)
 				idle = default_idle;
diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c
index 9e730c7bf0cd..4c1d2f5442b3 100644
--- a/arch/ia64/kernel/ptrace.c
+++ b/arch/ia64/kernel/ptrace.c
@@ -635,11 +635,17 @@ ia64_flush_fph (struct task_struct *task)
 {
 	struct ia64_psr *psr = ia64_psr(ia64_task_regs(task));
 
+	/*
+	 * Prevent migrating this task while
+	 * we're fiddling with the FPU state
+	 */
+	preempt_disable();
 	if (ia64_is_local_fpu_owner(task) && psr->mfh) {
 		psr->mfh = 0;
 		task->thread.flags |= IA64_THREAD_FPH_VALID;
 		ia64_save_fpu(&task->thread.fph[0]);
 	}
+	preempt_enable();
 }
 
 /*
@@ -692,16 +698,30 @@ convert_to_non_syscall (struct task_struct *child, struct pt_regs *pt,
 			unsigned long cfm)
 {
 	struct unw_frame_info info, prev_info;
-	unsigned long ip, pr;
+	unsigned long ip, sp, pr;
 
 	unw_init_from_blocked_task(&info, child);
 	while (1) {
 		prev_info = info;
 		if (unw_unwind(&info) < 0)
 			return;
-		if (unw_get_rp(&info, &ip) < 0)
+
+		unw_get_sp(&info, &sp);
+		if ((long)((unsigned long)child + IA64_STK_OFFSET - sp)
+		    < IA64_PT_REGS_SIZE) {
+			dprintk("ptrace.%s: ran off the top of the kernel "
+				"stack\n", __FUNCTION__);
+			return;
+		}
+		if (unw_get_pr (&prev_info, &pr) < 0) {
+			unw_get_rp(&prev_info, &ip);
+			dprintk("ptrace.%s: failed to read "
+				"predicate register (ip=0x%lx)\n",
+				__FUNCTION__, ip);
 			return;
-		if (ip < FIXADDR_USER_END)
+		}
+		if (unw_is_intr_frame(&info)
+		    && (pr & (1UL << PRED_USER_STACK)))
 			break;
 	}
 
@@ -1616,20 +1636,25 @@ syscall_trace_enter (long arg0, long arg1, long arg2, long arg3,
 		     long arg4, long arg5, long arg6, long arg7,
 		     struct pt_regs regs)
 {
-	long syscall;
+	if (test_thread_flag(TIF_SYSCALL_TRACE)
+	    && (current->ptrace & PT_PTRACED))
+		syscall_trace();
 
 	if (unlikely(current->audit_context)) {
-		if (IS_IA32_PROCESS(&regs))
+		long syscall;
+		int arch;
+
+		if (IS_IA32_PROCESS(&regs)) {
 			syscall = regs.r1;
-		else
+			arch = AUDIT_ARCH_I386;
+		} else {
 			syscall = regs.r15;
+			arch = AUDIT_ARCH_IA64;
+		}
 
-		audit_syscall_entry(current, syscall, arg0, arg1, arg2, arg3);
+		audit_syscall_entry(current, arch, syscall, arg0, arg1, arg2, arg3);
 	}
 
-	if (test_thread_flag(TIF_SYSCALL_TRACE)
-	    && (current->ptrace & PT_PTRACED))
-		syscall_trace();
 }
 
 /* "asmlinkage" so the input arguments are preserved... */
@@ -1640,7 +1665,7 @@ syscall_trace_leave (long arg0, long arg1, long arg2, long arg3,
 		     struct pt_regs regs)
 {
 	if (unlikely(current->audit_context))
-		audit_syscall_exit(current, regs.r8);
+		audit_syscall_exit(current, AUDITSC_RESULT(regs.r10), regs.r8);
 
 	if (test_thread_flag(TIF_SYSCALL_TRACE)
 	    && (current->ptrace & PT_PTRACED))
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index b7e6b4cb374b..d14692e0920a 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -720,7 +720,8 @@ cpu_init (void)
 	ia64_set_kr(IA64_KR_PT_BASE, __pa(ia64_imva(empty_zero_page)));
 
 	/*
-	 * Initialize default control register to defer all speculative faults.  The
+	 * Initialize default control register to defer speculative faults except
+	 * for those arising from TLB misses, which are not deferred.  The
 	 * kernel MUST NOT depend on a particular setting of these bits (in other words,
 	 * the kernel must have recovery code for all speculative accesses).  Turn on
 	 * dcr.lc as per recommendation by the architecture team.  Most IA-32 apps
diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c
index 6891d86937d9..499b7e5317cf 100644
--- a/arch/ia64/kernel/signal.c
+++ b/arch/ia64/kernel/signal.c
@@ -224,7 +224,8 @@ ia64_rt_sigreturn (struct sigscratch *scr)
 	 * could be corrupted.
 	 */
 	retval = (long) &ia64_leave_kernel;
-	if (test_thread_flag(TIF_SYSCALL_TRACE))
+	if (test_thread_flag(TIF_SYSCALL_TRACE)
+	    || test_thread_flag(TIF_SYSCALL_AUDIT))
 		/*
 		 * strace expects to be notified after sigreturn returns even though the
 		 * context to which we return may not be in the middle of a syscall.
diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c
index 0d5ee57c9865..3865f088ffa2 100644
--- a/arch/ia64/kernel/smpboot.c
+++ b/arch/ia64/kernel/smpboot.c
@@ -624,7 +624,7 @@ static struct {
 	__u16 thread_id;
 	__u16 proc_fixed_addr;
 	__u8 valid;
-}mt_info[NR_CPUS] __devinit;
+} mt_info[NR_CPUS] __devinitdata;
 
 #ifdef CONFIG_HOTPLUG_CPU
 static inline void
diff --git a/arch/ia64/kernel/sys_ia64.c b/arch/ia64/kernel/sys_ia64.c
index a8cf6d8a509c..770fab37928e 100644
--- a/arch/ia64/kernel/sys_ia64.c
+++ b/arch/ia64/kernel/sys_ia64.c
@@ -182,13 +182,6 @@ do_mmap2 (unsigned long addr, unsigned long len, int prot, int flags, int fd, un
 		}
 	}
 
-	/*
-	 * A zero mmap always succeeds in Linux, independent of whether or not the
-	 * remaining arguments are valid.
-	 */
-	if (len == 0)
-		goto out;
-
 	/* Careful about overflows.. */
 	len = PAGE_ALIGN(len);
 	if (!len || len > TASK_SIZE) {
diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c
index e82ad78081b3..1861173bd4f6 100644
--- a/arch/ia64/kernel/traps.c
+++ b/arch/ia64/kernel/traps.c
@@ -111,6 +111,24 @@ ia64_bad_break (unsigned long break_num, struct pt_regs *regs)
 	siginfo_t siginfo;
 	int sig, code;
 
+	/* break.b always sets cr.iim to 0, which causes problems for
+	 * debuggers.  Get the real break number from the original instruction,
+	 * but only for kernel code.  User space break.b is left alone, to
+	 * preserve the existing behaviour.  All break codings have the same
+	 * format, so there is no need to check the slot type.
+	 */
+	if (break_num == 0 && !user_mode(regs)) {
+		struct ia64_psr *ipsr = ia64_psr(regs);
+		unsigned long *bundle = (unsigned long *)regs->cr_iip;
+		unsigned long slot;
+		switch (ipsr->ri) {
+		      case 0: slot = (bundle[0] >> 5); break;
+		      case 1: slot = (bundle[0] >> 46) | (bundle[1] << 18); break;
+		      default: slot = (bundle[1] >> 23); break;
+		}
+		break_num = ((slot >> 36 & 1) << 20) | (slot >> 6 & 0xfffff);
+	}
+
 	/* SIGILL, SIGFPE, SIGSEGV, and SIGBUS want these field initialized: */
 	siginfo.si_addr = (void __user *) (regs->cr_iip + ia64_psr(regs)->ri);
 	siginfo.si_imm = break_num;
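The slot arithmetic above follows the IA-64 bundle layout: a 128-bit bundle holds a 5-bit template followed by three 41-bit instruction slots at bit offsets 5, 46 and 87, so slot 1 straddles the two 64-bit words (18 bits from bundle[0], 23 bits from bundle[1]), and in a break instruction the 21-bit immediate is imm20 at slot bits 6-25 plus the i bit at slot bit 36. A standalone restatement of the same decode (sketch only; break_imm21() is a hypothetical helper, the math is taken verbatim from the hunk):

	/* Extract the 21-bit break immediate from slot ri (0..2) of a bundle. */
	static unsigned long break_imm21(const unsigned long bundle[2], unsigned int ri)
	{
		unsigned long slot;

		switch (ri) {
		case 0:  slot = bundle[0] >> 5; break;
		case 1:  slot = (bundle[0] >> 46) | (bundle[1] << 18); break;
		default: slot = bundle[1] >> 23; break;
		}
		return ((slot >> 36 & 1) << 20) | (slot >> 6 & 0xfffff);
	}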
@@ -202,13 +220,21 @@ disabled_fph_fault (struct pt_regs *regs)
 
 	/* first, grant user-level access to fph partition: */
 	psr->dfh = 0;
+
+	/*
+	 * Make sure that no other task gets in on this processor
+	 * while we're claiming the FPU
+	 */
+	preempt_disable();
 #ifndef CONFIG_SMP
 	{
 		struct task_struct *fpu_owner
 			= (struct task_struct *)ia64_get_kr(IA64_KR_FPU_OWNER);
 
-		if (ia64_is_local_fpu_owner(current))
+		if (ia64_is_local_fpu_owner(current)) {
+			preempt_enable_no_resched();
 			return;
+		}
 
 		if (fpu_owner)
 			ia64_flush_fph(fpu_owner);
@@ -226,6 +252,7 @@ disabled_fph_fault (struct pt_regs *regs)
 	 */
 	psr->mfh = 1;
 	}
+	preempt_enable_no_resched();
 }
 
 static inline int
diff --git a/arch/ia64/lib/flush.S b/arch/ia64/lib/flush.S
index 29c802b19669..a1af9146cfdb 100644
--- a/arch/ia64/lib/flush.S
+++ b/arch/ia64/lib/flush.S
@@ -1,8 +1,8 @@
 /*
  * Cache flushing routines.
  *
- * Copyright (C) 1999-2001 Hewlett-Packard Co
- * Copyright (C) 1999-2001 David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 1999-2001, 2005 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
  */
 #include <asm/asmmacro.h>
 #include <asm/page.h>
@@ -26,7 +26,7 @@ GLOBAL_ENTRY(flush_icache_range)
 
 	mov	ar.lc=r8
 	;;
-.Loop:	fc in0				// issuable on M0 only
+.Loop:	fc.i in0			// issuable on M2 only
 	add	in0=32,in0
 	br.cloop.sptk.few .Loop
 	;;
diff --git a/arch/ia64/lib/memcpy_mck.S b/arch/ia64/lib/memcpy_mck.S
index 3c2cd2f04db9..6f308e62c137 100644
--- a/arch/ia64/lib/memcpy_mck.S
+++ b/arch/ia64/lib/memcpy_mck.S
@@ -75,6 +75,7 @@ GLOBAL_ENTRY(memcpy)
 	mov	f6=f0
 	br.cond.sptk .common_code
 	;;
+END(memcpy)
 GLOBAL_ENTRY(__copy_user)
 	.prologue
 // check dest alignment
@@ -524,7 +525,6 @@ EK(.ex_handler, (p17) st8 [dst1]=r39,8); \
 #undef B
 #undef C
 #undef D
-END(memcpy)
 
 /*
  * Due to lack of local tag support in gcc 2.x assembler, it is not clear which
diff --git a/arch/ia64/lib/memset.S b/arch/ia64/lib/memset.S
index bd8cf907fe22..f26c16aefb1c 100644
--- a/arch/ia64/lib/memset.S
+++ b/arch/ia64/lib/memset.S
@@ -57,10 +57,10 @@ GLOBAL_ENTRY(memset)
57{ .mmi 57{ .mmi
58 .prologue 58 .prologue
59 alloc tmp = ar.pfs, 3, 0, 0, 0 59 alloc tmp = ar.pfs, 3, 0, 0, 0
60 .body
61 lfetch.nt1 [dest] // 60 lfetch.nt1 [dest] //
62 .save ar.lc, save_lc 61 .save ar.lc, save_lc
63 mov.i save_lc = ar.lc 62 mov.i save_lc = ar.lc
63 .body
64} { .mmi 64} { .mmi
65 mov ret0 = dest // return value 65 mov ret0 = dest // return value
66 cmp.ne p_nz, p_zr = value, r0 // use stf.spill if value is zero 66 cmp.ne p_nz, p_zr = value, r0 // use stf.spill if value is zero
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 547785e3cba2..4eb2f52b87a1 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -305,8 +305,9 @@ setup_gate (void)
305 struct page *page; 305 struct page *page;
306 306
307 /* 307 /*
308 * Map the gate page twice: once read-only to export the ELF headers etc. and once 308 * Map the gate page twice: once read-only to export the ELF
309 * execute-only page to enable privilege-promotion via "epc": 309 * headers etc. and once execute-only page to enable
310 * privilege-promotion via "epc":
310 */ 311 */
311 page = virt_to_page(ia64_imva(__start_gate_section)); 312 page = virt_to_page(ia64_imva(__start_gate_section));
312 put_kernel_page(page, GATE_ADDR, PAGE_READONLY); 313 put_kernel_page(page, GATE_ADDR, PAGE_READONLY);
@@ -315,6 +316,20 @@ setup_gate (void)
315 put_kernel_page(page, GATE_ADDR + PAGE_SIZE, PAGE_GATE); 316 put_kernel_page(page, GATE_ADDR + PAGE_SIZE, PAGE_GATE);
316#else 317#else
317 put_kernel_page(page, GATE_ADDR + PERCPU_PAGE_SIZE, PAGE_GATE); 318 put_kernel_page(page, GATE_ADDR + PERCPU_PAGE_SIZE, PAGE_GATE);
319 /* Fill in the holes (if any) with read-only zero pages: */
320 {
321 unsigned long addr;
322
323 for (addr = GATE_ADDR + PAGE_SIZE;
324 addr < GATE_ADDR + PERCPU_PAGE_SIZE;
325 addr += PAGE_SIZE)
326 {
327 put_kernel_page(ZERO_PAGE(0), addr,
328 PAGE_READONLY);
329 put_kernel_page(ZERO_PAGE(0), addr + PERCPU_PAGE_SIZE,
330 PAGE_READONLY);
331 }
332 }
318#endif 333#endif
319 ia64_patch_gate(); 334 ia64_patch_gate();
320} 335}
diff --git a/arch/ia64/sn/kernel/Makefile b/arch/ia64/sn/kernel/Makefile
index 4f381fb25049..4351c4ff9845 100644
--- a/arch/ia64/sn/kernel/Makefile
+++ b/arch/ia64/sn/kernel/Makefile
@@ -4,10 +4,15 @@
4# License. See the file "COPYING" in the main directory of this archive 4# License. See the file "COPYING" in the main directory of this archive
5# for more details. 5# for more details.
6# 6#
7# Copyright (C) 1999,2001-2003 Silicon Graphics, Inc. All Rights Reserved. 7# Copyright (C) 1999,2001-2005 Silicon Graphics, Inc. All Rights Reserved.
8# 8#
9 9
10obj-y += setup.o bte.o bte_error.o irq.o mca.o idle.o \ 10obj-y += setup.o bte.o bte_error.o irq.o mca.o idle.o \
11 huberror.o io_init.o iomv.o klconflib.o sn2/ 11 huberror.o io_init.o iomv.o klconflib.o sn2/
12obj-$(CONFIG_IA64_GENERIC) += machvec.o 12obj-$(CONFIG_IA64_GENERIC) += machvec.o
13obj-$(CONFIG_SGI_TIOCX) += tiocx.o 13obj-$(CONFIG_SGI_TIOCX) += tiocx.o
14obj-$(CONFIG_IA64_SGI_SN_XP) += xp.o
15xp-y := xp_main.o xp_nofault.o
16obj-$(CONFIG_IA64_SGI_SN_XP) += xpc.o
17xpc-y := xpc_main.o xpc_channel.o xpc_partition.o
18obj-$(CONFIG_IA64_SGI_SN_XP) += xpnet.o
diff --git a/arch/ia64/sn/kernel/io_init.c b/arch/ia64/sn/kernel/io_init.c
index 18160a06a8c9..9e07f5463f21 100644
--- a/arch/ia64/sn/kernel/io_init.c
+++ b/arch/ia64/sn/kernel/io_init.c
@@ -174,6 +174,12 @@ static void sn_fixup_ionodes(void)
174 if (status) 174 if (status)
175 continue; 175 continue;
176 176
177 /* Attach the error interrupt handlers */
178 if (nasid & 1)
179 ice_error_init(hubdev);
180 else
181 hub_error_init(hubdev);
182
177 for (widget = 0; widget <= HUB_WIDGET_ID_MAX; widget++) 183 for (widget = 0; widget <= HUB_WIDGET_ID_MAX; widget++)
178 hubdev->hdi_xwidget_info[widget].xwi_hubinfo = hubdev; 184 hubdev->hdi_xwidget_info[widget].xwi_hubinfo = hubdev;
179 185
@@ -211,10 +217,6 @@ static void sn_fixup_ionodes(void)
211 sn_flush_device_list; 217 sn_flush_device_list;
212 } 218 }
213 219
214 if (!(i & 1))
215 hub_error_init(hubdev);
216 else
217 ice_error_init(hubdev);
218 } 220 }
219 221
220} 222}
diff --git a/arch/ia64/sn/kernel/mca.c b/arch/ia64/sn/kernel/mca.c
index 857774bb2c9a..6546db6abdba 100644
--- a/arch/ia64/sn/kernel/mca.c
+++ b/arch/ia64/sn/kernel/mca.c
@@ -37,6 +37,11 @@ static u64 *sn_oemdata_size, sn_oemdata_bufsize;
37 * This function is the callback routine that SAL calls to log error 37 * This function is the callback routine that SAL calls to log error
38 * info for platform errors. buf is appended to sn_oemdata, resizing as 38 * info for platform errors. buf is appended to sn_oemdata, resizing as
39 * required. 39 * required.
40 * Note: this is a SAL to OS callback, running under the same rules as the SAL
41 * code. SAL calls are run with preempt disabled so this routine must not
42 * sleep. vmalloc can sleep so print_hook cannot resize the output buffer
 43 * itself; instead it must set the required size and return to let the caller
44 * resize the buffer then redrive the SAL call.
40 */ 45 */
41static int print_hook(const char *fmt, ...) 46static int print_hook(const char *fmt, ...)
42{ 47{
@@ -47,18 +52,8 @@ static int print_hook(const char *fmt, ...)
47 vsnprintf(buf, sizeof(buf), fmt, args); 52 vsnprintf(buf, sizeof(buf), fmt, args);
48 va_end(args); 53 va_end(args);
49 len = strlen(buf); 54 len = strlen(buf);
50 while (*sn_oemdata_size + len + 1 > sn_oemdata_bufsize) { 55 if (*sn_oemdata_size + len <= sn_oemdata_bufsize)
51 u8 *newbuf = vmalloc(sn_oemdata_bufsize += 1000); 56 memcpy(*sn_oemdata + *sn_oemdata_size, buf, len);
52 if (!newbuf) {
53 printk(KERN_ERR "%s: unable to extend sn_oemdata\n",
54 __FUNCTION__);
55 return 0;
56 }
57 memcpy(newbuf, *sn_oemdata, *sn_oemdata_size);
58 vfree(*sn_oemdata);
59 *sn_oemdata = newbuf;
60 }
61 memcpy(*sn_oemdata + *sn_oemdata_size, buf, len + 1);
62 *sn_oemdata_size += len; 57 *sn_oemdata_size += len;
63 return 0; 58 return 0;
64} 59}
@@ -98,7 +93,20 @@ sn_platform_plat_specific_err_print(const u8 * sect_header, u8 ** oemdata,
98 sn_oemdata = oemdata; 93 sn_oemdata = oemdata;
99 sn_oemdata_size = oemdata_size; 94 sn_oemdata_size = oemdata_size;
100 sn_oemdata_bufsize = 0; 95 sn_oemdata_bufsize = 0;
101 ia64_sn_plat_specific_err_print(print_hook, (char *)sect_header); 96 *sn_oemdata_size = PAGE_SIZE; /* first guess at how much data will be generated */
97 while (*sn_oemdata_size > sn_oemdata_bufsize) {
98 u8 *newbuf = vmalloc(*sn_oemdata_size);
99 if (!newbuf) {
100 printk(KERN_ERR "%s: unable to extend sn_oemdata\n",
101 __FUNCTION__);
102 return 1;
103 }
104 vfree(*sn_oemdata);
105 *sn_oemdata = newbuf;
106 sn_oemdata_bufsize = *sn_oemdata_size;
107 *sn_oemdata_size = 0;
108 ia64_sn_plat_specific_err_print(print_hook, (char *)sect_header);
109 }
102 up(&sn_oemdata_mutex); 110 up(&sn_oemdata_mutex);
103 return 0; 111 return 0;
 104} 112}
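The rewrite above turns print_hook into a measure-and-maybe-copy pass: it
only copies when the data fits and always accumulates the size needed, while
the caller allocates (vmalloc may sleep, so it must happen outside the SAL
callback) and redrives the SAL call until the buffer is large enough. A
generic sketch of the pattern, with a hypothetical generate() callback
standing in for the SAL call:

	size_t have = 0, need = PAGE_SIZE;	/* first guess */
	u8 *buf = NULL;

	while (need > have) {
		vfree(buf);			/* vfree(NULL) is a no-op */
		buf = vmalloc(need);		/* may sleep; done outside the callback */
		if (!buf)
			return 1;
		have = need;
		need = 0;
		generate(buf, have, &need);	/* hypothetical: copies if it fits,
						 * always records the size needed */
	}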
diff --git a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c
index d35f2a6f9c94..44bfc7f318cb 100644
--- a/arch/ia64/sn/kernel/setup.c
+++ b/arch/ia64/sn/kernel/setup.c
@@ -3,7 +3,7 @@
3 * License. See the file "COPYING" in the main directory of this archive 3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details. 4 * for more details.
5 * 5 *
6 * Copyright (C) 1999,2001-2004 Silicon Graphics, Inc. All rights reserved. 6 * Copyright (C) 1999,2001-2005 Silicon Graphics, Inc. All rights reserved.
7 */ 7 */
8 8
9#include <linux/config.h> 9#include <linux/config.h>
@@ -73,6 +73,12 @@ EXPORT_SYMBOL(sn_rtc_cycles_per_second);
73DEFINE_PER_CPU(struct sn_hub_info_s, __sn_hub_info); 73DEFINE_PER_CPU(struct sn_hub_info_s, __sn_hub_info);
74EXPORT_PER_CPU_SYMBOL(__sn_hub_info); 74EXPORT_PER_CPU_SYMBOL(__sn_hub_info);
75 75
76DEFINE_PER_CPU(short, __sn_cnodeid_to_nasid[MAX_NUMNODES]);
77EXPORT_PER_CPU_SYMBOL(__sn_cnodeid_to_nasid);
78
79DEFINE_PER_CPU(struct nodepda_s *, __sn_nodepda);
80EXPORT_PER_CPU_SYMBOL(__sn_nodepda);
81
76partid_t sn_partid = -1; 82partid_t sn_partid = -1;
77EXPORT_SYMBOL(sn_partid); 83EXPORT_SYMBOL(sn_partid);
78char sn_system_serial_number_string[128]; 84char sn_system_serial_number_string[128];
@@ -216,7 +222,7 @@ void __init early_sn_setup(void)
216 222
217extern int platform_intr_list[]; 223extern int platform_intr_list[];
218extern nasid_t master_nasid; 224extern nasid_t master_nasid;
219static int shub_1_1_found __initdata; 225static int __initdata shub_1_1_found = 0;
220 226
221/* 227/*
222 * sn_check_for_wars 228 * sn_check_for_wars
@@ -245,7 +251,7 @@ static void __init sn_check_for_wars(void)
245 } else { 251 } else {
246 for_each_online_node(cnode) { 252 for_each_online_node(cnode) {
247 if (is_shub_1_1(cnodeid_to_nasid(cnode))) 253 if (is_shub_1_1(cnodeid_to_nasid(cnode)))
248 sn_hub_info->shub_1_1_found = 1; 254 shub_1_1_found = 1;
249 } 255 }
250 } 256 }
251} 257}
@@ -265,6 +271,8 @@ void __init sn_setup(char **cmdline_p)
265 int major = sn_sal_rev_major(), minor = sn_sal_rev_minor(); 271 int major = sn_sal_rev_major(), minor = sn_sal_rev_minor();
266 extern void sn_cpu_init(void); 272 extern void sn_cpu_init(void);
267 273
274 ia64_sn_plat_set_error_handling_features();
275
268 /* 276 /*
269 * If the generic code has enabled vga console support - lets 277 * If the generic code has enabled vga console support - lets
270 * get rid of it again. This is a kludge for the fact that ACPI 278 * get rid of it again. This is a kludge for the fact that ACPI
@@ -373,11 +381,11 @@ static void __init sn_init_pdas(char **cmdline_p)
373{ 381{
374 cnodeid_t cnode; 382 cnodeid_t cnode;
375 383
376 memset(pda->cnodeid_to_nasid_table, -1, 384 memset(sn_cnodeid_to_nasid, -1,
377 sizeof(pda->cnodeid_to_nasid_table)); 385 sizeof(__ia64_per_cpu_var(__sn_cnodeid_to_nasid)));
378 for_each_online_node(cnode) 386 for_each_online_node(cnode)
379 pda->cnodeid_to_nasid_table[cnode] = 387 sn_cnodeid_to_nasid[cnode] =
380 pxm_to_nasid(nid_to_pxm_map[cnode]); 388 pxm_to_nasid(nid_to_pxm_map[cnode]);
381 389
382 numionodes = num_online_nodes(); 390 numionodes = num_online_nodes();
383 scan_for_ionodes(); 391 scan_for_ionodes();
@@ -477,7 +485,8 @@ void __init sn_cpu_init(void)
477 485
478 cnode = nasid_to_cnodeid(nasid); 486 cnode = nasid_to_cnodeid(nasid);
479 487
480 pda->p_nodepda = nodepdaindr[cnode]; 488 sn_nodepda = nodepdaindr[cnode];
489
481 pda->led_address = 490 pda->led_address =
482 (typeof(pda->led_address)) (LED0 + (slice << LED_CPU_SHIFT)); 491 (typeof(pda->led_address)) (LED0 + (slice << LED_CPU_SHIFT));
483 pda->led_state = LED_ALWAYS_SET; 492 pda->led_state = LED_ALWAYS_SET;
@@ -486,15 +495,18 @@ void __init sn_cpu_init(void)
486 pda->idle_flag = 0; 495 pda->idle_flag = 0;
487 496
488 if (cpuid != 0) { 497 if (cpuid != 0) {
489 memcpy(pda->cnodeid_to_nasid_table, 498 /* copy cpu 0's sn_cnodeid_to_nasid table to this cpu's */
490 pdacpu(0)->cnodeid_to_nasid_table, 499 memcpy(sn_cnodeid_to_nasid,
491 sizeof(pda->cnodeid_to_nasid_table)); 500 (&per_cpu(__sn_cnodeid_to_nasid, 0)),
501 sizeof(__ia64_per_cpu_var(__sn_cnodeid_to_nasid)));
492 } 502 }
493 503
494 /* 504 /*
495 * Check for WARs. 505 * Check for WARs.
496 * Only needs to be done once, on BSP. 506 * Only needs to be done once, on BSP.
497 * Has to be done after loop above, because it uses pda.cnodeid_to_nasid_table[i]. 507 * Has to be done after loop above, because it uses this cpu's
508 * sn_cnodeid_to_nasid table which was just initialized if this
509 * isn't cpu 0.
498 * Has to be done before assignment below. 510 * Has to be done before assignment below.
499 */ 511 */
500 if (!wars_have_been_checked) { 512 if (!wars_have_been_checked) {
@@ -580,8 +592,7 @@ static void __init scan_for_ionodes(void)
580 brd = find_lboard_any(brd, KLTYPE_SNIA); 592 brd = find_lboard_any(brd, KLTYPE_SNIA);
581 593
582 while (brd) { 594 while (brd) {
583 pda->cnodeid_to_nasid_table[numionodes] = 595 sn_cnodeid_to_nasid[numionodes] = brd->brd_nasid;
584 brd->brd_nasid;
585 physical_node_map[brd->brd_nasid] = numionodes; 596 physical_node_map[brd->brd_nasid] = numionodes;
586 root_lboard[numionodes] = brd; 597 root_lboard[numionodes] = brd;
587 numionodes++; 598 numionodes++;
@@ -602,8 +613,7 @@ static void __init scan_for_ionodes(void)
602 root_lboard[nasid_to_cnodeid(nasid)], 613 root_lboard[nasid_to_cnodeid(nasid)],
603 KLTYPE_TIO); 614 KLTYPE_TIO);
604 while (brd) { 615 while (brd) {
605 pda->cnodeid_to_nasid_table[numionodes] = 616 sn_cnodeid_to_nasid[numionodes] = brd->brd_nasid;
606 brd->brd_nasid;
607 physical_node_map[brd->brd_nasid] = numionodes; 617 physical_node_map[brd->brd_nasid] = numionodes;
608 root_lboard[numionodes] = brd; 618 root_lboard[numionodes] = brd;
609 numionodes++; 619 numionodes++;
@@ -614,7 +624,6 @@ static void __init scan_for_ionodes(void)
614 brd = find_lboard_any(brd, KLTYPE_TIO); 624 brd = find_lboard_any(brd, KLTYPE_TIO);
615 } 625 }
616 } 626 }
617
618} 627}
619 628
620int 629int
@@ -623,7 +632,8 @@ nasid_slice_to_cpuid(int nasid, int slice)
623 long cpu; 632 long cpu;
624 633
625 for (cpu=0; cpu < NR_CPUS; cpu++) 634 for (cpu=0; cpu < NR_CPUS; cpu++)
626 if (nodepda->phys_cpuid[cpu].nasid == nasid && nodepda->phys_cpuid[cpu].slice == slice) 635 if (cpuid_to_nasid(cpu) == nasid &&
636 cpuid_to_slice(cpu) == slice)
627 return cpu; 637 return cpu;
628 638
629 return -1; 639 return -1;
diff --git a/arch/ia64/sn/kernel/tiocx.c b/arch/ia64/sn/kernel/tiocx.c
index 66190d7e492d..ab9b5f35c2a7 100644
--- a/arch/ia64/sn/kernel/tiocx.c
+++ b/arch/ia64/sn/kernel/tiocx.c
@@ -21,6 +21,8 @@
21#include <asm/sn/types.h> 21#include <asm/sn/types.h>
22#include <asm/sn/shubio.h> 22#include <asm/sn/shubio.h>
23#include <asm/sn/tiocx.h> 23#include <asm/sn/tiocx.h>
24#include <asm/sn/l1.h>
25#include <asm/sn/module.h>
24#include "tio.h" 26#include "tio.h"
25#include "xtalk/xwidgetdev.h" 27#include "xtalk/xwidgetdev.h"
26#include "xtalk/hubdev.h" 28#include "xtalk/hubdev.h"
@@ -308,14 +310,12 @@ void tiocx_irq_free(struct sn_irq_info *sn_irq_info)
308 } 310 }
309} 311}
310 312
311uint64_t 313uint64_t tiocx_dma_addr(uint64_t addr)
312tiocx_dma_addr(uint64_t addr)
313{ 314{
314 return PHYS_TO_TIODMA(addr); 315 return PHYS_TO_TIODMA(addr);
315} 316}
316 317
317uint64_t 318uint64_t tiocx_swin_base(int nasid)
318tiocx_swin_base(int nasid)
319{ 319{
320 return TIO_SWIN_BASE(nasid, TIOCX_CORELET); 320 return TIO_SWIN_BASE(nasid, TIOCX_CORELET);
321} 321}
@@ -330,19 +330,6 @@ EXPORT_SYMBOL(tiocx_bus_type);
330EXPORT_SYMBOL(tiocx_dma_addr); 330EXPORT_SYMBOL(tiocx_dma_addr);
331EXPORT_SYMBOL(tiocx_swin_base); 331EXPORT_SYMBOL(tiocx_swin_base);
332 332
333static uint64_t tiocx_get_hubdev_info(u64 handle, u64 address)
334{
335
336 struct ia64_sal_retval ret_stuff;
337 ret_stuff.status = 0;
338 ret_stuff.v0 = 0;
339
340 ia64_sal_oemcall_nolock(&ret_stuff,
341 SN_SAL_IOIF_GET_HUBDEV_INFO,
342 handle, address, 0, 0, 0, 0, 0);
343 return ret_stuff.v0;
344}
345
346static void tio_conveyor_set(nasid_t nasid, int enable_flag) 333static void tio_conveyor_set(nasid_t nasid, int enable_flag)
347{ 334{
348 uint64_t ice_frz; 335 uint64_t ice_frz;
@@ -379,7 +366,29 @@ static void tio_corelet_reset(nasid_t nasid, int corelet)
379 udelay(2000); 366 udelay(2000);
380} 367}
381 368
382static int fpga_attached(nasid_t nasid) 369static int tiocx_btchar_get(int nasid)
370{
371 moduleid_t module_id;
372 geoid_t geoid;
373 int cnodeid;
374
375 cnodeid = nasid_to_cnodeid(nasid);
376 geoid = cnodeid_get_geoid(cnodeid);
377 module_id = geo_module(geoid);
378 return MODULE_GET_BTCHAR(module_id);
379}
380
381static int is_fpga_brick(int nasid)
382{
383 switch (tiocx_btchar_get(nasid)) {
384 case L1_BRICKTYPE_SA:
385 case L1_BRICKTYPE_ATHENA:
386 return 1;
387 }
388 return 0;
389}
390
391static int bitstream_loaded(nasid_t nasid)
383{ 392{
384 uint64_t cx_credits; 393 uint64_t cx_credits;
385 394
@@ -396,7 +405,7 @@ static int tiocx_reload(struct cx_dev *cx_dev)
396 int mfg_num = CX_DEV_NONE; 405 int mfg_num = CX_DEV_NONE;
397 nasid_t nasid = cx_dev->cx_id.nasid; 406 nasid_t nasid = cx_dev->cx_id.nasid;
398 407
399 if (fpga_attached(nasid)) { 408 if (bitstream_loaded(nasid)) {
400 uint64_t cx_id; 409 uint64_t cx_id;
401 410
402 cx_id = 411 cx_id =
@@ -427,9 +436,10 @@ static ssize_t show_cxdev_control(struct device *dev, char *buf)
427{ 436{
428 struct cx_dev *cx_dev = to_cx_dev(dev); 437 struct cx_dev *cx_dev = to_cx_dev(dev);
429 438
430 return sprintf(buf, "0x%x 0x%x 0x%x\n", 439 return sprintf(buf, "0x%x 0x%x 0x%x %d\n",
431 cx_dev->cx_id.nasid, 440 cx_dev->cx_id.nasid,
432 cx_dev->cx_id.part_num, cx_dev->cx_id.mfg_num); 441 cx_dev->cx_id.part_num, cx_dev->cx_id.mfg_num,
442 tiocx_btchar_get(cx_dev->cx_id.nasid));
433} 443}
434 444
435static ssize_t store_cxdev_control(struct device *dev, const char *buf, 445static ssize_t store_cxdev_control(struct device *dev, const char *buf,
@@ -475,20 +485,14 @@ static int __init tiocx_init(void)
475 if ((nasid = cnodeid_to_nasid(cnodeid)) < 0) 485 if ((nasid = cnodeid_to_nasid(cnodeid)) < 0)
476 break; /* No more nasids .. bail out of loop */ 486 break; /* No more nasids .. bail out of loop */
477 487
478 if (nasid & 0x1) { /* TIO's are always odd */ 488 if ((nasid & 0x1) && is_fpga_brick(nasid)) {
479 struct hubdev_info *hubdev; 489 struct hubdev_info *hubdev;
480 uint64_t status;
481 struct xwidget_info *widgetp; 490 struct xwidget_info *widgetp;
482 491
483 DBG("Found TIO at nasid 0x%x\n", nasid); 492 DBG("Found TIO at nasid 0x%x\n", nasid);
484 493
485 hubdev = 494 hubdev =
486 (struct hubdev_info *)(NODEPDA(cnodeid)->pdinfo); 495 (struct hubdev_info *)(NODEPDA(cnodeid)->pdinfo);
487 status =
488 tiocx_get_hubdev_info(nasid,
489 (uint64_t) __pa(hubdev));
490 if (status)
491 continue;
492 496
493 widgetp = &hubdev->hdi_xwidget_info[TIOCX_CORELET]; 497 widgetp = &hubdev->hdi_xwidget_info[TIOCX_CORELET];
494 498
diff --git a/arch/ia64/sn/kernel/xp_main.c b/arch/ia64/sn/kernel/xp_main.c
new file mode 100644
index 000000000000..3be52a34c80f
--- /dev/null
+++ b/arch/ia64/sn/kernel/xp_main.c
@@ -0,0 +1,289 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * Copyright (c) 2004-2005 Silicon Graphics, Inc. All Rights Reserved.
7 */
8
9
10/*
11 * Cross Partition (XP) base.
12 *
13 * XP provides a base from which its users can interact
14 * with XPC, yet not be dependent on XPC.
15 *
16 */
17
18
19#include <linux/kernel.h>
20#include <linux/interrupt.h>
21#include <linux/module.h>
22#include <asm/sn/intr.h>
23#include <asm/sn/sn_sal.h>
24#include <asm/sn/xp.h>
25
26
27/*
28 * Target of nofault PIO read.
29 */
30u64 xp_nofault_PIOR_target;
31
32
33/*
 34 * xpc_registrations[] keeps track of the xpc_connect() registrations made
 35 * by the kernel-level users of XPC.
36 */
37struct xpc_registration xpc_registrations[XPC_NCHANNELS];
38
39
40/*
41 * Initialize the XPC interface to indicate that XPC isn't loaded.
42 */
43static enum xpc_retval xpc_notloaded(void) { return xpcNotLoaded; }
44
45struct xpc_interface xpc_interface = {
46 (void (*)(int)) xpc_notloaded,
47 (void (*)(int)) xpc_notloaded,
48 (enum xpc_retval (*)(partid_t, int, u32, void **)) xpc_notloaded,
49 (enum xpc_retval (*)(partid_t, int, void *)) xpc_notloaded,
50 (enum xpc_retval (*)(partid_t, int, void *, xpc_notify_func, void *))
51 xpc_notloaded,
52 (void (*)(partid_t, int, void *)) xpc_notloaded,
53 (enum xpc_retval (*)(partid_t, void *)) xpc_notloaded
54};
55
56
57/*
58 * XPC calls this when it (the XPC module) has been loaded.
59 */
60void
61xpc_set_interface(void (*connect)(int),
62 void (*disconnect)(int),
63 enum xpc_retval (*allocate)(partid_t, int, u32, void **),
64 enum xpc_retval (*send)(partid_t, int, void *),
65 enum xpc_retval (*send_notify)(partid_t, int, void *,
66 xpc_notify_func, void *),
67 void (*received)(partid_t, int, void *),
68 enum xpc_retval (*partid_to_nasids)(partid_t, void *))
69{
70 xpc_interface.connect = connect;
71 xpc_interface.disconnect = disconnect;
72 xpc_interface.allocate = allocate;
73 xpc_interface.send = send;
74 xpc_interface.send_notify = send_notify;
75 xpc_interface.received = received;
76 xpc_interface.partid_to_nasids = partid_to_nasids;
77}
78
79
80/*
81 * XPC calls this when it (the XPC module) is being unloaded.
82 */
83void
84xpc_clear_interface(void)
85{
86 xpc_interface.connect = (void (*)(int)) xpc_notloaded;
87 xpc_interface.disconnect = (void (*)(int)) xpc_notloaded;
88 xpc_interface.allocate = (enum xpc_retval (*)(partid_t, int, u32,
89 void **)) xpc_notloaded;
90 xpc_interface.send = (enum xpc_retval (*)(partid_t, int, void *))
91 xpc_notloaded;
92 xpc_interface.send_notify = (enum xpc_retval (*)(partid_t, int, void *,
93 xpc_notify_func, void *)) xpc_notloaded;
94 xpc_interface.received = (void (*)(partid_t, int, void *))
95 xpc_notloaded;
96 xpc_interface.partid_to_nasids = (enum xpc_retval (*)(partid_t, void *))
97 xpc_notloaded;
98}
99
100
101/*
102 * Register for automatic establishment of a channel connection whenever
103 * a partition comes up.
104 *
105 * Arguments:
106 *
107 * ch_number - channel # to register for connection.
108 * func - function to call for asynchronous notification of channel
109 * state changes (i.e., connection, disconnection, error) and
110 * the arrival of incoming messages.
111 * key - pointer to optional user-defined value that gets passed back
112 * to the user on any callouts made to func.
113 * payload_size - size in bytes of the XPC message's payload area which
114 * contains a user-defined message. The user should make
115 * this large enough to hold their largest message.
116 * nentries - max #of XPC message entries a message queue can contain.
117 * The actual number, which is determined when a connection
 118 * is established and may be less than requested, will be
119 * passed to the user via the xpcConnected callout.
120 * assigned_limit - max number of kthreads allowed to be processing
121 * messages (per connection) at any given instant.
122 * idle_limit - max number of kthreads allowed to be idle at any given
123 * instant.
124 */
125enum xpc_retval
126xpc_connect(int ch_number, xpc_channel_func func, void *key, u16 payload_size,
127 u16 nentries, u32 assigned_limit, u32 idle_limit)
128{
129 struct xpc_registration *registration;
130
131
132 DBUG_ON(ch_number < 0 || ch_number >= XPC_NCHANNELS);
133 DBUG_ON(payload_size == 0 || nentries == 0);
134 DBUG_ON(func == NULL);
135 DBUG_ON(assigned_limit == 0 || idle_limit > assigned_limit);
136
137 registration = &xpc_registrations[ch_number];
138
139 if (down_interruptible(&registration->sema) != 0) {
140 return xpcInterrupted;
141 }
142
143 /* if XPC_CHANNEL_REGISTERED(ch_number) */
144 if (registration->func != NULL) {
145 up(&registration->sema);
146 return xpcAlreadyRegistered;
147 }
148
149 /* register the channel for connection */
150 registration->msg_size = XPC_MSG_SIZE(payload_size);
151 registration->nentries = nentries;
152 registration->assigned_limit = assigned_limit;
153 registration->idle_limit = idle_limit;
154 registration->key = key;
155 registration->func = func;
156
157 up(&registration->sema);
158
159 xpc_interface.connect(ch_number);
160
161 return xpcSuccess;
162}
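A minimal sketch of a kernel-level user of this interface (the channel
number, payload struct, and callback body are hypothetical, and the
xpc_channel_func signature is assumed from <asm/sn/xp.h>):

	#include <linux/init.h>
	#include <linux/errno.h>
	#include <asm/sn/xp.h>

	#define MY_CHANNEL	3			/* hypothetical channel # */

	struct my_msg { u64 seq; char text[48]; };	/* hypothetical payload */

	static void
	my_channel_func(enum xpc_retval reason, partid_t partid, int ch_number,
			void *data, void *key)
	{
		/* called on connect/disconnect/error and on message arrival */
	}

	static int __init my_init(void)
	{
		enum xpc_retval ret;

		ret = xpc_connect(MY_CHANNEL, my_channel_func, NULL,
				  sizeof(struct my_msg),
				  16,	/* nentries */
				  4,	/* assigned_limit */
				  2);	/* idle_limit */
		return (ret == xpcSuccess) ? 0 : -EBUSY;
	}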
163
164
165/*
166 * Remove the registration for automatic connection of the specified channel
167 * when a partition comes up.
168 *
 169 * Before returning, xpc_disconnect() will wait until all connections on the
 170 * specified channel have been closed/torn down. So the caller can be assured
171 * that they will not be receiving any more callouts from XPC to their
172 * function registered via xpc_connect().
173 *
174 * Arguments:
175 *
176 * ch_number - channel # to unregister.
177 */
178void
179xpc_disconnect(int ch_number)
180{
181 struct xpc_registration *registration;
182
183
184 DBUG_ON(ch_number < 0 || ch_number >= XPC_NCHANNELS);
185
186 registration = &xpc_registrations[ch_number];
187
188 /*
189 * We've decided not to make this a down_interruptible(), since we
190 * figured XPC's users will just turn around and call xpc_disconnect()
 191 * again anyway, so we might as well wait, if need be.
192 */
193 down(&registration->sema);
194
195 /* if !XPC_CHANNEL_REGISTERED(ch_number) */
196 if (registration->func == NULL) {
197 up(&registration->sema);
198 return;
199 }
200
201 /* remove the connection registration for the specified channel */
202 registration->func = NULL;
203 registration->key = NULL;
204 registration->nentries = 0;
205 registration->msg_size = 0;
206 registration->assigned_limit = 0;
207 registration->idle_limit = 0;
208
209 xpc_interface.disconnect(ch_number);
210
211 up(&registration->sema);
212
213 return;
214}
215
216
217int __init
218xp_init(void)
219{
220 int ret, ch_number;
221 u64 func_addr = *(u64 *) xp_nofault_PIOR;
222 u64 err_func_addr = *(u64 *) xp_error_PIOR;
223
224
225 if (!ia64_platform_is("sn2")) {
226 return -ENODEV;
227 }
228
229 /*
230 * Register a nofault code region which performs a cross-partition
231 * PIO read. If the PIO read times out, the MCA handler will consume
232 * the error and return to a kernel-provided instruction to indicate
 233 * an error. This PIO read exists because it is guaranteed to time out
 234 * if the destination is down (AMO operations do not time out on at
235 * least some CPUs on Shubs <= v1.2, which unfortunately we have to
236 * work around).
237 */
238 if ((ret = sn_register_nofault_code(func_addr, err_func_addr,
239 err_func_addr, 1, 1)) != 0) {
240 printk(KERN_ERR "XP: can't register nofault code, error=%d\n",
241 ret);
242 }
243 /*
244 * Setup the nofault PIO read target. (There is no special reason why
245 * SH_IPI_ACCESS was selected.)
246 */
247 if (is_shub2()) {
248 xp_nofault_PIOR_target = SH2_IPI_ACCESS0;
249 } else {
250 xp_nofault_PIOR_target = SH1_IPI_ACCESS;
251 }
252
253 /* initialize the connection registration semaphores */
254 for (ch_number = 0; ch_number < XPC_NCHANNELS; ch_number++) {
255 sema_init(&xpc_registrations[ch_number].sema, 1); /* mutex */
256 }
257
258 return 0;
259}
260module_init(xp_init);
261
262
263void __exit
264xp_exit(void)
265{
266 u64 func_addr = *(u64 *) xp_nofault_PIOR;
267 u64 err_func_addr = *(u64 *) xp_error_PIOR;
268
269
270 /* unregister the PIO read nofault code region */
271 (void) sn_register_nofault_code(func_addr, err_func_addr,
272 err_func_addr, 1, 0);
273}
274module_exit(xp_exit);
275
276
277MODULE_AUTHOR("Silicon Graphics, Inc.");
278MODULE_DESCRIPTION("Cross Partition (XP) base");
279MODULE_LICENSE("GPL");
280
281EXPORT_SYMBOL(xp_nofault_PIOR);
282EXPORT_SYMBOL(xp_nofault_PIOR_target);
283EXPORT_SYMBOL(xpc_registrations);
284EXPORT_SYMBOL(xpc_interface);
285EXPORT_SYMBOL(xpc_clear_interface);
286EXPORT_SYMBOL(xpc_set_interface);
287EXPORT_SYMBOL(xpc_connect);
288EXPORT_SYMBOL(xpc_disconnect);
289
diff --git a/arch/ia64/sn/kernel/xp_nofault.S b/arch/ia64/sn/kernel/xp_nofault.S
new file mode 100644
index 000000000000..b772543053c9
--- /dev/null
+++ b/arch/ia64/sn/kernel/xp_nofault.S
@@ -0,0 +1,31 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * Copyright (c) 2004-2005 Silicon Graphics, Inc. All Rights Reserved.
7 */
8
9
10/*
11 * The xp_nofault_PIOR function takes a pointer to a remote PIO register
12 * and attempts to load and consume a value from it. This function
13 * will be registered as a nofault code block. In the event that the
14 * PIO read fails, the MCA handler will force the error to look
15 * corrected and vector to the xp_error_PIOR which will return an error.
16 *
17 * extern int xp_nofault_PIOR(void *remote_register);
18 */
19
20 .global xp_nofault_PIOR
21xp_nofault_PIOR:
22 mov r8=r0 // Stage a success return value
23 ld8.acq r9=[r32];; // PIO Read the specified register
24 adds r9=1,r9 // Add to force a consume
25 br.ret.sptk.many b0;; // Return success
26
27 .global xp_error_PIOR
28xp_error_PIOR:
29 mov r8=1 // Return value of 1
30 br.ret.sptk.many b0;; // Return failure
31
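From C, the block registered above is meant to be invoked through the
prototype given in the comment; a hedged usage sketch (the wrapper name is
hypothetical):

	static int remote_partition_reachable(u64 *remote_reg)
	{
		/* remote_reg: a global MMR address on the remote partition;
		 * a non-zero return means the PIO read faulted, i.e. the
		 * remote partition should be treated as down */
		return xp_nofault_PIOR(remote_reg) == 0;
	}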
diff --git a/arch/ia64/sn/kernel/xpc.h b/arch/ia64/sn/kernel/xpc.h
new file mode 100644
index 000000000000..1a0aed8490d1
--- /dev/null
+++ b/arch/ia64/sn/kernel/xpc.h
@@ -0,0 +1,991 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * Copyright (c) 2004-2005 Silicon Graphics, Inc. All Rights Reserved.
7 */
8
9
10/*
11 * Cross Partition Communication (XPC) structures and macros.
12 */
13
14#ifndef _IA64_SN_KERNEL_XPC_H
15#define _IA64_SN_KERNEL_XPC_H
16
17
18#include <linux/config.h>
19#include <linux/interrupt.h>
20#include <linux/sysctl.h>
21#include <linux/device.h>
22#include <asm/pgtable.h>
23#include <asm/processor.h>
24#include <asm/sn/bte.h>
25#include <asm/sn/clksupport.h>
26#include <asm/sn/addrs.h>
27#include <asm/sn/mspec.h>
28#include <asm/sn/shub_mmr.h>
29#include <asm/sn/xp.h>
30
31
32/*
33 * XPC Version numbers consist of a major and minor number. XPC can always
34 * talk to versions with same major #, and never talk to versions with a
35 * different major #.
36 */
37#define _XPC_VERSION(_maj, _min) (((_maj) << 4) | ((_min) & 0xf))
38#define XPC_VERSION_MAJOR(_v) ((_v) >> 4)
39#define XPC_VERSION_MINOR(_v) ((_v) & 0xf)
40
41
42/*
43 * The next macros define word or bit representations for given
44 * C-brick nasid in either the SAL provided bit array representing
45 * nasids in the partition/machine or the AMO_t array used for
46 * inter-partition initiation communications.
47 *
 48 * For SN2 machines, C-Bricks are always even-numbered NASIDs. As
49 * such, some space will be saved by insisting that nasid information
50 * passed from SAL always be packed for C-Bricks and the
51 * cross-partition interrupts use the same packing scheme.
52 */
53#define XPC_NASID_W_INDEX(_n) (((_n) / 64) / 2)
54#define XPC_NASID_B_INDEX(_n) (((_n) / 2) & (64 - 1))
55#define XPC_NASID_IN_ARRAY(_n, _p) ((_p)[XPC_NASID_W_INDEX(_n)] & \
56 (1UL << XPC_NASID_B_INDEX(_n)))
57#define XPC_NASID_FROM_W_B(_w, _b) (((_w) * 64 + (_b)) * 2)
58
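A worked example of the packing (editorial, not part of the header): for
C-brick nasid 6,

	int w = XPC_NASID_W_INDEX(6);		/* (6 / 64) / 2     == 0 */
	int b = XPC_NASID_B_INDEX(6);		/* (6 / 2) & 63     == 3 */
	int n = XPC_NASID_FROM_W_B(w, b);	/* (0 * 64 + 3) * 2 == 6, the round trip */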
59#define XPC_HB_DEFAULT_INTERVAL 5 /* incr HB every x secs */
60#define XPC_HB_CHECK_DEFAULT_TIMEOUT 20 /* check HB every x secs */
61
62/* define the process name of HB checker and the CPU it is pinned to */
63#define XPC_HB_CHECK_THREAD_NAME "xpc_hb"
64#define XPC_HB_CHECK_CPU 0
65
66/* define the process name of the discovery thread */
67#define XPC_DISCOVERY_THREAD_NAME "xpc_discovery"
68
69
70#define XPC_HB_ALLOWED(_p, _v) ((_v)->heartbeating_to_mask & (1UL << (_p)))
71#define XPC_ALLOW_HB(_p, _v) (_v)->heartbeating_to_mask |= (1UL << (_p))
72#define XPC_DISALLOW_HB(_p, _v) (_v)->heartbeating_to_mask &= (~(1UL << (_p)))
73
74
75/*
76 * Reserved Page provided by SAL.
77 *
78 * SAL provides one page per partition of reserved memory. When SAL
79 * initialization is complete, SAL_signature, SAL_version, partid,
80 * part_nasids, and mach_nasids are set.
81 *
82 * Note: Until vars_pa is set, the partition XPC code has not been initialized.
83 */
84struct xpc_rsvd_page {
85 u64 SAL_signature; /* SAL unique signature */
86 u64 SAL_version; /* SAL specified version */
87 u8 partid; /* partition ID from SAL */
88 u8 version;
89 u8 pad[6]; /* pad to u64 align */
90 u64 vars_pa;
91 u64 part_nasids[XP_NASID_MASK_WORDS] ____cacheline_aligned;
92 u64 mach_nasids[XP_NASID_MASK_WORDS] ____cacheline_aligned;
93};
94#define XPC_RP_VERSION _XPC_VERSION(1,0) /* version 1.0 of the reserved page */
95
96#define XPC_RSVD_PAGE_ALIGNED_SIZE \
97 (L1_CACHE_ALIGN(sizeof(struct xpc_rsvd_page)))
98
99
100/*
101 * Define the structures by which XPC variables can be exported to other
102 * partitions. (There are two: struct xpc_vars and struct xpc_vars_part)
103 */
104
105/*
106 * The following structure describes the partition generic variables
107 * needed by other partitions in order to properly initialize.
108 *
109 * struct xpc_vars version number also applies to struct xpc_vars_part.
110 * Changes to either structure and/or related functionality should be
111 * reflected by incrementing either the major or minor version numbers
112 * of struct xpc_vars.
113 */
114struct xpc_vars {
115 u8 version;
116 u64 heartbeat;
117 u64 heartbeating_to_mask;
118 u64 kdb_status; /* 0 = machine running */
119 int act_nasid;
120 int act_phys_cpuid;
121 u64 vars_part_pa;
122 u64 amos_page_pa; /* paddr of page of AMOs from MSPEC driver */
123 AMO_t *amos_page; /* vaddr of page of AMOs from MSPEC driver */
124 AMO_t *act_amos; /* pointer to the first activation AMO */
125};
126#define XPC_V_VERSION _XPC_VERSION(3,0) /* version 3.0 of the cross vars */
127
128#define XPC_VARS_ALIGNED_SIZE (L1_CACHE_ALIGN(sizeof(struct xpc_vars)))
129
130/*
131 * The following structure describes the per partition specific variables.
132 *
133 * An array of these structures, one per partition, will be defined. As a
134 * partition becomes active XPC will copy the array entry corresponding to
135 * itself from that partition. It is desirable that the size of this
136 * structure evenly divide into a cacheline, such that none of the entries
137 * in this array crosses a cacheline boundary. As it is now, each entry
138 * occupies half a cacheline.
139 */
140struct xpc_vars_part {
141 u64 magic;
142
143 u64 openclose_args_pa; /* physical address of open and close args */
144 u64 GPs_pa; /* physical address of Get/Put values */
145
146 u64 IPI_amo_pa; /* physical address of IPI AMO_t structure */
147 int IPI_nasid; /* nasid of where to send IPIs */
148 int IPI_phys_cpuid; /* physical CPU ID of where to send IPIs */
149
150 u8 nchannels; /* #of defined channels supported */
151
152 u8 reserved[23]; /* pad to a full 64 bytes */
153};
154
155/*
156 * The vars_part MAGIC numbers play a part in the first contact protocol.
157 *
158 * MAGIC1 indicates that the per partition specific variables for a remote
159 * partition have been initialized by this partition.
160 *
 161 * MAGIC2 indicates that this partition has pulled the remote partition's
162 * per partition variables that pertain to this partition.
163 */
164#define XPC_VP_MAGIC1 0x0053524156435058L /* 'XPCVARS\0'L (little endian) */
165#define XPC_VP_MAGIC2 0x0073726176435058L /* 'XPCvars\0'L (little endian) */
166
167
168
169/*
170 * Functions registered by add_timer() or called by kernel_thread() only
171 * allow for a single 64-bit argument. The following macros can be used to
172 * pack and unpack two (32-bit, 16-bit or 8-bit) arguments into or out from
173 * the passed argument.
174 */
175#define XPC_PACK_ARGS(_arg1, _arg2) \
176 ((((u64) _arg1) & 0xffffffff) | \
177 ((((u64) _arg2) & 0xffffffff) << 32))
178
179#define XPC_UNPACK_ARG1(_args) (((u64) _args) & 0xffffffff)
180#define XPC_UNPACK_ARG2(_args) ((((u64) _args) >> 32) & 0xffffffff)
181
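For illustration, a timer whose single u64 argument carries a partid and a
channel number would be packed and unpacked as:

	unsigned long arg = XPC_PACK_ARGS(partid, ch_number);
	/* ... later, in the timer function ... */
	partid_t partid = (partid_t) XPC_UNPACK_ARG1(arg);
	int ch_number = (int) XPC_UNPACK_ARG2(arg);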
182
183
184/*
185 * Define a Get/Put value pair (pointers) used with a message queue.
186 */
187struct xpc_gp {
188 s64 get; /* Get value */
189 s64 put; /* Put value */
190};
191
192#define XPC_GP_SIZE \
193 L1_CACHE_ALIGN(sizeof(struct xpc_gp) * XPC_NCHANNELS)
194
195
196
197/*
198 * Define a structure that contains arguments associated with opening and
199 * closing a channel.
200 */
201struct xpc_openclose_args {
202 u16 reason; /* reason why channel is closing */
203 u16 msg_size; /* sizeof each message entry */
204 u16 remote_nentries; /* #of message entries in remote msg queue */
205 u16 local_nentries; /* #of message entries in local msg queue */
206 u64 local_msgqueue_pa; /* physical address of local message queue */
207};
208
209#define XPC_OPENCLOSE_ARGS_SIZE \
210 L1_CACHE_ALIGN(sizeof(struct xpc_openclose_args) * XPC_NCHANNELS)
211
212
213
214/* struct xpc_msg flags */
215
216#define XPC_M_DONE 0x01 /* msg has been received/consumed */
217#define XPC_M_READY 0x02 /* msg is ready to be sent */
218#define XPC_M_INTERRUPT 0x04 /* send interrupt when msg consumed */
219
220
221#define XPC_MSG_ADDRESS(_payload) \
222 ((struct xpc_msg *)((u8 *)(_payload) - XPC_MSG_PAYLOAD_OFFSET))
223
224
225
226/*
227 * Defines notify entry.
228 *
229 * This is used to notify a message's sender that their message was received
230 * and consumed by the intended recipient.
231 */
232struct xpc_notify {
233 struct semaphore sema; /* notify semaphore */
234 u8 type; /* type of notification */
235
236 /* the following two fields are only used if type == XPC_N_CALL */
237 xpc_notify_func func; /* user's notify function */
238 void *key; /* pointer to user's key */
239};
240
241/* struct xpc_notify type of notification */
242
243#define XPC_N_CALL 0x01 /* notify function provided by user */
244
245
246
247/*
248 * Define the structure that manages all the stuff required by a channel. In
249 * particular, they are used to manage the messages sent across the channel.
250 *
251 * This structure is private to a partition, and is NOT shared across the
252 * partition boundary.
253 *
254 * There is an array of these structures for each remote partition. It is
255 * allocated at the time a partition becomes active. The array contains one
256 * of these structures for each potential channel connection to that partition.
257 *
258 * Each of these structures manages two message queues (circular buffers).
259 * They are allocated at the time a channel connection is made. One of
260 * these message queues (local_msgqueue) holds the locally created messages
261 * that are destined for the remote partition. The other of these message
262 * queues (remote_msgqueue) is a locally cached copy of the remote partition's
263 * own local_msgqueue.
264 *
265 * The following is a description of the Get/Put pointers used to manage these
266 * two message queues. Consider the local_msgqueue to be on one partition
267 * and the remote_msgqueue to be its cached copy on another partition. A
268 * description of what each of the lettered areas contains is included.
269 *
270 *
271 * local_msgqueue remote_msgqueue
272 *
273 * |/////////| |/////////|
274 * w_remote_GP.get --> +---------+ |/////////|
275 * | F | |/////////|
276 * remote_GP.get --> +---------+ +---------+ <-- local_GP->get
277 * | | | |
278 * | | | E |
279 * | | | |
280 * | | +---------+ <-- w_local_GP.get
281 * | B | |/////////|
282 * | | |////D////|
283 * | | |/////////|
284 * | | +---------+ <-- w_remote_GP.put
285 * | | |////C////|
286 * local_GP->put --> +---------+ +---------+ <-- remote_GP.put
287 * | | |/////////|
288 * | A | |/////////|
289 * | | |/////////|
290 * w_local_GP.put --> +---------+ |/////////|
291 * |/////////| |/////////|
292 *
293 *
294 * ( remote_GP.[get|put] are cached copies of the remote
295 * partition's local_GP->[get|put], and thus their values can
296 * lag behind their counterparts on the remote partition. )
297 *
298 *
299 * A - Messages that have been allocated, but have not yet been sent to the
300 * remote partition.
301 *
302 * B - Messages that have been sent, but have not yet been acknowledged by the
303 * remote partition as having been received.
304 *
305 * C - Area that needs to be prepared for the copying of sent messages, by
306 * the clearing of the message flags of any previously received messages.
307 *
308 * D - Area into which sent messages are to be copied from the remote
309 * partition's local_msgqueue and then delivered to their intended
310 * recipients. [ To allow for a multi-message copy, another pointer
311 * (next_msg_to_pull) has been added to keep track of the next message
312 * number needing to be copied (pulled). It chases after w_remote_GP.put.
313 * Any messages lying between w_local_GP.get and next_msg_to_pull have
314 * been copied and are ready to be delivered. ]
315 *
316 * E - Messages that have been copied and delivered, but have not yet been
317 * acknowledged by the recipient as having been received.
318 *
319 * F - Messages that have been acknowledged, but XPC has not yet notified the
320 * sender that the message was received by its intended recipient.
321 * This is also an area that needs to be prepared for the allocating of
322 * new messages, by the clearing of the message flags of the acknowledged
323 * messages.
324 */
325struct xpc_channel {
326 partid_t partid; /* ID of remote partition connected */
327 spinlock_t lock; /* lock for updating this structure */
328 u32 flags; /* general flags */
329
330 enum xpc_retval reason; /* reason why channel is disconnect'g */
331 int reason_line; /* line# disconnect initiated from */
332
333 u16 number; /* channel # */
334
335 u16 msg_size; /* sizeof each msg entry */
336 u16 local_nentries; /* #of msg entries in local msg queue */
337 u16 remote_nentries; /* #of msg entries in remote msg queue*/
338
339 void *local_msgqueue_base; /* base address of kmalloc'd space */
340 struct xpc_msg *local_msgqueue; /* local message queue */
341 void *remote_msgqueue_base; /* base address of kmalloc'd space */
342 struct xpc_msg *remote_msgqueue;/* cached copy of remote partition's */
343 /* local message queue */
344 u64 remote_msgqueue_pa; /* phys addr of remote partition's */
345 /* local message queue */
346
347 atomic_t references; /* #of external references to queues */
348
349 atomic_t n_on_msg_allocate_wq; /* #on msg allocation wait queue */
350 wait_queue_head_t msg_allocate_wq; /* msg allocation wait queue */
351
352 /* queue of msg senders who want to be notified when msg received */
353
354 atomic_t n_to_notify; /* #of msg senders to notify */
355 struct xpc_notify *notify_queue;/* notify queue for messages sent */
356
357 xpc_channel_func func; /* user's channel function */
358 void *key; /* pointer to user's key */
359
360 struct semaphore msg_to_pull_sema; /* next msg to pull serialization */
361 struct semaphore teardown_sema; /* wait for teardown completion */
362
363 struct xpc_openclose_args *local_openclose_args; /* args passed on */
364 /* opening or closing of channel */
365
366 /* various flavors of local and remote Get/Put values */
367
368 struct xpc_gp *local_GP; /* local Get/Put values */
369 struct xpc_gp remote_GP; /* remote Get/Put values */
370 struct xpc_gp w_local_GP; /* working local Get/Put values */
371 struct xpc_gp w_remote_GP; /* working remote Get/Put values */
372 s64 next_msg_to_pull; /* Put value of next msg to pull */
373
374 /* kthread management related fields */
375
376// >>> rethink having kthreads_assigned_limit and kthreads_idle_limit; perhaps
 377// >>> allow the assigned limit to be unbounded and let the idle limit be
 378// >>> dynamic, dependent on activity over the last interval of time
379 atomic_t kthreads_assigned; /* #of kthreads assigned to channel */
380 u32 kthreads_assigned_limit; /* limit on #of kthreads assigned */
381 atomic_t kthreads_idle; /* #of kthreads idle waiting for work */
382 u32 kthreads_idle_limit; /* limit on #of kthreads idle */
383 atomic_t kthreads_active; /* #of kthreads actively working */
384 // >>> following field is temporary
385 u32 kthreads_created; /* total #of kthreads created */
386
387 wait_queue_head_t idle_wq; /* idle kthread wait queue */
388
389} ____cacheline_aligned;
390
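Since the Get/Put values are monotonically increasing s64 counters, plain
subtraction yields the size of each lettered area in the diagram above. A
hedged sketch (the helper name is hypothetical):

	static inline s64
	xpc_n_unacked_msgs(struct xpc_channel *ch)
	{
		/* area B: messages sent (local_GP->put) but not yet
		 * acknowledged as received by the remote side (remote_GP.get) */
		return ch->local_GP->put - ch->remote_GP.get;
	}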
391
392/* struct xpc_channel flags */
393
394#define XPC_C_WASCONNECTED 0x00000001 /* channel was connected */
395
396#define XPC_C_ROPENREPLY 0x00000002 /* remote open channel reply */
397#define XPC_C_OPENREPLY 0x00000004 /* local open channel reply */
398#define XPC_C_ROPENREQUEST 0x00000008 /* remote open channel request */
399#define XPC_C_OPENREQUEST 0x00000010 /* local open channel request */
400
401#define XPC_C_SETUP 0x00000020 /* channel's msgqueues are alloc'd */
402#define XPC_C_CONNECTCALLOUT 0x00000040 /* channel connected callout made */
403#define XPC_C_CONNECTED 0x00000080 /* local channel is connected */
404#define XPC_C_CONNECTING 0x00000100 /* channel is being connected */
405
406#define XPC_C_RCLOSEREPLY 0x00000200 /* remote close channel reply */
407#define XPC_C_CLOSEREPLY 0x00000400 /* local close channel reply */
408#define XPC_C_RCLOSEREQUEST 0x00000800 /* remote close channel request */
409#define XPC_C_CLOSEREQUEST 0x00001000 /* local close channel request */
410
411#define XPC_C_DISCONNECTED 0x00002000 /* channel is disconnected */
412#define XPC_C_DISCONNECTING 0x00004000 /* channel is being disconnected */
413
414
415
416/*
417 * Manages channels on a partition basis. There is one of these structures
418 * for each partition (a partition will never utilize the structure that
419 * represents itself).
420 */
421struct xpc_partition {
422
423 /* XPC HB infrastructure */
424
425 u64 remote_rp_pa; /* phys addr of partition's rsvd pg */
426 u64 remote_vars_pa; /* phys addr of partition's vars */
427 u64 remote_vars_part_pa; /* phys addr of partition's vars part */
428 u64 last_heartbeat; /* HB at last read */
429 u64 remote_amos_page_pa; /* phys addr of partition's amos page */
430 int remote_act_nasid; /* active part's act/deact nasid */
431 int remote_act_phys_cpuid; /* active part's act/deact phys cpuid */
432 u32 act_IRQ_rcvd; /* IRQs since activation */
433 spinlock_t act_lock; /* protect updating of act_state */
434 u8 act_state; /* from XPC HB viewpoint */
435 enum xpc_retval reason; /* reason partition is deactivating */
436 int reason_line; /* line# deactivation initiated from */
437 int reactivate_nasid; /* nasid in partition to reactivate */
438
439
440 /* XPC infrastructure referencing and teardown control */
441
442 u8 setup_state; /* infrastructure setup state */
443 wait_queue_head_t teardown_wq; /* kthread waiting to teardown infra */
444 atomic_t references; /* #of references to infrastructure */
445
446
447 /*
448 * NONE OF THE PRECEDING FIELDS OF THIS STRUCTURE WILL BE CLEARED WHEN
449 * XPC SETS UP THE NECESSARY INFRASTRUCTURE TO SUPPORT CROSS PARTITION
450 * COMMUNICATION. ALL OF THE FOLLOWING FIELDS WILL BE CLEARED. (THE
451 * 'nchannels' FIELD MUST BE THE FIRST OF THE FIELDS TO BE CLEARED.)
452 */
453
454
455 u8 nchannels; /* #of defined channels supported */
456 atomic_t nchannels_active; /* #of channels that are not DISCONNECTED */
457 struct xpc_channel *channels;/* array of channel structures */
458
459 void *local_GPs_base; /* base address of kmalloc'd space */
460 struct xpc_gp *local_GPs; /* local Get/Put values */
461 void *remote_GPs_base; /* base address of kmalloc'd space */
462 struct xpc_gp *remote_GPs;/* copy of remote partition's local Get/Put */
463 /* values */
464 u64 remote_GPs_pa; /* phys address of remote partition's local */
465 /* Get/Put values */
466
467
468 /* fields used to pass args when opening or closing a channel */
469
470 void *local_openclose_args_base; /* base address of kmalloc'd space */
471 struct xpc_openclose_args *local_openclose_args; /* local's args */
472 void *remote_openclose_args_base; /* base address of kmalloc'd space */
473 struct xpc_openclose_args *remote_openclose_args; /* copy of remote's */
474 /* args */
475 u64 remote_openclose_args_pa; /* phys addr of remote's args */
476
477
478 /* IPI sending, receiving and handling related fields */
479
480 int remote_IPI_nasid; /* nasid of where to send IPIs */
481 int remote_IPI_phys_cpuid; /* phys CPU ID of where to send IPIs */
482 AMO_t *remote_IPI_amo_va; /* address of remote IPI AMO_t structure */
483
484 AMO_t *local_IPI_amo_va; /* address of IPI AMO_t structure */
485 u64 local_IPI_amo; /* IPI amo flags yet to be handled */
486 char IPI_owner[8]; /* IPI owner's name */
487 struct timer_list dropped_IPI_timer; /* dropped IPI timer */
488
489 spinlock_t IPI_lock; /* IPI handler lock */
490
491
492 /* channel manager related fields */
493
494 atomic_t channel_mgr_requests; /* #of requests to activate chan mgr */
495 wait_queue_head_t channel_mgr_wq; /* channel mgr's wait queue */
496
497} ____cacheline_aligned;
498
499
500/* struct xpc_partition act_state values (for XPC HB) */
501
502#define XPC_P_INACTIVE 0x00 /* partition is not active */
503#define XPC_P_ACTIVATION_REQ 0x01 /* created thread to activate */
504#define XPC_P_ACTIVATING 0x02 /* activation thread started */
505#define XPC_P_ACTIVE 0x03 /* xpc_partition_up() was called */
506#define XPC_P_DEACTIVATING 0x04 /* partition deactivation initiated */
507
508
509#define XPC_DEACTIVATE_PARTITION(_p, _reason) \
510 xpc_deactivate_partition(__LINE__, (_p), (_reason))
511
512
513/* struct xpc_partition setup_state values */
514
515#define XPC_P_UNSET 0x00 /* infrastructure was never setup */
516#define XPC_P_SETUP 0x01 /* infrastructure is setup */
 517#define XPC_P_WTEARDOWN 0x02 /* waiting to tear down infrastructure */
 518#define XPC_P_TORNDOWN 0x03 /* infrastructure is torn down */
519
520
521/*
522 * struct xpc_partition IPI_timer #of seconds to wait before checking for
523 * dropped IPIs. These occur whenever an IPI amo write doesn't complete until
524 * after the IPI was received.
525 */
526#define XPC_P_DROPPED_IPI_WAIT (0.25 * HZ)
527
528
529#define XPC_PARTID(_p) ((partid_t) ((_p) - &xpc_partitions[0]))
530
531
532
533/* found in xp_main.c */
534extern struct xpc_registration xpc_registrations[];
535
536
537/* >>> found in xpc_main.c only */
538extern struct device *xpc_part;
539extern struct device *xpc_chan;
540extern irqreturn_t xpc_notify_IRQ_handler(int, void *, struct pt_regs *);
541extern void xpc_dropped_IPI_check(struct xpc_partition *);
542extern void xpc_activate_kthreads(struct xpc_channel *, int);
543extern void xpc_create_kthreads(struct xpc_channel *, int);
544extern void xpc_disconnect_wait(int);
545
546
547/* found in xpc_main.c and efi-xpc.c */
548extern void xpc_activate_partition(struct xpc_partition *);
549
550
551/* found in xpc_partition.c */
552extern int xpc_exiting;
553extern int xpc_hb_interval;
554extern int xpc_hb_check_interval;
555extern struct xpc_vars *xpc_vars;
556extern struct xpc_rsvd_page *xpc_rsvd_page;
557extern struct xpc_vars_part *xpc_vars_part;
558extern struct xpc_partition xpc_partitions[XP_MAX_PARTITIONS + 1];
559extern char xpc_remote_copy_buffer[];
560extern struct xpc_rsvd_page *xpc_rsvd_page_init(void);
561extern void xpc_allow_IPI_ops(void);
562extern void xpc_restrict_IPI_ops(void);
563extern int xpc_identify_act_IRQ_sender(void);
564extern enum xpc_retval xpc_mark_partition_active(struct xpc_partition *);
565extern void xpc_mark_partition_inactive(struct xpc_partition *);
566extern void xpc_discovery(void);
567extern void xpc_check_remote_hb(void);
568extern void xpc_deactivate_partition(const int, struct xpc_partition *,
569 enum xpc_retval);
570extern enum xpc_retval xpc_initiate_partid_to_nasids(partid_t, void *);
571
572
573/* found in xpc_channel.c */
574extern void xpc_initiate_connect(int);
575extern void xpc_initiate_disconnect(int);
576extern enum xpc_retval xpc_initiate_allocate(partid_t, int, u32, void **);
577extern enum xpc_retval xpc_initiate_send(partid_t, int, void *);
578extern enum xpc_retval xpc_initiate_send_notify(partid_t, int, void *,
579 xpc_notify_func, void *);
580extern void xpc_initiate_received(partid_t, int, void *);
581extern enum xpc_retval xpc_setup_infrastructure(struct xpc_partition *);
582extern enum xpc_retval xpc_pull_remote_vars_part(struct xpc_partition *);
583extern void xpc_process_channel_activity(struct xpc_partition *);
584extern void xpc_connected_callout(struct xpc_channel *);
585extern void xpc_deliver_msg(struct xpc_channel *);
586extern void xpc_disconnect_channel(const int, struct xpc_channel *,
587 enum xpc_retval, unsigned long *);
588extern void xpc_disconnected_callout(struct xpc_channel *);
589extern void xpc_partition_down(struct xpc_partition *, enum xpc_retval);
590extern void xpc_teardown_infrastructure(struct xpc_partition *);
591
592
593
594static inline void
595xpc_wakeup_channel_mgr(struct xpc_partition *part)
596{
597 if (atomic_inc_return(&part->channel_mgr_requests) == 1) {
598 wake_up(&part->channel_mgr_wq);
599 }
600}
601
602
603
604/*
605 * These next two inlines are used to keep us from tearing down a channel's
606 * msg queues while a thread may be referencing them.
607 */
608static inline void
609xpc_msgqueue_ref(struct xpc_channel *ch)
610{
611 atomic_inc(&ch->references);
612}
613
614static inline void
615xpc_msgqueue_deref(struct xpc_channel *ch)
616{
617 s32 refs = atomic_dec_return(&ch->references);
618
619 DBUG_ON(refs < 0);
620 if (refs == 0) {
621 xpc_wakeup_channel_mgr(&xpc_partitions[ch->partid]);
622 }
623}
624
625
626
627#define XPC_DISCONNECT_CHANNEL(_ch, _reason, _irqflgs) \
628 xpc_disconnect_channel(__LINE__, _ch, _reason, _irqflgs)
629
630
631/*
632 * These two inlines are used to keep us from tearing down a partition's
633 * setup infrastructure while a thread may be referencing it.
634 */
635static inline void
636xpc_part_deref(struct xpc_partition *part)
637{
638 s32 refs = atomic_dec_return(&part->references);
639
640
641 DBUG_ON(refs < 0);
642 if (refs == 0 && part->setup_state == XPC_P_WTEARDOWN) {
643 wake_up(&part->teardown_wq);
644 }
645}
646
647static inline int
648xpc_part_ref(struct xpc_partition *part)
649{
650 int setup;
651
652
653 atomic_inc(&part->references);
654 setup = (part->setup_state == XPC_P_SETUP);
655 if (!setup) {
656 xpc_part_deref(part);
657 }
658 return setup;
659}
660
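Typical use of this pair (editorial sketch): take a reference, bail if the
infrastructure isn't set up, and drop the reference when done so that a
pending teardown can proceed:

	if (xpc_part_ref(part)) {
		/* infrastructure is pinned: safe to use part->channels etc. */
		xpc_part_deref(part);
	}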
661
662
663/*
664 * The following macro is to be used for the setting of the reason and
665 * reason_line fields in both the struct xpc_channel and struct xpc_partition
666 * structures.
667 */
668#define XPC_SET_REASON(_p, _reason, _line) \
669 { \
670 (_p)->reason = _reason; \
671 (_p)->reason_line = _line; \
672 }
673
674
675
676/*
677 * The following set of macros and inlines are used for the sending and
678 * receiving of IPIs (also known as IRQs). There are two flavors of IPIs,
679 * one that is associated with partition activity (SGI_XPC_ACTIVATE) and
680 * the other that is associated with channel activity (SGI_XPC_NOTIFY).
681 */
682
683static inline u64
684xpc_IPI_receive(AMO_t *amo)
685{
686 return FETCHOP_LOAD_OP(TO_AMO((u64) &amo->variable), FETCHOP_CLEAR);
687}
688
689
690static inline enum xpc_retval
691xpc_IPI_send(AMO_t *amo, u64 flag, int nasid, int phys_cpuid, int vector)
692{
693 int ret = 0;
694 unsigned long irq_flags;
695
696
697 local_irq_save(irq_flags);
698
699 FETCHOP_STORE_OP(TO_AMO((u64) &amo->variable), FETCHOP_OR, flag);
700 sn_send_IPI_phys(nasid, phys_cpuid, vector, 0);
701
702 /*
703 * We must always use the nofault function regardless of whether we
704 * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
705 * didn't, we'd never know that the other partition is down and would
706 * keep sending IPIs and AMOs to it until the heartbeat times out.
707 */
708 ret = xp_nofault_PIOR((u64 *) GLOBAL_MMR_ADDR(NASID_GET(&amo->variable),
709 xp_nofault_PIOR_target));
710
711 local_irq_restore(irq_flags);
712
713 return ((ret == 0) ? xpcSuccess : xpcPioReadError);
714}
715
716
717/*
718 * IPIs associated with SGI_XPC_ACTIVATE IRQ.
719 */
720
721/*
722 * Flag the appropriate AMO variable and send an IPI to the specified node.
723 */
724static inline void
725xpc_activate_IRQ_send(u64 amos_page, int from_nasid, int to_nasid,
726 int to_phys_cpuid)
727{
728 int w_index = XPC_NASID_W_INDEX(from_nasid);
729 int b_index = XPC_NASID_B_INDEX(from_nasid);
730 AMO_t *amos = (AMO_t *) __va(amos_page +
731 (XP_MAX_PARTITIONS * sizeof(AMO_t)));
732
733
734 (void) xpc_IPI_send(&amos[w_index], (1UL << b_index), to_nasid,
735 to_phys_cpuid, SGI_XPC_ACTIVATE);
736}
737
738static inline void
739xpc_IPI_send_activate(struct xpc_vars *vars)
740{
741 xpc_activate_IRQ_send(vars->amos_page_pa, cnodeid_to_nasid(0),
742 vars->act_nasid, vars->act_phys_cpuid);
743}
744
745static inline void
746xpc_IPI_send_activated(struct xpc_partition *part)
747{
748 xpc_activate_IRQ_send(part->remote_amos_page_pa, cnodeid_to_nasid(0),
749 part->remote_act_nasid, part->remote_act_phys_cpuid);
750}
751
752static inline void
753xpc_IPI_send_reactivate(struct xpc_partition *part)
754{
755 xpc_activate_IRQ_send(xpc_vars->amos_page_pa, part->reactivate_nasid,
756 xpc_vars->act_nasid, xpc_vars->act_phys_cpuid);
757}
758
759
760/*
761 * IPIs associated with SGI_XPC_NOTIFY IRQ.
762 */
763
764/*
765 * Send an IPI to the remote partition that is associated with the
766 * specified channel.
767 */
768#define XPC_NOTIFY_IRQ_SEND(_ch, _ipi_f, _irq_f) \
769 xpc_notify_IRQ_send(_ch, _ipi_f, #_ipi_f, _irq_f)
770
771static inline void
772xpc_notify_IRQ_send(struct xpc_channel *ch, u8 ipi_flag, char *ipi_flag_string,
773 unsigned long *irq_flags)
774{
775 struct xpc_partition *part = &xpc_partitions[ch->partid];
776 enum xpc_retval ret;
777
778
779 if (likely(part->act_state != XPC_P_DEACTIVATING)) {
780 ret = xpc_IPI_send(part->remote_IPI_amo_va,
781 (u64) ipi_flag << (ch->number * 8),
782 part->remote_IPI_nasid,
783 part->remote_IPI_phys_cpuid,
784 SGI_XPC_NOTIFY);
785 dev_dbg(xpc_chan, "%s sent to partid=%d, channel=%d, ret=%d\n",
786 ipi_flag_string, ch->partid, ch->number, ret);
787 if (unlikely(ret != xpcSuccess)) {
788 if (irq_flags != NULL) {
789 spin_unlock_irqrestore(&ch->lock, *irq_flags);
790 }
791 XPC_DEACTIVATE_PARTITION(part, ret);
792 if (irq_flags != NULL) {
793 spin_lock_irqsave(&ch->lock, *irq_flags);
794 }
795 }
796 }
797}
798
799
800/*
801 * Make it look like the remote partition, which is associated with the
802 * specified channel, sent us an IPI. This faked IPI will be handled
803 * by xpc_dropped_IPI_check().
804 */
805#define XPC_NOTIFY_IRQ_SEND_LOCAL(_ch, _ipi_f) \
806 xpc_notify_IRQ_send_local(_ch, _ipi_f, #_ipi_f)
807
808static inline void
809xpc_notify_IRQ_send_local(struct xpc_channel *ch, u8 ipi_flag,
810 char *ipi_flag_string)
811{
812 struct xpc_partition *part = &xpc_partitions[ch->partid];
813
814
815 FETCHOP_STORE_OP(TO_AMO((u64) &part->local_IPI_amo_va->variable),
816 FETCHOP_OR, ((u64) ipi_flag << (ch->number * 8)));
817 dev_dbg(xpc_chan, "%s sent local from partid=%d, channel=%d\n",
818 ipi_flag_string, ch->partid, ch->number);
819}
820
821
822/*
823 * The sending and receiving of IPIs includes the setting of an AMO variable
824 * to indicate the reason the IPI was sent. The 64-bit variable is divided
825 * up into eight bytes, ordered from right to left. Byte zero pertains to
826 * channel 0, byte one to channel 1, and so on. Each byte is described by
827 * the following IPI flags.
828 */
829
830#define XPC_IPI_CLOSEREQUEST 0x01
831#define XPC_IPI_CLOSEREPLY 0x02
832#define XPC_IPI_OPENREQUEST 0x04
833#define XPC_IPI_OPENREPLY 0x08
834#define XPC_IPI_MSGREQUEST 0x10
835
836
837/* given an AMO variable and a channel#, get its associated IPI flags */
838#define XPC_GET_IPI_FLAGS(_amo, _c) ((u8) (((_amo) >> ((_c) * 8)) & 0xff))
839
840#define XPC_ANY_OPENCLOSE_IPI_FLAGS_SET(_amo) ((_amo) & 0x0f0f0f0f0f0f0f0f)
841#define XPC_ANY_MSG_IPI_FLAGS_SET(_amo) ((_amo) & 0x1010101010101010)
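
Because each channel owns one byte of the 64-bit AMO value, the two masks above answer "any open/close activity?" and "any message activity?" across all eight channels with a single AND. A self-contained userspace check of that packing (constants copied from the macros above):

	#include <inttypes.h>
	#include <stdio.h>

	#define IPI_OPENREQUEST	0x04	/* mirrors XPC_IPI_OPENREQUEST */
	#define IPI_MSGREQUEST	0x10	/* mirrors XPC_IPI_MSGREQUEST */

	#define GET_FLAGS(_amo, _c)	((uint8_t)(((_amo) >> ((_c) * 8)) & 0xff))
	#define ANY_OPENCLOSE(_amo)	((_amo) & 0x0f0f0f0f0f0f0f0fULL)
	#define ANY_MSG(_amo)		((_amo) & 0x1010101010101010ULL)

	int main(void)
	{
		uint64_t amo = 0;

		/* byte n carries channel n: flag channel 2 and channel 5 */
		amo |= (uint64_t) IPI_OPENREQUEST << (2 * 8);
		amo |= (uint64_t) IPI_MSGREQUEST << (5 * 8);

		printf("amo=0x%016" PRIx64 "\n", amo);
		printf("channel 2 flags=0x%02x\n", GET_FLAGS(amo, 2));
		printf("open/close pending? %s\n", ANY_OPENCLOSE(amo) ? "yes" : "no");
		printf("msg pending?        %s\n", ANY_MSG(amo) ? "yes" : "no");
		return 0;
	}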
842
843
844static inline void
845xpc_IPI_send_closerequest(struct xpc_channel *ch, unsigned long *irq_flags)
846{
847 struct xpc_openclose_args *args = ch->local_openclose_args;
848
849
850 args->reason = ch->reason;
851
852 XPC_NOTIFY_IRQ_SEND(ch, XPC_IPI_CLOSEREQUEST, irq_flags);
853}
854
855static inline void
856xpc_IPI_send_closereply(struct xpc_channel *ch, unsigned long *irq_flags)
857{
858 XPC_NOTIFY_IRQ_SEND(ch, XPC_IPI_CLOSEREPLY, irq_flags);
859}
860
861static inline void
862xpc_IPI_send_openrequest(struct xpc_channel *ch, unsigned long *irq_flags)
863{
864 struct xpc_openclose_args *args = ch->local_openclose_args;
865
866
867 args->msg_size = ch->msg_size;
868 args->local_nentries = ch->local_nentries;
869
870 XPC_NOTIFY_IRQ_SEND(ch, XPC_IPI_OPENREQUEST, irq_flags);
871}
872
873static inline void
874xpc_IPI_send_openreply(struct xpc_channel *ch, unsigned long *irq_flags)
875{
876 struct xpc_openclose_args *args = ch->local_openclose_args;
877
878
879 args->remote_nentries = ch->remote_nentries;
880 args->local_nentries = ch->local_nentries;
881 args->local_msgqueue_pa = __pa(ch->local_msgqueue);
882
883 XPC_NOTIFY_IRQ_SEND(ch, XPC_IPI_OPENREPLY, irq_flags);
884}
885
886static inline void
887xpc_IPI_send_msgrequest(struct xpc_channel *ch)
888{
889 XPC_NOTIFY_IRQ_SEND(ch, XPC_IPI_MSGREQUEST, NULL);
890}
891
892static inline void
893xpc_IPI_send_local_msgrequest(struct xpc_channel *ch)
894{
895 XPC_NOTIFY_IRQ_SEND_LOCAL(ch, XPC_IPI_MSGREQUEST);
896}
897
898
899/*
900 * Memory for XPC's AMO variables is allocated by the MSPEC driver. These
901 * pages are located in the lowest granule. The lowest granule uses 4k pages
902 * for cached references and an alternate TLB handler so that the entire
903 * region is never given a cacheable mapping. This prevents speculative
904 * reads of cached copies of our lines from being issued, which would cause
905 * the SHUB to generate a PI FSB Protocol error. For XPC, we need 64
906 * (XP_MAX_PARTITIONS) AMO variables for message notification (xpc_main.c)
907 * and an additional 16 AMO variables for partition activation (xpc_hb.c).
908 */
909static inline AMO_t *
910xpc_IPI_init(partid_t partid)
911{
912 AMO_t *part_amo = xpc_vars->amos_page + partid;
913
914
915 xpc_IPI_receive(part_amo);
916 return part_amo;
917}
918
919
920
921static inline enum xpc_retval
922xpc_map_bte_errors(bte_result_t error)
923{
924 switch (error) {
925 case BTE_SUCCESS: return xpcSuccess;
926 case BTEFAIL_DIR: return xpcBteDirectoryError;
927 case BTEFAIL_POISON: return xpcBtePoisonError;
928 case BTEFAIL_WERR: return xpcBteWriteError;
929 case BTEFAIL_ACCESS: return xpcBteAccessError;
930 case BTEFAIL_PWERR: return xpcBtePWriteError;
931 case BTEFAIL_PRERR: return xpcBtePReadError;
932 case BTEFAIL_TOUT: return xpcBteTimeOutError;
933 case BTEFAIL_XTERR: return xpcBteXtalkError;
934 case BTEFAIL_NOTAVAIL: return xpcBteNotAvailable;
935 default: return xpcBteUnmappedError;
936 }
937}
938
939
940
941static inline void *
942xpc_kmalloc_cacheline_aligned(size_t size, int flags, void **base)
943{
944	/* see if kmalloc will give us cacheline-aligned memory by default */
945 *base = kmalloc(size, flags);
946 if (*base == NULL) {
947 return NULL;
948 }
949 if ((u64) *base == L1_CACHE_ALIGN((u64) *base)) {
950 return *base;
951 }
952 kfree(*base);
953
954 /* nope, we'll have to do it ourselves */
955 *base = kmalloc(size + L1_CACHE_BYTES, flags);
956 if (*base == NULL) {
957 return NULL;
958 }
959 return (void *) L1_CACHE_ALIGN((u64) *base);
960}
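
The same try-first, overallocate-later trick works with any allocator that lacks an alignment parameter; *base keeps the true start of the allocation so the caller can free it later. A hedged userspace rendering, with CACHE_BYTES an assumed cacheline size standing in for L1_CACHE_BYTES:

	#include <stdint.h>
	#include <stdlib.h>

	#define CACHE_BYTES	128UL	/* assumed cacheline size for illustration */
	#define CACHE_ALIGN(a)	(((a) + CACHE_BYTES - 1) & ~(CACHE_BYTES - 1))

	/*
	 * Returns a cacheline-aligned pointer into an allocation whose true
	 * start is stored in *base, so the caller can later free(*base).
	 */
	void *malloc_cacheline_aligned(size_t size, void **base)
	{
		*base = malloc(size);
		if (*base == NULL)
			return NULL;
		if ((uintptr_t) *base == CACHE_ALIGN((uintptr_t) *base))
			return *base;	/* got lucky: already aligned */
		free(*base);

		/* pay one extra cacheline so an aligned start must exist inside */
		*base = malloc(size + CACHE_BYTES);
		if (*base == NULL)
			return NULL;
		return (void *) CACHE_ALIGN((uintptr_t) *base);
	}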
961
962
963/*
964 * Check to see if there is any channel activity to/from the specified
965 * partition.
966 */
967static inline void
968xpc_check_for_channel_activity(struct xpc_partition *part)
969{
970 u64 IPI_amo;
971 unsigned long irq_flags;
972
973
974 IPI_amo = xpc_IPI_receive(part->local_IPI_amo_va);
975 if (IPI_amo == 0) {
976 return;
977 }
978
979 spin_lock_irqsave(&part->IPI_lock, irq_flags);
980 part->local_IPI_amo |= IPI_amo;
981 spin_unlock_irqrestore(&part->IPI_lock, irq_flags);
982
983 dev_dbg(xpc_chan, "received IPI from partid=%d, IPI_amo=0x%lx\n",
984 XPC_PARTID(part), IPI_amo);
985
986 xpc_wakeup_channel_mgr(part);
987}
988
989
990#endif /* _IA64_SN_KERNEL_XPC_H */
991
diff --git a/arch/ia64/sn/kernel/xpc_channel.c b/arch/ia64/sn/kernel/xpc_channel.c
new file mode 100644
index 000000000000..0bf6fbcc46d2
--- /dev/null
+++ b/arch/ia64/sn/kernel/xpc_channel.c
@@ -0,0 +1,2297 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * Copyright (c) 2004-2005 Silicon Graphics, Inc. All Rights Reserved.
7 */
8
9
10/*
11 * Cross Partition Communication (XPC) channel support.
12 *
13 * This is the part of XPC that manages the channels and
14 * sends/receives messages across them to/from other partitions.
15 *
16 */
17
18
19#include <linux/kernel.h>
20#include <linux/init.h>
21#include <linux/sched.h>
22#include <linux/cache.h>
23#include <linux/interrupt.h>
24#include <linux/slab.h>
25#include <asm/sn/bte.h>
26#include <asm/sn/sn_sal.h>
27#include "xpc.h"
28
29
30/*
31 * Set up the initial values for the XPartition Communication channels.
32 */
33static void
34xpc_initialize_channels(struct xpc_partition *part, partid_t partid)
35{
36 int ch_number;
37 struct xpc_channel *ch;
38
39
40 for (ch_number = 0; ch_number < part->nchannels; ch_number++) {
41 ch = &part->channels[ch_number];
42
43 ch->partid = partid;
44 ch->number = ch_number;
45 ch->flags = XPC_C_DISCONNECTED;
46
47 ch->local_GP = &part->local_GPs[ch_number];
48 ch->local_openclose_args =
49 &part->local_openclose_args[ch_number];
50
51 atomic_set(&ch->kthreads_assigned, 0);
52 atomic_set(&ch->kthreads_idle, 0);
53 atomic_set(&ch->kthreads_active, 0);
54
55 atomic_set(&ch->references, 0);
56 atomic_set(&ch->n_to_notify, 0);
57
58 spin_lock_init(&ch->lock);
59 sema_init(&ch->msg_to_pull_sema, 1); /* mutex */
60
61 atomic_set(&ch->n_on_msg_allocate_wq, 0);
62 init_waitqueue_head(&ch->msg_allocate_wq);
63 init_waitqueue_head(&ch->idle_wq);
64 }
65}
66
67
68/*
69 * Set up the infrastructure necessary to support XPartition Communication
70 * between the specified remote partition and the local one.
71 */
72enum xpc_retval
73xpc_setup_infrastructure(struct xpc_partition *part)
74{
75 int ret;
76 struct timer_list *timer;
77 partid_t partid = XPC_PARTID(part);
78
79
80 /*
81 * Zero out MOST of the entry for this partition. Only the fields
82 * starting with `nchannels' will be zeroed. The preceding fields must
83 * remain `viable' across partition ups and downs, since they may be
84 * referenced during this memset() operation.
85 */
86 memset(&part->nchannels, 0, sizeof(struct xpc_partition) -
87 offsetof(struct xpc_partition, nchannels));
88
89 /*
90 * Allocate all of the channel structures as a contiguous chunk of
91 * memory.
92 */
93 part->channels = kmalloc(sizeof(struct xpc_channel) * XPC_NCHANNELS,
94 GFP_KERNEL);
95 if (part->channels == NULL) {
96 dev_err(xpc_chan, "can't get memory for channels\n");
97 return xpcNoMemory;
98 }
99 memset(part->channels, 0, sizeof(struct xpc_channel) * XPC_NCHANNELS);
100
101 part->nchannels = XPC_NCHANNELS;
102
103
104 /* allocate all the required GET/PUT values */
105
106 part->local_GPs = xpc_kmalloc_cacheline_aligned(XPC_GP_SIZE,
107 GFP_KERNEL, &part->local_GPs_base);
108 if (part->local_GPs == NULL) {
109 kfree(part->channels);
110 part->channels = NULL;
111 dev_err(xpc_chan, "can't get memory for local get/put "
112 "values\n");
113 return xpcNoMemory;
114 }
115 memset(part->local_GPs, 0, XPC_GP_SIZE);
116
117 part->remote_GPs = xpc_kmalloc_cacheline_aligned(XPC_GP_SIZE,
118 GFP_KERNEL, &part->remote_GPs_base);
119 if (part->remote_GPs == NULL) {
120 kfree(part->channels);
121 part->channels = NULL;
122 kfree(part->local_GPs_base);
123 part->local_GPs = NULL;
124 dev_err(xpc_chan, "can't get memory for remote get/put "
125 "values\n");
126 return xpcNoMemory;
127 }
128 memset(part->remote_GPs, 0, XPC_GP_SIZE);
129
130
131 /* allocate all the required open and close args */
132
133 part->local_openclose_args = xpc_kmalloc_cacheline_aligned(
134 XPC_OPENCLOSE_ARGS_SIZE, GFP_KERNEL,
135 &part->local_openclose_args_base);
136 if (part->local_openclose_args == NULL) {
137 kfree(part->channels);
138 part->channels = NULL;
139 kfree(part->local_GPs_base);
140 part->local_GPs = NULL;
141 kfree(part->remote_GPs_base);
142 part->remote_GPs = NULL;
143 dev_err(xpc_chan, "can't get memory for local connect args\n");
144 return xpcNoMemory;
145 }
146 memset(part->local_openclose_args, 0, XPC_OPENCLOSE_ARGS_SIZE);
147
148 part->remote_openclose_args = xpc_kmalloc_cacheline_aligned(
149 XPC_OPENCLOSE_ARGS_SIZE, GFP_KERNEL,
150 &part->remote_openclose_args_base);
151 if (part->remote_openclose_args == NULL) {
152 kfree(part->channels);
153 part->channels = NULL;
154 kfree(part->local_GPs_base);
155 part->local_GPs = NULL;
156 kfree(part->remote_GPs_base);
157 part->remote_GPs = NULL;
158 kfree(part->local_openclose_args_base);
159 part->local_openclose_args = NULL;
160 dev_err(xpc_chan, "can't get memory for remote connect args\n");
161 return xpcNoMemory;
162 }
163 memset(part->remote_openclose_args, 0, XPC_OPENCLOSE_ARGS_SIZE);
164
165
166 xpc_initialize_channels(part, partid);
167
168 atomic_set(&part->nchannels_active, 0);
169
170
171	/* local_IPI_amo was set to 0 by an earlier memset() */
172
173	/* Initialize this partition's AMO_t structure */
174 part->local_IPI_amo_va = xpc_IPI_init(partid);
175
176 spin_lock_init(&part->IPI_lock);
177
178 atomic_set(&part->channel_mgr_requests, 1);
179 init_waitqueue_head(&part->channel_mgr_wq);
180
181 sprintf(part->IPI_owner, "xpc%02d", partid);
182 ret = request_irq(SGI_XPC_NOTIFY, xpc_notify_IRQ_handler, SA_SHIRQ,
183 part->IPI_owner, (void *) (u64) partid);
184 if (ret != 0) {
185 kfree(part->channels);
186 part->channels = NULL;
187 kfree(part->local_GPs_base);
188 part->local_GPs = NULL;
189 kfree(part->remote_GPs_base);
190 part->remote_GPs = NULL;
191 kfree(part->local_openclose_args_base);
192 part->local_openclose_args = NULL;
193 kfree(part->remote_openclose_args_base);
194 part->remote_openclose_args = NULL;
195 dev_err(xpc_chan, "can't register NOTIFY IRQ handler, "
196 "errno=%d\n", -ret);
197 return xpcLackOfResources;
198 }
199
200	/* Set up a timer to check for dropped IPIs */
201 timer = &part->dropped_IPI_timer;
202 init_timer(timer);
203 timer->function = (void (*)(unsigned long)) xpc_dropped_IPI_check;
204 timer->data = (unsigned long) part;
205 timer->expires = jiffies + XPC_P_DROPPED_IPI_WAIT;
206 add_timer(timer);
207
208 /*
209 * With the setting of the partition setup_state to XPC_P_SETUP, we're
210 * declaring that this partition is ready to go.
211 */
212 (volatile u8) part->setup_state = XPC_P_SETUP;
213
214
215 /*
216	 * Set up the per-partition specific variables required by the
217 * remote partition to establish channel connections with us.
218 *
219 * The setting of the magic # indicates that these per partition
220 * specific variables are ready to be used.
221 */
222 xpc_vars_part[partid].GPs_pa = __pa(part->local_GPs);
223 xpc_vars_part[partid].openclose_args_pa =
224 __pa(part->local_openclose_args);
225 xpc_vars_part[partid].IPI_amo_pa = __pa(part->local_IPI_amo_va);
226 xpc_vars_part[partid].IPI_nasid = cpuid_to_nasid(smp_processor_id());
227 xpc_vars_part[partid].IPI_phys_cpuid =
228 cpu_physical_id(smp_processor_id());
229 xpc_vars_part[partid].nchannels = part->nchannels;
230 (volatile u64) xpc_vars_part[partid].magic = XPC_VP_MAGIC1;
231
232 return xpcSuccess;
233}
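
The repeated unwind blocks in xpc_setup_infrastructure() grow with each new allocation; the usual way to compress them is a chain of goto labels that frees in reverse order. A minimal sketch of that equivalent structure (the names and sizes are hypothetical):

	#include <stdlib.h>

	struct infra {
		void *channels;
		void *local_gps;
		void *remote_gps;
	};

	/* returns 0 on success, -1 with nothing leaked on failure */
	int setup_infra(struct infra *p)
	{
		p->channels = calloc(8, 64);
		if (p->channels == NULL)
			goto out;
		p->local_gps = calloc(8, 64);
		if (p->local_gps == NULL)
			goto out_channels;
		p->remote_gps = calloc(8, 64);
		if (p->remote_gps == NULL)
			goto out_local;
		return 0;

	out_local:
		free(p->local_gps);
		p->local_gps = NULL;
	out_channels:
		free(p->channels);
		p->channels = NULL;
	out:
		return -1;
	}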
234
235
236/*
237 * Create a wrapper that hides the underlying mechanism for pulling a cacheline
238 * (or multiple cachelines) from a remote partition.
239 *
240 * src must be a cacheline aligned physical address on the remote partition.
241 * dst must be a cacheline aligned virtual address on this partition.
242 * cnt must be a cacheline-sized multiple.
243 */
244static enum xpc_retval
245xpc_pull_remote_cachelines(struct xpc_partition *part, void *dst,
246 const void *src, size_t cnt)
247{
248 bte_result_t bte_ret;
249
250
251 DBUG_ON((u64) src != L1_CACHE_ALIGN((u64) src));
252 DBUG_ON((u64) dst != L1_CACHE_ALIGN((u64) dst));
253 DBUG_ON(cnt != L1_CACHE_ALIGN(cnt));
254
255 if (part->act_state == XPC_P_DEACTIVATING) {
256 return part->reason;
257 }
258
259 bte_ret = xp_bte_copy((u64) src, (u64) ia64_tpa((u64) dst),
260 (u64) cnt, (BTE_NORMAL | BTE_WACQUIRE), NULL);
261 if (bte_ret == BTE_SUCCESS) {
262 return xpcSuccess;
263 }
264
265 dev_dbg(xpc_chan, "xp_bte_copy() from partition %d failed, ret=%d\n",
266 XPC_PARTID(part), bte_ret);
267
268 return xpc_map_bte_errors(bte_ret);
269}
270
271
272/*
273 * Pull the remote per-partition specific variables from the specified
274 * partition.
275 */
276enum xpc_retval
277xpc_pull_remote_vars_part(struct xpc_partition *part)
278{
279 u8 buffer[L1_CACHE_BYTES * 2];
280 struct xpc_vars_part *pulled_entry_cacheline =
281 (struct xpc_vars_part *) L1_CACHE_ALIGN((u64) buffer);
282 struct xpc_vars_part *pulled_entry;
283 u64 remote_entry_cacheline_pa, remote_entry_pa;
284 partid_t partid = XPC_PARTID(part);
285 enum xpc_retval ret;
286
287
288 /* pull the cacheline that contains the variables we're interested in */
289
290 DBUG_ON(part->remote_vars_part_pa !=
291 L1_CACHE_ALIGN(part->remote_vars_part_pa));
292 DBUG_ON(sizeof(struct xpc_vars_part) != L1_CACHE_BYTES / 2);
293
294 remote_entry_pa = part->remote_vars_part_pa +
295 sn_partition_id * sizeof(struct xpc_vars_part);
296
297 remote_entry_cacheline_pa = (remote_entry_pa & ~(L1_CACHE_BYTES - 1));
298
299 pulled_entry = (struct xpc_vars_part *) ((u64) pulled_entry_cacheline +
300 (remote_entry_pa & (L1_CACHE_BYTES - 1)));
301
302 ret = xpc_pull_remote_cachelines(part, pulled_entry_cacheline,
303 (void *) remote_entry_cacheline_pa,
304 L1_CACHE_BYTES);
305 if (ret != xpcSuccess) {
306 dev_dbg(xpc_chan, "failed to pull XPC vars_part from "
307 "partition %d, ret=%d\n", partid, ret);
308 return ret;
309 }
310
311
312 /* see if they've been set up yet */
313
314 if (pulled_entry->magic != XPC_VP_MAGIC1 &&
315 pulled_entry->magic != XPC_VP_MAGIC2) {
316
317 if (pulled_entry->magic != 0) {
318 dev_dbg(xpc_chan, "partition %d's XPC vars_part for "
319 "partition %d has bad magic value (=0x%lx)\n",
320 partid, sn_partition_id, pulled_entry->magic);
321 return xpcBadMagic;
322 }
323
324 /* they've not been initialized yet */
325 return xpcRetry;
326 }
327
328 if (xpc_vars_part[partid].magic == XPC_VP_MAGIC1) {
329
330 /* validate the variables */
331
332 if (pulled_entry->GPs_pa == 0 ||
333 pulled_entry->openclose_args_pa == 0 ||
334 pulled_entry->IPI_amo_pa == 0) {
335
336 dev_err(xpc_chan, "partition %d's XPC vars_part for "
337 "partition %d are not valid\n", partid,
338 sn_partition_id);
339 return xpcInvalidAddress;
340 }
341
342 /* the variables we imported look to be valid */
343
344 part->remote_GPs_pa = pulled_entry->GPs_pa;
345 part->remote_openclose_args_pa =
346 pulled_entry->openclose_args_pa;
347 part->remote_IPI_amo_va =
348 (AMO_t *) __va(pulled_entry->IPI_amo_pa);
349 part->remote_IPI_nasid = pulled_entry->IPI_nasid;
350 part->remote_IPI_phys_cpuid = pulled_entry->IPI_phys_cpuid;
351
352 if (part->nchannels > pulled_entry->nchannels) {
353 part->nchannels = pulled_entry->nchannels;
354 }
355
356 /* let the other side know that we've pulled their variables */
357
358 (volatile u64) xpc_vars_part[partid].magic = XPC_VP_MAGIC2;
359 }
360
361 if (pulled_entry->magic == XPC_VP_MAGIC1) {
362 return xpcRetry;
363 }
364
365 return xpcSuccess;
366}
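
The address arithmetic used above is worth seeing in isolation: the BTE can only move whole cachelines, so the code pulls the line containing the entry and then indexes back in by the entry's offset within that line. A small sketch with an assumed 128-byte line and a hypothetical address:

	#include <inttypes.h>
	#include <stdio.h>

	#define CACHE_BYTES	128ULL	/* assumed L1 cacheline size */

	int main(void)
	{
		uint64_t entry_pa = 0x1000040ULL;	/* hypothetical remote entry */

		uint64_t line_pa = entry_pa & ~(CACHE_BYTES - 1);
		uint64_t offset = entry_pa & (CACHE_BYTES - 1);

		/* the copy engine transfers [line_pa, line_pa + CACHE_BYTES) */
		printf("pull line at 0x%" PRIx64 ", entry at offset %" PRIu64 "\n",
		       line_pa, offset);
		return 0;
	}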
367
368
369/*
370 * Get the IPI flags and pull the openclose args and/or remote GPs as needed.
371 */
372static u64
373xpc_get_IPI_flags(struct xpc_partition *part)
374{
375 unsigned long irq_flags;
376 u64 IPI_amo;
377 enum xpc_retval ret;
378
379
380 /*
381 * See if there are any IPI flags to be handled.
382 */
383
384 spin_lock_irqsave(&part->IPI_lock, irq_flags);
385 if ((IPI_amo = part->local_IPI_amo) != 0) {
386 part->local_IPI_amo = 0;
387 }
388 spin_unlock_irqrestore(&part->IPI_lock, irq_flags);
389
390
391 if (XPC_ANY_OPENCLOSE_IPI_FLAGS_SET(IPI_amo)) {
392 ret = xpc_pull_remote_cachelines(part,
393 part->remote_openclose_args,
394 (void *) part->remote_openclose_args_pa,
395 XPC_OPENCLOSE_ARGS_SIZE);
396 if (ret != xpcSuccess) {
397 XPC_DEACTIVATE_PARTITION(part, ret);
398
399 dev_dbg(xpc_chan, "failed to pull openclose args from "
400 "partition %d, ret=%d\n", XPC_PARTID(part),
401 ret);
402
403 /* don't bother processing IPIs anymore */
404 IPI_amo = 0;
405 }
406 }
407
408 if (XPC_ANY_MSG_IPI_FLAGS_SET(IPI_amo)) {
409 ret = xpc_pull_remote_cachelines(part, part->remote_GPs,
410 (void *) part->remote_GPs_pa,
411 XPC_GP_SIZE);
412 if (ret != xpcSuccess) {
413 XPC_DEACTIVATE_PARTITION(part, ret);
414
415 dev_dbg(xpc_chan, "failed to pull GPs from partition "
416 "%d, ret=%d\n", XPC_PARTID(part), ret);
417
418 /* don't bother processing IPIs anymore */
419 IPI_amo = 0;
420 }
421 }
422
423 return IPI_amo;
424}
425
426
427/*
428 * Allocate the local message queue and the notify queue.
429 */
430static enum xpc_retval
431xpc_allocate_local_msgqueue(struct xpc_channel *ch)
432{
433 unsigned long irq_flags;
434 int nentries;
435 size_t nbytes;
436
437
438 // >>> may want to check for ch->flags & XPC_C_DISCONNECTING between
439 // >>> iterations of the for-loop, bail if set?
440
441	// >>> should we impose a minimum # of entries? like 4 or 8?
442 for (nentries = ch->local_nentries; nentries > 0; nentries--) {
443
444 nbytes = nentries * ch->msg_size;
445 ch->local_msgqueue = xpc_kmalloc_cacheline_aligned(nbytes,
446 (GFP_KERNEL | GFP_DMA),
447 &ch->local_msgqueue_base);
448 if (ch->local_msgqueue == NULL) {
449 continue;
450 }
451 memset(ch->local_msgqueue, 0, nbytes);
452
453 nbytes = nentries * sizeof(struct xpc_notify);
454 ch->notify_queue = kmalloc(nbytes, (GFP_KERNEL | GFP_DMA));
455 if (ch->notify_queue == NULL) {
456 kfree(ch->local_msgqueue_base);
457 ch->local_msgqueue = NULL;
458 continue;
459 }
460 memset(ch->notify_queue, 0, nbytes);
461
462 spin_lock_irqsave(&ch->lock, irq_flags);
463 if (nentries < ch->local_nentries) {
464 dev_dbg(xpc_chan, "nentries=%d local_nentries=%d, "
465 "partid=%d, channel=%d\n", nentries,
466 ch->local_nentries, ch->partid, ch->number);
467
468 ch->local_nentries = nentries;
469 }
470 spin_unlock_irqrestore(&ch->lock, irq_flags);
471 return xpcSuccess;
472 }
473
474 dev_dbg(xpc_chan, "can't get memory for local message queue and notify "
475 "queue, partid=%d, channel=%d\n", ch->partid, ch->number);
476 return xpcNoMemory;
477}
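
Rather than failing outright, the loop above keeps shrinking the queue until the allocator cooperates, then records the smaller size under the channel lock. The core of that degrade-gracefully pattern in plain C, with calloc() standing in for the kernel's DMA-capable allocations:

	#include <stdlib.h>

	/*
	 * Try to allocate 'want' entries of 'entry_size' bytes, retrying
	 * with ever-smaller counts.  Returns the count obtained (possibly
	 * fewer than 'want'), or 0 if even a single entry cannot be had.
	 */
	int alloc_shrinking(void **q, size_t entry_size, int want)
	{
		int nentries;

		for (nentries = want; nentries > 0; nentries--) {
			*q = calloc(nentries, entry_size);
			if (*q != NULL)
				return nentries;
		}
		return 0;
	}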
478
479
480/*
481 * Allocate the cached remote message queue.
482 */
483static enum xpc_retval
484xpc_allocate_remote_msgqueue(struct xpc_channel *ch)
485{
486 unsigned long irq_flags;
487 int nentries;
488 size_t nbytes;
489
490
491 DBUG_ON(ch->remote_nentries <= 0);
492
493 // >>> may want to check for ch->flags & XPC_C_DISCONNECTING between
494 // >>> iterations of the for-loop, bail if set?
495
496	// >>> should we impose a minimum # of entries? like 4 or 8?
497 for (nentries = ch->remote_nentries; nentries > 0; nentries--) {
498
499 nbytes = nentries * ch->msg_size;
500 ch->remote_msgqueue = xpc_kmalloc_cacheline_aligned(nbytes,
501 (GFP_KERNEL | GFP_DMA),
502 &ch->remote_msgqueue_base);
503 if (ch->remote_msgqueue == NULL) {
504 continue;
505 }
506 memset(ch->remote_msgqueue, 0, nbytes);
507
508 spin_lock_irqsave(&ch->lock, irq_flags);
509 if (nentries < ch->remote_nentries) {
510 dev_dbg(xpc_chan, "nentries=%d remote_nentries=%d, "
511 "partid=%d, channel=%d\n", nentries,
512 ch->remote_nentries, ch->partid, ch->number);
513
514 ch->remote_nentries = nentries;
515 }
516 spin_unlock_irqrestore(&ch->lock, irq_flags);
517 return xpcSuccess;
518 }
519
520 dev_dbg(xpc_chan, "can't get memory for cached remote message queue, "
521 "partid=%d, channel=%d\n", ch->partid, ch->number);
522 return xpcNoMemory;
523}
524
525
526/*
527 * Allocate message queues and other stuff associated with a channel.
528 *
529 * Note: Assumes all of the channel sizes are filled in.
530 */
531static enum xpc_retval
532xpc_allocate_msgqueues(struct xpc_channel *ch)
533{
534 unsigned long irq_flags;
535 int i;
536 enum xpc_retval ret;
537
538
539 DBUG_ON(ch->flags & XPC_C_SETUP);
540
541 if ((ret = xpc_allocate_local_msgqueue(ch)) != xpcSuccess) {
542 return ret;
543 }
544
545 if ((ret = xpc_allocate_remote_msgqueue(ch)) != xpcSuccess) {
546 kfree(ch->local_msgqueue_base);
547 ch->local_msgqueue = NULL;
548 kfree(ch->notify_queue);
549 ch->notify_queue = NULL;
550 return ret;
551 }
552
553 for (i = 0; i < ch->local_nentries; i++) {
554 /* use a semaphore as an event wait queue */
555 sema_init(&ch->notify_queue[i].sema, 0);
556 }
557
558 sema_init(&ch->teardown_sema, 0); /* event wait */
559
560 spin_lock_irqsave(&ch->lock, irq_flags);
561 ch->flags |= XPC_C_SETUP;
562 spin_unlock_irqrestore(&ch->lock, irq_flags);
563
564 return xpcSuccess;
565}
566
567
568/*
569 * Process a connect message from a remote partition.
570 *
571 * Note: xpc_process_connect() expects to be called with the channel's lock
572 * held (via spin_lock_irqsave()) and will leave it locked upon return.
573 */
574static void
575xpc_process_connect(struct xpc_channel *ch, unsigned long *irq_flags)
576{
577 enum xpc_retval ret;
578
579
580 DBUG_ON(!spin_is_locked(&ch->lock));
581
582 if (!(ch->flags & XPC_C_OPENREQUEST) ||
583 !(ch->flags & XPC_C_ROPENREQUEST)) {
584 /* nothing more to do for now */
585 return;
586 }
587 DBUG_ON(!(ch->flags & XPC_C_CONNECTING));
588
589 if (!(ch->flags & XPC_C_SETUP)) {
590 spin_unlock_irqrestore(&ch->lock, *irq_flags);
591 ret = xpc_allocate_msgqueues(ch);
592 spin_lock_irqsave(&ch->lock, *irq_flags);
593
594 if (ret != xpcSuccess) {
595 XPC_DISCONNECT_CHANNEL(ch, ret, irq_flags);
596 }
597 if (ch->flags & (XPC_C_CONNECTED | XPC_C_DISCONNECTING)) {
598 return;
599 }
600
601 DBUG_ON(!(ch->flags & XPC_C_SETUP));
602 DBUG_ON(ch->local_msgqueue == NULL);
603 DBUG_ON(ch->remote_msgqueue == NULL);
604 }
605
606 if (!(ch->flags & XPC_C_OPENREPLY)) {
607 ch->flags |= XPC_C_OPENREPLY;
608 xpc_IPI_send_openreply(ch, irq_flags);
609 }
610
611 if (!(ch->flags & XPC_C_ROPENREPLY)) {
612 return;
613 }
614
615 DBUG_ON(ch->remote_msgqueue_pa == 0);
616
617 ch->flags = (XPC_C_CONNECTED | XPC_C_SETUP); /* clear all else */
618
619 dev_info(xpc_chan, "channel %d to partition %d connected\n",
620 ch->number, ch->partid);
621
622 spin_unlock_irqrestore(&ch->lock, *irq_flags);
623 xpc_create_kthreads(ch, 1);
624 spin_lock_irqsave(&ch->lock, *irq_flags);
625}
626
627
628/*
629 * Free up message queues and other stuff that were allocated for the specified
630 * channel.
631 *
632 * Note: ch->reason and ch->reason_line are left set for debugging purposes;
633 * they're cleared when XPC_C_DISCONNECTED is cleared.
634 */
635static void
636xpc_free_msgqueues(struct xpc_channel *ch)
637{
638 DBUG_ON(!spin_is_locked(&ch->lock));
639 DBUG_ON(atomic_read(&ch->n_to_notify) != 0);
640
641 ch->remote_msgqueue_pa = 0;
642 ch->func = NULL;
643 ch->key = NULL;
644 ch->msg_size = 0;
645 ch->local_nentries = 0;
646 ch->remote_nentries = 0;
647 ch->kthreads_assigned_limit = 0;
648 ch->kthreads_idle_limit = 0;
649
650 ch->local_GP->get = 0;
651 ch->local_GP->put = 0;
652 ch->remote_GP.get = 0;
653 ch->remote_GP.put = 0;
654 ch->w_local_GP.get = 0;
655 ch->w_local_GP.put = 0;
656 ch->w_remote_GP.get = 0;
657 ch->w_remote_GP.put = 0;
658 ch->next_msg_to_pull = 0;
659
660 if (ch->flags & XPC_C_SETUP) {
661 ch->flags &= ~XPC_C_SETUP;
662
663 dev_dbg(xpc_chan, "ch->flags=0x%x, partid=%d, channel=%d\n",
664 ch->flags, ch->partid, ch->number);
665
666 kfree(ch->local_msgqueue_base);
667 ch->local_msgqueue = NULL;
668 kfree(ch->remote_msgqueue_base);
669 ch->remote_msgqueue = NULL;
670 kfree(ch->notify_queue);
671 ch->notify_queue = NULL;
672
673 /* in case someone is waiting for the teardown to complete */
674 up(&ch->teardown_sema);
675 }
676}
677
678
679/*
680 * ch->lock is expected to be held (spin_lock_irqsave()) on entry.
681 */
682static void
683xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags)
684{
685 struct xpc_partition *part = &xpc_partitions[ch->partid];
686 u32 ch_flags = ch->flags;
687
688
689 DBUG_ON(!spin_is_locked(&ch->lock));
690
691 if (!(ch->flags & XPC_C_DISCONNECTING)) {
692 return;
693 }
694
695 DBUG_ON(!(ch->flags & XPC_C_CLOSEREQUEST));
696
697 /* make sure all activity has settled down first */
698
699 if (atomic_read(&ch->references) > 0) {
700 return;
701 }
702 DBUG_ON(atomic_read(&ch->kthreads_assigned) != 0);
703
704 /* it's now safe to free the channel's message queues */
705
706 xpc_free_msgqueues(ch);
707 DBUG_ON(ch->flags & XPC_C_SETUP);
708
709 if (part->act_state != XPC_P_DEACTIVATING) {
710
711 /* as long as the other side is up do the full protocol */
712
713 if (!(ch->flags & XPC_C_RCLOSEREQUEST)) {
714 return;
715 }
716
717 if (!(ch->flags & XPC_C_CLOSEREPLY)) {
718 ch->flags |= XPC_C_CLOSEREPLY;
719 xpc_IPI_send_closereply(ch, irq_flags);
720 }
721
722 if (!(ch->flags & XPC_C_RCLOSEREPLY)) {
723 return;
724 }
725 }
726
727 /* both sides are disconnected now */
728
729 ch->flags = XPC_C_DISCONNECTED; /* clear all flags, but this one */
730
731 atomic_dec(&part->nchannels_active);
732
733 if (ch_flags & XPC_C_WASCONNECTED) {
734 dev_info(xpc_chan, "channel %d to partition %d disconnected, "
735 "reason=%d\n", ch->number, ch->partid, ch->reason);
736 }
737}
738
739
740/*
741 * Process a change in the channel's remote connection state.
742 */
743static void
744xpc_process_openclose_IPI(struct xpc_partition *part, int ch_number,
745 u8 IPI_flags)
746{
747 unsigned long irq_flags;
748 struct xpc_openclose_args *args =
749 &part->remote_openclose_args[ch_number];
750 struct xpc_channel *ch = &part->channels[ch_number];
751 enum xpc_retval reason;
752
753
754
755 spin_lock_irqsave(&ch->lock, irq_flags);
756
757
758 if (IPI_flags & XPC_IPI_CLOSEREQUEST) {
759
760 dev_dbg(xpc_chan, "XPC_IPI_CLOSEREQUEST (reason=%d) received "
761 "from partid=%d, channel=%d\n", args->reason,
762 ch->partid, ch->number);
763
764 /*
765 * If RCLOSEREQUEST is set, we're probably waiting for
766 * RCLOSEREPLY. We should find it and a ROPENREQUEST packed
767 * with this RCLOSEREQUEST in the IPI_flags.
768 */
769
770 if (ch->flags & XPC_C_RCLOSEREQUEST) {
771 DBUG_ON(!(ch->flags & XPC_C_DISCONNECTING));
772 DBUG_ON(!(ch->flags & XPC_C_CLOSEREQUEST));
773 DBUG_ON(!(ch->flags & XPC_C_CLOSEREPLY));
774 DBUG_ON(ch->flags & XPC_C_RCLOSEREPLY);
775
776 DBUG_ON(!(IPI_flags & XPC_IPI_CLOSEREPLY));
777 IPI_flags &= ~XPC_IPI_CLOSEREPLY;
778 ch->flags |= XPC_C_RCLOSEREPLY;
779
780 /* both sides have finished disconnecting */
781 xpc_process_disconnect(ch, &irq_flags);
782 }
783
784 if (ch->flags & XPC_C_DISCONNECTED) {
785 // >>> explain this section
786
787 if (!(IPI_flags & XPC_IPI_OPENREQUEST)) {
788 DBUG_ON(part->act_state !=
789 XPC_P_DEACTIVATING);
790 spin_unlock_irqrestore(&ch->lock, irq_flags);
791 return;
792 }
793
794 XPC_SET_REASON(ch, 0, 0);
795 ch->flags &= ~XPC_C_DISCONNECTED;
796
797 atomic_inc(&part->nchannels_active);
798 ch->flags |= (XPC_C_CONNECTING | XPC_C_ROPENREQUEST);
799 }
800
801 IPI_flags &= ~(XPC_IPI_OPENREQUEST | XPC_IPI_OPENREPLY);
802
803 /*
804 * The meaningful CLOSEREQUEST connection state fields are:
805 * reason = reason connection is to be closed
806 */
807
808 ch->flags |= XPC_C_RCLOSEREQUEST;
809
810 if (!(ch->flags & XPC_C_DISCONNECTING)) {
811 reason = args->reason;
812 if (reason <= xpcSuccess || reason > xpcUnknownReason) {
813 reason = xpcUnknownReason;
814 } else if (reason == xpcUnregistering) {
815 reason = xpcOtherUnregistering;
816 }
817
818 XPC_DISCONNECT_CHANNEL(ch, reason, &irq_flags);
819 } else {
820 xpc_process_disconnect(ch, &irq_flags);
821 }
822 }
823
824
825 if (IPI_flags & XPC_IPI_CLOSEREPLY) {
826
827 dev_dbg(xpc_chan, "XPC_IPI_CLOSEREPLY received from partid=%d,"
828 " channel=%d\n", ch->partid, ch->number);
829
830 if (ch->flags & XPC_C_DISCONNECTED) {
831 DBUG_ON(part->act_state != XPC_P_DEACTIVATING);
832 spin_unlock_irqrestore(&ch->lock, irq_flags);
833 return;
834 }
835
836 DBUG_ON(!(ch->flags & XPC_C_CLOSEREQUEST));
837 DBUG_ON(!(ch->flags & XPC_C_RCLOSEREQUEST));
838
839 ch->flags |= XPC_C_RCLOSEREPLY;
840
841 if (ch->flags & XPC_C_CLOSEREPLY) {
842 /* both sides have finished disconnecting */
843 xpc_process_disconnect(ch, &irq_flags);
844 }
845 }
846
847
848 if (IPI_flags & XPC_IPI_OPENREQUEST) {
849
850 dev_dbg(xpc_chan, "XPC_IPI_OPENREQUEST (msg_size=%d, "
851 "local_nentries=%d) received from partid=%d, "
852 "channel=%d\n", args->msg_size, args->local_nentries,
853 ch->partid, ch->number);
854
855 if ((ch->flags & XPC_C_DISCONNECTING) ||
856 part->act_state == XPC_P_DEACTIVATING) {
857 spin_unlock_irqrestore(&ch->lock, irq_flags);
858 return;
859 }
860 DBUG_ON(!(ch->flags & (XPC_C_DISCONNECTED |
861 XPC_C_OPENREQUEST)));
862 DBUG_ON(ch->flags & (XPC_C_ROPENREQUEST | XPC_C_ROPENREPLY |
863 XPC_C_OPENREPLY | XPC_C_CONNECTED));
864
865 /*
866 * The meaningful OPENREQUEST connection state fields are:
867 * msg_size = size of channel's messages in bytes
868 * local_nentries = remote partition's local_nentries
869 */
870 DBUG_ON(args->msg_size == 0);
871 DBUG_ON(args->local_nentries == 0);
872
873 ch->flags |= (XPC_C_ROPENREQUEST | XPC_C_CONNECTING);
874 ch->remote_nentries = args->local_nentries;
875
876
877 if (ch->flags & XPC_C_OPENREQUEST) {
878 if (args->msg_size != ch->msg_size) {
879 XPC_DISCONNECT_CHANNEL(ch, xpcUnequalMsgSizes,
880 &irq_flags);
881 spin_unlock_irqrestore(&ch->lock, irq_flags);
882 return;
883 }
884 } else {
885 ch->msg_size = args->msg_size;
886
887 XPC_SET_REASON(ch, 0, 0);
888 ch->flags &= ~XPC_C_DISCONNECTED;
889
890 atomic_inc(&part->nchannels_active);
891 }
892
893 xpc_process_connect(ch, &irq_flags);
894 }
895
896
897 if (IPI_flags & XPC_IPI_OPENREPLY) {
898
899 dev_dbg(xpc_chan, "XPC_IPI_OPENREPLY (local_msgqueue_pa=0x%lx, "
900 "local_nentries=%d, remote_nentries=%d) received from "
901 "partid=%d, channel=%d\n", args->local_msgqueue_pa,
902 args->local_nentries, args->remote_nentries,
903 ch->partid, ch->number);
904
905 if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_DISCONNECTED)) {
906 spin_unlock_irqrestore(&ch->lock, irq_flags);
907 return;
908 }
909 DBUG_ON(!(ch->flags & XPC_C_OPENREQUEST));
910 DBUG_ON(!(ch->flags & XPC_C_ROPENREQUEST));
911 DBUG_ON(ch->flags & XPC_C_CONNECTED);
912
913 /*
914 * The meaningful OPENREPLY connection state fields are:
915 * local_msgqueue_pa = physical address of remote
916 * partition's local_msgqueue
917 * local_nentries = remote partition's local_nentries
918 * remote_nentries = remote partition's remote_nentries
919 */
920 DBUG_ON(args->local_msgqueue_pa == 0);
921 DBUG_ON(args->local_nentries == 0);
922 DBUG_ON(args->remote_nentries == 0);
923
924 ch->flags |= XPC_C_ROPENREPLY;
925 ch->remote_msgqueue_pa = args->local_msgqueue_pa;
926
927 if (args->local_nentries < ch->remote_nentries) {
928 dev_dbg(xpc_chan, "XPC_IPI_OPENREPLY: new "
929 "remote_nentries=%d, old remote_nentries=%d, "
930 "partid=%d, channel=%d\n",
931 args->local_nentries, ch->remote_nentries,
932 ch->partid, ch->number);
933
934 ch->remote_nentries = args->local_nentries;
935 }
936 if (args->remote_nentries < ch->local_nentries) {
937 dev_dbg(xpc_chan, "XPC_IPI_OPENREPLY: new "
938 "local_nentries=%d, old local_nentries=%d, "
939 "partid=%d, channel=%d\n",
940 args->remote_nentries, ch->local_nentries,
941 ch->partid, ch->number);
942
943 ch->local_nentries = args->remote_nentries;
944 }
945
946 xpc_process_connect(ch, &irq_flags);
947 }
948
949 spin_unlock_irqrestore(&ch->lock, irq_flags);
950}
951
952
953/*
954 * Attempt to establish a channel connection to a remote partition.
955 */
956static enum xpc_retval
957xpc_connect_channel(struct xpc_channel *ch)
958{
959 unsigned long irq_flags;
960 struct xpc_registration *registration = &xpc_registrations[ch->number];
961
962
963 if (down_interruptible(&registration->sema) != 0) {
964 return xpcInterrupted;
965 }
966
967 if (!XPC_CHANNEL_REGISTERED(ch->number)) {
968 up(&registration->sema);
969 return xpcUnregistered;
970 }
971
972 spin_lock_irqsave(&ch->lock, irq_flags);
973
974 DBUG_ON(ch->flags & XPC_C_CONNECTED);
975 DBUG_ON(ch->flags & XPC_C_OPENREQUEST);
976
977 if (ch->flags & XPC_C_DISCONNECTING) {
978 spin_unlock_irqrestore(&ch->lock, irq_flags);
979 up(&registration->sema);
980 return ch->reason;
981 }
982
983
984 /* add info from the channel connect registration to the channel */
985
986 ch->kthreads_assigned_limit = registration->assigned_limit;
987 ch->kthreads_idle_limit = registration->idle_limit;
988 DBUG_ON(atomic_read(&ch->kthreads_assigned) != 0);
989 DBUG_ON(atomic_read(&ch->kthreads_idle) != 0);
990 DBUG_ON(atomic_read(&ch->kthreads_active) != 0);
991
992 ch->func = registration->func;
993 DBUG_ON(registration->func == NULL);
994 ch->key = registration->key;
995
996 ch->local_nentries = registration->nentries;
997
998 if (ch->flags & XPC_C_ROPENREQUEST) {
999 if (registration->msg_size != ch->msg_size) {
1000 /* the local and remote sides aren't the same */
1001
1002 /*
1003 * Because XPC_DISCONNECT_CHANNEL() can block we're
1004 * forced to up the registration sema before we unlock
1005 * the channel lock. But that's okay here because we're
1006 * done with the part that required the registration
1007 * sema. XPC_DISCONNECT_CHANNEL() requires that the
1008 * channel lock be locked and will unlock and relock
1009 * the channel lock as needed.
1010 */
1011 up(&registration->sema);
1012 XPC_DISCONNECT_CHANNEL(ch, xpcUnequalMsgSizes,
1013 &irq_flags);
1014 spin_unlock_irqrestore(&ch->lock, irq_flags);
1015 return xpcUnequalMsgSizes;
1016 }
1017 } else {
1018 ch->msg_size = registration->msg_size;
1019
1020 XPC_SET_REASON(ch, 0, 0);
1021 ch->flags &= ~XPC_C_DISCONNECTED;
1022
1023 atomic_inc(&xpc_partitions[ch->partid].nchannels_active);
1024 }
1025
1026 up(&registration->sema);
1027
1028
1029 /* initiate the connection */
1030
1031 ch->flags |= (XPC_C_OPENREQUEST | XPC_C_CONNECTING);
1032 xpc_IPI_send_openrequest(ch, &irq_flags);
1033
1034 xpc_process_connect(ch, &irq_flags);
1035
1036 spin_unlock_irqrestore(&ch->lock, irq_flags);
1037
1038 return xpcSuccess;
1039}
1040
1041
1042/*
1043 * Notify those who wanted to be notified upon delivery of their message.
1044 */
1045static void
1046xpc_notify_senders(struct xpc_channel *ch, enum xpc_retval reason, s64 put)
1047{
1048 struct xpc_notify *notify;
1049 u8 notify_type;
1050 s64 get = ch->w_remote_GP.get - 1;
1051
1052
1053 while (++get < put && atomic_read(&ch->n_to_notify) > 0) {
1054
1055 notify = &ch->notify_queue[get % ch->local_nentries];
1056
1057 /*
1058 * See if the notify entry indicates it was associated with
1059 * a message whose sender wants to be notified. It is possible
1060 * that it is, but someone else is doing or has done the
1061 * notification.
1062 */
1063 notify_type = notify->type;
1064 if (notify_type == 0 ||
1065 cmpxchg(&notify->type, notify_type, 0) !=
1066 notify_type) {
1067 continue;
1068 }
1069
1070 DBUG_ON(notify_type != XPC_N_CALL);
1071
1072 atomic_dec(&ch->n_to_notify);
1073
1074 if (notify->func != NULL) {
1075 dev_dbg(xpc_chan, "notify->func() called, notify=0x%p, "
1076 "msg_number=%ld, partid=%d, channel=%d\n",
1077 (void *) notify, get, ch->partid, ch->number);
1078
1079 notify->func(reason, ch->partid, ch->number,
1080 notify->key);
1081
1082 dev_dbg(xpc_chan, "notify->func() returned, "
1083 "notify=0x%p, msg_number=%ld, partid=%d, "
1084 "channel=%d\n", (void *) notify, get,
1085 ch->partid, ch->number);
1086 }
1087 }
1088}
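
The cmpxchg() on notify->type is what lets this delivery path and xpc_disconnect_channel() race safely for the same entry: whoever swaps the type to zero first owns the callback, and the loser simply moves on. A userspace sketch of that claim step, with a GCC builtin standing in for the ia64 cmpxchg:

	#include <stdio.h>

	static unsigned char notify_type = 1;	/* nonzero ~ XPC_N_CALL pending */

	/* returns 1 if this caller won the right to issue the notification */
	static int claim_notify(unsigned char *type)
	{
		unsigned char expected = *type;

		if (expected == 0)
			return 0;	/* nothing pending, or already claimed */
		return __atomic_compare_exchange_n(type, &expected, 0, 0,
						   __ATOMIC_SEQ_CST,
						   __ATOMIC_SEQ_CST);
	}

	int main(void)
	{
		printf("first claim:  %d\n", claim_notify(&notify_type)); /* 1 */
		printf("second claim: %d\n", claim_notify(&notify_type)); /* 0 */
		return 0;
	}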
1089
1090
1091/*
1092 * Clear some of the msg flags in the local message queue.
1093 */
1094static inline void
1095xpc_clear_local_msgqueue_flags(struct xpc_channel *ch)
1096{
1097 struct xpc_msg *msg;
1098 s64 get;
1099
1100
1101 get = ch->w_remote_GP.get;
1102 do {
1103 msg = (struct xpc_msg *) ((u64) ch->local_msgqueue +
1104 (get % ch->local_nentries) * ch->msg_size);
1105 msg->flags = 0;
1106 } while (++get < (volatile s64) ch->remote_GP.get);
1107}
1108
1109
1110/*
1111 * Clear some of the msg flags in the remote message queue.
1112 */
1113static inline void
1114xpc_clear_remote_msgqueue_flags(struct xpc_channel *ch)
1115{
1116 struct xpc_msg *msg;
1117 s64 put;
1118
1119
1120 put = ch->w_remote_GP.put;
1121 do {
1122 msg = (struct xpc_msg *) ((u64) ch->remote_msgqueue +
1123 (put % ch->remote_nentries) * ch->msg_size);
1124 msg->flags = 0;
1125 } while (++put < (volatile s64) ch->remote_GP.put);
1126}
1127
1128
1129static void
1130xpc_process_msg_IPI(struct xpc_partition *part, int ch_number)
1131{
1132 struct xpc_channel *ch = &part->channels[ch_number];
1133 int nmsgs_sent;
1134
1135
1136 ch->remote_GP = part->remote_GPs[ch_number];
1137
1138
1139 /* See what, if anything, has changed for each connected channel */
1140
1141 xpc_msgqueue_ref(ch);
1142
1143 if (ch->w_remote_GP.get == ch->remote_GP.get &&
1144 ch->w_remote_GP.put == ch->remote_GP.put) {
1145 /* nothing changed since GPs were last pulled */
1146 xpc_msgqueue_deref(ch);
1147 return;
1148 }
1149
1150 if (!(ch->flags & XPC_C_CONNECTED)){
1151 xpc_msgqueue_deref(ch);
1152 return;
1153 }
1154
1155
1156 /*
1157 * First check to see if messages recently sent by us have been
1158 * received by the other side. (The remote GET value will have
1159 * changed since we last looked at it.)
1160 */
1161
1162 if (ch->w_remote_GP.get != ch->remote_GP.get) {
1163
1164 /*
1165 * We need to notify any senders that want to be notified
1166 * that their sent messages have been received by their
1167 * intended recipients. We need to do this before updating
1168 * w_remote_GP.get so that we don't allocate the same message
1169 * queue entries prematurely (see xpc_allocate_msg()).
1170 */
1171 if (atomic_read(&ch->n_to_notify) > 0) {
1172 /*
1173 * Notify senders that messages sent have been
1174 * received and delivered by the other side.
1175 */
1176 xpc_notify_senders(ch, xpcMsgDelivered,
1177 ch->remote_GP.get);
1178 }
1179
1180 /*
1181 * Clear msg->flags in previously sent messages, so that
1182 * they're ready for xpc_allocate_msg().
1183 */
1184 xpc_clear_local_msgqueue_flags(ch);
1185
1186 (volatile s64) ch->w_remote_GP.get = ch->remote_GP.get;
1187
1188 dev_dbg(xpc_chan, "w_remote_GP.get changed to %ld, partid=%d, "
1189 "channel=%d\n", ch->w_remote_GP.get, ch->partid,
1190 ch->number);
1191
1192 /*
1193 * If anyone was waiting for message queue entries to become
1194 * available, wake them up.
1195 */
1196 if (atomic_read(&ch->n_on_msg_allocate_wq) > 0) {
1197 wake_up(&ch->msg_allocate_wq);
1198 }
1199 }
1200
1201
1202 /*
1203 * Now check for newly sent messages by the other side. (The remote
1204 * PUT value will have changed since we last looked at it.)
1205 */
1206
1207 if (ch->w_remote_GP.put != ch->remote_GP.put) {
1208 /*
1209 * Clear msg->flags in previously received messages, so that
1210 * they're ready for xpc_get_deliverable_msg().
1211 */
1212 xpc_clear_remote_msgqueue_flags(ch);
1213
1214 (volatile s64) ch->w_remote_GP.put = ch->remote_GP.put;
1215
1216 dev_dbg(xpc_chan, "w_remote_GP.put changed to %ld, partid=%d, "
1217 "channel=%d\n", ch->w_remote_GP.put, ch->partid,
1218 ch->number);
1219
1220 nmsgs_sent = ch->w_remote_GP.put - ch->w_local_GP.get;
1221 if (nmsgs_sent > 0) {
1222 dev_dbg(xpc_chan, "msgs waiting to be copied and "
1223 "delivered=%d, partid=%d, channel=%d\n",
1224 nmsgs_sent, ch->partid, ch->number);
1225
1226 if (ch->flags & XPC_C_CONNECTCALLOUT) {
1227 xpc_activate_kthreads(ch, nmsgs_sent);
1228 }
1229 }
1230 }
1231
1232 xpc_msgqueue_deref(ch);
1233}
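
Note that the GET/PUT values are monotonically increasing message numbers, not ring indices, so the nmsgs_sent computation above never has to worry about wraparound; the modulo is applied only when a message number is turned into a queue slot. In miniature (the values are hypothetical):

	#include <stdio.h>

	int main(void)
	{
		long remote_put = 107;	/* messages the peer has made visible */
		long local_get = 103;	/* messages we have already consumed */
		int nentries = 8;	/* hypothetical ring size */

		printf("waiting to deliver: %ld\n", remote_put - local_get);
		printf("next slot to read:  %ld\n", local_get % nentries);
		return 0;
	}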
1234
1235
1236void
1237xpc_process_channel_activity(struct xpc_partition *part)
1238{
1239 unsigned long irq_flags;
1240 u64 IPI_amo, IPI_flags;
1241 struct xpc_channel *ch;
1242 int ch_number;
1243
1244
1245 IPI_amo = xpc_get_IPI_flags(part);
1246
1247 /*
1248 * Initiate channel connections for registered channels.
1249 *
1250 * For each connected channel that has pending messages activate idle
1251 * kthreads and/or create new kthreads as needed.
1252 */
1253
1254 for (ch_number = 0; ch_number < part->nchannels; ch_number++) {
1255 ch = &part->channels[ch_number];
1256
1257
1258 /*
1259 * Process any open or close related IPI flags, and then deal
1260 * with connecting or disconnecting the channel as required.
1261 */
1262
1263 IPI_flags = XPC_GET_IPI_FLAGS(IPI_amo, ch_number);
1264
1265 if (XPC_ANY_OPENCLOSE_IPI_FLAGS_SET(IPI_flags)) {
1266 xpc_process_openclose_IPI(part, ch_number, IPI_flags);
1267 }
1268
1269
1270 if (ch->flags & XPC_C_DISCONNECTING) {
1271 spin_lock_irqsave(&ch->lock, irq_flags);
1272 xpc_process_disconnect(ch, &irq_flags);
1273 spin_unlock_irqrestore(&ch->lock, irq_flags);
1274 continue;
1275 }
1276
1277 if (part->act_state == XPC_P_DEACTIVATING) {
1278 continue;
1279 }
1280
1281 if (!(ch->flags & XPC_C_CONNECTED)) {
1282 if (!(ch->flags & XPC_C_OPENREQUEST)) {
1283 DBUG_ON(ch->flags & XPC_C_SETUP);
1284 (void) xpc_connect_channel(ch);
1285 } else {
1286 spin_lock_irqsave(&ch->lock, irq_flags);
1287 xpc_process_connect(ch, &irq_flags);
1288 spin_unlock_irqrestore(&ch->lock, irq_flags);
1289 }
1290 continue;
1291 }
1292
1293
1294 /*
1295 * Process any message related IPI flags, this may involve the
1296 * activation of kthreads to deliver any pending messages sent
1297 * from the other partition.
1298 */
1299
1300 if (XPC_ANY_MSG_IPI_FLAGS_SET(IPI_flags)) {
1301 xpc_process_msg_IPI(part, ch_number);
1302 }
1303 }
1304}
1305
1306
1307/*
1308 * XPC's heartbeat code calls this function to inform XPC that a partition has
1309 * gone down. XPC responds by tearing down the XPartition Communication
1310 * infrastructure used for the just downed partition.
1311 *
1312 * XPC's heartbeat code will never call this function and xpc_partition_up()
1313 * at the same time. Nor will it ever make multiple calls to either function
1314 * at the same time.
1315 */
1316void
1317xpc_partition_down(struct xpc_partition *part, enum xpc_retval reason)
1318{
1319 unsigned long irq_flags;
1320 int ch_number;
1321 struct xpc_channel *ch;
1322
1323
1324 dev_dbg(xpc_chan, "deactivating partition %d, reason=%d\n",
1325 XPC_PARTID(part), reason);
1326
1327 if (!xpc_part_ref(part)) {
1328 /* infrastructure for this partition isn't currently set up */
1329 return;
1330 }
1331
1332
1333 /* disconnect all channels associated with the downed partition */
1334
1335 for (ch_number = 0; ch_number < part->nchannels; ch_number++) {
1336 ch = &part->channels[ch_number];
1337
1338
1339 xpc_msgqueue_ref(ch);
1340 spin_lock_irqsave(&ch->lock, irq_flags);
1341
1342 XPC_DISCONNECT_CHANNEL(ch, reason, &irq_flags);
1343
1344 spin_unlock_irqrestore(&ch->lock, irq_flags);
1345 xpc_msgqueue_deref(ch);
1346 }
1347
1348 xpc_wakeup_channel_mgr(part);
1349
1350 xpc_part_deref(part);
1351}
1352
1353
1354/*
1355 * Tear down the infrastructure necessary to support XPartition Communication
1356 * between the specified remote partition and the local one.
1357 */
1358void
1359xpc_teardown_infrastructure(struct xpc_partition *part)
1360{
1361 partid_t partid = XPC_PARTID(part);
1362
1363
1364 /*
1365 * We start off by making this partition inaccessible to local
1366 * processes by marking it as no longer set up. Then we make it
1367 * inaccessible to remote processes by clearing the XPC per partition
1368 * specific variable's magic # (which indicates that these variables
1369 * are no longer valid) and by ignoring all XPC notify IPIs sent to
1370 * this partition.
1371 */
1372
1373 DBUG_ON(atomic_read(&part->nchannels_active) != 0);
1374 DBUG_ON(part->setup_state != XPC_P_SETUP);
1375 part->setup_state = XPC_P_WTEARDOWN;
1376
1377 xpc_vars_part[partid].magic = 0;
1378
1379
1380 free_irq(SGI_XPC_NOTIFY, (void *) (u64) partid);
1381
1382
1383 /*
1384 * Before proceeding with the teardown we have to wait until all
1385 * existing references cease.
1386 */
1387 wait_event(part->teardown_wq, (atomic_read(&part->references) == 0));
1388
1389
1390 /* now we can begin tearing down the infrastructure */
1391
1392 part->setup_state = XPC_P_TORNDOWN;
1393
1394 /* in case we've still got outstanding timers registered... */
1395 del_timer_sync(&part->dropped_IPI_timer);
1396
1397 kfree(part->remote_openclose_args_base);
1398 part->remote_openclose_args = NULL;
1399 kfree(part->local_openclose_args_base);
1400 part->local_openclose_args = NULL;
1401 kfree(part->remote_GPs_base);
1402 part->remote_GPs = NULL;
1403 kfree(part->local_GPs_base);
1404 part->local_GPs = NULL;
1405 kfree(part->channels);
1406 part->channels = NULL;
1407 part->local_IPI_amo_va = NULL;
1408}
1409
1410
1411/*
1412 * Called by XP at the time of channel connection registration to cause
1413 * XPC to establish connections to all currently active partitions.
1414 */
1415void
1416xpc_initiate_connect(int ch_number)
1417{
1418 partid_t partid;
1419 struct xpc_partition *part;
1420 struct xpc_channel *ch;
1421
1422
1423 DBUG_ON(ch_number < 0 || ch_number >= XPC_NCHANNELS);
1424
1425 for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
1426 part = &xpc_partitions[partid];
1427
1428 if (xpc_part_ref(part)) {
1429 ch = &part->channels[ch_number];
1430
1431 if (!(ch->flags & XPC_C_DISCONNECTING)) {
1432 DBUG_ON(ch->flags & XPC_C_OPENREQUEST);
1433 DBUG_ON(ch->flags & XPC_C_CONNECTED);
1434 DBUG_ON(ch->flags & XPC_C_SETUP);
1435
1436 /*
1437 * Initiate the establishment of a connection
1438 * on the newly registered channel to the
1439 * remote partition.
1440 */
1441 xpc_wakeup_channel_mgr(part);
1442 }
1443
1444 xpc_part_deref(part);
1445 }
1446 }
1447}
1448
1449
1450void
1451xpc_connected_callout(struct xpc_channel *ch)
1452{
1453 unsigned long irq_flags;
1454
1455
1456 /* let the registerer know that a connection has been established */
1457
1458 if (ch->func != NULL) {
1459 dev_dbg(xpc_chan, "ch->func() called, reason=xpcConnected, "
1460 "partid=%d, channel=%d\n", ch->partid, ch->number);
1461
1462 ch->func(xpcConnected, ch->partid, ch->number,
1463 (void *) (u64) ch->local_nentries, ch->key);
1464
1465 dev_dbg(xpc_chan, "ch->func() returned, reason=xpcConnected, "
1466 "partid=%d, channel=%d\n", ch->partid, ch->number);
1467 }
1468
1469 spin_lock_irqsave(&ch->lock, irq_flags);
1470 ch->flags |= XPC_C_CONNECTCALLOUT;
1471 spin_unlock_irqrestore(&ch->lock, irq_flags);
1472}
1473
1474
1475/*
1476 * Called by XP at the time of channel connection unregistration to cause
1477 * XPC to teardown all current connections for the specified channel.
1478 *
1479 * Before returning, xpc_initiate_disconnect() will wait until all connections
1480 * on the specified channel have been closed/torn down. So the caller can be
1481 * assured that they will not be receiving any more callouts from XPC to the
1482 * function they registered via xpc_connect().
1483 *
1484 * Arguments:
1485 *
1486 * ch_number - channel # to unregister.
1487 */
1488void
1489xpc_initiate_disconnect(int ch_number)
1490{
1491 unsigned long irq_flags;
1492 partid_t partid;
1493 struct xpc_partition *part;
1494 struct xpc_channel *ch;
1495
1496
1497 DBUG_ON(ch_number < 0 || ch_number >= XPC_NCHANNELS);
1498
1499 /* initiate the channel disconnect for every active partition */
1500 for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
1501 part = &xpc_partitions[partid];
1502
1503 if (xpc_part_ref(part)) {
1504 ch = &part->channels[ch_number];
1505 xpc_msgqueue_ref(ch);
1506
1507 spin_lock_irqsave(&ch->lock, irq_flags);
1508
1509 XPC_DISCONNECT_CHANNEL(ch, xpcUnregistering,
1510 &irq_flags);
1511
1512 spin_unlock_irqrestore(&ch->lock, irq_flags);
1513
1514 xpc_msgqueue_deref(ch);
1515 xpc_part_deref(part);
1516 }
1517 }
1518
1519 xpc_disconnect_wait(ch_number);
1520}
1521
1522
1523/*
1524 * Disconnect a channel and reflect it back to all who may be waiting.
1525 *
1526 * >>> An OPEN is not allowed until XPC_C_DISCONNECTING is cleared by
1527 * >>> xpc_free_msgqueues().
1528 *
1529 * THE CHANNEL IS TO BE LOCKED BY THE CALLER AND WILL REMAIN LOCKED UPON RETURN.
1530 */
1531void
1532xpc_disconnect_channel(const int line, struct xpc_channel *ch,
1533 enum xpc_retval reason, unsigned long *irq_flags)
1534{
1535 u32 flags;
1536
1537
1538 DBUG_ON(!spin_is_locked(&ch->lock));
1539
1540 if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_DISCONNECTED)) {
1541 return;
1542 }
1543 DBUG_ON(!(ch->flags & (XPC_C_CONNECTING | XPC_C_CONNECTED)));
1544
1545 dev_dbg(xpc_chan, "reason=%d, line=%d, partid=%d, channel=%d\n",
1546 reason, line, ch->partid, ch->number);
1547
1548 XPC_SET_REASON(ch, reason, line);
1549
1550 flags = ch->flags;
1551 /* some of these may not have been set */
1552 ch->flags &= ~(XPC_C_OPENREQUEST | XPC_C_OPENREPLY |
1553 XPC_C_ROPENREQUEST | XPC_C_ROPENREPLY |
1554 XPC_C_CONNECTING | XPC_C_CONNECTED);
1555
1556 ch->flags |= (XPC_C_CLOSEREQUEST | XPC_C_DISCONNECTING);
1557 xpc_IPI_send_closerequest(ch, irq_flags);
1558
1559 if (flags & XPC_C_CONNECTED) {
1560 ch->flags |= XPC_C_WASCONNECTED;
1561 }
1562
1563 if (atomic_read(&ch->kthreads_idle) > 0) {
1564 /* wake all idle kthreads so they can exit */
1565 wake_up_all(&ch->idle_wq);
1566 }
1567
1568 spin_unlock_irqrestore(&ch->lock, *irq_flags);
1569
1570
1571 /* wake those waiting to allocate an entry from the local msg queue */
1572
1573 if (atomic_read(&ch->n_on_msg_allocate_wq) > 0) {
1574 wake_up(&ch->msg_allocate_wq);
1575 }
1576
1577 /* wake those waiting for notify completion */
1578
1579 if (atomic_read(&ch->n_to_notify) > 0) {
1580 xpc_notify_senders(ch, reason, ch->w_local_GP.put);
1581 }
1582
1583 spin_lock_irqsave(&ch->lock, *irq_flags);
1584}
1585
1586
1587void
1588xpc_disconnected_callout(struct xpc_channel *ch)
1589{
1590 /*
1591 * Let the channel's registerer know that the channel is now
1592 * disconnected. We don't want to do this if the registerer was never
1593 * informed of a connection being made, unless the disconnect was for
1594 * abnormal reasons.
1595 */
1596
1597 if (ch->func != NULL) {
1598 dev_dbg(xpc_chan, "ch->func() called, reason=%d, partid=%d, "
1599 "channel=%d\n", ch->reason, ch->partid, ch->number);
1600
1601 ch->func(ch->reason, ch->partid, ch->number, NULL, ch->key);
1602
1603 dev_dbg(xpc_chan, "ch->func() returned, reason=%d, partid=%d, "
1604 "channel=%d\n", ch->reason, ch->partid, ch->number);
1605 }
1606}
1607
1608
1609/*
1610 * Wait for a message entry to become available for the specified channel,
1611 * but don't wait any longer than 1 jiffy.
1612 */
1613static enum xpc_retval
1614xpc_allocate_msg_wait(struct xpc_channel *ch)
1615{
1616 enum xpc_retval ret;
1617
1618
1619 if (ch->flags & XPC_C_DISCONNECTING) {
1620 DBUG_ON(ch->reason == xpcInterrupted); // >>> Is this true?
1621 return ch->reason;
1622 }
1623
1624 atomic_inc(&ch->n_on_msg_allocate_wq);
1625 ret = interruptible_sleep_on_timeout(&ch->msg_allocate_wq, 1);
1626 atomic_dec(&ch->n_on_msg_allocate_wq);
1627
1628 if (ch->flags & XPC_C_DISCONNECTING) {
1629 ret = ch->reason;
1630 DBUG_ON(ch->reason == xpcInterrupted); // >>> Is this true?
1631 } else if (ret == 0) {
1632 ret = xpcTimeout;
1633 } else {
1634 ret = xpcInterrupted;
1635 }
1636
1637 return ret;
1638}
1639
1640
1641/*
1642 * Allocate an entry for a message from the message queue associated with the
1643 * specified channel.
1644 */
1645static enum xpc_retval
1646xpc_allocate_msg(struct xpc_channel *ch, u32 flags,
1647 struct xpc_msg **address_of_msg)
1648{
1649 struct xpc_msg *msg;
1650 enum xpc_retval ret;
1651 s64 put;
1652
1653
1654 /* this reference will be dropped in xpc_send_msg() */
1655 xpc_msgqueue_ref(ch);
1656
1657 if (ch->flags & XPC_C_DISCONNECTING) {
1658 xpc_msgqueue_deref(ch);
1659 return ch->reason;
1660 }
1661 if (!(ch->flags & XPC_C_CONNECTED)) {
1662 xpc_msgqueue_deref(ch);
1663 return xpcNotConnected;
1664 }
1665
1666
1667 /*
1668 * Get the next available message entry from the local message queue.
1669 * If none are available, we'll make sure that we grab the latest
1670 * GP values.
1671 */
1672 ret = xpcTimeout;
1673
1674 while (1) {
1675
1676 put = (volatile s64) ch->w_local_GP.put;
1677 if (put - (volatile s64) ch->w_remote_GP.get <
1678 ch->local_nentries) {
1679
1680 /* There are available message entries. We need to try
1681 * to secure one for ourselves. We'll do this by trying
1682 * to increment w_local_GP.put as long as someone else
1683 * doesn't beat us to it. If they do, we'll have to
1684 * try again.
1685 */
1686 if (cmpxchg(&ch->w_local_GP.put, put, put + 1) ==
1687 put) {
1688 /* we got the entry referenced by put */
1689 break;
1690 }
1691 continue; /* try again */
1692 }
1693
1694
1695 /*
1696 * There aren't any available msg entries at this time.
1697 *
1698 * In waiting for a message entry to become available,
1699 * we set a timeout in case the other side is not
1700 * sending completion IPIs. This lets us fake an IPI
1701 * that will cause the IPI handler to fetch the latest
1702 * GP values as if an IPI was sent by the other side.
1703 */
1704 if (ret == xpcTimeout) {
1705 xpc_IPI_send_local_msgrequest(ch);
1706 }
1707
1708 if (flags & XPC_NOWAIT) {
1709 xpc_msgqueue_deref(ch);
1710 return xpcNoWait;
1711 }
1712
1713 ret = xpc_allocate_msg_wait(ch);
1714 if (ret != xpcInterrupted && ret != xpcTimeout) {
1715 xpc_msgqueue_deref(ch);
1716 return ret;
1717 }
1718 }
1719
1720
1721 /* get the message's address and initialize it */
1722 msg = (struct xpc_msg *) ((u64) ch->local_msgqueue +
1723 (put % ch->local_nentries) * ch->msg_size);
1724
1725
1726 DBUG_ON(msg->flags != 0);
1727 msg->number = put;
1728
1729 dev_dbg(xpc_chan, "w_local_GP.put changed to %ld; msg=0x%p, "
1730 "msg_number=%ld, partid=%d, channel=%d\n", put + 1,
1731 (void *) msg, msg->number, ch->partid, ch->number);
1732
1733 *address_of_msg = msg;
1734
1735 return xpcSuccess;
1736}
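
The allocation path above is a classic multi-producer ring reservation: check for room against the cached remote Get, then let cmpxchg arbitrate which producer gets the slot. A hedged userspace sketch, with GCC builtins standing in for the ia64 cmpxchg and NENTRIES playing ch->local_nentries:

	#include <stdint.h>

	#define NENTRIES	64	/* plays ch->local_nentries */

	struct ring {
		int64_t put;	/* cached Put: next message number to hand out */
		int64_t get;	/* cached remote Get: last number known consumed */
	};

	/* returns the reserved message number, or -1 if the queue is full */
	int64_t reserve_slot(struct ring *r)
	{
		int64_t put;

		for (;;) {
			put = __atomic_load_n(&r->put, __ATOMIC_ACQUIRE);
			if (put - __atomic_load_n(&r->get, __ATOMIC_ACQUIRE) >=
								NENTRIES)
				return -1;	/* no free entries right now */
			/* claim message number 'put' unless another producer won */
			if (__atomic_compare_exchange_n(&r->put, &put, put + 1, 0,
							__ATOMIC_ACQ_REL,
							__ATOMIC_ACQUIRE))
				return put;	/* slot index is put % NENTRIES */
		}
	}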
1737
1738
1739/*
1740 * Allocate an entry for a message from the message queue associated with the
1741 * specified channel. NOTE that this routine can sleep waiting for a message
1742 * entry to become available. To not sleep, pass in the XPC_NOWAIT flag.
1743 *
1744 * Arguments:
1745 *
1746 * partid - ID of partition to which the channel is connected.
1747 * ch_number - channel #.
1748 * flags - see xpc.h for valid flags.
1749 * payload - address of the allocated payload area pointer (filled in on
1750 * return) in which the user-defined message is constructed.
1751 */
1752enum xpc_retval
1753xpc_initiate_allocate(partid_t partid, int ch_number, u32 flags, void **payload)
1754{
1755 struct xpc_partition *part = &xpc_partitions[partid];
1756 enum xpc_retval ret = xpcUnknownReason;
1757	struct xpc_msg *msg = NULL;	/* set by xpc_allocate_msg() only on success */
1758
1759
1760 DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS);
1761 DBUG_ON(ch_number < 0 || ch_number >= part->nchannels);
1762
1763 *payload = NULL;
1764
1765 if (xpc_part_ref(part)) {
1766 ret = xpc_allocate_msg(&part->channels[ch_number], flags, &msg);
1767 xpc_part_deref(part);
1768
1769 if (msg != NULL) {
1770 *payload = &msg->payload;
1771 }
1772 }
1773
1774 return ret;
1775}
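/*
 * A hypothetical caller sketch (illustrative, not part of this patch;
 * partid and ch_number are assumed to be the caller's):
 *
 *	void *payload;
 *	enum xpc_retval ret;
 *
 *	ret = xpc_initiate_allocate(partid, ch_number, XPC_NOWAIT,
 *					&payload);
 *	if (ret == xpcNoWait) {
 *		// no entry was free and we chose not to sleep
 *	}
 */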
1776
1777
1778/*
1779 * Now we actually send the messages that are ready to be sent by advancing
1780 * the local message queue's Put value and then send an IPI to the recipient
1781 * partition.
1782 */
1783static void
1784xpc_send_msgs(struct xpc_channel *ch, s64 initial_put)
1785{
1786 struct xpc_msg *msg;
1787 s64 put = initial_put + 1;
1788 int send_IPI = 0;
1789
1790
1791 while (1) {
1792
1793 while (1) {
1794 if (put == (volatile s64) ch->w_local_GP.put) {
1795 break;
1796 }
1797
1798 msg = (struct xpc_msg *) ((u64) ch->local_msgqueue +
1799 (put % ch->local_nentries) * ch->msg_size);
1800
1801 if (!(msg->flags & XPC_M_READY)) {
1802 break;
1803 }
1804
1805 put++;
1806 }
1807
1808 if (put == initial_put) {
1809 /* nothing's changed */
1810 break;
1811 }
1812
1813 if (cmpxchg_rel(&ch->local_GP->put, initial_put, put) !=
1814 initial_put) {
1815 /* someone else beat us to it */
1816 DBUG_ON((volatile s64) ch->local_GP->put < initial_put);
1817 break;
1818 }
1819
1820 /* we just set the new value of local_GP->put */
1821
1822 dev_dbg(xpc_chan, "local_GP->put changed to %ld, partid=%d, "
1823 "channel=%d\n", put, ch->partid, ch->number);
1824
1825 send_IPI = 1;
1826
1827 /*
1828 * We need to ensure that the message referenced by
1829 * local_GP->put is not XPC_M_READY or that local_GP->put
1830 * equals w_local_GP.put, so we'll go have a look.
1831 */
1832 initial_put = put;
1833 }
1834
1835 if (send_IPI) {
1836 xpc_IPI_send_msgrequest(ch);
1837 }
1838}
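/*
 * Illustrative example: if messages 5 and 7 are XPC_M_READY but 6 is
 * not, a call with initial_put = 4 advances local_GP->put to 6 (making
 * only message 5 visible to the other side) and sends one IPI; message
 * 7 gets picked up by a later call once 6 becomes ready.
 */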
1839
1840
1841/*
1842 * Common code that does the actual sending of the message by advancing the
1843 * local message queue's Put value and sends an IPI to the partition the
1844 * message is being sent to.
1845 */
1846static enum xpc_retval
1847xpc_send_msg(struct xpc_channel *ch, struct xpc_msg *msg, u8 notify_type,
1848 xpc_notify_func func, void *key)
1849{
1850 enum xpc_retval ret = xpcSuccess;
1851 struct xpc_notify *notify = NULL; // >>> to keep the compiler happy!!
1852 s64 put, msg_number = msg->number;
1853
1854
1855 DBUG_ON(notify_type == XPC_N_CALL && func == NULL);
1856 DBUG_ON((((u64) msg - (u64) ch->local_msgqueue) / ch->msg_size) !=
1857 msg_number % ch->local_nentries);
1858 DBUG_ON(msg->flags & XPC_M_READY);
1859
1860 if (ch->flags & XPC_C_DISCONNECTING) {
1861 /* drop the reference grabbed in xpc_allocate_msg() */
1862 xpc_msgqueue_deref(ch);
1863 return ch->reason;
1864 }
1865
1866 if (notify_type != 0) {
1867 /*
1868 * Tell the remote side to send an ACK interrupt when the
1869 * message has been delivered.
1870 */
1871 msg->flags |= XPC_M_INTERRUPT;
1872
1873 atomic_inc(&ch->n_to_notify);
1874
1875 notify = &ch->notify_queue[msg_number % ch->local_nentries];
1876 notify->func = func;
1877 notify->key = key;
1878 *((volatile u8 *) &notify->type) = notify_type;
1879
1880 // >>> is a mb() needed here?
1881
1882 if (ch->flags & XPC_C_DISCONNECTING) {
1883 /*
1884 * An error occurred between our last error check and
1885 * this one. We will try to clear the type field from
1886 * the notify entry. If we succeed then
1887 * xpc_disconnect_channel() didn't already process
1888 * the notify entry.
1889 */
1890 if (cmpxchg(&notify->type, notify_type, 0) ==
1891 notify_type) {
1892 atomic_dec(&ch->n_to_notify);
1893 ret = ch->reason;
1894 }
1895
1896 /* drop the reference grabbed in xpc_allocate_msg() */
1897 xpc_msgqueue_deref(ch);
1898 return ret;
1899 }
1900 }
1901
1902 msg->flags |= XPC_M_READY;
1903
1904 /*
1905 * The preceding store of msg->flags must occur before the following
1906 * load of ch->local_GP->put.
1907 */
1908 mb();
1909
1910 /* see if the message is next in line to be sent, if so send it */
1911
1912 put = ch->local_GP->put;
1913 if (put == msg_number) {
1914 xpc_send_msgs(ch, put);
1915 }
1916
1917 /* drop the reference grabbed in xpc_allocate_msg() */
1918 xpc_msgqueue_deref(ch);
1919 return ret;
1920}
1921
1922
1923/*
1924 * Send a message previously allocated using xpc_initiate_allocate() on the
1925 * specified channel connected to the specified partition.
1926 *
1927 * This routine will not wait for the message to be received, nor will
1928 * notification be given when it does happen. Once this routine has returned
1929 * the message entry allocated via xpc_initiate_allocate() is no longer
1930 * accessible to the caller.
1931 *
1932 * This routine, although called by users, does not call xpc_part_ref() to
1933 * ensure that the partition infrastructure is in place. It relies on the
1934 * fact that we called xpc_msgqueue_ref() in xpc_allocate_msg().
1935 *
1936 * Arguments:
1937 *
1938 * partid - ID of partition to which the channel is connected.
1939 * ch_number - channel # to send message on.
1940 * payload - pointer to the payload area allocated via
1941 * xpc_initiate_allocate().
1942 */
1943enum xpc_retval
1944xpc_initiate_send(partid_t partid, int ch_number, void *payload)
1945{
1946 struct xpc_partition *part = &xpc_partitions[partid];
1947 struct xpc_msg *msg = XPC_MSG_ADDRESS(payload);
1948 enum xpc_retval ret;
1949
1950
1951 dev_dbg(xpc_chan, "msg=0x%p, partid=%d, channel=%d\n", (void *) msg,
1952 partid, ch_number);
1953
1954 DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS);
1955 DBUG_ON(ch_number < 0 || ch_number >= part->nchannels);
1956 DBUG_ON(msg == NULL);
1957
1958 ret = xpc_send_msg(&part->channels[ch_number], msg, 0, NULL, NULL);
1959
1960 return ret;
1961}
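/*
 * Filling in and sending an allocated message might look like this
 * (a sketch; mydata/mydata_len are hypothetical caller state and
 * XPC_WAIT is assumed to be the blocking counterpart of XPC_NOWAIT):
 *
 *	void *payload;
 *
 *	if (xpc_initiate_allocate(partid, ch_number, XPC_WAIT,
 *				&payload) == xpcSuccess) {
 *		memcpy(payload, mydata, mydata_len);
 *		(void) xpc_initiate_send(partid, ch_number, payload);
 *		// payload must not be touched after this point
 *	}
 */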
1962
1963
1964/*
1965 * Send a message previously allocated using xpc_initiate_allocate on the
1966 * specified channel connected to the specified partition.
1967 *
1968 * This routine will not wait for the message to be sent. Once this routine
1969 * has returned the message entry allocated via xpc_initiate_allocate() is no
1970 * longer accessible to the caller.
1971 *
1972 * Once the remote end of the channel has received the message, the function
1973 * passed as an argument to xpc_initiate_send_notify() will be called. This
1974 * allows the sender to free up or re-use any buffers referenced by the
1975 * message, but does NOT mean the message has been processed at the remote
1976 * end by a receiver.
1977 *
1978 * If this routine returns an error, the caller's function will NOT be called.
1979 *
1980 * This routine, although called by users, does not call xpc_part_ref() to
1981 * ensure that the partition infrastructure is in place. It relies on the
1982 * fact that we called xpc_msgqueue_ref() in xpc_allocate_msg().
1983 *
1984 * Arguments:
1985 *
1986 * partid - ID of partition to which the channel is connected.
1987 * ch_number - channel # to send message on.
1988 * payload - pointer to the payload area allocated via
1989 * xpc_initiate_allocate().
1990 * func - function to call with asynchronous notification of message
1991 * receipt. THIS FUNCTION MUST BE NON-BLOCKING.
1992 * key - user-defined key to be passed to the function when it's called.
1993 */
1994enum xpc_retval
1995xpc_initiate_send_notify(partid_t partid, int ch_number, void *payload,
1996 xpc_notify_func func, void *key)
1997{
1998 struct xpc_partition *part = &xpc_partitions[partid];
1999 struct xpc_msg *msg = XPC_MSG_ADDRESS(payload);
2000 enum xpc_retval ret;
2001
2002
2003 dev_dbg(xpc_chan, "msg=0x%p, partid=%d, channel=%d\n", (void *) msg,
2004 partid, ch_number);
2005
2006 DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS);
2007 DBUG_ON(ch_number < 0 || ch_number >= part->nchannels);
2008 DBUG_ON(msg == NULL);
2009 DBUG_ON(func == NULL);
2010
2011 ret = xpc_send_msg(&part->channels[ch_number], msg, XPC_N_CALL,
2012 func, key);
2013 return ret;
2014}
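/*
 * Sketch of a notification callback (hypothetical names; the
 * xpc_notify_func signature is assumed from xp.h):
 *
 *	static void
 *	my_msg_delivered(enum xpc_retval reason, partid_t partid,
 *				int ch_number, void *key)
 *	{
 *		// must not block; just recycle the buffer named by key
 *	}
 *
 *	ret = xpc_initiate_send_notify(partid, ch_number, payload,
 *					my_msg_delivered, my_buffer);
 */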
2015
2016
2017static struct xpc_msg *
2018xpc_pull_remote_msg(struct xpc_channel *ch, s64 get)
2019{
2020 struct xpc_partition *part = &xpc_partitions[ch->partid];
2021 struct xpc_msg *remote_msg, *msg;
2022 u32 msg_index, nmsgs;
2023 u64 msg_offset;
2024 enum xpc_retval ret;
2025
2026
2027 if (down_interruptible(&ch->msg_to_pull_sema) != 0) {
2028 /* we were interrupted by a signal */
2029 return NULL;
2030 }
2031
2032 while (get >= ch->next_msg_to_pull) {
2033
2034 /* pull as many messages as are ready and able to be pulled */
2035
2036 msg_index = ch->next_msg_to_pull % ch->remote_nentries;
2037
2038 DBUG_ON(ch->next_msg_to_pull >=
2039 (volatile s64) ch->w_remote_GP.put);
2040 nmsgs = (volatile s64) ch->w_remote_GP.put -
2041 ch->next_msg_to_pull;
2042 if (msg_index + nmsgs > ch->remote_nentries) {
2043 /* ignore the ones that wrap the msg queue for now */
2044 nmsgs = ch->remote_nentries - msg_index;
2045 }
2046
2047 msg_offset = msg_index * ch->msg_size;
2048 msg = (struct xpc_msg *) ((u64) ch->remote_msgqueue +
2049 msg_offset);
2050 remote_msg = (struct xpc_msg *) (ch->remote_msgqueue_pa +
2051 msg_offset);
2052
2053 if ((ret = xpc_pull_remote_cachelines(part, msg, remote_msg,
2054 nmsgs * ch->msg_size)) != xpcSuccess) {
2055
2056 dev_dbg(xpc_chan, "failed to pull %d msgs starting with"
2057 " msg %ld from partition %d, channel=%d, "
2058 "ret=%d\n", nmsgs, ch->next_msg_to_pull,
2059 ch->partid, ch->number, ret);
2060
2061 XPC_DEACTIVATE_PARTITION(part, ret);
2062
2063 up(&ch->msg_to_pull_sema);
2064 return NULL;
2065 }
2066
2067 mb(); /* >>> this may not be needed, we're not sure */
2068
2069 ch->next_msg_to_pull += nmsgs;
2070 }
2071
2072 up(&ch->msg_to_pull_sema);
2073
2074 /* return the message we were looking for */
2075 msg_offset = (get % ch->remote_nentries) * ch->msg_size;
2076 msg = (struct xpc_msg *) ((u64) ch->remote_msgqueue + msg_offset);
2077
2078 return msg;
2079}
2080
2081
2082/*
2083 * Get a message to be delivered.
2084 */
2085static struct xpc_msg *
2086xpc_get_deliverable_msg(struct xpc_channel *ch)
2087{
2088 struct xpc_msg *msg = NULL;
2089 s64 get;
2090
2091
2092 do {
2093 if ((volatile u32) ch->flags & XPC_C_DISCONNECTING) {
2094 break;
2095 }
2096
2097 get = (volatile s64) ch->w_local_GP.get;
2098 if (get == (volatile s64) ch->w_remote_GP.put) {
2099 break;
2100 }
2101
2102 /* There are messages waiting to be pulled and delivered.
2103 * We need to try to secure one for ourselves. We'll do this
2104 * by trying to increment w_local_GP.get and hope that no one
2105 * else beats us to it. If they do, we'll simply have
2106 * to try again for the next one.
2107 */
2108
2109 if (cmpxchg(&ch->w_local_GP.get, get, get + 1) == get) {
2110 /* we got the entry referenced by get */
2111
2112 dev_dbg(xpc_chan, "w_local_GP.get changed to %ld, "
2113 "partid=%d, channel=%d\n", get + 1,
2114 ch->partid, ch->number);
2115
2116 /* pull the message from the remote partition */
2117
2118 msg = xpc_pull_remote_msg(ch, get);
2119
2120 DBUG_ON(msg != NULL && msg->number != get);
2121 DBUG_ON(msg != NULL && (msg->flags & XPC_M_DONE));
2122 DBUG_ON(msg != NULL && !(msg->flags & XPC_M_READY));
2123
2124 break;
2125 }
2126
2127 } while (1);
2128
2129 return msg;
2130}
2131
2132
2133/*
2134 * Deliver a message to its intended recipient.
2135 */
2136void
2137xpc_deliver_msg(struct xpc_channel *ch)
2138{
2139 struct xpc_msg *msg;
2140
2141
2142 if ((msg = xpc_get_deliverable_msg(ch)) != NULL) {
2143
2144 /*
2145 * This ref is taken to protect the payload itself from being
2146 * freed before the user is finished with it, which the user
2147 * indicates by calling xpc_initiate_received().
2148 */
2149 xpc_msgqueue_ref(ch);
2150
2151 atomic_inc(&ch->kthreads_active);
2152
2153 if (ch->func != NULL) {
2154 dev_dbg(xpc_chan, "ch->func() called, msg=0x%p, "
2155 "msg_number=%ld, partid=%d, channel=%d\n",
2156 (void *) msg, msg->number, ch->partid,
2157 ch->number);
2158
2159 /* deliver the message to its intended recipient */
2160 ch->func(xpcMsgReceived, ch->partid, ch->number,
2161 &msg->payload, ch->key);
2162
2163 dev_dbg(xpc_chan, "ch->func() returned, msg=0x%p, "
2164 "msg_number=%ld, partid=%d, channel=%d\n",
2165 (void *) msg, msg->number, ch->partid,
2166 ch->number);
2167 }
2168
2169 atomic_dec(&ch->kthreads_active);
2170 }
2171}
2172
2173
2174/*
2175 * Now we actually acknowledge the messages that have been delivered and ack'd
2176 * by advancing the cached remote message queue's Get value and if requested
2177 * send an IPI to the message sender's partition.
2178 */
2179static void
2180xpc_acknowledge_msgs(struct xpc_channel *ch, s64 initial_get, u8 msg_flags)
2181{
2182 struct xpc_msg *msg;
2183 s64 get = initial_get + 1;
2184 int send_IPI = 0;
2185
2186
2187 while (1) {
2188
2189 while (1) {
2190 if (get == (volatile s64) ch->w_local_GP.get) {
2191 break;
2192 }
2193
2194 msg = (struct xpc_msg *) ((u64) ch->remote_msgqueue +
2195 (get % ch->remote_nentries) * ch->msg_size);
2196
2197 if (!(msg->flags & XPC_M_DONE)) {
2198 break;
2199 }
2200
2201 msg_flags |= msg->flags;
2202 get++;
2203 }
2204
2205 if (get == initial_get) {
2206 /* nothing's changed */
2207 break;
2208 }
2209
2210 if (cmpxchg_rel(&ch->local_GP->get, initial_get, get) !=
2211 initial_get) {
2212 /* someone else beat us to it */
2213 DBUG_ON((volatile s64) ch->local_GP->get <=
2214 initial_get);
2215 break;
2216 }
2217
2218 /* we just set the new value of local_GP->get */
2219
2220 dev_dbg(xpc_chan, "local_GP->get changed to %ld, partid=%d, "
2221 "channel=%d\n", get, ch->partid, ch->number);
2222
2223 send_IPI = (msg_flags & XPC_M_INTERRUPT);
2224
2225 /*
2226 * We need to ensure that the message referenced by
2227 * local_GP->get is not XPC_M_DONE or that local_GP->get
2228 * equals w_local_GP.get, so we'll go have a look.
2229 */
2230 initial_get = get;
2231 }
2232
2233 if (send_IPI) {
2234 xpc_IPI_send_msgrequest(ch);
2235 }
2236}
2237
2238
2239/*
2240 * Acknowledge receipt of a delivered message.
2241 *
2242 * If a message has XPC_M_INTERRUPT set, send an interrupt to the partition
2243 * that sent the message.
2244 *
2245 * This function, although called by users, does not call xpc_part_ref() to
2246 * ensure that the partition infrastructure is in place. It relies on the
2247 * fact that we called xpc_msgqueue_ref() in xpc_deliver_msg().
2248 *
2249 * Arguments:
2250 *
2251 * partid - ID of partition to which the channel is connected.
2252 * ch_number - channel # message received on.
2253 * payload - pointer to the payload area allocated via
2254 * xpc_initiate_allocate().
2255 */
2256void
2257xpc_initiate_received(partid_t partid, int ch_number, void *payload)
2258{
2259 struct xpc_partition *part = &xpc_partitions[partid];
2260 struct xpc_channel *ch;
2261 struct xpc_msg *msg = XPC_MSG_ADDRESS(payload);
2262 s64 get, msg_number = msg->number;
2263
2264
2265 DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS);
2266 DBUG_ON(ch_number < 0 || ch_number >= part->nchannels);
2267
2268 ch = &part->channels[ch_number];
2269
2270 dev_dbg(xpc_chan, "msg=0x%p, msg_number=%ld, partid=%d, channel=%d\n",
2271 (void *) msg, msg_number, ch->partid, ch->number);
2272
2273 DBUG_ON((((u64) msg - (u64) ch->remote_msgqueue) / ch->msg_size) !=
2274 msg_number % ch->remote_nentries);
2275 DBUG_ON(msg->flags & XPC_M_DONE);
2276
2277 msg->flags |= XPC_M_DONE;
2278
2279 /*
2280 * The preceding store of msg->flags must occur before the following
2281 * load of ch->local_GP->get.
2282 */
2283 mb();
2284
2285 /*
2286 * See if this message is next in line to be acknowledged as having
2287 * been delivered.
2288 */
2289 get = ch->local_GP->get;
2290 if (get == msg_number) {
2291 xpc_acknowledge_msgs(ch, get, msg->flags);
2292 }
2293
2294 /* the call to xpc_msgqueue_ref() was done by xpc_deliver_msg() */
2295 xpc_msgqueue_deref(ch);
2296}
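/*
 * Sketch of the receive side (hypothetical channel function; the
 * argument order follows the ch->func() call made by xpc_deliver_msg()):
 *
 *	static void
 *	my_channel_func(enum xpc_retval reason, partid_t partid,
 *			int ch_number, void *data, void *key)
 *	{
 *		if (reason == xpcMsgReceived) {
 *			// consume the message at data, then ack it
 *			xpc_initiate_received(partid, ch_number, data);
 *		}
 *	}
 */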
2297
diff --git a/arch/ia64/sn/kernel/xpc_main.c b/arch/ia64/sn/kernel/xpc_main.c
new file mode 100644
index 000000000000..177ddb748ebe
--- /dev/null
+++ b/arch/ia64/sn/kernel/xpc_main.c
@@ -0,0 +1,1064 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * Copyright (c) 2004-2005 Silicon Graphics, Inc. All Rights Reserved.
7 */
8
9
10/*
11 * Cross Partition Communication (XPC) support - standard version.
12 *
13 * XPC provides a message passing capability that crosses partition
14 * boundaries. This module is made up of two parts:
15 *
16 * partition This part detects the presence/absence of other
17 * partitions. It provides a heartbeat and monitors
18 * the heartbeats of other partitions.
19 *
20 * channel This part manages the channels and sends/receives
21 * messages across them to/from other partitions.
22 *
23 * There are a couple of additional functions residing in XP, which
24 * provide an interface to XPC for its users.
25 *
26 *
27 * Caveats:
28 *
29 * . We currently have no way to determine which nasid an IPI came
30 * from. Thus, xpc_IPI_send() does a remote AMO write followed by
31 * an IPI. The AMO indicates where data is to be pulled from, so
32 * after the IPI arrives, the remote partition checks the AMO word.
33 * The IPI can actually arrive before the AMO however, so other code
34 * must periodically check for this case. Also, remote AMO operations
35 * do not reliably time out. Thus we do a remote PIO read solely to
36 * know whether the remote partition is down and whether we should
37 * stop sending IPIs to it. This remote PIO read operation is set up
38 * in a special nofault region so SAL knows to ignore (and clean up)
39 * any errors due to the remote AMO write, PIO read, and/or PIO
40 * write operations.
41 *
42 * If/when new hardware solves this IPI problem, we should abandon
43 * the current approach.
44 *
45 */
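/*
 * In outline, the send side of that workaround looks like this (a
 * restatement of the caveat above, not additional behavior):
 *
 *	1. write the remote AMO word (it says where to pull data from)
 *	2. send the IPI
 *	3. the remote IRQ handler wakes a waiting thread
 *	4. that thread reads the AMO word and pulls the data
 *	5. periodic rechecks catch an AMO that landed after its IPI
 */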
46
47
48#include <linux/kernel.h>
49#include <linux/module.h>
50#include <linux/init.h>
51#include <linux/sched.h>
52#include <linux/syscalls.h>
53#include <linux/cache.h>
54#include <linux/interrupt.h>
55#include <linux/slab.h>
56#include <asm/sn/intr.h>
57#include <asm/sn/sn_sal.h>
58#include <asm/uaccess.h>
59#include "xpc.h"
60
61
62/* define two XPC debug device structures to be used with dev_dbg() et al */
63
64struct device_driver xpc_dbg_name = {
65 .name = "xpc"
66};
67
68struct device xpc_part_dbg_subname = {
69 .bus_id = {0}, /* set to "part" at xpc_init() time */
70 .driver = &xpc_dbg_name
71};
72
73struct device xpc_chan_dbg_subname = {
74 .bus_id = {0}, /* set to "chan" at xpc_init() time */
75 .driver = &xpc_dbg_name
76};
77
78struct device *xpc_part = &xpc_part_dbg_subname;
79struct device *xpc_chan = &xpc_chan_dbg_subname;
80
81
82/* systune related variables for /proc/sys directories */
83
84static int xpc_hb_min = 1;
85static int xpc_hb_max = 10;
86
87static int xpc_hb_check_min = 10;
88static int xpc_hb_check_max = 120;
89
90static ctl_table xpc_sys_xpc_hb_dir[] = {
91 {
92 1,
93 "hb_interval",
94 &xpc_hb_interval,
95 sizeof(int),
96 0644,
97 NULL,
98 &proc_dointvec_minmax,
99 &sysctl_intvec,
100 NULL,
101 &xpc_hb_min, &xpc_hb_max
102 },
103 {
104 2,
105 "hb_check_interval",
106 &xpc_hb_check_interval,
107 sizeof(int),
108 0644,
109 NULL,
110 &proc_dointvec_minmax,
111 &sysctl_intvec,
112 NULL,
113 &xpc_hb_check_min, &xpc_hb_check_max
114 },
115 {0}
116};
117static ctl_table xpc_sys_xpc_dir[] = {
118 {
119 1,
120 "hb",
121 NULL,
122 0,
123 0555,
124 xpc_sys_xpc_hb_dir
125 },
126 {0}
127};
128static ctl_table xpc_sys_dir[] = {
129 {
130 1,
131 "xpc",
132 NULL,
133 0,
134 0555,
135 xpc_sys_xpc_dir
136 },
137 {0}
138};
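/*
 * Once registered, the tables above surface the two tunables as
 * /proc/sys/xpc/hb/hb_interval and /proc/sys/xpc/hb/hb_check_interval,
 * each clamped to the min/max pair given in its entry.
 */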
139static struct ctl_table_header *xpc_sysctl;
140
141
142/* #of IRQs received */
143static atomic_t xpc_act_IRQ_rcvd;
144
145/* IRQ handler notifies this wait queue on receipt of an IRQ */
146static DECLARE_WAIT_QUEUE_HEAD(xpc_act_IRQ_wq);
147
148static unsigned long xpc_hb_check_timeout;
149
150/* xpc_hb_checker thread exited notification */
151static DECLARE_MUTEX_LOCKED(xpc_hb_checker_exited);
152
153/* xpc_discovery thread exited notification */
154static DECLARE_MUTEX_LOCKED(xpc_discovery_exited);
155
156
157static struct timer_list xpc_hb_timer;
158
159
160static void xpc_kthread_waitmsgs(struct xpc_partition *, struct xpc_channel *);
161
162
163/*
164 * Notify the heartbeat check thread that an IRQ has been received.
165 */
166static irqreturn_t
167xpc_act_IRQ_handler(int irq, void *dev_id, struct pt_regs *regs)
168{
169 atomic_inc(&xpc_act_IRQ_rcvd);
170 wake_up_interruptible(&xpc_act_IRQ_wq);
171 return IRQ_HANDLED;
172}
173
174
175/*
176 * Timer to produce the heartbeat. The timer structure's function is
177 * already set by the time this is first called. A tunable is used to
178 * specify when the next timeout should occur.
179 */
180static void
181xpc_hb_beater(unsigned long dummy)
182{
183 xpc_vars->heartbeat++;
184
185 if (jiffies >= xpc_hb_check_timeout) {
186 wake_up_interruptible(&xpc_act_IRQ_wq);
187 }
188
189 xpc_hb_timer.expires = jiffies + (xpc_hb_interval * HZ);
190 add_timer(&xpc_hb_timer);
191}
192
193
194/*
195 * This thread is responsible for nearly all of the partition
196 * activation/deactivation.
197 */
198static int
199xpc_hb_checker(void *ignore)
200{
201 int last_IRQ_count = 0;
202 int new_IRQ_count;
203 int force_IRQ = 0;
204
205
206 /* this thread was marked active by xpc_hb_init() */
207
208 daemonize(XPC_HB_CHECK_THREAD_NAME);
209
210 set_cpus_allowed(current, cpumask_of_cpu(XPC_HB_CHECK_CPU));
211
212 xpc_hb_check_timeout = jiffies + (xpc_hb_check_interval * HZ);
213
214 while (!(volatile int) xpc_exiting) {
215
216 /* wait for IRQ or timeout */
217 (void) wait_event_interruptible(xpc_act_IRQ_wq,
218 (last_IRQ_count < atomic_read(&xpc_act_IRQ_rcvd) ||
219 jiffies >= xpc_hb_check_timeout ||
220 (volatile int) xpc_exiting));
221
222 dev_dbg(xpc_part, "woke up with %d ticks rem; %d IRQs have "
223 "been received\n",
224 (int) (xpc_hb_check_timeout - jiffies),
225 atomic_read(&xpc_act_IRQ_rcvd) - last_IRQ_count);
226
227
228 /* checking of remote heartbeats is skewed by IRQ handling */
229 if (jiffies >= xpc_hb_check_timeout) {
230 dev_dbg(xpc_part, "checking remote heartbeats\n");
231 xpc_check_remote_hb();
232
233 /*
234 * We need to periodically recheck to ensure no
235 * IPI/AMO pairs have been missed. That check
236 * must always reset xpc_hb_check_timeout.
237 */
238 force_IRQ = 1;
239 }
240
241
242 new_IRQ_count = atomic_read(&xpc_act_IRQ_rcvd);
243 if (last_IRQ_count < new_IRQ_count || force_IRQ != 0) {
244 force_IRQ = 0;
245
246 dev_dbg(xpc_part, "found an IRQ to process; will be "
247 "resetting xpc_hb_check_timeout\n");
248
249 last_IRQ_count += xpc_identify_act_IRQ_sender();
250 if (last_IRQ_count < new_IRQ_count) {
251 /* retry once to help avoid missing AMO */
252 (void) xpc_identify_act_IRQ_sender();
253 }
254 last_IRQ_count = new_IRQ_count;
255
256 xpc_hb_check_timeout = jiffies +
257 (xpc_hb_check_interval * HZ);
258 }
259 }
260
261 dev_dbg(xpc_part, "heartbeat checker is exiting\n");
262
263
264 /* mark this thread as inactive */
265 up(&xpc_hb_checker_exited);
266 return 0;
267}
268
269
270/*
271 * This thread will attempt to discover other partitions to activate
272 * based on info provided by SAL. This new thread is short-lived and
273 * will exit once discovery is complete.
274 */
275static int
276xpc_initiate_discovery(void *ignore)
277{
278 daemonize(XPC_DISCOVERY_THREAD_NAME);
279
280 xpc_discovery();
281
282 dev_dbg(xpc_part, "discovery thread is exiting\n");
283
284 /* mark this thread as inactive */
285 up(&xpc_discovery_exited);
286 return 0;
287}
288
289
290/*
291 * Establish first contact with the remote partition. This involves pulling
292 * the XPC per partition variables from the remote partition and waiting for
293 * the remote partition to pull ours.
294 */
295static enum xpc_retval
296xpc_make_first_contact(struct xpc_partition *part)
297{
298 enum xpc_retval ret;
299
300
301 while ((ret = xpc_pull_remote_vars_part(part)) != xpcSuccess) {
302 if (ret != xpcRetry) {
303 XPC_DEACTIVATE_PARTITION(part, ret);
304 return ret;
305 }
306
307 dev_dbg(xpc_chan, "waiting to make first contact with "
308 "partition %d\n", XPC_PARTID(part));
309
310 /* wait a 1/4 of a second or so */
311 set_current_state(TASK_INTERRUPTIBLE);
312 (void) schedule_timeout(HZ / 4);
313
314 if (part->act_state == XPC_P_DEACTIVATING) {
315 return part->reason;
316 }
317 }
318
319 return xpc_mark_partition_active(part);
320}
321
322
323/*
324 * The first kthread assigned to a newly activated partition is the one
325 * created by XPC HB with which it calls xpc_partition_up(). XPC hangs on to
326 * that kthread until the partition is brought down, at which time that kthread
327 * returns back to XPC HB. (The return of that kthread will signify to XPC HB
328 * that XPC has dismantled all communication infrastructure for the associated
329 * partition.) This kthread becomes the channel manager for that partition.
330 *
331 * Each active partition has a channel manager, who, besides connecting and
332 * disconnecting channels, will ensure that each of the partition's connected
333 * channels has the required number of assigned kthreads to get the work done.
334 */
335static void
336xpc_channel_mgr(struct xpc_partition *part)
337{
338 while (part->act_state != XPC_P_DEACTIVATING ||
339 atomic_read(&part->nchannels_active) > 0) {
340
341 xpc_process_channel_activity(part);
342
343
344 /*
345 * Wait until we've been requested to activate kthreads or
346 * all of the channel's message queues have been torn down or
347 * a signal is pending.
348 *
349 * The channel_mgr_requests count is set to 1 after being
350 * awakened. This is done to prevent the channel mgr from making
351 * one pass through the loop for each request, since it will
352 * be servicing all the requests in one pass. The reason it's
353 * set to 1 instead of 0 is so that other kthreads will know
354 * that the channel mgr is running and won't bother trying to
355 * wake it up.
356 */
357 atomic_dec(&part->channel_mgr_requests);
358 (void) wait_event_interruptible(part->channel_mgr_wq,
359 (atomic_read(&part->channel_mgr_requests) > 0 ||
360 (volatile u64) part->local_IPI_amo != 0 ||
361 ((volatile u8) part->act_state ==
362 XPC_P_DEACTIVATING &&
363 atomic_read(&part->nchannels_active) == 0)));
364 atomic_set(&part->channel_mgr_requests, 1);
365
366 // >>> Does it need to wakeup periodically as well? In case we
367 // >>> miscalculated the #of kthreads to wakeup or create?
368 }
369}
370
371
372/*
373 * When XPC HB determines that a partition has come up, it will create a new
374 * kthread and that kthread will call this function to attempt to set up the
375 * basic infrastructure used for Cross Partition Communication with the newly
376 * upped partition.
377 *
378 * The kthread that was created by XPC HB and which set up the XPC
379 * infrastructure will remain assigned to the partition until the partition
380 * goes down, at which time the kthread will tear down the XPC infrastructure
381 * and then exit.
382 *
383 * XPC HB will put the remote partition's XPC per partition specific variables
384 * physical address into xpc_partitions[partid].remote_vars_part_pa prior to
385 * calling xpc_partition_up().
386 */
387static void
388xpc_partition_up(struct xpc_partition *part)
389{
390 DBUG_ON(part->channels != NULL);
391
392 dev_dbg(xpc_chan, "activating partition %d\n", XPC_PARTID(part));
393
394 if (xpc_setup_infrastructure(part) != xpcSuccess) {
395 return;
396 }
397
398 /*
399 * The kthread that XPC HB called us with will become the
400 * channel manager for this partition. It will not return
401 * back to XPC HB until the partition's XPC infrastructure
402 * has been dismantled.
403 */
404
405 (void) xpc_part_ref(part); /* this will always succeed */
406
407 if (xpc_make_first_contact(part) == xpcSuccess) {
408 xpc_channel_mgr(part);
409 }
410
411 xpc_part_deref(part);
412
413 xpc_teardown_infrastructure(part);
414}
415
416
417static int
418xpc_activating(void *__partid)
419{
420 partid_t partid = (u64) __partid;
421 struct xpc_partition *part = &xpc_partitions[partid];
422 unsigned long irq_flags;
423 struct sched_param param = { .sched_priority = MAX_USER_RT_PRIO - 1 };
424 int ret;
425
426
427 DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS);
428
429 spin_lock_irqsave(&part->act_lock, irq_flags);
430
431 if (part->act_state == XPC_P_DEACTIVATING) {
432 part->act_state = XPC_P_INACTIVE;
433 spin_unlock_irqrestore(&part->act_lock, irq_flags);
434 part->remote_rp_pa = 0;
435 return 0;
436 }
437
438 /* indicate the thread is activating */
439 DBUG_ON(part->act_state != XPC_P_ACTIVATION_REQ);
440 part->act_state = XPC_P_ACTIVATING;
441
442 XPC_SET_REASON(part, 0, 0);
443 spin_unlock_irqrestore(&part->act_lock, irq_flags);
444
445 dev_dbg(xpc_part, "bringing partition %d up\n", partid);
446
447 daemonize("xpc%02d", partid);
448
449 /*
450 * This thread needs to run at a realtime priority to prevent a
451 * significant performance degradation.
452 */
453 ret = sched_setscheduler(current, SCHED_FIFO, &param);
454 if (ret != 0) {
455 dev_warn(xpc_part, "unable to set pid %d to a realtime "
456 "priority, ret=%d\n", current->pid, ret);
457 }
458
459 /* allow this thread and its children to run on any CPU */
460 set_cpus_allowed(current, CPU_MASK_ALL);
461
462 /*
463 * Register the remote partition's AMOs with SAL so it can handle
464 * and cleanup errors within that address range should the remote
465 * partition go down. We don't unregister this range because it is
466 * difficult to tell when outstanding writes to the remote partition
467 * are finished and thus when it is safe to unregister. This should
468 * not result in wasted space in the SAL xp_addr_region table because
469 * we should get the same page for remote_amos_page_pa after module
470 * reloads and system reboots.
471 */
472 if (sn_register_xp_addr_region(part->remote_amos_page_pa,
473 PAGE_SIZE, 1) < 0) {
474 dev_warn(xpc_part, "xpc_partition_up(%d) failed to register "
475 "xp_addr region\n", partid);
476
477 spin_lock_irqsave(&part->act_lock, irq_flags);
478 part->act_state = XPC_P_INACTIVE;
479 XPC_SET_REASON(part, xpcPhysAddrRegFailed, __LINE__);
480 spin_unlock_irqrestore(&part->act_lock, irq_flags);
481 part->remote_rp_pa = 0;
482 return 0;
483 }
484
485 XPC_ALLOW_HB(partid, xpc_vars);
486 xpc_IPI_send_activated(part);
487
488
489 /*
490 * xpc_partition_up() holds this thread and marks this partition as
491 * XPC_P_ACTIVE by way of xpc_mark_partition_active().
492 */
493 (void) xpc_partition_up(part);
494
495 xpc_mark_partition_inactive(part);
496
497 if (part->reason == xpcReactivating) {
498 /* interrupting ourselves results in activating partition */
499 xpc_IPI_send_reactivate(part);
500 }
501
502 return 0;
503}
504
505
506void
507xpc_activate_partition(struct xpc_partition *part)
508{
509 partid_t partid = XPC_PARTID(part);
510 unsigned long irq_flags;
511 pid_t pid;
512
513
514 spin_lock_irqsave(&part->act_lock, irq_flags);
515
516 pid = kernel_thread(xpc_activating, (void *) ((u64) partid), 0);
517
518 DBUG_ON(part->act_state != XPC_P_INACTIVE);
519
520 if (pid > 0) {
521 part->act_state = XPC_P_ACTIVATION_REQ;
522 XPC_SET_REASON(part, xpcCloneKThread, __LINE__);
523 } else {
524 XPC_SET_REASON(part, xpcCloneKThreadFailed, __LINE__);
525 }
526
527 spin_unlock_irqrestore(&part->act_lock, irq_flags);
528}
529
530
531/*
532 * Handle the receipt of a SGI_XPC_NOTIFY IRQ by seeing whether the specified
533 * partition actually sent it. Since SGI_XPC_NOTIFY IRQs may be shared by more
534 * than one partition, we use an AMO_t structure per partition to indicate
535 * whether a partition has sent an IPI or not. If it has, then wake up the
536 * associated kthread to handle it.
537 *
538 * All SGI_XPC_NOTIFY IRQs received by XPC are the result of IPIs sent by XPC
539 * running on other partitions.
540 *
541 * Noteworthy Arguments:
542 *
543 * irq - Interrupt ReQuest number. NOT USED.
544 *
545 * dev_id - partid of IPI's potential sender.
546 *
547 * regs - processor's context before the processor entered
548 * interrupt code. NOT USED.
549 */
550irqreturn_t
551xpc_notify_IRQ_handler(int irq, void *dev_id, struct pt_regs *regs)
552{
553 partid_t partid = (partid_t) (u64) dev_id;
554 struct xpc_partition *part = &xpc_partitions[partid];
555
556
557 DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS);
558
559 if (xpc_part_ref(part)) {
560 xpc_check_for_channel_activity(part);
561
562 xpc_part_deref(part);
563 }
564 return IRQ_HANDLED;
565}
566
567
568/*
569 * Check to see if xpc_notify_IRQ_handler() dropped any IPIs on the floor
570 * because the write to their associated IPI amo completed after the IRQ/IPI
571 * was received.
572 */
573void
574xpc_dropped_IPI_check(struct xpc_partition *part)
575{
576 if (xpc_part_ref(part)) {
577 xpc_check_for_channel_activity(part);
578
579 part->dropped_IPI_timer.expires = jiffies +
580 XPC_P_DROPPED_IPI_WAIT;
581 add_timer(&part->dropped_IPI_timer);
582 xpc_part_deref(part);
583 }
584}
585
586
587void
588xpc_activate_kthreads(struct xpc_channel *ch, int needed)
589{
590 int idle = atomic_read(&ch->kthreads_idle);
591 int assigned = atomic_read(&ch->kthreads_assigned);
592 int wakeup;
593
594
595 DBUG_ON(needed <= 0);
596
597 if (idle > 0) {
598 wakeup = (needed > idle) ? idle : needed;
599 needed -= wakeup;
600
601 dev_dbg(xpc_chan, "wakeup %d idle kthreads, partid=%d, "
602 "channel=%d\n", wakeup, ch->partid, ch->number);
603
604 /* only wakeup the requested number of kthreads */
605 wake_up_nr(&ch->idle_wq, wakeup);
606 }
607
608 if (needed <= 0) {
609 return;
610 }
611
612 if (needed + assigned > ch->kthreads_assigned_limit) {
613 needed = ch->kthreads_assigned_limit - assigned;
614 // >>> should never be less than 0
615 if (needed <= 0) {
616 return;
617 }
618 }
619
620 dev_dbg(xpc_chan, "create %d new kthreads, partid=%d, channel=%d\n",
621 needed, ch->partid, ch->number);
622
623 xpc_create_kthreads(ch, needed);
624}
625
626
627/*
628 * This function is where XPC's kthreads wait for messages to deliver.
629 */
630static void
631xpc_kthread_waitmsgs(struct xpc_partition *part, struct xpc_channel *ch)
632{
633 do {
634 /* deliver messages to their intended recipients */
635
636 while ((volatile s64) ch->w_local_GP.get <
637 (volatile s64) ch->w_remote_GP.put &&
638 !((volatile u32) ch->flags &
639 XPC_C_DISCONNECTING)) {
640 xpc_deliver_msg(ch);
641 }
642
643 if (atomic_inc_return(&ch->kthreads_idle) >
644 ch->kthreads_idle_limit) {
645 /* too many idle kthreads on this channel */
646 atomic_dec(&ch->kthreads_idle);
647 break;
648 }
649
650 dev_dbg(xpc_chan, "idle kthread calling "
651 "wait_event_interruptible_exclusive()\n");
652
653 (void) wait_event_interruptible_exclusive(ch->idle_wq,
654 ((volatile s64) ch->w_local_GP.get <
655 (volatile s64) ch->w_remote_GP.put ||
656 ((volatile u32) ch->flags &
657 XPC_C_DISCONNECTING)));
658
659 atomic_dec(&ch->kthreads_idle);
660
661 } while (!((volatile u32) ch->flags & XPC_C_DISCONNECTING));
662}
663
664
665static int
666xpc_daemonize_kthread(void *args)
667{
668 partid_t partid = XPC_UNPACK_ARG1(args);
669 u16 ch_number = XPC_UNPACK_ARG2(args);
670 struct xpc_partition *part = &xpc_partitions[partid];
671 struct xpc_channel *ch;
672 int n_needed;
673
674
675 daemonize("xpc%02dc%d", partid, ch_number);
676
677 dev_dbg(xpc_chan, "kthread starting, partid=%d, channel=%d\n",
678 partid, ch_number);
679
680 ch = &part->channels[ch_number];
681
682 if (!(ch->flags & XPC_C_DISCONNECTING)) {
683 DBUG_ON(!(ch->flags & XPC_C_CONNECTED));
684
685 /* let registerer know that connection has been established */
686
687 if (atomic_read(&ch->kthreads_assigned) == 1) {
688 xpc_connected_callout(ch);
689
690 /*
691 * It is possible that while the callout was being
692 * made that the remote partition sent some messages.
693 * If that is the case, we may need to activate
694 * additional kthreads to help deliver them. We only
695 * need one less than total #of messages to deliver.
696 */
697 n_needed = ch->w_remote_GP.put - ch->w_local_GP.get - 1;
698 if (n_needed > 0 &&
699 !(ch->flags & XPC_C_DISCONNECTING)) {
700 xpc_activate_kthreads(ch, n_needed);
701 }
702 }
703
704 xpc_kthread_waitmsgs(part, ch);
705 }
706
707 if (atomic_dec_return(&ch->kthreads_assigned) == 0 &&
708 ((ch->flags & XPC_C_CONNECTCALLOUT) ||
709 (ch->reason != xpcUnregistering &&
710 ch->reason != xpcOtherUnregistering))) {
711 xpc_disconnected_callout(ch);
712 }
713
714
715 xpc_msgqueue_deref(ch);
716
717 dev_dbg(xpc_chan, "kthread exiting, partid=%d, channel=%d\n",
718 partid, ch_number);
719
720 xpc_part_deref(part);
721 return 0;
722}
723
724
725/*
726 * For each partition that XPC has established communications with, there is
727 * a minimum of one kernel thread assigned to perform any operation that
728 * may potentially sleep or block (basically the callouts to the asynchronous
729 * functions registered via xpc_connect()).
730 *
731 * Additional kthreads are created and destroyed by XPC as the workload
732 * demands.
733 *
734 * A kthread is assigned to one of the active channels that exists for a given
735 * partition.
736 */
737void
738xpc_create_kthreads(struct xpc_channel *ch, int needed)
739{
740 unsigned long irq_flags;
741 pid_t pid;
742 u64 args = XPC_PACK_ARGS(ch->partid, ch->number);
743
744
745 while (needed-- > 0) {
746 pid = kernel_thread(xpc_daemonize_kthread, (void *) args, 0);
747 if (pid < 0) {
748 /* the fork failed */
749
750 if (atomic_read(&ch->kthreads_assigned) <
751 ch->kthreads_idle_limit) {
752 /*
753 * Flag this as an error only if we have an
754 * insufficient #of kthreads for the channel
755 * to function.
756 *
757 * No xpc_msgqueue_ref() is needed here since
758 * the channel mgr is doing this.
759 */
760 spin_lock_irqsave(&ch->lock, irq_flags);
761 XPC_DISCONNECT_CHANNEL(ch, xpcLackOfResources,
762 &irq_flags);
763 spin_unlock_irqrestore(&ch->lock, irq_flags);
764 }
765 break;
766 }
767
768 /*
769 * The following is done on behalf of the newly created
770 * kthread. That kthread is responsible for doing the
771 * counterpart to the following before it exits.
772 */
773 (void) xpc_part_ref(&xpc_partitions[ch->partid]);
774 xpc_msgqueue_ref(ch);
775 atomic_inc(&ch->kthreads_assigned);
776 ch->kthreads_created++; // >>> temporary debug only!!!
777 }
778}
779
780
781void
782xpc_disconnect_wait(int ch_number)
783{
784 partid_t partid;
785 struct xpc_partition *part;
786 struct xpc_channel *ch;
787
788
789 /* now wait for all callouts to the caller's function to cease */
790 for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
791 part = &xpc_partitions[partid];
792
793 if (xpc_part_ref(part)) {
794 ch = &part->channels[ch_number];
795
796// >>> how do we keep from falling into the window between our check and going
797// >>> down and coming back up where sema is re-inited?
798 if (ch->flags & XPC_C_SETUP) {
799 (void) down(&ch->teardown_sema);
800 }
801
802 xpc_part_deref(part);
803 }
804 }
805}
806
807
808static void
809xpc_do_exit(void)
810{
811 partid_t partid;
812 int active_part_count;
813 struct xpc_partition *part;
814
815
816 /* now it's time to eliminate our heartbeat */
817 del_timer_sync(&xpc_hb_timer);
818 xpc_vars->heartbeating_to_mask = 0;
819
820 /* indicate to others that our reserved page is uninitialized */
821 xpc_rsvd_page->vars_pa = 0;
822
823 /*
824 * Ignore all incoming interrupts. Without interrupts the heartbeat
825 * checker won't activate any new partitions that may come up.
826 */
827 free_irq(SGI_XPC_ACTIVATE, NULL);
828
829 /*
830 * Cause the heartbeat checker and the discovery threads to exit.
831 * We don't want them attempting to activate new partitions as we
832 * try to deactivate the existing ones.
833 */
834 xpc_exiting = 1;
835 wake_up_interruptible(&xpc_act_IRQ_wq);
836
837 /* wait for the heartbeat checker thread to mark itself inactive */
838 down(&xpc_hb_checker_exited);
839
840 /* wait for the discovery thread to mark itself inactive */
841 down(&xpc_discovery_exited);
842
843
844 set_current_state(TASK_INTERRUPTIBLE);
845 schedule_timeout(3 * HZ / 10);
846 set_current_state(TASK_RUNNING);
847
848
849 /* wait for all partitions to become inactive */
850
851 do {
852 active_part_count = 0;
853
854 for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
855 part = &xpc_partitions[partid];
856 if (part->act_state != XPC_P_INACTIVE) {
857 active_part_count++;
858
859 XPC_DEACTIVATE_PARTITION(part, xpcUnloading);
860 }
861 }
862
863 if (active_part_count) {
864 set_current_state(TASK_INTERRUPTIBLE);
866 schedule_timeout(3 * HZ / 10);
866 set_current_state(TASK_RUNNING);
867 }
868
869 } while (active_part_count > 0);
870
871
872 /* close down protections for IPI operations */
873 xpc_restrict_IPI_ops();
874
875
876 /* clear the interface to XPC's functions */
877 xpc_clear_interface();
878
879 if (xpc_sysctl) {
880 unregister_sysctl_table(xpc_sysctl);
881 }
882}
883
884
885int __init
886xpc_init(void)
887{
888 int ret;
889 partid_t partid;
890 struct xpc_partition *part;
891 pid_t pid;
892
893
894 /*
895 * xpc_remote_copy_buffer is used as a temporary buffer for bte_copy'ng
896 * both a partition's reserved page and its XPC variables. Its size was
897 * based on the size of a reserved page. So we need to ensure that the
898 * XPC variables will fit as well.
899 */
900 if (XPC_VARS_ALIGNED_SIZE > XPC_RSVD_PAGE_ALIGNED_SIZE) {
901 dev_err(xpc_part, "xpc_remote_copy_buffer is not big enough\n");
902 return -EPERM;
903 }
904 DBUG_ON((u64) xpc_remote_copy_buffer !=
905 L1_CACHE_ALIGN((u64) xpc_remote_copy_buffer));
906
907 snprintf(xpc_part->bus_id, BUS_ID_SIZE, "part");
908 snprintf(xpc_chan->bus_id, BUS_ID_SIZE, "chan");
909
910 xpc_sysctl = register_sysctl_table(xpc_sys_dir, 1);
911
912 /*
913 * The first few fields of each entry of xpc_partitions[] need to
914 * be initialized now so that calls to xpc_connect() and
915 * xpc_disconnect() can be made prior to the activation of any remote
916 * partition. NOTE THAT NONE OF THE OTHER FIELDS BELONGING TO THESE
917 * ENTRIES ARE MEANINGFUL UNTIL AFTER AN ENTRY'S CORRESPONDING
918 * PARTITION HAS BEEN ACTIVATED.
919 */
920 for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
921 part = &xpc_partitions[partid];
922
923 DBUG_ON((u64) part != L1_CACHE_ALIGN((u64) part));
924
925 part->act_IRQ_rcvd = 0;
926 spin_lock_init(&part->act_lock);
927 part->act_state = XPC_P_INACTIVE;
928 XPC_SET_REASON(part, 0, 0);
929 part->setup_state = XPC_P_UNSET;
930 init_waitqueue_head(&part->teardown_wq);
931 atomic_set(&part->references, 0);
932 }
933
934 /*
935 * Open up protections for IPI operations (and AMO operations on
936 * Shub 1.1 systems).
937 */
938 xpc_allow_IPI_ops();
939
940 /*
941 * Interrupts being processed will increment this atomic variable and
942 * awaken the heartbeat thread which will process the interrupts.
943 */
944 atomic_set(&xpc_act_IRQ_rcvd, 0);
945
946 /*
947 * This is safe to do before the xpc_hb_checker thread has started
948 * because the handler merely bumps a count and wakes a wait queue.
949 * If an interrupt is received before the thread is waiting, the
950 * thread will not go to sleep, but rather immediately process it.
951 */
952 ret = request_irq(SGI_XPC_ACTIVATE, xpc_act_IRQ_handler, 0,
953 "xpc hb", NULL);
954 if (ret != 0) {
955 dev_err(xpc_part, "can't register ACTIVATE IRQ handler, "
956 "errno=%d\n", -ret);
957
958 xpc_restrict_IPI_ops();
959
960 if (xpc_sysctl) {
961 unregister_sysctl_table(xpc_sysctl);
962 }
963 return -EBUSY;
964 }
965
966 /*
967 * Fill the partition reserved page with the information needed by
968 * other partitions to discover we are alive and establish initial
969 * communications.
970 */
971 xpc_rsvd_page = xpc_rsvd_page_init();
972 if (xpc_rsvd_page == NULL) {
973 dev_err(xpc_part, "could not setup our reserved page\n");
974
975 free_irq(SGI_XPC_ACTIVATE, NULL);
976 xpc_restrict_IPI_ops();
977
978 if (xpc_sysctl) {
979 unregister_sysctl_table(xpc_sysctl);
980 }
981 return -EBUSY;
982 }
983
984
985 /*
986 * Set the beating to other partitions into motion. This is
987 * the last requirement for other partitions' discovery to
988 * initiate communications with us.
989 */
990 init_timer(&xpc_hb_timer);
991 xpc_hb_timer.function = xpc_hb_beater;
992 xpc_hb_beater(0);
993
994
995 /*
996 * The real work-horse behind xpc. This processes incoming
997 * interrupts and monitors remote heartbeats.
998 */
999 pid = kernel_thread(xpc_hb_checker, NULL, 0);
1000 if (pid < 0) {
1001 dev_err(xpc_part, "failed while forking hb check thread\n");
1002
1003 /* indicate to others that our reserved page is uninitialized */
1004 xpc_rsvd_page->vars_pa = 0;
1005
1006 del_timer_sync(&xpc_hb_timer);
1007 free_irq(SGI_XPC_ACTIVATE, NULL);
1008 xpc_restrict_IPI_ops();
1009
1010 if (xpc_sysctl) {
1011 unregister_sysctl_table(xpc_sysctl);
1012 }
1013 return -EBUSY;
1014 }
1015
1016
1017 /*
1018 * Startup a thread that will attempt to discover other partitions to
1019 * activate based on info provided by SAL. This new thread is
1020 * short-lived and will exit once discovery is complete.
1021 */
1022 pid = kernel_thread(xpc_initiate_discovery, NULL, 0);
1023 if (pid < 0) {
1024 dev_err(xpc_part, "failed while forking discovery thread\n");
1025
1026 /* mark this new thread as a non-starter */
1027 up(&xpc_discovery_exited);
1028
1029 xpc_do_exit();
1030 return -EBUSY;
1031 }
1032
1033
1034 /* set the interface to point at XPC's functions */
1035 xpc_set_interface(xpc_initiate_connect, xpc_initiate_disconnect,
1036 xpc_initiate_allocate, xpc_initiate_send,
1037 xpc_initiate_send_notify, xpc_initiate_received,
1038 xpc_initiate_partid_to_nasids);
1039
1040 return 0;
1041}
1042module_init(xpc_init);
1043
1044
1045void __exit
1046xpc_exit(void)
1047{
1048 xpc_do_exit();
1049}
1050module_exit(xpc_exit);
1051
1052
1053MODULE_AUTHOR("Silicon Graphics, Inc.");
1054MODULE_DESCRIPTION("Cross Partition Communication (XPC) support");
1055MODULE_LICENSE("GPL");
1056
1057module_param(xpc_hb_interval, int, 0);
1058MODULE_PARM_DESC(xpc_hb_interval, "Number of seconds between "
1059 "heartbeat increments.");
1060
1061module_param(xpc_hb_check_interval, int, 0);
1062MODULE_PARM_DESC(xpc_hb_check_interval, "Number of seconds between "
1063 "heartbeat checks.");
1064
diff --git a/arch/ia64/sn/kernel/xpc_partition.c b/arch/ia64/sn/kernel/xpc_partition.c
new file mode 100644
index 000000000000..2c3c4a8af553
--- /dev/null
+++ b/arch/ia64/sn/kernel/xpc_partition.c
@@ -0,0 +1,984 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * Copyright (c) 2004-2005 Silicon Graphics, Inc. All Rights Reserved.
7 */
8
9
10/*
11 * Cross Partition Communication (XPC) partition support.
12 *
13 * This is the part of XPC that detects the presence/absence of
14 * other partitions. It provides a heartbeat and monitors the
15 * heartbeats of other partitions.
16 *
17 */
18
19
20#include <linux/kernel.h>
21#include <linux/sysctl.h>
22#include <linux/cache.h>
23#include <linux/mmzone.h>
24#include <linux/nodemask.h>
25#include <asm/sn/bte.h>
26#include <asm/sn/intr.h>
27#include <asm/sn/sn_sal.h>
28#include <asm/sn/nodepda.h>
29#include <asm/sn/addrs.h>
30#include "xpc.h"
31
32
33/* XPC is exiting flag */
34int xpc_exiting;
35
36
37/* SH_IPI_ACCESS shub register value on startup */
38static u64 xpc_sh1_IPI_access;
39static u64 xpc_sh2_IPI_access0;
40static u64 xpc_sh2_IPI_access1;
41static u64 xpc_sh2_IPI_access2;
42static u64 xpc_sh2_IPI_access3;
43
44
45/* original protection values for each node */
46u64 xpc_prot_vec[MAX_COMPACT_NODES];
47
48
49/* this partition's reserved page */
50struct xpc_rsvd_page *xpc_rsvd_page;
51
52/* this partition's XPC variables (within the reserved page) */
53struct xpc_vars *xpc_vars;
54struct xpc_vars_part *xpc_vars_part;
55
56
57/*
58 * For performance reasons, each entry of xpc_partitions[] is cacheline
59 * aligned. And xpc_partitions[] is padded with an additional entry at the
60 * end so that the last legitimate entry doesn't share its cacheline with
61 * another variable.
62 */
63struct xpc_partition xpc_partitions[XP_MAX_PARTITIONS + 1];
64
65
66/*
67 * Generic buffer used to store a local copy of the remote partition's
68 * reserved page or XPC variables.
69 *
70 * xpc_discovery runs only once and is a separate thread that is
71 * very likely going to be processing in parallel with receiving
72 * interrupts.
73 */
74char ____cacheline_aligned
75 xpc_remote_copy_buffer[XPC_RSVD_PAGE_ALIGNED_SIZE];
76
77
78/* systune related variables */
79int xpc_hb_interval = XPC_HB_DEFAULT_INTERVAL;
80int xpc_hb_check_interval = XPC_HB_CHECK_DEFAULT_TIMEOUT;
81
82
83/*
84 * Given a nasid, get the physical address of the partition's reserved page
85 * for that nasid. This function returns 0 on any error.
86 */
87static u64
88xpc_get_rsvd_page_pa(int nasid, u64 buf, u64 buf_size)
89{
90 bte_result_t bte_res;
91 s64 status;
92 u64 cookie = 0;
93 u64 rp_pa = nasid; /* seed with nasid */
94 u64 len = 0;
95
96
97 while (1) {
98
99 status = sn_partition_reserved_page_pa(buf, &cookie, &rp_pa,
100 &len);
101
102 dev_dbg(xpc_part, "SAL returned with status=%li, cookie="
103 "0x%016lx, address=0x%016lx, len=0x%016lx\n",
104 status, cookie, rp_pa, len);
105
106 if (status != SALRET_MORE_PASSES) {
107 break;
108 }
109
110 if (len > buf_size) {
111 dev_err(xpc_part, "len (=0x%016lx) > buf_size\n", len);
112 status = SALRET_ERROR;
113 break;
114 }
115
116 bte_res = xp_bte_copy(rp_pa, ia64_tpa(buf), buf_size,
117 (BTE_NOTIFY | BTE_WACQUIRE), NULL);
118 if (bte_res != BTE_SUCCESS) {
119 dev_dbg(xpc_part, "xp_bte_copy failed %i\n", bte_res);
120 status = SALRET_ERROR;
121 break;
122 }
123 }
124
125 if (status != SALRET_OK) {
126 rp_pa = 0;
127 }
128 dev_dbg(xpc_part, "reserved page at phys address 0x%016lx\n", rp_pa);
129 return rp_pa;
130}
131
132
133/*
134 * Fill the partition reserved page with the information needed by
135 * other partitions to discover we are alive and establish initial
136 * communications.
137 */
138struct xpc_rsvd_page *
139xpc_rsvd_page_init(void)
140{
141 struct xpc_rsvd_page *rp;
142 AMO_t *amos_page;
143 u64 rp_pa, next_cl, nasid_array = 0;
144 int i, ret;
145
146
147 /* get the local reserved page's address */
148
149 rp_pa = xpc_get_rsvd_page_pa(cnodeid_to_nasid(0),
150 (u64) xpc_remote_copy_buffer,
151 XPC_RSVD_PAGE_ALIGNED_SIZE);
152 if (rp_pa == 0) {
153 dev_err(xpc_part, "SAL failed to locate the reserved page\n");
154 return NULL;
155 }
156 rp = (struct xpc_rsvd_page *) __va(rp_pa);
157
158 if (rp->partid != sn_partition_id) {
159 dev_err(xpc_part, "the reserved page's partid of %d should be "
160 "%d\n", rp->partid, sn_partition_id);
161 return NULL;
162 }
163
164 rp->version = XPC_RP_VERSION;
165
166 /*
167 * Place the XPC variables on the cache line following the
168 * reserved page structure.
169 */
170 next_cl = (u64) rp + XPC_RSVD_PAGE_ALIGNED_SIZE;
171 xpc_vars = (struct xpc_vars *) next_cl;
172
173 /*
174 * Before clearing xpc_vars, see if a page of AMOs had been previously
175 * allocated. If not we'll need to allocate one and set permissions
176 * so that cross-partition AMOs are allowed.
177 *
178 * The allocated AMO page needs MCA reporting to remain disabled after
179 * XPC has unloaded. To make this work, we keep a copy of the pointer
180 * to this page (i.e., amos_page) in the struct xpc_vars structure,
181 * which is pointed to by the reserved page, and re-use that saved copy
182 * on subsequent loads of XPC. This AMO page is never freed, and its
183 * memory protections are never restricted.
184 */
185 if ((amos_page = xpc_vars->amos_page) == NULL) {
186 amos_page = (AMO_t *) mspec_kalloc_page(0);
187 if (amos_page == NULL) {
188 dev_err(xpc_part, "can't allocate page of AMOs\n");
189 return NULL;
190 }
191
192 /*
193 * Open up AMO-R/W to cpu. This is done for Shub 1.1 systems
194 * when xpc_allow_IPI_ops() is called via xpc_hb_init().
195 */
196 if (!enable_shub_wars_1_1()) {
197 ret = sn_change_memprotect(ia64_tpa((u64) amos_page),
198 PAGE_SIZE, SN_MEMPROT_ACCESS_CLASS_1,
199 &nasid_array);
200 if (ret != 0) {
201 dev_err(xpc_part, "can't change memory "
202 "protections\n");
203 mspec_kfree_page((unsigned long) amos_page);
204 return NULL;
205 }
206 }
207 } else if (!IS_AMO_ADDRESS((u64) amos_page)) {
208 /*
209 * EFI's XPBOOT can also set amos_page in the reserved page,
210 * but it happens to leave it as an uncached physical address
211 * and we need it to be an uncached virtual, so we'll have to
212 * convert it.
213 */
214 if (!IS_AMO_PHYS_ADDRESS((u64) amos_page)) {
215 dev_err(xpc_part, "previously used amos_page address "
216 "is bad = 0x%p\n", (void *) amos_page);
217 return NULL;
218 }
219 amos_page = (AMO_t *) TO_AMO((u64) amos_page);
220 }
221
222 memset(xpc_vars, 0, sizeof(struct xpc_vars));
223
224 /*
225 * Place the XPC per partition specific variables on the cache line
226 * following the XPC variables structure.
227 */
228 next_cl += XPC_VARS_ALIGNED_SIZE;
229 memset((u64 *) next_cl, 0, sizeof(struct xpc_vars_part) *
230 XP_MAX_PARTITIONS);
231 xpc_vars_part = (struct xpc_vars_part *) next_cl;
232 xpc_vars->vars_part_pa = __pa(next_cl);
233
234 xpc_vars->version = XPC_V_VERSION;
235 xpc_vars->act_nasid = cpuid_to_nasid(0);
236 xpc_vars->act_phys_cpuid = cpu_physical_id(0);
237 xpc_vars->amos_page = amos_page; /* save for next load of XPC */
238
239
240 /*
241 * Initialize the activation related AMO variables.
242 */
243 xpc_vars->act_amos = xpc_IPI_init(XP_MAX_PARTITIONS);
244 for (i = 1; i < XP_NASID_MASK_WORDS; i++) {
245 xpc_IPI_init(i + XP_MAX_PARTITIONS);
246 }
247 /* export AMO page's physical address to other partitions */
248 xpc_vars->amos_page_pa = ia64_tpa((u64) xpc_vars->amos_page);
249
250 /*
251 * This signifies to the remote partition that our reserved
252 * page is initialized.
253 */
254 *((volatile u64 *) &rp->vars_pa) = __pa(xpc_vars);
255
256 return rp;
257}
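/*
 * Resulting layout as set up above (a sketch; the aligned sizes come
 * from xpc.h):
 *
 *	rp_pa:				struct xpc_rsvd_page
 *	+ XPC_RSVD_PAGE_ALIGNED_SIZE:	struct xpc_vars
 *	+ XPC_VARS_ALIGNED_SIZE:	struct xpc_vars_part[XP_MAX_PARTITIONS]
 *
 * rp->vars_pa is published last so that a remote partition seeing a
 * non-zero vars_pa can trust everything behind it.
 */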
258
259
260/*
261 * Change protections to allow IPI operations (and AMO operations on
262 * Shub 1.1 systems).
263 */
264void
265xpc_allow_IPI_ops(void)
266{
267 int node;
268 int nasid;
269
270
271 // >>> Change SH_IPI_ACCESS code to use SAL call once it is available.
272
273 if (is_shub2()) {
274 xpc_sh2_IPI_access0 =
275 (u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH2_IPI_ACCESS0));
276 xpc_sh2_IPI_access1 =
277 (u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH2_IPI_ACCESS1));
278 xpc_sh2_IPI_access2 =
279 (u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH2_IPI_ACCESS2));
280 xpc_sh2_IPI_access3 =
281 (u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH2_IPI_ACCESS3));
282
283 for_each_online_node(node) {
284 nasid = cnodeid_to_nasid(node);
285 HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0),
286 -1UL);
287 HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1),
288 -1UL);
289 HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2),
290 -1UL);
291 HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3),
292 -1UL);
293 }
294
295 } else {
296 xpc_sh1_IPI_access =
297 (u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH1_IPI_ACCESS));
298
299 for_each_online_node(node) {
300 nasid = cnodeid_to_nasid(node);
301 HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS),
302 -1UL);
303
304 /*
305 * Since the BIST collides with memory operations on
306 * SHUB 1.1, sn_change_memprotect() cannot be used.
307 */
308 if (enable_shub_wars_1_1()) {
309 /* open up everything */
310 xpc_prot_vec[node] = (u64) HUB_L((u64 *)
311 GLOBAL_MMR_ADDR(nasid,
312 SH1_MD_DQLP_MMR_DIR_PRIVEC0));
313 HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid,
314 SH1_MD_DQLP_MMR_DIR_PRIVEC0),
315 -1UL);
316 HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid,
317 SH1_MD_DQRP_MMR_DIR_PRIVEC0),
318 -1UL);
319 }
320 }
321 }
322}
323
324
325/*
326 * Restrict protections to disallow IPI operations (and AMO operations on
327 * Shub 1.1 systems).
328 */
329void
330xpc_restrict_IPI_ops(void)
331{
332 int node;
333 int nasid;
334
335
336 // >>> Change SH_IPI_ACCESS code to use SAL call once it is available.
337
338 if (is_shub2()) {
339
340 for_each_online_node(node) {
341 nasid = cnodeid_to_nasid(node);
342 HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0),
343 xpc_sh2_IPI_access0);
344 HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1),
345 xpc_sh2_IPI_access1);
346 HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2),
347 xpc_sh2_IPI_access2);
348 HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3),
349 xpc_sh2_IPI_access3);
350 }
351
352 } else {
353
354 for_each_online_node(node) {
355 nasid = cnodeid_to_nasid(node);
356 HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS),
357 xpc_sh1_IPI_access);
358
359 if (enable_shub_wars_1_1()) {
360 HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid,
361 SH1_MD_DQLP_MMR_DIR_PRIVEC0),
362 xpc_prot_vec[node]);
363 HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid,
364 SH1_MD_DQRP_MMR_DIR_PRIVEC0),
365 xpc_prot_vec[node]);
366 }
367 }
368 }
369}
370
371
372/*
373 * At periodic intervals, scan through all active partitions and ensure
374 * their heartbeat is still active. If not, the partition is deactivated.
375 */
376void
377xpc_check_remote_hb(void)
378{
379 struct xpc_vars *remote_vars;
380 struct xpc_partition *part;
381 partid_t partid;
382 bte_result_t bres;
383
384
385 remote_vars = (struct xpc_vars *) xpc_remote_copy_buffer;
386
387 for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
388 if (partid == sn_partition_id) {
389 continue;
390 }
391
392 part = &xpc_partitions[partid];
393
394 if (part->act_state == XPC_P_INACTIVE ||
395 part->act_state == XPC_P_DEACTIVATING) {
396 continue;
397 }
398
399 /* pull the remote_hb cache line */
400 bres = xp_bte_copy(part->remote_vars_pa,
401 ia64_tpa((u64) remote_vars),
402 XPC_VARS_ALIGNED_SIZE,
403 (BTE_NOTIFY | BTE_WACQUIRE), NULL);
404 if (bres != BTE_SUCCESS) {
405 XPC_DEACTIVATE_PARTITION(part,
406 xpc_map_bte_errors(bres));
407 continue;
408 }
409
410 dev_dbg(xpc_part, "partid = %d, heartbeat = %ld, last_heartbeat"
411 " = %ld, kdb_status = %ld, HB_mask = 0x%lx\n", partid,
412 remote_vars->heartbeat, part->last_heartbeat,
413 remote_vars->kdb_status,
414 remote_vars->heartbeating_to_mask);
415
416 if (((remote_vars->heartbeat == part->last_heartbeat) &&
417 (remote_vars->kdb_status == 0)) ||
418 !XPC_HB_ALLOWED(sn_partition_id, remote_vars)) {
419
420 XPC_DEACTIVATE_PARTITION(part, xpcNoHeartbeat);
421 continue;
422 }
423
424 part->last_heartbeat = remote_vars->heartbeat;
425 }
426}
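/*
 * A minimal standalone sketch of the deactivation predicate used
 * above, assuming XPC_HB_ALLOWED(_p, _v) tests our partid's bit in
 * the remote heartbeating_to_mask (an assumption, not taken from
 * this patch): the remote partition is presumed dead when its
 * heartbeat count has not advanced while it is not stopped in kdb,
 * or when it has stopped heartbeating to us.
 */
#include <stdint.h>

static int remote_hb_looks_dead(uint64_t hb, uint64_t last_hb,
				uint64_t kdb_status, uint64_t hb_mask,
				int our_partid)
{
	/* assumed XPC_HB_ALLOWED expansion: our bit in the remote mask */
	int hb_allowed = (hb_mask & ((uint64_t) 1 << our_partid)) != 0;

	/* matches: (hb unchanged && not in kdb) || not heartbeating to us */
	return (hb == last_hb && kdb_status == 0) || !hb_allowed;
}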
427
428
429/*
430 * Get a copy of the remote partition's rsvd page.
431 *
432 * remote_rp points to a buffer that is cacheline aligned for BTE copies and
433 * assumed to be of size XPC_RSVD_PAGE_ALIGNED_SIZE.
434 */
435static enum xpc_retval
436xpc_get_remote_rp(int nasid, u64 *discovered_nasids,
437 struct xpc_rsvd_page *remote_rp, u64 *remote_rsvd_page_pa)
438{
439 int bres, i;
440
441
442 /* get the reserved page's physical address */
443
444 *remote_rsvd_page_pa = xpc_get_rsvd_page_pa(nasid, (u64) remote_rp,
445 XPC_RSVD_PAGE_ALIGNED_SIZE);
446 if (*remote_rsvd_page_pa == 0) {
447 return xpcNoRsvdPageAddr;
448 }
449
450
451 /* pull over the reserved page structure */
452
453 bres = xp_bte_copy(*remote_rsvd_page_pa, ia64_tpa((u64) remote_rp),
454 XPC_RSVD_PAGE_ALIGNED_SIZE,
455 (BTE_NOTIFY | BTE_WACQUIRE), NULL);
456 if (bres != BTE_SUCCESS) {
457 return xpc_map_bte_errors(bres);
458 }
459
460
461 if (discovered_nasids != NULL) {
462 for (i = 0; i < XP_NASID_MASK_WORDS; i++) {
463 discovered_nasids[i] |= remote_rp->part_nasids[i];
464 }
465 }
466
467
468 /* check that the partid is for another partition */
469
470 if (remote_rp->partid < 1 ||
471 remote_rp->partid > (XP_MAX_PARTITIONS - 1)) {
472 return xpcInvalidPartid;
473 }
474
475 if (remote_rp->partid == sn_partition_id) {
476 return xpcLocalPartid;
477 }
478
479
480 if (XPC_VERSION_MAJOR(remote_rp->version) !=
481 XPC_VERSION_MAJOR(XPC_RP_VERSION)) {
482 return xpcBadVersion;
483 }
484
485 return xpcSuccess;
486}
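/*
 * A hedged sketch of the major-version compatibility rule applied
 * above. The ((major << 4) | minor) packing is an assumption carried
 * over from the XPNET macros later in this patch; the XPC variants
 * are expected to follow the same scheme.
 */
#include <stdint.h>

#define PACK_VERSION(maj, min)	(((maj) << 4) | (min))
#define VERSION_MAJOR(v)	((v) >> 4)

static int versions_compatible(uint8_t ours, uint8_t theirs)
{
	/* e.g. 1.0 interoperates with 1.2, but 1.x never with 2.x */
	return VERSION_MAJOR(ours) == VERSION_MAJOR(theirs);
}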
487
488
489/*
490 * Get a copy of the remote partition's XPC variables.
491 *
492 * remote_vars points to a buffer that is cacheline aligned for BTE copies and
493 * assumed to be of size XPC_VARS_ALIGNED_SIZE.
494 */
495static enum xpc_retval
496xpc_get_remote_vars(u64 remote_vars_pa, struct xpc_vars *remote_vars)
497{
498 int bres;
499
500
501 if (remote_vars_pa == 0) {
502 return xpcVarsNotSet;
503 }
504
505
506 /* pull over the cross partition variables */
507
508 bres = xp_bte_copy(remote_vars_pa, ia64_tpa((u64) remote_vars),
509 XPC_VARS_ALIGNED_SIZE,
510 (BTE_NOTIFY | BTE_WACQUIRE), NULL);
511 if (bres != BTE_SUCCESS) {
512 return xpc_map_bte_errors(bres);
513 }
514
515 if (XPC_VERSION_MAJOR(remote_vars->version) !=
516 XPC_VERSION_MAJOR(XPC_V_VERSION)) {
517 return xpcBadVersion;
518 }
519
520 return xpcSuccess;
521}
522
523
524/*
525 * Prior code has determined the nasid which generated an IPI. Inspect
526 * that nasid to determine if its partition needs to be activated or
527 * deactivated.
528 *
529 * A partition is considered "awaiting activation" if our partition
530 * flags indicate it is not active and it has a heartbeat. A
531 * partition is considered "awaiting deactivation" if our partition
532 * flags indicate it is active but it has no heartbeat or it is not
533 * sending its heartbeat to us.
534 *
535 * To determine the heartbeat, the remote nasid must have a properly
536 * initialized reserved page.
537 */
538static void
539xpc_identify_act_IRQ_req(int nasid)
540{
541 struct xpc_rsvd_page *remote_rp;
542 struct xpc_vars *remote_vars;
543 u64 remote_rsvd_page_pa;
544 u64 remote_vars_pa;
545 partid_t partid;
546 struct xpc_partition *part;
547 enum xpc_retval ret;
548
549
550 /* pull over the reserved page structure */
551
552 remote_rp = (struct xpc_rsvd_page *) xpc_remote_copy_buffer;
553
554 ret = xpc_get_remote_rp(nasid, NULL, remote_rp, &remote_rsvd_page_pa);
555 if (ret != xpcSuccess) {
556 dev_warn(xpc_part, "unable to get reserved page from nasid %d, "
557 "which sent interrupt, reason=%d\n", nasid, ret);
558 return;
559 }
560
561 remote_vars_pa = remote_rp->vars_pa;
562 partid = remote_rp->partid;
563 part = &xpc_partitions[partid];
564
565
566 /* pull over the cross partition variables */
567
568 remote_vars = (struct xpc_vars *) xpc_remote_copy_buffer;
569
570 ret = xpc_get_remote_vars(remote_vars_pa, remote_vars);
571 if (ret != xpcSuccess) {
572
573 dev_warn(xpc_part, "unable to get XPC variables from nasid %d, "
574 "which sent interrupt, reason=%d\n", nasid, ret);
575
576 XPC_DEACTIVATE_PARTITION(part, ret);
577 return;
578 }
579
580
581 part->act_IRQ_rcvd++;
582
583 dev_dbg(xpc_part, "partid for nasid %d is %d; IRQs = %d; HB = "
584 "%ld:0x%lx\n", (int) nasid, (int) partid, part->act_IRQ_rcvd,
585 remote_vars->heartbeat, remote_vars->heartbeating_to_mask);
586
587
588 if (part->act_state == XPC_P_INACTIVE) {
589
590 part->remote_rp_pa = remote_rsvd_page_pa;
591 dev_dbg(xpc_part, " remote_rp_pa = 0x%016lx\n",
592 part->remote_rp_pa);
593
594 part->remote_vars_pa = remote_vars_pa;
595 dev_dbg(xpc_part, " remote_vars_pa = 0x%016lx\n",
596 part->remote_vars_pa);
597
598 part->last_heartbeat = remote_vars->heartbeat;
599 dev_dbg(xpc_part, " last_heartbeat = 0x%016lx\n",
600 part->last_heartbeat);
601
602 part->remote_vars_part_pa = remote_vars->vars_part_pa;
603 dev_dbg(xpc_part, " remote_vars_part_pa = 0x%016lx\n",
604 part->remote_vars_part_pa);
605
606 part->remote_act_nasid = remote_vars->act_nasid;
607 dev_dbg(xpc_part, " remote_act_nasid = 0x%x\n",
608 part->remote_act_nasid);
609
610 part->remote_act_phys_cpuid = remote_vars->act_phys_cpuid;
611 dev_dbg(xpc_part, " remote_act_phys_cpuid = 0x%x\n",
612 part->remote_act_phys_cpuid);
613
614 part->remote_amos_page_pa = remote_vars->amos_page_pa;
615 dev_dbg(xpc_part, " remote_amos_page_pa = 0x%lx\n",
616 part->remote_amos_page_pa);
617
618 xpc_activate_partition(part);
619
620 } else if (part->remote_amos_page_pa != remote_vars->amos_page_pa ||
621 !XPC_HB_ALLOWED(sn_partition_id, remote_vars)) {
622
623 part->reactivate_nasid = nasid;
624 XPC_DEACTIVATE_PARTITION(part, xpcReactivating);
625 }
626}
627
628
629/*
630 * Loop through the activation AMO variables and process any bits
631 * which are set. Each bit indicates a nasid sending a partition
632 * activation or deactivation request.
633 *
634 * Return #of IRQs detected.
635 */
636int
637xpc_identify_act_IRQ_sender(void)
638{
639 int word, bit;
640 u64 nasid_mask;
641 u64 nasid; /* remote nasid */
642 int n_IRQs_detected = 0;
643 AMO_t *act_amos;
644 struct xpc_rsvd_page *rp = (struct xpc_rsvd_page *) xpc_rsvd_page;
645
646
647 act_amos = xpc_vars->act_amos;
648
649
650 /* scan through act AMO variable looking for non-zero entries */
651 for (word = 0; word < XP_NASID_MASK_WORDS; word++) {
652
653 nasid_mask = xpc_IPI_receive(&act_amos[word]);
654 if (nasid_mask == 0) {
655 /* no IRQs from nasids in this variable */
656 continue;
657 }
658
659 dev_dbg(xpc_part, "AMO[%d] gave back 0x%lx\n", word,
660 nasid_mask);
661
662
663 /*
664 * If this nasid has been added to the machine since
665 * our partition was reset, this will retain the
666		 * remote nasid in our reserved page's machine mask.
667 * This is used in the event of module reload.
668 */
669 rp->mach_nasids[word] |= nasid_mask;
670
671
672 /* locate the nasid(s) which sent interrupts */
673
674 for (bit = 0; bit < (8 * sizeof(u64)); bit++) {
675 if (nasid_mask & (1UL << bit)) {
676 n_IRQs_detected++;
677 nasid = XPC_NASID_FROM_W_B(word, bit);
678 dev_dbg(xpc_part, "interrupt from nasid %ld\n",
679 nasid);
680 xpc_identify_act_IRQ_req(nasid);
681 }
682 }
683 }
684 return n_IRQs_detected;
685}
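/*
 * A hedged sketch of the word/bit-to-nasid mapping used above. Each
 * AMO word covers 64 senders and nasids are even, so
 * XPC_NASID_FROM_W_B is assumed to expand to ((word * 64 + bit) * 2);
 * e.g. bit 3 of AMO word 1 maps to nasid (64 + 3) * 2 == 134.
 */
#include <stdio.h>

#define NASID_FROM_W_B(w, b)	(((w) * 64 + (b)) * 2)	/* assumed expansion */

int main(void)
{
	printf("word 1, bit 3 -> nasid %d\n", NASID_FROM_W_B(1, 3));
	return 0;
}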
686
687
688/*
689 * Mark specified partition as active.
690 */
691enum xpc_retval
692xpc_mark_partition_active(struct xpc_partition *part)
693{
694 unsigned long irq_flags;
695 enum xpc_retval ret;
696
697
698 dev_dbg(xpc_part, "setting partition %d to ACTIVE\n", XPC_PARTID(part));
699
700 spin_lock_irqsave(&part->act_lock, irq_flags);
701 if (part->act_state == XPC_P_ACTIVATING) {
702 part->act_state = XPC_P_ACTIVE;
703 ret = xpcSuccess;
704 } else {
705 DBUG_ON(part->reason == xpcSuccess);
706 ret = part->reason;
707 }
708 spin_unlock_irqrestore(&part->act_lock, irq_flags);
709
710 return ret;
711}
712
713
714/*
715 * Notify XPC that the partition is down.
716 */
717void
718xpc_deactivate_partition(const int line, struct xpc_partition *part,
719 enum xpc_retval reason)
720{
721 unsigned long irq_flags;
722 partid_t partid = XPC_PARTID(part);
723
724
725 spin_lock_irqsave(&part->act_lock, irq_flags);
726
727 if (part->act_state == XPC_P_INACTIVE) {
728 XPC_SET_REASON(part, reason, line);
729 spin_unlock_irqrestore(&part->act_lock, irq_flags);
730 if (reason == xpcReactivating) {
731 /* we interrupt ourselves to reactivate partition */
732 xpc_IPI_send_reactivate(part);
733 }
734 return;
735 }
736 if (part->act_state == XPC_P_DEACTIVATING) {
737 if ((part->reason == xpcUnloading && reason != xpcUnloading) ||
738 reason == xpcReactivating) {
739 XPC_SET_REASON(part, reason, line);
740 }
741 spin_unlock_irqrestore(&part->act_lock, irq_flags);
742 return;
743 }
744
745 part->act_state = XPC_P_DEACTIVATING;
746 XPC_SET_REASON(part, reason, line);
747
748 spin_unlock_irqrestore(&part->act_lock, irq_flags);
749
750 XPC_DISALLOW_HB(partid, xpc_vars);
751
752 dev_dbg(xpc_part, "bringing partition %d down, reason = %d\n", partid,
753 reason);
754
755 xpc_partition_down(part, reason);
756}
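/*
 * Hedged note: XPC_SET_REASON is assumed to record both the reason
 * code and the caller's source line for post-mortem debugging,
 * roughly
 *
 *	#define XPC_SET_REASON(_p, _reason, _line) \
 *		{ (_p)->reason = _reason; (_p)->reason_line = _line; }
 *
 * which would explain why xpc_deactivate_partition() takes `line` as
 * its first argument and why callers go through the
 * XPC_DEACTIVATE_PARTITION() wrapper, presumably passing __LINE__.
 */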
757
758
759/*
760 * Mark specified partition as inactive.
761 */
762void
763xpc_mark_partition_inactive(struct xpc_partition *part)
764{
765 unsigned long irq_flags;
766
767
768 dev_dbg(xpc_part, "setting partition %d to INACTIVE\n",
769 XPC_PARTID(part));
770
771 spin_lock_irqsave(&part->act_lock, irq_flags);
772 part->act_state = XPC_P_INACTIVE;
773 spin_unlock_irqrestore(&part->act_lock, irq_flags);
774 part->remote_rp_pa = 0;
775}
776
777
778/*
779 * SAL has provided a partition and machine mask. The partition mask
780 * contains a bit for each even nasid in our partition. The machine
781 * mask contains a bit for each even nasid in the entire machine.
782 *
783 * Using those two bit arrays, we can determine which nasids are
784 * known in the machine. Each should also have its reserved page
785 * initialized if it is available for partitioning.
786 */
787void
788xpc_discovery(void)
789{
790 void *remote_rp_base;
791 struct xpc_rsvd_page *remote_rp;
792 struct xpc_vars *remote_vars;
793 u64 remote_rsvd_page_pa;
794 u64 remote_vars_pa;
795 int region;
796 int max_regions;
797 int nasid;
798 struct xpc_rsvd_page *rp;
799 partid_t partid;
800 struct xpc_partition *part;
801 u64 *discovered_nasids;
802 enum xpc_retval ret;
803
804
805 remote_rp = xpc_kmalloc_cacheline_aligned(XPC_RSVD_PAGE_ALIGNED_SIZE,
806 GFP_KERNEL, &remote_rp_base);
807 if (remote_rp == NULL) {
808 return;
809 }
810 remote_vars = (struct xpc_vars *) remote_rp;
811
812
813 discovered_nasids = kmalloc(sizeof(u64) * XP_NASID_MASK_WORDS,
814 GFP_KERNEL);
815 if (discovered_nasids == NULL) {
816 kfree(remote_rp_base);
817 return;
818 }
819 memset(discovered_nasids, 0, sizeof(u64) * XP_NASID_MASK_WORDS);
820
821 rp = (struct xpc_rsvd_page *) xpc_rsvd_page;
822
823 /*
824 * The term 'region' in this context refers to the minimum number of
825 * nodes that can comprise an access protection grouping. The access
826	 * protection is with regard to memory, IOI and IPI.
827 */
828//>>> move the next two #defines into either include/asm-ia64/sn/arch.h or
829//>>> include/asm-ia64/sn/addrs.h
830#define SH1_MAX_REGIONS 64
831#define SH2_MAX_REGIONS 256
832 max_regions = is_shub2() ? SH2_MAX_REGIONS : SH1_MAX_REGIONS;
833
834 for (region = 0; region < max_regions; region++) {
835
836 if ((volatile int) xpc_exiting) {
837 break;
838 }
839
840 dev_dbg(xpc_part, "searching region %d\n", region);
841
842 for (nasid = (region * sn_region_size * 2);
843 nasid < ((region + 1) * sn_region_size * 2);
844 nasid += 2) {
845
846 if ((volatile int) xpc_exiting) {
847 break;
848 }
849
850 dev_dbg(xpc_part, "checking nasid %d\n", nasid);
851
852
853 if (XPC_NASID_IN_ARRAY(nasid, rp->part_nasids)) {
854 dev_dbg(xpc_part, "PROM indicates Nasid %d is "
855 "part of the local partition; skipping "
856 "region\n", nasid);
857 break;
858 }
859
860 if (!(XPC_NASID_IN_ARRAY(nasid, rp->mach_nasids))) {
861 dev_dbg(xpc_part, "PROM indicates Nasid %d was "
862 "not on Numa-Link network at reset\n",
863 nasid);
864 continue;
865 }
866
867 if (XPC_NASID_IN_ARRAY(nasid, discovered_nasids)) {
868 dev_dbg(xpc_part, "Nasid %d is part of a "
869 "partition which was previously "
870 "discovered\n", nasid);
871 continue;
872 }
873
874
875 /* pull over the reserved page structure */
876
877 ret = xpc_get_remote_rp(nasid, discovered_nasids,
878 remote_rp, &remote_rsvd_page_pa);
879 if (ret != xpcSuccess) {
880 dev_dbg(xpc_part, "unable to get reserved page "
881 "from nasid %d, reason=%d\n", nasid,
882 ret);
883
884 if (ret == xpcLocalPartid) {
885 break;
886 }
887 continue;
888 }
889
890 remote_vars_pa = remote_rp->vars_pa;
891
892 partid = remote_rp->partid;
893 part = &xpc_partitions[partid];
894
895
896 /* pull over the cross partition variables */
897
898 ret = xpc_get_remote_vars(remote_vars_pa, remote_vars);
899 if (ret != xpcSuccess) {
900 dev_dbg(xpc_part, "unable to get XPC variables "
901 "from nasid %d, reason=%d\n", nasid,
902 ret);
903
904 XPC_DEACTIVATE_PARTITION(part, ret);
905 continue;
906 }
907
908 if (part->act_state != XPC_P_INACTIVE) {
909 dev_dbg(xpc_part, "partition %d on nasid %d is "
910 "already activating\n", partid, nasid);
911 break;
912 }
913
914 /*
915 * Register the remote partition's AMOs with SAL so it
916 * can handle and cleanup errors within that address
917 * range should the remote partition go down. We don't
918 * unregister this range because it is difficult to
919 * tell when outstanding writes to the remote partition
920			 * are finished and thus when it is safe to
921 * unregister. This should not result in wasted space
922 * in the SAL xp_addr_region table because we should
923 * get the same page for remote_act_amos_pa after
924 * module reloads and system reboots.
925 */
926 if (sn_register_xp_addr_region(
927 remote_vars->amos_page_pa,
928 PAGE_SIZE, 1) < 0) {
929 dev_dbg(xpc_part, "partition %d failed to "
930 "register xp_addr region 0x%016lx\n",
931 partid, remote_vars->amos_page_pa);
932
933 XPC_SET_REASON(part, xpcPhysAddrRegFailed,
934 __LINE__);
935 break;
936 }
937
938 /*
939 * The remote nasid is valid and available.
940 * Send an interrupt to that nasid to notify
941 * it that we are ready to begin activation.
942 */
943 dev_dbg(xpc_part, "sending an interrupt to AMO 0x%lx, "
944 "nasid %d, phys_cpuid 0x%x\n",
945 remote_vars->amos_page_pa,
946 remote_vars->act_nasid,
947 remote_vars->act_phys_cpuid);
948
949 xpc_IPI_send_activate(remote_vars);
950 }
951 }
952
953 kfree(discovered_nasids);
954 kfree(remote_rp_base);
955}
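/*
 * A small sketch of the region-to-nasid arithmetic in the discovery
 * loops above: nasids are even, so a region of sn_region_size nodes
 * spans sn_region_size * 2 nasid values. The helper below is only
 * illustrative; e.g. with sn_region_size == 16, region 2 covers the
 * even nasids 64, 66, ..., 94.
 */
static void region_nasid_range(int region, int sn_region_size,
			       int *first, int *last)
{
	*first = region * sn_region_size * 2;
	*last = (region + 1) * sn_region_size * 2 - 2;
}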
956
957
958/*
959 * Given a partid, get the nasids owned by that partition from the
960 * remote partition's reserved page.
961 */
962enum xpc_retval
963xpc_initiate_partid_to_nasids(partid_t partid, void *nasid_mask)
964{
965 struct xpc_partition *part;
966 u64 part_nasid_pa;
967 int bte_res;
968
969
970 part = &xpc_partitions[partid];
971 if (part->remote_rp_pa == 0) {
972 return xpcPartitionDown;
973 }
974
975 part_nasid_pa = part->remote_rp_pa +
976 (u64) &((struct xpc_rsvd_page *) 0)->part_nasids;
977
978 bte_res = xp_bte_copy(part_nasid_pa, ia64_tpa((u64) nasid_mask),
979 L1_CACHE_ALIGN(XP_NASID_MASK_BYTES),
980 (BTE_NOTIFY | BTE_WACQUIRE), NULL);
981
982 return xpc_map_bte_errors(bte_res);
983}
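/*
 * Hedged note: the "(u64) &((struct xpc_rsvd_page *) 0)->part_nasids"
 * arithmetic above is the classic null-pointer offsetof idiom; the
 * equivalent modern spelling would be
 *
 *	part_nasid_pa = part->remote_rp_pa +
 *			offsetof(struct xpc_rsvd_page, part_nasids);
 */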
984
diff --git a/arch/ia64/sn/kernel/xpnet.c b/arch/ia64/sn/kernel/xpnet.c
new file mode 100644
index 000000000000..78c13d676fa6
--- /dev/null
+++ b/arch/ia64/sn/kernel/xpnet.c
@@ -0,0 +1,715 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * Copyright (C) 1999,2001-2005 Silicon Graphics, Inc. All rights reserved.
7 */
8
9
10/*
11 * Cross Partition Network Interface (XPNET) support
12 *
13 * XPNET provides a virtual network layered on top of the Cross
14 * Partition communication layer.
15 *
16 * XPNET provides direct point-to-point and broadcast-like support
17 * for an ethernet-like device. The ethernet broadcast medium is
18 * replaced with a point-to-point message structure which passes
19 * pointers to a DMA-capable block that a remote partition should
20 * retrieve and pass to the upper level networking layer.
21 *
22 */
23
24
25#include <linux/config.h>
26#include <linux/module.h>
27#include <linux/kernel.h>
28#include <linux/pci.h>
29#include <linux/init.h>
30#include <linux/ioport.h>
31#include <linux/netdevice.h>
32#include <linux/etherdevice.h>
33#include <linux/delay.h>
34#include <linux/ethtool.h>
35#include <linux/mii.h>
36#include <linux/smp.h>
37#include <linux/string.h>
38#include <asm/sn/bte.h>
39#include <asm/sn/io.h>
40#include <asm/sn/sn_sal.h>
41#include <asm/types.h>
42#include <asm/atomic.h>
43#include <asm/sn/xp.h>
44
45
46/*
47 * The message payload transferred by XPC.
48 *
49 * buf_pa is the physical address where the DMA should pull from.
50 *
51 * NOTE: for performance reasons, buf_pa should _ALWAYS_ begin on a
52 * cacheline boundary. To accomplish this, we record the number of
53 * bytes from the beginning of the first cacheline to the first useful
54 * byte of the skb (leadin_ignore) and the number of bytes from the
55 * last useful byte of the skb to the end of the last cacheline
56 * (tailout_ignore).
57 *
58 * size is the number of bytes to transfer which includes the skb->len
59 * (useful bytes of the sender's skb) plus the leadin and tailout.
60 */
61struct xpnet_message {
62 u16 version; /* Version for this message */
63 u16 embedded_bytes; /* #of bytes embedded in XPC message */
64 u32 magic; /* Special number indicating this is xpnet */
65 u64 buf_pa; /* phys address of buffer to retrieve */
66 u32 size; /* #of bytes in buffer */
67 u8 leadin_ignore; /* #of bytes to ignore at the beginning */
68 u8 tailout_ignore; /* #of bytes to ignore at the end */
69 unsigned char data; /* body of small packets */
70};
71
72/*
73 * Determine the size of our message, the cacheline aligned size,
74 * and then the number of messages we will request from XPC.
75 *
76 * XPC expects each message to exist in an individual cacheline.
77 */
78#define XPNET_MSG_SIZE (L1_CACHE_BYTES - XPC_MSG_PAYLOAD_OFFSET)
79#define XPNET_MSG_DATA_MAX \
80 (XPNET_MSG_SIZE - (u64)(&((struct xpnet_message *)0)->data))
81#define XPNET_MSG_ALIGNED_SIZE (L1_CACHE_ALIGN(XPNET_MSG_SIZE))
82#define XPNET_MSG_NENTRIES (PAGE_SIZE / XPNET_MSG_ALIGNED_SIZE)
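/*
 * Hedged note: XPNET_MSG_DATA_MAX uses the same null-pointer
 * offsetof idiom seen elsewhere in this patch; an equivalent
 * spelling would be
 *
 *	#define XPNET_MSG_DATA_MAX \
 *		(XPNET_MSG_SIZE - offsetof(struct xpnet_message, data))
 */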
83
84
85#define XPNET_MAX_KTHREADS (XPNET_MSG_NENTRIES + 1)
86#define XPNET_MAX_IDLE_KTHREADS (XPNET_MSG_NENTRIES + 1)
87
88/*
89 * Version number of XPNET implementation. XPNET can always talk to versions
90 * with the same major #, and never talk to versions with a different major #.
91 */
92#define _XPNET_VERSION(_major, _minor) (((_major) << 4) | (_minor))
93#define XPNET_VERSION_MAJOR(_v) ((_v) >> 4)
94#define XPNET_VERSION_MINOR(_v) ((_v) & 0xf)
95
96#define XPNET_VERSION _XPNET_VERSION(1,0) /* version 1.0 */
97#define XPNET_VERSION_EMBED _XPNET_VERSION(1,1) /* version 1.1 */
98#define XPNET_MAGIC 0x88786984 /* "XNET" */
99
100#define XPNET_VALID_MSG(_m) \
101 ((XPNET_VERSION_MAJOR((_m)->version) == XPNET_VERSION_MAJOR(XPNET_VERSION)) \
102 && ((_m)->magic == XPNET_MAGIC))
103
104#define XPNET_DEVICE_NAME "xp0"
105
106
107/*
108 * When messages are queued with xpc_send_notify, a kmalloc'd buffer
109 * of the following type is passed as a notification cookie. When the
110 * notification function is called, we use the cookie to decide
111 * whether all outstanding message sends have completed. The skb can
112 * then be released.
113 */
114struct xpnet_pending_msg {
115 struct list_head free_list;
116 struct sk_buff *skb;
117 atomic_t use_count;
118};
119
120/* driver specific structure pointed to by the device structure */
121struct xpnet_dev_private {
122 struct net_device_stats stats;
123};
124
125struct net_device *xpnet_device;
126
127/*
128 * When we are notified of other partitions activating, we add them to
129 * our bitmask of partitions to which we broadcast.
130 */
131static u64 xpnet_broadcast_partitions;
132/* protect above */
133static spinlock_t xpnet_broadcast_lock = SPIN_LOCK_UNLOCKED;
134
135/*
136 * Since the Block Transfer Engine (BTE) is being used for the transfer
137 * and it relies upon cache-line size transfers, we need to reserve at
138 * least one cache-line for head and tail alignment. The BTE is
139 * limited to 8MB transfers.
140 *
141 * Testing has shown that changing MTU to greater than 64KB has no effect
142 * on TCP as the two sides negotiate a Max Segment Size that is limited
143 * to 64K. Other protocols may use packets greater than this, but for
144 * now, the default is 32KB (matching XPNET_DEF_MTU below).
145 */
146#define XPNET_MAX_MTU (0x800000UL - L1_CACHE_BYTES)
147/* 32KB has been determined to be the ideal */
148#define XPNET_DEF_MTU (0x8000UL)
149
150
151/*
152 * The partition id is encapsulated in the MAC address. The following
153 * define locates the octet the partid is in.
154 */
155#define XPNET_PARTID_OCTET 1
156#define XPNET_LICENSE_OCTET 2
157
158
159/*
160 * Define the XPNET debug device structure that is to be used with dev_dbg(),
161 * dev_err(), dev_warn(), and dev_info().
162 */
163struct device_driver xpnet_dbg_name = {
164 .name = "xpnet"
165};
166
167struct device xpnet_dbg_subname = {
168 .bus_id = {0}, /* set to "" */
169 .driver = &xpnet_dbg_name
170};
171
172struct device *xpnet = &xpnet_dbg_subname;
173
174/*
175 * Packet was received by XPC and forwarded to us.
176 */
177static void
178xpnet_receive(partid_t partid, int channel, struct xpnet_message *msg)
179{
180 struct sk_buff *skb;
181 bte_result_t bret;
182 struct xpnet_dev_private *priv =
183 (struct xpnet_dev_private *) xpnet_device->priv;
184
185
186 if (!XPNET_VALID_MSG(msg)) {
187 /*
188 * Packet with a different XPC version. Ignore.
189 */
190 xpc_received(partid, channel, (void *) msg);
191
192 priv->stats.rx_errors++;
193
194 return;
195 }
196 dev_dbg(xpnet, "received 0x%lx, %d, %d, %d\n", msg->buf_pa, msg->size,
197 msg->leadin_ignore, msg->tailout_ignore);
198
199
200 /* reserve an extra cache line */
201 skb = dev_alloc_skb(msg->size + L1_CACHE_BYTES);
202 if (!skb) {
203 dev_err(xpnet, "failed on dev_alloc_skb(%d)\n",
204 msg->size + L1_CACHE_BYTES);
205
206 xpc_received(partid, channel, (void *) msg);
207
208 priv->stats.rx_errors++;
209
210 return;
211 }
212
213 /*
214 * The allocated skb has some reserved space.
215 * In order to use bte_copy, we need to get the
216 * skb->data pointer moved forward.
217 */
218 skb_reserve(skb, (L1_CACHE_BYTES - ((u64)skb->data &
219 (L1_CACHE_BYTES - 1)) +
220 msg->leadin_ignore));
221
222 /*
223 * Update the tail pointer to indicate data actually
224 * transferred.
225 */
226 skb_put(skb, (msg->size - msg->leadin_ignore - msg->tailout_ignore));
227
228 /*
229	 * Move the data over from the other side.
230 */
231 if ((XPNET_VERSION_MINOR(msg->version) == 1) &&
232 (msg->embedded_bytes != 0)) {
233 dev_dbg(xpnet, "copying embedded message. memcpy(0x%p, 0x%p, "
234 "%lu)\n", skb->data, &msg->data,
235 (size_t) msg->embedded_bytes);
236
237 memcpy(skb->data, &msg->data, (size_t) msg->embedded_bytes);
238 } else {
239 dev_dbg(xpnet, "transferring buffer to the skb->data area;\n\t"
240 "bte_copy(0x%p, 0x%p, %hu)\n", (void *)msg->buf_pa,
241 (void *)__pa((u64)skb->data & ~(L1_CACHE_BYTES - 1)),
242 msg->size);
243
244 bret = bte_copy(msg->buf_pa,
245 __pa((u64)skb->data & ~(L1_CACHE_BYTES - 1)),
246 msg->size, (BTE_NOTIFY | BTE_WACQUIRE), NULL);
247
248 if (bret != BTE_SUCCESS) {
249 // >>> Need better way of cleaning skb. Currently skb
250 // >>> appears in_use and we can't just call
251 // >>> dev_kfree_skb.
252 dev_err(xpnet, "bte_copy(0x%p, 0x%p, 0x%hx) returned "
253 "error=0x%x\n", (void *)msg->buf_pa,
254 (void *)__pa((u64)skb->data &
255 ~(L1_CACHE_BYTES - 1)),
256 msg->size, bret);
257
258 xpc_received(partid, channel, (void *) msg);
259
260 priv->stats.rx_errors++;
261
262 return;
263 }
264 }
265
266 dev_dbg(xpnet, "<skb->head=0x%p skb->data=0x%p skb->tail=0x%p "
267 "skb->end=0x%p skb->len=%d\n", (void *) skb->head,
268 (void *) skb->data, (void *) skb->tail, (void *) skb->end,
269 skb->len);
270
271 skb->dev = xpnet_device;
272 skb->protocol = eth_type_trans(skb, xpnet_device);
273 skb->ip_summed = CHECKSUM_UNNECESSARY;
274
275 dev_dbg(xpnet, "passing skb to network layer; \n\tskb->head=0x%p "
276 "skb->data=0x%p skb->tail=0x%p skb->end=0x%p skb->len=%d\n",
277 (void *) skb->head, (void *) skb->data, (void *) skb->tail,
278 (void *) skb->end, skb->len);
279
280
281 xpnet_device->last_rx = jiffies;
282 priv->stats.rx_packets++;
283 priv->stats.rx_bytes += skb->len + ETH_HLEN;
284
285 netif_rx_ni(skb);
286 xpc_received(partid, channel, (void *) msg);
287}
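/*
 * A standalone sketch of the skb_reserve() arithmetic in
 * xpnet_receive() above, under the assumption of a 128-byte
 * cacheline: the data pointer is advanced to the next cacheline
 * boundary plus leadin_ignore, so that masking skb->data back down
 * with ~(L1_CACHE_BYTES - 1) yields the cacheline the BTE writes to.
 */
#include <stdint.h>

#define CACHE_BYTES	128UL	/* stand-in for L1_CACHE_BYTES */

static uintptr_t receive_data_ptr(uintptr_t data, unsigned int leadin)
{
	/* e.g. data at offset 40, leadin 6: advance 128 - 40 + 6 == 94
	 * bytes, landing at the next boundary + 6 */
	return data + (CACHE_BYTES - (data & (CACHE_BYTES - 1)) + leadin);
}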
288
289
290/*
291 * This is the handler which XPC calls during any sort of change in
292 * state or message reception on a connection.
293 */
294static void
295xpnet_connection_activity(enum xpc_retval reason, partid_t partid, int channel,
296 void *data, void *key)
297{
298 long bp;
299
300
301 DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS);
302 DBUG_ON(channel != XPC_NET_CHANNEL);
303
304 switch(reason) {
305 case xpcMsgReceived: /* message received */
306 DBUG_ON(data == NULL);
307
308 xpnet_receive(partid, channel, (struct xpnet_message *) data);
309 break;
310
311 case xpcConnected: /* connection completed to a partition */
312 spin_lock_bh(&xpnet_broadcast_lock);
313		xpnet_broadcast_partitions |= 1UL << (partid - 1);
314 bp = xpnet_broadcast_partitions;
315 spin_unlock_bh(&xpnet_broadcast_lock);
316
317 netif_carrier_on(xpnet_device);
318
319 dev_dbg(xpnet, "%s connection created to partition %d; "
320 "xpnet_broadcast_partitions=0x%lx\n",
321 xpnet_device->name, partid, bp);
322 break;
323
324 default:
325 spin_lock_bh(&xpnet_broadcast_lock);
326		xpnet_broadcast_partitions &= ~(1UL << (partid - 1));
327 bp = xpnet_broadcast_partitions;
328 spin_unlock_bh(&xpnet_broadcast_lock);
329
330 if (bp == 0) {
331 netif_carrier_off(xpnet_device);
332 }
333
334 dev_dbg(xpnet, "%s disconnected from partition %d; "
335 "xpnet_broadcast_partitions=0x%lx\n",
336 xpnet_device->name, partid, bp);
337 break;
338
339 }
340}
341
342
343static int
344xpnet_dev_open(struct net_device *dev)
345{
346 enum xpc_retval ret;
347
348
349 dev_dbg(xpnet, "calling xpc_connect(%d, 0x%p, NULL, %ld, %ld, %d, "
350 "%d)\n", XPC_NET_CHANNEL, xpnet_connection_activity,
351 XPNET_MSG_SIZE, XPNET_MSG_NENTRIES, XPNET_MAX_KTHREADS,
352 XPNET_MAX_IDLE_KTHREADS);
353
354 ret = xpc_connect(XPC_NET_CHANNEL, xpnet_connection_activity, NULL,
355 XPNET_MSG_SIZE, XPNET_MSG_NENTRIES,
356 XPNET_MAX_KTHREADS, XPNET_MAX_IDLE_KTHREADS);
357 if (ret != xpcSuccess) {
358 dev_err(xpnet, "ifconfig up of %s failed on XPC connect, "
359 "ret=%d\n", dev->name, ret);
360
361 return -ENOMEM;
362 }
363
364 dev_dbg(xpnet, "ifconfig up of %s; XPC connected\n", dev->name);
365
366 return 0;
367}
368
369
370static int
371xpnet_dev_stop(struct net_device *dev)
372{
373 xpc_disconnect(XPC_NET_CHANNEL);
374
375 dev_dbg(xpnet, "ifconfig down of %s; XPC disconnected\n", dev->name);
376
377 return 0;
378}
379
380
381static int
382xpnet_dev_change_mtu(struct net_device *dev, int new_mtu)
383{
384 /* 68 comes from min TCP+IP+MAC header */
385 if ((new_mtu < 68) || (new_mtu > XPNET_MAX_MTU)) {
386 dev_err(xpnet, "ifconfig %s mtu %d failed; value must be "
387 "between 68 and %ld\n", dev->name, new_mtu,
388 XPNET_MAX_MTU);
389 return -EINVAL;
390 }
391
392 dev->mtu = new_mtu;
393 dev_dbg(xpnet, "ifconfig %s mtu set to %d\n", dev->name, new_mtu);
394 return 0;
395}
396
397
398/*
399 * Required for the net_device structure.
400 */
401static int
402xpnet_dev_set_config(struct net_device *dev, struct ifmap *new_map)
403{
404 return 0;
405}
406
407
408/*
409 * Return statistics to the caller.
410 */
411static struct net_device_stats *
412xpnet_dev_get_stats(struct net_device *dev)
413{
414 struct xpnet_dev_private *priv;
415
416
417 priv = (struct xpnet_dev_private *) dev->priv;
418
419 return &priv->stats;
420}
421
422
423/*
424 * Notification that the other end has received the message and
425 * DMA'd the skb information. At this point, they are done with
426 * our side. When all recipients are done processing, we
427 * release the skb and then release our pending message structure.
428 */
429static void
430xpnet_send_completed(enum xpc_retval reason, partid_t partid, int channel,
431 void *__qm)
432{
433 struct xpnet_pending_msg *queued_msg =
434 (struct xpnet_pending_msg *) __qm;
435
436
437 DBUG_ON(queued_msg == NULL);
438
439 dev_dbg(xpnet, "message to %d notified with reason %d\n",
440 partid, reason);
441
442 if (atomic_dec_return(&queued_msg->use_count) == 0) {
443		dev_dbg(xpnet, "all acks for skb->head=0x%p\n",
444 (void *) queued_msg->skb->head);
445
446 dev_kfree_skb_any(queued_msg->skb);
447 kfree(queued_msg);
448 }
449}
450
451
452/*
453 * Network layer has formatted a packet (skb) and is ready to place it
454 * "on the wire". Prepare and send an xpnet_message to all partitions
455 * which have connected with us and are targets of this packet.
456 *
457 * MAC-NOTE: For the XPNET driver, the MAC address contains the
458 * destination partition_id. If the destination partition id word
459 * is 0xff, this packet is to broadcast to all partitions.
460 */
461static int
462xpnet_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
463{
464 struct xpnet_pending_msg *queued_msg;
465 enum xpc_retval ret;
466 struct xpnet_message *msg;
467 u64 start_addr, end_addr;
468 long dp;
469 u8 second_mac_octet;
470 partid_t dest_partid;
471 struct xpnet_dev_private *priv;
472 u16 embedded_bytes;
473
474
475 priv = (struct xpnet_dev_private *) dev->priv;
476
477
478 dev_dbg(xpnet, ">skb->head=0x%p skb->data=0x%p skb->tail=0x%p "
479 "skb->end=0x%p skb->len=%d\n", (void *) skb->head,
480 (void *) skb->data, (void *) skb->tail, (void *) skb->end,
481 skb->len);
482
483
484 /*
485 * The xpnet_pending_msg tracks how many outstanding
486 * xpc_send_notifies are relying on this skb. When none
487 * remain, release the skb.
488 */
489 queued_msg = kmalloc(sizeof(struct xpnet_pending_msg), GFP_ATOMIC);
490 if (queued_msg == NULL) {
491 dev_warn(xpnet, "failed to kmalloc %ld bytes; dropping "
492 "packet\n", sizeof(struct xpnet_pending_msg));
493
494 priv->stats.tx_errors++;
495
496 return -ENOMEM;
497 }
498
499
500 /* get the beginning of the first cacheline and end of last */
501 start_addr = ((u64) skb->data & ~(L1_CACHE_BYTES - 1));
502 end_addr = L1_CACHE_ALIGN((u64) skb->tail);
503
504 /* calculate how many bytes to embed in the XPC message */
505 embedded_bytes = 0;
506 if (unlikely(skb->len <= XPNET_MSG_DATA_MAX)) {
507 /* skb->data does fit so embed */
508 embedded_bytes = skb->len;
509 }
510
511
512 /*
513 * Since the send occurs asynchronously, we set the count to one
514 * and begin sending. Any sends that happen to complete before
515	 * we are done sending will not free the skb; the final decrement
516	 * at the end of this function takes care of that. This also handles
517	 * the case of a packet destined for a partition which is no longer up.
518 */
519 atomic_set(&queued_msg->use_count, 1);
520 queued_msg->skb = skb;
521
522
523 second_mac_octet = skb->data[XPNET_PARTID_OCTET];
524 if (second_mac_octet == 0xff) {
525 /* we are being asked to broadcast to all partitions */
526 dp = xpnet_broadcast_partitions;
527 } else if (second_mac_octet != 0) {
528 dp = xpnet_broadcast_partitions &
529 (1UL << (second_mac_octet - 1));
530 } else {
531 /* 0 is an invalid partid. Ignore */
532 dp = 0;
533 }
534 dev_dbg(xpnet, "destination Partitions mask (dp) = 0x%lx\n", dp);
535
536 /*
537	 * If we wanted to allow promiscuous mode to work like an
538 * unswitched network, this would be a good point to OR in a
539 * mask of partitions which should be receiving all packets.
540 */
541
542 /*
543 * Main send loop.
544 */
545 for (dest_partid = 1; dp && dest_partid < XP_MAX_PARTITIONS;
546 dest_partid++) {
547
548
549 if (!(dp & (1UL << (dest_partid - 1)))) {
550 /* not destined for this partition */
551 continue;
552 }
553
554 /* remove this partition from the destinations mask */
555 dp &= ~(1UL << (dest_partid - 1));
556
557
558 /* found a partition to send to */
559
560 ret = xpc_allocate(dest_partid, XPC_NET_CHANNEL,
561 XPC_NOWAIT, (void **)&msg);
562 if (unlikely(ret != xpcSuccess)) {
563 continue;
564 }
565
566 msg->embedded_bytes = embedded_bytes;
567 if (unlikely(embedded_bytes != 0)) {
568 msg->version = XPNET_VERSION_EMBED;
569 dev_dbg(xpnet, "calling memcpy(0x%p, 0x%p, 0x%lx)\n",
570 &msg->data, skb->data, (size_t) embedded_bytes);
571 memcpy(&msg->data, skb->data, (size_t) embedded_bytes);
572 } else {
573 msg->version = XPNET_VERSION;
574 }
575 msg->magic = XPNET_MAGIC;
576 msg->size = end_addr - start_addr;
577 msg->leadin_ignore = (u64) skb->data - start_addr;
578 msg->tailout_ignore = end_addr - (u64) skb->tail;
579 msg->buf_pa = __pa(start_addr);
580
581 dev_dbg(xpnet, "sending XPC message to %d:%d\nmsg->buf_pa="
582 "0x%lx, msg->size=%u, msg->leadin_ignore=%u, "
583 "msg->tailout_ignore=%u\n", dest_partid,
584 XPC_NET_CHANNEL, msg->buf_pa, msg->size,
585 msg->leadin_ignore, msg->tailout_ignore);
586
587
588 atomic_inc(&queued_msg->use_count);
589
590 ret = xpc_send_notify(dest_partid, XPC_NET_CHANNEL, msg,
591 xpnet_send_completed, queued_msg);
592 if (unlikely(ret != xpcSuccess)) {
593 atomic_dec(&queued_msg->use_count);
594 continue;
595 }
596
597 }
598
599 if (atomic_dec_return(&queued_msg->use_count) == 0) {
600 dev_dbg(xpnet, "no partitions to receive packet destined for "
601 "%d\n", dest_partid);
602
603
604 dev_kfree_skb(skb);
605 kfree(queued_msg);
606 }
607
608 priv->stats.tx_packets++;
609 priv->stats.tx_bytes += skb->len;
610
611 return 0;
612}
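/*
 * A standalone sketch of the message geometry computed in
 * xpnet_dev_hard_start_xmit() above, again assuming a 128-byte
 * cacheline. With skb->data at base + 40 and skb->tail at base + 240
 * (skb->len == 200): size == 256, leadin_ignore == 40,
 * tailout_ignore == 16, and size - leadin - tailout == skb->len.
 */
#include <stdint.h>

#define CACHE_BYTES	128UL	/* stand-in for L1_CACHE_BYTES */
#define CACHE_ALIGN(a)	(((a) + CACHE_BYTES - 1) & ~(CACHE_BYTES - 1))

struct msg_geometry {
	uint64_t size;		/* whole cachelines transferred */
	uint8_t leadin_ignore;	/* junk bytes before skb->data */
	uint8_t tailout_ignore;	/* junk bytes after skb->tail */
};

static struct msg_geometry compute_geometry(uintptr_t data, uintptr_t tail)
{
	uintptr_t start = data & ~(CACHE_BYTES - 1);
	uintptr_t end = CACHE_ALIGN(tail);

	return (struct msg_geometry) {
		.size = end - start,
		.leadin_ignore = data - start,
		.tailout_ignore = end - tail,
	};
}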
613
614
615/*
616 * Deal with transmit timeouts coming from the network layer.
617 */
618static void
619xpnet_dev_tx_timeout (struct net_device *dev)
620{
621 struct xpnet_dev_private *priv;
622
623
624 priv = (struct xpnet_dev_private *) dev->priv;
625
626 priv->stats.tx_errors++;
627 return;
628}
629
630
631static int __init
632xpnet_init(void)
633{
634 int i;
635 u32 license_num;
636 int result = -ENOMEM;
637
638
639 dev_info(xpnet, "registering network device %s\n", XPNET_DEVICE_NAME);
640
641 /*
642 * use ether_setup() to init the majority of our device
643 * structure and then override the necessary pieces.
644 */
645 xpnet_device = alloc_netdev(sizeof(struct xpnet_dev_private),
646 XPNET_DEVICE_NAME, ether_setup);
647 if (xpnet_device == NULL) {
648 return -ENOMEM;
649 }
650
651 netif_carrier_off(xpnet_device);
652
653 xpnet_device->mtu = XPNET_DEF_MTU;
654 xpnet_device->change_mtu = xpnet_dev_change_mtu;
655 xpnet_device->open = xpnet_dev_open;
656 xpnet_device->get_stats = xpnet_dev_get_stats;
657 xpnet_device->stop = xpnet_dev_stop;
658 xpnet_device->hard_start_xmit = xpnet_dev_hard_start_xmit;
659 xpnet_device->tx_timeout = xpnet_dev_tx_timeout;
660 xpnet_device->set_config = xpnet_dev_set_config;
661
662 /*
663 * Multicast assumes the LSB of the first octet is set for multicast
664 * MAC addresses. We chose the first octet of the MAC to be unlikely
665 * to collide with any vendor's officially issued MAC.
666 */
667 xpnet_device->dev_addr[0] = 0xfe;
668 xpnet_device->dev_addr[XPNET_PARTID_OCTET] = sn_partition_id;
669 license_num = sn_partition_serial_number_val();
670 for (i = 3; i >= 0; i--) {
671 xpnet_device->dev_addr[XPNET_LICENSE_OCTET + i] =
672 license_num & 0xff;
673 license_num = license_num >> 8;
674 }
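	/*
	 * Worked example (hedged): with sn_partition_id == 3 and a
	 * license serial number of 0x12345678, the loop above yields
	 * the MAC fe:03:12:34:56:78 -- the 0xfe marker, the partid in
	 * octet 1, and the license number big-endian in octets 2-5.
	 */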
675
676 /*
677 * ether_setup() sets this to a multicast device. We are
678 * really not supporting multicast at this time.
679 */
680 xpnet_device->flags &= ~IFF_MULTICAST;
681
682 /*
683 * No need to checksum as it is a DMA transfer. The BTE will
684 * report an error if the data is not retrievable and the
685 * packet will be dropped.
686 */
687 xpnet_device->features = NETIF_F_NO_CSUM;
688
689 result = register_netdev(xpnet_device);
690 if (result != 0) {
691 free_netdev(xpnet_device);
692 }
693
694 return result;
695}
696module_init(xpnet_init);
697
698
699static void __exit
700xpnet_exit(void)
701{
702 dev_info(xpnet, "unregistering network device %s\n",
703		 xpnet_device->name);
704
705 unregister_netdev(xpnet_device);
706
707 free_netdev(xpnet_device);
708}
709module_exit(xpnet_exit);
710
711
712MODULE_AUTHOR("Silicon Graphics, Inc.");
713MODULE_DESCRIPTION("Cross Partition Network adapter (XPNET)");
714MODULE_LICENSE("GPL");
715
diff --git a/arch/ia64/sn/pci/pcibr/pcibr_dma.c b/arch/ia64/sn/pci/pcibr/pcibr_dma.c
index c90685985d81..64af2b2c1787 100644
--- a/arch/ia64/sn/pci/pcibr/pcibr_dma.c
+++ b/arch/ia64/sn/pci/pcibr/pcibr_dma.c
@@ -301,7 +301,7 @@ void sn_dma_flush(uint64_t addr)
301 spin_lock_irqsave(&((struct sn_flush_device_list *)p)-> 301 spin_lock_irqsave(&((struct sn_flush_device_list *)p)->
302 sfdl_flush_lock, flags); 302 sfdl_flush_lock, flags);
303 303
304 p->sfdl_flush_value = 0; 304 *p->sfdl_flush_addr = 0;
305 305
306 /* force an interrupt. */ 306 /* force an interrupt. */
307 *(volatile uint32_t *)(p->sfdl_force_int_addr) = 1; 307 *(volatile uint32_t *)(p->sfdl_force_int_addr) = 1;
diff --git a/arch/ia64/sn/pci/tioca_provider.c b/arch/ia64/sn/pci/tioca_provider.c
index 54a0dd447e76..8dae9eb45456 100644
--- a/arch/ia64/sn/pci/tioca_provider.c
+++ b/arch/ia64/sn/pci/tioca_provider.c
@@ -431,7 +431,7 @@ tioca_dma_mapped(struct pci_dev *pdev, uint64_t paddr, size_t req_size)
431 ca_dmamap->cad_dma_addr = bus_addr; 431 ca_dmamap->cad_dma_addr = bus_addr;
432 ca_dmamap->cad_gart_size = entries; 432 ca_dmamap->cad_gart_size = entries;
433 ca_dmamap->cad_gart_entry = entry; 433 ca_dmamap->cad_gart_entry = entry;
434 list_add(&ca_dmamap->cad_list, &tioca_kern->ca_list); 434 list_add(&ca_dmamap->cad_list, &tioca_kern->ca_dmamaps);
435 435
436 if (xio_addr % ps) { 436 if (xio_addr % ps) {
437 tioca_kern->ca_pcigart[entry] = tioca_paddr_to_gart(xio_addr); 437 tioca_kern->ca_pcigart[entry] = tioca_paddr_to_gart(xio_addr);