about | summary | refs | log | tree | commit | diff | stats
path: root/arch
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2008-04-24 11:37:29 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2008-04-24 11:37:29 -0400
commit e270b51df657011983241ec61a1fc7de186e16cd (patch)
tree 3397be4cbf31676ca6ebb187903e8cfc2218f223 /arch
parent 8fa82790fb9dfe57aeafc8de6b6a5c1df63efa06 (diff)
parent 227739bf4c110bbd02d0c0f13b272c32de406e4c (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc-2.6
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc-2.6: (23 commits)
  sparc: sunzilog uart order
  [SPARC64]: Detect trap frames in stack backtraces.
  [SPARC64]: %l6 trap return handling no longer necessary.
  [SPARC64]: Use trap type stored in pt_regs to handle syscall restart.
  [SPARC64]: Store magic cookie and trap type in pt_regs.
  [SPARC64]: PROM debug console can be CON_ANYTIME.
  sparc64: cleanup after SunOS/Solaris binary emulation removal
  sparc: cleanup after SunOS binary emulation removal
  [SPARC64]: Add NUMA support.
  [SPARC64]: Allocate TSB node-local.
  [SPARC64]: NUMA device infrastructure.
  [SPARC64]: Kill pci_iommu_table_init() declaration.
  [SPARC64]: Once we have the boot cmdline, call parse_early_param()
  [SPARC64]: Remove unused asm-sparc64/numnodes.h
  [SPARC64]: Decrease SECTION_SIZE_BITS to 30.
  [SPARC64]: Initialize MDESC earlier and use lmb_alloc()
  [SPARC64]: Use lmb_alloc() for PROM device tree.
  [SPARC64]: Call real_setup_per_cpu_areas() earlier and use lmb_alloc().
  [SPARC64]: Fully use LMB information in bootmem_init().
  [SPARC64]: Start using LMB information in bootmem_init().
  ...
Diffstat (limited to 'arch')
-rw-r--r-- arch/sparc/kernel/entry.S 1
-rw-r--r-- arch/sparc/kernel/signal.c 5
-rw-r--r-- arch/sparc64/Kconfig 20
-rw-r--r-- arch/sparc64/defconfig 99
-rw-r--r-- arch/sparc64/kernel/ebus.c 1
-rw-r--r-- arch/sparc64/kernel/entry.S 37
-rw-r--r-- arch/sparc64/kernel/entry.h 1
-rw-r--r-- arch/sparc64/kernel/etrap.S 4
-rw-r--r-- arch/sparc64/kernel/iommu.c 33
-rw-r--r-- arch/sparc64/kernel/isa.c 1
-rw-r--r-- arch/sparc64/kernel/mdesc.c 28
-rw-r--r-- arch/sparc64/kernel/of_device.c 12
-rw-r--r-- arch/sparc64/kernel/pci.c 12
-rw-r--r-- arch/sparc64/kernel/pci_fire.c 5
-rw-r--r-- arch/sparc64/kernel/pci_impl.h 4
-rw-r--r-- arch/sparc64/kernel/pci_msi.c 8
-rw-r--r-- arch/sparc64/kernel/pci_psycho.c 5
-rw-r--r-- arch/sparc64/kernel/pci_sabre.c 4
-rw-r--r-- arch/sparc64/kernel/pci_schizo.c 5
-rw-r--r-- arch/sparc64/kernel/pci_sun4v.c 13
-rw-r--r-- arch/sparc64/kernel/prom.c 14
-rw-r--r-- arch/sparc64/kernel/rtrap.S 21
-rw-r--r-- arch/sparc64/kernel/sbus.c 3
-rw-r--r-- arch/sparc64/kernel/setup.c 3
-rw-r--r-- arch/sparc64/kernel/signal.c 25
-rw-r--r-- arch/sparc64/kernel/signal32.c 20
-rw-r--r-- arch/sparc64/kernel/smp.c 11
-rw-r--r-- arch/sparc64/kernel/sparc64_ksyms.c 2
-rw-r--r-- arch/sparc64/kernel/stacktrace.c 16
-rw-r--r-- arch/sparc64/kernel/sun4v_tlb_miss.S 16
-rw-r--r-- arch/sparc64/kernel/sysfs.c 12
-rw-r--r-- arch/sparc64/kernel/traps.c 19
-rw-r--r-- arch/sparc64/kernel/tsb.S 2
-rw-r--r-- arch/sparc64/kernel/winfixup.S 12
-rw-r--r-- arch/sparc64/mm/init.c 989
-rw-r--r-- arch/sparc64/mm/tsb.c 3
-rw-r--r-- arch/sparc64/mm/ultra.S 4
37 files changed, 1020 insertions, 450 deletions
diff --git a/arch/sparc/kernel/entry.S b/arch/sparc/kernel/entry.S
index 135644f8add7..484c83d23eef 100644
--- a/arch/sparc/kernel/entry.S
+++ b/arch/sparc/kernel/entry.S
@@ -1409,7 +1409,6 @@ syscall_is_too_hard:
1409 1409
1410 st %o0, [%sp + STACKFRAME_SZ + PT_I0] 1410 st %o0, [%sp + STACKFRAME_SZ + PT_I0]
1411 1411
1412 .globl ret_sys_call
1413ret_sys_call: 1412ret_sys_call:
1414 ld [%curptr + TI_FLAGS], %l6 1413 ld [%curptr + TI_FLAGS], %l6
1415 cmp %o0, -ERESTART_RESTARTBLOCK 1414 cmp %o0, -ERESTART_RESTARTBLOCK
diff --git a/arch/sparc/kernel/signal.c b/arch/sparc/kernel/signal.c
index 1f730619a24a..3e849e8e3480 100644
--- a/arch/sparc/kernel/signal.c
+++ b/arch/sparc/kernel/signal.c
@@ -105,11 +105,6 @@ static int _sigpause_common(old_sigset_t set)
105 return -ERESTARTNOHAND; 105 return -ERESTARTNOHAND;
106} 106}
107 107
108asmlinkage int sys_sigpause(unsigned int set)
109{
110 return _sigpause_common(set);
111}
112
113asmlinkage int sys_sigsuspend(old_sigset_t set) 108asmlinkage int sys_sigsuspend(old_sigset_t set)
114{ 109{
115 return _sigpause_common(set); 110 return _sigpause_common(set);
diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig
index df3eacb5ca15..8acc5cc38621 100644
--- a/arch/sparc64/Kconfig
+++ b/arch/sparc64/Kconfig
@@ -250,6 +250,26 @@ endchoice
250 250
251endmenu 251endmenu
252 252
253config NUMA
254 bool "NUMA support"
255
256config NODES_SHIFT
257 int
258 default "4"
259 depends on NEED_MULTIPLE_NODES
260
261# Some NUMA nodes have memory ranges that span
262# other nodes. Even though a pfn is valid and
263# between a node's start and end pfns, it may not
264# reside on that node. See memmap_init_zone()
265# for details.
266config NODES_SPAN_OTHER_NODES
267 def_bool y
268 depends on NEED_MULTIPLE_NODES
269
270config ARCH_POPULATES_NODE_MAP
271 def_bool y
272
253config ARCH_SELECT_MEMORY_MODEL 273config ARCH_SELECT_MEMORY_MODEL
254 def_bool y 274 def_bool y
255 275
diff --git a/arch/sparc64/defconfig b/arch/sparc64/defconfig
index e1835868ad36..92f79680f70d 100644
--- a/arch/sparc64/defconfig
+++ b/arch/sparc64/defconfig
@@ -1,7 +1,7 @@
1# 1#
2# Automatically generated make config: don't edit 2# Automatically generated make config: don't edit
3# Linux kernel version: 2.6.25 3# Linux kernel version: 2.6.25-numa
4# Sun Apr 20 01:33:21 2008 4# Wed Apr 23 04:49:08 2008
5# 5#
6CONFIG_SPARC=y 6CONFIG_SPARC=y
7CONFIG_SPARC64=y 7CONFIG_SPARC64=y
@@ -152,6 +152,8 @@ CONFIG_GENERIC_CALIBRATE_DELAY=y
152CONFIG_HUGETLB_PAGE_SIZE_4MB=y 152CONFIG_HUGETLB_PAGE_SIZE_4MB=y
153# CONFIG_HUGETLB_PAGE_SIZE_512K is not set 153# CONFIG_HUGETLB_PAGE_SIZE_512K is not set
154# CONFIG_HUGETLB_PAGE_SIZE_64K is not set 154# CONFIG_HUGETLB_PAGE_SIZE_64K is not set
155# CONFIG_NUMA is not set
156CONFIG_ARCH_POPULATES_NODE_MAP=y
155CONFIG_ARCH_SELECT_MEMORY_MODEL=y 157CONFIG_ARCH_SELECT_MEMORY_MODEL=y
156CONFIG_ARCH_SPARSEMEM_ENABLE=y 158CONFIG_ARCH_SPARSEMEM_ENABLE=y
157CONFIG_ARCH_SPARSEMEM_DEFAULT=y 159CONFIG_ARCH_SPARSEMEM_DEFAULT=y
@@ -787,7 +789,6 @@ CONFIG_I2C_ALGOBIT=y
787# CONFIG_SENSORS_PCF8574 is not set 789# CONFIG_SENSORS_PCF8574 is not set
788# CONFIG_PCF8575 is not set 790# CONFIG_PCF8575 is not set
789# CONFIG_SENSORS_PCF8591 is not set 791# CONFIG_SENSORS_PCF8591 is not set
790# CONFIG_TPS65010 is not set
791# CONFIG_SENSORS_MAX6875 is not set 792# CONFIG_SENSORS_MAX6875 is not set
792# CONFIG_SENSORS_TSL2550 is not set 793# CONFIG_SENSORS_TSL2550 is not set
793# CONFIG_I2C_DEBUG_CORE is not set 794# CONFIG_I2C_DEBUG_CORE is not set
@@ -869,6 +870,7 @@ CONFIG_SSB_POSSIBLE=y
869# Multifunction device drivers 870# Multifunction device drivers
870# 871#
871# CONFIG_MFD_SM501 is not set 872# CONFIG_MFD_SM501 is not set
873# CONFIG_HTC_PASIC3 is not set
872 874
873# 875#
874# Multimedia devices 876# Multimedia devices
@@ -1219,10 +1221,6 @@ CONFIG_USB_STORAGE=m
1219# CONFIG_NEW_LEDS is not set 1221# CONFIG_NEW_LEDS is not set
1220# CONFIG_INFINIBAND is not set 1222# CONFIG_INFINIBAND is not set
1221# CONFIG_RTC_CLASS is not set 1223# CONFIG_RTC_CLASS is not set
1222
1223#
1224# Userspace I/O
1225#
1226# CONFIG_UIO is not set 1224# CONFIG_UIO is not set
1227 1225
1228# 1226#
@@ -1399,6 +1397,7 @@ CONFIG_SCHEDSTATS=y
1399CONFIG_DEBUG_BUGVERBOSE=y 1397CONFIG_DEBUG_BUGVERBOSE=y
1400# CONFIG_DEBUG_INFO is not set 1398# CONFIG_DEBUG_INFO is not set
1401# CONFIG_DEBUG_VM is not set 1399# CONFIG_DEBUG_VM is not set
1400# CONFIG_DEBUG_WRITECOUNT is not set
1402# CONFIG_DEBUG_LIST is not set 1401# CONFIG_DEBUG_LIST is not set
1403# CONFIG_DEBUG_SG is not set 1402# CONFIG_DEBUG_SG is not set
1404# CONFIG_BOOT_PRINTK_DELAY is not set 1403# CONFIG_BOOT_PRINTK_DELAY is not set
@@ -1425,53 +1424,82 @@ CONFIG_ASYNC_CORE=m
1425CONFIG_ASYNC_MEMCPY=m 1424CONFIG_ASYNC_MEMCPY=m
1426CONFIG_ASYNC_XOR=m 1425CONFIG_ASYNC_XOR=m
1427CONFIG_CRYPTO=y 1426CONFIG_CRYPTO=y
1427
1428#
1429# Crypto core or helper
1430#
1428CONFIG_CRYPTO_ALGAPI=y 1431CONFIG_CRYPTO_ALGAPI=y
1429CONFIG_CRYPTO_AEAD=y 1432CONFIG_CRYPTO_AEAD=y
1430CONFIG_CRYPTO_BLKCIPHER=y 1433CONFIG_CRYPTO_BLKCIPHER=y
1431# CONFIG_CRYPTO_SEQIV is not set
1432CONFIG_CRYPTO_HASH=y 1434CONFIG_CRYPTO_HASH=y
1433CONFIG_CRYPTO_MANAGER=y 1435CONFIG_CRYPTO_MANAGER=y
1436CONFIG_CRYPTO_GF128MUL=m
1437CONFIG_CRYPTO_NULL=m
1438# CONFIG_CRYPTO_CRYPTD is not set
1439CONFIG_CRYPTO_AUTHENC=y
1440CONFIG_CRYPTO_TEST=m
1441
1442#
1443# Authenticated Encryption with Associated Data
1444#
1445# CONFIG_CRYPTO_CCM is not set
1446# CONFIG_CRYPTO_GCM is not set
1447# CONFIG_CRYPTO_SEQIV is not set
1448
1449#
1450# Block modes
1451#
1452CONFIG_CRYPTO_CBC=y
1453# CONFIG_CRYPTO_CTR is not set
1454# CONFIG_CRYPTO_CTS is not set
1455CONFIG_CRYPTO_ECB=m
1456CONFIG_CRYPTO_LRW=m
1457CONFIG_CRYPTO_PCBC=m
1458CONFIG_CRYPTO_XTS=m
1459
1460#
1461# Hash modes
1462#
1434CONFIG_CRYPTO_HMAC=y 1463CONFIG_CRYPTO_HMAC=y
1435CONFIG_CRYPTO_XCBC=y 1464CONFIG_CRYPTO_XCBC=y
1436CONFIG_CRYPTO_NULL=m 1465
1466#
1467# Digest
1468#
1469CONFIG_CRYPTO_CRC32C=m
1437CONFIG_CRYPTO_MD4=y 1470CONFIG_CRYPTO_MD4=y
1438CONFIG_CRYPTO_MD5=y 1471CONFIG_CRYPTO_MD5=y
1472CONFIG_CRYPTO_MICHAEL_MIC=m
1439CONFIG_CRYPTO_SHA1=y 1473CONFIG_CRYPTO_SHA1=y
1440CONFIG_CRYPTO_SHA256=m 1474CONFIG_CRYPTO_SHA256=m
1441CONFIG_CRYPTO_SHA512=m 1475CONFIG_CRYPTO_SHA512=m
1442CONFIG_CRYPTO_WP512=m
1443CONFIG_CRYPTO_TGR192=m 1476CONFIG_CRYPTO_TGR192=m
1444CONFIG_CRYPTO_GF128MUL=m 1477CONFIG_CRYPTO_WP512=m
1445CONFIG_CRYPTO_ECB=m 1478
1446CONFIG_CRYPTO_CBC=y 1479#
1447CONFIG_CRYPTO_PCBC=m 1480# Ciphers
1448CONFIG_CRYPTO_LRW=m 1481#
1449CONFIG_CRYPTO_XTS=m
1450# CONFIG_CRYPTO_CTR is not set
1451# CONFIG_CRYPTO_GCM is not set
1452# CONFIG_CRYPTO_CCM is not set
1453# CONFIG_CRYPTO_CRYPTD is not set
1454CONFIG_CRYPTO_DES=y
1455CONFIG_CRYPTO_FCRYPT=m
1456CONFIG_CRYPTO_BLOWFISH=m
1457CONFIG_CRYPTO_TWOFISH=m
1458CONFIG_CRYPTO_TWOFISH_COMMON=m
1459CONFIG_CRYPTO_SERPENT=m
1460CONFIG_CRYPTO_AES=m 1482CONFIG_CRYPTO_AES=m
1483CONFIG_CRYPTO_ANUBIS=m
1484CONFIG_CRYPTO_ARC4=m
1485CONFIG_CRYPTO_BLOWFISH=m
1486CONFIG_CRYPTO_CAMELLIA=m
1461CONFIG_CRYPTO_CAST5=m 1487CONFIG_CRYPTO_CAST5=m
1462CONFIG_CRYPTO_CAST6=m 1488CONFIG_CRYPTO_CAST6=m
1463CONFIG_CRYPTO_TEA=m 1489CONFIG_CRYPTO_DES=y
1464CONFIG_CRYPTO_ARC4=m 1490CONFIG_CRYPTO_FCRYPT=m
1465CONFIG_CRYPTO_KHAZAD=m 1491CONFIG_CRYPTO_KHAZAD=m
1466CONFIG_CRYPTO_ANUBIS=m
1467CONFIG_CRYPTO_SEED=m
1468# CONFIG_CRYPTO_SALSA20 is not set 1492# CONFIG_CRYPTO_SALSA20 is not set
1493CONFIG_CRYPTO_SEED=m
1494CONFIG_CRYPTO_SERPENT=m
1495CONFIG_CRYPTO_TEA=m
1496CONFIG_CRYPTO_TWOFISH=m
1497CONFIG_CRYPTO_TWOFISH_COMMON=m
1498
1499#
1500# Compression
1501#
1469CONFIG_CRYPTO_DEFLATE=y 1502CONFIG_CRYPTO_DEFLATE=y
1470CONFIG_CRYPTO_MICHAEL_MIC=m
1471CONFIG_CRYPTO_CRC32C=m
1472CONFIG_CRYPTO_CAMELLIA=m
1473CONFIG_CRYPTO_TEST=m
1474CONFIG_CRYPTO_AUTHENC=y
1475# CONFIG_CRYPTO_LZO is not set 1503# CONFIG_CRYPTO_LZO is not set
1476CONFIG_CRYPTO_HW=y 1504CONFIG_CRYPTO_HW=y
1477# CONFIG_CRYPTO_DEV_HIFN_795X is not set 1505# CONFIG_CRYPTO_DEV_HIFN_795X is not set
@@ -1492,3 +1520,4 @@ CONFIG_PLIST=y
1492CONFIG_HAS_IOMEM=y 1520CONFIG_HAS_IOMEM=y
1493CONFIG_HAS_IOPORT=y 1521CONFIG_HAS_IOPORT=y
1494CONFIG_HAS_DMA=y 1522CONFIG_HAS_DMA=y
1523CONFIG_HAVE_LMB=y
diff --git a/arch/sparc64/kernel/ebus.c b/arch/sparc64/kernel/ebus.c
index 04ab81cb4f48..bc2632274840 100644
--- a/arch/sparc64/kernel/ebus.c
+++ b/arch/sparc64/kernel/ebus.c
@@ -396,6 +396,7 @@ static void __init fill_ebus_device(struct device_node *dp, struct linux_ebus_de
396 sd->op = &dev->ofdev; 396 sd->op = &dev->ofdev;
397 sd->iommu = dev->bus->ofdev.dev.parent->archdata.iommu; 397 sd->iommu = dev->bus->ofdev.dev.parent->archdata.iommu;
398 sd->stc = dev->bus->ofdev.dev.parent->archdata.stc; 398 sd->stc = dev->bus->ofdev.dev.parent->archdata.stc;
399 sd->numa_node = dev->bus->ofdev.dev.parent->archdata.numa_node;
399 400
400 dev->ofdev.node = dp; 401 dev->ofdev.node = dp;
401 dev->ofdev.dev.parent = &dev->bus->ofdev.dev; 402 dev->ofdev.dev.parent = &dev->bus->ofdev.dev;
diff --git a/arch/sparc64/kernel/entry.S b/arch/sparc64/kernel/entry.S
index fb43c76bdc26..fd06e937ae1e 100644
--- a/arch/sparc64/kernel/entry.S
+++ b/arch/sparc64/kernel/entry.S
@@ -47,7 +47,7 @@ do_fpdis:
47 ba,pt %xcc, etrap 47 ba,pt %xcc, etrap
48109: or %g7, %lo(109b), %g7 48109: or %g7, %lo(109b), %g7
49 add %g0, %g0, %g0 49 add %g0, %g0, %g0
50 ba,a,pt %xcc, rtrap_clr_l6 50 ba,a,pt %xcc, rtrap
51 51
521: TRAP_LOAD_THREAD_REG(%g6, %g1) 521: TRAP_LOAD_THREAD_REG(%g6, %g1)
53 ldub [%g6 + TI_FPSAVED], %g5 53 ldub [%g6 + TI_FPSAVED], %g5
@@ -226,7 +226,7 @@ fp_other_bounce:
226 call do_fpother 226 call do_fpother
227 add %sp, PTREGS_OFF, %o0 227 add %sp, PTREGS_OFF, %o0
228 ba,pt %xcc, rtrap 228 ba,pt %xcc, rtrap
229 clr %l6 229 nop
230 230
231 .globl do_fpother_check_fitos 231 .globl do_fpother_check_fitos
232 .align 32 232 .align 32
@@ -489,7 +489,7 @@ utrap_trap: /* %g3=handler,%g4=level */
489 call bad_trap 489 call bad_trap
490 add %sp, PTREGS_OFF, %o0 490 add %sp, PTREGS_OFF, %o0
491 ba,pt %xcc, rtrap 491 ba,pt %xcc, rtrap
492 clr %l6 492 nop
493 493
494invoke_utrap: 494invoke_utrap:
495 sllx %g3, 3, %g3 495 sllx %g3, 3, %g3
@@ -607,7 +607,7 @@ __spitfire_cee_trap_continue:
607 call spitfire_access_error 607 call spitfire_access_error
608 add %sp, PTREGS_OFF, %o0 608 add %sp, PTREGS_OFF, %o0
609 ba,pt %xcc, rtrap 609 ba,pt %xcc, rtrap
610 clr %l6 610 nop
611 611
612 /* This is the trap handler entry point for ECC correctable 612 /* This is the trap handler entry point for ECC correctable
613 * errors. They are corrected, but we listen for the trap 613 * errors. They are corrected, but we listen for the trap
@@ -686,7 +686,7 @@ __spitfire_data_access_exception_tl1:
686 call spitfire_data_access_exception_tl1 686 call spitfire_data_access_exception_tl1
687 add %sp, PTREGS_OFF, %o0 687 add %sp, PTREGS_OFF, %o0
688 ba,pt %xcc, rtrap 688 ba,pt %xcc, rtrap
689 clr %l6 689 nop
690 690
691__spitfire_data_access_exception: 691__spitfire_data_access_exception:
692 rdpr %pstate, %g4 692 rdpr %pstate, %g4
@@ -705,7 +705,7 @@ __spitfire_data_access_exception:
705 call spitfire_data_access_exception 705 call spitfire_data_access_exception
706 add %sp, PTREGS_OFF, %o0 706 add %sp, PTREGS_OFF, %o0
707 ba,pt %xcc, rtrap 707 ba,pt %xcc, rtrap
708 clr %l6 708 nop
709 709
710 .globl __spitfire_insn_access_exception 710 .globl __spitfire_insn_access_exception
711 .globl __spitfire_insn_access_exception_tl1 711 .globl __spitfire_insn_access_exception_tl1
@@ -725,7 +725,7 @@ __spitfire_insn_access_exception_tl1:
725 call spitfire_insn_access_exception_tl1 725 call spitfire_insn_access_exception_tl1
726 add %sp, PTREGS_OFF, %o0 726 add %sp, PTREGS_OFF, %o0
727 ba,pt %xcc, rtrap 727 ba,pt %xcc, rtrap
728 clr %l6 728 nop
729 729
730__spitfire_insn_access_exception: 730__spitfire_insn_access_exception:
731 rdpr %pstate, %g4 731 rdpr %pstate, %g4
@@ -743,7 +743,7 @@ __spitfire_insn_access_exception:
743 call spitfire_insn_access_exception 743 call spitfire_insn_access_exception
744 add %sp, PTREGS_OFF, %o0 744 add %sp, PTREGS_OFF, %o0
745 ba,pt %xcc, rtrap 745 ba,pt %xcc, rtrap
746 clr %l6 746 nop
747 747
748 /* These get patched into the trap table at boot time 748 /* These get patched into the trap table at boot time
749 * once we know we have a cheetah processor. 749 * once we know we have a cheetah processor.
@@ -937,7 +937,7 @@ do_dcpe_tl1_fatal:
937 call cheetah_plus_parity_error 937 call cheetah_plus_parity_error
938 add %sp, PTREGS_OFF, %o1 938 add %sp, PTREGS_OFF, %o1
939 ba,pt %xcc, rtrap 939 ba,pt %xcc, rtrap
940 clr %l6 940 nop
941 941
942do_icpe_tl1: 942do_icpe_tl1:
943 rdpr %tl, %g1 ! Save original trap level 943 rdpr %tl, %g1 ! Save original trap level
@@ -979,7 +979,7 @@ do_icpe_tl1_fatal:
979 call cheetah_plus_parity_error 979 call cheetah_plus_parity_error
980 add %sp, PTREGS_OFF, %o1 980 add %sp, PTREGS_OFF, %o1
981 ba,pt %xcc, rtrap 981 ba,pt %xcc, rtrap
982 clr %l6 982 nop
983 983
984dcpe_icpe_tl1_common: 984dcpe_icpe_tl1_common:
985 /* Flush D-cache, re-enable D/I caches in DCU and finally 985 /* Flush D-cache, re-enable D/I caches in DCU and finally
@@ -1281,7 +1281,7 @@ __do_privact:
1281 call do_privact 1281 call do_privact
1282 add %sp, PTREGS_OFF, %o0 1282 add %sp, PTREGS_OFF, %o0
1283 ba,pt %xcc, rtrap 1283 ba,pt %xcc, rtrap
1284 clr %l6 1284 nop
1285 1285
1286 .globl do_mna 1286 .globl do_mna
1287do_mna: 1287do_mna:
@@ -1308,7 +1308,7 @@ do_mna:
1308 call mem_address_unaligned 1308 call mem_address_unaligned
1309 add %sp, PTREGS_OFF, %o0 1309 add %sp, PTREGS_OFF, %o0
1310 ba,pt %xcc, rtrap 1310 ba,pt %xcc, rtrap
1311 clr %l6 1311 nop
1312 1312
1313 .globl do_lddfmna 1313 .globl do_lddfmna
1314do_lddfmna: 1314do_lddfmna:
@@ -1326,7 +1326,7 @@ do_lddfmna:
1326 call handle_lddfmna 1326 call handle_lddfmna
1327 add %sp, PTREGS_OFF, %o0 1327 add %sp, PTREGS_OFF, %o0
1328 ba,pt %xcc, rtrap 1328 ba,pt %xcc, rtrap
1329 clr %l6 1329 nop
1330 1330
1331 .globl do_stdfmna 1331 .globl do_stdfmna
1332do_stdfmna: 1332do_stdfmna:
@@ -1344,7 +1344,7 @@ do_stdfmna:
1344 call handle_stdfmna 1344 call handle_stdfmna
1345 add %sp, PTREGS_OFF, %o0 1345 add %sp, PTREGS_OFF, %o0
1346 ba,pt %xcc, rtrap 1346 ba,pt %xcc, rtrap
1347 clr %l6 1347 nop
1348 1348
1349 .globl breakpoint_trap 1349 .globl breakpoint_trap
1350breakpoint_trap: 1350breakpoint_trap:
@@ -1424,13 +1424,13 @@ sys32_rt_sigreturn:
14241: ldx [%curptr + TI_FLAGS], %l5 14241: ldx [%curptr + TI_FLAGS], %l5
1425 andcc %l5, (_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT), %g0 1425 andcc %l5, (_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT), %g0
1426 be,pt %icc, rtrap 1426 be,pt %icc, rtrap
1427 clr %l6 1427 nop
1428 add %sp, PTREGS_OFF, %o0 1428 add %sp, PTREGS_OFF, %o0
1429 call syscall_trace 1429 call syscall_trace
1430 mov 1, %o1 1430 mov 1, %o1
1431 1431
1432 ba,pt %xcc, rtrap 1432 ba,pt %xcc, rtrap
1433 clr %l6 1433 nop
1434 1434
1435 /* This is how fork() was meant to be done, 8 instruction entry. 1435 /* This is how fork() was meant to be done, 8 instruction entry.
1436 * 1436 *
@@ -1559,7 +1559,7 @@ linux_sparc_syscall32:
1559 1559
1560 /* Linux native system calls enter here... */ 1560 /* Linux native system calls enter here... */
1561 .align 32 1561 .align 32
1562 .globl linux_sparc_syscall, ret_sys_call 1562 .globl linux_sparc_syscall
1563linux_sparc_syscall: 1563linux_sparc_syscall:
1564 /* Direct access to user regs, much faster. */ 1564 /* Direct access to user regs, much faster. */
1565 cmp %g1, NR_SYSCALLS ! IEU1 Group 1565 cmp %g1, NR_SYSCALLS ! IEU1 Group
@@ -1605,7 +1605,7 @@ ret_sys_call:
1605 bne,pn %icc, linux_syscall_trace2 1605 bne,pn %icc, linux_syscall_trace2
1606 add %l1, 0x4, %l2 ! npc = npc+4 1606 add %l1, 0x4, %l2 ! npc = npc+4
1607 stx %l1, [%sp + PTREGS_OFF + PT_V9_TPC] 1607 stx %l1, [%sp + PTREGS_OFF + PT_V9_TPC]
1608 ba,pt %xcc, rtrap_clr_l6 1608 ba,pt %xcc, rtrap
1609 stx %l2, [%sp + PTREGS_OFF + PT_V9_TNPC] 1609 stx %l2, [%sp + PTREGS_OFF + PT_V9_TNPC]
1610 1610
16111: 16111:
@@ -1616,7 +1616,6 @@ ret_sys_call:
1616 sub %g0, %o0, %o0 1616 sub %g0, %o0, %o0
1617 or %g3, %g2, %g3 1617 or %g3, %g2, %g3
1618 stx %o0, [%sp + PTREGS_OFF + PT_V9_I0] 1618 stx %o0, [%sp + PTREGS_OFF + PT_V9_I0]
1619 mov 1, %l6
1620 stx %g3, [%sp + PTREGS_OFF + PT_V9_TSTATE] 1619 stx %g3, [%sp + PTREGS_OFF + PT_V9_TSTATE]
1621 bne,pn %icc, linux_syscall_trace2 1620 bne,pn %icc, linux_syscall_trace2
1622 add %l1, 0x4, %l2 ! npc = npc+4 1621 add %l1, 0x4, %l2 ! npc = npc+4
diff --git a/arch/sparc64/kernel/entry.h b/arch/sparc64/kernel/entry.h
index 4a91e9c6d31b..32fbab620852 100644
--- a/arch/sparc64/kernel/entry.h
+++ b/arch/sparc64/kernel/entry.h
@@ -20,7 +20,6 @@ extern void timer_interrupt(int irq, struct pt_regs *regs);
20 20
21extern void do_notify_resume(struct pt_regs *regs, 21extern void do_notify_resume(struct pt_regs *regs,
22 unsigned long orig_i0, 22 unsigned long orig_i0,
23 int restart_syscall,
24 unsigned long thread_info_flags); 23 unsigned long thread_info_flags);
25 24
26extern asmlinkage void syscall_trace(struct pt_regs *regs, 25extern asmlinkage void syscall_trace(struct pt_regs *regs,
diff --git a/arch/sparc64/kernel/etrap.S b/arch/sparc64/kernel/etrap.S
index 4b2bf9eb447a..b49d3b60bc0c 100644
--- a/arch/sparc64/kernel/etrap.S
+++ b/arch/sparc64/kernel/etrap.S
@@ -53,7 +53,11 @@ etrap_irq:
53 stx %g3, [%g2 + STACKFRAME_SZ + PT_V9_TPC] 53 stx %g3, [%g2 + STACKFRAME_SZ + PT_V9_TPC]
54 rd %y, %g3 54 rd %y, %g3
55 stx %g1, [%g2 + STACKFRAME_SZ + PT_V9_TNPC] 55 stx %g1, [%g2 + STACKFRAME_SZ + PT_V9_TNPC]
56 rdpr %tt, %g1
56 st %g3, [%g2 + STACKFRAME_SZ + PT_V9_Y] 57 st %g3, [%g2 + STACKFRAME_SZ + PT_V9_Y]
58 sethi %hi(PT_REGS_MAGIC), %g3
59 or %g3, %g1, %g1
60 st %g1, [%g2 + STACKFRAME_SZ + PT_V9_MAGIC]
57 61
58 rdpr %cansave, %g1 62 rdpr %cansave, %g1
59 brnz,pt %g1, etrap_save 63 brnz,pt %g1, etrap_save
diff --git a/arch/sparc64/kernel/iommu.c b/arch/sparc64/kernel/iommu.c
index 756fa24eeefa..2a37a6ca2a16 100644
--- a/arch/sparc64/kernel/iommu.c
+++ b/arch/sparc64/kernel/iommu.c
@@ -173,9 +173,11 @@ void iommu_range_free(struct iommu *iommu, dma_addr_t dma_addr, unsigned long np
173} 173}
174 174
175int iommu_table_init(struct iommu *iommu, int tsbsize, 175int iommu_table_init(struct iommu *iommu, int tsbsize,
176 u32 dma_offset, u32 dma_addr_mask) 176 u32 dma_offset, u32 dma_addr_mask,
177 int numa_node)
177{ 178{
178 unsigned long i, tsbbase, order, sz, num_tsb_entries; 179 unsigned long i, order, sz, num_tsb_entries;
180 struct page *page;
179 181
180 num_tsb_entries = tsbsize / sizeof(iopte_t); 182 num_tsb_entries = tsbsize / sizeof(iopte_t);
181 183
@@ -188,11 +190,12 @@ int iommu_table_init(struct iommu *iommu, int tsbsize,
188 /* Allocate and initialize the free area map. */ 190 /* Allocate and initialize the free area map. */
189 sz = num_tsb_entries / 8; 191 sz = num_tsb_entries / 8;
190 sz = (sz + 7UL) & ~7UL; 192 sz = (sz + 7UL) & ~7UL;
191 iommu->arena.map = kzalloc(sz, GFP_KERNEL); 193 iommu->arena.map = kmalloc_node(sz, GFP_KERNEL, numa_node);
192 if (!iommu->arena.map) { 194 if (!iommu->arena.map) {
193 printk(KERN_ERR "IOMMU: Error, kmalloc(arena.map) failed.\n"); 195 printk(KERN_ERR "IOMMU: Error, kmalloc(arena.map) failed.\n");
194 return -ENOMEM; 196 return -ENOMEM;
195 } 197 }
198 memset(iommu->arena.map, 0, sz);
196 iommu->arena.limit = num_tsb_entries; 199 iommu->arena.limit = num_tsb_entries;
197 200
198 if (tlb_type != hypervisor) 201 if (tlb_type != hypervisor)
@@ -201,21 +204,23 @@ int iommu_table_init(struct iommu *iommu, int tsbsize,
201 /* Allocate and initialize the dummy page which we 204 /* Allocate and initialize the dummy page which we
202 * set inactive IO PTEs to point to. 205 * set inactive IO PTEs to point to.
203 */ 206 */
204 iommu->dummy_page = get_zeroed_page(GFP_KERNEL); 207 page = alloc_pages_node(numa_node, GFP_KERNEL, 0);
205 if (!iommu->dummy_page) { 208 if (!page) {
206 printk(KERN_ERR "IOMMU: Error, gfp(dummy_page) failed.\n"); 209 printk(KERN_ERR "IOMMU: Error, gfp(dummy_page) failed.\n");
207 goto out_free_map; 210 goto out_free_map;
208 } 211 }
212 iommu->dummy_page = (unsigned long) page_address(page);
213 memset((void *)iommu->dummy_page, 0, PAGE_SIZE);
209 iommu->dummy_page_pa = (unsigned long) __pa(iommu->dummy_page); 214 iommu->dummy_page_pa = (unsigned long) __pa(iommu->dummy_page);
210 215
211 /* Now allocate and setup the IOMMU page table itself. */ 216 /* Now allocate and setup the IOMMU page table itself. */
212 order = get_order(tsbsize); 217 order = get_order(tsbsize);
213 tsbbase = __get_free_pages(GFP_KERNEL, order); 218 page = alloc_pages_node(numa_node, GFP_KERNEL, order);
214 if (!tsbbase) { 219 if (!page) {
215 printk(KERN_ERR "IOMMU: Error, gfp(tsb) failed.\n"); 220 printk(KERN_ERR "IOMMU: Error, gfp(tsb) failed.\n");
216 goto out_free_dummy_page; 221 goto out_free_dummy_page;
217 } 222 }
218 iommu->page_table = (iopte_t *)tsbbase; 223 iommu->page_table = (iopte_t *)page_address(page);
219 224
220 for (i = 0; i < num_tsb_entries; i++) 225 for (i = 0; i < num_tsb_entries; i++)
221 iopte_make_dummy(iommu, &iommu->page_table[i]); 226 iopte_make_dummy(iommu, &iommu->page_table[i]);
@@ -276,20 +281,24 @@ static inline void iommu_free_ctx(struct iommu *iommu, int ctx)
276static void *dma_4u_alloc_coherent(struct device *dev, size_t size, 281static void *dma_4u_alloc_coherent(struct device *dev, size_t size,
277 dma_addr_t *dma_addrp, gfp_t gfp) 282 dma_addr_t *dma_addrp, gfp_t gfp)
278{ 283{
284 unsigned long flags, order, first_page;
279 struct iommu *iommu; 285 struct iommu *iommu;
286 struct page *page;
287 int npages, nid;
280 iopte_t *iopte; 288 iopte_t *iopte;
281 unsigned long flags, order, first_page;
282 void *ret; 289 void *ret;
283 int npages;
284 290
285 size = IO_PAGE_ALIGN(size); 291 size = IO_PAGE_ALIGN(size);
286 order = get_order(size); 292 order = get_order(size);
287 if (order >= 10) 293 if (order >= 10)
288 return NULL; 294 return NULL;
289 295
290 first_page = __get_free_pages(gfp, order); 296 nid = dev->archdata.numa_node;
291 if (first_page == 0UL) 297 page = alloc_pages_node(nid, gfp, order);
298 if (unlikely(!page))
292 return NULL; 299 return NULL;
300
301 first_page = (unsigned long) page_address(page);
293 memset((char *)first_page, 0, PAGE_SIZE << order); 302 memset((char *)first_page, 0, PAGE_SIZE << order);
294 303
295 iommu = dev->archdata.iommu; 304 iommu = dev->archdata.iommu;
diff --git a/arch/sparc64/kernel/isa.c b/arch/sparc64/kernel/isa.c
index b5f7b354084f..a2af5ed784c9 100644
--- a/arch/sparc64/kernel/isa.c
+++ b/arch/sparc64/kernel/isa.c
@@ -92,6 +92,7 @@ static void __init isa_fill_devices(struct sparc_isa_bridge *isa_br)
92 sd->op = &isa_dev->ofdev; 92 sd->op = &isa_dev->ofdev;
93 sd->iommu = isa_br->ofdev.dev.parent->archdata.iommu; 93 sd->iommu = isa_br->ofdev.dev.parent->archdata.iommu;
94 sd->stc = isa_br->ofdev.dev.parent->archdata.stc; 94 sd->stc = isa_br->ofdev.dev.parent->archdata.stc;
95 sd->numa_node = isa_br->ofdev.dev.parent->archdata.numa_node;
95 96
96 isa_dev->ofdev.node = dp; 97 isa_dev->ofdev.node = dp;
97 isa_dev->ofdev.dev.parent = &isa_br->ofdev.dev; 98 isa_dev->ofdev.dev.parent = &isa_br->ofdev.dev;
diff --git a/arch/sparc64/kernel/mdesc.c b/arch/sparc64/kernel/mdesc.c
index 910083589569..dde52bcf5c64 100644
--- a/arch/sparc64/kernel/mdesc.c
+++ b/arch/sparc64/kernel/mdesc.c
@@ -1,10 +1,10 @@
1/* mdesc.c: Sun4V machine description handling. 1/* mdesc.c: Sun4V machine description handling.
2 * 2 *
3 * Copyright (C) 2007 David S. Miller <davem@davemloft.net> 3 * Copyright (C) 2007, 2008 David S. Miller <davem@davemloft.net>
4 */ 4 */
5#include <linux/kernel.h> 5#include <linux/kernel.h>
6#include <linux/types.h> 6#include <linux/types.h>
7#include <linux/bootmem.h> 7#include <linux/lmb.h>
8#include <linux/log2.h> 8#include <linux/log2.h>
9#include <linux/list.h> 9#include <linux/list.h>
10#include <linux/slab.h> 10#include <linux/slab.h>
@@ -84,24 +84,28 @@ static void mdesc_handle_init(struct mdesc_handle *hp,
84 hp->handle_size = handle_size; 84 hp->handle_size = handle_size;
85} 85}
86 86
87static struct mdesc_handle * __init mdesc_bootmem_alloc(unsigned int mdesc_size) 87static struct mdesc_handle * __init mdesc_lmb_alloc(unsigned int mdesc_size)
88{ 88{
89 struct mdesc_handle *hp;
90 unsigned int handle_size, alloc_size; 89 unsigned int handle_size, alloc_size;
90 struct mdesc_handle *hp;
91 unsigned long paddr;
91 92
92 handle_size = (sizeof(struct mdesc_handle) - 93 handle_size = (sizeof(struct mdesc_handle) -
93 sizeof(struct mdesc_hdr) + 94 sizeof(struct mdesc_hdr) +
94 mdesc_size); 95 mdesc_size);
95 alloc_size = PAGE_ALIGN(handle_size); 96 alloc_size = PAGE_ALIGN(handle_size);
96 97
97 hp = __alloc_bootmem(alloc_size, PAGE_SIZE, 0UL); 98 paddr = lmb_alloc(alloc_size, PAGE_SIZE);
98 if (hp)
99 mdesc_handle_init(hp, handle_size, hp);
100 99
100 hp = NULL;
101 if (paddr) {
102 hp = __va(paddr);
103 mdesc_handle_init(hp, handle_size, hp);
104 }
101 return hp; 105 return hp;
102} 106}
103 107
104static void mdesc_bootmem_free(struct mdesc_handle *hp) 108static void mdesc_lmb_free(struct mdesc_handle *hp)
105{ 109{
106 unsigned int alloc_size, handle_size = hp->handle_size; 110 unsigned int alloc_size, handle_size = hp->handle_size;
107 unsigned long start, end; 111 unsigned long start, end;
@@ -124,9 +128,9 @@ static void mdesc_bootmem_free(struct mdesc_handle *hp)
124 } 128 }
125} 129}
126 130
127static struct mdesc_mem_ops bootmem_mdesc_ops = { 131static struct mdesc_mem_ops lmb_mdesc_ops = {
128 .alloc = mdesc_bootmem_alloc, 132 .alloc = mdesc_lmb_alloc,
129 .free = mdesc_bootmem_free, 133 .free = mdesc_lmb_free,
130}; 134};
131 135
132static struct mdesc_handle *mdesc_kmalloc(unsigned int mdesc_size) 136static struct mdesc_handle *mdesc_kmalloc(unsigned int mdesc_size)
@@ -888,7 +892,7 @@ void __init sun4v_mdesc_init(void)
888 892
889 printk("MDESC: Size is %lu bytes.\n", len); 893 printk("MDESC: Size is %lu bytes.\n", len);
890 894
891 hp = mdesc_alloc(len, &bootmem_mdesc_ops); 895 hp = mdesc_alloc(len, &lmb_mdesc_ops);
892 if (hp == NULL) { 896 if (hp == NULL) {
893 prom_printf("MDESC: alloc of %lu bytes failed.\n", len); 897 prom_printf("MDESC: alloc of %lu bytes failed.\n", len);
894 prom_halt(); 898 prom_halt();
diff --git a/arch/sparc64/kernel/of_device.c b/arch/sparc64/kernel/of_device.c
index 0fd9db95b896..9e58e8cba1c3 100644
--- a/arch/sparc64/kernel/of_device.c
+++ b/arch/sparc64/kernel/of_device.c
@@ -6,6 +6,7 @@
6#include <linux/mod_devicetable.h> 6#include <linux/mod_devicetable.h>
7#include <linux/slab.h> 7#include <linux/slab.h>
8#include <linux/errno.h> 8#include <linux/errno.h>
9#include <linux/irq.h>
9#include <linux/of_device.h> 10#include <linux/of_device.h>
10#include <linux/of_platform.h> 11#include <linux/of_platform.h>
11 12
@@ -660,6 +661,7 @@ static unsigned int __init build_one_device_irq(struct of_device *op,
660 struct device_node *dp = op->node; 661 struct device_node *dp = op->node;
661 struct device_node *pp, *ip; 662 struct device_node *pp, *ip;
662 unsigned int orig_irq = irq; 663 unsigned int orig_irq = irq;
664 int nid;
663 665
664 if (irq == 0xffffffff) 666 if (irq == 0xffffffff)
665 return irq; 667 return irq;
@@ -672,7 +674,7 @@ static unsigned int __init build_one_device_irq(struct of_device *op,
672 printk("%s: direct translate %x --> %x\n", 674 printk("%s: direct translate %x --> %x\n",
673 dp->full_name, orig_irq, irq); 675 dp->full_name, orig_irq, irq);
674 676
675 return irq; 677 goto out;
676 } 678 }
677 679
678 /* Something more complicated. Walk up to the root, applying 680 /* Something more complicated. Walk up to the root, applying
@@ -744,6 +746,14 @@ static unsigned int __init build_one_device_irq(struct of_device *op,
744 printk("%s: Apply IRQ trans [%s] %x --> %x\n", 746 printk("%s: Apply IRQ trans [%s] %x --> %x\n",
745 op->node->full_name, ip->full_name, orig_irq, irq); 747 op->node->full_name, ip->full_name, orig_irq, irq);
746 748
749out:
750 nid = of_node_to_nid(dp);
751 if (nid != -1) {
752 cpumask_t numa_mask = node_to_cpumask(nid);
753
754 irq_set_affinity(irq, numa_mask);
755 }
756
747 return irq; 757 return irq;
748} 758}
749 759
diff --git a/arch/sparc64/kernel/pci.c b/arch/sparc64/kernel/pci.c
index 545356b00e2e..49f912766519 100644
--- a/arch/sparc64/kernel/pci.c
+++ b/arch/sparc64/kernel/pci.c
@@ -369,10 +369,12 @@ struct pci_dev *of_create_pci_dev(struct pci_pbm_info *pbm,
369 sd->host_controller = pbm; 369 sd->host_controller = pbm;
370 sd->prom_node = node; 370 sd->prom_node = node;
371 sd->op = of_find_device_by_node(node); 371 sd->op = of_find_device_by_node(node);
372 sd->numa_node = pbm->numa_node;
372 373
373 sd = &sd->op->dev.archdata; 374 sd = &sd->op->dev.archdata;
374 sd->iommu = pbm->iommu; 375 sd->iommu = pbm->iommu;
375 sd->stc = &pbm->stc; 376 sd->stc = &pbm->stc;
377 sd->numa_node = pbm->numa_node;
376 378
377 type = of_get_property(node, "device_type", NULL); 379 type = of_get_property(node, "device_type", NULL);
378 if (type == NULL) 380 if (type == NULL)
@@ -1159,6 +1161,16 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
1159 return 0; 1161 return 0;
1160} 1162}
1161 1163
1164#ifdef CONFIG_NUMA
1165int pcibus_to_node(struct pci_bus *pbus)
1166{
1167 struct pci_pbm_info *pbm = pbus->sysdata;
1168
1169 return pbm->numa_node;
1170}
1171EXPORT_SYMBOL(pcibus_to_node);
1172#endif
1173
1162/* Return the domain nuber for this pci bus */ 1174/* Return the domain nuber for this pci bus */
1163 1175
1164int pci_domain_nr(struct pci_bus *pbus) 1176int pci_domain_nr(struct pci_bus *pbus)
diff --git a/arch/sparc64/kernel/pci_fire.c b/arch/sparc64/kernel/pci_fire.c
index 7571ed563147..d23bb6f53cda 100644
--- a/arch/sparc64/kernel/pci_fire.c
+++ b/arch/sparc64/kernel/pci_fire.c
@@ -71,7 +71,8 @@ static int pci_fire_pbm_iommu_init(struct pci_pbm_info *pbm)
71 */ 71 */
72 fire_write(iommu->iommu_flushinv, ~(u64)0); 72 fire_write(iommu->iommu_flushinv, ~(u64)0);
73 73
74 err = iommu_table_init(iommu, tsbsize * 8 * 1024, vdma[0], dma_mask); 74 err = iommu_table_init(iommu, tsbsize * 8 * 1024, vdma[0], dma_mask,
75 pbm->numa_node);
75 if (err) 76 if (err)
76 return err; 77 return err;
77 78
@@ -449,6 +450,8 @@ static int __init pci_fire_pbm_init(struct pci_controller_info *p,
449 pbm->next = pci_pbm_root; 450 pbm->next = pci_pbm_root;
450 pci_pbm_root = pbm; 451 pci_pbm_root = pbm;
451 452
453 pbm->numa_node = -1;
454
452 pbm->scan_bus = pci_fire_scan_bus; 455 pbm->scan_bus = pci_fire_scan_bus;
453 pbm->pci_ops = &sun4u_pci_ops; 456 pbm->pci_ops = &sun4u_pci_ops;
454 pbm->config_space_reg_bits = 12; 457 pbm->config_space_reg_bits = 12;
diff --git a/arch/sparc64/kernel/pci_impl.h b/arch/sparc64/kernel/pci_impl.h
index 4a50da13ce48..218bac4ff79b 100644
--- a/arch/sparc64/kernel/pci_impl.h
+++ b/arch/sparc64/kernel/pci_impl.h
@@ -148,6 +148,8 @@ struct pci_pbm_info {
148 struct pci_bus *pci_bus; 148 struct pci_bus *pci_bus;
149 void (*scan_bus)(struct pci_pbm_info *); 149 void (*scan_bus)(struct pci_pbm_info *);
150 struct pci_ops *pci_ops; 150 struct pci_ops *pci_ops;
151
152 int numa_node;
151}; 153};
152 154
153struct pci_controller_info { 155struct pci_controller_info {
@@ -161,8 +163,6 @@ extern struct pci_pbm_info *pci_pbm_root;
161extern int pci_num_pbms; 163extern int pci_num_pbms;
162 164
163/* PCI bus scanning and fixup support. */ 165/* PCI bus scanning and fixup support. */
164extern void pci_iommu_table_init(struct iommu *iommu, int tsbsize,
165 u32 dma_offset, u32 dma_addr_mask);
166extern void pci_get_pbm_props(struct pci_pbm_info *pbm); 166extern void pci_get_pbm_props(struct pci_pbm_info *pbm);
167extern struct pci_bus *pci_scan_one_pbm(struct pci_pbm_info *pbm); 167extern struct pci_bus *pci_scan_one_pbm(struct pci_pbm_info *pbm);
168extern void pci_determine_mem_io_space(struct pci_pbm_info *pbm); 168extern void pci_determine_mem_io_space(struct pci_pbm_info *pbm);
diff --git a/arch/sparc64/kernel/pci_msi.c b/arch/sparc64/kernel/pci_msi.c
index d6d64b44af63..db5e8fd8f674 100644
--- a/arch/sparc64/kernel/pci_msi.c
+++ b/arch/sparc64/kernel/pci_msi.c
@@ -279,11 +279,17 @@ static int bringup_one_msi_queue(struct pci_pbm_info *pbm,
279 unsigned long devino) 279 unsigned long devino)
280{ 280{
281 int irq = ops->msiq_build_irq(pbm, msiqid, devino); 281 int irq = ops->msiq_build_irq(pbm, msiqid, devino);
282 int err; 282 int err, nid;
283 283
284 if (irq < 0) 284 if (irq < 0)
285 return irq; 285 return irq;
286 286
287 nid = pbm->numa_node;
288 if (nid != -1) {
289 cpumask_t numa_mask = node_to_cpumask(nid);
290
291 irq_set_affinity(irq, numa_mask);
292 }
287 err = request_irq(irq, sparc64_msiq_interrupt, 0, 293 err = request_irq(irq, sparc64_msiq_interrupt, 0,
288 "MSIQ", 294 "MSIQ",
289 &pbm->msiq_irq_cookies[msiqid - pbm->msiq_first]); 295 &pbm->msiq_irq_cookies[msiqid - pbm->msiq_first]);
diff --git a/arch/sparc64/kernel/pci_psycho.c b/arch/sparc64/kernel/pci_psycho.c
index 0bad96e5d184..994dbe0603da 100644
--- a/arch/sparc64/kernel/pci_psycho.c
+++ b/arch/sparc64/kernel/pci_psycho.c
@@ -848,7 +848,8 @@ static int psycho_iommu_init(struct pci_pbm_info *pbm)
848 /* Leave diag mode enabled for full-flushing done 848 /* Leave diag mode enabled for full-flushing done
849 * in pci_iommu.c 849 * in pci_iommu.c
850 */ 850 */
851 err = iommu_table_init(iommu, IO_TSB_SIZE, 0xc0000000, 0xffffffff); 851 err = iommu_table_init(iommu, IO_TSB_SIZE, 0xc0000000, 0xffffffff,
852 pbm->numa_node);
852 if (err) 853 if (err)
853 return err; 854 return err;
854 855
@@ -979,6 +980,8 @@ static void __init psycho_pbm_init(struct pci_controller_info *p,
979 pbm->next = pci_pbm_root; 980 pbm->next = pci_pbm_root;
980 pci_pbm_root = pbm; 981 pci_pbm_root = pbm;
981 982
983 pbm->numa_node = -1;
984
982 pbm->scan_bus = psycho_scan_bus; 985 pbm->scan_bus = psycho_scan_bus;
983 pbm->pci_ops = &sun4u_pci_ops; 986 pbm->pci_ops = &sun4u_pci_ops;
984 pbm->config_space_reg_bits = 8; 987 pbm->config_space_reg_bits = 8;
diff --git a/arch/sparc64/kernel/pci_sabre.c b/arch/sparc64/kernel/pci_sabre.c
index 1c5f5fa2339f..4c34195baf37 100644
--- a/arch/sparc64/kernel/pci_sabre.c
+++ b/arch/sparc64/kernel/pci_sabre.c
@@ -704,7 +704,7 @@ static int sabre_iommu_init(struct pci_pbm_info *pbm,
704 * in pci_iommu.c 704 * in pci_iommu.c
705 */ 705 */
706 err = iommu_table_init(iommu, tsbsize * 1024 * 8, 706 err = iommu_table_init(iommu, tsbsize * 1024 * 8,
707 dvma_offset, dma_mask); 707 dvma_offset, dma_mask, pbm->numa_node);
708 if (err) 708 if (err)
709 return err; 709 return err;
710 710
@@ -737,6 +737,8 @@ static void __init sabre_pbm_init(struct pci_controller_info *p,
737 pbm->name = dp->full_name; 737 pbm->name = dp->full_name;
738 printk("%s: SABRE PCI Bus Module\n", pbm->name); 738 printk("%s: SABRE PCI Bus Module\n", pbm->name);
739 739
740 pbm->numa_node = -1;
741
740 pbm->scan_bus = sabre_scan_bus; 742 pbm->scan_bus = sabre_scan_bus;
741 pbm->pci_ops = &sun4u_pci_ops; 743 pbm->pci_ops = &sun4u_pci_ops;
742 pbm->config_space_reg_bits = 8; 744 pbm->config_space_reg_bits = 8;
diff --git a/arch/sparc64/kernel/pci_schizo.c b/arch/sparc64/kernel/pci_schizo.c
index e30609362322..615edd9c8e2a 100644
--- a/arch/sparc64/kernel/pci_schizo.c
+++ b/arch/sparc64/kernel/pci_schizo.c
@@ -1220,7 +1220,8 @@ static int schizo_pbm_iommu_init(struct pci_pbm_info *pbm)
1220 /* Leave diag mode enabled for full-flushing done 1220 /* Leave diag mode enabled for full-flushing done
1221 * in pci_iommu.c 1221 * in pci_iommu.c
1222 */ 1222 */
1223 err = iommu_table_init(iommu, tsbsize * 8 * 1024, vdma[0], dma_mask); 1223 err = iommu_table_init(iommu, tsbsize * 8 * 1024, vdma[0], dma_mask,
1224 pbm->numa_node);
1224 if (err) 1225 if (err)
1225 return err; 1226 return err;
1226 1227
@@ -1379,6 +1380,8 @@ static int __init schizo_pbm_init(struct pci_controller_info *p,
1379 pbm->next = pci_pbm_root; 1380 pbm->next = pci_pbm_root;
1380 pci_pbm_root = pbm; 1381 pci_pbm_root = pbm;
1381 1382
1383 pbm->numa_node = -1;
1384
1382 pbm->scan_bus = schizo_scan_bus; 1385 pbm->scan_bus = schizo_scan_bus;
1383 pbm->pci_ops = &sun4u_pci_ops; 1386 pbm->pci_ops = &sun4u_pci_ops;
1384 pbm->config_space_reg_bits = 8; 1387 pbm->config_space_reg_bits = 8;
diff --git a/arch/sparc64/kernel/pci_sun4v.c b/arch/sparc64/kernel/pci_sun4v.c
index 01839706bd52..e2bb9790039c 100644
--- a/arch/sparc64/kernel/pci_sun4v.c
+++ b/arch/sparc64/kernel/pci_sun4v.c
@@ -127,10 +127,12 @@ static inline long iommu_batch_end(void)
127static void *dma_4v_alloc_coherent(struct device *dev, size_t size, 127static void *dma_4v_alloc_coherent(struct device *dev, size_t size,
128 dma_addr_t *dma_addrp, gfp_t gfp) 128 dma_addr_t *dma_addrp, gfp_t gfp)
129{ 129{
130 struct iommu *iommu;
131 unsigned long flags, order, first_page, npages, n; 130 unsigned long flags, order, first_page, npages, n;
131 struct iommu *iommu;
132 struct page *page;
132 void *ret; 133 void *ret;
133 long entry; 134 long entry;
135 int nid;
134 136
135 size = IO_PAGE_ALIGN(size); 137 size = IO_PAGE_ALIGN(size);
136 order = get_order(size); 138 order = get_order(size);
@@ -139,10 +141,12 @@ static void *dma_4v_alloc_coherent(struct device *dev, size_t size,
139 141
140 npages = size >> IO_PAGE_SHIFT; 142 npages = size >> IO_PAGE_SHIFT;
141 143
142 first_page = __get_free_pages(gfp, order); 144 nid = dev->archdata.numa_node;
143 if (unlikely(first_page == 0UL)) 145 page = alloc_pages_node(nid, gfp, order);
146 if (unlikely(!page))
144 return NULL; 147 return NULL;
145 148
149 first_page = (unsigned long) page_address(page);
146 memset((char *)first_page, 0, PAGE_SIZE << order); 150 memset((char *)first_page, 0, PAGE_SIZE << order);
147 151
148 iommu = dev->archdata.iommu; 152 iommu = dev->archdata.iommu;
@@ -899,6 +903,8 @@ static void __init pci_sun4v_pbm_init(struct pci_controller_info *p,
899 pbm->next = pci_pbm_root; 903 pbm->next = pci_pbm_root;
900 pci_pbm_root = pbm; 904 pci_pbm_root = pbm;
901 905
906 pbm->numa_node = of_node_to_nid(dp);
907
902 pbm->scan_bus = pci_sun4v_scan_bus; 908 pbm->scan_bus = pci_sun4v_scan_bus;
903 pbm->pci_ops = &sun4v_pci_ops; 909 pbm->pci_ops = &sun4v_pci_ops;
904 pbm->config_space_reg_bits = 12; 910 pbm->config_space_reg_bits = 12;
@@ -913,6 +919,7 @@ static void __init pci_sun4v_pbm_init(struct pci_controller_info *p,
913 pbm->name = dp->full_name; 919 pbm->name = dp->full_name;
914 920
915 printk("%s: SUN4V PCI Bus Module\n", pbm->name); 921 printk("%s: SUN4V PCI Bus Module\n", pbm->name);
922 printk("%s: On NUMA node %d\n", pbm->name, pbm->numa_node);
916 923
917 pci_determine_mem_io_space(pbm); 924 pci_determine_mem_io_space(pbm);
918 925
diff --git a/arch/sparc64/kernel/prom.c b/arch/sparc64/kernel/prom.c
index 68964ddcde1e..ed03a18d3b36 100644
--- a/arch/sparc64/kernel/prom.c
+++ b/arch/sparc64/kernel/prom.c
@@ -19,8 +19,8 @@
19#include <linux/types.h> 19#include <linux/types.h>
20#include <linux/string.h> 20#include <linux/string.h>
21#include <linux/mm.h> 21#include <linux/mm.h>
22#include <linux/bootmem.h>
23#include <linux/module.h> 22#include <linux/module.h>
23#include <linux/lmb.h>
24 24
25#include <asm/prom.h> 25#include <asm/prom.h>
26#include <asm/of_device.h> 26#include <asm/of_device.h>
@@ -122,16 +122,20 @@ int of_find_in_proplist(const char *list, const char *match, int len)
122} 122}
123EXPORT_SYMBOL(of_find_in_proplist); 123EXPORT_SYMBOL(of_find_in_proplist);
124 124
125static unsigned int prom_early_allocated; 125static unsigned int prom_early_allocated __initdata;
126 126
127static void * __init prom_early_alloc(unsigned long size) 127static void * __init prom_early_alloc(unsigned long size)
128{ 128{
129 unsigned long paddr = lmb_alloc(size, SMP_CACHE_BYTES);
129 void *ret; 130 void *ret;
130 131
131 ret = __alloc_bootmem(size, SMP_CACHE_BYTES, 0UL); 132 if (!paddr) {
132 if (ret != NULL) 133 prom_printf("prom_early_alloc(%lu) failed\n");
133 memset(ret, 0, size); 134 prom_halt();
135 }
134 136
137 ret = __va(paddr);
138 memset(ret, 0, size);
135 prom_early_allocated += size; 139 prom_early_allocated += size;
136 140
137 return ret; 141 return ret;
diff --git a/arch/sparc64/kernel/rtrap.S b/arch/sparc64/kernel/rtrap.S
index 079d18a11d24..ecf6753b204a 100644
--- a/arch/sparc64/kernel/rtrap.S
+++ b/arch/sparc64/kernel/rtrap.S
@@ -18,12 +18,6 @@
18#define RTRAP_PSTATE_IRQOFF (PSTATE_RMO|PSTATE_PEF|PSTATE_PRIV) 18#define RTRAP_PSTATE_IRQOFF (PSTATE_RMO|PSTATE_PEF|PSTATE_PRIV)
19#define RTRAP_PSTATE_AG_IRQOFF (PSTATE_RMO|PSTATE_PEF|PSTATE_PRIV|PSTATE_AG) 19#define RTRAP_PSTATE_AG_IRQOFF (PSTATE_RMO|PSTATE_PEF|PSTATE_PRIV|PSTATE_AG)
20 20
21 /* Register %l6 keeps track of whether we are returning
22 * from a system call or not. It is cleared if we call
23 * do_notify_resume, and it must not be otherwise modified
24 * until we fully commit to returning to userspace.
25 */
26
27 .text 21 .text
28 .align 32 22 .align 32
29__handle_softirq: 23__handle_softirq:
@@ -56,14 +50,12 @@ __handle_user_windows:
56 be,pt %xcc, __handle_user_windows_continue 50 be,pt %xcc, __handle_user_windows_continue
57 nop 51 nop
58 mov %l5, %o1 52 mov %l5, %o1
59 mov %l6, %o2
60 add %sp, PTREGS_OFF, %o0 53 add %sp, PTREGS_OFF, %o0
61 mov %l0, %o3 54 mov %l0, %o2
62 55
63 call do_notify_resume 56 call do_notify_resume
64 wrpr %g0, RTRAP_PSTATE, %pstate 57 wrpr %g0, RTRAP_PSTATE, %pstate
65 wrpr %g0, RTRAP_PSTATE_IRQOFF, %pstate 58 wrpr %g0, RTRAP_PSTATE_IRQOFF, %pstate
66 clr %l6
67 /* Signal delivery can modify pt_regs tstate, so we must 59 /* Signal delivery can modify pt_regs tstate, so we must
68 * reload it. 60 * reload it.
69 */ 61 */
@@ -99,14 +91,12 @@ __handle_perfctrs:
99 be,pt %xcc, __handle_perfctrs_continue 91 be,pt %xcc, __handle_perfctrs_continue
100 sethi %hi(TSTATE_PEF), %o0 92 sethi %hi(TSTATE_PEF), %o0
101 mov %l5, %o1 93 mov %l5, %o1
102 mov %l6, %o2
103 add %sp, PTREGS_OFF, %o0 94 add %sp, PTREGS_OFF, %o0
104 mov %l0, %o3 95 mov %l0, %o2
105 call do_notify_resume 96 call do_notify_resume
106 97
107 wrpr %g0, RTRAP_PSTATE, %pstate 98 wrpr %g0, RTRAP_PSTATE, %pstate
108 wrpr %g0, RTRAP_PSTATE_IRQOFF, %pstate 99 wrpr %g0, RTRAP_PSTATE_IRQOFF, %pstate
109 clr %l6
110 /* Signal delivery can modify pt_regs tstate, so we must 100 /* Signal delivery can modify pt_regs tstate, so we must
111 * reload it. 101 * reload it.
112 */ 102 */
@@ -127,13 +117,11 @@ __handle_userfpu:
127 117
128__handle_signal: 118__handle_signal:
129 mov %l5, %o1 119 mov %l5, %o1
130 mov %l6, %o2
131 add %sp, PTREGS_OFF, %o0 120 add %sp, PTREGS_OFF, %o0
132 mov %l0, %o3 121 mov %l0, %o2
133 call do_notify_resume 122 call do_notify_resume
134 wrpr %g0, RTRAP_PSTATE, %pstate 123 wrpr %g0, RTRAP_PSTATE, %pstate
135 wrpr %g0, RTRAP_PSTATE_IRQOFF, %pstate 124 wrpr %g0, RTRAP_PSTATE_IRQOFF, %pstate
136 clr %l6
137 125
138 /* Signal delivery can modify pt_regs tstate, so we must 126 /* Signal delivery can modify pt_regs tstate, so we must
139 * reload it. 127 * reload it.
@@ -145,9 +133,8 @@ __handle_signal:
145 andn %l1, %l4, %l1 133 andn %l1, %l4, %l1
146 134
147 .align 64 135 .align 64
148 .globl rtrap_irq, rtrap_clr_l6, rtrap, irqsz_patchme, rtrap_xcall 136 .globl rtrap_irq, rtrap, irqsz_patchme, rtrap_xcall
149rtrap_irq: 137rtrap_irq:
150rtrap_clr_l6: clr %l6
151rtrap: 138rtrap:
152#ifndef CONFIG_SMP 139#ifndef CONFIG_SMP
153 sethi %hi(per_cpu____cpu_data), %l0 140 sethi %hi(per_cpu____cpu_data), %l0
diff --git a/arch/sparc64/kernel/sbus.c b/arch/sparc64/kernel/sbus.c
index d1fb13ba02b5..fa2827c4a3ad 100644
--- a/arch/sparc64/kernel/sbus.c
+++ b/arch/sparc64/kernel/sbus.c
@@ -544,6 +544,7 @@ static void __init sbus_iommu_init(int __node, struct sbus_bus *sbus)
544 544
545 sbus->ofdev.dev.archdata.iommu = iommu; 545 sbus->ofdev.dev.archdata.iommu = iommu;
546 sbus->ofdev.dev.archdata.stc = strbuf; 546 sbus->ofdev.dev.archdata.stc = strbuf;
547 sbus->ofdev.dev.archdata.numa_node = -1;
547 548
548 reg_base = regs + SYSIO_IOMMUREG_BASE; 549 reg_base = regs + SYSIO_IOMMUREG_BASE;
549 iommu->iommu_control = reg_base + IOMMU_CONTROL; 550 iommu->iommu_control = reg_base + IOMMU_CONTROL;
@@ -575,7 +576,7 @@ static void __init sbus_iommu_init(int __node, struct sbus_bus *sbus)
575 sbus->portid, regs); 576 sbus->portid, regs);
576 577
577 /* Setup for TSB_SIZE=7, TBW_SIZE=0, MMU_DE=1, MMU_EN=1 */ 578 /* Setup for TSB_SIZE=7, TBW_SIZE=0, MMU_DE=1, MMU_EN=1 */
578 if (iommu_table_init(iommu, IO_TSB_SIZE, MAP_BASE, 0xffffffff)) 579 if (iommu_table_init(iommu, IO_TSB_SIZE, MAP_BASE, 0xffffffff, -1))
579 goto fatal_memory_error; 580 goto fatal_memory_error;
580 581
581 control = upa_readq(iommu->iommu_control); 582 control = upa_readq(iommu->iommu_control);
diff --git a/arch/sparc64/kernel/setup.c b/arch/sparc64/kernel/setup.c
index 6acb4c51cfe4..da5e6ee0c661 100644
--- a/arch/sparc64/kernel/setup.c
+++ b/arch/sparc64/kernel/setup.c
@@ -82,7 +82,7 @@ unsigned long cmdline_memory_size = 0;
82static struct console prom_early_console = { 82static struct console prom_early_console = {
83 .name = "earlyprom", 83 .name = "earlyprom",
84 .write = prom_console_write, 84 .write = prom_console_write,
85 .flags = CON_PRINTBUFFER | CON_BOOT, 85 .flags = CON_PRINTBUFFER | CON_BOOT | CON_ANYTIME,
86 .index = -1, 86 .index = -1,
87}; 87};
88 88
@@ -281,6 +281,7 @@ void __init setup_arch(char **cmdline_p)
281 /* Initialize PROM console and command line. */ 281 /* Initialize PROM console and command line. */
282 *cmdline_p = prom_getbootargs(); 282 *cmdline_p = prom_getbootargs();
283 strcpy(boot_command_line, *cmdline_p); 283 strcpy(boot_command_line, *cmdline_p);
284 parse_early_param();
284 285
285 boot_flags_init(*cmdline_p); 286 boot_flags_init(*cmdline_p);
286 register_console(&prom_early_console); 287 register_console(&prom_early_console);
diff --git a/arch/sparc64/kernel/signal.c b/arch/sparc64/kernel/signal.c
index 1c47009eb5ec..77a3e8592cbc 100644
--- a/arch/sparc64/kernel/signal.c
+++ b/arch/sparc64/kernel/signal.c
@@ -510,15 +510,20 @@ static inline void syscall_restart(unsigned long orig_i0, struct pt_regs *regs,
510 * want to handle. Thus you cannot kill init even with a SIGKILL even by 510 * want to handle. Thus you cannot kill init even with a SIGKILL even by
511 * mistake. 511 * mistake.
512 */ 512 */
513static void do_signal(struct pt_regs *regs, unsigned long orig_i0, int restart_syscall) 513static void do_signal(struct pt_regs *regs, unsigned long orig_i0)
514{ 514{
515 siginfo_t info;
516 struct signal_deliver_cookie cookie; 515 struct signal_deliver_cookie cookie;
517 struct k_sigaction ka; 516 struct k_sigaction ka;
518 int signr;
519 sigset_t *oldset; 517 sigset_t *oldset;
518 siginfo_t info;
519 int signr, tt;
520 520
521 cookie.restart_syscall = restart_syscall; 521 tt = regs->magic & 0x1ff;
522 if (tt == 0x110 || tt == 0x111 || tt == 0x16d) {
523 regs->magic &= ~0x1ff;
524 cookie.restart_syscall = 1;
525 } else
526 cookie.restart_syscall = 0;
522 cookie.orig_i0 = orig_i0; 527 cookie.orig_i0 = orig_i0;
523 528
524 if (test_thread_flag(TIF_RESTORE_SIGMASK)) 529 if (test_thread_flag(TIF_RESTORE_SIGMASK))
@@ -529,9 +534,8 @@ static void do_signal(struct pt_regs *regs, unsigned long orig_i0, int restart_s
529#ifdef CONFIG_SPARC32_COMPAT 534#ifdef CONFIG_SPARC32_COMPAT
530 if (test_thread_flag(TIF_32BIT)) { 535 if (test_thread_flag(TIF_32BIT)) {
531 extern void do_signal32(sigset_t *, struct pt_regs *, 536 extern void do_signal32(sigset_t *, struct pt_regs *,
532 unsigned long, int); 537 struct signal_deliver_cookie *);
533 do_signal32(oldset, regs, orig_i0, 538 do_signal32(oldset, regs, &cookie);
534 cookie.restart_syscall);
535 return; 539 return;
536 } 540 }
537#endif 541#endif
@@ -539,7 +543,7 @@ static void do_signal(struct pt_regs *regs, unsigned long orig_i0, int restart_s
539 signr = get_signal_to_deliver(&info, &ka, regs, &cookie); 543 signr = get_signal_to_deliver(&info, &ka, regs, &cookie);
540 if (signr > 0) { 544 if (signr > 0) {
541 if (cookie.restart_syscall) 545 if (cookie.restart_syscall)
542 syscall_restart(orig_i0, regs, &ka.sa); 546 syscall_restart(cookie.orig_i0, regs, &ka.sa);
543 handle_signal(signr, &ka, &info, oldset, regs); 547 handle_signal(signr, &ka, &info, oldset, regs);
544 548
545 /* a signal was successfully delivered; the saved 549 /* a signal was successfully delivered; the saved
@@ -576,11 +580,10 @@ static void do_signal(struct pt_regs *regs, unsigned long orig_i0, int restart_s
576 } 580 }
577} 581}
578 582
579void do_notify_resume(struct pt_regs *regs, unsigned long orig_i0, int restart_syscall, 583void do_notify_resume(struct pt_regs *regs, unsigned long orig_i0, unsigned long thread_info_flags)
580 unsigned long thread_info_flags)
581{ 584{
582 if (thread_info_flags & (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK)) 585 if (thread_info_flags & (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK))
583 do_signal(regs, orig_i0, restart_syscall); 586 do_signal(regs, orig_i0);
584} 587}
585 588
586void ptrace_signal_deliver(struct pt_regs *regs, void *cookie) 589void ptrace_signal_deliver(struct pt_regs *regs, void *cookie)
diff --git a/arch/sparc64/kernel/signal32.c b/arch/sparc64/kernel/signal32.c
index 74e0512f135c..43cdec64d9c9 100644
--- a/arch/sparc64/kernel/signal32.c
+++ b/arch/sparc64/kernel/signal32.c
@@ -982,20 +982,16 @@ static inline void syscall_restart32(unsigned long orig_i0, struct pt_regs *regs
982 * mistake. 982 * mistake.
983 */ 983 */
984void do_signal32(sigset_t *oldset, struct pt_regs * regs, 984void do_signal32(sigset_t *oldset, struct pt_regs * regs,
985 unsigned long orig_i0, int restart_syscall) 985 struct signal_deliver_cookie *cookie)
986{ 986{
987 siginfo_t info;
988 struct signal_deliver_cookie cookie;
989 struct k_sigaction ka; 987 struct k_sigaction ka;
988 siginfo_t info;
990 int signr; 989 int signr;
991 990
992 cookie.restart_syscall = restart_syscall; 991 signr = get_signal_to_deliver(&info, &ka, regs, cookie);
993 cookie.orig_i0 = orig_i0;
994
995 signr = get_signal_to_deliver(&info, &ka, regs, &cookie);
996 if (signr > 0) { 992 if (signr > 0) {
997 if (cookie.restart_syscall) 993 if (cookie->restart_syscall)
998 syscall_restart32(orig_i0, regs, &ka.sa); 994 syscall_restart32(cookie->orig_i0, regs, &ka.sa);
999 handle_signal32(signr, &ka, &info, oldset, regs); 995 handle_signal32(signr, &ka, &info, oldset, regs);
1000 996
1001 /* a signal was successfully delivered; the saved 997 /* a signal was successfully delivered; the saved
@@ -1007,16 +1003,16 @@ void do_signal32(sigset_t *oldset, struct pt_regs * regs,
1007 clear_thread_flag(TIF_RESTORE_SIGMASK); 1003 clear_thread_flag(TIF_RESTORE_SIGMASK);
1008 return; 1004 return;
1009 } 1005 }
1010 if (cookie.restart_syscall && 1006 if (cookie->restart_syscall &&
1011 (regs->u_regs[UREG_I0] == ERESTARTNOHAND || 1007 (regs->u_regs[UREG_I0] == ERESTARTNOHAND ||
1012 regs->u_regs[UREG_I0] == ERESTARTSYS || 1008 regs->u_regs[UREG_I0] == ERESTARTSYS ||
1013 regs->u_regs[UREG_I0] == ERESTARTNOINTR)) { 1009 regs->u_regs[UREG_I0] == ERESTARTNOINTR)) {
1014 /* replay the system call when we are done */ 1010 /* replay the system call when we are done */
1015 regs->u_regs[UREG_I0] = cookie.orig_i0; 1011 regs->u_regs[UREG_I0] = cookie->orig_i0;
1016 regs->tpc -= 4; 1012 regs->tpc -= 4;
1017 regs->tnpc -= 4; 1013 regs->tnpc -= 4;
1018 } 1014 }
1019 if (cookie.restart_syscall && 1015 if (cookie->restart_syscall &&
1020 regs->u_regs[UREG_I0] == ERESTART_RESTARTBLOCK) { 1016 regs->u_regs[UREG_I0] == ERESTART_RESTARTBLOCK) {
1021 regs->u_regs[UREG_G1] = __NR_restart_syscall; 1017 regs->u_regs[UREG_G1] = __NR_restart_syscall;
1022 regs->tpc -= 4; 1018 regs->tpc -= 4;
diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c
index 59f020d69d4c..524b88920947 100644
--- a/arch/sparc64/kernel/smp.c
+++ b/arch/sparc64/kernel/smp.c
@@ -20,7 +20,7 @@
20#include <linux/cache.h> 20#include <linux/cache.h>
21#include <linux/jiffies.h> 21#include <linux/jiffies.h>
22#include <linux/profile.h> 22#include <linux/profile.h>
23#include <linux/bootmem.h> 23#include <linux/lmb.h>
24 24
25#include <asm/head.h> 25#include <asm/head.h>
26#include <asm/ptrace.h> 26#include <asm/ptrace.h>
@@ -1431,7 +1431,7 @@ EXPORT_SYMBOL(__per_cpu_shift);
1431 1431
1432void __init real_setup_per_cpu_areas(void) 1432void __init real_setup_per_cpu_areas(void)
1433{ 1433{
1434 unsigned long goal, size, i; 1434 unsigned long paddr, goal, size, i;
1435 char *ptr; 1435 char *ptr;
1436 1436
1437 /* Copy section for each CPU (we discard the original) */ 1437 /* Copy section for each CPU (we discard the original) */
@@ -1441,8 +1441,13 @@ void __init real_setup_per_cpu_areas(void)
1441 for (size = PAGE_SIZE; size < goal; size <<= 1UL) 1441 for (size = PAGE_SIZE; size < goal; size <<= 1UL)
1442 __per_cpu_shift++; 1442 __per_cpu_shift++;
1443 1443
1444 ptr = alloc_bootmem_pages(size * NR_CPUS); 1444 paddr = lmb_alloc(size * NR_CPUS, PAGE_SIZE);
1445 if (!paddr) {
1446 prom_printf("Cannot allocate per-cpu memory.\n");
1447 prom_halt();
1448 }
1445 1449
1450 ptr = __va(paddr);
1446 __per_cpu_base = ptr - __per_cpu_start; 1451 __per_cpu_base = ptr - __per_cpu_start;
1447 1452
1448 for (i = 0; i < NR_CPUS; i++, ptr += size) 1453 for (i = 0; i < NR_CPUS; i++, ptr += size)
diff --git a/arch/sparc64/kernel/sparc64_ksyms.c b/arch/sparc64/kernel/sparc64_ksyms.c
index 38736460b8db..66336590e830 100644
--- a/arch/sparc64/kernel/sparc64_ksyms.c
+++ b/arch/sparc64/kernel/sparc64_ksyms.c
@@ -68,8 +68,6 @@ extern void *__memscan_zero(void *, size_t);
68extern void *__memscan_generic(void *, int, size_t); 68extern void *__memscan_generic(void *, int, size_t);
69extern int __memcmp(const void *, const void *, __kernel_size_t); 69extern int __memcmp(const void *, const void *, __kernel_size_t);
70extern __kernel_size_t strlen(const char *); 70extern __kernel_size_t strlen(const char *);
71extern void linux_sparc_syscall(void);
72extern void rtrap(void);
73extern void show_regs(struct pt_regs *); 71extern void show_regs(struct pt_regs *);
74extern void syscall_trace(struct pt_regs *, int); 72extern void syscall_trace(struct pt_regs *, int);
75extern void sys_sigsuspend(void); 73extern void sys_sigsuspend(void);
diff --git a/arch/sparc64/kernel/stacktrace.c b/arch/sparc64/kernel/stacktrace.c
index 84d39e873e88..01b52f561af4 100644
--- a/arch/sparc64/kernel/stacktrace.c
+++ b/arch/sparc64/kernel/stacktrace.c
@@ -20,6 +20,8 @@ void save_stack_trace(struct stack_trace *trace)
20 thread_base = (unsigned long) tp; 20 thread_base = (unsigned long) tp;
21 do { 21 do {
22 struct reg_window *rw; 22 struct reg_window *rw;
23 struct pt_regs *regs;
24 unsigned long pc;
23 25
24 /* Bogus frame pointer? */ 26 /* Bogus frame pointer? */
25 if (fp < (thread_base + sizeof(struct thread_info)) || 27 if (fp < (thread_base + sizeof(struct thread_info)) ||
@@ -27,11 +29,19 @@ void save_stack_trace(struct stack_trace *trace)
27 break; 29 break;
28 30
29 rw = (struct reg_window *) fp; 31 rw = (struct reg_window *) fp;
32 regs = (struct pt_regs *) (rw + 1);
33
34 if ((regs->magic & ~0x1ff) == PT_REGS_MAGIC) {
35 pc = regs->tpc;
36 fp = regs->u_regs[UREG_I6] + STACK_BIAS;
37 } else {
38 pc = rw->ins[7];
39 fp = rw->ins[6] + STACK_BIAS;
40 }
41
30 if (trace->skip > 0) 42 if (trace->skip > 0)
31 trace->skip--; 43 trace->skip--;
32 else 44 else
33 trace->entries[trace->nr_entries++] = rw->ins[7]; 45 trace->entries[trace->nr_entries++] = pc;
34
35 fp = rw->ins[6] + STACK_BIAS;
36 } while (trace->nr_entries < trace->max_entries); 46 } while (trace->nr_entries < trace->max_entries);
37} 47}
diff --git a/arch/sparc64/kernel/sun4v_tlb_miss.S b/arch/sparc64/kernel/sun4v_tlb_miss.S
index fd9430562e0b..e1fbf8c75787 100644
--- a/arch/sparc64/kernel/sun4v_tlb_miss.S
+++ b/arch/sparc64/kernel/sun4v_tlb_miss.S
@@ -262,7 +262,7 @@ sun4v_iacc:
262 mov %l5, %o2 262 mov %l5, %o2
263 call sun4v_insn_access_exception 263 call sun4v_insn_access_exception
264 add %sp, PTREGS_OFF, %o0 264 add %sp, PTREGS_OFF, %o0
265 ba,a,pt %xcc, rtrap_clr_l6 265 ba,a,pt %xcc, rtrap
266 266
267 /* Instruction Access Exception, tl1. */ 267 /* Instruction Access Exception, tl1. */
268sun4v_iacc_tl1: 268sun4v_iacc_tl1:
@@ -278,7 +278,7 @@ sun4v_iacc_tl1:
278 mov %l5, %o2 278 mov %l5, %o2
279 call sun4v_insn_access_exception_tl1 279 call sun4v_insn_access_exception_tl1
280 add %sp, PTREGS_OFF, %o0 280 add %sp, PTREGS_OFF, %o0
281 ba,a,pt %xcc, rtrap_clr_l6 281 ba,a,pt %xcc, rtrap
282 282
283 /* Data Access Exception, tl0. */ 283 /* Data Access Exception, tl0. */
284sun4v_dacc: 284sun4v_dacc:
@@ -294,7 +294,7 @@ sun4v_dacc:
294 mov %l5, %o2 294 mov %l5, %o2
295 call sun4v_data_access_exception 295 call sun4v_data_access_exception
296 add %sp, PTREGS_OFF, %o0 296 add %sp, PTREGS_OFF, %o0
297 ba,a,pt %xcc, rtrap_clr_l6 297 ba,a,pt %xcc, rtrap
298 298
299 /* Data Access Exception, tl1. */ 299 /* Data Access Exception, tl1. */
300sun4v_dacc_tl1: 300sun4v_dacc_tl1:
@@ -310,7 +310,7 @@ sun4v_dacc_tl1:
310 mov %l5, %o2 310 mov %l5, %o2
311 call sun4v_data_access_exception_tl1 311 call sun4v_data_access_exception_tl1
312 add %sp, PTREGS_OFF, %o0 312 add %sp, PTREGS_OFF, %o0
313 ba,a,pt %xcc, rtrap_clr_l6 313 ba,a,pt %xcc, rtrap
314 314
315 /* Memory Address Unaligned. */ 315 /* Memory Address Unaligned. */
316sun4v_mna: 316sun4v_mna:
@@ -344,7 +344,7 @@ sun4v_mna:
344 mov %l5, %o2 344 mov %l5, %o2
345 call sun4v_do_mna 345 call sun4v_do_mna
346 add %sp, PTREGS_OFF, %o0 346 add %sp, PTREGS_OFF, %o0
347 ba,a,pt %xcc, rtrap_clr_l6 347 ba,a,pt %xcc, rtrap
348 348
349 /* Privileged Action. */ 349 /* Privileged Action. */
350sun4v_privact: 350sun4v_privact:
@@ -352,7 +352,7 @@ sun4v_privact:
352 rd %pc, %g7 352 rd %pc, %g7
353 call do_privact 353 call do_privact
354 add %sp, PTREGS_OFF, %o0 354 add %sp, PTREGS_OFF, %o0
355 ba,a,pt %xcc, rtrap_clr_l6 355 ba,a,pt %xcc, rtrap
356 356
357 /* Unaligned ldd float, tl0. */ 357 /* Unaligned ldd float, tl0. */
358sun4v_lddfmna: 358sun4v_lddfmna:
@@ -368,7 +368,7 @@ sun4v_lddfmna:
368 mov %l5, %o2 368 mov %l5, %o2
369 call handle_lddfmna 369 call handle_lddfmna
370 add %sp, PTREGS_OFF, %o0 370 add %sp, PTREGS_OFF, %o0
371 ba,a,pt %xcc, rtrap_clr_l6 371 ba,a,pt %xcc, rtrap
372 372
373 /* Unaligned std float, tl0. */ 373 /* Unaligned std float, tl0. */
374sun4v_stdfmna: 374sun4v_stdfmna:
@@ -384,7 +384,7 @@ sun4v_stdfmna:
384 mov %l5, %o2 384 mov %l5, %o2
385 call handle_stdfmna 385 call handle_stdfmna
386 add %sp, PTREGS_OFF, %o0 386 add %sp, PTREGS_OFF, %o0
387 ba,a,pt %xcc, rtrap_clr_l6 387 ba,a,pt %xcc, rtrap
388 388
389#define BRANCH_ALWAYS 0x10680000 389#define BRANCH_ALWAYS 0x10680000
390#define NOP 0x01000000 390#define NOP 0x01000000
diff --git a/arch/sparc64/kernel/sysfs.c b/arch/sparc64/kernel/sysfs.c
index 52816c7be0b9..e885034a6b73 100644
--- a/arch/sparc64/kernel/sysfs.c
+++ b/arch/sparc64/kernel/sysfs.c
@@ -273,10 +273,22 @@ static void __init check_mmu_stats(void)
273 mmu_stats_supported = 1; 273 mmu_stats_supported = 1;
274} 274}
275 275
276static void register_nodes(void)
277{
278#ifdef CONFIG_NUMA
279 int i;
280
281 for (i = 0; i < MAX_NUMNODES; i++)
282 register_one_node(i);
283#endif
284}
285
276static int __init topology_init(void) 286static int __init topology_init(void)
277{ 287{
278 int cpu; 288 int cpu;
279 289
290 register_nodes();
291
280 check_mmu_stats(); 292 check_mmu_stats();
281 293
282 register_cpu_notifier(&sysfs_cpu_nb); 294 register_cpu_notifier(&sysfs_cpu_nb);
diff --git a/arch/sparc64/kernel/traps.c b/arch/sparc64/kernel/traps.c
index 96da847023f3..d9b8d46707d1 100644
--- a/arch/sparc64/kernel/traps.c
+++ b/arch/sparc64/kernel/traps.c
@@ -2091,9 +2091,8 @@ static void user_instruction_dump(unsigned int __user *pc)
2091 2091
2092void show_stack(struct task_struct *tsk, unsigned long *_ksp) 2092void show_stack(struct task_struct *tsk, unsigned long *_ksp)
2093{ 2093{
2094 unsigned long pc, fp, thread_base, ksp; 2094 unsigned long fp, thread_base, ksp;
2095 struct thread_info *tp; 2095 struct thread_info *tp;
2096 struct reg_window *rw;
2097 int count = 0; 2096 int count = 0;
2098 2097
2099 ksp = (unsigned long) _ksp; 2098 ksp = (unsigned long) _ksp;
@@ -2117,15 +2116,27 @@ void show_stack(struct task_struct *tsk, unsigned long *_ksp)
2117 printk("\n"); 2116 printk("\n");
2118#endif 2117#endif
2119 do { 2118 do {
2119 struct reg_window *rw;
2120 struct pt_regs *regs;
2121 unsigned long pc;
2122
2120 /* Bogus frame pointer? */ 2123 /* Bogus frame pointer? */
2121 if (fp < (thread_base + sizeof(struct thread_info)) || 2124 if (fp < (thread_base + sizeof(struct thread_info)) ||
2122 fp >= (thread_base + THREAD_SIZE)) 2125 fp >= (thread_base + THREAD_SIZE))
2123 break; 2126 break;
2124 rw = (struct reg_window *)fp; 2127 rw = (struct reg_window *)fp;
2125 pc = rw->ins[7]; 2128 regs = (struct pt_regs *) (rw + 1);
2129
2130 if ((regs->magic & ~0x1ff) == PT_REGS_MAGIC) {
2131 pc = regs->tpc;
2132 fp = regs->u_regs[UREG_I6] + STACK_BIAS;
2133 } else {
2134 pc = rw->ins[7];
2135 fp = rw->ins[6] + STACK_BIAS;
2136 }
2137
2126 printk(" [%016lx] ", pc); 2138 printk(" [%016lx] ", pc);
2127 print_symbol("%s\n", pc); 2139 print_symbol("%s\n", pc);
2128 fp = rw->ins[6] + STACK_BIAS;
2129 } while (++count < 16); 2140 } while (++count < 16);
2130#ifndef CONFIG_KALLSYMS 2141#ifndef CONFIG_KALLSYMS
2131 printk("\n"); 2142 printk("\n");
diff --git a/arch/sparc64/kernel/tsb.S b/arch/sparc64/kernel/tsb.S
index 10adb2fb8ffe..c499214b501d 100644
--- a/arch/sparc64/kernel/tsb.S
+++ b/arch/sparc64/kernel/tsb.S
@@ -275,7 +275,7 @@ sparc64_realfault_common:
275 stx %l5, [%g6 + TI_FAULT_ADDR] ! Save fault address 275 stx %l5, [%g6 + TI_FAULT_ADDR] ! Save fault address
276 call do_sparc64_fault ! Call fault handler 276 call do_sparc64_fault ! Call fault handler
277 add %sp, PTREGS_OFF, %o0 ! Compute pt_regs arg 277 add %sp, PTREGS_OFF, %o0 ! Compute pt_regs arg
278 ba,pt %xcc, rtrap_clr_l6 ! Restore cpu state 278 ba,pt %xcc, rtrap ! Restore cpu state
279 nop ! Delay slot (fill me) 279 nop ! Delay slot (fill me)
280 280
281winfix_trampoline: 281winfix_trampoline:
diff --git a/arch/sparc64/kernel/winfixup.S b/arch/sparc64/kernel/winfixup.S
index c4aa110a10e5..a6b0863c27df 100644
--- a/arch/sparc64/kernel/winfixup.S
+++ b/arch/sparc64/kernel/winfixup.S
@@ -32,7 +32,7 @@ fill_fixup:
32 rd %pc, %g7 32 rd %pc, %g7
33 call do_sparc64_fault 33 call do_sparc64_fault
34 add %sp, PTREGS_OFF, %o0 34 add %sp, PTREGS_OFF, %o0
35 ba,pt %xcc, rtrap_clr_l6 35 ba,pt %xcc, rtrap
36 nop 36 nop
37 37
38 /* Be very careful about usage of the trap globals here. 38 /* Be very careful about usage of the trap globals here.
@@ -100,7 +100,7 @@ spill_fixup_dax:
100 rd %pc, %g7 100 rd %pc, %g7
101 call do_sparc64_fault 101 call do_sparc64_fault
102 add %sp, PTREGS_OFF, %o0 102 add %sp, PTREGS_OFF, %o0
103 ba,a,pt %xcc, rtrap_clr_l6 103 ba,a,pt %xcc, rtrap
104 104
105winfix_mna: 105winfix_mna:
106 andn %g3, 0x7f, %g3 106 andn %g3, 0x7f, %g3
@@ -122,12 +122,12 @@ fill_fixup_mna:
122 mov %l4, %o2 122 mov %l4, %o2
123 call sun4v_do_mna 123 call sun4v_do_mna
124 mov %l5, %o1 124 mov %l5, %o1
125 ba,a,pt %xcc, rtrap_clr_l6 125 ba,a,pt %xcc, rtrap
1261: mov %l4, %o1 1261: mov %l4, %o1
127 mov %l5, %o2 127 mov %l5, %o2
128 call mem_address_unaligned 128 call mem_address_unaligned
129 nop 129 nop
130 ba,a,pt %xcc, rtrap_clr_l6 130 ba,a,pt %xcc, rtrap
131 131
132winfix_dax: 132winfix_dax:
133 andn %g3, 0x7f, %g3 133 andn %g3, 0x7f, %g3
@@ -150,7 +150,7 @@ fill_fixup_dax:
150 add %sp, PTREGS_OFF, %o0 150 add %sp, PTREGS_OFF, %o0
151 call sun4v_data_access_exception 151 call sun4v_data_access_exception
152 nop 152 nop
153 ba,a,pt %xcc, rtrap_clr_l6 153 ba,a,pt %xcc, rtrap
1541: call spitfire_data_access_exception 1541: call spitfire_data_access_exception
155 nop 155 nop
156 ba,a,pt %xcc, rtrap_clr_l6 156 ba,a,pt %xcc, rtrap
diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c
index f37078d96407..177d8aaeec42 100644
--- a/arch/sparc64/mm/init.c
+++ b/arch/sparc64/mm/init.c
@@ -24,6 +24,8 @@
24#include <linux/cache.h> 24#include <linux/cache.h>
25#include <linux/sort.h> 25#include <linux/sort.h>
26#include <linux/percpu.h> 26#include <linux/percpu.h>
27#include <linux/lmb.h>
28#include <linux/mmzone.h>
27 29
28#include <asm/head.h> 30#include <asm/head.h>
29#include <asm/system.h> 31#include <asm/system.h>
@@ -72,9 +74,7 @@ extern struct tsb swapper_4m_tsb[KERNEL_TSB4M_NENTRIES];
72#define MAX_BANKS 32 74#define MAX_BANKS 32
73 75
74static struct linux_prom64_registers pavail[MAX_BANKS] __initdata; 76static struct linux_prom64_registers pavail[MAX_BANKS] __initdata;
75static struct linux_prom64_registers pavail_rescan[MAX_BANKS] __initdata;
76static int pavail_ents __initdata; 77static int pavail_ents __initdata;
77static int pavail_rescan_ents __initdata;
78 78
79static int cmp_p64(const void *a, const void *b) 79static int cmp_p64(const void *a, const void *b)
80{ 80{
@@ -715,285 +715,684 @@ out:
715 smp_new_mmu_context_version(); 715 smp_new_mmu_context_version();
716} 716}
717 717
718/* Find a free area for the bootmem map, avoiding the kernel image 718static int numa_enabled = 1;
719 * and the initial ramdisk. 719static int numa_debug;
720 */ 720
721static unsigned long __init choose_bootmap_pfn(unsigned long start_pfn, 721static int __init early_numa(char *p)
722 unsigned long end_pfn)
723{ 722{
724 unsigned long avoid_start, avoid_end, bootmap_size; 723 if (!p)
725 int i; 724 return 0;
725
726 if (strstr(p, "off"))
727 numa_enabled = 0;
728
729 if (strstr(p, "debug"))
730 numa_debug = 1;
731
732 return 0;
733}
734early_param("numa", early_numa);
726 735
727 bootmap_size = bootmem_bootmap_pages(end_pfn - start_pfn); 736#define numadbg(f, a...) \
728 bootmap_size <<= PAGE_SHIFT; 737do { if (numa_debug) \
738 printk(KERN_INFO f, ## a); \
739} while (0)
729 740
730 avoid_start = avoid_end = 0; 741static void __init find_ramdisk(unsigned long phys_base)
742{
731#ifdef CONFIG_BLK_DEV_INITRD 743#ifdef CONFIG_BLK_DEV_INITRD
732 avoid_start = initrd_start; 744 if (sparc_ramdisk_image || sparc_ramdisk_image64) {
733 avoid_end = PAGE_ALIGN(initrd_end); 745 unsigned long ramdisk_image;
746
747 /* Older versions of the bootloader only supported a
748 * 32-bit physical address for the ramdisk image
749 * location, stored at sparc_ramdisk_image. Newer
750 * SILO versions set sparc_ramdisk_image to zero and
751 * provide a full 64-bit physical address at
752 * sparc_ramdisk_image64.
753 */
754 ramdisk_image = sparc_ramdisk_image;
755 if (!ramdisk_image)
756 ramdisk_image = sparc_ramdisk_image64;
757
758 /* Another bootloader quirk. The bootloader normalizes
759 * the physical address to KERNBASE, so we have to
760 * factor that back out and add in the lowest valid
761 * physical page address to get the true physical address.
762 */
763 ramdisk_image -= KERNBASE;
764 ramdisk_image += phys_base;
765
766 numadbg("Found ramdisk at physical address 0x%lx, size %u\n",
767 ramdisk_image, sparc_ramdisk_size);
768
769 initrd_start = ramdisk_image;
770 initrd_end = ramdisk_image + sparc_ramdisk_size;
771
772 lmb_reserve(initrd_start, initrd_end);
773 }
734#endif 774#endif
775}
735 776
736 for (i = 0; i < pavail_ents; i++) { 777struct node_mem_mask {
737 unsigned long start, end; 778 unsigned long mask;
779 unsigned long val;
780 unsigned long bootmem_paddr;
781};
782static struct node_mem_mask node_masks[MAX_NUMNODES];
783static int num_node_masks;
738 784
739 start = pavail[i].phys_addr; 785int numa_cpu_lookup_table[NR_CPUS];
740 end = start + pavail[i].reg_size; 786cpumask_t numa_cpumask_lookup_table[MAX_NUMNODES];
741 787
742 while (start < end) { 788#ifdef CONFIG_NEED_MULTIPLE_NODES
743 if (start >= kern_base && 789static bootmem_data_t plat_node_bdata[MAX_NUMNODES];
744 start < PAGE_ALIGN(kern_base + kern_size)) {
745 start = PAGE_ALIGN(kern_base + kern_size);
746 continue;
747 }
748 if (start >= avoid_start && start < avoid_end) {
749 start = avoid_end;
750 continue;
751 }
752 790
753 if ((end - start) < bootmap_size) 791struct mdesc_mblock {
754 break; 792 u64 base;
793 u64 size;
794 u64 offset; /* RA-to-PA */
795};
796static struct mdesc_mblock *mblocks;
797static int num_mblocks;
755 798
756 if (start < kern_base && 799static unsigned long ra_to_pa(unsigned long addr)
757 (start + bootmap_size) > kern_base) { 800{
758 start = PAGE_ALIGN(kern_base + kern_size); 801 int i;
759 continue;
760 }
761 802
762 if (start < avoid_start && 803 for (i = 0; i < num_mblocks; i++) {
763 (start + bootmap_size) > avoid_start) { 804 struct mdesc_mblock *m = &mblocks[i];
764 start = avoid_end;
765 continue;
766 }
767 805
768 /* OK, it doesn't overlap anything, use it. */ 806 if (addr >= m->base &&
769 return start >> PAGE_SHIFT; 807 addr < (m->base + m->size)) {
808 addr += m->offset;
809 break;
770 } 810 }
771 } 811 }
772 812 return addr;
773 prom_printf("Cannot find free area for bootmap, aborting.\n");
774 prom_halt();
775} 813}
776 814
777static void __init trim_pavail(unsigned long *cur_size_p, 815static int find_node(unsigned long addr)
778 unsigned long *end_of_phys_p)
779{ 816{
780 unsigned long to_trim = *cur_size_p - cmdline_memory_size;
781 unsigned long avoid_start, avoid_end;
782 int i; 817 int i;
783 818
784 to_trim = PAGE_ALIGN(to_trim); 819 addr = ra_to_pa(addr);
820 for (i = 0; i < num_node_masks; i++) {
821 struct node_mem_mask *p = &node_masks[i];
785 822
786 avoid_start = avoid_end = 0; 823 if ((addr & p->mask) == p->val)
787#ifdef CONFIG_BLK_DEV_INITRD 824 return i;
788 avoid_start = initrd_start; 825 }
789 avoid_end = PAGE_ALIGN(initrd_end); 826 return -1;
827}
828
829static unsigned long nid_range(unsigned long start, unsigned long end,
830 int *nid)
831{
832 *nid = find_node(start);
833 start += PAGE_SIZE;
834 while (start < end) {
835 int n = find_node(start);
836
837 if (n != *nid)
838 break;
839 start += PAGE_SIZE;
840 }
841
842 return start;
843}
844#else
845static unsigned long nid_range(unsigned long start, unsigned long end,
846 int *nid)
847{
848 *nid = 0;
849 return end;
850}
790#endif 851#endif
791 852
792 /* Trim some pavail[] entries in order to satisfy the 853/* This must be invoked after performing all of the necessary
793 * requested "mem=xxx" kernel command line specification. 854 * add_active_range() calls for 'nid'. We need to be able to get
794 * 855 * correct data from get_pfn_range_for_nid().
795 * We must not trim off the kernel image area nor the 856 */
796 * initial ramdisk range (if any). Also, we must not trim 857static void __init allocate_node_data(int nid)
797 * any pavail[] entry down to zero in order to preserve 858{
798 * the invariant that all pavail[] entries have a non-zero 859 unsigned long paddr, num_pages, start_pfn, end_pfn;
799 * size which is assumed by all of the code in here. 860 struct pglist_data *p;
800 */ 861
801 for (i = 0; i < pavail_ents; i++) { 862#ifdef CONFIG_NEED_MULTIPLE_NODES
802 unsigned long start, end, kern_end; 863 paddr = lmb_alloc_nid(sizeof(struct pglist_data),
803 unsigned long trim_low, trim_high, n; 864 SMP_CACHE_BYTES, nid, nid_range);
865 if (!paddr) {
866 prom_printf("Cannot allocate pglist_data for nid[%d]\n", nid);
867 prom_halt();
868 }
869 NODE_DATA(nid) = __va(paddr);
870 memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
804 871
805 kern_end = PAGE_ALIGN(kern_base + kern_size); 872 NODE_DATA(nid)->bdata = &plat_node_bdata[nid];
873#endif
806 874
807 trim_low = start = pavail[i].phys_addr; 875 p = NODE_DATA(nid);
808 trim_high = end = start + pavail[i].reg_size;
809 876
810 if (kern_base >= start && 877 get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
811 kern_base < end) { 878 p->node_start_pfn = start_pfn;
812 trim_low = kern_base; 879 p->node_spanned_pages = end_pfn - start_pfn;
813 if (kern_end >= end) 880
814 continue; 881 if (p->node_spanned_pages) {
815 } 882 num_pages = bootmem_bootmap_pages(p->node_spanned_pages);
816 if (kern_end >= start && 883
817 kern_end < end) { 884 paddr = lmb_alloc_nid(num_pages << PAGE_SHIFT, PAGE_SIZE, nid,
818 trim_high = kern_end; 885 nid_range);
819 } 886 if (!paddr) {
820 if (avoid_start && 887 prom_printf("Cannot allocate bootmap for nid[%d]\n",
821 avoid_start >= start && 888 nid);
822 avoid_start < end) { 889 prom_halt();
823 if (trim_low > avoid_start)
824 trim_low = avoid_start;
825 if (avoid_end >= end)
826 continue;
827 }
828 if (avoid_end &&
829 avoid_end >= start &&
830 avoid_end < end) {
831 if (trim_high < avoid_end)
832 trim_high = avoid_end;
833 } 890 }
891 node_masks[nid].bootmem_paddr = paddr;
892 }
893}
894
895static void init_node_masks_nonnuma(void)
896{
897 int i;
898
899 numadbg("Initializing tables for non-numa.\n");
900
901 node_masks[0].mask = node_masks[0].val = 0;
902 num_node_masks = 1;
903
904 for (i = 0; i < NR_CPUS; i++)
905 numa_cpu_lookup_table[i] = 0;
906
907 numa_cpumask_lookup_table[0] = CPU_MASK_ALL;
908}
909
910#ifdef CONFIG_NEED_MULTIPLE_NODES
911struct pglist_data *node_data[MAX_NUMNODES];
912
913EXPORT_SYMBOL(numa_cpu_lookup_table);
914EXPORT_SYMBOL(numa_cpumask_lookup_table);
915EXPORT_SYMBOL(node_data);
916
917struct mdesc_mlgroup {
918 u64 node;
919 u64 latency;
920 u64 match;
921 u64 mask;
922};
923static struct mdesc_mlgroup *mlgroups;
924static int num_mlgroups;
925
926static int scan_pio_for_cfg_handle(struct mdesc_handle *md, u64 pio,
927 u32 cfg_handle)
928{
929 u64 arc;
834 930
835 if (trim_high <= trim_low) 931 mdesc_for_each_arc(arc, md, pio, MDESC_ARC_TYPE_FWD) {
932 u64 target = mdesc_arc_target(md, arc);
933 const u64 *val;
934
935 val = mdesc_get_property(md, target,
936 "cfg-handle", NULL);
937 if (val && *val == cfg_handle)
938 return 0;
939 }
940 return -ENODEV;
941}
942
943static int scan_arcs_for_cfg_handle(struct mdesc_handle *md, u64 grp,
944 u32 cfg_handle)
945{
946 u64 arc, candidate, best_latency = ~(u64)0;
947
948 candidate = MDESC_NODE_NULL;
949 mdesc_for_each_arc(arc, md, grp, MDESC_ARC_TYPE_FWD) {
950 u64 target = mdesc_arc_target(md, arc);
951 const char *name = mdesc_node_name(md, target);
952 const u64 *val;
953
954 if (strcmp(name, "pio-latency-group"))
836 continue; 955 continue;
837 956
838 if (trim_low == start && trim_high == end) { 957 val = mdesc_get_property(md, target, "latency", NULL);
839 /* Whole chunk is available for trimming. 958 if (!val)
840 * Trim all except one page, in order to keep 959 continue;
841 * entry non-empty. 960
842 */ 961 if (*val < best_latency) {
843 n = (end - start) - PAGE_SIZE; 962 candidate = target;
844 if (n > to_trim) 963 best_latency = *val;
845 n = to_trim;
846
847 if (n) {
848 pavail[i].phys_addr += n;
849 pavail[i].reg_size -= n;
850 to_trim -= n;
851 }
852 } else {
853 n = (trim_low - start);
854 if (n > to_trim)
855 n = to_trim;
856
857 if (n) {
858 pavail[i].phys_addr += n;
859 pavail[i].reg_size -= n;
860 to_trim -= n;
861 }
862 if (to_trim) {
863 n = end - trim_high;
864 if (n > to_trim)
865 n = to_trim;
866 if (n) {
867 pavail[i].reg_size -= n;
868 to_trim -= n;
869 }
870 }
871 } 964 }
965 }
966
967 if (candidate == MDESC_NODE_NULL)
968 return -ENODEV;
969
970 return scan_pio_for_cfg_handle(md, candidate, cfg_handle);
971}
972
973int of_node_to_nid(struct device_node *dp)
974{
975 const struct linux_prom64_registers *regs;
976 struct mdesc_handle *md;
977 u32 cfg_handle;
978 int count, nid;
979 u64 grp;
872 980
873 if (!to_trim) 981 if (!mlgroups)
982 return -1;
983
984 regs = of_get_property(dp, "reg", NULL);
985 if (!regs)
986 return -1;
987
988 cfg_handle = (regs->phys_addr >> 32UL) & 0x0fffffff;
989
990 md = mdesc_grab();
991
992 count = 0;
993 nid = -1;
994 mdesc_for_each_node_by_name(md, grp, "group") {
995 if (!scan_arcs_for_cfg_handle(md, grp, cfg_handle)) {
996 nid = count;
874 break; 997 break;
998 }
999 count++;
875 } 1000 }
876 1001
877 /* Recalculate. */ 1002 mdesc_release(md);
878 *cur_size_p = 0UL; 1003
879 for (i = 0; i < pavail_ents; i++) { 1004 return nid;
880 *end_of_phys_p = pavail[i].phys_addr +
881 pavail[i].reg_size;
882 *cur_size_p += pavail[i].reg_size;
883 }
884} 1005}
885 1006
886/* About pages_avail, this is the value we will use to calculate 1007static void add_node_ranges(void)
887 * the zholes_size[] argument given to free_area_init_node(). The
888 * page allocator uses this to calculate nr_kernel_pages,
889 * nr_all_pages and zone->present_pages. On NUMA it is used
890 * to calculate zone->min_unmapped_pages and zone->min_slab_pages.
891 *
892 * So this number should really be set to what the page allocator
893 * actually ends up with. This means:
894 * 1) It should include bootmem map pages, we'll release those.
895 * 2) It should not include the kernel image, except for the
896 * __init sections which we will also release.
897 * 3) It should include the initrd image, since we'll release
898 * that too.
899 */
900static unsigned long __init bootmem_init(unsigned long *pages_avail,
901 unsigned long phys_base)
902{ 1008{
903 unsigned long bootmap_size, end_pfn;
904 unsigned long end_of_phys_memory = 0UL;
905 unsigned long bootmap_pfn, bytes_avail, size;
906 int i; 1009 int i;
907 1010
908 bytes_avail = 0UL; 1011 for (i = 0; i < lmb.memory.cnt; i++) {
909 for (i = 0; i < pavail_ents; i++) { 1012 unsigned long size = lmb_size_bytes(&lmb.memory, i);
910 end_of_phys_memory = pavail[i].phys_addr + 1013 unsigned long start, end;
911 pavail[i].reg_size; 1014
912 bytes_avail += pavail[i].reg_size; 1015 start = lmb.memory.region[i].base;
1016 end = start + size;
1017 while (start < end) {
1018 unsigned long this_end;
1019 int nid;
1020
1021 this_end = nid_range(start, end, &nid);
1022
1023 numadbg("Adding active range nid[%d] "
1024 "start[%lx] end[%lx]\n",
1025 nid, start, this_end);
1026
1027 add_active_range(nid,
1028 start >> PAGE_SHIFT,
1029 this_end >> PAGE_SHIFT);
1030
1031 start = this_end;
1032 }
913 } 1033 }
1034}
914 1035
915 /* Determine the location of the initial ramdisk before trying 1036static int __init grab_mlgroups(struct mdesc_handle *md)
916 * to honor the "mem=xxx" command line argument. We must know 1037{
917 * where the kernel image and the ramdisk image are so that we 1038 unsigned long paddr;
918 * do not trim those two areas from the physical memory map. 1039 int count = 0;
919 */ 1040 u64 node;
1041
1042 mdesc_for_each_node_by_name(md, node, "memory-latency-group")
1043 count++;
1044 if (!count)
1045 return -ENOENT;
1046
1047 paddr = lmb_alloc(count * sizeof(struct mdesc_mlgroup),
1048 SMP_CACHE_BYTES);
1049 if (!paddr)
1050 return -ENOMEM;
1051
1052 mlgroups = __va(paddr);
1053 num_mlgroups = count;
1054
1055 count = 0;
1056 mdesc_for_each_node_by_name(md, node, "memory-latency-group") {
1057 struct mdesc_mlgroup *m = &mlgroups[count++];
1058 const u64 *val;
1059
1060 m->node = node;
1061
1062 val = mdesc_get_property(md, node, "latency", NULL);
1063 m->latency = *val;
1064 val = mdesc_get_property(md, node, "address-match", NULL);
1065 m->match = *val;
1066 val = mdesc_get_property(md, node, "address-mask", NULL);
1067 m->mask = *val;
1068
1069 numadbg("MLGROUP[%d]: node[%lx] latency[%lx] "
1070 "match[%lx] mask[%lx]\n",
1071 count - 1, m->node, m->latency, m->match, m->mask);
1072 }
920 1073
921#ifdef CONFIG_BLK_DEV_INITRD 1074 return 0;
922 /* Now have to check initial ramdisk, so that bootmap does not overwrite it */ 1075}
923 if (sparc_ramdisk_image || sparc_ramdisk_image64) { 1076
924 unsigned long ramdisk_image = sparc_ramdisk_image ? 1077static int __init grab_mblocks(struct mdesc_handle *md)
925 sparc_ramdisk_image : sparc_ramdisk_image64; 1078{
926 ramdisk_image -= KERNBASE; 1079 unsigned long paddr;
927 initrd_start = ramdisk_image + phys_base; 1080 int count = 0;
928 initrd_end = initrd_start + sparc_ramdisk_size; 1081 u64 node;
929 if (initrd_end > end_of_phys_memory) { 1082
930 printk(KERN_CRIT "initrd extends beyond end of memory " 1083 mdesc_for_each_node_by_name(md, node, "mblock")
931 "(0x%016lx > 0x%016lx)\ndisabling initrd\n", 1084 count++;
932 initrd_end, end_of_phys_memory); 1085 if (!count)
933 initrd_start = 0; 1086 return -ENOENT;
934 initrd_end = 0; 1087
1088 paddr = lmb_alloc(count * sizeof(struct mdesc_mblock),
1089 SMP_CACHE_BYTES);
1090 if (!paddr)
1091 return -ENOMEM;
1092
1093 mblocks = __va(paddr);
1094 num_mblocks = count;
1095
1096 count = 0;
1097 mdesc_for_each_node_by_name(md, node, "mblock") {
1098 struct mdesc_mblock *m = &mblocks[count++];
1099 const u64 *val;
1100
1101 val = mdesc_get_property(md, node, "base", NULL);
1102 m->base = *val;
1103 val = mdesc_get_property(md, node, "size", NULL);
1104 m->size = *val;
1105 val = mdesc_get_property(md, node,
1106 "address-congruence-offset", NULL);
1107 m->offset = *val;
1108
1109 numadbg("MBLOCK[%d]: base[%lx] size[%lx] offset[%lx]\n",
1110 count - 1, m->base, m->size, m->offset);
1111 }
1112
1113 return 0;
1114}
1115
1116static void __init numa_parse_mdesc_group_cpus(struct mdesc_handle *md,
1117 u64 grp, cpumask_t *mask)
1118{
1119 u64 arc;
1120
1121 cpus_clear(*mask);
1122
1123 mdesc_for_each_arc(arc, md, grp, MDESC_ARC_TYPE_BACK) {
1124 u64 target = mdesc_arc_target(md, arc);
1125 const char *name = mdesc_node_name(md, target);
1126 const u64 *id;
1127
1128 if (strcmp(name, "cpu"))
1129 continue;
1130 id = mdesc_get_property(md, target, "id", NULL);
1131 if (*id < NR_CPUS)
1132 cpu_set(*id, *mask);
1133 }
1134}
1135
1136static struct mdesc_mlgroup * __init find_mlgroup(u64 node)
1137{
1138 int i;
1139
1140 for (i = 0; i < num_mlgroups; i++) {
1141 struct mdesc_mlgroup *m = &mlgroups[i];
1142 if (m->node == node)
1143 return m;
1144 }
1145 return NULL;
1146}
1147
1148static int __init numa_attach_mlgroup(struct mdesc_handle *md, u64 grp,
1149 int index)
1150{
1151 struct mdesc_mlgroup *candidate = NULL;
1152 u64 arc, best_latency = ~(u64)0;
1153 struct node_mem_mask *n;
1154
1155 mdesc_for_each_arc(arc, md, grp, MDESC_ARC_TYPE_FWD) {
1156 u64 target = mdesc_arc_target(md, arc);
1157 struct mdesc_mlgroup *m = find_mlgroup(target);
1158 if (!m)
1159 continue;
1160 if (m->latency < best_latency) {
1161 candidate = m;
1162 best_latency = m->latency;
935 } 1163 }
936 } 1164 }
937#endif 1165 if (!candidate)
1166 return -ENOENT;
1167
1168 if (num_node_masks != index) {
1169 printk(KERN_ERR "Inconsistent NUMA state, "
1170 "index[%d] != num_node_masks[%d]\n",
1171 index, num_node_masks);
1172 return -EINVAL;
1173 }
938 1174
939 if (cmdline_memory_size && 1175 n = &node_masks[num_node_masks++];
940 bytes_avail > cmdline_memory_size)
941 trim_pavail(&bytes_avail,
942 &end_of_phys_memory);
943 1176
944 *pages_avail = bytes_avail >> PAGE_SHIFT; 1177 n->mask = candidate->mask;
1178 n->val = candidate->match;
945 1179
946 end_pfn = end_of_phys_memory >> PAGE_SHIFT; 1180 numadbg("NUMA NODE[%d]: mask[%lx] val[%lx] (latency[%lx])\n",
1181 index, n->mask, n->val, candidate->latency);
947 1182
948 /* Initialize the boot-time allocator. */ 1183 return 0;
949 max_pfn = max_low_pfn = end_pfn; 1184}
950 min_low_pfn = (phys_base >> PAGE_SHIFT); 1185
1186static int __init numa_parse_mdesc_group(struct mdesc_handle *md, u64 grp,
1187 int index)
1188{
1189 cpumask_t mask;
1190 int cpu;
951 1191
952 bootmap_pfn = choose_bootmap_pfn(min_low_pfn, end_pfn); 1192 numa_parse_mdesc_group_cpus(md, grp, &mask);
953 1193
954 bootmap_size = init_bootmem_node(NODE_DATA(0), bootmap_pfn, 1194 for_each_cpu_mask(cpu, mask)
955 min_low_pfn, end_pfn); 1195 numa_cpu_lookup_table[cpu] = index;
1196 numa_cpumask_lookup_table[index] = mask;
956 1197
957 /* Now register the available physical memory with the 1198 if (numa_debug) {
958 * allocator. 1199 printk(KERN_INFO "NUMA GROUP[%d]: cpus [ ", index);
959 */ 1200 for_each_cpu_mask(cpu, mask)
960 for (i = 0; i < pavail_ents; i++) 1201 printk("%d ", cpu);
961 free_bootmem(pavail[i].phys_addr, pavail[i].reg_size); 1202 printk("]\n");
1203 }
962 1204
963#ifdef CONFIG_BLK_DEV_INITRD 1205 return numa_attach_mlgroup(md, grp, index);
964 if (initrd_start) { 1206}
965 size = initrd_end - initrd_start; 1207
1208static int __init numa_parse_mdesc(void)
1209{
1210 struct mdesc_handle *md = mdesc_grab();
1211 int i, err, count;
1212 u64 node;
1213
1214 node = mdesc_node_by_name(md, MDESC_NODE_NULL, "latency-groups");
1215 if (node == MDESC_NODE_NULL) {
1216 mdesc_release(md);
1217 return -ENOENT;
1218 }
1219
1220 err = grab_mblocks(md);
1221 if (err < 0)
1222 goto out;
1223
1224 err = grab_mlgroups(md);
1225 if (err < 0)
1226 goto out;
1227
1228 count = 0;
1229 mdesc_for_each_node_by_name(md, node, "group") {
1230 err = numa_parse_mdesc_group(md, node, count);
1231 if (err < 0)
1232 break;
1233 count++;
1234 }
1235
1236 add_node_ranges();
1237
1238 for (i = 0; i < num_node_masks; i++) {
1239 allocate_node_data(i);
1240 node_set_online(i);
1241 }
1242
1243 err = 0;
1244out:
1245 mdesc_release(md);
1246 return err;
1247}
1248
1249static int __init numa_parse_sun4u(void)
1250{
1251 return -1;
1252}
966 1253
967 /* Reserve the initrd image area. */ 1254static int __init bootmem_init_numa(void)
968 reserve_bootmem(initrd_start, size, BOOTMEM_DEFAULT); 1255{
1256 int err = -1;
969 1257
970 initrd_start += PAGE_OFFSET; 1258 numadbg("bootmem_init_numa()\n");
971 initrd_end += PAGE_OFFSET; 1259
1260 if (numa_enabled) {
1261 if (tlb_type == hypervisor)
1262 err = numa_parse_mdesc();
1263 else
1264 err = numa_parse_sun4u();
972 } 1265 }
1266 return err;
1267}
1268
1269#else
1270
1271static int bootmem_init_numa(void)
1272{
1273 return -1;
1274}
1275
973#endif 1276#endif
974 /* Reserve the kernel text/data/bss. */
975 reserve_bootmem(kern_base, kern_size, BOOTMEM_DEFAULT);
976 *pages_avail -= PAGE_ALIGN(kern_size) >> PAGE_SHIFT;
977
978 /* Add back in the initmem pages. */
979 size = ((unsigned long)(__init_end) & PAGE_MASK) -
980 PAGE_ALIGN((unsigned long)__init_begin);
981 *pages_avail += size >> PAGE_SHIFT;
982
983 /* Reserve the bootmem map. We do not account for it
984 * in pages_avail because we will release that memory
985 * in free_all_bootmem.
986 */
987 size = bootmap_size;
988 reserve_bootmem((bootmap_pfn << PAGE_SHIFT), size, BOOTMEM_DEFAULT);
989 1277
990 for (i = 0; i < pavail_ents; i++) { 1278static void __init bootmem_init_nonnuma(void)
1279{
1280 unsigned long top_of_ram = lmb_end_of_DRAM();
1281 unsigned long total_ram = lmb_phys_mem_size();
1282 unsigned int i;
1283
1284 numadbg("bootmem_init_nonnuma()\n");
1285
1286 printk(KERN_INFO "Top of RAM: 0x%lx, Total RAM: 0x%lx\n",
1287 top_of_ram, total_ram);
1288 printk(KERN_INFO "Memory hole size: %ldMB\n",
1289 (top_of_ram - total_ram) >> 20);
1290
1291 init_node_masks_nonnuma();
1292
1293 for (i = 0; i < lmb.memory.cnt; i++) {
1294 unsigned long size = lmb_size_bytes(&lmb.memory, i);
991 unsigned long start_pfn, end_pfn; 1295 unsigned long start_pfn, end_pfn;
992 1296
993 start_pfn = pavail[i].phys_addr >> PAGE_SHIFT; 1297 if (!size)
994 end_pfn = (start_pfn + (pavail[i].reg_size >> PAGE_SHIFT)); 1298 continue;
995 memory_present(0, start_pfn, end_pfn); 1299
1300 start_pfn = lmb.memory.region[i].base >> PAGE_SHIFT;
1301 end_pfn = start_pfn + lmb_size_pages(&lmb.memory, i);
1302 add_active_range(0, start_pfn, end_pfn);
1303 }
1304
1305 allocate_node_data(0);
1306
1307 node_set_online(0);
1308}
1309
1310static void __init reserve_range_in_node(int nid, unsigned long start,
1311 unsigned long end)
1312{
1313 numadbg(" reserve_range_in_node(nid[%d],start[%lx],end[%lx]\n",
1314 nid, start, end);
1315 while (start < end) {
1316 unsigned long this_end;
1317 int n;
1318
1319 this_end = nid_range(start, end, &n);
1320 if (n == nid) {
1321 numadbg(" MATCH reserving range [%lx:%lx]\n",
1322 start, this_end);
1323 reserve_bootmem_node(NODE_DATA(nid), start,
1324 (this_end - start), BOOTMEM_DEFAULT);
1325 } else
1326 numadbg(" NO MATCH, advancing start to %lx\n",
1327 this_end);
1328
1329 start = this_end;
1330 }
1331}
1332
1333static void __init trim_reserved_in_node(int nid)
1334{
1335 int i;
1336
1337 numadbg(" trim_reserved_in_node(%d)\n", nid);
1338
1339 for (i = 0; i < lmb.reserved.cnt; i++) {
1340 unsigned long start = lmb.reserved.region[i].base;
1341 unsigned long size = lmb_size_bytes(&lmb.reserved, i);
1342 unsigned long end = start + size;
1343
1344 reserve_range_in_node(nid, start, end);
1345 }
1346}
1347
1348static void __init bootmem_init_one_node(int nid)
1349{
1350 struct pglist_data *p;
1351
1352 numadbg("bootmem_init_one_node(%d)\n", nid);
1353
1354 p = NODE_DATA(nid);
1355
1356 if (p->node_spanned_pages) {
1357 unsigned long paddr = node_masks[nid].bootmem_paddr;
1358 unsigned long end_pfn;
1359
1360 end_pfn = p->node_start_pfn + p->node_spanned_pages;
1361
1362 numadbg(" init_bootmem_node(%d, %lx, %lx, %lx)\n",
1363 nid, paddr >> PAGE_SHIFT, p->node_start_pfn, end_pfn);
1364
1365 init_bootmem_node(p, paddr >> PAGE_SHIFT,
1366 p->node_start_pfn, end_pfn);
1367
1368 numadbg(" free_bootmem_with_active_regions(%d, %lx)\n",
1369 nid, end_pfn);
1370 free_bootmem_with_active_regions(nid, end_pfn);
1371
1372 trim_reserved_in_node(nid);
1373
1374 numadbg(" sparse_memory_present_with_active_regions(%d)\n",
1375 nid);
1376 sparse_memory_present_with_active_regions(nid);
996 } 1377 }
1378}
1379
1380static unsigned long __init bootmem_init(unsigned long phys_base)
1381{
1382 unsigned long end_pfn;
1383 int nid;
1384
1385 end_pfn = lmb_end_of_DRAM() >> PAGE_SHIFT;
1386 max_pfn = max_low_pfn = end_pfn;
1387 min_low_pfn = (phys_base >> PAGE_SHIFT);
1388
1389 if (bootmem_init_numa() < 0)
1390 bootmem_init_nonnuma();
1391
1392 /* XXX cpu notifier XXX */
1393
1394 for_each_online_node(nid)
1395 bootmem_init_one_node(nid);
997 1396
998 sparse_init(); 1397 sparse_init();
999 1398
@@ -1289,7 +1688,7 @@ void __init setup_per_cpu_areas(void)
1289 1688
1290void __init paging_init(void) 1689void __init paging_init(void)
1291{ 1690{
1292 unsigned long end_pfn, pages_avail, shift, phys_base; 1691 unsigned long end_pfn, shift, phys_base;
1293 unsigned long real_end, i; 1692 unsigned long real_end, i;
1294 1693
1295 /* These build time checkes make sure that the dcache_dirty_cpu() 1694 /* These build time checkes make sure that the dcache_dirty_cpu()
@@ -1330,12 +1729,26 @@ void __init paging_init(void)
1330 sun4v_ktsb_init(); 1729 sun4v_ktsb_init();
1331 } 1730 }
1332 1731
1732 lmb_init();
1733
1333 /* Find available physical memory... */ 1734 /* Find available physical memory... */
1334 read_obp_memory("available", &pavail[0], &pavail_ents); 1735 read_obp_memory("available", &pavail[0], &pavail_ents);
1335 1736
1336 phys_base = 0xffffffffffffffffUL; 1737 phys_base = 0xffffffffffffffffUL;
1337 for (i = 0; i < pavail_ents; i++) 1738 for (i = 0; i < pavail_ents; i++) {
1338 phys_base = min(phys_base, pavail[i].phys_addr); 1739 phys_base = min(phys_base, pavail[i].phys_addr);
1740 lmb_add(pavail[i].phys_addr, pavail[i].reg_size);
1741 }
1742
1743 lmb_reserve(kern_base, kern_size);
1744
1745 find_ramdisk(phys_base);
1746
1747 if (cmdline_memory_size)
1748 lmb_enforce_memory_limit(phys_base + cmdline_memory_size);
1749
1750 lmb_analyze();
1751 lmb_dump_all();
1339 1752
1340 set_bit(0, mmu_context_bmap); 1753 set_bit(0, mmu_context_bmap);
1341 1754
@@ -1371,14 +1784,10 @@ void __init paging_init(void)
1371 if (tlb_type == hypervisor) 1784 if (tlb_type == hypervisor)
1372 sun4v_ktsb_register(); 1785 sun4v_ktsb_register();
1373 1786
1374 /* Setup bootmem... */ 1787 /* We must setup the per-cpu areas before we pull in the
1375 pages_avail = 0; 1788 * PROM and the MDESC. The code there fills in cpu and
1376 last_valid_pfn = end_pfn = bootmem_init(&pages_avail, phys_base); 1789 * other information into per-cpu data structures.
1377 1790 */
1378 max_mapnr = last_valid_pfn;
1379
1380 kernel_physical_mapping_init();
1381
1382 real_setup_per_cpu_areas(); 1791 real_setup_per_cpu_areas();
1383 1792
1384 prom_build_devicetree(); 1793 prom_build_devicetree();
@@ -1386,20 +1795,22 @@ void __init paging_init(void)
1386 if (tlb_type == hypervisor) 1795 if (tlb_type == hypervisor)
1387 sun4v_mdesc_init(); 1796 sun4v_mdesc_init();
1388 1797
1798 /* Setup bootmem... */
1799 last_valid_pfn = end_pfn = bootmem_init(phys_base);
1800
1801#ifndef CONFIG_NEED_MULTIPLE_NODES
1802 max_mapnr = last_valid_pfn;
1803#endif
1804 kernel_physical_mapping_init();
1805
1389 { 1806 {
1390 unsigned long zones_size[MAX_NR_ZONES]; 1807 unsigned long max_zone_pfns[MAX_NR_ZONES];
1391 unsigned long zholes_size[MAX_NR_ZONES];
1392 int znum;
1393 1808
1394 for (znum = 0; znum < MAX_NR_ZONES; znum++) 1809 memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
1395 zones_size[znum] = zholes_size[znum] = 0;
1396 1810
1397 zones_size[ZONE_NORMAL] = end_pfn; 1811 max_zone_pfns[ZONE_NORMAL] = end_pfn;
1398 zholes_size[ZONE_NORMAL] = end_pfn - pages_avail;
1399 1812
1400 free_area_init_node(0, &contig_page_data, zones_size, 1813 free_area_init_nodes(max_zone_pfns);
1401 __pa(PAGE_OFFSET) >> PAGE_SHIFT,
1402 zholes_size);
1403 } 1814 }
1404 1815
1405 printk("Booting Linux...\n"); 1816 printk("Booting Linux...\n");
@@ -1408,21 +1819,52 @@ void __init paging_init(void)
1408 cpu_probe(); 1819 cpu_probe();
1409} 1820}
1410 1821
1411static void __init taint_real_pages(void) 1822int __init page_in_phys_avail(unsigned long paddr)
1823{
1824 int i;
1825
1826 paddr &= PAGE_MASK;
1827
1828 for (i = 0; i < pavail_ents; i++) {
1829 unsigned long start, end;
1830
1831 start = pavail[i].phys_addr;
1832 end = start + pavail[i].reg_size;
1833
1834 if (paddr >= start && paddr < end)
1835 return 1;
1836 }
1837 if (paddr >= kern_base && paddr < (kern_base + kern_size))
1838 return 1;
1839#ifdef CONFIG_BLK_DEV_INITRD
1840 if (paddr >= __pa(initrd_start) &&
1841 paddr < __pa(PAGE_ALIGN(initrd_end)))
1842 return 1;
1843#endif
1844
1845 return 0;
1846}
1847
1848static struct linux_prom64_registers pavail_rescan[MAX_BANKS] __initdata;
1849static int pavail_rescan_ents __initdata;
1850
1851/* Certain OBP calls, such as fetching "available" properties, can
1852 * claim physical memory. So, along with initializing the valid
1853 * address bitmap, what we do here is refetch the physical available
1854 * memory list again, and make sure it provides at least as much
1855 * memory as 'pavail' does.
1856 */
1857static void setup_valid_addr_bitmap_from_pavail(void)
1412{ 1858{
1413 int i; 1859 int i;
1414 1860
1415 read_obp_memory("available", &pavail_rescan[0], &pavail_rescan_ents); 1861 read_obp_memory("available", &pavail_rescan[0], &pavail_rescan_ents);
1416 1862
1417 /* Find changes discovered in the physmem available rescan and
1418 * reserve the lost portions in the bootmem maps.
1419 */
1420 for (i = 0; i < pavail_ents; i++) { 1863 for (i = 0; i < pavail_ents; i++) {
1421 unsigned long old_start, old_end; 1864 unsigned long old_start, old_end;
1422 1865
1423 old_start = pavail[i].phys_addr; 1866 old_start = pavail[i].phys_addr;
1424 old_end = old_start + 1867 old_end = old_start + pavail[i].reg_size;
1425 pavail[i].reg_size;
1426 while (old_start < old_end) { 1868 while (old_start < old_end) {
1427 int n; 1869 int n;
1428 1870
@@ -1440,7 +1882,16 @@ static void __init taint_real_pages(void)
1440 goto do_next_page; 1882 goto do_next_page;
1441 } 1883 }
1442 } 1884 }
1443 reserve_bootmem(old_start, PAGE_SIZE, BOOTMEM_DEFAULT); 1885
1886 prom_printf("mem_init: Lost memory in pavail\n");
1887 prom_printf("mem_init: OLD start[%lx] size[%lx]\n",
1888 pavail[i].phys_addr,
1889 pavail[i].reg_size);
1890 prom_printf("mem_init: NEW start[%lx] size[%lx]\n",
1891 pavail_rescan[i].phys_addr,
1892 pavail_rescan[i].reg_size);
1893 prom_printf("mem_init: Cannot continue, aborting.\n");
1894 prom_halt();
1444 1895
1445 do_next_page: 1896 do_next_page:
1446 old_start += PAGE_SIZE; 1897 old_start += PAGE_SIZE;
@@ -1448,32 +1899,6 @@ static void __init taint_real_pages(void)
1448 } 1899 }
1449} 1900}
1450 1901
1451int __init page_in_phys_avail(unsigned long paddr)
1452{
1453 int i;
1454
1455 paddr &= PAGE_MASK;
1456
1457 for (i = 0; i < pavail_rescan_ents; i++) {
1458 unsigned long start, end;
1459
1460 start = pavail_rescan[i].phys_addr;
1461 end = start + pavail_rescan[i].reg_size;
1462
1463 if (paddr >= start && paddr < end)
1464 return 1;
1465 }
1466 if (paddr >= kern_base && paddr < (kern_base + kern_size))
1467 return 1;
1468#ifdef CONFIG_BLK_DEV_INITRD
1469 if (paddr >= __pa(initrd_start) &&
1470 paddr < __pa(PAGE_ALIGN(initrd_end)))
1471 return 1;
1472#endif
1473
1474 return 0;
1475}
1476
1477void __init mem_init(void) 1902void __init mem_init(void)
1478{ 1903{
1479 unsigned long codepages, datapages, initpages; 1904 unsigned long codepages, datapages, initpages;
@@ -1496,14 +1921,26 @@ void __init mem_init(void)
1496 addr += PAGE_SIZE; 1921 addr += PAGE_SIZE;
1497 } 1922 }
1498 1923
1499 taint_real_pages(); 1924 setup_valid_addr_bitmap_from_pavail();
1500 1925
1501 high_memory = __va(last_valid_pfn << PAGE_SHIFT); 1926 high_memory = __va(last_valid_pfn << PAGE_SHIFT);
1502 1927
1928#ifdef CONFIG_NEED_MULTIPLE_NODES
1929 for_each_online_node(i) {
1930 if (NODE_DATA(i)->node_spanned_pages != 0) {
1931 totalram_pages +=
1932 free_all_bootmem_node(NODE_DATA(i));
1933 }
1934 }
1935#else
1936 totalram_pages = free_all_bootmem();
1937#endif
1938
1503 /* We subtract one to account for the mem_map_zero page 1939 /* We subtract one to account for the mem_map_zero page
1504 * allocated below. 1940 * allocated below.
1505 */ 1941 */
1506 totalram_pages = num_physpages = free_all_bootmem() - 1; 1942 totalram_pages -= 1;
1943 num_physpages = totalram_pages;
1507 1944
1508 /* 1945 /*
1509 * Set up the zero page, mark it reserved, so that page count 1946 * Set up the zero page, mark it reserved, so that page count
diff --git a/arch/sparc64/mm/tsb.c b/arch/sparc64/mm/tsb.c
index a3e6e4b635b3..fe70c8a557b5 100644
--- a/arch/sparc64/mm/tsb.c
+++ b/arch/sparc64/mm/tsb.c
@@ -321,7 +321,8 @@ retry_tsb_alloc:
321 if (new_size > (PAGE_SIZE * 2)) 321 if (new_size > (PAGE_SIZE * 2))
322 gfp_flags = __GFP_NOWARN | __GFP_NORETRY; 322 gfp_flags = __GFP_NOWARN | __GFP_NORETRY;
323 323
324 new_tsb = kmem_cache_alloc(tsb_caches[new_cache_index], gfp_flags); 324 new_tsb = kmem_cache_alloc_node(tsb_caches[new_cache_index],
325 gfp_flags, numa_node_id());
325 if (unlikely(!new_tsb)) { 326 if (unlikely(!new_tsb)) {
326 /* Not being able to fork due to a high-order TSB 327 /* Not being able to fork due to a high-order TSB
327 * allocation failure is very bad behavior. Just back 328 * allocation failure is very bad behavior. Just back
diff --git a/arch/sparc64/mm/ultra.S b/arch/sparc64/mm/ultra.S
index 2865c105b6a4..e686a67561af 100644
--- a/arch/sparc64/mm/ultra.S
+++ b/arch/sparc64/mm/ultra.S
@@ -476,7 +476,6 @@ xcall_sync_tick:
476#endif 476#endif
477 call smp_synchronize_tick_client 477 call smp_synchronize_tick_client
478 nop 478 nop
479 clr %l6
480 b rtrap_xcall 479 b rtrap_xcall
481 ldx [%sp + PTREGS_OFF + PT_V9_TSTATE], %l1 480 ldx [%sp + PTREGS_OFF + PT_V9_TSTATE], %l1
482 481
@@ -511,7 +510,6 @@ xcall_report_regs:
511#endif 510#endif
512 call __show_regs 511 call __show_regs
513 add %sp, PTREGS_OFF, %o0 512 add %sp, PTREGS_OFF, %o0
514 clr %l6
515 /* Has to be a non-v9 branch due to the large distance. */ 513 /* Has to be a non-v9 branch due to the large distance. */
516 b rtrap_xcall 514 b rtrap_xcall
517 ldx [%sp + PTREGS_OFF + PT_V9_TSTATE], %l1 515 ldx [%sp + PTREGS_OFF + PT_V9_TSTATE], %l1
@@ -576,7 +574,7 @@ __hypervisor_tlb_xcall_error:
576 mov %l4, %o0 574 mov %l4, %o0
577 call hypervisor_tlbop_error_xcall 575 call hypervisor_tlbop_error_xcall
578 mov %l5, %o1 576 mov %l5, %o1
579 ba,a,pt %xcc, rtrap_clr_l6 577 ba,a,pt %xcc, rtrap
580 578
581 .globl __hypervisor_xcall_flush_tlb_mm 579 .globl __hypervisor_xcall_flush_tlb_mm
582__hypervisor_xcall_flush_tlb_mm: /* 21 insns */ 580__hypervisor_xcall_flush_tlb_mm: /* 21 insns */