commit    919ee677b656c52c5f86d3d916786891220d5452
tree      dd1202209945b4c2529af074effdb7300edda684
parent    1f261ef53ba06658dfeb5a9c3007d0ad1b85cadf
author    David S. Miller <davem@davemloft.net>  2008-04-23 08:40:25 -0400
committer David S. Miller <davem@davemloft.net>  2008-04-24 02:32:17 -0400

[SPARC64]: Add NUMA support.
Currently there is only code to parse NUMA attributes on
sun4v/niagara systems, but later on we will add such parsing
for older systems.
Signed-off-by: David S. Miller <davem@davemloft.net>

 Makefile                       |   2
 arch/sparc64/Kconfig           |  20
 arch/sparc64/defconfig         |  99
 arch/sparc64/kernel/sysfs.c    |  12
 arch/sparc64/mm/init.c         | 796
 include/asm-sparc64/mmzone.h   |  17
 include/asm-sparc64/topology.h |  73
 7 files changed, 881 insertions(+), 138 deletions(-)
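Before the per-file diffs, a note for readers skimming the patch: the heart of the sun4v parsing added in arch/sparc64/mm/init.c is a mask/match scheme. Each node is described by an (address-mask, address-match) pair taken from the machine description, and a physical address belongs to node i when (addr & mask[i]) == val[i]. The standalone sketch below is illustration only, not part of the patch; the mask/val numbers are invented for the example.

#include <stdio.h>

/* Illustration only: mirrors the node_masks[]/find_node() scheme the patch
 * adds to arch/sparc64/mm/init.c.  The mask/val values are invented for
 * the example, not real machine-description data. */
struct node_mem_mask { unsigned long long mask, val; };

static const struct node_mem_mask node_masks[] = {
	{ .mask = 0x8000000000ULL, .val = 0x0000000000ULL },	/* node 0 */
	{ .mask = 0x8000000000ULL, .val = 0x8000000000ULL },	/* node 1 */
};

static int find_node(unsigned long long addr)
{
	int i;

	for (i = 0; i < 2; i++)
		if ((addr & node_masks[i].mask) == node_masks[i].val)
			return i;
	return -1;	/* no node claims this address */
}

int main(void)
{
	printf("0x0123456000 -> node %d\n", find_node(0x0123456000ULL));
	printf("0x8123456000 -> node %d\n", find_node(0x8123456000ULL));
	return 0;
}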
diff --git a/Makefile b/Makefile
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 2
 PATCHLEVEL = 6
 SUBLEVEL = 25
-EXTRAVERSION =
+EXTRAVERSION = -numa
 NAME = Funky Weasel is Jiggy wit it
 
 # *DOCUMENTATION*
diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig
index df3eacb5ca15..8acc5cc38621 100644
--- a/arch/sparc64/Kconfig
+++ b/arch/sparc64/Kconfig
@@ -250,6 +250,26 @@ endchoice
 
 endmenu
 
+config NUMA
+	bool "NUMA support"
+
+config NODES_SHIFT
+	int
+	default "4"
+	depends on NEED_MULTIPLE_NODES
+
+# Some NUMA nodes have memory ranges that span
+# other nodes.  Even though a pfn is valid and
+# between a node's start and end pfns, it may not
+# reside on that node.  See memmap_init_zone()
+# for details.
+config NODES_SPAN_OTHER_NODES
+	def_bool y
+	depends on NEED_MULTIPLE_NODES
+
+config ARCH_POPULATES_NODE_MAP
+	def_bool y
+
 config ARCH_SELECT_MEMORY_MODEL
 	def_bool y
 
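NODES_SHIFT=4 is what bounds the per-node arrays added by this patch (node_masks[], node_data[], numa_cpumask_lookup_table[]). Assuming the generic derivation in <linux/numa.h>, where MAX_NUMNODES is 1 << NODES_SHIFT, that allows up to 16 nodes; a minimal sketch of the arithmetic (assumption: the generic definition is in effect, as it is when the architecture does not override it):

#include <stdio.h>

/* Sketch only: assumes MAX_NUMNODES is derived from CONFIG_NODES_SHIFT
 * the way the generic <linux/numa.h> does it (1 << NODES_SHIFT). */
#define CONFIG_NODES_SHIFT	4
#define NODES_SHIFT		CONFIG_NODES_SHIFT
#define MAX_NUMNODES		(1 << NODES_SHIFT)

int main(void)
{
	/* node_masks[], node_data[] etc. in the patch are sized by this. */
	printf("MAX_NUMNODES = %d\n", MAX_NUMNODES);	/* prints 16 */
	return 0;
}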
diff --git a/arch/sparc64/defconfig b/arch/sparc64/defconfig
index e1835868ad36..92f79680f70d 100644
--- a/arch/sparc64/defconfig
+++ b/arch/sparc64/defconfig
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.25
-# Sun Apr 20 01:33:21 2008
+# Linux kernel version: 2.6.25-numa
+# Wed Apr 23 04:49:08 2008
 #
 CONFIG_SPARC=y
 CONFIG_SPARC64=y
@@ -152,6 +152,8 @@ CONFIG_GENERIC_CALIBRATE_DELAY=y
 CONFIG_HUGETLB_PAGE_SIZE_4MB=y
 # CONFIG_HUGETLB_PAGE_SIZE_512K is not set
 # CONFIG_HUGETLB_PAGE_SIZE_64K is not set
+# CONFIG_NUMA is not set
+CONFIG_ARCH_POPULATES_NODE_MAP=y
 CONFIG_ARCH_SELECT_MEMORY_MODEL=y
 CONFIG_ARCH_SPARSEMEM_ENABLE=y
 CONFIG_ARCH_SPARSEMEM_DEFAULT=y
@@ -787,7 +789,6 @@ CONFIG_I2C_ALGOBIT=y
 # CONFIG_SENSORS_PCF8574 is not set
 # CONFIG_PCF8575 is not set
 # CONFIG_SENSORS_PCF8591 is not set
-# CONFIG_TPS65010 is not set
 # CONFIG_SENSORS_MAX6875 is not set
 # CONFIG_SENSORS_TSL2550 is not set
 # CONFIG_I2C_DEBUG_CORE is not set
@@ -869,6 +870,7 @@ CONFIG_SSB_POSSIBLE=y
 # Multifunction device drivers
 #
 # CONFIG_MFD_SM501 is not set
+# CONFIG_HTC_PASIC3 is not set
 
 #
 # Multimedia devices
@@ -1219,10 +1221,6 @@ CONFIG_USB_STORAGE=m
 # CONFIG_NEW_LEDS is not set
 # CONFIG_INFINIBAND is not set
 # CONFIG_RTC_CLASS is not set
-
-#
-# Userspace I/O
-#
 # CONFIG_UIO is not set
 
 #
@@ -1399,6 +1397,7 @@ CONFIG_SCHEDSTATS=y
 CONFIG_DEBUG_BUGVERBOSE=y
 # CONFIG_DEBUG_INFO is not set
 # CONFIG_DEBUG_VM is not set
+# CONFIG_DEBUG_WRITECOUNT is not set
 # CONFIG_DEBUG_LIST is not set
 # CONFIG_DEBUG_SG is not set
 # CONFIG_BOOT_PRINTK_DELAY is not set
@@ -1425,53 +1424,82 @@ CONFIG_ASYNC_CORE=m
 CONFIG_ASYNC_MEMCPY=m
 CONFIG_ASYNC_XOR=m
 CONFIG_CRYPTO=y
+
+#
+# Crypto core or helper
+#
 CONFIG_CRYPTO_ALGAPI=y
 CONFIG_CRYPTO_AEAD=y
 CONFIG_CRYPTO_BLKCIPHER=y
-# CONFIG_CRYPTO_SEQIV is not set
 CONFIG_CRYPTO_HASH=y
 CONFIG_CRYPTO_MANAGER=y
+CONFIG_CRYPTO_GF128MUL=m
+CONFIG_CRYPTO_NULL=m
+# CONFIG_CRYPTO_CRYPTD is not set
+CONFIG_CRYPTO_AUTHENC=y
+CONFIG_CRYPTO_TEST=m
+
+#
+# Authenticated Encryption with Associated Data
+#
+# CONFIG_CRYPTO_CCM is not set
+# CONFIG_CRYPTO_GCM is not set
+# CONFIG_CRYPTO_SEQIV is not set
+
+#
+# Block modes
+#
+CONFIG_CRYPTO_CBC=y
+# CONFIG_CRYPTO_CTR is not set
+# CONFIG_CRYPTO_CTS is not set
+CONFIG_CRYPTO_ECB=m
+CONFIG_CRYPTO_LRW=m
+CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_XTS=m
+
+#
+# Hash modes
+#
 CONFIG_CRYPTO_HMAC=y
 CONFIG_CRYPTO_XCBC=y
-CONFIG_CRYPTO_NULL=m
+
+#
+# Digest
+#
+CONFIG_CRYPTO_CRC32C=m
 CONFIG_CRYPTO_MD4=y
 CONFIG_CRYPTO_MD5=y
+CONFIG_CRYPTO_MICHAEL_MIC=m
 CONFIG_CRYPTO_SHA1=y
 CONFIG_CRYPTO_SHA256=m
 CONFIG_CRYPTO_SHA512=m
-CONFIG_CRYPTO_WP512=m
 CONFIG_CRYPTO_TGR192=m
-CONFIG_CRYPTO_GF128MUL=m
-CONFIG_CRYPTO_ECB=m
-CONFIG_CRYPTO_CBC=y
-CONFIG_CRYPTO_PCBC=m
-CONFIG_CRYPTO_LRW=m
-CONFIG_CRYPTO_XTS=m
-# CONFIG_CRYPTO_CTR is not set
-# CONFIG_CRYPTO_GCM is not set
-# CONFIG_CRYPTO_CCM is not set
-# CONFIG_CRYPTO_CRYPTD is not set
-CONFIG_CRYPTO_DES=y
-CONFIG_CRYPTO_FCRYPT=m
-CONFIG_CRYPTO_BLOWFISH=m
-CONFIG_CRYPTO_TWOFISH=m
-CONFIG_CRYPTO_TWOFISH_COMMON=m
-CONFIG_CRYPTO_SERPENT=m
+CONFIG_CRYPTO_WP512=m
+
+#
+# Ciphers
+#
 CONFIG_CRYPTO_AES=m
+CONFIG_CRYPTO_ANUBIS=m
+CONFIG_CRYPTO_ARC4=m
+CONFIG_CRYPTO_BLOWFISH=m
+CONFIG_CRYPTO_CAMELLIA=m
 CONFIG_CRYPTO_CAST5=m
 CONFIG_CRYPTO_CAST6=m
-CONFIG_CRYPTO_TEA=m
-CONFIG_CRYPTO_ARC4=m
+CONFIG_CRYPTO_DES=y
+CONFIG_CRYPTO_FCRYPT=m
 CONFIG_CRYPTO_KHAZAD=m
-CONFIG_CRYPTO_ANUBIS=m
-CONFIG_CRYPTO_SEED=m
 # CONFIG_CRYPTO_SALSA20 is not set
+CONFIG_CRYPTO_SEED=m
+CONFIG_CRYPTO_SERPENT=m
+CONFIG_CRYPTO_TEA=m
+CONFIG_CRYPTO_TWOFISH=m
+CONFIG_CRYPTO_TWOFISH_COMMON=m
+
+#
+# Compression
+#
 CONFIG_CRYPTO_DEFLATE=y
-CONFIG_CRYPTO_MICHAEL_MIC=m
-CONFIG_CRYPTO_CRC32C=m
-CONFIG_CRYPTO_CAMELLIA=m
-CONFIG_CRYPTO_TEST=m
-CONFIG_CRYPTO_AUTHENC=y
 # CONFIG_CRYPTO_LZO is not set
 CONFIG_CRYPTO_HW=y
 # CONFIG_CRYPTO_DEV_HIFN_795X is not set
@@ -1492,3 +1520,4 @@ CONFIG_PLIST=y
 CONFIG_HAS_IOMEM=y
 CONFIG_HAS_IOPORT=y
 CONFIG_HAS_DMA=y
+CONFIG_HAVE_LMB=y
diff --git a/arch/sparc64/kernel/sysfs.c b/arch/sparc64/kernel/sysfs.c
index 52816c7be0b9..e885034a6b73 100644
--- a/arch/sparc64/kernel/sysfs.c
+++ b/arch/sparc64/kernel/sysfs.c
@@ -273,10 +273,22 @@ static void __init check_mmu_stats(void)
 	mmu_stats_supported = 1;
 }
 
+static void register_nodes(void)
+{
+#ifdef CONFIG_NUMA
+	int i;
+
+	for (i = 0; i < MAX_NUMNODES; i++)
+		register_one_node(i);
+#endif
+}
+
 static int __init topology_init(void)
 {
 	int cpu;
 
+	register_nodes();
+
 	check_mmu_stats();
 
 	register_cpu_notifier(&sysfs_cpu_nb);
diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c
index 8e0e86787127..177d8aaeec42 100644
--- a/arch/sparc64/mm/init.c
+++ b/arch/sparc64/mm/init.c
@@ -25,6 +25,7 @@
 #include <linux/sort.h>
 #include <linux/percpu.h>
 #include <linux/lmb.h>
+#include <linux/mmzone.h>
 
 #include <asm/head.h>
 #include <asm/system.h>
@@ -73,9 +74,7 @@ extern struct tsb swapper_4m_tsb[KERNEL_TSB4M_NENTRIES];
 #define MAX_BANKS	32
 
 static struct linux_prom64_registers pavail[MAX_BANKS] __initdata;
-static struct linux_prom64_registers pavail_rescan[MAX_BANKS] __initdata;
 static int pavail_ents __initdata;
-static int pavail_rescan_ents __initdata;
 
 static int cmp_p64(const void *a, const void *b)
 {
@@ -716,19 +715,28 @@ out:
 	smp_new_mmu_context_version();
 }
 
-/* Find a free area for the bootmem map, avoiding the kernel image
- * and the initial ramdisk.
- */
-static unsigned long __init choose_bootmap_pfn(unsigned long start_pfn,
-					       unsigned long end_pfn)
-{
-	unsigned long bootmap_size;
+static int numa_enabled = 1;
+static int numa_debug;
+
+static int __init early_numa(char *p)
+{
+	if (!p)
+		return 0;
+
+	if (strstr(p, "off"))
+		numa_enabled = 0;
 
-	bootmap_size = bootmem_bootmap_pages(end_pfn - start_pfn);
-	bootmap_size <<= PAGE_SHIFT;
+	if (strstr(p, "debug"))
+		numa_debug = 1;
 
-	return lmb_alloc(bootmap_size, PAGE_SIZE) >> PAGE_SHIFT;
-}
+	return 0;
+}
+early_param("numa", early_numa);
+
+#define numadbg(f, a...) \
+do {	if (numa_debug) \
+		printk(KERN_INFO f, ## a); \
+} while (0)
 
 static void __init find_ramdisk(unsigned long phys_base)
 {
@@ -755,6 +763,9 @@ static void __init find_ramdisk(unsigned long phys_base)
 		ramdisk_image -= KERNBASE;
 		ramdisk_image += phys_base;
 
+		numadbg("Found ramdisk at physical address 0x%lx, size %u\n",
+			ramdisk_image, sparc_ramdisk_size);
+
 		initrd_start = ramdisk_image;
 		initrd_end = ramdisk_image + sparc_ramdisk_size;
 
@@ -763,60 +774,625 @@ static void __init find_ramdisk(unsigned long phys_base)
 #endif
 }
 
-/* About pages_avail, this is the value we will use to calculate
- * the zholes_size[] argument given to free_area_init_node().  The
- * page allocator uses this to calculate nr_kernel_pages,
- * nr_all_pages and zone->present_pages.  On NUMA it is used
- * to calculate zone->min_unmapped_pages and zone->min_slab_pages.
- *
- * So this number should really be set to what the page allocator
- * actually ends up with.  This means:
- *	1) It should include bootmem map pages, we'll release those.
- *	2) It should not include the kernel image, except for the
- *	   __init sections which we will also release.
- *	3) It should include the initrd image, since we'll release
- *	   that too.
+struct node_mem_mask {
+	unsigned long mask;
+	unsigned long val;
+	unsigned long bootmem_paddr;
+};
+static struct node_mem_mask node_masks[MAX_NUMNODES];
+static int num_node_masks;
+
+int numa_cpu_lookup_table[NR_CPUS];
+cpumask_t numa_cpumask_lookup_table[MAX_NUMNODES];
+
+#ifdef CONFIG_NEED_MULTIPLE_NODES
+static bootmem_data_t plat_node_bdata[MAX_NUMNODES];
+
+struct mdesc_mblock {
+	u64	base;
+	u64	size;
+	u64	offset; /* RA-to-PA */
+};
+static struct mdesc_mblock *mblocks;
+static int num_mblocks;
+
+static unsigned long ra_to_pa(unsigned long addr)
+{
+	int i;
+
+	for (i = 0; i < num_mblocks; i++) {
+		struct mdesc_mblock *m = &mblocks[i];
+
+		if (addr >= m->base &&
+		    addr < (m->base + m->size)) {
+			addr += m->offset;
+			break;
+		}
+	}
+	return addr;
+}
+
+static int find_node(unsigned long addr)
+{
+	int i;
+
+	addr = ra_to_pa(addr);
+	for (i = 0; i < num_node_masks; i++) {
+		struct node_mem_mask *p = &node_masks[i];
+
+		if ((addr & p->mask) == p->val)
+			return i;
+	}
+	return -1;
+}
+
+static unsigned long nid_range(unsigned long start, unsigned long end,
+			       int *nid)
+{
+	*nid = find_node(start);
+	start += PAGE_SIZE;
+	while (start < end) {
+		int n = find_node(start);
+
+		if (n != *nid)
+			break;
+		start += PAGE_SIZE;
+	}
+
+	return start;
+}
+#else
+static unsigned long nid_range(unsigned long start, unsigned long end,
+			       int *nid)
+{
+	*nid = 0;
+	return end;
+}
+#endif
+
+/* This must be invoked after performing all of the necessary
+ * add_active_range() calls for 'nid'.  We need to be able to get
+ * correct data from get_pfn_range_for_nid().
  */
-static unsigned long __init bootmem_init(unsigned long *pages_avail,
-					 unsigned long phys_base)
+static void __init allocate_node_data(int nid)
+{
+	unsigned long paddr, num_pages, start_pfn, end_pfn;
+	struct pglist_data *p;
+
+#ifdef CONFIG_NEED_MULTIPLE_NODES
+	paddr = lmb_alloc_nid(sizeof(struct pglist_data),
+			      SMP_CACHE_BYTES, nid, nid_range);
+	if (!paddr) {
+		prom_printf("Cannot allocate pglist_data for nid[%d]\n", nid);
+		prom_halt();
+	}
+	NODE_DATA(nid) = __va(paddr);
+	memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
+
+	NODE_DATA(nid)->bdata = &plat_node_bdata[nid];
+#endif
+
+	p = NODE_DATA(nid);
+
+	get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
+	p->node_start_pfn = start_pfn;
+	p->node_spanned_pages = end_pfn - start_pfn;
+
+	if (p->node_spanned_pages) {
+		num_pages = bootmem_bootmap_pages(p->node_spanned_pages);
+
+		paddr = lmb_alloc_nid(num_pages << PAGE_SHIFT, PAGE_SIZE, nid,
+				      nid_range);
+		if (!paddr) {
+			prom_printf("Cannot allocate bootmap for nid[%d]\n",
+				    nid);
+			prom_halt();
+		}
+		node_masks[nid].bootmem_paddr = paddr;
+	}
+}
+
+static void init_node_masks_nonnuma(void)
 {
-	unsigned long end_pfn;
 	int i;
 
-	*pages_avail = lmb_phys_mem_size() >> PAGE_SHIFT;
-	end_pfn = lmb_end_of_DRAM() >> PAGE_SHIFT;
+	numadbg("Initializing tables for non-numa.\n");
 
-	/* Initialize the boot-time allocator. */
-	max_pfn = max_low_pfn = end_pfn;
-	min_low_pfn = (phys_base >> PAGE_SHIFT);
+	node_masks[0].mask = node_masks[0].val = 0;
+	num_node_masks = 1;
 
-	init_bootmem_node(NODE_DATA(0),
-			  choose_bootmap_pfn(min_low_pfn, end_pfn),
-			  min_low_pfn, end_pfn);
+	for (i = 0; i < NR_CPUS; i++)
+		numa_cpu_lookup_table[i] = 0;
 
-	/* Now register the available physical memory with the
-	 * allocator.
-	 */
-	for (i = 0; i < lmb.memory.cnt; i++)
-		free_bootmem(lmb.memory.region[i].base,
-			     lmb_size_bytes(&lmb.memory, i));
+	numa_cpumask_lookup_table[0] = CPU_MASK_ALL;
+}
+
+#ifdef CONFIG_NEED_MULTIPLE_NODES
+struct pglist_data *node_data[MAX_NUMNODES];
+
+EXPORT_SYMBOL(numa_cpu_lookup_table);
+EXPORT_SYMBOL(numa_cpumask_lookup_table);
+EXPORT_SYMBOL(node_data);
+
+struct mdesc_mlgroup {
+	u64	node;
+	u64	latency;
+	u64	match;
+	u64	mask;
+};
+static struct mdesc_mlgroup *mlgroups;
+static int num_mlgroups;
+
+static int scan_pio_for_cfg_handle(struct mdesc_handle *md, u64 pio,
+				   u32 cfg_handle)
+{
+	u64 arc;
+
+	mdesc_for_each_arc(arc, md, pio, MDESC_ARC_TYPE_FWD) {
+		u64 target = mdesc_arc_target(md, arc);
+		const u64 *val;
+
+		val = mdesc_get_property(md, target,
+					 "cfg-handle", NULL);
+		if (val && *val == cfg_handle)
+			return 0;
+	}
+	return -ENODEV;
+}
+
+static int scan_arcs_for_cfg_handle(struct mdesc_handle *md, u64 grp,
+				    u32 cfg_handle)
+{
+	u64 arc, candidate, best_latency = ~(u64)0;
+
+	candidate = MDESC_NODE_NULL;
+	mdesc_for_each_arc(arc, md, grp, MDESC_ARC_TYPE_FWD) {
+		u64 target = mdesc_arc_target(md, arc);
+		const char *name = mdesc_node_name(md, target);
+		const u64 *val;
+
+		if (strcmp(name, "pio-latency-group"))
+			continue;
+
+		val = mdesc_get_property(md, target, "latency", NULL);
+		if (!val)
+			continue;
+
+		if (*val < best_latency) {
+			candidate = target;
+			best_latency = *val;
+		}
+	}
+
+	if (candidate == MDESC_NODE_NULL)
+		return -ENODEV;
+
+	return scan_pio_for_cfg_handle(md, candidate, cfg_handle);
+}
+
+int of_node_to_nid(struct device_node *dp)
+{
+	const struct linux_prom64_registers *regs;
+	struct mdesc_handle *md;
+	u32 cfg_handle;
+	int count, nid;
+	u64 grp;
+
+	if (!mlgroups)
+		return -1;
+
+	regs = of_get_property(dp, "reg", NULL);
+	if (!regs)
+		return -1;
+
+	cfg_handle = (regs->phys_addr >> 32UL) & 0x0fffffff;
+
+	md = mdesc_grab();
+
+	count = 0;
+	nid = -1;
+	mdesc_for_each_node_by_name(md, grp, "group") {
+		if (!scan_arcs_for_cfg_handle(md, grp, cfg_handle)) {
+			nid = count;
+			break;
+		}
+		count++;
+	}
+
+	mdesc_release(md);
+
+	return nid;
+}
+
+static void add_node_ranges(void)
+{
+	int i;
+
+	for (i = 0; i < lmb.memory.cnt; i++) {
+		unsigned long size = lmb_size_bytes(&lmb.memory, i);
+		unsigned long start, end;
+
+		start = lmb.memory.region[i].base;
+		end = start + size;
+		while (start < end) {
+			unsigned long this_end;
+			int nid;
+
+			this_end = nid_range(start, end, &nid);
+
+			numadbg("Adding active range nid[%d] "
+				"start[%lx] end[%lx]\n",
+				nid, start, this_end);
+
+			add_active_range(nid,
+					 start >> PAGE_SHIFT,
+					 this_end >> PAGE_SHIFT);
+
+			start = this_end;
+		}
+	}
+}
 
-	for (i = 0; i < lmb.reserved.cnt; i++)
-		reserve_bootmem(lmb.reserved.region[i].base,
-				lmb_size_bytes(&lmb.reserved, i),
-				BOOTMEM_DEFAULT);
+static int __init grab_mlgroups(struct mdesc_handle *md)
+{
+	unsigned long paddr;
+	int count = 0;
+	u64 node;
+
+	mdesc_for_each_node_by_name(md, node, "memory-latency-group")
+		count++;
+	if (!count)
+		return -ENOENT;
+
+	paddr = lmb_alloc(count * sizeof(struct mdesc_mlgroup),
+			  SMP_CACHE_BYTES);
+	if (!paddr)
+		return -ENOMEM;
+
+	mlgroups = __va(paddr);
+	num_mlgroups = count;
+
+	count = 0;
+	mdesc_for_each_node_by_name(md, node, "memory-latency-group") {
+		struct mdesc_mlgroup *m = &mlgroups[count++];
+		const u64 *val;
+
+		m->node = node;
+
+		val = mdesc_get_property(md, node, "latency", NULL);
+		m->latency = *val;
+		val = mdesc_get_property(md, node, "address-match", NULL);
+		m->match = *val;
+		val = mdesc_get_property(md, node, "address-mask", NULL);
+		m->mask = *val;
+
+		numadbg("MLGROUP[%d]: node[%lx] latency[%lx] "
+			"match[%lx] mask[%lx]\n",
+			count - 1, m->node, m->latency, m->match, m->mask);
+	}
 
-	*pages_avail -= PAGE_ALIGN(kern_size) >> PAGE_SHIFT;
+	return 0;
+}
 
-	for (i = 0; i < lmb.memory.cnt; ++i) {
-		unsigned long start_pfn, end_pfn, pages;
+static int __init grab_mblocks(struct mdesc_handle *md)
+{
+	unsigned long paddr;
+	int count = 0;
+	u64 node;
+
+	mdesc_for_each_node_by_name(md, node, "mblock")
+		count++;
+	if (!count)
+		return -ENOENT;
+
+	paddr = lmb_alloc(count * sizeof(struct mdesc_mblock),
+			  SMP_CACHE_BYTES);
+	if (!paddr)
+		return -ENOMEM;
+
+	mblocks = __va(paddr);
+	num_mblocks = count;
+
+	count = 0;
+	mdesc_for_each_node_by_name(md, node, "mblock") {
+		struct mdesc_mblock *m = &mblocks[count++];
+		const u64 *val;
+
+		val = mdesc_get_property(md, node, "base", NULL);
+		m->base = *val;
+		val = mdesc_get_property(md, node, "size", NULL);
+		m->size = *val;
+		val = mdesc_get_property(md, node,
+					 "address-congruence-offset", NULL);
+		m->offset = *val;
+
+		numadbg("MBLOCK[%d]: base[%lx] size[%lx] offset[%lx]\n",
+			count - 1, m->base, m->size, m->offset);
+	}
+
+	return 0;
+}
+
+static void __init numa_parse_mdesc_group_cpus(struct mdesc_handle *md,
+					       u64 grp, cpumask_t *mask)
+{
+	u64 arc;
+
+	cpus_clear(*mask);
+
+	mdesc_for_each_arc(arc, md, grp, MDESC_ARC_TYPE_BACK) {
+		u64 target = mdesc_arc_target(md, arc);
+		const char *name = mdesc_node_name(md, target);
+		const u64 *id;
+
+		if (strcmp(name, "cpu"))
+			continue;
+		id = mdesc_get_property(md, target, "id", NULL);
+		if (*id < NR_CPUS)
+			cpu_set(*id, *mask);
+	}
+}
+
+static struct mdesc_mlgroup * __init find_mlgroup(u64 node)
+{
+	int i;
+
+	for (i = 0; i < num_mlgroups; i++) {
+		struct mdesc_mlgroup *m = &mlgroups[i];
+		if (m->node == node)
+			return m;
+	}
+	return NULL;
+}
+
+static int __init numa_attach_mlgroup(struct mdesc_handle *md, u64 grp,
+				      int index)
+{
+	struct mdesc_mlgroup *candidate = NULL;
+	u64 arc, best_latency = ~(u64)0;
+	struct node_mem_mask *n;
+
+	mdesc_for_each_arc(arc, md, grp, MDESC_ARC_TYPE_FWD) {
+		u64 target = mdesc_arc_target(md, arc);
+		struct mdesc_mlgroup *m = find_mlgroup(target);
+		if (!m)
+			continue;
+		if (m->latency < best_latency) {
+			candidate = m;
+			best_latency = m->latency;
+		}
+	}
+	if (!candidate)
+		return -ENOENT;
+
+	if (num_node_masks != index) {
+		printk(KERN_ERR "Inconsistent NUMA state, "
+		       "index[%d] != num_node_masks[%d]\n",
+		       index, num_node_masks);
+		return -EINVAL;
+	}
+
+	n = &node_masks[num_node_masks++];
+
+	n->mask = candidate->mask;
+	n->val = candidate->match;
+
+	numadbg("NUMA NODE[%d]: mask[%lx] val[%lx] (latency[%lx])\n",
+		index, n->mask, n->val, candidate->latency);
+
+	return 0;
+}
+
+static int __init numa_parse_mdesc_group(struct mdesc_handle *md, u64 grp,
+					 int index)
+{
+	cpumask_t mask;
+	int cpu;
+
+	numa_parse_mdesc_group_cpus(md, grp, &mask);
+
+	for_each_cpu_mask(cpu, mask)
+		numa_cpu_lookup_table[cpu] = index;
+	numa_cpumask_lookup_table[index] = mask;
+
+	if (numa_debug) {
+		printk(KERN_INFO "NUMA GROUP[%d]: cpus [ ", index);
+		for_each_cpu_mask(cpu, mask)
+			printk("%d ", cpu);
+		printk("]\n");
+	}
+
+	return numa_attach_mlgroup(md, grp, index);
+}
+
+static int __init numa_parse_mdesc(void)
+{
+	struct mdesc_handle *md = mdesc_grab();
+	int i, err, count;
+	u64 node;
+
+	node = mdesc_node_by_name(md, MDESC_NODE_NULL, "latency-groups");
+	if (node == MDESC_NODE_NULL) {
+		mdesc_release(md);
+		return -ENOENT;
+	}
+
+	err = grab_mblocks(md);
+	if (err < 0)
+		goto out;
+
+	err = grab_mlgroups(md);
+	if (err < 0)
+		goto out;
+
+	count = 0;
+	mdesc_for_each_node_by_name(md, node, "group") {
+		err = numa_parse_mdesc_group(md, node, count);
+		if (err < 0)
+			break;
+		count++;
+	}
+
+	add_node_ranges();
+
+	for (i = 0; i < num_node_masks; i++) {
+		allocate_node_data(i);
+		node_set_online(i);
+	}
+
+	err = 0;
+out:
+	mdesc_release(md);
+	return err;
+}
+
+static int __init numa_parse_sun4u(void)
+{
+	return -1;
+}
+
+static int __init bootmem_init_numa(void)
+{
+	int err = -1;
+
+	numadbg("bootmem_init_numa()\n");
+
+	if (numa_enabled) {
+		if (tlb_type == hypervisor)
+			err = numa_parse_mdesc();
+		else
+			err = numa_parse_sun4u();
+	}
+	return err;
+}
+
+#else
+
+static int bootmem_init_numa(void)
+{
+	return -1;
+}
+
+#endif
+
+static void __init bootmem_init_nonnuma(void)
+{
+	unsigned long top_of_ram = lmb_end_of_DRAM();
+	unsigned long total_ram = lmb_phys_mem_size();
+	unsigned int i;
+
+	numadbg("bootmem_init_nonnuma()\n");
+
+	printk(KERN_INFO "Top of RAM: 0x%lx, Total RAM: 0x%lx\n",
+	       top_of_ram, total_ram);
+	printk(KERN_INFO "Memory hole size: %ldMB\n",
+	       (top_of_ram - total_ram) >> 20);
+
+	init_node_masks_nonnuma();
+
+	for (i = 0; i < lmb.memory.cnt; i++) {
+		unsigned long size = lmb_size_bytes(&lmb.memory, i);
+		unsigned long start_pfn, end_pfn;
+
+		if (!size)
+			continue;
 
-		pages = lmb_size_pages(&lmb.memory, i);
 		start_pfn = lmb.memory.region[i].base >> PAGE_SHIFT;
-		end_pfn = start_pfn + pages;
+		end_pfn = start_pfn + lmb_size_pages(&lmb.memory, i);
+		add_active_range(0, start_pfn, end_pfn);
+	}
 
-		memory_present(0, start_pfn, end_pfn);
+	allocate_node_data(0);
+
+	node_set_online(0);
+}
+
+static void __init reserve_range_in_node(int nid, unsigned long start,
+					 unsigned long end)
+{
+	numadbg(" reserve_range_in_node(nid[%d],start[%lx],end[%lx]\n",
+		nid, start, end);
+	while (start < end) {
+		unsigned long this_end;
+		int n;
+
+		this_end = nid_range(start, end, &n);
+		if (n == nid) {
+			numadbg(" MATCH reserving range [%lx:%lx]\n",
+				start, this_end);
+			reserve_bootmem_node(NODE_DATA(nid), start,
+					     (this_end - start), BOOTMEM_DEFAULT);
+		} else
+			numadbg(" NO MATCH, advancing start to %lx\n",
+				this_end);
+
+		start = this_end;
 	}
+}
+
+static void __init trim_reserved_in_node(int nid)
+{
+	int i;
+
+	numadbg(" trim_reserved_in_node(%d)\n", nid);
+
+	for (i = 0; i < lmb.reserved.cnt; i++) {
+		unsigned long start = lmb.reserved.region[i].base;
+		unsigned long size = lmb_size_bytes(&lmb.reserved, i);
+		unsigned long end = start + size;
+
+		reserve_range_in_node(nid, start, end);
+	}
+}
+
+static void __init bootmem_init_one_node(int nid)
+{
+	struct pglist_data *p;
+
+	numadbg("bootmem_init_one_node(%d)\n", nid);
+
+	p = NODE_DATA(nid);
+
+	if (p->node_spanned_pages) {
+		unsigned long paddr = node_masks[nid].bootmem_paddr;
+		unsigned long end_pfn;
+
+		end_pfn = p->node_start_pfn + p->node_spanned_pages;
+
+		numadbg(" init_bootmem_node(%d, %lx, %lx, %lx)\n",
+			nid, paddr >> PAGE_SHIFT, p->node_start_pfn, end_pfn);
+
+		init_bootmem_node(p, paddr >> PAGE_SHIFT,
+				  p->node_start_pfn, end_pfn);
+
+		numadbg(" free_bootmem_with_active_regions(%d, %lx)\n",
+			nid, end_pfn);
+		free_bootmem_with_active_regions(nid, end_pfn);
+
+		trim_reserved_in_node(nid);
+
+		numadbg(" sparse_memory_present_with_active_regions(%d)\n",
+			nid);
+		sparse_memory_present_with_active_regions(nid);
+	}
+}
+
+static unsigned long __init bootmem_init(unsigned long phys_base)
+{
+	unsigned long end_pfn;
+	int nid;
+
+	end_pfn = lmb_end_of_DRAM() >> PAGE_SHIFT;
+	max_pfn = max_low_pfn = end_pfn;
+	min_low_pfn = (phys_base >> PAGE_SHIFT);
+
+	if (bootmem_init_numa() < 0)
+		bootmem_init_nonnuma();
+
+	/* XXX cpu notifier XXX */
+
+	for_each_online_node(nid)
+		bootmem_init_one_node(nid);
 
 	sparse_init();
 
@@ -1112,7 +1688,7 @@ void __init setup_per_cpu_areas(void)
 
 void __init paging_init(void)
 {
-	unsigned long end_pfn, pages_avail, shift, phys_base;
+	unsigned long end_pfn, shift, phys_base;
 	unsigned long real_end, i;
 
 	/* These build time checkes make sure that the dcache_dirty_cpu()
@@ -1220,27 +1796,21 @@ void __init paging_init(void)
 	sun4v_mdesc_init();
 
 	/* Setup bootmem... */
-	pages_avail = 0;
-	last_valid_pfn = end_pfn = bootmem_init(&pages_avail, phys_base);
+	last_valid_pfn = end_pfn = bootmem_init(phys_base);
 
+#ifndef CONFIG_NEED_MULTIPLE_NODES
 	max_mapnr = last_valid_pfn;
-
+#endif
 	kernel_physical_mapping_init();
 
 	{
-		unsigned long zones_size[MAX_NR_ZONES];
-		unsigned long zholes_size[MAX_NR_ZONES];
-		int znum;
+		unsigned long max_zone_pfns[MAX_NR_ZONES];
 
-		for (znum = 0; znum < MAX_NR_ZONES; znum++)
-			zones_size[znum] = zholes_size[znum] = 0;
+		memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
 
-		zones_size[ZONE_NORMAL] = end_pfn;
-		zholes_size[ZONE_NORMAL] = end_pfn - pages_avail;
+		max_zone_pfns[ZONE_NORMAL] = end_pfn;
 
-		free_area_init_node(0, &contig_page_data, zones_size,
-				    __pa(PAGE_OFFSET) >> PAGE_SHIFT,
-				    zholes_size);
+		free_area_init_nodes(max_zone_pfns);
 	}
 
 	printk("Booting Linux...\n");
@@ -1249,21 +1819,52 @@ void __init paging_init(void)
 	cpu_probe();
 }
 
-static void __init taint_real_pages(void)
+int __init page_in_phys_avail(unsigned long paddr)
+{
+	int i;
+
+	paddr &= PAGE_MASK;
+
+	for (i = 0; i < pavail_ents; i++) {
+		unsigned long start, end;
+
+		start = pavail[i].phys_addr;
+		end = start + pavail[i].reg_size;
+
+		if (paddr >= start && paddr < end)
+			return 1;
+	}
+	if (paddr >= kern_base && paddr < (kern_base + kern_size))
+		return 1;
+#ifdef CONFIG_BLK_DEV_INITRD
+	if (paddr >= __pa(initrd_start) &&
+	    paddr < __pa(PAGE_ALIGN(initrd_end)))
+		return 1;
+#endif
+
+	return 0;
+}
+
+static struct linux_prom64_registers pavail_rescan[MAX_BANKS] __initdata;
+static int pavail_rescan_ents __initdata;
+
+/* Certain OBP calls, such as fetching "available" properties, can
+ * claim physical memory.  So, along with initializing the valid
+ * address bitmap, what we do here is refetch the physical available
+ * memory list again, and make sure it provides at least as much
+ * memory as 'pavail' does.
+ */
+static void setup_valid_addr_bitmap_from_pavail(void)
 {
 	int i;
 
 	read_obp_memory("available", &pavail_rescan[0], &pavail_rescan_ents);
 
-	/* Find changes discovered in the physmem available rescan and
-	 * reserve the lost portions in the bootmem maps.
-	 */
 	for (i = 0; i < pavail_ents; i++) {
 		unsigned long old_start, old_end;
 
 		old_start = pavail[i].phys_addr;
-		old_end = old_start +
-			pavail[i].reg_size;
+		old_end = old_start + pavail[i].reg_size;
 		while (old_start < old_end) {
 			int n;
 
@@ -1281,7 +1882,16 @@ static void __init taint_real_pages(void)
 					goto do_next_page;
 				}
 			}
-			reserve_bootmem(old_start, PAGE_SIZE, BOOTMEM_DEFAULT);
+
+			prom_printf("mem_init: Lost memory in pavail\n");
+			prom_printf("mem_init: OLD start[%lx] size[%lx]\n",
+				    pavail[i].phys_addr,
+				    pavail[i].reg_size);
+			prom_printf("mem_init: NEW start[%lx] size[%lx]\n",
+				    pavail_rescan[i].phys_addr,
+				    pavail_rescan[i].reg_size);
+			prom_printf("mem_init: Cannot continue, aborting.\n");
+			prom_halt();
 
 	do_next_page:
 			old_start += PAGE_SIZE;
@@ -1289,32 +1899,6 @@ static void __init taint_real_pages(void)
 	}
 }
 
-int __init page_in_phys_avail(unsigned long paddr)
-{
-	int i;
-
-	paddr &= PAGE_MASK;
-
-	for (i = 0; i < pavail_rescan_ents; i++) {
-		unsigned long start, end;
-
-		start = pavail_rescan[i].phys_addr;
-		end = start + pavail_rescan[i].reg_size;
-
-		if (paddr >= start && paddr < end)
-			return 1;
-	}
-	if (paddr >= kern_base && paddr < (kern_base + kern_size))
-		return 1;
-#ifdef CONFIG_BLK_DEV_INITRD
-	if (paddr >= __pa(initrd_start) &&
-	    paddr < __pa(PAGE_ALIGN(initrd_end)))
-		return 1;
-#endif
-
-	return 0;
-}
-
 void __init mem_init(void)
 {
 	unsigned long codepages, datapages, initpages;
@@ -1337,14 +1921,26 @@ void __init mem_init(void)
 		addr += PAGE_SIZE;
 	}
 
-	taint_real_pages();
+	setup_valid_addr_bitmap_from_pavail();
 
 	high_memory = __va(last_valid_pfn << PAGE_SHIFT);
 
+#ifdef CONFIG_NEED_MULTIPLE_NODES
+	for_each_online_node(i) {
+		if (NODE_DATA(i)->node_spanned_pages != 0) {
+			totalram_pages +=
+				free_all_bootmem_node(NODE_DATA(i));
+		}
+	}
+#else
+	totalram_pages = free_all_bootmem();
+#endif
+
 	/* We subtract one to account for the mem_map_zero page
 	 * allocated below.
 	 */
-	totalram_pages = num_physpages = free_all_bootmem() - 1;
+	totalram_pages -= 1;
+	num_physpages = totalram_pages;
 
 	/*
 	 * Set up the zero page, mark it reserved, so that page count
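The early_numa() handler added above means the new behaviour can be steered from the kernel command line: "numa=off" disables the parsing entirely and "numa=debug" turns on the numadbg() prints. The standalone sketch below reproduces only the strstr()-based option matching, as an illustration of how the option string is interpreted; it is not the kernel code itself.

#include <stdio.h>
#include <string.h>

/* Illustration of how early_numa() in the patch interprets the string
 * passed as numa=... on the command line. */
static int numa_enabled = 1;
static int numa_debug;

static void early_numa(const char *p)
{
	if (!p)
		return;
	if (strstr(p, "off"))
		numa_enabled = 0;	/* e.g. "numa=off" */
	if (strstr(p, "debug"))
		numa_debug = 1;		/* e.g. "numa=debug" or "numa=off,debug" */
}

int main(void)
{
	early_numa("off,debug");
	printf("enabled=%d debug=%d\n", numa_enabled, numa_debug);
	return 0;
}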
diff --git a/include/asm-sparc64/mmzone.h b/include/asm-sparc64/mmzone.h
new file mode 100644
index 000000000000..ebf5986c12ed
--- /dev/null
+++ b/include/asm-sparc64/mmzone.h
@@ -0,0 +1,17 @@
+#ifndef _SPARC64_MMZONE_H
+#define _SPARC64_MMZONE_H
+
+#ifdef CONFIG_NEED_MULTIPLE_NODES
+
+extern struct pglist_data *node_data[];
+
+#define NODE_DATA(nid)		(node_data[nid])
+#define node_start_pfn(nid)	(NODE_DATA(nid)->node_start_pfn)
+#define node_end_pfn(nid)	(NODE_DATA(nid)->node_end_pfn)
+
+extern int numa_cpu_lookup_table[];
+extern cpumask_t numa_cpumask_lookup_table[];
+
+#endif /* CONFIG_NEED_MULTIPLE_NODES */
+
+#endif /* _SPARC64_MMZONE_H */
diff --git a/include/asm-sparc64/topology.h b/include/asm-sparc64/topology.h
index c6b557034f68..001c04027c82 100644
--- a/include/asm-sparc64/topology.h
+++ b/include/asm-sparc64/topology.h
@@ -1,6 +1,77 @@
 #ifndef _ASM_SPARC64_TOPOLOGY_H
 #define _ASM_SPARC64_TOPOLOGY_H
 
+#ifdef CONFIG_NUMA
+
+#include <asm/mmzone.h>
+
+static inline int cpu_to_node(int cpu)
+{
+	return numa_cpu_lookup_table[cpu];
+}
+
+#define parent_node(node)	(node)
+
+static inline cpumask_t node_to_cpumask(int node)
+{
+	return numa_cpumask_lookup_table[node];
+}
+
+/* Returns a pointer to the cpumask of CPUs on Node 'node'. */
+#define node_to_cpumask_ptr(v, node)		\
+		cpumask_t *v = &(numa_cpumask_lookup_table[node])
+
+#define node_to_cpumask_ptr_next(v, node)	\
+		v = &(numa_cpumask_lookup_table[node])
+
+static inline int node_to_first_cpu(int node)
+{
+	cpumask_t tmp;
+	tmp = node_to_cpumask(node);
+	return first_cpu(tmp);
+}
+
+struct pci_bus;
+#ifdef CONFIG_PCI
+extern int pcibus_to_node(struct pci_bus *pbus);
+#else
+static inline int pcibus_to_node(struct pci_bus *pbus)
+{
+	return -1;
+}
+#endif
+
+#define pcibus_to_cpumask(bus)	\
+	(pcibus_to_node(bus) == -1 ? \
+	 CPU_MASK_ALL : \
+	 node_to_cpumask(pcibus_to_node(bus)))
+
+#define SD_NODE_INIT (struct sched_domain) {	\
+	.min_interval		= 8,		\
+	.max_interval		= 32,		\
+	.busy_factor		= 32,		\
+	.imbalance_pct		= 125,		\
+	.cache_nice_tries	= 2,		\
+	.busy_idx		= 3,		\
+	.idle_idx		= 2,		\
+	.newidle_idx		= 0,		\
+	.wake_idx		= 1,		\
+	.forkexec_idx		= 1,		\
+	.flags			= SD_LOAD_BALANCE	\
+				| SD_BALANCE_FORK	\
+				| SD_BALANCE_EXEC	\
+				| SD_SERIALIZE		\
+				| SD_WAKE_BALANCE,	\
+	.last_balance		= jiffies,	\
+	.balance_interval	= 1,		\
+}
+
+#else /* CONFIG_NUMA */
+
+#include <asm-generic/topology.h>
+
+#endif /* !(CONFIG_NUMA) */
+
 #ifdef CONFIG_SMP
 #define topology_physical_package_id(cpu)	(cpu_data(cpu).proc_id)
 #define topology_core_id(cpu)			(cpu_data(cpu).core_id)
@@ -10,8 +81,6 @@
 #define smt_capable()				(sparc64_multi_core)
 #endif /* CONFIG_SMP */
 
-#include <asm-generic/topology.h>
-
 #define cpu_coregroup_map(cpu)			(cpu_core_map[cpu])
 
 #endif /* _ASM_SPARC64_TOPOLOGY_H */
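With the topology.h additions, generic code can ask which node a CPU sits on and which CPUs share a node, backed by the two lookup tables the mm/init.c changes export. The user-space style sketch below is illustrative only: it reduces cpumask_t to a plain bitmask and the table contents are invented for the example.

#include <stdio.h>

/* Minimal stand-in for the two lookup tables the patch exports from
 * arch/sparc64/mm/init.c; sizes and values here are invented. */
#define NR_CPUS		8
#define MAX_NUMNODES	2

static int numa_cpu_lookup_table[NR_CPUS] = { 0, 0, 0, 0, 1, 1, 1, 1 };
static unsigned long numa_cpumask_lookup_table[MAX_NUMNODES] = { 0x0f, 0xf0 };

static int cpu_to_node(int cpu)
{
	return numa_cpu_lookup_table[cpu];
}

static unsigned long node_to_cpumask(int node)
{
	return numa_cpumask_lookup_table[node];
}

int main(void)
{
	printf("cpu 5 is on node %d\n", cpu_to_node(5));
	printf("node 0 cpu mask: 0x%lx\n", node_to_cpumask(0));
	return 0;
}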