about summary refs log tree commit diff stats
path: root/arch/tile/kernel/setup.c
diff options
context:
space:
mode:
authorChris Metcalf <cmetcalf@tilera.com>2012-04-01 14:04:21 -0400
committerChris Metcalf <cmetcalf@tilera.com>2012-05-25 12:48:27 -0400
commit621b19551507c8fd9d721f4038509c5bb155a983 (patch)
tree62d8d5e7a783364940153b4523fcfba821cee241 /arch/tile/kernel/setup.c
parentd9ed9faac283a3be73f0e11a2ef49ee55aece4db (diff)
arch/tile: support multiple huge page sizes dynamically
This change adds support for a new "super" bit in the PTE, using the new arch_make_huge_pte() method. The Tilera hypervisor sees the bit set at a given level of the page table and gangs together 4, 16, or 64 consecutive pages from that level of the hierarchy to create a larger TLB entry. One extra "super" page size can be specified at each of the three levels of the page table hierarchy on tilegx, using the "hugepagesz" argument on the boot command line. A new hypervisor API is added to allow Linux to tell the hypervisor how many PTEs to gang together at each level of the page table. To allow pre-allocating huge pages larger than the buddy allocator can handle, this change modifies the Tilera bootmem support to put all of memory on tilegx platforms into bootmem. As part of this change I eliminate the vestigial CONFIG_HIGHPTE support, which never worked anyway, and eliminate the hv_page_size() API in favor of the standard vma_kernel_pagesize() API. Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
Diffstat (limited to 'arch/tile/kernel/setup.c')
-rw-r--r--arch/tile/kernel/setup.c161
1 file changed, 111 insertions, 50 deletions
diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c
index 32948e21113a..445c220eae51 100644
--- a/arch/tile/kernel/setup.c
+++ b/arch/tile/kernel/setup.c
@@ -28,6 +28,7 @@
28#include <linux/highmem.h> 28#include <linux/highmem.h>
29#include <linux/smp.h> 29#include <linux/smp.h>
30#include <linux/timex.h> 30#include <linux/timex.h>
31#include <linux/hugetlb.h>
31#include <asm/setup.h> 32#include <asm/setup.h>
32#include <asm/sections.h> 33#include <asm/sections.h>
33#include <asm/cacheflush.h> 34#include <asm/cacheflush.h>
@@ -49,9 +50,6 @@ char chip_model[64] __write_once;
49struct pglist_data node_data[MAX_NUMNODES] __read_mostly; 50struct pglist_data node_data[MAX_NUMNODES] __read_mostly;
50EXPORT_SYMBOL(node_data); 51EXPORT_SYMBOL(node_data);
51 52
52/* We only create bootmem data on node 0. */
53static bootmem_data_t __initdata node0_bdata;
54
55/* Information on the NUMA nodes that we compute early */ 53/* Information on the NUMA nodes that we compute early */
56unsigned long __cpuinitdata node_start_pfn[MAX_NUMNODES]; 54unsigned long __cpuinitdata node_start_pfn[MAX_NUMNODES];
57unsigned long __cpuinitdata node_end_pfn[MAX_NUMNODES]; 55unsigned long __cpuinitdata node_end_pfn[MAX_NUMNODES];
@@ -518,37 +516,96 @@ static void __init setup_memory(void)
518#endif 516#endif
519} 517}
520 518
521static void __init setup_bootmem_allocator(void) 519/*
520 * On 32-bit machines, we only put bootmem on the low controller,
521 * since PAs > 4GB can't be used in bootmem. In principle one could
522 * imagine, e.g., multiple 1 GB controllers all of which could support
523 * bootmem, but in practice using controllers this small isn't a
524 * particularly interesting scenario, so we just keep it simple and
525 * use only the first controller for bootmem on 32-bit machines.
526 */
527static inline int node_has_bootmem(int nid)
522{ 528{
523 unsigned long bootmap_size, first_alloc_pfn, last_alloc_pfn; 529#ifdef CONFIG_64BIT
530 return 1;
531#else
532 return nid == 0;
533#endif
534}
524 535
525 /* Provide a node 0 bdata. */ 536static inline unsigned long alloc_bootmem_pfn(int nid,
526 NODE_DATA(0)->bdata = &node0_bdata; 537 unsigned long size,
538 unsigned long goal)
539{
540 void *kva = __alloc_bootmem_node(NODE_DATA(nid), size,
541 PAGE_SIZE, goal);
542 unsigned long pfn = kaddr_to_pfn(kva);
543 BUG_ON(goal && PFN_PHYS(pfn) != goal);
544 return pfn;
545}
527 546
528#ifdef CONFIG_PCI 547static void __init setup_bootmem_allocator_node(int i)
529 /* Don't let boot memory alias the PCI region. */ 548{
530 last_alloc_pfn = min(max_low_pfn, pci_reserve_start_pfn); 549 unsigned long start, end, mapsize, mapstart;
550
551 if (node_has_bootmem(i)) {
552 NODE_DATA(i)->bdata = &bootmem_node_data[i];
553 } else {
554 /* Share controller zero's bdata for now. */
555 NODE_DATA(i)->bdata = &bootmem_node_data[0];
556 return;
557 }
558
559 /* Skip up to after the bss in node 0. */
560 start = (i == 0) ? min_low_pfn : node_start_pfn[i];
561
562 /* Only lowmem, if we're a HIGHMEM build. */
563#ifdef CONFIG_HIGHMEM
564 end = node_lowmem_end_pfn[i];
531#else 565#else
532 last_alloc_pfn = max_low_pfn; 566 end = node_end_pfn[i];
533#endif 567#endif
534 568
535 /* 569 /* No memory here. */
536 * Initialize the boot-time allocator (with low memory only): 570 if (end == start)
537 * The first argument says where to put the bitmap, and the 571 return;
538 * second says where the end of allocatable memory is. 572
539 */ 573 /* Figure out where the bootmem bitmap is located. */
540 bootmap_size = init_bootmem(min_low_pfn, last_alloc_pfn); 574 mapsize = bootmem_bootmap_pages(end - start);
575 if (i == 0) {
576 /* Use some space right before the heap on node 0. */
577 mapstart = start;
578 start += mapsize;
579 } else {
580 /* Allocate bitmap on node 0 to avoid page table issues. */
581 mapstart = alloc_bootmem_pfn(0, PFN_PHYS(mapsize), 0);
582 }
541 583
584 /* Initialize a node. */
585 init_bootmem_node(NODE_DATA(i), mapstart, start, end);
586
587 /* Free all the space back into the allocator. */
588 free_bootmem(PFN_PHYS(start), PFN_PHYS(end - start));
589
590#if defined(CONFIG_PCI)
542 /* 591 /*
543 * Let the bootmem allocator use all the space we've given it 592 * Throw away any memory aliased by the PCI region. FIXME: this
544 * except for its own bitmap. 593 * is a temporary hack to work around bug 10502, and needs to be
594 * fixed properly.
545 */ 595 */
546 first_alloc_pfn = min_low_pfn + PFN_UP(bootmap_size); 596 if (pci_reserve_start_pfn < end && pci_reserve_end_pfn > start)
547 if (first_alloc_pfn >= last_alloc_pfn) 597 reserve_bootmem(PFN_PHYS(pci_reserve_start_pfn),
548 early_panic("Not enough memory on controller 0 for bootmem\n"); 598 PFN_PHYS(pci_reserve_end_pfn -
599 pci_reserve_start_pfn),
600 BOOTMEM_EXCLUSIVE);
601#endif
602}
549 603
550 free_bootmem(PFN_PHYS(first_alloc_pfn), 604static void __init setup_bootmem_allocator(void)
551 PFN_PHYS(last_alloc_pfn - first_alloc_pfn)); 605{
606 int i;
607 for (i = 0; i < MAX_NUMNODES; ++i)
608 setup_bootmem_allocator_node(i);
552 609
553#ifdef CONFIG_KEXEC 610#ifdef CONFIG_KEXEC
554 if (crashk_res.start != crashk_res.end) 611 if (crashk_res.start != crashk_res.end)
@@ -579,14 +636,6 @@ static int __init percpu_size(void)
579 return size; 636 return size;
580} 637}
581 638
582static inline unsigned long alloc_bootmem_pfn(int size, unsigned long goal)
583{
584 void *kva = __alloc_bootmem(size, PAGE_SIZE, goal);
585 unsigned long pfn = kaddr_to_pfn(kva);
586 BUG_ON(goal && PFN_PHYS(pfn) != goal);
587 return pfn;
588}
589
590static void __init zone_sizes_init(void) 639static void __init zone_sizes_init(void)
591{ 640{
592 unsigned long zones_size[MAX_NR_ZONES] = { 0 }; 641 unsigned long zones_size[MAX_NR_ZONES] = { 0 };
@@ -624,21 +673,22 @@ static void __init zone_sizes_init(void)
624 * though, there'll be no lowmem, so we just alloc_bootmem 673 * though, there'll be no lowmem, so we just alloc_bootmem
625 * the memmap. There will be no percpu memory either. 674 * the memmap. There will be no percpu memory either.
626 */ 675 */
627 if (__pfn_to_highbits(start) == 0) { 676 if (i != 0 && cpu_isset(i, isolnodes)) {
628 /* In low PAs, allocate via bootmem. */ 677 node_memmap_pfn[i] =
678 alloc_bootmem_pfn(0, memmap_size, 0);
679 BUG_ON(node_percpu[i] != 0);
680 } else if (node_has_bootmem(start)) {
629 unsigned long goal = 0; 681 unsigned long goal = 0;
630 node_memmap_pfn[i] = 682 node_memmap_pfn[i] =
631 alloc_bootmem_pfn(memmap_size, goal); 683 alloc_bootmem_pfn(i, memmap_size, 0);
632 if (kdata_huge) 684 if (kdata_huge)
633 goal = PFN_PHYS(lowmem_end) - node_percpu[i]; 685 goal = PFN_PHYS(lowmem_end) - node_percpu[i];
634 if (node_percpu[i]) 686 if (node_percpu[i])
635 node_percpu_pfn[i] = 687 node_percpu_pfn[i] =
636 alloc_bootmem_pfn(node_percpu[i], goal); 688 alloc_bootmem_pfn(i, node_percpu[i],
637 } else if (cpu_isset(i, isolnodes)) { 689 goal);
638 node_memmap_pfn[i] = alloc_bootmem_pfn(memmap_size, 0);
639 BUG_ON(node_percpu[i] != 0);
640 } else { 690 } else {
641 /* In high PAs, just reserve some pages. */ 691 /* In non-bootmem zones, just reserve some pages. */
642 node_memmap_pfn[i] = node_free_pfn[i]; 692 node_memmap_pfn[i] = node_free_pfn[i];
643 node_free_pfn[i] += PFN_UP(memmap_size); 693 node_free_pfn[i] += PFN_UP(memmap_size);
644 if (!kdata_huge) { 694 if (!kdata_huge) {
@@ -662,16 +712,9 @@ static void __init zone_sizes_init(void)
662 zones_size[ZONE_NORMAL] = end - start; 712 zones_size[ZONE_NORMAL] = end - start;
663#endif 713#endif
664 714
665 /* 715 /* Take zone metadata from controller 0 if we're isolnode. */
666 * Everyone shares node 0's bootmem allocator, but 716 if (node_isset(i, isolnodes))
667 * we use alloc_remap(), above, to put the actual 717 NODE_DATA(i)->bdata = &bootmem_node_data[0];
668 * struct page array on the individual controllers,
669 * which is most of the data that we actually care about.
670 * We can't place bootmem allocators on the other
671 * controllers since the bootmem allocator can only
672 * operate on 32-bit physical addresses.
673 */
674 NODE_DATA(i)->bdata = NODE_DATA(0)->bdata;
675 718
676 free_area_init_node(i, zones_size, start, NULL); 719 free_area_init_node(i, zones_size, start, NULL);
677 printk(KERN_DEBUG " Normal zone: %ld per-cpu pages\n", 720 printk(KERN_DEBUG " Normal zone: %ld per-cpu pages\n",
@@ -854,6 +897,22 @@ subsys_initcall(topology_init);
854 897
855#endif /* CONFIG_NUMA */ 898#endif /* CONFIG_NUMA */
856 899
900/*
901 * Initialize hugepage support on this cpu. We do this on all cores
902 * early in boot: before argument parsing for the boot cpu, and after
903 * argument parsing but before the init functions run on the secondaries.
904 * So the values we set up here in the hypervisor may be overridden on
905 * the boot cpu as arguments are parsed.
906 */
907static __cpuinit void init_super_pages(void)
908{
909#ifdef CONFIG_HUGETLB_SUPER_PAGES
910 int i;
911 for (i = 0; i < HUGE_SHIFT_ENTRIES; ++i)
912 hv_set_pte_super_shift(i, huge_shift[i]);
913#endif
914}
915
857/** 916/**
858 * setup_cpu() - Do all necessary per-cpu, tile-specific initialization. 917 * setup_cpu() - Do all necessary per-cpu, tile-specific initialization.
859 * @boot: Is this the boot cpu? 918 * @boot: Is this the boot cpu?
@@ -908,6 +967,8 @@ void __cpuinit setup_cpu(int boot)
908 /* Reset the network state on this cpu. */ 967 /* Reset the network state on this cpu. */
909 reset_network_state(); 968 reset_network_state();
910#endif 969#endif
970
971 init_super_pages();
911} 972}
912 973
913#ifdef CONFIG_BLK_DEV_INITRD 974#ifdef CONFIG_BLK_DEV_INITRD