aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAndi Kleen <ak@suse.de>2008-07-24 00:27:47 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2008-07-24 13:47:18 -0400
commitaa888a74977a8f2120ae9332376e179c39a6b07d (patch)
tree1834f8a81e0126ffdd9d9622a9522331dffa2ac8
parent01ad1c0827db5b3695c53e296dbb2c1da16a0911 (diff)
hugetlb: support larger than MAX_ORDER
This is needed on x86-64 to handle GB pages in hugetlbfs, because it is not practical to enlarge MAX_ORDER to 1GB. Instead, the 1GB pages are only allocated at boot, from the bootmem allocator, using the hugepages=... option. These 1GB bootmem pages are never freed. In theory it would be possible to implement that with some complications, but since it would be a one-way street (>= MAX_ORDER pages cannot be allocated later) I decided not to do so for now. The >= MAX_ORDER code is not ifdef'ed per architecture. It is not very big and the ifdef ugliness did not seem to be worth it. Known problems: /proc/meminfo and "free" do not display the memory allocated for GB pages in "Total". This is a little confusing for the user. Acked-by: Andrew Hastings <abh@cray.com> Signed-off-by: Andi Kleen <ak@suse.de> Signed-off-by: Nick Piggin <npiggin@suse.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--mm/hugetlb.c83
1 files changed, 81 insertions, 2 deletions
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 5e620e25cf08..1a6fe87555b2 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -14,6 +14,7 @@
14#include <linux/mempolicy.h> 14#include <linux/mempolicy.h>
15#include <linux/cpuset.h> 15#include <linux/cpuset.h>
16#include <linux/mutex.h> 16#include <linux/mutex.h>
17#include <linux/bootmem.h>
17#include <linux/sysfs.h> 18#include <linux/sysfs.h>
18 19
19#include <asm/page.h> 20#include <asm/page.h>
@@ -489,7 +490,7 @@ static void free_huge_page(struct page *page)
489 INIT_LIST_HEAD(&page->lru); 490 INIT_LIST_HEAD(&page->lru);
490 491
491 spin_lock(&hugetlb_lock); 492 spin_lock(&hugetlb_lock);
492 if (h->surplus_huge_pages_node[nid]) { 493 if (h->surplus_huge_pages_node[nid] && huge_page_order(h) < MAX_ORDER) {
493 update_and_free_page(h, page); 494 update_and_free_page(h, page);
494 h->surplus_huge_pages--; 495 h->surplus_huge_pages--;
495 h->surplus_huge_pages_node[nid]--; 496 h->surplus_huge_pages_node[nid]--;
@@ -550,6 +551,9 @@ static struct page *alloc_fresh_huge_page_node(struct hstate *h, int nid)
550{ 551{
551 struct page *page; 552 struct page *page;
552 553
554 if (h->order >= MAX_ORDER)
555 return NULL;
556
553 page = alloc_pages_node(nid, 557 page = alloc_pages_node(nid,
554 htlb_alloc_mask|__GFP_COMP|__GFP_THISNODE| 558 htlb_alloc_mask|__GFP_COMP|__GFP_THISNODE|
555 __GFP_REPEAT|__GFP_NOWARN, 559 __GFP_REPEAT|__GFP_NOWARN,
@@ -616,6 +620,9 @@ static struct page *alloc_buddy_huge_page(struct hstate *h,
616 struct page *page; 620 struct page *page;
617 unsigned int nid; 621 unsigned int nid;
618 622
623 if (h->order >= MAX_ORDER)
624 return NULL;
625
619 /* 626 /*
620 * Assume we will successfully allocate the surplus page to 627 * Assume we will successfully allocate the surplus page to
621 * prevent racing processes from causing the surplus to exceed 628 * prevent racing processes from causing the surplus to exceed
@@ -792,6 +799,10 @@ static void return_unused_surplus_pages(struct hstate *h,
792 /* Uncommit the reservation */ 799 /* Uncommit the reservation */
793 h->resv_huge_pages -= unused_resv_pages; 800 h->resv_huge_pages -= unused_resv_pages;
794 801
802 /* Cannot return gigantic pages currently */
803 if (h->order >= MAX_ORDER)
804 return;
805
795 nr_pages = min(unused_resv_pages, h->surplus_huge_pages); 806 nr_pages = min(unused_resv_pages, h->surplus_huge_pages);
796 807
797 while (remaining_iterations-- && nr_pages) { 808 while (remaining_iterations-- && nr_pages) {
@@ -913,6 +924,63 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma,
913 return page; 924 return page;
914} 925}
915 926
927static __initdata LIST_HEAD(huge_boot_pages);
928
929struct huge_bootmem_page {
930 struct list_head list;
931 struct hstate *hstate;
932};
933
934static int __init alloc_bootmem_huge_page(struct hstate *h)
935{
936 struct huge_bootmem_page *m;
937 int nr_nodes = nodes_weight(node_online_map);
938
939 while (nr_nodes) {
940 void *addr;
941
942 addr = __alloc_bootmem_node_nopanic(
943 NODE_DATA(h->hugetlb_next_nid),
944 huge_page_size(h), huge_page_size(h), 0);
945
946 if (addr) {
947 /*
948 * Use the beginning of the huge page to store the
949 * huge_bootmem_page struct (until gather_bootmem
950 * puts them into the mem_map).
951 */
952 m = addr;
953 if (m)
954 goto found;
955 }
956 hstate_next_node(h);
957 nr_nodes--;
958 }
959 return 0;
960
961found:
962 BUG_ON((unsigned long)virt_to_phys(m) & (huge_page_size(h) - 1));
963 /* Put them into a private list first because mem_map is not up yet */
964 list_add(&m->list, &huge_boot_pages);
965 m->hstate = h;
966 return 1;
967}
968
969/* Put bootmem huge pages into the standard lists after mem_map is up */
970static void __init gather_bootmem_prealloc(void)
971{
972 struct huge_bootmem_page *m;
973
974 list_for_each_entry(m, &huge_boot_pages, list) {
975 struct page *page = virt_to_page(m);
976 struct hstate *h = m->hstate;
977 __ClearPageReserved(page);
978 WARN_ON(page_count(page) != 1);
979 prep_compound_page(page, h->order);
980 prep_new_huge_page(h, page, page_to_nid(page));
981 }
982}
983
916static void __init hugetlb_init_one_hstate(struct hstate *h) 984static void __init hugetlb_init_one_hstate(struct hstate *h)
917{ 985{
918 unsigned long i; 986 unsigned long i;
@@ -923,7 +991,10 @@ static void __init hugetlb_init_one_hstate(struct hstate *h)
923 h->hugetlb_next_nid = first_node(node_online_map); 991 h->hugetlb_next_nid = first_node(node_online_map);
924 992
925 for (i = 0; i < h->max_huge_pages; ++i) { 993 for (i = 0; i < h->max_huge_pages; ++i) {
926 if (!alloc_fresh_huge_page(h)) 994 if (h->order >= MAX_ORDER) {
995 if (!alloc_bootmem_huge_page(h))
996 break;
997 } else if (!alloc_fresh_huge_page(h))
927 break; 998 break;
928 } 999 }
929 h->max_huge_pages = h->free_huge_pages = h->nr_huge_pages = i; 1000 h->max_huge_pages = h->free_huge_pages = h->nr_huge_pages = i;
@@ -956,6 +1027,9 @@ static void try_to_free_low(struct hstate *h, unsigned long count)
956{ 1027{
957 int i; 1028 int i;
958 1029
1030 if (h->order >= MAX_ORDER)
1031 return;
1032
959 for (i = 0; i < MAX_NUMNODES; ++i) { 1033 for (i = 0; i < MAX_NUMNODES; ++i) {
960 struct page *page, *next; 1034 struct page *page, *next;
961 struct list_head *freel = &h->hugepage_freelists[i]; 1035 struct list_head *freel = &h->hugepage_freelists[i];
@@ -982,6 +1056,9 @@ static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count)
982{ 1056{
983 unsigned long min_count, ret; 1057 unsigned long min_count, ret;
984 1058
1059 if (h->order >= MAX_ORDER)
1060 return h->max_huge_pages;
1061
985 /* 1062 /*
986 * Increase the pool size 1063 * Increase the pool size
987 * First take pages out of surplus state. Then make up the 1064 * First take pages out of surplus state. Then make up the
@@ -1210,6 +1287,8 @@ static int __init hugetlb_init(void)
1210 1287
1211 hugetlb_init_hstates(); 1288 hugetlb_init_hstates();
1212 1289
1290 gather_bootmem_prealloc();
1291
1213 report_hugepages(); 1292 report_hugepages();
1214 1293
1215 hugetlb_sysfs_init(); 1294 hugetlb_sysfs_init();