diff options
author | Andi Kleen <ak@suse.de> | 2008-07-24 00:27:42 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-07-24 13:47:17 -0400 |
commit | e5ff215941d59f8ae6bf58f6428dc5c26745a612 (patch) | |
tree | d8a3f730124d6608c6c880515625ca2d4e4ed044 /mm | |
parent | a5516438959d90b071ff0a484ce4f3f523dc3152 (diff) |
hugetlb: multiple hstates for multiple page sizes
Add basic support for more than one hstate in hugetlbfs. This is the key
to supporting multiple hugetlbfs page sizes at once.
- Rather than a single hstate, we now have an array of hstates (hstates[]), walked with the for_each_hstate() iterator
- default_hstate continues to be the struct hstate which we use by default
- Add functions (e.g. hugetlb_add_hstate()) for architectures to register new hstates
[akpm@linux-foundation.org: coding-style fixes]
Acked-by: Adam Litke <agl@us.ibm.com>
Acked-by: Nishanth Aravamudan <nacc@us.ibm.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r-- | mm/hugetlb.c | 148 |
1 files changed, 121 insertions, 27 deletions
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 0d8153e25f09..82378d44a0c5 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -22,12 +22,19 @@ | |||
22 | #include "internal.h" | 22 | #include "internal.h" |
23 | 23 | ||
24 | const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL; | 24 | const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL; |
25 | unsigned long max_huge_pages; | ||
26 | unsigned long sysctl_overcommit_huge_pages; | ||
27 | static gfp_t htlb_alloc_mask = GFP_HIGHUSER; | 25 | static gfp_t htlb_alloc_mask = GFP_HIGHUSER; |
28 | unsigned long hugepages_treat_as_movable; | 26 | unsigned long hugepages_treat_as_movable; |
29 | 27 | ||
30 | struct hstate default_hstate; | 28 | static int max_hstate; |
29 | unsigned int default_hstate_idx; | ||
30 | struct hstate hstates[HUGE_MAX_HSTATE]; | ||
31 | |||
32 | /* for command line parsing */ | ||
33 | static struct hstate * __initdata parsed_hstate; | ||
34 | static unsigned long __initdata default_hstate_max_huge_pages; | ||
35 | |||
36 | #define for_each_hstate(h) \ | ||
37 | for ((h) = hstates; (h) < &hstates[max_hstate]; (h)++) | ||
31 | 38 | ||
32 | /* | 39 | /* |
33 | * Protects updates to hugepage_freelists, nr_huge_pages, and free_huge_pages | 40 | * Protects updates to hugepage_freelists, nr_huge_pages, and free_huge_pages |
@@ -454,13 +461,24 @@ static void update_and_free_page(struct hstate *h, struct page *page) | |||
454 | __free_pages(page, huge_page_order(h)); | 461 | __free_pages(page, huge_page_order(h)); |
455 | } | 462 | } |
456 | 463 | ||
464 | struct hstate *size_to_hstate(unsigned long size) | ||
465 | { | ||
466 | struct hstate *h; | ||
467 | |||
468 | for_each_hstate(h) { | ||
469 | if (huge_page_size(h) == size) | ||
470 | return h; | ||
471 | } | ||
472 | return NULL; | ||
473 | } | ||
474 | |||
457 | static void free_huge_page(struct page *page) | 475 | static void free_huge_page(struct page *page) |
458 | { | 476 | { |
459 | /* | 477 | /* |
460 | * Can't pass hstate in here because it is called from the | 478 | * Can't pass hstate in here because it is called from the |
461 | * compound page destructor. | 479 | * compound page destructor. |
462 | */ | 480 | */ |
463 | struct hstate *h = &default_hstate; | 481 | struct hstate *h = page_hstate(page); |
464 | int nid = page_to_nid(page); | 482 | int nid = page_to_nid(page); |
465 | struct address_space *mapping; | 483 | struct address_space *mapping; |
466 | 484 | ||
@@ -887,39 +905,94 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma, | |||
887 | return page; | 905 | return page; |
888 | } | 906 | } |
889 | 907 | ||
890 | static int __init hugetlb_init(void) | 908 | static void __init hugetlb_init_one_hstate(struct hstate *h) |
891 | { | 909 | { |
892 | unsigned long i; | 910 | unsigned long i; |
893 | struct hstate *h = &default_hstate; | ||
894 | |||
895 | if (HPAGE_SHIFT == 0) | ||
896 | return 0; | ||
897 | |||
898 | if (!h->order) { | ||
899 | h->order = HPAGE_SHIFT - PAGE_SHIFT; | ||
900 | h->mask = HPAGE_MASK; | ||
901 | } | ||
902 | 911 | ||
903 | for (i = 0; i < MAX_NUMNODES; ++i) | 912 | for (i = 0; i < MAX_NUMNODES; ++i) |
904 | INIT_LIST_HEAD(&h->hugepage_freelists[i]); | 913 | INIT_LIST_HEAD(&h->hugepage_freelists[i]); |
905 | 914 | ||
906 | h->hugetlb_next_nid = first_node(node_online_map); | 915 | h->hugetlb_next_nid = first_node(node_online_map); |
907 | 916 | ||
908 | for (i = 0; i < max_huge_pages; ++i) { | 917 | for (i = 0; i < h->max_huge_pages; ++i) { |
909 | if (!alloc_fresh_huge_page(h)) | 918 | if (!alloc_fresh_huge_page(h)) |
910 | break; | 919 | break; |
911 | } | 920 | } |
912 | max_huge_pages = h->free_huge_pages = h->nr_huge_pages = i; | 921 | h->max_huge_pages = h->free_huge_pages = h->nr_huge_pages = i; |
913 | printk(KERN_INFO "Total HugeTLB memory allocated, %ld\n", | 922 | } |
914 | h->free_huge_pages); | 923 | |
924 | static void __init hugetlb_init_hstates(void) | ||
925 | { | ||
926 | struct hstate *h; | ||
927 | |||
928 | for_each_hstate(h) { | ||
929 | hugetlb_init_one_hstate(h); | ||
930 | } | ||
931 | } | ||
932 | |||
933 | static void __init report_hugepages(void) | ||
934 | { | ||
935 | struct hstate *h; | ||
936 | |||
937 | for_each_hstate(h) { | ||
938 | printk(KERN_INFO "Total HugeTLB memory allocated, " | ||
939 | "%ld %dMB pages\n", | ||
940 | h->free_huge_pages, | ||
941 | 1 << (h->order + PAGE_SHIFT - 20)); | ||
942 | } | ||
943 | } | ||
944 | |||
945 | static int __init hugetlb_init(void) | ||
946 | { | ||
947 | BUILD_BUG_ON(HPAGE_SHIFT == 0); | ||
948 | |||
949 | if (!size_to_hstate(HPAGE_SIZE)) { | ||
950 | hugetlb_add_hstate(HUGETLB_PAGE_ORDER); | ||
951 | parsed_hstate->max_huge_pages = default_hstate_max_huge_pages; | ||
952 | } | ||
953 | default_hstate_idx = size_to_hstate(HPAGE_SIZE) - hstates; | ||
954 | |||
955 | hugetlb_init_hstates(); | ||
956 | |||
957 | report_hugepages(); | ||
958 | |||
915 | return 0; | 959 | return 0; |
916 | } | 960 | } |
917 | module_init(hugetlb_init); | 961 | module_init(hugetlb_init); |
918 | 962 | ||
963 | /* Should be called on processing a hugepagesz=... option */ | ||
964 | void __init hugetlb_add_hstate(unsigned order) | ||
965 | { | ||
966 | struct hstate *h; | ||
967 | if (size_to_hstate(PAGE_SIZE << order)) { | ||
968 | printk(KERN_WARNING "hugepagesz= specified twice, ignoring\n"); | ||
969 | return; | ||
970 | } | ||
971 | BUG_ON(max_hstate >= HUGE_MAX_HSTATE); | ||
972 | BUG_ON(order == 0); | ||
973 | h = &hstates[max_hstate++]; | ||
974 | h->order = order; | ||
975 | h->mask = ~((1ULL << (order + PAGE_SHIFT)) - 1); | ||
976 | hugetlb_init_one_hstate(h); | ||
977 | parsed_hstate = h; | ||
978 | } | ||
979 | |||
919 | static int __init hugetlb_setup(char *s) | 980 | static int __init hugetlb_setup(char *s) |
920 | { | 981 | { |
921 | if (sscanf(s, "%lu", &max_huge_pages) <= 0) | 982 | unsigned long *mhp; |
922 | max_huge_pages = 0; | 983 | |
984 | /* | ||
985 | * !max_hstate means we haven't parsed a hugepagesz= parameter yet, | ||
986 | * so this hugepages= parameter goes to the "default hstate". | ||
987 | */ | ||
988 | if (!max_hstate) | ||
989 | mhp = &default_hstate_max_huge_pages; | ||
990 | else | ||
991 | mhp = &parsed_hstate->max_huge_pages; | ||
992 | |||
993 | if (sscanf(s, "%lu", mhp) <= 0) | ||
994 | *mhp = 0; | ||
995 | |||
923 | return 1; | 996 | return 1; |
924 | } | 997 | } |
925 | __setup("hugepages=", hugetlb_setup); | 998 | __setup("hugepages=", hugetlb_setup); |
@@ -950,7 +1023,7 @@ static void try_to_free_low(struct hstate *h, unsigned long count) | |||
950 | if (PageHighMem(page)) | 1023 | if (PageHighMem(page)) |
951 | continue; | 1024 | continue; |
952 | list_del(&page->lru); | 1025 | list_del(&page->lru); |
953 | update_and_free_page(page); | 1026 | update_and_free_page(h, page); |
954 | h->free_huge_pages--; | 1027 | h->free_huge_pages--; |
955 | h->free_huge_pages_node[page_to_nid(page)]--; | 1028 | h->free_huge_pages_node[page_to_nid(page)]--; |
956 | } | 1029 | } |
@@ -963,10 +1036,9 @@ static inline void try_to_free_low(struct hstate *h, unsigned long count) | |||
963 | #endif | 1036 | #endif |
964 | 1037 | ||
965 | #define persistent_huge_pages(h) (h->nr_huge_pages - h->surplus_huge_pages) | 1038 | #define persistent_huge_pages(h) (h->nr_huge_pages - h->surplus_huge_pages) |
966 | static unsigned long set_max_huge_pages(unsigned long count) | 1039 | static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count) |
967 | { | 1040 | { |
968 | unsigned long min_count, ret; | 1041 | unsigned long min_count, ret; |
969 | struct hstate *h = &default_hstate; | ||
970 | 1042 | ||
971 | /* | 1043 | /* |
972 | * Increase the pool size | 1044 | * Increase the pool size |
@@ -1037,8 +1109,19 @@ int hugetlb_sysctl_handler(struct ctl_table *table, int write, | |||
1037 | struct file *file, void __user *buffer, | 1109 | struct file *file, void __user *buffer, |
1038 | size_t *length, loff_t *ppos) | 1110 | size_t *length, loff_t *ppos) |
1039 | { | 1111 | { |
1112 | struct hstate *h = &default_hstate; | ||
1113 | unsigned long tmp; | ||
1114 | |||
1115 | if (!write) | ||
1116 | tmp = h->max_huge_pages; | ||
1117 | |||
1118 | table->data = &tmp; | ||
1119 | table->maxlen = sizeof(unsigned long); | ||
1040 | proc_doulongvec_minmax(table, write, file, buffer, length, ppos); | 1120 | proc_doulongvec_minmax(table, write, file, buffer, length, ppos); |
1041 | max_huge_pages = set_max_huge_pages(max_huge_pages); | 1121 | |
1122 | if (write) | ||
1123 | h->max_huge_pages = set_max_huge_pages(h, tmp); | ||
1124 | |||
1042 | return 0; | 1125 | return 0; |
1043 | } | 1126 | } |
1044 | 1127 | ||
@@ -1059,10 +1142,21 @@ int hugetlb_overcommit_handler(struct ctl_table *table, int write, | |||
1059 | size_t *length, loff_t *ppos) | 1142 | size_t *length, loff_t *ppos) |
1060 | { | 1143 | { |
1061 | struct hstate *h = &default_hstate; | 1144 | struct hstate *h = &default_hstate; |
1145 | unsigned long tmp; | ||
1146 | |||
1147 | if (!write) | ||
1148 | tmp = h->nr_overcommit_huge_pages; | ||
1149 | |||
1150 | table->data = &tmp; | ||
1151 | table->maxlen = sizeof(unsigned long); | ||
1062 | proc_doulongvec_minmax(table, write, file, buffer, length, ppos); | 1152 | proc_doulongvec_minmax(table, write, file, buffer, length, ppos); |
1063 | spin_lock(&hugetlb_lock); | 1153 | |
1064 | h->nr_overcommit_huge_pages = sysctl_overcommit_huge_pages; | 1154 | if (write) { |
1065 | spin_unlock(&hugetlb_lock); | 1155 | spin_lock(&hugetlb_lock); |
1156 | h->nr_overcommit_huge_pages = tmp; | ||
1157 | spin_unlock(&hugetlb_lock); | ||
1158 | } | ||
1159 | |||
1066 | return 0; | 1160 | return 0; |
1067 | } | 1161 | } |
1068 | 1162 | ||