aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNishanth Aravamudan <nacc@us.ibm.com>2008-07-24 00:27:44 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2008-07-24 13:47:17 -0400
commita3437870160cf2caaac6bdd76c7377a5a4145a8c (patch)
tree6d3c8ddd442e4cd96f1f8bdcf59fcaef72f4edc9
parenta137e1cc6d6e7d315fef03962a2a5a113348b13b (diff)
hugetlb: new sysfs interface
Provide new hugepages user APIs that are more suited to multiple hstates in sysfs. There is a new directory, /sys/kernel/mm/hugepages. Underneath that directory there will be a directory per-supported hugepage size, e.g.: /sys/kernel/mm/hugepages/hugepages-64kB /sys/kernel/mm/hugepages/hugepages-16384kB /sys/kernel/mm/hugepages/hugepages-16777216kB corresponding to 64k, 16m and 16g respectively. Within each hugepages-size directory there are a number of files, corresponding to the tracked counters in the hstate, e.g.: /sys/kernel/mm/hugepages/hugepages-64kB/nr_hugepages /sys/kernel/mm/hugepages/hugepages-64kB/nr_overcommit_hugepages /sys/kernel/mm/hugepages/hugepages-64kB/free_hugepages /sys/kernel/mm/hugepages/hugepages-64kB/resv_hugepages /sys/kernel/mm/hugepages/hugepages-64kB/surplus_hugepages Of these files, the first two are read-write and the latter three are read-only. The size of the hugepage being manipulated is trivially deducible from the enclosing directory and is always expressed in kB (to match meminfo). [dave@linux.vnet.ibm.com: fix build] [nacc@us.ibm.com: hugetlb: hang off of /sys/kernel/mm rather than /sys/kernel] [nacc@us.ibm.com: hugetlb: remove CONFIG_SYSFS dependency] Acked-by: Greg Kroah-Hartman <gregkh@suse.de> Signed-off-by: Nishanth Aravamudan <nacc@us.ibm.com> Signed-off-by: Nick Piggin <npiggin@suse.de> Cc: Dave Hansen <dave@linux.vnet.ibm.com> Signed-off-by: Nishanth Aravamudan <nacc@us.ibm.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--Documentation/ABI/testing/sysfs-kernel-mm-hugepages15
-rw-r--r--Documentation/vm/hugetlbpage.txt23
-rw-r--r--include/linux/hugetlb.h2
-rw-r--r--mm/hugetlb.c288
4 files changed, 262 insertions, 66 deletions
diff --git a/Documentation/ABI/testing/sysfs-kernel-mm-hugepages b/Documentation/ABI/testing/sysfs-kernel-mm-hugepages
new file mode 100644
index 000000000000..e21c00571cf4
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-kernel-mm-hugepages
@@ -0,0 +1,15 @@
1What: /sys/kernel/mm/hugepages/
2Date: June 2008
3Contact: Nishanth Aravamudan <nacc@us.ibm.com>, hugetlb maintainers
4Description:
5 /sys/kernel/mm/hugepages/ contains a number of subdirectories
6 of the form hugepages-<size>kB, where <size> is the page size
7 of the hugepages supported by the kernel/CPU combination.
8
9 Under these directories are a number of files:
10 nr_hugepages
11 nr_overcommit_hugepages
12 free_hugepages
13 surplus_hugepages
14 resv_hugepages
15 See Documentation/vm/hugetlbpage.txt for details.
diff --git a/Documentation/vm/hugetlbpage.txt b/Documentation/vm/hugetlbpage.txt
index 3102b81bef88..8a5b5763f0fe 100644
--- a/Documentation/vm/hugetlbpage.txt
+++ b/Documentation/vm/hugetlbpage.txt
@@ -95,6 +95,29 @@ this condition holds, however, no more surplus huge pages will be
95allowed on the system until one of the two sysctls are increased 95allowed on the system until one of the two sysctls are increased
96sufficiently, or the surplus huge pages go out of use and are freed. 96sufficiently, or the surplus huge pages go out of use and are freed.
97 97
98With support for multiple hugepage pools at run-time available, much of
99the hugepage userspace interface has been duplicated in sysfs. The above
100information applies to the default hugepage size (which will be
101controlled by the proc interfaces for backwards compatibility). The root
102hugepage control directory is
103
104 /sys/kernel/mm/hugepages
105
106For each hugepage size supported by the running kernel, a subdirectory
107will exist, of the form
108
109 hugepages-${size}kB
110
111Inside each of these directories, the same set of files will exist:
112
113 nr_hugepages
114 nr_overcommit_hugepages
115 free_hugepages
116 resv_hugepages
117 surplus_hugepages
118
119which function as described above for the default hugepage-sized case.
120
98If the user applications are going to request hugepages using mmap system 121If the user applications are going to request hugepages using mmap system
99call, then it is required that system administrator mount a file system of 122call, then it is required that system administrator mount a file system of
100type hugetlbfs: 123type hugetlbfs:
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index ba9263e631b9..58c0de32e7f0 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -164,6 +164,7 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
164 164
165#ifdef CONFIG_HUGETLB_PAGE 165#ifdef CONFIG_HUGETLB_PAGE
166 166
167#define HSTATE_NAME_LEN 32
167/* Defines one hugetlb page size */ 168/* Defines one hugetlb page size */
168struct hstate { 169struct hstate {
169 int hugetlb_next_nid; 170 int hugetlb_next_nid;
@@ -179,6 +180,7 @@ struct hstate {
179 unsigned int nr_huge_pages_node[MAX_NUMNODES]; 180 unsigned int nr_huge_pages_node[MAX_NUMNODES];
180 unsigned int free_huge_pages_node[MAX_NUMNODES]; 181 unsigned int free_huge_pages_node[MAX_NUMNODES];
181 unsigned int surplus_huge_pages_node[MAX_NUMNODES]; 182 unsigned int surplus_huge_pages_node[MAX_NUMNODES];
183 char name[HSTATE_NAME_LEN];
182}; 184};
183 185
184void __init hugetlb_add_hstate(unsigned order); 186void __init hugetlb_add_hstate(unsigned order);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 4cf7a90e9140..bb49ce5d0067 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -14,6 +14,7 @@
14#include <linux/mempolicy.h> 14#include <linux/mempolicy.h>
15#include <linux/cpuset.h> 15#include <linux/cpuset.h>
16#include <linux/mutex.h> 16#include <linux/mutex.h>
17#include <linux/sysfs.h>
17 18
18#include <asm/page.h> 19#include <asm/page.h>
19#include <asm/pgtable.h> 20#include <asm/pgtable.h>
@@ -942,72 +943,6 @@ static void __init report_hugepages(void)
942 } 943 }
943} 944}
944 945
945static int __init hugetlb_init(void)
946{
947 BUILD_BUG_ON(HPAGE_SHIFT == 0);
948
949 if (!size_to_hstate(HPAGE_SIZE)) {
950 hugetlb_add_hstate(HUGETLB_PAGE_ORDER);
951 parsed_hstate->max_huge_pages = default_hstate_max_huge_pages;
952 }
953 default_hstate_idx = size_to_hstate(HPAGE_SIZE) - hstates;
954
955 hugetlb_init_hstates();
956
957 report_hugepages();
958
959 return 0;
960}
961module_init(hugetlb_init);
962
963/* Should be called on processing a hugepagesz=... option */
964void __init hugetlb_add_hstate(unsigned order)
965{
966 struct hstate *h;
967 if (size_to_hstate(PAGE_SIZE << order)) {
968 printk(KERN_WARNING "hugepagesz= specified twice, ignoring\n");
969 return;
970 }
971 BUG_ON(max_hstate >= HUGE_MAX_HSTATE);
972 BUG_ON(order == 0);
973 h = &hstates[max_hstate++];
974 h->order = order;
975 h->mask = ~((1ULL << (order + PAGE_SHIFT)) - 1);
976 hugetlb_init_one_hstate(h);
977 parsed_hstate = h;
978}
979
980static int __init hugetlb_setup(char *s)
981{
982 unsigned long *mhp;
983
984 /*
985 * !max_hstate means we haven't parsed a hugepagesz= parameter yet,
986 * so this hugepages= parameter goes to the "default hstate".
987 */
988 if (!max_hstate)
989 mhp = &default_hstate_max_huge_pages;
990 else
991 mhp = &parsed_hstate->max_huge_pages;
992
993 if (sscanf(s, "%lu", mhp) <= 0)
994 *mhp = 0;
995
996 return 1;
997}
998__setup("hugepages=", hugetlb_setup);
999
1000static unsigned int cpuset_mems_nr(unsigned int *array)
1001{
1002 int node;
1003 unsigned int nr = 0;
1004
1005 for_each_node_mask(node, cpuset_current_mems_allowed)
1006 nr += array[node];
1007
1008 return nr;
1009}
1010
1011#ifdef CONFIG_SYSCTL 946#ifdef CONFIG_SYSCTL
1012#ifdef CONFIG_HIGHMEM 947#ifdef CONFIG_HIGHMEM
1013static void try_to_free_low(struct hstate *h, unsigned long count) 948static void try_to_free_low(struct hstate *h, unsigned long count)
@@ -1105,6 +1040,227 @@ out:
1105 return ret; 1040 return ret;
1106} 1041}
1107 1042
1043#define HSTATE_ATTR_RO(_name) \
1044 static struct kobj_attribute _name##_attr = __ATTR_RO(_name)
1045
1046#define HSTATE_ATTR(_name) \
1047 static struct kobj_attribute _name##_attr = \
1048 __ATTR(_name, 0644, _name##_show, _name##_store)
1049
1050static struct kobject *hugepages_kobj;
1051static struct kobject *hstate_kobjs[HUGE_MAX_HSTATE];
1052
1053static struct hstate *kobj_to_hstate(struct kobject *kobj)
1054{
1055 int i;
1056 for (i = 0; i < HUGE_MAX_HSTATE; i++)
1057 if (hstate_kobjs[i] == kobj)
1058 return &hstates[i];
1059 BUG();
1060 return NULL;
1061}
1062
1063static ssize_t nr_hugepages_show(struct kobject *kobj,
1064 struct kobj_attribute *attr, char *buf)
1065{
1066 struct hstate *h = kobj_to_hstate(kobj);
1067 return sprintf(buf, "%lu\n", h->nr_huge_pages);
1068}
1069static ssize_t nr_hugepages_store(struct kobject *kobj,
1070 struct kobj_attribute *attr, const char *buf, size_t count)
1071{
1072 int err;
1073 unsigned long input;
1074 struct hstate *h = kobj_to_hstate(kobj);
1075
1076 err = strict_strtoul(buf, 10, &input);
1077 if (err)
1078 return 0;
1079
1080 h->max_huge_pages = set_max_huge_pages(h, input);
1081
1082 return count;
1083}
1084HSTATE_ATTR(nr_hugepages);
1085
1086static ssize_t nr_overcommit_hugepages_show(struct kobject *kobj,
1087 struct kobj_attribute *attr, char *buf)
1088{
1089 struct hstate *h = kobj_to_hstate(kobj);
1090 return sprintf(buf, "%lu\n", h->nr_overcommit_huge_pages);
1091}
1092static ssize_t nr_overcommit_hugepages_store(struct kobject *kobj,
1093 struct kobj_attribute *attr, const char *buf, size_t count)
1094{
1095 int err;
1096 unsigned long input;
1097 struct hstate *h = kobj_to_hstate(kobj);
1098
1099 err = strict_strtoul(buf, 10, &input);
1100 if (err)
1101 return 0;
1102
1103 spin_lock(&hugetlb_lock);
1104 h->nr_overcommit_huge_pages = input;
1105 spin_unlock(&hugetlb_lock);
1106
1107 return count;
1108}
1109HSTATE_ATTR(nr_overcommit_hugepages);
1110
1111static ssize_t free_hugepages_show(struct kobject *kobj,
1112 struct kobj_attribute *attr, char *buf)
1113{
1114 struct hstate *h = kobj_to_hstate(kobj);
1115 return sprintf(buf, "%lu\n", h->free_huge_pages);
1116}
1117HSTATE_ATTR_RO(free_hugepages);
1118
1119static ssize_t resv_hugepages_show(struct kobject *kobj,
1120 struct kobj_attribute *attr, char *buf)
1121{
1122 struct hstate *h = kobj_to_hstate(kobj);
1123 return sprintf(buf, "%lu\n", h->resv_huge_pages);
1124}
1125HSTATE_ATTR_RO(resv_hugepages);
1126
1127static ssize_t surplus_hugepages_show(struct kobject *kobj,
1128 struct kobj_attribute *attr, char *buf)
1129{
1130 struct hstate *h = kobj_to_hstate(kobj);
1131 return sprintf(buf, "%lu\n", h->surplus_huge_pages);
1132}
1133HSTATE_ATTR_RO(surplus_hugepages);
1134
1135static struct attribute *hstate_attrs[] = {
1136 &nr_hugepages_attr.attr,
1137 &nr_overcommit_hugepages_attr.attr,
1138 &free_hugepages_attr.attr,
1139 &resv_hugepages_attr.attr,
1140 &surplus_hugepages_attr.attr,
1141 NULL,
1142};
1143
1144static struct attribute_group hstate_attr_group = {
1145 .attrs = hstate_attrs,
1146};
1147
1148static int __init hugetlb_sysfs_add_hstate(struct hstate *h)
1149{
1150 int retval;
1151
1152 hstate_kobjs[h - hstates] = kobject_create_and_add(h->name,
1153 hugepages_kobj);
1154 if (!hstate_kobjs[h - hstates])
1155 return -ENOMEM;
1156
1157 retval = sysfs_create_group(hstate_kobjs[h - hstates],
1158 &hstate_attr_group);
1159 if (retval)
1160 kobject_put(hstate_kobjs[h - hstates]);
1161
1162 return retval;
1163}
1164
1165static void __init hugetlb_sysfs_init(void)
1166{
1167 struct hstate *h;
1168 int err;
1169
1170 hugepages_kobj = kobject_create_and_add("hugepages", mm_kobj);
1171 if (!hugepages_kobj)
1172 return;
1173
1174 for_each_hstate(h) {
1175 err = hugetlb_sysfs_add_hstate(h);
1176 if (err)
1177 printk(KERN_ERR "Hugetlb: Unable to add hstate %s",
1178 h->name);
1179 }
1180}
1181
1182static void __exit hugetlb_exit(void)
1183{
1184 struct hstate *h;
1185
1186 for_each_hstate(h) {
1187 kobject_put(hstate_kobjs[h - hstates]);
1188 }
1189
1190 kobject_put(hugepages_kobj);
1191}
1192module_exit(hugetlb_exit);
1193
1194static int __init hugetlb_init(void)
1195{
1196 BUILD_BUG_ON(HPAGE_SHIFT == 0);
1197
1198 if (!size_to_hstate(HPAGE_SIZE)) {
1199 hugetlb_add_hstate(HUGETLB_PAGE_ORDER);
1200 parsed_hstate->max_huge_pages = default_hstate_max_huge_pages;
1201 }
1202 default_hstate_idx = size_to_hstate(HPAGE_SIZE) - hstates;
1203
1204 hugetlb_init_hstates();
1205
1206 report_hugepages();
1207
1208 hugetlb_sysfs_init();
1209
1210 return 0;
1211}
1212module_init(hugetlb_init);
1213
1214/* Should be called on processing a hugepagesz=... option */
1215void __init hugetlb_add_hstate(unsigned order)
1216{
1217 struct hstate *h;
1218 if (size_to_hstate(PAGE_SIZE << order)) {
1219 printk(KERN_WARNING "hugepagesz= specified twice, ignoring\n");
1220 return;
1221 }
1222 BUG_ON(max_hstate >= HUGE_MAX_HSTATE);
1223 BUG_ON(order == 0);
1224 h = &hstates[max_hstate++];
1225 h->order = order;
1226 h->mask = ~((1ULL << (order + PAGE_SHIFT)) - 1);
1227 snprintf(h->name, HSTATE_NAME_LEN, "hugepages-%lukB",
1228 huge_page_size(h)/1024);
1229 hugetlb_init_one_hstate(h);
1230 parsed_hstate = h;
1231}
1232
1233static int __init hugetlb_setup(char *s)
1234{
1235 unsigned long *mhp;
1236
1237 /*
1238 * !max_hstate means we haven't parsed a hugepagesz= parameter yet,
1239 * so this hugepages= parameter goes to the "default hstate".
1240 */
1241 if (!max_hstate)
1242 mhp = &default_hstate_max_huge_pages;
1243 else
1244 mhp = &parsed_hstate->max_huge_pages;
1245
1246 if (sscanf(s, "%lu", mhp) <= 0)
1247 *mhp = 0;
1248
1249 return 1;
1250}
1251__setup("hugepages=", hugetlb_setup);
1252
1253static unsigned int cpuset_mems_nr(unsigned int *array)
1254{
1255 int node;
1256 unsigned int nr = 0;
1257
1258 for_each_node_mask(node, cpuset_current_mems_allowed)
1259 nr += array[node];
1260
1261 return nr;
1262}
1263
1108int hugetlb_sysctl_handler(struct ctl_table *table, int write, 1264int hugetlb_sysctl_handler(struct ctl_table *table, int write,
1109 struct file *file, void __user *buffer, 1265 struct file *file, void __user *buffer,
1110 size_t *length, loff_t *ppos) 1266 size_t *length, loff_t *ppos)