diff options
author | Nishanth Aravamudan <nacc@us.ibm.com> | 2008-07-24 00:27:44 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-07-24 13:47:17 -0400 |
commit | a3437870160cf2caaac6bdd76c7377a5a4145a8c (patch) | |
tree | 6d3c8ddd442e4cd96f1f8bdcf59fcaef72f4edc9 | |
parent | a137e1cc6d6e7d315fef03962a2a5a113348b13b (diff) |
hugetlb: new sysfs interface
Provide new hugepages user APIs that are more suited to multiple hstates
in sysfs. There is a new directory, /sys/kernel/mm/hugepages. Underneath
that directory there will be a directory per-supported hugepage size,
e.g.:
/sys/kernel/mm/hugepages/hugepages-64kB
/sys/kernel/mm/hugepages/hugepages-16384kB
/sys/kernel/mm/hugepages/hugepages-16777216kB
corresponding to 64k, 16m and 16g respectively. Within each
hugepages-size directory there are a number of files, corresponding to the
tracked counters in the hstate, e.g.:
/sys/kernel/mm/hugepages/hugepages-64kB/nr_hugepages
/sys/kernel/mm/hugepages/hugepages-64kB/nr_overcommit_hugepages
/sys/kernel/mm/hugepages/hugepages-64kB/free_hugepages
/sys/kernel/mm/hugepages/hugepages-64kB/resv_hugepages
/sys/kernel/mm/hugepages/hugepages-64kB/surplus_hugepages
Of these files, the first two are read-write and the latter three are
read-only. The size of the hugepage being manipulated is trivially
deducible from the enclosing directory and is always expressed in kB (to
match meminfo).
[dave@linux.vnet.ibm.com: fix build]
[nacc@us.ibm.com: hugetlb: hang off of /sys/kernel/mm rather than /sys/kernel]
[nacc@us.ibm.com: hugetlb: remove CONFIG_SYSFS dependency]
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: Nishanth Aravamudan <nacc@us.ibm.com>
Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: Dave Hansen <dave@linux.vnet.ibm.com>
Signed-off-by: Nishanth Aravamudan <nacc@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | Documentation/ABI/testing/sysfs-kernel-mm-hugepages | 15 | ||||
-rw-r--r-- | Documentation/vm/hugetlbpage.txt | 23 | ||||
-rw-r--r-- | include/linux/hugetlb.h | 2 | ||||
-rw-r--r-- | mm/hugetlb.c | 288 |
4 files changed, 262 insertions, 66 deletions
diff --git a/Documentation/ABI/testing/sysfs-kernel-mm-hugepages b/Documentation/ABI/testing/sysfs-kernel-mm-hugepages new file mode 100644 index 000000000000..e21c00571cf4 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-kernel-mm-hugepages | |||
@@ -0,0 +1,15 @@ | |||
1 | What: /sys/kernel/mm/hugepages/ | ||
2 | Date: June 2008 | ||
3 | Contact: Nishanth Aravamudan <nacc@us.ibm.com>, hugetlb maintainers | ||
4 | Description: | ||
5 | /sys/kernel/mm/hugepages/ contains a number of subdirectories | ||
6 | of the form hugepages-<size>kB, where <size> is the page size | ||
7 | of the hugepages supported by the kernel/CPU combination. | ||
8 | |||
9 | Under these directories are a number of files: | ||
10 | nr_hugepages | ||
11 | nr_overcommit_hugepages | ||
12 | free_hugepages | ||
13 | surplus_hugepages | ||
14 | resv_hugepages | ||
15 | See Documentation/vm/hugetlbpage.txt for details. | ||
diff --git a/Documentation/vm/hugetlbpage.txt b/Documentation/vm/hugetlbpage.txt index 3102b81bef88..8a5b5763f0fe 100644 --- a/Documentation/vm/hugetlbpage.txt +++ b/Documentation/vm/hugetlbpage.txt | |||
@@ -95,6 +95,29 @@ this condition holds, however, no more surplus huge pages will be | |||
95 | allowed on the system until one of the two sysctls are increased | 95 | allowed on the system until one of the two sysctls are increased |
96 | sufficiently, or the surplus huge pages go out of use and are freed. | 96 | sufficiently, or the surplus huge pages go out of use and are freed. |
97 | 97 | ||
98 | With support for multiple hugepage pools at run-time available, much of | ||
99 | the hugepage userspace interface has been duplicated in sysfs. The above | ||
100 | information applies to the default hugepage size (which will be | ||
101 | controlled by the proc interfaces for backwards compatibility). The root | ||
102 | hugepage control directory is | ||
103 | |||
104 | /sys/kernel/mm/hugepages | ||
105 | |||
106 | For each hugepage size supported by the running kernel, a subdirectory | ||
107 | will exist, of the form | ||
108 | |||
109 | hugepages-${size}kB | ||
110 | |||
111 | Inside each of these directories, the same set of files will exist: | ||
112 | |||
113 | nr_hugepages | ||
114 | nr_overcommit_hugepages | ||
115 | free_hugepages | ||
116 | resv_hugepages | ||
117 | surplus_hugepages | ||
118 | |||
119 | which function as described above for the default hugepage-sized case. | ||
120 | |||
98 | If the user applications are going to request hugepages using mmap system | 121 | If the user applications are going to request hugepages using mmap system |
99 | call, then it is required that system administrator mount a file system of | 122 | call, then it is required that system administrator mount a file system of |
100 | type hugetlbfs: | 123 | type hugetlbfs: |
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index ba9263e631b9..58c0de32e7f0 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h | |||
@@ -164,6 +164,7 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, | |||
164 | 164 | ||
165 | #ifdef CONFIG_HUGETLB_PAGE | 165 | #ifdef CONFIG_HUGETLB_PAGE |
166 | 166 | ||
167 | #define HSTATE_NAME_LEN 32 | ||
167 | /* Defines one hugetlb page size */ | 168 | /* Defines one hugetlb page size */ |
168 | struct hstate { | 169 | struct hstate { |
169 | int hugetlb_next_nid; | 170 | int hugetlb_next_nid; |
@@ -179,6 +180,7 @@ struct hstate { | |||
179 | unsigned int nr_huge_pages_node[MAX_NUMNODES]; | 180 | unsigned int nr_huge_pages_node[MAX_NUMNODES]; |
180 | unsigned int free_huge_pages_node[MAX_NUMNODES]; | 181 | unsigned int free_huge_pages_node[MAX_NUMNODES]; |
181 | unsigned int surplus_huge_pages_node[MAX_NUMNODES]; | 182 | unsigned int surplus_huge_pages_node[MAX_NUMNODES]; |
183 | char name[HSTATE_NAME_LEN]; | ||
182 | }; | 184 | }; |
183 | 185 | ||
184 | void __init hugetlb_add_hstate(unsigned order); | 186 | void __init hugetlb_add_hstate(unsigned order); |
diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 4cf7a90e9140..bb49ce5d0067 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c | |||
@@ -14,6 +14,7 @@ | |||
14 | #include <linux/mempolicy.h> | 14 | #include <linux/mempolicy.h> |
15 | #include <linux/cpuset.h> | 15 | #include <linux/cpuset.h> |
16 | #include <linux/mutex.h> | 16 | #include <linux/mutex.h> |
17 | #include <linux/sysfs.h> | ||
17 | 18 | ||
18 | #include <asm/page.h> | 19 | #include <asm/page.h> |
19 | #include <asm/pgtable.h> | 20 | #include <asm/pgtable.h> |
@@ -942,72 +943,6 @@ static void __init report_hugepages(void) | |||
942 | } | 943 | } |
943 | } | 944 | } |
944 | 945 | ||
945 | static int __init hugetlb_init(void) | ||
946 | { | ||
947 | BUILD_BUG_ON(HPAGE_SHIFT == 0); | ||
948 | |||
949 | if (!size_to_hstate(HPAGE_SIZE)) { | ||
950 | hugetlb_add_hstate(HUGETLB_PAGE_ORDER); | ||
951 | parsed_hstate->max_huge_pages = default_hstate_max_huge_pages; | ||
952 | } | ||
953 | default_hstate_idx = size_to_hstate(HPAGE_SIZE) - hstates; | ||
954 | |||
955 | hugetlb_init_hstates(); | ||
956 | |||
957 | report_hugepages(); | ||
958 | |||
959 | return 0; | ||
960 | } | ||
961 | module_init(hugetlb_init); | ||
962 | |||
963 | /* Should be called on processing a hugepagesz=... option */ | ||
964 | void __init hugetlb_add_hstate(unsigned order) | ||
965 | { | ||
966 | struct hstate *h; | ||
967 | if (size_to_hstate(PAGE_SIZE << order)) { | ||
968 | printk(KERN_WARNING "hugepagesz= specified twice, ignoring\n"); | ||
969 | return; | ||
970 | } | ||
971 | BUG_ON(max_hstate >= HUGE_MAX_HSTATE); | ||
972 | BUG_ON(order == 0); | ||
973 | h = &hstates[max_hstate++]; | ||
974 | h->order = order; | ||
975 | h->mask = ~((1ULL << (order + PAGE_SHIFT)) - 1); | ||
976 | hugetlb_init_one_hstate(h); | ||
977 | parsed_hstate = h; | ||
978 | } | ||
979 | |||
980 | static int __init hugetlb_setup(char *s) | ||
981 | { | ||
982 | unsigned long *mhp; | ||
983 | |||
984 | /* | ||
985 | * !max_hstate means we haven't parsed a hugepagesz= parameter yet, | ||
986 | * so this hugepages= parameter goes to the "default hstate". | ||
987 | */ | ||
988 | if (!max_hstate) | ||
989 | mhp = &default_hstate_max_huge_pages; | ||
990 | else | ||
991 | mhp = &parsed_hstate->max_huge_pages; | ||
992 | |||
993 | if (sscanf(s, "%lu", mhp) <= 0) | ||
994 | *mhp = 0; | ||
995 | |||
996 | return 1; | ||
997 | } | ||
998 | __setup("hugepages=", hugetlb_setup); | ||
999 | |||
1000 | static unsigned int cpuset_mems_nr(unsigned int *array) | ||
1001 | { | ||
1002 | int node; | ||
1003 | unsigned int nr = 0; | ||
1004 | |||
1005 | for_each_node_mask(node, cpuset_current_mems_allowed) | ||
1006 | nr += array[node]; | ||
1007 | |||
1008 | return nr; | ||
1009 | } | ||
1010 | |||
1011 | #ifdef CONFIG_SYSCTL | 946 | #ifdef CONFIG_SYSCTL |
1012 | #ifdef CONFIG_HIGHMEM | 947 | #ifdef CONFIG_HIGHMEM |
1013 | static void try_to_free_low(struct hstate *h, unsigned long count) | 948 | static void try_to_free_low(struct hstate *h, unsigned long count) |
@@ -1105,6 +1040,227 @@ out: | |||
1105 | return ret; | 1040 | return ret; |
1106 | } | 1041 | } |
1107 | 1042 | ||
1043 | #define HSTATE_ATTR_RO(_name) \ | ||
1044 | static struct kobj_attribute _name##_attr = __ATTR_RO(_name) | ||
1045 | |||
1046 | #define HSTATE_ATTR(_name) \ | ||
1047 | static struct kobj_attribute _name##_attr = \ | ||
1048 | __ATTR(_name, 0644, _name##_show, _name##_store) | ||
1049 | |||
1050 | static struct kobject *hugepages_kobj; | ||
1051 | static struct kobject *hstate_kobjs[HUGE_MAX_HSTATE]; | ||
1052 | |||
1053 | static struct hstate *kobj_to_hstate(struct kobject *kobj) | ||
1054 | { | ||
1055 | int i; | ||
1056 | for (i = 0; i < HUGE_MAX_HSTATE; i++) | ||
1057 | if (hstate_kobjs[i] == kobj) | ||
1058 | return &hstates[i]; | ||
1059 | BUG(); | ||
1060 | return NULL; | ||
1061 | } | ||
1062 | |||
1063 | static ssize_t nr_hugepages_show(struct kobject *kobj, | ||
1064 | struct kobj_attribute *attr, char *buf) | ||
1065 | { | ||
1066 | struct hstate *h = kobj_to_hstate(kobj); | ||
1067 | return sprintf(buf, "%lu\n", h->nr_huge_pages); | ||
1068 | } | ||
1069 | static ssize_t nr_hugepages_store(struct kobject *kobj, | ||
1070 | struct kobj_attribute *attr, const char *buf, size_t count) | ||
1071 | { | ||
1072 | int err; | ||
1073 | unsigned long input; | ||
1074 | struct hstate *h = kobj_to_hstate(kobj); | ||
1075 | |||
1076 | err = strict_strtoul(buf, 10, &input); | ||
1077 | if (err) | ||
1078 | return 0; | ||
1079 | |||
1080 | h->max_huge_pages = set_max_huge_pages(h, input); | ||
1081 | |||
1082 | return count; | ||
1083 | } | ||
1084 | HSTATE_ATTR(nr_hugepages); | ||
1085 | |||
1086 | static ssize_t nr_overcommit_hugepages_show(struct kobject *kobj, | ||
1087 | struct kobj_attribute *attr, char *buf) | ||
1088 | { | ||
1089 | struct hstate *h = kobj_to_hstate(kobj); | ||
1090 | return sprintf(buf, "%lu\n", h->nr_overcommit_huge_pages); | ||
1091 | } | ||
1092 | static ssize_t nr_overcommit_hugepages_store(struct kobject *kobj, | ||
1093 | struct kobj_attribute *attr, const char *buf, size_t count) | ||
1094 | { | ||
1095 | int err; | ||
1096 | unsigned long input; | ||
1097 | struct hstate *h = kobj_to_hstate(kobj); | ||
1098 | |||
1099 | err = strict_strtoul(buf, 10, &input); | ||
1100 | if (err) | ||
1101 | return 0; | ||
1102 | |||
1103 | spin_lock(&hugetlb_lock); | ||
1104 | h->nr_overcommit_huge_pages = input; | ||
1105 | spin_unlock(&hugetlb_lock); | ||
1106 | |||
1107 | return count; | ||
1108 | } | ||
1109 | HSTATE_ATTR(nr_overcommit_hugepages); | ||
1110 | |||
1111 | static ssize_t free_hugepages_show(struct kobject *kobj, | ||
1112 | struct kobj_attribute *attr, char *buf) | ||
1113 | { | ||
1114 | struct hstate *h = kobj_to_hstate(kobj); | ||
1115 | return sprintf(buf, "%lu\n", h->free_huge_pages); | ||
1116 | } | ||
1117 | HSTATE_ATTR_RO(free_hugepages); | ||
1118 | |||
1119 | static ssize_t resv_hugepages_show(struct kobject *kobj, | ||
1120 | struct kobj_attribute *attr, char *buf) | ||
1121 | { | ||
1122 | struct hstate *h = kobj_to_hstate(kobj); | ||
1123 | return sprintf(buf, "%lu\n", h->resv_huge_pages); | ||
1124 | } | ||
1125 | HSTATE_ATTR_RO(resv_hugepages); | ||
1126 | |||
1127 | static ssize_t surplus_hugepages_show(struct kobject *kobj, | ||
1128 | struct kobj_attribute *attr, char *buf) | ||
1129 | { | ||
1130 | struct hstate *h = kobj_to_hstate(kobj); | ||
1131 | return sprintf(buf, "%lu\n", h->surplus_huge_pages); | ||
1132 | } | ||
1133 | HSTATE_ATTR_RO(surplus_hugepages); | ||
1134 | |||
1135 | static struct attribute *hstate_attrs[] = { | ||
1136 | &nr_hugepages_attr.attr, | ||
1137 | &nr_overcommit_hugepages_attr.attr, | ||
1138 | &free_hugepages_attr.attr, | ||
1139 | &resv_hugepages_attr.attr, | ||
1140 | &surplus_hugepages_attr.attr, | ||
1141 | NULL, | ||
1142 | }; | ||
1143 | |||
1144 | static struct attribute_group hstate_attr_group = { | ||
1145 | .attrs = hstate_attrs, | ||
1146 | }; | ||
1147 | |||
1148 | static int __init hugetlb_sysfs_add_hstate(struct hstate *h) | ||
1149 | { | ||
1150 | int retval; | ||
1151 | |||
1152 | hstate_kobjs[h - hstates] = kobject_create_and_add(h->name, | ||
1153 | hugepages_kobj); | ||
1154 | if (!hstate_kobjs[h - hstates]) | ||
1155 | return -ENOMEM; | ||
1156 | |||
1157 | retval = sysfs_create_group(hstate_kobjs[h - hstates], | ||
1158 | &hstate_attr_group); | ||
1159 | if (retval) | ||
1160 | kobject_put(hstate_kobjs[h - hstates]); | ||
1161 | |||
1162 | return retval; | ||
1163 | } | ||
1164 | |||
1165 | static void __init hugetlb_sysfs_init(void) | ||
1166 | { | ||
1167 | struct hstate *h; | ||
1168 | int err; | ||
1169 | |||
1170 | hugepages_kobj = kobject_create_and_add("hugepages", mm_kobj); | ||
1171 | if (!hugepages_kobj) | ||
1172 | return; | ||
1173 | |||
1174 | for_each_hstate(h) { | ||
1175 | err = hugetlb_sysfs_add_hstate(h); | ||
1176 | if (err) | ||
1177 | printk(KERN_ERR "Hugetlb: Unable to add hstate %s", | ||
1178 | h->name); | ||
1179 | } | ||
1180 | } | ||
1181 | |||
1182 | static void __exit hugetlb_exit(void) | ||
1183 | { | ||
1184 | struct hstate *h; | ||
1185 | |||
1186 | for_each_hstate(h) { | ||
1187 | kobject_put(hstate_kobjs[h - hstates]); | ||
1188 | } | ||
1189 | |||
1190 | kobject_put(hugepages_kobj); | ||
1191 | } | ||
1192 | module_exit(hugetlb_exit); | ||
1193 | |||
1194 | static int __init hugetlb_init(void) | ||
1195 | { | ||
1196 | BUILD_BUG_ON(HPAGE_SHIFT == 0); | ||
1197 | |||
1198 | if (!size_to_hstate(HPAGE_SIZE)) { | ||
1199 | hugetlb_add_hstate(HUGETLB_PAGE_ORDER); | ||
1200 | parsed_hstate->max_huge_pages = default_hstate_max_huge_pages; | ||
1201 | } | ||
1202 | default_hstate_idx = size_to_hstate(HPAGE_SIZE) - hstates; | ||
1203 | |||
1204 | hugetlb_init_hstates(); | ||
1205 | |||
1206 | report_hugepages(); | ||
1207 | |||
1208 | hugetlb_sysfs_init(); | ||
1209 | |||
1210 | return 0; | ||
1211 | } | ||
1212 | module_init(hugetlb_init); | ||
1213 | |||
1214 | /* Should be called on processing a hugepagesz=... option */ | ||
1215 | void __init hugetlb_add_hstate(unsigned order) | ||
1216 | { | ||
1217 | struct hstate *h; | ||
1218 | if (size_to_hstate(PAGE_SIZE << order)) { | ||
1219 | printk(KERN_WARNING "hugepagesz= specified twice, ignoring\n"); | ||
1220 | return; | ||
1221 | } | ||
1222 | BUG_ON(max_hstate >= HUGE_MAX_HSTATE); | ||
1223 | BUG_ON(order == 0); | ||
1224 | h = &hstates[max_hstate++]; | ||
1225 | h->order = order; | ||
1226 | h->mask = ~((1ULL << (order + PAGE_SHIFT)) - 1); | ||
1227 | snprintf(h->name, HSTATE_NAME_LEN, "hugepages-%lukB", | ||
1228 | huge_page_size(h)/1024); | ||
1229 | hugetlb_init_one_hstate(h); | ||
1230 | parsed_hstate = h; | ||
1231 | } | ||
1232 | |||
1233 | static int __init hugetlb_setup(char *s) | ||
1234 | { | ||
1235 | unsigned long *mhp; | ||
1236 | |||
1237 | /* | ||
1238 | * !max_hstate means we haven't parsed a hugepagesz= parameter yet, | ||
1239 | * so this hugepages= parameter goes to the "default hstate". | ||
1240 | */ | ||
1241 | if (!max_hstate) | ||
1242 | mhp = &default_hstate_max_huge_pages; | ||
1243 | else | ||
1244 | mhp = &parsed_hstate->max_huge_pages; | ||
1245 | |||
1246 | if (sscanf(s, "%lu", mhp) <= 0) | ||
1247 | *mhp = 0; | ||
1248 | |||
1249 | return 1; | ||
1250 | } | ||
1251 | __setup("hugepages=", hugetlb_setup); | ||
1252 | |||
1253 | static unsigned int cpuset_mems_nr(unsigned int *array) | ||
1254 | { | ||
1255 | int node; | ||
1256 | unsigned int nr = 0; | ||
1257 | |||
1258 | for_each_node_mask(node, cpuset_current_mems_allowed) | ||
1259 | nr += array[node]; | ||
1260 | |||
1261 | return nr; | ||
1262 | } | ||
1263 | |||
1108 | int hugetlb_sysctl_handler(struct ctl_table *table, int write, | 1264 | int hugetlb_sysctl_handler(struct ctl_table *table, int write, |
1109 | struct file *file, void __user *buffer, | 1265 | struct file *file, void __user *buffer, |
1110 | size_t *length, loff_t *ppos) | 1266 | size_t *length, loff_t *ppos) |