diff options
author | Nishanth Aravamudan <nacc@us.ibm.com> | 2008-07-24 00:27:44 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-07-24 13:47:17 -0400 |
commit | a3437870160cf2caaac6bdd76c7377a5a4145a8c (patch) | |
tree | 6d3c8ddd442e4cd96f1f8bdcf59fcaef72f4edc9 /mm | |
parent | a137e1cc6d6e7d315fef03962a2a5a113348b13b (diff) |
hugetlb: new sysfs interface
Provide new hugepages user APIs that are more suited to multiple hstates
in sysfs. There is a new directory, /sys/kernel/mm/hugepages. Underneath
that directory there will be one directory per supported hugepage size,
e.g.:
/sys/kernel/mm/hugepages/hugepages-64kB
/sys/kernel/mm/hugepages/hugepages-16384kB
/sys/kernel/mm/hugepages/hugepages-16777216kB
corresponding to 64k, 16m and 16g respectively. Within each
hugepages-size directory there are a number of files, corresponding to the
tracked counters in the hstate, e.g.:
/sys/kernel/mm/hugepages/hugepages-64kB/nr_hugepages
/sys/kernel/mm/hugepages/hugepages-64kB/nr_overcommit_hugepages
/sys/kernel/mm/hugepages/hugepages-64kB/free_hugepages
/sys/kernel/mm/hugepages/hugepages-64kB/resv_hugepages
/sys/kernel/mm/hugepages/hugepages-64kB/surplus_hugepages
Of these files, the first two are read-write and the latter three are
read-only. The size of the hugepage being manipulated is trivially
deducible from the enclosing directory and is always expressed in kB (to
match meminfo).
[dave@linux.vnet.ibm.com: fix build]
[nacc@us.ibm.com: hugetlb: hang off of /sys/kernel/mm rather than /sys/kernel]
[nacc@us.ibm.com: hugetlb: remove CONFIG_SYSFS dependency]
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: Nishanth Aravamudan <nacc@us.ibm.com>
Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: Dave Hansen <dave@linux.vnet.ibm.com>
Signed-off-by: Nishanth Aravamudan <nacc@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r-- | mm/hugetlb.c | 288 |
1 files changed, 222 insertions, 66 deletions
diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 4cf7a90e914..bb49ce5d006 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c | |||
@@ -14,6 +14,7 @@ | |||
14 | #include <linux/mempolicy.h> | 14 | #include <linux/mempolicy.h> |
15 | #include <linux/cpuset.h> | 15 | #include <linux/cpuset.h> |
16 | #include <linux/mutex.h> | 16 | #include <linux/mutex.h> |
17 | #include <linux/sysfs.h> | ||
17 | 18 | ||
18 | #include <asm/page.h> | 19 | #include <asm/page.h> |
19 | #include <asm/pgtable.h> | 20 | #include <asm/pgtable.h> |
@@ -942,72 +943,6 @@ static void __init report_hugepages(void) | |||
942 | } | 943 | } |
943 | } | 944 | } |
944 | 945 | ||
945 | static int __init hugetlb_init(void) | ||
946 | { | ||
947 | BUILD_BUG_ON(HPAGE_SHIFT == 0); | ||
948 | |||
949 | if (!size_to_hstate(HPAGE_SIZE)) { | ||
950 | hugetlb_add_hstate(HUGETLB_PAGE_ORDER); | ||
951 | parsed_hstate->max_huge_pages = default_hstate_max_huge_pages; | ||
952 | } | ||
953 | default_hstate_idx = size_to_hstate(HPAGE_SIZE) - hstates; | ||
954 | |||
955 | hugetlb_init_hstates(); | ||
956 | |||
957 | report_hugepages(); | ||
958 | |||
959 | return 0; | ||
960 | } | ||
961 | module_init(hugetlb_init); | ||
962 | |||
963 | /* Should be called on processing a hugepagesz=... option */ | ||
964 | void __init hugetlb_add_hstate(unsigned order) | ||
965 | { | ||
966 | struct hstate *h; | ||
967 | if (size_to_hstate(PAGE_SIZE << order)) { | ||
968 | printk(KERN_WARNING "hugepagesz= specified twice, ignoring\n"); | ||
969 | return; | ||
970 | } | ||
971 | BUG_ON(max_hstate >= HUGE_MAX_HSTATE); | ||
972 | BUG_ON(order == 0); | ||
973 | h = &hstates[max_hstate++]; | ||
974 | h->order = order; | ||
975 | h->mask = ~((1ULL << (order + PAGE_SHIFT)) - 1); | ||
976 | hugetlb_init_one_hstate(h); | ||
977 | parsed_hstate = h; | ||
978 | } | ||
979 | |||
980 | static int __init hugetlb_setup(char *s) | ||
981 | { | ||
982 | unsigned long *mhp; | ||
983 | |||
984 | /* | ||
985 | * !max_hstate means we haven't parsed a hugepagesz= parameter yet, | ||
986 | * so this hugepages= parameter goes to the "default hstate". | ||
987 | */ | ||
988 | if (!max_hstate) | ||
989 | mhp = &default_hstate_max_huge_pages; | ||
990 | else | ||
991 | mhp = &parsed_hstate->max_huge_pages; | ||
992 | |||
993 | if (sscanf(s, "%lu", mhp) <= 0) | ||
994 | *mhp = 0; | ||
995 | |||
996 | return 1; | ||
997 | } | ||
998 | __setup("hugepages=", hugetlb_setup); | ||
999 | |||
1000 | static unsigned int cpuset_mems_nr(unsigned int *array) | ||
1001 | { | ||
1002 | int node; | ||
1003 | unsigned int nr = 0; | ||
1004 | |||
1005 | for_each_node_mask(node, cpuset_current_mems_allowed) | ||
1006 | nr += array[node]; | ||
1007 | |||
1008 | return nr; | ||
1009 | } | ||
1010 | |||
1011 | #ifdef CONFIG_SYSCTL | 946 | #ifdef CONFIG_SYSCTL |
1012 | #ifdef CONFIG_HIGHMEM | 947 | #ifdef CONFIG_HIGHMEM |
1013 | static void try_to_free_low(struct hstate *h, unsigned long count) | 948 | static void try_to_free_low(struct hstate *h, unsigned long count) |
@@ -1105,6 +1040,227 @@ out: | |||
1105 | return ret; | 1040 | return ret; |
1106 | } | 1041 | } |
1107 | 1042 | ||
1043 | #define HSTATE_ATTR_RO(_name) \ | ||
1044 | static struct kobj_attribute _name##_attr = __ATTR_RO(_name) | ||
1045 | |||
1046 | #define HSTATE_ATTR(_name) \ | ||
1047 | static struct kobj_attribute _name##_attr = \ | ||
1048 | __ATTR(_name, 0644, _name##_show, _name##_store) | ||
1049 | |||
1050 | static struct kobject *hugepages_kobj; | ||
1051 | static struct kobject *hstate_kobjs[HUGE_MAX_HSTATE]; | ||
1052 | |||
1053 | static struct hstate *kobj_to_hstate(struct kobject *kobj) | ||
1054 | { | ||
1055 | int i; | ||
1056 | for (i = 0; i < HUGE_MAX_HSTATE; i++) | ||
1057 | if (hstate_kobjs[i] == kobj) | ||
1058 | return &hstates[i]; | ||
1059 | BUG(); | ||
1060 | return NULL; | ||
1061 | } | ||
1062 | |||
1063 | static ssize_t nr_hugepages_show(struct kobject *kobj, | ||
1064 | struct kobj_attribute *attr, char *buf) | ||
1065 | { | ||
1066 | struct hstate *h = kobj_to_hstate(kobj); | ||
1067 | return sprintf(buf, "%lu\n", h->nr_huge_pages); | ||
1068 | } | ||
1069 | static ssize_t nr_hugepages_store(struct kobject *kobj, | ||
1070 | struct kobj_attribute *attr, const char *buf, size_t count) | ||
1071 | { | ||
1072 | int err; | ||
1073 | unsigned long input; | ||
1074 | struct hstate *h = kobj_to_hstate(kobj); | ||
1075 | |||
1076 | err = strict_strtoul(buf, 10, &input); | ||
1077 | if (err) | ||
1078 | return 0; | ||
1079 | |||
1080 | h->max_huge_pages = set_max_huge_pages(h, input); | ||
1081 | |||
1082 | return count; | ||
1083 | } | ||
1084 | HSTATE_ATTR(nr_hugepages); | ||
1085 | |||
1086 | static ssize_t nr_overcommit_hugepages_show(struct kobject *kobj, | ||
1087 | struct kobj_attribute *attr, char *buf) | ||
1088 | { | ||
1089 | struct hstate *h = kobj_to_hstate(kobj); | ||
1090 | return sprintf(buf, "%lu\n", h->nr_overcommit_huge_pages); | ||
1091 | } | ||
1092 | static ssize_t nr_overcommit_hugepages_store(struct kobject *kobj, | ||
1093 | struct kobj_attribute *attr, const char *buf, size_t count) | ||
1094 | { | ||
1095 | int err; | ||
1096 | unsigned long input; | ||
1097 | struct hstate *h = kobj_to_hstate(kobj); | ||
1098 | |||
1099 | err = strict_strtoul(buf, 10, &input); | ||
1100 | if (err) | ||
1101 | return 0; | ||
1102 | |||
1103 | spin_lock(&hugetlb_lock); | ||
1104 | h->nr_overcommit_huge_pages = input; | ||
1105 | spin_unlock(&hugetlb_lock); | ||
1106 | |||
1107 | return count; | ||
1108 | } | ||
1109 | HSTATE_ATTR(nr_overcommit_hugepages); | ||
1110 | |||
1111 | static ssize_t free_hugepages_show(struct kobject *kobj, | ||
1112 | struct kobj_attribute *attr, char *buf) | ||
1113 | { | ||
1114 | struct hstate *h = kobj_to_hstate(kobj); | ||
1115 | return sprintf(buf, "%lu\n", h->free_huge_pages); | ||
1116 | } | ||
1117 | HSTATE_ATTR_RO(free_hugepages); | ||
1118 | |||
1119 | static ssize_t resv_hugepages_show(struct kobject *kobj, | ||
1120 | struct kobj_attribute *attr, char *buf) | ||
1121 | { | ||
1122 | struct hstate *h = kobj_to_hstate(kobj); | ||
1123 | return sprintf(buf, "%lu\n", h->resv_huge_pages); | ||
1124 | } | ||
1125 | HSTATE_ATTR_RO(resv_hugepages); | ||
1126 | |||
1127 | static ssize_t surplus_hugepages_show(struct kobject *kobj, | ||
1128 | struct kobj_attribute *attr, char *buf) | ||
1129 | { | ||
1130 | struct hstate *h = kobj_to_hstate(kobj); | ||
1131 | return sprintf(buf, "%lu\n", h->surplus_huge_pages); | ||
1132 | } | ||
1133 | HSTATE_ATTR_RO(surplus_hugepages); | ||
1134 | |||
1135 | static struct attribute *hstate_attrs[] = { | ||
1136 | &nr_hugepages_attr.attr, | ||
1137 | &nr_overcommit_hugepages_attr.attr, | ||
1138 | &free_hugepages_attr.attr, | ||
1139 | &resv_hugepages_attr.attr, | ||
1140 | &surplus_hugepages_attr.attr, | ||
1141 | NULL, | ||
1142 | }; | ||
1143 | |||
1144 | static struct attribute_group hstate_attr_group = { | ||
1145 | .attrs = hstate_attrs, | ||
1146 | }; | ||
1147 | |||
1148 | static int __init hugetlb_sysfs_add_hstate(struct hstate *h) | ||
1149 | { | ||
1150 | int retval; | ||
1151 | |||
1152 | hstate_kobjs[h - hstates] = kobject_create_and_add(h->name, | ||
1153 | hugepages_kobj); | ||
1154 | if (!hstate_kobjs[h - hstates]) | ||
1155 | return -ENOMEM; | ||
1156 | |||
1157 | retval = sysfs_create_group(hstate_kobjs[h - hstates], | ||
1158 | &hstate_attr_group); | ||
1159 | if (retval) | ||
1160 | kobject_put(hstate_kobjs[h - hstates]); | ||
1161 | |||
1162 | return retval; | ||
1163 | } | ||
1164 | |||
1165 | static void __init hugetlb_sysfs_init(void) | ||
1166 | { | ||
1167 | struct hstate *h; | ||
1168 | int err; | ||
1169 | |||
1170 | hugepages_kobj = kobject_create_and_add("hugepages", mm_kobj); | ||
1171 | if (!hugepages_kobj) | ||
1172 | return; | ||
1173 | |||
1174 | for_each_hstate(h) { | ||
1175 | err = hugetlb_sysfs_add_hstate(h); | ||
1176 | if (err) | ||
1177 | printk(KERN_ERR "Hugetlb: Unable to add hstate %s", | ||
1178 | h->name); | ||
1179 | } | ||
1180 | } | ||
1181 | |||
1182 | static void __exit hugetlb_exit(void) | ||
1183 | { | ||
1184 | struct hstate *h; | ||
1185 | |||
1186 | for_each_hstate(h) { | ||
1187 | kobject_put(hstate_kobjs[h - hstates]); | ||
1188 | } | ||
1189 | |||
1190 | kobject_put(hugepages_kobj); | ||
1191 | } | ||
1192 | module_exit(hugetlb_exit); | ||
1193 | |||
1194 | static int __init hugetlb_init(void) | ||
1195 | { | ||
1196 | BUILD_BUG_ON(HPAGE_SHIFT == 0); | ||
1197 | |||
1198 | if (!size_to_hstate(HPAGE_SIZE)) { | ||
1199 | hugetlb_add_hstate(HUGETLB_PAGE_ORDER); | ||
1200 | parsed_hstate->max_huge_pages = default_hstate_max_huge_pages; | ||
1201 | } | ||
1202 | default_hstate_idx = size_to_hstate(HPAGE_SIZE) - hstates; | ||
1203 | |||
1204 | hugetlb_init_hstates(); | ||
1205 | |||
1206 | report_hugepages(); | ||
1207 | |||
1208 | hugetlb_sysfs_init(); | ||
1209 | |||
1210 | return 0; | ||
1211 | } | ||
1212 | module_init(hugetlb_init); | ||
1213 | |||
1214 | /* Should be called on processing a hugepagesz=... option */ | ||
1215 | void __init hugetlb_add_hstate(unsigned order) | ||
1216 | { | ||
1217 | struct hstate *h; | ||
1218 | if (size_to_hstate(PAGE_SIZE << order)) { | ||
1219 | printk(KERN_WARNING "hugepagesz= specified twice, ignoring\n"); | ||
1220 | return; | ||
1221 | } | ||
1222 | BUG_ON(max_hstate >= HUGE_MAX_HSTATE); | ||
1223 | BUG_ON(order == 0); | ||
1224 | h = &hstates[max_hstate++]; | ||
1225 | h->order = order; | ||
1226 | h->mask = ~((1ULL << (order + PAGE_SHIFT)) - 1); | ||
1227 | snprintf(h->name, HSTATE_NAME_LEN, "hugepages-%lukB", | ||
1228 | huge_page_size(h)/1024); | ||
1229 | hugetlb_init_one_hstate(h); | ||
1230 | parsed_hstate = h; | ||
1231 | } | ||
1232 | |||
1233 | static int __init hugetlb_setup(char *s) | ||
1234 | { | ||
1235 | unsigned long *mhp; | ||
1236 | |||
1237 | /* | ||
1238 | * !max_hstate means we haven't parsed a hugepagesz= parameter yet, | ||
1239 | * so this hugepages= parameter goes to the "default hstate". | ||
1240 | */ | ||
1241 | if (!max_hstate) | ||
1242 | mhp = &default_hstate_max_huge_pages; | ||
1243 | else | ||
1244 | mhp = &parsed_hstate->max_huge_pages; | ||
1245 | |||
1246 | if (sscanf(s, "%lu", mhp) <= 0) | ||
1247 | *mhp = 0; | ||
1248 | |||
1249 | return 1; | ||
1250 | } | ||
1251 | __setup("hugepages=", hugetlb_setup); | ||
1252 | |||
1253 | static unsigned int cpuset_mems_nr(unsigned int *array) | ||
1254 | { | ||
1255 | int node; | ||
1256 | unsigned int nr = 0; | ||
1257 | |||
1258 | for_each_node_mask(node, cpuset_current_mems_allowed) | ||
1259 | nr += array[node]; | ||
1260 | |||
1261 | return nr; | ||
1262 | } | ||
1263 | |||
1108 | int hugetlb_sysctl_handler(struct ctl_table *table, int write, | 1264 | int hugetlb_sysctl_handler(struct ctl_table *table, int write, |
1109 | struct file *file, void __user *buffer, | 1265 | struct file *file, void __user *buffer, |
1110 | size_t *length, loff_t *ppos) | 1266 | size_t *length, loff_t *ppos) |