Diffstat (limited to 'kernel')
45 files changed, 1198 insertions, 717 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 6c5f081132a4..188c43223f52 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -11,7 +11,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
11 | hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ | 11 | hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ |
12 | notifier.o ksysfs.o pm_qos_params.o | 12 | notifier.o ksysfs.o pm_qos_params.o |
13 | 13 | ||
14 | obj-$(CONFIG_SYSCTL) += sysctl_check.o | 14 | obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o |
15 | obj-$(CONFIG_STACKTRACE) += stacktrace.o | 15 | obj-$(CONFIG_STACKTRACE) += stacktrace.o |
16 | obj-y += time/ | 16 | obj-y += time/ |
17 | obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o | 17 | obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o |
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 9435d9392df5..0e0bd27e6512 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -267,7 +267,7 @@ static int audit_to_watch(struct audit_krule *krule, char *path, int len,
267 | return -EINVAL; | 267 | return -EINVAL; |
268 | 268 | ||
269 | watch = audit_init_watch(path); | 269 | watch = audit_init_watch(path); |
270 | if (unlikely(IS_ERR(watch))) | 270 | if (IS_ERR(watch)) |
271 | return PTR_ERR(watch); | 271 | return PTR_ERR(watch); |
272 | 272 | ||
273 | audit_get_watch(watch); | 273 | audit_get_watch(watch); |
@@ -851,7 +851,7 @@ static struct audit_watch *audit_dupe_watch(struct audit_watch *old)
851 | return ERR_PTR(-ENOMEM); | 851 | return ERR_PTR(-ENOMEM); |
852 | 852 | ||
853 | new = audit_init_watch(path); | 853 | new = audit_init_watch(path); |
854 | if (unlikely(IS_ERR(new))) { | 854 | if (IS_ERR(new)) { |
855 | kfree(path); | 855 | kfree(path); |
856 | goto out; | 856 | goto out; |
857 | } | 857 | } |
@@ -992,7 +992,7 @@ static void audit_update_watch(struct audit_parent *parent,
992 | audit_set_auditable(current->audit_context); | 992 | audit_set_auditable(current->audit_context); |
993 | 993 | ||
994 | nwatch = audit_dupe_watch(owatch); | 994 | nwatch = audit_dupe_watch(owatch); |
995 | if (unlikely(IS_ERR(nwatch))) { | 995 | if (IS_ERR(nwatch)) { |
996 | mutex_unlock(&audit_filter_mutex); | 996 | mutex_unlock(&audit_filter_mutex); |
997 | audit_panic("error updating watch, skipping"); | 997 | audit_panic("error updating watch, skipping"); |
998 | return; | 998 | return; |
@@ -1007,7 +1007,7 @@ static void audit_update_watch(struct audit_parent *parent,
1007 | list_del_rcu(&oentry->list); | 1007 | list_del_rcu(&oentry->list); |
1008 | 1008 | ||
1009 | nentry = audit_dupe_rule(&oentry->rule, nwatch); | 1009 | nentry = audit_dupe_rule(&oentry->rule, nwatch); |
1010 | if (unlikely(IS_ERR(nentry))) | 1010 | if (IS_ERR(nentry)) |
1011 | audit_panic("error updating watch, removing"); | 1011 | audit_panic("error updating watch, removing"); |
1012 | else { | 1012 | else { |
1013 | int h = audit_hash_ino((u32)ino); | 1013 | int h = audit_hash_ino((u32)ino); |
@@ -1790,7 +1790,7 @@ int audit_update_lsm_rules(void)
1790 | watch = entry->rule.watch; | 1790 | watch = entry->rule.watch; |
1791 | tree = entry->rule.tree; | 1791 | tree = entry->rule.tree; |
1792 | nentry = audit_dupe_rule(&entry->rule, watch); | 1792 | nentry = audit_dupe_rule(&entry->rule, watch); |
1793 | if (unlikely(IS_ERR(nentry))) { | 1793 | if (IS_ERR(nentry)) { |
1794 | /* save the first error encountered for the | 1794 | /* save the first error encountered for the |
1795 | * return value */ | 1795 | * return value */ |
1796 | if (!err) | 1796 | if (!err) |
diff --git a/kernel/bounds.c b/kernel/bounds.c
new file mode 100644
index 000000000000..3c5301381837
--- /dev/null
+++ b/kernel/bounds.c
@@ -0,0 +1,19 @@
1 | /* | ||
2 | * Generate definitions needed by the preprocessor. | ||
3 | * This code generates raw asm output which is post-processed | ||
4 | * to extract and format the required data. | ||
5 | */ | ||
6 | |||
7 | #define __GENERATING_BOUNDS_H | ||
8 | /* Include headers that define the enum constants of interest */ | ||
9 | #include <linux/page-flags.h> | ||
10 | #include <linux/mmzone.h> | ||
11 | #include <linux/kbuild.h> | ||
12 | |||
13 | void foo(void) | ||
14 | { | ||
15 | /* The enum constants to put into include/linux/bounds.h */ | ||
16 | DEFINE(NR_PAGEFLAGS, __NR_PAGEFLAGS); | ||
17 | DEFINE(MAX_NR_ZONES, __MAX_NR_ZONES); | ||
18 | /* End of constants */ | ||
19 | } | ||
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 6d8de051382b..b9d467d83fc1 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -44,6 +44,7 @@
44 | #include <linux/kmod.h> | 44 | #include <linux/kmod.h> |
45 | #include <linux/delayacct.h> | 45 | #include <linux/delayacct.h> |
46 | #include <linux/cgroupstats.h> | 46 | #include <linux/cgroupstats.h> |
47 | #include <linux/hash.h> | ||
47 | 48 | ||
48 | #include <asm/atomic.h> | 49 | #include <asm/atomic.h> |
49 | 50 | ||
@@ -118,17 +119,7 @@ static int root_count;
118 | * be called. | 119 | * be called. |
119 | */ | 120 | */ |
120 | static int need_forkexit_callback; | 121 | static int need_forkexit_callback; |
121 | 122 | static int need_mm_owner_callback __read_mostly; | |
122 | /* bits in struct cgroup flags field */ | ||
123 | enum { | ||
124 | /* Control Group is dead */ | ||
125 | CGRP_REMOVED, | ||
126 | /* Control Group has previously had a child cgroup or a task, | ||
127 | * but no longer (only if CGRP_NOTIFY_ON_RELEASE is set) */ | ||
128 | CGRP_RELEASABLE, | ||
129 | /* Control Group requires release notifications to userspace */ | ||
130 | CGRP_NOTIFY_ON_RELEASE, | ||
131 | }; | ||
132 | 123 | ||
133 | /* convenient tests for these bits */ | 124 | /* convenient tests for these bits */ |
134 | inline int cgroup_is_removed(const struct cgroup *cgrp) | 125 | inline int cgroup_is_removed(const struct cgroup *cgrp) |
@@ -204,6 +195,27 @@ static struct cg_cgroup_link init_css_set_link;
204 | static DEFINE_RWLOCK(css_set_lock); | 195 | static DEFINE_RWLOCK(css_set_lock); |
205 | static int css_set_count; | 196 | static int css_set_count; |
206 | 197 | ||
198 | /* hash table for cgroup groups. This improves the performance to | ||
199 | * find an existing css_set */ | ||
200 | #define CSS_SET_HASH_BITS 7 | ||
201 | #define CSS_SET_TABLE_SIZE (1 << CSS_SET_HASH_BITS) | ||
202 | static struct hlist_head css_set_table[CSS_SET_TABLE_SIZE]; | ||
203 | |||
204 | static struct hlist_head *css_set_hash(struct cgroup_subsys_state *css[]) | ||
205 | { | ||
206 | int i; | ||
207 | int index; | ||
208 | unsigned long tmp = 0UL; | ||
209 | |||
210 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) | ||
211 | tmp += (unsigned long)css[i]; | ||
212 | tmp = (tmp >> 16) ^ tmp; | ||
213 | |||
214 | index = hash_long(tmp, CSS_SET_HASH_BITS); | ||
215 | |||
216 | return &css_set_table[index]; | ||
217 | } | ||
218 | |||
207 | /* We don't maintain the lists running through each css_set to its | 219 | /* We don't maintain the lists running through each css_set to its |
208 | * task until after the first call to cgroup_iter_start(). This | 220 | * task until after the first call to cgroup_iter_start(). This |
209 | * reduces the fork()/exit() overhead for people who have cgroups | 221 | * reduces the fork()/exit() overhead for people who have cgroups |
@@ -230,7 +242,7 @@ static int use_task_css_set_links;
230 | static void unlink_css_set(struct css_set *cg) | 242 | static void unlink_css_set(struct css_set *cg) |
231 | { | 243 | { |
232 | write_lock(&css_set_lock); | 244 | write_lock(&css_set_lock); |
233 | list_del(&cg->list); | 245 | hlist_del(&cg->hlist); |
234 | css_set_count--; | 246 | css_set_count--; |
235 | while (!list_empty(&cg->cg_links)) { | 247 | while (!list_empty(&cg->cg_links)) { |
236 | struct cg_cgroup_link *link; | 248 | struct cg_cgroup_link *link; |
@@ -295,9 +307,7 @@ static inline void put_css_set_taskexit(struct css_set *cg)
295 | /* | 307 | /* |
296 | * find_existing_css_set() is a helper for | 308 | * find_existing_css_set() is a helper for |
297 | * find_css_set(), and checks to see whether an existing | 309 | * find_css_set(), and checks to see whether an existing |
298 | * css_set is suitable. This currently walks a linked-list for | 310 | * css_set is suitable. |
299 | * simplicity; a later patch will use a hash table for better | ||
300 | * performance | ||
301 | * | 311 | * |
302 | * oldcg: the cgroup group that we're using before the cgroup | 312 | * oldcg: the cgroup group that we're using before the cgroup |
303 | * transition | 313 | * transition |
@@ -314,7 +324,9 @@ static struct css_set *find_existing_css_set(
314 | { | 324 | { |
315 | int i; | 325 | int i; |
316 | struct cgroupfs_root *root = cgrp->root; | 326 | struct cgroupfs_root *root = cgrp->root; |
317 | struct list_head *l = &init_css_set.list; | 327 | struct hlist_head *hhead; |
328 | struct hlist_node *node; | ||
329 | struct css_set *cg; | ||
318 | 330 | ||
319 | /* Built the set of subsystem state objects that we want to | 331 | /* Built the set of subsystem state objects that we want to |
320 | * see in the new css_set */ | 332 | * see in the new css_set */ |
@@ -331,18 +343,13 @@ static struct css_set *find_existing_css_set(
331 | } | 343 | } |
332 | } | 344 | } |
333 | 345 | ||
334 | /* Look through existing cgroup groups to find one to reuse */ | 346 | hhead = css_set_hash(template); |
335 | do { | 347 | hlist_for_each_entry(cg, node, hhead, hlist) { |
336 | struct css_set *cg = | ||
337 | list_entry(l, struct css_set, list); | ||
338 | |||
339 | if (!memcmp(template, cg->subsys, sizeof(cg->subsys))) { | 348 | if (!memcmp(template, cg->subsys, sizeof(cg->subsys))) { |
340 | /* All subsystems matched */ | 349 | /* All subsystems matched */ |
341 | return cg; | 350 | return cg; |
342 | } | 351 | } |
343 | /* Try the next cgroup group */ | 352 | } |
344 | l = l->next; | ||
345 | } while (l != &init_css_set.list); | ||
346 | 353 | ||
347 | /* No existing cgroup group matched */ | 354 | /* No existing cgroup group matched */ |
348 | return NULL; | 355 | return NULL; |
@@ -404,6 +411,8 @@ static struct css_set *find_css_set(
404 | struct list_head tmp_cg_links; | 411 | struct list_head tmp_cg_links; |
405 | struct cg_cgroup_link *link; | 412 | struct cg_cgroup_link *link; |
406 | 413 | ||
414 | struct hlist_head *hhead; | ||
415 | |||
407 | /* First see if we already have a cgroup group that matches | 416 | /* First see if we already have a cgroup group that matches |
408 | * the desired set */ | 417 | * the desired set */ |
409 | write_lock(&css_set_lock); | 418 | write_lock(&css_set_lock); |
@@ -428,6 +437,7 @@ static struct css_set *find_css_set(
428 | kref_init(&res->ref); | 437 | kref_init(&res->ref); |
429 | INIT_LIST_HEAD(&res->cg_links); | 438 | INIT_LIST_HEAD(&res->cg_links); |
430 | INIT_LIST_HEAD(&res->tasks); | 439 | INIT_LIST_HEAD(&res->tasks); |
440 | INIT_HLIST_NODE(&res->hlist); | ||
431 | 441 | ||
432 | /* Copy the set of subsystem state objects generated in | 442 | /* Copy the set of subsystem state objects generated in |
433 | * find_existing_css_set() */ | 443 | * find_existing_css_set() */ |
@@ -467,9 +477,12 @@ static struct css_set *find_css_set(
467 | 477 | ||
468 | BUG_ON(!list_empty(&tmp_cg_links)); | 478 | BUG_ON(!list_empty(&tmp_cg_links)); |
469 | 479 | ||
470 | /* Link this cgroup group into the list */ | ||
471 | list_add(&res->list, &init_css_set.list); | ||
472 | css_set_count++; | 480 | css_set_count++; |
481 | |||
482 | /* Add this cgroup group to the hash table */ | ||
483 | hhead = css_set_hash(res->subsys); | ||
484 | hlist_add_head(&res->hlist, hhead); | ||
485 | |||
473 | write_unlock(&css_set_lock); | 486 | write_unlock(&css_set_lock); |
474 | 487 | ||
475 | return res; | 488 | return res; |
@@ -948,7 +961,7 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
948 | int ret = 0; | 961 | int ret = 0; |
949 | struct super_block *sb; | 962 | struct super_block *sb; |
950 | struct cgroupfs_root *root; | 963 | struct cgroupfs_root *root; |
951 | struct list_head tmp_cg_links, *l; | 964 | struct list_head tmp_cg_links; |
952 | INIT_LIST_HEAD(&tmp_cg_links); | 965 | INIT_LIST_HEAD(&tmp_cg_links); |
953 | 966 | ||
954 | /* First find the desired set of subsystems */ | 967 | /* First find the desired set of subsystems */ |
@@ -990,6 +1003,7 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
990 | /* New superblock */ | 1003 | /* New superblock */ |
991 | struct cgroup *cgrp = &root->top_cgroup; | 1004 | struct cgroup *cgrp = &root->top_cgroup; |
992 | struct inode *inode; | 1005 | struct inode *inode; |
1006 | int i; | ||
993 | 1007 | ||
994 | BUG_ON(sb->s_root != NULL); | 1008 | BUG_ON(sb->s_root != NULL); |
995 | 1009 | ||
@@ -1034,22 +1048,25 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
1034 | /* Link the top cgroup in this hierarchy into all | 1048 | /* Link the top cgroup in this hierarchy into all |
1035 | * the css_set objects */ | 1049 | * the css_set objects */ |
1036 | write_lock(&css_set_lock); | 1050 | write_lock(&css_set_lock); |
1037 | l = &init_css_set.list; | 1051 | for (i = 0; i < CSS_SET_TABLE_SIZE; i++) { |
1038 | do { | 1052 | struct hlist_head *hhead = &css_set_table[i]; |
1053 | struct hlist_node *node; | ||
1039 | struct css_set *cg; | 1054 | struct css_set *cg; |
1040 | struct cg_cgroup_link *link; | 1055 | |
1041 | cg = list_entry(l, struct css_set, list); | 1056 | hlist_for_each_entry(cg, node, hhead, hlist) { |
1042 | BUG_ON(list_empty(&tmp_cg_links)); | 1057 | struct cg_cgroup_link *link; |
1043 | link = list_entry(tmp_cg_links.next, | 1058 | |
1044 | struct cg_cgroup_link, | 1059 | BUG_ON(list_empty(&tmp_cg_links)); |
1045 | cgrp_link_list); | 1060 | link = list_entry(tmp_cg_links.next, |
1046 | list_del(&link->cgrp_link_list); | 1061 | struct cg_cgroup_link, |
1047 | link->cg = cg; | 1062 | cgrp_link_list); |
1048 | list_add(&link->cgrp_link_list, | 1063 | list_del(&link->cgrp_link_list); |
1049 | &root->top_cgroup.css_sets); | 1064 | link->cg = cg; |
1050 | list_add(&link->cg_link_list, &cg->cg_links); | 1065 | list_add(&link->cgrp_link_list, |
1051 | l = l->next; | 1066 | &root->top_cgroup.css_sets); |
1052 | } while (l != &init_css_set.list); | 1067 | list_add(&link->cg_link_list, &cg->cg_links); |
1068 | } | ||
1069 | } | ||
1053 | write_unlock(&css_set_lock); | 1070 | write_unlock(&css_set_lock); |
1054 | 1071 | ||
1055 | free_cg_links(&tmp_cg_links); | 1072 | free_cg_links(&tmp_cg_links); |
@@ -1307,18 +1324,16 @@ enum cgroup_filetype {
1307 | FILE_DIR, | 1324 | FILE_DIR, |
1308 | FILE_TASKLIST, | 1325 | FILE_TASKLIST, |
1309 | FILE_NOTIFY_ON_RELEASE, | 1326 | FILE_NOTIFY_ON_RELEASE, |
1310 | FILE_RELEASABLE, | ||
1311 | FILE_RELEASE_AGENT, | 1327 | FILE_RELEASE_AGENT, |
1312 | }; | 1328 | }; |
1313 | 1329 | ||
1314 | static ssize_t cgroup_write_uint(struct cgroup *cgrp, struct cftype *cft, | 1330 | static ssize_t cgroup_write_X64(struct cgroup *cgrp, struct cftype *cft, |
1315 | struct file *file, | 1331 | struct file *file, |
1316 | const char __user *userbuf, | 1332 | const char __user *userbuf, |
1317 | size_t nbytes, loff_t *unused_ppos) | 1333 | size_t nbytes, loff_t *unused_ppos) |
1318 | { | 1334 | { |
1319 | char buffer[64]; | 1335 | char buffer[64]; |
1320 | int retval = 0; | 1336 | int retval = 0; |
1321 | u64 val; | ||
1322 | char *end; | 1337 | char *end; |
1323 | 1338 | ||
1324 | if (!nbytes) | 1339 | if (!nbytes) |
@@ -1329,16 +1344,18 @@ static ssize_t cgroup_write_uint(struct cgroup *cgrp, struct cftype *cft,
1329 | return -EFAULT; | 1344 | return -EFAULT; |
1330 | 1345 | ||
1331 | buffer[nbytes] = 0; /* nul-terminate */ | 1346 | buffer[nbytes] = 0; /* nul-terminate */ |
1332 | 1347 | strstrip(buffer); | |
1333 | /* strip newline if necessary */ | 1348 | if (cft->write_u64) { |
1334 | if (nbytes && (buffer[nbytes-1] == '\n')) | 1349 | u64 val = simple_strtoull(buffer, &end, 0); |
1335 | buffer[nbytes-1] = 0; | 1350 | if (*end) |
1336 | val = simple_strtoull(buffer, &end, 0); | 1351 | return -EINVAL; |
1337 | if (*end) | 1352 | retval = cft->write_u64(cgrp, cft, val); |
1338 | return -EINVAL; | 1353 | } else { |
1339 | 1354 | s64 val = simple_strtoll(buffer, &end, 0); | |
1340 | /* Pass to subsystem */ | 1355 | if (*end) |
1341 | retval = cft->write_uint(cgrp, cft, val); | 1356 | return -EINVAL; |
1357 | retval = cft->write_s64(cgrp, cft, val); | ||
1358 | } | ||
1342 | if (!retval) | 1359 | if (!retval) |
1343 | retval = nbytes; | 1360 | retval = nbytes; |
1344 | return retval; | 1361 | return retval; |
@@ -1419,23 +1436,39 @@ static ssize_t cgroup_file_write(struct file *file, const char __user *buf,
1419 | return -ENODEV; | 1436 | return -ENODEV; |
1420 | if (cft->write) | 1437 | if (cft->write) |
1421 | return cft->write(cgrp, cft, file, buf, nbytes, ppos); | 1438 | return cft->write(cgrp, cft, file, buf, nbytes, ppos); |
1422 | if (cft->write_uint) | 1439 | if (cft->write_u64 || cft->write_s64) |
1423 | return cgroup_write_uint(cgrp, cft, file, buf, nbytes, ppos); | 1440 | return cgroup_write_X64(cgrp, cft, file, buf, nbytes, ppos); |
1441 | if (cft->trigger) { | ||
1442 | int ret = cft->trigger(cgrp, (unsigned int)cft->private); | ||
1443 | return ret ? ret : nbytes; | ||
1444 | } | ||
1424 | return -EINVAL; | 1445 | return -EINVAL; |
1425 | } | 1446 | } |
1426 | 1447 | ||
1427 | static ssize_t cgroup_read_uint(struct cgroup *cgrp, struct cftype *cft, | 1448 | static ssize_t cgroup_read_u64(struct cgroup *cgrp, struct cftype *cft, |
1428 | struct file *file, | 1449 | struct file *file, |
1429 | char __user *buf, size_t nbytes, | 1450 | char __user *buf, size_t nbytes, |
1430 | loff_t *ppos) | 1451 | loff_t *ppos) |
1431 | { | 1452 | { |
1432 | char tmp[64]; | 1453 | char tmp[64]; |
1433 | u64 val = cft->read_uint(cgrp, cft); | 1454 | u64 val = cft->read_u64(cgrp, cft); |
1434 | int len = sprintf(tmp, "%llu\n", (unsigned long long) val); | 1455 | int len = sprintf(tmp, "%llu\n", (unsigned long long) val); |
1435 | 1456 | ||
1436 | return simple_read_from_buffer(buf, nbytes, ppos, tmp, len); | 1457 | return simple_read_from_buffer(buf, nbytes, ppos, tmp, len); |
1437 | } | 1458 | } |
1438 | 1459 | ||
1460 | static ssize_t cgroup_read_s64(struct cgroup *cgrp, struct cftype *cft, | ||
1461 | struct file *file, | ||
1462 | char __user *buf, size_t nbytes, | ||
1463 | loff_t *ppos) | ||
1464 | { | ||
1465 | char tmp[64]; | ||
1466 | s64 val = cft->read_s64(cgrp, cft); | ||
1467 | int len = sprintf(tmp, "%lld\n", (long long) val); | ||
1468 | |||
1469 | return simple_read_from_buffer(buf, nbytes, ppos, tmp, len); | ||
1470 | } | ||
1471 | |||
1439 | static ssize_t cgroup_common_file_read(struct cgroup *cgrp, | 1472 | static ssize_t cgroup_common_file_read(struct cgroup *cgrp, |
1440 | struct cftype *cft, | 1473 | struct cftype *cft, |
1441 | struct file *file, | 1474 | struct file *file, |
@@ -1490,11 +1523,56 @@ static ssize_t cgroup_file_read(struct file *file, char __user *buf,
1490 | 1523 | ||
1491 | if (cft->read) | 1524 | if (cft->read) |
1492 | return cft->read(cgrp, cft, file, buf, nbytes, ppos); | 1525 | return cft->read(cgrp, cft, file, buf, nbytes, ppos); |
1493 | if (cft->read_uint) | 1526 | if (cft->read_u64) |
1494 | return cgroup_read_uint(cgrp, cft, file, buf, nbytes, ppos); | 1527 | return cgroup_read_u64(cgrp, cft, file, buf, nbytes, ppos); |
1528 | if (cft->read_s64) | ||
1529 | return cgroup_read_s64(cgrp, cft, file, buf, nbytes, ppos); | ||
1495 | return -EINVAL; | 1530 | return -EINVAL; |
1496 | } | 1531 | } |
1497 | 1532 | ||
1533 | /* | ||
1534 | * seqfile ops/methods for returning structured data. Currently just | ||
1535 | * supports string->u64 maps, but can be extended in future. | ||
1536 | */ | ||
1537 | |||
1538 | struct cgroup_seqfile_state { | ||
1539 | struct cftype *cft; | ||
1540 | struct cgroup *cgroup; | ||
1541 | }; | ||
1542 | |||
1543 | static int cgroup_map_add(struct cgroup_map_cb *cb, const char *key, u64 value) | ||
1544 | { | ||
1545 | struct seq_file *sf = cb->state; | ||
1546 | return seq_printf(sf, "%s %llu\n", key, (unsigned long long)value); | ||
1547 | } | ||
1548 | |||
1549 | static int cgroup_seqfile_show(struct seq_file *m, void *arg) | ||
1550 | { | ||
1551 | struct cgroup_seqfile_state *state = m->private; | ||
1552 | struct cftype *cft = state->cft; | ||
1553 | if (cft->read_map) { | ||
1554 | struct cgroup_map_cb cb = { | ||
1555 | .fill = cgroup_map_add, | ||
1556 | .state = m, | ||
1557 | }; | ||
1558 | return cft->read_map(state->cgroup, cft, &cb); | ||
1559 | } | ||
1560 | return cft->read_seq_string(state->cgroup, cft, m); | ||
1561 | } | ||
1562 | |||
1563 | int cgroup_seqfile_release(struct inode *inode, struct file *file) | ||
1564 | { | ||
1565 | struct seq_file *seq = file->private_data; | ||
1566 | kfree(seq->private); | ||
1567 | return single_release(inode, file); | ||
1568 | } | ||
1569 | |||
1570 | static struct file_operations cgroup_seqfile_operations = { | ||
1571 | .read = seq_read, | ||
1572 | .llseek = seq_lseek, | ||
1573 | .release = cgroup_seqfile_release, | ||
1574 | }; | ||
1575 | |||
1498 | static int cgroup_file_open(struct inode *inode, struct file *file) | 1576 | static int cgroup_file_open(struct inode *inode, struct file *file) |
1499 | { | 1577 | { |
1500 | int err; | 1578 | int err; |
@@ -1507,7 +1585,18 @@ static int cgroup_file_open(struct inode *inode, struct file *file)
1507 | cft = __d_cft(file->f_dentry); | 1585 | cft = __d_cft(file->f_dentry); |
1508 | if (!cft) | 1586 | if (!cft) |
1509 | return -ENODEV; | 1587 | return -ENODEV; |
1510 | if (cft->open) | 1588 | if (cft->read_map || cft->read_seq_string) { |
1589 | struct cgroup_seqfile_state *state = | ||
1590 | kzalloc(sizeof(*state), GFP_USER); | ||
1591 | if (!state) | ||
1592 | return -ENOMEM; | ||
1593 | state->cft = cft; | ||
1594 | state->cgroup = __d_cgrp(file->f_dentry->d_parent); | ||
1595 | file->f_op = &cgroup_seqfile_operations; | ||
1596 | err = single_open(file, cgroup_seqfile_show, state); | ||
1597 | if (err < 0) | ||
1598 | kfree(state); | ||
1599 | } else if (cft->open) | ||
1511 | err = cft->open(inode, file); | 1600 | err = cft->open(inode, file); |
1512 | else | 1601 | else |
1513 | err = 0; | 1602 | err = 0; |
@@ -1715,7 +1804,7 @@ static void cgroup_advance_iter(struct cgroup *cgrp,
1715 | * The tasklist_lock is not held here, as do_each_thread() and | 1804 | * The tasklist_lock is not held here, as do_each_thread() and |
1716 | * while_each_thread() are protected by RCU. | 1805 | * while_each_thread() are protected by RCU. |
1717 | */ | 1806 | */ |
1718 | void cgroup_enable_task_cg_lists(void) | 1807 | static void cgroup_enable_task_cg_lists(void) |
1719 | { | 1808 | { |
1720 | struct task_struct *p, *g; | 1809 | struct task_struct *p, *g; |
1721 | write_lock(&css_set_lock); | 1810 | write_lock(&css_set_lock); |
@@ -1913,14 +2002,14 @@ int cgroup_scan_tasks(struct cgroup_scanner *scan)
1913 | 2002 | ||
1914 | if (heap->size) { | 2003 | if (heap->size) { |
1915 | for (i = 0; i < heap->size; i++) { | 2004 | for (i = 0; i < heap->size; i++) { |
1916 | struct task_struct *p = heap->ptrs[i]; | 2005 | struct task_struct *q = heap->ptrs[i]; |
1917 | if (i == 0) { | 2006 | if (i == 0) { |
1918 | latest_time = p->start_time; | 2007 | latest_time = q->start_time; |
1919 | latest_task = p; | 2008 | latest_task = q; |
1920 | } | 2009 | } |
1921 | /* Process the task per the caller's callback */ | 2010 | /* Process the task per the caller's callback */ |
1922 | scan->process_task(p, scan); | 2011 | scan->process_task(q, scan); |
1923 | put_task_struct(p); | 2012 | put_task_struct(q); |
1924 | } | 2013 | } |
1925 | /* | 2014 | /* |
1926 | * If we had to process any tasks at all, scan again | 2015 | * If we had to process any tasks at all, scan again |
@@ -2138,11 +2227,6 @@ static u64 cgroup_read_notify_on_release(struct cgroup *cgrp,
2138 | return notify_on_release(cgrp); | 2227 | return notify_on_release(cgrp); |
2139 | } | 2228 | } |
2140 | 2229 | ||
2141 | static u64 cgroup_read_releasable(struct cgroup *cgrp, struct cftype *cft) | ||
2142 | { | ||
2143 | return test_bit(CGRP_RELEASABLE, &cgrp->flags); | ||
2144 | } | ||
2145 | |||
2146 | /* | 2230 | /* |
2147 | * for the common functions, 'private' gives the type of file | 2231 | * for the common functions, 'private' gives the type of file |
2148 | */ | 2232 | */ |
@@ -2158,16 +2242,10 @@ static struct cftype files[] = {
2158 | 2242 | ||
2159 | { | 2243 | { |
2160 | .name = "notify_on_release", | 2244 | .name = "notify_on_release", |
2161 | .read_uint = cgroup_read_notify_on_release, | 2245 | .read_u64 = cgroup_read_notify_on_release, |
2162 | .write = cgroup_common_file_write, | 2246 | .write = cgroup_common_file_write, |
2163 | .private = FILE_NOTIFY_ON_RELEASE, | 2247 | .private = FILE_NOTIFY_ON_RELEASE, |
2164 | }, | 2248 | }, |
2165 | |||
2166 | { | ||
2167 | .name = "releasable", | ||
2168 | .read_uint = cgroup_read_releasable, | ||
2169 | .private = FILE_RELEASABLE, | ||
2170 | } | ||
2171 | }; | 2249 | }; |
2172 | 2250 | ||
2173 | static struct cftype cft_release_agent = { | 2251 | static struct cftype cft_release_agent = { |
@@ -2401,10 +2479,9 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
2401 | return 0; | 2479 | return 0; |
2402 | } | 2480 | } |
2403 | 2481 | ||
2404 | static void cgroup_init_subsys(struct cgroup_subsys *ss) | 2482 | static void __init cgroup_init_subsys(struct cgroup_subsys *ss) |
2405 | { | 2483 | { |
2406 | struct cgroup_subsys_state *css; | 2484 | struct cgroup_subsys_state *css; |
2407 | struct list_head *l; | ||
2408 | 2485 | ||
2409 | printk(KERN_INFO "Initializing cgroup subsys %s\n", ss->name); | 2486 | printk(KERN_INFO "Initializing cgroup subsys %s\n", ss->name); |
2410 | 2487 | ||
@@ -2415,34 +2492,19 @@ static void cgroup_init_subsys(struct cgroup_subsys *ss)
2415 | BUG_ON(IS_ERR(css)); | 2492 | BUG_ON(IS_ERR(css)); |
2416 | init_cgroup_css(css, ss, dummytop); | 2493 | init_cgroup_css(css, ss, dummytop); |
2417 | 2494 | ||
2418 | /* Update all cgroup groups to contain a subsys | 2495 | /* Update the init_css_set to contain a subsys |
2419 | * pointer to this state - since the subsystem is | 2496 | * pointer to this state - since the subsystem is |
2420 | * newly registered, all tasks and hence all cgroup | 2497 | * newly registered, all tasks and hence the |
2421 | * groups are in the subsystem's top cgroup. */ | 2498 | * init_css_set is in the subsystem's top cgroup. */ |
2422 | write_lock(&css_set_lock); | 2499 | init_css_set.subsys[ss->subsys_id] = dummytop->subsys[ss->subsys_id]; |
2423 | l = &init_css_set.list; | ||
2424 | do { | ||
2425 | struct css_set *cg = | ||
2426 | list_entry(l, struct css_set, list); | ||
2427 | cg->subsys[ss->subsys_id] = dummytop->subsys[ss->subsys_id]; | ||
2428 | l = l->next; | ||
2429 | } while (l != &init_css_set.list); | ||
2430 | write_unlock(&css_set_lock); | ||
2431 | |||
2432 | /* If this subsystem requested that it be notified with fork | ||
2433 | * events, we should send it one now for every process in the | ||
2434 | * system */ | ||
2435 | if (ss->fork) { | ||
2436 | struct task_struct *g, *p; | ||
2437 | |||
2438 | read_lock(&tasklist_lock); | ||
2439 | do_each_thread(g, p) { | ||
2440 | ss->fork(ss, p); | ||
2441 | } while_each_thread(g, p); | ||
2442 | read_unlock(&tasklist_lock); | ||
2443 | } | ||
2444 | 2500 | ||
2445 | need_forkexit_callback |= ss->fork || ss->exit; | 2501 | need_forkexit_callback |= ss->fork || ss->exit; |
2502 | need_mm_owner_callback |= !!ss->mm_owner_changed; | ||
2503 | |||
2504 | /* At system boot, before all subsystems have been | ||
2505 | * registered, no tasks have been forked, so we don't | ||
2506 | * need to invoke fork callbacks here. */ | ||
2507 | BUG_ON(!list_empty(&init_task.tasks)); | ||
2446 | 2508 | ||
2447 | ss->active = 1; | 2509 | ss->active = 1; |
2448 | } | 2510 | } |
@@ -2458,9 +2520,9 @@ int __init cgroup_init_early(void)
2458 | int i; | 2520 | int i; |
2459 | kref_init(&init_css_set.ref); | 2521 | kref_init(&init_css_set.ref); |
2460 | kref_get(&init_css_set.ref); | 2522 | kref_get(&init_css_set.ref); |
2461 | INIT_LIST_HEAD(&init_css_set.list); | ||
2462 | INIT_LIST_HEAD(&init_css_set.cg_links); | 2523 | INIT_LIST_HEAD(&init_css_set.cg_links); |
2463 | INIT_LIST_HEAD(&init_css_set.tasks); | 2524 | INIT_LIST_HEAD(&init_css_set.tasks); |
2525 | INIT_HLIST_NODE(&init_css_set.hlist); | ||
2464 | css_set_count = 1; | 2526 | css_set_count = 1; |
2465 | init_cgroup_root(&rootnode); | 2527 | init_cgroup_root(&rootnode); |
2466 | list_add(&rootnode.root_list, &roots); | 2528 | list_add(&rootnode.root_list, &roots); |
@@ -2473,6 +2535,9 @@ int __init cgroup_init_early(void)
2473 | list_add(&init_css_set_link.cg_link_list, | 2535 | list_add(&init_css_set_link.cg_link_list, |
2474 | &init_css_set.cg_links); | 2536 | &init_css_set.cg_links); |
2475 | 2537 | ||
2538 | for (i = 0; i < CSS_SET_TABLE_SIZE; i++) | ||
2539 | INIT_HLIST_HEAD(&css_set_table[i]); | ||
2540 | |||
2476 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { | 2541 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { |
2477 | struct cgroup_subsys *ss = subsys[i]; | 2542 | struct cgroup_subsys *ss = subsys[i]; |
2478 | 2543 | ||
@@ -2502,7 +2567,7 @@ int __init cgroup_init(void)
2502 | { | 2567 | { |
2503 | int err; | 2568 | int err; |
2504 | int i; | 2569 | int i; |
2505 | struct proc_dir_entry *entry; | 2570 | struct hlist_head *hhead; |
2506 | 2571 | ||
2507 | err = bdi_init(&cgroup_backing_dev_info); | 2572 | err = bdi_init(&cgroup_backing_dev_info); |
2508 | if (err) | 2573 | if (err) |
@@ -2514,13 +2579,15 @@ int __init cgroup_init(void)
2514 | cgroup_init_subsys(ss); | 2579 | cgroup_init_subsys(ss); |
2515 | } | 2580 | } |
2516 | 2581 | ||
2582 | /* Add init_css_set to the hash table */ | ||
2583 | hhead = css_set_hash(init_css_set.subsys); | ||
2584 | hlist_add_head(&init_css_set.hlist, hhead); | ||
2585 | |||
2517 | err = register_filesystem(&cgroup_fs_type); | 2586 | err = register_filesystem(&cgroup_fs_type); |
2518 | if (err < 0) | 2587 | if (err < 0) |
2519 | goto out; | 2588 | goto out; |
2520 | 2589 | ||
2521 | entry = create_proc_entry("cgroups", 0, NULL); | 2590 | proc_create("cgroups", 0, NULL, &proc_cgroupstats_operations); |
2522 | if (entry) | ||
2523 | entry->proc_fops = &proc_cgroupstats_operations; | ||
2524 | 2591 | ||
2525 | out: | 2592 | out: |
2526 | if (err) | 2593 | if (err) |
@@ -2683,6 +2750,34 @@ void cgroup_fork_callbacks(struct task_struct *child)
2683 | } | 2750 | } |
2684 | } | 2751 | } |
2685 | 2752 | ||
2753 | #ifdef CONFIG_MM_OWNER | ||
2754 | /** | ||
2755 | * cgroup_mm_owner_callbacks - run callbacks when the mm->owner changes | ||
2756 | * @p: the new owner | ||
2757 | * | ||
2758 | * Called on every change to mm->owner. mm_init_owner() does not | ||
2759 | * invoke this routine, since it assigns the mm->owner the first time | ||
2760 | * and does not change it. | ||
2761 | */ | ||
2762 | void cgroup_mm_owner_callbacks(struct task_struct *old, struct task_struct *new) | ||
2763 | { | ||
2764 | struct cgroup *oldcgrp, *newcgrp; | ||
2765 | |||
2766 | if (need_mm_owner_callback) { | ||
2767 | int i; | ||
2768 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { | ||
2769 | struct cgroup_subsys *ss = subsys[i]; | ||
2770 | oldcgrp = task_cgroup(old, ss->subsys_id); | ||
2771 | newcgrp = task_cgroup(new, ss->subsys_id); | ||
2772 | if (oldcgrp == newcgrp) | ||
2773 | continue; | ||
2774 | if (ss->mm_owner_changed) | ||
2775 | ss->mm_owner_changed(ss, oldcgrp, newcgrp); | ||
2776 | } | ||
2777 | } | ||
2778 | } | ||
2779 | #endif /* CONFIG_MM_OWNER */ | ||
2780 | |||
2686 | /** | 2781 | /** |
2687 | * cgroup_post_fork - called on a new task after adding it to the task list | 2782 | * cgroup_post_fork - called on a new task after adding it to the task list |
2688 | * @child: the task in question | 2783 | * @child: the task in question |
diff --git a/kernel/cgroup_debug.c b/kernel/cgroup_debug.c
index 37301e877cb0..c3dc3aba4c02 100644
--- a/kernel/cgroup_debug.c
+++ b/kernel/cgroup_debug.c
@@ -1,5 +1,5 @@
1 | /* | 1 | /* |
2 | * kernel/ccontainer_debug.c - Example cgroup subsystem that | 2 | * kernel/cgroup_debug.c - Example cgroup subsystem that |
3 | * exposes debug info | 3 | * exposes debug info |
4 | * | 4 | * |
5 | * Copyright (C) Google Inc, 2007 | 5 | * Copyright (C) Google Inc, 2007 |
@@ -62,25 +62,35 @@ static u64 current_css_set_refcount_read(struct cgroup *cont,
62 | return count; | 62 | return count; |
63 | } | 63 | } |
64 | 64 | ||
65 | static u64 releasable_read(struct cgroup *cgrp, struct cftype *cft) | ||
66 | { | ||
67 | return test_bit(CGRP_RELEASABLE, &cgrp->flags); | ||
68 | } | ||
69 | |||
65 | static struct cftype files[] = { | 70 | static struct cftype files[] = { |
66 | { | 71 | { |
67 | .name = "cgroup_refcount", | 72 | .name = "cgroup_refcount", |
68 | .read_uint = cgroup_refcount_read, | 73 | .read_u64 = cgroup_refcount_read, |
69 | }, | 74 | }, |
70 | { | 75 | { |
71 | .name = "taskcount", | 76 | .name = "taskcount", |
72 | .read_uint = taskcount_read, | 77 | .read_u64 = taskcount_read, |
73 | }, | 78 | }, |
74 | 79 | ||
75 | { | 80 | { |
76 | .name = "current_css_set", | 81 | .name = "current_css_set", |
77 | .read_uint = current_css_set_read, | 82 | .read_u64 = current_css_set_read, |
78 | }, | 83 | }, |
79 | 84 | ||
80 | { | 85 | { |
81 | .name = "current_css_set_refcount", | 86 | .name = "current_css_set_refcount", |
82 | .read_uint = current_css_set_refcount_read, | 87 | .read_u64 = current_css_set_refcount_read, |
83 | }, | 88 | }, |
89 | |||
90 | { | ||
91 | .name = "releasable", | ||
92 | .read_u64 = releasable_read, | ||
93 | } | ||
84 | }; | 94 | }; |
85 | 95 | ||
86 | static int debug_populate(struct cgroup_subsys *ss, struct cgroup *cont) | 96 | static int debug_populate(struct cgroup_subsys *ss, struct cgroup *cont) |
diff --git a/kernel/configs.c b/kernel/configs.c
index e84d3f9c6c7b..4c345210ed8c 100644
--- a/kernel/configs.c
+++ b/kernel/configs.c
@@ -79,12 +79,11 @@ static int __init ikconfig_init(void)
79 | struct proc_dir_entry *entry; | 79 | struct proc_dir_entry *entry; |
80 | 80 | ||
81 | /* create the current config file */ | 81 | /* create the current config file */ |
82 | entry = create_proc_entry("config.gz", S_IFREG | S_IRUGO, | 82 | entry = proc_create("config.gz", S_IFREG | S_IRUGO, NULL, |
83 | &proc_root); | 83 | &ikconfig_file_ops); |
84 | if (!entry) | 84 | if (!entry) |
85 | return -ENOMEM; | 85 | return -ENOMEM; |
86 | 86 | ||
87 | entry->proc_fops = &ikconfig_file_ops; | ||
88 | entry->size = kernel_config_data_size; | 87 | entry->size = kernel_config_data_size; |
89 | 88 | ||
90 | return 0; | 89 | return 0; |
@@ -95,7 +94,7 @@ static int __init ikconfig_init(void)
95 | 94 | ||
96 | static void __exit ikconfig_cleanup(void) | 95 | static void __exit ikconfig_cleanup(void) |
97 | { | 96 | { |
98 | remove_proc_entry("config.gz", &proc_root); | 97 | remove_proc_entry("config.gz", NULL); |
99 | } | 98 | } |
100 | 99 | ||
101 | module_init(ikconfig_init); | 100 | module_init(ikconfig_init); |
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 2011ad8d2697..a98f6ab16ecd 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -33,17 +33,13 @@ static struct {
33 | * an ongoing cpu hotplug operation. | 33 | * an ongoing cpu hotplug operation. |
34 | */ | 34 | */ |
35 | int refcount; | 35 | int refcount; |
36 | wait_queue_head_t writer_queue; | ||
37 | } cpu_hotplug; | 36 | } cpu_hotplug; |
38 | 37 | ||
39 | #define writer_exists() (cpu_hotplug.active_writer != NULL) | ||
40 | |||
41 | void __init cpu_hotplug_init(void) | 38 | void __init cpu_hotplug_init(void) |
42 | { | 39 | { |
43 | cpu_hotplug.active_writer = NULL; | 40 | cpu_hotplug.active_writer = NULL; |
44 | mutex_init(&cpu_hotplug.lock); | 41 | mutex_init(&cpu_hotplug.lock); |
45 | cpu_hotplug.refcount = 0; | 42 | cpu_hotplug.refcount = 0; |
46 | init_waitqueue_head(&cpu_hotplug.writer_queue); | ||
47 | } | 43 | } |
48 | 44 | ||
49 | #ifdef CONFIG_HOTPLUG_CPU | 45 | #ifdef CONFIG_HOTPLUG_CPU |
@@ -65,11 +61,8 @@ void put_online_cpus(void)
65 | if (cpu_hotplug.active_writer == current) | 61 | if (cpu_hotplug.active_writer == current) |
66 | return; | 62 | return; |
67 | mutex_lock(&cpu_hotplug.lock); | 63 | mutex_lock(&cpu_hotplug.lock); |
68 | cpu_hotplug.refcount--; | 64 | if (!--cpu_hotplug.refcount && unlikely(cpu_hotplug.active_writer)) |
69 | 65 | wake_up_process(cpu_hotplug.active_writer); | |
70 | if (unlikely(writer_exists()) && !cpu_hotplug.refcount) | ||
71 | wake_up(&cpu_hotplug.writer_queue); | ||
72 | |||
73 | mutex_unlock(&cpu_hotplug.lock); | 66 | mutex_unlock(&cpu_hotplug.lock); |
74 | 67 | ||
75 | } | 68 | } |
@@ -98,8 +91,8 @@ void cpu_maps_update_done(void)
98 | * Note that during a cpu-hotplug operation, the new readers, if any, | 91 | * Note that during a cpu-hotplug operation, the new readers, if any, |
99 | * will be blocked by the cpu_hotplug.lock | 92 | * will be blocked by the cpu_hotplug.lock |
100 | * | 93 | * |
101 | * Since cpu_maps_update_begin is always called after invoking | 94 | * Since cpu_hotplug_begin() is always called after invoking |
102 | * cpu_maps_update_begin, we can be sure that only one writer is active. | 95 | * cpu_maps_update_begin(), we can be sure that only one writer is active. |
103 | * | 96 | * |
104 | * Note that theoretically, there is a possibility of a livelock: | 97 | * Note that theoretically, there is a possibility of a livelock: |
105 | * - Refcount goes to zero, last reader wakes up the sleeping | 98 | * - Refcount goes to zero, last reader wakes up the sleeping |
@@ -115,19 +108,16 @@ void cpu_maps_update_done(void)
115 | */ | 108 | */ |
116 | static void cpu_hotplug_begin(void) | 109 | static void cpu_hotplug_begin(void) |
117 | { | 110 | { |
118 | DECLARE_WAITQUEUE(wait, current); | ||
119 | |||
120 | mutex_lock(&cpu_hotplug.lock); | ||
121 | |||
122 | cpu_hotplug.active_writer = current; | 111 | cpu_hotplug.active_writer = current; |
123 | add_wait_queue_exclusive(&cpu_hotplug.writer_queue, &wait); | 112 | |
124 | while (cpu_hotplug.refcount) { | 113 | for (;;) { |
125 | set_current_state(TASK_UNINTERRUPTIBLE); | 114 | mutex_lock(&cpu_hotplug.lock); |
115 | if (likely(!cpu_hotplug.refcount)) | ||
116 | break; | ||
117 | __set_current_state(TASK_UNINTERRUPTIBLE); | ||
126 | mutex_unlock(&cpu_hotplug.lock); | 118 | mutex_unlock(&cpu_hotplug.lock); |
127 | schedule(); | 119 | schedule(); |
128 | mutex_lock(&cpu_hotplug.lock); | ||
129 | } | 120 | } |
130 | remove_wait_queue_locked(&cpu_hotplug.writer_queue, &wait); | ||
131 | } | 121 | } |
132 | 122 | ||
133 | static void cpu_hotplug_done(void) | 123 | static void cpu_hotplug_done(void) |
@@ -136,7 +126,7 @@ static void cpu_hotplug_done(void)
136 | mutex_unlock(&cpu_hotplug.lock); | 126 | mutex_unlock(&cpu_hotplug.lock); |
137 | } | 127 | } |
138 | /* Need to know about CPUs going up/down? */ | 128 | /* Need to know about CPUs going up/down? */ |
139 | int __cpuinit register_cpu_notifier(struct notifier_block *nb) | 129 | int __ref register_cpu_notifier(struct notifier_block *nb) |
140 | { | 130 | { |
141 | int ret; | 131 | int ret; |
142 | cpu_maps_update_begin(); | 132 | cpu_maps_update_begin(); |
@@ -149,7 +139,7 @@ int __cpuinit register_cpu_notifier(struct notifier_block *nb)
149 | 139 | ||
150 | EXPORT_SYMBOL(register_cpu_notifier); | 140 | EXPORT_SYMBOL(register_cpu_notifier); |
151 | 141 | ||
152 | void unregister_cpu_notifier(struct notifier_block *nb) | 142 | void __ref unregister_cpu_notifier(struct notifier_block *nb) |
153 | { | 143 | { |
154 | cpu_maps_update_begin(); | 144 | cpu_maps_update_begin(); |
155 | raw_notifier_chain_unregister(&cpu_chain, nb); | 145 | raw_notifier_chain_unregister(&cpu_chain, nb); |
@@ -180,7 +170,7 @@ struct take_cpu_down_param {
180 | }; | 170 | }; |
181 | 171 | ||
182 | /* Take this CPU down. */ | 172 | /* Take this CPU down. */ |
183 | static int take_cpu_down(void *_param) | 173 | static int __ref take_cpu_down(void *_param) |
184 | { | 174 | { |
185 | struct take_cpu_down_param *param = _param; | 175 | struct take_cpu_down_param *param = _param; |
186 | int err; | 176 | int err; |
@@ -199,7 +189,7 @@ static int take_cpu_down(void *_param)
199 | } | 189 | } |
200 | 190 | ||
201 | /* Requires cpu_add_remove_lock to be held */ | 191 | /* Requires cpu_add_remove_lock to be held */ |
202 | static int _cpu_down(unsigned int cpu, int tasks_frozen) | 192 | static int __ref _cpu_down(unsigned int cpu, int tasks_frozen) |
203 | { | 193 | { |
204 | int err, nr_calls = 0; | 194 | int err, nr_calls = 0; |
205 | struct task_struct *p; | 195 | struct task_struct *p; |
@@ -274,7 +264,7 @@ out_release:
274 | return err; | 264 | return err; |
275 | } | 265 | } |
276 | 266 | ||
277 | int cpu_down(unsigned int cpu) | 267 | int __ref cpu_down(unsigned int cpu) |
278 | { | 268 | { |
279 | int err = 0; | 269 | int err = 0; |
280 | 270 | ||
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 8b35fbd8292f..8da627d33804 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -127,6 +127,7 @@ struct cpuset_hotplug_scanner {
127 | typedef enum { | 127 | typedef enum { |
128 | CS_CPU_EXCLUSIVE, | 128 | CS_CPU_EXCLUSIVE, |
129 | CS_MEM_EXCLUSIVE, | 129 | CS_MEM_EXCLUSIVE, |
130 | CS_MEM_HARDWALL, | ||
130 | CS_MEMORY_MIGRATE, | 131 | CS_MEMORY_MIGRATE, |
131 | CS_SCHED_LOAD_BALANCE, | 132 | CS_SCHED_LOAD_BALANCE, |
132 | CS_SPREAD_PAGE, | 133 | CS_SPREAD_PAGE, |
@@ -144,6 +145,11 @@ static inline int is_mem_exclusive(const struct cpuset *cs)
144 | return test_bit(CS_MEM_EXCLUSIVE, &cs->flags); | 145 | return test_bit(CS_MEM_EXCLUSIVE, &cs->flags); |
145 | } | 146 | } |
146 | 147 | ||
148 | static inline int is_mem_hardwall(const struct cpuset *cs) | ||
149 | { | ||
150 | return test_bit(CS_MEM_HARDWALL, &cs->flags); | ||
151 | } | ||
152 | |||
147 | static inline int is_sched_load_balance(const struct cpuset *cs) | 153 | static inline int is_sched_load_balance(const struct cpuset *cs) |
148 | { | 154 | { |
149 | return test_bit(CS_SCHED_LOAD_BALANCE, &cs->flags); | 155 | return test_bit(CS_SCHED_LOAD_BALANCE, &cs->flags); |
@@ -735,7 +741,8 @@ static inline int started_after(void *p1, void *p2)
735 | * Return nonzero if this tasks's cpus_allowed mask should be changed (in other | 741 | * Return nonzero if this tasks's cpus_allowed mask should be changed (in other |
736 | * words, if its mask is not equal to its cpuset's mask). | 742 | * words, if its mask is not equal to its cpuset's mask). |
737 | */ | 743 | */ |
738 | int cpuset_test_cpumask(struct task_struct *tsk, struct cgroup_scanner *scan) | 744 | static int cpuset_test_cpumask(struct task_struct *tsk, |
745 | struct cgroup_scanner *scan) | ||
739 | { | 746 | { |
740 | return !cpus_equal(tsk->cpus_allowed, | 747 | return !cpus_equal(tsk->cpus_allowed, |
741 | (cgroup_cs(scan->cg))->cpus_allowed); | 748 | (cgroup_cs(scan->cg))->cpus_allowed); |
@@ -752,7 +759,8 @@ int cpuset_test_cpumask(struct task_struct *tsk, struct cgroup_scanner *scan)
752 | * We don't need to re-check for the cgroup/cpuset membership, since we're | 759 | * We don't need to re-check for the cgroup/cpuset membership, since we're |
753 | * holding cgroup_lock() at this point. | 760 | * holding cgroup_lock() at this point. |
754 | */ | 761 | */ |
755 | void cpuset_change_cpumask(struct task_struct *tsk, struct cgroup_scanner *scan) | 762 | static void cpuset_change_cpumask(struct task_struct *tsk, |
763 | struct cgroup_scanner *scan) | ||
756 | { | 764 | { |
757 | set_cpus_allowed_ptr(tsk, &((cgroup_cs(scan->cg))->cpus_allowed)); | 765 | set_cpus_allowed_ptr(tsk, &((cgroup_cs(scan->cg))->cpus_allowed)); |
758 | } | 766 | } |
@@ -941,7 +949,7 @@ static int update_nodemask(struct cpuset *cs, char *buf)
941 | cs->mems_generation = cpuset_mems_generation++; | 949 | cs->mems_generation = cpuset_mems_generation++; |
942 | mutex_unlock(&callback_mutex); | 950 | mutex_unlock(&callback_mutex); |
943 | 951 | ||
944 | cpuset_being_rebound = cs; /* causes mpol_copy() rebind */ | 952 | cpuset_being_rebound = cs; /* causes mpol_dup() rebind */ |
945 | 953 | ||
946 | fudge = 10; /* spare mmarray[] slots */ | 954 | fudge = 10; /* spare mmarray[] slots */ |
947 | fudge += cpus_weight(cs->cpus_allowed); /* imagine one fork-bomb/cpu */ | 955 | fudge += cpus_weight(cs->cpus_allowed); /* imagine one fork-bomb/cpu */ |
@@ -992,7 +1000,7 @@ static int update_nodemask(struct cpuset *cs, char *buf)
992 | * rebind the vma mempolicies of each mm in mmarray[] to their | 1000 | * rebind the vma mempolicies of each mm in mmarray[] to their |
993 | * new cpuset, and release that mm. The mpol_rebind_mm() | 1001 | * new cpuset, and release that mm. The mpol_rebind_mm() |
994 | * call takes mmap_sem, which we couldn't take while holding | 1002 | * call takes mmap_sem, which we couldn't take while holding |
995 | * tasklist_lock. Forks can happen again now - the mpol_copy() | 1003 | * tasklist_lock. Forks can happen again now - the mpol_dup() |
996 | * cpuset_being_rebound check will catch such forks, and rebind | 1004 | * cpuset_being_rebound check will catch such forks, and rebind |
997 | * their vma mempolicies too. Because we still hold the global | 1005 | * their vma mempolicies too. Because we still hold the global |
998 | * cgroup_mutex, we know that no other rebind effort will | 1006 | * cgroup_mutex, we know that no other rebind effort will |
@@ -1023,19 +1031,6 @@ int current_cpuset_is_being_rebound(void)
1023 | return task_cs(current) == cpuset_being_rebound; | 1031 | return task_cs(current) == cpuset_being_rebound; |
1024 | } | 1032 | } |
1025 | 1033 | ||
1026 | /* | ||
1027 | * Call with cgroup_mutex held. | ||
1028 | */ | ||
1029 | |||
1030 | static int update_memory_pressure_enabled(struct cpuset *cs, char *buf) | ||
1031 | { | ||
1032 | if (simple_strtoul(buf, NULL, 10) != 0) | ||
1033 | cpuset_memory_pressure_enabled = 1; | ||
1034 | else | ||
1035 | cpuset_memory_pressure_enabled = 0; | ||
1036 | return 0; | ||
1037 | } | ||
1038 | |||
1039 | static int update_relax_domain_level(struct cpuset *cs, char *buf) | 1034 | static int update_relax_domain_level(struct cpuset *cs, char *buf) |
1040 | { | 1035 | { |
1041 | int val = simple_strtol(buf, NULL, 10); | 1036 | int val = simple_strtol(buf, NULL, 10); |
@@ -1053,25 +1048,20 @@ static int update_relax_domain_level(struct cpuset *cs, char *buf)
1053 | 1048 | ||
1054 | /* | 1049 | /* |
1055 | * update_flag - read a 0 or a 1 in a file and update associated flag | 1050 | * update_flag - read a 0 or a 1 in a file and update associated flag |
1056 | * bit: the bit to update (CS_CPU_EXCLUSIVE, CS_MEM_EXCLUSIVE, | 1051 | * bit: the bit to update (see cpuset_flagbits_t) |
1057 | * CS_SCHED_LOAD_BALANCE, | 1052 | * cs: the cpuset to update |
1058 | * CS_NOTIFY_ON_RELEASE, CS_MEMORY_MIGRATE, | 1053 | * turning_on: whether the flag is being set or cleared |
1059 | * CS_SPREAD_PAGE, CS_SPREAD_SLAB) | ||
1060 | * cs: the cpuset to update | ||
1061 | * buf: the buffer where we read the 0 or 1 | ||
1062 | * | 1054 | * |
1063 | * Call with cgroup_mutex held. | 1055 | * Call with cgroup_mutex held. |
1064 | */ | 1056 | */ |
1065 | 1057 | ||
1066 | static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, char *buf) | 1058 | static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, |
1059 | int turning_on) | ||
1067 | { | 1060 | { |
1068 | int turning_on; | ||
1069 | struct cpuset trialcs; | 1061 | struct cpuset trialcs; |
1070 | int err; | 1062 | int err; |
1071 | int cpus_nonempty, balance_flag_changed; | 1063 | int cpus_nonempty, balance_flag_changed; |
1072 | 1064 | ||
1073 | turning_on = (simple_strtoul(buf, NULL, 10) != 0); | ||
1074 | |||
1075 | trialcs = *cs; | 1065 | trialcs = *cs; |
1076 | if (turning_on) | 1066 | if (turning_on) |
1077 | set_bit(bit, &trialcs.flags); | 1067 | set_bit(bit, &trialcs.flags); |
@@ -1241,6 +1231,7 @@ typedef enum {
1241 | FILE_MEMLIST, | 1231 | FILE_MEMLIST, |
1242 | FILE_CPU_EXCLUSIVE, | 1232 | FILE_CPU_EXCLUSIVE, |
1243 | FILE_MEM_EXCLUSIVE, | 1233 | FILE_MEM_EXCLUSIVE, |
1234 | FILE_MEM_HARDWALL, | ||
1244 | FILE_SCHED_LOAD_BALANCE, | 1235 | FILE_SCHED_LOAD_BALANCE, |
1245 | FILE_SCHED_RELAX_DOMAIN_LEVEL, | 1236 | FILE_SCHED_RELAX_DOMAIN_LEVEL, |
1246 | FILE_MEMORY_PRESSURE_ENABLED, | 1237 | FILE_MEMORY_PRESSURE_ENABLED, |
@@ -1265,7 +1256,8 @@ static ssize_t cpuset_common_file_write(struct cgroup *cont,
1265 | return -E2BIG; | 1256 | return -E2BIG; |
1266 | 1257 | ||
1267 | /* +1 for nul-terminator */ | 1258 | /* +1 for nul-terminator */ |
1268 | if ((buffer = kmalloc(nbytes + 1, GFP_KERNEL)) == 0) | 1259 | buffer = kmalloc(nbytes + 1, GFP_KERNEL); |
1260 | if (!buffer) | ||
1269 | return -ENOMEM; | 1261 | return -ENOMEM; |
1270 | 1262 | ||
1271 | if (copy_from_user(buffer, userbuf, nbytes)) { | 1263 | if (copy_from_user(buffer, userbuf, nbytes)) { |
@@ -1288,46 +1280,71 @@ static ssize_t cpuset_common_file_write(struct cgroup *cont,
1288 | case FILE_MEMLIST: | 1280 | case FILE_MEMLIST: |
1289 | retval = update_nodemask(cs, buffer); | 1281 | retval = update_nodemask(cs, buffer); |
1290 | break; | 1282 | break; |
1283 | case FILE_SCHED_RELAX_DOMAIN_LEVEL: | ||
1284 | retval = update_relax_domain_level(cs, buffer); | ||
1285 | break; | ||
1286 | default: | ||
1287 | retval = -EINVAL; | ||
1288 | goto out2; | ||
1289 | } | ||
1290 | |||
1291 | if (retval == 0) | ||
1292 | retval = nbytes; | ||
1293 | out2: | ||
1294 | cgroup_unlock(); | ||
1295 | out1: | ||
1296 | kfree(buffer); | ||
1297 | return retval; | ||
1298 | } | ||
1299 | |||
1300 | static int cpuset_write_u64(struct cgroup *cgrp, struct cftype *cft, u64 val) | ||
1301 | { | ||
1302 | int retval = 0; | ||
1303 | struct cpuset *cs = cgroup_cs(cgrp); | ||
1304 | cpuset_filetype_t type = cft->private; | ||
1305 | |||
1306 | cgroup_lock(); | ||
1307 | |||
1308 | if (cgroup_is_removed(cgrp)) { | ||
1309 | cgroup_unlock(); | ||
1310 | return -ENODEV; | ||
1311 | } | ||
1312 | |||
1313 | switch (type) { | ||
1291 | case FILE_CPU_EXCLUSIVE: | 1314 | case FILE_CPU_EXCLUSIVE: |
1292 | retval = update_flag(CS_CPU_EXCLUSIVE, cs, buffer); | 1315 | retval = update_flag(CS_CPU_EXCLUSIVE, cs, val); |
1293 | break; | 1316 | break; |
1294 | case FILE_MEM_EXCLUSIVE: | 1317 | case FILE_MEM_EXCLUSIVE: |
1295 | retval = update_flag(CS_MEM_EXCLUSIVE, cs, buffer); | 1318 | retval = update_flag(CS_MEM_EXCLUSIVE, cs, val); |
1296 | break; | 1319 | break; |
1297 | case FILE_SCHED_LOAD_BALANCE: | 1320 | case FILE_MEM_HARDWALL: |
1298 | retval = update_flag(CS_SCHED_LOAD_BALANCE, cs, buffer); | 1321 | retval = update_flag(CS_MEM_HARDWALL, cs, val); |
1299 | break; | 1322 | break; |
1300 | case FILE_SCHED_RELAX_DOMAIN_LEVEL: | 1323 | case FILE_SCHED_LOAD_BALANCE: |
1301 | retval = update_relax_domain_level(cs, buffer); | 1324 | retval = update_flag(CS_SCHED_LOAD_BALANCE, cs, val); |
1302 | break; | 1325 | break; |
1303 | case FILE_MEMORY_MIGRATE: | 1326 | case FILE_MEMORY_MIGRATE: |
1304 | retval = update_flag(CS_MEMORY_MIGRATE, cs, buffer); | 1327 | retval = update_flag(CS_MEMORY_MIGRATE, cs, val); |
1305 | break; | 1328 | break; |
1306 | case FILE_MEMORY_PRESSURE_ENABLED: | 1329 | case FILE_MEMORY_PRESSURE_ENABLED: |
1307 | retval = update_memory_pressure_enabled(cs, buffer); | 1330 | cpuset_memory_pressure_enabled = !!val; |
1308 | break; | 1331 | break; |
1309 | case FILE_MEMORY_PRESSURE: | 1332 | case FILE_MEMORY_PRESSURE: |
1310 | retval = -EACCES; | 1333 | retval = -EACCES; |
1311 | break; | 1334 | break; |
1312 | case FILE_SPREAD_PAGE: | 1335 | case FILE_SPREAD_PAGE: |
1313 | retval = update_flag(CS_SPREAD_PAGE, cs, buffer); | 1336 | retval = update_flag(CS_SPREAD_PAGE, cs, val); |
1314 | cs->mems_generation = cpuset_mems_generation++; | 1337 | cs->mems_generation = cpuset_mems_generation++; |
1315 | break; | 1338 | break; |
1316 | case FILE_SPREAD_SLAB: | 1339 | case FILE_SPREAD_SLAB: |
1317 | retval = update_flag(CS_SPREAD_SLAB, cs, buffer); | 1340 | retval = update_flag(CS_SPREAD_SLAB, cs, val); |
1318 | cs->mems_generation = cpuset_mems_generation++; | 1341 | cs->mems_generation = cpuset_mems_generation++; |
1319 | break; | 1342 | break; |
1320 | default: | 1343 | default: |
1321 | retval = -EINVAL; | 1344 | retval = -EINVAL; |
1322 | goto out2; | 1345 | break; |
1323 | } | 1346 | } |
1324 | |||
1325 | if (retval == 0) | ||
1326 | retval = nbytes; | ||
1327 | out2: | ||
1328 | cgroup_unlock(); | 1347 | cgroup_unlock(); |
1329 | out1: | ||
1330 | kfree(buffer); | ||
1331 | return retval; | 1348 | return retval; |
1332 | } | 1349 | } |
1333 | 1350 | ||
@@ -1389,33 +1406,9 @@ static ssize_t cpuset_common_file_read(struct cgroup *cont,
1389 | case FILE_MEMLIST: | 1406 | case FILE_MEMLIST: |
1390 | s += cpuset_sprintf_memlist(s, cs); | 1407 | s += cpuset_sprintf_memlist(s, cs); |
1391 | break; | 1408 | break; |
1392 | case FILE_CPU_EXCLUSIVE: | ||
1393 | *s++ = is_cpu_exclusive(cs) ? '1' : '0'; | ||
1394 | break; | ||
1395 | case FILE_MEM_EXCLUSIVE: | ||
1396 | *s++ = is_mem_exclusive(cs) ? '1' : '0'; | ||
1397 | break; | ||
1398 | case FILE_SCHED_LOAD_BALANCE: | ||
1399 | *s++ = is_sched_load_balance(cs) ? '1' : '0'; | ||
1400 | break; | ||
1401 | case FILE_SCHED_RELAX_DOMAIN_LEVEL: | 1409 | case FILE_SCHED_RELAX_DOMAIN_LEVEL: |
1402 | s += sprintf(s, "%d", cs->relax_domain_level); | 1410 | s += sprintf(s, "%d", cs->relax_domain_level); |
1403 | break; | 1411 | break; |
1404 | case FILE_MEMORY_MIGRATE: | ||
1405 | *s++ = is_memory_migrate(cs) ? '1' : '0'; | ||
1406 | break; | ||
1407 | case FILE_MEMORY_PRESSURE_ENABLED: | ||
1408 | *s++ = cpuset_memory_pressure_enabled ? '1' : '0'; | ||
1409 | break; | ||
1410 | case FILE_MEMORY_PRESSURE: | ||
1411 | s += sprintf(s, "%d", fmeter_getrate(&cs->fmeter)); | ||
1412 | break; | ||
1413 | case FILE_SPREAD_PAGE: | ||
1414 | *s++ = is_spread_page(cs) ? '1' : '0'; | ||
1415 | break; | ||
1416 | case FILE_SPREAD_SLAB: | ||
1417 | *s++ = is_spread_slab(cs) ? '1' : '0'; | ||
1418 | break; | ||
1419 | default: | 1412 | default: |
1420 | retval = -EINVAL; | 1413 | retval = -EINVAL; |
1421 | goto out; | 1414 | goto out; |
@@ -1428,121 +1421,137 @@ out:
1428 | return retval; | 1421 | return retval; |
1429 | } | 1422 | } |
1430 | 1423 | ||
1431 | 1424 | static u64 cpuset_read_u64(struct cgroup *cont, struct cftype *cft) | |
1432 | 1425 | { | |
1426 | struct cpuset *cs = cgroup_cs(cont); | ||
1427 | cpuset_filetype_t type = cft->private; | ||
1428 | switch (type) { | ||
1429 | case FILE_CPU_EXCLUSIVE: | ||
1430 | return is_cpu_exclusive(cs); | ||
1431 | case FILE_MEM_EXCLUSIVE: | ||
1432 | return is_mem_exclusive(cs); | ||
1433 | case FILE_MEM_HARDWALL: | ||
1434 | return is_mem_hardwall(cs); | ||
1435 | case FILE_SCHED_LOAD_BALANCE: | ||
1436 | return is_sched_load_balance(cs); | ||
1437 | case FILE_MEMORY_MIGRATE: | ||
1438 | return is_memory_migrate(cs); | ||
1439 | case FILE_MEMORY_PRESSURE_ENABLED: | ||
1440 | return cpuset_memory_pressure_enabled; | ||
1441 | case FILE_MEMORY_PRESSURE: | ||
1442 | return fmeter_getrate(&cs->fmeter); | ||
1443 | case FILE_SPREAD_PAGE: | ||
1444 | return is_spread_page(cs); | ||
1445 | case FILE_SPREAD_SLAB: | ||
1446 | return is_spread_slab(cs); | ||
1447 | default: | ||
1448 | BUG(); | ||
1449 | } | ||
1450 | } | ||
1433 | 1451 | ||
1434 | 1452 | ||
1435 | /* | 1453 | /* |
1436 | * for the common functions, 'private' gives the type of file | 1454 | * for the common functions, 'private' gives the type of file |
1437 | */ | 1455 | */ |
1438 | 1456 | ||
1439 | static struct cftype cft_cpus = { | 1457 | static struct cftype files[] = { |
1440 | .name = "cpus", | 1458 | { |
1441 | .read = cpuset_common_file_read, | 1459 | .name = "cpus", |
1442 | .write = cpuset_common_file_write, | 1460 | .read = cpuset_common_file_read, |
1443 | .private = FILE_CPULIST, | 1461 | .write = cpuset_common_file_write, |
1444 | }; | 1462 | .private = FILE_CPULIST, |
1445 | 1463 | }, | |
1446 | static struct cftype cft_mems = { | 1464 | |
1447 | .name = "mems", | 1465 | { |
1448 | .read = cpuset_common_file_read, | 1466 | .name = "mems", |
1449 | .write = cpuset_common_file_write, | 1467 | .read = cpuset_common_file_read, |
1450 | .private = FILE_MEMLIST, | 1468 | .write = cpuset_common_file_write, |
1451 | }; | 1469 | .private = FILE_MEMLIST, |
1452 | 1470 | }, | |
1453 | static struct cftype cft_cpu_exclusive = { | 1471 | |
1454 | .name = "cpu_exclusive", | 1472 | { |
1455 | .read = cpuset_common_file_read, | 1473 | .name = "cpu_exclusive", |
1456 | .write = cpuset_common_file_write, | 1474 | .read_u64 = cpuset_read_u64, |
1457 | .private = FILE_CPU_EXCLUSIVE, | 1475 | .write_u64 = cpuset_write_u64, |
1458 | }; | 1476 | .private = FILE_CPU_EXCLUSIVE, |
1459 | 1477 | }, | |
1460 | static struct cftype cft_mem_exclusive = { | 1478 | |
1461 | .name = "mem_exclusive", | 1479 | { |
1462 | .read = cpuset_common_file_read, | 1480 | .name = "mem_exclusive", |
1463 | .write = cpuset_common_file_write, | 1481 | .read_u64 = cpuset_read_u64, |
1464 | .private = FILE_MEM_EXCLUSIVE, | 1482 | .write_u64 = cpuset_write_u64, |
1465 | }; | 1483 | .private = FILE_MEM_EXCLUSIVE, |
1466 | 1484 | }, | |
1467 | static struct cftype cft_sched_load_balance = { | 1485 | |
1468 | .name = "sched_load_balance", | 1486 | { |
1469 | .read = cpuset_common_file_read, | 1487 | .name = "mem_hardwall", |
1470 | .write = cpuset_common_file_write, | 1488 | .read_u64 = cpuset_read_u64, |
1471 | .private = FILE_SCHED_LOAD_BALANCE, | 1489 | .write_u64 = cpuset_write_u64, |
1472 | }; | 1490 | .private = FILE_MEM_HARDWALL, |
1473 | 1491 | }, | |
1474 | static struct cftype cft_sched_relax_domain_level = { | 1492 | |
1475 | .name = "sched_relax_domain_level", | 1493 | { |
1476 | .read = cpuset_common_file_read, | 1494 | .name = "sched_load_balance", |
1477 | .write = cpuset_common_file_write, | 1495 | .read_u64 = cpuset_read_u64, |
1478 | .private = FILE_SCHED_RELAX_DOMAIN_LEVEL, | 1496 | .write_u64 = cpuset_write_u64, |
1479 | }; | 1497 | .private = FILE_SCHED_LOAD_BALANCE, |
1480 | 1498 | }, | |
1481 | static struct cftype cft_memory_migrate = { | 1499 | |
1482 | .name = "memory_migrate", | 1500 | { |
1483 | .read = cpuset_common_file_read, | 1501 | .name = "sched_relax_domain_level", |
1484 | .write = cpuset_common_file_write, | 1502 | .read_u64 = cpuset_read_u64, |
1485 | .private = FILE_MEMORY_MIGRATE, | 1503 | .write_u64 = cpuset_write_u64, |
1504 | .private = FILE_SCHED_RELAX_DOMAIN_LEVEL, | ||
1505 | }, | ||
1506 | |||
1507 | { | ||
1508 | .name = "memory_migrate", | ||
1509 | .read_u64 = cpuset_read_u64, | ||
1510 | .write_u64 = cpuset_write_u64, | ||
1511 | .private = FILE_MEMORY_MIGRATE, | ||
1512 | }, | ||
1513 | |||
1514 | { | ||
1515 | .name = "memory_pressure", | ||
1516 | .read_u64 = cpuset_read_u64, | ||
1517 | .write_u64 = cpuset_write_u64, | ||
1518 | .private = FILE_MEMORY_PRESSURE, | ||
1519 | }, | ||
1520 | |||
1521 | { | ||
1522 | .name = "memory_spread_page", | ||
1523 | .read_u64 = cpuset_read_u64, | ||
1524 | .write_u64 = cpuset_write_u64, | ||
1525 | .private = FILE_SPREAD_PAGE, | ||
1526 | }, | ||
1527 | |||
1528 | { | ||
1529 | .name = "memory_spread_slab", | ||
1530 | .read_u64 = cpuset_read_u64, | ||
1531 | .write_u64 = cpuset_write_u64, | ||
1532 | .private = FILE_SPREAD_SLAB, | ||
1533 | }, | ||
1486 | }; | 1534 | }; |
1487 | 1535 | ||
1488 | static struct cftype cft_memory_pressure_enabled = { | 1536 | static struct cftype cft_memory_pressure_enabled = { |
1489 | .name = "memory_pressure_enabled", | 1537 | .name = "memory_pressure_enabled", |
1490 | .read = cpuset_common_file_read, | 1538 | .read_u64 = cpuset_read_u64, |
1491 | .write = cpuset_common_file_write, | 1539 | .write_u64 = cpuset_write_u64, |
1492 | .private = FILE_MEMORY_PRESSURE_ENABLED, | 1540 | .private = FILE_MEMORY_PRESSURE_ENABLED, |
1493 | }; | 1541 | }; |
1494 | 1542 | ||
1495 | static struct cftype cft_memory_pressure = { | ||
1496 | .name = "memory_pressure", | ||
1497 | .read = cpuset_common_file_read, | ||
1498 | .write = cpuset_common_file_write, | ||
1499 | .private = FILE_MEMORY_PRESSURE, | ||
1500 | }; | ||
1501 | |||
1502 | static struct cftype cft_spread_page = { | ||
1503 | .name = "memory_spread_page", | ||
1504 | .read = cpuset_common_file_read, | ||
1505 | .write = cpuset_common_file_write, | ||
1506 | .private = FILE_SPREAD_PAGE, | ||
1507 | }; | ||
1508 | |||
1509 | static struct cftype cft_spread_slab = { | ||
1510 | .name = "memory_spread_slab", | ||
1511 | .read = cpuset_common_file_read, | ||
1512 | .write = cpuset_common_file_write, | ||
1513 | .private = FILE_SPREAD_SLAB, | ||
1514 | }; | ||
1515 | |||
1516 | static int cpuset_populate(struct cgroup_subsys *ss, struct cgroup *cont) | 1543 | static int cpuset_populate(struct cgroup_subsys *ss, struct cgroup *cont) |
1517 | { | 1544 | { |
1518 | int err; | 1545 | int err; |
1519 | 1546 | ||
1520 | if ((err = cgroup_add_file(cont, ss, &cft_cpus)) < 0) | 1547 | err = cgroup_add_files(cont, ss, files, ARRAY_SIZE(files)); |
1521 | return err; | 1548 | if (err) |
1522 | if ((err = cgroup_add_file(cont, ss, &cft_mems)) < 0) | ||
1523 | return err; | ||
1524 | if ((err = cgroup_add_file(cont, ss, &cft_cpu_exclusive)) < 0) | ||
1525 | return err; | ||
1526 | if ((err = cgroup_add_file(cont, ss, &cft_mem_exclusive)) < 0) | ||
1527 | return err; | ||
1528 | if ((err = cgroup_add_file(cont, ss, &cft_memory_migrate)) < 0) | ||
1529 | return err; | ||
1530 | if ((err = cgroup_add_file(cont, ss, &cft_sched_load_balance)) < 0) | ||
1531 | return err; | ||
1532 | if ((err = cgroup_add_file(cont, ss, | ||
1533 | &cft_sched_relax_domain_level)) < 0) | ||
1534 | return err; | ||
1535 | if ((err = cgroup_add_file(cont, ss, &cft_memory_pressure)) < 0) | ||
1536 | return err; | ||
1537 | if ((err = cgroup_add_file(cont, ss, &cft_spread_page)) < 0) | ||
1538 | return err; | ||
1539 | if ((err = cgroup_add_file(cont, ss, &cft_spread_slab)) < 0) | ||
1540 | return err; | 1549 | return err; |
1541 | /* memory_pressure_enabled is in root cpuset only */ | 1550 | /* memory_pressure_enabled is in root cpuset only */ |
1542 | if (err == 0 && !cont->parent) | 1551 | if (!cont->parent) |
1543 | err = cgroup_add_file(cont, ss, | 1552 | err = cgroup_add_file(cont, ss, |
1544 | &cft_memory_pressure_enabled); | 1553 | &cft_memory_pressure_enabled); |
1545 | return 0; | 1554 | return err; |
1546 | } | 1555 | } |
1547 | 1556 | ||
1548 | /* | 1557 | /* |
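For illustration, a minimal sketch of the pattern this conversion adopts: boolean and integer control files now go through the cgroup core's read_u64/write_u64 handlers, and every file of a subsystem is registered in one cgroup_add_files() call. The demo_* names below are invented for the example; only the handler signatures and the registration call mirror the code above.

    #include <linux/kernel.h>
    #include <linux/cgroup.h>

    /* Illustrative only: a hypothetical subsystem using the same pattern. */
    static u64 demo_value;

    static u64 demo_read_u64(struct cgroup *cont, struct cftype *cft)
    {
            return demo_value;
    }

    static int demo_write_u64(struct cgroup *cont, struct cftype *cft, u64 val)
    {
            demo_value = val;
            return 0;
    }

    static struct cftype demo_files[] = {
            {
                    .name = "value",
                    .read_u64 = demo_read_u64,
                    .write_u64 = demo_write_u64,
                    .private = 0,
            },
    };

    static int demo_populate(struct cgroup_subsys *ss, struct cgroup *cont)
    {
            /* one call registers the whole table, as cpuset_populate() now does */
            return cgroup_add_files(cont, ss, demo_files, ARRAY_SIZE(demo_files));
    }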
@@ -1642,7 +1651,7 @@ static void cpuset_destroy(struct cgroup_subsys *ss, struct cgroup *cont) | |||
1642 | cpuset_update_task_memory_state(); | 1651 | cpuset_update_task_memory_state(); |
1643 | 1652 | ||
1644 | if (is_sched_load_balance(cs)) | 1653 | if (is_sched_load_balance(cs)) |
1645 | update_flag(CS_SCHED_LOAD_BALANCE, cs, "0"); | 1654 | update_flag(CS_SCHED_LOAD_BALANCE, cs, 0); |
1646 | 1655 | ||
1647 | number_of_cpusets--; | 1656 | number_of_cpusets--; |
1648 | kfree(cs); | 1657 | kfree(cs); |
@@ -1707,7 +1716,8 @@ int __init cpuset_init(void) | |||
1707 | * Called by cgroup_scan_tasks() for each task in a cgroup. | 1716 | * Called by cgroup_scan_tasks() for each task in a cgroup. |
1708 | * Return nonzero to stop the walk through the tasks. | 1717 | * Return nonzero to stop the walk through the tasks. |
1709 | */ | 1718 | */ |
1710 | void cpuset_do_move_task(struct task_struct *tsk, struct cgroup_scanner *scan) | 1719 | static void cpuset_do_move_task(struct task_struct *tsk, |
1720 | struct cgroup_scanner *scan) | ||
1711 | { | 1721 | { |
1712 | struct cpuset_hotplug_scanner *chsp; | 1722 | struct cpuset_hotplug_scanner *chsp; |
1713 | 1723 | ||
@@ -1958,33 +1968,25 @@ nodemask_t cpuset_mems_allowed(struct task_struct *tsk) | |||
1958 | } | 1968 | } |
1959 | 1969 | ||
1960 | /** | 1970 | /** |
1961 | * cpuset_zonelist_valid_mems_allowed - check zonelist vs. current mems_allowed | 1971 | * cpuset_nodemask_valid_mems_allowed - check nodemask vs. current mems_allowed |
1962 | * @zl: the zonelist to be checked | 1972 | * @nodemask: the nodemask to be checked |
1963 | * | 1973 | * |
1964 | * Are any of the nodes on zonelist zl allowed in current->mems_allowed? | 1974 | * Are any of the nodes in the nodemask allowed in current->mems_allowed? |
1965 | */ | 1975 | */ |
1966 | int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl) | 1976 | int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask) |
1967 | { | 1977 | { |
1968 | int i; | 1978 | return nodes_intersects(*nodemask, current->mems_allowed); |
1969 | |||
1970 | for (i = 0; zl->zones[i]; i++) { | ||
1971 | int nid = zone_to_nid(zl->zones[i]); | ||
1972 | |||
1973 | if (node_isset(nid, current->mems_allowed)) | ||
1974 | return 1; | ||
1975 | } | ||
1976 | return 0; | ||
1977 | } | 1979 | } |
1978 | 1980 | ||
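The zonelist walk disappears because the question "may current allocate from any of these nodes?" is just a mask intersection. A rough caller-side sketch of the same check, assuming the caller already holds a nodemask (the function name is invented):

    #include <linux/nodemask.h>
    #include <linux/sched.h>

    /* Illustrative only: non-zero iff 'wanted' shares a node with
     * current->mems_allowed, which is exactly what the new helper computes. */
    static int example_may_allocate_from(const nodemask_t *wanted)
    {
            return nodes_intersects(*wanted, current->mems_allowed);
    }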
1979 | /* | 1981 | /* |
1980 | * nearest_exclusive_ancestor() - Returns the nearest mem_exclusive | 1982 | * nearest_hardwall_ancestor() - Returns the nearest mem_exclusive or |
1981 | * ancestor to the specified cpuset. Call holding callback_mutex. | 1983 | * mem_hardwall ancestor to the specified cpuset. Call holding |
1982 | * If no ancestor is mem_exclusive (an unusual configuration), then | 1984 | * callback_mutex. If no ancestor is mem_exclusive or mem_hardwall |
1983 | * returns the root cpuset. | 1985 | * (an unusual configuration), then returns the root cpuset. |
1984 | */ | 1986 | */ |
1985 | static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs) | 1987 | static const struct cpuset *nearest_hardwall_ancestor(const struct cpuset *cs) |
1986 | { | 1988 | { |
1987 | while (!is_mem_exclusive(cs) && cs->parent) | 1989 | while (!(is_mem_exclusive(cs) || is_mem_hardwall(cs)) && cs->parent) |
1988 | cs = cs->parent; | 1990 | cs = cs->parent; |
1989 | return cs; | 1991 | return cs; |
1990 | } | 1992 | } |
@@ -1998,7 +2000,7 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs) | |||
1998 | * __GFP_THISNODE is set, yes, we can always allocate. If zone | 2000 | * __GFP_THISNODE is set, yes, we can always allocate. If zone |
1999 | * z's node is in our tasks mems_allowed, yes. If it's not a | 2001 | * z's node is in our tasks mems_allowed, yes. If it's not a |
2000 | * __GFP_HARDWALL request and this zone's nodes is in the nearest | 2002 | * __GFP_HARDWALL request and this zone's nodes is in the nearest |
2001 | * mem_exclusive cpuset ancestor to this tasks cpuset, yes. | 2003 | * hardwalled cpuset ancestor to this tasks cpuset, yes. |
2002 | * If the task has been OOM killed and has access to memory reserves | 2004 | * If the task has been OOM killed and has access to memory reserves |
2003 | * as specified by the TIF_MEMDIE flag, yes. | 2005 | * as specified by the TIF_MEMDIE flag, yes. |
2004 | * Otherwise, no. | 2006 | * Otherwise, no. |
@@ -2021,7 +2023,7 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs) | |||
2021 | * and do not allow allocations outside the current tasks cpuset | 2023 | * and do not allow allocations outside the current tasks cpuset |
2022 | * unless the task has been OOM killed as is marked TIF_MEMDIE. | 2024 | * unless the task has been OOM killed as is marked TIF_MEMDIE. |
2023 | * GFP_KERNEL allocations are not so marked, so can escape to the | 2025 | * GFP_KERNEL allocations are not so marked, so can escape to the |
2024 | * nearest enclosing mem_exclusive ancestor cpuset. | 2026 | * nearest enclosing hardwalled ancestor cpuset. |
2025 | * | 2027 | * |
2026 | * Scanning up parent cpusets requires callback_mutex. The | 2028 | * Scanning up parent cpusets requires callback_mutex. The |
2027 | * __alloc_pages() routine only calls here with __GFP_HARDWALL bit | 2029 | * __alloc_pages() routine only calls here with __GFP_HARDWALL bit |
@@ -2044,7 +2046,7 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs) | |||
2044 | * in_interrupt - any node ok (current task context irrelevant) | 2046 | * in_interrupt - any node ok (current task context irrelevant) |
2045 | * GFP_ATOMIC - any node ok | 2047 | * GFP_ATOMIC - any node ok |
2046 | * TIF_MEMDIE - any node ok | 2048 | * TIF_MEMDIE - any node ok |
2047 | * GFP_KERNEL - any node in enclosing mem_exclusive cpuset ok | 2049 | * GFP_KERNEL - any node in enclosing hardwalled cpuset ok |
2048 | * GFP_USER - only nodes in current tasks mems allowed ok. | 2050 | * GFP_USER - only nodes in current tasks mems allowed ok. |
2049 | * | 2051 | * |
2050 | * Rule: | 2052 | * Rule: |
@@ -2081,7 +2083,7 @@ int __cpuset_zone_allowed_softwall(struct zone *z, gfp_t gfp_mask) | |||
2081 | mutex_lock(&callback_mutex); | 2083 | mutex_lock(&callback_mutex); |
2082 | 2084 | ||
2083 | task_lock(current); | 2085 | task_lock(current); |
2084 | cs = nearest_exclusive_ancestor(task_cs(current)); | 2086 | cs = nearest_hardwall_ancestor(task_cs(current)); |
2085 | task_unlock(current); | 2087 | task_unlock(current); |
2086 | 2088 | ||
2087 | allowed = node_isset(node, cs->mems_allowed); | 2089 | allowed = node_isset(node, cs->mems_allowed); |
diff --git a/kernel/dma.c b/kernel/dma.c index 6a82bb716dac..d2c60a822790 100644 --- a/kernel/dma.c +++ b/kernel/dma.c | |||
@@ -149,12 +149,7 @@ static const struct file_operations proc_dma_operations = { | |||
149 | 149 | ||
150 | static int __init proc_dma_init(void) | 150 | static int __init proc_dma_init(void) |
151 | { | 151 | { |
152 | struct proc_dir_entry *e; | 152 | proc_create("dma", 0, NULL, &proc_dma_operations); |
153 | |||
154 | e = create_proc_entry("dma", 0, NULL); | ||
155 | if (e) | ||
156 | e->proc_fops = &proc_dma_operations; | ||
157 | |||
158 | return 0; | 153 | return 0; |
159 | } | 154 | } |
160 | 155 | ||
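Several init paths in this series (dma, kallsyms, latency_stats, lockdep) drop the create_proc_entry() plus manual proc_fops assignment in favour of proc_create(), which takes the file_operations up front and, on failure, simply leaves the entry absent. A self-contained sketch of the new idiom with an invented entry name:

    #include <linux/proc_fs.h>
    #include <linux/seq_file.h>

    static int example_show(struct seq_file *m, void *v)
    {
            seq_printf(m, "hello\n");
            return 0;
    }

    static int example_open(struct inode *inode, struct file *file)
    {
            return single_open(file, example_show, NULL);
    }

    static const struct file_operations example_proc_fops = {
            .open    = example_open,
            .read    = seq_read,
            .llseek  = seq_lseek,
            .release = single_release,
    };

    static int __init example_proc_init(void)
    {
            /* a NULL return just means the entry was not created;
             * init still returns 0, mirroring the converted call sites */
            proc_create("example_entry", 0444, NULL, &example_proc_fops);
            return 0;
    }
    __initcall(example_proc_init);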
diff --git a/kernel/exit.c b/kernel/exit.c index 97f609f574b1..ae0f2c4e452b 100644 --- a/kernel/exit.c +++ b/kernel/exit.c | |||
@@ -557,6 +557,88 @@ void exit_fs(struct task_struct *tsk) | |||
557 | 557 | ||
558 | EXPORT_SYMBOL_GPL(exit_fs); | 558 | EXPORT_SYMBOL_GPL(exit_fs); |
559 | 559 | ||
560 | #ifdef CONFIG_MM_OWNER | ||
561 | /* | ||
562 | * Task p is exiting and it owned mm, let's find a new owner for it | ||
563 | */ | ||
564 | static inline int | ||
565 | mm_need_new_owner(struct mm_struct *mm, struct task_struct *p) | ||
566 | { | ||
567 | /* | ||
568 | * If there are other users of the mm and the owner (us) is exiting | ||
569 | * we need to find a new owner to take on the responsibility. | ||
570 | */ | ||
571 | if (!mm) | ||
572 | return 0; | ||
573 | if (atomic_read(&mm->mm_users) <= 1) | ||
574 | return 0; | ||
575 | if (mm->owner != p) | ||
576 | return 0; | ||
577 | return 1; | ||
578 | } | ||
579 | |||
580 | void mm_update_next_owner(struct mm_struct *mm) | ||
581 | { | ||
582 | struct task_struct *c, *g, *p = current; | ||
583 | |||
584 | retry: | ||
585 | if (!mm_need_new_owner(mm, p)) | ||
586 | return; | ||
587 | |||
588 | read_lock(&tasklist_lock); | ||
589 | /* | ||
590 | * Search in the children | ||
591 | */ | ||
592 | list_for_each_entry(c, &p->children, sibling) { | ||
593 | if (c->mm == mm) | ||
594 | goto assign_new_owner; | ||
595 | } | ||
596 | |||
597 | /* | ||
598 | * Search in the siblings | ||
599 | */ | ||
600 | list_for_each_entry(c, &p->parent->children, sibling) { | ||
601 | if (c->mm == mm) | ||
602 | goto assign_new_owner; | ||
603 | } | ||
604 | |||
605 | /* | ||
606 | * Search through everything else. We should not get | ||
607 | * here often | ||
608 | */ | ||
609 | do_each_thread(g, c) { | ||
610 | if (c->mm == mm) | ||
611 | goto assign_new_owner; | ||
612 | } while_each_thread(g, c); | ||
613 | |||
614 | read_unlock(&tasklist_lock); | ||
615 | return; | ||
616 | |||
617 | assign_new_owner: | ||
618 | BUG_ON(c == p); | ||
619 | get_task_struct(c); | ||
620 | /* | ||
621 | * The task_lock protects c->mm from changing. | ||
622 | * We always want mm->owner->mm == mm | ||
623 | */ | ||
624 | task_lock(c); | ||
625 | /* | ||
626 | * Delay read_unlock() till we have the task_lock() | ||
627 | * to ensure that c does not slip away underneath us | ||
628 | */ | ||
629 | read_unlock(&tasklist_lock); | ||
630 | if (c->mm != mm) { | ||
631 | task_unlock(c); | ||
632 | put_task_struct(c); | ||
633 | goto retry; | ||
634 | } | ||
635 | cgroup_mm_owner_callbacks(mm->owner, c); | ||
636 | mm->owner = c; | ||
637 | task_unlock(c); | ||
638 | put_task_struct(c); | ||
639 | } | ||
640 | #endif /* CONFIG_MM_OWNER */ | ||
641 | |||
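With CONFIG_MM_OWNER the mm no longer carries a cgroup pointer; consumers derive "the cgroup of this mm" from mm->owner, which mm_update_next_owner() keeps valid as tasks exit. A hedged consumer-side sketch (the helper name is invented, and the required locking, task_lock or RCU depending on the call site, is left to the caller):

    #include <linux/cgroup.h>
    #include <linux/mm_types.h>
    #include <linux/sched.h>

    /* Illustrative only: resolve the cgroup of an mm through its owner task.
     * Assumes CONFIG_MM_OWNER; callers must pin mm->owner appropriately. */
    static struct cgroup *example_cgroup_of_mm(struct mm_struct *mm, int subsys_id)
    {
            return task_cgroup(mm->owner, subsys_id);
    }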
560 | /* | 642 | /* |
561 | * Turn us into a lazy TLB process if we | 643 | * Turn us into a lazy TLB process if we |
562 | * aren't already.. | 644 | * aren't already.. |
@@ -596,6 +678,7 @@ static void exit_mm(struct task_struct * tsk) | |||
596 | /* We don't want this task to be frozen prematurely */ | 678 | /* We don't want this task to be frozen prematurely */ |
597 | clear_freeze_flag(tsk); | 679 | clear_freeze_flag(tsk); |
598 | task_unlock(tsk); | 680 | task_unlock(tsk); |
681 | mm_update_next_owner(mm); | ||
599 | mmput(mm); | 682 | mmput(mm); |
600 | } | 683 | } |
601 | 684 | ||
@@ -967,7 +1050,7 @@ NORET_TYPE void do_exit(long code) | |||
967 | proc_exit_connector(tsk); | 1050 | proc_exit_connector(tsk); |
968 | exit_notify(tsk, group_dead); | 1051 | exit_notify(tsk, group_dead); |
969 | #ifdef CONFIG_NUMA | 1052 | #ifdef CONFIG_NUMA |
970 | mpol_free(tsk->mempolicy); | 1053 | mpol_put(tsk->mempolicy); |
971 | tsk->mempolicy = NULL; | 1054 | tsk->mempolicy = NULL; |
972 | #endif | 1055 | #endif |
973 | #ifdef CONFIG_FUTEX | 1056 | #ifdef CONFIG_FUTEX |
diff --git a/kernel/fork.c b/kernel/fork.c index c674aa8d3c31..068ffe007529 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -279,7 +279,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) | |||
279 | if (!tmp) | 279 | if (!tmp) |
280 | goto fail_nomem; | 280 | goto fail_nomem; |
281 | *tmp = *mpnt; | 281 | *tmp = *mpnt; |
282 | pol = mpol_copy(vma_policy(mpnt)); | 282 | pol = mpol_dup(vma_policy(mpnt)); |
283 | retval = PTR_ERR(pol); | 283 | retval = PTR_ERR(pol); |
284 | if (IS_ERR(pol)) | 284 | if (IS_ERR(pol)) |
285 | goto fail_nomem_policy; | 285 | goto fail_nomem_policy; |
@@ -381,14 +381,13 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p) | |||
381 | mm->ioctx_list = NULL; | 381 | mm->ioctx_list = NULL; |
382 | mm->free_area_cache = TASK_UNMAPPED_BASE; | 382 | mm->free_area_cache = TASK_UNMAPPED_BASE; |
383 | mm->cached_hole_size = ~0UL; | 383 | mm->cached_hole_size = ~0UL; |
384 | mm_init_cgroup(mm, p); | 384 | mm_init_owner(mm, p); |
385 | 385 | ||
386 | if (likely(!mm_alloc_pgd(mm))) { | 386 | if (likely(!mm_alloc_pgd(mm))) { |
387 | mm->def_flags = 0; | 387 | mm->def_flags = 0; |
388 | return mm; | 388 | return mm; |
389 | } | 389 | } |
390 | 390 | ||
391 | mm_free_cgroup(mm); | ||
392 | free_mm(mm); | 391 | free_mm(mm); |
393 | return NULL; | 392 | return NULL; |
394 | } | 393 | } |
@@ -432,13 +431,13 @@ void mmput(struct mm_struct *mm) | |||
432 | if (atomic_dec_and_test(&mm->mm_users)) { | 431 | if (atomic_dec_and_test(&mm->mm_users)) { |
433 | exit_aio(mm); | 432 | exit_aio(mm); |
434 | exit_mmap(mm); | 433 | exit_mmap(mm); |
434 | set_mm_exe_file(mm, NULL); | ||
435 | if (!list_empty(&mm->mmlist)) { | 435 | if (!list_empty(&mm->mmlist)) { |
436 | spin_lock(&mmlist_lock); | 436 | spin_lock(&mmlist_lock); |
437 | list_del(&mm->mmlist); | 437 | list_del(&mm->mmlist); |
438 | spin_unlock(&mmlist_lock); | 438 | spin_unlock(&mmlist_lock); |
439 | } | 439 | } |
440 | put_swap_token(mm); | 440 | put_swap_token(mm); |
441 | mm_free_cgroup(mm); | ||
442 | mmdrop(mm); | 441 | mmdrop(mm); |
443 | } | 442 | } |
444 | } | 443 | } |
@@ -545,6 +544,8 @@ struct mm_struct *dup_mm(struct task_struct *tsk) | |||
545 | if (init_new_context(tsk, mm)) | 544 | if (init_new_context(tsk, mm)) |
546 | goto fail_nocontext; | 545 | goto fail_nocontext; |
547 | 546 | ||
547 | dup_mm_exe_file(oldmm, mm); | ||
548 | |||
548 | err = dup_mmap(mm, oldmm); | 549 | err = dup_mmap(mm, oldmm); |
549 | if (err) | 550 | if (err) |
550 | goto free_pt; | 551 | goto free_pt; |
@@ -982,6 +983,13 @@ static void rt_mutex_init_task(struct task_struct *p) | |||
982 | #endif | 983 | #endif |
983 | } | 984 | } |
984 | 985 | ||
986 | #ifdef CONFIG_MM_OWNER | ||
987 | void mm_init_owner(struct mm_struct *mm, struct task_struct *p) | ||
988 | { | ||
989 | mm->owner = p; | ||
990 | } | ||
991 | #endif /* CONFIG_MM_OWNER */ | ||
992 | |||
985 | /* | 993 | /* |
986 | * This creates a new process as a copy of the old one, | 994 | * This creates a new process as a copy of the old one, |
987 | * but does not actually start it yet. | 995 | * but does not actually start it yet. |
@@ -1116,7 +1124,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1116 | p->audit_context = NULL; | 1124 | p->audit_context = NULL; |
1117 | cgroup_fork(p); | 1125 | cgroup_fork(p); |
1118 | #ifdef CONFIG_NUMA | 1126 | #ifdef CONFIG_NUMA |
1119 | p->mempolicy = mpol_copy(p->mempolicy); | 1127 | p->mempolicy = mpol_dup(p->mempolicy); |
1120 | if (IS_ERR(p->mempolicy)) { | 1128 | if (IS_ERR(p->mempolicy)) { |
1121 | retval = PTR_ERR(p->mempolicy); | 1129 | retval = PTR_ERR(p->mempolicy); |
1122 | p->mempolicy = NULL; | 1130 | p->mempolicy = NULL; |
@@ -1374,7 +1382,7 @@ bad_fork_cleanup_security: | |||
1374 | security_task_free(p); | 1382 | security_task_free(p); |
1375 | bad_fork_cleanup_policy: | 1383 | bad_fork_cleanup_policy: |
1376 | #ifdef CONFIG_NUMA | 1384 | #ifdef CONFIG_NUMA |
1377 | mpol_free(p->mempolicy); | 1385 | mpol_put(p->mempolicy); |
1378 | bad_fork_cleanup_cgroup: | 1386 | bad_fork_cleanup_cgroup: |
1379 | #endif | 1387 | #endif |
1380 | cgroup_exit(p, cgroup_callbacks_done); | 1388 | cgroup_exit(p, cgroup_callbacks_done); |
@@ -1664,18 +1672,6 @@ static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp | |||
1664 | } | 1672 | } |
1665 | 1673 | ||
1666 | /* | 1674 | /* |
1667 | * Unsharing of semundo for tasks created with CLONE_SYSVSEM is not | ||
1668 | * supported yet | ||
1669 | */ | ||
1670 | static int unshare_semundo(unsigned long unshare_flags, struct sem_undo_list **new_ulistp) | ||
1671 | { | ||
1672 | if (unshare_flags & CLONE_SYSVSEM) | ||
1673 | return -EINVAL; | ||
1674 | |||
1675 | return 0; | ||
1676 | } | ||
1677 | |||
1678 | /* | ||
1679 | * unshare allows a process to 'unshare' part of the process | 1675 | * unshare allows a process to 'unshare' part of the process |
1680 | * context which was originally shared using clone. copy_* | 1676 | * context which was originally shared using clone. copy_* |
1681 | * functions used by do_fork() cannot be used here directly | 1677 | * functions used by do_fork() cannot be used here directly |
@@ -1690,8 +1686,8 @@ asmlinkage long sys_unshare(unsigned long unshare_flags) | |||
1690 | struct sighand_struct *new_sigh = NULL; | 1686 | struct sighand_struct *new_sigh = NULL; |
1691 | struct mm_struct *mm, *new_mm = NULL, *active_mm = NULL; | 1687 | struct mm_struct *mm, *new_mm = NULL, *active_mm = NULL; |
1692 | struct files_struct *fd, *new_fd = NULL; | 1688 | struct files_struct *fd, *new_fd = NULL; |
1693 | struct sem_undo_list *new_ulist = NULL; | ||
1694 | struct nsproxy *new_nsproxy = NULL; | 1689 | struct nsproxy *new_nsproxy = NULL; |
1690 | int do_sysvsem = 0; | ||
1695 | 1691 | ||
1696 | check_unshare_flags(&unshare_flags); | 1692 | check_unshare_flags(&unshare_flags); |
1697 | 1693 | ||
@@ -1703,6 +1699,13 @@ asmlinkage long sys_unshare(unsigned long unshare_flags) | |||
1703 | CLONE_NEWNET)) | 1699 | CLONE_NEWNET)) |
1704 | goto bad_unshare_out; | 1700 | goto bad_unshare_out; |
1705 | 1701 | ||
1702 | /* | ||
1703 | * CLONE_NEWIPC must also detach from the undolist: after switching | ||
1704 | * to a new ipc namespace, the semaphore arrays from the old | ||
1705 | * namespace are unreachable. | ||
1706 | */ | ||
1707 | if (unshare_flags & (CLONE_NEWIPC|CLONE_SYSVSEM)) | ||
1708 | do_sysvsem = 1; | ||
1706 | if ((err = unshare_thread(unshare_flags))) | 1709 | if ((err = unshare_thread(unshare_flags))) |
1707 | goto bad_unshare_out; | 1710 | goto bad_unshare_out; |
1708 | if ((err = unshare_fs(unshare_flags, &new_fs))) | 1711 | if ((err = unshare_fs(unshare_flags, &new_fs))) |
@@ -1713,13 +1716,17 @@ asmlinkage long sys_unshare(unsigned long unshare_flags) | |||
1713 | goto bad_unshare_cleanup_sigh; | 1716 | goto bad_unshare_cleanup_sigh; |
1714 | if ((err = unshare_fd(unshare_flags, &new_fd))) | 1717 | if ((err = unshare_fd(unshare_flags, &new_fd))) |
1715 | goto bad_unshare_cleanup_vm; | 1718 | goto bad_unshare_cleanup_vm; |
1716 | if ((err = unshare_semundo(unshare_flags, &new_ulist))) | ||
1717 | goto bad_unshare_cleanup_fd; | ||
1718 | if ((err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy, | 1719 | if ((err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy, |
1719 | new_fs))) | 1720 | new_fs))) |
1720 | goto bad_unshare_cleanup_semundo; | 1721 | goto bad_unshare_cleanup_fd; |
1721 | 1722 | ||
1722 | if (new_fs || new_mm || new_fd || new_ulist || new_nsproxy) { | 1723 | if (new_fs || new_mm || new_fd || do_sysvsem || new_nsproxy) { |
1724 | if (do_sysvsem) { | ||
1725 | /* | ||
1726 | * CLONE_SYSVSEM is equivalent to sys_exit(). | ||
1727 | */ | ||
1728 | exit_sem(current); | ||
1729 | } | ||
1723 | 1730 | ||
1724 | if (new_nsproxy) { | 1731 | if (new_nsproxy) { |
1725 | switch_task_namespaces(current, new_nsproxy); | 1732 | switch_task_namespaces(current, new_nsproxy); |
@@ -1755,7 +1762,6 @@ asmlinkage long sys_unshare(unsigned long unshare_flags) | |||
1755 | if (new_nsproxy) | 1762 | if (new_nsproxy) |
1756 | put_nsproxy(new_nsproxy); | 1763 | put_nsproxy(new_nsproxy); |
1757 | 1764 | ||
1758 | bad_unshare_cleanup_semundo: | ||
1759 | bad_unshare_cleanup_fd: | 1765 | bad_unshare_cleanup_fd: |
1760 | if (new_fd) | 1766 | if (new_fd) |
1761 | put_files_struct(new_fd); | 1767 | put_files_struct(new_fd); |
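After this change unshare(CLONE_SYSVSEM) is accepted and behaves like exit with respect to the semaphore undo list. A small userspace illustration (assumes a kernel carrying this patch; error handling kept minimal):

    #define _GNU_SOURCE
    #include <sched.h>
    #include <stdio.h>

    int main(void)
    {
            /* Previously failed with EINVAL; now detaches any SysV semaphore
             * undo state, as exit_sem() would on task exit. */
            if (unshare(CLONE_SYSVSEM) == -1) {
                    perror("unshare(CLONE_SYSVSEM)");
                    return 1;
            }
            printf("semundo state detached\n");
            return 0;
    }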
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index f78777abe769..dea4c9124ac8 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c | |||
@@ -590,7 +590,6 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, | |||
590 | list_add_tail(&timer->cb_entry, | 590 | list_add_tail(&timer->cb_entry, |
591 | &base->cpu_base->cb_pending); | 591 | &base->cpu_base->cb_pending); |
592 | timer->state = HRTIMER_STATE_PENDING; | 592 | timer->state = HRTIMER_STATE_PENDING; |
593 | raise_softirq(HRTIMER_SOFTIRQ); | ||
594 | return 1; | 593 | return 1; |
595 | default: | 594 | default: |
596 | BUG(); | 595 | BUG(); |
@@ -633,6 +632,11 @@ static int hrtimer_switch_to_hres(void) | |||
633 | return 1; | 632 | return 1; |
634 | } | 633 | } |
635 | 634 | ||
635 | static inline void hrtimer_raise_softirq(void) | ||
636 | { | ||
637 | raise_softirq(HRTIMER_SOFTIRQ); | ||
638 | } | ||
639 | |||
636 | #else | 640 | #else |
637 | 641 | ||
638 | static inline int hrtimer_hres_active(void) { return 0; } | 642 | static inline int hrtimer_hres_active(void) { return 0; } |
@@ -651,6 +655,7 @@ static inline int hrtimer_reprogram(struct hrtimer *timer, | |||
651 | { | 655 | { |
652 | return 0; | 656 | return 0; |
653 | } | 657 | } |
658 | static inline void hrtimer_raise_softirq(void) { } | ||
654 | 659 | ||
655 | #endif /* CONFIG_HIGH_RES_TIMERS */ | 660 | #endif /* CONFIG_HIGH_RES_TIMERS */ |
656 | 661 | ||
@@ -850,7 +855,7 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode) | |||
850 | { | 855 | { |
851 | struct hrtimer_clock_base *base, *new_base; | 856 | struct hrtimer_clock_base *base, *new_base; |
852 | unsigned long flags; | 857 | unsigned long flags; |
853 | int ret; | 858 | int ret, raise; |
854 | 859 | ||
855 | base = lock_hrtimer_base(timer, &flags); | 860 | base = lock_hrtimer_base(timer, &flags); |
856 | 861 | ||
@@ -884,8 +889,18 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode) | |||
884 | enqueue_hrtimer(timer, new_base, | 889 | enqueue_hrtimer(timer, new_base, |
885 | new_base->cpu_base == &__get_cpu_var(hrtimer_bases)); | 890 | new_base->cpu_base == &__get_cpu_var(hrtimer_bases)); |
886 | 891 | ||
892 | /* | ||
893 | * The timer may be expired and moved to the cb_pending | ||
894 | * list. We can not raise the softirq with base lock held due | ||
895 | * to a possible deadlock with runqueue lock. | ||
896 | */ | ||
897 | raise = timer->state == HRTIMER_STATE_PENDING; | ||
898 | |||
887 | unlock_hrtimer_base(timer, &flags); | 899 | unlock_hrtimer_base(timer, &flags); |
888 | 900 | ||
901 | if (raise) | ||
902 | hrtimer_raise_softirq(); | ||
903 | |||
889 | return ret; | 904 | return ret; |
890 | } | 905 | } |
891 | EXPORT_SYMBOL_GPL(hrtimer_start); | 906 | EXPORT_SYMBOL_GPL(hrtimer_start); |
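The softirq raise moves out from under the base lock because raising it there can deadlock against the runqueue lock; the decision is recorded while locked and acted on afterwards. A generic sketch of that shape (illustrative only, assuming CONFIG_HIGH_RES_TIMERS for the softirq number):

    #include <linux/spinlock.h>
    #include <linux/interrupt.h>

    static DEFINE_SPINLOCK(example_lock);

    static void example_deferred_raise(void)
    {
            unsigned long flags;
            int raise;

            spin_lock_irqsave(&example_lock, flags);
            raise = 1;      /* stand-in for: timer->state == HRTIMER_STATE_PENDING */
            spin_unlock_irqrestore(&example_lock, flags);

            /* act only after the lock is dropped, as hrtimer_start() now does */
            if (raise)
                    raise_softirq(HRTIMER_SOFTIRQ);
    }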
@@ -1080,8 +1095,19 @@ static void run_hrtimer_pending(struct hrtimer_cpu_base *cpu_base) | |||
1080 | * If the timer was rearmed on another CPU, reprogram | 1095 | * If the timer was rearmed on another CPU, reprogram |
1081 | * the event device. | 1096 | * the event device. |
1082 | */ | 1097 | */ |
1083 | if (timer->base->first == &timer->node) | 1098 | struct hrtimer_clock_base *base = timer->base; |
1084 | hrtimer_reprogram(timer, timer->base); | 1099 | |
1100 | if (base->first == &timer->node && | ||
1101 | hrtimer_reprogram(timer, base)) { | ||
1102 | /* | ||
1103 | * Timer is expired. Thus move it from tree to | ||
1104 | * pending list again. | ||
1105 | */ | ||
1106 | __remove_hrtimer(timer, base, | ||
1107 | HRTIMER_STATE_PENDING, 0); | ||
1108 | list_add_tail(&timer->cb_entry, | ||
1109 | &base->cpu_base->cb_pending); | ||
1110 | } | ||
1085 | } | 1111 | } |
1086 | } | 1112 | } |
1087 | spin_unlock_irq(&cpu_base->lock); | 1113 | spin_unlock_irq(&cpu_base->lock); |
diff --git a/kernel/irq/devres.c b/kernel/irq/devres.c index 6d9204f3a370..38a25b8d8bff 100644 --- a/kernel/irq/devres.c +++ b/kernel/irq/devres.c | |||
@@ -1,6 +1,7 @@ | |||
1 | #include <linux/module.h> | 1 | #include <linux/module.h> |
2 | #include <linux/interrupt.h> | 2 | #include <linux/interrupt.h> |
3 | #include <linux/device.h> | 3 | #include <linux/device.h> |
4 | #include <linux/gfp.h> | ||
4 | 5 | ||
5 | /* | 6 | /* |
6 | * Device resource management aware IRQ request/free implementation. | 7 | * Device resource management aware IRQ request/free implementation. |
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 438a01464287..46e4ad1723f0 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c | |||
@@ -11,6 +11,7 @@ | |||
11 | #include <linux/module.h> | 11 | #include <linux/module.h> |
12 | #include <linux/random.h> | 12 | #include <linux/random.h> |
13 | #include <linux/interrupt.h> | 13 | #include <linux/interrupt.h> |
14 | #include <linux/slab.h> | ||
14 | 15 | ||
15 | #include "internals.h" | 16 | #include "internals.h" |
16 | 17 | ||
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index f091d13def00..6fc0040f3e3a 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c | |||
@@ -472,11 +472,7 @@ static const struct file_operations kallsyms_operations = { | |||
472 | 472 | ||
473 | static int __init kallsyms_init(void) | 473 | static int __init kallsyms_init(void) |
474 | { | 474 | { |
475 | struct proc_dir_entry *entry; | 475 | proc_create("kallsyms", 0444, NULL, &kallsyms_operations); |
476 | |||
477 | entry = create_proc_entry("kallsyms", 0444, NULL); | ||
478 | if (entry) | ||
479 | entry->proc_fops = &kallsyms_operations; | ||
480 | return 0; | 476 | return 0; |
481 | } | 477 | } |
482 | __initcall(kallsyms_init); | 478 | __initcall(kallsyms_init); |
diff --git a/kernel/kexec.c b/kernel/kexec.c index 6782dce93d01..cb85c79989b4 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c | |||
@@ -1405,6 +1405,9 @@ static int __init crash_save_vmcoreinfo_init(void) | |||
1405 | VMCOREINFO_LENGTH(zone.free_area, MAX_ORDER); | 1405 | VMCOREINFO_LENGTH(zone.free_area, MAX_ORDER); |
1406 | VMCOREINFO_LENGTH(free_area.free_list, MIGRATE_TYPES); | 1406 | VMCOREINFO_LENGTH(free_area.free_list, MIGRATE_TYPES); |
1407 | VMCOREINFO_NUMBER(NR_FREE_PAGES); | 1407 | VMCOREINFO_NUMBER(NR_FREE_PAGES); |
1408 | VMCOREINFO_NUMBER(PG_lru); | ||
1409 | VMCOREINFO_NUMBER(PG_private); | ||
1410 | VMCOREINFO_NUMBER(PG_swapcache); | ||
1408 | 1411 | ||
1409 | arch_crash_save_vmcoreinfo(); | 1412 | arch_crash_save_vmcoreinfo(); |
1410 | 1413 | ||
diff --git a/kernel/kprobes.c b/kernel/kprobes.c index fcfb580c3afc..1e0250cb9486 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c | |||
@@ -72,6 +72,18 @@ DEFINE_MUTEX(kprobe_mutex); /* Protects kprobe_table */ | |||
72 | DEFINE_SPINLOCK(kretprobe_lock); /* Protects kretprobe_inst_table */ | 72 | DEFINE_SPINLOCK(kretprobe_lock); /* Protects kretprobe_inst_table */ |
73 | static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL; | 73 | static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL; |
74 | 74 | ||
75 | /* | ||
76 | * Normally, functions that we'd want to prohibit kprobes in, are marked | ||
77 | * __kprobes. But, there are cases where such functions already belong to | ||
78 | * a different section (__sched for preempt_schedule) | ||
79 | * | ||
80 | * For such cases, we now have a blacklist | ||
81 | */ | ||
82 | struct kprobe_blackpoint kprobe_blacklist[] = { | ||
83 | {"preempt_schedule",}, | ||
84 | {NULL} /* Terminator */ | ||
85 | }; | ||
86 | |||
75 | #ifdef __ARCH_WANT_KPROBES_INSN_SLOT | 87 | #ifdef __ARCH_WANT_KPROBES_INSN_SLOT |
76 | /* | 88 | /* |
77 | * kprobe->ainsn.insn points to the copy of the instruction to be | 89 | * kprobe->ainsn.insn points to the copy of the instruction to be |
@@ -417,6 +429,21 @@ static inline void free_rp_inst(struct kretprobe *rp) | |||
417 | } | 429 | } |
418 | } | 430 | } |
419 | 431 | ||
432 | static void __kprobes cleanup_rp_inst(struct kretprobe *rp) | ||
433 | { | ||
434 | unsigned long flags; | ||
435 | struct kretprobe_instance *ri; | ||
436 | struct hlist_node *pos, *next; | ||
437 | /* No race here */ | ||
438 | spin_lock_irqsave(&kretprobe_lock, flags); | ||
439 | hlist_for_each_entry_safe(ri, pos, next, &rp->used_instances, uflist) { | ||
440 | ri->rp = NULL; | ||
441 | hlist_del(&ri->uflist); | ||
442 | } | ||
443 | spin_unlock_irqrestore(&kretprobe_lock, flags); | ||
444 | free_rp_inst(rp); | ||
445 | } | ||
446 | |||
420 | /* | 447 | /* |
421 | * Keep all fields in the kprobe consistent | 448 | * Keep all fields in the kprobe consistent |
422 | */ | 449 | */ |
@@ -492,9 +519,22 @@ static int __kprobes register_aggr_kprobe(struct kprobe *old_p, | |||
492 | 519 | ||
493 | static int __kprobes in_kprobes_functions(unsigned long addr) | 520 | static int __kprobes in_kprobes_functions(unsigned long addr) |
494 | { | 521 | { |
522 | struct kprobe_blackpoint *kb; | ||
523 | |||
495 | if (addr >= (unsigned long)__kprobes_text_start && | 524 | if (addr >= (unsigned long)__kprobes_text_start && |
496 | addr < (unsigned long)__kprobes_text_end) | 525 | addr < (unsigned long)__kprobes_text_end) |
497 | return -EINVAL; | 526 | return -EINVAL; |
527 | /* | ||
528 | * If there exists a kprobe_blacklist, verify and | ||
529 | * fail any probe registration in the prohibited area | ||
530 | */ | ||
531 | for (kb = kprobe_blacklist; kb->name != NULL; kb++) { | ||
532 | if (kb->start_addr) { | ||
533 | if (addr >= kb->start_addr && | ||
534 | addr < (kb->start_addr + kb->range)) | ||
535 | return -EINVAL; | ||
536 | } | ||
537 | } | ||
498 | return 0; | 538 | return 0; |
499 | } | 539 | } |
500 | 540 | ||
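kprobe_blacklist is a NULL-terminated table of function names; init_kprobes() resolves each name to an address range so the check above rejects probes anywhere inside those bodies, not only at the entry point. Adding an entry would look like this (the second name is purely illustrative):

    struct kprobe_blackpoint kprobe_blacklist[] = {
            {"preempt_schedule",},
            {"some_unprobeable_function",},         /* hypothetical extra entry */
            {NULL}                                  /* Terminator */
    };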
@@ -555,6 +595,7 @@ static int __kprobes __register_kprobe(struct kprobe *p, | |||
555 | } | 595 | } |
556 | 596 | ||
557 | p->nmissed = 0; | 597 | p->nmissed = 0; |
598 | INIT_LIST_HEAD(&p->list); | ||
558 | mutex_lock(&kprobe_mutex); | 599 | mutex_lock(&kprobe_mutex); |
559 | old_p = get_kprobe(p->addr); | 600 | old_p = get_kprobe(p->addr); |
560 | if (old_p) { | 601 | if (old_p) { |
@@ -581,35 +622,28 @@ out: | |||
581 | return ret; | 622 | return ret; |
582 | } | 623 | } |
583 | 624 | ||
584 | int __kprobes register_kprobe(struct kprobe *p) | 625 | /* |
585 | { | 626 | * Unregister a kprobe without a scheduler synchronization. |
586 | return __register_kprobe(p, (unsigned long)__builtin_return_address(0)); | 627 | */ |
587 | } | 628 | static int __kprobes __unregister_kprobe_top(struct kprobe *p) |
588 | |||
589 | void __kprobes unregister_kprobe(struct kprobe *p) | ||
590 | { | 629 | { |
591 | struct module *mod; | ||
592 | struct kprobe *old_p, *list_p; | 630 | struct kprobe *old_p, *list_p; |
593 | int cleanup_p; | ||
594 | 631 | ||
595 | mutex_lock(&kprobe_mutex); | ||
596 | old_p = get_kprobe(p->addr); | 632 | old_p = get_kprobe(p->addr); |
597 | if (unlikely(!old_p)) { | 633 | if (unlikely(!old_p)) |
598 | mutex_unlock(&kprobe_mutex); | 634 | return -EINVAL; |
599 | return; | 635 | |
600 | } | ||
601 | if (p != old_p) { | 636 | if (p != old_p) { |
602 | list_for_each_entry_rcu(list_p, &old_p->list, list) | 637 | list_for_each_entry_rcu(list_p, &old_p->list, list) |
603 | if (list_p == p) | 638 | if (list_p == p) |
604 | /* kprobe p is a valid probe */ | 639 | /* kprobe p is a valid probe */ |
605 | goto valid_p; | 640 | goto valid_p; |
606 | mutex_unlock(&kprobe_mutex); | 641 | return -EINVAL; |
607 | return; | ||
608 | } | 642 | } |
609 | valid_p: | 643 | valid_p: |
610 | if (old_p == p || | 644 | if (old_p == p || |
611 | (old_p->pre_handler == aggr_pre_handler && | 645 | (old_p->pre_handler == aggr_pre_handler && |
612 | p->list.next == &old_p->list && p->list.prev == &old_p->list)) { | 646 | list_is_singular(&old_p->list))) { |
613 | /* | 647 | /* |
614 | * Only probe on the hash list. Disarm only if kprobes are | 648 | * Only probe on the hash list. Disarm only if kprobes are |
615 | * enabled - otherwise, the breakpoint would already have | 649 | * enabled - otherwise, the breakpoint would already have |
@@ -618,43 +652,97 @@ valid_p: | |||
618 | if (kprobe_enabled) | 652 | if (kprobe_enabled) |
619 | arch_disarm_kprobe(p); | 653 | arch_disarm_kprobe(p); |
620 | hlist_del_rcu(&old_p->hlist); | 654 | hlist_del_rcu(&old_p->hlist); |
621 | cleanup_p = 1; | ||
622 | } else { | 655 | } else { |
656 | if (p->break_handler) | ||
657 | old_p->break_handler = NULL; | ||
658 | if (p->post_handler) { | ||
659 | list_for_each_entry_rcu(list_p, &old_p->list, list) { | ||
660 | if ((list_p != p) && (list_p->post_handler)) | ||
661 | goto noclean; | ||
662 | } | ||
663 | old_p->post_handler = NULL; | ||
664 | } | ||
665 | noclean: | ||
623 | list_del_rcu(&p->list); | 666 | list_del_rcu(&p->list); |
624 | cleanup_p = 0; | ||
625 | } | 667 | } |
668 | return 0; | ||
669 | } | ||
626 | 670 | ||
627 | mutex_unlock(&kprobe_mutex); | 671 | static void __kprobes __unregister_kprobe_bottom(struct kprobe *p) |
672 | { | ||
673 | struct module *mod; | ||
674 | struct kprobe *old_p; | ||
628 | 675 | ||
629 | synchronize_sched(); | ||
630 | if (p->mod_refcounted) { | 676 | if (p->mod_refcounted) { |
631 | mod = module_text_address((unsigned long)p->addr); | 677 | mod = module_text_address((unsigned long)p->addr); |
632 | if (mod) | 678 | if (mod) |
633 | module_put(mod); | 679 | module_put(mod); |
634 | } | 680 | } |
635 | 681 | ||
636 | if (cleanup_p) { | 682 | if (list_empty(&p->list) || list_is_singular(&p->list)) { |
637 | if (p != old_p) { | 683 | if (!list_empty(&p->list)) { |
638 | list_del_rcu(&p->list); | 684 | /* "p" is the last child of an aggr_kprobe */ |
685 | old_p = list_entry(p->list.next, struct kprobe, list); | ||
686 | list_del(&p->list); | ||
639 | kfree(old_p); | 687 | kfree(old_p); |
640 | } | 688 | } |
641 | arch_remove_kprobe(p); | 689 | arch_remove_kprobe(p); |
642 | } else { | 690 | } |
643 | mutex_lock(&kprobe_mutex); | 691 | } |
644 | if (p->break_handler) | 692 | |
645 | old_p->break_handler = NULL; | 693 | static int __register_kprobes(struct kprobe **kps, int num, |
646 | if (p->post_handler){ | 694 | unsigned long called_from) |
647 | list_for_each_entry_rcu(list_p, &old_p->list, list){ | 695 | { |
648 | if (list_p->post_handler){ | 696 | int i, ret = 0; |
649 | cleanup_p = 2; | 697 | |
650 | break; | 698 | if (num <= 0) |
651 | } | 699 | return -EINVAL; |
652 | } | 700 | for (i = 0; i < num; i++) { |
653 | if (cleanup_p == 0) | 701 | ret = __register_kprobe(kps[i], called_from); |
654 | old_p->post_handler = NULL; | 702 | if (ret < 0 && i > 0) { |
703 | unregister_kprobes(kps, i); | ||
704 | break; | ||
655 | } | 705 | } |
656 | mutex_unlock(&kprobe_mutex); | ||
657 | } | 706 | } |
707 | return ret; | ||
708 | } | ||
709 | |||
710 | /* | ||
711 | * Registration and unregistration functions for kprobe. | ||
712 | */ | ||
713 | int __kprobes register_kprobe(struct kprobe *p) | ||
714 | { | ||
715 | return __register_kprobes(&p, 1, | ||
716 | (unsigned long)__builtin_return_address(0)); | ||
717 | } | ||
718 | |||
719 | void __kprobes unregister_kprobe(struct kprobe *p) | ||
720 | { | ||
721 | unregister_kprobes(&p, 1); | ||
722 | } | ||
723 | |||
724 | int __kprobes register_kprobes(struct kprobe **kps, int num) | ||
725 | { | ||
726 | return __register_kprobes(kps, num, | ||
727 | (unsigned long)__builtin_return_address(0)); | ||
728 | } | ||
729 | |||
730 | void __kprobes unregister_kprobes(struct kprobe **kps, int num) | ||
731 | { | ||
732 | int i; | ||
733 | |||
734 | if (num <= 0) | ||
735 | return; | ||
736 | mutex_lock(&kprobe_mutex); | ||
737 | for (i = 0; i < num; i++) | ||
738 | if (__unregister_kprobe_top(kps[i]) < 0) | ||
739 | kps[i]->addr = NULL; | ||
740 | mutex_unlock(&kprobe_mutex); | ||
741 | |||
742 | synchronize_sched(); | ||
743 | for (i = 0; i < num; i++) | ||
744 | if (kps[i]->addr) | ||
745 | __unregister_kprobe_bottom(kps[i]); | ||
658 | } | 746 | } |
659 | 747 | ||
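The new array-based registration lets a user arm several probes with one call, with automatic unwinding on partial failure and a single synchronize_sched() when tearing them all down. A hedged module sketch; the probed symbols and names are only examples:

    #include <linux/module.h>
    #include <linux/kprobes.h>

    static int example_pre(struct kprobe *p, struct pt_regs *regs)
    {
            return 0;       /* a real handler would count or trace here */
    }

    static struct kprobe kp1 = { .symbol_name = "do_fork", .pre_handler = example_pre };
    static struct kprobe kp2 = { .symbol_name = "do_exit", .pre_handler = example_pre };
    static struct kprobe *kps[] = { &kp1, &kp2 };

    static int __init example_init(void)
    {
            /* registers both, or unwinds the ones already armed on failure */
            return register_kprobes(kps, ARRAY_SIZE(kps));
    }

    static void __exit example_exit(void)
    {
            /* one synchronize_sched() covers both probes */
            unregister_kprobes(kps, ARRAY_SIZE(kps));
    }

    module_init(example_init);
    module_exit(example_exit);
    MODULE_LICENSE("GPL");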
660 | static struct notifier_block kprobe_exceptions_nb = { | 748 | static struct notifier_block kprobe_exceptions_nb = { |
@@ -667,24 +755,69 @@ unsigned long __weak arch_deref_entry_point(void *entry) | |||
667 | return (unsigned long)entry; | 755 | return (unsigned long)entry; |
668 | } | 756 | } |
669 | 757 | ||
670 | int __kprobes register_jprobe(struct jprobe *jp) | 758 | static int __register_jprobes(struct jprobe **jps, int num, |
759 | unsigned long called_from) | ||
671 | { | 760 | { |
672 | unsigned long addr = arch_deref_entry_point(jp->entry); | 761 | struct jprobe *jp; |
762 | int ret = 0, i; | ||
673 | 763 | ||
674 | if (!kernel_text_address(addr)) | 764 | if (num <= 0) |
675 | return -EINVAL; | 765 | return -EINVAL; |
766 | for (i = 0; i < num; i++) { | ||
767 | unsigned long addr; | ||
768 | jp = jps[i]; | ||
769 | addr = arch_deref_entry_point(jp->entry); | ||
770 | |||
771 | if (!kernel_text_address(addr)) | ||
772 | ret = -EINVAL; | ||
773 | else { | ||
774 | /* Todo: Verify probepoint is a function entry point */ | ||
775 | jp->kp.pre_handler = setjmp_pre_handler; | ||
776 | jp->kp.break_handler = longjmp_break_handler; | ||
777 | ret = __register_kprobe(&jp->kp, called_from); | ||
778 | } | ||
779 | if (ret < 0 && i > 0) { | ||
780 | unregister_jprobes(jps, i); | ||
781 | break; | ||
782 | } | ||
783 | } | ||
784 | return ret; | ||
785 | } | ||
676 | 786 | ||
677 | /* Todo: Verify probepoint is a function entry point */ | 787 | int __kprobes register_jprobe(struct jprobe *jp) |
678 | jp->kp.pre_handler = setjmp_pre_handler; | 788 | { |
679 | jp->kp.break_handler = longjmp_break_handler; | 789 | return __register_jprobes(&jp, 1, |
680 | |||
681 | return __register_kprobe(&jp->kp, | ||
682 | (unsigned long)__builtin_return_address(0)); | 790 | (unsigned long)__builtin_return_address(0)); |
683 | } | 791 | } |
684 | 792 | ||
685 | void __kprobes unregister_jprobe(struct jprobe *jp) | 793 | void __kprobes unregister_jprobe(struct jprobe *jp) |
686 | { | 794 | { |
687 | unregister_kprobe(&jp->kp); | 795 | unregister_jprobes(&jp, 1); |
796 | } | ||
797 | |||
798 | int __kprobes register_jprobes(struct jprobe **jps, int num) | ||
799 | { | ||
800 | return __register_jprobes(jps, num, | ||
801 | (unsigned long)__builtin_return_address(0)); | ||
802 | } | ||
803 | |||
804 | void __kprobes unregister_jprobes(struct jprobe **jps, int num) | ||
805 | { | ||
806 | int i; | ||
807 | |||
808 | if (num <= 0) | ||
809 | return; | ||
810 | mutex_lock(&kprobe_mutex); | ||
811 | for (i = 0; i < num; i++) | ||
812 | if (__unregister_kprobe_top(&jps[i]->kp) < 0) | ||
813 | jps[i]->kp.addr = NULL; | ||
814 | mutex_unlock(&kprobe_mutex); | ||
815 | |||
816 | synchronize_sched(); | ||
817 | for (i = 0; i < num; i++) { | ||
818 | if (jps[i]->kp.addr) | ||
819 | __unregister_kprobe_bottom(&jps[i]->kp); | ||
820 | } | ||
688 | } | 821 | } |
689 | 822 | ||
690 | #ifdef CONFIG_KRETPROBES | 823 | #ifdef CONFIG_KRETPROBES |
@@ -725,7 +858,8 @@ static int __kprobes pre_handler_kretprobe(struct kprobe *p, | |||
725 | return 0; | 858 | return 0; |
726 | } | 859 | } |
727 | 860 | ||
728 | int __kprobes register_kretprobe(struct kretprobe *rp) | 861 | static int __kprobes __register_kretprobe(struct kretprobe *rp, |
862 | unsigned long called_from) | ||
729 | { | 863 | { |
730 | int ret = 0; | 864 | int ret = 0; |
731 | struct kretprobe_instance *inst; | 865 | struct kretprobe_instance *inst; |
@@ -771,46 +905,101 @@ int __kprobes register_kretprobe(struct kretprobe *rp) | |||
771 | 905 | ||
772 | rp->nmissed = 0; | 906 | rp->nmissed = 0; |
773 | /* Establish function entry probe point */ | 907 | /* Establish function entry probe point */ |
774 | if ((ret = __register_kprobe(&rp->kp, | 908 | ret = __register_kprobe(&rp->kp, called_from); |
775 | (unsigned long)__builtin_return_address(0))) != 0) | 909 | if (ret != 0) |
776 | free_rp_inst(rp); | 910 | free_rp_inst(rp); |
777 | return ret; | 911 | return ret; |
778 | } | 912 | } |
779 | 913 | ||
914 | static int __register_kretprobes(struct kretprobe **rps, int num, | ||
915 | unsigned long called_from) | ||
916 | { | ||
917 | int ret = 0, i; | ||
918 | |||
919 | if (num <= 0) | ||
920 | return -EINVAL; | ||
921 | for (i = 0; i < num; i++) { | ||
922 | ret = __register_kretprobe(rps[i], called_from); | ||
923 | if (ret < 0 && i > 0) { | ||
924 | unregister_kretprobes(rps, i); | ||
925 | break; | ||
926 | } | ||
927 | } | ||
928 | return ret; | ||
929 | } | ||
930 | |||
931 | int __kprobes register_kretprobe(struct kretprobe *rp) | ||
932 | { | ||
933 | return __register_kretprobes(&rp, 1, | ||
934 | (unsigned long)__builtin_return_address(0)); | ||
935 | } | ||
936 | |||
937 | void __kprobes unregister_kretprobe(struct kretprobe *rp) | ||
938 | { | ||
939 | unregister_kretprobes(&rp, 1); | ||
940 | } | ||
941 | |||
942 | int __kprobes register_kretprobes(struct kretprobe **rps, int num) | ||
943 | { | ||
944 | return __register_kretprobes(rps, num, | ||
945 | (unsigned long)__builtin_return_address(0)); | ||
946 | } | ||
947 | |||
948 | void __kprobes unregister_kretprobes(struct kretprobe **rps, int num) | ||
949 | { | ||
950 | int i; | ||
951 | |||
952 | if (num <= 0) | ||
953 | return; | ||
954 | mutex_lock(&kprobe_mutex); | ||
955 | for (i = 0; i < num; i++) | ||
956 | if (__unregister_kprobe_top(&rps[i]->kp) < 0) | ||
957 | rps[i]->kp.addr = NULL; | ||
958 | mutex_unlock(&kprobe_mutex); | ||
959 | |||
960 | synchronize_sched(); | ||
961 | for (i = 0; i < num; i++) { | ||
962 | if (rps[i]->kp.addr) { | ||
963 | __unregister_kprobe_bottom(&rps[i]->kp); | ||
964 | cleanup_rp_inst(rps[i]); | ||
965 | } | ||
966 | } | ||
967 | } | ||
968 | |||
780 | #else /* CONFIG_KRETPROBES */ | 969 | #else /* CONFIG_KRETPROBES */ |
781 | int __kprobes register_kretprobe(struct kretprobe *rp) | 970 | int __kprobes register_kretprobe(struct kretprobe *rp) |
782 | { | 971 | { |
783 | return -ENOSYS; | 972 | return -ENOSYS; |
784 | } | 973 | } |
785 | 974 | ||
786 | static int __kprobes pre_handler_kretprobe(struct kprobe *p, | 975 | int __kprobes register_kretprobes(struct kretprobe **rps, int num) |
787 | struct pt_regs *regs) | ||
788 | { | 976 | { |
789 | return 0; | 977 | return -ENOSYS; |
790 | } | 978 | } |
791 | #endif /* CONFIG_KRETPROBES */ | ||
792 | |||
793 | void __kprobes unregister_kretprobe(struct kretprobe *rp) | 979 | void __kprobes unregister_kretprobe(struct kretprobe *rp) |
794 | { | 980 | { |
795 | unsigned long flags; | 981 | } |
796 | struct kretprobe_instance *ri; | ||
797 | struct hlist_node *pos, *next; | ||
798 | 982 | ||
799 | unregister_kprobe(&rp->kp); | 983 | void __kprobes unregister_kretprobes(struct kretprobe **rps, int num) |
984 | { | ||
985 | } | ||
800 | 986 | ||
801 | /* No race here */ | 987 | static int __kprobes pre_handler_kretprobe(struct kprobe *p, |
802 | spin_lock_irqsave(&kretprobe_lock, flags); | 988 | struct pt_regs *regs) |
803 | hlist_for_each_entry_safe(ri, pos, next, &rp->used_instances, uflist) { | 989 | { |
804 | ri->rp = NULL; | 990 | return 0; |
805 | hlist_del(&ri->uflist); | ||
806 | } | ||
807 | spin_unlock_irqrestore(&kretprobe_lock, flags); | ||
808 | free_rp_inst(rp); | ||
809 | } | 991 | } |
810 | 992 | ||
993 | #endif /* CONFIG_KRETPROBES */ | ||
994 | |||
811 | static int __init init_kprobes(void) | 995 | static int __init init_kprobes(void) |
812 | { | 996 | { |
813 | int i, err = 0; | 997 | int i, err = 0; |
998 | unsigned long offset = 0, size = 0; | ||
999 | char *modname, namebuf[128]; | ||
1000 | const char *symbol_name; | ||
1001 | void *addr; | ||
1002 | struct kprobe_blackpoint *kb; | ||
814 | 1003 | ||
815 | /* FIXME allocate the probe table, currently defined statically */ | 1004 | /* FIXME allocate the probe table, currently defined statically */ |
816 | /* initialize all list heads */ | 1005 | /* initialize all list heads */ |
@@ -819,6 +1008,28 @@ static int __init init_kprobes(void) | |||
819 | INIT_HLIST_HEAD(&kretprobe_inst_table[i]); | 1008 | INIT_HLIST_HEAD(&kretprobe_inst_table[i]); |
820 | } | 1009 | } |
821 | 1010 | ||
1011 | /* | ||
1012 | * Lookup and populate the kprobe_blacklist. | ||
1013 | * | ||
1014 | * Unlike the kretprobe blacklist, we'll need to determine | ||
1015 | * the range of addresses that belong to the said functions, | ||
1016 | * since a kprobe need not necessarily be at the beginning | ||
1017 | * of a function. | ||
1018 | */ | ||
1019 | for (kb = kprobe_blacklist; kb->name != NULL; kb++) { | ||
1020 | kprobe_lookup_name(kb->name, addr); | ||
1021 | if (!addr) | ||
1022 | continue; | ||
1023 | |||
1024 | kb->start_addr = (unsigned long)addr; | ||
1025 | symbol_name = kallsyms_lookup(kb->start_addr, | ||
1026 | &size, &offset, &modname, namebuf); | ||
1027 | if (!symbol_name) | ||
1028 | kb->range = 0; | ||
1029 | else | ||
1030 | kb->range = size; | ||
1031 | } | ||
1032 | |||
822 | if (kretprobe_blacklist_size) { | 1033 | if (kretprobe_blacklist_size) { |
823 | /* lookup the function address from its name */ | 1034 | /* lookup the function address from its name */ |
824 | for (i = 0; kretprobe_blacklist[i].name != NULL; i++) { | 1035 | for (i = 0; kretprobe_blacklist[i].name != NULL; i++) { |
@@ -1066,8 +1277,12 @@ module_init(init_kprobes); | |||
1066 | 1277 | ||
1067 | EXPORT_SYMBOL_GPL(register_kprobe); | 1278 | EXPORT_SYMBOL_GPL(register_kprobe); |
1068 | EXPORT_SYMBOL_GPL(unregister_kprobe); | 1279 | EXPORT_SYMBOL_GPL(unregister_kprobe); |
1280 | EXPORT_SYMBOL_GPL(register_kprobes); | ||
1281 | EXPORT_SYMBOL_GPL(unregister_kprobes); | ||
1069 | EXPORT_SYMBOL_GPL(register_jprobe); | 1282 | EXPORT_SYMBOL_GPL(register_jprobe); |
1070 | EXPORT_SYMBOL_GPL(unregister_jprobe); | 1283 | EXPORT_SYMBOL_GPL(unregister_jprobe); |
1284 | EXPORT_SYMBOL_GPL(register_jprobes); | ||
1285 | EXPORT_SYMBOL_GPL(unregister_jprobes); | ||
1071 | #ifdef CONFIG_KPROBES | 1286 | #ifdef CONFIG_KPROBES |
1072 | EXPORT_SYMBOL_GPL(jprobe_return); | 1287 | EXPORT_SYMBOL_GPL(jprobe_return); |
1073 | #endif | 1288 | #endif |
@@ -1075,4 +1290,6 @@ EXPORT_SYMBOL_GPL(jprobe_return); | |||
1075 | #ifdef CONFIG_KPROBES | 1290 | #ifdef CONFIG_KPROBES |
1076 | EXPORT_SYMBOL_GPL(register_kretprobe); | 1291 | EXPORT_SYMBOL_GPL(register_kretprobe); |
1077 | EXPORT_SYMBOL_GPL(unregister_kretprobe); | 1292 | EXPORT_SYMBOL_GPL(unregister_kretprobe); |
1293 | EXPORT_SYMBOL_GPL(register_kretprobes); | ||
1294 | EXPORT_SYMBOL_GPL(unregister_kretprobes); | ||
1078 | #endif | 1295 | #endif |
diff --git a/kernel/kthread.c b/kernel/kthread.c index 92cf6930ab51..ac72eea48339 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c | |||
@@ -144,9 +144,9 @@ struct task_struct *kthread_create(int (*threadfn)(void *data), | |||
144 | 144 | ||
145 | spin_lock(&kthread_create_lock); | 145 | spin_lock(&kthread_create_lock); |
146 | list_add_tail(&create.list, &kthread_create_list); | 146 | list_add_tail(&create.list, &kthread_create_list); |
147 | wake_up_process(kthreadd_task); | ||
148 | spin_unlock(&kthread_create_lock); | 147 | spin_unlock(&kthread_create_lock); |
149 | 148 | ||
149 | wake_up_process(kthreadd_task); | ||
150 | wait_for_completion(&create.done); | 150 | wait_for_completion(&create.done); |
151 | 151 | ||
152 | if (!IS_ERR(create.result)) { | 152 | if (!IS_ERR(create.result)) { |
diff --git a/kernel/latencytop.c b/kernel/latencytop.c index 7c74dab0d21b..5e7b45c56923 100644 --- a/kernel/latencytop.c +++ b/kernel/latencytop.c | |||
@@ -233,14 +233,7 @@ static struct file_operations lstats_fops = { | |||
233 | 233 | ||
234 | static int __init init_lstats_procfs(void) | 234 | static int __init init_lstats_procfs(void) |
235 | { | 235 | { |
236 | struct proc_dir_entry *pe; | 236 | proc_create("latency_stats", 0644, NULL, &lstats_fops); |
237 | |||
238 | pe = create_proc_entry("latency_stats", 0644, NULL); | ||
239 | if (!pe) | ||
240 | return -ENOMEM; | ||
241 | |||
242 | pe->proc_fops = &lstats_fops; | ||
243 | |||
244 | return 0; | 237 | return 0; |
245 | } | 238 | } |
246 | __initcall(init_lstats_procfs); | 239 | __initcall(init_lstats_procfs); |
diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c index 8a135bd163c2..dc5d29648d85 100644 --- a/kernel/lockdep_proc.c +++ b/kernel/lockdep_proc.c | |||
@@ -660,20 +660,12 @@ static const struct file_operations proc_lock_stat_operations = { | |||
660 | 660 | ||
661 | static int __init lockdep_proc_init(void) | 661 | static int __init lockdep_proc_init(void) |
662 | { | 662 | { |
663 | struct proc_dir_entry *entry; | 663 | proc_create("lockdep", S_IRUSR, NULL, &proc_lockdep_operations); |
664 | 664 | proc_create("lockdep_stats", S_IRUSR, NULL, | |
665 | entry = create_proc_entry("lockdep", S_IRUSR, NULL); | 665 | &proc_lockdep_stats_operations); |
666 | if (entry) | ||
667 | entry->proc_fops = &proc_lockdep_operations; | ||
668 | |||
669 | entry = create_proc_entry("lockdep_stats", S_IRUSR, NULL); | ||
670 | if (entry) | ||
671 | entry->proc_fops = &proc_lockdep_stats_operations; | ||
672 | 666 | ||
673 | #ifdef CONFIG_LOCK_STAT | 667 | #ifdef CONFIG_LOCK_STAT |
674 | entry = create_proc_entry("lock_stat", S_IRUSR, NULL); | 668 | proc_create("lock_stat", S_IRUSR, NULL, &proc_lock_stat_operations); |
675 | if (entry) | ||
676 | entry->proc_fops = &proc_lock_stat_operations; | ||
677 | #endif | 669 | #endif |
678 | 670 | ||
679 | return 0; | 671 | return 0; |
diff --git a/kernel/marker.c b/kernel/marker.c index 005b95954593..139260e5460c 100644 --- a/kernel/marker.c +++ b/kernel/marker.c | |||
@@ -23,6 +23,7 @@ | |||
23 | #include <linux/rcupdate.h> | 23 | #include <linux/rcupdate.h> |
24 | #include <linux/marker.h> | 24 | #include <linux/marker.h> |
25 | #include <linux/err.h> | 25 | #include <linux/err.h> |
26 | #include <linux/slab.h> | ||
26 | 27 | ||
27 | extern struct marker __start___markers[]; | 28 | extern struct marker __start___markers[]; |
28 | extern struct marker __stop___markers[]; | 29 | extern struct marker __stop___markers[]; |
diff --git a/kernel/notifier.c b/kernel/notifier.c index 643360d1bb14..823be11584ef 100644 --- a/kernel/notifier.c +++ b/kernel/notifier.c | |||
@@ -31,6 +31,21 @@ static int notifier_chain_register(struct notifier_block **nl, | |||
31 | return 0; | 31 | return 0; |
32 | } | 32 | } |
33 | 33 | ||
34 | static int notifier_chain_cond_register(struct notifier_block **nl, | ||
35 | struct notifier_block *n) | ||
36 | { | ||
37 | while ((*nl) != NULL) { | ||
38 | if ((*nl) == n) | ||
39 | return 0; | ||
40 | if (n->priority > (*nl)->priority) | ||
41 | break; | ||
42 | nl = &((*nl)->next); | ||
43 | } | ||
44 | n->next = *nl; | ||
45 | rcu_assign_pointer(*nl, n); | ||
46 | return 0; | ||
47 | } | ||
48 | |||
34 | static int notifier_chain_unregister(struct notifier_block **nl, | 49 | static int notifier_chain_unregister(struct notifier_block **nl, |
35 | struct notifier_block *n) | 50 | struct notifier_block *n) |
36 | { | 51 | { |
@@ -205,6 +220,29 @@ int blocking_notifier_chain_register(struct blocking_notifier_head *nh, | |||
205 | EXPORT_SYMBOL_GPL(blocking_notifier_chain_register); | 220 | EXPORT_SYMBOL_GPL(blocking_notifier_chain_register); |
206 | 221 | ||
207 | /** | 222 | /** |
223 | * blocking_notifier_chain_cond_register - Cond add notifier to a blocking notifier chain | ||
224 | * @nh: Pointer to head of the blocking notifier chain | ||
225 | * @n: New entry in notifier chain | ||
226 | * | ||
227 | * Adds a notifier to a blocking notifier chain, only if not already | ||
228 | * present in the chain. | ||
229 | * Must be called in process context. | ||
230 | * | ||
231 | * Currently always returns zero. | ||
232 | */ | ||
233 | int blocking_notifier_chain_cond_register(struct blocking_notifier_head *nh, | ||
234 | struct notifier_block *n) | ||
235 | { | ||
236 | int ret; | ||
237 | |||
238 | down_write(&nh->rwsem); | ||
239 | ret = notifier_chain_cond_register(&nh->head, n); | ||
240 | up_write(&nh->rwsem); | ||
241 | return ret; | ||
242 | } | ||
243 | EXPORT_SYMBOL_GPL(blocking_notifier_chain_cond_register); | ||
244 | |||
245 | /** | ||
208 | * blocking_notifier_chain_unregister - Remove notifier from a blocking notifier chain | 246 | * blocking_notifier_chain_unregister - Remove notifier from a blocking notifier chain |
209 | * @nh: Pointer to head of the blocking notifier chain | 247 | * @nh: Pointer to head of the blocking notifier chain |
210 | * @n: Entry to remove from notifier chain | 248 | * @n: Entry to remove from notifier chain |
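The new notifier_chain_cond_register() walks the chain and returns early if the block is already linked; otherwise it inserts before the first entry of lower priority, so higher-priority notifiers stay first and repeat registration is a no-op. A self-contained userspace sketch of the same insert-if-absent, priority-ordered logic (the struct and function names here are illustrative, not the kernel's):

    #include <stdio.h>

    struct node {
        int priority;
        const char *name;
        struct node *next;
    };

    /* Insert n before the first entry with lower priority,
     * unless n is already on the list (then do nothing). */
    static void cond_register(struct node **nl, struct node *n)
    {
        while (*nl != NULL) {
            if (*nl == n)
                return;                 /* already registered */
            if (n->priority > (*nl)->priority)
                break;                  /* insert point found */
            nl = &(*nl)->next;
        }
        n->next = *nl;
        *nl = n;
    }

    int main(void)
    {
        struct node *head = NULL;
        struct node a = { 10, "a", NULL }, b = { 20, "b", NULL };

        cond_register(&head, &a);
        cond_register(&head, &b);
        cond_register(&head, &a);       /* second registration is a no-op */

        for (struct node *p = head; p; p = p->next)
            printf("%s (prio %d)\n", p->name, p->priority);
        return 0;                       /* prints b then a */
    }

blocking_notifier_chain_cond_register() wraps the same walk under nh->rwsem, so callers that may register the same block from several code paths need no extra bookkeeping of their own.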
diff --git a/kernel/ns_cgroup.c b/kernel/ns_cgroup.c index aead4d69f62b..48d7ed6fc3a4 100644 --- a/kernel/ns_cgroup.c +++ b/kernel/ns_cgroup.c | |||
@@ -7,6 +7,8 @@ | |||
7 | #include <linux/module.h> | 7 | #include <linux/module.h> |
8 | #include <linux/cgroup.h> | 8 | #include <linux/cgroup.h> |
9 | #include <linux/fs.h> | 9 | #include <linux/fs.h> |
10 | #include <linux/slab.h> | ||
11 | #include <linux/nsproxy.h> | ||
10 | 12 | ||
11 | struct ns_cgroup { | 13 | struct ns_cgroup { |
12 | struct cgroup_subsys_state css; | 14 | struct cgroup_subsys_state css; |
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index f5d332cf8c63..adc785146a1c 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c | |||
@@ -139,6 +139,18 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk) | |||
139 | goto out; | 139 | goto out; |
140 | } | 140 | } |
141 | 141 | ||
142 | /* | ||
143 | * CLONE_NEWIPC must detach from the undolist: after switching | ||
144 | * to a new ipc namespace, the semaphore arrays from the old | ||
145 | * namespace are unreachable. In clone parlance, CLONE_SYSVSEM | ||
146 | * means share undolist with parent, so we must forbid using | ||
147 | * it along with CLONE_NEWIPC. | ||
148 | */ | ||
149 | if ((flags & CLONE_NEWIPC) && (flags & CLONE_SYSVSEM)) { | ||
150 | err = -EINVAL; | ||
151 | goto out; | ||
152 | } | ||
153 | |||
142 | new_ns = create_new_namespaces(flags, tsk, tsk->fs); | 154 | new_ns = create_new_namespaces(flags, tsk, tsk->fs); |
143 | if (IS_ERR(new_ns)) { | 155 | if (IS_ERR(new_ns)) { |
144 | err = PTR_ERR(new_ns); | 156 | err = PTR_ERR(new_ns); |
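With this check, clone(2) refuses CLONE_NEWIPC together with CLONE_SYSVSEM, since the shared undolist would point at semaphore arrays that are unreachable from the new IPC namespace. A small userspace probe, assuming glibc's clone() wrapper; on a kernel with this patch the call is expected to fail with EINVAL (and with EPERM instead if the caller lacks CAP_SYS_ADMIN for CLONE_NEWIPC):

    #define _GNU_SOURCE
    #include <sched.h>
    #include <signal.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <errno.h>
    #include <sys/types.h>

    static int child_fn(void *arg)
    {
        return 0;
    }

    int main(void)
    {
        const size_t stack_size = 64 * 1024;
        char *stack = malloc(stack_size);
        if (!stack)
            return 1;

        /* glibc's clone() takes the top of the child stack on most arches. */
        pid_t pid = clone(child_fn, stack + stack_size,
                          CLONE_NEWIPC | CLONE_SYSVSEM | SIGCHLD, NULL);
        if (pid == -1)
            printf("clone failed: %s\n", strerror(errno));
        else
            printf("clone unexpectedly succeeded (pid %d)\n", (int)pid);

        free(stack);
        return 0;
    }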
diff --git a/kernel/panic.c b/kernel/panic.c index 24af9f8bac99..425567f45b9f 100644 --- a/kernel/panic.c +++ b/kernel/panic.c | |||
@@ -153,6 +153,8 @@ EXPORT_SYMBOL(panic); | |||
153 | * 'M' - System experienced a machine check exception. | 153 | * 'M' - System experienced a machine check exception. |
154 | * 'B' - System has hit bad_page. | 154 | * 'B' - System has hit bad_page. |
155 | * 'U' - Userspace-defined naughtiness. | 155 | * 'U' - Userspace-defined naughtiness. |
156 | * 'A' - ACPI table overridden. | ||
157 | * 'W' - Taint on warning. | ||
156 | * | 158 | * |
157 | * The string is overwritten by the next call to print_taint(). | 159 | * The string is overwritten by the next call to print_taint(). |
158 | */ | 160 | */ |
@@ -161,7 +163,7 @@ const char *print_tainted(void) | |||
161 | { | 163 | { |
162 | static char buf[20]; | 164 | static char buf[20]; |
163 | if (tainted) { | 165 | if (tainted) { |
164 | snprintf(buf, sizeof(buf), "Tainted: %c%c%c%c%c%c%c%c%c", | 166 | snprintf(buf, sizeof(buf), "Tainted: %c%c%c%c%c%c%c%c%c%c", |
165 | tainted & TAINT_PROPRIETARY_MODULE ? 'P' : 'G', | 167 | tainted & TAINT_PROPRIETARY_MODULE ? 'P' : 'G', |
166 | tainted & TAINT_FORCED_MODULE ? 'F' : ' ', | 168 | tainted & TAINT_FORCED_MODULE ? 'F' : ' ', |
167 | tainted & TAINT_UNSAFE_SMP ? 'S' : ' ', | 169 | tainted & TAINT_UNSAFE_SMP ? 'S' : ' ', |
@@ -170,7 +172,8 @@ const char *print_tainted(void) | |||
170 | tainted & TAINT_BAD_PAGE ? 'B' : ' ', | 172 | tainted & TAINT_BAD_PAGE ? 'B' : ' ', |
171 | tainted & TAINT_USER ? 'U' : ' ', | 173 | tainted & TAINT_USER ? 'U' : ' ', |
172 | tainted & TAINT_DIE ? 'D' : ' ', | 174 | tainted & TAINT_DIE ? 'D' : ' ', |
173 | tainted & TAINT_OVERRIDDEN_ACPI_TABLE ? 'A' : ' '); | 175 | tainted & TAINT_OVERRIDDEN_ACPI_TABLE ? 'A' : ' ', |
176 | tainted & TAINT_WARN ? 'W' : ' '); | ||
174 | } | 177 | } |
175 | else | 178 | else |
176 | snprintf(buf, sizeof(buf), "Not tainted"); | 179 | snprintf(buf, sizeof(buf), "Not tainted"); |
@@ -312,6 +315,7 @@ void warn_on_slowpath(const char *file, int line) | |||
312 | print_modules(); | 315 | print_modules(); |
313 | dump_stack(); | 316 | dump_stack(); |
314 | print_oops_end_marker(); | 317 | print_oops_end_marker(); |
318 | add_taint(TAINT_WARN); | ||
315 | } | 319 | } |
316 | EXPORT_SYMBOL(warn_on_slowpath); | 320 | EXPORT_SYMBOL(warn_on_slowpath); |
317 | #endif | 321 | #endif |
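With the new 'A' and 'W' flags the format string grows to ten %c conversions; "Tainted: " (9 characters) plus 10 flag characters plus the terminating NUL appears to fit the 20-byte buf exactly, so the existing buffer size still works. A userspace sketch of the same bitmask-to-letter mapping, using illustrative flag values rather than the kernel's TAINT_* constants:

    #include <stdio.h>

    /* Illustrative flag bits (not the kernel's TAINT_* values). */
    #define FLAG_PROPRIETARY  (1 << 0)
    #define FLAG_FORCED       (1 << 1)
    #define FLAG_WARN         (1 << 2)

    static const char *flag_string(unsigned int flags)
    {
        static char buf[32];

        if (!flags)
            return "Not tainted";
        snprintf(buf, sizeof(buf), "Tainted: %c%c%c",
                 flags & FLAG_PROPRIETARY ? 'P' : 'G',
                 flags & FLAG_FORCED      ? 'F' : ' ',
                 flags & FLAG_WARN        ? 'W' : ' ');
        return buf;
    }

    int main(void)
    {
        printf("%s\n", flag_string(0));
        printf("%s\n", flag_string(FLAG_FORCED | FLAG_WARN)); /* "Tainted: GFW" */
        return 0;
    }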
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index 6d792b66d854..5ca37fa50beb 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c | |||
@@ -92,7 +92,7 @@ static struct pid_namespace *create_pid_namespace(int level) | |||
92 | atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1); | 92 | atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1); |
93 | 93 | ||
94 | for (i = 1; i < PIDMAP_ENTRIES; i++) { | 94 | for (i = 1; i < PIDMAP_ENTRIES; i++) { |
95 | ns->pidmap[i].page = 0; | 95 | ns->pidmap[i].page = NULL; |
96 | atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE); | 96 | atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE); |
97 | } | 97 | } |
98 | 98 | ||
diff --git a/kernel/power/console.c b/kernel/power/console.c index 89bcf4973ee5..b8628be2a465 100644 --- a/kernel/power/console.c +++ b/kernel/power/console.c | |||
@@ -7,17 +7,39 @@ | |||
7 | #include <linux/vt_kern.h> | 7 | #include <linux/vt_kern.h> |
8 | #include <linux/kbd_kern.h> | 8 | #include <linux/kbd_kern.h> |
9 | #include <linux/console.h> | 9 | #include <linux/console.h> |
10 | #include <linux/module.h> | ||
10 | #include "power.h" | 11 | #include "power.h" |
11 | 12 | ||
12 | #if defined(CONFIG_VT) && defined(CONFIG_VT_CONSOLE) | 13 | #if defined(CONFIG_VT) && defined(CONFIG_VT_CONSOLE) |
13 | #define SUSPEND_CONSOLE (MAX_NR_CONSOLES-1) | 14 | #define SUSPEND_CONSOLE (MAX_NR_CONSOLES-1) |
14 | 15 | ||
15 | static int orig_fgconsole, orig_kmsg; | 16 | static int orig_fgconsole, orig_kmsg; |
17 | static int disable_vt_switch; | ||
18 | |||
19 | /* | ||
20 | * Normally during a suspend, we allocate a new console and switch to it. | ||
21 | * When we resume, we switch back to the original console. This switch | ||
22 | * can be slow, so on systems where the framebuffer can handle restoration | ||
23 | * of video registers anyway, there's little point in doing the console | ||
24 | * switch. This function allows you to disable it by passing it '0'. | ||
25 | */ | ||
26 | void pm_set_vt_switch(int do_switch) | ||
27 | { | ||
28 | acquire_console_sem(); | ||
29 | disable_vt_switch = !do_switch; | ||
30 | release_console_sem(); | ||
31 | } | ||
32 | EXPORT_SYMBOL(pm_set_vt_switch); | ||
16 | 33 | ||
17 | int pm_prepare_console(void) | 34 | int pm_prepare_console(void) |
18 | { | 35 | { |
19 | acquire_console_sem(); | 36 | acquire_console_sem(); |
20 | 37 | ||
38 | if (disable_vt_switch) { | ||
39 | release_console_sem(); | ||
40 | return 0; | ||
41 | } | ||
42 | |||
21 | orig_fgconsole = fg_console; | 43 | orig_fgconsole = fg_console; |
22 | 44 | ||
23 | if (vc_allocate(SUSPEND_CONSOLE)) { | 45 | if (vc_allocate(SUSPEND_CONSOLE)) { |
@@ -50,9 +72,12 @@ int pm_prepare_console(void) | |||
50 | void pm_restore_console(void) | 72 | void pm_restore_console(void) |
51 | { | 73 | { |
52 | acquire_console_sem(); | 74 | acquire_console_sem(); |
75 | if (disable_vt_switch) { | ||
76 | release_console_sem(); | ||
77 | return; | ||
78 | } | ||
53 | set_console(orig_fgconsole); | 79 | set_console(orig_fgconsole); |
54 | release_console_sem(); | 80 | release_console_sem(); |
55 | kmsg_redirect = orig_kmsg; | 81 | kmsg_redirect = orig_kmsg; |
56 | return; | ||
57 | } | 82 | } |
58 | #endif | 83 | #endif |
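On configurations with VT console support, a driver that restores video state itself (a framebuffer or modesetting driver, say) can call the newly exported pm_set_vt_switch(0) to skip the slow suspend-time console switch, and pm_set_vt_switch(1) to restore the default. A minimal, purely illustrative module sketch; the extern declaration stands in for whichever header this tree ends up exporting the prototype from:

    #include <linux/module.h>

    /* Exported from kernel/power/console.c in the hunk above; declared
     * directly here so the sketch does not guess at a header path. */
    extern void pm_set_vt_switch(int do_switch);

    static int __init no_vt_switch_init(void)
    {
        pm_set_vt_switch(0);    /* skip the suspend-time console switch */
        return 0;
    }

    static void __exit no_vt_switch_exit(void)
    {
        pm_set_vt_switch(1);    /* restore the default behaviour */
    }

    module_init(no_vt_switch_init);
    module_exit(no_vt_switch_exit);
    MODULE_LICENSE("GPL");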
diff --git a/kernel/printk.c b/kernel/printk.c index bdd4ea8c3f2b..d3f9c0f788bf 100644 --- a/kernel/printk.c +++ b/kernel/printk.c | |||
@@ -1287,31 +1287,7 @@ void tty_write_message(struct tty_struct *tty, char *msg) | |||
1287 | */ | 1287 | */ |
1288 | int __printk_ratelimit(int ratelimit_jiffies, int ratelimit_burst) | 1288 | int __printk_ratelimit(int ratelimit_jiffies, int ratelimit_burst) |
1289 | { | 1289 | { |
1290 | static DEFINE_SPINLOCK(ratelimit_lock); | 1290 | return __ratelimit(ratelimit_jiffies, ratelimit_burst); |
1291 | static unsigned toks = 10 * 5 * HZ; | ||
1292 | static unsigned long last_msg; | ||
1293 | static int missed; | ||
1294 | unsigned long flags; | ||
1295 | unsigned long now = jiffies; | ||
1296 | |||
1297 | spin_lock_irqsave(&ratelimit_lock, flags); | ||
1298 | toks += now - last_msg; | ||
1299 | last_msg = now; | ||
1300 | if (toks > (ratelimit_burst * ratelimit_jiffies)) | ||
1301 | toks = ratelimit_burst * ratelimit_jiffies; | ||
1302 | if (toks >= ratelimit_jiffies) { | ||
1303 | int lost = missed; | ||
1304 | |||
1305 | missed = 0; | ||
1306 | toks -= ratelimit_jiffies; | ||
1307 | spin_unlock_irqrestore(&ratelimit_lock, flags); | ||
1308 | if (lost) | ||
1309 | printk(KERN_WARNING "printk: %d messages suppressed.\n", lost); | ||
1310 | return 1; | ||
1311 | } | ||
1312 | missed++; | ||
1313 | spin_unlock_irqrestore(&ratelimit_lock, flags); | ||
1314 | return 0; | ||
1315 | } | 1291 | } |
1316 | EXPORT_SYMBOL(__printk_ratelimit); | 1292 | EXPORT_SYMBOL(__printk_ratelimit); |
1317 | 1293 | ||
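The open-coded token bucket is removed; __printk_ratelimit() now simply forwards to the shared __ratelimit() helper, which carries the same algorithm: refill tokens with the elapsed time, cap them at burst * interval, consume one interval per allowed message, and count and report suppressed messages. A self-contained userspace sketch of that algorithm, using seconds instead of jiffies and reporting suppressions in a slightly different place than the kernel does:

    #include <stdio.h>
    #include <time.h>
    #include <unistd.h>

    /* Allow one message per 'interval' seconds with a burst of 'burst'. */
    static int ratelimit(int interval, int burst)
    {
        static double toks;             /* accumulated "time credit" */
        static double last;
        static int missed;
        static int initialized;
        struct timespec ts;
        double now;

        clock_gettime(CLOCK_MONOTONIC, &ts);
        now = ts.tv_sec + ts.tv_nsec / 1e9;
        if (!initialized) {
            toks = (double)burst * interval;
            last = now;
            initialized = 1;
        }

        toks += now - last;
        last = now;
        if (toks > (double)burst * interval)
            toks = (double)burst * interval;
        if (toks >= interval) {
            toks -= interval;
            if (missed)
                printf("%d messages suppressed\n", missed);
            missed = 0;
            return 1;                   /* caller may print */
        }
        missed++;
        return 0;                       /* suppress */
    }

    int main(void)
    {
        for (int i = 0; i < 20; i++) {
            if (ratelimit(1, 3))
                printf("message %d\n", i);
            usleep(100 * 1000);         /* offer 10 messages per second */
        }
        return 0;
    }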
diff --git a/kernel/profile.c b/kernel/profile.c index 606d7387265c..ae7ead82cbc9 100644 --- a/kernel/profile.c +++ b/kernel/profile.c | |||
@@ -587,10 +587,10 @@ static int __init create_proc_profile(void) | |||
587 | return 0; | 587 | return 0; |
588 | if (create_hash_tables()) | 588 | if (create_hash_tables()) |
589 | return -1; | 589 | return -1; |
590 | entry = create_proc_entry("profile", S_IWUSR | S_IRUGO, NULL); | 590 | entry = proc_create("profile", S_IWUSR | S_IRUGO, |
591 | NULL, &proc_profile_operations); | ||
591 | if (!entry) | 592 | if (!entry) |
592 | return 0; | 593 | return 0; |
593 | entry->proc_fops = &proc_profile_operations; | ||
594 | entry->size = (1+prof_len) * sizeof(atomic_t); | 594 | entry->size = (1+prof_len) * sizeof(atomic_t); |
595 | hotcpu_notifier(profile_cpu_callback, 0); | 595 | hotcpu_notifier(profile_cpu_callback, 0); |
596 | return 0; | 596 | return 0; |
diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 67e392ed5496..dac4b4e57293 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c | |||
@@ -612,7 +612,7 @@ int generic_ptrace_pokedata(struct task_struct *tsk, long addr, long data) | |||
612 | return (copied == sizeof(data)) ? 0 : -EIO; | 612 | return (copied == sizeof(data)) ? 0 : -EIO; |
613 | } | 613 | } |
614 | 614 | ||
615 | #ifdef CONFIG_COMPAT | 615 | #if defined CONFIG_COMPAT && defined __ARCH_WANT_COMPAT_SYS_PTRACE |
616 | #include <linux/compat.h> | 616 | #include <linux/compat.h> |
617 | 617 | ||
618 | int compat_ptrace_request(struct task_struct *child, compat_long_t request, | 618 | int compat_ptrace_request(struct task_struct *child, compat_long_t request, |
@@ -667,7 +667,6 @@ int compat_ptrace_request(struct task_struct *child, compat_long_t request, | |||
667 | return ret; | 667 | return ret; |
668 | } | 668 | } |
669 | 669 | ||
670 | #ifdef __ARCH_WANT_COMPAT_SYS_PTRACE | ||
671 | asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid, | 670 | asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid, |
672 | compat_long_t addr, compat_long_t data) | 671 | compat_long_t addr, compat_long_t data) |
673 | { | 672 | { |
@@ -710,6 +709,4 @@ asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid, | |||
710 | unlock_kernel(); | 709 | unlock_kernel(); |
711 | return ret; | 710 | return ret; |
712 | } | 711 | } |
713 | #endif /* __ARCH_WANT_COMPAT_SYS_PTRACE */ | 712 | #endif /* CONFIG_COMPAT && __ARCH_WANT_COMPAT_SYS_PTRACE */ |
714 | |||
715 | #endif /* CONFIG_COMPAT */ | ||
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index 47894f919d4e..33acc424667e 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c | |||
@@ -45,6 +45,7 @@ | |||
45 | #include <linux/byteorder/swabb.h> | 45 | #include <linux/byteorder/swabb.h> |
46 | #include <linux/stat.h> | 46 | #include <linux/stat.h> |
47 | #include <linux/srcu.h> | 47 | #include <linux/srcu.h> |
48 | #include <linux/slab.h> | ||
48 | 49 | ||
49 | MODULE_LICENSE("GPL"); | 50 | MODULE_LICENSE("GPL"); |
50 | MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and " | 51 | MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and " |
diff --git a/kernel/relay.c b/kernel/relay.c index d6204a485818..7de644cdec43 100644 --- a/kernel/relay.c +++ b/kernel/relay.c | |||
@@ -65,6 +65,35 @@ static struct vm_operations_struct relay_file_mmap_ops = { | |||
65 | .close = relay_file_mmap_close, | 65 | .close = relay_file_mmap_close, |
66 | }; | 66 | }; |
67 | 67 | ||
68 | /* | ||
69 | * allocate an array of pointers to struct page | ||
70 | */ | ||
71 | static struct page **relay_alloc_page_array(unsigned int n_pages) | ||
72 | { | ||
73 | struct page **array; | ||
74 | size_t pa_size = n_pages * sizeof(struct page *); | ||
75 | |||
76 | if (pa_size > PAGE_SIZE) { | ||
77 | array = vmalloc(pa_size); | ||
78 | if (array) | ||
79 | memset(array, 0, pa_size); | ||
80 | } else { | ||
81 | array = kzalloc(pa_size, GFP_KERNEL); | ||
82 | } | ||
83 | return array; | ||
84 | } | ||
85 | |||
86 | /* | ||
87 | * free an array of pointers to struct page | ||
88 | */ | ||
89 | static void relay_free_page_array(struct page **array) | ||
90 | { | ||
91 | if (is_vmalloc_addr(array)) | ||
92 | vfree(array); | ||
93 | else | ||
94 | kfree(array); | ||
95 | } | ||
96 | |||
68 | /** | 97 | /** |
69 | * relay_mmap_buf: - mmap channel buffer to process address space | 98 | * relay_mmap_buf: - mmap channel buffer to process address space |
70 | * @buf: relay channel buffer | 99 | * @buf: relay channel buffer |
@@ -109,7 +138,7 @@ static void *relay_alloc_buf(struct rchan_buf *buf, size_t *size) | |||
109 | *size = PAGE_ALIGN(*size); | 138 | *size = PAGE_ALIGN(*size); |
110 | n_pages = *size >> PAGE_SHIFT; | 139 | n_pages = *size >> PAGE_SHIFT; |
111 | 140 | ||
112 | buf->page_array = kcalloc(n_pages, sizeof(struct page *), GFP_KERNEL); | 141 | buf->page_array = relay_alloc_page_array(n_pages); |
113 | if (!buf->page_array) | 142 | if (!buf->page_array) |
114 | return NULL; | 143 | return NULL; |
115 | 144 | ||
@@ -130,7 +159,7 @@ static void *relay_alloc_buf(struct rchan_buf *buf, size_t *size) | |||
130 | depopulate: | 159 | depopulate: |
131 | for (j = 0; j < i; j++) | 160 | for (j = 0; j < i; j++) |
132 | __free_page(buf->page_array[j]); | 161 | __free_page(buf->page_array[j]); |
133 | kfree(buf->page_array); | 162 | relay_free_page_array(buf->page_array); |
134 | return NULL; | 163 | return NULL; |
135 | } | 164 | } |
136 | 165 | ||
@@ -189,7 +218,7 @@ static void relay_destroy_buf(struct rchan_buf *buf) | |||
189 | vunmap(buf->start); | 218 | vunmap(buf->start); |
190 | for (i = 0; i < buf->page_count; i++) | 219 | for (i = 0; i < buf->page_count; i++) |
191 | __free_page(buf->page_array[i]); | 220 | __free_page(buf->page_array[i]); |
192 | kfree(buf->page_array); | 221 | relay_free_page_array(buf->page_array); |
193 | } | 222 | } |
194 | chan->buf[buf->cpu] = NULL; | 223 | chan->buf[buf->cpu] = NULL; |
195 | kfree(buf->padding); | 224 | kfree(buf->padding); |
@@ -1162,7 +1191,7 @@ static ssize_t relay_file_splice_read(struct file *in, | |||
1162 | ret = 0; | 1191 | ret = 0; |
1163 | spliced = 0; | 1192 | spliced = 0; |
1164 | 1193 | ||
1165 | while (len) { | 1194 | while (len && !spliced) { |
1166 | ret = subbuf_splice_actor(in, ppos, pipe, len, flags, &nonpad_ret); | 1195 | ret = subbuf_splice_actor(in, ppos, pipe, len, flags, &nonpad_ret); |
1167 | if (ret < 0) | 1196 | if (ret < 0) |
1168 | break; | 1197 | break; |
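Relay buffers can now span enough pages that the struct page * index array itself exceeds one page, so relay_alloc_page_array() falls back to vmalloc() for large arrays, and relay_free_page_array() uses is_vmalloc_addr() to pick vfree() or kfree() on teardown. A userspace analog of the "choose the allocator by size" pattern; userspace has no is_vmalloc_addr(), so this sketch records which allocator was used instead:

    #define _GNU_SOURCE
    #include <stdio.h>
    #include <stdlib.h>
    #include <sys/mman.h>
    #include <unistd.h>

    struct ptr_array {
        void **slots;
        size_t bytes;
        int used_mmap;      /* the kernel infers this via is_vmalloc_addr() */
    };

    static int ptr_array_alloc(struct ptr_array *pa, size_t n)
    {
        long page = sysconf(_SC_PAGESIZE);

        pa->bytes = n * sizeof(void *);
        if ((long)pa->bytes > page) {
            pa->slots = mmap(NULL, pa->bytes, PROT_READ | PROT_WRITE,
                             MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
            if (pa->slots == MAP_FAILED)
                return -1;
            pa->used_mmap = 1;
        } else {
            pa->slots = calloc(n, sizeof(void *));
            if (!pa->slots)
                return -1;
            pa->used_mmap = 0;
        }
        return 0;
    }

    static void ptr_array_free(struct ptr_array *pa)
    {
        if (pa->used_mmap)
            munmap(pa->slots, pa->bytes);
        else
            free(pa->slots);
    }

    int main(void)
    {
        struct ptr_array small, big;

        if (ptr_array_alloc(&small, 16) || ptr_array_alloc(&big, 1 << 16))
            return 1;
        printf("small via %s, big via %s\n",
               small.used_mmap ? "mmap" : "calloc",
               big.used_mmap ? "mmap" : "calloc");
        ptr_array_free(&small);
        ptr_array_free(&big);
        return 0;
    }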
diff --git a/kernel/res_counter.c b/kernel/res_counter.c index efbfc0fc232f..d3c61b4ebef2 100644 --- a/kernel/res_counter.c +++ b/kernel/res_counter.c | |||
@@ -10,6 +10,7 @@ | |||
10 | #include <linux/types.h> | 10 | #include <linux/types.h> |
11 | #include <linux/parser.h> | 11 | #include <linux/parser.h> |
12 | #include <linux/fs.h> | 12 | #include <linux/fs.h> |
13 | #include <linux/slab.h> | ||
13 | #include <linux/res_counter.h> | 14 | #include <linux/res_counter.h> |
14 | #include <linux/uaccess.h> | 15 | #include <linux/uaccess.h> |
15 | 16 | ||
@@ -27,6 +28,8 @@ int res_counter_charge_locked(struct res_counter *counter, unsigned long val) | |||
27 | } | 28 | } |
28 | 29 | ||
29 | counter->usage += val; | 30 | counter->usage += val; |
31 | if (counter->usage > counter->max_usage) | ||
32 | counter->max_usage = counter->usage; | ||
30 | return 0; | 33 | return 0; |
31 | } | 34 | } |
32 | 35 | ||
@@ -65,6 +68,8 @@ res_counter_member(struct res_counter *counter, int member) | |||
65 | switch (member) { | 68 | switch (member) { |
66 | case RES_USAGE: | 69 | case RES_USAGE: |
67 | return &counter->usage; | 70 | return &counter->usage; |
71 | case RES_MAX_USAGE: | ||
72 | return &counter->max_usage; | ||
68 | case RES_LIMIT: | 73 | case RES_LIMIT: |
69 | return &counter->limit; | 74 | return &counter->limit; |
70 | case RES_FAILCNT: | 75 | case RES_FAILCNT: |
@@ -92,6 +97,11 @@ ssize_t res_counter_read(struct res_counter *counter, int member, | |||
92 | pos, buf, s - buf); | 97 | pos, buf, s - buf); |
93 | } | 98 | } |
94 | 99 | ||
100 | u64 res_counter_read_u64(struct res_counter *counter, int member) | ||
101 | { | ||
102 | return *res_counter_member(counter, member); | ||
103 | } | ||
104 | |||
95 | ssize_t res_counter_write(struct res_counter *counter, int member, | 105 | ssize_t res_counter_write(struct res_counter *counter, int member, |
96 | const char __user *userbuf, size_t nbytes, loff_t *pos, | 106 | const char __user *userbuf, size_t nbytes, loff_t *pos, |
97 | int (*write_strategy)(char *st_buf, unsigned long long *val)) | 107 | int (*write_strategy)(char *st_buf, unsigned long long *val)) |
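The resource counter now also tracks a high-water mark (max_usage) on every successful charge and exposes it, along with the other members, through the new res_counter_read_u64(). A minimal userspace sketch of the charge path with the watermark update; the names are illustrative and the kernel's locking is omitted:

    #include <stdio.h>

    struct counter {
        unsigned long usage;
        unsigned long max_usage;        /* high-water mark */
        unsigned long limit;
        unsigned long failcnt;
    };

    /* Charge 'val' against the counter; fail (and count the failure)
     * if the limit would be exceeded, otherwise update the watermark. */
    static int counter_charge(struct counter *c, unsigned long val)
    {
        if (c->usage + val > c->limit) {
            c->failcnt++;
            return -1;
        }
        c->usage += val;
        if (c->usage > c->max_usage)
            c->max_usage = c->usage;
        return 0;
    }

    static void counter_uncharge(struct counter *c, unsigned long val)
    {
        c->usage -= val;                /* max_usage keeps the peak */
    }

    int main(void)
    {
        struct counter c = { 0, 0, 100, 0 };

        counter_charge(&c, 60);
        counter_charge(&c, 30);
        counter_uncharge(&c, 50);
        counter_charge(&c, 80);         /* 40 + 80 > 100: fails */

        printf("usage=%lu max_usage=%lu failcnt=%lu\n",
               c.usage, c.max_usage, c.failcnt);
        return 0;                       /* usage=40 max_usage=90 failcnt=1 */
    }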
diff --git a/kernel/resource.c b/kernel/resource.c index cee12cc47cab..74af2d7cb5a1 100644 --- a/kernel/resource.c +++ b/kernel/resource.c | |||
@@ -131,14 +131,8 @@ static const struct file_operations proc_iomem_operations = { | |||
131 | 131 | ||
132 | static int __init ioresources_init(void) | 132 | static int __init ioresources_init(void) |
133 | { | 133 | { |
134 | struct proc_dir_entry *entry; | 134 | proc_create("ioports", 0, NULL, &proc_ioports_operations); |
135 | 135 | proc_create("iomem", 0, NULL, &proc_iomem_operations); | |
136 | entry = create_proc_entry("ioports", 0, NULL); | ||
137 | if (entry) | ||
138 | entry->proc_fops = &proc_ioports_operations; | ||
139 | entry = create_proc_entry("iomem", 0, NULL); | ||
140 | if (entry) | ||
141 | entry->proc_fops = &proc_iomem_operations; | ||
142 | return 0; | 136 | return 0; |
143 | } | 137 | } |
144 | __initcall(ioresources_init); | 138 | __initcall(ioresources_init); |
diff --git a/kernel/sched.c b/kernel/sched.c index 740fb409e5bb..e2f7f5acc807 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -9057,13 +9057,13 @@ cpu_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, | |||
9057 | } | 9057 | } |
9058 | 9058 | ||
9059 | #ifdef CONFIG_FAIR_GROUP_SCHED | 9059 | #ifdef CONFIG_FAIR_GROUP_SCHED |
9060 | static int cpu_shares_write_uint(struct cgroup *cgrp, struct cftype *cftype, | 9060 | static int cpu_shares_write_u64(struct cgroup *cgrp, struct cftype *cftype, |
9061 | u64 shareval) | 9061 | u64 shareval) |
9062 | { | 9062 | { |
9063 | return sched_group_set_shares(cgroup_tg(cgrp), shareval); | 9063 | return sched_group_set_shares(cgroup_tg(cgrp), shareval); |
9064 | } | 9064 | } |
9065 | 9065 | ||
9066 | static u64 cpu_shares_read_uint(struct cgroup *cgrp, struct cftype *cft) | 9066 | static u64 cpu_shares_read_u64(struct cgroup *cgrp, struct cftype *cft) |
9067 | { | 9067 | { |
9068 | struct task_group *tg = cgroup_tg(cgrp); | 9068 | struct task_group *tg = cgroup_tg(cgrp); |
9069 | 9069 | ||
@@ -9073,48 +9073,14 @@ static u64 cpu_shares_read_uint(struct cgroup *cgrp, struct cftype *cft) | |||
9073 | 9073 | ||
9074 | #ifdef CONFIG_RT_GROUP_SCHED | 9074 | #ifdef CONFIG_RT_GROUP_SCHED |
9075 | static ssize_t cpu_rt_runtime_write(struct cgroup *cgrp, struct cftype *cft, | 9075 | static ssize_t cpu_rt_runtime_write(struct cgroup *cgrp, struct cftype *cft, |
9076 | struct file *file, | 9076 | s64 val) |
9077 | const char __user *userbuf, | ||
9078 | size_t nbytes, loff_t *unused_ppos) | ||
9079 | { | 9077 | { |
9080 | char buffer[64]; | 9078 | return sched_group_set_rt_runtime(cgroup_tg(cgrp), val); |
9081 | int retval = 0; | ||
9082 | s64 val; | ||
9083 | char *end; | ||
9084 | |||
9085 | if (!nbytes) | ||
9086 | return -EINVAL; | ||
9087 | if (nbytes >= sizeof(buffer)) | ||
9088 | return -E2BIG; | ||
9089 | if (copy_from_user(buffer, userbuf, nbytes)) | ||
9090 | return -EFAULT; | ||
9091 | |||
9092 | buffer[nbytes] = 0; /* nul-terminate */ | ||
9093 | |||
9094 | /* strip newline if necessary */ | ||
9095 | if (nbytes && (buffer[nbytes-1] == '\n')) | ||
9096 | buffer[nbytes-1] = 0; | ||
9097 | val = simple_strtoll(buffer, &end, 0); | ||
9098 | if (*end) | ||
9099 | return -EINVAL; | ||
9100 | |||
9101 | /* Pass to subsystem */ | ||
9102 | retval = sched_group_set_rt_runtime(cgroup_tg(cgrp), val); | ||
9103 | if (!retval) | ||
9104 | retval = nbytes; | ||
9105 | return retval; | ||
9106 | } | 9079 | } |
9107 | 9080 | ||
9108 | static ssize_t cpu_rt_runtime_read(struct cgroup *cgrp, struct cftype *cft, | 9081 | static s64 cpu_rt_runtime_read(struct cgroup *cgrp, struct cftype *cft) |
9109 | struct file *file, | ||
9110 | char __user *buf, size_t nbytes, | ||
9111 | loff_t *ppos) | ||
9112 | { | 9082 | { |
9113 | char tmp[64]; | 9083 | return sched_group_rt_runtime(cgroup_tg(cgrp)); |
9114 | long val = sched_group_rt_runtime(cgroup_tg(cgrp)); | ||
9115 | int len = sprintf(tmp, "%ld\n", val); | ||
9116 | |||
9117 | return simple_read_from_buffer(buf, nbytes, ppos, tmp, len); | ||
9118 | } | 9084 | } |
9119 | 9085 | ||
9120 | static int cpu_rt_period_write_uint(struct cgroup *cgrp, struct cftype *cftype, | 9086 | static int cpu_rt_period_write_uint(struct cgroup *cgrp, struct cftype *cftype, |
@@ -9133,20 +9099,20 @@ static struct cftype cpu_files[] = { | |||
9133 | #ifdef CONFIG_FAIR_GROUP_SCHED | 9099 | #ifdef CONFIG_FAIR_GROUP_SCHED |
9134 | { | 9100 | { |
9135 | .name = "shares", | 9101 | .name = "shares", |
9136 | .read_uint = cpu_shares_read_uint, | 9102 | .read_u64 = cpu_shares_read_u64, |
9137 | .write_uint = cpu_shares_write_uint, | 9103 | .write_u64 = cpu_shares_write_u64, |
9138 | }, | 9104 | }, |
9139 | #endif | 9105 | #endif |
9140 | #ifdef CONFIG_RT_GROUP_SCHED | 9106 | #ifdef CONFIG_RT_GROUP_SCHED |
9141 | { | 9107 | { |
9142 | .name = "rt_runtime_us", | 9108 | .name = "rt_runtime_us", |
9143 | .read = cpu_rt_runtime_read, | 9109 | .read_s64 = cpu_rt_runtime_read, |
9144 | .write = cpu_rt_runtime_write, | 9110 | .write_s64 = cpu_rt_runtime_write, |
9145 | }, | 9111 | }, |
9146 | { | 9112 | { |
9147 | .name = "rt_period_us", | 9113 | .name = "rt_period_us", |
9148 | .read_uint = cpu_rt_period_read_uint, | 9114 | .read_u64 = cpu_rt_period_read_uint, |
9149 | .write_uint = cpu_rt_period_write_uint, | 9115 | .write_u64 = cpu_rt_period_write_uint, |
9150 | }, | 9116 | }, |
9151 | #endif | 9117 | #endif |
9152 | }; | 9118 | }; |
@@ -9277,8 +9243,8 @@ out: | |||
9277 | static struct cftype files[] = { | 9243 | static struct cftype files[] = { |
9278 | { | 9244 | { |
9279 | .name = "usage", | 9245 | .name = "usage", |
9280 | .read_uint = cpuusage_read, | 9246 | .read_u64 = cpuusage_read, |
9281 | .write_uint = cpuusage_write, | 9247 | .write_u64 = cpuusage_write, |
9282 | }, | 9248 | }, |
9283 | }; | 9249 | }; |
9284 | 9250 | ||
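With the typed .read_u64/.write_u64/.read_s64/.write_s64 callbacks, the cgroup core performs the user-buffer copy and decimal parsing itself, so per-controller handlers shrink to functions that accept or return an integer. From userspace nothing changes: the control files are still read and written as plain decimal text. A small sketch; the mount point /dev/cgroup and the file name cpu.shares are illustrative of a typical setup, not fixed paths:

    #include <stdio.h>

    /* Write a u64 to a cgroup control file and read it back as text. */
    static int set_and_show(const char *path, unsigned long long val)
    {
        char buf[64];
        FILE *f;

        f = fopen(path, "w");
        if (!f)
            return -1;
        fprintf(f, "%llu\n", val);
        fclose(f);

        f = fopen(path, "r");
        if (!f)
            return -1;
        if (fgets(buf, sizeof(buf), f))
            printf("%s = %s", path, buf);
        fclose(f);
        return 0;
    }

    int main(void)
    {
        /* Illustrative path; adjust to wherever the cpu controller is mounted. */
        return set_and_show("/dev/cgroup/cpu.shares", 2048);
    }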
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index f3f4af4b8b0f..8a9498e7c831 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c | |||
@@ -277,12 +277,9 @@ static int __init init_sched_debug_procfs(void) | |||
277 | { | 277 | { |
278 | struct proc_dir_entry *pe; | 278 | struct proc_dir_entry *pe; |
279 | 279 | ||
280 | pe = create_proc_entry("sched_debug", 0644, NULL); | 280 | pe = proc_create("sched_debug", 0644, NULL, &sched_debug_fops); |
281 | if (!pe) | 281 | if (!pe) |
282 | return -ENOMEM; | 282 | return -ENOMEM; |
283 | |||
284 | pe->proc_fops = &sched_debug_fops; | ||
285 | |||
286 | return 0; | 283 | return 0; |
287 | } | 284 | } |
288 | 285 | ||
diff --git a/kernel/sys.c b/kernel/sys.c index 6a0cc71ee88d..e423d0d9e6ff 100644 --- a/kernel/sys.c +++ b/kernel/sys.c | |||
@@ -1545,6 +1545,19 @@ out: | |||
1545 | * | 1545 | * |
1546 | */ | 1546 | */ |
1547 | 1547 | ||
1548 | static void accumulate_thread_rusage(struct task_struct *t, struct rusage *r, | ||
1549 | cputime_t *utimep, cputime_t *stimep) | ||
1550 | { | ||
1551 | *utimep = cputime_add(*utimep, t->utime); | ||
1552 | *stimep = cputime_add(*stimep, t->stime); | ||
1553 | r->ru_nvcsw += t->nvcsw; | ||
1554 | r->ru_nivcsw += t->nivcsw; | ||
1555 | r->ru_minflt += t->min_flt; | ||
1556 | r->ru_majflt += t->maj_flt; | ||
1557 | r->ru_inblock += task_io_get_inblock(t); | ||
1558 | r->ru_oublock += task_io_get_oublock(t); | ||
1559 | } | ||
1560 | |||
1548 | static void k_getrusage(struct task_struct *p, int who, struct rusage *r) | 1561 | static void k_getrusage(struct task_struct *p, int who, struct rusage *r) |
1549 | { | 1562 | { |
1550 | struct task_struct *t; | 1563 | struct task_struct *t; |
@@ -1554,6 +1567,11 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r) | |||
1554 | memset((char *) r, 0, sizeof *r); | 1567 | memset((char *) r, 0, sizeof *r); |
1555 | utime = stime = cputime_zero; | 1568 | utime = stime = cputime_zero; |
1556 | 1569 | ||
1570 | if (who == RUSAGE_THREAD) { | ||
1571 | accumulate_thread_rusage(p, r, &utime, &stime); | ||
1572 | goto out; | ||
1573 | } | ||
1574 | |||
1557 | rcu_read_lock(); | 1575 | rcu_read_lock(); |
1558 | if (!lock_task_sighand(p, &flags)) { | 1576 | if (!lock_task_sighand(p, &flags)) { |
1559 | rcu_read_unlock(); | 1577 | rcu_read_unlock(); |
@@ -1586,14 +1604,7 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r) | |||
1586 | r->ru_oublock += p->signal->oublock; | 1604 | r->ru_oublock += p->signal->oublock; |
1587 | t = p; | 1605 | t = p; |
1588 | do { | 1606 | do { |
1589 | utime = cputime_add(utime, t->utime); | 1607 | accumulate_thread_rusage(t, r, &utime, &stime); |
1590 | stime = cputime_add(stime, t->stime); | ||
1591 | r->ru_nvcsw += t->nvcsw; | ||
1592 | r->ru_nivcsw += t->nivcsw; | ||
1593 | r->ru_minflt += t->min_flt; | ||
1594 | r->ru_majflt += t->maj_flt; | ||
1595 | r->ru_inblock += task_io_get_inblock(t); | ||
1596 | r->ru_oublock += task_io_get_oublock(t); | ||
1597 | t = next_thread(t); | 1608 | t = next_thread(t); |
1598 | } while (t != p); | 1609 | } while (t != p); |
1599 | break; | 1610 | break; |
@@ -1605,6 +1616,7 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r) | |||
1605 | unlock_task_sighand(p, &flags); | 1616 | unlock_task_sighand(p, &flags); |
1606 | rcu_read_unlock(); | 1617 | rcu_read_unlock(); |
1607 | 1618 | ||
1619 | out: | ||
1608 | cputime_to_timeval(utime, &r->ru_utime); | 1620 | cputime_to_timeval(utime, &r->ru_utime); |
1609 | cputime_to_timeval(stime, &r->ru_stime); | 1621 | cputime_to_timeval(stime, &r->ru_stime); |
1610 | } | 1622 | } |
@@ -1618,7 +1630,8 @@ int getrusage(struct task_struct *p, int who, struct rusage __user *ru) | |||
1618 | 1630 | ||
1619 | asmlinkage long sys_getrusage(int who, struct rusage __user *ru) | 1631 | asmlinkage long sys_getrusage(int who, struct rusage __user *ru) |
1620 | { | 1632 | { |
1621 | if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN) | 1633 | if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN && |
1634 | who != RUSAGE_THREAD) | ||
1622 | return -EINVAL; | 1635 | return -EINVAL; |
1623 | return getrusage(current, who, ru); | 1636 | return getrusage(current, who, ru); |
1624 | } | 1637 | } |
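RUSAGE_THREAD reports the resource usage of just the calling thread, while RUSAGE_SELF still sums the whole thread group via the shared accumulate_thread_rusage() helper. A userspace sketch, assuming a libc that defines RUSAGE_THREAD (glibc exposes it under _GNU_SOURCE) and a kernel with this support:

    #define _GNU_SOURCE
    #include <stdio.h>
    #include <sys/resource.h>
    #include <sys/time.h>

    int main(void)
    {
        struct rusage self, thread;
        volatile unsigned long x = 0;

        /* Burn a little CPU so the counters are non-zero. */
        for (unsigned long i = 0; i < 50UL * 1000 * 1000; i++)
            x += i;

        if (getrusage(RUSAGE_SELF, &self) || getrusage(RUSAGE_THREAD, &thread)) {
            perror("getrusage");
            return 1;
        }
        printf("process user time: %ld.%06lds\n",
               (long)self.ru_utime.tv_sec, (long)self.ru_utime.tv_usec);
        printf("thread  user time: %ld.%06lds\n",
               (long)thread.ru_utime.tv_sec, (long)thread.ru_utime.tv_usec);
        return 0;
    }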
@@ -1632,10 +1645,9 @@ asmlinkage long sys_umask(int mask) | |||
1632 | asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, | 1645 | asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, |
1633 | unsigned long arg4, unsigned long arg5) | 1646 | unsigned long arg4, unsigned long arg5) |
1634 | { | 1647 | { |
1635 | long error; | 1648 | long uninitialized_var(error); |
1636 | 1649 | ||
1637 | error = security_task_prctl(option, arg2, arg3, arg4, arg5); | 1650 | if (security_task_prctl(option, arg2, arg3, arg4, arg5, &error)) |
1638 | if (error) | ||
1639 | return error; | 1651 | return error; |
1640 | 1652 | ||
1641 | switch (option) { | 1653 | switch (option) { |
@@ -1688,17 +1700,6 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, | |||
1688 | error = -EINVAL; | 1700 | error = -EINVAL; |
1689 | break; | 1701 | break; |
1690 | 1702 | ||
1691 | case PR_GET_KEEPCAPS: | ||
1692 | if (current->keep_capabilities) | ||
1693 | error = 1; | ||
1694 | break; | ||
1695 | case PR_SET_KEEPCAPS: | ||
1696 | if (arg2 != 0 && arg2 != 1) { | ||
1697 | error = -EINVAL; | ||
1698 | break; | ||
1699 | } | ||
1700 | current->keep_capabilities = arg2; | ||
1701 | break; | ||
1702 | case PR_SET_NAME: { | 1703 | case PR_SET_NAME: { |
1703 | struct task_struct *me = current; | 1704 | struct task_struct *me = current; |
1704 | unsigned char ncomm[sizeof(me->comm)]; | 1705 | unsigned char ncomm[sizeof(me->comm)]; |
@@ -1732,17 +1733,6 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, | |||
1732 | case PR_SET_SECCOMP: | 1733 | case PR_SET_SECCOMP: |
1733 | error = prctl_set_seccomp(arg2); | 1734 | error = prctl_set_seccomp(arg2); |
1734 | break; | 1735 | break; |
1735 | |||
1736 | case PR_CAPBSET_READ: | ||
1737 | if (!cap_valid(arg2)) | ||
1738 | return -EINVAL; | ||
1739 | return !!cap_raised(current->cap_bset, arg2); | ||
1740 | case PR_CAPBSET_DROP: | ||
1741 | #ifdef CONFIG_SECURITY_FILE_CAPABILITIES | ||
1742 | return cap_prctl_drop(arg2); | ||
1743 | #else | ||
1744 | return -EINVAL; | ||
1745 | #endif | ||
1746 | case PR_GET_TSC: | 1736 | case PR_GET_TSC: |
1747 | error = GET_TSC_CTL(arg2); | 1737 | error = GET_TSC_CTL(arg2); |
1748 | break; | 1738 | break; |
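The keep-capabilities and capability-bounding-set options are removed from sys_prctl() here (they now go through security_task_prctl()), while generic options such as PR_SET_NAME are unchanged. A quick userspace sketch of setting and reading back the task comm:

    #include <stdio.h>
    #include <sys/prctl.h>

    int main(void)
    {
        char name[16] = "";     /* comm is at most 16 bytes including the NUL */

        if (prctl(PR_SET_NAME, "prctl-demo", 0, 0, 0)) {
            perror("PR_SET_NAME");
            return 1;
        }
        if (prctl(PR_GET_NAME, name, 0, 0, 0)) {
            perror("PR_GET_NAME");
            return 1;
        }
        printf("comm is now \"%s\"\n", name);
        return 0;
    }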
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index fd3364827ccf..d7ffdc59816a 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -38,6 +38,7 @@ | |||
38 | #include <linux/writeback.h> | 38 | #include <linux/writeback.h> |
39 | #include <linux/hugetlb.h> | 39 | #include <linux/hugetlb.h> |
40 | #include <linux/initrd.h> | 40 | #include <linux/initrd.h> |
41 | #include <linux/key.h> | ||
41 | #include <linux/times.h> | 42 | #include <linux/times.h> |
42 | #include <linux/limits.h> | 43 | #include <linux/limits.h> |
43 | #include <linux/dcache.h> | 44 | #include <linux/dcache.h> |
@@ -144,12 +145,6 @@ extern int no_unaligned_warning; | |||
144 | extern int max_lock_depth; | 145 | extern int max_lock_depth; |
145 | #endif | 146 | #endif |
146 | 147 | ||
147 | #ifdef CONFIG_SYSCTL_SYSCALL | ||
148 | static int parse_table(int __user *, int, void __user *, size_t __user *, | ||
149 | void __user *, size_t, struct ctl_table *); | ||
150 | #endif | ||
151 | |||
152 | |||
153 | #ifdef CONFIG_PROC_SYSCTL | 148 | #ifdef CONFIG_PROC_SYSCTL |
154 | static int proc_do_cad_pid(struct ctl_table *table, int write, struct file *filp, | 149 | static int proc_do_cad_pid(struct ctl_table *table, int write, struct file *filp, |
155 | void __user *buffer, size_t *lenp, loff_t *ppos); | 150 | void __user *buffer, size_t *lenp, loff_t *ppos); |
@@ -809,6 +804,14 @@ static struct ctl_table kern_table[] = { | |||
809 | .proc_handler = &proc_dostring, | 804 | .proc_handler = &proc_dostring, |
810 | .strategy = &sysctl_string, | 805 | .strategy = &sysctl_string, |
811 | }, | 806 | }, |
807 | #ifdef CONFIG_KEYS | ||
808 | { | ||
809 | .ctl_name = CTL_UNNUMBERED, | ||
810 | .procname = "keys", | ||
811 | .mode = 0555, | ||
812 | .child = key_sysctls, | ||
813 | }, | ||
814 | #endif | ||
812 | /* | 815 | /* |
813 | * NOTE: do not add new entries to this table unless you have read | 816 | * NOTE: do not add new entries to this table unless you have read |
814 | * Documentation/sysctl/ctl_unnumbered.txt | 817 | * Documentation/sysctl/ctl_unnumbered.txt |
@@ -1430,6 +1433,76 @@ void register_sysctl_root(struct ctl_table_root *root) | |||
1430 | } | 1433 | } |
1431 | 1434 | ||
1432 | #ifdef CONFIG_SYSCTL_SYSCALL | 1435 | #ifdef CONFIG_SYSCTL_SYSCALL |
1436 | /* Perform the actual read/write of a sysctl table entry. */ | ||
1437 | static int do_sysctl_strategy(struct ctl_table_root *root, | ||
1438 | struct ctl_table *table, | ||
1439 | int __user *name, int nlen, | ||
1440 | void __user *oldval, size_t __user *oldlenp, | ||
1441 | void __user *newval, size_t newlen) | ||
1442 | { | ||
1443 | int op = 0, rc; | ||
1444 | |||
1445 | if (oldval) | ||
1446 | op |= 004; | ||
1447 | if (newval) | ||
1448 | op |= 002; | ||
1449 | if (sysctl_perm(root, table, op)) | ||
1450 | return -EPERM; | ||
1451 | |||
1452 | if (table->strategy) { | ||
1453 | rc = table->strategy(table, name, nlen, oldval, oldlenp, | ||
1454 | newval, newlen); | ||
1455 | if (rc < 0) | ||
1456 | return rc; | ||
1457 | if (rc > 0) | ||
1458 | return 0; | ||
1459 | } | ||
1460 | |||
1461 | /* If there is no strategy routine, or if the strategy returns | ||
1462 | * zero, proceed with automatic r/w */ | ||
1463 | if (table->data && table->maxlen) { | ||
1464 | rc = sysctl_data(table, name, nlen, oldval, oldlenp, | ||
1465 | newval, newlen); | ||
1466 | if (rc < 0) | ||
1467 | return rc; | ||
1468 | } | ||
1469 | return 0; | ||
1470 | } | ||
1471 | |||
1472 | static int parse_table(int __user *name, int nlen, | ||
1473 | void __user *oldval, size_t __user *oldlenp, | ||
1474 | void __user *newval, size_t newlen, | ||
1475 | struct ctl_table_root *root, | ||
1476 | struct ctl_table *table) | ||
1477 | { | ||
1478 | int n; | ||
1479 | repeat: | ||
1480 | if (!nlen) | ||
1481 | return -ENOTDIR; | ||
1482 | if (get_user(n, name)) | ||
1483 | return -EFAULT; | ||
1484 | for ( ; table->ctl_name || table->procname; table++) { | ||
1485 | if (!table->ctl_name) | ||
1486 | continue; | ||
1487 | if (n == table->ctl_name) { | ||
1488 | int error; | ||
1489 | if (table->child) { | ||
1490 | if (sysctl_perm(root, table, 001)) | ||
1491 | return -EPERM; | ||
1492 | name++; | ||
1493 | nlen--; | ||
1494 | table = table->child; | ||
1495 | goto repeat; | ||
1496 | } | ||
1497 | error = do_sysctl_strategy(root, table, name, nlen, | ||
1498 | oldval, oldlenp, | ||
1499 | newval, newlen); | ||
1500 | return error; | ||
1501 | } | ||
1502 | } | ||
1503 | return -ENOTDIR; | ||
1504 | } | ||
1505 | |||
1433 | int do_sysctl(int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, | 1506 | int do_sysctl(int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, |
1434 | void __user *newval, size_t newlen) | 1507 | void __user *newval, size_t newlen) |
1435 | { | 1508 | { |
@@ -1447,7 +1520,8 @@ int do_sysctl(int __user *name, int nlen, void __user *oldval, size_t __user *ol | |||
1447 | for (head = sysctl_head_next(NULL); head; | 1520 | for (head = sysctl_head_next(NULL); head; |
1448 | head = sysctl_head_next(head)) { | 1521 | head = sysctl_head_next(head)) { |
1449 | error = parse_table(name, nlen, oldval, oldlenp, | 1522 | error = parse_table(name, nlen, oldval, oldlenp, |
1450 | newval, newlen, head->ctl_table); | 1523 | newval, newlen, |
1524 | head->root, head->ctl_table); | ||
1451 | if (error != -ENOTDIR) { | 1525 | if (error != -ENOTDIR) { |
1452 | sysctl_head_finish(head); | 1526 | sysctl_head_finish(head); |
1453 | break; | 1527 | break; |
@@ -1493,84 +1567,22 @@ static int test_perm(int mode, int op) | |||
1493 | return -EACCES; | 1567 | return -EACCES; |
1494 | } | 1568 | } |
1495 | 1569 | ||
1496 | int sysctl_perm(struct ctl_table *table, int op) | 1570 | int sysctl_perm(struct ctl_table_root *root, struct ctl_table *table, int op) |
1497 | { | 1571 | { |
1498 | int error; | 1572 | int error; |
1573 | int mode; | ||
1574 | |||
1499 | error = security_sysctl(table, op); | 1575 | error = security_sysctl(table, op); |
1500 | if (error) | 1576 | if (error) |
1501 | return error; | 1577 | return error; |
1502 | return test_perm(table->mode, op); | ||
1503 | } | ||
1504 | |||
1505 | #ifdef CONFIG_SYSCTL_SYSCALL | ||
1506 | static int parse_table(int __user *name, int nlen, | ||
1507 | void __user *oldval, size_t __user *oldlenp, | ||
1508 | void __user *newval, size_t newlen, | ||
1509 | struct ctl_table *table) | ||
1510 | { | ||
1511 | int n; | ||
1512 | repeat: | ||
1513 | if (!nlen) | ||
1514 | return -ENOTDIR; | ||
1515 | if (get_user(n, name)) | ||
1516 | return -EFAULT; | ||
1517 | for ( ; table->ctl_name || table->procname; table++) { | ||
1518 | if (!table->ctl_name) | ||
1519 | continue; | ||
1520 | if (n == table->ctl_name) { | ||
1521 | int error; | ||
1522 | if (table->child) { | ||
1523 | if (sysctl_perm(table, 001)) | ||
1524 | return -EPERM; | ||
1525 | name++; | ||
1526 | nlen--; | ||
1527 | table = table->child; | ||
1528 | goto repeat; | ||
1529 | } | ||
1530 | error = do_sysctl_strategy(table, name, nlen, | ||
1531 | oldval, oldlenp, | ||
1532 | newval, newlen); | ||
1533 | return error; | ||
1534 | } | ||
1535 | } | ||
1536 | return -ENOTDIR; | ||
1537 | } | ||
1538 | 1578 | ||
1539 | /* Perform the actual read/write of a sysctl table entry. */ | 1579 | if (root->permissions) |
1540 | int do_sysctl_strategy (struct ctl_table *table, | 1580 | mode = root->permissions(root, current->nsproxy, table); |
1541 | int __user *name, int nlen, | 1581 | else |
1542 | void __user *oldval, size_t __user *oldlenp, | 1582 | mode = table->mode; |
1543 | void __user *newval, size_t newlen) | ||
1544 | { | ||
1545 | int op = 0, rc; | ||
1546 | |||
1547 | if (oldval) | ||
1548 | op |= 004; | ||
1549 | if (newval) | ||
1550 | op |= 002; | ||
1551 | if (sysctl_perm(table, op)) | ||
1552 | return -EPERM; | ||
1553 | 1583 | ||
1554 | if (table->strategy) { | 1584 | return test_perm(mode, op); |
1555 | rc = table->strategy(table, name, nlen, oldval, oldlenp, | ||
1556 | newval, newlen); | ||
1557 | if (rc < 0) | ||
1558 | return rc; | ||
1559 | if (rc > 0) | ||
1560 | return 0; | ||
1561 | } | ||
1562 | |||
1563 | /* If there is no strategy routine, or if the strategy returns | ||
1564 | * zero, proceed with automatic r/w */ | ||
1565 | if (table->data && table->maxlen) { | ||
1566 | rc = sysctl_data(table, name, nlen, oldval, oldlenp, | ||
1567 | newval, newlen); | ||
1568 | if (rc < 0) | ||
1569 | return rc; | ||
1570 | } | ||
1571 | return 0; | ||
1572 | } | 1585 | } |
1573 | #endif /* CONFIG_SYSCTL_SYSCALL */ | ||
1574 | 1586 | ||
1575 | static void sysctl_set_parent(struct ctl_table *parent, struct ctl_table *table) | 1587 | static void sysctl_set_parent(struct ctl_table *parent, struct ctl_table *table) |
1576 | { | 1588 | { |
@@ -1583,9 +1595,13 @@ static void sysctl_set_parent(struct ctl_table *parent, struct ctl_table *table) | |||
1583 | 1595 | ||
1584 | static __init int sysctl_init(void) | 1596 | static __init int sysctl_init(void) |
1585 | { | 1597 | { |
1586 | int err; | ||
1587 | sysctl_set_parent(NULL, root_table); | 1598 | sysctl_set_parent(NULL, root_table); |
1588 | err = sysctl_check_table(current->nsproxy, root_table); | 1599 | #ifdef CONFIG_SYSCTL_SYSCALL_CHECK |
1600 | { | ||
1601 | int err; | ||
1602 | err = sysctl_check_table(current->nsproxy, root_table); | ||
1603 | } | ||
1604 | #endif | ||
1589 | return 0; | 1605 | return 0; |
1590 | } | 1606 | } |
1591 | 1607 | ||
@@ -1712,10 +1728,12 @@ struct ctl_table_header *__register_sysctl_paths( | |||
1712 | header->unregistering = NULL; | 1728 | header->unregistering = NULL; |
1713 | header->root = root; | 1729 | header->root = root; |
1714 | sysctl_set_parent(NULL, header->ctl_table); | 1730 | sysctl_set_parent(NULL, header->ctl_table); |
1731 | #ifdef CONFIG_SYSCTL_SYSCALL_CHECK | ||
1715 | if (sysctl_check_table(namespaces, header->ctl_table)) { | 1732 | if (sysctl_check_table(namespaces, header->ctl_table)) { |
1716 | kfree(header); | 1733 | kfree(header); |
1717 | return NULL; | 1734 | return NULL; |
1718 | } | 1735 | } |
1736 | #endif | ||
1719 | spin_lock(&sysctl_lock); | 1737 | spin_lock(&sysctl_lock); |
1720 | header_list = lookup_header_list(root, namespaces); | 1738 | header_list = lookup_header_list(root, namespaces); |
1721 | list_add_tail(&header->ctl_entry, header_list); | 1739 | list_add_tail(&header->ctl_entry, header_list); |
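do_sysctl_strategy() encodes the requested access as Unix-style permission bits (004 for a read when oldval is supplied, 002 for a write when newval is supplied, 001 for the search step when descending into a child table), and sysctl_perm() now lets a ctl_table_root supply a permissions() hook before falling back to table->mode, which is what per-namespace sysctl roots need. A deliberately simplified userspace sketch of the mode check; the kernel's test_perm() additionally shifts the mode for privileged callers (euid 0 / egid 0) before masking, which is omitted here:

    #include <stdio.h>

    #define OP_READ   004
    #define OP_WRITE  002
    #define OP_SEARCH 001

    /* Simplified: only the "other" permission bits are checked. */
    static int test_perm(int mode, int op)
    {
        return (mode & op) == op ? 0 : -1;      /* 0 = allowed */
    }

    int main(void)
    {
        int mode = 0644 & 07;   /* "other" bits of a 0644 entry: read only */
        int op = 0;

        /* An old-value buffer means a read, a new-value buffer means a write. */
        op |= OP_READ;
        op |= OP_WRITE;

        printf("read+write on mode 0644 (as other): %s\n",
               test_perm(mode, op) ? "denied" : "allowed");
        printf("read only: %s\n",
               test_perm(mode, OP_READ) ? "denied" : "allowed");
        return 0;
    }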
diff --git a/kernel/time.c b/kernel/time.c index 35d373a98782..86729042e4cd 100644 --- a/kernel/time.c +++ b/kernel/time.c | |||
@@ -35,6 +35,7 @@ | |||
35 | #include <linux/syscalls.h> | 35 | #include <linux/syscalls.h> |
36 | #include <linux/security.h> | 36 | #include <linux/security.h> |
37 | #include <linux/fs.h> | 37 | #include <linux/fs.h> |
38 | #include <linux/slab.h> | ||
38 | 39 | ||
39 | #include <asm/uaccess.h> | 40 | #include <asm/uaccess.h> |
40 | #include <asm/unistd.h> | 41 | #include <asm/unistd.h> |
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index 67fe8fc21fb1..a40e20fd0001 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c | |||
@@ -278,12 +278,9 @@ static int __init init_timer_list_procfs(void) | |||
278 | { | 278 | { |
279 | struct proc_dir_entry *pe; | 279 | struct proc_dir_entry *pe; |
280 | 280 | ||
281 | pe = create_proc_entry("timer_list", 0644, NULL); | 281 | pe = proc_create("timer_list", 0644, NULL, &timer_list_fops); |
282 | if (!pe) | 282 | if (!pe) |
283 | return -ENOMEM; | 283 | return -ENOMEM; |
284 | |||
285 | pe->proc_fops = &timer_list_fops; | ||
286 | |||
287 | return 0; | 284 | return 0; |
288 | } | 285 | } |
289 | __initcall(init_timer_list_procfs); | 286 | __initcall(init_timer_list_procfs); |
diff --git a/kernel/time/timer_stats.c b/kernel/time/timer_stats.c index 417da8c5bc72..c994530d166d 100644 --- a/kernel/time/timer_stats.c +++ b/kernel/time/timer_stats.c | |||
@@ -415,12 +415,9 @@ static int __init init_tstats_procfs(void) | |||
415 | { | 415 | { |
416 | struct proc_dir_entry *pe; | 416 | struct proc_dir_entry *pe; |
417 | 417 | ||
418 | pe = create_proc_entry("timer_stats", 0644, NULL); | 418 | pe = proc_create("timer_stats", 0644, NULL, &tstats_fops); |
419 | if (!pe) | 419 | if (!pe) |
420 | return -ENOMEM; | 420 | return -ENOMEM; |
421 | |||
422 | pe->proc_fops = &tstats_fops; | ||
423 | |||
424 | return 0; | 421 | return 0; |
425 | } | 422 | } |
426 | __initcall(init_tstats_procfs); | 423 | __initcall(init_tstats_procfs); |
diff --git a/kernel/user.c b/kernel/user.c index debce602bfdd..aefbbfa3159f 100644 --- a/kernel/user.c +++ b/kernel/user.c | |||
@@ -53,10 +53,6 @@ struct user_struct root_user = { | |||
53 | .files = ATOMIC_INIT(0), | 53 | .files = ATOMIC_INIT(0), |
54 | .sigpending = ATOMIC_INIT(0), | 54 | .sigpending = ATOMIC_INIT(0), |
55 | .locked_shm = 0, | 55 | .locked_shm = 0, |
56 | #ifdef CONFIG_KEYS | ||
57 | .uid_keyring = &root_user_keyring, | ||
58 | .session_keyring = &root_session_keyring, | ||
59 | #endif | ||
60 | #ifdef CONFIG_USER_SCHED | 56 | #ifdef CONFIG_USER_SCHED |
61 | .tg = &init_task_group, | 57 | .tg = &init_task_group, |
62 | #endif | 58 | #endif |
@@ -420,12 +416,12 @@ struct user_struct * alloc_uid(struct user_namespace *ns, uid_t uid) | |||
420 | new->mq_bytes = 0; | 416 | new->mq_bytes = 0; |
421 | #endif | 417 | #endif |
422 | new->locked_shm = 0; | 418 | new->locked_shm = 0; |
423 | 419 | #ifdef CONFIG_KEYS | |
424 | if (alloc_uid_keyring(new, current) < 0) | 420 | new->uid_keyring = new->session_keyring = NULL; |
425 | goto out_free_user; | 421 | #endif |
426 | 422 | ||
427 | if (sched_create_user(new) < 0) | 423 | if (sched_create_user(new) < 0) |
428 | goto out_put_keys; | 424 | goto out_free_user; |
429 | 425 | ||
430 | if (uids_user_create(new)) | 426 | if (uids_user_create(new)) |
431 | goto out_destoy_sched; | 427 | goto out_destoy_sched; |
@@ -459,9 +455,6 @@ struct user_struct * alloc_uid(struct user_namespace *ns, uid_t uid) | |||
459 | 455 | ||
460 | out_destoy_sched: | 456 | out_destoy_sched: |
461 | sched_destroy_user(new); | 457 | sched_destroy_user(new); |
462 | out_put_keys: | ||
463 | key_put(new->uid_keyring); | ||
464 | key_put(new->session_keyring); | ||
465 | out_free_user: | 458 | out_free_user: |
466 | kmem_cache_free(uid_cachep, new); | 459 | kmem_cache_free(uid_cachep, new); |
467 | out_unlock: | 460 | out_unlock: |
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 4c9006275df7..a9ab0596de44 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c | |||
@@ -8,6 +8,7 @@ | |||
8 | #include <linux/module.h> | 8 | #include <linux/module.h> |
9 | #include <linux/version.h> | 9 | #include <linux/version.h> |
10 | #include <linux/nsproxy.h> | 10 | #include <linux/nsproxy.h> |
11 | #include <linux/slab.h> | ||
11 | #include <linux/user_namespace.h> | 12 | #include <linux/user_namespace.h> |
12 | 13 | ||
13 | /* | 14 | /* |
@@ -73,3 +74,4 @@ void free_user_ns(struct kref *kref) | |||
73 | release_uids(ns); | 74 | release_uids(ns); |
74 | kfree(ns); | 75 | kfree(ns); |
75 | } | 76 | } |
77 | EXPORT_SYMBOL(free_user_ns); | ||
diff --git a/kernel/utsname.c b/kernel/utsname.c index 816d7b24fa03..64d398f12444 100644 --- a/kernel/utsname.c +++ b/kernel/utsname.c | |||
@@ -14,6 +14,7 @@ | |||
14 | #include <linux/utsname.h> | 14 | #include <linux/utsname.h> |
15 | #include <linux/version.h> | 15 | #include <linux/version.h> |
16 | #include <linux/err.h> | 16 | #include <linux/err.h> |
17 | #include <linux/slab.h> | ||
17 | 18 | ||
18 | /* | 19 | /* |
19 | * Clone a new ns copying an original utsname, setting refcount to 1 | 20 | * Clone a new ns copying an original utsname, setting refcount to 1 |
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 00ff4d08e370..7db251a959c5 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c | |||
@@ -158,8 +158,8 @@ static void __queue_work(struct cpu_workqueue_struct *cwq, | |||
158 | * | 158 | * |
159 | * Returns 0 if @work was already on a queue, non-zero otherwise. | 159 | * Returns 0 if @work was already on a queue, non-zero otherwise. |
160 | * | 160 | * |
161 | * We queue the work to the CPU it was submitted, but there is no | 161 | * We queue the work to the CPU on which it was submitted, but if the CPU dies |
162 | * guarantee that it will be processed by that CPU. | 162 | * it can be processed by another CPU. |
163 | */ | 163 | */ |
164 | int queue_work(struct workqueue_struct *wq, struct work_struct *work) | 164 | int queue_work(struct workqueue_struct *wq, struct work_struct *work) |
165 | { | 165 | { |
@@ -772,7 +772,7 @@ struct workqueue_struct *__create_workqueue_key(const char *name, | |||
772 | } | 772 | } |
773 | EXPORT_SYMBOL_GPL(__create_workqueue_key); | 773 | EXPORT_SYMBOL_GPL(__create_workqueue_key); |
774 | 774 | ||
775 | static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu) | 775 | static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq) |
776 | { | 776 | { |
777 | /* | 777 | /* |
778 | * Our caller is either destroy_workqueue() or CPU_DEAD, | 778 | * Our caller is either destroy_workqueue() or CPU_DEAD, |
@@ -808,19 +808,16 @@ static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu) | |||
808 | void destroy_workqueue(struct workqueue_struct *wq) | 808 | void destroy_workqueue(struct workqueue_struct *wq) |
809 | { | 809 | { |
810 | const cpumask_t *cpu_map = wq_cpu_map(wq); | 810 | const cpumask_t *cpu_map = wq_cpu_map(wq); |
811 | struct cpu_workqueue_struct *cwq; | ||
812 | int cpu; | 811 | int cpu; |
813 | 812 | ||
814 | get_online_cpus(); | 813 | get_online_cpus(); |
815 | spin_lock(&workqueue_lock); | 814 | spin_lock(&workqueue_lock); |
816 | list_del(&wq->list); | 815 | list_del(&wq->list); |
817 | spin_unlock(&workqueue_lock); | 816 | spin_unlock(&workqueue_lock); |
818 | put_online_cpus(); | ||
819 | 817 | ||
820 | for_each_cpu_mask(cpu, *cpu_map) { | 818 | for_each_cpu_mask(cpu, *cpu_map) |
821 | cwq = per_cpu_ptr(wq->cpu_wq, cpu); | 819 | cleanup_workqueue_thread(per_cpu_ptr(wq->cpu_wq, cpu)); |
822 | cleanup_workqueue_thread(cwq, cpu); | 820 | put_online_cpus(); |
823 | } | ||
824 | 821 | ||
825 | free_percpu(wq->cpu_wq); | 822 | free_percpu(wq->cpu_wq); |
826 | kfree(wq); | 823 | kfree(wq); |
@@ -838,7 +835,6 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb, | |||
838 | action &= ~CPU_TASKS_FROZEN; | 835 | action &= ~CPU_TASKS_FROZEN; |
839 | 836 | ||
840 | switch (action) { | 837 | switch (action) { |
841 | |||
842 | case CPU_UP_PREPARE: | 838 | case CPU_UP_PREPARE: |
843 | cpu_set(cpu, cpu_populated_map); | 839 | cpu_set(cpu, cpu_populated_map); |
844 | } | 840 | } |
@@ -861,11 +857,17 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb, | |||
861 | case CPU_UP_CANCELED: | 857 | case CPU_UP_CANCELED: |
862 | start_workqueue_thread(cwq, -1); | 858 | start_workqueue_thread(cwq, -1); |
863 | case CPU_DEAD: | 859 | case CPU_DEAD: |
864 | cleanup_workqueue_thread(cwq, cpu); | 860 | cleanup_workqueue_thread(cwq); |
865 | break; | 861 | break; |
866 | } | 862 | } |
867 | } | 863 | } |
868 | 864 | ||
865 | switch (action) { | ||
866 | case CPU_UP_CANCELED: | ||
867 | case CPU_DEAD: | ||
868 | cpu_clear(cpu, cpu_populated_map); | ||
869 | } | ||
870 | |||
869 | return NOTIFY_OK; | 871 | return NOTIFY_OK; |
870 | } | 872 | } |
871 | 873 | ||
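For users of the workqueue API nothing changes here: cleanup_workqueue_thread() simply loses its now-unneeded cpu argument, CPU_UP_CANCELED/CPU_DEAD additionally clear the cpu from cpu_populated_map, and queue_work()'s comment now states explicitly that work queued on a CPU may run on another CPU if the original one goes down. For reference, a minimal module sketch of the era's API; the queue name and work function are illustrative:

    #include <linux/module.h>
    #include <linux/workqueue.h>

    static struct workqueue_struct *demo_wq;

    static void demo_fn(struct work_struct *work)
    {
        printk(KERN_INFO "demo work executed\n");
    }

    static DECLARE_WORK(demo_work, demo_fn);

    static int __init demo_init(void)
    {
        demo_wq = create_workqueue("demo_wq");
        if (!demo_wq)
            return -ENOMEM;
        /* Queued on the submitting CPU, but may run elsewhere if it dies. */
        queue_work(demo_wq, &demo_work);
        return 0;
    }

    static void __exit demo_exit(void)
    {
        flush_workqueue(demo_wq);
        destroy_workqueue(demo_wq);
    }

    module_init(demo_init);
    module_exit(demo_exit);
    MODULE_LICENSE("GPL");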