diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2014-01-21 20:51:34 -0500 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-01-21 20:51:34 -0500 |
| commit | f075e0f6993f41c72dbb1d3e7a2d7740f14e89e2 (patch) | |
| tree | a25b464a67fffc6f43940e0e85e2735a48bb1ad7 | |
| parent | 5cb7398caf69e3943df78435a19a8a77fe8b9463 (diff) | |
| parent | dd4b0a4676907481256d16d5de0851b315a6f22c (diff) | |
Merge branch 'for-3.14' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
Pull cgroup updates from Tejun Heo:
"The bulk of changes are cleanups and preparations for the upcoming
kernfs conversion.
- The cgroup_event mechanism, which is and will be used only by memcg, is
moved to memcg.
- pidlist handling is updated so that it can be served by seq_file.
Also, the list is no longer sorted when sane_behavior is enabled. The cgroup
documentation explicitly states that the file is not sorted, but it
has been sorted in practice for quite some time.
- All cgroup file handling now happens on top of seq_file. This is
to prepare for kernfs conversion. In addition, all operations are
restructured so that they map 1-1 to kernfs operations.
- Other cleanups and low-pri fixes"
* 'for-3.14' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup: (40 commits)
cgroup: trivial style updates
cgroup: remove stray references to css_id
doc: cgroups: Fix typo in doc/cgroups
cgroup: fix fail path in cgroup_load_subsys()
cgroup: fix missing unlock on error in cgroup_load_subsys()
cgroup: remove for_each_root_subsys()
cgroup: implement for_each_css()
cgroup: factor out cgroup_subsys_state creation into create_css()
cgroup: combine css handling loops in cgroup_create()
cgroup: reorder operations in cgroup_create()
cgroup: make for_each_subsys() useable under cgroup_root_mutex
cgroup: css iterations and css_from_dir() are safe under cgroup_mutex
cgroup: unify pidlist and other file handling
cgroup: replace cftype->read_seq_string() with cftype->seq_show()
cgroup: attach cgroup_open_file to all cgroup files
cgroup: generalize cgroup_pidlist_open_file
cgroup: unify read path so that seq_file is always used
cgroup: unify cgroup_write_X64() and cgroup_write_string()
cgroup: remove cftype->read(), ->read_map() and ->write()
hugetlb_cgroup: convert away from cftype->read()
...
| -rw-r--r-- | Documentation/cgroups/cgroups.txt | 20 | ||||
| -rw-r--r-- | Documentation/cgroups/memory.txt | 4 | ||||
| -rw-r--r-- | Documentation/cgroups/resource_counter.txt | 4 | ||||
| -rw-r--r-- | block/blk-throttle.c | 35 | ||||
| -rw-r--r-- | block/cfq-iosched.c | 131 | ||||
| -rw-r--r-- | drivers/md/bcache/request.c | 1 | ||||
| -rw-r--r-- | include/linux/cgroup.h | 112 | ||||
| -rw-r--r-- | include/linux/vmpressure.h | 8 | ||||
| -rw-r--r-- | init/Kconfig | 3 | ||||
| -rw-r--r-- | kernel/cgroup.c | 1202 | ||||
| -rw-r--r-- | kernel/cgroup_freezer.c | 7 | ||||
| -rw-r--r-- | kernel/cpuset.c | 71 | ||||
| -rw-r--r-- | kernel/sched/core.c | 13 | ||||
| -rw-r--r-- | kernel/sched/cpuacct.c | 18 | ||||
| -rw-r--r-- | mm/hugetlb_cgroup.c | 22 | ||||
| -rw-r--r-- | mm/memcontrol.c | 426 | ||||
| -rw-r--r-- | mm/page_cgroup.c | 2 | ||||
| -rw-r--r-- | mm/vmpressure.c | 26 | ||||
| -rw-r--r-- | net/core/netprio_cgroup.c | 8 | ||||
| -rw-r--r-- | security/device_cgroup.c | 7 |
20 files changed, 1022 insertions, 1098 deletions
diff --git a/Documentation/cgroups/cgroups.txt b/Documentation/cgroups/cgroups.txt index 638bf17ff869..821de56d1580 100644 --- a/Documentation/cgroups/cgroups.txt +++ b/Documentation/cgroups/cgroups.txt | |||
| @@ -24,7 +24,6 @@ CONTENTS: | |||
| 24 | 2.1 Basic Usage | 24 | 2.1 Basic Usage |
| 25 | 2.2 Attaching processes | 25 | 2.2 Attaching processes |
| 26 | 2.3 Mounting hierarchies by name | 26 | 2.3 Mounting hierarchies by name |
| 27 | 2.4 Notification API | ||
| 28 | 3. Kernel API | 27 | 3. Kernel API |
| 29 | 3.1 Overview | 28 | 3.1 Overview |
| 30 | 3.2 Synchronization | 29 | 3.2 Synchronization |
| @@ -472,25 +471,6 @@ you give a subsystem a name. | |||
| 472 | The name of the subsystem appears as part of the hierarchy description | 471 | The name of the subsystem appears as part of the hierarchy description |
| 473 | in /proc/mounts and /proc/<pid>/cgroups. | 472 | in /proc/mounts and /proc/<pid>/cgroups. |
| 474 | 473 | ||
| 475 | 2.4 Notification API | ||
| 476 | -------------------- | ||
| 477 | |||
| 478 | There is mechanism which allows to get notifications about changing | ||
| 479 | status of a cgroup. | ||
| 480 | |||
| 481 | To register a new notification handler you need to: | ||
| 482 | - create a file descriptor for event notification using eventfd(2); | ||
| 483 | - open a control file to be monitored (e.g. memory.usage_in_bytes); | ||
| 484 | - write "<event_fd> <control_fd> <args>" to cgroup.event_control. | ||
| 485 | Interpretation of args is defined by control file implementation; | ||
| 486 | |||
| 487 | eventfd will be woken up by control file implementation or when the | ||
| 488 | cgroup is removed. | ||
| 489 | |||
| 490 | To unregister a notification handler just close eventfd. | ||
| 491 | |||
| 492 | NOTE: Support of notifications should be implemented for the control | ||
| 493 | file. See documentation for the subsystem. | ||
| 494 | 474 | ||
| 495 | 3. Kernel API | 475 | 3. Kernel API |
| 496 | ============= | 476 | ============= |
diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt index e2bc132608fd..2622115276aa 100644 --- a/Documentation/cgroups/memory.txt +++ b/Documentation/cgroups/memory.txt | |||
| @@ -577,7 +577,7 @@ Each memcg's numa_stat file includes "total", "file", "anon" and "unevictable" | |||
| 577 | per-node page counts including "hierarchical_<counter>" which sums up all | 577 | per-node page counts including "hierarchical_<counter>" which sums up all |
| 578 | hierarchical children's values in addition to the memcg's own value. | 578 | hierarchical children's values in addition to the memcg's own value. |
| 579 | 579 | ||
| 580 | The ouput format of memory.numa_stat is: | 580 | The output format of memory.numa_stat is: |
| 581 | 581 | ||
| 582 | total=<total pages> N0=<node 0 pages> N1=<node 1 pages> ... | 582 | total=<total pages> N0=<node 0 pages> N1=<node 1 pages> ... |
| 583 | file=<total file pages> N0=<node 0 pages> N1=<node 1 pages> ... | 583 | file=<total file pages> N0=<node 0 pages> N1=<node 1 pages> ... |
| @@ -670,7 +670,7 @@ page tables. | |||
| 670 | 670 | ||
| 671 | 8.1 Interface | 671 | 8.1 Interface |
| 672 | 672 | ||
| 673 | This feature is disabled by default. It can be enabledi (and disabled again) by | 673 | This feature is disabled by default. It can be enabled (and disabled again) by |
| 674 | writing to memory.move_charge_at_immigrate of the destination cgroup. | 674 | writing to memory.move_charge_at_immigrate of the destination cgroup. |
| 675 | 675 | ||
| 676 | If you want to enable it: | 676 | If you want to enable it: |
diff --git a/Documentation/cgroups/resource_counter.txt b/Documentation/cgroups/resource_counter.txt index c4d99ed0b418..52e1da16a309 100644 --- a/Documentation/cgroups/resource_counter.txt +++ b/Documentation/cgroups/resource_counter.txt | |||
| @@ -97,8 +97,8 @@ to work with it. | |||
| 97 | (struct res_counter *rc, struct res_counter *top, | 97 | (struct res_counter *rc, struct res_counter *top, |
| 98 | unsinged long val) | 98 | unsinged long val) |
| 99 | 99 | ||
| 100 | Almost same as res_cunter_uncharge() but propagation of uncharge | 100 | Almost same as res_counter_uncharge() but propagation of uncharge |
| 101 | stops when rc == top. This is useful when kill a res_coutner in | 101 | stops when rc == top. This is useful when kill a res_counter in |
| 102 | child cgroup. | 102 | child cgroup. |
| 103 | 103 | ||
| 104 | 2.1 Other accounting routines | 104 | 2.1 Other accounting routines |
diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 06534049afba..a760857e6b62 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c | |||
| @@ -1303,13 +1303,10 @@ static u64 tg_prfill_cpu_rwstat(struct seq_file *sf, | |||
| 1303 | return __blkg_prfill_rwstat(sf, pd, &rwstat); | 1303 | return __blkg_prfill_rwstat(sf, pd, &rwstat); |
| 1304 | } | 1304 | } |
| 1305 | 1305 | ||
| 1306 | static int tg_print_cpu_rwstat(struct cgroup_subsys_state *css, | 1306 | static int tg_print_cpu_rwstat(struct seq_file *sf, void *v) |
| 1307 | struct cftype *cft, struct seq_file *sf) | ||
| 1308 | { | 1307 | { |
| 1309 | struct blkcg *blkcg = css_to_blkcg(css); | 1308 | blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), tg_prfill_cpu_rwstat, |
| 1310 | 1309 | &blkcg_policy_throtl, seq_cft(sf)->private, true); | |
| 1311 | blkcg_print_blkgs(sf, blkcg, tg_prfill_cpu_rwstat, &blkcg_policy_throtl, | ||
| 1312 | cft->private, true); | ||
| 1313 | return 0; | 1310 | return 0; |
| 1314 | } | 1311 | } |
| 1315 | 1312 | ||
| @@ -1335,19 +1332,17 @@ static u64 tg_prfill_conf_uint(struct seq_file *sf, struct blkg_policy_data *pd, | |||
| 1335 | return __blkg_prfill_u64(sf, pd, v); | 1332 | return __blkg_prfill_u64(sf, pd, v); |
| 1336 | } | 1333 | } |
| 1337 | 1334 | ||
| 1338 | static int tg_print_conf_u64(struct cgroup_subsys_state *css, | 1335 | static int tg_print_conf_u64(struct seq_file *sf, void *v) |
| 1339 | struct cftype *cft, struct seq_file *sf) | ||
| 1340 | { | 1336 | { |
| 1341 | blkcg_print_blkgs(sf, css_to_blkcg(css), tg_prfill_conf_u64, | 1337 | blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), tg_prfill_conf_u64, |
| 1342 | &blkcg_policy_throtl, cft->private, false); | 1338 | &blkcg_policy_throtl, seq_cft(sf)->private, false); |
| 1343 | return 0; | 1339 | return 0; |
| 1344 | } | 1340 | } |
| 1345 | 1341 | ||
| 1346 | static int tg_print_conf_uint(struct cgroup_subsys_state *css, | 1342 | static int tg_print_conf_uint(struct seq_file *sf, void *v) |
| 1347 | struct cftype *cft, struct seq_file *sf) | ||
| 1348 | { | 1343 | { |
| 1349 | blkcg_print_blkgs(sf, css_to_blkcg(css), tg_prfill_conf_uint, | 1344 | blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), tg_prfill_conf_uint, |
| 1350 | &blkcg_policy_throtl, cft->private, false); | 1345 | &blkcg_policy_throtl, seq_cft(sf)->private, false); |
| 1351 | return 0; | 1346 | return 0; |
| 1352 | } | 1347 | } |
| 1353 | 1348 | ||
| @@ -1428,40 +1423,40 @@ static struct cftype throtl_files[] = { | |||
| 1428 | { | 1423 | { |
| 1429 | .name = "throttle.read_bps_device", | 1424 | .name = "throttle.read_bps_device", |
| 1430 | .private = offsetof(struct throtl_grp, bps[READ]), | 1425 | .private = offsetof(struct throtl_grp, bps[READ]), |
| 1431 | .read_seq_string = tg_print_conf_u64, | 1426 | .seq_show = tg_print_conf_u64, |
| 1432 | .write_string = tg_set_conf_u64, | 1427 | .write_string = tg_set_conf_u64, |
| 1433 | .max_write_len = 256, | 1428 | .max_write_len = 256, |
| 1434 | }, | 1429 | }, |
| 1435 | { | 1430 | { |
| 1436 | .name = "throttle.write_bps_device", | 1431 | .name = "throttle.write_bps_device", |
| 1437 | .private = offsetof(struct throtl_grp, bps[WRITE]), | 1432 | .private = offsetof(struct throtl_grp, bps[WRITE]), |
| 1438 | .read_seq_string = tg_print_conf_u64, | 1433 | .seq_show = tg_print_conf_u64, |
| 1439 | .write_string = tg_set_conf_u64, | 1434 | .write_string = tg_set_conf_u64, |
| 1440 | .max_write_len = 256, | 1435 | .max_write_len = 256, |
| 1441 | }, | 1436 | }, |
| 1442 | { | 1437 | { |
| 1443 | .name = "throttle.read_iops_device", | 1438 | .name = "throttle.read_iops_device", |
| 1444 | .private = offsetof(struct throtl_grp, iops[READ]), | 1439 | .private = offsetof(struct throtl_grp, iops[READ]), |
| 1445 | .read_seq_string = tg_print_conf_uint, | 1440 | .seq_show = tg_print_conf_uint, |
| 1446 | .write_string = tg_set_conf_uint, | 1441 | .write_string = tg_set_conf_uint, |
| 1447 | .max_write_len = 256, | 1442 | .max_write_len = 256, |
| 1448 | }, | 1443 | }, |
| 1449 | { | 1444 | { |
| 1450 | .name = "throttle.write_iops_device", | 1445 | .name = "throttle.write_iops_device", |
| 1451 | .private = offsetof(struct throtl_grp, iops[WRITE]), | 1446 | .private = offsetof(struct throtl_grp, iops[WRITE]), |
| 1452 | .read_seq_string = tg_print_conf_uint, | 1447 | .seq_show = tg_print_conf_uint, |
| 1453 | .write_string = tg_set_conf_uint, | 1448 | .write_string = tg_set_conf_uint, |
| 1454 | .max_write_len = 256, | 1449 | .max_write_len = 256, |
| 1455 | }, | 1450 | }, |
| 1456 | { | 1451 | { |
| 1457 | .name = "throttle.io_service_bytes", | 1452 | .name = "throttle.io_service_bytes", |
| 1458 | .private = offsetof(struct tg_stats_cpu, service_bytes), | 1453 | .private = offsetof(struct tg_stats_cpu, service_bytes), |
| 1459 | .read_seq_string = tg_print_cpu_rwstat, | 1454 | .seq_show = tg_print_cpu_rwstat, |
| 1460 | }, | 1455 | }, |
| 1461 | { | 1456 | { |
| 1462 | .name = "throttle.io_serviced", | 1457 | .name = "throttle.io_serviced", |
| 1463 | .private = offsetof(struct tg_stats_cpu, serviced), | 1458 | .private = offsetof(struct tg_stats_cpu, serviced), |
| 1464 | .read_seq_string = tg_print_cpu_rwstat, | 1459 | .seq_show = tg_print_cpu_rwstat, |
| 1465 | }, | 1460 | }, |
| 1466 | { } /* terminate */ | 1461 | { } /* terminate */ |
| 1467 | }; | 1462 | }; |
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 4d5cec1ad80d..744833b630c6 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c | |||
| @@ -1632,11 +1632,11 @@ static u64 cfqg_prfill_weight_device(struct seq_file *sf, | |||
| 1632 | return __blkg_prfill_u64(sf, pd, cfqg->dev_weight); | 1632 | return __blkg_prfill_u64(sf, pd, cfqg->dev_weight); |
| 1633 | } | 1633 | } |
| 1634 | 1634 | ||
| 1635 | static int cfqg_print_weight_device(struct cgroup_subsys_state *css, | 1635 | static int cfqg_print_weight_device(struct seq_file *sf, void *v) |
| 1636 | struct cftype *cft, struct seq_file *sf) | ||
| 1637 | { | 1636 | { |
| 1638 | blkcg_print_blkgs(sf, css_to_blkcg(css), cfqg_prfill_weight_device, | 1637 | blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), |
| 1639 | &blkcg_policy_cfq, 0, false); | 1638 | cfqg_prfill_weight_device, &blkcg_policy_cfq, |
| 1639 | 0, false); | ||
| 1640 | return 0; | 1640 | return 0; |
| 1641 | } | 1641 | } |
| 1642 | 1642 | ||
| @@ -1650,26 +1650,23 @@ static u64 cfqg_prfill_leaf_weight_device(struct seq_file *sf, | |||
| 1650 | return __blkg_prfill_u64(sf, pd, cfqg->dev_leaf_weight); | 1650 | return __blkg_prfill_u64(sf, pd, cfqg->dev_leaf_weight); |
| 1651 | } | 1651 | } |
| 1652 | 1652 | ||
| 1653 | static int cfqg_print_leaf_weight_device(struct cgroup_subsys_state *css, | 1653 | static int cfqg_print_leaf_weight_device(struct seq_file *sf, void *v) |
| 1654 | struct cftype *cft, | ||
| 1655 | struct seq_file *sf) | ||
| 1656 | { | 1654 | { |
| 1657 | blkcg_print_blkgs(sf, css_to_blkcg(css), cfqg_prfill_leaf_weight_device, | 1655 | blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), |
| 1658 | &blkcg_policy_cfq, 0, false); | 1656 | cfqg_prfill_leaf_weight_device, &blkcg_policy_cfq, |
| 1657 | 0, false); | ||
| 1659 | return 0; | 1658 | return 0; |
| 1660 | } | 1659 | } |
| 1661 | 1660 | ||
| 1662 | static int cfq_print_weight(struct cgroup_subsys_state *css, struct cftype *cft, | 1661 | static int cfq_print_weight(struct seq_file *sf, void *v) |
| 1663 | struct seq_file *sf) | ||
| 1664 | { | 1662 | { |
| 1665 | seq_printf(sf, "%u\n", css_to_blkcg(css)->cfq_weight); | 1663 | seq_printf(sf, "%u\n", css_to_blkcg(seq_css(sf))->cfq_weight); |
| 1666 | return 0; | 1664 | return 0; |
| 1667 | } | 1665 | } |
| 1668 | 1666 | ||
| 1669 | static int cfq_print_leaf_weight(struct cgroup_subsys_state *css, | 1667 | static int cfq_print_leaf_weight(struct seq_file *sf, void *v) |
| 1670 | struct cftype *cft, struct seq_file *sf) | ||
| 1671 | { | 1668 | { |
| 1672 | seq_printf(sf, "%u\n", css_to_blkcg(css)->cfq_leaf_weight); | 1669 | seq_printf(sf, "%u\n", css_to_blkcg(seq_css(sf))->cfq_leaf_weight); |
| 1673 | return 0; | 1670 | return 0; |
| 1674 | } | 1671 | } |
| 1675 | 1672 | ||
| @@ -1762,23 +1759,17 @@ static int cfq_set_leaf_weight(struct cgroup_subsys_state *css, | |||
| 1762 | return __cfq_set_weight(css, cft, val, true); | 1759 | return __cfq_set_weight(css, cft, val, true); |
| 1763 | } | 1760 | } |
| 1764 | 1761 | ||
| 1765 | static int cfqg_print_stat(struct cgroup_subsys_state *css, struct cftype *cft, | 1762 | static int cfqg_print_stat(struct seq_file *sf, void *v) |
| 1766 | struct seq_file *sf) | ||
| 1767 | { | 1763 | { |
| 1768 | struct blkcg *blkcg = css_to_blkcg(css); | 1764 | blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), blkg_prfill_stat, |
| 1769 | 1765 | &blkcg_policy_cfq, seq_cft(sf)->private, false); | |
| 1770 | blkcg_print_blkgs(sf, blkcg, blkg_prfill_stat, &blkcg_policy_cfq, | ||
| 1771 | cft->private, false); | ||
| 1772 | return 0; | 1766 | return 0; |
| 1773 | } | 1767 | } |
| 1774 | 1768 | ||
| 1775 | static int cfqg_print_rwstat(struct cgroup_subsys_state *css, | 1769 | static int cfqg_print_rwstat(struct seq_file *sf, void *v) |
| 1776 | struct cftype *cft, struct seq_file *sf) | ||
| 1777 | { | 1770 | { |
| 1778 | struct blkcg *blkcg = css_to_blkcg(css); | 1771 | blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), blkg_prfill_rwstat, |
| 1779 | 1772 | &blkcg_policy_cfq, seq_cft(sf)->private, true); | |
| 1780 | blkcg_print_blkgs(sf, blkcg, blkg_prfill_rwstat, &blkcg_policy_cfq, | ||
| 1781 | cft->private, true); | ||
| 1782 | return 0; | 1773 | return 0; |
| 1783 | } | 1774 | } |
| 1784 | 1775 | ||
| @@ -1798,23 +1789,19 @@ static u64 cfqg_prfill_rwstat_recursive(struct seq_file *sf, | |||
| 1798 | return __blkg_prfill_rwstat(sf, pd, &sum); | 1789 | return __blkg_prfill_rwstat(sf, pd, &sum); |
| 1799 | } | 1790 | } |
| 1800 | 1791 | ||
| 1801 | static int cfqg_print_stat_recursive(struct cgroup_subsys_state *css, | 1792 | static int cfqg_print_stat_recursive(struct seq_file *sf, void *v) |
| 1802 | struct cftype *cft, struct seq_file *sf) | ||
| 1803 | { | 1793 | { |
| 1804 | struct blkcg *blkcg = css_to_blkcg(css); | 1794 | blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), |
| 1805 | 1795 | cfqg_prfill_stat_recursive, &blkcg_policy_cfq, | |
| 1806 | blkcg_print_blkgs(sf, blkcg, cfqg_prfill_stat_recursive, | 1796 | seq_cft(sf)->private, false); |
| 1807 | &blkcg_policy_cfq, cft->private, false); | ||
| 1808 | return 0; | 1797 | return 0; |
| 1809 | } | 1798 | } |
| 1810 | 1799 | ||
| 1811 | static int cfqg_print_rwstat_recursive(struct cgroup_subsys_state *css, | 1800 | static int cfqg_print_rwstat_recursive(struct seq_file *sf, void *v) |
| 1812 | struct cftype *cft, struct seq_file *sf) | ||
| 1813 | { | 1801 | { |
| 1814 | struct blkcg *blkcg = css_to_blkcg(css); | 1802 | blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), |
| 1815 | 1803 | cfqg_prfill_rwstat_recursive, &blkcg_policy_cfq, | |
| 1816 | blkcg_print_blkgs(sf, blkcg, cfqg_prfill_rwstat_recursive, | 1804 | seq_cft(sf)->private, true); |
| 1817 | &blkcg_policy_cfq, cft->private, true); | ||
| 1818 | return 0; | 1805 | return 0; |
| 1819 | } | 1806 | } |
| 1820 | 1807 | ||
| @@ -1835,13 +1822,11 @@ static u64 cfqg_prfill_avg_queue_size(struct seq_file *sf, | |||
| 1835 | } | 1822 | } |
| 1836 | 1823 | ||
| 1837 | /* print avg_queue_size */ | 1824 | /* print avg_queue_size */ |
| 1838 | static int cfqg_print_avg_queue_size(struct cgroup_subsys_state *css, | 1825 | static int cfqg_print_avg_queue_size(struct seq_file *sf, void *v) |
| 1839 | struct cftype *cft, struct seq_file *sf) | ||
| 1840 | { | 1826 | { |
| 1841 | struct blkcg *blkcg = css_to_blkcg(css); | 1827 | blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), |
| 1842 | 1828 | cfqg_prfill_avg_queue_size, &blkcg_policy_cfq, | |
| 1843 | blkcg_print_blkgs(sf, blkcg, cfqg_prfill_avg_queue_size, | 1829 | 0, false); |
| 1844 | &blkcg_policy_cfq, 0, false); | ||
| 1845 | return 0; | 1830 | return 0; |
| 1846 | } | 1831 | } |
| 1847 | #endif /* CONFIG_DEBUG_BLK_CGROUP */ | 1832 | #endif /* CONFIG_DEBUG_BLK_CGROUP */ |
| @@ -1851,14 +1836,14 @@ static struct cftype cfq_blkcg_files[] = { | |||
| 1851 | { | 1836 | { |
| 1852 | .name = "weight_device", | 1837 | .name = "weight_device", |
| 1853 | .flags = CFTYPE_ONLY_ON_ROOT, | 1838 | .flags = CFTYPE_ONLY_ON_ROOT, |
| 1854 | .read_seq_string = cfqg_print_leaf_weight_device, | 1839 | .seq_show = cfqg_print_leaf_weight_device, |
| 1855 | .write_string = cfqg_set_leaf_weight_device, | 1840 | .write_string = cfqg_set_leaf_weight_device, |
| 1856 | .max_write_len = 256, | 1841 | .max_write_len = 256, |
| 1857 | }, | 1842 | }, |
| 1858 | { | 1843 | { |
| 1859 | .name = "weight", | 1844 | .name = "weight", |
| 1860 | .flags = CFTYPE_ONLY_ON_ROOT, | 1845 | .flags = CFTYPE_ONLY_ON_ROOT, |
| 1861 | .read_seq_string = cfq_print_leaf_weight, | 1846 | .seq_show = cfq_print_leaf_weight, |
| 1862 | .write_u64 = cfq_set_leaf_weight, | 1847 | .write_u64 = cfq_set_leaf_weight, |
| 1863 | }, | 1848 | }, |
| 1864 | 1849 | ||
| @@ -1866,26 +1851,26 @@ static struct cftype cfq_blkcg_files[] = { | |||
| 1866 | { | 1851 | { |
| 1867 | .name = "weight_device", | 1852 | .name = "weight_device", |
| 1868 | .flags = CFTYPE_NOT_ON_ROOT, | 1853 | .flags = CFTYPE_NOT_ON_ROOT, |
| 1869 | .read_seq_string = cfqg_print_weight_device, | 1854 | .seq_show = cfqg_print_weight_device, |
| 1870 | .write_string = cfqg_set_weight_device, | 1855 | .write_string = cfqg_set_weight_device, |
| 1871 | .max_write_len = 256, | 1856 | .max_write_len = 256, |
| 1872 | }, | 1857 | }, |
| 1873 | { | 1858 | { |
| 1874 | .name = "weight", | 1859 | .name = "weight", |
| 1875 | .flags = CFTYPE_NOT_ON_ROOT, | 1860 | .flags = CFTYPE_NOT_ON_ROOT, |
| 1876 | .read_seq_string = cfq_print_weight, | 1861 | .seq_show = cfq_print_weight, |
| 1877 | .write_u64 = cfq_set_weight, | 1862 | .write_u64 = cfq_set_weight, |
| 1878 | }, | 1863 | }, |
| 1879 | 1864 | ||
| 1880 | { | 1865 | { |
| 1881 | .name = "leaf_weight_device", | 1866 | .name = "leaf_weight_device", |
| 1882 | .read_seq_string = cfqg_print_leaf_weight_device, | 1867 | .seq_show = cfqg_print_leaf_weight_device, |
| 1883 | .write_string = cfqg_set_leaf_weight_device, | 1868 | .write_string = cfqg_set_leaf_weight_device, |
| 1884 | .max_write_len = 256, | 1869 | .max_write_len = 256, |
| 1885 | }, | 1870 | }, |
| 1886 | { | 1871 | { |
| 1887 | .name = "leaf_weight", | 1872 | .name = "leaf_weight", |
| 1888 | .read_seq_string = cfq_print_leaf_weight, | 1873 | .seq_show = cfq_print_leaf_weight, |
| 1889 | .write_u64 = cfq_set_leaf_weight, | 1874 | .write_u64 = cfq_set_leaf_weight, |
| 1890 | }, | 1875 | }, |
| 1891 | 1876 | ||
| @@ -1893,114 +1878,114 @@ static struct cftype cfq_blkcg_files[] = { | |||
| 1893 | { | 1878 | { |
| 1894 | .name = "time", | 1879 | .name = "time", |
| 1895 | .private = offsetof(struct cfq_group, stats.time), | 1880 | .private = offsetof(struct cfq_group, stats.time), |
| 1896 | .read_seq_string = cfqg_print_stat, | 1881 | .seq_show = cfqg_print_stat, |
| 1897 | }, | 1882 | }, |
| 1898 | { | 1883 | { |
| 1899 | .name = "sectors", | 1884 | .name = "sectors", |
| 1900 | .private = offsetof(struct cfq_group, stats.sectors), | 1885 | .private = offsetof(struct cfq_group, stats.sectors), |
| 1901 | .read_seq_string = cfqg_print_stat, | 1886 | .seq_show = cfqg_print_stat, |
| 1902 | }, | 1887 | }, |
| 1903 | { | 1888 | { |
| 1904 | .name = "io_service_bytes", | 1889 | .name = "io_service_bytes", |
| 1905 | .private = offsetof(struct cfq_group, stats.service_bytes), | 1890 | .private = offsetof(struct cfq_group, stats.service_bytes), |
| 1906 | .read_seq_string = cfqg_print_rwstat, | 1891 | .seq_show = cfqg_print_rwstat, |
| 1907 | }, | 1892 | }, |
| 1908 | { | 1893 | { |
| 1909 | .name = "io_serviced", | 1894 | .name = "io_serviced", |
| 1910 | .private = offsetof(struct cfq_group, stats.serviced), | 1895 | .private = offsetof(struct cfq_group, stats.serviced), |
| 1911 | .read_seq_string = cfqg_print_rwstat, | 1896 | .seq_show = cfqg_print_rwstat, |
| 1912 | }, | 1897 | }, |
| 1913 | { | 1898 | { |
| 1914 | .name = "io_service_time", | 1899 | .name = "io_service_time", |
| 1915 | .private = offsetof(struct cfq_group, stats.service_time), | 1900 | .private = offsetof(struct cfq_group, stats.service_time), |
| 1916 | .read_seq_string = cfqg_print_rwstat, | 1901 | .seq_show = cfqg_print_rwstat, |
| 1917 | }, | 1902 | }, |
| 1918 | { | 1903 | { |
| 1919 | .name = "io_wait_time", | 1904 | .name = "io_wait_time", |
| 1920 | .private = offsetof(struct cfq_group, stats.wait_time), | 1905 | .private = offsetof(struct cfq_group, stats.wait_time), |
| 1921 | .read_seq_string = cfqg_print_rwstat, | 1906 | .seq_show = cfqg_print_rwstat, |
| 1922 | }, | 1907 | }, |
| 1923 | { | 1908 | { |
| 1924 | .name = "io_merged", | 1909 | .name = "io_merged", |
| 1925 | .private = offsetof(struct cfq_group, stats.merged), | 1910 | .private = offsetof(struct cfq_group, stats.merged), |
| 1926 | .read_seq_string = cfqg_print_rwstat, | 1911 | .seq_show = cfqg_print_rwstat, |
| 1927 | }, | 1912 | }, |
| 1928 | { | 1913 | { |
| 1929 | .name = "io_queued", | 1914 | .name = "io_queued", |
| 1930 | .private = offsetof(struct cfq_group, stats.queued), | 1915 | .private = offsetof(struct cfq_group, stats.queued), |
| 1931 | .read_seq_string = cfqg_print_rwstat, | 1916 | .seq_show = cfqg_print_rwstat, |
| 1932 | }, | 1917 | }, |
| 1933 | 1918 | ||
| 1934 | /* the same statictics which cover the cfqg and its descendants */ | 1919 | /* the same statictics which cover the cfqg and its descendants */ |
| 1935 | { | 1920 | { |
| 1936 | .name = "time_recursive", | 1921 | .name = "time_recursive", |
| 1937 | .private = offsetof(struct cfq_group, stats.time), | 1922 | .private = offsetof(struct cfq_group, stats.time), |
| 1938 | .read_seq_string = cfqg_print_stat_recursive, | 1923 | .seq_show = cfqg_print_stat_recursive, |
| 1939 | }, | 1924 | }, |
| 1940 | { | 1925 | { |
| 1941 | .name = "sectors_recursive", | 1926 | .name = "sectors_recursive", |
| 1942 | .private = offsetof(struct cfq_group, stats.sectors), | 1927 | .private = offsetof(struct cfq_group, stats.sectors), |
| 1943 | .read_seq_string = cfqg_print_stat_recursive, | 1928 | .seq_show = cfqg_print_stat_recursive, |
| 1944 | }, | 1929 | }, |
| 1945 | { | 1930 | { |
| 1946 | .name = "io_service_bytes_recursive", | 1931 | .name = "io_service_bytes_recursive", |
| 1947 | .private = offsetof(struct cfq_group, stats.service_bytes), | 1932 | .private = offsetof(struct cfq_group, stats.service_bytes), |
| 1948 | .read_seq_string = cfqg_print_rwstat_recursive, | 1933 | .seq_show = cfqg_print_rwstat_recursive, |
| 1949 | }, | 1934 | }, |
| 1950 | { | 1935 | { |
| 1951 | .name = "io_serviced_recursive", | 1936 | .name = "io_serviced_recursive", |
| 1952 | .private = offsetof(struct cfq_group, stats.serviced), | 1937 | .private = offsetof(struct cfq_group, stats.serviced), |
| 1953 | .read_seq_string = cfqg_print_rwstat_recursive, | 1938 | .seq_show = cfqg_print_rwstat_recursive, |
| 1954 | }, | 1939 | }, |
| 1955 | { | 1940 | { |
| 1956 | .name = "io_service_time_recursive", | 1941 | .name = "io_service_time_recursive", |
| 1957 | .private = offsetof(struct cfq_group, stats.service_time), | 1942 | .private = offsetof(struct cfq_group, stats.service_time), |
| 1958 | .read_seq_string = cfqg_print_rwstat_recursive, | 1943 | .seq_show = cfqg_print_rwstat_recursive, |
| 1959 | }, | 1944 | }, |
| 1960 | { | 1945 | { |
| 1961 | .name = "io_wait_time_recursive", | 1946 | .name = "io_wait_time_recursive", |
| 1962 | .private = offsetof(struct cfq_group, stats.wait_time), | 1947 | .private = offsetof(struct cfq_group, stats.wait_time), |
| 1963 | .read_seq_string = cfqg_print_rwstat_recursive, | 1948 | .seq_show = cfqg_print_rwstat_recursive, |
| 1964 | }, | 1949 | }, |
| 1965 | { | 1950 | { |
| 1966 | .name = "io_merged_recursive", | 1951 | .name = "io_merged_recursive", |
| 1967 | .private = offsetof(struct cfq_group, stats.merged), | 1952 | .private = offsetof(struct cfq_group, stats.merged), |
| 1968 | .read_seq_string = cfqg_print_rwstat_recursive, | 1953 | .seq_show = cfqg_print_rwstat_recursive, |
| 1969 | }, | 1954 | }, |
| 1970 | { | 1955 | { |
| 1971 | .name = "io_queued_recursive", | 1956 | .name = "io_queued_recursive", |
| 1972 | .private = offsetof(struct cfq_group, stats.queued), | 1957 | .private = offsetof(struct cfq_group, stats.queued), |
| 1973 | .read_seq_string = cfqg_print_rwstat_recursive, | 1958 | .seq_show = cfqg_print_rwstat_recursive, |
| 1974 | }, | 1959 | }, |
| 1975 | #ifdef CONFIG_DEBUG_BLK_CGROUP | 1960 | #ifdef CONFIG_DEBUG_BLK_CGROUP |
| 1976 | { | 1961 | { |
| 1977 | .name = "avg_queue_size", | 1962 | .name = "avg_queue_size", |
| 1978 | .read_seq_string = cfqg_print_avg_queue_size, | 1963 | .seq_show = cfqg_print_avg_queue_size, |
| 1979 | }, | 1964 | }, |
| 1980 | { | 1965 | { |
| 1981 | .name = "group_wait_time", | 1966 | .name = "group_wait_time", |
| 1982 | .private = offsetof(struct cfq_group, stats.group_wait_time), | 1967 | .private = offsetof(struct cfq_group, stats.group_wait_time), |
| 1983 | .read_seq_string = cfqg_print_stat, | 1968 | .seq_show = cfqg_print_stat, |
| 1984 | }, | 1969 | }, |
| 1985 | { | 1970 | { |
| 1986 | .name = "idle_time", | 1971 | .name = "idle_time", |
| 1987 | .private = offsetof(struct cfq_group, stats.idle_time), | 1972 | .private = offsetof(struct cfq_group, stats.idle_time), |
| 1988 | .read_seq_string = cfqg_print_stat, | 1973 | .seq_show = cfqg_print_stat, |
| 1989 | }, | 1974 | }, |
| 1990 | { | 1975 | { |
| 1991 | .name = "empty_time", | 1976 | .name = "empty_time", |
| 1992 | .private = offsetof(struct cfq_group, stats.empty_time), | 1977 | .private = offsetof(struct cfq_group, stats.empty_time), |
| 1993 | .read_seq_string = cfqg_print_stat, | 1978 | .seq_show = cfqg_print_stat, |
| 1994 | }, | 1979 | }, |
| 1995 | { | 1980 | { |
| 1996 | .name = "dequeue", | 1981 | .name = "dequeue", |
| 1997 | .private = offsetof(struct cfq_group, stats.dequeue), | 1982 | .private = offsetof(struct cfq_group, stats.dequeue), |
| 1998 | .read_seq_string = cfqg_print_stat, | 1983 | .seq_show = cfqg_print_stat, |
| 1999 | }, | 1984 | }, |
| 2000 | { | 1985 | { |
| 2001 | .name = "unaccounted_time", | 1986 | .name = "unaccounted_time", |
| 2002 | .private = offsetof(struct cfq_group, stats.unaccounted_time), | 1987 | .private = offsetof(struct cfq_group, stats.unaccounted_time), |
| 2003 | .read_seq_string = cfqg_print_stat, | 1988 | .seq_show = cfqg_print_stat, |
| 2004 | }, | 1989 | }, |
| 2005 | #endif /* CONFIG_DEBUG_BLK_CGROUP */ | 1990 | #endif /* CONFIG_DEBUG_BLK_CGROUP */ |
| 2006 | { } /* terminate */ | 1991 | { } /* terminate */ |
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index fbcc851ed5a5..61bcfc21d2a0 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c | |||
| @@ -163,7 +163,6 @@ static struct cgroup_subsys_state *bcachecg_create(struct cgroup *cgroup) | |||
| 163 | static void bcachecg_destroy(struct cgroup *cgroup) | 163 | static void bcachecg_destroy(struct cgroup *cgroup) |
| 164 | { | 164 | { |
| 165 | struct bch_cgroup *cg = cgroup_to_bcache(cgroup); | 165 | struct bch_cgroup *cg = cgroup_to_bcache(cgroup); |
| 166 | free_css_id(&bcache_subsys, &cg->css); | ||
| 167 | kfree(cg); | 166 | kfree(cg); |
| 168 | } | 167 | } |
| 169 | 168 | ||
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 39c1d9469677..5c097596104b 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h | |||
| @@ -21,6 +21,7 @@ | |||
| 21 | #include <linux/xattr.h> | 21 | #include <linux/xattr.h> |
| 22 | #include <linux/fs.h> | 22 | #include <linux/fs.h> |
| 23 | #include <linux/percpu-refcount.h> | 23 | #include <linux/percpu-refcount.h> |
| 24 | #include <linux/seq_file.h> | ||
| 24 | 25 | ||
| 25 | #ifdef CONFIG_CGROUPS | 26 | #ifdef CONFIG_CGROUPS |
| 26 | 27 | ||
| @@ -28,8 +29,6 @@ struct cgroupfs_root; | |||
| 28 | struct cgroup_subsys; | 29 | struct cgroup_subsys; |
| 29 | struct inode; | 30 | struct inode; |
| 30 | struct cgroup; | 31 | struct cgroup; |
| 31 | struct css_id; | ||
| 32 | struct eventfd_ctx; | ||
| 33 | 32 | ||
| 34 | extern int cgroup_init_early(void); | 33 | extern int cgroup_init_early(void); |
| 35 | extern int cgroup_init(void); | 34 | extern int cgroup_init(void); |
| @@ -79,8 +78,6 @@ struct cgroup_subsys_state { | |||
| 79 | struct cgroup_subsys_state *parent; | 78 | struct cgroup_subsys_state *parent; |
| 80 | 79 | ||
| 81 | unsigned long flags; | 80 | unsigned long flags; |
| 82 | /* ID for this css, if possible */ | ||
| 83 | struct css_id __rcu *id; | ||
| 84 | 81 | ||
| 85 | /* percpu_ref killing and RCU release */ | 82 | /* percpu_ref killing and RCU release */ |
| 86 | struct rcu_head rcu_head; | 83 | struct rcu_head rcu_head; |
| @@ -239,10 +236,6 @@ struct cgroup { | |||
| 239 | struct rcu_head rcu_head; | 236 | struct rcu_head rcu_head; |
| 240 | struct work_struct destroy_work; | 237 | struct work_struct destroy_work; |
| 241 | 238 | ||
| 242 | /* List of events which userspace want to receive */ | ||
| 243 | struct list_head event_list; | ||
| 244 | spinlock_t event_list_lock; | ||
| 245 | |||
| 246 | /* directory xattrs */ | 239 | /* directory xattrs */ |
| 247 | struct simple_xattrs xattrs; | 240 | struct simple_xattrs xattrs; |
| 248 | }; | 241 | }; |
| @@ -280,6 +273,9 @@ enum { | |||
| 280 | * - "tasks" is removed. Everything should be at process | 273 | * - "tasks" is removed. Everything should be at process |
| 281 | * granularity. Use "cgroup.procs" instead. | 274 | * granularity. Use "cgroup.procs" instead. |
| 282 | * | 275 | * |
| 276 | * - "cgroup.procs" is not sorted. pids will be unique unless they | ||
| 277 | * got recycled in between reads. | ||
| 278 | * | ||
| 283 | * - "release_agent" and "notify_on_release" are removed. | 279 | * - "release_agent" and "notify_on_release" are removed. |
| 284 | * Replacement notification mechanism will be implemented. | 280 | * Replacement notification mechanism will be implemented. |
| 285 | * | 281 | * |
| @@ -320,9 +316,6 @@ struct cgroupfs_root { | |||
| 320 | /* Unique id for this hierarchy. */ | 316 | /* Unique id for this hierarchy. */ |
| 321 | int hierarchy_id; | 317 | int hierarchy_id; |
| 322 | 318 | ||
| 323 | /* A list running through the attached subsystems */ | ||
| 324 | struct list_head subsys_list; | ||
| 325 | |||
| 326 | /* The root cgroup for this hierarchy */ | 319 | /* The root cgroup for this hierarchy */ |
| 327 | struct cgroup top_cgroup; | 320 | struct cgroup top_cgroup; |
| 328 | 321 | ||
| @@ -389,16 +382,6 @@ struct css_set { | |||
| 389 | }; | 382 | }; |
| 390 | 383 | ||
| 391 | /* | 384 | /* |
| 392 | * cgroup_map_cb is an abstract callback API for reporting map-valued | ||
| 393 | * control files | ||
| 394 | */ | ||
| 395 | |||
| 396 | struct cgroup_map_cb { | ||
| 397 | int (*fill)(struct cgroup_map_cb *cb, const char *key, u64 value); | ||
| 398 | void *state; | ||
| 399 | }; | ||
| 400 | |||
| 401 | /* | ||
| 402 | * struct cftype: handler definitions for cgroup control files | 385 | * struct cftype: handler definitions for cgroup control files |
| 403 | * | 386 | * |
| 404 | * When reading/writing to a file: | 387 | * When reading/writing to a file: |
| @@ -445,10 +428,6 @@ struct cftype { | |||
| 445 | */ | 428 | */ |
| 446 | struct cgroup_subsys *ss; | 429 | struct cgroup_subsys *ss; |
| 447 | 430 | ||
| 448 | int (*open)(struct inode *inode, struct file *file); | ||
| 449 | ssize_t (*read)(struct cgroup_subsys_state *css, struct cftype *cft, | ||
| 450 | struct file *file, | ||
| 451 | char __user *buf, size_t nbytes, loff_t *ppos); | ||
| 452 | /* | 431 | /* |
| 453 | * read_u64() is a shortcut for the common case of returning a | 432 | * read_u64() is a shortcut for the common case of returning a |
| 454 | * single integer. Use it in place of read() | 433 | * single integer. Use it in place of read() |
| @@ -458,24 +437,14 @@ struct cftype { | |||
| 458 | * read_s64() is a signed version of read_u64() | 437 | * read_s64() is a signed version of read_u64() |
| 459 | */ | 438 | */ |
| 460 | s64 (*read_s64)(struct cgroup_subsys_state *css, struct cftype *cft); | 439 | s64 (*read_s64)(struct cgroup_subsys_state *css, struct cftype *cft); |
| 461 | /* | ||
| 462 | * read_map() is used for defining a map of key/value | ||
| 463 | * pairs. It should call cb->fill(cb, key, value) for each | ||
| 464 | * entry. The key/value pairs (and their ordering) should not | ||
| 465 | * change between reboots. | ||
| 466 | */ | ||
| 467 | int (*read_map)(struct cgroup_subsys_state *css, struct cftype *cft, | ||
| 468 | struct cgroup_map_cb *cb); | ||
| 469 | /* | ||
| 470 | * read_seq_string() is used for outputting a simple sequence | ||
| 471 | * using seqfile. | ||
| 472 | */ | ||
| 473 | int (*read_seq_string)(struct cgroup_subsys_state *css, | ||
| 474 | struct cftype *cft, struct seq_file *m); | ||
| 475 | 440 | ||
| 476 | ssize_t (*write)(struct cgroup_subsys_state *css, struct cftype *cft, | 441 | /* generic seq_file read interface */ |
| 477 | struct file *file, | 442 | int (*seq_show)(struct seq_file *sf, void *v); |
| 478 | const char __user *buf, size_t nbytes, loff_t *ppos); | 443 | |
| 444 | /* optional ops, implement all or none */ | ||
| 445 | void *(*seq_start)(struct seq_file *sf, loff_t *ppos); | ||
| 446 | void *(*seq_next)(struct seq_file *sf, void *v, loff_t *ppos); | ||
| 447 | void (*seq_stop)(struct seq_file *sf, void *v); | ||
| 479 | 448 | ||
| 480 | /* | 449 | /* |
| 481 | * write_u64() is a shortcut for the common case of accepting | 450 | * write_u64() is a shortcut for the common case of accepting |
| @@ -504,27 +473,6 @@ struct cftype { | |||
| 504 | * kick type for multiplexing. | 473 | * kick type for multiplexing. |
| 505 | */ | 474 | */ |
| 506 | int (*trigger)(struct cgroup_subsys_state *css, unsigned int event); | 475 | int (*trigger)(struct cgroup_subsys_state *css, unsigned int event); |
| 507 | |||
| 508 | int (*release)(struct inode *inode, struct file *file); | ||
| 509 | |||
| 510 | /* | ||
| 511 | * register_event() callback will be used to add new userspace | ||
| 512 | * waiter for changes related to the cftype. Implement it if | ||
| 513 | * you want to provide this functionality. Use eventfd_signal() | ||
| 514 | * on eventfd to send notification to userspace. | ||
| 515 | */ | ||
| 516 | int (*register_event)(struct cgroup_subsys_state *css, | ||
| 517 | struct cftype *cft, struct eventfd_ctx *eventfd, | ||
| 518 | const char *args); | ||
| 519 | /* | ||
| 520 | * unregister_event() callback will be called when userspace | ||
| 521 | * closes the eventfd or on cgroup removing. | ||
| 522 | * This callback must be implemented, if you want provide | ||
| 523 | * notification functionality. | ||
| 524 | */ | ||
| 525 | void (*unregister_event)(struct cgroup_subsys_state *css, | ||
| 526 | struct cftype *cft, | ||
| 527 | struct eventfd_ctx *eventfd); | ||
| 528 | }; | 476 | }; |
| 529 | 477 | ||
| 530 | /* | 478 | /* |
| @@ -538,6 +486,26 @@ struct cftype_set { | |||
| 538 | }; | 486 | }; |
| 539 | 487 | ||
| 540 | /* | 488 | /* |
| 489 | * cgroupfs file entry, pointed to from leaf dentry->d_fsdata. Don't | ||
| 490 | * access directly. | ||
| 491 | */ | ||
| 492 | struct cfent { | ||
| 493 | struct list_head node; | ||
| 494 | struct dentry *dentry; | ||
| 495 | struct cftype *type; | ||
| 496 | struct cgroup_subsys_state *css; | ||
| 497 | |||
| 498 | /* file xattrs */ | ||
| 499 | struct simple_xattrs xattrs; | ||
| 500 | }; | ||
| 501 | |||
| 502 | /* seq_file->private points to the following, only ->priv is public */ | ||
| 503 | struct cgroup_open_file { | ||
| 504 | struct cfent *cfe; | ||
| 505 | void *priv; | ||
| 506 | }; | ||
| 507 | |||
| 508 | /* | ||
| 541 | * See the comment above CGRP_ROOT_SANE_BEHAVIOR for details. This | 509 | * See the comment above CGRP_ROOT_SANE_BEHAVIOR for details. This |
| 542 | * function can be called as long as @cgrp is accessible. | 510 | * function can be called as long as @cgrp is accessible. |
| 543 | */ | 511 | */ |
| @@ -552,6 +520,18 @@ static inline const char *cgroup_name(const struct cgroup *cgrp) | |||
| 552 | return rcu_dereference(cgrp->name)->name; | 520 | return rcu_dereference(cgrp->name)->name; |
| 553 | } | 521 | } |
| 554 | 522 | ||
| 523 | static inline struct cgroup_subsys_state *seq_css(struct seq_file *seq) | ||
| 524 | { | ||
| 525 | struct cgroup_open_file *of = seq->private; | ||
| 526 | return of->cfe->css; | ||
| 527 | } | ||
| 528 | |||
| 529 | static inline struct cftype *seq_cft(struct seq_file *seq) | ||
| 530 | { | ||
| 531 | struct cgroup_open_file *of = seq->private; | ||
| 532 | return of->cfe->type; | ||
| 533 | } | ||
| 534 | |||
| 555 | int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts); | 535 | int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts); |
| 556 | int cgroup_rm_cftypes(struct cftype *cfts); | 536 | int cgroup_rm_cftypes(struct cftype *cfts); |
| 557 | 537 | ||
| @@ -631,12 +611,8 @@ struct cgroup_subsys { | |||
| 631 | #define MAX_CGROUP_TYPE_NAMELEN 32 | 611 | #define MAX_CGROUP_TYPE_NAMELEN 32 |
| 632 | const char *name; | 612 | const char *name; |
| 633 | 613 | ||
| 634 | /* | 614 | /* link to parent, protected by cgroup_lock() */ |
| 635 | * Link to parent, and list entry in parent's children. | ||
| 636 | * Protected by cgroup_lock() | ||
| 637 | */ | ||
| 638 | struct cgroupfs_root *root; | 615 | struct cgroupfs_root *root; |
| 639 | struct list_head sibling; | ||
| 640 | 616 | ||
| 641 | /* list of cftype_sets */ | 617 | /* list of cftype_sets */ |
| 642 | struct list_head cftsets; | 618 | struct list_head cftsets; |
diff --git a/include/linux/vmpressure.h b/include/linux/vmpressure.h index 3f3788d49362..3e4535876d37 100644 --- a/include/linux/vmpressure.h +++ b/include/linux/vmpressure.h | |||
| @@ -7,6 +7,7 @@ | |||
| 7 | #include <linux/gfp.h> | 7 | #include <linux/gfp.h> |
| 8 | #include <linux/types.h> | 8 | #include <linux/types.h> |
| 9 | #include <linux/cgroup.h> | 9 | #include <linux/cgroup.h> |
| 10 | #include <linux/eventfd.h> | ||
| 10 | 11 | ||
| 11 | struct vmpressure { | 12 | struct vmpressure { |
| 12 | unsigned long scanned; | 13 | unsigned long scanned; |
| @@ -33,13 +34,10 @@ extern void vmpressure_init(struct vmpressure *vmpr); | |||
| 33 | extern void vmpressure_cleanup(struct vmpressure *vmpr); | 34 | extern void vmpressure_cleanup(struct vmpressure *vmpr); |
| 34 | extern struct vmpressure *memcg_to_vmpressure(struct mem_cgroup *memcg); | 35 | extern struct vmpressure *memcg_to_vmpressure(struct mem_cgroup *memcg); |
| 35 | extern struct cgroup_subsys_state *vmpressure_to_css(struct vmpressure *vmpr); | 36 | extern struct cgroup_subsys_state *vmpressure_to_css(struct vmpressure *vmpr); |
| 36 | extern struct vmpressure *css_to_vmpressure(struct cgroup_subsys_state *css); | 37 | extern int vmpressure_register_event(struct mem_cgroup *memcg, |
| 37 | extern int vmpressure_register_event(struct cgroup_subsys_state *css, | ||
| 38 | struct cftype *cft, | ||
| 39 | struct eventfd_ctx *eventfd, | 38 | struct eventfd_ctx *eventfd, |
| 40 | const char *args); | 39 | const char *args); |
| 41 | extern void vmpressure_unregister_event(struct cgroup_subsys_state *css, | 40 | extern void vmpressure_unregister_event(struct mem_cgroup *memcg, |
| 42 | struct cftype *cft, | ||
| 43 | struct eventfd_ctx *eventfd); | 41 | struct eventfd_ctx *eventfd); |
| 44 | #else | 42 | #else |
| 45 | static inline void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, | 43 | static inline void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, |
diff --git a/init/Kconfig b/init/Kconfig index 5236dc562a36..8d402e33b7fc 100644 --- a/init/Kconfig +++ b/init/Kconfig | |||
| @@ -854,7 +854,6 @@ config NUMA_BALANCING | |||
| 854 | 854 | ||
| 855 | menuconfig CGROUPS | 855 | menuconfig CGROUPS |
| 856 | boolean "Control Group support" | 856 | boolean "Control Group support" |
| 857 | depends on EVENTFD | ||
| 858 | help | 857 | help |
| 859 | This option adds support for grouping sets of processes together, for | 858 | This option adds support for grouping sets of processes together, for |
| 860 | use with process control subsystems such as Cpusets, CFS, memory | 859 | use with process control subsystems such as Cpusets, CFS, memory |
| @@ -921,6 +920,7 @@ config MEMCG | |||
| 921 | bool "Memory Resource Controller for Control Groups" | 920 | bool "Memory Resource Controller for Control Groups" |
| 922 | depends on RESOURCE_COUNTERS | 921 | depends on RESOURCE_COUNTERS |
| 923 | select MM_OWNER | 922 | select MM_OWNER |
| 923 | select EVENTFD | ||
| 924 | help | 924 | help |
| 925 | Provides a memory resource controller that manages both anonymous | 925 | Provides a memory resource controller that manages both anonymous |
| 926 | memory and page cache. (See Documentation/cgroups/memory.txt) | 926 | memory and page cache. (See Documentation/cgroups/memory.txt) |
| @@ -1160,7 +1160,6 @@ config UIDGID_STRICT_TYPE_CHECKS | |||
| 1160 | 1160 | ||
| 1161 | config SCHED_AUTOGROUP | 1161 | config SCHED_AUTOGROUP |
| 1162 | bool "Automatic process group scheduling" | 1162 | bool "Automatic process group scheduling" |
| 1163 | select EVENTFD | ||
| 1164 | select CGROUPS | 1163 | select CGROUPS |
| 1165 | select CGROUP_SCHED | 1164 | select CGROUP_SCHED |
| 1166 | select FAIR_GROUP_SCHED | 1165 | select FAIR_GROUP_SCHED |
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index bc1dcabe9217..e2f46ba37f72 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c | |||
| @@ -41,7 +41,6 @@ | |||
| 41 | #include <linux/rcupdate.h> | 41 | #include <linux/rcupdate.h> |
| 42 | #include <linux/sched.h> | 42 | #include <linux/sched.h> |
| 43 | #include <linux/backing-dev.h> | 43 | #include <linux/backing-dev.h> |
| 44 | #include <linux/seq_file.h> | ||
| 45 | #include <linux/slab.h> | 44 | #include <linux/slab.h> |
| 46 | #include <linux/magic.h> | 45 | #include <linux/magic.h> |
| 47 | #include <linux/spinlock.h> | 46 | #include <linux/spinlock.h> |
| @@ -56,15 +55,20 @@ | |||
| 56 | #include <linux/pid_namespace.h> | 55 | #include <linux/pid_namespace.h> |
| 57 | #include <linux/idr.h> | 56 | #include <linux/idr.h> |
| 58 | #include <linux/vmalloc.h> /* TODO: replace with more sophisticated array */ | 57 | #include <linux/vmalloc.h> /* TODO: replace with more sophisticated array */ |
| 59 | #include <linux/eventfd.h> | ||
| 60 | #include <linux/poll.h> | ||
| 61 | #include <linux/flex_array.h> /* used in cgroup_attach_task */ | 58 | #include <linux/flex_array.h> /* used in cgroup_attach_task */ |
| 62 | #include <linux/kthread.h> | 59 | #include <linux/kthread.h> |
| 63 | #include <linux/file.h> | ||
| 64 | 60 | ||
| 65 | #include <linux/atomic.h> | 61 | #include <linux/atomic.h> |
| 66 | 62 | ||
| 67 | /* | 63 | /* |
| 64 | * pidlists linger the following amount before being destroyed. The goal | ||
| 65 | * is avoiding frequent destruction in the middle of consecutive read calls | ||
| 66 | * Expiring in the middle is a performance problem not a correctness one. | ||
| 67 | * 1 sec should be enough. | ||
| 68 | */ | ||
| 69 | #define CGROUP_PIDLIST_DESTROY_DELAY HZ | ||
| 70 | |||
| 71 | /* | ||
| 68 | * cgroup_mutex is the master lock. Any modification to cgroup or its | 72 | * cgroup_mutex is the master lock. Any modification to cgroup or its |
| 69 | * hierarchy must be performed while holding it. | 73 | * hierarchy must be performed while holding it. |
| 70 | * | 74 | * |
| @@ -89,6 +93,19 @@ static DEFINE_MUTEX(cgroup_mutex); | |||
| 89 | 93 | ||
| 90 | static DEFINE_MUTEX(cgroup_root_mutex); | 94 | static DEFINE_MUTEX(cgroup_root_mutex); |
| 91 | 95 | ||
| 96 | #define cgroup_assert_mutex_or_rcu_locked() \ | ||
| 97 | rcu_lockdep_assert(rcu_read_lock_held() || \ | ||
| 98 | lockdep_is_held(&cgroup_mutex), \ | ||
| 99 | "cgroup_mutex or RCU read lock required"); | ||
| 100 | |||
| 101 | #ifdef CONFIG_LOCKDEP | ||
| 102 | #define cgroup_assert_mutex_or_root_locked() \ | ||
| 103 | WARN_ON_ONCE(debug_locks && (!lockdep_is_held(&cgroup_mutex) && \ | ||
| 104 | !lockdep_is_held(&cgroup_root_mutex))) | ||
| 105 | #else | ||
| 106 | #define cgroup_assert_mutex_or_root_locked() do { } while (0) | ||
| 107 | #endif | ||
| 108 | |||
| 92 | /* | 109 | /* |
| 93 | * cgroup destruction makes heavy use of work items and there can be a lot | 110 | * cgroup destruction makes heavy use of work items and there can be a lot |
| 94 | * of concurrent destructions. Use a separate workqueue so that cgroup | 111 | * of concurrent destructions. Use a separate workqueue so that cgroup |
| @@ -98,6 +115,12 @@ static DEFINE_MUTEX(cgroup_root_mutex); | |||
| 98 | static struct workqueue_struct *cgroup_destroy_wq; | 115 | static struct workqueue_struct *cgroup_destroy_wq; |
| 99 | 116 | ||
| 100 | /* | 117 | /* |
| 118 | * pidlist destructions need to be flushed on cgroup destruction. Use a | ||
| 119 | * separate workqueue as flush domain. | ||
| 120 | */ | ||
| 121 | static struct workqueue_struct *cgroup_pidlist_destroy_wq; | ||
| 122 | |||
| 123 | /* | ||
| 101 | * Generate an array of cgroup subsystem pointers. At boot time, this is | 124 | * Generate an array of cgroup subsystem pointers. At boot time, this is |
| 102 | * populated with the built in subsystems, and modular subsystems are | 125 | * populated with the built in subsystems, and modular subsystems are |
| 103 | * registered after that. The mutable section of this array is protected by | 126 | * registered after that. The mutable section of this array is protected by |
| @@ -119,49 +142,6 @@ static struct cgroupfs_root cgroup_dummy_root; | |||
| 119 | /* dummy_top is a shorthand for the dummy hierarchy's top cgroup */ | 142 | /* dummy_top is a shorthand for the dummy hierarchy's top cgroup */ |
| 120 | static struct cgroup * const cgroup_dummy_top = &cgroup_dummy_root.top_cgroup; | 143 | static struct cgroup * const cgroup_dummy_top = &cgroup_dummy_root.top_cgroup; |
| 121 | 144 | ||
| 122 | /* | ||
| 123 | * cgroupfs file entry, pointed to from leaf dentry->d_fsdata. | ||
| 124 | */ | ||
| 125 | struct cfent { | ||
| 126 | struct list_head node; | ||
| 127 | struct dentry *dentry; | ||
| 128 | struct cftype *type; | ||
| 129 | struct cgroup_subsys_state *css; | ||
| 130 | |||
| 131 | /* file xattrs */ | ||
| 132 | struct simple_xattrs xattrs; | ||
| 133 | }; | ||
| 134 | |||
| 135 | /* | ||
| 136 | * cgroup_event represents events which userspace want to receive. | ||
| 137 | */ | ||
| 138 | struct cgroup_event { | ||
| 139 | /* | ||
| 140 | * css which the event belongs to. | ||
| 141 | */ | ||
| 142 | struct cgroup_subsys_state *css; | ||
| 143 | /* | ||
| 144 | * Control file which the event associated. | ||
| 145 | */ | ||
| 146 | struct cftype *cft; | ||
| 147 | /* | ||
| 148 | * eventfd to signal userspace about the event. | ||
| 149 | */ | ||
| 150 | struct eventfd_ctx *eventfd; | ||
| 151 | /* | ||
| 152 | * Each of these stored in a list by the cgroup. | ||
| 153 | */ | ||
| 154 | struct list_head list; | ||
| 155 | /* | ||
| 156 | * All fields below needed to unregister event when | ||
| 157 | * userspace closes eventfd. | ||
| 158 | */ | ||
| 159 | poll_table pt; | ||
| 160 | wait_queue_head_t *wqh; | ||
| 161 | wait_queue_t wait; | ||
| 162 | struct work_struct remove; | ||
| 163 | }; | ||
| 164 | |||
| 165 | /* The list of hierarchy roots */ | 145 | /* The list of hierarchy roots */ |
| 166 | 146 | ||
| 167 | static LIST_HEAD(cgroup_roots); | 147 | static LIST_HEAD(cgroup_roots); |
| @@ -200,6 +180,7 @@ static int cgroup_destroy_locked(struct cgroup *cgrp); | |||
| 200 | static int cgroup_addrm_files(struct cgroup *cgrp, struct cftype cfts[], | 180 | static int cgroup_addrm_files(struct cgroup *cgrp, struct cftype cfts[], |
| 201 | bool is_add); | 181 | bool is_add); |
| 202 | static int cgroup_file_release(struct inode *inode, struct file *file); | 182 | static int cgroup_file_release(struct inode *inode, struct file *file); |
| 183 | static void cgroup_pidlist_destroy_all(struct cgroup *cgrp); | ||
| 203 | 184 | ||
| 204 | /** | 185 | /** |
| 205 | * cgroup_css - obtain a cgroup's css for the specified subsystem | 186 | * cgroup_css - obtain a cgroup's css for the specified subsystem |
| @@ -262,16 +243,32 @@ static int notify_on_release(const struct cgroup *cgrp) | |||
| 262 | } | 243 | } |
| 263 | 244 | ||
| 264 | /** | 245 | /** |
| 246 | * for_each_css - iterate all css's of a cgroup | ||
| 247 | * @css: the iteration cursor | ||
| 248 | * @ssid: the index of the subsystem, CGROUP_SUBSYS_COUNT after reaching the end | ||
| 249 | * @cgrp: the target cgroup to iterate css's of | ||
| 250 | * | ||
| 251 | * Should be called under cgroup_mutex. | ||
| 252 | */ | ||
| 253 | #define for_each_css(css, ssid, cgrp) \ | ||
| 254 | for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT; (ssid)++) \ | ||
| 255 | if (!((css) = rcu_dereference_check( \ | ||
| 256 | (cgrp)->subsys[(ssid)], \ | ||
| 257 | lockdep_is_held(&cgroup_mutex)))) { } \ | ||
| 258 | else | ||
| 259 | |||
| 260 | /** | ||
| 265 | * for_each_subsys - iterate all loaded cgroup subsystems | 261 | * for_each_subsys - iterate all loaded cgroup subsystems |
| 266 | * @ss: the iteration cursor | 262 | * @ss: the iteration cursor |
| 267 | * @i: the index of @ss, CGROUP_SUBSYS_COUNT after reaching the end | 263 | * @ssid: the index of @ss, CGROUP_SUBSYS_COUNT after reaching the end |
| 268 | * | 264 | * |
| 269 | * Should be called under cgroup_mutex. | 265 | * Iterates through all loaded subsystems. Should be called under |
| 266 | * cgroup_mutex or cgroup_root_mutex. | ||
| 270 | */ | 267 | */ |
| 271 | #define for_each_subsys(ss, i) \ | 268 | #define for_each_subsys(ss, ssid) \ |
| 272 | for ((i) = 0; (i) < CGROUP_SUBSYS_COUNT; (i)++) \ | 269 | for (({ cgroup_assert_mutex_or_root_locked(); (ssid) = 0; }); \ |
| 273 | if (({ lockdep_assert_held(&cgroup_mutex); \ | 270 | (ssid) < CGROUP_SUBSYS_COUNT; (ssid)++) \ |
| 274 | !((ss) = cgroup_subsys[i]); })) { } \ | 271 | if (!((ss) = cgroup_subsys[(ssid)])) { } \ |
| 275 | else | 272 | else |
| 276 | 273 | ||
| 277 | /** | 274 | /** |
| @@ -286,10 +283,6 @@ static int notify_on_release(const struct cgroup *cgrp) | |||
| 286 | for ((i) = 0; (i) < CGROUP_BUILTIN_SUBSYS_COUNT && \ | 283 | for ((i) = 0; (i) < CGROUP_BUILTIN_SUBSYS_COUNT && \ |
| 287 | (((ss) = cgroup_subsys[i]) || true); (i)++) | 284 | (((ss) = cgroup_subsys[i]) || true); (i)++) |
| 288 | 285 | ||
| 289 | /* iterate each subsystem attached to a hierarchy */ | ||
| 290 | #define for_each_root_subsys(root, ss) \ | ||
| 291 | list_for_each_entry((ss), &(root)->subsys_list, sibling) | ||
| 292 | |||
| 293 | /* iterate across the active hierarchies */ | 286 | /* iterate across the active hierarchies */ |
| 294 | #define for_each_active_root(root) \ | 287 | #define for_each_active_root(root) \ |
| 295 | list_for_each_entry((root), &cgroup_roots, root_list) | 288 | list_for_each_entry((root), &cgroup_roots, root_list) |
| @@ -863,11 +856,7 @@ static void cgroup_free_fn(struct work_struct *work) | |||
| 863 | */ | 856 | */ |
| 864 | deactivate_super(cgrp->root->sb); | 857 | deactivate_super(cgrp->root->sb); |
| 865 | 858 | ||
| 866 | /* | 859 | cgroup_pidlist_destroy_all(cgrp); |
| 867 | * if we're getting rid of the cgroup, refcount should ensure | ||
| 868 | * that there are no pidlists left. | ||
| 869 | */ | ||
| 870 | BUG_ON(!list_empty(&cgrp->pidlists)); | ||
| 871 | 860 | ||
| 872 | simple_xattrs_free(&cgrp->xattrs); | 861 | simple_xattrs_free(&cgrp->xattrs); |
| 873 | 862 | ||
| @@ -1050,7 +1039,6 @@ static int rebind_subsystems(struct cgroupfs_root *root, | |||
| 1050 | cgroup_css(cgroup_dummy_top, ss)); | 1039 | cgroup_css(cgroup_dummy_top, ss)); |
| 1051 | cgroup_css(cgrp, ss)->cgroup = cgrp; | 1040 | cgroup_css(cgrp, ss)->cgroup = cgrp; |
| 1052 | 1041 | ||
| 1053 | list_move(&ss->sibling, &root->subsys_list); | ||
| 1054 | ss->root = root; | 1042 | ss->root = root; |
| 1055 | if (ss->bind) | 1043 | if (ss->bind) |
| 1056 | ss->bind(cgroup_css(cgrp, ss)); | 1044 | ss->bind(cgroup_css(cgrp, ss)); |
| @@ -1069,7 +1057,6 @@ static int rebind_subsystems(struct cgroupfs_root *root, | |||
| 1069 | RCU_INIT_POINTER(cgrp->subsys[i], NULL); | 1057 | RCU_INIT_POINTER(cgrp->subsys[i], NULL); |
| 1070 | 1058 | ||
| 1071 | cgroup_subsys[i]->root = &cgroup_dummy_root; | 1059 | cgroup_subsys[i]->root = &cgroup_dummy_root; |
| 1072 | list_move(&ss->sibling, &cgroup_dummy_root.subsys_list); | ||
| 1073 | 1060 | ||
| 1074 | /* subsystem is now free - drop reference on module */ | 1061 | /* subsystem is now free - drop reference on module */ |
| 1075 | module_put(ss->module); | 1062 | module_put(ss->module); |
| @@ -1096,10 +1083,12 @@ static int cgroup_show_options(struct seq_file *seq, struct dentry *dentry) | |||
| 1096 | { | 1083 | { |
| 1097 | struct cgroupfs_root *root = dentry->d_sb->s_fs_info; | 1084 | struct cgroupfs_root *root = dentry->d_sb->s_fs_info; |
| 1098 | struct cgroup_subsys *ss; | 1085 | struct cgroup_subsys *ss; |
| 1086 | int ssid; | ||
| 1099 | 1087 | ||
| 1100 | mutex_lock(&cgroup_root_mutex); | 1088 | mutex_lock(&cgroup_root_mutex); |
| 1101 | for_each_root_subsys(root, ss) | 1089 | for_each_subsys(ss, ssid) |
| 1102 | seq_printf(seq, ",%s", ss->name); | 1090 | if (root->subsys_mask & (1 << ssid)) |
| 1091 | seq_printf(seq, ",%s", ss->name); | ||
| 1103 | if (root->flags & CGRP_ROOT_SANE_BEHAVIOR) | 1092 | if (root->flags & CGRP_ROOT_SANE_BEHAVIOR) |
| 1104 | seq_puts(seq, ",sane_behavior"); | 1093 | seq_puts(seq, ",sane_behavior"); |
| 1105 | if (root->flags & CGRP_ROOT_NOPREFIX) | 1094 | if (root->flags & CGRP_ROOT_NOPREFIX) |
| @@ -1362,8 +1351,6 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp) | |||
| 1362 | INIT_LIST_HEAD(&cgrp->pidlists); | 1351 | INIT_LIST_HEAD(&cgrp->pidlists); |
| 1363 | mutex_init(&cgrp->pidlist_mutex); | 1352 | mutex_init(&cgrp->pidlist_mutex); |
| 1364 | cgrp->dummy_css.cgroup = cgrp; | 1353 | cgrp->dummy_css.cgroup = cgrp; |
| 1365 | INIT_LIST_HEAD(&cgrp->event_list); | ||
| 1366 | spin_lock_init(&cgrp->event_list_lock); | ||
| 1367 | simple_xattrs_init(&cgrp->xattrs); | 1354 | simple_xattrs_init(&cgrp->xattrs); |
| 1368 | } | 1355 | } |
| 1369 | 1356 | ||
| @@ -1371,7 +1358,6 @@ static void init_cgroup_root(struct cgroupfs_root *root) | |||
| 1371 | { | 1358 | { |
| 1372 | struct cgroup *cgrp = &root->top_cgroup; | 1359 | struct cgroup *cgrp = &root->top_cgroup; |
| 1373 | 1360 | ||
| 1374 | INIT_LIST_HEAD(&root->subsys_list); | ||
| 1375 | INIT_LIST_HEAD(&root->root_list); | 1361 | INIT_LIST_HEAD(&root->root_list); |
| 1376 | root->number_of_cgroups = 1; | 1362 | root->number_of_cgroups = 1; |
| 1377 | cgrp->root = root; | 1363 | cgrp->root = root; |
| @@ -1693,7 +1679,8 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, | |||
| 1693 | return ERR_PTR(ret); | 1679 | return ERR_PTR(ret); |
| 1694 | } | 1680 | } |
| 1695 | 1681 | ||
| 1696 | static void cgroup_kill_sb(struct super_block *sb) { | 1682 | static void cgroup_kill_sb(struct super_block *sb) |
| 1683 | { | ||
| 1697 | struct cgroupfs_root *root = sb->s_fs_info; | 1684 | struct cgroupfs_root *root = sb->s_fs_info; |
| 1698 | struct cgroup *cgrp = &root->top_cgroup; | 1685 | struct cgroup *cgrp = &root->top_cgroup; |
| 1699 | struct cgrp_cset_link *link, *tmp_link; | 1686 | struct cgrp_cset_link *link, *tmp_link; |
| @@ -1976,8 +1963,8 @@ static int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk, | |||
| 1976 | bool threadgroup) | 1963 | bool threadgroup) |
| 1977 | { | 1964 | { |
| 1978 | int retval, i, group_size; | 1965 | int retval, i, group_size; |
| 1979 | struct cgroup_subsys *ss, *failed_ss = NULL; | ||
| 1980 | struct cgroupfs_root *root = cgrp->root; | 1966 | struct cgroupfs_root *root = cgrp->root; |
| 1967 | struct cgroup_subsys_state *css, *failed_css = NULL; | ||
| 1981 | /* threadgroup list cursor and array */ | 1968 | /* threadgroup list cursor and array */ |
| 1982 | struct task_struct *leader = tsk; | 1969 | struct task_struct *leader = tsk; |
| 1983 | struct task_and_cgroup *tc; | 1970 | struct task_and_cgroup *tc; |
| @@ -2050,13 +2037,11 @@ static int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk, | |||
| 2050 | /* | 2037 | /* |
| 2051 | * step 1: check that we can legitimately attach to the cgroup. | 2038 | * step 1: check that we can legitimately attach to the cgroup. |
| 2052 | */ | 2039 | */ |
| 2053 | for_each_root_subsys(root, ss) { | 2040 | for_each_css(css, i, cgrp) { |
| 2054 | struct cgroup_subsys_state *css = cgroup_css(cgrp, ss); | 2041 | if (css->ss->can_attach) { |
| 2055 | 2042 | retval = css->ss->can_attach(css, &tset); | |
| 2056 | if (ss->can_attach) { | ||
| 2057 | retval = ss->can_attach(css, &tset); | ||
| 2058 | if (retval) { | 2043 | if (retval) { |
| 2059 | failed_ss = ss; | 2044 | failed_css = css; |
| 2060 | goto out_cancel_attach; | 2045 | goto out_cancel_attach; |
| 2061 | } | 2046 | } |
| 2062 | } | 2047 | } |
| @@ -2092,12 +2077,9 @@ static int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk, | |||
| 2092 | /* | 2077 | /* |
| 2093 | * step 4: do subsystem attach callbacks. | 2078 | * step 4: do subsystem attach callbacks. |
| 2094 | */ | 2079 | */ |
| 2095 | for_each_root_subsys(root, ss) { | 2080 | for_each_css(css, i, cgrp) |
| 2096 | struct cgroup_subsys_state *css = cgroup_css(cgrp, ss); | 2081 | if (css->ss->attach) |
| 2097 | 2082 | css->ss->attach(css, &tset); | |
| 2098 | if (ss->attach) | ||
| 2099 | ss->attach(css, &tset); | ||
| 2100 | } | ||
| 2101 | 2083 | ||
| 2102 | /* | 2084 | /* |
| 2103 | * step 5: success! and cleanup | 2085 | * step 5: success! and cleanup |
| @@ -2114,13 +2096,11 @@ out_put_css_set_refs: | |||
| 2114 | } | 2096 | } |
| 2115 | out_cancel_attach: | 2097 | out_cancel_attach: |
| 2116 | if (retval) { | 2098 | if (retval) { |
| 2117 | for_each_root_subsys(root, ss) { | 2099 | for_each_css(css, i, cgrp) { |
| 2118 | struct cgroup_subsys_state *css = cgroup_css(cgrp, ss); | 2100 | if (css == failed_css) |
| 2119 | |||
| 2120 | if (ss == failed_ss) | ||
| 2121 | break; | 2101 | break; |
| 2122 | if (ss->cancel_attach) | 2102 | if (css->ss->cancel_attach) |
| 2123 | ss->cancel_attach(css, &tset); | 2103 | css->ss->cancel_attach(css, &tset); |
| 2124 | } | 2104 | } |
| 2125 | } | 2105 | } |
| 2126 | out_free_group_list: | 2106 | out_free_group_list: |
| @@ -2148,7 +2128,7 @@ retry_find_task: | |||
| 2148 | tsk = find_task_by_vpid(pid); | 2128 | tsk = find_task_by_vpid(pid); |
| 2149 | if (!tsk) { | 2129 | if (!tsk) { |
| 2150 | rcu_read_unlock(); | 2130 | rcu_read_unlock(); |
| 2151 | ret= -ESRCH; | 2131 | ret = -ESRCH; |
| 2152 | goto out_unlock_cgroup; | 2132 | goto out_unlock_cgroup; |
| 2153 | } | 2133 | } |
| 2154 | /* | 2134 | /* |
| @@ -2260,10 +2240,9 @@ static int cgroup_release_agent_write(struct cgroup_subsys_state *css, | |||
| 2260 | return 0; | 2240 | return 0; |
| 2261 | } | 2241 | } |
| 2262 | 2242 | ||
| 2263 | static int cgroup_release_agent_show(struct cgroup_subsys_state *css, | 2243 | static int cgroup_release_agent_show(struct seq_file *seq, void *v) |
| 2264 | struct cftype *cft, struct seq_file *seq) | ||
| 2265 | { | 2244 | { |
| 2266 | struct cgroup *cgrp = css->cgroup; | 2245 | struct cgroup *cgrp = seq_css(seq)->cgroup; |
| 2267 | 2246 | ||
| 2268 | if (!cgroup_lock_live_group(cgrp)) | 2247 | if (!cgroup_lock_live_group(cgrp)) |
| 2269 | return -ENODEV; | 2248 | return -ENODEV; |
| @@ -2273,174 +2252,129 @@ static int cgroup_release_agent_show(struct cgroup_subsys_state *css, | |||
| 2273 | return 0; | 2252 | return 0; |
| 2274 | } | 2253 | } |
| 2275 | 2254 | ||
| 2276 | static int cgroup_sane_behavior_show(struct cgroup_subsys_state *css, | 2255 | static int cgroup_sane_behavior_show(struct seq_file *seq, void *v) |
| 2277 | struct cftype *cft, struct seq_file *seq) | ||
| 2278 | { | 2256 | { |
| 2279 | seq_printf(seq, "%d\n", cgroup_sane_behavior(css->cgroup)); | 2257 | struct cgroup *cgrp = seq_css(seq)->cgroup; |
| 2258 | |||
| 2259 | seq_printf(seq, "%d\n", cgroup_sane_behavior(cgrp)); | ||
| 2280 | return 0; | 2260 | return 0; |
| 2281 | } | 2261 | } |
| 2282 | 2262 | ||
| 2283 | /* A buffer size big enough for numbers or short strings */ | 2263 | /* A buffer size big enough for numbers or short strings */ |
| 2284 | #define CGROUP_LOCAL_BUFFER_SIZE 64 | 2264 | #define CGROUP_LOCAL_BUFFER_SIZE 64 |
| 2285 | 2265 | ||
| 2286 | static ssize_t cgroup_write_X64(struct cgroup_subsys_state *css, | 2266 | static ssize_t cgroup_file_write(struct file *file, const char __user *userbuf, |
| 2287 | struct cftype *cft, struct file *file, | 2267 | size_t nbytes, loff_t *ppos) |
| 2288 | const char __user *userbuf, size_t nbytes, | ||
| 2289 | loff_t *unused_ppos) | ||
| 2290 | { | 2268 | { |
| 2291 | char buffer[CGROUP_LOCAL_BUFFER_SIZE]; | 2269 | struct cfent *cfe = __d_cfe(file->f_dentry); |
| 2292 | int retval = 0; | 2270 | struct cftype *cft = __d_cft(file->f_dentry); |
| 2293 | char *end; | 2271 | struct cgroup_subsys_state *css = cfe->css; |
| 2272 | size_t max_bytes = cft->max_write_len ?: CGROUP_LOCAL_BUFFER_SIZE - 1; | ||
| 2273 | char *buf; | ||
| 2274 | int ret; | ||
| 2294 | 2275 | ||
| 2295 | if (!nbytes) | 2276 | if (nbytes >= max_bytes) |
| 2296 | return -EINVAL; | ||
| 2297 | if (nbytes >= sizeof(buffer)) | ||
| 2298 | return -E2BIG; | 2277 | return -E2BIG; |
| 2299 | if (copy_from_user(buffer, userbuf, nbytes)) | ||
| 2300 | return -EFAULT; | ||
| 2301 | 2278 | ||
| 2302 | buffer[nbytes] = 0; /* nul-terminate */ | 2279 | buf = kmalloc(nbytes + 1, GFP_KERNEL); |
| 2303 | if (cft->write_u64) { | 2280 | if (!buf) |
| 2304 | u64 val = simple_strtoull(strstrip(buffer), &end, 0); | 2281 | return -ENOMEM; |
| 2305 | if (*end) | 2282 | |
| 2306 | return -EINVAL; | 2283 | if (copy_from_user(buf, userbuf, nbytes)) { |
| 2307 | retval = cft->write_u64(css, cft, val); | 2284 | ret = -EFAULT; |
| 2285 | goto out_free; | ||
| 2286 | } | ||
| 2287 | |||
| 2288 | buf[nbytes] = '\0'; | ||
| 2289 | |||
| 2290 | if (cft->write_string) { | ||
| 2291 | ret = cft->write_string(css, cft, strstrip(buf)); | ||
| 2292 | } else if (cft->write_u64) { | ||
| 2293 | unsigned long long v; | ||
| 2294 | ret = kstrtoull(buf, 0, &v); | ||
| 2295 | if (!ret) | ||
| 2296 | ret = cft->write_u64(css, cft, v); | ||
| 2297 | } else if (cft->write_s64) { | ||
| 2298 | long long v; | ||
| 2299 | ret = kstrtoll(buf, 0, &v); | ||
| 2300 | if (!ret) | ||
| 2301 | ret = cft->write_s64(css, cft, v); | ||
| 2302 | } else if (cft->trigger) { | ||
| 2303 | ret = cft->trigger(css, (unsigned int)cft->private); | ||
| 2308 | } else { | 2304 | } else { |
| 2309 | s64 val = simple_strtoll(strstrip(buffer), &end, 0); | 2305 | ret = -EINVAL; |
| 2310 | if (*end) | ||
| 2311 | return -EINVAL; | ||
| 2312 | retval = cft->write_s64(css, cft, val); | ||
| 2313 | } | 2306 | } |
| 2314 | if (!retval) | 2307 | out_free: |
| 2315 | retval = nbytes; | 2308 | kfree(buf); |
| 2316 | return retval; | 2309 | return ret ?: nbytes; |
| 2317 | } | 2310 | } |
| 2318 | 2311 | ||
| 2319 | static ssize_t cgroup_write_string(struct cgroup_subsys_state *css, | 2312 | /* |
| 2320 | struct cftype *cft, struct file *file, | 2313 | * seqfile ops/methods for returning structured data. Currently just |
| 2321 | const char __user *userbuf, size_t nbytes, | 2314 | * supports string->u64 maps, but can be extended in future. |
| 2322 | loff_t *unused_ppos) | 2315 | */ |
| 2316 | |||
| 2317 | static void *cgroup_seqfile_start(struct seq_file *seq, loff_t *ppos) | ||
| 2323 | { | 2318 | { |
| 2324 | char local_buffer[CGROUP_LOCAL_BUFFER_SIZE]; | 2319 | struct cftype *cft = seq_cft(seq); |
| 2325 | int retval = 0; | ||
| 2326 | size_t max_bytes = cft->max_write_len; | ||
| 2327 | char *buffer = local_buffer; | ||
| 2328 | 2320 | ||
| 2329 | if (!max_bytes) | 2321 | if (cft->seq_start) { |
| 2330 | max_bytes = sizeof(local_buffer) - 1; | 2322 | return cft->seq_start(seq, ppos); |
| 2331 | if (nbytes >= max_bytes) | 2323 | } else { |
| 2332 | return -E2BIG; | 2324 | /* |
| 2333 | /* Allocate a dynamic buffer if we need one */ | 2325 | * The same behavior and code as single_open(). Returns |
| 2334 | if (nbytes >= sizeof(local_buffer)) { | 2326 | * !NULL if pos is at the beginning; otherwise, NULL. |
| 2335 | buffer = kmalloc(nbytes + 1, GFP_KERNEL); | 2327 | */ |
| 2336 | if (buffer == NULL) | 2328 | return NULL + !*ppos; |
| 2337 | return -ENOMEM; | ||
| 2338 | } | ||
| 2339 | if (nbytes && copy_from_user(buffer, userbuf, nbytes)) { | ||
| 2340 | retval = -EFAULT; | ||
| 2341 | goto out; | ||
| 2342 | } | 2329 | } |
| 2343 | |||
| 2344 | buffer[nbytes] = 0; /* nul-terminate */ | ||
| 2345 | retval = cft->write_string(css, cft, strstrip(buffer)); | ||
| 2346 | if (!retval) | ||
| 2347 | retval = nbytes; | ||
| 2348 | out: | ||
| 2349 | if (buffer != local_buffer) | ||
| 2350 | kfree(buffer); | ||
| 2351 | return retval; | ||
| 2352 | } | 2330 | } |
| 2353 | 2331 | ||
| 2354 | static ssize_t cgroup_file_write(struct file *file, const char __user *buf, | 2332 | static void *cgroup_seqfile_next(struct seq_file *seq, void *v, loff_t *ppos) |
| 2355 | size_t nbytes, loff_t *ppos) | ||
| 2356 | { | 2333 | { |
| 2357 | struct cfent *cfe = __d_cfe(file->f_dentry); | 2334 | struct cftype *cft = seq_cft(seq); |
| 2358 | struct cftype *cft = __d_cft(file->f_dentry); | ||
| 2359 | struct cgroup_subsys_state *css = cfe->css; | ||
| 2360 | 2335 | ||
| 2361 | if (cft->write) | 2336 | if (cft->seq_next) { |
| 2362 | return cft->write(css, cft, file, buf, nbytes, ppos); | 2337 | return cft->seq_next(seq, v, ppos); |
| 2363 | if (cft->write_u64 || cft->write_s64) | 2338 | } else { |
| 2364 | return cgroup_write_X64(css, cft, file, buf, nbytes, ppos); | 2339 | /* |
| 2365 | if (cft->write_string) | 2340 | * The same behavior and code as single_open(), always |
| 2366 | return cgroup_write_string(css, cft, file, buf, nbytes, ppos); | 2341 | * terminate after the initial read. |
| 2367 | if (cft->trigger) { | 2342 | */ |
| 2368 | int ret = cft->trigger(css, (unsigned int)cft->private); | 2343 | ++*ppos; |
| 2369 | return ret ? ret : nbytes; | 2344 | return NULL; |
| 2370 | } | 2345 | } |
| 2371 | return -EINVAL; | ||
| 2372 | } | 2346 | } |
| 2373 | 2347 | ||
| 2374 | static ssize_t cgroup_read_u64(struct cgroup_subsys_state *css, | 2348 | static void cgroup_seqfile_stop(struct seq_file *seq, void *v) |
| 2375 | struct cftype *cft, struct file *file, | ||
| 2376 | char __user *buf, size_t nbytes, loff_t *ppos) | ||
| 2377 | { | 2349 | { |
| 2378 | char tmp[CGROUP_LOCAL_BUFFER_SIZE]; | 2350 | struct cftype *cft = seq_cft(seq); |
| 2379 | u64 val = cft->read_u64(css, cft); | ||
| 2380 | int len = sprintf(tmp, "%llu\n", (unsigned long long) val); | ||
| 2381 | 2351 | ||
| 2382 | return simple_read_from_buffer(buf, nbytes, ppos, tmp, len); | 2352 | if (cft->seq_stop) |
| 2353 | cft->seq_stop(seq, v); | ||
| 2383 | } | 2354 | } |
| 2384 | 2355 | ||
| 2385 | static ssize_t cgroup_read_s64(struct cgroup_subsys_state *css, | 2356 | static int cgroup_seqfile_show(struct seq_file *m, void *arg) |
| 2386 | struct cftype *cft, struct file *file, | ||
| 2387 | char __user *buf, size_t nbytes, loff_t *ppos) | ||
| 2388 | { | 2357 | { |
| 2389 | char tmp[CGROUP_LOCAL_BUFFER_SIZE]; | 2358 | struct cftype *cft = seq_cft(m); |
| 2390 | s64 val = cft->read_s64(css, cft); | 2359 | struct cgroup_subsys_state *css = seq_css(m); |
| 2391 | int len = sprintf(tmp, "%lld\n", (long long) val); | ||
| 2392 | 2360 | ||
| 2393 | return simple_read_from_buffer(buf, nbytes, ppos, tmp, len); | 2361 | if (cft->seq_show) |
| 2394 | } | 2362 | return cft->seq_show(m, arg); |
| 2395 | 2363 | ||
| 2396 | static ssize_t cgroup_file_read(struct file *file, char __user *buf, | ||
| 2397 | size_t nbytes, loff_t *ppos) | ||
| 2398 | { | ||
| 2399 | struct cfent *cfe = __d_cfe(file->f_dentry); | ||
| 2400 | struct cftype *cft = __d_cft(file->f_dentry); | ||
| 2401 | struct cgroup_subsys_state *css = cfe->css; | ||
| 2402 | |||
| 2403 | if (cft->read) | ||
| 2404 | return cft->read(css, cft, file, buf, nbytes, ppos); | ||
| 2405 | if (cft->read_u64) | 2364 | if (cft->read_u64) |
| 2406 | return cgroup_read_u64(css, cft, file, buf, nbytes, ppos); | 2365 | seq_printf(m, "%llu\n", cft->read_u64(css, cft)); |
| 2407 | if (cft->read_s64) | 2366 | else if (cft->read_s64) |
| 2408 | return cgroup_read_s64(css, cft, file, buf, nbytes, ppos); | 2367 | seq_printf(m, "%lld\n", cft->read_s64(css, cft)); |
| 2409 | return -EINVAL; | 2368 | else |
| 2410 | } | 2369 | return -EINVAL; |
| 2411 | 2370 | return 0; | |
| 2412 | /* | ||
| 2413 | * seqfile ops/methods for returning structured data. Currently just | ||
| 2414 | * supports string->u64 maps, but can be extended in future. | ||
| 2415 | */ | ||
| 2416 | |||
| 2417 | static int cgroup_map_add(struct cgroup_map_cb *cb, const char *key, u64 value) | ||
| 2418 | { | ||
| 2419 | struct seq_file *sf = cb->state; | ||
| 2420 | return seq_printf(sf, "%s %llu\n", key, (unsigned long long)value); | ||
| 2421 | } | ||
| 2422 | |||
| 2423 | static int cgroup_seqfile_show(struct seq_file *m, void *arg) | ||
| 2424 | { | ||
| 2425 | struct cfent *cfe = m->private; | ||
| 2426 | struct cftype *cft = cfe->type; | ||
| 2427 | struct cgroup_subsys_state *css = cfe->css; | ||
| 2428 | |||
| 2429 | if (cft->read_map) { | ||
| 2430 | struct cgroup_map_cb cb = { | ||
| 2431 | .fill = cgroup_map_add, | ||
| 2432 | .state = m, | ||
| 2433 | }; | ||
| 2434 | return cft->read_map(css, cft, &cb); | ||
| 2435 | } | ||
| 2436 | return cft->read_seq_string(css, cft, m); | ||
| 2437 | } | 2371 | } |
| 2438 | 2372 | ||
| 2439 | static const struct file_operations cgroup_seqfile_operations = { | 2373 | static struct seq_operations cgroup_seq_operations = { |
| 2440 | .read = seq_read, | 2374 | .start = cgroup_seqfile_start, |
| 2441 | .write = cgroup_file_write, | 2375 | .next = cgroup_seqfile_next, |
| 2442 | .llseek = seq_lseek, | 2376 | .stop = cgroup_seqfile_stop, |
| 2443 | .release = cgroup_file_release, | 2377 | .show = cgroup_seqfile_show, |
| 2444 | }; | 2378 | }; |
| 2445 | 2379 | ||
| 2446 | static int cgroup_file_open(struct inode *inode, struct file *file) | 2380 | static int cgroup_file_open(struct inode *inode, struct file *file) |
| @@ -2449,6 +2383,7 @@ static int cgroup_file_open(struct inode *inode, struct file *file) | |||
| 2449 | struct cftype *cft = __d_cft(file->f_dentry); | 2383 | struct cftype *cft = __d_cft(file->f_dentry); |
| 2450 | struct cgroup *cgrp = __d_cgrp(cfe->dentry->d_parent); | 2384 | struct cgroup *cgrp = __d_cgrp(cfe->dentry->d_parent); |
| 2451 | struct cgroup_subsys_state *css; | 2385 | struct cgroup_subsys_state *css; |
| 2386 | struct cgroup_open_file *of; | ||
| 2452 | int err; | 2387 | int err; |
| 2453 | 2388 | ||
| 2454 | err = generic_file_open(inode, file); | 2389 | err = generic_file_open(inode, file); |
| @@ -2478,32 +2413,26 @@ static int cgroup_file_open(struct inode *inode, struct file *file) | |||
| 2478 | WARN_ON_ONCE(cfe->css && cfe->css != css); | 2413 | WARN_ON_ONCE(cfe->css && cfe->css != css); |
| 2479 | cfe->css = css; | 2414 | cfe->css = css; |
| 2480 | 2415 | ||
| 2481 | if (cft->read_map || cft->read_seq_string) { | 2416 | of = __seq_open_private(file, &cgroup_seq_operations, |
| 2482 | file->f_op = &cgroup_seqfile_operations; | 2417 | sizeof(struct cgroup_open_file)); |
| 2483 | err = single_open(file, cgroup_seqfile_show, cfe); | 2418 | if (of) { |
| 2484 | } else if (cft->open) { | 2419 | of->cfe = cfe; |
| 2485 | err = cft->open(inode, file); | 2420 | return 0; |
| 2486 | } | 2421 | } |
| 2487 | 2422 | ||
| 2488 | if (css->ss && err) | 2423 | if (css->ss) |
| 2489 | css_put(css); | 2424 | css_put(css); |
| 2490 | return err; | 2425 | return -ENOMEM; |
| 2491 | } | 2426 | } |
| 2492 | 2427 | ||
| 2493 | static int cgroup_file_release(struct inode *inode, struct file *file) | 2428 | static int cgroup_file_release(struct inode *inode, struct file *file) |
| 2494 | { | 2429 | { |
| 2495 | struct cfent *cfe = __d_cfe(file->f_dentry); | 2430 | struct cfent *cfe = __d_cfe(file->f_dentry); |
| 2496 | struct cftype *cft = __d_cft(file->f_dentry); | ||
| 2497 | struct cgroup_subsys_state *css = cfe->css; | 2431 | struct cgroup_subsys_state *css = cfe->css; |
| 2498 | int ret = 0; | ||
| 2499 | 2432 | ||
| 2500 | if (cft->release) | ||
| 2501 | ret = cft->release(inode, file); | ||
| 2502 | if (css->ss) | 2433 | if (css->ss) |
| 2503 | css_put(css); | 2434 | css_put(css); |
| 2504 | if (file->f_op == &cgroup_seqfile_operations) | 2435 | return seq_release_private(inode, file); |
| 2505 | single_release(inode, file); | ||
| 2506 | return ret; | ||
| 2507 | } | 2436 | } |
| 2508 | 2437 | ||
| 2509 | /* | 2438 | /* |
| @@ -2614,7 +2543,7 @@ static ssize_t cgroup_listxattr(struct dentry *dentry, char *buf, size_t size) | |||
| 2614 | } | 2543 | } |
| 2615 | 2544 | ||
| 2616 | static const struct file_operations cgroup_file_operations = { | 2545 | static const struct file_operations cgroup_file_operations = { |
| 2617 | .read = cgroup_file_read, | 2546 | .read = seq_read, |
| 2618 | .write = cgroup_file_write, | 2547 | .write = cgroup_file_write, |
| 2619 | .llseek = generic_file_llseek, | 2548 | .llseek = generic_file_llseek, |
| 2620 | .open = cgroup_file_open, | 2549 | .open = cgroup_file_open, |
| @@ -2639,16 +2568,6 @@ static const struct inode_operations cgroup_dir_inode_operations = { | |||
| 2639 | .removexattr = cgroup_removexattr, | 2568 | .removexattr = cgroup_removexattr, |
| 2640 | }; | 2569 | }; |
| 2641 | 2570 | ||
| 2642 | /* | ||
| 2643 | * Check if a file is a control file | ||
| 2644 | */ | ||
| 2645 | static inline struct cftype *__file_cft(struct file *file) | ||
| 2646 | { | ||
| 2647 | if (file_inode(file)->i_fop != &cgroup_file_operations) | ||
| 2648 | return ERR_PTR(-EINVAL); | ||
| 2649 | return __d_cft(file->f_dentry); | ||
| 2650 | } | ||
| 2651 | |||
| 2652 | static int cgroup_create_file(struct dentry *dentry, umode_t mode, | 2571 | static int cgroup_create_file(struct dentry *dentry, umode_t mode, |
| 2653 | struct super_block *sb) | 2572 | struct super_block *sb) |
| 2654 | { | 2573 | { |
| @@ -2706,12 +2625,11 @@ static umode_t cgroup_file_mode(const struct cftype *cft) | |||
| 2706 | if (cft->mode) | 2625 | if (cft->mode) |
| 2707 | return cft->mode; | 2626 | return cft->mode; |
| 2708 | 2627 | ||
| 2709 | if (cft->read || cft->read_u64 || cft->read_s64 || | 2628 | if (cft->read_u64 || cft->read_s64 || cft->seq_show) |
| 2710 | cft->read_map || cft->read_seq_string) | ||
| 2711 | mode |= S_IRUGO; | 2629 | mode |= S_IRUGO; |
| 2712 | 2630 | ||
| 2713 | if (cft->write || cft->write_u64 || cft->write_s64 || | 2631 | if (cft->write_u64 || cft->write_s64 || cft->write_string || |
| 2714 | cft->write_string || cft->trigger) | 2632 | cft->trigger) |
| 2715 | mode |= S_IWUSR; | 2633 | mode |= S_IWUSR; |
| 2716 | 2634 | ||
| 2717 | return mode; | 2635 | return mode; |
| @@ -3007,9 +2925,9 @@ static void cgroup_enable_task_cg_lists(void) | |||
| 3007 | * @parent_css: css whose children to walk | 2925 | * @parent_css: css whose children to walk |
| 3008 | * | 2926 | * |
| 3009 | * This function returns the next child of @parent_css and should be called | 2927 | * This function returns the next child of @parent_css and should be called |
| 3010 | * under RCU read lock. The only requirement is that @parent_css and | 2928 | * under either cgroup_mutex or RCU read lock. The only requirement is |
| 3011 | * @pos_css are accessible. The next sibling is guaranteed to be returned | 2929 | * that @parent_css and @pos_css are accessible. The next sibling is |
| 3012 | * regardless of their states. | 2930 | * guaranteed to be returned regardless of their states. |
| 3013 | */ | 2931 | */ |
| 3014 | struct cgroup_subsys_state * | 2932 | struct cgroup_subsys_state * |
| 3015 | css_next_child(struct cgroup_subsys_state *pos_css, | 2933 | css_next_child(struct cgroup_subsys_state *pos_css, |
| @@ -3019,7 +2937,7 @@ css_next_child(struct cgroup_subsys_state *pos_css, | |||
| 3019 | struct cgroup *cgrp = parent_css->cgroup; | 2937 | struct cgroup *cgrp = parent_css->cgroup; |
| 3020 | struct cgroup *next; | 2938 | struct cgroup *next; |
| 3021 | 2939 | ||
| 3022 | WARN_ON_ONCE(!rcu_read_lock_held()); | 2940 | cgroup_assert_mutex_or_rcu_locked(); |
| 3023 | 2941 | ||
| 3024 | /* | 2942 | /* |
| 3025 | * @pos could already have been removed. Once a cgroup is removed, | 2943 | * @pos could already have been removed. Once a cgroup is removed, |
| @@ -3066,10 +2984,10 @@ EXPORT_SYMBOL_GPL(css_next_child); | |||
| 3066 | * to visit for pre-order traversal of @root's descendants. @root is | 2984 | * to visit for pre-order traversal of @root's descendants. @root is |
| 3067 | * included in the iteration and the first node to be visited. | 2985 | * included in the iteration and the first node to be visited. |
| 3068 | * | 2986 | * |
| 3069 | * While this function requires RCU read locking, it doesn't require the | 2987 | * While this function requires cgroup_mutex or RCU read locking, it |
| 3070 | * whole traversal to be contained in a single RCU critical section. This | 2988 | * doesn't require the whole traversal to be contained in a single critical |
| 3071 | * function will return the correct next descendant as long as both @pos | 2989 | * section. This function will return the correct next descendant as long |
| 3072 | * and @root are accessible and @pos is a descendant of @root. | 2990 | * as both @pos and @root are accessible and @pos is a descendant of @root. |
| 3073 | */ | 2991 | */ |
| 3074 | struct cgroup_subsys_state * | 2992 | struct cgroup_subsys_state * |
| 3075 | css_next_descendant_pre(struct cgroup_subsys_state *pos, | 2993 | css_next_descendant_pre(struct cgroup_subsys_state *pos, |
| @@ -3077,7 +2995,7 @@ css_next_descendant_pre(struct cgroup_subsys_state *pos, | |||
| 3077 | { | 2995 | { |
| 3078 | struct cgroup_subsys_state *next; | 2996 | struct cgroup_subsys_state *next; |
| 3079 | 2997 | ||
| 3080 | WARN_ON_ONCE(!rcu_read_lock_held()); | 2998 | cgroup_assert_mutex_or_rcu_locked(); |
| 3081 | 2999 | ||
| 3082 | /* if first iteration, visit @root */ | 3000 | /* if first iteration, visit @root */ |
| 3083 | if (!pos) | 3001 | if (!pos) |
| @@ -3108,17 +3026,17 @@ EXPORT_SYMBOL_GPL(css_next_descendant_pre); | |||
| 3108 | * is returned. This can be used during pre-order traversal to skip | 3026 | * is returned. This can be used during pre-order traversal to skip |
| 3109 | * subtree of @pos. | 3027 | * subtree of @pos. |
| 3110 | * | 3028 | * |
| 3111 | * While this function requires RCU read locking, it doesn't require the | 3029 | * While this function requires cgroup_mutex or RCU read locking, it |
| 3112 | * whole traversal to be contained in a single RCU critical section. This | 3030 | * doesn't require the whole traversal to be contained in a single critical |
| 3113 | * function will return the correct rightmost descendant as long as @pos is | 3031 | * section. This function will return the correct rightmost descendant as |
| 3114 | * accessible. | 3032 | * long as @pos is accessible. |
| 3115 | */ | 3033 | */ |
| 3116 | struct cgroup_subsys_state * | 3034 | struct cgroup_subsys_state * |
| 3117 | css_rightmost_descendant(struct cgroup_subsys_state *pos) | 3035 | css_rightmost_descendant(struct cgroup_subsys_state *pos) |
| 3118 | { | 3036 | { |
| 3119 | struct cgroup_subsys_state *last, *tmp; | 3037 | struct cgroup_subsys_state *last, *tmp; |
| 3120 | 3038 | ||
| 3121 | WARN_ON_ONCE(!rcu_read_lock_held()); | 3039 | cgroup_assert_mutex_or_rcu_locked(); |
| 3122 | 3040 | ||
| 3123 | do { | 3041 | do { |
| 3124 | last = pos; | 3042 | last = pos; |
| @@ -3154,10 +3072,11 @@ css_leftmost_descendant(struct cgroup_subsys_state *pos) | |||
| 3154 | * to visit for post-order traversal of @root's descendants. @root is | 3072 | * to visit for post-order traversal of @root's descendants. @root is |
| 3155 | * included in the iteration and the last node to be visited. | 3073 | * included in the iteration and the last node to be visited. |
| 3156 | * | 3074 | * |
| 3157 | * While this function requires RCU read locking, it doesn't require the | 3075 | * While this function requires cgroup_mutex or RCU read locking, it |
| 3158 | * whole traversal to be contained in a single RCU critical section. This | 3076 | * doesn't require the whole traversal to be contained in a single critical |
| 3159 | * function will return the correct next descendant as long as both @pos | 3077 | * section. This function will return the correct next descendant as long |
| 3160 | * and @cgroup are accessible and @pos is a descendant of @cgroup. | 3078 | * as both @pos and @cgroup are accessible and @pos is a descendant of |
| 3079 | * @cgroup. | ||
| 3161 | */ | 3080 | */ |
| 3162 | struct cgroup_subsys_state * | 3081 | struct cgroup_subsys_state * |
| 3163 | css_next_descendant_post(struct cgroup_subsys_state *pos, | 3082 | css_next_descendant_post(struct cgroup_subsys_state *pos, |
| @@ -3165,7 +3084,7 @@ css_next_descendant_post(struct cgroup_subsys_state *pos, | |||
| 3165 | { | 3084 | { |
| 3166 | struct cgroup_subsys_state *next; | 3085 | struct cgroup_subsys_state *next; |
| 3167 | 3086 | ||
| 3168 | WARN_ON_ONCE(!rcu_read_lock_held()); | 3087 | cgroup_assert_mutex_or_rcu_locked(); |
| 3169 | 3088 | ||
| 3170 | /* if first iteration, visit leftmost descendant which may be @root */ | 3089 | /* if first iteration, visit leftmost descendant which may be @root */ |
| 3171 | if (!pos) | 3090 | if (!pos) |
| @@ -3504,14 +3423,12 @@ struct cgroup_pidlist { | |||
| 3504 | pid_t *list; | 3423 | pid_t *list; |
| 3505 | /* how many elements the above list has */ | 3424 | /* how many elements the above list has */ |
| 3506 | int length; | 3425 | int length; |
| 3507 | /* how many files are using the current array */ | ||
| 3508 | int use_count; | ||
| 3509 | /* each of these stored in a list by its cgroup */ | 3426 | /* each of these stored in a list by its cgroup */ |
| 3510 | struct list_head links; | 3427 | struct list_head links; |
| 3511 | /* pointer to the cgroup we belong to, for list removal purposes */ | 3428 | /* pointer to the cgroup we belong to, for list removal purposes */ |
| 3512 | struct cgroup *owner; | 3429 | struct cgroup *owner; |
| 3513 | /* protects the other fields */ | 3430 | /* for delayed destruction */ |
| 3514 | struct rw_semaphore rwsem; | 3431 | struct delayed_work destroy_dwork; |
| 3515 | }; | 3432 | }; |
| 3516 | 3433 | ||
| 3517 | /* | 3434 | /* |
| @@ -3527,6 +3444,7 @@ static void *pidlist_allocate(int count) | |||
| 3527 | else | 3444 | else |
| 3528 | return kmalloc(count * sizeof(pid_t), GFP_KERNEL); | 3445 | return kmalloc(count * sizeof(pid_t), GFP_KERNEL); |
| 3529 | } | 3446 | } |
| 3447 | |||
| 3530 | static void pidlist_free(void *p) | 3448 | static void pidlist_free(void *p) |
| 3531 | { | 3449 | { |
| 3532 | if (is_vmalloc_addr(p)) | 3450 | if (is_vmalloc_addr(p)) |
| @@ -3536,6 +3454,47 @@ static void pidlist_free(void *p) | |||
| 3536 | } | 3454 | } |
| 3537 | 3455 | ||
| 3538 | /* | 3456 | /* |
| 3457 | * Used to destroy all pidlists lingering waiting for destroy timer. None | ||
| 3458 | * should be left afterwards. | ||
| 3459 | */ | ||
| 3460 | static void cgroup_pidlist_destroy_all(struct cgroup *cgrp) | ||
| 3461 | { | ||
| 3462 | struct cgroup_pidlist *l, *tmp_l; | ||
| 3463 | |||
| 3464 | mutex_lock(&cgrp->pidlist_mutex); | ||
| 3465 | list_for_each_entry_safe(l, tmp_l, &cgrp->pidlists, links) | ||
| 3466 | mod_delayed_work(cgroup_pidlist_destroy_wq, &l->destroy_dwork, 0); | ||
| 3467 | mutex_unlock(&cgrp->pidlist_mutex); | ||
| 3468 | |||
| 3469 | flush_workqueue(cgroup_pidlist_destroy_wq); | ||
| 3470 | BUG_ON(!list_empty(&cgrp->pidlists)); | ||
| 3471 | } | ||
| 3472 | |||
| 3473 | static void cgroup_pidlist_destroy_work_fn(struct work_struct *work) | ||
| 3474 | { | ||
| 3475 | struct delayed_work *dwork = to_delayed_work(work); | ||
| 3476 | struct cgroup_pidlist *l = container_of(dwork, struct cgroup_pidlist, | ||
| 3477 | destroy_dwork); | ||
| 3478 | struct cgroup_pidlist *tofree = NULL; | ||
| 3479 | |||
| 3480 | mutex_lock(&l->owner->pidlist_mutex); | ||
| 3481 | |||
| 3482 | /* | ||
| 3483 | * Destroy iff we didn't get queued again. The state won't change | ||
| 3484 | * as destroy_dwork can only be queued while locked. | ||
| 3485 | */ | ||
| 3486 | if (!delayed_work_pending(dwork)) { | ||
| 3487 | list_del(&l->links); | ||
| 3488 | pidlist_free(l->list); | ||
| 3489 | put_pid_ns(l->key.ns); | ||
| 3490 | tofree = l; | ||
| 3491 | } | ||
| 3492 | |||
| 3493 | mutex_unlock(&l->owner->pidlist_mutex); | ||
| 3494 | kfree(tofree); | ||
| 3495 | } | ||
| 3496 | |||
| 3497 | /* | ||
| 3539 | * pidlist_uniq - given a kmalloc()ed list, strip out all duplicate entries | 3498 | * pidlist_uniq - given a kmalloc()ed list, strip out all duplicate entries |
| 3540 | * Returns the number of unique elements. | 3499 | * Returns the number of unique elements. |
| 3541 | */ | 3500 | */ |
| @@ -3565,52 +3524,92 @@ after: | |||
| 3565 | return dest; | 3524 | return dest; |
| 3566 | } | 3525 | } |
| 3567 | 3526 | ||
| 3527 | /* | ||
| 3528 | * The two pid files - task and cgroup.procs - guaranteed that the result | ||
| 3529 | * is sorted, which forced this whole pidlist fiasco. As pid order is | ||
| 3530 | * different per namespace, each namespace needs differently sorted list, | ||
| 3531 | * making it impossible to use, for example, single rbtree of member tasks | ||
| 3532 | * sorted by task pointer. As pidlists can be fairly large, allocating one | ||
| 3533 | * per open file is dangerous, so cgroup had to implement shared pool of | ||
| 3534 | * pidlists keyed by cgroup and namespace. | ||
| 3535 | * | ||
| 3536 | * All this extra complexity was caused by the original implementation | ||
| 3537 | * committing to an entirely unnecessary property. In the long term, we | ||
| 3538 | * want to do away with it. Explicitly scramble sort order if | ||
| 3539 | * sane_behavior so that no such expectation exists in the new interface. | ||
| 3540 | * | ||
| 3541 | * Scrambling is done by swapping every two consecutive bits, which is | ||
| 3542 | * non-identity one-to-one mapping which disturbs sort order sufficiently. | ||
| 3543 | */ | ||
| 3544 | static pid_t pid_fry(pid_t pid) | ||
| 3545 | { | ||
| 3546 | unsigned a = pid & 0x55555555; | ||
| 3547 | unsigned b = pid & 0xAAAAAAAA; | ||
| 3548 | |||
| 3549 | return (a << 1) | (b >> 1); | ||
| 3550 | } | ||
| 3551 | |||
| 3552 | static pid_t cgroup_pid_fry(struct cgroup *cgrp, pid_t pid) | ||
| 3553 | { | ||
| 3554 | if (cgroup_sane_behavior(cgrp)) | ||
| 3555 | return pid_fry(pid); | ||
| 3556 | else | ||
| 3557 | return pid; | ||
| 3558 | } | ||
| 3559 | |||
| 3568 | static int cmppid(const void *a, const void *b) | 3560 | static int cmppid(const void *a, const void *b) |
| 3569 | { | 3561 | { |
| 3570 | return *(pid_t *)a - *(pid_t *)b; | 3562 | return *(pid_t *)a - *(pid_t *)b; |
| 3571 | } | 3563 | } |
| 3572 | 3564 | ||
| 3565 | static int fried_cmppid(const void *a, const void *b) | ||
| 3566 | { | ||
| 3567 | return pid_fry(*(pid_t *)a) - pid_fry(*(pid_t *)b); | ||
| 3568 | } | ||
| 3569 | |||
| 3570 | static struct cgroup_pidlist *cgroup_pidlist_find(struct cgroup *cgrp, | ||
| 3571 | enum cgroup_filetype type) | ||
| 3572 | { | ||
| 3573 | struct cgroup_pidlist *l; | ||
| 3574 | /* don't need task_nsproxy() if we're looking at ourself */ | ||
| 3575 | struct pid_namespace *ns = task_active_pid_ns(current); | ||
| 3576 | |||
| 3577 | lockdep_assert_held(&cgrp->pidlist_mutex); | ||
| 3578 | |||
| 3579 | list_for_each_entry(l, &cgrp->pidlists, links) | ||
| 3580 | if (l->key.type == type && l->key.ns == ns) | ||
| 3581 | return l; | ||
| 3582 | return NULL; | ||
| 3583 | } | ||
| 3584 | |||
| 3573 | /* | 3585 | /* |
| 3574 | * find the appropriate pidlist for our purpose (given procs vs tasks) | 3586 | * find the appropriate pidlist for our purpose (given procs vs tasks) |
| 3575 | * returns with the lock on that pidlist already held, and takes care | 3587 | * returns with the lock on that pidlist already held, and takes care |
| 3576 | * of the use count, or returns NULL with no locks held if we're out of | 3588 | * of the use count, or returns NULL with no locks held if we're out of |
| 3577 | * memory. | 3589 | * memory. |
| 3578 | */ | 3590 | */ |
| 3579 | static struct cgroup_pidlist *cgroup_pidlist_find(struct cgroup *cgrp, | 3591 | static struct cgroup_pidlist *cgroup_pidlist_find_create(struct cgroup *cgrp, |
| 3580 | enum cgroup_filetype type) | 3592 | enum cgroup_filetype type) |
| 3581 | { | 3593 | { |
| 3582 | struct cgroup_pidlist *l; | 3594 | struct cgroup_pidlist *l; |
| 3583 | /* don't need task_nsproxy() if we're looking at ourself */ | ||
| 3584 | struct pid_namespace *ns = task_active_pid_ns(current); | ||
| 3585 | 3595 | ||
| 3586 | /* | 3596 | lockdep_assert_held(&cgrp->pidlist_mutex); |
| 3587 | * We can't drop the pidlist_mutex before taking the l->rwsem in case | 3597 | |
| 3588 | * the last ref-holder is trying to remove l from the list at the same | 3598 | l = cgroup_pidlist_find(cgrp, type); |
| 3589 | * time. Holding the pidlist_mutex precludes somebody taking whichever | 3599 | if (l) |
| 3590 | * list we find out from under us - compare release_pid_array(). | 3600 | return l; |
| 3591 | */ | 3601 | |
| 3592 | mutex_lock(&cgrp->pidlist_mutex); | ||
| 3593 | list_for_each_entry(l, &cgrp->pidlists, links) { | ||
| 3594 | if (l->key.type == type && l->key.ns == ns) { | ||
| 3595 | /* make sure l doesn't vanish out from under us */ | ||
| 3596 | down_write(&l->rwsem); | ||
| 3597 | mutex_unlock(&cgrp->pidlist_mutex); | ||
| 3598 | return l; | ||
| 3599 | } | ||
| 3600 | } | ||
| 3601 | /* entry not found; create a new one */ | 3602 | /* entry not found; create a new one */ |
| 3602 | l = kzalloc(sizeof(struct cgroup_pidlist), GFP_KERNEL); | 3603 | l = kzalloc(sizeof(struct cgroup_pidlist), GFP_KERNEL); |
| 3603 | if (!l) { | 3604 | if (!l) |
| 3604 | mutex_unlock(&cgrp->pidlist_mutex); | ||
| 3605 | return l; | 3605 | return l; |
| 3606 | } | 3606 | |
| 3607 | init_rwsem(&l->rwsem); | 3607 | INIT_DELAYED_WORK(&l->destroy_dwork, cgroup_pidlist_destroy_work_fn); |
| 3608 | down_write(&l->rwsem); | ||
| 3609 | l->key.type = type; | 3608 | l->key.type = type; |
| 3610 | l->key.ns = get_pid_ns(ns); | 3609 | /* don't need task_nsproxy() if we're looking at ourself */ |
| 3610 | l->key.ns = get_pid_ns(task_active_pid_ns(current)); | ||
| 3611 | l->owner = cgrp; | 3611 | l->owner = cgrp; |
| 3612 | list_add(&l->links, &cgrp->pidlists); | 3612 | list_add(&l->links, &cgrp->pidlists); |
| 3613 | mutex_unlock(&cgrp->pidlist_mutex); | ||
| 3614 | return l; | 3613 | return l; |
| 3615 | } | 3614 | } |
| 3616 | 3615 | ||
| @@ -3627,6 +3626,8 @@ static int pidlist_array_load(struct cgroup *cgrp, enum cgroup_filetype type, | |||
| 3627 | struct task_struct *tsk; | 3626 | struct task_struct *tsk; |
| 3628 | struct cgroup_pidlist *l; | 3627 | struct cgroup_pidlist *l; |
| 3629 | 3628 | ||
| 3629 | lockdep_assert_held(&cgrp->pidlist_mutex); | ||
| 3630 | |||
| 3630 | /* | 3631 | /* |
| 3631 | * If cgroup gets more users after we read count, we won't have | 3632 | * If cgroup gets more users after we read count, we won't have |
| 3632 | * enough space - tough. This race is indistinguishable to the | 3633 | * enough space - tough. This race is indistinguishable to the |
| @@ -3653,20 +3654,24 @@ static int pidlist_array_load(struct cgroup *cgrp, enum cgroup_filetype type, | |||
| 3653 | css_task_iter_end(&it); | 3654 | css_task_iter_end(&it); |
| 3654 | length = n; | 3655 | length = n; |
| 3655 | /* now sort & (if procs) strip out duplicates */ | 3656 | /* now sort & (if procs) strip out duplicates */ |
| 3656 | sort(array, length, sizeof(pid_t), cmppid, NULL); | 3657 | if (cgroup_sane_behavior(cgrp)) |
| 3658 | sort(array, length, sizeof(pid_t), fried_cmppid, NULL); | ||
| 3659 | else | ||
| 3660 | sort(array, length, sizeof(pid_t), cmppid, NULL); | ||
| 3657 | if (type == CGROUP_FILE_PROCS) | 3661 | if (type == CGROUP_FILE_PROCS) |
| 3658 | length = pidlist_uniq(array, length); | 3662 | length = pidlist_uniq(array, length); |
| 3659 | l = cgroup_pidlist_find(cgrp, type); | 3663 | |
| 3664 | l = cgroup_pidlist_find_create(cgrp, type); | ||
| 3660 | if (!l) { | 3665 | if (!l) { |
| 3666 | mutex_unlock(&cgrp->pidlist_mutex); | ||
| 3661 | pidlist_free(array); | 3667 | pidlist_free(array); |
| 3662 | return -ENOMEM; | 3668 | return -ENOMEM; |
| 3663 | } | 3669 | } |
| 3664 | /* store array, freeing old if necessary - lock already held */ | 3670 | |
| 3671 | /* store array, freeing old if necessary */ | ||
| 3665 | pidlist_free(l->list); | 3672 | pidlist_free(l->list); |
| 3666 | l->list = array; | 3673 | l->list = array; |
| 3667 | l->length = length; | 3674 | l->length = length; |
| 3668 | l->use_count++; | ||
| 3669 | up_write(&l->rwsem); | ||
| 3670 | *lp = l; | 3675 | *lp = l; |
| 3671 | return 0; | 3676 | return 0; |
| 3672 | } | 3677 | } |
| @@ -3740,20 +3745,45 @@ static void *cgroup_pidlist_start(struct seq_file *s, loff_t *pos) | |||
| 3740 | * after a seek to the start). Use a binary-search to find the | 3745 | * after a seek to the start). Use a binary-search to find the |
| 3741 | * next pid to display, if any | 3746 | * next pid to display, if any |
| 3742 | */ | 3747 | */ |
| 3743 | struct cgroup_pidlist *l = s->private; | 3748 | struct cgroup_open_file *of = s->private; |
| 3749 | struct cgroup *cgrp = seq_css(s)->cgroup; | ||
| 3750 | struct cgroup_pidlist *l; | ||
| 3751 | enum cgroup_filetype type = seq_cft(s)->private; | ||
| 3744 | int index = 0, pid = *pos; | 3752 | int index = 0, pid = *pos; |
| 3745 | int *iter; | 3753 | int *iter, ret; |
| 3754 | |||
| 3755 | mutex_lock(&cgrp->pidlist_mutex); | ||
| 3756 | |||
| 3757 | /* | ||
| 3758 | * !NULL @of->priv indicates that this isn't the first start() | ||
| 3759 | * after open. If the matching pidlist is around, we can use that. | ||
| 3760 | * Look for it. Note that @of->priv can't be used directly. It | ||
| 3761 | * could already have been destroyed. | ||
| 3762 | */ | ||
| 3763 | if (of->priv) | ||
| 3764 | of->priv = cgroup_pidlist_find(cgrp, type); | ||
| 3765 | |||
| 3766 | /* | ||
| 3767 | * Either this is the first start() after open or the matching | ||
| 3768 | * pidlist has been destroyed inbetween. Create a new one. | ||
| 3769 | */ | ||
| 3770 | if (!of->priv) { | ||
| 3771 | ret = pidlist_array_load(cgrp, type, | ||
| 3772 | (struct cgroup_pidlist **)&of->priv); | ||
| 3773 | if (ret) | ||
| 3774 | return ERR_PTR(ret); | ||
| 3775 | } | ||
| 3776 | l = of->priv; | ||
| 3746 | 3777 | ||
| 3747 | down_read(&l->rwsem); | ||
| 3748 | if (pid) { | 3778 | if (pid) { |
| 3749 | int end = l->length; | 3779 | int end = l->length; |
| 3750 | 3780 | ||
| 3751 | while (index < end) { | 3781 | while (index < end) { |
| 3752 | int mid = (index + end) / 2; | 3782 | int mid = (index + end) / 2; |
| 3753 | if (l->list[mid] == pid) { | 3783 | if (cgroup_pid_fry(cgrp, l->list[mid]) == pid) { |
| 3754 | index = mid; | 3784 | index = mid; |
| 3755 | break; | 3785 | break; |
| 3756 | } else if (l->list[mid] <= pid) | 3786 | } else if (cgroup_pid_fry(cgrp, l->list[mid]) <= pid) |
| 3757 | index = mid + 1; | 3787 | index = mid + 1; |
| 3758 | else | 3788 | else |
| 3759 | end = mid; | 3789 | end = mid; |
| @@ -3764,19 +3794,25 @@ static void *cgroup_pidlist_start(struct seq_file *s, loff_t *pos) | |||
| 3764 | return NULL; | 3794 | return NULL; |
| 3765 | /* Update the abstract position to be the actual pid that we found */ | 3795 | /* Update the abstract position to be the actual pid that we found */ |
| 3766 | iter = l->list + index; | 3796 | iter = l->list + index; |
| 3767 | *pos = *iter; | 3797 | *pos = cgroup_pid_fry(cgrp, *iter); |
| 3768 | return iter; | 3798 | return iter; |
| 3769 | } | 3799 | } |
| 3770 | 3800 | ||
| 3771 | static void cgroup_pidlist_stop(struct seq_file *s, void *v) | 3801 | static void cgroup_pidlist_stop(struct seq_file *s, void *v) |
| 3772 | { | 3802 | { |
| 3773 | struct cgroup_pidlist *l = s->private; | 3803 | struct cgroup_open_file *of = s->private; |
| 3774 | up_read(&l->rwsem); | 3804 | struct cgroup_pidlist *l = of->priv; |
| 3805 | |||
| 3806 | if (l) | ||
| 3807 | mod_delayed_work(cgroup_pidlist_destroy_wq, &l->destroy_dwork, | ||
| 3808 | CGROUP_PIDLIST_DESTROY_DELAY); | ||
| 3809 | mutex_unlock(&seq_css(s)->cgroup->pidlist_mutex); | ||
| 3775 | } | 3810 | } |
| 3776 | 3811 | ||
| 3777 | static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos) | 3812 | static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos) |
| 3778 | { | 3813 | { |
| 3779 | struct cgroup_pidlist *l = s->private; | 3814 | struct cgroup_open_file *of = s->private; |
| 3815 | struct cgroup_pidlist *l = of->priv; | ||
| 3780 | pid_t *p = v; | 3816 | pid_t *p = v; |
| 3781 | pid_t *end = l->list + l->length; | 3817 | pid_t *end = l->list + l->length; |
| 3782 | /* | 3818 | /* |
| @@ -3787,7 +3823,7 @@ static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos) | |||
| 3787 | if (p >= end) { | 3823 | if (p >= end) { |
| 3788 | return NULL; | 3824 | return NULL; |
| 3789 | } else { | 3825 | } else { |
| 3790 | *pos = *p; | 3826 | *pos = cgroup_pid_fry(seq_css(s)->cgroup, *p); |
| 3791 | return p; | 3827 | return p; |
| 3792 | } | 3828 | } |
| 3793 | } | 3829 | } |
| @@ -3808,92 +3844,6 @@ static const struct seq_operations cgroup_pidlist_seq_operations = { | |||
| 3808 | .show = cgroup_pidlist_show, | 3844 | .show = cgroup_pidlist_show, |
| 3809 | }; | 3845 | }; |
| 3810 | 3846 | ||
| 3811 | static void cgroup_release_pid_array(struct cgroup_pidlist *l) | ||
| 3812 | { | ||
| 3813 | /* | ||
| 3814 | * the case where we're the last user of this particular pidlist will | ||
| 3815 | * have us remove it from the cgroup's list, which entails taking the | ||
| 3816 | * mutex. since in pidlist_find the pidlist->lock depends on cgroup-> | ||
| 3817 | * pidlist_mutex, we have to take pidlist_mutex first. | ||
| 3818 | */ | ||
| 3819 | mutex_lock(&l->owner->pidlist_mutex); | ||
| 3820 | down_write(&l->rwsem); | ||
| 3821 | BUG_ON(!l->use_count); | ||
| 3822 | if (!--l->use_count) { | ||
| 3823 | /* we're the last user if refcount is 0; remove and free */ | ||
| 3824 | list_del(&l->links); | ||
| 3825 | mutex_unlock(&l->owner->pidlist_mutex); | ||
| 3826 | pidlist_free(l->list); | ||
| 3827 | put_pid_ns(l->key.ns); | ||
| 3828 | up_write(&l->rwsem); | ||
| 3829 | kfree(l); | ||
| 3830 | return; | ||
| 3831 | } | ||
| 3832 | mutex_unlock(&l->owner->pidlist_mutex); | ||
| 3833 | up_write(&l->rwsem); | ||
| 3834 | } | ||
| 3835 | |||
| 3836 | static int cgroup_pidlist_release(struct inode *inode, struct file *file) | ||
| 3837 | { | ||
| 3838 | struct cgroup_pidlist *l; | ||
| 3839 | if (!(file->f_mode & FMODE_READ)) | ||
| 3840 | return 0; | ||
| 3841 | /* | ||
| 3842 | * the seq_file will only be initialized if the file was opened for | ||
| 3843 | * reading; hence we check if it's not null only in that case. | ||
| 3844 | */ | ||
| 3845 | l = ((struct seq_file *)file->private_data)->private; | ||
| 3846 | cgroup_release_pid_array(l); | ||
| 3847 | return seq_release(inode, file); | ||
| 3848 | } | ||
| 3849 | |||
| 3850 | static const struct file_operations cgroup_pidlist_operations = { | ||
| 3851 | .read = seq_read, | ||
| 3852 | .llseek = seq_lseek, | ||
| 3853 | .write = cgroup_file_write, | ||
| 3854 | .release = cgroup_pidlist_release, | ||
| 3855 | }; | ||
| 3856 | |||
| 3857 | /* | ||
| 3858 | * The following functions handle opens on a file that displays a pidlist | ||
| 3859 | * (tasks or procs). Prepare an array of the process/thread IDs of whoever's | ||
| 3860 | * in the cgroup. | ||
| 3861 | */ | ||
| 3862 | /* helper function for the two below it */ | ||
| 3863 | static int cgroup_pidlist_open(struct file *file, enum cgroup_filetype type) | ||
| 3864 | { | ||
| 3865 | struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent); | ||
| 3866 | struct cgroup_pidlist *l; | ||
| 3867 | int retval; | ||
| 3868 | |||
| 3869 | /* Nothing to do for write-only files */ | ||
| 3870 | if (!(file->f_mode & FMODE_READ)) | ||
| 3871 | return 0; | ||
| 3872 | |||
| 3873 | /* have the array populated */ | ||
| 3874 | retval = pidlist_array_load(cgrp, type, &l); | ||
| 3875 | if (retval) | ||
| 3876 | return retval; | ||
| 3877 | /* configure file information */ | ||
| 3878 | file->f_op = &cgroup_pidlist_operations; | ||
| 3879 | |||
| 3880 | retval = seq_open(file, &cgroup_pidlist_seq_operations); | ||
| 3881 | if (retval) { | ||
| 3882 | cgroup_release_pid_array(l); | ||
| 3883 | return retval; | ||
| 3884 | } | ||
| 3885 | ((struct seq_file *)file->private_data)->private = l; | ||
| 3886 | return 0; | ||
| 3887 | } | ||
| 3888 | static int cgroup_tasks_open(struct inode *unused, struct file *file) | ||
| 3889 | { | ||
| 3890 | return cgroup_pidlist_open(file, CGROUP_FILE_TASKS); | ||
| 3891 | } | ||
| 3892 | static int cgroup_procs_open(struct inode *unused, struct file *file) | ||
| 3893 | { | ||
| 3894 | return cgroup_pidlist_open(file, CGROUP_FILE_PROCS); | ||
| 3895 | } | ||
| 3896 | |||
| 3897 | static u64 cgroup_read_notify_on_release(struct cgroup_subsys_state *css, | 3847 | static u64 cgroup_read_notify_on_release(struct cgroup_subsys_state *css, |
| 3898 | struct cftype *cft) | 3848 | struct cftype *cft) |
| 3899 | { | 3849 | { |
| @@ -3928,202 +3878,6 @@ static void cgroup_dput(struct cgroup *cgrp) | |||
| 3928 | deactivate_super(sb); | 3878 | deactivate_super(sb); |
| 3929 | } | 3879 | } |
| 3930 | 3880 | ||
| 3931 | /* | ||
| 3932 | * Unregister event and free resources. | ||
| 3933 | * | ||
| 3934 | * Gets called from workqueue. | ||
| 3935 | */ | ||
| 3936 | static void cgroup_event_remove(struct work_struct *work) | ||
| 3937 | { | ||
| 3938 | struct cgroup_event *event = container_of(work, struct cgroup_event, | ||
| 3939 | remove); | ||
| 3940 | struct cgroup_subsys_state *css = event->css; | ||
| 3941 | |||
| 3942 | remove_wait_queue(event->wqh, &event->wait); | ||
| 3943 | |||
| 3944 | event->cft->unregister_event(css, event->cft, event->eventfd); | ||
| 3945 | |||
| 3946 | /* Notify userspace the event is going away. */ | ||
| 3947 | eventfd_signal(event->eventfd, 1); | ||
| 3948 | |||
| 3949 | eventfd_ctx_put(event->eventfd); | ||
| 3950 | kfree(event); | ||
| 3951 | css_put(css); | ||
| 3952 | } | ||
| 3953 | |||
| 3954 | /* | ||
| 3955 | * Gets called on POLLHUP on eventfd when user closes it. | ||
| 3956 | * | ||
| 3957 | * Called with wqh->lock held and interrupts disabled. | ||
| 3958 | */ | ||
| 3959 | static int cgroup_event_wake(wait_queue_t *wait, unsigned mode, | ||
| 3960 | int sync, void *key) | ||
| 3961 | { | ||
| 3962 | struct cgroup_event *event = container_of(wait, | ||
| 3963 | struct cgroup_event, wait); | ||
| 3964 | struct cgroup *cgrp = event->css->cgroup; | ||
| 3965 | unsigned long flags = (unsigned long)key; | ||
| 3966 | |||
| 3967 | if (flags & POLLHUP) { | ||
| 3968 | /* | ||
| 3969 | * If the event has been detached at cgroup removal, we | ||
| 3970 | * can simply return knowing the other side will cleanup | ||
| 3971 | * for us. | ||
| 3972 | * | ||
| 3973 | * We can't race against event freeing since the other | ||
| 3974 | * side will require wqh->lock via remove_wait_queue(), | ||
| 3975 | * which we hold. | ||
| 3976 | */ | ||
| 3977 | spin_lock(&cgrp->event_list_lock); | ||
| 3978 | if (!list_empty(&event->list)) { | ||
| 3979 | list_del_init(&event->list); | ||
| 3980 | /* | ||
| 3981 | * We are in atomic context, but cgroup_event_remove() | ||
| 3982 | * may sleep, so we have to call it in workqueue. | ||
| 3983 | */ | ||
| 3984 | schedule_work(&event->remove); | ||
| 3985 | } | ||
| 3986 | spin_unlock(&cgrp->event_list_lock); | ||
| 3987 | } | ||
| 3988 | |||
| 3989 | return 0; | ||
| 3990 | } | ||
| 3991 | |||
| 3992 | static void cgroup_event_ptable_queue_proc(struct file *file, | ||
| 3993 | wait_queue_head_t *wqh, poll_table *pt) | ||
| 3994 | { | ||
| 3995 | struct cgroup_event *event = container_of(pt, | ||
| 3996 | struct cgroup_event, pt); | ||
| 3997 | |||
| 3998 | event->wqh = wqh; | ||
| 3999 | add_wait_queue(wqh, &event->wait); | ||
| 4000 | } | ||
| 4001 | |||
| 4002 | /* | ||
| 4003 | * Parse input and register new cgroup event handler. | ||
| 4004 | * | ||
| 4005 | * Input must be in format '<event_fd> <control_fd> <args>'. | ||
| 4006 | * Interpretation of args is defined by control file implementation. | ||
| 4007 | */ | ||
| 4008 | static int cgroup_write_event_control(struct cgroup_subsys_state *dummy_css, | ||
| 4009 | struct cftype *cft, const char *buffer) | ||
| 4010 | { | ||
| 4011 | struct cgroup *cgrp = dummy_css->cgroup; | ||
| 4012 | struct cgroup_event *event; | ||
| 4013 | struct cgroup_subsys_state *cfile_css; | ||
| 4014 | unsigned int efd, cfd; | ||
| 4015 | struct fd efile; | ||
| 4016 | struct fd cfile; | ||
| 4017 | char *endp; | ||
| 4018 | int ret; | ||
| 4019 | |||
| 4020 | efd = simple_strtoul(buffer, &endp, 10); | ||
| 4021 | if (*endp != ' ') | ||
| 4022 | return -EINVAL; | ||
| 4023 | buffer = endp + 1; | ||
| 4024 | |||
| 4025 | cfd = simple_strtoul(buffer, &endp, 10); | ||
| 4026 | if ((*endp != ' ') && (*endp != '\0')) | ||
| 4027 | return -EINVAL; | ||
| 4028 | buffer = endp + 1; | ||
| 4029 | |||
| 4030 | event = kzalloc(sizeof(*event), GFP_KERNEL); | ||
| 4031 | if (!event) | ||
| 4032 | return -ENOMEM; | ||
| 4033 | |||
| 4034 | INIT_LIST_HEAD(&event->list); | ||
| 4035 | init_poll_funcptr(&event->pt, cgroup_event_ptable_queue_proc); | ||
| 4036 | init_waitqueue_func_entry(&event->wait, cgroup_event_wake); | ||
| 4037 | INIT_WORK(&event->remove, cgroup_event_remove); | ||
| 4038 | |||
| 4039 | efile = fdget(efd); | ||
| 4040 | if (!efile.file) { | ||
| 4041 | ret = -EBADF; | ||
| 4042 | goto out_kfree; | ||
| 4043 | } | ||
| 4044 | |||
| 4045 | event->eventfd = eventfd_ctx_fileget(efile.file); | ||
| 4046 | if (IS_ERR(event->eventfd)) { | ||
| 4047 | ret = PTR_ERR(event->eventfd); | ||
| 4048 | goto out_put_efile; | ||
| 4049 | } | ||
| 4050 | |||
| 4051 | cfile = fdget(cfd); | ||
| 4052 | if (!cfile.file) { | ||
| 4053 | ret = -EBADF; | ||
| 4054 | goto out_put_eventfd; | ||
| 4055 | } | ||
| 4056 | |||
| 4057 | /* the process need read permission on control file */ | ||
| 4058 | /* AV: shouldn't we check that it's been opened for read instead? */ | ||
| 4059 | ret = inode_permission(file_inode(cfile.file), MAY_READ); | ||
| 4060 | if (ret < 0) | ||
| 4061 | goto out_put_cfile; | ||
| 4062 | |||
| 4063 | event->cft = __file_cft(cfile.file); | ||
| 4064 | if (IS_ERR(event->cft)) { | ||
| 4065 | ret = PTR_ERR(event->cft); | ||
| 4066 | goto out_put_cfile; | ||
| 4067 | } | ||
| 4068 | |||
| 4069 | if (!event->cft->ss) { | ||
| 4070 | ret = -EBADF; | ||
| 4071 | goto out_put_cfile; | ||
| 4072 | } | ||
| 4073 | |||
| 4074 | /* | ||
| 4075 | * Determine the css of @cfile, verify it belongs to the same | ||
| 4076 | * cgroup as cgroup.event_control, and associate @event with it. | ||
| 4077 | * Remaining events are automatically removed on cgroup destruction | ||
| 4078 | * but the removal is asynchronous, so take an extra ref. | ||
| 4079 | */ | ||
| 4080 | rcu_read_lock(); | ||
| 4081 | |||
| 4082 | ret = -EINVAL; | ||
| 4083 | event->css = cgroup_css(cgrp, event->cft->ss); | ||
| 4084 | cfile_css = css_from_dir(cfile.file->f_dentry->d_parent, event->cft->ss); | ||
| 4085 | if (event->css && event->css == cfile_css && css_tryget(event->css)) | ||
| 4086 | ret = 0; | ||
| 4087 | |||
| 4088 | rcu_read_unlock(); | ||
| 4089 | if (ret) | ||
| 4090 | goto out_put_cfile; | ||
| 4091 | |||
| 4092 | if (!event->cft->register_event || !event->cft->unregister_event) { | ||
| 4093 | ret = -EINVAL; | ||
| 4094 | goto out_put_css; | ||
| 4095 | } | ||
| 4096 | |||
| 4097 | ret = event->cft->register_event(event->css, event->cft, | ||
| 4098 | event->eventfd, buffer); | ||
| 4099 | if (ret) | ||
| 4100 | goto out_put_css; | ||
| 4101 | |||
| 4102 | efile.file->f_op->poll(efile.file, &event->pt); | ||
| 4103 | |||
| 4104 | spin_lock(&cgrp->event_list_lock); | ||
| 4105 | list_add(&event->list, &cgrp->event_list); | ||
| 4106 | spin_unlock(&cgrp->event_list_lock); | ||
| 4107 | |||
| 4108 | fdput(cfile); | ||
| 4109 | fdput(efile); | ||
| 4110 | |||
| 4111 | return 0; | ||
| 4112 | |||
| 4113 | out_put_css: | ||
| 4114 | css_put(event->css); | ||
| 4115 | out_put_cfile: | ||
| 4116 | fdput(cfile); | ||
| 4117 | out_put_eventfd: | ||
| 4118 | eventfd_ctx_put(event->eventfd); | ||
| 4119 | out_put_efile: | ||
| 4120 | fdput(efile); | ||
| 4121 | out_kfree: | ||
| 4122 | kfree(event); | ||
| 4123 | |||
| 4124 | return ret; | ||
| 4125 | } | ||
| 4126 | |||
| 4127 | static u64 cgroup_clone_children_read(struct cgroup_subsys_state *css, | 3881 | static u64 cgroup_clone_children_read(struct cgroup_subsys_state *css, |
| 4128 | struct cftype *cft) | 3882 | struct cftype *cft) |
| 4129 | { | 3883 | { |
| @@ -4143,17 +3897,15 @@ static int cgroup_clone_children_write(struct cgroup_subsys_state *css, | |||
| 4143 | static struct cftype cgroup_base_files[] = { | 3897 | static struct cftype cgroup_base_files[] = { |
| 4144 | { | 3898 | { |
| 4145 | .name = "cgroup.procs", | 3899 | .name = "cgroup.procs", |
| 4146 | .open = cgroup_procs_open, | 3900 | .seq_start = cgroup_pidlist_start, |
| 3901 | .seq_next = cgroup_pidlist_next, | ||
| 3902 | .seq_stop = cgroup_pidlist_stop, | ||
| 3903 | .seq_show = cgroup_pidlist_show, | ||
| 3904 | .private = CGROUP_FILE_PROCS, | ||
| 4147 | .write_u64 = cgroup_procs_write, | 3905 | .write_u64 = cgroup_procs_write, |
| 4148 | .release = cgroup_pidlist_release, | ||
| 4149 | .mode = S_IRUGO | S_IWUSR, | 3906 | .mode = S_IRUGO | S_IWUSR, |
| 4150 | }, | 3907 | }, |
| 4151 | { | 3908 | { |
| 4152 | .name = "cgroup.event_control", | ||
| 4153 | .write_string = cgroup_write_event_control, | ||
| 4154 | .mode = S_IWUGO, | ||
| 4155 | }, | ||
| 4156 | { | ||
| 4157 | .name = "cgroup.clone_children", | 3909 | .name = "cgroup.clone_children", |
| 4158 | .flags = CFTYPE_INSANE, | 3910 | .flags = CFTYPE_INSANE, |
| 4159 | .read_u64 = cgroup_clone_children_read, | 3911 | .read_u64 = cgroup_clone_children_read, |
| @@ -4162,7 +3914,7 @@ static struct cftype cgroup_base_files[] = { | |||
| 4162 | { | 3914 | { |
| 4163 | .name = "cgroup.sane_behavior", | 3915 | .name = "cgroup.sane_behavior", |
| 4164 | .flags = CFTYPE_ONLY_ON_ROOT, | 3916 | .flags = CFTYPE_ONLY_ON_ROOT, |
| 4165 | .read_seq_string = cgroup_sane_behavior_show, | 3917 | .seq_show = cgroup_sane_behavior_show, |
| 4166 | }, | 3918 | }, |
| 4167 | 3919 | ||
| 4168 | /* | 3920 | /* |
| @@ -4173,9 +3925,12 @@ static struct cftype cgroup_base_files[] = { | |||
| 4173 | { | 3925 | { |
| 4174 | .name = "tasks", | 3926 | .name = "tasks", |
| 4175 | .flags = CFTYPE_INSANE, /* use "procs" instead */ | 3927 | .flags = CFTYPE_INSANE, /* use "procs" instead */ |
| 4176 | .open = cgroup_tasks_open, | 3928 | .seq_start = cgroup_pidlist_start, |
| 3929 | .seq_next = cgroup_pidlist_next, | ||
| 3930 | .seq_stop = cgroup_pidlist_stop, | ||
| 3931 | .seq_show = cgroup_pidlist_show, | ||
| 3932 | .private = CGROUP_FILE_TASKS, | ||
| 4177 | .write_u64 = cgroup_tasks_write, | 3933 | .write_u64 = cgroup_tasks_write, |
| 4178 | .release = cgroup_pidlist_release, | ||
| 4179 | .mode = S_IRUGO | S_IWUSR, | 3934 | .mode = S_IRUGO | S_IWUSR, |
| 4180 | }, | 3935 | }, |
| 4181 | { | 3936 | { |
| @@ -4187,7 +3942,7 @@ static struct cftype cgroup_base_files[] = { | |||
| 4187 | { | 3942 | { |
| 4188 | .name = "release_agent", | 3943 | .name = "release_agent", |
| 4189 | .flags = CFTYPE_INSANE | CFTYPE_ONLY_ON_ROOT, | 3944 | .flags = CFTYPE_INSANE | CFTYPE_ONLY_ON_ROOT, |
| 4190 | .read_seq_string = cgroup_release_agent_show, | 3945 | .seq_show = cgroup_release_agent_show, |
| 4191 | .write_string = cgroup_release_agent_write, | 3946 | .write_string = cgroup_release_agent_write, |
| 4192 | .max_write_len = PATH_MAX, | 3947 | .max_write_len = PATH_MAX, |
| 4193 | }, | 3948 | }, |
| @@ -4333,6 +4088,62 @@ static void offline_css(struct cgroup_subsys_state *css) | |||
| 4333 | RCU_INIT_POINTER(css->cgroup->subsys[ss->subsys_id], css); | 4088 | RCU_INIT_POINTER(css->cgroup->subsys[ss->subsys_id], css); |
| 4334 | } | 4089 | } |
| 4335 | 4090 | ||
| 4091 | /** | ||
| 4092 | * create_css - create a cgroup_subsys_state | ||
| 4093 | * @cgrp: the cgroup new css will be associated with | ||
| 4094 | * @ss: the subsys of new css | ||
| 4095 | * | ||
| 4096 | * Create a new css associated with @cgrp - @ss pair. On success, the new | ||
| 4097 | * css is online and installed in @cgrp with all interface files created. | ||
| 4098 | * Returns 0 on success, -errno on failure. | ||
| 4099 | */ | ||
| 4100 | static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss) | ||
| 4101 | { | ||
| 4102 | struct cgroup *parent = cgrp->parent; | ||
| 4103 | struct cgroup_subsys_state *css; | ||
| 4104 | int err; | ||
| 4105 | |||
| 4106 | lockdep_assert_held(&cgrp->dentry->d_inode->i_mutex); | ||
| 4107 | lockdep_assert_held(&cgroup_mutex); | ||
| 4108 | |||
| 4109 | css = ss->css_alloc(cgroup_css(parent, ss)); | ||
| 4110 | if (IS_ERR(css)) | ||
| 4111 | return PTR_ERR(css); | ||
| 4112 | |||
| 4113 | err = percpu_ref_init(&css->refcnt, css_release); | ||
| 4114 | if (err) | ||
| 4115 | goto err_free; | ||
| 4116 | |||
| 4117 | init_css(css, ss, cgrp); | ||
| 4118 | |||
| 4119 | err = cgroup_populate_dir(cgrp, 1 << ss->subsys_id); | ||
| 4120 | if (err) | ||
| 4121 | goto err_free; | ||
| 4122 | |||
| 4123 | err = online_css(css); | ||
| 4124 | if (err) | ||
| 4125 | goto err_free; | ||
| 4126 | |||
| 4127 | dget(cgrp->dentry); | ||
| 4128 | css_get(css->parent); | ||
| 4129 | |||
| 4130 | if (ss->broken_hierarchy && !ss->warned_broken_hierarchy && | ||
| 4131 | parent->parent) { | ||
| 4132 | pr_warning("cgroup: %s (%d) created nested cgroup for controller \"%s\" which has incomplete hierarchy support. Nested cgroups may change behavior in the future.\n", | ||
| 4133 | current->comm, current->pid, ss->name); | ||
| 4134 | if (!strcmp(ss->name, "memory")) | ||
| 4135 | pr_warning("cgroup: \"memory\" requires setting use_hierarchy to 1 on the root.\n"); | ||
| 4136 | ss->warned_broken_hierarchy = true; | ||
| 4137 | } | ||
| 4138 | |||
| 4139 | return 0; | ||
| 4140 | |||
| 4141 | err_free: | ||
| 4142 | percpu_ref_cancel_init(&css->refcnt); | ||
| 4143 | ss->css_free(css); | ||
| 4144 | return err; | ||
| 4145 | } | ||
| 4146 | |||
| 4336 | /* | 4147 | /* |
| 4337 | * cgroup_create - create a cgroup | 4148 | * cgroup_create - create a cgroup |
| 4338 | * @parent: cgroup that will be parent of the new cgroup | 4149 | * @parent: cgroup that will be parent of the new cgroup |
| @@ -4344,11 +4155,10 @@ static void offline_css(struct cgroup_subsys_state *css) | |||
| 4344 | static long cgroup_create(struct cgroup *parent, struct dentry *dentry, | 4155 | static long cgroup_create(struct cgroup *parent, struct dentry *dentry, |
| 4345 | umode_t mode) | 4156 | umode_t mode) |
| 4346 | { | 4157 | { |
| 4347 | struct cgroup_subsys_state *css_ar[CGROUP_SUBSYS_COUNT] = { }; | ||
| 4348 | struct cgroup *cgrp; | 4158 | struct cgroup *cgrp; |
| 4349 | struct cgroup_name *name; | 4159 | struct cgroup_name *name; |
| 4350 | struct cgroupfs_root *root = parent->root; | 4160 | struct cgroupfs_root *root = parent->root; |
| 4351 | int err = 0; | 4161 | int ssid, err = 0; |
| 4352 | struct cgroup_subsys *ss; | 4162 | struct cgroup_subsys *ss; |
| 4353 | struct super_block *sb = root->sb; | 4163 | struct super_block *sb = root->sb; |
| 4354 | 4164 | ||
| @@ -4404,23 +4214,6 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, | |||
| 4404 | if (test_bit(CGRP_CPUSET_CLONE_CHILDREN, &parent->flags)) | 4214 | if (test_bit(CGRP_CPUSET_CLONE_CHILDREN, &parent->flags)) |
| 4405 | set_bit(CGRP_CPUSET_CLONE_CHILDREN, &cgrp->flags); | 4215 | set_bit(CGRP_CPUSET_CLONE_CHILDREN, &cgrp->flags); |
| 4406 | 4216 | ||
| 4407 | for_each_root_subsys(root, ss) { | ||
| 4408 | struct cgroup_subsys_state *css; | ||
| 4409 | |||
| 4410 | css = ss->css_alloc(cgroup_css(parent, ss)); | ||
| 4411 | if (IS_ERR(css)) { | ||
| 4412 | err = PTR_ERR(css); | ||
| 4413 | goto err_free_all; | ||
| 4414 | } | ||
| 4415 | css_ar[ss->subsys_id] = css; | ||
| 4416 | |||
| 4417 | err = percpu_ref_init(&css->refcnt, css_release); | ||
| 4418 | if (err) | ||
| 4419 | goto err_free_all; | ||
| 4420 | |||
| 4421 | init_css(css, ss, cgrp); | ||
| 4422 | } | ||
| 4423 | |||
| 4424 | /* | 4217 | /* |
| 4425 | * Create directory. cgroup_create_file() returns with the new | 4218 | * Create directory. cgroup_create_file() returns with the new |
| 4426 | * directory locked on success so that it can be populated without | 4219 | * directory locked on success so that it can be populated without |
| @@ -4428,7 +4221,7 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, | |||
| 4428 | */ | 4221 | */ |
| 4429 | err = cgroup_create_file(dentry, S_IFDIR | mode, sb); | 4222 | err = cgroup_create_file(dentry, S_IFDIR | mode, sb); |
| 4430 | if (err < 0) | 4223 | if (err < 0) |
| 4431 | goto err_free_all; | 4224 | goto err_unlock; |
| 4432 | lockdep_assert_held(&dentry->d_inode->i_mutex); | 4225 | lockdep_assert_held(&dentry->d_inode->i_mutex); |
| 4433 | 4226 | ||
| 4434 | cgrp->serial_nr = cgroup_serial_nr_next++; | 4227 | cgrp->serial_nr = cgroup_serial_nr_next++; |
| @@ -4440,55 +4233,31 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, | |||
| 4440 | /* hold a ref to the parent's dentry */ | 4233 | /* hold a ref to the parent's dentry */ |
| 4441 | dget(parent->dentry); | 4234 | dget(parent->dentry); |
| 4442 | 4235 | ||
| 4443 | /* creation succeeded, notify subsystems */ | 4236 | /* |
| 4444 | for_each_root_subsys(root, ss) { | 4237 | * @cgrp is now fully operational. If something fails after this |
| 4445 | struct cgroup_subsys_state *css = css_ar[ss->subsys_id]; | 4238 | * point, it'll be released via the normal destruction path. |
| 4446 | 4239 | */ | |
| 4447 | err = online_css(css); | ||
| 4448 | if (err) | ||
| 4449 | goto err_destroy; | ||
| 4450 | |||
| 4451 | /* each css holds a ref to the cgroup's dentry and parent css */ | ||
| 4452 | dget(dentry); | ||
| 4453 | css_get(css->parent); | ||
| 4454 | |||
| 4455 | /* mark it consumed for error path */ | ||
| 4456 | css_ar[ss->subsys_id] = NULL; | ||
| 4457 | |||
| 4458 | if (ss->broken_hierarchy && !ss->warned_broken_hierarchy && | ||
| 4459 | parent->parent) { | ||
| 4460 | pr_warning("cgroup: %s (%d) created nested cgroup for controller \"%s\" which has incomplete hierarchy support. Nested cgroups may change behavior in the future.\n", | ||
| 4461 | current->comm, current->pid, ss->name); | ||
| 4462 | if (!strcmp(ss->name, "memory")) | ||
| 4463 | pr_warning("cgroup: \"memory\" requires setting use_hierarchy to 1 on the root.\n"); | ||
| 4464 | ss->warned_broken_hierarchy = true; | ||
| 4465 | } | ||
| 4466 | } | ||
| 4467 | |||
| 4468 | idr_replace(&root->cgroup_idr, cgrp, cgrp->id); | 4240 | idr_replace(&root->cgroup_idr, cgrp, cgrp->id); |
| 4469 | 4241 | ||
| 4470 | err = cgroup_addrm_files(cgrp, cgroup_base_files, true); | 4242 | err = cgroup_addrm_files(cgrp, cgroup_base_files, true); |
| 4471 | if (err) | 4243 | if (err) |
| 4472 | goto err_destroy; | 4244 | goto err_destroy; |
| 4473 | 4245 | ||
| 4474 | err = cgroup_populate_dir(cgrp, root->subsys_mask); | 4246 | /* let's create and online css's */ |
| 4475 | if (err) | 4247 | for_each_subsys(ss, ssid) { |
| 4476 | goto err_destroy; | 4248 | if (root->subsys_mask & (1 << ssid)) { |
| 4249 | err = create_css(cgrp, ss); | ||
| 4250 | if (err) | ||
| 4251 | goto err_destroy; | ||
| 4252 | } | ||
| 4253 | } | ||
| 4477 | 4254 | ||
| 4478 | mutex_unlock(&cgroup_mutex); | 4255 | mutex_unlock(&cgroup_mutex); |
| 4479 | mutex_unlock(&cgrp->dentry->d_inode->i_mutex); | 4256 | mutex_unlock(&cgrp->dentry->d_inode->i_mutex); |
| 4480 | 4257 | ||
| 4481 | return 0; | 4258 | return 0; |
| 4482 | 4259 | ||
| 4483 | err_free_all: | 4260 | err_unlock: |
| 4484 | for_each_root_subsys(root, ss) { | ||
| 4485 | struct cgroup_subsys_state *css = css_ar[ss->subsys_id]; | ||
| 4486 | |||
| 4487 | if (css) { | ||
| 4488 | percpu_ref_cancel_init(&css->refcnt); | ||
| 4489 | ss->css_free(css); | ||
| 4490 | } | ||
| 4491 | } | ||
| 4492 | mutex_unlock(&cgroup_mutex); | 4261 | mutex_unlock(&cgroup_mutex); |
| 4493 | /* Release the reference count that we took on the superblock */ | 4262 | /* Release the reference count that we took on the superblock */ |
| 4494 | deactivate_super(sb); | 4263 | deactivate_super(sb); |
| @@ -4501,14 +4270,6 @@ err_free_cgrp: | |||
| 4501 | return err; | 4270 | return err; |
| 4502 | 4271 | ||
| 4503 | err_destroy: | 4272 | err_destroy: |
| 4504 | for_each_root_subsys(root, ss) { | ||
| 4505 | struct cgroup_subsys_state *css = css_ar[ss->subsys_id]; | ||
| 4506 | |||
| 4507 | if (css) { | ||
| 4508 | percpu_ref_cancel_init(&css->refcnt); | ||
| 4509 | ss->css_free(css); | ||
| 4510 | } | ||
| 4511 | } | ||
| 4512 | cgroup_destroy_locked(cgrp); | 4273 | cgroup_destroy_locked(cgrp); |
| 4513 | mutex_unlock(&cgroup_mutex); | 4274 | mutex_unlock(&cgroup_mutex); |
| 4514 | mutex_unlock(&dentry->d_inode->i_mutex); | 4275 | mutex_unlock(&dentry->d_inode->i_mutex); |
| @@ -4631,10 +4392,10 @@ static int cgroup_destroy_locked(struct cgroup *cgrp) | |||
| 4631 | __releases(&cgroup_mutex) __acquires(&cgroup_mutex) | 4392 | __releases(&cgroup_mutex) __acquires(&cgroup_mutex) |
| 4632 | { | 4393 | { |
| 4633 | struct dentry *d = cgrp->dentry; | 4394 | struct dentry *d = cgrp->dentry; |
| 4634 | struct cgroup_event *event, *tmp; | 4395 | struct cgroup_subsys_state *css; |
| 4635 | struct cgroup_subsys *ss; | ||
| 4636 | struct cgroup *child; | 4396 | struct cgroup *child; |
| 4637 | bool empty; | 4397 | bool empty; |
| 4398 | int ssid; | ||
| 4638 | 4399 | ||
| 4639 | lockdep_assert_held(&d->d_inode->i_mutex); | 4400 | lockdep_assert_held(&d->d_inode->i_mutex); |
| 4640 | lockdep_assert_held(&cgroup_mutex); | 4401 | lockdep_assert_held(&cgroup_mutex); |
| @@ -4670,12 +4431,8 @@ static int cgroup_destroy_locked(struct cgroup *cgrp) | |||
| 4670 | * will be invoked to perform the rest of destruction once the | 4431 | * will be invoked to perform the rest of destruction once the |
| 4671 | * percpu refs of all css's are confirmed to be killed. | 4432 | * percpu refs of all css's are confirmed to be killed. |
| 4672 | */ | 4433 | */ |
| 4673 | for_each_root_subsys(cgrp->root, ss) { | 4434 | for_each_css(css, ssid, cgrp) |
| 4674 | struct cgroup_subsys_state *css = cgroup_css(cgrp, ss); | 4435 | kill_css(css); |
| 4675 | |||
| 4676 | if (css) | ||
| 4677 | kill_css(css); | ||
| 4678 | } | ||
| 4679 | 4436 | ||
| 4680 | /* | 4437 | /* |
| 4681 | * Mark @cgrp dead. This prevents further task migration and child | 4438 | * Mark @cgrp dead. This prevents further task migration and child |
| @@ -4710,18 +4467,6 @@ static int cgroup_destroy_locked(struct cgroup *cgrp) | |||
| 4710 | dget(d); | 4467 | dget(d); |
| 4711 | cgroup_d_remove_dir(d); | 4468 | cgroup_d_remove_dir(d); |
| 4712 | 4469 | ||
| 4713 | /* | ||
| 4714 | * Unregister events and notify userspace. | ||
| 4715 | * Notify userspace about cgroup removing only after rmdir of cgroup | ||
| 4716 | * directory to avoid race between userspace and kernelspace. | ||
| 4717 | */ | ||
| 4718 | spin_lock(&cgrp->event_list_lock); | ||
| 4719 | list_for_each_entry_safe(event, tmp, &cgrp->event_list, list) { | ||
| 4720 | list_del_init(&event->list); | ||
| 4721 | schedule_work(&event->remove); | ||
| 4722 | } | ||
| 4723 | spin_unlock(&cgrp->event_list_lock); | ||
| 4724 | |||
| 4725 | return 0; | 4470 | return 0; |
| 4726 | }; | 4471 | }; |
| 4727 | 4472 | ||
| @@ -4792,7 +4537,6 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss) | |||
| 4792 | cgroup_init_cftsets(ss); | 4537 | cgroup_init_cftsets(ss); |
| 4793 | 4538 | ||
| 4794 | /* Create the top cgroup state for this subsystem */ | 4539 | /* Create the top cgroup state for this subsystem */ |
| 4795 | list_add(&ss->sibling, &cgroup_dummy_root.subsys_list); | ||
| 4796 | ss->root = &cgroup_dummy_root; | 4540 | ss->root = &cgroup_dummy_root; |
| 4797 | css = ss->css_alloc(cgroup_css(cgroup_dummy_top, ss)); | 4541 | css = ss->css_alloc(cgroup_css(cgroup_dummy_top, ss)); |
| 4798 | /* We don't handle early failures gracefully */ | 4542 | /* We don't handle early failures gracefully */ |
| @@ -4866,6 +4610,7 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss) | |||
| 4866 | cgroup_init_cftsets(ss); | 4610 | cgroup_init_cftsets(ss); |
| 4867 | 4611 | ||
| 4868 | mutex_lock(&cgroup_mutex); | 4612 | mutex_lock(&cgroup_mutex); |
| 4613 | mutex_lock(&cgroup_root_mutex); | ||
| 4869 | cgroup_subsys[ss->subsys_id] = ss; | 4614 | cgroup_subsys[ss->subsys_id] = ss; |
| 4870 | 4615 | ||
| 4871 | /* | 4616 | /* |
| @@ -4877,11 +4622,11 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss) | |||
| 4877 | if (IS_ERR(css)) { | 4622 | if (IS_ERR(css)) { |
| 4878 | /* failure case - need to deassign the cgroup_subsys[] slot. */ | 4623 | /* failure case - need to deassign the cgroup_subsys[] slot. */ |
| 4879 | cgroup_subsys[ss->subsys_id] = NULL; | 4624 | cgroup_subsys[ss->subsys_id] = NULL; |
| 4625 | mutex_unlock(&cgroup_root_mutex); | ||
| 4880 | mutex_unlock(&cgroup_mutex); | 4626 | mutex_unlock(&cgroup_mutex); |
| 4881 | return PTR_ERR(css); | 4627 | return PTR_ERR(css); |
| 4882 | } | 4628 | } |
| 4883 | 4629 | ||
| 4884 | list_add(&ss->sibling, &cgroup_dummy_root.subsys_list); | ||
| 4885 | ss->root = &cgroup_dummy_root; | 4630 | ss->root = &cgroup_dummy_root; |
| 4886 | 4631 | ||
| 4887 | /* our new subsystem will be attached to the dummy hierarchy. */ | 4632 | /* our new subsystem will be attached to the dummy hierarchy. */ |
| @@ -4911,14 +4656,18 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss) | |||
| 4911 | write_unlock(&css_set_lock); | 4656 | write_unlock(&css_set_lock); |
| 4912 | 4657 | ||
| 4913 | ret = online_css(css); | 4658 | ret = online_css(css); |
| 4914 | if (ret) | 4659 | if (ret) { |
| 4660 | ss->css_free(css); | ||
| 4915 | goto err_unload; | 4661 | goto err_unload; |
| 4662 | } | ||
| 4916 | 4663 | ||
| 4917 | /* success! */ | 4664 | /* success! */ |
| 4665 | mutex_unlock(&cgroup_root_mutex); | ||
| 4918 | mutex_unlock(&cgroup_mutex); | 4666 | mutex_unlock(&cgroup_mutex); |
| 4919 | return 0; | 4667 | return 0; |
| 4920 | 4668 | ||
| 4921 | err_unload: | 4669 | err_unload: |
| 4670 | mutex_unlock(&cgroup_root_mutex); | ||
| 4922 | mutex_unlock(&cgroup_mutex); | 4671 | mutex_unlock(&cgroup_mutex); |
| 4923 | /* @ss can't be mounted here as try_module_get() would fail */ | 4672 | /* @ss can't be mounted here as try_module_get() would fail */ |
| 4924 | cgroup_unload_subsys(ss); | 4673 | cgroup_unload_subsys(ss); |
| @@ -4937,6 +4686,7 @@ EXPORT_SYMBOL_GPL(cgroup_load_subsys); | |||
| 4937 | void cgroup_unload_subsys(struct cgroup_subsys *ss) | 4686 | void cgroup_unload_subsys(struct cgroup_subsys *ss) |
| 4938 | { | 4687 | { |
| 4939 | struct cgrp_cset_link *link; | 4688 | struct cgrp_cset_link *link; |
| 4689 | struct cgroup_subsys_state *css; | ||
| 4940 | 4690 | ||
| 4941 | BUG_ON(ss->module == NULL); | 4691 | BUG_ON(ss->module == NULL); |
| 4942 | 4692 | ||
| @@ -4948,15 +4698,15 @@ void cgroup_unload_subsys(struct cgroup_subsys *ss) | |||
| 4948 | BUG_ON(ss->root != &cgroup_dummy_root); | 4698 | BUG_ON(ss->root != &cgroup_dummy_root); |
| 4949 | 4699 | ||
| 4950 | mutex_lock(&cgroup_mutex); | 4700 | mutex_lock(&cgroup_mutex); |
| 4701 | mutex_lock(&cgroup_root_mutex); | ||
| 4951 | 4702 | ||
| 4952 | offline_css(cgroup_css(cgroup_dummy_top, ss)); | 4703 | css = cgroup_css(cgroup_dummy_top, ss); |
| 4704 | if (css) | ||
| 4705 | offline_css(css); | ||
| 4953 | 4706 | ||
| 4954 | /* deassign the subsys_id */ | 4707 | /* deassign the subsys_id */ |
| 4955 | cgroup_subsys[ss->subsys_id] = NULL; | 4708 | cgroup_subsys[ss->subsys_id] = NULL; |
| 4956 | 4709 | ||
| 4957 | /* remove subsystem from the dummy root's list of subsystems */ | ||
| 4958 | list_del_init(&ss->sibling); | ||
| 4959 | |||
| 4960 | /* | 4710 | /* |
| 4961 | * disentangle the css from all css_sets attached to the dummy | 4711 | * disentangle the css from all css_sets attached to the dummy |
| 4962 | * top. as in loading, we need to pay our respects to the hashtable | 4712 | * top. as in loading, we need to pay our respects to the hashtable |
| @@ -4979,9 +4729,11 @@ void cgroup_unload_subsys(struct cgroup_subsys *ss) | |||
| 4979 | * need to free before marking as null because ss->css_free needs | 4729 | * need to free before marking as null because ss->css_free needs |
| 4980 | * the cgrp->subsys pointer to find their state. | 4730 | * the cgrp->subsys pointer to find their state. |
| 4981 | */ | 4731 | */ |
| 4982 | ss->css_free(cgroup_css(cgroup_dummy_top, ss)); | 4732 | if (css) |
| 4733 | ss->css_free(css); | ||
| 4983 | RCU_INIT_POINTER(cgroup_dummy_top->subsys[ss->subsys_id], NULL); | 4734 | RCU_INIT_POINTER(cgroup_dummy_top->subsys[ss->subsys_id], NULL); |
| 4984 | 4735 | ||
| 4736 | mutex_unlock(&cgroup_root_mutex); | ||
| 4985 | mutex_unlock(&cgroup_mutex); | 4737 | mutex_unlock(&cgroup_mutex); |
| 4986 | } | 4738 | } |
| 4987 | EXPORT_SYMBOL_GPL(cgroup_unload_subsys); | 4739 | EXPORT_SYMBOL_GPL(cgroup_unload_subsys); |
| @@ -5100,6 +4852,15 @@ static int __init cgroup_wq_init(void) | |||
| 5100 | */ | 4852 | */ |
| 5101 | cgroup_destroy_wq = alloc_workqueue("cgroup_destroy", 0, 1); | 4853 | cgroup_destroy_wq = alloc_workqueue("cgroup_destroy", 0, 1); |
| 5102 | BUG_ON(!cgroup_destroy_wq); | 4854 | BUG_ON(!cgroup_destroy_wq); |
| 4855 | |||
| 4856 | /* | ||
| 4857 | * Used to destroy pidlists and separate to serve as flush domain. | ||
| 4858 | * Cap @max_active to 1 too. | ||
| 4859 | */ | ||
| 4860 | cgroup_pidlist_destroy_wq = alloc_workqueue("cgroup_pidlist_destroy", | ||
| 4861 | 0, 1); | ||
| 4862 | BUG_ON(!cgroup_pidlist_destroy_wq); | ||
| 4863 | |||
| 5103 | return 0; | 4864 | return 0; |
| 5104 | } | 4865 | } |
| 5105 | core_initcall(cgroup_wq_init); | 4866 | core_initcall(cgroup_wq_init); |
| @@ -5143,11 +4904,12 @@ int proc_cgroup_show(struct seq_file *m, void *v) | |||
| 5143 | for_each_active_root(root) { | 4904 | for_each_active_root(root) { |
| 5144 | struct cgroup_subsys *ss; | 4905 | struct cgroup_subsys *ss; |
| 5145 | struct cgroup *cgrp; | 4906 | struct cgroup *cgrp; |
| 5146 | int count = 0; | 4907 | int ssid, count = 0; |
| 5147 | 4908 | ||
| 5148 | seq_printf(m, "%d:", root->hierarchy_id); | 4909 | seq_printf(m, "%d:", root->hierarchy_id); |
| 5149 | for_each_root_subsys(root, ss) | 4910 | for_each_subsys(ss, ssid) |
| 5150 | seq_printf(m, "%s%s", count++ ? "," : "", ss->name); | 4911 | if (root->subsys_mask & (1 << ssid)) |
| 4912 | seq_printf(m, "%s%s", count++ ? "," : "", ss->name); | ||
| 5151 | if (strlen(root->name)) | 4913 | if (strlen(root->name)) |
| 5152 | seq_printf(m, "%sname=%s", count ? "," : "", | 4914 | seq_printf(m, "%sname=%s", count ? "," : "", |
| 5153 | root->name); | 4915 | root->name); |
| @@ -5488,16 +5250,16 @@ __setup("cgroup_disable=", cgroup_disable); | |||
| 5488 | * @dentry: directory dentry of interest | 5250 | * @dentry: directory dentry of interest |
| 5489 | * @ss: subsystem of interest | 5251 | * @ss: subsystem of interest |
| 5490 | * | 5252 | * |
| 5491 | * Must be called under RCU read lock. The caller is responsible for | 5253 | * Must be called under cgroup_mutex or RCU read lock. The caller is |
| 5492 | * pinning the returned css if it needs to be accessed outside the RCU | 5254 | * responsible for pinning the returned css if it needs to be accessed |
| 5493 | * critical section. | 5255 | * outside the critical section. |
| 5494 | */ | 5256 | */ |
| 5495 | struct cgroup_subsys_state *css_from_dir(struct dentry *dentry, | 5257 | struct cgroup_subsys_state *css_from_dir(struct dentry *dentry, |
| 5496 | struct cgroup_subsys *ss) | 5258 | struct cgroup_subsys *ss) |
| 5497 | { | 5259 | { |
| 5498 | struct cgroup *cgrp; | 5260 | struct cgroup *cgrp; |
| 5499 | 5261 | ||
| 5500 | WARN_ON_ONCE(!rcu_read_lock_held()); | 5262 | cgroup_assert_mutex_or_rcu_locked(); |
| 5501 | 5263 | ||
| 5502 | /* is @dentry a cgroup dir? */ | 5264 | /* is @dentry a cgroup dir? */ |
| 5503 | if (!dentry->d_inode || | 5265 | if (!dentry->d_inode || |
| @@ -5520,9 +5282,7 @@ struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss) | |||
| 5520 | { | 5282 | { |
| 5521 | struct cgroup *cgrp; | 5283 | struct cgroup *cgrp; |
| 5522 | 5284 | ||
| 5523 | rcu_lockdep_assert(rcu_read_lock_held() || | 5285 | cgroup_assert_mutex_or_rcu_locked(); |
| 5524 | lockdep_is_held(&cgroup_mutex), | ||
| 5525 | "css_from_id() needs proper protection"); | ||
| 5526 | 5286 | ||
| 5527 | cgrp = idr_find(&ss->root->cgroup_idr, id); | 5287 | cgrp = idr_find(&ss->root->cgroup_idr, id); |
| 5528 | if (cgrp) | 5288 | if (cgrp) |
| @@ -5570,9 +5330,7 @@ static u64 current_css_set_refcount_read(struct cgroup_subsys_state *css, | |||
| 5570 | return count; | 5330 | return count; |
| 5571 | } | 5331 | } |
| 5572 | 5332 | ||
| 5573 | static int current_css_set_cg_links_read(struct cgroup_subsys_state *css, | 5333 | static int current_css_set_cg_links_read(struct seq_file *seq, void *v) |
| 5574 | struct cftype *cft, | ||
| 5575 | struct seq_file *seq) | ||
| 5576 | { | 5334 | { |
| 5577 | struct cgrp_cset_link *link; | 5335 | struct cgrp_cset_link *link; |
| 5578 | struct css_set *cset; | 5336 | struct css_set *cset; |
| @@ -5597,9 +5355,9 @@ static int current_css_set_cg_links_read(struct cgroup_subsys_state *css, | |||
| 5597 | } | 5355 | } |
| 5598 | 5356 | ||
| 5599 | #define MAX_TASKS_SHOWN_PER_CSS 25 | 5357 | #define MAX_TASKS_SHOWN_PER_CSS 25 |
| 5600 | static int cgroup_css_links_read(struct cgroup_subsys_state *css, | 5358 | static int cgroup_css_links_read(struct seq_file *seq, void *v) |
| 5601 | struct cftype *cft, struct seq_file *seq) | ||
| 5602 | { | 5359 | { |
| 5360 | struct cgroup_subsys_state *css = seq_css(seq); | ||
| 5603 | struct cgrp_cset_link *link; | 5361 | struct cgrp_cset_link *link; |
| 5604 | 5362 | ||
| 5605 | read_lock(&css_set_lock); | 5363 | read_lock(&css_set_lock); |
| @@ -5645,12 +5403,12 @@ static struct cftype debug_files[] = { | |||
| 5645 | 5403 | ||
| 5646 | { | 5404 | { |
| 5647 | .name = "current_css_set_cg_links", | 5405 | .name = "current_css_set_cg_links", |
| 5648 | .read_seq_string = current_css_set_cg_links_read, | 5406 | .seq_show = current_css_set_cg_links_read, |
| 5649 | }, | 5407 | }, |
| 5650 | 5408 | ||
| 5651 | { | 5409 | { |
| 5652 | .name = "cgroup_css_links", | 5410 | .name = "cgroup_css_links", |
| 5653 | .read_seq_string = cgroup_css_links_read, | 5411 | .seq_show = cgroup_css_links_read, |
| 5654 | }, | 5412 | }, |
| 5655 | 5413 | ||
| 5656 | { | 5414 | { |
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c index f0ff64d0ebaa..6c3154e477f6 100644 --- a/kernel/cgroup_freezer.c +++ b/kernel/cgroup_freezer.c | |||
| @@ -301,10 +301,9 @@ out_unlock: | |||
| 301 | spin_unlock_irq(&freezer->lock); | 301 | spin_unlock_irq(&freezer->lock); |
| 302 | } | 302 | } |
| 303 | 303 | ||
| 304 | static int freezer_read(struct cgroup_subsys_state *css, struct cftype *cft, | 304 | static int freezer_read(struct seq_file *m, void *v) |
| 305 | struct seq_file *m) | ||
| 306 | { | 305 | { |
| 307 | struct cgroup_subsys_state *pos; | 306 | struct cgroup_subsys_state *css = seq_css(m), *pos; |
| 308 | 307 | ||
| 309 | rcu_read_lock(); | 308 | rcu_read_lock(); |
| 310 | 309 | ||
| @@ -458,7 +457,7 @@ static struct cftype files[] = { | |||
| 458 | { | 457 | { |
| 459 | .name = "state", | 458 | .name = "state", |
| 460 | .flags = CFTYPE_NOT_ON_ROOT, | 459 | .flags = CFTYPE_NOT_ON_ROOT, |
| 461 | .read_seq_string = freezer_read, | 460 | .seq_show = freezer_read, |
| 462 | .write_string = freezer_write, | 461 | .write_string = freezer_write, |
| 463 | }, | 462 | }, |
| 464 | { | 463 | { |
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 4772034b4b17..4410ac6a55f1 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c | |||
| @@ -1731,66 +1731,41 @@ out_unlock: | |||
| 1731 | * used, list of ranges of sequential numbers, is variable length, | 1731 | * used, list of ranges of sequential numbers, is variable length, |
| 1732 | * and since these maps can change value dynamically, one could read | 1732 | * and since these maps can change value dynamically, one could read |
| 1733 | * gibberish by doing partial reads while a list was changing. | 1733 | * gibberish by doing partial reads while a list was changing. |
| 1734 | * A single large read to a buffer that crosses a page boundary is | ||
| 1735 | * ok, because the result being copied to user land is not recomputed | ||
| 1736 | * across a page fault. | ||
| 1737 | */ | 1734 | */ |
| 1738 | 1735 | static int cpuset_common_seq_show(struct seq_file *sf, void *v) | |
| 1739 | static size_t cpuset_sprintf_cpulist(char *page, struct cpuset *cs) | ||
| 1740 | { | 1736 | { |
| 1741 | size_t count; | 1737 | struct cpuset *cs = css_cs(seq_css(sf)); |
| 1742 | 1738 | cpuset_filetype_t type = seq_cft(sf)->private; | |
| 1743 | mutex_lock(&callback_mutex); | 1739 | ssize_t count; |
| 1744 | count = cpulist_scnprintf(page, PAGE_SIZE, cs->cpus_allowed); | 1740 | char *buf, *s; |
| 1745 | mutex_unlock(&callback_mutex); | 1741 | int ret = 0; |
| 1746 | 1742 | ||
| 1747 | return count; | 1743 | count = seq_get_buf(sf, &buf); |
| 1748 | } | 1744 | s = buf; |
| 1749 | |||
| 1750 | static size_t cpuset_sprintf_memlist(char *page, struct cpuset *cs) | ||
| 1751 | { | ||
| 1752 | size_t count; | ||
| 1753 | 1745 | ||
| 1754 | mutex_lock(&callback_mutex); | 1746 | mutex_lock(&callback_mutex); |
| 1755 | count = nodelist_scnprintf(page, PAGE_SIZE, cs->mems_allowed); | ||
| 1756 | mutex_unlock(&callback_mutex); | ||
| 1757 | |||
| 1758 | return count; | ||
| 1759 | } | ||
| 1760 | |||
| 1761 | static ssize_t cpuset_common_file_read(struct cgroup_subsys_state *css, | ||
| 1762 | struct cftype *cft, struct file *file, | ||
| 1763 | char __user *buf, size_t nbytes, | ||
| 1764 | loff_t *ppos) | ||
| 1765 | { | ||
| 1766 | struct cpuset *cs = css_cs(css); | ||
| 1767 | cpuset_filetype_t type = cft->private; | ||
| 1768 | char *page; | ||
| 1769 | ssize_t retval = 0; | ||
| 1770 | char *s; | ||
| 1771 | |||
| 1772 | if (!(page = (char *)__get_free_page(GFP_TEMPORARY))) | ||
| 1773 | return -ENOMEM; | ||
| 1774 | |||
| 1775 | s = page; | ||
| 1776 | 1747 | ||
| 1777 | switch (type) { | 1748 | switch (type) { |
| 1778 | case FILE_CPULIST: | 1749 | case FILE_CPULIST: |
| 1779 | s += cpuset_sprintf_cpulist(s, cs); | 1750 | s += cpulist_scnprintf(s, count, cs->cpus_allowed); |
| 1780 | break; | 1751 | break; |
| 1781 | case FILE_MEMLIST: | 1752 | case FILE_MEMLIST: |
| 1782 | s += cpuset_sprintf_memlist(s, cs); | 1753 | s += nodelist_scnprintf(s, count, cs->mems_allowed); |
| 1783 | break; | 1754 | break; |
| 1784 | default: | 1755 | default: |
| 1785 | retval = -EINVAL; | 1756 | ret = -EINVAL; |
| 1786 | goto out; | 1757 | goto out_unlock; |
| 1787 | } | 1758 | } |
| 1788 | *s++ = '\n'; | ||
| 1789 | 1759 | ||
| 1790 | retval = simple_read_from_buffer(buf, nbytes, ppos, page, s - page); | 1760 | if (s < buf + count - 1) { |
| 1791 | out: | 1761 | *s++ = '\n'; |
| 1792 | free_page((unsigned long)page); | 1762 | seq_commit(sf, s - buf); |
| 1793 | return retval; | 1763 | } else { |
| 1764 | seq_commit(sf, -1); | ||
| 1765 | } | ||
| 1766 | out_unlock: | ||
| 1767 | mutex_unlock(&callback_mutex); | ||
| 1768 | return ret; | ||
| 1794 | } | 1769 | } |
| 1795 | 1770 | ||
| 1796 | static u64 cpuset_read_u64(struct cgroup_subsys_state *css, struct cftype *cft) | 1771 | static u64 cpuset_read_u64(struct cgroup_subsys_state *css, struct cftype *cft) |
| @@ -1847,7 +1822,7 @@ static s64 cpuset_read_s64(struct cgroup_subsys_state *css, struct cftype *cft) | |||
| 1847 | static struct cftype files[] = { | 1822 | static struct cftype files[] = { |
| 1848 | { | 1823 | { |
| 1849 | .name = "cpus", | 1824 | .name = "cpus", |
| 1850 | .read = cpuset_common_file_read, | 1825 | .seq_show = cpuset_common_seq_show, |
| 1851 | .write_string = cpuset_write_resmask, | 1826 | .write_string = cpuset_write_resmask, |
| 1852 | .max_write_len = (100U + 6 * NR_CPUS), | 1827 | .max_write_len = (100U + 6 * NR_CPUS), |
| 1853 | .private = FILE_CPULIST, | 1828 | .private = FILE_CPULIST, |
| @@ -1855,7 +1830,7 @@ static struct cftype files[] = { | |||
| 1855 | 1830 | ||
| 1856 | { | 1831 | { |
| 1857 | .name = "mems", | 1832 | .name = "mems", |
| 1858 | .read = cpuset_common_file_read, | 1833 | .seq_show = cpuset_common_seq_show, |
| 1859 | .write_string = cpuset_write_resmask, | 1834 | .write_string = cpuset_write_resmask, |
| 1860 | .max_write_len = (100U + 6 * MAX_NUMNODES), | 1835 | .max_write_len = (100U + 6 * MAX_NUMNODES), |
| 1861 | .private = FILE_MEMLIST, | 1836 | .private = FILE_MEMLIST, |
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 36c951b7eef8..3897e09e86a2 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c | |||
| @@ -7852,15 +7852,14 @@ static int __cfs_schedulable(struct task_group *tg, u64 period, u64 quota) | |||
| 7852 | return ret; | 7852 | return ret; |
| 7853 | } | 7853 | } |
| 7854 | 7854 | ||
| 7855 | static int cpu_stats_show(struct cgroup_subsys_state *css, struct cftype *cft, | 7855 | static int cpu_stats_show(struct seq_file *sf, void *v) |
| 7856 | struct cgroup_map_cb *cb) | ||
| 7857 | { | 7856 | { |
| 7858 | struct task_group *tg = css_tg(css); | 7857 | struct task_group *tg = css_tg(seq_css(sf)); |
| 7859 | struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth; | 7858 | struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth; |
| 7860 | 7859 | ||
| 7861 | cb->fill(cb, "nr_periods", cfs_b->nr_periods); | 7860 | seq_printf(sf, "nr_periods %d\n", cfs_b->nr_periods); |
| 7862 | cb->fill(cb, "nr_throttled", cfs_b->nr_throttled); | 7861 | seq_printf(sf, "nr_throttled %d\n", cfs_b->nr_throttled); |
| 7863 | cb->fill(cb, "throttled_time", cfs_b->throttled_time); | 7862 | seq_printf(sf, "throttled_time %llu\n", cfs_b->throttled_time); |
| 7864 | 7863 | ||
| 7865 | return 0; | 7864 | return 0; |
| 7866 | } | 7865 | } |
| @@ -7914,7 +7913,7 @@ static struct cftype cpu_files[] = { | |||
| 7914 | }, | 7913 | }, |
| 7915 | { | 7914 | { |
| 7916 | .name = "stat", | 7915 | .name = "stat", |
| 7917 | .read_map = cpu_stats_show, | 7916 | .seq_show = cpu_stats_show, |
| 7918 | }, | 7917 | }, |
| 7919 | #endif | 7918 | #endif |
| 7920 | #ifdef CONFIG_RT_GROUP_SCHED | 7919 | #ifdef CONFIG_RT_GROUP_SCHED |
diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c index f64722ff0299..622e0818f905 100644 --- a/kernel/sched/cpuacct.c +++ b/kernel/sched/cpuacct.c | |||
| @@ -163,10 +163,9 @@ out: | |||
| 163 | return err; | 163 | return err; |
| 164 | } | 164 | } |
| 165 | 165 | ||
| 166 | static int cpuacct_percpu_seq_read(struct cgroup_subsys_state *css, | 166 | static int cpuacct_percpu_seq_show(struct seq_file *m, void *V) |
| 167 | struct cftype *cft, struct seq_file *m) | ||
| 168 | { | 167 | { |
| 169 | struct cpuacct *ca = css_ca(css); | 168 | struct cpuacct *ca = css_ca(seq_css(m)); |
| 170 | u64 percpu; | 169 | u64 percpu; |
| 171 | int i; | 170 | int i; |
| 172 | 171 | ||
| @@ -183,10 +182,9 @@ static const char * const cpuacct_stat_desc[] = { | |||
| 183 | [CPUACCT_STAT_SYSTEM] = "system", | 182 | [CPUACCT_STAT_SYSTEM] = "system", |
| 184 | }; | 183 | }; |
| 185 | 184 | ||
| 186 | static int cpuacct_stats_show(struct cgroup_subsys_state *css, | 185 | static int cpuacct_stats_show(struct seq_file *sf, void *v) |
| 187 | struct cftype *cft, struct cgroup_map_cb *cb) | ||
| 188 | { | 186 | { |
| 189 | struct cpuacct *ca = css_ca(css); | 187 | struct cpuacct *ca = css_ca(seq_css(sf)); |
| 190 | int cpu; | 188 | int cpu; |
| 191 | s64 val = 0; | 189 | s64 val = 0; |
| 192 | 190 | ||
| @@ -196,7 +194,7 @@ static int cpuacct_stats_show(struct cgroup_subsys_state *css, | |||
| 196 | val += kcpustat->cpustat[CPUTIME_NICE]; | 194 | val += kcpustat->cpustat[CPUTIME_NICE]; |
| 197 | } | 195 | } |
| 198 | val = cputime64_to_clock_t(val); | 196 | val = cputime64_to_clock_t(val); |
| 199 | cb->fill(cb, cpuacct_stat_desc[CPUACCT_STAT_USER], val); | 197 | seq_printf(sf, "%s %lld\n", cpuacct_stat_desc[CPUACCT_STAT_USER], val); |
| 200 | 198 | ||
| 201 | val = 0; | 199 | val = 0; |
| 202 | for_each_online_cpu(cpu) { | 200 | for_each_online_cpu(cpu) { |
| @@ -207,7 +205,7 @@ static int cpuacct_stats_show(struct cgroup_subsys_state *css, | |||
| 207 | } | 205 | } |
| 208 | 206 | ||
| 209 | val = cputime64_to_clock_t(val); | 207 | val = cputime64_to_clock_t(val); |
| 210 | cb->fill(cb, cpuacct_stat_desc[CPUACCT_STAT_SYSTEM], val); | 208 | seq_printf(sf, "%s %lld\n", cpuacct_stat_desc[CPUACCT_STAT_SYSTEM], val); |
| 211 | 209 | ||
| 212 | return 0; | 210 | return 0; |
| 213 | } | 211 | } |
| @@ -220,11 +218,11 @@ static struct cftype files[] = { | |||
| 220 | }, | 218 | }, |
| 221 | { | 219 | { |
| 222 | .name = "usage_percpu", | 220 | .name = "usage_percpu", |
| 223 | .read_seq_string = cpuacct_percpu_seq_read, | 221 | .seq_show = cpuacct_percpu_seq_show, |
| 224 | }, | 222 | }, |
| 225 | { | 223 | { |
| 226 | .name = "stat", | 224 | .name = "stat", |
| 227 | .read_map = cpuacct_stats_show, | 225 | .seq_show = cpuacct_stats_show, |
| 228 | }, | 226 | }, |
| 229 | { } /* terminate */ | 227 | { } /* terminate */ |
| 230 | }; | 228 | }; |
diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c index bda8e44f6fde..d747a84e09b0 100644 --- a/mm/hugetlb_cgroup.c +++ b/mm/hugetlb_cgroup.c | |||
| @@ -242,22 +242,16 @@ void hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages, | |||
| 242 | return; | 242 | return; |
| 243 | } | 243 | } |
| 244 | 244 | ||
| 245 | static ssize_t hugetlb_cgroup_read(struct cgroup_subsys_state *css, | 245 | static u64 hugetlb_cgroup_read_u64(struct cgroup_subsys_state *css, |
| 246 | struct cftype *cft, struct file *file, | 246 | struct cftype *cft) |
| 247 | char __user *buf, size_t nbytes, | ||
| 248 | loff_t *ppos) | ||
| 249 | { | 247 | { |
| 250 | u64 val; | 248 | int idx, name; |
| 251 | char str[64]; | ||
| 252 | int idx, name, len; | ||
| 253 | struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css); | 249 | struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css); |
| 254 | 250 | ||
| 255 | idx = MEMFILE_IDX(cft->private); | 251 | idx = MEMFILE_IDX(cft->private); |
| 256 | name = MEMFILE_ATTR(cft->private); | 252 | name = MEMFILE_ATTR(cft->private); |
| 257 | 253 | ||
| 258 | val = res_counter_read_u64(&h_cg->hugepage[idx], name); | 254 | return res_counter_read_u64(&h_cg->hugepage[idx], name); |
| 259 | len = scnprintf(str, sizeof(str), "%llu\n", (unsigned long long)val); | ||
| 260 | return simple_read_from_buffer(buf, nbytes, ppos, str, len); | ||
| 261 | } | 255 | } |
| 262 | 256 | ||
| 263 | static int hugetlb_cgroup_write(struct cgroup_subsys_state *css, | 257 | static int hugetlb_cgroup_write(struct cgroup_subsys_state *css, |
| @@ -337,28 +331,28 @@ static void __init __hugetlb_cgroup_file_init(int idx) | |||
| 337 | cft = &h->cgroup_files[0]; | 331 | cft = &h->cgroup_files[0]; |
| 338 | snprintf(cft->name, MAX_CFTYPE_NAME, "%s.limit_in_bytes", buf); | 332 | snprintf(cft->name, MAX_CFTYPE_NAME, "%s.limit_in_bytes", buf); |
| 339 | cft->private = MEMFILE_PRIVATE(idx, RES_LIMIT); | 333 | cft->private = MEMFILE_PRIVATE(idx, RES_LIMIT); |
| 340 | cft->read = hugetlb_cgroup_read; | 334 | cft->read_u64 = hugetlb_cgroup_read_u64; |
| 341 | cft->write_string = hugetlb_cgroup_write; | 335 | cft->write_string = hugetlb_cgroup_write; |
| 342 | 336 | ||
| 343 | /* Add the usage file */ | 337 | /* Add the usage file */ |
| 344 | cft = &h->cgroup_files[1]; | 338 | cft = &h->cgroup_files[1]; |
| 345 | snprintf(cft->name, MAX_CFTYPE_NAME, "%s.usage_in_bytes", buf); | 339 | snprintf(cft->name, MAX_CFTYPE_NAME, "%s.usage_in_bytes", buf); |
| 346 | cft->private = MEMFILE_PRIVATE(idx, RES_USAGE); | 340 | cft->private = MEMFILE_PRIVATE(idx, RES_USAGE); |
| 347 | cft->read = hugetlb_cgroup_read; | 341 | cft->read_u64 = hugetlb_cgroup_read_u64; |
| 348 | 342 | ||
| 349 | /* Add the MAX usage file */ | 343 | /* Add the MAX usage file */ |
| 350 | cft = &h->cgroup_files[2]; | 344 | cft = &h->cgroup_files[2]; |
| 351 | snprintf(cft->name, MAX_CFTYPE_NAME, "%s.max_usage_in_bytes", buf); | 345 | snprintf(cft->name, MAX_CFTYPE_NAME, "%s.max_usage_in_bytes", buf); |
| 352 | cft->private = MEMFILE_PRIVATE(idx, RES_MAX_USAGE); | 346 | cft->private = MEMFILE_PRIVATE(idx, RES_MAX_USAGE); |
| 353 | cft->trigger = hugetlb_cgroup_reset; | 347 | cft->trigger = hugetlb_cgroup_reset; |
| 354 | cft->read = hugetlb_cgroup_read; | 348 | cft->read_u64 = hugetlb_cgroup_read_u64; |
| 355 | 349 | ||
| 356 | /* Add the failcntfile */ | 350 | /* Add the failcntfile */ |
| 357 | cft = &h->cgroup_files[3]; | 351 | cft = &h->cgroup_files[3]; |
| 358 | snprintf(cft->name, MAX_CFTYPE_NAME, "%s.failcnt", buf); | 352 | snprintf(cft->name, MAX_CFTYPE_NAME, "%s.failcnt", buf); |
| 359 | cft->private = MEMFILE_PRIVATE(idx, RES_FAILCNT); | 353 | cft->private = MEMFILE_PRIVATE(idx, RES_FAILCNT); |
| 360 | cft->trigger = hugetlb_cgroup_reset; | 354 | cft->trigger = hugetlb_cgroup_reset; |
| 361 | cft->read = hugetlb_cgroup_read; | 355 | cft->read_u64 = hugetlb_cgroup_read_u64; |
| 362 | 356 | ||
| 363 | /* NULL terminate the last cft */ | 357 | /* NULL terminate the last cft */ |
| 364 | cft = &h->cgroup_files[4]; | 358 | cft = &h->cgroup_files[4]; |
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 7f1a356153c0..7caff36180cd 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
| @@ -45,6 +45,7 @@ | |||
| 45 | #include <linux/swapops.h> | 45 | #include <linux/swapops.h> |
| 46 | #include <linux/spinlock.h> | 46 | #include <linux/spinlock.h> |
| 47 | #include <linux/eventfd.h> | 47 | #include <linux/eventfd.h> |
| 48 | #include <linux/poll.h> | ||
| 48 | #include <linux/sort.h> | 49 | #include <linux/sort.h> |
| 49 | #include <linux/fs.h> | 50 | #include <linux/fs.h> |
| 50 | #include <linux/seq_file.h> | 51 | #include <linux/seq_file.h> |
| @@ -55,6 +56,7 @@ | |||
| 55 | #include <linux/cpu.h> | 56 | #include <linux/cpu.h> |
| 56 | #include <linux/oom.h> | 57 | #include <linux/oom.h> |
| 57 | #include <linux/lockdep.h> | 58 | #include <linux/lockdep.h> |
| 59 | #include <linux/file.h> | ||
| 58 | #include "internal.h" | 60 | #include "internal.h" |
| 59 | #include <net/sock.h> | 61 | #include <net/sock.h> |
| 60 | #include <net/ip.h> | 62 | #include <net/ip.h> |
| @@ -227,6 +229,46 @@ struct mem_cgroup_eventfd_list { | |||
| 227 | struct eventfd_ctx *eventfd; | 229 | struct eventfd_ctx *eventfd; |
| 228 | }; | 230 | }; |
| 229 | 231 | ||
| 232 | /* | ||
| 233 | * cgroup_event represents events which userspace want to receive. | ||
| 234 | */ | ||
| 235 | struct mem_cgroup_event { | ||
| 236 | /* | ||
| 237 | * memcg which the event belongs to. | ||
| 238 | */ | ||
| 239 | struct mem_cgroup *memcg; | ||
| 240 | /* | ||
| 241 | * eventfd to signal userspace about the event. | ||
| 242 | */ | ||
| 243 | struct eventfd_ctx *eventfd; | ||
| 244 | /* | ||
| 245 | * Each of these stored in a list by the cgroup. | ||
| 246 | */ | ||
| 247 | struct list_head list; | ||
| 248 | /* | ||
| 249 | * register_event() callback will be used to add new userspace | ||
| 250 | * waiter for changes related to this event. Use eventfd_signal() | ||
| 251 | * on eventfd to send notification to userspace. | ||
| 252 | */ | ||
| 253 | int (*register_event)(struct mem_cgroup *memcg, | ||
| 254 | struct eventfd_ctx *eventfd, const char *args); | ||
| 255 | /* | ||
| 256 | * unregister_event() callback will be called when userspace closes | ||
| 257 | * the eventfd or on cgroup removing. This callback must be set, | ||
| 258 | * if you want provide notification functionality. | ||
| 259 | */ | ||
| 260 | void (*unregister_event)(struct mem_cgroup *memcg, | ||
| 261 | struct eventfd_ctx *eventfd); | ||
| 262 | /* | ||
| 263 | * All fields below needed to unregister event when | ||
| 264 | * userspace closes eventfd. | ||
| 265 | */ | ||
| 266 | poll_table pt; | ||
| 267 | wait_queue_head_t *wqh; | ||
| 268 | wait_queue_t wait; | ||
| 269 | struct work_struct remove; | ||
| 270 | }; | ||
| 271 | |||
| 230 | static void mem_cgroup_threshold(struct mem_cgroup *memcg); | 272 | static void mem_cgroup_threshold(struct mem_cgroup *memcg); |
| 231 | static void mem_cgroup_oom_notify(struct mem_cgroup *memcg); | 273 | static void mem_cgroup_oom_notify(struct mem_cgroup *memcg); |
| 232 | 274 | ||
| @@ -331,6 +373,10 @@ struct mem_cgroup { | |||
| 331 | atomic_t numainfo_updating; | 373 | atomic_t numainfo_updating; |
| 332 | #endif | 374 | #endif |
| 333 | 375 | ||
| 376 | /* List of events which userspace want to receive */ | ||
| 377 | struct list_head event_list; | ||
| 378 | spinlock_t event_list_lock; | ||
| 379 | |||
| 334 | struct mem_cgroup_per_node *nodeinfo[0]; | 380 | struct mem_cgroup_per_node *nodeinfo[0]; |
| 335 | /* WARNING: nodeinfo must be the last member here */ | 381 | /* WARNING: nodeinfo must be the last member here */ |
| 336 | }; | 382 | }; |
| @@ -490,11 +536,6 @@ struct cgroup_subsys_state *vmpressure_to_css(struct vmpressure *vmpr) | |||
| 490 | return &container_of(vmpr, struct mem_cgroup, vmpressure)->css; | 536 | return &container_of(vmpr, struct mem_cgroup, vmpressure)->css; |
| 491 | } | 537 | } |
| 492 | 538 | ||
| 493 | struct vmpressure *css_to_vmpressure(struct cgroup_subsys_state *css) | ||
| 494 | { | ||
| 495 | return &mem_cgroup_from_css(css)->vmpressure; | ||
| 496 | } | ||
| 497 | |||
| 498 | static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg) | 539 | static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg) |
| 499 | { | 540 | { |
| 500 | return (memcg == root_mem_cgroup); | 541 | return (memcg == root_mem_cgroup); |
| @@ -2976,10 +3017,9 @@ static struct kmem_cache *memcg_params_to_cache(struct memcg_cache_params *p) | |||
| 2976 | } | 3017 | } |
| 2977 | 3018 | ||
| 2978 | #ifdef CONFIG_SLABINFO | 3019 | #ifdef CONFIG_SLABINFO |
| 2979 | static int mem_cgroup_slabinfo_read(struct cgroup_subsys_state *css, | 3020 | static int mem_cgroup_slabinfo_read(struct seq_file *m, void *v) |
| 2980 | struct cftype *cft, struct seq_file *m) | ||
| 2981 | { | 3021 | { |
| 2982 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); | 3022 | struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m)); |
| 2983 | struct memcg_cache_params *params; | 3023 | struct memcg_cache_params *params; |
| 2984 | 3024 | ||
| 2985 | if (!memcg_can_account_kmem(memcg)) | 3025 | if (!memcg_can_account_kmem(memcg)) |
| @@ -5112,14 +5152,12 @@ static inline u64 mem_cgroup_usage(struct mem_cgroup *memcg, bool swap) | |||
| 5112 | return val << PAGE_SHIFT; | 5152 | return val << PAGE_SHIFT; |
| 5113 | } | 5153 | } |
| 5114 | 5154 | ||
| 5115 | static ssize_t mem_cgroup_read(struct cgroup_subsys_state *css, | 5155 | static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css, |
| 5116 | struct cftype *cft, struct file *file, | 5156 | struct cftype *cft) |
| 5117 | char __user *buf, size_t nbytes, loff_t *ppos) | ||
| 5118 | { | 5157 | { |
| 5119 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); | 5158 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); |
| 5120 | char str[64]; | ||
| 5121 | u64 val; | 5159 | u64 val; |
| 5122 | int name, len; | 5160 | int name; |
| 5123 | enum res_type type; | 5161 | enum res_type type; |
| 5124 | 5162 | ||
| 5125 | type = MEMFILE_TYPE(cft->private); | 5163 | type = MEMFILE_TYPE(cft->private); |
| @@ -5145,8 +5183,7 @@ static ssize_t mem_cgroup_read(struct cgroup_subsys_state *css, | |||
| 5145 | BUG(); | 5183 | BUG(); |
| 5146 | } | 5184 | } |
| 5147 | 5185 | ||
| 5148 | len = scnprintf(str, sizeof(str), "%llu\n", (unsigned long long)val); | 5186 | return val; |
| 5149 | return simple_read_from_buffer(buf, nbytes, ppos, str, len); | ||
| 5150 | } | 5187 | } |
| 5151 | 5188 | ||
| 5152 | static int memcg_update_kmem_limit(struct cgroup_subsys_state *css, u64 val) | 5189 | static int memcg_update_kmem_limit(struct cgroup_subsys_state *css, u64 val) |
| @@ -5383,8 +5420,7 @@ static int mem_cgroup_move_charge_write(struct cgroup_subsys_state *css, | |||
| 5383 | #endif | 5420 | #endif |
| 5384 | 5421 | ||
| 5385 | #ifdef CONFIG_NUMA | 5422 | #ifdef CONFIG_NUMA |
| 5386 | static int memcg_numa_stat_show(struct cgroup_subsys_state *css, | 5423 | static int memcg_numa_stat_show(struct seq_file *m, void *v) |
| 5387 | struct cftype *cft, struct seq_file *m) | ||
| 5388 | { | 5424 | { |
| 5389 | struct numa_stat { | 5425 | struct numa_stat { |
| 5390 | const char *name; | 5426 | const char *name; |
| @@ -5400,7 +5436,7 @@ static int memcg_numa_stat_show(struct cgroup_subsys_state *css, | |||
| 5400 | const struct numa_stat *stat; | 5436 | const struct numa_stat *stat; |
| 5401 | int nid; | 5437 | int nid; |
| 5402 | unsigned long nr; | 5438 | unsigned long nr; |
| 5403 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); | 5439 | struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m)); |
| 5404 | 5440 | ||
| 5405 | for (stat = stats; stat < stats + ARRAY_SIZE(stats); stat++) { | 5441 | for (stat = stats; stat < stats + ARRAY_SIZE(stats); stat++) { |
| 5406 | nr = mem_cgroup_nr_lru_pages(memcg, stat->lru_mask); | 5442 | nr = mem_cgroup_nr_lru_pages(memcg, stat->lru_mask); |
| @@ -5439,10 +5475,9 @@ static inline void mem_cgroup_lru_names_not_uptodate(void) | |||
| 5439 | BUILD_BUG_ON(ARRAY_SIZE(mem_cgroup_lru_names) != NR_LRU_LISTS); | 5475 | BUILD_BUG_ON(ARRAY_SIZE(mem_cgroup_lru_names) != NR_LRU_LISTS); |
| 5440 | } | 5476 | } |
| 5441 | 5477 | ||
| 5442 | static int memcg_stat_show(struct cgroup_subsys_state *css, struct cftype *cft, | 5478 | static int memcg_stat_show(struct seq_file *m, void *v) |
| 5443 | struct seq_file *m) | ||
| 5444 | { | 5479 | { |
| 5445 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); | 5480 | struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m)); |
| 5446 | struct mem_cgroup *mi; | 5481 | struct mem_cgroup *mi; |
| 5447 | unsigned int i; | 5482 | unsigned int i; |
| 5448 | 5483 | ||
| @@ -5651,13 +5686,11 @@ static void mem_cgroup_oom_notify(struct mem_cgroup *memcg) | |||
| 5651 | mem_cgroup_oom_notify_cb(iter); | 5686 | mem_cgroup_oom_notify_cb(iter); |
| 5652 | } | 5687 | } |
| 5653 | 5688 | ||
| 5654 | static int mem_cgroup_usage_register_event(struct cgroup_subsys_state *css, | 5689 | static int __mem_cgroup_usage_register_event(struct mem_cgroup *memcg, |
| 5655 | struct cftype *cft, struct eventfd_ctx *eventfd, const char *args) | 5690 | struct eventfd_ctx *eventfd, const char *args, enum res_type type) |
| 5656 | { | 5691 | { |
| 5657 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); | ||
| 5658 | struct mem_cgroup_thresholds *thresholds; | 5692 | struct mem_cgroup_thresholds *thresholds; |
| 5659 | struct mem_cgroup_threshold_ary *new; | 5693 | struct mem_cgroup_threshold_ary *new; |
| 5660 | enum res_type type = MEMFILE_TYPE(cft->private); | ||
| 5661 | u64 threshold, usage; | 5694 | u64 threshold, usage; |
| 5662 | int i, size, ret; | 5695 | int i, size, ret; |
| 5663 | 5696 | ||
| @@ -5734,13 +5767,23 @@ unlock: | |||
| 5734 | return ret; | 5767 | return ret; |
| 5735 | } | 5768 | } |
| 5736 | 5769 | ||
| 5737 | static void mem_cgroup_usage_unregister_event(struct cgroup_subsys_state *css, | 5770 | static int mem_cgroup_usage_register_event(struct mem_cgroup *memcg, |
| 5738 | struct cftype *cft, struct eventfd_ctx *eventfd) | 5771 | struct eventfd_ctx *eventfd, const char *args) |
| 5772 | { | ||
| 5773 | return __mem_cgroup_usage_register_event(memcg, eventfd, args, _MEM); | ||
| 5774 | } | ||
| 5775 | |||
| 5776 | static int memsw_cgroup_usage_register_event(struct mem_cgroup *memcg, | ||
| 5777 | struct eventfd_ctx *eventfd, const char *args) | ||
| 5778 | { | ||
| 5779 | return __mem_cgroup_usage_register_event(memcg, eventfd, args, _MEMSWAP); | ||
| 5780 | } | ||
| 5781 | |||
| 5782 | static void __mem_cgroup_usage_unregister_event(struct mem_cgroup *memcg, | ||
| 5783 | struct eventfd_ctx *eventfd, enum res_type type) | ||
| 5739 | { | 5784 | { |
| 5740 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); | ||
| 5741 | struct mem_cgroup_thresholds *thresholds; | 5785 | struct mem_cgroup_thresholds *thresholds; |
| 5742 | struct mem_cgroup_threshold_ary *new; | 5786 | struct mem_cgroup_threshold_ary *new; |
| 5743 | enum res_type type = MEMFILE_TYPE(cft->private); | ||
| 5744 | u64 usage; | 5787 | u64 usage; |
| 5745 | int i, j, size; | 5788 | int i, j, size; |
| 5746 | 5789 | ||
| @@ -5813,14 +5856,23 @@ unlock: | |||
| 5813 | mutex_unlock(&memcg->thresholds_lock); | 5856 | mutex_unlock(&memcg->thresholds_lock); |
| 5814 | } | 5857 | } |
| 5815 | 5858 | ||
| 5816 | static int mem_cgroup_oom_register_event(struct cgroup_subsys_state *css, | 5859 | static void mem_cgroup_usage_unregister_event(struct mem_cgroup *memcg, |
| 5817 | struct cftype *cft, struct eventfd_ctx *eventfd, const char *args) | 5860 | struct eventfd_ctx *eventfd) |
| 5861 | { | ||
| 5862 | return __mem_cgroup_usage_unregister_event(memcg, eventfd, _MEM); | ||
| 5863 | } | ||
| 5864 | |||
| 5865 | static void memsw_cgroup_usage_unregister_event(struct mem_cgroup *memcg, | ||
| 5866 | struct eventfd_ctx *eventfd) | ||
| 5867 | { | ||
| 5868 | return __mem_cgroup_usage_unregister_event(memcg, eventfd, _MEMSWAP); | ||
| 5869 | } | ||
| 5870 | |||
| 5871 | static int mem_cgroup_oom_register_event(struct mem_cgroup *memcg, | ||
| 5872 | struct eventfd_ctx *eventfd, const char *args) | ||
| 5818 | { | 5873 | { |
| 5819 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); | ||
| 5820 | struct mem_cgroup_eventfd_list *event; | 5874 | struct mem_cgroup_eventfd_list *event; |
| 5821 | enum res_type type = MEMFILE_TYPE(cft->private); | ||
| 5822 | 5875 | ||
| 5823 | BUG_ON(type != _OOM_TYPE); | ||
| 5824 | event = kmalloc(sizeof(*event), GFP_KERNEL); | 5876 | event = kmalloc(sizeof(*event), GFP_KERNEL); |
| 5825 | if (!event) | 5877 | if (!event) |
| 5826 | return -ENOMEM; | 5878 | return -ENOMEM; |
| @@ -5838,14 +5890,10 @@ static int mem_cgroup_oom_register_event(struct cgroup_subsys_state *css, | |||
| 5838 | return 0; | 5890 | return 0; |
| 5839 | } | 5891 | } |
| 5840 | 5892 | ||
| 5841 | static void mem_cgroup_oom_unregister_event(struct cgroup_subsys_state *css, | 5893 | static void mem_cgroup_oom_unregister_event(struct mem_cgroup *memcg, |
| 5842 | struct cftype *cft, struct eventfd_ctx *eventfd) | 5894 | struct eventfd_ctx *eventfd) |
| 5843 | { | 5895 | { |
| 5844 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); | ||
| 5845 | struct mem_cgroup_eventfd_list *ev, *tmp; | 5896 | struct mem_cgroup_eventfd_list *ev, *tmp; |
| 5846 | enum res_type type = MEMFILE_TYPE(cft->private); | ||
| 5847 | |||
| 5848 | BUG_ON(type != _OOM_TYPE); | ||
| 5849 | 5897 | ||
| 5850 | spin_lock(&memcg_oom_lock); | 5898 | spin_lock(&memcg_oom_lock); |
| 5851 | 5899 | ||
| @@ -5859,17 +5907,12 @@ static void mem_cgroup_oom_unregister_event(struct cgroup_subsys_state *css, | |||
| 5859 | spin_unlock(&memcg_oom_lock); | 5907 | spin_unlock(&memcg_oom_lock); |
| 5860 | } | 5908 | } |
| 5861 | 5909 | ||
| 5862 | static int mem_cgroup_oom_control_read(struct cgroup_subsys_state *css, | 5910 | static int mem_cgroup_oom_control_read(struct seq_file *sf, void *v) |
| 5863 | struct cftype *cft, struct cgroup_map_cb *cb) | ||
| 5864 | { | 5911 | { |
| 5865 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); | 5912 | struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(sf)); |
| 5866 | |||
| 5867 | cb->fill(cb, "oom_kill_disable", memcg->oom_kill_disable); | ||
| 5868 | 5913 | ||
| 5869 | if (atomic_read(&memcg->under_oom)) | 5914 | seq_printf(sf, "oom_kill_disable %d\n", memcg->oom_kill_disable); |
| 5870 | cb->fill(cb, "under_oom", 1); | 5915 | seq_printf(sf, "under_oom %d\n", (bool)atomic_read(&memcg->under_oom)); |
| 5871 | else | ||
| 5872 | cb->fill(cb, "under_oom", 0); | ||
| 5873 | return 0; | 5916 | return 0; |
| 5874 | } | 5917 | } |
| 5875 | 5918 | ||
| @@ -5962,41 +6005,261 @@ static void kmem_cgroup_css_offline(struct mem_cgroup *memcg) | |||
| 5962 | } | 6005 | } |
| 5963 | #endif | 6006 | #endif |
| 5964 | 6007 | ||
| 6008 | /* | ||
| 6009 | * DO NOT USE IN NEW FILES. | ||
| 6010 | * | ||
| 6011 | * "cgroup.event_control" implementation. | ||
| 6012 | * | ||
| 6013 | * This is way over-engineered. It tries to support fully configurable | ||
| 6014 | * events for each user. Such level of flexibility is completely | ||
| 6015 | * unnecessary especially in the light of the planned unified hierarchy. | ||
| 6016 | * | ||
| 6017 | * Please deprecate this and replace with something simpler if at all | ||
| 6018 | * possible. | ||
| 6019 | */ | ||
| 6020 | |||
| 6021 | /* | ||
| 6022 | * Unregister event and free resources. | ||
| 6023 | * | ||
| 6024 | * Gets called from workqueue. | ||
| 6025 | */ | ||
| 6026 | static void memcg_event_remove(struct work_struct *work) | ||
| 6027 | { | ||
| 6028 | struct mem_cgroup_event *event = | ||
| 6029 | container_of(work, struct mem_cgroup_event, remove); | ||
| 6030 | struct mem_cgroup *memcg = event->memcg; | ||
| 6031 | |||
| 6032 | remove_wait_queue(event->wqh, &event->wait); | ||
| 6033 | |||
| 6034 | event->unregister_event(memcg, event->eventfd); | ||
| 6035 | |||
| 6036 | /* Notify userspace the event is going away. */ | ||
| 6037 | eventfd_signal(event->eventfd, 1); | ||
| 6038 | |||
| 6039 | eventfd_ctx_put(event->eventfd); | ||
| 6040 | kfree(event); | ||
| 6041 | css_put(&memcg->css); | ||
| 6042 | } | ||
| 6043 | |||
| 6044 | /* | ||
| 6045 | * Gets called on POLLHUP on eventfd when user closes it. | ||
| 6046 | * | ||
| 6047 | * Called with wqh->lock held and interrupts disabled. | ||
| 6048 | */ | ||
| 6049 | static int memcg_event_wake(wait_queue_t *wait, unsigned mode, | ||
| 6050 | int sync, void *key) | ||
| 6051 | { | ||
| 6052 | struct mem_cgroup_event *event = | ||
| 6053 | container_of(wait, struct mem_cgroup_event, wait); | ||
| 6054 | struct mem_cgroup *memcg = event->memcg; | ||
| 6055 | unsigned long flags = (unsigned long)key; | ||
| 6056 | |||
| 6057 | if (flags & POLLHUP) { | ||
| 6058 | /* | ||
| 6059 | * If the event has been detached at cgroup removal, we | ||
| 6060 | * can simply return knowing the other side will clean up | ||
| 6061 | * for us. | ||
| 6062 | * | ||
| 6063 | * We can't race against event freeing since the other | ||
| 6064 | * side will require wqh->lock via remove_wait_queue(), | ||
| 6065 | * which we hold. | ||
| 6066 | */ | ||
| 6067 | spin_lock(&memcg->event_list_lock); | ||
| 6068 | if (!list_empty(&event->list)) { | ||
| 6069 | list_del_init(&event->list); | ||
| 6070 | /* | ||
| 6071 | * We are in atomic context, but memcg_event_remove() | ||
| 6072 | * may sleep, so we have to call it in workqueue. | ||
| 6073 | */ | ||
| 6074 | schedule_work(&event->remove); | ||
| 6075 | } | ||
| 6076 | spin_unlock(&memcg->event_list_lock); | ||
| 6077 | } | ||
| 6078 | |||
| 6079 | return 0; | ||
| 6080 | } | ||
| 6081 | |||
| 6082 | static void memcg_event_ptable_queue_proc(struct file *file, | ||
| 6083 | wait_queue_head_t *wqh, poll_table *pt) | ||
| 6084 | { | ||
| 6085 | struct mem_cgroup_event *event = | ||
| 6086 | container_of(pt, struct mem_cgroup_event, pt); | ||
| 6087 | |||
| 6088 | event->wqh = wqh; | ||
| 6089 | add_wait_queue(wqh, &event->wait); | ||
| 6090 | } | ||
| 6091 | |||
| 6092 | /* | ||
| 6093 | * DO NOT USE IN NEW FILES. | ||
| 6094 | * | ||
| 6095 | * Parse input and register new cgroup event handler. | ||
| 6096 | * | ||
| 6097 | * Input must be in format '<event_fd> <control_fd> <args>'. | ||
| 6098 | * Interpretation of args is defined by control file implementation. | ||
| 6099 | */ | ||
| 6100 | static int memcg_write_event_control(struct cgroup_subsys_state *css, | ||
| 6101 | struct cftype *cft, const char *buffer) | ||
| 6102 | { | ||
| 6103 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); | ||
| 6104 | struct mem_cgroup_event *event; | ||
| 6105 | struct cgroup_subsys_state *cfile_css; | ||
| 6106 | unsigned int efd, cfd; | ||
| 6107 | struct fd efile; | ||
| 6108 | struct fd cfile; | ||
| 6109 | const char *name; | ||
| 6110 | char *endp; | ||
| 6111 | int ret; | ||
| 6112 | |||
| 6113 | efd = simple_strtoul(buffer, &endp, 10); | ||
| 6114 | if (*endp != ' ') | ||
| 6115 | return -EINVAL; | ||
| 6116 | buffer = endp + 1; | ||
| 6117 | |||
| 6118 | cfd = simple_strtoul(buffer, &endp, 10); | ||
| 6119 | if ((*endp != ' ') && (*endp != '\0')) | ||
| 6120 | return -EINVAL; | ||
| 6121 | buffer = endp + 1; | ||
| 6122 | |||
| 6123 | event = kzalloc(sizeof(*event), GFP_KERNEL); | ||
| 6124 | if (!event) | ||
| 6125 | return -ENOMEM; | ||
| 6126 | |||
| 6127 | event->memcg = memcg; | ||
| 6128 | INIT_LIST_HEAD(&event->list); | ||
| 6129 | init_poll_funcptr(&event->pt, memcg_event_ptable_queue_proc); | ||
| 6130 | init_waitqueue_func_entry(&event->wait, memcg_event_wake); | ||
| 6131 | INIT_WORK(&event->remove, memcg_event_remove); | ||
| 6132 | |||
| 6133 | efile = fdget(efd); | ||
| 6134 | if (!efile.file) { | ||
| 6135 | ret = -EBADF; | ||
| 6136 | goto out_kfree; | ||
| 6137 | } | ||
| 6138 | |||
| 6139 | event->eventfd = eventfd_ctx_fileget(efile.file); | ||
| 6140 | if (IS_ERR(event->eventfd)) { | ||
| 6141 | ret = PTR_ERR(event->eventfd); | ||
| 6142 | goto out_put_efile; | ||
| 6143 | } | ||
| 6144 | |||
| 6145 | cfile = fdget(cfd); | ||
| 6146 | if (!cfile.file) { | ||
| 6147 | ret = -EBADF; | ||
| 6148 | goto out_put_eventfd; | ||
| 6149 | } | ||
| 6150 | |||
| 6151 | /* the process needs read permission on the control file */ | ||
| 6152 | /* AV: shouldn't we check that it's been opened for read instead? */ | ||
| 6153 | ret = inode_permission(file_inode(cfile.file), MAY_READ); | ||
| 6154 | if (ret < 0) | ||
| 6155 | goto out_put_cfile; | ||
| 6156 | |||
| 6157 | /* | ||
| 6158 | * Determine the event callbacks and set them in @event. This used | ||
| 6159 | * to be done via struct cftype but cgroup core no longer knows | ||
| 6160 | * about these events. The following is crude but the whole thing | ||
| 6161 | * is for compatibility anyway. | ||
| 6162 | * | ||
| 6163 | * DO NOT ADD NEW FILES. | ||
| 6164 | */ | ||
| 6165 | name = cfile.file->f_dentry->d_name.name; | ||
| 6166 | |||
| 6167 | if (!strcmp(name, "memory.usage_in_bytes")) { | ||
| 6168 | event->register_event = mem_cgroup_usage_register_event; | ||
| 6169 | event->unregister_event = mem_cgroup_usage_unregister_event; | ||
| 6170 | } else if (!strcmp(name, "memory.oom_control")) { | ||
| 6171 | event->register_event = mem_cgroup_oom_register_event; | ||
| 6172 | event->unregister_event = mem_cgroup_oom_unregister_event; | ||
| 6173 | } else if (!strcmp(name, "memory.pressure_level")) { | ||
| 6174 | event->register_event = vmpressure_register_event; | ||
| 6175 | event->unregister_event = vmpressure_unregister_event; | ||
| 6176 | } else if (!strcmp(name, "memory.memsw.usage_in_bytes")) { | ||
| 6177 | event->register_event = memsw_cgroup_usage_register_event; | ||
| 6178 | event->unregister_event = memsw_cgroup_usage_unregister_event; | ||
| 6179 | } else { | ||
| 6180 | ret = -EINVAL; | ||
| 6181 | goto out_put_cfile; | ||
| 6182 | } | ||
| 6183 | |||
| 6184 | /* | ||
| 6185 | * Verify that @cfile belongs to @css. Also, remaining events are | ||
| 6186 | * automatically removed on cgroup destruction but the removal is | ||
| 6187 | * asynchronous, so take an extra ref on @css. | ||
| 6188 | */ | ||
| 6189 | rcu_read_lock(); | ||
| 6190 | |||
| 6191 | ret = -EINVAL; | ||
| 6192 | cfile_css = css_from_dir(cfile.file->f_dentry->d_parent, | ||
| 6193 | &mem_cgroup_subsys); | ||
| 6194 | if (cfile_css == css && css_tryget(css)) | ||
| 6195 | ret = 0; | ||
| 6196 | |||
| 6197 | rcu_read_unlock(); | ||
| 6198 | if (ret) | ||
| 6199 | goto out_put_cfile; | ||
| 6200 | |||
| 6201 | ret = event->register_event(memcg, event->eventfd, buffer); | ||
| 6202 | if (ret) | ||
| 6203 | goto out_put_css; | ||
| 6204 | |||
| 6205 | efile.file->f_op->poll(efile.file, &event->pt); | ||
| 6206 | |||
| 6207 | spin_lock(&memcg->event_list_lock); | ||
| 6208 | list_add(&event->list, &memcg->event_list); | ||
| 6209 | spin_unlock(&memcg->event_list_lock); | ||
| 6210 | |||
| 6211 | fdput(cfile); | ||
| 6212 | fdput(efile); | ||
| 6213 | |||
| 6214 | return 0; | ||
| 6215 | |||
| 6216 | out_put_css: | ||
| 6217 | css_put(css); | ||
| 6218 | out_put_cfile: | ||
| 6219 | fdput(cfile); | ||
| 6220 | out_put_eventfd: | ||
| 6221 | eventfd_ctx_put(event->eventfd); | ||
| 6222 | out_put_efile: | ||
| 6223 | fdput(efile); | ||
| 6224 | out_kfree: | ||
| 6225 | kfree(event); | ||
| 6226 | |||
| 6227 | return ret; | ||
| 6228 | } | ||
| 6229 | |||
| 5965 | static struct cftype mem_cgroup_files[] = { | 6230 | static struct cftype mem_cgroup_files[] = { |
| 5966 | { | 6231 | { |
| 5967 | .name = "usage_in_bytes", | 6232 | .name = "usage_in_bytes", |
| 5968 | .private = MEMFILE_PRIVATE(_MEM, RES_USAGE), | 6233 | .private = MEMFILE_PRIVATE(_MEM, RES_USAGE), |
| 5969 | .read = mem_cgroup_read, | 6234 | .read_u64 = mem_cgroup_read_u64, |
| 5970 | .register_event = mem_cgroup_usage_register_event, | ||
| 5971 | .unregister_event = mem_cgroup_usage_unregister_event, | ||
| 5972 | }, | 6235 | }, |
| 5973 | { | 6236 | { |
| 5974 | .name = "max_usage_in_bytes", | 6237 | .name = "max_usage_in_bytes", |
| 5975 | .private = MEMFILE_PRIVATE(_MEM, RES_MAX_USAGE), | 6238 | .private = MEMFILE_PRIVATE(_MEM, RES_MAX_USAGE), |
| 5976 | .trigger = mem_cgroup_reset, | 6239 | .trigger = mem_cgroup_reset, |
| 5977 | .read = mem_cgroup_read, | 6240 | .read_u64 = mem_cgroup_read_u64, |
| 5978 | }, | 6241 | }, |
| 5979 | { | 6242 | { |
| 5980 | .name = "limit_in_bytes", | 6243 | .name = "limit_in_bytes", |
| 5981 | .private = MEMFILE_PRIVATE(_MEM, RES_LIMIT), | 6244 | .private = MEMFILE_PRIVATE(_MEM, RES_LIMIT), |
| 5982 | .write_string = mem_cgroup_write, | 6245 | .write_string = mem_cgroup_write, |
| 5983 | .read = mem_cgroup_read, | 6246 | .read_u64 = mem_cgroup_read_u64, |
| 5984 | }, | 6247 | }, |
| 5985 | { | 6248 | { |
| 5986 | .name = "soft_limit_in_bytes", | 6249 | .name = "soft_limit_in_bytes", |
| 5987 | .private = MEMFILE_PRIVATE(_MEM, RES_SOFT_LIMIT), | 6250 | .private = MEMFILE_PRIVATE(_MEM, RES_SOFT_LIMIT), |
| 5988 | .write_string = mem_cgroup_write, | 6251 | .write_string = mem_cgroup_write, |
| 5989 | .read = mem_cgroup_read, | 6252 | .read_u64 = mem_cgroup_read_u64, |
| 5990 | }, | 6253 | }, |
| 5991 | { | 6254 | { |
| 5992 | .name = "failcnt", | 6255 | .name = "failcnt", |
| 5993 | .private = MEMFILE_PRIVATE(_MEM, RES_FAILCNT), | 6256 | .private = MEMFILE_PRIVATE(_MEM, RES_FAILCNT), |
| 5994 | .trigger = mem_cgroup_reset, | 6257 | .trigger = mem_cgroup_reset, |
| 5995 | .read = mem_cgroup_read, | 6258 | .read_u64 = mem_cgroup_read_u64, |
| 5996 | }, | 6259 | }, |
| 5997 | { | 6260 | { |
| 5998 | .name = "stat", | 6261 | .name = "stat", |
| 5999 | .read_seq_string = memcg_stat_show, | 6262 | .seq_show = memcg_stat_show, |
| 6000 | }, | 6263 | }, |
| 6001 | { | 6264 | { |
| 6002 | .name = "force_empty", | 6265 | .name = "force_empty", |
| @@ -6009,6 +6272,12 @@ static struct cftype mem_cgroup_files[] = { | |||
| 6009 | .read_u64 = mem_cgroup_hierarchy_read, | 6272 | .read_u64 = mem_cgroup_hierarchy_read, |
| 6010 | }, | 6273 | }, |
| 6011 | { | 6274 | { |
| 6275 | .name = "cgroup.event_control", /* XXX: for compat */ | ||
| 6276 | .write_string = memcg_write_event_control, | ||
| 6277 | .flags = CFTYPE_NO_PREFIX, | ||
| 6278 | .mode = S_IWUGO, | ||
| 6279 | }, | ||
| 6280 | { | ||
| 6012 | .name = "swappiness", | 6281 | .name = "swappiness", |
| 6013 | .read_u64 = mem_cgroup_swappiness_read, | 6282 | .read_u64 = mem_cgroup_swappiness_read, |
| 6014 | .write_u64 = mem_cgroup_swappiness_write, | 6283 | .write_u64 = mem_cgroup_swappiness_write, |
| @@ -6020,21 +6289,17 @@ static struct cftype mem_cgroup_files[] = { | |||
| 6020 | }, | 6289 | }, |
| 6021 | { | 6290 | { |
| 6022 | .name = "oom_control", | 6291 | .name = "oom_control", |
| 6023 | .read_map = mem_cgroup_oom_control_read, | 6292 | .seq_show = mem_cgroup_oom_control_read, |
| 6024 | .write_u64 = mem_cgroup_oom_control_write, | 6293 | .write_u64 = mem_cgroup_oom_control_write, |
| 6025 | .register_event = mem_cgroup_oom_register_event, | ||
| 6026 | .unregister_event = mem_cgroup_oom_unregister_event, | ||
| 6027 | .private = MEMFILE_PRIVATE(_OOM_TYPE, OOM_CONTROL), | 6294 | .private = MEMFILE_PRIVATE(_OOM_TYPE, OOM_CONTROL), |
| 6028 | }, | 6295 | }, |
| 6029 | { | 6296 | { |
| 6030 | .name = "pressure_level", | 6297 | .name = "pressure_level", |
| 6031 | .register_event = vmpressure_register_event, | ||
| 6032 | .unregister_event = vmpressure_unregister_event, | ||
| 6033 | }, | 6298 | }, |
| 6034 | #ifdef CONFIG_NUMA | 6299 | #ifdef CONFIG_NUMA |
| 6035 | { | 6300 | { |
| 6036 | .name = "numa_stat", | 6301 | .name = "numa_stat", |
| 6037 | .read_seq_string = memcg_numa_stat_show, | 6302 | .seq_show = memcg_numa_stat_show, |
| 6038 | }, | 6303 | }, |
| 6039 | #endif | 6304 | #endif |
| 6040 | #ifdef CONFIG_MEMCG_KMEM | 6305 | #ifdef CONFIG_MEMCG_KMEM |
| @@ -6042,29 +6307,29 @@ static struct cftype mem_cgroup_files[] = { | |||
| 6042 | .name = "kmem.limit_in_bytes", | 6307 | .name = "kmem.limit_in_bytes", |
| 6043 | .private = MEMFILE_PRIVATE(_KMEM, RES_LIMIT), | 6308 | .private = MEMFILE_PRIVATE(_KMEM, RES_LIMIT), |
| 6044 | .write_string = mem_cgroup_write, | 6309 | .write_string = mem_cgroup_write, |
| 6045 | .read = mem_cgroup_read, | 6310 | .read_u64 = mem_cgroup_read_u64, |
| 6046 | }, | 6311 | }, |
| 6047 | { | 6312 | { |
| 6048 | .name = "kmem.usage_in_bytes", | 6313 | .name = "kmem.usage_in_bytes", |
| 6049 | .private = MEMFILE_PRIVATE(_KMEM, RES_USAGE), | 6314 | .private = MEMFILE_PRIVATE(_KMEM, RES_USAGE), |
| 6050 | .read = mem_cgroup_read, | 6315 | .read_u64 = mem_cgroup_read_u64, |
| 6051 | }, | 6316 | }, |
| 6052 | { | 6317 | { |
| 6053 | .name = "kmem.failcnt", | 6318 | .name = "kmem.failcnt", |
| 6054 | .private = MEMFILE_PRIVATE(_KMEM, RES_FAILCNT), | 6319 | .private = MEMFILE_PRIVATE(_KMEM, RES_FAILCNT), |
| 6055 | .trigger = mem_cgroup_reset, | 6320 | .trigger = mem_cgroup_reset, |
| 6056 | .read = mem_cgroup_read, | 6321 | .read_u64 = mem_cgroup_read_u64, |
| 6057 | }, | 6322 | }, |
| 6058 | { | 6323 | { |
| 6059 | .name = "kmem.max_usage_in_bytes", | 6324 | .name = "kmem.max_usage_in_bytes", |
| 6060 | .private = MEMFILE_PRIVATE(_KMEM, RES_MAX_USAGE), | 6325 | .private = MEMFILE_PRIVATE(_KMEM, RES_MAX_USAGE), |
| 6061 | .trigger = mem_cgroup_reset, | 6326 | .trigger = mem_cgroup_reset, |
| 6062 | .read = mem_cgroup_read, | 6327 | .read_u64 = mem_cgroup_read_u64, |
| 6063 | }, | 6328 | }, |
| 6064 | #ifdef CONFIG_SLABINFO | 6329 | #ifdef CONFIG_SLABINFO |
| 6065 | { | 6330 | { |
| 6066 | .name = "kmem.slabinfo", | 6331 | .name = "kmem.slabinfo", |
| 6067 | .read_seq_string = mem_cgroup_slabinfo_read, | 6332 | .seq_show = mem_cgroup_slabinfo_read, |
| 6068 | }, | 6333 | }, |
| 6069 | #endif | 6334 | #endif |
| 6070 | #endif | 6335 | #endif |
| @@ -6076,27 +6341,25 @@ static struct cftype memsw_cgroup_files[] = { | |||
| 6076 | { | 6341 | { |
| 6077 | .name = "memsw.usage_in_bytes", | 6342 | .name = "memsw.usage_in_bytes", |
| 6078 | .private = MEMFILE_PRIVATE(_MEMSWAP, RES_USAGE), | 6343 | .private = MEMFILE_PRIVATE(_MEMSWAP, RES_USAGE), |
| 6079 | .read = mem_cgroup_read, | 6344 | .read_u64 = mem_cgroup_read_u64, |
| 6080 | .register_event = mem_cgroup_usage_register_event, | ||
| 6081 | .unregister_event = mem_cgroup_usage_unregister_event, | ||
| 6082 | }, | 6345 | }, |
| 6083 | { | 6346 | { |
| 6084 | .name = "memsw.max_usage_in_bytes", | 6347 | .name = "memsw.max_usage_in_bytes", |
| 6085 | .private = MEMFILE_PRIVATE(_MEMSWAP, RES_MAX_USAGE), | 6348 | .private = MEMFILE_PRIVATE(_MEMSWAP, RES_MAX_USAGE), |
| 6086 | .trigger = mem_cgroup_reset, | 6349 | .trigger = mem_cgroup_reset, |
| 6087 | .read = mem_cgroup_read, | 6350 | .read_u64 = mem_cgroup_read_u64, |
| 6088 | }, | 6351 | }, |
| 6089 | { | 6352 | { |
| 6090 | .name = "memsw.limit_in_bytes", | 6353 | .name = "memsw.limit_in_bytes", |
| 6091 | .private = MEMFILE_PRIVATE(_MEMSWAP, RES_LIMIT), | 6354 | .private = MEMFILE_PRIVATE(_MEMSWAP, RES_LIMIT), |
| 6092 | .write_string = mem_cgroup_write, | 6355 | .write_string = mem_cgroup_write, |
| 6093 | .read = mem_cgroup_read, | 6356 | .read_u64 = mem_cgroup_read_u64, |
| 6094 | }, | 6357 | }, |
| 6095 | { | 6358 | { |
| 6096 | .name = "memsw.failcnt", | 6359 | .name = "memsw.failcnt", |
| 6097 | .private = MEMFILE_PRIVATE(_MEMSWAP, RES_FAILCNT), | 6360 | .private = MEMFILE_PRIVATE(_MEMSWAP, RES_FAILCNT), |
| 6098 | .trigger = mem_cgroup_reset, | 6361 | .trigger = mem_cgroup_reset, |
| 6099 | .read = mem_cgroup_read, | 6362 | .read_u64 = mem_cgroup_read_u64, |
| 6100 | }, | 6363 | }, |
| 6101 | { }, /* terminate */ | 6364 | { }, /* terminate */ |
| 6102 | }; | 6365 | }; |
| @@ -6268,6 +6531,8 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) | |||
| 6268 | mutex_init(&memcg->thresholds_lock); | 6531 | mutex_init(&memcg->thresholds_lock); |
| 6269 | spin_lock_init(&memcg->move_lock); | 6532 | spin_lock_init(&memcg->move_lock); |
| 6270 | vmpressure_init(&memcg->vmpressure); | 6533 | vmpressure_init(&memcg->vmpressure); |
| 6534 | INIT_LIST_HEAD(&memcg->event_list); | ||
| 6535 | spin_lock_init(&memcg->event_list_lock); | ||
| 6271 | 6536 | ||
| 6272 | return &memcg->css; | 6537 | return &memcg->css; |
| 6273 | 6538 | ||
| @@ -6343,6 +6608,19 @@ static void mem_cgroup_invalidate_reclaim_iterators(struct mem_cgroup *memcg) | |||
| 6343 | static void mem_cgroup_css_offline(struct cgroup_subsys_state *css) | 6608 | static void mem_cgroup_css_offline(struct cgroup_subsys_state *css) |
| 6344 | { | 6609 | { |
| 6345 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); | 6610 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); |
| 6611 | struct mem_cgroup_event *event, *tmp; | ||
| 6612 | |||
| 6613 | /* | ||
| 6614 | * Unregister events and notify userspace. | ||
| 6615 | * Notify userspace about cgroup removing only after rmdir of cgroup | ||
| 6616 | * directory to avoid race between userspace and kernelspace. | ||
| 6617 | */ | ||
| 6618 | spin_lock(&memcg->event_list_lock); | ||
| 6619 | list_for_each_entry_safe(event, tmp, &memcg->event_list, list) { | ||
| 6620 | list_del_init(&event->list); | ||
| 6621 | schedule_work(&event->remove); | ||
| 6622 | } | ||
| 6623 | spin_unlock(&memcg->event_list_lock); | ||
| 6346 | 6624 | ||
| 6347 | kmem_cgroup_css_offline(memcg); | 6625 | kmem_cgroup_css_offline(memcg); |
| 6348 | 6626 | ||
diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c index 6d757e3a872a..3bd0b8e6ab12 100644 --- a/mm/page_cgroup.c +++ b/mm/page_cgroup.c | |||
| @@ -451,7 +451,7 @@ unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id) | |||
| 451 | * lookup_swap_cgroup_id - lookup mem_cgroup id tied to swap entry | 451 | * lookup_swap_cgroup_id - lookup mem_cgroup id tied to swap entry |
| 452 | * @ent: swap entry to be looked up. | 452 | * @ent: swap entry to be looked up. |
| 453 | * | 453 | * |
| 454 | * Returns CSS ID of mem_cgroup at success. 0 at failure. (0 is invalid ID) | 454 | * Returns ID of mem_cgroup at success. 0 at failure. (0 is invalid ID) |
| 455 | */ | 455 | */ |
| 456 | unsigned short lookup_swap_cgroup_id(swp_entry_t ent) | 456 | unsigned short lookup_swap_cgroup_id(swp_entry_t ent) |
| 457 | { | 457 | { |
diff --git a/mm/vmpressure.c b/mm/vmpressure.c index e0f62837c3f4..196970a4541f 100644 --- a/mm/vmpressure.c +++ b/mm/vmpressure.c | |||
| @@ -278,8 +278,7 @@ void vmpressure_prio(gfp_t gfp, struct mem_cgroup *memcg, int prio) | |||
| 278 | 278 | ||
| 279 | /** | 279 | /** |
| 280 | * vmpressure_register_event() - Bind vmpressure notifications to an eventfd | 280 | * vmpressure_register_event() - Bind vmpressure notifications to an eventfd |
| 281 | * @css: css that is interested in vmpressure notifications | 281 | * @memcg: memcg that is interested in vmpressure notifications |
| 282 | * @cft: cgroup control files handle | ||
| 283 | * @eventfd: eventfd context to link notifications with | 282 | * @eventfd: eventfd context to link notifications with |
| 284 | * @args: event arguments (used to set up a pressure level threshold) | 283 | * @args: event arguments (used to set up a pressure level threshold) |
| 285 | * | 284 | * |
| @@ -289,15 +288,12 @@ void vmpressure_prio(gfp_t gfp, struct mem_cgroup *memcg, int prio) | |||
| 289 | * threshold (one of vmpressure_str_levels, i.e. "low", "medium", or | 288 | * threshold (one of vmpressure_str_levels, i.e. "low", "medium", or |
| 290 | * "critical"). | 289 | * "critical"). |
| 291 | * | 290 | * |
| 292 | * This function should not be used directly, just pass it to (struct | 291 | * To be used as memcg event method. |
| 293 | * cftype).register_event, and then cgroup core will handle everything by | ||
| 294 | * itself. | ||
| 295 | */ | 292 | */ |
| 296 | int vmpressure_register_event(struct cgroup_subsys_state *css, | 293 | int vmpressure_register_event(struct mem_cgroup *memcg, |
| 297 | struct cftype *cft, struct eventfd_ctx *eventfd, | 294 | struct eventfd_ctx *eventfd, const char *args) |
| 298 | const char *args) | ||
| 299 | { | 295 | { |
| 300 | struct vmpressure *vmpr = css_to_vmpressure(css); | 296 | struct vmpressure *vmpr = memcg_to_vmpressure(memcg); |
| 301 | struct vmpressure_event *ev; | 297 | struct vmpressure_event *ev; |
| 302 | int level; | 298 | int level; |
| 303 | 299 | ||
| @@ -325,23 +321,19 @@ int vmpressure_register_event(struct cgroup_subsys_state *css, | |||
| 325 | 321 | ||
| 326 | /** | 322 | /** |
| 327 | * vmpressure_unregister_event() - Unbind eventfd from vmpressure | 323 | * vmpressure_unregister_event() - Unbind eventfd from vmpressure |
| 328 | * @css: css handle | 324 | * @memcg: memcg handle |
| 329 | * @cft: cgroup control files handle | ||
| 330 | * @eventfd: eventfd context that was used to link vmpressure with the @cg | 325 | * @eventfd: eventfd context that was used to link vmpressure with the @cg |
| 331 | * | 326 | * |
| 332 | * This function does internal manipulations to detach the @eventfd from | 327 | * This function does internal manipulations to detach the @eventfd from |
| 333 | * the vmpressure notifications, and then frees internal resources | 328 | * the vmpressure notifications, and then frees internal resources |
| 334 | * associated with the @eventfd (but the @eventfd itself is not freed). | 329 | * associated with the @eventfd (but the @eventfd itself is not freed). |
| 335 | * | 330 | * |
| 336 | * This function should not be used directly, just pass it to (struct | 331 | * To be used as memcg event method. |
| 337 | * cftype).unregister_event, and then cgroup core will handle everything | ||
| 338 | * by itself. | ||
| 339 | */ | 332 | */ |
| 340 | void vmpressure_unregister_event(struct cgroup_subsys_state *css, | 333 | void vmpressure_unregister_event(struct mem_cgroup *memcg, |
| 341 | struct cftype *cft, | ||
| 342 | struct eventfd_ctx *eventfd) | 334 | struct eventfd_ctx *eventfd) |
| 343 | { | 335 | { |
| 344 | struct vmpressure *vmpr = css_to_vmpressure(css); | 336 | struct vmpressure *vmpr = memcg_to_vmpressure(memcg); |
| 345 | struct vmpressure_event *ev; | 337 | struct vmpressure_event *ev; |
| 346 | 338 | ||
| 347 | mutex_lock(&vmpr->events_lock); | 339 | mutex_lock(&vmpr->events_lock); |
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index 9b7cf6c85f82..56cbb69ba024 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c | |||
| @@ -173,14 +173,14 @@ static u64 read_prioidx(struct cgroup_subsys_state *css, struct cftype *cft) | |||
| 173 | return css->cgroup->id; | 173 | return css->cgroup->id; |
| 174 | } | 174 | } |
| 175 | 175 | ||
| 176 | static int read_priomap(struct cgroup_subsys_state *css, struct cftype *cft, | 176 | static int read_priomap(struct seq_file *sf, void *v) |
| 177 | struct cgroup_map_cb *cb) | ||
| 178 | { | 177 | { |
| 179 | struct net_device *dev; | 178 | struct net_device *dev; |
| 180 | 179 | ||
| 181 | rcu_read_lock(); | 180 | rcu_read_lock(); |
| 182 | for_each_netdev_rcu(&init_net, dev) | 181 | for_each_netdev_rcu(&init_net, dev) |
| 183 | cb->fill(cb, dev->name, netprio_prio(css, dev)); | 182 | seq_printf(sf, "%s %u\n", dev->name, |
| 183 | netprio_prio(seq_css(sf), dev)); | ||
| 184 | rcu_read_unlock(); | 184 | rcu_read_unlock(); |
| 185 | return 0; | 185 | return 0; |
| 186 | } | 186 | } |
| @@ -238,7 +238,7 @@ static struct cftype ss_files[] = { | |||
| 238 | }, | 238 | }, |
| 239 | { | 239 | { |
| 240 | .name = "ifpriomap", | 240 | .name = "ifpriomap", |
| 241 | .read_map = read_priomap, | 241 | .seq_show = read_priomap, |
| 242 | .write_string = write_priomap, | 242 | .write_string = write_priomap, |
| 243 | }, | 243 | }, |
| 244 | { } /* terminate */ | 244 | { } /* terminate */ |
diff --git a/security/device_cgroup.c b/security/device_cgroup.c index 7c2a0a71049e..d3b6d2cd3a06 100644 --- a/security/device_cgroup.c +++ b/security/device_cgroup.c | |||
| @@ -274,10 +274,9 @@ static void set_majmin(char *str, unsigned m) | |||
| 274 | sprintf(str, "%u", m); | 274 | sprintf(str, "%u", m); |
| 275 | } | 275 | } |
| 276 | 276 | ||
| 277 | static int devcgroup_seq_read(struct cgroup_subsys_state *css, | 277 | static int devcgroup_seq_show(struct seq_file *m, void *v) |
| 278 | struct cftype *cft, struct seq_file *m) | ||
| 279 | { | 278 | { |
| 280 | struct dev_cgroup *devcgroup = css_to_devcgroup(css); | 279 | struct dev_cgroup *devcgroup = css_to_devcgroup(seq_css(m)); |
| 281 | struct dev_exception_item *ex; | 280 | struct dev_exception_item *ex; |
| 282 | char maj[MAJMINLEN], min[MAJMINLEN], acc[ACCLEN]; | 281 | char maj[MAJMINLEN], min[MAJMINLEN], acc[ACCLEN]; |
| 283 | 282 | ||
| @@ -679,7 +678,7 @@ static struct cftype dev_cgroup_files[] = { | |||
| 679 | }, | 678 | }, |
| 680 | { | 679 | { |
| 681 | .name = "list", | 680 | .name = "list", |
| 682 | .read_seq_string = devcgroup_seq_read, | 681 | .seq_show = devcgroup_seq_show, |
| 683 | .private = DEVCG_LIST, | 682 | .private = DEVCG_LIST, |
| 684 | }, | 683 | }, |
| 685 | { } /* terminate */ | 684 | { } /* terminate */ |
