diff options
58 files changed, 2045 insertions, 1660 deletions
diff --git a/Documentation/cgroups/cgroups.txt b/Documentation/cgroups/cgroups.txt index 638bf17ff869..821de56d1580 100644 --- a/Documentation/cgroups/cgroups.txt +++ b/Documentation/cgroups/cgroups.txt | |||
@@ -24,7 +24,6 @@ CONTENTS: | |||
24 | 2.1 Basic Usage | 24 | 2.1 Basic Usage |
25 | 2.2 Attaching processes | 25 | 2.2 Attaching processes |
26 | 2.3 Mounting hierarchies by name | 26 | 2.3 Mounting hierarchies by name |
27 | 2.4 Notification API | ||
28 | 3. Kernel API | 27 | 3. Kernel API |
29 | 3.1 Overview | 28 | 3.1 Overview |
30 | 3.2 Synchronization | 29 | 3.2 Synchronization |
@@ -472,25 +471,6 @@ you give a subsystem a name. | |||
472 | The name of the subsystem appears as part of the hierarchy description | 471 | The name of the subsystem appears as part of the hierarchy description |
473 | in /proc/mounts and /proc/<pid>/cgroups. | 472 | in /proc/mounts and /proc/<pid>/cgroups. |
474 | 473 | ||
475 | 2.4 Notification API | ||
476 | -------------------- | ||
477 | |||
478 | There is mechanism which allows to get notifications about changing | ||
479 | status of a cgroup. | ||
480 | |||
481 | To register a new notification handler you need to: | ||
482 | - create a file descriptor for event notification using eventfd(2); | ||
483 | - open a control file to be monitored (e.g. memory.usage_in_bytes); | ||
484 | - write "<event_fd> <control_fd> <args>" to cgroup.event_control. | ||
485 | Interpretation of args is defined by control file implementation; | ||
486 | |||
487 | eventfd will be woken up by control file implementation or when the | ||
488 | cgroup is removed. | ||
489 | |||
490 | To unregister a notification handler just close eventfd. | ||
491 | |||
492 | NOTE: Support of notifications should be implemented for the control | ||
493 | file. See documentation for the subsystem. | ||
494 | 474 | ||
495 | 3. Kernel API | 475 | 3. Kernel API |
496 | ============= | 476 | ============= |
diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt index e2bc132608fd..2622115276aa 100644 --- a/Documentation/cgroups/memory.txt +++ b/Documentation/cgroups/memory.txt | |||
@@ -577,7 +577,7 @@ Each memcg's numa_stat file includes "total", "file", "anon" and "unevictable" | |||
577 | per-node page counts including "hierarchical_<counter>" which sums up all | 577 | per-node page counts including "hierarchical_<counter>" which sums up all |
578 | hierarchical children's values in addition to the memcg's own value. | 578 | hierarchical children's values in addition to the memcg's own value. |
579 | 579 | ||
580 | The ouput format of memory.numa_stat is: | 580 | The output format of memory.numa_stat is: |
581 | 581 | ||
582 | total=<total pages> N0=<node 0 pages> N1=<node 1 pages> ... | 582 | total=<total pages> N0=<node 0 pages> N1=<node 1 pages> ... |
583 | file=<total file pages> N0=<node 0 pages> N1=<node 1 pages> ... | 583 | file=<total file pages> N0=<node 0 pages> N1=<node 1 pages> ... |
@@ -670,7 +670,7 @@ page tables. | |||
670 | 670 | ||
671 | 8.1 Interface | 671 | 8.1 Interface |
672 | 672 | ||
673 | This feature is disabled by default. It can be enabledi (and disabled again) by | 673 | This feature is disabled by default. It can be enabled (and disabled again) by |
674 | writing to memory.move_charge_at_immigrate of the destination cgroup. | 674 | writing to memory.move_charge_at_immigrate of the destination cgroup. |
675 | 675 | ||
676 | If you want to enable it: | 676 | If you want to enable it: |
diff --git a/Documentation/cgroups/resource_counter.txt b/Documentation/cgroups/resource_counter.txt index c4d99ed0b418..52e1da16a309 100644 --- a/Documentation/cgroups/resource_counter.txt +++ b/Documentation/cgroups/resource_counter.txt | |||
@@ -97,8 +97,8 @@ to work with it. | |||
97 | (struct res_counter *rc, struct res_counter *top, | 97 | (struct res_counter *rc, struct res_counter *top, |
98 | unsinged long val) | 98 | unsinged long val) |
99 | 99 | ||
100 | Almost same as res_cunter_uncharge() but propagation of uncharge | 100 | Almost same as res_counter_uncharge() but propagation of uncharge |
101 | stops when rc == top. This is useful when kill a res_coutner in | 101 | stops when rc == top. This is useful when kill a res_counter in |
102 | child cgroup. | 102 | child cgroup. |
103 | 103 | ||
104 | 2.1 Other accounting routines | 104 | 2.1 Other accounting routines |
diff --git a/Documentation/devicetree/bindings/ata/marvell.txt b/Documentation/devicetree/bindings/ata/marvell.txt index b5cdd20cde9c..1c8351604d38 100644 --- a/Documentation/devicetree/bindings/ata/marvell.txt +++ b/Documentation/devicetree/bindings/ata/marvell.txt | |||
@@ -1,7 +1,7 @@ | |||
1 | * Marvell Orion SATA | 1 | * Marvell Orion SATA |
2 | 2 | ||
3 | Required Properties: | 3 | Required Properties: |
4 | - compatibility : "marvell,orion-sata" | 4 | - compatibility : "marvell,orion-sata" or "marvell,armada-370-sata" |
5 | - reg : Address range of controller | 5 | - reg : Address range of controller |
6 | - interrupts : Interrupt controller is using | 6 | - interrupts : Interrupt controller is using |
7 | - nr-ports : Number of SATA ports in use. | 7 | - nr-ports : Number of SATA ports in use. |
diff --git a/Documentation/devicetree/bindings/ata/sata_rcar.txt b/Documentation/devicetree/bindings/ata/sata_rcar.txt new file mode 100644 index 000000000000..1e6111333fa8 --- /dev/null +++ b/Documentation/devicetree/bindings/ata/sata_rcar.txt | |||
@@ -0,0 +1,18 @@ | |||
1 | * Renesas R-Car SATA | ||
2 | |||
3 | Required properties: | ||
4 | - compatible : should contain one of the following: | ||
5 | - "renesas,sata-r8a7779" for R-Car H1 | ||
6 | - "renesas,sata-r8a7790" for R-Car H2 | ||
7 | - "renesas,sata-r8a7791" for R-Car M2 | ||
8 | - reg : address and length of the SATA registers; | ||
9 | - interrupts : must consist of one interrupt specifier. | ||
10 | |||
11 | Example: | ||
12 | |||
13 | sata: sata@fc600000 { | ||
14 | compatible = "renesas,sata-r8a7779"; | ||
15 | reg = <0xfc600000 0x2000>; | ||
16 | interrupt-parent = <&gic>; | ||
17 | interrupts = <0 100 IRQ_TYPE_LEVEL_HIGH>; | ||
18 | }; | ||
diff --git a/arch/arm/boot/dts/armada-370-xp.dtsi b/arch/arm/boot/dts/armada-370-xp.dtsi index 7f10f627ae5b..80ffacd128f8 100644 --- a/arch/arm/boot/dts/armada-370-xp.dtsi +++ b/arch/arm/boot/dts/armada-370-xp.dtsi | |||
@@ -152,7 +152,7 @@ | |||
152 | }; | 152 | }; |
153 | 153 | ||
154 | sata@a0000 { | 154 | sata@a0000 { |
155 | compatible = "marvell,orion-sata"; | 155 | compatible = "marvell,armada-370-sata"; |
156 | reg = <0xa0000 0x5000>; | 156 | reg = <0xa0000 0x5000>; |
157 | interrupts = <55>; | 157 | interrupts = <55>; |
158 | clocks = <&gateclk 15>, <&gateclk 30>; | 158 | clocks = <&gateclk 15>, <&gateclk 30>; |
diff --git a/arch/arm/plat-samsung/include/plat/regs-ata.h b/arch/arm/plat-samsung/include/plat/regs-ata.h deleted file mode 100644 index f5df92fdae26..000000000000 --- a/arch/arm/plat-samsung/include/plat/regs-ata.h +++ /dev/null | |||
@@ -1,56 +0,0 @@ | |||
1 | /* linux/arch/arm/plat-samsung/include/plat/regs-ata.h | ||
2 | * | ||
3 | * Copyright (c) 2010 Samsung Electronics Co., Ltd. | ||
4 | * http://www.samsung.com | ||
5 | * | ||
6 | * Samsung CF-ATA register definitions | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify | ||
9 | * it under the terms of the GNU General Public License version 2 as | ||
10 | * published by the Free Software Foundation. | ||
11 | */ | ||
12 | |||
13 | #ifndef __ASM_PLAT_REGS_ATA_H | ||
14 | #define __ASM_PLAT_REGS_ATA_H __FILE__ | ||
15 | |||
16 | #define S3C_CFATA_REG(x) (x) | ||
17 | |||
18 | #define S3C_CFATA_MUX S3C_CFATA_REG(0x0) | ||
19 | |||
20 | #define S3C_ATA_CTRL S3C_CFATA_REG(0x0) | ||
21 | #define S3C_ATA_STATUS S3C_CFATA_REG(0x4) | ||
22 | #define S3C_ATA_CMD S3C_CFATA_REG(0x8) | ||
23 | #define S3C_ATA_SWRST S3C_CFATA_REG(0xc) | ||
24 | #define S3C_ATA_IRQ S3C_CFATA_REG(0x10) | ||
25 | #define S3C_ATA_IRQ_MSK S3C_CFATA_REG(0x14) | ||
26 | #define S3C_ATA_CFG S3C_CFATA_REG(0x18) | ||
27 | |||
28 | #define S3C_ATA_MDMA_TIME S3C_CFATA_REG(0x28) | ||
29 | #define S3C_ATA_PIO_TIME S3C_CFATA_REG(0x2c) | ||
30 | #define S3C_ATA_UDMA_TIME S3C_CFATA_REG(0x30) | ||
31 | #define S3C_ATA_XFR_NUM S3C_CFATA_REG(0x34) | ||
32 | #define S3C_ATA_XFR_CNT S3C_CFATA_REG(0x38) | ||
33 | #define S3C_ATA_TBUF_START S3C_CFATA_REG(0x3c) | ||
34 | #define S3C_ATA_TBUF_SIZE S3C_CFATA_REG(0x40) | ||
35 | #define S3C_ATA_SBUF_START S3C_CFATA_REG(0x44) | ||
36 | #define S3C_ATA_SBUF_SIZE S3C_CFATA_REG(0x48) | ||
37 | #define S3C_ATA_CADR_TBUF S3C_CFATA_REG(0x4c) | ||
38 | #define S3C_ATA_CADR_SBUF S3C_CFATA_REG(0x50) | ||
39 | #define S3C_ATA_PIO_DTR S3C_CFATA_REG(0x54) | ||
40 | #define S3C_ATA_PIO_FED S3C_CFATA_REG(0x58) | ||
41 | #define S3C_ATA_PIO_SCR S3C_CFATA_REG(0x5c) | ||
42 | #define S3C_ATA_PIO_LLR S3C_CFATA_REG(0x60) | ||
43 | #define S3C_ATA_PIO_LMR S3C_CFATA_REG(0x64) | ||
44 | #define S3C_ATA_PIO_LHR S3C_CFATA_REG(0x68) | ||
45 | #define S3C_ATA_PIO_DVR S3C_CFATA_REG(0x6c) | ||
46 | #define S3C_ATA_PIO_CSD S3C_CFATA_REG(0x70) | ||
47 | #define S3C_ATA_PIO_DAD S3C_CFATA_REG(0x74) | ||
48 | #define S3C_ATA_PIO_READY S3C_CFATA_REG(0x78) | ||
49 | #define S3C_ATA_PIO_RDATA S3C_CFATA_REG(0x7c) | ||
50 | |||
51 | #define S3C_CFATA_MUX_TRUEIDE 0x01 | ||
52 | |||
53 | #define S3C_ATA_CFG_SWAP 0x40 | ||
54 | #define S3C_ATA_CFG_IORDYEN 0x02 | ||
55 | |||
56 | #endif /* __ASM_PLAT_REGS_ATA_H */ | ||
diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 06534049afba..a760857e6b62 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c | |||
@@ -1303,13 +1303,10 @@ static u64 tg_prfill_cpu_rwstat(struct seq_file *sf, | |||
1303 | return __blkg_prfill_rwstat(sf, pd, &rwstat); | 1303 | return __blkg_prfill_rwstat(sf, pd, &rwstat); |
1304 | } | 1304 | } |
1305 | 1305 | ||
1306 | static int tg_print_cpu_rwstat(struct cgroup_subsys_state *css, | 1306 | static int tg_print_cpu_rwstat(struct seq_file *sf, void *v) |
1307 | struct cftype *cft, struct seq_file *sf) | ||
1308 | { | 1307 | { |
1309 | struct blkcg *blkcg = css_to_blkcg(css); | 1308 | blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), tg_prfill_cpu_rwstat, |
1310 | 1309 | &blkcg_policy_throtl, seq_cft(sf)->private, true); | |
1311 | blkcg_print_blkgs(sf, blkcg, tg_prfill_cpu_rwstat, &blkcg_policy_throtl, | ||
1312 | cft->private, true); | ||
1313 | return 0; | 1310 | return 0; |
1314 | } | 1311 | } |
1315 | 1312 | ||
@@ -1335,19 +1332,17 @@ static u64 tg_prfill_conf_uint(struct seq_file *sf, struct blkg_policy_data *pd, | |||
1335 | return __blkg_prfill_u64(sf, pd, v); | 1332 | return __blkg_prfill_u64(sf, pd, v); |
1336 | } | 1333 | } |
1337 | 1334 | ||
1338 | static int tg_print_conf_u64(struct cgroup_subsys_state *css, | 1335 | static int tg_print_conf_u64(struct seq_file *sf, void *v) |
1339 | struct cftype *cft, struct seq_file *sf) | ||
1340 | { | 1336 | { |
1341 | blkcg_print_blkgs(sf, css_to_blkcg(css), tg_prfill_conf_u64, | 1337 | blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), tg_prfill_conf_u64, |
1342 | &blkcg_policy_throtl, cft->private, false); | 1338 | &blkcg_policy_throtl, seq_cft(sf)->private, false); |
1343 | return 0; | 1339 | return 0; |
1344 | } | 1340 | } |
1345 | 1341 | ||
1346 | static int tg_print_conf_uint(struct cgroup_subsys_state *css, | 1342 | static int tg_print_conf_uint(struct seq_file *sf, void *v) |
1347 | struct cftype *cft, struct seq_file *sf) | ||
1348 | { | 1343 | { |
1349 | blkcg_print_blkgs(sf, css_to_blkcg(css), tg_prfill_conf_uint, | 1344 | blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), tg_prfill_conf_uint, |
1350 | &blkcg_policy_throtl, cft->private, false); | 1345 | &blkcg_policy_throtl, seq_cft(sf)->private, false); |
1351 | return 0; | 1346 | return 0; |
1352 | } | 1347 | } |
1353 | 1348 | ||
@@ -1428,40 +1423,40 @@ static struct cftype throtl_files[] = { | |||
1428 | { | 1423 | { |
1429 | .name = "throttle.read_bps_device", | 1424 | .name = "throttle.read_bps_device", |
1430 | .private = offsetof(struct throtl_grp, bps[READ]), | 1425 | .private = offsetof(struct throtl_grp, bps[READ]), |
1431 | .read_seq_string = tg_print_conf_u64, | 1426 | .seq_show = tg_print_conf_u64, |
1432 | .write_string = tg_set_conf_u64, | 1427 | .write_string = tg_set_conf_u64, |
1433 | .max_write_len = 256, | 1428 | .max_write_len = 256, |
1434 | }, | 1429 | }, |
1435 | { | 1430 | { |
1436 | .name = "throttle.write_bps_device", | 1431 | .name = "throttle.write_bps_device", |
1437 | .private = offsetof(struct throtl_grp, bps[WRITE]), | 1432 | .private = offsetof(struct throtl_grp, bps[WRITE]), |
1438 | .read_seq_string = tg_print_conf_u64, | 1433 | .seq_show = tg_print_conf_u64, |
1439 | .write_string = tg_set_conf_u64, | 1434 | .write_string = tg_set_conf_u64, |
1440 | .max_write_len = 256, | 1435 | .max_write_len = 256, |
1441 | }, | 1436 | }, |
1442 | { | 1437 | { |
1443 | .name = "throttle.read_iops_device", | 1438 | .name = "throttle.read_iops_device", |
1444 | .private = offsetof(struct throtl_grp, iops[READ]), | 1439 | .private = offsetof(struct throtl_grp, iops[READ]), |
1445 | .read_seq_string = tg_print_conf_uint, | 1440 | .seq_show = tg_print_conf_uint, |
1446 | .write_string = tg_set_conf_uint, | 1441 | .write_string = tg_set_conf_uint, |
1447 | .max_write_len = 256, | 1442 | .max_write_len = 256, |
1448 | }, | 1443 | }, |
1449 | { | 1444 | { |
1450 | .name = "throttle.write_iops_device", | 1445 | .name = "throttle.write_iops_device", |
1451 | .private = offsetof(struct throtl_grp, iops[WRITE]), | 1446 | .private = offsetof(struct throtl_grp, iops[WRITE]), |
1452 | .read_seq_string = tg_print_conf_uint, | 1447 | .seq_show = tg_print_conf_uint, |
1453 | .write_string = tg_set_conf_uint, | 1448 | .write_string = tg_set_conf_uint, |
1454 | .max_write_len = 256, | 1449 | .max_write_len = 256, |
1455 | }, | 1450 | }, |
1456 | { | 1451 | { |
1457 | .name = "throttle.io_service_bytes", | 1452 | .name = "throttle.io_service_bytes", |
1458 | .private = offsetof(struct tg_stats_cpu, service_bytes), | 1453 | .private = offsetof(struct tg_stats_cpu, service_bytes), |
1459 | .read_seq_string = tg_print_cpu_rwstat, | 1454 | .seq_show = tg_print_cpu_rwstat, |
1460 | }, | 1455 | }, |
1461 | { | 1456 | { |
1462 | .name = "throttle.io_serviced", | 1457 | .name = "throttle.io_serviced", |
1463 | .private = offsetof(struct tg_stats_cpu, serviced), | 1458 | .private = offsetof(struct tg_stats_cpu, serviced), |
1464 | .read_seq_string = tg_print_cpu_rwstat, | 1459 | .seq_show = tg_print_cpu_rwstat, |
1465 | }, | 1460 | }, |
1466 | { } /* terminate */ | 1461 | { } /* terminate */ |
1467 | }; | 1462 | }; |
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 4d5cec1ad80d..744833b630c6 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c | |||
@@ -1632,11 +1632,11 @@ static u64 cfqg_prfill_weight_device(struct seq_file *sf, | |||
1632 | return __blkg_prfill_u64(sf, pd, cfqg->dev_weight); | 1632 | return __blkg_prfill_u64(sf, pd, cfqg->dev_weight); |
1633 | } | 1633 | } |
1634 | 1634 | ||
1635 | static int cfqg_print_weight_device(struct cgroup_subsys_state *css, | 1635 | static int cfqg_print_weight_device(struct seq_file *sf, void *v) |
1636 | struct cftype *cft, struct seq_file *sf) | ||
1637 | { | 1636 | { |
1638 | blkcg_print_blkgs(sf, css_to_blkcg(css), cfqg_prfill_weight_device, | 1637 | blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), |
1639 | &blkcg_policy_cfq, 0, false); | 1638 | cfqg_prfill_weight_device, &blkcg_policy_cfq, |
1639 | 0, false); | ||
1640 | return 0; | 1640 | return 0; |
1641 | } | 1641 | } |
1642 | 1642 | ||
@@ -1650,26 +1650,23 @@ static u64 cfqg_prfill_leaf_weight_device(struct seq_file *sf, | |||
1650 | return __blkg_prfill_u64(sf, pd, cfqg->dev_leaf_weight); | 1650 | return __blkg_prfill_u64(sf, pd, cfqg->dev_leaf_weight); |
1651 | } | 1651 | } |
1652 | 1652 | ||
1653 | static int cfqg_print_leaf_weight_device(struct cgroup_subsys_state *css, | 1653 | static int cfqg_print_leaf_weight_device(struct seq_file *sf, void *v) |
1654 | struct cftype *cft, | ||
1655 | struct seq_file *sf) | ||
1656 | { | 1654 | { |
1657 | blkcg_print_blkgs(sf, css_to_blkcg(css), cfqg_prfill_leaf_weight_device, | 1655 | blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), |
1658 | &blkcg_policy_cfq, 0, false); | 1656 | cfqg_prfill_leaf_weight_device, &blkcg_policy_cfq, |
1657 | 0, false); | ||
1659 | return 0; | 1658 | return 0; |
1660 | } | 1659 | } |
1661 | 1660 | ||
1662 | static int cfq_print_weight(struct cgroup_subsys_state *css, struct cftype *cft, | 1661 | static int cfq_print_weight(struct seq_file *sf, void *v) |
1663 | struct seq_file *sf) | ||
1664 | { | 1662 | { |
1665 | seq_printf(sf, "%u\n", css_to_blkcg(css)->cfq_weight); | 1663 | seq_printf(sf, "%u\n", css_to_blkcg(seq_css(sf))->cfq_weight); |
1666 | return 0; | 1664 | return 0; |
1667 | } | 1665 | } |
1668 | 1666 | ||
1669 | static int cfq_print_leaf_weight(struct cgroup_subsys_state *css, | 1667 | static int cfq_print_leaf_weight(struct seq_file *sf, void *v) |
1670 | struct cftype *cft, struct seq_file *sf) | ||
1671 | { | 1668 | { |
1672 | seq_printf(sf, "%u\n", css_to_blkcg(css)->cfq_leaf_weight); | 1669 | seq_printf(sf, "%u\n", css_to_blkcg(seq_css(sf))->cfq_leaf_weight); |
1673 | return 0; | 1670 | return 0; |
1674 | } | 1671 | } |
1675 | 1672 | ||
@@ -1762,23 +1759,17 @@ static int cfq_set_leaf_weight(struct cgroup_subsys_state *css, | |||
1762 | return __cfq_set_weight(css, cft, val, true); | 1759 | return __cfq_set_weight(css, cft, val, true); |
1763 | } | 1760 | } |
1764 | 1761 | ||
1765 | static int cfqg_print_stat(struct cgroup_subsys_state *css, struct cftype *cft, | 1762 | static int cfqg_print_stat(struct seq_file *sf, void *v) |
1766 | struct seq_file *sf) | ||
1767 | { | 1763 | { |
1768 | struct blkcg *blkcg = css_to_blkcg(css); | 1764 | blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), blkg_prfill_stat, |
1769 | 1765 | &blkcg_policy_cfq, seq_cft(sf)->private, false); | |
1770 | blkcg_print_blkgs(sf, blkcg, blkg_prfill_stat, &blkcg_policy_cfq, | ||
1771 | cft->private, false); | ||
1772 | return 0; | 1766 | return 0; |
1773 | } | 1767 | } |
1774 | 1768 | ||
1775 | static int cfqg_print_rwstat(struct cgroup_subsys_state *css, | 1769 | static int cfqg_print_rwstat(struct seq_file *sf, void *v) |
1776 | struct cftype *cft, struct seq_file *sf) | ||
1777 | { | 1770 | { |
1778 | struct blkcg *blkcg = css_to_blkcg(css); | 1771 | blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), blkg_prfill_rwstat, |
1779 | 1772 | &blkcg_policy_cfq, seq_cft(sf)->private, true); | |
1780 | blkcg_print_blkgs(sf, blkcg, blkg_prfill_rwstat, &blkcg_policy_cfq, | ||
1781 | cft->private, true); | ||
1782 | return 0; | 1773 | return 0; |
1783 | } | 1774 | } |
1784 | 1775 | ||
@@ -1798,23 +1789,19 @@ static u64 cfqg_prfill_rwstat_recursive(struct seq_file *sf, | |||
1798 | return __blkg_prfill_rwstat(sf, pd, &sum); | 1789 | return __blkg_prfill_rwstat(sf, pd, &sum); |
1799 | } | 1790 | } |
1800 | 1791 | ||
1801 | static int cfqg_print_stat_recursive(struct cgroup_subsys_state *css, | 1792 | static int cfqg_print_stat_recursive(struct seq_file *sf, void *v) |
1802 | struct cftype *cft, struct seq_file *sf) | ||
1803 | { | 1793 | { |
1804 | struct blkcg *blkcg = css_to_blkcg(css); | 1794 | blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), |
1805 | 1795 | cfqg_prfill_stat_recursive, &blkcg_policy_cfq, | |
1806 | blkcg_print_blkgs(sf, blkcg, cfqg_prfill_stat_recursive, | 1796 | seq_cft(sf)->private, false); |
1807 | &blkcg_policy_cfq, cft->private, false); | ||
1808 | return 0; | 1797 | return 0; |
1809 | } | 1798 | } |
1810 | 1799 | ||
1811 | static int cfqg_print_rwstat_recursive(struct cgroup_subsys_state *css, | 1800 | static int cfqg_print_rwstat_recursive(struct seq_file *sf, void *v) |
1812 | struct cftype *cft, struct seq_file *sf) | ||
1813 | { | 1801 | { |
1814 | struct blkcg *blkcg = css_to_blkcg(css); | 1802 | blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), |
1815 | 1803 | cfqg_prfill_rwstat_recursive, &blkcg_policy_cfq, | |
1816 | blkcg_print_blkgs(sf, blkcg, cfqg_prfill_rwstat_recursive, | 1804 | seq_cft(sf)->private, true); |
1817 | &blkcg_policy_cfq, cft->private, true); | ||
1818 | return 0; | 1805 | return 0; |
1819 | } | 1806 | } |
1820 | 1807 | ||
@@ -1835,13 +1822,11 @@ static u64 cfqg_prfill_avg_queue_size(struct seq_file *sf, | |||
1835 | } | 1822 | } |
1836 | 1823 | ||
1837 | /* print avg_queue_size */ | 1824 | /* print avg_queue_size */ |
1838 | static int cfqg_print_avg_queue_size(struct cgroup_subsys_state *css, | 1825 | static int cfqg_print_avg_queue_size(struct seq_file *sf, void *v) |
1839 | struct cftype *cft, struct seq_file *sf) | ||
1840 | { | 1826 | { |
1841 | struct blkcg *blkcg = css_to_blkcg(css); | 1827 | blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), |
1842 | 1828 | cfqg_prfill_avg_queue_size, &blkcg_policy_cfq, | |
1843 | blkcg_print_blkgs(sf, blkcg, cfqg_prfill_avg_queue_size, | 1829 | 0, false); |
1844 | &blkcg_policy_cfq, 0, false); | ||
1845 | return 0; | 1830 | return 0; |
1846 | } | 1831 | } |
1847 | #endif /* CONFIG_DEBUG_BLK_CGROUP */ | 1832 | #endif /* CONFIG_DEBUG_BLK_CGROUP */ |
@@ -1851,14 +1836,14 @@ static struct cftype cfq_blkcg_files[] = { | |||
1851 | { | 1836 | { |
1852 | .name = "weight_device", | 1837 | .name = "weight_device", |
1853 | .flags = CFTYPE_ONLY_ON_ROOT, | 1838 | .flags = CFTYPE_ONLY_ON_ROOT, |
1854 | .read_seq_string = cfqg_print_leaf_weight_device, | 1839 | .seq_show = cfqg_print_leaf_weight_device, |
1855 | .write_string = cfqg_set_leaf_weight_device, | 1840 | .write_string = cfqg_set_leaf_weight_device, |
1856 | .max_write_len = 256, | 1841 | .max_write_len = 256, |
1857 | }, | 1842 | }, |
1858 | { | 1843 | { |
1859 | .name = "weight", | 1844 | .name = "weight", |
1860 | .flags = CFTYPE_ONLY_ON_ROOT, | 1845 | .flags = CFTYPE_ONLY_ON_ROOT, |
1861 | .read_seq_string = cfq_print_leaf_weight, | 1846 | .seq_show = cfq_print_leaf_weight, |
1862 | .write_u64 = cfq_set_leaf_weight, | 1847 | .write_u64 = cfq_set_leaf_weight, |
1863 | }, | 1848 | }, |
1864 | 1849 | ||
@@ -1866,26 +1851,26 @@ static struct cftype cfq_blkcg_files[] = { | |||
1866 | { | 1851 | { |
1867 | .name = "weight_device", | 1852 | .name = "weight_device", |
1868 | .flags = CFTYPE_NOT_ON_ROOT, | 1853 | .flags = CFTYPE_NOT_ON_ROOT, |
1869 | .read_seq_string = cfqg_print_weight_device, | 1854 | .seq_show = cfqg_print_weight_device, |
1870 | .write_string = cfqg_set_weight_device, | 1855 | .write_string = cfqg_set_weight_device, |
1871 | .max_write_len = 256, | 1856 | .max_write_len = 256, |
1872 | }, | 1857 | }, |
1873 | { | 1858 | { |
1874 | .name = "weight", | 1859 | .name = "weight", |
1875 | .flags = CFTYPE_NOT_ON_ROOT, | 1860 | .flags = CFTYPE_NOT_ON_ROOT, |
1876 | .read_seq_string = cfq_print_weight, | 1861 | .seq_show = cfq_print_weight, |
1877 | .write_u64 = cfq_set_weight, | 1862 | .write_u64 = cfq_set_weight, |
1878 | }, | 1863 | }, |
1879 | 1864 | ||
1880 | { | 1865 | { |
1881 | .name = "leaf_weight_device", | 1866 | .name = "leaf_weight_device", |
1882 | .read_seq_string = cfqg_print_leaf_weight_device, | 1867 | .seq_show = cfqg_print_leaf_weight_device, |
1883 | .write_string = cfqg_set_leaf_weight_device, | 1868 | .write_string = cfqg_set_leaf_weight_device, |
1884 | .max_write_len = 256, | 1869 | .max_write_len = 256, |
1885 | }, | 1870 | }, |
1886 | { | 1871 | { |
1887 | .name = "leaf_weight", | 1872 | .name = "leaf_weight", |
1888 | .read_seq_string = cfq_print_leaf_weight, | 1873 | .seq_show = cfq_print_leaf_weight, |
1889 | .write_u64 = cfq_set_leaf_weight, | 1874 | .write_u64 = cfq_set_leaf_weight, |
1890 | }, | 1875 | }, |
1891 | 1876 | ||
@@ -1893,114 +1878,114 @@ static struct cftype cfq_blkcg_files[] = { | |||
1893 | { | 1878 | { |
1894 | .name = "time", | 1879 | .name = "time", |
1895 | .private = offsetof(struct cfq_group, stats.time), | 1880 | .private = offsetof(struct cfq_group, stats.time), |
1896 | .read_seq_string = cfqg_print_stat, | 1881 | .seq_show = cfqg_print_stat, |
1897 | }, | 1882 | }, |
1898 | { | 1883 | { |
1899 | .name = "sectors", | 1884 | .name = "sectors", |
1900 | .private = offsetof(struct cfq_group, stats.sectors), | 1885 | .private = offsetof(struct cfq_group, stats.sectors), |
1901 | .read_seq_string = cfqg_print_stat, | 1886 | .seq_show = cfqg_print_stat, |
1902 | }, | 1887 | }, |
1903 | { | 1888 | { |
1904 | .name = "io_service_bytes", | 1889 | .name = "io_service_bytes", |
1905 | .private = offsetof(struct cfq_group, stats.service_bytes), | 1890 | .private = offsetof(struct cfq_group, stats.service_bytes), |
1906 | .read_seq_string = cfqg_print_rwstat, | 1891 | .seq_show = cfqg_print_rwstat, |
1907 | }, | 1892 | }, |
1908 | { | 1893 | { |
1909 | .name = "io_serviced", | 1894 | .name = "io_serviced", |
1910 | .private = offsetof(struct cfq_group, stats.serviced), | 1895 | .private = offsetof(struct cfq_group, stats.serviced), |
1911 | .read_seq_string = cfqg_print_rwstat, | 1896 | .seq_show = cfqg_print_rwstat, |
1912 | }, | 1897 | }, |
1913 | { | 1898 | { |
1914 | .name = "io_service_time", | 1899 | .name = "io_service_time", |
1915 | .private = offsetof(struct cfq_group, stats.service_time), | 1900 | .private = offsetof(struct cfq_group, stats.service_time), |
1916 | .read_seq_string = cfqg_print_rwstat, | 1901 | .seq_show = cfqg_print_rwstat, |
1917 | }, | 1902 | }, |
1918 | { | 1903 | { |
1919 | .name = "io_wait_time", | 1904 | .name = "io_wait_time", |
1920 | .private = offsetof(struct cfq_group, stats.wait_time), | 1905 | .private = offsetof(struct cfq_group, stats.wait_time), |
1921 | .read_seq_string = cfqg_print_rwstat, | 1906 | .seq_show = cfqg_print_rwstat, |
1922 | }, | 1907 | }, |
1923 | { | 1908 | { |
1924 | .name = "io_merged", | 1909 | .name = "io_merged", |
1925 | .private = offsetof(struct cfq_group, stats.merged), | 1910 | .private = offsetof(struct cfq_group, stats.merged), |
1926 | .read_seq_string = cfqg_print_rwstat, | 1911 | .seq_show = cfqg_print_rwstat, |
1927 | }, | 1912 | }, |
1928 | { | 1913 | { |
1929 | .name = "io_queued", | 1914 | .name = "io_queued", |
1930 | .private = offsetof(struct cfq_group, stats.queued), | 1915 | .private = offsetof(struct cfq_group, stats.queued), |
1931 | .read_seq_string = cfqg_print_rwstat, | 1916 | .seq_show = cfqg_print_rwstat, |
1932 | }, | 1917 | }, |
1933 | 1918 | ||
1934 | /* the same statictics which cover the cfqg and its descendants */ | 1919 | /* the same statictics which cover the cfqg and its descendants */ |
1935 | { | 1920 | { |
1936 | .name = "time_recursive", | 1921 | .name = "time_recursive", |
1937 | .private = offsetof(struct cfq_group, stats.time), | 1922 | .private = offsetof(struct cfq_group, stats.time), |
1938 | .read_seq_string = cfqg_print_stat_recursive, | 1923 | .seq_show = cfqg_print_stat_recursive, |
1939 | }, | 1924 | }, |
1940 | { | 1925 | { |
1941 | .name = "sectors_recursive", | 1926 | .name = "sectors_recursive", |
1942 | .private = offsetof(struct cfq_group, stats.sectors), | 1927 | .private = offsetof(struct cfq_group, stats.sectors), |
1943 | .read_seq_string = cfqg_print_stat_recursive, | 1928 | .seq_show = cfqg_print_stat_recursive, |
1944 | }, | 1929 | }, |
1945 | { | 1930 | { |
1946 | .name = "io_service_bytes_recursive", | 1931 | .name = "io_service_bytes_recursive", |
1947 | .private = offsetof(struct cfq_group, stats.service_bytes), | 1932 | .private = offsetof(struct cfq_group, stats.service_bytes), |
1948 | .read_seq_string = cfqg_print_rwstat_recursive, | 1933 | .seq_show = cfqg_print_rwstat_recursive, |
1949 | }, | 1934 | }, |
1950 | { | 1935 | { |
1951 | .name = "io_serviced_recursive", | 1936 | .name = "io_serviced_recursive", |
1952 | .private = offsetof(struct cfq_group, stats.serviced), | 1937 | .private = offsetof(struct cfq_group, stats.serviced), |
1953 | .read_seq_string = cfqg_print_rwstat_recursive, | 1938 | .seq_show = cfqg_print_rwstat_recursive, |
1954 | }, | 1939 | }, |
1955 | { | 1940 | { |
1956 | .name = "io_service_time_recursive", | 1941 | .name = "io_service_time_recursive", |
1957 | .private = offsetof(struct cfq_group, stats.service_time), | 1942 | .private = offsetof(struct cfq_group, stats.service_time), |
1958 | .read_seq_string = cfqg_print_rwstat_recursive, | 1943 | .seq_show = cfqg_print_rwstat_recursive, |
1959 | }, | 1944 | }, |
1960 | { | 1945 | { |
1961 | .name = "io_wait_time_recursive", | 1946 | .name = "io_wait_time_recursive", |
1962 | .private = offsetof(struct cfq_group, stats.wait_time), | 1947 | .private = offsetof(struct cfq_group, stats.wait_time), |
1963 | .read_seq_string = cfqg_print_rwstat_recursive, | 1948 | .seq_show = cfqg_print_rwstat_recursive, |
1964 | }, | 1949 | }, |
1965 | { | 1950 | { |
1966 | .name = "io_merged_recursive", | 1951 | .name = "io_merged_recursive", |
1967 | .private = offsetof(struct cfq_group, stats.merged), | 1952 | .private = offsetof(struct cfq_group, stats.merged), |
1968 | .read_seq_string = cfqg_print_rwstat_recursive, | 1953 | .seq_show = cfqg_print_rwstat_recursive, |
1969 | }, | 1954 | }, |
1970 | { | 1955 | { |
1971 | .name = "io_queued_recursive", | 1956 | .name = "io_queued_recursive", |
1972 | .private = offsetof(struct cfq_group, stats.queued), | 1957 | .private = offsetof(struct cfq_group, stats.queued), |
1973 | .read_seq_string = cfqg_print_rwstat_recursive, | 1958 | .seq_show = cfqg_print_rwstat_recursive, |
1974 | }, | 1959 | }, |
1975 | #ifdef CONFIG_DEBUG_BLK_CGROUP | 1960 | #ifdef CONFIG_DEBUG_BLK_CGROUP |
1976 | { | 1961 | { |
1977 | .name = "avg_queue_size", | 1962 | .name = "avg_queue_size", |
1978 | .read_seq_string = cfqg_print_avg_queue_size, | 1963 | .seq_show = cfqg_print_avg_queue_size, |
1979 | }, | 1964 | }, |
1980 | { | 1965 | { |
1981 | .name = "group_wait_time", | 1966 | .name = "group_wait_time", |
1982 | .private = offsetof(struct cfq_group, stats.group_wait_time), | 1967 | .private = offsetof(struct cfq_group, stats.group_wait_time), |
1983 | .read_seq_string = cfqg_print_stat, | 1968 | .seq_show = cfqg_print_stat, |
1984 | }, | 1969 | }, |
1985 | { | 1970 | { |
1986 | .name = "idle_time", | 1971 | .name = "idle_time", |
1987 | .private = offsetof(struct cfq_group, stats.idle_time), | 1972 | .private = offsetof(struct cfq_group, stats.idle_time), |
1988 | .read_seq_string = cfqg_print_stat, | 1973 | .seq_show = cfqg_print_stat, |
1989 | }, | 1974 | }, |
1990 | { | 1975 | { |
1991 | .name = "empty_time", | 1976 | .name = "empty_time", |
1992 | .private = offsetof(struct cfq_group, stats.empty_time), | 1977 | .private = offsetof(struct cfq_group, stats.empty_time), |
1993 | .read_seq_string = cfqg_print_stat, | 1978 | .seq_show = cfqg_print_stat, |
1994 | }, | 1979 | }, |
1995 | { | 1980 | { |
1996 | .name = "dequeue", | 1981 | .name = "dequeue", |
1997 | .private = offsetof(struct cfq_group, stats.dequeue), | 1982 | .private = offsetof(struct cfq_group, stats.dequeue), |
1998 | .read_seq_string = cfqg_print_stat, | 1983 | .seq_show = cfqg_print_stat, |
1999 | }, | 1984 | }, |
2000 | { | 1985 | { |
2001 | .name = "unaccounted_time", | 1986 | .name = "unaccounted_time", |
2002 | .private = offsetof(struct cfq_group, stats.unaccounted_time), | 1987 | .private = offsetof(struct cfq_group, stats.unaccounted_time), |
2003 | .read_seq_string = cfqg_print_stat, | 1988 | .seq_show = cfqg_print_stat, |
2004 | }, | 1989 | }, |
2005 | #endif /* CONFIG_DEBUG_BLK_CGROUP */ | 1990 | #endif /* CONFIG_DEBUG_BLK_CGROUP */ |
2006 | { } /* terminate */ | 1991 | { } /* terminate */ |
diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index e3a92a6da39a..74911c2cb1dd 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c | |||
@@ -83,6 +83,8 @@ enum board_ids { | |||
83 | static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent); | 83 | static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent); |
84 | static int ahci_vt8251_hardreset(struct ata_link *link, unsigned int *class, | 84 | static int ahci_vt8251_hardreset(struct ata_link *link, unsigned int *class, |
85 | unsigned long deadline); | 85 | unsigned long deadline); |
86 | static void ahci_mcp89_apple_enable(struct pci_dev *pdev); | ||
87 | static bool is_mcp89_apple(struct pci_dev *pdev); | ||
86 | static int ahci_p5wdh_hardreset(struct ata_link *link, unsigned int *class, | 88 | static int ahci_p5wdh_hardreset(struct ata_link *link, unsigned int *class, |
87 | unsigned long deadline); | 89 | unsigned long deadline); |
88 | #ifdef CONFIG_PM | 90 | #ifdef CONFIG_PM |
@@ -664,6 +666,10 @@ static int ahci_pci_device_resume(struct pci_dev *pdev) | |||
664 | if (rc) | 666 | if (rc) |
665 | return rc; | 667 | return rc; |
666 | 668 | ||
669 | /* Apple BIOS helpfully mangles the registers on resume */ | ||
670 | if (is_mcp89_apple(pdev)) | ||
671 | ahci_mcp89_apple_enable(pdev); | ||
672 | |||
667 | if (pdev->dev.power.power_state.event == PM_EVENT_SUSPEND) { | 673 | if (pdev->dev.power.power_state.event == PM_EVENT_SUSPEND) { |
668 | rc = ahci_pci_reset_controller(host); | 674 | rc = ahci_pci_reset_controller(host); |
669 | if (rc) | 675 | if (rc) |
@@ -780,6 +786,48 @@ static void ahci_p5wdh_workaround(struct ata_host *host) | |||
780 | } | 786 | } |
781 | } | 787 | } |
782 | 788 | ||
789 | /* | ||
790 | * Macbook7,1 firmware forcibly disables MCP89 AHCI and changes PCI ID when | ||
791 | * booting in BIOS compatibility mode. We restore the registers but not ID. | ||
792 | */ | ||
793 | static void ahci_mcp89_apple_enable(struct pci_dev *pdev) | ||
794 | { | ||
795 | u32 val; | ||
796 | |||
797 | printk(KERN_INFO "ahci: enabling MCP89 AHCI mode\n"); | ||
798 | |||
799 | pci_read_config_dword(pdev, 0xf8, &val); | ||
800 | val |= 1 << 0x1b; | ||
801 | /* the following changes the device ID, but appears not to affect function */ | ||
802 | /* val = (val & ~0xf0000000) | 0x80000000; */ | ||
803 | pci_write_config_dword(pdev, 0xf8, val); | ||
804 | |||
805 | pci_read_config_dword(pdev, 0x54c, &val); | ||
806 | val |= 1 << 0xc; | ||
807 | pci_write_config_dword(pdev, 0x54c, val); | ||
808 | |||
809 | pci_read_config_dword(pdev, 0x4a4, &val); | ||
810 | val &= 0xff; | ||
811 | val |= 0x01060100; | ||
812 | pci_write_config_dword(pdev, 0x4a4, val); | ||
813 | |||
814 | pci_read_config_dword(pdev, 0x54c, &val); | ||
815 | val &= ~(1 << 0xc); | ||
816 | pci_write_config_dword(pdev, 0x54c, val); | ||
817 | |||
818 | pci_read_config_dword(pdev, 0xf8, &val); | ||
819 | val &= ~(1 << 0x1b); | ||
820 | pci_write_config_dword(pdev, 0xf8, val); | ||
821 | } | ||
822 | |||
823 | static bool is_mcp89_apple(struct pci_dev *pdev) | ||
824 | { | ||
825 | return pdev->vendor == PCI_VENDOR_ID_NVIDIA && | ||
826 | pdev->device == PCI_DEVICE_ID_NVIDIA_NFORCE_MCP89_SATA && | ||
827 | pdev->subsystem_vendor == PCI_VENDOR_ID_APPLE && | ||
828 | pdev->subsystem_device == 0xcb89; | ||
829 | } | ||
830 | |||
783 | /* only some SB600 ahci controllers can do 64bit DMA */ | 831 | /* only some SB600 ahci controllers can do 64bit DMA */ |
784 | static bool ahci_sb600_enable_64bit(struct pci_dev *pdev) | 832 | static bool ahci_sb600_enable_64bit(struct pci_dev *pdev) |
785 | { | 833 | { |
@@ -1100,7 +1148,7 @@ static inline void ahci_gtf_filter_workaround(struct ata_host *host) | |||
1100 | {} | 1148 | {} |
1101 | #endif | 1149 | #endif |
1102 | 1150 | ||
1103 | int ahci_init_interrupts(struct pci_dev *pdev, struct ahci_host_priv *hpriv) | 1151 | static int ahci_init_interrupts(struct pci_dev *pdev, struct ahci_host_priv *hpriv) |
1104 | { | 1152 | { |
1105 | int rc; | 1153 | int rc; |
1106 | unsigned int maxvec; | 1154 | unsigned int maxvec; |
@@ -1212,15 +1260,9 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) | |||
1212 | if (pdev->vendor == PCI_VENDOR_ID_MARVELL && !marvell_enable) | 1260 | if (pdev->vendor == PCI_VENDOR_ID_MARVELL && !marvell_enable) |
1213 | return -ENODEV; | 1261 | return -ENODEV; |
1214 | 1262 | ||
1215 | /* | 1263 | /* Apple BIOS on MCP89 prevents us using AHCI */ |
1216 | * For some reason, MCP89 on MacBook 7,1 doesn't work with | 1264 | if (is_mcp89_apple(pdev)) |
1217 | * ahci, use ata_generic instead. | 1265 | ahci_mcp89_apple_enable(pdev); |
1218 | */ | ||
1219 | if (pdev->vendor == PCI_VENDOR_ID_NVIDIA && | ||
1220 | pdev->device == PCI_DEVICE_ID_NVIDIA_NFORCE_MCP89_SATA && | ||
1221 | pdev->subsystem_vendor == PCI_VENDOR_ID_APPLE && | ||
1222 | pdev->subsystem_device == 0xcb89) | ||
1223 | return -ENODEV; | ||
1224 | 1266 | ||
1225 | /* Promise's PDC42819 is a SAS/SATA controller that has an AHCI mode. | 1267 | /* Promise's PDC42819 is a SAS/SATA controller that has an AHCI mode. |
1226 | * At the moment, we can only use the AHCI mode. Let the users know | 1268 | * At the moment, we can only use the AHCI mode. Let the users know |
diff --git a/drivers/ata/ahci_imx.c b/drivers/ata/ahci_imx.c index 3e23e9941dad..dd4d6f74d7bd 100644 --- a/drivers/ata/ahci_imx.c +++ b/drivers/ata/ahci_imx.c | |||
@@ -34,10 +34,21 @@ enum { | |||
34 | HOST_TIMER1MS = 0xe0, /* Timer 1-ms */ | 34 | HOST_TIMER1MS = 0xe0, /* Timer 1-ms */ |
35 | }; | 35 | }; |
36 | 36 | ||
37 | enum ahci_imx_type { | ||
38 | AHCI_IMX53, | ||
39 | AHCI_IMX6Q, | ||
40 | }; | ||
41 | |||
37 | struct imx_ahci_priv { | 42 | struct imx_ahci_priv { |
38 | struct platform_device *ahci_pdev; | 43 | struct platform_device *ahci_pdev; |
44 | enum ahci_imx_type type; | ||
45 | |||
46 | /* i.MX53 clock */ | ||
47 | struct clk *sata_gate_clk; | ||
48 | /* Common clock */ | ||
39 | struct clk *sata_ref_clk; | 49 | struct clk *sata_ref_clk; |
40 | struct clk *ahb_clk; | 50 | struct clk *ahb_clk; |
51 | |||
41 | struct regmap *gpr; | 52 | struct regmap *gpr; |
42 | bool no_device; | 53 | bool no_device; |
43 | bool first_time; | 54 | bool first_time; |
@@ -47,6 +58,59 @@ static int ahci_imx_hotplug; | |||
47 | module_param_named(hotplug, ahci_imx_hotplug, int, 0644); | 58 | module_param_named(hotplug, ahci_imx_hotplug, int, 0644); |
48 | MODULE_PARM_DESC(hotplug, "AHCI IMX hot-plug support (0=Don't support, 1=support)"); | 59 | MODULE_PARM_DESC(hotplug, "AHCI IMX hot-plug support (0=Don't support, 1=support)"); |
49 | 60 | ||
61 | static int imx_sata_clock_enable(struct device *dev) | ||
62 | { | ||
63 | struct imx_ahci_priv *imxpriv = dev_get_drvdata(dev->parent); | ||
64 | int ret; | ||
65 | |||
66 | if (imxpriv->type == AHCI_IMX53) { | ||
67 | ret = clk_prepare_enable(imxpriv->sata_gate_clk); | ||
68 | if (ret < 0) { | ||
69 | dev_err(dev, "prepare-enable sata_gate clock err:%d\n", | ||
70 | ret); | ||
71 | return ret; | ||
72 | } | ||
73 | } | ||
74 | |||
75 | ret = clk_prepare_enable(imxpriv->sata_ref_clk); | ||
76 | if (ret < 0) { | ||
77 | dev_err(dev, "prepare-enable sata_ref clock err:%d\n", | ||
78 | ret); | ||
79 | goto clk_err; | ||
80 | } | ||
81 | |||
82 | if (imxpriv->type == AHCI_IMX6Q) { | ||
83 | regmap_update_bits(imxpriv->gpr, IOMUXC_GPR13, | ||
84 | IMX6Q_GPR13_SATA_MPLL_CLK_EN, | ||
85 | IMX6Q_GPR13_SATA_MPLL_CLK_EN); | ||
86 | } | ||
87 | |||
88 | usleep_range(1000, 2000); | ||
89 | |||
90 | return 0; | ||
91 | |||
92 | clk_err: | ||
93 | if (imxpriv->type == AHCI_IMX53) | ||
94 | clk_disable_unprepare(imxpriv->sata_gate_clk); | ||
95 | return ret; | ||
96 | } | ||
97 | |||
98 | static void imx_sata_clock_disable(struct device *dev) | ||
99 | { | ||
100 | struct imx_ahci_priv *imxpriv = dev_get_drvdata(dev->parent); | ||
101 | |||
102 | if (imxpriv->type == AHCI_IMX6Q) { | ||
103 | regmap_update_bits(imxpriv->gpr, IOMUXC_GPR13, | ||
104 | IMX6Q_GPR13_SATA_MPLL_CLK_EN, | ||
105 | !IMX6Q_GPR13_SATA_MPLL_CLK_EN); | ||
106 | } | ||
107 | |||
108 | clk_disable_unprepare(imxpriv->sata_ref_clk); | ||
109 | |||
110 | if (imxpriv->type == AHCI_IMX53) | ||
111 | clk_disable_unprepare(imxpriv->sata_gate_clk); | ||
112 | } | ||
113 | |||
50 | static void ahci_imx_error_handler(struct ata_port *ap) | 114 | static void ahci_imx_error_handler(struct ata_port *ap) |
51 | { | 115 | { |
52 | u32 reg_val; | 116 | u32 reg_val; |
@@ -72,16 +136,29 @@ static void ahci_imx_error_handler(struct ata_port *ap) | |||
72 | */ | 136 | */ |
73 | reg_val = readl(mmio + PORT_PHY_CTL); | 137 | reg_val = readl(mmio + PORT_PHY_CTL); |
74 | writel(reg_val | PORT_PHY_CTL_PDDQ_LOC, mmio + PORT_PHY_CTL); | 138 | writel(reg_val | PORT_PHY_CTL_PDDQ_LOC, mmio + PORT_PHY_CTL); |
75 | regmap_update_bits(imxpriv->gpr, IOMUXC_GPR13, | 139 | imx_sata_clock_disable(ap->dev); |
76 | IMX6Q_GPR13_SATA_MPLL_CLK_EN, | ||
77 | !IMX6Q_GPR13_SATA_MPLL_CLK_EN); | ||
78 | clk_disable_unprepare(imxpriv->sata_ref_clk); | ||
79 | imxpriv->no_device = true; | 140 | imxpriv->no_device = true; |
80 | } | 141 | } |
81 | 142 | ||
143 | static int ahci_imx_softreset(struct ata_link *link, unsigned int *class, | ||
144 | unsigned long deadline) | ||
145 | { | ||
146 | struct ata_port *ap = link->ap; | ||
147 | struct imx_ahci_priv *imxpriv = dev_get_drvdata(ap->dev->parent); | ||
148 | int ret = -EIO; | ||
149 | |||
150 | if (imxpriv->type == AHCI_IMX53) | ||
151 | ret = ahci_pmp_retry_srst_ops.softreset(link, class, deadline); | ||
152 | else if (imxpriv->type == AHCI_IMX6Q) | ||
153 | ret = ahci_ops.softreset(link, class, deadline); | ||
154 | |||
155 | return ret; | ||
156 | } | ||
157 | |||
82 | static struct ata_port_operations ahci_imx_ops = { | 158 | static struct ata_port_operations ahci_imx_ops = { |
83 | .inherits = &ahci_platform_ops, | 159 | .inherits = &ahci_platform_ops, |
84 | .error_handler = ahci_imx_error_handler, | 160 | .error_handler = ahci_imx_error_handler, |
161 | .softreset = ahci_imx_softreset, | ||
85 | }; | 162 | }; |
86 | 163 | ||
87 | static const struct ata_port_info ahci_imx_port_info = { | 164 | static const struct ata_port_info ahci_imx_port_info = { |
@@ -91,52 +168,15 @@ static const struct ata_port_info ahci_imx_port_info = { | |||
91 | .port_ops = &ahci_imx_ops, | 168 | .port_ops = &ahci_imx_ops, |
92 | }; | 169 | }; |
93 | 170 | ||
94 | static int imx6q_sata_init(struct device *dev, void __iomem *mmio) | 171 | static int imx_sata_init(struct device *dev, void __iomem *mmio) |
95 | { | 172 | { |
96 | int ret = 0; | 173 | int ret = 0; |
97 | unsigned int reg_val; | 174 | unsigned int reg_val; |
98 | struct imx_ahci_priv *imxpriv = dev_get_drvdata(dev->parent); | 175 | struct imx_ahci_priv *imxpriv = dev_get_drvdata(dev->parent); |
99 | 176 | ||
100 | imxpriv->gpr = | 177 | ret = imx_sata_clock_enable(dev); |
101 | syscon_regmap_lookup_by_compatible("fsl,imx6q-iomuxc-gpr"); | 178 | if (ret < 0) |
102 | if (IS_ERR(imxpriv->gpr)) { | ||
103 | dev_err(dev, "failed to find fsl,imx6q-iomux-gpr regmap\n"); | ||
104 | return PTR_ERR(imxpriv->gpr); | ||
105 | } | ||
106 | |||
107 | ret = clk_prepare_enable(imxpriv->sata_ref_clk); | ||
108 | if (ret < 0) { | ||
109 | dev_err(dev, "prepare-enable sata_ref clock err:%d\n", ret); | ||
110 | return ret; | 179 | return ret; |
111 | } | ||
112 | |||
113 | /* | ||
114 | * set PHY Paremeters, two steps to configure the GPR13, | ||
115 | * one write for rest of parameters, mask of first write | ||
116 | * is 0x07ffffff, and the other one write for setting | ||
117 | * the mpll_clk_en. | ||
118 | */ | ||
119 | regmap_update_bits(imxpriv->gpr, 0x34, IMX6Q_GPR13_SATA_RX_EQ_VAL_MASK | ||
120 | | IMX6Q_GPR13_SATA_RX_LOS_LVL_MASK | ||
121 | | IMX6Q_GPR13_SATA_RX_DPLL_MODE_MASK | ||
122 | | IMX6Q_GPR13_SATA_SPD_MODE_MASK | ||
123 | | IMX6Q_GPR13_SATA_MPLL_SS_EN | ||
124 | | IMX6Q_GPR13_SATA_TX_ATTEN_MASK | ||
125 | | IMX6Q_GPR13_SATA_TX_BOOST_MASK | ||
126 | | IMX6Q_GPR13_SATA_TX_LVL_MASK | ||
127 | | IMX6Q_GPR13_SATA_MPLL_CLK_EN | ||
128 | | IMX6Q_GPR13_SATA_TX_EDGE_RATE | ||
129 | , IMX6Q_GPR13_SATA_RX_EQ_VAL_3_0_DB | ||
130 | | IMX6Q_GPR13_SATA_RX_LOS_LVL_SATA2M | ||
131 | | IMX6Q_GPR13_SATA_RX_DPLL_MODE_2P_4F | ||
132 | | IMX6Q_GPR13_SATA_SPD_MODE_3P0G | ||
133 | | IMX6Q_GPR13_SATA_MPLL_SS_EN | ||
134 | | IMX6Q_GPR13_SATA_TX_ATTEN_9_16 | ||
135 | | IMX6Q_GPR13_SATA_TX_BOOST_3_33_DB | ||
136 | | IMX6Q_GPR13_SATA_TX_LVL_1_025_V); | ||
137 | regmap_update_bits(imxpriv->gpr, 0x34, IMX6Q_GPR13_SATA_MPLL_CLK_EN, | ||
138 | IMX6Q_GPR13_SATA_MPLL_CLK_EN); | ||
139 | usleep_range(100, 200); | ||
140 | 180 | ||
141 | /* | 181 | /* |
142 | * Configure the HWINIT bits of the HOST_CAP and HOST_PORTS_IMPL, | 182 | * Configure the HWINIT bits of the HOST_CAP and HOST_PORTS_IMPL, |
@@ -162,13 +202,9 @@ static int imx6q_sata_init(struct device *dev, void __iomem *mmio) | |||
162 | return 0; | 202 | return 0; |
163 | } | 203 | } |
164 | 204 | ||
165 | static void imx6q_sata_exit(struct device *dev) | 205 | static void imx_sata_exit(struct device *dev) |
166 | { | 206 | { |
167 | struct imx_ahci_priv *imxpriv = dev_get_drvdata(dev->parent); | 207 | imx_sata_clock_disable(dev); |
168 | |||
169 | regmap_update_bits(imxpriv->gpr, 0x34, IMX6Q_GPR13_SATA_MPLL_CLK_EN, | ||
170 | !IMX6Q_GPR13_SATA_MPLL_CLK_EN); | ||
171 | clk_disable_unprepare(imxpriv->sata_ref_clk); | ||
172 | } | 208 | } |
173 | 209 | ||
174 | static int imx_ahci_suspend(struct device *dev) | 210 | static int imx_ahci_suspend(struct device *dev) |
@@ -179,12 +215,8 @@ static int imx_ahci_suspend(struct device *dev) | |||
179 | * If no_device is set, The CLKs had been gated off in the | 215 | * If no_device is set, The CLKs had been gated off in the |
180 | * initialization so don't do it again here. | 216 | * initialization so don't do it again here. |
181 | */ | 217 | */ |
182 | if (!imxpriv->no_device) { | 218 | if (!imxpriv->no_device) |
183 | regmap_update_bits(imxpriv->gpr, IOMUXC_GPR13, | 219 | imx_sata_clock_disable(dev); |
184 | IMX6Q_GPR13_SATA_MPLL_CLK_EN, | ||
185 | !IMX6Q_GPR13_SATA_MPLL_CLK_EN); | ||
186 | clk_disable_unprepare(imxpriv->sata_ref_clk); | ||
187 | } | ||
188 | 220 | ||
189 | return 0; | 221 | return 0; |
190 | } | 222 | } |
@@ -192,34 +224,26 @@ static int imx_ahci_suspend(struct device *dev) | |||
192 | static int imx_ahci_resume(struct device *dev) | 224 | static int imx_ahci_resume(struct device *dev) |
193 | { | 225 | { |
194 | struct imx_ahci_priv *imxpriv = dev_get_drvdata(dev->parent); | 226 | struct imx_ahci_priv *imxpriv = dev_get_drvdata(dev->parent); |
195 | int ret; | 227 | int ret = 0; |
196 | |||
197 | if (!imxpriv->no_device) { | ||
198 | ret = clk_prepare_enable(imxpriv->sata_ref_clk); | ||
199 | if (ret < 0) { | ||
200 | dev_err(dev, "pre-enable sata_ref clock err:%d\n", ret); | ||
201 | return ret; | ||
202 | } | ||
203 | 228 | ||
204 | regmap_update_bits(imxpriv->gpr, IOMUXC_GPR13, | 229 | if (!imxpriv->no_device) |
205 | IMX6Q_GPR13_SATA_MPLL_CLK_EN, | 230 | ret = imx_sata_clock_enable(dev); |
206 | IMX6Q_GPR13_SATA_MPLL_CLK_EN); | ||
207 | usleep_range(1000, 2000); | ||
208 | } | ||
209 | 231 | ||
210 | return 0; | 232 | return ret; |
211 | } | 233 | } |
212 | 234 | ||
213 | static struct ahci_platform_data imx6q_sata_pdata = { | 235 | static struct ahci_platform_data imx_sata_pdata = { |
214 | .init = imx6q_sata_init, | 236 | .init = imx_sata_init, |
215 | .exit = imx6q_sata_exit, | 237 | .exit = imx_sata_exit, |
216 | .ata_port_info = &ahci_imx_port_info, | 238 | .ata_port_info = &ahci_imx_port_info, |
217 | .suspend = imx_ahci_suspend, | 239 | .suspend = imx_ahci_suspend, |
218 | .resume = imx_ahci_resume, | 240 | .resume = imx_ahci_resume, |
241 | |||
219 | }; | 242 | }; |
220 | 243 | ||
221 | static const struct of_device_id imx_ahci_of_match[] = { | 244 | static const struct of_device_id imx_ahci_of_match[] = { |
222 | { .compatible = "fsl,imx6q-ahci", .data = &imx6q_sata_pdata}, | 245 | { .compatible = "fsl,imx53-ahci", .data = (void *)AHCI_IMX53 }, |
246 | { .compatible = "fsl,imx6q-ahci", .data = (void *)AHCI_IMX6Q }, | ||
223 | {}, | 247 | {}, |
224 | }; | 248 | }; |
225 | MODULE_DEVICE_TABLE(of, imx_ahci_of_match); | 249 | MODULE_DEVICE_TABLE(of, imx_ahci_of_match); |
@@ -229,12 +253,20 @@ static int imx_ahci_probe(struct platform_device *pdev) | |||
229 | struct device *dev = &pdev->dev; | 253 | struct device *dev = &pdev->dev; |
230 | struct resource *mem, *irq, res[2]; | 254 | struct resource *mem, *irq, res[2]; |
231 | const struct of_device_id *of_id; | 255 | const struct of_device_id *of_id; |
256 | enum ahci_imx_type type; | ||
232 | const struct ahci_platform_data *pdata = NULL; | 257 | const struct ahci_platform_data *pdata = NULL; |
233 | struct imx_ahci_priv *imxpriv; | 258 | struct imx_ahci_priv *imxpriv; |
234 | struct device *ahci_dev; | 259 | struct device *ahci_dev; |
235 | struct platform_device *ahci_pdev; | 260 | struct platform_device *ahci_pdev; |
236 | int ret; | 261 | int ret; |
237 | 262 | ||
263 | of_id = of_match_device(imx_ahci_of_match, dev); | ||
264 | if (!of_id) | ||
265 | return -EINVAL; | ||
266 | |||
267 | type = (enum ahci_imx_type)of_id->data; | ||
268 | pdata = &imx_sata_pdata; | ||
269 | |||
238 | imxpriv = devm_kzalloc(dev, sizeof(*imxpriv), GFP_KERNEL); | 270 | imxpriv = devm_kzalloc(dev, sizeof(*imxpriv), GFP_KERNEL); |
239 | if (!imxpriv) { | 271 | if (!imxpriv) { |
240 | dev_err(dev, "can't alloc ahci_host_priv\n"); | 272 | dev_err(dev, "can't alloc ahci_host_priv\n"); |
@@ -250,6 +282,8 @@ static int imx_ahci_probe(struct platform_device *pdev) | |||
250 | 282 | ||
251 | imxpriv->no_device = false; | 283 | imxpriv->no_device = false; |
252 | imxpriv->first_time = true; | 284 | imxpriv->first_time = true; |
285 | imxpriv->type = type; | ||
286 | |||
253 | imxpriv->ahb_clk = devm_clk_get(dev, "ahb"); | 287 | imxpriv->ahb_clk = devm_clk_get(dev, "ahb"); |
254 | if (IS_ERR(imxpriv->ahb_clk)) { | 288 | if (IS_ERR(imxpriv->ahb_clk)) { |
255 | dev_err(dev, "can't get ahb clock.\n"); | 289 | dev_err(dev, "can't get ahb clock.\n"); |
@@ -257,6 +291,15 @@ static int imx_ahci_probe(struct platform_device *pdev) | |||
257 | goto err_out; | 291 | goto err_out; |
258 | } | 292 | } |
259 | 293 | ||
294 | if (type == AHCI_IMX53) { | ||
295 | imxpriv->sata_gate_clk = devm_clk_get(dev, "sata_gate"); | ||
296 | if (IS_ERR(imxpriv->sata_gate_clk)) { | ||
297 | dev_err(dev, "can't get sata_gate clock.\n"); | ||
298 | ret = PTR_ERR(imxpriv->sata_gate_clk); | ||
299 | goto err_out; | ||
300 | } | ||
301 | } | ||
302 | |||
260 | imxpriv->sata_ref_clk = devm_clk_get(dev, "sata_ref"); | 303 | imxpriv->sata_ref_clk = devm_clk_get(dev, "sata_ref"); |
261 | if (IS_ERR(imxpriv->sata_ref_clk)) { | 304 | if (IS_ERR(imxpriv->sata_ref_clk)) { |
262 | dev_err(dev, "can't get sata_ref clock.\n"); | 305 | dev_err(dev, "can't get sata_ref clock.\n"); |
@@ -267,14 +310,6 @@ static int imx_ahci_probe(struct platform_device *pdev) | |||
267 | imxpriv->ahci_pdev = ahci_pdev; | 310 | imxpriv->ahci_pdev = ahci_pdev; |
268 | platform_set_drvdata(pdev, imxpriv); | 311 | platform_set_drvdata(pdev, imxpriv); |
269 | 312 | ||
270 | of_id = of_match_device(imx_ahci_of_match, dev); | ||
271 | if (of_id) { | ||
272 | pdata = of_id->data; | ||
273 | } else { | ||
274 | ret = -EINVAL; | ||
275 | goto err_out; | ||
276 | } | ||
277 | |||
278 | mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); | 313 | mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); |
279 | irq = platform_get_resource(pdev, IORESOURCE_IRQ, 0); | 314 | irq = platform_get_resource(pdev, IORESOURCE_IRQ, 0); |
280 | if (!mem || !irq) { | 315 | if (!mem || !irq) { |
@@ -290,6 +325,43 @@ static int imx_ahci_probe(struct platform_device *pdev) | |||
290 | ahci_dev->dma_mask = &ahci_dev->coherent_dma_mask; | 325 | ahci_dev->dma_mask = &ahci_dev->coherent_dma_mask; |
291 | ahci_dev->of_node = dev->of_node; | 326 | ahci_dev->of_node = dev->of_node; |
292 | 327 | ||
328 | if (type == AHCI_IMX6Q) { | ||
329 | imxpriv->gpr = syscon_regmap_lookup_by_compatible( | ||
330 | "fsl,imx6q-iomuxc-gpr"); | ||
331 | if (IS_ERR(imxpriv->gpr)) { | ||
332 | dev_err(dev, | ||
333 | "failed to find fsl,imx6q-iomux-gpr regmap\n"); | ||
334 | ret = PTR_ERR(imxpriv->gpr); | ||
335 | goto err_out; | ||
336 | } | ||
337 | |||
338 | /* | ||
339 | * Set PHY Paremeters, two steps to configure the GPR13, | ||
340 | * one write for rest of parameters, mask of first write | ||
341 | * is 0x07fffffe, and the other one write for setting | ||
342 | * the mpll_clk_en happens in imx_sata_clock_enable(). | ||
343 | */ | ||
344 | regmap_update_bits(imxpriv->gpr, IOMUXC_GPR13, | ||
345 | IMX6Q_GPR13_SATA_RX_EQ_VAL_MASK | | ||
346 | IMX6Q_GPR13_SATA_RX_LOS_LVL_MASK | | ||
347 | IMX6Q_GPR13_SATA_RX_DPLL_MODE_MASK | | ||
348 | IMX6Q_GPR13_SATA_SPD_MODE_MASK | | ||
349 | IMX6Q_GPR13_SATA_MPLL_SS_EN | | ||
350 | IMX6Q_GPR13_SATA_TX_ATTEN_MASK | | ||
351 | IMX6Q_GPR13_SATA_TX_BOOST_MASK | | ||
352 | IMX6Q_GPR13_SATA_TX_LVL_MASK | | ||
353 | IMX6Q_GPR13_SATA_MPLL_CLK_EN | | ||
354 | IMX6Q_GPR13_SATA_TX_EDGE_RATE, | ||
355 | IMX6Q_GPR13_SATA_RX_EQ_VAL_3_0_DB | | ||
356 | IMX6Q_GPR13_SATA_RX_LOS_LVL_SATA2M | | ||
357 | IMX6Q_GPR13_SATA_RX_DPLL_MODE_2P_4F | | ||
358 | IMX6Q_GPR13_SATA_SPD_MODE_3P0G | | ||
359 | IMX6Q_GPR13_SATA_MPLL_SS_EN | | ||
360 | IMX6Q_GPR13_SATA_TX_ATTEN_9_16 | | ||
361 | IMX6Q_GPR13_SATA_TX_BOOST_3_33_DB | | ||
362 | IMX6Q_GPR13_SATA_TX_LVL_1_025_V); | ||
363 | } | ||
364 | |||
293 | ret = platform_device_add_resources(ahci_pdev, res, 2); | 365 | ret = platform_device_add_resources(ahci_pdev, res, 2); |
294 | if (ret) | 366 | if (ret) |
295 | goto err_out; | 367 | goto err_out; |
diff --git a/drivers/ata/ata_generic.c b/drivers/ata/ata_generic.c index f8f38a08abc5..7d196656adb5 100644 --- a/drivers/ata/ata_generic.c +++ b/drivers/ata/ata_generic.c | |||
@@ -221,13 +221,6 @@ static struct pci_device_id ata_generic[] = { | |||
221 | { PCI_DEVICE(PCI_VENDOR_ID_OPTI, PCI_DEVICE_ID_OPTI_82C558), }, | 221 | { PCI_DEVICE(PCI_VENDOR_ID_OPTI, PCI_DEVICE_ID_OPTI_82C558), }, |
222 | { PCI_DEVICE(PCI_VENDOR_ID_CENATEK,PCI_DEVICE_ID_CENATEK_IDE), | 222 | { PCI_DEVICE(PCI_VENDOR_ID_CENATEK,PCI_DEVICE_ID_CENATEK_IDE), |
223 | .driver_data = ATA_GEN_FORCE_DMA }, | 223 | .driver_data = ATA_GEN_FORCE_DMA }, |
224 | /* | ||
225 | * For some reason, MCP89 on MacBook 7,1 doesn't work with | ||
226 | * ahci, use ata_generic instead. | ||
227 | */ | ||
228 | { PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP89_SATA, | ||
229 | PCI_VENDOR_ID_APPLE, 0xcb89, | ||
230 | .driver_data = ATA_GEN_FORCE_DMA }, | ||
231 | #if !defined(CONFIG_PATA_TOSHIBA) && !defined(CONFIG_PATA_TOSHIBA_MODULE) | 224 | #if !defined(CONFIG_PATA_TOSHIBA) && !defined(CONFIG_PATA_TOSHIBA_MODULE) |
232 | { PCI_DEVICE(PCI_VENDOR_ID_TOSHIBA,PCI_DEVICE_ID_TOSHIBA_PICCOLO_1), }, | 225 | { PCI_DEVICE(PCI_VENDOR_ID_TOSHIBA,PCI_DEVICE_ID_TOSHIBA_PICCOLO_1), }, |
233 | { PCI_DEVICE(PCI_VENDOR_ID_TOSHIBA,PCI_DEVICE_ID_TOSHIBA_PICCOLO_2), }, | 226 | { PCI_DEVICE(PCI_VENDOR_ID_TOSHIBA,PCI_DEVICE_ID_TOSHIBA_PICCOLO_2), }, |
diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c index c482f8cadd7a..36605abe5a67 100644 --- a/drivers/ata/libahci.c +++ b/drivers/ata/libahci.c | |||
@@ -1764,7 +1764,7 @@ static void ahci_handle_port_interrupt(struct ata_port *ap, | |||
1764 | } | 1764 | } |
1765 | } | 1765 | } |
1766 | 1766 | ||
1767 | void ahci_port_intr(struct ata_port *ap) | 1767 | static void ahci_port_intr(struct ata_port *ap) |
1768 | { | 1768 | { |
1769 | void __iomem *port_mmio = ahci_port_base(ap); | 1769 | void __iomem *port_mmio = ahci_port_base(ap); |
1770 | u32 status; | 1770 | u32 status; |
@@ -1797,7 +1797,7 @@ irqreturn_t ahci_thread_fn(int irq, void *dev_instance) | |||
1797 | } | 1797 | } |
1798 | EXPORT_SYMBOL_GPL(ahci_thread_fn); | 1798 | EXPORT_SYMBOL_GPL(ahci_thread_fn); |
1799 | 1799 | ||
1800 | void ahci_hw_port_interrupt(struct ata_port *ap) | 1800 | static void ahci_hw_port_interrupt(struct ata_port *ap) |
1801 | { | 1801 | { |
1802 | void __iomem *port_mmio = ahci_port_base(ap); | 1802 | void __iomem *port_mmio = ahci_port_base(ap); |
1803 | struct ahci_port_priv *pp = ap->private_data; | 1803 | struct ahci_port_priv *pp = ap->private_data; |
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 1393a5890ed5..1a3dbd1b196e 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c | |||
@@ -2222,6 +2222,16 @@ int ata_dev_configure(struct ata_device *dev) | |||
2222 | if (rc) | 2222 | if (rc) |
2223 | return rc; | 2223 | return rc; |
2224 | 2224 | ||
2225 | /* some WD SATA-1 drives have issues with LPM, turn on NOLPM for them */ | ||
2226 | if ((dev->horkage & ATA_HORKAGE_WD_BROKEN_LPM) && | ||
2227 | (id[ATA_ID_SATA_CAPABILITY] & 0xe) == 0x2) | ||
2228 | dev->horkage |= ATA_HORKAGE_NOLPM; | ||
2229 | |||
2230 | if (dev->horkage & ATA_HORKAGE_NOLPM) { | ||
2231 | ata_dev_warn(dev, "LPM support broken, forcing max_power\n"); | ||
2232 | dev->link->ap->target_lpm_policy = ATA_LPM_MAX_POWER; | ||
2233 | } | ||
2234 | |||
2225 | /* let ACPI work its magic */ | 2235 | /* let ACPI work its magic */ |
2226 | rc = ata_acpi_on_devcfg(dev); | 2236 | rc = ata_acpi_on_devcfg(dev); |
2227 | if (rc) | 2237 | if (rc) |
@@ -4216,6 +4226,23 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { | |||
4216 | { "Micron_M500*", NULL, ATA_HORKAGE_NO_NCQ_TRIM, }, | 4226 | { "Micron_M500*", NULL, ATA_HORKAGE_NO_NCQ_TRIM, }, |
4217 | { "Crucial_CT???M500SSD1", NULL, ATA_HORKAGE_NO_NCQ_TRIM, }, | 4227 | { "Crucial_CT???M500SSD1", NULL, ATA_HORKAGE_NO_NCQ_TRIM, }, |
4218 | 4228 | ||
4229 | /* | ||
4230 | * Some WD SATA-I drives spin up and down erratically when the link | ||
4231 | * is put into the slumber mode. We don't have full list of the | ||
4232 | * affected devices. Disable LPM if the device matches one of the | ||
4233 | * known prefixes and is SATA-1. As a side effect LPM partial is | ||
4234 | * lost too. | ||
4235 | * | ||
4236 | * https://bugzilla.kernel.org/show_bug.cgi?id=57211 | ||
4237 | */ | ||
4238 | { "WDC WD800JD-*", NULL, ATA_HORKAGE_WD_BROKEN_LPM }, | ||
4239 | { "WDC WD1200JD-*", NULL, ATA_HORKAGE_WD_BROKEN_LPM }, | ||
4240 | { "WDC WD1600JD-*", NULL, ATA_HORKAGE_WD_BROKEN_LPM }, | ||
4241 | { "WDC WD2000JD-*", NULL, ATA_HORKAGE_WD_BROKEN_LPM }, | ||
4242 | { "WDC WD2500JD-*", NULL, ATA_HORKAGE_WD_BROKEN_LPM }, | ||
4243 | { "WDC WD3000JD-*", NULL, ATA_HORKAGE_WD_BROKEN_LPM }, | ||
4244 | { "WDC WD3200JD-*", NULL, ATA_HORKAGE_WD_BROKEN_LPM }, | ||
4245 | |||
4219 | /* End Marker */ | 4246 | /* End Marker */ |
4220 | { } | 4247 | { } |
4221 | }; | 4248 | }; |
diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index 92d7797223be..6d8757008318 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c | |||
@@ -2402,7 +2402,7 @@ static void ata_eh_link_report(struct ata_link *link) | |||
2402 | struct ata_port *ap = link->ap; | 2402 | struct ata_port *ap = link->ap; |
2403 | struct ata_eh_context *ehc = &link->eh_context; | 2403 | struct ata_eh_context *ehc = &link->eh_context; |
2404 | const char *frozen, *desc; | 2404 | const char *frozen, *desc; |
2405 | char tries_buf[6]; | 2405 | char tries_buf[6] = ""; |
2406 | int tag, nr_failed = 0; | 2406 | int tag, nr_failed = 0; |
2407 | 2407 | ||
2408 | if (ehc->i.flags & ATA_EHI_QUIET) | 2408 | if (ehc->i.flags & ATA_EHI_QUIET) |
@@ -2433,9 +2433,8 @@ static void ata_eh_link_report(struct ata_link *link) | |||
2433 | if (ap->pflags & ATA_PFLAG_FROZEN) | 2433 | if (ap->pflags & ATA_PFLAG_FROZEN) |
2434 | frozen = " frozen"; | 2434 | frozen = " frozen"; |
2435 | 2435 | ||
2436 | memset(tries_buf, 0, sizeof(tries_buf)); | ||
2437 | if (ap->eh_tries < ATA_EH_MAX_TRIES) | 2436 | if (ap->eh_tries < ATA_EH_MAX_TRIES) |
2438 | snprintf(tries_buf, sizeof(tries_buf) - 1, " t%d", | 2437 | snprintf(tries_buf, sizeof(tries_buf), " t%d", |
2439 | ap->eh_tries); | 2438 | ap->eh_tries); |
2440 | 2439 | ||
2441 | if (ehc->i.dev) { | 2440 | if (ehc->i.dev) { |
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 377eb889f555..ef8567de6a75 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c | |||
@@ -111,12 +111,14 @@ static const char *ata_lpm_policy_names[] = { | |||
111 | [ATA_LPM_MIN_POWER] = "min_power", | 111 | [ATA_LPM_MIN_POWER] = "min_power", |
112 | }; | 112 | }; |
113 | 113 | ||
114 | static ssize_t ata_scsi_lpm_store(struct device *dev, | 114 | static ssize_t ata_scsi_lpm_store(struct device *device, |
115 | struct device_attribute *attr, | 115 | struct device_attribute *attr, |
116 | const char *buf, size_t count) | 116 | const char *buf, size_t count) |
117 | { | 117 | { |
118 | struct Scsi_Host *shost = class_to_shost(dev); | 118 | struct Scsi_Host *shost = class_to_shost(device); |
119 | struct ata_port *ap = ata_shost_to_port(shost); | 119 | struct ata_port *ap = ata_shost_to_port(shost); |
120 | struct ata_link *link; | ||
121 | struct ata_device *dev; | ||
120 | enum ata_lpm_policy policy; | 122 | enum ata_lpm_policy policy; |
121 | unsigned long flags; | 123 | unsigned long flags; |
122 | 124 | ||
@@ -132,10 +134,20 @@ static ssize_t ata_scsi_lpm_store(struct device *dev, | |||
132 | return -EINVAL; | 134 | return -EINVAL; |
133 | 135 | ||
134 | spin_lock_irqsave(ap->lock, flags); | 136 | spin_lock_irqsave(ap->lock, flags); |
137 | |||
138 | ata_for_each_link(link, ap, EDGE) { | ||
139 | ata_for_each_dev(dev, &ap->link, ENABLED) { | ||
140 | if (dev->horkage & ATA_HORKAGE_NOLPM) { | ||
141 | count = -EOPNOTSUPP; | ||
142 | goto out_unlock; | ||
143 | } | ||
144 | } | ||
145 | } | ||
146 | |||
135 | ap->target_lpm_policy = policy; | 147 | ap->target_lpm_policy = policy; |
136 | ata_port_schedule_eh(ap); | 148 | ata_port_schedule_eh(ap); |
149 | out_unlock: | ||
137 | spin_unlock_irqrestore(ap->lock, flags); | 150 | spin_unlock_irqrestore(ap->lock, flags); |
138 | |||
139 | return count; | 151 | return count; |
140 | } | 152 | } |
141 | 153 | ||
diff --git a/drivers/ata/pata_samsung_cf.c b/drivers/ata/pata_samsung_cf.c index 898e544a7ae8..a79566d05666 100644 --- a/drivers/ata/pata_samsung_cf.c +++ b/drivers/ata/pata_samsung_cf.c | |||
@@ -24,11 +24,34 @@ | |||
24 | #include <linux/slab.h> | 24 | #include <linux/slab.h> |
25 | 25 | ||
26 | #include <linux/platform_data/ata-samsung_cf.h> | 26 | #include <linux/platform_data/ata-samsung_cf.h> |
27 | #include <plat/regs-ata.h> | ||
28 | 27 | ||
29 | #define DRV_NAME "pata_samsung_cf" | 28 | #define DRV_NAME "pata_samsung_cf" |
30 | #define DRV_VERSION "0.1" | 29 | #define DRV_VERSION "0.1" |
31 | 30 | ||
31 | #define S3C_CFATA_REG(x) (x) | ||
32 | #define S3C_CFATA_MUX S3C_CFATA_REG(0x0) | ||
33 | #define S3C_ATA_CTRL S3C_CFATA_REG(0x0) | ||
34 | #define S3C_ATA_CMD S3C_CFATA_REG(0x8) | ||
35 | #define S3C_ATA_IRQ S3C_CFATA_REG(0x10) | ||
36 | #define S3C_ATA_IRQ_MSK S3C_CFATA_REG(0x14) | ||
37 | #define S3C_ATA_CFG S3C_CFATA_REG(0x18) | ||
38 | |||
39 | #define S3C_ATA_PIO_TIME S3C_CFATA_REG(0x2c) | ||
40 | #define S3C_ATA_PIO_DTR S3C_CFATA_REG(0x54) | ||
41 | #define S3C_ATA_PIO_FED S3C_CFATA_REG(0x58) | ||
42 | #define S3C_ATA_PIO_SCR S3C_CFATA_REG(0x5c) | ||
43 | #define S3C_ATA_PIO_LLR S3C_CFATA_REG(0x60) | ||
44 | #define S3C_ATA_PIO_LMR S3C_CFATA_REG(0x64) | ||
45 | #define S3C_ATA_PIO_LHR S3C_CFATA_REG(0x68) | ||
46 | #define S3C_ATA_PIO_DVR S3C_CFATA_REG(0x6c) | ||
47 | #define S3C_ATA_PIO_CSD S3C_CFATA_REG(0x70) | ||
48 | #define S3C_ATA_PIO_DAD S3C_CFATA_REG(0x74) | ||
49 | #define S3C_ATA_PIO_RDATA S3C_CFATA_REG(0x7c) | ||
50 | |||
51 | #define S3C_CFATA_MUX_TRUEIDE 0x01 | ||
52 | #define S3C_ATA_CFG_SWAP 0x40 | ||
53 | #define S3C_ATA_CFG_IORDYEN 0x02 | ||
54 | |||
32 | enum s3c_cpu_type { | 55 | enum s3c_cpu_type { |
33 | TYPE_S3C64XX, | 56 | TYPE_S3C64XX, |
34 | TYPE_S5PC100, | 57 | TYPE_S5PC100, |
@@ -495,22 +518,10 @@ static int __init pata_s3c_probe(struct platform_device *pdev) | |||
495 | info->irq = platform_get_irq(pdev, 0); | 518 | info->irq = platform_get_irq(pdev, 0); |
496 | 519 | ||
497 | res = platform_get_resource(pdev, IORESOURCE_MEM, 0); | 520 | res = platform_get_resource(pdev, IORESOURCE_MEM, 0); |
498 | if (res == NULL) { | ||
499 | dev_err(dev, "failed to get mem resource\n"); | ||
500 | return -EINVAL; | ||
501 | } | ||
502 | |||
503 | if (!devm_request_mem_region(dev, res->start, | ||
504 | resource_size(res), DRV_NAME)) { | ||
505 | dev_err(dev, "error requesting register region\n"); | ||
506 | return -EBUSY; | ||
507 | } | ||
508 | 521 | ||
509 | info->ide_addr = devm_ioremap(dev, res->start, resource_size(res)); | 522 | info->ide_addr = devm_ioremap_resource(dev, res); |
510 | if (!info->ide_addr) { | 523 | if (IS_ERR(info->ide_addr)) |
511 | dev_err(dev, "failed to map IO base address\n"); | 524 | return PTR_ERR(info->ide_addr); |
512 | return -ENOMEM; | ||
513 | } | ||
514 | 525 | ||
515 | info->clk = devm_clk_get(&pdev->dev, "cfcon"); | 526 | info->clk = devm_clk_get(&pdev->dev, "cfcon"); |
516 | if (IS_ERR(info->clk)) { | 527 | if (IS_ERR(info->clk)) { |
diff --git a/drivers/ata/sata_highbank.c b/drivers/ata/sata_highbank.c index ea3b3dc10f33..870b11eadc6d 100644 --- a/drivers/ata/sata_highbank.c +++ b/drivers/ata/sata_highbank.c | |||
@@ -29,7 +29,6 @@ | |||
29 | #include <linux/of_address.h> | 29 | #include <linux/of_address.h> |
30 | #include <linux/platform_device.h> | 30 | #include <linux/platform_device.h> |
31 | #include <linux/libata.h> | 31 | #include <linux/libata.h> |
32 | #include <linux/ahci_platform.h> | ||
33 | #include <linux/interrupt.h> | 32 | #include <linux/interrupt.h> |
34 | #include <linux/delay.h> | 33 | #include <linux/delay.h> |
35 | #include <linux/export.h> | 34 | #include <linux/export.h> |
diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c index 56be31819897..20a7517bd339 100644 --- a/drivers/ata/sata_mv.c +++ b/drivers/ata/sata_mv.c | |||
@@ -60,6 +60,7 @@ | |||
60 | #include <linux/dma-mapping.h> | 60 | #include <linux/dma-mapping.h> |
61 | #include <linux/device.h> | 61 | #include <linux/device.h> |
62 | #include <linux/clk.h> | 62 | #include <linux/clk.h> |
63 | #include <linux/phy/phy.h> | ||
63 | #include <linux/platform_device.h> | 64 | #include <linux/platform_device.h> |
64 | #include <linux/ata_platform.h> | 65 | #include <linux/ata_platform.h> |
65 | #include <linux/mbus.h> | 66 | #include <linux/mbus.h> |
@@ -304,6 +305,7 @@ enum { | |||
304 | MV5_LTMODE = 0x30, | 305 | MV5_LTMODE = 0x30, |
305 | MV5_PHY_CTL = 0x0C, | 306 | MV5_PHY_CTL = 0x0C, |
306 | SATA_IFCFG = 0x050, | 307 | SATA_IFCFG = 0x050, |
308 | LP_PHY_CTL = 0x058, | ||
307 | 309 | ||
308 | MV_M2_PREAMP_MASK = 0x7e0, | 310 | MV_M2_PREAMP_MASK = 0x7e0, |
309 | 311 | ||
@@ -431,6 +433,7 @@ enum { | |||
431 | MV_HP_CUT_THROUGH = (1 << 10), /* can use EDMA cut-through */ | 433 | MV_HP_CUT_THROUGH = (1 << 10), /* can use EDMA cut-through */ |
432 | MV_HP_FLAG_SOC = (1 << 11), /* SystemOnChip, no PCI */ | 434 | MV_HP_FLAG_SOC = (1 << 11), /* SystemOnChip, no PCI */ |
433 | MV_HP_QUIRK_LED_BLINK_EN = (1 << 12), /* is led blinking enabled? */ | 435 | MV_HP_QUIRK_LED_BLINK_EN = (1 << 12), /* is led blinking enabled? */ |
436 | MV_HP_FIX_LP_PHY_CTL = (1 << 13), /* fix speed in LP_PHY_CTL ? */ | ||
434 | 437 | ||
435 | /* Port private flags (pp_flags) */ | 438 | /* Port private flags (pp_flags) */ |
436 | MV_PP_FLAG_EDMA_EN = (1 << 0), /* is EDMA engine enabled? */ | 439 | MV_PP_FLAG_EDMA_EN = (1 << 0), /* is EDMA engine enabled? */ |
@@ -563,6 +566,12 @@ struct mv_host_priv { | |||
563 | struct clk *clk; | 566 | struct clk *clk; |
564 | struct clk **port_clks; | 567 | struct clk **port_clks; |
565 | /* | 568 | /* |
569 | * Some devices have a SATA PHY which can be enabled/disabled | ||
570 | * in order to save power. These are optional: if the platform | ||
571 | * devices does not have any phy, they won't be used. | ||
572 | */ | ||
573 | struct phy **port_phys; | ||
574 | /* | ||
566 | * These consistent DMA memory pools give us guaranteed | 575 | * These consistent DMA memory pools give us guaranteed |
567 | * alignment for hardware-accessed data structures, | 576 | * alignment for hardware-accessed data structures, |
568 | * and less memory waste in accomplishing the alignment. | 577 | * and less memory waste in accomplishing the alignment. |
@@ -1358,6 +1367,7 @@ static int mv_scr_write(struct ata_link *link, unsigned int sc_reg_in, u32 val) | |||
1358 | 1367 | ||
1359 | if (ofs != 0xffffffffU) { | 1368 | if (ofs != 0xffffffffU) { |
1360 | void __iomem *addr = mv_ap_base(link->ap) + ofs; | 1369 | void __iomem *addr = mv_ap_base(link->ap) + ofs; |
1370 | struct mv_host_priv *hpriv = link->ap->host->private_data; | ||
1361 | if (sc_reg_in == SCR_CONTROL) { | 1371 | if (sc_reg_in == SCR_CONTROL) { |
1362 | /* | 1372 | /* |
1363 | * Workaround for 88SX60x1 FEr SATA#26: | 1373 | * Workaround for 88SX60x1 FEr SATA#26: |
@@ -1374,6 +1384,18 @@ static int mv_scr_write(struct ata_link *link, unsigned int sc_reg_in, u32 val) | |||
1374 | */ | 1384 | */ |
1375 | if ((val & 0xf) == 1 || (readl(addr) & 0xf) == 1) | 1385 | if ((val & 0xf) == 1 || (readl(addr) & 0xf) == 1) |
1376 | val |= 0xf000; | 1386 | val |= 0xf000; |
1387 | |||
1388 | if (hpriv->hp_flags & MV_HP_FIX_LP_PHY_CTL) { | ||
1389 | void __iomem *lp_phy_addr = | ||
1390 | mv_ap_base(link->ap) + LP_PHY_CTL; | ||
1391 | /* | ||
1392 | * Set PHY speed according to SControl speed. | ||
1393 | */ | ||
1394 | if ((val & 0xf0) == 0x10) | ||
1395 | writelfl(0x7, lp_phy_addr); | ||
1396 | else | ||
1397 | writelfl(0x227, lp_phy_addr); | ||
1398 | } | ||
1377 | } | 1399 | } |
1378 | writelfl(val, addr); | 1400 | writelfl(val, addr); |
1379 | return 0; | 1401 | return 0; |
@@ -4076,6 +4098,11 @@ static int mv_platform_probe(struct platform_device *pdev) | |||
4076 | GFP_KERNEL); | 4098 | GFP_KERNEL); |
4077 | if (!hpriv->port_clks) | 4099 | if (!hpriv->port_clks) |
4078 | return -ENOMEM; | 4100 | return -ENOMEM; |
4101 | hpriv->port_phys = devm_kzalloc(&pdev->dev, | ||
4102 | sizeof(struct phy *) * n_ports, | ||
4103 | GFP_KERNEL); | ||
4104 | if (!hpriv->port_phys) | ||
4105 | return -ENOMEM; | ||
4079 | host->private_data = hpriv; | 4106 | host->private_data = hpriv; |
4080 | hpriv->n_ports = n_ports; | 4107 | hpriv->n_ports = n_ports; |
4081 | hpriv->board_idx = chip_soc; | 4108 | hpriv->board_idx = chip_soc; |
@@ -4097,6 +4124,17 @@ static int mv_platform_probe(struct platform_device *pdev) | |||
4097 | hpriv->port_clks[port] = clk_get(&pdev->dev, port_number); | 4124 | hpriv->port_clks[port] = clk_get(&pdev->dev, port_number); |
4098 | if (!IS_ERR(hpriv->port_clks[port])) | 4125 | if (!IS_ERR(hpriv->port_clks[port])) |
4099 | clk_prepare_enable(hpriv->port_clks[port]); | 4126 | clk_prepare_enable(hpriv->port_clks[port]); |
4127 | |||
4128 | sprintf(port_number, "port%d", port); | ||
4129 | hpriv->port_phys[port] = devm_phy_get(&pdev->dev, port_number); | ||
4130 | if (IS_ERR(hpriv->port_phys[port])) { | ||
4131 | rc = PTR_ERR(hpriv->port_phys[port]); | ||
4132 | hpriv->port_phys[port] = NULL; | ||
4133 | if ((rc != -EPROBE_DEFER) && (rc != -ENODEV)) | ||
4134 | dev_warn(&pdev->dev, "error getting phy"); | ||
4135 | goto err; | ||
4136 | } else | ||
4137 | phy_power_on(hpriv->port_phys[port]); | ||
4100 | } | 4138 | } |
4101 | 4139 | ||
4102 | /* | 4140 | /* |
@@ -4110,6 +4148,15 @@ static int mv_platform_probe(struct platform_device *pdev) | |||
4110 | if (rc) | 4148 | if (rc) |
4111 | goto err; | 4149 | goto err; |
4112 | 4150 | ||
4151 | /* | ||
4152 | * To allow disk hotplug on Armada 370/XP SoCs, the PHY speed must be | ||
4153 | * updated in the LP_PHY_CTL register. | ||
4154 | */ | ||
4155 | if (pdev->dev.of_node && | ||
4156 | of_device_is_compatible(pdev->dev.of_node, | ||
4157 | "marvell,armada-370-sata")) | ||
4158 | hpriv->hp_flags |= MV_HP_FIX_LP_PHY_CTL; | ||
4159 | |||
4113 | /* initialize adapter */ | 4160 | /* initialize adapter */ |
4114 | rc = mv_init_host(host); | 4161 | rc = mv_init_host(host); |
4115 | if (rc) | 4162 | if (rc) |
@@ -4132,6 +4179,8 @@ err: | |||
4132 | clk_disable_unprepare(hpriv->port_clks[port]); | 4179 | clk_disable_unprepare(hpriv->port_clks[port]); |
4133 | clk_put(hpriv->port_clks[port]); | 4180 | clk_put(hpriv->port_clks[port]); |
4134 | } | 4181 | } |
4182 | if (hpriv->port_phys[port]) | ||
4183 | phy_power_off(hpriv->port_phys[port]); | ||
4135 | } | 4184 | } |
4136 | 4185 | ||
4137 | return rc; | 4186 | return rc; |
@@ -4161,6 +4210,8 @@ static int mv_platform_remove(struct platform_device *pdev) | |||
4161 | clk_disable_unprepare(hpriv->port_clks[port]); | 4210 | clk_disable_unprepare(hpriv->port_clks[port]); |
4162 | clk_put(hpriv->port_clks[port]); | 4211 | clk_put(hpriv->port_clks[port]); |
4163 | } | 4212 | } |
4213 | if (hpriv->port_phys[port]) | ||
4214 | phy_power_off(hpriv->port_phys[port]); | ||
4164 | } | 4215 | } |
4165 | return 0; | 4216 | return 0; |
4166 | } | 4217 | } |
@@ -4209,6 +4260,7 @@ static int mv_platform_resume(struct platform_device *pdev) | |||
4209 | 4260 | ||
4210 | #ifdef CONFIG_OF | 4261 | #ifdef CONFIG_OF |
4211 | static struct of_device_id mv_sata_dt_ids[] = { | 4262 | static struct of_device_id mv_sata_dt_ids[] = { |
4263 | { .compatible = "marvell,armada-370-sata", }, | ||
4212 | { .compatible = "marvell,orion-sata", }, | 4264 | { .compatible = "marvell,orion-sata", }, |
4213 | {}, | 4265 | {}, |
4214 | }; | 4266 | }; |
diff --git a/drivers/ata/sata_rcar.c b/drivers/ata/sata_rcar.c index 1dae9a9009f7..2b25bd83fc9d 100644 --- a/drivers/ata/sata_rcar.c +++ b/drivers/ata/sata_rcar.c | |||
@@ -15,6 +15,7 @@ | |||
15 | #include <linux/module.h> | 15 | #include <linux/module.h> |
16 | #include <linux/ata.h> | 16 | #include <linux/ata.h> |
17 | #include <linux/libata.h> | 17 | #include <linux/libata.h> |
18 | #include <linux/of_device.h> | ||
18 | #include <linux/platform_device.h> | 19 | #include <linux/platform_device.h> |
19 | #include <linux/clk.h> | 20 | #include <linux/clk.h> |
20 | #include <linux/err.h> | 21 | #include <linux/err.h> |
@@ -123,12 +124,37 @@ | |||
123 | 124 | ||
124 | #define SATA_RCAR_DMA_BOUNDARY 0x1FFFFFFEUL | 125 | #define SATA_RCAR_DMA_BOUNDARY 0x1FFFFFFEUL |
125 | 126 | ||
127 | /* Gen2 Physical Layer Control Registers */ | ||
128 | #define RCAR_GEN2_PHY_CTL1_REG 0x1704 | ||
129 | #define RCAR_GEN2_PHY_CTL1 0x34180002 | ||
130 | #define RCAR_GEN2_PHY_CTL1_SS 0xC180 /* Spread Spectrum */ | ||
131 | |||
132 | #define RCAR_GEN2_PHY_CTL2_REG 0x170C | ||
133 | #define RCAR_GEN2_PHY_CTL2 0x00002303 | ||
134 | |||
135 | #define RCAR_GEN2_PHY_CTL3_REG 0x171C | ||
136 | #define RCAR_GEN2_PHY_CTL3 0x000B0194 | ||
137 | |||
138 | #define RCAR_GEN2_PHY_CTL4_REG 0x1724 | ||
139 | #define RCAR_GEN2_PHY_CTL4 0x00030994 | ||
140 | |||
141 | #define RCAR_GEN2_PHY_CTL5_REG 0x1740 | ||
142 | #define RCAR_GEN2_PHY_CTL5 0x03004001 | ||
143 | #define RCAR_GEN2_PHY_CTL5_DC BIT(1) /* DC connection */ | ||
144 | #define RCAR_GEN2_PHY_CTL5_TR BIT(2) /* Termination Resistor */ | ||
145 | |||
146 | enum sata_rcar_type { | ||
147 | RCAR_GEN1_SATA, | ||
148 | RCAR_GEN2_SATA, | ||
149 | }; | ||
150 | |||
126 | struct sata_rcar_priv { | 151 | struct sata_rcar_priv { |
127 | void __iomem *base; | 152 | void __iomem *base; |
128 | struct clk *clk; | 153 | struct clk *clk; |
154 | enum sata_rcar_type type; | ||
129 | }; | 155 | }; |
130 | 156 | ||
131 | static void sata_rcar_phy_initialize(struct sata_rcar_priv *priv) | 157 | static void sata_rcar_gen1_phy_preinit(struct sata_rcar_priv *priv) |
132 | { | 158 | { |
133 | void __iomem *base = priv->base; | 159 | void __iomem *base = priv->base; |
134 | 160 | ||
@@ -141,8 +167,8 @@ static void sata_rcar_phy_initialize(struct sata_rcar_priv *priv) | |||
141 | iowrite32(0, base + SATAPHYRESET_REG); | 167 | iowrite32(0, base + SATAPHYRESET_REG); |
142 | } | 168 | } |
143 | 169 | ||
144 | static void sata_rcar_phy_write(struct sata_rcar_priv *priv, u16 reg, u32 val, | 170 | static void sata_rcar_gen1_phy_write(struct sata_rcar_priv *priv, u16 reg, |
145 | int group) | 171 | u32 val, int group) |
146 | { | 172 | { |
147 | void __iomem *base = priv->base; | 173 | void __iomem *base = priv->base; |
148 | int timeout; | 174 | int timeout; |
@@ -170,6 +196,29 @@ static void sata_rcar_phy_write(struct sata_rcar_priv *priv, u16 reg, u32 val, | |||
170 | iowrite32(0, base + SATAPHYADDR_REG); | 196 | iowrite32(0, base + SATAPHYADDR_REG); |
171 | } | 197 | } |
172 | 198 | ||
199 | static void sata_rcar_gen1_phy_init(struct sata_rcar_priv *priv) | ||
200 | { | ||
201 | sata_rcar_gen1_phy_preinit(priv); | ||
202 | sata_rcar_gen1_phy_write(priv, SATAPCTLR1_REG, 0x00200188, 0); | ||
203 | sata_rcar_gen1_phy_write(priv, SATAPCTLR1_REG, 0x00200188, 1); | ||
204 | sata_rcar_gen1_phy_write(priv, SATAPCTLR3_REG, 0x0000A061, 0); | ||
205 | sata_rcar_gen1_phy_write(priv, SATAPCTLR2_REG, 0x20000000, 0); | ||
206 | sata_rcar_gen1_phy_write(priv, SATAPCTLR2_REG, 0x20000000, 1); | ||
207 | sata_rcar_gen1_phy_write(priv, SATAPCTLR4_REG, 0x28E80000, 0); | ||
208 | } | ||
209 | |||
210 | static void sata_rcar_gen2_phy_init(struct sata_rcar_priv *priv) | ||
211 | { | ||
212 | void __iomem *base = priv->base; | ||
213 | |||
214 | iowrite32(RCAR_GEN2_PHY_CTL1, base + RCAR_GEN2_PHY_CTL1_REG); | ||
215 | iowrite32(RCAR_GEN2_PHY_CTL2, base + RCAR_GEN2_PHY_CTL2_REG); | ||
216 | iowrite32(RCAR_GEN2_PHY_CTL3, base + RCAR_GEN2_PHY_CTL3_REG); | ||
217 | iowrite32(RCAR_GEN2_PHY_CTL4, base + RCAR_GEN2_PHY_CTL4_REG); | ||
218 | iowrite32(RCAR_GEN2_PHY_CTL5 | RCAR_GEN2_PHY_CTL5_DC | | ||
219 | RCAR_GEN2_PHY_CTL5_TR, base + RCAR_GEN2_PHY_CTL5_REG); | ||
220 | } | ||
221 | |||
173 | static void sata_rcar_freeze(struct ata_port *ap) | 222 | static void sata_rcar_freeze(struct ata_port *ap) |
174 | { | 223 | { |
175 | struct sata_rcar_priv *priv = ap->host->private_data; | 224 | struct sata_rcar_priv *priv = ap->host->private_data; |
@@ -738,13 +787,17 @@ static void sata_rcar_init_controller(struct ata_host *host) | |||
738 | u32 val; | 787 | u32 val; |
739 | 788 | ||
740 | /* reset and setup phy */ | 789 | /* reset and setup phy */ |
741 | sata_rcar_phy_initialize(priv); | 790 | switch (priv->type) { |
742 | sata_rcar_phy_write(priv, SATAPCTLR1_REG, 0x00200188, 0); | 791 | case RCAR_GEN1_SATA: |
743 | sata_rcar_phy_write(priv, SATAPCTLR1_REG, 0x00200188, 1); | 792 | sata_rcar_gen1_phy_init(priv); |
744 | sata_rcar_phy_write(priv, SATAPCTLR3_REG, 0x0000A061, 0); | 793 | break; |
745 | sata_rcar_phy_write(priv, SATAPCTLR2_REG, 0x20000000, 0); | 794 | case RCAR_GEN2_SATA: |
746 | sata_rcar_phy_write(priv, SATAPCTLR2_REG, 0x20000000, 1); | 795 | sata_rcar_gen2_phy_init(priv); |
747 | sata_rcar_phy_write(priv, SATAPCTLR4_REG, 0x28E80000, 0); | 796 | break; |
797 | default: | ||
798 | dev_warn(host->dev, "SATA phy is not initialized\n"); | ||
799 | break; | ||
800 | } | ||
748 | 801 | ||
749 | /* SATA-IP reset state */ | 802 | /* SATA-IP reset state */ |
750 | val = ioread32(base + ATAPI_CONTROL1_REG); | 803 | val = ioread32(base + ATAPI_CONTROL1_REG); |
@@ -770,8 +823,40 @@ static void sata_rcar_init_controller(struct ata_host *host) | |||
770 | iowrite32(ATAPI_INT_ENABLE_SATAINT, base + ATAPI_INT_ENABLE_REG); | 823 | iowrite32(ATAPI_INT_ENABLE_SATAINT, base + ATAPI_INT_ENABLE_REG); |
771 | } | 824 | } |
772 | 825 | ||
826 | static struct of_device_id sata_rcar_match[] = { | ||
827 | { | ||
828 | /* Deprecated by "renesas,sata-r8a7779" */ | ||
829 | .compatible = "renesas,rcar-sata", | ||
830 | .data = (void *)RCAR_GEN1_SATA, | ||
831 | }, | ||
832 | { | ||
833 | .compatible = "renesas,sata-r8a7779", | ||
834 | .data = (void *)RCAR_GEN1_SATA, | ||
835 | }, | ||
836 | { | ||
837 | .compatible = "renesas,sata-r8a7790", | ||
838 | .data = (void *)RCAR_GEN2_SATA | ||
839 | }, | ||
840 | { | ||
841 | .compatible = "renesas,sata-r8a7791", | ||
842 | .data = (void *)RCAR_GEN2_SATA | ||
843 | }, | ||
844 | { }, | ||
845 | }; | ||
846 | MODULE_DEVICE_TABLE(of, sata_rcar_match); | ||
847 | |||
848 | static const struct platform_device_id sata_rcar_id_table[] = { | ||
849 | { "sata_rcar", RCAR_GEN1_SATA }, /* Deprecated by "sata-r8a7779" */ | ||
850 | { "sata-r8a7779", RCAR_GEN1_SATA }, | ||
851 | { "sata-r8a7790", RCAR_GEN2_SATA }, | ||
852 | { "sata-r8a7791", RCAR_GEN2_SATA }, | ||
853 | { }, | ||
854 | }; | ||
855 | MODULE_DEVICE_TABLE(platform, sata_rcar_id_table); | ||
856 | |||
773 | static int sata_rcar_probe(struct platform_device *pdev) | 857 | static int sata_rcar_probe(struct platform_device *pdev) |
774 | { | 858 | { |
859 | const struct of_device_id *of_id; | ||
775 | struct ata_host *host; | 860 | struct ata_host *host; |
776 | struct sata_rcar_priv *priv; | 861 | struct sata_rcar_priv *priv; |
777 | struct resource *mem; | 862 | struct resource *mem; |
@@ -787,6 +872,12 @@ static int sata_rcar_probe(struct platform_device *pdev) | |||
787 | if (!priv) | 872 | if (!priv) |
788 | return -ENOMEM; | 873 | return -ENOMEM; |
789 | 874 | ||
875 | of_id = of_match_device(sata_rcar_match, &pdev->dev); | ||
876 | if (of_id) | ||
877 | priv->type = (enum sata_rcar_type)of_id->data; | ||
878 | else | ||
879 | priv->type = platform_get_device_id(pdev)->driver_data; | ||
880 | |||
790 | priv->clk = devm_clk_get(&pdev->dev, NULL); | 881 | priv->clk = devm_clk_get(&pdev->dev, NULL); |
791 | if (IS_ERR(priv->clk)) { | 882 | if (IS_ERR(priv->clk)) { |
792 | dev_err(&pdev->dev, "failed to get access to sata clock\n"); | 883 | dev_err(&pdev->dev, "failed to get access to sata clock\n"); |
@@ -892,15 +983,10 @@ static const struct dev_pm_ops sata_rcar_pm_ops = { | |||
892 | }; | 983 | }; |
893 | #endif | 984 | #endif |
894 | 985 | ||
895 | static struct of_device_id sata_rcar_match[] = { | ||
896 | { .compatible = "renesas,rcar-sata", }, | ||
897 | {}, | ||
898 | }; | ||
899 | MODULE_DEVICE_TABLE(of, sata_rcar_match); | ||
900 | |||
901 | static struct platform_driver sata_rcar_driver = { | 986 | static struct platform_driver sata_rcar_driver = { |
902 | .probe = sata_rcar_probe, | 987 | .probe = sata_rcar_probe, |
903 | .remove = sata_rcar_remove, | 988 | .remove = sata_rcar_remove, |
989 | .id_table = sata_rcar_id_table, | ||
904 | .driver = { | 990 | .driver = { |
905 | .name = DRV_NAME, | 991 | .name = DRV_NAME, |
906 | .owner = THIS_MODULE, | 992 | .owner = THIS_MODULE, |
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index fbcc851ed5a5..61bcfc21d2a0 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c | |||
@@ -163,7 +163,6 @@ static struct cgroup_subsys_state *bcachecg_create(struct cgroup *cgroup) | |||
163 | static void bcachecg_destroy(struct cgroup *cgroup) | 163 | static void bcachecg_destroy(struct cgroup *cgroup) |
164 | { | 164 | { |
165 | struct bch_cgroup *cg = cgroup_to_bcache(cgroup); | 165 | struct bch_cgroup *cg = cgroup_to_bcache(cgroup); |
166 | free_css_id(&bcache_subsys, &cg->css); | ||
167 | kfree(cg); | 166 | kfree(cg); |
168 | } | 167 | } |
169 | 168 | ||
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index d90909ec6aa6..a5e34dd6a32c 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c | |||
@@ -649,6 +649,7 @@ static void process_sctp_notification(struct connection *con, | |||
649 | struct msghdr *msg, char *buf) | 649 | struct msghdr *msg, char *buf) |
650 | { | 650 | { |
651 | union sctp_notification *sn = (union sctp_notification *)buf; | 651 | union sctp_notification *sn = (union sctp_notification *)buf; |
652 | struct linger linger; | ||
652 | 653 | ||
653 | switch (sn->sn_header.sn_type) { | 654 | switch (sn->sn_header.sn_type) { |
654 | case SCTP_SEND_FAILED: | 655 | case SCTP_SEND_FAILED: |
@@ -727,6 +728,13 @@ static void process_sctp_notification(struct connection *con, | |||
727 | } | 728 | } |
728 | add_sock(new_con->sock, new_con); | 729 | add_sock(new_con->sock, new_con); |
729 | 730 | ||
731 | linger.l_onoff = 1; | ||
732 | linger.l_linger = 0; | ||
733 | ret = kernel_setsockopt(new_con->sock, SOL_SOCKET, SO_LINGER, | ||
734 | (char *)&linger, sizeof(linger)); | ||
735 | if (ret < 0) | ||
736 | log_print("set socket option SO_LINGER failed"); | ||
737 | |||
730 | log_print("connecting to %d sctp association %d", | 738 | log_print("connecting to %d sctp association %d", |
731 | nodeid, (int)sn->sn_assoc_change.sac_assoc_id); | 739 | nodeid, (int)sn->sn_assoc_change.sac_assoc_id); |
732 | 740 | ||
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 73f3e4ee4037..49436fa7cd4f 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c | |||
@@ -1032,8 +1032,9 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, | |||
1032 | unmap_shared_mapping_range(ip->i_inode.i_mapping, offset, len); | 1032 | unmap_shared_mapping_range(ip->i_inode.i_mapping, offset, len); |
1033 | rv = filemap_write_and_wait_range(mapping, lstart, end); | 1033 | rv = filemap_write_and_wait_range(mapping, lstart, end); |
1034 | if (rv) | 1034 | if (rv) |
1035 | return rv; | 1035 | goto out; |
1036 | truncate_inode_pages_range(mapping, lstart, end); | 1036 | if (rw == WRITE) |
1037 | truncate_inode_pages_range(mapping, lstart, end); | ||
1037 | } | 1038 | } |
1038 | 1039 | ||
1039 | rv = __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, | 1040 | rv = __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, |
@@ -1080,30 +1081,22 @@ int gfs2_releasepage(struct page *page, gfp_t gfp_mask) | |||
1080 | bh = bh->b_this_page; | 1081 | bh = bh->b_this_page; |
1081 | } while(bh != head); | 1082 | } while(bh != head); |
1082 | spin_unlock(&sdp->sd_ail_lock); | 1083 | spin_unlock(&sdp->sd_ail_lock); |
1083 | gfs2_log_unlock(sdp); | ||
1084 | 1084 | ||
1085 | head = bh = page_buffers(page); | 1085 | head = bh = page_buffers(page); |
1086 | do { | 1086 | do { |
1087 | gfs2_log_lock(sdp); | ||
1088 | bd = bh->b_private; | 1087 | bd = bh->b_private; |
1089 | if (bd) { | 1088 | if (bd) { |
1090 | gfs2_assert_warn(sdp, bd->bd_bh == bh); | 1089 | gfs2_assert_warn(sdp, bd->bd_bh == bh); |
1091 | if (!list_empty(&bd->bd_list)) { | 1090 | if (!list_empty(&bd->bd_list)) |
1092 | if (!buffer_pinned(bh)) | 1091 | list_del_init(&bd->bd_list); |
1093 | list_del_init(&bd->bd_list); | 1092 | bd->bd_bh = NULL; |
1094 | else | ||
1095 | bd = NULL; | ||
1096 | } | ||
1097 | if (bd) | ||
1098 | bd->bd_bh = NULL; | ||
1099 | bh->b_private = NULL; | 1093 | bh->b_private = NULL; |
1100 | } | ||
1101 | gfs2_log_unlock(sdp); | ||
1102 | if (bd) | ||
1103 | kmem_cache_free(gfs2_bufdata_cachep, bd); | 1094 | kmem_cache_free(gfs2_bufdata_cachep, bd); |
1095 | } | ||
1104 | 1096 | ||
1105 | bh = bh->b_this_page; | 1097 | bh = bh->b_this_page; |
1106 | } while (bh != head); | 1098 | } while (bh != head); |
1099 | gfs2_log_unlock(sdp); | ||
1107 | 1100 | ||
1108 | return try_to_free_buffers(page); | 1101 | return try_to_free_buffers(page); |
1109 | 1102 | ||
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index 2e5fc268d324..fa32655449c8 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c | |||
@@ -834,6 +834,7 @@ static struct gfs2_leaf *new_leaf(struct inode *inode, struct buffer_head **pbh, | |||
834 | struct gfs2_leaf *leaf; | 834 | struct gfs2_leaf *leaf; |
835 | struct gfs2_dirent *dent; | 835 | struct gfs2_dirent *dent; |
836 | struct qstr name = { .name = "" }; | 836 | struct qstr name = { .name = "" }; |
837 | struct timespec tv = CURRENT_TIME; | ||
837 | 838 | ||
838 | error = gfs2_alloc_blocks(ip, &bn, &n, 0, NULL); | 839 | error = gfs2_alloc_blocks(ip, &bn, &n, 0, NULL); |
839 | if (error) | 840 | if (error) |
@@ -850,7 +851,11 @@ static struct gfs2_leaf *new_leaf(struct inode *inode, struct buffer_head **pbh, | |||
850 | leaf->lf_entries = 0; | 851 | leaf->lf_entries = 0; |
851 | leaf->lf_dirent_format = cpu_to_be32(GFS2_FORMAT_DE); | 852 | leaf->lf_dirent_format = cpu_to_be32(GFS2_FORMAT_DE); |
852 | leaf->lf_next = 0; | 853 | leaf->lf_next = 0; |
853 | memset(leaf->lf_reserved, 0, sizeof(leaf->lf_reserved)); | 854 | leaf->lf_inode = cpu_to_be64(ip->i_no_addr); |
855 | leaf->lf_dist = cpu_to_be32(1); | ||
856 | leaf->lf_nsec = cpu_to_be32(tv.tv_nsec); | ||
857 | leaf->lf_sec = cpu_to_be64(tv.tv_sec); | ||
858 | memset(leaf->lf_reserved2, 0, sizeof(leaf->lf_reserved2)); | ||
854 | dent = (struct gfs2_dirent *)(leaf+1); | 859 | dent = (struct gfs2_dirent *)(leaf+1); |
855 | gfs2_qstr2dirent(&name, bh->b_size - sizeof(struct gfs2_leaf), dent); | 860 | gfs2_qstr2dirent(&name, bh->b_size - sizeof(struct gfs2_leaf), dent); |
856 | *pbh = bh; | 861 | *pbh = bh; |
@@ -1612,11 +1617,31 @@ out: | |||
1612 | return ret; | 1617 | return ret; |
1613 | } | 1618 | } |
1614 | 1619 | ||
1620 | /** | ||
1621 | * dir_new_leaf - Add a new leaf onto hash chain | ||
1622 | * @inode: The directory | ||
1623 | * @name: The name we are adding | ||
1624 | * | ||
1625 | * This adds a new dir leaf onto an existing leaf when there is not | ||
1626 | * enough space to add a new dir entry. This is a last resort after | ||
1627 | * we've expanded the hash table to max size and also split existing | ||
1628 | * leaf blocks, so it will only occur for very large directories. | ||
1629 | * | ||
1630 | * The dist parameter is set to 1 for leaf blocks directly attached | ||
1631 | * to the hash table, 2 for one layer of indirection, 3 for two layers | ||
1632 | * etc. We are thus able to tell the difference between an old leaf | ||
1633 | * with dist set to zero (i.e. "don't know") and a new one where we | ||
1634 | * set this information for debug/fsck purposes. | ||
1635 | * | ||
1636 | * Returns: 0 on success, or -ve on error | ||
1637 | */ | ||
1638 | |||
1615 | static int dir_new_leaf(struct inode *inode, const struct qstr *name) | 1639 | static int dir_new_leaf(struct inode *inode, const struct qstr *name) |
1616 | { | 1640 | { |
1617 | struct buffer_head *bh, *obh; | 1641 | struct buffer_head *bh, *obh; |
1618 | struct gfs2_inode *ip = GFS2_I(inode); | 1642 | struct gfs2_inode *ip = GFS2_I(inode); |
1619 | struct gfs2_leaf *leaf, *oleaf; | 1643 | struct gfs2_leaf *leaf, *oleaf; |
1644 | u32 dist = 1; | ||
1620 | int error; | 1645 | int error; |
1621 | u32 index; | 1646 | u32 index; |
1622 | u64 bn; | 1647 | u64 bn; |
@@ -1626,6 +1651,7 @@ static int dir_new_leaf(struct inode *inode, const struct qstr *name) | |||
1626 | if (error) | 1651 | if (error) |
1627 | return error; | 1652 | return error; |
1628 | do { | 1653 | do { |
1654 | dist++; | ||
1629 | oleaf = (struct gfs2_leaf *)obh->b_data; | 1655 | oleaf = (struct gfs2_leaf *)obh->b_data; |
1630 | bn = be64_to_cpu(oleaf->lf_next); | 1656 | bn = be64_to_cpu(oleaf->lf_next); |
1631 | if (!bn) | 1657 | if (!bn) |
@@ -1643,6 +1669,7 @@ static int dir_new_leaf(struct inode *inode, const struct qstr *name) | |||
1643 | brelse(obh); | 1669 | brelse(obh); |
1644 | return -ENOSPC; | 1670 | return -ENOSPC; |
1645 | } | 1671 | } |
1672 | leaf->lf_dist = cpu_to_be32(dist); | ||
1646 | oleaf->lf_next = cpu_to_be64(bh->b_blocknr); | 1673 | oleaf->lf_next = cpu_to_be64(bh->b_blocknr); |
1647 | brelse(bh); | 1674 | brelse(bh); |
1648 | brelse(obh); | 1675 | brelse(obh); |
@@ -1659,39 +1686,53 @@ static int dir_new_leaf(struct inode *inode, const struct qstr *name) | |||
1659 | 1686 | ||
1660 | /** | 1687 | /** |
1661 | * gfs2_dir_add - Add new filename into directory | 1688 | * gfs2_dir_add - Add new filename into directory |
1662 | * @dip: The GFS2 inode | 1689 | * @inode: The directory inode |
1663 | * @filename: The new name | 1690 | * @name: The new name |
1664 | * @inode: The inode number of the entry | 1691 | * @nip: The GFS2 inode to be linked in to the directory |
1665 | * @type: The type of the entry | 1692 | * @da: The directory addition info |
1693 | * | ||
1694 | * If the call to gfs2_diradd_alloc_required resulted in there being | ||
1695 | * no need to allocate any new directory blocks, then it will contain | ||
1696 | * a pointer to the directory entry and the bh in which it resides. We | ||
1697 | * can use that without having to repeat the search. If there was no | ||
1698 | * free space, then we must now create more space. | ||
1666 | * | 1699 | * |
1667 | * Returns: 0 on success, error code on failure | 1700 | * Returns: 0 on success, error code on failure |
1668 | */ | 1701 | */ |
1669 | 1702 | ||
1670 | int gfs2_dir_add(struct inode *inode, const struct qstr *name, | 1703 | int gfs2_dir_add(struct inode *inode, const struct qstr *name, |
1671 | const struct gfs2_inode *nip) | 1704 | const struct gfs2_inode *nip, struct gfs2_diradd *da) |
1672 | { | 1705 | { |
1673 | struct gfs2_inode *ip = GFS2_I(inode); | 1706 | struct gfs2_inode *ip = GFS2_I(inode); |
1674 | struct buffer_head *bh; | 1707 | struct buffer_head *bh = da->bh; |
1675 | struct gfs2_dirent *dent; | 1708 | struct gfs2_dirent *dent = da->dent; |
1709 | struct timespec tv; | ||
1676 | struct gfs2_leaf *leaf; | 1710 | struct gfs2_leaf *leaf; |
1677 | int error; | 1711 | int error; |
1678 | 1712 | ||
1679 | while(1) { | 1713 | while(1) { |
1680 | dent = gfs2_dirent_search(inode, name, gfs2_dirent_find_space, | 1714 | if (da->bh == NULL) { |
1681 | &bh); | 1715 | dent = gfs2_dirent_search(inode, name, |
1716 | gfs2_dirent_find_space, &bh); | ||
1717 | } | ||
1682 | if (dent) { | 1718 | if (dent) { |
1683 | if (IS_ERR(dent)) | 1719 | if (IS_ERR(dent)) |
1684 | return PTR_ERR(dent); | 1720 | return PTR_ERR(dent); |
1685 | dent = gfs2_init_dirent(inode, dent, name, bh); | 1721 | dent = gfs2_init_dirent(inode, dent, name, bh); |
1686 | gfs2_inum_out(nip, dent); | 1722 | gfs2_inum_out(nip, dent); |
1687 | dent->de_type = cpu_to_be16(IF2DT(nip->i_inode.i_mode)); | 1723 | dent->de_type = cpu_to_be16(IF2DT(nip->i_inode.i_mode)); |
1724 | tv = CURRENT_TIME; | ||
1688 | if (ip->i_diskflags & GFS2_DIF_EXHASH) { | 1725 | if (ip->i_diskflags & GFS2_DIF_EXHASH) { |
1689 | leaf = (struct gfs2_leaf *)bh->b_data; | 1726 | leaf = (struct gfs2_leaf *)bh->b_data; |
1690 | be16_add_cpu(&leaf->lf_entries, 1); | 1727 | be16_add_cpu(&leaf->lf_entries, 1); |
1728 | leaf->lf_nsec = cpu_to_be32(tv.tv_nsec); | ||
1729 | leaf->lf_sec = cpu_to_be64(tv.tv_sec); | ||
1691 | } | 1730 | } |
1731 | da->dent = NULL; | ||
1732 | da->bh = NULL; | ||
1692 | brelse(bh); | 1733 | brelse(bh); |
1693 | ip->i_entries++; | 1734 | ip->i_entries++; |
1694 | ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; | 1735 | ip->i_inode.i_mtime = ip->i_inode.i_ctime = tv; |
1695 | if (S_ISDIR(nip->i_inode.i_mode)) | 1736 | if (S_ISDIR(nip->i_inode.i_mode)) |
1696 | inc_nlink(&ip->i_inode); | 1737 | inc_nlink(&ip->i_inode); |
1697 | mark_inode_dirty(inode); | 1738 | mark_inode_dirty(inode); |
@@ -1742,6 +1783,7 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry) | |||
1742 | const struct qstr *name = &dentry->d_name; | 1783 | const struct qstr *name = &dentry->d_name; |
1743 | struct gfs2_dirent *dent, *prev = NULL; | 1784 | struct gfs2_dirent *dent, *prev = NULL; |
1744 | struct buffer_head *bh; | 1785 | struct buffer_head *bh; |
1786 | struct timespec tv = CURRENT_TIME; | ||
1745 | 1787 | ||
1746 | /* Returns _either_ the entry (if its first in block) or the | 1788 | /* Returns _either_ the entry (if its first in block) or the |
1747 | previous entry otherwise */ | 1789 | previous entry otherwise */ |
@@ -1767,13 +1809,15 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry) | |||
1767 | if (!entries) | 1809 | if (!entries) |
1768 | gfs2_consist_inode(dip); | 1810 | gfs2_consist_inode(dip); |
1769 | leaf->lf_entries = cpu_to_be16(--entries); | 1811 | leaf->lf_entries = cpu_to_be16(--entries); |
1812 | leaf->lf_nsec = cpu_to_be32(tv.tv_nsec); | ||
1813 | leaf->lf_sec = cpu_to_be64(tv.tv_sec); | ||
1770 | } | 1814 | } |
1771 | brelse(bh); | 1815 | brelse(bh); |
1772 | 1816 | ||
1773 | if (!dip->i_entries) | 1817 | if (!dip->i_entries) |
1774 | gfs2_consist_inode(dip); | 1818 | gfs2_consist_inode(dip); |
1775 | dip->i_entries--; | 1819 | dip->i_entries--; |
1776 | dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME; | 1820 | dip->i_inode.i_mtime = dip->i_inode.i_ctime = tv; |
1777 | if (S_ISDIR(dentry->d_inode->i_mode)) | 1821 | if (S_ISDIR(dentry->d_inode->i_mode)) |
1778 | drop_nlink(&dip->i_inode); | 1822 | drop_nlink(&dip->i_inode); |
1779 | mark_inode_dirty(&dip->i_inode); | 1823 | mark_inode_dirty(&dip->i_inode); |
@@ -2017,22 +2061,36 @@ out: | |||
2017 | * gfs2_diradd_alloc_required - find if adding entry will require an allocation | 2061 | * gfs2_diradd_alloc_required - find if adding entry will require an allocation |
2018 | * @ip: the file being written to | 2062 | * @ip: the file being written to |
2019 | * @filname: the filename that's going to be added | 2063 | * @filname: the filename that's going to be added |
2064 | * @da: The structure to return dir alloc info | ||
2020 | * | 2065 | * |
2021 | * Returns: 1 if alloc required, 0 if not, -ve on error | 2066 | * Returns: 0 if ok, -ve on error |
2022 | */ | 2067 | */ |
2023 | 2068 | ||
2024 | int gfs2_diradd_alloc_required(struct inode *inode, const struct qstr *name) | 2069 | int gfs2_diradd_alloc_required(struct inode *inode, const struct qstr *name, |
2070 | struct gfs2_diradd *da) | ||
2025 | { | 2071 | { |
2072 | struct gfs2_inode *ip = GFS2_I(inode); | ||
2073 | struct gfs2_sbd *sdp = GFS2_SB(inode); | ||
2074 | const unsigned int extra = sizeof(struct gfs2_dinode) - sizeof(struct gfs2_leaf); | ||
2026 | struct gfs2_dirent *dent; | 2075 | struct gfs2_dirent *dent; |
2027 | struct buffer_head *bh; | 2076 | struct buffer_head *bh; |
2028 | 2077 | ||
2078 | da->nr_blocks = 0; | ||
2079 | da->bh = NULL; | ||
2080 | da->dent = NULL; | ||
2081 | |||
2029 | dent = gfs2_dirent_search(inode, name, gfs2_dirent_find_space, &bh); | 2082 | dent = gfs2_dirent_search(inode, name, gfs2_dirent_find_space, &bh); |
2030 | if (!dent) { | 2083 | if (!dent) { |
2031 | return 1; | 2084 | da->nr_blocks = sdp->sd_max_dirres; |
2085 | if (!(ip->i_diskflags & GFS2_DIF_EXHASH) && | ||
2086 | (GFS2_DIRENT_SIZE(name->len) < extra)) | ||
2087 | da->nr_blocks = 1; | ||
2088 | return 0; | ||
2032 | } | 2089 | } |
2033 | if (IS_ERR(dent)) | 2090 | if (IS_ERR(dent)) |
2034 | return PTR_ERR(dent); | 2091 | return PTR_ERR(dent); |
2035 | brelse(bh); | 2092 | da->bh = bh; |
2093 | da->dent = dent; | ||
2036 | return 0; | 2094 | return 0; |
2037 | } | 2095 | } |
2038 | 2096 | ||
diff --git a/fs/gfs2/dir.h b/fs/gfs2/dir.h index 4f03bbd1873f..126c65dda028 100644 --- a/fs/gfs2/dir.h +++ b/fs/gfs2/dir.h | |||
@@ -16,6 +16,14 @@ | |||
16 | struct inode; | 16 | struct inode; |
17 | struct gfs2_inode; | 17 | struct gfs2_inode; |
18 | struct gfs2_inum; | 18 | struct gfs2_inum; |
19 | struct buffer_head; | ||
20 | struct gfs2_dirent; | ||
21 | |||
22 | struct gfs2_diradd { | ||
23 | unsigned nr_blocks; | ||
24 | struct gfs2_dirent *dent; | ||
25 | struct buffer_head *bh; | ||
26 | }; | ||
19 | 27 | ||
20 | extern struct inode *gfs2_dir_search(struct inode *dir, | 28 | extern struct inode *gfs2_dir_search(struct inode *dir, |
21 | const struct qstr *filename, | 29 | const struct qstr *filename, |
@@ -23,7 +31,13 @@ extern struct inode *gfs2_dir_search(struct inode *dir, | |||
23 | extern int gfs2_dir_check(struct inode *dir, const struct qstr *filename, | 31 | extern int gfs2_dir_check(struct inode *dir, const struct qstr *filename, |
24 | const struct gfs2_inode *ip); | 32 | const struct gfs2_inode *ip); |
25 | extern int gfs2_dir_add(struct inode *inode, const struct qstr *filename, | 33 | extern int gfs2_dir_add(struct inode *inode, const struct qstr *filename, |
26 | const struct gfs2_inode *ip); | 34 | const struct gfs2_inode *ip, struct gfs2_diradd *da); |
35 | static inline void gfs2_dir_no_add(struct gfs2_diradd *da) | ||
36 | { | ||
37 | if (da->bh) | ||
38 | brelse(da->bh); | ||
39 | da->bh = NULL; | ||
40 | } | ||
27 | extern int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry); | 41 | extern int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry); |
28 | extern int gfs2_dir_read(struct inode *inode, struct dir_context *ctx, | 42 | extern int gfs2_dir_read(struct inode *inode, struct dir_context *ctx, |
29 | struct file_ra_state *f_ra); | 43 | struct file_ra_state *f_ra); |
@@ -33,7 +47,8 @@ extern int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename, | |||
33 | extern int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip); | 47 | extern int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip); |
34 | 48 | ||
35 | extern int gfs2_diradd_alloc_required(struct inode *dir, | 49 | extern int gfs2_diradd_alloc_required(struct inode *dir, |
36 | const struct qstr *filename); | 50 | const struct qstr *filename, |
51 | struct gfs2_diradd *da); | ||
37 | extern int gfs2_dir_get_new_buffer(struct gfs2_inode *ip, u64 block, | 52 | extern int gfs2_dir_get_new_buffer(struct gfs2_inode *ip, u64 block, |
38 | struct buffer_head **bhp); | 53 | struct buffer_head **bhp); |
39 | extern void gfs2_dir_hash_inval(struct gfs2_inode *ip); | 54 | extern void gfs2_dir_hash_inval(struct gfs2_inode *ip); |
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 6f7a47c05259..ca0be6c69a26 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c | |||
@@ -1552,13 +1552,11 @@ void gfs2_glock_thaw(struct gfs2_sbd *sdp) | |||
1552 | glock_hash_walk(thaw_glock, sdp); | 1552 | glock_hash_walk(thaw_glock, sdp); |
1553 | } | 1553 | } |
1554 | 1554 | ||
1555 | static int dump_glock(struct seq_file *seq, struct gfs2_glock *gl) | 1555 | static void dump_glock(struct seq_file *seq, struct gfs2_glock *gl) |
1556 | { | 1556 | { |
1557 | int ret; | ||
1558 | spin_lock(&gl->gl_spin); | 1557 | spin_lock(&gl->gl_spin); |
1559 | ret = gfs2_dump_glock(seq, gl); | 1558 | gfs2_dump_glock(seq, gl); |
1560 | spin_unlock(&gl->gl_spin); | 1559 | spin_unlock(&gl->gl_spin); |
1561 | return ret; | ||
1562 | } | 1560 | } |
1563 | 1561 | ||
1564 | static void dump_glock_func(struct gfs2_glock *gl) | 1562 | static void dump_glock_func(struct gfs2_glock *gl) |
@@ -1647,10 +1645,9 @@ static const char *hflags2str(char *buf, unsigned flags, unsigned long iflags) | |||
1647 | * @seq: the seq_file struct | 1645 | * @seq: the seq_file struct |
1648 | * @gh: the glock holder | 1646 | * @gh: the glock holder |
1649 | * | 1647 | * |
1650 | * Returns: 0 on success, -ENOBUFS when we run out of space | ||
1651 | */ | 1648 | */ |
1652 | 1649 | ||
1653 | static int dump_holder(struct seq_file *seq, const struct gfs2_holder *gh) | 1650 | static void dump_holder(struct seq_file *seq, const struct gfs2_holder *gh) |
1654 | { | 1651 | { |
1655 | struct task_struct *gh_owner = NULL; | 1652 | struct task_struct *gh_owner = NULL; |
1656 | char flags_buf[32]; | 1653 | char flags_buf[32]; |
@@ -1666,7 +1663,6 @@ static int dump_holder(struct seq_file *seq, const struct gfs2_holder *gh) | |||
1666 | gh_owner ? gh_owner->comm : "(ended)", | 1663 | gh_owner ? gh_owner->comm : "(ended)", |
1667 | (void *)gh->gh_ip); | 1664 | (void *)gh->gh_ip); |
1668 | rcu_read_unlock(); | 1665 | rcu_read_unlock(); |
1669 | return 0; | ||
1670 | } | 1666 | } |
1671 | 1667 | ||
1672 | static const char *gflags2str(char *buf, const struct gfs2_glock *gl) | 1668 | static const char *gflags2str(char *buf, const struct gfs2_glock *gl) |
@@ -1721,16 +1717,14 @@ static const char *gflags2str(char *buf, const struct gfs2_glock *gl) | |||
1721 | * example. The field's are n = number (id of the object), f = flags, | 1717 | * example. The field's are n = number (id of the object), f = flags, |
1722 | * t = type, s = state, r = refcount, e = error, p = pid. | 1718 | * t = type, s = state, r = refcount, e = error, p = pid. |
1723 | * | 1719 | * |
1724 | * Returns: 0 on success, -ENOBUFS when we run out of space | ||
1725 | */ | 1720 | */ |
1726 | 1721 | ||
1727 | int gfs2_dump_glock(struct seq_file *seq, const struct gfs2_glock *gl) | 1722 | void gfs2_dump_glock(struct seq_file *seq, const struct gfs2_glock *gl) |
1728 | { | 1723 | { |
1729 | const struct gfs2_glock_operations *glops = gl->gl_ops; | 1724 | const struct gfs2_glock_operations *glops = gl->gl_ops; |
1730 | unsigned long long dtime; | 1725 | unsigned long long dtime; |
1731 | const struct gfs2_holder *gh; | 1726 | const struct gfs2_holder *gh; |
1732 | char gflags_buf[32]; | 1727 | char gflags_buf[32]; |
1733 | int error = 0; | ||
1734 | 1728 | ||
1735 | dtime = jiffies - gl->gl_demote_time; | 1729 | dtime = jiffies - gl->gl_demote_time; |
1736 | dtime *= 1000000/HZ; /* demote time in uSec */ | 1730 | dtime *= 1000000/HZ; /* demote time in uSec */ |
@@ -1747,15 +1741,11 @@ int gfs2_dump_glock(struct seq_file *seq, const struct gfs2_glock *gl) | |||
1747 | atomic_read(&gl->gl_revokes), | 1741 | atomic_read(&gl->gl_revokes), |
1748 | (int)gl->gl_lockref.count, gl->gl_hold_time); | 1742 | (int)gl->gl_lockref.count, gl->gl_hold_time); |
1749 | 1743 | ||
1750 | list_for_each_entry(gh, &gl->gl_holders, gh_list) { | 1744 | list_for_each_entry(gh, &gl->gl_holders, gh_list) |
1751 | error = dump_holder(seq, gh); | 1745 | dump_holder(seq, gh); |
1752 | if (error) | 1746 | |
1753 | goto out; | ||
1754 | } | ||
1755 | if (gl->gl_state != LM_ST_UNLOCKED && glops->go_dump) | 1747 | if (gl->gl_state != LM_ST_UNLOCKED && glops->go_dump) |
1756 | error = glops->go_dump(seq, gl); | 1748 | glops->go_dump(seq, gl); |
1757 | out: | ||
1758 | return error; | ||
1759 | } | 1749 | } |
1760 | 1750 | ||
1761 | static int gfs2_glstats_seq_show(struct seq_file *seq, void *iter_ptr) | 1751 | static int gfs2_glstats_seq_show(struct seq_file *seq, void *iter_ptr) |
@@ -1953,7 +1943,8 @@ static void gfs2_glock_seq_stop(struct seq_file *seq, void *iter_ptr) | |||
1953 | 1943 | ||
1954 | static int gfs2_glock_seq_show(struct seq_file *seq, void *iter_ptr) | 1944 | static int gfs2_glock_seq_show(struct seq_file *seq, void *iter_ptr) |
1955 | { | 1945 | { |
1956 | return dump_glock(seq, iter_ptr); | 1946 | dump_glock(seq, iter_ptr); |
1947 | return 0; | ||
1957 | } | 1948 | } |
1958 | 1949 | ||
1959 | static void *gfs2_sbstats_seq_start(struct seq_file *seq, loff_t *pos) | 1950 | static void *gfs2_sbstats_seq_start(struct seq_file *seq, loff_t *pos) |
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index 6647d77366ba..32572f71f027 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h | |||
@@ -199,7 +199,7 @@ extern int gfs2_glock_nq_num(struct gfs2_sbd *sdp, u64 number, | |||
199 | struct gfs2_holder *gh); | 199 | struct gfs2_holder *gh); |
200 | extern int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs); | 200 | extern int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs); |
201 | extern void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs); | 201 | extern void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs); |
202 | extern int gfs2_dump_glock(struct seq_file *seq, const struct gfs2_glock *gl); | 202 | extern void gfs2_dump_glock(struct seq_file *seq, const struct gfs2_glock *gl); |
203 | #define GLOCK_BUG_ON(gl,x) do { if (unlikely(x)) { gfs2_dump_glock(NULL, gl); BUG(); } } while(0) | 203 | #define GLOCK_BUG_ON(gl,x) do { if (unlikely(x)) { gfs2_dump_glock(NULL, gl); BUG(); } } while(0) |
204 | extern __printf(2, 3) | 204 | extern __printf(2, 3) |
205 | void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...); | 205 | void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...); |
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index f88dcd925010..3bf0631b5d56 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c | |||
@@ -133,7 +133,8 @@ void gfs2_ail_flush(struct gfs2_glock *gl, bool fsync) | |||
133 | 133 | ||
134 | static void rgrp_go_sync(struct gfs2_glock *gl) | 134 | static void rgrp_go_sync(struct gfs2_glock *gl) |
135 | { | 135 | { |
136 | struct address_space *metamapping = gfs2_glock2aspace(gl); | 136 | struct gfs2_sbd *sdp = gl->gl_sbd; |
137 | struct address_space *mapping = &sdp->sd_aspace; | ||
137 | struct gfs2_rgrpd *rgd; | 138 | struct gfs2_rgrpd *rgd; |
138 | int error; | 139 | int error; |
139 | 140 | ||
@@ -141,10 +142,10 @@ static void rgrp_go_sync(struct gfs2_glock *gl) | |||
141 | return; | 142 | return; |
142 | GLOCK_BUG_ON(gl, gl->gl_state != LM_ST_EXCLUSIVE); | 143 | GLOCK_BUG_ON(gl, gl->gl_state != LM_ST_EXCLUSIVE); |
143 | 144 | ||
144 | gfs2_log_flush(gl->gl_sbd, gl); | 145 | gfs2_log_flush(sdp, gl); |
145 | filemap_fdatawrite(metamapping); | 146 | filemap_fdatawrite_range(mapping, gl->gl_vm.start, gl->gl_vm.end); |
146 | error = filemap_fdatawait(metamapping); | 147 | error = filemap_fdatawait_range(mapping, gl->gl_vm.start, gl->gl_vm.end); |
147 | mapping_set_error(metamapping, error); | 148 | mapping_set_error(mapping, error); |
148 | gfs2_ail_empty_gl(gl); | 149 | gfs2_ail_empty_gl(gl); |
149 | 150 | ||
150 | spin_lock(&gl->gl_spin); | 151 | spin_lock(&gl->gl_spin); |
@@ -166,11 +167,12 @@ static void rgrp_go_sync(struct gfs2_glock *gl) | |||
166 | 167 | ||
167 | static void rgrp_go_inval(struct gfs2_glock *gl, int flags) | 168 | static void rgrp_go_inval(struct gfs2_glock *gl, int flags) |
168 | { | 169 | { |
169 | struct address_space *mapping = gfs2_glock2aspace(gl); | 170 | struct gfs2_sbd *sdp = gl->gl_sbd; |
171 | struct address_space *mapping = &sdp->sd_aspace; | ||
170 | 172 | ||
171 | WARN_ON_ONCE(!(flags & DIO_METADATA)); | 173 | WARN_ON_ONCE(!(flags & DIO_METADATA)); |
172 | gfs2_assert_withdraw(gl->gl_sbd, !atomic_read(&gl->gl_ail_count)); | 174 | gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count)); |
173 | truncate_inode_pages(mapping, 0); | 175 | truncate_inode_pages_range(mapping, gl->gl_vm.start, gl->gl_vm.end); |
174 | 176 | ||
175 | if (gl->gl_object) { | 177 | if (gl->gl_object) { |
176 | struct gfs2_rgrpd *rgd = (struct gfs2_rgrpd *)gl->gl_object; | 178 | struct gfs2_rgrpd *rgd = (struct gfs2_rgrpd *)gl->gl_object; |
@@ -435,21 +437,19 @@ static int inode_go_lock(struct gfs2_holder *gh) | |||
435 | * @seq: The iterator | 437 | * @seq: The iterator |
436 | * @ip: the inode | 438 | * @ip: the inode |
437 | * | 439 | * |
438 | * Returns: 0 on success, -ENOBUFS when we run out of space | ||
439 | */ | 440 | */ |
440 | 441 | ||
441 | static int inode_go_dump(struct seq_file *seq, const struct gfs2_glock *gl) | 442 | static void inode_go_dump(struct seq_file *seq, const struct gfs2_glock *gl) |
442 | { | 443 | { |
443 | const struct gfs2_inode *ip = gl->gl_object; | 444 | const struct gfs2_inode *ip = gl->gl_object; |
444 | if (ip == NULL) | 445 | if (ip == NULL) |
445 | return 0; | 446 | return; |
446 | gfs2_print_dbg(seq, " I: n:%llu/%llu t:%u f:0x%02lx d:0x%08x s:%llu\n", | 447 | gfs2_print_dbg(seq, " I: n:%llu/%llu t:%u f:0x%02lx d:0x%08x s:%llu\n", |
447 | (unsigned long long)ip->i_no_formal_ino, | 448 | (unsigned long long)ip->i_no_formal_ino, |
448 | (unsigned long long)ip->i_no_addr, | 449 | (unsigned long long)ip->i_no_addr, |
449 | IF2DT(ip->i_inode.i_mode), ip->i_flags, | 450 | IF2DT(ip->i_inode.i_mode), ip->i_flags, |
450 | (unsigned int)ip->i_diskflags, | 451 | (unsigned int)ip->i_diskflags, |
451 | (unsigned long long)i_size_read(&ip->i_inode)); | 452 | (unsigned long long)i_size_read(&ip->i_inode)); |
452 | return 0; | ||
453 | } | 453 | } |
454 | 454 | ||
455 | /** | 455 | /** |
@@ -558,7 +558,7 @@ const struct gfs2_glock_operations gfs2_rgrp_glops = { | |||
558 | .go_unlock = gfs2_rgrp_go_unlock, | 558 | .go_unlock = gfs2_rgrp_go_unlock, |
559 | .go_dump = gfs2_rgrp_dump, | 559 | .go_dump = gfs2_rgrp_dump, |
560 | .go_type = LM_TYPE_RGRP, | 560 | .go_type = LM_TYPE_RGRP, |
561 | .go_flags = GLOF_ASPACE | GLOF_LVB, | 561 | .go_flags = GLOF_LVB, |
562 | }; | 562 | }; |
563 | 563 | ||
564 | const struct gfs2_glock_operations gfs2_trans_glops = { | 564 | const struct gfs2_glock_operations gfs2_trans_glops = { |
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index ba1ea67f4eeb..cf0e34400f71 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h | |||
@@ -93,6 +93,7 @@ struct gfs2_rgrpd { | |||
93 | struct gfs2_rgrp_lvb *rd_rgl; | 93 | struct gfs2_rgrp_lvb *rd_rgl; |
94 | u32 rd_last_alloc; | 94 | u32 rd_last_alloc; |
95 | u32 rd_flags; | 95 | u32 rd_flags; |
96 | u32 rd_extfail_pt; /* extent failure point */ | ||
96 | #define GFS2_RDF_CHECK 0x10000000 /* check for unlinked inodes */ | 97 | #define GFS2_RDF_CHECK 0x10000000 /* check for unlinked inodes */ |
97 | #define GFS2_RDF_UPTODATE 0x20000000 /* rg is up to date */ | 98 | #define GFS2_RDF_UPTODATE 0x20000000 /* rg is up to date */ |
98 | #define GFS2_RDF_ERROR 0x40000000 /* error in rg */ | 99 | #define GFS2_RDF_ERROR 0x40000000 /* error in rg */ |
@@ -217,7 +218,7 @@ struct gfs2_glock_operations { | |||
217 | int (*go_demote_ok) (const struct gfs2_glock *gl); | 218 | int (*go_demote_ok) (const struct gfs2_glock *gl); |
218 | int (*go_lock) (struct gfs2_holder *gh); | 219 | int (*go_lock) (struct gfs2_holder *gh); |
219 | void (*go_unlock) (struct gfs2_holder *gh); | 220 | void (*go_unlock) (struct gfs2_holder *gh); |
220 | int (*go_dump)(struct seq_file *seq, const struct gfs2_glock *gl); | 221 | void (*go_dump)(struct seq_file *seq, const struct gfs2_glock *gl); |
221 | void (*go_callback)(struct gfs2_glock *gl, bool remote); | 222 | void (*go_callback)(struct gfs2_glock *gl, bool remote); |
222 | const int go_type; | 223 | const int go_type; |
223 | const unsigned long go_flags; | 224 | const unsigned long go_flags; |
@@ -350,7 +351,15 @@ struct gfs2_glock { | |||
350 | atomic_t gl_ail_count; | 351 | atomic_t gl_ail_count; |
351 | atomic_t gl_revokes; | 352 | atomic_t gl_revokes; |
352 | struct delayed_work gl_work; | 353 | struct delayed_work gl_work; |
353 | struct work_struct gl_delete; | 354 | union { |
355 | /* For inode and iopen glocks only */ | ||
356 | struct work_struct gl_delete; | ||
357 | /* For rgrp glocks only */ | ||
358 | struct { | ||
359 | loff_t start; | ||
360 | loff_t end; | ||
361 | } gl_vm; | ||
362 | }; | ||
354 | struct rcu_head gl_rcu; | 363 | struct rcu_head gl_rcu; |
355 | }; | 364 | }; |
356 | 365 | ||
@@ -419,10 +428,13 @@ enum { | |||
419 | }; | 428 | }; |
420 | 429 | ||
421 | struct gfs2_quota_data { | 430 | struct gfs2_quota_data { |
431 | struct hlist_bl_node qd_hlist; | ||
422 | struct list_head qd_list; | 432 | struct list_head qd_list; |
423 | struct kqid qd_id; | 433 | struct kqid qd_id; |
434 | struct gfs2_sbd *qd_sbd; | ||
424 | struct lockref qd_lockref; | 435 | struct lockref qd_lockref; |
425 | struct list_head qd_lru; | 436 | struct list_head qd_lru; |
437 | unsigned qd_hash; | ||
426 | 438 | ||
427 | unsigned long qd_flags; /* QDF_... */ | 439 | unsigned long qd_flags; /* QDF_... */ |
428 | 440 | ||
@@ -441,6 +453,7 @@ struct gfs2_quota_data { | |||
441 | 453 | ||
442 | u64 qd_sync_gen; | 454 | u64 qd_sync_gen; |
443 | unsigned long qd_last_warn; | 455 | unsigned long qd_last_warn; |
456 | struct rcu_head qd_rcu; | ||
444 | }; | 457 | }; |
445 | 458 | ||
446 | struct gfs2_trans { | 459 | struct gfs2_trans { |
@@ -720,13 +733,15 @@ struct gfs2_sbd { | |||
720 | spinlock_t sd_trunc_lock; | 733 | spinlock_t sd_trunc_lock; |
721 | 734 | ||
722 | unsigned int sd_quota_slots; | 735 | unsigned int sd_quota_slots; |
723 | unsigned int sd_quota_chunks; | 736 | unsigned long *sd_quota_bitmap; |
724 | unsigned char **sd_quota_bitmap; | 737 | spinlock_t sd_bitmap_lock; |
725 | 738 | ||
726 | u64 sd_quota_sync_gen; | 739 | u64 sd_quota_sync_gen; |
727 | 740 | ||
728 | /* Log stuff */ | 741 | /* Log stuff */ |
729 | 742 | ||
743 | struct address_space sd_aspace; | ||
744 | |||
730 | spinlock_t sd_log_lock; | 745 | spinlock_t sd_log_lock; |
731 | 746 | ||
732 | struct gfs2_trans *sd_log_tr; | 747 | struct gfs2_trans *sd_log_tr; |
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 7119504159f1..890588c7fb33 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c | |||
@@ -149,7 +149,7 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, | |||
149 | ip = GFS2_I(inode); | 149 | ip = GFS2_I(inode); |
150 | 150 | ||
151 | if (!inode) | 151 | if (!inode) |
152 | return ERR_PTR(-ENOBUFS); | 152 | return ERR_PTR(-ENOMEM); |
153 | 153 | ||
154 | if (inode->i_state & I_NEW) { | 154 | if (inode->i_state & I_NEW) { |
155 | struct gfs2_sbd *sdp = GFS2_SB(inode); | 155 | struct gfs2_sbd *sdp = GFS2_SB(inode); |
@@ -469,14 +469,36 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_inode *ip, | |||
469 | brelse(dibh); | 469 | brelse(dibh); |
470 | } | 470 | } |
471 | 471 | ||
472 | /** | ||
473 | * gfs2_trans_da_blocks - Calculate number of blocks to link inode | ||
474 | * @dip: The directory we are linking into | ||
475 | * @da: The dir add information | ||
476 | * @nr_inodes: The number of inodes involved | ||
477 | * | ||
478 | * This calculate the number of blocks we need to reserve in a | ||
479 | * transaction to link @nr_inodes into a directory. In most cases | ||
480 | * @nr_inodes will be 2 (the directory plus the inode being linked in) | ||
481 | * but in case of rename, 4 may be required. | ||
482 | * | ||
483 | * Returns: Number of blocks | ||
484 | */ | ||
485 | |||
486 | static unsigned gfs2_trans_da_blks(const struct gfs2_inode *dip, | ||
487 | const struct gfs2_diradd *da, | ||
488 | unsigned nr_inodes) | ||
489 | { | ||
490 | return da->nr_blocks + gfs2_rg_blocks(dip, da->nr_blocks) + | ||
491 | (nr_inodes * RES_DINODE) + RES_QUOTA + RES_STATFS; | ||
492 | } | ||
493 | |||
472 | static int link_dinode(struct gfs2_inode *dip, const struct qstr *name, | 494 | static int link_dinode(struct gfs2_inode *dip, const struct qstr *name, |
473 | struct gfs2_inode *ip, int arq) | 495 | struct gfs2_inode *ip, struct gfs2_diradd *da) |
474 | { | 496 | { |
475 | struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); | 497 | struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); |
476 | struct gfs2_alloc_parms ap = { .target = sdp->sd_max_dirres, }; | 498 | struct gfs2_alloc_parms ap = { .target = da->nr_blocks, }; |
477 | int error; | 499 | int error; |
478 | 500 | ||
479 | if (arq) { | 501 | if (da->nr_blocks) { |
480 | error = gfs2_quota_lock_check(dip); | 502 | error = gfs2_quota_lock_check(dip); |
481 | if (error) | 503 | if (error) |
482 | goto fail_quota_locks; | 504 | goto fail_quota_locks; |
@@ -485,10 +507,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name, | |||
485 | if (error) | 507 | if (error) |
486 | goto fail_quota_locks; | 508 | goto fail_quota_locks; |
487 | 509 | ||
488 | error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + | 510 | error = gfs2_trans_begin(sdp, gfs2_trans_da_blks(dip, da, 2), 0); |
489 | dip->i_rgd->rd_length + | ||
490 | 2 * RES_DINODE + | ||
491 | RES_STATFS + RES_QUOTA, 0); | ||
492 | if (error) | 511 | if (error) |
493 | goto fail_ipreserv; | 512 | goto fail_ipreserv; |
494 | } else { | 513 | } else { |
@@ -497,7 +516,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name, | |||
497 | goto fail_quota_locks; | 516 | goto fail_quota_locks; |
498 | } | 517 | } |
499 | 518 | ||
500 | error = gfs2_dir_add(&dip->i_inode, name, ip); | 519 | error = gfs2_dir_add(&dip->i_inode, name, ip, da); |
501 | if (error) | 520 | if (error) |
502 | goto fail_end_trans; | 521 | goto fail_end_trans; |
503 | 522 | ||
@@ -560,7 +579,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, | |||
560 | struct dentry *d; | 579 | struct dentry *d; |
561 | int error; | 580 | int error; |
562 | u32 aflags = 0; | 581 | u32 aflags = 0; |
563 | int arq; | 582 | struct gfs2_diradd da = { .bh = NULL, }; |
564 | 583 | ||
565 | if (!name->len || name->len > GFS2_FNAMESIZE) | 584 | if (!name->len || name->len > GFS2_FNAMESIZE) |
566 | return -ENAMETOOLONG; | 585 | return -ENAMETOOLONG; |
@@ -585,6 +604,9 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, | |||
585 | error = PTR_ERR(inode); | 604 | error = PTR_ERR(inode); |
586 | if (!IS_ERR(inode)) { | 605 | if (!IS_ERR(inode)) { |
587 | d = d_splice_alias(inode, dentry); | 606 | d = d_splice_alias(inode, dentry); |
607 | error = PTR_ERR(d); | ||
608 | if (IS_ERR(d)) | ||
609 | goto fail_gunlock; | ||
588 | error = 0; | 610 | error = 0; |
589 | if (file) { | 611 | if (file) { |
590 | if (S_ISREG(inode->i_mode)) { | 612 | if (S_ISREG(inode->i_mode)) { |
@@ -602,7 +624,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, | |||
602 | goto fail_gunlock; | 624 | goto fail_gunlock; |
603 | } | 625 | } |
604 | 626 | ||
605 | arq = error = gfs2_diradd_alloc_required(dir, name); | 627 | error = gfs2_diradd_alloc_required(dir, name, &da); |
606 | if (error < 0) | 628 | if (error < 0) |
607 | goto fail_gunlock; | 629 | goto fail_gunlock; |
608 | 630 | ||
@@ -690,7 +712,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, | |||
690 | if (error) | 712 | if (error) |
691 | goto fail_gunlock3; | 713 | goto fail_gunlock3; |
692 | 714 | ||
693 | error = link_dinode(dip, name, ip, arq); | 715 | error = link_dinode(dip, name, ip, &da); |
694 | if (error) | 716 | if (error) |
695 | goto fail_gunlock3; | 717 | goto fail_gunlock3; |
696 | 718 | ||
@@ -719,6 +741,7 @@ fail_free_inode: | |||
719 | free_inode_nonrcu(inode); | 741 | free_inode_nonrcu(inode); |
720 | inode = NULL; | 742 | inode = NULL; |
721 | fail_gunlock: | 743 | fail_gunlock: |
744 | gfs2_dir_no_add(&da); | ||
722 | gfs2_glock_dq_uninit(ghs); | 745 | gfs2_glock_dq_uninit(ghs); |
723 | if (inode && !IS_ERR(inode)) { | 746 | if (inode && !IS_ERR(inode)) { |
724 | clear_nlink(inode); | 747 | clear_nlink(inode); |
@@ -779,6 +802,11 @@ static struct dentry *__gfs2_lookup(struct inode *dir, struct dentry *dentry, | |||
779 | } | 802 | } |
780 | 803 | ||
781 | d = d_splice_alias(inode, dentry); | 804 | d = d_splice_alias(inode, dentry); |
805 | if (IS_ERR(d)) { | ||
806 | iput(inode); | ||
807 | gfs2_glock_dq_uninit(&gh); | ||
808 | return d; | ||
809 | } | ||
782 | if (file && S_ISREG(inode->i_mode)) | 810 | if (file && S_ISREG(inode->i_mode)) |
783 | error = finish_open(file, dentry, gfs2_open_common, opened); | 811 | error = finish_open(file, dentry, gfs2_open_common, opened); |
784 | 812 | ||
@@ -817,7 +845,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, | |||
817 | struct gfs2_inode *ip = GFS2_I(inode); | 845 | struct gfs2_inode *ip = GFS2_I(inode); |
818 | struct gfs2_holder ghs[2]; | 846 | struct gfs2_holder ghs[2]; |
819 | struct buffer_head *dibh; | 847 | struct buffer_head *dibh; |
820 | int alloc_required; | 848 | struct gfs2_diradd da = { .bh = NULL, }; |
821 | int error; | 849 | int error; |
822 | 850 | ||
823 | if (S_ISDIR(inode->i_mode)) | 851 | if (S_ISDIR(inode->i_mode)) |
@@ -872,13 +900,12 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, | |||
872 | if (ip->i_inode.i_nlink == (u32)-1) | 900 | if (ip->i_inode.i_nlink == (u32)-1) |
873 | goto out_gunlock; | 901 | goto out_gunlock; |
874 | 902 | ||
875 | alloc_required = error = gfs2_diradd_alloc_required(dir, &dentry->d_name); | 903 | error = gfs2_diradd_alloc_required(dir, &dentry->d_name, &da); |
876 | if (error < 0) | 904 | if (error < 0) |
877 | goto out_gunlock; | 905 | goto out_gunlock; |
878 | error = 0; | ||
879 | 906 | ||
880 | if (alloc_required) { | 907 | if (da.nr_blocks) { |
881 | struct gfs2_alloc_parms ap = { .target = sdp->sd_max_dirres, }; | 908 | struct gfs2_alloc_parms ap = { .target = da.nr_blocks, }; |
882 | error = gfs2_quota_lock_check(dip); | 909 | error = gfs2_quota_lock_check(dip); |
883 | if (error) | 910 | if (error) |
884 | goto out_gunlock; | 911 | goto out_gunlock; |
@@ -887,10 +914,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, | |||
887 | if (error) | 914 | if (error) |
888 | goto out_gunlock_q; | 915 | goto out_gunlock_q; |
889 | 916 | ||
890 | error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + | 917 | error = gfs2_trans_begin(sdp, gfs2_trans_da_blks(dip, &da, 2), 0); |
891 | gfs2_rg_blocks(dip, sdp->sd_max_dirres) + | ||
892 | 2 * RES_DINODE + RES_STATFS + | ||
893 | RES_QUOTA, 0); | ||
894 | if (error) | 918 | if (error) |
895 | goto out_ipres; | 919 | goto out_ipres; |
896 | } else { | 920 | } else { |
@@ -903,7 +927,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, | |||
903 | if (error) | 927 | if (error) |
904 | goto out_end_trans; | 928 | goto out_end_trans; |
905 | 929 | ||
906 | error = gfs2_dir_add(dir, &dentry->d_name, ip); | 930 | error = gfs2_dir_add(dir, &dentry->d_name, ip, &da); |
907 | if (error) | 931 | if (error) |
908 | goto out_brelse; | 932 | goto out_brelse; |
909 | 933 | ||
@@ -919,12 +943,13 @@ out_brelse: | |||
919 | out_end_trans: | 943 | out_end_trans: |
920 | gfs2_trans_end(sdp); | 944 | gfs2_trans_end(sdp); |
921 | out_ipres: | 945 | out_ipres: |
922 | if (alloc_required) | 946 | if (da.nr_blocks) |
923 | gfs2_inplace_release(dip); | 947 | gfs2_inplace_release(dip); |
924 | out_gunlock_q: | 948 | out_gunlock_q: |
925 | if (alloc_required) | 949 | if (da.nr_blocks) |
926 | gfs2_quota_unlock(dip); | 950 | gfs2_quota_unlock(dip); |
927 | out_gunlock: | 951 | out_gunlock: |
952 | gfs2_dir_no_add(&da); | ||
928 | gfs2_glock_dq(ghs + 1); | 953 | gfs2_glock_dq(ghs + 1); |
929 | out_child: | 954 | out_child: |
930 | gfs2_glock_dq(ghs); | 955 | gfs2_glock_dq(ghs); |
@@ -1254,7 +1279,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, | |||
1254 | struct gfs2_rgrpd *nrgd; | 1279 | struct gfs2_rgrpd *nrgd; |
1255 | unsigned int num_gh; | 1280 | unsigned int num_gh; |
1256 | int dir_rename = 0; | 1281 | int dir_rename = 0; |
1257 | int alloc_required = 0; | 1282 | struct gfs2_diradd da = { .nr_blocks = 0, }; |
1258 | unsigned int x; | 1283 | unsigned int x; |
1259 | int error; | 1284 | int error; |
1260 | 1285 | ||
@@ -1388,14 +1413,14 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, | |||
1388 | goto out_gunlock; | 1413 | goto out_gunlock; |
1389 | } | 1414 | } |
1390 | 1415 | ||
1391 | if (nip == NULL) | 1416 | if (nip == NULL) { |
1392 | alloc_required = gfs2_diradd_alloc_required(ndir, &ndentry->d_name); | 1417 | error = gfs2_diradd_alloc_required(ndir, &ndentry->d_name, &da); |
1393 | error = alloc_required; | 1418 | if (error) |
1394 | if (error < 0) | 1419 | goto out_gunlock; |
1395 | goto out_gunlock; | 1420 | } |
1396 | 1421 | ||
1397 | if (alloc_required) { | 1422 | if (da.nr_blocks) { |
1398 | struct gfs2_alloc_parms ap = { .target = sdp->sd_max_dirres, }; | 1423 | struct gfs2_alloc_parms ap = { .target = da.nr_blocks, }; |
1399 | error = gfs2_quota_lock_check(ndip); | 1424 | error = gfs2_quota_lock_check(ndip); |
1400 | if (error) | 1425 | if (error) |
1401 | goto out_gunlock; | 1426 | goto out_gunlock; |
@@ -1404,10 +1429,8 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, | |||
1404 | if (error) | 1429 | if (error) |
1405 | goto out_gunlock_q; | 1430 | goto out_gunlock_q; |
1406 | 1431 | ||
1407 | error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + | 1432 | error = gfs2_trans_begin(sdp, gfs2_trans_da_blks(ndip, &da, 4) + |
1408 | gfs2_rg_blocks(ndip, sdp->sd_max_dirres) + | 1433 | 4 * RES_LEAF + 4, 0); |
1409 | 4 * RES_DINODE + 4 * RES_LEAF + | ||
1410 | RES_STATFS + RES_QUOTA + 4, 0); | ||
1411 | if (error) | 1434 | if (error) |
1412 | goto out_ipreserv; | 1435 | goto out_ipreserv; |
1413 | } else { | 1436 | } else { |
@@ -1441,19 +1464,20 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, | |||
1441 | if (error) | 1464 | if (error) |
1442 | goto out_end_trans; | 1465 | goto out_end_trans; |
1443 | 1466 | ||
1444 | error = gfs2_dir_add(ndir, &ndentry->d_name, ip); | 1467 | error = gfs2_dir_add(ndir, &ndentry->d_name, ip, &da); |
1445 | if (error) | 1468 | if (error) |
1446 | goto out_end_trans; | 1469 | goto out_end_trans; |
1447 | 1470 | ||
1448 | out_end_trans: | 1471 | out_end_trans: |
1449 | gfs2_trans_end(sdp); | 1472 | gfs2_trans_end(sdp); |
1450 | out_ipreserv: | 1473 | out_ipreserv: |
1451 | if (alloc_required) | 1474 | if (da.nr_blocks) |
1452 | gfs2_inplace_release(ndip); | 1475 | gfs2_inplace_release(ndip); |
1453 | out_gunlock_q: | 1476 | out_gunlock_q: |
1454 | if (alloc_required) | 1477 | if (da.nr_blocks) |
1455 | gfs2_quota_unlock(ndip); | 1478 | gfs2_quota_unlock(ndip); |
1456 | out_gunlock: | 1479 | out_gunlock: |
1480 | gfs2_dir_no_add(&da); | ||
1457 | while (x--) { | 1481 | while (x--) { |
1458 | gfs2_glock_dq(ghs + x); | 1482 | gfs2_glock_dq(ghs + x); |
1459 | gfs2_holder_uninit(ghs + x); | 1483 | gfs2_holder_uninit(ghs + x); |
@@ -1607,10 +1631,22 @@ static int setattr_chown(struct inode *inode, struct iattr *attr) | |||
1607 | if (!(attr->ia_valid & ATTR_GID) || gid_eq(ogid, ngid)) | 1631 | if (!(attr->ia_valid & ATTR_GID) || gid_eq(ogid, ngid)) |
1608 | ogid = ngid = NO_GID_QUOTA_CHANGE; | 1632 | ogid = ngid = NO_GID_QUOTA_CHANGE; |
1609 | 1633 | ||
1610 | error = gfs2_quota_lock(ip, nuid, ngid); | 1634 | error = get_write_access(inode); |
1611 | if (error) | 1635 | if (error) |
1612 | return error; | 1636 | return error; |
1613 | 1637 | ||
1638 | error = gfs2_rs_alloc(ip); | ||
1639 | if (error) | ||
1640 | goto out; | ||
1641 | |||
1642 | error = gfs2_rindex_update(sdp); | ||
1643 | if (error) | ||
1644 | goto out; | ||
1645 | |||
1646 | error = gfs2_quota_lock(ip, nuid, ngid); | ||
1647 | if (error) | ||
1648 | goto out; | ||
1649 | |||
1614 | if (!uid_eq(ouid, NO_UID_QUOTA_CHANGE) || | 1650 | if (!uid_eq(ouid, NO_UID_QUOTA_CHANGE) || |
1615 | !gid_eq(ogid, NO_GID_QUOTA_CHANGE)) { | 1651 | !gid_eq(ogid, NO_GID_QUOTA_CHANGE)) { |
1616 | error = gfs2_quota_check(ip, nuid, ngid); | 1652 | error = gfs2_quota_check(ip, nuid, ngid); |
@@ -1637,6 +1673,8 @@ out_end_trans: | |||
1637 | gfs2_trans_end(sdp); | 1673 | gfs2_trans_end(sdp); |
1638 | out_gunlock_q: | 1674 | out_gunlock_q: |
1639 | gfs2_quota_unlock(ip); | 1675 | gfs2_quota_unlock(ip); |
1676 | out: | ||
1677 | put_write_access(inode); | ||
1640 | return error; | 1678 | return error; |
1641 | } | 1679 | } |
1642 | 1680 | ||
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 010b9fb9fec6..58f06400b7b8 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c | |||
@@ -83,6 +83,7 @@ static void maybe_release_space(struct gfs2_bufdata *bd) | |||
83 | bd->bd_bh->b_data + bi->bi_offset, bi->bi_len); | 83 | bd->bd_bh->b_data + bi->bi_offset, bi->bi_len); |
84 | clear_bit(GBF_FULL, &bi->bi_flags); | 84 | clear_bit(GBF_FULL, &bi->bi_flags); |
85 | rgd->rd_free_clone = rgd->rd_free; | 85 | rgd->rd_free_clone = rgd->rd_free; |
86 | rgd->rd_extfail_pt = rgd->rd_free; | ||
86 | } | 87 | } |
87 | 88 | ||
88 | /** | 89 | /** |
@@ -588,8 +589,12 @@ static int buf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start, | |||
588 | static void gfs2_meta_sync(struct gfs2_glock *gl) | 589 | static void gfs2_meta_sync(struct gfs2_glock *gl) |
589 | { | 590 | { |
590 | struct address_space *mapping = gfs2_glock2aspace(gl); | 591 | struct address_space *mapping = gfs2_glock2aspace(gl); |
592 | struct gfs2_sbd *sdp = gl->gl_sbd; | ||
591 | int error; | 593 | int error; |
592 | 594 | ||
595 | if (mapping == NULL) | ||
596 | mapping = &sdp->sd_aspace; | ||
597 | |||
593 | filemap_fdatawrite(mapping); | 598 | filemap_fdatawrite(mapping); |
594 | error = filemap_fdatawait(mapping); | 599 | error = filemap_fdatawait(mapping); |
595 | 600 | ||
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index 0650db2541ef..c272e73063de 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c | |||
@@ -76,6 +76,7 @@ static int __init init_gfs2_fs(void) | |||
76 | 76 | ||
77 | gfs2_str2qstr(&gfs2_qdot, "."); | 77 | gfs2_str2qstr(&gfs2_qdot, "."); |
78 | gfs2_str2qstr(&gfs2_qdotdot, ".."); | 78 | gfs2_str2qstr(&gfs2_qdotdot, ".."); |
79 | gfs2_quota_hash_init(); | ||
79 | 80 | ||
80 | error = gfs2_sys_init(); | 81 | error = gfs2_sys_init(); |
81 | if (error) | 82 | if (error) |
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 52f177be3bf8..c7f24690ed05 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c | |||
@@ -116,6 +116,9 @@ struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno, int create) | |||
116 | unsigned long index; | 116 | unsigned long index; |
117 | unsigned int bufnum; | 117 | unsigned int bufnum; |
118 | 118 | ||
119 | if (mapping == NULL) | ||
120 | mapping = &sdp->sd_aspace; | ||
121 | |||
119 | shift = PAGE_CACHE_SHIFT - sdp->sd_sb.sb_bsize_shift; | 122 | shift = PAGE_CACHE_SHIFT - sdp->sd_sb.sb_bsize_shift; |
120 | index = blkno >> shift; /* convert block to page */ | 123 | index = blkno >> shift; /* convert block to page */ |
121 | bufnum = blkno - (index << shift); /* block buf index within page */ | 124 | bufnum = blkno - (index << shift); /* block buf index within page */ |
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 52fa88314f5c..1e712b566d76 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c | |||
@@ -36,6 +36,7 @@ | |||
36 | #include "log.h" | 36 | #include "log.h" |
37 | #include "quota.h" | 37 | #include "quota.h" |
38 | #include "dir.h" | 38 | #include "dir.h" |
39 | #include "meta_io.h" | ||
39 | #include "trace_gfs2.h" | 40 | #include "trace_gfs2.h" |
40 | 41 | ||
41 | #define DO 0 | 42 | #define DO 0 |
@@ -62,6 +63,7 @@ static void gfs2_tune_init(struct gfs2_tune *gt) | |||
62 | static struct gfs2_sbd *init_sbd(struct super_block *sb) | 63 | static struct gfs2_sbd *init_sbd(struct super_block *sb) |
63 | { | 64 | { |
64 | struct gfs2_sbd *sdp; | 65 | struct gfs2_sbd *sdp; |
66 | struct address_space *mapping; | ||
65 | 67 | ||
66 | sdp = kzalloc(sizeof(struct gfs2_sbd), GFP_KERNEL); | 68 | sdp = kzalloc(sizeof(struct gfs2_sbd), GFP_KERNEL); |
67 | if (!sdp) | 69 | if (!sdp) |
@@ -97,6 +99,18 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb) | |||
97 | init_waitqueue_head(&sdp->sd_quota_wait); | 99 | init_waitqueue_head(&sdp->sd_quota_wait); |
98 | INIT_LIST_HEAD(&sdp->sd_trunc_list); | 100 | INIT_LIST_HEAD(&sdp->sd_trunc_list); |
99 | spin_lock_init(&sdp->sd_trunc_lock); | 101 | spin_lock_init(&sdp->sd_trunc_lock); |
102 | spin_lock_init(&sdp->sd_bitmap_lock); | ||
103 | |||
104 | mapping = &sdp->sd_aspace; | ||
105 | |||
106 | address_space_init_once(mapping); | ||
107 | mapping->a_ops = &gfs2_meta_aops; | ||
108 | mapping->host = sb->s_bdev->bd_inode; | ||
109 | mapping->flags = 0; | ||
110 | mapping_set_gfp_mask(mapping, GFP_NOFS); | ||
111 | mapping->private_data = NULL; | ||
112 | mapping->backing_dev_info = sb->s_bdi; | ||
113 | mapping->writeback_index = 0; | ||
100 | 114 | ||
101 | spin_lock_init(&sdp->sd_log_lock); | 115 | spin_lock_init(&sdp->sd_log_lock); |
102 | atomic_set(&sdp->sd_log_pinned, 0); | 116 | atomic_set(&sdp->sd_log_pinned, 0); |
@@ -217,7 +231,7 @@ static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector, int silent) | |||
217 | 231 | ||
218 | page = alloc_page(GFP_NOFS); | 232 | page = alloc_page(GFP_NOFS); |
219 | if (unlikely(!page)) | 233 | if (unlikely(!page)) |
220 | return -ENOBUFS; | 234 | return -ENOMEM; |
221 | 235 | ||
222 | ClearPageUptodate(page); | 236 | ClearPageUptodate(page); |
223 | ClearPageDirty(page); | 237 | ClearPageDirty(page); |
@@ -956,40 +970,6 @@ fail: | |||
956 | return error; | 970 | return error; |
957 | } | 971 | } |
958 | 972 | ||
959 | static int init_threads(struct gfs2_sbd *sdp, int undo) | ||
960 | { | ||
961 | struct task_struct *p; | ||
962 | int error = 0; | ||
963 | |||
964 | if (undo) | ||
965 | goto fail_quotad; | ||
966 | |||
967 | p = kthread_run(gfs2_logd, sdp, "gfs2_logd"); | ||
968 | if (IS_ERR(p)) { | ||
969 | error = PTR_ERR(p); | ||
970 | fs_err(sdp, "can't start logd thread: %d\n", error); | ||
971 | return error; | ||
972 | } | ||
973 | sdp->sd_logd_process = p; | ||
974 | |||
975 | p = kthread_run(gfs2_quotad, sdp, "gfs2_quotad"); | ||
976 | if (IS_ERR(p)) { | ||
977 | error = PTR_ERR(p); | ||
978 | fs_err(sdp, "can't start quotad thread: %d\n", error); | ||
979 | goto fail; | ||
980 | } | ||
981 | sdp->sd_quotad_process = p; | ||
982 | |||
983 | return 0; | ||
984 | |||
985 | |||
986 | fail_quotad: | ||
987 | kthread_stop(sdp->sd_quotad_process); | ||
988 | fail: | ||
989 | kthread_stop(sdp->sd_logd_process); | ||
990 | return error; | ||
991 | } | ||
992 | |||
993 | static const match_table_t nolock_tokens = { | 973 | static const match_table_t nolock_tokens = { |
994 | { Opt_jid, "jid=%d\n", }, | 974 | { Opt_jid, "jid=%d\n", }, |
995 | { Opt_err, NULL }, | 975 | { Opt_err, NULL }, |
@@ -1254,15 +1234,11 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent | |||
1254 | goto fail_per_node; | 1234 | goto fail_per_node; |
1255 | } | 1235 | } |
1256 | 1236 | ||
1257 | error = init_threads(sdp, DO); | ||
1258 | if (error) | ||
1259 | goto fail_per_node; | ||
1260 | |||
1261 | if (!(sb->s_flags & MS_RDONLY)) { | 1237 | if (!(sb->s_flags & MS_RDONLY)) { |
1262 | error = gfs2_make_fs_rw(sdp); | 1238 | error = gfs2_make_fs_rw(sdp); |
1263 | if (error) { | 1239 | if (error) { |
1264 | fs_err(sdp, "can't make FS RW: %d\n", error); | 1240 | fs_err(sdp, "can't make FS RW: %d\n", error); |
1265 | goto fail_threads; | 1241 | goto fail_per_node; |
1266 | } | 1242 | } |
1267 | } | 1243 | } |
1268 | 1244 | ||
@@ -1270,8 +1246,6 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent | |||
1270 | gfs2_online_uevent(sdp); | 1246 | gfs2_online_uevent(sdp); |
1271 | return 0; | 1247 | return 0; |
1272 | 1248 | ||
1273 | fail_threads: | ||
1274 | init_threads(sdp, UNDO); | ||
1275 | fail_per_node: | 1249 | fail_per_node: |
1276 | init_per_node(sdp, UNDO); | 1250 | init_per_node(sdp, UNDO); |
1277 | fail_inodes: | 1251 | fail_inodes: |
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 98236d0df3ca..8bec0e3192dd 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c | |||
@@ -52,6 +52,11 @@ | |||
52 | #include <linux/dqblk_xfs.h> | 52 | #include <linux/dqblk_xfs.h> |
53 | #include <linux/lockref.h> | 53 | #include <linux/lockref.h> |
54 | #include <linux/list_lru.h> | 54 | #include <linux/list_lru.h> |
55 | #include <linux/rcupdate.h> | ||
56 | #include <linux/rculist_bl.h> | ||
57 | #include <linux/bit_spinlock.h> | ||
58 | #include <linux/jhash.h> | ||
59 | #include <linux/vmalloc.h> | ||
55 | 60 | ||
56 | #include "gfs2.h" | 61 | #include "gfs2.h" |
57 | #include "incore.h" | 62 | #include "incore.h" |
@@ -67,16 +72,44 @@ | |||
67 | #include "inode.h" | 72 | #include "inode.h" |
68 | #include "util.h" | 73 | #include "util.h" |
69 | 74 | ||
70 | struct gfs2_quota_change_host { | 75 | #define GFS2_QD_HASH_SHIFT 12 |
71 | u64 qc_change; | 76 | #define GFS2_QD_HASH_SIZE (1 << GFS2_QD_HASH_SHIFT) |
72 | u32 qc_flags; /* GFS2_QCF_... */ | 77 | #define GFS2_QD_HASH_MASK (GFS2_QD_HASH_SIZE - 1) |
73 | struct kqid qc_id; | ||
74 | }; | ||
75 | 78 | ||
76 | /* Lock order: qd_lock -> qd->lockref.lock -> lru lock */ | 79 | /* Lock order: qd_lock -> bucket lock -> qd->lockref.lock -> lru lock */ |
80 | /* -> sd_bitmap_lock */ | ||
77 | static DEFINE_SPINLOCK(qd_lock); | 81 | static DEFINE_SPINLOCK(qd_lock); |
78 | struct list_lru gfs2_qd_lru; | 82 | struct list_lru gfs2_qd_lru; |
79 | 83 | ||
84 | static struct hlist_bl_head qd_hash_table[GFS2_QD_HASH_SIZE]; | ||
85 | |||
86 | static unsigned int gfs2_qd_hash(const struct gfs2_sbd *sdp, | ||
87 | const struct kqid qid) | ||
88 | { | ||
89 | unsigned int h; | ||
90 | |||
91 | h = jhash(&sdp, sizeof(struct gfs2_sbd *), 0); | ||
92 | h = jhash(&qid, sizeof(struct kqid), h); | ||
93 | |||
94 | return h & GFS2_QD_HASH_MASK; | ||
95 | } | ||
96 | |||
97 | static inline void spin_lock_bucket(unsigned int hash) | ||
98 | { | ||
99 | hlist_bl_lock(&qd_hash_table[hash]); | ||
100 | } | ||
101 | |||
102 | static inline void spin_unlock_bucket(unsigned int hash) | ||
103 | { | ||
104 | hlist_bl_unlock(&qd_hash_table[hash]); | ||
105 | } | ||
106 | |||
107 | static void gfs2_qd_dealloc(struct rcu_head *rcu) | ||
108 | { | ||
109 | struct gfs2_quota_data *qd = container_of(rcu, struct gfs2_quota_data, qd_rcu); | ||
110 | kmem_cache_free(gfs2_quotad_cachep, qd); | ||
111 | } | ||
112 | |||
80 | static void gfs2_qd_dispose(struct list_head *list) | 113 | static void gfs2_qd_dispose(struct list_head *list) |
81 | { | 114 | { |
82 | struct gfs2_quota_data *qd; | 115 | struct gfs2_quota_data *qd; |
@@ -93,6 +126,10 @@ static void gfs2_qd_dispose(struct list_head *list) | |||
93 | list_del(&qd->qd_list); | 126 | list_del(&qd->qd_list); |
94 | spin_unlock(&qd_lock); | 127 | spin_unlock(&qd_lock); |
95 | 128 | ||
129 | spin_lock_bucket(qd->qd_hash); | ||
130 | hlist_bl_del_rcu(&qd->qd_hlist); | ||
131 | spin_unlock_bucket(qd->qd_hash); | ||
132 | |||
96 | gfs2_assert_warn(sdp, !qd->qd_change); | 133 | gfs2_assert_warn(sdp, !qd->qd_change); |
97 | gfs2_assert_warn(sdp, !qd->qd_slot_count); | 134 | gfs2_assert_warn(sdp, !qd->qd_slot_count); |
98 | gfs2_assert_warn(sdp, !qd->qd_bh_count); | 135 | gfs2_assert_warn(sdp, !qd->qd_bh_count); |
@@ -101,7 +138,7 @@ static void gfs2_qd_dispose(struct list_head *list) | |||
101 | atomic_dec(&sdp->sd_quota_count); | 138 | atomic_dec(&sdp->sd_quota_count); |
102 | 139 | ||
103 | /* Delete it from the common reclaim list */ | 140 | /* Delete it from the common reclaim list */ |
104 | kmem_cache_free(gfs2_quotad_cachep, qd); | 141 | call_rcu(&qd->qd_rcu, gfs2_qd_dealloc); |
105 | } | 142 | } |
106 | } | 143 | } |
107 | 144 | ||
@@ -171,83 +208,95 @@ static u64 qd2offset(struct gfs2_quota_data *qd) | |||
171 | return offset; | 208 | return offset; |
172 | } | 209 | } |
173 | 210 | ||
174 | static int qd_alloc(struct gfs2_sbd *sdp, struct kqid qid, | 211 | static struct gfs2_quota_data *qd_alloc(unsigned hash, struct gfs2_sbd *sdp, struct kqid qid) |
175 | struct gfs2_quota_data **qdp) | ||
176 | { | 212 | { |
177 | struct gfs2_quota_data *qd; | 213 | struct gfs2_quota_data *qd; |
178 | int error; | 214 | int error; |
179 | 215 | ||
180 | qd = kmem_cache_zalloc(gfs2_quotad_cachep, GFP_NOFS); | 216 | qd = kmem_cache_zalloc(gfs2_quotad_cachep, GFP_NOFS); |
181 | if (!qd) | 217 | if (!qd) |
182 | return -ENOMEM; | 218 | return NULL; |
183 | 219 | ||
220 | qd->qd_sbd = sdp; | ||
184 | qd->qd_lockref.count = 1; | 221 | qd->qd_lockref.count = 1; |
185 | spin_lock_init(&qd->qd_lockref.lock); | 222 | spin_lock_init(&qd->qd_lockref.lock); |
186 | qd->qd_id = qid; | 223 | qd->qd_id = qid; |
187 | qd->qd_slot = -1; | 224 | qd->qd_slot = -1; |
188 | INIT_LIST_HEAD(&qd->qd_lru); | 225 | INIT_LIST_HEAD(&qd->qd_lru); |
226 | qd->qd_hash = hash; | ||
189 | 227 | ||
190 | error = gfs2_glock_get(sdp, qd2index(qd), | 228 | error = gfs2_glock_get(sdp, qd2index(qd), |
191 | &gfs2_quota_glops, CREATE, &qd->qd_gl); | 229 | &gfs2_quota_glops, CREATE, &qd->qd_gl); |
192 | if (error) | 230 | if (error) |
193 | goto fail; | 231 | goto fail; |
194 | 232 | ||
195 | *qdp = qd; | 233 | return qd; |
196 | |||
197 | return 0; | ||
198 | 234 | ||
199 | fail: | 235 | fail: |
200 | kmem_cache_free(gfs2_quotad_cachep, qd); | 236 | kmem_cache_free(gfs2_quotad_cachep, qd); |
201 | return error; | 237 | return NULL; |
202 | } | 238 | } |
203 | 239 | ||
204 | static int qd_get(struct gfs2_sbd *sdp, struct kqid qid, | 240 | static struct gfs2_quota_data *gfs2_qd_search_bucket(unsigned int hash, |
205 | struct gfs2_quota_data **qdp) | 241 | const struct gfs2_sbd *sdp, |
242 | struct kqid qid) | ||
206 | { | 243 | { |
207 | struct gfs2_quota_data *qd = NULL, *new_qd = NULL; | 244 | struct gfs2_quota_data *qd; |
208 | int error, found; | 245 | struct hlist_bl_node *h; |
209 | |||
210 | *qdp = NULL; | ||
211 | 246 | ||
212 | for (;;) { | 247 | hlist_bl_for_each_entry_rcu(qd, h, &qd_hash_table[hash], qd_hlist) { |
213 | found = 0; | 248 | if (!qid_eq(qd->qd_id, qid)) |
214 | spin_lock(&qd_lock); | 249 | continue; |
215 | list_for_each_entry(qd, &sdp->sd_quota_list, qd_list) { | 250 | if (qd->qd_sbd != sdp) |
216 | if (qid_eq(qd->qd_id, qid) && | 251 | continue; |
217 | lockref_get_not_dead(&qd->qd_lockref)) { | 252 | if (lockref_get_not_dead(&qd->qd_lockref)) { |
218 | list_lru_del(&gfs2_qd_lru, &qd->qd_lru); | 253 | list_lru_del(&gfs2_qd_lru, &qd->qd_lru); |
219 | found = 1; | 254 | return qd; |
220 | break; | ||
221 | } | ||
222 | } | 255 | } |
256 | } | ||
223 | 257 | ||
224 | if (!found) | 258 | return NULL; |
225 | qd = NULL; | 259 | } |
226 | 260 | ||
227 | if (!qd && new_qd) { | ||
228 | qd = new_qd; | ||
229 | list_add(&qd->qd_list, &sdp->sd_quota_list); | ||
230 | atomic_inc(&sdp->sd_quota_count); | ||
231 | new_qd = NULL; | ||
232 | } | ||
233 | 261 | ||
234 | spin_unlock(&qd_lock); | 262 | static int qd_get(struct gfs2_sbd *sdp, struct kqid qid, |
263 | struct gfs2_quota_data **qdp) | ||
264 | { | ||
265 | struct gfs2_quota_data *qd, *new_qd; | ||
266 | unsigned int hash = gfs2_qd_hash(sdp, qid); | ||
235 | 267 | ||
236 | if (qd) { | 268 | rcu_read_lock(); |
237 | if (new_qd) { | 269 | *qdp = qd = gfs2_qd_search_bucket(hash, sdp, qid); |
238 | gfs2_glock_put(new_qd->qd_gl); | 270 | rcu_read_unlock(); |
239 | kmem_cache_free(gfs2_quotad_cachep, new_qd); | ||
240 | } | ||
241 | *qdp = qd; | ||
242 | return 0; | ||
243 | } | ||
244 | 271 | ||
245 | error = qd_alloc(sdp, qid, &new_qd); | 272 | if (qd) |
246 | if (error) | 273 | return 0; |
247 | return error; | 274 | |
275 | new_qd = qd_alloc(hash, sdp, qid); | ||
276 | if (!new_qd) | ||
277 | return -ENOMEM; | ||
278 | |||
279 | spin_lock(&qd_lock); | ||
280 | spin_lock_bucket(hash); | ||
281 | *qdp = qd = gfs2_qd_search_bucket(hash, sdp, qid); | ||
282 | if (qd == NULL) { | ||
283 | *qdp = new_qd; | ||
284 | list_add(&new_qd->qd_list, &sdp->sd_quota_list); | ||
285 | hlist_bl_add_head_rcu(&new_qd->qd_hlist, &qd_hash_table[hash]); | ||
286 | atomic_inc(&sdp->sd_quota_count); | ||
248 | } | 287 | } |
288 | spin_unlock_bucket(hash); | ||
289 | spin_unlock(&qd_lock); | ||
290 | |||
291 | if (qd) { | ||
292 | gfs2_glock_put(new_qd->qd_gl); | ||
293 | kmem_cache_free(gfs2_quotad_cachep, new_qd); | ||
294 | } | ||
295 | |||
296 | return 0; | ||
249 | } | 297 | } |
250 | 298 | ||
299 | |||
251 | static void qd_hold(struct gfs2_quota_data *qd) | 300 | static void qd_hold(struct gfs2_quota_data *qd) |
252 | { | 301 | { |
253 | struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; | 302 | struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; |
@@ -268,88 +317,48 @@ static void qd_put(struct gfs2_quota_data *qd) | |||
268 | 317 | ||
269 | static int slot_get(struct gfs2_quota_data *qd) | 318 | static int slot_get(struct gfs2_quota_data *qd) |
270 | { | 319 | { |
271 | struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; | 320 | struct gfs2_sbd *sdp = qd->qd_sbd; |
272 | unsigned int c, o = 0, b; | 321 | unsigned int bit; |
273 | unsigned char byte = 0; | 322 | int error = 0; |
274 | 323 | ||
275 | spin_lock(&qd_lock); | 324 | spin_lock(&sdp->sd_bitmap_lock); |
325 | if (qd->qd_slot_count != 0) | ||
326 | goto out; | ||
276 | 327 | ||
277 | if (qd->qd_slot_count++) { | 328 | error = -ENOSPC; |
278 | spin_unlock(&qd_lock); | 329 | bit = find_first_zero_bit(sdp->sd_quota_bitmap, sdp->sd_quota_slots); |
279 | return 0; | 330 | if (bit < sdp->sd_quota_slots) { |
331 | set_bit(bit, sdp->sd_quota_bitmap); | ||
332 | qd->qd_slot = bit; | ||
333 | out: | ||
334 | qd->qd_slot_count++; | ||
280 | } | 335 | } |
336 | spin_unlock(&sdp->sd_bitmap_lock); | ||
281 | 337 | ||
282 | for (c = 0; c < sdp->sd_quota_chunks; c++) | 338 | return error; |
283 | for (o = 0; o < PAGE_SIZE; o++) { | ||
284 | byte = sdp->sd_quota_bitmap[c][o]; | ||
285 | if (byte != 0xFF) | ||
286 | goto found; | ||
287 | } | ||
288 | |||
289 | goto fail; | ||
290 | |||
291 | found: | ||
292 | for (b = 0; b < 8; b++) | ||
293 | if (!(byte & (1 << b))) | ||
294 | break; | ||
295 | qd->qd_slot = c * (8 * PAGE_SIZE) + o * 8 + b; | ||
296 | |||
297 | if (qd->qd_slot >= sdp->sd_quota_slots) | ||
298 | goto fail; | ||
299 | |||
300 | sdp->sd_quota_bitmap[c][o] |= 1 << b; | ||
301 | |||
302 | spin_unlock(&qd_lock); | ||
303 | |||
304 | return 0; | ||
305 | |||
306 | fail: | ||
307 | qd->qd_slot_count--; | ||
308 | spin_unlock(&qd_lock); | ||
309 | return -ENOSPC; | ||
310 | } | 339 | } |
311 | 340 | ||
312 | static void slot_hold(struct gfs2_quota_data *qd) | 341 | static void slot_hold(struct gfs2_quota_data *qd) |
313 | { | 342 | { |
314 | struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; | 343 | struct gfs2_sbd *sdp = qd->qd_sbd; |
315 | 344 | ||
316 | spin_lock(&qd_lock); | 345 | spin_lock(&sdp->sd_bitmap_lock); |
317 | gfs2_assert(sdp, qd->qd_slot_count); | 346 | gfs2_assert(sdp, qd->qd_slot_count); |
318 | qd->qd_slot_count++; | 347 | qd->qd_slot_count++; |
319 | spin_unlock(&qd_lock); | 348 | spin_unlock(&sdp->sd_bitmap_lock); |
320 | } | ||
321 | |||
322 | static void gfs2_icbit_munge(struct gfs2_sbd *sdp, unsigned char **bitmap, | ||
323 | unsigned int bit, int new_value) | ||
324 | { | ||
325 | unsigned int c, o, b = bit; | ||
326 | int old_value; | ||
327 | |||
328 | c = b / (8 * PAGE_SIZE); | ||
329 | b %= 8 * PAGE_SIZE; | ||
330 | o = b / 8; | ||
331 | b %= 8; | ||
332 | |||
333 | old_value = (bitmap[c][o] & (1 << b)); | ||
334 | gfs2_assert_withdraw(sdp, !old_value != !new_value); | ||
335 | |||
336 | if (new_value) | ||
337 | bitmap[c][o] |= 1 << b; | ||
338 | else | ||
339 | bitmap[c][o] &= ~(1 << b); | ||
340 | } | 349 | } |
341 | 350 | ||
342 | static void slot_put(struct gfs2_quota_data *qd) | 351 | static void slot_put(struct gfs2_quota_data *qd) |
343 | { | 352 | { |
344 | struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; | 353 | struct gfs2_sbd *sdp = qd->qd_sbd; |
345 | 354 | ||
346 | spin_lock(&qd_lock); | 355 | spin_lock(&sdp->sd_bitmap_lock); |
347 | gfs2_assert(sdp, qd->qd_slot_count); | 356 | gfs2_assert(sdp, qd->qd_slot_count); |
348 | if (!--qd->qd_slot_count) { | 357 | if (!--qd->qd_slot_count) { |
349 | gfs2_icbit_munge(sdp, sdp->sd_quota_bitmap, qd->qd_slot, 0); | 358 | BUG_ON(!test_and_clear_bit(qd->qd_slot, sdp->sd_quota_bitmap)); |
350 | qd->qd_slot = -1; | 359 | qd->qd_slot = -1; |
351 | } | 360 | } |
352 | spin_unlock(&qd_lock); | 361 | spin_unlock(&sdp->sd_bitmap_lock); |
353 | } | 362 | } |
354 | 363 | ||
355 | static int bh_get(struct gfs2_quota_data *qd) | 364 | static int bh_get(struct gfs2_quota_data *qd) |
@@ -427,8 +436,7 @@ static int qd_check_sync(struct gfs2_sbd *sdp, struct gfs2_quota_data *qd, | |||
427 | list_move_tail(&qd->qd_list, &sdp->sd_quota_list); | 436 | list_move_tail(&qd->qd_list, &sdp->sd_quota_list); |
428 | set_bit(QDF_LOCKED, &qd->qd_flags); | 437 | set_bit(QDF_LOCKED, &qd->qd_flags); |
429 | qd->qd_change_sync = qd->qd_change; | 438 | qd->qd_change_sync = qd->qd_change; |
430 | gfs2_assert_warn(sdp, qd->qd_slot_count); | 439 | slot_hold(qd); |
431 | qd->qd_slot_count++; | ||
432 | return 1; | 440 | return 1; |
433 | } | 441 | } |
434 | 442 | ||
@@ -1214,17 +1222,6 @@ int gfs2_quota_refresh(struct gfs2_sbd *sdp, struct kqid qid) | |||
1214 | return error; | 1222 | return error; |
1215 | } | 1223 | } |
1216 | 1224 | ||
1217 | static void gfs2_quota_change_in(struct gfs2_quota_change_host *qc, const void *buf) | ||
1218 | { | ||
1219 | const struct gfs2_quota_change *str = buf; | ||
1220 | |||
1221 | qc->qc_change = be64_to_cpu(str->qc_change); | ||
1222 | qc->qc_flags = be32_to_cpu(str->qc_flags); | ||
1223 | qc->qc_id = make_kqid(&init_user_ns, | ||
1224 | (qc->qc_flags & GFS2_QCF_USER)?USRQUOTA:GRPQUOTA, | ||
1225 | be32_to_cpu(str->qc_id)); | ||
1226 | } | ||
1227 | |||
1228 | int gfs2_quota_init(struct gfs2_sbd *sdp) | 1225 | int gfs2_quota_init(struct gfs2_sbd *sdp) |
1229 | { | 1226 | { |
1230 | struct gfs2_inode *ip = GFS2_I(sdp->sd_qc_inode); | 1227 | struct gfs2_inode *ip = GFS2_I(sdp->sd_qc_inode); |
@@ -1232,6 +1229,8 @@ int gfs2_quota_init(struct gfs2_sbd *sdp) | |||
1232 | unsigned int blocks = size >> sdp->sd_sb.sb_bsize_shift; | 1229 | unsigned int blocks = size >> sdp->sd_sb.sb_bsize_shift; |
1233 | unsigned int x, slot = 0; | 1230 | unsigned int x, slot = 0; |
1234 | unsigned int found = 0; | 1231 | unsigned int found = 0; |
1232 | unsigned int hash; | ||
1233 | unsigned int bm_size; | ||
1235 | u64 dblock; | 1234 | u64 dblock; |
1236 | u32 extlen = 0; | 1235 | u32 extlen = 0; |
1237 | int error; | 1236 | int error; |
@@ -1240,23 +1239,20 @@ int gfs2_quota_init(struct gfs2_sbd *sdp) | |||
1240 | return -EIO; | 1239 | return -EIO; |
1241 | 1240 | ||
1242 | sdp->sd_quota_slots = blocks * sdp->sd_qc_per_block; | 1241 | sdp->sd_quota_slots = blocks * sdp->sd_qc_per_block; |
1243 | sdp->sd_quota_chunks = DIV_ROUND_UP(sdp->sd_quota_slots, 8 * PAGE_SIZE); | 1242 | bm_size = DIV_ROUND_UP(sdp->sd_quota_slots, 8 * sizeof(unsigned long)); |
1244 | 1243 | bm_size *= sizeof(unsigned long); | |
1245 | error = -ENOMEM; | 1244 | error = -ENOMEM; |
1246 | 1245 | sdp->sd_quota_bitmap = kmalloc(bm_size, GFP_NOFS|__GFP_NOWARN); | |
1247 | sdp->sd_quota_bitmap = kcalloc(sdp->sd_quota_chunks, | 1246 | if (sdp->sd_quota_bitmap == NULL) |
1248 | sizeof(unsigned char *), GFP_NOFS); | 1247 | sdp->sd_quota_bitmap = __vmalloc(bm_size, GFP_NOFS, PAGE_KERNEL); |
1249 | if (!sdp->sd_quota_bitmap) | 1248 | if (!sdp->sd_quota_bitmap) |
1250 | return error; | 1249 | return error; |
1251 | 1250 | ||
1252 | for (x = 0; x < sdp->sd_quota_chunks; x++) { | 1251 | memset(sdp->sd_quota_bitmap, 0, bm_size); |
1253 | sdp->sd_quota_bitmap[x] = kzalloc(PAGE_SIZE, GFP_NOFS); | ||
1254 | if (!sdp->sd_quota_bitmap[x]) | ||
1255 | goto fail; | ||
1256 | } | ||
1257 | 1252 | ||
1258 | for (x = 0; x < blocks; x++) { | 1253 | for (x = 0; x < blocks; x++) { |
1259 | struct buffer_head *bh; | 1254 | struct buffer_head *bh; |
1255 | const struct gfs2_quota_change *qc; | ||
1260 | unsigned int y; | 1256 | unsigned int y; |
1261 | 1257 | ||
1262 | if (!extlen) { | 1258 | if (!extlen) { |
@@ -1274,34 +1270,42 @@ int gfs2_quota_init(struct gfs2_sbd *sdp) | |||
1274 | goto fail; | 1270 | goto fail; |
1275 | } | 1271 | } |
1276 | 1272 | ||
1273 | qc = (const struct gfs2_quota_change *)(bh->b_data + sizeof(struct gfs2_meta_header)); | ||
1277 | for (y = 0; y < sdp->sd_qc_per_block && slot < sdp->sd_quota_slots; | 1274 | for (y = 0; y < sdp->sd_qc_per_block && slot < sdp->sd_quota_slots; |
1278 | y++, slot++) { | 1275 | y++, slot++) { |
1279 | struct gfs2_quota_change_host qc; | ||
1280 | struct gfs2_quota_data *qd; | 1276 | struct gfs2_quota_data *qd; |
1281 | 1277 | s64 qc_change = be64_to_cpu(qc->qc_change); | |
1282 | gfs2_quota_change_in(&qc, bh->b_data + | 1278 | u32 qc_flags = be32_to_cpu(qc->qc_flags); |
1283 | sizeof(struct gfs2_meta_header) + | 1279 | enum quota_type qtype = (qc_flags & GFS2_QCF_USER) ? |
1284 | y * sizeof(struct gfs2_quota_change)); | 1280 | USRQUOTA : GRPQUOTA; |
1285 | if (!qc.qc_change) | 1281 | struct kqid qc_id = make_kqid(&init_user_ns, qtype, |
1282 | be32_to_cpu(qc->qc_id)); | ||
1283 | qc++; | ||
1284 | if (!qc_change) | ||
1286 | continue; | 1285 | continue; |
1287 | 1286 | ||
1288 | error = qd_alloc(sdp, qc.qc_id, &qd); | 1287 | hash = gfs2_qd_hash(sdp, qc_id); |
1289 | if (error) { | 1288 | qd = qd_alloc(hash, sdp, qc_id); |
1289 | if (qd == NULL) { | ||
1290 | brelse(bh); | 1290 | brelse(bh); |
1291 | goto fail; | 1291 | goto fail; |
1292 | } | 1292 | } |
1293 | 1293 | ||
1294 | set_bit(QDF_CHANGE, &qd->qd_flags); | 1294 | set_bit(QDF_CHANGE, &qd->qd_flags); |
1295 | qd->qd_change = qc.qc_change; | 1295 | qd->qd_change = qc_change; |
1296 | qd->qd_slot = slot; | 1296 | qd->qd_slot = slot; |
1297 | qd->qd_slot_count = 1; | 1297 | qd->qd_slot_count = 1; |
1298 | 1298 | ||
1299 | spin_lock(&qd_lock); | 1299 | spin_lock(&qd_lock); |
1300 | gfs2_icbit_munge(sdp, sdp->sd_quota_bitmap, slot, 1); | 1300 | BUG_ON(test_and_set_bit(slot, sdp->sd_quota_bitmap)); |
1301 | list_add(&qd->qd_list, &sdp->sd_quota_list); | 1301 | list_add(&qd->qd_list, &sdp->sd_quota_list); |
1302 | atomic_inc(&sdp->sd_quota_count); | 1302 | atomic_inc(&sdp->sd_quota_count); |
1303 | spin_unlock(&qd_lock); | 1303 | spin_unlock(&qd_lock); |
1304 | 1304 | ||
1305 | spin_lock_bucket(hash); | ||
1306 | hlist_bl_add_head_rcu(&qd->qd_hlist, &qd_hash_table[hash]); | ||
1307 | spin_unlock_bucket(hash); | ||
1308 | |||
1305 | found++; | 1309 | found++; |
1306 | } | 1310 | } |
1307 | 1311 | ||
@@ -1324,44 +1328,28 @@ void gfs2_quota_cleanup(struct gfs2_sbd *sdp) | |||
1324 | { | 1328 | { |
1325 | struct list_head *head = &sdp->sd_quota_list; | 1329 | struct list_head *head = &sdp->sd_quota_list; |
1326 | struct gfs2_quota_data *qd; | 1330 | struct gfs2_quota_data *qd; |
1327 | unsigned int x; | ||
1328 | 1331 | ||
1329 | spin_lock(&qd_lock); | 1332 | spin_lock(&qd_lock); |
1330 | while (!list_empty(head)) { | 1333 | while (!list_empty(head)) { |
1331 | qd = list_entry(head->prev, struct gfs2_quota_data, qd_list); | 1334 | qd = list_entry(head->prev, struct gfs2_quota_data, qd_list); |
1332 | 1335 | ||
1333 | /* | ||
1334 | * To be removed in due course... we should be able to | ||
1335 | * ensure that all refs to the qd have done by this point | ||
1336 | * so that this rather odd test is not required | ||
1337 | */ | ||
1338 | spin_lock(&qd->qd_lockref.lock); | ||
1339 | if (qd->qd_lockref.count > 1 || | ||
1340 | (qd->qd_lockref.count && !test_bit(QDF_CHANGE, &qd->qd_flags))) { | ||
1341 | spin_unlock(&qd->qd_lockref.lock); | ||
1342 | list_move(&qd->qd_list, head); | ||
1343 | spin_unlock(&qd_lock); | ||
1344 | schedule(); | ||
1345 | spin_lock(&qd_lock); | ||
1346 | continue; | ||
1347 | } | ||
1348 | spin_unlock(&qd->qd_lockref.lock); | ||
1349 | |||
1350 | list_del(&qd->qd_list); | 1336 | list_del(&qd->qd_list); |
1337 | |||
1351 | /* Also remove if this qd exists in the reclaim list */ | 1338 | /* Also remove if this qd exists in the reclaim list */ |
1352 | list_lru_del(&gfs2_qd_lru, &qd->qd_lru); | 1339 | list_lru_del(&gfs2_qd_lru, &qd->qd_lru); |
1353 | atomic_dec(&sdp->sd_quota_count); | 1340 | atomic_dec(&sdp->sd_quota_count); |
1354 | spin_unlock(&qd_lock); | 1341 | spin_unlock(&qd_lock); |
1355 | 1342 | ||
1356 | if (!qd->qd_lockref.count) { | 1343 | spin_lock_bucket(qd->qd_hash); |
1357 | gfs2_assert_warn(sdp, !qd->qd_change); | 1344 | hlist_bl_del_rcu(&qd->qd_hlist); |
1358 | gfs2_assert_warn(sdp, !qd->qd_slot_count); | 1345 | spin_unlock_bucket(qd->qd_hash); |
1359 | } else | 1346 | |
1360 | gfs2_assert_warn(sdp, qd->qd_slot_count == 1); | 1347 | gfs2_assert_warn(sdp, !qd->qd_change); |
1348 | gfs2_assert_warn(sdp, !qd->qd_slot_count); | ||
1361 | gfs2_assert_warn(sdp, !qd->qd_bh_count); | 1349 | gfs2_assert_warn(sdp, !qd->qd_bh_count); |
1362 | 1350 | ||
1363 | gfs2_glock_put(qd->qd_gl); | 1351 | gfs2_glock_put(qd->qd_gl); |
1364 | kmem_cache_free(gfs2_quotad_cachep, qd); | 1352 | call_rcu(&qd->qd_rcu, gfs2_qd_dealloc); |
1365 | 1353 | ||
1366 | spin_lock(&qd_lock); | 1354 | spin_lock(&qd_lock); |
1367 | } | 1355 | } |
@@ -1370,9 +1358,11 @@ void gfs2_quota_cleanup(struct gfs2_sbd *sdp) | |||
1370 | gfs2_assert_warn(sdp, !atomic_read(&sdp->sd_quota_count)); | 1358 | gfs2_assert_warn(sdp, !atomic_read(&sdp->sd_quota_count)); |
1371 | 1359 | ||
1372 | if (sdp->sd_quota_bitmap) { | 1360 | if (sdp->sd_quota_bitmap) { |
1373 | for (x = 0; x < sdp->sd_quota_chunks; x++) | 1361 | if (is_vmalloc_addr(sdp->sd_quota_bitmap)) |
1374 | kfree(sdp->sd_quota_bitmap[x]); | 1362 | vfree(sdp->sd_quota_bitmap); |
1375 | kfree(sdp->sd_quota_bitmap); | 1363 | else |
1364 | kfree(sdp->sd_quota_bitmap); | ||
1365 | sdp->sd_quota_bitmap = NULL; | ||
1376 | } | 1366 | } |
1377 | } | 1367 | } |
1378 | 1368 | ||
@@ -1656,3 +1646,11 @@ const struct quotactl_ops gfs2_quotactl_ops = { | |||
1656 | .get_dqblk = gfs2_get_dqblk, | 1646 | .get_dqblk = gfs2_get_dqblk, |
1657 | .set_dqblk = gfs2_set_dqblk, | 1647 | .set_dqblk = gfs2_set_dqblk, |
1658 | }; | 1648 | }; |
1649 | |||
1650 | void __init gfs2_quota_hash_init(void) | ||
1651 | { | ||
1652 | unsigned i; | ||
1653 | |||
1654 | for(i = 0; i < GFS2_QD_HASH_SIZE; i++) | ||
1655 | INIT_HLIST_BL_HEAD(&qd_hash_table[i]); | ||
1656 | } | ||
diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h index 96e4f34a03b0..55d506eb3c4a 100644 --- a/fs/gfs2/quota.h +++ b/fs/gfs2/quota.h | |||
@@ -57,5 +57,6 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip) | |||
57 | extern const struct quotactl_ops gfs2_quotactl_ops; | 57 | extern const struct quotactl_ops gfs2_quotactl_ops; |
58 | extern struct shrinker gfs2_qd_shrinker; | 58 | extern struct shrinker gfs2_qd_shrinker; |
59 | extern struct list_lru gfs2_qd_lru; | 59 | extern struct list_lru gfs2_qd_lru; |
60 | extern void __init gfs2_quota_hash_init(void); | ||
60 | 61 | ||
61 | #endif /* __QUOTA_DOT_H__ */ | 62 | #endif /* __QUOTA_DOT_H__ */ |
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index c8d6161bd682..a1da21349235 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c | |||
@@ -57,6 +57,11 @@ | |||
57 | * 3 = Used (metadata) | 57 | * 3 = Used (metadata) |
58 | */ | 58 | */ |
59 | 59 | ||
60 | struct gfs2_extent { | ||
61 | struct gfs2_rbm rbm; | ||
62 | u32 len; | ||
63 | }; | ||
64 | |||
60 | static const char valid_change[16] = { | 65 | static const char valid_change[16] = { |
61 | /* current */ | 66 | /* current */ |
62 | /* n */ 0, 1, 1, 1, | 67 | /* n */ 0, 1, 1, 1, |
@@ -65,8 +70,9 @@ static const char valid_change[16] = { | |||
65 | 1, 0, 0, 0 | 70 | 1, 0, 0, 0 |
66 | }; | 71 | }; |
67 | 72 | ||
68 | static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 minext, | 73 | static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext, |
69 | const struct gfs2_inode *ip, bool nowrap); | 74 | const struct gfs2_inode *ip, bool nowrap, |
75 | const struct gfs2_alloc_parms *ap); | ||
70 | 76 | ||
71 | 77 | ||
72 | /** | 78 | /** |
@@ -635,9 +641,13 @@ static void __rs_deltree(struct gfs2_blkreserv *rs) | |||
635 | /* return reserved blocks to the rgrp */ | 641 | /* return reserved blocks to the rgrp */ |
636 | BUG_ON(rs->rs_rbm.rgd->rd_reserved < rs->rs_free); | 642 | BUG_ON(rs->rs_rbm.rgd->rd_reserved < rs->rs_free); |
637 | rs->rs_rbm.rgd->rd_reserved -= rs->rs_free; | 643 | rs->rs_rbm.rgd->rd_reserved -= rs->rs_free; |
644 | /* The rgrp extent failure point is likely not to increase; | ||
645 | it will only do so if the freed blocks are somehow | ||
646 | contiguous with a span of free blocks that follows. Still, | ||
647 | it will force the number to be recalculated later. */ | ||
648 | rgd->rd_extfail_pt += rs->rs_free; | ||
638 | rs->rs_free = 0; | 649 | rs->rs_free = 0; |
639 | clear_bit(GBF_FULL, &bi->bi_flags); | 650 | clear_bit(GBF_FULL, &bi->bi_flags); |
640 | smp_mb__after_clear_bit(); | ||
641 | } | 651 | } |
642 | } | 652 | } |
643 | 653 | ||
@@ -876,6 +886,7 @@ static int rgd_insert(struct gfs2_rgrpd *rgd) | |||
876 | static int read_rindex_entry(struct gfs2_inode *ip) | 886 | static int read_rindex_entry(struct gfs2_inode *ip) |
877 | { | 887 | { |
878 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 888 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
889 | const unsigned bsize = sdp->sd_sb.sb_bsize; | ||
879 | loff_t pos = sdp->sd_rgrps * sizeof(struct gfs2_rindex); | 890 | loff_t pos = sdp->sd_rgrps * sizeof(struct gfs2_rindex); |
880 | struct gfs2_rindex buf; | 891 | struct gfs2_rindex buf; |
881 | int error; | 892 | int error; |
@@ -913,6 +924,8 @@ static int read_rindex_entry(struct gfs2_inode *ip) | |||
913 | goto fail; | 924 | goto fail; |
914 | 925 | ||
915 | rgd->rd_gl->gl_object = rgd; | 926 | rgd->rd_gl->gl_object = rgd; |
927 | rgd->rd_gl->gl_vm.start = rgd->rd_addr * bsize; | ||
928 | rgd->rd_gl->gl_vm.end = rgd->rd_gl->gl_vm.start + (rgd->rd_length * bsize) - 1; | ||
916 | rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lksb.sb_lvbptr; | 929 | rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lksb.sb_lvbptr; |
917 | rgd->rd_flags &= ~GFS2_RDF_UPTODATE; | 930 | rgd->rd_flags &= ~GFS2_RDF_UPTODATE; |
918 | if (rgd->rd_data > sdp->sd_max_rg_data) | 931 | if (rgd->rd_data > sdp->sd_max_rg_data) |
@@ -1126,6 +1139,8 @@ int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd) | |||
1126 | gfs2_rgrp_in(rgd, (rgd->rd_bits[0].bi_bh)->b_data); | 1139 | gfs2_rgrp_in(rgd, (rgd->rd_bits[0].bi_bh)->b_data); |
1127 | rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK); | 1140 | rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK); |
1128 | rgd->rd_free_clone = rgd->rd_free; | 1141 | rgd->rd_free_clone = rgd->rd_free; |
1142 | /* max out the rgrp allocation failure point */ | ||
1143 | rgd->rd_extfail_pt = rgd->rd_free; | ||
1129 | } | 1144 | } |
1130 | if (cpu_to_be32(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) { | 1145 | if (cpu_to_be32(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) { |
1131 | rgd->rd_rgl->rl_unlinked = cpu_to_be32(count_unlinked(rgd)); | 1146 | rgd->rd_rgl->rl_unlinked = cpu_to_be32(count_unlinked(rgd)); |
@@ -1184,7 +1199,7 @@ int gfs2_rgrp_go_lock(struct gfs2_holder *gh) | |||
1184 | 1199 | ||
1185 | if (gh->gh_flags & GL_SKIP && sdp->sd_args.ar_rgrplvb) | 1200 | if (gh->gh_flags & GL_SKIP && sdp->sd_args.ar_rgrplvb) |
1186 | return 0; | 1201 | return 0; |
1187 | return gfs2_rgrp_bh_get((struct gfs2_rgrpd *)gh->gh_gl->gl_object); | 1202 | return gfs2_rgrp_bh_get(rgd); |
1188 | } | 1203 | } |
1189 | 1204 | ||
1190 | /** | 1205 | /** |
@@ -1455,7 +1470,7 @@ static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, | |||
1455 | if (WARN_ON(gfs2_rbm_from_block(&rbm, goal))) | 1470 | if (WARN_ON(gfs2_rbm_from_block(&rbm, goal))) |
1456 | return; | 1471 | return; |
1457 | 1472 | ||
1458 | ret = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, extlen, ip, true); | 1473 | ret = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, &extlen, ip, true, ap); |
1459 | if (ret == 0) { | 1474 | if (ret == 0) { |
1460 | rs->rs_rbm = rbm; | 1475 | rs->rs_rbm = rbm; |
1461 | rs->rs_free = extlen; | 1476 | rs->rs_free = extlen; |
@@ -1520,6 +1535,7 @@ static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block, | |||
1520 | * @rbm: The current position in the resource group | 1535 | * @rbm: The current position in the resource group |
1521 | * @ip: The inode for which we are searching for blocks | 1536 | * @ip: The inode for which we are searching for blocks |
1522 | * @minext: The minimum extent length | 1537 | * @minext: The minimum extent length |
1538 | * @maxext: A pointer to the maximum extent structure | ||
1523 | * | 1539 | * |
1524 | * This checks the current position in the rgrp to see whether there is | 1540 | * This checks the current position in the rgrp to see whether there is |
1525 | * a reservation covering this block. If not then this function is a | 1541 | * a reservation covering this block. If not then this function is a |
@@ -1532,7 +1548,8 @@ static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block, | |||
1532 | 1548 | ||
1533 | static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm, | 1549 | static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm, |
1534 | const struct gfs2_inode *ip, | 1550 | const struct gfs2_inode *ip, |
1535 | u32 minext) | 1551 | u32 minext, |
1552 | struct gfs2_extent *maxext) | ||
1536 | { | 1553 | { |
1537 | u64 block = gfs2_rbm_to_block(rbm); | 1554 | u64 block = gfs2_rbm_to_block(rbm); |
1538 | u32 extlen = 1; | 1555 | u32 extlen = 1; |
@@ -1545,8 +1562,7 @@ static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm, | |||
1545 | */ | 1562 | */ |
1546 | if (minext) { | 1563 | if (minext) { |
1547 | extlen = gfs2_free_extlen(rbm, minext); | 1564 | extlen = gfs2_free_extlen(rbm, minext); |
1548 | nblock = block + extlen; | 1565 | if (extlen <= maxext->len) |
1549 | if (extlen < minext) | ||
1550 | goto fail; | 1566 | goto fail; |
1551 | } | 1567 | } |
1552 | 1568 | ||
@@ -1555,9 +1571,17 @@ static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm, | |||
1555 | * and skip if parts of it are already reserved | 1571 | * and skip if parts of it are already reserved |
1556 | */ | 1572 | */ |
1557 | nblock = gfs2_next_unreserved_block(rbm->rgd, block, extlen, ip); | 1573 | nblock = gfs2_next_unreserved_block(rbm->rgd, block, extlen, ip); |
1558 | if (nblock == block) | 1574 | if (nblock == block) { |
1559 | return 0; | 1575 | if (!minext || extlen >= minext) |
1576 | return 0; | ||
1577 | |||
1578 | if (extlen > maxext->len) { | ||
1579 | maxext->len = extlen; | ||
1580 | maxext->rbm = *rbm; | ||
1581 | } | ||
1560 | fail: | 1582 | fail: |
1583 | nblock = block + extlen; | ||
1584 | } | ||
1561 | ret = gfs2_rbm_from_block(rbm, nblock); | 1585 | ret = gfs2_rbm_from_block(rbm, nblock); |
1562 | if (ret < 0) | 1586 | if (ret < 0) |
1563 | return ret; | 1587 | return ret; |
@@ -1568,30 +1592,38 @@ fail: | |||
1568 | * gfs2_rbm_find - Look for blocks of a particular state | 1592 | * gfs2_rbm_find - Look for blocks of a particular state |
1569 | * @rbm: Value/result starting position and final position | 1593 | * @rbm: Value/result starting position and final position |
1570 | * @state: The state which we want to find | 1594 | * @state: The state which we want to find |
1571 | * @minext: The requested extent length (0 for a single block) | 1595 | * @minext: Pointer to the requested extent length (NULL for a single block) |
1596 | * This is updated to be the actual reservation size. | ||
1572 | * @ip: If set, check for reservations | 1597 | * @ip: If set, check for reservations |
1573 | * @nowrap: Stop looking at the end of the rgrp, rather than wrapping | 1598 | * @nowrap: Stop looking at the end of the rgrp, rather than wrapping |
1574 | * around until we've reached the starting point. | 1599 | * around until we've reached the starting point. |
1600 | * @ap: the allocation parameters | ||
1575 | * | 1601 | * |
1576 | * Side effects: | 1602 | * Side effects: |
1577 | * - If looking for free blocks, we set GBF_FULL on each bitmap which | 1603 | * - If looking for free blocks, we set GBF_FULL on each bitmap which |
1578 | * has no free blocks in it. | 1604 | * has no free blocks in it. |
1605 | * - If looking for free blocks, we set rd_extfail_pt on each rgrp which | ||
1606 | * has come up short on a free block search. | ||
1579 | * | 1607 | * |
1580 | * Returns: 0 on success, -ENOSPC if there is no block of the requested state | 1608 | * Returns: 0 on success, -ENOSPC if there is no block of the requested state |
1581 | */ | 1609 | */ |
1582 | 1610 | ||
1583 | static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 minext, | 1611 | static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext, |
1584 | const struct gfs2_inode *ip, bool nowrap) | 1612 | const struct gfs2_inode *ip, bool nowrap, |
1613 | const struct gfs2_alloc_parms *ap) | ||
1585 | { | 1614 | { |
1586 | struct buffer_head *bh; | 1615 | struct buffer_head *bh; |
1587 | int initial_bii; | 1616 | int initial_bii; |
1588 | u32 initial_offset; | 1617 | u32 initial_offset; |
1618 | int first_bii = rbm->bii; | ||
1619 | u32 first_offset = rbm->offset; | ||
1589 | u32 offset; | 1620 | u32 offset; |
1590 | u8 *buffer; | 1621 | u8 *buffer; |
1591 | int n = 0; | 1622 | int n = 0; |
1592 | int iters = rbm->rgd->rd_length; | 1623 | int iters = rbm->rgd->rd_length; |
1593 | int ret; | 1624 | int ret; |
1594 | struct gfs2_bitmap *bi; | 1625 | struct gfs2_bitmap *bi; |
1626 | struct gfs2_extent maxext = { .rbm.rgd = rbm->rgd, }; | ||
1595 | 1627 | ||
1596 | /* If we are not starting at the beginning of a bitmap, then we | 1628 | /* If we are not starting at the beginning of a bitmap, then we |
1597 | * need to add one to the bitmap count to ensure that we search | 1629 | * need to add one to the bitmap count to ensure that we search |
@@ -1620,7 +1652,9 @@ static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 minext, | |||
1620 | return 0; | 1652 | return 0; |
1621 | 1653 | ||
1622 | initial_bii = rbm->bii; | 1654 | initial_bii = rbm->bii; |
1623 | ret = gfs2_reservation_check_and_update(rbm, ip, minext); | 1655 | ret = gfs2_reservation_check_and_update(rbm, ip, |
1656 | minext ? *minext : 0, | ||
1657 | &maxext); | ||
1624 | if (ret == 0) | 1658 | if (ret == 0) |
1625 | return 0; | 1659 | return 0; |
1626 | if (ret > 0) { | 1660 | if (ret > 0) { |
@@ -1655,6 +1689,24 @@ next_iter: | |||
1655 | break; | 1689 | break; |
1656 | } | 1690 | } |
1657 | 1691 | ||
1692 | if (minext == NULL || state != GFS2_BLKST_FREE) | ||
1693 | return -ENOSPC; | ||
1694 | |||
1695 | /* If the extent was too small, and it's smaller than the smallest | ||
1696 | to have failed before, remember for future reference that it's | ||
1697 | useless to search this rgrp again for this amount or more. */ | ||
1698 | if ((first_offset == 0) && (first_bii == 0) && | ||
1699 | (*minext < rbm->rgd->rd_extfail_pt)) | ||
1700 | rbm->rgd->rd_extfail_pt = *minext; | ||
1701 | |||
1702 | /* If the maximum extent we found is big enough to fulfill the | ||
1703 | minimum requirements, use it anyway. */ | ||
1704 | if (maxext.len) { | ||
1705 | *rbm = maxext.rbm; | ||
1706 | *minext = maxext.len; | ||
1707 | return 0; | ||
1708 | } | ||
1709 | |||
1658 | return -ENOSPC; | 1710 | return -ENOSPC; |
1659 | } | 1711 | } |
1660 | 1712 | ||
@@ -1680,7 +1732,8 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip | |||
1680 | 1732 | ||
1681 | while (1) { | 1733 | while (1) { |
1682 | down_write(&sdp->sd_log_flush_lock); | 1734 | down_write(&sdp->sd_log_flush_lock); |
1683 | error = gfs2_rbm_find(&rbm, GFS2_BLKST_UNLINKED, 0, NULL, true); | 1735 | error = gfs2_rbm_find(&rbm, GFS2_BLKST_UNLINKED, NULL, NULL, |
1736 | true, NULL); | ||
1684 | up_write(&sdp->sd_log_flush_lock); | 1737 | up_write(&sdp->sd_log_flush_lock); |
1685 | if (error == -ENOSPC) | 1738 | if (error == -ENOSPC) |
1686 | break; | 1739 | break; |
@@ -1891,7 +1944,9 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, const struct gfs2_alloc_parms *a | |||
1891 | } | 1944 | } |
1892 | 1945 | ||
1893 | /* Skip unuseable resource groups */ | 1946 | /* Skip unuseable resource groups */ |
1894 | if (rs->rs_rbm.rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR)) | 1947 | if ((rs->rs_rbm.rgd->rd_flags & (GFS2_RGF_NOALLOC | |
1948 | GFS2_RDF_ERROR)) || | ||
1949 | (ap->target > rs->rs_rbm.rgd->rd_extfail_pt)) | ||
1895 | goto skip_rgrp; | 1950 | goto skip_rgrp; |
1896 | 1951 | ||
1897 | if (sdp->sd_args.ar_rgrplvb) | 1952 | if (sdp->sd_args.ar_rgrplvb) |
@@ -1911,15 +1966,16 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, const struct gfs2_alloc_parms *a | |||
1911 | return 0; | 1966 | return 0; |
1912 | } | 1967 | } |
1913 | 1968 | ||
1914 | /* Drop reservation, if we couldn't use reserved rgrp */ | ||
1915 | if (gfs2_rs_active(rs)) | ||
1916 | gfs2_rs_deltree(rs); | ||
1917 | check_rgrp: | 1969 | check_rgrp: |
1918 | /* Check for unlinked inodes which can be reclaimed */ | 1970 | /* Check for unlinked inodes which can be reclaimed */ |
1919 | if (rs->rs_rbm.rgd->rd_flags & GFS2_RDF_CHECK) | 1971 | if (rs->rs_rbm.rgd->rd_flags & GFS2_RDF_CHECK) |
1920 | try_rgrp_unlink(rs->rs_rbm.rgd, &last_unlinked, | 1972 | try_rgrp_unlink(rs->rs_rbm.rgd, &last_unlinked, |
1921 | ip->i_no_addr); | 1973 | ip->i_no_addr); |
1922 | skip_rgrp: | 1974 | skip_rgrp: |
1975 | /* Drop reservation, if we couldn't use reserved rgrp */ | ||
1976 | if (gfs2_rs_active(rs)) | ||
1977 | gfs2_rs_deltree(rs); | ||
1978 | |||
1923 | /* Unlock rgrp if required */ | 1979 | /* Unlock rgrp if required */ |
1924 | if (!rg_locked) | 1980 | if (!rg_locked) |
1925 | gfs2_glock_dq_uninit(&rs->rs_rgd_gh); | 1981 | gfs2_glock_dq_uninit(&rs->rs_rgd_gh); |
@@ -2064,25 +2120,24 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart, | |||
2064 | * | 2120 | * |
2065 | */ | 2121 | */ |
2066 | 2122 | ||
2067 | int gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl) | 2123 | void gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl) |
2068 | { | 2124 | { |
2069 | struct gfs2_rgrpd *rgd = gl->gl_object; | 2125 | struct gfs2_rgrpd *rgd = gl->gl_object; |
2070 | struct gfs2_blkreserv *trs; | 2126 | struct gfs2_blkreserv *trs; |
2071 | const struct rb_node *n; | 2127 | const struct rb_node *n; |
2072 | 2128 | ||
2073 | if (rgd == NULL) | 2129 | if (rgd == NULL) |
2074 | return 0; | 2130 | return; |
2075 | gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u r:%u\n", | 2131 | gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u r:%u e:%u\n", |
2076 | (unsigned long long)rgd->rd_addr, rgd->rd_flags, | 2132 | (unsigned long long)rgd->rd_addr, rgd->rd_flags, |
2077 | rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes, | 2133 | rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes, |
2078 | rgd->rd_reserved); | 2134 | rgd->rd_reserved, rgd->rd_extfail_pt); |
2079 | spin_lock(&rgd->rd_rsspin); | 2135 | spin_lock(&rgd->rd_rsspin); |
2080 | for (n = rb_first(&rgd->rd_rstree); n; n = rb_next(&trs->rs_node)) { | 2136 | for (n = rb_first(&rgd->rd_rstree); n; n = rb_next(&trs->rs_node)) { |
2081 | trs = rb_entry(n, struct gfs2_blkreserv, rs_node); | 2137 | trs = rb_entry(n, struct gfs2_blkreserv, rs_node); |
2082 | dump_rs(seq, trs); | 2138 | dump_rs(seq, trs); |
2083 | } | 2139 | } |
2084 | spin_unlock(&rgd->rd_rsspin); | 2140 | spin_unlock(&rgd->rd_rsspin); |
2085 | return 0; | ||
2086 | } | 2141 | } |
2087 | 2142 | ||
2088 | static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd) | 2143 | static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd) |
@@ -2184,18 +2239,20 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, | |||
2184 | int error; | 2239 | int error; |
2185 | 2240 | ||
2186 | gfs2_set_alloc_start(&rbm, ip, dinode); | 2241 | gfs2_set_alloc_start(&rbm, ip, dinode); |
2187 | error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, 0, ip, false); | 2242 | error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, NULL, ip, false, NULL); |
2188 | 2243 | ||
2189 | if (error == -ENOSPC) { | 2244 | if (error == -ENOSPC) { |
2190 | gfs2_set_alloc_start(&rbm, ip, dinode); | 2245 | gfs2_set_alloc_start(&rbm, ip, dinode); |
2191 | error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, 0, NULL, false); | 2246 | error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, NULL, NULL, false, |
2247 | NULL); | ||
2192 | } | 2248 | } |
2193 | 2249 | ||
2194 | /* Since all blocks are reserved in advance, this shouldn't happen */ | 2250 | /* Since all blocks are reserved in advance, this shouldn't happen */ |
2195 | if (error) { | 2251 | if (error) { |
2196 | fs_warn(sdp, "inum=%llu error=%d, nblocks=%u, full=%d\n", | 2252 | fs_warn(sdp, "inum=%llu error=%d, nblocks=%u, full=%d fail_pt=%d\n", |
2197 | (unsigned long long)ip->i_no_addr, error, *nblocks, | 2253 | (unsigned long long)ip->i_no_addr, error, *nblocks, |
2198 | test_bit(GBF_FULL, &rbm.rgd->rd_bits->bi_flags)); | 2254 | test_bit(GBF_FULL, &rbm.rgd->rd_bits->bi_flags), |
2255 | rbm.rgd->rd_extfail_pt); | ||
2199 | goto rgrp_error; | 2256 | goto rgrp_error; |
2200 | } | 2257 | } |
2201 | 2258 | ||
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index 3a10d2ffbbe7..463ab2e95d1c 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h | |||
@@ -68,7 +68,7 @@ extern void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist, | |||
68 | extern void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state); | 68 | extern void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state); |
69 | extern void gfs2_rlist_free(struct gfs2_rgrp_list *rlist); | 69 | extern void gfs2_rlist_free(struct gfs2_rgrp_list *rlist); |
70 | extern u64 gfs2_ri_total(struct gfs2_sbd *sdp); | 70 | extern u64 gfs2_ri_total(struct gfs2_sbd *sdp); |
71 | extern int gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl); | 71 | extern void gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl); |
72 | extern int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, | 72 | extern int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, |
73 | struct buffer_head *bh, | 73 | struct buffer_head *bh, |
74 | const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed); | 74 | const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed); |
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 35da5b19c0de..60f60f6181f3 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c | |||
@@ -369,6 +369,33 @@ int gfs2_jdesc_check(struct gfs2_jdesc *jd) | |||
369 | return 0; | 369 | return 0; |
370 | } | 370 | } |
371 | 371 | ||
372 | static int init_threads(struct gfs2_sbd *sdp) | ||
373 | { | ||
374 | struct task_struct *p; | ||
375 | int error = 0; | ||
376 | |||
377 | p = kthread_run(gfs2_logd, sdp, "gfs2_logd"); | ||
378 | if (IS_ERR(p)) { | ||
379 | error = PTR_ERR(p); | ||
380 | fs_err(sdp, "can't start logd thread: %d\n", error); | ||
381 | return error; | ||
382 | } | ||
383 | sdp->sd_logd_process = p; | ||
384 | |||
385 | p = kthread_run(gfs2_quotad, sdp, "gfs2_quotad"); | ||
386 | if (IS_ERR(p)) { | ||
387 | error = PTR_ERR(p); | ||
388 | fs_err(sdp, "can't start quotad thread: %d\n", error); | ||
389 | goto fail; | ||
390 | } | ||
391 | sdp->sd_quotad_process = p; | ||
392 | return 0; | ||
393 | |||
394 | fail: | ||
395 | kthread_stop(sdp->sd_logd_process); | ||
396 | return error; | ||
397 | } | ||
398 | |||
372 | /** | 399 | /** |
373 | * gfs2_make_fs_rw - Turn a Read-Only FS into a Read-Write one | 400 | * gfs2_make_fs_rw - Turn a Read-Only FS into a Read-Write one |
374 | * @sdp: the filesystem | 401 | * @sdp: the filesystem |
@@ -384,10 +411,14 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp) | |||
384 | struct gfs2_log_header_host head; | 411 | struct gfs2_log_header_host head; |
385 | int error; | 412 | int error; |
386 | 413 | ||
387 | error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, 0, &t_gh); | 414 | error = init_threads(sdp); |
388 | if (error) | 415 | if (error) |
389 | return error; | 416 | return error; |
390 | 417 | ||
418 | error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, 0, &t_gh); | ||
419 | if (error) | ||
420 | goto fail_threads; | ||
421 | |||
391 | j_gl->gl_ops->go_inval(j_gl, DIO_METADATA); | 422 | j_gl->gl_ops->go_inval(j_gl, DIO_METADATA); |
392 | 423 | ||
393 | error = gfs2_find_jhead(sdp->sd_jdesc, &head); | 424 | error = gfs2_find_jhead(sdp->sd_jdesc, &head); |
@@ -417,7 +448,9 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp) | |||
417 | fail: | 448 | fail: |
418 | t_gh.gh_flags |= GL_NOCACHE; | 449 | t_gh.gh_flags |= GL_NOCACHE; |
419 | gfs2_glock_dq_uninit(&t_gh); | 450 | gfs2_glock_dq_uninit(&t_gh); |
420 | 451 | fail_threads: | |
452 | kthread_stop(sdp->sd_quotad_process); | ||
453 | kthread_stop(sdp->sd_logd_process); | ||
421 | return error; | 454 | return error; |
422 | } | 455 | } |
423 | 456 | ||
@@ -800,6 +833,9 @@ static int gfs2_make_fs_ro(struct gfs2_sbd *sdp) | |||
800 | struct gfs2_holder t_gh; | 833 | struct gfs2_holder t_gh; |
801 | int error; | 834 | int error; |
802 | 835 | ||
836 | kthread_stop(sdp->sd_quotad_process); | ||
837 | kthread_stop(sdp->sd_logd_process); | ||
838 | |||
803 | flush_workqueue(gfs2_delete_workqueue); | 839 | flush_workqueue(gfs2_delete_workqueue); |
804 | gfs2_quota_sync(sdp->sd_vfs, 0); | 840 | gfs2_quota_sync(sdp->sd_vfs, 0); |
805 | gfs2_statfs_sync(sdp->sd_vfs, 0); | 841 | gfs2_statfs_sync(sdp->sd_vfs, 0); |
@@ -857,9 +893,6 @@ restart: | |||
857 | } | 893 | } |
858 | spin_unlock(&sdp->sd_jindex_spin); | 894 | spin_unlock(&sdp->sd_jindex_spin); |
859 | 895 | ||
860 | kthread_stop(sdp->sd_quotad_process); | ||
861 | kthread_stop(sdp->sd_logd_process); | ||
862 | |||
863 | if (!(sb->s_flags & MS_RDONLY)) { | 896 | if (!(sb->s_flags & MS_RDONLY)) { |
864 | error = gfs2_make_fs_ro(sdp); | 897 | error = gfs2_make_fs_ro(sdp); |
865 | if (error) | 898 | if (error) |
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 39c1d9469677..5c097596104b 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <linux/xattr.h> | 21 | #include <linux/xattr.h> |
22 | #include <linux/fs.h> | 22 | #include <linux/fs.h> |
23 | #include <linux/percpu-refcount.h> | 23 | #include <linux/percpu-refcount.h> |
24 | #include <linux/seq_file.h> | ||
24 | 25 | ||
25 | #ifdef CONFIG_CGROUPS | 26 | #ifdef CONFIG_CGROUPS |
26 | 27 | ||
@@ -28,8 +29,6 @@ struct cgroupfs_root; | |||
28 | struct cgroup_subsys; | 29 | struct cgroup_subsys; |
29 | struct inode; | 30 | struct inode; |
30 | struct cgroup; | 31 | struct cgroup; |
31 | struct css_id; | ||
32 | struct eventfd_ctx; | ||
33 | 32 | ||
34 | extern int cgroup_init_early(void); | 33 | extern int cgroup_init_early(void); |
35 | extern int cgroup_init(void); | 34 | extern int cgroup_init(void); |
@@ -79,8 +78,6 @@ struct cgroup_subsys_state { | |||
79 | struct cgroup_subsys_state *parent; | 78 | struct cgroup_subsys_state *parent; |
80 | 79 | ||
81 | unsigned long flags; | 80 | unsigned long flags; |
82 | /* ID for this css, if possible */ | ||
83 | struct css_id __rcu *id; | ||
84 | 81 | ||
85 | /* percpu_ref killing and RCU release */ | 82 | /* percpu_ref killing and RCU release */ |
86 | struct rcu_head rcu_head; | 83 | struct rcu_head rcu_head; |
@@ -239,10 +236,6 @@ struct cgroup { | |||
239 | struct rcu_head rcu_head; | 236 | struct rcu_head rcu_head; |
240 | struct work_struct destroy_work; | 237 | struct work_struct destroy_work; |
241 | 238 | ||
242 | /* List of events which userspace want to receive */ | ||
243 | struct list_head event_list; | ||
244 | spinlock_t event_list_lock; | ||
245 | |||
246 | /* directory xattrs */ | 239 | /* directory xattrs */ |
247 | struct simple_xattrs xattrs; | 240 | struct simple_xattrs xattrs; |
248 | }; | 241 | }; |
@@ -280,6 +273,9 @@ enum { | |||
280 | * - "tasks" is removed. Everything should be at process | 273 | * - "tasks" is removed. Everything should be at process |
281 | * granularity. Use "cgroup.procs" instead. | 274 | * granularity. Use "cgroup.procs" instead. |
282 | * | 275 | * |
276 | * - "cgroup.procs" is not sorted. pids will be unique unless they | ||
277 | * got recycled inbetween reads. | ||
278 | * | ||
283 | * - "release_agent" and "notify_on_release" are removed. | 279 | * - "release_agent" and "notify_on_release" are removed. |
284 | * Replacement notification mechanism will be implemented. | 280 | * Replacement notification mechanism will be implemented. |
285 | * | 281 | * |
@@ -320,9 +316,6 @@ struct cgroupfs_root { | |||
320 | /* Unique id for this hierarchy. */ | 316 | /* Unique id for this hierarchy. */ |
321 | int hierarchy_id; | 317 | int hierarchy_id; |
322 | 318 | ||
323 | /* A list running through the attached subsystems */ | ||
324 | struct list_head subsys_list; | ||
325 | |||
326 | /* The root cgroup for this hierarchy */ | 319 | /* The root cgroup for this hierarchy */ |
327 | struct cgroup top_cgroup; | 320 | struct cgroup top_cgroup; |
328 | 321 | ||
@@ -389,16 +382,6 @@ struct css_set { | |||
389 | }; | 382 | }; |
390 | 383 | ||
391 | /* | 384 | /* |
392 | * cgroup_map_cb is an abstract callback API for reporting map-valued | ||
393 | * control files | ||
394 | */ | ||
395 | |||
396 | struct cgroup_map_cb { | ||
397 | int (*fill)(struct cgroup_map_cb *cb, const char *key, u64 value); | ||
398 | void *state; | ||
399 | }; | ||
400 | |||
401 | /* | ||
402 | * struct cftype: handler definitions for cgroup control files | 385 | * struct cftype: handler definitions for cgroup control files |
403 | * | 386 | * |
404 | * When reading/writing to a file: | 387 | * When reading/writing to a file: |
@@ -445,10 +428,6 @@ struct cftype { | |||
445 | */ | 428 | */ |
446 | struct cgroup_subsys *ss; | 429 | struct cgroup_subsys *ss; |
447 | 430 | ||
448 | int (*open)(struct inode *inode, struct file *file); | ||
449 | ssize_t (*read)(struct cgroup_subsys_state *css, struct cftype *cft, | ||
450 | struct file *file, | ||
451 | char __user *buf, size_t nbytes, loff_t *ppos); | ||
452 | /* | 431 | /* |
453 | * read_u64() is a shortcut for the common case of returning a | 432 | * read_u64() is a shortcut for the common case of returning a |
454 | * single integer. Use it in place of read() | 433 | * single integer. Use it in place of read() |
@@ -458,24 +437,14 @@ struct cftype { | |||
458 | * read_s64() is a signed version of read_u64() | 437 | * read_s64() is a signed version of read_u64() |
459 | */ | 438 | */ |
460 | s64 (*read_s64)(struct cgroup_subsys_state *css, struct cftype *cft); | 439 | s64 (*read_s64)(struct cgroup_subsys_state *css, struct cftype *cft); |
461 | /* | ||
462 | * read_map() is used for defining a map of key/value | ||
463 | * pairs. It should call cb->fill(cb, key, value) for each | ||
464 | * entry. The key/value pairs (and their ordering) should not | ||
465 | * change between reboots. | ||
466 | */ | ||
467 | int (*read_map)(struct cgroup_subsys_state *css, struct cftype *cft, | ||
468 | struct cgroup_map_cb *cb); | ||
469 | /* | ||
470 | * read_seq_string() is used for outputting a simple sequence | ||
471 | * using seqfile. | ||
472 | */ | ||
473 | int (*read_seq_string)(struct cgroup_subsys_state *css, | ||
474 | struct cftype *cft, struct seq_file *m); | ||
475 | 440 | ||
476 | ssize_t (*write)(struct cgroup_subsys_state *css, struct cftype *cft, | 441 | /* generic seq_file read interface */ |
477 | struct file *file, | 442 | int (*seq_show)(struct seq_file *sf, void *v); |
478 | const char __user *buf, size_t nbytes, loff_t *ppos); | 443 | |
444 | /* optional ops, implement all or none */ | ||
445 | void *(*seq_start)(struct seq_file *sf, loff_t *ppos); | ||
446 | void *(*seq_next)(struct seq_file *sf, void *v, loff_t *ppos); | ||
447 | void (*seq_stop)(struct seq_file *sf, void *v); | ||
479 | 448 | ||
480 | /* | 449 | /* |
481 | * write_u64() is a shortcut for the common case of accepting | 450 | * write_u64() is a shortcut for the common case of accepting |
@@ -504,27 +473,6 @@ struct cftype { | |||
504 | * kick type for multiplexing. | 473 | * kick type for multiplexing. |
505 | */ | 474 | */ |
506 | int (*trigger)(struct cgroup_subsys_state *css, unsigned int event); | 475 | int (*trigger)(struct cgroup_subsys_state *css, unsigned int event); |
507 | |||
508 | int (*release)(struct inode *inode, struct file *file); | ||
509 | |||
510 | /* | ||
511 | * register_event() callback will be used to add new userspace | ||
512 | * waiter for changes related to the cftype. Implement it if | ||
513 | * you want to provide this functionality. Use eventfd_signal() | ||
514 | * on eventfd to send notification to userspace. | ||
515 | */ | ||
516 | int (*register_event)(struct cgroup_subsys_state *css, | ||
517 | struct cftype *cft, struct eventfd_ctx *eventfd, | ||
518 | const char *args); | ||
519 | /* | ||
520 | * unregister_event() callback will be called when userspace | ||
521 | * closes the eventfd or on cgroup removing. | ||
522 | * This callback must be implemented, if you want provide | ||
523 | * notification functionality. | ||
524 | */ | ||
525 | void (*unregister_event)(struct cgroup_subsys_state *css, | ||
526 | struct cftype *cft, | ||
527 | struct eventfd_ctx *eventfd); | ||
528 | }; | 476 | }; |
529 | 477 | ||
530 | /* | 478 | /* |
@@ -538,6 +486,26 @@ struct cftype_set { | |||
538 | }; | 486 | }; |
539 | 487 | ||
540 | /* | 488 | /* |
489 | * cgroupfs file entry, pointed to from leaf dentry->d_fsdata. Don't | ||
490 | * access directly. | ||
491 | */ | ||
492 | struct cfent { | ||
493 | struct list_head node; | ||
494 | struct dentry *dentry; | ||
495 | struct cftype *type; | ||
496 | struct cgroup_subsys_state *css; | ||
497 | |||
498 | /* file xattrs */ | ||
499 | struct simple_xattrs xattrs; | ||
500 | }; | ||
501 | |||
502 | /* seq_file->private points to the following, only ->priv is public */ | ||
503 | struct cgroup_open_file { | ||
504 | struct cfent *cfe; | ||
505 | void *priv; | ||
506 | }; | ||
507 | |||
508 | /* | ||
541 | * See the comment above CGRP_ROOT_SANE_BEHAVIOR for details. This | 509 | * See the comment above CGRP_ROOT_SANE_BEHAVIOR for details. This |
542 | * function can be called as long as @cgrp is accessible. | 510 | * function can be called as long as @cgrp is accessible. |
543 | */ | 511 | */ |
@@ -552,6 +520,18 @@ static inline const char *cgroup_name(const struct cgroup *cgrp) | |||
552 | return rcu_dereference(cgrp->name)->name; | 520 | return rcu_dereference(cgrp->name)->name; |
553 | } | 521 | } |
554 | 522 | ||
523 | static inline struct cgroup_subsys_state *seq_css(struct seq_file *seq) | ||
524 | { | ||
525 | struct cgroup_open_file *of = seq->private; | ||
526 | return of->cfe->css; | ||
527 | } | ||
528 | |||
529 | static inline struct cftype *seq_cft(struct seq_file *seq) | ||
530 | { | ||
531 | struct cgroup_open_file *of = seq->private; | ||
532 | return of->cfe->type; | ||
533 | } | ||
534 | |||
555 | int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts); | 535 | int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts); |
556 | int cgroup_rm_cftypes(struct cftype *cfts); | 536 | int cgroup_rm_cftypes(struct cftype *cfts); |
557 | 537 | ||
@@ -631,12 +611,8 @@ struct cgroup_subsys { | |||
631 | #define MAX_CGROUP_TYPE_NAMELEN 32 | 611 | #define MAX_CGROUP_TYPE_NAMELEN 32 |
632 | const char *name; | 612 | const char *name; |
633 | 613 | ||
634 | /* | 614 | /* link to parent, protected by cgroup_lock() */ |
635 | * Link to parent, and list entry in parent's children. | ||
636 | * Protected by cgroup_lock() | ||
637 | */ | ||
638 | struct cgroupfs_root *root; | 615 | struct cgroupfs_root *root; |
639 | struct list_head sibling; | ||
640 | 616 | ||
641 | /* list of cftype_sets */ | 617 | /* list of cftype_sets */ |
642 | struct list_head cftsets; | 618 | struct list_head cftsets; |
diff --git a/include/linux/libata.h b/include/linux/libata.h index 9b503376738f..bec6dbe939a0 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h | |||
@@ -419,6 +419,8 @@ enum { | |||
419 | ATA_HORKAGE_MAX_SEC_LBA48 = (1 << 17), /* Set max sects to 65535 */ | 419 | ATA_HORKAGE_MAX_SEC_LBA48 = (1 << 17), /* Set max sects to 65535 */ |
420 | ATA_HORKAGE_ATAPI_DMADIR = (1 << 18), /* device requires dmadir */ | 420 | ATA_HORKAGE_ATAPI_DMADIR = (1 << 18), /* device requires dmadir */ |
421 | ATA_HORKAGE_NO_NCQ_TRIM = (1 << 19), /* don't use queued TRIM */ | 421 | ATA_HORKAGE_NO_NCQ_TRIM = (1 << 19), /* don't use queued TRIM */ |
422 | ATA_HORKAGE_NOLPM = (1 << 20), /* don't use LPM */ | ||
423 | ATA_HORKAGE_WD_BROKEN_LPM = (1 << 21), /* some WDs have broken LPM */ | ||
422 | 424 | ||
423 | /* DMA mask for user DMA control: User visible values; DO NOT | 425 | /* DMA mask for user DMA control: User visible values; DO NOT |
424 | renumber */ | 426 | renumber */ |
diff --git a/include/linux/vmpressure.h b/include/linux/vmpressure.h index 3f3788d49362..3e4535876d37 100644 --- a/include/linux/vmpressure.h +++ b/include/linux/vmpressure.h | |||
@@ -7,6 +7,7 @@ | |||
7 | #include <linux/gfp.h> | 7 | #include <linux/gfp.h> |
8 | #include <linux/types.h> | 8 | #include <linux/types.h> |
9 | #include <linux/cgroup.h> | 9 | #include <linux/cgroup.h> |
10 | #include <linux/eventfd.h> | ||
10 | 11 | ||
11 | struct vmpressure { | 12 | struct vmpressure { |
12 | unsigned long scanned; | 13 | unsigned long scanned; |
@@ -33,13 +34,10 @@ extern void vmpressure_init(struct vmpressure *vmpr); | |||
33 | extern void vmpressure_cleanup(struct vmpressure *vmpr); | 34 | extern void vmpressure_cleanup(struct vmpressure *vmpr); |
34 | extern struct vmpressure *memcg_to_vmpressure(struct mem_cgroup *memcg); | 35 | extern struct vmpressure *memcg_to_vmpressure(struct mem_cgroup *memcg); |
35 | extern struct cgroup_subsys_state *vmpressure_to_css(struct vmpressure *vmpr); | 36 | extern struct cgroup_subsys_state *vmpressure_to_css(struct vmpressure *vmpr); |
36 | extern struct vmpressure *css_to_vmpressure(struct cgroup_subsys_state *css); | 37 | extern int vmpressure_register_event(struct mem_cgroup *memcg, |
37 | extern int vmpressure_register_event(struct cgroup_subsys_state *css, | ||
38 | struct cftype *cft, | ||
39 | struct eventfd_ctx *eventfd, | 38 | struct eventfd_ctx *eventfd, |
40 | const char *args); | 39 | const char *args); |
41 | extern void vmpressure_unregister_event(struct cgroup_subsys_state *css, | 40 | extern void vmpressure_unregister_event(struct mem_cgroup *memcg, |
42 | struct cftype *cft, | ||
43 | struct eventfd_ctx *eventfd); | 41 | struct eventfd_ctx *eventfd); |
44 | #else | 42 | #else |
45 | static inline void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, | 43 | static inline void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, |
diff --git a/include/uapi/linux/gfs2_ondisk.h b/include/uapi/linux/gfs2_ondisk.h index b2de1f9a88d6..0f24c07aed51 100644 --- a/include/uapi/linux/gfs2_ondisk.h +++ b/include/uapi/linux/gfs2_ondisk.h | |||
@@ -319,7 +319,16 @@ struct gfs2_leaf { | |||
319 | __be32 lf_dirent_format; /* Format of the dirents */ | 319 | __be32 lf_dirent_format; /* Format of the dirents */ |
320 | __be64 lf_next; /* Next leaf, if overflow */ | 320 | __be64 lf_next; /* Next leaf, if overflow */ |
321 | 321 | ||
322 | __u8 lf_reserved[64]; | 322 | union { |
323 | __u8 lf_reserved[64]; | ||
324 | struct { | ||
325 | __be64 lf_inode; /* Dir inode number */ | ||
326 | __be32 lf_dist; /* Dist from inode on chain */ | ||
327 | __be32 lf_nsec; /* Last ins/del usecs */ | ||
328 | __be64 lf_sec; /* Last ins/del in secs */ | ||
329 | __u8 lf_reserved2[40]; | ||
330 | }; | ||
331 | }; | ||
323 | }; | 332 | }; |
324 | 333 | ||
325 | /* | 334 | /* |
diff --git a/init/Kconfig b/init/Kconfig index 5236dc562a36..8d402e33b7fc 100644 --- a/init/Kconfig +++ b/init/Kconfig | |||
@@ -854,7 +854,6 @@ config NUMA_BALANCING | |||
854 | 854 | ||
855 | menuconfig CGROUPS | 855 | menuconfig CGROUPS |
856 | boolean "Control Group support" | 856 | boolean "Control Group support" |
857 | depends on EVENTFD | ||
858 | help | 857 | help |
859 | This option adds support for grouping sets of processes together, for | 858 | This option adds support for grouping sets of processes together, for |
860 | use with process control subsystems such as Cpusets, CFS, memory | 859 | use with process control subsystems such as Cpusets, CFS, memory |
@@ -921,6 +920,7 @@ config MEMCG | |||
921 | bool "Memory Resource Controller for Control Groups" | 920 | bool "Memory Resource Controller for Control Groups" |
922 | depends on RESOURCE_COUNTERS | 921 | depends on RESOURCE_COUNTERS |
923 | select MM_OWNER | 922 | select MM_OWNER |
923 | select EVENTFD | ||
924 | help | 924 | help |
925 | Provides a memory resource controller that manages both anonymous | 925 | Provides a memory resource controller that manages both anonymous |
926 | memory and page cache. (See Documentation/cgroups/memory.txt) | 926 | memory and page cache. (See Documentation/cgroups/memory.txt) |
@@ -1160,7 +1160,6 @@ config UIDGID_STRICT_TYPE_CHECKS | |||
1160 | 1160 | ||
1161 | config SCHED_AUTOGROUP | 1161 | config SCHED_AUTOGROUP |
1162 | bool "Automatic process group scheduling" | 1162 | bool "Automatic process group scheduling" |
1163 | select EVENTFD | ||
1164 | select CGROUPS | 1163 | select CGROUPS |
1165 | select CGROUP_SCHED | 1164 | select CGROUP_SCHED |
1166 | select FAIR_GROUP_SCHED | 1165 | select FAIR_GROUP_SCHED |
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index bc1dcabe9217..e2f46ba37f72 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c | |||
@@ -41,7 +41,6 @@ | |||
41 | #include <linux/rcupdate.h> | 41 | #include <linux/rcupdate.h> |
42 | #include <linux/sched.h> | 42 | #include <linux/sched.h> |
43 | #include <linux/backing-dev.h> | 43 | #include <linux/backing-dev.h> |
44 | #include <linux/seq_file.h> | ||
45 | #include <linux/slab.h> | 44 | #include <linux/slab.h> |
46 | #include <linux/magic.h> | 45 | #include <linux/magic.h> |
47 | #include <linux/spinlock.h> | 46 | #include <linux/spinlock.h> |
@@ -56,15 +55,20 @@ | |||
56 | #include <linux/pid_namespace.h> | 55 | #include <linux/pid_namespace.h> |
57 | #include <linux/idr.h> | 56 | #include <linux/idr.h> |
58 | #include <linux/vmalloc.h> /* TODO: replace with more sophisticated array */ | 57 | #include <linux/vmalloc.h> /* TODO: replace with more sophisticated array */ |
59 | #include <linux/eventfd.h> | ||
60 | #include <linux/poll.h> | ||
61 | #include <linux/flex_array.h> /* used in cgroup_attach_task */ | 58 | #include <linux/flex_array.h> /* used in cgroup_attach_task */ |
62 | #include <linux/kthread.h> | 59 | #include <linux/kthread.h> |
63 | #include <linux/file.h> | ||
64 | 60 | ||
65 | #include <linux/atomic.h> | 61 | #include <linux/atomic.h> |
66 | 62 | ||
67 | /* | 63 | /* |
64 | * pidlists linger the following amount before being destroyed. The goal | ||
65 | * is avoiding frequent destruction in the middle of consecutive read calls | ||
66 | * Expiring in the middle is a performance problem not a correctness one. | ||
67 | * 1 sec should be enough. | ||
68 | */ | ||
69 | #define CGROUP_PIDLIST_DESTROY_DELAY HZ | ||
70 | |||
71 | /* | ||
68 | * cgroup_mutex is the master lock. Any modification to cgroup or its | 72 | * cgroup_mutex is the master lock. Any modification to cgroup or its |
69 | * hierarchy must be performed while holding it. | 73 | * hierarchy must be performed while holding it. |
70 | * | 74 | * |
@@ -89,6 +93,19 @@ static DEFINE_MUTEX(cgroup_mutex); | |||
89 | 93 | ||
90 | static DEFINE_MUTEX(cgroup_root_mutex); | 94 | static DEFINE_MUTEX(cgroup_root_mutex); |
91 | 95 | ||
96 | #define cgroup_assert_mutex_or_rcu_locked() \ | ||
97 | rcu_lockdep_assert(rcu_read_lock_held() || \ | ||
98 | lockdep_is_held(&cgroup_mutex), \ | ||
99 | "cgroup_mutex or RCU read lock required"); | ||
100 | |||
101 | #ifdef CONFIG_LOCKDEP | ||
102 | #define cgroup_assert_mutex_or_root_locked() \ | ||
103 | WARN_ON_ONCE(debug_locks && (!lockdep_is_held(&cgroup_mutex) && \ | ||
104 | !lockdep_is_held(&cgroup_root_mutex))) | ||
105 | #else | ||
106 | #define cgroup_assert_mutex_or_root_locked() do { } while (0) | ||
107 | #endif | ||
108 | |||
92 | /* | 109 | /* |
93 | * cgroup destruction makes heavy use of work items and there can be a lot | 110 | * cgroup destruction makes heavy use of work items and there can be a lot |
94 | * of concurrent destructions. Use a separate workqueue so that cgroup | 111 | * of concurrent destructions. Use a separate workqueue so that cgroup |
@@ -98,6 +115,12 @@ static DEFINE_MUTEX(cgroup_root_mutex); | |||
98 | static struct workqueue_struct *cgroup_destroy_wq; | 115 | static struct workqueue_struct *cgroup_destroy_wq; |
99 | 116 | ||
100 | /* | 117 | /* |
118 | * pidlist destructions need to be flushed on cgroup destruction. Use a | ||
119 | * separate workqueue as flush domain. | ||
120 | */ | ||
121 | static struct workqueue_struct *cgroup_pidlist_destroy_wq; | ||
122 | |||
123 | /* | ||
101 | * Generate an array of cgroup subsystem pointers. At boot time, this is | 124 | * Generate an array of cgroup subsystem pointers. At boot time, this is |
102 | * populated with the built in subsystems, and modular subsystems are | 125 | * populated with the built in subsystems, and modular subsystems are |
103 | * registered after that. The mutable section of this array is protected by | 126 | * registered after that. The mutable section of this array is protected by |
@@ -119,49 +142,6 @@ static struct cgroupfs_root cgroup_dummy_root; | |||
119 | /* dummy_top is a shorthand for the dummy hierarchy's top cgroup */ | 142 | /* dummy_top is a shorthand for the dummy hierarchy's top cgroup */ |
120 | static struct cgroup * const cgroup_dummy_top = &cgroup_dummy_root.top_cgroup; | 143 | static struct cgroup * const cgroup_dummy_top = &cgroup_dummy_root.top_cgroup; |
121 | 144 | ||
122 | /* | ||
123 | * cgroupfs file entry, pointed to from leaf dentry->d_fsdata. | ||
124 | */ | ||
125 | struct cfent { | ||
126 | struct list_head node; | ||
127 | struct dentry *dentry; | ||
128 | struct cftype *type; | ||
129 | struct cgroup_subsys_state *css; | ||
130 | |||
131 | /* file xattrs */ | ||
132 | struct simple_xattrs xattrs; | ||
133 | }; | ||
134 | |||
135 | /* | ||
136 | * cgroup_event represents events which userspace want to receive. | ||
137 | */ | ||
138 | struct cgroup_event { | ||
139 | /* | ||
140 | * css which the event belongs to. | ||
141 | */ | ||
142 | struct cgroup_subsys_state *css; | ||
143 | /* | ||
144 | * Control file which the event associated. | ||
145 | */ | ||
146 | struct cftype *cft; | ||
147 | /* | ||
148 | * eventfd to signal userspace about the event. | ||
149 | */ | ||
150 | struct eventfd_ctx *eventfd; | ||
151 | /* | ||
152 | * Each of these stored in a list by the cgroup. | ||
153 | */ | ||
154 | struct list_head list; | ||
155 | /* | ||
156 | * All fields below needed to unregister event when | ||
157 | * userspace closes eventfd. | ||
158 | */ | ||
159 | poll_table pt; | ||
160 | wait_queue_head_t *wqh; | ||
161 | wait_queue_t wait; | ||
162 | struct work_struct remove; | ||
163 | }; | ||
164 | |||
165 | /* The list of hierarchy roots */ | 145 | /* The list of hierarchy roots */ |
166 | 146 | ||
167 | static LIST_HEAD(cgroup_roots); | 147 | static LIST_HEAD(cgroup_roots); |
@@ -200,6 +180,7 @@ static int cgroup_destroy_locked(struct cgroup *cgrp); | |||
200 | static int cgroup_addrm_files(struct cgroup *cgrp, struct cftype cfts[], | 180 | static int cgroup_addrm_files(struct cgroup *cgrp, struct cftype cfts[], |
201 | bool is_add); | 181 | bool is_add); |
202 | static int cgroup_file_release(struct inode *inode, struct file *file); | 182 | static int cgroup_file_release(struct inode *inode, struct file *file); |
183 | static void cgroup_pidlist_destroy_all(struct cgroup *cgrp); | ||
203 | 184 | ||
204 | /** | 185 | /** |
205 | * cgroup_css - obtain a cgroup's css for the specified subsystem | 186 | * cgroup_css - obtain a cgroup's css for the specified subsystem |
@@ -262,16 +243,32 @@ static int notify_on_release(const struct cgroup *cgrp) | |||
262 | } | 243 | } |
263 | 244 | ||
264 | /** | 245 | /** |
246 | * for_each_css - iterate all css's of a cgroup | ||
247 | * @css: the iteration cursor | ||
248 | * @ssid: the index of the subsystem, CGROUP_SUBSYS_COUNT after reaching the end | ||
249 | * @cgrp: the target cgroup to iterate css's of | ||
250 | * | ||
251 | * Should be called under cgroup_mutex. | ||
252 | */ | ||
253 | #define for_each_css(css, ssid, cgrp) \ | ||
254 | for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT; (ssid)++) \ | ||
255 | if (!((css) = rcu_dereference_check( \ | ||
256 | (cgrp)->subsys[(ssid)], \ | ||
257 | lockdep_is_held(&cgroup_mutex)))) { } \ | ||
258 | else | ||
259 | |||
260 | /** | ||
265 | * for_each_subsys - iterate all loaded cgroup subsystems | 261 | * for_each_subsys - iterate all loaded cgroup subsystems |
266 | * @ss: the iteration cursor | 262 | * @ss: the iteration cursor |
267 | * @i: the index of @ss, CGROUP_SUBSYS_COUNT after reaching the end | 263 | * @ssid: the index of @ss, CGROUP_SUBSYS_COUNT after reaching the end |
268 | * | 264 | * |
269 | * Should be called under cgroup_mutex. | 265 | * Iterates through all loaded subsystems. Should be called under |
266 | * cgroup_mutex or cgroup_root_mutex. | ||
270 | */ | 267 | */ |
271 | #define for_each_subsys(ss, i) \ | 268 | #define for_each_subsys(ss, ssid) \ |
272 | for ((i) = 0; (i) < CGROUP_SUBSYS_COUNT; (i)++) \ | 269 | for (({ cgroup_assert_mutex_or_root_locked(); (ssid) = 0; }); \ |
273 | if (({ lockdep_assert_held(&cgroup_mutex); \ | 270 | (ssid) < CGROUP_SUBSYS_COUNT; (ssid)++) \ |
274 | !((ss) = cgroup_subsys[i]); })) { } \ | 271 | if (!((ss) = cgroup_subsys[(ssid)])) { } \ |
275 | else | 272 | else |
276 | 273 | ||
277 | /** | 274 | /** |
@@ -286,10 +283,6 @@ static int notify_on_release(const struct cgroup *cgrp) | |||
286 | for ((i) = 0; (i) < CGROUP_BUILTIN_SUBSYS_COUNT && \ | 283 | for ((i) = 0; (i) < CGROUP_BUILTIN_SUBSYS_COUNT && \ |
287 | (((ss) = cgroup_subsys[i]) || true); (i)++) | 284 | (((ss) = cgroup_subsys[i]) || true); (i)++) |
288 | 285 | ||
289 | /* iterate each subsystem attached to a hierarchy */ | ||
290 | #define for_each_root_subsys(root, ss) \ | ||
291 | list_for_each_entry((ss), &(root)->subsys_list, sibling) | ||
292 | |||
293 | /* iterate across the active hierarchies */ | 286 | /* iterate across the active hierarchies */ |
294 | #define for_each_active_root(root) \ | 287 | #define for_each_active_root(root) \ |
295 | list_for_each_entry((root), &cgroup_roots, root_list) | 288 | list_for_each_entry((root), &cgroup_roots, root_list) |
@@ -863,11 +856,7 @@ static void cgroup_free_fn(struct work_struct *work) | |||
863 | */ | 856 | */ |
864 | deactivate_super(cgrp->root->sb); | 857 | deactivate_super(cgrp->root->sb); |
865 | 858 | ||
866 | /* | 859 | cgroup_pidlist_destroy_all(cgrp); |
867 | * if we're getting rid of the cgroup, refcount should ensure | ||
868 | * that there are no pidlists left. | ||
869 | */ | ||
870 | BUG_ON(!list_empty(&cgrp->pidlists)); | ||
871 | 860 | ||
872 | simple_xattrs_free(&cgrp->xattrs); | 861 | simple_xattrs_free(&cgrp->xattrs); |
873 | 862 | ||
@@ -1050,7 +1039,6 @@ static int rebind_subsystems(struct cgroupfs_root *root, | |||
1050 | cgroup_css(cgroup_dummy_top, ss)); | 1039 | cgroup_css(cgroup_dummy_top, ss)); |
1051 | cgroup_css(cgrp, ss)->cgroup = cgrp; | 1040 | cgroup_css(cgrp, ss)->cgroup = cgrp; |
1052 | 1041 | ||
1053 | list_move(&ss->sibling, &root->subsys_list); | ||
1054 | ss->root = root; | 1042 | ss->root = root; |
1055 | if (ss->bind) | 1043 | if (ss->bind) |
1056 | ss->bind(cgroup_css(cgrp, ss)); | 1044 | ss->bind(cgroup_css(cgrp, ss)); |
@@ -1069,7 +1057,6 @@ static int rebind_subsystems(struct cgroupfs_root *root, | |||
1069 | RCU_INIT_POINTER(cgrp->subsys[i], NULL); | 1057 | RCU_INIT_POINTER(cgrp->subsys[i], NULL); |
1070 | 1058 | ||
1071 | cgroup_subsys[i]->root = &cgroup_dummy_root; | 1059 | cgroup_subsys[i]->root = &cgroup_dummy_root; |
1072 | list_move(&ss->sibling, &cgroup_dummy_root.subsys_list); | ||
1073 | 1060 | ||
1074 | /* subsystem is now free - drop reference on module */ | 1061 | /* subsystem is now free - drop reference on module */ |
1075 | module_put(ss->module); | 1062 | module_put(ss->module); |
@@ -1096,10 +1083,12 @@ static int cgroup_show_options(struct seq_file *seq, struct dentry *dentry) | |||
1096 | { | 1083 | { |
1097 | struct cgroupfs_root *root = dentry->d_sb->s_fs_info; | 1084 | struct cgroupfs_root *root = dentry->d_sb->s_fs_info; |
1098 | struct cgroup_subsys *ss; | 1085 | struct cgroup_subsys *ss; |
1086 | int ssid; | ||
1099 | 1087 | ||
1100 | mutex_lock(&cgroup_root_mutex); | 1088 | mutex_lock(&cgroup_root_mutex); |
1101 | for_each_root_subsys(root, ss) | 1089 | for_each_subsys(ss, ssid) |
1102 | seq_printf(seq, ",%s", ss->name); | 1090 | if (root->subsys_mask & (1 << ssid)) |
1091 | seq_printf(seq, ",%s", ss->name); | ||
1103 | if (root->flags & CGRP_ROOT_SANE_BEHAVIOR) | 1092 | if (root->flags & CGRP_ROOT_SANE_BEHAVIOR) |
1104 | seq_puts(seq, ",sane_behavior"); | 1093 | seq_puts(seq, ",sane_behavior"); |
1105 | if (root->flags & CGRP_ROOT_NOPREFIX) | 1094 | if (root->flags & CGRP_ROOT_NOPREFIX) |
@@ -1362,8 +1351,6 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp) | |||
1362 | INIT_LIST_HEAD(&cgrp->pidlists); | 1351 | INIT_LIST_HEAD(&cgrp->pidlists); |
1363 | mutex_init(&cgrp->pidlist_mutex); | 1352 | mutex_init(&cgrp->pidlist_mutex); |
1364 | cgrp->dummy_css.cgroup = cgrp; | 1353 | cgrp->dummy_css.cgroup = cgrp; |
1365 | INIT_LIST_HEAD(&cgrp->event_list); | ||
1366 | spin_lock_init(&cgrp->event_list_lock); | ||
1367 | simple_xattrs_init(&cgrp->xattrs); | 1354 | simple_xattrs_init(&cgrp->xattrs); |
1368 | } | 1355 | } |
1369 | 1356 | ||
@@ -1371,7 +1358,6 @@ static void init_cgroup_root(struct cgroupfs_root *root) | |||
1371 | { | 1358 | { |
1372 | struct cgroup *cgrp = &root->top_cgroup; | 1359 | struct cgroup *cgrp = &root->top_cgroup; |
1373 | 1360 | ||
1374 | INIT_LIST_HEAD(&root->subsys_list); | ||
1375 | INIT_LIST_HEAD(&root->root_list); | 1361 | INIT_LIST_HEAD(&root->root_list); |
1376 | root->number_of_cgroups = 1; | 1362 | root->number_of_cgroups = 1; |
1377 | cgrp->root = root; | 1363 | cgrp->root = root; |
@@ -1693,7 +1679,8 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, | |||
1693 | return ERR_PTR(ret); | 1679 | return ERR_PTR(ret); |
1694 | } | 1680 | } |
1695 | 1681 | ||
1696 | static void cgroup_kill_sb(struct super_block *sb) { | 1682 | static void cgroup_kill_sb(struct super_block *sb) |
1683 | { | ||
1697 | struct cgroupfs_root *root = sb->s_fs_info; | 1684 | struct cgroupfs_root *root = sb->s_fs_info; |
1698 | struct cgroup *cgrp = &root->top_cgroup; | 1685 | struct cgroup *cgrp = &root->top_cgroup; |
1699 | struct cgrp_cset_link *link, *tmp_link; | 1686 | struct cgrp_cset_link *link, *tmp_link; |
@@ -1976,8 +1963,8 @@ static int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk, | |||
1976 | bool threadgroup) | 1963 | bool threadgroup) |
1977 | { | 1964 | { |
1978 | int retval, i, group_size; | 1965 | int retval, i, group_size; |
1979 | struct cgroup_subsys *ss, *failed_ss = NULL; | ||
1980 | struct cgroupfs_root *root = cgrp->root; | 1966 | struct cgroupfs_root *root = cgrp->root; |
1967 | struct cgroup_subsys_state *css, *failed_css = NULL; | ||
1981 | /* threadgroup list cursor and array */ | 1968 | /* threadgroup list cursor and array */ |
1982 | struct task_struct *leader = tsk; | 1969 | struct task_struct *leader = tsk; |
1983 | struct task_and_cgroup *tc; | 1970 | struct task_and_cgroup *tc; |
@@ -2050,13 +2037,11 @@ static int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk, | |||
2050 | /* | 2037 | /* |
2051 | * step 1: check that we can legitimately attach to the cgroup. | 2038 | * step 1: check that we can legitimately attach to the cgroup. |
2052 | */ | 2039 | */ |
2053 | for_each_root_subsys(root, ss) { | 2040 | for_each_css(css, i, cgrp) { |
2054 | struct cgroup_subsys_state *css = cgroup_css(cgrp, ss); | 2041 | if (css->ss->can_attach) { |
2055 | 2042 | retval = css->ss->can_attach(css, &tset); | |
2056 | if (ss->can_attach) { | ||
2057 | retval = ss->can_attach(css, &tset); | ||
2058 | if (retval) { | 2043 | if (retval) { |
2059 | failed_ss = ss; | 2044 | failed_css = css; |
2060 | goto out_cancel_attach; | 2045 | goto out_cancel_attach; |
2061 | } | 2046 | } |
2062 | } | 2047 | } |
@@ -2092,12 +2077,9 @@ static int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk, | |||
2092 | /* | 2077 | /* |
2093 | * step 4: do subsystem attach callbacks. | 2078 | * step 4: do subsystem attach callbacks. |
2094 | */ | 2079 | */ |
2095 | for_each_root_subsys(root, ss) { | 2080 | for_each_css(css, i, cgrp) |
2096 | struct cgroup_subsys_state *css = cgroup_css(cgrp, ss); | 2081 | if (css->ss->attach) |
2097 | 2082 | css->ss->attach(css, &tset); | |
2098 | if (ss->attach) | ||
2099 | ss->attach(css, &tset); | ||
2100 | } | ||
2101 | 2083 | ||
2102 | /* | 2084 | /* |
2103 | * step 5: success! and cleanup | 2085 | * step 5: success! and cleanup |
@@ -2114,13 +2096,11 @@ out_put_css_set_refs: | |||
2114 | } | 2096 | } |
2115 | out_cancel_attach: | 2097 | out_cancel_attach: |
2116 | if (retval) { | 2098 | if (retval) { |
2117 | for_each_root_subsys(root, ss) { | 2099 | for_each_css(css, i, cgrp) { |
2118 | struct cgroup_subsys_state *css = cgroup_css(cgrp, ss); | 2100 | if (css == failed_css) |
2119 | |||
2120 | if (ss == failed_ss) | ||
2121 | break; | 2101 | break; |
2122 | if (ss->cancel_attach) | 2102 | if (css->ss->cancel_attach) |
2123 | ss->cancel_attach(css, &tset); | 2103 | css->ss->cancel_attach(css, &tset); |
2124 | } | 2104 | } |
2125 | } | 2105 | } |
2126 | out_free_group_list: | 2106 | out_free_group_list: |
@@ -2148,7 +2128,7 @@ retry_find_task: | |||
2148 | tsk = find_task_by_vpid(pid); | 2128 | tsk = find_task_by_vpid(pid); |
2149 | if (!tsk) { | 2129 | if (!tsk) { |
2150 | rcu_read_unlock(); | 2130 | rcu_read_unlock(); |
2151 | ret= -ESRCH; | 2131 | ret = -ESRCH; |
2152 | goto out_unlock_cgroup; | 2132 | goto out_unlock_cgroup; |
2153 | } | 2133 | } |
2154 | /* | 2134 | /* |
@@ -2260,10 +2240,9 @@ static int cgroup_release_agent_write(struct cgroup_subsys_state *css, | |||
2260 | return 0; | 2240 | return 0; |
2261 | } | 2241 | } |
2262 | 2242 | ||
2263 | static int cgroup_release_agent_show(struct cgroup_subsys_state *css, | 2243 | static int cgroup_release_agent_show(struct seq_file *seq, void *v) |
2264 | struct cftype *cft, struct seq_file *seq) | ||
2265 | { | 2244 | { |
2266 | struct cgroup *cgrp = css->cgroup; | 2245 | struct cgroup *cgrp = seq_css(seq)->cgroup; |
2267 | 2246 | ||
2268 | if (!cgroup_lock_live_group(cgrp)) | 2247 | if (!cgroup_lock_live_group(cgrp)) |
2269 | return -ENODEV; | 2248 | return -ENODEV; |
@@ -2273,174 +2252,129 @@ static int cgroup_release_agent_show(struct cgroup_subsys_state *css, | |||
2273 | return 0; | 2252 | return 0; |
2274 | } | 2253 | } |
2275 | 2254 | ||
2276 | static int cgroup_sane_behavior_show(struct cgroup_subsys_state *css, | 2255 | static int cgroup_sane_behavior_show(struct seq_file *seq, void *v) |
2277 | struct cftype *cft, struct seq_file *seq) | ||
2278 | { | 2256 | { |
2279 | seq_printf(seq, "%d\n", cgroup_sane_behavior(css->cgroup)); | 2257 | struct cgroup *cgrp = seq_css(seq)->cgroup; |
2258 | |||
2259 | seq_printf(seq, "%d\n", cgroup_sane_behavior(cgrp)); | ||
2280 | return 0; | 2260 | return 0; |
2281 | } | 2261 | } |
2282 | 2262 | ||
2283 | /* A buffer size big enough for numbers or short strings */ | 2263 | /* A buffer size big enough for numbers or short strings */ |
2284 | #define CGROUP_LOCAL_BUFFER_SIZE 64 | 2264 | #define CGROUP_LOCAL_BUFFER_SIZE 64 |
2285 | 2265 | ||
2286 | static ssize_t cgroup_write_X64(struct cgroup_subsys_state *css, | 2266 | static ssize_t cgroup_file_write(struct file *file, const char __user *userbuf, |
2287 | struct cftype *cft, struct file *file, | 2267 | size_t nbytes, loff_t *ppos) |
2288 | const char __user *userbuf, size_t nbytes, | ||
2289 | loff_t *unused_ppos) | ||
2290 | { | 2268 | { |
2291 | char buffer[CGROUP_LOCAL_BUFFER_SIZE]; | 2269 | struct cfent *cfe = __d_cfe(file->f_dentry); |
2292 | int retval = 0; | 2270 | struct cftype *cft = __d_cft(file->f_dentry); |
2293 | char *end; | 2271 | struct cgroup_subsys_state *css = cfe->css; |
2272 | size_t max_bytes = cft->max_write_len ?: CGROUP_LOCAL_BUFFER_SIZE - 1; | ||
2273 | char *buf; | ||
2274 | int ret; | ||
2294 | 2275 | ||
2295 | if (!nbytes) | 2276 | if (nbytes >= max_bytes) |
2296 | return -EINVAL; | ||
2297 | if (nbytes >= sizeof(buffer)) | ||
2298 | return -E2BIG; | 2277 | return -E2BIG; |
2299 | if (copy_from_user(buffer, userbuf, nbytes)) | ||
2300 | return -EFAULT; | ||
2301 | 2278 | ||
2302 | buffer[nbytes] = 0; /* nul-terminate */ | 2279 | buf = kmalloc(nbytes + 1, GFP_KERNEL); |
2303 | if (cft->write_u64) { | 2280 | if (!buf) |
2304 | u64 val = simple_strtoull(strstrip(buffer), &end, 0); | 2281 | return -ENOMEM; |
2305 | if (*end) | 2282 | |
2306 | return -EINVAL; | 2283 | if (copy_from_user(buf, userbuf, nbytes)) { |
2307 | retval = cft->write_u64(css, cft, val); | 2284 | ret = -EFAULT; |
2285 | goto out_free; | ||
2286 | } | ||
2287 | |||
2288 | buf[nbytes] = '\0'; | ||
2289 | |||
2290 | if (cft->write_string) { | ||
2291 | ret = cft->write_string(css, cft, strstrip(buf)); | ||
2292 | } else if (cft->write_u64) { | ||
2293 | unsigned long long v; | ||
2294 | ret = kstrtoull(buf, 0, &v); | ||
2295 | if (!ret) | ||
2296 | ret = cft->write_u64(css, cft, v); | ||
2297 | } else if (cft->write_s64) { | ||
2298 | long long v; | ||
2299 | ret = kstrtoll(buf, 0, &v); | ||
2300 | if (!ret) | ||
2301 | ret = cft->write_s64(css, cft, v); | ||
2302 | } else if (cft->trigger) { | ||
2303 | ret = cft->trigger(css, (unsigned int)cft->private); | ||
2308 | } else { | 2304 | } else { |
2309 | s64 val = simple_strtoll(strstrip(buffer), &end, 0); | 2305 | ret = -EINVAL; |
2310 | if (*end) | ||
2311 | return -EINVAL; | ||
2312 | retval = cft->write_s64(css, cft, val); | ||
2313 | } | 2306 | } |
2314 | if (!retval) | 2307 | out_free: |
2315 | retval = nbytes; | 2308 | kfree(buf); |
2316 | return retval; | 2309 | return ret ?: nbytes; |
2317 | } | 2310 | } |
2318 | 2311 | ||
2319 | static ssize_t cgroup_write_string(struct cgroup_subsys_state *css, | 2312 | /* |
2320 | struct cftype *cft, struct file *file, | 2313 | * seqfile ops/methods for returning structured data. Currently just |
2321 | const char __user *userbuf, size_t nbytes, | 2314 | * supports string->u64 maps, but can be extended in future. |
2322 | loff_t *unused_ppos) | 2315 | */ |
2316 | |||
2317 | static void *cgroup_seqfile_start(struct seq_file *seq, loff_t *ppos) | ||
2323 | { | 2318 | { |
2324 | char local_buffer[CGROUP_LOCAL_BUFFER_SIZE]; | 2319 | struct cftype *cft = seq_cft(seq); |
2325 | int retval = 0; | ||
2326 | size_t max_bytes = cft->max_write_len; | ||
2327 | char *buffer = local_buffer; | ||
2328 | 2320 | ||
2329 | if (!max_bytes) | 2321 | if (cft->seq_start) { |
2330 | max_bytes = sizeof(local_buffer) - 1; | 2322 | return cft->seq_start(seq, ppos); |
2331 | if (nbytes >= max_bytes) | 2323 | } else { |
2332 | return -E2BIG; | 2324 | /* |
2333 | /* Allocate a dynamic buffer if we need one */ | 2325 | * The same behavior and code as single_open(). Returns |
2334 | if (nbytes >= sizeof(local_buffer)) { | 2326 | * !NULL if pos is at the beginning; otherwise, NULL. |
2335 | buffer = kmalloc(nbytes + 1, GFP_KERNEL); | 2327 | */ |
2336 | if (buffer == NULL) | 2328 | return NULL + !*ppos; |
2337 | return -ENOMEM; | ||
2338 | } | ||
2339 | if (nbytes && copy_from_user(buffer, userbuf, nbytes)) { | ||
2340 | retval = -EFAULT; | ||
2341 | goto out; | ||
2342 | } | 2329 | } |
2343 | |||
2344 | buffer[nbytes] = 0; /* nul-terminate */ | ||
2345 | retval = cft->write_string(css, cft, strstrip(buffer)); | ||
2346 | if (!retval) | ||
2347 | retval = nbytes; | ||
2348 | out: | ||
2349 | if (buffer != local_buffer) | ||
2350 | kfree(buffer); | ||
2351 | return retval; | ||
2352 | } | 2330 | } |
2353 | 2331 | ||
2354 | static ssize_t cgroup_file_write(struct file *file, const char __user *buf, | 2332 | static void *cgroup_seqfile_next(struct seq_file *seq, void *v, loff_t *ppos) |
2355 | size_t nbytes, loff_t *ppos) | ||
2356 | { | 2333 | { |
2357 | struct cfent *cfe = __d_cfe(file->f_dentry); | 2334 | struct cftype *cft = seq_cft(seq); |
2358 | struct cftype *cft = __d_cft(file->f_dentry); | ||
2359 | struct cgroup_subsys_state *css = cfe->css; | ||
2360 | 2335 | ||
2361 | if (cft->write) | 2336 | if (cft->seq_next) { |
2362 | return cft->write(css, cft, file, buf, nbytes, ppos); | 2337 | return cft->seq_next(seq, v, ppos); |
2363 | if (cft->write_u64 || cft->write_s64) | 2338 | } else { |
2364 | return cgroup_write_X64(css, cft, file, buf, nbytes, ppos); | 2339 | /* |
2365 | if (cft->write_string) | 2340 | * The same behavior and code as single_open(), always |
2366 | return cgroup_write_string(css, cft, file, buf, nbytes, ppos); | 2341 | * terminate after the initial read. |
2367 | if (cft->trigger) { | 2342 | */ |
2368 | int ret = cft->trigger(css, (unsigned int)cft->private); | 2343 | ++*ppos; |
2369 | return ret ? ret : nbytes; | 2344 | return NULL; |
2370 | } | 2345 | } |
2371 | return -EINVAL; | ||
2372 | } | 2346 | } |
2373 | 2347 | ||
2374 | static ssize_t cgroup_read_u64(struct cgroup_subsys_state *css, | 2348 | static void cgroup_seqfile_stop(struct seq_file *seq, void *v) |
2375 | struct cftype *cft, struct file *file, | ||
2376 | char __user *buf, size_t nbytes, loff_t *ppos) | ||
2377 | { | 2349 | { |
2378 | char tmp[CGROUP_LOCAL_BUFFER_SIZE]; | 2350 | struct cftype *cft = seq_cft(seq); |
2379 | u64 val = cft->read_u64(css, cft); | ||
2380 | int len = sprintf(tmp, "%llu\n", (unsigned long long) val); | ||
2381 | 2351 | ||
2382 | return simple_read_from_buffer(buf, nbytes, ppos, tmp, len); | 2352 | if (cft->seq_stop) |
2353 | cft->seq_stop(seq, v); | ||
2383 | } | 2354 | } |
2384 | 2355 | ||
2385 | static ssize_t cgroup_read_s64(struct cgroup_subsys_state *css, | 2356 | static int cgroup_seqfile_show(struct seq_file *m, void *arg) |
2386 | struct cftype *cft, struct file *file, | ||
2387 | char __user *buf, size_t nbytes, loff_t *ppos) | ||
2388 | { | 2357 | { |
2389 | char tmp[CGROUP_LOCAL_BUFFER_SIZE]; | 2358 | struct cftype *cft = seq_cft(m); |
2390 | s64 val = cft->read_s64(css, cft); | 2359 | struct cgroup_subsys_state *css = seq_css(m); |
2391 | int len = sprintf(tmp, "%lld\n", (long long) val); | ||
2392 | 2360 | ||
2393 | return simple_read_from_buffer(buf, nbytes, ppos, tmp, len); | 2361 | if (cft->seq_show) |
2394 | } | 2362 | return cft->seq_show(m, arg); |
2395 | 2363 | ||
2396 | static ssize_t cgroup_file_read(struct file *file, char __user *buf, | ||
2397 | size_t nbytes, loff_t *ppos) | ||
2398 | { | ||
2399 | struct cfent *cfe = __d_cfe(file->f_dentry); | ||
2400 | struct cftype *cft = __d_cft(file->f_dentry); | ||
2401 | struct cgroup_subsys_state *css = cfe->css; | ||
2402 | |||
2403 | if (cft->read) | ||
2404 | return cft->read(css, cft, file, buf, nbytes, ppos); | ||
2405 | if (cft->read_u64) | 2364 | if (cft->read_u64) |
2406 | return cgroup_read_u64(css, cft, file, buf, nbytes, ppos); | 2365 | seq_printf(m, "%llu\n", cft->read_u64(css, cft)); |
2407 | if (cft->read_s64) | 2366 | else if (cft->read_s64) |
2408 | return cgroup_read_s64(css, cft, file, buf, nbytes, ppos); | 2367 | seq_printf(m, "%lld\n", cft->read_s64(css, cft)); |
2409 | return -EINVAL; | 2368 | else |
2410 | } | 2369 | return -EINVAL; |
2411 | 2370 | return 0; | |
2412 | /* | ||
2413 | * seqfile ops/methods for returning structured data. Currently just | ||
2414 | * supports string->u64 maps, but can be extended in future. | ||
2415 | */ | ||
2416 | |||
2417 | static int cgroup_map_add(struct cgroup_map_cb *cb, const char *key, u64 value) | ||
2418 | { | ||
2419 | struct seq_file *sf = cb->state; | ||
2420 | return seq_printf(sf, "%s %llu\n", key, (unsigned long long)value); | ||
2421 | } | ||
2422 | |||
2423 | static int cgroup_seqfile_show(struct seq_file *m, void *arg) | ||
2424 | { | ||
2425 | struct cfent *cfe = m->private; | ||
2426 | struct cftype *cft = cfe->type; | ||
2427 | struct cgroup_subsys_state *css = cfe->css; | ||
2428 | |||
2429 | if (cft->read_map) { | ||
2430 | struct cgroup_map_cb cb = { | ||
2431 | .fill = cgroup_map_add, | ||
2432 | .state = m, | ||
2433 | }; | ||
2434 | return cft->read_map(css, cft, &cb); | ||
2435 | } | ||
2436 | return cft->read_seq_string(css, cft, m); | ||
2437 | } | 2371 | } |
2438 | 2372 | ||
2439 | static const struct file_operations cgroup_seqfile_operations = { | 2373 | static struct seq_operations cgroup_seq_operations = { |
2440 | .read = seq_read, | 2374 | .start = cgroup_seqfile_start, |
2441 | .write = cgroup_file_write, | 2375 | .next = cgroup_seqfile_next, |
2442 | .llseek = seq_lseek, | 2376 | .stop = cgroup_seqfile_stop, |
2443 | .release = cgroup_file_release, | 2377 | .show = cgroup_seqfile_show, |
2444 | }; | 2378 | }; |
2445 | 2379 | ||
2446 | static int cgroup_file_open(struct inode *inode, struct file *file) | 2380 | static int cgroup_file_open(struct inode *inode, struct file *file) |
@@ -2449,6 +2383,7 @@ static int cgroup_file_open(struct inode *inode, struct file *file) | |||
2449 | struct cftype *cft = __d_cft(file->f_dentry); | 2383 | struct cftype *cft = __d_cft(file->f_dentry); |
2450 | struct cgroup *cgrp = __d_cgrp(cfe->dentry->d_parent); | 2384 | struct cgroup *cgrp = __d_cgrp(cfe->dentry->d_parent); |
2451 | struct cgroup_subsys_state *css; | 2385 | struct cgroup_subsys_state *css; |
2386 | struct cgroup_open_file *of; | ||
2452 | int err; | 2387 | int err; |
2453 | 2388 | ||
2454 | err = generic_file_open(inode, file); | 2389 | err = generic_file_open(inode, file); |
@@ -2478,32 +2413,26 @@ static int cgroup_file_open(struct inode *inode, struct file *file) | |||
2478 | WARN_ON_ONCE(cfe->css && cfe->css != css); | 2413 | WARN_ON_ONCE(cfe->css && cfe->css != css); |
2479 | cfe->css = css; | 2414 | cfe->css = css; |
2480 | 2415 | ||
2481 | if (cft->read_map || cft->read_seq_string) { | 2416 | of = __seq_open_private(file, &cgroup_seq_operations, |
2482 | file->f_op = &cgroup_seqfile_operations; | 2417 | sizeof(struct cgroup_open_file)); |
2483 | err = single_open(file, cgroup_seqfile_show, cfe); | 2418 | if (of) { |
2484 | } else if (cft->open) { | 2419 | of->cfe = cfe; |
2485 | err = cft->open(inode, file); | 2420 | return 0; |
2486 | } | 2421 | } |
2487 | 2422 | ||
2488 | if (css->ss && err) | 2423 | if (css->ss) |
2489 | css_put(css); | 2424 | css_put(css); |
2490 | return err; | 2425 | return -ENOMEM; |
2491 | } | 2426 | } |
2492 | 2427 | ||
2493 | static int cgroup_file_release(struct inode *inode, struct file *file) | 2428 | static int cgroup_file_release(struct inode *inode, struct file *file) |
2494 | { | 2429 | { |
2495 | struct cfent *cfe = __d_cfe(file->f_dentry); | 2430 | struct cfent *cfe = __d_cfe(file->f_dentry); |
2496 | struct cftype *cft = __d_cft(file->f_dentry); | ||
2497 | struct cgroup_subsys_state *css = cfe->css; | 2431 | struct cgroup_subsys_state *css = cfe->css; |
2498 | int ret = 0; | ||
2499 | 2432 | ||
2500 | if (cft->release) | ||
2501 | ret = cft->release(inode, file); | ||
2502 | if (css->ss) | 2433 | if (css->ss) |
2503 | css_put(css); | 2434 | css_put(css); |
2504 | if (file->f_op == &cgroup_seqfile_operations) | 2435 | return seq_release_private(inode, file); |
2505 | single_release(inode, file); | ||
2506 | return ret; | ||
2507 | } | 2436 | } |
2508 | 2437 | ||
2509 | /* | 2438 | /* |
@@ -2614,7 +2543,7 @@ static ssize_t cgroup_listxattr(struct dentry *dentry, char *buf, size_t size) | |||
2614 | } | 2543 | } |
2615 | 2544 | ||
2616 | static const struct file_operations cgroup_file_operations = { | 2545 | static const struct file_operations cgroup_file_operations = { |
2617 | .read = cgroup_file_read, | 2546 | .read = seq_read, |
2618 | .write = cgroup_file_write, | 2547 | .write = cgroup_file_write, |
2619 | .llseek = generic_file_llseek, | 2548 | .llseek = generic_file_llseek, |
2620 | .open = cgroup_file_open, | 2549 | .open = cgroup_file_open, |
@@ -2639,16 +2568,6 @@ static const struct inode_operations cgroup_dir_inode_operations = { | |||
2639 | .removexattr = cgroup_removexattr, | 2568 | .removexattr = cgroup_removexattr, |
2640 | }; | 2569 | }; |
2641 | 2570 | ||
2642 | /* | ||
2643 | * Check if a file is a control file | ||
2644 | */ | ||
2645 | static inline struct cftype *__file_cft(struct file *file) | ||
2646 | { | ||
2647 | if (file_inode(file)->i_fop != &cgroup_file_operations) | ||
2648 | return ERR_PTR(-EINVAL); | ||
2649 | return __d_cft(file->f_dentry); | ||
2650 | } | ||
2651 | |||
2652 | static int cgroup_create_file(struct dentry *dentry, umode_t mode, | 2571 | static int cgroup_create_file(struct dentry *dentry, umode_t mode, |
2653 | struct super_block *sb) | 2572 | struct super_block *sb) |
2654 | { | 2573 | { |
@@ -2706,12 +2625,11 @@ static umode_t cgroup_file_mode(const struct cftype *cft) | |||
2706 | if (cft->mode) | 2625 | if (cft->mode) |
2707 | return cft->mode; | 2626 | return cft->mode; |
2708 | 2627 | ||
2709 | if (cft->read || cft->read_u64 || cft->read_s64 || | 2628 | if (cft->read_u64 || cft->read_s64 || cft->seq_show) |
2710 | cft->read_map || cft->read_seq_string) | ||
2711 | mode |= S_IRUGO; | 2629 | mode |= S_IRUGO; |
2712 | 2630 | ||
2713 | if (cft->write || cft->write_u64 || cft->write_s64 || | 2631 | if (cft->write_u64 || cft->write_s64 || cft->write_string || |
2714 | cft->write_string || cft->trigger) | 2632 | cft->trigger) |
2715 | mode |= S_IWUSR; | 2633 | mode |= S_IWUSR; |
2716 | 2634 | ||
2717 | return mode; | 2635 | return mode; |
@@ -3007,9 +2925,9 @@ static void cgroup_enable_task_cg_lists(void) | |||
3007 | * @parent_css: css whose children to walk | 2925 | * @parent_css: css whose children to walk |
3008 | * | 2926 | * |
3009 | * This function returns the next child of @parent_css and should be called | 2927 | * This function returns the next child of @parent_css and should be called |
3010 | * under RCU read lock. The only requirement is that @parent_css and | 2928 | * under either cgroup_mutex or RCU read lock. The only requirement is |
3011 | * @pos_css are accessible. The next sibling is guaranteed to be returned | 2929 | * that @parent_css and @pos_css are accessible. The next sibling is |
3012 | * regardless of their states. | 2930 | * guaranteed to be returned regardless of their states. |
3013 | */ | 2931 | */ |
3014 | struct cgroup_subsys_state * | 2932 | struct cgroup_subsys_state * |
3015 | css_next_child(struct cgroup_subsys_state *pos_css, | 2933 | css_next_child(struct cgroup_subsys_state *pos_css, |
@@ -3019,7 +2937,7 @@ css_next_child(struct cgroup_subsys_state *pos_css, | |||
3019 | struct cgroup *cgrp = parent_css->cgroup; | 2937 | struct cgroup *cgrp = parent_css->cgroup; |
3020 | struct cgroup *next; | 2938 | struct cgroup *next; |
3021 | 2939 | ||
3022 | WARN_ON_ONCE(!rcu_read_lock_held()); | 2940 | cgroup_assert_mutex_or_rcu_locked(); |
3023 | 2941 | ||
3024 | /* | 2942 | /* |
3025 | * @pos could already have been removed. Once a cgroup is removed, | 2943 | * @pos could already have been removed. Once a cgroup is removed, |
@@ -3066,10 +2984,10 @@ EXPORT_SYMBOL_GPL(css_next_child); | |||
3066 | * to visit for pre-order traversal of @root's descendants. @root is | 2984 | * to visit for pre-order traversal of @root's descendants. @root is |
3067 | * included in the iteration and the first node to be visited. | 2985 | * included in the iteration and the first node to be visited. |
3068 | * | 2986 | * |
3069 | * While this function requires RCU read locking, it doesn't require the | 2987 | * While this function requires cgroup_mutex or RCU read locking, it |
3070 | * whole traversal to be contained in a single RCU critical section. This | 2988 | * doesn't require the whole traversal to be contained in a single critical |
3071 | * function will return the correct next descendant as long as both @pos | 2989 | * section. This function will return the correct next descendant as long |
3072 | * and @root are accessible and @pos is a descendant of @root. | 2990 | * as both @pos and @root are accessible and @pos is a descendant of @root. |
3073 | */ | 2991 | */ |
3074 | struct cgroup_subsys_state * | 2992 | struct cgroup_subsys_state * |
3075 | css_next_descendant_pre(struct cgroup_subsys_state *pos, | 2993 | css_next_descendant_pre(struct cgroup_subsys_state *pos, |
@@ -3077,7 +2995,7 @@ css_next_descendant_pre(struct cgroup_subsys_state *pos, | |||
3077 | { | 2995 | { |
3078 | struct cgroup_subsys_state *next; | 2996 | struct cgroup_subsys_state *next; |
3079 | 2997 | ||
3080 | WARN_ON_ONCE(!rcu_read_lock_held()); | 2998 | cgroup_assert_mutex_or_rcu_locked(); |
3081 | 2999 | ||
3082 | /* if first iteration, visit @root */ | 3000 | /* if first iteration, visit @root */ |
3083 | if (!pos) | 3001 | if (!pos) |
@@ -3108,17 +3026,17 @@ EXPORT_SYMBOL_GPL(css_next_descendant_pre); | |||
3108 | * is returned. This can be used during pre-order traversal to skip | 3026 | * is returned. This can be used during pre-order traversal to skip |
3109 | * subtree of @pos. | 3027 | * subtree of @pos. |
3110 | * | 3028 | * |
3111 | * While this function requires RCU read locking, it doesn't require the | 3029 | * While this function requires cgroup_mutex or RCU read locking, it |
3112 | * whole traversal to be contained in a single RCU critical section. This | 3030 | * doesn't require the whole traversal to be contained in a single critical |
3113 | * function will return the correct rightmost descendant as long as @pos is | 3031 | * section. This function will return the correct rightmost descendant as |
3114 | * accessible. | 3032 | * long as @pos is accessible. |
3115 | */ | 3033 | */ |
3116 | struct cgroup_subsys_state * | 3034 | struct cgroup_subsys_state * |
3117 | css_rightmost_descendant(struct cgroup_subsys_state *pos) | 3035 | css_rightmost_descendant(struct cgroup_subsys_state *pos) |
3118 | { | 3036 | { |
3119 | struct cgroup_subsys_state *last, *tmp; | 3037 | struct cgroup_subsys_state *last, *tmp; |
3120 | 3038 | ||
3121 | WARN_ON_ONCE(!rcu_read_lock_held()); | 3039 | cgroup_assert_mutex_or_rcu_locked(); |
3122 | 3040 | ||
3123 | do { | 3041 | do { |
3124 | last = pos; | 3042 | last = pos; |
@@ -3154,10 +3072,11 @@ css_leftmost_descendant(struct cgroup_subsys_state *pos) | |||
3154 | * to visit for post-order traversal of @root's descendants. @root is | 3072 | * to visit for post-order traversal of @root's descendants. @root is |
3155 | * included in the iteration and the last node to be visited. | 3073 | * included in the iteration and the last node to be visited. |
3156 | * | 3074 | * |
3157 | * While this function requires RCU read locking, it doesn't require the | 3075 | * While this function requires cgroup_mutex or RCU read locking, it |
3158 | * whole traversal to be contained in a single RCU critical section. This | 3076 | * doesn't require the whole traversal to be contained in a single critical |
3159 | * function will return the correct next descendant as long as both @pos | 3077 | * section. This function will return the correct next descendant as long |
3160 | * and @cgroup are accessible and @pos is a descendant of @cgroup. | 3078 | * as both @pos and @cgroup are accessible and @pos is a descendant of |
3079 | * @cgroup. | ||
3161 | */ | 3080 | */ |
3162 | struct cgroup_subsys_state * | 3081 | struct cgroup_subsys_state * |
3163 | css_next_descendant_post(struct cgroup_subsys_state *pos, | 3082 | css_next_descendant_post(struct cgroup_subsys_state *pos, |
@@ -3165,7 +3084,7 @@ css_next_descendant_post(struct cgroup_subsys_state *pos, | |||
3165 | { | 3084 | { |
3166 | struct cgroup_subsys_state *next; | 3085 | struct cgroup_subsys_state *next; |
3167 | 3086 | ||
3168 | WARN_ON_ONCE(!rcu_read_lock_held()); | 3087 | cgroup_assert_mutex_or_rcu_locked(); |
3169 | 3088 | ||
3170 | /* if first iteration, visit leftmost descendant which may be @root */ | 3089 | /* if first iteration, visit leftmost descendant which may be @root */ |
3171 | if (!pos) | 3090 | if (!pos) |
@@ -3504,14 +3423,12 @@ struct cgroup_pidlist { | |||
3504 | pid_t *list; | 3423 | pid_t *list; |
3505 | /* how many elements the above list has */ | 3424 | /* how many elements the above list has */ |
3506 | int length; | 3425 | int length; |
3507 | /* how many files are using the current array */ | ||
3508 | int use_count; | ||
3509 | /* each of these stored in a list by its cgroup */ | 3426 | /* each of these stored in a list by its cgroup */ |
3510 | struct list_head links; | 3427 | struct list_head links; |
3511 | /* pointer to the cgroup we belong to, for list removal purposes */ | 3428 | /* pointer to the cgroup we belong to, for list removal purposes */ |
3512 | struct cgroup *owner; | 3429 | struct cgroup *owner; |
3513 | /* protects the other fields */ | 3430 | /* for delayed destruction */ |
3514 | struct rw_semaphore rwsem; | 3431 | struct delayed_work destroy_dwork; |
3515 | }; | 3432 | }; |
3516 | 3433 | ||
3517 | /* | 3434 | /* |
@@ -3527,6 +3444,7 @@ static void *pidlist_allocate(int count) | |||
3527 | else | 3444 | else |
3528 | return kmalloc(count * sizeof(pid_t), GFP_KERNEL); | 3445 | return kmalloc(count * sizeof(pid_t), GFP_KERNEL); |
3529 | } | 3446 | } |
3447 | |||
3530 | static void pidlist_free(void *p) | 3448 | static void pidlist_free(void *p) |
3531 | { | 3449 | { |
3532 | if (is_vmalloc_addr(p)) | 3450 | if (is_vmalloc_addr(p)) |
@@ -3536,6 +3454,47 @@ static void pidlist_free(void *p) | |||
3536 | } | 3454 | } |
3537 | 3455 | ||
3538 | /* | 3456 | /* |
3457 | * Used to destroy all pidlists lingering waiting for destroy timer. None | ||
3458 | * should be left afterwards. | ||
3459 | */ | ||
3460 | static void cgroup_pidlist_destroy_all(struct cgroup *cgrp) | ||
3461 | { | ||
3462 | struct cgroup_pidlist *l, *tmp_l; | ||
3463 | |||
3464 | mutex_lock(&cgrp->pidlist_mutex); | ||
3465 | list_for_each_entry_safe(l, tmp_l, &cgrp->pidlists, links) | ||
3466 | mod_delayed_work(cgroup_pidlist_destroy_wq, &l->destroy_dwork, 0); | ||
3467 | mutex_unlock(&cgrp->pidlist_mutex); | ||
3468 | |||
3469 | flush_workqueue(cgroup_pidlist_destroy_wq); | ||
3470 | BUG_ON(!list_empty(&cgrp->pidlists)); | ||
3471 | } | ||
3472 | |||
3473 | static void cgroup_pidlist_destroy_work_fn(struct work_struct *work) | ||
3474 | { | ||
3475 | struct delayed_work *dwork = to_delayed_work(work); | ||
3476 | struct cgroup_pidlist *l = container_of(dwork, struct cgroup_pidlist, | ||
3477 | destroy_dwork); | ||
3478 | struct cgroup_pidlist *tofree = NULL; | ||
3479 | |||
3480 | mutex_lock(&l->owner->pidlist_mutex); | ||
3481 | |||
3482 | /* | ||
3483 | * Destroy iff we didn't get queued again. The state won't change | ||
3484 | * as destroy_dwork can only be queued while locked. | ||
3485 | */ | ||
3486 | if (!delayed_work_pending(dwork)) { | ||
3487 | list_del(&l->links); | ||
3488 | pidlist_free(l->list); | ||
3489 | put_pid_ns(l->key.ns); | ||
3490 | tofree = l; | ||
3491 | } | ||
3492 | |||
3493 | mutex_unlock(&l->owner->pidlist_mutex); | ||
3494 | kfree(tofree); | ||
3495 | } | ||
3496 | |||
3497 | /* | ||
3539 | * pidlist_uniq - given a kmalloc()ed list, strip out all duplicate entries | 3498 | * pidlist_uniq - given a kmalloc()ed list, strip out all duplicate entries |
3540 | * Returns the number of unique elements. | 3499 | * Returns the number of unique elements. |
3541 | */ | 3500 | */ |
@@ -3565,52 +3524,92 @@ after: | |||
3565 | return dest; | 3524 | return dest; |
3566 | } | 3525 | } |
3567 | 3526 | ||
3527 | /* | ||
3528 | * The two pid files - task and cgroup.procs - guaranteed that the result | ||
3529 | * is sorted, which forced this whole pidlist fiasco. As pid order is | ||
3530 | * different per namespace, each namespace needs differently sorted list, | ||
3531 | * making it impossible to use, for example, single rbtree of member tasks | ||
3532 | * sorted by task pointer. As pidlists can be fairly large, allocating one | ||
3533 | * per open file is dangerous, so cgroup had to implement shared pool of | ||
3534 | * pidlists keyed by cgroup and namespace. | ||
3535 | * | ||
3536 | * All this extra complexity was caused by the original implementation | ||
3537 | * committing to an entirely unnecessary property. In the long term, we | ||
3538 | * want to do away with it. Explicitly scramble sort order if | ||
3539 | * sane_behavior so that no such expectation exists in the new interface. | ||
3540 | * | ||
3541 | * Scrambling is done by swapping every two consecutive bits, which is | ||
3542 | * non-identity one-to-one mapping which disturbs sort order sufficiently. | ||
3543 | */ | ||
3544 | static pid_t pid_fry(pid_t pid) | ||
3545 | { | ||
3546 | unsigned a = pid & 0x55555555; | ||
3547 | unsigned b = pid & 0xAAAAAAAA; | ||
3548 | |||
3549 | return (a << 1) | (b >> 1); | ||
3550 | } | ||
3551 | |||
3552 | static pid_t cgroup_pid_fry(struct cgroup *cgrp, pid_t pid) | ||
3553 | { | ||
3554 | if (cgroup_sane_behavior(cgrp)) | ||
3555 | return pid_fry(pid); | ||
3556 | else | ||
3557 | return pid; | ||
3558 | } | ||
3559 | |||
3568 | static int cmppid(const void *a, const void *b) | 3560 | static int cmppid(const void *a, const void *b) |
3569 | { | 3561 | { |
3570 | return *(pid_t *)a - *(pid_t *)b; | 3562 | return *(pid_t *)a - *(pid_t *)b; |
3571 | } | 3563 | } |
3572 | 3564 | ||
3565 | static int fried_cmppid(const void *a, const void *b) | ||
3566 | { | ||
3567 | return pid_fry(*(pid_t *)a) - pid_fry(*(pid_t *)b); | ||
3568 | } | ||
3569 | |||
3570 | static struct cgroup_pidlist *cgroup_pidlist_find(struct cgroup *cgrp, | ||
3571 | enum cgroup_filetype type) | ||
3572 | { | ||
3573 | struct cgroup_pidlist *l; | ||
3574 | /* don't need task_nsproxy() if we're looking at ourself */ | ||
3575 | struct pid_namespace *ns = task_active_pid_ns(current); | ||
3576 | |||
3577 | lockdep_assert_held(&cgrp->pidlist_mutex); | ||
3578 | |||
3579 | list_for_each_entry(l, &cgrp->pidlists, links) | ||
3580 | if (l->key.type == type && l->key.ns == ns) | ||
3581 | return l; | ||
3582 | return NULL; | ||
3583 | } | ||
3584 | |||
3573 | /* | 3585 | /* |
3574 | * find the appropriate pidlist for our purpose (given procs vs tasks) | 3586 | * find the appropriate pidlist for our purpose (given procs vs tasks) |
3575 | * returns with the lock on that pidlist already held, and takes care | 3587 | * returns with the lock on that pidlist already held, and takes care |
3576 | * of the use count, or returns NULL with no locks held if we're out of | 3588 | * of the use count, or returns NULL with no locks held if we're out of |
3577 | * memory. | 3589 | * memory. |
3578 | */ | 3590 | */ |
3579 | static struct cgroup_pidlist *cgroup_pidlist_find(struct cgroup *cgrp, | 3591 | static struct cgroup_pidlist *cgroup_pidlist_find_create(struct cgroup *cgrp, |
3580 | enum cgroup_filetype type) | 3592 | enum cgroup_filetype type) |
3581 | { | 3593 | { |
3582 | struct cgroup_pidlist *l; | 3594 | struct cgroup_pidlist *l; |
3583 | /* don't need task_nsproxy() if we're looking at ourself */ | ||
3584 | struct pid_namespace *ns = task_active_pid_ns(current); | ||
3585 | 3595 | ||
3586 | /* | 3596 | lockdep_assert_held(&cgrp->pidlist_mutex); |
3587 | * We can't drop the pidlist_mutex before taking the l->rwsem in case | 3597 | |
3588 | * the last ref-holder is trying to remove l from the list at the same | 3598 | l = cgroup_pidlist_find(cgrp, type); |
3589 | * time. Holding the pidlist_mutex precludes somebody taking whichever | 3599 | if (l) |
3590 | * list we find out from under us - compare release_pid_array(). | 3600 | return l; |
3591 | */ | 3601 | |
3592 | mutex_lock(&cgrp->pidlist_mutex); | ||
3593 | list_for_each_entry(l, &cgrp->pidlists, links) { | ||
3594 | if (l->key.type == type && l->key.ns == ns) { | ||
3595 | /* make sure l doesn't vanish out from under us */ | ||
3596 | down_write(&l->rwsem); | ||
3597 | mutex_unlock(&cgrp->pidlist_mutex); | ||
3598 | return l; | ||
3599 | } | ||
3600 | } | ||
3601 | /* entry not found; create a new one */ | 3602 | /* entry not found; create a new one */ |
3602 | l = kzalloc(sizeof(struct cgroup_pidlist), GFP_KERNEL); | 3603 | l = kzalloc(sizeof(struct cgroup_pidlist), GFP_KERNEL); |
3603 | if (!l) { | 3604 | if (!l) |
3604 | mutex_unlock(&cgrp->pidlist_mutex); | ||
3605 | return l; | 3605 | return l; |
3606 | } | 3606 | |
3607 | init_rwsem(&l->rwsem); | 3607 | INIT_DELAYED_WORK(&l->destroy_dwork, cgroup_pidlist_destroy_work_fn); |
3608 | down_write(&l->rwsem); | ||
3609 | l->key.type = type; | 3608 | l->key.type = type; |
3610 | l->key.ns = get_pid_ns(ns); | 3609 | /* don't need task_nsproxy() if we're looking at ourself */ |
3610 | l->key.ns = get_pid_ns(task_active_pid_ns(current)); | ||
3611 | l->owner = cgrp; | 3611 | l->owner = cgrp; |
3612 | list_add(&l->links, &cgrp->pidlists); | 3612 | list_add(&l->links, &cgrp->pidlists); |
3613 | mutex_unlock(&cgrp->pidlist_mutex); | ||
3614 | return l; | 3613 | return l; |
3615 | } | 3614 | } |
3616 | 3615 | ||
@@ -3627,6 +3626,8 @@ static int pidlist_array_load(struct cgroup *cgrp, enum cgroup_filetype type, | |||
3627 | struct task_struct *tsk; | 3626 | struct task_struct *tsk; |
3628 | struct cgroup_pidlist *l; | 3627 | struct cgroup_pidlist *l; |
3629 | 3628 | ||
3629 | lockdep_assert_held(&cgrp->pidlist_mutex); | ||
3630 | |||
3630 | /* | 3631 | /* |
3631 | * If cgroup gets more users after we read count, we won't have | 3632 | * If cgroup gets more users after we read count, we won't have |
3632 | * enough space - tough. This race is indistinguishable to the | 3633 | * enough space - tough. This race is indistinguishable to the |
@@ -3653,20 +3654,24 @@ static int pidlist_array_load(struct cgroup *cgrp, enum cgroup_filetype type, | |||
3653 | css_task_iter_end(&it); | 3654 | css_task_iter_end(&it); |
3654 | length = n; | 3655 | length = n; |
3655 | /* now sort & (if procs) strip out duplicates */ | 3656 | /* now sort & (if procs) strip out duplicates */ |
3656 | sort(array, length, sizeof(pid_t), cmppid, NULL); | 3657 | if (cgroup_sane_behavior(cgrp)) |
3658 | sort(array, length, sizeof(pid_t), fried_cmppid, NULL); | ||
3659 | else | ||
3660 | sort(array, length, sizeof(pid_t), cmppid, NULL); | ||
3657 | if (type == CGROUP_FILE_PROCS) | 3661 | if (type == CGROUP_FILE_PROCS) |
3658 | length = pidlist_uniq(array, length); | 3662 | length = pidlist_uniq(array, length); |
3659 | l = cgroup_pidlist_find(cgrp, type); | 3663 | |
3664 | l = cgroup_pidlist_find_create(cgrp, type); | ||
3660 | if (!l) { | 3665 | if (!l) { |
3666 | mutex_unlock(&cgrp->pidlist_mutex); | ||
3661 | pidlist_free(array); | 3667 | pidlist_free(array); |
3662 | return -ENOMEM; | 3668 | return -ENOMEM; |
3663 | } | 3669 | } |
3664 | /* store array, freeing old if necessary - lock already held */ | 3670 | |
3671 | /* store array, freeing old if necessary */ | ||
3665 | pidlist_free(l->list); | 3672 | pidlist_free(l->list); |
3666 | l->list = array; | 3673 | l->list = array; |
3667 | l->length = length; | 3674 | l->length = length; |
3668 | l->use_count++; | ||
3669 | up_write(&l->rwsem); | ||
3670 | *lp = l; | 3675 | *lp = l; |
3671 | return 0; | 3676 | return 0; |
3672 | } | 3677 | } |
@@ -3740,20 +3745,45 @@ static void *cgroup_pidlist_start(struct seq_file *s, loff_t *pos) | |||
3740 | * after a seek to the start). Use a binary-search to find the | 3745 | * after a seek to the start). Use a binary-search to find the |
3741 | * next pid to display, if any | 3746 | * next pid to display, if any |
3742 | */ | 3747 | */ |
3743 | struct cgroup_pidlist *l = s->private; | 3748 | struct cgroup_open_file *of = s->private; |
3749 | struct cgroup *cgrp = seq_css(s)->cgroup; | ||
3750 | struct cgroup_pidlist *l; | ||
3751 | enum cgroup_filetype type = seq_cft(s)->private; | ||
3744 | int index = 0, pid = *pos; | 3752 | int index = 0, pid = *pos; |
3745 | int *iter; | 3753 | int *iter, ret; |
3754 | |||
3755 | mutex_lock(&cgrp->pidlist_mutex); | ||
3756 | |||
3757 | /* | ||
3758 | * !NULL @of->priv indicates that this isn't the first start() | ||
3759 | * after open. If the matching pidlist is around, we can use that. | ||
3760 | * Look for it. Note that @of->priv can't be used directly. It | ||
3761 | * could already have been destroyed. | ||
3762 | */ | ||
3763 | if (of->priv) | ||
3764 | of->priv = cgroup_pidlist_find(cgrp, type); | ||
3765 | |||
3766 | /* | ||
3767 | * Either this is the first start() after open or the matching | ||
3768 | * pidlist has been destroyed inbetween. Create a new one. | ||
3769 | */ | ||
3770 | if (!of->priv) { | ||
3771 | ret = pidlist_array_load(cgrp, type, | ||
3772 | (struct cgroup_pidlist **)&of->priv); | ||
3773 | if (ret) | ||
3774 | return ERR_PTR(ret); | ||
3775 | } | ||
3776 | l = of->priv; | ||
3746 | 3777 | ||
3747 | down_read(&l->rwsem); | ||
3748 | if (pid) { | 3778 | if (pid) { |
3749 | int end = l->length; | 3779 | int end = l->length; |
3750 | 3780 | ||
3751 | while (index < end) { | 3781 | while (index < end) { |
3752 | int mid = (index + end) / 2; | 3782 | int mid = (index + end) / 2; |
3753 | if (l->list[mid] == pid) { | 3783 | if (cgroup_pid_fry(cgrp, l->list[mid]) == pid) { |
3754 | index = mid; | 3784 | index = mid; |
3755 | break; | 3785 | break; |
3756 | } else if (l->list[mid] <= pid) | 3786 | } else if (cgroup_pid_fry(cgrp, l->list[mid]) <= pid) |
3757 | index = mid + 1; | 3787 | index = mid + 1; |
3758 | else | 3788 | else |
3759 | end = mid; | 3789 | end = mid; |
@@ -3764,19 +3794,25 @@ static void *cgroup_pidlist_start(struct seq_file *s, loff_t *pos) | |||
3764 | return NULL; | 3794 | return NULL; |
3765 | /* Update the abstract position to be the actual pid that we found */ | 3795 | /* Update the abstract position to be the actual pid that we found */ |
3766 | iter = l->list + index; | 3796 | iter = l->list + index; |
3767 | *pos = *iter; | 3797 | *pos = cgroup_pid_fry(cgrp, *iter); |
3768 | return iter; | 3798 | return iter; |
3769 | } | 3799 | } |
3770 | 3800 | ||
3771 | static void cgroup_pidlist_stop(struct seq_file *s, void *v) | 3801 | static void cgroup_pidlist_stop(struct seq_file *s, void *v) |
3772 | { | 3802 | { |
3773 | struct cgroup_pidlist *l = s->private; | 3803 | struct cgroup_open_file *of = s->private; |
3774 | up_read(&l->rwsem); | 3804 | struct cgroup_pidlist *l = of->priv; |
3805 | |||
3806 | if (l) | ||
3807 | mod_delayed_work(cgroup_pidlist_destroy_wq, &l->destroy_dwork, | ||
3808 | CGROUP_PIDLIST_DESTROY_DELAY); | ||
3809 | mutex_unlock(&seq_css(s)->cgroup->pidlist_mutex); | ||
3775 | } | 3810 | } |
3776 | 3811 | ||
3777 | static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos) | 3812 | static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos) |
3778 | { | 3813 | { |
3779 | struct cgroup_pidlist *l = s->private; | 3814 | struct cgroup_open_file *of = s->private; |
3815 | struct cgroup_pidlist *l = of->priv; | ||
3780 | pid_t *p = v; | 3816 | pid_t *p = v; |
3781 | pid_t *end = l->list + l->length; | 3817 | pid_t *end = l->list + l->length; |
3782 | /* | 3818 | /* |
@@ -3787,7 +3823,7 @@ static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos) | |||
3787 | if (p >= end) { | 3823 | if (p >= end) { |
3788 | return NULL; | 3824 | return NULL; |
3789 | } else { | 3825 | } else { |
3790 | *pos = *p; | 3826 | *pos = cgroup_pid_fry(seq_css(s)->cgroup, *p); |
3791 | return p; | 3827 | return p; |
3792 | } | 3828 | } |
3793 | } | 3829 | } |
@@ -3808,92 +3844,6 @@ static const struct seq_operations cgroup_pidlist_seq_operations = { | |||
3808 | .show = cgroup_pidlist_show, | 3844 | .show = cgroup_pidlist_show, |
3809 | }; | 3845 | }; |
3810 | 3846 | ||
3811 | static void cgroup_release_pid_array(struct cgroup_pidlist *l) | ||
3812 | { | ||
3813 | /* | ||
3814 | * the case where we're the last user of this particular pidlist will | ||
3815 | * have us remove it from the cgroup's list, which entails taking the | ||
3816 | * mutex. since in pidlist_find the pidlist->lock depends on cgroup-> | ||
3817 | * pidlist_mutex, we have to take pidlist_mutex first. | ||
3818 | */ | ||
3819 | mutex_lock(&l->owner->pidlist_mutex); | ||
3820 | down_write(&l->rwsem); | ||
3821 | BUG_ON(!l->use_count); | ||
3822 | if (!--l->use_count) { | ||
3823 | /* we're the last user if refcount is 0; remove and free */ | ||
3824 | list_del(&l->links); | ||
3825 | mutex_unlock(&l->owner->pidlist_mutex); | ||
3826 | pidlist_free(l->list); | ||
3827 | put_pid_ns(l->key.ns); | ||
3828 | up_write(&l->rwsem); | ||
3829 | kfree(l); | ||
3830 | return; | ||
3831 | } | ||
3832 | mutex_unlock(&l->owner->pidlist_mutex); | ||
3833 | up_write(&l->rwsem); | ||
3834 | } | ||
3835 | |||
3836 | static int cgroup_pidlist_release(struct inode *inode, struct file *file) | ||
3837 | { | ||
3838 | struct cgroup_pidlist *l; | ||
3839 | if (!(file->f_mode & FMODE_READ)) | ||
3840 | return 0; | ||
3841 | /* | ||
3842 | * the seq_file will only be initialized if the file was opened for | ||
3843 | * reading; hence we check if it's not null only in that case. | ||
3844 | */ | ||
3845 | l = ((struct seq_file *)file->private_data)->private; | ||
3846 | cgroup_release_pid_array(l); | ||
3847 | return seq_release(inode, file); | ||
3848 | } | ||
3849 | |||
3850 | static const struct file_operations cgroup_pidlist_operations = { | ||
3851 | .read = seq_read, | ||
3852 | .llseek = seq_lseek, | ||
3853 | .write = cgroup_file_write, | ||
3854 | .release = cgroup_pidlist_release, | ||
3855 | }; | ||
3856 | |||
3857 | /* | ||
3858 | * The following functions handle opens on a file that displays a pidlist | ||
3859 | * (tasks or procs). Prepare an array of the process/thread IDs of whoever's | ||
3860 | * in the cgroup. | ||
3861 | */ | ||
3862 | /* helper function for the two below it */ | ||
3863 | static int cgroup_pidlist_open(struct file *file, enum cgroup_filetype type) | ||
3864 | { | ||
3865 | struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent); | ||
3866 | struct cgroup_pidlist *l; | ||
3867 | int retval; | ||
3868 | |||
3869 | /* Nothing to do for write-only files */ | ||
3870 | if (!(file->f_mode & FMODE_READ)) | ||
3871 | return 0; | ||
3872 | |||
3873 | /* have the array populated */ | ||
3874 | retval = pidlist_array_load(cgrp, type, &l); | ||
3875 | if (retval) | ||
3876 | return retval; | ||
3877 | /* configure file information */ | ||
3878 | file->f_op = &cgroup_pidlist_operations; | ||
3879 | |||
3880 | retval = seq_open(file, &cgroup_pidlist_seq_operations); | ||
3881 | if (retval) { | ||
3882 | cgroup_release_pid_array(l); | ||
3883 | return retval; | ||
3884 | } | ||
3885 | ((struct seq_file *)file->private_data)->private = l; | ||
3886 | return 0; | ||
3887 | } | ||
3888 | static int cgroup_tasks_open(struct inode *unused, struct file *file) | ||
3889 | { | ||
3890 | return cgroup_pidlist_open(file, CGROUP_FILE_TASKS); | ||
3891 | } | ||
3892 | static int cgroup_procs_open(struct inode *unused, struct file *file) | ||
3893 | { | ||
3894 | return cgroup_pidlist_open(file, CGROUP_FILE_PROCS); | ||
3895 | } | ||
3896 | |||
3897 | static u64 cgroup_read_notify_on_release(struct cgroup_subsys_state *css, | 3847 | static u64 cgroup_read_notify_on_release(struct cgroup_subsys_state *css, |
3898 | struct cftype *cft) | 3848 | struct cftype *cft) |
3899 | { | 3849 | { |
@@ -3928,202 +3878,6 @@ static void cgroup_dput(struct cgroup *cgrp) | |||
3928 | deactivate_super(sb); | 3878 | deactivate_super(sb); |
3929 | } | 3879 | } |
3930 | 3880 | ||
3931 | /* | ||
3932 | * Unregister event and free resources. | ||
3933 | * | ||
3934 | * Gets called from workqueue. | ||
3935 | */ | ||
3936 | static void cgroup_event_remove(struct work_struct *work) | ||
3937 | { | ||
3938 | struct cgroup_event *event = container_of(work, struct cgroup_event, | ||
3939 | remove); | ||
3940 | struct cgroup_subsys_state *css = event->css; | ||
3941 | |||
3942 | remove_wait_queue(event->wqh, &event->wait); | ||
3943 | |||
3944 | event->cft->unregister_event(css, event->cft, event->eventfd); | ||
3945 | |||
3946 | /* Notify userspace the event is going away. */ | ||
3947 | eventfd_signal(event->eventfd, 1); | ||
3948 | |||
3949 | eventfd_ctx_put(event->eventfd); | ||
3950 | kfree(event); | ||
3951 | css_put(css); | ||
3952 | } | ||
3953 | |||
3954 | /* | ||
3955 | * Gets called on POLLHUP on eventfd when user closes it. | ||
3956 | * | ||
3957 | * Called with wqh->lock held and interrupts disabled. | ||
3958 | */ | ||
3959 | static int cgroup_event_wake(wait_queue_t *wait, unsigned mode, | ||
3960 | int sync, void *key) | ||
3961 | { | ||
3962 | struct cgroup_event *event = container_of(wait, | ||
3963 | struct cgroup_event, wait); | ||
3964 | struct cgroup *cgrp = event->css->cgroup; | ||
3965 | unsigned long flags = (unsigned long)key; | ||
3966 | |||
3967 | if (flags & POLLHUP) { | ||
3968 | /* | ||
3969 | * If the event has been detached at cgroup removal, we | ||
3970 | * can simply return knowing the other side will cleanup | ||
3971 | * for us. | ||
3972 | * | ||
3973 | * We can't race against event freeing since the other | ||
3974 | * side will require wqh->lock via remove_wait_queue(), | ||
3975 | * which we hold. | ||
3976 | */ | ||
3977 | spin_lock(&cgrp->event_list_lock); | ||
3978 | if (!list_empty(&event->list)) { | ||
3979 | list_del_init(&event->list); | ||
3980 | /* | ||
3981 | * We are in atomic context, but cgroup_event_remove() | ||
3982 | * may sleep, so we have to call it in workqueue. | ||
3983 | */ | ||
3984 | schedule_work(&event->remove); | ||
3985 | } | ||
3986 | spin_unlock(&cgrp->event_list_lock); | ||
3987 | } | ||
3988 | |||
3989 | return 0; | ||
3990 | } | ||
3991 | |||
3992 | static void cgroup_event_ptable_queue_proc(struct file *file, | ||
3993 | wait_queue_head_t *wqh, poll_table *pt) | ||
3994 | { | ||
3995 | struct cgroup_event *event = container_of(pt, | ||
3996 | struct cgroup_event, pt); | ||
3997 | |||
3998 | event->wqh = wqh; | ||
3999 | add_wait_queue(wqh, &event->wait); | ||
4000 | } | ||
4001 | |||
4002 | /* | ||
4003 | * Parse input and register new cgroup event handler. | ||
4004 | * | ||
4005 | * Input must be in format '<event_fd> <control_fd> <args>'. | ||
4006 | * Interpretation of args is defined by control file implementation. | ||
4007 | */ | ||
4008 | static int cgroup_write_event_control(struct cgroup_subsys_state *dummy_css, | ||
4009 | struct cftype *cft, const char *buffer) | ||
4010 | { | ||
4011 | struct cgroup *cgrp = dummy_css->cgroup; | ||
4012 | struct cgroup_event *event; | ||
4013 | struct cgroup_subsys_state *cfile_css; | ||
4014 | unsigned int efd, cfd; | ||
4015 | struct fd efile; | ||
4016 | struct fd cfile; | ||
4017 | char *endp; | ||
4018 | int ret; | ||
4019 | |||
4020 | efd = simple_strtoul(buffer, &endp, 10); | ||
4021 | if (*endp != ' ') | ||
4022 | return -EINVAL; | ||
4023 | buffer = endp + 1; | ||
4024 | |||
4025 | cfd = simple_strtoul(buffer, &endp, 10); | ||
4026 | if ((*endp != ' ') && (*endp != '\0')) | ||
4027 | return -EINVAL; | ||
4028 | buffer = endp + 1; | ||
4029 | |||
4030 | event = kzalloc(sizeof(*event), GFP_KERNEL); | ||
4031 | if (!event) | ||
4032 | return -ENOMEM; | ||
4033 | |||
4034 | INIT_LIST_HEAD(&event->list); | ||
4035 | init_poll_funcptr(&event->pt, cgroup_event_ptable_queue_proc); | ||
4036 | init_waitqueue_func_entry(&event->wait, cgroup_event_wake); | ||
4037 | INIT_WORK(&event->remove, cgroup_event_remove); | ||
4038 | |||
4039 | efile = fdget(efd); | ||
4040 | if (!efile.file) { | ||
4041 | ret = -EBADF; | ||
4042 | goto out_kfree; | ||
4043 | } | ||
4044 | |||
4045 | event->eventfd = eventfd_ctx_fileget(efile.file); | ||
4046 | if (IS_ERR(event->eventfd)) { | ||
4047 | ret = PTR_ERR(event->eventfd); | ||
4048 | goto out_put_efile; | ||
4049 | } | ||
4050 | |||
4051 | cfile = fdget(cfd); | ||
4052 | if (!cfile.file) { | ||
4053 | ret = -EBADF; | ||
4054 | goto out_put_eventfd; | ||
4055 | } | ||
4056 | |||
4057 | /* the process need read permission on control file */ | ||
4058 | /* AV: shouldn't we check that it's been opened for read instead? */ | ||
4059 | ret = inode_permission(file_inode(cfile.file), MAY_READ); | ||
4060 | if (ret < 0) | ||
4061 | goto out_put_cfile; | ||
4062 | |||
4063 | event->cft = __file_cft(cfile.file); | ||
4064 | if (IS_ERR(event->cft)) { | ||
4065 | ret = PTR_ERR(event->cft); | ||
4066 | goto out_put_cfile; | ||
4067 | } | ||
4068 | |||
4069 | if (!event->cft->ss) { | ||
4070 | ret = -EBADF; | ||
4071 | goto out_put_cfile; | ||
4072 | } | ||
4073 | |||
4074 | /* | ||
4075 | * Determine the css of @cfile, verify it belongs to the same | ||
4076 | * cgroup as cgroup.event_control, and associate @event with it. | ||
4077 | * Remaining events are automatically removed on cgroup destruction | ||
4078 | * but the removal is asynchronous, so take an extra ref. | ||
4079 | */ | ||
4080 | rcu_read_lock(); | ||
4081 | |||
4082 | ret = -EINVAL; | ||
4083 | event->css = cgroup_css(cgrp, event->cft->ss); | ||
4084 | cfile_css = css_from_dir(cfile.file->f_dentry->d_parent, event->cft->ss); | ||
4085 | if (event->css && event->css == cfile_css && css_tryget(event->css)) | ||
4086 | ret = 0; | ||
4087 | |||
4088 | rcu_read_unlock(); | ||
4089 | if (ret) | ||
4090 | goto out_put_cfile; | ||
4091 | |||
4092 | if (!event->cft->register_event || !event->cft->unregister_event) { | ||
4093 | ret = -EINVAL; | ||
4094 | goto out_put_css; | ||
4095 | } | ||
4096 | |||
4097 | ret = event->cft->register_event(event->css, event->cft, | ||
4098 | event->eventfd, buffer); | ||
4099 | if (ret) | ||
4100 | goto out_put_css; | ||
4101 | |||
4102 | efile.file->f_op->poll(efile.file, &event->pt); | ||
4103 | |||
4104 | spin_lock(&cgrp->event_list_lock); | ||
4105 | list_add(&event->list, &cgrp->event_list); | ||
4106 | spin_unlock(&cgrp->event_list_lock); | ||
4107 | |||
4108 | fdput(cfile); | ||
4109 | fdput(efile); | ||
4110 | |||
4111 | return 0; | ||
4112 | |||
4113 | out_put_css: | ||
4114 | css_put(event->css); | ||
4115 | out_put_cfile: | ||
4116 | fdput(cfile); | ||
4117 | out_put_eventfd: | ||
4118 | eventfd_ctx_put(event->eventfd); | ||
4119 | out_put_efile: | ||
4120 | fdput(efile); | ||
4121 | out_kfree: | ||
4122 | kfree(event); | ||
4123 | |||
4124 | return ret; | ||
4125 | } | ||
4126 | |||
4127 | static u64 cgroup_clone_children_read(struct cgroup_subsys_state *css, | 3881 | static u64 cgroup_clone_children_read(struct cgroup_subsys_state *css, |
4128 | struct cftype *cft) | 3882 | struct cftype *cft) |
4129 | { | 3883 | { |
@@ -4143,17 +3897,15 @@ static int cgroup_clone_children_write(struct cgroup_subsys_state *css, | |||
4143 | static struct cftype cgroup_base_files[] = { | 3897 | static struct cftype cgroup_base_files[] = { |
4144 | { | 3898 | { |
4145 | .name = "cgroup.procs", | 3899 | .name = "cgroup.procs", |
4146 | .open = cgroup_procs_open, | 3900 | .seq_start = cgroup_pidlist_start, |
3901 | .seq_next = cgroup_pidlist_next, | ||
3902 | .seq_stop = cgroup_pidlist_stop, | ||
3903 | .seq_show = cgroup_pidlist_show, | ||
3904 | .private = CGROUP_FILE_PROCS, | ||
4147 | .write_u64 = cgroup_procs_write, | 3905 | .write_u64 = cgroup_procs_write, |
4148 | .release = cgroup_pidlist_release, | ||
4149 | .mode = S_IRUGO | S_IWUSR, | 3906 | .mode = S_IRUGO | S_IWUSR, |
4150 | }, | 3907 | }, |
4151 | { | 3908 | { |
4152 | .name = "cgroup.event_control", | ||
4153 | .write_string = cgroup_write_event_control, | ||
4154 | .mode = S_IWUGO, | ||
4155 | }, | ||
4156 | { | ||
4157 | .name = "cgroup.clone_children", | 3909 | .name = "cgroup.clone_children", |
4158 | .flags = CFTYPE_INSANE, | 3910 | .flags = CFTYPE_INSANE, |
4159 | .read_u64 = cgroup_clone_children_read, | 3911 | .read_u64 = cgroup_clone_children_read, |
@@ -4162,7 +3914,7 @@ static struct cftype cgroup_base_files[] = { | |||
4162 | { | 3914 | { |
4163 | .name = "cgroup.sane_behavior", | 3915 | .name = "cgroup.sane_behavior", |
4164 | .flags = CFTYPE_ONLY_ON_ROOT, | 3916 | .flags = CFTYPE_ONLY_ON_ROOT, |
4165 | .read_seq_string = cgroup_sane_behavior_show, | 3917 | .seq_show = cgroup_sane_behavior_show, |
4166 | }, | 3918 | }, |
4167 | 3919 | ||
4168 | /* | 3920 | /* |
@@ -4173,9 +3925,12 @@ static struct cftype cgroup_base_files[] = { | |||
4173 | { | 3925 | { |
4174 | .name = "tasks", | 3926 | .name = "tasks", |
4175 | .flags = CFTYPE_INSANE, /* use "procs" instead */ | 3927 | .flags = CFTYPE_INSANE, /* use "procs" instead */ |
4176 | .open = cgroup_tasks_open, | 3928 | .seq_start = cgroup_pidlist_start, |
3929 | .seq_next = cgroup_pidlist_next, | ||
3930 | .seq_stop = cgroup_pidlist_stop, | ||
3931 | .seq_show = cgroup_pidlist_show, | ||
3932 | .private = CGROUP_FILE_TASKS, | ||
4177 | .write_u64 = cgroup_tasks_write, | 3933 | .write_u64 = cgroup_tasks_write, |
4178 | .release = cgroup_pidlist_release, | ||
4179 | .mode = S_IRUGO | S_IWUSR, | 3934 | .mode = S_IRUGO | S_IWUSR, |
4180 | }, | 3935 | }, |
4181 | { | 3936 | { |
@@ -4187,7 +3942,7 @@ static struct cftype cgroup_base_files[] = { | |||
4187 | { | 3942 | { |
4188 | .name = "release_agent", | 3943 | .name = "release_agent", |
4189 | .flags = CFTYPE_INSANE | CFTYPE_ONLY_ON_ROOT, | 3944 | .flags = CFTYPE_INSANE | CFTYPE_ONLY_ON_ROOT, |
4190 | .read_seq_string = cgroup_release_agent_show, | 3945 | .seq_show = cgroup_release_agent_show, |
4191 | .write_string = cgroup_release_agent_write, | 3946 | .write_string = cgroup_release_agent_write, |
4192 | .max_write_len = PATH_MAX, | 3947 | .max_write_len = PATH_MAX, |
4193 | }, | 3948 | }, |
@@ -4333,6 +4088,62 @@ static void offline_css(struct cgroup_subsys_state *css) | |||
4333 | RCU_INIT_POINTER(css->cgroup->subsys[ss->subsys_id], css); | 4088 | RCU_INIT_POINTER(css->cgroup->subsys[ss->subsys_id], css); |
4334 | } | 4089 | } |
4335 | 4090 | ||
4091 | /** | ||
4092 | * create_css - create a cgroup_subsys_state | ||
4093 | * @cgrp: the cgroup new css will be associated with | ||
4094 | * @ss: the subsys of new css | ||
4095 | * | ||
4096 | * Create a new css associated with @cgrp - @ss pair. On success, the new | ||
4097 | * css is online and installed in @cgrp with all interface files created. | ||
4098 | * Returns 0 on success, -errno on failure. | ||
4099 | */ | ||
4100 | static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss) | ||
4101 | { | ||
4102 | struct cgroup *parent = cgrp->parent; | ||
4103 | struct cgroup_subsys_state *css; | ||
4104 | int err; | ||
4105 | |||
4106 | lockdep_assert_held(&cgrp->dentry->d_inode->i_mutex); | ||
4107 | lockdep_assert_held(&cgroup_mutex); | ||
4108 | |||
4109 | css = ss->css_alloc(cgroup_css(parent, ss)); | ||
4110 | if (IS_ERR(css)) | ||
4111 | return PTR_ERR(css); | ||
4112 | |||
4113 | err = percpu_ref_init(&css->refcnt, css_release); | ||
4114 | if (err) | ||
4115 | goto err_free; | ||
4116 | |||
4117 | init_css(css, ss, cgrp); | ||
4118 | |||
4119 | err = cgroup_populate_dir(cgrp, 1 << ss->subsys_id); | ||
4120 | if (err) | ||
4121 | goto err_free; | ||
4122 | |||
4123 | err = online_css(css); | ||
4124 | if (err) | ||
4125 | goto err_free; | ||
4126 | |||
4127 | dget(cgrp->dentry); | ||
4128 | css_get(css->parent); | ||
4129 | |||
4130 | if (ss->broken_hierarchy && !ss->warned_broken_hierarchy && | ||
4131 | parent->parent) { | ||
4132 | pr_warning("cgroup: %s (%d) created nested cgroup for controller \"%s\" which has incomplete hierarchy support. Nested cgroups may change behavior in the future.\n", | ||
4133 | current->comm, current->pid, ss->name); | ||
4134 | if (!strcmp(ss->name, "memory")) | ||
4135 | pr_warning("cgroup: \"memory\" requires setting use_hierarchy to 1 on the root.\n"); | ||
4136 | ss->warned_broken_hierarchy = true; | ||
4137 | } | ||
4138 | |||
4139 | return 0; | ||
4140 | |||
4141 | err_free: | ||
4142 | percpu_ref_cancel_init(&css->refcnt); | ||
4143 | ss->css_free(css); | ||
4144 | return err; | ||
4145 | } | ||
4146 | |||
4336 | /* | 4147 | /* |
4337 | * cgroup_create - create a cgroup | 4148 | * cgroup_create - create a cgroup |
4338 | * @parent: cgroup that will be parent of the new cgroup | 4149 | * @parent: cgroup that will be parent of the new cgroup |
@@ -4344,11 +4155,10 @@ static void offline_css(struct cgroup_subsys_state *css) | |||
4344 | static long cgroup_create(struct cgroup *parent, struct dentry *dentry, | 4155 | static long cgroup_create(struct cgroup *parent, struct dentry *dentry, |
4345 | umode_t mode) | 4156 | umode_t mode) |
4346 | { | 4157 | { |
4347 | struct cgroup_subsys_state *css_ar[CGROUP_SUBSYS_COUNT] = { }; | ||
4348 | struct cgroup *cgrp; | 4158 | struct cgroup *cgrp; |
4349 | struct cgroup_name *name; | 4159 | struct cgroup_name *name; |
4350 | struct cgroupfs_root *root = parent->root; | 4160 | struct cgroupfs_root *root = parent->root; |
4351 | int err = 0; | 4161 | int ssid, err = 0; |
4352 | struct cgroup_subsys *ss; | 4162 | struct cgroup_subsys *ss; |
4353 | struct super_block *sb = root->sb; | 4163 | struct super_block *sb = root->sb; |
4354 | 4164 | ||
@@ -4404,23 +4214,6 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, | |||
4404 | if (test_bit(CGRP_CPUSET_CLONE_CHILDREN, &parent->flags)) | 4214 | if (test_bit(CGRP_CPUSET_CLONE_CHILDREN, &parent->flags)) |
4405 | set_bit(CGRP_CPUSET_CLONE_CHILDREN, &cgrp->flags); | 4215 | set_bit(CGRP_CPUSET_CLONE_CHILDREN, &cgrp->flags); |
4406 | 4216 | ||
4407 | for_each_root_subsys(root, ss) { | ||
4408 | struct cgroup_subsys_state *css; | ||
4409 | |||
4410 | css = ss->css_alloc(cgroup_css(parent, ss)); | ||
4411 | if (IS_ERR(css)) { | ||
4412 | err = PTR_ERR(css); | ||
4413 | goto err_free_all; | ||
4414 | } | ||
4415 | css_ar[ss->subsys_id] = css; | ||
4416 | |||
4417 | err = percpu_ref_init(&css->refcnt, css_release); | ||
4418 | if (err) | ||
4419 | goto err_free_all; | ||
4420 | |||
4421 | init_css(css, ss, cgrp); | ||
4422 | } | ||
4423 | |||
4424 | /* | 4217 | /* |
4425 | * Create directory. cgroup_create_file() returns with the new | 4218 | * Create directory. cgroup_create_file() returns with the new |
4426 | * directory locked on success so that it can be populated without | 4219 | * directory locked on success so that it can be populated without |
@@ -4428,7 +4221,7 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, | |||
4428 | */ | 4221 | */ |
4429 | err = cgroup_create_file(dentry, S_IFDIR | mode, sb); | 4222 | err = cgroup_create_file(dentry, S_IFDIR | mode, sb); |
4430 | if (err < 0) | 4223 | if (err < 0) |
4431 | goto err_free_all; | 4224 | goto err_unlock; |
4432 | lockdep_assert_held(&dentry->d_inode->i_mutex); | 4225 | lockdep_assert_held(&dentry->d_inode->i_mutex); |
4433 | 4226 | ||
4434 | cgrp->serial_nr = cgroup_serial_nr_next++; | 4227 | cgrp->serial_nr = cgroup_serial_nr_next++; |
@@ -4440,55 +4233,31 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, | |||
4440 | /* hold a ref to the parent's dentry */ | 4233 | /* hold a ref to the parent's dentry */ |
4441 | dget(parent->dentry); | 4234 | dget(parent->dentry); |
4442 | 4235 | ||
4443 | /* creation succeeded, notify subsystems */ | 4236 | /* |
4444 | for_each_root_subsys(root, ss) { | 4237 | * @cgrp is now fully operational. If something fails after this |
4445 | struct cgroup_subsys_state *css = css_ar[ss->subsys_id]; | 4238 | * point, it'll be released via the normal destruction path. |
4446 | 4239 | */ | |
4447 | err = online_css(css); | ||
4448 | if (err) | ||
4449 | goto err_destroy; | ||
4450 | |||
4451 | /* each css holds a ref to the cgroup's dentry and parent css */ | ||
4452 | dget(dentry); | ||
4453 | css_get(css->parent); | ||
4454 | |||
4455 | /* mark it consumed for error path */ | ||
4456 | css_ar[ss->subsys_id] = NULL; | ||
4457 | |||
4458 | if (ss->broken_hierarchy && !ss->warned_broken_hierarchy && | ||
4459 | parent->parent) { | ||
4460 | pr_warning("cgroup: %s (%d) created nested cgroup for controller \"%s\" which has incomplete hierarchy support. Nested cgroups may change behavior in the future.\n", | ||
4461 | current->comm, current->pid, ss->name); | ||
4462 | if (!strcmp(ss->name, "memory")) | ||
4463 | pr_warning("cgroup: \"memory\" requires setting use_hierarchy to 1 on the root.\n"); | ||
4464 | ss->warned_broken_hierarchy = true; | ||
4465 | } | ||
4466 | } | ||
4467 | |||
4468 | idr_replace(&root->cgroup_idr, cgrp, cgrp->id); | 4240 | idr_replace(&root->cgroup_idr, cgrp, cgrp->id); |
4469 | 4241 | ||
4470 | err = cgroup_addrm_files(cgrp, cgroup_base_files, true); | 4242 | err = cgroup_addrm_files(cgrp, cgroup_base_files, true); |
4471 | if (err) | 4243 | if (err) |
4472 | goto err_destroy; | 4244 | goto err_destroy; |
4473 | 4245 | ||
4474 | err = cgroup_populate_dir(cgrp, root->subsys_mask); | 4246 | /* let's create and online css's */ |
4475 | if (err) | 4247 | for_each_subsys(ss, ssid) { |
4476 | goto err_destroy; | 4248 | if (root->subsys_mask & (1 << ssid)) { |
4249 | err = create_css(cgrp, ss); | ||
4250 | if (err) | ||
4251 | goto err_destroy; | ||
4252 | } | ||
4253 | } | ||
4477 | 4254 | ||
4478 | mutex_unlock(&cgroup_mutex); | 4255 | mutex_unlock(&cgroup_mutex); |
4479 | mutex_unlock(&cgrp->dentry->d_inode->i_mutex); | 4256 | mutex_unlock(&cgrp->dentry->d_inode->i_mutex); |
4480 | 4257 | ||
4481 | return 0; | 4258 | return 0; |
4482 | 4259 | ||
4483 | err_free_all: | 4260 | err_unlock: |
4484 | for_each_root_subsys(root, ss) { | ||
4485 | struct cgroup_subsys_state *css = css_ar[ss->subsys_id]; | ||
4486 | |||
4487 | if (css) { | ||
4488 | percpu_ref_cancel_init(&css->refcnt); | ||
4489 | ss->css_free(css); | ||
4490 | } | ||
4491 | } | ||
4492 | mutex_unlock(&cgroup_mutex); | 4261 | mutex_unlock(&cgroup_mutex); |
4493 | /* Release the reference count that we took on the superblock */ | 4262 | /* Release the reference count that we took on the superblock */ |
4494 | deactivate_super(sb); | 4263 | deactivate_super(sb); |
@@ -4501,14 +4270,6 @@ err_free_cgrp: | |||
4501 | return err; | 4270 | return err; |
4502 | 4271 | ||
4503 | err_destroy: | 4272 | err_destroy: |
4504 | for_each_root_subsys(root, ss) { | ||
4505 | struct cgroup_subsys_state *css = css_ar[ss->subsys_id]; | ||
4506 | |||
4507 | if (css) { | ||
4508 | percpu_ref_cancel_init(&css->refcnt); | ||
4509 | ss->css_free(css); | ||
4510 | } | ||
4511 | } | ||
4512 | cgroup_destroy_locked(cgrp); | 4273 | cgroup_destroy_locked(cgrp); |
4513 | mutex_unlock(&cgroup_mutex); | 4274 | mutex_unlock(&cgroup_mutex); |
4514 | mutex_unlock(&dentry->d_inode->i_mutex); | 4275 | mutex_unlock(&dentry->d_inode->i_mutex); |
@@ -4631,10 +4392,10 @@ static int cgroup_destroy_locked(struct cgroup *cgrp) | |||
4631 | __releases(&cgroup_mutex) __acquires(&cgroup_mutex) | 4392 | __releases(&cgroup_mutex) __acquires(&cgroup_mutex) |
4632 | { | 4393 | { |
4633 | struct dentry *d = cgrp->dentry; | 4394 | struct dentry *d = cgrp->dentry; |
4634 | struct cgroup_event *event, *tmp; | 4395 | struct cgroup_subsys_state *css; |
4635 | struct cgroup_subsys *ss; | ||
4636 | struct cgroup *child; | 4396 | struct cgroup *child; |
4637 | bool empty; | 4397 | bool empty; |
4398 | int ssid; | ||
4638 | 4399 | ||
4639 | lockdep_assert_held(&d->d_inode->i_mutex); | 4400 | lockdep_assert_held(&d->d_inode->i_mutex); |
4640 | lockdep_assert_held(&cgroup_mutex); | 4401 | lockdep_assert_held(&cgroup_mutex); |
@@ -4670,12 +4431,8 @@ static int cgroup_destroy_locked(struct cgroup *cgrp) | |||
4670 | * will be invoked to perform the rest of destruction once the | 4431 | * will be invoked to perform the rest of destruction once the |
4671 | * percpu refs of all css's are confirmed to be killed. | 4432 | * percpu refs of all css's are confirmed to be killed. |
4672 | */ | 4433 | */ |
4673 | for_each_root_subsys(cgrp->root, ss) { | 4434 | for_each_css(css, ssid, cgrp) |
4674 | struct cgroup_subsys_state *css = cgroup_css(cgrp, ss); | 4435 | kill_css(css); |
4675 | |||
4676 | if (css) | ||
4677 | kill_css(css); | ||
4678 | } | ||
4679 | 4436 | ||
4680 | /* | 4437 | /* |
4681 | * Mark @cgrp dead. This prevents further task migration and child | 4438 | * Mark @cgrp dead. This prevents further task migration and child |
@@ -4710,18 +4467,6 @@ static int cgroup_destroy_locked(struct cgroup *cgrp) | |||
4710 | dget(d); | 4467 | dget(d); |
4711 | cgroup_d_remove_dir(d); | 4468 | cgroup_d_remove_dir(d); |
4712 | 4469 | ||
4713 | /* | ||
4714 | * Unregister events and notify userspace. | ||
4715 | * Notify userspace about cgroup removing only after rmdir of cgroup | ||
4716 | * directory to avoid race between userspace and kernelspace. | ||
4717 | */ | ||
4718 | spin_lock(&cgrp->event_list_lock); | ||
4719 | list_for_each_entry_safe(event, tmp, &cgrp->event_list, list) { | ||
4720 | list_del_init(&event->list); | ||
4721 | schedule_work(&event->remove); | ||
4722 | } | ||
4723 | spin_unlock(&cgrp->event_list_lock); | ||
4724 | |||
4725 | return 0; | 4470 | return 0; |
4726 | }; | 4471 | }; |
4727 | 4472 | ||
@@ -4792,7 +4537,6 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss) | |||
4792 | cgroup_init_cftsets(ss); | 4537 | cgroup_init_cftsets(ss); |
4793 | 4538 | ||
4794 | /* Create the top cgroup state for this subsystem */ | 4539 | /* Create the top cgroup state for this subsystem */ |
4795 | list_add(&ss->sibling, &cgroup_dummy_root.subsys_list); | ||
4796 | ss->root = &cgroup_dummy_root; | 4540 | ss->root = &cgroup_dummy_root; |
4797 | css = ss->css_alloc(cgroup_css(cgroup_dummy_top, ss)); | 4541 | css = ss->css_alloc(cgroup_css(cgroup_dummy_top, ss)); |
4798 | /* We don't handle early failures gracefully */ | 4542 | /* We don't handle early failures gracefully */ |
@@ -4866,6 +4610,7 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss) | |||
4866 | cgroup_init_cftsets(ss); | 4610 | cgroup_init_cftsets(ss); |
4867 | 4611 | ||
4868 | mutex_lock(&cgroup_mutex); | 4612 | mutex_lock(&cgroup_mutex); |
4613 | mutex_lock(&cgroup_root_mutex); | ||
4869 | cgroup_subsys[ss->subsys_id] = ss; | 4614 | cgroup_subsys[ss->subsys_id] = ss; |
4870 | 4615 | ||
4871 | /* | 4616 | /* |
@@ -4877,11 +4622,11 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss) | |||
4877 | if (IS_ERR(css)) { | 4622 | if (IS_ERR(css)) { |
4878 | /* failure case - need to deassign the cgroup_subsys[] slot. */ | 4623 | /* failure case - need to deassign the cgroup_subsys[] slot. */ |
4879 | cgroup_subsys[ss->subsys_id] = NULL; | 4624 | cgroup_subsys[ss->subsys_id] = NULL; |
4625 | mutex_unlock(&cgroup_root_mutex); | ||
4880 | mutex_unlock(&cgroup_mutex); | 4626 | mutex_unlock(&cgroup_mutex); |
4881 | return PTR_ERR(css); | 4627 | return PTR_ERR(css); |
4882 | } | 4628 | } |
4883 | 4629 | ||
4884 | list_add(&ss->sibling, &cgroup_dummy_root.subsys_list); | ||
4885 | ss->root = &cgroup_dummy_root; | 4630 | ss->root = &cgroup_dummy_root; |
4886 | 4631 | ||
4887 | /* our new subsystem will be attached to the dummy hierarchy. */ | 4632 | /* our new subsystem will be attached to the dummy hierarchy. */ |
@@ -4911,14 +4656,18 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss) | |||
4911 | write_unlock(&css_set_lock); | 4656 | write_unlock(&css_set_lock); |
4912 | 4657 | ||
4913 | ret = online_css(css); | 4658 | ret = online_css(css); |
4914 | if (ret) | 4659 | if (ret) { |
4660 | ss->css_free(css); | ||
4915 | goto err_unload; | 4661 | goto err_unload; |
4662 | } | ||
4916 | 4663 | ||
4917 | /* success! */ | 4664 | /* success! */ |
4665 | mutex_unlock(&cgroup_root_mutex); | ||
4918 | mutex_unlock(&cgroup_mutex); | 4666 | mutex_unlock(&cgroup_mutex); |
4919 | return 0; | 4667 | return 0; |
4920 | 4668 | ||
4921 | err_unload: | 4669 | err_unload: |
4670 | mutex_unlock(&cgroup_root_mutex); | ||
4922 | mutex_unlock(&cgroup_mutex); | 4671 | mutex_unlock(&cgroup_mutex); |
4923 | /* @ss can't be mounted here as try_module_get() would fail */ | 4672 | /* @ss can't be mounted here as try_module_get() would fail */ |
4924 | cgroup_unload_subsys(ss); | 4673 | cgroup_unload_subsys(ss); |
@@ -4937,6 +4686,7 @@ EXPORT_SYMBOL_GPL(cgroup_load_subsys); | |||
4937 | void cgroup_unload_subsys(struct cgroup_subsys *ss) | 4686 | void cgroup_unload_subsys(struct cgroup_subsys *ss) |
4938 | { | 4687 | { |
4939 | struct cgrp_cset_link *link; | 4688 | struct cgrp_cset_link *link; |
4689 | struct cgroup_subsys_state *css; | ||
4940 | 4690 | ||
4941 | BUG_ON(ss->module == NULL); | 4691 | BUG_ON(ss->module == NULL); |
4942 | 4692 | ||
@@ -4948,15 +4698,15 @@ void cgroup_unload_subsys(struct cgroup_subsys *ss) | |||
4948 | BUG_ON(ss->root != &cgroup_dummy_root); | 4698 | BUG_ON(ss->root != &cgroup_dummy_root); |
4949 | 4699 | ||
4950 | mutex_lock(&cgroup_mutex); | 4700 | mutex_lock(&cgroup_mutex); |
4701 | mutex_lock(&cgroup_root_mutex); | ||
4951 | 4702 | ||
4952 | offline_css(cgroup_css(cgroup_dummy_top, ss)); | 4703 | css = cgroup_css(cgroup_dummy_top, ss); |
4704 | if (css) | ||
4705 | offline_css(css); | ||
4953 | 4706 | ||
4954 | /* deassign the subsys_id */ | 4707 | /* deassign the subsys_id */ |
4955 | cgroup_subsys[ss->subsys_id] = NULL; | 4708 | cgroup_subsys[ss->subsys_id] = NULL; |
4956 | 4709 | ||
4957 | /* remove subsystem from the dummy root's list of subsystems */ | ||
4958 | list_del_init(&ss->sibling); | ||
4959 | |||
4960 | /* | 4710 | /* |
4961 | * disentangle the css from all css_sets attached to the dummy | 4711 | * disentangle the css from all css_sets attached to the dummy |
4962 | * top. as in loading, we need to pay our respects to the hashtable | 4712 | * top. as in loading, we need to pay our respects to the hashtable |
@@ -4979,9 +4729,11 @@ void cgroup_unload_subsys(struct cgroup_subsys *ss) | |||
4979 | * need to free before marking as null because ss->css_free needs | 4729 | * need to free before marking as null because ss->css_free needs |
4980 | * the cgrp->subsys pointer to find their state. | 4730 | * the cgrp->subsys pointer to find their state. |
4981 | */ | 4731 | */ |
4982 | ss->css_free(cgroup_css(cgroup_dummy_top, ss)); | 4732 | if (css) |
4733 | ss->css_free(css); | ||
4983 | RCU_INIT_POINTER(cgroup_dummy_top->subsys[ss->subsys_id], NULL); | 4734 | RCU_INIT_POINTER(cgroup_dummy_top->subsys[ss->subsys_id], NULL); |
4984 | 4735 | ||
4736 | mutex_unlock(&cgroup_root_mutex); | ||
4985 | mutex_unlock(&cgroup_mutex); | 4737 | mutex_unlock(&cgroup_mutex); |
4986 | } | 4738 | } |
4987 | EXPORT_SYMBOL_GPL(cgroup_unload_subsys); | 4739 | EXPORT_SYMBOL_GPL(cgroup_unload_subsys); |
@@ -5100,6 +4852,15 @@ static int __init cgroup_wq_init(void) | |||
5100 | */ | 4852 | */ |
5101 | cgroup_destroy_wq = alloc_workqueue("cgroup_destroy", 0, 1); | 4853 | cgroup_destroy_wq = alloc_workqueue("cgroup_destroy", 0, 1); |
5102 | BUG_ON(!cgroup_destroy_wq); | 4854 | BUG_ON(!cgroup_destroy_wq); |
4855 | |||
4856 | /* | ||
4857 | * Used to destroy pidlists and separate to serve as flush domain. | ||
4858 | * Cap @max_active to 1 too. | ||
4859 | */ | ||
4860 | cgroup_pidlist_destroy_wq = alloc_workqueue("cgroup_pidlist_destroy", | ||
4861 | 0, 1); | ||
4862 | BUG_ON(!cgroup_pidlist_destroy_wq); | ||
4863 | |||
5103 | return 0; | 4864 | return 0; |
5104 | } | 4865 | } |
5105 | core_initcall(cgroup_wq_init); | 4866 | core_initcall(cgroup_wq_init); |
@@ -5143,11 +4904,12 @@ int proc_cgroup_show(struct seq_file *m, void *v) | |||
5143 | for_each_active_root(root) { | 4904 | for_each_active_root(root) { |
5144 | struct cgroup_subsys *ss; | 4905 | struct cgroup_subsys *ss; |
5145 | struct cgroup *cgrp; | 4906 | struct cgroup *cgrp; |
5146 | int count = 0; | 4907 | int ssid, count = 0; |
5147 | 4908 | ||
5148 | seq_printf(m, "%d:", root->hierarchy_id); | 4909 | seq_printf(m, "%d:", root->hierarchy_id); |
5149 | for_each_root_subsys(root, ss) | 4910 | for_each_subsys(ss, ssid) |
5150 | seq_printf(m, "%s%s", count++ ? "," : "", ss->name); | 4911 | if (root->subsys_mask & (1 << ssid)) |
4912 | seq_printf(m, "%s%s", count++ ? "," : "", ss->name); | ||
5151 | if (strlen(root->name)) | 4913 | if (strlen(root->name)) |
5152 | seq_printf(m, "%sname=%s", count ? "," : "", | 4914 | seq_printf(m, "%sname=%s", count ? "," : "", |
5153 | root->name); | 4915 | root->name); |
@@ -5488,16 +5250,16 @@ __setup("cgroup_disable=", cgroup_disable); | |||
5488 | * @dentry: directory dentry of interest | 5250 | * @dentry: directory dentry of interest |
5489 | * @ss: subsystem of interest | 5251 | * @ss: subsystem of interest |
5490 | * | 5252 | * |
5491 | * Must be called under RCU read lock. The caller is responsible for | 5253 | * Must be called under cgroup_mutex or RCU read lock. The caller is |
5492 | * pinning the returned css if it needs to be accessed outside the RCU | 5254 | * responsible for pinning the returned css if it needs to be accessed |
5493 | * critical section. | 5255 | * outside the critical section. |
5494 | */ | 5256 | */ |
5495 | struct cgroup_subsys_state *css_from_dir(struct dentry *dentry, | 5257 | struct cgroup_subsys_state *css_from_dir(struct dentry *dentry, |
5496 | struct cgroup_subsys *ss) | 5258 | struct cgroup_subsys *ss) |
5497 | { | 5259 | { |
5498 | struct cgroup *cgrp; | 5260 | struct cgroup *cgrp; |
5499 | 5261 | ||
5500 | WARN_ON_ONCE(!rcu_read_lock_held()); | 5262 | cgroup_assert_mutex_or_rcu_locked(); |
5501 | 5263 | ||
5502 | /* is @dentry a cgroup dir? */ | 5264 | /* is @dentry a cgroup dir? */ |
5503 | if (!dentry->d_inode || | 5265 | if (!dentry->d_inode || |
@@ -5520,9 +5282,7 @@ struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss) | |||
5520 | { | 5282 | { |
5521 | struct cgroup *cgrp; | 5283 | struct cgroup *cgrp; |
5522 | 5284 | ||
5523 | rcu_lockdep_assert(rcu_read_lock_held() || | 5285 | cgroup_assert_mutex_or_rcu_locked(); |
5524 | lockdep_is_held(&cgroup_mutex), | ||
5525 | "css_from_id() needs proper protection"); | ||
5526 | 5286 | ||
5527 | cgrp = idr_find(&ss->root->cgroup_idr, id); | 5287 | cgrp = idr_find(&ss->root->cgroup_idr, id); |
5528 | if (cgrp) | 5288 | if (cgrp) |
@@ -5570,9 +5330,7 @@ static u64 current_css_set_refcount_read(struct cgroup_subsys_state *css, | |||
5570 | return count; | 5330 | return count; |
5571 | } | 5331 | } |
5572 | 5332 | ||
5573 | static int current_css_set_cg_links_read(struct cgroup_subsys_state *css, | 5333 | static int current_css_set_cg_links_read(struct seq_file *seq, void *v) |
5574 | struct cftype *cft, | ||
5575 | struct seq_file *seq) | ||
5576 | { | 5334 | { |
5577 | struct cgrp_cset_link *link; | 5335 | struct cgrp_cset_link *link; |
5578 | struct css_set *cset; | 5336 | struct css_set *cset; |
@@ -5597,9 +5355,9 @@ static int current_css_set_cg_links_read(struct cgroup_subsys_state *css, | |||
5597 | } | 5355 | } |
5598 | 5356 | ||
5599 | #define MAX_TASKS_SHOWN_PER_CSS 25 | 5357 | #define MAX_TASKS_SHOWN_PER_CSS 25 |
5600 | static int cgroup_css_links_read(struct cgroup_subsys_state *css, | 5358 | static int cgroup_css_links_read(struct seq_file *seq, void *v) |
5601 | struct cftype *cft, struct seq_file *seq) | ||
5602 | { | 5359 | { |
5360 | struct cgroup_subsys_state *css = seq_css(seq); | ||
5603 | struct cgrp_cset_link *link; | 5361 | struct cgrp_cset_link *link; |
5604 | 5362 | ||
5605 | read_lock(&css_set_lock); | 5363 | read_lock(&css_set_lock); |
@@ -5645,12 +5403,12 @@ static struct cftype debug_files[] = { | |||
5645 | 5403 | ||
5646 | { | 5404 | { |
5647 | .name = "current_css_set_cg_links", | 5405 | .name = "current_css_set_cg_links", |
5648 | .read_seq_string = current_css_set_cg_links_read, | 5406 | .seq_show = current_css_set_cg_links_read, |
5649 | }, | 5407 | }, |
5650 | 5408 | ||
5651 | { | 5409 | { |
5652 | .name = "cgroup_css_links", | 5410 | .name = "cgroup_css_links", |
5653 | .read_seq_string = cgroup_css_links_read, | 5411 | .seq_show = cgroup_css_links_read, |
5654 | }, | 5412 | }, |
5655 | 5413 | ||
5656 | { | 5414 | { |
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c index f0ff64d0ebaa..6c3154e477f6 100644 --- a/kernel/cgroup_freezer.c +++ b/kernel/cgroup_freezer.c | |||
@@ -301,10 +301,9 @@ out_unlock: | |||
301 | spin_unlock_irq(&freezer->lock); | 301 | spin_unlock_irq(&freezer->lock); |
302 | } | 302 | } |
303 | 303 | ||
304 | static int freezer_read(struct cgroup_subsys_state *css, struct cftype *cft, | 304 | static int freezer_read(struct seq_file *m, void *v) |
305 | struct seq_file *m) | ||
306 | { | 305 | { |
307 | struct cgroup_subsys_state *pos; | 306 | struct cgroup_subsys_state *css = seq_css(m), *pos; |
308 | 307 | ||
309 | rcu_read_lock(); | 308 | rcu_read_lock(); |
310 | 309 | ||
@@ -458,7 +457,7 @@ static struct cftype files[] = { | |||
458 | { | 457 | { |
459 | .name = "state", | 458 | .name = "state", |
460 | .flags = CFTYPE_NOT_ON_ROOT, | 459 | .flags = CFTYPE_NOT_ON_ROOT, |
461 | .read_seq_string = freezer_read, | 460 | .seq_show = freezer_read, |
462 | .write_string = freezer_write, | 461 | .write_string = freezer_write, |
463 | }, | 462 | }, |
464 | { | 463 | { |
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 4772034b4b17..4410ac6a55f1 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c | |||
@@ -1731,66 +1731,41 @@ out_unlock: | |||
1731 | * used, list of ranges of sequential numbers, is variable length, | 1731 | * used, list of ranges of sequential numbers, is variable length, |
1732 | * and since these maps can change value dynamically, one could read | 1732 | * and since these maps can change value dynamically, one could read |
1733 | * gibberish by doing partial reads while a list was changing. | 1733 | * gibberish by doing partial reads while a list was changing. |
1734 | * A single large read to a buffer that crosses a page boundary is | ||
1735 | * ok, because the result being copied to user land is not recomputed | ||
1736 | * across a page fault. | ||
1737 | */ | 1734 | */ |
1738 | 1735 | static int cpuset_common_seq_show(struct seq_file *sf, void *v) | |
1739 | static size_t cpuset_sprintf_cpulist(char *page, struct cpuset *cs) | ||
1740 | { | 1736 | { |
1741 | size_t count; | 1737 | struct cpuset *cs = css_cs(seq_css(sf)); |
1742 | 1738 | cpuset_filetype_t type = seq_cft(sf)->private; | |
1743 | mutex_lock(&callback_mutex); | 1739 | ssize_t count; |
1744 | count = cpulist_scnprintf(page, PAGE_SIZE, cs->cpus_allowed); | 1740 | char *buf, *s; |
1745 | mutex_unlock(&callback_mutex); | 1741 | int ret = 0; |
1746 | 1742 | ||
1747 | return count; | 1743 | count = seq_get_buf(sf, &buf); |
1748 | } | 1744 | s = buf; |
1749 | |||
1750 | static size_t cpuset_sprintf_memlist(char *page, struct cpuset *cs) | ||
1751 | { | ||
1752 | size_t count; | ||
1753 | 1745 | ||
1754 | mutex_lock(&callback_mutex); | 1746 | mutex_lock(&callback_mutex); |
1755 | count = nodelist_scnprintf(page, PAGE_SIZE, cs->mems_allowed); | ||
1756 | mutex_unlock(&callback_mutex); | ||
1757 | |||
1758 | return count; | ||
1759 | } | ||
1760 | |||
1761 | static ssize_t cpuset_common_file_read(struct cgroup_subsys_state *css, | ||
1762 | struct cftype *cft, struct file *file, | ||
1763 | char __user *buf, size_t nbytes, | ||
1764 | loff_t *ppos) | ||
1765 | { | ||
1766 | struct cpuset *cs = css_cs(css); | ||
1767 | cpuset_filetype_t type = cft->private; | ||
1768 | char *page; | ||
1769 | ssize_t retval = 0; | ||
1770 | char *s; | ||
1771 | |||
1772 | if (!(page = (char *)__get_free_page(GFP_TEMPORARY))) | ||
1773 | return -ENOMEM; | ||
1774 | |||
1775 | s = page; | ||
1776 | 1747 | ||
1777 | switch (type) { | 1748 | switch (type) { |
1778 | case FILE_CPULIST: | 1749 | case FILE_CPULIST: |
1779 | s += cpuset_sprintf_cpulist(s, cs); | 1750 | s += cpulist_scnprintf(s, count, cs->cpus_allowed); |
1780 | break; | 1751 | break; |
1781 | case FILE_MEMLIST: | 1752 | case FILE_MEMLIST: |
1782 | s += cpuset_sprintf_memlist(s, cs); | 1753 | s += nodelist_scnprintf(s, count, cs->mems_allowed); |
1783 | break; | 1754 | break; |
1784 | default: | 1755 | default: |
1785 | retval = -EINVAL; | 1756 | ret = -EINVAL; |
1786 | goto out; | 1757 | goto out_unlock; |
1787 | } | 1758 | } |
1788 | *s++ = '\n'; | ||
1789 | 1759 | ||
1790 | retval = simple_read_from_buffer(buf, nbytes, ppos, page, s - page); | 1760 | if (s < buf + count - 1) { |
1791 | out: | 1761 | *s++ = '\n'; |
1792 | free_page((unsigned long)page); | 1762 | seq_commit(sf, s - buf); |
1793 | return retval; | 1763 | } else { |
1764 | seq_commit(sf, -1); | ||
1765 | } | ||
1766 | out_unlock: | ||
1767 | mutex_unlock(&callback_mutex); | ||
1768 | return ret; | ||
1794 | } | 1769 | } |
1795 | 1770 | ||
1796 | static u64 cpuset_read_u64(struct cgroup_subsys_state *css, struct cftype *cft) | 1771 | static u64 cpuset_read_u64(struct cgroup_subsys_state *css, struct cftype *cft) |
@@ -1847,7 +1822,7 @@ static s64 cpuset_read_s64(struct cgroup_subsys_state *css, struct cftype *cft) | |||
1847 | static struct cftype files[] = { | 1822 | static struct cftype files[] = { |
1848 | { | 1823 | { |
1849 | .name = "cpus", | 1824 | .name = "cpus", |
1850 | .read = cpuset_common_file_read, | 1825 | .seq_show = cpuset_common_seq_show, |
1851 | .write_string = cpuset_write_resmask, | 1826 | .write_string = cpuset_write_resmask, |
1852 | .max_write_len = (100U + 6 * NR_CPUS), | 1827 | .max_write_len = (100U + 6 * NR_CPUS), |
1853 | .private = FILE_CPULIST, | 1828 | .private = FILE_CPULIST, |
@@ -1855,7 +1830,7 @@ static struct cftype files[] = { | |||
1855 | 1830 | ||
1856 | { | 1831 | { |
1857 | .name = "mems", | 1832 | .name = "mems", |
1858 | .read = cpuset_common_file_read, | 1833 | .seq_show = cpuset_common_seq_show, |
1859 | .write_string = cpuset_write_resmask, | 1834 | .write_string = cpuset_write_resmask, |
1860 | .max_write_len = (100U + 6 * MAX_NUMNODES), | 1835 | .max_write_len = (100U + 6 * MAX_NUMNODES), |
1861 | .private = FILE_MEMLIST, | 1836 | .private = FILE_MEMLIST, |
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 5ae36cc11fe5..4d6964e49711 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c | |||
@@ -7854,15 +7854,14 @@ static int __cfs_schedulable(struct task_group *tg, u64 period, u64 quota) | |||
7854 | return ret; | 7854 | return ret; |
7855 | } | 7855 | } |
7856 | 7856 | ||
7857 | static int cpu_stats_show(struct cgroup_subsys_state *css, struct cftype *cft, | 7857 | static int cpu_stats_show(struct seq_file *sf, void *v) |
7858 | struct cgroup_map_cb *cb) | ||
7859 | { | 7858 | { |
7860 | struct task_group *tg = css_tg(css); | 7859 | struct task_group *tg = css_tg(seq_css(sf)); |
7861 | struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth; | 7860 | struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth; |
7862 | 7861 | ||
7863 | cb->fill(cb, "nr_periods", cfs_b->nr_periods); | 7862 | seq_printf(sf, "nr_periods %d\n", cfs_b->nr_periods); |
7864 | cb->fill(cb, "nr_throttled", cfs_b->nr_throttled); | 7863 | seq_printf(sf, "nr_throttled %d\n", cfs_b->nr_throttled); |
7865 | cb->fill(cb, "throttled_time", cfs_b->throttled_time); | 7864 | seq_printf(sf, "throttled_time %llu\n", cfs_b->throttled_time); |
7866 | 7865 | ||
7867 | return 0; | 7866 | return 0; |
7868 | } | 7867 | } |
@@ -7916,7 +7915,7 @@ static struct cftype cpu_files[] = { | |||
7916 | }, | 7915 | }, |
7917 | { | 7916 | { |
7918 | .name = "stat", | 7917 | .name = "stat", |
7919 | .read_map = cpu_stats_show, | 7918 | .seq_show = cpu_stats_show, |
7920 | }, | 7919 | }, |
7921 | #endif | 7920 | #endif |
7922 | #ifdef CONFIG_RT_GROUP_SCHED | 7921 | #ifdef CONFIG_RT_GROUP_SCHED |
diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c index f64722ff0299..622e0818f905 100644 --- a/kernel/sched/cpuacct.c +++ b/kernel/sched/cpuacct.c | |||
@@ -163,10 +163,9 @@ out: | |||
163 | return err; | 163 | return err; |
164 | } | 164 | } |
165 | 165 | ||
166 | static int cpuacct_percpu_seq_read(struct cgroup_subsys_state *css, | 166 | static int cpuacct_percpu_seq_show(struct seq_file *m, void *V) |
167 | struct cftype *cft, struct seq_file *m) | ||
168 | { | 167 | { |
169 | struct cpuacct *ca = css_ca(css); | 168 | struct cpuacct *ca = css_ca(seq_css(m)); |
170 | u64 percpu; | 169 | u64 percpu; |
171 | int i; | 170 | int i; |
172 | 171 | ||
@@ -183,10 +182,9 @@ static const char * const cpuacct_stat_desc[] = { | |||
183 | [CPUACCT_STAT_SYSTEM] = "system", | 182 | [CPUACCT_STAT_SYSTEM] = "system", |
184 | }; | 183 | }; |
185 | 184 | ||
186 | static int cpuacct_stats_show(struct cgroup_subsys_state *css, | 185 | static int cpuacct_stats_show(struct seq_file *sf, void *v) |
187 | struct cftype *cft, struct cgroup_map_cb *cb) | ||
188 | { | 186 | { |
189 | struct cpuacct *ca = css_ca(css); | 187 | struct cpuacct *ca = css_ca(seq_css(sf)); |
190 | int cpu; | 188 | int cpu; |
191 | s64 val = 0; | 189 | s64 val = 0; |
192 | 190 | ||
@@ -196,7 +194,7 @@ static int cpuacct_stats_show(struct cgroup_subsys_state *css, | |||
196 | val += kcpustat->cpustat[CPUTIME_NICE]; | 194 | val += kcpustat->cpustat[CPUTIME_NICE]; |
197 | } | 195 | } |
198 | val = cputime64_to_clock_t(val); | 196 | val = cputime64_to_clock_t(val); |
199 | cb->fill(cb, cpuacct_stat_desc[CPUACCT_STAT_USER], val); | 197 | seq_printf(sf, "%s %lld\n", cpuacct_stat_desc[CPUACCT_STAT_USER], val); |
200 | 198 | ||
201 | val = 0; | 199 | val = 0; |
202 | for_each_online_cpu(cpu) { | 200 | for_each_online_cpu(cpu) { |
@@ -207,7 +205,7 @@ static int cpuacct_stats_show(struct cgroup_subsys_state *css, | |||
207 | } | 205 | } |
208 | 206 | ||
209 | val = cputime64_to_clock_t(val); | 207 | val = cputime64_to_clock_t(val); |
210 | cb->fill(cb, cpuacct_stat_desc[CPUACCT_STAT_SYSTEM], val); | 208 | seq_printf(sf, "%s %lld\n", cpuacct_stat_desc[CPUACCT_STAT_SYSTEM], val); |
211 | 209 | ||
212 | return 0; | 210 | return 0; |
213 | } | 211 | } |
@@ -220,11 +218,11 @@ static struct cftype files[] = { | |||
220 | }, | 218 | }, |
221 | { | 219 | { |
222 | .name = "usage_percpu", | 220 | .name = "usage_percpu", |
223 | .read_seq_string = cpuacct_percpu_seq_read, | 221 | .seq_show = cpuacct_percpu_seq_show, |
224 | }, | 222 | }, |
225 | { | 223 | { |
226 | .name = "stat", | 224 | .name = "stat", |
227 | .read_map = cpuacct_stats_show, | 225 | .seq_show = cpuacct_stats_show, |
228 | }, | 226 | }, |
229 | { } /* terminate */ | 227 | { } /* terminate */ |
230 | }; | 228 | }; |
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index b010eac595d2..82ef9f3b7473 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c | |||
@@ -4789,6 +4789,7 @@ static int workqueue_cpu_down_callback(struct notifier_block *nfb, | |||
4789 | 4789 | ||
4790 | /* wait for per-cpu unbinding to finish */ | 4790 | /* wait for per-cpu unbinding to finish */ |
4791 | flush_work(&unbind_work); | 4791 | flush_work(&unbind_work); |
4792 | destroy_work_on_stack(&unbind_work); | ||
4792 | break; | 4793 | break; |
4793 | } | 4794 | } |
4794 | return NOTIFY_OK; | 4795 | return NOTIFY_OK; |
@@ -4828,6 +4829,7 @@ long work_on_cpu(int cpu, long (*fn)(void *), void *arg) | |||
4828 | INIT_WORK_ONSTACK(&wfc.work, work_for_cpu_fn); | 4829 | INIT_WORK_ONSTACK(&wfc.work, work_for_cpu_fn); |
4829 | schedule_work_on(cpu, &wfc.work); | 4830 | schedule_work_on(cpu, &wfc.work); |
4830 | flush_work(&wfc.work); | 4831 | flush_work(&wfc.work); |
4832 | destroy_work_on_stack(&wfc.work); | ||
4831 | return wfc.ret; | 4833 | return wfc.ret; |
4832 | } | 4834 | } |
4833 | EXPORT_SYMBOL_GPL(work_on_cpu); | 4835 | EXPORT_SYMBOL_GPL(work_on_cpu); |
diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c index 1a53d497a8c5..963b7034a51b 100644 --- a/lib/percpu-refcount.c +++ b/lib/percpu-refcount.c | |||
@@ -120,6 +120,9 @@ static void percpu_ref_kill_rcu(struct rcu_head *rcu) | |||
120 | 120 | ||
121 | atomic_add((int) count - PCPU_COUNT_BIAS, &ref->count); | 121 | atomic_add((int) count - PCPU_COUNT_BIAS, &ref->count); |
122 | 122 | ||
123 | WARN_ONCE(atomic_read(&ref->count) <= 0, "percpu ref <= 0 (%i)", | ||
124 | atomic_read(&ref->count)); | ||
125 | |||
123 | /* @ref is viewed as dead on all CPUs, send out kill confirmation */ | 126 | /* @ref is viewed as dead on all CPUs, send out kill confirmation */ |
124 | if (ref->confirm_kill) | 127 | if (ref->confirm_kill) |
125 | ref->confirm_kill(ref); | 128 | ref->confirm_kill(ref); |
diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c index bda8e44f6fde..d747a84e09b0 100644 --- a/mm/hugetlb_cgroup.c +++ b/mm/hugetlb_cgroup.c | |||
@@ -242,22 +242,16 @@ void hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages, | |||
242 | return; | 242 | return; |
243 | } | 243 | } |
244 | 244 | ||
245 | static ssize_t hugetlb_cgroup_read(struct cgroup_subsys_state *css, | 245 | static u64 hugetlb_cgroup_read_u64(struct cgroup_subsys_state *css, |
246 | struct cftype *cft, struct file *file, | 246 | struct cftype *cft) |
247 | char __user *buf, size_t nbytes, | ||
248 | loff_t *ppos) | ||
249 | { | 247 | { |
250 | u64 val; | 248 | int idx, name; |
251 | char str[64]; | ||
252 | int idx, name, len; | ||
253 | struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css); | 249 | struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css); |
254 | 250 | ||
255 | idx = MEMFILE_IDX(cft->private); | 251 | idx = MEMFILE_IDX(cft->private); |
256 | name = MEMFILE_ATTR(cft->private); | 252 | name = MEMFILE_ATTR(cft->private); |
257 | 253 | ||
258 | val = res_counter_read_u64(&h_cg->hugepage[idx], name); | 254 | return res_counter_read_u64(&h_cg->hugepage[idx], name); |
259 | len = scnprintf(str, sizeof(str), "%llu\n", (unsigned long long)val); | ||
260 | return simple_read_from_buffer(buf, nbytes, ppos, str, len); | ||
261 | } | 255 | } |
262 | 256 | ||
263 | static int hugetlb_cgroup_write(struct cgroup_subsys_state *css, | 257 | static int hugetlb_cgroup_write(struct cgroup_subsys_state *css, |
@@ -337,28 +331,28 @@ static void __init __hugetlb_cgroup_file_init(int idx) | |||
337 | cft = &h->cgroup_files[0]; | 331 | cft = &h->cgroup_files[0]; |
338 | snprintf(cft->name, MAX_CFTYPE_NAME, "%s.limit_in_bytes", buf); | 332 | snprintf(cft->name, MAX_CFTYPE_NAME, "%s.limit_in_bytes", buf); |
339 | cft->private = MEMFILE_PRIVATE(idx, RES_LIMIT); | 333 | cft->private = MEMFILE_PRIVATE(idx, RES_LIMIT); |
340 | cft->read = hugetlb_cgroup_read; | 334 | cft->read_u64 = hugetlb_cgroup_read_u64; |
341 | cft->write_string = hugetlb_cgroup_write; | 335 | cft->write_string = hugetlb_cgroup_write; |
342 | 336 | ||
343 | /* Add the usage file */ | 337 | /* Add the usage file */ |
344 | cft = &h->cgroup_files[1]; | 338 | cft = &h->cgroup_files[1]; |
345 | snprintf(cft->name, MAX_CFTYPE_NAME, "%s.usage_in_bytes", buf); | 339 | snprintf(cft->name, MAX_CFTYPE_NAME, "%s.usage_in_bytes", buf); |
346 | cft->private = MEMFILE_PRIVATE(idx, RES_USAGE); | 340 | cft->private = MEMFILE_PRIVATE(idx, RES_USAGE); |
347 | cft->read = hugetlb_cgroup_read; | 341 | cft->read_u64 = hugetlb_cgroup_read_u64; |
348 | 342 | ||
349 | /* Add the MAX usage file */ | 343 | /* Add the MAX usage file */ |
350 | cft = &h->cgroup_files[2]; | 344 | cft = &h->cgroup_files[2]; |
351 | snprintf(cft->name, MAX_CFTYPE_NAME, "%s.max_usage_in_bytes", buf); | 345 | snprintf(cft->name, MAX_CFTYPE_NAME, "%s.max_usage_in_bytes", buf); |
352 | cft->private = MEMFILE_PRIVATE(idx, RES_MAX_USAGE); | 346 | cft->private = MEMFILE_PRIVATE(idx, RES_MAX_USAGE); |
353 | cft->trigger = hugetlb_cgroup_reset; | 347 | cft->trigger = hugetlb_cgroup_reset; |
354 | cft->read = hugetlb_cgroup_read; | 348 | cft->read_u64 = hugetlb_cgroup_read_u64; |
355 | 349 | ||
356 | /* Add the failcntfile */ | 350 | /* Add the failcntfile */ |
357 | cft = &h->cgroup_files[3]; | 351 | cft = &h->cgroup_files[3]; |
358 | snprintf(cft->name, MAX_CFTYPE_NAME, "%s.failcnt", buf); | 352 | snprintf(cft->name, MAX_CFTYPE_NAME, "%s.failcnt", buf); |
359 | cft->private = MEMFILE_PRIVATE(idx, RES_FAILCNT); | 353 | cft->private = MEMFILE_PRIVATE(idx, RES_FAILCNT); |
360 | cft->trigger = hugetlb_cgroup_reset; | 354 | cft->trigger = hugetlb_cgroup_reset; |
361 | cft->read = hugetlb_cgroup_read; | 355 | cft->read_u64 = hugetlb_cgroup_read_u64; |
362 | 356 | ||
363 | /* NULL terminate the last cft */ | 357 | /* NULL terminate the last cft */ |
364 | cft = &h->cgroup_files[4]; | 358 | cft = &h->cgroup_files[4]; |
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 57b16083f046..67dd2a881433 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
@@ -45,6 +45,7 @@ | |||
45 | #include <linux/swapops.h> | 45 | #include <linux/swapops.h> |
46 | #include <linux/spinlock.h> | 46 | #include <linux/spinlock.h> |
47 | #include <linux/eventfd.h> | 47 | #include <linux/eventfd.h> |
48 | #include <linux/poll.h> | ||
48 | #include <linux/sort.h> | 49 | #include <linux/sort.h> |
49 | #include <linux/fs.h> | 50 | #include <linux/fs.h> |
50 | #include <linux/seq_file.h> | 51 | #include <linux/seq_file.h> |
@@ -55,6 +56,7 @@ | |||
55 | #include <linux/cpu.h> | 56 | #include <linux/cpu.h> |
56 | #include <linux/oom.h> | 57 | #include <linux/oom.h> |
57 | #include <linux/lockdep.h> | 58 | #include <linux/lockdep.h> |
59 | #include <linux/file.h> | ||
58 | #include "internal.h" | 60 | #include "internal.h" |
59 | #include <net/sock.h> | 61 | #include <net/sock.h> |
60 | #include <net/ip.h> | 62 | #include <net/ip.h> |
@@ -227,6 +229,46 @@ struct mem_cgroup_eventfd_list { | |||
227 | struct eventfd_ctx *eventfd; | 229 | struct eventfd_ctx *eventfd; |
228 | }; | 230 | }; |
229 | 231 | ||
232 | /* | ||
233 | * cgroup_event represents events which userspace want to receive. | ||
234 | */ | ||
235 | struct mem_cgroup_event { | ||
236 | /* | ||
237 | * memcg which the event belongs to. | ||
238 | */ | ||
239 | struct mem_cgroup *memcg; | ||
240 | /* | ||
241 | * eventfd to signal userspace about the event. | ||
242 | */ | ||
243 | struct eventfd_ctx *eventfd; | ||
244 | /* | ||
245 | * Each of these stored in a list by the cgroup. | ||
246 | */ | ||
247 | struct list_head list; | ||
248 | /* | ||
249 | * register_event() callback will be used to add new userspace | ||
250 | * waiter for changes related to this event. Use eventfd_signal() | ||
251 | * on eventfd to send notification to userspace. | ||
252 | */ | ||
253 | int (*register_event)(struct mem_cgroup *memcg, | ||
254 | struct eventfd_ctx *eventfd, const char *args); | ||
255 | /* | ||
256 | * unregister_event() callback will be called when userspace closes | ||
257 | * the eventfd or on cgroup removing. This callback must be set, | ||
258 | * if you want provide notification functionality. | ||
259 | */ | ||
260 | void (*unregister_event)(struct mem_cgroup *memcg, | ||
261 | struct eventfd_ctx *eventfd); | ||
262 | /* | ||
263 | * All fields below needed to unregister event when | ||
264 | * userspace closes eventfd. | ||
265 | */ | ||
266 | poll_table pt; | ||
267 | wait_queue_head_t *wqh; | ||
268 | wait_queue_t wait; | ||
269 | struct work_struct remove; | ||
270 | }; | ||
271 | |||
230 | static void mem_cgroup_threshold(struct mem_cgroup *memcg); | 272 | static void mem_cgroup_threshold(struct mem_cgroup *memcg); |
231 | static void mem_cgroup_oom_notify(struct mem_cgroup *memcg); | 273 | static void mem_cgroup_oom_notify(struct mem_cgroup *memcg); |
232 | 274 | ||
@@ -331,6 +373,10 @@ struct mem_cgroup { | |||
331 | atomic_t numainfo_updating; | 373 | atomic_t numainfo_updating; |
332 | #endif | 374 | #endif |
333 | 375 | ||
376 | /* List of events which userspace want to receive */ | ||
377 | struct list_head event_list; | ||
378 | spinlock_t event_list_lock; | ||
379 | |||
334 | struct mem_cgroup_per_node *nodeinfo[0]; | 380 | struct mem_cgroup_per_node *nodeinfo[0]; |
335 | /* WARNING: nodeinfo must be the last member here */ | 381 | /* WARNING: nodeinfo must be the last member here */ |
336 | }; | 382 | }; |
@@ -490,11 +536,6 @@ struct cgroup_subsys_state *vmpressure_to_css(struct vmpressure *vmpr) | |||
490 | return &container_of(vmpr, struct mem_cgroup, vmpressure)->css; | 536 | return &container_of(vmpr, struct mem_cgroup, vmpressure)->css; |
491 | } | 537 | } |
492 | 538 | ||
493 | struct vmpressure *css_to_vmpressure(struct cgroup_subsys_state *css) | ||
494 | { | ||
495 | return &mem_cgroup_from_css(css)->vmpressure; | ||
496 | } | ||
497 | |||
498 | static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg) | 539 | static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg) |
499 | { | 540 | { |
500 | return (memcg == root_mem_cgroup); | 541 | return (memcg == root_mem_cgroup); |
@@ -2979,10 +3020,9 @@ static struct kmem_cache *memcg_params_to_cache(struct memcg_cache_params *p) | |||
2979 | } | 3020 | } |
2980 | 3021 | ||
2981 | #ifdef CONFIG_SLABINFO | 3022 | #ifdef CONFIG_SLABINFO |
2982 | static int mem_cgroup_slabinfo_read(struct cgroup_subsys_state *css, | 3023 | static int mem_cgroup_slabinfo_read(struct seq_file *m, void *v) |
2983 | struct cftype *cft, struct seq_file *m) | ||
2984 | { | 3024 | { |
2985 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); | 3025 | struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m)); |
2986 | struct memcg_cache_params *params; | 3026 | struct memcg_cache_params *params; |
2987 | 3027 | ||
2988 | if (!memcg_can_account_kmem(memcg)) | 3028 | if (!memcg_can_account_kmem(memcg)) |
@@ -5115,14 +5155,12 @@ static inline u64 mem_cgroup_usage(struct mem_cgroup *memcg, bool swap) | |||
5115 | return val << PAGE_SHIFT; | 5155 | return val << PAGE_SHIFT; |
5116 | } | 5156 | } |
5117 | 5157 | ||
5118 | static ssize_t mem_cgroup_read(struct cgroup_subsys_state *css, | 5158 | static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css, |
5119 | struct cftype *cft, struct file *file, | 5159 | struct cftype *cft) |
5120 | char __user *buf, size_t nbytes, loff_t *ppos) | ||
5121 | { | 5160 | { |
5122 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); | 5161 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); |
5123 | char str[64]; | ||
5124 | u64 val; | 5162 | u64 val; |
5125 | int name, len; | 5163 | int name; |
5126 | enum res_type type; | 5164 | enum res_type type; |
5127 | 5165 | ||
5128 | type = MEMFILE_TYPE(cft->private); | 5166 | type = MEMFILE_TYPE(cft->private); |
@@ -5148,8 +5186,7 @@ static ssize_t mem_cgroup_read(struct cgroup_subsys_state *css, | |||
5148 | BUG(); | 5186 | BUG(); |
5149 | } | 5187 | } |
5150 | 5188 | ||
5151 | len = scnprintf(str, sizeof(str), "%llu\n", (unsigned long long)val); | 5189 | return val; |
5152 | return simple_read_from_buffer(buf, nbytes, ppos, str, len); | ||
5153 | } | 5190 | } |
5154 | 5191 | ||
5155 | static int memcg_update_kmem_limit(struct cgroup_subsys_state *css, u64 val) | 5192 | static int memcg_update_kmem_limit(struct cgroup_subsys_state *css, u64 val) |
@@ -5386,8 +5423,7 @@ static int mem_cgroup_move_charge_write(struct cgroup_subsys_state *css, | |||
5386 | #endif | 5423 | #endif |
5387 | 5424 | ||
5388 | #ifdef CONFIG_NUMA | 5425 | #ifdef CONFIG_NUMA |
5389 | static int memcg_numa_stat_show(struct cgroup_subsys_state *css, | 5426 | static int memcg_numa_stat_show(struct seq_file *m, void *v) |
5390 | struct cftype *cft, struct seq_file *m) | ||
5391 | { | 5427 | { |
5392 | struct numa_stat { | 5428 | struct numa_stat { |
5393 | const char *name; | 5429 | const char *name; |
@@ -5403,7 +5439,7 @@ static int memcg_numa_stat_show(struct cgroup_subsys_state *css, | |||
5403 | const struct numa_stat *stat; | 5439 | const struct numa_stat *stat; |
5404 | int nid; | 5440 | int nid; |
5405 | unsigned long nr; | 5441 | unsigned long nr; |
5406 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); | 5442 | struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m)); |
5407 | 5443 | ||
5408 | for (stat = stats; stat < stats + ARRAY_SIZE(stats); stat++) { | 5444 | for (stat = stats; stat < stats + ARRAY_SIZE(stats); stat++) { |
5409 | nr = mem_cgroup_nr_lru_pages(memcg, stat->lru_mask); | 5445 | nr = mem_cgroup_nr_lru_pages(memcg, stat->lru_mask); |
@@ -5442,10 +5478,9 @@ static inline void mem_cgroup_lru_names_not_uptodate(void) | |||
5442 | BUILD_BUG_ON(ARRAY_SIZE(mem_cgroup_lru_names) != NR_LRU_LISTS); | 5478 | BUILD_BUG_ON(ARRAY_SIZE(mem_cgroup_lru_names) != NR_LRU_LISTS); |
5443 | } | 5479 | } |
5444 | 5480 | ||
5445 | static int memcg_stat_show(struct cgroup_subsys_state *css, struct cftype *cft, | 5481 | static int memcg_stat_show(struct seq_file *m, void *v) |
5446 | struct seq_file *m) | ||
5447 | { | 5482 | { |
5448 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); | 5483 | struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m)); |
5449 | struct mem_cgroup *mi; | 5484 | struct mem_cgroup *mi; |
5450 | unsigned int i; | 5485 | unsigned int i; |
5451 | 5486 | ||
@@ -5654,13 +5689,11 @@ static void mem_cgroup_oom_notify(struct mem_cgroup *memcg) | |||
5654 | mem_cgroup_oom_notify_cb(iter); | 5689 | mem_cgroup_oom_notify_cb(iter); |
5655 | } | 5690 | } |
5656 | 5691 | ||
5657 | static int mem_cgroup_usage_register_event(struct cgroup_subsys_state *css, | 5692 | static int __mem_cgroup_usage_register_event(struct mem_cgroup *memcg, |
5658 | struct cftype *cft, struct eventfd_ctx *eventfd, const char *args) | 5693 | struct eventfd_ctx *eventfd, const char *args, enum res_type type) |
5659 | { | 5694 | { |
5660 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); | ||
5661 | struct mem_cgroup_thresholds *thresholds; | 5695 | struct mem_cgroup_thresholds *thresholds; |
5662 | struct mem_cgroup_threshold_ary *new; | 5696 | struct mem_cgroup_threshold_ary *new; |
5663 | enum res_type type = MEMFILE_TYPE(cft->private); | ||
5664 | u64 threshold, usage; | 5697 | u64 threshold, usage; |
5665 | int i, size, ret; | 5698 | int i, size, ret; |
5666 | 5699 | ||
@@ -5737,13 +5770,23 @@ unlock: | |||
5737 | return ret; | 5770 | return ret; |
5738 | } | 5771 | } |
5739 | 5772 | ||
5740 | static void mem_cgroup_usage_unregister_event(struct cgroup_subsys_state *css, | 5773 | static int mem_cgroup_usage_register_event(struct mem_cgroup *memcg, |
5741 | struct cftype *cft, struct eventfd_ctx *eventfd) | 5774 | struct eventfd_ctx *eventfd, const char *args) |
5775 | { | ||
5776 | return __mem_cgroup_usage_register_event(memcg, eventfd, args, _MEM); | ||
5777 | } | ||
5778 | |||
5779 | static int memsw_cgroup_usage_register_event(struct mem_cgroup *memcg, | ||
5780 | struct eventfd_ctx *eventfd, const char *args) | ||
5781 | { | ||
5782 | return __mem_cgroup_usage_register_event(memcg, eventfd, args, _MEMSWAP); | ||
5783 | } | ||
5784 | |||
5785 | static void __mem_cgroup_usage_unregister_event(struct mem_cgroup *memcg, | ||
5786 | struct eventfd_ctx *eventfd, enum res_type type) | ||
5742 | { | 5787 | { |
5743 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); | ||
5744 | struct mem_cgroup_thresholds *thresholds; | 5788 | struct mem_cgroup_thresholds *thresholds; |
5745 | struct mem_cgroup_threshold_ary *new; | 5789 | struct mem_cgroup_threshold_ary *new; |
5746 | enum res_type type = MEMFILE_TYPE(cft->private); | ||
5747 | u64 usage; | 5790 | u64 usage; |
5748 | int i, j, size; | 5791 | int i, j, size; |
5749 | 5792 | ||
@@ -5816,14 +5859,23 @@ unlock: | |||
5816 | mutex_unlock(&memcg->thresholds_lock); | 5859 | mutex_unlock(&memcg->thresholds_lock); |
5817 | } | 5860 | } |
5818 | 5861 | ||
5819 | static int mem_cgroup_oom_register_event(struct cgroup_subsys_state *css, | 5862 | static void mem_cgroup_usage_unregister_event(struct mem_cgroup *memcg, |
5820 | struct cftype *cft, struct eventfd_ctx *eventfd, const char *args) | 5863 | struct eventfd_ctx *eventfd) |
5864 | { | ||
5865 | return __mem_cgroup_usage_unregister_event(memcg, eventfd, _MEM); | ||
5866 | } | ||
5867 | |||
5868 | static void memsw_cgroup_usage_unregister_event(struct mem_cgroup *memcg, | ||
5869 | struct eventfd_ctx *eventfd) | ||
5870 | { | ||
5871 | return __mem_cgroup_usage_unregister_event(memcg, eventfd, _MEMSWAP); | ||
5872 | } | ||
5873 | |||
5874 | static int mem_cgroup_oom_register_event(struct mem_cgroup *memcg, | ||
5875 | struct eventfd_ctx *eventfd, const char *args) | ||
5821 | { | 5876 | { |
5822 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); | ||
5823 | struct mem_cgroup_eventfd_list *event; | 5877 | struct mem_cgroup_eventfd_list *event; |
5824 | enum res_type type = MEMFILE_TYPE(cft->private); | ||
5825 | 5878 | ||
5826 | BUG_ON(type != _OOM_TYPE); | ||
5827 | event = kmalloc(sizeof(*event), GFP_KERNEL); | 5879 | event = kmalloc(sizeof(*event), GFP_KERNEL); |
5828 | if (!event) | 5880 | if (!event) |
5829 | return -ENOMEM; | 5881 | return -ENOMEM; |
@@ -5841,14 +5893,10 @@ static int mem_cgroup_oom_register_event(struct cgroup_subsys_state *css, | |||
5841 | return 0; | 5893 | return 0; |
5842 | } | 5894 | } |
5843 | 5895 | ||
5844 | static void mem_cgroup_oom_unregister_event(struct cgroup_subsys_state *css, | 5896 | static void mem_cgroup_oom_unregister_event(struct mem_cgroup *memcg, |
5845 | struct cftype *cft, struct eventfd_ctx *eventfd) | 5897 | struct eventfd_ctx *eventfd) |
5846 | { | 5898 | { |
5847 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); | ||
5848 | struct mem_cgroup_eventfd_list *ev, *tmp; | 5899 | struct mem_cgroup_eventfd_list *ev, *tmp; |
5849 | enum res_type type = MEMFILE_TYPE(cft->private); | ||
5850 | |||
5851 | BUG_ON(type != _OOM_TYPE); | ||
5852 | 5900 | ||
5853 | spin_lock(&memcg_oom_lock); | 5901 | spin_lock(&memcg_oom_lock); |
5854 | 5902 | ||
@@ -5862,17 +5910,12 @@ static void mem_cgroup_oom_unregister_event(struct cgroup_subsys_state *css, | |||
5862 | spin_unlock(&memcg_oom_lock); | 5910 | spin_unlock(&memcg_oom_lock); |
5863 | } | 5911 | } |
5864 | 5912 | ||
5865 | static int mem_cgroup_oom_control_read(struct cgroup_subsys_state *css, | 5913 | static int mem_cgroup_oom_control_read(struct seq_file *sf, void *v) |
5866 | struct cftype *cft, struct cgroup_map_cb *cb) | ||
5867 | { | 5914 | { |
5868 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); | 5915 | struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(sf)); |
5869 | |||
5870 | cb->fill(cb, "oom_kill_disable", memcg->oom_kill_disable); | ||
5871 | 5916 | ||
5872 | if (atomic_read(&memcg->under_oom)) | 5917 | seq_printf(sf, "oom_kill_disable %d\n", memcg->oom_kill_disable); |
5873 | cb->fill(cb, "under_oom", 1); | 5918 | seq_printf(sf, "under_oom %d\n", (bool)atomic_read(&memcg->under_oom)); |
5874 | else | ||
5875 | cb->fill(cb, "under_oom", 0); | ||
5876 | return 0; | 5919 | return 0; |
5877 | } | 5920 | } |
5878 | 5921 | ||
@@ -5965,41 +6008,261 @@ static void kmem_cgroup_css_offline(struct mem_cgroup *memcg) | |||
5965 | } | 6008 | } |
5966 | #endif | 6009 | #endif |
5967 | 6010 | ||
6011 | /* | ||
6012 | * DO NOT USE IN NEW FILES. | ||
6013 | * | ||
6014 | * "cgroup.event_control" implementation. | ||
6015 | * | ||
6016 | * This is way over-engineered. It tries to support fully configurable | ||
6017 | * events for each user. Such level of flexibility is completely | ||
6018 | * unnecessary especially in the light of the planned unified hierarchy. | ||
6019 | * | ||
6020 | * Please deprecate this and replace with something simpler if at all | ||
6021 | * possible. | ||
6022 | */ | ||
6023 | |||
6024 | /* | ||
6025 | * Unregister event and free resources. | ||
6026 | * | ||
6027 | * Gets called from workqueue. | ||
6028 | */ | ||
6029 | static void memcg_event_remove(struct work_struct *work) | ||
6030 | { | ||
6031 | struct mem_cgroup_event *event = | ||
6032 | container_of(work, struct mem_cgroup_event, remove); | ||
6033 | struct mem_cgroup *memcg = event->memcg; | ||
6034 | |||
6035 | remove_wait_queue(event->wqh, &event->wait); | ||
6036 | |||
6037 | event->unregister_event(memcg, event->eventfd); | ||
6038 | |||
6039 | /* Notify userspace the event is going away. */ | ||
6040 | eventfd_signal(event->eventfd, 1); | ||
6041 | |||
6042 | eventfd_ctx_put(event->eventfd); | ||
6043 | kfree(event); | ||
6044 | css_put(&memcg->css); | ||
6045 | } | ||
6046 | |||
6047 | /* | ||
6048 | * Gets called on POLLHUP on eventfd when user closes it. | ||
6049 | * | ||
6050 | * Called with wqh->lock held and interrupts disabled. | ||
6051 | */ | ||
6052 | static int memcg_event_wake(wait_queue_t *wait, unsigned mode, | ||
6053 | int sync, void *key) | ||
6054 | { | ||
6055 | struct mem_cgroup_event *event = | ||
6056 | container_of(wait, struct mem_cgroup_event, wait); | ||
6057 | struct mem_cgroup *memcg = event->memcg; | ||
6058 | unsigned long flags = (unsigned long)key; | ||
6059 | |||
6060 | if (flags & POLLHUP) { | ||
6061 | /* | ||
6062 | * If the event has been detached at cgroup removal, we | ||
6063 | * can simply return knowing the other side will cleanup | ||
6064 | * for us. | ||
6065 | * | ||
6066 | * We can't race against event freeing since the other | ||
6067 | * side will require wqh->lock via remove_wait_queue(), | ||
6068 | * which we hold. | ||
6069 | */ | ||
6070 | spin_lock(&memcg->event_list_lock); | ||
6071 | if (!list_empty(&event->list)) { | ||
6072 | list_del_init(&event->list); | ||
6073 | /* | ||
6074 | * We are in atomic context, but cgroup_event_remove() | ||
6075 | * may sleep, so we have to call it in workqueue. | ||
6076 | */ | ||
6077 | schedule_work(&event->remove); | ||
6078 | } | ||
6079 | spin_unlock(&memcg->event_list_lock); | ||
6080 | } | ||
6081 | |||
6082 | return 0; | ||
6083 | } | ||
6084 | |||
6085 | static void memcg_event_ptable_queue_proc(struct file *file, | ||
6086 | wait_queue_head_t *wqh, poll_table *pt) | ||
6087 | { | ||
6088 | struct mem_cgroup_event *event = | ||
6089 | container_of(pt, struct mem_cgroup_event, pt); | ||
6090 | |||
6091 | event->wqh = wqh; | ||
6092 | add_wait_queue(wqh, &event->wait); | ||
6093 | } | ||
6094 | |||
6095 | /* | ||
6096 | * DO NOT USE IN NEW FILES. | ||
6097 | * | ||
6098 | * Parse input and register new cgroup event handler. | ||
6099 | * | ||
6100 | * Input must be in format '<event_fd> <control_fd> <args>'. | ||
6101 | * Interpretation of args is defined by control file implementation. | ||
6102 | */ | ||
6103 | static int memcg_write_event_control(struct cgroup_subsys_state *css, | ||
6104 | struct cftype *cft, const char *buffer) | ||
6105 | { | ||
6106 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); | ||
6107 | struct mem_cgroup_event *event; | ||
6108 | struct cgroup_subsys_state *cfile_css; | ||
6109 | unsigned int efd, cfd; | ||
6110 | struct fd efile; | ||
6111 | struct fd cfile; | ||
6112 | const char *name; | ||
6113 | char *endp; | ||
6114 | int ret; | ||
6115 | |||
6116 | efd = simple_strtoul(buffer, &endp, 10); | ||
6117 | if (*endp != ' ') | ||
6118 | return -EINVAL; | ||
6119 | buffer = endp + 1; | ||
6120 | |||
6121 | cfd = simple_strtoul(buffer, &endp, 10); | ||
6122 | if ((*endp != ' ') && (*endp != '\0')) | ||
6123 | return -EINVAL; | ||
6124 | buffer = endp + 1; | ||
6125 | |||
6126 | event = kzalloc(sizeof(*event), GFP_KERNEL); | ||
6127 | if (!event) | ||
6128 | return -ENOMEM; | ||
6129 | |||
6130 | event->memcg = memcg; | ||
6131 | INIT_LIST_HEAD(&event->list); | ||
6132 | init_poll_funcptr(&event->pt, memcg_event_ptable_queue_proc); | ||
6133 | init_waitqueue_func_entry(&event->wait, memcg_event_wake); | ||
6134 | INIT_WORK(&event->remove, memcg_event_remove); | ||
6135 | |||
6136 | efile = fdget(efd); | ||
6137 | if (!efile.file) { | ||
6138 | ret = -EBADF; | ||
6139 | goto out_kfree; | ||
6140 | } | ||
6141 | |||
6142 | event->eventfd = eventfd_ctx_fileget(efile.file); | ||
6143 | if (IS_ERR(event->eventfd)) { | ||
6144 | ret = PTR_ERR(event->eventfd); | ||
6145 | goto out_put_efile; | ||
6146 | } | ||
6147 | |||
6148 | cfile = fdget(cfd); | ||
6149 | if (!cfile.file) { | ||
6150 | ret = -EBADF; | ||
6151 | goto out_put_eventfd; | ||
6152 | } | ||
6153 | |||
6154 | /* the process need read permission on control file */ | ||
6155 | /* AV: shouldn't we check that it's been opened for read instead? */ | ||
6156 | ret = inode_permission(file_inode(cfile.file), MAY_READ); | ||
6157 | if (ret < 0) | ||
6158 | goto out_put_cfile; | ||
6159 | |||
6160 | /* | ||
6161 | * Determine the event callbacks and set them in @event. This used | ||
6162 | * to be done via struct cftype but cgroup core no longer knows | ||
6163 | * about these events. The following is crude but the whole thing | ||
6164 | * is for compatibility anyway. | ||
6165 | * | ||
6166 | * DO NOT ADD NEW FILES. | ||
6167 | */ | ||
6168 | name = cfile.file->f_dentry->d_name.name; | ||
6169 | |||
6170 | if (!strcmp(name, "memory.usage_in_bytes")) { | ||
6171 | event->register_event = mem_cgroup_usage_register_event; | ||
6172 | event->unregister_event = mem_cgroup_usage_unregister_event; | ||
6173 | } else if (!strcmp(name, "memory.oom_control")) { | ||
6174 | event->register_event = mem_cgroup_oom_register_event; | ||
6175 | event->unregister_event = mem_cgroup_oom_unregister_event; | ||
6176 | } else if (!strcmp(name, "memory.pressure_level")) { | ||
6177 | event->register_event = vmpressure_register_event; | ||
6178 | event->unregister_event = vmpressure_unregister_event; | ||
6179 | } else if (!strcmp(name, "memory.memsw.usage_in_bytes")) { | ||
6180 | event->register_event = memsw_cgroup_usage_register_event; | ||
6181 | event->unregister_event = memsw_cgroup_usage_unregister_event; | ||
6182 | } else { | ||
6183 | ret = -EINVAL; | ||
6184 | goto out_put_cfile; | ||
6185 | } | ||
6186 | |||
6187 | /* | ||
6188 | * Verify @cfile should belong to @css. Also, remaining events are | ||
6189 | * automatically removed on cgroup destruction but the removal is | ||
6190 | * asynchronous, so take an extra ref on @css. | ||
6191 | */ | ||
6192 | rcu_read_lock(); | ||
6193 | |||
6194 | ret = -EINVAL; | ||
6195 | cfile_css = css_from_dir(cfile.file->f_dentry->d_parent, | ||
6196 | &mem_cgroup_subsys); | ||
6197 | if (cfile_css == css && css_tryget(css)) | ||
6198 | ret = 0; | ||
6199 | |||
6200 | rcu_read_unlock(); | ||
6201 | if (ret) | ||
6202 | goto out_put_cfile; | ||
6203 | |||
6204 | ret = event->register_event(memcg, event->eventfd, buffer); | ||
6205 | if (ret) | ||
6206 | goto out_put_css; | ||
6207 | |||
6208 | efile.file->f_op->poll(efile.file, &event->pt); | ||
6209 | |||
6210 | spin_lock(&memcg->event_list_lock); | ||
6211 | list_add(&event->list, &memcg->event_list); | ||
6212 | spin_unlock(&memcg->event_list_lock); | ||
6213 | |||
6214 | fdput(cfile); | ||
6215 | fdput(efile); | ||
6216 | |||
6217 | return 0; | ||
6218 | |||
6219 | out_put_css: | ||
6220 | css_put(css); | ||
6221 | out_put_cfile: | ||
6222 | fdput(cfile); | ||
6223 | out_put_eventfd: | ||
6224 | eventfd_ctx_put(event->eventfd); | ||
6225 | out_put_efile: | ||
6226 | fdput(efile); | ||
6227 | out_kfree: | ||
6228 | kfree(event); | ||
6229 | |||
6230 | return ret; | ||
6231 | } | ||
6232 | |||
5968 | static struct cftype mem_cgroup_files[] = { | 6233 | static struct cftype mem_cgroup_files[] = { |
5969 | { | 6234 | { |
5970 | .name = "usage_in_bytes", | 6235 | .name = "usage_in_bytes", |
5971 | .private = MEMFILE_PRIVATE(_MEM, RES_USAGE), | 6236 | .private = MEMFILE_PRIVATE(_MEM, RES_USAGE), |
5972 | .read = mem_cgroup_read, | 6237 | .read_u64 = mem_cgroup_read_u64, |
5973 | .register_event = mem_cgroup_usage_register_event, | ||
5974 | .unregister_event = mem_cgroup_usage_unregister_event, | ||
5975 | }, | 6238 | }, |
5976 | { | 6239 | { |
5977 | .name = "max_usage_in_bytes", | 6240 | .name = "max_usage_in_bytes", |
5978 | .private = MEMFILE_PRIVATE(_MEM, RES_MAX_USAGE), | 6241 | .private = MEMFILE_PRIVATE(_MEM, RES_MAX_USAGE), |
5979 | .trigger = mem_cgroup_reset, | 6242 | .trigger = mem_cgroup_reset, |
5980 | .read = mem_cgroup_read, | 6243 | .read_u64 = mem_cgroup_read_u64, |
5981 | }, | 6244 | }, |
5982 | { | 6245 | { |
5983 | .name = "limit_in_bytes", | 6246 | .name = "limit_in_bytes", |
5984 | .private = MEMFILE_PRIVATE(_MEM, RES_LIMIT), | 6247 | .private = MEMFILE_PRIVATE(_MEM, RES_LIMIT), |
5985 | .write_string = mem_cgroup_write, | 6248 | .write_string = mem_cgroup_write, |
5986 | .read = mem_cgroup_read, | 6249 | .read_u64 = mem_cgroup_read_u64, |
5987 | }, | 6250 | }, |
5988 | { | 6251 | { |
5989 | .name = "soft_limit_in_bytes", | 6252 | .name = "soft_limit_in_bytes", |
5990 | .private = MEMFILE_PRIVATE(_MEM, RES_SOFT_LIMIT), | 6253 | .private = MEMFILE_PRIVATE(_MEM, RES_SOFT_LIMIT), |
5991 | .write_string = mem_cgroup_write, | 6254 | .write_string = mem_cgroup_write, |
5992 | .read = mem_cgroup_read, | 6255 | .read_u64 = mem_cgroup_read_u64, |
5993 | }, | 6256 | }, |
5994 | { | 6257 | { |
5995 | .name = "failcnt", | 6258 | .name = "failcnt", |
5996 | .private = MEMFILE_PRIVATE(_MEM, RES_FAILCNT), | 6259 | .private = MEMFILE_PRIVATE(_MEM, RES_FAILCNT), |
5997 | .trigger = mem_cgroup_reset, | 6260 | .trigger = mem_cgroup_reset, |
5998 | .read = mem_cgroup_read, | 6261 | .read_u64 = mem_cgroup_read_u64, |
5999 | }, | 6262 | }, |
6000 | { | 6263 | { |
6001 | .name = "stat", | 6264 | .name = "stat", |
6002 | .read_seq_string = memcg_stat_show, | 6265 | .seq_show = memcg_stat_show, |
6003 | }, | 6266 | }, |
6004 | { | 6267 | { |
6005 | .name = "force_empty", | 6268 | .name = "force_empty", |
@@ -6012,6 +6275,12 @@ static struct cftype mem_cgroup_files[] = { | |||
6012 | .read_u64 = mem_cgroup_hierarchy_read, | 6275 | .read_u64 = mem_cgroup_hierarchy_read, |
6013 | }, | 6276 | }, |
6014 | { | 6277 | { |
6278 | .name = "cgroup.event_control", /* XXX: for compat */ | ||
6279 | .write_string = memcg_write_event_control, | ||
6280 | .flags = CFTYPE_NO_PREFIX, | ||
6281 | .mode = S_IWUGO, | ||
6282 | }, | ||
6283 | { | ||
6015 | .name = "swappiness", | 6284 | .name = "swappiness", |
6016 | .read_u64 = mem_cgroup_swappiness_read, | 6285 | .read_u64 = mem_cgroup_swappiness_read, |
6017 | .write_u64 = mem_cgroup_swappiness_write, | 6286 | .write_u64 = mem_cgroup_swappiness_write, |
@@ -6023,21 +6292,17 @@ static struct cftype mem_cgroup_files[] = { | |||
6023 | }, | 6292 | }, |
6024 | { | 6293 | { |
6025 | .name = "oom_control", | 6294 | .name = "oom_control", |
6026 | .read_map = mem_cgroup_oom_control_read, | 6295 | .seq_show = mem_cgroup_oom_control_read, |
6027 | .write_u64 = mem_cgroup_oom_control_write, | 6296 | .write_u64 = mem_cgroup_oom_control_write, |
6028 | .register_event = mem_cgroup_oom_register_event, | ||
6029 | .unregister_event = mem_cgroup_oom_unregister_event, | ||
6030 | .private = MEMFILE_PRIVATE(_OOM_TYPE, OOM_CONTROL), | 6297 | .private = MEMFILE_PRIVATE(_OOM_TYPE, OOM_CONTROL), |
6031 | }, | 6298 | }, |
6032 | { | 6299 | { |
6033 | .name = "pressure_level", | 6300 | .name = "pressure_level", |
6034 | .register_event = vmpressure_register_event, | ||
6035 | .unregister_event = vmpressure_unregister_event, | ||
6036 | }, | 6301 | }, |
6037 | #ifdef CONFIG_NUMA | 6302 | #ifdef CONFIG_NUMA |
6038 | { | 6303 | { |
6039 | .name = "numa_stat", | 6304 | .name = "numa_stat", |
6040 | .read_seq_string = memcg_numa_stat_show, | 6305 | .seq_show = memcg_numa_stat_show, |
6041 | }, | 6306 | }, |
6042 | #endif | 6307 | #endif |
6043 | #ifdef CONFIG_MEMCG_KMEM | 6308 | #ifdef CONFIG_MEMCG_KMEM |
@@ -6045,29 +6310,29 @@ static struct cftype mem_cgroup_files[] = { | |||
6045 | .name = "kmem.limit_in_bytes", | 6310 | .name = "kmem.limit_in_bytes", |
6046 | .private = MEMFILE_PRIVATE(_KMEM, RES_LIMIT), | 6311 | .private = MEMFILE_PRIVATE(_KMEM, RES_LIMIT), |
6047 | .write_string = mem_cgroup_write, | 6312 | .write_string = mem_cgroup_write, |
6048 | .read = mem_cgroup_read, | 6313 | .read_u64 = mem_cgroup_read_u64, |
6049 | }, | 6314 | }, |
6050 | { | 6315 | { |
6051 | .name = "kmem.usage_in_bytes", | 6316 | .name = "kmem.usage_in_bytes", |
6052 | .private = MEMFILE_PRIVATE(_KMEM, RES_USAGE), | 6317 | .private = MEMFILE_PRIVATE(_KMEM, RES_USAGE), |
6053 | .read = mem_cgroup_read, | 6318 | .read_u64 = mem_cgroup_read_u64, |
6054 | }, | 6319 | }, |
6055 | { | 6320 | { |
6056 | .name = "kmem.failcnt", | 6321 | .name = "kmem.failcnt", |
6057 | .private = MEMFILE_PRIVATE(_KMEM, RES_FAILCNT), | 6322 | .private = MEMFILE_PRIVATE(_KMEM, RES_FAILCNT), |
6058 | .trigger = mem_cgroup_reset, | 6323 | .trigger = mem_cgroup_reset, |
6059 | .read = mem_cgroup_read, | 6324 | .read_u64 = mem_cgroup_read_u64, |
6060 | }, | 6325 | }, |
6061 | { | 6326 | { |
6062 | .name = "kmem.max_usage_in_bytes", | 6327 | .name = "kmem.max_usage_in_bytes", |
6063 | .private = MEMFILE_PRIVATE(_KMEM, RES_MAX_USAGE), | 6328 | .private = MEMFILE_PRIVATE(_KMEM, RES_MAX_USAGE), |
6064 | .trigger = mem_cgroup_reset, | 6329 | .trigger = mem_cgroup_reset, |
6065 | .read = mem_cgroup_read, | 6330 | .read_u64 = mem_cgroup_read_u64, |
6066 | }, | 6331 | }, |
6067 | #ifdef CONFIG_SLABINFO | 6332 | #ifdef CONFIG_SLABINFO |
6068 | { | 6333 | { |
6069 | .name = "kmem.slabinfo", | 6334 | .name = "kmem.slabinfo", |
6070 | .read_seq_string = mem_cgroup_slabinfo_read, | 6335 | .seq_show = mem_cgroup_slabinfo_read, |
6071 | }, | 6336 | }, |
6072 | #endif | 6337 | #endif |
6073 | #endif | 6338 | #endif |
@@ -6079,27 +6344,25 @@ static struct cftype memsw_cgroup_files[] = { | |||
6079 | { | 6344 | { |
6080 | .name = "memsw.usage_in_bytes", | 6345 | .name = "memsw.usage_in_bytes", |
6081 | .private = MEMFILE_PRIVATE(_MEMSWAP, RES_USAGE), | 6346 | .private = MEMFILE_PRIVATE(_MEMSWAP, RES_USAGE), |
6082 | .read = mem_cgroup_read, | 6347 | .read_u64 = mem_cgroup_read_u64, |
6083 | .register_event = mem_cgroup_usage_register_event, | ||
6084 | .unregister_event = mem_cgroup_usage_unregister_event, | ||
6085 | }, | 6348 | }, |
6086 | { | 6349 | { |
6087 | .name = "memsw.max_usage_in_bytes", | 6350 | .name = "memsw.max_usage_in_bytes", |
6088 | .private = MEMFILE_PRIVATE(_MEMSWAP, RES_MAX_USAGE), | 6351 | .private = MEMFILE_PRIVATE(_MEMSWAP, RES_MAX_USAGE), |
6089 | .trigger = mem_cgroup_reset, | 6352 | .trigger = mem_cgroup_reset, |
6090 | .read = mem_cgroup_read, | 6353 | .read_u64 = mem_cgroup_read_u64, |
6091 | }, | 6354 | }, |
6092 | { | 6355 | { |
6093 | .name = "memsw.limit_in_bytes", | 6356 | .name = "memsw.limit_in_bytes", |
6094 | .private = MEMFILE_PRIVATE(_MEMSWAP, RES_LIMIT), | 6357 | .private = MEMFILE_PRIVATE(_MEMSWAP, RES_LIMIT), |
6095 | .write_string = mem_cgroup_write, | 6358 | .write_string = mem_cgroup_write, |
6096 | .read = mem_cgroup_read, | 6359 | .read_u64 = mem_cgroup_read_u64, |
6097 | }, | 6360 | }, |
6098 | { | 6361 | { |
6099 | .name = "memsw.failcnt", | 6362 | .name = "memsw.failcnt", |
6100 | .private = MEMFILE_PRIVATE(_MEMSWAP, RES_FAILCNT), | 6363 | .private = MEMFILE_PRIVATE(_MEMSWAP, RES_FAILCNT), |
6101 | .trigger = mem_cgroup_reset, | 6364 | .trigger = mem_cgroup_reset, |
6102 | .read = mem_cgroup_read, | 6365 | .read_u64 = mem_cgroup_read_u64, |
6103 | }, | 6366 | }, |
6104 | { }, /* terminate */ | 6367 | { }, /* terminate */ |
6105 | }; | 6368 | }; |
@@ -6271,6 +6534,8 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) | |||
6271 | mutex_init(&memcg->thresholds_lock); | 6534 | mutex_init(&memcg->thresholds_lock); |
6272 | spin_lock_init(&memcg->move_lock); | 6535 | spin_lock_init(&memcg->move_lock); |
6273 | vmpressure_init(&memcg->vmpressure); | 6536 | vmpressure_init(&memcg->vmpressure); |
6537 | INIT_LIST_HEAD(&memcg->event_list); | ||
6538 | spin_lock_init(&memcg->event_list_lock); | ||
6274 | 6539 | ||
6275 | return &memcg->css; | 6540 | return &memcg->css; |
6276 | 6541 | ||
@@ -6346,6 +6611,19 @@ static void mem_cgroup_invalidate_reclaim_iterators(struct mem_cgroup *memcg) | |||
6346 | static void mem_cgroup_css_offline(struct cgroup_subsys_state *css) | 6611 | static void mem_cgroup_css_offline(struct cgroup_subsys_state *css) |
6347 | { | 6612 | { |
6348 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); | 6613 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); |
6614 | struct mem_cgroup_event *event, *tmp; | ||
6615 | |||
6616 | /* | ||
6617 | * Unregister events and notify userspace. | ||
6618 | * Notify userspace about cgroup removing only after rmdir of cgroup | ||
6619 | * directory to avoid race between userspace and kernelspace. | ||
6620 | */ | ||
6621 | spin_lock(&memcg->event_list_lock); | ||
6622 | list_for_each_entry_safe(event, tmp, &memcg->event_list, list) { | ||
6623 | list_del_init(&event->list); | ||
6624 | schedule_work(&event->remove); | ||
6625 | } | ||
6626 | spin_unlock(&memcg->event_list_lock); | ||
6349 | 6627 | ||
6350 | kmem_cgroup_css_offline(memcg); | 6628 | kmem_cgroup_css_offline(memcg); |
6351 | 6629 | ||
diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c index d8bd2c500aa4..cfd162882c00 100644 --- a/mm/page_cgroup.c +++ b/mm/page_cgroup.c | |||
@@ -452,7 +452,7 @@ unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id) | |||
452 | * lookup_swap_cgroup_id - lookup mem_cgroup id tied to swap entry | 452 | * lookup_swap_cgroup_id - lookup mem_cgroup id tied to swap entry |
453 | * @ent: swap entry to be looked up. | 453 | * @ent: swap entry to be looked up. |
454 | * | 454 | * |
455 | * Returns CSS ID of mem_cgroup at success. 0 at failure. (0 is invalid ID) | 455 | * Returns ID of mem_cgroup at success. 0 at failure. (0 is invalid ID) |
456 | */ | 456 | */ |
457 | unsigned short lookup_swap_cgroup_id(swp_entry_t ent) | 457 | unsigned short lookup_swap_cgroup_id(swp_entry_t ent) |
458 | { | 458 | { |
diff --git a/mm/percpu.c b/mm/percpu.c index 65fd8a749712..036cfe07050f 100644 --- a/mm/percpu.c +++ b/mm/percpu.c | |||
@@ -1689,10 +1689,10 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size, | |||
1689 | max_distance += ai->unit_size; | 1689 | max_distance += ai->unit_size; |
1690 | 1690 | ||
1691 | /* warn if maximum distance is further than 75% of vmalloc space */ | 1691 | /* warn if maximum distance is further than 75% of vmalloc space */ |
1692 | if (max_distance > (VMALLOC_END - VMALLOC_START) * 3 / 4) { | 1692 | if (max_distance > VMALLOC_TOTAL * 3 / 4) { |
1693 | pr_warning("PERCPU: max_distance=0x%zx too large for vmalloc " | 1693 | pr_warning("PERCPU: max_distance=0x%zx too large for vmalloc " |
1694 | "space 0x%lx\n", max_distance, | 1694 | "space 0x%lx\n", max_distance, |
1695 | (unsigned long)(VMALLOC_END - VMALLOC_START)); | 1695 | VMALLOC_TOTAL); |
1696 | #ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK | 1696 | #ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK |
1697 | /* and fail if we have fallback */ | 1697 | /* and fail if we have fallback */ |
1698 | rc = -EINVAL; | 1698 | rc = -EINVAL; |
diff --git a/mm/vmpressure.c b/mm/vmpressure.c index e0f62837c3f4..196970a4541f 100644 --- a/mm/vmpressure.c +++ b/mm/vmpressure.c | |||
@@ -278,8 +278,7 @@ void vmpressure_prio(gfp_t gfp, struct mem_cgroup *memcg, int prio) | |||
278 | 278 | ||
279 | /** | 279 | /** |
280 | * vmpressure_register_event() - Bind vmpressure notifications to an eventfd | 280 | * vmpressure_register_event() - Bind vmpressure notifications to an eventfd |
281 | * @css: css that is interested in vmpressure notifications | 281 | * @memcg: memcg that is interested in vmpressure notifications |
282 | * @cft: cgroup control files handle | ||
283 | * @eventfd: eventfd context to link notifications with | 282 | * @eventfd: eventfd context to link notifications with |
284 | * @args: event arguments (used to set up a pressure level threshold) | 283 | * @args: event arguments (used to set up a pressure level threshold) |
285 | * | 284 | * |
@@ -289,15 +288,12 @@ void vmpressure_prio(gfp_t gfp, struct mem_cgroup *memcg, int prio) | |||
289 | * threshold (one of vmpressure_str_levels, i.e. "low", "medium", or | 288 | * threshold (one of vmpressure_str_levels, i.e. "low", "medium", or |
290 | * "critical"). | 289 | * "critical"). |
291 | * | 290 | * |
292 | * This function should not be used directly, just pass it to (struct | 291 | * To be used as memcg event method. |
293 | * cftype).register_event, and then cgroup core will handle everything by | ||
294 | * itself. | ||
295 | */ | 292 | */ |
296 | int vmpressure_register_event(struct cgroup_subsys_state *css, | 293 | int vmpressure_register_event(struct mem_cgroup *memcg, |
297 | struct cftype *cft, struct eventfd_ctx *eventfd, | 294 | struct eventfd_ctx *eventfd, const char *args) |
298 | const char *args) | ||
299 | { | 295 | { |
300 | struct vmpressure *vmpr = css_to_vmpressure(css); | 296 | struct vmpressure *vmpr = memcg_to_vmpressure(memcg); |
301 | struct vmpressure_event *ev; | 297 | struct vmpressure_event *ev; |
302 | int level; | 298 | int level; |
303 | 299 | ||
@@ -325,23 +321,19 @@ int vmpressure_register_event(struct cgroup_subsys_state *css, | |||
325 | 321 | ||
326 | /** | 322 | /** |
327 | * vmpressure_unregister_event() - Unbind eventfd from vmpressure | 323 | * vmpressure_unregister_event() - Unbind eventfd from vmpressure |
328 | * @css: css handle | 324 | * @memcg: memcg handle |
329 | * @cft: cgroup control files handle | ||
330 | * @eventfd: eventfd context that was used to link vmpressure with the @cg | 325 | * @eventfd: eventfd context that was used to link vmpressure with the @cg |
331 | * | 326 | * |
332 | * This function does internal manipulations to detach the @eventfd from | 327 | * This function does internal manipulations to detach the @eventfd from |
333 | * the vmpressure notifications, and then frees internal resources | 328 | * the vmpressure notifications, and then frees internal resources |
334 | * associated with the @eventfd (but the @eventfd itself is not freed). | 329 | * associated with the @eventfd (but the @eventfd itself is not freed). |
335 | * | 330 | * |
336 | * This function should not be used directly, just pass it to (struct | 331 | * To be used as memcg event method. |
337 | * cftype).unregister_event, and then cgroup core will handle everything | ||
338 | * by itself. | ||
339 | */ | 332 | */ |
340 | void vmpressure_unregister_event(struct cgroup_subsys_state *css, | 333 | void vmpressure_unregister_event(struct mem_cgroup *memcg, |
341 | struct cftype *cft, | ||
342 | struct eventfd_ctx *eventfd) | 334 | struct eventfd_ctx *eventfd) |
343 | { | 335 | { |
344 | struct vmpressure *vmpr = css_to_vmpressure(css); | 336 | struct vmpressure *vmpr = memcg_to_vmpressure(memcg); |
345 | struct vmpressure_event *ev; | 337 | struct vmpressure_event *ev; |
346 | 338 | ||
347 | mutex_lock(&vmpr->events_lock); | 339 | mutex_lock(&vmpr->events_lock); |
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index 9b7cf6c85f82..56cbb69ba024 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c | |||
@@ -173,14 +173,14 @@ static u64 read_prioidx(struct cgroup_subsys_state *css, struct cftype *cft) | |||
173 | return css->cgroup->id; | 173 | return css->cgroup->id; |
174 | } | 174 | } |
175 | 175 | ||
176 | static int read_priomap(struct cgroup_subsys_state *css, struct cftype *cft, | 176 | static int read_priomap(struct seq_file *sf, void *v) |
177 | struct cgroup_map_cb *cb) | ||
178 | { | 177 | { |
179 | struct net_device *dev; | 178 | struct net_device *dev; |
180 | 179 | ||
181 | rcu_read_lock(); | 180 | rcu_read_lock(); |
182 | for_each_netdev_rcu(&init_net, dev) | 181 | for_each_netdev_rcu(&init_net, dev) |
183 | cb->fill(cb, dev->name, netprio_prio(css, dev)); | 182 | seq_printf(sf, "%s %u\n", dev->name, |
183 | netprio_prio(seq_css(sf), dev)); | ||
184 | rcu_read_unlock(); | 184 | rcu_read_unlock(); |
185 | return 0; | 185 | return 0; |
186 | } | 186 | } |
@@ -238,7 +238,7 @@ static struct cftype ss_files[] = { | |||
238 | }, | 238 | }, |
239 | { | 239 | { |
240 | .name = "ifpriomap", | 240 | .name = "ifpriomap", |
241 | .read_map = read_priomap, | 241 | .seq_show = read_priomap, |
242 | .write_string = write_priomap, | 242 | .write_string = write_priomap, |
243 | }, | 243 | }, |
244 | { } /* terminate */ | 244 | { } /* terminate */ |
diff --git a/security/device_cgroup.c b/security/device_cgroup.c index 7c2a0a71049e..d3b6d2cd3a06 100644 --- a/security/device_cgroup.c +++ b/security/device_cgroup.c | |||
@@ -274,10 +274,9 @@ static void set_majmin(char *str, unsigned m) | |||
274 | sprintf(str, "%u", m); | 274 | sprintf(str, "%u", m); |
275 | } | 275 | } |
276 | 276 | ||
277 | static int devcgroup_seq_read(struct cgroup_subsys_state *css, | 277 | static int devcgroup_seq_show(struct seq_file *m, void *v) |
278 | struct cftype *cft, struct seq_file *m) | ||
279 | { | 278 | { |
280 | struct dev_cgroup *devcgroup = css_to_devcgroup(css); | 279 | struct dev_cgroup *devcgroup = css_to_devcgroup(seq_css(m)); |
281 | struct dev_exception_item *ex; | 280 | struct dev_exception_item *ex; |
282 | char maj[MAJMINLEN], min[MAJMINLEN], acc[ACCLEN]; | 281 | char maj[MAJMINLEN], min[MAJMINLEN], acc[ACCLEN]; |
283 | 282 | ||
@@ -679,7 +678,7 @@ static struct cftype dev_cgroup_files[] = { | |||
679 | }, | 678 | }, |
680 | { | 679 | { |
681 | .name = "list", | 680 | .name = "list", |
682 | .read_seq_string = devcgroup_seq_read, | 681 | .seq_show = devcgroup_seq_show, |
683 | .private = DEVCG_LIST, | 682 | .private = DEVCG_LIST, |
684 | }, | 683 | }, |
685 | { } /* terminate */ | 684 | { } /* terminate */ |