 61 files changed, 1476 insertions(+), 895 deletions(-)
diff --git a/Documentation/devicetree/bindings/timer/allwinner,sun4i-a10-timer.yaml b/Documentation/devicetree/bindings/timer/allwinner,sun4i-a10-timer.yaml
new file mode 100644
index 000000000000..20adc1c8e9cc
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/allwinner,sun4i-a10-timer.yaml
@@ -0,0 +1,102 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/timer/allwinner,sun4i-a10-timer.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A10 Timer Device Tree Bindings
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <maxime.ripard@bootlin.com>
+
+properties:
+  compatible:
+    enum:
+      - allwinner,sun4i-a10-timer
+      - allwinner,sun8i-a23-timer
+      - allwinner,sun8i-v3s-timer
+      - allwinner,suniv-f1c100s-timer
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    description:
+      List of timers interrupts
+
+  clocks:
+    maxItems: 1
+
+allOf:
+  - if:
+      properties:
+        compatible:
+          items:
+            const: allwinner,sun4i-a10-timer
+
+    then:
+      properties:
+        interrupts:
+          minItems: 6
+          maxItems: 6
+
+  - if:
+      properties:
+        compatible:
+          items:
+            const: allwinner,sun8i-a23-timer
+
+    then:
+      properties:
+        interrupts:
+          minItems: 2
+          maxItems: 2
+
+  - if:
+      properties:
+        compatible:
+          items:
+            const: allwinner,sun8i-v3s-timer
+
+    then:
+      properties:
+        interrupts:
+          minItems: 3
+          maxItems: 3
+
+  - if:
+      properties:
+        compatible:
+          items:
+            const: allwinner,suniv-f1c100s-timer
+
+    then:
+      properties:
+        interrupts:
+          minItems: 3
+          maxItems: 3
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+
+additionalProperties: false
+
+examples:
+  - |
+    timer {
+        compatible = "allwinner,sun4i-a10-timer";
+        reg = <0x01c20c00 0x400>;
+        interrupts = <22>,
+                     <23>,
+                     <24>,
+                     <25>,
+                     <67>,
+                     <68>;
+        clocks = <&osc>;
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/timer/allwinner,sun4i-timer.txt b/Documentation/devicetree/bindings/timer/allwinner,sun4i-timer.txt
deleted file mode 100644
index 3da9d515c03a..000000000000
--- a/Documentation/devicetree/bindings/timer/allwinner,sun4i-timer.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-Allwinner A1X SoCs Timer Controller
-
-Required properties:
-
-- compatible : should be one of the following:
-              "allwinner,sun4i-a10-timer"
-              "allwinner,suniv-f1c100s-timer"
-- reg : Specifies base physical address and size of the registers.
-- interrupts : The interrupt of the first timer
-- clocks: phandle to the source clock (usually a 24 MHz fixed clock)
-
-Example:
-
-timer {
-	compatible = "allwinner,sun4i-a10-timer";
-	reg = <0x01c20c00 0x400>;
-	interrupts = <22>;
-	clocks = <&osc>;
-};
diff --git a/Documentation/devicetree/bindings/timer/allwinner,sun5i-a13-hstimer.txt b/Documentation/devicetree/bindings/timer/allwinner,sun5i-a13-hstimer.txt
deleted file mode 100644
index 2c5c1be78360..000000000000
--- a/Documentation/devicetree/bindings/timer/allwinner,sun5i-a13-hstimer.txt
+++ /dev/null
@@ -1,26 +0,0 @@
-Allwinner SoCs High Speed Timer Controller
-
-Required properties:
-
-- compatible :	should be "allwinner,sun5i-a13-hstimer" or
-		"allwinner,sun7i-a20-hstimer"
-- reg : Specifies base physical address and size of the registers.
-- interrupts : The interrupts of these timers (2 for the sun5i IP, 4 for the sun7i
-		one)
-- clocks: phandle to the source clock (usually the AHB clock)
-
-Optional properties:
-- resets: phandle to a reset controller asserting the timer
-
-Example:
-
-timer@1c60000 {
-	compatible = "allwinner,sun7i-a20-hstimer";
-	reg = <0x01c60000 0x1000>;
-	interrupts = <0 51 1>,
-		     <0 52 1>,
-		     <0 53 1>,
-		     <0 54 1>;
-	clocks = <&ahb1_gates 19>;
-	resets = <&ahb1rst 19>;
-};
diff --git a/Documentation/devicetree/bindings/timer/allwinner,sun5i-a13-hstimer.yaml b/Documentation/devicetree/bindings/timer/allwinner,sun5i-a13-hstimer.yaml
new file mode 100644
index 000000000000..dfa0c41fd261
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/allwinner,sun5i-a13-hstimer.yaml
@@ -0,0 +1,79 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/timer/allwinner,sun5i-a13-hstimer.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A13 High-Speed Timer Device Tree Bindings
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <maxime.ripard@bootlin.com>
+
+properties:
+  compatible:
+    oneOf:
+      - const: allwinner,sun5i-a13-hstimer
+      - const: allwinner,sun7i-a20-hstimer
+      - items:
+          - const: allwinner,sun6i-a31-hstimer
+          - const: allwinner,sun7i-a20-hstimer
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    minItems: 2
+    maxItems: 4
+    items:
+      - description: Timer 0 Interrupt
+      - description: Timer 1 Interrupt
+      - description: Timer 2 Interrupt
+      - description: Timer 3 Interrupt
+
+  clocks:
+    maxItems: 1
+
+  resets:
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+
+if:
+  properties:
+    compatible:
+      items:
+        const: allwinner,sun5i-a13-hstimer
+
+then:
+  properties:
+    interrupts:
+      minItems: 2
+      maxItems: 2
+
+else:
+  properties:
+    interrupts:
+      minItems: 4
+      maxItems: 4
+
+additionalProperties: false
+
+examples:
+  - |
+    timer@1c60000 {
+        compatible = "allwinner,sun7i-a20-hstimer";
+        reg = <0x01c60000 0x1000>;
+        interrupts = <0 51 1>,
+                     <0 52 1>,
+                     <0 53 1>,
+                     <0 54 1>;
+        clocks = <&ahb1_gates 19>;
+        resets = <&ahb1rst 19>;
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/timer/renesas,cmt.txt b/Documentation/devicetree/bindings/timer/renesas,cmt.txt
index c5220bcd852b..a444cfc5852a 100644
--- a/Documentation/devicetree/bindings/timer/renesas,cmt.txt
+++ b/Documentation/devicetree/bindings/timer/renesas,cmt.txt
@@ -12,16 +12,13 @@ datasheets.
 Required Properties:
 
   - compatible: must contain one or more of the following:
-    - "renesas,cmt-48-sh73a0" for the sh73A0 48-bit CMT
-		(CMT1)
-    - "renesas,cmt-48-r8a7740" for the r8a7740 48-bit CMT
-		(CMT1)
-    - "renesas,cmt-48" for all non-second generation 48-bit CMT
-		(CMT1 on sh73a0 and r8a7740)
-      This is a fallback for the above renesas,cmt-48-* entries.
-
     - "renesas,r8a73a4-cmt0" for the 32-bit CMT0 device included in r8a73a4.
     - "renesas,r8a73a4-cmt1" for the 48-bit CMT1 device included in r8a73a4.
+    - "renesas,r8a7740-cmt0" for the 32-bit CMT0 device included in r8a7740.
+    - "renesas,r8a7740-cmt1" for the 48-bit CMT1 device included in r8a7740.
+    - "renesas,r8a7740-cmt2" for the 32-bit CMT2 device included in r8a7740.
+    - "renesas,r8a7740-cmt3" for the 32-bit CMT3 device included in r8a7740.
+    - "renesas,r8a7740-cmt4" for the 32-bit CMT4 device included in r8a7740.
     - "renesas,r8a7743-cmt0" for the 32-bit CMT0 device included in r8a7743.
     - "renesas,r8a7743-cmt1" for the 48-bit CMT1 device included in r8a7743.
     - "renesas,r8a7744-cmt0" for the 32-bit CMT0 device included in r8a7744.
@@ -31,29 +28,38 @@ Required Properties:
     - "renesas,r8a77470-cmt0" for the 32-bit CMT0 device included in r8a77470.
     - "renesas,r8a77470-cmt1" for the 48-bit CMT1 device included in r8a77470.
     - "renesas,r8a774a1-cmt0" for the 32-bit CMT0 device included in r8a774a1.
-    - "renesas,r8a774a1-cmt1" for the 48-bit CMT1 device included in r8a774a1.
+    - "renesas,r8a774a1-cmt1" for the 48-bit CMT devices included in r8a774a1.
     - "renesas,r8a774c0-cmt0" for the 32-bit CMT0 device included in r8a774c0.
-    - "renesas,r8a774c0-cmt1" for the 48-bit CMT1 device included in r8a774c0.
+    - "renesas,r8a774c0-cmt1" for the 48-bit CMT devices included in r8a774c0.
     - "renesas,r8a7790-cmt0" for the 32-bit CMT0 device included in r8a7790.
     - "renesas,r8a7790-cmt1" for the 48-bit CMT1 device included in r8a7790.
     - "renesas,r8a7791-cmt0" for the 32-bit CMT0 device included in r8a7791.
     - "renesas,r8a7791-cmt1" for the 48-bit CMT1 device included in r8a7791.
+    - "renesas,r8a7792-cmt0" for the 32-bit CMT0 device included in r8a7792.
+    - "renesas,r8a7792-cmt1" for the 48-bit CMT1 device included in r8a7792.
     - "renesas,r8a7793-cmt0" for the 32-bit CMT0 device included in r8a7793.
     - "renesas,r8a7793-cmt1" for the 48-bit CMT1 device included in r8a7793.
     - "renesas,r8a7794-cmt0" for the 32-bit CMT0 device included in r8a7794.
     - "renesas,r8a7794-cmt1" for the 48-bit CMT1 device included in r8a7794.
     - "renesas,r8a7795-cmt0" for the 32-bit CMT0 device included in r8a7795.
-    - "renesas,r8a7795-cmt1" for the 48-bit CMT1 device included in r8a7795.
+    - "renesas,r8a7795-cmt1" for the 48-bit CMT devices included in r8a7795.
     - "renesas,r8a7796-cmt0" for the 32-bit CMT0 device included in r8a7796.
-    - "renesas,r8a7796-cmt1" for the 48-bit CMT1 device included in r8a7796.
+    - "renesas,r8a7796-cmt1" for the 48-bit CMT devices included in r8a7796.
     - "renesas,r8a77965-cmt0" for the 32-bit CMT0 device included in r8a77965.
-    - "renesas,r8a77965-cmt1" for the 48-bit CMT1 device included in r8a77965.
+    - "renesas,r8a77965-cmt1" for the 48-bit CMT devices included in r8a77965.
     - "renesas,r8a77970-cmt0" for the 32-bit CMT0 device included in r8a77970.
-    - "renesas,r8a77970-cmt1" for the 48-bit CMT1 device included in r8a77970.
+    - "renesas,r8a77970-cmt1" for the 48-bit CMT devices included in r8a77970.
     - "renesas,r8a77980-cmt0" for the 32-bit CMT0 device included in r8a77980.
-    - "renesas,r8a77980-cmt1" for the 48-bit CMT1 device included in r8a77980.
+    - "renesas,r8a77980-cmt1" for the 48-bit CMT devices included in r8a77980.
     - "renesas,r8a77990-cmt0" for the 32-bit CMT0 device included in r8a77990.
-    - "renesas,r8a77990-cmt1" for the 48-bit CMT1 device included in r8a77990.
+    - "renesas,r8a77990-cmt1" for the 48-bit CMT devices included in r8a77990.
+    - "renesas,r8a77995-cmt0" for the 32-bit CMT0 device included in r8a77995.
+    - "renesas,r8a77995-cmt1" for the 48-bit CMT devices included in r8a77995.
+    - "renesas,sh73a0-cmt0" for the 32-bit CMT0 device included in sh73a0.
+    - "renesas,sh73a0-cmt1" for the 48-bit CMT1 device included in sh73a0.
+    - "renesas,sh73a0-cmt2" for the 32-bit CMT2 device included in sh73a0.
+    - "renesas,sh73a0-cmt3" for the 32-bit CMT3 device included in sh73a0.
+    - "renesas,sh73a0-cmt4" for the 32-bit CMT4 device included in sh73a0.
 
   - "renesas,rcar-gen2-cmt0" for 32-bit CMT0 devices included in R-Car Gen2
     and RZ/G1.
@@ -63,7 +69,7 @@ Required Properties:
     listed above.
   - "renesas,rcar-gen3-cmt0" for 32-bit CMT0 devices included in R-Car Gen3
     and RZ/G2.
-  - "renesas,rcar-gen3-cmt1" for 48-bit CMT1 devices included in R-Car Gen3
+  - "renesas,rcar-gen3-cmt1" for 48-bit CMT devices included in R-Car Gen3
     and RZ/G2.
     These are fallbacks for R-Car Gen3 and RZ/G2 entries listed
     above.
diff --git a/arch/arm64/boot/dts/freescale/imx8mm.dtsi b/arch/arm64/boot/dts/freescale/imx8mm.dtsi
index 984ea7b3fd9f..5f9d0da196e1 100644
--- a/arch/arm64/boot/dts/freescale/imx8mm.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mm.dtsi
@@ -546,6 +546,14 @@
 				#pwm-cells = <2>;
 				status = "disabled";
 			};
+
+			system_counter: timer@306a0000 {
+				compatible = "nxp,sysctr-timer";
+				reg = <0x306a0000 0x20000>;
+				interrupts = <GIC_SPI 47 IRQ_TYPE_LEVEL_HIGH>;
+				clocks = <&osc_24m>;
+				clock-names = "per";
+			};
 		};
 
 		aips3: bus@30800000 {
diff --git a/arch/arm64/boot/dts/freescale/imx8mq.dtsi b/arch/arm64/boot/dts/freescale/imx8mq.dtsi
index 046a0c8c8dd5..3f3594d9485c 100644
--- a/arch/arm64/boot/dts/freescale/imx8mq.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mq.dtsi
@@ -651,6 +651,14 @@
 				#pwm-cells = <2>;
 				status = "disabled";
 			};
+
+			system_counter: timer@306a0000 {
+				compatible = "nxp,sysctr-timer";
+				reg = <0x306a0000 0x20000>;
+				interrupts = <GIC_SPI 47 IRQ_TYPE_LEVEL_HIGH>;
+				clocks = <&osc_25m>;
+				clock-names = "per";
+			};
 		};
 
 		bus@30800000 { /* AIPS3 */
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index 349a61d8bf34..f5937742b290 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -122,7 +122,7 @@ static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
 
 		if (tsc_pg && vclock_was_used(VCLOCK_HVCLOCK))
 			return vmf_insert_pfn(vma, vmf->address,
-					vmalloc_to_pfn(tsc_pg));
+					virt_to_phys(tsc_pg) >> PAGE_SHIFT);
 	}
 
 	return VM_FAULT_SIGBUS;
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index d314cf1e15fd..2db3972c0e0f 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -315,8 +315,6 @@ void __init hyperv_init(void)
 
 	x86_init.pci.arch_init = hv_pci_init;
 
-	/* Register Hyper-V specific clocksource */
-	hv_init_clocksource();
 	return;
 
 remove_cpuhp_state:
diff --git a/arch/x86/include/asm/vdso/gettimeofday.h b/arch/x86/include/asm/vdso/gettimeofday.h
index ba71a63cdac4..e9ee139cf29e 100644
--- a/arch/x86/include/asm/vdso/gettimeofday.h
+++ b/arch/x86/include/asm/vdso/gettimeofday.h
@@ -51,7 +51,7 @@ extern struct pvclock_vsyscall_time_info pvclock_page
 	__attribute__((visibility("hidden")));
 #endif
 
-#ifdef CONFIG_HYPERV_TSCPAGE
+#ifdef CONFIG_HYPERV_TIMER
 extern struct ms_hyperv_tsc_page hvclock_page
 	__attribute__((visibility("hidden")));
 #endif
@@ -228,7 +228,7 @@ static u64 vread_pvclock(void)
 }
 #endif
 
-#ifdef CONFIG_HYPERV_TSCPAGE
+#ifdef CONFIG_HYPERV_TIMER
 static u64 vread_hvclock(void)
 {
 	return hv_read_tsc_page(&hvclock_page);
@@ -251,7 +251,7 @@ static inline u64 __arch_get_hw_counter(s32 clock_mode)
 		return vread_pvclock();
 	}
 #endif
-#ifdef CONFIG_HYPERV_TSCPAGE
+#ifdef CONFIG_HYPERV_TIMER
 	if (clock_mode == VCLOCK_HVCLOCK) {
 		barrier();
 		return vread_hvclock();
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 062f77279ce3..267daad8c036 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -29,6 +29,7 @@
 #include <asm/timer.h>
 #include <asm/reboot.h>
 #include <asm/nmi.h>
+#include <clocksource/hyperv_timer.h>
 
 struct ms_hyperv_info ms_hyperv;
 EXPORT_SYMBOL_GPL(ms_hyperv);
@@ -338,6 +339,15 @@ static void __init ms_hyperv_init_platform(void)
 		x2apic_phys = 1;
 # endif
 
+	/* Register Hyper-V specific clocksource */
+	hv_init_clocksource();
+#endif
+}
+
+void hv_setup_sched_clock(void *sched_clock)
+{
+#ifdef CONFIG_PARAVIRT
+	pv_ops.time.sched_clock = sched_clock;
 #endif
 }
 
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index e904ff06a83d..2a4f278f3b56 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1598,7 +1598,7 @@ static void start_sw_tscdeadline(struct kvm_lapic *apic)
 	    likely(ns > apic->lapic_timer.timer_advance_ns)) {
 		expire = ktime_add_ns(now, ns);
 		expire = ktime_sub_ns(expire, ktimer->timer_advance_ns);
-		hrtimer_start(&ktimer->timer, expire, HRTIMER_MODE_ABS);
+		hrtimer_start(&ktimer->timer, expire, HRTIMER_MODE_ABS_HARD);
 	} else
 		apic_timer_expired(apic);
 
@@ -2299,7 +2299,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
 	apic->vcpu = vcpu;
 
 	hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
-		     HRTIMER_MODE_ABS);
+		     HRTIMER_MODE_ABS_HARD);
 	apic->lapic_timer.timer.function = apic_timer_fn;
 	if (timer_advance_ns == -1) {
 		apic->lapic_timer.timer_advance_ns = LAPIC_TIMER_ADVANCE_ADJUST_INIT;
@@ -2484,7 +2484,7 @@ void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
 
 	timer = &vcpu->arch.apic->lapic_timer.timer;
 	if (hrtimer_cancel(timer))
-		hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
+		hrtimer_start_expires(timer, HRTIMER_MODE_ABS_HARD);
 }
 
 /*
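
The three hunks above move the LAPIC emulation timer to the new
HRTIMER_MODE_ABS_HARD mode added later in this series (include/linux/hrtimer.h):
on PREEMPT_RT, plain hrtimers are deferred to softirq context, which this timer
cannot tolerate. A minimal sketch of the pattern; my_dev, my_timer_fn and
my_dev_arm are hypothetical names, not code from this series:

	#include <linux/hrtimer.h>
	#include <linux/ktime.h>

	struct my_dev {
		struct hrtimer timer;
	};

	static enum hrtimer_restart my_timer_fn(struct hrtimer *t)
	{
		/* runs in hard irq context, even with PREEMPT_RT */
		return HRTIMER_NORESTART;
	}

	static void my_dev_arm(struct my_dev *dev, ktime_t expires)
	{
		hrtimer_init(&dev->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
		dev->timer.function = my_timer_fn;
		hrtimer_start(&dev->timer, expires, HRTIMER_MODE_ABS_HARD);
	}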
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 0835f4d8d42e..e0b849bfe74d 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -3411,15 +3411,14 @@ static bool blk_mq_poll_hybrid_sleep(struct request_queue *q,
 		kt = nsecs;
 
 	mode = HRTIMER_MODE_REL;
-	hrtimer_init_on_stack(&hs.timer, CLOCK_MONOTONIC, mode);
+	hrtimer_init_sleeper_on_stack(&hs, CLOCK_MONOTONIC, mode);
 	hrtimer_set_expires(&hs.timer, kt);
 
-	hrtimer_init_sleeper(&hs, current);
 	do {
		if (blk_mq_rq_state(rq) == MQ_RQ_COMPLETE)
			break;
		set_current_state(TASK_UNINTERRUPTIBLE);
-		hrtimer_start_expires(&hs.timer, mode);
+		hrtimer_sleeper_start_expires(&hs, mode);
		if (hs.task)
			io_schedule();
		hrtimer_cancel(&hs.timer);
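
This hunk replaces the old three-step setup (init the timer, set the expiry,
attach the sleeper to current) with the consolidated hrtimer_sleeper API added
later in this series. A minimal sketch of the resulting wait pattern, assuming
the new prototypes from include/linux/hrtimer.h; wait_until() is a hypothetical
helper, not kernel code:

	#include <linux/hrtimer.h>
	#include <linux/sched.h>

	static void wait_until(ktime_t deadline)
	{
		struct hrtimer_sleeper hs;

		hrtimer_init_sleeper_on_stack(&hs, CLOCK_MONOTONIC,
					      HRTIMER_MODE_ABS);
		hrtimer_set_expires(&hs.timer, deadline);

		set_current_state(TASK_UNINTERRUPTIBLE);
		hrtimer_sleeper_start_expires(&hs, HRTIMER_MODE_ABS);
		if (hs.task)		/* cleared by the sleeper callback */
			schedule();
		hrtimer_cancel(&hs.timer);
		__set_current_state(TASK_RUNNING);
		destroy_hrtimer_on_stack(&hs.timer);
	}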
diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig
index 5e9317dc3d39..a642c23b2fba 100644
--- a/drivers/clocksource/Kconfig
+++ b/drivers/clocksource/Kconfig
@@ -429,7 +429,7 @@ config ATMEL_ST
 
 config ATMEL_TCB_CLKSRC
 	bool "Atmel TC Block timer driver" if COMPILE_TEST
-	depends on HAS_IOMEM
+	depends on ARM && HAS_IOMEM
 	select TIMER_OF if OF
 	help
 	  Support for Timer Counter Blocks on Atmel SoCs.
diff --git a/drivers/clocksource/em_sti.c b/drivers/clocksource/em_sti.c
index 8e12b11e81b0..9039df4f90e2 100644
--- a/drivers/clocksource/em_sti.c
+++ b/drivers/clocksource/em_sti.c
@@ -291,10 +291,8 @@ static int em_sti_probe(struct platform_device *pdev)
 	platform_set_drvdata(pdev, p);
 
 	irq = platform_get_irq(pdev, 0);
-	if (irq < 0) {
-		dev_err(&pdev->dev, "failed to get irq\n");
+	if (irq < 0)
 		return irq;
-	}
 
 	/* map memory, let base point to the STI instance */
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
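
Since platform_get_irq() now prints its own error message on failure, the
driver-local dev_err() is redundant; the same cleanup is applied to sh_cmt.c
and sh_tmu.c below. The resulting probe idiom, as a sketch (my_probe() is a
hypothetical function name):

	#include <linux/platform_device.h>

	static int my_probe(struct platform_device *pdev)
	{
		int irq;

		/* platform_get_irq() already logs the failure */
		irq = platform_get_irq(pdev, 0);
		if (irq < 0)
			return irq;

		return 0;
	}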
diff --git a/drivers/clocksource/hyperv_timer.c b/drivers/clocksource/hyperv_timer.c
index ba2c79e6a0ee..2317d4e3daaf 100644
--- a/drivers/clocksource/hyperv_timer.c
+++ b/drivers/clocksource/hyperv_timer.c
@@ -22,6 +22,7 @@
 #include <asm/mshyperv.h>
 
 static struct clock_event_device __percpu *hv_clock_event;
+static u64 hv_sched_clock_offset __ro_after_init;
 
 /*
  * If false, we're using the old mechanism for stimer0 interrupts
@@ -212,19 +213,17 @@ EXPORT_SYMBOL_GPL(hv_stimer_global_cleanup);
 struct clocksource *hyperv_cs;
 EXPORT_SYMBOL_GPL(hyperv_cs);
 
-#ifdef CONFIG_HYPERV_TSCPAGE
-
-static struct ms_hyperv_tsc_page *tsc_pg;
+static struct ms_hyperv_tsc_page tsc_pg __aligned(PAGE_SIZE);
 
 struct ms_hyperv_tsc_page *hv_get_tsc_page(void)
 {
-	return tsc_pg;
+	return &tsc_pg;
 }
 EXPORT_SYMBOL_GPL(hv_get_tsc_page);
 
-static u64 notrace read_hv_sched_clock_tsc(void)
+static u64 notrace read_hv_clock_tsc(struct clocksource *arg)
 {
-	u64 current_tick = hv_read_tsc_page(tsc_pg);
+	u64 current_tick = hv_read_tsc_page(&tsc_pg);
 
 	if (current_tick == U64_MAX)
 		hv_get_time_ref_count(current_tick);
@@ -232,9 +231,9 @@ static u64 notrace read_hv_sched_clock_tsc(void)
 	return current_tick;
 }
 
-static u64 read_hv_clock_tsc(struct clocksource *arg)
+static u64 read_hv_sched_clock_tsc(void)
 {
-	return read_hv_sched_clock_tsc();
+	return read_hv_clock_tsc(NULL) - hv_sched_clock_offset;
 }
 
 static struct clocksource hyperv_cs_tsc = {
@@ -244,9 +243,8 @@ static struct clocksource hyperv_cs_tsc = {
 	.mask	= CLOCKSOURCE_MASK(64),
 	.flags	= CLOCK_SOURCE_IS_CONTINUOUS,
 };
-#endif
 
-static u64 notrace read_hv_sched_clock_msr(void)
+static u64 notrace read_hv_clock_msr(struct clocksource *arg)
 {
 	u64 current_tick;
 	/*
@@ -258,9 +256,9 @@ static u64 notrace read_hv_sched_clock_msr(void)
 	return current_tick;
 }
 
-static u64 read_hv_clock_msr(struct clocksource *arg)
+static u64 read_hv_sched_clock_msr(void)
 {
-	return read_hv_sched_clock_msr();
+	return read_hv_clock_msr(NULL) - hv_sched_clock_offset;
 }
 
 static struct clocksource hyperv_cs_msr = {
@@ -271,7 +269,6 @@ static struct clocksource hyperv_cs_msr = {
 	.flags	= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
-#ifdef CONFIG_HYPERV_TSCPAGE
 static bool __init hv_init_tsc_clocksource(void)
 {
 	u64 tsc_msr;
@@ -280,12 +277,8 @@ static bool __init hv_init_tsc_clocksource(void)
 	if (!(ms_hyperv.features & HV_MSR_REFERENCE_TSC_AVAILABLE))
 		return false;
 
-	tsc_pg = vmalloc(PAGE_SIZE);
-	if (!tsc_pg)
-		return false;
-
 	hyperv_cs = &hyperv_cs_tsc;
-	phys_addr = page_to_phys(vmalloc_to_page(tsc_pg));
+	phys_addr = virt_to_phys(&tsc_pg);
 
 	/*
 	 * The Hyper-V TLFS specifies to preserve the value of reserved
@@ -302,17 +295,11 @@ static bool __init hv_init_tsc_clocksource(void)
 	hv_set_clocksource_vdso(hyperv_cs_tsc);
 	clocksource_register_hz(&hyperv_cs_tsc, NSEC_PER_SEC/100);
 
-	/* sched_clock_register is needed on ARM64 but is a no-op on x86 */
-	sched_clock_register(read_hv_sched_clock_tsc, 64, HV_CLOCK_HZ);
+	hv_sched_clock_offset = hyperv_cs->read(hyperv_cs);
+	hv_setup_sched_clock(read_hv_sched_clock_tsc);
+
 	return true;
 }
-#else
-static bool __init hv_init_tsc_clocksource(void)
-{
-	return false;
-}
-#endif
-
 
 void __init hv_init_clocksource(void)
 {
@@ -333,7 +320,7 @@ void __init hv_init_clocksource(void)
 	hyperv_cs = &hyperv_cs_msr;
 	clocksource_register_hz(&hyperv_cs_msr, NSEC_PER_SEC/100);
 
-	/* sched_clock_register is needed on ARM64 but is a no-op on x86 */
-	sched_clock_register(read_hv_sched_clock_msr, 64, HV_CLOCK_HZ);
+	hv_sched_clock_offset = hyperv_cs->read(hyperv_cs);
+	hv_setup_sched_clock(read_hv_sched_clock_msr);
 }
 EXPORT_SYMBOL_GPL(hv_init_clocksource);
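
The key idea above is the new hv_sched_clock_offset: the raw clocksource value
(in 100 ns units, i.e. a 10 MHz clock) has been counting since the VM booted,
so the value captured at registration time is subtracted on every read to make
sched_clock() start near zero. A stripped-down sketch of that scheme; all
demo_* names are hypothetical and the hardware read is elided:

	#include <linux/types.h>

	static u64 demo_offset;		/* captured once at registration */

	static u64 demo_read_raw(void)	/* stand-in for hyperv_cs->read() */
	{
		return 0;		/* hardware access elided */
	}

	static void demo_register(void)
	{
		demo_offset = demo_read_raw();
	}

	static u64 demo_sched_clock(void)
	{
		return demo_read_raw() - demo_offset;	/* 100 ns units */
	}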
diff --git a/drivers/clocksource/renesas-ostm.c b/drivers/clocksource/renesas-ostm.c
index 61d5f3b539ce..37c39b901bb1 100644
--- a/drivers/clocksource/renesas-ostm.c
+++ b/drivers/clocksource/renesas-ostm.c
@@ -221,7 +221,7 @@ static int __init ostm_init(struct device_node *np)
 	}
 
 	rate = clk_get_rate(ostm_clk);
-	ostm->ticks_per_jiffy = (rate + HZ / 2) / HZ;
+	ostm->ticks_per_jiffy = DIV_ROUND_CLOSEST(rate, HZ);
 
 	/*
 	 * First probed device will be used as system clocksource. Any
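
This is a behavior-preserving cleanup: for unsigned operands,
DIV_ROUND_CLOSEST(rate, HZ) performs the same (rate + HZ / 2) / HZ rounding.
A sketch of the equivalence (helper names are illustrative):

	#include <linux/kernel.h>

	static unsigned long ticks_old(unsigned long rate)
	{
		return (rate + HZ / 2) / HZ;		/* open-coded */
	}

	static unsigned long ticks_new(unsigned long rate)
	{
		/* e.g. rate = 33333333, HZ = 100: both return 333333 */
		return DIV_ROUND_CLOSEST(rate, HZ);
	}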
diff --git a/drivers/clocksource/sh_cmt.c b/drivers/clocksource/sh_cmt.c
index 55d3e03f2cd4..ef773db080e9 100644
--- a/drivers/clocksource/sh_cmt.c
+++ b/drivers/clocksource/sh_cmt.c
@@ -776,11 +776,8 @@ static int sh_cmt_register_clockevent(struct sh_cmt_channel *ch,
 	int ret;
 
 	irq = platform_get_irq(ch->cmt->pdev, ch->index);
-	if (irq < 0) {
-		dev_err(&ch->cmt->pdev->dev, "ch%u: failed to get irq\n",
-			ch->index);
+	if (irq < 0)
 		return irq;
-	}
 
 	ret = request_irq(irq, sh_cmt_interrupt,
 			  IRQF_TIMER | IRQF_IRQPOLL | IRQF_NOBALANCING,
@@ -921,13 +918,25 @@ static const struct platform_device_id sh_cmt_id_table[] = {
 MODULE_DEVICE_TABLE(platform, sh_cmt_id_table);
 
 static const struct of_device_id sh_cmt_of_table[] __maybe_unused = {
-	{ .compatible = "renesas,cmt-48", .data = &sh_cmt_info[SH_CMT_48BIT] },
+	{
+		/* deprecated, preserved for backward compatibility */
+		.compatible = "renesas,cmt-48",
+		.data = &sh_cmt_info[SH_CMT_48BIT]
+	},
 	{
 		/* deprecated, preserved for backward compatibility */
 		.compatible = "renesas,cmt-48-gen2",
 		.data = &sh_cmt_info[SH_CMT0_RCAR_GEN2]
 	},
 	{
+		.compatible = "renesas,r8a7740-cmt1",
+		.data = &sh_cmt_info[SH_CMT_48BIT]
+	},
+	{
+		.compatible = "renesas,sh73a0-cmt1",
+		.data = &sh_cmt_info[SH_CMT_48BIT]
+	},
+	{
 		.compatible = "renesas,rcar-gen2-cmt0",
 		.data = &sh_cmt_info[SH_CMT0_RCAR_GEN2]
 	},
diff --git a/drivers/clocksource/sh_tmu.c b/drivers/clocksource/sh_tmu.c
index 49f1c805fc95..8c4f3753b36e 100644
--- a/drivers/clocksource/sh_tmu.c
+++ b/drivers/clocksource/sh_tmu.c
@@ -462,11 +462,8 @@ static int sh_tmu_channel_setup(struct sh_tmu_channel *ch, unsigned int index,
 	ch->base = tmu->mapbase + 8 + ch->index * 12;
 
 	ch->irq = platform_get_irq(tmu->pdev, index);
-	if (ch->irq < 0) {
-		dev_err(&tmu->pdev->dev, "ch%u: failed to get irq\n",
-			ch->index);
+	if (ch->irq < 0)
 		return ch->irq;
-	}
 
 	ch->cs_enabled = false;
 	ch->enable_count = 0;
diff --git a/drivers/clocksource/timer-atmel-tcb.c b/drivers/clocksource/timer-atmel-tcb.c
index 6ed31f9def7e..7427b07495a8 100644
--- a/drivers/clocksource/timer-atmel-tcb.c
+++ b/drivers/clocksource/timer-atmel-tcb.c
@@ -6,6 +6,7 @@
 #include <linux/irq.h>
 
 #include <linux/clk.h>
+#include <linux/delay.h>
 #include <linux/err.h>
 #include <linux/ioport.h>
 #include <linux/io.h>
@@ -125,6 +126,18 @@ static u64 notrace tc_sched_clock_read32(void)
 	return tc_get_cycles32(&clksrc);
 }
 
+static struct delay_timer tc_delay_timer;
+
+static unsigned long tc_delay_timer_read(void)
+{
+	return tc_get_cycles(&clksrc);
+}
+
+static unsigned long notrace tc_delay_timer_read32(void)
+{
+	return tc_get_cycles32(&clksrc);
+}
+
 #ifdef CONFIG_GENERIC_CLOCKEVENTS
 
 struct tc_clkevt_device {
@@ -432,6 +445,7 @@ static int __init tcb_clksrc_init(struct device_node *node)
 		/* setup ony channel 0 */
 		tcb_setup_single_chan(&tc, best_divisor_idx);
 		tc_sched_clock = tc_sched_clock_read32;
+		tc_delay_timer.read_current_timer = tc_delay_timer_read32;
 	} else {
 		/* we have three clocks no matter what the
 		 * underlying platform supports.
@@ -444,6 +458,7 @@ static int __init tcb_clksrc_init(struct device_node *node)
 		/* setup both channel 0 & 1 */
 		tcb_setup_dual_chan(&tc, best_divisor_idx);
 		tc_sched_clock = tc_sched_clock_read;
+		tc_delay_timer.read_current_timer = tc_delay_timer_read;
 	}
 
 	/* and away we go! */
@@ -458,6 +473,9 @@ static int __init tcb_clksrc_init(struct device_node *node)
 
 	sched_clock_register(tc_sched_clock, 32, divided_rate);
 
+	tc_delay_timer.freq = divided_rate;
+	register_current_timer_delay(&tc_delay_timer);
+
 	return 0;
 
 err_unregister_clksrc:
diff --git a/drivers/clocksource/timer-imx-sysctr.c b/drivers/clocksource/timer-imx-sysctr.c
index fd7d68066efb..b7c80a368a1b 100644
--- a/drivers/clocksource/timer-imx-sysctr.c
+++ b/drivers/clocksource/timer-imx-sysctr.c
@@ -20,6 +20,8 @@
 #define SYS_CTR_EN		0x1
 #define SYS_CTR_IRQ_MASK	0x2
 
+#define SYS_CTR_CLK_DIV		0x3
+
 static void __iomem *sys_ctr_base;
 static u32 cmpcr;
 
@@ -134,6 +136,9 @@ static int __init sysctr_timer_init(struct device_node *np)
 	if (ret)
 		return ret;
 
+	/* system counter clock is divided by 3 internally */
+	to_sysctr.of_clk.rate /= SYS_CTR_CLK_DIV;
+
 	sys_ctr_base = timer_of_base(&to_sysctr);
 	cmpcr = readl(sys_ctr_base + CMPCR);
 	cmpcr &= ~SYS_CTR_EN;
diff --git a/drivers/clocksource/timer-npcm7xx.c b/drivers/clocksource/timer-npcm7xx.c
index 8a30da7f083b..9780ffd8010e 100644
--- a/drivers/clocksource/timer-npcm7xx.c
+++ b/drivers/clocksource/timer-npcm7xx.c
@@ -32,7 +32,7 @@
 #define NPCM7XX_Tx_INTEN		BIT(29)
 #define NPCM7XX_Tx_COUNTEN		BIT(30)
 #define NPCM7XX_Tx_ONESHOT		0x0
-#define NPCM7XX_Tx_OPER			GENMASK(27, 3)
+#define NPCM7XX_Tx_OPER			GENMASK(28, 27)
 #define NPCM7XX_Tx_MIN_PRESCALE	0x1
 #define NPCM7XX_Tx_TDR_MASK_BITS	24
 #define NPCM7XX_Tx_MAX_CNT	0xFFFFFF
@@ -84,8 +84,6 @@ static int npcm7xx_timer_oneshot(struct clock_event_device *evt)
 
 	val = readl(timer_of_base(to) + NPCM7XX_REG_TCSR0);
 	val &= ~NPCM7XX_Tx_OPER;
-
-	val = readl(timer_of_base(to) + NPCM7XX_REG_TCSR0);
 	val |= NPCM7XX_START_ONESHOT_Tx;
 	writel(val, timer_of_base(to) + NPCM7XX_REG_TCSR0);
 
@@ -97,12 +95,11 @@ static int npcm7xx_timer_periodic(struct clock_event_device *evt)
 	struct timer_of *to = to_timer_of(evt);
 	u32 val;
 
+	writel(timer_of_period(to), timer_of_base(to) + NPCM7XX_REG_TICR0);
+
 	val = readl(timer_of_base(to) + NPCM7XX_REG_TCSR0);
 	val &= ~NPCM7XX_Tx_OPER;
-
-	writel(timer_of_period(to), timer_of_base(to) + NPCM7XX_REG_TICR0);
 	val |= NPCM7XX_START_PERIODIC_Tx;
-
 	writel(val, timer_of_base(to) + NPCM7XX_REG_TCSR0);
 
 	return 0;
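
The GENMASK fix above narrows the operating-mode mask to the two TCSR bits it
is meant to cover; the old mask clobbered bits 3-27 of the register as a side
effect (the second and third hunks also drop a redundant TCSR re-read and
program the period register before starting the timer). GENMASK(h, l) sets
bits l through h inclusive:

	#include <linux/bits.h>

	/* GENMASK(28, 27) == 0x18000000: just the operate-mode field */
	/* GENMASK(27, 3)  == 0x0ffffff8: the old, far too wide mask  */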
diff --git a/drivers/clocksource/timer-of.c b/drivers/clocksource/timer-of.c
index 80542289fae7..d8c2bd4391d0 100644
--- a/drivers/clocksource/timer-of.c
+++ b/drivers/clocksource/timer-of.c
@@ -113,8 +113,10 @@ static __init int timer_of_clk_init(struct device_node *np,
 	of_clk->clk = of_clk->name ? of_clk_get_by_name(np, of_clk->name) :
 		      of_clk_get(np, of_clk->index);
 	if (IS_ERR(of_clk->clk)) {
-		pr_err("Failed to get clock for %pOF\n", np);
-		return PTR_ERR(of_clk->clk);
+		ret = PTR_ERR(of_clk->clk);
+		if (ret != -EPROBE_DEFER)
+			pr_err("Failed to get clock for %pOF\n", np);
+		goto out;
 	}
 
 	ret = clk_prepare_enable(of_clk->clk);
diff --git a/drivers/clocksource/timer-probe.c b/drivers/clocksource/timer-probe.c
index dda1946e84dd..ee9574da53c0 100644
--- a/drivers/clocksource/timer-probe.c
+++ b/drivers/clocksource/timer-probe.c
@@ -29,7 +29,9 @@ void __init timer_probe(void)
 
 		ret = init_func_ret(np);
 		if (ret) {
-			pr_err("Failed to initialize '%pOF': %d\n", np, ret);
+			if (ret != -EPROBE_DEFER)
+				pr_err("Failed to initialize '%pOF': %d\n", np,
+				       ret);
 			continue;
 		}
 
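
Together with the timer-of.c change above, probe failures caused by a resource
that is not ready yet (-EPROBE_DEFER) are now silent, and only real errors are
reported. A sketch of the quiet-on-defer idiom; demo_handle() is a
hypothetical helper:

	#include <linux/errno.h>
	#include <linux/printk.h>

	static int demo_handle(int ret)
	{
		if (ret) {
			/* deferral is expected; report only real errors */
			if (ret != -EPROBE_DEFER)
				pr_err("init failed: %d\n", ret);
			return ret;
		}
		return 0;
	}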
diff --git a/drivers/clocksource/timer-sun4i.c b/drivers/clocksource/timer-sun4i.c
index 65f38f6ca714..0ba8155b8287 100644
--- a/drivers/clocksource/timer-sun4i.c
+++ b/drivers/clocksource/timer-sun4i.c
@@ -219,5 +219,9 @@ static int __init sun4i_timer_init(struct device_node *node)
 }
 TIMER_OF_DECLARE(sun4i, "allwinner,sun4i-a10-timer",
 		 sun4i_timer_init);
+TIMER_OF_DECLARE(sun8i_a23, "allwinner,sun8i-a23-timer",
+		 sun4i_timer_init);
+TIMER_OF_DECLARE(sun8i_v3s, "allwinner,sun8i-v3s-timer",
+		 sun4i_timer_init);
 TIMER_OF_DECLARE(suniv, "allwinner,suniv-f1c100s-timer",
 		 sun4i_timer_init);
diff --git a/drivers/hv/Kconfig b/drivers/hv/Kconfig
index 9a59957922d4..79e5356a737a 100644
--- a/drivers/hv/Kconfig
+++ b/drivers/hv/Kconfig
@@ -14,9 +14,6 @@ config HYPERV
 config HYPERV_TIMER
 	def_bool HYPERV
 
-config HYPERV_TSCPAGE
-	def_bool HYPERV && X86_64
-
 config HYPERV_UTILS
 	tristate "Microsoft Hyper-V Utilities driver"
 	depends on HYPERV && CONNECTOR && NLS
diff --git a/drivers/staging/android/vsoc.c b/drivers/staging/android/vsoc.c
index 00a1ec7b9154..1240bb0317d9 100644
--- a/drivers/staging/android/vsoc.c
+++ b/drivers/staging/android/vsoc.c
@@ -437,12 +437,10 @@ static int handle_vsoc_cond_wait(struct file *filp, struct vsoc_cond_wait *arg)
 			return -EINVAL;
 		wake_time = ktime_set(arg->wake_time_sec, arg->wake_time_nsec);
 
-		hrtimer_init_on_stack(&to->timer, CLOCK_MONOTONIC,
-				      HRTIMER_MODE_ABS);
+		hrtimer_init_sleeper_on_stack(to, CLOCK_MONOTONIC,
+					      HRTIMER_MODE_ABS);
 		hrtimer_set_expires_range_ns(&to->timer, wake_time,
 					     current->timer_slack_ns);
-
-		hrtimer_init_sleeper(to, current);
 	}
 
 	while (1) {
@@ -460,7 +458,7 @@ static int handle_vsoc_cond_wait(struct file *filp, struct vsoc_cond_wait *arg)
 			break;
 		}
 		if (to) {
-			hrtimer_start_expires(&to->timer, HRTIMER_MODE_ABS);
+			hrtimer_sleeper_start_expires(to, HRTIMER_MODE_ABS);
 			if (likely(to->task))
 				freezable_schedule();
 			hrtimer_cancel(&to->timer);
diff --git a/fs/timerfd.c b/fs/timerfd.c
index 6a6fc8aa1de7..48305ba41e3c 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -471,7 +471,11 @@ static int do_timerfd_settime(int ufd, int flags,
 			break;
 		}
 		spin_unlock_irq(&ctx->wqh.lock);
-		cpu_relax();
+
+		if (isalarm(ctx))
+			hrtimer_cancel_wait_running(&ctx->t.alarm.timer);
+		else
+			hrtimer_cancel_wait_running(&ctx->t.tmr);
 	}
 
 	/*
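
On PREEMPT_RT the old cpu_relax() spin can livelock: the hrtimer callback may
run in a preemptible context, and the spinning task can keep it from ever
finishing. hrtimer_cancel_wait_running(), added later in this series, blocks
until the callback completes (and degrades to cpu_relax() on !RT kernels).
The canonical retry loop, as a sketch; demo_restart() is a hypothetical
helper:

	#include <linux/hrtimer.h>

	static void demo_restart(struct hrtimer *t)
	{
		for (;;) {
			/* -1 means the callback is currently running */
			if (hrtimer_try_to_cancel(t) >= 0)
				break;
			hrtimer_cancel_wait_running(t);
		}
	}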
diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
index 0becb7d9704d..18d8e2d8210f 100644
--- a/include/asm-generic/mshyperv.h
+++ b/include/asm-generic/mshyperv.h
@@ -167,6 +167,7 @@ void hyperv_report_panic(struct pt_regs *regs, long err);
 void hyperv_report_panic_msg(phys_addr_t pa, size_t size);
 bool hv_is_hyperv_initialized(void);
 void hyperv_cleanup(void);
+void hv_setup_sched_clock(void *sched_clock);
 #else /* CONFIG_HYPERV */
 static inline bool hv_is_hyperv_initialized(void) { return false; }
 static inline void hyperv_cleanup(void) {}
diff --git a/include/clocksource/hyperv_timer.h b/include/clocksource/hyperv_timer.h
index a821deb8ecb2..422f5e5237be 100644
--- a/include/clocksource/hyperv_timer.h
+++ b/include/clocksource/hyperv_timer.h
@@ -28,12 +28,10 @@ extern void hv_stimer_cleanup(unsigned int cpu);
 extern void hv_stimer_global_cleanup(void);
 extern void hv_stimer0_isr(void);
 
-#if IS_ENABLED(CONFIG_HYPERV)
+#ifdef CONFIG_HYPERV_TIMER
 extern struct clocksource *hyperv_cs;
 extern void hv_init_clocksource(void);
-#endif /* CONFIG_HYPERV */
 
-#ifdef CONFIG_HYPERV_TSCPAGE
 extern struct ms_hyperv_tsc_page *hv_get_tsc_page(void);
 
 static inline notrace u64
@@ -91,7 +89,7 @@ hv_read_tsc_page(const struct ms_hyperv_tsc_page *tsc_pg)
 	return hv_read_tsc_page_tsc(tsc_pg, &cur_tsc);
 }
 
-#else /* CONFIG_HYPERV_TSC_PAGE */
+#else /* CONFIG_HYPERV_TIMER */
 static inline struct ms_hyperv_tsc_page *hv_get_tsc_page(void)
 {
 	return NULL;
@@ -102,6 +100,6 @@ static inline u64 hv_read_tsc_page_tsc(const struct ms_hyperv_tsc_page *tsc_pg,
 {
 	return U64_MAX;
 }
-#endif /* CONFIG_HYPERV_TSCPAGE */
+#endif /* CONFIG_HYPERV_TIMER */
 
 #endif
diff --git a/include/linux/alarmtimer.h b/include/linux/alarmtimer.h
index 0760ca1cb009..74748e306f4b 100644
--- a/include/linux/alarmtimer.h
+++ b/include/linux/alarmtimer.h
@@ -5,7 +5,8 @@
 #include <linux/time.h>
 #include <linux/hrtimer.h>
 #include <linux/timerqueue.h>
-#include <linux/rtc.h>
+
+struct rtc_device;
 
 enum alarmtimer_type {
 	ALARM_REALTIME,
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 4971100a8cab..1b9a51a1bccb 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h | |||
@@ -32,12 +32,15 @@ struct hrtimer_cpu_base; | |||
32 | * when starting the timer) | 32 | * when starting the timer) |
33 | * HRTIMER_MODE_SOFT - Timer callback function will be executed in | 33 | * HRTIMER_MODE_SOFT - Timer callback function will be executed in |
34 | * soft irq context | 34 | * soft irq context |
35 | * HRTIMER_MODE_HARD - Timer callback function will be executed in | ||
36 | * hard irq context even on PREEMPT_RT. | ||
35 | */ | 37 | */ |
36 | enum hrtimer_mode { | 38 | enum hrtimer_mode { |
37 | HRTIMER_MODE_ABS = 0x00, | 39 | HRTIMER_MODE_ABS = 0x00, |
38 | HRTIMER_MODE_REL = 0x01, | 40 | HRTIMER_MODE_REL = 0x01, |
39 | HRTIMER_MODE_PINNED = 0x02, | 41 | HRTIMER_MODE_PINNED = 0x02, |
40 | HRTIMER_MODE_SOFT = 0x04, | 42 | HRTIMER_MODE_SOFT = 0x04, |
43 | HRTIMER_MODE_HARD = 0x08, | ||
41 | 44 | ||
42 | HRTIMER_MODE_ABS_PINNED = HRTIMER_MODE_ABS | HRTIMER_MODE_PINNED, | 45 | HRTIMER_MODE_ABS_PINNED = HRTIMER_MODE_ABS | HRTIMER_MODE_PINNED, |
43 | HRTIMER_MODE_REL_PINNED = HRTIMER_MODE_REL | HRTIMER_MODE_PINNED, | 46 | HRTIMER_MODE_REL_PINNED = HRTIMER_MODE_REL | HRTIMER_MODE_PINNED, |
@@ -48,6 +51,11 @@ enum hrtimer_mode { | |||
48 | HRTIMER_MODE_ABS_PINNED_SOFT = HRTIMER_MODE_ABS_PINNED | HRTIMER_MODE_SOFT, | 51 | HRTIMER_MODE_ABS_PINNED_SOFT = HRTIMER_MODE_ABS_PINNED | HRTIMER_MODE_SOFT, |
49 | HRTIMER_MODE_REL_PINNED_SOFT = HRTIMER_MODE_REL_PINNED | HRTIMER_MODE_SOFT, | 52 | HRTIMER_MODE_REL_PINNED_SOFT = HRTIMER_MODE_REL_PINNED | HRTIMER_MODE_SOFT, |
50 | 53 | ||
54 | HRTIMER_MODE_ABS_HARD = HRTIMER_MODE_ABS | HRTIMER_MODE_HARD, | ||
55 | HRTIMER_MODE_REL_HARD = HRTIMER_MODE_REL | HRTIMER_MODE_HARD, | ||
56 | |||
57 | HRTIMER_MODE_ABS_PINNED_HARD = HRTIMER_MODE_ABS_PINNED | HRTIMER_MODE_HARD, | ||
58 | HRTIMER_MODE_REL_PINNED_HARD = HRTIMER_MODE_REL_PINNED | HRTIMER_MODE_HARD, | ||
51 | }; | 59 | }; |
52 | 60 | ||
53 | /* | 61 | /* |
@@ -101,6 +109,8 @@ enum hrtimer_restart { | |||
101 | * @state: state information (See bit values above) | 109 | * @state: state information (See bit values above) |
102 | * @is_rel: Set if the timer was armed relative | 110 | * @is_rel: Set if the timer was armed relative |
103 | * @is_soft: Set if hrtimer will be expired in soft interrupt context. | 111 | * @is_soft: Set if hrtimer will be expired in soft interrupt context. |
112 | * @is_hard: Set if hrtimer will be expired in hard interrupt context | ||
113 | * even on RT. | ||
104 | * | 114 | * |
105 | * The hrtimer structure must be initialized by hrtimer_init() | 115 | * The hrtimer structure must be initialized by hrtimer_init() |
106 | */ | 116 | */ |
@@ -112,6 +122,7 @@ struct hrtimer { | |||
112 | u8 state; | 122 | u8 state; |
113 | u8 is_rel; | 123 | u8 is_rel; |
114 | u8 is_soft; | 124 | u8 is_soft; |
125 | u8 is_hard; | ||
115 | }; | 126 | }; |
116 | 127 | ||
117 | /** | 128 | /** |
@@ -183,6 +194,10 @@ enum hrtimer_base_type { | |||
183 | * @nr_retries: Total number of hrtimer interrupt retries | 194 | * @nr_retries: Total number of hrtimer interrupt retries |
184 | * @nr_hangs: Total number of hrtimer interrupt hangs | 195 | * @nr_hangs: Total number of hrtimer interrupt hangs |
185 | * @max_hang_time: Maximum time spent in hrtimer_interrupt | 196 | * @max_hang_time: Maximum time spent in hrtimer_interrupt |
197 | * @softirq_expiry_lock: Lock which is taken while softirq based hrtimer are | ||
198 | * expired | ||
199 | * @timer_waiters: A hrtimer_cancel() invocation waits for the timer | ||
200 | * callback to finish. | ||
186 | * @expires_next: absolute time of the next event, is required for remote | 201 | * @expires_next: absolute time of the next event, is required for remote |
187 | * hrtimer enqueue; it is the total first expiry time (hard | 202 | * hrtimer enqueue; it is the total first expiry time (hard |
188 | * and soft hrtimer are taken into account) | 203 | * and soft hrtimer are taken into account) |
@@ -210,6 +225,10 @@ struct hrtimer_cpu_base { | |||
210 | unsigned short nr_hangs; | 225 | unsigned short nr_hangs; |
211 | unsigned int max_hang_time; | 226 | unsigned int max_hang_time; |
212 | #endif | 227 | #endif |
228 | #ifdef CONFIG_PREEMPT_RT | ||
229 | spinlock_t softirq_expiry_lock; | ||
230 | atomic_t timer_waiters; | ||
231 | #endif | ||
213 | ktime_t expires_next; | 232 | ktime_t expires_next; |
214 | struct hrtimer *next_timer; | 233 | struct hrtimer *next_timer; |
215 | ktime_t softirq_expires_next; | 234 | ktime_t softirq_expires_next; |
@@ -341,16 +360,29 @@ extern void hrtimers_resume(void); | |||
341 | 360 | ||
342 | DECLARE_PER_CPU(struct tick_device, tick_cpu_device); | 361 | DECLARE_PER_CPU(struct tick_device, tick_cpu_device); |
343 | 362 | ||
363 | #ifdef CONFIG_PREEMPT_RT | ||
364 | void hrtimer_cancel_wait_running(const struct hrtimer *timer); | ||
365 | #else | ||
366 | static inline void hrtimer_cancel_wait_running(struct hrtimer *timer) | ||
367 | { | ||
368 | cpu_relax(); | ||
369 | } | ||
370 | #endif | ||
344 | 371 | ||
345 | /* Exported timer functions: */ | 372 | /* Exported timer functions: */ |
346 | 373 | ||
347 | /* Initialize timers: */ | 374 | /* Initialize timers: */ |
348 | extern void hrtimer_init(struct hrtimer *timer, clockid_t which_clock, | 375 | extern void hrtimer_init(struct hrtimer *timer, clockid_t which_clock, |
349 | enum hrtimer_mode mode); | 376 | enum hrtimer_mode mode); |
377 | extern void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, clockid_t clock_id, | ||
378 | enum hrtimer_mode mode); | ||
350 | 379 | ||
351 | #ifdef CONFIG_DEBUG_OBJECTS_TIMERS | 380 | #ifdef CONFIG_DEBUG_OBJECTS_TIMERS |
352 | extern void hrtimer_init_on_stack(struct hrtimer *timer, clockid_t which_clock, | 381 | extern void hrtimer_init_on_stack(struct hrtimer *timer, clockid_t which_clock, |
353 | enum hrtimer_mode mode); | 382 | enum hrtimer_mode mode); |
383 | extern void hrtimer_init_sleeper_on_stack(struct hrtimer_sleeper *sl, | ||
384 | clockid_t clock_id, | ||
385 | enum hrtimer_mode mode); | ||
354 | 386 | ||
355 | extern void destroy_hrtimer_on_stack(struct hrtimer *timer); | 387 | extern void destroy_hrtimer_on_stack(struct hrtimer *timer); |
356 | #else | 388 | #else |
@@ -360,6 +392,14 @@ static inline void hrtimer_init_on_stack(struct hrtimer *timer, | |||
360 | { | 392 | { |
361 | hrtimer_init(timer, which_clock, mode); | 393 | hrtimer_init(timer, which_clock, mode); |
362 | } | 394 | } |
395 | |||
396 | static inline void hrtimer_init_sleeper_on_stack(struct hrtimer_sleeper *sl, | ||
397 | clockid_t clock_id, | ||
398 | enum hrtimer_mode mode) | ||
399 | { | ||
400 | hrtimer_init_sleeper(sl, clock_id, mode); | ||
401 | } | ||
402 | |||
363 | static inline void destroy_hrtimer_on_stack(struct hrtimer *timer) { } | 403 | static inline void destroy_hrtimer_on_stack(struct hrtimer *timer) { } |
364 | #endif | 404 | #endif |
365 | 405 | ||
@@ -395,6 +435,9 @@ static inline void hrtimer_start_expires(struct hrtimer *timer, | |||
395 | hrtimer_start_range_ns(timer, soft, delta, mode); | 435 | hrtimer_start_range_ns(timer, soft, delta, mode); |
396 | } | 436 | } |
397 | 437 | ||
438 | void hrtimer_sleeper_start_expires(struct hrtimer_sleeper *sl, | ||
439 | enum hrtimer_mode mode); | ||
440 | |||
398 | static inline void hrtimer_restart(struct hrtimer *timer) | 441 | static inline void hrtimer_restart(struct hrtimer *timer) |
399 | { | 442 | { |
400 | hrtimer_start_expires(timer, HRTIMER_MODE_ABS); | 443 | hrtimer_start_expires(timer, HRTIMER_MODE_ABS); |
@@ -463,11 +506,8 @@ extern long hrtimer_nanosleep(const struct timespec64 *rqtp, | |||
463 | const enum hrtimer_mode mode, | 506 | const enum hrtimer_mode mode, |
464 | const clockid_t clockid); | 507 | const clockid_t clockid); |
465 | 508 | ||
466 | extern void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, | ||
467 | struct task_struct *tsk); | ||
468 | |||
469 | extern int schedule_hrtimeout_range(ktime_t *expires, u64 delta, | 509 | extern int schedule_hrtimeout_range(ktime_t *expires, u64 delta, |
470 | const enum hrtimer_mode mode); | 510 | const enum hrtimer_mode mode); |
471 | extern int schedule_hrtimeout_range_clock(ktime_t *expires, | 511 | extern int schedule_hrtimeout_range_clock(ktime_t *expires, |
472 | u64 delta, | 512 | u64 delta, |
473 | const enum hrtimer_mode mode, | 513 | const enum hrtimer_mode mode, |
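
The hrtimer.h changes above split cancellation into a try step and a wait step: on PREEMPT_RT, hrtimer_cancel_wait_running() blocks on the new softirq_expiry_lock, while the !PREEMPT_RT stub keeps the old cpu_relax() spin. A minimal sketch of the retry loop this API is built for, using only the declarations above (hrtimer_cancel() in kernel/time/hrtimer.c is reworked along exactly these lines further down):

    static int cancel_timer_sync(struct hrtimer *timer)
    {
        int ret;

        do {
            ret = hrtimer_try_to_cancel(timer);
            /* < 0: callback is running; block (RT) or spin (!RT) */
            if (ret < 0)
                hrtimer_cancel_wait_running(timer);
        } while (ret < 0);

        return ret;
    }
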
diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 6049baa5b8bc..2c620d7ac432 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h | |||
@@ -36,17 +36,6 @@ extern struct cred init_cred; | |||
36 | #define INIT_PREV_CPUTIME(x) | 36 | #define INIT_PREV_CPUTIME(x) |
37 | #endif | 37 | #endif |
38 | 38 | ||
39 | #ifdef CONFIG_POSIX_TIMERS | ||
40 | #define INIT_CPU_TIMERS(s) \ | ||
41 | .cpu_timers = { \ | ||
42 | LIST_HEAD_INIT(s.cpu_timers[0]), \ | ||
43 | LIST_HEAD_INIT(s.cpu_timers[1]), \ | ||
44 | LIST_HEAD_INIT(s.cpu_timers[2]), \ | ||
45 | }, | ||
46 | #else | ||
47 | #define INIT_CPU_TIMERS(s) | ||
48 | #endif | ||
49 | |||
50 | #define INIT_TASK_COMM "swapper" | 39 | #define INIT_TASK_COMM "swapper" |
51 | 40 | ||
52 | /* Attach to the init_task data structure for proper alignment */ | 41 | /* Attach to the init_task data structure for proper alignment */ |
diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index b20798fc5191..3d10c84a97a9 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h | |||
@@ -4,18 +4,11 @@ | |||
4 | 4 | ||
5 | #include <linux/spinlock.h> | 5 | #include <linux/spinlock.h> |
6 | #include <linux/list.h> | 6 | #include <linux/list.h> |
7 | #include <linux/sched.h> | ||
8 | #include <linux/timex.h> | ||
9 | #include <linux/alarmtimer.h> | 7 | #include <linux/alarmtimer.h> |
8 | #include <linux/timerqueue.h> | ||
10 | 9 | ||
11 | struct siginfo; | 10 | struct kernel_siginfo; |
12 | 11 | struct task_struct; | |
13 | struct cpu_timer_list { | ||
14 | struct list_head entry; | ||
15 | u64 expires; | ||
16 | struct task_struct *task; | ||
17 | int firing; | ||
18 | }; | ||
19 | 12 | ||
20 | /* | 13 | /* |
21 | * Bit fields within a clockid: | 14 | * Bit fields within a clockid: |
@@ -63,6 +56,115 @@ static inline int clockid_to_fd(const clockid_t clk) | |||
63 | return ~(clk >> 3); | 56 | return ~(clk >> 3); |
64 | } | 57 | } |
65 | 58 | ||
59 | #ifdef CONFIG_POSIX_TIMERS | ||
60 | |||
61 | /** | ||
62 | * cpu_timer - Posix CPU timer representation for k_itimer | ||
63 | * @node: timerqueue node to queue in the task/sig | ||
64 | * @head: timerqueue head on which this timer is queued | ||
65 | * @task: Pointer to target task | ||
66 | * @elist: List head for the expiry list | ||
67 | * @firing: Timer is currently firing | ||
68 | */ | ||
69 | struct cpu_timer { | ||
70 | struct timerqueue_node node; | ||
71 | struct timerqueue_head *head; | ||
72 | struct task_struct *task; | ||
73 | struct list_head elist; | ||
74 | int firing; | ||
75 | }; | ||
76 | |||
77 | static inline bool cpu_timer_enqueue(struct timerqueue_head *head, | ||
78 | struct cpu_timer *ctmr) | ||
79 | { | ||
80 | ctmr->head = head; | ||
81 | return timerqueue_add(head, &ctmr->node); | ||
82 | } | ||
83 | |||
84 | static inline void cpu_timer_dequeue(struct cpu_timer *ctmr) | ||
85 | { | ||
86 | if (ctmr->head) { | ||
87 | timerqueue_del(ctmr->head, &ctmr->node); | ||
88 | ctmr->head = NULL; | ||
89 | } | ||
90 | } | ||
91 | |||
92 | static inline u64 cpu_timer_getexpires(struct cpu_timer *ctmr) | ||
93 | { | ||
94 | return ctmr->node.expires; | ||
95 | } | ||
96 | |||
97 | static inline void cpu_timer_setexpires(struct cpu_timer *ctmr, u64 exp) | ||
98 | { | ||
99 | ctmr->node.expires = exp; | ||
100 | } | ||
101 | |||
102 | /** | ||
103 | * posix_cputimer_base - Container per posix CPU clock | ||
104 | * @nextevt: Earliest-expiration cache | ||
105 | * @tqhead: timerqueue head for cpu_timers | ||
106 | */ | ||
107 | struct posix_cputimer_base { | ||
108 | u64 nextevt; | ||
109 | struct timerqueue_head tqhead; | ||
110 | }; | ||
111 | |||
112 | /** | ||
113 | * posix_cputimers - Container for posix CPU timer related data | ||
114 | * @bases: Base container for posix CPU clocks | ||
115 | * @timers_active: Timers are queued. | ||
116 | * @expiry_active: Timer expiry is active. Used for | ||
117 | * process wide timers to avoid multiple | ||
118 | * tasks trying to handle expiry concurrently | ||
119 | * | ||
120 | * Used in task_struct and signal_struct | ||
121 | */ | ||
122 | struct posix_cputimers { | ||
123 | struct posix_cputimer_base bases[CPUCLOCK_MAX]; | ||
124 | unsigned int timers_active; | ||
125 | unsigned int expiry_active; | ||
126 | }; | ||
127 | |||
128 | static inline void posix_cputimers_init(struct posix_cputimers *pct) | ||
129 | { | ||
130 | memset(pct, 0, sizeof(*pct)); | ||
131 | pct->bases[0].nextevt = U64_MAX; | ||
132 | pct->bases[1].nextevt = U64_MAX; | ||
133 | pct->bases[2].nextevt = U64_MAX; | ||
134 | } | ||
135 | |||
136 | void posix_cputimers_group_init(struct posix_cputimers *pct, u64 cpu_limit); | ||
137 | |||
138 | static inline void posix_cputimers_rt_watchdog(struct posix_cputimers *pct, | ||
139 | u64 runtime) | ||
140 | { | ||
141 | pct->bases[CPUCLOCK_SCHED].nextevt = runtime; | ||
142 | } | ||
143 | |||
144 | /* Init task static initializer */ | ||
145 | #define INIT_CPU_TIMERBASE(b) { \ | ||
146 | .nextevt = U64_MAX, \ | ||
147 | } | ||
148 | |||
149 | #define INIT_CPU_TIMERBASES(b) { \ | ||
150 | INIT_CPU_TIMERBASE(b[0]), \ | ||
151 | INIT_CPU_TIMERBASE(b[1]), \ | ||
152 | INIT_CPU_TIMERBASE(b[2]), \ | ||
153 | } | ||
154 | |||
155 | #define INIT_CPU_TIMERS(s) \ | ||
156 | .posix_cputimers = { \ | ||
157 | .bases = INIT_CPU_TIMERBASES(s.posix_cputimers.bases), \ | ||
158 | }, | ||
159 | #else | ||
160 | struct posix_cputimers { }; | ||
161 | struct cpu_timer { }; | ||
162 | #define INIT_CPU_TIMERS(s) | ||
163 | static inline void posix_cputimers_init(struct posix_cputimers *pct) { } | ||
164 | static inline void posix_cputimers_group_init(struct posix_cputimers *pct, | ||
165 | u64 cpu_limit) { } | ||
166 | #endif | ||
167 | |||
66 | #define REQUEUE_PENDING 1 | 168 | #define REQUEUE_PENDING 1 |
67 | 169 | ||
68 | /** | 170 | /** |
@@ -85,7 +187,8 @@ static inline int clockid_to_fd(const clockid_t clk) | |||
85 | * @it_process: The task to wakeup on clock_nanosleep (CPU timers) | 187 | * @it_process: The task to wakeup on clock_nanosleep (CPU timers) |
86 | * @sigq: Pointer to preallocated sigqueue | 188 | * @sigq: Pointer to preallocated sigqueue |
87 | * @it: Union representing the various posix timer type | 189 | * @it: Union representing the various posix timer type |
88 | * internals. Also used for rcu freeing the timer. | 190 | * internals. |
191 | * @rcu: RCU head for freeing the timer. | ||
89 | */ | 192 | */ |
90 | struct k_itimer { | 193 | struct k_itimer { |
91 | struct list_head list; | 194 | struct list_head list; |
@@ -110,15 +213,15 @@ struct k_itimer { | |||
110 | struct { | 213 | struct { |
111 | struct hrtimer timer; | 214 | struct hrtimer timer; |
112 | } real; | 215 | } real; |
113 | struct cpu_timer_list cpu; | 216 | struct cpu_timer cpu; |
114 | struct { | 217 | struct { |
115 | struct alarm alarmtimer; | 218 | struct alarm alarmtimer; |
116 | } alarm; | 219 | } alarm; |
117 | struct rcu_head rcu; | ||
118 | } it; | 220 | } it; |
221 | struct rcu_head rcu; | ||
119 | }; | 222 | }; |
120 | 223 | ||
121 | void run_posix_cpu_timers(struct task_struct *task); | 224 | void run_posix_cpu_timers(void); |
122 | void posix_cpu_timers_exit(struct task_struct *task); | 225 | void posix_cpu_timers_exit(struct task_struct *task); |
123 | void posix_cpu_timers_exit_group(struct task_struct *task); | 226 | void posix_cpu_timers_exit_group(struct task_struct *task); |
124 | void set_process_cpu_timer(struct task_struct *task, unsigned int clock_idx, | 227 | void set_process_cpu_timer(struct task_struct *task, unsigned int clock_idx, |
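
Replacing the list_head based cpu_timer_list with a timerqueue-backed cpu_timer makes the earliest expiry per clock an O(1) read and centralizes the bookkeeping in the inline helpers above. A sketch of how they compose, assuming the caller already holds the lock protecting the base (the sighand lock in the posix CPU timer code):

    /*
     * Sketch: arm a CPU timer and refresh the base's earliest-expiry
     * cache. cpu_timer_enqueue() returns true when the new node became
     * the leftmost, i.e. earliest expiring, entry.
     */
    static void arm_cpu_timer(struct posix_cputimer_base *base,
                              struct cpu_timer *ctmr, u64 expires)
    {
        cpu_timer_setexpires(ctmr, expires);
        if (cpu_timer_enqueue(&base->tqhead, ctmr))
            base->nextevt = cpu_timer_getexpires(ctmr);
    }
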
diff --git a/include/linux/sched.h b/include/linux/sched.h index f0edee94834a..b75b28287005 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
@@ -25,9 +25,11 @@ | |||
25 | #include <linux/resource.h> | 25 | #include <linux/resource.h> |
26 | #include <linux/latencytop.h> | 26 | #include <linux/latencytop.h> |
27 | #include <linux/sched/prio.h> | 27 | #include <linux/sched/prio.h> |
28 | #include <linux/sched/types.h> | ||
28 | #include <linux/signal_types.h> | 29 | #include <linux/signal_types.h> |
29 | #include <linux/mm_types_task.h> | 30 | #include <linux/mm_types_task.h> |
30 | #include <linux/task_io_accounting.h> | 31 | #include <linux/task_io_accounting.h> |
32 | #include <linux/posix-timers.h> | ||
31 | #include <linux/rseq.h> | 33 | #include <linux/rseq.h> |
32 | 34 | ||
33 | /* task_struct member predeclarations (sorted alphabetically): */ | 35 | /* task_struct member predeclarations (sorted alphabetically): */ |
@@ -244,27 +246,6 @@ struct prev_cputime { | |||
244 | #endif | 246 | #endif |
245 | }; | 247 | }; |
246 | 248 | ||
247 | /** | ||
248 | * struct task_cputime - collected CPU time counts | ||
249 | * @utime: time spent in user mode, in nanoseconds | ||
250 | * @stime: time spent in kernel mode, in nanoseconds | ||
251 | * @sum_exec_runtime: total time spent on the CPU, in nanoseconds | ||
252 | * | ||
253 | * This structure groups together three kinds of CPU time that are tracked for | ||
254 | * threads and thread groups. Most things considering CPU time want to group | ||
255 | * these counts together and treat all three of them in parallel. | ||
256 | */ | ||
257 | struct task_cputime { | ||
258 | u64 utime; | ||
259 | u64 stime; | ||
260 | unsigned long long sum_exec_runtime; | ||
261 | }; | ||
262 | |||
263 | /* Alternate field names when used on cache expirations: */ | ||
264 | #define virt_exp utime | ||
265 | #define prof_exp stime | ||
266 | #define sched_exp sum_exec_runtime | ||
267 | |||
268 | enum vtime_state { | 249 | enum vtime_state { |
269 | /* Task is sleeping or running in a CPU with VTIME inactive: */ | 250 | /* Task is sleeping or running in a CPU with VTIME inactive: */ |
270 | VTIME_INACTIVE = 0, | 251 | VTIME_INACTIVE = 0, |
@@ -881,10 +862,8 @@ struct task_struct { | |||
881 | unsigned long min_flt; | 862 | unsigned long min_flt; |
882 | unsigned long maj_flt; | 863 | unsigned long maj_flt; |
883 | 864 | ||
884 | #ifdef CONFIG_POSIX_TIMERS | 865 | /* Empty if CONFIG_POSIX_CPUTIMERS=n */ |
885 | struct task_cputime cputime_expires; | 866 | struct posix_cputimers posix_cputimers; |
886 | struct list_head cpu_timers[3]; | ||
887 | #endif | ||
888 | 867 | ||
889 | /* Process credentials: */ | 868 | /* Process credentials: */ |
890 | 869 | ||
diff --git a/include/linux/sched/cputime.h b/include/linux/sched/cputime.h index 53f883f5a2fd..6c9f19a33865 100644 --- a/include/linux/sched/cputime.h +++ b/include/linux/sched/cputime.h | |||
@@ -61,8 +61,7 @@ extern void cputime_adjust(struct task_cputime *curr, struct prev_cputime *prev, | |||
61 | * Thread group CPU time accounting. | 61 | * Thread group CPU time accounting. |
62 | */ | 62 | */ |
63 | void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times); | 63 | void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times); |
64 | void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times); | 64 | void thread_group_sample_cputime(struct task_struct *tsk, u64 *samples); |
65 | |||
66 | 65 | ||
67 | /* | 66 | /* |
68 | * The following are functions that support scheduler-internal time accounting. | 67 | * The following are functions that support scheduler-internal time accounting. |
@@ -71,7 +70,7 @@ void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times); | |||
71 | */ | 70 | */ |
72 | 71 | ||
73 | /** | 72 | /** |
74 | * get_running_cputimer - return &tsk->signal->cputimer if cputimer is running | 73 | * get_running_cputimer - return &tsk->signal->cputimer if cputimers are active |
75 | * | 74 | * |
76 | * @tsk: Pointer to target task. | 75 | * @tsk: Pointer to target task. |
77 | */ | 76 | */ |
@@ -81,8 +80,11 @@ struct thread_group_cputimer *get_running_cputimer(struct task_struct *tsk) | |||
81 | { | 80 | { |
82 | struct thread_group_cputimer *cputimer = &tsk->signal->cputimer; | 81 | struct thread_group_cputimer *cputimer = &tsk->signal->cputimer; |
83 | 82 | ||
84 | /* Check if cputimer isn't running. This is accessed without locking. */ | 83 | /* |
85 | if (!READ_ONCE(cputimer->running)) | 84 | * Check whether posix CPU timers are active. If not the thread |
85 | * group accounting is not active either. Lockless check. | ||
86 | */ | ||
87 | if (!READ_ONCE(tsk->signal->posix_cputimers.timers_active)) | ||
86 | return NULL; | 88 | return NULL; |
87 | 89 | ||
88 | /* | 90 | /* |
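
With the running flag gone, the lockless gate for thread group accounting is posix_cputimers.timers_active, as the rewritten check above shows. The pattern is the usual optimistic fast path; a hedged sketch:

    /*
     * Sketch: lockless fast-path check. A false result lets hot paths
     * skip the atomic group accounting entirely; a true result still
     * has to be revalidated under sighand lock before acting on it.
     */
    static bool group_timers_armed(struct task_struct *tsk)
    {
        return READ_ONCE(tsk->signal->posix_cputimers.timers_active);
    }
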
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index efd8ce7675ed..88050259c466 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h | |||
@@ -9,6 +9,7 @@ | |||
9 | #include <linux/sched/task.h> | 9 | #include <linux/sched/task.h> |
10 | #include <linux/cred.h> | 10 | #include <linux/cred.h> |
11 | #include <linux/refcount.h> | 11 | #include <linux/refcount.h> |
12 | #include <linux/posix-timers.h> | ||
12 | 13 | ||
13 | /* | 14 | /* |
14 | * Types defining task->signal and task->sighand and APIs using them: | 15 | * Types defining task->signal and task->sighand and APIs using them: |
@@ -56,18 +57,12 @@ struct task_cputime_atomic { | |||
56 | /** | 57 | /** |
57 | * struct thread_group_cputimer - thread group interval timer counts | 58 | * struct thread_group_cputimer - thread group interval timer counts |
58 | * @cputime_atomic: atomic thread group interval timers. | 59 | * @cputime_atomic: atomic thread group interval timers. |
59 | * @running: true when there are timers running and | ||
60 | * @cputime_atomic receives updates. | ||
61 | * @checking_timer: true when a thread in the group is in the | ||
62 | * process of checking for thread group timers. | ||
63 | * | 60 | * |
64 | * This structure contains the version of task_cputime, above, that is | 61 | * This structure contains the version of task_cputime, above, that is |
65 | * used for thread group CPU timer calculations. | 62 | * used for thread group CPU timer calculations. |
66 | */ | 63 | */ |
67 | struct thread_group_cputimer { | 64 | struct thread_group_cputimer { |
68 | struct task_cputime_atomic cputime_atomic; | 65 | struct task_cputime_atomic cputime_atomic; |
69 | bool running; | ||
70 | bool checking_timer; | ||
71 | }; | 66 | }; |
72 | 67 | ||
73 | struct multiprocess_signals { | 68 | struct multiprocess_signals { |
@@ -148,12 +143,9 @@ struct signal_struct { | |||
148 | */ | 143 | */ |
149 | struct thread_group_cputimer cputimer; | 144 | struct thread_group_cputimer cputimer; |
150 | 145 | ||
151 | /* Earliest-expiration cache. */ | ||
152 | struct task_cputime cputime_expires; | ||
153 | |||
154 | struct list_head cpu_timers[3]; | ||
155 | |||
156 | #endif | 146 | #endif |
147 | /* Empty if CONFIG_POSIX_TIMERS=n */ | ||
148 | struct posix_cputimers posix_cputimers; | ||
157 | 149 | ||
158 | /* PID/PID hash table linkage. */ | 150 | /* PID/PID hash table linkage. */ |
159 | struct pid *pids[PIDTYPE_MAX]; | 151 | struct pid *pids[PIDTYPE_MAX]; |
diff --git a/include/linux/sched/types.h b/include/linux/sched/types.h new file mode 100644 index 000000000000..3c3e049224ae --- /dev/null +++ b/include/linux/sched/types.h | |||
@@ -0,0 +1,23 @@ | |||
1 | /* SPDX-License-Identifier: GPL-2.0 */ | ||
2 | #ifndef _LINUX_SCHED_TYPES_H | ||
3 | #define _LINUX_SCHED_TYPES_H | ||
4 | |||
5 | #include <linux/types.h> | ||
6 | |||
7 | /** | ||
8 | * struct task_cputime - collected CPU time counts | ||
9 | * @stime: time spent in kernel mode, in nanoseconds | ||
10 | * @utime: time spent in user mode, in nanoseconds | ||
11 | * @sum_exec_runtime: total time spent on the CPU, in nanoseconds | ||
12 | * | ||
13 | * This structure groups together three kinds of CPU time that are tracked for | ||
14 | * threads and thread groups. Most things considering CPU time want to group | ||
15 | * these counts together and treat all three of them in parallel. | ||
16 | */ | ||
17 | struct task_cputime { | ||
18 | u64 stime; | ||
19 | u64 utime; | ||
20 | unsigned long long sum_exec_runtime; | ||
21 | }; | ||
22 | |||
23 | #endif | ||
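
struct task_cputime moves into its own header so posix-timers.h can be pulled into sched.h without an include cycle. Its three fields map directly onto the posix CPU clock samples used throughout this series; a sketch of that mapping (CPUCLOCK_* constants from posix-timers.h):

    /* Derive the three posix CPU clock samples from one snapshot. */
    static void task_cputime_to_samples(const struct task_cputime *ct,
                                        u64 samples[CPUCLOCK_MAX])
    {
        samples[CPUCLOCK_PROF]  = ct->stime + ct->utime;
        samples[CPUCLOCK_VIRT]  = ct->utime;
        samples[CPUCLOCK_SCHED] = ct->sum_exec_runtime;
    }
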
diff --git a/include/linux/timer.h b/include/linux/timer.h index 282e4f2a532a..1e6650ed066d 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h | |||
@@ -183,7 +183,7 @@ extern void add_timer(struct timer_list *timer); | |||
183 | 183 | ||
184 | extern int try_to_del_timer_sync(struct timer_list *timer); | 184 | extern int try_to_del_timer_sync(struct timer_list *timer); |
185 | 185 | ||
186 | #ifdef CONFIG_SMP | 186 | #if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT) |
187 | extern int del_timer_sync(struct timer_list *timer); | 187 | extern int del_timer_sync(struct timer_list *timer); |
188 | #else | 188 | #else |
189 | # define del_timer_sync(t) del_timer(t) | 189 | # define del_timer_sync(t) del_timer(t) |
diff --git a/include/linux/timerqueue.h b/include/linux/timerqueue.h index 78b8cc73f12f..93884086f392 100644 --- a/include/linux/timerqueue.h +++ b/include/linux/timerqueue.h | |||
@@ -12,8 +12,7 @@ struct timerqueue_node { | |||
12 | }; | 12 | }; |
13 | 13 | ||
14 | struct timerqueue_head { | 14 | struct timerqueue_head { |
15 | struct rb_root head; | 15 | struct rb_root_cached rb_root; |
16 | struct timerqueue_node *next; | ||
17 | }; | 16 | }; |
18 | 17 | ||
19 | 18 | ||
@@ -29,13 +28,14 @@ extern struct timerqueue_node *timerqueue_iterate_next( | |||
29 | * | 28 | * |
30 | * @head: head of timerqueue | 29 | * @head: head of timerqueue |
31 | * | 30 | * |
32 | * Returns a pointer to the timer node that has the | 31 | * Returns a pointer to the timer node that has the earliest expiration time. |
33 | * earliest expiration time. | ||
34 | */ | 32 | */ |
35 | static inline | 33 | static inline |
36 | struct timerqueue_node *timerqueue_getnext(struct timerqueue_head *head) | 34 | struct timerqueue_node *timerqueue_getnext(struct timerqueue_head *head) |
37 | { | 35 | { |
38 | return head->next; | 36 | struct rb_node *leftmost = rb_first_cached(&head->rb_root); |
37 | |||
38 | return rb_entry(leftmost, struct timerqueue_node, node); | ||
39 | } | 39 | } |
40 | 40 | ||
41 | static inline void timerqueue_init(struct timerqueue_node *node) | 41 | static inline void timerqueue_init(struct timerqueue_node *node) |
@@ -43,9 +43,18 @@ static inline void timerqueue_init(struct timerqueue_node *node) | |||
43 | RB_CLEAR_NODE(&node->node); | 43 | RB_CLEAR_NODE(&node->node); |
44 | } | 44 | } |
45 | 45 | ||
46 | static inline bool timerqueue_node_queued(struct timerqueue_node *node) | ||
47 | { | ||
48 | return !RB_EMPTY_NODE(&node->node); | ||
49 | } | ||
50 | |||
51 | static inline bool timerqueue_node_expires(struct timerqueue_node *node) | ||
52 | { | ||
53 | return node->expires; | ||
54 | } | ||
55 | |||
46 | static inline void timerqueue_init_head(struct timerqueue_head *head) | 56 | static inline void timerqueue_init_head(struct timerqueue_head *head) |
47 | { | 57 | { |
48 | head->head = RB_ROOT; | 58 | head->rb_root = RB_ROOT_CACHED; |
49 | head->next = NULL; | ||
50 | } | 59 | } |
51 | #endif /* _LINUX_TIMERQUEUE_H */ | 60 | #endif /* _LINUX_TIMERQUEUE_H */ |
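
Dropping the separate ->next pointer in favor of rb_root_cached delegates the "earliest node" bookkeeping to the rbtree's cached leftmost, so timerqueue_add()/timerqueue_del() no longer maintain it by hand while timerqueue_getnext() stays O(1). A small usage sketch against the reworked head, assuming an empty queue yields NULL (which holds here because the rb_node is the node's first member):

    /* Earliest pending expiry, or KTIME_MAX when the queue is empty.
     * rb_first_cached() is a plain pointer read, no tree walk. */
    static ktime_t next_expiry(struct timerqueue_head *head)
    {
        struct timerqueue_node *next = timerqueue_getnext(head);

        return next ? next->expires : KTIME_MAX;
    }
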
diff --git a/include/linux/wait.h b/include/linux/wait.h index 30c515520fb2..3eb7cae8206c 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h | |||
@@ -501,8 +501,8 @@ do { \ | |||
501 | int __ret = 0; \ | 501 | int __ret = 0; \ |
502 | struct hrtimer_sleeper __t; \ | 502 | struct hrtimer_sleeper __t; \ |
503 | \ | 503 | \ |
504 | hrtimer_init_on_stack(&__t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); \ | 504 | hrtimer_init_sleeper_on_stack(&__t, CLOCK_MONOTONIC, \ |
505 | hrtimer_init_sleeper(&__t, current); \ | 505 | HRTIMER_MODE_REL); \ |
506 | if ((timeout) != KTIME_MAX) \ | 506 | if ((timeout) != KTIME_MAX) \ |
507 | hrtimer_start_range_ns(&__t.timer, timeout, \ | 507 | hrtimer_start_range_ns(&__t.timer, timeout, \ |
508 | current->timer_slack_ns, \ | 508 | current->timer_slack_ns, \ |
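
The wait.h hunk shows the call-site payoff: one hrtimer_init_sleeper_on_stack() call replaces the init_on_stack plus init_sleeper pair, and the sleeper's expiry context (hard vs. soft on PREEMPT_RT) is settled at init time rather than at each call site. The full shape of the pattern, sketched outside the macro; the relative mode matches the init shown above:

    /* Sketch of the consolidated on-stack sleeper pattern;
     * KTIME_MAX means "wait without a timer". */
    static void wait_with_timeout(ktime_t timeout)
    {
        struct hrtimer_sleeper t;

        hrtimer_init_sleeper_on_stack(&t, CLOCK_MONOTONIC,
                                      HRTIMER_MODE_REL);
        if (timeout != KTIME_MAX)
            hrtimer_start_range_ns(&t.timer, timeout,
                                   current->timer_slack_ns,
                                   HRTIMER_MODE_REL);
        /* ... block on the condition; hrtimer_wakeup() clears t.task ... */
        hrtimer_cancel(&t.timer);
        destroy_hrtimer_on_stack(&t.timer);
    }
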
diff --git a/init/init_task.c b/init/init_task.c index bfe06c53b14e..9e5cbe5eab7b 100644 --- a/init/init_task.c +++ b/init/init_task.c | |||
@@ -30,8 +30,6 @@ static struct signal_struct init_signals = { | |||
30 | .posix_timers = LIST_HEAD_INIT(init_signals.posix_timers), | 30 | .posix_timers = LIST_HEAD_INIT(init_signals.posix_timers), |
31 | .cputimer = { | 31 | .cputimer = { |
32 | .cputime_atomic = INIT_CPUTIME_ATOMIC, | 32 | .cputime_atomic = INIT_CPUTIME_ATOMIC, |
33 | .running = false, | ||
34 | .checking_timer = false, | ||
35 | }, | 33 | }, |
36 | #endif | 34 | #endif |
37 | INIT_CPU_TIMERS(init_signals) | 35 | INIT_CPU_TIMERS(init_signals) |
diff --git a/kernel/events/core.c b/kernel/events/core.c index 1c414b8866b4..4f08b17d6426 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c | |||
@@ -1103,7 +1103,7 @@ static void __perf_mux_hrtimer_init(struct perf_cpu_context *cpuctx, int cpu) | |||
1103 | cpuctx->hrtimer_interval = ns_to_ktime(NSEC_PER_MSEC * interval); | 1103 | cpuctx->hrtimer_interval = ns_to_ktime(NSEC_PER_MSEC * interval); |
1104 | 1104 | ||
1105 | raw_spin_lock_init(&cpuctx->hrtimer_lock); | 1105 | raw_spin_lock_init(&cpuctx->hrtimer_lock); |
1106 | hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED); | 1106 | hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD); |
1107 | timer->function = perf_mux_hrtimer_handler; | 1107 | timer->function = perf_mux_hrtimer_handler; |
1108 | } | 1108 | } |
1109 | 1109 | ||
@@ -1121,7 +1121,7 @@ static int perf_mux_hrtimer_restart(struct perf_cpu_context *cpuctx) | |||
1121 | if (!cpuctx->hrtimer_active) { | 1121 | if (!cpuctx->hrtimer_active) { |
1122 | cpuctx->hrtimer_active = 1; | 1122 | cpuctx->hrtimer_active = 1; |
1123 | hrtimer_forward_now(timer, cpuctx->hrtimer_interval); | 1123 | hrtimer_forward_now(timer, cpuctx->hrtimer_interval); |
1124 | hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED); | 1124 | hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED_HARD); |
1125 | } | 1125 | } |
1126 | raw_spin_unlock_irqrestore(&cpuctx->hrtimer_lock, flags); | 1126 | raw_spin_unlock_irqrestore(&cpuctx->hrtimer_lock, flags); |
1127 | 1127 | ||
@@ -9574,7 +9574,7 @@ static void perf_swevent_start_hrtimer(struct perf_event *event) | |||
9574 | period = max_t(u64, 10000, hwc->sample_period); | 9574 | period = max_t(u64, 10000, hwc->sample_period); |
9575 | } | 9575 | } |
9576 | hrtimer_start(&hwc->hrtimer, ns_to_ktime(period), | 9576 | hrtimer_start(&hwc->hrtimer, ns_to_ktime(period), |
9577 | HRTIMER_MODE_REL_PINNED); | 9577 | HRTIMER_MODE_REL_PINNED_HARD); |
9578 | } | 9578 | } |
9579 | 9579 | ||
9580 | static void perf_swevent_cancel_hrtimer(struct perf_event *event) | 9580 | static void perf_swevent_cancel_hrtimer(struct perf_event *event) |
@@ -9596,7 +9596,7 @@ static void perf_swevent_init_hrtimer(struct perf_event *event) | |||
9596 | if (!is_sampling_event(event)) | 9596 | if (!is_sampling_event(event)) |
9597 | return; | 9597 | return; |
9598 | 9598 | ||
9599 | hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | 9599 | hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD); |
9600 | hwc->hrtimer.function = perf_swevent_hrtimer; | 9600 | hwc->hrtimer.function = perf_swevent_hrtimer; |
9601 | 9601 | ||
9602 | /* | 9602 | /* |
diff --git a/kernel/fork.c b/kernel/fork.c index 1d1cd06edbc1..53e780748fe3 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -1519,28 +1519,17 @@ void __cleanup_sighand(struct sighand_struct *sighand) | |||
1519 | } | 1519 | } |
1520 | } | 1520 | } |
1521 | 1521 | ||
1522 | #ifdef CONFIG_POSIX_TIMERS | ||
1523 | /* | 1522 | /* |
1524 | * Initialize POSIX timer handling for a thread group. | 1523 | * Initialize POSIX timer handling for a thread group. |
1525 | */ | 1524 | */ |
1526 | static void posix_cpu_timers_init_group(struct signal_struct *sig) | 1525 | static void posix_cpu_timers_init_group(struct signal_struct *sig) |
1527 | { | 1526 | { |
1527 | struct posix_cputimers *pct = &sig->posix_cputimers; | ||
1528 | unsigned long cpu_limit; | 1528 | unsigned long cpu_limit; |
1529 | 1529 | ||
1530 | cpu_limit = READ_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur); | 1530 | cpu_limit = READ_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur); |
1531 | if (cpu_limit != RLIM_INFINITY) { | 1531 | posix_cputimers_group_init(pct, cpu_limit); |
1532 | sig->cputime_expires.prof_exp = cpu_limit * NSEC_PER_SEC; | ||
1533 | sig->cputimer.running = true; | ||
1534 | } | ||
1535 | |||
1536 | /* The timer lists. */ | ||
1537 | INIT_LIST_HEAD(&sig->cpu_timers[0]); | ||
1538 | INIT_LIST_HEAD(&sig->cpu_timers[1]); | ||
1539 | INIT_LIST_HEAD(&sig->cpu_timers[2]); | ||
1540 | } | 1532 | } |
1541 | #else | ||
1542 | static inline void posix_cpu_timers_init_group(struct signal_struct *sig) { } | ||
1543 | #endif | ||
1544 | 1533 | ||
1545 | static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) | 1534 | static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) |
1546 | { | 1535 | { |
@@ -1642,23 +1631,6 @@ static void rt_mutex_init_task(struct task_struct *p) | |||
1642 | #endif | 1631 | #endif |
1643 | } | 1632 | } |
1644 | 1633 | ||
1645 | #ifdef CONFIG_POSIX_TIMERS | ||
1646 | /* | ||
1647 | * Initialize POSIX timer handling for a single task. | ||
1648 | */ | ||
1649 | static void posix_cpu_timers_init(struct task_struct *tsk) | ||
1650 | { | ||
1651 | tsk->cputime_expires.prof_exp = 0; | ||
1652 | tsk->cputime_expires.virt_exp = 0; | ||
1653 | tsk->cputime_expires.sched_exp = 0; | ||
1654 | INIT_LIST_HEAD(&tsk->cpu_timers[0]); | ||
1655 | INIT_LIST_HEAD(&tsk->cpu_timers[1]); | ||
1656 | INIT_LIST_HEAD(&tsk->cpu_timers[2]); | ||
1657 | } | ||
1658 | #else | ||
1659 | static inline void posix_cpu_timers_init(struct task_struct *tsk) { } | ||
1660 | #endif | ||
1661 | |||
1662 | static inline void init_task_pid_links(struct task_struct *task) | 1634 | static inline void init_task_pid_links(struct task_struct *task) |
1663 | { | 1635 | { |
1664 | enum pid_type type; | 1636 | enum pid_type type; |
@@ -1945,7 +1917,7 @@ static __latent_entropy struct task_struct *copy_process( | |||
1945 | task_io_accounting_init(&p->ioac); | 1917 | task_io_accounting_init(&p->ioac); |
1946 | acct_clear_integrals(p); | 1918 | acct_clear_integrals(p); |
1947 | 1919 | ||
1948 | posix_cpu_timers_init(p); | 1920 | posix_cputimers_init(&p->posix_cputimers); |
1949 | 1921 | ||
1950 | p->io_context = NULL; | 1922 | p->io_context = NULL; |
1951 | audit_set_context(p, NULL); | 1923 | audit_set_context(p, NULL); |
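
fork() now funnels both per-task and per-process setup through the posix_cputimers helpers. posix_cputimers_group_init() itself is not shown in this hunk; based on the U64_MAX "disarmed" convention from the header, its expected shape is roughly:

    /* Sketch (assumption, not from this hunk): disarm all bases, then
     * arm the profiling clock when RLIMIT_CPU (in seconds) is finite. */
    void posix_cputimers_group_init(struct posix_cputimers *pct, u64 cpu_limit)
    {
        posix_cputimers_init(pct);
        if (cpu_limit != RLIM_INFINITY) {
            pct->bases[CPUCLOCK_PROF].nextevt = cpu_limit * NSEC_PER_SEC;
            pct->timers_active = true;
        }
    }
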
diff --git a/kernel/futex.c b/kernel/futex.c index 6d50728ef2e7..bd18f60e4c6c 100644 --- a/kernel/futex.c +++ b/kernel/futex.c | |||
@@ -487,11 +487,9 @@ futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout, | |||
487 | if (!time) | 487 | if (!time) |
488 | return NULL; | 488 | return NULL; |
489 | 489 | ||
490 | hrtimer_init_on_stack(&timeout->timer, (flags & FLAGS_CLOCKRT) ? | 490 | hrtimer_init_sleeper_on_stack(timeout, (flags & FLAGS_CLOCKRT) ? |
491 | CLOCK_REALTIME : CLOCK_MONOTONIC, | 491 | CLOCK_REALTIME : CLOCK_MONOTONIC, |
492 | HRTIMER_MODE_ABS); | 492 | HRTIMER_MODE_ABS); |
493 | hrtimer_init_sleeper(timeout, current); | ||
494 | |||
495 | /* | 493 | /* |
496 | * If range_ns is 0, calling hrtimer_set_expires_range_ns() is | 494 | * If range_ns is 0, calling hrtimer_set_expires_range_ns() is |
497 | * effectively the same as calling hrtimer_set_expires(). | 495 | * effectively the same as calling hrtimer_set_expires(). |
@@ -2613,7 +2611,7 @@ static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q, | |||
2613 | 2611 | ||
2614 | /* Arm the timer */ | 2612 | /* Arm the timer */ |
2615 | if (timeout) | 2613 | if (timeout) |
2616 | hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS); | 2614 | hrtimer_sleeper_start_expires(timeout, HRTIMER_MODE_ABS); |
2617 | 2615 | ||
2618 | /* | 2616 | /* |
2619 | * If we have been removed from the hash list, then another task | 2617 | * If we have been removed from the hash list, then another task |
@@ -2899,7 +2897,7 @@ retry_private: | |||
2899 | } | 2897 | } |
2900 | 2898 | ||
2901 | if (unlikely(to)) | 2899 | if (unlikely(to)) |
2902 | hrtimer_start_expires(&to->timer, HRTIMER_MODE_ABS); | 2900 | hrtimer_sleeper_start_expires(to, HRTIMER_MODE_ABS); |
2903 | 2901 | ||
2904 | ret = rt_mutex_wait_proxy_lock(&q.pi_state->pi_mutex, to, &rt_waiter); | 2902 | ret = rt_mutex_wait_proxy_lock(&q.pi_state->pi_mutex, to, &rt_waiter); |
2905 | 2903 | ||
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 06961b997ed6..5e8387bdd09c 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c | |||
@@ -255,7 +255,7 @@ static void __hrtick_restart(struct rq *rq) | |||
255 | { | 255 | { |
256 | struct hrtimer *timer = &rq->hrtick_timer; | 256 | struct hrtimer *timer = &rq->hrtick_timer; |
257 | 257 | ||
258 | hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED); | 258 | hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED_HARD); |
259 | } | 259 | } |
260 | 260 | ||
261 | /* | 261 | /* |
@@ -314,7 +314,7 @@ void hrtick_start(struct rq *rq, u64 delay) | |||
314 | */ | 314 | */ |
315 | delay = max_t(u64, delay, 10000LL); | 315 | delay = max_t(u64, delay, 10000LL); |
316 | hrtimer_start(&rq->hrtick_timer, ns_to_ktime(delay), | 316 | hrtimer_start(&rq->hrtick_timer, ns_to_ktime(delay), |
317 | HRTIMER_MODE_REL_PINNED); | 317 | HRTIMER_MODE_REL_PINNED_HARD); |
318 | } | 318 | } |
319 | #endif /* CONFIG_SMP */ | 319 | #endif /* CONFIG_SMP */ |
320 | 320 | ||
@@ -328,7 +328,7 @@ static void hrtick_rq_init(struct rq *rq) | |||
328 | rq->hrtick_csd.info = rq; | 328 | rq->hrtick_csd.info = rq; |
329 | #endif | 329 | #endif |
330 | 330 | ||
331 | hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | 331 | hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD); |
332 | rq->hrtick_timer.function = hrtick; | 332 | rq->hrtick_timer.function = hrtick; |
333 | } | 333 | } |
334 | #else /* CONFIG_SCHED_HRTICK */ | 334 | #else /* CONFIG_SCHED_HRTICK */ |
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 39dc9f74f289..2dc48720f189 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c | |||
@@ -287,7 +287,7 @@ static void task_non_contending(struct task_struct *p) | |||
287 | 287 | ||
288 | dl_se->dl_non_contending = 1; | 288 | dl_se->dl_non_contending = 1; |
289 | get_task_struct(p); | 289 | get_task_struct(p); |
290 | hrtimer_start(timer, ns_to_ktime(zerolag_time), HRTIMER_MODE_REL); | 290 | hrtimer_start(timer, ns_to_ktime(zerolag_time), HRTIMER_MODE_REL_HARD); |
291 | } | 291 | } |
292 | 292 | ||
293 | static void task_contending(struct sched_dl_entity *dl_se, int flags) | 293 | static void task_contending(struct sched_dl_entity *dl_se, int flags) |
@@ -956,7 +956,7 @@ static int start_dl_timer(struct task_struct *p) | |||
956 | */ | 956 | */ |
957 | if (!hrtimer_is_queued(timer)) { | 957 | if (!hrtimer_is_queued(timer)) { |
958 | get_task_struct(p); | 958 | get_task_struct(p); |
959 | hrtimer_start(timer, act, HRTIMER_MODE_ABS); | 959 | hrtimer_start(timer, act, HRTIMER_MODE_ABS_HARD); |
960 | } | 960 | } |
961 | 961 | ||
962 | return 1; | 962 | return 1; |
@@ -1086,7 +1086,7 @@ void init_dl_task_timer(struct sched_dl_entity *dl_se) | |||
1086 | { | 1086 | { |
1087 | struct hrtimer *timer = &dl_se->dl_timer; | 1087 | struct hrtimer *timer = &dl_se->dl_timer; |
1088 | 1088 | ||
1089 | hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | 1089 | hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD); |
1090 | timer->function = dl_task_timer; | 1090 | timer->function = dl_task_timer; |
1091 | } | 1091 | } |
1092 | 1092 | ||
@@ -1325,7 +1325,7 @@ void init_dl_inactive_task_timer(struct sched_dl_entity *dl_se) | |||
1325 | { | 1325 | { |
1326 | struct hrtimer *timer = &dl_se->inactive_timer; | 1326 | struct hrtimer *timer = &dl_se->inactive_timer; |
1327 | 1327 | ||
1328 | hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | 1328 | hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD); |
1329 | timer->function = inactive_task_timer; | 1329 | timer->function = inactive_task_timer; |
1330 | } | 1330 | } |
1331 | 1331 | ||
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 858c4cc6f99b..ebaa4e619684 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c | |||
@@ -45,8 +45,8 @@ void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime) | |||
45 | 45 | ||
46 | raw_spin_lock_init(&rt_b->rt_runtime_lock); | 46 | raw_spin_lock_init(&rt_b->rt_runtime_lock); |
47 | 47 | ||
48 | hrtimer_init(&rt_b->rt_period_timer, | 48 | hrtimer_init(&rt_b->rt_period_timer, CLOCK_MONOTONIC, |
49 | CLOCK_MONOTONIC, HRTIMER_MODE_REL); | 49 | HRTIMER_MODE_REL_HARD); |
50 | rt_b->rt_period_timer.function = sched_rt_period_timer; | 50 | rt_b->rt_period_timer.function = sched_rt_period_timer; |
51 | } | 51 | } |
52 | 52 | ||
@@ -67,7 +67,8 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b) | |||
67 | * to update the period. | 67 | * to update the period. |
68 | */ | 68 | */ |
69 | hrtimer_forward_now(&rt_b->rt_period_timer, ns_to_ktime(0)); | 69 | hrtimer_forward_now(&rt_b->rt_period_timer, ns_to_ktime(0)); |
70 | hrtimer_start_expires(&rt_b->rt_period_timer, HRTIMER_MODE_ABS_PINNED); | 70 | hrtimer_start_expires(&rt_b->rt_period_timer, |
71 | HRTIMER_MODE_ABS_PINNED_HARD); | ||
71 | } | 72 | } |
72 | raw_spin_unlock(&rt_b->rt_runtime_lock); | 73 | raw_spin_unlock(&rt_b->rt_runtime_lock); |
73 | } | 74 | } |
@@ -2289,8 +2290,10 @@ static void watchdog(struct rq *rq, struct task_struct *p) | |||
2289 | } | 2290 | } |
2290 | 2291 | ||
2291 | next = DIV_ROUND_UP(min(soft, hard), USEC_PER_SEC/HZ); | 2292 | next = DIV_ROUND_UP(min(soft, hard), USEC_PER_SEC/HZ); |
2292 | if (p->rt.timeout > next) | 2293 | if (p->rt.timeout > next) { |
2293 | p->cputime_expires.sched_exp = p->se.sum_exec_runtime; | 2294 | posix_cputimers_rt_watchdog(&p->posix_cputimers, |
2295 | p->se.sum_exec_runtime); | ||
2296 | } | ||
2294 | } | 2297 | } |
2295 | } | 2298 | } |
2296 | #else | 2299 | #else |
diff --git a/kernel/sys.c b/kernel/sys.c index d605fe5e58a5..a611d1d58c7d 100644 --- a/kernel/sys.c +++ b/kernel/sys.c | |||
@@ -1557,15 +1557,6 @@ int do_prlimit(struct task_struct *tsk, unsigned int resource, | |||
1557 | retval = -EPERM; | 1557 | retval = -EPERM; |
1558 | if (!retval) | 1558 | if (!retval) |
1559 | retval = security_task_setrlimit(tsk, resource, new_rlim); | 1559 | retval = security_task_setrlimit(tsk, resource, new_rlim); |
1560 | if (resource == RLIMIT_CPU && new_rlim->rlim_cur == 0) { | ||
1561 | /* | ||
1562 | * The caller is asking for an immediate RLIMIT_CPU | ||
1563 | * expiry. But we use the zero value to mean "it was | ||
1564 | * never set". So let's cheat and make it one second | ||
1565 | * instead | ||
1566 | */ | ||
1567 | new_rlim->rlim_cur = 1; | ||
1568 | } | ||
1569 | } | 1560 | } |
1570 | if (!retval) { | 1561 | if (!retval) { |
1571 | if (old_rlim) | 1562 | if (old_rlim) |
@@ -1576,10 +1567,9 @@ int do_prlimit(struct task_struct *tsk, unsigned int resource, | |||
1576 | task_unlock(tsk->group_leader); | 1567 | task_unlock(tsk->group_leader); |
1577 | 1568 | ||
1578 | /* | 1569 | /* |
1579 | * RLIMIT_CPU handling. Note that the kernel fails to return an error | 1570 | * RLIMIT_CPU handling. Arm the posix CPU timer if the limit is not |
1580 | * code if it rejected the user's attempt to set RLIMIT_CPU. This is a | 1571 | * infinite. In case of RLIM_INFINITY the posix CPU timer code |
1581 | * very long-standing error, and fixing it now risks breakage of | 1572 | * ignores the rlimit. |
1582 | * applications, so we live with it | ||
1583 | */ | 1573 | */ |
1584 | if (!retval && new_rlim && resource == RLIMIT_CPU && | 1574 | if (!retval && new_rlim && resource == RLIMIT_CPU && |
1585 | new_rlim->rlim_cur != RLIM_INFINITY && | 1575 | new_rlim->rlim_cur != RLIM_INFINITY && |
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index b7d75a9e8ccf..271ce6c12907 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c | |||
@@ -432,7 +432,7 @@ int alarm_cancel(struct alarm *alarm) | |||
432 | int ret = alarm_try_to_cancel(alarm); | 432 | int ret = alarm_try_to_cancel(alarm); |
433 | if (ret >= 0) | 433 | if (ret >= 0) |
434 | return ret; | 434 | return ret; |
435 | cpu_relax(); | 435 | hrtimer_cancel_wait_running(&alarm->timer); |
436 | } | 436 | } |
437 | } | 437 | } |
438 | EXPORT_SYMBOL_GPL(alarm_cancel); | 438 | EXPORT_SYMBOL_GPL(alarm_cancel); |
@@ -606,6 +606,19 @@ static int alarm_timer_try_to_cancel(struct k_itimer *timr) | |||
606 | } | 606 | } |
607 | 607 | ||
608 | /** | 608 | /** |
609 | * alarm_timer_wait_running - Posix timer callback to wait for a timer | ||
610 | * @timr: Pointer to the posixtimer data struct | ||
611 | * | ||
612 | * Called from the core code when timer cancel detected that the callback | ||
613 | * is running. @timr is unlocked and rcu read lock is held to prevent it | ||
614 | * from being freed. | ||
615 | */ | ||
616 | static void alarm_timer_wait_running(struct k_itimer *timr) | ||
617 | { | ||
618 | hrtimer_cancel_wait_running(&timr->it.alarm.alarmtimer.timer); | ||
619 | } | ||
620 | |||
621 | /** | ||
609 | * alarm_timer_arm - Posix timer callback to arm a timer | 622 | * alarm_timer_arm - Posix timer callback to arm a timer |
610 | * @timr: Pointer to the posixtimer data struct | 623 | * @timr: Pointer to the posixtimer data struct |
611 | * @expires: The new expiry time | 624 | * @expires: The new expiry time |
@@ -834,6 +847,7 @@ const struct k_clock alarm_clock = { | |||
834 | .timer_forward = alarm_timer_forward, | 847 | .timer_forward = alarm_timer_forward, |
835 | .timer_remaining = alarm_timer_remaining, | 848 | .timer_remaining = alarm_timer_remaining, |
836 | .timer_try_to_cancel = alarm_timer_try_to_cancel, | 849 | .timer_try_to_cancel = alarm_timer_try_to_cancel, |
850 | .timer_wait_running = alarm_timer_wait_running, | ||
837 | .nsleep = alarm_timer_nsleep, | 851 | .nsleep = alarm_timer_nsleep, |
838 | }; | 852 | }; |
839 | #endif /* CONFIG_POSIX_TIMERS */ | 853 | #endif /* CONFIG_POSIX_TIMERS */ |
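
timer_wait_running is a new k_clock callback: when cancellation races with a running callback, the generic posix timer code drops the timer lock, holds the RCU read lock so the k_itimer cannot be freed, and lets the clock-specific hook block until the callback finishes. A hedged sketch of the caller side, with helper names assumed and simplified from kernel/time/posix-timers.c:

    /* Sketch (helper names assumed): wait for a running callback with
     * the timer unlocked; RCU keeps the k_itimer from being freed. */
    static void wait_timer_running(struct k_itimer *timr,
                                   const struct k_clock *kc,
                                   unsigned long flags)
    {
        rcu_read_lock();
        unlock_timer(timr, flags);          /* assumed helper */
        if (kc->timer_wait_running)
            kc->timer_wait_running(timr);   /* e.g. the alarmtimer hook above */
        rcu_read_unlock();
        /* caller relocks, revalidates and retries the cancel */
    }
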
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 5ee77f1a8a92..0d4dc241c0fb 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c | |||
@@ -140,6 +140,11 @@ static struct hrtimer_cpu_base migration_cpu_base = { | |||
140 | 140 | ||
141 | #define migration_base migration_cpu_base.clock_base[0] | 141 | #define migration_base migration_cpu_base.clock_base[0] |
142 | 142 | ||
143 | static inline bool is_migration_base(struct hrtimer_clock_base *base) | ||
144 | { | ||
145 | return base == &migration_base; | ||
146 | } | ||
147 | |||
143 | /* | 148 | /* |
144 | * We are using hashed locking: holding per_cpu(hrtimer_bases)[n].lock | 149 | * We are using hashed locking: holding per_cpu(hrtimer_bases)[n].lock |
145 | * means that all timers which are tied to this base via timer->base are | 150 | * means that all timers which are tied to this base via timer->base are |
@@ -264,6 +269,11 @@ again: | |||
264 | 269 | ||
265 | #else /* CONFIG_SMP */ | 270 | #else /* CONFIG_SMP */ |
266 | 271 | ||
272 | static inline bool is_migration_base(struct hrtimer_clock_base *base) | ||
273 | { | ||
274 | return false; | ||
275 | } | ||
276 | |||
267 | static inline struct hrtimer_clock_base * | 277 | static inline struct hrtimer_clock_base * |
268 | lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags) | 278 | lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags) |
269 | { | 279 | { |
@@ -427,6 +437,17 @@ void hrtimer_init_on_stack(struct hrtimer *timer, clockid_t clock_id, | |||
427 | } | 437 | } |
428 | EXPORT_SYMBOL_GPL(hrtimer_init_on_stack); | 438 | EXPORT_SYMBOL_GPL(hrtimer_init_on_stack); |
429 | 439 | ||
440 | static void __hrtimer_init_sleeper(struct hrtimer_sleeper *sl, | ||
441 | clockid_t clock_id, enum hrtimer_mode mode); | ||
442 | |||
443 | void hrtimer_init_sleeper_on_stack(struct hrtimer_sleeper *sl, | ||
444 | clockid_t clock_id, enum hrtimer_mode mode) | ||
445 | { | ||
446 | debug_object_init_on_stack(&sl->timer, &hrtimer_debug_descr); | ||
447 | __hrtimer_init_sleeper(sl, clock_id, mode); | ||
448 | } | ||
449 | EXPORT_SYMBOL_GPL(hrtimer_init_sleeper_on_stack); | ||
450 | |||
430 | void destroy_hrtimer_on_stack(struct hrtimer *timer) | 451 | void destroy_hrtimer_on_stack(struct hrtimer *timer) |
431 | { | 452 | { |
432 | debug_object_free(timer, &hrtimer_debug_descr); | 453 | debug_object_free(timer, &hrtimer_debug_descr); |
@@ -1096,9 +1117,13 @@ void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, | |||
1096 | 1117 | ||
1097 | /* | 1118 | /* |
1098 | * Check whether the HRTIMER_MODE_SOFT bit and hrtimer.is_soft | 1119 | * Check whether the HRTIMER_MODE_SOFT bit and hrtimer.is_soft |
1099 | * match. | 1120 | * match on CONFIG_PREEMPT_RT = n. With PREEMPT_RT check the hard |
1121 | * expiry mode because unmarked timers are moved to softirq expiry. | ||
1100 | */ | 1122 | */ |
1101 | WARN_ON_ONCE(!(mode & HRTIMER_MODE_SOFT) ^ !timer->is_soft); | 1123 | if (!IS_ENABLED(CONFIG_PREEMPT_RT)) |
1124 | WARN_ON_ONCE(!(mode & HRTIMER_MODE_SOFT) ^ !timer->is_soft); | ||
1125 | else | ||
1126 | WARN_ON_ONCE(!(mode & HRTIMER_MODE_HARD) ^ !timer->is_hard); | ||
1102 | 1127 | ||
1103 | base = lock_hrtimer_base(timer, &flags); | 1128 | base = lock_hrtimer_base(timer, &flags); |
1104 | 1129 | ||
@@ -1147,6 +1172,93 @@ int hrtimer_try_to_cancel(struct hrtimer *timer) | |||
1147 | } | 1172 | } |
1148 | EXPORT_SYMBOL_GPL(hrtimer_try_to_cancel); | 1173 | EXPORT_SYMBOL_GPL(hrtimer_try_to_cancel); |
1149 | 1174 | ||
1175 | #ifdef CONFIG_PREEMPT_RT | ||
1176 | static void hrtimer_cpu_base_init_expiry_lock(struct hrtimer_cpu_base *base) | ||
1177 | { | ||
1178 | spin_lock_init(&base->softirq_expiry_lock); | ||
1179 | } | ||
1180 | |||
1181 | static void hrtimer_cpu_base_lock_expiry(struct hrtimer_cpu_base *base) | ||
1182 | { | ||
1183 | spin_lock(&base->softirq_expiry_lock); | ||
1184 | } | ||
1185 | |||
1186 | static void hrtimer_cpu_base_unlock_expiry(struct hrtimer_cpu_base *base) | ||
1187 | { | ||
1188 | spin_unlock(&base->softirq_expiry_lock); | ||
1189 | } | ||
1190 | |||
1191 | /* | ||
1192 | * The counterpart to hrtimer_cancel_wait_running(). | ||
1193 | * | ||
1194 | * If there is a waiter for cpu_base->expiry_lock, then it was waiting for | ||
1195 | * the timer callback to finish. Drop expiry_lock and reacquire it. That | ||
1196 | * allows the waiter to acquire the lock and make progress. | ||
1197 | */ | ||
1198 | static void hrtimer_sync_wait_running(struct hrtimer_cpu_base *cpu_base, | ||
1199 | unsigned long flags) | ||
1200 | { | ||
1201 | if (atomic_read(&cpu_base->timer_waiters)) { | ||
1202 | raw_spin_unlock_irqrestore(&cpu_base->lock, flags); | ||
1203 | spin_unlock(&cpu_base->softirq_expiry_lock); | ||
1204 | spin_lock(&cpu_base->softirq_expiry_lock); | ||
1205 | raw_spin_lock_irq(&cpu_base->lock); | ||
1206 | } | ||
1207 | } | ||
1208 | |||
1209 | /* | ||
1210 | * This function is called on PREEMPT_RT kernels when the fast path | ||
1211 | * deletion of a timer failed because the timer callback function was | ||
1212 | * running. | ||
1213 | * | ||
1214 | * This prevents priority inversion: if the soft irq thread is preempted | ||
1215 | * in the middle of a timer callback, then calling del_timer_sync() can | ||
1216 | * lead to two issues: | ||
1217 | * | ||
1218 | * - If the caller is on a remote CPU then it has to spin wait for the timer | ||
1219 | * handler to complete. This can result in unbounded priority inversion. | ||
1220 | * | ||
1221 | * - If the caller originates from the task which preempted the timer | ||
1222 | * handler on the same CPU, then spin waiting for the timer handler to | ||
1223 | * complete is never going to end. | ||
1224 | */ | ||
1225 | void hrtimer_cancel_wait_running(const struct hrtimer *timer) | ||
1226 | { | ||
1227 | /* Lockless read. Prevent the compiler from reloading it below */ | ||
1228 | struct hrtimer_clock_base *base = READ_ONCE(timer->base); | ||
1229 | |||
1230 | /* | ||
1231 | * Just relax if the timer expires in hard interrupt context or if | ||
1232 | * it is currently on the migration base. | ||
1233 | */ | ||
1234 | if (!timer->is_soft || is_migration_base(base)) { | ||
1235 | cpu_relax(); | ||
1236 | return; | ||
1237 | } | ||
1238 | |||
1239 | /* | ||
1240 | * Mark the base as contended and grab the expiry lock, which is | ||
1241 | * held by the softirq across the timer callback. Drop the lock | ||
1242 | * immediately so the softirq can expire the next timer. In theory | ||
1243 | * the timer could already be running again, but that's more than | ||
1244 | * unlikely and just causes another wait loop. | ||
1245 | */ | ||
1246 | atomic_inc(&base->cpu_base->timer_waiters); | ||
1247 | spin_lock_bh(&base->cpu_base->softirq_expiry_lock); | ||
1248 | atomic_dec(&base->cpu_base->timer_waiters); | ||
1249 | spin_unlock_bh(&base->cpu_base->softirq_expiry_lock); | ||
1250 | } | ||
1251 | #else | ||
1252 | static inline void | ||
1253 | hrtimer_cpu_base_init_expiry_lock(struct hrtimer_cpu_base *base) { } | ||
1254 | static inline void | ||
1255 | hrtimer_cpu_base_lock_expiry(struct hrtimer_cpu_base *base) { } | ||
1256 | static inline void | ||
1257 | hrtimer_cpu_base_unlock_expiry(struct hrtimer_cpu_base *base) { } | ||
1258 | static inline void hrtimer_sync_wait_running(struct hrtimer_cpu_base *base, | ||
1259 | unsigned long flags) { } | ||
1260 | #endif | ||
1261 | |||
1150 | /** | 1262 | /** |
1151 | * hrtimer_cancel - cancel a timer and wait for the handler to finish. | 1263 | * hrtimer_cancel - cancel a timer and wait for the handler to finish. |
1152 | * @timer: the timer to be cancelled | 1264 | * @timer: the timer to be cancelled |
@@ -1157,13 +1269,15 @@ EXPORT_SYMBOL_GPL(hrtimer_try_to_cancel); | |||
1157 | */ | 1269 | */ |
1158 | int hrtimer_cancel(struct hrtimer *timer) | 1270 | int hrtimer_cancel(struct hrtimer *timer) |
1159 | { | 1271 | { |
1160 | for (;;) { | 1272 | int ret; |
1161 | int ret = hrtimer_try_to_cancel(timer); | ||
1162 | 1273 | ||
1163 | if (ret >= 0) | 1274 | do { |
1164 | return ret; | 1275 | ret = hrtimer_try_to_cancel(timer); |
1165 | cpu_relax(); | 1276 | |
1166 | } | 1277 | if (ret < 0) |
1278 | hrtimer_cancel_wait_running(timer); | ||
1279 | } while (ret < 0); | ||
1280 | return ret; | ||
1167 | } | 1281 | } |
1168 | EXPORT_SYMBOL_GPL(hrtimer_cancel); | 1282 | EXPORT_SYMBOL_GPL(hrtimer_cancel); |
1169 | 1283 | ||
@@ -1260,8 +1374,17 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id, | |||
1260 | enum hrtimer_mode mode) | 1374 | enum hrtimer_mode mode) |
1261 | { | 1375 | { |
1262 | bool softtimer = !!(mode & HRTIMER_MODE_SOFT); | 1376 | bool softtimer = !!(mode & HRTIMER_MODE_SOFT); |
1263 | int base = softtimer ? HRTIMER_MAX_CLOCK_BASES / 2 : 0; | ||
1264 | struct hrtimer_cpu_base *cpu_base; | 1377 | struct hrtimer_cpu_base *cpu_base; |
1378 | int base; | ||
1379 | |||
1380 | /* | ||
1381 | * On PREEMPT_RT enabled kernels hrtimers which are not explicitly | ||
1382 | * marked for hard interrupt expiry mode are moved into soft | ||
1383 | * interrupt context for latency reasons and because the callbacks | ||
1384 | * can invoke functions which might sleep on RT, e.g. spin_lock(). | ||
1385 | */ | ||
1386 | if (IS_ENABLED(CONFIG_PREEMPT_RT) && !(mode & HRTIMER_MODE_HARD)) | ||
1387 | softtimer = true; | ||
1265 | 1388 | ||
1266 | memset(timer, 0, sizeof(struct hrtimer)); | 1389 | memset(timer, 0, sizeof(struct hrtimer)); |
1267 | 1390 | ||
@@ -1275,8 +1398,10 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id, | |||
1275 | if (clock_id == CLOCK_REALTIME && mode & HRTIMER_MODE_REL) | 1398 | if (clock_id == CLOCK_REALTIME && mode & HRTIMER_MODE_REL) |
1276 | clock_id = CLOCK_MONOTONIC; | 1399 | clock_id = CLOCK_MONOTONIC; |
1277 | 1400 | ||
1401 | base = softtimer ? HRTIMER_MAX_CLOCK_BASES / 2 : 0; | ||
1278 | base += hrtimer_clockid_to_base(clock_id); | 1402 | base += hrtimer_clockid_to_base(clock_id); |
1279 | timer->is_soft = softtimer; | 1403 | timer->is_soft = softtimer; |
1404 | timer->is_hard = !softtimer; | ||
1280 | timer->base = &cpu_base->clock_base[base]; | 1405 | timer->base = &cpu_base->clock_base[base]; |
1281 | timerqueue_init(&timer->node); | 1406 | timerqueue_init(&timer->node); |
1282 | } | 1407 | } |
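
On PREEMPT_RT, any hrtimer not explicitly initialized with a _HARD mode is transparently moved to the soft bases, which occupy the upper half of the clock_base array. Distilled from the hunk above:

    /* Sketch of the base selection performed by __hrtimer_init().
     * With HRTIMER_MAX_CLOCK_BASES == 8, bases 0-3 expire in hard irq
     * context and bases 4-7 from the hrtimer softirq. */
    static int pick_base(enum hrtimer_mode mode, int clock_base)
    {
        bool soft = mode & HRTIMER_MODE_SOFT;

        if (IS_ENABLED(CONFIG_PREEMPT_RT) && !(mode & HRTIMER_MODE_HARD))
            soft = true;    /* unmarked timers move to softirq expiry */

        return (soft ? HRTIMER_MAX_CLOCK_BASES / 2 : 0) + clock_base;
    }
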
@@ -1449,6 +1574,8 @@ static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now, | |||
1449 | break; | 1574 | break; |
1450 | 1575 | ||
1451 | __run_hrtimer(cpu_base, base, timer, &basenow, flags); | 1576 | __run_hrtimer(cpu_base, base, timer, &basenow, flags); |
1577 | if (active_mask == HRTIMER_ACTIVE_SOFT) | ||
1578 | hrtimer_sync_wait_running(cpu_base, flags); | ||
1452 | } | 1579 | } |
1453 | } | 1580 | } |
1454 | } | 1581 | } |
@@ -1459,6 +1586,7 @@ static __latent_entropy void hrtimer_run_softirq(struct softirq_action *h) | |||
1459 | unsigned long flags; | 1586 | unsigned long flags; |
1460 | ktime_t now; | 1587 | ktime_t now; |
1461 | 1588 | ||
1589 | hrtimer_cpu_base_lock_expiry(cpu_base); | ||
1462 | raw_spin_lock_irqsave(&cpu_base->lock, flags); | 1590 | raw_spin_lock_irqsave(&cpu_base->lock, flags); |
1463 | 1591 | ||
1464 | now = hrtimer_update_base(cpu_base); | 1592 | now = hrtimer_update_base(cpu_base); |
@@ -1468,6 +1596,7 @@ static __latent_entropy void hrtimer_run_softirq(struct softirq_action *h) | |||
1468 | hrtimer_update_softirq_timer(cpu_base, true); | 1596 | hrtimer_update_softirq_timer(cpu_base, true); |
1469 | 1597 | ||
1470 | raw_spin_unlock_irqrestore(&cpu_base->lock, flags); | 1598 | raw_spin_unlock_irqrestore(&cpu_base->lock, flags); |
1599 | hrtimer_cpu_base_unlock_expiry(cpu_base); | ||
1471 | } | 1600 | } |
1472 | 1601 | ||
1473 | #ifdef CONFIG_HIGH_RES_TIMERS | 1602 | #ifdef CONFIG_HIGH_RES_TIMERS |
@@ -1639,10 +1768,75 @@ static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer) | |||
1639 | return HRTIMER_NORESTART; | 1768 | return HRTIMER_NORESTART; |
1640 | } | 1769 | } |
1641 | 1770 | ||
1642 | void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task) | 1771 | /** |
1772 | * hrtimer_sleeper_start_expires - Start a hrtimer sleeper timer | ||
1773 | * @sl: sleeper to be started | ||
1774 | * @mode: timer mode abs/rel | ||
1775 | * | ||
1776 | * Wrapper around hrtimer_start_expires() for hrtimer_sleeper based timers | ||
1777 | * to allow PREEMPT_RT to tweak the delivery mode (soft/hardirq context) | ||
1778 | */ | ||
1779 | void hrtimer_sleeper_start_expires(struct hrtimer_sleeper *sl, | ||
1780 | enum hrtimer_mode mode) | ||
1781 | { | ||
1782 | /* | ||
1783 | * Make the enqueue delivery mode check work on RT. If the sleeper | ||
1784 | * was initialized for hard interrupt delivery, force the mode bit. | ||
1785 | * This is a special case for hrtimer_sleepers because | ||
1786 | * hrtimer_init_sleeper() determines the delivery mode on RT so the | ||
1787 | * fiddling with this decision is avoided at the call sites. | ||
1788 | */ | ||
1789 | if (IS_ENABLED(CONFIG_PREEMPT_RT) && sl->timer.is_hard) | ||
1790 | mode |= HRTIMER_MODE_HARD; | ||
1791 | |||
1792 | hrtimer_start_expires(&sl->timer, mode); | ||
1793 | } | ||
1794 | EXPORT_SYMBOL_GPL(hrtimer_sleeper_start_expires); | ||
1795 | |||
1796 | static void __hrtimer_init_sleeper(struct hrtimer_sleeper *sl, | ||
1797 | clockid_t clock_id, enum hrtimer_mode mode) | ||
1643 | { | 1798 | { |
1799 | /* | ||
1800 | * On PREEMPT_RT enabled kernels hrtimers which are not explicitly | ||
1801 | * marked for hard interrupt expiry mode are moved into soft | ||
1802 | * interrupt context either for latency reasons or because the | ||
1803 | * hrtimer callback takes regular spinlocks or invokes other | ||
1804 | * functions which are not suitable for hard interrupt context on | ||
1805 | * PREEMPT_RT. | ||
1806 | * | ||
1807 | * The hrtimer_sleeper callback is RT compatible in hard interrupt | ||
1808 | * context, but there is a latency concern: Untrusted userspace can | ||
1809 | * spawn many threads which arm timers for the same expiry time on | ||
1810 | * the same CPU. That causes a latency spike due to the wakeup of | ||
1811 | * a gazillion threads. | ||
1812 | * | ||
1813 | * OTOH, privileged real-time user space applications rely on the | ||
1814 | * low latency of hard interrupt wakeups. If the current task is in | ||
1815 | * a real-time scheduling class, mark the mode for hard interrupt | ||
1816 | * expiry. | ||
1817 | */ | ||
1818 | if (IS_ENABLED(CONFIG_PREEMPT_RT)) { | ||
1819 | if (task_is_realtime(current) && !(mode & HRTIMER_MODE_SOFT)) | ||
1820 | mode |= HRTIMER_MODE_HARD; | ||
1821 | } | ||
1822 | |||
1823 | __hrtimer_init(&sl->timer, clock_id, mode); | ||
1644 | sl->timer.function = hrtimer_wakeup; | 1824 | sl->timer.function = hrtimer_wakeup; |
1645 | sl->task = task; | 1825 | sl->task = current; |
1826 | } | ||
1827 | |||
1828 | /** | ||
1829 | * hrtimer_init_sleeper - initialize sleeper to the given clock | ||
1830 | * @sl: sleeper to be initialized | ||
1831 | * @clock_id: the clock to be used | ||
1832 | * @mode: timer mode abs/rel | ||
1833 | */ | ||
1834 | void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, clockid_t clock_id, | ||
1835 | enum hrtimer_mode mode) | ||
1836 | { | ||
1837 | debug_init(&sl->timer, clock_id, mode); | ||
1838 | __hrtimer_init_sleeper(sl, clock_id, mode); | ||
1839 | |||
1646 | } | 1840 | } |
1647 | EXPORT_SYMBOL_GPL(hrtimer_init_sleeper); | 1841 | EXPORT_SYMBOL_GPL(hrtimer_init_sleeper); |
1648 | 1842 | ||
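
Taken together, the sleeper now makes the hard/soft decision once at init time (RT-class callers get hard irq wakeups, everything else expires from softirq on RT) and hrtimer_sleeper_start_expires() re-applies it at start time, so call sites carry no RT-specific branches. A sketch of a complete sleep built on the consolidated API:

    /* Sketch: relative, interruptible sleep on the new sleeper API. */
    static int sleep_for(ktime_t rel)
    {
        struct hrtimer_sleeper t;

        hrtimer_init_sleeper_on_stack(&t, CLOCK_MONOTONIC,
                                      HRTIMER_MODE_REL);
        hrtimer_set_expires(&t.timer, rel);

        set_current_state(TASK_INTERRUPTIBLE);
        hrtimer_sleeper_start_expires(&t, HRTIMER_MODE_REL);
        if (t.task)                     /* cleared by hrtimer_wakeup() */
            schedule();
        __set_current_state(TASK_RUNNING);

        hrtimer_cancel(&t.timer);
        destroy_hrtimer_on_stack(&t.timer);
        return t.task ? -EINTR : 0;     /* task still set: woken early */
    }
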
@@ -1669,11 +1863,9 @@ static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mod | |||
1669 | { | 1863 | { |
1670 | struct restart_block *restart; | 1864 | struct restart_block *restart; |
1671 | 1865 | ||
1672 | hrtimer_init_sleeper(t, current); | ||
1673 | |||
1674 | do { | 1866 | do { |
1675 | set_current_state(TASK_INTERRUPTIBLE); | 1867 | set_current_state(TASK_INTERRUPTIBLE); |
1676 | hrtimer_start_expires(&t->timer, mode); | 1868 | hrtimer_sleeper_start_expires(t, mode); |
1677 | 1869 | ||
1678 | if (likely(t->task)) | 1870 | if (likely(t->task)) |
1679 | freezable_schedule(); | 1871 | freezable_schedule(); |
@@ -1707,10 +1899,9 @@ static long __sched hrtimer_nanosleep_restart(struct restart_block *restart) | |||
1707 | struct hrtimer_sleeper t; | 1899 | struct hrtimer_sleeper t; |
1708 | int ret; | 1900 | int ret; |
1709 | 1901 | ||
1710 | hrtimer_init_on_stack(&t.timer, restart->nanosleep.clockid, | 1902 | hrtimer_init_sleeper_on_stack(&t, restart->nanosleep.clockid, |
1711 | HRTIMER_MODE_ABS); | 1903 | HRTIMER_MODE_ABS); |
1712 | hrtimer_set_expires_tv64(&t.timer, restart->nanosleep.expires); | 1904 | hrtimer_set_expires_tv64(&t.timer, restart->nanosleep.expires); |
1713 | |||
1714 | ret = do_nanosleep(&t, HRTIMER_MODE_ABS); | 1905 | ret = do_nanosleep(&t, HRTIMER_MODE_ABS); |
1715 | destroy_hrtimer_on_stack(&t.timer); | 1906 | destroy_hrtimer_on_stack(&t.timer); |
1716 | return ret; | 1907 | return ret; |
@@ -1728,7 +1919,7 @@ long hrtimer_nanosleep(const struct timespec64 *rqtp, | |||
1728 | if (dl_task(current) || rt_task(current)) | 1919 | if (dl_task(current) || rt_task(current)) |
1729 | slack = 0; | 1920 | slack = 0; |
1730 | 1921 | ||
1731 | hrtimer_init_on_stack(&t.timer, clockid, mode); | 1922 | hrtimer_init_sleeper_on_stack(&t, clockid, mode); |
1732 | hrtimer_set_expires_range_ns(&t.timer, timespec64_to_ktime(*rqtp), slack); | 1923 | hrtimer_set_expires_range_ns(&t.timer, timespec64_to_ktime(*rqtp), slack); |
1733 | ret = do_nanosleep(&t, mode); | 1924 | ret = do_nanosleep(&t, mode); |
1734 | if (ret != -ERESTART_RESTARTBLOCK) | 1925 | if (ret != -ERESTART_RESTARTBLOCK) |
@@ -1809,6 +2000,7 @@ int hrtimers_prepare_cpu(unsigned int cpu) | |||
1809 | cpu_base->softirq_next_timer = NULL; | 2000 | cpu_base->softirq_next_timer = NULL; |
1810 | cpu_base->expires_next = KTIME_MAX; | 2001 | cpu_base->expires_next = KTIME_MAX; |
1811 | cpu_base->softirq_expires_next = KTIME_MAX; | 2002 | cpu_base->softirq_expires_next = KTIME_MAX; |
2003 | hrtimer_cpu_base_init_expiry_lock(cpu_base); | ||
1812 | return 0; | 2004 | return 0; |
1813 | } | 2005 | } |
1814 | 2006 | ||
@@ -1927,12 +2119,9 @@ schedule_hrtimeout_range_clock(ktime_t *expires, u64 delta, | |||
1927 | return -EINTR; | 2119 | return -EINTR; |
1928 | } | 2120 | } |
1929 | 2121 | ||
1930 | hrtimer_init_on_stack(&t.timer, clock_id, mode); | 2122 | hrtimer_init_sleeper_on_stack(&t, clock_id, mode); |
1931 | hrtimer_set_expires_range_ns(&t.timer, *expires, delta); | 2123 | hrtimer_set_expires_range_ns(&t.timer, *expires, delta); |
1932 | 2124 | hrtimer_sleeper_start_expires(&t, mode); | |
1933 | hrtimer_init_sleeper(&t, current); | ||
1934 | |||
1935 | hrtimer_start_expires(&t.timer, mode); | ||
1936 | 2125 | ||
1937 | if (likely(t.task)) | 2126 | if (likely(t.task)) |
1938 | schedule(); | 2127 | schedule(); |
diff --git a/kernel/time/itimer.c b/kernel/time/itimer.c index 02068b2d5862..77f1e5635cc1 100644 --- a/kernel/time/itimer.c +++ b/kernel/time/itimer.c | |||
@@ -55,15 +55,10 @@ static void get_cpu_itimer(struct task_struct *tsk, unsigned int clock_id, | |||
55 | val = it->expires; | 55 | val = it->expires; |
56 | interval = it->incr; | 56 | interval = it->incr; |
57 | if (val) { | 57 | if (val) { |
58 | struct task_cputime cputime; | 58 | u64 t, samples[CPUCLOCK_MAX]; |
59 | u64 t; | ||
60 | 59 | ||
61 | thread_group_cputimer(tsk, &cputime); | 60 | thread_group_sample_cputime(tsk, samples); |
62 | if (clock_id == CPUCLOCK_PROF) | 61 | t = samples[clock_id]; |
63 | t = cputime.utime + cputime.stime; | ||
64 | else | ||
65 | /* CPUCLOCK_VIRT */ | ||
66 | t = cputime.utime; | ||
67 | 62 | ||
68 | if (val < t) | 63 | if (val < t) |
69 | /* about to fire */ | 64 | /* about to fire */ |
@@ -213,6 +208,7 @@ again: | |||
213 | /* We are sharing ->siglock with it_real_fn() */ | 208 | /* We are sharing ->siglock with it_real_fn() */ |
214 | if (hrtimer_try_to_cancel(timer) < 0) { | 209 | if (hrtimer_try_to_cancel(timer) < 0) { |
215 | spin_unlock_irq(&tsk->sighand->siglock); | 210 | spin_unlock_irq(&tsk->sighand->siglock); |
211 | hrtimer_cancel_wait_running(timer); | ||
216 | goto again; | 212 | goto again; |
217 | } | 213 | } |
218 | expires = timeval_to_ktime(value->it_value); | 214 | expires = timeval_to_ktime(value->it_value); |
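The added hrtimer_cancel_wait_running() closes a PREEMPT_RT hazard: there the expiry callback can run in preemptible softirq context, so spinning on hrtimer_try_to_cancel() while holding ->siglock could keep the callback from ever completing. The retry loop above drops the lock, waits for the running callback to finish (on non-RT configs the helper is effectively a cpu_relax(), an assumption based on this series), then retries; condensed:

	again:
		spin_lock_irq(&tsk->sighand->siglock);
		if (hrtimer_try_to_cancel(timer) < 0) {
			spin_unlock_irq(&tsk->sighand->siglock);
			/* Waits until the running expiry callback is done */
			hrtimer_cancel_wait_running(timer);
			goto again;
		}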
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index 0a426f4e3125..92a431981b1c 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c | |||
@@ -20,11 +20,20 @@ | |||
20 | 20 | ||
21 | static void posix_cpu_timer_rearm(struct k_itimer *timer); | 21 | static void posix_cpu_timer_rearm(struct k_itimer *timer); |
22 | 22 | ||
23 | void posix_cputimers_group_init(struct posix_cputimers *pct, u64 cpu_limit) | ||
24 | { | ||
25 | posix_cputimers_init(pct); | ||
26 | if (cpu_limit != RLIM_INFINITY) { | ||
27 | pct->bases[CPUCLOCK_PROF].nextevt = cpu_limit * NSEC_PER_SEC; | ||
28 | pct->timers_active = true; | ||
29 | } | ||
30 | } | ||
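The new helper seeds the expiry cache straight from RLIMIT_CPU: a process forked with a 2 second CPU limit starts with bases[CPUCLOCK_PROF].nextevt = 2 * NSEC_PER_SEC and timers_active set, so the limit is policed without any posix timer being created. A hedged sketch of a fork-path caller (the real call site is outside this hunk):

	unsigned long cpu_limit = sig->rlim[RLIMIT_CPU].rlim_cur;

	posix_cputimers_group_init(&sig->posix_cputimers, cpu_limit);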
31 | |||
23 | /* | 32 | /* |
24 | * Called after updating RLIMIT_CPU to run cpu timer and update | 33 | * Called after updating RLIMIT_CPU to run cpu timer and update |
25 | * tsk->signal->cputime_expires expiration cache if necessary. Needs | 34 | * tsk->signal->posix_cputimers.bases[clock].nextevt expiration cache if |
26 | * siglock protection since other code may update expiration cache as | 35 | * necessary. Needs siglock protection since other code may update the |
27 | * well. | 36 | * expiration cache as well. |
28 | */ | 37 | */ |
29 | void update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new) | 38 | void update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new) |
30 | { | 39 | { |
@@ -35,46 +44,97 @@ void update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new) | |||
35 | spin_unlock_irq(&task->sighand->siglock); | 44 | spin_unlock_irq(&task->sighand->siglock); |
36 | } | 45 | } |
37 | 46 | ||
38 | static int check_clock(const clockid_t which_clock) | 47 | /* |
48 | * Functions for validating access to tasks. | ||
49 | */ | ||
50 | static struct task_struct *lookup_task(const pid_t pid, bool thread, | ||
51 | bool gettime) | ||
39 | { | 52 | { |
40 | int error = 0; | ||
41 | struct task_struct *p; | 53 | struct task_struct *p; |
42 | const pid_t pid = CPUCLOCK_PID(which_clock); | ||
43 | |||
44 | if (CPUCLOCK_WHICH(which_clock) >= CPUCLOCK_MAX) | ||
45 | return -EINVAL; | ||
46 | 54 | ||
47 | if (pid == 0) | 55 | /* |
48 | return 0; | 56 | * If the encoded PID is 0, then the timer is targeted at current |
57 | * or the process to which current belongs. | ||
58 | */ | ||
59 | if (!pid) | ||
60 | return thread ? current : current->group_leader; | ||
49 | 61 | ||
50 | rcu_read_lock(); | ||
51 | p = find_task_by_vpid(pid); | 62 | p = find_task_by_vpid(pid); |
52 | if (!p || !(CPUCLOCK_PERTHREAD(which_clock) ? | 63 | if (!p) |
53 | same_thread_group(p, current) : has_group_leader_pid(p))) { | 64 | return p; |
54 | error = -EINVAL; | 65 | |
66 | if (thread) | ||
67 | return same_thread_group(p, current) ? p : NULL; | ||
68 | |||
69 | if (gettime) { | ||
70 | /* | ||
71 | * For clock_gettime(PROCESS) the task does not need to be | ||
72 | * the actual group leader. tsk->sighand gives | ||
73 | * access to the group's clock. | ||
74 | * | ||
75 | * Timers need the group leader because they take a | ||
76 | * reference on it and store the task pointer until the | ||
77 | * timer is destroyed. | ||
78 | */ | ||
79 | return (p == current || thread_group_leader(p)) ? p : NULL; | ||
55 | } | 80 | } |
81 | |||
82 | /* | ||
83 | * For processes, require that p is the group leader. | ||
84 | */ | ||
85 | return has_group_leader_pid(p) ? p : NULL; | ||
86 | } | ||
87 | |||
88 | static struct task_struct *__get_task_for_clock(const clockid_t clock, | ||
89 | bool getref, bool gettime) | ||
90 | { | ||
91 | const bool thread = !!CPUCLOCK_PERTHREAD(clock); | ||
92 | const pid_t pid = CPUCLOCK_PID(clock); | ||
93 | struct task_struct *p; | ||
94 | |||
95 | if (CPUCLOCK_WHICH(clock) >= CPUCLOCK_MAX) | ||
96 | return NULL; | ||
97 | |||
98 | rcu_read_lock(); | ||
99 | p = lookup_task(pid, thread, gettime); | ||
100 | if (p && getref) | ||
101 | get_task_struct(p); | ||
56 | rcu_read_unlock(); | 102 | rcu_read_unlock(); |
103 | return p; | ||
104 | } | ||
57 | 105 | ||
58 | return error; | 106 | static inline struct task_struct *get_task_for_clock(const clockid_t clock) |
107 | { | ||
108 | return __get_task_for_clock(clock, true, false); | ||
109 | } | ||
110 | |||
111 | static inline struct task_struct *get_task_for_clock_get(const clockid_t clock) | ||
112 | { | ||
113 | return __get_task_for_clock(clock, true, true); | ||
114 | } | ||
115 | |||
116 | static inline int validate_clock_permissions(const clockid_t clock) | ||
117 | { | ||
118 | return __get_task_for_clock(clock, false, false) ? 0 : -EINVAL; | ||
59 | } | 119 | } |
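All three helpers decode the same packed clockid_t layout: the PID lives in the upper bits, bit 2 selects a per-thread clock, and the low two bits select PROF/VIRT/SCHED. The decoding macros from posix-timers.h look roughly like the sketch below (quoted from memory, so verify against the header):

	#define CPUCLOCK_PID(clock)       ((pid_t) ~((clock) >> 3))
	#define CPUCLOCK_PERTHREAD(clock) (((clock) & (clockid_t) 4) != 0)
	#define CPUCLOCK_WHICH(clock)     ((clock) & (clockid_t) 3)

An encoded PID of 0 therefore means current (or the process it belongs to), which is the case lookup_task() handles first.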
60 | 120 | ||
61 | /* | 121 | /* |
62 | * Update expiry time from increment, and increase overrun count, | 122 | * Update expiry time from increment, and increase overrun count, |
63 | * given the current clock sample. | 123 | * given the current clock sample. |
64 | */ | 124 | */ |
65 | static void bump_cpu_timer(struct k_itimer *timer, u64 now) | 125 | static u64 bump_cpu_timer(struct k_itimer *timer, u64 now) |
66 | { | 126 | { |
127 | u64 delta, incr, expires = timer->it.cpu.node.expires; | ||
67 | int i; | 128 | int i; |
68 | u64 delta, incr; | ||
69 | 129 | ||
70 | if (!timer->it_interval) | 130 | if (!timer->it_interval) |
71 | return; | 131 | return expires; |
72 | 132 | ||
73 | if (now < timer->it.cpu.expires) | 133 | if (now < expires) |
74 | return; | 134 | return expires; |
75 | 135 | ||
76 | incr = timer->it_interval; | 136 | incr = timer->it_interval; |
77 | delta = now + incr - timer->it.cpu.expires; | 137 | delta = now + incr - expires; |
78 | 138 | ||
79 | /* Don't use (incr*2 < delta), incr*2 might overflow. */ | 139 | /* Don't use (incr*2 < delta), incr*2 might overflow. */ |
80 | for (i = 0; incr < delta - incr; i++) | 140 | for (i = 0; incr < delta - incr; i++) |
@@ -84,48 +144,26 @@ static void bump_cpu_timer(struct k_itimer *timer, u64 now) | |||
84 | if (delta < incr) | 144 | if (delta < incr) |
85 | continue; | 145 | continue; |
86 | 146 | ||
87 | timer->it.cpu.expires += incr; | 147 | timer->it.cpu.node.expires += incr; |
88 | timer->it_overrun += 1LL << i; | 148 | timer->it_overrun += 1LL << i; |
89 | delta -= incr; | 149 | delta -= incr; |
90 | } | 150 | } |
151 | return timer->it.cpu.node.expires; | ||
91 | } | 152 | } |
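The doubling/halving pair advances the expiry by whole intervals without a 64-bit division and without the overflow a naive incr * 2 comparison could hit. A worked example against the descending half shown above:

	/*
	 * expires = 100, incr = 10, now = 137
	 * delta = now + incr - expires = 47
	 *
	 * Doubling:   i = 0: 10 < 47 - 10 -> incr = 20
	 *             i = 1: 20 < 47 - 20 -> incr = 40
	 *             i = 2: 40 >= 47 - 40 -> stop
	 *
	 * Descending: i = 2: delta = 47 >= 40:
	 *                 node.expires += 40   (100 -> 140)
	 *                 it_overrun   += 1<<2 (four overruns)
	 *                 delta = 7
	 *             i = 1, 0: delta < incr, skipped
	 *
	 * Four expiries (100, 110, 120, 130) have elapsed and the
	 * next one is moved past 'now' to 140.
	 */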
92 | 153 | ||
93 | /** | 154 | /* Check whether all cache entries contain U64_MAX, i.e. eternal expiry time */ |
94 | * task_cputime_zero - Check a task_cputime struct for all zero fields. | 155 | static inline bool expiry_cache_is_inactive(const struct posix_cputimers *pct) |
95 | * | ||
96 | * @cputime: The struct to compare. | ||
97 | * | ||
98 | * Checks @cputime to see if all fields are zero. Returns true if all fields | ||
99 | * are zero, false if any field is nonzero. | ||
100 | */ | ||
101 | static inline int task_cputime_zero(const struct task_cputime *cputime) | ||
102 | { | 156 | { |
103 | if (!cputime->utime && !cputime->stime && !cputime->sum_exec_runtime) | 157 | return !(~pct->bases[CPUCLOCK_PROF].nextevt | |
104 | return 1; | 158 | ~pct->bases[CPUCLOCK_VIRT].nextevt | |
105 | return 0; | 159 | ~pct->bases[CPUCLOCK_SCHED].nextevt); |
106 | } | ||
107 | |||
108 | static inline u64 prof_ticks(struct task_struct *p) | ||
109 | { | ||
110 | u64 utime, stime; | ||
111 | |||
112 | task_cputime(p, &utime, &stime); | ||
113 | |||
114 | return utime + stime; | ||
115 | } | ||
116 | static inline u64 virt_ticks(struct task_struct *p) | ||
117 | { | ||
118 | u64 utime, stime; | ||
119 | |||
120 | task_cputime(p, &utime, &stime); | ||
121 | |||
122 | return utime; | ||
123 | } | 160 | } |
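The bitwise form exploits ~U64_MAX == 0: the OR of the three complements is zero exactly when every nextevt still holds U64_MAX, i.e. no timer is armed in any base. An equivalent, more explicit spelling (hypothetical helper, for illustration only):

	static inline bool expiry_cache_is_inactive_explicit(const struct posix_cputimers *pct)
	{
		return pct->bases[CPUCLOCK_PROF].nextevt  == U64_MAX &&
		       pct->bases[CPUCLOCK_VIRT].nextevt  == U64_MAX &&
		       pct->bases[CPUCLOCK_SCHED].nextevt == U64_MAX;
	}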
124 | 161 | ||
125 | static int | 162 | static int |
126 | posix_cpu_clock_getres(const clockid_t which_clock, struct timespec64 *tp) | 163 | posix_cpu_clock_getres(const clockid_t which_clock, struct timespec64 *tp) |
127 | { | 164 | { |
128 | int error = check_clock(which_clock); | 165 | int error = validate_clock_permissions(which_clock); |
166 | |||
129 | if (!error) { | 167 | if (!error) { |
130 | tp->tv_sec = 0; | 168 | tp->tv_sec = 0; |
131 | tp->tv_nsec = ((NSEC_PER_SEC + HZ - 1) / HZ); | 169 | tp->tv_nsec = ((NSEC_PER_SEC + HZ - 1) / HZ); |
@@ -142,42 +180,66 @@ posix_cpu_clock_getres(const clockid_t which_clock, struct timespec64 *tp) | |||
142 | } | 180 | } |
143 | 181 | ||
144 | static int | 182 | static int |
145 | posix_cpu_clock_set(const clockid_t which_clock, const struct timespec64 *tp) | 183 | posix_cpu_clock_set(const clockid_t clock, const struct timespec64 *tp) |
146 | { | 184 | { |
185 | int error = validate_clock_permissions(clock); | ||
186 | |||
147 | /* | 187 | /* |
148 | * You can never reset a CPU clock, but we check for other errors | 188 | * You can never reset a CPU clock, but we check for other errors |
149 | * in the call before failing with EPERM. | 189 | * in the call before failing with EPERM. |
150 | */ | 190 | */ |
151 | int error = check_clock(which_clock); | 191 | return error ? : -EPERM; |
152 | if (error == 0) { | ||
153 | error = -EPERM; | ||
154 | } | ||
155 | return error; | ||
156 | } | 192 | } |
157 | 193 | ||
158 | |||
159 | /* | 194 | /* |
160 | * Sample a per-thread clock for the given task. | 195 | * Sample a per-thread clock for the given task. clkid is validated. |
161 | */ | 196 | */ |
162 | static int cpu_clock_sample(const clockid_t which_clock, | 197 | static u64 cpu_clock_sample(const clockid_t clkid, struct task_struct *p) |
163 | struct task_struct *p, u64 *sample) | ||
164 | { | 198 | { |
165 | switch (CPUCLOCK_WHICH(which_clock)) { | 199 | u64 utime, stime; |
166 | default: | 200 | |
167 | return -EINVAL; | 201 | if (clkid == CPUCLOCK_SCHED) |
202 | return task_sched_runtime(p); | ||
203 | |||
204 | task_cputime(p, &utime, &stime); | ||
205 | |||
206 | switch (clkid) { | ||
168 | case CPUCLOCK_PROF: | 207 | case CPUCLOCK_PROF: |
169 | *sample = prof_ticks(p); | 208 | return utime + stime; |
170 | break; | ||
171 | case CPUCLOCK_VIRT: | 209 | case CPUCLOCK_VIRT: |
172 | *sample = virt_ticks(p); | 210 | return utime; |
173 | break; | 211 | default: |
174 | case CPUCLOCK_SCHED: | 212 | WARN_ON_ONCE(1); |
175 | *sample = task_sched_runtime(p); | ||
176 | break; | ||
177 | } | 213 | } |
178 | return 0; | 214 | return 0; |
179 | } | 215 | } |
180 | 216 | ||
217 | static inline void store_samples(u64 *samples, u64 stime, u64 utime, u64 rtime) | ||
218 | { | ||
219 | samples[CPUCLOCK_PROF] = stime + utime; | ||
220 | samples[CPUCLOCK_VIRT] = utime; | ||
221 | samples[CPUCLOCK_SCHED] = rtime; | ||
222 | } | ||
223 | |||
224 | static void task_sample_cputime(struct task_struct *p, u64 *samples) | ||
225 | { | ||
226 | u64 stime, utime; | ||
227 | |||
228 | task_cputime(p, &utime, &stime); | ||
229 | store_samples(samples, stime, utime, p->se.sum_exec_runtime); | ||
230 | } | ||
231 | |||
232 | static void proc_sample_cputime_atomic(struct task_cputime_atomic *at, | ||
233 | u64 *samples) | ||
234 | { | ||
235 | u64 stime, utime, rtime; | ||
236 | |||
237 | utime = atomic64_read(&at->utime); | ||
238 | stime = atomic64_read(&at->stime); | ||
239 | rtime = atomic64_read(&at->sum_exec_runtime); | ||
240 | store_samples(samples, stime, utime, rtime); | ||
241 | } | ||
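Every producer fills the array in CPUCLOCK_* order, so a validated clock id can index it directly; this is what lets the reworked get_cpu_itimer() above do t = samples[clock_id] without a switch. Usage sketch:

	u64 t, samples[CPUCLOCK_MAX];

	task_sample_cputime(tsk, samples);
	t = samples[CPUCLOCK_PROF];	/* stime + utime */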
242 | |||
181 | /* | 243 | /* |
182 | * Set cputime to sum_cputime if sum_cputime > cputime. Use cmpxchg | 244 | * Set cputime to sum_cputime if sum_cputime > cputime. Use cmpxchg |
183 | * to avoid race conditions with concurrent updates to cputime. | 245 | * to avoid race conditions with concurrent updates to cputime. |
@@ -193,29 +255,56 @@ retry: | |||
193 | } | 255 | } |
194 | } | 256 | } |
195 | 257 | ||
196 | static void update_gt_cputime(struct task_cputime_atomic *cputime_atomic, struct task_cputime *sum) | 258 | static void update_gt_cputime(struct task_cputime_atomic *cputime_atomic, |
259 | struct task_cputime *sum) | ||
197 | { | 260 | { |
198 | __update_gt_cputime(&cputime_atomic->utime, sum->utime); | 261 | __update_gt_cputime(&cputime_atomic->utime, sum->utime); |
199 | __update_gt_cputime(&cputime_atomic->stime, sum->stime); | 262 | __update_gt_cputime(&cputime_atomic->stime, sum->stime); |
200 | __update_gt_cputime(&cputime_atomic->sum_exec_runtime, sum->sum_exec_runtime); | 263 | __update_gt_cputime(&cputime_atomic->sum_exec_runtime, sum->sum_exec_runtime); |
201 | } | 264 | } |
202 | 265 | ||
203 | /* Sample task_cputime_atomic values in "atomic_timers", store results in "times". */ | 266 | /** |
204 | static inline void sample_cputime_atomic(struct task_cputime *times, | 267 | * thread_group_sample_cputime - Sample cputime for a given task |
205 | struct task_cputime_atomic *atomic_times) | 268 | * @tsk: Task for which cputime needs to be sampled |
269 | * @samples: Storage for time samples | ||
270 | * | ||
271 | * Called from sys_getitimer() to calculate the expiry time of an active | ||
272 | * timer. That means group cputime accounting is already active. Called | ||
273 | * with task sighand lock held. | ||
274 | * | ||
275 | * Updates @samples with an up-to-date sample of the thread group cputimes. | ||
276 | */ | ||
277 | void thread_group_sample_cputime(struct task_struct *tsk, u64 *samples) | ||
206 | { | 278 | { |
207 | times->utime = atomic64_read(&atomic_times->utime); | 279 | struct thread_group_cputimer *cputimer = &tsk->signal->cputimer; |
208 | times->stime = atomic64_read(&atomic_times->stime); | 280 | struct posix_cputimers *pct = &tsk->signal->posix_cputimers; |
209 | times->sum_exec_runtime = atomic64_read(&atomic_times->sum_exec_runtime); | 281 | |
282 | WARN_ON_ONCE(!pct->timers_active); | ||
283 | |||
284 | proc_sample_cputime_atomic(&cputimer->cputime_atomic, samples); | ||
210 | } | 285 | } |
211 | 286 | ||
212 | void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times) | 287 | /** |
288 | * thread_group_start_cputime - Start cputime and return a sample | ||
289 | * @tsk: Task for which cputime needs to be started | ||
290 | * @samples: Storage for time samples | ||
291 | * | ||
292 | * The thread group cputime accounting is avoided when there are no POSIX | ||
293 | * CPU timers armed. Before starting a timer it's required to check whether | ||
294 | * the time accounting is active. If not, a full update of the atomic | ||
295 | * accounting store needs to be done and the accounting enabled. | ||
296 | * | ||
297 | * Updates @samples with an up-to-date sample of the thread group cputimes. | ||
298 | */ | ||
299 | static void thread_group_start_cputime(struct task_struct *tsk, u64 *samples) | ||
213 | { | 300 | { |
214 | struct thread_group_cputimer *cputimer = &tsk->signal->cputimer; | 301 | struct thread_group_cputimer *cputimer = &tsk->signal->cputimer; |
215 | struct task_cputime sum; | 302 | struct posix_cputimers *pct = &tsk->signal->posix_cputimers; |
216 | 303 | ||
217 | /* Check if cputimer isn't running. This is accessed without locking. */ | 304 | /* Check if cputimer isn't running. This is accessed without locking. */ |
218 | if (!READ_ONCE(cputimer->running)) { | 305 | if (!READ_ONCE(pct->timers_active)) { |
306 | struct task_cputime sum; | ||
307 | |||
219 | /* | 308 | /* |
220 | * The POSIX timer interface allows for absolute time expiry | 309 | * The POSIX timer interface allows for absolute time expiry |
221 | * values through the TIMER_ABSTIME flag, therefore we have | 310 | * values through the TIMER_ABSTIME flag, therefore we have |
@@ -225,94 +314,69 @@ void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times) | |||
225 | update_gt_cputime(&cputimer->cputime_atomic, &sum); | 314 | update_gt_cputime(&cputimer->cputime_atomic, &sum); |
226 | 315 | ||
227 | /* | 316 | /* |
228 | * We're setting cputimer->running without a lock. Ensure | 317 | * We're setting timers_active without a lock. Ensure this |
229 | * this only gets written to in one operation. We set | 318 | * only gets written to in one operation. We set it after |
230 | * running after update_gt_cputime() as a small optimization, | 319 | * update_gt_cputime() as a small optimization, but |
231 | * but barriers are not required because update_gt_cputime() | 320 | * barriers are not required because update_gt_cputime() |
232 | * can handle concurrent updates. | 321 | * can handle concurrent updates. |
233 | */ | 322 | */ |
234 | WRITE_ONCE(cputimer->running, true); | 323 | WRITE_ONCE(pct->timers_active, true); |
235 | } | 324 | } |
236 | sample_cputime_atomic(times, &cputimer->cputime_atomic); | 325 | proc_sample_cputime_atomic(&cputimer->cputime_atomic, samples); |
237 | } | 326 | } |
238 | 327 | ||
239 | /* | 328 | static void __thread_group_cputime(struct task_struct *tsk, u64 *samples) |
240 | * Sample a process (thread group) clock for the given group_leader task. | ||
241 | * Must be called with task sighand lock held for safe while_each_thread() | ||
242 | * traversal. | ||
243 | */ | ||
244 | static int cpu_clock_sample_group(const clockid_t which_clock, | ||
245 | struct task_struct *p, | ||
246 | u64 *sample) | ||
247 | { | 329 | { |
248 | struct task_cputime cputime; | 330 | struct task_cputime ct; |
249 | 331 | ||
250 | switch (CPUCLOCK_WHICH(which_clock)) { | 332 | thread_group_cputime(tsk, &ct); |
251 | default: | 333 | store_samples(samples, ct.stime, ct.utime, ct.sum_exec_runtime); |
252 | return -EINVAL; | ||
253 | case CPUCLOCK_PROF: | ||
254 | thread_group_cputime(p, &cputime); | ||
255 | *sample = cputime.utime + cputime.stime; | ||
256 | break; | ||
257 | case CPUCLOCK_VIRT: | ||
258 | thread_group_cputime(p, &cputime); | ||
259 | *sample = cputime.utime; | ||
260 | break; | ||
261 | case CPUCLOCK_SCHED: | ||
262 | thread_group_cputime(p, &cputime); | ||
263 | *sample = cputime.sum_exec_runtime; | ||
264 | break; | ||
265 | } | ||
266 | return 0; | ||
267 | } | 334 | } |
268 | 335 | ||
269 | static int posix_cpu_clock_get_task(struct task_struct *tsk, | 336 | /* |
270 | const clockid_t which_clock, | 337 | * Sample a process (thread group) clock for the given task clkid. If the |
271 | struct timespec64 *tp) | 338 | * group's cputime accounting is already enabled, read the atomic |
339 | * store. Otherwise a full update is required. Task's sighand lock must be | ||
340 | * held to protect the task traversal on a full update. clkid is already | ||
341 | * validated. | ||
342 | */ | ||
343 | static u64 cpu_clock_sample_group(const clockid_t clkid, struct task_struct *p, | ||
344 | bool start) | ||
272 | { | 345 | { |
273 | int err = -EINVAL; | 346 | struct thread_group_cputimer *cputimer = &p->signal->cputimer; |
274 | u64 rtn; | 347 | struct posix_cputimers *pct = &p->signal->posix_cputimers; |
348 | u64 samples[CPUCLOCK_MAX]; | ||
275 | 349 | ||
276 | if (CPUCLOCK_PERTHREAD(which_clock)) { | 350 | if (!READ_ONCE(pct->timers_active)) { |
277 | if (same_thread_group(tsk, current)) | 351 | if (start) |
278 | err = cpu_clock_sample(which_clock, tsk, &rtn); | 352 | thread_group_start_cputime(p, samples); |
353 | else | ||
354 | __thread_group_cputime(p, samples); | ||
279 | } else { | 355 | } else { |
280 | if (tsk == current || thread_group_leader(tsk)) | 356 | proc_sample_cputime_atomic(&cputimer->cputime_atomic, samples); |
281 | err = cpu_clock_sample_group(which_clock, tsk, &rtn); | ||
282 | } | 357 | } |
283 | 358 | ||
284 | if (!err) | 359 | return samples[clkid]; |
285 | *tp = ns_to_timespec64(rtn); | ||
286 | |||
287 | return err; | ||
288 | } | 360 | } |
289 | 361 | ||
290 | 362 | static int posix_cpu_clock_get(const clockid_t clock, struct timespec64 *tp) | |
291 | static int posix_cpu_clock_get(const clockid_t which_clock, struct timespec64 *tp) | ||
292 | { | 363 | { |
293 | const pid_t pid = CPUCLOCK_PID(which_clock); | 364 | const clockid_t clkid = CPUCLOCK_WHICH(clock); |
294 | int err = -EINVAL; | 365 | struct task_struct *tsk; |
366 | u64 t; | ||
295 | 367 | ||
296 | if (pid == 0) { | 368 | tsk = get_task_for_clock_get(clock); |
297 | /* | 369 | if (!tsk) |
298 | * Special case constant value for our own clocks. | 370 | return -EINVAL; |
299 | * We don't have to do any lookup to find ourselves. | ||
300 | */ | ||
301 | err = posix_cpu_clock_get_task(current, which_clock, tp); | ||
302 | } else { | ||
303 | /* | ||
304 | * Find the given PID, and validate that the caller | ||
305 | * should be able to see it. | ||
306 | */ | ||
307 | struct task_struct *p; | ||
308 | rcu_read_lock(); | ||
309 | p = find_task_by_vpid(pid); | ||
310 | if (p) | ||
311 | err = posix_cpu_clock_get_task(p, which_clock, tp); | ||
312 | rcu_read_unlock(); | ||
313 | } | ||
314 | 371 | ||
315 | return err; | 372 | if (CPUCLOCK_PERTHREAD(clock)) |
373 | t = cpu_clock_sample(clkid, tsk); | ||
374 | else | ||
375 | t = cpu_clock_sample_group(clkid, tsk, false); | ||
376 | put_task_struct(tsk); | ||
377 | |||
378 | *tp = ns_to_timespec64(t); | ||
379 | return 0; | ||
316 | } | 380 | } |
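From userspace this path is reached through clock_gettime() with an encoded CPU clockid; a minimal sketch using the POSIX helper, with pid assumed valid:

	clockid_t clk;
	struct timespec ts;

	if (!clock_getcpuclockid(pid, &clk))	/* returns 0 on success */
		clock_gettime(clk, &ts);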
317 | 381 | ||
318 | /* | 382 | /* |
@@ -322,44 +386,15 @@ static int posix_cpu_clock_get(const clockid_t which_clock, struct timespec64 *t | |||
322 | */ | 386 | */ |
323 | static int posix_cpu_timer_create(struct k_itimer *new_timer) | 387 | static int posix_cpu_timer_create(struct k_itimer *new_timer) |
324 | { | 388 | { |
325 | int ret = 0; | 389 | struct task_struct *p = get_task_for_clock(new_timer->it_clock); |
326 | const pid_t pid = CPUCLOCK_PID(new_timer->it_clock); | ||
327 | struct task_struct *p; | ||
328 | 390 | ||
329 | if (CPUCLOCK_WHICH(new_timer->it_clock) >= CPUCLOCK_MAX) | 391 | if (!p) |
330 | return -EINVAL; | 392 | return -EINVAL; |
331 | 393 | ||
332 | new_timer->kclock = &clock_posix_cpu; | 394 | new_timer->kclock = &clock_posix_cpu; |
333 | 395 | timerqueue_init(&new_timer->it.cpu.node); | |
334 | INIT_LIST_HEAD(&new_timer->it.cpu.entry); | ||
335 | |||
336 | rcu_read_lock(); | ||
337 | if (CPUCLOCK_PERTHREAD(new_timer->it_clock)) { | ||
338 | if (pid == 0) { | ||
339 | p = current; | ||
340 | } else { | ||
341 | p = find_task_by_vpid(pid); | ||
342 | if (p && !same_thread_group(p, current)) | ||
343 | p = NULL; | ||
344 | } | ||
345 | } else { | ||
346 | if (pid == 0) { | ||
347 | p = current->group_leader; | ||
348 | } else { | ||
349 | p = find_task_by_vpid(pid); | ||
350 | if (p && !has_group_leader_pid(p)) | ||
351 | p = NULL; | ||
352 | } | ||
353 | } | ||
354 | new_timer->it.cpu.task = p; | 396 | new_timer->it.cpu.task = p; |
355 | if (p) { | 397 | return 0; |
356 | get_task_struct(p); | ||
357 | } else { | ||
358 | ret = -EINVAL; | ||
359 | } | ||
360 | rcu_read_unlock(); | ||
361 | |||
362 | return ret; | ||
363 | } | 398 | } |
364 | 399 | ||
365 | /* | 400 | /* |
@@ -370,12 +405,14 @@ static int posix_cpu_timer_create(struct k_itimer *new_timer) | |||
370 | */ | 405 | */ |
371 | static int posix_cpu_timer_del(struct k_itimer *timer) | 406 | static int posix_cpu_timer_del(struct k_itimer *timer) |
372 | { | 407 | { |
373 | int ret = 0; | 408 | struct cpu_timer *ctmr = &timer->it.cpu; |
374 | unsigned long flags; | 409 | struct task_struct *p = ctmr->task; |
375 | struct sighand_struct *sighand; | 410 | struct sighand_struct *sighand; |
376 | struct task_struct *p = timer->it.cpu.task; | 411 | unsigned long flags; |
412 | int ret = 0; | ||
377 | 413 | ||
378 | WARN_ON_ONCE(p == NULL); | 414 | if (WARN_ON_ONCE(!p)) |
415 | return -EINVAL; | ||
379 | 416 | ||
380 | /* | 417 | /* |
381 | * Protect against sighand release/switch in exit/exec and process/ | 418 | * Protect against sighand release/switch in exit/exec and process/ |
@@ -384,15 +421,15 @@ static int posix_cpu_timer_del(struct k_itimer *timer) | |||
384 | sighand = lock_task_sighand(p, &flags); | 421 | sighand = lock_task_sighand(p, &flags); |
385 | if (unlikely(sighand == NULL)) { | 422 | if (unlikely(sighand == NULL)) { |
386 | /* | 423 | /* |
387 | * We raced with the reaping of the task. | 424 | * This raced with the reaping of the task. The exit cleanup |
388 | * The deletion should have cleared us off the list. | 425 | * should have removed this timer from the timer queue. |
389 | */ | 426 | */ |
390 | WARN_ON_ONCE(!list_empty(&timer->it.cpu.entry)); | 427 | WARN_ON_ONCE(ctmr->head || timerqueue_node_queued(&ctmr->node)); |
391 | } else { | 428 | } else { |
392 | if (timer->it.cpu.firing) | 429 | if (timer->it.cpu.firing) |
393 | ret = TIMER_RETRY; | 430 | ret = TIMER_RETRY; |
394 | else | 431 | else |
395 | list_del(&timer->it.cpu.entry); | 432 | cpu_timer_dequeue(ctmr); |
396 | 433 | ||
397 | unlock_task_sighand(p, &flags); | 434 | unlock_task_sighand(p, &flags); |
398 | } | 435 | } |
@@ -403,25 +440,30 @@ static int posix_cpu_timer_del(struct k_itimer *timer) | |||
403 | return ret; | 440 | return ret; |
404 | } | 441 | } |
405 | 442 | ||
406 | static void cleanup_timers_list(struct list_head *head) | 443 | static void cleanup_timerqueue(struct timerqueue_head *head) |
407 | { | 444 | { |
408 | struct cpu_timer_list *timer, *next; | 445 | struct timerqueue_node *node; |
446 | struct cpu_timer *ctmr; | ||
409 | 447 | ||
410 | list_for_each_entry_safe(timer, next, head, entry) | 448 | while ((node = timerqueue_getnext(head))) { |
411 | list_del_init(&timer->entry); | 449 | timerqueue_del(head, node); |
450 | ctmr = container_of(node, struct cpu_timer, node); | ||
451 | ctmr->head = NULL; | ||
452 | } | ||
412 | } | 453 | } |
413 | 454 | ||
414 | /* | 455 | /* |
415 | * Clean out CPU timers still ticking when a thread exited. The task | 456 | * Clean out CPU timers which are still armed when a thread exits. The |
416 | * pointer is cleared, and the expiry time is replaced with the residual | 457 | * timers are only removed from the list. No other updates are done. The |
417 | * time for later timer_gettime calls to return. | 458 | * corresponding posix timers are still accessible, but cannot be rearmed. |
459 | * | ||
418 | * This must be called with the siglock held. | 460 | * This must be called with the siglock held. |
419 | */ | 461 | */ |
420 | static void cleanup_timers(struct list_head *head) | 462 | static void cleanup_timers(struct posix_cputimers *pct) |
421 | { | 463 | { |
422 | cleanup_timers_list(head); | 464 | cleanup_timerqueue(&pct->bases[CPUCLOCK_PROF].tqhead); |
423 | cleanup_timers_list(++head); | 465 | cleanup_timerqueue(&pct->bases[CPUCLOCK_VIRT].tqhead); |
424 | cleanup_timers_list(++head); | 466 | cleanup_timerqueue(&pct->bases[CPUCLOCK_SCHED].tqhead); |
425 | } | 467 | } |
426 | 468 | ||
427 | /* | 469 | /* |
@@ -431,16 +473,11 @@ static void cleanup_timers(struct list_head *head) | |||
431 | */ | 473 | */ |
432 | void posix_cpu_timers_exit(struct task_struct *tsk) | 474 | void posix_cpu_timers_exit(struct task_struct *tsk) |
433 | { | 475 | { |
434 | cleanup_timers(tsk->cpu_timers); | 476 | cleanup_timers(&tsk->posix_cputimers); |
435 | } | 477 | } |
436 | void posix_cpu_timers_exit_group(struct task_struct *tsk) | 478 | void posix_cpu_timers_exit_group(struct task_struct *tsk) |
437 | { | 479 | { |
438 | cleanup_timers(tsk->signal->cpu_timers); | 480 | cleanup_timers(&tsk->signal->posix_cputimers); |
439 | } | ||
440 | |||
441 | static inline int expires_gt(u64 expires, u64 new_exp) | ||
442 | { | ||
443 | return expires == 0 || expires > new_exp; | ||
444 | } | 481 | } |
445 | 482 | ||
446 | /* | 483 | /* |
@@ -449,58 +486,33 @@ static inline int expires_gt(u64 expires, u64 new_exp) | |||
449 | */ | 486 | */ |
450 | static void arm_timer(struct k_itimer *timer) | 487 | static void arm_timer(struct k_itimer *timer) |
451 | { | 488 | { |
452 | struct task_struct *p = timer->it.cpu.task; | 489 | int clkidx = CPUCLOCK_WHICH(timer->it_clock); |
453 | struct list_head *head, *listpos; | 490 | struct cpu_timer *ctmr = &timer->it.cpu; |
454 | struct task_cputime *cputime_expires; | 491 | u64 newexp = cpu_timer_getexpires(ctmr); |
455 | struct cpu_timer_list *const nt = &timer->it.cpu; | 492 | struct task_struct *p = ctmr->task; |
456 | struct cpu_timer_list *next; | 493 | struct posix_cputimer_base *base; |
457 | 494 | ||
458 | if (CPUCLOCK_PERTHREAD(timer->it_clock)) { | 495 | if (CPUCLOCK_PERTHREAD(timer->it_clock)) |
459 | head = p->cpu_timers; | 496 | base = p->posix_cputimers.bases + clkidx; |
460 | cputime_expires = &p->cputime_expires; | 497 | else |
461 | } else { | 498 | base = p->signal->posix_cputimers.bases + clkidx; |
462 | head = p->signal->cpu_timers; | 499 | |
463 | cputime_expires = &p->signal->cputime_expires; | 500 | if (!cpu_timer_enqueue(&base->tqhead, ctmr)) |
464 | } | 501 | return; |
465 | head += CPUCLOCK_WHICH(timer->it_clock); | ||
466 | |||
467 | listpos = head; | ||
468 | list_for_each_entry(next, head, entry) { | ||
469 | if (nt->expires < next->expires) | ||
470 | break; | ||
471 | listpos = &next->entry; | ||
472 | } | ||
473 | list_add(&nt->entry, listpos); | ||
474 | |||
475 | if (listpos == head) { | ||
476 | u64 exp = nt->expires; | ||
477 | 502 | ||
478 | /* | 503 | /* |
479 | * We are the new earliest-expiring POSIX 1.b timer, hence | 504 | * We are the new earliest-expiring POSIX 1.b timer, hence |
480 | * need to update expiration cache. Take into account that | 505 | * need to update expiration cache. Take into account that |
481 | * for process timers we share expiration cache with itimers | 506 | * for process timers we share expiration cache with itimers |
482 | * and RLIMIT_CPU and for thread timers with RLIMIT_RTTIME. | 507 | * and RLIMIT_CPU and for thread timers with RLIMIT_RTTIME. |
483 | */ | 508 | */ |
509 | if (newexp < base->nextevt) | ||
510 | base->nextevt = newexp; | ||
484 | 511 | ||
485 | switch (CPUCLOCK_WHICH(timer->it_clock)) { | 512 | if (CPUCLOCK_PERTHREAD(timer->it_clock)) |
486 | case CPUCLOCK_PROF: | 513 | tick_dep_set_task(p, TICK_DEP_BIT_POSIX_TIMER); |
487 | if (expires_gt(cputime_expires->prof_exp, exp)) | 514 | else |
488 | cputime_expires->prof_exp = exp; | 515 | tick_dep_set_signal(p->signal, TICK_DEP_BIT_POSIX_TIMER); |
489 | break; | ||
490 | case CPUCLOCK_VIRT: | ||
491 | if (expires_gt(cputime_expires->virt_exp, exp)) | ||
492 | cputime_expires->virt_exp = exp; | ||
493 | break; | ||
494 | case CPUCLOCK_SCHED: | ||
495 | if (expires_gt(cputime_expires->sched_exp, exp)) | ||
496 | cputime_expires->sched_exp = exp; | ||
497 | break; | ||
498 | } | ||
499 | if (CPUCLOCK_PERTHREAD(timer->it_clock)) | ||
500 | tick_dep_set_task(p, TICK_DEP_BIT_POSIX_TIMER); | ||
501 | else | ||
502 | tick_dep_set_signal(p->signal, TICK_DEP_BIT_POSIX_TIMER); | ||
503 | } | ||
504 | } | 516 | } |
505 | 517 | ||
506 | /* | 518 | /* |
@@ -508,24 +520,26 @@ static void arm_timer(struct k_itimer *timer) | |||
508 | */ | 520 | */ |
509 | static void cpu_timer_fire(struct k_itimer *timer) | 521 | static void cpu_timer_fire(struct k_itimer *timer) |
510 | { | 522 | { |
523 | struct cpu_timer *ctmr = &timer->it.cpu; | ||
524 | |||
511 | if ((timer->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) { | 525 | if ((timer->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) { |
512 | /* | 526 | /* |
513 | * User doesn't want any signal. | 527 | * User doesn't want any signal. |
514 | */ | 528 | */ |
515 | timer->it.cpu.expires = 0; | 529 | cpu_timer_setexpires(ctmr, 0); |
516 | } else if (unlikely(timer->sigq == NULL)) { | 530 | } else if (unlikely(timer->sigq == NULL)) { |
517 | /* | 531 | /* |
518 | * This is a special case for clock_nanosleep, | 532 | * This is a special case for clock_nanosleep, |
519 | * not a normal timer from sys_timer_create. | 533 | * not a normal timer from sys_timer_create. |
520 | */ | 534 | */ |
521 | wake_up_process(timer->it_process); | 535 | wake_up_process(timer->it_process); |
522 | timer->it.cpu.expires = 0; | 536 | cpu_timer_setexpires(ctmr, 0); |
523 | } else if (!timer->it_interval) { | 537 | } else if (!timer->it_interval) { |
524 | /* | 538 | /* |
525 | * One-shot timer. Clear it as soon as it's fired. | 539 | * One-shot timer. Clear it as soon as it's fired. |
526 | */ | 540 | */ |
527 | posix_timer_event(timer, 0); | 541 | posix_timer_event(timer, 0); |
528 | timer->it.cpu.expires = 0; | 542 | cpu_timer_setexpires(ctmr, 0); |
529 | } else if (posix_timer_event(timer, ++timer->it_requeue_pending)) { | 543 | } else if (posix_timer_event(timer, ++timer->it_requeue_pending)) { |
530 | /* | 544 | /* |
531 | * The signal did not get queued because the signal | 545 | * The signal did not get queued because the signal |
@@ -539,33 +553,6 @@ static void cpu_timer_fire(struct k_itimer *timer) | |||
539 | } | 553 | } |
540 | 554 | ||
541 | /* | 555 | /* |
542 | * Sample a process (thread group) timer for the given group_leader task. | ||
543 | * Must be called with task sighand lock held for safe while_each_thread() | ||
544 | * traversal. | ||
545 | */ | ||
546 | static int cpu_timer_sample_group(const clockid_t which_clock, | ||
547 | struct task_struct *p, u64 *sample) | ||
548 | { | ||
549 | struct task_cputime cputime; | ||
550 | |||
551 | thread_group_cputimer(p, &cputime); | ||
552 | switch (CPUCLOCK_WHICH(which_clock)) { | ||
553 | default: | ||
554 | return -EINVAL; | ||
555 | case CPUCLOCK_PROF: | ||
556 | *sample = cputime.utime + cputime.stime; | ||
557 | break; | ||
558 | case CPUCLOCK_VIRT: | ||
559 | *sample = cputime.utime; | ||
560 | break; | ||
561 | case CPUCLOCK_SCHED: | ||
562 | *sample = cputime.sum_exec_runtime; | ||
563 | break; | ||
564 | } | ||
565 | return 0; | ||
566 | } | ||
567 | |||
568 | /* | ||
569 | * Guts of sys_timer_settime for CPU timers. | 556 | * Guts of sys_timer_settime for CPU timers. |
570 | * This is called with the timer locked and interrupts disabled. | 557 | * This is called with the timer locked and interrupts disabled. |
571 | * If we return TIMER_RETRY, it's necessary to release the timer's lock | 558 | * If we return TIMER_RETRY, it's necessary to release the timer's lock |
@@ -574,13 +561,16 @@ static int cpu_timer_sample_group(const clockid_t which_clock, | |||
574 | static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags, | 561 | static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags, |
575 | struct itimerspec64 *new, struct itimerspec64 *old) | 562 | struct itimerspec64 *new, struct itimerspec64 *old) |
576 | { | 563 | { |
577 | unsigned long flags; | 564 | clockid_t clkid = CPUCLOCK_WHICH(timer->it_clock); |
578 | struct sighand_struct *sighand; | ||
579 | struct task_struct *p = timer->it.cpu.task; | ||
580 | u64 old_expires, new_expires, old_incr, val; | 565 | u64 old_expires, new_expires, old_incr, val; |
581 | int ret; | 566 | struct cpu_timer *ctmr = &timer->it.cpu; |
567 | struct task_struct *p = ctmr->task; | ||
568 | struct sighand_struct *sighand; | ||
569 | unsigned long flags; | ||
570 | int ret = 0; | ||
582 | 571 | ||
583 | WARN_ON_ONCE(p == NULL); | 572 | if (WARN_ON_ONCE(!p)) |
573 | return -EINVAL; | ||
584 | 574 | ||
585 | /* | 575 | /* |
586 | * Use the to_ktime conversion because that clamps the maximum | 576 | * Use the to_ktime conversion because that clamps the maximum |
@@ -597,22 +587,21 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags, | |||
597 | * If p has just been reaped, we can no | 587 | * If p has just been reaped, we can no |
598 | * longer get any information about it at all. | 588 | * longer get any information about it at all. |
599 | */ | 589 | */ |
600 | if (unlikely(sighand == NULL)) { | 590 | if (unlikely(sighand == NULL)) |
601 | return -ESRCH; | 591 | return -ESRCH; |
602 | } | ||
603 | 592 | ||
604 | /* | 593 | /* |
605 | * Disarm any old timer after extracting its expiry time. | 594 | * Disarm any old timer after extracting its expiry time. |
606 | */ | 595 | */ |
607 | |||
608 | ret = 0; | ||
609 | old_incr = timer->it_interval; | 596 | old_incr = timer->it_interval; |
610 | old_expires = timer->it.cpu.expires; | 597 | old_expires = cpu_timer_getexpires(ctmr); |
598 | |||
611 | if (unlikely(timer->it.cpu.firing)) { | 599 | if (unlikely(timer->it.cpu.firing)) { |
612 | timer->it.cpu.firing = -1; | 600 | timer->it.cpu.firing = -1; |
613 | ret = TIMER_RETRY; | 601 | ret = TIMER_RETRY; |
614 | } else | 602 | } else { |
615 | list_del_init(&timer->it.cpu.entry); | 603 | cpu_timer_dequeue(ctmr); |
604 | } | ||
616 | 605 | ||
617 | /* | 606 | /* |
618 | * We need to sample the current value to convert the new | 607 | * We need to sample the current value to convert the new |
@@ -622,11 +611,10 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags, | |||
622 | * times (in arm_timer). With an absolute time, we must | 611 | * times (in arm_timer). With an absolute time, we must |
623 | * check if it's already passed. In short, we need a sample. | 612 | * check if it's already passed. In short, we need a sample. |
624 | */ | 613 | */ |
625 | if (CPUCLOCK_PERTHREAD(timer->it_clock)) { | 614 | if (CPUCLOCK_PERTHREAD(timer->it_clock)) |
626 | cpu_clock_sample(timer->it_clock, p, &val); | 615 | val = cpu_clock_sample(clkid, p); |
627 | } else { | 616 | else |
628 | cpu_timer_sample_group(timer->it_clock, p, &val); | 617 | val = cpu_clock_sample_group(clkid, p, true); |
629 | } | ||
630 | 618 | ||
631 | if (old) { | 619 | if (old) { |
632 | if (old_expires == 0) { | 620 | if (old_expires == 0) { |
@@ -634,18 +622,16 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags, | |||
634 | old->it_value.tv_nsec = 0; | 622 | old->it_value.tv_nsec = 0; |
635 | } else { | 623 | } else { |
636 | /* | 624 | /* |
637 | * Update the timer in case it has | 625 | * Update the timer in case it has overrun already. |
638 | * overrun already. If it has, | 626 | * If it has, we'll report it as having overrun and |
639 | * we'll report it as having overrun | 627 | * with the next reloaded timer already ticking, |
640 | * and with the next reloaded timer | 628 | * though we are swallowing that pending |
641 | * already ticking, though we are | 629 | * notification here to install the new setting. |
642 | * swallowing that pending | ||
643 | * notification here to install the | ||
644 | * new setting. | ||
645 | */ | 630 | */ |
646 | bump_cpu_timer(timer, val); | 631 | u64 exp = bump_cpu_timer(timer, val); |
647 | if (val < timer->it.cpu.expires) { | 632 | |
648 | old_expires = timer->it.cpu.expires - val; | 633 | if (val < exp) { |
634 | old_expires = exp - val; | ||
649 | old->it_value = ns_to_timespec64(old_expires); | 635 | old->it_value = ns_to_timespec64(old_expires); |
650 | } else { | 636 | } else { |
651 | old->it_value.tv_nsec = 1; | 637 | old->it_value.tv_nsec = 1; |
@@ -674,7 +660,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags, | |||
674 | * For a timer with no notification action, we don't actually | 660 | * For a timer with no notification action, we don't actually |
675 | * arm the timer (we'll just fake it for timer_gettime). | 661 | * arm the timer (we'll just fake it for timer_gettime). |
676 | */ | 662 | */ |
677 | timer->it.cpu.expires = new_expires; | 663 | cpu_timer_setexpires(ctmr, new_expires); |
678 | if (new_expires != 0 && val < new_expires) { | 664 | if (new_expires != 0 && val < new_expires) { |
679 | arm_timer(timer); | 665 | arm_timer(timer); |
680 | } | 666 | } |
@@ -715,24 +701,27 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags, | |||
715 | 701 | ||
716 | static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec64 *itp) | 702 | static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec64 *itp) |
717 | { | 703 | { |
718 | u64 now; | 704 | clockid_t clkid = CPUCLOCK_WHICH(timer->it_clock); |
719 | struct task_struct *p = timer->it.cpu.task; | 705 | struct cpu_timer *ctmr = &timer->it.cpu; |
706 | u64 now, expires = cpu_timer_getexpires(ctmr); | ||
707 | struct task_struct *p = ctmr->task; | ||
720 | 708 | ||
721 | WARN_ON_ONCE(p == NULL); | 709 | if (WARN_ON_ONCE(!p)) |
710 | return; | ||
722 | 711 | ||
723 | /* | 712 | /* |
724 | * Easy part: convert the reload time. | 713 | * Easy part: convert the reload time. |
725 | */ | 714 | */ |
726 | itp->it_interval = ktime_to_timespec64(timer->it_interval); | 715 | itp->it_interval = ktime_to_timespec64(timer->it_interval); |
727 | 716 | ||
728 | if (!timer->it.cpu.expires) | 717 | if (!expires) |
729 | return; | 718 | return; |
730 | 719 | ||
731 | /* | 720 | /* |
732 | * Sample the clock to take the difference with the expiry time. | 721 | * Sample the clock to take the difference with the expiry time. |
733 | */ | 722 | */ |
734 | if (CPUCLOCK_PERTHREAD(timer->it_clock)) { | 723 | if (CPUCLOCK_PERTHREAD(timer->it_clock)) { |
735 | cpu_clock_sample(timer->it_clock, p, &now); | 724 | now = cpu_clock_sample(clkid, p); |
736 | } else { | 725 | } else { |
737 | struct sighand_struct *sighand; | 726 | struct sighand_struct *sighand; |
738 | unsigned long flags; | 727 | unsigned long flags; |
@@ -747,18 +736,18 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec64 *itp | |||
747 | /* | 736 | /* |
748 | * The process has been reaped. | 737 | * The process has been reaped. |
749 | * We can't even collect a sample any more. | 738 | * We can't even collect a sample any more. |
750 | * Call the timer disarmed, nothing else to do. | 739 | * Disarm the timer, nothing else to do. |
751 | */ | 740 | */ |
752 | timer->it.cpu.expires = 0; | 741 | cpu_timer_setexpires(ctmr, 0); |
753 | return; | 742 | return; |
754 | } else { | 743 | } else { |
755 | cpu_timer_sample_group(timer->it_clock, p, &now); | 744 | now = cpu_clock_sample_group(clkid, p, false); |
756 | unlock_task_sighand(p, &flags); | 745 | unlock_task_sighand(p, &flags); |
757 | } | 746 | } |
758 | } | 747 | } |
759 | 748 | ||
760 | if (now < timer->it.cpu.expires) { | 749 | if (now < expires) { |
761 | itp->it_value = ns_to_timespec64(timer->it.cpu.expires - now); | 750 | itp->it_value = ns_to_timespec64(expires - now); |
762 | } else { | 751 | } else { |
763 | /* | 752 | /* |
764 | * The timer should have expired already, but the firing | 753 | * The timer should have expired already, but the firing |
@@ -769,26 +758,42 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec64 *itp | |||
769 | } | 758 | } |
770 | } | 759 | } |
771 | 760 | ||
772 | static unsigned long long | 761 | #define MAX_COLLECTED 20 |
773 | check_timers_list(struct list_head *timers, | ||
774 | struct list_head *firing, | ||
775 | unsigned long long curr) | ||
776 | { | ||
777 | int maxfire = 20; | ||
778 | 762 | ||
779 | while (!list_empty(timers)) { | 763 | static u64 collect_timerqueue(struct timerqueue_head *head, |
780 | struct cpu_timer_list *t; | 764 | struct list_head *firing, u64 now) |
765 | { | ||
766 | struct timerqueue_node *next; | ||
767 | int i = 0; | ||
768 | |||
769 | while ((next = timerqueue_getnext(head))) { | ||
770 | struct cpu_timer *ctmr; | ||
771 | u64 expires; | ||
772 | |||
773 | ctmr = container_of(next, struct cpu_timer, node); | ||
774 | expires = cpu_timer_getexpires(ctmr); | ||
775 | /* Limit the number of timers to expire at once */ | ||
776 | if (++i == MAX_COLLECTED || now < expires) | ||
777 | return expires; | ||
778 | |||
779 | ctmr->firing = 1; | ||
780 | cpu_timer_dequeue(ctmr); | ||
781 | list_add_tail(&ctmr->elist, firing); | ||
782 | } | ||
781 | 783 | ||
782 | t = list_first_entry(timers, struct cpu_timer_list, entry); | 784 | return U64_MAX; |
785 | } | ||
783 | 786 | ||
784 | if (!--maxfire || curr < t->expires) | 787 | static void collect_posix_cputimers(struct posix_cputimers *pct, u64 *samples, |
785 | return t->expires; | 788 | struct list_head *firing) |
789 | { | ||
790 | struct posix_cputimer_base *base = pct->bases; | ||
791 | int i; | ||
786 | 792 | ||
787 | t->firing = 1; | 793 | for (i = 0; i < CPUCLOCK_MAX; i++, base++) { |
788 | list_move_tail(&t->entry, firing); | 794 | base->nextevt = collect_timerqueue(&base->tqhead, firing, |
795 | samples[i]); | ||
789 | } | 796 | } |
790 | |||
791 | return 0; | ||
792 | } | 797 | } |
793 | 798 | ||
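collect_timerqueue() hands back the expiry of the first timer it leaves queued, or U64_MAX when the queue is drained, and collect_posix_cputimers() writes that straight into each base's nextevt. Once all three bases read U64_MAX, expiry_cache_is_inactive() is true again and the callers below can drop the tick dependency. Illustrative flow:

	u64 samples[CPUCLOCK_MAX];
	LIST_HEAD(firing);

	task_sample_cputime(tsk, samples);
	collect_posix_cputimers(&tsk->posix_cputimers, samples, &firing);
	/* bases[i].nextevt now caches the earliest remaining expiry or U64_MAX */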
794 | static inline void check_dl_overrun(struct task_struct *tsk) | 799 | static inline void check_dl_overrun(struct task_struct *tsk) |
@@ -799,6 +804,20 @@ static inline void check_dl_overrun(struct task_struct *tsk) | |||
799 | } | 804 | } |
800 | } | 805 | } |
801 | 806 | ||
807 | static bool check_rlimit(u64 time, u64 limit, int signo, bool rt, bool hard) | ||
808 | { | ||
809 | if (time < limit) | ||
810 | return false; | ||
811 | |||
812 | if (print_fatal_signals) { | ||
813 | pr_info("%s Watchdog Timeout (%s): %s[%d]\n", | ||
814 | rt ? "RT" : "CPU", hard ? "hard" : "soft", | ||
815 | current->comm, task_pid_nr(current)); | ||
816 | } | ||
817 | __group_send_sig_info(signo, SEND_SIG_PRIV, current); | ||
818 | return true; | ||
819 | } | ||
820 | |||
802 | /* | 821 | /* |
803 | * Check for any per-thread CPU timers that have fired and move them off | 822 | * Check for any per-thread CPU timers that have fired and move them off |
804 | * the tsk->cpu_timers[N] list onto the firing list. Here we update the | 823 | * the tsk->cpu_timers[N] list onto the firing list. Here we update the |
@@ -807,76 +826,50 @@ static inline void check_dl_overrun(struct task_struct *tsk) | |||
807 | static void check_thread_timers(struct task_struct *tsk, | 826 | static void check_thread_timers(struct task_struct *tsk, |
808 | struct list_head *firing) | 827 | struct list_head *firing) |
809 | { | 828 | { |
810 | struct list_head *timers = tsk->cpu_timers; | 829 | struct posix_cputimers *pct = &tsk->posix_cputimers; |
811 | struct task_cputime *tsk_expires = &tsk->cputime_expires; | 830 | u64 samples[CPUCLOCK_MAX]; |
812 | u64 expires; | ||
813 | unsigned long soft; | 831 | unsigned long soft; |
814 | 832 | ||
815 | if (dl_task(tsk)) | 833 | if (dl_task(tsk)) |
816 | check_dl_overrun(tsk); | 834 | check_dl_overrun(tsk); |
817 | 835 | ||
818 | /* | 836 | if (expiry_cache_is_inactive(pct)) |
819 | * If cputime_expires is zero, then there are no active | ||
820 | * per thread CPU timers. | ||
821 | */ | ||
822 | if (task_cputime_zero(&tsk->cputime_expires)) | ||
823 | return; | 837 | return; |
824 | 838 | ||
825 | expires = check_timers_list(timers, firing, prof_ticks(tsk)); | 839 | task_sample_cputime(tsk, samples); |
826 | tsk_expires->prof_exp = expires; | 840 | collect_posix_cputimers(pct, samples, firing); |
827 | |||
828 | expires = check_timers_list(++timers, firing, virt_ticks(tsk)); | ||
829 | tsk_expires->virt_exp = expires; | ||
830 | |||
831 | tsk_expires->sched_exp = check_timers_list(++timers, firing, | ||
832 | tsk->se.sum_exec_runtime); | ||
833 | 841 | ||
834 | /* | 842 | /* |
835 | * Check for the special case thread timers. | 843 | * Check for the special case thread timers. |
836 | */ | 844 | */ |
837 | soft = task_rlimit(tsk, RLIMIT_RTTIME); | 845 | soft = task_rlimit(tsk, RLIMIT_RTTIME); |
838 | if (soft != RLIM_INFINITY) { | 846 | if (soft != RLIM_INFINITY) { |
847 | /* Task RT timeout is accounted in jiffies. RTTIME is usec */ | ||
848 | unsigned long rttime = tsk->rt.timeout * (USEC_PER_SEC / HZ); | ||
839 | unsigned long hard = task_rlimit_max(tsk, RLIMIT_RTTIME); | 849 | unsigned long hard = task_rlimit_max(tsk, RLIMIT_RTTIME); |
840 | 850 | ||
851 | /* At the hard limit, send SIGKILL. No further action. */ | ||
841 | if (hard != RLIM_INFINITY && | 852 | if (hard != RLIM_INFINITY && |
842 | tsk->rt.timeout > DIV_ROUND_UP(hard, USEC_PER_SEC/HZ)) { | 853 | check_rlimit(rttime, hard, SIGKILL, true, true)) |
843 | /* | ||
844 | * At the hard limit, we just die. | ||
845 | * No need to calculate anything else now. | ||
846 | */ | ||
847 | if (print_fatal_signals) { | ||
848 | pr_info("CPU Watchdog Timeout (hard): %s[%d]\n", | ||
849 | tsk->comm, task_pid_nr(tsk)); | ||
850 | } | ||
851 | __group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk); | ||
852 | return; | 854 | return; |
853 | } | 855 | |
854 | if (tsk->rt.timeout > DIV_ROUND_UP(soft, USEC_PER_SEC/HZ)) { | 856 | /* At the soft limit, send a SIGXCPU every second */ |
855 | /* | 857 | if (check_rlimit(rttime, soft, SIGXCPU, true, false)) { |
856 | * At the soft limit, send a SIGXCPU every second. | 858 | soft += USEC_PER_SEC; |
857 | */ | 859 | tsk->signal->rlim[RLIMIT_RTTIME].rlim_cur = soft; |
858 | if (soft < hard) { | ||
859 | soft += USEC_PER_SEC; | ||
860 | tsk->signal->rlim[RLIMIT_RTTIME].rlim_cur = | ||
861 | soft; | ||
862 | } | ||
863 | if (print_fatal_signals) { | ||
864 | pr_info("RT Watchdog Timeout (soft): %s[%d]\n", | ||
865 | tsk->comm, task_pid_nr(tsk)); | ||
866 | } | ||
867 | __group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk); | ||
868 | } | 860 | } |
869 | } | 861 | } |
870 | if (task_cputime_zero(tsk_expires)) | 862 | |
863 | if (expiry_cache_is_inactive(pct)) | ||
871 | tick_dep_clear_task(tsk, TICK_DEP_BIT_POSIX_TIMER); | 864 | tick_dep_clear_task(tsk, TICK_DEP_BIT_POSIX_TIMER); |
872 | } | 865 | } |
873 | 866 | ||
874 | static inline void stop_process_timers(struct signal_struct *sig) | 867 | static inline void stop_process_timers(struct signal_struct *sig) |
875 | { | 868 | { |
876 | struct thread_group_cputimer *cputimer = &sig->cputimer; | 869 | struct posix_cputimers *pct = &sig->posix_cputimers; |
877 | 870 | ||
878 | /* Turn off cputimer->running. This is done without locking. */ | 871 | /* Turn off the active flag. This is done without locking. */ |
879 | WRITE_ONCE(cputimer->running, false); | 872 | WRITE_ONCE(pct->timers_active, false); |
880 | tick_dep_clear_signal(sig, TICK_DEP_BIT_POSIX_TIMER); | 873 | tick_dep_clear_signal(sig, TICK_DEP_BIT_POSIX_TIMER); |
881 | } | 874 | } |
882 | 875 | ||
@@ -898,7 +891,7 @@ static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it, | |||
898 | __group_send_sig_info(signo, SEND_SIG_PRIV, tsk); | 891 | __group_send_sig_info(signo, SEND_SIG_PRIV, tsk); |
899 | } | 892 | } |
900 | 893 | ||
901 | if (it->expires && (!*expires || it->expires < *expires)) | 894 | if (it->expires && it->expires < *expires) |
902 | *expires = it->expires; | 895 | *expires = it->expires; |
903 | } | 896 | } |
904 | 897 | ||
@@ -911,87 +904,69 @@ static void check_process_timers(struct task_struct *tsk, | |||
911 | struct list_head *firing) | 904 | struct list_head *firing) |
912 | { | 905 | { |
913 | struct signal_struct *const sig = tsk->signal; | 906 | struct signal_struct *const sig = tsk->signal; |
914 | u64 utime, ptime, virt_expires, prof_expires; | 907 | struct posix_cputimers *pct = &sig->posix_cputimers; |
915 | u64 sum_sched_runtime, sched_expires; | 908 | u64 samples[CPUCLOCK_MAX]; |
916 | struct list_head *timers = sig->cpu_timers; | ||
917 | struct task_cputime cputime; | ||
918 | unsigned long soft; | 909 | unsigned long soft; |
919 | 910 | ||
920 | /* | 911 | /* |
921 | * If cputimer is not running, then there are no active | 912 | * If there are no active process wide timers (POSIX 1.b, itimers, |
922 | * process wide timers (POSIX 1.b, itimers, RLIMIT_CPU). | 913 | * RLIMIT_CPU), nothing to check. Also skip the process wide timer |
914 | * processing when there is already another task handling them. | ||
923 | */ | 915 | */ |
924 | if (!READ_ONCE(tsk->signal->cputimer.running)) | 916 | if (!READ_ONCE(pct->timers_active) || pct->expiry_active) |
925 | return; | 917 | return; |
926 | 918 | ||
927 | /* | 919 | /* |
928 | * Signify that a thread is checking for process timers. | 920 | * Signify that a thread is checking for process timers. |
929 | * Write access to this field is protected by the sighand lock. | 921 | * Write access to this field is protected by the sighand lock. |
930 | */ | 922 | */ |
931 | sig->cputimer.checking_timer = true; | 923 | pct->expiry_active = true; |
932 | 924 | ||
933 | /* | 925 | /* |
934 | * Collect the current process totals. | 926 | * Collect the current process totals. Group accounting is active |
927 | * so the sample can be taken directly. | ||
935 | */ | 928 | */ |
936 | thread_group_cputimer(tsk, &cputime); | 929 | proc_sample_cputime_atomic(&sig->cputimer.cputime_atomic, samples); |
937 | utime = cputime.utime; | 930 | collect_posix_cputimers(pct, samples, firing); |
938 | ptime = utime + cputime.stime; | ||
939 | sum_sched_runtime = cputime.sum_exec_runtime; | ||
940 | |||
941 | prof_expires = check_timers_list(timers, firing, ptime); | ||
942 | virt_expires = check_timers_list(++timers, firing, utime); | ||
943 | sched_expires = check_timers_list(++timers, firing, sum_sched_runtime); | ||
944 | 931 | ||
945 | /* | 932 | /* |
946 | * Check for the special case process timers. | 933 | * Check for the special case process timers. |
947 | */ | 934 | */ |
948 | check_cpu_itimer(tsk, &sig->it[CPUCLOCK_PROF], &prof_expires, ptime, | 935 | check_cpu_itimer(tsk, &sig->it[CPUCLOCK_PROF], |
949 | SIGPROF); | 936 | &pct->bases[CPUCLOCK_PROF].nextevt, |
950 | check_cpu_itimer(tsk, &sig->it[CPUCLOCK_VIRT], &virt_expires, utime, | 937 | samples[CPUCLOCK_PROF], SIGPROF); |
951 | SIGVTALRM); | 938 | check_cpu_itimer(tsk, &sig->it[CPUCLOCK_VIRT], |
939 | &pct->bases[CPUCLOCK_VIRT].nextevt, | ||
940 | samples[CPUCLOCK_VIRT], SIGVTALRM); | ||
941 | |||
952 | soft = task_rlimit(tsk, RLIMIT_CPU); | 942 | soft = task_rlimit(tsk, RLIMIT_CPU); |
953 | if (soft != RLIM_INFINITY) { | 943 | if (soft != RLIM_INFINITY) { |
954 | unsigned long psecs = div_u64(ptime, NSEC_PER_SEC); | 944 | /* RLIMIT_CPU is in seconds. Samples are nanoseconds */ |
955 | unsigned long hard = task_rlimit_max(tsk, RLIMIT_CPU); | 945 | unsigned long hard = task_rlimit_max(tsk, RLIMIT_CPU); |
956 | u64 x; | 946 | u64 ptime = samples[CPUCLOCK_PROF]; |
957 | if (psecs >= hard) { | 947 | u64 softns = (u64)soft * NSEC_PER_SEC; |
958 | /* | 948 | u64 hardns = (u64)hard * NSEC_PER_SEC; |
959 | * At the hard limit, we just die. | 949 | |
960 | * No need to calculate anything else now. | 950 | /* At the hard limit, send SIGKILL. No further action. */ |
961 | */ | 951 | if (hard != RLIM_INFINITY && |
962 | if (print_fatal_signals) { | 952 | check_rlimit(ptime, hardns, SIGKILL, false, true)) |
963 | pr_info("RT Watchdog Timeout (hard): %s[%d]\n", | ||
964 | tsk->comm, task_pid_nr(tsk)); | ||
965 | } | ||
966 | __group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk); | ||
967 | return; | 953 | return; |
954 | |||
955 | /* At the soft limit, send a SIGXCPU every second */ | ||
956 | if (check_rlimit(ptime, softns, SIGXCPU, false, false)) { | ||
957 | sig->rlim[RLIMIT_CPU].rlim_cur = soft + 1; | ||
958 | softns += NSEC_PER_SEC; | ||
968 | } | 959 | } |
969 | if (psecs >= soft) { | 960 | |
970 | /* | 961 | /* Update the expiry cache */ |
971 | * At the soft limit, send a SIGXCPU every second. | 962 | if (softns < pct->bases[CPUCLOCK_PROF].nextevt) |
972 | */ | 963 | pct->bases[CPUCLOCK_PROF].nextevt = softns; |
973 | if (print_fatal_signals) { | ||
974 | pr_info("CPU Watchdog Timeout (soft): %s[%d]\n", | ||
975 | tsk->comm, task_pid_nr(tsk)); | ||
976 | } | ||
977 | __group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk); | ||
978 | if (soft < hard) { | ||
979 | soft++; | ||
980 | sig->rlim[RLIMIT_CPU].rlim_cur = soft; | ||
981 | } | ||
982 | } | ||
983 | x = soft * NSEC_PER_SEC; | ||
984 | if (!prof_expires || x < prof_expires) | ||
985 | prof_expires = x; | ||
986 | } | 964 | } |
987 | 965 | ||
988 | sig->cputime_expires.prof_exp = prof_expires; | 966 | if (expiry_cache_is_inactive(pct)) |
989 | sig->cputime_expires.virt_exp = virt_expires; | ||
990 | sig->cputime_expires.sched_exp = sched_expires; | ||
991 | if (task_cputime_zero(&sig->cputime_expires)) | ||
992 | stop_process_timers(sig); | 967 | stop_process_timers(sig); |
993 | 968 | ||
994 | sig->cputimer.checking_timer = false; | 969 | pct->expiry_active = false; |
995 | } | 970 | } |
996 | 971 | ||
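
For reference, the reworked RLIMIT_CPU path above keeps everything in nanoseconds: the sample (samples[CPUCLOCK_PROF]) is compared against precomputed softns/hardns limits, and after each SIGXCPU the soft limit is re-armed one second ahead, so the old per-tick div_u64() to seconds disappears. A minimal userspace sketch of that arithmetic follows; check_rlimit() itself is introduced elsewhere in this patch, so the helper's shape here is an assumption:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define NSEC_PER_SEC 1000000000ULL

    /* Hypothetical stand-in for check_rlimit(): has the limit been hit? */
    static bool limit_crossed(uint64_t ptime_ns, uint64_t limit_ns)
    {
        return ptime_ns >= limit_ns;
    }

    int main(void)
    {
        uint64_t soft = 2, hard = 5;            /* seconds, from RLIMIT_CPU */
        uint64_t softns = soft * NSEC_PER_SEC;  /* converted once, not per tick */
        uint64_t hardns = hard * NSEC_PER_SEC;
        uint64_t ptime  = 2 * NSEC_PER_SEC + 1; /* sampled CPU time in ns */

        if (limit_crossed(ptime, hardns)) {
            puts("hard limit: SIGKILL, no further action");
        } else if (limit_crossed(ptime, softns)) {
            puts("soft limit: SIGXCPU");
            softns += NSEC_PER_SEC;             /* next SIGXCPU ~1s of CPU later */
        }
        printf("next soft check at %llu ns\n", (unsigned long long)softns);
        return 0;
    }
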
997 | /* | 972 | /* |
@@ -1000,18 +975,21 @@ static void check_process_timers(struct task_struct *tsk, | |||
1000 | */ | 975 | */ |
1001 | static void posix_cpu_timer_rearm(struct k_itimer *timer) | 976 | static void posix_cpu_timer_rearm(struct k_itimer *timer) |
1002 | { | 977 | { |
978 | clockid_t clkid = CPUCLOCK_WHICH(timer->it_clock); | ||
979 | struct cpu_timer *ctmr = &timer->it.cpu; | ||
980 | struct task_struct *p = ctmr->task; | ||
1003 | struct sighand_struct *sighand; | 981 | struct sighand_struct *sighand; |
1004 | unsigned long flags; | 982 | unsigned long flags; |
1005 | struct task_struct *p = timer->it.cpu.task; | ||
1006 | u64 now; | 983 | u64 now; |
1007 | 984 | ||
1008 | WARN_ON_ONCE(p == NULL); | 985 | if (WARN_ON_ONCE(!p)) |
986 | return; | ||
1009 | 987 | ||
1010 | /* | 988 | /* |
1011 | * Fetch the current sample and update the timer's expiry time. | 989 | * Fetch the current sample and update the timer's expiry time. |
1012 | */ | 990 | */ |
1013 | if (CPUCLOCK_PERTHREAD(timer->it_clock)) { | 991 | if (CPUCLOCK_PERTHREAD(timer->it_clock)) { |
1014 | cpu_clock_sample(timer->it_clock, p, &now); | 992 | now = cpu_clock_sample(clkid, p); |
1015 | bump_cpu_timer(timer, now); | 993 | bump_cpu_timer(timer, now); |
1016 | if (unlikely(p->exit_state)) | 994 | if (unlikely(p->exit_state)) |
1017 | return; | 995 | return; |
@@ -1031,13 +1009,13 @@ static void posix_cpu_timer_rearm(struct k_itimer *timer) | |||
1031 | * The process has been reaped. | 1009 | * The process has been reaped. |
1032 | * We can't even collect a sample any more. | 1010 | * We can't even collect a sample any more. |
1033 | */ | 1011 | */ |
1034 | timer->it.cpu.expires = 0; | 1012 | cpu_timer_setexpires(ctmr, 0); |
1035 | return; | 1013 | return; |
1036 | } else if (unlikely(p->exit_state) && thread_group_empty(p)) { | 1014 | } else if (unlikely(p->exit_state) && thread_group_empty(p)) { |
1037 | /* If the process is dying, no need to rearm */ | 1015 | /* If the process is dying, no need to rearm */ |
1038 | goto unlock; | 1016 | goto unlock; |
1039 | } | 1017 | } |
1040 | cpu_timer_sample_group(timer->it_clock, p, &now); | 1018 | now = cpu_clock_sample_group(clkid, p, true); |
1041 | bump_cpu_timer(timer, now); | 1019 | bump_cpu_timer(timer, now); |
1042 | /* Leave the sighand locked for the call below. */ | 1020 | /* Leave the sighand locked for the call below. */ |
1043 | } | 1021 | } |
@@ -1051,26 +1029,24 @@ unlock: | |||
1051 | } | 1029 | } |
1052 | 1030 | ||
1053 | /** | 1031 | /** |
1054 | * task_cputime_expired - Compare two task_cputime entities. | 1032 | * task_cputimers_expired - Check whether posix CPU timers are expired |
1055 | * | 1033 | * |
1056 | * @sample: The task_cputime structure to be checked for expiration. | 1034 | * @samples: Array of current samples for the CPUCLOCK clocks |
1057 | * @expires: Expiration times, against which @sample will be checked. | 1035 | * @pct: Pointer to a posix_cputimers container |
1058 | * | 1036 | * |
1059 | * Checks @sample against @expires to see if any field of @sample has expired. | 1037 | * Returns true if any member of @samples is greater than the corresponding |
1060 | * Returns true if any field of the former is greater than the corresponding | 1038 | * member of @pct->bases[CLK].nextevt. False otherwise. |
1061 | * field of the latter if the latter field is set. Otherwise returns false. | ||
1062 | */ | 1039 | */ |
1063 | static inline int task_cputime_expired(const struct task_cputime *sample, | 1040 | static inline bool |
1064 | const struct task_cputime *expires) | 1041 | task_cputimers_expired(const u64 *sample, struct posix_cputimers *pct) |
1065 | { | 1042 | { |
1066 | if (expires->utime && sample->utime >= expires->utime) | 1043 | int i; |
1067 | return 1; | 1044 | |
1068 | if (expires->stime && sample->utime + sample->stime >= expires->stime) | 1045 | for (i = 0; i < CPUCLOCK_MAX; i++) { |
1069 | return 1; | 1046 | if (sample[i] >= pct->bases[i].nextevt) |
1070 | if (expires->sum_exec_runtime != 0 && | 1047 | return true; |
1071 | sample->sum_exec_runtime >= expires->sum_exec_runtime) | 1048 | } |
1072 | return 1; | 1049 | return false; |
1073 | return 0; | ||
1074 | } | 1050 | } |
1075 | 1051 | ||
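
task_cputimers_expired() collapses three hand-written field checks into a loop over a fixed-size array. No separate "is this clock armed" test is needed because an unarmed base is expected to hold U64_MAX; that initialization happens elsewhere in this series and is an assumption here. A standalone sketch, assuming the kernel's CPUCLOCK_PROF/VIRT/SCHED = 0/1/2 layout:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    enum { CPUCLOCK_PROF, CPUCLOCK_VIRT, CPUCLOCK_SCHED, CPUCLOCK_MAX };

    struct base { uint64_t nextevt; };

    static bool cputimers_expired(const uint64_t *samples,
                                  const struct base *bases)
    {
        for (int i = 0; i < CPUCLOCK_MAX; i++) {
            if (samples[i] >= bases[i].nextevt)
                return true;
        }
        return false;
    }

    int main(void)
    {
        /* UINT64_MAX marks an unarmed clock, so it can never "expire". */
        struct base bases[CPUCLOCK_MAX] = {
            { 5000 }, { UINT64_MAX }, { UINT64_MAX },
        };
        uint64_t samples[CPUCLOCK_MAX] = { 6000, 100, 100 };

        printf("expired: %d\n", cputimers_expired(samples, bases));
        return 0;
    }
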
1076 | /** | 1052 | /** |
@@ -1083,48 +1059,50 @@ static inline int task_cputime_expired(const struct task_cputime *sample, | |||
1083 | * timers and compare them with the corresponding expiration times. Return | 1059 | * timers and compare them with the corresponding expiration times. Return |
1084 | * true if a timer has expired, else return false. | 1060 | * true if a timer has expired, else return false. |
1085 | */ | 1061 | */ |
1086 | static inline int fastpath_timer_check(struct task_struct *tsk) | 1062 | static inline bool fastpath_timer_check(struct task_struct *tsk) |
1087 | { | 1063 | { |
1064 | struct posix_cputimers *pct = &tsk->posix_cputimers; | ||
1088 | struct signal_struct *sig; | 1065 | struct signal_struct *sig; |
1089 | 1066 | ||
1090 | if (!task_cputime_zero(&tsk->cputime_expires)) { | 1067 | if (!expiry_cache_is_inactive(pct)) { |
1091 | struct task_cputime task_sample; | 1068 | u64 samples[CPUCLOCK_MAX]; |
1092 | 1069 | ||
1093 | task_cputime(tsk, &task_sample.utime, &task_sample.stime); | 1070 | task_sample_cputime(tsk, samples); |
1094 | task_sample.sum_exec_runtime = tsk->se.sum_exec_runtime; | 1071 | if (task_cputimers_expired(samples, pct)) |
1095 | if (task_cputime_expired(&task_sample, &tsk->cputime_expires)) | 1072 | return true; |
1096 | return 1; | ||
1097 | } | 1073 | } |
1098 | 1074 | ||
1099 | sig = tsk->signal; | 1075 | sig = tsk->signal; |
1076 | pct = &sig->posix_cputimers; | ||
1100 | /* | 1077 | /* |
1101 | * Check if thread group timers expired when the cputimer is | 1078 | * Check if thread group timers expired when timers are active and |
1102 | * running and no other thread in the group is already checking | 1079 | * no other thread in the group is already handling expiry for |
1103 | * for thread group cputimers. These fields are read without the | 1080 | * thread group cputimers. These fields are read without the |
1104 | * sighand lock. However, this is fine because this is meant to | 1081 | * sighand lock. However, this is fine because this is meant to be |
1105 | * be a fastpath heuristic to determine whether we should try to | 1082 | * a fastpath heuristic to determine whether we should try to |
1106 | * acquire the sighand lock to check/handle timers. | 1083 | * acquire the sighand lock to handle timer expiry. |
1107 | * | 1084 | * |
1108 | * In the worst case scenario, if 'running' or 'checking_timer' gets | 1085 | * In the worst case scenario, if concurrently timers_active is set |
1109 | * set but the current thread doesn't see the change yet, we'll wait | 1086 | * or expiry_active is cleared, but the current thread doesn't see |
1110 | * until the next thread in the group gets a scheduler interrupt to | 1087 | * the change yet, the timer checks are delayed until the next |
1111 | * handle the timer. This isn't an issue in practice because these | 1088 | * thread in the group gets a scheduler interrupt to handle the |
1112 | * types of delays with signals actually getting sent are expected. | 1089 | * timer. This isn't an issue in practice because these types of |
1090 | * delays with signals actually getting sent are expected. | ||
1113 | */ | 1091 | */ |
1114 | if (READ_ONCE(sig->cputimer.running) && | 1092 | if (READ_ONCE(pct->timers_active) && !READ_ONCE(pct->expiry_active)) { |
1115 | !READ_ONCE(sig->cputimer.checking_timer)) { | 1093 | u64 samples[CPUCLOCK_MAX]; |
1116 | struct task_cputime group_sample; | ||
1117 | 1094 | ||
1118 | sample_cputime_atomic(&group_sample, &sig->cputimer.cputime_atomic); | 1095 | proc_sample_cputime_atomic(&sig->cputimer.cputime_atomic, |
1096 | samples); | ||
1119 | 1097 | ||
1120 | if (task_cputime_expired(&group_sample, &sig->cputime_expires)) | 1098 | if (task_cputimers_expired(samples, pct)) |
1121 | return 1; | 1099 | return true; |
1122 | } | 1100 | } |
1123 | 1101 | ||
1124 | if (dl_task(tsk) && tsk->dl.dl_overrun) | 1102 | if (dl_task(tsk) && tsk->dl.dl_overrun) |
1125 | return 1; | 1103 | return true; |
1126 | 1104 | ||
1127 | return 0; | 1105 | return false; |
1128 | } | 1106 | } |
1129 | 1107 | ||
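
fastpath_timer_check() reads timers_active and expiry_active without the sighand lock; as the comment above explains, a stale value only delays handling to the next tick. A toy C11-atomics model of that racy-but-safe heuristic (names are illustrative, not kernel API):

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    struct pct_flags {
        atomic_bool timers_active;  /* any process-wide timer armed? */
        atomic_bool expiry_active;  /* is another thread already expiring? */
    };

    /* Lockless fastpath: reads may race with writers; a wrong answer only
     * postpones the locked slow path to the next tick. */
    static bool should_take_slowpath(struct pct_flags *f)
    {
        return atomic_load_explicit(&f->timers_active, memory_order_relaxed) &&
               !atomic_load_explicit(&f->expiry_active, memory_order_relaxed);
    }

    int main(void)
    {
        struct pct_flags f = { true, false };
        printf("slowpath: %d\n", should_take_slowpath(&f));
        return 0;
    }
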
1130 | /* | 1108 | /* |
@@ -1132,11 +1110,12 @@ static inline int fastpath_timer_check(struct task_struct *tsk) | |||
1132 | * already updated our counts. We need to check if any timers fire now. | 1110 | * already updated our counts. We need to check if any timers fire now. |
1133 | * Interrupts are disabled. | 1111 | * Interrupts are disabled. |
1134 | */ | 1112 | */ |
1135 | void run_posix_cpu_timers(struct task_struct *tsk) | 1113 | void run_posix_cpu_timers(void) |
1136 | { | 1114 | { |
1137 | LIST_HEAD(firing); | 1115 | struct task_struct *tsk = current; |
1138 | struct k_itimer *timer, *next; | 1116 | struct k_itimer *timer, *next; |
1139 | unsigned long flags; | 1117 | unsigned long flags; |
1118 | LIST_HEAD(firing); | ||
1140 | 1119 | ||
1141 | lockdep_assert_irqs_disabled(); | 1120 | lockdep_assert_irqs_disabled(); |
1142 | 1121 | ||
@@ -1174,11 +1153,11 @@ void run_posix_cpu_timers(struct task_struct *tsk) | |||
1174 | * each timer's lock before clearing its firing flag, so no | 1153 | * each timer's lock before clearing its firing flag, so no |
1175 | * timer call will interfere. | 1154 | * timer call will interfere. |
1176 | */ | 1155 | */ |
1177 | list_for_each_entry_safe(timer, next, &firing, it.cpu.entry) { | 1156 | list_for_each_entry_safe(timer, next, &firing, it.cpu.elist) { |
1178 | int cpu_firing; | 1157 | int cpu_firing; |
1179 | 1158 | ||
1180 | spin_lock(&timer->it_lock); | 1159 | spin_lock(&timer->it_lock); |
1181 | list_del_init(&timer->it.cpu.entry); | 1160 | list_del_init(&timer->it.cpu.elist); |
1182 | cpu_firing = timer->it.cpu.firing; | 1161 | cpu_firing = timer->it.cpu.firing; |
1183 | timer->it.cpu.firing = 0; | 1162 | timer->it.cpu.firing = 0; |
1184 | /* | 1163 | /* |
@@ -1196,16 +1175,18 @@ void run_posix_cpu_timers(struct task_struct *tsk) | |||
1196 | * Set one of the process-wide special case CPU timers or RLIMIT_CPU. | 1175 | * Set one of the process-wide special case CPU timers or RLIMIT_CPU. |
1197 | * The tsk->sighand->siglock must be held by the caller. | 1176 | * The tsk->sighand->siglock must be held by the caller. |
1198 | */ | 1177 | */ |
1199 | void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx, | 1178 | void set_process_cpu_timer(struct task_struct *tsk, unsigned int clkid, |
1200 | u64 *newval, u64 *oldval) | 1179 | u64 *newval, u64 *oldval) |
1201 | { | 1180 | { |
1202 | u64 now; | 1181 | u64 now, *nextevt; |
1203 | int ret; | 1182 | |
1183 | if (WARN_ON_ONCE(clkid >= CPUCLOCK_SCHED)) | ||
1184 | return; | ||
1204 | 1185 | ||
1205 | WARN_ON_ONCE(clock_idx == CPUCLOCK_SCHED); | 1186 | nextevt = &tsk->signal->posix_cputimers.bases[clkid].nextevt; |
1206 | ret = cpu_timer_sample_group(clock_idx, tsk, &now); | 1187 | now = cpu_clock_sample_group(clkid, tsk, true); |
1207 | 1188 | ||
1208 | if (oldval && ret != -EINVAL) { | 1189 | if (oldval) { |
1209 | /* | 1190 | /* |
1210 | * We are setting itimer. The *oldval is absolute and we update | 1191 | * We are setting itimer. The *oldval is absolute and we update |
1211 | * it to be relative, *newval argument is relative and we update | 1192 | * it to be relative, *newval argument is relative and we update |
@@ -1226,19 +1207,11 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx, | |||
1226 | } | 1207 | } |
1227 | 1208 | ||
1228 | /* | 1209 | /* |
1229 | * Update expiration cache if we are the earliest timer, or eventually | 1210 | * Update expiration cache if this is the earliest timer. CPUCLOCK_PROF |
1230 | * RLIMIT_CPU limit is earlier than prof_exp cpu timer expire. | 1211 | * expiry cache is also used by RLIMIT_CPU! |
1231 | */ | 1212 | */ |
1232 | switch (clock_idx) { | 1213 | if (*newval < *nextevt) |
1233 | case CPUCLOCK_PROF: | 1214 | *nextevt = *newval; |
1234 | if (expires_gt(tsk->signal->cputime_expires.prof_exp, *newval)) | ||
1235 | tsk->signal->cputime_expires.prof_exp = *newval; | ||
1236 | break; | ||
1237 | case CPUCLOCK_VIRT: | ||
1238 | if (expires_gt(tsk->signal->cputime_expires.virt_exp, *newval)) | ||
1239 | tsk->signal->cputime_expires.virt_exp = *newval; | ||
1240 | break; | ||
1241 | } | ||
1242 | 1215 | ||
1243 | tick_dep_set_signal(tsk->signal, TICK_DEP_BIT_POSIX_TIMER); | 1216 | tick_dep_set_signal(tsk->signal, TICK_DEP_BIT_POSIX_TIMER); |
1244 | } | 1217 | } |
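
The removed switch needed a "zero means unset" special case per clock. With one nextevt slot per clock base, and assuming unarmed slots are initialized to U64_MAX elsewhere in this series, the update becomes an unconditional min, as this small demo shows:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t nextevt = UINT64_MAX;      /* unarmed: nothing can be earlier */
        uint64_t newval  = 7 * 1000000000ULL;

        /* Old scheme equivalent: if (!nextevt || newval < nextevt) ... */
        if (newval < nextevt)               /* plain min, no special case */
            nextevt = newval;

        printf("nextevt = %llu\n", (unsigned long long)nextevt);
        return 0;
    }
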
@@ -1260,6 +1233,7 @@ static int do_cpu_nanosleep(const clockid_t which_clock, int flags, | |||
1260 | timer.it_overrun = -1; | 1233 | timer.it_overrun = -1; |
1261 | error = posix_cpu_timer_create(&timer); | 1234 | error = posix_cpu_timer_create(&timer); |
1262 | timer.it_process = current; | 1235 | timer.it_process = current; |
1236 | |||
1263 | if (!error) { | 1237 | if (!error) { |
1264 | static struct itimerspec64 zero_it; | 1238 | static struct itimerspec64 zero_it; |
1265 | struct restart_block *restart; | 1239 | struct restart_block *restart; |
@@ -1275,7 +1249,7 @@ static int do_cpu_nanosleep(const clockid_t which_clock, int flags, | |||
1275 | } | 1249 | } |
1276 | 1250 | ||
1277 | while (!signal_pending(current)) { | 1251 | while (!signal_pending(current)) { |
1278 | if (timer.it.cpu.expires == 0) { | 1252 | if (!cpu_timer_getexpires(&timer.it.cpu)) { |
1279 | /* | 1253 | /* |
1280 | * Our timer fired and was reset; the | 1254 | * Our timer fired and was reset; the |
1281 | * deletion below cannot fail. | 1255 | * deletion below cannot fail. |
@@ -1297,7 +1271,7 @@ static int do_cpu_nanosleep(const clockid_t which_clock, int flags, | |||
1297 | /* | 1271 | /* |
1298 | * We were interrupted by a signal. | 1272 | * We were interrupted by a signal. |
1299 | */ | 1273 | */ |
1300 | expires = timer.it.cpu.expires; | 1274 | expires = cpu_timer_getexpires(&timer.it.cpu); |
1301 | error = posix_cpu_timer_set(&timer, 0, &zero_it, &it); | 1275 | error = posix_cpu_timer_set(&timer, 0, &zero_it, &it); |
1302 | if (!error) { | 1276 | if (!error) { |
1303 | /* | 1277 | /* |
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index d7f2d91acdac..0ec5b7a1d769 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c | |||
@@ -442,7 +442,7 @@ static struct k_itimer * alloc_posix_timer(void) | |||
442 | 442 | ||
443 | static void k_itimer_rcu_free(struct rcu_head *head) | 443 | static void k_itimer_rcu_free(struct rcu_head *head) |
444 | { | 444 | { |
445 | struct k_itimer *tmr = container_of(head, struct k_itimer, it.rcu); | 445 | struct k_itimer *tmr = container_of(head, struct k_itimer, rcu); |
446 | 446 | ||
447 | kmem_cache_free(posix_timers_cache, tmr); | 447 | kmem_cache_free(posix_timers_cache, tmr); |
448 | } | 448 | } |
@@ -459,7 +459,7 @@ static void release_posix_timer(struct k_itimer *tmr, int it_id_set) | |||
459 | } | 459 | } |
460 | put_pid(tmr->it_pid); | 460 | put_pid(tmr->it_pid); |
461 | sigqueue_free(tmr->sigq); | 461 | sigqueue_free(tmr->sigq); |
462 | call_rcu(&tmr->it.rcu, k_itimer_rcu_free); | 462 | call_rcu(&tmr->rcu, k_itimer_rcu_free); |
463 | } | 463 | } |
464 | 464 | ||
465 | static int common_timer_create(struct k_itimer *new_timer) | 465 | static int common_timer_create(struct k_itimer *new_timer) |
@@ -805,6 +805,35 @@ static int common_hrtimer_try_to_cancel(struct k_itimer *timr) | |||
805 | return hrtimer_try_to_cancel(&timr->it.real.timer); | 805 | return hrtimer_try_to_cancel(&timr->it.real.timer); |
806 | } | 806 | } |
807 | 807 | ||
808 | static void common_timer_wait_running(struct k_itimer *timer) | ||
809 | { | ||
810 | hrtimer_cancel_wait_running(&timer->it.real.timer); | ||
811 | } | ||
812 | |||
813 | /* | ||
814 | * On PREEMPT_RT this prevents priority inversion against the softirq kthread in | ||
815 | * case it gets preempted while executing a timer callback. See comments in | ||
816 | * hrtimer_cancel_wait_running. For PREEMPT_RT=n this just results in a | ||
817 | * cpu_relax(). | ||
818 | */ | ||
819 | static struct k_itimer *timer_wait_running(struct k_itimer *timer, | ||
820 | unsigned long *flags) | ||
821 | { | ||
822 | const struct k_clock *kc = READ_ONCE(timer->kclock); | ||
823 | timer_t timer_id = READ_ONCE(timer->it_id); | ||
824 | |||
825 | /* Prevent kfree(timer) after dropping the lock */ | ||
826 | rcu_read_lock(); | ||
827 | unlock_timer(timer, *flags); | ||
828 | |||
829 | if (!WARN_ON_ONCE(!kc->timer_wait_running)) | ||
830 | kc->timer_wait_running(timer); | ||
831 | |||
832 | rcu_read_unlock(); | ||
833 | /* Relock the timer. It might no longer be hashed. */ | ||
834 | return lock_timer(timer_id, flags); | ||
835 | } | ||
836 | |||
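
What timer_wait_running() above is balancing: it_lock must be dropped so the running callback (or a concurrent deleter) can make progress, yet the k_itimer may be freed once the lock is gone. The RCU read-side section covers that window because release_posix_timer() now frees the timer via call_rcu() on the new top-level rcu head (see the posix-timers.c hunks above). A condensed, annotated view of the choreography, not additional code:

    rcu_read_lock();                     /* pin the k_itimer's memory */
    unlock_timer(timer, *flags);         /* a concurrent delete may now run */
    kc->timer_wait_running(timer);       /* PREEMPT_RT: sleep until the callback
                                            finishes; otherwise a cpu_relax() */
    rcu_read_unlock();                   /* after this the timer may be freed */
    return lock_timer(timer_id, flags);  /* re-lookup by id; NULL if deleted */
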
808 | /* Set a POSIX.1b interval timer. */ | 837 | /* Set a POSIX.1b interval timer. */ |
809 | int common_timer_set(struct k_itimer *timr, int flags, | 838 | int common_timer_set(struct k_itimer *timr, int flags, |
810 | struct itimerspec64 *new_setting, | 839 | struct itimerspec64 *new_setting, |
@@ -844,13 +873,13 @@ int common_timer_set(struct k_itimer *timr, int flags, | |||
844 | return 0; | 873 | return 0; |
845 | } | 874 | } |
846 | 875 | ||
847 | static int do_timer_settime(timer_t timer_id, int flags, | 876 | static int do_timer_settime(timer_t timer_id, int tmr_flags, |
848 | struct itimerspec64 *new_spec64, | 877 | struct itimerspec64 *new_spec64, |
849 | struct itimerspec64 *old_spec64) | 878 | struct itimerspec64 *old_spec64) |
850 | { | 879 | { |
851 | const struct k_clock *kc; | 880 | const struct k_clock *kc; |
852 | struct k_itimer *timr; | 881 | struct k_itimer *timr; |
853 | unsigned long flag; | 882 | unsigned long flags; |
854 | int error = 0; | 883 | int error = 0; |
855 | 884 | ||
856 | if (!timespec64_valid(&new_spec64->it_interval) || | 885 | if (!timespec64_valid(&new_spec64->it_interval) || |
@@ -859,8 +888,9 @@ static int do_timer_settime(timer_t timer_id, int flags, | |||
859 | 888 | ||
860 | if (old_spec64) | 889 | if (old_spec64) |
861 | memset(old_spec64, 0, sizeof(*old_spec64)); | 890 | memset(old_spec64, 0, sizeof(*old_spec64)); |
891 | |||
892 | timr = lock_timer(timer_id, &flags); | ||
862 | retry: | 893 | retry: |
863 | timr = lock_timer(timer_id, &flag); | ||
864 | if (!timr) | 894 | if (!timr) |
865 | return -EINVAL; | 895 | return -EINVAL; |
866 | 896 | ||
@@ -868,13 +898,16 @@ retry: | |||
868 | if (WARN_ON_ONCE(!kc || !kc->timer_set)) | 898 | if (WARN_ON_ONCE(!kc || !kc->timer_set)) |
869 | error = -EINVAL; | 899 | error = -EINVAL; |
870 | else | 900 | else |
871 | error = kc->timer_set(timr, flags, new_spec64, old_spec64); | 901 | error = kc->timer_set(timr, tmr_flags, new_spec64, old_spec64); |
872 | 902 | ||
873 | unlock_timer(timr, flag); | ||
874 | if (error == TIMER_RETRY) { | 903 | if (error == TIMER_RETRY) { |
875 | old_spec64 = NULL; // We already got the old time... | 904 | // We already got the old time... |
905 | old_spec64 = NULL; | ||
906 | /* Unlocks and relocks the timer if it still exists */ | ||
907 | timr = timer_wait_running(timr, &flags); | ||
876 | goto retry; | 908 | goto retry; |
877 | } | 909 | } |
910 | unlock_timer(timr, flags); | ||
878 | 911 | ||
879 | return error; | 912 | return error; |
880 | } | 913 | } |
@@ -951,13 +984,15 @@ SYSCALL_DEFINE1(timer_delete, timer_t, timer_id) | |||
951 | struct k_itimer *timer; | 984 | struct k_itimer *timer; |
952 | unsigned long flags; | 985 | unsigned long flags; |
953 | 986 | ||
954 | retry_delete: | ||
955 | timer = lock_timer(timer_id, &flags); | 987 | timer = lock_timer(timer_id, &flags); |
988 | |||
989 | retry_delete: | ||
956 | if (!timer) | 990 | if (!timer) |
957 | return -EINVAL; | 991 | return -EINVAL; |
958 | 992 | ||
959 | if (timer_delete_hook(timer) == TIMER_RETRY) { | 993 | if (unlikely(timer_delete_hook(timer) == TIMER_RETRY)) { |
960 | unlock_timer(timer, flags); | 994 | /* Unlocks and relocks the timer if it still exists */ |
995 | timer = timer_wait_running(timer, &flags); | ||
961 | goto retry_delete; | 996 | goto retry_delete; |
962 | } | 997 | } |
963 | 998 | ||
@@ -1238,6 +1273,7 @@ static const struct k_clock clock_realtime = { | |||
1238 | .timer_forward = common_hrtimer_forward, | 1273 | .timer_forward = common_hrtimer_forward, |
1239 | .timer_remaining = common_hrtimer_remaining, | 1274 | .timer_remaining = common_hrtimer_remaining, |
1240 | .timer_try_to_cancel = common_hrtimer_try_to_cancel, | 1275 | .timer_try_to_cancel = common_hrtimer_try_to_cancel, |
1276 | .timer_wait_running = common_timer_wait_running, | ||
1241 | .timer_arm = common_hrtimer_arm, | 1277 | .timer_arm = common_hrtimer_arm, |
1242 | }; | 1278 | }; |
1243 | 1279 | ||
@@ -1253,6 +1289,7 @@ static const struct k_clock clock_monotonic = { | |||
1253 | .timer_forward = common_hrtimer_forward, | 1289 | .timer_forward = common_hrtimer_forward, |
1254 | .timer_remaining = common_hrtimer_remaining, | 1290 | .timer_remaining = common_hrtimer_remaining, |
1255 | .timer_try_to_cancel = common_hrtimer_try_to_cancel, | 1291 | .timer_try_to_cancel = common_hrtimer_try_to_cancel, |
1292 | .timer_wait_running = common_timer_wait_running, | ||
1256 | .timer_arm = common_hrtimer_arm, | 1293 | .timer_arm = common_hrtimer_arm, |
1257 | }; | 1294 | }; |
1258 | 1295 | ||
@@ -1283,6 +1320,7 @@ static const struct k_clock clock_tai = { | |||
1283 | .timer_forward = common_hrtimer_forward, | 1320 | .timer_forward = common_hrtimer_forward, |
1284 | .timer_remaining = common_hrtimer_remaining, | 1321 | .timer_remaining = common_hrtimer_remaining, |
1285 | .timer_try_to_cancel = common_hrtimer_try_to_cancel, | 1322 | .timer_try_to_cancel = common_hrtimer_try_to_cancel, |
1323 | .timer_wait_running = common_timer_wait_running, | ||
1286 | .timer_arm = common_hrtimer_arm, | 1324 | .timer_arm = common_hrtimer_arm, |
1287 | }; | 1325 | }; |
1288 | 1326 | ||
@@ -1298,6 +1336,7 @@ static const struct k_clock clock_boottime = { | |||
1298 | .timer_forward = common_hrtimer_forward, | 1336 | .timer_forward = common_hrtimer_forward, |
1299 | .timer_remaining = common_hrtimer_remaining, | 1337 | .timer_remaining = common_hrtimer_remaining, |
1300 | .timer_try_to_cancel = common_hrtimer_try_to_cancel, | 1338 | .timer_try_to_cancel = common_hrtimer_try_to_cancel, |
1339 | .timer_wait_running = common_timer_wait_running, | ||
1301 | .timer_arm = common_hrtimer_arm, | 1340 | .timer_arm = common_hrtimer_arm, |
1302 | }; | 1341 | }; |
1303 | 1342 | ||
diff --git a/kernel/time/posix-timers.h b/kernel/time/posix-timers.h index de5daa6d975a..897c29e162b9 100644 --- a/kernel/time/posix-timers.h +++ b/kernel/time/posix-timers.h | |||
@@ -24,6 +24,7 @@ struct k_clock { | |||
24 | int (*timer_try_to_cancel)(struct k_itimer *timr); | 24 | int (*timer_try_to_cancel)(struct k_itimer *timr); |
25 | void (*timer_arm)(struct k_itimer *timr, ktime_t expires, | 25 | void (*timer_arm)(struct k_itimer *timr, ktime_t expires, |
26 | bool absolute, bool sigev_none); | 26 | bool absolute, bool sigev_none); |
27 | void (*timer_wait_running)(struct k_itimer *timr); | ||
27 | }; | 28 | }; |
28 | 29 | ||
29 | extern const struct k_clock clock_posix_cpu; | 30 | extern const struct k_clock clock_posix_cpu; |
diff --git a/kernel/time/tick-broadcast-hrtimer.c b/kernel/time/tick-broadcast-hrtimer.c index 5be6154e2fd2..c1f5bb590b5e 100644 --- a/kernel/time/tick-broadcast-hrtimer.c +++ b/kernel/time/tick-broadcast-hrtimer.c | |||
@@ -59,11 +59,16 @@ static int bc_set_next(ktime_t expires, struct clock_event_device *bc) | |||
59 | * hrtimer_{start/cancel} functions call into tracing, | 59 | * hrtimer_{start/cancel} functions call into tracing, |
60 | * calls to these functions must be bound within RCU_NONIDLE. | 60 | * calls to these functions must be bound within RCU_NONIDLE. |
61 | */ | 61 | */ |
62 | RCU_NONIDLE({ | 62 | RCU_NONIDLE( |
63 | { | ||
63 | bc_moved = hrtimer_try_to_cancel(&bctimer) >= 0; | 64 | bc_moved = hrtimer_try_to_cancel(&bctimer) >= 0; |
64 | if (bc_moved) | 65 | if (bc_moved) { |
65 | hrtimer_start(&bctimer, expires, | 66 | hrtimer_start(&bctimer, expires, |
66 | HRTIMER_MODE_ABS_PINNED);}); | 67 | HRTIMER_MODE_ABS_PINNED_HARD); |
68 | } | ||
69 | } | ||
70 | ); | ||
71 | |||
67 | if (bc_moved) { | 72 | if (bc_moved) { |
68 | /* Bind the "device" to the cpu */ | 73 | /* Bind the "device" to the cpu */ |
69 | bc->bound_on = smp_processor_id(); | 74 | bc->bound_on = smp_processor_id(); |
@@ -104,7 +109,7 @@ static enum hrtimer_restart bc_handler(struct hrtimer *t) | |||
104 | 109 | ||
105 | void tick_setup_hrtimer_broadcast(void) | 110 | void tick_setup_hrtimer_broadcast(void) |
106 | { | 111 | { |
107 | hrtimer_init(&bctimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); | 112 | hrtimer_init(&bctimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD); |
108 | bctimer.function = bc_handler; | 113 | bctimer.function = bc_handler; |
109 | clockevents_register_device(&ce_broadcast_hrtimer); | 114 | clockevents_register_device(&ce_broadcast_hrtimer); |
110 | } | 115 | } |
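
This conversion, like the tick-sched and watchdog hunks below, switches to the _HARD hrtimer modes introduced earlier in this series, so these timers keep expiring in hard interrupt context on PREEMPT_RT instead of being deferred to the softirq thread. A minimal kernel-style sketch of the pattern; the names are hypothetical:

    #include <linux/hrtimer.h>
    #include <linux/ktime.h>

    static struct hrtimer demo_timer;       /* hypothetical example timer */

    static enum hrtimer_restart demo_fn(struct hrtimer *t)
    {
        /* Runs in hard irq context even on PREEMPT_RT. */
        return HRTIMER_NORESTART;
    }

    static void demo_setup(void)
    {
        hrtimer_init(&demo_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
        demo_timer.function = demo_fn;
        hrtimer_start(&demo_timer, ms_to_ktime(10), HRTIMER_MODE_REL_HARD);
    }
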
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index be9707f68024..955851748dc3 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c | |||
@@ -634,10 +634,12 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now) | |||
634 | /* Forward the time to expire in the future */ | 634 | /* Forward the time to expire in the future */ |
635 | hrtimer_forward(&ts->sched_timer, now, tick_period); | 635 | hrtimer_forward(&ts->sched_timer, now, tick_period); |
636 | 636 | ||
637 | if (ts->nohz_mode == NOHZ_MODE_HIGHRES) | 637 | if (ts->nohz_mode == NOHZ_MODE_HIGHRES) { |
638 | hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED); | 638 | hrtimer_start_expires(&ts->sched_timer, |
639 | else | 639 | HRTIMER_MODE_ABS_PINNED_HARD); |
640 | } else { | ||
640 | tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1); | 641 | tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1); |
642 | } | ||
641 | 643 | ||
642 | /* | 644 | /* |
643 | * Reset to make sure next tick stop doesn't get fooled by past | 645 | * Reset to make sure next tick stop doesn't get fooled by past |
@@ -802,7 +804,8 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu) | |||
802 | } | 804 | } |
803 | 805 | ||
804 | if (ts->nohz_mode == NOHZ_MODE_HIGHRES) { | 806 | if (ts->nohz_mode == NOHZ_MODE_HIGHRES) { |
805 | hrtimer_start(&ts->sched_timer, tick, HRTIMER_MODE_ABS_PINNED); | 807 | hrtimer_start(&ts->sched_timer, tick, |
808 | HRTIMER_MODE_ABS_PINNED_HARD); | ||
806 | } else { | 809 | } else { |
807 | hrtimer_set_expires(&ts->sched_timer, tick); | 810 | hrtimer_set_expires(&ts->sched_timer, tick); |
808 | tick_program_event(tick, 1); | 811 | tick_program_event(tick, 1); |
@@ -1230,7 +1233,7 @@ static void tick_nohz_switch_to_nohz(void) | |||
1230 | * Recycle the hrtimer in ts, so we can share the | 1233 | * Recycle the hrtimer in ts, so we can share the |
1231 | * hrtimer_forward with the highres code. | 1234 | * hrtimer_forward with the highres code. |
1232 | */ | 1235 | */ |
1233 | hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); | 1236 | hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD); |
1234 | /* Get the next period */ | 1237 | /* Get the next period */ |
1235 | next = tick_init_jiffy_update(); | 1238 | next = tick_init_jiffy_update(); |
1236 | 1239 | ||
@@ -1327,7 +1330,7 @@ void tick_setup_sched_timer(void) | |||
1327 | /* | 1330 | /* |
1328 | * Emulate tick processing via per-CPU hrtimers: | 1331 | * Emulate tick processing via per-CPU hrtimers: |
1329 | */ | 1332 | */ |
1330 | hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); | 1333 | hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD); |
1331 | ts->sched_timer.function = tick_sched_timer; | 1334 | ts->sched_timer.function = tick_sched_timer; |
1332 | 1335 | ||
1333 | /* Get the next period (per-CPU) */ | 1336 | /* Get the next period (per-CPU) */ |
@@ -1342,7 +1345,7 @@ void tick_setup_sched_timer(void) | |||
1342 | } | 1345 | } |
1343 | 1346 | ||
1344 | hrtimer_forward(&ts->sched_timer, now, tick_period); | 1347 | hrtimer_forward(&ts->sched_timer, now, tick_period); |
1345 | hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED); | 1348 | hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED_HARD); |
1346 | tick_nohz_activate(ts, NOHZ_MODE_HIGHRES); | 1349 | tick_nohz_activate(ts, NOHZ_MODE_HIGHRES); |
1347 | } | 1350 | } |
1348 | #endif /* HIGH_RES_TIMERS */ | 1351 | #endif /* HIGH_RES_TIMERS */ |
diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 343c7ba33b1c..0e315a2e77ae 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c | |||
@@ -196,6 +196,10 @@ EXPORT_SYMBOL(jiffies_64); | |||
196 | struct timer_base { | 196 | struct timer_base { |
197 | raw_spinlock_t lock; | 197 | raw_spinlock_t lock; |
198 | struct timer_list *running_timer; | 198 | struct timer_list *running_timer; |
199 | #ifdef CONFIG_PREEMPT_RT | ||
200 | spinlock_t expiry_lock; | ||
201 | atomic_t timer_waiters; | ||
202 | #endif | ||
199 | unsigned long clk; | 203 | unsigned long clk; |
200 | unsigned long next_expiry; | 204 | unsigned long next_expiry; |
201 | unsigned int cpu; | 205 | unsigned int cpu; |
@@ -1227,7 +1231,78 @@ int try_to_del_timer_sync(struct timer_list *timer) | |||
1227 | } | 1231 | } |
1228 | EXPORT_SYMBOL(try_to_del_timer_sync); | 1232 | EXPORT_SYMBOL(try_to_del_timer_sync); |
1229 | 1233 | ||
1230 | #ifdef CONFIG_SMP | 1234 | #ifdef CONFIG_PREEMPT_RT |
1235 | static __init void timer_base_init_expiry_lock(struct timer_base *base) | ||
1236 | { | ||
1237 | spin_lock_init(&base->expiry_lock); | ||
1238 | } | ||
1239 | |||
1240 | static inline void timer_base_lock_expiry(struct timer_base *base) | ||
1241 | { | ||
1242 | spin_lock(&base->expiry_lock); | ||
1243 | } | ||
1244 | |||
1245 | static inline void timer_base_unlock_expiry(struct timer_base *base) | ||
1246 | { | ||
1247 | spin_unlock(&base->expiry_lock); | ||
1248 | } | ||
1249 | |||
1250 | /* | ||
1251 | * The counterpart to del_timer_wait_running(). | ||
1252 | * | ||
1253 | * If there is a waiter for base->expiry_lock, then it was waiting for the | ||
1254 | * timer callback to finish. Drop expiry_lock and reacquire it. That allows | ||
1255 | * the waiter to acquire the lock and make progress. | ||
1256 | */ | ||
1257 | static void timer_sync_wait_running(struct timer_base *base) | ||
1258 | { | ||
1259 | if (atomic_read(&base->timer_waiters)) { | ||
1260 | spin_unlock(&base->expiry_lock); | ||
1261 | spin_lock(&base->expiry_lock); | ||
1262 | } | ||
1263 | } | ||
1264 | |||
1265 | /* | ||
1266 | * This function is called on PREEMPT_RT kernels when the fast path | ||
1267 | * deletion of a timer failed because the timer callback function was | ||
1268 | * running. | ||
1269 | * | ||
1270 | * This prevents priority inversion if the softirq thread on a remote CPU | ||
1271 | * got preempted, and it prevents a live lock when the task which tries to | ||
1272 | * delete a timer has preempted the softirq thread running the timer callback | ||
1273 | * function. | ||
1274 | */ | ||
1275 | static void del_timer_wait_running(struct timer_list *timer) | ||
1276 | { | ||
1277 | u32 tf; | ||
1278 | |||
1279 | tf = READ_ONCE(timer->flags); | ||
1280 | if (!(tf & TIMER_MIGRATING)) { | ||
1281 | struct timer_base *base = get_timer_base(tf); | ||
1282 | |||
1283 | /* | ||
1284 | * Mark the base as contended and grab the expiry lock, | ||
1285 | * which is held by the softirq across the timer | ||
1286 | * callback. Drop the lock immediately so the softirq can | ||
1287 | * expire the next timer. In theory the timer could already | ||
1288 | * be running again, but that's more than unlikely and just | ||
1289 | * causes another wait loop. | ||
1290 | */ | ||
1291 | atomic_inc(&base->timer_waiters); | ||
1292 | spin_lock_bh(&base->expiry_lock); | ||
1293 | atomic_dec(&base->timer_waiters); | ||
1294 | spin_unlock_bh(&base->expiry_lock); | ||
1295 | } | ||
1296 | } | ||
1297 | #else | ||
1298 | static inline void timer_base_init_expiry_lock(struct timer_base *base) { } | ||
1299 | static inline void timer_base_lock_expiry(struct timer_base *base) { } | ||
1300 | static inline void timer_base_unlock_expiry(struct timer_base *base) { } | ||
1301 | static inline void timer_sync_wait_running(struct timer_base *base) { } | ||
1302 | static inline void del_timer_wait_running(struct timer_list *timer) { } | ||
1303 | #endif | ||
1304 | |||
1305 | #if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT) | ||
1231 | /** | 1306 | /** |
1232 | * del_timer_sync - deactivate a timer and wait for the handler to finish. | 1307 | * del_timer_sync - deactivate a timer and wait for the handler to finish. |
1233 | * @timer: the timer to be deactivated | 1308 | * @timer: the timer to be deactivated |
@@ -1266,6 +1341,8 @@ EXPORT_SYMBOL(try_to_del_timer_sync); | |||
1266 | */ | 1341 | */ |
1267 | int del_timer_sync(struct timer_list *timer) | 1342 | int del_timer_sync(struct timer_list *timer) |
1268 | { | 1343 | { |
1344 | int ret; | ||
1345 | |||
1269 | #ifdef CONFIG_LOCKDEP | 1346 | #ifdef CONFIG_LOCKDEP |
1270 | unsigned long flags; | 1347 | unsigned long flags; |
1271 | 1348 | ||
@@ -1283,12 +1360,17 @@ int del_timer_sync(struct timer_list *timer) | |||
1283 | * could lead to deadlock. | 1360 | * could lead to deadlock. |
1284 | */ | 1361 | */ |
1285 | WARN_ON(in_irq() && !(timer->flags & TIMER_IRQSAFE)); | 1362 | WARN_ON(in_irq() && !(timer->flags & TIMER_IRQSAFE)); |
1286 | for (;;) { | 1363 | |
1287 | int ret = try_to_del_timer_sync(timer); | 1364 | do { |
1288 | if (ret >= 0) | 1365 | ret = try_to_del_timer_sync(timer); |
1289 | return ret; | 1366 | |
1290 | cpu_relax(); | 1367 | if (unlikely(ret < 0)) { |
1291 | } | 1368 | del_timer_wait_running(timer); |
1369 | cpu_relax(); | ||
1370 | } | ||
1371 | } while (ret < 0); | ||
1372 | |||
1373 | return ret; | ||
1292 | } | 1374 | } |
1293 | EXPORT_SYMBOL(del_timer_sync); | 1375 | EXPORT_SYMBOL(del_timer_sync); |
1294 | #endif | 1376 | #endif |
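
The expiry_lock/timer_waiters pair above implements a sleepable wait-for-callback: the deleter registers itself and blocks on the lock instead of spinning, while the callback side briefly drops the lock whenever someone is waiting. A compact userspace model of the handshake (pthreads, compile with -pthread; an illustration of the protocol, not kernel code):

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdio.h>
    #include <unistd.h>

    static pthread_mutex_t expiry_lock = PTHREAD_MUTEX_INITIALIZER;
    static atomic_int timer_waiters;
    static atomic_bool callback_done;

    static void sync_wait_running(void)        /* callback side, lock held */
    {
        if (atomic_load(&timer_waiters)) {
            pthread_mutex_unlock(&expiry_lock);
            pthread_mutex_lock(&expiry_lock);  /* waiter ran in between */
        }
    }

    static void *softirq_thread(void *arg)
    {
        pthread_mutex_lock(&expiry_lock);
        usleep(1000);                          /* "timer callback" runs */
        atomic_store(&callback_done, true);
        sync_wait_running();                   /* let any waiter make progress */
        pthread_mutex_unlock(&expiry_lock);
        return NULL;
    }

    /* Deleter side; in the kernel this runs only after the fast-path delete
     * observed the callback executing. */
    static void del_timer_wait_running_model(void)
    {
        atomic_fetch_add(&timer_waiters, 1);
        pthread_mutex_lock(&expiry_lock);      /* blocks, does not spin */
        atomic_fetch_sub(&timer_waiters, 1);
        pthread_mutex_unlock(&expiry_lock);
    }

    int main(void)
    {
        pthread_t cb;

        pthread_create(&cb, NULL, softirq_thread, NULL);
        usleep(100);                           /* let the callback start */
        del_timer_wait_running_model();
        pthread_join(cb, NULL);
        printf("callback done: %d\n", atomic_load(&callback_done) ? 1 : 0);
        return 0;
    }
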
@@ -1360,10 +1442,13 @@ static void expire_timers(struct timer_base *base, struct hlist_head *head) | |||
1360 | if (timer->flags & TIMER_IRQSAFE) { | 1442 | if (timer->flags & TIMER_IRQSAFE) { |
1361 | raw_spin_unlock(&base->lock); | 1443 | raw_spin_unlock(&base->lock); |
1362 | call_timer_fn(timer, fn, baseclk); | 1444 | call_timer_fn(timer, fn, baseclk); |
1445 | base->running_timer = NULL; | ||
1363 | raw_spin_lock(&base->lock); | 1446 | raw_spin_lock(&base->lock); |
1364 | } else { | 1447 | } else { |
1365 | raw_spin_unlock_irq(&base->lock); | 1448 | raw_spin_unlock_irq(&base->lock); |
1366 | call_timer_fn(timer, fn, baseclk); | 1449 | call_timer_fn(timer, fn, baseclk); |
1450 | base->running_timer = NULL; | ||
1451 | timer_sync_wait_running(base); | ||
1367 | raw_spin_lock_irq(&base->lock); | 1452 | raw_spin_lock_irq(&base->lock); |
1368 | } | 1453 | } |
1369 | } | 1454 | } |
@@ -1643,7 +1728,7 @@ void update_process_times(int user_tick) | |||
1643 | #endif | 1728 | #endif |
1644 | scheduler_tick(); | 1729 | scheduler_tick(); |
1645 | if (IS_ENABLED(CONFIG_POSIX_TIMERS)) | 1730 | if (IS_ENABLED(CONFIG_POSIX_TIMERS)) |
1646 | run_posix_cpu_timers(p); | 1731 | run_posix_cpu_timers(); |
1647 | } | 1732 | } |
1648 | 1733 | ||
1649 | /** | 1734 | /** |
@@ -1658,6 +1743,7 @@ static inline void __run_timers(struct timer_base *base) | |||
1658 | if (!time_after_eq(jiffies, base->clk)) | 1743 | if (!time_after_eq(jiffies, base->clk)) |
1659 | return; | 1744 | return; |
1660 | 1745 | ||
1746 | timer_base_lock_expiry(base); | ||
1661 | raw_spin_lock_irq(&base->lock); | 1747 | raw_spin_lock_irq(&base->lock); |
1662 | 1748 | ||
1663 | /* | 1749 | /* |
@@ -1684,8 +1770,8 @@ static inline void __run_timers(struct timer_base *base) | |||
1684 | while (levels--) | 1770 | while (levels--) |
1685 | expire_timers(base, heads + levels); | 1771 | expire_timers(base, heads + levels); |
1686 | } | 1772 | } |
1687 | base->running_timer = NULL; | ||
1688 | raw_spin_unlock_irq(&base->lock); | 1773 | raw_spin_unlock_irq(&base->lock); |
1774 | timer_base_unlock_expiry(base); | ||
1689 | } | 1775 | } |
1690 | 1776 | ||
1691 | /* | 1777 | /* |
@@ -1930,6 +2016,7 @@ static void __init init_timer_cpu(int cpu) | |||
1930 | base->cpu = cpu; | 2016 | base->cpu = cpu; |
1931 | raw_spin_lock_init(&base->lock); | 2017 | raw_spin_lock_init(&base->lock); |
1932 | base->clk = jiffies; | 2018 | base->clk = jiffies; |
2019 | timer_base_init_expiry_lock(base); | ||
1933 | } | 2020 | } |
1934 | } | 2021 | } |
1935 | 2022 | ||
diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 7f9e7b9306fe..f41334ef0971 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c | |||
@@ -490,10 +490,10 @@ static void watchdog_enable(unsigned int cpu) | |||
490 | * Start the timer first to prevent the NMI watchdog triggering | 490 | * Start the timer first to prevent the NMI watchdog triggering |
491 | * before the timer has a chance to fire. | 491 | * before the timer has a chance to fire. |
492 | */ | 492 | */ |
493 | hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | 493 | hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD); |
494 | hrtimer->function = watchdog_timer_fn; | 494 | hrtimer->function = watchdog_timer_fn; |
495 | hrtimer_start(hrtimer, ns_to_ktime(sample_period), | 495 | hrtimer_start(hrtimer, ns_to_ktime(sample_period), |
496 | HRTIMER_MODE_REL_PINNED); | 496 | HRTIMER_MODE_REL_PINNED_HARD); |
497 | 497 | ||
498 | /* Initialize timestamp */ | 498 | /* Initialize timestamp */ |
499 | __touch_watchdog(); | 499 | __touch_watchdog(); |
diff --git a/lib/timerqueue.c b/lib/timerqueue.c index bc7e64df27df..c52710964593 100644 --- a/lib/timerqueue.c +++ b/lib/timerqueue.c | |||
@@ -26,9 +26,10 @@ | |||
26 | */ | 26 | */ |
27 | bool timerqueue_add(struct timerqueue_head *head, struct timerqueue_node *node) | 27 | bool timerqueue_add(struct timerqueue_head *head, struct timerqueue_node *node) |
28 | { | 28 | { |
29 | struct rb_node **p = &head->head.rb_node; | 29 | struct rb_node **p = &head->rb_root.rb_root.rb_node; |
30 | struct rb_node *parent = NULL; | 30 | struct rb_node *parent = NULL; |
31 | struct timerqueue_node *ptr; | 31 | struct timerqueue_node *ptr; |
32 | bool leftmost = true; | ||
32 | 33 | ||
33 | /* Make sure we don't add nodes that are already added */ | 34 | /* Make sure we don't add nodes that are already added */ |
34 | WARN_ON_ONCE(!RB_EMPTY_NODE(&node->node)); | 35 | WARN_ON_ONCE(!RB_EMPTY_NODE(&node->node)); |
@@ -36,19 +37,17 @@ bool timerqueue_add(struct timerqueue_head *head, struct timerqueue_node *node) | |||
36 | while (*p) { | 37 | while (*p) { |
37 | parent = *p; | 38 | parent = *p; |
38 | ptr = rb_entry(parent, struct timerqueue_node, node); | 39 | ptr = rb_entry(parent, struct timerqueue_node, node); |
39 | if (node->expires < ptr->expires) | 40 | if (node->expires < ptr->expires) { |
40 | p = &(*p)->rb_left; | 41 | p = &(*p)->rb_left; |
41 | else | 42 | } else { |
42 | p = &(*p)->rb_right; | 43 | p = &(*p)->rb_right; |
44 | leftmost = false; | ||
45 | } | ||
43 | } | 46 | } |
44 | rb_link_node(&node->node, parent, p); | 47 | rb_link_node(&node->node, parent, p); |
45 | rb_insert_color(&node->node, &head->head); | 48 | rb_insert_color_cached(&node->node, &head->rb_root, leftmost); |
46 | 49 | ||
47 | if (!head->next || node->expires < head->next->expires) { | 50 | return leftmost; |
48 | head->next = node; | ||
49 | return true; | ||
50 | } | ||
51 | return false; | ||
52 | } | 51 | } |
53 | EXPORT_SYMBOL_GPL(timerqueue_add); | 52 | EXPORT_SYMBOL_GPL(timerqueue_add); |
54 | 53 | ||
@@ -65,15 +64,10 @@ bool timerqueue_del(struct timerqueue_head *head, struct timerqueue_node *node) | |||
65 | { | 64 | { |
66 | WARN_ON_ONCE(RB_EMPTY_NODE(&node->node)); | 65 | WARN_ON_ONCE(RB_EMPTY_NODE(&node->node)); |
67 | 66 | ||
68 | /* update next pointer */ | 67 | rb_erase_cached(&node->node, &head->rb_root); |
69 | if (head->next == node) { | ||
70 | struct rb_node *rbn = rb_next(&node->node); | ||
71 | |||
72 | head->next = rb_entry_safe(rbn, struct timerqueue_node, node); | ||
73 | } | ||
74 | rb_erase(&node->node, &head->head); | ||
75 | RB_CLEAR_NODE(&node->node); | 68 | RB_CLEAR_NODE(&node->node); |
76 | return head->next != NULL; | 69 | |
70 | return !RB_EMPTY_ROOT(&head->rb_root.rb_root); | ||
77 | } | 71 | } |
78 | EXPORT_SYMBOL_GPL(timerqueue_del); | 72 | EXPORT_SYMBOL_GPL(timerqueue_del); |
79 | 73 | ||
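
timerqueue_add()/timerqueue_del() now delegate leftmost tracking to rb_root_cached instead of maintaining a manual head->next pointer. A simplified standalone model of the cached-minimum idea (a plain array stands in for the rbtree; names are illustrative):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define MAXN 8

    static uint64_t slots[MAXN];
    static int nr;
    static int cached_min = -1;   /* index of earliest expiry, -1 when empty */

    /* Insert; returns true when the new entry became the leftmost (earliest),
     * mirroring timerqueue_add()'s return value. */
    static bool model_add(uint64_t expires)
    {
        bool leftmost = (cached_min < 0 || expires < slots[cached_min]);

        slots[nr] = expires;
        if (leftmost)
            cached_min = nr;
        nr++;
        return leftmost;
    }

    /* O(1) peek of the next expiring entry, like timerqueue_getnext(). */
    static bool model_peek(uint64_t *expires)
    {
        if (cached_min < 0)
            return false;
        *expires = slots[cached_min];
        return true;
    }

    int main(void)
    {
        uint64_t next;

        printf("add 300 -> leftmost=%d\n", model_add(300));
        printf("add 100 -> leftmost=%d\n", model_add(100));
        printf("add 200 -> leftmost=%d\n", model_add(200));
        if (model_peek(&next))
            printf("next expiry: %llu\n", (unsigned long long)next);
        return 0;
    }

Deleting the cached minimum is the one case that still needs tree work; rb_erase_cached() updates the cached leftmost internally, which is why the manual bookkeeping above could be removed.
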
diff --git a/net/core/pktgen.c b/net/core/pktgen.c index bb9915291644..1d0c1b4886d7 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c | |||
@@ -2156,7 +2156,7 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until) | |||
2156 | s64 remaining; | 2156 | s64 remaining; |
2157 | struct hrtimer_sleeper t; | 2157 | struct hrtimer_sleeper t; |
2158 | 2158 | ||
2159 | hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); | 2159 | hrtimer_init_sleeper_on_stack(&t, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); |
2160 | hrtimer_set_expires(&t.timer, spin_until); | 2160 | hrtimer_set_expires(&t.timer, spin_until); |
2161 | 2161 | ||
2162 | remaining = ktime_to_ns(hrtimer_expires_remaining(&t.timer)); | 2162 | remaining = ktime_to_ns(hrtimer_expires_remaining(&t.timer)); |
@@ -2170,11 +2170,9 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until) | |||
2170 | end_time = ktime_get(); | 2170 | end_time = ktime_get(); |
2171 | } while (ktime_compare(end_time, spin_until) < 0); | 2171 | } while (ktime_compare(end_time, spin_until) < 0); |
2172 | } else { | 2172 | } else { |
2173 | /* see do_nanosleep */ | ||
2174 | hrtimer_init_sleeper(&t, current); | ||
2175 | do { | 2173 | do { |
2176 | set_current_state(TASK_INTERRUPTIBLE); | 2174 | set_current_state(TASK_INTERRUPTIBLE); |
2177 | hrtimer_start_expires(&t.timer, HRTIMER_MODE_ABS); | 2175 | hrtimer_sleeper_start_expires(&t, HRTIMER_MODE_ABS); |
2178 | 2176 | ||
2179 | if (likely(t.task)) | 2177 | if (likely(t.task)) |
2180 | schedule(); | 2178 | schedule(); |