diff options
| -rw-r--r-- | Documentation/admin-guide/cgroup-v2.rst | 27 | ||||
| -rw-r--r-- | include/linux/cgroup-defs.h | 33 | ||||
| -rw-r--r-- | include/linux/cgroup.h | 43 | ||||
| -rw-r--r-- | include/linux/sched.h | 2 | ||||
| -rw-r--r-- | include/linux/sched/jobctl.h | 2 | ||||
| -rw-r--r-- | include/trace/events/cgroup.h | 55 | ||||
| -rw-r--r-- | kernel/cgroup/Makefile | 4 | ||||
| -rw-r--r-- | kernel/cgroup/cgroup-internal.h | 8 | ||||
| -rw-r--r-- | kernel/cgroup/cgroup-v1.c | 16 | ||||
| -rw-r--r-- | kernel/cgroup/cgroup.c | 152 | ||||
| -rw-r--r-- | kernel/cgroup/debug.c | 8 | ||||
| -rw-r--r-- | kernel/cgroup/freezer.c | 639 | ||||
| -rw-r--r-- | kernel/cgroup/legacy_freezer.c | 481 | ||||
| -rw-r--r-- | kernel/fork.c | 2 | ||||
| -rw-r--r-- | kernel/signal.c | 66 | ||||
| -rw-r--r-- | tools/testing/selftests/cgroup/.gitignore | 1 | ||||
| -rw-r--r-- | tools/testing/selftests/cgroup/Makefile | 2 | ||||
| -rw-r--r-- | tools/testing/selftests/cgroup/cgroup_util.c | 58 | ||||
| -rw-r--r-- | tools/testing/selftests/cgroup/cgroup_util.h | 5 | ||||
| -rw-r--r-- | tools/testing/selftests/cgroup/test_freezer.c | 851 |
20 files changed, 2012 insertions, 443 deletions
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst index 20f92c16ffbf..88e746074252 100644 --- a/Documentation/admin-guide/cgroup-v2.rst +++ b/Documentation/admin-guide/cgroup-v2.rst | |||
| @@ -864,6 +864,8 @@ All cgroup core files are prefixed with "cgroup." | |||
| 864 | populated | 864 | populated |
| 865 | 1 if the cgroup or its descendants contains any live | 865 | 1 if the cgroup or its descendants contains any live |
| 866 | processes; otherwise, 0. | 866 | processes; otherwise, 0. |
| 867 | frozen | ||
| 868 | 1 if the cgroup is frozen; otherwise, 0. | ||
| 867 | 869 | ||
| 868 | cgroup.max.descendants | 870 | cgroup.max.descendants |
| 869 | A read-write single value files. The default is "max". | 871 | A read-write single value files. The default is "max". |
| @@ -897,6 +899,31 @@ All cgroup core files are prefixed with "cgroup." | |||
| 897 | A dying cgroup can consume system resources not exceeding | 899 | A dying cgroup can consume system resources not exceeding |
| 898 | limits, which were active at the moment of cgroup deletion. | 900 | limits, which were active at the moment of cgroup deletion. |
| 899 | 901 | ||
| 902 | cgroup.freeze | ||
| 903 | A read-write single value file which exists on non-root cgroups. | ||
| 904 | Allowed values are "0" and "1". The default is "0". | ||
| 905 | |||
| 906 | Writing "1" to the file causes freezing of the cgroup and all | ||
| 907 | descendant cgroups. This means that all member processes will | ||
| 908 | be stopped and will not run until the cgroup is explicitly | ||
| 909 | unfrozen. Freezing of the cgroup may take some time; when this action | ||
| 910 | is completed, the "frozen" value in the cgroup.events control file | ||
| 911 | will be updated to "1" and the corresponding notification will be | ||
| 912 | issued. | ||
| 913 | |||
| 914 | A cgroup can be frozen either by its own settings, or by settings | ||
| 915 | of any ancestor cgroups. If any ancestor cgroup is frozen, the | ||
| 916 | cgroup will remain frozen. | ||
| 917 | |||
| 918 | Processes in the frozen cgroup can be killed by a fatal signal. | ||
| 919 | They can also enter and leave a frozen cgroup: either by an explicit | ||
| 920 | move by a user, or if freezing of the cgroup races with fork(). | ||
| 921 | If a process is moved to a frozen cgroup, it stops. If a process is | ||
| 922 | moved out of a frozen cgroup, it becomes running. | ||
| 923 | |||
| 924 | Frozen status of a cgroup doesn't affect any cgroup tree operations: | ||
| 925 | it's possible to delete a frozen (and empty) cgroup, as well as | ||
| 926 | create new sub-cgroups. | ||
| 900 | 927 | ||
| 901 | Controllers | 928 | Controllers |
| 902 | =========== | 929 | =========== |
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 1c70803e9f77..77258d276f93 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h | |||
| @@ -65,6 +65,12 @@ enum { | |||
| 65 | * specified at mount time and thus is implemented here. | 65 | * specified at mount time and thus is implemented here. |
| 66 | */ | 66 | */ |
| 67 | CGRP_CPUSET_CLONE_CHILDREN, | 67 | CGRP_CPUSET_CLONE_CHILDREN, |
| 68 | |||
| 69 | /* Control group has to be frozen. */ | ||
| 70 | CGRP_FREEZE, | ||
| 71 | |||
| 72 | /* Cgroup is frozen. */ | ||
| 73 | CGRP_FROZEN, | ||
| 68 | }; | 74 | }; |
| 69 | 75 | ||
| 70 | /* cgroup_root->flags */ | 76 | /* cgroup_root->flags */ |
| @@ -317,6 +323,25 @@ struct cgroup_rstat_cpu { | |||
| 317 | struct cgroup *updated_next; /* NULL iff not on the list */ | 323 | struct cgroup *updated_next; /* NULL iff not on the list */ |
| 318 | }; | 324 | }; |
| 319 | 325 | ||
| 326 | struct cgroup_freezer_state { | ||
| 327 | /* Should the cgroup and its descendants be frozen. */ | ||
| 328 | bool freeze; | ||
| 329 | |||
| 330 | /* Should the cgroup actually be frozen? */ | ||
| 331 | int e_freeze; | ||
| 332 | |||
| 333 | /* Fields below are protected by css_set_lock */ | ||
| 334 | |||
| 335 | /* Number of frozen descendant cgroups */ | ||
| 336 | int nr_frozen_descendants; | ||
| 337 | |||
| 338 | /* | ||
| 339 | * Number of tasks, which are counted as frozen: | ||
| 340 | * frozen, SIGSTOPped, and PTRACEd. | ||
| 341 | */ | ||
| 342 | int nr_frozen_tasks; | ||
| 343 | }; | ||
| 344 | |||
| 320 | struct cgroup { | 345 | struct cgroup { |
| 321 | /* self css with NULL ->ss, points back to this cgroup */ | 346 | /* self css with NULL ->ss, points back to this cgroup */ |
| 322 | struct cgroup_subsys_state self; | 347 | struct cgroup_subsys_state self; |
| @@ -349,6 +374,11 @@ struct cgroup { | |||
| 349 | * Dying cgroups are cgroups which were deleted by a user, | 374 | * Dying cgroups are cgroups which were deleted by a user, |
| 350 | * but are still existing because someone else is holding a reference. | 375 | * but are still existing because someone else is holding a reference. |
| 351 | * max_descendants is a maximum allowed number of descent cgroups. | 376 | * max_descendants is a maximum allowed number of descent cgroups. |
| 377 | * | ||
| 378 | * nr_descendants and nr_dying_descendants are protected | ||
| 379 | * by cgroup_mutex and css_set_lock. It's fine to read them holding | ||
| 380 | * any of cgroup_mutex and css_set_lock; for writing both locks | ||
| 381 | * should be held. | ||
| 352 | */ | 382 | */ |
| 353 | int nr_descendants; | 383 | int nr_descendants; |
| 354 | int nr_dying_descendants; | 384 | int nr_dying_descendants; |
| @@ -448,6 +478,9 @@ struct cgroup { | |||
| 448 | /* If there is block congestion on this cgroup. */ | 478 | /* If there is block congestion on this cgroup. */ |
| 449 | atomic_t congestion_count; | 479 | atomic_t congestion_count; |
| 450 | 480 | ||
| 481 | /* Used to store internal freezer state */ | ||
| 482 | struct cgroup_freezer_state freezer; | ||
| 483 | |||
| 451 | /* ids of the ancestors at each level including self */ | 484 | /* ids of the ancestors at each level including self */ |
| 452 | int ancestor_ids[]; | 485 | int ancestor_ids[]; |
| 453 | }; | 486 | }; |
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 81f58b4a5418..c0077adeea83 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h | |||
| @@ -881,4 +881,47 @@ static inline void put_cgroup_ns(struct cgroup_namespace *ns) | |||
| 881 | free_cgroup_ns(ns); | 881 | free_cgroup_ns(ns); |
| 882 | } | 882 | } |
| 883 | 883 | ||
| 884 | #ifdef CONFIG_CGROUPS | ||
| 885 | |||
| 886 | void cgroup_enter_frozen(void); | ||
| 887 | void cgroup_leave_frozen(bool always_leave); | ||
| 888 | void cgroup_update_frozen(struct cgroup *cgrp); | ||
| 889 | void cgroup_freeze(struct cgroup *cgrp, bool freeze); | ||
| 890 | void cgroup_freezer_migrate_task(struct task_struct *task, struct cgroup *src, | ||
| 891 | struct cgroup *dst); | ||
| 892 | |||
| 893 | static inline bool cgroup_task_freeze(struct task_struct *task) | ||
| 894 | { | ||
| 895 | bool ret; | ||
| 896 | |||
| 897 | if (task->flags & PF_KTHREAD) | ||
| 898 | return false; | ||
| 899 | |||
| 900 | rcu_read_lock(); | ||
| 901 | ret = test_bit(CGRP_FREEZE, &task_dfl_cgroup(task)->flags); | ||
| 902 | rcu_read_unlock(); | ||
| 903 | |||
| 904 | return ret; | ||
| 905 | } | ||
| 906 | |||
| 907 | static inline bool cgroup_task_frozen(struct task_struct *task) | ||
| 908 | { | ||
| 909 | return task->frozen; | ||
| 910 | } | ||
| 911 | |||
| 912 | #else /* !CONFIG_CGROUPS */ | ||
| 913 | |||
| 914 | static inline void cgroup_enter_frozen(void) { } | ||
| 915 | static inline void cgroup_leave_frozen(bool always_leave) { } | ||
| 916 | static inline bool cgroup_task_freeze(struct task_struct *task) | ||
| 917 | { | ||
| 918 | return false; | ||
| 919 | } | ||
| 920 | static inline bool cgroup_task_frozen(struct task_struct *task) | ||
| 921 | { | ||
| 922 | return false; | ||
| 923 | } | ||
| 924 | |||
| 925 | #endif /* !CONFIG_CGROUPS */ | ||
| 926 | |||
| 884 | #endif /* _LINUX_CGROUP_H */ | 927 | #endif /* _LINUX_CGROUP_H */ |
diff --git a/include/linux/sched.h b/include/linux/sched.h index 50606a6e73d6..a2cd15855bad 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
| @@ -726,6 +726,8 @@ struct task_struct { | |||
| 726 | #ifdef CONFIG_CGROUPS | 726 | #ifdef CONFIG_CGROUPS |
| 727 | /* disallow userland-initiated cgroup migration */ | 727 | /* disallow userland-initiated cgroup migration */ |
| 728 | unsigned no_cgroup_migration:1; | 728 | unsigned no_cgroup_migration:1; |
| 729 | /* task is frozen/stopped (used by the cgroup freezer) */ | ||
| 730 | unsigned frozen:1; | ||
| 729 | #endif | 731 | #endif |
| 730 | #ifdef CONFIG_BLK_CGROUP | 732 | #ifdef CONFIG_BLK_CGROUP |
| 731 | /* to be used once the psi infrastructure lands upstream. */ | 733 | /* to be used once the psi infrastructure lands upstream. */ |
diff --git a/include/linux/sched/jobctl.h b/include/linux/sched/jobctl.h index 98228bd48aee..fa067de9f1a9 100644 --- a/include/linux/sched/jobctl.h +++ b/include/linux/sched/jobctl.h | |||
| @@ -18,6 +18,7 @@ struct task_struct; | |||
| 18 | #define JOBCTL_TRAP_NOTIFY_BIT 20 /* trap for NOTIFY */ | 18 | #define JOBCTL_TRAP_NOTIFY_BIT 20 /* trap for NOTIFY */ |
| 19 | #define JOBCTL_TRAPPING_BIT 21 /* switching to TRACED */ | 19 | #define JOBCTL_TRAPPING_BIT 21 /* switching to TRACED */ |
| 20 | #define JOBCTL_LISTENING_BIT 22 /* ptracer is listening for events */ | 20 | #define JOBCTL_LISTENING_BIT 22 /* ptracer is listening for events */ |
| 21 | #define JOBCTL_TRAP_FREEZE_BIT 23 /* trap for cgroup freezer */ | ||
| 21 | 22 | ||
| 22 | #define JOBCTL_STOP_DEQUEUED (1UL << JOBCTL_STOP_DEQUEUED_BIT) | 23 | #define JOBCTL_STOP_DEQUEUED (1UL << JOBCTL_STOP_DEQUEUED_BIT) |
| 23 | #define JOBCTL_STOP_PENDING (1UL << JOBCTL_STOP_PENDING_BIT) | 24 | #define JOBCTL_STOP_PENDING (1UL << JOBCTL_STOP_PENDING_BIT) |
| @@ -26,6 +27,7 @@ struct task_struct; | |||
| 26 | #define JOBCTL_TRAP_NOTIFY (1UL << JOBCTL_TRAP_NOTIFY_BIT) | 27 | #define JOBCTL_TRAP_NOTIFY (1UL << JOBCTL_TRAP_NOTIFY_BIT) |
| 27 | #define JOBCTL_TRAPPING (1UL << JOBCTL_TRAPPING_BIT) | 28 | #define JOBCTL_TRAPPING (1UL << JOBCTL_TRAPPING_BIT) |
| 28 | #define JOBCTL_LISTENING (1UL << JOBCTL_LISTENING_BIT) | 29 | #define JOBCTL_LISTENING (1UL << JOBCTL_LISTENING_BIT) |
| 30 | #define JOBCTL_TRAP_FREEZE (1UL << JOBCTL_TRAP_FREEZE_BIT) | ||
| 29 | 31 | ||
| 30 | #define JOBCTL_TRAP_MASK (JOBCTL_TRAP_STOP | JOBCTL_TRAP_NOTIFY) | 32 | #define JOBCTL_TRAP_MASK (JOBCTL_TRAP_STOP | JOBCTL_TRAP_NOTIFY) |
| 31 | #define JOBCTL_PENDING_MASK (JOBCTL_STOP_PENDING | JOBCTL_TRAP_MASK) | 33 | #define JOBCTL_PENDING_MASK (JOBCTL_STOP_PENDING | JOBCTL_TRAP_MASK) |
diff --git a/include/trace/events/cgroup.h b/include/trace/events/cgroup.h index a401ff5e7847..a566cc521476 100644 --- a/include/trace/events/cgroup.h +++ b/include/trace/events/cgroup.h | |||
| @@ -103,6 +103,20 @@ DEFINE_EVENT(cgroup, cgroup_rename, | |||
| 103 | TP_ARGS(cgrp, path) | 103 | TP_ARGS(cgrp, path) |
| 104 | ); | 104 | ); |
| 105 | 105 | ||
| 106 | DEFINE_EVENT(cgroup, cgroup_freeze, | ||
| 107 | |||
| 108 | TP_PROTO(struct cgroup *cgrp, const char *path), | ||
| 109 | |||
| 110 | TP_ARGS(cgrp, path) | ||
| 111 | ); | ||
| 112 | |||
| 113 | DEFINE_EVENT(cgroup, cgroup_unfreeze, | ||
| 114 | |||
| 115 | TP_PROTO(struct cgroup *cgrp, const char *path), | ||
| 116 | |||
| 117 | TP_ARGS(cgrp, path) | ||
| 118 | ); | ||
| 119 | |||
| 106 | DECLARE_EVENT_CLASS(cgroup_migrate, | 120 | DECLARE_EVENT_CLASS(cgroup_migrate, |
| 107 | 121 | ||
| 108 | TP_PROTO(struct cgroup *dst_cgrp, const char *path, | 122 | TP_PROTO(struct cgroup *dst_cgrp, const char *path, |
| @@ -149,6 +163,47 @@ DEFINE_EVENT(cgroup_migrate, cgroup_transfer_tasks, | |||
| 149 | TP_ARGS(dst_cgrp, path, task, threadgroup) | 163 | TP_ARGS(dst_cgrp, path, task, threadgroup) |
| 150 | ); | 164 | ); |
| 151 | 165 | ||
| 166 | DECLARE_EVENT_CLASS(cgroup_event, | ||
| 167 | |||
| 168 | TP_PROTO(struct cgroup *cgrp, const char *path, int val), | ||
| 169 | |||
| 170 | TP_ARGS(cgrp, path, val), | ||
| 171 | |||
| 172 | TP_STRUCT__entry( | ||
| 173 | __field( int, root ) | ||
| 174 | __field( int, id ) | ||
| 175 | __field( int, level ) | ||
| 176 | __string( path, path ) | ||
| 177 | __field( int, val ) | ||
| 178 | ), | ||
| 179 | |||
| 180 | TP_fast_assign( | ||
| 181 | __entry->root = cgrp->root->hierarchy_id; | ||
| 182 | __entry->id = cgrp->id; | ||
| 183 | __entry->level = cgrp->level; | ||
| 184 | __assign_str(path, path); | ||
| 185 | __entry->val = val; | ||
| 186 | ), | ||
| 187 | |||
| 188 | TP_printk("root=%d id=%d level=%d path=%s val=%d", | ||
| 189 | __entry->root, __entry->id, __entry->level, __get_str(path), | ||
| 190 | __entry->val) | ||
| 191 | ); | ||
| 192 | |||
| 193 | DEFINE_EVENT(cgroup_event, cgroup_notify_populated, | ||
| 194 | |||
| 195 | TP_PROTO(struct cgroup *cgrp, const char *path, int val), | ||
| 196 | |||
| 197 | TP_ARGS(cgrp, path, val) | ||
| 198 | ); | ||
| 199 | |||
| 200 | DEFINE_EVENT(cgroup_event, cgroup_notify_frozen, | ||
| 201 | |||
| 202 | TP_PROTO(struct cgroup *cgrp, const char *path, int val), | ||
| 203 | |||
| 204 | TP_ARGS(cgrp, path, val) | ||
| 205 | ); | ||
| 206 | |||
| 152 | #endif /* _TRACE_CGROUP_H */ | 207 | #endif /* _TRACE_CGROUP_H */ |
| 153 | 208 | ||
| 154 | /* This part must be outside protection */ | 209 | /* This part must be outside protection */ |
diff --git a/kernel/cgroup/Makefile b/kernel/cgroup/Makefile index bfcdae896122..5d7a76bfbbb7 100644 --- a/kernel/cgroup/Makefile +++ b/kernel/cgroup/Makefile | |||
| @@ -1,7 +1,7 @@ | |||
| 1 | # SPDX-License-Identifier: GPL-2.0 | 1 | # SPDX-License-Identifier: GPL-2.0 |
| 2 | obj-y := cgroup.o rstat.o namespace.o cgroup-v1.o | 2 | obj-y := cgroup.o rstat.o namespace.o cgroup-v1.o freezer.o |
| 3 | 3 | ||
| 4 | obj-$(CONFIG_CGROUP_FREEZER) += freezer.o | 4 | obj-$(CONFIG_CGROUP_FREEZER) += legacy_freezer.o |
| 5 | obj-$(CONFIG_CGROUP_PIDS) += pids.o | 5 | obj-$(CONFIG_CGROUP_PIDS) += pids.o |
| 6 | obj-$(CONFIG_CGROUP_RDMA) += rdma.o | 6 | obj-$(CONFIG_CGROUP_RDMA) += rdma.o |
| 7 | obj-$(CONFIG_CPUSETS) += cpuset.o | 7 | obj-$(CONFIG_CPUSETS) += cpuset.o |
diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h index 30e39f3932ad..809e34a3c017 100644 --- a/kernel/cgroup/cgroup-internal.h +++ b/kernel/cgroup/cgroup-internal.h | |||
| @@ -28,12 +28,15 @@ extern void __init enable_debug_cgroup(void); | |||
| 28 | #define TRACE_CGROUP_PATH(type, cgrp, ...) \ | 28 | #define TRACE_CGROUP_PATH(type, cgrp, ...) \ |
| 29 | do { \ | 29 | do { \ |
| 30 | if (trace_cgroup_##type##_enabled()) { \ | 30 | if (trace_cgroup_##type##_enabled()) { \ |
| 31 | spin_lock(&trace_cgroup_path_lock); \ | 31 | unsigned long flags; \ |
| 32 | spin_lock_irqsave(&trace_cgroup_path_lock, \ | ||
| 33 | flags); \ | ||
| 32 | cgroup_path(cgrp, trace_cgroup_path, \ | 34 | cgroup_path(cgrp, trace_cgroup_path, \ |
| 33 | TRACE_CGROUP_PATH_LEN); \ | 35 | TRACE_CGROUP_PATH_LEN); \ |
| 34 | trace_cgroup_##type(cgrp, trace_cgroup_path, \ | 36 | trace_cgroup_##type(cgrp, trace_cgroup_path, \ |
| 35 | ##__VA_ARGS__); \ | 37 | ##__VA_ARGS__); \ |
| 36 | spin_unlock(&trace_cgroup_path_lock); \ | 38 | spin_unlock_irqrestore(&trace_cgroup_path_lock, \ |
| 39 | flags); \ | ||
| 37 | } \ | 40 | } \ |
| 38 | } while (0) | 41 | } while (0) |
| 39 | 42 | ||
| @@ -240,6 +243,7 @@ int cgroup_rmdir(struct kernfs_node *kn); | |||
| 240 | int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node, | 243 | int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node, |
| 241 | struct kernfs_root *kf_root); | 244 | struct kernfs_root *kf_root); |
| 242 | 245 | ||
| 246 | int __cgroup_task_count(const struct cgroup *cgrp); | ||
| 243 | int cgroup_task_count(const struct cgroup *cgrp); | 247 | int cgroup_task_count(const struct cgroup *cgrp); |
| 244 | 248 | ||
| 245 | /* | 249 | /* |
diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index c126b34fd4ff..68ca5de7ec27 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c | |||
| @@ -342,22 +342,6 @@ static struct cgroup_pidlist *cgroup_pidlist_find_create(struct cgroup *cgrp, | |||
| 342 | return l; | 342 | return l; |
| 343 | } | 343 | } |
| 344 | 344 | ||
| 345 | /** | ||
| 346 | * cgroup_task_count - count the number of tasks in a cgroup. | ||
| 347 | * @cgrp: the cgroup in question | ||
| 348 | */ | ||
| 349 | int cgroup_task_count(const struct cgroup *cgrp) | ||
| 350 | { | ||
| 351 | int count = 0; | ||
| 352 | struct cgrp_cset_link *link; | ||
| 353 | |||
| 354 | spin_lock_irq(&css_set_lock); | ||
| 355 | list_for_each_entry(link, &cgrp->cset_links, cset_link) | ||
| 356 | count += link->cset->nr_tasks; | ||
| 357 | spin_unlock_irq(&css_set_lock); | ||
| 358 | return count; | ||
| 359 | } | ||
| 360 | |||
| 361 | /* | 345 | /* |
| 362 | * Load a cgroup's pidarray with either procs' tgids or tasks' pids | 346 | * Load a cgroup's pidarray with either procs' tgids or tasks' pids |
| 363 | */ | 347 | */ |
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 3f2b4bde0f9c..327f37c9fdfa 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c | |||
| @@ -593,6 +593,39 @@ static void cgroup_get_live(struct cgroup *cgrp) | |||
| 593 | css_get(&cgrp->self); | 593 | css_get(&cgrp->self); |
| 594 | } | 594 | } |
| 595 | 595 | ||
| 596 | /** | ||
| 597 | * __cgroup_task_count - count the number of tasks in a cgroup. The caller | ||
| 598 | * is responsible for taking the css_set_lock. | ||
| 599 | * @cgrp: the cgroup in question | ||
| 600 | */ | ||
| 601 | int __cgroup_task_count(const struct cgroup *cgrp) | ||
| 602 | { | ||
| 603 | int count = 0; | ||
| 604 | struct cgrp_cset_link *link; | ||
| 605 | |||
| 606 | lockdep_assert_held(&css_set_lock); | ||
| 607 | |||
| 608 | list_for_each_entry(link, &cgrp->cset_links, cset_link) | ||
| 609 | count += link->cset->nr_tasks; | ||
| 610 | |||
| 611 | return count; | ||
| 612 | } | ||
| 613 | |||
| 614 | /** | ||
| 615 | * cgroup_task_count - count the number of tasks in a cgroup. | ||
| 616 | * @cgrp: the cgroup in question | ||
| 617 | */ | ||
| 618 | int cgroup_task_count(const struct cgroup *cgrp) | ||
| 619 | { | ||
| 620 | int count; | ||
| 621 | |||
| 622 | spin_lock_irq(&css_set_lock); | ||
| 623 | count = __cgroup_task_count(cgrp); | ||
| 624 | spin_unlock_irq(&css_set_lock); | ||
| 625 | |||
| 626 | return count; | ||
| 627 | } | ||
| 628 | |||
| 596 | struct cgroup_subsys_state *of_css(struct kernfs_open_file *of) | 629 | struct cgroup_subsys_state *of_css(struct kernfs_open_file *of) |
| 597 | { | 630 | { |
| 598 | struct cgroup *cgrp = of->kn->parent->priv; | 631 | struct cgroup *cgrp = of->kn->parent->priv; |
| @@ -783,6 +816,8 @@ static void cgroup_update_populated(struct cgroup *cgrp, bool populated) | |||
| 783 | break; | 816 | break; |
| 784 | 817 | ||
| 785 | cgroup1_check_for_release(cgrp); | 818 | cgroup1_check_for_release(cgrp); |
| 819 | TRACE_CGROUP_PATH(notify_populated, cgrp, | ||
| 820 | cgroup_is_populated(cgrp)); | ||
| 786 | cgroup_file_notify(&cgrp->events_file); | 821 | cgroup_file_notify(&cgrp->events_file); |
| 787 | 822 | ||
| 788 | child = cgrp; | 823 | child = cgrp; |
| @@ -2402,8 +2437,15 @@ static int cgroup_migrate_execute(struct cgroup_mgctx *mgctx) | |||
| 2402 | get_css_set(to_cset); | 2437 | get_css_set(to_cset); |
| 2403 | to_cset->nr_tasks++; | 2438 | to_cset->nr_tasks++; |
| 2404 | css_set_move_task(task, from_cset, to_cset, true); | 2439 | css_set_move_task(task, from_cset, to_cset, true); |
| 2405 | put_css_set_locked(from_cset); | ||
| 2406 | from_cset->nr_tasks--; | 2440 | from_cset->nr_tasks--; |
| 2441 | /* | ||
| 2442 | * If the source or destination cgroup is frozen, | ||
| 2443 | * the task might require to change its state. | ||
| 2444 | */ | ||
| 2445 | cgroup_freezer_migrate_task(task, from_cset->dfl_cgrp, | ||
| 2446 | to_cset->dfl_cgrp); | ||
| 2447 | put_css_set_locked(from_cset); | ||
| 2448 | |||
| 2407 | } | 2449 | } |
| 2408 | } | 2450 | } |
| 2409 | spin_unlock_irq(&css_set_lock); | 2451 | spin_unlock_irq(&css_set_lock); |
| @@ -2602,7 +2644,7 @@ int cgroup_migrate_prepare_dst(struct cgroup_mgctx *mgctx) | |||
| 2602 | 2644 | ||
| 2603 | dst_cset = find_css_set(src_cset, src_cset->mg_dst_cgrp); | 2645 | dst_cset = find_css_set(src_cset, src_cset->mg_dst_cgrp); |
| 2604 | if (!dst_cset) | 2646 | if (!dst_cset) |
| 2605 | goto err; | 2647 | return -ENOMEM; |
| 2606 | 2648 | ||
| 2607 | WARN_ON_ONCE(src_cset->mg_dst_cset || dst_cset->mg_dst_cset); | 2649 | WARN_ON_ONCE(src_cset->mg_dst_cset || dst_cset->mg_dst_cset); |
| 2608 | 2650 | ||
| @@ -2634,9 +2676,6 @@ int cgroup_migrate_prepare_dst(struct cgroup_mgctx *mgctx) | |||
| 2634 | } | 2676 | } |
| 2635 | 2677 | ||
| 2636 | return 0; | 2678 | return 0; |
| 2637 | err: | ||
| 2638 | cgroup_migrate_finish(mgctx); | ||
| 2639 | return -ENOMEM; | ||
| 2640 | } | 2679 | } |
| 2641 | 2680 | ||
| 2642 | /** | 2681 | /** |
| @@ -3447,8 +3486,11 @@ static ssize_t cgroup_max_depth_write(struct kernfs_open_file *of, | |||
| 3447 | 3486 | ||
| 3448 | static int cgroup_events_show(struct seq_file *seq, void *v) | 3487 | static int cgroup_events_show(struct seq_file *seq, void *v) |
| 3449 | { | 3488 | { |
| 3450 | seq_printf(seq, "populated %d\n", | 3489 | struct cgroup *cgrp = seq_css(seq)->cgroup; |
| 3451 | cgroup_is_populated(seq_css(seq)->cgroup)); | 3490 | |
| 3491 | seq_printf(seq, "populated %d\n", cgroup_is_populated(cgrp)); | ||
| 3492 | seq_printf(seq, "frozen %d\n", test_bit(CGRP_FROZEN, &cgrp->flags)); | ||
| 3493 | |||
| 3452 | return 0; | 3494 | return 0; |
| 3453 | } | 3495 | } |
| 3454 | 3496 | ||
| @@ -3510,6 +3552,40 @@ static int cgroup_cpu_pressure_show(struct seq_file *seq, void *v) | |||
| 3510 | } | 3552 | } |
| 3511 | #endif | 3553 | #endif |
| 3512 | 3554 | ||
| 3555 | static int cgroup_freeze_show(struct seq_file *seq, void *v) | ||
| 3556 | { | ||
| 3557 | struct cgroup *cgrp = seq_css(seq)->cgroup; | ||
| 3558 | |||
| 3559 | seq_printf(seq, "%d\n", cgrp->freezer.freeze); | ||
| 3560 | |||
| 3561 | return 0; | ||
| 3562 | } | ||
| 3563 | |||
| 3564 | static ssize_t cgroup_freeze_write(struct kernfs_open_file *of, | ||
| 3565 | char *buf, size_t nbytes, loff_t off) | ||
| 3566 | { | ||
| 3567 | struct cgroup *cgrp; | ||
| 3568 | ssize_t ret; | ||
| 3569 | int freeze; | ||
| 3570 | |||
| 3571 | ret = kstrtoint(strstrip(buf), 0, &freeze); | ||
| 3572 | if (ret) | ||
| 3573 | return ret; | ||
| 3574 | |||
| 3575 | if (freeze < 0 || freeze > 1) | ||
| 3576 | return -ERANGE; | ||
| 3577 | |||
| 3578 | cgrp = cgroup_kn_lock_live(of->kn, false); | ||
| 3579 | if (!cgrp) | ||
| 3580 | return -ENOENT; | ||
| 3581 | |||
| 3582 | cgroup_freeze(cgrp, freeze); | ||
| 3583 | |||
| 3584 | cgroup_kn_unlock(of->kn); | ||
| 3585 | |||
| 3586 | return nbytes; | ||
| 3587 | } | ||
| 3588 | |||
| 3513 | static int cgroup_file_open(struct kernfs_open_file *of) | 3589 | static int cgroup_file_open(struct kernfs_open_file *of) |
| 3514 | { | 3590 | { |
| 3515 | struct cftype *cft = of->kn->priv; | 3591 | struct cftype *cft = of->kn->priv; |
| @@ -4654,6 +4730,12 @@ static struct cftype cgroup_base_files[] = { | |||
| 4654 | .seq_show = cgroup_stat_show, | 4730 | .seq_show = cgroup_stat_show, |
| 4655 | }, | 4731 | }, |
| 4656 | { | 4732 | { |
| 4733 | .name = "cgroup.freeze", | ||
| 4734 | .flags = CFTYPE_NOT_ON_ROOT, | ||
| 4735 | .seq_show = cgroup_freeze_show, | ||
| 4736 | .write = cgroup_freeze_write, | ||
| 4737 | }, | ||
| 4738 | { | ||
| 4657 | .name = "cpu.stat", | 4739 | .name = "cpu.stat", |
| 4658 | .flags = CFTYPE_NOT_ON_ROOT, | 4740 | .flags = CFTYPE_NOT_ON_ROOT, |
| 4659 | .seq_show = cpu_stat_show, | 4741 | .seq_show = cpu_stat_show, |
| @@ -4781,9 +4863,11 @@ static void css_release_work_fn(struct work_struct *work) | |||
| 4781 | if (cgroup_on_dfl(cgrp)) | 4863 | if (cgroup_on_dfl(cgrp)) |
| 4782 | cgroup_rstat_flush(cgrp); | 4864 | cgroup_rstat_flush(cgrp); |
| 4783 | 4865 | ||
| 4866 | spin_lock_irq(&css_set_lock); | ||
| 4784 | for (tcgrp = cgroup_parent(cgrp); tcgrp; | 4867 | for (tcgrp = cgroup_parent(cgrp); tcgrp; |
| 4785 | tcgrp = cgroup_parent(tcgrp)) | 4868 | tcgrp = cgroup_parent(tcgrp)) |
| 4786 | tcgrp->nr_dying_descendants--; | 4869 | tcgrp->nr_dying_descendants--; |
| 4870 | spin_unlock_irq(&css_set_lock); | ||
| 4787 | 4871 | ||
| 4788 | cgroup_idr_remove(&cgrp->root->cgroup_idr, cgrp->id); | 4872 | cgroup_idr_remove(&cgrp->root->cgroup_idr, cgrp->id); |
| 4789 | cgrp->id = -1; | 4873 | cgrp->id = -1; |
| @@ -5001,12 +5085,31 @@ static struct cgroup *cgroup_create(struct cgroup *parent) | |||
| 5001 | if (ret) | 5085 | if (ret) |
| 5002 | goto out_psi_free; | 5086 | goto out_psi_free; |
| 5003 | 5087 | ||
| 5088 | /* | ||
| 5089 | * New cgroup inherits effective freeze counter, and | ||
| 5090 | * if the parent has to be frozen, the child has too. | ||
| 5091 | */ | ||
| 5092 | cgrp->freezer.e_freeze = parent->freezer.e_freeze; | ||
| 5093 | if (cgrp->freezer.e_freeze) | ||
| 5094 | set_bit(CGRP_FROZEN, &cgrp->flags); | ||
| 5095 | |||
| 5096 | spin_lock_irq(&css_set_lock); | ||
| 5004 | for (tcgrp = cgrp; tcgrp; tcgrp = cgroup_parent(tcgrp)) { | 5097 | for (tcgrp = cgrp; tcgrp; tcgrp = cgroup_parent(tcgrp)) { |
| 5005 | cgrp->ancestor_ids[tcgrp->level] = tcgrp->id; | 5098 | cgrp->ancestor_ids[tcgrp->level] = tcgrp->id; |
| 5006 | 5099 | ||
| 5007 | if (tcgrp != cgrp) | 5100 | if (tcgrp != cgrp) { |
| 5008 | tcgrp->nr_descendants++; | 5101 | tcgrp->nr_descendants++; |
| 5102 | |||
| 5103 | /* | ||
| 5104 | * If the new cgroup is frozen, all ancestor cgroups | ||
| 5105 | * get a new frozen descendant, but their state can't | ||
| 5106 | * change because of this. | ||
| 5107 | */ | ||
| 5108 | if (cgrp->freezer.e_freeze) | ||
| 5109 | tcgrp->freezer.nr_frozen_descendants++; | ||
| 5110 | } | ||
| 5009 | } | 5111 | } |
| 5112 | spin_unlock_irq(&css_set_lock); | ||
| 5010 | 5113 | ||
| 5011 | if (notify_on_release(parent)) | 5114 | if (notify_on_release(parent)) |
| 5012 | set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); | 5115 | set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); |
| @@ -5291,10 +5394,18 @@ static int cgroup_destroy_locked(struct cgroup *cgrp) | |||
| 5291 | if (parent && cgroup_is_threaded(cgrp)) | 5394 | if (parent && cgroup_is_threaded(cgrp)) |
| 5292 | parent->nr_threaded_children--; | 5395 | parent->nr_threaded_children--; |
| 5293 | 5396 | ||
| 5397 | spin_lock_irq(&css_set_lock); | ||
| 5294 | for (tcgrp = cgroup_parent(cgrp); tcgrp; tcgrp = cgroup_parent(tcgrp)) { | 5398 | for (tcgrp = cgroup_parent(cgrp); tcgrp; tcgrp = cgroup_parent(tcgrp)) { |
| 5295 | tcgrp->nr_descendants--; | 5399 | tcgrp->nr_descendants--; |
| 5296 | tcgrp->nr_dying_descendants++; | 5400 | tcgrp->nr_dying_descendants++; |
| 5401 | /* | ||
| 5402 | * If the dying cgroup is frozen, decrease frozen descendants | ||
| 5403 | * counters of ancestor cgroups. | ||
| 5404 | */ | ||
| 5405 | if (test_bit(CGRP_FROZEN, &cgrp->flags)) | ||
| 5406 | tcgrp->freezer.nr_frozen_descendants--; | ||
| 5297 | } | 5407 | } |
| 5408 | spin_unlock_irq(&css_set_lock); | ||
| 5298 | 5409 | ||
| 5299 | cgroup1_check_for_release(parent); | 5410 | cgroup1_check_for_release(parent); |
| 5300 | 5411 | ||
| @@ -5746,6 +5857,26 @@ void cgroup_post_fork(struct task_struct *child) | |||
| 5746 | cset->nr_tasks++; | 5857 | cset->nr_tasks++; |
| 5747 | css_set_move_task(child, NULL, cset, false); | 5858 | css_set_move_task(child, NULL, cset, false); |
| 5748 | } | 5859 | } |
| 5860 | |||
| 5861 | /* | ||
| 5862 | * If the cgroup has to be frozen, the new task has too. | ||
| 5863 | * Let's set the JOBCTL_TRAP_FREEZE jobctl bit to get | ||
| 5864 | * the task into the frozen state. | ||
| 5865 | */ | ||
| 5866 | if (unlikely(cgroup_task_freeze(child))) { | ||
| 5867 | spin_lock(&child->sighand->siglock); | ||
| 5868 | WARN_ON_ONCE(child->frozen); | ||
| 5869 | child->jobctl |= JOBCTL_TRAP_FREEZE; | ||
| 5870 | spin_unlock(&child->sighand->siglock); | ||
| 5871 | |||
| 5872 | /* | ||
| 5873 | * Calling cgroup_update_frozen() isn't required here, | ||
| 5874 | * because it will be called anyway a bit later | ||
| 5875 | * from do_freezer_trap(). So we avoid cgroup's | ||
| 5876 | * transient switch from the frozen state and back. | ||
| 5877 | */ | ||
| 5878 | } | ||
| 5879 | |||
| 5749 | spin_unlock_irq(&css_set_lock); | 5880 | spin_unlock_irq(&css_set_lock); |
| 5750 | } | 5881 | } |
| 5751 | 5882 | ||
| @@ -5794,6 +5925,11 @@ void cgroup_exit(struct task_struct *tsk) | |||
| 5794 | spin_lock_irq(&css_set_lock); | 5925 | spin_lock_irq(&css_set_lock); |
| 5795 | css_set_move_task(tsk, cset, NULL, false); | 5926 | css_set_move_task(tsk, cset, NULL, false); |
| 5796 | cset->nr_tasks--; | 5927 | cset->nr_tasks--; |
| 5928 | |||
| 5929 | WARN_ON_ONCE(cgroup_task_frozen(tsk)); | ||
| 5930 | if (unlikely(cgroup_task_freeze(tsk))) | ||
| 5931 | cgroup_update_frozen(task_dfl_cgroup(tsk)); | ||
| 5932 | |||
| 5797 | spin_unlock_irq(&css_set_lock); | 5933 | spin_unlock_irq(&css_set_lock); |
| 5798 | } else { | 5934 | } else { |
| 5799 | get_css_set(cset); | 5935 | get_css_set(cset); |
diff --git a/kernel/cgroup/debug.c b/kernel/cgroup/debug.c index 5f1b87330bee..80aa3f027ac3 100644 --- a/kernel/cgroup/debug.c +++ b/kernel/cgroup/debug.c | |||
| @@ -64,8 +64,8 @@ static int current_css_set_read(struct seq_file *seq, void *v) | |||
| 64 | css = cset->subsys[ss->id]; | 64 | css = cset->subsys[ss->id]; |
| 65 | if (!css) | 65 | if (!css) |
| 66 | continue; | 66 | continue; |
| 67 | seq_printf(seq, "%2d: %-4s\t- %lx[%d]\n", ss->id, ss->name, | 67 | seq_printf(seq, "%2d: %-4s\t- %p[%d]\n", ss->id, ss->name, |
| 68 | (unsigned long)css, css->id); | 68 | css, css->id); |
| 69 | } | 69 | } |
| 70 | rcu_read_unlock(); | 70 | rcu_read_unlock(); |
| 71 | spin_unlock_irq(&css_set_lock); | 71 | spin_unlock_irq(&css_set_lock); |
| @@ -224,8 +224,8 @@ static int cgroup_subsys_states_read(struct seq_file *seq, void *v) | |||
| 224 | if (css->parent) | 224 | if (css->parent) |
| 225 | snprintf(pbuf, sizeof(pbuf) - 1, " P=%d", | 225 | snprintf(pbuf, sizeof(pbuf) - 1, " P=%d", |
| 226 | css->parent->id); | 226 | css->parent->id); |
| 227 | seq_printf(seq, "%2d: %-4s\t- %lx[%d] %d%s\n", ss->id, ss->name, | 227 | seq_printf(seq, "%2d: %-4s\t- %p[%d] %d%s\n", ss->id, ss->name, |
| 228 | (unsigned long)css, css->id, | 228 | css, css->id, |
| 229 | atomic_read(&css->online_cnt), pbuf); | 229 | atomic_read(&css->online_cnt), pbuf); |
| 230 | } | 230 | } |
| 231 | 231 | ||
diff --git a/kernel/cgroup/freezer.c b/kernel/cgroup/freezer.c index 08236798d173..8cf010680678 100644 --- a/kernel/cgroup/freezer.c +++ b/kernel/cgroup/freezer.c | |||
| @@ -1,481 +1,314 @@ | |||
| 1 | /* | 1 | // SPDX-License-Identifier: GPL-2.0 |
| 2 | * cgroup_freezer.c - control group freezer subsystem | ||
| 3 | * | ||
| 4 | * Copyright IBM Corporation, 2007 | ||
| 5 | * | ||
| 6 | * Author : Cedric Le Goater <clg@fr.ibm.com> | ||
| 7 | * | ||
| 8 | * This program is free software; you can redistribute it and/or modify it | ||
| 9 | * under the terms of version 2.1 of the GNU Lesser General Public License | ||
| 10 | * as published by the Free Software Foundation. | ||
| 11 | * | ||
| 12 | * This program is distributed in the hope that it would be useful, but | ||
| 13 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
| 15 | */ | ||
| 16 | |||
| 17 | #include <linux/export.h> | ||
| 18 | #include <linux/slab.h> | ||
| 19 | #include <linux/cgroup.h> | 2 | #include <linux/cgroup.h> |
| 20 | #include <linux/fs.h> | 3 | #include <linux/sched.h> |
| 21 | #include <linux/uaccess.h> | 4 | #include <linux/sched/task.h> |
| 22 | #include <linux/freezer.h> | 5 | #include <linux/sched/signal.h> |
| 23 | #include <linux/seq_file.h> | ||
| 24 | #include <linux/mutex.h> | ||
| 25 | |||
| 26 | /* | ||
| 27 | * A cgroup is freezing if any FREEZING flags are set. FREEZING_SELF is | ||
| 28 | * set if "FROZEN" is written to freezer.state cgroupfs file, and cleared | ||
| 29 | * for "THAWED". FREEZING_PARENT is set if the parent freezer is FREEZING | ||
| 30 | * for whatever reason. IOW, a cgroup has FREEZING_PARENT set if one of | ||
| 31 | * its ancestors has FREEZING_SELF set. | ||
| 32 | */ | ||
| 33 | enum freezer_state_flags { | ||
| 34 | CGROUP_FREEZER_ONLINE = (1 << 0), /* freezer is fully online */ | ||
| 35 | CGROUP_FREEZING_SELF = (1 << 1), /* this freezer is freezing */ | ||
| 36 | CGROUP_FREEZING_PARENT = (1 << 2), /* the parent freezer is freezing */ | ||
| 37 | CGROUP_FROZEN = (1 << 3), /* this and its descendants frozen */ | ||
| 38 | 6 | ||
| 39 | /* mask for all FREEZING flags */ | 7 | #include "cgroup-internal.h" |
| 40 | CGROUP_FREEZING = CGROUP_FREEZING_SELF | CGROUP_FREEZING_PARENT, | ||
| 41 | }; | ||
| 42 | 8 | ||
| 43 | struct freezer { | 9 | #include <trace/events/cgroup.h> |
| 44 | struct cgroup_subsys_state css; | ||
| 45 | unsigned int state; | ||
| 46 | }; | ||
| 47 | 10 | ||
| 48 | static DEFINE_MUTEX(freezer_mutex); | 11 | /* |
| 49 | 12 | * Propagate the cgroup frozen state upwards by the cgroup tree. | |
| 50 | static inline struct freezer *css_freezer(struct cgroup_subsys_state *css) | 13 | */ |
| 14 | static void cgroup_propagate_frozen(struct cgroup *cgrp, bool frozen) | ||
| 51 | { | 15 | { |
| 52 | return css ? container_of(css, struct freezer, css) : NULL; | 16 | int desc = 1; |
| 53 | } | ||
| 54 | 17 | ||
| 55 | static inline struct freezer *task_freezer(struct task_struct *task) | 18 | /* |
| 56 | { | 19 | * If the new state is frozen, some freezing ancestor cgroups may change |
| 57 | return css_freezer(task_css(task, freezer_cgrp_id)); | 20 | * their state too, depending on if all their descendants are frozen. |
| 21 | * | ||
| 22 | * Otherwise, all ancestor cgroups are forced into the non-frozen state. | ||
| 23 | */ | ||
| 24 | while ((cgrp = cgroup_parent(cgrp))) { | ||
| 25 | if (frozen) { | ||
| 26 | cgrp->freezer.nr_frozen_descendants += desc; | ||
| 27 | if (!test_bit(CGRP_FROZEN, &cgrp->flags) && | ||
| 28 | test_bit(CGRP_FREEZE, &cgrp->flags) && | ||
| 29 | cgrp->freezer.nr_frozen_descendants == | ||
| 30 | cgrp->nr_descendants) { | ||
| 31 | set_bit(CGRP_FROZEN, &cgrp->flags); | ||
| 32 | cgroup_file_notify(&cgrp->events_file); | ||
| 33 | TRACE_CGROUP_PATH(notify_frozen, cgrp, 1); | ||
| 34 | desc++; | ||
| 35 | } | ||
| 36 | } else { | ||
| 37 | cgrp->freezer.nr_frozen_descendants -= desc; | ||
| 38 | if (test_bit(CGRP_FROZEN, &cgrp->flags)) { | ||
| 39 | clear_bit(CGRP_FROZEN, &cgrp->flags); | ||
| 40 | cgroup_file_notify(&cgrp->events_file); | ||
| 41 | TRACE_CGROUP_PATH(notify_frozen, cgrp, 0); | ||
| 42 | desc++; | ||
| 43 | } | ||
| 44 | } | ||
| 45 | } | ||
| 58 | } | 46 | } |
| 59 | 47 | ||
| 60 | static struct freezer *parent_freezer(struct freezer *freezer) | 48 | /* |
| 49 | * Revisit the cgroup frozen state. | ||
| 50 | * Checks if the cgroup is really frozen and perform all state transitions. | ||
| 51 | */ | ||
| 52 | void cgroup_update_frozen(struct cgroup *cgrp) | ||
| 61 | { | 53 | { |
| 62 | return css_freezer(freezer->css.parent); | 54 | bool frozen; |
| 63 | } | ||
| 64 | 55 | ||
| 65 | bool cgroup_freezing(struct task_struct *task) | 56 | lockdep_assert_held(&css_set_lock); |
| 66 | { | ||
| 67 | bool ret; | ||
| 68 | 57 | ||
| 69 | rcu_read_lock(); | 58 | /* |
| 70 | ret = task_freezer(task)->state & CGROUP_FREEZING; | 59 | * If the cgroup has to be frozen (CGRP_FREEZE bit set), |
| 71 | rcu_read_unlock(); | 60 | * and all tasks are frozen and/or stopped, let's consider |
| 61 | * the cgroup frozen. Otherwise it's not frozen. | ||
| 62 | */ | ||
| 63 | frozen = test_bit(CGRP_FREEZE, &cgrp->flags) && | ||
| 64 | cgrp->freezer.nr_frozen_tasks == __cgroup_task_count(cgrp); | ||
| 72 | 65 | ||
| 73 | return ret; | 66 | if (frozen) { |
| 74 | } | 67 | /* Already there? */ |
| 68 | if (test_bit(CGRP_FROZEN, &cgrp->flags)) | ||
| 69 | return; | ||
| 75 | 70 | ||
| 76 | static const char *freezer_state_strs(unsigned int state) | 71 | set_bit(CGRP_FROZEN, &cgrp->flags); |
| 77 | { | 72 | } else { |
| 78 | if (state & CGROUP_FROZEN) | 73 | /* Already there? */ |
| 79 | return "FROZEN"; | 74 | if (!test_bit(CGRP_FROZEN, &cgrp->flags)) |
| 80 | if (state & CGROUP_FREEZING) | 75 | return; |
| 81 | return "FREEZING"; | ||
| 82 | return "THAWED"; | ||
| 83 | }; | ||
| 84 | |||
| 85 | static struct cgroup_subsys_state * | ||
| 86 | freezer_css_alloc(struct cgroup_subsys_state *parent_css) | ||
| 87 | { | ||
| 88 | struct freezer *freezer; | ||
| 89 | 76 | ||
| 90 | freezer = kzalloc(sizeof(struct freezer), GFP_KERNEL); | 77 | clear_bit(CGRP_FROZEN, &cgrp->flags); |
| 91 | if (!freezer) | 78 | } |
| 92 | return ERR_PTR(-ENOMEM); | 79 | cgroup_file_notify(&cgrp->events_file); |
| 80 | TRACE_CGROUP_PATH(notify_frozen, cgrp, frozen); | ||
| 93 | 81 | ||
| 94 | return &freezer->css; | 82 | /* Update the state of ancestor cgroups. */ |
| 83 | cgroup_propagate_frozen(cgrp, frozen); | ||
| 95 | } | 84 | } |
| 96 | 85 | ||
| 97 | /** | 86 | /* |
| 98 | * freezer_css_online - commit creation of a freezer css | 87 | * Increment cgroup's nr_frozen_tasks. |
| 99 | * @css: css being created | ||
| 100 | * | ||
| 101 | * We're committing to creation of @css. Mark it online and inherit | ||
| 102 | * parent's freezing state while holding both parent's and our | ||
| 103 | * freezer->lock. | ||
| 104 | */ | 88 | */ |
| 105 | static int freezer_css_online(struct cgroup_subsys_state *css) | 89 | static void cgroup_inc_frozen_cnt(struct cgroup *cgrp) |
| 106 | { | 90 | { |
| 107 | struct freezer *freezer = css_freezer(css); | 91 | cgrp->freezer.nr_frozen_tasks++; |
| 108 | struct freezer *parent = parent_freezer(freezer); | ||
| 109 | |||
| 110 | mutex_lock(&freezer_mutex); | ||
| 111 | |||
| 112 | freezer->state |= CGROUP_FREEZER_ONLINE; | ||
| 113 | |||
| 114 | if (parent && (parent->state & CGROUP_FREEZING)) { | ||
| 115 | freezer->state |= CGROUP_FREEZING_PARENT | CGROUP_FROZEN; | ||
| 116 | atomic_inc(&system_freezing_cnt); | ||
| 117 | } | ||
| 118 | |||
| 119 | mutex_unlock(&freezer_mutex); | ||
| 120 | return 0; | ||
| 121 | } | 92 | } |
| 122 | 93 | ||
| 123 | /** | 94 | /* |
| 124 | * freezer_css_offline - initiate destruction of a freezer css | 95 | * Decrement cgroup's nr_frozen_tasks. |
| 125 | * @css: css being destroyed | ||
| 126 | * | ||
| 127 | * @css is going away. Mark it dead and decrement system_freezing_count if | ||
| 128 | * it was holding one. | ||
| 129 | */ | 96 | */ |
| 130 | static void freezer_css_offline(struct cgroup_subsys_state *css) | 97 | static void cgroup_dec_frozen_cnt(struct cgroup *cgrp) |
| 131 | { | 98 | { |
| 132 | struct freezer *freezer = css_freezer(css); | 99 | cgrp->freezer.nr_frozen_tasks--; |
| 133 | 100 | WARN_ON_ONCE(cgrp->freezer.nr_frozen_tasks < 0); | |
| 134 | mutex_lock(&freezer_mutex); | ||
| 135 | |||
| 136 | if (freezer->state & CGROUP_FREEZING) | ||
| 137 | atomic_dec(&system_freezing_cnt); | ||
| 138 | |||
| 139 | freezer->state = 0; | ||
| 140 | |||
| 141 | mutex_unlock(&freezer_mutex); | ||
| 142 | } | 101 | } |
| 143 | 102 | ||
| 144 | static void freezer_css_free(struct cgroup_subsys_state *css) | 103 | /* |
| 104 | * Enter frozen/stopped state, if not yet there. Update cgroup's counters, | ||
| 105 | * and revisit the state of the cgroup, if necessary. | ||
| 106 | */ | ||
| 107 | void cgroup_enter_frozen(void) | ||
| 145 | { | 108 | { |
| 146 | kfree(css_freezer(css)); | 109 | struct cgroup *cgrp; |
| 110 | |||
| 111 | if (current->frozen) | ||
| 112 | return; | ||
| 113 | |||
| 114 | spin_lock_irq(&css_set_lock); | ||
| 115 | current->frozen = true; | ||
| 116 | cgrp = task_dfl_cgroup(current); | ||
| 117 | cgroup_inc_frozen_cnt(cgrp); | ||
| 118 | cgroup_update_frozen(cgrp); | ||
| 119 | spin_unlock_irq(&css_set_lock); | ||
| 147 | } | 120 | } |
| 148 | 121 | ||
| 149 | /* | 122 | /* |
| 150 | * Tasks can be migrated into a different freezer anytime regardless of its | 123 | * Conditionally leave frozen/stopped state. Update cgroup's counters, |
| 151 | * current state. freezer_attach() is responsible for making new tasks | 124 | * and revisit the state of the cgroup, if necessary. |
| 152 | * conform to the current state. | ||
| 153 | * | 125 | * |
| 154 | * Freezer state changes and task migration are synchronized via | 126 | * If always_leave is not set, and the cgroup is freezing, |
| 155 | * @freezer->lock. freezer_attach() makes the new tasks conform to the | 127 | * we're racing with the cgroup freezing. In this case, we don't |
| 156 | * current state and all following state changes can see the new tasks. | 128 | * drop the frozen counter to avoid a transient switch to |
| 129 | * the unfrozen state. | ||
| 157 | */ | 130 | */ |
| 158 | static void freezer_attach(struct cgroup_taskset *tset) | 131 | void cgroup_leave_frozen(bool always_leave) |
| 159 | { | 132 | { |
| 160 | struct task_struct *task; | 133 | struct cgroup *cgrp; |
| 161 | struct cgroup_subsys_state *new_css; | 134 | |
| 162 | 135 | spin_lock_irq(&css_set_lock); | |
| 163 | mutex_lock(&freezer_mutex); | 136 | cgrp = task_dfl_cgroup(current); |
| 164 | 137 | if (always_leave || !test_bit(CGRP_FREEZE, &cgrp->flags)) { | |
| 165 | /* | 138 | cgroup_dec_frozen_cnt(cgrp); |
| 166 | * Make the new tasks conform to the current state of @new_css. | 139 | cgroup_update_frozen(cgrp); |
| 167 | * For simplicity, when migrating any task to a FROZEN cgroup, we | 140 | WARN_ON_ONCE(!current->frozen); |
| 168 | * revert it to FREEZING and let update_if_frozen() determine the | 141 | current->frozen = false; |
| 169 | * correct state later. | 142 | } else if (!(current->jobctl & JOBCTL_TRAP_FREEZE)) { |
| 170 | * | 143 | spin_lock(¤t->sighand->siglock); |
| 171 | * Tasks in @tset are on @new_css but may not conform to its | 144 | current->jobctl |= JOBCTL_TRAP_FREEZE; |
| 172 | * current state before executing the following - !frozen tasks may | 145 | set_thread_flag(TIF_SIGPENDING); |
| 173 | * be visible in a FROZEN cgroup and frozen tasks in a THAWED one. | 146 | spin_unlock(¤t->sighand->siglock); |
| 174 | */ | ||
| 175 | cgroup_taskset_for_each(task, new_css, tset) { | ||
| 176 | struct freezer *freezer = css_freezer(new_css); | ||
| 177 | |||
| 178 | if (!(freezer->state & CGROUP_FREEZING)) { | ||
| 179 | __thaw_task(task); | ||
| 180 | } else { | ||
| 181 | freeze_task(task); | ||
| 182 | /* clear FROZEN and propagate upwards */ | ||
| 183 | while (freezer && (freezer->state & CGROUP_FROZEN)) { | ||
| 184 | freezer->state &= ~CGROUP_FROZEN; | ||
| 185 | freezer = parent_freezer(freezer); | ||
| 186 | } | ||
| 187 | } | ||
| 188 | } | 147 | } |
| 189 | 148 | spin_unlock_irq(&css_set_lock); | |
| 190 | mutex_unlock(&freezer_mutex); | ||
| 191 | } | 149 | } |
| 192 | 150 | ||
| 193 | /** | 151 | /* |
| 194 | * freezer_fork - cgroup post fork callback | 152 | * Freeze or unfreeze the task by setting or clearing the JOBCTL_TRAP_FREEZE |
| 195 | * @task: a task which has just been forked | 153 | * jobctl bit. |
| 196 | * | ||
| 197 | * @task has just been created and should conform to the current state of | ||
| 198 | * the cgroup_freezer it belongs to. This function may race against | ||
| 199 | * freezer_attach(). Losing to freezer_attach() means that we don't have | ||
| 200 | * to do anything as freezer_attach() will put @task into the appropriate | ||
| 201 | * state. | ||
| 202 | */ | 154 | */ |
| 203 | static void freezer_fork(struct task_struct *task) | 155 | static void cgroup_freeze_task(struct task_struct *task, bool freeze) |
| 204 | { | 156 | { |
| 205 | struct freezer *freezer; | 157 | unsigned long flags; |
| 206 | 158 | ||
| 207 | /* | 159 | /* If the task is about to die, don't bother with freezing it. */ |
| 208 | * The root cgroup is non-freezable, so we can skip locking the | 160 | if (!lock_task_sighand(task, &flags)) |
| 209 | * freezer. This is safe regardless of race with task migration. | ||
| 210 | * If we didn't race or won, skipping is obviously the right thing | ||
| 211 | * to do. If we lost and root is the new cgroup, noop is still the | ||
| 212 | * right thing to do. | ||
| 213 | */ | ||
| 214 | if (task_css_is_root(task, freezer_cgrp_id)) | ||
| 215 | return; | 161 | return; |
| 216 | 162 | ||
| 217 | mutex_lock(&freezer_mutex); | 163 | if (freeze) { |
| 218 | rcu_read_lock(); | 164 | task->jobctl |= JOBCTL_TRAP_FREEZE; |
| 219 | 165 | signal_wake_up(task, false); | |
| 220 | freezer = task_freezer(task); | 166 | } else { |
| 221 | if (freezer->state & CGROUP_FREEZING) | 167 | task->jobctl &= ~JOBCTL_TRAP_FREEZE; |
| 222 | freeze_task(task); | 168 | wake_up_process(task); |
| 169 | } | ||
| 223 | 170 | ||
| 224 | rcu_read_unlock(); | 171 | unlock_task_sighand(task, &flags); |
| 225 | mutex_unlock(&freezer_mutex); | ||
| 226 | } | 172 | } |
| 227 | 173 | ||
| 228 | /** | 174 | /* |
| 229 | * update_if_frozen - update whether a cgroup finished freezing | 175 | * Freeze or unfreeze all tasks in the given cgroup. |
| 230 | * @css: css of interest | ||
| 231 | * | ||
| 232 | * Once FREEZING is initiated, transition to FROZEN is lazily updated by | ||
| 233 | * calling this function. If the current state is FREEZING but not FROZEN, | ||
| 234 | * this function checks whether all tasks of this cgroup and the descendant | ||
| 235 | * cgroups finished freezing and, if so, sets FROZEN. | ||
| 236 | * | ||
| 237 | * The caller is responsible for grabbing RCU read lock and calling | ||
| 238 | * update_if_frozen() on all descendants prior to invoking this function. | ||
| 239 | * | ||
| 240 | * Task states and freezer state might disagree while tasks are being | ||
| 241 | * migrated into or out of @css, so we can't verify task states against | ||
| 242 | * @freezer state here. See freezer_attach() for details. | ||
| 243 | */ | 176 | */ |
| 244 | static void update_if_frozen(struct cgroup_subsys_state *css) | 177 | static void cgroup_do_freeze(struct cgroup *cgrp, bool freeze) |
| 245 | { | 178 | { |
| 246 | struct freezer *freezer = css_freezer(css); | ||
| 247 | struct cgroup_subsys_state *pos; | ||
| 248 | struct css_task_iter it; | 179 | struct css_task_iter it; |
| 249 | struct task_struct *task; | 180 | struct task_struct *task; |
| 250 | 181 | ||
| 251 | lockdep_assert_held(&freezer_mutex); | 182 | lockdep_assert_held(&cgroup_mutex); |
| 252 | |||
| 253 | if (!(freezer->state & CGROUP_FREEZING) || | ||
| 254 | (freezer->state & CGROUP_FROZEN)) | ||
| 255 | return; | ||
| 256 | 183 | ||
| 257 | /* are all (live) children frozen? */ | 184 | spin_lock_irq(&css_set_lock); |
| 258 | rcu_read_lock(); | 185 | if (freeze) |
| 259 | css_for_each_child(pos, css) { | 186 | set_bit(CGRP_FREEZE, &cgrp->flags); |
| 260 | struct freezer *child = css_freezer(pos); | 187 | else |
| 261 | 188 | clear_bit(CGRP_FREEZE, &cgrp->flags); | |
| 262 | if ((child->state & CGROUP_FREEZER_ONLINE) && | 189 | spin_unlock_irq(&css_set_lock); |
| 263 | !(child->state & CGROUP_FROZEN)) { | ||
| 264 | rcu_read_unlock(); | ||
| 265 | return; | ||
| 266 | } | ||
| 267 | } | ||
| 268 | rcu_read_unlock(); | ||
| 269 | 190 | ||
| 270 | /* are all tasks frozen? */ | 191 | if (freeze) |
| 271 | css_task_iter_start(css, 0, &it); | 192 | TRACE_CGROUP_PATH(freeze, cgrp); |
| 193 | else | ||
| 194 | TRACE_CGROUP_PATH(unfreeze, cgrp); | ||
| 272 | 195 | ||
| 196 | css_task_iter_start(&cgrp->self, 0, &it); | ||
| 273 | while ((task = css_task_iter_next(&it))) { | 197 | while ((task = css_task_iter_next(&it))) { |
| 274 | if (freezing(task)) { | 198 | /* |
| 275 | /* | 199 | * Ignore kernel threads here. Freezing cgroups containing |
| 276 | * freezer_should_skip() indicates that the task | 200 | * kthreads isn't supported. |
| 277 | * should be skipped when determining freezing | 201 | */ |
| 278 | * completion. Consider it frozen in addition to | 202 | if (task->flags & PF_KTHREAD) |
| 279 | * the usual frozen condition. | ||
| 280 | */ | ||
| 281 | if (!frozen(task) && !freezer_should_skip(task)) | ||
| 282 | goto out_iter_end; | ||
| 283 | } | ||
| 284 | } | ||
| 285 | |||
| 286 | freezer->state |= CGROUP_FROZEN; | ||
| 287 | out_iter_end: | ||
| 288 | css_task_iter_end(&it); | ||
| 289 | } | ||
| 290 | |||
| 291 | static int freezer_read(struct seq_file *m, void *v) | ||
| 292 | { | ||
| 293 | struct cgroup_subsys_state *css = seq_css(m), *pos; | ||
| 294 | |||
| 295 | mutex_lock(&freezer_mutex); | ||
| 296 | rcu_read_lock(); | ||
| 297 | |||
| 298 | /* update states bottom-up */ | ||
| 299 | css_for_each_descendant_post(pos, css) { | ||
| 300 | if (!css_tryget_online(pos)) | ||
| 301 | continue; | 203 | continue; |
| 302 | rcu_read_unlock(); | 204 | cgroup_freeze_task(task, freeze); |
| 303 | |||
| 304 | update_if_frozen(pos); | ||
| 305 | |||
| 306 | rcu_read_lock(); | ||
| 307 | css_put(pos); | ||
| 308 | } | 205 | } |
| 309 | |||
| 310 | rcu_read_unlock(); | ||
| 311 | mutex_unlock(&freezer_mutex); | ||
| 312 | |||
| 313 | seq_puts(m, freezer_state_strs(css_freezer(css)->state)); | ||
| 314 | seq_putc(m, '\n'); | ||
| 315 | return 0; | ||
| 316 | } | ||
| 317 | |||
| 318 | static void freeze_cgroup(struct freezer *freezer) | ||
| 319 | { | ||
| 320 | struct css_task_iter it; | ||
| 321 | struct task_struct *task; | ||
| 322 | |||
| 323 | css_task_iter_start(&freezer->css, 0, &it); | ||
| 324 | while ((task = css_task_iter_next(&it))) | ||
| 325 | freeze_task(task); | ||
| 326 | css_task_iter_end(&it); | 206 | css_task_iter_end(&it); |
| 327 | } | ||
| 328 | 207 | ||
| 329 | static void unfreeze_cgroup(struct freezer *freezer) | 208 | /* |
| 330 | { | 209 | * Cgroup state should be revisited here to cover empty leaf cgroups |
| 331 | struct css_task_iter it; | 210 | * and cgroups which descendants are already in the desired state. |
| 332 | struct task_struct *task; | 211 | */ |
| 333 | 212 | spin_lock_irq(&css_set_lock); | |
| 334 | css_task_iter_start(&freezer->css, 0, &it); | 213 | if (cgrp->nr_descendants == cgrp->freezer.nr_frozen_descendants) |
| 335 | while ((task = css_task_iter_next(&it))) | 214 | cgroup_update_frozen(cgrp); |
| 336 | __thaw_task(task); | 215 | spin_unlock_irq(&css_set_lock); |
| 337 | css_task_iter_end(&it); | ||
| 338 | } | 216 | } |
| 339 | 217 | ||
| 340 | /** | 218 | /* |
| 341 | * freezer_apply_state - apply state change to a single cgroup_freezer | 219 | * Adjust the task state (freeze or unfreeze) and revisit the state of |
| 342 | * @freezer: freezer to apply state change to | 220 | * source and destination cgroups. |
| 343 | * @freeze: whether to freeze or unfreeze | ||
| 344 | * @state: CGROUP_FREEZING_* flag to set or clear | ||
| 345 | * | ||
| 346 | * Set or clear @state on @cgroup according to @freeze, and perform | ||
| 347 | * freezing or thawing as necessary. | ||
| 348 | */ | 221 | */ |
| 349 | static void freezer_apply_state(struct freezer *freezer, bool freeze, | 222 | void cgroup_freezer_migrate_task(struct task_struct *task, |
| 350 | unsigned int state) | 223 | struct cgroup *src, struct cgroup *dst) |
| 351 | { | 224 | { |
| 352 | /* also synchronizes against task migration, see freezer_attach() */ | 225 | lockdep_assert_held(&css_set_lock); |
| 353 | lockdep_assert_held(&freezer_mutex); | ||
| 354 | 226 | ||
| 355 | if (!(freezer->state & CGROUP_FREEZER_ONLINE)) | 227 | /* |
| 228 | * Kernel threads are not supposed to be frozen at all. | ||
| 229 | */ | ||
| 230 | if (task->flags & PF_KTHREAD) | ||
| 356 | return; | 231 | return; |
| 357 | 232 | ||
| 358 | if (freeze) { | 233 | /* |
| 359 | if (!(freezer->state & CGROUP_FREEZING)) | 234 | * Adjust counters of freezing and frozen tasks. |
| 360 | atomic_inc(&system_freezing_cnt); | 235 | * Note, that if the task is frozen, but the destination cgroup is not |
| 361 | freezer->state |= state; | 236 | * frozen, we bump both counters to keep them balanced. |
| 362 | freeze_cgroup(freezer); | 237 | */ |
| 363 | } else { | 238 | if (task->frozen) { |
| 364 | bool was_freezing = freezer->state & CGROUP_FREEZING; | 239 | cgroup_inc_frozen_cnt(dst); |
| 365 | 240 | cgroup_dec_frozen_cnt(src); | |
| 366 | freezer->state &= ~state; | ||
| 367 | |||
| 368 | if (!(freezer->state & CGROUP_FREEZING)) { | ||
| 369 | if (was_freezing) | ||
| 370 | atomic_dec(&system_freezing_cnt); | ||
| 371 | freezer->state &= ~CGROUP_FROZEN; | ||
| 372 | unfreeze_cgroup(freezer); | ||
| 373 | } | ||
| 374 | } | 241 | } |
| 375 | } | 242 | cgroup_update_frozen(dst); |
| 376 | 243 | cgroup_update_frozen(src); | |
| 377 | /** | ||
| 378 | * freezer_change_state - change the freezing state of a cgroup_freezer | ||
| 379 | * @freezer: freezer of interest | ||
| 380 | * @freeze: whether to freeze or thaw | ||
| 381 | * | ||
| 382 | * Freeze or thaw @freezer according to @freeze. The operations are | ||
| 383 | * recursive - all descendants of @freezer will be affected. | ||
| 384 | */ | ||
| 385 | static void freezer_change_state(struct freezer *freezer, bool freeze) | ||
| 386 | { | ||
| 387 | struct cgroup_subsys_state *pos; | ||
| 388 | 244 | ||
| 389 | /* | 245 | /* |
| 390 | * Update all its descendants in pre-order traversal. Each | 246 | * Force the task to the desired state. |
| 391 | * descendant will try to inherit its parent's FREEZING state as | ||
| 392 | * CGROUP_FREEZING_PARENT. | ||
| 393 | */ | 247 | */ |
| 394 | mutex_lock(&freezer_mutex); | 248 | cgroup_freeze_task(task, test_bit(CGRP_FREEZE, &dst->flags)); |
| 395 | rcu_read_lock(); | ||
| 396 | css_for_each_descendant_pre(pos, &freezer->css) { | ||
| 397 | struct freezer *pos_f = css_freezer(pos); | ||
| 398 | struct freezer *parent = parent_freezer(pos_f); | ||
| 399 | |||
| 400 | if (!css_tryget_online(pos)) | ||
| 401 | continue; | ||
| 402 | rcu_read_unlock(); | ||
| 403 | |||
| 404 | if (pos_f == freezer) | ||
| 405 | freezer_apply_state(pos_f, freeze, | ||
| 406 | CGROUP_FREEZING_SELF); | ||
| 407 | else | ||
| 408 | freezer_apply_state(pos_f, | ||
| 409 | parent->state & CGROUP_FREEZING, | ||
| 410 | CGROUP_FREEZING_PARENT); | ||
| 411 | |||
| 412 | rcu_read_lock(); | ||
| 413 | css_put(pos); | ||
| 414 | } | ||
| 415 | rcu_read_unlock(); | ||
| 416 | mutex_unlock(&freezer_mutex); | ||
| 417 | } | 249 | } |
| 418 | 250 | ||
| 419 | static ssize_t freezer_write(struct kernfs_open_file *of, | 251 | void cgroup_freeze(struct cgroup *cgrp, bool freeze) |
| 420 | char *buf, size_t nbytes, loff_t off) | ||
| 421 | { | 252 | { |
| 422 | bool freeze; | 253 | struct cgroup_subsys_state *css; |
| 254 | struct cgroup *dsct; | ||
| 255 | bool applied = false; | ||
| 423 | 256 | ||
| 424 | buf = strstrip(buf); | 257 | lockdep_assert_held(&cgroup_mutex); |
| 425 | 258 | ||
| 426 | if (strcmp(buf, freezer_state_strs(0)) == 0) | 259 | /* |
| 427 | freeze = false; | 260 | * Nothing changed? Just exit. |
| 428 | else if (strcmp(buf, freezer_state_strs(CGROUP_FROZEN)) == 0) | 261 | */ |
| 429 | freeze = true; | 262 | if (cgrp->freezer.freeze == freeze) |
| 430 | else | 263 | return; |
| 431 | return -EINVAL; | ||
| 432 | 264 | ||
| 433 | freezer_change_state(css_freezer(of_css(of)), freeze); | 265 | cgrp->freezer.freeze = freeze; |
| 434 | return nbytes; | ||
| 435 | } | ||
| 436 | 266 | ||
| 437 | static u64 freezer_self_freezing_read(struct cgroup_subsys_state *css, | 267 | /* |
| 438 | struct cftype *cft) | 268 | * Propagate changes downwards the cgroup tree. |
| 439 | { | 269 | */ |
| 440 | struct freezer *freezer = css_freezer(css); | 270 | css_for_each_descendant_pre(css, &cgrp->self) { |
| 271 | dsct = css->cgroup; | ||
| 441 | 272 | ||
| 442 | return (bool)(freezer->state & CGROUP_FREEZING_SELF); | 273 | if (cgroup_is_dead(dsct)) |
| 443 | } | 274 | continue; |
| 444 | 275 | ||
| 445 | static u64 freezer_parent_freezing_read(struct cgroup_subsys_state *css, | 276 | if (freeze) { |
| 446 | struct cftype *cft) | 277 | dsct->freezer.e_freeze++; |
| 447 | { | 278 | /* |
| 448 | struct freezer *freezer = css_freezer(css); | 279 | * Already frozen because of ancestor's settings? |
| 280 | */ | ||
| 281 | if (dsct->freezer.e_freeze > 1) | ||
| 282 | continue; | ||
| 283 | } else { | ||
| 284 | dsct->freezer.e_freeze--; | ||
| 285 | /* | ||
| 286 | * Still frozen because of ancestor's settings? | ||
| 287 | */ | ||
| 288 | if (dsct->freezer.e_freeze > 0) | ||
| 289 | continue; | ||
| 449 | 290 | ||
| 450 | return (bool)(freezer->state & CGROUP_FREEZING_PARENT); | 291 | WARN_ON_ONCE(dsct->freezer.e_freeze < 0); |
| 451 | } | 292 | } |
| 293 | |||
| 294 | /* | ||
| 295 | * Do change actual state: freeze or unfreeze. | ||
| 296 | */ | ||
| 297 | cgroup_do_freeze(dsct, freeze); | ||
| 298 | applied = true; | ||
| 299 | } | ||
| 452 | 300 | ||
| 453 | static struct cftype files[] = { | 301 | /* |
| 454 | { | 302 | * Even if the actual state hasn't changed, let's notify a user. |
| 455 | .name = "state", | 303 | * The state can be enforced by an ancestor cgroup: the cgroup |
| 456 | .flags = CFTYPE_NOT_ON_ROOT, | 304 | * can already be in the desired state or it can be locked in the |
| 457 | .seq_show = freezer_read, | 305 | * opposite state, so that the transition will never happen. |
| 458 | .write = freezer_write, | 306 | * In both cases it's better to notify a user, that there is |
| 459 | }, | 307 | * nothing to wait for. |
| 460 | { | 308 | */ |
| 461 | .name = "self_freezing", | 309 | if (!applied) { |
| 462 | .flags = CFTYPE_NOT_ON_ROOT, | 310 | TRACE_CGROUP_PATH(notify_frozen, cgrp, |
| 463 | .read_u64 = freezer_self_freezing_read, | 311 | test_bit(CGRP_FROZEN, &cgrp->flags)); |
| 464 | }, | 312 | cgroup_file_notify(&cgrp->events_file); |
| 465 | { | 313 | } |
| 466 | .name = "parent_freezing", | 314 | } |
| 467 | .flags = CFTYPE_NOT_ON_ROOT, | ||
| 468 | .read_u64 = freezer_parent_freezing_read, | ||
| 469 | }, | ||
| 470 | { } /* terminate */ | ||
| 471 | }; | ||
| 472 | |||
| 473 | struct cgroup_subsys freezer_cgrp_subsys = { | ||
| 474 | .css_alloc = freezer_css_alloc, | ||
| 475 | .css_online = freezer_css_online, | ||
| 476 | .css_offline = freezer_css_offline, | ||
| 477 | .css_free = freezer_css_free, | ||
| 478 | .attach = freezer_attach, | ||
| 479 | .fork = freezer_fork, | ||
| 480 | .legacy_cftypes = files, | ||
| 481 | }; | ||
diff --git a/kernel/cgroup/legacy_freezer.c b/kernel/cgroup/legacy_freezer.c new file mode 100644 index 000000000000..08236798d173 --- /dev/null +++ b/kernel/cgroup/legacy_freezer.c | |||
| @@ -0,0 +1,481 @@ | |||
| 1 | /* | ||
| 2 | * cgroup_freezer.c - control group freezer subsystem | ||
| 3 | * | ||
| 4 | * Copyright IBM Corporation, 2007 | ||
| 5 | * | ||
| 6 | * Author : Cedric Le Goater <clg@fr.ibm.com> | ||
| 7 | * | ||
| 8 | * This program is free software; you can redistribute it and/or modify it | ||
| 9 | * under the terms of version 2.1 of the GNU Lesser General Public License | ||
| 10 | * as published by the Free Software Foundation. | ||
| 11 | * | ||
| 12 | * This program is distributed in the hope that it would be useful, but | ||
| 13 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
| 15 | */ | ||
| 16 | |||
| 17 | #include <linux/export.h> | ||
| 18 | #include <linux/slab.h> | ||
| 19 | #include <linux/cgroup.h> | ||
| 20 | #include <linux/fs.h> | ||
| 21 | #include <linux/uaccess.h> | ||
| 22 | #include <linux/freezer.h> | ||
| 23 | #include <linux/seq_file.h> | ||
| 24 | #include <linux/mutex.h> | ||
| 25 | |||
| 26 | /* | ||
| 27 | * A cgroup is freezing if any FREEZING flags are set. FREEZING_SELF is | ||
| 28 | * set if "FROZEN" is written to freezer.state cgroupfs file, and cleared | ||
| 29 | * for "THAWED". FREEZING_PARENT is set if the parent freezer is FREEZING | ||
| 30 | * for whatever reason. IOW, a cgroup has FREEZING_PARENT set if one of | ||
| 31 | * its ancestors has FREEZING_SELF set. | ||
| 32 | */ | ||
| 33 | enum freezer_state_flags { | ||
| 34 | CGROUP_FREEZER_ONLINE = (1 << 0), /* freezer is fully online */ | ||
| 35 | CGROUP_FREEZING_SELF = (1 << 1), /* this freezer is freezing */ | ||
| 36 | CGROUP_FREEZING_PARENT = (1 << 2), /* the parent freezer is freezing */ | ||
| 37 | CGROUP_FROZEN = (1 << 3), /* this and its descendants frozen */ | ||
| 38 | |||
| 39 | /* mask for all FREEZING flags */ | ||
| 40 | CGROUP_FREEZING = CGROUP_FREEZING_SELF | CGROUP_FREEZING_PARENT, | ||
| 41 | }; | ||
| 42 | |||
| 43 | struct freezer { | ||
| 44 | struct cgroup_subsys_state css; | ||
| 45 | unsigned int state; | ||
| 46 | }; | ||
| 47 | |||
| 48 | static DEFINE_MUTEX(freezer_mutex); | ||
| 49 | |||
| 50 | static inline struct freezer *css_freezer(struct cgroup_subsys_state *css) | ||
| 51 | { | ||
| 52 | return css ? container_of(css, struct freezer, css) : NULL; | ||
| 53 | } | ||
| 54 | |||
| 55 | static inline struct freezer *task_freezer(struct task_struct *task) | ||
| 56 | { | ||
| 57 | return css_freezer(task_css(task, freezer_cgrp_id)); | ||
| 58 | } | ||
| 59 | |||
| 60 | static struct freezer *parent_freezer(struct freezer *freezer) | ||
| 61 | { | ||
| 62 | return css_freezer(freezer->css.parent); | ||
| 63 | } | ||
| 64 | |||
| 65 | bool cgroup_freezing(struct task_struct *task) | ||
| 66 | { | ||
| 67 | bool ret; | ||
| 68 | |||
| 69 | rcu_read_lock(); | ||
| 70 | ret = task_freezer(task)->state & CGROUP_FREEZING; | ||
| 71 | rcu_read_unlock(); | ||
| 72 | |||
| 73 | return ret; | ||
| 74 | } | ||
| 75 | |||
| 76 | static const char *freezer_state_strs(unsigned int state) | ||
| 77 | { | ||
| 78 | if (state & CGROUP_FROZEN) | ||
| 79 | return "FROZEN"; | ||
| 80 | if (state & CGROUP_FREEZING) | ||
| 81 | return "FREEZING"; | ||
| 82 | return "THAWED"; | ||
| 83 | }; | ||
| 84 | |||
| 85 | static struct cgroup_subsys_state * | ||
| 86 | freezer_css_alloc(struct cgroup_subsys_state *parent_css) | ||
| 87 | { | ||
| 88 | struct freezer *freezer; | ||
| 89 | |||
| 90 | freezer = kzalloc(sizeof(struct freezer), GFP_KERNEL); | ||
| 91 | if (!freezer) | ||
| 92 | return ERR_PTR(-ENOMEM); | ||
| 93 | |||
| 94 | return &freezer->css; | ||
| 95 | } | ||
| 96 | |||
| 97 | /** | ||
| 98 | * freezer_css_online - commit creation of a freezer css | ||
| 99 | * @css: css being created | ||
| 100 | * | ||
| 101 | * We're committing to creation of @css. Mark it online and inherit | ||
| 102 | * parent's freezing state while holding both parent's and our | ||
| 103 | * freezer->lock. | ||
| 104 | */ | ||
| 105 | static int freezer_css_online(struct cgroup_subsys_state *css) | ||
| 106 | { | ||
| 107 | struct freezer *freezer = css_freezer(css); | ||
| 108 | struct freezer *parent = parent_freezer(freezer); | ||
| 109 | |||
| 110 | mutex_lock(&freezer_mutex); | ||
| 111 | |||
| 112 | freezer->state |= CGROUP_FREEZER_ONLINE; | ||
| 113 | |||
| 114 | if (parent && (parent->state & CGROUP_FREEZING)) { | ||
| 115 | freezer->state |= CGROUP_FREEZING_PARENT | CGROUP_FROZEN; | ||
| 116 | atomic_inc(&system_freezing_cnt); | ||
| 117 | } | ||
| 118 | |||
| 119 | mutex_unlock(&freezer_mutex); | ||
| 120 | return 0; | ||
| 121 | } | ||
| 122 | |||
| 123 | /** | ||
| 124 | * freezer_css_offline - initiate destruction of a freezer css | ||
| 125 | * @css: css being destroyed | ||
| 126 | * | ||
| 127 | * @css is going away. Mark it dead and decrement system_freezing_count if | ||
| 128 | * it was holding one. | ||
| 129 | */ | ||
| 130 | static void freezer_css_offline(struct cgroup_subsys_state *css) | ||
| 131 | { | ||
| 132 | struct freezer *freezer = css_freezer(css); | ||
| 133 | |||
| 134 | mutex_lock(&freezer_mutex); | ||
| 135 | |||
| 136 | if (freezer->state & CGROUP_FREEZING) | ||
| 137 | atomic_dec(&system_freezing_cnt); | ||
| 138 | |||
| 139 | freezer->state = 0; | ||
| 140 | |||
| 141 | mutex_unlock(&freezer_mutex); | ||
| 142 | } | ||
| 143 | |||
| 144 | static void freezer_css_free(struct cgroup_subsys_state *css) | ||
| 145 | { | ||
| 146 | kfree(css_freezer(css)); | ||
| 147 | } | ||
| 148 | |||
| 149 | /* | ||
| 150 | * Tasks can be migrated into a different freezer anytime regardless of its | ||
| 151 | * current state. freezer_attach() is responsible for making new tasks | ||
| 152 | * conform to the current state. | ||
| 153 | * | ||
| 154 | * Freezer state changes and task migration are synchronized via | ||
| 155 | * @freezer->lock. freezer_attach() makes the new tasks conform to the | ||
| 156 | * current state and all following state changes can see the new tasks. | ||
| 157 | */ | ||
| 158 | static void freezer_attach(struct cgroup_taskset *tset) | ||
| 159 | { | ||
| 160 | struct task_struct *task; | ||
| 161 | struct cgroup_subsys_state *new_css; | ||
| 162 | |||
| 163 | mutex_lock(&freezer_mutex); | ||
| 164 | |||
| 165 | /* | ||
| 166 | * Make the new tasks conform to the current state of @new_css. | ||
| 167 | * For simplicity, when migrating any task to a FROZEN cgroup, we | ||
| 168 | * revert it to FREEZING and let update_if_frozen() determine the | ||
| 169 | * correct state later. | ||
| 170 | * | ||
| 171 | * Tasks in @tset are on @new_css but may not conform to its | ||
| 172 | * current state before executing the following - !frozen tasks may | ||
| 173 | * be visible in a FROZEN cgroup and frozen tasks in a THAWED one. | ||
| 174 | */ | ||
| 175 | cgroup_taskset_for_each(task, new_css, tset) { | ||
| 176 | struct freezer *freezer = css_freezer(new_css); | ||
| 177 | |||
| 178 | if (!(freezer->state & CGROUP_FREEZING)) { | ||
| 179 | __thaw_task(task); | ||
| 180 | } else { | ||
| 181 | freeze_task(task); | ||
| 182 | /* clear FROZEN and propagate upwards */ | ||
| 183 | while (freezer && (freezer->state & CGROUP_FROZEN)) { | ||
| 184 | freezer->state &= ~CGROUP_FROZEN; | ||
| 185 | freezer = parent_freezer(freezer); | ||
| 186 | } | ||
| 187 | } | ||
| 188 | } | ||
| 189 | |||
| 190 | mutex_unlock(&freezer_mutex); | ||
| 191 | } | ||
| 192 | |||
| 193 | /** | ||
| 194 | * freezer_fork - cgroup post fork callback | ||
| 195 | * @task: a task which has just been forked | ||
| 196 | * | ||
| 197 | * @task has just been created and should conform to the current state of | ||
| 198 | * the cgroup_freezer it belongs to. This function may race against | ||
| 199 | * freezer_attach(). Losing to freezer_attach() means that we don't have | ||
| 200 | * to do anything as freezer_attach() will put @task into the appropriate | ||
| 201 | * state. | ||
| 202 | */ | ||
| 203 | static void freezer_fork(struct task_struct *task) | ||
| 204 | { | ||
| 205 | struct freezer *freezer; | ||
| 206 | |||
| 207 | /* | ||
| 208 | * The root cgroup is non-freezable, so we can skip locking the | ||
| 209 | * freezer. This is safe regardless of race with task migration. | ||
| 210 | * If we didn't race or won, skipping is obviously the right thing | ||
| 211 | * to do. If we lost and root is the new cgroup, noop is still the | ||
| 212 | * right thing to do. | ||
| 213 | */ | ||
| 214 | if (task_css_is_root(task, freezer_cgrp_id)) | ||
| 215 | return; | ||
| 216 | |||
| 217 | mutex_lock(&freezer_mutex); | ||
| 218 | rcu_read_lock(); | ||
| 219 | |||
| 220 | freezer = task_freezer(task); | ||
| 221 | if (freezer->state & CGROUP_FREEZING) | ||
| 222 | freeze_task(task); | ||
| 223 | |||
| 224 | rcu_read_unlock(); | ||
| 225 | mutex_unlock(&freezer_mutex); | ||
| 226 | } | ||
| 227 | |||
| 228 | /** | ||
| 229 | * update_if_frozen - update whether a cgroup finished freezing | ||
| 230 | * @css: css of interest | ||
| 231 | * | ||
| 232 | * Once FREEZING is initiated, transition to FROZEN is lazily updated by | ||
| 233 | * calling this function. If the current state is FREEZING but not FROZEN, | ||
| 234 | * this function checks whether all tasks of this cgroup and the descendant | ||
| 235 | * cgroups finished freezing and, if so, sets FROZEN. | ||
| 236 | * | ||
| 237 | * The caller is responsible for grabbing RCU read lock and calling | ||
| 238 | * update_if_frozen() on all descendants prior to invoking this function. | ||
| 239 | * | ||
| 240 | * Task states and freezer state might disagree while tasks are being | ||
| 241 | * migrated into or out of @css, so we can't verify task states against | ||
| 242 | * @freezer state here. See freezer_attach() for details. | ||
| 243 | */ | ||
| 244 | static void update_if_frozen(struct cgroup_subsys_state *css) | ||
| 245 | { | ||
| 246 | struct freezer *freezer = css_freezer(css); | ||
| 247 | struct cgroup_subsys_state *pos; | ||
| 248 | struct css_task_iter it; | ||
| 249 | struct task_struct *task; | ||
| 250 | |||
| 251 | lockdep_assert_held(&freezer_mutex); | ||
| 252 | |||
| 253 | if (!(freezer->state & CGROUP_FREEZING) || | ||
| 254 | (freezer->state & CGROUP_FROZEN)) | ||
| 255 | return; | ||
| 256 | |||
| 257 | /* are all (live) children frozen? */ | ||
| 258 | rcu_read_lock(); | ||
| 259 | css_for_each_child(pos, css) { | ||
| 260 | struct freezer *child = css_freezer(pos); | ||
| 261 | |||
| 262 | if ((child->state & CGROUP_FREEZER_ONLINE) && | ||
| 263 | !(child->state & CGROUP_FROZEN)) { | ||
| 264 | rcu_read_unlock(); | ||
| 265 | return; | ||
| 266 | } | ||
| 267 | } | ||
| 268 | rcu_read_unlock(); | ||
| 269 | |||
| 270 | /* are all tasks frozen? */ | ||
| 271 | css_task_iter_start(css, 0, &it); | ||
| 272 | |||
| 273 | while ((task = css_task_iter_next(&it))) { | ||
| 274 | if (freezing(task)) { | ||
| 275 | /* | ||
| 276 | * freezer_should_skip() indicates that the task | ||
| 277 | * should be skipped when determining freezing | ||
| 278 | * completion. Consider it frozen in addition to | ||
| 279 | * the usual frozen condition. | ||
| 280 | */ | ||
| 281 | if (!frozen(task) && !freezer_should_skip(task)) | ||
| 282 | goto out_iter_end; | ||
| 283 | } | ||
| 284 | } | ||
| 285 | |||
| 286 | freezer->state |= CGROUP_FROZEN; | ||
| 287 | out_iter_end: | ||
| 288 | css_task_iter_end(&it); | ||
| 289 | } | ||
| 290 | |||
| 291 | static int freezer_read(struct seq_file *m, void *v) | ||
| 292 | { | ||
| 293 | struct cgroup_subsys_state *css = seq_css(m), *pos; | ||
| 294 | |||
| 295 | mutex_lock(&freezer_mutex); | ||
| 296 | rcu_read_lock(); | ||
| 297 | |||
| 298 | /* update states bottom-up */ | ||
| 299 | css_for_each_descendant_post(pos, css) { | ||
| 300 | if (!css_tryget_online(pos)) | ||
| 301 | continue; | ||
| 302 | rcu_read_unlock(); | ||
| 303 | |||
| 304 | update_if_frozen(pos); | ||
| 305 | |||
| 306 | rcu_read_lock(); | ||
| 307 | css_put(pos); | ||
| 308 | } | ||
| 309 | |||
| 310 | rcu_read_unlock(); | ||
| 311 | mutex_unlock(&freezer_mutex); | ||
| 312 | |||
| 313 | seq_puts(m, freezer_state_strs(css_freezer(css)->state)); | ||
| 314 | seq_putc(m, '\n'); | ||
| 315 | return 0; | ||
| 316 | } | ||
| 317 | |||
| 318 | static void freeze_cgroup(struct freezer *freezer) | ||
| 319 | { | ||
| 320 | struct css_task_iter it; | ||
| 321 | struct task_struct *task; | ||
| 322 | |||
| 323 | css_task_iter_start(&freezer->css, 0, &it); | ||
| 324 | while ((task = css_task_iter_next(&it))) | ||
| 325 | freeze_task(task); | ||
| 326 | css_task_iter_end(&it); | ||
| 327 | } | ||
| 328 | |||
| 329 | static void unfreeze_cgroup(struct freezer *freezer) | ||
| 330 | { | ||
| 331 | struct css_task_iter it; | ||
| 332 | struct task_struct *task; | ||
| 333 | |||
| 334 | css_task_iter_start(&freezer->css, 0, &it); | ||
| 335 | while ((task = css_task_iter_next(&it))) | ||
| 336 | __thaw_task(task); | ||
| 337 | css_task_iter_end(&it); | ||
| 338 | } | ||
| 339 | |||
| 340 | /** | ||
| 341 | * freezer_apply_state - apply state change to a single cgroup_freezer | ||
| 342 | * @freezer: freezer to apply state change to | ||
| 343 | * @freeze: whether to freeze or unfreeze | ||
| 344 | * @state: CGROUP_FREEZING_* flag to set or clear | ||
| 345 | * | ||
| 346 | * Set or clear @state on @cgroup according to @freeze, and perform | ||
| 347 | * freezing or thawing as necessary. | ||
| 348 | */ | ||
| 349 | static void freezer_apply_state(struct freezer *freezer, bool freeze, | ||
| 350 | unsigned int state) | ||
| 351 | { | ||
| 352 | /* also synchronizes against task migration, see freezer_attach() */ | ||
| 353 | lockdep_assert_held(&freezer_mutex); | ||
| 354 | |||
| 355 | if (!(freezer->state & CGROUP_FREEZER_ONLINE)) | ||
| 356 | return; | ||
| 357 | |||
| 358 | if (freeze) { | ||
| 359 | if (!(freezer->state & CGROUP_FREEZING)) | ||
| 360 | atomic_inc(&system_freezing_cnt); | ||
| 361 | freezer->state |= state; | ||
| 362 | freeze_cgroup(freezer); | ||
| 363 | } else { | ||
| 364 | bool was_freezing = freezer->state & CGROUP_FREEZING; | ||
| 365 | |||
| 366 | freezer->state &= ~state; | ||
| 367 | |||
| 368 | if (!(freezer->state & CGROUP_FREEZING)) { | ||
| 369 | if (was_freezing) | ||
| 370 | atomic_dec(&system_freezing_cnt); | ||
| 371 | freezer->state &= ~CGROUP_FROZEN; | ||
| 372 | unfreeze_cgroup(freezer); | ||
| 373 | } | ||
| 374 | } | ||
| 375 | } | ||
| 376 | |||
| 377 | /** | ||
| 378 | * freezer_change_state - change the freezing state of a cgroup_freezer | ||
| 379 | * @freezer: freezer of interest | ||
| 380 | * @freeze: whether to freeze or thaw | ||
| 381 | * | ||
| 382 | * Freeze or thaw @freezer according to @freeze. The operations are | ||
| 383 | * recursive - all descendants of @freezer will be affected. | ||
| 384 | */ | ||
| 385 | static void freezer_change_state(struct freezer *freezer, bool freeze) | ||
| 386 | { | ||
| 387 | struct cgroup_subsys_state *pos; | ||
| 388 | |||
| 389 | /* | ||
| 390 | * Update all its descendants in pre-order traversal. Each | ||
| 391 | * descendant will try to inherit its parent's FREEZING state as | ||
| 392 | * CGROUP_FREEZING_PARENT. | ||
| 393 | */ | ||
| 394 | mutex_lock(&freezer_mutex); | ||
| 395 | rcu_read_lock(); | ||
| 396 | css_for_each_descendant_pre(pos, &freezer->css) { | ||
| 397 | struct freezer *pos_f = css_freezer(pos); | ||
| 398 | struct freezer *parent = parent_freezer(pos_f); | ||
| 399 | |||
| 400 | if (!css_tryget_online(pos)) | ||
| 401 | continue; | ||
| 402 | rcu_read_unlock(); | ||
| 403 | |||
| 404 | if (pos_f == freezer) | ||
| 405 | freezer_apply_state(pos_f, freeze, | ||
| 406 | CGROUP_FREEZING_SELF); | ||
| 407 | else | ||
| 408 | freezer_apply_state(pos_f, | ||
| 409 | parent->state & CGROUP_FREEZING, | ||
| 410 | CGROUP_FREEZING_PARENT); | ||
| 411 | |||
| 412 | rcu_read_lock(); | ||
| 413 | css_put(pos); | ||
| 414 | } | ||
| 415 | rcu_read_unlock(); | ||
| 416 | mutex_unlock(&freezer_mutex); | ||
| 417 | } | ||
| 418 | |||
| 419 | static ssize_t freezer_write(struct kernfs_open_file *of, | ||
| 420 | char *buf, size_t nbytes, loff_t off) | ||
| 421 | { | ||
| 422 | bool freeze; | ||
| 423 | |||
| 424 | buf = strstrip(buf); | ||
| 425 | |||
| 426 | if (strcmp(buf, freezer_state_strs(0)) == 0) | ||
| 427 | freeze = false; | ||
| 428 | else if (strcmp(buf, freezer_state_strs(CGROUP_FROZEN)) == 0) | ||
| 429 | freeze = true; | ||
| 430 | else | ||
| 431 | return -EINVAL; | ||
| 432 | |||
| 433 | freezer_change_state(css_freezer(of_css(of)), freeze); | ||
| 434 | return nbytes; | ||
| 435 | } | ||
| 436 | |||
| 437 | static u64 freezer_self_freezing_read(struct cgroup_subsys_state *css, | ||
| 438 | struct cftype *cft) | ||
| 439 | { | ||
| 440 | struct freezer *freezer = css_freezer(css); | ||
| 441 | |||
| 442 | return (bool)(freezer->state & CGROUP_FREEZING_SELF); | ||
| 443 | } | ||
| 444 | |||
| 445 | static u64 freezer_parent_freezing_read(struct cgroup_subsys_state *css, | ||
| 446 | struct cftype *cft) | ||
| 447 | { | ||
| 448 | struct freezer *freezer = css_freezer(css); | ||
| 449 | |||
| 450 | return (bool)(freezer->state & CGROUP_FREEZING_PARENT); | ||
| 451 | } | ||
| 452 | |||
| 453 | static struct cftype files[] = { | ||
| 454 | { | ||
| 455 | .name = "state", | ||
| 456 | .flags = CFTYPE_NOT_ON_ROOT, | ||
| 457 | .seq_show = freezer_read, | ||
| 458 | .write = freezer_write, | ||
| 459 | }, | ||
| 460 | { | ||
| 461 | .name = "self_freezing", | ||
| 462 | .flags = CFTYPE_NOT_ON_ROOT, | ||
| 463 | .read_u64 = freezer_self_freezing_read, | ||
| 464 | }, | ||
| 465 | { | ||
| 466 | .name = "parent_freezing", | ||
| 467 | .flags = CFTYPE_NOT_ON_ROOT, | ||
| 468 | .read_u64 = freezer_parent_freezing_read, | ||
| 469 | }, | ||
| 470 | { } /* terminate */ | ||
| 471 | }; | ||
| 472 | |||
| 473 | struct cgroup_subsys freezer_cgrp_subsys = { | ||
| 474 | .css_alloc = freezer_css_alloc, | ||
| 475 | .css_online = freezer_css_online, | ||
| 476 | .css_offline = freezer_css_offline, | ||
| 477 | .css_free = freezer_css_free, | ||
| 478 | .attach = freezer_attach, | ||
| 479 | .fork = freezer_fork, | ||
| 480 | .legacy_cftypes = files, | ||
| 481 | }; | ||
diff --git a/kernel/fork.c b/kernel/fork.c index 8b03d93ba068..5359facf9867 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
| @@ -1225,7 +1225,9 @@ static int wait_for_vfork_done(struct task_struct *child, | |||
| 1225 | int killed; | 1225 | int killed; |
| 1226 | 1226 | ||
| 1227 | freezer_do_not_count(); | 1227 | freezer_do_not_count(); |
| 1228 | cgroup_enter_frozen(); | ||
| 1228 | killed = wait_for_completion_killable(vfork); | 1229 | killed = wait_for_completion_killable(vfork); |
| 1230 | cgroup_leave_frozen(false); | ||
| 1229 | freezer_count(); | 1231 | freezer_count(); |
| 1230 | 1232 | ||
| 1231 | if (killed) { | 1233 | if (killed) { |
diff --git a/kernel/signal.c b/kernel/signal.c index cd83cc376767..62f9aea4a15a 100644 --- a/kernel/signal.c +++ b/kernel/signal.c | |||
| @@ -43,6 +43,7 @@ | |||
| 43 | #include <linux/compiler.h> | 43 | #include <linux/compiler.h> |
| 44 | #include <linux/posix-timers.h> | 44 | #include <linux/posix-timers.h> |
| 45 | #include <linux/livepatch.h> | 45 | #include <linux/livepatch.h> |
| 46 | #include <linux/cgroup.h> | ||
| 46 | 47 | ||
| 47 | #define CREATE_TRACE_POINTS | 48 | #define CREATE_TRACE_POINTS |
| 48 | #include <trace/events/signal.h> | 49 | #include <trace/events/signal.h> |
| @@ -146,9 +147,10 @@ static inline bool has_pending_signals(sigset_t *signal, sigset_t *blocked) | |||
| 146 | 147 | ||
| 147 | static bool recalc_sigpending_tsk(struct task_struct *t) | 148 | static bool recalc_sigpending_tsk(struct task_struct *t) |
| 148 | { | 149 | { |
| 149 | if ((t->jobctl & JOBCTL_PENDING_MASK) || | 150 | if ((t->jobctl & (JOBCTL_PENDING_MASK | JOBCTL_TRAP_FREEZE)) || |
| 150 | PENDING(&t->pending, &t->blocked) || | 151 | PENDING(&t->pending, &t->blocked) || |
| 151 | PENDING(&t->signal->shared_pending, &t->blocked)) { | 152 | PENDING(&t->signal->shared_pending, &t->blocked) || |
| 153 | cgroup_task_frozen(t)) { | ||
| 152 | set_tsk_thread_flag(t, TIF_SIGPENDING); | 154 | set_tsk_thread_flag(t, TIF_SIGPENDING); |
| 153 | return true; | 155 | return true; |
| 154 | } | 156 | } |
| @@ -2108,6 +2110,7 @@ static void ptrace_stop(int exit_code, int why, int clear_code, kernel_siginfo_t | |||
| 2108 | preempt_disable(); | 2110 | preempt_disable(); |
| 2109 | read_unlock(&tasklist_lock); | 2111 | read_unlock(&tasklist_lock); |
| 2110 | preempt_enable_no_resched(); | 2112 | preempt_enable_no_resched(); |
| 2113 | cgroup_enter_frozen(); | ||
| 2111 | freezable_schedule(); | 2114 | freezable_schedule(); |
| 2112 | } else { | 2115 | } else { |
| 2113 | /* | 2116 | /* |
| @@ -2286,6 +2289,7 @@ static bool do_signal_stop(int signr) | |||
| 2286 | } | 2289 | } |
| 2287 | 2290 | ||
| 2288 | /* Now we don't run again until woken by SIGCONT or SIGKILL */ | 2291 | /* Now we don't run again until woken by SIGCONT or SIGKILL */ |
| 2292 | cgroup_enter_frozen(); | ||
| 2289 | freezable_schedule(); | 2293 | freezable_schedule(); |
| 2290 | return true; | 2294 | return true; |
| 2291 | } else { | 2295 | } else { |
| @@ -2332,6 +2336,43 @@ static void do_jobctl_trap(void) | |||
| 2332 | } | 2336 | } |
| 2333 | } | 2337 | } |
| 2334 | 2338 | ||
| 2339 | /** | ||
| 2340 | * do_freezer_trap - handle the freezer jobctl trap | ||
| 2341 | * | ||
| 2342 | * Puts the task into frozen state, if only the task is not about to quit. | ||
| 2343 | * In this case it drops JOBCTL_TRAP_FREEZE. | ||
| 2344 | * | ||
| 2345 | * CONTEXT: | ||
| 2346 | * Must be called with @current->sighand->siglock held, | ||
| 2347 | * which is always released before returning. | ||
| 2348 | */ | ||
| 2349 | static void do_freezer_trap(void) | ||
| 2350 | __releases(¤t->sighand->siglock) | ||
| 2351 | { | ||
| 2352 | /* | ||
| 2353 | * If there are other trap bits pending except JOBCTL_TRAP_FREEZE, | ||
| 2354 | * let's make another loop to give it a chance to be handled. | ||
| 2355 | * In any case, we'll return back. | ||
| 2356 | */ | ||
| 2357 | if ((current->jobctl & (JOBCTL_PENDING_MASK | JOBCTL_TRAP_FREEZE)) != | ||
| 2358 | JOBCTL_TRAP_FREEZE) { | ||
| 2359 | spin_unlock_irq(¤t->sighand->siglock); | ||
| 2360 | return; | ||
| 2361 | } | ||
| 2362 | |||
| 2363 | /* | ||
| 2364 | * Now we're sure that there is no pending fatal signal and no | ||
| 2365 | * pending traps. Clear TIF_SIGPENDING to not get out of schedule() | ||
| 2366 | * immediately (if there is a non-fatal signal pending), and | ||
| 2367 | * put the task into sleep. | ||
| 2368 | */ | ||
| 2369 | __set_current_state(TASK_INTERRUPTIBLE); | ||
| 2370 | clear_thread_flag(TIF_SIGPENDING); | ||
| 2371 | spin_unlock_irq(¤t->sighand->siglock); | ||
| 2372 | cgroup_enter_frozen(); | ||
| 2373 | freezable_schedule(); | ||
| 2374 | } | ||
| 2375 | |||
| 2335 | static int ptrace_signal(int signr, kernel_siginfo_t *info) | 2376 | static int ptrace_signal(int signr, kernel_siginfo_t *info) |
| 2336 | { | 2377 | { |
| 2337 | /* | 2378 | /* |
| @@ -2452,9 +2493,24 @@ relock: | |||
| 2452 | do_signal_stop(0)) | 2493 | do_signal_stop(0)) |
| 2453 | goto relock; | 2494 | goto relock; |
| 2454 | 2495 | ||
| 2455 | if (unlikely(current->jobctl & JOBCTL_TRAP_MASK)) { | 2496 | if (unlikely(current->jobctl & |
| 2456 | do_jobctl_trap(); | 2497 | (JOBCTL_TRAP_MASK | JOBCTL_TRAP_FREEZE))) { |
| 2498 | if (current->jobctl & JOBCTL_TRAP_MASK) { | ||
| 2499 | do_jobctl_trap(); | ||
| 2500 | spin_unlock_irq(&sighand->siglock); | ||
| 2501 | } else if (current->jobctl & JOBCTL_TRAP_FREEZE) | ||
| 2502 | do_freezer_trap(); | ||
| 2503 | |||
| 2504 | goto relock; | ||
| 2505 | } | ||
| 2506 | |||
| 2507 | /* | ||
| 2508 | * If the task is leaving the frozen state, let's update | ||
| 2509 | * cgroup counters and reset the frozen bit. | ||
| 2510 | */ | ||
| 2511 | if (unlikely(cgroup_task_frozen(current))) { | ||
| 2457 | spin_unlock_irq(&sighand->siglock); | 2512 | spin_unlock_irq(&sighand->siglock); |
| 2513 | cgroup_leave_frozen(false); | ||
| 2458 | goto relock; | 2514 | goto relock; |
| 2459 | } | 2515 | } |
| 2460 | 2516 | ||
| @@ -2550,6 +2606,8 @@ relock: | |||
| 2550 | 2606 | ||
| 2551 | fatal: | 2607 | fatal: |
| 2552 | spin_unlock_irq(&sighand->siglock); | 2608 | spin_unlock_irq(&sighand->siglock); |
| 2609 | if (unlikely(cgroup_task_frozen(current))) | ||
| 2610 | cgroup_leave_frozen(true); | ||
| 2553 | 2611 | ||
| 2554 | /* | 2612 | /* |
| 2555 | * Anything else is fatal, maybe with a core dump. | 2613 | * Anything else is fatal, maybe with a core dump. |
diff --git a/tools/testing/selftests/cgroup/.gitignore b/tools/testing/selftests/cgroup/.gitignore index adacda50a4b2..7f9835624793 100644 --- a/tools/testing/selftests/cgroup/.gitignore +++ b/tools/testing/selftests/cgroup/.gitignore | |||
| @@ -1,2 +1,3 @@ | |||
| 1 | test_memcontrol | 1 | test_memcontrol |
| 2 | test_core | 2 | test_core |
| 3 | test_freezer | ||
diff --git a/tools/testing/selftests/cgroup/Makefile b/tools/testing/selftests/cgroup/Makefile index 23fbaa4a9630..8d369b6a2069 100644 --- a/tools/testing/selftests/cgroup/Makefile +++ b/tools/testing/selftests/cgroup/Makefile | |||
| @@ -5,8 +5,10 @@ all: | |||
| 5 | 5 | ||
| 6 | TEST_GEN_PROGS = test_memcontrol | 6 | TEST_GEN_PROGS = test_memcontrol |
| 7 | TEST_GEN_PROGS += test_core | 7 | TEST_GEN_PROGS += test_core |
| 8 | TEST_GEN_PROGS += test_freezer | ||
| 8 | 9 | ||
| 9 | include ../lib.mk | 10 | include ../lib.mk |
| 10 | 11 | ||
| 11 | $(OUTPUT)/test_memcontrol: cgroup_util.c | 12 | $(OUTPUT)/test_memcontrol: cgroup_util.c |
| 12 | $(OUTPUT)/test_core: cgroup_util.c | 13 | $(OUTPUT)/test_core: cgroup_util.c |
| 14 | $(OUTPUT)/test_freezer: cgroup_util.c | ||
diff --git a/tools/testing/selftests/cgroup/cgroup_util.c b/tools/testing/selftests/cgroup/cgroup_util.c index 14c9fe284806..4c223266299a 100644 --- a/tools/testing/selftests/cgroup/cgroup_util.c +++ b/tools/testing/selftests/cgroup/cgroup_util.c | |||
| @@ -74,6 +74,16 @@ char *cg_name_indexed(const char *root, const char *name, int index) | |||
| 74 | return ret; | 74 | return ret; |
| 75 | } | 75 | } |
| 76 | 76 | ||
| 77 | char *cg_control(const char *cgroup, const char *control) | ||
| 78 | { | ||
| 79 | size_t len = strlen(cgroup) + strlen(control) + 2; | ||
| 80 | char *ret = malloc(len); | ||
| 81 | |||
| 82 | snprintf(ret, len, "%s/%s", cgroup, control); | ||
| 83 | |||
| 84 | return ret; | ||
| 85 | } | ||
| 86 | |||
| 77 | int cg_read(const char *cgroup, const char *control, char *buf, size_t len) | 87 | int cg_read(const char *cgroup, const char *control, char *buf, size_t len) |
| 78 | { | 88 | { |
| 79 | char path[PATH_MAX]; | 89 | char path[PATH_MAX]; |
| @@ -196,7 +206,32 @@ int cg_create(const char *cgroup) | |||
| 196 | return mkdir(cgroup, 0644); | 206 | return mkdir(cgroup, 0644); |
| 197 | } | 207 | } |
| 198 | 208 | ||
| 199 | static int cg_killall(const char *cgroup) | 209 | int cg_wait_for_proc_count(const char *cgroup, int count) |
| 210 | { | ||
| 211 | char buf[10 * PAGE_SIZE] = {0}; | ||
| 212 | int attempts; | ||
| 213 | char *ptr; | ||
| 214 | |||
| 215 | for (attempts = 10; attempts >= 0; attempts--) { | ||
| 216 | int nr = 0; | ||
| 217 | |||
| 218 | if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf))) | ||
| 219 | break; | ||
| 220 | |||
| 221 | for (ptr = buf; *ptr; ptr++) | ||
| 222 | if (*ptr == '\n') | ||
| 223 | nr++; | ||
| 224 | |||
| 225 | if (nr >= count) | ||
| 226 | return 0; | ||
| 227 | |||
| 228 | usleep(100000); | ||
| 229 | } | ||
| 230 | |||
| 231 | return -1; | ||
| 232 | } | ||
| 233 | |||
| 234 | int cg_killall(const char *cgroup) | ||
| 200 | { | 235 | { |
| 201 | char buf[PAGE_SIZE]; | 236 | char buf[PAGE_SIZE]; |
| 202 | char *ptr = buf; | 237 | char *ptr = buf; |
| @@ -227,9 +262,7 @@ int cg_destroy(const char *cgroup) | |||
| 227 | retry: | 262 | retry: |
| 228 | ret = rmdir(cgroup); | 263 | ret = rmdir(cgroup); |
| 229 | if (ret && errno == EBUSY) { | 264 | if (ret && errno == EBUSY) { |
| 230 | ret = cg_killall(cgroup); | 265 | cg_killall(cgroup); |
| 231 | if (ret) | ||
| 232 | return ret; | ||
| 233 | usleep(100); | 266 | usleep(100); |
| 234 | goto retry; | 267 | goto retry; |
| 235 | } | 268 | } |
| @@ -240,6 +273,14 @@ retry: | |||
| 240 | return ret; | 273 | return ret; |
| 241 | } | 274 | } |
| 242 | 275 | ||
| 276 | int cg_enter(const char *cgroup, int pid) | ||
| 277 | { | ||
| 278 | char pidbuf[64]; | ||
| 279 | |||
| 280 | snprintf(pidbuf, sizeof(pidbuf), "%d", pid); | ||
| 281 | return cg_write(cgroup, "cgroup.procs", pidbuf); | ||
| 282 | } | ||
| 283 | |||
| 243 | int cg_enter_current(const char *cgroup) | 284 | int cg_enter_current(const char *cgroup) |
| 244 | { | 285 | { |
| 245 | char pidbuf[64]; | 286 | char pidbuf[64]; |
| @@ -369,3 +410,12 @@ int set_oom_adj_score(int pid, int score) | |||
| 369 | close(fd); | 410 | close(fd); |
| 370 | return 0; | 411 | return 0; |
| 371 | } | 412 | } |
| 413 | |||
| 414 | char proc_read_text(int pid, const char *item, char *buf, size_t size) | ||
| 415 | { | ||
| 416 | char path[PATH_MAX]; | ||
| 417 | |||
| 418 | snprintf(path, sizeof(path), "/proc/%d/%s", pid, item); | ||
| 419 | |||
| 420 | return read_text(path, buf, size); | ||
| 421 | } | ||
diff --git a/tools/testing/selftests/cgroup/cgroup_util.h b/tools/testing/selftests/cgroup/cgroup_util.h index 9ac8b7958f83..c72f28046bfa 100644 --- a/tools/testing/selftests/cgroup/cgroup_util.h +++ b/tools/testing/selftests/cgroup/cgroup_util.h | |||
| @@ -18,6 +18,7 @@ static inline int values_close(long a, long b, int err) | |||
| 18 | extern int cg_find_unified_root(char *root, size_t len); | 18 | extern int cg_find_unified_root(char *root, size_t len); |
| 19 | extern char *cg_name(const char *root, const char *name); | 19 | extern char *cg_name(const char *root, const char *name); |
| 20 | extern char *cg_name_indexed(const char *root, const char *name, int index); | 20 | extern char *cg_name_indexed(const char *root, const char *name, int index); |
| 21 | extern char *cg_control(const char *cgroup, const char *control); | ||
| 21 | extern int cg_create(const char *cgroup); | 22 | extern int cg_create(const char *cgroup); |
| 22 | extern int cg_destroy(const char *cgroup); | 23 | extern int cg_destroy(const char *cgroup); |
| 23 | extern int cg_read(const char *cgroup, const char *control, | 24 | extern int cg_read(const char *cgroup, const char *control, |
| @@ -32,6 +33,7 @@ extern int cg_write(const char *cgroup, const char *control, char *buf); | |||
| 32 | extern int cg_run(const char *cgroup, | 33 | extern int cg_run(const char *cgroup, |
| 33 | int (*fn)(const char *cgroup, void *arg), | 34 | int (*fn)(const char *cgroup, void *arg), |
| 34 | void *arg); | 35 | void *arg); |
| 36 | extern int cg_enter(const char *cgroup, int pid); | ||
| 35 | extern int cg_enter_current(const char *cgroup); | 37 | extern int cg_enter_current(const char *cgroup); |
| 36 | extern int cg_run_nowait(const char *cgroup, | 38 | extern int cg_run_nowait(const char *cgroup, |
| 37 | int (*fn)(const char *cgroup, void *arg), | 39 | int (*fn)(const char *cgroup, void *arg), |
| @@ -41,3 +43,6 @@ extern int alloc_pagecache(int fd, size_t size); | |||
| 41 | extern int alloc_anon(const char *cgroup, void *arg); | 43 | extern int alloc_anon(const char *cgroup, void *arg); |
| 42 | extern int is_swap_enabled(void); | 44 | extern int is_swap_enabled(void); |
| 43 | extern int set_oom_adj_score(int pid, int score); | 45 | extern int set_oom_adj_score(int pid, int score); |
| 46 | extern int cg_wait_for_proc_count(const char *cgroup, int count); | ||
| 47 | extern int cg_killall(const char *cgroup); | ||
| 48 | extern char proc_read_text(int pid, const char *item, char *buf, size_t size); | ||
diff --git a/tools/testing/selftests/cgroup/test_freezer.c b/tools/testing/selftests/cgroup/test_freezer.c new file mode 100644 index 000000000000..2bfddb6d6d3b --- /dev/null +++ b/tools/testing/selftests/cgroup/test_freezer.c | |||
| @@ -0,0 +1,851 @@ | |||
| 1 | /* SPDX-License-Identifier: GPL-2.0 */ | ||
| 2 | #include <stdbool.h> | ||
| 3 | #include <linux/limits.h> | ||
| 4 | #include <sys/ptrace.h> | ||
| 5 | #include <sys/types.h> | ||
| 6 | #include <sys/mman.h> | ||
| 7 | #include <unistd.h> | ||
| 8 | #include <stdio.h> | ||
| 9 | #include <errno.h> | ||
| 10 | #include <poll.h> | ||
| 11 | #include <stdlib.h> | ||
| 12 | #include <sys/inotify.h> | ||
| 13 | #include <string.h> | ||
| 14 | #include <sys/types.h> | ||
| 15 | #include <sys/wait.h> | ||
| 16 | |||
| 17 | #include "../kselftest.h" | ||
| 18 | #include "cgroup_util.h" | ||
| 19 | |||
| 20 | #define DEBUG | ||
| 21 | #ifdef DEBUG | ||
| 22 | #define debug(args...) fprintf(stderr, args) | ||
| 23 | #else | ||
| 24 | #define debug(args...) | ||
| 25 | #endif | ||
| 26 | |||
| 27 | /* | ||
| 28 | * Check if the cgroup is frozen by looking at the cgroup.events::frozen value. | ||
| 29 | */ | ||
| 30 | static int cg_check_frozen(const char *cgroup, bool frozen) | ||
| 31 | { | ||
| 32 | if (frozen) { | ||
| 33 | if (cg_read_strstr(cgroup, "cgroup.events", "frozen 1") != 0) { | ||
| 34 | debug("Cgroup %s isn't frozen\n", cgroup); | ||
| 35 | return -1; | ||
| 36 | } | ||
| 37 | } else { | ||
| 38 | /* | ||
| 39 | * Check the cgroup.events::frozen value. | ||
| 40 | */ | ||
| 41 | if (cg_read_strstr(cgroup, "cgroup.events", "frozen 0") != 0) { | ||
| 42 | debug("Cgroup %s is frozen\n", cgroup); | ||
| 43 | return -1; | ||
| 44 | } | ||
| 45 | } | ||
| 46 | |||
| 47 | return 0; | ||
| 48 | } | ||
| 49 | |||
| 50 | /* | ||
| 51 | * Freeze the given cgroup. | ||
| 52 | */ | ||
| 53 | static int cg_freeze_nowait(const char *cgroup, bool freeze) | ||
| 54 | { | ||
| 55 | return cg_write(cgroup, "cgroup.freeze", freeze ? "1" : "0"); | ||
| 56 | } | ||
| 57 | |||
| 58 | /* | ||
| 59 | * Prepare for waiting on cgroup.events file. | ||
| 60 | */ | ||
| 61 | static int cg_prepare_for_wait(const char *cgroup) | ||
| 62 | { | ||
| 63 | int fd, ret = -1; | ||
| 64 | |||
| 65 | fd = inotify_init1(0); | ||
| 66 | if (fd == -1) { | ||
| 67 | debug("Error: inotify_init1() failed\n"); | ||
| 68 | return fd; | ||
| 69 | } | ||
| 70 | |||
| 71 | ret = inotify_add_watch(fd, cg_control(cgroup, "cgroup.events"), | ||
| 72 | IN_MODIFY); | ||
| 73 | if (ret == -1) { | ||
| 74 | debug("Error: inotify_add_watch() failed\n"); | ||
| 75 | close(fd); | ||
| 76 | } | ||
| 77 | |||
| 78 | return fd; | ||
| 79 | } | ||
| 80 | |||
| 81 | /* | ||
| 82 | * Wait for an event. If there are no events for 10 seconds, | ||
| 83 | * treat this an error. | ||
| 84 | */ | ||
| 85 | static int cg_wait_for(int fd) | ||
| 86 | { | ||
| 87 | int ret = -1; | ||
| 88 | struct pollfd fds = { | ||
| 89 | .fd = fd, | ||
| 90 | .events = POLLIN, | ||
| 91 | }; | ||
| 92 | |||
| 93 | while (true) { | ||
| 94 | ret = poll(&fds, 1, 10000); | ||
| 95 | |||
| 96 | if (ret == -1) { | ||
| 97 | if (errno == EINTR) | ||
| 98 | continue; | ||
| 99 | debug("Error: poll() failed\n"); | ||
| 100 | break; | ||
| 101 | } | ||
| 102 | |||
| 103 | if (ret > 0 && fds.revents & POLLIN) { | ||
| 104 | ret = 0; | ||
| 105 | break; | ||
| 106 | } | ||
| 107 | } | ||
| 108 | |||
| 109 | return ret; | ||
| 110 | } | ||
| 111 | |||
| 112 | /* | ||
| 113 | * Attach a task to the given cgroup and wait for a cgroup frozen event. | ||
| 114 | * All transient events (e.g. populated) are ignored. | ||
| 115 | */ | ||
| 116 | static int cg_enter_and_wait_for_frozen(const char *cgroup, int pid, | ||
| 117 | bool frozen) | ||
| 118 | { | ||
| 119 | int fd, ret = -1; | ||
| 120 | int attempts; | ||
| 121 | |||
| 122 | fd = cg_prepare_for_wait(cgroup); | ||
| 123 | if (fd < 0) | ||
| 124 | return fd; | ||
| 125 | |||
| 126 | ret = cg_enter(cgroup, pid); | ||
| 127 | if (ret) | ||
| 128 | goto out; | ||
| 129 | |||
| 130 | for (attempts = 0; attempts < 10; attempts++) { | ||
| 131 | ret = cg_wait_for(fd); | ||
| 132 | if (ret) | ||
| 133 | break; | ||
| 134 | |||
| 135 | ret = cg_check_frozen(cgroup, frozen); | ||
| 136 | if (ret) | ||
| 137 | continue; | ||
| 138 | } | ||
| 139 | |||
| 140 | out: | ||
| 141 | close(fd); | ||
| 142 | return ret; | ||
| 143 | } | ||
| 144 | |||
/*
 * Write the desired value into cgroup.freeze, then wait for the
 * inotify notification (up to 10 seconds) and verify that the cgroup
 * has actually reached the requested state.
 */
static int cg_freeze_wait(const char *cgroup, bool freeze)
{
	int ret = -1;
	int fd = cg_prepare_for_wait(cgroup);

	if (fd < 0)
		return fd;

	ret = cg_freeze_nowait(cgroup, freeze);
	if (ret) {
		debug("Error: cg_freeze_nowait() failed\n");
		goto out;
	}

	ret = cg_wait_for(fd);
	if (!ret)
		ret = cg_check_frozen(cgroup, freeze);
out:
	close(fd);
	return ret;
}
| 173 | |||
/*
 * A simple child process: sleeps in a loop for as long as its original
 * parent is alive and exits once it has been re-parented.
 */
static int child_fn(const char *cgroup, void *arg)
{
	int initial_ppid = getppid();

	while (getppid() == initial_ppid)
		usleep(1000);

	/* Only reachable after re-parenting, i.e. evaluates to 0. */
	return getppid() == initial_ppid;
}
| 187 | |||
| 188 | /* | ||
| 189 | * A simple test for the cgroup freezer: populated the cgroup with 100 | ||
| 190 | * running processes and freeze it. Then unfreeze it. Then it kills all | ||
| 191 | * processes and destroys the cgroup. | ||
| 192 | */ | ||
| 193 | static int test_cgfreezer_simple(const char *root) | ||
| 194 | { | ||
| 195 | int ret = KSFT_FAIL; | ||
| 196 | char *cgroup = NULL; | ||
| 197 | int i; | ||
| 198 | |||
| 199 | cgroup = cg_name(root, "cg_test_simple"); | ||
| 200 | if (!cgroup) | ||
| 201 | goto cleanup; | ||
| 202 | |||
| 203 | if (cg_create(cgroup)) | ||
| 204 | goto cleanup; | ||
| 205 | |||
| 206 | for (i = 0; i < 100; i++) | ||
| 207 | cg_run_nowait(cgroup, child_fn, NULL); | ||
| 208 | |||
| 209 | if (cg_wait_for_proc_count(cgroup, 100)) | ||
| 210 | goto cleanup; | ||
| 211 | |||
| 212 | if (cg_check_frozen(cgroup, false)) | ||
| 213 | goto cleanup; | ||
| 214 | |||
| 215 | if (cg_freeze_wait(cgroup, true)) | ||
| 216 | goto cleanup; | ||
| 217 | |||
| 218 | if (cg_freeze_wait(cgroup, false)) | ||
| 219 | goto cleanup; | ||
| 220 | |||
| 221 | ret = KSFT_PASS; | ||
| 222 | |||
| 223 | cleanup: | ||
| 224 | if (cgroup) | ||
| 225 | cg_destroy(cgroup); | ||
| 226 | free(cgroup); | ||
| 227 | return ret; | ||
| 228 | } | ||
| 229 | |||
| 230 | /* | ||
| 231 | * The test creates the following hierarchy: | ||
| 232 | * A | ||
| 233 | * / / \ \ | ||
| 234 | * B E I K | ||
| 235 | * /\ | | ||
| 236 | * C D F | ||
| 237 | * | | ||
| 238 | * G | ||
| 239 | * | | ||
| 240 | * H | ||
| 241 | * | ||
| 242 | * with a process in C, H and 3 processes in K. | ||
| 243 | * Then it tries to freeze and unfreeze the whole tree. | ||
| 244 | */ | ||
| 245 | static int test_cgfreezer_tree(const char *root) | ||
| 246 | { | ||
| 247 | char *cgroup[10] = {0}; | ||
| 248 | int ret = KSFT_FAIL; | ||
| 249 | int i; | ||
| 250 | |||
| 251 | cgroup[0] = cg_name(root, "cg_test_tree_A"); | ||
| 252 | if (!cgroup[0]) | ||
| 253 | goto cleanup; | ||
| 254 | |||
| 255 | cgroup[1] = cg_name(cgroup[0], "B"); | ||
| 256 | if (!cgroup[1]) | ||
| 257 | goto cleanup; | ||
| 258 | |||
| 259 | cgroup[2] = cg_name(cgroup[1], "C"); | ||
| 260 | if (!cgroup[2]) | ||
| 261 | goto cleanup; | ||
| 262 | |||
| 263 | cgroup[3] = cg_name(cgroup[1], "D"); | ||
| 264 | if (!cgroup[3]) | ||
| 265 | goto cleanup; | ||
| 266 | |||
| 267 | cgroup[4] = cg_name(cgroup[0], "E"); | ||
| 268 | if (!cgroup[4]) | ||
| 269 | goto cleanup; | ||
| 270 | |||
| 271 | cgroup[5] = cg_name(cgroup[4], "F"); | ||
| 272 | if (!cgroup[5]) | ||
| 273 | goto cleanup; | ||
| 274 | |||
| 275 | cgroup[6] = cg_name(cgroup[5], "G"); | ||
| 276 | if (!cgroup[6]) | ||
| 277 | goto cleanup; | ||
| 278 | |||
| 279 | cgroup[7] = cg_name(cgroup[6], "H"); | ||
| 280 | if (!cgroup[7]) | ||
| 281 | goto cleanup; | ||
| 282 | |||
| 283 | cgroup[8] = cg_name(cgroup[0], "I"); | ||
| 284 | if (!cgroup[8]) | ||
| 285 | goto cleanup; | ||
| 286 | |||
| 287 | cgroup[9] = cg_name(cgroup[0], "K"); | ||
| 288 | if (!cgroup[9]) | ||
| 289 | goto cleanup; | ||
| 290 | |||
| 291 | for (i = 0; i < 10; i++) | ||
| 292 | if (cg_create(cgroup[i])) | ||
| 293 | goto cleanup; | ||
| 294 | |||
| 295 | cg_run_nowait(cgroup[2], child_fn, NULL); | ||
| 296 | cg_run_nowait(cgroup[7], child_fn, NULL); | ||
| 297 | cg_run_nowait(cgroup[9], child_fn, NULL); | ||
| 298 | cg_run_nowait(cgroup[9], child_fn, NULL); | ||
| 299 | cg_run_nowait(cgroup[9], child_fn, NULL); | ||
| 300 | |||
| 301 | /* | ||
| 302 | * Wait until all child processes will enter | ||
| 303 | * corresponding cgroups. | ||
| 304 | */ | ||
| 305 | |||
| 306 | if (cg_wait_for_proc_count(cgroup[2], 1) || | ||
| 307 | cg_wait_for_proc_count(cgroup[7], 1) || | ||
| 308 | cg_wait_for_proc_count(cgroup[9], 3)) | ||
| 309 | goto cleanup; | ||
| 310 | |||
| 311 | /* | ||
| 312 | * Freeze B. | ||
| 313 | */ | ||
| 314 | if (cg_freeze_wait(cgroup[1], true)) | ||
| 315 | goto cleanup; | ||
| 316 | |||
| 317 | /* | ||
| 318 | * Freeze F. | ||
| 319 | */ | ||
| 320 | if (cg_freeze_wait(cgroup[5], true)) | ||
| 321 | goto cleanup; | ||
| 322 | |||
| 323 | /* | ||
| 324 | * Freeze G. | ||
| 325 | */ | ||
| 326 | if (cg_freeze_wait(cgroup[6], true)) | ||
| 327 | goto cleanup; | ||
| 328 | |||
| 329 | /* | ||
| 330 | * Check that A and E are not frozen. | ||
| 331 | */ | ||
| 332 | if (cg_check_frozen(cgroup[0], false)) | ||
| 333 | goto cleanup; | ||
| 334 | |||
| 335 | if (cg_check_frozen(cgroup[4], false)) | ||
| 336 | goto cleanup; | ||
| 337 | |||
| 338 | /* | ||
| 339 | * Freeze A. Check that A, B and E are frozen. | ||
| 340 | */ | ||
| 341 | if (cg_freeze_wait(cgroup[0], true)) | ||
| 342 | goto cleanup; | ||
| 343 | |||
| 344 | if (cg_check_frozen(cgroup[1], true)) | ||
| 345 | goto cleanup; | ||
| 346 | |||
| 347 | if (cg_check_frozen(cgroup[4], true)) | ||
| 348 | goto cleanup; | ||
| 349 | |||
| 350 | /* | ||
| 351 | * Unfreeze B, F and G | ||
| 352 | */ | ||
| 353 | if (cg_freeze_nowait(cgroup[1], false)) | ||
| 354 | goto cleanup; | ||
| 355 | |||
| 356 | if (cg_freeze_nowait(cgroup[5], false)) | ||
| 357 | goto cleanup; | ||
| 358 | |||
| 359 | if (cg_freeze_nowait(cgroup[6], false)) | ||
| 360 | goto cleanup; | ||
| 361 | |||
| 362 | /* | ||
| 363 | * Check that C and H are still frozen. | ||
| 364 | */ | ||
| 365 | if (cg_check_frozen(cgroup[2], true)) | ||
| 366 | goto cleanup; | ||
| 367 | |||
| 368 | if (cg_check_frozen(cgroup[7], true)) | ||
| 369 | goto cleanup; | ||
| 370 | |||
| 371 | /* | ||
| 372 | * Unfreeze A. Check that A, C and K are not frozen. | ||
| 373 | */ | ||
| 374 | if (cg_freeze_wait(cgroup[0], false)) | ||
| 375 | goto cleanup; | ||
| 376 | |||
| 377 | if (cg_check_frozen(cgroup[2], false)) | ||
| 378 | goto cleanup; | ||
| 379 | |||
| 380 | if (cg_check_frozen(cgroup[9], false)) | ||
| 381 | goto cleanup; | ||
| 382 | |||
| 383 | ret = KSFT_PASS; | ||
| 384 | |||
| 385 | cleanup: | ||
| 386 | for (i = 9; i >= 0 && cgroup[i]; i--) { | ||
| 387 | cg_destroy(cgroup[i]); | ||
| 388 | free(cgroup[i]); | ||
| 389 | } | ||
| 390 | |||
| 391 | return ret; | ||
| 392 | } | ||
| 393 | |||
/*
 * A fork bomb emulator: splits into four processes, each of which
 * sleeps until it gets re-parented.
 */
static int forkbomb_fn(const char *cgroup, void *arg)
{
	int initial_ppid;

	/* Two unconditional forks: one process becomes four. */
	fork();
	fork();

	initial_ppid = getppid();

	while (getppid() == initial_ppid)
		usleep(1000);

	/* Only reachable after re-parenting, i.e. evaluates to 0. */
	return getppid() == initial_ppid;
}
| 411 | |||
| 412 | /* | ||
| 413 | * The test runs a fork bomb in a cgroup and tries to freeze it. | ||
| 414 | * Then it kills all processes and checks that cgroup isn't populated | ||
| 415 | * anymore. | ||
| 416 | */ | ||
| 417 | static int test_cgfreezer_forkbomb(const char *root) | ||
| 418 | { | ||
| 419 | int ret = KSFT_FAIL; | ||
| 420 | char *cgroup = NULL; | ||
| 421 | |||
| 422 | cgroup = cg_name(root, "cg_forkbomb_test"); | ||
| 423 | if (!cgroup) | ||
| 424 | goto cleanup; | ||
| 425 | |||
| 426 | if (cg_create(cgroup)) | ||
| 427 | goto cleanup; | ||
| 428 | |||
| 429 | cg_run_nowait(cgroup, forkbomb_fn, NULL); | ||
| 430 | |||
| 431 | usleep(100000); | ||
| 432 | |||
| 433 | if (cg_freeze_wait(cgroup, true)) | ||
| 434 | goto cleanup; | ||
| 435 | |||
| 436 | if (cg_killall(cgroup)) | ||
| 437 | goto cleanup; | ||
| 438 | |||
| 439 | if (cg_wait_for_proc_count(cgroup, 0)) | ||
| 440 | goto cleanup; | ||
| 441 | |||
| 442 | ret = KSFT_PASS; | ||
| 443 | |||
| 444 | cleanup: | ||
| 445 | if (cgroup) | ||
| 446 | cg_destroy(cgroup); | ||
| 447 | free(cgroup); | ||
| 448 | return ret; | ||
| 449 | } | ||
| 450 | |||
| 451 | /* | ||
| 452 | * The test creates two nested cgroups, freezes the parent | ||
| 453 | * and removes the child. Then it checks that the parent cgroup | ||
| 454 | * remains frozen and it's possible to create a new child | ||
| 455 | * without unfreezing. The new child is frozen too. | ||
| 456 | */ | ||
| 457 | static int test_cgfreezer_rmdir(const char *root) | ||
| 458 | { | ||
| 459 | int ret = KSFT_FAIL; | ||
| 460 | char *parent, *child = NULL; | ||
| 461 | |||
| 462 | parent = cg_name(root, "cg_test_rmdir_A"); | ||
| 463 | if (!parent) | ||
| 464 | goto cleanup; | ||
| 465 | |||
| 466 | child = cg_name(parent, "cg_test_rmdir_B"); | ||
| 467 | if (!child) | ||
| 468 | goto cleanup; | ||
| 469 | |||
| 470 | if (cg_create(parent)) | ||
| 471 | goto cleanup; | ||
| 472 | |||
| 473 | if (cg_create(child)) | ||
| 474 | goto cleanup; | ||
| 475 | |||
| 476 | if (cg_freeze_wait(parent, true)) | ||
| 477 | goto cleanup; | ||
| 478 | |||
| 479 | if (cg_destroy(child)) | ||
| 480 | goto cleanup; | ||
| 481 | |||
| 482 | if (cg_check_frozen(parent, true)) | ||
| 483 | goto cleanup; | ||
| 484 | |||
| 485 | if (cg_create(child)) | ||
| 486 | goto cleanup; | ||
| 487 | |||
| 488 | if (cg_check_frozen(child, true)) | ||
| 489 | goto cleanup; | ||
| 490 | |||
| 491 | ret = KSFT_PASS; | ||
| 492 | |||
| 493 | cleanup: | ||
| 494 | if (child) | ||
| 495 | cg_destroy(child); | ||
| 496 | free(child); | ||
| 497 | if (parent) | ||
| 498 | cg_destroy(parent); | ||
| 499 | free(parent); | ||
| 500 | return ret; | ||
| 501 | } | ||
| 502 | |||
| 503 | /* | ||
| 504 | * The test creates two cgroups: A and B, runs a process in A | ||
| 505 | * and performs several migrations: | ||
| 506 | * 1) A (running) -> B (frozen) | ||
| 507 | * 2) B (frozen) -> A (running) | ||
| 508 | * 3) A (frozen) -> B (frozen) | ||
| 509 | * | ||
| 510 | * On each step it checks the actual state of both cgroups. | ||
| 511 | */ | ||
| 512 | static int test_cgfreezer_migrate(const char *root) | ||
| 513 | { | ||
| 514 | int ret = KSFT_FAIL; | ||
| 515 | char *cgroup[2] = {0}; | ||
| 516 | int pid; | ||
| 517 | |||
| 518 | cgroup[0] = cg_name(root, "cg_test_migrate_A"); | ||
| 519 | if (!cgroup[0]) | ||
| 520 | goto cleanup; | ||
| 521 | |||
| 522 | cgroup[1] = cg_name(root, "cg_test_migrate_B"); | ||
| 523 | if (!cgroup[1]) | ||
| 524 | goto cleanup; | ||
| 525 | |||
| 526 | if (cg_create(cgroup[0])) | ||
| 527 | goto cleanup; | ||
| 528 | |||
| 529 | if (cg_create(cgroup[1])) | ||
| 530 | goto cleanup; | ||
| 531 | |||
| 532 | pid = cg_run_nowait(cgroup[0], child_fn, NULL); | ||
| 533 | if (pid < 0) | ||
| 534 | goto cleanup; | ||
| 535 | |||
| 536 | if (cg_wait_for_proc_count(cgroup[0], 1)) | ||
| 537 | goto cleanup; | ||
| 538 | |||
| 539 | /* | ||
| 540 | * Migrate from A (running) to B (frozen) | ||
| 541 | */ | ||
| 542 | if (cg_freeze_wait(cgroup[1], true)) | ||
| 543 | goto cleanup; | ||
| 544 | |||
| 545 | if (cg_enter_and_wait_for_frozen(cgroup[1], pid, true)) | ||
| 546 | goto cleanup; | ||
| 547 | |||
| 548 | if (cg_check_frozen(cgroup[0], false)) | ||
| 549 | goto cleanup; | ||
| 550 | |||
| 551 | /* | ||
| 552 | * Migrate from B (frozen) to A (running) | ||
| 553 | */ | ||
| 554 | if (cg_enter_and_wait_for_frozen(cgroup[0], pid, false)) | ||
| 555 | goto cleanup; | ||
| 556 | |||
| 557 | if (cg_check_frozen(cgroup[1], true)) | ||
| 558 | goto cleanup; | ||
| 559 | |||
| 560 | /* | ||
| 561 | * Migrate from A (frozen) to B (frozen) | ||
| 562 | */ | ||
| 563 | if (cg_freeze_wait(cgroup[0], true)) | ||
| 564 | goto cleanup; | ||
| 565 | |||
| 566 | if (cg_enter_and_wait_for_frozen(cgroup[1], pid, true)) | ||
| 567 | goto cleanup; | ||
| 568 | |||
| 569 | if (cg_check_frozen(cgroup[0], true)) | ||
| 570 | goto cleanup; | ||
| 571 | |||
| 572 | ret = KSFT_PASS; | ||
| 573 | |||
| 574 | cleanup: | ||
| 575 | if (cgroup[0]) | ||
| 576 | cg_destroy(cgroup[0]); | ||
| 577 | free(cgroup[0]); | ||
| 578 | if (cgroup[1]) | ||
| 579 | cg_destroy(cgroup[1]); | ||
| 580 | free(cgroup[1]); | ||
| 581 | return ret; | ||
| 582 | } | ||
| 583 | |||
| 584 | /* | ||
| 585 | * The test checks that ptrace works with a tracing process in a frozen cgroup. | ||
| 586 | */ | ||
| 587 | static int test_cgfreezer_ptrace(const char *root) | ||
| 588 | { | ||
| 589 | int ret = KSFT_FAIL; | ||
| 590 | char *cgroup = NULL; | ||
| 591 | siginfo_t siginfo; | ||
| 592 | int pid; | ||
| 593 | |||
| 594 | cgroup = cg_name(root, "cg_test_ptrace"); | ||
| 595 | if (!cgroup) | ||
| 596 | goto cleanup; | ||
| 597 | |||
| 598 | if (cg_create(cgroup)) | ||
| 599 | goto cleanup; | ||
| 600 | |||
| 601 | pid = cg_run_nowait(cgroup, child_fn, NULL); | ||
| 602 | if (pid < 0) | ||
| 603 | goto cleanup; | ||
| 604 | |||
| 605 | if (cg_wait_for_proc_count(cgroup, 1)) | ||
| 606 | goto cleanup; | ||
| 607 | |||
| 608 | if (cg_freeze_wait(cgroup, true)) | ||
| 609 | goto cleanup; | ||
| 610 | |||
| 611 | if (ptrace(PTRACE_SEIZE, pid, NULL, NULL)) | ||
| 612 | goto cleanup; | ||
| 613 | |||
| 614 | if (ptrace(PTRACE_INTERRUPT, pid, NULL, NULL)) | ||
| 615 | goto cleanup; | ||
| 616 | |||
| 617 | waitpid(pid, NULL, 0); | ||
| 618 | |||
| 619 | /* | ||
| 620 | * Cgroup has to remain frozen, however the test task | ||
| 621 | * is in traced state. | ||
| 622 | */ | ||
| 623 | if (cg_check_frozen(cgroup, true)) | ||
| 624 | goto cleanup; | ||
| 625 | |||
| 626 | if (ptrace(PTRACE_GETSIGINFO, pid, NULL, &siginfo)) | ||
| 627 | goto cleanup; | ||
| 628 | |||
| 629 | if (ptrace(PTRACE_DETACH, pid, NULL, NULL)) | ||
| 630 | goto cleanup; | ||
| 631 | |||
| 632 | if (cg_check_frozen(cgroup, true)) | ||
| 633 | goto cleanup; | ||
| 634 | |||
| 635 | ret = KSFT_PASS; | ||
| 636 | |||
| 637 | cleanup: | ||
| 638 | if (cgroup) | ||
| 639 | cg_destroy(cgroup); | ||
| 640 | free(cgroup); | ||
| 641 | return ret; | ||
| 642 | } | ||
| 643 | |||
| 644 | /* | ||
| 645 | * Check if the process is stopped. | ||
| 646 | */ | ||
| 647 | static int proc_check_stopped(int pid) | ||
| 648 | { | ||
| 649 | char buf[PAGE_SIZE]; | ||
| 650 | int len; | ||
| 651 | |||
| 652 | len = proc_read_text(pid, "stat", buf, sizeof(buf)); | ||
| 653 | if (len == -1) { | ||
| 654 | debug("Can't get %d stat\n", pid); | ||
| 655 | return -1; | ||
| 656 | } | ||
| 657 | |||
| 658 | if (strstr(buf, "(test_freezer) T ") == NULL) { | ||
| 659 | debug("Process %d in the unexpected state: %s\n", pid, buf); | ||
| 660 | return -1; | ||
| 661 | } | ||
| 662 | |||
| 663 | return 0; | ||
| 664 | } | ||
| 665 | |||
| 666 | /* | ||
| 667 | * Test that it's possible to freeze a cgroup with a stopped process. | ||
| 668 | */ | ||
| 669 | static int test_cgfreezer_stopped(const char *root) | ||
| 670 | { | ||
| 671 | int pid, ret = KSFT_FAIL; | ||
| 672 | char *cgroup = NULL; | ||
| 673 | |||
| 674 | cgroup = cg_name(root, "cg_test_stopped"); | ||
| 675 | if (!cgroup) | ||
| 676 | goto cleanup; | ||
| 677 | |||
| 678 | if (cg_create(cgroup)) | ||
| 679 | goto cleanup; | ||
| 680 | |||
| 681 | pid = cg_run_nowait(cgroup, child_fn, NULL); | ||
| 682 | |||
| 683 | if (cg_wait_for_proc_count(cgroup, 1)) | ||
| 684 | goto cleanup; | ||
| 685 | |||
| 686 | if (kill(pid, SIGSTOP)) | ||
| 687 | goto cleanup; | ||
| 688 | |||
| 689 | if (cg_check_frozen(cgroup, false)) | ||
| 690 | goto cleanup; | ||
| 691 | |||
| 692 | if (cg_freeze_wait(cgroup, true)) | ||
| 693 | goto cleanup; | ||
| 694 | |||
| 695 | if (cg_freeze_wait(cgroup, false)) | ||
| 696 | goto cleanup; | ||
| 697 | |||
| 698 | if (proc_check_stopped(pid)) | ||
| 699 | goto cleanup; | ||
| 700 | |||
| 701 | ret = KSFT_PASS; | ||
| 702 | |||
| 703 | cleanup: | ||
| 704 | if (cgroup) | ||
| 705 | cg_destroy(cgroup); | ||
| 706 | free(cgroup); | ||
| 707 | return ret; | ||
| 708 | } | ||
| 709 | |||
| 710 | /* | ||
| 711 | * Test that it's possible to freeze a cgroup with a ptraced process. | ||
| 712 | */ | ||
| 713 | static int test_cgfreezer_ptraced(const char *root) | ||
| 714 | { | ||
| 715 | int pid, ret = KSFT_FAIL; | ||
| 716 | char *cgroup = NULL; | ||
| 717 | siginfo_t siginfo; | ||
| 718 | |||
| 719 | cgroup = cg_name(root, "cg_test_ptraced"); | ||
| 720 | if (!cgroup) | ||
| 721 | goto cleanup; | ||
| 722 | |||
| 723 | if (cg_create(cgroup)) | ||
| 724 | goto cleanup; | ||
| 725 | |||
| 726 | pid = cg_run_nowait(cgroup, child_fn, NULL); | ||
| 727 | |||
| 728 | if (cg_wait_for_proc_count(cgroup, 1)) | ||
| 729 | goto cleanup; | ||
| 730 | |||
| 731 | if (ptrace(PTRACE_SEIZE, pid, NULL, NULL)) | ||
| 732 | goto cleanup; | ||
| 733 | |||
| 734 | if (ptrace(PTRACE_INTERRUPT, pid, NULL, NULL)) | ||
| 735 | goto cleanup; | ||
| 736 | |||
| 737 | waitpid(pid, NULL, 0); | ||
| 738 | |||
| 739 | if (cg_check_frozen(cgroup, false)) | ||
| 740 | goto cleanup; | ||
| 741 | |||
| 742 | if (cg_freeze_wait(cgroup, true)) | ||
| 743 | goto cleanup; | ||
| 744 | |||
| 745 | /* | ||
| 746 | * cg_check_frozen(cgroup, true) will fail here, | ||
| 747 | * because the task in in the TRACEd state. | ||
| 748 | */ | ||
| 749 | if (cg_freeze_wait(cgroup, false)) | ||
| 750 | goto cleanup; | ||
| 751 | |||
| 752 | if (ptrace(PTRACE_GETSIGINFO, pid, NULL, &siginfo)) | ||
| 753 | goto cleanup; | ||
| 754 | |||
| 755 | if (ptrace(PTRACE_DETACH, pid, NULL, NULL)) | ||
| 756 | goto cleanup; | ||
| 757 | |||
| 758 | ret = KSFT_PASS; | ||
| 759 | |||
| 760 | cleanup: | ||
| 761 | if (cgroup) | ||
| 762 | cg_destroy(cgroup); | ||
| 763 | free(cgroup); | ||
| 764 | return ret; | ||
| 765 | } | ||
| 766 | |||
/*
 * vfork() a child that sleeps forever; the parent returns immediately
 * with the child's pid (or -1 on failure). The child never returns
 * from this function, as vfork() semantics require.
 */
static int vfork_fn(const char *cgroup, void *arg)
{
	int pid = vfork();

	if (!pid) {
		for (;;)
			sleep(1);
	}

	return pid;
}
| 777 | |||
| 778 | /* | ||
| 779 | * Test that it's possible to freeze a cgroup with a process, | ||
| 780 | * which called vfork() and is waiting for a child. | ||
| 781 | */ | ||
| 782 | static int test_cgfreezer_vfork(const char *root) | ||
| 783 | { | ||
| 784 | int ret = KSFT_FAIL; | ||
| 785 | char *cgroup = NULL; | ||
| 786 | |||
| 787 | cgroup = cg_name(root, "cg_test_vfork"); | ||
| 788 | if (!cgroup) | ||
| 789 | goto cleanup; | ||
| 790 | |||
| 791 | if (cg_create(cgroup)) | ||
| 792 | goto cleanup; | ||
| 793 | |||
| 794 | cg_run_nowait(cgroup, vfork_fn, NULL); | ||
| 795 | |||
| 796 | if (cg_wait_for_proc_count(cgroup, 2)) | ||
| 797 | goto cleanup; | ||
| 798 | |||
| 799 | if (cg_freeze_wait(cgroup, true)) | ||
| 800 | goto cleanup; | ||
| 801 | |||
| 802 | ret = KSFT_PASS; | ||
| 803 | |||
| 804 | cleanup: | ||
| 805 | if (cgroup) | ||
| 806 | cg_destroy(cgroup); | ||
| 807 | free(cgroup); | ||
| 808 | return ret; | ||
| 809 | } | ||
| 810 | |||
/* Expand into a { function-pointer, "function-name" } table entry. */
#define T(x) { x, #x }
/* Registry of all freezer selftests; main() runs them in order. */
struct cgfreezer_test {
	int (*fn)(const char *root);	/* test body; receives the cgroup v2 root path */
	const char *name;		/* test name reported through ksft */
} tests[] = {
	T(test_cgfreezer_simple),
	T(test_cgfreezer_tree),
	T(test_cgfreezer_forkbomb),
	T(test_cgfreezer_rmdir),
	T(test_cgfreezer_migrate),
	T(test_cgfreezer_ptrace),
	T(test_cgfreezer_stopped),
	T(test_cgfreezer_ptraced),
	T(test_cgfreezer_vfork),
};
#undef T
| 827 | |||
| 828 | int main(int argc, char *argv[]) | ||
| 829 | { | ||
| 830 | char root[PATH_MAX]; | ||
| 831 | int i, ret = EXIT_SUCCESS; | ||
| 832 | |||
| 833 | if (cg_find_unified_root(root, sizeof(root))) | ||
| 834 | ksft_exit_skip("cgroup v2 isn't mounted\n"); | ||
| 835 | for (i = 0; i < ARRAY_SIZE(tests); i++) { | ||
| 836 | switch (tests[i].fn(root)) { | ||
| 837 | case KSFT_PASS: | ||
| 838 | ksft_test_result_pass("%s\n", tests[i].name); | ||
| 839 | break; | ||
| 840 | case KSFT_SKIP: | ||
| 841 | ksft_test_result_skip("%s\n", tests[i].name); | ||
| 842 | break; | ||
| 843 | default: | ||
| 844 | ret = EXIT_FAILURE; | ||
| 845 | ksft_test_result_fail("%s\n", tests[i].name); | ||
| 846 | break; | ||
| 847 | } | ||
| 848 | } | ||
| 849 | |||
| 850 | return ret; | ||
| 851 | } | ||
