aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/admin-guide/cgroup-v2.rst27
-rw-r--r--include/linux/cgroup-defs.h33
-rw-r--r--include/linux/cgroup.h43
-rw-r--r--include/linux/sched.h2
-rw-r--r--include/linux/sched/jobctl.h2
-rw-r--r--include/trace/events/cgroup.h55
-rw-r--r--kernel/cgroup/Makefile4
-rw-r--r--kernel/cgroup/cgroup-internal.h8
-rw-r--r--kernel/cgroup/cgroup-v1.c16
-rw-r--r--kernel/cgroup/cgroup.c152
-rw-r--r--kernel/cgroup/debug.c8
-rw-r--r--kernel/cgroup/freezer.c639
-rw-r--r--kernel/cgroup/legacy_freezer.c481
-rw-r--r--kernel/fork.c2
-rw-r--r--kernel/signal.c66
-rw-r--r--tools/testing/selftests/cgroup/.gitignore1
-rw-r--r--tools/testing/selftests/cgroup/Makefile2
-rw-r--r--tools/testing/selftests/cgroup/cgroup_util.c58
-rw-r--r--tools/testing/selftests/cgroup/cgroup_util.h5
-rw-r--r--tools/testing/selftests/cgroup/test_freezer.c851
20 files changed, 2012 insertions, 443 deletions
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index 20f92c16ffbf..88e746074252 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -864,6 +864,8 @@ All cgroup core files are prefixed with "cgroup."
864 populated 864 populated
865 1 if the cgroup or its descendants contains any live 865 1 if the cgroup or its descendants contains any live
866 processes; otherwise, 0. 866 processes; otherwise, 0.
867 frozen
868 1 if the cgroup is frozen; otherwise, 0.
867 869
868 cgroup.max.descendants 870 cgroup.max.descendants
869 A read-write single value files. The default is "max". 871 A read-write single value files. The default is "max".
@@ -897,6 +899,31 @@ All cgroup core files are prefixed with "cgroup."
897 A dying cgroup can consume system resources not exceeding 899 A dying cgroup can consume system resources not exceeding
898 limits, which were active at the moment of cgroup deletion. 900 limits, which were active at the moment of cgroup deletion.
899 901
902 cgroup.freeze
903 A read-write single value file which exists on non-root cgroups.
904 Allowed values are "0" and "1". The default is "0".
905
906 Writing "1" to the file causes freezing of the cgroup and all
907 descendant cgroups. This means that all belonging processes will
908 be stopped and will not run until the cgroup will be explicitly
909 unfrozen. Freezing of the cgroup may take some time; when this action
910 is completed, the "frozen" value in the cgroup.events control file
911 will be updated to "1" and the corresponding notification will be
912 issued.
913
914 A cgroup can be frozen either by its own settings, or by settings
915 of any ancestor cgroups. If any of ancestor cgroups is frozen, the
916 cgroup will remain frozen.
917
918 Processes in the frozen cgroup can be killed by a fatal signal.
919 They also can enter and leave a frozen cgroup: either by an explicit
920 move by a user, or if freezing of the cgroup races with fork().
921 If a process is moved to a frozen cgroup, it stops. If a process is
922 moved out of a frozen cgroup, it becomes running.
923
924 Frozen status of a cgroup doesn't affect any cgroup tree operations:
925 it's possible to delete a frozen (and empty) cgroup, as well as
926 create new sub-cgroups.
900 927
901Controllers 928Controllers
902=========== 929===========
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 1c70803e9f77..77258d276f93 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -65,6 +65,12 @@ enum {
65 * specified at mount time and thus is implemented here. 65 * specified at mount time and thus is implemented here.
66 */ 66 */
67 CGRP_CPUSET_CLONE_CHILDREN, 67 CGRP_CPUSET_CLONE_CHILDREN,
68
69 /* Control group has to be frozen. */
70 CGRP_FREEZE,
71
72 /* Cgroup is frozen. */
73 CGRP_FROZEN,
68}; 74};
69 75
70/* cgroup_root->flags */ 76/* cgroup_root->flags */
@@ -317,6 +323,25 @@ struct cgroup_rstat_cpu {
317 struct cgroup *updated_next; /* NULL iff not on the list */ 323 struct cgroup *updated_next; /* NULL iff not on the list */
318}; 324};
319 325
326struct cgroup_freezer_state {
327 /* Should the cgroup and its descendants be frozen. */
328 bool freeze;
329
330 /* Should the cgroup actually be frozen? */
331 int e_freeze;
332
333 /* Fields below are protected by css_set_lock */
334
335 /* Number of frozen descendant cgroups */
336 int nr_frozen_descendants;
337
338 /*
339 * Number of tasks, which are counted as frozen:
340 * frozen, SIGSTOPped, and PTRACEd.
341 */
342 int nr_frozen_tasks;
343};
344
320struct cgroup { 345struct cgroup {
321 /* self css with NULL ->ss, points back to this cgroup */ 346 /* self css with NULL ->ss, points back to this cgroup */
322 struct cgroup_subsys_state self; 347 struct cgroup_subsys_state self;
@@ -349,6 +374,11 @@ struct cgroup {
349 * Dying cgroups are cgroups which were deleted by a user, 374 * Dying cgroups are cgroups which were deleted by a user,
350 * but are still existing because someone else is holding a reference. 375 * but are still existing because someone else is holding a reference.
351 * max_descendants is a maximum allowed number of descent cgroups. 376 * max_descendants is a maximum allowed number of descent cgroups.
377 *
378 * nr_descendants and nr_dying_descendants are protected
379 * by cgroup_mutex and css_set_lock. It's fine to read them holding
380 * any of cgroup_mutex and css_set_lock; for writing both locks
381 * should be held.
352 */ 382 */
353 int nr_descendants; 383 int nr_descendants;
354 int nr_dying_descendants; 384 int nr_dying_descendants;
@@ -448,6 +478,9 @@ struct cgroup {
448 /* If there is block congestion on this cgroup. */ 478 /* If there is block congestion on this cgroup. */
449 atomic_t congestion_count; 479 atomic_t congestion_count;
450 480
481 /* Used to store internal freezer state */
482 struct cgroup_freezer_state freezer;
483
451 /* ids of the ancestors at each level including self */ 484 /* ids of the ancestors at each level including self */
452 int ancestor_ids[]; 485 int ancestor_ids[];
453}; 486};
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 81f58b4a5418..c0077adeea83 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -881,4 +881,47 @@ static inline void put_cgroup_ns(struct cgroup_namespace *ns)
881 free_cgroup_ns(ns); 881 free_cgroup_ns(ns);
882} 882}
883 883
884#ifdef CONFIG_CGROUPS
885
886void cgroup_enter_frozen(void);
887void cgroup_leave_frozen(bool always_leave);
888void cgroup_update_frozen(struct cgroup *cgrp);
889void cgroup_freeze(struct cgroup *cgrp, bool freeze);
890void cgroup_freezer_migrate_task(struct task_struct *task, struct cgroup *src,
891 struct cgroup *dst);
892
893static inline bool cgroup_task_freeze(struct task_struct *task)
894{
895 bool ret;
896
897 if (task->flags & PF_KTHREAD)
898 return false;
899
900 rcu_read_lock();
901 ret = test_bit(CGRP_FREEZE, &task_dfl_cgroup(task)->flags);
902 rcu_read_unlock();
903
904 return ret;
905}
906
907static inline bool cgroup_task_frozen(struct task_struct *task)
908{
909 return task->frozen;
910}
911
912#else /* !CONFIG_CGROUPS */
913
914static inline void cgroup_enter_frozen(void) { }
915static inline void cgroup_leave_frozen(bool always_leave) { }
916static inline bool cgroup_task_freeze(struct task_struct *task)
917{
918 return false;
919}
920static inline bool cgroup_task_frozen(struct task_struct *task)
921{
922 return false;
923}
924
925#endif /* !CONFIG_CGROUPS */
926
884#endif /* _LINUX_CGROUP_H */ 927#endif /* _LINUX_CGROUP_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 50606a6e73d6..a2cd15855bad 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -726,6 +726,8 @@ struct task_struct {
726#ifdef CONFIG_CGROUPS 726#ifdef CONFIG_CGROUPS
727 /* disallow userland-initiated cgroup migration */ 727 /* disallow userland-initiated cgroup migration */
728 unsigned no_cgroup_migration:1; 728 unsigned no_cgroup_migration:1;
729 /* task is frozen/stopped (used by the cgroup freezer) */
730 unsigned frozen:1;
729#endif 731#endif
730#ifdef CONFIG_BLK_CGROUP 732#ifdef CONFIG_BLK_CGROUP
731 /* to be used once the psi infrastructure lands upstream. */ 733 /* to be used once the psi infrastructure lands upstream. */
diff --git a/include/linux/sched/jobctl.h b/include/linux/sched/jobctl.h
index 98228bd48aee..fa067de9f1a9 100644
--- a/include/linux/sched/jobctl.h
+++ b/include/linux/sched/jobctl.h
@@ -18,6 +18,7 @@ struct task_struct;
18#define JOBCTL_TRAP_NOTIFY_BIT 20 /* trap for NOTIFY */ 18#define JOBCTL_TRAP_NOTIFY_BIT 20 /* trap for NOTIFY */
19#define JOBCTL_TRAPPING_BIT 21 /* switching to TRACED */ 19#define JOBCTL_TRAPPING_BIT 21 /* switching to TRACED */
20#define JOBCTL_LISTENING_BIT 22 /* ptracer is listening for events */ 20#define JOBCTL_LISTENING_BIT 22 /* ptracer is listening for events */
21#define JOBCTL_TRAP_FREEZE_BIT 23 /* trap for cgroup freezer */
21 22
22#define JOBCTL_STOP_DEQUEUED (1UL << JOBCTL_STOP_DEQUEUED_BIT) 23#define JOBCTL_STOP_DEQUEUED (1UL << JOBCTL_STOP_DEQUEUED_BIT)
23#define JOBCTL_STOP_PENDING (1UL << JOBCTL_STOP_PENDING_BIT) 24#define JOBCTL_STOP_PENDING (1UL << JOBCTL_STOP_PENDING_BIT)
@@ -26,6 +27,7 @@ struct task_struct;
26#define JOBCTL_TRAP_NOTIFY (1UL << JOBCTL_TRAP_NOTIFY_BIT) 27#define JOBCTL_TRAP_NOTIFY (1UL << JOBCTL_TRAP_NOTIFY_BIT)
27#define JOBCTL_TRAPPING (1UL << JOBCTL_TRAPPING_BIT) 28#define JOBCTL_TRAPPING (1UL << JOBCTL_TRAPPING_BIT)
28#define JOBCTL_LISTENING (1UL << JOBCTL_LISTENING_BIT) 29#define JOBCTL_LISTENING (1UL << JOBCTL_LISTENING_BIT)
30#define JOBCTL_TRAP_FREEZE (1UL << JOBCTL_TRAP_FREEZE_BIT)
29 31
30#define JOBCTL_TRAP_MASK (JOBCTL_TRAP_STOP | JOBCTL_TRAP_NOTIFY) 32#define JOBCTL_TRAP_MASK (JOBCTL_TRAP_STOP | JOBCTL_TRAP_NOTIFY)
31#define JOBCTL_PENDING_MASK (JOBCTL_STOP_PENDING | JOBCTL_TRAP_MASK) 33#define JOBCTL_PENDING_MASK (JOBCTL_STOP_PENDING | JOBCTL_TRAP_MASK)
diff --git a/include/trace/events/cgroup.h b/include/trace/events/cgroup.h
index a401ff5e7847..a566cc521476 100644
--- a/include/trace/events/cgroup.h
+++ b/include/trace/events/cgroup.h
@@ -103,6 +103,20 @@ DEFINE_EVENT(cgroup, cgroup_rename,
103 TP_ARGS(cgrp, path) 103 TP_ARGS(cgrp, path)
104); 104);
105 105
106DEFINE_EVENT(cgroup, cgroup_freeze,
107
108 TP_PROTO(struct cgroup *cgrp, const char *path),
109
110 TP_ARGS(cgrp, path)
111);
112
113DEFINE_EVENT(cgroup, cgroup_unfreeze,
114
115 TP_PROTO(struct cgroup *cgrp, const char *path),
116
117 TP_ARGS(cgrp, path)
118);
119
106DECLARE_EVENT_CLASS(cgroup_migrate, 120DECLARE_EVENT_CLASS(cgroup_migrate,
107 121
108 TP_PROTO(struct cgroup *dst_cgrp, const char *path, 122 TP_PROTO(struct cgroup *dst_cgrp, const char *path,
@@ -149,6 +163,47 @@ DEFINE_EVENT(cgroup_migrate, cgroup_transfer_tasks,
149 TP_ARGS(dst_cgrp, path, task, threadgroup) 163 TP_ARGS(dst_cgrp, path, task, threadgroup)
150); 164);
151 165
166DECLARE_EVENT_CLASS(cgroup_event,
167
168 TP_PROTO(struct cgroup *cgrp, const char *path, int val),
169
170 TP_ARGS(cgrp, path, val),
171
172 TP_STRUCT__entry(
173 __field( int, root )
174 __field( int, id )
175 __field( int, level )
176 __string( path, path )
177 __field( int, val )
178 ),
179
180 TP_fast_assign(
181 __entry->root = cgrp->root->hierarchy_id;
182 __entry->id = cgrp->id;
183 __entry->level = cgrp->level;
184 __assign_str(path, path);
185 __entry->val = val;
186 ),
187
188 TP_printk("root=%d id=%d level=%d path=%s val=%d",
189 __entry->root, __entry->id, __entry->level, __get_str(path),
190 __entry->val)
191);
192
193DEFINE_EVENT(cgroup_event, cgroup_notify_populated,
194
195 TP_PROTO(struct cgroup *cgrp, const char *path, int val),
196
197 TP_ARGS(cgrp, path, val)
198);
199
200DEFINE_EVENT(cgroup_event, cgroup_notify_frozen,
201
202 TP_PROTO(struct cgroup *cgrp, const char *path, int val),
203
204 TP_ARGS(cgrp, path, val)
205);
206
152#endif /* _TRACE_CGROUP_H */ 207#endif /* _TRACE_CGROUP_H */
153 208
154/* This part must be outside protection */ 209/* This part must be outside protection */
diff --git a/kernel/cgroup/Makefile b/kernel/cgroup/Makefile
index bfcdae896122..5d7a76bfbbb7 100644
--- a/kernel/cgroup/Makefile
+++ b/kernel/cgroup/Makefile
@@ -1,7 +1,7 @@
1# SPDX-License-Identifier: GPL-2.0 1# SPDX-License-Identifier: GPL-2.0
2obj-y := cgroup.o rstat.o namespace.o cgroup-v1.o 2obj-y := cgroup.o rstat.o namespace.o cgroup-v1.o freezer.o
3 3
4obj-$(CONFIG_CGROUP_FREEZER) += freezer.o 4obj-$(CONFIG_CGROUP_FREEZER) += legacy_freezer.o
5obj-$(CONFIG_CGROUP_PIDS) += pids.o 5obj-$(CONFIG_CGROUP_PIDS) += pids.o
6obj-$(CONFIG_CGROUP_RDMA) += rdma.o 6obj-$(CONFIG_CGROUP_RDMA) += rdma.o
7obj-$(CONFIG_CPUSETS) += cpuset.o 7obj-$(CONFIG_CPUSETS) += cpuset.o
diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h
index 30e39f3932ad..809e34a3c017 100644
--- a/kernel/cgroup/cgroup-internal.h
+++ b/kernel/cgroup/cgroup-internal.h
@@ -28,12 +28,15 @@ extern void __init enable_debug_cgroup(void);
28#define TRACE_CGROUP_PATH(type, cgrp, ...) \ 28#define TRACE_CGROUP_PATH(type, cgrp, ...) \
29 do { \ 29 do { \
30 if (trace_cgroup_##type##_enabled()) { \ 30 if (trace_cgroup_##type##_enabled()) { \
31 spin_lock(&trace_cgroup_path_lock); \ 31 unsigned long flags; \
32 spin_lock_irqsave(&trace_cgroup_path_lock, \
33 flags); \
32 cgroup_path(cgrp, trace_cgroup_path, \ 34 cgroup_path(cgrp, trace_cgroup_path, \
33 TRACE_CGROUP_PATH_LEN); \ 35 TRACE_CGROUP_PATH_LEN); \
34 trace_cgroup_##type(cgrp, trace_cgroup_path, \ 36 trace_cgroup_##type(cgrp, trace_cgroup_path, \
35 ##__VA_ARGS__); \ 37 ##__VA_ARGS__); \
36 spin_unlock(&trace_cgroup_path_lock); \ 38 spin_unlock_irqrestore(&trace_cgroup_path_lock, \
39 flags); \
37 } \ 40 } \
38 } while (0) 41 } while (0)
39 42
@@ -240,6 +243,7 @@ int cgroup_rmdir(struct kernfs_node *kn);
240int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node, 243int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node,
241 struct kernfs_root *kf_root); 244 struct kernfs_root *kf_root);
242 245
246int __cgroup_task_count(const struct cgroup *cgrp);
243int cgroup_task_count(const struct cgroup *cgrp); 247int cgroup_task_count(const struct cgroup *cgrp);
244 248
245/* 249/*
diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c
index c126b34fd4ff..68ca5de7ec27 100644
--- a/kernel/cgroup/cgroup-v1.c
+++ b/kernel/cgroup/cgroup-v1.c
@@ -342,22 +342,6 @@ static struct cgroup_pidlist *cgroup_pidlist_find_create(struct cgroup *cgrp,
342 return l; 342 return l;
343} 343}
344 344
345/**
346 * cgroup_task_count - count the number of tasks in a cgroup.
347 * @cgrp: the cgroup in question
348 */
349int cgroup_task_count(const struct cgroup *cgrp)
350{
351 int count = 0;
352 struct cgrp_cset_link *link;
353
354 spin_lock_irq(&css_set_lock);
355 list_for_each_entry(link, &cgrp->cset_links, cset_link)
356 count += link->cset->nr_tasks;
357 spin_unlock_irq(&css_set_lock);
358 return count;
359}
360
361/* 345/*
362 * Load a cgroup's pidarray with either procs' tgids or tasks' pids 346 * Load a cgroup's pidarray with either procs' tgids or tasks' pids
363 */ 347 */
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 3f2b4bde0f9c..327f37c9fdfa 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -593,6 +593,39 @@ static void cgroup_get_live(struct cgroup *cgrp)
593 css_get(&cgrp->self); 593 css_get(&cgrp->self);
594} 594}
595 595
596/**
597 * __cgroup_task_count - count the number of tasks in a cgroup. The caller
598 * is responsible for taking the css_set_lock.
599 * @cgrp: the cgroup in question
600 */
601int __cgroup_task_count(const struct cgroup *cgrp)
602{
603 int count = 0;
604 struct cgrp_cset_link *link;
605
606 lockdep_assert_held(&css_set_lock);
607
608 list_for_each_entry(link, &cgrp->cset_links, cset_link)
609 count += link->cset->nr_tasks;
610
611 return count;
612}
613
614/**
615 * cgroup_task_count - count the number of tasks in a cgroup.
616 * @cgrp: the cgroup in question
617 */
618int cgroup_task_count(const struct cgroup *cgrp)
619{
620 int count;
621
622 spin_lock_irq(&css_set_lock);
623 count = __cgroup_task_count(cgrp);
624 spin_unlock_irq(&css_set_lock);
625
626 return count;
627}
628
596struct cgroup_subsys_state *of_css(struct kernfs_open_file *of) 629struct cgroup_subsys_state *of_css(struct kernfs_open_file *of)
597{ 630{
598 struct cgroup *cgrp = of->kn->parent->priv; 631 struct cgroup *cgrp = of->kn->parent->priv;
@@ -783,6 +816,8 @@ static void cgroup_update_populated(struct cgroup *cgrp, bool populated)
783 break; 816 break;
784 817
785 cgroup1_check_for_release(cgrp); 818 cgroup1_check_for_release(cgrp);
819 TRACE_CGROUP_PATH(notify_populated, cgrp,
820 cgroup_is_populated(cgrp));
786 cgroup_file_notify(&cgrp->events_file); 821 cgroup_file_notify(&cgrp->events_file);
787 822
788 child = cgrp; 823 child = cgrp;
@@ -2402,8 +2437,15 @@ static int cgroup_migrate_execute(struct cgroup_mgctx *mgctx)
2402 get_css_set(to_cset); 2437 get_css_set(to_cset);
2403 to_cset->nr_tasks++; 2438 to_cset->nr_tasks++;
2404 css_set_move_task(task, from_cset, to_cset, true); 2439 css_set_move_task(task, from_cset, to_cset, true);
2405 put_css_set_locked(from_cset);
2406 from_cset->nr_tasks--; 2440 from_cset->nr_tasks--;
2441 /*
2442 * If the source or destination cgroup is frozen,
2443 * the task might require to change its state.
2444 */
2445 cgroup_freezer_migrate_task(task, from_cset->dfl_cgrp,
2446 to_cset->dfl_cgrp);
2447 put_css_set_locked(from_cset);
2448
2407 } 2449 }
2408 } 2450 }
2409 spin_unlock_irq(&css_set_lock); 2451 spin_unlock_irq(&css_set_lock);
@@ -2602,7 +2644,7 @@ int cgroup_migrate_prepare_dst(struct cgroup_mgctx *mgctx)
2602 2644
2603 dst_cset = find_css_set(src_cset, src_cset->mg_dst_cgrp); 2645 dst_cset = find_css_set(src_cset, src_cset->mg_dst_cgrp);
2604 if (!dst_cset) 2646 if (!dst_cset)
2605 goto err; 2647 return -ENOMEM;
2606 2648
2607 WARN_ON_ONCE(src_cset->mg_dst_cset || dst_cset->mg_dst_cset); 2649 WARN_ON_ONCE(src_cset->mg_dst_cset || dst_cset->mg_dst_cset);
2608 2650
@@ -2634,9 +2676,6 @@ int cgroup_migrate_prepare_dst(struct cgroup_mgctx *mgctx)
2634 } 2676 }
2635 2677
2636 return 0; 2678 return 0;
2637err:
2638 cgroup_migrate_finish(mgctx);
2639 return -ENOMEM;
2640} 2679}
2641 2680
2642/** 2681/**
@@ -3447,8 +3486,11 @@ static ssize_t cgroup_max_depth_write(struct kernfs_open_file *of,
3447 3486
3448static int cgroup_events_show(struct seq_file *seq, void *v) 3487static int cgroup_events_show(struct seq_file *seq, void *v)
3449{ 3488{
3450 seq_printf(seq, "populated %d\n", 3489 struct cgroup *cgrp = seq_css(seq)->cgroup;
3451 cgroup_is_populated(seq_css(seq)->cgroup)); 3490
3491 seq_printf(seq, "populated %d\n", cgroup_is_populated(cgrp));
3492 seq_printf(seq, "frozen %d\n", test_bit(CGRP_FROZEN, &cgrp->flags));
3493
3452 return 0; 3494 return 0;
3453} 3495}
3454 3496
@@ -3510,6 +3552,40 @@ static int cgroup_cpu_pressure_show(struct seq_file *seq, void *v)
3510} 3552}
3511#endif 3553#endif
3512 3554
3555static int cgroup_freeze_show(struct seq_file *seq, void *v)
3556{
3557 struct cgroup *cgrp = seq_css(seq)->cgroup;
3558
3559 seq_printf(seq, "%d\n", cgrp->freezer.freeze);
3560
3561 return 0;
3562}
3563
3564static ssize_t cgroup_freeze_write(struct kernfs_open_file *of,
3565 char *buf, size_t nbytes, loff_t off)
3566{
3567 struct cgroup *cgrp;
3568 ssize_t ret;
3569 int freeze;
3570
3571 ret = kstrtoint(strstrip(buf), 0, &freeze);
3572 if (ret)
3573 return ret;
3574
3575 if (freeze < 0 || freeze > 1)
3576 return -ERANGE;
3577
3578 cgrp = cgroup_kn_lock_live(of->kn, false);
3579 if (!cgrp)
3580 return -ENOENT;
3581
3582 cgroup_freeze(cgrp, freeze);
3583
3584 cgroup_kn_unlock(of->kn);
3585
3586 return nbytes;
3587}
3588
3513static int cgroup_file_open(struct kernfs_open_file *of) 3589static int cgroup_file_open(struct kernfs_open_file *of)
3514{ 3590{
3515 struct cftype *cft = of->kn->priv; 3591 struct cftype *cft = of->kn->priv;
@@ -4654,6 +4730,12 @@ static struct cftype cgroup_base_files[] = {
4654 .seq_show = cgroup_stat_show, 4730 .seq_show = cgroup_stat_show,
4655 }, 4731 },
4656 { 4732 {
4733 .name = "cgroup.freeze",
4734 .flags = CFTYPE_NOT_ON_ROOT,
4735 .seq_show = cgroup_freeze_show,
4736 .write = cgroup_freeze_write,
4737 },
4738 {
4657 .name = "cpu.stat", 4739 .name = "cpu.stat",
4658 .flags = CFTYPE_NOT_ON_ROOT, 4740 .flags = CFTYPE_NOT_ON_ROOT,
4659 .seq_show = cpu_stat_show, 4741 .seq_show = cpu_stat_show,
@@ -4781,9 +4863,11 @@ static void css_release_work_fn(struct work_struct *work)
4781 if (cgroup_on_dfl(cgrp)) 4863 if (cgroup_on_dfl(cgrp))
4782 cgroup_rstat_flush(cgrp); 4864 cgroup_rstat_flush(cgrp);
4783 4865
4866 spin_lock_irq(&css_set_lock);
4784 for (tcgrp = cgroup_parent(cgrp); tcgrp; 4867 for (tcgrp = cgroup_parent(cgrp); tcgrp;
4785 tcgrp = cgroup_parent(tcgrp)) 4868 tcgrp = cgroup_parent(tcgrp))
4786 tcgrp->nr_dying_descendants--; 4869 tcgrp->nr_dying_descendants--;
4870 spin_unlock_irq(&css_set_lock);
4787 4871
4788 cgroup_idr_remove(&cgrp->root->cgroup_idr, cgrp->id); 4872 cgroup_idr_remove(&cgrp->root->cgroup_idr, cgrp->id);
4789 cgrp->id = -1; 4873 cgrp->id = -1;
@@ -5001,12 +5085,31 @@ static struct cgroup *cgroup_create(struct cgroup *parent)
5001 if (ret) 5085 if (ret)
5002 goto out_psi_free; 5086 goto out_psi_free;
5003 5087
5088 /*
5089 * New cgroup inherits effective freeze counter, and
5090 * if the parent has to be frozen, the child has too.
5091 */
5092 cgrp->freezer.e_freeze = parent->freezer.e_freeze;
5093 if (cgrp->freezer.e_freeze)
5094 set_bit(CGRP_FROZEN, &cgrp->flags);
5095
5096 spin_lock_irq(&css_set_lock);
5004 for (tcgrp = cgrp; tcgrp; tcgrp = cgroup_parent(tcgrp)) { 5097 for (tcgrp = cgrp; tcgrp; tcgrp = cgroup_parent(tcgrp)) {
5005 cgrp->ancestor_ids[tcgrp->level] = tcgrp->id; 5098 cgrp->ancestor_ids[tcgrp->level] = tcgrp->id;
5006 5099
5007 if (tcgrp != cgrp) 5100 if (tcgrp != cgrp) {
5008 tcgrp->nr_descendants++; 5101 tcgrp->nr_descendants++;
5102
5103 /*
5104 * If the new cgroup is frozen, all ancestor cgroups
5105 * get a new frozen descendant, but their state can't
5106 * change because of this.
5107 */
5108 if (cgrp->freezer.e_freeze)
5109 tcgrp->freezer.nr_frozen_descendants++;
5110 }
5009 } 5111 }
5112 spin_unlock_irq(&css_set_lock);
5010 5113
5011 if (notify_on_release(parent)) 5114 if (notify_on_release(parent))
5012 set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); 5115 set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
@@ -5291,10 +5394,18 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
5291 if (parent && cgroup_is_threaded(cgrp)) 5394 if (parent && cgroup_is_threaded(cgrp))
5292 parent->nr_threaded_children--; 5395 parent->nr_threaded_children--;
5293 5396
5397 spin_lock_irq(&css_set_lock);
5294 for (tcgrp = cgroup_parent(cgrp); tcgrp; tcgrp = cgroup_parent(tcgrp)) { 5398 for (tcgrp = cgroup_parent(cgrp); tcgrp; tcgrp = cgroup_parent(tcgrp)) {
5295 tcgrp->nr_descendants--; 5399 tcgrp->nr_descendants--;
5296 tcgrp->nr_dying_descendants++; 5400 tcgrp->nr_dying_descendants++;
5401 /*
5402 * If the dying cgroup is frozen, decrease frozen descendants
5403 * counters of ancestor cgroups.
5404 */
5405 if (test_bit(CGRP_FROZEN, &cgrp->flags))
5406 tcgrp->freezer.nr_frozen_descendants--;
5297 } 5407 }
5408 spin_unlock_irq(&css_set_lock);
5298 5409
5299 cgroup1_check_for_release(parent); 5410 cgroup1_check_for_release(parent);
5300 5411
@@ -5746,6 +5857,26 @@ void cgroup_post_fork(struct task_struct *child)
5746 cset->nr_tasks++; 5857 cset->nr_tasks++;
5747 css_set_move_task(child, NULL, cset, false); 5858 css_set_move_task(child, NULL, cset, false);
5748 } 5859 }
5860
5861 /*
5862 * If the cgroup has to be frozen, the new task has too.
5863 * Let's set the JOBCTL_TRAP_FREEZE jobctl bit to get
5864 * the task into the frozen state.
5865 */
5866 if (unlikely(cgroup_task_freeze(child))) {
5867 spin_lock(&child->sighand->siglock);
5868 WARN_ON_ONCE(child->frozen);
5869 child->jobctl |= JOBCTL_TRAP_FREEZE;
5870 spin_unlock(&child->sighand->siglock);
5871
5872 /*
5873 * Calling cgroup_update_frozen() isn't required here,
5874 * because it will be called anyway a bit later
5875 * from do_freezer_trap(). So we avoid cgroup's
5876 * transient switch from the frozen state and back.
5877 */
5878 }
5879
5749 spin_unlock_irq(&css_set_lock); 5880 spin_unlock_irq(&css_set_lock);
5750 } 5881 }
5751 5882
@@ -5794,6 +5925,11 @@ void cgroup_exit(struct task_struct *tsk)
5794 spin_lock_irq(&css_set_lock); 5925 spin_lock_irq(&css_set_lock);
5795 css_set_move_task(tsk, cset, NULL, false); 5926 css_set_move_task(tsk, cset, NULL, false);
5796 cset->nr_tasks--; 5927 cset->nr_tasks--;
5928
5929 WARN_ON_ONCE(cgroup_task_frozen(tsk));
5930 if (unlikely(cgroup_task_freeze(tsk)))
5931 cgroup_update_frozen(task_dfl_cgroup(tsk));
5932
5797 spin_unlock_irq(&css_set_lock); 5933 spin_unlock_irq(&css_set_lock);
5798 } else { 5934 } else {
5799 get_css_set(cset); 5935 get_css_set(cset);
diff --git a/kernel/cgroup/debug.c b/kernel/cgroup/debug.c
index 5f1b87330bee..80aa3f027ac3 100644
--- a/kernel/cgroup/debug.c
+++ b/kernel/cgroup/debug.c
@@ -64,8 +64,8 @@ static int current_css_set_read(struct seq_file *seq, void *v)
64 css = cset->subsys[ss->id]; 64 css = cset->subsys[ss->id];
65 if (!css) 65 if (!css)
66 continue; 66 continue;
67 seq_printf(seq, "%2d: %-4s\t- %lx[%d]\n", ss->id, ss->name, 67 seq_printf(seq, "%2d: %-4s\t- %p[%d]\n", ss->id, ss->name,
68 (unsigned long)css, css->id); 68 css, css->id);
69 } 69 }
70 rcu_read_unlock(); 70 rcu_read_unlock();
71 spin_unlock_irq(&css_set_lock); 71 spin_unlock_irq(&css_set_lock);
@@ -224,8 +224,8 @@ static int cgroup_subsys_states_read(struct seq_file *seq, void *v)
224 if (css->parent) 224 if (css->parent)
225 snprintf(pbuf, sizeof(pbuf) - 1, " P=%d", 225 snprintf(pbuf, sizeof(pbuf) - 1, " P=%d",
226 css->parent->id); 226 css->parent->id);
227 seq_printf(seq, "%2d: %-4s\t- %lx[%d] %d%s\n", ss->id, ss->name, 227 seq_printf(seq, "%2d: %-4s\t- %p[%d] %d%s\n", ss->id, ss->name,
228 (unsigned long)css, css->id, 228 css, css->id,
229 atomic_read(&css->online_cnt), pbuf); 229 atomic_read(&css->online_cnt), pbuf);
230 } 230 }
231 231
diff --git a/kernel/cgroup/freezer.c b/kernel/cgroup/freezer.c
index 08236798d173..8cf010680678 100644
--- a/kernel/cgroup/freezer.c
+++ b/kernel/cgroup/freezer.c
@@ -1,481 +1,314 @@
1/* 1//SPDX-License-Identifier: GPL-2.0
2 * cgroup_freezer.c - control group freezer subsystem
3 *
4 * Copyright IBM Corporation, 2007
5 *
6 * Author : Cedric Le Goater <clg@fr.ibm.com>
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms of version 2.1 of the GNU Lesser General Public License
10 * as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope that it would be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
15 */
16
17#include <linux/export.h>
18#include <linux/slab.h>
19#include <linux/cgroup.h> 2#include <linux/cgroup.h>
20#include <linux/fs.h> 3#include <linux/sched.h>
21#include <linux/uaccess.h> 4#include <linux/sched/task.h>
22#include <linux/freezer.h> 5#include <linux/sched/signal.h>
23#include <linux/seq_file.h>
24#include <linux/mutex.h>
25
26/*
27 * A cgroup is freezing if any FREEZING flags are set. FREEZING_SELF is
28 * set if "FROZEN" is written to freezer.state cgroupfs file, and cleared
29 * for "THAWED". FREEZING_PARENT is set if the parent freezer is FREEZING
30 * for whatever reason. IOW, a cgroup has FREEZING_PARENT set if one of
31 * its ancestors has FREEZING_SELF set.
32 */
33enum freezer_state_flags {
34 CGROUP_FREEZER_ONLINE = (1 << 0), /* freezer is fully online */
35 CGROUP_FREEZING_SELF = (1 << 1), /* this freezer is freezing */
36 CGROUP_FREEZING_PARENT = (1 << 2), /* the parent freezer is freezing */
37 CGROUP_FROZEN = (1 << 3), /* this and its descendants frozen */
38 6
39 /* mask for all FREEZING flags */ 7#include "cgroup-internal.h"
40 CGROUP_FREEZING = CGROUP_FREEZING_SELF | CGROUP_FREEZING_PARENT,
41};
42 8
43struct freezer { 9#include <trace/events/cgroup.h>
44 struct cgroup_subsys_state css;
45 unsigned int state;
46};
47 10
48static DEFINE_MUTEX(freezer_mutex); 11/*
49 12 * Propagate the cgroup frozen state upwards by the cgroup tree.
50static inline struct freezer *css_freezer(struct cgroup_subsys_state *css) 13 */
14static void cgroup_propagate_frozen(struct cgroup *cgrp, bool frozen)
51{ 15{
52 return css ? container_of(css, struct freezer, css) : NULL; 16 int desc = 1;
53}
54 17
55static inline struct freezer *task_freezer(struct task_struct *task) 18 /*
56{ 19 * If the new state is frozen, some freezing ancestor cgroups may change
57 return css_freezer(task_css(task, freezer_cgrp_id)); 20 * their state too, depending on if all their descendants are frozen.
21 *
22 * Otherwise, all ancestor cgroups are forced into the non-frozen state.
23 */
24 while ((cgrp = cgroup_parent(cgrp))) {
25 if (frozen) {
26 cgrp->freezer.nr_frozen_descendants += desc;
27 if (!test_bit(CGRP_FROZEN, &cgrp->flags) &&
28 test_bit(CGRP_FREEZE, &cgrp->flags) &&
29 cgrp->freezer.nr_frozen_descendants ==
30 cgrp->nr_descendants) {
31 set_bit(CGRP_FROZEN, &cgrp->flags);
32 cgroup_file_notify(&cgrp->events_file);
33 TRACE_CGROUP_PATH(notify_frozen, cgrp, 1);
34 desc++;
35 }
36 } else {
37 cgrp->freezer.nr_frozen_descendants -= desc;
38 if (test_bit(CGRP_FROZEN, &cgrp->flags)) {
39 clear_bit(CGRP_FROZEN, &cgrp->flags);
40 cgroup_file_notify(&cgrp->events_file);
41 TRACE_CGROUP_PATH(notify_frozen, cgrp, 0);
42 desc++;
43 }
44 }
45 }
58} 46}
59 47
60static struct freezer *parent_freezer(struct freezer *freezer) 48/*
49 * Revisit the cgroup frozen state.
50 * Checks if the cgroup is really frozen and perform all state transitions.
51 */
52void cgroup_update_frozen(struct cgroup *cgrp)
61{ 53{
62 return css_freezer(freezer->css.parent); 54 bool frozen;
63}
64 55
65bool cgroup_freezing(struct task_struct *task) 56 lockdep_assert_held(&css_set_lock);
66{
67 bool ret;
68 57
69 rcu_read_lock(); 58 /*
70 ret = task_freezer(task)->state & CGROUP_FREEZING; 59 * If the cgroup has to be frozen (CGRP_FREEZE bit set),
71 rcu_read_unlock(); 60 * and all tasks are frozen and/or stopped, let's consider
61 * the cgroup frozen. Otherwise it's not frozen.
62 */
63 frozen = test_bit(CGRP_FREEZE, &cgrp->flags) &&
64 cgrp->freezer.nr_frozen_tasks == __cgroup_task_count(cgrp);
72 65
73 return ret; 66 if (frozen) {
74} 67 /* Already there? */
68 if (test_bit(CGRP_FROZEN, &cgrp->flags))
69 return;
75 70
76static const char *freezer_state_strs(unsigned int state) 71 set_bit(CGRP_FROZEN, &cgrp->flags);
77{ 72 } else {
78 if (state & CGROUP_FROZEN) 73 /* Already there? */
79 return "FROZEN"; 74 if (!test_bit(CGRP_FROZEN, &cgrp->flags))
80 if (state & CGROUP_FREEZING) 75 return;
81 return "FREEZING";
82 return "THAWED";
83};
84
85static struct cgroup_subsys_state *
86freezer_css_alloc(struct cgroup_subsys_state *parent_css)
87{
88 struct freezer *freezer;
89 76
90 freezer = kzalloc(sizeof(struct freezer), GFP_KERNEL); 77 clear_bit(CGRP_FROZEN, &cgrp->flags);
91 if (!freezer) 78 }
92 return ERR_PTR(-ENOMEM); 79 cgroup_file_notify(&cgrp->events_file);
80 TRACE_CGROUP_PATH(notify_frozen, cgrp, frozen);
93 81
94 return &freezer->css; 82 /* Update the state of ancestor cgroups. */
83 cgroup_propagate_frozen(cgrp, frozen);
95} 84}
96 85
97/** 86/*
98 * freezer_css_online - commit creation of a freezer css 87 * Increment cgroup's nr_frozen_tasks.
99 * @css: css being created
100 *
101 * We're committing to creation of @css. Mark it online and inherit
102 * parent's freezing state while holding both parent's and our
103 * freezer->lock.
104 */ 88 */
105static int freezer_css_online(struct cgroup_subsys_state *css) 89static void cgroup_inc_frozen_cnt(struct cgroup *cgrp)
106{ 90{
107 struct freezer *freezer = css_freezer(css); 91 cgrp->freezer.nr_frozen_tasks++;
108 struct freezer *parent = parent_freezer(freezer);
109
110 mutex_lock(&freezer_mutex);
111
112 freezer->state |= CGROUP_FREEZER_ONLINE;
113
114 if (parent && (parent->state & CGROUP_FREEZING)) {
115 freezer->state |= CGROUP_FREEZING_PARENT | CGROUP_FROZEN;
116 atomic_inc(&system_freezing_cnt);
117 }
118
119 mutex_unlock(&freezer_mutex);
120 return 0;
121} 92}
122 93
123/** 94/*
124 * freezer_css_offline - initiate destruction of a freezer css 95 * Decrement cgroup's nr_frozen_tasks.
125 * @css: css being destroyed
126 *
127 * @css is going away. Mark it dead and decrement system_freezing_count if
128 * it was holding one.
129 */ 96 */
130static void freezer_css_offline(struct cgroup_subsys_state *css) 97static void cgroup_dec_frozen_cnt(struct cgroup *cgrp)
131{ 98{
132 struct freezer *freezer = css_freezer(css); 99 cgrp->freezer.nr_frozen_tasks--;
133 100 WARN_ON_ONCE(cgrp->freezer.nr_frozen_tasks < 0);
134 mutex_lock(&freezer_mutex);
135
136 if (freezer->state & CGROUP_FREEZING)
137 atomic_dec(&system_freezing_cnt);
138
139 freezer->state = 0;
140
141 mutex_unlock(&freezer_mutex);
142} 101}
143 102
144static void freezer_css_free(struct cgroup_subsys_state *css) 103/*
104 * Enter frozen/stopped state, if not yet there. Update cgroup's counters,
105 * and revisit the state of the cgroup, if necessary.
106 */
107void cgroup_enter_frozen(void)
145{ 108{
146 kfree(css_freezer(css)); 109 struct cgroup *cgrp;
110
111 if (current->frozen)
112 return;
113
114 spin_lock_irq(&css_set_lock);
115 current->frozen = true;
116 cgrp = task_dfl_cgroup(current);
117 cgroup_inc_frozen_cnt(cgrp);
118 cgroup_update_frozen(cgrp);
119 spin_unlock_irq(&css_set_lock);
147} 120}
148 121
149/* 122/*
150 * Tasks can be migrated into a different freezer anytime regardless of its 123 * Conditionally leave frozen/stopped state. Update cgroup's counters,
151 * current state. freezer_attach() is responsible for making new tasks 124 * and revisit the state of the cgroup, if necessary.
152 * conform to the current state.
153 * 125 *
154 * Freezer state changes and task migration are synchronized via 126 * If always_leave is not set, and the cgroup is freezing,
155 * @freezer->lock. freezer_attach() makes the new tasks conform to the 127 * we're racing with the cgroup freezing. In this case, we don't
156 * current state and all following state changes can see the new tasks. 128 * drop the frozen counter to avoid a transient switch to
129 * the unfrozen state.
157 */ 130 */
158static void freezer_attach(struct cgroup_taskset *tset) 131void cgroup_leave_frozen(bool always_leave)
159{ 132{
160 struct task_struct *task; 133 struct cgroup *cgrp;
161 struct cgroup_subsys_state *new_css; 134
162 135 spin_lock_irq(&css_set_lock);
163 mutex_lock(&freezer_mutex); 136 cgrp = task_dfl_cgroup(current);
164 137 if (always_leave || !test_bit(CGRP_FREEZE, &cgrp->flags)) {
165 /* 138 cgroup_dec_frozen_cnt(cgrp);
166 * Make the new tasks conform to the current state of @new_css. 139 cgroup_update_frozen(cgrp);
167 * For simplicity, when migrating any task to a FROZEN cgroup, we 140 WARN_ON_ONCE(!current->frozen);
168 * revert it to FREEZING and let update_if_frozen() determine the 141 current->frozen = false;
169 * correct state later. 142 } else if (!(current->jobctl & JOBCTL_TRAP_FREEZE)) {
170 * 143 spin_lock(&current->sighand->siglock);
171 * Tasks in @tset are on @new_css but may not conform to its 144 current->jobctl |= JOBCTL_TRAP_FREEZE;
172 * current state before executing the following - !frozen tasks may 145 set_thread_flag(TIF_SIGPENDING);
173 * be visible in a FROZEN cgroup and frozen tasks in a THAWED one. 146 spin_unlock(&current->sighand->siglock);
174 */
175 cgroup_taskset_for_each(task, new_css, tset) {
176 struct freezer *freezer = css_freezer(new_css);
177
178 if (!(freezer->state & CGROUP_FREEZING)) {
179 __thaw_task(task);
180 } else {
181 freeze_task(task);
182 /* clear FROZEN and propagate upwards */
183 while (freezer && (freezer->state & CGROUP_FROZEN)) {
184 freezer->state &= ~CGROUP_FROZEN;
185 freezer = parent_freezer(freezer);
186 }
187 }
188 } 147 }
189 148 spin_unlock_irq(&css_set_lock);
190 mutex_unlock(&freezer_mutex);
191} 149}
192 150
193/** 151/*
194 * freezer_fork - cgroup post fork callback 152 * Freeze or unfreeze the task by setting or clearing the JOBCTL_TRAP_FREEZE
195 * @task: a task which has just been forked 153 * jobctl bit.
196 *
197 * @task has just been created and should conform to the current state of
198 * the cgroup_freezer it belongs to. This function may race against
199 * freezer_attach(). Losing to freezer_attach() means that we don't have
200 * to do anything as freezer_attach() will put @task into the appropriate
201 * state.
202 */ 154 */
203static void freezer_fork(struct task_struct *task) 155static void cgroup_freeze_task(struct task_struct *task, bool freeze)
204{ 156{
205 struct freezer *freezer; 157 unsigned long flags;
206 158
207 /* 159 /* If the task is about to die, don't bother with freezing it. */
208 * The root cgroup is non-freezable, so we can skip locking the 160 if (!lock_task_sighand(task, &flags))
209 * freezer. This is safe regardless of race with task migration.
210 * If we didn't race or won, skipping is obviously the right thing
211 * to do. If we lost and root is the new cgroup, noop is still the
212 * right thing to do.
213 */
214 if (task_css_is_root(task, freezer_cgrp_id))
215 return; 161 return;
216 162
217 mutex_lock(&freezer_mutex); 163 if (freeze) {
218 rcu_read_lock(); 164 task->jobctl |= JOBCTL_TRAP_FREEZE;
219 165 signal_wake_up(task, false);
220 freezer = task_freezer(task); 166 } else {
221 if (freezer->state & CGROUP_FREEZING) 167 task->jobctl &= ~JOBCTL_TRAP_FREEZE;
222 freeze_task(task); 168 wake_up_process(task);
169 }
223 170
224 rcu_read_unlock(); 171 unlock_task_sighand(task, &flags);
225 mutex_unlock(&freezer_mutex);
226} 172}
227 173
228/** 174/*
229 * update_if_frozen - update whether a cgroup finished freezing 175 * Freeze or unfreeze all tasks in the given cgroup.
230 * @css: css of interest
231 *
232 * Once FREEZING is initiated, transition to FROZEN is lazily updated by
233 * calling this function. If the current state is FREEZING but not FROZEN,
234 * this function checks whether all tasks of this cgroup and the descendant
235 * cgroups finished freezing and, if so, sets FROZEN.
236 *
237 * The caller is responsible for grabbing RCU read lock and calling
238 * update_if_frozen() on all descendants prior to invoking this function.
239 *
240 * Task states and freezer state might disagree while tasks are being
241 * migrated into or out of @css, so we can't verify task states against
242 * @freezer state here. See freezer_attach() for details.
243 */ 176 */
244static void update_if_frozen(struct cgroup_subsys_state *css) 177static void cgroup_do_freeze(struct cgroup *cgrp, bool freeze)
245{ 178{
246 struct freezer *freezer = css_freezer(css);
247 struct cgroup_subsys_state *pos;
248 struct css_task_iter it; 179 struct css_task_iter it;
249 struct task_struct *task; 180 struct task_struct *task;
250 181
251 lockdep_assert_held(&freezer_mutex); 182 lockdep_assert_held(&cgroup_mutex);
252
253 if (!(freezer->state & CGROUP_FREEZING) ||
254 (freezer->state & CGROUP_FROZEN))
255 return;
256 183
257 /* are all (live) children frozen? */ 184 spin_lock_irq(&css_set_lock);
258 rcu_read_lock(); 185 if (freeze)
259 css_for_each_child(pos, css) { 186 set_bit(CGRP_FREEZE, &cgrp->flags);
260 struct freezer *child = css_freezer(pos); 187 else
261 188 clear_bit(CGRP_FREEZE, &cgrp->flags);
262 if ((child->state & CGROUP_FREEZER_ONLINE) && 189 spin_unlock_irq(&css_set_lock);
263 !(child->state & CGROUP_FROZEN)) {
264 rcu_read_unlock();
265 return;
266 }
267 }
268 rcu_read_unlock();
269 190
270 /* are all tasks frozen? */ 191 if (freeze)
271 css_task_iter_start(css, 0, &it); 192 TRACE_CGROUP_PATH(freeze, cgrp);
193 else
194 TRACE_CGROUP_PATH(unfreeze, cgrp);
272 195
196 css_task_iter_start(&cgrp->self, 0, &it);
273 while ((task = css_task_iter_next(&it))) { 197 while ((task = css_task_iter_next(&it))) {
274 if (freezing(task)) { 198 /*
275 /* 199 * Ignore kernel threads here. Freezing cgroups containing
276 * freezer_should_skip() indicates that the task 200 * kthreads isn't supported.
277 * should be skipped when determining freezing 201 */
278 * completion. Consider it frozen in addition to 202 if (task->flags & PF_KTHREAD)
279 * the usual frozen condition.
280 */
281 if (!frozen(task) && !freezer_should_skip(task))
282 goto out_iter_end;
283 }
284 }
285
286 freezer->state |= CGROUP_FROZEN;
287out_iter_end:
288 css_task_iter_end(&it);
289}
290
291static int freezer_read(struct seq_file *m, void *v)
292{
293 struct cgroup_subsys_state *css = seq_css(m), *pos;
294
295 mutex_lock(&freezer_mutex);
296 rcu_read_lock();
297
298 /* update states bottom-up */
299 css_for_each_descendant_post(pos, css) {
300 if (!css_tryget_online(pos))
301 continue; 203 continue;
302 rcu_read_unlock(); 204 cgroup_freeze_task(task, freeze);
303
304 update_if_frozen(pos);
305
306 rcu_read_lock();
307 css_put(pos);
308 } 205 }
309
310 rcu_read_unlock();
311 mutex_unlock(&freezer_mutex);
312
313 seq_puts(m, freezer_state_strs(css_freezer(css)->state));
314 seq_putc(m, '\n');
315 return 0;
316}
317
318static void freeze_cgroup(struct freezer *freezer)
319{
320 struct css_task_iter it;
321 struct task_struct *task;
322
323 css_task_iter_start(&freezer->css, 0, &it);
324 while ((task = css_task_iter_next(&it)))
325 freeze_task(task);
326 css_task_iter_end(&it); 206 css_task_iter_end(&it);
327}
328 207
329static void unfreeze_cgroup(struct freezer *freezer) 208 /*
330{ 209 * Cgroup state should be revisited here to cover empty leaf cgroups
331 struct css_task_iter it; 210 * and cgroups which descendants are already in the desired state.
332 struct task_struct *task; 211 */
333 212 spin_lock_irq(&css_set_lock);
334 css_task_iter_start(&freezer->css, 0, &it); 213 if (cgrp->nr_descendants == cgrp->freezer.nr_frozen_descendants)
335 while ((task = css_task_iter_next(&it))) 214 cgroup_update_frozen(cgrp);
336 __thaw_task(task); 215 spin_unlock_irq(&css_set_lock);
337 css_task_iter_end(&it);
338} 216}
339 217
340/** 218/*
341 * freezer_apply_state - apply state change to a single cgroup_freezer 219 * Adjust the task state (freeze or unfreeze) and revisit the state of
342 * @freezer: freezer to apply state change to 220 * source and destination cgroups.
343 * @freeze: whether to freeze or unfreeze
344 * @state: CGROUP_FREEZING_* flag to set or clear
345 *
346 * Set or clear @state on @cgroup according to @freeze, and perform
347 * freezing or thawing as necessary.
348 */ 221 */
349static void freezer_apply_state(struct freezer *freezer, bool freeze, 222void cgroup_freezer_migrate_task(struct task_struct *task,
350 unsigned int state) 223 struct cgroup *src, struct cgroup *dst)
351{ 224{
352 /* also synchronizes against task migration, see freezer_attach() */ 225 lockdep_assert_held(&css_set_lock);
353 lockdep_assert_held(&freezer_mutex);
354 226
355 if (!(freezer->state & CGROUP_FREEZER_ONLINE)) 227 /*
228 * Kernel threads are not supposed to be frozen at all.
229 */
230 if (task->flags & PF_KTHREAD)
356 return; 231 return;
357 232
358 if (freeze) { 233 /*
359 if (!(freezer->state & CGROUP_FREEZING)) 234 * Adjust counters of freezing and frozen tasks.
360 atomic_inc(&system_freezing_cnt); 235 * Note, that if the task is frozen, but the destination cgroup is not
361 freezer->state |= state; 236 * frozen, we bump both counters to keep them balanced.
362 freeze_cgroup(freezer); 237 */
363 } else { 238 if (task->frozen) {
364 bool was_freezing = freezer->state & CGROUP_FREEZING; 239 cgroup_inc_frozen_cnt(dst);
365 240 cgroup_dec_frozen_cnt(src);
366 freezer->state &= ~state;
367
368 if (!(freezer->state & CGROUP_FREEZING)) {
369 if (was_freezing)
370 atomic_dec(&system_freezing_cnt);
371 freezer->state &= ~CGROUP_FROZEN;
372 unfreeze_cgroup(freezer);
373 }
374 } 241 }
375} 242 cgroup_update_frozen(dst);
376 243 cgroup_update_frozen(src);
377/**
378 * freezer_change_state - change the freezing state of a cgroup_freezer
379 * @freezer: freezer of interest
380 * @freeze: whether to freeze or thaw
381 *
382 * Freeze or thaw @freezer according to @freeze. The operations are
383 * recursive - all descendants of @freezer will be affected.
384 */
385static void freezer_change_state(struct freezer *freezer, bool freeze)
386{
387 struct cgroup_subsys_state *pos;
388 244
389 /* 245 /*
390 * Update all its descendants in pre-order traversal. Each 246 * Force the task to the desired state.
391 * descendant will try to inherit its parent's FREEZING state as
392 * CGROUP_FREEZING_PARENT.
393 */ 247 */
394 mutex_lock(&freezer_mutex); 248 cgroup_freeze_task(task, test_bit(CGRP_FREEZE, &dst->flags));
395 rcu_read_lock();
396 css_for_each_descendant_pre(pos, &freezer->css) {
397 struct freezer *pos_f = css_freezer(pos);
398 struct freezer *parent = parent_freezer(pos_f);
399
400 if (!css_tryget_online(pos))
401 continue;
402 rcu_read_unlock();
403
404 if (pos_f == freezer)
405 freezer_apply_state(pos_f, freeze,
406 CGROUP_FREEZING_SELF);
407 else
408 freezer_apply_state(pos_f,
409 parent->state & CGROUP_FREEZING,
410 CGROUP_FREEZING_PARENT);
411
412 rcu_read_lock();
413 css_put(pos);
414 }
415 rcu_read_unlock();
416 mutex_unlock(&freezer_mutex);
417} 249}
418 250
419static ssize_t freezer_write(struct kernfs_open_file *of, 251void cgroup_freeze(struct cgroup *cgrp, bool freeze)
420 char *buf, size_t nbytes, loff_t off)
421{ 252{
422 bool freeze; 253 struct cgroup_subsys_state *css;
254 struct cgroup *dsct;
255 bool applied = false;
423 256
424 buf = strstrip(buf); 257 lockdep_assert_held(&cgroup_mutex);
425 258
426 if (strcmp(buf, freezer_state_strs(0)) == 0) 259 /*
427 freeze = false; 260 * Nothing changed? Just exit.
428 else if (strcmp(buf, freezer_state_strs(CGROUP_FROZEN)) == 0) 261 */
429 freeze = true; 262 if (cgrp->freezer.freeze == freeze)
430 else 263 return;
431 return -EINVAL;
432 264
433 freezer_change_state(css_freezer(of_css(of)), freeze); 265 cgrp->freezer.freeze = freeze;
434 return nbytes;
435}
436 266
437static u64 freezer_self_freezing_read(struct cgroup_subsys_state *css, 267 /*
438 struct cftype *cft) 268 * Propagate changes downwards the cgroup tree.
439{ 269 */
440 struct freezer *freezer = css_freezer(css); 270 css_for_each_descendant_pre(css, &cgrp->self) {
271 dsct = css->cgroup;
441 272
442 return (bool)(freezer->state & CGROUP_FREEZING_SELF); 273 if (cgroup_is_dead(dsct))
443} 274 continue;
444 275
445static u64 freezer_parent_freezing_read(struct cgroup_subsys_state *css, 276 if (freeze) {
446 struct cftype *cft) 277 dsct->freezer.e_freeze++;
447{ 278 /*
448 struct freezer *freezer = css_freezer(css); 279 * Already frozen because of ancestor's settings?
280 */
281 if (dsct->freezer.e_freeze > 1)
282 continue;
283 } else {
284 dsct->freezer.e_freeze--;
285 /*
286 * Still frozen because of ancestor's settings?
287 */
288 if (dsct->freezer.e_freeze > 0)
289 continue;
449 290
450 return (bool)(freezer->state & CGROUP_FREEZING_PARENT); 291 WARN_ON_ONCE(dsct->freezer.e_freeze < 0);
451} 292 }
293
294 /*
295 * Do change actual state: freeze or unfreeze.
296 */
297 cgroup_do_freeze(dsct, freeze);
298 applied = true;
299 }
452 300
453static struct cftype files[] = { 301 /*
454 { 302 * Even if the actual state hasn't changed, let's notify a user.
455 .name = "state", 303 * The state can be enforced by an ancestor cgroup: the cgroup
456 .flags = CFTYPE_NOT_ON_ROOT, 304 * can already be in the desired state or it can be locked in the
457 .seq_show = freezer_read, 305 * opposite state, so that the transition will never happen.
458 .write = freezer_write, 306 * In both cases it's better to notify a user, that there is
459 }, 307 * nothing to wait for.
460 { 308 */
461 .name = "self_freezing", 309 if (!applied) {
462 .flags = CFTYPE_NOT_ON_ROOT, 310 TRACE_CGROUP_PATH(notify_frozen, cgrp,
463 .read_u64 = freezer_self_freezing_read, 311 test_bit(CGRP_FROZEN, &cgrp->flags));
464 }, 312 cgroup_file_notify(&cgrp->events_file);
465 { 313 }
466 .name = "parent_freezing", 314}
467 .flags = CFTYPE_NOT_ON_ROOT,
468 .read_u64 = freezer_parent_freezing_read,
469 },
470 { } /* terminate */
471};
472
473struct cgroup_subsys freezer_cgrp_subsys = {
474 .css_alloc = freezer_css_alloc,
475 .css_online = freezer_css_online,
476 .css_offline = freezer_css_offline,
477 .css_free = freezer_css_free,
478 .attach = freezer_attach,
479 .fork = freezer_fork,
480 .legacy_cftypes = files,
481};
diff --git a/kernel/cgroup/legacy_freezer.c b/kernel/cgroup/legacy_freezer.c
new file mode 100644
index 000000000000..08236798d173
--- /dev/null
+++ b/kernel/cgroup/legacy_freezer.c
@@ -0,0 +1,481 @@
1/*
2 * cgroup_freezer.c - control group freezer subsystem
3 *
4 * Copyright IBM Corporation, 2007
5 *
6 * Author : Cedric Le Goater <clg@fr.ibm.com>
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms of version 2.1 of the GNU Lesser General Public License
10 * as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope that it would be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
15 */
16
17#include <linux/export.h>
18#include <linux/slab.h>
19#include <linux/cgroup.h>
20#include <linux/fs.h>
21#include <linux/uaccess.h>
22#include <linux/freezer.h>
23#include <linux/seq_file.h>
24#include <linux/mutex.h>
25
26/*
27 * A cgroup is freezing if any FREEZING flags are set. FREEZING_SELF is
28 * set if "FROZEN" is written to freezer.state cgroupfs file, and cleared
29 * for "THAWED". FREEZING_PARENT is set if the parent freezer is FREEZING
30 * for whatever reason. IOW, a cgroup has FREEZING_PARENT set if one of
31 * its ancestors has FREEZING_SELF set.
32 */
33enum freezer_state_flags {
34 CGROUP_FREEZER_ONLINE = (1 << 0), /* freezer is fully online */
35 CGROUP_FREEZING_SELF = (1 << 1), /* this freezer is freezing */
36 CGROUP_FREEZING_PARENT = (1 << 2), /* the parent freezer is freezing */
37 CGROUP_FROZEN = (1 << 3), /* this and its descendants frozen */
38
39 /* mask for all FREEZING flags */
40 CGROUP_FREEZING = CGROUP_FREEZING_SELF | CGROUP_FREEZING_PARENT,
41};
42
43struct freezer {
44 struct cgroup_subsys_state css;
45 unsigned int state;
46};
47
48static DEFINE_MUTEX(freezer_mutex);
49
50static inline struct freezer *css_freezer(struct cgroup_subsys_state *css)
51{
52 return css ? container_of(css, struct freezer, css) : NULL;
53}
54
55static inline struct freezer *task_freezer(struct task_struct *task)
56{
57 return css_freezer(task_css(task, freezer_cgrp_id));
58}
59
60static struct freezer *parent_freezer(struct freezer *freezer)
61{
62 return css_freezer(freezer->css.parent);
63}
64
65bool cgroup_freezing(struct task_struct *task)
66{
67 bool ret;
68
69 rcu_read_lock();
70 ret = task_freezer(task)->state & CGROUP_FREEZING;
71 rcu_read_unlock();
72
73 return ret;
74}
75
76static const char *freezer_state_strs(unsigned int state)
77{
78 if (state & CGROUP_FROZEN)
79 return "FROZEN";
80 if (state & CGROUP_FREEZING)
81 return "FREEZING";
82 return "THAWED";
83};
84
85static struct cgroup_subsys_state *
86freezer_css_alloc(struct cgroup_subsys_state *parent_css)
87{
88 struct freezer *freezer;
89
90 freezer = kzalloc(sizeof(struct freezer), GFP_KERNEL);
91 if (!freezer)
92 return ERR_PTR(-ENOMEM);
93
94 return &freezer->css;
95}
96
97/**
98 * freezer_css_online - commit creation of a freezer css
99 * @css: css being created
100 *
101 * We're committing to creation of @css. Mark it online and inherit
102 * parent's freezing state while holding both parent's and our
103 * freezer->lock.
104 */
105static int freezer_css_online(struct cgroup_subsys_state *css)
106{
107 struct freezer *freezer = css_freezer(css);
108 struct freezer *parent = parent_freezer(freezer);
109
110 mutex_lock(&freezer_mutex);
111
112 freezer->state |= CGROUP_FREEZER_ONLINE;
113
114 if (parent && (parent->state & CGROUP_FREEZING)) {
115 freezer->state |= CGROUP_FREEZING_PARENT | CGROUP_FROZEN;
116 atomic_inc(&system_freezing_cnt);
117 }
118
119 mutex_unlock(&freezer_mutex);
120 return 0;
121}
122
123/**
124 * freezer_css_offline - initiate destruction of a freezer css
125 * @css: css being destroyed
126 *
127 * @css is going away. Mark it dead and decrement system_freezing_count if
128 * it was holding one.
129 */
130static void freezer_css_offline(struct cgroup_subsys_state *css)
131{
132 struct freezer *freezer = css_freezer(css);
133
134 mutex_lock(&freezer_mutex);
135
136 if (freezer->state & CGROUP_FREEZING)
137 atomic_dec(&system_freezing_cnt);
138
139 freezer->state = 0;
140
141 mutex_unlock(&freezer_mutex);
142}
143
144static void freezer_css_free(struct cgroup_subsys_state *css)
145{
146 kfree(css_freezer(css));
147}
148
149/*
150 * Tasks can be migrated into a different freezer anytime regardless of its
151 * current state. freezer_attach() is responsible for making new tasks
152 * conform to the current state.
153 *
154 * Freezer state changes and task migration are synchronized via
155 * @freezer->lock. freezer_attach() makes the new tasks conform to the
156 * current state and all following state changes can see the new tasks.
157 */
158static void freezer_attach(struct cgroup_taskset *tset)
159{
160 struct task_struct *task;
161 struct cgroup_subsys_state *new_css;
162
163 mutex_lock(&freezer_mutex);
164
165 /*
166 * Make the new tasks conform to the current state of @new_css.
167 * For simplicity, when migrating any task to a FROZEN cgroup, we
168 * revert it to FREEZING and let update_if_frozen() determine the
169 * correct state later.
170 *
171 * Tasks in @tset are on @new_css but may not conform to its
172 * current state before executing the following - !frozen tasks may
173 * be visible in a FROZEN cgroup and frozen tasks in a THAWED one.
174 */
175 cgroup_taskset_for_each(task, new_css, tset) {
176 struct freezer *freezer = css_freezer(new_css);
177
178 if (!(freezer->state & CGROUP_FREEZING)) {
179 __thaw_task(task);
180 } else {
181 freeze_task(task);
182 /* clear FROZEN and propagate upwards */
183 while (freezer && (freezer->state & CGROUP_FROZEN)) {
184 freezer->state &= ~CGROUP_FROZEN;
185 freezer = parent_freezer(freezer);
186 }
187 }
188 }
189
190 mutex_unlock(&freezer_mutex);
191}
192
193/**
194 * freezer_fork - cgroup post fork callback
195 * @task: a task which has just been forked
196 *
197 * @task has just been created and should conform to the current state of
198 * the cgroup_freezer it belongs to. This function may race against
199 * freezer_attach(). Losing to freezer_attach() means that we don't have
200 * to do anything as freezer_attach() will put @task into the appropriate
201 * state.
202 */
203static void freezer_fork(struct task_struct *task)
204{
205 struct freezer *freezer;
206
207 /*
208 * The root cgroup is non-freezable, so we can skip locking the
209 * freezer. This is safe regardless of race with task migration.
210 * If we didn't race or won, skipping is obviously the right thing
211 * to do. If we lost and root is the new cgroup, noop is still the
212 * right thing to do.
213 */
214 if (task_css_is_root(task, freezer_cgrp_id))
215 return;
216
217 mutex_lock(&freezer_mutex);
218 rcu_read_lock();
219
220 freezer = task_freezer(task);
221 if (freezer->state & CGROUP_FREEZING)
222 freeze_task(task);
223
224 rcu_read_unlock();
225 mutex_unlock(&freezer_mutex);
226}
227
228/**
229 * update_if_frozen - update whether a cgroup finished freezing
230 * @css: css of interest
231 *
232 * Once FREEZING is initiated, transition to FROZEN is lazily updated by
233 * calling this function. If the current state is FREEZING but not FROZEN,
234 * this function checks whether all tasks of this cgroup and the descendant
235 * cgroups finished freezing and, if so, sets FROZEN.
236 *
237 * The caller is responsible for grabbing RCU read lock and calling
238 * update_if_frozen() on all descendants prior to invoking this function.
239 *
240 * Task states and freezer state might disagree while tasks are being
241 * migrated into or out of @css, so we can't verify task states against
242 * @freezer state here. See freezer_attach() for details.
243 */
244static void update_if_frozen(struct cgroup_subsys_state *css)
245{
246 struct freezer *freezer = css_freezer(css);
247 struct cgroup_subsys_state *pos;
248 struct css_task_iter it;
249 struct task_struct *task;
250
251 lockdep_assert_held(&freezer_mutex);
252
253 if (!(freezer->state & CGROUP_FREEZING) ||
254 (freezer->state & CGROUP_FROZEN))
255 return;
256
257 /* are all (live) children frozen? */
258 rcu_read_lock();
259 css_for_each_child(pos, css) {
260 struct freezer *child = css_freezer(pos);
261
262 if ((child->state & CGROUP_FREEZER_ONLINE) &&
263 !(child->state & CGROUP_FROZEN)) {
264 rcu_read_unlock();
265 return;
266 }
267 }
268 rcu_read_unlock();
269
270 /* are all tasks frozen? */
271 css_task_iter_start(css, 0, &it);
272
273 while ((task = css_task_iter_next(&it))) {
274 if (freezing(task)) {
275 /*
276 * freezer_should_skip() indicates that the task
277 * should be skipped when determining freezing
278 * completion. Consider it frozen in addition to
279 * the usual frozen condition.
280 */
281 if (!frozen(task) && !freezer_should_skip(task))
282 goto out_iter_end;
283 }
284 }
285
286 freezer->state |= CGROUP_FROZEN;
287out_iter_end:
288 css_task_iter_end(&it);
289}
290
291static int freezer_read(struct seq_file *m, void *v)
292{
293 struct cgroup_subsys_state *css = seq_css(m), *pos;
294
295 mutex_lock(&freezer_mutex);
296 rcu_read_lock();
297
298 /* update states bottom-up */
299 css_for_each_descendant_post(pos, css) {
300 if (!css_tryget_online(pos))
301 continue;
302 rcu_read_unlock();
303
304 update_if_frozen(pos);
305
306 rcu_read_lock();
307 css_put(pos);
308 }
309
310 rcu_read_unlock();
311 mutex_unlock(&freezer_mutex);
312
313 seq_puts(m, freezer_state_strs(css_freezer(css)->state));
314 seq_putc(m, '\n');
315 return 0;
316}
317
318static void freeze_cgroup(struct freezer *freezer)
319{
320 struct css_task_iter it;
321 struct task_struct *task;
322
323 css_task_iter_start(&freezer->css, 0, &it);
324 while ((task = css_task_iter_next(&it)))
325 freeze_task(task);
326 css_task_iter_end(&it);
327}
328
329static void unfreeze_cgroup(struct freezer *freezer)
330{
331 struct css_task_iter it;
332 struct task_struct *task;
333
334 css_task_iter_start(&freezer->css, 0, &it);
335 while ((task = css_task_iter_next(&it)))
336 __thaw_task(task);
337 css_task_iter_end(&it);
338}
339
/**
 * freezer_apply_state - apply state change to a single cgroup_freezer
 * @freezer: freezer to apply state change to
 * @freeze: whether to freeze or unfreeze
 * @state: CGROUP_FREEZING_* flag to set or clear
 *
 * Set or clear @state on @freezer according to @freeze, and perform
 * freezing or thawing as necessary. The global system_freezing_cnt is
 * bumped on the first FREEZING bit going up and dropped when the last
 * one goes down.
 */
static void freezer_apply_state(struct freezer *freezer, bool freeze,
                                unsigned int state)
{
        /* also synchronizes against task migration, see freezer_attach() */
        lockdep_assert_held(&freezer_mutex);

        /* nothing to do for a cgroup that isn't online yet/anymore */
        if (!(freezer->state & CGROUP_FREEZER_ONLINE))
                return;

        if (freeze) {
                /* first FREEZING bit set: account one more freezing cgroup */
                if (!(freezer->state & CGROUP_FREEZING))
                        atomic_inc(&system_freezing_cnt);
                freezer->state |= state;
                freeze_cgroup(freezer);
        } else {
                bool was_freezing = freezer->state & CGROUP_FREEZING;

                freezer->state &= ~state;

                /* thaw only once no FREEZING bits remain */
                if (!(freezer->state & CGROUP_FREEZING)) {
                        if (was_freezing)
                                atomic_dec(&system_freezing_cnt);
                        freezer->state &= ~CGROUP_FROZEN;
                        unfreeze_cgroup(freezer);
                }
        }
}
376
/**
 * freezer_change_state - change the freezing state of a cgroup_freezer
 * @freezer: freezer of interest
 * @freeze: whether to freeze or thaw
 *
 * Freeze or thaw @freezer according to @freeze. The operations are
 * recursive - all descendants of @freezer will be affected.
 */
static void freezer_change_state(struct freezer *freezer, bool freeze)
{
        struct cgroup_subsys_state *pos;

        /*
         * Update all its descendants in pre-order traversal. Each
         * descendant will try to inherit its parent's FREEZING state as
         * CGROUP_FREEZING_PARENT.
         */
        mutex_lock(&freezer_mutex);
        rcu_read_lock();
        css_for_each_descendant_pre(pos, &freezer->css) {
                struct freezer *pos_f = css_freezer(pos);
                struct freezer *parent = parent_freezer(pos_f);

                /* pin @pos so the RCU lock can be dropped around apply */
                if (!css_tryget_online(pos))
                        continue;
                rcu_read_unlock();

                if (pos_f == freezer)
                        /* the origin cgroup toggles its SELF state */
                        freezer_apply_state(pos_f, freeze,
                                            CGROUP_FREEZING_SELF);
                else
                        /* descendants inherit the parent's FREEZING state */
                        freezer_apply_state(pos_f,
                                            parent->state & CGROUP_FREEZING,
                                            CGROUP_FREEZING_PARENT);

                rcu_read_lock();
                css_put(pos);
        }
        rcu_read_unlock();
        mutex_unlock(&freezer_mutex);
}
418
419static ssize_t freezer_write(struct kernfs_open_file *of,
420 char *buf, size_t nbytes, loff_t off)
421{
422 bool freeze;
423
424 buf = strstrip(buf);
425
426 if (strcmp(buf, freezer_state_strs(0)) == 0)
427 freeze = false;
428 else if (strcmp(buf, freezer_state_strs(CGROUP_FROZEN)) == 0)
429 freeze = true;
430 else
431 return -EINVAL;
432
433 freezer_change_state(css_freezer(of_css(of)), freeze);
434 return nbytes;
435}
436
437static u64 freezer_self_freezing_read(struct cgroup_subsys_state *css,
438 struct cftype *cft)
439{
440 struct freezer *freezer = css_freezer(css);
441
442 return (bool)(freezer->state & CGROUP_FREEZING_SELF);
443}
444
445static u64 freezer_parent_freezing_read(struct cgroup_subsys_state *css,
446 struct cftype *cft)
447{
448 struct freezer *freezer = css_freezer(css);
449
450 return (bool)(freezer->state & CGROUP_FREEZING_PARENT);
451}
452
/* Legacy (cgroup v1) freezer control files; none appear on the root. */
static struct cftype files[] = {
        {
                /* read/write current or desired freezer state */
                .name = "state",
                .flags = CFTYPE_NOT_ON_ROOT,
                .seq_show = freezer_read,
                .write = freezer_write,
        },
        {
                /* CGROUP_FREEZING_SELF bit as 0/1 */
                .name = "self_freezing",
                .flags = CFTYPE_NOT_ON_ROOT,
                .read_u64 = freezer_self_freezing_read,
        },
        {
                /* CGROUP_FREEZING_PARENT bit as 0/1 */
                .name = "parent_freezing",
                .flags = CFTYPE_NOT_ON_ROOT,
                .read_u64 = freezer_parent_freezing_read,
        },
        { } /* terminate */
};
472
/* Controller registration for the legacy (v1) freezer subsystem. */
struct cgroup_subsys freezer_cgrp_subsys = {
        .css_alloc = freezer_css_alloc,
        .css_online = freezer_css_online,
        .css_offline = freezer_css_offline,
        .css_free = freezer_css_free,
        .attach = freezer_attach,
        .fork = freezer_fork,
        .legacy_cftypes = files,        /* v1-only: no unified-hierarchy files */
};
diff --git a/kernel/fork.c b/kernel/fork.c
index 8b03d93ba068..5359facf9867 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1225,7 +1225,9 @@ static int wait_for_vfork_done(struct task_struct *child,
1225 int killed; 1225 int killed;
1226 1226
1227 freezer_do_not_count(); 1227 freezer_do_not_count();
1228 cgroup_enter_frozen();
1228 killed = wait_for_completion_killable(vfork); 1229 killed = wait_for_completion_killable(vfork);
1230 cgroup_leave_frozen(false);
1229 freezer_count(); 1231 freezer_count();
1230 1232
1231 if (killed) { 1233 if (killed) {
diff --git a/kernel/signal.c b/kernel/signal.c
index cd83cc376767..62f9aea4a15a 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -43,6 +43,7 @@
43#include <linux/compiler.h> 43#include <linux/compiler.h>
44#include <linux/posix-timers.h> 44#include <linux/posix-timers.h>
45#include <linux/livepatch.h> 45#include <linux/livepatch.h>
46#include <linux/cgroup.h>
46 47
47#define CREATE_TRACE_POINTS 48#define CREATE_TRACE_POINTS
48#include <trace/events/signal.h> 49#include <trace/events/signal.h>
@@ -146,9 +147,10 @@ static inline bool has_pending_signals(sigset_t *signal, sigset_t *blocked)
146 147
147static bool recalc_sigpending_tsk(struct task_struct *t) 148static bool recalc_sigpending_tsk(struct task_struct *t)
148{ 149{
149 if ((t->jobctl & JOBCTL_PENDING_MASK) || 150 if ((t->jobctl & (JOBCTL_PENDING_MASK | JOBCTL_TRAP_FREEZE)) ||
150 PENDING(&t->pending, &t->blocked) || 151 PENDING(&t->pending, &t->blocked) ||
151 PENDING(&t->signal->shared_pending, &t->blocked)) { 152 PENDING(&t->signal->shared_pending, &t->blocked) ||
153 cgroup_task_frozen(t)) {
152 set_tsk_thread_flag(t, TIF_SIGPENDING); 154 set_tsk_thread_flag(t, TIF_SIGPENDING);
153 return true; 155 return true;
154 } 156 }
@@ -2108,6 +2110,7 @@ static void ptrace_stop(int exit_code, int why, int clear_code, kernel_siginfo_t
2108 preempt_disable(); 2110 preempt_disable();
2109 read_unlock(&tasklist_lock); 2111 read_unlock(&tasklist_lock);
2110 preempt_enable_no_resched(); 2112 preempt_enable_no_resched();
2113 cgroup_enter_frozen();
2111 freezable_schedule(); 2114 freezable_schedule();
2112 } else { 2115 } else {
2113 /* 2116 /*
@@ -2286,6 +2289,7 @@ static bool do_signal_stop(int signr)
2286 } 2289 }
2287 2290
2288 /* Now we don't run again until woken by SIGCONT or SIGKILL */ 2291 /* Now we don't run again until woken by SIGCONT or SIGKILL */
2292 cgroup_enter_frozen();
2289 freezable_schedule(); 2293 freezable_schedule();
2290 return true; 2294 return true;
2291 } else { 2295 } else {
@@ -2332,6 +2336,43 @@ static void do_jobctl_trap(void)
2332 } 2336 }
2333} 2337}
2334 2338
/**
 * do_freezer_trap - handle the freezer jobctl trap
 *
 * Puts the task into frozen state, if only the task is not about to quit.
 * In this case it drops JOBCTL_TRAP_FREEZE.
 *
 * CONTEXT:
 * Must be called with @current->sighand->siglock held,
 * which is always released before returning.
 */
static void do_freezer_trap(void)
        __releases(&current->sighand->siglock)
{
        /*
         * If there are other trap bits pending except JOBCTL_TRAP_FREEZE,
         * let's make another loop to give it a chance to be handled.
         * In any case, we'll return back.
         */
        if ((current->jobctl & (JOBCTL_PENDING_MASK | JOBCTL_TRAP_FREEZE)) !=
            JOBCTL_TRAP_FREEZE) {
                spin_unlock_irq(&current->sighand->siglock);
                return;
        }

        /*
         * Now we're sure that there is no pending fatal signal and no
         * pending traps. Clear TIF_SIGPENDING to not get out of schedule()
         * immediately (if there is a non-fatal signal pending), and
         * put the task into sleep.
         */
        __set_current_state(TASK_INTERRUPTIBLE);
        clear_thread_flag(TIF_SIGPENDING);
        spin_unlock_irq(&current->sighand->siglock);
        cgroup_enter_frozen();
        freezable_schedule();
}
2375
2335static int ptrace_signal(int signr, kernel_siginfo_t *info) 2376static int ptrace_signal(int signr, kernel_siginfo_t *info)
2336{ 2377{
2337 /* 2378 /*
@@ -2452,9 +2493,24 @@ relock:
2452 do_signal_stop(0)) 2493 do_signal_stop(0))
2453 goto relock; 2494 goto relock;
2454 2495
2455 if (unlikely(current->jobctl & JOBCTL_TRAP_MASK)) { 2496 if (unlikely(current->jobctl &
2456 do_jobctl_trap(); 2497 (JOBCTL_TRAP_MASK | JOBCTL_TRAP_FREEZE))) {
2498 if (current->jobctl & JOBCTL_TRAP_MASK) {
2499 do_jobctl_trap();
2500 spin_unlock_irq(&sighand->siglock);
2501 } else if (current->jobctl & JOBCTL_TRAP_FREEZE)
2502 do_freezer_trap();
2503
2504 goto relock;
2505 }
2506
2507 /*
2508 * If the task is leaving the frozen state, let's update
2509 * cgroup counters and reset the frozen bit.
2510 */
2511 if (unlikely(cgroup_task_frozen(current))) {
2457 spin_unlock_irq(&sighand->siglock); 2512 spin_unlock_irq(&sighand->siglock);
2513 cgroup_leave_frozen(false);
2458 goto relock; 2514 goto relock;
2459 } 2515 }
2460 2516
@@ -2550,6 +2606,8 @@ relock:
2550 2606
2551 fatal: 2607 fatal:
2552 spin_unlock_irq(&sighand->siglock); 2608 spin_unlock_irq(&sighand->siglock);
2609 if (unlikely(cgroup_task_frozen(current)))
2610 cgroup_leave_frozen(true);
2553 2611
2554 /* 2612 /*
2555 * Anything else is fatal, maybe with a core dump. 2613 * Anything else is fatal, maybe with a core dump.
diff --git a/tools/testing/selftests/cgroup/.gitignore b/tools/testing/selftests/cgroup/.gitignore
index adacda50a4b2..7f9835624793 100644
--- a/tools/testing/selftests/cgroup/.gitignore
+++ b/tools/testing/selftests/cgroup/.gitignore
@@ -1,2 +1,3 @@
1test_memcontrol 1test_memcontrol
2test_core 2test_core
3test_freezer
diff --git a/tools/testing/selftests/cgroup/Makefile b/tools/testing/selftests/cgroup/Makefile
index 23fbaa4a9630..8d369b6a2069 100644
--- a/tools/testing/selftests/cgroup/Makefile
+++ b/tools/testing/selftests/cgroup/Makefile
@@ -5,8 +5,10 @@ all:
5 5
6TEST_GEN_PROGS = test_memcontrol 6TEST_GEN_PROGS = test_memcontrol
7TEST_GEN_PROGS += test_core 7TEST_GEN_PROGS += test_core
8TEST_GEN_PROGS += test_freezer
8 9
9include ../lib.mk 10include ../lib.mk
10 11
11$(OUTPUT)/test_memcontrol: cgroup_util.c 12$(OUTPUT)/test_memcontrol: cgroup_util.c
12$(OUTPUT)/test_core: cgroup_util.c 13$(OUTPUT)/test_core: cgroup_util.c
14$(OUTPUT)/test_freezer: cgroup_util.c
diff --git a/tools/testing/selftests/cgroup/cgroup_util.c b/tools/testing/selftests/cgroup/cgroup_util.c
index 14c9fe284806..4c223266299a 100644
--- a/tools/testing/selftests/cgroup/cgroup_util.c
+++ b/tools/testing/selftests/cgroup/cgroup_util.c
@@ -74,6 +74,16 @@ char *cg_name_indexed(const char *root, const char *name, int index)
74 return ret; 74 return ret;
75} 75}
76 76
/*
 * Allocate and return the path "<cgroup>/<control>".
 * The caller owns the returned buffer and must free() it.
 * Returns NULL if the allocation fails (the original called snprintf()
 * on the unchecked malloc() result, crashing on OOM).
 */
char *cg_control(const char *cgroup, const char *control)
{
        size_t len = strlen(cgroup) + strlen(control) + 2;
        char *ret = malloc(len);

        if (ret)
                snprintf(ret, len, "%s/%s", cgroup, control);

        return ret;
}
86
77int cg_read(const char *cgroup, const char *control, char *buf, size_t len) 87int cg_read(const char *cgroup, const char *control, char *buf, size_t len)
78{ 88{
79 char path[PATH_MAX]; 89 char path[PATH_MAX];
@@ -196,7 +206,32 @@ int cg_create(const char *cgroup)
196 return mkdir(cgroup, 0644); 206 return mkdir(cgroup, 0644);
197} 207}
198 208
/*
 * Poll @cgroup's cgroup.procs until it lists at least @count processes,
 * checking every 100ms for up to 11 attempts (~1.1s total).
 * Returns 0 on success, -1 on timeout or read failure.
 *
 * NOTE(review): the ">= count" threshold makes a wait for count == 0
 * succeed immediately even while processes remain — confirm callers
 * expect a lower bound rather than an exact match.
 */
int cg_wait_for_proc_count(const char *cgroup, int count)
{
        char buf[10 * PAGE_SIZE] = {0};
        int attempts;
        char *ptr;

        for (attempts = 10; attempts >= 0; attempts--) {
                int nr = 0;

                if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf)))
                        break;

                /* cgroup.procs has one pid per line: count newlines */
                for (ptr = buf; *ptr; ptr++)
                        if (*ptr == '\n')
                                nr++;

                if (nr >= count)
                        return 0;

                usleep(100000);
        }

        return -1;
}
233
234int cg_killall(const char *cgroup)
200{ 235{
201 char buf[PAGE_SIZE]; 236 char buf[PAGE_SIZE];
202 char *ptr = buf; 237 char *ptr = buf;
@@ -227,9 +262,7 @@ int cg_destroy(const char *cgroup)
227retry: 262retry:
228 ret = rmdir(cgroup); 263 ret = rmdir(cgroup);
229 if (ret && errno == EBUSY) { 264 if (ret && errno == EBUSY) {
230 ret = cg_killall(cgroup); 265 cg_killall(cgroup);
231 if (ret)
232 return ret;
233 usleep(100); 266 usleep(100);
234 goto retry; 267 goto retry;
235 } 268 }
@@ -240,6 +273,14 @@ retry:
240 return ret; 273 return ret;
241} 274}
242 275
/* Move process @pid into @cgroup by writing it to cgroup.procs. */
int cg_enter(const char *cgroup, int pid)
{
        char idbuf[64];

        snprintf(idbuf, sizeof(idbuf), "%d", pid);

        return cg_write(cgroup, "cgroup.procs", idbuf);
}
283
243int cg_enter_current(const char *cgroup) 284int cg_enter_current(const char *cgroup)
244{ 285{
245 char pidbuf[64]; 286 char pidbuf[64];
@@ -369,3 +410,12 @@ int set_oom_adj_score(int pid, int score)
369 close(fd); 410 close(fd);
370 return 0; 411 return 0;
371} 412}
413
/*
 * Read /proc/<pid>/<item> into @buf (at most @size bytes).
 *
 * NOTE(review): read_text() presumably returns a byte count / -1 as a
 * wide signed type; the char return type here truncates that value, and
 * on platforms where char is unsigned a caller's "== -1" check can
 * never match. This should likely be ssize_t here and in cgroup_util.h
 * — confirm and fix both declarations together.
 */
char proc_read_text(int pid, const char *item, char *buf, size_t size)
{
        char path[PATH_MAX];

        snprintf(path, sizeof(path), "/proc/%d/%s", pid, item);

        return read_text(path, buf, size);
}
diff --git a/tools/testing/selftests/cgroup/cgroup_util.h b/tools/testing/selftests/cgroup/cgroup_util.h
index 9ac8b7958f83..c72f28046bfa 100644
--- a/tools/testing/selftests/cgroup/cgroup_util.h
+++ b/tools/testing/selftests/cgroup/cgroup_util.h
@@ -18,6 +18,7 @@ static inline int values_close(long a, long b, int err)
18extern int cg_find_unified_root(char *root, size_t len); 18extern int cg_find_unified_root(char *root, size_t len);
19extern char *cg_name(const char *root, const char *name); 19extern char *cg_name(const char *root, const char *name);
20extern char *cg_name_indexed(const char *root, const char *name, int index); 20extern char *cg_name_indexed(const char *root, const char *name, int index);
21extern char *cg_control(const char *cgroup, const char *control);
21extern int cg_create(const char *cgroup); 22extern int cg_create(const char *cgroup);
22extern int cg_destroy(const char *cgroup); 23extern int cg_destroy(const char *cgroup);
23extern int cg_read(const char *cgroup, const char *control, 24extern int cg_read(const char *cgroup, const char *control,
@@ -32,6 +33,7 @@ extern int cg_write(const char *cgroup, const char *control, char *buf);
32extern int cg_run(const char *cgroup, 33extern int cg_run(const char *cgroup,
33 int (*fn)(const char *cgroup, void *arg), 34 int (*fn)(const char *cgroup, void *arg),
34 void *arg); 35 void *arg);
36extern int cg_enter(const char *cgroup, int pid);
35extern int cg_enter_current(const char *cgroup); 37extern int cg_enter_current(const char *cgroup);
36extern int cg_run_nowait(const char *cgroup, 38extern int cg_run_nowait(const char *cgroup,
37 int (*fn)(const char *cgroup, void *arg), 39 int (*fn)(const char *cgroup, void *arg),
@@ -41,3 +43,6 @@ extern int alloc_pagecache(int fd, size_t size);
41extern int alloc_anon(const char *cgroup, void *arg); 43extern int alloc_anon(const char *cgroup, void *arg);
42extern int is_swap_enabled(void); 44extern int is_swap_enabled(void);
43extern int set_oom_adj_score(int pid, int score); 45extern int set_oom_adj_score(int pid, int score);
46extern int cg_wait_for_proc_count(const char *cgroup, int count);
47extern int cg_killall(const char *cgroup);
48extern char proc_read_text(int pid, const char *item, char *buf, size_t size);
diff --git a/tools/testing/selftests/cgroup/test_freezer.c b/tools/testing/selftests/cgroup/test_freezer.c
new file mode 100644
index 000000000000..2bfddb6d6d3b
--- /dev/null
+++ b/tools/testing/selftests/cgroup/test_freezer.c
@@ -0,0 +1,851 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2#include <stdbool.h>
3#include <linux/limits.h>
4#include <sys/ptrace.h>
5#include <sys/types.h>
6#include <sys/mman.h>
7#include <unistd.h>
8#include <stdio.h>
9#include <errno.h>
10#include <poll.h>
11#include <stdlib.h>
12#include <sys/inotify.h>
13#include <string.h>
14#include <sys/types.h>
15#include <sys/wait.h>
16
17#include "../kselftest.h"
18#include "cgroup_util.h"
19
20#define DEBUG
21#ifdef DEBUG
22#define debug(args...) fprintf(stderr, args)
23#else
24#define debug(args...)
25#endif
26
/*
 * Check that the cgroup.events::frozen value of @cgroup matches
 * @frozen. Returns 0 on a match, -1 (with a debug message) otherwise.
 */
static int cg_check_frozen(const char *cgroup, bool frozen)
{
        const char *needle = frozen ? "frozen 1" : "frozen 0";

        if (cg_read_strstr(cgroup, "cgroup.events", needle) == 0)
                return 0;

        if (frozen)
                debug("Cgroup %s isn't frozen\n", cgroup);
        else
                debug("Cgroup %s is frozen\n", cgroup);

        return -1;
}
49
/*
 * Set the cgroup.freeze knob of @cgroup without waiting for the state
 * transition to complete.
 */
static int cg_freeze_nowait(const char *cgroup, bool freeze)
{
        char *val = freeze ? "1" : "0";

        return cg_write(cgroup, "cgroup.freeze", val);
}
57
/*
 * Prepare for waiting on the cgroup.events file of @cgroup: create an
 * inotify instance watching it for modifications.
 * Returns the inotify fd on success and -1 on failure.
 */
static int cg_prepare_for_wait(const char *cgroup)
{
        char *path;
        int fd, ret;

        fd = inotify_init1(0);
        if (fd == -1) {
                debug("Error: inotify_init1() failed\n");
                return fd;
        }

        /* cg_control() allocates; the original leaked this buffer */
        path = cg_control(cgroup, "cgroup.events");
        if (!path) {
                close(fd);
                return -1;
        }

        ret = inotify_add_watch(fd, path, IN_MODIFY);
        free(path);
        if (ret == -1) {
                debug("Error: inotify_add_watch() failed\n");
                close(fd);
                /*
                 * The original returned the (positive) closed fd here,
                 * which callers checking "fd < 0" mistook for success.
                 */
                fd = -1;
        }

        return fd;
}
80
/*
 * Wait for an event on the inotify descriptor @fd.
 * Returns 0 once input is readable, -1 on poll() failure.
 *
 * NOTE(review): the intent was "no events for 10 seconds is an error",
 * but when poll() times out it returns 0, neither branch below breaks,
 * and the loop simply polls again — so this can block indefinitely.
 * Confirm whether a timeout should set ret = -1 and break (callers such
 * as cg_enter_and_wait_for_frozen() currently rely on the blocking).
 */
static int cg_wait_for(int fd)
{
        int ret = -1;
        struct pollfd fds = {
                .fd = fd,
                .events = POLLIN,
        };

        while (true) {
                ret = poll(&fds, 1, 10000);

                if (ret == -1) {
                        /* restart the wait if a signal interrupted poll() */
                        if (errno == EINTR)
                                continue;
                        debug("Error: poll() failed\n");
                        break;
                }

                if (ret > 0 && fds.revents & POLLIN) {
                        ret = 0;
                        break;
                }
        }

        return ret;
}
111
/*
 * Attach task @pid to @cgroup and wait until the cgroup reaches the
 * desired @frozen state. All transient events (e.g. populated) are
 * ignored. Returns 0 on success, -1 on failure.
 */
static int cg_enter_and_wait_for_frozen(const char *cgroup, int pid,
                                        bool frozen)
{
        int fd, ret = -1;
        int attempts;

        fd = cg_prepare_for_wait(cgroup);
        if (fd < 0)
                return fd;

        ret = cg_enter(cgroup, pid);
        if (ret)
                goto out;

        for (attempts = 0; attempts < 10; attempts++) {
                ret = cg_wait_for(fd);
                if (ret)
                        break;

                /* transient event? keep waiting for the next one */
                ret = cg_check_frozen(cgroup, frozen);
                if (ret)
                        continue;

                /*
                 * Desired state reached — stop. The original kept
                 * looping after a successful check and blocked waiting
                 * for further events that may never arrive.
                 */
                break;
        }

out:
        close(fd);
        return ret;
}
144
/*
 * Write the requested value to cgroup.freeze, wait for the inotify
 * notification on cgroup.events (10s timeout handled by cg_wait_for())
 * and verify that @cgroup reached the requested state.
 * Returns 0 on success, -1 otherwise.
 */
static int cg_freeze_wait(const char *cgroup, bool freeze)
{
        int ret;
        int fd = cg_prepare_for_wait(cgroup);

        if (fd < 0)
                return fd;

        ret = cg_freeze_nowait(cgroup, freeze);
        if (ret)
                debug("Error: cg_freeze_nowait() failed\n");

        if (!ret)
                ret = cg_wait_for(fd);

        if (!ret)
                ret = cg_check_frozen(cgroup, freeze);

        close(fd);
        return ret;
}
173
/*
 * A simple child process which sleeps in a loop until it gets
 * re-parented (i.e. until its original parent exits).
 */
static int child_fn(const char *cgroup, void *arg)
{
        const int original_ppid = getppid();

        while (getppid() == original_ppid)
                usleep(1000);

        /* report 0 (success) once we really have been re-parented */
        return getppid() == original_ppid;
}
187
/*
 * A simple test for the cgroup freezer: populates the cgroup with 100
 * running processes and freezes it. Then unfreezes it. Then it kills
 * all processes and destroys the cgroup.
 */
static int test_cgfreezer_simple(const char *root)
{
        int ret = KSFT_FAIL;
        char *cgroup = NULL;
        int i;

        cgroup = cg_name(root, "cg_test_simple");
        if (!cgroup)
                goto cleanup;

        if (cg_create(cgroup))
                goto cleanup;

        /* spawn 100 looping children inside the cgroup */
        for (i = 0; i < 100; i++)
                cg_run_nowait(cgroup, child_fn, NULL);

        /* wait until all of them show up in cgroup.procs */
        if (cg_wait_for_proc_count(cgroup, 100))
                goto cleanup;

        /* the cgroup must start out unfrozen */
        if (cg_check_frozen(cgroup, false))
                goto cleanup;

        /* freeze, then unfreeze, waiting for each transition */
        if (cg_freeze_wait(cgroup, true))
                goto cleanup;

        if (cg_freeze_wait(cgroup, false))
                goto cleanup;

        ret = KSFT_PASS;

cleanup:
        /* cg_destroy() also kills any remaining member processes */
        if (cgroup)
                cg_destroy(cgroup);
        free(cgroup);
        return ret;
}
229
230/*
231 * The test creates the following hierarchy:
232 * A
233 * / / \ \
234 * B E I K
235 * /\ |
236 * C D F
237 * |
238 * G
239 * |
240 * H
241 *
242 * with a process in C, H and 3 processes in K.
243 * Then it tries to freeze and unfreeze the whole tree.
244 */
245static int test_cgfreezer_tree(const char *root)
246{
247 char *cgroup[10] = {0};
248 int ret = KSFT_FAIL;
249 int i;
250
251 cgroup[0] = cg_name(root, "cg_test_tree_A");
252 if (!cgroup[0])
253 goto cleanup;
254
255 cgroup[1] = cg_name(cgroup[0], "B");
256 if (!cgroup[1])
257 goto cleanup;
258
259 cgroup[2] = cg_name(cgroup[1], "C");
260 if (!cgroup[2])
261 goto cleanup;
262
263 cgroup[3] = cg_name(cgroup[1], "D");
264 if (!cgroup[3])
265 goto cleanup;
266
267 cgroup[4] = cg_name(cgroup[0], "E");
268 if (!cgroup[4])
269 goto cleanup;
270
271 cgroup[5] = cg_name(cgroup[4], "F");
272 if (!cgroup[5])
273 goto cleanup;
274
275 cgroup[6] = cg_name(cgroup[5], "G");
276 if (!cgroup[6])
277 goto cleanup;
278
279 cgroup[7] = cg_name(cgroup[6], "H");
280 if (!cgroup[7])
281 goto cleanup;
282
283 cgroup[8] = cg_name(cgroup[0], "I");
284 if (!cgroup[8])
285 goto cleanup;
286
287 cgroup[9] = cg_name(cgroup[0], "K");
288 if (!cgroup[9])
289 goto cleanup;
290
291 for (i = 0; i < 10; i++)
292 if (cg_create(cgroup[i]))
293 goto cleanup;
294
295 cg_run_nowait(cgroup[2], child_fn, NULL);
296 cg_run_nowait(cgroup[7], child_fn, NULL);
297 cg_run_nowait(cgroup[9], child_fn, NULL);
298 cg_run_nowait(cgroup[9], child_fn, NULL);
299 cg_run_nowait(cgroup[9], child_fn, NULL);
300
301 /*
302 * Wait until all child processes will enter
303 * corresponding cgroups.
304 */
305
306 if (cg_wait_for_proc_count(cgroup[2], 1) ||
307 cg_wait_for_proc_count(cgroup[7], 1) ||
308 cg_wait_for_proc_count(cgroup[9], 3))
309 goto cleanup;
310
311 /*
312 * Freeze B.
313 */
314 if (cg_freeze_wait(cgroup[1], true))
315 goto cleanup;
316
317 /*
318 * Freeze F.
319 */
320 if (cg_freeze_wait(cgroup[5], true))
321 goto cleanup;
322
323 /*
324 * Freeze G.
325 */
326 if (cg_freeze_wait(cgroup[6], true))
327 goto cleanup;
328
329 /*
330 * Check that A and E are not frozen.
331 */
332 if (cg_check_frozen(cgroup[0], false))
333 goto cleanup;
334
335 if (cg_check_frozen(cgroup[4], false))
336 goto cleanup;
337
338 /*
339 * Freeze A. Check that A, B and E are frozen.
340 */
341 if (cg_freeze_wait(cgroup[0], true))
342 goto cleanup;
343
344 if (cg_check_frozen(cgroup[1], true))
345 goto cleanup;
346
347 if (cg_check_frozen(cgroup[4], true))
348 goto cleanup;
349
350 /*
351 * Unfreeze B, F and G
352 */
353 if (cg_freeze_nowait(cgroup[1], false))
354 goto cleanup;
355
356 if (cg_freeze_nowait(cgroup[5], false))
357 goto cleanup;
358
359 if (cg_freeze_nowait(cgroup[6], false))
360 goto cleanup;
361
362 /*
363 * Check that C and H are still frozen.
364 */
365 if (cg_check_frozen(cgroup[2], true))
366 goto cleanup;
367
368 if (cg_check_frozen(cgroup[7], true))
369 goto cleanup;
370
371 /*
372 * Unfreeze A. Check that A, C and K are not frozen.
373 */
374 if (cg_freeze_wait(cgroup[0], false))
375 goto cleanup;
376
377 if (cg_check_frozen(cgroup[2], false))
378 goto cleanup;
379
380 if (cg_check_frozen(cgroup[9], false))
381 goto cleanup;
382
383 ret = KSFT_PASS;
384
385cleanup:
386 for (i = 9; i >= 0 && cgroup[i]; i--) {
387 cg_destroy(cgroup[i]);
388 free(cgroup[i]);
389 }
390
391 return ret;
392}
393
/*
 * A fork bomb emulator: the two fork() calls multiply this process into
 * four, each of which then sleeps until it gets re-parented.
 */
static int forkbomb_fn(const char *cgroup, void *arg)
{
        int ppid;

        fork();
        fork();

        ppid = getppid();

        /* spin until the original parent goes away */
        while (getppid() == ppid)
                usleep(1000);

        return getppid() == ppid;
}
411
/*
 * The test runs a fork bomb in a cgroup and tries to freeze it.
 * Then it kills all processes and checks that the cgroup isn't
 * populated anymore.
 */
static int test_cgfreezer_forkbomb(const char *root)
{
        int ret = KSFT_FAIL;
        char *cgroup = NULL;

        cgroup = cg_name(root, "cg_forkbomb_test");
        if (!cgroup)
                goto cleanup;

        if (cg_create(cgroup))
                goto cleanup;

        cg_run_nowait(cgroup, forkbomb_fn, NULL);

        /* give the forkbomb some time to spread before freezing */
        usleep(100000);

        if (cg_freeze_wait(cgroup, true))
                goto cleanup;

        if (cg_killall(cgroup))
                goto cleanup;

        if (cg_wait_for_proc_count(cgroup, 0))
                goto cleanup;

        ret = KSFT_PASS;

cleanup:
        if (cgroup)
                cg_destroy(cgroup);
        free(cgroup);
        return ret;
}
450
/*
 * The test creates two nested cgroups, freezes the parent
 * and removes the child. Then it checks that the parent cgroup
 * remains frozen and it's possible to create a new child
 * without unfreezing. The new child is frozen too.
 */
static int test_cgfreezer_rmdir(const char *root)
{
        int ret = KSFT_FAIL;
        char *parent, *child = NULL;

        parent = cg_name(root, "cg_test_rmdir_A");
        if (!parent)
                goto cleanup;

        child = cg_name(parent, "cg_test_rmdir_B");
        if (!child)
                goto cleanup;

        if (cg_create(parent))
                goto cleanup;

        if (cg_create(child))
                goto cleanup;

        if (cg_freeze_wait(parent, true))
                goto cleanup;

        /* removing the child must not disturb the parent's state */
        if (cg_destroy(child))
                goto cleanup;

        if (cg_check_frozen(parent, true))
                goto cleanup;

        /* a child created under a frozen parent starts frozen */
        if (cg_create(child))
                goto cleanup;

        if (cg_check_frozen(child, true))
                goto cleanup;

        ret = KSFT_PASS;

cleanup:
        if (child)
                cg_destroy(child);
        free(child);
        if (parent)
                cg_destroy(parent);
        free(parent);
        return ret;
}
502
/*
 * The test creates two cgroups: A and B, runs a process in A
 * and performs several migrations:
 * 1) A (running) -> B (frozen)
 * 2) B (frozen) -> A (running)
 * 3) A (frozen) -> B (frozen)
 *
 * On each step it checks the actual state of both cgroups.
 */
static int test_cgfreezer_migrate(const char *root)
{
        int ret = KSFT_FAIL;
        char *cgroup[2] = {0};
        int pid;

        cgroup[0] = cg_name(root, "cg_test_migrate_A");
        if (!cgroup[0])
                goto cleanup;

        cgroup[1] = cg_name(root, "cg_test_migrate_B");
        if (!cgroup[1])
                goto cleanup;

        if (cg_create(cgroup[0]))
                goto cleanup;

        if (cg_create(cgroup[1]))
                goto cleanup;

        pid = cg_run_nowait(cgroup[0], child_fn, NULL);
        if (pid < 0)
                goto cleanup;

        if (cg_wait_for_proc_count(cgroup[0], 1))
                goto cleanup;

        /*
         * Migrate from A (running) to B (frozen)
         */
        if (cg_freeze_wait(cgroup[1], true))
                goto cleanup;

        /* the migrating task must become frozen in its new cgroup */
        if (cg_enter_and_wait_for_frozen(cgroup[1], pid, true))
                goto cleanup;

        if (cg_check_frozen(cgroup[0], false))
                goto cleanup;

        /*
         * Migrate from B (frozen) to A (running)
         */
        if (cg_enter_and_wait_for_frozen(cgroup[0], pid, false))
                goto cleanup;

        /* B stays frozen even after its only task left */
        if (cg_check_frozen(cgroup[1], true))
                goto cleanup;

        /*
         * Migrate from A (frozen) to B (frozen)
         */
        if (cg_freeze_wait(cgroup[0], true))
                goto cleanup;

        if (cg_enter_and_wait_for_frozen(cgroup[1], pid, true))
                goto cleanup;

        if (cg_check_frozen(cgroup[0], true))
                goto cleanup;

        ret = KSFT_PASS;

cleanup:
        if (cgroup[0])
                cg_destroy(cgroup[0]);
        free(cgroup[0]);
        if (cgroup[1])
                cg_destroy(cgroup[1]);
        free(cgroup[1]);
        return ret;
}
583
/*
 * The test checks that ptrace works with a tracing process in a frozen
 * cgroup: a frozen task can still be seized, interrupted, inspected and
 * detached without the cgroup leaving the frozen state.
 */
static int test_cgfreezer_ptrace(const char *root)
{
        int ret = KSFT_FAIL;
        char *cgroup = NULL;
        siginfo_t siginfo;
        int pid;

        cgroup = cg_name(root, "cg_test_ptrace");
        if (!cgroup)
                goto cleanup;

        if (cg_create(cgroup))
                goto cleanup;

        pid = cg_run_nowait(cgroup, child_fn, NULL);
        if (pid < 0)
                goto cleanup;

        if (cg_wait_for_proc_count(cgroup, 1))
                goto cleanup;

        if (cg_freeze_wait(cgroup, true))
                goto cleanup;

        if (ptrace(PTRACE_SEIZE, pid, NULL, NULL))
                goto cleanup;

        if (ptrace(PTRACE_INTERRUPT, pid, NULL, NULL))
                goto cleanup;

        /* wait until the tracee enters ptrace-stop */
        waitpid(pid, NULL, 0);

        /*
         * Cgroup has to remain frozen, however the test task
         * is in traced state.
         */
        if (cg_check_frozen(cgroup, true))
                goto cleanup;

        if (ptrace(PTRACE_GETSIGINFO, pid, NULL, &siginfo))
                goto cleanup;

        if (ptrace(PTRACE_DETACH, pid, NULL, NULL))
                goto cleanup;

        /* detaching must put the task back into the frozen state */
        if (cg_check_frozen(cgroup, true))
                goto cleanup;

        ret = KSFT_PASS;

cleanup:
        if (cgroup)
                cg_destroy(cgroup);
        free(cgroup);
        return ret;
}
643
/*
 * Check if the process is stopped by looking for a " T " state field
 * in /proc/<pid>/stat.
 *
 * NOTE(review): this matches the literal comm "(test_freezer)", so it
 * only works for children of this binary. Also, proc_read_text()
 * returns char; on unsigned-char platforms the "== -1" check below can
 * never match — verify.
 */
static int proc_check_stopped(int pid)
{
        char buf[PAGE_SIZE];
        int len;

        len = proc_read_text(pid, "stat", buf, sizeof(buf));
        if (len == -1) {
                debug("Can't get %d stat\n", pid);
                return -1;
        }

        if (strstr(buf, "(test_freezer) T ") == NULL) {
                debug("Process %d in the unexpected state: %s\n", pid, buf);
                return -1;
        }

        return 0;
}
665
666/*
667 * Test that it's possible to freeze a cgroup with a stopped process.
668 */
669static int test_cgfreezer_stopped(const char *root)
670{
671 int pid, ret = KSFT_FAIL;
672 char *cgroup = NULL;
673
674 cgroup = cg_name(root, "cg_test_stopped");
675 if (!cgroup)
676 goto cleanup;
677
678 if (cg_create(cgroup))
679 goto cleanup;
680
681 pid = cg_run_nowait(cgroup, child_fn, NULL);
682
683 if (cg_wait_for_proc_count(cgroup, 1))
684 goto cleanup;
685
686 if (kill(pid, SIGSTOP))
687 goto cleanup;
688
689 if (cg_check_frozen(cgroup, false))
690 goto cleanup;
691
692 if (cg_freeze_wait(cgroup, true))
693 goto cleanup;
694
695 if (cg_freeze_wait(cgroup, false))
696 goto cleanup;
697
698 if (proc_check_stopped(pid))
699 goto cleanup;
700
701 ret = KSFT_PASS;
702
703cleanup:
704 if (cgroup)
705 cg_destroy(cgroup);
706 free(cgroup);
707 return ret;
708}
709
710/*
711 * Test that it's possible to freeze a cgroup with a ptraced process.
712 */
713static int test_cgfreezer_ptraced(const char *root)
714{
715 int pid, ret = KSFT_FAIL;
716 char *cgroup = NULL;
717 siginfo_t siginfo;
718
719 cgroup = cg_name(root, "cg_test_ptraced");
720 if (!cgroup)
721 goto cleanup;
722
723 if (cg_create(cgroup))
724 goto cleanup;
725
726 pid = cg_run_nowait(cgroup, child_fn, NULL);
727
728 if (cg_wait_for_proc_count(cgroup, 1))
729 goto cleanup;
730
731 if (ptrace(PTRACE_SEIZE, pid, NULL, NULL))
732 goto cleanup;
733
734 if (ptrace(PTRACE_INTERRUPT, pid, NULL, NULL))
735 goto cleanup;
736
737 waitpid(pid, NULL, 0);
738
739 if (cg_check_frozen(cgroup, false))
740 goto cleanup;
741
742 if (cg_freeze_wait(cgroup, true))
743 goto cleanup;
744
745 /*
746 * cg_check_frozen(cgroup, true) will fail here,
747 * because the task in in the TRACEd state.
748 */
749 if (cg_freeze_wait(cgroup, false))
750 goto cleanup;
751
752 if (ptrace(PTRACE_GETSIGINFO, pid, NULL, &siginfo))
753 goto cleanup;
754
755 if (ptrace(PTRACE_DETACH, pid, NULL, NULL))
756 goto cleanup;
757
758 ret = KSFT_PASS;
759
760cleanup:
761 if (cgroup)
762 cg_destroy(cgroup);
763 free(cgroup);
764 return ret;
765}
766
/*
 * vfork()s a child which sleeps forever. Per vfork() semantics the
 * parent is suspended in the kernel until the child exits or execs,
 * which is exactly the state the vfork test wants to freeze.
 */
static int vfork_fn(const char *cgroup, void *arg)
{
        int pid = vfork();

        /* child: never exit, keeping the parent blocked in vfork() */
        if (pid == 0)
                while (true)
                        sleep(1);

        return pid;
}
777
/*
 * Test that it's possible to freeze a cgroup with a process,
 * which called vfork() and is waiting for a child.
 */
static int test_cgfreezer_vfork(const char *root)
{
        int ret = KSFT_FAIL;
        char *cgroup = NULL;

        cgroup = cg_name(root, "cg_test_vfork");
        if (!cgroup)
                goto cleanup;

        if (cg_create(cgroup))
                goto cleanup;

        cg_run_nowait(cgroup, vfork_fn, NULL);

        /* two members expected: the vfork()ing parent and its child */
        if (cg_wait_for_proc_count(cgroup, 2))
                goto cleanup;

        if (cg_freeze_wait(cgroup, true))
                goto cleanup;

        ret = KSFT_PASS;

cleanup:
        if (cgroup)
                cg_destroy(cgroup);
        free(cgroup);
        return ret;
}
810
/* Expands to a { function, "function" } table entry. */
#define T(x) { x, #x }
struct cgfreezer_test {
        int (*fn)(const char *root);    /* test body, returns a KSFT_* code */
        const char *name;               /* test name for kselftest output */
} tests[] = {
        T(test_cgfreezer_simple),
        T(test_cgfreezer_tree),
        T(test_cgfreezer_forkbomb),
        T(test_cgfreezer_rmdir),
        T(test_cgfreezer_migrate),
        T(test_cgfreezer_ptrace),
        T(test_cgfreezer_stopped),
        T(test_cgfreezer_ptraced),
        T(test_cgfreezer_vfork),
};
#undef T
827
828int main(int argc, char *argv[])
829{
830 char root[PATH_MAX];
831 int i, ret = EXIT_SUCCESS;
832
833 if (cg_find_unified_root(root, sizeof(root)))
834 ksft_exit_skip("cgroup v2 isn't mounted\n");
835 for (i = 0; i < ARRAY_SIZE(tests); i++) {
836 switch (tests[i].fn(root)) {
837 case KSFT_PASS:
838 ksft_test_result_pass("%s\n", tests[i].name);
839 break;
840 case KSFT_SKIP:
841 ksft_test_result_skip("%s\n", tests[i].name);
842 break;
843 default:
844 ret = EXIT_FAILURE;
845 ksft_test_result_fail("%s\n", tests[i].name);
846 break;
847 }
848 }
849
850 return ret;
851}