aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorShailabh Nagar <nagar@watson.ibm.com>2006-07-14 03:24:44 -0400
committerLinus Torvalds <torvalds@g5.osdl.org>2006-07-15 00:53:57 -0400
commitad4ecbcba72855a2b5319b96e2a3a65ed1ca3bfd (patch)
treea2f5b98598948525de77ab594e4432f09a230388 /kernel
parent25890454667b3295f67b3372352be90705f8667c (diff)
[PATCH] delay accounting taskstats interface send tgid once
Send per-tgid data only once during exit of a thread group instead of once with each member thread exit. Currently, when a thread exits, besides its per-tid data, the per-tgid data of its thread group is also sent out, if its thread group is non-empty. The per-tgid data sent consists of the sum of per-tid stats for all *remaining* threads of the thread group. This patch modifies this sending in two ways: - the per-tgid data is sent only when the last thread of a thread group exits. This cuts down heavily on the overhead of sending/receiving per-tgid data, especially when other exploiters of the taskstats interface aren't interested in per-tgid stats - the semantics of the per-tgid data sent are changed. Instead of being the sum of per-tid data for remaining threads, the value now sent is the true total accumulated statistics for all threads that are/were part of the thread group. The patch also addresses a minor issue where the failure of one accounting subsystem to fill in the taskstats structure was causing the taskstats data to not be sent at all. The patch has been tested for stability and has run cerberus for over 4 hours on an SMP system. [akpm@osdl.org: bugfixes] Signed-off-by: Shailabh Nagar <nagar@watson.ibm.com> Signed-off-by: Balbir Singh <balbir@in.ibm.com> Cc: Jay Lan <jlan@engr.sgi.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/exit.c8
-rw-r--r--kernel/fork.c4
-rw-r--r--kernel/taskstats.c98
3 files changed, 74 insertions, 36 deletions
diff --git a/kernel/exit.c b/kernel/exit.c
index 9852ed8c2988..67c1e9a4f812 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -845,7 +845,7 @@ static void exit_notify(struct task_struct *tsk)
845fastcall NORET_TYPE void do_exit(long code) 845fastcall NORET_TYPE void do_exit(long code)
846{ 846{
847 struct task_struct *tsk = current; 847 struct task_struct *tsk = current;
848 struct taskstats *tidstats, *tgidstats; 848 struct taskstats *tidstats;
849 int group_dead; 849 int group_dead;
850 850
851 profile_task_exit(tsk); 851 profile_task_exit(tsk);
@@ -884,7 +884,7 @@ fastcall NORET_TYPE void do_exit(long code)
884 current->comm, current->pid, 884 current->comm, current->pid,
885 preempt_count()); 885 preempt_count());
886 886
887 taskstats_exit_alloc(&tidstats, &tgidstats); 887 taskstats_exit_alloc(&tidstats);
888 888
889 acct_update_integrals(tsk); 889 acct_update_integrals(tsk);
890 if (tsk->mm) { 890 if (tsk->mm) {
@@ -905,8 +905,8 @@ fastcall NORET_TYPE void do_exit(long code)
905#endif 905#endif
906 if (unlikely(tsk->audit_context)) 906 if (unlikely(tsk->audit_context))
907 audit_free(tsk); 907 audit_free(tsk);
908 taskstats_exit_send(tsk, tidstats, tgidstats); 908 taskstats_exit_send(tsk, tidstats, group_dead);
909 taskstats_exit_free(tidstats, tgidstats); 909 taskstats_exit_free(tidstats);
910 delayacct_tsk_exit(tsk); 910 delayacct_tsk_exit(tsk);
911 911
912 exit_mm(tsk); 912 exit_mm(tsk);
diff --git a/kernel/fork.c b/kernel/fork.c
index 451cfd35bf22..1b0f7b1e0881 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -44,6 +44,7 @@
44#include <linux/acct.h> 44#include <linux/acct.h>
45#include <linux/cn_proc.h> 45#include <linux/cn_proc.h>
46#include <linux/delayacct.h> 46#include <linux/delayacct.h>
47#include <linux/taskstats_kern.h>
47 48
48#include <asm/pgtable.h> 49#include <asm/pgtable.h>
49#include <asm/pgalloc.h> 50#include <asm/pgalloc.h>
@@ -819,6 +820,7 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct * ts
819 if (clone_flags & CLONE_THREAD) { 820 if (clone_flags & CLONE_THREAD) {
820 atomic_inc(&current->signal->count); 821 atomic_inc(&current->signal->count);
821 atomic_inc(&current->signal->live); 822 atomic_inc(&current->signal->live);
823 taskstats_tgid_alloc(current->signal);
822 return 0; 824 return 0;
823 } 825 }
824 sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL); 826 sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL);
@@ -863,6 +865,7 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct * ts
863 INIT_LIST_HEAD(&sig->cpu_timers[0]); 865 INIT_LIST_HEAD(&sig->cpu_timers[0]);
864 INIT_LIST_HEAD(&sig->cpu_timers[1]); 866 INIT_LIST_HEAD(&sig->cpu_timers[1]);
865 INIT_LIST_HEAD(&sig->cpu_timers[2]); 867 INIT_LIST_HEAD(&sig->cpu_timers[2]);
868 taskstats_tgid_init(sig);
866 869
867 task_lock(current->group_leader); 870 task_lock(current->group_leader);
868 memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim); 871 memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim);
@@ -884,6 +887,7 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct * ts
884void __cleanup_signal(struct signal_struct *sig) 887void __cleanup_signal(struct signal_struct *sig)
885{ 888{
886 exit_thread_group_keys(sig); 889 exit_thread_group_keys(sig);
890 taskstats_tgid_free(sig);
887 kmem_cache_free(signal_cachep, sig); 891 kmem_cache_free(signal_cachep, sig);
888} 892}
889 893
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index ea9506de3b85..4a0a5022b299 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -132,46 +132,79 @@ static int fill_pid(pid_t pid, struct task_struct *pidtsk,
132static int fill_tgid(pid_t tgid, struct task_struct *tgidtsk, 132static int fill_tgid(pid_t tgid, struct task_struct *tgidtsk,
133 struct taskstats *stats) 133 struct taskstats *stats)
134{ 134{
135 int rc;
136 struct task_struct *tsk, *first; 135 struct task_struct *tsk, *first;
136 unsigned long flags;
137 137
138 /*
139 * Add additional stats from live tasks except zombie thread group
140 * leaders who are already counted with the dead tasks
141 */
138 first = tgidtsk; 142 first = tgidtsk;
139 read_lock(&tasklist_lock);
140 if (!first) { 143 if (!first) {
144 read_lock(&tasklist_lock);
141 first = find_task_by_pid(tgid); 145 first = find_task_by_pid(tgid);
142 if (!first) { 146 if (!first) {
143 read_unlock(&tasklist_lock); 147 read_unlock(&tasklist_lock);
144 return -ESRCH; 148 return -ESRCH;
145 } 149 }
146 } 150 get_task_struct(first);
151 read_unlock(&tasklist_lock);
152 } else
153 get_task_struct(first);
154
155 /* Start with stats from dead tasks */
156 spin_lock_irqsave(&first->signal->stats_lock, flags);
157 if (first->signal->stats)
158 memcpy(stats, first->signal->stats, sizeof(*stats));
159 spin_unlock_irqrestore(&first->signal->stats_lock, flags);
160
147 tsk = first; 161 tsk = first;
162 read_lock(&tasklist_lock);
148 do { 163 do {
164 if (tsk->exit_state == EXIT_ZOMBIE && thread_group_leader(tsk))
165 continue;
149 /* 166 /*
150 * Each accounting subsystem adds calls its functions to 167 * Accounting subsystem can call its functions here to
151 * fill in relevant parts of struct taskstsats as follows 168 * fill in relevant parts of struct taskstsats as follows
152 * 169 *
153 * rc = per-task-foo(stats, tsk); 170 * per-task-foo(stats, tsk);
154 * if (rc)
155 * break;
156 */ 171 */
157 172 delayacct_add_tsk(stats, tsk);
158 rc = delayacct_add_tsk(stats, tsk);
159 if (rc)
160 break;
161 173
162 } while_each_thread(first, tsk); 174 } while_each_thread(first, tsk);
163 read_unlock(&tasklist_lock); 175 read_unlock(&tasklist_lock);
164 stats->version = TASKSTATS_VERSION; 176 stats->version = TASKSTATS_VERSION;
165 177
166
167 /* 178 /*
168 * Accounting subsytems can also add calls here if they don't 179 * Accounting subsytems can also add calls here to modify
169 * wish to aggregate statistics for per-tgid stats 180 * fields of taskstats.
170 */ 181 */
171 182
172 return rc; 183 return 0;
184}
185
186
187static void fill_tgid_exit(struct task_struct *tsk)
188{
189 unsigned long flags;
190
191 spin_lock_irqsave(&tsk->signal->stats_lock, flags);
192 if (!tsk->signal->stats)
193 goto ret;
194
195 /*
196 * Each accounting subsystem calls its functions here to
197 * accumalate its per-task stats for tsk, into the per-tgid structure
198 *
199 * per-task-foo(tsk->signal->stats, tsk);
200 */
201 delayacct_add_tsk(tsk->signal->stats, tsk);
202ret:
203 spin_unlock_irqrestore(&tsk->signal->stats_lock, flags);
204 return;
173} 205}
174 206
207
175static int taskstats_send_stats(struct sk_buff *skb, struct genl_info *info) 208static int taskstats_send_stats(struct sk_buff *skb, struct genl_info *info)
176{ 209{
177 int rc = 0; 210 int rc = 0;
@@ -230,7 +263,7 @@ err:
230 263
231/* Send pid data out on exit */ 264/* Send pid data out on exit */
232void taskstats_exit_send(struct task_struct *tsk, struct taskstats *tidstats, 265void taskstats_exit_send(struct task_struct *tsk, struct taskstats *tidstats,
233 struct taskstats *tgidstats) 266 int group_dead)
234{ 267{
235 int rc; 268 int rc;
236 struct sk_buff *rep_skb; 269 struct sk_buff *rep_skb;
@@ -238,13 +271,16 @@ void taskstats_exit_send(struct task_struct *tsk, struct taskstats *tidstats,
238 size_t size; 271 size_t size;
239 int is_thread_group; 272 int is_thread_group;
240 struct nlattr *na; 273 struct nlattr *na;
274 unsigned long flags;
241 275
242 if (!family_registered || !tidstats) 276 if (!family_registered || !tidstats)
243 return; 277 return;
244 278
245 is_thread_group = !thread_group_empty(tsk); 279 spin_lock_irqsave(&tsk->signal->stats_lock, flags);
246 rc = 0; 280 is_thread_group = tsk->signal->stats ? 1 : 0;
281 spin_unlock_irqrestore(&tsk->signal->stats_lock, flags);
247 282
283 rc = 0;
248 /* 284 /*
249 * Size includes space for nested attributes 285 * Size includes space for nested attributes
250 */ 286 */
@@ -268,30 +304,28 @@ void taskstats_exit_send(struct task_struct *tsk, struct taskstats *tidstats,
268 *tidstats); 304 *tidstats);
269 nla_nest_end(rep_skb, na); 305 nla_nest_end(rep_skb, na);
270 306
271 if (!is_thread_group || !tgidstats) { 307 if (!is_thread_group)
272 send_reply(rep_skb, 0, TASKSTATS_MSG_MULTICAST); 308 goto send;
273 goto ret;
274 }
275 309
276 rc = fill_tgid(tsk->pid, tsk, tgidstats);
277 /* 310 /*
278 * If fill_tgid() failed then one probable reason could be that the 311 * tsk has/had a thread group so fill the tsk->signal->stats structure
279 * thread group leader has exited. fill_tgid() will fail, send out 312 * Doesn't matter if tsk is the leader or the last group member leaving
280 * the pid statistics collected earlier.
281 */ 313 */
282 if (rc < 0) { 314
283 send_reply(rep_skb, 0, TASKSTATS_MSG_MULTICAST); 315 fill_tgid_exit(tsk);
284 goto ret; 316 if (!group_dead)
285 } 317 goto send;
286 318
287 na = nla_nest_start(rep_skb, TASKSTATS_TYPE_AGGR_TGID); 319 na = nla_nest_start(rep_skb, TASKSTATS_TYPE_AGGR_TGID);
288 NLA_PUT_U32(rep_skb, TASKSTATS_TYPE_TGID, (u32)tsk->tgid); 320 NLA_PUT_U32(rep_skb, TASKSTATS_TYPE_TGID, (u32)tsk->tgid);
321 /* No locking needed for tsk->signal->stats since group is dead */
289 NLA_PUT_TYPE(rep_skb, struct taskstats, TASKSTATS_TYPE_STATS, 322 NLA_PUT_TYPE(rep_skb, struct taskstats, TASKSTATS_TYPE_STATS,
290 *tgidstats); 323 *tsk->signal->stats);
291 nla_nest_end(rep_skb, na); 324 nla_nest_end(rep_skb, na);
292 325
326send:
293 send_reply(rep_skb, 0, TASKSTATS_MSG_MULTICAST); 327 send_reply(rep_skb, 0, TASKSTATS_MSG_MULTICAST);
294 goto ret; 328 return;
295 329
296nla_put_failure: 330nla_put_failure:
297 genlmsg_cancel(rep_skb, reply); 331 genlmsg_cancel(rep_skb, reply);