Diffstat (limited to 'kernel')

 63 files changed, 2323 insertions(+), 1889 deletions(-)
diff --git a/kernel/Makefile b/kernel/Makefile
index 6c5f081132a4..188c43223f52 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -11,7 +11,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
 	    hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
 	    notifier.o ksysfs.o pm_qos_params.o
 
-obj-$(CONFIG_SYSCTL) += sysctl_check.o
+obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o
 obj-$(CONFIG_STACKTRACE) += stacktrace.o
 obj-y += time/
 obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o
diff --git a/kernel/audit.c b/kernel/audit.c
index a7b16086d36f..b7d3709cc452 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -126,6 +126,8 @@ static int audit_freelist_count;
 static LIST_HEAD(audit_freelist);
 
 static struct sk_buff_head audit_skb_queue;
+/* queue of skbs to send to auditd when/if it comes back */
+static struct sk_buff_head audit_skb_hold_queue;
 static struct task_struct *kauditd_task;
 static DECLARE_WAIT_QUEUE_HEAD(kauditd_wait);
 static DECLARE_WAIT_QUEUE_HEAD(audit_backlog_wait);
@@ -154,6 +156,11 @@ struct audit_buffer {
 	gfp_t gfp_mask;
 };
 
+struct audit_reply {
+	int pid;
+	struct sk_buff *skb;
+};
+
 static void audit_set_pid(struct audit_buffer *ab, pid_t pid)
 {
 	if (ab) {
@@ -252,14 +259,15 @@ void audit_log_lost(const char *message)
 }
 
 static int audit_log_config_change(char *function_name, int new, int old,
-				   uid_t loginuid, u32 sid, int allow_changes)
+				   uid_t loginuid, u32 sessionid, u32 sid,
+				   int allow_changes)
 {
 	struct audit_buffer *ab;
 	int rc = 0;
 
 	ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
-	audit_log_format(ab, "%s=%d old=%d by auid=%u", function_name, new,
-			 old, loginuid);
+	audit_log_format(ab, "%s=%d old=%d auid=%u ses=%u", function_name, new,
+			 old, loginuid, sessionid);
 	if (sid) {
 		char *ctx = NULL;
 		u32 len;
@@ -279,7 +287,8 @@ static int audit_log_config_change(char *function_name, int new, int old,
 }
 
 static int audit_do_config_change(char *function_name, int *to_change,
-				  int new, uid_t loginuid, u32 sid)
+				  int new, uid_t loginuid, u32 sessionid,
+				  u32 sid)
 {
 	int allow_changes, rc = 0, old = *to_change;
 
@@ -290,8 +299,8 @@ static int audit_do_config_change(char *function_name, int *to_change,
 		allow_changes = 1;
 
 	if (audit_enabled != AUDIT_OFF) {
-		rc = audit_log_config_change(function_name, new, old,
-					     loginuid, sid, allow_changes);
+		rc = audit_log_config_change(function_name, new, old, loginuid,
+					     sessionid, sid, allow_changes);
 		if (rc)
 			allow_changes = 0;
 	}
@@ -305,26 +314,28 @@ static int audit_do_config_change(char *function_name, int *to_change,
 	return rc;
 }
 
-static int audit_set_rate_limit(int limit, uid_t loginuid, u32 sid)
+static int audit_set_rate_limit(int limit, uid_t loginuid, u32 sessionid,
+				u32 sid)
 {
 	return audit_do_config_change("audit_rate_limit", &audit_rate_limit,
-				      limit, loginuid, sid);
+				      limit, loginuid, sessionid, sid);
 }
 
-static int audit_set_backlog_limit(int limit, uid_t loginuid, u32 sid)
+static int audit_set_backlog_limit(int limit, uid_t loginuid, u32 sessionid,
+				   u32 sid)
 {
 	return audit_do_config_change("audit_backlog_limit", &audit_backlog_limit,
-				      limit, loginuid, sid);
+				      limit, loginuid, sessionid, sid);
 }
 
-static int audit_set_enabled(int state, uid_t loginuid, u32 sid)
+static int audit_set_enabled(int state, uid_t loginuid, u32 sessionid, u32 sid)
 {
 	int rc;
 	if (state < AUDIT_OFF || state > AUDIT_LOCKED)
 		return -EINVAL;
 
 	rc = audit_do_config_change("audit_enabled", &audit_enabled, state,
-				    loginuid, sid);
+				    loginuid, sessionid, sid);
 
 	if (!rc)
 		audit_ever_enabled |= !!state;
@@ -332,7 +343,7 @@ static int audit_set_enabled(int state, uid_t loginuid, u32 sid)
 	return rc;
 }
 
-static int audit_set_failure(int state, uid_t loginuid, u32 sid)
+static int audit_set_failure(int state, uid_t loginuid, u32 sessionid, u32 sid)
 {
 	if (state != AUDIT_FAIL_SILENT
 	    && state != AUDIT_FAIL_PRINTK
@@ -340,7 +351,43 @@ static int audit_set_failure(int state, uid_t loginuid, u32 sid)
 		return -EINVAL;
 
 	return audit_do_config_change("audit_failure", &audit_failure, state,
-				      loginuid, sid);
+				      loginuid, sessionid, sid);
+}
+
+/*
+ * Queue skbs to be sent to auditd when/if it comes back.  These skbs should
+ * already have been sent via prink/syslog and so if these messages are dropped
+ * it is not a huge concern since we already passed the audit_log_lost()
+ * notification and stuff.  This is just nice to get audit messages during
+ * boot before auditd is running or messages generated while auditd is stopped.
+ * This only holds messages is audit_default is set, aka booting with audit=1
+ * or building your kernel that way.
+ */
+static void audit_hold_skb(struct sk_buff *skb)
+{
+	if (audit_default &&
+	    skb_queue_len(&audit_skb_hold_queue) < audit_backlog_limit)
+		skb_queue_tail(&audit_skb_hold_queue, skb);
+	else
+		kfree_skb(skb);
+}
+
+static void kauditd_send_skb(struct sk_buff *skb)
+{
+	int err;
+	/* take a reference in case we can't send it and we want to hold it */
+	skb_get(skb);
+	err = netlink_unicast(audit_sock, skb, audit_nlk_pid, 0);
+	if (err < 0) {
+		BUG_ON(err != -ECONNREFUSED); /* Shoudn't happen */
+		printk(KERN_ERR "audit: *NO* daemon at audit_pid=%d\n", audit_pid);
+		audit_log_lost("auditd dissapeared\n");
+		audit_pid = 0;
+		/* we might get lucky and get this in the next auditd */
+		audit_hold_skb(skb);
+	} else
+		/* drop the extra reference if sent ok */
+		kfree_skb(skb);
 }
 
 static int kauditd_thread(void *dummy)
@@ -349,24 +396,41 @@ static int kauditd_thread(void *dummy)
 
 	set_freezable();
 	while (!kthread_should_stop()) {
+		/*
+		 * if auditd just started drain the queue of messages already
+		 * sent to syslog/printk.  remember loss here is ok.  we already
+		 * called audit_log_lost() if it didn't go out normally.  so the
+		 * race between the skb_dequeue and the next check for audit_pid
+		 * doesn't matter.
+		 *
+		 * if you ever find kauditd to be too slow we can get a perf win
+		 * by doing our own locking and keeping better track if there
+		 * are messages in this queue.  I don't see the need now, but
+		 * in 5 years when I want to play with this again I'll see this
+		 * note and still have no friggin idea what i'm thinking today.
+		 */
+		if (audit_default && audit_pid) {
+			skb = skb_dequeue(&audit_skb_hold_queue);
+			if (unlikely(skb)) {
+				while (skb && audit_pid) {
+					kauditd_send_skb(skb);
+					skb = skb_dequeue(&audit_skb_hold_queue);
+				}
+			}
+		}
+
 		skb = skb_dequeue(&audit_skb_queue);
 		wake_up(&audit_backlog_wait);
 		if (skb) {
-			if (audit_pid) {
-				int err = netlink_unicast(audit_sock, skb, audit_nlk_pid, 0);
-				if (err < 0) {
-					BUG_ON(err != -ECONNREFUSED); /* Shoudn't happen */
-					printk(KERN_ERR "audit: *NO* daemon at audit_pid=%d\n", audit_pid);
-					audit_log_lost("auditd dissapeared\n");
-					audit_pid = 0;
-				}
-			} else {
+			if (audit_pid)
+				kauditd_send_skb(skb);
+			else {
 				if (printk_ratelimit())
-					printk(KERN_NOTICE "%s\n", skb->data +
-						NLMSG_SPACE(0));
+					printk(KERN_NOTICE "%s\n", skb->data + NLMSG_SPACE(0));
 				else
 					audit_log_lost("printk limit exceeded\n");
-				kfree_skb(skb);
+
+				audit_hold_skb(skb);
 			}
 		} else {
 			DECLARE_WAITQUEUE(wait, current);
@@ -385,13 +449,13 @@ static int kauditd_thread(void *dummy)
 	return 0;
 }
 
-static int audit_prepare_user_tty(pid_t pid, uid_t loginuid)
+static int audit_prepare_user_tty(pid_t pid, uid_t loginuid, u32 sessionid)
 {
 	struct task_struct *tsk;
 	int err;
 
 	read_lock(&tasklist_lock);
-	tsk = find_task_by_pid(pid);
+	tsk = find_task_by_vpid(pid);
 	err = -ESRCH;
 	if (!tsk)
 		goto out;
@@ -404,7 +468,7 @@ static int audit_prepare_user_tty(pid_t pid, uid_t loginuid)
 	if (err)
 		goto out;
 
-	tty_audit_push_task(tsk, loginuid);
+	tty_audit_push_task(tsk, loginuid, sessionid);
 out:
 	read_unlock(&tasklist_lock);
 	return err;
@@ -469,6 +533,19 @@ nlmsg_failure:			/* Used by NLMSG_PUT */
 	return NULL;
 }
 
+static int audit_send_reply_thread(void *arg)
+{
+	struct audit_reply *reply = (struct audit_reply *)arg;
+
+	mutex_lock(&audit_cmd_mutex);
+	mutex_unlock(&audit_cmd_mutex);
+
+	/* Ignore failure. It'll only happen if the sender goes away,
+	   because our timeout is set to infinite. */
+	netlink_unicast(audit_sock, reply->skb, reply->pid, 0);
+	kfree(reply);
+	return 0;
+}
 /**
  * audit_send_reply - send an audit reply message via netlink
  * @pid: process id to send reply to
@@ -485,14 +562,26 @@ nlmsg_failure:			/* Used by NLMSG_PUT */
 void audit_send_reply(int pid, int seq, int type, int done, int multi,
 		      void *payload, int size)
 {
 	struct sk_buff *skb;
+	struct task_struct *tsk;
+	struct audit_reply *reply = kmalloc(sizeof(struct audit_reply),
+					    GFP_KERNEL);
+
+	if (!reply)
+		return;
+
 	skb = audit_make_reply(pid, seq, type, done, multi, payload, size);
 	if (!skb)
 		return;
-	/* Ignore failure. It'll only happen if the sender goes away,
-	   because our timeout is set to infinite. */
-	netlink_unicast(audit_sock, skb, pid, 0);
-	return;
+
+	reply->pid = pid;
+	reply->skb = skb;
+
+	tsk = kthread_run(audit_send_reply_thread, reply, "audit_send_reply");
+	if (IS_ERR(tsk)) {
+		kfree(reply);
+		kfree_skb(skb);
+	}
 }
 
 /*
@@ -534,7 +623,8 @@ static int audit_netlink_ok(struct sk_buff *skb, u16 msg_type)
 }
 
 static int audit_log_common_recv_msg(struct audit_buffer **ab, u16 msg_type,
-				     u32 pid, u32 uid, uid_t auid, u32 sid)
+				     u32 pid, u32 uid, uid_t auid, u32 ses,
+				     u32 sid)
 {
 	int rc = 0;
 	char *ctx = NULL;
@@ -546,8 +636,8 @@ static int audit_log_common_recv_msg(struct audit_buffer **ab, u16 msg_type,
 	}
 
 	*ab = audit_log_start(NULL, GFP_KERNEL, msg_type);
-	audit_log_format(*ab, "user pid=%d uid=%u auid=%u",
-			 pid, uid, auid);
+	audit_log_format(*ab, "user pid=%d uid=%u auid=%u ses=%u",
+			 pid, uid, auid, ses);
 	if (sid) {
 		rc = security_secid_to_secctx(sid, &ctx, &len);
 		if (rc)
@@ -570,6 +660,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 	struct audit_buffer *ab;
 	u16 msg_type = nlh->nlmsg_type;
 	uid_t loginuid; /* loginuid of sender */
+	u32 sessionid;
 	struct audit_sig_info *sig_data;
 	char *ctx = NULL;
 	u32 len;
@@ -591,6 +682,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 	pid = NETLINK_CREDS(skb)->pid;
 	uid = NETLINK_CREDS(skb)->uid;
 	loginuid = NETLINK_CB(skb).loginuid;
+	sessionid = NETLINK_CB(skb).sessionid;
 	sid = NETLINK_CB(skb).sid;
 	seq = nlh->nlmsg_seq;
 	data = NLMSG_DATA(nlh);
@@ -613,12 +705,12 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 		status_get = (struct audit_status *)data;
 		if (status_get->mask & AUDIT_STATUS_ENABLED) {
 			err = audit_set_enabled(status_get->enabled,
-						loginuid, sid);
+						loginuid, sessionid, sid);
 			if (err < 0) return err;
 		}
 		if (status_get->mask & AUDIT_STATUS_FAILURE) {
 			err = audit_set_failure(status_get->failure,
-						loginuid, sid);
+						loginuid, sessionid, sid);
 			if (err < 0) return err;
 		}
 		if (status_get->mask & AUDIT_STATUS_PID) {
@@ -627,17 +719,17 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 			if (audit_enabled != AUDIT_OFF)
 				audit_log_config_change("audit_pid", new_pid,
 							audit_pid, loginuid,
-							sid, 1);
+							sessionid, sid, 1);
 
 			audit_pid = new_pid;
 			audit_nlk_pid = NETLINK_CB(skb).pid;
 		}
 		if (status_get->mask & AUDIT_STATUS_RATE_LIMIT)
 			err = audit_set_rate_limit(status_get->rate_limit,
-						   loginuid, sid);
+						   loginuid, sessionid, sid);
 		if (status_get->mask & AUDIT_STATUS_BACKLOG_LIMIT)
 			err = audit_set_backlog_limit(status_get->backlog_limit,
-						      loginuid, sid);
+						      loginuid, sessionid, sid);
 		break;
 	case AUDIT_USER:
 	case AUDIT_FIRST_USER_MSG ... AUDIT_LAST_USER_MSG:
@@ -649,12 +741,13 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 		if (err == 1) {
 			err = 0;
 			if (msg_type == AUDIT_USER_TTY) {
-				err = audit_prepare_user_tty(pid, loginuid);
+				err = audit_prepare_user_tty(pid, loginuid,
+							     sessionid);
 				if (err)
 					break;
 			}
 			audit_log_common_recv_msg(&ab, msg_type, pid, uid,
-						  loginuid, sid);
+						  loginuid, sessionid, sid);
 
 			if (msg_type != AUDIT_USER_TTY)
 				audit_log_format(ab, " msg='%.1024s'",
@@ -664,8 +757,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 
 				audit_log_format(ab, " msg=");
 				size = nlmsg_len(nlh);
-				audit_log_n_untrustedstring(ab, size,
-							    data);
+				audit_log_n_untrustedstring(ab, data, size);
 			}
 			audit_set_pid(ab, pid);
 			audit_log_end(ab);
@@ -677,7 +769,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 			return -EINVAL;
 		if (audit_enabled == AUDIT_LOCKED) {
 			audit_log_common_recv_msg(&ab, AUDIT_CONFIG_CHANGE, pid,
-						  uid, loginuid, sid);
+						  uid, loginuid, sessionid, sid);
 
 			audit_log_format(ab, " audit_enabled=%d res=0",
 					 audit_enabled);
@@ -688,7 +780,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 	case AUDIT_LIST:
 		err = audit_receive_filter(nlh->nlmsg_type, NETLINK_CB(skb).pid,
 					   uid, seq, data, nlmsg_len(nlh),
-					   loginuid, sid);
+					   loginuid, sessionid, sid);
 		break;
 	case AUDIT_ADD_RULE:
 	case AUDIT_DEL_RULE:
@@ -696,7 +788,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 			return -EINVAL;
 		if (audit_enabled == AUDIT_LOCKED) {
 			audit_log_common_recv_msg(&ab, AUDIT_CONFIG_CHANGE, pid,
-						  uid, loginuid, sid);
+						  uid, loginuid, sessionid, sid);
 
 			audit_log_format(ab, " audit_enabled=%d res=0",
 					 audit_enabled);
@@ -707,13 +799,13 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 	case AUDIT_LIST_RULES:
 		err = audit_receive_filter(nlh->nlmsg_type, NETLINK_CB(skb).pid,
 					   uid, seq, data, nlmsg_len(nlh),
-					   loginuid, sid);
+					   loginuid, sessionid, sid);
 		break;
 	case AUDIT_TRIM:
 		audit_trim_trees();
 
 		audit_log_common_recv_msg(&ab, AUDIT_CONFIG_CHANGE, pid,
-					  uid, loginuid, sid);
+					  uid, loginuid, sessionid, sid);
 
 		audit_log_format(ab, " op=trim res=1");
 		audit_log_end(ab);
@@ -721,21 +813,21 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 	case AUDIT_MAKE_EQUIV: {
 		void *bufp = data;
 		u32 sizes[2];
-		size_t len = nlmsg_len(nlh);
+		size_t msglen = nlmsg_len(nlh);
 		char *old, *new;
 
 		err = -EINVAL;
-		if (len < 2 * sizeof(u32))
+		if (msglen < 2 * sizeof(u32))
 			break;
 		memcpy(sizes, bufp, 2 * sizeof(u32));
 		bufp += 2 * sizeof(u32);
-		len -= 2 * sizeof(u32);
-		old = audit_unpack_string(&bufp, &len, sizes[0]);
+		msglen -= 2 * sizeof(u32);
+		old = audit_unpack_string(&bufp, &msglen, sizes[0]);
 		if (IS_ERR(old)) {
 			err = PTR_ERR(old);
 			break;
 		}
-		new = audit_unpack_string(&bufp, &len, sizes[1]);
+		new = audit_unpack_string(&bufp, &msglen, sizes[1]);
 		if (IS_ERR(new)) {
 			err = PTR_ERR(new);
 			kfree(old);
@@ -745,7 +837,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 		err = audit_tag_tree(old, new);
 
 		audit_log_common_recv_msg(&ab, AUDIT_CONFIG_CHANGE, pid,
-					  uid, loginuid, sid);
+					  uid, loginuid, sessionid, sid);
 
 		audit_log_format(ab, " op=make_equiv old=");
 		audit_log_untrustedstring(ab, old);
@@ -779,7 +871,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 		struct task_struct *tsk;
 
 		read_lock(&tasklist_lock);
-		tsk = find_task_by_pid(pid);
+		tsk = find_task_by_vpid(pid);
 		if (!tsk)
 			err = -ESRCH;
 		else {
@@ -802,7 +894,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 		if (s->enabled != 0 && s->enabled != 1)
 			return -EINVAL;
 		read_lock(&tasklist_lock);
-		tsk = find_task_by_pid(pid);
+		tsk = find_task_by_vpid(pid);
 		if (!tsk)
 			err = -ESRCH;
 		else {
@@ -877,6 +969,7 @@ static int __init audit_init(void)
 	audit_sock->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;
 
 	skb_queue_head_init(&audit_skb_queue);
+	skb_queue_head_init(&audit_skb_hold_queue);
 	audit_initialized = 1;
 	audit_enabled = audit_default;
 	audit_ever_enabled |= !!audit_default;
@@ -1199,7 +1292,7 @@ void audit_log_format(struct audit_buffer *ab, const char *fmt, ...)
 * This function will take the passed buf and convert it into a string of
 * ascii hex digits. The new string is placed onto the skb.
 */
-void audit_log_hex(struct audit_buffer *ab, const unsigned char *buf,
+void audit_log_n_hex(struct audit_buffer *ab, const unsigned char *buf,
 		size_t len)
 {
 	int i, avail, new_len;
@@ -1235,8 +1328,8 @@ void audit_log_hex(struct audit_buffer *ab, const unsigned char *buf,
 * Format a string of no more than slen characters into the audit buffer,
 * enclosed in quote marks.
 */
-static void audit_log_n_string(struct audit_buffer *ab, size_t slen,
-			       const char *string)
+void audit_log_n_string(struct audit_buffer *ab, const char *string,
+			size_t slen)
 {
 	int avail, new_len;
 	unsigned char *ptr;
@@ -1292,13 +1385,13 @@ int audit_string_contains_control(const char *string, size_t len)
 * The caller specifies the number of characters in the string to log, which may
 * or may not be the entire string.
 */
-void audit_log_n_untrustedstring(struct audit_buffer *ab, size_t len,
-				 const char *string)
+void audit_log_n_untrustedstring(struct audit_buffer *ab, const char *string,
+				 size_t len)
 {
 	if (audit_string_contains_control(string, len))
-		audit_log_hex(ab, string, len);
+		audit_log_n_hex(ab, string, len);
 	else
-		audit_log_n_string(ab, len, string);
+		audit_log_n_string(ab, string, len);
 }
 
 /**
@@ -1311,7 +1404,7 @@ void audit_log_n_untrustedstring(struct audit_buffer *ab, size_t len,
 */
 void audit_log_untrustedstring(struct audit_buffer *ab, const char *string)
 {
-	audit_log_n_untrustedstring(ab, strlen(string), string);
+	audit_log_n_untrustedstring(ab, string, strlen(string));
 }
 
 /* This is a helper-function to print the escaped d_path */
@@ -1355,19 +1448,23 @@ void audit_log_end(struct audit_buffer *ab)
 		audit_log_lost("rate limit exceeded");
 	} else {
 		struct nlmsghdr *nlh = nlmsg_hdr(ab->skb);
+		nlh->nlmsg_len = ab->skb->len - NLMSG_SPACE(0);
+
 		if (audit_pid) {
-			nlh->nlmsg_len = ab->skb->len - NLMSG_SPACE(0);
 			skb_queue_tail(&audit_skb_queue, ab->skb);
-			ab->skb = NULL;
 			wake_up_interruptible(&kauditd_wait);
-		} else if (nlh->nlmsg_type != AUDIT_EOE) {
-			if (printk_ratelimit()) {
-				printk(KERN_NOTICE "type=%d %s\n",
-					nlh->nlmsg_type,
-					ab->skb->data + NLMSG_SPACE(0));
-			} else
-				audit_log_lost("printk limit exceeded\n");
+		} else {
+			if (nlh->nlmsg_type != AUDIT_EOE) {
+				if (printk_ratelimit()) {
+					printk(KERN_NOTICE "type=%d %s\n",
+						nlh->nlmsg_type,
+						ab->skb->data + NLMSG_SPACE(0));
+				} else
+					audit_log_lost("printk limit exceeded\n");
+			}
+			audit_hold_skb(ab->skb);
 		}
+		ab->skb = NULL;
 	}
 	audit_buffer_free(ab);
 }
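
A note on the audit_send_reply() rework above: the reply now goes out from a short-lived kernel thread whose first act is to take and immediately drop audit_cmd_mutex. Assuming the netlink receive path holds that mutex for the duration of command processing (as the audit code does around audit_receive_msg()), the empty lock/unlock pair acts as a barrier: the reply cannot reach userspace before the command that requested it has finished. Below is a minimal userspace sketch of the same lock-as-barrier idiom, using pthreads instead of kernel primitives; every name in it is illustrative, not kernel API.

#include <pthread.h>
#include <stdio.h>

/* stand-in for audit_cmd_mutex: held while a command is processed */
static pthread_mutex_t cmd_mutex = PTHREAD_MUTEX_INITIALIZER;

static void *reply_thread(void *arg)
{
	/* lock/unlock pair used purely as a barrier: park here until the
	 * command handler that spawned us drops cmd_mutex */
	pthread_mutex_lock(&cmd_mutex);
	pthread_mutex_unlock(&cmd_mutex);

	printf("reply delivered: %s\n", (const char *)arg);
	return NULL;
}

int main(void)
{
	pthread_t tid;

	pthread_mutex_lock(&cmd_mutex);		/* command processing begins */
	pthread_create(&tid, NULL, reply_thread, "list of audit rules");
	/* ... keep handling the command; the reply thread waits ... */
	pthread_mutex_unlock(&cmd_mutex);	/* done; reply may go out */

	pthread_join(tid, NULL);
	return 0;
}

Build with cc -pthread; the reply line can only print after main() releases cmd_mutex, which is the whole point of the idiom.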
diff --git a/kernel/audit.h b/kernel/audit.h
index 3cfc54ee3e1f..9d6717412fec 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -74,6 +74,11 @@ struct audit_entry {
 	struct audit_krule rule;
 };
 
+#ifdef CONFIG_AUDIT
+extern int audit_enabled;
+extern int audit_ever_enabled;
+#endif
+
 extern int audit_pid;
 
 #define AUDIT_INODE_BUCKETS	32
@@ -104,6 +109,9 @@ struct audit_netlink_list {
 int audit_send_list(void *);
 
 struct inotify_watch;
+/* Inotify handle */
+extern struct inotify_handle *audit_ih;
+
 extern void audit_free_parent(struct inotify_watch *);
 extern void audit_handle_ievent(struct inotify_watch *, u32, u32, u32,
 				const char *, struct inode *);
@@ -111,6 +119,7 @@ extern int selinux_audit_rule_update(void);
 
 extern struct mutex audit_filter_mutex;
 extern void audit_free_rule_rcu(struct rcu_head *);
+extern struct list_head audit_filter_list[];
 
 #ifdef CONFIG_AUDIT_TREE
 extern struct audit_chunk *audit_tree_lookup(const struct inode *);
@@ -137,6 +146,10 @@ extern void audit_put_tree(struct audit_tree *);
 
 extern char *audit_unpack_string(void **, size_t *, size_t);
 
+extern pid_t audit_sig_pid;
+extern uid_t audit_sig_uid;
+extern u32 audit_sig_sid;
+
 #ifdef CONFIG_AUDITSYSCALL
 extern int __audit_signal_info(int sig, struct task_struct *t);
 static inline int audit_signal_info(int sig, struct task_struct *t)
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 28fef6bf8534..0e0bd27e6512 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -89,14 +89,9 @@ struct list_head audit_filter_list[AUDIT_NR_FILTERS] = {
 
 DEFINE_MUTEX(audit_filter_mutex);
 
-/* Inotify handle */
-extern struct inotify_handle *audit_ih;
-
 /* Inotify events we care about. */
 #define AUDIT_IN_WATCH	IN_MOVE|IN_CREATE|IN_DELETE|IN_DELETE_SELF|IN_MOVE_SELF
 
-extern int audit_enabled;
-
 void audit_free_parent(struct inotify_watch *i_watch)
 {
 	struct audit_parent *parent;
@@ -272,7 +267,7 @@ static int audit_to_watch(struct audit_krule *krule, char *path, int len,
 		return -EINVAL;
 
 	watch = audit_init_watch(path);
-	if (unlikely(IS_ERR(watch)))
+	if (IS_ERR(watch))
 		return PTR_ERR(watch);
 
 	audit_get_watch(watch);
@@ -422,7 +417,7 @@ exit_err:
 static struct audit_entry *audit_rule_to_entry(struct audit_rule *rule)
 {
 	struct audit_entry *entry;
-	struct audit_field *f;
+	struct audit_field *ino_f;
 	int err = 0;
 	int i;
 
@@ -483,6 +478,10 @@ static struct audit_entry *audit_rule_to_entry(struct audit_rule *rule)
 			if (f->val & ~15)
 				goto exit_free;
 			break;
+		case AUDIT_FILETYPE:
+			if ((f->val & ~S_IFMT) > S_IFMT)
+				goto exit_free;
+			break;
 		case AUDIT_INODE:
 			err = audit_to_inode(&entry->rule, f);
 			if (err)
@@ -504,9 +503,9 @@ static struct audit_entry *audit_rule_to_entry(struct audit_rule *rule)
 		}
 	}
 
-	f = entry->rule.inode_f;
-	if (f) {
-		switch(f->op) {
+	ino_f = entry->rule.inode_f;
+	if (ino_f) {
+		switch(ino_f->op) {
 		case AUDIT_NOT_EQUAL:
 			entry->rule.inode_f = NULL;
 		case AUDIT_EQUAL:
@@ -531,7 +530,7 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
 {
 	int err = 0;
 	struct audit_entry *entry;
-	struct audit_field *f;
+	struct audit_field *ino_f;
 	void *bufp;
 	size_t remain = datasz - sizeof(struct audit_rule_data);
 	int i;
@@ -654,14 +653,18 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
 			if (f->val & ~15)
 				goto exit_free;
 			break;
+		case AUDIT_FILETYPE:
+			if ((f->val & ~S_IFMT) > S_IFMT)
+				goto exit_free;
+			break;
 		default:
 			goto exit_free;
 		}
 	}
 
-	f = entry->rule.inode_f;
-	if (f) {
-		switch(f->op) {
+	ino_f = entry->rule.inode_f;
+	if (ino_f) {
+		switch(ino_f->op) {
 		case AUDIT_NOT_EQUAL:
 			entry->rule.inode_f = NULL;
 		case AUDIT_EQUAL:
@@ -848,7 +851,7 @@ static struct audit_watch *audit_dupe_watch(struct audit_watch *old)
 		return ERR_PTR(-ENOMEM);
 
 	new = audit_init_watch(path);
-	if (unlikely(IS_ERR(new))) {
+	if (IS_ERR(new)) {
 		kfree(path);
 		goto out;
 	}
@@ -989,7 +992,7 @@ static void audit_update_watch(struct audit_parent *parent,
 		audit_set_auditable(current->audit_context);
 
 		nwatch = audit_dupe_watch(owatch);
-		if (unlikely(IS_ERR(nwatch))) {
+		if (IS_ERR(nwatch)) {
 			mutex_unlock(&audit_filter_mutex);
 			audit_panic("error updating watch, skipping");
 			return;
@@ -1004,7 +1007,7 @@ static void audit_update_watch(struct audit_parent *parent,
 		list_del_rcu(&oentry->list);
 
 		nentry = audit_dupe_rule(&oentry->rule, nwatch);
-		if (unlikely(IS_ERR(nentry)))
+		if (IS_ERR(nentry))
 			audit_panic("error updating watch, removing");
 		else {
 			int h = audit_hash_ino((u32)ino);
@@ -1500,8 +1503,9 @@ static void audit_list_rules(int pid, int seq, struct sk_buff_head *q)
 }
 
 /* Log rule additions and removals */
-static void audit_log_rule_change(uid_t loginuid, u32 sid, char *action,
-				  struct audit_krule *rule, int res)
+static void audit_log_rule_change(uid_t loginuid, u32 sessionid, u32 sid,
+				  char *action, struct audit_krule *rule,
+				  int res)
 {
 	struct audit_buffer *ab;
 
@@ -1511,7 +1515,7 @@ static void audit_log_rule_change(uid_t loginuid, u32 sid, char *action,
 	ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
 	if (!ab)
 		return;
-	audit_log_format(ab, "auid=%u", loginuid);
+	audit_log_format(ab, "auid=%u ses=%u", loginuid, sessionid);
 	if (sid) {
 		char *ctx = NULL;
 		u32 len;
@@ -1543,7 +1547,7 @@
 * @sid: SE Linux Security ID of sender
 */
 int audit_receive_filter(int type, int pid, int uid, int seq, void *data,
-			 size_t datasz, uid_t loginuid, u32 sid)
+			 size_t datasz, uid_t loginuid, u32 sessionid, u32 sid)
 {
 	struct task_struct *tsk;
 	struct audit_netlink_list *dest;
@@ -1590,7 +1594,8 @@ int audit_receive_filter(int type, int pid, int uid, int seq, void *data,
 
 		err = audit_add_rule(entry,
 				     &audit_filter_list[entry->rule.listnr]);
-		audit_log_rule_change(loginuid, sid, "add", &entry->rule, !err);
+		audit_log_rule_change(loginuid, sessionid, sid, "add",
+				      &entry->rule, !err);
 
 		if (err)
 			audit_free_rule(entry);
@@ -1606,8 +1611,8 @@ int audit_receive_filter(int type, int pid, int uid, int seq, void *data,
 
 		err = audit_del_rule(entry,
 				     &audit_filter_list[entry->rule.listnr]);
-		audit_log_rule_change(loginuid, sid, "remove", &entry->rule,
-				      !err);
+		audit_log_rule_change(loginuid, sessionid, sid, "remove",
+				      &entry->rule, !err);
 
 		audit_free_rule(entry);
 		break;
@@ -1785,7 +1790,7 @@ int audit_update_lsm_rules(void)
 		watch = entry->rule.watch;
 		tree = entry->rule.tree;
 		nentry = audit_dupe_rule(&entry->rule, watch);
-		if (unlikely(IS_ERR(nentry))) {
+		if (IS_ERR(nentry)) {
 			/* save the first error encountered for the
 			 * return value */
 			if (!err)
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 56e56ed594a8..c10e7aae04d7 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -68,9 +68,6 @@
 
 #include "audit.h"
 
-extern struct list_head audit_filter_list[];
-extern int audit_ever_enabled;
-
 /* AUDIT_NAMES is the number of slots we reserve in the audit_context
 * for saving names from getname(). */
 #define AUDIT_NAMES    20
@@ -283,6 +280,19 @@ static int audit_match_perm(struct audit_context *ctx, int mask)
 	}
 }
 
+static int audit_match_filetype(struct audit_context *ctx, int which)
+{
+	unsigned index = which & ~S_IFMT;
+	mode_t mode = which & S_IFMT;
+	if (index >= ctx->name_count)
+		return 0;
+	if (ctx->names[index].ino == -1)
+		return 0;
+	if ((ctx->names[index].mode ^ mode) & S_IFMT)
+		return 0;
+	return 1;
+}
+
 /*
 * We keep a linked list of fixed-sized (31 pointer) arrays of audit_chunk *;
 * ->first_trees points to its beginning, ->trees - to the current end of data.
@@ -592,6 +602,9 @@ static int audit_filter_rules(struct task_struct *tsk,
 		case AUDIT_PERM:
 			result = audit_match_perm(ctx, f->val);
 			break;
+		case AUDIT_FILETYPE:
+			result = audit_match_filetype(ctx, f->val);
+			break;
 		}
 
 		if (!result)
@@ -1095,7 +1108,7 @@ static int audit_log_single_execve_arg(struct audit_context *context,
 			audit_log_format(*ab, "[%d]", i);
 		audit_log_format(*ab, "=");
 		if (has_cntl)
-			audit_log_hex(*ab, buf, to_send);
+			audit_log_n_hex(*ab, buf, to_send);
 		else
 			audit_log_format(*ab, "\"%s\"", buf);
 		audit_log_format(*ab, "\n");
@@ -1296,7 +1309,6 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
 		break; }
 
 	case AUDIT_SOCKETCALL: {
-		int i;
 		struct audit_aux_data_socketcall *axs = (void *)aux;
 		audit_log_format(ab, "nargs=%d", axs->nargs);
 		for (i=0; i<axs->nargs; i++)
@@ -1307,7 +1319,7 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
 		struct audit_aux_data_sockaddr *axs = (void *)aux;
 
 		audit_log_format(ab, "saddr=");
-		audit_log_hex(ab, axs->a, axs->len);
+		audit_log_n_hex(ab, axs->a, axs->len);
 		break; }
 
 	case AUDIT_FD_PAIR: {
@@ -1321,7 +1333,6 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
 
 	for (aux = context->aux_pids; aux; aux = aux->next) {
 		struct audit_aux_data_pids *axs = (void *)aux;
-		int i;
 
 		for (i = 0; i < axs->pid_count; i++)
 			if (audit_log_pid_context(context, axs->target_pid[i],
@@ -1371,8 +1382,8 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
 		default:
 			/* log the name's directory component */
 			audit_log_format(ab, " name=");
-			audit_log_n_untrustedstring(ab, n->name_len,
-						    n->name);
+			audit_log_n_untrustedstring(ab, n->name,
+						    n->name_len);
 		}
 	} else
 		audit_log_format(ab, " name=(null)");
@@ -1596,7 +1607,7 @@ static inline void handle_one(const struct inode *inode)
 	if (likely(put_tree_ref(context, chunk)))
 		return;
 	if (unlikely(!grow_tree_refs(context))) {
-		printk(KERN_WARNING "out of memory, audit has lost a tree reference");
+		printk(KERN_WARNING "out of memory, audit has lost a tree reference\n");
 		audit_set_auditable(context);
 		audit_put_chunk(chunk);
 		unroll_tree_refs(context, p, count);
@@ -1656,7 +1667,7 @@ retry:
 	}
 	/* too bad */
 	printk(KERN_WARNING
-		"out of memory, audit has lost a tree reference");
+		"out of memory, audit has lost a tree reference\n");
 	unroll_tree_refs(context, p, count);
 	audit_set_auditable(context);
 	return;
@@ -1752,13 +1763,13 @@ static int audit_inc_name_count(struct audit_context *context,
 	if (context->name_count >= AUDIT_NAMES) {
 		if (inode)
 			printk(KERN_DEBUG "name_count maxed, losing inode data: "
-			       "dev=%02x:%02x, inode=%lu",
+			       "dev=%02x:%02x, inode=%lu\n",
 			       MAJOR(inode->i_sb->s_dev),
 			       MINOR(inode->i_sb->s_dev),
 			       inode->i_ino);
 
 		else
-			printk(KERN_DEBUG "name_count maxed, losing inode data");
+			printk(KERN_DEBUG "name_count maxed, losing inode data\n");
 		return 1;
 	}
 	context->name_count++;
@@ -2361,9 +2372,6 @@ int __audit_signal_info(int sig, struct task_struct *t)
 	struct audit_aux_data_pids *axp;
 	struct task_struct *tsk = current;
 	struct audit_context *ctx = tsk->audit_context;
-	extern pid_t audit_sig_pid;
-	extern uid_t audit_sig_uid;
-	extern u32 audit_sig_sid;
 
 	if (audit_pid && t->tgid == audit_pid) {
 		if (sig == SIGTERM || sig == SIGHUP || sig == SIGUSR1) {
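
The AUDIT_FILETYPE support added above packs two values into a single rule constant: the bits outside S_IFMT select an entry in the syscall's recorded name list, and the S_IFMT bits carry the expected file type; audit_match_filetype() then compares only the type bits of the recorded inode mode. A small userspace sketch of that encode/decode step follows; the chosen index, type, and sample mode are invented for illustration.

#include <stdio.h>
#include <sys/stat.h>

int main(void)
{
	/* rule value: name-list index 1, expected type "regular file" */
	unsigned which = 1 | S_IFREG;
	unsigned index = which & ~S_IFMT;	/* -> 1 */
	mode_t mode = which & S_IFMT;		/* -> S_IFREG */

	/* pretend this mode was recorded for ctx->names[index] */
	mode_t recorded = S_IFREG | 0644;

	/* match iff the file-type bits agree, the same test as the patch */
	printf("index=%u match=%d\n", index,
	       ((recorded ^ mode) & S_IFMT) == 0);
	return 0;
}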
diff --git a/kernel/bounds.c b/kernel/bounds.c
index c3c55544db2f..3c5301381837 100644
--- a/kernel/bounds.c
+++ b/kernel/bounds.c
@@ -8,11 +8,7 @@
 /* Include headers that define the enum constants of interest */
 #include <linux/page-flags.h>
 #include <linux/mmzone.h>
-
-#define DEFINE(sym, val) \
-	asm volatile("\n->" #sym " %0 " #val : : "i" (val))
-
-#define BLANK() asm volatile("\n->" : : )
+#include <linux/kbuild.h>
 
 void foo(void)
 {
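
The kernel/bounds.c hunk is pure deduplication: the DEFINE()/BLANK() macros it carried privately now come from <linux/kbuild.h>. The trick they implement deserves a note. DEFINE() forces the compiler to print the constant into its assembly output behind a "->" marker, and the kbuild sed script scrapes those markers into a generated header of #defines, so C-level values (enum constants, struct offsets) can be exported to assembly without running or linking anything. A standalone sketch, with the macro bodies taken from the removed lines above and an invented example constant:

/* compile with: cc -S bounds_sketch.c -o bounds_sketch.s */

#define DEFINE(sym, val) \
	asm volatile("\n->" #sym " %0 " #val : : "i" (val))

#define BLANK() asm volatile("\n->" : : )

enum { EXAMPLE_LIMIT = 42 };	/* stand-in for NR_PAGEFLAGS etc. */

void foo(void)
{
	/* the .s output gains a line like "->EXAMPLE_LIMIT $42 EXAMPLE_LIMIT"
	 * (the exact spelling of the immediate is target-dependent), which
	 * a sed script can turn into "#define EXAMPLE_LIMIT 42" */
	DEFINE(EXAMPLE_LIMIT, EXAMPLE_LIMIT);
	BLANK();
}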
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 6d8de051382b..fbc6fc8949b4 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -44,6 +44,7 @@
 #include <linux/kmod.h>
 #include <linux/delayacct.h>
 #include <linux/cgroupstats.h>
+#include <linux/hash.h>
 
 #include <asm/atomic.h>
 
@@ -118,17 +119,7 @@ static int root_count;
 * be called.
 */
 static int need_forkexit_callback;
-
-/* bits in struct cgroup flags field */
-enum {
-	/* Control Group is dead */
-	CGRP_REMOVED,
-	/* Control Group has previously had a child cgroup or a task,
-	 * but no longer (only if CGRP_NOTIFY_ON_RELEASE is set) */
-	CGRP_RELEASABLE,
-	/* Control Group requires release notifications to userspace */
-	CGRP_NOTIFY_ON_RELEASE,
-};
+static int need_mm_owner_callback __read_mostly;
 
 /* convenient tests for these bits */
 inline int cgroup_is_removed(const struct cgroup *cgrp)
@@ -204,6 +195,27 @@ static struct cg_cgroup_link init_css_set_link;
 static DEFINE_RWLOCK(css_set_lock);
 static int css_set_count;
 
+/* hash table for cgroup groups. This improves the performance to
+ * find an existing css_set */
+#define CSS_SET_HASH_BITS	7
+#define CSS_SET_TABLE_SIZE	(1 << CSS_SET_HASH_BITS)
+static struct hlist_head css_set_table[CSS_SET_TABLE_SIZE];
+
+static struct hlist_head *css_set_hash(struct cgroup_subsys_state *css[])
+{
+	int i;
+	int index;
+	unsigned long tmp = 0UL;
+
+	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++)
+		tmp += (unsigned long)css[i];
+	tmp = (tmp >> 16) ^ tmp;
+
+	index = hash_long(tmp, CSS_SET_HASH_BITS);
+
+	return &css_set_table[index];
+}
+
 /* We don't maintain the lists running through each css_set to its
 * task until after the first call to cgroup_iter_start(). This
 * reduces the fork()/exit() overhead for people who have cgroups
@@ -230,7 +242,7 @@ static int use_task_css_set_links;
 static void unlink_css_set(struct css_set *cg)
 {
 	write_lock(&css_set_lock);
-	list_del(&cg->list);
+	hlist_del(&cg->hlist);
 	css_set_count--;
 	while (!list_empty(&cg->cg_links)) {
 		struct cg_cgroup_link *link;
@@ -295,9 +307,7 @@ static inline void put_css_set_taskexit(struct css_set *cg)
 /*
 * find_existing_css_set() is a helper for
 * find_css_set(), and checks to see whether an existing
- * css_set is suitable. This currently walks a linked-list for
- * simplicity; a later patch will use a hash table for better
- * performance
+ * css_set is suitable.
 *
 * oldcg: the cgroup group that we're using before the cgroup
 * transition
@@ -314,7 +324,9 @@ static struct css_set *find_existing_css_set(
 {
 	int i;
 	struct cgroupfs_root *root = cgrp->root;
-	struct list_head *l = &init_css_set.list;
+	struct hlist_head *hhead;
+	struct hlist_node *node;
+	struct css_set *cg;
 
 	/* Built the set of subsystem state objects that we want to
 	 * see in the new css_set */
@@ -331,18 +343,13 @@ static struct css_set *find_existing_css_set(
 		}
 	}
 
-	/* Look through existing cgroup groups to find one to reuse */
-	do {
-		struct css_set *cg =
-			list_entry(l, struct css_set, list);
-
+	hhead = css_set_hash(template);
+	hlist_for_each_entry(cg, node, hhead, hlist) {
 		if (!memcmp(template, cg->subsys, sizeof(cg->subsys))) {
 			/* All subsystems matched */
 			return cg;
 		}
-		/* Try the next cgroup group */
-		l = l->next;
-	} while (l != &init_css_set.list);
+	}
 
 	/* No existing cgroup group matched */
 	return NULL;
@@ -404,6 +411,8 @@ static struct css_set *find_css_set(
 	struct list_head tmp_cg_links;
 	struct cg_cgroup_link *link;
 
+	struct hlist_head *hhead;
+
 	/* First see if we already have a cgroup group that matches
 	 * the desired set */
 	write_lock(&css_set_lock);
@@ -428,6 +437,7 @@ static struct css_set *find_css_set(
 	kref_init(&res->ref);
 	INIT_LIST_HEAD(&res->cg_links);
 	INIT_LIST_HEAD(&res->tasks);
+	INIT_HLIST_NODE(&res->hlist);
 
 	/* Copy the set of subsystem state objects generated in
 	 * find_existing_css_set() */
@@ -467,9 +477,12 @@ static struct css_set *find_css_set(
 
 	BUG_ON(!list_empty(&tmp_cg_links));
 
-	/* Link this cgroup group into the list */
-	list_add(&res->list, &init_css_set.list);
 	css_set_count++;
+
+	/* Add this cgroup group to the hash table */
+	hhead = css_set_hash(res->subsys);
+	hlist_add_head(&res->hlist, hhead);
+
 	write_unlock(&css_set_lock);
 
 	return res;
@@ -562,7 +575,7 @@ static struct inode_operations cgroup_dir_inode_operations;
 static struct file_operations proc_cgroupstats_operations;
 
 static struct backing_dev_info cgroup_backing_dev_info = {
-	.capabilities	= BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK,
+	.capabilities	= BDI_CAP_NO_ACCT_AND_WRITEBACK,
 };
 
 static struct inode *cgroup_new_inode(mode_t mode, struct super_block *sb)
@@ -948,7 +961,7 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
 	int ret = 0;
 	struct super_block *sb;
 	struct cgroupfs_root *root;
-	struct list_head tmp_cg_links, *l;
+	struct list_head tmp_cg_links;
 	INIT_LIST_HEAD(&tmp_cg_links);
 
 	/* First find the desired set of subsystems */
@@ -990,6 +1003,7 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
 		/* New superblock */
 		struct cgroup *cgrp = &root->top_cgroup;
992 | struct inode *inode; | 1005 | struct inode *inode; |
1006 | int i; | ||
993 | 1007 | ||
994 | BUG_ON(sb->s_root != NULL); | 1008 | BUG_ON(sb->s_root != NULL); |
995 | 1009 | ||
@@ -1034,22 +1048,25 @@ static int cgroup_get_sb(struct file_system_type *fs_type, | |||
1034 | /* Link the top cgroup in this hierarchy into all | 1048 | /* Link the top cgroup in this hierarchy into all |
1035 | * the css_set objects */ | 1049 | * the css_set objects */ |
1036 | write_lock(&css_set_lock); | 1050 | write_lock(&css_set_lock); |
1037 | l = &init_css_set.list; | 1051 | for (i = 0; i < CSS_SET_TABLE_SIZE; i++) { |
1038 | do { | 1052 | struct hlist_head *hhead = &css_set_table[i]; |
1053 | struct hlist_node *node; | ||
1039 | struct css_set *cg; | 1054 | struct css_set *cg; |
1040 | struct cg_cgroup_link *link; | 1055 | |
1041 | cg = list_entry(l, struct css_set, list); | 1056 | hlist_for_each_entry(cg, node, hhead, hlist) { |
1042 | BUG_ON(list_empty(&tmp_cg_links)); | 1057 | struct cg_cgroup_link *link; |
1043 | link = list_entry(tmp_cg_links.next, | 1058 | |
1044 | struct cg_cgroup_link, | 1059 | BUG_ON(list_empty(&tmp_cg_links)); |
1045 | cgrp_link_list); | 1060 | link = list_entry(tmp_cg_links.next, |
1046 | list_del(&link->cgrp_link_list); | 1061 | struct cg_cgroup_link, |
1047 | link->cg = cg; | 1062 | cgrp_link_list); |
1048 | list_add(&link->cgrp_link_list, | 1063 | list_del(&link->cgrp_link_list); |
1049 | &root->top_cgroup.css_sets); | 1064 | link->cg = cg; |
1050 | list_add(&link->cg_link_list, &cg->cg_links); | 1065 | list_add(&link->cgrp_link_list, |
1051 | l = l->next; | 1066 | &root->top_cgroup.css_sets); |
1052 | } while (l != &init_css_set.list); | 1067 | list_add(&link->cg_link_list, &cg->cg_links); |
1068 | } | ||
1069 | } | ||
1053 | write_unlock(&css_set_lock); | 1070 | write_unlock(&css_set_lock); |
1054 | 1071 | ||
1055 | free_cg_links(&tmp_cg_links); | 1072 | free_cg_links(&tmp_cg_links); |
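
One structural consequence of the hash conversion shows up in this hunk: with the global css_set list gone, "do something for every css_set" becomes a walk over every bucket. A condensed sketch of the pattern used above (the visitor name is a placeholder):

	int i;

	for (i = 0; i < CSS_SET_TABLE_SIZE; i++) {
		struct hlist_node *node;
		struct css_set *cg;

		hlist_for_each_entry(cg, node, &css_set_table[i], hlist)
			link_to_new_hierarchy(cg);	/* placeholder for the fixup */
	}

Note the four-argument hlist_for_each_entry() of this kernel vintage, which threads an explicit struct hlist_node cursor alongside the entry pointer.
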
@@ -1307,18 +1324,16 @@ enum cgroup_filetype { | |||
1307 | FILE_DIR, | 1324 | FILE_DIR, |
1308 | FILE_TASKLIST, | 1325 | FILE_TASKLIST, |
1309 | FILE_NOTIFY_ON_RELEASE, | 1326 | FILE_NOTIFY_ON_RELEASE, |
1310 | FILE_RELEASABLE, | ||
1311 | FILE_RELEASE_AGENT, | 1327 | FILE_RELEASE_AGENT, |
1312 | }; | 1328 | }; |
1313 | 1329 | ||
1314 | static ssize_t cgroup_write_uint(struct cgroup *cgrp, struct cftype *cft, | 1330 | static ssize_t cgroup_write_X64(struct cgroup *cgrp, struct cftype *cft, |
1315 | struct file *file, | 1331 | struct file *file, |
1316 | const char __user *userbuf, | 1332 | const char __user *userbuf, |
1317 | size_t nbytes, loff_t *unused_ppos) | 1333 | size_t nbytes, loff_t *unused_ppos) |
1318 | { | 1334 | { |
1319 | char buffer[64]; | 1335 | char buffer[64]; |
1320 | int retval = 0; | 1336 | int retval = 0; |
1321 | u64 val; | ||
1322 | char *end; | 1337 | char *end; |
1323 | 1338 | ||
1324 | if (!nbytes) | 1339 | if (!nbytes) |
@@ -1329,16 +1344,18 @@ static ssize_t cgroup_write_uint(struct cgroup *cgrp, struct cftype *cft, | |||
1329 | return -EFAULT; | 1344 | return -EFAULT; |
1330 | 1345 | ||
1331 | buffer[nbytes] = 0; /* nul-terminate */ | 1346 | buffer[nbytes] = 0; /* nul-terminate */ |
1332 | 1347 | strstrip(buffer); | |
1333 | /* strip newline if necessary */ | 1348 | if (cft->write_u64) { |
1334 | if (nbytes && (buffer[nbytes-1] == '\n')) | 1349 | u64 val = simple_strtoull(buffer, &end, 0); |
1335 | buffer[nbytes-1] = 0; | 1350 | if (*end) |
1336 | val = simple_strtoull(buffer, &end, 0); | 1351 | return -EINVAL; |
1337 | if (*end) | 1352 | retval = cft->write_u64(cgrp, cft, val); |
1338 | return -EINVAL; | 1353 | } else { |
1339 | 1354 | s64 val = simple_strtoll(buffer, &end, 0); | |
1340 | /* Pass to subsystem */ | 1355 | if (*end) |
1341 | retval = cft->write_uint(cgrp, cft, val); | 1356 | return -EINVAL; |
1357 | retval = cft->write_s64(cgrp, cft, val); | ||
1358 | } | ||
1342 | if (!retval) | 1359 | if (!retval) |
1343 | retval = nbytes; | 1360 | retval = nbytes; |
1344 | return retval; | 1361 | return retval; |
@@ -1419,23 +1436,39 @@ static ssize_t cgroup_file_write(struct file *file, const char __user *buf, | |||
1419 | return -ENODEV; | 1436 | return -ENODEV; |
1420 | if (cft->write) | 1437 | if (cft->write) |
1421 | return cft->write(cgrp, cft, file, buf, nbytes, ppos); | 1438 | return cft->write(cgrp, cft, file, buf, nbytes, ppos); |
1422 | if (cft->write_uint) | 1439 | if (cft->write_u64 || cft->write_s64) |
1423 | return cgroup_write_uint(cgrp, cft, file, buf, nbytes, ppos); | 1440 | return cgroup_write_X64(cgrp, cft, file, buf, nbytes, ppos); |
1441 | if (cft->trigger) { | ||
1442 | int ret = cft->trigger(cgrp, (unsigned int)cft->private); | ||
1443 | return ret ? ret : nbytes; | ||
1444 | } | ||
1424 | return -EINVAL; | 1445 | return -EINVAL; |
1425 | } | 1446 | } |
1426 | 1447 | ||
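
The new ->trigger hook gives write-only "action" files a cheap path: the payload is ignored, the handler fires once, and a successful trigger reports the whole write as consumed. A hypothetical subsystem file using it (the myss_*/MYSS_* names are illustrative, not part of this patch):

	static int myss_reset_stats(struct cgroup *cgrp, unsigned int event)
	{
		memset(&myss_from_cgroup(cgrp)->stats, 0,
		       sizeof(struct myss_stats));
		return 0;
	}

	static struct cftype myss_reset_file = {
		.name = "reset_stats",
		.trigger = myss_reset_stats,
		.private = MYSS_FILE_RESET,	/* passed back as 'event' */
	};
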
1427 | static ssize_t cgroup_read_uint(struct cgroup *cgrp, struct cftype *cft, | 1448 | static ssize_t cgroup_read_u64(struct cgroup *cgrp, struct cftype *cft, |
1428 | struct file *file, | 1449 | struct file *file, |
1429 | char __user *buf, size_t nbytes, | 1450 | char __user *buf, size_t nbytes, |
1430 | loff_t *ppos) | 1451 | loff_t *ppos) |
1431 | { | 1452 | { |
1432 | char tmp[64]; | 1453 | char tmp[64]; |
1433 | u64 val = cft->read_uint(cgrp, cft); | 1454 | u64 val = cft->read_u64(cgrp, cft); |
1434 | int len = sprintf(tmp, "%llu\n", (unsigned long long) val); | 1455 | int len = sprintf(tmp, "%llu\n", (unsigned long long) val); |
1435 | 1456 | ||
1436 | return simple_read_from_buffer(buf, nbytes, ppos, tmp, len); | 1457 | return simple_read_from_buffer(buf, nbytes, ppos, tmp, len); |
1437 | } | 1458 | } |
1438 | 1459 | ||
1460 | static ssize_t cgroup_read_s64(struct cgroup *cgrp, struct cftype *cft, | ||
1461 | struct file *file, | ||
1462 | char __user *buf, size_t nbytes, | ||
1463 | loff_t *ppos) | ||
1464 | { | ||
1465 | char tmp[64]; | ||
1466 | s64 val = cft->read_s64(cgrp, cft); | ||
1467 | int len = sprintf(tmp, "%lld\n", (long long) val); | ||
1468 | |||
1469 | return simple_read_from_buffer(buf, nbytes, ppos, tmp, len); | ||
1470 | } | ||
1471 | |||
1439 | static ssize_t cgroup_common_file_read(struct cgroup *cgrp, | 1472 | static ssize_t cgroup_common_file_read(struct cgroup *cgrp, |
1440 | struct cftype *cft, | 1473 | struct cftype *cft, |
1441 | struct file *file, | 1474 | struct file *file, |
@@ -1490,11 +1523,56 @@ static ssize_t cgroup_file_read(struct file *file, char __user *buf, | |||
1490 | 1523 | ||
1491 | if (cft->read) | 1524 | if (cft->read) |
1492 | return cft->read(cgrp, cft, file, buf, nbytes, ppos); | 1525 | return cft->read(cgrp, cft, file, buf, nbytes, ppos); |
1493 | if (cft->read_uint) | 1526 | if (cft->read_u64) |
1494 | return cgroup_read_uint(cgrp, cft, file, buf, nbytes, ppos); | 1527 | return cgroup_read_u64(cgrp, cft, file, buf, nbytes, ppos); |
1528 | if (cft->read_s64) | ||
1529 | return cgroup_read_s64(cgrp, cft, file, buf, nbytes, ppos); | ||
1495 | return -EINVAL; | 1530 | return -EINVAL; |
1496 | } | 1531 | } |
1497 | 1532 | ||
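
Taken together, the rename from read_uint/write_uint to read_u64/write_u64, plus the new s64 pair, makes the handler's value type part of the interface. A hypothetical subsystem exporting one tunable through the renamed pair (myss_* names are illustrative):

	static u64 myss_weight_read(struct cgroup *cgrp, struct cftype *cft)
	{
		return myss_from_cgroup(cgrp)->weight;
	}

	static int myss_weight_write(struct cgroup *cgrp, struct cftype *cft,
				     u64 val)
	{
		if (val > 1000)
			return -EINVAL;	/* reject out-of-range writes */
		myss_from_cgroup(cgrp)->weight = val;
		return 0;
	}

	static struct cftype myss_weight_file = {
		.name = "weight",
		.read_u64 = myss_weight_read,
		.write_u64 = myss_weight_write,
	};
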
1533 | /* | ||
1534 | * seqfile ops/methods for returning structured data. Currently just | ||
1535 | * supports string->u64 maps, but can be extended in future. | ||
1536 | */ | ||
1537 | |||
1538 | struct cgroup_seqfile_state { | ||
1539 | struct cftype *cft; | ||
1540 | struct cgroup *cgroup; | ||
1541 | }; | ||
1542 | |||
1543 | static int cgroup_map_add(struct cgroup_map_cb *cb, const char *key, u64 value) | ||
1544 | { | ||
1545 | struct seq_file *sf = cb->state; | ||
1546 | return seq_printf(sf, "%s %llu\n", key, (unsigned long long)value); | ||
1547 | } | ||
1548 | |||
1549 | static int cgroup_seqfile_show(struct seq_file *m, void *arg) | ||
1550 | { | ||
1551 | struct cgroup_seqfile_state *state = m->private; | ||
1552 | struct cftype *cft = state->cft; | ||
1553 | if (cft->read_map) { | ||
1554 | struct cgroup_map_cb cb = { | ||
1555 | .fill = cgroup_map_add, | ||
1556 | .state = m, | ||
1557 | }; | ||
1558 | return cft->read_map(state->cgroup, cft, &cb); | ||
1559 | } | ||
1560 | return cft->read_seq_string(state->cgroup, cft, m); | ||
1561 | } | ||
1562 | |||
1563 | int cgroup_seqfile_release(struct inode *inode, struct file *file) | ||
1564 | { | ||
1565 | struct seq_file *seq = file->private_data; | ||
1566 | kfree(seq->private); | ||
1567 | return single_release(inode, file); | ||
1568 | } | ||
1569 | |||
1570 | static struct file_operations cgroup_seqfile_operations = { | ||
1571 | .read = seq_read, | ||
1572 | .llseek = seq_lseek, | ||
1573 | .release = cgroup_seqfile_release, | ||
1574 | }; | ||
1575 | |||
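A subsystem opts into this machinery simply by setting read_map (or read_seq_string) in its cftype; the open path below then swaps in the seq_file operations. A hypothetical read_map implementation emitting two counters (myss_* names are illustrative):

	static int myss_stats_read_map(struct cgroup *cgrp, struct cftype *cft,
				       struct cgroup_map_cb *cb)
	{
		struct myss_state *st = myss_from_cgroup(cgrp);

		cb->fill(cb, "hits", st->hits);
		cb->fill(cb, "misses", st->misses);
		return 0;
	}

Reads of the file then produce "hits <n>" and "misses <n>" lines via cgroup_map_add() above.
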
1498 | static int cgroup_file_open(struct inode *inode, struct file *file) | 1576 | static int cgroup_file_open(struct inode *inode, struct file *file) |
1499 | { | 1577 | { |
1500 | int err; | 1578 | int err; |
@@ -1507,7 +1585,18 @@ static int cgroup_file_open(struct inode *inode, struct file *file) | |||
1507 | cft = __d_cft(file->f_dentry); | 1585 | cft = __d_cft(file->f_dentry); |
1508 | if (!cft) | 1586 | if (!cft) |
1509 | return -ENODEV; | 1587 | return -ENODEV; |
1510 | if (cft->open) | 1588 | if (cft->read_map || cft->read_seq_string) { |
1589 | struct cgroup_seqfile_state *state = | ||
1590 | kzalloc(sizeof(*state), GFP_USER); | ||
1591 | if (!state) | ||
1592 | return -ENOMEM; | ||
1593 | state->cft = cft; | ||
1594 | state->cgroup = __d_cgrp(file->f_dentry->d_parent); | ||
1595 | file->f_op = &cgroup_seqfile_operations; | ||
1596 | err = single_open(file, cgroup_seqfile_show, state); | ||
1597 | if (err < 0) | ||
1598 | kfree(state); | ||
1599 | } else if (cft->open) | ||
1511 | err = cft->open(inode, file); | 1600 | err = cft->open(inode, file); |
1512 | else | 1601 | else |
1513 | err = 0; | 1602 | err = 0; |
@@ -1715,7 +1804,7 @@ static void cgroup_advance_iter(struct cgroup *cgrp, | |||
1715 | * The tasklist_lock is not held here, as do_each_thread() and | 1804 | * The tasklist_lock is not held here, as do_each_thread() and |
1716 | * while_each_thread() are protected by RCU. | 1805 | * while_each_thread() are protected by RCU. |
1717 | */ | 1806 | */ |
1718 | void cgroup_enable_task_cg_lists(void) | 1807 | static void cgroup_enable_task_cg_lists(void) |
1719 | { | 1808 | { |
1720 | struct task_struct *p, *g; | 1809 | struct task_struct *p, *g; |
1721 | write_lock(&css_set_lock); | 1810 | write_lock(&css_set_lock); |
@@ -1913,14 +2002,14 @@ int cgroup_scan_tasks(struct cgroup_scanner *scan) | |||
1913 | 2002 | ||
1914 | if (heap->size) { | 2003 | if (heap->size) { |
1915 | for (i = 0; i < heap->size; i++) { | 2004 | for (i = 0; i < heap->size; i++) { |
1916 | struct task_struct *p = heap->ptrs[i]; | 2005 | struct task_struct *q = heap->ptrs[i]; |
1917 | if (i == 0) { | 2006 | if (i == 0) { |
1918 | latest_time = p->start_time; | 2007 | latest_time = q->start_time; |
1919 | latest_task = p; | 2008 | latest_task = q; |
1920 | } | 2009 | } |
1921 | /* Process the task per the caller's callback */ | 2010 | /* Process the task per the caller's callback */ |
1922 | scan->process_task(p, scan); | 2011 | scan->process_task(q, scan); |
1923 | put_task_struct(p); | 2012 | put_task_struct(q); |
1924 | } | 2013 | } |
1925 | /* | 2014 | /* |
1926 | * If we had to process any tasks at all, scan again | 2015 | * If we had to process any tasks at all, scan again |
@@ -2138,11 +2227,6 @@ static u64 cgroup_read_notify_on_release(struct cgroup *cgrp, | |||
2138 | return notify_on_release(cgrp); | 2227 | return notify_on_release(cgrp); |
2139 | } | 2228 | } |
2140 | 2229 | ||
2141 | static u64 cgroup_read_releasable(struct cgroup *cgrp, struct cftype *cft) | ||
2142 | { | ||
2143 | return test_bit(CGRP_RELEASABLE, &cgrp->flags); | ||
2144 | } | ||
2145 | |||
2146 | /* | 2230 | /* |
2147 | * for the common functions, 'private' gives the type of file | 2231 | * for the common functions, 'private' gives the type of file |
2148 | */ | 2232 | */ |
@@ -2158,16 +2242,10 @@ static struct cftype files[] = { | |||
2158 | 2242 | ||
2159 | { | 2243 | { |
2160 | .name = "notify_on_release", | 2244 | .name = "notify_on_release", |
2161 | .read_uint = cgroup_read_notify_on_release, | 2245 | .read_u64 = cgroup_read_notify_on_release, |
2162 | .write = cgroup_common_file_write, | 2246 | .write = cgroup_common_file_write, |
2163 | .private = FILE_NOTIFY_ON_RELEASE, | 2247 | .private = FILE_NOTIFY_ON_RELEASE, |
2164 | }, | 2248 | }, |
2165 | |||
2166 | { | ||
2167 | .name = "releasable", | ||
2168 | .read_uint = cgroup_read_releasable, | ||
2169 | .private = FILE_RELEASABLE, | ||
2170 | } | ||
2171 | }; | 2249 | }; |
2172 | 2250 | ||
2173 | static struct cftype cft_release_agent = { | 2251 | static struct cftype cft_release_agent = { |
@@ -2401,10 +2479,9 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry) | |||
2401 | return 0; | 2479 | return 0; |
2402 | } | 2480 | } |
2403 | 2481 | ||
2404 | static void cgroup_init_subsys(struct cgroup_subsys *ss) | 2482 | static void __init cgroup_init_subsys(struct cgroup_subsys *ss) |
2405 | { | 2483 | { |
2406 | struct cgroup_subsys_state *css; | 2484 | struct cgroup_subsys_state *css; |
2407 | struct list_head *l; | ||
2408 | 2485 | ||
2409 | printk(KERN_INFO "Initializing cgroup subsys %s\n", ss->name); | 2486 | printk(KERN_INFO "Initializing cgroup subsys %s\n", ss->name); |
2410 | 2487 | ||
@@ -2415,34 +2492,19 @@ static void cgroup_init_subsys(struct cgroup_subsys *ss) | |||
2415 | BUG_ON(IS_ERR(css)); | 2492 | BUG_ON(IS_ERR(css)); |
2416 | init_cgroup_css(css, ss, dummytop); | 2493 | init_cgroup_css(css, ss, dummytop); |
2417 | 2494 | ||
2418 | /* Update all cgroup groups to contain a subsys | 2495 | /* Update the init_css_set to contain a subsys |
2419 | * pointer to this state - since the subsystem is | 2496 | * pointer to this state - since the subsystem is |
2420 | * newly registered, all tasks and hence all cgroup | 2497 | * newly registered, all tasks and hence the |
2421 | * groups are in the subsystem's top cgroup. */ | 2498 | * init_css_set is in the subsystem's top cgroup. */ |
2422 | write_lock(&css_set_lock); | 2499 | init_css_set.subsys[ss->subsys_id] = dummytop->subsys[ss->subsys_id]; |
2423 | l = &init_css_set.list; | ||
2424 | do { | ||
2425 | struct css_set *cg = | ||
2426 | list_entry(l, struct css_set, list); | ||
2427 | cg->subsys[ss->subsys_id] = dummytop->subsys[ss->subsys_id]; | ||
2428 | l = l->next; | ||
2429 | } while (l != &init_css_set.list); | ||
2430 | write_unlock(&css_set_lock); | ||
2431 | |||
2432 | /* If this subsystem requested that it be notified with fork | ||
2433 | * events, we should send it one now for every process in the | ||
2434 | * system */ | ||
2435 | if (ss->fork) { | ||
2436 | struct task_struct *g, *p; | ||
2437 | |||
2438 | read_lock(&tasklist_lock); | ||
2439 | do_each_thread(g, p) { | ||
2440 | ss->fork(ss, p); | ||
2441 | } while_each_thread(g, p); | ||
2442 | read_unlock(&tasklist_lock); | ||
2443 | } | ||
2444 | 2500 | ||
2445 | need_forkexit_callback |= ss->fork || ss->exit; | 2501 | need_forkexit_callback |= ss->fork || ss->exit; |
2502 | need_mm_owner_callback |= !!ss->mm_owner_changed; | ||
2503 | |||
2504 | /* At system boot, before all subsystems have been | ||
2505 | * registered, no tasks have been forked, so we don't | ||
2506 | * need to invoke fork callbacks here. */ | ||
2507 | BUG_ON(!list_empty(&init_task.tasks)); | ||
2446 | 2508 | ||
2447 | ss->active = 1; | 2509 | ss->active = 1; |
2448 | } | 2510 | } |
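
The removed replay loop is only safe to drop because cgroup_init_subsys() is now __init and runs before anything has forked, so there are no tasks to notify; the BUG_ON pins that assumption rather than leaving it implicit:

	/* Boot-time invariant relied on above: init_task is the only task,
	 * so the global task list threaded through init_task.tasks must
	 * still be empty. */
	BUG_ON(!list_empty(&init_task.tasks));
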
@@ -2458,9 +2520,9 @@ int __init cgroup_init_early(void) | |||
2458 | int i; | 2520 | int i; |
2459 | kref_init(&init_css_set.ref); | 2521 | kref_init(&init_css_set.ref); |
2460 | kref_get(&init_css_set.ref); | 2522 | kref_get(&init_css_set.ref); |
2461 | INIT_LIST_HEAD(&init_css_set.list); | ||
2462 | INIT_LIST_HEAD(&init_css_set.cg_links); | 2523 | INIT_LIST_HEAD(&init_css_set.cg_links); |
2463 | INIT_LIST_HEAD(&init_css_set.tasks); | 2524 | INIT_LIST_HEAD(&init_css_set.tasks); |
2525 | INIT_HLIST_NODE(&init_css_set.hlist); | ||
2464 | css_set_count = 1; | 2526 | css_set_count = 1; |
2465 | init_cgroup_root(&rootnode); | 2527 | init_cgroup_root(&rootnode); |
2466 | list_add(&rootnode.root_list, &roots); | 2528 | list_add(&rootnode.root_list, &roots); |
@@ -2473,6 +2535,9 @@ int __init cgroup_init_early(void) | |||
2473 | list_add(&init_css_set_link.cg_link_list, | 2535 | list_add(&init_css_set_link.cg_link_list, |
2474 | &init_css_set.cg_links); | 2536 | &init_css_set.cg_links); |
2475 | 2537 | ||
2538 | for (i = 0; i < CSS_SET_TABLE_SIZE; i++) | ||
2539 | INIT_HLIST_HEAD(&css_set_table[i]); | ||
2540 | |||
2476 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { | 2541 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { |
2477 | struct cgroup_subsys *ss = subsys[i]; | 2542 | struct cgroup_subsys *ss = subsys[i]; |
2478 | 2543 | ||
@@ -2502,7 +2567,7 @@ int __init cgroup_init(void) | |||
2502 | { | 2567 | { |
2503 | int err; | 2568 | int err; |
2504 | int i; | 2569 | int i; |
2505 | struct proc_dir_entry *entry; | 2570 | struct hlist_head *hhead; |
2506 | 2571 | ||
2507 | err = bdi_init(&cgroup_backing_dev_info); | 2572 | err = bdi_init(&cgroup_backing_dev_info); |
2508 | if (err) | 2573 | if (err) |
@@ -2514,13 +2579,15 @@ int __init cgroup_init(void) | |||
2514 | cgroup_init_subsys(ss); | 2579 | cgroup_init_subsys(ss); |
2515 | } | 2580 | } |
2516 | 2581 | ||
2582 | /* Add init_css_set to the hash table */ | ||
2583 | hhead = css_set_hash(init_css_set.subsys); | ||
2584 | hlist_add_head(&init_css_set.hlist, hhead); | ||
2585 | |||
2517 | err = register_filesystem(&cgroup_fs_type); | 2586 | err = register_filesystem(&cgroup_fs_type); |
2518 | if (err < 0) | 2587 | if (err < 0) |
2519 | goto out; | 2588 | goto out; |
2520 | 2589 | ||
2521 | entry = create_proc_entry("cgroups", 0, NULL); | 2590 | proc_create("cgroups", 0, NULL, &proc_cgroupstats_operations); |
2522 | if (entry) | ||
2523 | entry->proc_fops = &proc_cgroupstats_operations; | ||
2524 | 2591 | ||
2525 | out: | 2592 | out: |
2526 | if (err) | 2593 | if (err) |
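
The same create_proc_entry()-then-assign-proc_fops conversion recurs in kernel/configs.c and kernel/dma.c below. proc_create() publishes the entry with its file operations already attached, closing the window in which an open of the /proc file could race against the proc_fops assignment. A minimal sketch of the new idiom (mydrv_* names are illustrative):

	static int __init mydrv_proc_init(void)
	{
		if (!proc_create("mydrv", 0, NULL, &mydrv_proc_fops))
			return -ENOMEM;	/* entry never became visible */
		return 0;
	}
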
@@ -2683,6 +2750,34 @@ void cgroup_fork_callbacks(struct task_struct *child) | |||
2683 | } | 2750 | } |
2684 | } | 2751 | } |
2685 | 2752 | ||
2753 | #ifdef CONFIG_MM_OWNER | ||
2754 | /** | ||
2755 | * cgroup_mm_owner_callbacks - run callbacks when the mm->owner changes | ||
2756 | * @old: the old owner | ||
2757 | * @new: the new owner | ||
2758 | * Called on every change to mm->owner. mm_init_owner() does not | ||
2759 | * invoke this routine, since it assigns the mm->owner the first time | ||
2760 | * and does not change it. | ||
2761 | */ | ||
2762 | void cgroup_mm_owner_callbacks(struct task_struct *old, struct task_struct *new) | ||
2763 | { | ||
2764 | struct cgroup *oldcgrp, *newcgrp; | ||
2765 | |||
2766 | if (need_mm_owner_callback) { | ||
2767 | int i; | ||
2768 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { | ||
2769 | struct cgroup_subsys *ss = subsys[i]; | ||
2770 | oldcgrp = task_cgroup(old, ss->subsys_id); | ||
2771 | newcgrp = task_cgroup(new, ss->subsys_id); | ||
2772 | if (oldcgrp == newcgrp) | ||
2773 | continue; | ||
2774 | if (ss->mm_owner_changed) | ||
2775 | ss->mm_owner_changed(ss, oldcgrp, newcgrp); | ||
2776 | } | ||
2777 | } | ||
2778 | } | ||
2779 | #endif /* CONFIG_MM_OWNER */ | ||
2780 | |||
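A subsystem that charges state to an mm (the memory controller being the motivating case) hooks in by filling mm_owner_changed; need_mm_owner_callback, set in cgroup_init_subsys() above, gates the whole walk. A hypothetical handler matching the callback signature used here (myss_* names are illustrative):

	static void myss_mm_owner_changed(struct cgroup_subsys *ss,
					  struct cgroup *old,
					  struct cgroup *new)
	{
		/* migrate per-mm accounting from 'old' to 'new' (sketch) */
		myss_move_charges(myss_from_cgroup(old), myss_from_cgroup(new));
	}
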
2686 | /** | 2781 | /** |
2687 | * cgroup_post_fork - called on a new task after adding it to the task list | 2782 | * cgroup_post_fork - called on a new task after adding it to the task list |
2688 | * @child: the task in question | 2783 | * @child: the task in question |
diff --git a/kernel/cgroup_debug.c b/kernel/cgroup_debug.c index 37301e877cb0..c3dc3aba4c02 100644 --- a/kernel/cgroup_debug.c +++ b/kernel/cgroup_debug.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * kernel/ccontainer_debug.c - Example cgroup subsystem that | 2 | * kernel/cgroup_debug.c - Example cgroup subsystem that |
3 | * exposes debug info | 3 | * exposes debug info |
4 | * | 4 | * |
5 | * Copyright (C) Google Inc, 2007 | 5 | * Copyright (C) Google Inc, 2007 |
@@ -62,25 +62,35 @@ static u64 current_css_set_refcount_read(struct cgroup *cont, | |||
62 | return count; | 62 | return count; |
63 | } | 63 | } |
64 | 64 | ||
65 | static u64 releasable_read(struct cgroup *cgrp, struct cftype *cft) | ||
66 | { | ||
67 | return test_bit(CGRP_RELEASABLE, &cgrp->flags); | ||
68 | } | ||
69 | |||
65 | static struct cftype files[] = { | 70 | static struct cftype files[] = { |
66 | { | 71 | { |
67 | .name = "cgroup_refcount", | 72 | .name = "cgroup_refcount", |
68 | .read_uint = cgroup_refcount_read, | 73 | .read_u64 = cgroup_refcount_read, |
69 | }, | 74 | }, |
70 | { | 75 | { |
71 | .name = "taskcount", | 76 | .name = "taskcount", |
72 | .read_uint = taskcount_read, | 77 | .read_u64 = taskcount_read, |
73 | }, | 78 | }, |
74 | 79 | ||
75 | { | 80 | { |
76 | .name = "current_css_set", | 81 | .name = "current_css_set", |
77 | .read_uint = current_css_set_read, | 82 | .read_u64 = current_css_set_read, |
78 | }, | 83 | }, |
79 | 84 | ||
80 | { | 85 | { |
81 | .name = "current_css_set_refcount", | 86 | .name = "current_css_set_refcount", |
82 | .read_uint = current_css_set_refcount_read, | 87 | .read_u64 = current_css_set_refcount_read, |
83 | }, | 88 | }, |
89 | |||
90 | { | ||
91 | .name = "releasable", | ||
92 | .read_u64 = releasable_read, | ||
93 | } | ||
84 | }; | 94 | }; |
85 | 95 | ||
86 | static int debug_populate(struct cgroup_subsys *ss, struct cgroup *cont) | 96 | static int debug_populate(struct cgroup_subsys *ss, struct cgroup *cont) |
diff --git a/kernel/compat.c b/kernel/compat.c index e1ef04870c2a..32c254a8ab9a 100644 --- a/kernel/compat.c +++ b/kernel/compat.c | |||
@@ -898,7 +898,7 @@ asmlinkage long compat_sys_rt_sigsuspend(compat_sigset_t __user *unewset, compat | |||
898 | 898 | ||
899 | current->state = TASK_INTERRUPTIBLE; | 899 | current->state = TASK_INTERRUPTIBLE; |
900 | schedule(); | 900 | schedule(); |
901 | set_thread_flag(TIF_RESTORE_SIGMASK); | 901 | set_restore_sigmask(); |
902 | return -ERESTARTNOHAND; | 902 | return -ERESTARTNOHAND; |
903 | } | 903 | } |
904 | #endif /* __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND */ | 904 | #endif /* __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND */ |
@@ -955,7 +955,8 @@ asmlinkage long compat_sys_adjtimex(struct compat_timex __user *utp) | |||
955 | __put_user(txc.jitcnt, &utp->jitcnt) || | 955 | __put_user(txc.jitcnt, &utp->jitcnt) || |
956 | __put_user(txc.calcnt, &utp->calcnt) || | 956 | __put_user(txc.calcnt, &utp->calcnt) || |
957 | __put_user(txc.errcnt, &utp->errcnt) || | 957 | __put_user(txc.errcnt, &utp->errcnt) || |
958 | __put_user(txc.stbcnt, &utp->stbcnt)) | 958 | __put_user(txc.stbcnt, &utp->stbcnt) || |
959 | __put_user(txc.tai, &utp->tai)) | ||
959 | ret = -EFAULT; | 960 | ret = -EFAULT; |
960 | 961 | ||
961 | return ret; | 962 | return ret; |
@@ -1080,4 +1081,3 @@ compat_sys_sysinfo(struct compat_sysinfo __user *info) | |||
1080 | 1081 | ||
1081 | return 0; | 1082 | return 0; |
1082 | } | 1083 | } |
1083 | |||
diff --git a/kernel/configs.c b/kernel/configs.c index e84d3f9c6c7b..4c345210ed8c 100644 --- a/kernel/configs.c +++ b/kernel/configs.c | |||
@@ -79,12 +79,11 @@ static int __init ikconfig_init(void) | |||
79 | struct proc_dir_entry *entry; | 79 | struct proc_dir_entry *entry; |
80 | 80 | ||
81 | /* create the current config file */ | 81 | /* create the current config file */ |
82 | entry = create_proc_entry("config.gz", S_IFREG | S_IRUGO, | 82 | entry = proc_create("config.gz", S_IFREG | S_IRUGO, NULL, |
83 | &proc_root); | 83 | &ikconfig_file_ops); |
84 | if (!entry) | 84 | if (!entry) |
85 | return -ENOMEM; | 85 | return -ENOMEM; |
86 | 86 | ||
87 | entry->proc_fops = &ikconfig_file_ops; | ||
88 | entry->size = kernel_config_data_size; | 87 | entry->size = kernel_config_data_size; |
89 | 88 | ||
90 | return 0; | 89 | return 0; |
@@ -95,7 +94,7 @@ static int __init ikconfig_init(void) | |||
95 | 94 | ||
96 | static void __exit ikconfig_cleanup(void) | 95 | static void __exit ikconfig_cleanup(void) |
97 | { | 96 | { |
98 | remove_proc_entry("config.gz", &proc_root); | 97 | remove_proc_entry("config.gz", NULL); |
99 | } | 98 | } |
100 | 99 | ||
101 | module_init(ikconfig_init); | 100 | module_init(ikconfig_init); |
diff --git a/kernel/cpu.c b/kernel/cpu.c index 2011ad8d2697..c77bc3a1c722 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c | |||
@@ -33,17 +33,13 @@ static struct { | |||
33 | * an ongoing cpu hotplug operation. | 33 | * an ongoing cpu hotplug operation. |
34 | */ | 34 | */ |
35 | int refcount; | 35 | int refcount; |
36 | wait_queue_head_t writer_queue; | ||
37 | } cpu_hotplug; | 36 | } cpu_hotplug; |
38 | 37 | ||
39 | #define writer_exists() (cpu_hotplug.active_writer != NULL) | ||
40 | |||
41 | void __init cpu_hotplug_init(void) | 38 | void __init cpu_hotplug_init(void) |
42 | { | 39 | { |
43 | cpu_hotplug.active_writer = NULL; | 40 | cpu_hotplug.active_writer = NULL; |
44 | mutex_init(&cpu_hotplug.lock); | 41 | mutex_init(&cpu_hotplug.lock); |
45 | cpu_hotplug.refcount = 0; | 42 | cpu_hotplug.refcount = 0; |
46 | init_waitqueue_head(&cpu_hotplug.writer_queue); | ||
47 | } | 43 | } |
48 | 44 | ||
49 | #ifdef CONFIG_HOTPLUG_CPU | 45 | #ifdef CONFIG_HOTPLUG_CPU |
@@ -65,11 +61,8 @@ void put_online_cpus(void) | |||
65 | if (cpu_hotplug.active_writer == current) | 61 | if (cpu_hotplug.active_writer == current) |
66 | return; | 62 | return; |
67 | mutex_lock(&cpu_hotplug.lock); | 63 | mutex_lock(&cpu_hotplug.lock); |
68 | cpu_hotplug.refcount--; | 64 | if (!--cpu_hotplug.refcount && unlikely(cpu_hotplug.active_writer)) |
69 | 65 | wake_up_process(cpu_hotplug.active_writer); | |
70 | if (unlikely(writer_exists()) && !cpu_hotplug.refcount) | ||
71 | wake_up(&cpu_hotplug.writer_queue); | ||
72 | |||
73 | mutex_unlock(&cpu_hotplug.lock); | 66 | mutex_unlock(&cpu_hotplug.lock); |
74 | 67 | ||
75 | } | 68 | } |
@@ -98,8 +91,8 @@ void cpu_maps_update_done(void) | |||
98 | * Note that during a cpu-hotplug operation, the new readers, if any, | 91 | * Note that during a cpu-hotplug operation, the new readers, if any, |
99 | * will be blocked by the cpu_hotplug.lock | 92 | * will be blocked by the cpu_hotplug.lock |
100 | * | 93 | * |
101 | * Since cpu_maps_update_begin is always called after invoking | 94 | * Since cpu_hotplug_begin() is always called after invoking |
102 | * cpu_maps_update_begin, we can be sure that only one writer is active. | 95 | * cpu_maps_update_begin(), we can be sure that only one writer is active. |
103 | * | 96 | * |
104 | * Note that theoretically, there is a possibility of a livelock: | 97 | * Note that theoretically, there is a possibility of a livelock: |
105 | * - Refcount goes to zero, last reader wakes up the sleeping | 98 | * - Refcount goes to zero, last reader wakes up the sleeping |
@@ -115,19 +108,16 @@ void cpu_maps_update_done(void) | |||
115 | */ | 108 | */ |
116 | static void cpu_hotplug_begin(void) | 109 | static void cpu_hotplug_begin(void) |
117 | { | 110 | { |
118 | DECLARE_WAITQUEUE(wait, current); | ||
119 | |||
120 | mutex_lock(&cpu_hotplug.lock); | ||
121 | |||
122 | cpu_hotplug.active_writer = current; | 111 | cpu_hotplug.active_writer = current; |
123 | add_wait_queue_exclusive(&cpu_hotplug.writer_queue, &wait); | 112 | |
124 | while (cpu_hotplug.refcount) { | 113 | for (;;) { |
125 | set_current_state(TASK_UNINTERRUPTIBLE); | 114 | mutex_lock(&cpu_hotplug.lock); |
115 | if (likely(!cpu_hotplug.refcount)) | ||
116 | break; | ||
117 | __set_current_state(TASK_UNINTERRUPTIBLE); | ||
126 | mutex_unlock(&cpu_hotplug.lock); | 118 | mutex_unlock(&cpu_hotplug.lock); |
127 | schedule(); | 119 | schedule(); |
128 | mutex_lock(&cpu_hotplug.lock); | ||
129 | } | 120 | } |
130 | remove_wait_queue_locked(&cpu_hotplug.writer_queue, &wait); | ||
131 | } | 121 | } |
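
The rewritten wait loop replaces the waitqueue with a direct handshake: the last reader out wakes the parked writer via wake_up_process() (see put_online_cpus() above). The ordering inside the loop is what prevents a lost wakeup, sketched here:

	__set_current_state(TASK_UNINTERRUPTIBLE);	/* 1: declare sleep intent */
	mutex_unlock(&cpu_hotplug.lock);		/* 2: only now can a reader
							 *    drop refcount to zero */
	schedule();					/* 3: a wake_up_process()
							 *    after step 1 just keeps
							 *    us runnable */

Because the task state is set while the lock is still held, a reader's wakeup can never fall between the refcount check and schedule().
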
132 | 122 | ||
133 | static void cpu_hotplug_done(void) | 123 | static void cpu_hotplug_done(void) |
@@ -136,7 +126,7 @@ static void cpu_hotplug_done(void) | |||
136 | mutex_unlock(&cpu_hotplug.lock); | 126 | mutex_unlock(&cpu_hotplug.lock); |
137 | } | 127 | } |
138 | /* Need to know about CPUs going up/down? */ | 128 | /* Need to know about CPUs going up/down? */ |
139 | int __cpuinit register_cpu_notifier(struct notifier_block *nb) | 129 | int __ref register_cpu_notifier(struct notifier_block *nb) |
140 | { | 130 | { |
141 | int ret; | 131 | int ret; |
142 | cpu_maps_update_begin(); | 132 | cpu_maps_update_begin(); |
@@ -149,7 +139,7 @@ int __cpuinit register_cpu_notifier(struct notifier_block *nb) | |||
149 | 139 | ||
150 | EXPORT_SYMBOL(register_cpu_notifier); | 140 | EXPORT_SYMBOL(register_cpu_notifier); |
151 | 141 | ||
152 | void unregister_cpu_notifier(struct notifier_block *nb) | 142 | void __ref unregister_cpu_notifier(struct notifier_block *nb) |
153 | { | 143 | { |
154 | cpu_maps_update_begin(); | 144 | cpu_maps_update_begin(); |
155 | raw_notifier_chain_unregister(&cpu_chain, nb); | 145 | raw_notifier_chain_unregister(&cpu_chain, nb); |
@@ -180,7 +170,7 @@ struct take_cpu_down_param { | |||
180 | }; | 170 | }; |
181 | 171 | ||
182 | /* Take this CPU down. */ | 172 | /* Take this CPU down. */ |
183 | static int take_cpu_down(void *_param) | 173 | static int __ref take_cpu_down(void *_param) |
184 | { | 174 | { |
185 | struct take_cpu_down_param *param = _param; | 175 | struct take_cpu_down_param *param = _param; |
186 | int err; | 176 | int err; |
@@ -199,7 +189,7 @@ static int take_cpu_down(void *_param) | |||
199 | } | 189 | } |
200 | 190 | ||
201 | /* Requires cpu_add_remove_lock to be held */ | 191 | /* Requires cpu_add_remove_lock to be held */ |
202 | static int _cpu_down(unsigned int cpu, int tasks_frozen) | 192 | static int __ref _cpu_down(unsigned int cpu, int tasks_frozen) |
203 | { | 193 | { |
204 | int err, nr_calls = 0; | 194 | int err, nr_calls = 0; |
205 | struct task_struct *p; | 195 | struct task_struct *p; |
@@ -225,7 +215,7 @@ static int _cpu_down(unsigned int cpu, int tasks_frozen) | |||
225 | __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod, | 215 | __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod, |
226 | hcpu, nr_calls, NULL); | 216 | hcpu, nr_calls, NULL); |
227 | printk("%s: attempt to take down CPU %u failed\n", | 217 | printk("%s: attempt to take down CPU %u failed\n", |
228 | __FUNCTION__, cpu); | 218 | __func__, cpu); |
229 | err = -EINVAL; | 219 | err = -EINVAL; |
230 | goto out_release; | 220 | goto out_release; |
231 | } | 221 | } |
@@ -274,7 +264,7 @@ out_release: | |||
274 | return err; | 264 | return err; |
275 | } | 265 | } |
276 | 266 | ||
277 | int cpu_down(unsigned int cpu) | 267 | int __ref cpu_down(unsigned int cpu) |
278 | { | 268 | { |
279 | int err = 0; | 269 | int err = 0; |
280 | 270 | ||
@@ -305,7 +295,7 @@ static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen) | |||
305 | if (ret == NOTIFY_BAD) { | 295 | if (ret == NOTIFY_BAD) { |
306 | nr_calls--; | 296 | nr_calls--; |
307 | printk("%s: attempt to bring up CPU %u failed\n", | 297 | printk("%s: attempt to bring up CPU %u failed\n", |
308 | __FUNCTION__, cpu); | 298 | __func__, cpu); |
309 | ret = -EINVAL; | 299 | ret = -EINVAL; |
310 | goto out_notify; | 300 | goto out_notify; |
311 | } | 301 | } |
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 48a976c52cf5..8da627d33804 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c | |||
@@ -127,6 +127,7 @@ struct cpuset_hotplug_scanner { | |||
127 | typedef enum { | 127 | typedef enum { |
128 | CS_CPU_EXCLUSIVE, | 128 | CS_CPU_EXCLUSIVE, |
129 | CS_MEM_EXCLUSIVE, | 129 | CS_MEM_EXCLUSIVE, |
130 | CS_MEM_HARDWALL, | ||
130 | CS_MEMORY_MIGRATE, | 131 | CS_MEMORY_MIGRATE, |
131 | CS_SCHED_LOAD_BALANCE, | 132 | CS_SCHED_LOAD_BALANCE, |
132 | CS_SPREAD_PAGE, | 133 | CS_SPREAD_PAGE, |
@@ -144,6 +145,11 @@ static inline int is_mem_exclusive(const struct cpuset *cs) | |||
144 | return test_bit(CS_MEM_EXCLUSIVE, &cs->flags); | 145 | return test_bit(CS_MEM_EXCLUSIVE, &cs->flags); |
145 | } | 146 | } |
146 | 147 | ||
148 | static inline int is_mem_hardwall(const struct cpuset *cs) | ||
149 | { | ||
150 | return test_bit(CS_MEM_HARDWALL, &cs->flags); | ||
151 | } | ||
152 | |||
147 | static inline int is_sched_load_balance(const struct cpuset *cs) | 153 | static inline int is_sched_load_balance(const struct cpuset *cs) |
148 | { | 154 | { |
149 | return test_bit(CS_SCHED_LOAD_BALANCE, &cs->flags); | 155 | return test_bit(CS_SCHED_LOAD_BALANCE, &cs->flags); |
@@ -735,7 +741,8 @@ static inline int started_after(void *p1, void *p2) | |||
735 | * Return nonzero if this task's cpus_allowed mask should be changed (in other | 741 | * Return nonzero if this task's cpus_allowed mask should be changed (in other |
736 | * words, if its mask is not equal to its cpuset's mask). | 742 | * words, if its mask is not equal to its cpuset's mask). |
737 | */ | 743 | */ |
738 | int cpuset_test_cpumask(struct task_struct *tsk, struct cgroup_scanner *scan) | 744 | static int cpuset_test_cpumask(struct task_struct *tsk, |
745 | struct cgroup_scanner *scan) | ||
739 | { | 746 | { |
740 | return !cpus_equal(tsk->cpus_allowed, | 747 | return !cpus_equal(tsk->cpus_allowed, |
741 | (cgroup_cs(scan->cg))->cpus_allowed); | 748 | (cgroup_cs(scan->cg))->cpus_allowed); |
@@ -752,7 +759,8 @@ int cpuset_test_cpumask(struct task_struct *tsk, struct cgroup_scanner *scan) | |||
752 | * We don't need to re-check for the cgroup/cpuset membership, since we're | 759 | * We don't need to re-check for the cgroup/cpuset membership, since we're |
753 | * holding cgroup_lock() at this point. | 760 | * holding cgroup_lock() at this point. |
754 | */ | 761 | */ |
755 | void cpuset_change_cpumask(struct task_struct *tsk, struct cgroup_scanner *scan) | 762 | static void cpuset_change_cpumask(struct task_struct *tsk, |
763 | struct cgroup_scanner *scan) | ||
756 | { | 764 | { |
757 | set_cpus_allowed_ptr(tsk, &((cgroup_cs(scan->cg))->cpus_allowed)); | 765 | set_cpus_allowed_ptr(tsk, &((cgroup_cs(scan->cg))->cpus_allowed)); |
758 | } | 766 | } |
@@ -1023,19 +1031,6 @@ int current_cpuset_is_being_rebound(void) | |||
1023 | return task_cs(current) == cpuset_being_rebound; | 1031 | return task_cs(current) == cpuset_being_rebound; |
1024 | } | 1032 | } |
1025 | 1033 | ||
1026 | /* | ||
1027 | * Call with cgroup_mutex held. | ||
1028 | */ | ||
1029 | |||
1030 | static int update_memory_pressure_enabled(struct cpuset *cs, char *buf) | ||
1031 | { | ||
1032 | if (simple_strtoul(buf, NULL, 10) != 0) | ||
1033 | cpuset_memory_pressure_enabled = 1; | ||
1034 | else | ||
1035 | cpuset_memory_pressure_enabled = 0; | ||
1036 | return 0; | ||
1037 | } | ||
1038 | |||
1039 | static int update_relax_domain_level(struct cpuset *cs, char *buf) | 1034 | static int update_relax_domain_level(struct cpuset *cs, char *buf) |
1040 | { | 1035 | { |
1041 | int val = simple_strtol(buf, NULL, 10); | 1036 | int val = simple_strtol(buf, NULL, 10); |
@@ -1053,25 +1048,20 @@ static int update_relax_domain_level(struct cpuset *cs, char *buf) | |||
1053 | 1048 | ||
1054 | /* | 1049 | /* |
1055 | * update_flag - read a 0 or a 1 in a file and update associated flag | 1050 | * update_flag - read a 0 or a 1 in a file and update associated flag |
1056 | * bit: the bit to update (CS_CPU_EXCLUSIVE, CS_MEM_EXCLUSIVE, | 1051 | * bit: the bit to update (see cpuset_flagbits_t) |
1057 | * CS_SCHED_LOAD_BALANCE, | 1052 | * cs: the cpuset to update |
1058 | * CS_NOTIFY_ON_RELEASE, CS_MEMORY_MIGRATE, | 1053 | * turning_on: whether the flag is being set or cleared |
1059 | * CS_SPREAD_PAGE, CS_SPREAD_SLAB) | ||
1060 | * cs: the cpuset to update | ||
1061 | * buf: the buffer where we read the 0 or 1 | ||
1062 | * | 1054 | * |
1063 | * Call with cgroup_mutex held. | 1055 | * Call with cgroup_mutex held. |
1064 | */ | 1056 | */ |
1065 | 1057 | ||
1066 | static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, char *buf) | 1058 | static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, |
1059 | int turning_on) | ||
1067 | { | 1060 | { |
1068 | int turning_on; | ||
1069 | struct cpuset trialcs; | 1061 | struct cpuset trialcs; |
1070 | int err; | 1062 | int err; |
1071 | int cpus_nonempty, balance_flag_changed; | 1063 | int cpus_nonempty, balance_flag_changed; |
1072 | 1064 | ||
1073 | turning_on = (simple_strtoul(buf, NULL, 10) != 0); | ||
1074 | |||
1075 | trialcs = *cs; | 1065 | trialcs = *cs; |
1076 | if (turning_on) | 1066 | if (turning_on) |
1077 | set_bit(bit, &trialcs.flags); | 1067 | set_bit(bit, &trialcs.flags); |
@@ -1241,6 +1231,7 @@ typedef enum { | |||
1241 | FILE_MEMLIST, | 1231 | FILE_MEMLIST, |
1242 | FILE_CPU_EXCLUSIVE, | 1232 | FILE_CPU_EXCLUSIVE, |
1243 | FILE_MEM_EXCLUSIVE, | 1233 | FILE_MEM_EXCLUSIVE, |
1234 | FILE_MEM_HARDWALL, | ||
1244 | FILE_SCHED_LOAD_BALANCE, | 1235 | FILE_SCHED_LOAD_BALANCE, |
1245 | FILE_SCHED_RELAX_DOMAIN_LEVEL, | 1236 | FILE_SCHED_RELAX_DOMAIN_LEVEL, |
1246 | FILE_MEMORY_PRESSURE_ENABLED, | 1237 | FILE_MEMORY_PRESSURE_ENABLED, |
@@ -1289,46 +1280,71 @@ static ssize_t cpuset_common_file_write(struct cgroup *cont, | |||
1289 | case FILE_MEMLIST: | 1280 | case FILE_MEMLIST: |
1290 | retval = update_nodemask(cs, buffer); | 1281 | retval = update_nodemask(cs, buffer); |
1291 | break; | 1282 | break; |
1283 | case FILE_SCHED_RELAX_DOMAIN_LEVEL: | ||
1284 | retval = update_relax_domain_level(cs, buffer); | ||
1285 | break; | ||
1286 | default: | ||
1287 | retval = -EINVAL; | ||
1288 | goto out2; | ||
1289 | } | ||
1290 | |||
1291 | if (retval == 0) | ||
1292 | retval = nbytes; | ||
1293 | out2: | ||
1294 | cgroup_unlock(); | ||
1295 | out1: | ||
1296 | kfree(buffer); | ||
1297 | return retval; | ||
1298 | } | ||
1299 | |||
1300 | static int cpuset_write_u64(struct cgroup *cgrp, struct cftype *cft, u64 val) | ||
1301 | { | ||
1302 | int retval = 0; | ||
1303 | struct cpuset *cs = cgroup_cs(cgrp); | ||
1304 | cpuset_filetype_t type = cft->private; | ||
1305 | |||
1306 | cgroup_lock(); | ||
1307 | |||
1308 | if (cgroup_is_removed(cgrp)) { | ||
1309 | cgroup_unlock(); | ||
1310 | return -ENODEV; | ||
1311 | } | ||
1312 | |||
1313 | switch (type) { | ||
1292 | case FILE_CPU_EXCLUSIVE: | 1314 | case FILE_CPU_EXCLUSIVE: |
1293 | retval = update_flag(CS_CPU_EXCLUSIVE, cs, buffer); | 1315 | retval = update_flag(CS_CPU_EXCLUSIVE, cs, val); |
1294 | break; | 1316 | break; |
1295 | case FILE_MEM_EXCLUSIVE: | 1317 | case FILE_MEM_EXCLUSIVE: |
1296 | retval = update_flag(CS_MEM_EXCLUSIVE, cs, buffer); | 1318 | retval = update_flag(CS_MEM_EXCLUSIVE, cs, val); |
1297 | break; | 1319 | break; |
1298 | case FILE_SCHED_LOAD_BALANCE: | 1320 | case FILE_MEM_HARDWALL: |
1299 | retval = update_flag(CS_SCHED_LOAD_BALANCE, cs, buffer); | 1321 | retval = update_flag(CS_MEM_HARDWALL, cs, val); |
1300 | break; | 1322 | break; |
1301 | case FILE_SCHED_RELAX_DOMAIN_LEVEL: | 1323 | case FILE_SCHED_LOAD_BALANCE: |
1302 | retval = update_relax_domain_level(cs, buffer); | 1324 | retval = update_flag(CS_SCHED_LOAD_BALANCE, cs, val); |
1303 | break; | 1325 | break; |
1304 | case FILE_MEMORY_MIGRATE: | 1326 | case FILE_MEMORY_MIGRATE: |
1305 | retval = update_flag(CS_MEMORY_MIGRATE, cs, buffer); | 1327 | retval = update_flag(CS_MEMORY_MIGRATE, cs, val); |
1306 | break; | 1328 | break; |
1307 | case FILE_MEMORY_PRESSURE_ENABLED: | 1329 | case FILE_MEMORY_PRESSURE_ENABLED: |
1308 | retval = update_memory_pressure_enabled(cs, buffer); | 1330 | cpuset_memory_pressure_enabled = !!val; |
1309 | break; | 1331 | break; |
1310 | case FILE_MEMORY_PRESSURE: | 1332 | case FILE_MEMORY_PRESSURE: |
1311 | retval = -EACCES; | 1333 | retval = -EACCES; |
1312 | break; | 1334 | break; |
1313 | case FILE_SPREAD_PAGE: | 1335 | case FILE_SPREAD_PAGE: |
1314 | retval = update_flag(CS_SPREAD_PAGE, cs, buffer); | 1336 | retval = update_flag(CS_SPREAD_PAGE, cs, val); |
1315 | cs->mems_generation = cpuset_mems_generation++; | 1337 | cs->mems_generation = cpuset_mems_generation++; |
1316 | break; | 1338 | break; |
1317 | case FILE_SPREAD_SLAB: | 1339 | case FILE_SPREAD_SLAB: |
1318 | retval = update_flag(CS_SPREAD_SLAB, cs, buffer); | 1340 | retval = update_flag(CS_SPREAD_SLAB, cs, val); |
1319 | cs->mems_generation = cpuset_mems_generation++; | 1341 | cs->mems_generation = cpuset_mems_generation++; |
1320 | break; | 1342 | break; |
1321 | default: | 1343 | default: |
1322 | retval = -EINVAL; | 1344 | retval = -EINVAL; |
1323 | goto out2; | 1345 | break; |
1324 | } | 1346 | } |
1325 | |||
1326 | if (retval == 0) | ||
1327 | retval = nbytes; | ||
1328 | out2: | ||
1329 | cgroup_unlock(); | 1347 | cgroup_unlock(); |
1330 | out1: | ||
1331 | kfree(buffer); | ||
1332 | return retval; | 1348 | return retval; |
1333 | } | 1349 | } |
1334 | 1350 | ||
@@ -1390,33 +1406,9 @@ static ssize_t cpuset_common_file_read(struct cgroup *cont, | |||
1390 | case FILE_MEMLIST: | 1406 | case FILE_MEMLIST: |
1391 | s += cpuset_sprintf_memlist(s, cs); | 1407 | s += cpuset_sprintf_memlist(s, cs); |
1392 | break; | 1408 | break; |
1393 | case FILE_CPU_EXCLUSIVE: | ||
1394 | *s++ = is_cpu_exclusive(cs) ? '1' : '0'; | ||
1395 | break; | ||
1396 | case FILE_MEM_EXCLUSIVE: | ||
1397 | *s++ = is_mem_exclusive(cs) ? '1' : '0'; | ||
1398 | break; | ||
1399 | case FILE_SCHED_LOAD_BALANCE: | ||
1400 | *s++ = is_sched_load_balance(cs) ? '1' : '0'; | ||
1401 | break; | ||
1402 | case FILE_SCHED_RELAX_DOMAIN_LEVEL: | 1409 | case FILE_SCHED_RELAX_DOMAIN_LEVEL: |
1403 | s += sprintf(s, "%d", cs->relax_domain_level); | 1410 | s += sprintf(s, "%d", cs->relax_domain_level); |
1404 | break; | 1411 | break; |
1405 | case FILE_MEMORY_MIGRATE: | ||
1406 | *s++ = is_memory_migrate(cs) ? '1' : '0'; | ||
1407 | break; | ||
1408 | case FILE_MEMORY_PRESSURE_ENABLED: | ||
1409 | *s++ = cpuset_memory_pressure_enabled ? '1' : '0'; | ||
1410 | break; | ||
1411 | case FILE_MEMORY_PRESSURE: | ||
1412 | s += sprintf(s, "%d", fmeter_getrate(&cs->fmeter)); | ||
1413 | break; | ||
1414 | case FILE_SPREAD_PAGE: | ||
1415 | *s++ = is_spread_page(cs) ? '1' : '0'; | ||
1416 | break; | ||
1417 | case FILE_SPREAD_SLAB: | ||
1418 | *s++ = is_spread_slab(cs) ? '1' : '0'; | ||
1419 | break; | ||
1420 | default: | 1412 | default: |
1421 | retval = -EINVAL; | 1413 | retval = -EINVAL; |
1422 | goto out; | 1414 | goto out; |
@@ -1429,121 +1421,137 @@ out: | |||
1429 | return retval; | 1421 | return retval; |
1430 | } | 1422 | } |
1431 | 1423 | ||
1432 | 1424 | static u64 cpuset_read_u64(struct cgroup *cont, struct cftype *cft) | |
1433 | 1425 | { | |
1426 | struct cpuset *cs = cgroup_cs(cont); | ||
1427 | cpuset_filetype_t type = cft->private; | ||
1428 | switch (type) { | ||
1429 | case FILE_CPU_EXCLUSIVE: | ||
1430 | return is_cpu_exclusive(cs); | ||
1431 | case FILE_MEM_EXCLUSIVE: | ||
1432 | return is_mem_exclusive(cs); | ||
1433 | case FILE_MEM_HARDWALL: | ||
1434 | return is_mem_hardwall(cs); | ||
1435 | case FILE_SCHED_LOAD_BALANCE: | ||
1436 | return is_sched_load_balance(cs); | ||
1437 | case FILE_MEMORY_MIGRATE: | ||
1438 | return is_memory_migrate(cs); | ||
1439 | case FILE_MEMORY_PRESSURE_ENABLED: | ||
1440 | return cpuset_memory_pressure_enabled; | ||
1441 | case FILE_MEMORY_PRESSURE: | ||
1442 | return fmeter_getrate(&cs->fmeter); | ||
1443 | case FILE_SPREAD_PAGE: | ||
1444 | return is_spread_page(cs); | ||
1445 | case FILE_SPREAD_SLAB: | ||
1446 | return is_spread_slab(cs); | ||
1447 | default: | ||
1448 | BUG(); | ||
1449 | } | ||
1450 | } | ||
1434 | 1451 | ||
1435 | 1452 | ||
1436 | /* | 1453 | /* |
1437 | * for the common functions, 'private' gives the type of file | 1454 | * for the common functions, 'private' gives the type of file |
1438 | */ | 1455 | */ |
1439 | 1456 | ||
1440 | static struct cftype cft_cpus = { | 1457 | static struct cftype files[] = { |
1441 | .name = "cpus", | 1458 | { |
1442 | .read = cpuset_common_file_read, | 1459 | .name = "cpus", |
1443 | .write = cpuset_common_file_write, | 1460 | .read = cpuset_common_file_read, |
1444 | .private = FILE_CPULIST, | 1461 | .write = cpuset_common_file_write, |
1445 | }; | 1462 | .private = FILE_CPULIST, |
1446 | 1463 | }, | |
1447 | static struct cftype cft_mems = { | 1464 | |
1448 | .name = "mems", | 1465 | { |
1449 | .read = cpuset_common_file_read, | 1466 | .name = "mems", |
1450 | .write = cpuset_common_file_write, | 1467 | .read = cpuset_common_file_read, |
1451 | .private = FILE_MEMLIST, | 1468 | .write = cpuset_common_file_write, |
1452 | }; | 1469 | .private = FILE_MEMLIST, |
1453 | 1470 | }, | |
1454 | static struct cftype cft_cpu_exclusive = { | 1471 | |
1455 | .name = "cpu_exclusive", | 1472 | { |
1456 | .read = cpuset_common_file_read, | 1473 | .name = "cpu_exclusive", |
1457 | .write = cpuset_common_file_write, | 1474 | .read_u64 = cpuset_read_u64, |
1458 | .private = FILE_CPU_EXCLUSIVE, | 1475 | .write_u64 = cpuset_write_u64, |
1459 | }; | 1476 | .private = FILE_CPU_EXCLUSIVE, |
1460 | 1477 | }, | |
1461 | static struct cftype cft_mem_exclusive = { | 1478 | |
1462 | .name = "mem_exclusive", | 1479 | { |
1463 | .read = cpuset_common_file_read, | 1480 | .name = "mem_exclusive", |
1464 | .write = cpuset_common_file_write, | 1481 | .read_u64 = cpuset_read_u64, |
1465 | .private = FILE_MEM_EXCLUSIVE, | 1482 | .write_u64 = cpuset_write_u64, |
1466 | }; | 1483 | .private = FILE_MEM_EXCLUSIVE, |
1467 | 1484 | }, | |
1468 | static struct cftype cft_sched_load_balance = { | 1485 | |
1469 | .name = "sched_load_balance", | 1486 | { |
1470 | .read = cpuset_common_file_read, | 1487 | .name = "mem_hardwall", |
1471 | .write = cpuset_common_file_write, | 1488 | .read_u64 = cpuset_read_u64, |
1472 | .private = FILE_SCHED_LOAD_BALANCE, | 1489 | .write_u64 = cpuset_write_u64, |
1473 | }; | 1490 | .private = FILE_MEM_HARDWALL, |
1474 | 1491 | }, | |
1475 | static struct cftype cft_sched_relax_domain_level = { | 1492 | |
1476 | .name = "sched_relax_domain_level", | 1493 | { |
1477 | .read = cpuset_common_file_read, | 1494 | .name = "sched_load_balance", |
1478 | .write = cpuset_common_file_write, | 1495 | .read_u64 = cpuset_read_u64, |
1479 | .private = FILE_SCHED_RELAX_DOMAIN_LEVEL, | 1496 | .write_u64 = cpuset_write_u64, |
1480 | }; | 1497 | .private = FILE_SCHED_LOAD_BALANCE, |
1481 | 1498 | }, | |
1482 | static struct cftype cft_memory_migrate = { | 1499 | |
1483 | .name = "memory_migrate", | 1500 | { |
1484 | .read = cpuset_common_file_read, | 1501 | .name = "sched_relax_domain_level", |
1485 | .write = cpuset_common_file_write, | 1502 | .read_u64 = cpuset_read_u64, |
1486 | .private = FILE_MEMORY_MIGRATE, | 1503 | .write_u64 = cpuset_write_u64, |
1504 | .private = FILE_SCHED_RELAX_DOMAIN_LEVEL, | ||
1505 | }, | ||
1506 | |||
1507 | { | ||
1508 | .name = "memory_migrate", | ||
1509 | .read_u64 = cpuset_read_u64, | ||
1510 | .write_u64 = cpuset_write_u64, | ||
1511 | .private = FILE_MEMORY_MIGRATE, | ||
1512 | }, | ||
1513 | |||
1514 | { | ||
1515 | .name = "memory_pressure", | ||
1516 | .read_u64 = cpuset_read_u64, | ||
1517 | .write_u64 = cpuset_write_u64, | ||
1518 | .private = FILE_MEMORY_PRESSURE, | ||
1519 | }, | ||
1520 | |||
1521 | { | ||
1522 | .name = "memory_spread_page", | ||
1523 | .read_u64 = cpuset_read_u64, | ||
1524 | .write_u64 = cpuset_write_u64, | ||
1525 | .private = FILE_SPREAD_PAGE, | ||
1526 | }, | ||
1527 | |||
1528 | { | ||
1529 | .name = "memory_spread_slab", | ||
1530 | .read_u64 = cpuset_read_u64, | ||
1531 | .write_u64 = cpuset_write_u64, | ||
1532 | .private = FILE_SPREAD_SLAB, | ||
1533 | }, | ||
1487 | }; | 1534 | }; |
1488 | 1535 | ||
1489 | static struct cftype cft_memory_pressure_enabled = { | 1536 | static struct cftype cft_memory_pressure_enabled = { |
1490 | .name = "memory_pressure_enabled", | 1537 | .name = "memory_pressure_enabled", |
1491 | .read = cpuset_common_file_read, | 1538 | .read_u64 = cpuset_read_u64, |
1492 | .write = cpuset_common_file_write, | 1539 | .write_u64 = cpuset_write_u64, |
1493 | .private = FILE_MEMORY_PRESSURE_ENABLED, | 1540 | .private = FILE_MEMORY_PRESSURE_ENABLED, |
1494 | }; | 1541 | }; |
1495 | 1542 | ||
1496 | static struct cftype cft_memory_pressure = { | ||
1497 | .name = "memory_pressure", | ||
1498 | .read = cpuset_common_file_read, | ||
1499 | .write = cpuset_common_file_write, | ||
1500 | .private = FILE_MEMORY_PRESSURE, | ||
1501 | }; | ||
1502 | |||
1503 | static struct cftype cft_spread_page = { | ||
1504 | .name = "memory_spread_page", | ||
1505 | .read = cpuset_common_file_read, | ||
1506 | .write = cpuset_common_file_write, | ||
1507 | .private = FILE_SPREAD_PAGE, | ||
1508 | }; | ||
1509 | |||
1510 | static struct cftype cft_spread_slab = { | ||
1511 | .name = "memory_spread_slab", | ||
1512 | .read = cpuset_common_file_read, | ||
1513 | .write = cpuset_common_file_write, | ||
1514 | .private = FILE_SPREAD_SLAB, | ||
1515 | }; | ||
1516 | |||
1517 | static int cpuset_populate(struct cgroup_subsys *ss, struct cgroup *cont) | 1543 | static int cpuset_populate(struct cgroup_subsys *ss, struct cgroup *cont) |
1518 | { | 1544 | { |
1519 | int err; | 1545 | int err; |
1520 | 1546 | ||
1521 | if ((err = cgroup_add_file(cont, ss, &cft_cpus)) < 0) | 1547 | err = cgroup_add_files(cont, ss, files, ARRAY_SIZE(files)); |
1522 | return err; | 1548 | if (err) |
1523 | if ((err = cgroup_add_file(cont, ss, &cft_mems)) < 0) | ||
1524 | return err; | ||
1525 | if ((err = cgroup_add_file(cont, ss, &cft_cpu_exclusive)) < 0) | ||
1526 | return err; | ||
1527 | if ((err = cgroup_add_file(cont, ss, &cft_mem_exclusive)) < 0) | ||
1528 | return err; | ||
1529 | if ((err = cgroup_add_file(cont, ss, &cft_memory_migrate)) < 0) | ||
1530 | return err; | ||
1531 | if ((err = cgroup_add_file(cont, ss, &cft_sched_load_balance)) < 0) | ||
1532 | return err; | ||
1533 | if ((err = cgroup_add_file(cont, ss, | ||
1534 | &cft_sched_relax_domain_level)) < 0) | ||
1535 | return err; | ||
1536 | if ((err = cgroup_add_file(cont, ss, &cft_memory_pressure)) < 0) | ||
1537 | return err; | ||
1538 | if ((err = cgroup_add_file(cont, ss, &cft_spread_page)) < 0) | ||
1539 | return err; | ||
1540 | if ((err = cgroup_add_file(cont, ss, &cft_spread_slab)) < 0) | ||
1541 | return err; | 1549 | return err; |
1542 | /* memory_pressure_enabled is in root cpuset only */ | 1550 | /* memory_pressure_enabled is in root cpuset only */ |
1543 | if (err == 0 && !cont->parent) | 1551 | if (!cont->parent) |
1544 | err = cgroup_add_file(cont, ss, | 1552 | err = cgroup_add_file(cont, ss, |
1545 | &cft_memory_pressure_enabled); | 1553 | &cft_memory_pressure_enabled); |
1546 | return 0; | 1554 | return err; |
1547 | } | 1555 | } |
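
Besides collapsing ten cgroup_add_file() calls into one table registration, the rewrite fixes the tail of cpuset_populate(): the old code ended with return 0, silently discarding a failure to add memory_pressure_enabled. Condensed, the resulting shape is:

	static int cpuset_populate(struct cgroup_subsys *ss, struct cgroup *cont)
	{
		int err = cgroup_add_files(cont, ss, files, ARRAY_SIZE(files));

		if (err)
			return err;
		/* memory_pressure_enabled is in the root cpuset only */
		if (!cont->parent)
			err = cgroup_add_file(cont, ss,
					      &cft_memory_pressure_enabled);
		return err;	/* no longer swallowed */
	}
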
1548 | 1556 | ||
1549 | /* | 1557 | /* |
@@ -1643,7 +1651,7 @@ static void cpuset_destroy(struct cgroup_subsys *ss, struct cgroup *cont) | |||
1643 | cpuset_update_task_memory_state(); | 1651 | cpuset_update_task_memory_state(); |
1644 | 1652 | ||
1645 | if (is_sched_load_balance(cs)) | 1653 | if (is_sched_load_balance(cs)) |
1646 | update_flag(CS_SCHED_LOAD_BALANCE, cs, "0"); | 1654 | update_flag(CS_SCHED_LOAD_BALANCE, cs, 0); |
1647 | 1655 | ||
1648 | number_of_cpusets--; | 1656 | number_of_cpusets--; |
1649 | kfree(cs); | 1657 | kfree(cs); |
@@ -1708,7 +1716,8 @@ int __init cpuset_init(void) | |||
1708 | * Called by cgroup_scan_tasks() for each task in a cgroup. | 1716 | * Called by cgroup_scan_tasks() for each task in a cgroup. |
1709 | * Return nonzero to stop the walk through the tasks. | 1717 | * Return nonzero to stop the walk through the tasks. |
1710 | */ | 1718 | */ |
1711 | void cpuset_do_move_task(struct task_struct *tsk, struct cgroup_scanner *scan) | 1719 | static void cpuset_do_move_task(struct task_struct *tsk, |
1720 | struct cgroup_scanner *scan) | ||
1712 | { | 1721 | { |
1713 | struct cpuset_hotplug_scanner *chsp; | 1722 | struct cpuset_hotplug_scanner *chsp; |
1714 | 1723 | ||
@@ -1970,14 +1979,14 @@ int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask) | |||
1970 | } | 1979 | } |
1971 | 1980 | ||
1972 | /* | 1981 | /* |
1973 | * nearest_exclusive_ancestor() - Returns the nearest mem_exclusive | 1982 | * nearest_hardwall_ancestor() - Returns the nearest mem_exclusive or |
1974 | * ancestor to the specified cpuset. Call holding callback_mutex. | 1983 | * mem_hardwall ancestor to the specified cpuset. Call holding |
1975 | * If no ancestor is mem_exclusive (an unusual configuration), then | 1984 | * callback_mutex. If no ancestor is mem_exclusive or mem_hardwall |
1976 | * returns the root cpuset. | 1985 | * (an unusual configuration), then returns the root cpuset. |
1977 | */ | 1986 | */ |
1978 | static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs) | 1987 | static const struct cpuset *nearest_hardwall_ancestor(const struct cpuset *cs) |
1979 | { | 1988 | { |
1980 | while (!is_mem_exclusive(cs) && cs->parent) | 1989 | while (!(is_mem_exclusive(cs) || is_mem_hardwall(cs)) && cs->parent) |
1981 | cs = cs->parent; | 1990 | cs = cs->parent; |
1982 | return cs; | 1991 | return cs; |
1983 | } | 1992 | } |
@@ -1991,7 +2000,7 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs) | |||
1991 | * __GFP_THISNODE is set, yes, we can always allocate. If zone | 2000 | * __GFP_THISNODE is set, yes, we can always allocate. If zone |
1992 | * z's node is in our task's mems_allowed, yes. If it's not a | 2001 | * z's node is in our task's mems_allowed, yes. If it's not a |
1993 | * __GFP_HARDWALL request and this zone's node is in the nearest | 2002 | * __GFP_HARDWALL request and this zone's node is in the nearest |
1994 | * mem_exclusive cpuset ancestor to this task's cpuset, yes. | 2003 | * hardwalled cpuset ancestor to this task's cpuset, yes. |
1995 | * If the task has been OOM killed and has access to memory reserves | 2004 | * If the task has been OOM killed and has access to memory reserves |
1996 | * as specified by the TIF_MEMDIE flag, yes. | 2005 | * as specified by the TIF_MEMDIE flag, yes. |
1997 | * Otherwise, no. | 2006 | * Otherwise, no. |
@@ -2014,7 +2023,7 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs) | |||
2014 | * and do not allow allocations outside the current tasks cpuset | 2023 | * and do not allow allocations outside the current tasks cpuset |
2015 | * unless the task has been OOM killed as is marked TIF_MEMDIE. | 2024 | * unless the task has been OOM killed as is marked TIF_MEMDIE. |
2016 | * GFP_KERNEL allocations are not so marked, so can escape to the | 2025 | * GFP_KERNEL allocations are not so marked, so can escape to the |
2017 | * nearest enclosing mem_exclusive ancestor cpuset. | 2026 | * nearest enclosing hardwalled ancestor cpuset. |
2018 | * | 2027 | * |
2019 | * Scanning up parent cpusets requires callback_mutex. The | 2028 | * Scanning up parent cpusets requires callback_mutex. The |
2020 | * __alloc_pages() routine only calls here with __GFP_HARDWALL bit | 2029 | * __alloc_pages() routine only calls here with __GFP_HARDWALL bit |
@@ -2037,7 +2046,7 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs) | |||
2037 | * in_interrupt - any node ok (current task context irrelevant) | 2046 | * in_interrupt - any node ok (current task context irrelevant) |
2038 | * GFP_ATOMIC - any node ok | 2047 | * GFP_ATOMIC - any node ok |
2039 | * TIF_MEMDIE - any node ok | 2048 | * TIF_MEMDIE - any node ok |
2040 | * GFP_KERNEL - any node in enclosing mem_exclusive cpuset ok | 2049 | * GFP_KERNEL - any node in enclosing hardwalled cpuset ok |
2041 | * GFP_USER - only nodes in current tasks mems allowed ok. | 2050 | * GFP_USER - only nodes in current tasks mems allowed ok. |
2042 | * | 2051 | * |
2043 | * Rule: | 2052 | * Rule: |
@@ -2074,7 +2083,7 @@ int __cpuset_zone_allowed_softwall(struct zone *z, gfp_t gfp_mask) | |||
2074 | mutex_lock(&callback_mutex); | 2083 | mutex_lock(&callback_mutex); |
2075 | 2084 | ||
2076 | task_lock(current); | 2085 | task_lock(current); |
2077 | cs = nearest_exclusive_ancestor(task_cs(current)); | 2086 | cs = nearest_hardwall_ancestor(task_cs(current)); |
2078 | task_unlock(current); | 2087 | task_unlock(current); |
2079 | 2088 | ||
2080 | allowed = node_isset(node, cs->mems_allowed); | 2089 | allowed = node_isset(node, cs->mems_allowed); |
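The rename from nearest_exclusive_ancestor() to nearest_hardwall_ancestor() widens the stopping condition of the parent walk: either mem_exclusive or the new mem_hardwall flag now counts as a hardwall. A minimal runnable userspace sketch of the same walk — struct node and its fields are stand-ins for the kernel's struct cpuset, not kernel API:

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-in for struct cpuset: flags plus a parent link. */
struct node {
	const char *name;
	bool mem_exclusive;
	bool mem_hardwall;
	struct node *parent;		/* NULL at the root */
};

/* Mirrors nearest_hardwall_ancestor(): climb until a node is exclusive
 * or hardwalled; if none is, the loop stops at the root. */
static const struct node *nearest_hardwall_ancestor(const struct node *n)
{
	while (!(n->mem_exclusive || n->mem_hardwall) && n->parent)
		n = n->parent;
	return n;
}

int main(void)
{
	struct node root = { "root", false, false, NULL };
	struct node mid  = { "mid",  false, true,  &root };	/* hardwalled */
	struct node leaf = { "leaf", false, false, &mid  };

	printf("%s\n", nearest_hardwall_ancestor(&leaf)->name);	/* "mid" */
	return 0;
}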
diff --git a/kernel/dma.c b/kernel/dma.c index 6a82bb716dac..d2c60a822790 100644 --- a/kernel/dma.c +++ b/kernel/dma.c | |||
@@ -149,12 +149,7 @@ static const struct file_operations proc_dma_operations = { | |||
149 | 149 | ||
150 | static int __init proc_dma_init(void) | 150 | static int __init proc_dma_init(void) |
151 | { | 151 | { |
152 | struct proc_dir_entry *e; | 152 | proc_create("dma", 0, NULL, &proc_dma_operations); |
153 | |||
154 | e = create_proc_entry("dma", 0, NULL); | ||
155 | if (e) | ||
156 | e->proc_fops = &proc_dma_operations; | ||
157 | |||
158 | return 0; | 153 | return 0; |
159 | } | 154 | } |
160 | 155 | ||
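This dma.c hunk is the first of several in the series (kallsyms.c, latencytop.c, lockdep_proc.c below) that fold the create_proc_entry()-then-assign-proc_fops two-step into a single proc_create() call, which registers the entry with its file_operations already attached. The shape of the conversion, as a kernel-style sketch (not buildable outside the tree; both forms are taken from the hunk above):

/* Before: the entry is briefly visible without its fops. */
struct proc_dir_entry *e = create_proc_entry("dma", 0, NULL);
if (e)
	e->proc_fops = &proc_dma_operations;

/* After: one call, fops attached at creation time. */
proc_create("dma", 0, NULL, &proc_dma_operations);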
diff --git a/kernel/exit.c b/kernel/exit.c index 2a9d98c641ac..1510f78a0ffa 100644 --- a/kernel/exit.c +++ b/kernel/exit.c | |||
@@ -19,6 +19,7 @@ | |||
19 | #include <linux/acct.h> | 19 | #include <linux/acct.h> |
20 | #include <linux/tsacct_kern.h> | 20 | #include <linux/tsacct_kern.h> |
21 | #include <linux/file.h> | 21 | #include <linux/file.h> |
22 | #include <linux/fdtable.h> | ||
22 | #include <linux/binfmts.h> | 23 | #include <linux/binfmts.h> |
23 | #include <linux/nsproxy.h> | 24 | #include <linux/nsproxy.h> |
24 | #include <linux/pid_namespace.h> | 25 | #include <linux/pid_namespace.h> |
@@ -52,6 +53,11 @@ | |||
52 | 53 | ||
53 | static void exit_mm(struct task_struct * tsk); | 54 | static void exit_mm(struct task_struct * tsk); |
54 | 55 | ||
56 | static inline int task_detached(struct task_struct *p) | ||
57 | { | ||
58 | return p->exit_signal == -1; | ||
59 | } | ||
60 | |||
55 | static void __unhash_process(struct task_struct *p) | 61 | static void __unhash_process(struct task_struct *p) |
56 | { | 62 | { |
57 | nr_threads--; | 63 | nr_threads--; |
@@ -160,7 +166,7 @@ repeat: | |||
160 | zap_leader = 0; | 166 | zap_leader = 0; |
161 | leader = p->group_leader; | 167 | leader = p->group_leader; |
162 | if (leader != p && thread_group_empty(leader) && leader->exit_state == EXIT_ZOMBIE) { | 168 | if (leader != p && thread_group_empty(leader) && leader->exit_state == EXIT_ZOMBIE) { |
163 | BUG_ON(leader->exit_signal == -1); | 169 | BUG_ON(task_detached(leader)); |
164 | do_notify_parent(leader, leader->exit_signal); | 170 | do_notify_parent(leader, leader->exit_signal); |
165 | /* | 171 | /* |
166 | * If we were the last child thread and the leader has | 172 | * If we were the last child thread and the leader has |
@@ -170,7 +176,7 @@ repeat: | |||
170 | * do_notify_parent() will have marked it self-reaping in | 176 | * do_notify_parent() will have marked it self-reaping in |
171 | * that case. | 177 | * that case. |
172 | */ | 178 | */ |
173 | zap_leader = (leader->exit_signal == -1); | 179 | zap_leader = task_detached(leader); |
174 | } | 180 | } |
175 | 181 | ||
176 | write_unlock_irq(&tasklist_lock); | 182 | write_unlock_irq(&tasklist_lock); |
@@ -329,13 +335,11 @@ void __set_special_pids(struct pid *pid) | |||
329 | pid_t nr = pid_nr(pid); | 335 | pid_t nr = pid_nr(pid); |
330 | 336 | ||
331 | if (task_session(curr) != pid) { | 337 | if (task_session(curr) != pid) { |
332 | detach_pid(curr, PIDTYPE_SID); | 338 | change_pid(curr, PIDTYPE_SID, pid); |
333 | attach_pid(curr, PIDTYPE_SID, pid); | ||
334 | set_task_session(curr, nr); | 339 | set_task_session(curr, nr); |
335 | } | 340 | } |
336 | if (task_pgrp(curr) != pid) { | 341 | if (task_pgrp(curr) != pid) { |
337 | detach_pid(curr, PIDTYPE_PGID); | 342 | change_pid(curr, PIDTYPE_PGID, pid); |
338 | attach_pid(curr, PIDTYPE_PGID, pid); | ||
339 | set_task_pgrp(curr, nr); | 343 | set_task_pgrp(curr, nr); |
340 | } | 344 | } |
341 | } | 345 | } |
@@ -557,6 +561,88 @@ void exit_fs(struct task_struct *tsk) | |||
557 | 561 | ||
558 | EXPORT_SYMBOL_GPL(exit_fs); | 562 | EXPORT_SYMBOL_GPL(exit_fs); |
559 | 563 | ||
564 | #ifdef CONFIG_MM_OWNER | ||
565 | /* | ||
566 | * Task p is exiting and it owned mm, lets find a new owner for it | ||
567 | */ | ||
568 | static inline int | ||
569 | mm_need_new_owner(struct mm_struct *mm, struct task_struct *p) | ||
570 | { | ||
571 | /* | ||
572 | * If there are other users of the mm and the owner (us) is exiting | ||
573 | * we need to find a new owner to take on the responsibility. | ||
574 | */ | ||
575 | if (!mm) | ||
576 | return 0; | ||
577 | if (atomic_read(&mm->mm_users) <= 1) | ||
578 | return 0; | ||
579 | if (mm->owner != p) | ||
580 | return 0; | ||
581 | return 1; | ||
582 | } | ||
583 | |||
584 | void mm_update_next_owner(struct mm_struct *mm) | ||
585 | { | ||
586 | struct task_struct *c, *g, *p = current; | ||
587 | |||
588 | retry: | ||
589 | if (!mm_need_new_owner(mm, p)) | ||
590 | return; | ||
591 | |||
592 | read_lock(&tasklist_lock); | ||
593 | /* | ||
594 | * Search in the children | ||
595 | */ | ||
596 | list_for_each_entry(c, &p->children, sibling) { | ||
597 | if (c->mm == mm) | ||
598 | goto assign_new_owner; | ||
599 | } | ||
600 | |||
601 | /* | ||
602 | * Search in the siblings | ||
603 | */ | ||
604 | list_for_each_entry(c, &p->parent->children, sibling) { | ||
605 | if (c->mm == mm) | ||
606 | goto assign_new_owner; | ||
607 | } | ||
608 | |||
609 | /* | ||
610 | * Search through everything else. We should not get | ||
611 | * here often | ||
612 | */ | ||
613 | do_each_thread(g, c) { | ||
614 | if (c->mm == mm) | ||
615 | goto assign_new_owner; | ||
616 | } while_each_thread(g, c); | ||
617 | |||
618 | read_unlock(&tasklist_lock); | ||
619 | return; | ||
620 | |||
621 | assign_new_owner: | ||
622 | BUG_ON(c == p); | ||
623 | get_task_struct(c); | ||
624 | /* | ||
625 | * The task_lock protects c->mm from changing. | ||
626 | * We always want mm->owner->mm == mm | ||
627 | */ | ||
628 | task_lock(c); | ||
629 | /* | ||
630 | * Delay read_unlock() till we have the task_lock() | ||
631 | * to ensure that c does not slip away underneath us | ||
632 | */ | ||
633 | read_unlock(&tasklist_lock); | ||
634 | if (c->mm != mm) { | ||
635 | task_unlock(c); | ||
636 | put_task_struct(c); | ||
637 | goto retry; | ||
638 | } | ||
639 | cgroup_mm_owner_callbacks(mm->owner, c); | ||
640 | mm->owner = c; | ||
641 | task_unlock(c); | ||
642 | put_task_struct(c); | ||
643 | } | ||
644 | #endif /* CONFIG_MM_OWNER */ | ||
645 | |||
560 | /* | 646 | /* |
561 | * Turn us into a lazy TLB process if we | 647 | * Turn us into a lazy TLB process if we |
562 | * aren't already.. | 648 | * aren't already.. |
@@ -596,6 +682,7 @@ static void exit_mm(struct task_struct * tsk) | |||
596 | /* We don't want this task to be frozen prematurely */ | 682 | /* We don't want this task to be frozen prematurely */ |
597 | clear_freeze_flag(tsk); | 683 | clear_freeze_flag(tsk); |
598 | task_unlock(tsk); | 684 | task_unlock(tsk); |
685 | mm_update_next_owner(mm); | ||
599 | mmput(mm); | 686 | mmput(mm); |
600 | } | 687 | } |
601 | 688 | ||
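The new mm_update_next_owner() uses a classic lock-handoff pattern: find a candidate under the broad tasklist_lock, pin it with its own task_lock() before dropping the broad lock, then recheck and retry if the candidate changed underneath. A runnable pthreads model of just that handoff — struct owner_rec and pick_new_owner() are illustrative stand-ins, not kernel API:

#include <pthread.h>
#include <stdio.h>

struct owner_rec {
	pthread_mutex_t lock;	/* task_lock() analogue */
	int uses_mm;		/* "c->mm == mm" analogue */
};

static pthread_rwlock_t tasklist = PTHREAD_RWLOCK_INITIALIZER;

static struct owner_rec *pick_new_owner(struct owner_rec *cand)
{
retry:
	pthread_rwlock_rdlock(&tasklist);	/* read_lock(&tasklist_lock) */
	/* ... the search would happen here; cand plays the found task ... */
	pthread_mutex_lock(&cand->lock);	/* task_lock(c) */
	pthread_rwlock_unlock(&tasklist);	/* unlock only after pinning cand */
	if (!cand->uses_mm) {			/* candidate changed under us */
		pthread_mutex_unlock(&cand->lock);
		goto retry;			/* the kernel rescans here */
	}
	pthread_mutex_unlock(&cand->lock);
	return cand;
}

int main(void)
{
	struct owner_rec c = { PTHREAD_MUTEX_INITIALIZER, 1 };
	printf("%s\n", pick_new_owner(&c) ? "owner reassigned" : "none");
	return 0;
}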
@@ -610,7 +697,7 @@ reparent_thread(struct task_struct *p, struct task_struct *father, int traced) | |||
610 | if (unlikely(traced)) { | 697 | if (unlikely(traced)) { |
611 | /* Preserve ptrace links if someone else is tracing this child. */ | 698 | /* Preserve ptrace links if someone else is tracing this child. */ |
612 | list_del_init(&p->ptrace_list); | 699 | list_del_init(&p->ptrace_list); |
613 | if (p->parent != p->real_parent) | 700 | if (ptrace_reparented(p)) |
614 | list_add(&p->ptrace_list, &p->real_parent->ptrace_children); | 701 | list_add(&p->ptrace_list, &p->real_parent->ptrace_children); |
615 | } else { | 702 | } else { |
616 | /* If this child is being traced, then we're the one tracing it | 703 | /* If this child is being traced, then we're the one tracing it |
@@ -634,18 +721,18 @@ reparent_thread(struct task_struct *p, struct task_struct *father, int traced) | |||
634 | /* If this is a threaded reparent there is no need to | 721 | /* If this is a threaded reparent there is no need to |
635 | * notify anyone anything has happened. | 722 | * notify anyone anything has happened. |
636 | */ | 723 | */ |
637 | if (p->real_parent->group_leader == father->group_leader) | 724 | if (same_thread_group(p->real_parent, father)) |
638 | return; | 725 | return; |
639 | 726 | ||
640 | /* We don't want people slaying init. */ | 727 | /* We don't want people slaying init. */ |
641 | if (p->exit_signal != -1) | 728 | if (!task_detached(p)) |
642 | p->exit_signal = SIGCHLD; | 729 | p->exit_signal = SIGCHLD; |
643 | 730 | ||
644 | /* If we'd notified the old parent about this child's death, | 731 | /* If we'd notified the old parent about this child's death, |
645 | * also notify the new parent. | 732 | * also notify the new parent. |
646 | */ | 733 | */ |
647 | if (!traced && p->exit_state == EXIT_ZOMBIE && | 734 | if (!traced && p->exit_state == EXIT_ZOMBIE && |
648 | p->exit_signal != -1 && thread_group_empty(p)) | 735 | !task_detached(p) && thread_group_empty(p)) |
649 | do_notify_parent(p, p->exit_signal); | 736 | do_notify_parent(p, p->exit_signal); |
650 | 737 | ||
651 | kill_orphaned_pgrp(p, father); | 738 | kill_orphaned_pgrp(p, father); |
@@ -698,18 +785,18 @@ static void forget_original_parent(struct task_struct *father) | |||
698 | } else { | 785 | } else { |
699 | /* reparent ptraced task to its real parent */ | 786 | /* reparent ptraced task to its real parent */ |
700 | __ptrace_unlink (p); | 787 | __ptrace_unlink (p); |
701 | if (p->exit_state == EXIT_ZOMBIE && p->exit_signal != -1 && | 788 | if (p->exit_state == EXIT_ZOMBIE && !task_detached(p) && |
702 | thread_group_empty(p)) | 789 | thread_group_empty(p)) |
703 | do_notify_parent(p, p->exit_signal); | 790 | do_notify_parent(p, p->exit_signal); |
704 | } | 791 | } |
705 | 792 | ||
706 | /* | 793 | /* |
707 | * if the ptraced child is a zombie with exit_signal == -1 | 794 | * if the ptraced child is a detached zombie we must collect |
708 | * we must collect it before we exit, or it will remain | 795 | * it before we exit, or it will remain zombie forever since |
709 | * zombie forever since we prevented it from self-reap itself | 796 | * we prevented it from self-reap itself while it was being |
710 | * while it was being traced by us, to be able to see it in wait4. | 797 | * traced by us, to be able to see it in wait4. |
711 | */ | 798 | */ |
712 | if (unlikely(ptrace && p->exit_state == EXIT_ZOMBIE && p->exit_signal == -1)) | 799 | if (unlikely(ptrace && p->exit_state == EXIT_ZOMBIE && task_detached(p))) |
713 | list_add(&p->ptrace_list, &ptrace_dead); | 800 | list_add(&p->ptrace_list, &ptrace_dead); |
714 | } | 801 | } |
715 | 802 | ||
@@ -766,29 +853,30 @@ static void exit_notify(struct task_struct *tsk, int group_dead) | |||
766 | * we have changed execution domain as these two values started | 853 | * we have changed execution domain as these two values started |
767 | * the same after a fork. | 854 | * the same after a fork. |
768 | */ | 855 | */ |
769 | if (tsk->exit_signal != SIGCHLD && tsk->exit_signal != -1 && | 856 | if (tsk->exit_signal != SIGCHLD && !task_detached(tsk) && |
770 | (tsk->parent_exec_id != tsk->real_parent->self_exec_id || | 857 | (tsk->parent_exec_id != tsk->real_parent->self_exec_id || |
771 | tsk->self_exec_id != tsk->parent_exec_id) | 858 | tsk->self_exec_id != tsk->parent_exec_id) && |
772 | && !capable(CAP_KILL)) | 859 | !capable(CAP_KILL)) |
773 | tsk->exit_signal = SIGCHLD; | 860 | tsk->exit_signal = SIGCHLD; |
774 | 861 | ||
775 | |||
776 | /* If something other than our normal parent is ptracing us, then | 862 | /* If something other than our normal parent is ptracing us, then |
777 | * send it a SIGCHLD instead of honoring exit_signal. exit_signal | 863 | * send it a SIGCHLD instead of honoring exit_signal. exit_signal |
778 | * only has special meaning to our real parent. | 864 | * only has special meaning to our real parent. |
779 | */ | 865 | */ |
780 | if (tsk->exit_signal != -1 && thread_group_empty(tsk)) { | 866 | if (!task_detached(tsk) && thread_group_empty(tsk)) { |
781 | int signal = tsk->parent == tsk->real_parent ? tsk->exit_signal : SIGCHLD; | 867 | int signal = ptrace_reparented(tsk) ? |
868 | SIGCHLD : tsk->exit_signal; | ||
782 | do_notify_parent(tsk, signal); | 869 | do_notify_parent(tsk, signal); |
783 | } else if (tsk->ptrace) { | 870 | } else if (tsk->ptrace) { |
784 | do_notify_parent(tsk, SIGCHLD); | 871 | do_notify_parent(tsk, SIGCHLD); |
785 | } | 872 | } |
786 | 873 | ||
787 | state = EXIT_ZOMBIE; | 874 | state = EXIT_ZOMBIE; |
788 | if (tsk->exit_signal == -1 && likely(!tsk->ptrace)) | 875 | if (task_detached(tsk) && likely(!tsk->ptrace)) |
789 | state = EXIT_DEAD; | 876 | state = EXIT_DEAD; |
790 | tsk->exit_state = state; | 877 | tsk->exit_state = state; |
791 | 878 | ||
879 | /* mt-exec, de_thread() is waiting for us */ | ||
792 | if (thread_group_leader(tsk) && | 880 | if (thread_group_leader(tsk) && |
793 | tsk->signal->notify_count < 0 && | 881 | tsk->signal->notify_count < 0 && |
794 | tsk->signal->group_exit_task) | 882 | tsk->signal->group_exit_task) |
@@ -1032,12 +1120,13 @@ asmlinkage long sys_exit(int error_code) | |||
1032 | NORET_TYPE void | 1120 | NORET_TYPE void |
1033 | do_group_exit(int exit_code) | 1121 | do_group_exit(int exit_code) |
1034 | { | 1122 | { |
1123 | struct signal_struct *sig = current->signal; | ||
1124 | |||
1035 | BUG_ON(exit_code & 0x80); /* core dumps don't get here */ | 1125 | BUG_ON(exit_code & 0x80); /* core dumps don't get here */ |
1036 | 1126 | ||
1037 | if (current->signal->flags & SIGNAL_GROUP_EXIT) | 1127 | if (signal_group_exit(sig)) |
1038 | exit_code = current->signal->group_exit_code; | 1128 | exit_code = sig->group_exit_code; |
1039 | else if (!thread_group_empty(current)) { | 1129 | else if (!thread_group_empty(current)) { |
1040 | struct signal_struct *const sig = current->signal; | ||
1041 | struct sighand_struct *const sighand = current->sighand; | 1130 | struct sighand_struct *const sighand = current->sighand; |
1042 | spin_lock_irq(&sighand->siglock); | 1131 | spin_lock_irq(&sighand->siglock); |
1043 | if (signal_group_exit(sig)) | 1132 | if (signal_group_exit(sig)) |
@@ -1089,7 +1178,7 @@ static int eligible_child(enum pid_type type, struct pid *pid, int options, | |||
1089 | * Do not consider detached threads that are | 1178 | * Do not consider detached threads that are |
1090 | * not ptraced: | 1179 | * not ptraced: |
1091 | */ | 1180 | */ |
1092 | if (p->exit_signal == -1 && !p->ptrace) | 1181 | if (task_detached(p) && !p->ptrace) |
1093 | return 0; | 1182 | return 0; |
1094 | 1183 | ||
1095 | /* Wait for all children (clone and not) if __WALL is set; | 1184 | /* Wait for all children (clone and not) if __WALL is set; |
@@ -1179,8 +1268,7 @@ static int wait_task_zombie(struct task_struct *p, int noreap, | |||
1179 | return 0; | 1268 | return 0; |
1180 | } | 1269 | } |
1181 | 1270 | ||
1182 | /* traced means p->ptrace, but not vice versa */ | 1271 | traced = ptrace_reparented(p); |
1183 | traced = (p->real_parent != p->parent); | ||
1184 | 1272 | ||
1185 | if (likely(!traced)) { | 1273 | if (likely(!traced)) { |
1186 | struct signal_struct *psig; | 1274 | struct signal_struct *psig; |
@@ -1281,9 +1369,9 @@ static int wait_task_zombie(struct task_struct *p, int noreap, | |||
1281 | * If it's still not detached after that, don't release | 1369 | * If it's still not detached after that, don't release |
1282 | * it now. | 1370 | * it now. |
1283 | */ | 1371 | */ |
1284 | if (p->exit_signal != -1) { | 1372 | if (!task_detached(p)) { |
1285 | do_notify_parent(p, p->exit_signal); | 1373 | do_notify_parent(p, p->exit_signal); |
1286 | if (p->exit_signal != -1) { | 1374 | if (!task_detached(p)) { |
1287 | p->exit_state = EXIT_ZOMBIE; | 1375 | p->exit_state = EXIT_ZOMBIE; |
1288 | p = NULL; | 1376 | p = NULL; |
1289 | } | 1377 | } |
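Most of the exit.c churn above is mechanical: every open-coded p->exit_signal == -1 test becomes the new task_detached() predicate, so the notion of a detached task has exactly one definition. A runnable toy model of the refactor — struct task here is a stand-in, and 17 is SIGCHLD on x86 Linux:

#include <assert.h>
#include <stdbool.h>

/* Toy stand-in for task_struct: only the field the predicate reads. */
struct task {
	int exit_signal;	/* -1 means detached, as in the kernel */
};

/* Mirrors the new helper: one place defines what "detached" means. */
static inline bool task_detached(const struct task *p)
{
	return p->exit_signal == -1;
}

int main(void)
{
	struct task leader = { .exit_signal = -1 };
	struct task child  = { .exit_signal = 17 };	/* SIGCHLD */

	assert(task_detached(&leader));
	assert(!task_detached(&child));
	return 0;
}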
diff --git a/kernel/fork.c b/kernel/fork.c index 6067e429f281..933e60ebccae 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include <linux/mempolicy.h> | 22 | #include <linux/mempolicy.h> |
23 | #include <linux/sem.h> | 23 | #include <linux/sem.h> |
24 | #include <linux/file.h> | 24 | #include <linux/file.h> |
25 | #include <linux/fdtable.h> | ||
25 | #include <linux/key.h> | 26 | #include <linux/key.h> |
26 | #include <linux/binfmts.h> | 27 | #include <linux/binfmts.h> |
27 | #include <linux/mman.h> | 28 | #include <linux/mman.h> |
@@ -381,14 +382,13 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p) | |||
381 | mm->ioctx_list = NULL; | 382 | mm->ioctx_list = NULL; |
382 | mm->free_area_cache = TASK_UNMAPPED_BASE; | 383 | mm->free_area_cache = TASK_UNMAPPED_BASE; |
383 | mm->cached_hole_size = ~0UL; | 384 | mm->cached_hole_size = ~0UL; |
384 | mm_init_cgroup(mm, p); | 385 | mm_init_owner(mm, p); |
385 | 386 | ||
386 | if (likely(!mm_alloc_pgd(mm))) { | 387 | if (likely(!mm_alloc_pgd(mm))) { |
387 | mm->def_flags = 0; | 388 | mm->def_flags = 0; |
388 | return mm; | 389 | return mm; |
389 | } | 390 | } |
390 | 391 | ||
391 | mm_free_cgroup(mm); | ||
392 | free_mm(mm); | 392 | free_mm(mm); |
393 | return NULL; | 393 | return NULL; |
394 | } | 394 | } |
@@ -432,13 +432,13 @@ void mmput(struct mm_struct *mm) | |||
432 | if (atomic_dec_and_test(&mm->mm_users)) { | 432 | if (atomic_dec_and_test(&mm->mm_users)) { |
433 | exit_aio(mm); | 433 | exit_aio(mm); |
434 | exit_mmap(mm); | 434 | exit_mmap(mm); |
435 | set_mm_exe_file(mm, NULL); | ||
435 | if (!list_empty(&mm->mmlist)) { | 436 | if (!list_empty(&mm->mmlist)) { |
436 | spin_lock(&mmlist_lock); | 437 | spin_lock(&mmlist_lock); |
437 | list_del(&mm->mmlist); | 438 | list_del(&mm->mmlist); |
438 | spin_unlock(&mmlist_lock); | 439 | spin_unlock(&mmlist_lock); |
439 | } | 440 | } |
440 | put_swap_token(mm); | 441 | put_swap_token(mm); |
441 | mm_free_cgroup(mm); | ||
442 | mmdrop(mm); | 442 | mmdrop(mm); |
443 | } | 443 | } |
444 | } | 444 | } |
@@ -545,6 +545,8 @@ struct mm_struct *dup_mm(struct task_struct *tsk) | |||
545 | if (init_new_context(tsk, mm)) | 545 | if (init_new_context(tsk, mm)) |
546 | goto fail_nocontext; | 546 | goto fail_nocontext; |
547 | 547 | ||
548 | dup_mm_exe_file(oldmm, mm); | ||
549 | |||
548 | err = dup_mmap(mm, oldmm); | 550 | err = dup_mmap(mm, oldmm); |
549 | if (err) | 551 | if (err) |
550 | goto free_pt; | 552 | goto free_pt; |
@@ -891,7 +893,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) | |||
891 | sig->group_exit_code = 0; | 893 | sig->group_exit_code = 0; |
892 | sig->group_exit_task = NULL; | 894 | sig->group_exit_task = NULL; |
893 | sig->group_stop_count = 0; | 895 | sig->group_stop_count = 0; |
894 | sig->curr_target = NULL; | 896 | sig->curr_target = tsk; |
895 | init_sigpending(&sig->shared_pending); | 897 | init_sigpending(&sig->shared_pending); |
896 | INIT_LIST_HEAD(&sig->posix_timers); | 898 | INIT_LIST_HEAD(&sig->posix_timers); |
897 | 899 | ||
@@ -982,6 +984,13 @@ static void rt_mutex_init_task(struct task_struct *p) | |||
982 | #endif | 984 | #endif |
983 | } | 985 | } |
984 | 986 | ||
987 | #ifdef CONFIG_MM_OWNER | ||
988 | void mm_init_owner(struct mm_struct *mm, struct task_struct *p) | ||
989 | { | ||
990 | mm->owner = p; | ||
991 | } | ||
992 | #endif /* CONFIG_MM_OWNER */ | ||
993 | |||
985 | /* | 994 | /* |
986 | * This creates a new process as a copy of the old one, | 995 | * This creates a new process as a copy of the old one, |
987 | * but does not actually start it yet. | 996 | * but does not actually start it yet. |
@@ -1664,18 +1673,6 @@ static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp | |||
1664 | } | 1673 | } |
1665 | 1674 | ||
1666 | /* | 1675 | /* |
1667 | * Unsharing of semundo for tasks created with CLONE_SYSVSEM is not | ||
1668 | * supported yet | ||
1669 | */ | ||
1670 | static int unshare_semundo(unsigned long unshare_flags, struct sem_undo_list **new_ulistp) | ||
1671 | { | ||
1672 | if (unshare_flags & CLONE_SYSVSEM) | ||
1673 | return -EINVAL; | ||
1674 | |||
1675 | return 0; | ||
1676 | } | ||
1677 | |||
1678 | /* | ||
1679 | * unshare allows a process to 'unshare' part of the process | 1676 | * unshare allows a process to 'unshare' part of the process |
1680 | * context which was originally shared using clone. copy_* | 1677 | * context which was originally shared using clone. copy_* |
1681 | * functions used by do_fork() cannot be used here directly | 1678 | * functions used by do_fork() cannot be used here directly |
@@ -1690,8 +1687,8 @@ asmlinkage long sys_unshare(unsigned long unshare_flags) | |||
1690 | struct sighand_struct *new_sigh = NULL; | 1687 | struct sighand_struct *new_sigh = NULL; |
1691 | struct mm_struct *mm, *new_mm = NULL, *active_mm = NULL; | 1688 | struct mm_struct *mm, *new_mm = NULL, *active_mm = NULL; |
1692 | struct files_struct *fd, *new_fd = NULL; | 1689 | struct files_struct *fd, *new_fd = NULL; |
1693 | struct sem_undo_list *new_ulist = NULL; | ||
1694 | struct nsproxy *new_nsproxy = NULL; | 1690 | struct nsproxy *new_nsproxy = NULL; |
1691 | int do_sysvsem = 0; | ||
1695 | 1692 | ||
1696 | check_unshare_flags(&unshare_flags); | 1693 | check_unshare_flags(&unshare_flags); |
1697 | 1694 | ||
@@ -1703,6 +1700,13 @@ asmlinkage long sys_unshare(unsigned long unshare_flags) | |||
1703 | CLONE_NEWNET)) | 1700 | CLONE_NEWNET)) |
1704 | goto bad_unshare_out; | 1701 | goto bad_unshare_out; |
1705 | 1702 | ||
1703 | /* | ||
1704 | * CLONE_NEWIPC must also detach from the undolist: after switching | ||
1705 | * to a new ipc namespace, the semaphore arrays from the old | ||
1706 | * namespace are unreachable. | ||
1707 | */ | ||
1708 | if (unshare_flags & (CLONE_NEWIPC|CLONE_SYSVSEM)) | ||
1709 | do_sysvsem = 1; | ||
1706 | if ((err = unshare_thread(unshare_flags))) | 1710 | if ((err = unshare_thread(unshare_flags))) |
1707 | goto bad_unshare_out; | 1711 | goto bad_unshare_out; |
1708 | if ((err = unshare_fs(unshare_flags, &new_fs))) | 1712 | if ((err = unshare_fs(unshare_flags, &new_fs))) |
@@ -1713,13 +1717,17 @@ asmlinkage long sys_unshare(unsigned long unshare_flags) | |||
1713 | goto bad_unshare_cleanup_sigh; | 1717 | goto bad_unshare_cleanup_sigh; |
1714 | if ((err = unshare_fd(unshare_flags, &new_fd))) | 1718 | if ((err = unshare_fd(unshare_flags, &new_fd))) |
1715 | goto bad_unshare_cleanup_vm; | 1719 | goto bad_unshare_cleanup_vm; |
1716 | if ((err = unshare_semundo(unshare_flags, &new_ulist))) | ||
1717 | goto bad_unshare_cleanup_fd; | ||
1718 | if ((err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy, | 1720 | if ((err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy, |
1719 | new_fs))) | 1721 | new_fs))) |
1720 | goto bad_unshare_cleanup_semundo; | 1722 | goto bad_unshare_cleanup_fd; |
1721 | 1723 | ||
1722 | if (new_fs || new_mm || new_fd || new_ulist || new_nsproxy) { | 1724 | if (new_fs || new_mm || new_fd || do_sysvsem || new_nsproxy) { |
1725 | if (do_sysvsem) { | ||
1726 | /* | ||
1727 | * CLONE_SYSVSEM is equivalent to sys_exit(). | ||
1728 | */ | ||
1729 | exit_sem(current); | ||
1730 | } | ||
1723 | 1731 | ||
1724 | if (new_nsproxy) { | 1732 | if (new_nsproxy) { |
1725 | switch_task_namespaces(current, new_nsproxy); | 1733 | switch_task_namespaces(current, new_nsproxy); |
@@ -1755,7 +1763,6 @@ asmlinkage long sys_unshare(unsigned long unshare_flags) | |||
1755 | if (new_nsproxy) | 1763 | if (new_nsproxy) |
1756 | put_nsproxy(new_nsproxy); | 1764 | put_nsproxy(new_nsproxy); |
1757 | 1765 | ||
1758 | bad_unshare_cleanup_semundo: | ||
1759 | bad_unshare_cleanup_fd: | 1766 | bad_unshare_cleanup_fd: |
1760 | if (new_fd) | 1767 | if (new_fd) |
1761 | put_files_struct(new_fd); | 1768 | put_files_struct(new_fd); |
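The fork.c changes make unshare(CLONE_SYSVSEM) work instead of returning -EINVAL: the flag (and CLONE_NEWIPC, which must also detach the undo list) is now handled by calling exit_sem(current), dropping the caller's System V semaphore undo adjustments exactly as process exit would. A hedged userspace sketch — this only succeeds on a kernel carrying this patch:

#define _GNU_SOURCE
#include <sched.h>
#include <errno.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	/* Before this patch, unshare(CLONE_SYSVSEM) failed with EINVAL.
	 * With it, the call succeeds and discards any SEM_UNDO
	 * adjustments this task had accumulated. */
	if (unshare(CLONE_SYSVSEM) == -1)
		fprintf(stderr, "unshare(CLONE_SYSVSEM): %s\n", strerror(errno));
	else
		printf("semundo list detached\n");
	return 0;
}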
diff --git a/kernel/futex.c b/kernel/futex.c index e43945e995f5..98092c9817f4 100644 --- a/kernel/futex.c +++ b/kernel/futex.c | |||
@@ -1266,11 +1266,13 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared, | |||
1266 | if (!abs_time) | 1266 | if (!abs_time) |
1267 | schedule(); | 1267 | schedule(); |
1268 | else { | 1268 | else { |
1269 | hrtimer_init(&t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); | 1269 | hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC, |
1270 | HRTIMER_MODE_ABS); | ||
1270 | hrtimer_init_sleeper(&t, current); | 1271 | hrtimer_init_sleeper(&t, current); |
1271 | t.timer.expires = *abs_time; | 1272 | t.timer.expires = *abs_time; |
1272 | 1273 | ||
1273 | hrtimer_start(&t.timer, t.timer.expires, HRTIMER_MODE_ABS); | 1274 | hrtimer_start(&t.timer, t.timer.expires, |
1275 | HRTIMER_MODE_ABS); | ||
1274 | if (!hrtimer_active(&t.timer)) | 1276 | if (!hrtimer_active(&t.timer)) |
1275 | t.task = NULL; | 1277 | t.task = NULL; |
1276 | 1278 | ||
@@ -1286,6 +1288,8 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared, | |||
1286 | 1288 | ||
1287 | /* Flag if a timeout occured */ | 1289 | /* Flag if a timeout occured */ |
1288 | rem = (t.task == NULL); | 1290 | rem = (t.task == NULL); |
1291 | |||
1292 | destroy_hrtimer_on_stack(&t.timer); | ||
1289 | } | 1293 | } |
1290 | } | 1294 | } |
1291 | __set_current_state(TASK_RUNNING); | 1295 | __set_current_state(TASK_RUNNING); |
@@ -1367,7 +1371,8 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared, | |||
1367 | 1371 | ||
1368 | if (time) { | 1372 | if (time) { |
1369 | to = &timeout; | 1373 | to = &timeout; |
1370 | hrtimer_init(&to->timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); | 1374 | hrtimer_init_on_stack(&to->timer, CLOCK_REALTIME, |
1375 | HRTIMER_MODE_ABS); | ||
1371 | hrtimer_init_sleeper(to, current); | 1376 | hrtimer_init_sleeper(to, current); |
1372 | to->timer.expires = *time; | 1377 | to->timer.expires = *time; |
1373 | } | 1378 | } |
@@ -1581,6 +1586,8 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared, | |||
1581 | unqueue_me_pi(&q); | 1586 | unqueue_me_pi(&q); |
1582 | futex_unlock_mm(fshared); | 1587 | futex_unlock_mm(fshared); |
1583 | 1588 | ||
1589 | if (to) | ||
1590 | destroy_hrtimer_on_stack(&to->timer); | ||
1584 | return ret != -EINTR ? ret : -ERESTARTNOINTR; | 1591 | return ret != -EINTR ? ret : -ERESTARTNOINTR; |
1585 | 1592 | ||
1586 | out_unlock_release_sem: | 1593 | out_unlock_release_sem: |
@@ -1588,6 +1595,8 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared, | |||
1588 | 1595 | ||
1589 | out_release_sem: | 1596 | out_release_sem: |
1590 | futex_unlock_mm(fshared); | 1597 | futex_unlock_mm(fshared); |
1598 | if (to) | ||
1599 | destroy_hrtimer_on_stack(&to->timer); | ||
1591 | return ret; | 1600 | return ret; |
1592 | 1601 | ||
1593 | uaddr_faulted: | 1602 | uaddr_faulted: |
@@ -1615,6 +1624,8 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared, | |||
1615 | if (!ret && (uval != -EFAULT)) | 1624 | if (!ret && (uval != -EFAULT)) |
1616 | goto retry; | 1625 | goto retry; |
1617 | 1626 | ||
1627 | if (to) | ||
1628 | destroy_hrtimer_on_stack(&to->timer); | ||
1618 | return ret; | 1629 | return ret; |
1619 | } | 1630 | } |
1620 | 1631 | ||
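The futex changes exist to satisfy the debugobjects machinery introduced in hrtimer.c below: a stack-allocated hrtimer must be announced with hrtimer_init_on_stack() and retired with destroy_hrtimer_on_stack() on every exit path, otherwise the debug code can flag a live timer in a dead stack frame. The required pairing, as a kernel-style fragment (not buildable standalone; the calls are those added in this series):

struct hrtimer_sleeper t;

hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
hrtimer_init_sleeper(&t, current);
t.timer.expires = *abs_time;
/* ... start the timer, sleep, evaluate the timeout ... */
destroy_hrtimer_on_stack(&t.timer);	/* must run before the frame dies */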
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index dea4c9124ac8..9af1d6a8095e 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c | |||
@@ -43,6 +43,7 @@ | |||
43 | #include <linux/tick.h> | 43 | #include <linux/tick.h> |
44 | #include <linux/seq_file.h> | 44 | #include <linux/seq_file.h> |
45 | #include <linux/err.h> | 45 | #include <linux/err.h> |
46 | #include <linux/debugobjects.h> | ||
46 | 47 | ||
47 | #include <asm/uaccess.h> | 48 | #include <asm/uaccess.h> |
48 | 49 | ||
@@ -342,6 +343,115 @@ ktime_t ktime_add_safe(const ktime_t lhs, const ktime_t rhs) | |||
342 | return res; | 343 | return res; |
343 | } | 344 | } |
344 | 345 | ||
346 | #ifdef CONFIG_DEBUG_OBJECTS_TIMERS | ||
347 | |||
348 | static struct debug_obj_descr hrtimer_debug_descr; | ||
349 | |||
350 | /* | ||
351 | * fixup_init is called when: | ||
352 | * - an active object is initialized | ||
353 | */ | ||
354 | static int hrtimer_fixup_init(void *addr, enum debug_obj_state state) | ||
355 | { | ||
356 | struct hrtimer *timer = addr; | ||
357 | |||
358 | switch (state) { | ||
359 | case ODEBUG_STATE_ACTIVE: | ||
360 | hrtimer_cancel(timer); | ||
361 | debug_object_init(timer, &hrtimer_debug_descr); | ||
362 | return 1; | ||
363 | default: | ||
364 | return 0; | ||
365 | } | ||
366 | } | ||
367 | |||
368 | /* | ||
369 | * fixup_activate is called when: | ||
370 | * - an active object is activated | ||
371 | * - an unknown object is activated (might be a statically initialized object) | ||
372 | */ | ||
373 | static int hrtimer_fixup_activate(void *addr, enum debug_obj_state state) | ||
374 | { | ||
375 | switch (state) { | ||
376 | |||
377 | case ODEBUG_STATE_NOTAVAILABLE: | ||
378 | WARN_ON_ONCE(1); | ||
379 | return 0; | ||
380 | |||
381 | case ODEBUG_STATE_ACTIVE: | ||
382 | WARN_ON(1); | ||
383 | |||
384 | default: | ||
385 | return 0; | ||
386 | } | ||
387 | } | ||
388 | |||
389 | /* | ||
390 | * fixup_free is called when: | ||
391 | * - an active object is freed | ||
392 | */ | ||
393 | static int hrtimer_fixup_free(void *addr, enum debug_obj_state state) | ||
394 | { | ||
395 | struct hrtimer *timer = addr; | ||
396 | |||
397 | switch (state) { | ||
398 | case ODEBUG_STATE_ACTIVE: | ||
399 | hrtimer_cancel(timer); | ||
400 | debug_object_free(timer, &hrtimer_debug_descr); | ||
401 | return 1; | ||
402 | default: | ||
403 | return 0; | ||
404 | } | ||
405 | } | ||
406 | |||
407 | static struct debug_obj_descr hrtimer_debug_descr = { | ||
408 | .name = "hrtimer", | ||
409 | .fixup_init = hrtimer_fixup_init, | ||
410 | .fixup_activate = hrtimer_fixup_activate, | ||
411 | .fixup_free = hrtimer_fixup_free, | ||
412 | }; | ||
413 | |||
414 | static inline void debug_hrtimer_init(struct hrtimer *timer) | ||
415 | { | ||
416 | debug_object_init(timer, &hrtimer_debug_descr); | ||
417 | } | ||
418 | |||
419 | static inline void debug_hrtimer_activate(struct hrtimer *timer) | ||
420 | { | ||
421 | debug_object_activate(timer, &hrtimer_debug_descr); | ||
422 | } | ||
423 | |||
424 | static inline void debug_hrtimer_deactivate(struct hrtimer *timer) | ||
425 | { | ||
426 | debug_object_deactivate(timer, &hrtimer_debug_descr); | ||
427 | } | ||
428 | |||
429 | static inline void debug_hrtimer_free(struct hrtimer *timer) | ||
430 | { | ||
431 | debug_object_free(timer, &hrtimer_debug_descr); | ||
432 | } | ||
433 | |||
434 | static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id, | ||
435 | enum hrtimer_mode mode); | ||
436 | |||
437 | void hrtimer_init_on_stack(struct hrtimer *timer, clockid_t clock_id, | ||
438 | enum hrtimer_mode mode) | ||
439 | { | ||
440 | debug_object_init_on_stack(timer, &hrtimer_debug_descr); | ||
441 | __hrtimer_init(timer, clock_id, mode); | ||
442 | } | ||
443 | |||
444 | void destroy_hrtimer_on_stack(struct hrtimer *timer) | ||
445 | { | ||
446 | debug_object_free(timer, &hrtimer_debug_descr); | ||
447 | } | ||
448 | |||
449 | #else | ||
450 | static inline void debug_hrtimer_init(struct hrtimer *timer) { } | ||
451 | static inline void debug_hrtimer_activate(struct hrtimer *timer) { } | ||
452 | static inline void debug_hrtimer_deactivate(struct hrtimer *timer) { } | ||
453 | #endif | ||
454 | |||
345 | /* | 455 | /* |
346 | * Check, whether the timer is on the callback pending list | 456 | * Check, whether the timer is on the callback pending list |
347 | */ | 457 | */ |
@@ -567,6 +677,7 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, | |||
567 | /* Timer is expired, act upon the callback mode */ | 677 | /* Timer is expired, act upon the callback mode */ |
568 | switch(timer->cb_mode) { | 678 | switch(timer->cb_mode) { |
569 | case HRTIMER_CB_IRQSAFE_NO_RESTART: | 679 | case HRTIMER_CB_IRQSAFE_NO_RESTART: |
680 | debug_hrtimer_deactivate(timer); | ||
570 | /* | 681 | /* |
571 | * We can call the callback from here. No restart | 682 | * We can call the callback from here. No restart |
572 | * happens, so no danger of recursion | 683 | * happens, so no danger of recursion |
@@ -581,6 +692,7 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, | |||
581 | * the tick timer in the softirq ! The calling site | 692 | * the tick timer in the softirq ! The calling site |
582 | * takes care of this. | 693 | * takes care of this. |
583 | */ | 694 | */ |
695 | debug_hrtimer_deactivate(timer); | ||
584 | return 1; | 696 | return 1; |
585 | case HRTIMER_CB_IRQSAFE: | 697 | case HRTIMER_CB_IRQSAFE: |
586 | case HRTIMER_CB_SOFTIRQ: | 698 | case HRTIMER_CB_SOFTIRQ: |
@@ -735,6 +847,8 @@ static void enqueue_hrtimer(struct hrtimer *timer, | |||
735 | struct hrtimer *entry; | 847 | struct hrtimer *entry; |
736 | int leftmost = 1; | 848 | int leftmost = 1; |
737 | 849 | ||
850 | debug_hrtimer_activate(timer); | ||
851 | |||
738 | /* | 852 | /* |
739 | * Find the right place in the rbtree: | 853 | * Find the right place in the rbtree: |
740 | */ | 854 | */ |
@@ -831,6 +945,7 @@ remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base) | |||
831 | * reprogramming happens in the interrupt handler. This is a | 945 | * reprogramming happens in the interrupt handler. This is a |
832 | * rare case and less expensive than a smp call. | 946 | * rare case and less expensive than a smp call. |
833 | */ | 947 | */ |
948 | debug_hrtimer_deactivate(timer); | ||
834 | timer_stats_hrtimer_clear_start_info(timer); | 949 | timer_stats_hrtimer_clear_start_info(timer); |
835 | reprogram = base->cpu_base == &__get_cpu_var(hrtimer_bases); | 950 | reprogram = base->cpu_base == &__get_cpu_var(hrtimer_bases); |
836 | __remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE, | 951 | __remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE, |
@@ -878,6 +993,7 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode) | |||
878 | tim = ktime_add_safe(tim, base->resolution); | 993 | tim = ktime_add_safe(tim, base->resolution); |
879 | #endif | 994 | #endif |
880 | } | 995 | } |
996 | |||
881 | timer->expires = tim; | 997 | timer->expires = tim; |
882 | 998 | ||
883 | timer_stats_hrtimer_set_start_info(timer); | 999 | timer_stats_hrtimer_set_start_info(timer); |
@@ -1011,14 +1127,8 @@ ktime_t hrtimer_get_next_event(void) | |||
1011 | } | 1127 | } |
1012 | #endif | 1128 | #endif |
1013 | 1129 | ||
1014 | /** | 1130 | static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id, |
1015 | * hrtimer_init - initialize a timer to the given clock | 1131 | enum hrtimer_mode mode) |
1016 | * @timer: the timer to be initialized | ||
1017 | * @clock_id: the clock to be used | ||
1018 | * @mode: timer mode abs/rel | ||
1019 | */ | ||
1020 | void hrtimer_init(struct hrtimer *timer, clockid_t clock_id, | ||
1021 | enum hrtimer_mode mode) | ||
1022 | { | 1132 | { |
1023 | struct hrtimer_cpu_base *cpu_base; | 1133 | struct hrtimer_cpu_base *cpu_base; |
1024 | 1134 | ||
@@ -1039,6 +1149,19 @@ void hrtimer_init(struct hrtimer *timer, clockid_t clock_id, | |||
1039 | memset(timer->start_comm, 0, TASK_COMM_LEN); | 1149 | memset(timer->start_comm, 0, TASK_COMM_LEN); |
1040 | #endif | 1150 | #endif |
1041 | } | 1151 | } |
1152 | |||
1153 | /** | ||
1154 | * hrtimer_init - initialize a timer to the given clock | ||
1155 | * @timer: the timer to be initialized | ||
1156 | * @clock_id: the clock to be used | ||
1157 | * @mode: timer mode abs/rel | ||
1158 | */ | ||
1159 | void hrtimer_init(struct hrtimer *timer, clockid_t clock_id, | ||
1160 | enum hrtimer_mode mode) | ||
1161 | { | ||
1162 | debug_hrtimer_init(timer); | ||
1163 | __hrtimer_init(timer, clock_id, mode); | ||
1164 | } | ||
1042 | EXPORT_SYMBOL_GPL(hrtimer_init); | 1165 | EXPORT_SYMBOL_GPL(hrtimer_init); |
1043 | 1166 | ||
1044 | /** | 1167 | /** |
@@ -1072,6 +1195,7 @@ static void run_hrtimer_pending(struct hrtimer_cpu_base *cpu_base) | |||
1072 | timer = list_entry(cpu_base->cb_pending.next, | 1195 | timer = list_entry(cpu_base->cb_pending.next, |
1073 | struct hrtimer, cb_entry); | 1196 | struct hrtimer, cb_entry); |
1074 | 1197 | ||
1198 | debug_hrtimer_deactivate(timer); | ||
1075 | timer_stats_account_hrtimer(timer); | 1199 | timer_stats_account_hrtimer(timer); |
1076 | 1200 | ||
1077 | fn = timer->function; | 1201 | fn = timer->function; |
@@ -1120,6 +1244,7 @@ static void __run_hrtimer(struct hrtimer *timer) | |||
1120 | enum hrtimer_restart (*fn)(struct hrtimer *); | 1244 | enum hrtimer_restart (*fn)(struct hrtimer *); |
1121 | int restart; | 1245 | int restart; |
1122 | 1246 | ||
1247 | debug_hrtimer_deactivate(timer); | ||
1123 | __remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0); | 1248 | __remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0); |
1124 | timer_stats_account_hrtimer(timer); | 1249 | timer_stats_account_hrtimer(timer); |
1125 | 1250 | ||
@@ -1378,22 +1503,27 @@ long __sched hrtimer_nanosleep_restart(struct restart_block *restart) | |||
1378 | { | 1503 | { |
1379 | struct hrtimer_sleeper t; | 1504 | struct hrtimer_sleeper t; |
1380 | struct timespec __user *rmtp; | 1505 | struct timespec __user *rmtp; |
1506 | int ret = 0; | ||
1381 | 1507 | ||
1382 | hrtimer_init(&t.timer, restart->nanosleep.index, HRTIMER_MODE_ABS); | 1508 | hrtimer_init_on_stack(&t.timer, restart->nanosleep.index, |
1509 | HRTIMER_MODE_ABS); | ||
1383 | t.timer.expires.tv64 = restart->nanosleep.expires; | 1510 | t.timer.expires.tv64 = restart->nanosleep.expires; |
1384 | 1511 | ||
1385 | if (do_nanosleep(&t, HRTIMER_MODE_ABS)) | 1512 | if (do_nanosleep(&t, HRTIMER_MODE_ABS)) |
1386 | return 0; | 1513 | goto out; |
1387 | 1514 | ||
1388 | rmtp = restart->nanosleep.rmtp; | 1515 | rmtp = restart->nanosleep.rmtp; |
1389 | if (rmtp) { | 1516 | if (rmtp) { |
1390 | int ret = update_rmtp(&t.timer, rmtp); | 1517 | ret = update_rmtp(&t.timer, rmtp); |
1391 | if (ret <= 0) | 1518 | if (ret <= 0) |
1392 | return ret; | 1519 | goto out; |
1393 | } | 1520 | } |
1394 | 1521 | ||
1395 | /* The other values in restart are already filled in */ | 1522 | /* The other values in restart are already filled in */ |
1396 | return -ERESTART_RESTARTBLOCK; | 1523 | ret = -ERESTART_RESTARTBLOCK; |
1524 | out: | ||
1525 | destroy_hrtimer_on_stack(&t.timer); | ||
1526 | return ret; | ||
1397 | } | 1527 | } |
1398 | 1528 | ||
1399 | long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, | 1529 | long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, |
@@ -1401,20 +1531,23 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, | |||
1401 | { | 1531 | { |
1402 | struct restart_block *restart; | 1532 | struct restart_block *restart; |
1403 | struct hrtimer_sleeper t; | 1533 | struct hrtimer_sleeper t; |
1534 | int ret = 0; | ||
1404 | 1535 | ||
1405 | hrtimer_init(&t.timer, clockid, mode); | 1536 | hrtimer_init_on_stack(&t.timer, clockid, mode); |
1406 | t.timer.expires = timespec_to_ktime(*rqtp); | 1537 | t.timer.expires = timespec_to_ktime(*rqtp); |
1407 | if (do_nanosleep(&t, mode)) | 1538 | if (do_nanosleep(&t, mode)) |
1408 | return 0; | 1539 | goto out; |
1409 | 1540 | ||
1410 | /* Absolute timers do not update the rmtp value and restart: */ | 1541 | /* Absolute timers do not update the rmtp value and restart: */ |
1411 | if (mode == HRTIMER_MODE_ABS) | 1542 | if (mode == HRTIMER_MODE_ABS) { |
1412 | return -ERESTARTNOHAND; | 1543 | ret = -ERESTARTNOHAND; |
1544 | goto out; | ||
1545 | } | ||
1413 | 1546 | ||
1414 | if (rmtp) { | 1547 | if (rmtp) { |
1415 | int ret = update_rmtp(&t.timer, rmtp); | 1548 | ret = update_rmtp(&t.timer, rmtp); |
1416 | if (ret <= 0) | 1549 | if (ret <= 0) |
1417 | return ret; | 1550 | goto out; |
1418 | } | 1551 | } |
1419 | 1552 | ||
1420 | restart = &current_thread_info()->restart_block; | 1553 | restart = &current_thread_info()->restart_block; |
@@ -1423,7 +1556,10 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, | |||
1423 | restart->nanosleep.rmtp = rmtp; | 1556 | restart->nanosleep.rmtp = rmtp; |
1424 | restart->nanosleep.expires = t.timer.expires.tv64; | 1557 | restart->nanosleep.expires = t.timer.expires.tv64; |
1425 | 1558 | ||
1426 | return -ERESTART_RESTARTBLOCK; | 1559 | ret = -ERESTART_RESTARTBLOCK; |
1560 | out: | ||
1561 | destroy_hrtimer_on_stack(&t.timer); | ||
1562 | return ret; | ||
1427 | } | 1563 | } |
1428 | 1564 | ||
1429 | asmlinkage long | 1565 | asmlinkage long |
@@ -1468,6 +1604,7 @@ static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base, | |||
1468 | while ((node = rb_first(&old_base->active))) { | 1604 | while ((node = rb_first(&old_base->active))) { |
1469 | timer = rb_entry(node, struct hrtimer, node); | 1605 | timer = rb_entry(node, struct hrtimer, node); |
1470 | BUG_ON(hrtimer_callback_running(timer)); | 1606 | BUG_ON(hrtimer_callback_running(timer)); |
1607 | debug_hrtimer_deactivate(timer); | ||
1471 | __remove_hrtimer(timer, old_base, HRTIMER_STATE_INACTIVE, 0); | 1608 | __remove_hrtimer(timer, old_base, HRTIMER_STATE_INACTIVE, 0); |
1472 | timer->base = new_base; | 1609 | timer->base = new_base; |
1473 | /* | 1610 | /* |
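hrtimer_nanosleep() and hrtimer_nanosleep_restart() are reshaped from multiple early returns into a single out: label purely so destroy_hrtimer_on_stack() runs on every path. The same single-exit cleanup idiom in plain runnable C, with malloc/free standing in for the on-stack timer bookkeeping:

#include <stdio.h>
#include <stdlib.h>

/* Userspace model of the goto-out rework: one cleanup point that every
 * path funnels through, instead of duplicating it per return. */
static int do_work(int fail_early)
{
	int ret = 0;
	char *buf = malloc(64);		/* plays the on-stack timer's role */

	if (!buf)
		return -1;		/* nothing to clean up yet */

	if (fail_early) {
		ret = -2;
		goto out;		/* still hits the cleanup below */
	}

	snprintf(buf, 64, "work done");
	puts(buf);
out:
	free(buf);			/* analogue of destroy_hrtimer_on_stack() */
	return ret;
}

int main(void)
{
	do_work(0);
	return do_work(1) == -2 ? 0 : 1;
}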
diff --git a/kernel/irq/devres.c b/kernel/irq/devres.c index 6d9204f3a370..38a25b8d8bff 100644 --- a/kernel/irq/devres.c +++ b/kernel/irq/devres.c | |||
@@ -1,6 +1,7 @@ | |||
1 | #include <linux/module.h> | 1 | #include <linux/module.h> |
2 | #include <linux/interrupt.h> | 2 | #include <linux/interrupt.h> |
3 | #include <linux/device.h> | 3 | #include <linux/device.h> |
4 | #include <linux/gfp.h> | ||
4 | 5 | ||
5 | /* | 6 | /* |
6 | * Device resource management aware IRQ request/free implementation. | 7 | * Device resource management aware IRQ request/free implementation. |
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 438a01464287..46d6611a33bb 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c | |||
@@ -11,6 +11,7 @@ | |||
11 | #include <linux/module.h> | 11 | #include <linux/module.h> |
12 | #include <linux/random.h> | 12 | #include <linux/random.h> |
13 | #include <linux/interrupt.h> | 13 | #include <linux/interrupt.h> |
14 | #include <linux/slab.h> | ||
14 | 15 | ||
15 | #include "internals.h" | 16 | #include "internals.h" |
16 | 17 | ||
@@ -149,6 +150,26 @@ void disable_irq(unsigned int irq) | |||
149 | } | 150 | } |
150 | EXPORT_SYMBOL(disable_irq); | 151 | EXPORT_SYMBOL(disable_irq); |
151 | 152 | ||
153 | static void __enable_irq(struct irq_desc *desc, unsigned int irq) | ||
154 | { | ||
155 | switch (desc->depth) { | ||
156 | case 0: | ||
157 | printk(KERN_WARNING "Unbalanced enable for IRQ %d\n", irq); | ||
158 | WARN_ON(1); | ||
159 | break; | ||
160 | case 1: { | ||
161 | unsigned int status = desc->status & ~IRQ_DISABLED; | ||
162 | |||
163 | /* Prevent probing on this irq: */ | ||
164 | desc->status = status | IRQ_NOPROBE; | ||
165 | check_irq_resend(desc, irq); | ||
166 | /* fall-through */ | ||
167 | } | ||
168 | default: | ||
169 | desc->depth--; | ||
170 | } | ||
171 | } | ||
172 | |||
152 | /** | 173 | /** |
153 | * enable_irq - enable handling of an irq | 174 | * enable_irq - enable handling of an irq |
154 | * @irq: Interrupt to enable | 175 | * @irq: Interrupt to enable |
@@ -168,22 +189,7 @@ void enable_irq(unsigned int irq) | |||
168 | return; | 189 | return; |
169 | 190 | ||
170 | spin_lock_irqsave(&desc->lock, flags); | 191 | spin_lock_irqsave(&desc->lock, flags); |
171 | switch (desc->depth) { | 192 | __enable_irq(desc, irq); |
172 | case 0: | ||
173 | printk(KERN_WARNING "Unbalanced enable for IRQ %d\n", irq); | ||
174 | WARN_ON(1); | ||
175 | break; | ||
176 | case 1: { | ||
177 | unsigned int status = desc->status & ~IRQ_DISABLED; | ||
178 | |||
179 | /* Prevent probing on this irq: */ | ||
180 | desc->status = status | IRQ_NOPROBE; | ||
181 | check_irq_resend(desc, irq); | ||
182 | /* fall-through */ | ||
183 | } | ||
184 | default: | ||
185 | desc->depth--; | ||
186 | } | ||
187 | spin_unlock_irqrestore(&desc->lock, flags); | 193 | spin_unlock_irqrestore(&desc->lock, flags); |
188 | } | 194 | } |
189 | EXPORT_SYMBOL(enable_irq); | 195 | EXPORT_SYMBOL(enable_irq); |
@@ -364,7 +370,7 @@ int setup_irq(unsigned int irq, struct irqaction *new) | |||
364 | compat_irq_chip_set_default_handler(desc); | 370 | compat_irq_chip_set_default_handler(desc); |
365 | 371 | ||
366 | desc->status &= ~(IRQ_AUTODETECT | IRQ_WAITING | | 372 | desc->status &= ~(IRQ_AUTODETECT | IRQ_WAITING | |
367 | IRQ_INPROGRESS); | 373 | IRQ_INPROGRESS | IRQ_SPURIOUS_DISABLED); |
368 | 374 | ||
369 | if (!(desc->status & IRQ_NOAUTOEN)) { | 375 | if (!(desc->status & IRQ_NOAUTOEN)) { |
370 | desc->depth = 0; | 376 | desc->depth = 0; |
@@ -380,6 +386,16 @@ int setup_irq(unsigned int irq, struct irqaction *new) | |||
380 | /* Reset broken irq detection when installing new handler */ | 386 | /* Reset broken irq detection when installing new handler */ |
381 | desc->irq_count = 0; | 387 | desc->irq_count = 0; |
382 | desc->irqs_unhandled = 0; | 388 | desc->irqs_unhandled = 0; |
389 | |||
390 | /* | ||
391 | * Check whether we disabled the irq via the spurious handler | ||
392 | * before. Reenable it and give it another chance. | ||
393 | */ | ||
394 | if (shared && (desc->status & IRQ_SPURIOUS_DISABLED)) { | ||
395 | desc->status &= ~IRQ_SPURIOUS_DISABLED; | ||
396 | __enable_irq(desc, irq); | ||
397 | } | ||
398 | |||
383 | spin_unlock_irqrestore(&desc->lock, flags); | 399 | spin_unlock_irqrestore(&desc->lock, flags); |
384 | 400 | ||
385 | new->irq = irq; | 401 | new->irq = irq; |
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c index 088dabbf2d6a..c66d3f10e853 100644 --- a/kernel/irq/spurious.c +++ b/kernel/irq/spurious.c | |||
@@ -209,8 +209,8 @@ void note_interrupt(unsigned int irq, struct irq_desc *desc, | |||
209 | * Now kill the IRQ | 209 | * Now kill the IRQ |
210 | */ | 210 | */ |
211 | printk(KERN_EMERG "Disabling IRQ #%d\n", irq); | 211 | printk(KERN_EMERG "Disabling IRQ #%d\n", irq); |
212 | desc->status |= IRQ_DISABLED; | 212 | desc->status |= IRQ_DISABLED | IRQ_SPURIOUS_DISABLED; |
213 | desc->depth = 1; | 213 | desc->depth++; |
214 | desc->chip->disable(irq); | 214 | desc->chip->disable(irq); |
215 | } | 215 | } |
216 | desc->irqs_unhandled = 0; | 216 | desc->irqs_unhandled = 0; |
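Taken together, the manage.c and spurious.c hunks change how a spuriously-screaming IRQ is parked: the spurious detector now marks the line IRQ_SPURIOUS_DISABLED and increments the disable depth (rather than forcing it to 1), and setup_irq() of a later shared handler can hand the line back through the factored-out __enable_irq() without unbalancing the count. A runnable toy model of the depth discipline — struct desc is a stand-in for irq_desc:

#include <stdio.h>

struct desc {
	unsigned int depth;		/* 0 means enabled */
	int disabled;
	int spurious_disabled;
};

static void disable(struct desc *d)
{
	if (d->depth++ == 0)
		d->disabled = 1;
}

/* Mirrors __enable_irq(): warn on underflow, really enable at 1 -> 0. */
static void enable(struct desc *d)
{
	switch (d->depth) {
	case 0:
		fprintf(stderr, "Unbalanced enable\n");
		break;
	case 1:
		d->disabled = 0;
		/* fall through */
	default:
		d->depth--;
	}
}

int main(void)
{
	struct desc d = { 0, 0, 0 };

	disable(&d);			/* spurious handler: depth 0 -> 1 */
	d.spurious_disabled = 1;

	/* setup_irq() of a new shared handler gives it another chance: */
	if (d.spurious_disabled) {
		d.spurious_disabled = 0;
		enable(&d);		/* depth 1 -> 0, line live again */
	}
	printf("depth=%u disabled=%d\n", d.depth, d.disabled);
	return 0;
}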
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index f091d13def00..6fc0040f3e3a 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c | |||
@@ -472,11 +472,7 @@ static const struct file_operations kallsyms_operations = { | |||
472 | 472 | ||
473 | static int __init kallsyms_init(void) | 473 | static int __init kallsyms_init(void) |
474 | { | 474 | { |
475 | struct proc_dir_entry *entry; | 475 | proc_create("kallsyms", 0444, NULL, &kallsyms_operations); |
476 | |||
477 | entry = create_proc_entry("kallsyms", 0444, NULL); | ||
478 | if (entry) | ||
479 | entry->proc_fops = &kallsyms_operations; | ||
480 | return 0; | 476 | return 0; |
481 | } | 477 | } |
482 | __initcall(kallsyms_init); | 478 | __initcall(kallsyms_init); |
diff --git a/kernel/kexec.c b/kernel/kexec.c index cb85c79989b4..1c5fcacbcf33 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c | |||
@@ -1217,7 +1217,7 @@ static int __init parse_crashkernel_mem(char *cmdline, | |||
1217 | } | 1217 | } |
1218 | 1218 | ||
1219 | /* match ? */ | 1219 | /* match ? */ |
1220 | if (system_ram >= start && system_ram <= end) { | 1220 | if (system_ram >= start && system_ram < end) { |
1221 | *crash_size = size; | 1221 | *crash_size = size; |
1222 | break; | 1222 | break; |
1223 | } | 1223 | } |
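The one-character kexec.c fix makes crashkernel memory ranges half-open: with system_ram < end, the endpoint of one range no longer overlaps the start of the next, so a machine with exactly 512M falls into the 512M-1G range instead of being claimed first by 0-512M. A small runnable demonstration:

#include <stdio.h>

struct range { unsigned long start, end, size; };

/* Half-open match, as in the fixed parse_crashkernel_mem(). */
static long pick(unsigned long ram, const struct range *r, int n)
{
	for (int i = 0; i < n; i++)
		if (ram >= r[i].start && ram < r[i].end)	/* '<', not '<=' */
			return r[i].size;
	return -1;
}

int main(void)
{
	/* e.g. crashkernel=0-512M:64M,512M-1G:128M */
	const unsigned long M = 1024 * 1024;
	struct range r[] = {
		{ 0,       512 * M,  64 * M },
		{ 512 * M, 1024 * M, 128 * M },
	};

	/* Exactly 512M of RAM now matches the second range only. */
	printf("%ld\n", pick(512 * M, r, 2) / M);	/* prints 128 */
	return 0;
}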
diff --git a/kernel/kmod.c b/kernel/kmod.c index e2764047ec03..8df97d3dfda8 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c | |||
@@ -27,6 +27,7 @@ | |||
27 | #include <linux/mnt_namespace.h> | 27 | #include <linux/mnt_namespace.h> |
28 | #include <linux/completion.h> | 28 | #include <linux/completion.h> |
29 | #include <linux/file.h> | 29 | #include <linux/file.h> |
30 | #include <linux/fdtable.h> | ||
30 | #include <linux/workqueue.h> | 31 | #include <linux/workqueue.h> |
31 | #include <linux/security.h> | 32 | #include <linux/security.h> |
32 | #include <linux/mount.h> | 33 | #include <linux/mount.h> |
diff --git a/kernel/kthread.c b/kernel/kthread.c index 92cf6930ab51..bd1b9ea024e1 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c | |||
@@ -98,7 +98,7 @@ static void create_kthread(struct kthread_create_info *create) | |||
98 | struct sched_param param = { .sched_priority = 0 }; | 98 | struct sched_param param = { .sched_priority = 0 }; |
99 | wait_for_completion(&create->started); | 99 | wait_for_completion(&create->started); |
100 | read_lock(&tasklist_lock); | 100 | read_lock(&tasklist_lock); |
101 | create->result = find_task_by_pid(pid); | 101 | create->result = find_task_by_pid_ns(pid, &init_pid_ns); |
102 | read_unlock(&tasklist_lock); | 102 | read_unlock(&tasklist_lock); |
103 | /* | 103 | /* |
104 | * root may have changed our (kthreadd's) priority or CPU mask. | 104 | * root may have changed our (kthreadd's) priority or CPU mask. |
@@ -144,9 +144,9 @@ struct task_struct *kthread_create(int (*threadfn)(void *data), | |||
144 | 144 | ||
145 | spin_lock(&kthread_create_lock); | 145 | spin_lock(&kthread_create_lock); |
146 | list_add_tail(&create.list, &kthread_create_list); | 146 | list_add_tail(&create.list, &kthread_create_list); |
147 | wake_up_process(kthreadd_task); | ||
148 | spin_unlock(&kthread_create_lock); | 147 | spin_unlock(&kthread_create_lock); |
149 | 148 | ||
149 | wake_up_process(kthreadd_task); | ||
150 | wait_for_completion(&create.done); | 150 | wait_for_completion(&create.done); |
151 | 151 | ||
152 | if (!IS_ERR(create.result)) { | 152 | if (!IS_ERR(create.result)) { |
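The kthread.c reorder is a small contention fix: waking kthreadd while still holding kthread_create_lock invites the woken thread to run straight into a lock the waker still owns. Publishing the request under the lock and waking after the unlock avoids that. The same discipline in a runnable pthreads model (build with -lpthread; the condition variable stands in for wake_up_process()):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  wake = PTHREAD_COND_INITIALIZER;
static int pending;

/* Waiter: checks the predicate under the lock, as kthreadd does. */
static void *worker(void *arg)
{
	pthread_mutex_lock(&lock);
	while (!pending)
		pthread_cond_wait(&wake, &lock);
	pending--;
	pthread_mutex_unlock(&lock);
	puts("request consumed");
	return NULL;
}

int main(void)
{
	pthread_t t;
	pthread_create(&t, NULL, worker, NULL);

	/* Publish work under the lock, wake after dropping it, as the
	 * reordered kthread_create() does with kthreadd_task. */
	pthread_mutex_lock(&lock);
	pending++;			/* list_add_tail() analogue */
	pthread_mutex_unlock(&lock);
	pthread_cond_signal(&wake);	/* wake_up_process() analogue */

	pthread_join(t, NULL);
	return 0;
}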
diff --git a/kernel/latencytop.c b/kernel/latencytop.c index 7c74dab0d21b..5e7b45c56923 100644 --- a/kernel/latencytop.c +++ b/kernel/latencytop.c | |||
@@ -233,14 +233,7 @@ static struct file_operations lstats_fops = { | |||
233 | 233 | ||
234 | static int __init init_lstats_procfs(void) | 234 | static int __init init_lstats_procfs(void) |
235 | { | 235 | { |
236 | struct proc_dir_entry *pe; | 236 | proc_create("latency_stats", 0644, NULL, &lstats_fops); |
237 | |||
238 | pe = create_proc_entry("latency_stats", 0644, NULL); | ||
239 | if (!pe) | ||
240 | return -ENOMEM; | ||
241 | |||
242 | pe->proc_fops = &lstats_fops; | ||
243 | |||
244 | return 0; | 237 | return 0; |
245 | } | 238 | } |
246 | __initcall(init_lstats_procfs); | 239 | __initcall(init_lstats_procfs); |
diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c index 8a135bd163c2..dc5d29648d85 100644 --- a/kernel/lockdep_proc.c +++ b/kernel/lockdep_proc.c | |||
@@ -660,20 +660,12 @@ static const struct file_operations proc_lock_stat_operations = { | |||
660 | 660 | ||
661 | static int __init lockdep_proc_init(void) | 661 | static int __init lockdep_proc_init(void) |
662 | { | 662 | { |
663 | struct proc_dir_entry *entry; | 663 | proc_create("lockdep", S_IRUSR, NULL, &proc_lockdep_operations); |
664 | 664 | proc_create("lockdep_stats", S_IRUSR, NULL, | |
665 | entry = create_proc_entry("lockdep", S_IRUSR, NULL); | 665 | &proc_lockdep_stats_operations); |
666 | if (entry) | ||
667 | entry->proc_fops = &proc_lockdep_operations; | ||
668 | |||
669 | entry = create_proc_entry("lockdep_stats", S_IRUSR, NULL); | ||
670 | if (entry) | ||
671 | entry->proc_fops = &proc_lockdep_stats_operations; | ||
672 | 666 | ||
673 | #ifdef CONFIG_LOCK_STAT | 667 | #ifdef CONFIG_LOCK_STAT |
674 | entry = create_proc_entry("lock_stat", S_IRUSR, NULL); | 668 | proc_create("lock_stat", S_IRUSR, NULL, &proc_lock_stat_operations); |
675 | if (entry) | ||
676 | entry->proc_fops = &proc_lock_stat_operations; | ||
677 | #endif | 669 | #endif |
678 | 670 | ||
679 | return 0; | 671 | return 0; |
diff --git a/kernel/marker.c b/kernel/marker.c index 005b95954593..b5a9fe1d50d5 100644 --- a/kernel/marker.c +++ b/kernel/marker.c | |||
@@ -23,12 +23,13 @@ | |||
23 | #include <linux/rcupdate.h> | 23 | #include <linux/rcupdate.h> |
24 | #include <linux/marker.h> | 24 | #include <linux/marker.h> |
25 | #include <linux/err.h> | 25 | #include <linux/err.h> |
26 | #include <linux/slab.h> | ||
26 | 27 | ||
27 | extern struct marker __start___markers[]; | 28 | extern struct marker __start___markers[]; |
28 | extern struct marker __stop___markers[]; | 29 | extern struct marker __stop___markers[]; |
29 | 30 | ||
30 | /* Set to 1 to enable marker debug output */ | 31 | /* Set to 1 to enable marker debug output */ |
31 | const int marker_debug; | 32 | static const int marker_debug; |
32 | 33 | ||
33 | /* | 34 | /* |
34 | * markers_mutex nests inside module_mutex. Markers mutex protects the builtin | 35 | * markers_mutex nests inside module_mutex. Markers mutex protects the builtin |
diff --git a/kernel/module.c b/kernel/module.c index 8d6cccc6c3cf..8674a390a2e8 100644 --- a/kernel/module.c +++ b/kernel/module.c | |||
@@ -164,131 +164,140 @@ static const struct kernel_symbol *lookup_symbol(const char *name, | |||
164 | return NULL; | 164 | return NULL; |
165 | } | 165 | } |
166 | 166 | ||
167 | static void printk_unused_warning(const char *name) | 167 | static bool always_ok(bool gplok, bool warn, const char *name) |
168 | { | 168 | { |
169 | printk(KERN_WARNING "Symbol %s is marked as UNUSED, " | 169 | return true; |
170 | "however this module is using it.\n", name); | ||
171 | printk(KERN_WARNING "This symbol will go away in the future.\n"); | ||
172 | printk(KERN_WARNING "Please evalute if this is the right api to use, " | ||
173 | "and if it really is, submit a report the linux kernel " | ||
174 | "mailinglist together with submitting your code for " | ||
175 | "inclusion.\n"); | ||
176 | } | 170 | } |
177 | 171 | ||
178 | /* Find a symbol, return value, crc and module which owns it */ | 172 | static bool printk_unused_warning(bool gplok, bool warn, const char *name) |
179 | static unsigned long __find_symbol(const char *name, | ||
180 | struct module **owner, | ||
181 | const unsigned long **crc, | ||
182 | int gplok) | ||
183 | { | 173 | { |
184 | struct module *mod; | 174 | if (warn) { |
185 | const struct kernel_symbol *ks; | 175 | printk(KERN_WARNING "Symbol %s is marked as UNUSED, " |
186 | 176 | "however this module is using it.\n", name); | |
187 | /* Core kernel first. */ | 177 | printk(KERN_WARNING |
188 | *owner = NULL; | 178 | "This symbol will go away in the future.\n"); |
189 | ks = lookup_symbol(name, __start___ksymtab, __stop___ksymtab); | 179 | printk(KERN_WARNING |
190 | if (ks) { | 180 | "Please evaluate if this is the right api to use and if "
191 | *crc = symversion(__start___kcrctab, (ks - __start___ksymtab)); | 181 | "it really is, submit a report to the linux kernel "
192 | return ks->value; | 182 | "mailing list together with submitting your code for "
183 | "inclusion.\n"); | ||
193 | } | 184 | } |
194 | if (gplok) { | 185 | return true; |
195 | ks = lookup_symbol(name, __start___ksymtab_gpl, | 186 | } |
196 | __stop___ksymtab_gpl); | 187 | |
197 | if (ks) { | 188 | static bool gpl_only_unused_warning(bool gplok, bool warn, const char *name) |
198 | *crc = symversion(__start___kcrctab_gpl, | 189 | { |
199 | (ks - __start___ksymtab_gpl)); | 190 | if (!gplok) |
200 | return ks->value; | 191 | return false; |
201 | } | 192 | return printk_unused_warning(gplok, warn, name); |
202 | } | 193 | } |
203 | ks = lookup_symbol(name, __start___ksymtab_gpl_future, | 194 | |
204 | __stop___ksymtab_gpl_future); | 195 | static bool gpl_only(bool gplok, bool warn, const char *name) |
205 | if (ks) { | 196 | { |
206 | if (!gplok) { | 197 | return gplok; |
207 | printk(KERN_WARNING "Symbol %s is being used " | 198 | } |
208 | "by a non-GPL module, which will not " | 199 | |
209 | "be allowed in the future\n", name); | 200 | static bool warn_if_not_gpl(bool gplok, bool warn, const char *name) |
210 | printk(KERN_WARNING "Please see the file " | 201 | { |
211 | "Documentation/feature-removal-schedule.txt " | 202 | if (!gplok && warn) { |
212 | "in the kernel source tree for more " | 203 | printk(KERN_WARNING "Symbol %s is being used " |
213 | "details.\n"); | 204 | "by a non-GPL module, which will not " |
214 | } | 205 | "be allowed in the future\n", name); |
215 | *crc = symversion(__start___kcrctab_gpl_future, | 206 | printk(KERN_WARNING "Please see the file " |
216 | (ks - __start___ksymtab_gpl_future)); | 207 | "Documentation/feature-removal-schedule.txt " |
217 | return ks->value; | 208 | "in the kernel source tree for more details.\n"); |
218 | } | 209 | } |
210 | return true; | ||
211 | } | ||
219 | 212 | ||
220 | ks = lookup_symbol(name, __start___ksymtab_unused, | 213 | struct symsearch { |
221 | __stop___ksymtab_unused); | 214 | const struct kernel_symbol *start, *stop; |
222 | if (ks) { | 215 | const unsigned long *crcs; |
223 | printk_unused_warning(name); | 216 | bool (*check)(bool gplok, bool warn, const char *name); |
224 | *crc = symversion(__start___kcrctab_unused, | 217 | }; |
225 | (ks - __start___ksymtab_unused)); | 218 | |
226 | return ks->value; | 219 | /* Look through this array of symbol tables for a symbol match which |
220 | * passes the check function. */ | ||
221 | static const struct kernel_symbol *search_symarrays(const struct symsearch *arr, | ||
222 | unsigned int num, | ||
223 | const char *name, | ||
224 | bool gplok, | ||
225 | bool warn, | ||
226 | const unsigned long **crc) | ||
227 | { | ||
228 | unsigned int i; | ||
229 | const struct kernel_symbol *ks; | ||
230 | |||
231 | for (i = 0; i < num; i++) { | ||
232 | ks = lookup_symbol(name, arr[i].start, arr[i].stop); | ||
233 | if (!ks || !arr[i].check(gplok, warn, name)) | ||
234 | continue; | ||
235 | |||
236 | if (crc) | ||
237 | *crc = symversion(arr[i].crcs, ks - arr[i].start); | ||
238 | return ks; | ||
227 | } | 239 | } |
240 | return NULL; | ||
241 | } | ||
242 | |||
243 | /* Find a symbol, return value, (optional) crc and (optional) module | ||
244 | * which owns it */ | ||
245 | static unsigned long find_symbol(const char *name, | ||
246 | struct module **owner, | ||
247 | const unsigned long **crc, | ||
248 | bool gplok, | ||
249 | bool warn) | ||
250 | { | ||
251 | struct module *mod; | ||
252 | const struct kernel_symbol *ks; | ||
253 | const struct symsearch arr[] = { | ||
254 | { __start___ksymtab, __stop___ksymtab, __start___kcrctab, | ||
255 | always_ok }, | ||
256 | { __start___ksymtab_gpl, __stop___ksymtab_gpl, | ||
257 | __start___kcrctab_gpl, gpl_only }, | ||
258 | { __start___ksymtab_gpl_future, __stop___ksymtab_gpl_future, | ||
259 | __start___kcrctab_gpl_future, warn_if_not_gpl }, | ||
260 | { __start___ksymtab_unused, __stop___ksymtab_unused, | ||
261 | __start___kcrctab_unused, printk_unused_warning }, | ||
262 | { __start___ksymtab_unused_gpl, __stop___ksymtab_unused_gpl, | ||
263 | __start___kcrctab_unused_gpl, gpl_only_unused_warning }, | ||
264 | }; | ||
228 | 265 | ||
229 | if (gplok) | 266 | /* Core kernel first. */ |
230 | ks = lookup_symbol(name, __start___ksymtab_unused_gpl, | 267 | ks = search_symarrays(arr, ARRAY_SIZE(arr), name, gplok, warn, crc); |
231 | __stop___ksymtab_unused_gpl); | ||
232 | if (ks) { | 268 | if (ks) { |
233 | printk_unused_warning(name); | 269 | if (owner) |
234 | *crc = symversion(__start___kcrctab_unused_gpl, | 270 | *owner = NULL; |
235 | (ks - __start___ksymtab_unused_gpl)); | ||
236 | return ks->value; | 271 | return ks->value; |
237 | } | 272 | } |
238 | 273 | ||
239 | /* Now try modules. */ | 274 | /* Now try modules. */ |
240 | list_for_each_entry(mod, &modules, list) { | 275 | list_for_each_entry(mod, &modules, list) { |
241 | *owner = mod; | 276 | struct symsearch arr[] = { |
242 | ks = lookup_symbol(name, mod->syms, mod->syms + mod->num_syms); | 277 | { mod->syms, mod->syms + mod->num_syms, mod->crcs, |
243 | if (ks) { | 278 | always_ok }, |
244 | *crc = symversion(mod->crcs, (ks - mod->syms)); | 279 | { mod->gpl_syms, mod->gpl_syms + mod->num_gpl_syms, |
245 | return ks->value; | 280 | mod->gpl_crcs, gpl_only }, |
246 | } | 281 | { mod->gpl_future_syms, |
247 | 282 | mod->gpl_future_syms + mod->num_gpl_future_syms, | |
248 | if (gplok) { | 283 | mod->gpl_future_crcs, warn_if_not_gpl }, |
249 | ks = lookup_symbol(name, mod->gpl_syms, | 284 | { mod->unused_syms, |
250 | mod->gpl_syms + mod->num_gpl_syms); | 285 | mod->unused_syms + mod->num_unused_syms, |
251 | if (ks) { | 286 | mod->unused_crcs, printk_unused_warning }, |
252 | *crc = symversion(mod->gpl_crcs, | 287 | { mod->unused_gpl_syms, |
253 | (ks - mod->gpl_syms)); | 288 | mod->unused_gpl_syms + mod->num_unused_gpl_syms, |
254 | return ks->value; | 289 | mod->unused_gpl_crcs, gpl_only_unused_warning }, |
255 | } | 290 | }; |
256 | } | 291 | |
257 | ks = lookup_symbol(name, mod->unused_syms, mod->unused_syms + mod->num_unused_syms); | 292 | ks = search_symarrays(arr, ARRAY_SIZE(arr), |
293 | name, gplok, warn, crc); | ||
258 | if (ks) { | 294 | if (ks) { |
259 | printk_unused_warning(name); | 295 | if (owner) |
260 | *crc = symversion(mod->unused_crcs, (ks - mod->unused_syms)); | 296 | *owner = mod; |
261 | return ks->value; | ||
262 | } | ||
263 | |||
264 | if (gplok) { | ||
265 | ks = lookup_symbol(name, mod->unused_gpl_syms, | ||
266 | mod->unused_gpl_syms + mod->num_unused_gpl_syms); | ||
267 | if (ks) { | ||
268 | printk_unused_warning(name); | ||
269 | *crc = symversion(mod->unused_gpl_crcs, | ||
270 | (ks - mod->unused_gpl_syms)); | ||
271 | return ks->value; | ||
272 | } | ||
273 | } | ||
274 | ks = lookup_symbol(name, mod->gpl_future_syms, | ||
275 | (mod->gpl_future_syms + | ||
276 | mod->num_gpl_future_syms)); | ||
277 | if (ks) { | ||
278 | if (!gplok) { | ||
279 | printk(KERN_WARNING "Symbol %s is being used " | ||
280 | "by a non-GPL module, which will not " | ||
281 | "be allowed in the future\n", name); | ||
282 | printk(KERN_WARNING "Please see the file " | ||
283 | "Documentation/feature-removal-schedule.txt " | ||
284 | "in the kernel source tree for more " | ||
285 | "details.\n"); | ||
286 | } | ||
287 | *crc = symversion(mod->gpl_future_crcs, | ||
288 | (ks - mod->gpl_future_syms)); | ||
289 | return ks->value; | 297 | return ks->value; |
290 | } | 298 | } |
291 | } | 299 | } |
300 | |||
292 | DEBUGP("Failed to find symbol %s\n", name); | 301 | DEBUGP("Failed to find symbol %s\n", name); |
293 | return -ENOENT; | 302 | return -ENOENT; |
294 | } | 303 | } |
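The find_symbol() rewrite above replaces five copies of lookup-plus-policy with one table walk: each struct symsearch entry pairs a symbol range and crc table with a check callback that decides (and optionally warns) whether a match may be used. The shape of the pattern as a self-contained userspace sketch (all names here are mine):

    #include <stdbool.h>
    #include <stddef.h>
    #include <string.h>

    struct sym { const char *name; unsigned long value; };

    struct symsearch {
            const struct sym *start, *stop;
            bool (*check)(bool gplok, const char *name);
    };

    /* first range containing the name whose check passes wins */
    static const struct sym *search(const struct symsearch *arr, size_t n,
                                    const char *name, bool gplok)
    {
            for (size_t i = 0; i < n; i++)
                    for (const struct sym *s = arr[i].start; s < arr[i].stop; s++)
                            if (!strcmp(s->name, name) &&
                                arr[i].check(gplok, name))
                                    return s;
            return NULL;
    }

Adding a new export class then means adding one table row, not another copy of the lookup logic.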
@@ -736,12 +745,13 @@ sys_delete_module(const char __user *name_user, unsigned int flags) | |||
736 | if (!forced && module_refcount(mod) != 0) | 745 | if (!forced && module_refcount(mod) != 0) |
737 | wait_for_zero_refcount(mod); | 746 | wait_for_zero_refcount(mod); |
738 | 747 | ||
748 | mutex_unlock(&module_mutex); | ||
739 | /* Final destruction now no one is using it. */ | 749 |
740 | if (mod->exit != NULL) { | 750 | if (mod->exit != NULL) |
741 | mutex_unlock(&module_mutex); | ||
742 | mod->exit(); | 751 | mod->exit(); |
743 | mutex_lock(&module_mutex); | 752 | blocking_notifier_call_chain(&module_notify_list, |
744 | } | 753 | MODULE_STATE_GOING, mod); |
754 | mutex_lock(&module_mutex); | ||
745 | /* Store the name of the last unloaded module for diagnostic purposes */ | 755 | /* Store the name of the last unloaded module for diagnostic purposes */ |
746 | strlcpy(last_unloaded_module, mod->name, sizeof(last_unloaded_module)); | 756 | strlcpy(last_unloaded_module, mod->name, sizeof(last_unloaded_module)); |
747 | free_module(mod); | 757 | free_module(mod); |
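Besides narrowing the lock hold around mod->exit(), this hunk makes unload raise MODULE_STATE_GOING on module_notify_list (the init-failure path later in this diff gains the same call). A hedged sketch of a consumer, using the existing register_module_notifier() API (the callback names are assumptions):

    static int my_module_event(struct notifier_block *nb,
                               unsigned long state, void *data)
    {
            struct module *mod = data;

            if (state == MODULE_STATE_GOING)
                    printk(KERN_INFO "dropping refs into %s\n", mod->name);
            return NOTIFY_OK;
    }

    static struct notifier_block my_module_nb = {
            .notifier_call = my_module_event,
    };

    /* at init time: register_module_notifier(&my_module_nb); */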
@@ -777,10 +787,9 @@ static void print_unload_info(struct seq_file *m, struct module *mod) | |||
777 | void __symbol_put(const char *symbol) | 787 | void __symbol_put(const char *symbol) |
778 | { | 788 | { |
779 | struct module *owner; | 789 | struct module *owner; |
780 | const unsigned long *crc; | ||
781 | 790 | ||
782 | preempt_disable(); | 791 | preempt_disable(); |
783 | if (IS_ERR_VALUE(__find_symbol(symbol, &owner, &crc, 1))) | 792 | if (IS_ERR_VALUE(find_symbol(symbol, &owner, NULL, true, false))) |
784 | BUG(); | 793 | BUG(); |
785 | module_put(owner); | 794 | module_put(owner); |
786 | preempt_enable(); | 795 | preempt_enable(); |
@@ -924,13 +933,10 @@ static inline int check_modstruct_version(Elf_Shdr *sechdrs, | |||
924 | struct module *mod) | 933 | struct module *mod) |
925 | { | 934 | { |
926 | const unsigned long *crc; | 935 | const unsigned long *crc; |
927 | struct module *owner; | ||
928 | 936 | ||
929 | if (IS_ERR_VALUE(__find_symbol("struct_module", | 937 | if (IS_ERR_VALUE(find_symbol("struct_module", NULL, &crc, true, false))) |
930 | &owner, &crc, 1))) | ||
931 | BUG(); | 938 | BUG(); |
932 | return check_version(sechdrs, versindex, "struct_module", mod, | 939 | return check_version(sechdrs, versindex, "struct_module", mod, crc); |
933 | crc); | ||
934 | } | 940 | } |
935 | 941 | ||
936 | /* First part is kernel version, which we ignore. */ | 942 | /* First part is kernel version, which we ignore. */ |
@@ -974,8 +980,8 @@ static unsigned long resolve_symbol(Elf_Shdr *sechdrs, | |||
974 | unsigned long ret; | 980 | unsigned long ret; |
975 | const unsigned long *crc; | 981 | const unsigned long *crc; |
976 | 982 | ||
977 | ret = __find_symbol(name, &owner, &crc, | 983 | ret = find_symbol(name, &owner, &crc, |
978 | !(mod->taints & TAINT_PROPRIETARY_MODULE)); | 984 | !(mod->taints & TAINT_PROPRIETARY_MODULE), true); |
979 | if (!IS_ERR_VALUE(ret)) { | 985 | if (!IS_ERR_VALUE(ret)) { |
980 | /* use_module can fail due to OOM, | 986 | /* use_module can fail due to OOM, |
981 | or module initialization or unloading */ | 987 | or module initialization or unloading */ |
@@ -991,6 +997,20 @@ static unsigned long resolve_symbol(Elf_Shdr *sechdrs, | |||
991 | * J. Corbet <corbet@lwn.net> | 997 | * J. Corbet <corbet@lwn.net> |
992 | */ | 998 | */ |
993 | #if defined(CONFIG_KALLSYMS) && defined(CONFIG_SYSFS) | 999 | #if defined(CONFIG_KALLSYMS) && defined(CONFIG_SYSFS) |
1000 | struct module_sect_attr | ||
1001 | { | ||
1002 | struct module_attribute mattr; | ||
1003 | char *name; | ||
1004 | unsigned long address; | ||
1005 | }; | ||
1006 | |||
1007 | struct module_sect_attrs | ||
1008 | { | ||
1009 | struct attribute_group grp; | ||
1010 | unsigned int nsections; | ||
1011 | struct module_sect_attr attrs[0]; | ||
1012 | }; | ||
1013 | |||
994 | static ssize_t module_sect_show(struct module_attribute *mattr, | 1014 | static ssize_t module_sect_show(struct module_attribute *mattr, |
995 | struct module *mod, char *buf) | 1015 | struct module *mod, char *buf) |
996 | { | 1016 | { |
@@ -1001,7 +1021,7 @@ static ssize_t module_sect_show(struct module_attribute *mattr, | |||
1001 | 1021 | ||
1002 | static void free_sect_attrs(struct module_sect_attrs *sect_attrs) | 1022 | static void free_sect_attrs(struct module_sect_attrs *sect_attrs) |
1003 | { | 1023 | { |
1004 | int section; | 1024 | unsigned int section; |
1005 | 1025 | ||
1006 | for (section = 0; section < sect_attrs->nsections; section++) | 1026 | for (section = 0; section < sect_attrs->nsections; section++) |
1007 | kfree(sect_attrs->attrs[section].name); | 1027 | kfree(sect_attrs->attrs[section].name); |
@@ -1362,10 +1382,9 @@ void *__symbol_get(const char *symbol) | |||
1362 | { | 1382 | { |
1363 | struct module *owner; | 1383 | struct module *owner; |
1364 | unsigned long value; | 1384 | unsigned long value; |
1365 | const unsigned long *crc; | ||
1366 | 1385 | ||
1367 | preempt_disable(); | 1386 | preempt_disable(); |
1368 | value = __find_symbol(symbol, &owner, &crc, 1); | 1387 | value = find_symbol(symbol, &owner, NULL, true, true); |
1369 | if (IS_ERR_VALUE(value)) | 1388 | if (IS_ERR_VALUE(value)) |
1370 | value = 0; | 1389 | value = 0; |
1371 | else if (strong_try_module_get(owner)) | 1390 | else if (strong_try_module_get(owner)) |
@@ -1382,33 +1401,33 @@ EXPORT_SYMBOL_GPL(__symbol_get); | |||
1382 | */ | 1401 | */ |
1383 | static int verify_export_symbols(struct module *mod) | 1402 | static int verify_export_symbols(struct module *mod) |
1384 | { | 1403 | { |
1385 | const char *name = NULL; | 1404 | unsigned int i; |
1386 | unsigned long i, ret = 0; | ||
1387 | struct module *owner; | 1405 | struct module *owner; |
1388 | const unsigned long *crc; | 1406 | const struct kernel_symbol *s; |
1389 | 1407 | struct { | |
1390 | for (i = 0; i < mod->num_syms; i++) | 1408 | const struct kernel_symbol *sym; |
1391 | if (!IS_ERR_VALUE(__find_symbol(mod->syms[i].name, | 1409 | unsigned int num; |
1392 | &owner, &crc, 1))) { | 1410 | } arr[] = { |
1393 | name = mod->syms[i].name; | 1411 | { mod->syms, mod->num_syms }, |
1394 | ret = -ENOEXEC; | 1412 | { mod->gpl_syms, mod->num_gpl_syms }, |
1395 | goto dup; | 1413 | { mod->gpl_future_syms, mod->num_gpl_future_syms }, |
1396 | } | 1414 | { mod->unused_syms, mod->num_unused_syms }, |
1415 | { mod->unused_gpl_syms, mod->num_unused_gpl_syms }, | ||
1416 | }; | ||
1397 | 1417 | ||
1398 | for (i = 0; i < mod->num_gpl_syms; i++) | 1418 | for (i = 0; i < ARRAY_SIZE(arr); i++) { |
1399 | if (!IS_ERR_VALUE(__find_symbol(mod->gpl_syms[i].name, | 1419 | for (s = arr[i].sym; s < arr[i].sym + arr[i].num; s++) { |
1400 | &owner, &crc, 1))) { | 1420 | if (!IS_ERR_VALUE(find_symbol(s->name, &owner, |
1401 | name = mod->gpl_syms[i].name; | 1421 | NULL, true, false))) { |
1402 | ret = -ENOEXEC; | 1422 | printk(KERN_ERR |
1403 | goto dup; | 1423 | "%s: exports duplicate symbol %s" |
1424 | " (owned by %s)\n", | ||
1425 | mod->name, s->name, module_name(owner)); | ||
1426 | return -ENOEXEC; | ||
1427 | } | ||
1404 | } | 1428 | } |
1405 | 1429 | } | |
1406 | dup: | 1430 | return 0; |
1407 | if (ret) | ||
1408 | printk(KERN_ERR "%s: exports duplicate symbol %s (owned by %s)\n", | ||
1409 | mod->name, name, module_name(owner)); | ||
1410 | |||
1411 | return ret; | ||
1412 | } | 1431 | } |
1413 | 1432 | ||
1414 | /* Change all symbols so that st_value encodes the pointer directly. */ | 1433 | /* Change all symbols so that st_value encodes the pointer directly. */ |
@@ -1814,8 +1833,9 @@ static struct module *load_module(void __user *umod, | |||
1814 | unwindex = find_sec(hdr, sechdrs, secstrings, ARCH_UNWIND_SECTION_NAME); | 1833 | unwindex = find_sec(hdr, sechdrs, secstrings, ARCH_UNWIND_SECTION_NAME); |
1815 | #endif | 1834 | #endif |
1816 | 1835 | ||
1817 | /* Don't keep modinfo section */ | 1836 | /* Don't keep modinfo and version sections. */ |
1818 | sechdrs[infoindex].sh_flags &= ~(unsigned long)SHF_ALLOC; | 1837 | sechdrs[infoindex].sh_flags &= ~(unsigned long)SHF_ALLOC; |
1838 | sechdrs[versindex].sh_flags &= ~(unsigned long)SHF_ALLOC; | ||
1819 | #ifdef CONFIG_KALLSYMS | 1839 | #ifdef CONFIG_KALLSYMS |
1820 | /* Keep symbol and string tables for decoding later. */ | 1840 | /* Keep symbol and string tables for decoding later. */ |
1821 | sechdrs[symindex].sh_flags |= SHF_ALLOC; | 1841 | sechdrs[symindex].sh_flags |= SHF_ALLOC; |
@@ -1977,7 +1997,8 @@ static struct module *load_module(void __user *umod, | |||
1977 | mod->unused_crcs = (void *)sechdrs[unusedcrcindex].sh_addr; | 1997 | mod->unused_crcs = (void *)sechdrs[unusedcrcindex].sh_addr; |
1978 | mod->unused_gpl_syms = (void *)sechdrs[unusedgplindex].sh_addr; | 1998 | mod->unused_gpl_syms = (void *)sechdrs[unusedgplindex].sh_addr; |
1979 | if (unusedgplcrcindex) | 1999 | if (unusedgplcrcindex) |
1980 | mod->unused_crcs = (void *)sechdrs[unusedgplcrcindex].sh_addr; | 2000 | mod->unused_gpl_crcs |
2001 | = (void *)sechdrs[unusedgplcrcindex].sh_addr; | ||
1981 | 2002 | ||
1982 | #ifdef CONFIG_MODVERSIONS | 2003 | #ifdef CONFIG_MODVERSIONS |
1983 | if ((mod->num_syms && !crcindex) || | 2004 | if ((mod->num_syms && !crcindex) || |
@@ -2171,6 +2192,8 @@ sys_init_module(void __user *umod, | |||
2171 | mod->state = MODULE_STATE_GOING; | 2192 | mod->state = MODULE_STATE_GOING; |
2172 | synchronize_sched(); | 2193 | synchronize_sched(); |
2173 | module_put(mod); | 2194 | module_put(mod); |
2195 | blocking_notifier_call_chain(&module_notify_list, | ||
2196 | MODULE_STATE_GOING, mod); | ||
2174 | mutex_lock(&module_mutex); | 2197 | mutex_lock(&module_mutex); |
2175 | free_module(mod); | 2198 | free_module(mod); |
2176 | mutex_unlock(&module_mutex); | 2199 | mutex_unlock(&module_mutex); |
diff --git a/kernel/notifier.c b/kernel/notifier.c index 643360d1bb14..823be11584ef 100644 --- a/kernel/notifier.c +++ b/kernel/notifier.c | |||
@@ -31,6 +31,21 @@ static int notifier_chain_register(struct notifier_block **nl, | |||
31 | return 0; | 31 | return 0; |
32 | } | 32 | } |
33 | 33 | ||
34 | static int notifier_chain_cond_register(struct notifier_block **nl, | ||
35 | struct notifier_block *n) | ||
36 | { | ||
37 | while ((*nl) != NULL) { | ||
38 | if ((*nl) == n) | ||
39 | return 0; | ||
40 | if (n->priority > (*nl)->priority) | ||
41 | break; | ||
42 | nl = &((*nl)->next); | ||
43 | } | ||
44 | n->next = *nl; | ||
45 | rcu_assign_pointer(*nl, n); | ||
46 | return 0; | ||
47 | } | ||
48 | |||
34 | static int notifier_chain_unregister(struct notifier_block **nl, | 49 | static int notifier_chain_unregister(struct notifier_block **nl, |
35 | struct notifier_block *n) | 50 | struct notifier_block *n) |
36 | { | 51 | { |
@@ -205,6 +220,29 @@ int blocking_notifier_chain_register(struct blocking_notifier_head *nh, | |||
205 | EXPORT_SYMBOL_GPL(blocking_notifier_chain_register); | 220 | EXPORT_SYMBOL_GPL(blocking_notifier_chain_register); |
206 | 221 | ||
207 | /** | 222 | /** |
223 | * blocking_notifier_chain_cond_register - Conditionally add notifier to a blocking notifier chain | ||
224 | * @nh: Pointer to head of the blocking notifier chain | ||
225 | * @n: New entry in notifier chain | ||
226 | * | ||
227 | * Adds a notifier to a blocking notifier chain, only if not already | ||
228 | * present in the chain. | ||
229 | * Must be called in process context. | ||
230 | * | ||
231 | * Currently always returns zero. | ||
232 | */ | ||
233 | int blocking_notifier_chain_cond_register(struct blocking_notifier_head *nh, | ||
234 | struct notifier_block *n) | ||
235 | { | ||
236 | int ret; | ||
237 | |||
238 | down_write(&nh->rwsem); | ||
239 | ret = notifier_chain_cond_register(&nh->head, n); | ||
240 | up_write(&nh->rwsem); | ||
241 | return ret; | ||
242 | } | ||
243 | EXPORT_SYMBOL_GPL(blocking_notifier_chain_cond_register); | ||
244 | |||
245 | /** | ||
208 | * blocking_notifier_chain_unregister - Remove notifier from a blocking notifier chain | 246 | * blocking_notifier_chain_unregister - Remove notifier from a blocking notifier chain |
209 | * @nh: Pointer to head of the blocking notifier chain | 247 | * @nh: Pointer to head of the blocking notifier chain |
210 | * @n: Entry to remove from notifier chain | 248 | * @n: Entry to remove from notifier chain |
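The cond_register variant makes registration idempotent: re-adding a block that is already linked is a silent no-op instead of corrupting the chain. Intended call pattern, as a sketch (chain and callback names are assumptions):

    static int my_cb(struct notifier_block *nb, unsigned long ev, void *data)
    {
            return NOTIFY_OK;
    }

    static struct notifier_block my_nb = { .notifier_call = my_cb };
    static BLOCKING_NOTIFIER_HEAD(my_chain);

    /* safe from several init paths that may repeat or race */
    blocking_notifier_chain_cond_register(&my_chain, &my_nb);
    blocking_notifier_chain_cond_register(&my_chain, &my_nb); /* no-op */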
diff --git a/kernel/ns_cgroup.c b/kernel/ns_cgroup.c index aead4d69f62b..48d7ed6fc3a4 100644 --- a/kernel/ns_cgroup.c +++ b/kernel/ns_cgroup.c | |||
@@ -7,6 +7,8 @@ | |||
7 | #include <linux/module.h> | 7 | #include <linux/module.h> |
8 | #include <linux/cgroup.h> | 8 | #include <linux/cgroup.h> |
9 | #include <linux/fs.h> | 9 | #include <linux/fs.h> |
10 | #include <linux/slab.h> | ||
11 | #include <linux/nsproxy.h> | ||
10 | 12 | ||
11 | struct ns_cgroup { | 13 | struct ns_cgroup { |
12 | struct cgroup_subsys_state css; | 14 | struct cgroup_subsys_state css; |
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index f5d332cf8c63..adc785146a1c 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c | |||
@@ -139,6 +139,18 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk) | |||
139 | goto out; | 139 | goto out; |
140 | } | 140 | } |
141 | 141 | ||
142 | /* | ||
143 | * CLONE_NEWIPC must detach from the undolist: after switching | ||
144 | * to a new ipc namespace, the semaphore arrays from the old | ||
145 | * namespace are unreachable. In clone parlance, CLONE_SYSVSEM | ||
146 | * means share undolist with parent, so we must forbid using | ||
147 | * it along with CLONE_NEWIPC. | ||
148 | */ | ||
149 | if ((flags & CLONE_NEWIPC) && (flags & CLONE_SYSVSEM)) { | ||
150 | err = -EINVAL; | ||
151 | goto out; | ||
152 | } | ||
153 | |||
142 | new_ns = create_new_namespaces(flags, tsk, tsk->fs); | 154 | new_ns = create_new_namespaces(flags, tsk, tsk->fs); |
143 | if (IS_ERR(new_ns)) { | 155 | if (IS_ERR(new_ns)) { |
144 | err = PTR_ERR(new_ns); | 156 | err = PTR_ERR(new_ns); |
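The new check is visible from userspace through clone(2): requesting a fresh IPC namespace while asking to share the semaphore undo list now fails with EINVAL. A runnable sketch (Linux-specific, stack-grows-down assumption; CLONE_NEWIPC needs CAP_SYS_ADMIN):

    #define _GNU_SOURCE
    #include <sched.h>
    #include <signal.h>
    #include <stdio.h>
    #include <stdlib.h>

    static int child(void *arg) { return 0; }

    int main(void)
    {
            char *stack = malloc(64 * 1024);

            /* CLONE_NEWIPC | CLONE_SYSVSEM is now rejected */
            if (clone(child, stack + 64 * 1024,
                      CLONE_NEWIPC | CLONE_SYSVSEM | SIGCHLD, NULL) == -1)
                    perror("clone");        /* expect EINVAL */
            return 0;
    }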
diff --git a/kernel/panic.c b/kernel/panic.c index 24af9f8bac99..425567f45b9f 100644 --- a/kernel/panic.c +++ b/kernel/panic.c | |||
@@ -153,6 +153,8 @@ EXPORT_SYMBOL(panic); | |||
153 | * 'M' - System experienced a machine check exception. | 153 | * 'M' - System experienced a machine check exception. |
154 | * 'B' - System has hit bad_page. | 154 | * 'B' - System has hit bad_page. |
155 | * 'U' - Userspace-defined naughtiness. | 155 | * 'U' - Userspace-defined naughtiness. |
156 | * 'A' - ACPI table overridden. | ||
157 | * 'W' - Taint on warning. | ||
156 | * | 158 | * |
157 | * The string is overwritten by the next call to print_taint(). | 159 | * The string is overwritten by the next call to print_taint(). |
158 | */ | 160 | */ |
@@ -161,7 +163,7 @@ const char *print_tainted(void) | |||
161 | { | 163 | { |
162 | static char buf[20]; | 164 | static char buf[20]; |
163 | if (tainted) { | 165 | if (tainted) { |
164 | snprintf(buf, sizeof(buf), "Tainted: %c%c%c%c%c%c%c%c%c", | 166 | snprintf(buf, sizeof(buf), "Tainted: %c%c%c%c%c%c%c%c%c%c", |
165 | tainted & TAINT_PROPRIETARY_MODULE ? 'P' : 'G', | 167 | tainted & TAINT_PROPRIETARY_MODULE ? 'P' : 'G', |
166 | tainted & TAINT_FORCED_MODULE ? 'F' : ' ', | 168 | tainted & TAINT_FORCED_MODULE ? 'F' : ' ', |
167 | tainted & TAINT_UNSAFE_SMP ? 'S' : ' ', | 169 | tainted & TAINT_UNSAFE_SMP ? 'S' : ' ', |
@@ -170,7 +172,8 @@ const char *print_tainted(void) | |||
170 | tainted & TAINT_BAD_PAGE ? 'B' : ' ', | 172 | tainted & TAINT_BAD_PAGE ? 'B' : ' ', |
171 | tainted & TAINT_USER ? 'U' : ' ', | 173 | tainted & TAINT_USER ? 'U' : ' ', |
172 | tainted & TAINT_DIE ? 'D' : ' ', | 174 | tainted & TAINT_DIE ? 'D' : ' ', |
173 | tainted & TAINT_OVERRIDDEN_ACPI_TABLE ? 'A' : ' '); | 175 | tainted & TAINT_OVERRIDDEN_ACPI_TABLE ? 'A' : ' ', |
176 | tainted & TAINT_WARN ? 'W' : ' '); | ||
174 | } | 177 | } |
175 | else | 178 | else |
176 | snprintf(buf, sizeof(buf), "Not tainted"); | 179 | snprintf(buf, sizeof(buf), "Not tainted"); |
@@ -312,6 +315,7 @@ void warn_on_slowpath(const char *file, int line) | |||
312 | print_modules(); | 315 | print_modules(); |
313 | dump_stack(); | 316 | dump_stack(); |
314 | print_oops_end_marker(); | 317 | print_oops_end_marker(); |
318 | add_taint(TAINT_WARN); | ||
315 | } | 319 | } |
316 | EXPORT_SYMBOL(warn_on_slowpath); | 320 | EXPORT_SYMBOL(warn_on_slowpath); |
317 | #endif | 321 | #endif |
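Two coupled changes here: print_tainted() gains a tenth flag character and warn_on_slowpath() now sets TAINT_WARN, so a kernel that has ever hit a WARN_ON is flagged 'W' in later oopses. The existing buf[20] still fits exactly, which a quick userspace check confirms:

    #include <stdio.h>

    int main(void)
    {
            char buf[20];   /* "Tainted: " (9) + 10 flags + NUL = 20 */

            snprintf(buf, sizeof(buf), "Tainted: %c%c%c%c%c%c%c%c%c%c",
                     'P', 'F', 'S', 'R', 'M', 'B', 'U', 'D', 'A', 'W');
            puts(buf);      /* prints all ten flags, nothing truncated */
            return 0;
    }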
diff --git a/kernel/pid.c b/kernel/pid.c index 477691576b33..20d59fa2d493 100644 --- a/kernel/pid.c +++ b/kernel/pid.c | |||
@@ -111,10 +111,11 @@ EXPORT_SYMBOL(is_container_init); | |||
111 | 111 | ||
112 | static __cacheline_aligned_in_smp DEFINE_SPINLOCK(pidmap_lock); | 112 | static __cacheline_aligned_in_smp DEFINE_SPINLOCK(pidmap_lock); |
113 | 113 | ||
114 | static void free_pidmap(struct pid_namespace *pid_ns, int pid) | 114 | static void free_pidmap(struct upid *upid) |
115 | { | 115 | { |
116 | struct pidmap *map = pid_ns->pidmap + pid / BITS_PER_PAGE; | 116 | int nr = upid->nr; |
117 | int offset = pid & BITS_PER_PAGE_MASK; | 117 | struct pidmap *map = upid->ns->pidmap + nr / BITS_PER_PAGE; |
118 | int offset = nr & BITS_PER_PAGE_MASK; | ||
118 | 119 | ||
119 | clear_bit(offset, map->page); | 120 | clear_bit(offset, map->page); |
120 | atomic_inc(&map->nr_free); | 121 | atomic_inc(&map->nr_free); |
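free_pidmap() now takes the struct upid, which already pairs the pid number with its owning namespace, instead of two loose arguments. The index math is unchanged; in isolation (BITS_PER_PAGE is 8 * PAGE_SIZE, so BITS_PER_PAGE_MASK is BITS_PER_PAGE - 1):

    /* a pid number selects a bitmap page, then a bit within it */
    unsigned int page_index = nr / BITS_PER_PAGE;
    unsigned int offset     = nr & (BITS_PER_PAGE - 1); /* nr % BITS_PER_PAGE */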
@@ -232,7 +233,7 @@ void free_pid(struct pid *pid) | |||
232 | spin_unlock_irqrestore(&pidmap_lock, flags); | 233 | spin_unlock_irqrestore(&pidmap_lock, flags); |
233 | 234 | ||
234 | for (i = 0; i <= pid->level; i++) | 235 | for (i = 0; i <= pid->level; i++) |
235 | free_pidmap(pid->numbers[i].ns, pid->numbers[i].nr); | 236 | free_pidmap(pid->numbers + i); |
236 | 237 | ||
237 | call_rcu(&pid->rcu, delayed_put_pid); | 238 | call_rcu(&pid->rcu, delayed_put_pid); |
238 | } | 239 | } |
@@ -278,8 +279,8 @@ out: | |||
278 | return pid; | 279 | return pid; |
279 | 280 | ||
280 | out_free: | 281 | out_free: |
281 | for (i++; i <= ns->level; i++) | 282 | while (++i <= ns->level) |
282 | free_pidmap(pid->numbers[i].ns, pid->numbers[i].nr); | 283 | free_pidmap(pid->numbers + i); |
283 | 284 | ||
284 | kmem_cache_free(ns->pid_cachep, pid); | 285 | kmem_cache_free(ns->pid_cachep, pid); |
285 | pid = NULL; | 286 | pid = NULL; |
@@ -316,7 +317,7 @@ EXPORT_SYMBOL_GPL(find_pid); | |||
316 | /* | 317 | /* |
317 | * attach_pid() must be called with the tasklist_lock write-held. | 318 | * attach_pid() must be called with the tasklist_lock write-held. |
318 | */ | 319 | */ |
319 | int attach_pid(struct task_struct *task, enum pid_type type, | 320 | void attach_pid(struct task_struct *task, enum pid_type type, |
320 | struct pid *pid) | 321 | struct pid *pid) |
321 | { | 322 | { |
322 | struct pid_link *link; | 323 | struct pid_link *link; |
@@ -324,11 +325,10 @@ int attach_pid(struct task_struct *task, enum pid_type type, | |||
324 | link = &task->pids[type]; | 325 | link = &task->pids[type]; |
325 | link->pid = pid; | 326 | link->pid = pid; |
326 | hlist_add_head_rcu(&link->node, &pid->tasks[type]); | 327 | hlist_add_head_rcu(&link->node, &pid->tasks[type]); |
327 | |||
328 | return 0; | ||
329 | } | 328 | } |
330 | 329 | ||
331 | void detach_pid(struct task_struct *task, enum pid_type type) | 330 | static void __change_pid(struct task_struct *task, enum pid_type type, |
331 | struct pid *new) | ||
332 | { | 332 | { |
333 | struct pid_link *link; | 333 | struct pid_link *link; |
334 | struct pid *pid; | 334 | struct pid *pid; |
@@ -338,7 +338,7 @@ void detach_pid(struct task_struct *task, enum pid_type type) | |||
338 | pid = link->pid; | 338 | pid = link->pid; |
339 | 339 | ||
340 | hlist_del_rcu(&link->node); | 340 | hlist_del_rcu(&link->node); |
341 | link->pid = NULL; | 341 | link->pid = new; |
342 | 342 | ||
343 | for (tmp = PIDTYPE_MAX; --tmp >= 0; ) | 343 | for (tmp = PIDTYPE_MAX; --tmp >= 0; ) |
344 | if (!hlist_empty(&pid->tasks[tmp])) | 344 | if (!hlist_empty(&pid->tasks[tmp])) |
@@ -347,13 +347,24 @@ void detach_pid(struct task_struct *task, enum pid_type type) | |||
347 | free_pid(pid); | 347 | free_pid(pid); |
348 | } | 348 | } |
349 | 349 | ||
350 | void detach_pid(struct task_struct *task, enum pid_type type) | ||
351 | { | ||
352 | __change_pid(task, type, NULL); | ||
353 | } | ||
354 | |||
355 | void change_pid(struct task_struct *task, enum pid_type type, | ||
356 | struct pid *pid) | ||
357 | { | ||
358 | __change_pid(task, type, pid); | ||
359 | attach_pid(task, type, pid); | ||
360 | } | ||
361 | |||
350 | /* transfer_pid is an optimization of attach_pid(new), detach_pid(old) */ | 362 | /* transfer_pid is an optimization of attach_pid(new), detach_pid(old) */ |
351 | void transfer_pid(struct task_struct *old, struct task_struct *new, | 363 | void transfer_pid(struct task_struct *old, struct task_struct *new, |
352 | enum pid_type type) | 364 | enum pid_type type) |
353 | { | 365 | { |
354 | new->pids[type].pid = old->pids[type].pid; | 366 | new->pids[type].pid = old->pids[type].pid; |
355 | hlist_replace_rcu(&old->pids[type].node, &new->pids[type].node); | 367 | hlist_replace_rcu(&old->pids[type].node, &new->pids[type].node); |
356 | old->pids[type].pid = NULL; | ||
357 | } | 368 | } |
358 | 369 | ||
359 | struct task_struct *pid_task(struct pid *pid, enum pid_type type) | 370 | struct task_struct *pid_task(struct pid *pid, enum pid_type type) |
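change_pid() fuses detach_pid() and attach_pid() through the shared __change_pid(), so a caller moving a task onto another struct pid never leaves the link NULL in between. Sketch of a caller under the tasklist_lock write lock (the call site is illustrative):

    /* before: detach_pid(task, PIDTYPE_PGID);
     *         attach_pid(task, PIDTYPE_PGID, newpgrp);
     * after, one step: */
    change_pid(task, PIDTYPE_PGID, newpgrp);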
@@ -380,12 +391,6 @@ struct task_struct *find_task_by_pid_type_ns(int type, int nr, | |||
380 | 391 | ||
381 | EXPORT_SYMBOL(find_task_by_pid_type_ns); | 392 | EXPORT_SYMBOL(find_task_by_pid_type_ns); |
382 | 393 | ||
383 | struct task_struct *find_task_by_pid(pid_t nr) | ||
384 | { | ||
385 | return find_task_by_pid_type_ns(PIDTYPE_PID, nr, &init_pid_ns); | ||
386 | } | ||
387 | EXPORT_SYMBOL(find_task_by_pid); | ||
388 | |||
389 | struct task_struct *find_task_by_vpid(pid_t vnr) | 394 | struct task_struct *find_task_by_vpid(pid_t vnr) |
390 | { | 395 | { |
391 | return find_task_by_pid_type_ns(PIDTYPE_PID, vnr, | 396 | return find_task_by_pid_type_ns(PIDTYPE_PID, vnr, |
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index 5ca37fa50beb..98702b4b8851 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c | |||
@@ -66,7 +66,7 @@ err_alloc: | |||
66 | return NULL; | 66 | return NULL; |
67 | } | 67 | } |
68 | 68 | ||
69 | static struct pid_namespace *create_pid_namespace(int level) | 69 | static struct pid_namespace *create_pid_namespace(unsigned int level) |
70 | { | 70 | { |
71 | struct pid_namespace *ns; | 71 | struct pid_namespace *ns; |
72 | int i; | 72 | int i; |
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index ae5c6c147c4b..f1525ad06cb3 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c | |||
@@ -4,8 +4,9 @@ | |||
4 | 4 | ||
5 | #include <linux/sched.h> | 5 | #include <linux/sched.h> |
6 | #include <linux/posix-timers.h> | 6 | #include <linux/posix-timers.h> |
7 | #include <asm/uaccess.h> | ||
8 | #include <linux/errno.h> | 7 | #include <linux/errno.h> |
8 | #include <linux/math64.h> | ||
9 | #include <asm/uaccess.h> | ||
9 | 10 | ||
10 | static int check_clock(const clockid_t which_clock) | 11 | static int check_clock(const clockid_t which_clock) |
11 | { | 12 | { |
@@ -47,12 +48,10 @@ static void sample_to_timespec(const clockid_t which_clock, | |||
47 | union cpu_time_count cpu, | 48 | union cpu_time_count cpu, |
48 | struct timespec *tp) | 49 | struct timespec *tp) |
49 | { | 50 | { |
50 | if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) { | 51 | if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) |
51 | tp->tv_sec = div_long_long_rem(cpu.sched, | 52 | *tp = ns_to_timespec(cpu.sched); |
52 | NSEC_PER_SEC, &tp->tv_nsec); | 53 | else |
53 | } else { | ||
54 | cputime_to_timespec(cpu.cpu, tp); | 54 | cputime_to_timespec(cpu.cpu, tp); |
55 | } | ||
56 | } | 55 | } |
57 | 56 | ||
58 | static inline int cpu_time_before(const clockid_t which_clock, | 57 | static inline int cpu_time_before(const clockid_t which_clock, |
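The open-coded div_long_long_rem() split becomes a call to the generic ns_to_timespec() helper. What that conversion computes, as a userspace sketch with a worked value (negative inputs are normalized differently and ignored here):

    #include <stdio.h>

    int main(void)
    {
            long long ns = 1234567890123LL;         /* nanoseconds */
            long long sec  = ns / 1000000000LL;     /* 1234 */
            long long nsec = ns % 1000000000LL;     /* 567890123 */

            printf("%lld.%09lld\n", sec, nsec);     /* 1234.567890123 */
            return 0;
    }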
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index 8476956ffd92..dbd8398ddb0b 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c | |||
@@ -310,8 +310,7 @@ int posix_timer_event(struct k_itimer *timr,int si_private) | |||
310 | 310 | ||
311 | if (timr->it_sigev_notify & SIGEV_THREAD_ID) { | 311 | if (timr->it_sigev_notify & SIGEV_THREAD_ID) { |
312 | struct task_struct *leader; | 312 | struct task_struct *leader; |
313 | int ret = send_sigqueue(timr->it_sigev_signo, timr->sigq, | 313 | int ret = send_sigqueue(timr->sigq, timr->it_process, 0); |
314 | timr->it_process); | ||
315 | 314 | ||
316 | if (likely(ret >= 0)) | 315 | if (likely(ret >= 0)) |
317 | return ret; | 316 | return ret; |
@@ -322,8 +321,7 @@ int posix_timer_event(struct k_itimer *timr,int si_private) | |||
322 | timr->it_process = leader; | 321 | timr->it_process = leader; |
323 | } | 322 | } |
324 | 323 | ||
325 | return send_group_sigqueue(timr->it_sigev_signo, timr->sigq, | 324 | return send_sigqueue(timr->sigq, timr->it_process, 1); |
326 | timr->it_process); | ||
327 | } | 325 | } |
328 | EXPORT_SYMBOL_GPL(posix_timer_event); | 326 | EXPORT_SYMBOL_GPL(posix_timer_event); |
329 | 327 | ||
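send_group_sigqueue() is folded into send_sigqueue(), which takes the target task plus a group flag, so posix_timer_event()'s two delivery paths now differ only in that last argument:

    /* SIGEV_THREAD_ID: deliver to a single thread */
    ret = send_sigqueue(timr->sigq, timr->it_process, 0);

    /* otherwise: deliver to the whole thread group */
    ret = send_sigqueue(timr->sigq, timr->it_process, 1);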
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 6233f3b4ae66..b45da40e8d25 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig | |||
@@ -19,16 +19,6 @@ config PM | |||
19 | will issue the hlt instruction if nothing is to be done, thereby | 19 | will issue the hlt instruction if nothing is to be done, thereby |
20 | sending the processor to sleep and saving power. | 20 | sending the processor to sleep and saving power. |
21 | 21 | ||
22 | config PM_LEGACY | ||
23 | bool "Legacy Power Management API (DEPRECATED)" | ||
24 | depends on PM | ||
25 | default n | ||
26 | ---help--- | ||
27 | Support for pm_register() and friends. This old API is obsoleted | ||
28 | by the driver model. | ||
29 | |||
30 | If unsure, say N. | ||
31 | |||
32 | config PM_DEBUG | 22 | config PM_DEBUG |
33 | bool "Power Management Debug Support" | 23 | bool "Power Management Debug Support" |
34 | depends on PM | 24 | depends on PM |
diff --git a/kernel/power/Makefile b/kernel/power/Makefile index f7dfff28ecdb..597823b5b700 100644 --- a/kernel/power/Makefile +++ b/kernel/power/Makefile | |||
@@ -4,7 +4,6 @@ EXTRA_CFLAGS += -DDEBUG | |||
4 | endif | 4 | endif |
5 | 5 | ||
6 | obj-y := main.o | 6 | obj-y := main.o |
7 | obj-$(CONFIG_PM_LEGACY) += pm.o | ||
8 | obj-$(CONFIG_PM_SLEEP) += process.o console.o | 7 | obj-$(CONFIG_PM_SLEEP) += process.o console.o |
9 | obj-$(CONFIG_HIBERNATION) += swsusp.o disk.o snapshot.o swap.o user.o | 8 | obj-$(CONFIG_HIBERNATION) += swsusp.o disk.o snapshot.o swap.o user.o |
10 | 9 | ||
diff --git a/kernel/power/pm.c b/kernel/power/pm.c deleted file mode 100644 index 60c73fa670d5..000000000000 --- a/kernel/power/pm.c +++ /dev/null | |||
@@ -1,205 +0,0 @@ | |||
1 | /* | ||
2 | * pm.c - Power management interface | ||
3 | * | ||
4 | * Copyright (C) 2000 Andrew Henroid | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
19 | */ | ||
20 | #include <linux/init.h> | ||
21 | #include <linux/module.h> | ||
22 | #include <linux/spinlock.h> | ||
23 | #include <linux/mm.h> | ||
24 | #include <linux/slab.h> | ||
25 | #include <linux/pm.h> | ||
26 | #include <linux/pm_legacy.h> | ||
27 | #include <linux/interrupt.h> | ||
28 | #include <linux/mutex.h> | ||
29 | |||
30 | /* | ||
31 | * Locking notes: | ||
32 | * pm_devs_lock can be a semaphore providing pm ops are not called | ||
33 | * from an interrupt handler (already a bad idea so no change here). Each | ||
34 | * change must be protected so that an unlink of an entry doesn't clash | ||
35 | * with a pm send - which is permitted to sleep in the current architecture | ||
36 | * | ||
37 | * Module unloads clashing with pm events now work out safely, the module | ||
38 | * unload path will block until the event has been sent. It may well block | ||
39 | * until a resume but that will be fine. | ||
40 | */ | ||
41 | |||
42 | static DEFINE_MUTEX(pm_devs_lock); | ||
43 | static LIST_HEAD(pm_devs); | ||
44 | |||
45 | /** | ||
46 | * pm_register - register a device with power management | ||
47 | * @type: device type | ||
48 | * @id: device ID | ||
49 | * @callback: callback function | ||
50 | * | ||
51 | * Add a device to the list of devices that wish to be notified about | ||
52 | * power management events. A &pm_dev structure is returned on success, | ||
53 | * on failure the return is %NULL. | ||
54 | * | ||
55 | * The callback function will be called in process context and | ||
56 | * it may sleep. | ||
57 | */ | ||
58 | |||
59 | struct pm_dev *pm_register(pm_dev_t type, | ||
60 | unsigned long id, | ||
61 | pm_callback callback) | ||
62 | { | ||
63 | struct pm_dev *dev = kzalloc(sizeof(struct pm_dev), GFP_KERNEL); | ||
64 | if (dev) { | ||
65 | dev->type = type; | ||
66 | dev->id = id; | ||
67 | dev->callback = callback; | ||
68 | |||
69 | mutex_lock(&pm_devs_lock); | ||
70 | list_add(&dev->entry, &pm_devs); | ||
71 | mutex_unlock(&pm_devs_lock); | ||
72 | } | ||
73 | return dev; | ||
74 | } | ||
75 | |||
76 | /** | ||
77 | * pm_send - send request to a single device | ||
78 | * @dev: device to send to | ||
79 | * @rqst: power management request | ||
80 | * @data: data for the callback | ||
81 | * | ||
82 | * Issue a power management request to a given device. The | ||
83 | * %PM_SUSPEND and %PM_RESUME events are handled specially. The | ||
84 | * data field must hold the intended next state. No call is made | ||
85 | * if the state matches. | ||
86 | * | ||
87 | * BUGS: what stops two power management requests occurring in parallel | ||
88 | * and conflicting. | ||
89 | * | ||
90 | * WARNING: Calling pm_send directly is not generally recommended, in | ||
91 | * particular there is no locking against the pm_dev going away. The | ||
92 | * caller must maintain all needed locking or have 'inside knowledge' | ||
93 | * on the safety. Also remember that this function is not locked against | ||
94 | * pm_unregister. This means that you must handle SMP races on callback | ||
95 | * execution and unload yourself. | ||
96 | */ | ||
97 | |||
98 | static int pm_send(struct pm_dev *dev, pm_request_t rqst, void *data) | ||
99 | { | ||
100 | int status = 0; | ||
101 | unsigned long prev_state, next_state; | ||
102 | |||
103 | if (in_interrupt()) | ||
104 | BUG(); | ||
105 | |||
106 | switch (rqst) { | ||
107 | case PM_SUSPEND: | ||
108 | case PM_RESUME: | ||
109 | prev_state = dev->state; | ||
110 | next_state = (unsigned long) data; | ||
111 | if (prev_state != next_state) { | ||
112 | if (dev->callback) | ||
113 | status = (*dev->callback)(dev, rqst, data); | ||
114 | if (!status) { | ||
115 | dev->state = next_state; | ||
116 | dev->prev_state = prev_state; | ||
117 | } | ||
118 | } | ||
119 | else { | ||
120 | dev->prev_state = prev_state; | ||
121 | } | ||
122 | break; | ||
123 | default: | ||
124 | if (dev->callback) | ||
125 | status = (*dev->callback)(dev, rqst, data); | ||
126 | break; | ||
127 | } | ||
128 | return status; | ||
129 | } | ||
130 | |||
131 | /* | ||
132 | * Undo incomplete request | ||
133 | */ | ||
134 | static void pm_undo_all(struct pm_dev *last) | ||
135 | { | ||
136 | struct list_head *entry = last->entry.prev; | ||
137 | while (entry != &pm_devs) { | ||
138 | struct pm_dev *dev = list_entry(entry, struct pm_dev, entry); | ||
139 | if (dev->state != dev->prev_state) { | ||
140 | /* previous state was zero (running) resume or | ||
141 | * previous state was non-zero (suspended) suspend | ||
142 | */ | ||
143 | pm_request_t undo = (dev->prev_state | ||
144 | ? PM_SUSPEND:PM_RESUME); | ||
145 | pm_send(dev, undo, (void*) dev->prev_state); | ||
146 | } | ||
147 | entry = entry->prev; | ||
148 | } | ||
149 | } | ||
150 | |||
151 | /** | ||
152 | * pm_send_all - send request to all managed devices | ||
153 | * @rqst: power management request | ||
154 | * @data: data for the callback | ||
155 | * | ||
156 | * Issue a power management request to all devices. The | ||
157 | * %PM_SUSPEND events are handled specially. Any device is | ||
158 | * permitted to fail a suspend by returning a non-zero (error) | ||
159 | * value from its callback function. If any device vetoes a | ||
160 | * suspend request then all other devices that have suspended | ||
161 | * during the processing of this request are restored to their | ||
162 | * previous state. | ||
163 | * | ||
164 | * WARNING: This function takes the pm_devs_lock. The lock is not dropped until | ||
165 | * the callbacks have completed. This prevents races against pm locking | ||
166 | * functions, races against module unload pm_unregister code. It does | ||
167 | * mean however that you must not issue pm_ functions within the callback | ||
168 | * or you will deadlock and users will hate you. | ||
169 | * | ||
170 | * Zero is returned on success. If a suspend fails then the status | ||
171 | * from the device that vetoes the suspend is returned. | ||
172 | * | ||
173 | * BUGS: what stops two power management requests occurring in parallel | ||
174 | * and conflicting. | ||
175 | */ | ||
176 | |||
177 | int pm_send_all(pm_request_t rqst, void *data) | ||
178 | { | ||
179 | struct list_head *entry; | ||
180 | |||
181 | mutex_lock(&pm_devs_lock); | ||
182 | entry = pm_devs.next; | ||
183 | while (entry != &pm_devs) { | ||
184 | struct pm_dev *dev = list_entry(entry, struct pm_dev, entry); | ||
185 | if (dev->callback) { | ||
186 | int status = pm_send(dev, rqst, data); | ||
187 | if (status) { | ||
188 | /* return devices to previous state on | ||
189 | * failed suspend request | ||
190 | */ | ||
191 | if (rqst == PM_SUSPEND) | ||
192 | pm_undo_all(dev); | ||
193 | mutex_unlock(&pm_devs_lock); | ||
194 | return status; | ||
195 | } | ||
196 | } | ||
197 | entry = entry->next; | ||
198 | } | ||
199 | mutex_unlock(&pm_devs_lock); | ||
200 | return 0; | ||
201 | } | ||
202 | |||
203 | EXPORT_SYMBOL(pm_register); | ||
204 | EXPORT_SYMBOL(pm_send_all); | ||
205 | |||
diff --git a/kernel/printk.c b/kernel/printk.c index bdd4ea8c3f2b..8fb01c32aa3b 100644 --- a/kernel/printk.c +++ b/kernel/printk.c | |||
@@ -111,6 +111,9 @@ struct console_cmdline | |||
111 | char name[8]; /* Name of the driver */ | 111 | char name[8]; /* Name of the driver */ |
112 | int index; /* Minor dev. to use */ | 112 | int index; /* Minor dev. to use */ |
113 | char *options; /* Options for the driver */ | 113 | char *options; /* Options for the driver */ |
114 | #ifdef CONFIG_A11Y_BRAILLE_CONSOLE | ||
115 | char *brl_options; /* Options for braille driver */ | ||
116 | #endif | ||
114 | }; | 117 | }; |
115 | 118 | ||
116 | #define MAX_CMDLINECONSOLES 8 | 119 | #define MAX_CMDLINECONSOLES 8 |
@@ -808,15 +811,60 @@ static void call_console_drivers(unsigned start, unsigned end) | |||
808 | 811 | ||
809 | #endif | 812 | #endif |
810 | 813 | ||
814 | static int __add_preferred_console(char *name, int idx, char *options, | ||
815 | char *brl_options) | ||
816 | { | ||
817 | struct console_cmdline *c; | ||
818 | int i; | ||
819 | |||
820 | /* | ||
821 | * See if this tty is not yet registered, and | ||
822 | * if we have a slot free. | ||
823 | */ | ||
824 | for (i = 0; i < MAX_CMDLINECONSOLES && console_cmdline[i].name[0]; i++) | ||
825 | if (strcmp(console_cmdline[i].name, name) == 0 && | ||
826 | console_cmdline[i].index == idx) { | ||
827 | if (!brl_options) | ||
828 | selected_console = i; | ||
829 | return 0; | ||
830 | } | ||
831 | if (i == MAX_CMDLINECONSOLES) | ||
832 | return -E2BIG; | ||
833 | if (!brl_options) | ||
834 | selected_console = i; | ||
835 | c = &console_cmdline[i]; | ||
836 | strlcpy(c->name, name, sizeof(c->name)); | ||
837 | c->options = options; | ||
838 | #ifdef CONFIG_A11Y_BRAILLE_CONSOLE | ||
839 | c->brl_options = brl_options; | ||
840 | #endif | ||
841 | c->index = idx; | ||
842 | return 0; | ||
843 | } | ||
811 | /* | 844 | /* |
812 | * Set up a list of consoles. Called from init/main.c | 845 | * Set up a list of consoles. Called from init/main.c |
813 | */ | 846 | */ |
814 | static int __init console_setup(char *str) | 847 | static int __init console_setup(char *str) |
815 | { | 848 | { |
816 | char buf[sizeof(console_cmdline[0].name) + 4]; /* 4 for index */ | 849 | char buf[sizeof(console_cmdline[0].name) + 4]; /* 4 for index */ |
817 | char *s, *options; | 850 | char *s, *options, *brl_options = NULL; |
818 | int idx; | 851 | int idx; |
819 | 852 | ||
853 | #ifdef CONFIG_A11Y_BRAILLE_CONSOLE | ||
854 | if (!memcmp(str, "brl,", 4)) { | ||
855 | brl_options = ""; | ||
856 | str += 4; | ||
857 | } else if (!memcmp(str, "brl=", 4)) { | ||
858 | brl_options = str + 4; | ||
859 | str = strchr(brl_options, ','); | ||
860 | if (!str) { | ||
861 | printk(KERN_ERR "need port name after brl=\n"); | ||
862 | return 1; | ||
863 | } | ||
864 | *(str++) = 0; | ||
865 | } | ||
866 | #endif | ||
867 | |||
820 | /* | 868 | /* |
821 | * Decode str into name, index, options. | 869 | * Decode str into name, index, options. |
822 | */ | 870 | */ |
@@ -841,7 +889,7 @@ static int __init console_setup(char *str) | |||
841 | idx = simple_strtoul(s, NULL, 10); | 889 | idx = simple_strtoul(s, NULL, 10); |
842 | *s = 0; | 890 | *s = 0; |
843 | 891 | ||
844 | add_preferred_console(buf, idx, options); | 892 | __add_preferred_console(buf, idx, options, brl_options); |
845 | return 1; | 893 | return 1; |
846 | } | 894 | } |
847 | __setup("console=", console_setup); | 895 | __setup("console=", console_setup); |
@@ -861,28 +909,7 @@ __setup("console=", console_setup); | |||
861 | */ | 909 | */ |
862 | int add_preferred_console(char *name, int idx, char *options) | 910 | int add_preferred_console(char *name, int idx, char *options) |
863 | { | 911 | { |
864 | struct console_cmdline *c; | 912 | return __add_preferred_console(name, idx, options, NULL); |
865 | int i; | ||
866 | |||
867 | /* | ||
868 | * See if this tty is not yet registered, and | ||
869 | * if we have a slot free. | ||
870 | */ | ||
871 | for (i = 0; i < MAX_CMDLINECONSOLES && console_cmdline[i].name[0]; i++) | ||
872 | if (strcmp(console_cmdline[i].name, name) == 0 && | ||
873 | console_cmdline[i].index == idx) { | ||
874 | selected_console = i; | ||
875 | return 0; | ||
876 | } | ||
877 | if (i == MAX_CMDLINECONSOLES) | ||
878 | return -E2BIG; | ||
879 | selected_console = i; | ||
880 | c = &console_cmdline[i]; | ||
881 | memcpy(c->name, name, sizeof(c->name)); | ||
882 | c->name[sizeof(c->name) - 1] = 0; | ||
883 | c->options = options; | ||
884 | c->index = idx; | ||
885 | return 0; | ||
886 | } | 913 | } |
887 | 914 | ||
888 | int update_console_cmdline(char *name, int idx, char *name_new, int idx_new, char *options) | 915 | int update_console_cmdline(char *name, int idx, char *name_new, int idx_new, char *options) |
@@ -894,7 +921,7 @@ int update_console_cmdline(char *name, int idx, char *name_new, int idx_new, char *options) | |||
894 | if (strcmp(console_cmdline[i].name, name) == 0 && | 921 | if (strcmp(console_cmdline[i].name, name) == 0 && |
895 | console_cmdline[i].index == idx) { | 922 | console_cmdline[i].index == idx) { |
896 | c = &console_cmdline[i]; | 923 | c = &console_cmdline[i]; |
897 | memcpy(c->name, name_new, sizeof(c->name)); | 924 | strlcpy(c->name, name_new, sizeof(c->name)); |
898 | c->name[sizeof(c->name) - 1] = 0; | 925 | c->name[sizeof(c->name) - 1] = 0; |
899 | c->options = options; | 926 | c->options = options; |
900 | c->index = idx_new; | 927 | c->index = idx_new; |
@@ -1163,6 +1190,16 @@ void register_console(struct console *console) | |||
1163 | continue; | 1190 | continue; |
1164 | if (console->index < 0) | 1191 | if (console->index < 0) |
1165 | console->index = console_cmdline[i].index; | 1192 | console->index = console_cmdline[i].index; |
1193 | #ifdef CONFIG_A11Y_BRAILLE_CONSOLE | ||
1194 | if (console_cmdline[i].brl_options) { | ||
1195 | console->flags |= CON_BRL; | ||
1196 | braille_register_console(console, | ||
1197 | console_cmdline[i].index, | ||
1198 | console_cmdline[i].options, | ||
1199 | console_cmdline[i].brl_options); | ||
1200 | return; | ||
1201 | } | ||
1202 | #endif | ||
1166 | if (console->setup && | 1203 | if (console->setup && |
1167 | console->setup(console, console_cmdline[i].options) != 0) | 1204 | console->setup(console, console_cmdline[i].options) != 0) |
1168 | break; | 1205 | break; |
@@ -1221,6 +1258,11 @@ int unregister_console(struct console *console) | |||
1221 | struct console *a, *b; | 1258 | struct console *a, *b; |
1222 | int res = 1; | 1259 | int res = 1; |
1223 | 1260 | ||
1261 | #ifdef CONFIG_A11Y_BRAILLE_CONSOLE | ||
1262 | if (console->flags & CON_BRL) | ||
1263 | return braille_unregister_console(console); | ||
1264 | #endif | ||
1265 | |||
1224 | acquire_console_sem(); | 1266 | acquire_console_sem(); |
1225 | if (console_drivers == console) { | 1267 | if (console_drivers == console) { |
1226 | console_drivers=console->next; | 1268 | console_drivers=console->next; |
@@ -1272,8 +1314,8 @@ late_initcall(disable_boot_consoles); | |||
1272 | */ | 1314 | */ |
1273 | void tty_write_message(struct tty_struct *tty, char *msg) | 1315 | void tty_write_message(struct tty_struct *tty, char *msg) |
1274 | { | 1316 | { |
1275 | if (tty && tty->driver->write) | 1317 | if (tty && tty->ops->write) |
1276 | tty->driver->write(tty, msg, strlen(msg)); | 1318 | tty->ops->write(tty, msg, strlen(msg)); |
1277 | return; | 1319 | return; |
1278 | } | 1320 | } |
1279 | 1321 | ||
@@ -1287,31 +1329,7 @@ void tty_write_message(struct tty_struct *tty, char *msg) | |||
1287 | */ | 1329 | */ |
1288 | int __printk_ratelimit(int ratelimit_jiffies, int ratelimit_burst) | 1330 | int __printk_ratelimit(int ratelimit_jiffies, int ratelimit_burst) |
1289 | { | 1331 | { |
1290 | static DEFINE_SPINLOCK(ratelimit_lock); | 1332 | return __ratelimit(ratelimit_jiffies, ratelimit_burst); |
1291 | static unsigned toks = 10 * 5 * HZ; | ||
1292 | static unsigned long last_msg; | ||
1293 | static int missed; | ||
1294 | unsigned long flags; | ||
1295 | unsigned long now = jiffies; | ||
1296 | |||
1297 | spin_lock_irqsave(&ratelimit_lock, flags); | ||
1298 | toks += now - last_msg; | ||
1299 | last_msg = now; | ||
1300 | if (toks > (ratelimit_burst * ratelimit_jiffies)) | ||
1301 | toks = ratelimit_burst * ratelimit_jiffies; | ||
1302 | if (toks >= ratelimit_jiffies) { | ||
1303 | int lost = missed; | ||
1304 | |||
1305 | missed = 0; | ||
1306 | toks -= ratelimit_jiffies; | ||
1307 | spin_unlock_irqrestore(&ratelimit_lock, flags); | ||
1308 | if (lost) | ||
1309 | printk(KERN_WARNING "printk: %d messages suppressed.\n", lost); | ||
1310 | return 1; | ||
1311 | } | ||
1312 | missed++; | ||
1313 | spin_unlock_irqrestore(&ratelimit_lock, flags); | ||
1314 | return 0; | ||
1315 | } | 1333 | } |
1316 | EXPORT_SYMBOL(__printk_ratelimit); | 1334 | EXPORT_SYMBOL(__printk_ratelimit); |
1317 | 1335 | ||
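The open-coded token bucket moves out of printk.c into the shared __ratelimit() helper; callers keep the same semantics. The algorithm it implements, as a self-contained sketch (names are mine, not the lib/ API):

    /* tokens accrue with elapsed ticks, capped at burst * interval;
     * each allowed message spends one interval's worth of tokens */
    static int allow_message(long *toks, long *last, long now,
                             long interval, long burst)
    {
            *toks += now - *last;
            *last = now;
            if (*toks > burst * interval)
                    *toks = burst * interval;
            if (*toks >= interval) {
                    *toks -= interval;
                    return 1;       /* message allowed */
            }
            return 0;               /* message suppressed */
    }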
diff --git a/kernel/profile.c b/kernel/profile.c index 606d7387265c..ae7ead82cbc9 100644 --- a/kernel/profile.c +++ b/kernel/profile.c | |||
@@ -587,10 +587,10 @@ static int __init create_proc_profile(void) | |||
587 | return 0; | 587 | return 0; |
588 | if (create_hash_tables()) | 588 | if (create_hash_tables()) |
589 | return -1; | 589 | return -1; |
590 | entry = create_proc_entry("profile", S_IWUSR | S_IRUGO, NULL); | 590 | entry = proc_create("profile", S_IWUSR | S_IRUGO, |
591 | NULL, &proc_profile_operations); | ||
591 | if (!entry) | 592 | if (!entry) |
592 | return 0; | 593 | return 0; |
593 | entry->proc_fops = &proc_profile_operations; | ||
594 | entry->size = (1+prof_len) * sizeof(atomic_t); | 594 | entry->size = (1+prof_len) * sizeof(atomic_t); |
595 | hotcpu_notifier(profile_cpu_callback, 0); | 595 | hotcpu_notifier(profile_cpu_callback, 0); |
596 | return 0; | 596 | return 0; |
diff --git a/kernel/ptrace.c b/kernel/ptrace.c index dac4b4e57293..6c19e94fd0a5 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c | |||
@@ -73,7 +73,7 @@ void __ptrace_unlink(struct task_struct *child) | |||
73 | BUG_ON(!child->ptrace); | 73 | BUG_ON(!child->ptrace); |
74 | 74 | ||
75 | child->ptrace = 0; | 75 | child->ptrace = 0; |
76 | if (!list_empty(&child->ptrace_list)) { | 76 | if (ptrace_reparented(child)) { |
77 | list_del_init(&child->ptrace_list); | 77 | list_del_init(&child->ptrace_list); |
78 | remove_parent(child); | 78 | remove_parent(child); |
79 | child->parent = child->real_parent; | 79 | child->parent = child->real_parent; |
@@ -168,8 +168,6 @@ int ptrace_attach(struct task_struct *task) | |||
168 | audit_ptrace(task); | 168 | audit_ptrace(task); |
169 | 169 | ||
170 | retval = -EPERM; | 170 | retval = -EPERM; |
171 | if (task->pid <= 1) | ||
172 | goto out; | ||
173 | if (same_thread_group(task, current)) | 171 | if (same_thread_group(task, current)) |
174 | goto out; | 172 | goto out; |
175 | 173 | ||
@@ -208,8 +206,7 @@ repeat: | |||
208 | 206 | ||
209 | __ptrace_link(task, current); | 207 | __ptrace_link(task, current); |
210 | 208 | ||
211 | force_sig_specific(SIGSTOP, task); | 209 | send_sig_info(SIGSTOP, SEND_SIG_FORCED, task); |
212 | |||
213 | bad: | 210 | bad: |
214 | write_unlock_irqrestore(&tasklist_lock, flags); | 211 | write_unlock_irqrestore(&tasklist_lock, flags); |
215 | task_unlock(task); | 212 | task_unlock(task); |
@@ -522,12 +519,6 @@ struct task_struct *ptrace_get_task_struct(pid_t pid) | |||
522 | { | 519 | { |
523 | struct task_struct *child; | 520 | struct task_struct *child; |
524 | 521 | ||
525 | /* | ||
526 | * Tracing init is not allowed. | ||
527 | */ | ||
528 | if (pid == 1) | ||
529 | return ERR_PTR(-EPERM); | ||
530 | |||
531 | read_lock(&tasklist_lock); | 522 | read_lock(&tasklist_lock); |
532 | child = find_task_by_vpid(pid); | 523 | child = find_task_by_vpid(pid); |
533 | if (child) | 524 | if (child) |
@@ -543,7 +534,6 @@ struct task_struct *ptrace_get_task_struct(pid_t pid) | |||
543 | #define arch_ptrace_attach(child) do { } while (0) | 534 | #define arch_ptrace_attach(child) do { } while (0) |
544 | #endif | 535 | #endif |
545 | 536 | ||
546 | #ifndef __ARCH_SYS_PTRACE | ||
547 | asmlinkage long sys_ptrace(long request, long pid, long addr, long data) | 537 | asmlinkage long sys_ptrace(long request, long pid, long addr, long data) |
548 | { | 538 | { |
549 | struct task_struct *child; | 539 | struct task_struct *child; |
@@ -591,7 +581,6 @@ asmlinkage long sys_ptrace(long request, long pid, long addr, long data) | |||
591 | unlock_kernel(); | 581 | unlock_kernel(); |
592 | return ret; | 582 | return ret; |
593 | } | 583 | } |
594 | #endif /* __ARCH_SYS_PTRACE */ | ||
595 | 584 | ||
596 | int generic_ptrace_peekdata(struct task_struct *tsk, long addr, long data) | 585 | int generic_ptrace_peekdata(struct task_struct *tsk, long addr, long data) |
597 | { | 586 | { |
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index 47894f919d4e..33acc424667e 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c | |||
@@ -45,6 +45,7 @@ | |||
45 | #include <linux/byteorder/swabb.h> | 45 | #include <linux/byteorder/swabb.h> |
46 | #include <linux/stat.h> | 46 | #include <linux/stat.h> |
47 | #include <linux/srcu.h> | 47 | #include <linux/srcu.h> |
48 | #include <linux/slab.h> | ||
48 | 49 | ||
49 | MODULE_LICENSE("GPL"); | 50 | MODULE_LICENSE("GPL"); |
50 | MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and " | 51 | MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and " |
diff --git a/kernel/relay.c b/kernel/relay.c index d6204a485818..7de644cdec43 100644 --- a/kernel/relay.c +++ b/kernel/relay.c | |||
@@ -65,6 +65,35 @@ static struct vm_operations_struct relay_file_mmap_ops = { | |||
65 | .close = relay_file_mmap_close, | 65 | .close = relay_file_mmap_close, |
66 | }; | 66 | }; |
67 | 67 | ||
68 | /* | ||
69 | * allocate an array of pointers of struct page | ||
70 | */ | ||
71 | static struct page **relay_alloc_page_array(unsigned int n_pages) | ||
72 | { | ||
73 | struct page **array; | ||
74 | size_t pa_size = n_pages * sizeof(struct page *); | ||
75 | |||
76 | if (pa_size > PAGE_SIZE) { | ||
77 | array = vmalloc(pa_size); | ||
78 | if (array) | ||
79 | memset(array, 0, pa_size); | ||
80 | } else { | ||
81 | array = kzalloc(pa_size, GFP_KERNEL); | ||
82 | } | ||
83 | return array; | ||
84 | } | ||
85 | |||
86 | /* | ||
87 | * free an array of pointers of struct page | ||
88 | */ | ||
89 | static void relay_free_page_array(struct page **array) | ||
90 | { | ||
91 | if (is_vmalloc_addr(array)) | ||
92 | vfree(array); | ||
93 | else | ||
94 | kfree(array); | ||
95 | } | ||
96 | |||
68 | /** | 97 | /** |
69 | * relay_mmap_buf: - mmap channel buffer to process address space | 98 | * relay_mmap_buf: - mmap channel buffer to process address space |
70 | * @buf: relay channel buffer | 99 | * @buf: relay channel buffer |
@@ -109,7 +138,7 @@ static void *relay_alloc_buf(struct rchan_buf *buf, size_t *size) | |||
109 | *size = PAGE_ALIGN(*size); | 138 | *size = PAGE_ALIGN(*size); |
110 | n_pages = *size >> PAGE_SHIFT; | 139 | n_pages = *size >> PAGE_SHIFT; |
111 | 140 | ||
112 | buf->page_array = kcalloc(n_pages, sizeof(struct page *), GFP_KERNEL); | 141 | buf->page_array = relay_alloc_page_array(n_pages); |
113 | if (!buf->page_array) | 142 | if (!buf->page_array) |
114 | return NULL; | 143 | return NULL; |
115 | 144 | ||
@@ -130,7 +159,7 @@ static void *relay_alloc_buf(struct rchan_buf *buf, size_t *size) | |||
130 | depopulate: | 159 | depopulate: |
131 | for (j = 0; j < i; j++) | 160 | for (j = 0; j < i; j++) |
132 | __free_page(buf->page_array[j]); | 161 | __free_page(buf->page_array[j]); |
133 | kfree(buf->page_array); | 162 | relay_free_page_array(buf->page_array); |
134 | return NULL; | 163 | return NULL; |
135 | } | 164 | } |
136 | 165 | ||
@@ -189,7 +218,7 @@ static void relay_destroy_buf(struct rchan_buf *buf) | |||
189 | vunmap(buf->start); | 218 | vunmap(buf->start); |
190 | for (i = 0; i < buf->page_count; i++) | 219 | for (i = 0; i < buf->page_count; i++) |
191 | __free_page(buf->page_array[i]); | 220 | __free_page(buf->page_array[i]); |
192 | kfree(buf->page_array); | 221 | relay_free_page_array(buf->page_array); |
193 | } | 222 | } |
194 | chan->buf[buf->cpu] = NULL; | 223 | chan->buf[buf->cpu] = NULL; |
195 | kfree(buf->padding); | 224 | kfree(buf->padding); |
@@ -1162,7 +1191,7 @@ static ssize_t relay_file_splice_read(struct file *in, | |||
1162 | ret = 0; | 1191 | ret = 0; |
1163 | spliced = 0; | 1192 | spliced = 0; |
1164 | 1193 | ||
1165 | while (len) { | 1194 | while (len && !spliced) { |
1166 | ret = subbuf_splice_actor(in, ppos, pipe, len, flags, &nonpad_ret); | 1195 | ret = subbuf_splice_actor(in, ppos, pipe, len, flags, &nonpad_ret); |
1167 | if (ret < 0) | 1196 | if (ret < 0) |
1168 | break; | 1197 | break; |
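
Two separate fixes in relay: the page-pointer array gets a vmalloc fallback, because on large channels n_pages * sizeof(struct page *) can exceed what kmalloc-based allocation will reliably return, and relay_file_splice_read() now stops after the first sub-buffer it actually splices (the !spliced term) instead of looping over the whole requested length. The allocation pattern in isolation, as a sketch with invented helper names (note vmalloc() does not zero, hence the explicit memset in the patch):

#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/vmalloc.h>

static void *alloc_ptr_array(size_t size)
{
        void *p;

        if (size > PAGE_SIZE) {
                p = vmalloc(size);
                if (p)
                        memset(p, 0, size);     /* vmalloc() does not zero */
        } else {
                p = kzalloc(size, GFP_KERNEL);  /* slab path zeroes for us */
        }
        return p;
}

static void free_ptr_array(void *p)
{
        if (is_vmalloc_addr(p))                 /* which allocator was used? */
                vfree(p);
        else
                kfree(p);
}
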
diff --git a/kernel/res_counter.c b/kernel/res_counter.c index efbfc0fc232f..d3c61b4ebef2 100644 --- a/kernel/res_counter.c +++ b/kernel/res_counter.c | |||
@@ -10,6 +10,7 @@ | |||
10 | #include <linux/types.h> | 10 | #include <linux/types.h> |
11 | #include <linux/parser.h> | 11 | #include <linux/parser.h> |
12 | #include <linux/fs.h> | 12 | #include <linux/fs.h> |
13 | #include <linux/slab.h> | ||
13 | #include <linux/res_counter.h> | 14 | #include <linux/res_counter.h> |
14 | #include <linux/uaccess.h> | 15 | #include <linux/uaccess.h> |
15 | 16 | ||
@@ -27,6 +28,8 @@ int res_counter_charge_locked(struct res_counter *counter, unsigned long val) | |||
27 | } | 28 | } |
28 | 29 | ||
29 | counter->usage += val; | 30 | counter->usage += val; |
31 | if (counter->usage > counter->max_usage) | ||
32 | counter->max_usage = counter->usage; | ||
30 | return 0; | 33 | return 0; |
31 | } | 34 | } |
32 | 35 | ||
@@ -65,6 +68,8 @@ res_counter_member(struct res_counter *counter, int member) | |||
65 | switch (member) { | 68 | switch (member) { |
66 | case RES_USAGE: | 69 | case RES_USAGE: |
67 | return &counter->usage; | 70 | return &counter->usage; |
71 | case RES_MAX_USAGE: | ||
72 | return &counter->max_usage; | ||
68 | case RES_LIMIT: | 73 | case RES_LIMIT: |
69 | return &counter->limit; | 74 | return &counter->limit; |
70 | case RES_FAILCNT: | 75 | case RES_FAILCNT: |
@@ -92,6 +97,11 @@ ssize_t res_counter_read(struct res_counter *counter, int member, | |||
92 | pos, buf, s - buf); | 97 | pos, buf, s - buf); |
93 | } | 98 | } |
94 | 99 | ||
100 | u64 res_counter_read_u64(struct res_counter *counter, int member) | ||
101 | { | ||
102 | return *res_counter_member(counter, member); | ||
103 | } | ||
104 | |||
95 | ssize_t res_counter_write(struct res_counter *counter, int member, | 105 | ssize_t res_counter_write(struct res_counter *counter, int member, |
96 | const char __user *userbuf, size_t nbytes, loff_t *pos, | 106 | const char __user *userbuf, size_t nbytes, loff_t *pos, |
97 | int (*write_strategy)(char *st_buf, unsigned long long *val)) | 107 | int (*write_strategy)(char *st_buf, unsigned long long *val)) |
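
res_counter grows a high-watermark: every successful charge updates max_usage, and res_counter_read_u64() gives in-kernel readers direct access to any member without the user-buffer plumbing of res_counter_read(). How a controller might consume the watermark, as a sketch (the surrounding function is invented; res_counter_charge() and the RES_* members are real):

#include <linux/kernel.h>
#include <linux/res_counter.h>

static int charge_and_trace(struct res_counter *cnt, unsigned long val)
{
        int ret = res_counter_charge(cnt, val); /* takes cnt->lock itself */

        if (!ret)
                pr_debug("usage=%llu max=%llu\n",
                         (unsigned long long)res_counter_read_u64(cnt, RES_USAGE),
                         (unsigned long long)res_counter_read_u64(cnt, RES_MAX_USAGE));
        return ret;
}
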
diff --git a/kernel/resource.c b/kernel/resource.c index cee12cc47cab..74af2d7cb5a1 100644 --- a/kernel/resource.c +++ b/kernel/resource.c | |||
@@ -131,14 +131,8 @@ static const struct file_operations proc_iomem_operations = { | |||
131 | 131 | ||
132 | static int __init ioresources_init(void) | 132 | static int __init ioresources_init(void) |
133 | { | 133 | { |
134 | struct proc_dir_entry *entry; | 134 | proc_create("ioports", 0, NULL, &proc_ioports_operations); |
135 | 135 | proc_create("iomem", 0, NULL, &proc_iomem_operations); | |
136 | entry = create_proc_entry("ioports", 0, NULL); | ||
137 | if (entry) | ||
138 | entry->proc_fops = &proc_ioports_operations; | ||
139 | entry = create_proc_entry("iomem", 0, NULL); | ||
140 | if (entry) | ||
141 | entry->proc_fops = &proc_iomem_operations; | ||
142 | return 0; | 136 | return 0; |
143 | } | 137 | } |
144 | __initcall(ioresources_init); | 138 | __initcall(ioresources_init); |
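
create_proc_entry() publishes the entry before ->proc_fops is assigned, so a concurrent lookup in that window can hit an entry with no operations; proc_create() takes the fops up front, closing the window and shortening the code. The same conversion for any other caller looks like this (sketch; "myfile" and my_fops are made up):

#include <linux/errno.h>
#include <linux/proc_fs.h>

static const struct file_operations my_fops;    /* ->open etc. assumed set elsewhere */

static int __init myfile_init(void)
{
        /* Name, mode and fops are registered atomically; NULL on failure,
         * so a half-initialized entry is never visible. */
        if (!proc_create("myfile", 0444, NULL, &my_fops))
                return -ENOMEM;
        return 0;
}
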
diff --git a/kernel/sched.c b/kernel/sched.c index 740fb409e5bb..34bcc5bc120e 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -8025,7 +8025,7 @@ static void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq, | |||
8025 | 8025 | ||
8026 | se->my_q = cfs_rq; | 8026 | se->my_q = cfs_rq; |
8027 | se->load.weight = tg->shares; | 8027 | se->load.weight = tg->shares; |
8028 | se->load.inv_weight = div64_64(1ULL<<32, se->load.weight); | 8028 | se->load.inv_weight = div64_u64(1ULL<<32, se->load.weight); |
8029 | se->parent = parent; | 8029 | se->parent = parent; |
8030 | } | 8030 | } |
8031 | #endif | 8031 | #endif |
@@ -8692,7 +8692,7 @@ static void __set_se_shares(struct sched_entity *se, unsigned long shares) | |||
8692 | dequeue_entity(cfs_rq, se, 0); | 8692 | dequeue_entity(cfs_rq, se, 0); |
8693 | 8693 | ||
8694 | se->load.weight = shares; | 8694 | se->load.weight = shares; |
8695 | se->load.inv_weight = div64_64((1ULL<<32), shares); | 8695 | se->load.inv_weight = div64_u64((1ULL<<32), shares); |
8696 | 8696 | ||
8697 | if (on_rq) | 8697 | if (on_rq) |
8698 | enqueue_entity(cfs_rq, se, 0); | 8698 | enqueue_entity(cfs_rq, se, 0); |
@@ -8787,7 +8787,7 @@ static unsigned long to_ratio(u64 period, u64 runtime) | |||
8787 | if (runtime == RUNTIME_INF) | 8787 | if (runtime == RUNTIME_INF) |
8788 | return 1ULL << 16; | 8788 | return 1ULL << 16; |
8789 | 8789 | ||
8790 | return div64_64(runtime << 16, period); | 8790 | return div64_u64(runtime << 16, period); |
8791 | } | 8791 | } |
8792 | 8792 | ||
8793 | #ifdef CONFIG_CGROUP_SCHED | 8793 | #ifdef CONFIG_CGROUP_SCHED |
@@ -9057,13 +9057,13 @@ cpu_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, | |||
9057 | } | 9057 | } |
9058 | 9058 | ||
9059 | #ifdef CONFIG_FAIR_GROUP_SCHED | 9059 | #ifdef CONFIG_FAIR_GROUP_SCHED |
9060 | static int cpu_shares_write_uint(struct cgroup *cgrp, struct cftype *cftype, | 9060 | static int cpu_shares_write_u64(struct cgroup *cgrp, struct cftype *cftype, |
9061 | u64 shareval) | 9061 | u64 shareval) |
9062 | { | 9062 | { |
9063 | return sched_group_set_shares(cgroup_tg(cgrp), shareval); | 9063 | return sched_group_set_shares(cgroup_tg(cgrp), shareval); |
9064 | } | 9064 | } |
9065 | 9065 | ||
9066 | static u64 cpu_shares_read_uint(struct cgroup *cgrp, struct cftype *cft) | 9066 | static u64 cpu_shares_read_u64(struct cgroup *cgrp, struct cftype *cft) |
9067 | { | 9067 | { |
9068 | struct task_group *tg = cgroup_tg(cgrp); | 9068 | struct task_group *tg = cgroup_tg(cgrp); |
9069 | 9069 | ||
@@ -9073,48 +9073,14 @@ static u64 cpu_shares_read_uint(struct cgroup *cgrp, struct cftype *cft) | |||
9073 | 9073 | ||
9074 | #ifdef CONFIG_RT_GROUP_SCHED | 9074 | #ifdef CONFIG_RT_GROUP_SCHED |
9075 | static ssize_t cpu_rt_runtime_write(struct cgroup *cgrp, struct cftype *cft, | 9075 | static ssize_t cpu_rt_runtime_write(struct cgroup *cgrp, struct cftype *cft, |
9076 | struct file *file, | 9076 | s64 val) |
9077 | const char __user *userbuf, | ||
9078 | size_t nbytes, loff_t *unused_ppos) | ||
9079 | { | 9077 | { |
9080 | char buffer[64]; | 9078 | return sched_group_set_rt_runtime(cgroup_tg(cgrp), val); |
9081 | int retval = 0; | ||
9082 | s64 val; | ||
9083 | char *end; | ||
9084 | |||
9085 | if (!nbytes) | ||
9086 | return -EINVAL; | ||
9087 | if (nbytes >= sizeof(buffer)) | ||
9088 | return -E2BIG; | ||
9089 | if (copy_from_user(buffer, userbuf, nbytes)) | ||
9090 | return -EFAULT; | ||
9091 | |||
9092 | buffer[nbytes] = 0; /* nul-terminate */ | ||
9093 | |||
9094 | /* strip newline if necessary */ | ||
9095 | if (nbytes && (buffer[nbytes-1] == '\n')) | ||
9096 | buffer[nbytes-1] = 0; | ||
9097 | val = simple_strtoll(buffer, &end, 0); | ||
9098 | if (*end) | ||
9099 | return -EINVAL; | ||
9100 | |||
9101 | /* Pass to subsystem */ | ||
9102 | retval = sched_group_set_rt_runtime(cgroup_tg(cgrp), val); | ||
9103 | if (!retval) | ||
9104 | retval = nbytes; | ||
9105 | return retval; | ||
9106 | } | 9079 | } |
9107 | 9080 | ||
9108 | static ssize_t cpu_rt_runtime_read(struct cgroup *cgrp, struct cftype *cft, | 9081 | static s64 cpu_rt_runtime_read(struct cgroup *cgrp, struct cftype *cft) |
9109 | struct file *file, | ||
9110 | char __user *buf, size_t nbytes, | ||
9111 | loff_t *ppos) | ||
9112 | { | 9082 | { |
9113 | char tmp[64]; | 9083 | return sched_group_rt_runtime(cgroup_tg(cgrp)); |
9114 | long val = sched_group_rt_runtime(cgroup_tg(cgrp)); | ||
9115 | int len = sprintf(tmp, "%ld\n", val); | ||
9116 | |||
9117 | return simple_read_from_buffer(buf, nbytes, ppos, tmp, len); | ||
9118 | } | 9084 | } |
9119 | 9085 | ||
9120 | static int cpu_rt_period_write_uint(struct cgroup *cgrp, struct cftype *cftype, | 9086 | static int cpu_rt_period_write_uint(struct cgroup *cgrp, struct cftype *cftype, |
@@ -9133,20 +9099,20 @@ static struct cftype cpu_files[] = { | |||
9133 | #ifdef CONFIG_FAIR_GROUP_SCHED | 9099 | #ifdef CONFIG_FAIR_GROUP_SCHED |
9134 | { | 9100 | { |
9135 | .name = "shares", | 9101 | .name = "shares", |
9136 | .read_uint = cpu_shares_read_uint, | 9102 | .read_u64 = cpu_shares_read_u64, |
9137 | .write_uint = cpu_shares_write_uint, | 9103 | .write_u64 = cpu_shares_write_u64, |
9138 | }, | 9104 | }, |
9139 | #endif | 9105 | #endif |
9140 | #ifdef CONFIG_RT_GROUP_SCHED | 9106 | #ifdef CONFIG_RT_GROUP_SCHED |
9141 | { | 9107 | { |
9142 | .name = "rt_runtime_us", | 9108 | .name = "rt_runtime_us", |
9143 | .read = cpu_rt_runtime_read, | 9109 | .read_s64 = cpu_rt_runtime_read, |
9144 | .write = cpu_rt_runtime_write, | 9110 | .write_s64 = cpu_rt_runtime_write, |
9145 | }, | 9111 | }, |
9146 | { | 9112 | { |
9147 | .name = "rt_period_us", | 9113 | .name = "rt_period_us", |
9148 | .read_uint = cpu_rt_period_read_uint, | 9114 | .read_u64 = cpu_rt_period_read_uint, |
9149 | .write_uint = cpu_rt_period_write_uint, | 9115 | .write_u64 = cpu_rt_period_write_uint, |
9150 | }, | 9116 | }, |
9151 | #endif | 9117 | #endif |
9152 | }; | 9118 | }; |
@@ -9277,8 +9243,8 @@ out: | |||
9277 | static struct cftype files[] = { | 9243 | static struct cftype files[] = { |
9278 | { | 9244 | { |
9279 | .name = "usage", | 9245 | .name = "usage", |
9280 | .read_uint = cpuusage_read, | 9246 | .read_u64 = cpuusage_read, |
9281 | .write_uint = cpuusage_write, | 9247 | .write_u64 = cpuusage_write, |
9282 | }, | 9248 | }, |
9283 | }; | 9249 | }; |
9284 | 9250 | ||
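
Two mechanical conversions run through sched.c. First, div64_64() is renamed div64_u64(), making the operand signedness explicit. Second, the cgroup attribute handlers move from raw file read()/write() methods to the typed read_u64/write_u64/read_s64/write_s64 callbacks, which is why the forty-odd lines of copy_from_user() and simple_strtoll() parsing in cpu_rt_runtime_write() collapse to a single call: the cgroup core now owns the buffer handling and hands the handler a parsed value. The resulting handler shape for a hypothetical subsystem (the myctl_* names are invented; the cftype fields are real):

static u64 myctl_quota_read(struct cgroup *cgrp, struct cftype *cft)
{
        return myctl_state(cgrp)->quota;        /* core formats it as %llu */
}

static int myctl_quota_write(struct cgroup *cgrp, struct cftype *cft, u64 val)
{
        return myctl_set_quota(myctl_state(cgrp), val); /* core parsed val */
}

static struct cftype myctl_files[] = {
        {
                .name           = "quota",
                .read_u64       = myctl_quota_read,
                .write_u64      = myctl_quota_write,
        },
};
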
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index f3f4af4b8b0f..6b4a12558e88 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c | |||
@@ -277,12 +277,9 @@ static int __init init_sched_debug_procfs(void) | |||
277 | { | 277 | { |
278 | struct proc_dir_entry *pe; | 278 | struct proc_dir_entry *pe; |
279 | 279 | ||
280 | pe = create_proc_entry("sched_debug", 0644, NULL); | 280 | pe = proc_create("sched_debug", 0644, NULL, &sched_debug_fops); |
281 | if (!pe) | 281 | if (!pe) |
282 | return -ENOMEM; | 282 | return -ENOMEM; |
283 | |||
284 | pe->proc_fops = &sched_debug_fops; | ||
285 | |||
286 | return 0; | 283 | return 0; |
287 | } | 284 | } |
288 | 285 | ||
@@ -360,8 +357,8 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m) | |||
360 | 357 | ||
361 | avg_per_cpu = p->se.sum_exec_runtime; | 358 | avg_per_cpu = p->se.sum_exec_runtime; |
362 | if (p->se.nr_migrations) { | 359 | if (p->se.nr_migrations) { |
363 | avg_per_cpu = div64_64(avg_per_cpu, | 360 | avg_per_cpu = div64_u64(avg_per_cpu, |
364 | p->se.nr_migrations); | 361 | p->se.nr_migrations); |
365 | } else { | 362 | } else { |
366 | avg_per_cpu = -1LL; | 363 | avg_per_cpu = -1LL; |
367 | } | 364 | } |
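
sched_debug.c picks up both conversions seen above: proc_create() for its procfs file and div64_u64() for the average. The rename matters most on 32-bit, where a plain / on two u64 operands would emit a call to libgcc's __udivdi3, which the kernel does not link against, so all 64-by-64 division must go through the helper. The idiom, as a sketch:

#include <linux/math64.h>

/* 64/64 division that also builds on 32-bit kernels. */
static u64 avg_ns(u64 total_ns, u64 nr)
{
        return nr ? div64_u64(total_ns, nr) : 0;
}
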
diff --git a/kernel/signal.c b/kernel/signal.c index 64ad0ed15992..72bb4f51f963 100644 --- a/kernel/signal.c +++ b/kernel/signal.c | |||
@@ -39,11 +39,19 @@ | |||
39 | 39 | ||
40 | static struct kmem_cache *sigqueue_cachep; | 40 | static struct kmem_cache *sigqueue_cachep; |
41 | 41 | ||
42 | static int __sig_ignored(struct task_struct *t, int sig) | ||
43 | { | ||
44 | void __user *handler; | ||
45 | |||
46 | /* Is it explicitly or implicitly ignored? */ | ||
47 | |||
48 | handler = t->sighand->action[sig - 1].sa.sa_handler; | ||
49 | return handler == SIG_IGN || | ||
50 | (handler == SIG_DFL && sig_kernel_ignore(sig)); | ||
51 | } | ||
42 | 52 | ||
43 | static int sig_ignored(struct task_struct *t, int sig) | 53 | static int sig_ignored(struct task_struct *t, int sig) |
44 | { | 54 | { |
45 | void __user * handler; | ||
46 | |||
47 | /* | 55 | /* |
48 | * Tracers always want to know about signals.. | 56 | * Tracers always want to know about signals.. |
49 | */ | 57 | */ |
@@ -58,10 +66,7 @@ static int sig_ignored(struct task_struct *t, int sig) | |||
58 | if (sigismember(&t->blocked, sig) || sigismember(&t->real_blocked, sig)) | 66 | if (sigismember(&t->blocked, sig) || sigismember(&t->real_blocked, sig)) |
59 | return 0; | 67 | return 0; |
60 | 68 | ||
61 | /* Is it explicitly or implicitly ignored? */ | 69 | return __sig_ignored(t, sig); |
62 | handler = t->sighand->action[sig-1].sa.sa_handler; | ||
63 | return handler == SIG_IGN || | ||
64 | (handler == SIG_DFL && sig_kernel_ignore(sig)); | ||
65 | } | 70 | } |
66 | 71 | ||
67 | /* | 72 | /* |
@@ -372,7 +377,7 @@ static int __dequeue_signal(struct sigpending *pending, sigset_t *mask, | |||
372 | */ | 377 | */ |
373 | int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info) | 378 | int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info) |
374 | { | 379 | { |
375 | int signr = 0; | 380 | int signr; |
376 | 381 | ||
377 | /* We only dequeue private signals from ourselves, we don't let | 382 | /* We only dequeue private signals from ourselves, we don't let |
378 | * signalfd steal them | 383 | * signalfd steal them |
@@ -405,8 +410,12 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info) | |||
405 | } | 410 | } |
406 | } | 411 | } |
407 | } | 412 | } |
413 | |||
408 | recalc_sigpending(); | 414 | recalc_sigpending(); |
409 | if (signr && unlikely(sig_kernel_stop(signr))) { | 415 | if (!signr) |
416 | return 0; | ||
417 | |||
418 | if (unlikely(sig_kernel_stop(signr))) { | ||
410 | /* | 419 | /* |
411 | * Set a marker that we have dequeued a stop signal. Our | 420 | * Set a marker that we have dequeued a stop signal. Our |
412 | * caller might release the siglock and then the pending | 421 | * caller might release the siglock and then the pending |
@@ -422,9 +431,7 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info) | |||
422 | if (!(tsk->signal->flags & SIGNAL_GROUP_EXIT)) | 431 | if (!(tsk->signal->flags & SIGNAL_GROUP_EXIT)) |
423 | tsk->signal->flags |= SIGNAL_STOP_DEQUEUED; | 432 | tsk->signal->flags |= SIGNAL_STOP_DEQUEUED; |
424 | } | 433 | } |
425 | if (signr && | 434 | if ((info->si_code & __SI_MASK) == __SI_TIMER && info->si_sys_private) { |
426 | ((info->si_code & __SI_MASK) == __SI_TIMER) && | ||
427 | info->si_sys_private) { | ||
428 | /* | 435 | /* |
429 | * Release the siglock to ensure proper locking order | 436 | * Release the siglock to ensure proper locking order |
430 | * of timer locks outside of siglocks. Note, we leave | 437 | * of timer locks outside of siglocks. Note, we leave |
@@ -526,21 +533,34 @@ static int rm_from_queue(unsigned long mask, struct sigpending *s) | |||
526 | static int check_kill_permission(int sig, struct siginfo *info, | 533 | static int check_kill_permission(int sig, struct siginfo *info, |
527 | struct task_struct *t) | 534 | struct task_struct *t) |
528 | { | 535 | { |
529 | int error = -EINVAL; | 536 | struct pid *sid; |
537 | int error; | ||
538 | |||
530 | if (!valid_signal(sig)) | 539 | if (!valid_signal(sig)) |
531 | return error; | 540 | return -EINVAL; |
532 | 541 | ||
533 | if (info == SEND_SIG_NOINFO || (!is_si_special(info) && SI_FROMUSER(info))) { | 542 | if (info != SEND_SIG_NOINFO && (is_si_special(info) || SI_FROMKERNEL(info))) |
534 | error = audit_signal_info(sig, t); /* Let audit system see the signal */ | 543 | return 0; |
535 | if (error) | 544 | |
536 | return error; | 545 | error = audit_signal_info(sig, t); /* Let audit system see the signal */ |
537 | error = -EPERM; | 546 | if (error) |
538 | if (((sig != SIGCONT) || | ||
539 | (task_session_nr(current) != task_session_nr(t))) | ||
540 | && (current->euid ^ t->suid) && (current->euid ^ t->uid) | ||
541 | && (current->uid ^ t->suid) && (current->uid ^ t->uid) | ||
542 | && !capable(CAP_KILL)) | ||
543 | return error; | 547 | return error; |
548 | |||
549 | if ((current->euid ^ t->suid) && (current->euid ^ t->uid) && | ||
550 | (current->uid ^ t->suid) && (current->uid ^ t->uid) && | ||
551 | !capable(CAP_KILL)) { | ||
552 | switch (sig) { | ||
553 | case SIGCONT: | ||
554 | sid = task_session(t); | ||
555 | /* | ||
556 | * We don't return the error if sid == NULL. The | ||
557 | * task was unhashed, the caller must notice this. | ||
558 | */ | ||
559 | if (!sid || sid == task_session(current)) | ||
560 | break; | ||
561 | default: | ||
562 | return -EPERM; | ||
563 | } | ||
544 | } | 564 | } |
545 | 565 | ||
546 | return security_task_kill(t, info, sig, 0); | 566 | return security_task_kill(t, info, sig, 0); |
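
check_kill_permission() is restructured rather than just reindented: signals carrying kernel-generated siginfo now return 0 before the audit and capability checks, which only ever applied to user-originated signals. The uid test keeps the old XOR idiom, where x ^ y is nonzero exactly when x != y, so the &&-chain holds only if the sender matches none of the target's real or saved ids. The SIGCONT-to-same-session exemption becomes an explicit switch case, and a NULL session (an unhashed task) deliberately does not return an error, so the caller notices the exit itself. The XOR chain spelled out with != (sketch):

/* Equivalent of the (a ^ b) && ... test above. */
static int sender_uid_mismatch(const struct task_struct *cur,
                               const struct task_struct *t)
{
        return cur->euid != t->suid && cur->euid != t->uid &&
               cur->uid  != t->suid && cur->uid  != t->uid;
}
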
@@ -550,62 +570,44 @@ static int check_kill_permission(int sig, struct siginfo *info, | |||
550 | static void do_notify_parent_cldstop(struct task_struct *tsk, int why); | 570 | static void do_notify_parent_cldstop(struct task_struct *tsk, int why); |
551 | 571 | ||
552 | /* | 572 | /* |
553 | * Handle magic process-wide effects of stop/continue signals. | 573 | * Handle magic process-wide effects of stop/continue signals. Unlike |
554 | * Unlike the signal actions, these happen immediately at signal-generation | 574 | * the signal actions, these happen immediately at signal-generation |
555 | * time regardless of blocking, ignoring, or handling. This does the | 575 | * time regardless of blocking, ignoring, or handling. This does the |
556 | * actual continuing for SIGCONT, but not the actual stopping for stop | 576 | * actual continuing for SIGCONT, but not the actual stopping for stop |
557 | * signals. The process stop is done as a signal action for SIG_DFL. | 577 | * signals. The process stop is done as a signal action for SIG_DFL. |
578 | * | ||
579 | * Returns true if the signal should be actually delivered, otherwise | ||
580 | * it should be dropped. | ||
558 | */ | 581 | */ |
559 | static void handle_stop_signal(int sig, struct task_struct *p) | 582 | static int prepare_signal(int sig, struct task_struct *p) |
560 | { | 583 | { |
584 | struct signal_struct *signal = p->signal; | ||
561 | struct task_struct *t; | 585 | struct task_struct *t; |
562 | 586 | ||
563 | if (p->signal->flags & SIGNAL_GROUP_EXIT) | 587 | if (unlikely(signal->flags & SIGNAL_GROUP_EXIT)) { |
564 | /* | 588 | /* |
565 | * The process is in the middle of dying already. | 589 | * The process is in the middle of dying, nothing to do. |
566 | */ | 590 | */ |
567 | return; | 591 | } else if (sig_kernel_stop(sig)) { |
568 | |||
569 | if (sig_kernel_stop(sig)) { | ||
570 | /* | 592 | /* |
571 | * This is a stop signal. Remove SIGCONT from all queues. | 593 | * This is a stop signal. Remove SIGCONT from all queues. |
572 | */ | 594 | */ |
573 | rm_from_queue(sigmask(SIGCONT), &p->signal->shared_pending); | 595 | rm_from_queue(sigmask(SIGCONT), &signal->shared_pending); |
574 | t = p; | 596 | t = p; |
575 | do { | 597 | do { |
576 | rm_from_queue(sigmask(SIGCONT), &t->pending); | 598 | rm_from_queue(sigmask(SIGCONT), &t->pending); |
577 | t = next_thread(t); | 599 | } while_each_thread(p, t); |
578 | } while (t != p); | ||
579 | } else if (sig == SIGCONT) { | 600 | } else if (sig == SIGCONT) { |
601 | unsigned int why; | ||
580 | /* | 602 | /* |
581 | * Remove all stop signals from all queues, | 603 | * Remove all stop signals from all queues, |
582 | * and wake all threads. | 604 | * and wake all threads. |
583 | */ | 605 | */ |
584 | if (unlikely(p->signal->group_stop_count > 0)) { | 606 | rm_from_queue(SIG_KERNEL_STOP_MASK, &signal->shared_pending); |
585 | /* | ||
586 | * There was a group stop in progress. We'll | ||
587 | * pretend it finished before we got here. We are | ||
588 | * obliged to report it to the parent: if the | ||
589 | * SIGSTOP happened "after" this SIGCONT, then it | ||
590 | * would have cleared this pending SIGCONT. If it | ||
591 | * happened "before" this SIGCONT, then the parent | ||
592 | * got the SIGCHLD about the stop finishing before | ||
593 | * the continue happened. We do the notification | ||
594 | * now, and it's as if the stop had finished and | ||
595 | * the SIGCHLD was pending on entry to this kill. | ||
596 | */ | ||
597 | p->signal->group_stop_count = 0; | ||
598 | p->signal->flags = SIGNAL_STOP_CONTINUED; | ||
599 | spin_unlock(&p->sighand->siglock); | ||
600 | do_notify_parent_cldstop(p, CLD_STOPPED); | ||
601 | spin_lock(&p->sighand->siglock); | ||
602 | } | ||
603 | rm_from_queue(SIG_KERNEL_STOP_MASK, &p->signal->shared_pending); | ||
604 | t = p; | 607 | t = p; |
605 | do { | 608 | do { |
606 | unsigned int state; | 609 | unsigned int state; |
607 | rm_from_queue(SIG_KERNEL_STOP_MASK, &t->pending); | 610 | rm_from_queue(SIG_KERNEL_STOP_MASK, &t->pending); |
608 | |||
609 | /* | 611 | /* |
610 | * If there is a handler for SIGCONT, we must make | 612 | * If there is a handler for SIGCONT, we must make |
611 | * sure that no thread returns to user mode before | 613 | * sure that no thread returns to user mode before |
@@ -615,7 +617,7 @@ static void handle_stop_signal(int sig, struct task_struct *p) | |||
615 | * running the handler. With the TIF_SIGPENDING | 617 | * running the handler. With the TIF_SIGPENDING |
616 | * flag set, the thread will pause and acquire the | 618 | * flag set, the thread will pause and acquire the |
617 | * siglock that we hold now and until we've queued | 619 | * siglock that we hold now and until we've queued |
618 | * the pending signal. | 620 | * the pending signal. |
619 | * | 621 | * |
620 | * Wake up the stopped thread _after_ setting | 622 | * Wake up the stopped thread _after_ setting |
621 | * TIF_SIGPENDING | 623 | * TIF_SIGPENDING |
@@ -626,49 +628,163 @@ static void handle_stop_signal(int sig, struct task_struct *p) | |||
626 | state |= TASK_INTERRUPTIBLE; | 628 | state |= TASK_INTERRUPTIBLE; |
627 | } | 629 | } |
628 | wake_up_state(t, state); | 630 | wake_up_state(t, state); |
631 | } while_each_thread(p, t); | ||
629 | 632 | ||
630 | t = next_thread(t); | 633 | /* |
631 | } while (t != p); | 634 | * Notify the parent with CLD_CONTINUED if we were stopped. |
635 | * | ||
636 | * If we were in the middle of a group stop, we pretend it | ||
637 | * was already finished, and then continued. Since SIGCHLD | ||
638 | * doesn't queue we report only CLD_STOPPED, as if the next | ||
639 | * CLD_CONTINUED was dropped. | ||
640 | */ | ||
641 | why = 0; | ||
642 | if (signal->flags & SIGNAL_STOP_STOPPED) | ||
643 | why |= SIGNAL_CLD_CONTINUED; | ||
644 | else if (signal->group_stop_count) | ||
645 | why |= SIGNAL_CLD_STOPPED; | ||
632 | 646 | ||
633 | if (p->signal->flags & SIGNAL_STOP_STOPPED) { | 647 | if (why) { |
634 | /* | 648 | /* |
635 | * We were in fact stopped, and are now continued. | 649 | * The first thread which returns from finish_stop() |
636 | * Notify the parent with CLD_CONTINUED. | 650 | * will take ->siglock, notice SIGNAL_CLD_MASK, and |
651 | * notify its parent. See get_signal_to_deliver(). | ||
637 | */ | 652 | */ |
638 | p->signal->flags = SIGNAL_STOP_CONTINUED; | 653 | signal->flags = why | SIGNAL_STOP_CONTINUED; |
639 | p->signal->group_exit_code = 0; | 654 | signal->group_stop_count = 0; |
640 | spin_unlock(&p->sighand->siglock); | 655 | signal->group_exit_code = 0; |
641 | do_notify_parent_cldstop(p, CLD_CONTINUED); | ||
642 | spin_lock(&p->sighand->siglock); | ||
643 | } else { | 656 | } else { |
644 | /* | 657 | /* |
645 | * We are not stopped, but there could be a stop | 658 | * We are not stopped, but there could be a stop |
646 | * signal in the middle of being processed after | 659 | * signal in the middle of being processed after |
647 | * being removed from the queue. Clear that too. | 660 | * being removed from the queue. Clear that too. |
648 | */ | 661 | */ |
649 | p->signal->flags = 0; | 662 | signal->flags &= ~SIGNAL_STOP_DEQUEUED; |
650 | } | 663 | } |
651 | } else if (sig == SIGKILL) { | 664 | } |
665 | |||
666 | return !sig_ignored(p, sig); | ||
667 | } | ||
668 | |||
669 | /* | ||
670 | * Test if P wants to take SIG. After we've checked all threads with this, | ||
671 | * it's equivalent to finding no threads not blocking SIG. Any threads not | ||
672 | * blocking SIG were ruled out because they are not running and already | ||
673 | * have pending signals. Such threads will dequeue from the shared queue | ||
674 | * as soon as they're available, so putting the signal on the shared queue | ||
675 | * will be equivalent to sending it to one such thread. | ||
676 | */ | ||
677 | static inline int wants_signal(int sig, struct task_struct *p) | ||
678 | { | ||
679 | if (sigismember(&p->blocked, sig)) | ||
680 | return 0; | ||
681 | if (p->flags & PF_EXITING) | ||
682 | return 0; | ||
683 | if (sig == SIGKILL) | ||
684 | return 1; | ||
685 | if (task_is_stopped_or_traced(p)) | ||
686 | return 0; | ||
687 | return task_curr(p) || !signal_pending(p); | ||
688 | } | ||
689 | |||
690 | static void complete_signal(int sig, struct task_struct *p, int group) | ||
691 | { | ||
692 | struct signal_struct *signal = p->signal; | ||
693 | struct task_struct *t; | ||
694 | |||
695 | /* | ||
696 | * Now find a thread we can wake up to take the signal off the queue. | ||
697 | * | ||
698 | * If the main thread wants the signal, it gets first crack. | ||
699 | * Probably the least surprising to the average bear. | ||
700 | */ | ||
701 | if (wants_signal(sig, p)) | ||
702 | t = p; | ||
703 | else if (!group || thread_group_empty(p)) | ||
704 | /* | ||
705 | * There is just one thread and it does not need to be woken. | ||
706 | * It will dequeue unblocked signals before it runs again. | ||
707 | */ | ||
708 | return; | ||
709 | else { | ||
652 | /* | 710 | /* |
653 | * Make sure that any pending stop signal already dequeued | 711 | * Otherwise try to find a suitable thread. |
654 | * is undone by the wakeup for SIGKILL. | ||
655 | */ | 712 | */ |
656 | p->signal->flags = 0; | 713 | t = signal->curr_target; |
714 | while (!wants_signal(sig, t)) { | ||
715 | t = next_thread(t); | ||
716 | if (t == signal->curr_target) | ||
717 | /* | ||
718 | * No thread needs to be woken. | ||
719 | * Any eligible threads will see | ||
720 | * the signal in the queue soon. | ||
721 | */ | ||
722 | return; | ||
723 | } | ||
724 | signal->curr_target = t; | ||
657 | } | 725 | } |
726 | |||
727 | /* | ||
728 | * Found a killable thread. If the signal will be fatal, | ||
729 | * then start taking the whole group down immediately. | ||
730 | */ | ||
731 | if (sig_fatal(p, sig) && | ||
732 | !(signal->flags & (SIGNAL_UNKILLABLE | SIGNAL_GROUP_EXIT)) && | ||
733 | !sigismember(&t->real_blocked, sig) && | ||
734 | (sig == SIGKILL || !(t->ptrace & PT_PTRACED))) { | ||
735 | /* | ||
736 | * This signal will be fatal to the whole group. | ||
737 | */ | ||
738 | if (!sig_kernel_coredump(sig)) { | ||
739 | /* | ||
740 | * Start a group exit and wake everybody up. | ||
741 | * This way we don't have other threads | ||
742 | * running and doing things after a slower | ||
743 | * thread has the fatal signal pending. | ||
744 | */ | ||
745 | signal->flags = SIGNAL_GROUP_EXIT; | ||
746 | signal->group_exit_code = sig; | ||
747 | signal->group_stop_count = 0; | ||
748 | t = p; | ||
749 | do { | ||
750 | sigaddset(&t->pending.signal, SIGKILL); | ||
751 | signal_wake_up(t, 1); | ||
752 | } while_each_thread(p, t); | ||
753 | return; | ||
754 | } | ||
755 | } | ||
756 | |||
757 | /* | ||
758 | * The signal is already in the shared-pending queue. | ||
759 | * Tell the chosen thread to wake up and dequeue it. | ||
760 | */ | ||
761 | signal_wake_up(t, sig == SIGKILL); | ||
762 | return; | ||
763 | } | ||
764 | |||
765 | static inline int legacy_queue(struct sigpending *signals, int sig) | ||
766 | { | ||
767 | return (sig < SIGRTMIN) && sigismember(&signals->signal, sig); | ||
658 | } | 768 | } |
659 | 769 | ||
660 | static int send_signal(int sig, struct siginfo *info, struct task_struct *t, | 770 | static int send_signal(int sig, struct siginfo *info, struct task_struct *t, |
661 | struct sigpending *signals) | 771 | int group) |
662 | { | 772 | { |
663 | struct sigqueue * q = NULL; | 773 | struct sigpending *pending; |
664 | int ret = 0; | 774 | struct sigqueue *q; |
775 | |||
776 | assert_spin_locked(&t->sighand->siglock); | ||
777 | if (!prepare_signal(sig, t)) | ||
778 | return 0; | ||
665 | 779 | ||
780 | pending = group ? &t->signal->shared_pending : &t->pending; | ||
666 | /* | 781 | /* |
667 | * Deliver the signal to listening signalfds. This must be called | 782 | * Short-circuit ignored signals and support queuing |
668 | * with the sighand lock held. | 783 | * exactly one non-rt signal, so that we can get more |
784 | * detailed information about the cause of the signal. | ||
669 | */ | 785 | */ |
670 | signalfd_notify(t, sig); | 786 | if (legacy_queue(pending, sig)) |
671 | 787 | return 0; | |
672 | /* | 788 | /* |
673 | * fast-pathed signals for kernel-internal things like SIGSTOP | 789 | * fast-pathed signals for kernel-internal things like SIGSTOP |
674 | * or SIGKILL. | 790 | * or SIGKILL. |
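
This hunk is the core of the series: handle_stop_signal() becomes prepare_signal() and now also reports whether the signal should be queued at all, __group_complete_signal() becomes complete_signal() with a group flag, and (in the hunks that follow) specific_send_sig_info() and __group_send_sig_info() collapse into thin wrappers around one send_signal() that differs only in which pending queue it targets. The unified control flow, condensed to a skeleton (sketch, not a drop-in; the elided middle allocates a struct sigqueue and copies the siginfo):

static int send_signal(int sig, struct siginfo *info,
                       struct task_struct *t, int group)
{
        struct sigpending *pending;

        if (!prepare_signal(sig, t))    /* stop/cont side effects; 0 = drop */
                return 0;

        pending = group ? &t->signal->shared_pending : &t->pending;
        if (legacy_queue(pending, sig)) /* non-rt signal already pending */
                return 0;

        /* ... allocate a sigqueue entry and fill in *info ... */

        signalfd_notify(t, sig);
        sigaddset(&pending->signal, sig);
        complete_signal(sig, t, group); /* choose and wake a target thread */
        return 0;
}
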
@@ -688,7 +804,7 @@ static int send_signal(int sig, struct siginfo *info, struct task_struct *t, | |||
688 | (is_si_special(info) || | 804 | (is_si_special(info) || |
689 | info->si_code >= 0))); | 805 | info->si_code >= 0))); |
690 | if (q) { | 806 | if (q) { |
691 | list_add_tail(&q->list, &signals->list); | 807 | list_add_tail(&q->list, &pending->list); |
692 | switch ((unsigned long) info) { | 808 | switch ((unsigned long) info) { |
693 | case (unsigned long) SEND_SIG_NOINFO: | 809 | case (unsigned long) SEND_SIG_NOINFO: |
694 | q->info.si_signo = sig; | 810 | q->info.si_signo = sig; |
@@ -718,13 +834,12 @@ static int send_signal(int sig, struct siginfo *info, struct task_struct *t, | |||
718 | } | 834 | } |
719 | 835 | ||
720 | out_set: | 836 | out_set: |
721 | sigaddset(&signals->signal, sig); | 837 | signalfd_notify(t, sig); |
722 | return ret; | 838 | sigaddset(&pending->signal, sig); |
839 | complete_signal(sig, t, group); | ||
840 | return 0; | ||
723 | } | 841 | } |
724 | 842 | ||
725 | #define LEGACY_QUEUE(sigptr, sig) \ | ||
726 | (((sig) < SIGRTMIN) && sigismember(&(sigptr)->signal, (sig))) | ||
727 | |||
728 | int print_fatal_signals; | 843 | int print_fatal_signals; |
729 | 844 | ||
730 | static void print_fatal_signal(struct pt_regs *regs, int signr) | 845 | static void print_fatal_signal(struct pt_regs *regs, int signr) |
@@ -757,29 +872,16 @@ static int __init setup_print_fatal_signals(char *str) | |||
757 | 872 | ||
758 | __setup("print-fatal-signals=", setup_print_fatal_signals); | 873 | __setup("print-fatal-signals=", setup_print_fatal_signals); |
759 | 874 | ||
875 | int | ||
876 | __group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p) | ||
877 | { | ||
878 | return send_signal(sig, info, p, 1); | ||
879 | } | ||
880 | |||
760 | static int | 881 | static int |
761 | specific_send_sig_info(int sig, struct siginfo *info, struct task_struct *t) | 882 | specific_send_sig_info(int sig, struct siginfo *info, struct task_struct *t) |
762 | { | 883 | { |
763 | int ret = 0; | 884 | return send_signal(sig, info, t, 0); |
764 | |||
765 | BUG_ON(!irqs_disabled()); | ||
766 | assert_spin_locked(&t->sighand->siglock); | ||
767 | |||
768 | /* Short-circuit ignored signals. */ | ||
769 | if (sig_ignored(t, sig)) | ||
770 | goto out; | ||
771 | |||
772 | /* Support queueing exactly one non-rt signal, so that we | ||
773 | can get more detailed information about the cause of | ||
774 | the signal. */ | ||
775 | if (LEGACY_QUEUE(&t->pending, sig)) | ||
776 | goto out; | ||
777 | |||
778 | ret = send_signal(sig, info, t, &t->pending); | ||
779 | if (!ret && !sigismember(&t->blocked, sig)) | ||
780 | signal_wake_up(t, sig == SIGKILL); | ||
781 | out: | ||
782 | return ret; | ||
783 | } | 885 | } |
784 | 886 | ||
785 | /* | 887 | /* |
@@ -790,7 +892,8 @@ out: | |||
790 | * since we do not want to have a signal handler that was blocked | 892 | * since we do not want to have a signal handler that was blocked |
791 | * be invoked when user space had explicitly blocked it. | 893 | * be invoked when user space had explicitly blocked it. |
792 | * | 894 | * |
793 | * We don't want to have recursive SIGSEGV's etc, for example. | 895 | * We don't want to have recursive SIGSEGV's etc, for example, |
896 | * that is why we also clear SIGNAL_UNKILLABLE. | ||
794 | */ | 897 | */ |
795 | int | 898 | int |
796 | force_sig_info(int sig, struct siginfo *info, struct task_struct *t) | 899 | force_sig_info(int sig, struct siginfo *info, struct task_struct *t) |
@@ -810,6 +913,8 @@ force_sig_info(int sig, struct siginfo *info, struct task_struct *t) | |||
810 | recalc_sigpending_and_wake(t); | 913 | recalc_sigpending_and_wake(t); |
811 | } | 914 | } |
812 | } | 915 | } |
916 | if (action->sa.sa_handler == SIG_DFL) | ||
917 | t->signal->flags &= ~SIGNAL_UNKILLABLE; | ||
813 | ret = specific_send_sig_info(sig, info, t); | 918 | ret = specific_send_sig_info(sig, info, t); |
814 | spin_unlock_irqrestore(&t->sighand->siglock, flags); | 919 | spin_unlock_irqrestore(&t->sighand->siglock, flags); |
815 | 920 | ||
@@ -823,134 +928,6 @@ force_sig_specific(int sig, struct task_struct *t) | |||
823 | } | 928 | } |
824 | 929 | ||
825 | /* | 930 | /* |
826 | * Test if P wants to take SIG. After we've checked all threads with this, | ||
827 | * it's equivalent to finding no threads not blocking SIG. Any threads not | ||
828 | * blocking SIG were ruled out because they are not running and already | ||
829 | * have pending signals. Such threads will dequeue from the shared queue | ||
830 | * as soon as they're available, so putting the signal on the shared queue | ||
831 | * will be equivalent to sending it to one such thread. | ||
832 | */ | ||
833 | static inline int wants_signal(int sig, struct task_struct *p) | ||
834 | { | ||
835 | if (sigismember(&p->blocked, sig)) | ||
836 | return 0; | ||
837 | if (p->flags & PF_EXITING) | ||
838 | return 0; | ||
839 | if (sig == SIGKILL) | ||
840 | return 1; | ||
841 | if (task_is_stopped_or_traced(p)) | ||
842 | return 0; | ||
843 | return task_curr(p) || !signal_pending(p); | ||
844 | } | ||
845 | |||
846 | static void | ||
847 | __group_complete_signal(int sig, struct task_struct *p) | ||
848 | { | ||
849 | struct task_struct *t; | ||
850 | |||
851 | /* | ||
852 | * Now find a thread we can wake up to take the signal off the queue. | ||
853 | * | ||
854 | * If the main thread wants the signal, it gets first crack. | ||
855 | * Probably the least surprising to the average bear. | ||
856 | */ | ||
857 | if (wants_signal(sig, p)) | ||
858 | t = p; | ||
859 | else if (thread_group_empty(p)) | ||
860 | /* | ||
861 | * There is just one thread and it does not need to be woken. | ||
862 | * It will dequeue unblocked signals before it runs again. | ||
863 | */ | ||
864 | return; | ||
865 | else { | ||
866 | /* | ||
867 | * Otherwise try to find a suitable thread. | ||
868 | */ | ||
869 | t = p->signal->curr_target; | ||
870 | if (t == NULL) | ||
871 | /* restart balancing at this thread */ | ||
872 | t = p->signal->curr_target = p; | ||
873 | |||
874 | while (!wants_signal(sig, t)) { | ||
875 | t = next_thread(t); | ||
876 | if (t == p->signal->curr_target) | ||
877 | /* | ||
878 | * No thread needs to be woken. | ||
879 | * Any eligible threads will see | ||
880 | * the signal in the queue soon. | ||
881 | */ | ||
882 | return; | ||
883 | } | ||
884 | p->signal->curr_target = t; | ||
885 | } | ||
886 | |||
887 | /* | ||
888 | * Found a killable thread. If the signal will be fatal, | ||
889 | * then start taking the whole group down immediately. | ||
890 | */ | ||
891 | if (sig_fatal(p, sig) && !(p->signal->flags & SIGNAL_GROUP_EXIT) && | ||
892 | !sigismember(&t->real_blocked, sig) && | ||
893 | (sig == SIGKILL || !(t->ptrace & PT_PTRACED))) { | ||
894 | /* | ||
895 | * This signal will be fatal to the whole group. | ||
896 | */ | ||
897 | if (!sig_kernel_coredump(sig)) { | ||
898 | /* | ||
899 | * Start a group exit and wake everybody up. | ||
900 | * This way we don't have other threads | ||
901 | * running and doing things after a slower | ||
902 | * thread has the fatal signal pending. | ||
903 | */ | ||
904 | p->signal->flags = SIGNAL_GROUP_EXIT; | ||
905 | p->signal->group_exit_code = sig; | ||
906 | p->signal->group_stop_count = 0; | ||
907 | t = p; | ||
908 | do { | ||
909 | sigaddset(&t->pending.signal, SIGKILL); | ||
910 | signal_wake_up(t, 1); | ||
911 | } while_each_thread(p, t); | ||
912 | return; | ||
913 | } | ||
914 | } | ||
915 | |||
916 | /* | ||
917 | * The signal is already in the shared-pending queue. | ||
918 | * Tell the chosen thread to wake up and dequeue it. | ||
919 | */ | ||
920 | signal_wake_up(t, sig == SIGKILL); | ||
921 | return; | ||
922 | } | ||
923 | |||
924 | int | ||
925 | __group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p) | ||
926 | { | ||
927 | int ret = 0; | ||
928 | |||
929 | assert_spin_locked(&p->sighand->siglock); | ||
930 | handle_stop_signal(sig, p); | ||
931 | |||
932 | /* Short-circuit ignored signals. */ | ||
933 | if (sig_ignored(p, sig)) | ||
934 | return ret; | ||
935 | |||
936 | if (LEGACY_QUEUE(&p->signal->shared_pending, sig)) | ||
937 | /* This is a non-RT signal and we already have one queued. */ | ||
938 | return ret; | ||
939 | |||
940 | /* | ||
941 | * Put this signal on the shared-pending queue, or fail with EAGAIN. | ||
942 | * We always use the shared queue for process-wide signals, | ||
943 | * to avoid several races. | ||
944 | */ | ||
945 | ret = send_signal(sig, info, p, &p->signal->shared_pending); | ||
946 | if (unlikely(ret)) | ||
947 | return ret; | ||
948 | |||
949 | __group_complete_signal(sig, p); | ||
950 | return 0; | ||
951 | } | ||
952 | |||
953 | /* | ||
954 | * Nuke all other threads in the group. | 931 | * Nuke all other threads in the group. |
955 | */ | 932 | */ |
956 | void zap_other_threads(struct task_struct *p) | 933 | void zap_other_threads(struct task_struct *p) |
@@ -978,13 +955,11 @@ int __fatal_signal_pending(struct task_struct *tsk) | |||
978 | } | 955 | } |
979 | EXPORT_SYMBOL(__fatal_signal_pending); | 956 | EXPORT_SYMBOL(__fatal_signal_pending); |
980 | 957 | ||
981 | /* | ||
982 | * Must be called under rcu_read_lock() or with tasklist_lock read-held. | ||
983 | */ | ||
984 | struct sighand_struct *lock_task_sighand(struct task_struct *tsk, unsigned long *flags) | 958 | struct sighand_struct *lock_task_sighand(struct task_struct *tsk, unsigned long *flags) |
985 | { | 959 | { |
986 | struct sighand_struct *sighand; | 960 | struct sighand_struct *sighand; |
987 | 961 | ||
962 | rcu_read_lock(); | ||
988 | for (;;) { | 963 | for (;;) { |
989 | sighand = rcu_dereference(tsk->sighand); | 964 | sighand = rcu_dereference(tsk->sighand); |
990 | if (unlikely(sighand == NULL)) | 965 | if (unlikely(sighand == NULL)) |
@@ -995,6 +970,7 @@ struct sighand_struct *lock_task_sighand(struct task_struct *tsk, unsigned long | |||
995 | break; | 970 | break; |
996 | spin_unlock_irqrestore(&sighand->siglock, *flags); | 971 | spin_unlock_irqrestore(&sighand->siglock, *flags); |
997 | } | 972 | } |
973 | rcu_read_unlock(); | ||
998 | 974 | ||
999 | return sighand; | 975 | return sighand; |
1000 | } | 976 | } |
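
lock_task_sighand() now takes rcu_read_lock() itself around the sighand dereference, so the old "must be called under rcu_read_lock() or with tasklist_lock read-held" rule disappears from its contract; send_sigqueue() and do_tkill() below rely on exactly that. The resulting caller pattern (sketch):

static int with_siglock(struct task_struct *p)
{
        unsigned long flags;

        if (!lock_task_sighand(p, &flags))
                return -ESRCH;  /* task is exiting, sighand already gone */

        /* ->siglock held; ->sighand pinned by the helper's own RCU section */

        unlock_task_sighand(p, &flags);
        return 0;
}
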
@@ -1043,9 +1019,6 @@ int kill_pid_info(int sig, struct siginfo *info, struct pid *pid) | |||
1043 | struct task_struct *p; | 1019 | struct task_struct *p; |
1044 | 1020 | ||
1045 | rcu_read_lock(); | 1021 | rcu_read_lock(); |
1046 | if (unlikely(sig_needs_tasklist(sig))) | ||
1047 | read_lock(&tasklist_lock); | ||
1048 | |||
1049 | retry: | 1022 | retry: |
1050 | p = pid_task(pid, PIDTYPE_PID); | 1023 | p = pid_task(pid, PIDTYPE_PID); |
1051 | if (p) { | 1024 | if (p) { |
@@ -1059,10 +1032,8 @@ retry: | |||
1059 | */ | 1032 | */ |
1060 | goto retry; | 1033 | goto retry; |
1061 | } | 1034 | } |
1062 | |||
1063 | if (unlikely(sig_needs_tasklist(sig))) | ||
1064 | read_unlock(&tasklist_lock); | ||
1065 | rcu_read_unlock(); | 1035 | rcu_read_unlock(); |
1036 | |||
1066 | return error; | 1037 | return error; |
1067 | } | 1038 | } |
1068 | 1039 | ||
@@ -1159,8 +1130,7 @@ static int kill_something_info(int sig, struct siginfo *info, int pid) | |||
1159 | */ | 1130 | */ |
1160 | 1131 | ||
1161 | /* | 1132 | /* |
1162 | * These two are the most common entry points. They send a signal | 1133 | * The caller must ensure the task can't exit. |
1163 | * just to the specific thread. | ||
1164 | */ | 1134 | */ |
1165 | int | 1135 | int |
1166 | send_sig_info(int sig, struct siginfo *info, struct task_struct *p) | 1136 | send_sig_info(int sig, struct siginfo *info, struct task_struct *p) |
@@ -1175,17 +1145,9 @@ send_sig_info(int sig, struct siginfo *info, struct task_struct *p) | |||
1175 | if (!valid_signal(sig)) | 1145 | if (!valid_signal(sig)) |
1176 | return -EINVAL; | 1146 | return -EINVAL; |
1177 | 1147 | ||
1178 | /* | ||
1179 | * We need the tasklist lock even for the specific | ||
1180 | * thread case (when we don't need to follow the group | ||
1181 | * lists) in order to avoid races with "p->sighand" | ||
1182 | * going away or changing from under us. | ||
1183 | */ | ||
1184 | read_lock(&tasklist_lock); | ||
1185 | spin_lock_irqsave(&p->sighand->siglock, flags); | 1148 | spin_lock_irqsave(&p->sighand->siglock, flags); |
1186 | ret = specific_send_sig_info(sig, info, p); | 1149 | ret = specific_send_sig_info(sig, info, p); |
1187 | spin_unlock_irqrestore(&p->sighand->siglock, flags); | 1150 | spin_unlock_irqrestore(&p->sighand->siglock, flags); |
1188 | read_unlock(&tasklist_lock); | ||
1189 | return ret; | 1151 | return ret; |
1190 | } | 1152 | } |
1191 | 1153 | ||
@@ -1291,28 +1253,24 @@ void sigqueue_free(struct sigqueue *q) | |||
1291 | __sigqueue_free(q); | 1253 | __sigqueue_free(q); |
1292 | } | 1254 | } |
1293 | 1255 | ||
1294 | int send_sigqueue(int sig, struct sigqueue *q, struct task_struct *p) | 1256 | int send_sigqueue(struct sigqueue *q, struct task_struct *t, int group) |
1295 | { | 1257 | { |
1258 | int sig = q->info.si_signo; | ||
1259 | struct sigpending *pending; | ||
1296 | unsigned long flags; | 1260 | unsigned long flags; |
1297 | int ret = 0; | 1261 | int ret; |
1298 | 1262 | ||
1299 | BUG_ON(!(q->flags & SIGQUEUE_PREALLOC)); | 1263 | BUG_ON(!(q->flags & SIGQUEUE_PREALLOC)); |
1300 | 1264 | ||
1301 | /* | 1265 | ret = -1; |
1302 | * The rcu based delayed sighand destroy makes it possible to | 1266 | if (!likely(lock_task_sighand(t, &flags))) |
1303 | * run this without tasklist lock held. The task struct itself | 1267 | goto ret; |
1304 | * cannot go away as create_timer did get_task_struct(). | ||
1305 | * | ||
1306 | * We return -1, when the task is marked exiting, so | ||
1307 | * posix_timer_event can redirect it to the group leader | ||
1308 | */ | ||
1309 | rcu_read_lock(); | ||
1310 | 1268 | ||
1311 | if (!likely(lock_task_sighand(p, &flags))) { | 1269 | ret = 1; /* the signal is ignored */ |
1312 | ret = -1; | 1270 | if (!prepare_signal(sig, t)) |
1313 | goto out_err; | 1271 | goto out; |
1314 | } | ||
1315 | 1272 | ||
1273 | ret = 0; | ||
1316 | if (unlikely(!list_empty(&q->list))) { | 1274 | if (unlikely(!list_empty(&q->list))) { |
1317 | /* | 1275 | /* |
1318 | * If an SI_TIMER entry is already queued just increment | 1276 |
@@ -1322,77 +1280,15 @@ int send_sigqueue(int sig, struct sigqueue *q, struct task_struct *p) | |||
1322 | q->info.si_overrun++; | 1280 | q->info.si_overrun++; |
1323 | goto out; | 1281 | goto out; |
1324 | } | 1282 | } |
1325 | /* Short-circuit ignored signals. */ | ||
1326 | if (sig_ignored(p, sig)) { | ||
1327 | ret = 1; | ||
1328 | goto out; | ||
1329 | } | ||
1330 | /* | ||
1331 | * Deliver the signal to listening signalfds. This must be called | ||
1332 | * with the sighand lock held. | ||
1333 | */ | ||
1334 | signalfd_notify(p, sig); | ||
1335 | |||
1336 | list_add_tail(&q->list, &p->pending.list); | ||
1337 | sigaddset(&p->pending.signal, sig); | ||
1338 | if (!sigismember(&p->blocked, sig)) | ||
1339 | signal_wake_up(p, sig == SIGKILL); | ||
1340 | |||
1341 | out: | ||
1342 | unlock_task_sighand(p, &flags); | ||
1343 | out_err: | ||
1344 | rcu_read_unlock(); | ||
1345 | |||
1346 | return ret; | ||
1347 | } | ||
1348 | |||
1349 | int | ||
1350 | send_group_sigqueue(int sig, struct sigqueue *q, struct task_struct *p) | ||
1351 | { | ||
1352 | unsigned long flags; | ||
1353 | int ret = 0; | ||
1354 | |||
1355 | BUG_ON(!(q->flags & SIGQUEUE_PREALLOC)); | ||
1356 | |||
1357 | read_lock(&tasklist_lock); | ||
1358 | /* Since it_lock is held, p->sighand cannot be NULL. */ | ||
1359 | spin_lock_irqsave(&p->sighand->siglock, flags); | ||
1360 | handle_stop_signal(sig, p); | ||
1361 | |||
1362 | /* Short-circuit ignored signals. */ | ||
1363 | if (sig_ignored(p, sig)) { | ||
1364 | ret = 1; | ||
1365 | goto out; | ||
1366 | } | ||
1367 | 1283 | ||
1368 | if (unlikely(!list_empty(&q->list))) { | 1284 | signalfd_notify(t, sig); |
1369 | /* | 1285 | pending = group ? &t->signal->shared_pending : &t->pending; |
1370 | * If an SI_TIMER entry is already queued just increment | 1286 | list_add_tail(&q->list, &pending->list); |
1371 | * the overrun count. Other uses should not try to | 1287 | sigaddset(&pending->signal, sig); |
1372 | * send the signal multiple times. | 1288 | complete_signal(sig, t, group); |
1373 | */ | ||
1374 | BUG_ON(q->info.si_code != SI_TIMER); | ||
1375 | q->info.si_overrun++; | ||
1376 | goto out; | ||
1377 | } | ||
1378 | /* | ||
1379 | * Deliver the signal to listening signalfds. This must be called | ||
1380 | * with the sighand lock held. | ||
1381 | */ | ||
1382 | signalfd_notify(p, sig); | ||
1383 | |||
1384 | /* | ||
1385 | * Put this signal on the shared-pending queue. | ||
1386 | * We always use the shared queue for process-wide signals, | ||
1387 | * to avoid several races. | ||
1388 | */ | ||
1389 | list_add_tail(&q->list, &p->signal->shared_pending.list); | ||
1390 | sigaddset(&p->signal->shared_pending.signal, sig); | ||
1391 | |||
1392 | __group_complete_signal(sig, p); | ||
1393 | out: | 1289 | out: |
1394 | spin_unlock_irqrestore(&p->sighand->siglock, flags); | 1290 | unlock_task_sighand(t, &flags); |
1395 | read_unlock(&tasklist_lock); | 1291 | ret: |
1396 | return ret; | 1292 | return ret; |
1397 | } | 1293 | } |
1398 | 1294 | ||
@@ -1723,8 +1619,9 @@ static int do_signal_stop(int signr) | |||
1723 | } else { | 1619 | } else { |
1724 | struct task_struct *t; | 1620 | struct task_struct *t; |
1725 | 1621 | ||
1726 | if (!likely(sig->flags & SIGNAL_STOP_DEQUEUED) || | 1622 | if (unlikely((sig->flags & (SIGNAL_STOP_DEQUEUED | SIGNAL_UNKILLABLE)) |
1727 | unlikely(sig->group_exit_task)) | 1623 | != SIGNAL_STOP_DEQUEUED) || |
1624 | unlikely(signal_group_exit(sig))) | ||
1728 | return 0; | 1625 | return 0; |
1729 | /* | 1626 | /* |
1730 | * There is no group stop already in progress. | 1627 | * There is no group stop already in progress. |
@@ -1799,8 +1696,9 @@ static int ptrace_signal(int signr, siginfo_t *info, | |||
1799 | int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka, | 1696 | int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka, |
1800 | struct pt_regs *regs, void *cookie) | 1697 | struct pt_regs *regs, void *cookie) |
1801 | { | 1698 | { |
1802 | sigset_t *mask = ¤t->blocked; | 1699 | struct sighand_struct *sighand = current->sighand; |
1803 | int signr = 0; | 1700 | struct signal_struct *signal = current->signal; |
1701 | int signr; | ||
1804 | 1702 | ||
1805 | relock: | 1703 | relock: |
1806 | /* | 1704 | /* |
@@ -1811,16 +1709,32 @@ relock: | |||
1811 | */ | 1709 | */ |
1812 | try_to_freeze(); | 1710 | try_to_freeze(); |
1813 | 1711 | ||
1814 | spin_lock_irq(¤t->sighand->siglock); | 1712 | spin_lock_irq(&sighand->siglock); |
1713 | /* | ||
1714 | * Every stopped thread goes here after wakeup. Check to see if | ||
1715 | * we should notify the parent, prepare_signal(SIGCONT) encodes | ||
1716 | * the CLD_ si_code into SIGNAL_CLD_MASK bits. | ||
1717 | */ | ||
1718 | if (unlikely(signal->flags & SIGNAL_CLD_MASK)) { | ||
1719 | int why = (signal->flags & SIGNAL_STOP_CONTINUED) | ||
1720 | ? CLD_CONTINUED : CLD_STOPPED; | ||
1721 | signal->flags &= ~SIGNAL_CLD_MASK; | ||
1722 | spin_unlock_irq(&sighand->siglock); | ||
1723 | |||
1724 | read_lock(&tasklist_lock); | ||
1725 | do_notify_parent_cldstop(current->group_leader, why); | ||
1726 | read_unlock(&tasklist_lock); | ||
1727 | goto relock; | ||
1728 | } | ||
1729 | |||
1815 | for (;;) { | 1730 | for (;;) { |
1816 | struct k_sigaction *ka; | 1731 | struct k_sigaction *ka; |
1817 | 1732 | ||
1818 | if (unlikely(current->signal->group_stop_count > 0) && | 1733 | if (unlikely(signal->group_stop_count > 0) && |
1819 | do_signal_stop(0)) | 1734 | do_signal_stop(0)) |
1820 | goto relock; | 1735 | goto relock; |
1821 | 1736 | ||
1822 | signr = dequeue_signal(current, mask, info); | 1737 | signr = dequeue_signal(current, ¤t->blocked, info); |
1823 | |||
1824 | if (!signr) | 1738 | if (!signr) |
1825 | break; /* will return 0 */ | 1739 | break; /* will return 0 */ |
1826 | 1740 | ||
@@ -1830,7 +1744,7 @@ relock: | |||
1830 | continue; | 1744 | continue; |
1831 | } | 1745 | } |
1832 | 1746 | ||
1833 | ka = ¤t->sighand->action[signr-1]; | 1747 | ka = &sighand->action[signr-1]; |
1834 | if (ka->sa.sa_handler == SIG_IGN) /* Do nothing. */ | 1748 | if (ka->sa.sa_handler == SIG_IGN) /* Do nothing. */ |
1835 | continue; | 1749 | continue; |
1836 | if (ka->sa.sa_handler != SIG_DFL) { | 1750 | if (ka->sa.sa_handler != SIG_DFL) { |
@@ -1852,7 +1766,8 @@ relock: | |||
1852 | /* | 1766 | /* |
1853 | * Global init gets no signals it doesn't want. | 1767 | * Global init gets no signals it doesn't want. |
1854 | */ | 1768 | */ |
1855 | if (is_global_init(current)) | 1769 | if (unlikely(signal->flags & SIGNAL_UNKILLABLE) && |
1770 | !signal_group_exit(signal)) | ||
1856 | continue; | 1771 | continue; |
1857 | 1772 | ||
1858 | if (sig_kernel_stop(signr)) { | 1773 | if (sig_kernel_stop(signr)) { |
@@ -1867,14 +1782,14 @@ relock: | |||
1867 | * We need to check for that and bail out if necessary. | 1782 | * We need to check for that and bail out if necessary. |
1868 | */ | 1783 | */ |
1869 | if (signr != SIGSTOP) { | 1784 | if (signr != SIGSTOP) { |
1870 | spin_unlock_irq(¤t->sighand->siglock); | 1785 | spin_unlock_irq(&sighand->siglock); |
1871 | 1786 | ||
1872 | /* signals can be posted during this window */ | 1787 | /* signals can be posted during this window */ |
1873 | 1788 | ||
1874 | if (is_current_pgrp_orphaned()) | 1789 | if (is_current_pgrp_orphaned()) |
1875 | goto relock; | 1790 | goto relock; |
1876 | 1791 | ||
1877 | spin_lock_irq(¤t->sighand->siglock); | 1792 | spin_lock_irq(&sighand->siglock); |
1878 | } | 1793 | } |
1879 | 1794 | ||
1880 | if (likely(do_signal_stop(signr))) { | 1795 | if (likely(do_signal_stop(signr))) { |
@@ -1889,15 +1804,16 @@ relock: | |||
1889 | continue; | 1804 | continue; |
1890 | } | 1805 | } |
1891 | 1806 | ||
1892 | spin_unlock_irq(¤t->sighand->siglock); | 1807 | spin_unlock_irq(&sighand->siglock); |
1893 | 1808 | ||
1894 | /* | 1809 | /* |
1895 | * Anything else is fatal, maybe with a core dump. | 1810 | * Anything else is fatal, maybe with a core dump. |
1896 | */ | 1811 | */ |
1897 | current->flags |= PF_SIGNALED; | 1812 | current->flags |= PF_SIGNALED; |
1898 | if ((signr != SIGKILL) && print_fatal_signals) | 1813 | |
1899 | print_fatal_signal(regs, signr); | ||
1900 | if (sig_kernel_coredump(signr)) { | 1814 | if (sig_kernel_coredump(signr)) { |
1815 | if (print_fatal_signals) | ||
1816 | print_fatal_signal(regs, signr); | ||
1901 | /* | 1817 | /* |
1902 | * If it was able to dump core, this kills all | 1818 | * If it was able to dump core, this kills all |
1903 | * other threads in the group and synchronizes with | 1819 | * other threads in the group and synchronizes with |
@@ -1915,7 +1831,7 @@ relock: | |||
1915 | do_group_exit(signr); | 1831 | do_group_exit(signr); |
1916 | /* NOTREACHED */ | 1832 | /* NOTREACHED */ |
1917 | } | 1833 | } |
1918 | spin_unlock_irq(¤t->sighand->siglock); | 1834 | spin_unlock_irq(&sighand->siglock); |
1919 | return signr; | 1835 | return signr; |
1920 | } | 1836 | } |
1921 | 1837 | ||
@@ -2259,6 +2175,7 @@ static int do_tkill(int tgid, int pid, int sig) | |||
2259 | int error; | 2175 | int error; |
2260 | struct siginfo info; | 2176 | struct siginfo info; |
2261 | struct task_struct *p; | 2177 | struct task_struct *p; |
2178 | unsigned long flags; | ||
2262 | 2179 | ||
2263 | error = -ESRCH; | 2180 | error = -ESRCH; |
2264 | info.si_signo = sig; | 2181 | info.si_signo = sig; |
@@ -2267,22 +2184,24 @@ static int do_tkill(int tgid, int pid, int sig) | |||
2267 | info.si_pid = task_tgid_vnr(current); | 2184 | info.si_pid = task_tgid_vnr(current); |
2268 | info.si_uid = current->uid; | 2185 | info.si_uid = current->uid; |
2269 | 2186 | ||
2270 | read_lock(&tasklist_lock); | 2187 | rcu_read_lock(); |
2271 | p = find_task_by_vpid(pid); | 2188 | p = find_task_by_vpid(pid); |
2272 | if (p && (tgid <= 0 || task_tgid_vnr(p) == tgid)) { | 2189 | if (p && (tgid <= 0 || task_tgid_vnr(p) == tgid)) { |
2273 | error = check_kill_permission(sig, &info, p); | 2190 | error = check_kill_permission(sig, &info, p); |
2274 | /* | 2191 | /* |
2275 | * The null signal is a permissions and process existence | 2192 | * The null signal is a permissions and process existence |
2276 | * probe. No signal is actually delivered. | 2193 | * probe. No signal is actually delivered. |
2194 | * | ||
2195 | * If lock_task_sighand() fails we pretend the task dies | ||
2196 | * after receiving the signal. The window is tiny, and the | ||
2197 | * signal is private anyway. | ||
2277 | */ | 2198 | */ |
2278 | if (!error && sig && p->sighand) { | 2199 | if (!error && sig && lock_task_sighand(p, &flags)) { |
2279 | spin_lock_irq(&p->sighand->siglock); | ||
2280 | handle_stop_signal(sig, p); | ||
2281 | error = specific_send_sig_info(sig, &info, p); | 2200 | error = specific_send_sig_info(sig, &info, p); |
2282 | spin_unlock_irq(&p->sighand->siglock); | 2201 | unlock_task_sighand(p, &flags); |
2283 | } | 2202 | } |
2284 | } | 2203 | } |
2285 | read_unlock(&tasklist_lock); | 2204 | rcu_read_unlock(); |
2286 | 2205 | ||
2287 | return error; | 2206 | return error; |
2288 | } | 2207 | } |
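
do_tkill() sheds tasklist_lock as well: RCU pins the task returned by find_task_by_vpid(), and lock_task_sighand() replaces the racy bare p->sighand test. Userspace semantics are unchanged, including signal 0 as a pure existence-and-permission probe (sketch):

#include <sys/syscall.h>
#include <unistd.h>

/* Does thread tid in thread group tgid exist, and may we signal it?
 * sig == 0 runs only the checks and delivers nothing. */
static int thread_alive(pid_t tgid, pid_t tid)
{
        return syscall(SYS_tgkill, tgid, tid, 0) == 0;
}
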
@@ -2339,13 +2258,14 @@ sys_rt_sigqueueinfo(int pid, int sig, siginfo_t __user *uinfo) | |||
2339 | 2258 | ||
2340 | int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact) | 2259 | int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact) |
2341 | { | 2260 | { |
2261 | struct task_struct *t = current; | ||
2342 | struct k_sigaction *k; | 2262 | struct k_sigaction *k; |
2343 | sigset_t mask; | 2263 | sigset_t mask; |
2344 | 2264 | ||
2345 | if (!valid_signal(sig) || sig < 1 || (act && sig_kernel_only(sig))) | 2265 | if (!valid_signal(sig) || sig < 1 || (act && sig_kernel_only(sig))) |
2346 | return -EINVAL; | 2266 | return -EINVAL; |
2347 | 2267 | ||
2348 | k = &current->sighand->action[sig-1]; | 2268 | k = &t->sighand->action[sig-1]; |
2349 | 2269 | ||
2350 | spin_lock_irq(&current->sighand->siglock); | 2270 | spin_lock_irq(&current->sighand->siglock); |
2351 | if (oact) | 2271 | if (oact) |
@@ -2366,9 +2286,7 @@ int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact) | |||
2366 | * (for example, SIGCHLD), shall cause the pending signal to | 2286 | * (for example, SIGCHLD), shall cause the pending signal to |
2367 | * be discarded, whether or not it is blocked" | 2287 | * be discarded, whether or not it is blocked" |
2368 | */ | 2288 | */ |
2369 | if (act->sa.sa_handler == SIG_IGN || | 2289 | if (__sig_ignored(t, sig)) { |
2370 | (act->sa.sa_handler == SIG_DFL && sig_kernel_ignore(sig))) { | ||
2371 | struct task_struct *t = current; | ||
2372 | sigemptyset(&mask); | 2290 | sigemptyset(&mask); |
2373 | sigaddset(&mask, sig); | 2291 | sigaddset(&mask, sig); |
2374 | rm_from_queue_full(&mask, &t->signal->shared_pending); | 2292 | rm_from_queue_full(&mask, &t->signal->shared_pending); |
@@ -2623,7 +2541,7 @@ asmlinkage long sys_rt_sigsuspend(sigset_t __user *unewset, size_t sigsetsize) | |||
2623 | 2541 | ||
2624 | current->state = TASK_INTERRUPTIBLE; | 2542 | current->state = TASK_INTERRUPTIBLE; |
2625 | schedule(); | 2543 | schedule(); |
2626 | set_thread_flag(TIF_RESTORE_SIGMASK); | 2544 | set_restore_sigmask(); |
2627 | return -ERESTARTNOHAND; | 2545 | return -ERESTARTNOHAND; |
2628 | } | 2546 | } |
2629 | #endif /* __ARCH_WANT_SYS_RT_SIGSUSPEND */ | 2547 | #endif /* __ARCH_WANT_SYS_RT_SIGSUSPEND */ |
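The do_tkill() hunk above replaces the tasklist_lock read lock and the racy p->sighand test with RCU plus lock_task_sighand(), which pins the signal state against release_task(). A minimal sketch of the resulting pattern, assuming only the helpers named in the hunk; the wrapper function itself is invented for illustration:

	/* Sketch: deliver a private signal to a task looked up under RCU.
	 * find_task_by_vpid(), lock_task_sighand() and
	 * specific_send_sig_info() are the kernel helpers used in the
	 * hunk above; everything else here is illustrative. */
	static int send_private_sig(pid_t pid, int sig, struct siginfo *info)
	{
		struct task_struct *p;
		unsigned long flags;
		int error = -ESRCH;

		rcu_read_lock();		/* keeps *p alive during the lookup */
		p = find_task_by_vpid(pid);
		if (p && lock_task_sighand(p, &flags)) {
			/* sighand is locked: the task cannot be reaped here */
			error = specific_send_sig_info(sig, info, p);
			unlock_task_sighand(p, &flags);
		}
		rcu_read_unlock();
		return error;		/* -ESRCH if the task was gone or dying */
	}

If lock_task_sighand() fails, the task is already exiting; as the new comment in the hunk notes, the caller simply pretends the signal arrived just before the death.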
diff --git a/kernel/softirq.c b/kernel/softirq.c index 3c44956ee7e2..36e061740047 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c | |||
@@ -589,16 +589,20 @@ static void takeover_tasklets(unsigned int cpu) | |||
589 | local_irq_disable(); | 589 | local_irq_disable(); |
590 | 590 | ||
591 | /* Find end, append list for that CPU. */ | 591 | /* Find end, append list for that CPU. */ |
592 | *__get_cpu_var(tasklet_vec).tail = per_cpu(tasklet_vec, cpu).head; | 592 | if (&per_cpu(tasklet_vec, cpu).head != per_cpu(tasklet_vec, cpu).tail) { |
593 | __get_cpu_var(tasklet_vec).tail = per_cpu(tasklet_vec, cpu).tail; | 593 | *(__get_cpu_var(tasklet_vec).tail) = per_cpu(tasklet_vec, cpu).head; |
594 | per_cpu(tasklet_vec, cpu).head = NULL; | 594 | __get_cpu_var(tasklet_vec).tail = per_cpu(tasklet_vec, cpu).tail; |
595 | per_cpu(tasklet_vec, cpu).tail = &per_cpu(tasklet_vec, cpu).head; | 595 | per_cpu(tasklet_vec, cpu).head = NULL; |
596 | per_cpu(tasklet_vec, cpu).tail = &per_cpu(tasklet_vec, cpu).head; | ||
597 | } | ||
596 | raise_softirq_irqoff(TASKLET_SOFTIRQ); | 598 | raise_softirq_irqoff(TASKLET_SOFTIRQ); |
597 | 599 | ||
598 | *__get_cpu_var(tasklet_hi_vec).tail = per_cpu(tasklet_hi_vec, cpu).head; | 600 | if (&per_cpu(tasklet_hi_vec, cpu).head != per_cpu(tasklet_hi_vec, cpu).tail) { |
599 | __get_cpu_var(tasklet_hi_vec).tail = per_cpu(tasklet_hi_vec, cpu).tail; | 601 | *__get_cpu_var(tasklet_hi_vec).tail = per_cpu(tasklet_hi_vec, cpu).head; |
600 | per_cpu(tasklet_hi_vec, cpu).head = NULL; | 602 | __get_cpu_var(tasklet_hi_vec).tail = per_cpu(tasklet_hi_vec, cpu).tail; |
601 | per_cpu(tasklet_hi_vec, cpu).tail = &per_cpu(tasklet_hi_vec, cpu).head; | 603 | per_cpu(tasklet_hi_vec, cpu).head = NULL; |
604 | per_cpu(tasklet_hi_vec, cpu).tail = &per_cpu(tasklet_hi_vec, cpu).head; | ||
605 | } | ||
602 | raise_softirq_irqoff(HI_SOFTIRQ); | 606 | raise_softirq_irqoff(HI_SOFTIRQ); |
603 | 607 | ||
604 | local_irq_enable(); | 608 | local_irq_enable(); |
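The new guards in takeover_tasklets() matter because the per-CPU tasklet lists use a head pointer plus a tail pointer-to-pointer: an empty list has tail pointing back at its own head field, so splicing an empty source list would leave the destination's tail pointing into the dead CPU's per-CPU area, and later appends would land on the wrong list. A small userspace model of the invariant (illustrative only; the kernel types differ):

	#include <stddef.h>

	struct node { struct node *next; };
	struct list {
		struct node *head;	/* NULL when empty */
		struct node **tail;	/* == &head when empty */
	};

	/* Append src to dst, mirroring the fixed takeover_tasklets() logic. */
	static void list_splice_tail(struct list *dst, struct list *src)
	{
		if (src->tail == &src->head)	/* empty source: nothing to do */
			return;
		*dst->tail = src->head;		/* hook the chain on */
		dst->tail = src->tail;
		src->head = NULL;		/* reset source to empty state */
		src->tail = &src->head;
	}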
diff --git a/kernel/sys.c b/kernel/sys.c index f2a451366953..895d2d4c9493 100644 --- a/kernel/sys.c +++ b/kernel/sys.c | |||
@@ -978,8 +978,7 @@ asmlinkage long sys_setpgid(pid_t pid, pid_t pgid) | |||
978 | goto out; | 978 | goto out; |
979 | 979 | ||
980 | if (task_pgrp(p) != pgrp) { | 980 | if (task_pgrp(p) != pgrp) { |
981 | detach_pid(p, PIDTYPE_PGID); | 981 | change_pid(p, PIDTYPE_PGID, pgrp); |
982 | attach_pid(p, PIDTYPE_PGID, pgrp); | ||
983 | set_task_pgrp(p, pid_nr(pgrp)); | 982 | set_task_pgrp(p, pid_nr(pgrp)); |
984 | } | 983 | } |
985 | 984 | ||
@@ -992,54 +991,67 @@ out: | |||
992 | 991 | ||
993 | asmlinkage long sys_getpgid(pid_t pid) | 992 | asmlinkage long sys_getpgid(pid_t pid) |
994 | { | 993 | { |
994 | struct task_struct *p; | ||
995 | struct pid *grp; | ||
996 | int retval; | ||
997 | |||
998 | rcu_read_lock(); | ||
995 | if (!pid) | 999 | if (!pid) |
996 | return task_pgrp_vnr(current); | 1000 | grp = task_pgrp(current); |
997 | else { | 1001 | else { |
998 | int retval; | ||
999 | struct task_struct *p; | ||
1000 | |||
1001 | read_lock(&tasklist_lock); | ||
1002 | p = find_task_by_vpid(pid); | ||
1003 | retval = -ESRCH; | 1002 | retval = -ESRCH; |
1004 | if (p) { | 1003 | p = find_task_by_vpid(pid); |
1005 | retval = security_task_getpgid(p); | 1004 | if (!p) |
1006 | if (!retval) | 1005 | goto out; |
1007 | retval = task_pgrp_vnr(p); | 1006 | grp = task_pgrp(p); |
1008 | } | 1007 | if (!grp) |
1009 | read_unlock(&tasklist_lock); | 1008 | goto out; |
1010 | return retval; | 1009 | |
1010 | retval = security_task_getpgid(p); | ||
1011 | if (retval) | ||
1012 | goto out; | ||
1011 | } | 1013 | } |
1014 | retval = pid_vnr(grp); | ||
1015 | out: | ||
1016 | rcu_read_unlock(); | ||
1017 | return retval; | ||
1012 | } | 1018 | } |
1013 | 1019 | ||
1014 | #ifdef __ARCH_WANT_SYS_GETPGRP | 1020 | #ifdef __ARCH_WANT_SYS_GETPGRP |
1015 | 1021 | ||
1016 | asmlinkage long sys_getpgrp(void) | 1022 | asmlinkage long sys_getpgrp(void) |
1017 | { | 1023 | { |
1018 | /* SMP - assuming writes are word atomic this is fine */ | 1024 | return sys_getpgid(0); |
1019 | return task_pgrp_vnr(current); | ||
1020 | } | 1025 | } |
1021 | 1026 | ||
1022 | #endif | 1027 | #endif |
1023 | 1028 | ||
1024 | asmlinkage long sys_getsid(pid_t pid) | 1029 | asmlinkage long sys_getsid(pid_t pid) |
1025 | { | 1030 | { |
1031 | struct task_struct *p; | ||
1032 | struct pid *sid; | ||
1033 | int retval; | ||
1034 | |||
1035 | rcu_read_lock(); | ||
1026 | if (!pid) | 1036 | if (!pid) |
1027 | return task_session_vnr(current); | 1037 | sid = task_session(current); |
1028 | else { | 1038 | else { |
1029 | int retval; | ||
1030 | struct task_struct *p; | ||
1031 | |||
1032 | rcu_read_lock(); | ||
1033 | p = find_task_by_vpid(pid); | ||
1034 | retval = -ESRCH; | 1039 | retval = -ESRCH; |
1035 | if (p) { | 1040 | p = find_task_by_vpid(pid); |
1036 | retval = security_task_getsid(p); | 1041 | if (!p) |
1037 | if (!retval) | 1042 | goto out; |
1038 | retval = task_session_vnr(p); | 1043 | sid = task_session(p); |
1039 | } | 1044 | if (!sid) |
1040 | rcu_read_unlock(); | 1045 | goto out; |
1041 | return retval; | 1046 | |
1047 | retval = security_task_getsid(p); | ||
1048 | if (retval) | ||
1049 | goto out; | ||
1042 | } | 1050 | } |
1051 | retval = pid_vnr(sid); | ||
1052 | out: | ||
1053 | rcu_read_unlock(); | ||
1054 | return retval; | ||
1043 | } | 1055 | } |
1044 | 1056 | ||
1045 | asmlinkage long sys_setsid(void) | 1057 | asmlinkage long sys_setsid(void) |
@@ -1545,6 +1557,19 @@ out: | |||
1545 | * | 1557 | * |
1546 | */ | 1558 | */ |
1547 | 1559 | ||
1560 | static void accumulate_thread_rusage(struct task_struct *t, struct rusage *r, | ||
1561 | cputime_t *utimep, cputime_t *stimep) | ||
1562 | { | ||
1563 | *utimep = cputime_add(*utimep, t->utime); | ||
1564 | *stimep = cputime_add(*stimep, t->stime); | ||
1565 | r->ru_nvcsw += t->nvcsw; | ||
1566 | r->ru_nivcsw += t->nivcsw; | ||
1567 | r->ru_minflt += t->min_flt; | ||
1568 | r->ru_majflt += t->maj_flt; | ||
1569 | r->ru_inblock += task_io_get_inblock(t); | ||
1570 | r->ru_oublock += task_io_get_oublock(t); | ||
1571 | } | ||
1572 | |||
1548 | static void k_getrusage(struct task_struct *p, int who, struct rusage *r) | 1573 | static void k_getrusage(struct task_struct *p, int who, struct rusage *r) |
1549 | { | 1574 | { |
1550 | struct task_struct *t; | 1575 | struct task_struct *t; |
@@ -1554,12 +1579,14 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r) | |||
1554 | memset((char *) r, 0, sizeof *r); | 1579 | memset((char *) r, 0, sizeof *r); |
1555 | utime = stime = cputime_zero; | 1580 | utime = stime = cputime_zero; |
1556 | 1581 | ||
1557 | rcu_read_lock(); | 1582 | if (who == RUSAGE_THREAD) { |
1558 | if (!lock_task_sighand(p, &flags)) { | 1583 | accumulate_thread_rusage(p, r, &utime, &stime); |
1559 | rcu_read_unlock(); | 1584 | goto out; |
1560 | return; | ||
1561 | } | 1585 | } |
1562 | 1586 | ||
1587 | if (!lock_task_sighand(p, &flags)) | ||
1588 | return; | ||
1589 | |||
1563 | switch (who) { | 1590 | switch (who) { |
1564 | case RUSAGE_BOTH: | 1591 | case RUSAGE_BOTH: |
1565 | case RUSAGE_CHILDREN: | 1592 | case RUSAGE_CHILDREN: |
@@ -1586,14 +1613,7 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r) | |||
1586 | r->ru_oublock += p->signal->oublock; | 1613 | r->ru_oublock += p->signal->oublock; |
1587 | t = p; | 1614 | t = p; |
1588 | do { | 1615 | do { |
1589 | utime = cputime_add(utime, t->utime); | 1616 | accumulate_thread_rusage(t, r, &utime, &stime); |
1590 | stime = cputime_add(stime, t->stime); | ||
1591 | r->ru_nvcsw += t->nvcsw; | ||
1592 | r->ru_nivcsw += t->nivcsw; | ||
1593 | r->ru_minflt += t->min_flt; | ||
1594 | r->ru_majflt += t->maj_flt; | ||
1595 | r->ru_inblock += task_io_get_inblock(t); | ||
1596 | r->ru_oublock += task_io_get_oublock(t); | ||
1597 | t = next_thread(t); | 1617 | t = next_thread(t); |
1598 | } while (t != p); | 1618 | } while (t != p); |
1599 | break; | 1619 | break; |
@@ -1601,10 +1621,9 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r) | |||
1601 | default: | 1621 | default: |
1602 | BUG(); | 1622 | BUG(); |
1603 | } | 1623 | } |
1604 | |||
1605 | unlock_task_sighand(p, &flags); | 1624 | unlock_task_sighand(p, &flags); |
1606 | rcu_read_unlock(); | ||
1607 | 1625 | ||
1626 | out: | ||
1608 | cputime_to_timeval(utime, &r->ru_utime); | 1627 | cputime_to_timeval(utime, &r->ru_utime); |
1609 | cputime_to_timeval(stime, &r->ru_stime); | 1628 | cputime_to_timeval(stime, &r->ru_stime); |
1610 | } | 1629 | } |
@@ -1618,7 +1637,8 @@ int getrusage(struct task_struct *p, int who, struct rusage __user *ru) | |||
1618 | 1637 | ||
1619 | asmlinkage long sys_getrusage(int who, struct rusage __user *ru) | 1638 | asmlinkage long sys_getrusage(int who, struct rusage __user *ru) |
1620 | { | 1639 | { |
1621 | if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN) | 1640 | if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN && |
1641 | who != RUSAGE_THREAD) | ||
1622 | return -EINVAL; | 1642 | return -EINVAL; |
1623 | return getrusage(current, who, ru); | 1643 | return getrusage(current, who, ru); |
1624 | } | 1644 | } |
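The kernel/sys.c hunks make two related cleanups. sys_getpgid(), sys_getpgrp() and sys_getsid() now resolve a struct pid under rcu_read_lock() and convert it to a number only at the end with pid_vnr(), instead of reading cached numeric ids under tasklist_lock. And sys_getrusage() gains a third selector, RUSAGE_THREAD, which reports just the calling thread's counters via the new accumulate_thread_rusage() helper rather than summing the whole thread group under siglock. A userspace check of the new selector; the fallback define is a hedge for pre-2.6.26 headers (RUSAGE_THREAD is 1 on Linux):

	#include <stdio.h>
	#include <sys/resource.h>

	#ifndef RUSAGE_THREAD
	#define RUSAGE_THREAD	1	/* per-thread statistics, per the patch above */
	#endif

	int main(void)
	{
		struct rusage grp, thr;

		getrusage(RUSAGE_SELF, &grp);	/* whole thread group */
		getrusage(RUSAGE_THREAD, &thr);	/* calling thread only */
		printf("group minflt=%ld, thread minflt=%ld\n",
		       grp.ru_minflt, thr.ru_minflt);
		return 0;
	}

Note that the RUSAGE_THREAD path deliberately skips lock_task_sighand(): it reads only the caller's own counters, so no cross-thread synchronization is needed.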
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index fd3364827ccf..d7ffdc59816a 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -38,6 +38,7 @@ | |||
38 | #include <linux/writeback.h> | 38 | #include <linux/writeback.h> |
39 | #include <linux/hugetlb.h> | 39 | #include <linux/hugetlb.h> |
40 | #include <linux/initrd.h> | 40 | #include <linux/initrd.h> |
41 | #include <linux/key.h> | ||
41 | #include <linux/times.h> | 42 | #include <linux/times.h> |
42 | #include <linux/limits.h> | 43 | #include <linux/limits.h> |
43 | #include <linux/dcache.h> | 44 | #include <linux/dcache.h> |
@@ -144,12 +145,6 @@ extern int no_unaligned_warning; | |||
144 | extern int max_lock_depth; | 145 | extern int max_lock_depth; |
145 | #endif | 146 | #endif |
146 | 147 | ||
147 | #ifdef CONFIG_SYSCTL_SYSCALL | ||
148 | static int parse_table(int __user *, int, void __user *, size_t __user *, | ||
149 | void __user *, size_t, struct ctl_table *); | ||
150 | #endif | ||
151 | |||
152 | |||
153 | #ifdef CONFIG_PROC_SYSCTL | 148 | #ifdef CONFIG_PROC_SYSCTL |
154 | static int proc_do_cad_pid(struct ctl_table *table, int write, struct file *filp, | 149 | static int proc_do_cad_pid(struct ctl_table *table, int write, struct file *filp, |
155 | void __user *buffer, size_t *lenp, loff_t *ppos); | 150 | void __user *buffer, size_t *lenp, loff_t *ppos); |
@@ -809,6 +804,14 @@ static struct ctl_table kern_table[] = { | |||
809 | .proc_handler = &proc_dostring, | 804 | .proc_handler = &proc_dostring, |
810 | .strategy = &sysctl_string, | 805 | .strategy = &sysctl_string, |
811 | }, | 806 | }, |
807 | #ifdef CONFIG_KEYS | ||
808 | { | ||
809 | .ctl_name = CTL_UNNUMBERED, | ||
810 | .procname = "keys", | ||
811 | .mode = 0555, | ||
812 | .child = key_sysctls, | ||
813 | }, | ||
814 | #endif | ||
812 | /* | 815 | /* |
813 | * NOTE: do not add new entries to this table unless you have read | 816 | * NOTE: do not add new entries to this table unless you have read |
814 | * Documentation/sysctl/ctl_unnumbered.txt | 817 | * Documentation/sysctl/ctl_unnumbered.txt |
@@ -1430,6 +1433,76 @@ void register_sysctl_root(struct ctl_table_root *root) | |||
1430 | } | 1433 | } |
1431 | 1434 | ||
1432 | #ifdef CONFIG_SYSCTL_SYSCALL | 1435 | #ifdef CONFIG_SYSCTL_SYSCALL |
1436 | /* Perform the actual read/write of a sysctl table entry. */ | ||
1437 | static int do_sysctl_strategy(struct ctl_table_root *root, | ||
1438 | struct ctl_table *table, | ||
1439 | int __user *name, int nlen, | ||
1440 | void __user *oldval, size_t __user *oldlenp, | ||
1441 | void __user *newval, size_t newlen) | ||
1442 | { | ||
1443 | int op = 0, rc; | ||
1444 | |||
1445 | if (oldval) | ||
1446 | op |= 004; | ||
1447 | if (newval) | ||
1448 | op |= 002; | ||
1449 | if (sysctl_perm(root, table, op)) | ||
1450 | return -EPERM; | ||
1451 | |||
1452 | if (table->strategy) { | ||
1453 | rc = table->strategy(table, name, nlen, oldval, oldlenp, | ||
1454 | newval, newlen); | ||
1455 | if (rc < 0) | ||
1456 | return rc; | ||
1457 | if (rc > 0) | ||
1458 | return 0; | ||
1459 | } | ||
1460 | |||
1461 | /* If there is no strategy routine, or if the strategy returns | ||
1462 | * zero, proceed with automatic r/w */ | ||
1463 | if (table->data && table->maxlen) { | ||
1464 | rc = sysctl_data(table, name, nlen, oldval, oldlenp, | ||
1465 | newval, newlen); | ||
1466 | if (rc < 0) | ||
1467 | return rc; | ||
1468 | } | ||
1469 | return 0; | ||
1470 | } | ||
1471 | |||
1472 | static int parse_table(int __user *name, int nlen, | ||
1473 | void __user *oldval, size_t __user *oldlenp, | ||
1474 | void __user *newval, size_t newlen, | ||
1475 | struct ctl_table_root *root, | ||
1476 | struct ctl_table *table) | ||
1477 | { | ||
1478 | int n; | ||
1479 | repeat: | ||
1480 | if (!nlen) | ||
1481 | return -ENOTDIR; | ||
1482 | if (get_user(n, name)) | ||
1483 | return -EFAULT; | ||
1484 | for ( ; table->ctl_name || table->procname; table++) { | ||
1485 | if (!table->ctl_name) | ||
1486 | continue; | ||
1487 | if (n == table->ctl_name) { | ||
1488 | int error; | ||
1489 | if (table->child) { | ||
1490 | if (sysctl_perm(root, table, 001)) | ||
1491 | return -EPERM; | ||
1492 | name++; | ||
1493 | nlen--; | ||
1494 | table = table->child; | ||
1495 | goto repeat; | ||
1496 | } | ||
1497 | error = do_sysctl_strategy(root, table, name, nlen, | ||
1498 | oldval, oldlenp, | ||
1499 | newval, newlen); | ||
1500 | return error; | ||
1501 | } | ||
1502 | } | ||
1503 | return -ENOTDIR; | ||
1504 | } | ||
1505 | |||
1433 | int do_sysctl(int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, | 1506 | int do_sysctl(int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, |
1434 | void __user *newval, size_t newlen) | 1507 | void __user *newval, size_t newlen) |
1435 | { | 1508 | { |
@@ -1447,7 +1520,8 @@ int do_sysctl(int __user *name, int nlen, void __user *oldval, size_t __user *ol | |||
1447 | for (head = sysctl_head_next(NULL); head; | 1520 | for (head = sysctl_head_next(NULL); head; |
1448 | head = sysctl_head_next(head)) { | 1521 | head = sysctl_head_next(head)) { |
1449 | error = parse_table(name, nlen, oldval, oldlenp, | 1522 | error = parse_table(name, nlen, oldval, oldlenp, |
1450 | newval, newlen, head->ctl_table); | 1523 | newval, newlen, |
1524 | head->root, head->ctl_table); | ||
1451 | if (error != -ENOTDIR) { | 1525 | if (error != -ENOTDIR) { |
1452 | sysctl_head_finish(head); | 1526 | sysctl_head_finish(head); |
1453 | break; | 1527 | break; |
@@ -1493,84 +1567,22 @@ static int test_perm(int mode, int op) | |||
1493 | return -EACCES; | 1567 | return -EACCES; |
1494 | } | 1568 | } |
1495 | 1569 | ||
1496 | int sysctl_perm(struct ctl_table *table, int op) | 1570 | int sysctl_perm(struct ctl_table_root *root, struct ctl_table *table, int op) |
1497 | { | 1571 | { |
1498 | int error; | 1572 | int error; |
1573 | int mode; | ||
1574 | |||
1499 | error = security_sysctl(table, op); | 1575 | error = security_sysctl(table, op); |
1500 | if (error) | 1576 | if (error) |
1501 | return error; | 1577 | return error; |
1502 | return test_perm(table->mode, op); | ||
1503 | } | ||
1504 | |||
1505 | #ifdef CONFIG_SYSCTL_SYSCALL | ||
1506 | static int parse_table(int __user *name, int nlen, | ||
1507 | void __user *oldval, size_t __user *oldlenp, | ||
1508 | void __user *newval, size_t newlen, | ||
1509 | struct ctl_table *table) | ||
1510 | { | ||
1511 | int n; | ||
1512 | repeat: | ||
1513 | if (!nlen) | ||
1514 | return -ENOTDIR; | ||
1515 | if (get_user(n, name)) | ||
1516 | return -EFAULT; | ||
1517 | for ( ; table->ctl_name || table->procname; table++) { | ||
1518 | if (!table->ctl_name) | ||
1519 | continue; | ||
1520 | if (n == table->ctl_name) { | ||
1521 | int error; | ||
1522 | if (table->child) { | ||
1523 | if (sysctl_perm(table, 001)) | ||
1524 | return -EPERM; | ||
1525 | name++; | ||
1526 | nlen--; | ||
1527 | table = table->child; | ||
1528 | goto repeat; | ||
1529 | } | ||
1530 | error = do_sysctl_strategy(table, name, nlen, | ||
1531 | oldval, oldlenp, | ||
1532 | newval, newlen); | ||
1533 | return error; | ||
1534 | } | ||
1535 | } | ||
1536 | return -ENOTDIR; | ||
1537 | } | ||
1538 | 1578 | ||
1539 | /* Perform the actual read/write of a sysctl table entry. */ | 1579 | if (root->permissions) |
1540 | int do_sysctl_strategy (struct ctl_table *table, | 1580 | mode = root->permissions(root, current->nsproxy, table); |
1541 | int __user *name, int nlen, | 1581 | else |
1542 | void __user *oldval, size_t __user *oldlenp, | 1582 | mode = table->mode; |
1543 | void __user *newval, size_t newlen) | ||
1544 | { | ||
1545 | int op = 0, rc; | ||
1546 | |||
1547 | if (oldval) | ||
1548 | op |= 004; | ||
1549 | if (newval) | ||
1550 | op |= 002; | ||
1551 | if (sysctl_perm(table, op)) | ||
1552 | return -EPERM; | ||
1553 | 1583 | ||
1554 | if (table->strategy) { | 1584 | return test_perm(mode, op); |
1555 | rc = table->strategy(table, name, nlen, oldval, oldlenp, | ||
1556 | newval, newlen); | ||
1557 | if (rc < 0) | ||
1558 | return rc; | ||
1559 | if (rc > 0) | ||
1560 | return 0; | ||
1561 | } | ||
1562 | |||
1563 | /* If there is no strategy routine, or if the strategy returns | ||
1564 | * zero, proceed with automatic r/w */ | ||
1565 | if (table->data && table->maxlen) { | ||
1566 | rc = sysctl_data(table, name, nlen, oldval, oldlenp, | ||
1567 | newval, newlen); | ||
1568 | if (rc < 0) | ||
1569 | return rc; | ||
1570 | } | ||
1571 | return 0; | ||
1572 | } | 1585 | } |
1573 | #endif /* CONFIG_SYSCTL_SYSCALL */ | ||
1574 | 1586 | ||
1575 | static void sysctl_set_parent(struct ctl_table *parent, struct ctl_table *table) | 1587 | static void sysctl_set_parent(struct ctl_table *parent, struct ctl_table *table) |
1576 | { | 1588 | { |
@@ -1583,9 +1595,13 @@ static void sysctl_set_parent(struct ctl_table *parent, struct ctl_table *table) | |||
1583 | 1595 | ||
1584 | static __init int sysctl_init(void) | 1596 | static __init int sysctl_init(void) |
1585 | { | 1597 | { |
1586 | int err; | ||
1587 | sysctl_set_parent(NULL, root_table); | 1598 | sysctl_set_parent(NULL, root_table); |
1588 | err = sysctl_check_table(current->nsproxy, root_table); | 1599 | #ifdef CONFIG_SYSCTL_SYSCALL_CHECK |
1600 | { | ||
1601 | int err; | ||
1602 | err = sysctl_check_table(current->nsproxy, root_table); | ||
1603 | } | ||
1604 | #endif | ||
1589 | return 0; | 1605 | return 0; |
1590 | } | 1606 | } |
1591 | 1607 | ||
@@ -1712,10 +1728,12 @@ struct ctl_table_header *__register_sysctl_paths( | |||
1712 | header->unregistering = NULL; | 1728 | header->unregistering = NULL; |
1713 | header->root = root; | 1729 | header->root = root; |
1714 | sysctl_set_parent(NULL, header->ctl_table); | 1730 | sysctl_set_parent(NULL, header->ctl_table); |
1731 | #ifdef CONFIG_SYSCTL_SYSCALL_CHECK | ||
1715 | if (sysctl_check_table(namespaces, header->ctl_table)) { | 1732 | if (sysctl_check_table(namespaces, header->ctl_table)) { |
1716 | kfree(header); | 1733 | kfree(header); |
1717 | return NULL; | 1734 | return NULL; |
1718 | } | 1735 | } |
1736 | #endif | ||
1719 | spin_lock(&sysctl_lock); | 1737 | spin_lock(&sysctl_lock); |
1720 | header_list = lookup_header_list(root, namespaces); | 1738 | header_list = lookup_header_list(root, namespaces); |
1721 | list_add_tail(&header->ctl_entry, header_list); | 1739 | list_add_tail(&header->ctl_entry, header_list); |
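The sysctl changes are mostly code motion (parse_table() and do_sysctl_strategy() move above their first use, so the forward declaration can go), but sysctl_perm() gains a real hook: a ctl_table_root may now supply a permissions() callback that computes the effective mode per caller, with table->mode only as the fallback. A hedged sketch of a registrant using the hook; the field and signature come from the hunk above, while the policy inside is invented:

	/* Illustrative policy: callers without CAP_SYS_ADMIN see the tables
	 * of this root read-only, whatever table->mode says. */
	static int sample_permissions(struct ctl_table_root *root,
				      struct nsproxy *namespaces,
				      struct ctl_table *table)
	{
		int mode = table->mode;

		if (!capable(CAP_SYS_ADMIN))
			mode &= ~0222;		/* drop all write bits */
		return mode;
	}

	static struct ctl_table_root sample_root = {
		.permissions	= sample_permissions,
	};

test_perm() then checks the returned mode against the requested op (4 = read, 2 = write, 1 = search into a child directory), exactly as before.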
diff --git a/kernel/taskstats.c b/kernel/taskstats.c index 07e86a828073..4a23517169a6 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c | |||
@@ -183,7 +183,7 @@ static int fill_pid(pid_t pid, struct task_struct *tsk, | |||
183 | 183 | ||
184 | if (!tsk) { | 184 | if (!tsk) { |
185 | rcu_read_lock(); | 185 | rcu_read_lock(); |
186 | tsk = find_task_by_pid(pid); | 186 | tsk = find_task_by_vpid(pid); |
187 | if (tsk) | 187 | if (tsk) |
188 | get_task_struct(tsk); | 188 | get_task_struct(tsk); |
189 | rcu_read_unlock(); | 189 | rcu_read_unlock(); |
@@ -230,7 +230,7 @@ static int fill_tgid(pid_t tgid, struct task_struct *first, | |||
230 | */ | 230 | */ |
231 | rcu_read_lock(); | 231 | rcu_read_lock(); |
232 | if (!first) | 232 | if (!first) |
233 | first = find_task_by_pid(tgid); | 233 | first = find_task_by_vpid(tgid); |
234 | 234 | ||
235 | if (!first || !lock_task_sighand(first, &flags)) | 235 | if (!first || !lock_task_sighand(first, &flags)) |
236 | goto out; | 236 | goto out; |
@@ -547,7 +547,7 @@ void taskstats_exit(struct task_struct *tsk, int group_dead) | |||
547 | if (!stats) | 547 | if (!stats) |
548 | goto err; | 548 | goto err; |
549 | 549 | ||
550 | rc = fill_pid(tsk->pid, tsk, stats); | 550 | rc = fill_pid(-1, tsk, stats); |
551 | if (rc < 0) | 551 | if (rc < 0) |
552 | goto err; | 552 | goto err; |
553 | 553 | ||
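Both taskstats lookups switch from find_task_by_pid() to find_task_by_vpid(), so an incoming pid is now resolved in the requesting task's pid namespace rather than the initial one, and the exit path passes -1 to make it obvious the pid argument is unused when a task pointer is supplied. The lookup pattern as fill_pid() uses it (sketch; find_task_by_pid_ns(), shown for contrast, is the era's namespace-explicit variant):

	rcu_read_lock();
	tsk = find_task_by_vpid(pid);	/* pid as seen from current's namespace */
	/* find_task_by_pid_ns(pid, &init_pid_ns) would keep the old, global view */
	if (tsk)
		get_task_struct(tsk);	/* hold a reference past rcu_read_unlock() */
	rcu_read_unlock();
	if (!tsk)
		return -ESRCH;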
diff --git a/kernel/time.c b/kernel/time.c index 35d373a98782..cbe0d5a222ff 100644 --- a/kernel/time.c +++ b/kernel/time.c | |||
@@ -35,6 +35,8 @@ | |||
35 | #include <linux/syscalls.h> | 35 | #include <linux/syscalls.h> |
36 | #include <linux/security.h> | 36 | #include <linux/security.h> |
37 | #include <linux/fs.h> | 37 | #include <linux/fs.h> |
38 | #include <linux/slab.h> | ||
39 | #include <linux/math64.h> | ||
38 | 40 | ||
39 | #include <asm/uaccess.h> | 41 | #include <asm/uaccess.h> |
40 | #include <asm/unistd.h> | 42 | #include <asm/unistd.h> |
@@ -390,13 +392,17 @@ EXPORT_SYMBOL(set_normalized_timespec); | |||
390 | struct timespec ns_to_timespec(const s64 nsec) | 392 | struct timespec ns_to_timespec(const s64 nsec) |
391 | { | 393 | { |
392 | struct timespec ts; | 394 | struct timespec ts; |
395 | s32 rem; | ||
393 | 396 | ||
394 | if (!nsec) | 397 | if (!nsec) |
395 | return (struct timespec) {0, 0}; | 398 | return (struct timespec) {0, 0}; |
396 | 399 | ||
397 | ts.tv_sec = div_long_long_rem_signed(nsec, NSEC_PER_SEC, &ts.tv_nsec); | 400 | ts.tv_sec = div_s64_rem(nsec, NSEC_PER_SEC, &rem); |
398 | if (unlikely(nsec < 0)) | 401 | if (unlikely(rem < 0)) { |
399 | set_normalized_timespec(&ts, ts.tv_sec, ts.tv_nsec); | 402 | ts.tv_sec--; |
403 | rem += NSEC_PER_SEC; | ||
404 | } | ||
405 | ts.tv_nsec = rem; | ||
400 | 406 | ||
401 | return ts; | 407 | return ts; |
402 | } | 408 | } |
@@ -526,8 +532,10 @@ jiffies_to_timespec(const unsigned long jiffies, struct timespec *value) | |||
526 | * Convert jiffies to nanoseconds and separate with | 532 | * Convert jiffies to nanoseconds and separate with |
527 | * one divide. | 533 | * one divide. |
528 | */ | 534 | */ |
529 | u64 nsec = (u64)jiffies * TICK_NSEC; | 535 | u32 rem; |
530 | value->tv_sec = div_long_long_rem(nsec, NSEC_PER_SEC, &value->tv_nsec); | 536 | value->tv_sec = div_u64_rem((u64)jiffies * TICK_NSEC, |
537 | NSEC_PER_SEC, &rem); | ||
538 | value->tv_nsec = rem; | ||
531 | } | 539 | } |
532 | EXPORT_SYMBOL(jiffies_to_timespec); | 540 | EXPORT_SYMBOL(jiffies_to_timespec); |
533 | 541 | ||
@@ -565,12 +573,11 @@ void jiffies_to_timeval(const unsigned long jiffies, struct timeval *value) | |||
565 | * Convert jiffies to nanoseconds and separate with | 573 | * Convert jiffies to nanoseconds and separate with |
566 | * one divide. | 574 | * one divide. |
567 | */ | 575 | */ |
568 | u64 nsec = (u64)jiffies * TICK_NSEC; | 576 | u32 rem; |
569 | long tv_usec; | ||
570 | 577 | ||
571 | value->tv_sec = div_long_long_rem(nsec, NSEC_PER_SEC, &tv_usec); | 578 | value->tv_sec = div_u64_rem((u64)jiffies * TICK_NSEC, |
572 | tv_usec /= NSEC_PER_USEC; | 579 | NSEC_PER_SEC, &rem); |
573 | value->tv_usec = tv_usec; | 580 | value->tv_usec = rem / NSEC_PER_USEC; |
574 | } | 581 | } |
575 | EXPORT_SYMBOL(jiffies_to_timeval); | 582 | EXPORT_SYMBOL(jiffies_to_timeval); |
576 | 583 | ||
@@ -586,9 +593,7 @@ clock_t jiffies_to_clock_t(long x) | |||
586 | return x / (HZ / USER_HZ); | 593 | return x / (HZ / USER_HZ); |
587 | # endif | 594 | # endif |
588 | #else | 595 | #else |
589 | u64 tmp = (u64)x * TICK_NSEC; | 596 | return div_u64((u64)x * TICK_NSEC, NSEC_PER_SEC / USER_HZ); |
590 | do_div(tmp, (NSEC_PER_SEC / USER_HZ)); | ||
591 | return (long)tmp; | ||
592 | #endif | 597 | #endif |
593 | } | 598 | } |
594 | EXPORT_SYMBOL(jiffies_to_clock_t); | 599 | EXPORT_SYMBOL(jiffies_to_clock_t); |
@@ -600,16 +605,12 @@ unsigned long clock_t_to_jiffies(unsigned long x) | |||
600 | return ~0UL; | 605 | return ~0UL; |
601 | return x * (HZ / USER_HZ); | 606 | return x * (HZ / USER_HZ); |
602 | #else | 607 | #else |
603 | u64 jif; | ||
604 | |||
605 | /* Don't worry about loss of precision here .. */ | 608 | /* Don't worry about loss of precision here .. */ |
606 | if (x >= ~0UL / HZ * USER_HZ) | 609 | if (x >= ~0UL / HZ * USER_HZ) |
607 | return ~0UL; | 610 | return ~0UL; |
608 | 611 | ||
609 | /* .. but do try to contain it here */ | 612 | /* .. but do try to contain it here */ |
610 | jif = x * (u64) HZ; | 613 | return div_u64((u64)x * HZ, USER_HZ); |
611 | do_div(jif, USER_HZ); | ||
612 | return jif; | ||
613 | #endif | 614 | #endif |
614 | } | 615 | } |
615 | EXPORT_SYMBOL(clock_t_to_jiffies); | 616 | EXPORT_SYMBOL(clock_t_to_jiffies); |
@@ -618,10 +619,9 @@ u64 jiffies_64_to_clock_t(u64 x) | |||
618 | { | 619 | { |
619 | #if (TICK_NSEC % (NSEC_PER_SEC / USER_HZ)) == 0 | 620 | #if (TICK_NSEC % (NSEC_PER_SEC / USER_HZ)) == 0 |
620 | # if HZ < USER_HZ | 621 | # if HZ < USER_HZ |
621 | x *= USER_HZ; | 622 | x = div_u64(x * USER_HZ, HZ); |
622 | do_div(x, HZ); | ||
623 | # elif HZ > USER_HZ | 623 | # elif HZ > USER_HZ |
624 | do_div(x, HZ / USER_HZ); | 624 | x = div_u64(x, HZ / USER_HZ); |
625 | # else | 625 | # else |
626 | /* Nothing to do */ | 626 | /* Nothing to do */ |
627 | # endif | 627 | # endif |
@@ -631,8 +631,7 @@ u64 jiffies_64_to_clock_t(u64 x) | |||
631 | * but even this doesn't overflow in hundreds of years | 631 | * but even this doesn't overflow in hundreds of years |
632 | * in 64 bits, so.. | 632 | * in 64 bits, so.. |
633 | */ | 633 | */ |
634 | x *= TICK_NSEC; | 634 | x = div_u64(x * TICK_NSEC, (NSEC_PER_SEC / USER_HZ)); |
635 | do_div(x, (NSEC_PER_SEC / USER_HZ)); | ||
636 | #endif | 635 | #endif |
637 | return x; | 636 | return x; |
638 | } | 637 | } |
@@ -641,21 +640,17 @@ EXPORT_SYMBOL(jiffies_64_to_clock_t); | |||
641 | u64 nsec_to_clock_t(u64 x) | 640 | u64 nsec_to_clock_t(u64 x) |
642 | { | 641 | { |
643 | #if (NSEC_PER_SEC % USER_HZ) == 0 | 642 | #if (NSEC_PER_SEC % USER_HZ) == 0 |
644 | do_div(x, (NSEC_PER_SEC / USER_HZ)); | 643 | return div_u64(x, NSEC_PER_SEC / USER_HZ); |
645 | #elif (USER_HZ % 512) == 0 | 644 | #elif (USER_HZ % 512) == 0 |
646 | x *= USER_HZ/512; | 645 | return div_u64(x * USER_HZ / 512, NSEC_PER_SEC / 512); |
647 | do_div(x, (NSEC_PER_SEC / 512)); | ||
648 | #else | 646 | #else |
649 | /* | 647 | /* |
650 | * max relative error 5.7e-8 (1.8s per year) for USER_HZ <= 1024, | 648 | * max relative error 5.7e-8 (1.8s per year) for USER_HZ <= 1024, |
651 | * overflow after 64.99 years. | 649 | * overflow after 64.99 years. |
652 | * exact for HZ=60, 72, 90, 120, 144, 180, 300, 600, 900, ... | 650 | * exact for HZ=60, 72, 90, 120, 144, 180, 300, 600, 900, ... |
653 | */ | 651 | */ |
654 | x *= 9; | 652 | return div_u64(x * 9, (9ull * NSEC_PER_SEC + (USER_HZ / 2)) / USER_HZ); |
655 | do_div(x, (unsigned long)((9ull * NSEC_PER_SEC + (USER_HZ/2)) / | ||
656 | USER_HZ)); | ||
657 | #endif | 653 | #endif |
658 | return x; | ||
659 | } | 654 | } |
660 | 655 | ||
661 | #if (BITS_PER_LONG < 64) | 656 | #if (BITS_PER_LONG < 64) |
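All of the kernel/time.c conversions trade open-coded do_div()/div_long_long_rem() sequences for the linux/math64.h helpers (div_u64(), div_u64_rem(), div_s64_rem()). The one behavioural subtlety is in ns_to_timespec(): div_s64_rem() truncates toward zero, so a negative input leaves a negative remainder, which the new code normalizes by borrowing a second. A standalone model of that arithmetic (plain userspace C, illustrative):

	#include <stdio.h>
	#include <stdint.h>

	#define NSEC_PER_SEC 1000000000L

	struct ts { int64_t tv_sec; long tv_nsec; };

	static struct ts ns_to_ts(int64_t nsec)
	{
		struct ts t;
		long rem;

		t.tv_sec = nsec / NSEC_PER_SEC;	/* truncates toward zero,
						   like div_s64_rem() */
		rem = nsec % NSEC_PER_SEC;
		if (rem < 0) {			/* normalize to 0 <= tv_nsec < 1s */
			t.tv_sec--;
			rem += NSEC_PER_SEC;
		}
		t.tv_nsec = rem;
		return t;
	}

	int main(void)
	{
		struct ts t = ns_to_ts(-1);
		/* prints "-1 999999999": one second borrowed, remainder folded up */
		printf("%lld %ld\n", (long long)t.tv_sec, t.tv_nsec);
		return 0;
	}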
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 5fd9b9469770..5125ddd8196b 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c | |||
@@ -15,7 +15,8 @@ | |||
15 | #include <linux/jiffies.h> | 15 | #include <linux/jiffies.h> |
16 | #include <linux/hrtimer.h> | 16 | #include <linux/hrtimer.h> |
17 | #include <linux/capability.h> | 17 | #include <linux/capability.h> |
18 | #include <asm/div64.h> | 18 | #include <linux/math64.h> |
19 | #include <linux/clocksource.h> | ||
19 | #include <asm/timex.h> | 20 | #include <asm/timex.h> |
20 | 21 | ||
21 | /* | 22 | /* |
@@ -23,11 +24,14 @@ | |||
23 | */ | 24 | */ |
24 | unsigned long tick_usec = TICK_USEC; /* USER_HZ period (usec) */ | 25 | unsigned long tick_usec = TICK_USEC; /* USER_HZ period (usec) */ |
25 | unsigned long tick_nsec; /* ACTHZ period (nsec) */ | 26 | unsigned long tick_nsec; /* ACTHZ period (nsec) */ |
26 | static u64 tick_length, tick_length_base; | 27 | u64 tick_length; |
28 | static u64 tick_length_base; | ||
29 | |||
30 | static struct hrtimer leap_timer; | ||
27 | 31 | ||
28 | #define MAX_TICKADJ 500 /* microsecs */ | 32 | #define MAX_TICKADJ 500 /* microsecs */ |
29 | #define MAX_TICKADJ_SCALED (((u64)(MAX_TICKADJ * NSEC_PER_USEC) << \ | 33 | #define MAX_TICKADJ_SCALED (((u64)(MAX_TICKADJ * NSEC_PER_USEC) << \ |
30 | TICK_LENGTH_SHIFT) / NTP_INTERVAL_FREQ) | 34 | NTP_SCALE_SHIFT) / NTP_INTERVAL_FREQ) |
31 | 35 | ||
32 | /* | 36 | /* |
33 | * phase-lock loop variables | 37 | * phase-lock loop variables |
@@ -35,11 +39,12 @@ static u64 tick_length, tick_length_base; | |||
35 | /* TIME_ERROR prevents overwriting the CMOS clock */ | 39 | /* TIME_ERROR prevents overwriting the CMOS clock */ |
36 | static int time_state = TIME_OK; /* clock synchronization status */ | 40 | static int time_state = TIME_OK; /* clock synchronization status */ |
37 | int time_status = STA_UNSYNC; /* clock status bits */ | 41 | int time_status = STA_UNSYNC; /* clock status bits */ |
38 | static s64 time_offset; /* time adjustment (ns) */ | 42 | static long time_tai; /* TAI offset (s) */ |
43 | static s64 time_offset; /* time adjustment (ns) */ | ||
39 | static long time_constant = 2; /* pll time constant */ | 44 | static long time_constant = 2; /* pll time constant */ |
40 | long time_maxerror = NTP_PHASE_LIMIT; /* maximum error (us) */ | 45 | long time_maxerror = NTP_PHASE_LIMIT; /* maximum error (us) */ |
41 | long time_esterror = NTP_PHASE_LIMIT; /* estimated error (us) */ | 46 | long time_esterror = NTP_PHASE_LIMIT; /* estimated error (us) */ |
42 | long time_freq; /* frequency offset (scaled ppm)*/ | 47 | static s64 time_freq; /* frequency offset (scaled ns/s)*/ |
43 | static long time_reftime; /* time at last adjustment (s) */ | 48 | static long time_reftime; /* time at last adjustment (s) */ |
44 | long time_adjust; | 49 | long time_adjust; |
45 | static long ntp_tick_adj; | 50 | static long ntp_tick_adj; |
@@ -47,16 +52,56 @@ static long ntp_tick_adj; | |||
47 | static void ntp_update_frequency(void) | 52 | static void ntp_update_frequency(void) |
48 | { | 53 | { |
49 | u64 second_length = (u64)(tick_usec * NSEC_PER_USEC * USER_HZ) | 54 | u64 second_length = (u64)(tick_usec * NSEC_PER_USEC * USER_HZ) |
50 | << TICK_LENGTH_SHIFT; | 55 | << NTP_SCALE_SHIFT; |
51 | second_length += (s64)ntp_tick_adj << TICK_LENGTH_SHIFT; | 56 | second_length += (s64)ntp_tick_adj << NTP_SCALE_SHIFT; |
52 | second_length += (s64)time_freq << (TICK_LENGTH_SHIFT - SHIFT_NSEC); | 57 | second_length += time_freq; |
53 | 58 | ||
54 | tick_length_base = second_length; | 59 | tick_length_base = second_length; |
55 | 60 | ||
56 | do_div(second_length, HZ); | 61 | tick_nsec = div_u64(second_length, HZ) >> NTP_SCALE_SHIFT; |
57 | tick_nsec = second_length >> TICK_LENGTH_SHIFT; | 62 | tick_length_base = div_u64(tick_length_base, NTP_INTERVAL_FREQ); |
63 | } | ||
64 | |||
65 | static void ntp_update_offset(long offset) | ||
66 | { | ||
67 | long mtemp; | ||
68 | s64 freq_adj; | ||
69 | |||
70 | if (!(time_status & STA_PLL)) | ||
71 | return; | ||
58 | 72 | ||
59 | do_div(tick_length_base, NTP_INTERVAL_FREQ); | 73 | if (!(time_status & STA_NANO)) |
74 | offset *= NSEC_PER_USEC; | ||
75 | |||
76 | /* | ||
77 | * Scale the phase adjustment and | ||
78 | * clamp to the operating range. | ||
79 | */ | ||
80 | offset = min(offset, MAXPHASE); | ||
81 | offset = max(offset, -MAXPHASE); | ||
82 | |||
83 | /* | ||
84 | * Select how the frequency is to be controlled | ||
85 | * and in which mode (PLL or FLL). | ||
86 | */ | ||
87 | if (time_status & STA_FREQHOLD || time_reftime == 0) | ||
88 | time_reftime = xtime.tv_sec; | ||
89 | mtemp = xtime.tv_sec - time_reftime; | ||
90 | time_reftime = xtime.tv_sec; | ||
91 | |||
92 | freq_adj = (s64)offset * mtemp; | ||
93 | freq_adj <<= NTP_SCALE_SHIFT - 2 * (SHIFT_PLL + 2 + time_constant); | ||
94 | time_status &= ~STA_MODE; | ||
95 | if (mtemp >= MINSEC && (time_status & STA_FLL || mtemp > MAXSEC)) { | ||
96 | freq_adj += div_s64((s64)offset << (NTP_SCALE_SHIFT - SHIFT_FLL), | ||
97 | mtemp); | ||
98 | time_status |= STA_MODE; | ||
99 | } | ||
100 | freq_adj += time_freq; | ||
101 | freq_adj = min(freq_adj, MAXFREQ_SCALED); | ||
102 | time_freq = max(freq_adj, -MAXFREQ_SCALED); | ||
103 | |||
104 | time_offset = div_s64((s64)offset << NTP_SCALE_SHIFT, NTP_INTERVAL_FREQ); | ||
60 | } | 105 | } |
61 | 106 | ||
62 | /** | 107 | /** |
@@ -78,62 +123,70 @@ void ntp_clear(void) | |||
78 | } | 123 | } |
79 | 124 | ||
80 | /* | 125 | /* |
81 | * this routine handles the overflow of the microsecond field | 126 | * Leap second processing. If in leap-insert state at the end of the |
82 | * | 127 | * day, the system clock is set back one second; if in leap-delete |
83 | * The tricky bits of code to handle the accurate clock support | 128 | * state, the system clock is set ahead one second. |
84 | * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame. | ||
85 | * They were originally developed for SUN and DEC kernels. | ||
86 | * All the kudos should go to Dave for this stuff. | ||
87 | */ | 129 | */ |
88 | void second_overflow(void) | 130 | static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer) |
89 | { | 131 | { |
90 | long time_adj; | 132 | enum hrtimer_restart res = HRTIMER_NORESTART; |
91 | 133 | ||
92 | /* Bump the maxerror field */ | 134 | write_seqlock_irq(&xtime_lock); |
93 | time_maxerror += MAXFREQ >> SHIFT_USEC; | ||
94 | if (time_maxerror > NTP_PHASE_LIMIT) { | ||
95 | time_maxerror = NTP_PHASE_LIMIT; | ||
96 | time_status |= STA_UNSYNC; | ||
97 | } | ||
98 | 135 | ||
99 | /* | ||
100 | * Leap second processing. If in leap-insert state at the end of the | ||
101 | * day, the system clock is set back one second; if in leap-delete | ||
102 | * state, the system clock is set ahead one second. The microtime() | ||
103 | * routine or external clock driver will insure that reported time is | ||
104 | * always monotonic. The ugly divides should be replaced. | ||
105 | */ | ||
106 | switch (time_state) { | 136 | switch (time_state) { |
107 | case TIME_OK: | 137 | case TIME_OK: |
108 | if (time_status & STA_INS) | ||
109 | time_state = TIME_INS; | ||
110 | else if (time_status & STA_DEL) | ||
111 | time_state = TIME_DEL; | ||
112 | break; | 138 | break; |
113 | case TIME_INS: | 139 | case TIME_INS: |
114 | if (xtime.tv_sec % 86400 == 0) { | 140 | xtime.tv_sec--; |
115 | xtime.tv_sec--; | 141 | wall_to_monotonic.tv_sec++; |
116 | wall_to_monotonic.tv_sec++; | 142 | time_state = TIME_OOP; |
117 | time_state = TIME_OOP; | 143 | printk(KERN_NOTICE "Clock: " |
118 | printk(KERN_NOTICE "Clock: inserting leap second " | 144 | "inserting leap second 23:59:60 UTC\n"); |
119 | "23:59:60 UTC\n"); | 145 | leap_timer.expires = ktime_add_ns(leap_timer.expires, |
120 | } | 146 | NSEC_PER_SEC); |
147 | res = HRTIMER_RESTART; | ||
121 | break; | 148 | break; |
122 | case TIME_DEL: | 149 | case TIME_DEL: |
123 | if ((xtime.tv_sec + 1) % 86400 == 0) { | 150 | xtime.tv_sec++; |
124 | xtime.tv_sec++; | 151 | time_tai--; |
125 | wall_to_monotonic.tv_sec--; | 152 | wall_to_monotonic.tv_sec--; |
126 | time_state = TIME_WAIT; | 153 | time_state = TIME_WAIT; |
127 | printk(KERN_NOTICE "Clock: deleting leap second " | 154 | printk(KERN_NOTICE "Clock: " |
128 | "23:59:59 UTC\n"); | 155 | "deleting leap second 23:59:59 UTC\n"); |
129 | } | ||
130 | break; | 156 | break; |
131 | case TIME_OOP: | 157 | case TIME_OOP: |
158 | time_tai++; | ||
132 | time_state = TIME_WAIT; | 159 | time_state = TIME_WAIT; |
133 | break; | 160 | /* fall through */ |
134 | case TIME_WAIT: | 161 | case TIME_WAIT: |
135 | if (!(time_status & (STA_INS | STA_DEL))) | 162 | if (!(time_status & (STA_INS | STA_DEL))) |
136 | time_state = TIME_OK; | 163 | time_state = TIME_OK; |
164 | break; | ||
165 | } | ||
166 | update_vsyscall(&xtime, clock); | ||
167 | |||
168 | write_sequnlock_irq(&xtime_lock); | ||
169 | |||
170 | return res; | ||
171 | } | ||
172 | |||
173 | /* | ||
174 | * this routine handles the overflow of the microsecond field | ||
175 | * | ||
176 | * The tricky bits of code to handle the accurate clock support | ||
177 | * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame. | ||
178 | * They were originally developed for SUN and DEC kernels. | ||
179 | * All the kudos should go to Dave for this stuff. | ||
180 | */ | ||
181 | void second_overflow(void) | ||
182 | { | ||
183 | s64 time_adj; | ||
184 | |||
185 | /* Bump the maxerror field */ | ||
186 | time_maxerror += MAXFREQ / NSEC_PER_USEC; | ||
187 | if (time_maxerror > NTP_PHASE_LIMIT) { | ||
188 | time_maxerror = NTP_PHASE_LIMIT; | ||
189 | time_status |= STA_UNSYNC; | ||
137 | } | 190 | } |
138 | 191 | ||
139 | /* | 192 | /* |
@@ -143,7 +196,7 @@ void second_overflow(void) | |||
143 | tick_length = tick_length_base; | 196 | tick_length = tick_length_base; |
144 | time_adj = shift_right(time_offset, SHIFT_PLL + time_constant); | 197 | time_adj = shift_right(time_offset, SHIFT_PLL + time_constant); |
145 | time_offset -= time_adj; | 198 | time_offset -= time_adj; |
146 | tick_length += (s64)time_adj << (TICK_LENGTH_SHIFT - SHIFT_UPDATE); | 199 | tick_length += time_adj; |
147 | 200 | ||
148 | if (unlikely(time_adjust)) { | 201 | if (unlikely(time_adjust)) { |
149 | if (time_adjust > MAX_TICKADJ) { | 202 | if (time_adjust > MAX_TICKADJ) { |
@@ -154,25 +207,12 @@ void second_overflow(void) | |||
154 | tick_length -= MAX_TICKADJ_SCALED; | 207 | tick_length -= MAX_TICKADJ_SCALED; |
155 | } else { | 208 | } else { |
156 | tick_length += (s64)(time_adjust * NSEC_PER_USEC / | 209 | tick_length += (s64)(time_adjust * NSEC_PER_USEC / |
157 | NTP_INTERVAL_FREQ) << TICK_LENGTH_SHIFT; | 210 | NTP_INTERVAL_FREQ) << NTP_SCALE_SHIFT; |
158 | time_adjust = 0; | 211 | time_adjust = 0; |
159 | } | 212 | } |
160 | } | 213 | } |
161 | } | 214 | } |
162 | 215 | ||
163 | /* | ||
164 | * Return how long ticks are at the moment, that is, how much time | ||
165 | * update_wall_time_one_tick will add to xtime next time we call it | ||
166 | * (assuming no calls to do_adjtimex in the meantime). | ||
167 | * The return value is in fixed-point nanoseconds shifted by the | ||
168 | * specified number of bits to the right of the binary point. | ||
169 | * This function has no side-effects. | ||
170 | */ | ||
171 | u64 current_tick_length(void) | ||
172 | { | ||
173 | return tick_length; | ||
174 | } | ||
175 | |||
176 | #ifdef CONFIG_GENERIC_CMOS_UPDATE | 216 | #ifdef CONFIG_GENERIC_CMOS_UPDATE |
177 | 217 | ||
178 | /* Disable the cmos update - used by virtualization and embedded */ | 218 | /* Disable the cmos update - used by virtualization and embedded */ |
@@ -236,8 +276,8 @@ static inline void notify_cmos_timer(void) { } | |||
236 | */ | 276 | */ |
237 | int do_adjtimex(struct timex *txc) | 277 | int do_adjtimex(struct timex *txc) |
238 | { | 278 | { |
239 | long mtemp, save_adjust, rem; | 279 | struct timespec ts; |
240 | s64 freq_adj, temp64; | 280 | long save_adjust, sec; |
241 | int result; | 281 | int result; |
242 | 282 | ||
243 | /* In order to modify anything, you gotta be super-user! */ | 283 | /* In order to modify anything, you gotta be super-user! */ |
@@ -247,147 +287,132 @@ int do_adjtimex(struct timex *txc) | |||
247 | /* Now we validate the data before disabling interrupts */ | 287 | /* Now we validate the data before disabling interrupts */ |
248 | 288 | ||
249 | if ((txc->modes & ADJ_OFFSET_SINGLESHOT) == ADJ_OFFSET_SINGLESHOT) { | 289 | if ((txc->modes & ADJ_OFFSET_SINGLESHOT) == ADJ_OFFSET_SINGLESHOT) { |
250 | /* singleshot must not be used with any other mode bits */ | 290 | /* singleshot must not be used with any other mode bits */ |
251 | if (txc->modes != ADJ_OFFSET_SINGLESHOT && | 291 | if (txc->modes & ~ADJ_OFFSET_SS_READ) |
252 | txc->modes != ADJ_OFFSET_SS_READ) | ||
253 | return -EINVAL; | 292 | return -EINVAL; |
254 | } | 293 | } |
255 | 294 | ||
256 | if (txc->modes != ADJ_OFFSET_SINGLESHOT && (txc->modes & ADJ_OFFSET)) | ||
257 | /* adjustment Offset limited to +- .512 seconds */ | ||
258 | if (txc->offset <= - MAXPHASE || txc->offset >= MAXPHASE ) | ||
259 | return -EINVAL; | ||
260 | |||
261 | /* if the quartz is off by more than 10% something is VERY wrong ! */ | 295 | /* if the quartz is off by more than 10% something is VERY wrong ! */ |
262 | if (txc->modes & ADJ_TICK) | 296 | if (txc->modes & ADJ_TICK) |
263 | if (txc->tick < 900000/USER_HZ || | 297 | if (txc->tick < 900000/USER_HZ || |
264 | txc->tick > 1100000/USER_HZ) | 298 | txc->tick > 1100000/USER_HZ) |
265 | return -EINVAL; | 299 | return -EINVAL; |
266 | 300 | ||
301 | if (time_state != TIME_OK && txc->modes & ADJ_STATUS) | ||
302 | hrtimer_cancel(&leap_timer); | ||
303 | getnstimeofday(&ts); | ||
304 | |||
267 | write_seqlock_irq(&xtime_lock); | 305 | write_seqlock_irq(&xtime_lock); |
268 | result = time_state; /* mostly `TIME_OK' */ | ||
269 | 306 | ||
270 | /* Save for later - semantics of adjtime is to return old value */ | 307 | /* Save for later - semantics of adjtime is to return old value */ |
271 | save_adjust = time_adjust; | 308 | save_adjust = time_adjust; |
272 | 309 | ||
273 | #if 0 /* STA_CLOCKERR is never set yet */ | ||
274 | time_status &= ~STA_CLOCKERR; /* reset STA_CLOCKERR */ | ||
275 | #endif | ||
276 | /* If there are input parameters, then process them */ | 310 | /* If there are input parameters, then process them */ |
277 | if (txc->modes) | 311 | if (txc->modes) { |
278 | { | 312 | if (txc->modes & ADJ_STATUS) { |
279 | if (txc->modes & ADJ_STATUS) /* only set allowed bits */ | 313 | if ((time_status & STA_PLL) && |
280 | time_status = (txc->status & ~STA_RONLY) | | 314 | !(txc->status & STA_PLL)) { |
281 | (time_status & STA_RONLY); | 315 | time_state = TIME_OK; |
282 | 316 | time_status = STA_UNSYNC; | |
283 | if (txc->modes & ADJ_FREQUENCY) { /* p. 22 */ | 317 | } |
284 | if (txc->freq > MAXFREQ || txc->freq < -MAXFREQ) { | 318 | /* only set allowed bits */ |
285 | result = -EINVAL; | 319 | time_status &= STA_RONLY; |
286 | goto leave; | 320 | time_status |= txc->status & ~STA_RONLY; |
287 | } | 321 | |
288 | time_freq = ((s64)txc->freq * NSEC_PER_USEC) | 322 | switch (time_state) { |
289 | >> (SHIFT_USEC - SHIFT_NSEC); | 323 | case TIME_OK: |
290 | } | 324 | start_timer: |
291 | 325 | sec = ts.tv_sec; | |
292 | if (txc->modes & ADJ_MAXERROR) { | 326 | if (time_status & STA_INS) { |
293 | if (txc->maxerror < 0 || txc->maxerror >= NTP_PHASE_LIMIT) { | 327 | time_state = TIME_INS; |
294 | result = -EINVAL; | 328 | sec += 86400 - sec % 86400; |
295 | goto leave; | 329 | hrtimer_start(&leap_timer, ktime_set(sec, 0), HRTIMER_MODE_ABS); |
330 | } else if (time_status & STA_DEL) { | ||
331 | time_state = TIME_DEL; | ||
332 | sec += 86400 - (sec + 1) % 86400; | ||
333 | hrtimer_start(&leap_timer, ktime_set(sec, 0), HRTIMER_MODE_ABS); | ||
334 | } | ||
335 | break; | ||
336 | case TIME_INS: | ||
337 | case TIME_DEL: | ||
338 | time_state = TIME_OK; | ||
339 | goto start_timer; | ||
340 | break; | ||
341 | case TIME_WAIT: | ||
342 | if (!(time_status & (STA_INS | STA_DEL))) | ||
343 | time_state = TIME_OK; | ||
344 | break; | ||
345 | case TIME_OOP: | ||
346 | hrtimer_restart(&leap_timer); | ||
347 | break; | ||
348 | } | ||
296 | } | 349 | } |
297 | time_maxerror = txc->maxerror; | ||
298 | } | ||
299 | 350 | ||
300 | if (txc->modes & ADJ_ESTERROR) { | 351 | if (txc->modes & ADJ_NANO) |
301 | if (txc->esterror < 0 || txc->esterror >= NTP_PHASE_LIMIT) { | 352 | time_status |= STA_NANO; |
302 | result = -EINVAL; | 353 | if (txc->modes & ADJ_MICRO) |
303 | goto leave; | 354 | time_status &= ~STA_NANO; |
355 | |||
356 | if (txc->modes & ADJ_FREQUENCY) { | ||
357 | time_freq = (s64)txc->freq * PPM_SCALE; | ||
358 | time_freq = min(time_freq, MAXFREQ_SCALED); | ||
359 | time_freq = max(time_freq, -MAXFREQ_SCALED); | ||
304 | } | 360 | } |
305 | time_esterror = txc->esterror; | ||
306 | } | ||
307 | 361 | ||
308 | if (txc->modes & ADJ_TIMECONST) { /* p. 24 */ | 362 | if (txc->modes & ADJ_MAXERROR) |
309 | if (txc->constant < 0) { /* NTP v4 uses values > 6 */ | 363 | time_maxerror = txc->maxerror; |
310 | result = -EINVAL; | 364 | if (txc->modes & ADJ_ESTERROR) |
311 | goto leave; | 365 | time_esterror = txc->esterror; |
366 | |||
367 | if (txc->modes & ADJ_TIMECONST) { | ||
368 | time_constant = txc->constant; | ||
369 | if (!(time_status & STA_NANO)) | ||
370 | time_constant += 4; | ||
371 | time_constant = min(time_constant, (long)MAXTC); | ||
372 | time_constant = max(time_constant, 0l); | ||
312 | } | 373 | } |
313 | time_constant = min(txc->constant + 4, (long)MAXTC); | ||
314 | } | ||
315 | 374 | ||
316 | if (txc->modes & ADJ_OFFSET) { /* values checked earlier */ | 375 | if (txc->modes & ADJ_TAI && txc->constant > 0) |
317 | if (txc->modes == ADJ_OFFSET_SINGLESHOT) { | 376 | time_tai = txc->constant; |
318 | /* adjtime() is independent from ntp_adjtime() */ | 377 | |
319 | time_adjust = txc->offset; | 378 | if (txc->modes & ADJ_OFFSET) { |
379 | if (txc->modes == ADJ_OFFSET_SINGLESHOT) | ||
380 | /* adjtime() is independent from ntp_adjtime() */ | ||
381 | time_adjust = txc->offset; | ||
382 | else | ||
383 | ntp_update_offset(txc->offset); | ||
320 | } | 384 | } |
321 | else if (time_status & STA_PLL) { | 385 | if (txc->modes & ADJ_TICK) |
322 | time_offset = txc->offset * NSEC_PER_USEC; | 386 | tick_usec = txc->tick; |
323 | 387 | ||
324 | /* | 388 | if (txc->modes & (ADJ_TICK|ADJ_FREQUENCY|ADJ_OFFSET)) |
325 | * Scale the phase adjustment and | 389 | ntp_update_frequency(); |
326 | * clamp to the operating range. | 390 | } |
327 | */ | 391 | |
328 | time_offset = min(time_offset, (s64)MAXPHASE * NSEC_PER_USEC); | 392 | result = time_state; /* mostly `TIME_OK' */ |
329 | time_offset = max(time_offset, (s64)-MAXPHASE * NSEC_PER_USEC); | 393 | if (time_status & (STA_UNSYNC|STA_CLOCKERR)) |
330 | |||
331 | /* | ||
332 | * Select whether the frequency is to be controlled | ||
333 | * and in which mode (PLL or FLL). Clamp to the operating | ||
334 | * range. Ugly multiply/divide should be replaced someday. | ||
335 | */ | ||
336 | |||
337 | if (time_status & STA_FREQHOLD || time_reftime == 0) | ||
338 | time_reftime = xtime.tv_sec; | ||
339 | mtemp = xtime.tv_sec - time_reftime; | ||
340 | time_reftime = xtime.tv_sec; | ||
341 | |||
342 | freq_adj = time_offset * mtemp; | ||
343 | freq_adj = shift_right(freq_adj, time_constant * 2 + | ||
344 | (SHIFT_PLL + 2) * 2 - SHIFT_NSEC); | ||
345 | if (mtemp >= MINSEC && (time_status & STA_FLL || mtemp > MAXSEC)) { | ||
346 | u64 utemp64; | ||
347 | temp64 = time_offset << (SHIFT_NSEC - SHIFT_FLL); | ||
348 | if (time_offset < 0) { | ||
349 | utemp64 = -temp64; | ||
350 | do_div(utemp64, mtemp); | ||
351 | freq_adj -= utemp64; | ||
352 | } else { | ||
353 | utemp64 = temp64; | ||
354 | do_div(utemp64, mtemp); | ||
355 | freq_adj += utemp64; | ||
356 | } | ||
357 | } | ||
358 | freq_adj += time_freq; | ||
359 | freq_adj = min(freq_adj, (s64)MAXFREQ_NSEC); | ||
360 | time_freq = max(freq_adj, (s64)-MAXFREQ_NSEC); | ||
361 | time_offset = div_long_long_rem_signed(time_offset, | ||
362 | NTP_INTERVAL_FREQ, | ||
363 | &rem); | ||
364 | time_offset <<= SHIFT_UPDATE; | ||
365 | } /* STA_PLL */ | ||
366 | } /* txc->modes & ADJ_OFFSET */ | ||
367 | if (txc->modes & ADJ_TICK) | ||
368 | tick_usec = txc->tick; | ||
369 | |||
370 | if (txc->modes & (ADJ_TICK|ADJ_FREQUENCY|ADJ_OFFSET)) | ||
371 | ntp_update_frequency(); | ||
372 | } /* txc->modes */ | ||
373 | leave: if ((time_status & (STA_UNSYNC|STA_CLOCKERR)) != 0) | ||
374 | result = TIME_ERROR; | 394 | result = TIME_ERROR; |
375 | 395 | ||
376 | if ((txc->modes == ADJ_OFFSET_SINGLESHOT) || | 396 | if ((txc->modes == ADJ_OFFSET_SINGLESHOT) || |
377 | (txc->modes == ADJ_OFFSET_SS_READ)) | 397 | (txc->modes == ADJ_OFFSET_SS_READ)) |
378 | txc->offset = save_adjust; | 398 | txc->offset = save_adjust; |
379 | else | 399 | else { |
380 | txc->offset = ((long)shift_right(time_offset, SHIFT_UPDATE)) * | 400 | txc->offset = shift_right(time_offset * NTP_INTERVAL_FREQ, |
381 | NTP_INTERVAL_FREQ / 1000; | 401 | NTP_SCALE_SHIFT); |
382 | txc->freq = (time_freq / NSEC_PER_USEC) << | 402 | if (!(time_status & STA_NANO)) |
383 | (SHIFT_USEC - SHIFT_NSEC); | 403 | txc->offset /= NSEC_PER_USEC; |
404 | } | ||
405 | txc->freq = shift_right((s32)(time_freq >> PPM_SCALE_INV_SHIFT) * | ||
406 | (s64)PPM_SCALE_INV, | ||
407 | NTP_SCALE_SHIFT); | ||
384 | txc->maxerror = time_maxerror; | 408 | txc->maxerror = time_maxerror; |
385 | txc->esterror = time_esterror; | 409 | txc->esterror = time_esterror; |
386 | txc->status = time_status; | 410 | txc->status = time_status; |
387 | txc->constant = time_constant; | 411 | txc->constant = time_constant; |
388 | txc->precision = 1; | 412 | txc->precision = 1; |
389 | txc->tolerance = MAXFREQ; | 413 | txc->tolerance = MAXFREQ_SCALED / PPM_SCALE; |
390 | txc->tick = tick_usec; | 414 | txc->tick = tick_usec; |
415 | txc->tai = time_tai; | ||
391 | 416 | ||
392 | /* PPS is not implemented, so these are zero */ | 417 | /* PPS is not implemented, so these are zero */ |
393 | txc->ppsfreq = 0; | 418 | txc->ppsfreq = 0; |
@@ -399,9 +424,15 @@ leave: if ((time_status & (STA_UNSYNC|STA_CLOCKERR)) != 0) | |||
399 | txc->errcnt = 0; | 424 | txc->errcnt = 0; |
400 | txc->stbcnt = 0; | 425 | txc->stbcnt = 0; |
401 | write_sequnlock_irq(&xtime_lock); | 426 | write_sequnlock_irq(&xtime_lock); |
402 | do_gettimeofday(&txc->time); | 427 | |
428 | txc->time.tv_sec = ts.tv_sec; | ||
429 | txc->time.tv_usec = ts.tv_nsec; | ||
430 | if (!(time_status & STA_NANO)) | ||
431 | txc->time.tv_usec /= NSEC_PER_USEC; | ||
432 | |||
403 | notify_cmos_timer(); | 433 | notify_cmos_timer(); |
404 | return(result); | 434 | |
435 | return result; | ||
405 | } | 436 | } |
406 | 437 | ||
407 | static int __init ntp_tick_adj_setup(char *str) | 438 | static int __init ntp_tick_adj_setup(char *str) |
@@ -411,3 +442,10 @@ static int __init ntp_tick_adj_setup(char *str) | |||
411 | } | 442 | } |
412 | 443 | ||
413 | __setup("ntp_tick_adj=", ntp_tick_adj_setup); | 444 | __setup("ntp_tick_adj=", ntp_tick_adj_setup); |
445 | |||
446 | void __init ntp_init(void) | ||
447 | { | ||
448 | ntp_clear(); | ||
449 | hrtimer_init(&leap_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); | ||
450 | leap_timer.function = ntp_leap_second; | ||
451 | } | ||
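The leap-second rework deserves a note: instead of second_overflow() polling xtime.tv_sec % 86400 once per second, do_adjtimex() now arms an absolute CLOCK_REALTIME hrtimer at the next UTC midnight and ntp_leap_second() steps the clock there. The rounding in the STA_INS branch is sec += 86400 - sec % 86400. A tiny check of that arithmetic (userspace C, using the real 2008-12-31 leap second as the example date):

	#include <assert.h>

	/* Round a CLOCK_REALTIME second count up to the next UTC midnight,
	 * as the STA_INS branch above does before arming leap_timer. */
	static long next_midnight(long sec)
	{
		return sec + (86400 - sec % 86400);
	}

	int main(void)
	{
		/* 1230767999 == 2008-12-31 23:59:59 UTC */
		assert(next_midnight(1230767999) == 1230768000);
		/* exactly at midnight, the expiry moves a full day ahead */
		assert(next_midnight(1230768000) == 1230768000 + 86400);
		return 0;
	}

For STA_DEL the expression is sec += 86400 - (sec + 1) % 86400, so the timer fires at 23:59:59, the second to be skipped.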
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 2d6087c7cf98..e91c29f961c9 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c | |||
@@ -53,7 +53,7 @@ void update_xtime_cache(u64 nsec) | |||
53 | timespec_add_ns(&xtime_cache, nsec); | 53 | timespec_add_ns(&xtime_cache, nsec); |
54 | } | 54 | } |
55 | 55 | ||
56 | static struct clocksource *clock; /* pointer to current clocksource */ | 56 | struct clocksource *clock; |
57 | 57 | ||
58 | 58 | ||
59 | #ifdef CONFIG_GENERIC_TIME | 59 | #ifdef CONFIG_GENERIC_TIME |
@@ -246,7 +246,7 @@ void __init timekeeping_init(void) | |||
246 | 246 | ||
247 | write_seqlock_irqsave(&xtime_lock, flags); | 247 | write_seqlock_irqsave(&xtime_lock, flags); |
248 | 248 | ||
249 | ntp_clear(); | 249 | ntp_init(); |
250 | 250 | ||
251 | clock = clocksource_get_next(); | 251 | clock = clocksource_get_next(); |
252 | clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH); | 252 | clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH); |
@@ -371,7 +371,7 @@ static __always_inline int clocksource_bigadjust(s64 error, s64 *interval, | |||
371 | * here. This is tuned so that an error of about 1 msec is adjusted | 371 | * here. This is tuned so that an error of about 1 msec is adjusted |
372 | * within about 1 sec (or 2^20 nsec in 2^SHIFT_HZ ticks). | 372 | * within about 1 sec (or 2^20 nsec in 2^SHIFT_HZ ticks). |
373 | */ | 373 | */ |
374 | error2 = clock->error >> (TICK_LENGTH_SHIFT + 22 - 2 * SHIFT_HZ); | 374 | error2 = clock->error >> (NTP_SCALE_SHIFT + 22 - 2 * SHIFT_HZ); |
375 | error2 = abs(error2); | 375 | error2 = abs(error2); |
376 | for (look_ahead = 0; error2 > 0; look_ahead++) | 376 | for (look_ahead = 0; error2 > 0; look_ahead++) |
377 | error2 >>= 2; | 377 | error2 >>= 2; |
@@ -380,8 +380,7 @@ static __always_inline int clocksource_bigadjust(s64 error, s64 *interval, | |||
380 | * Now calculate the error in (1 << look_ahead) ticks, but first | 380 | * Now calculate the error in (1 << look_ahead) ticks, but first |
381 | * remove the single look ahead already included in the error. | 381 | * remove the single look ahead already included in the error. |
382 | */ | 382 | */ |
383 | tick_error = current_tick_length() >> | 383 | tick_error = tick_length >> (NTP_SCALE_SHIFT - clock->shift + 1); |
384 | (TICK_LENGTH_SHIFT - clock->shift + 1); | ||
385 | tick_error -= clock->xtime_interval >> 1; | 384 | tick_error -= clock->xtime_interval >> 1; |
386 | error = ((error - tick_error) >> look_ahead) + tick_error; | 385 | error = ((error - tick_error) >> look_ahead) + tick_error; |
387 | 386 | ||
@@ -412,7 +411,7 @@ static void clocksource_adjust(s64 offset) | |||
412 | s64 error, interval = clock->cycle_interval; | 411 | s64 error, interval = clock->cycle_interval; |
413 | int adj; | 412 | int adj; |
414 | 413 | ||
415 | error = clock->error >> (TICK_LENGTH_SHIFT - clock->shift - 1); | 414 | error = clock->error >> (NTP_SCALE_SHIFT - clock->shift - 1); |
416 | if (error > interval) { | 415 | if (error > interval) { |
417 | error >>= 2; | 416 | error >>= 2; |
418 | if (likely(error <= interval)) | 417 | if (likely(error <= interval)) |
@@ -434,7 +433,7 @@ static void clocksource_adjust(s64 offset) | |||
434 | clock->xtime_interval += interval; | 433 | clock->xtime_interval += interval; |
435 | clock->xtime_nsec -= offset; | 434 | clock->xtime_nsec -= offset; |
436 | clock->error -= (interval - offset) << | 435 | clock->error -= (interval - offset) << |
437 | (TICK_LENGTH_SHIFT - clock->shift); | 436 | (NTP_SCALE_SHIFT - clock->shift); |
438 | } | 437 | } |
439 | 438 | ||
440 | /** | 439 | /** |
@@ -473,8 +472,8 @@ void update_wall_time(void) | |||
473 | } | 472 | } |
474 | 473 | ||
475 | /* accumulate error between NTP and clock interval */ | 474 | /* accumulate error between NTP and clock interval */ |
476 | clock->error += current_tick_length(); | 475 | clock->error += tick_length; |
477 | clock->error -= clock->xtime_interval << (TICK_LENGTH_SHIFT - clock->shift); | 476 | clock->error -= clock->xtime_interval << (NTP_SCALE_SHIFT - clock->shift); |
478 | } | 477 | } |
479 | 478 | ||
480 | /* correct the clock when NTP error is too big */ | 479 | /* correct the clock when NTP error is too big */ |
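With current_tick_length() gone, the timekeeping core reads the exported tick_length directly, and every shift that used TICK_LENGTH_SHIFT now uses the common NTP_SCALE_SHIFT, so NTP and timekeeping share one fixed-point format. The per-tick error integration then compares like with like, roughly (sketch in the hunk's own terms; all quantities carry NTP_SCALE_SHIFT fractional bits):

	/* what NTP asked for this tick, minus what the clocksource delivered;
	 * xtime_interval carries clock->shift fractional bits of its own,
	 * hence the compensating shift. */
	clock->error += tick_length
		      - (clock->xtime_interval << (NTP_SCALE_SHIFT - clock->shift));

clocksource_adjust() later drains clock->error back toward zero by nudging xtime_interval.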
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index 67fe8fc21fb1..a40e20fd0001 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c | |||
@@ -278,12 +278,9 @@ static int __init init_timer_list_procfs(void) | |||
278 | { | 278 | { |
279 | struct proc_dir_entry *pe; | 279 | struct proc_dir_entry *pe; |
280 | 280 | ||
281 | pe = create_proc_entry("timer_list", 0644, NULL); | 281 | pe = proc_create("timer_list", 0644, NULL, &timer_list_fops); |
282 | if (!pe) | 282 | if (!pe) |
283 | return -ENOMEM; | 283 | return -ENOMEM; |
284 | |||
285 | pe->proc_fops = &timer_list_fops; | ||
286 | |||
287 | return 0; | 284 | return 0; |
288 | } | 285 | } |
289 | __initcall(init_timer_list_procfs); | 286 | __initcall(init_timer_list_procfs); |
diff --git a/kernel/time/timer_stats.c b/kernel/time/timer_stats.c index 417da8c5bc72..c994530d166d 100644 --- a/kernel/time/timer_stats.c +++ b/kernel/time/timer_stats.c | |||
@@ -415,12 +415,9 @@ static int __init init_tstats_procfs(void) | |||
415 | { | 415 | { |
416 | struct proc_dir_entry *pe; | 416 | struct proc_dir_entry *pe; |
417 | 417 | ||
418 | pe = create_proc_entry("timer_stats", 0644, NULL); | 418 | pe = proc_create("timer_stats", 0644, NULL, &tstats_fops); |
419 | if (!pe) | 419 | if (!pe) |
420 | return -ENOMEM; | 420 | return -ENOMEM; |
421 | |||
422 | pe->proc_fops = &tstats_fops; | ||
423 | |||
424 | return 0; | 421 | return 0; |
425 | } | 422 | } |
426 | __initcall(init_tstats_procfs); | 423 | __initcall(init_tstats_procfs); |
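Editor's note: both proc conversions above (timer_list.c and timer_stats.c) close the same race. create_proc_entry() made the entry visible before ->proc_fops was assigned, so an open() arriving in that window saw a NULL fops; proc_create() publishes the entry with its file_operations already in place. A sketch of the same pattern for a hypothetical /proc/example entry (the example_* names are illustrative, not from the patch):

/*
 * Hypothetical /proc/example entry converted to proc_create(),
 * assuming a seq_file single_open() reader.
 */
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

static int example_show(struct seq_file *m, void *v)
{
	seq_printf(m, "hello\n");
	return 0;
}

static int example_open(struct inode *inode, struct file *file)
{
	return single_open(file, example_show, NULL);
}

static const struct file_operations example_fops = {
	.open		= example_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};

static int __init example_init(void)
{
	/* fops handed over atomically; no window with a NULL ->proc_fops */
	if (!proc_create("example", 0444, NULL, &example_fops))
		return -ENOMEM;
	return 0;
}
__initcall(example_init);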
diff --git a/kernel/timer.c b/kernel/timer.c index f3d35d4ea42e..ceacc6626572 100644 --- a/kernel/timer.c +++ b/kernel/timer.c | |||
@@ -320,14 +320,130 @@ static void timer_stats_account_timer(struct timer_list *timer) | |||
320 | static void timer_stats_account_timer(struct timer_list *timer) {} | 320 | static void timer_stats_account_timer(struct timer_list *timer) {} |
321 | #endif | 321 | #endif |
322 | 322 | ||
323 | /** | 323 | #ifdef CONFIG_DEBUG_OBJECTS_TIMERS |
324 | * init_timer - initialize a timer. | 324 | |
325 | * @timer: the timer to be initialized | 325 | static struct debug_obj_descr timer_debug_descr; |
326 | * | 326 | |
327 | * init_timer() must be done to a timer prior calling *any* of the | 327 | /* |
328 | * other timer functions. | 328 | * fixup_init is called when: |
329 | * - an active object is initialized | ||
329 | */ | 330 | */ |
330 | void init_timer(struct timer_list *timer) | 331 | static int timer_fixup_init(void *addr, enum debug_obj_state state) |
332 | { | ||
333 | struct timer_list *timer = addr; | ||
334 | |||
335 | switch (state) { | ||
336 | case ODEBUG_STATE_ACTIVE: | ||
337 | del_timer_sync(timer); | ||
338 | debug_object_init(timer, &timer_debug_descr); | ||
339 | return 1; | ||
340 | default: | ||
341 | return 0; | ||
342 | } | ||
343 | } | ||
344 | |||
345 | /* | ||
346 | * fixup_activate is called when: | ||
347 | * - an active object is activated | ||
348 | * - an unknown object is activated (might be a statically initialized object) | ||
349 | */ | ||
350 | static int timer_fixup_activate(void *addr, enum debug_obj_state state) | ||
351 | { | ||
352 | struct timer_list *timer = addr; | ||
353 | |||
354 | switch (state) { | ||
355 | |||
356 | case ODEBUG_STATE_NOTAVAILABLE: | ||
357 | /* | ||
358 | * This is not really a fixup. The timer was | ||
359 | * statically initialized. We just make sure that it | ||
360 | * is tracked in the object tracker. | ||
361 | */ | ||
362 | if (timer->entry.next == NULL && | ||
363 | timer->entry.prev == TIMER_ENTRY_STATIC) { | ||
364 | debug_object_init(timer, &timer_debug_descr); | ||
365 | debug_object_activate(timer, &timer_debug_descr); | ||
366 | return 0; | ||
367 | } else { | ||
368 | WARN_ON_ONCE(1); | ||
369 | } | ||
370 | return 0; | ||
371 | |||
372 | case ODEBUG_STATE_ACTIVE: | ||
373 | WARN_ON(1); | ||
374 | |||
375 | default: | ||
376 | return 0; | ||
377 | } | ||
378 | } | ||
379 | |||
380 | /* | ||
381 | * fixup_free is called when: | ||
382 | * - an active object is freed | ||
383 | */ | ||
384 | static int timer_fixup_free(void *addr, enum debug_obj_state state) | ||
385 | { | ||
386 | struct timer_list *timer = addr; | ||
387 | |||
388 | switch (state) { | ||
389 | case ODEBUG_STATE_ACTIVE: | ||
390 | del_timer_sync(timer); | ||
391 | debug_object_free(timer, &timer_debug_descr); | ||
392 | return 1; | ||
393 | default: | ||
394 | return 0; | ||
395 | } | ||
396 | } | ||
397 | |||
398 | static struct debug_obj_descr timer_debug_descr = { | ||
399 | .name = "timer_list", | ||
400 | .fixup_init = timer_fixup_init, | ||
401 | .fixup_activate = timer_fixup_activate, | ||
402 | .fixup_free = timer_fixup_free, | ||
403 | }; | ||
404 | |||
405 | static inline void debug_timer_init(struct timer_list *timer) | ||
406 | { | ||
407 | debug_object_init(timer, &timer_debug_descr); | ||
408 | } | ||
409 | |||
410 | static inline void debug_timer_activate(struct timer_list *timer) | ||
411 | { | ||
412 | debug_object_activate(timer, &timer_debug_descr); | ||
413 | } | ||
414 | |||
415 | static inline void debug_timer_deactivate(struct timer_list *timer) | ||
416 | { | ||
417 | debug_object_deactivate(timer, &timer_debug_descr); | ||
418 | } | ||
419 | |||
420 | static inline void debug_timer_free(struct timer_list *timer) | ||
421 | { | ||
422 | debug_object_free(timer, &timer_debug_descr); | ||
423 | } | ||
424 | |||
425 | static void __init_timer(struct timer_list *timer); | ||
426 | |||
427 | void init_timer_on_stack(struct timer_list *timer) | ||
428 | { | ||
429 | debug_object_init_on_stack(timer, &timer_debug_descr); | ||
430 | __init_timer(timer); | ||
431 | } | ||
432 | EXPORT_SYMBOL_GPL(init_timer_on_stack); | ||
433 | |||
434 | void destroy_timer_on_stack(struct timer_list *timer) | ||
435 | { | ||
436 | debug_object_free(timer, &timer_debug_descr); | ||
437 | } | ||
438 | EXPORT_SYMBOL_GPL(destroy_timer_on_stack); | ||
439 | |||
440 | #else | ||
441 | static inline void debug_timer_init(struct timer_list *timer) { } | ||
442 | static inline void debug_timer_activate(struct timer_list *timer) { } | ||
443 | static inline void debug_timer_deactivate(struct timer_list *timer) { } | ||
444 | #endif | ||
445 | |||
446 | static void __init_timer(struct timer_list *timer) | ||
331 | { | 447 | { |
332 | timer->entry.next = NULL; | 448 | timer->entry.next = NULL; |
333 | timer->base = __raw_get_cpu_var(tvec_bases); | 449 | timer->base = __raw_get_cpu_var(tvec_bases); |
@@ -337,6 +453,19 @@ void init_timer(struct timer_list *timer) | |||
337 | memset(timer->start_comm, 0, TASK_COMM_LEN); | 453 | memset(timer->start_comm, 0, TASK_COMM_LEN); |
338 | #endif | 454 | #endif |
339 | } | 455 | } |
456 | |||
457 | /** | ||
458 | * init_timer - initialize a timer. | ||
459 | * @timer: the timer to be initialized | ||
460 | * | ||
461 | * init_timer() must be done to a timer prior calling *any* of the | ||
462 | * other timer functions. | ||
463 | */ | ||
464 | void init_timer(struct timer_list *timer) | ||
465 | { | ||
466 | debug_timer_init(timer); | ||
467 | __init_timer(timer); | ||
468 | } | ||
340 | EXPORT_SYMBOL(init_timer); | 469 | EXPORT_SYMBOL(init_timer); |
341 | 470 | ||
342 | void init_timer_deferrable(struct timer_list *timer) | 471 | void init_timer_deferrable(struct timer_list *timer) |
@@ -351,6 +480,8 @@ static inline void detach_timer(struct timer_list *timer, | |||
351 | { | 480 | { |
352 | struct list_head *entry = &timer->entry; | 481 | struct list_head *entry = &timer->entry; |
353 | 482 | ||
483 | debug_timer_deactivate(timer); | ||
484 | |||
354 | __list_del(entry->prev, entry->next); | 485 | __list_del(entry->prev, entry->next); |
355 | if (clear_pending) | 486 | if (clear_pending) |
356 | entry->next = NULL; | 487 | entry->next = NULL; |
@@ -405,6 +536,8 @@ int __mod_timer(struct timer_list *timer, unsigned long expires) | |||
405 | ret = 1; | 536 | ret = 1; |
406 | } | 537 | } |
407 | 538 | ||
539 | debug_timer_activate(timer); | ||
540 | |||
408 | new_base = __get_cpu_var(tvec_bases); | 541 | new_base = __get_cpu_var(tvec_bases); |
409 | 542 | ||
410 | if (base != new_base) { | 543 | if (base != new_base) { |
@@ -450,6 +583,7 @@ void add_timer_on(struct timer_list *timer, int cpu) | |||
450 | BUG_ON(timer_pending(timer) || !timer->function); | 583 | BUG_ON(timer_pending(timer) || !timer->function); |
451 | spin_lock_irqsave(&base->lock, flags); | 584 | spin_lock_irqsave(&base->lock, flags); |
452 | timer_set_base(timer, base); | 585 | timer_set_base(timer, base); |
586 | debug_timer_activate(timer); | ||
453 | internal_add_timer(base, timer); | 587 | internal_add_timer(base, timer); |
454 | /* | 588 | /* |
455 | * Check whether the other CPU is idle and needs to be | 589 | * Check whether the other CPU is idle and needs to be |
@@ -1086,11 +1220,14 @@ signed long __sched schedule_timeout(signed long timeout) | |||
1086 | 1220 | ||
1087 | expire = timeout + jiffies; | 1221 | expire = timeout + jiffies; |
1088 | 1222 | ||
1089 | setup_timer(&timer, process_timeout, (unsigned long)current); | 1223 | setup_timer_on_stack(&timer, process_timeout, (unsigned long)current); |
1090 | __mod_timer(&timer, expire); | 1224 | __mod_timer(&timer, expire); |
1091 | schedule(); | 1225 | schedule(); |
1092 | del_singleshot_timer_sync(&timer); | 1226 | del_singleshot_timer_sync(&timer); |
1093 | 1227 | ||
1228 | /* Remove the timer from the object tracker */ | ||
1229 | destroy_timer_on_stack(&timer); | ||
1230 | |||
1094 | timeout = expire - jiffies; | 1231 | timeout = expire - jiffies; |
1095 | 1232 | ||
1096 | out: | 1233 | out: |
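Editor's note: the debug-object tracker keys its state by address, so a timer living on the stack must be announced through init_timer_on_stack()/setup_timer_on_stack() and retired with destroy_timer_on_stack() before its frame is reused, which is exactly the discipline schedule_timeout() adopts above. A minimal sketch of the same pattern, with a hypothetical callback and timeout:

/*
 * Sketch of the on-stack timer discipline; example_timeout and the
 * 2*HZ expiry are illustrative, not from the patch.
 */
#include <linux/timer.h>
#include <linux/jiffies.h>

static void example_timeout(unsigned long data)
{
	/* runs in timer (softirq) context */
}

static void example_wait(void)
{
	struct timer_list timer;

	/* tell the object tracker this timer lives on the stack */
	setup_timer_on_stack(&timer, example_timeout, 0);
	mod_timer(&timer, jiffies + 2 * HZ);

	/* ... wait for, or cancel, the timer ... */
	del_timer_sync(&timer);

	/* retire the tracker state before the stack frame goes away */
	destroy_timer_on_stack(&timer);
}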
diff --git a/kernel/user.c b/kernel/user.c index debce602bfdd..865ecf57a096 100644 --- a/kernel/user.c +++ b/kernel/user.c | |||
@@ -53,10 +53,6 @@ struct user_struct root_user = { | |||
53 | .files = ATOMIC_INIT(0), | 53 | .files = ATOMIC_INIT(0), |
54 | .sigpending = ATOMIC_INIT(0), | 54 | .sigpending = ATOMIC_INIT(0), |
55 | .locked_shm = 0, | 55 | .locked_shm = 0, |
56 | #ifdef CONFIG_KEYS | ||
57 | .uid_keyring = &root_user_keyring, | ||
58 | .session_keyring = &root_session_keyring, | ||
59 | #endif | ||
60 | #ifdef CONFIG_USER_SCHED | 56 | #ifdef CONFIG_USER_SCHED |
61 | .tg = &init_task_group, | 57 | .tg = &init_task_group, |
62 | #endif | 58 | #endif |
@@ -388,7 +384,7 @@ void free_uid(struct user_struct *up) | |||
388 | local_irq_restore(flags); | 384 | local_irq_restore(flags); |
389 | } | 385 | } |
390 | 386 | ||
391 | struct user_struct * alloc_uid(struct user_namespace *ns, uid_t uid) | 387 | struct user_struct *alloc_uid(struct user_namespace *ns, uid_t uid) |
392 | { | 388 | { |
393 | struct hlist_head *hashent = uidhashentry(ns, uid); | 389 | struct hlist_head *hashent = uidhashentry(ns, uid); |
394 | struct user_struct *up, *new; | 390 | struct user_struct *up, *new; |
@@ -403,29 +399,15 @@ struct user_struct * alloc_uid(struct user_namespace *ns, uid_t uid) | |||
403 | spin_unlock_irq(&uidhash_lock); | 399 | spin_unlock_irq(&uidhash_lock); |
404 | 400 | ||
405 | if (!up) { | 401 | if (!up) { |
406 | new = kmem_cache_alloc(uid_cachep, GFP_KERNEL); | 402 | new = kmem_cache_zalloc(uid_cachep, GFP_KERNEL); |
407 | if (!new) | 403 | if (!new) |
408 | goto out_unlock; | 404 | goto out_unlock; |
409 | 405 | ||
410 | new->uid = uid; | 406 | new->uid = uid; |
411 | atomic_set(&new->__count, 1); | 407 | atomic_set(&new->__count, 1); |
412 | atomic_set(&new->processes, 0); | ||
413 | atomic_set(&new->files, 0); | ||
414 | atomic_set(&new->sigpending, 0); | ||
415 | #ifdef CONFIG_INOTIFY_USER | ||
416 | atomic_set(&new->inotify_watches, 0); | ||
417 | atomic_set(&new->inotify_devs, 0); | ||
418 | #endif | ||
419 | #ifdef CONFIG_POSIX_MQUEUE | ||
420 | new->mq_bytes = 0; | ||
421 | #endif | ||
422 | new->locked_shm = 0; | ||
423 | |||
424 | if (alloc_uid_keyring(new, current) < 0) | ||
425 | goto out_free_user; | ||
426 | 408 | ||
427 | if (sched_create_user(new) < 0) | 409 | if (sched_create_user(new) < 0) |
428 | goto out_put_keys; | 410 | goto out_free_user; |
429 | 411 | ||
430 | if (uids_user_create(new)) | 412 | if (uids_user_create(new)) |
431 | goto out_destoy_sched; | 413 | goto out_destoy_sched; |
@@ -459,9 +441,6 @@ struct user_struct * alloc_uid(struct user_namespace *ns, uid_t uid) | |||
459 | 441 | ||
460 | out_destoy_sched: | 442 | out_destoy_sched: |
461 | sched_destroy_user(new); | 443 | sched_destroy_user(new); |
462 | out_put_keys: | ||
463 | key_put(new->uid_keyring); | ||
464 | key_put(new->session_keyring); | ||
465 | out_free_user: | 444 | out_free_user: |
466 | kmem_cache_free(uid_cachep, new); | 445 | kmem_cache_free(uid_cachep, new); |
467 | out_unlock: | 446 | out_unlock: |
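Editor's note: the kmem_cache_zalloc() switch in alloc_uid() works because every initialiser it removes stored zero; atomic_t, the inotify counters, mq_bytes and locked_shm all have an all-zero-bits zero value, so pre-zeroed memory makes the explicit stores redundant and only uid and the reference count still need writes. A sketch of the same shape, using a hypothetical cache and struct:

/*
 * Hypothetical example_user cache; only non-zero fields are written
 * after kmem_cache_zalloc(), everything else relies on the zeroing.
 */
#include <linux/slab.h>
#include <asm/atomic.h>

struct example_user {
	unsigned int	uid;
	atomic_t	refcount;
	atomic_t	processes;	/* starts at 0: covered by zalloc */
	unsigned long	locked_shm;	/* starts at 0: covered by zalloc */
};

static struct kmem_cache *example_cachep;

static struct example_user *example_alloc(unsigned int uid)
{
	struct example_user *new;

	new = kmem_cache_zalloc(example_cachep, GFP_KERNEL);
	if (!new)
		return NULL;

	/* only the fields with non-zero initial values need stores */
	new->uid = uid;
	atomic_set(&new->refcount, 1);
	return new;
}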
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 4c9006275df7..a9ab0596de44 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c | |||
@@ -8,6 +8,7 @@ | |||
8 | #include <linux/module.h> | 8 | #include <linux/module.h> |
9 | #include <linux/version.h> | 9 | #include <linux/version.h> |
10 | #include <linux/nsproxy.h> | 10 | #include <linux/nsproxy.h> |
11 | #include <linux/slab.h> | ||
11 | #include <linux/user_namespace.h> | 12 | #include <linux/user_namespace.h> |
12 | 13 | ||
13 | /* | 14 | /* |
@@ -73,3 +74,4 @@ void free_user_ns(struct kref *kref) | |||
73 | release_uids(ns); | 74 | release_uids(ns); |
74 | kfree(ns); | 75 | kfree(ns); |
75 | } | 76 | } |
77 | EXPORT_SYMBOL(free_user_ns); | ||
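Editor's note: exporting free_user_ns() is what lets modules drop namespace references. In this era put_user_ns() is a static inline wrapping kref_put(&ns->kref, free_user_ns), so the release callback is referenced from the caller's own object code and a module taking and dropping references links directly against free_user_ns. A sketch of that call path, assuming a hypothetical example_ns pointer acquired elsewhere:

#include <linux/user_namespace.h>

static void example_release(struct user_namespace *example_ns)
{
	/* inline expands to kref_put(&example_ns->kref, free_user_ns) */
	put_user_ns(example_ns);
}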
diff --git a/kernel/utsname.c b/kernel/utsname.c index 816d7b24fa03..64d398f12444 100644 --- a/kernel/utsname.c +++ b/kernel/utsname.c | |||
@@ -14,6 +14,7 @@ | |||
14 | #include <linux/utsname.h> | 14 | #include <linux/utsname.h> |
15 | #include <linux/version.h> | 15 | #include <linux/version.h> |
16 | #include <linux/err.h> | 16 | #include <linux/err.h> |
17 | #include <linux/slab.h> | ||
17 | 18 | ||
18 | /* | 19 | /* |
19 | * Clone a new ns copying an original utsname, setting refcount to 1 | 20 | * Clone a new ns copying an original utsname, setting refcount to 1 |
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 00ff4d08e370..29fc39f1029c 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c | |||
@@ -158,8 +158,8 @@ static void __queue_work(struct cpu_workqueue_struct *cwq, | |||
158 | * | 158 | * |
159 | * Returns 0 if @work was already on a queue, non-zero otherwise. | 159 | * Returns 0 if @work was already on a queue, non-zero otherwise. |
160 | * | 160 | * |
161 | * We queue the work to the CPU it was submitted, but there is no | 161 | * We queue the work to the CPU on which it was submitted, but if the CPU dies |
162 | * guarantee that it will be processed by that CPU. | 162 | * it can be processed by another CPU. |
163 | */ | 163 | */ |
164 | int queue_work(struct workqueue_struct *wq, struct work_struct *work) | 164 | int queue_work(struct workqueue_struct *wq, struct work_struct *work) |
165 | { | 165 | { |
@@ -195,7 +195,6 @@ static void delayed_work_timer_fn(unsigned long __data) | |||
195 | int queue_delayed_work(struct workqueue_struct *wq, | 195 | int queue_delayed_work(struct workqueue_struct *wq, |
196 | struct delayed_work *dwork, unsigned long delay) | 196 | struct delayed_work *dwork, unsigned long delay) |
197 | { | 197 | { |
198 | timer_stats_timer_set_start_info(&dwork->timer); | ||
199 | if (delay == 0) | 198 | if (delay == 0) |
200 | return queue_work(wq, &dwork->work); | 199 | return queue_work(wq, &dwork->work); |
201 | 200 | ||
@@ -219,11 +218,12 @@ int queue_delayed_work_on(int cpu, struct workqueue_struct *wq, | |||
219 | struct timer_list *timer = &dwork->timer; | 218 | struct timer_list *timer = &dwork->timer; |
220 | struct work_struct *work = &dwork->work; | 219 | struct work_struct *work = &dwork->work; |
221 | 220 | ||
222 | timer_stats_timer_set_start_info(&dwork->timer); | ||
223 | if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work))) { | 221 | if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work))) { |
224 | BUG_ON(timer_pending(timer)); | 222 | BUG_ON(timer_pending(timer)); |
225 | BUG_ON(!list_empty(&work->entry)); | 223 | BUG_ON(!list_empty(&work->entry)); |
226 | 224 | ||
225 | timer_stats_timer_set_start_info(&dwork->timer); | ||
226 | |||
227 | /* This stores cwq for the moment, for the timer_fn */ | 227 | /* This stores cwq for the moment, for the timer_fn */ |
228 | set_wq_data(work, wq_per_cpu(wq, raw_smp_processor_id())); | 228 | set_wq_data(work, wq_per_cpu(wq, raw_smp_processor_id())); |
229 | timer->expires = jiffies + delay; | 229 | timer->expires = jiffies + delay; |
@@ -247,7 +247,7 @@ static void run_workqueue(struct cpu_workqueue_struct *cwq) | |||
247 | if (cwq->run_depth > 3) { | 247 | if (cwq->run_depth > 3) { |
248 | /* morton gets to eat his hat */ | 248 | /* morton gets to eat his hat */ |
249 | printk("%s: recursion depth exceeded: %d\n", | 249 | printk("%s: recursion depth exceeded: %d\n", |
250 | __FUNCTION__, cwq->run_depth); | 250 | __func__, cwq->run_depth); |
251 | dump_stack(); | 251 | dump_stack(); |
252 | } | 252 | } |
253 | while (!list_empty(&cwq->worklist)) { | 253 | while (!list_empty(&cwq->worklist)) { |
@@ -564,7 +564,6 @@ EXPORT_SYMBOL(schedule_work); | |||
564 | int schedule_delayed_work(struct delayed_work *dwork, | 564 | int schedule_delayed_work(struct delayed_work *dwork, |
565 | unsigned long delay) | 565 | unsigned long delay) |
566 | { | 566 | { |
567 | timer_stats_timer_set_start_info(&dwork->timer); | ||
568 | return queue_delayed_work(keventd_wq, dwork, delay); | 567 | return queue_delayed_work(keventd_wq, dwork, delay); |
569 | } | 568 | } |
570 | EXPORT_SYMBOL(schedule_delayed_work); | 569 | EXPORT_SYMBOL(schedule_delayed_work); |
@@ -581,7 +580,6 @@ EXPORT_SYMBOL(schedule_delayed_work); | |||
581 | int schedule_delayed_work_on(int cpu, | 580 | int schedule_delayed_work_on(int cpu, |
582 | struct delayed_work *dwork, unsigned long delay) | 581 | struct delayed_work *dwork, unsigned long delay) |
583 | { | 582 | { |
584 | timer_stats_timer_set_start_info(&dwork->timer); | ||
585 | return queue_delayed_work_on(cpu, keventd_wq, dwork, delay); | 583 | return queue_delayed_work_on(cpu, keventd_wq, dwork, delay); |
586 | } | 584 | } |
587 | EXPORT_SYMBOL(schedule_delayed_work_on); | 585 | EXPORT_SYMBOL(schedule_delayed_work_on); |
@@ -772,7 +770,7 @@ struct workqueue_struct *__create_workqueue_key(const char *name, | |||
772 | } | 770 | } |
773 | EXPORT_SYMBOL_GPL(__create_workqueue_key); | 771 | EXPORT_SYMBOL_GPL(__create_workqueue_key); |
774 | 772 | ||
775 | static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu) | 773 | static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq) |
776 | { | 774 | { |
777 | /* | 775 | /* |
778 | * Our caller is either destroy_workqueue() or CPU_DEAD, | 776 | * Our caller is either destroy_workqueue() or CPU_DEAD, |
@@ -808,19 +806,16 @@ static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu) | |||
808 | void destroy_workqueue(struct workqueue_struct *wq) | 806 | void destroy_workqueue(struct workqueue_struct *wq) |
809 | { | 807 | { |
810 | const cpumask_t *cpu_map = wq_cpu_map(wq); | 808 | const cpumask_t *cpu_map = wq_cpu_map(wq); |
811 | struct cpu_workqueue_struct *cwq; | ||
812 | int cpu; | 809 | int cpu; |
813 | 810 | ||
814 | get_online_cpus(); | 811 | get_online_cpus(); |
815 | spin_lock(&workqueue_lock); | 812 | spin_lock(&workqueue_lock); |
816 | list_del(&wq->list); | 813 | list_del(&wq->list); |
817 | spin_unlock(&workqueue_lock); | 814 | spin_unlock(&workqueue_lock); |
818 | put_online_cpus(); | ||
819 | 815 | ||
820 | for_each_cpu_mask(cpu, *cpu_map) { | 816 | for_each_cpu_mask(cpu, *cpu_map) |
821 | cwq = per_cpu_ptr(wq->cpu_wq, cpu); | 817 | cleanup_workqueue_thread(per_cpu_ptr(wq->cpu_wq, cpu)); |
822 | cleanup_workqueue_thread(cwq, cpu); | 818 | put_online_cpus(); |
823 | } | ||
824 | 819 | ||
825 | free_percpu(wq->cpu_wq); | 820 | free_percpu(wq->cpu_wq); |
826 | kfree(wq); | 821 | kfree(wq); |
@@ -838,7 +833,6 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb, | |||
838 | action &= ~CPU_TASKS_FROZEN; | 833 | action &= ~CPU_TASKS_FROZEN; |
839 | 834 | ||
840 | switch (action) { | 835 | switch (action) { |
841 | |||
842 | case CPU_UP_PREPARE: | 836 | case CPU_UP_PREPARE: |
843 | cpu_set(cpu, cpu_populated_map); | 837 | cpu_set(cpu, cpu_populated_map); |
844 | } | 838 | } |
@@ -861,11 +855,17 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb, | |||
861 | case CPU_UP_CANCELED: | 855 | case CPU_UP_CANCELED: |
862 | start_workqueue_thread(cwq, -1); | 856 | start_workqueue_thread(cwq, -1); |
863 | case CPU_DEAD: | 857 | case CPU_DEAD: |
864 | cleanup_workqueue_thread(cwq, cpu); | 858 | cleanup_workqueue_thread(cwq); |
865 | break; | 859 | break; |
866 | } | 860 | } |
867 | } | 861 | } |
868 | 862 | ||
863 | switch (action) { | ||
864 | case CPU_UP_CANCELED: | ||
865 | case CPU_DEAD: | ||
866 | cpu_clear(cpu, cpu_populated_map); | ||
867 | } | ||
868 | |||
869 | return NOTIFY_OK; | 869 | return NOTIFY_OK; |
870 | } | 870 | } |
871 | 871 | ||
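Editor's note: the workqueue changes tighten two orderings. destroy_workqueue() now holds the hotplug lock across the thread-cleanup loop instead of dropping it beforehand, so no CPU in cpu_map can go down mid-loop; and the notifier clears cpu_populated_map only after every workqueue has cleaned up its thread for the dying CPU, so a later create_workqueue() will not spawn a thread for it. An annotated copy of the resulting destroy path (the comments are mine, not in the patch; wq_cpu_map(), workqueue_lock and cleanup_workqueue_thread() are this file's internals):

void destroy_workqueue(struct workqueue_struct *wq)
{
	const cpumask_t *cpu_map = wq_cpu_map(wq);
	int cpu;

	get_online_cpus();
	/* unlink first, so the hotplug callback stops touching this wq */
	spin_lock(&workqueue_lock);
	list_del(&wq->list);
	spin_unlock(&workqueue_lock);

	/*
	 * Keep hotplug blocked while the per-cpu threads are cleaned
	 * up; dropping the lock before this loop (as the old code did)
	 * would let a CPU in cpu_map die while we iterate.
	 */
	for_each_cpu_mask(cpu, *cpu_map)
		cleanup_workqueue_thread(per_cpu_ptr(wq->cpu_wq, cpu));
	put_online_cpus();

	free_percpu(wq->cpu_wq);
	kfree(wq);
}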