aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorJeff Garzik <jgarzik@pobox.com>2006-02-09 04:29:00 -0500
committerJeff Garzik <jgarzik@pobox.com>2006-02-09 04:29:00 -0500
commit9caafa6c8686e319cf4d5f3757b3972c6c522b7c (patch)
treeb38979b835b5d22e681b175d0b98a3c7560d9c59 /kernel
parent51e9f2ff83df6b1c81c5c44f4486c68ed87aa20e (diff)
parentcac0e8e8bb2e7a086643bdd00c41d900a79bb4fa (diff)
Merge branch 'upstream-fixes'
Diffstat (limited to 'kernel')
-rw-r--r--kernel/compat.c1
-rw-r--r--kernel/cpuset.c2
-rw-r--r--kernel/fork.c396
-rw-r--r--kernel/hrtimer.c70
-rw-r--r--kernel/intermodule.c3
-rw-r--r--kernel/itimer.c11
-rw-r--r--kernel/kprobes.c36
-rw-r--r--kernel/module.c6
-rw-r--r--kernel/posix-timers.c53
-rw-r--r--kernel/power/console.c16
-rw-r--r--kernel/power/disk.c15
-rw-r--r--kernel/power/main.c4
-rw-r--r--kernel/power/power.h15
-rw-r--r--kernel/power/swsusp.c14
-rw-r--r--kernel/rcutorture.c10
-rw-r--r--kernel/sched.c24
-rw-r--r--kernel/signal.c2
-rw-r--r--kernel/sys.c27
-rw-r--r--kernel/sysctl.c12
-rw-r--r--kernel/time.c15
-rw-r--r--kernel/timer.c2
-rw-r--r--kernel/user.c32
22 files changed, 522 insertions, 244 deletions
diff --git a/kernel/compat.c b/kernel/compat.c
index 1867290c37e3..8c9cd88b6785 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -23,7 +23,6 @@
23#include <linux/security.h> 23#include <linux/security.h>
24 24
25#include <asm/uaccess.h> 25#include <asm/uaccess.h>
26#include <asm/bug.h>
27 26
28int get_compat_timespec(struct timespec *ts, const struct compat_timespec __user *cts) 27int get_compat_timespec(struct timespec *ts, const struct compat_timespec __user *cts)
29{ 28{
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index fe2f71f92ae0..ba42b0a76961 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -641,7 +641,7 @@ static void guarantee_online_mems(const struct cpuset *cs, nodemask_t *pmask)
641 * task has been modifying its cpuset. 641 * task has been modifying its cpuset.
642 */ 642 */
643 643
644void cpuset_update_task_memory_state() 644void cpuset_update_task_memory_state(void)
645{ 645{
646 int my_cpusets_mem_gen; 646 int my_cpusets_mem_gen;
647 struct task_struct *tsk = current; 647 struct task_struct *tsk = current;
diff --git a/kernel/fork.c b/kernel/fork.c
index 4ae8cfc1c89c..8e88b374cee9 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -446,6 +446,55 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
446 } 446 }
447} 447}
448 448
449/*
450 * Allocate a new mm structure and copy contents from the
451 * mm structure of the passed in task structure.
452 */
453static struct mm_struct *dup_mm(struct task_struct *tsk)
454{
455 struct mm_struct *mm, *oldmm = current->mm;
456 int err;
457
458 if (!oldmm)
459 return NULL;
460
461 mm = allocate_mm();
462 if (!mm)
463 goto fail_nomem;
464
465 memcpy(mm, oldmm, sizeof(*mm));
466
467 if (!mm_init(mm))
468 goto fail_nomem;
469
470 if (init_new_context(tsk, mm))
471 goto fail_nocontext;
472
473 err = dup_mmap(mm, oldmm);
474 if (err)
475 goto free_pt;
476
477 mm->hiwater_rss = get_mm_rss(mm);
478 mm->hiwater_vm = mm->total_vm;
479
480 return mm;
481
482free_pt:
483 mmput(mm);
484
485fail_nomem:
486 return NULL;
487
488fail_nocontext:
489 /*
490 * If init_new_context() failed, we cannot use mmput() to free the mm
491 * because it calls destroy_context()
492 */
493 mm_free_pgd(mm);
494 free_mm(mm);
495 return NULL;
496}
497
449static int copy_mm(unsigned long clone_flags, struct task_struct * tsk) 498static int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
450{ 499{
451 struct mm_struct * mm, *oldmm; 500 struct mm_struct * mm, *oldmm;
@@ -473,43 +522,17 @@ static int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
473 } 522 }
474 523
475 retval = -ENOMEM; 524 retval = -ENOMEM;
476 mm = allocate_mm(); 525 mm = dup_mm(tsk);
477 if (!mm) 526 if (!mm)
478 goto fail_nomem; 527 goto fail_nomem;
479 528
480 /* Copy the current MM stuff.. */
481 memcpy(mm, oldmm, sizeof(*mm));
482 if (!mm_init(mm))
483 goto fail_nomem;
484
485 if (init_new_context(tsk,mm))
486 goto fail_nocontext;
487
488 retval = dup_mmap(mm, oldmm);
489 if (retval)
490 goto free_pt;
491
492 mm->hiwater_rss = get_mm_rss(mm);
493 mm->hiwater_vm = mm->total_vm;
494
495good_mm: 529good_mm:
496 tsk->mm = mm; 530 tsk->mm = mm;
497 tsk->active_mm = mm; 531 tsk->active_mm = mm;
498 return 0; 532 return 0;
499 533
500free_pt:
501 mmput(mm);
502fail_nomem: 534fail_nomem:
503 return retval; 535 return retval;
504
505fail_nocontext:
506 /*
507 * If init_new_context() failed, we cannot use mmput() to free the mm
508 * because it calls destroy_context()
509 */
510 mm_free_pgd(mm);
511 free_mm(mm);
512 return retval;
513} 536}
514 537
515static inline struct fs_struct *__copy_fs_struct(struct fs_struct *old) 538static inline struct fs_struct *__copy_fs_struct(struct fs_struct *old)
@@ -597,32 +620,17 @@ out:
597 return newf; 620 return newf;
598} 621}
599 622
600static int copy_files(unsigned long clone_flags, struct task_struct * tsk) 623/*
624 * Allocate a new files structure and copy contents from the
625 * passed in files structure.
626 */
627static struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
601{ 628{
602 struct files_struct *oldf, *newf; 629 struct files_struct *newf;
603 struct file **old_fds, **new_fds; 630 struct file **old_fds, **new_fds;
604 int open_files, size, i, error = 0, expand; 631 int open_files, size, i, expand;
605 struct fdtable *old_fdt, *new_fdt; 632 struct fdtable *old_fdt, *new_fdt;
606 633
607 /*
608 * A background process may not have any files ...
609 */
610 oldf = current->files;
611 if (!oldf)
612 goto out;
613
614 if (clone_flags & CLONE_FILES) {
615 atomic_inc(&oldf->count);
616 goto out;
617 }
618
619 /*
620 * Note: we may be using current for both targets (See exec.c)
621 * This works because we cache current->files (old) as oldf. Don't
622 * break this.
623 */
624 tsk->files = NULL;
625 error = -ENOMEM;
626 newf = alloc_files(); 634 newf = alloc_files();
627 if (!newf) 635 if (!newf)
628 goto out; 636 goto out;
@@ -651,9 +659,9 @@ static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
651 if (expand) { 659 if (expand) {
652 spin_unlock(&oldf->file_lock); 660 spin_unlock(&oldf->file_lock);
653 spin_lock(&newf->file_lock); 661 spin_lock(&newf->file_lock);
654 error = expand_files(newf, open_files-1); 662 *errorp = expand_files(newf, open_files-1);
655 spin_unlock(&newf->file_lock); 663 spin_unlock(&newf->file_lock);
656 if (error < 0) 664 if (*errorp < 0)
657 goto out_release; 665 goto out_release;
658 new_fdt = files_fdtable(newf); 666 new_fdt = files_fdtable(newf);
659 /* 667 /*
@@ -702,10 +710,8 @@ static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
702 memset(&new_fdt->close_on_exec->fds_bits[start], 0, left); 710 memset(&new_fdt->close_on_exec->fds_bits[start], 0, left);
703 } 711 }
704 712
705 tsk->files = newf;
706 error = 0;
707out: 713out:
708 return error; 714 return newf;
709 715
710out_release: 716out_release:
711 free_fdset (new_fdt->close_on_exec, new_fdt->max_fdset); 717 free_fdset (new_fdt->close_on_exec, new_fdt->max_fdset);
@@ -715,6 +721,40 @@ out_release:
715 goto out; 721 goto out;
716} 722}
717 723
724static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
725{
726 struct files_struct *oldf, *newf;
727 int error = 0;
728
729 /*
730 * A background process may not have any files ...
731 */
732 oldf = current->files;
733 if (!oldf)
734 goto out;
735
736 if (clone_flags & CLONE_FILES) {
737 atomic_inc(&oldf->count);
738 goto out;
739 }
740
741 /*
742 * Note: we may be using current for both targets (See exec.c)
743 * This works because we cache current->files (old) as oldf. Don't
744 * break this.
745 */
746 tsk->files = NULL;
747 error = -ENOMEM;
748 newf = dup_fd(oldf, &error);
749 if (!newf)
750 goto out;
751
752 tsk->files = newf;
753 error = 0;
754out:
755 return error;
756}
757
718/* 758/*
719 * Helper to unshare the files of the current task. 759 * Helper to unshare the files of the current task.
720 * We don't want to expose copy_files internals to 760 * We don't want to expose copy_files internals to
@@ -802,7 +842,7 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct * ts
802 init_sigpending(&sig->shared_pending); 842 init_sigpending(&sig->shared_pending);
803 INIT_LIST_HEAD(&sig->posix_timers); 843 INIT_LIST_HEAD(&sig->posix_timers);
804 844
805 hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC); 845 hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_REL);
806 sig->it_real_incr.tv64 = 0; 846 sig->it_real_incr.tv64 = 0;
807 sig->real_timer.function = it_real_fn; 847 sig->real_timer.function = it_real_fn;
808 sig->real_timer.data = tsk; 848 sig->real_timer.data = tsk;
@@ -1323,3 +1363,249 @@ void __init proc_caches_init(void)
1323 sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN, 1363 sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN,
1324 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); 1364 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
1325} 1365}
1366
1367
1368/*
1369 * Check constraints on flags passed to the unshare system call and
1370 * force unsharing of additional process context as appropriate.
1371 */
1372static inline void check_unshare_flags(unsigned long *flags_ptr)
1373{
1374 /*
1375 * If unsharing a thread from a thread group, must also
1376 * unshare vm.
1377 */
1378 if (*flags_ptr & CLONE_THREAD)
1379 *flags_ptr |= CLONE_VM;
1380
1381 /*
1382 * If unsharing vm, must also unshare signal handlers.
1383 */
1384 if (*flags_ptr & CLONE_VM)
1385 *flags_ptr |= CLONE_SIGHAND;
1386
1387 /*
1388 * If unsharing signal handlers and the task was created
1389 * using CLONE_THREAD, then must unshare the thread
1390 */
1391 if ((*flags_ptr & CLONE_SIGHAND) &&
1392 (atomic_read(&current->signal->count) > 1))
1393 *flags_ptr |= CLONE_THREAD;
1394
1395 /*
1396 * If unsharing namespace, must also unshare filesystem information.
1397 */
1398 if (*flags_ptr & CLONE_NEWNS)
1399 *flags_ptr |= CLONE_FS;
1400}
1401
1402/*
1403 * Unsharing of tasks created with CLONE_THREAD is not supported yet
1404 */
1405static int unshare_thread(unsigned long unshare_flags)
1406{
1407 if (unshare_flags & CLONE_THREAD)
1408 return -EINVAL;
1409
1410 return 0;
1411}
1412
1413/*
1414 * Unshare the filesystem structure if it is being shared
1415 */
1416static int unshare_fs(unsigned long unshare_flags, struct fs_struct **new_fsp)
1417{
1418 struct fs_struct *fs = current->fs;
1419
1420 if ((unshare_flags & CLONE_FS) &&
1421 (fs && atomic_read(&fs->count) > 1)) {
1422 *new_fsp = __copy_fs_struct(current->fs);
1423 if (!*new_fsp)
1424 return -ENOMEM;
1425 }
1426
1427 return 0;
1428}
1429
1430/*
1431 * Unshare the namespace structure if it is being shared
1432 */
1433static int unshare_namespace(unsigned long unshare_flags, struct namespace **new_nsp, struct fs_struct *new_fs)
1434{
1435 struct namespace *ns = current->namespace;
1436
1437 if ((unshare_flags & CLONE_NEWNS) &&
1438 (ns && atomic_read(&ns->count) > 1)) {
1439 if (!capable(CAP_SYS_ADMIN))
1440 return -EPERM;
1441
1442 *new_nsp = dup_namespace(current, new_fs ? new_fs : current->fs);
1443 if (!*new_nsp)
1444 return -ENOMEM;
1445 }
1446
1447 return 0;
1448}
1449
1450/*
1451 * Unsharing of sighand for tasks created with CLONE_SIGHAND is not
1452 * supported yet
1453 */
1454static int unshare_sighand(unsigned long unshare_flags, struct sighand_struct **new_sighp)
1455{
1456 struct sighand_struct *sigh = current->sighand;
1457
1458 if ((unshare_flags & CLONE_SIGHAND) &&
1459 (sigh && atomic_read(&sigh->count) > 1))
1460 return -EINVAL;
1461 else
1462 return 0;
1463}
1464
1465/*
1466 * Unshare vm if it is being shared
1467 */
1468static int unshare_vm(unsigned long unshare_flags, struct mm_struct **new_mmp)
1469{
1470 struct mm_struct *mm = current->mm;
1471
1472 if ((unshare_flags & CLONE_VM) &&
1473 (mm && atomic_read(&mm->mm_users) > 1)) {
1474 *new_mmp = dup_mm(current);
1475 if (!*new_mmp)
1476 return -ENOMEM;
1477 }
1478
1479 return 0;
1480}
1481
1482/*
1483 * Unshare file descriptor table if it is being shared
1484 */
1485static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp)
1486{
1487 struct files_struct *fd = current->files;
1488 int error = 0;
1489
1490 if ((unshare_flags & CLONE_FILES) &&
1491 (fd && atomic_read(&fd->count) > 1)) {
1492 *new_fdp = dup_fd(fd, &error);
1493 if (!*new_fdp)
1494 return error;
1495 }
1496
1497 return 0;
1498}
1499
1500/*
1501 * Unsharing of semundo for tasks created with CLONE_SYSVSEM is not
1502 * supported yet
1503 */
1504static int unshare_semundo(unsigned long unshare_flags, struct sem_undo_list **new_ulistp)
1505{
1506 if (unshare_flags & CLONE_SYSVSEM)
1507 return -EINVAL;
1508
1509 return 0;
1510}
1511
1512/*
1513 * unshare allows a process to 'unshare' part of the process
1514 * context which was originally shared using clone. copy_*
1515 * functions used by do_fork() cannot be used here directly
1516 * because they modify an inactive task_struct that is being
1517 * constructed. Here we are modifying the current, active,
1518 * task_struct.
1519 */
1520asmlinkage long sys_unshare(unsigned long unshare_flags)
1521{
1522 int err = 0;
1523 struct fs_struct *fs, *new_fs = NULL;
1524 struct namespace *ns, *new_ns = NULL;
1525 struct sighand_struct *sigh, *new_sigh = NULL;
1526 struct mm_struct *mm, *new_mm = NULL, *active_mm = NULL;
1527 struct files_struct *fd, *new_fd = NULL;
1528 struct sem_undo_list *new_ulist = NULL;
1529
1530 check_unshare_flags(&unshare_flags);
1531
1532 if ((err = unshare_thread(unshare_flags)))
1533 goto bad_unshare_out;
1534 if ((err = unshare_fs(unshare_flags, &new_fs)))
1535 goto bad_unshare_cleanup_thread;
1536 if ((err = unshare_namespace(unshare_flags, &new_ns, new_fs)))
1537 goto bad_unshare_cleanup_fs;
1538 if ((err = unshare_sighand(unshare_flags, &new_sigh)))
1539 goto bad_unshare_cleanup_ns;
1540 if ((err = unshare_vm(unshare_flags, &new_mm)))
1541 goto bad_unshare_cleanup_sigh;
1542 if ((err = unshare_fd(unshare_flags, &new_fd)))
1543 goto bad_unshare_cleanup_vm;
1544 if ((err = unshare_semundo(unshare_flags, &new_ulist)))
1545 goto bad_unshare_cleanup_fd;
1546
1547 if (new_fs || new_ns || new_sigh || new_mm || new_fd || new_ulist) {
1548
1549 task_lock(current);
1550
1551 if (new_fs) {
1552 fs = current->fs;
1553 current->fs = new_fs;
1554 new_fs = fs;
1555 }
1556
1557 if (new_ns) {
1558 ns = current->namespace;
1559 current->namespace = new_ns;
1560 new_ns = ns;
1561 }
1562
1563 if (new_sigh) {
1564 sigh = current->sighand;
1565 current->sighand = new_sigh;
1566 new_sigh = sigh;
1567 }
1568
1569 if (new_mm) {
1570 mm = current->mm;
1571 active_mm = current->active_mm;
1572 current->mm = new_mm;
1573 current->active_mm = new_mm;
1574 activate_mm(active_mm, new_mm);
1575 new_mm = mm;
1576 }
1577
1578 if (new_fd) {
1579 fd = current->files;
1580 current->files = new_fd;
1581 new_fd = fd;
1582 }
1583
1584 task_unlock(current);
1585 }
1586
1587bad_unshare_cleanup_fd:
1588 if (new_fd)
1589 put_files_struct(new_fd);
1590
1591bad_unshare_cleanup_vm:
1592 if (new_mm)
1593 mmput(new_mm);
1594
1595bad_unshare_cleanup_sigh:
1596 if (new_sigh)
1597 if (atomic_dec_and_test(&new_sigh->count))
1598 kmem_cache_free(sighand_cachep, new_sigh);
1599
1600bad_unshare_cleanup_ns:
1601 if (new_ns)
1602 put_namespace(new_ns);
1603
1604bad_unshare_cleanup_fs:
1605 if (new_fs)
1606 put_fs_struct(new_fs);
1607
1608bad_unshare_cleanup_thread:
1609bad_unshare_out:
1610 return err;
1611}
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index f1c4155b49ac..2b6e1757aedd 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -21,6 +21,12 @@
21 * Credits: 21 * Credits:
22 * based on kernel/timer.c 22 * based on kernel/timer.c
23 * 23 *
24 * Help, testing, suggestions, bugfixes, improvements were
25 * provided by:
26 *
27 * George Anzinger, Andrew Morton, Steven Rostedt, Roman Zippel
28 * et al.
29 *
24 * For licencing details see kernel-base/COPYING 30 * For licencing details see kernel-base/COPYING
25 */ 31 */
26 32
@@ -66,6 +72,12 @@ EXPORT_SYMBOL_GPL(ktime_get_real);
66 72
67/* 73/*
68 * The timer bases: 74 * The timer bases:
75 *
76 * Note: If we want to add new timer bases, we have to skip the two
77 * clock ids captured by the cpu-timers. We do this by holding empty
78 * entries rather than doing math adjustment of the clock ids.
79 * This ensures that we capture erroneous accesses to these clock ids
80 * rather than moving them into the range of valid clock id's.
69 */ 81 */
70 82
71#define MAX_HRTIMER_BASES 2 83#define MAX_HRTIMER_BASES 2
@@ -483,29 +495,25 @@ ktime_t hrtimer_get_remaining(const struct hrtimer *timer)
483} 495}
484 496
485/** 497/**
486 * hrtimer_rebase - rebase an initialized hrtimer to a different base 498 * hrtimer_init - initialize a timer to the given clock
487 * 499 *
488 * @timer: the timer to be rebased 500 * @timer: the timer to be initialized
489 * @clock_id: the clock to be used 501 * @clock_id: the clock to be used
502 * @mode: timer mode abs/rel
490 */ 503 */
491void hrtimer_rebase(struct hrtimer *timer, const clockid_t clock_id) 504void hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
505 enum hrtimer_mode mode)
492{ 506{
493 struct hrtimer_base *bases; 507 struct hrtimer_base *bases;
494 508
509 memset(timer, 0, sizeof(struct hrtimer));
510
495 bases = per_cpu(hrtimer_bases, raw_smp_processor_id()); 511 bases = per_cpu(hrtimer_bases, raw_smp_processor_id());
496 timer->base = &bases[clock_id];
497}
498 512
499/** 513 if (clock_id == CLOCK_REALTIME && mode != HRTIMER_ABS)
500 * hrtimer_init - initialize a timer to the given clock 514 clock_id = CLOCK_MONOTONIC;
501 * 515
502 * @timer: the timer to be initialized 516 timer->base = &bases[clock_id];
503 * @clock_id: the clock to be used
504 */
505void hrtimer_init(struct hrtimer *timer, const clockid_t clock_id)
506{
507 memset(timer, 0, sizeof(struct hrtimer));
508 hrtimer_rebase(timer, clock_id);
509} 517}
510 518
511/** 519/**
@@ -550,6 +558,7 @@ static inline void run_hrtimer_queue(struct hrtimer_base *base)
550 fn = timer->function; 558 fn = timer->function;
551 data = timer->data; 559 data = timer->data;
552 set_curr_timer(base, timer); 560 set_curr_timer(base, timer);
561 timer->state = HRTIMER_RUNNING;
553 __remove_hrtimer(timer, base); 562 __remove_hrtimer(timer, base);
554 spin_unlock_irq(&base->lock); 563 spin_unlock_irq(&base->lock);
555 564
@@ -565,6 +574,10 @@ static inline void run_hrtimer_queue(struct hrtimer_base *base)
565 574
566 spin_lock_irq(&base->lock); 575 spin_lock_irq(&base->lock);
567 576
577 /* Another CPU has added back the timer */
578 if (timer->state != HRTIMER_RUNNING)
579 continue;
580
568 if (restart == HRTIMER_RESTART) 581 if (restart == HRTIMER_RESTART)
569 enqueue_hrtimer(timer, base); 582 enqueue_hrtimer(timer, base);
570 else 583 else
@@ -638,8 +651,7 @@ schedule_hrtimer_interruptible(struct hrtimer *timer,
638 return schedule_hrtimer(timer, mode); 651 return schedule_hrtimer(timer, mode);
639} 652}
640 653
641static long __sched 654static long __sched nanosleep_restart(struct restart_block *restart)
642nanosleep_restart(struct restart_block *restart, clockid_t clockid)
643{ 655{
644 struct timespec __user *rmtp; 656 struct timespec __user *rmtp;
645 struct timespec tu; 657 struct timespec tu;
@@ -649,7 +661,7 @@ nanosleep_restart(struct restart_block *restart, clockid_t clockid)
649 661
650 restart->fn = do_no_restart_syscall; 662 restart->fn = do_no_restart_syscall;
651 663
652 hrtimer_init(&timer, clockid); 664 hrtimer_init(&timer, (clockid_t) restart->arg3, HRTIMER_ABS);
653 665
654 timer.expires.tv64 = ((u64)restart->arg1 << 32) | (u64) restart->arg0; 666 timer.expires.tv64 = ((u64)restart->arg1 << 32) | (u64) restart->arg0;
655 667
@@ -669,16 +681,6 @@ nanosleep_restart(struct restart_block *restart, clockid_t clockid)
669 return -ERESTART_RESTARTBLOCK; 681 return -ERESTART_RESTARTBLOCK;
670} 682}
671 683
672static long __sched nanosleep_restart_mono(struct restart_block *restart)
673{
674 return nanosleep_restart(restart, CLOCK_MONOTONIC);
675}
676
677static long __sched nanosleep_restart_real(struct restart_block *restart)
678{
679 return nanosleep_restart(restart, CLOCK_REALTIME);
680}
681
682long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, 684long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
683 const enum hrtimer_mode mode, const clockid_t clockid) 685 const enum hrtimer_mode mode, const clockid_t clockid)
684{ 686{
@@ -687,7 +689,7 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
687 struct timespec tu; 689 struct timespec tu;
688 ktime_t rem; 690 ktime_t rem;
689 691
690 hrtimer_init(&timer, clockid); 692 hrtimer_init(&timer, clockid, mode);
691 693
692 timer.expires = timespec_to_ktime(*rqtp); 694 timer.expires = timespec_to_ktime(*rqtp);
693 695
@@ -695,7 +697,7 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
695 if (rem.tv64 <= 0) 697 if (rem.tv64 <= 0)
696 return 0; 698 return 0;
697 699
698 /* Absolute timers do not update the rmtp value: */ 700 /* Absolute timers do not update the rmtp value and restart: */
699 if (mode == HRTIMER_ABS) 701 if (mode == HRTIMER_ABS)
700 return -ERESTARTNOHAND; 702 return -ERESTARTNOHAND;
701 703
@@ -705,11 +707,11 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
705 return -EFAULT; 707 return -EFAULT;
706 708
707 restart = &current_thread_info()->restart_block; 709 restart = &current_thread_info()->restart_block;
708 restart->fn = (clockid == CLOCK_MONOTONIC) ? 710 restart->fn = nanosleep_restart;
709 nanosleep_restart_mono : nanosleep_restart_real;
710 restart->arg0 = timer.expires.tv64 & 0xFFFFFFFF; 711 restart->arg0 = timer.expires.tv64 & 0xFFFFFFFF;
711 restart->arg1 = timer.expires.tv64 >> 32; 712 restart->arg1 = timer.expires.tv64 >> 32;
712 restart->arg2 = (unsigned long) rmtp; 713 restart->arg2 = (unsigned long) rmtp;
714 restart->arg3 = (unsigned long) timer.base->index;
713 715
714 return -ERESTART_RESTARTBLOCK; 716 return -ERESTART_RESTARTBLOCK;
715} 717}
@@ -736,10 +738,8 @@ static void __devinit init_hrtimers_cpu(int cpu)
736 struct hrtimer_base *base = per_cpu(hrtimer_bases, cpu); 738 struct hrtimer_base *base = per_cpu(hrtimer_bases, cpu);
737 int i; 739 int i;
738 740
739 for (i = 0; i < MAX_HRTIMER_BASES; i++) { 741 for (i = 0; i < MAX_HRTIMER_BASES; i++, base++)
740 spin_lock_init(&base->lock); 742 spin_lock_init(&base->lock);
741 base++;
742 }
743} 743}
744 744
745#ifdef CONFIG_HOTPLUG_CPU 745#ifdef CONFIG_HOTPLUG_CPU
diff --git a/kernel/intermodule.c b/kernel/intermodule.c
index 0cbe633420fb..55b1e5b85db9 100644
--- a/kernel/intermodule.c
+++ b/kernel/intermodule.c
@@ -179,3 +179,6 @@ EXPORT_SYMBOL(inter_module_register);
179EXPORT_SYMBOL(inter_module_unregister); 179EXPORT_SYMBOL(inter_module_unregister);
180EXPORT_SYMBOL(inter_module_get_request); 180EXPORT_SYMBOL(inter_module_get_request);
181EXPORT_SYMBOL(inter_module_put); 181EXPORT_SYMBOL(inter_module_put);
182
183MODULE_LICENSE("GPL");
184
diff --git a/kernel/itimer.c b/kernel/itimer.c
index c2c05c4ff28d..379be2f8c84c 100644
--- a/kernel/itimer.c
+++ b/kernel/itimer.c
@@ -49,9 +49,11 @@ int do_getitimer(int which, struct itimerval *value)
49 49
50 switch (which) { 50 switch (which) {
51 case ITIMER_REAL: 51 case ITIMER_REAL:
52 spin_lock_irq(&tsk->sighand->siglock);
52 value->it_value = itimer_get_remtime(&tsk->signal->real_timer); 53 value->it_value = itimer_get_remtime(&tsk->signal->real_timer);
53 value->it_interval = 54 value->it_interval =
54 ktime_to_timeval(tsk->signal->it_real_incr); 55 ktime_to_timeval(tsk->signal->it_real_incr);
56 spin_unlock_irq(&tsk->sighand->siglock);
55 break; 57 break;
56 case ITIMER_VIRTUAL: 58 case ITIMER_VIRTUAL:
57 read_lock(&tasklist_lock); 59 read_lock(&tasklist_lock);
@@ -150,18 +152,25 @@ int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue)
150 152
151 switch (which) { 153 switch (which) {
152 case ITIMER_REAL: 154 case ITIMER_REAL:
155again:
156 spin_lock_irq(&tsk->sighand->siglock);
153 timer = &tsk->signal->real_timer; 157 timer = &tsk->signal->real_timer;
154 hrtimer_cancel(timer);
155 if (ovalue) { 158 if (ovalue) {
156 ovalue->it_value = itimer_get_remtime(timer); 159 ovalue->it_value = itimer_get_remtime(timer);
157 ovalue->it_interval 160 ovalue->it_interval
158 = ktime_to_timeval(tsk->signal->it_real_incr); 161 = ktime_to_timeval(tsk->signal->it_real_incr);
159 } 162 }
163 /* We are sharing ->siglock with it_real_fn() */
164 if (hrtimer_try_to_cancel(timer) < 0) {
165 spin_unlock_irq(&tsk->sighand->siglock);
166 goto again;
167 }
160 tsk->signal->it_real_incr = 168 tsk->signal->it_real_incr =
161 timeval_to_ktime(value->it_interval); 169 timeval_to_ktime(value->it_interval);
162 expires = timeval_to_ktime(value->it_value); 170 expires = timeval_to_ktime(value->it_value);
163 if (expires.tv64 != 0) 171 if (expires.tv64 != 0)
164 hrtimer_start(timer, expires, HRTIMER_REL); 172 hrtimer_start(timer, expires, HRTIMER_REL);
173 spin_unlock_irq(&tsk->sighand->siglock);
165 break; 174 break;
166 case ITIMER_VIRTUAL: 175 case ITIMER_VIRTUAL:
167 nval = timeval_to_cputime(&value->it_value); 176 nval = timeval_to_cputime(&value->it_value);
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 3ea6325228da..fef1af8a73ce 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -344,23 +344,6 @@ void __kprobes kprobe_flush_task(struct task_struct *tk)
344 spin_unlock_irqrestore(&kretprobe_lock, flags); 344 spin_unlock_irqrestore(&kretprobe_lock, flags);
345} 345}
346 346
347/*
348 * This kprobe pre_handler is registered with every kretprobe. When probe
349 * hits it will set up the return probe.
350 */
351static int __kprobes pre_handler_kretprobe(struct kprobe *p,
352 struct pt_regs *regs)
353{
354 struct kretprobe *rp = container_of(p, struct kretprobe, kp);
355 unsigned long flags = 0;
356
357 /*TODO: consider to only swap the RA after the last pre_handler fired */
358 spin_lock_irqsave(&kretprobe_lock, flags);
359 arch_prepare_kretprobe(rp, regs);
360 spin_unlock_irqrestore(&kretprobe_lock, flags);
361 return 0;
362}
363
364static inline void free_rp_inst(struct kretprobe *rp) 347static inline void free_rp_inst(struct kretprobe *rp)
365{ 348{
366 struct kretprobe_instance *ri; 349 struct kretprobe_instance *ri;
@@ -578,6 +561,23 @@ void __kprobes unregister_jprobe(struct jprobe *jp)
578 561
579#ifdef ARCH_SUPPORTS_KRETPROBES 562#ifdef ARCH_SUPPORTS_KRETPROBES
580 563
564/*
565 * This kprobe pre_handler is registered with every kretprobe. When probe
566 * hits it will set up the return probe.
567 */
568static int __kprobes pre_handler_kretprobe(struct kprobe *p,
569 struct pt_regs *regs)
570{
571 struct kretprobe *rp = container_of(p, struct kretprobe, kp);
572 unsigned long flags = 0;
573
574 /*TODO: consider to only swap the RA after the last pre_handler fired */
575 spin_lock_irqsave(&kretprobe_lock, flags);
576 arch_prepare_kretprobe(rp, regs);
577 spin_unlock_irqrestore(&kretprobe_lock, flags);
578 return 0;
579}
580
581int __kprobes register_kretprobe(struct kretprobe *rp) 581int __kprobes register_kretprobe(struct kretprobe *rp)
582{ 582{
583 int ret = 0; 583 int ret = 0;
@@ -631,12 +631,12 @@ void __kprobes unregister_kretprobe(struct kretprobe *rp)
631 unregister_kprobe(&rp->kp); 631 unregister_kprobe(&rp->kp);
632 /* No race here */ 632 /* No race here */
633 spin_lock_irqsave(&kretprobe_lock, flags); 633 spin_lock_irqsave(&kretprobe_lock, flags);
634 free_rp_inst(rp);
635 while ((ri = get_used_rp_inst(rp)) != NULL) { 634 while ((ri = get_used_rp_inst(rp)) != NULL) {
636 ri->rp = NULL; 635 ri->rp = NULL;
637 hlist_del(&ri->uflist); 636 hlist_del(&ri->uflist);
638 } 637 }
639 spin_unlock_irqrestore(&kretprobe_lock, flags); 638 spin_unlock_irqrestore(&kretprobe_lock, flags);
639 free_rp_inst(rp);
640} 640}
641 641
642static int __init init_kprobes(void) 642static int __init init_kprobes(void)
diff --git a/kernel/module.c b/kernel/module.c
index 618ed6e23ecc..5aad477ddc79 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1670,6 +1670,9 @@ static struct module *load_module(void __user *umod,
1670 goto free_mod; 1670 goto free_mod;
1671 } 1671 }
1672 1672
1673 /* Userspace could have altered the string after the strlen_user() */
1674 args[arglen - 1] = '\0';
1675
1673 if (find_module(mod->name)) { 1676 if (find_module(mod->name)) {
1674 err = -EEXIST; 1677 err = -EEXIST;
1675 goto free_mod; 1678 goto free_mod;
@@ -2092,7 +2095,8 @@ static unsigned long mod_find_symname(struct module *mod, const char *name)
2092 unsigned int i; 2095 unsigned int i;
2093 2096
2094 for (i = 0; i < mod->num_symtab; i++) 2097 for (i = 0; i < mod->num_symtab; i++)
2095 if (strcmp(name, mod->strtab+mod->symtab[i].st_name) == 0) 2098 if (strcmp(name, mod->strtab+mod->symtab[i].st_name) == 0 &&
2099 mod->symtab[i].st_info != 'U')
2096 return mod->symtab[i].st_value; 2100 return mod->symtab[i].st_value;
2097 return 0; 2101 return 0;
2098} 2102}
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 197208b3aa2a..216f574b5ffb 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -194,9 +194,7 @@ static inline int common_clock_set(const clockid_t which_clock,
194 194
195static int common_timer_create(struct k_itimer *new_timer) 195static int common_timer_create(struct k_itimer *new_timer)
196{ 196{
197 hrtimer_init(&new_timer->it.real.timer, new_timer->it_clock); 197 hrtimer_init(&new_timer->it.real.timer, new_timer->it_clock, 0);
198 new_timer->it.real.timer.data = new_timer;
199 new_timer->it.real.timer.function = posix_timer_fn;
200 return 0; 198 return 0;
201} 199}
202 200
@@ -290,7 +288,8 @@ void do_schedule_next_timer(struct siginfo *info)
290 info->si_overrun = timr->it_overrun_last; 288 info->si_overrun = timr->it_overrun_last;
291 } 289 }
292 290
293 unlock_timer(timr, flags); 291 if (timr)
292 unlock_timer(timr, flags);
294} 293}
295 294
296int posix_timer_event(struct k_itimer *timr,int si_private) 295int posix_timer_event(struct k_itimer *timr,int si_private)
@@ -692,6 +691,7 @@ common_timer_set(struct k_itimer *timr, int flags,
692 struct itimerspec *new_setting, struct itimerspec *old_setting) 691 struct itimerspec *new_setting, struct itimerspec *old_setting)
693{ 692{
694 struct hrtimer *timer = &timr->it.real.timer; 693 struct hrtimer *timer = &timr->it.real.timer;
694 enum hrtimer_mode mode;
695 695
696 if (old_setting) 696 if (old_setting)
697 common_timer_get(timr, old_setting); 697 common_timer_get(timr, old_setting);
@@ -713,14 +713,10 @@ common_timer_set(struct k_itimer *timr, int flags,
713 if (!new_setting->it_value.tv_sec && !new_setting->it_value.tv_nsec) 713 if (!new_setting->it_value.tv_sec && !new_setting->it_value.tv_nsec)
714 return 0; 714 return 0;
715 715
716 /* Posix madness. Only absolute CLOCK_REALTIME timers 716 mode = flags & TIMER_ABSTIME ? HRTIMER_ABS : HRTIMER_REL;
717 * are affected by clock sets. So we must reiniatilize 717 hrtimer_init(&timr->it.real.timer, timr->it_clock, mode);
718 * the timer. 718 timr->it.real.timer.data = timr;
719 */ 719 timr->it.real.timer.function = posix_timer_fn;
720 if (timr->it_clock == CLOCK_REALTIME && (flags & TIMER_ABSTIME))
721 hrtimer_rebase(timer, CLOCK_REALTIME);
722 else
723 hrtimer_rebase(timer, CLOCK_MONOTONIC);
724 720
725 timer->expires = timespec_to_ktime(new_setting->it_value); 721 timer->expires = timespec_to_ktime(new_setting->it_value);
726 722
@@ -728,11 +724,15 @@ common_timer_set(struct k_itimer *timr, int flags,
728 timr->it.real.interval = timespec_to_ktime(new_setting->it_interval); 724 timr->it.real.interval = timespec_to_ktime(new_setting->it_interval);
729 725
730 /* SIGEV_NONE timers are not queued ! See common_timer_get */ 726 /* SIGEV_NONE timers are not queued ! See common_timer_get */
731 if (((timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE)) 727 if (((timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE)) {
728 /* Setup correct expiry time for relative timers */
729 if (mode == HRTIMER_REL)
730 timer->expires = ktime_add(timer->expires,
731 timer->base->get_time());
732 return 0; 732 return 0;
733 }
733 734
734 hrtimer_start(timer, timer->expires, (flags & TIMER_ABSTIME) ? 735 hrtimer_start(timer, timer->expires, mode);
735 HRTIMER_ABS : HRTIMER_REL);
736 return 0; 736 return 0;
737} 737}
738 738
@@ -875,12 +875,6 @@ int do_posix_clock_nosettime(const clockid_t clockid, struct timespec *tp)
875} 875}
876EXPORT_SYMBOL_GPL(do_posix_clock_nosettime); 876EXPORT_SYMBOL_GPL(do_posix_clock_nosettime);
877 877
878int do_posix_clock_notimer_create(struct k_itimer *timer)
879{
880 return -EINVAL;
881}
882EXPORT_SYMBOL_GPL(do_posix_clock_notimer_create);
883
884int do_posix_clock_nonanosleep(const clockid_t clock, int flags, 878int do_posix_clock_nonanosleep(const clockid_t clock, int flags,
885 struct timespec *t, struct timespec __user *r) 879 struct timespec *t, struct timespec __user *r)
886{ 880{
@@ -947,21 +941,8 @@ sys_clock_getres(const clockid_t which_clock, struct timespec __user *tp)
947static int common_nsleep(const clockid_t which_clock, int flags, 941static int common_nsleep(const clockid_t which_clock, int flags,
948 struct timespec *tsave, struct timespec __user *rmtp) 942 struct timespec *tsave, struct timespec __user *rmtp)
949{ 943{
950 int mode = flags & TIMER_ABSTIME ? HRTIMER_ABS : HRTIMER_REL; 944 return hrtimer_nanosleep(tsave, rmtp, flags & TIMER_ABSTIME ?
951 int clockid = which_clock; 945 HRTIMER_ABS : HRTIMER_REL, which_clock);
952
953 switch (which_clock) {
954 case CLOCK_REALTIME:
955 /* Posix madness. Only absolute timers on clock realtime
956 are affected by clock set. */
957 if (mode != HRTIMER_ABS)
958 clockid = CLOCK_MONOTONIC;
959 case CLOCK_MONOTONIC:
960 break;
961 default:
962 return -EINVAL;
963 }
964 return hrtimer_nanosleep(tsave, rmtp, mode, clockid);
965} 946}
966 947
967asmlinkage long 948asmlinkage long
diff --git a/kernel/power/console.c b/kernel/power/console.c
index 7ff375e7c95f..623786d44159 100644
--- a/kernel/power/console.c
+++ b/kernel/power/console.c
@@ -9,18 +9,13 @@
9#include <linux/console.h> 9#include <linux/console.h>
10#include "power.h" 10#include "power.h"
11 11
12static int new_loglevel = 10; 12#if defined(CONFIG_VT) && defined(CONFIG_VT_CONSOLE)
13static int orig_loglevel; 13#define SUSPEND_CONSOLE (MAX_NR_CONSOLES-1)
14#ifdef SUSPEND_CONSOLE 14
15static int orig_fgconsole, orig_kmsg; 15static int orig_fgconsole, orig_kmsg;
16#endif
17 16
18int pm_prepare_console(void) 17int pm_prepare_console(void)
19{ 18{
20 orig_loglevel = console_loglevel;
21 console_loglevel = new_loglevel;
22
23#ifdef SUSPEND_CONSOLE
24 acquire_console_sem(); 19 acquire_console_sem();
25 20
26 orig_fgconsole = fg_console; 21 orig_fgconsole = fg_console;
@@ -41,18 +36,15 @@ int pm_prepare_console(void)
41 } 36 }
42 orig_kmsg = kmsg_redirect; 37 orig_kmsg = kmsg_redirect;
43 kmsg_redirect = SUSPEND_CONSOLE; 38 kmsg_redirect = SUSPEND_CONSOLE;
44#endif
45 return 0; 39 return 0;
46} 40}
47 41
48void pm_restore_console(void) 42void pm_restore_console(void)
49{ 43{
50 console_loglevel = orig_loglevel;
51#ifdef SUSPEND_CONSOLE
52 acquire_console_sem(); 44 acquire_console_sem();
53 set_console(orig_fgconsole); 45 set_console(orig_fgconsole);
54 release_console_sem(); 46 release_console_sem();
55 kmsg_redirect = orig_kmsg; 47 kmsg_redirect = orig_kmsg;
56#endif
57 return; 48 return;
58} 49}
50#endif
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index e24446f8d8cd..0b43847dc980 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -53,7 +53,7 @@ static void power_down(suspend_disk_method_t mode)
53 53
54 switch(mode) { 54 switch(mode) {
55 case PM_DISK_PLATFORM: 55 case PM_DISK_PLATFORM:
56 kernel_power_off_prepare(); 56 kernel_shutdown_prepare(SYSTEM_SUSPEND_DISK);
57 error = pm_ops->enter(PM_SUSPEND_DISK); 57 error = pm_ops->enter(PM_SUSPEND_DISK);
58 break; 58 break;
59 case PM_DISK_SHUTDOWN: 59 case PM_DISK_SHUTDOWN:
@@ -95,13 +95,6 @@ static int prepare_processes(void)
95 goto thaw; 95 goto thaw;
96 } 96 }
97 97
98 if (pm_disk_mode == PM_DISK_PLATFORM) {
99 if (pm_ops && pm_ops->prepare) {
100 if ((error = pm_ops->prepare(PM_SUSPEND_DISK)))
101 goto thaw;
102 }
103 }
104
105 /* Free memory before shutting down devices. */ 98 /* Free memory before shutting down devices. */
106 if (!(error = swsusp_shrink_memory())) 99 if (!(error = swsusp_shrink_memory()))
107 return 0; 100 return 0;
@@ -367,14 +360,14 @@ power_attr(resume);
367 360
368static ssize_t image_size_show(struct subsystem * subsys, char *buf) 361static ssize_t image_size_show(struct subsystem * subsys, char *buf)
369{ 362{
370 return sprintf(buf, "%u\n", image_size); 363 return sprintf(buf, "%lu\n", image_size);
371} 364}
372 365
373static ssize_t image_size_store(struct subsystem * subsys, const char * buf, size_t n) 366static ssize_t image_size_store(struct subsystem * subsys, const char * buf, size_t n)
374{ 367{
375 unsigned int size; 368 unsigned long size;
376 369
377 if (sscanf(buf, "%u", &size) == 1) { 370 if (sscanf(buf, "%lu", &size) == 1) {
378 image_size = size; 371 image_size = size;
379 return n; 372 return n;
380 } 373 }
diff --git a/kernel/power/main.c b/kernel/power/main.c
index d253f3ae2fa5..9cb235cba4a9 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -133,10 +133,10 @@ static int suspend_enter(suspend_state_t state)
133static void suspend_finish(suspend_state_t state) 133static void suspend_finish(suspend_state_t state)
134{ 134{
135 device_resume(); 135 device_resume();
136 if (pm_ops && pm_ops->finish)
137 pm_ops->finish(state);
138 thaw_processes(); 136 thaw_processes();
139 enable_nonboot_cpus(); 137 enable_nonboot_cpus();
138 if (pm_ops && pm_ops->finish)
139 pm_ops->finish(state);
140 pm_restore_console(); 140 pm_restore_console();
141} 141}
142 142
diff --git a/kernel/power/power.h b/kernel/power/power.h
index 7e8492fd1423..388dba680841 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -1,14 +1,6 @@
1#include <linux/suspend.h> 1#include <linux/suspend.h>
2#include <linux/utsname.h> 2#include <linux/utsname.h>
3 3
4/* With SUSPEND_CONSOLE defined suspend looks *really* cool, but
5 we probably do not take enough locks for switching consoles, etc,
6 so bad things might happen.
7*/
8#if defined(CONFIG_VT) && defined(CONFIG_VT_CONSOLE)
9#define SUSPEND_CONSOLE (MAX_NR_CONSOLES-1)
10#endif
11
12struct swsusp_info { 4struct swsusp_info {
13 struct new_utsname uts; 5 struct new_utsname uts;
14 u32 version_code; 6 u32 version_code;
@@ -42,17 +34,14 @@ static struct subsys_attribute _name##_attr = { \
42 34
43extern struct subsystem power_subsys; 35extern struct subsystem power_subsys;
44 36
45extern int pm_prepare_console(void);
46extern void pm_restore_console(void);
47
48/* References to section boundaries */ 37/* References to section boundaries */
49extern const void __nosave_begin, __nosave_end; 38extern const void __nosave_begin, __nosave_end;
50 39
51extern unsigned int nr_copy_pages; 40extern unsigned int nr_copy_pages;
52extern struct pbe *pagedir_nosave; 41extern struct pbe *pagedir_nosave;
53 42
54/* Preferred image size in MB (default 500) */ 43/* Preferred image size in bytes (default 500 MB) */
55extern unsigned int image_size; 44extern unsigned long image_size;
56 45
57extern asmlinkage int swsusp_arch_suspend(void); 46extern asmlinkage int swsusp_arch_suspend(void);
58extern asmlinkage int swsusp_arch_resume(void); 47extern asmlinkage int swsusp_arch_resume(void);
diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c
index 55a18d26abed..4e90905f0e87 100644
--- a/kernel/power/swsusp.c
+++ b/kernel/power/swsusp.c
@@ -70,12 +70,12 @@
70#include "power.h" 70#include "power.h"
71 71
72/* 72/*
73 * Preferred image size in MB (tunable via /sys/power/image_size). 73 * Preferred image size in bytes (tunable via /sys/power/image_size).
74 * When it is set to N, swsusp will do its best to ensure the image 74 * When it is set to N, swsusp will do its best to ensure the image
75 * size will not exceed N MB, but if that is impossible, it will 75 * size will not exceed N bytes, but if that is impossible, it will
76 * try to create the smallest image possible. 76 * try to create the smallest image possible.
77 */ 77 */
78unsigned int image_size = 500; 78unsigned long image_size = 500 * 1024 * 1024;
79 79
80#ifdef CONFIG_HIGHMEM 80#ifdef CONFIG_HIGHMEM
81unsigned int count_highmem_pages(void); 81unsigned int count_highmem_pages(void);
@@ -590,7 +590,7 @@ int swsusp_shrink_memory(void)
590 if (!tmp) 590 if (!tmp)
591 return -ENOMEM; 591 return -ENOMEM;
592 pages += tmp; 592 pages += tmp;
593 } else if (size > (image_size * 1024 * 1024) / PAGE_SIZE) { 593 } else if (size > image_size / PAGE_SIZE) {
594 tmp = shrink_all_memory(SHRINK_BITE); 594 tmp = shrink_all_memory(SHRINK_BITE);
595 pages += tmp; 595 pages += tmp;
596 } 596 }
@@ -743,7 +743,6 @@ static int submit(int rw, pgoff_t page_off, void *page)
743 if (!bio) 743 if (!bio)
744 return -ENOMEM; 744 return -ENOMEM;
745 bio->bi_sector = page_off * (PAGE_SIZE >> 9); 745 bio->bi_sector = page_off * (PAGE_SIZE >> 9);
746 bio_get(bio);
747 bio->bi_bdev = resume_bdev; 746 bio->bi_bdev = resume_bdev;
748 bio->bi_end_io = end_io; 747 bio->bi_end_io = end_io;
749 748
@@ -753,14 +752,13 @@ static int submit(int rw, pgoff_t page_off, void *page)
753 goto Done; 752 goto Done;
754 } 753 }
755 754
756 if (rw == WRITE)
757 bio_set_pages_dirty(bio);
758 755
759 atomic_set(&io_done, 1); 756 atomic_set(&io_done, 1);
760 submit_bio(rw | (1 << BIO_RW_SYNC), bio); 757 submit_bio(rw | (1 << BIO_RW_SYNC), bio);
761 while (atomic_read(&io_done)) 758 while (atomic_read(&io_done))
762 yield(); 759 yield();
763 760 if (rw == READ)
761 bio_set_pages_dirty(bio);
764 Done: 762 Done:
765 bio_put(bio); 763 bio_put(bio);
766 return error; 764 return error;
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index 773219907dd8..7712912dbc84 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -114,16 +114,16 @@ rcu_torture_alloc(void)
114{ 114{
115 struct list_head *p; 115 struct list_head *p;
116 116
117 spin_lock(&rcu_torture_lock); 117 spin_lock_bh(&rcu_torture_lock);
118 if (list_empty(&rcu_torture_freelist)) { 118 if (list_empty(&rcu_torture_freelist)) {
119 atomic_inc(&n_rcu_torture_alloc_fail); 119 atomic_inc(&n_rcu_torture_alloc_fail);
120 spin_unlock(&rcu_torture_lock); 120 spin_unlock_bh(&rcu_torture_lock);
121 return NULL; 121 return NULL;
122 } 122 }
123 atomic_inc(&n_rcu_torture_alloc); 123 atomic_inc(&n_rcu_torture_alloc);
124 p = rcu_torture_freelist.next; 124 p = rcu_torture_freelist.next;
125 list_del_init(p); 125 list_del_init(p);
126 spin_unlock(&rcu_torture_lock); 126 spin_unlock_bh(&rcu_torture_lock);
127 return container_of(p, struct rcu_torture, rtort_free); 127 return container_of(p, struct rcu_torture, rtort_free);
128} 128}
129 129
@@ -134,9 +134,9 @@ static void
134rcu_torture_free(struct rcu_torture *p) 134rcu_torture_free(struct rcu_torture *p)
135{ 135{
136 atomic_inc(&n_rcu_torture_free); 136 atomic_inc(&n_rcu_torture_free);
137 spin_lock(&rcu_torture_lock); 137 spin_lock_bh(&rcu_torture_lock);
138 list_add_tail(&p->rtort_free, &rcu_torture_freelist); 138 list_add_tail(&p->rtort_free, &rcu_torture_freelist);
139 spin_unlock(&rcu_torture_lock); 139 spin_unlock_bh(&rcu_torture_lock);
140} 140}
141 141
142static void 142static void
diff --git a/kernel/sched.c b/kernel/sched.c
index 3ee2ae45125f..bc38804e40dd 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4031,7 +4031,7 @@ long sched_getaffinity(pid_t pid, cpumask_t *mask)
4031 goto out_unlock; 4031 goto out_unlock;
4032 4032
4033 retval = 0; 4033 retval = 0;
4034 cpus_and(*mask, p->cpus_allowed, cpu_possible_map); 4034 cpus_and(*mask, p->cpus_allowed, cpu_online_map);
4035 4035
4036out_unlock: 4036out_unlock:
4037 read_unlock(&tasklist_lock); 4037 read_unlock(&tasklist_lock);
@@ -5141,7 +5141,7 @@ static void init_sched_build_groups(struct sched_group groups[], cpumask_t span,
5141#define SEARCH_SCOPE 2 5141#define SEARCH_SCOPE 2
5142#define MIN_CACHE_SIZE (64*1024U) 5142#define MIN_CACHE_SIZE (64*1024U)
5143#define DEFAULT_CACHE_SIZE (5*1024*1024U) 5143#define DEFAULT_CACHE_SIZE (5*1024*1024U)
5144#define ITERATIONS 2 5144#define ITERATIONS 1
5145#define SIZE_THRESH 130 5145#define SIZE_THRESH 130
5146#define COST_THRESH 130 5146#define COST_THRESH 130
5147 5147
@@ -5480,9 +5480,9 @@ static unsigned long long measure_migration_cost(int cpu1, int cpu2)
5480 break; 5480 break;
5481 } 5481 }
5482 /* 5482 /*
5483 * Increase the cachesize in 5% steps: 5483 * Increase the cachesize in 10% steps:
5484 */ 5484 */
5485 size = size * 20 / 19; 5485 size = size * 10 / 9;
5486 } 5486 }
5487 5487
5488 if (migration_debug) 5488 if (migration_debug)
@@ -5551,13 +5551,15 @@ static void calibrate_migration_costs(const cpumask_t *cpu_map)
5551 -1 5551 -1
5552#endif 5552#endif
5553 ); 5553 );
5554 printk("migration_cost="); 5554 if (system_state == SYSTEM_BOOTING) {
5555 for (distance = 0; distance <= max_distance; distance++) { 5555 printk("migration_cost=");
5556 if (distance) 5556 for (distance = 0; distance <= max_distance; distance++) {
5557 printk(","); 5557 if (distance)
5558 printk("%ld", (long)migration_cost[distance] / 1000); 5558 printk(",");
5559 printk("%ld", (long)migration_cost[distance] / 1000);
5560 }
5561 printk("\n");
5559 } 5562 }
5560 printk("\n");
5561 j1 = jiffies; 5563 j1 = jiffies;
5562 if (migration_debug) 5564 if (migration_debug)
5563 printk("migration: %ld seconds\n", (j1-j0)/HZ); 5565 printk("migration: %ld seconds\n", (j1-j0)/HZ);
@@ -6109,7 +6111,7 @@ void __init sched_init(void)
6109 runqueue_t *rq; 6111 runqueue_t *rq;
6110 int i, j, k; 6112 int i, j, k;
6111 6113
6112 for (i = 0; i < NR_CPUS; i++) { 6114 for_each_cpu(i) {
6113 prio_array_t *array; 6115 prio_array_t *array;
6114 6116
6115 rq = cpu_rq(i); 6117 rq = cpu_rq(i);
diff --git a/kernel/signal.c b/kernel/signal.c
index d3efafd8109a..b373fc2420da 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -283,7 +283,7 @@ static struct sigqueue *__sigqueue_alloc(struct task_struct *t, gfp_t flags,
283 return(q); 283 return(q);
284} 284}
285 285
286static inline void __sigqueue_free(struct sigqueue *q) 286static void __sigqueue_free(struct sigqueue *q)
287{ 287{
288 if (q->flags & SIGQUEUE_PREALLOC) 288 if (q->flags & SIGQUEUE_PREALLOC)
289 return; 289 return;
diff --git a/kernel/sys.c b/kernel/sys.c
index d09cac23fdfd..f91218a5463e 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -428,7 +428,7 @@ void kernel_kexec(void)
428{ 428{
429#ifdef CONFIG_KEXEC 429#ifdef CONFIG_KEXEC
430 struct kimage *image; 430 struct kimage *image;
431 image = xchg(&kexec_image, 0); 431 image = xchg(&kexec_image, NULL);
432 if (!image) { 432 if (!image) {
433 return; 433 return;
434 } 434 }
@@ -440,23 +440,25 @@ void kernel_kexec(void)
440} 440}
441EXPORT_SYMBOL_GPL(kernel_kexec); 441EXPORT_SYMBOL_GPL(kernel_kexec);
442 442
443void kernel_shutdown_prepare(enum system_states state)
444{
445 notifier_call_chain(&reboot_notifier_list,
446 (state == SYSTEM_HALT)?SYS_HALT:SYS_POWER_OFF, NULL);
447 system_state = state;
448 device_shutdown();
449}
443/** 450/**
444 * kernel_halt - halt the system 451 * kernel_halt - halt the system
445 * 452 *
446 * Shutdown everything and perform a clean system halt. 453 * Shutdown everything and perform a clean system halt.
447 */ 454 */
448void kernel_halt_prepare(void)
449{
450 notifier_call_chain(&reboot_notifier_list, SYS_HALT, NULL);
451 system_state = SYSTEM_HALT;
452 device_shutdown();
453}
454void kernel_halt(void) 455void kernel_halt(void)
455{ 456{
456 kernel_halt_prepare(); 457 kernel_shutdown_prepare(SYSTEM_HALT);
457 printk(KERN_EMERG "System halted.\n"); 458 printk(KERN_EMERG "System halted.\n");
458 machine_halt(); 459 machine_halt();
459} 460}
461
460EXPORT_SYMBOL_GPL(kernel_halt); 462EXPORT_SYMBOL_GPL(kernel_halt);
461 463
462/** 464/**
@@ -464,20 +466,13 @@ EXPORT_SYMBOL_GPL(kernel_halt);
464 * 466 *
465 * Shutdown everything and perform a clean system power_off. 467 * Shutdown everything and perform a clean system power_off.
466 */ 468 */
467void kernel_power_off_prepare(void)
468{
469 notifier_call_chain(&reboot_notifier_list, SYS_POWER_OFF, NULL);
470 system_state = SYSTEM_POWER_OFF;
471 device_shutdown();
472}
473void kernel_power_off(void) 469void kernel_power_off(void)
474{ 470{
475 kernel_power_off_prepare(); 471 kernel_shutdown_prepare(SYSTEM_POWER_OFF);
476 printk(KERN_EMERG "Power down.\n"); 472 printk(KERN_EMERG "Power down.\n");
477 machine_power_off(); 473 machine_power_off();
478} 474}
479EXPORT_SYMBOL_GPL(kernel_power_off); 475EXPORT_SYMBOL_GPL(kernel_power_off);
480
481/* 476/*
482 * Reboot system call: for obvious reasons only root may call it, 477 * Reboot system call: for obvious reasons only root may call it,
483 * and even root needs to set up some magic numbers in the registers 478 * and even root needs to set up some magic numbers in the registers
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index cb99a42f8b37..71dd6f62efec 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -878,7 +878,17 @@ static ctl_table vm_table[] = {
878 .maxlen = sizeof(zone_reclaim_mode), 878 .maxlen = sizeof(zone_reclaim_mode),
879 .mode = 0644, 879 .mode = 0644,
880 .proc_handler = &proc_dointvec, 880 .proc_handler = &proc_dointvec,
881 .strategy = &zero, 881 .strategy = &sysctl_intvec,
882 .extra1 = &zero,
883 },
884 {
885 .ctl_name = VM_ZONE_RECLAIM_INTERVAL,
886 .procname = "zone_reclaim_interval",
887 .data = &zone_reclaim_interval,
888 .maxlen = sizeof(zone_reclaim_interval),
889 .mode = 0644,
890 .proc_handler = &proc_dointvec_jiffies,
891 .strategy = &sysctl_jiffies,
882 }, 892 },
883#endif 893#endif
884 { .ctl_name = 0 } 894 { .ctl_name = 0 }
diff --git a/kernel/time.c b/kernel/time.c
index 7477b1d2079e..804539165d8b 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -155,7 +155,7 @@ int do_sys_settimeofday(struct timespec *tv, struct timezone *tz)
155 static int firsttime = 1; 155 static int firsttime = 1;
156 int error = 0; 156 int error = 0;
157 157
158 if (!timespec_valid(tv)) 158 if (tv && !timespec_valid(tv))
159 return -EINVAL; 159 return -EINVAL;
160 160
161 error = security_settime(tv, tz); 161 error = security_settime(tv, tz);
@@ -637,15 +637,16 @@ void set_normalized_timespec(struct timespec *ts, time_t sec, long nsec)
637 * 637 *
638 * Returns the timespec representation of the nsec parameter. 638 * Returns the timespec representation of the nsec parameter.
639 */ 639 */
640inline struct timespec ns_to_timespec(const nsec_t nsec) 640struct timespec ns_to_timespec(const nsec_t nsec)
641{ 641{
642 struct timespec ts; 642 struct timespec ts;
643 643
644 if (nsec) 644 if (!nsec)
645 ts.tv_sec = div_long_long_rem_signed(nsec, NSEC_PER_SEC, 645 return (struct timespec) {0, 0};
646 &ts.tv_nsec); 646
647 else 647 ts.tv_sec = div_long_long_rem_signed(nsec, NSEC_PER_SEC, &ts.tv_nsec);
648 ts.tv_sec = ts.tv_nsec = 0; 648 if (unlikely(nsec < 0))
649 set_normalized_timespec(&ts, ts.tv_sec, ts.tv_nsec);
649 650
650 return ts; 651 return ts;
651} 652}
diff --git a/kernel/timer.c b/kernel/timer.c
index 4f1cb0ab5251..b9dad3994676 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -495,7 +495,7 @@ unsigned long next_timer_interrupt(void)
495 base = &__get_cpu_var(tvec_bases); 495 base = &__get_cpu_var(tvec_bases);
496 spin_lock(&base->t_base.lock); 496 spin_lock(&base->t_base.lock);
497 expires = base->timer_jiffies + (LONG_MAX >> 1); 497 expires = base->timer_jiffies + (LONG_MAX >> 1);
498 list = 0; 498 list = NULL;
499 499
500 /* Look for timer events in tv1. */ 500 /* Look for timer events in tv1. */
501 j = base->timer_jiffies & TVR_MASK; 501 j = base->timer_jiffies & TVR_MASK;
diff --git a/kernel/user.c b/kernel/user.c
index 89e562feb1b1..d9deae43a9ab 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -13,6 +13,7 @@
13#include <linux/slab.h> 13#include <linux/slab.h>
14#include <linux/bitops.h> 14#include <linux/bitops.h>
15#include <linux/key.h> 15#include <linux/key.h>
16#include <linux/interrupt.h>
16 17
17/* 18/*
18 * UID task count cache, to get fast user lookup in "alloc_uid" 19 * UID task count cache, to get fast user lookup in "alloc_uid"
@@ -27,6 +28,16 @@
27 28
28static kmem_cache_t *uid_cachep; 29static kmem_cache_t *uid_cachep;
29static struct list_head uidhash_table[UIDHASH_SZ]; 30static struct list_head uidhash_table[UIDHASH_SZ];
31
32/*
33 * The uidhash_lock is mostly taken from process context, but it is
34 * occasionally also taken from softirq/tasklet context, when
35 * task-structs get RCU-freed. Hence all locking must be softirq-safe.
36 * But free_uid() is also called with local interrupts disabled, and running
37 * local_bh_enable() with local interrupts disabled is an error - we'll run
38 * softirq callbacks, and they can unconditionally enable interrupts, and
39 * the caller of free_uid() didn't expect that..
40 */
30static DEFINE_SPINLOCK(uidhash_lock); 41static DEFINE_SPINLOCK(uidhash_lock);
31 42
32struct user_struct root_user = { 43struct user_struct root_user = {
@@ -82,15 +93,19 @@ static inline struct user_struct *uid_hash_find(uid_t uid, struct list_head *has
82struct user_struct *find_user(uid_t uid) 93struct user_struct *find_user(uid_t uid)
83{ 94{
84 struct user_struct *ret; 95 struct user_struct *ret;
96 unsigned long flags;
85 97
86 spin_lock(&uidhash_lock); 98 spin_lock_irqsave(&uidhash_lock, flags);
87 ret = uid_hash_find(uid, uidhashentry(uid)); 99 ret = uid_hash_find(uid, uidhashentry(uid));
88 spin_unlock(&uidhash_lock); 100 spin_unlock_irqrestore(&uidhash_lock, flags);
89 return ret; 101 return ret;
90} 102}
91 103
92void free_uid(struct user_struct *up) 104void free_uid(struct user_struct *up)
93{ 105{
106 unsigned long flags;
107
108 local_irq_save(flags);
94 if (up && atomic_dec_and_lock(&up->__count, &uidhash_lock)) { 109 if (up && atomic_dec_and_lock(&up->__count, &uidhash_lock)) {
95 uid_hash_remove(up); 110 uid_hash_remove(up);
96 key_put(up->uid_keyring); 111 key_put(up->uid_keyring);
@@ -98,6 +113,7 @@ void free_uid(struct user_struct *up)
98 kmem_cache_free(uid_cachep, up); 113 kmem_cache_free(uid_cachep, up);
99 spin_unlock(&uidhash_lock); 114 spin_unlock(&uidhash_lock);
100 } 115 }
116 local_irq_restore(flags);
101} 117}
102 118
103struct user_struct * alloc_uid(uid_t uid) 119struct user_struct * alloc_uid(uid_t uid)
@@ -105,9 +121,9 @@ struct user_struct * alloc_uid(uid_t uid)
105 struct list_head *hashent = uidhashentry(uid); 121 struct list_head *hashent = uidhashentry(uid);
106 struct user_struct *up; 122 struct user_struct *up;
107 123
108 spin_lock(&uidhash_lock); 124 spin_lock_irq(&uidhash_lock);
109 up = uid_hash_find(uid, hashent); 125 up = uid_hash_find(uid, hashent);
110 spin_unlock(&uidhash_lock); 126 spin_unlock_irq(&uidhash_lock);
111 127
112 if (!up) { 128 if (!up) {
113 struct user_struct *new; 129 struct user_struct *new;
@@ -137,7 +153,7 @@ struct user_struct * alloc_uid(uid_t uid)
137 * Before adding this, check whether we raced 153 * Before adding this, check whether we raced
138 * on adding the same user already.. 154 * on adding the same user already..
139 */ 155 */
140 spin_lock(&uidhash_lock); 156 spin_lock_irq(&uidhash_lock);
141 up = uid_hash_find(uid, hashent); 157 up = uid_hash_find(uid, hashent);
142 if (up) { 158 if (up) {
143 key_put(new->uid_keyring); 159 key_put(new->uid_keyring);
@@ -147,7 +163,7 @@ struct user_struct * alloc_uid(uid_t uid)
147 uid_hash_insert(new, hashent); 163 uid_hash_insert(new, hashent);
148 up = new; 164 up = new;
149 } 165 }
150 spin_unlock(&uidhash_lock); 166 spin_unlock_irq(&uidhash_lock);
151 167
152 } 168 }
153 return up; 169 return up;
@@ -183,9 +199,9 @@ static int __init uid_cache_init(void)
183 INIT_LIST_HEAD(uidhash_table + n); 199 INIT_LIST_HEAD(uidhash_table + n);
184 200
185 /* Insert the root user immediately (init already runs as root) */ 201 /* Insert the root user immediately (init already runs as root) */
186 spin_lock(&uidhash_lock); 202 spin_lock_irq(&uidhash_lock);
187 uid_hash_insert(&root_user, uidhashentry(0)); 203 uid_hash_insert(&root_user, uidhashentry(0));
188 spin_unlock(&uidhash_lock); 204 spin_unlock_irq(&uidhash_lock);
189 205
190 return 0; 206 return 0;
191} 207}