about summary refs log tree commit diff stats
path: root/kernel
diff options
context:
space:
mode:
author Jeff Garzik <jgarzik@pobox.com> 2005-08-17 00:51:31 -0400
committer Jeff Garzik <jgarzik@pobox.com> 2005-08-17 00:51:31 -0400
commit a2e30e529a48ef4e106e405f91cf4ae525bb01c4 (patch)
tree 2def96ef17c0672c30f1a10287552978bf1d0b1c /kernel
parent edb3366703224d5d8df573ae698ccd6b488dc743 (diff)
parent 2ad56496627630ebc99f06af5f81ca23e17e014e (diff)
Merge /spare/repo/linux-2.6/
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/capability.c 20
-rw-r--r-- kernel/cpuset.c 94
-rw-r--r-- kernel/crash_dump.c 11
-rw-r--r-- kernel/exit.c 4
-rw-r--r-- kernel/itimer.c 37
-rw-r--r-- kernel/module.c 15
-rw-r--r-- kernel/panic.c 9
-rw-r--r-- kernel/posix-timers.c 18
-rw-r--r-- kernel/power/disk.c 9
-rw-r--r-- kernel/power/poweroff.c 4
-rw-r--r-- kernel/power/smp.c 2
-rw-r--r-- kernel/sched.c 8
-rw-r--r-- kernel/sys.c 109
-rw-r--r-- kernel/sys_ni.c 1
-rw-r--r-- kernel/sysctl.c 12
-rw-r--r-- kernel/time.c 2
-rw-r--r-- kernel/workqueue.c 2
17 files changed, 221 insertions, 136 deletions
diff --git a/kernel/capability.c b/kernel/capability.c
index 64db1ee820c2..8986a37a67ea 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -31,8 +31,14 @@ static DEFINE_SPINLOCK(task_capability_lock);
31 * uninteresting and/or not to be changed. 31 * uninteresting and/or not to be changed.
32 */ 32 */
33 33
34/* 34/**
35 * sys_capget - get the capabilities of a given process. 35 * sys_capget - get the capabilities of a given process.
36 * @header: pointer to struct that contains capability version and
37 * target pid data
38 * @dataptr: pointer to struct that contains the effective, permitted,
39 * and inheritable capabilities that are returned
40 *
41 * Returns 0 on success and < 0 on error.
36 */ 42 */
37asmlinkage long sys_capget(cap_user_header_t header, cap_user_data_t dataptr) 43asmlinkage long sys_capget(cap_user_header_t header, cap_user_data_t dataptr)
38{ 44{
@@ -141,8 +147,14 @@ static inline int cap_set_all(kernel_cap_t *effective,
141 return ret; 147 return ret;
142} 148}
143 149
144/* 150/**
145 * sys_capset - set capabilities for a given process, all processes, or all 151 * sys_capset - set capabilities for a process or a group of processes
152 * @header: pointer to struct that contains capability version and
153 * target pid data
154 * @data: pointer to struct that contains the effective, permitted,
155 * and inheritable capabilities
156 *
157 * Set capabilities for a given process, all processes, or all
146 * processes in a given process group. 158 * processes in a given process group.
147 * 159 *
148 * The restrictions on setting capabilities are specified as: 160 * The restrictions on setting capabilities are specified as:
@@ -152,6 +164,8 @@ static inline int cap_set_all(kernel_cap_t *effective,
152 * I: any raised capabilities must be a subset of the (old current) permitted 164 * I: any raised capabilities must be a subset of the (old current) permitted
153 * P: any raised capabilities must be a subset of the (old current) permitted 165 * P: any raised capabilities must be a subset of the (old current) permitted
154 * E: must be set to a subset of (new target) permitted 166 * E: must be set to a subset of (new target) permitted
167 *
168 * Returns 0 on success and < 0 on error.
155 */ 169 */
156asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data) 170asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data)
157{ 171{
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 984c0bf3807f..21a4e3b2cbda 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -398,21 +398,31 @@ static int cpuset_path(const struct cpuset *cs, char *buf, int buflen)
398 * to continue to serve a useful existence. Next time it's released, 398 * to continue to serve a useful existence. Next time it's released,
399 * we will get notified again, if it still has 'notify_on_release' set. 399 * we will get notified again, if it still has 'notify_on_release' set.
400 * 400 *
401 * Note final arg to call_usermodehelper() is 0 - that means 401 * The final arg to call_usermodehelper() is 0, which means don't
402 * don't wait. Since we are holding the global cpuset_sem here, 402 * wait. The separate /sbin/cpuset_release_agent task is forked by
403 * and we are asking another thread (started from keventd) to rmdir a 403 * call_usermodehelper(), then control in this thread returns here,
404 * cpuset, we can't wait - or we'd deadlock with the removing thread 404 * without waiting for the release agent task. We don't bother to
405 * on cpuset_sem. 405 * wait because the caller of this routine has no use for the exit
406 * status of the /sbin/cpuset_release_agent task, so no sense holding
407 * our caller up for that.
408 *
409 * The simple act of forking that task might require more memory,
410 * which might need cpuset_sem. So this routine must be called while
411 * cpuset_sem is not held, to avoid a possible deadlock. See also
412 * comments for check_for_release(), below.
406 */ 413 */
407 414
408static int cpuset_release_agent(char *cpuset_str) 415static void cpuset_release_agent(const char *pathbuf)
409{ 416{
410 char *argv[3], *envp[3]; 417 char *argv[3], *envp[3];
411 int i; 418 int i;
412 419
420 if (!pathbuf)
421 return;
422
413 i = 0; 423 i = 0;
414 argv[i++] = "/sbin/cpuset_release_agent"; 424 argv[i++] = "/sbin/cpuset_release_agent";
415 argv[i++] = cpuset_str; 425 argv[i++] = (char *)pathbuf;
416 argv[i] = NULL; 426 argv[i] = NULL;
417 427
418 i = 0; 428 i = 0;
@@ -421,17 +431,29 @@ static int cpuset_release_agent(char *cpuset_str)
421 envp[i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin"; 431 envp[i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
422 envp[i] = NULL; 432 envp[i] = NULL;
423 433
424 return call_usermodehelper(argv[0], argv, envp, 0); 434 call_usermodehelper(argv[0], argv, envp, 0);
435 kfree(pathbuf);
425} 436}
426 437
427/* 438/*
428 * Either cs->count of using tasks transitioned to zero, or the 439 * Either cs->count of using tasks transitioned to zero, or the
429 * cs->children list of child cpusets just became empty. If this 440 * cs->children list of child cpusets just became empty. If this
430 * cs is notify_on_release() and now both the user count is zero and 441 * cs is notify_on_release() and now both the user count is zero and
431 * the list of children is empty, send notice to user land. 442 * the list of children is empty, prepare cpuset path in a kmalloc'd
443 * buffer, to be returned via ppathbuf, so that the caller can invoke
444 * cpuset_release_agent() with it later on, once cpuset_sem is dropped.
445 * Call here with cpuset_sem held.
446 *
447 * This check_for_release() routine is responsible for kmalloc'ing
448 * pathbuf. The above cpuset_release_agent() is responsible for
449 * kfree'ing pathbuf. The caller of these routines is responsible
450 * for providing a pathbuf pointer, initialized to NULL, then
451 * calling check_for_release() with cpuset_sem held and the address
452 * of the pathbuf pointer, then dropping cpuset_sem, then calling
453 * cpuset_release_agent() with pathbuf, as set by check_for_release().
432 */ 454 */
433 455
434static void check_for_release(struct cpuset *cs) 456static void check_for_release(struct cpuset *cs, char **ppathbuf)
435{ 457{
436 if (notify_on_release(cs) && atomic_read(&cs->count) == 0 && 458 if (notify_on_release(cs) && atomic_read(&cs->count) == 0 &&
437 list_empty(&cs->children)) { 459 list_empty(&cs->children)) {
@@ -441,10 +463,9 @@ static void check_for_release(struct cpuset *cs)
441 if (!buf) 463 if (!buf)
442 return; 464 return;
443 if (cpuset_path(cs, buf, PAGE_SIZE) < 0) 465 if (cpuset_path(cs, buf, PAGE_SIZE) < 0)
444 goto out; 466 kfree(buf);
445 cpuset_release_agent(buf); 467 else
446out: 468 *ppathbuf = buf;
447 kfree(buf);
448 } 469 }
449} 470}
450 471
@@ -727,14 +748,14 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, char *buf)
727 return 0; 748 return 0;
728} 749}
729 750
730static int attach_task(struct cpuset *cs, char *buf) 751static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf)
731{ 752{
732 pid_t pid; 753 pid_t pid;
733 struct task_struct *tsk; 754 struct task_struct *tsk;
734 struct cpuset *oldcs; 755 struct cpuset *oldcs;
735 cpumask_t cpus; 756 cpumask_t cpus;
736 757
737 if (sscanf(buf, "%d", &pid) != 1) 758 if (sscanf(pidbuf, "%d", &pid) != 1)
738 return -EIO; 759 return -EIO;
739 if (cpus_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)) 760 if (cpus_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))
740 return -ENOSPC; 761 return -ENOSPC;
@@ -777,7 +798,7 @@ static int attach_task(struct cpuset *cs, char *buf)
777 798
778 put_task_struct(tsk); 799 put_task_struct(tsk);
779 if (atomic_dec_and_test(&oldcs->count)) 800 if (atomic_dec_and_test(&oldcs->count))
780 check_for_release(oldcs); 801 check_for_release(oldcs, ppathbuf);
781 return 0; 802 return 0;
782} 803}
783 804
@@ -801,6 +822,7 @@ static ssize_t cpuset_common_file_write(struct file *file, const char __user *us
801 struct cftype *cft = __d_cft(file->f_dentry); 822 struct cftype *cft = __d_cft(file->f_dentry);
802 cpuset_filetype_t type = cft->private; 823 cpuset_filetype_t type = cft->private;
803 char *buffer; 824 char *buffer;
825 char *pathbuf = NULL;
804 int retval = 0; 826 int retval = 0;
805 827
806 /* Crude upper limit on largest legitimate cpulist user might write. */ 828 /* Crude upper limit on largest legitimate cpulist user might write. */
@@ -841,7 +863,7 @@ static ssize_t cpuset_common_file_write(struct file *file, const char __user *us
841 retval = update_flag(CS_NOTIFY_ON_RELEASE, cs, buffer); 863 retval = update_flag(CS_NOTIFY_ON_RELEASE, cs, buffer);
842 break; 864 break;
843 case FILE_TASKLIST: 865 case FILE_TASKLIST:
844 retval = attach_task(cs, buffer); 866 retval = attach_task(cs, buffer, &pathbuf);
845 break; 867 break;
846 default: 868 default:
847 retval = -EINVAL; 869 retval = -EINVAL;
@@ -852,6 +874,7 @@ static ssize_t cpuset_common_file_write(struct file *file, const char __user *us
852 retval = nbytes; 874 retval = nbytes;
853out2: 875out2:
854 up(&cpuset_sem); 876 up(&cpuset_sem);
877 cpuset_release_agent(pathbuf);
855out1: 878out1:
856 kfree(buffer); 879 kfree(buffer);
857 return retval; 880 return retval;
@@ -1357,6 +1380,7 @@ static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry)
1357 struct cpuset *cs = dentry->d_fsdata; 1380 struct cpuset *cs = dentry->d_fsdata;
1358 struct dentry *d; 1381 struct dentry *d;
1359 struct cpuset *parent; 1382 struct cpuset *parent;
1383 char *pathbuf = NULL;
1360 1384
1361 /* the vfs holds both inode->i_sem already */ 1385 /* the vfs holds both inode->i_sem already */
1362 1386
@@ -1376,7 +1400,7 @@ static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry)
1376 update_cpu_domains(cs); 1400 update_cpu_domains(cs);
1377 list_del(&cs->sibling); /* delete my sibling from parent->children */ 1401 list_del(&cs->sibling); /* delete my sibling from parent->children */
1378 if (list_empty(&parent->children)) 1402 if (list_empty(&parent->children))
1379 check_for_release(parent); 1403 check_for_release(parent, &pathbuf);
1380 spin_lock(&cs->dentry->d_lock); 1404 spin_lock(&cs->dentry->d_lock);
1381 d = dget(cs->dentry); 1405 d = dget(cs->dentry);
1382 cs->dentry = NULL; 1406 cs->dentry = NULL;
@@ -1384,6 +1408,7 @@ static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry)
1384 cpuset_d_remove_dir(d); 1408 cpuset_d_remove_dir(d);
1385 dput(d); 1409 dput(d);
1386 up(&cpuset_sem); 1410 up(&cpuset_sem);
1411 cpuset_release_agent(pathbuf);
1387 return 0; 1412 return 0;
1388} 1413}
1389 1414
@@ -1440,10 +1465,10 @@ void __init cpuset_init_smp(void)
1440 1465
1441/** 1466/**
1442 * cpuset_fork - attach newly forked task to its parents cpuset. 1467 * cpuset_fork - attach newly forked task to its parents cpuset.
1443 * @p: pointer to task_struct of forking parent process. 1468 * @tsk: pointer to task_struct of forking parent process.
1444 * 1469 *
1445 * Description: By default, on fork, a task inherits its 1470 * Description: By default, on fork, a task inherits its
1446 * parents cpuset. The pointer to the shared cpuset is 1471 * parent's cpuset. The pointer to the shared cpuset is
1447 * automatically copied in fork.c by dup_task_struct(). 1472 * automatically copied in fork.c by dup_task_struct().
1448 * This cpuset_fork() routine need only increment the usage 1473 * This cpuset_fork() routine need only increment the usage
1449 * counter in that cpuset. 1474 * counter in that cpuset.
@@ -1471,7 +1496,6 @@ void cpuset_fork(struct task_struct *tsk)
1471 * by the cpuset_sem semaphore. If you don't hold cpuset_sem, 1496 * by the cpuset_sem semaphore. If you don't hold cpuset_sem,
1472 * then a zero cpuset use count is a license to any other task to 1497 * then a zero cpuset use count is a license to any other task to
1473 * nuke the cpuset immediately. 1498 * nuke the cpuset immediately.
1474 *
1475 **/ 1499 **/
1476 1500
1477void cpuset_exit(struct task_struct *tsk) 1501void cpuset_exit(struct task_struct *tsk)
@@ -1484,10 +1508,13 @@ void cpuset_exit(struct task_struct *tsk)
1484 task_unlock(tsk); 1508 task_unlock(tsk);
1485 1509
1486 if (notify_on_release(cs)) { 1510 if (notify_on_release(cs)) {
1511 char *pathbuf = NULL;
1512
1487 down(&cpuset_sem); 1513 down(&cpuset_sem);
1488 if (atomic_dec_and_test(&cs->count)) 1514 if (atomic_dec_and_test(&cs->count))
1489 check_for_release(cs); 1515 check_for_release(cs, &pathbuf);
1490 up(&cpuset_sem); 1516 up(&cpuset_sem);
1517 cpuset_release_agent(pathbuf);
1491 } else { 1518 } else {
1492 atomic_dec(&cs->count); 1519 atomic_dec(&cs->count);
1493 } 1520 }
@@ -1521,7 +1548,9 @@ void cpuset_init_current_mems_allowed(void)
1521 current->mems_allowed = NODE_MASK_ALL; 1548 current->mems_allowed = NODE_MASK_ALL;
1522} 1549}
1523 1550
1524/* 1551/**
1552 * cpuset_update_current_mems_allowed - update mems parameters to new values
1553 *
1525 * If the current tasks cpusets mems_allowed changed behind our backs, 1554 * If the current tasks cpusets mems_allowed changed behind our backs,
1526 * update current->mems_allowed and mems_generation to the new value. 1555 * update current->mems_allowed and mems_generation to the new value.
1527 * Do not call this routine if in_interrupt(). 1556 * Do not call this routine if in_interrupt().
@@ -1540,13 +1569,20 @@ void cpuset_update_current_mems_allowed(void)
1540 } 1569 }
1541} 1570}
1542 1571
1572/**
1573 * cpuset_restrict_to_mems_allowed - limit nodes to current mems_allowed
1574 * @nodes: pointer to a node bitmap that is and-ed with mems_allowed
1575 */
1543void cpuset_restrict_to_mems_allowed(unsigned long *nodes) 1576void cpuset_restrict_to_mems_allowed(unsigned long *nodes)
1544{ 1577{
1545 bitmap_and(nodes, nodes, nodes_addr(current->mems_allowed), 1578 bitmap_and(nodes, nodes, nodes_addr(current->mems_allowed),
1546 MAX_NUMNODES); 1579 MAX_NUMNODES);
1547} 1580}
1548 1581
1549/* 1582/**
1583 * cpuset_zonelist_valid_mems_allowed - check zonelist vs. current mems_allowed
1584 * @zl: the zonelist to be checked
1585 *
1550 * Are any of the nodes on zonelist zl allowed in current->mems_allowed? 1586 * Are any of the nodes on zonelist zl allowed in current->mems_allowed?
1551 */ 1587 */
1552int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl) 1588int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl)
@@ -1562,8 +1598,12 @@ int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl)
1562 return 0; 1598 return 0;
1563} 1599}
1564 1600
1565/* 1601/**
1566 * Is 'current' valid, and is zone z allowed in current->mems_allowed? 1602 * cpuset_zone_allowed - is zone z allowed in current->mems_allowed
1603 * @z: zone in question
1604 *
1605 * Is zone z allowed in current->mems_allowed, or is
1606 * the CPU in interrupt context? (zone is always allowed in this case)
1567 */ 1607 */
1568int cpuset_zone_allowed(struct zone *z) 1608int cpuset_zone_allowed(struct zone *z)
1569{ 1609{
diff --git a/kernel/crash_dump.c b/kernel/crash_dump.c
index 459ba49e376a..334c37f5218a 100644
--- a/kernel/crash_dump.c
+++ b/kernel/crash_dump.c
@@ -18,7 +18,16 @@
18/* Stores the physical address of elf header of crash image. */ 18/* Stores the physical address of elf header of crash image. */
19unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX; 19unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX;
20 20
21/* 21/**
22 * copy_oldmem_page - copy one page from "oldmem"
23 * @pfn: page frame number to be copied
24 * @buf: target memory address for the copy; this can be in kernel address
25 * space or user address space (see @userbuf)
26 * @csize: number of bytes to copy
27 * @offset: offset in bytes into the page (based on pfn) to begin the copy
28 * @userbuf: if set, @buf is in user address space, use copy_to_user(),
29 * otherwise @buf is in kernel address space, use memcpy().
30 *
22 * Copy a page from "oldmem". For this page, there is no pte mapped 31 * Copy a page from "oldmem". For this page, there is no pte mapped
23 * in the current kernel. We stitch up a pte, similar to kmap_atomic. 32 * in the current kernel. We stitch up a pte, similar to kmap_atomic.
24 */ 33 */
diff --git a/kernel/exit.c b/kernel/exit.c
index 9d1b10ed0135..5b0fb9f09f21 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -829,8 +829,10 @@ fastcall NORET_TYPE void do_exit(long code)
829 acct_update_integrals(tsk); 829 acct_update_integrals(tsk);
830 update_mem_hiwater(tsk); 830 update_mem_hiwater(tsk);
831 group_dead = atomic_dec_and_test(&tsk->signal->live); 831 group_dead = atomic_dec_and_test(&tsk->signal->live);
832 if (group_dead) 832 if (group_dead) {
833 del_timer_sync(&tsk->signal->real_timer);
833 acct_process(code); 834 acct_process(code);
835 }
834 exit_mm(tsk); 836 exit_mm(tsk);
835 837
836 exit_sem(tsk); 838 exit_sem(tsk);
diff --git a/kernel/itimer.c b/kernel/itimer.c
index a72cb0e5aa4b..7c1b25e25e47 100644
--- a/kernel/itimer.c
+++ b/kernel/itimer.c
@@ -112,28 +112,11 @@ asmlinkage long sys_getitimer(int which, struct itimerval __user *value)
112 return error; 112 return error;
113} 113}
114 114
115/*
116 * Called with P->sighand->siglock held and P->signal->real_timer inactive.
117 * If interval is nonzero, arm the timer for interval ticks from now.
118 */
119static inline void it_real_arm(struct task_struct *p, unsigned long interval)
120{
121 p->signal->it_real_value = interval; /* XXX unnecessary field?? */
122 if (interval == 0)
123 return;
124 if (interval > (unsigned long) LONG_MAX)
125 interval = LONG_MAX;
126 /* the "+ 1" below makes sure that the timer doesn't go off before
127 * the interval requested. This could happen if
128 * time requested % (usecs per jiffy) is more than the usecs left
129 * in the current jiffy */
130 p->signal->real_timer.expires = jiffies + interval + 1;
131 add_timer(&p->signal->real_timer);
132}
133 115
134void it_real_fn(unsigned long __data) 116void it_real_fn(unsigned long __data)
135{ 117{
136 struct task_struct * p = (struct task_struct *) __data; 118 struct task_struct * p = (struct task_struct *) __data;
119 unsigned long inc = p->signal->it_real_incr;
137 120
138 send_group_sig_info(SIGALRM, SEND_SIG_PRIV, p); 121 send_group_sig_info(SIGALRM, SEND_SIG_PRIV, p);
139 122
@@ -141,14 +124,23 @@ void it_real_fn(unsigned long __data)
141 * Now restart the timer if necessary. We don't need any locking 124 * Now restart the timer if necessary. We don't need any locking
142 * here because do_setitimer makes sure we have finished running 125 * here because do_setitimer makes sure we have finished running
143 * before it touches anything. 126 * before it touches anything.
127 * Note, we KNOW we are (or should be) at a jiffie edge here so
128 * we don't need the +1 stuff. Also, we want to use the prior
129 * expire value so as to not "slip" a jiffie if we are late.
130 * Deal with requesting a time prior to "now" here rather than
131 * in add_timer.
144 */ 132 */
145 it_real_arm(p, p->signal->it_real_incr); 133 if (!inc)
134 return;
135 while (time_before_eq(p->signal->real_timer.expires, jiffies))
136 p->signal->real_timer.expires += inc;
137 add_timer(&p->signal->real_timer);
146} 138}
147 139
148int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue) 140int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue)
149{ 141{
150 struct task_struct *tsk = current; 142 struct task_struct *tsk = current;
151 unsigned long val, interval; 143 unsigned long val, interval, expires;
152 cputime_t cval, cinterval, nval, ninterval; 144 cputime_t cval, cinterval, nval, ninterval;
153 145
154 switch (which) { 146 switch (which) {
@@ -164,7 +156,10 @@ again:
164 } 156 }
165 tsk->signal->it_real_incr = 157 tsk->signal->it_real_incr =
166 timeval_to_jiffies(&value->it_interval); 158 timeval_to_jiffies(&value->it_interval);
167 it_real_arm(tsk, timeval_to_jiffies(&value->it_value)); 159 expires = timeval_to_jiffies(&value->it_value);
160 if (expires)
161 mod_timer(&tsk->signal->real_timer,
162 jiffies + 1 + expires);
168 spin_unlock_irq(&tsk->sighand->siglock); 163 spin_unlock_irq(&tsk->sighand->siglock);
169 if (ovalue) { 164 if (ovalue) {
170 jiffies_to_timeval(val, &ovalue->it_value); 165 jiffies_to_timeval(val, &ovalue->it_value);
diff --git a/kernel/module.c b/kernel/module.c
index 068e271ab3a5..c32995fbd8fd 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -250,13 +250,18 @@ static inline unsigned int block_size(int val)
250/* Created by linker magic */ 250/* Created by linker magic */
251extern char __per_cpu_start[], __per_cpu_end[]; 251extern char __per_cpu_start[], __per_cpu_end[];
252 252
253static void *percpu_modalloc(unsigned long size, unsigned long align) 253static void *percpu_modalloc(unsigned long size, unsigned long align,
254 const char *name)
254{ 255{
255 unsigned long extra; 256 unsigned long extra;
256 unsigned int i; 257 unsigned int i;
257 void *ptr; 258 void *ptr;
258 259
259 BUG_ON(align > SMP_CACHE_BYTES); 260 if (align > SMP_CACHE_BYTES) {
261 printk(KERN_WARNING "%s: per-cpu alignment %li > %i\n",
262 name, align, SMP_CACHE_BYTES);
263 align = SMP_CACHE_BYTES;
264 }
260 265
261 ptr = __per_cpu_start; 266 ptr = __per_cpu_start;
262 for (i = 0; i < pcpu_num_used; ptr += block_size(pcpu_size[i]), i++) { 267 for (i = 0; i < pcpu_num_used; ptr += block_size(pcpu_size[i]), i++) {
@@ -348,7 +353,8 @@ static int percpu_modinit(void)
348} 353}
349__initcall(percpu_modinit); 354__initcall(percpu_modinit);
350#else /* ... !CONFIG_SMP */ 355#else /* ... !CONFIG_SMP */
351static inline void *percpu_modalloc(unsigned long size, unsigned long align) 356static inline void *percpu_modalloc(unsigned long size, unsigned long align,
357 const char *name)
352{ 358{
353 return NULL; 359 return NULL;
354} 360}
@@ -1644,7 +1650,8 @@ static struct module *load_module(void __user *umod,
1644 if (pcpuindex) { 1650 if (pcpuindex) {
1645 /* We have a special allocation for this section. */ 1651 /* We have a special allocation for this section. */
1646 percpu = percpu_modalloc(sechdrs[pcpuindex].sh_size, 1652 percpu = percpu_modalloc(sechdrs[pcpuindex].sh_size,
1647 sechdrs[pcpuindex].sh_addralign); 1653 sechdrs[pcpuindex].sh_addralign,
1654 mod->name);
1648 if (!percpu) { 1655 if (!percpu) {
1649 err = -ENOMEM; 1656 err = -ENOMEM;
1650 goto free_mod; 1657 goto free_mod;
diff --git a/kernel/panic.c b/kernel/panic.c
index 74ba5f3e46c7..aabc5f86fa3f 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -111,12 +111,11 @@ NORET_TYPE void panic(const char * fmt, ...)
111 mdelay(1); 111 mdelay(1);
112 i++; 112 i++;
113 } 113 }
114 /* 114 /* This will not be a clean reboot, with everything
115 * Should we run the reboot notifier. For the moment Im 115 * shutting down. But if there is a chance of
116 * choosing not too. It might crash, be corrupt or do 116 * rebooting the system it will be rebooted.
117 * more harm than good for other reasons.
118 */ 117 */
119 machine_restart(NULL); 118 emergency_restart();
120 } 119 }
121#ifdef __sparc__ 120#ifdef __sparc__
122 { 121 {
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 5b7b4736d82b..38798a2ff994 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -896,21 +896,10 @@ static int adjust_abs_time(struct k_clock *clock, struct timespec *tp,
896 jiffies_64_f = get_jiffies_64(); 896 jiffies_64_f = get_jiffies_64();
897 } 897 }
898 /* 898 /*
899 * Take away now to get delta 899 * Take away now to get delta and normalize
900 */ 900 */
901 oc.tv_sec -= now.tv_sec; 901 set_normalized_timespec(&oc, oc.tv_sec - now.tv_sec,
902 oc.tv_nsec -= now.tv_nsec; 902 oc.tv_nsec - now.tv_nsec);
903 /*
904 * Normalize...
905 */
906 while ((oc.tv_nsec - NSEC_PER_SEC) >= 0) {
907 oc.tv_nsec -= NSEC_PER_SEC;
908 oc.tv_sec++;
909 }
910 while ((oc.tv_nsec) < 0) {
911 oc.tv_nsec += NSEC_PER_SEC;
912 oc.tv_sec--;
913 }
914 }else{ 903 }else{
915 jiffies_64_f = get_jiffies_64(); 904 jiffies_64_f = get_jiffies_64();
916 } 905 }
@@ -1177,7 +1166,6 @@ void exit_itimers(struct signal_struct *sig)
1177 tmr = list_entry(sig->posix_timers.next, struct k_itimer, list); 1166 tmr = list_entry(sig->posix_timers.next, struct k_itimer, list);
1178 itimer_delete(tmr); 1167 itimer_delete(tmr);
1179 } 1168 }
1180 del_timer_sync(&sig->real_timer);
1181} 1169}
1182 1170
1183/* 1171/*
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index 3ec789c6b537..664eb0469b6e 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -59,16 +59,13 @@ static void power_down(suspend_disk_method_t mode)
59 error = pm_ops->enter(PM_SUSPEND_DISK); 59 error = pm_ops->enter(PM_SUSPEND_DISK);
60 break; 60 break;
61 case PM_DISK_SHUTDOWN: 61 case PM_DISK_SHUTDOWN:
62 printk("Powering off system\n"); 62 kernel_power_off();
63 device_shutdown();
64 machine_power_off();
65 break; 63 break;
66 case PM_DISK_REBOOT: 64 case PM_DISK_REBOOT:
67 device_shutdown(); 65 kernel_restart(NULL);
68 machine_restart(NULL);
69 break; 66 break;
70 } 67 }
71 machine_halt(); 68 kernel_halt();
72 /* Valid image is on the disk, if we continue we risk serious data corruption 69 /* Valid image is on the disk, if we continue we risk serious data corruption
73 after resume. */ 70 after resume. */
74 printk(KERN_CRIT "Please power me down manually\n"); 71 printk(KERN_CRIT "Please power me down manually\n");
diff --git a/kernel/power/poweroff.c b/kernel/power/poweroff.c
index 715081b2d829..7a4144ba3afd 100644
--- a/kernel/power/poweroff.c
+++ b/kernel/power/poweroff.c
@@ -9,6 +9,7 @@
9#include <linux/init.h> 9#include <linux/init.h>
10#include <linux/pm.h> 10#include <linux/pm.h>
11#include <linux/workqueue.h> 11#include <linux/workqueue.h>
12#include <linux/reboot.h>
12 13
13/* 14/*
14 * When the user hits Sys-Rq o to power down the machine this is the 15 * When the user hits Sys-Rq o to power down the machine this is the
@@ -17,8 +18,7 @@
17 18
18static void do_poweroff(void *dummy) 19static void do_poweroff(void *dummy)
19{ 20{
20 if (pm_power_off) 21 kernel_power_off();
21 pm_power_off();
22} 22}
23 23
24static DECLARE_WORK(poweroff_work, do_poweroff, NULL); 24static DECLARE_WORK(poweroff_work, do_poweroff, NULL);
diff --git a/kernel/power/smp.c b/kernel/power/smp.c
index bbe23079c62c..911fc62b8225 100644
--- a/kernel/power/smp.c
+++ b/kernel/power/smp.c
@@ -38,7 +38,7 @@ void disable_nonboot_cpus(void)
38 } 38 }
39 printk("Error taking cpu %d down: %d\n", cpu, error); 39 printk("Error taking cpu %d down: %d\n", cpu, error);
40 } 40 }
41 BUG_ON(smp_processor_id() != 0); 41 BUG_ON(raw_smp_processor_id() != 0);
42 if (error) 42 if (error)
43 panic("cpus not sleeping"); 43 panic("cpus not sleeping");
44} 44}
diff --git a/kernel/sched.c b/kernel/sched.c
index 4107db0dc091..a646e4f36c41 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3486,7 +3486,7 @@ static void __setscheduler(struct task_struct *p, int policy, int prio)
3486 p->policy = policy; 3486 p->policy = policy;
3487 p->rt_priority = prio; 3487 p->rt_priority = prio;
3488 if (policy != SCHED_NORMAL) 3488 if (policy != SCHED_NORMAL)
3489 p->prio = MAX_USER_RT_PRIO-1 - p->rt_priority; 3489 p->prio = MAX_RT_PRIO-1 - p->rt_priority;
3490 else 3490 else
3491 p->prio = p->static_prio; 3491 p->prio = p->static_prio;
3492} 3492}
@@ -3518,7 +3518,8 @@ recheck:
3518 * 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_NORMAL is 0. 3518 * 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_NORMAL is 0.
3519 */ 3519 */
3520 if (param->sched_priority < 0 || 3520 if (param->sched_priority < 0 ||
3521 param->sched_priority > MAX_USER_RT_PRIO-1) 3521 (p->mm && param->sched_priority > MAX_USER_RT_PRIO-1) ||
3522 (!p->mm && param->sched_priority > MAX_RT_PRIO-1))
3522 return -EINVAL; 3523 return -EINVAL;
3523 if ((policy == SCHED_NORMAL) != (param->sched_priority == 0)) 3524 if ((policy == SCHED_NORMAL) != (param->sched_priority == 0))
3524 return -EINVAL; 3525 return -EINVAL;
@@ -3528,7 +3529,8 @@ recheck:
3528 */ 3529 */
3529 if (!capable(CAP_SYS_NICE)) { 3530 if (!capable(CAP_SYS_NICE)) {
3530 /* can't change policy */ 3531 /* can't change policy */
3531 if (policy != p->policy) 3532 if (policy != p->policy &&
3533 !p->signal->rlim[RLIMIT_RTPRIO].rlim_cur)
3532 return -EPERM; 3534 return -EPERM;
3533 /* can't increase priority */ 3535 /* can't increase priority */
3534 if (policy != SCHED_NORMAL && 3536 if (policy != SCHED_NORMAL &&
diff --git a/kernel/sys.c b/kernel/sys.c
index 9a24374c23bc..0bcaed6560ac 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -361,6 +361,64 @@ out_unlock:
361 return retval; 361 return retval;
362} 362}
363 363
364void emergency_restart(void)
365{
366 machine_emergency_restart();
367}
368EXPORT_SYMBOL_GPL(emergency_restart);
369
370void kernel_restart(char *cmd)
371{
372 notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd);
373 system_state = SYSTEM_RESTART;
374 device_shutdown();
375 if (!cmd) {
376 printk(KERN_EMERG "Restarting system.\n");
377 } else {
378 printk(KERN_EMERG "Restarting system with command '%s'.\n", cmd);
379 }
380 printk(".\n");
381 machine_restart(cmd);
382}
383EXPORT_SYMBOL_GPL(kernel_restart);
384
385void kernel_kexec(void)
386{
387#ifdef CONFIG_KEXEC
388 struct kimage *image;
389 image = xchg(&kexec_image, 0);
390 if (!image) {
391 return;
392 }
393 notifier_call_chain(&reboot_notifier_list, SYS_RESTART, NULL);
394 system_state = SYSTEM_RESTART;
395 device_shutdown();
396 printk(KERN_EMERG "Starting new kernel\n");
397 machine_shutdown();
398 machine_kexec(image);
399#endif
400}
401EXPORT_SYMBOL_GPL(kernel_kexec);
402
403void kernel_halt(void)
404{
405 notifier_call_chain(&reboot_notifier_list, SYS_HALT, NULL);
406 system_state = SYSTEM_HALT;
407 device_shutdown();
408 printk(KERN_EMERG "System halted.\n");
409 machine_halt();
410}
411EXPORT_SYMBOL_GPL(kernel_halt);
412
413void kernel_power_off(void)
414{
415 notifier_call_chain(&reboot_notifier_list, SYS_POWER_OFF, NULL);
416 system_state = SYSTEM_POWER_OFF;
417 device_shutdown();
418 printk(KERN_EMERG "Power down.\n");
419 machine_power_off();
420}
421EXPORT_SYMBOL_GPL(kernel_power_off);
364 422
365/* 423/*
366 * Reboot system call: for obvious reasons only root may call it, 424 * Reboot system call: for obvious reasons only root may call it,
@@ -389,11 +447,7 @@ asmlinkage long sys_reboot(int magic1, int magic2, unsigned int cmd, void __user
389 lock_kernel(); 447 lock_kernel();
390 switch (cmd) { 448 switch (cmd) {
391 case LINUX_REBOOT_CMD_RESTART: 449 case LINUX_REBOOT_CMD_RESTART:
392 notifier_call_chain(&reboot_notifier_list, SYS_RESTART, NULL); 450 kernel_restart(NULL);
393 system_state = SYSTEM_RESTART;
394 device_shutdown();
395 printk(KERN_EMERG "Restarting system.\n");
396 machine_restart(NULL);
397 break; 451 break;
398 452
399 case LINUX_REBOOT_CMD_CAD_ON: 453 case LINUX_REBOOT_CMD_CAD_ON:
@@ -405,23 +459,13 @@ asmlinkage long sys_reboot(int magic1, int magic2, unsigned int cmd, void __user
405 break; 459 break;
406 460
407 case LINUX_REBOOT_CMD_HALT: 461 case LINUX_REBOOT_CMD_HALT:
408 notifier_call_chain(&reboot_notifier_list, SYS_HALT, NULL); 462 kernel_halt();
409 system_state = SYSTEM_HALT;
410 device_suspend(PMSG_SUSPEND);
411 device_shutdown();
412 printk(KERN_EMERG "System halted.\n");
413 machine_halt();
414 unlock_kernel(); 463 unlock_kernel();
415 do_exit(0); 464 do_exit(0);
416 break; 465 break;
417 466
418 case LINUX_REBOOT_CMD_POWER_OFF: 467 case LINUX_REBOOT_CMD_POWER_OFF:
419 notifier_call_chain(&reboot_notifier_list, SYS_POWER_OFF, NULL); 468 kernel_power_off();
420 system_state = SYSTEM_POWER_OFF;
421 device_suspend(PMSG_SUSPEND);
422 device_shutdown();
423 printk(KERN_EMERG "Power down.\n");
424 machine_power_off();
425 unlock_kernel(); 469 unlock_kernel();
426 do_exit(0); 470 do_exit(0);
427 break; 471 break;
@@ -433,32 +477,14 @@ asmlinkage long sys_reboot(int magic1, int magic2, unsigned int cmd, void __user
433 } 477 }
434 buffer[sizeof(buffer) - 1] = '\0'; 478 buffer[sizeof(buffer) - 1] = '\0';
435 479
436 notifier_call_chain(&reboot_notifier_list, SYS_RESTART, buffer); 480 kernel_restart(buffer);
437 system_state = SYSTEM_RESTART;
438 device_suspend(PMSG_FREEZE);
439 device_shutdown();
440 printk(KERN_EMERG "Restarting system with command '%s'.\n", buffer);
441 machine_restart(buffer);
442 break; 481 break;
443 482
444#ifdef CONFIG_KEXEC
445 case LINUX_REBOOT_CMD_KEXEC: 483 case LINUX_REBOOT_CMD_KEXEC:
446 { 484 kernel_kexec();
447 struct kimage *image; 485 unlock_kernel();
448 image = xchg(&kexec_image, 0); 486 return -EINVAL;
449 if (!image) { 487
450 unlock_kernel();
451 return -EINVAL;
452 }
453 notifier_call_chain(&reboot_notifier_list, SYS_RESTART, NULL);
454 system_state = SYSTEM_RESTART;
455 device_shutdown();
456 printk(KERN_EMERG "Starting new kernel\n");
457 machine_shutdown();
458 machine_kexec(image);
459 break;
460 }
461#endif
462#ifdef CONFIG_SOFTWARE_SUSPEND 488#ifdef CONFIG_SOFTWARE_SUSPEND
463 case LINUX_REBOOT_CMD_SW_SUSPEND: 489 case LINUX_REBOOT_CMD_SW_SUSPEND:
464 { 490 {
@@ -478,8 +504,7 @@ asmlinkage long sys_reboot(int magic1, int magic2, unsigned int cmd, void __user
478 504
479static void deferred_cad(void *dummy) 505static void deferred_cad(void *dummy)
480{ 506{
481 notifier_call_chain(&reboot_notifier_list, SYS_RESTART, NULL); 507 kernel_restart(NULL);
482 machine_restart(NULL);
483} 508}
484 509
485/* 510/*
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 42b40ae5eada..1ab2370e2efa 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -79,7 +79,6 @@ cond_syscall(sys_request_key);
79cond_syscall(sys_keyctl); 79cond_syscall(sys_keyctl);
80cond_syscall(compat_sys_keyctl); 80cond_syscall(compat_sys_keyctl);
81cond_syscall(compat_sys_socketcall); 81cond_syscall(compat_sys_socketcall);
82cond_syscall(sys_set_zone_reclaim);
83cond_syscall(sys_inotify_init); 82cond_syscall(sys_inotify_init);
84cond_syscall(sys_inotify_add_watch); 83cond_syscall(sys_inotify_add_watch);
85cond_syscall(sys_inotify_rm_watch); 84cond_syscall(sys_inotify_rm_watch);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index e60b9c36f1f0..3e0bbee549ea 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -114,6 +114,7 @@ extern int unaligned_enabled;
114extern int sysctl_ieee_emulation_warnings; 114extern int sysctl_ieee_emulation_warnings;
115#endif 115#endif
116extern int sysctl_userprocess_debug; 116extern int sysctl_userprocess_debug;
117extern int spin_retry;
117#endif 118#endif
118 119
119extern int sysctl_hz_timer; 120extern int sysctl_hz_timer;
@@ -647,7 +648,16 @@ static ctl_table kern_table[] = {
647 .mode = 0644, 648 .mode = 0644,
648 .proc_handler = &proc_dointvec, 649 .proc_handler = &proc_dointvec,
649 }, 650 },
650 651#if defined(CONFIG_ARCH_S390)
652 {
653 .ctl_name = KERN_SPIN_RETRY,
654 .procname = "spin_retry",
655 .data = &spin_retry,
656 .maxlen = sizeof (int),
657 .mode = 0644,
658 .proc_handler = &proc_dointvec,
659 },
660#endif
651 { .ctl_name = 0 } 661 { .ctl_name = 0 }
652}; 662};
653 663
diff --git a/kernel/time.c b/kernel/time.c
index d4335c1c884c..dd5ae1162a8f 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -128,7 +128,7 @@ asmlinkage long sys_gettimeofday(struct timeval __user *tv, struct timezone __us
128 * as real UNIX machines always do it. This avoids all headaches about 128 * as real UNIX machines always do it. This avoids all headaches about
129 * daylight saving times and warping kernel clocks. 129 * daylight saving times and warping kernel clocks.
130 */ 130 */
131inline static void warp_clock(void) 131static inline void warp_clock(void)
132{ 132{
133 write_seqlock_irq(&xtime_lock); 133 write_seqlock_irq(&xtime_lock);
134 wall_to_monotonic.tv_sec -= sys_tz.tz_minuteswest * 60; 134 wall_to_monotonic.tv_sec -= sys_tz.tz_minuteswest * 60;
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 259cf55da3c9..c7e36d4a70ca 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -308,8 +308,6 @@ struct workqueue_struct *__create_workqueue(const char *name,
308 struct workqueue_struct *wq; 308 struct workqueue_struct *wq;
309 struct task_struct *p; 309 struct task_struct *p;
310 310
311 BUG_ON(strlen(name) > 10);
312
313 wq = kmalloc(sizeof(*wq), GFP_KERNEL); 311 wq = kmalloc(sizeof(*wq), GFP_KERNEL);
314 if (!wq) 312 if (!wq)
315 return NULL; 313 return NULL;