Diffstat (limited to 'kernel')
-rw-r--r--  kernel/Makefile      1
-rw-r--r--  kernel/acct.c       43
-rw-r--r--  kernel/compat.c      9
-rw-r--r--  kernel/cpuset.c    104
-rw-r--r--  kernel/exit.c       26
-rw-r--r--  kernel/fork.c      101
-rw-r--r--  kernel/rcupdate.c   14
-rw-r--r--  kernel/sched.c     233
-rw-r--r--  kernel/signal.c      3
-rw-r--r--  kernel/spinlock.c   15
-rw-r--r--  kernel/timer.c      32
11 files changed, 392 insertions(+), 189 deletions(-)
diff --git a/kernel/Makefile b/kernel/Makefile
index 8d57a2f1226b..ff4dc02ce170 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -12,6 +12,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
12obj-$(CONFIG_FUTEX) += futex.o 12obj-$(CONFIG_FUTEX) += futex.o
13obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o 13obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
14obj-$(CONFIG_SMP) += cpu.o spinlock.o 14obj-$(CONFIG_SMP) += cpu.o spinlock.o
15obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o
15obj-$(CONFIG_UID16) += uid16.o 16obj-$(CONFIG_UID16) += uid16.o
16obj-$(CONFIG_MODULES) += module.o 17obj-$(CONFIG_MODULES) += module.o
17obj-$(CONFIG_KALLSYMS) += kallsyms.o 18obj-$(CONFIG_KALLSYMS) += kallsyms.o
diff --git a/kernel/acct.c b/kernel/acct.c
index f70e6027cca9..b756f527497e 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -165,7 +165,7 @@ out:
165} 165}
166 166
167/* 167/*
168 * Close the old accouting file (if currently open) and then replace 168 * Close the old accounting file (if currently open) and then replace
169 * it with file (if non-NULL). 169 * it with file (if non-NULL).
170 * 170 *
171 * NOTE: acct_globals.lock MUST be held on entry and exit. 171 * NOTE: acct_globals.lock MUST be held on entry and exit.
@@ -199,11 +199,16 @@ static void acct_file_reopen(struct file *file)
199 } 199 }
200} 200}
201 201
202/* 202/**
203 * sys_acct() is the only system call needed to implement process 203 * sys_acct - enable/disable process accounting
204 * accounting. It takes the name of the file where accounting records 204 * @name: file name for accounting records or NULL to shutdown accounting
205 * should be written. If the filename is NULL, accounting will be 205 *
206 * shutdown. 206 * Returns 0 for success or negative errno values for failure.
207 *
208 * sys_acct() is the only system call needed to implement process
209 * accounting. It takes the name of the file where accounting records
210 * should be written. If the filename is NULL, accounting will be
211 * shutdown.
207 */ 212 */
208asmlinkage long sys_acct(const char __user *name) 213asmlinkage long sys_acct(const char __user *name)
209{ 214{
@@ -250,9 +255,12 @@ asmlinkage long sys_acct(const char __user *name)
250 return (0); 255 return (0);
251} 256}
252 257
253/* 258/**
254 * If the accouting is turned on for a file in the filesystem pointed 259 * acct_auto_close - turn off a filesystem's accounting if it is on
255 * to by sb, turn accouting off. 260 * @sb: super block for the filesystem
261 *
262 * If the accounting is turned on for a file in the filesystem pointed
263 * to by sb, turn accounting off.
256 */ 264 */
257void acct_auto_close(struct super_block *sb) 265void acct_auto_close(struct super_block *sb)
258{ 266{
@@ -503,8 +511,11 @@ static void do_acct_process(long exitcode, struct file *file)
503 set_fs(fs); 511 set_fs(fs);
504} 512}
505 513
506/* 514/**
507 * acct_process - now just a wrapper around do_acct_process 515 * acct_process - now just a wrapper around do_acct_process
516 * @exitcode: task exit code
517 *
518 * handles process accounting for an exiting task
508 */ 519 */
509void acct_process(long exitcode) 520void acct_process(long exitcode)
510{ 521{
@@ -530,9 +541,9 @@ void acct_process(long exitcode)
530} 541}
531 542
532 543
533/* 544/**
534 * acct_update_integrals 545 * acct_update_integrals - update mm integral fields in task_struct
535 * - update mm integral fields in task_struct 546 * @tsk: task_struct for accounting
536 */ 547 */
537void acct_update_integrals(struct task_struct *tsk) 548void acct_update_integrals(struct task_struct *tsk)
538{ 549{
@@ -547,9 +558,9 @@ void acct_update_integrals(struct task_struct *tsk)
547 } 558 }
548} 559}
549 560
550/* 561/**
551 * acct_clear_integrals 562 * acct_clear_integrals - clear the mm integral fields in task_struct
552 * - clear the mm integral fields in task_struct 563 * @tsk: task_struct whose accounting fields are cleared
553 */ 564 */
554void acct_clear_integrals(struct task_struct *tsk) 565void acct_clear_integrals(struct task_struct *tsk)
555{ 566{
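
The acct.c hunks above mostly convert plain block comments into kernel-doc comments so the descriptions can be picked up by the documentation tooling. For reference, a minimal sketch of that layout, using a made-up function name:

/**
 * example_func - one-line summary of what the function does
 * @arg: what the parameter means
 *
 * Optional longer description.  Returns 0 for success or a negative
 * errno value for failure.
 */
int example_func(int arg);
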
diff --git a/kernel/compat.c b/kernel/compat.c
index ddfcaaa86623..102296e21ea8 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -48,8 +48,7 @@ static long compat_nanosleep_restart(struct restart_block *restart)
48 if (!time_after(expire, now)) 48 if (!time_after(expire, now))
49 return 0; 49 return 0;
50 50
51 current->state = TASK_INTERRUPTIBLE; 51 expire = schedule_timeout_interruptible(expire - now);
52 expire = schedule_timeout(expire - now);
53 if (expire == 0) 52 if (expire == 0)
54 return 0; 53 return 0;
55 54
@@ -82,8 +81,7 @@ asmlinkage long compat_sys_nanosleep(struct compat_timespec __user *rqtp,
82 return -EINVAL; 81 return -EINVAL;
83 82
84 expire = timespec_to_jiffies(&t) + (t.tv_sec || t.tv_nsec); 83 expire = timespec_to_jiffies(&t) + (t.tv_sec || t.tv_nsec);
85 current->state = TASK_INTERRUPTIBLE; 84 expire = schedule_timeout_interruptible(expire);
86 expire = schedule_timeout(expire);
87 if (expire == 0) 85 if (expire == 0)
88 return 0; 86 return 0;
89 87
@@ -795,8 +793,7 @@ compat_sys_rt_sigtimedwait (compat_sigset_t __user *uthese,
795 recalc_sigpending(); 793 recalc_sigpending();
796 spin_unlock_irq(&current->sighand->siglock); 794 spin_unlock_irq(&current->sighand->siglock);
797 795
798 current->state = TASK_INTERRUPTIBLE; 796 timeout = schedule_timeout_interruptible(timeout);
799 timeout = schedule_timeout(timeout);
800 797
801 spin_lock_irq(&current->sighand->siglock); 798 spin_lock_irq(&current->sighand->siglock);
802 sig = dequeue_signal(current, &s, &info); 799 sig = dequeue_signal(current, &s, &info);
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 1f06e7690106..407b5f0a8c8e 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -182,6 +182,37 @@ static struct super_block *cpuset_sb = NULL;
182static DECLARE_MUTEX(cpuset_sem); 182static DECLARE_MUTEX(cpuset_sem);
183 183
184/* 184/*
185 * The global cpuset semaphore cpuset_sem can be needed by the
186 * memory allocator to update a tasks mems_allowed (see the calls
187 * to cpuset_update_current_mems_allowed()) or to walk up the
188 * cpuset hierarchy to find a mem_exclusive cpuset (see the calls
189 * to cpuset_excl_nodes_overlap()).
190 *
191 * But if the memory allocation is being done by cpuset.c code, it
192 * usually already holds cpuset_sem. Double tripping on a kernel
193 * semaphore deadlocks the current task, and any other task that
194 * subsequently tries to obtain the lock.
195 *
196 * Run all up's and down's on cpuset_sem through the following
197 * wrappers, which will detect this nested locking, and avoid
198 * deadlocking.
199 */
200
201static inline void cpuset_down(struct semaphore *psem)
202{
203 if (current->cpuset_sem_nest_depth == 0)
204 down(psem);
205 current->cpuset_sem_nest_depth++;
206}
207
208static inline void cpuset_up(struct semaphore *psem)
209{
210 current->cpuset_sem_nest_depth--;
211 if (current->cpuset_sem_nest_depth == 0)
212 up(psem);
213}
214
215/*
185 * A couple of forward declarations required, due to cyclic reference loop: 216 * A couple of forward declarations required, due to cyclic reference loop:
186 * cpuset_mkdir -> cpuset_create -> cpuset_populate_dir -> cpuset_add_file 217 * cpuset_mkdir -> cpuset_create -> cpuset_populate_dir -> cpuset_add_file
187 * -> cpuset_create_file -> cpuset_dir_inode_operations -> cpuset_mkdir. 218 * -> cpuset_create_file -> cpuset_dir_inode_operations -> cpuset_mkdir.
@@ -522,19 +553,10 @@ static void guarantee_online_mems(const struct cpuset *cs, nodemask_t *pmask)
522 * Refresh current tasks mems_allowed and mems_generation from 553 * Refresh current tasks mems_allowed and mems_generation from
523 * current tasks cpuset. Call with cpuset_sem held. 554 * current tasks cpuset. Call with cpuset_sem held.
524 * 555 *
525 * Be sure to call refresh_mems() on any cpuset operation which 556 * This routine is needed to update the per-task mems_allowed
526 * (1) holds cpuset_sem, and (2) might possibly alloc memory. 557 * data, within the tasks context, when it is trying to allocate
527 * Call after obtaining cpuset_sem lock, before any possible 558 * memory (in various mm/mempolicy.c routines) and notices
528 * allocation. Otherwise one risks trying to allocate memory 559 * that some other task has been modifying its cpuset.
529 * while the task cpuset_mems_generation is not the same as
530 * the mems_generation in its cpuset, which would deadlock on
531 * cpuset_sem in cpuset_update_current_mems_allowed().
532 *
533 * Since we hold cpuset_sem, once refresh_mems() is called, the
534 * test (current->cpuset_mems_generation != cs->mems_generation)
535 * in cpuset_update_current_mems_allowed() will remain false,
536 * until we drop cpuset_sem. Anyone else who would change our
537 * cpusets mems_generation needs to lock cpuset_sem first.
538 */ 560 */
539 561
540static void refresh_mems(void) 562static void refresh_mems(void)
@@ -840,7 +862,7 @@ static ssize_t cpuset_common_file_write(struct file *file, const char __user *us
840 } 862 }
841 buffer[nbytes] = 0; /* nul-terminate */ 863 buffer[nbytes] = 0; /* nul-terminate */
842 864
843 down(&cpuset_sem); 865 cpuset_down(&cpuset_sem);
844 866
845 if (is_removed(cs)) { 867 if (is_removed(cs)) {
846 retval = -ENODEV; 868 retval = -ENODEV;
@@ -874,7 +896,7 @@ static ssize_t cpuset_common_file_write(struct file *file, const char __user *us
874 if (retval == 0) 896 if (retval == 0)
875 retval = nbytes; 897 retval = nbytes;
876out2: 898out2:
877 up(&cpuset_sem); 899 cpuset_up(&cpuset_sem);
878 cpuset_release_agent(pathbuf); 900 cpuset_release_agent(pathbuf);
879out1: 901out1:
880 kfree(buffer); 902 kfree(buffer);
@@ -914,9 +936,9 @@ static int cpuset_sprintf_cpulist(char *page, struct cpuset *cs)
914{ 936{
915 cpumask_t mask; 937 cpumask_t mask;
916 938
917 down(&cpuset_sem); 939 cpuset_down(&cpuset_sem);
918 mask = cs->cpus_allowed; 940 mask = cs->cpus_allowed;
919 up(&cpuset_sem); 941 cpuset_up(&cpuset_sem);
920 942
921 return cpulist_scnprintf(page, PAGE_SIZE, mask); 943 return cpulist_scnprintf(page, PAGE_SIZE, mask);
922} 944}
@@ -925,9 +947,9 @@ static int cpuset_sprintf_memlist(char *page, struct cpuset *cs)
925{ 947{
926 nodemask_t mask; 948 nodemask_t mask;
927 949
928 down(&cpuset_sem); 950 cpuset_down(&cpuset_sem);
929 mask = cs->mems_allowed; 951 mask = cs->mems_allowed;
930 up(&cpuset_sem); 952 cpuset_up(&cpuset_sem);
931 953
932 return nodelist_scnprintf(page, PAGE_SIZE, mask); 954 return nodelist_scnprintf(page, PAGE_SIZE, mask);
933} 955}
@@ -972,6 +994,10 @@ static ssize_t cpuset_common_file_read(struct file *file, char __user *buf,
972 *s++ = '\n'; 994 *s++ = '\n';
973 *s = '\0'; 995 *s = '\0';
974 996
997 /* Do nothing if *ppos is at the eof or beyond the eof. */
998 if (s - page <= *ppos)
999 return 0;
1000
975 start = page + *ppos; 1001 start = page + *ppos;
976 n = s - start; 1002 n = s - start;
977 retval = n - copy_to_user(buf, start, min(n, nbytes)); 1003 retval = n - copy_to_user(buf, start, min(n, nbytes));
@@ -1330,8 +1356,7 @@ static long cpuset_create(struct cpuset *parent, const char *name, int mode)
1330 if (!cs) 1356 if (!cs)
1331 return -ENOMEM; 1357 return -ENOMEM;
1332 1358
1333 down(&cpuset_sem); 1359 cpuset_down(&cpuset_sem);
1334 refresh_mems();
1335 cs->flags = 0; 1360 cs->flags = 0;
1336 if (notify_on_release(parent)) 1361 if (notify_on_release(parent))
1337 set_bit(CS_NOTIFY_ON_RELEASE, &cs->flags); 1362 set_bit(CS_NOTIFY_ON_RELEASE, &cs->flags);
@@ -1356,14 +1381,14 @@ static long cpuset_create(struct cpuset *parent, const char *name, int mode)
1356 * will down() this new directory's i_sem and if we race with 1381 * will down() this new directory's i_sem and if we race with
1357 * another mkdir, we might deadlock. 1382 * another mkdir, we might deadlock.
1358 */ 1383 */
1359 up(&cpuset_sem); 1384 cpuset_up(&cpuset_sem);
1360 1385
1361 err = cpuset_populate_dir(cs->dentry); 1386 err = cpuset_populate_dir(cs->dentry);
1362 /* If err < 0, we have a half-filled directory - oh well ;) */ 1387 /* If err < 0, we have a half-filled directory - oh well ;) */
1363 return 0; 1388 return 0;
1364err: 1389err:
1365 list_del(&cs->sibling); 1390 list_del(&cs->sibling);
1366 up(&cpuset_sem); 1391 cpuset_up(&cpuset_sem);
1367 kfree(cs); 1392 kfree(cs);
1368 return err; 1393 return err;
1369} 1394}
@@ -1385,14 +1410,13 @@ static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry)
1385 1410
1386 /* the vfs holds both inode->i_sem already */ 1411 /* the vfs holds both inode->i_sem already */
1387 1412
1388 down(&cpuset_sem); 1413 cpuset_down(&cpuset_sem);
1389 refresh_mems();
1390 if (atomic_read(&cs->count) > 0) { 1414 if (atomic_read(&cs->count) > 0) {
1391 up(&cpuset_sem); 1415 cpuset_up(&cpuset_sem);
1392 return -EBUSY; 1416 return -EBUSY;
1393 } 1417 }
1394 if (!list_empty(&cs->children)) { 1418 if (!list_empty(&cs->children)) {
1395 up(&cpuset_sem); 1419 cpuset_up(&cpuset_sem);
1396 return -EBUSY; 1420 return -EBUSY;
1397 } 1421 }
1398 parent = cs->parent; 1422 parent = cs->parent;
@@ -1408,7 +1432,7 @@ static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry)
1408 spin_unlock(&d->d_lock); 1432 spin_unlock(&d->d_lock);
1409 cpuset_d_remove_dir(d); 1433 cpuset_d_remove_dir(d);
1410 dput(d); 1434 dput(d);
1411 up(&cpuset_sem); 1435 cpuset_up(&cpuset_sem);
1412 cpuset_release_agent(pathbuf); 1436 cpuset_release_agent(pathbuf);
1413 return 0; 1437 return 0;
1414} 1438}
@@ -1511,10 +1535,10 @@ void cpuset_exit(struct task_struct *tsk)
1511 if (notify_on_release(cs)) { 1535 if (notify_on_release(cs)) {
1512 char *pathbuf = NULL; 1536 char *pathbuf = NULL;
1513 1537
1514 down(&cpuset_sem); 1538 cpuset_down(&cpuset_sem);
1515 if (atomic_dec_and_test(&cs->count)) 1539 if (atomic_dec_and_test(&cs->count))
1516 check_for_release(cs, &pathbuf); 1540 check_for_release(cs, &pathbuf);
1517 up(&cpuset_sem); 1541 cpuset_up(&cpuset_sem);
1518 cpuset_release_agent(pathbuf); 1542 cpuset_release_agent(pathbuf);
1519 } else { 1543 } else {
1520 atomic_dec(&cs->count); 1544 atomic_dec(&cs->count);
@@ -1535,11 +1559,11 @@ cpumask_t cpuset_cpus_allowed(const struct task_struct *tsk)
1535{ 1559{
1536 cpumask_t mask; 1560 cpumask_t mask;
1537 1561
1538 down(&cpuset_sem); 1562 cpuset_down(&cpuset_sem);
1539 task_lock((struct task_struct *)tsk); 1563 task_lock((struct task_struct *)tsk);
1540 guarantee_online_cpus(tsk->cpuset, &mask); 1564 guarantee_online_cpus(tsk->cpuset, &mask);
1541 task_unlock((struct task_struct *)tsk); 1565 task_unlock((struct task_struct *)tsk);
1542 up(&cpuset_sem); 1566 cpuset_up(&cpuset_sem);
1543 1567
1544 return mask; 1568 return mask;
1545} 1569}
@@ -1564,9 +1588,9 @@ void cpuset_update_current_mems_allowed(void)
1564 if (!cs) 1588 if (!cs)
1565 return; /* task is exiting */ 1589 return; /* task is exiting */
1566 if (current->cpuset_mems_generation != cs->mems_generation) { 1590 if (current->cpuset_mems_generation != cs->mems_generation) {
1567 down(&cpuset_sem); 1591 cpuset_down(&cpuset_sem);
1568 refresh_mems(); 1592 refresh_mems();
1569 up(&cpuset_sem); 1593 cpuset_up(&cpuset_sem);
1570 } 1594 }
1571} 1595}
1572 1596
@@ -1665,14 +1689,14 @@ int cpuset_zone_allowed(struct zone *z, unsigned int __nocast gfp_mask)
1665 return 0; 1689 return 0;
1666 1690
1667 /* Not hardwall and node outside mems_allowed: scan up cpusets */ 1691 /* Not hardwall and node outside mems_allowed: scan up cpusets */
1668 down(&cpuset_sem); 1692 cpuset_down(&cpuset_sem);
1669 cs = current->cpuset; 1693 cs = current->cpuset;
1670 if (!cs) 1694 if (!cs)
1671 goto done; /* current task exiting */ 1695 goto done; /* current task exiting */
1672 cs = nearest_exclusive_ancestor(cs); 1696 cs = nearest_exclusive_ancestor(cs);
1673 allowed = node_isset(node, cs->mems_allowed); 1697 allowed = node_isset(node, cs->mems_allowed);
1674done: 1698done:
1675 up(&cpuset_sem); 1699 cpuset_up(&cpuset_sem);
1676 return allowed; 1700 return allowed;
1677} 1701}
1678 1702
@@ -1693,7 +1717,7 @@ int cpuset_excl_nodes_overlap(const struct task_struct *p)
1693 const struct cpuset *cs1, *cs2; /* my and p's cpuset ancestors */ 1717 const struct cpuset *cs1, *cs2; /* my and p's cpuset ancestors */
1694 int overlap = 0; /* do cpusets overlap? */ 1718 int overlap = 0; /* do cpusets overlap? */
1695 1719
1696 down(&cpuset_sem); 1720 cpuset_down(&cpuset_sem);
1697 cs1 = current->cpuset; 1721 cs1 = current->cpuset;
1698 if (!cs1) 1722 if (!cs1)
1699 goto done; /* current task exiting */ 1723 goto done; /* current task exiting */
@@ -1704,7 +1728,7 @@ int cpuset_excl_nodes_overlap(const struct task_struct *p)
1704 cs2 = nearest_exclusive_ancestor(cs2); 1728 cs2 = nearest_exclusive_ancestor(cs2);
1705 overlap = nodes_intersects(cs1->mems_allowed, cs2->mems_allowed); 1729 overlap = nodes_intersects(cs1->mems_allowed, cs2->mems_allowed);
1706done: 1730done:
1707 up(&cpuset_sem); 1731 cpuset_up(&cpuset_sem);
1708 1732
1709 return overlap; 1733 return overlap;
1710} 1734}
@@ -1727,7 +1751,7 @@ static int proc_cpuset_show(struct seq_file *m, void *v)
1727 return -ENOMEM; 1751 return -ENOMEM;
1728 1752
1729 tsk = m->private; 1753 tsk = m->private;
1730 down(&cpuset_sem); 1754 cpuset_down(&cpuset_sem);
1731 task_lock(tsk); 1755 task_lock(tsk);
1732 cs = tsk->cpuset; 1756 cs = tsk->cpuset;
1733 task_unlock(tsk); 1757 task_unlock(tsk);
@@ -1742,7 +1766,7 @@ static int proc_cpuset_show(struct seq_file *m, void *v)
1742 seq_puts(m, buf); 1766 seq_puts(m, buf);
1743 seq_putc(m, '\n'); 1767 seq_putc(m, '\n');
1744out: 1768out:
1745 up(&cpuset_sem); 1769 cpuset_up(&cpuset_sem);
1746 kfree(buf); 1770 kfree(buf);
1747 return retval; 1771 return retval;
1748} 1772}
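
The new cpuset_down()/cpuset_up() wrappers above keep a per-task nesting depth so that cpuset code which already holds cpuset_sem does not deadlock when the memory allocator calls back into cpuset.c. A hypothetical call chain sketching the hazard; only cpuset_down()/cpuset_up() and cpuset_sem come from the patch, the two functions here are made up:

/* Hypothetical call chain illustrating the nested-locking hazard the
 * wrappers avoid.  With a plain down()/up() pair the inner call would
 * sleep forever on a semaphore its own task already holds. */
static void some_cpuset_operation(void)
{
	void *p;

	cpuset_down(&cpuset_sem);	/* depth 0 -> 1: really takes the semaphore */
	p = kmalloc(128, GFP_KERNEL);	/* allocator may call back into cpuset code */
	kfree(p);
	cpuset_up(&cpuset_sem);		/* depth 1 -> 0: really releases it */
}

static void callback_from_the_allocator(void)
{
	cpuset_down(&cpuset_sem);	/* depth 1 -> 2: already held, skips down() */
	/* ... consult mems_allowed, walk the cpuset hierarchy ... */
	cpuset_up(&cpuset_sem);		/* depth 2 -> 1: does not release yet */
}
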
diff --git a/kernel/exit.c b/kernel/exit.c
index 5b0fb9f09f21..6d2089a1bce7 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -368,17 +368,19 @@ EXPORT_SYMBOL(daemonize);
368static inline void close_files(struct files_struct * files) 368static inline void close_files(struct files_struct * files)
369{ 369{
370 int i, j; 370 int i, j;
371 struct fdtable *fdt;
371 372
372 j = 0; 373 j = 0;
374 fdt = files_fdtable(files);
373 for (;;) { 375 for (;;) {
374 unsigned long set; 376 unsigned long set;
375 i = j * __NFDBITS; 377 i = j * __NFDBITS;
376 if (i >= files->max_fdset || i >= files->max_fds) 378 if (i >= fdt->max_fdset || i >= fdt->max_fds)
377 break; 379 break;
378 set = files->open_fds->fds_bits[j++]; 380 set = fdt->open_fds->fds_bits[j++];
379 while (set) { 381 while (set) {
380 if (set & 1) { 382 if (set & 1) {
381 struct file * file = xchg(&files->fd[i], NULL); 383 struct file * file = xchg(&fdt->fd[i], NULL);
382 if (file) 384 if (file)
383 filp_close(file, files); 385 filp_close(file, files);
384 } 386 }
@@ -403,18 +405,22 @@ struct files_struct *get_files_struct(struct task_struct *task)
403 405
404void fastcall put_files_struct(struct files_struct *files) 406void fastcall put_files_struct(struct files_struct *files)
405{ 407{
408 struct fdtable *fdt;
409
406 if (atomic_dec_and_test(&files->count)) { 410 if (atomic_dec_and_test(&files->count)) {
407 close_files(files); 411 close_files(files);
408 /* 412 /*
409 * Free the fd and fdset arrays if we expanded them. 413 * Free the fd and fdset arrays if we expanded them.
414 * If the fdtable was embedded, pass files for freeing
415 * at the end of the RCU grace period. Otherwise,
416 * you can free files immediately.
410 */ 417 */
411 if (files->fd != &files->fd_array[0]) 418 fdt = files_fdtable(files);
412 free_fd_array(files->fd, files->max_fds); 419 if (fdt == &files->fdtab)
413 if (files->max_fdset > __FD_SETSIZE) { 420 fdt->free_files = files;
414 free_fdset(files->open_fds, files->max_fdset); 421 else
415 free_fdset(files->close_on_exec, files->max_fdset); 422 kmem_cache_free(files_cachep, files);
416 } 423 free_fdtable(fdt);
417 kmem_cache_free(files_cachep, files);
418 } 424 }
419} 425}
420 426
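
Together with the fork.c changes below, close_files() and put_files_struct() now go through struct fdtable, which is published with rcu_assign_pointer() and freed via free_fdtable() after a grace period (or, for the embedded table, by handing the whole files_struct to RCU through fdt->free_files). A minimal reader sketch under those assumptions; the helper name is made up, but files_fdtable() and rcu_dereference() are the accessors used above:

/* Hypothetical lockless fd lookup inside an RCU read-side section. */
static struct file *example_lookup_fd(struct files_struct *files,
				      unsigned int fd)
{
	struct file *file = NULL;
	struct fdtable *fdt;

	rcu_read_lock();
	fdt = files_fdtable(files);		/* rcu_dereference of files->fdt */
	if (fd < fdt->max_fds)
		file = rcu_dereference(fdt->fd[fd]);
	rcu_read_unlock();
	return file;
}
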
diff --git a/kernel/fork.c b/kernel/fork.c
index 7e1ead9a6ba4..8149f3602881 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -35,6 +35,7 @@
35#include <linux/syscalls.h> 35#include <linux/syscalls.h>
36#include <linux/jiffies.h> 36#include <linux/jiffies.h>
37#include <linux/futex.h> 37#include <linux/futex.h>
38#include <linux/rcupdate.h>
38#include <linux/ptrace.h> 39#include <linux/ptrace.h>
39#include <linux/mount.h> 40#include <linux/mount.h>
40#include <linux/audit.h> 41#include <linux/audit.h>
@@ -176,6 +177,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
176 177
177 /* One for us, one for whoever does the "release_task()" (usually parent) */ 178 /* One for us, one for whoever does the "release_task()" (usually parent) */
178 atomic_set(&tsk->usage,2); 179 atomic_set(&tsk->usage,2);
180 atomic_set(&tsk->fs_excl, 0);
179 return tsk; 181 return tsk;
180} 182}
181 183
@@ -564,24 +566,53 @@ static inline int copy_fs(unsigned long clone_flags, struct task_struct * tsk)
564 return 0; 566 return 0;
565} 567}
566 568
567static int count_open_files(struct files_struct *files, int size) 569static int count_open_files(struct fdtable *fdt)
568{ 570{
571 int size = fdt->max_fdset;
569 int i; 572 int i;
570 573
571 /* Find the last open fd */ 574 /* Find the last open fd */
572 for (i = size/(8*sizeof(long)); i > 0; ) { 575 for (i = size/(8*sizeof(long)); i > 0; ) {
573 if (files->open_fds->fds_bits[--i]) 576 if (fdt->open_fds->fds_bits[--i])
574 break; 577 break;
575 } 578 }
576 i = (i+1) * 8 * sizeof(long); 579 i = (i+1) * 8 * sizeof(long);
577 return i; 580 return i;
578} 581}
579 582
583static struct files_struct *alloc_files(void)
584{
585 struct files_struct *newf;
586 struct fdtable *fdt;
587
588 newf = kmem_cache_alloc(files_cachep, SLAB_KERNEL);
589 if (!newf)
590 goto out;
591
592 atomic_set(&newf->count, 1);
593
594 spin_lock_init(&newf->file_lock);
595 fdt = &newf->fdtab;
596 fdt->next_fd = 0;
597 fdt->max_fds = NR_OPEN_DEFAULT;
598 fdt->max_fdset = __FD_SETSIZE;
599 fdt->close_on_exec = &newf->close_on_exec_init;
600 fdt->open_fds = &newf->open_fds_init;
601 fdt->fd = &newf->fd_array[0];
602 INIT_RCU_HEAD(&fdt->rcu);
603 fdt->free_files = NULL;
604 fdt->next = NULL;
605 rcu_assign_pointer(newf->fdt, fdt);
606out:
607 return newf;
608}
609
580static int copy_files(unsigned long clone_flags, struct task_struct * tsk) 610static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
581{ 611{
582 struct files_struct *oldf, *newf; 612 struct files_struct *oldf, *newf;
583 struct file **old_fds, **new_fds; 613 struct file **old_fds, **new_fds;
584 int open_files, size, i, error = 0, expand; 614 int open_files, size, i, error = 0, expand;
615 struct fdtable *old_fdt, *new_fdt;
585 616
586 /* 617 /*
587 * A background process may not have any files ... 618 * A background process may not have any files ...
@@ -602,35 +633,27 @@ static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
602 */ 633 */
603 tsk->files = NULL; 634 tsk->files = NULL;
604 error = -ENOMEM; 635 error = -ENOMEM;
605 newf = kmem_cache_alloc(files_cachep, SLAB_KERNEL); 636 newf = alloc_files();
606 if (!newf) 637 if (!newf)
607 goto out; 638 goto out;
608 639
609 atomic_set(&newf->count, 1);
610
611 spin_lock_init(&newf->file_lock);
612 newf->next_fd = 0;
613 newf->max_fds = NR_OPEN_DEFAULT;
614 newf->max_fdset = __FD_SETSIZE;
615 newf->close_on_exec = &newf->close_on_exec_init;
616 newf->open_fds = &newf->open_fds_init;
617 newf->fd = &newf->fd_array[0];
618
619 spin_lock(&oldf->file_lock); 640 spin_lock(&oldf->file_lock);
620 641 old_fdt = files_fdtable(oldf);
621 open_files = count_open_files(oldf, oldf->max_fdset); 642 new_fdt = files_fdtable(newf);
643 size = old_fdt->max_fdset;
644 open_files = count_open_files(old_fdt);
622 expand = 0; 645 expand = 0;
623 646
624 /* 647 /*
625 * Check whether we need to allocate a larger fd array or fd set. 648 * Check whether we need to allocate a larger fd array or fd set.
626 * Note: we're not a clone task, so the open count won't change. 649 * Note: we're not a clone task, so the open count won't change.
627 */ 650 */
628 if (open_files > newf->max_fdset) { 651 if (open_files > new_fdt->max_fdset) {
629 newf->max_fdset = 0; 652 new_fdt->max_fdset = 0;
630 expand = 1; 653 expand = 1;
631 } 654 }
632 if (open_files > newf->max_fds) { 655 if (open_files > new_fdt->max_fds) {
633 newf->max_fds = 0; 656 new_fdt->max_fds = 0;
634 expand = 1; 657 expand = 1;
635 } 658 }
636 659
@@ -642,14 +665,21 @@ static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
642 spin_unlock(&newf->file_lock); 665 spin_unlock(&newf->file_lock);
643 if (error < 0) 666 if (error < 0)
644 goto out_release; 667 goto out_release;
668 new_fdt = files_fdtable(newf);
669 /*
670 * Reacquire the oldf lock and a pointer to its fd table
671 * who knows it may have a new bigger fd table. We need
672 * the latest pointer.
673 */
645 spin_lock(&oldf->file_lock); 674 spin_lock(&oldf->file_lock);
675 old_fdt = files_fdtable(oldf);
646 } 676 }
647 677
648 old_fds = oldf->fd; 678 old_fds = old_fdt->fd;
649 new_fds = newf->fd; 679 new_fds = new_fdt->fd;
650 680
651 memcpy(newf->open_fds->fds_bits, oldf->open_fds->fds_bits, open_files/8); 681 memcpy(new_fdt->open_fds->fds_bits, old_fdt->open_fds->fds_bits, open_files/8);
652 memcpy(newf->close_on_exec->fds_bits, oldf->close_on_exec->fds_bits, open_files/8); 682 memcpy(new_fdt->close_on_exec->fds_bits, old_fdt->close_on_exec->fds_bits, open_files/8);
653 683
654 for (i = open_files; i != 0; i--) { 684 for (i = open_files; i != 0; i--) {
655 struct file *f = *old_fds++; 685 struct file *f = *old_fds++;
@@ -662,24 +692,24 @@ static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
662 * is partway through open(). So make sure that this 692 * is partway through open(). So make sure that this
663 * fd is available to the new process. 693 * fd is available to the new process.
664 */ 694 */
665 FD_CLR(open_files - i, newf->open_fds); 695 FD_CLR(open_files - i, new_fdt->open_fds);
666 } 696 }
667 *new_fds++ = f; 697 rcu_assign_pointer(*new_fds++, f);
668 } 698 }
669 spin_unlock(&oldf->file_lock); 699 spin_unlock(&oldf->file_lock);
670 700
671 /* compute the remainder to be cleared */ 701 /* compute the remainder to be cleared */
672 size = (newf->max_fds - open_files) * sizeof(struct file *); 702 size = (new_fdt->max_fds - open_files) * sizeof(struct file *);
673 703
674 /* This is long word aligned thus could use a optimized version */ 704 /* This is long word aligned thus could use a optimized version */
675 memset(new_fds, 0, size); 705 memset(new_fds, 0, size);
676 706
677 if (newf->max_fdset > open_files) { 707 if (new_fdt->max_fdset > open_files) {
678 int left = (newf->max_fdset-open_files)/8; 708 int left = (new_fdt->max_fdset-open_files)/8;
679 int start = open_files / (8 * sizeof(unsigned long)); 709 int start = open_files / (8 * sizeof(unsigned long));
680 710
681 memset(&newf->open_fds->fds_bits[start], 0, left); 711 memset(&new_fdt->open_fds->fds_bits[start], 0, left);
682 memset(&newf->close_on_exec->fds_bits[start], 0, left); 712 memset(&new_fdt->close_on_exec->fds_bits[start], 0, left);
683 } 713 }
684 714
685 tsk->files = newf; 715 tsk->files = newf;
@@ -688,9 +718,9 @@ out:
688 return error; 718 return error;
689 719
690out_release: 720out_release:
691 free_fdset (newf->close_on_exec, newf->max_fdset); 721 free_fdset (new_fdt->close_on_exec, new_fdt->max_fdset);
692 free_fdset (newf->open_fds, newf->max_fdset); 722 free_fdset (new_fdt->open_fds, new_fdt->max_fdset);
693 free_fd_array(newf->fd, newf->max_fds); 723 free_fd_array(new_fdt->fd, new_fdt->max_fds);
694 kmem_cache_free(files_cachep, newf); 724 kmem_cache_free(files_cachep, newf);
695 goto out; 725 goto out;
696} 726}
@@ -1115,6 +1145,9 @@ static task_t *copy_process(unsigned long clone_flags,
1115 __get_cpu_var(process_counts)++; 1145 __get_cpu_var(process_counts)++;
1116 } 1146 }
1117 1147
1148 if (!current->signal->tty && p->signal->tty)
1149 p->signal->tty = NULL;
1150
1118 nr_threads++; 1151 nr_threads++;
1119 total_forks++; 1152 total_forks++;
1120 write_unlock_irq(&tasklist_lock); 1153 write_unlock_irq(&tasklist_lock);
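
count_open_files() now takes the fdtable directly and still rounds the highest-numbered open descriptor up to a whole word of the open_fds bitmap. A small worked example with illustrative numbers (64-bit longs, the default max_fdset of 1024):

/* Worked example of the count_open_files() rounding, made-up values. */
static int example_count_open_files_rounding(void)
{
	int size = 1024;			/* fdt->max_fdset           */
	int words = size / (8 * sizeof(long));	/* 1024 / 64 == 16 words    */
	int i = 2;				/* say fds_bits[2] is the   */
						/* highest non-zero word,   */
						/* i.e. an fd in 128..191   */
	(void)words;
	return (i + 1) * 8 * sizeof(long);	/* rounds up to 192         */
}
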
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index f436993bd590..bef3b6901b76 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -45,6 +45,7 @@
45#include <linux/percpu.h> 45#include <linux/percpu.h>
46#include <linux/notifier.h> 46#include <linux/notifier.h>
47#include <linux/rcupdate.h> 47#include <linux/rcupdate.h>
48#include <linux/rcuref.h>
48#include <linux/cpu.h> 49#include <linux/cpu.h>
49 50
50/* Definition for rcupdate control block. */ 51/* Definition for rcupdate control block. */
@@ -72,6 +73,19 @@ DEFINE_PER_CPU(struct rcu_data, rcu_bh_data) = { 0L };
72static DEFINE_PER_CPU(struct tasklet_struct, rcu_tasklet) = {NULL}; 73static DEFINE_PER_CPU(struct tasklet_struct, rcu_tasklet) = {NULL};
73static int maxbatch = 10; 74static int maxbatch = 10;
74 75
76#ifndef __HAVE_ARCH_CMPXCHG
77/*
78 * We use an array of spinlocks for the rcurefs -- similar to ones in sparc
79 * 32 bit atomic_t implementations, and a hash function similar to that
80 * for our refcounting needs.
81 * Can't help multiprocessors which donot have cmpxchg :(
82 */
83
84spinlock_t __rcuref_hash[RCUREF_HASH_SIZE] = {
85 [0 ... (RCUREF_HASH_SIZE-1)] = SPIN_LOCK_UNLOCKED
86};
87#endif
88
75/** 89/**
76 * call_rcu - Queue an RCU callback for invocation after a grace period. 90 * call_rcu - Queue an RCU callback for invocation after a grace period.
77 * @head: structure to be used for queueing the RCU updates. 91 * @head: structure to be used for queueing the RCU updates.
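
The new __rcuref_hash[] array backs the rcuref counters on configurations without a cmpxchg instruction: the counter's address is hashed to one of RCUREF_HASH_SIZE spinlocks and the read-modify-write is done under that lock. A rough, hypothetical illustration of the idea (the function and the hash expression are made up, not the real rcuref.h code):

static inline void example_rcuref_inc(atomic_t *rcuref)
{
	unsigned long flags;
	spinlock_t *lock;

	/* Pick one of the RCUREF_HASH_SIZE locks based on the counter's
	 * address, then bump the counter under it. */
	lock = &__rcuref_hash[((unsigned long)rcuref >> 4) % RCUREF_HASH_SIZE];
	spin_lock_irqsave(lock, flags);
	rcuref->counter++;
	spin_unlock_irqrestore(lock, flags);
}
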
diff --git a/kernel/sched.c b/kernel/sched.c
index 18b95520a2e2..dbd4490afec1 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -875,7 +875,7 @@ static int migrate_task(task_t *p, int dest_cpu, migration_req_t *req)
875 * smp_call_function() if an IPI is sent by the same process we are 875 * smp_call_function() if an IPI is sent by the same process we are
876 * waiting to become inactive. 876 * waiting to become inactive.
877 */ 877 */
878void wait_task_inactive(task_t * p) 878void wait_task_inactive(task_t *p)
879{ 879{
880 unsigned long flags; 880 unsigned long flags;
881 runqueue_t *rq; 881 runqueue_t *rq;
@@ -966,8 +966,11 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
966 int local_group; 966 int local_group;
967 int i; 967 int i;
968 968
969 /* Skip over this group if it has no CPUs allowed */
970 if (!cpus_intersects(group->cpumask, p->cpus_allowed))
971 goto nextgroup;
972
969 local_group = cpu_isset(this_cpu, group->cpumask); 973 local_group = cpu_isset(this_cpu, group->cpumask);
970 /* XXX: put a cpus allowed check */
971 974
972 /* Tally up the load of all CPUs in the group */ 975 /* Tally up the load of all CPUs in the group */
973 avg_load = 0; 976 avg_load = 0;
@@ -992,6 +995,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
992 min_load = avg_load; 995 min_load = avg_load;
993 idlest = group; 996 idlest = group;
994 } 997 }
998nextgroup:
995 group = group->next; 999 group = group->next;
996 } while (group != sd->groups); 1000 } while (group != sd->groups);
997 1001
@@ -1003,13 +1007,18 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
1003/* 1007/*
1004 * find_idlest_queue - find the idlest runqueue among the cpus in group. 1008 * find_idlest_queue - find the idlest runqueue among the cpus in group.
1005 */ 1009 */
1006static int find_idlest_cpu(struct sched_group *group, int this_cpu) 1010static int
1011find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
1007{ 1012{
1013 cpumask_t tmp;
1008 unsigned long load, min_load = ULONG_MAX; 1014 unsigned long load, min_load = ULONG_MAX;
1009 int idlest = -1; 1015 int idlest = -1;
1010 int i; 1016 int i;
1011 1017
1012 for_each_cpu_mask(i, group->cpumask) { 1018 /* Traverse only the allowed CPUs */
1019 cpus_and(tmp, group->cpumask, p->cpus_allowed);
1020
1021 for_each_cpu_mask(i, tmp) {
1013 load = source_load(i, 0); 1022 load = source_load(i, 0);
1014 1023
1015 if (load < min_load || (load == min_load && i == this_cpu)) { 1024 if (load < min_load || (load == min_load && i == this_cpu)) {
@@ -1052,7 +1061,7 @@ static int sched_balance_self(int cpu, int flag)
1052 if (!group) 1061 if (!group)
1053 goto nextlevel; 1062 goto nextlevel;
1054 1063
1055 new_cpu = find_idlest_cpu(group, cpu); 1064 new_cpu = find_idlest_cpu(group, t, cpu);
1056 if (new_cpu == -1 || new_cpu == cpu) 1065 if (new_cpu == -1 || new_cpu == cpu)
1057 goto nextlevel; 1066 goto nextlevel;
1058 1067
@@ -1127,7 +1136,7 @@ static inline int wake_idle(int cpu, task_t *p)
1127 * 1136 *
1128 * returns failure only if the task is already active. 1137 * returns failure only if the task is already active.
1129 */ 1138 */
1130static int try_to_wake_up(task_t * p, unsigned int state, int sync) 1139static int try_to_wake_up(task_t *p, unsigned int state, int sync)
1131{ 1140{
1132 int cpu, this_cpu, success = 0; 1141 int cpu, this_cpu, success = 0;
1133 unsigned long flags; 1142 unsigned long flags;
@@ -1252,6 +1261,16 @@ out_activate:
1252 } 1261 }
1253 1262
1254 /* 1263 /*
1264 * Tasks that have marked their sleep as noninteractive get
1265 * woken up without updating their sleep average. (i.e. their
1266 * sleep is handled in a priority-neutral manner, no priority
1267 * boost and no penalty.)
1268 */
1269 if (old_state & TASK_NONINTERACTIVE)
1270 __activate_task(p, rq);
1271 else
1272 activate_task(p, rq, cpu == this_cpu);
1273 /*
1255 * Sync wakeups (i.e. those types of wakeups where the waker 1274 * Sync wakeups (i.e. those types of wakeups where the waker
1256 * has indicated that it will leave the CPU in short order) 1275 * has indicated that it will leave the CPU in short order)
1257 * don't trigger a preemption, if the woken up task will run on 1276 * don't trigger a preemption, if the woken up task will run on
@@ -1259,7 +1278,6 @@ out_activate:
1259 * the waker guarantees that the freshly woken up task is going 1278 * the waker guarantees that the freshly woken up task is going
1260 * to be considered on this CPU.) 1279 * to be considered on this CPU.)
1261 */ 1280 */
1262 activate_task(p, rq, cpu == this_cpu);
1263 if (!sync || cpu != this_cpu) { 1281 if (!sync || cpu != this_cpu) {
1264 if (TASK_PREEMPTS_CURR(p, rq)) 1282 if (TASK_PREEMPTS_CURR(p, rq))
1265 resched_task(rq->curr); 1283 resched_task(rq->curr);
@@ -1274,7 +1292,7 @@ out:
1274 return success; 1292 return success;
1275} 1293}
1276 1294
1277int fastcall wake_up_process(task_t * p) 1295int fastcall wake_up_process(task_t *p)
1278{ 1296{
1279 return try_to_wake_up(p, TASK_STOPPED | TASK_TRACED | 1297 return try_to_wake_up(p, TASK_STOPPED | TASK_TRACED |
1280 TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE, 0); 1298 TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE, 0);
@@ -1353,7 +1371,7 @@ void fastcall sched_fork(task_t *p, int clone_flags)
1353 * that must be done for every newly created context, then puts the task 1371 * that must be done for every newly created context, then puts the task
1354 * on the runqueue and wakes it. 1372 * on the runqueue and wakes it.
1355 */ 1373 */
1356void fastcall wake_up_new_task(task_t * p, unsigned long clone_flags) 1374void fastcall wake_up_new_task(task_t *p, unsigned long clone_flags)
1357{ 1375{
1358 unsigned long flags; 1376 unsigned long flags;
1359 int this_cpu, cpu; 1377 int this_cpu, cpu;
@@ -1436,7 +1454,7 @@ void fastcall wake_up_new_task(task_t * p, unsigned long clone_flags)
1436 * artificially, because any timeslice recovered here 1454 * artificially, because any timeslice recovered here
1437 * was given away by the parent in the first place.) 1455 * was given away by the parent in the first place.)
1438 */ 1456 */
1439void fastcall sched_exit(task_t * p) 1457void fastcall sched_exit(task_t *p)
1440{ 1458{
1441 unsigned long flags; 1459 unsigned long flags;
1442 runqueue_t *rq; 1460 runqueue_t *rq;
@@ -1511,6 +1529,10 @@ static inline void finish_task_switch(runqueue_t *rq, task_t *prev)
1511 * Manfred Spraul <manfred@colorfullife.com> 1529 * Manfred Spraul <manfred@colorfullife.com>
1512 */ 1530 */
1513 prev_task_flags = prev->flags; 1531 prev_task_flags = prev->flags;
1532#ifdef CONFIG_DEBUG_SPINLOCK
1533 /* this is a valid case when another task releases the spinlock */
1534 rq->lock.owner = current;
1535#endif
1514 finish_arch_switch(prev); 1536 finish_arch_switch(prev);
1515 finish_lock_switch(rq, prev); 1537 finish_lock_switch(rq, prev);
1516 if (mm) 1538 if (mm)
@@ -1753,7 +1775,8 @@ void pull_task(runqueue_t *src_rq, prio_array_t *src_array, task_t *p,
1753 */ 1775 */
1754static inline 1776static inline
1755int can_migrate_task(task_t *p, runqueue_t *rq, int this_cpu, 1777int can_migrate_task(task_t *p, runqueue_t *rq, int this_cpu,
1756 struct sched_domain *sd, enum idle_type idle, int *all_pinned) 1778 struct sched_domain *sd, enum idle_type idle,
1779 int *all_pinned)
1757{ 1780{
1758 /* 1781 /*
1759 * We do not migrate tasks that are: 1782 * We do not migrate tasks that are:
@@ -1883,10 +1906,11 @@ out:
1883 */ 1906 */
1884static struct sched_group * 1907static struct sched_group *
1885find_busiest_group(struct sched_domain *sd, int this_cpu, 1908find_busiest_group(struct sched_domain *sd, int this_cpu,
1886 unsigned long *imbalance, enum idle_type idle) 1909 unsigned long *imbalance, enum idle_type idle, int *sd_idle)
1887{ 1910{
1888 struct sched_group *busiest = NULL, *this = NULL, *group = sd->groups; 1911 struct sched_group *busiest = NULL, *this = NULL, *group = sd->groups;
1889 unsigned long max_load, avg_load, total_load, this_load, total_pwr; 1912 unsigned long max_load, avg_load, total_load, this_load, total_pwr;
1913 unsigned long max_pull;
1890 int load_idx; 1914 int load_idx;
1891 1915
1892 max_load = this_load = total_load = total_pwr = 0; 1916 max_load = this_load = total_load = total_pwr = 0;
@@ -1908,6 +1932,9 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
1908 avg_load = 0; 1932 avg_load = 0;
1909 1933
1910 for_each_cpu_mask(i, group->cpumask) { 1934 for_each_cpu_mask(i, group->cpumask) {
1935 if (*sd_idle && !idle_cpu(i))
1936 *sd_idle = 0;
1937
1911 /* Bias balancing toward cpus of our domain */ 1938 /* Bias balancing toward cpus of our domain */
1912 if (local_group) 1939 if (local_group)
1913 load = target_load(i, load_idx); 1940 load = target_load(i, load_idx);
@@ -1933,7 +1960,7 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
1933 group = group->next; 1960 group = group->next;
1934 } while (group != sd->groups); 1961 } while (group != sd->groups);
1935 1962
1936 if (!busiest || this_load >= max_load) 1963 if (!busiest || this_load >= max_load || max_load <= SCHED_LOAD_SCALE)
1937 goto out_balanced; 1964 goto out_balanced;
1938 1965
1939 avg_load = (SCHED_LOAD_SCALE * total_load) / total_pwr; 1966 avg_load = (SCHED_LOAD_SCALE * total_load) / total_pwr;
@@ -1953,8 +1980,12 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
1953 * by pulling tasks to us. Be careful of negative numbers as they'll 1980 * by pulling tasks to us. Be careful of negative numbers as they'll
1954 * appear as very large values with unsigned longs. 1981 * appear as very large values with unsigned longs.
1955 */ 1982 */
1983
1984 /* Don't want to pull so many tasks that a group would go idle */
1985 max_pull = min(max_load - avg_load, max_load - SCHED_LOAD_SCALE);
1986
1956 /* How much load to actually move to equalise the imbalance */ 1987 /* How much load to actually move to equalise the imbalance */
1957 *imbalance = min((max_load - avg_load) * busiest->cpu_power, 1988 *imbalance = min(max_pull * busiest->cpu_power,
1958 (avg_load - this_load) * this->cpu_power) 1989 (avg_load - this_load) * this->cpu_power)
1959 / SCHED_LOAD_SCALE; 1990 / SCHED_LOAD_SCALE;
1960 1991
@@ -2051,11 +2082,14 @@ static int load_balance(int this_cpu, runqueue_t *this_rq,
2051 unsigned long imbalance; 2082 unsigned long imbalance;
2052 int nr_moved, all_pinned = 0; 2083 int nr_moved, all_pinned = 0;
2053 int active_balance = 0; 2084 int active_balance = 0;
2085 int sd_idle = 0;
2086
2087 if (idle != NOT_IDLE && sd->flags & SD_SHARE_CPUPOWER)
2088 sd_idle = 1;
2054 2089
2055 spin_lock(&this_rq->lock);
2056 schedstat_inc(sd, lb_cnt[idle]); 2090 schedstat_inc(sd, lb_cnt[idle]);
2057 2091
2058 group = find_busiest_group(sd, this_cpu, &imbalance, idle); 2092 group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle);
2059 if (!group) { 2093 if (!group) {
2060 schedstat_inc(sd, lb_nobusyg[idle]); 2094 schedstat_inc(sd, lb_nobusyg[idle]);
2061 goto out_balanced; 2095 goto out_balanced;
@@ -2079,19 +2113,16 @@ static int load_balance(int this_cpu, runqueue_t *this_rq,
2079 * still unbalanced. nr_moved simply stays zero, so it is 2113 * still unbalanced. nr_moved simply stays zero, so it is
2080 * correctly treated as an imbalance. 2114 * correctly treated as an imbalance.
2081 */ 2115 */
2082 double_lock_balance(this_rq, busiest); 2116 double_rq_lock(this_rq, busiest);
2083 nr_moved = move_tasks(this_rq, this_cpu, busiest, 2117 nr_moved = move_tasks(this_rq, this_cpu, busiest,
2084 imbalance, sd, idle, 2118 imbalance, sd, idle, &all_pinned);
2085 &all_pinned); 2119 double_rq_unlock(this_rq, busiest);
2086 spin_unlock(&busiest->lock);
2087 2120
2088 /* All tasks on this runqueue were pinned by CPU affinity */ 2121 /* All tasks on this runqueue were pinned by CPU affinity */
2089 if (unlikely(all_pinned)) 2122 if (unlikely(all_pinned))
2090 goto out_balanced; 2123 goto out_balanced;
2091 } 2124 }
2092 2125
2093 spin_unlock(&this_rq->lock);
2094
2095 if (!nr_moved) { 2126 if (!nr_moved) {
2096 schedstat_inc(sd, lb_failed[idle]); 2127 schedstat_inc(sd, lb_failed[idle]);
2097 sd->nr_balance_failed++; 2128 sd->nr_balance_failed++;
@@ -2099,6 +2130,16 @@ static int load_balance(int this_cpu, runqueue_t *this_rq,
2099 if (unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2)) { 2130 if (unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2)) {
2100 2131
2101 spin_lock(&busiest->lock); 2132 spin_lock(&busiest->lock);
2133
2134 /* don't kick the migration_thread, if the curr
2135 * task on busiest cpu can't be moved to this_cpu
2136 */
2137 if (!cpu_isset(this_cpu, busiest->curr->cpus_allowed)) {
2138 spin_unlock(&busiest->lock);
2139 all_pinned = 1;
2140 goto out_one_pinned;
2141 }
2142
2102 if (!busiest->active_balance) { 2143 if (!busiest->active_balance) {
2103 busiest->active_balance = 1; 2144 busiest->active_balance = 1;
2104 busiest->push_cpu = this_cpu; 2145 busiest->push_cpu = this_cpu;
@@ -2131,19 +2172,23 @@ static int load_balance(int this_cpu, runqueue_t *this_rq,
2131 sd->balance_interval *= 2; 2172 sd->balance_interval *= 2;
2132 } 2173 }
2133 2174
2175 if (!nr_moved && !sd_idle && sd->flags & SD_SHARE_CPUPOWER)
2176 return -1;
2134 return nr_moved; 2177 return nr_moved;
2135 2178
2136out_balanced: 2179out_balanced:
2137 spin_unlock(&this_rq->lock);
2138
2139 schedstat_inc(sd, lb_balanced[idle]); 2180 schedstat_inc(sd, lb_balanced[idle]);
2140 2181
2141 sd->nr_balance_failed = 0; 2182 sd->nr_balance_failed = 0;
2183
2184out_one_pinned:
2142 /* tune up the balancing interval */ 2185 /* tune up the balancing interval */
2143 if ((all_pinned && sd->balance_interval < MAX_PINNED_INTERVAL) || 2186 if ((all_pinned && sd->balance_interval < MAX_PINNED_INTERVAL) ||
2144 (sd->balance_interval < sd->max_interval)) 2187 (sd->balance_interval < sd->max_interval))
2145 sd->balance_interval *= 2; 2188 sd->balance_interval *= 2;
2146 2189
2190 if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER)
2191 return -1;
2147 return 0; 2192 return 0;
2148} 2193}
2149 2194
@@ -2161,9 +2206,13 @@ static int load_balance_newidle(int this_cpu, runqueue_t *this_rq,
2161 runqueue_t *busiest = NULL; 2206 runqueue_t *busiest = NULL;
2162 unsigned long imbalance; 2207 unsigned long imbalance;
2163 int nr_moved = 0; 2208 int nr_moved = 0;
2209 int sd_idle = 0;
2210
2211 if (sd->flags & SD_SHARE_CPUPOWER)
2212 sd_idle = 1;
2164 2213
2165 schedstat_inc(sd, lb_cnt[NEWLY_IDLE]); 2214 schedstat_inc(sd, lb_cnt[NEWLY_IDLE]);
2166 group = find_busiest_group(sd, this_cpu, &imbalance, NEWLY_IDLE); 2215 group = find_busiest_group(sd, this_cpu, &imbalance, NEWLY_IDLE, &sd_idle);
2167 if (!group) { 2216 if (!group) {
2168 schedstat_inc(sd, lb_nobusyg[NEWLY_IDLE]); 2217 schedstat_inc(sd, lb_nobusyg[NEWLY_IDLE]);
2169 goto out_balanced; 2218 goto out_balanced;
@@ -2177,22 +2226,30 @@ static int load_balance_newidle(int this_cpu, runqueue_t *this_rq,
2177 2226
2178 BUG_ON(busiest == this_rq); 2227 BUG_ON(busiest == this_rq);
2179 2228
2180 /* Attempt to move tasks */
2181 double_lock_balance(this_rq, busiest);
2182
2183 schedstat_add(sd, lb_imbalance[NEWLY_IDLE], imbalance); 2229 schedstat_add(sd, lb_imbalance[NEWLY_IDLE], imbalance);
2184 nr_moved = move_tasks(this_rq, this_cpu, busiest, 2230
2231 nr_moved = 0;
2232 if (busiest->nr_running > 1) {
2233 /* Attempt to move tasks */
2234 double_lock_balance(this_rq, busiest);
2235 nr_moved = move_tasks(this_rq, this_cpu, busiest,
2185 imbalance, sd, NEWLY_IDLE, NULL); 2236 imbalance, sd, NEWLY_IDLE, NULL);
2186 if (!nr_moved) 2237 spin_unlock(&busiest->lock);
2238 }
2239
2240 if (!nr_moved) {
2187 schedstat_inc(sd, lb_failed[NEWLY_IDLE]); 2241 schedstat_inc(sd, lb_failed[NEWLY_IDLE]);
2188 else 2242 if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER)
2243 return -1;
2244 } else
2189 sd->nr_balance_failed = 0; 2245 sd->nr_balance_failed = 0;
2190 2246
2191 spin_unlock(&busiest->lock);
2192 return nr_moved; 2247 return nr_moved;
2193 2248
2194out_balanced: 2249out_balanced:
2195 schedstat_inc(sd, lb_balanced[NEWLY_IDLE]); 2250 schedstat_inc(sd, lb_balanced[NEWLY_IDLE]);
2251 if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER)
2252 return -1;
2196 sd->nr_balance_failed = 0; 2253 sd->nr_balance_failed = 0;
2197 return 0; 2254 return 0;
2198} 2255}
@@ -2317,7 +2374,11 @@ static void rebalance_tick(int this_cpu, runqueue_t *this_rq,
2317 2374
2318 if (j - sd->last_balance >= interval) { 2375 if (j - sd->last_balance >= interval) {
2319 if (load_balance(this_cpu, this_rq, sd, idle)) { 2376 if (load_balance(this_cpu, this_rq, sd, idle)) {
2320 /* We've pulled tasks over so no longer idle */ 2377 /*
2378 * We've pulled tasks over so either we're no
2379 * longer idle, or one of our SMT siblings is
2380 * not idle.
2381 */
2321 idle = NOT_IDLE; 2382 idle = NOT_IDLE;
2322 } 2383 }
2323 sd->last_balance += interval; 2384 sd->last_balance += interval;
@@ -2576,6 +2637,13 @@ out:
2576} 2637}
2577 2638
2578#ifdef CONFIG_SCHED_SMT 2639#ifdef CONFIG_SCHED_SMT
2640static inline void wakeup_busy_runqueue(runqueue_t *rq)
2641{
2642 /* If an SMT runqueue is sleeping due to priority reasons wake it up */
2643 if (rq->curr == rq->idle && rq->nr_running)
2644 resched_task(rq->idle);
2645}
2646
2579static inline void wake_sleeping_dependent(int this_cpu, runqueue_t *this_rq) 2647static inline void wake_sleeping_dependent(int this_cpu, runqueue_t *this_rq)
2580{ 2648{
2581 struct sched_domain *tmp, *sd = NULL; 2649 struct sched_domain *tmp, *sd = NULL;
@@ -2609,12 +2677,7 @@ static inline void wake_sleeping_dependent(int this_cpu, runqueue_t *this_rq)
2609 for_each_cpu_mask(i, sibling_map) { 2677 for_each_cpu_mask(i, sibling_map) {
2610 runqueue_t *smt_rq = cpu_rq(i); 2678 runqueue_t *smt_rq = cpu_rq(i);
2611 2679
2612 /* 2680 wakeup_busy_runqueue(smt_rq);
2613 * If an SMT sibling task is sleeping due to priority
2614 * reasons wake it up now.
2615 */
2616 if (smt_rq->curr == smt_rq->idle && smt_rq->nr_running)
2617 resched_task(smt_rq->idle);
2618 } 2681 }
2619 2682
2620 for_each_cpu_mask(i, sibling_map) 2683 for_each_cpu_mask(i, sibling_map)
@@ -2625,6 +2688,16 @@ static inline void wake_sleeping_dependent(int this_cpu, runqueue_t *this_rq)
2625 */ 2688 */
2626} 2689}
2627 2690
2691/*
2692 * number of 'lost' timeslices this task wont be able to fully
2693 * utilize, if another task runs on a sibling. This models the
2694 * slowdown effect of other tasks running on siblings:
2695 */
2696static inline unsigned long smt_slice(task_t *p, struct sched_domain *sd)
2697{
2698 return p->time_slice * (100 - sd->per_cpu_gain) / 100;
2699}
2700
2628static inline int dependent_sleeper(int this_cpu, runqueue_t *this_rq) 2701static inline int dependent_sleeper(int this_cpu, runqueue_t *this_rq)
2629{ 2702{
2630 struct sched_domain *tmp, *sd = NULL; 2703 struct sched_domain *tmp, *sd = NULL;
@@ -2668,6 +2741,10 @@ static inline int dependent_sleeper(int this_cpu, runqueue_t *this_rq)
2668 runqueue_t *smt_rq = cpu_rq(i); 2741 runqueue_t *smt_rq = cpu_rq(i);
2669 task_t *smt_curr = smt_rq->curr; 2742 task_t *smt_curr = smt_rq->curr;
2670 2743
2744 /* Kernel threads do not participate in dependent sleeping */
2745 if (!p->mm || !smt_curr->mm || rt_task(p))
2746 goto check_smt_task;
2747
2671 /* 2748 /*
2672 * If a user task with lower static priority than the 2749 * If a user task with lower static priority than the
2673 * running task on the SMT sibling is trying to schedule, 2750 * running task on the SMT sibling is trying to schedule,
@@ -2676,21 +2753,45 @@ static inline int dependent_sleeper(int this_cpu, runqueue_t *this_rq)
2676 * task from using an unfair proportion of the 2753 * task from using an unfair proportion of the
2677 * physical cpu's resources. -ck 2754 * physical cpu's resources. -ck
2678 */ 2755 */
2679 if (((smt_curr->time_slice * (100 - sd->per_cpu_gain) / 100) > 2756 if (rt_task(smt_curr)) {
2680 task_timeslice(p) || rt_task(smt_curr)) && 2757 /*
2681 p->mm && smt_curr->mm && !rt_task(p)) 2758 * With real time tasks we run non-rt tasks only
2682 ret = 1; 2759 * per_cpu_gain% of the time.
2760 */
2761 if ((jiffies % DEF_TIMESLICE) >
2762 (sd->per_cpu_gain * DEF_TIMESLICE / 100))
2763 ret = 1;
2764 } else
2765 if (smt_curr->static_prio < p->static_prio &&
2766 !TASK_PREEMPTS_CURR(p, smt_rq) &&
2767 smt_slice(smt_curr, sd) > task_timeslice(p))
2768 ret = 1;
2769
2770check_smt_task:
2771 if ((!smt_curr->mm && smt_curr != smt_rq->idle) ||
2772 rt_task(smt_curr))
2773 continue;
2774 if (!p->mm) {
2775 wakeup_busy_runqueue(smt_rq);
2776 continue;
2777 }
2683 2778
2684 /* 2779 /*
2685 * Reschedule a lower priority task on the SMT sibling, 2780 * Reschedule a lower priority task on the SMT sibling for
2686 * or wake it up if it has been put to sleep for priority 2781 * it to be put to sleep, or wake it up if it has been put to
2687 * reasons. 2782 * sleep for priority reasons to see if it should run now.
2688 */ 2783 */
2689 if ((((p->time_slice * (100 - sd->per_cpu_gain) / 100) > 2784 if (rt_task(p)) {
2690 task_timeslice(smt_curr) || rt_task(p)) && 2785 if ((jiffies % DEF_TIMESLICE) >
2691 smt_curr->mm && p->mm && !rt_task(smt_curr)) || 2786 (sd->per_cpu_gain * DEF_TIMESLICE / 100))
2692 (smt_curr == smt_rq->idle && smt_rq->nr_running)) 2787 resched_task(smt_curr);
2693 resched_task(smt_curr); 2788 } else {
2789 if (TASK_PREEMPTS_CURR(p, smt_rq) &&
2790 smt_slice(p, sd) > task_timeslice(smt_curr))
2791 resched_task(smt_curr);
2792 else
2793 wakeup_busy_runqueue(smt_rq);
2794 }
2694 } 2795 }
2695out_unlock: 2796out_unlock:
2696 for_each_cpu_mask(i, sibling_map) 2797 for_each_cpu_mask(i, sibling_map)
@@ -2888,6 +2989,7 @@ switch_tasks:
2888 if (next == rq->idle) 2989 if (next == rq->idle)
2889 schedstat_inc(rq, sched_goidle); 2990 schedstat_inc(rq, sched_goidle);
2890 prefetch(next); 2991 prefetch(next);
2992 prefetch_stack(next);
2891 clear_tsk_need_resched(prev); 2993 clear_tsk_need_resched(prev);
2892 rcu_qsctr_inc(task_cpu(prev)); 2994 rcu_qsctr_inc(task_cpu(prev));
2893 2995
@@ -3015,7 +3117,8 @@ need_resched:
3015 3117
3016#endif /* CONFIG_PREEMPT */ 3118#endif /* CONFIG_PREEMPT */
3017 3119
3018int default_wake_function(wait_queue_t *curr, unsigned mode, int sync, void *key) 3120int default_wake_function(wait_queue_t *curr, unsigned mode, int sync,
3121 void *key)
3019{ 3122{
3020 task_t *p = curr->private; 3123 task_t *p = curr->private;
3021 return try_to_wake_up(p, mode, sync); 3124 return try_to_wake_up(p, mode, sync);
@@ -3057,7 +3160,7 @@ static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
3057 * @key: is directly passed to the wakeup function 3160 * @key: is directly passed to the wakeup function
3058 */ 3161 */
3059void fastcall __wake_up(wait_queue_head_t *q, unsigned int mode, 3162void fastcall __wake_up(wait_queue_head_t *q, unsigned int mode,
3060 int nr_exclusive, void *key) 3163 int nr_exclusive, void *key)
3061{ 3164{
3062 unsigned long flags; 3165 unsigned long flags;
3063 3166
@@ -3089,7 +3192,8 @@ void fastcall __wake_up_locked(wait_queue_head_t *q, unsigned int mode)
3089 * 3192 *
3090 * On UP it can prevent extra preemption. 3193 * On UP it can prevent extra preemption.
3091 */ 3194 */
3092void fastcall __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive) 3195void fastcall
3196__wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive)
3093{ 3197{
3094 unsigned long flags; 3198 unsigned long flags;
3095 int sync = 1; 3199 int sync = 1;
@@ -3280,7 +3384,8 @@ void fastcall __sched interruptible_sleep_on(wait_queue_head_t *q)
3280 3384
3281EXPORT_SYMBOL(interruptible_sleep_on); 3385EXPORT_SYMBOL(interruptible_sleep_on);
3282 3386
3283long fastcall __sched interruptible_sleep_on_timeout(wait_queue_head_t *q, long timeout) 3387long fastcall __sched
3388interruptible_sleep_on_timeout(wait_queue_head_t *q, long timeout)
3284{ 3389{
3285 SLEEP_ON_VAR 3390 SLEEP_ON_VAR
3286 3391
@@ -3499,7 +3604,8 @@ static void __setscheduler(struct task_struct *p, int policy, int prio)
3499 * @policy: new policy. 3604 * @policy: new policy.
3500 * @param: structure containing the new RT priority. 3605 * @param: structure containing the new RT priority.
3501 */ 3606 */
3502int sched_setscheduler(struct task_struct *p, int policy, struct sched_param *param) 3607int sched_setscheduler(struct task_struct *p, int policy,
3608 struct sched_param *param)
3503{ 3609{
3504 int retval; 3610 int retval;
3505 int oldprio, oldpolicy = -1; 3611 int oldprio, oldpolicy = -1;
@@ -3519,7 +3625,7 @@ recheck:
3519 * 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_NORMAL is 0. 3625 * 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_NORMAL is 0.
3520 */ 3626 */
3521 if (param->sched_priority < 0 || 3627 if (param->sched_priority < 0 ||
3522 (p->mm && param->sched_priority > MAX_USER_RT_PRIO-1) || 3628 (p->mm && param->sched_priority > MAX_USER_RT_PRIO-1) ||
3523 (!p->mm && param->sched_priority > MAX_RT_PRIO-1)) 3629 (!p->mm && param->sched_priority > MAX_RT_PRIO-1))
3524 return -EINVAL; 3630 return -EINVAL;
3525 if ((policy == SCHED_NORMAL) != (param->sched_priority == 0)) 3631 if ((policy == SCHED_NORMAL) != (param->sched_priority == 0))
@@ -3582,7 +3688,8 @@ recheck:
3582} 3688}
3583EXPORT_SYMBOL_GPL(sched_setscheduler); 3689EXPORT_SYMBOL_GPL(sched_setscheduler);
3584 3690
3585static int do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param) 3691static int
3692do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
3586{ 3693{
3587 int retval; 3694 int retval;
3588 struct sched_param lparam; 3695 struct sched_param lparam;
@@ -3849,7 +3956,7 @@ asmlinkage long sys_sched_yield(void)
3849 if (rt_task(current)) 3956 if (rt_task(current))
3850 target = rq->active; 3957 target = rq->active;
3851 3958
3852 if (current->array->nr_active == 1) { 3959 if (array->nr_active == 1) {
3853 schedstat_inc(rq, yld_act_empty); 3960 schedstat_inc(rq, yld_act_empty);
3854 if (!rq->expired->nr_active) 3961 if (!rq->expired->nr_active)
3855 schedstat_inc(rq, yld_both_empty); 3962 schedstat_inc(rq, yld_both_empty);
@@ -3913,7 +4020,7 @@ EXPORT_SYMBOL(cond_resched);
3913 * operations here to prevent schedule() from being called twice (once via 4020 * operations here to prevent schedule() from being called twice (once via
3914 * spin_unlock(), once by hand). 4021 * spin_unlock(), once by hand).
3915 */ 4022 */
3916int cond_resched_lock(spinlock_t * lock) 4023int cond_resched_lock(spinlock_t *lock)
3917{ 4024{
3918 int ret = 0; 4025 int ret = 0;
3919 4026
@@ -4096,7 +4203,7 @@ static inline struct task_struct *younger_sibling(struct task_struct *p)
4096 return list_entry(p->sibling.next,struct task_struct,sibling); 4203 return list_entry(p->sibling.next,struct task_struct,sibling);
4097} 4204}
4098 4205
4099static void show_task(task_t * p) 4206static void show_task(task_t *p)
4100{ 4207{
4101 task_t *relative; 4208 task_t *relative;
4102 unsigned state; 4209 unsigned state;
@@ -4122,7 +4229,7 @@ static void show_task(task_t * p)
4122#endif 4229#endif
4123#ifdef CONFIG_DEBUG_STACK_USAGE 4230#ifdef CONFIG_DEBUG_STACK_USAGE
4124 { 4231 {
4125 unsigned long * n = (unsigned long *) (p->thread_info+1); 4232 unsigned long *n = (unsigned long *) (p->thread_info+1);
4126 while (!*n) 4233 while (!*n)
4127 n++; 4234 n++;
4128 free = (unsigned long) n - (unsigned long)(p->thread_info+1); 4235 free = (unsigned long) n - (unsigned long)(p->thread_info+1);
@@ -4331,7 +4438,7 @@ out:
4331 * thread migration by bumping thread off CPU then 'pushing' onto 4438 * thread migration by bumping thread off CPU then 'pushing' onto
4332 * another runqueue. 4439 * another runqueue.
4333 */ 4440 */
4334static int migration_thread(void * data) 4441static int migration_thread(void *data)
4335{ 4442{
4336 runqueue_t *rq; 4443 runqueue_t *rq;
4337 int cpu = (long)data; 4444 int cpu = (long)data;
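
Among the sched.c changes, dependent_sleeper() now uses smt_slice() to estimate how much of a timeslice is effectively lost to a busy SMT sibling before deciding whether to delay a task or reschedule the sibling. A worked example with illustrative numbers (per_cpu_gain of 25 is assumed here as the usual default for SMT sibling domains):

/* Worked example of the smt_slice() arithmetic with made-up values. */
static unsigned long example_smt_slice(void)
{
	unsigned long time_slice = 100;		/* task timeslice, in timer ticks */
	unsigned long per_cpu_gain = 25;	/* sd->per_cpu_gain for SMT (%)   */

	/* p->time_slice * (100 - sd->per_cpu_gain) / 100 */
	return time_slice * (100 - per_cpu_gain) / 100;	/* == 75 */
}
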
diff --git a/kernel/signal.c b/kernel/signal.c
index 4980a073237f..b92c3c9f8b9a 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2221,8 +2221,7 @@ sys_rt_sigtimedwait(const sigset_t __user *uthese,
2221 recalc_sigpending(); 2221 recalc_sigpending();
2222 spin_unlock_irq(&current->sighand->siglock); 2222 spin_unlock_irq(&current->sighand->siglock);
2223 2223
2224 current->state = TASK_INTERRUPTIBLE; 2224 timeout = schedule_timeout_interruptible(timeout);
2225 timeout = schedule_timeout(timeout);
2226 2225
2227 try_to_freeze(); 2226 try_to_freeze();
2228 spin_lock_irq(&current->sighand->siglock); 2227 spin_lock_irq(&current->sighand->siglock);
diff --git a/kernel/spinlock.c b/kernel/spinlock.c
index 0c3f9d8bbe17..0375fcd5921d 100644
--- a/kernel/spinlock.c
+++ b/kernel/spinlock.c
@@ -3,7 +3,10 @@
3 * 3 *
4 * Author: Zwane Mwaikambo <zwane@fsmlabs.com> 4 * Author: Zwane Mwaikambo <zwane@fsmlabs.com>
5 * 5 *
6 * Copyright (2004) Ingo Molnar 6 * Copyright (2004, 2005) Ingo Molnar
7 *
8 * This file contains the spinlock/rwlock implementations for the
9 * SMP and the DEBUG_SPINLOCK cases. (UP-nondebug inlines them)
7 */ 10 */
8 11
9#include <linux/config.h> 12#include <linux/config.h>
@@ -17,12 +20,12 @@
17 * Generic declaration of the raw read_trylock() function, 20 * Generic declaration of the raw read_trylock() function,
18 * architectures are supposed to optimize this: 21 * architectures are supposed to optimize this:
19 */ 22 */
20int __lockfunc generic_raw_read_trylock(rwlock_t *lock) 23int __lockfunc generic__raw_read_trylock(raw_rwlock_t *lock)
21{ 24{
22 _raw_read_lock(lock); 25 __raw_read_lock(lock);
23 return 1; 26 return 1;
24} 27}
25EXPORT_SYMBOL(generic_raw_read_trylock); 28EXPORT_SYMBOL(generic__raw_read_trylock);
26 29
27int __lockfunc _spin_trylock(spinlock_t *lock) 30int __lockfunc _spin_trylock(spinlock_t *lock)
28{ 31{
@@ -57,7 +60,7 @@ int __lockfunc _write_trylock(rwlock_t *lock)
57} 60}
58EXPORT_SYMBOL(_write_trylock); 61EXPORT_SYMBOL(_write_trylock);
59 62
60#ifndef CONFIG_PREEMPT 63#if !defined(CONFIG_PREEMPT) || !defined(CONFIG_SMP)
61 64
62void __lockfunc _read_lock(rwlock_t *lock) 65void __lockfunc _read_lock(rwlock_t *lock)
63{ 66{
@@ -72,7 +75,7 @@ unsigned long __lockfunc _spin_lock_irqsave(spinlock_t *lock)
72 75
73 local_irq_save(flags); 76 local_irq_save(flags);
74 preempt_disable(); 77 preempt_disable();
75 _raw_spin_lock_flags(lock, flags); 78 _raw_spin_lock_flags(lock, &flags);
76 return flags; 79 return flags;
77} 80}
78EXPORT_SYMBOL(_spin_lock_irqsave); 81EXPORT_SYMBOL(_spin_lock_irqsave);
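
_spin_lock_irqsave() now passes &flags down to _raw_spin_lock_flags() so an architecture's slow path can look at the caller's saved IRQ state while it spins, typically to re-enable interrupts briefly during long contention. A hypothetical illustration of such a slow path (not any architecture's real implementation; only the __raw_* primitives are assumed):

static inline void example_raw_spin_lock_flags(raw_spinlock_t *lock,
					       unsigned long *flags)
{
	while (!__raw_spin_trylock(lock)) {
		/* Restore the caller's IRQ state (possibly enabling
		 * interrupts) while we wait for the lock to be freed. */
		local_irq_restore(*flags);
		while (__raw_spin_is_locked(lock))
			cpu_relax();
		local_irq_disable();	/* disable again before retrying */
	}
}
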
diff --git a/kernel/timer.c b/kernel/timer.c
index 13e2b513be01..f4152fcd9f8e 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1154,6 +1154,20 @@ fastcall signed long __sched schedule_timeout(signed long timeout)
1154 1154
1155EXPORT_SYMBOL(schedule_timeout); 1155EXPORT_SYMBOL(schedule_timeout);
1156 1156
1157signed long __sched schedule_timeout_interruptible(signed long timeout)
1158{
1159 set_current_state(TASK_INTERRUPTIBLE);
1160 return schedule_timeout(timeout);
1161}
1162EXPORT_SYMBOL(schedule_timeout_interruptible);
1163
1164signed long __sched schedule_timeout_uninterruptible(signed long timeout)
1165{
1166 set_current_state(TASK_UNINTERRUPTIBLE);
1167 return schedule_timeout(timeout);
1168}
1169EXPORT_SYMBOL(schedule_timeout_uninterruptible);
1170
1157/* Thread ID - the internal kernel "pid" */ 1171/* Thread ID - the internal kernel "pid" */
1158asmlinkage long sys_gettid(void) 1172asmlinkage long sys_gettid(void)
1159{ 1173{
@@ -1170,8 +1184,7 @@ static long __sched nanosleep_restart(struct restart_block *restart)
1170 if (!time_after(expire, now)) 1184 if (!time_after(expire, now))
1171 return 0; 1185 return 0;
1172 1186
1173 current->state = TASK_INTERRUPTIBLE; 1187 expire = schedule_timeout_interruptible(expire - now);
1174 expire = schedule_timeout(expire - now);
1175 1188
1176 ret = 0; 1189 ret = 0;
1177 if (expire) { 1190 if (expire) {
@@ -1199,8 +1212,7 @@ asmlinkage long sys_nanosleep(struct timespec __user *rqtp, struct timespec __us
1199 return -EINVAL; 1212 return -EINVAL;
1200 1213
1201 expire = timespec_to_jiffies(&t) + (t.tv_sec || t.tv_nsec); 1214 expire = timespec_to_jiffies(&t) + (t.tv_sec || t.tv_nsec);
1202 current->state = TASK_INTERRUPTIBLE; 1215 expire = schedule_timeout_interruptible(expire);
1203 expire = schedule_timeout(expire);
1204 1216
1205 ret = 0; 1217 ret = 0;
1206 if (expire) { 1218 if (expire) {
@@ -1598,10 +1610,8 @@ void msleep(unsigned int msecs)
1598{ 1610{
1599 unsigned long timeout = msecs_to_jiffies(msecs) + 1; 1611 unsigned long timeout = msecs_to_jiffies(msecs) + 1;
1600 1612
1601 while (timeout) { 1613 while (timeout)
1602 set_current_state(TASK_UNINTERRUPTIBLE); 1614 timeout = schedule_timeout_uninterruptible(timeout);
1603 timeout = schedule_timeout(timeout);
1604 }
1605} 1615}
1606 1616
1607EXPORT_SYMBOL(msleep); 1617EXPORT_SYMBOL(msleep);
@@ -1614,10 +1624,8 @@ unsigned long msleep_interruptible(unsigned int msecs)
1614{ 1624{
1615 unsigned long timeout = msecs_to_jiffies(msecs) + 1; 1625 unsigned long timeout = msecs_to_jiffies(msecs) + 1;
1616 1626
1617 while (timeout && !signal_pending(current)) { 1627 while (timeout && !signal_pending(current))
1618 set_current_state(TASK_INTERRUPTIBLE); 1628 timeout = schedule_timeout_interruptible(timeout);
1619 timeout = schedule_timeout(timeout);
1620 }
1621 return jiffies_to_msecs(timeout); 1629 return jiffies_to_msecs(timeout);
1622} 1630}
1623 1631
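
The new schedule_timeout_interruptible() and schedule_timeout_uninterruptible() helpers fold the common "set the task state, then schedule_timeout()" pair into one call, which is what the compat.c, signal.c and timer.c call sites above were converted to. A minimal usage sketch; the surrounding function is made up:

/* Hypothetical caller showing the pattern the conversions follow. */
static signed long example_wait_a_while(unsigned long timeout_jiffies)
{
	/* Before this patch, callers open-coded:
	 *	current->state = TASK_INTERRUPTIBLE;
	 *	return schedule_timeout(timeout_jiffies);
	 */

	/* Now one helper sets the task state and sleeps; the return value
	 * is the number of jiffies left if the sleep ended early. */
	return schedule_timeout_interruptible(timeout_jiffies);
}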