Diffstat (limited to 'kernel')
-rw-r--r--  kernel/Makefile   |   1
-rw-r--r--  kernel/acct.c     |  43
-rw-r--r--  kernel/compat.c   |   9
-rw-r--r--  kernel/cpuset.c   | 104
-rw-r--r--  kernel/exit.c     |  26
-rw-r--r--  kernel/fork.c     | 101
-rw-r--r--  kernel/rcupdate.c |  14
-rw-r--r--  kernel/sched.c    | 233
-rw-r--r--  kernel/signal.c   |   3
-rw-r--r--  kernel/spinlock.c |  15
-rw-r--r--  kernel/timer.c    |  32
11 files changed, 392 insertions, 189 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 8d57a2f1226b..ff4dc02ce170 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -12,6 +12,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
 obj-$(CONFIG_FUTEX) += futex.o
 obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
 obj-$(CONFIG_SMP) += cpu.o spinlock.o
+obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o
 obj-$(CONFIG_UID16) += uid16.o
 obj-$(CONFIG_MODULES) += module.o
 obj-$(CONFIG_KALLSYMS) += kallsyms.o
diff --git a/kernel/acct.c b/kernel/acct.c
index f70e6027cca9..b756f527497e 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -165,7 +165,7 @@ out:
 }
 
 /*
- * Close the old accouting file (if currently open) and then replace
+ * Close the old accounting file (if currently open) and then replace
  * it with file (if non-NULL).
  *
  * NOTE: acct_globals.lock MUST be held on entry and exit.
@@ -199,11 +199,16 @@ static void acct_file_reopen(struct file *file)
 	}
 }
 
-/*
- * sys_acct() is the only system call needed to implement process
- * accounting. It takes the name of the file where accounting records
- * should be written. If the filename is NULL, accounting will be
- * shutdown.
+/**
+ * sys_acct - enable/disable process accounting
+ * @name: file name for accounting records or NULL to shutdown accounting
+ *
+ * Returns 0 for success or negative errno values for failure.
+ *
+ * sys_acct() is the only system call needed to implement process
+ * accounting. It takes the name of the file where accounting records
+ * should be written. If the filename is NULL, accounting will be
+ * shutdown.
  */
 asmlinkage long sys_acct(const char __user *name)
 {
@@ -250,9 +255,12 @@ asmlinkage long sys_acct(const char __user *name)
 	return (0);
 }
 
-/*
- * If the accouting is turned on for a file in the filesystem pointed
- * to by sb, turn accouting off.
+/**
+ * acct_auto_close - turn off a filesystem's accounting if it is on
+ * @sb: super block for the filesystem
+ *
+ * If the accounting is turned on for a file in the filesystem pointed
+ * to by sb, turn accounting off.
  */
 void acct_auto_close(struct super_block *sb)
 {
@@ -503,8 +511,11 @@ static void do_acct_process(long exitcode, struct file *file)
 	set_fs(fs);
 }
 
-/*
+/**
  * acct_process - now just a wrapper around do_acct_process
+ * @exitcode: task exit code
+ *
+ * handles process accounting for an exiting task
  */
 void acct_process(long exitcode)
 {
@@ -530,9 +541,9 @@ void acct_process(long exitcode)
 }
 
 
-/*
- * acct_update_integrals
- *  - update mm integral fields in task_struct
+/**
+ * acct_update_integrals - update mm integral fields in task_struct
+ * @tsk: task_struct for accounting
  */
 void acct_update_integrals(struct task_struct *tsk)
 {
@@ -547,9 +558,9 @@ void acct_update_integrals(struct task_struct *tsk)
 	}
 }
 
-/*
- * acct_clear_integrals
- *  - clear the mm integral fields in task_struct
+/**
+ * acct_clear_integrals - clear the mm integral fields in task_struct
+ * @tsk: task_struct whose accounting fields are cleared
  */
 void acct_clear_integrals(struct task_struct *tsk)
 {
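For reference, the interface that sys_acct() exposes is the acct(2) call the new kernel-doc describes. A minimal userspace sketch (not part of this patch; the log path is illustrative, and the caller needs CAP_SYS_PACCT plus a kernel built with BSD process accounting):

#include <stdio.h>
#include <unistd.h>

int main(void)
{
	if (acct("/var/log/pacct") != 0)	/* non-NULL name: start writing records */
		perror("acct on");
	/* ... run the workload to be accounted ... */
	if (acct(NULL) != 0)			/* NULL name: shut accounting down */
		perror("acct off");
	return 0;
}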
diff --git a/kernel/compat.c b/kernel/compat.c
index ddfcaaa86623..102296e21ea8 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -48,8 +48,7 @@ static long compat_nanosleep_restart(struct restart_block *restart)
 	if (!time_after(expire, now))
 		return 0;
 
-	current->state = TASK_INTERRUPTIBLE;
-	expire = schedule_timeout(expire - now);
+	expire = schedule_timeout_interruptible(expire - now);
 	if (expire == 0)
 		return 0;
 
@@ -82,8 +81,7 @@ asmlinkage long compat_sys_nanosleep(struct compat_timespec __user *rqtp,
 		return -EINVAL;
 
 	expire = timespec_to_jiffies(&t) + (t.tv_sec || t.tv_nsec);
-	current->state = TASK_INTERRUPTIBLE;
-	expire = schedule_timeout(expire);
+	expire = schedule_timeout_interruptible(expire);
 	if (expire == 0)
 		return 0;
 
@@ -795,8 +793,7 @@ compat_sys_rt_sigtimedwait (compat_sigset_t __user *uthese,
 	recalc_sigpending();
 	spin_unlock_irq(&current->sighand->siglock);
 
-	current->state = TASK_INTERRUPTIBLE;
-	timeout = schedule_timeout(timeout);
+	timeout = schedule_timeout_interruptible(timeout);
 
 	spin_lock_irq(&current->sighand->siglock);
 	sig = dequeue_signal(current, &s, &info);
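All three hunks above are the same mechanical conversion: the open-coded "set state, then schedule_timeout()" pair becomes a single call. The helper itself is introduced by the kernel/timer.c part of this series (listed in the diffstat, not shown in this excerpt) and is essentially just:

signed long __sched schedule_timeout_interruptible(signed long timeout)
{
	__set_current_state(TASK_INTERRUPTIBLE);
	return schedule_timeout(timeout);
}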
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 1f06e7690106..407b5f0a8c8e 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -182,6 +182,37 @@ static struct super_block *cpuset_sb = NULL;
 static DECLARE_MUTEX(cpuset_sem);
 
 /*
+ * The global cpuset semaphore cpuset_sem can be needed by the
+ * memory allocator to update a tasks mems_allowed (see the calls
+ * to cpuset_update_current_mems_allowed()) or to walk up the
+ * cpuset hierarchy to find a mem_exclusive cpuset see the calls
+ * to cpuset_excl_nodes_overlap()).
+ *
+ * But if the memory allocation is being done by cpuset.c code, it
+ * usually already holds cpuset_sem. Double tripping on a kernel
+ * semaphore deadlocks the current task, and any other task that
+ * subsequently tries to obtain the lock.
+ *
+ * Run all up's and down's on cpuset_sem through the following
+ * wrappers, which will detect this nested locking, and avoid
+ * deadlocking.
+ */
+
+static inline void cpuset_down(struct semaphore *psem)
+{
+	if (current->cpuset_sem_nest_depth == 0)
+		down(psem);
+	current->cpuset_sem_nest_depth++;
+}
+
+static inline void cpuset_up(struct semaphore *psem)
+{
+	current->cpuset_sem_nest_depth--;
+	if (current->cpuset_sem_nest_depth == 0)
+		up(psem);
+}
+
+/*
 * A couple of forward declarations required, due to cyclic reference loop:
 * cpuset_mkdir -> cpuset_create -> cpuset_populate_dir -> cpuset_add_file
 * -> cpuset_create_file -> cpuset_dir_inode_operations -> cpuset_mkdir.
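The point of the nest-depth counter is that a task already holding cpuset_sem may re-enter cpuset code through the memory allocator without deadlocking against itself. A standalone userspace model of the same idea (illustrative only, substituting a pthread mutex and a thread-local depth counter for the semaphore and the task_struct field):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static __thread int nest_depth;		/* stands in for current->cpuset_sem_nest_depth */

static void model_down(void)
{
	if (nest_depth == 0)
		pthread_mutex_lock(&lock);	/* only the outermost call takes the lock */
	nest_depth++;
}

static void model_up(void)
{
	nest_depth--;
	if (nest_depth == 0)
		pthread_mutex_unlock(&lock);	/* only the outermost call releases it */
}

static void allocator_path(void)
{
	model_down();			/* nested: depth 1 -> 2, no second lock() */
	model_up();
}

int main(void)
{
	model_down();			/* outer: depth 0 -> 1, lock taken */
	allocator_path();		/* a plain non-recursive mutex would deadlock here */
	model_up();
	printf("done, depth = %d\n", nest_depth);
	return 0;
}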
@@ -522,19 +553,10 @@ static void guarantee_online_mems(const struct cpuset *cs, nodemask_t *pmask)
  * Refresh current tasks mems_allowed and mems_generation from
  * current tasks cpuset. Call with cpuset_sem held.
  *
- * Be sure to call refresh_mems() on any cpuset operation which
- * (1) holds cpuset_sem, and (2) might possibly alloc memory.
- * Call after obtaining cpuset_sem lock, before any possible
- * allocation. Otherwise one risks trying to allocate memory
- * while the task cpuset_mems_generation is not the same as
- * the mems_generation in its cpuset, which would deadlock on
- * cpuset_sem in cpuset_update_current_mems_allowed().
- *
- * Since we hold cpuset_sem, once refresh_mems() is called, the
- * test (current->cpuset_mems_generation != cs->mems_generation)
- * in cpuset_update_current_mems_allowed() will remain false,
- * until we drop cpuset_sem. Anyone else who would change our
- * cpusets mems_generation needs to lock cpuset_sem first.
+ * This routine is needed to update the per-task mems_allowed
+ * data, within the tasks context, when it is trying to allocate
+ * memory (in various mm/mempolicy.c routines) and notices
+ * that some other task has been modifying its cpuset.
  */
 
 static void refresh_mems(void)
@@ -840,7 +862,7 @@ static ssize_t cpuset_common_file_write(struct file *file, const char __user *us
 	}
 	buffer[nbytes] = 0;	/* nul-terminate */
 
-	down(&cpuset_sem);
+	cpuset_down(&cpuset_sem);
 
 	if (is_removed(cs)) {
 		retval = -ENODEV;
@@ -874,7 +896,7 @@ static ssize_t cpuset_common_file_write(struct file *file, const char __user *us
 	if (retval == 0)
 		retval = nbytes;
out2:
-	up(&cpuset_sem);
+	cpuset_up(&cpuset_sem);
 	cpuset_release_agent(pathbuf);
out1:
 	kfree(buffer);
@@ -914,9 +936,9 @@ static int cpuset_sprintf_cpulist(char *page, struct cpuset *cs)
 {
 	cpumask_t mask;
 
-	down(&cpuset_sem);
+	cpuset_down(&cpuset_sem);
 	mask = cs->cpus_allowed;
-	up(&cpuset_sem);
+	cpuset_up(&cpuset_sem);
 
 	return cpulist_scnprintf(page, PAGE_SIZE, mask);
 }
@@ -925,9 +947,9 @@ static int cpuset_sprintf_memlist(char *page, struct cpuset *cs)
 {
 	nodemask_t mask;
 
-	down(&cpuset_sem);
+	cpuset_down(&cpuset_sem);
 	mask = cs->mems_allowed;
-	up(&cpuset_sem);
+	cpuset_up(&cpuset_sem);
 
 	return nodelist_scnprintf(page, PAGE_SIZE, mask);
 }
@@ -972,6 +994,10 @@ static ssize_t cpuset_common_file_read(struct file *file, char __user *buf,
 	*s++ = '\n';
 	*s = '\0';
 
+	/* Do nothing if *ppos is at the eof or beyond the eof. */
+	if (s - page <= *ppos)
+		return 0;
+
 	start = page + *ppos;
 	n = s - start;
 	retval = n - copy_to_user(buf, start, min(n, nbytes));
@@ -1330,8 +1356,7 @@ static long cpuset_create(struct cpuset *parent, const char *name, int mode)
 	if (!cs)
 		return -ENOMEM;
 
-	down(&cpuset_sem);
-	refresh_mems();
+	cpuset_down(&cpuset_sem);
 	cs->flags = 0;
 	if (notify_on_release(parent))
 		set_bit(CS_NOTIFY_ON_RELEASE, &cs->flags);
@@ -1356,14 +1381,14 @@ static long cpuset_create(struct cpuset *parent, const char *name, int mode)
 	 * will down() this new directory's i_sem and if we race with
 	 * another mkdir, we might deadlock.
 	 */
-	up(&cpuset_sem);
+	cpuset_up(&cpuset_sem);
 
 	err = cpuset_populate_dir(cs->dentry);
 	/* If err < 0, we have a half-filled directory - oh well ;) */
 	return 0;
err:
 	list_del(&cs->sibling);
-	up(&cpuset_sem);
+	cpuset_up(&cpuset_sem);
 	kfree(cs);
 	return err;
 }
@@ -1385,14 +1410,13 @@ static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry)
 
 	/* the vfs holds both inode->i_sem already */
 
-	down(&cpuset_sem);
-	refresh_mems();
+	cpuset_down(&cpuset_sem);
 	if (atomic_read(&cs->count) > 0) {
-		up(&cpuset_sem);
+		cpuset_up(&cpuset_sem);
 		return -EBUSY;
 	}
 	if (!list_empty(&cs->children)) {
-		up(&cpuset_sem);
+		cpuset_up(&cpuset_sem);
 		return -EBUSY;
 	}
 	parent = cs->parent;
@@ -1408,7 +1432,7 @@ static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry)
 	spin_unlock(&d->d_lock);
 	cpuset_d_remove_dir(d);
 	dput(d);
-	up(&cpuset_sem);
+	cpuset_up(&cpuset_sem);
 	cpuset_release_agent(pathbuf);
 	return 0;
 }
@@ -1511,10 +1535,10 @@ void cpuset_exit(struct task_struct *tsk)
 	if (notify_on_release(cs)) {
 		char *pathbuf = NULL;
 
-		down(&cpuset_sem);
+		cpuset_down(&cpuset_sem);
 		if (atomic_dec_and_test(&cs->count))
 			check_for_release(cs, &pathbuf);
-		up(&cpuset_sem);
+		cpuset_up(&cpuset_sem);
 		cpuset_release_agent(pathbuf);
 	} else {
 		atomic_dec(&cs->count);
@@ -1535,11 +1559,11 @@ cpumask_t cpuset_cpus_allowed(const struct task_struct *tsk)
 {
 	cpumask_t mask;
 
-	down(&cpuset_sem);
+	cpuset_down(&cpuset_sem);
 	task_lock((struct task_struct *)tsk);
 	guarantee_online_cpus(tsk->cpuset, &mask);
 	task_unlock((struct task_struct *)tsk);
-	up(&cpuset_sem);
+	cpuset_up(&cpuset_sem);
 
 	return mask;
 }
@@ -1564,9 +1588,9 @@ void cpuset_update_current_mems_allowed(void)
 	if (!cs)
 		return;		/* task is exiting */
 	if (current->cpuset_mems_generation != cs->mems_generation) {
-		down(&cpuset_sem);
+		cpuset_down(&cpuset_sem);
 		refresh_mems();
-		up(&cpuset_sem);
+		cpuset_up(&cpuset_sem);
 	}
 }
 
@@ -1665,14 +1689,14 @@ int cpuset_zone_allowed(struct zone *z, unsigned int __nocast gfp_mask)
 		return 0;
 
 	/* Not hardwall and node outside mems_allowed: scan up cpusets */
-	down(&cpuset_sem);
+	cpuset_down(&cpuset_sem);
 	cs = current->cpuset;
 	if (!cs)
 		goto done;	/* current task exiting */
 	cs = nearest_exclusive_ancestor(cs);
 	allowed = node_isset(node, cs->mems_allowed);
done:
-	up(&cpuset_sem);
+	cpuset_up(&cpuset_sem);
 	return allowed;
 }
 
@@ -1693,7 +1717,7 @@ int cpuset_excl_nodes_overlap(const struct task_struct *p)
 	const struct cpuset *cs1, *cs2;	/* my and p's cpuset ancestors */
 	int overlap = 0;		/* do cpusets overlap? */
 
-	down(&cpuset_sem);
+	cpuset_down(&cpuset_sem);
 	cs1 = current->cpuset;
 	if (!cs1)
 		goto done;	/* current task exiting */
@@ -1704,7 +1728,7 @@ int cpuset_excl_nodes_overlap(const struct task_struct *p)
 	cs2 = nearest_exclusive_ancestor(cs2);
 	overlap = nodes_intersects(cs1->mems_allowed, cs2->mems_allowed);
done:
-	up(&cpuset_sem);
+	cpuset_up(&cpuset_sem);
 
 	return overlap;
 }
@@ -1727,7 +1751,7 @@ static int proc_cpuset_show(struct seq_file *m, void *v)
 		return -ENOMEM;
 
 	tsk = m->private;
-	down(&cpuset_sem);
+	cpuset_down(&cpuset_sem);
 	task_lock(tsk);
 	cs = tsk->cpuset;
 	task_unlock(tsk);
@@ -1742,7 +1766,7 @@ static int proc_cpuset_show(struct seq_file *m, void *v)
 	seq_puts(m, buf);
 	seq_putc(m, '\n');
out:
-	up(&cpuset_sem);
+	cpuset_up(&cpuset_sem);
 	kfree(buf);
 	return retval;
 }
diff --git a/kernel/exit.c b/kernel/exit.c
index 5b0fb9f09f21..6d2089a1bce7 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -368,17 +368,19 @@ EXPORT_SYMBOL(daemonize);
 static inline void close_files(struct files_struct * files)
 {
 	int i, j;
+	struct fdtable *fdt;
 
 	j = 0;
+	fdt = files_fdtable(files);
 	for (;;) {
 		unsigned long set;
 		i = j * __NFDBITS;
-		if (i >= files->max_fdset || i >= files->max_fds)
+		if (i >= fdt->max_fdset || i >= fdt->max_fds)
 			break;
-		set = files->open_fds->fds_bits[j++];
+		set = fdt->open_fds->fds_bits[j++];
 		while (set) {
 			if (set & 1) {
-				struct file * file = xchg(&files->fd[i], NULL);
+				struct file * file = xchg(&fdt->fd[i], NULL);
 				if (file)
 					filp_close(file, files);
 			}
@@ -403,18 +405,22 @@ struct files_struct *get_files_struct(struct task_struct *task)
 
 void fastcall put_files_struct(struct files_struct *files)
 {
+	struct fdtable *fdt;
+
 	if (atomic_dec_and_test(&files->count)) {
 		close_files(files);
 		/*
 		 * Free the fd and fdset arrays if we expanded them.
+		 * If the fdtable was embedded, pass files for freeing
+		 * at the end of the RCU grace period. Otherwise,
+		 * you can free files immediately.
 		 */
-		if (files->fd != &files->fd_array[0])
-			free_fd_array(files->fd, files->max_fds);
-		if (files->max_fdset > __FD_SETSIZE) {
-			free_fdset(files->open_fds, files->max_fdset);
-			free_fdset(files->close_on_exec, files->max_fdset);
-		}
-		kmem_cache_free(files_cachep, files);
+		fdt = files_fdtable(files);
+		if (fdt == &files->fdtab)
+			fdt->free_files = files;
+		else
+			kmem_cache_free(files_cachep, files);
+		free_fdtable(fdt);
 	}
 }
 
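Once the fd table is reached only through files_fdtable() and freed after an RCU grace period, lookups can run locklessly. A schematic reader in the style of the fget()/fcheck() paths (hypothetical helper name, simplified; real callers still follow the usual reference-counting rules before using the file):

static struct file *lookup_fd_rcu(struct files_struct *files, unsigned int fd)
{
	struct file *file = NULL;
	struct fdtable *fdt;

	rcu_read_lock();
	fdt = files_fdtable(files);		/* rcu_dereference() of the fdtable pointer */
	if (fd < fdt->max_fds)
		file = rcu_dereference(fdt->fd[fd]);
	rcu_read_unlock();
	return file;
}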
diff --git a/kernel/fork.c b/kernel/fork.c
index 7e1ead9a6ba4..8149f3602881 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -35,6 +35,7 @@
 #include <linux/syscalls.h>
 #include <linux/jiffies.h>
 #include <linux/futex.h>
+#include <linux/rcupdate.h>
 #include <linux/ptrace.h>
 #include <linux/mount.h>
 #include <linux/audit.h>
@@ -176,6 +177,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
 
 	/* One for us, one for whoever does the "release_task()" (usually parent) */
 	atomic_set(&tsk->usage,2);
+	atomic_set(&tsk->fs_excl, 0);
 	return tsk;
 }
 
@@ -564,24 +566,53 @@ static inline int copy_fs(unsigned long clone_flags, struct task_struct * tsk)
 	return 0;
 }
 
-static int count_open_files(struct files_struct *files, int size)
+static int count_open_files(struct fdtable *fdt)
 {
+	int size = fdt->max_fdset;
 	int i;
 
 	/* Find the last open fd */
 	for (i = size/(8*sizeof(long)); i > 0; ) {
-		if (files->open_fds->fds_bits[--i])
+		if (fdt->open_fds->fds_bits[--i])
 			break;
 	}
 	i = (i+1) * 8 * sizeof(long);
 	return i;
 }
 
+static struct files_struct *alloc_files(void)
+{
+	struct files_struct *newf;
+	struct fdtable *fdt;
+
+	newf = kmem_cache_alloc(files_cachep, SLAB_KERNEL);
+	if (!newf)
+		goto out;
+
+	atomic_set(&newf->count, 1);
+
+	spin_lock_init(&newf->file_lock);
+	fdt = &newf->fdtab;
+	fdt->next_fd = 0;
+	fdt->max_fds = NR_OPEN_DEFAULT;
+	fdt->max_fdset = __FD_SETSIZE;
+	fdt->close_on_exec = &newf->close_on_exec_init;
+	fdt->open_fds = &newf->open_fds_init;
+	fdt->fd = &newf->fd_array[0];
+	INIT_RCU_HEAD(&fdt->rcu);
+	fdt->free_files = NULL;
+	fdt->next = NULL;
+	rcu_assign_pointer(newf->fdt, fdt);
+out:
+	return newf;
+}
+
 static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
 {
 	struct files_struct *oldf, *newf;
 	struct file **old_fds, **new_fds;
 	int open_files, size, i, error = 0, expand;
+	struct fdtable *old_fdt, *new_fdt;
 
 	/*
 	 * A background process may not have any files ...
@@ -602,35 +633,27 @@ static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
 	 */
 	tsk->files = NULL;
 	error = -ENOMEM;
-	newf = kmem_cache_alloc(files_cachep, SLAB_KERNEL);
+	newf = alloc_files();
 	if (!newf)
 		goto out;
 
-	atomic_set(&newf->count, 1);
-
-	spin_lock_init(&newf->file_lock);
-	newf->next_fd = 0;
-	newf->max_fds = NR_OPEN_DEFAULT;
-	newf->max_fdset = __FD_SETSIZE;
-	newf->close_on_exec = &newf->close_on_exec_init;
-	newf->open_fds = &newf->open_fds_init;
-	newf->fd = &newf->fd_array[0];
-
 	spin_lock(&oldf->file_lock);
-
-	open_files = count_open_files(oldf, oldf->max_fdset);
+	old_fdt = files_fdtable(oldf);
+	new_fdt = files_fdtable(newf);
+	size = old_fdt->max_fdset;
+	open_files = count_open_files(old_fdt);
 	expand = 0;
 
 	/*
 	 * Check whether we need to allocate a larger fd array or fd set.
 	 * Note: we're not a clone task, so the open count won't change.
	 */
-	if (open_files > newf->max_fdset) {
-		newf->max_fdset = 0;
+	if (open_files > new_fdt->max_fdset) {
+		new_fdt->max_fdset = 0;
 		expand = 1;
 	}
-	if (open_files > newf->max_fds) {
-		newf->max_fds = 0;
+	if (open_files > new_fdt->max_fds) {
+		new_fdt->max_fds = 0;
 		expand = 1;
 	}
 
@@ -642,14 +665,21 @@ static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
 		spin_unlock(&newf->file_lock);
 		if (error < 0)
 			goto out_release;
+		new_fdt = files_fdtable(newf);
+		/*
+		 * Reacquire the oldf lock and a pointer to its fd table
+		 * who knows it may have a new bigger fd table. We need
+		 * the latest pointer.
+		 */
 		spin_lock(&oldf->file_lock);
+		old_fdt = files_fdtable(oldf);
 	}
 
-	old_fds = oldf->fd;
-	new_fds = newf->fd;
+	old_fds = old_fdt->fd;
+	new_fds = new_fdt->fd;
 
-	memcpy(newf->open_fds->fds_bits, oldf->open_fds->fds_bits, open_files/8);
-	memcpy(newf->close_on_exec->fds_bits, oldf->close_on_exec->fds_bits, open_files/8);
+	memcpy(new_fdt->open_fds->fds_bits, old_fdt->open_fds->fds_bits, open_files/8);
+	memcpy(new_fdt->close_on_exec->fds_bits, old_fdt->close_on_exec->fds_bits, open_files/8);
 
 	for (i = open_files; i != 0; i--) {
 		struct file *f = *old_fds++;
@@ -662,24 +692,24 @@ static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
			 * is partway through open(). So make sure that this
			 * fd is available to the new process.
			 */
-			FD_CLR(open_files - i, newf->open_fds);
+			FD_CLR(open_files - i, new_fdt->open_fds);
 		}
-		*new_fds++ = f;
+		rcu_assign_pointer(*new_fds++, f);
 	}
 	spin_unlock(&oldf->file_lock);
 
 	/* compute the remainder to be cleared */
-	size = (newf->max_fds - open_files) * sizeof(struct file *);
+	size = (new_fdt->max_fds - open_files) * sizeof(struct file *);
 
 	/* This is long word aligned thus could use a optimized version */
 	memset(new_fds, 0, size);
 
-	if (newf->max_fdset > open_files) {
-		int left = (newf->max_fdset-open_files)/8;
+	if (new_fdt->max_fdset > open_files) {
+		int left = (new_fdt->max_fdset-open_files)/8;
 		int start = open_files / (8 * sizeof(unsigned long));
 
-		memset(&newf->open_fds->fds_bits[start], 0, left);
-		memset(&newf->close_on_exec->fds_bits[start], 0, left);
+		memset(&new_fdt->open_fds->fds_bits[start], 0, left);
+		memset(&new_fdt->close_on_exec->fds_bits[start], 0, left);
 	}
 
 	tsk->files = newf;
@@ -688,9 +718,9 @@ out:
 	return error;
 
out_release:
-	free_fdset (newf->close_on_exec, newf->max_fdset);
-	free_fdset (newf->open_fds, newf->max_fdset);
-	free_fd_array(newf->fd, newf->max_fds);
+	free_fdset (new_fdt->close_on_exec, new_fdt->max_fdset);
+	free_fdset (new_fdt->open_fds, new_fdt->max_fdset);
+	free_fd_array(new_fdt->fd, new_fdt->max_fds);
 	kmem_cache_free(files_cachep, newf);
 	goto out;
 }
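A quick feel for count_open_files() as used above: it scans the open_fds bitmap from the top for the last non-zero word and rounds the answer up to a whole word of bits, so the copy loops only touch the low, populated part of the table. A standalone userspace illustration (numbers illustrative):

#include <stdio.h>

int main(void)
{
	unsigned long fds_bits[1024 / (8 * sizeof(long))] = { 1UL << 3 };	/* only fd 3 open */
	int size = 1024, i;							/* models __FD_SETSIZE */

	for (i = size / (8 * sizeof(long)); i > 0; )
		if (fds_bits[--i])
			break;
	printf("%zu\n", (i + 1) * 8 * sizeof(long));	/* prints 64 on a 64-bit box: one word of fds */
	return 0;
}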
@@ -1115,6 +1145,9 @@ static task_t *copy_process(unsigned long clone_flags,
			__get_cpu_var(process_counts)++;
		}
 
+	if (!current->signal->tty && p->signal->tty)
+		p->signal->tty = NULL;
+
 	nr_threads++;
 	total_forks++;
 	write_unlock_irq(&tasklist_lock);
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index f436993bd590..bef3b6901b76 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -45,6 +45,7 @@
 #include <linux/percpu.h>
 #include <linux/notifier.h>
 #include <linux/rcupdate.h>
+#include <linux/rcuref.h>
 #include <linux/cpu.h>
 
 /* Definition for rcupdate control block. */
@@ -72,6 +73,19 @@ DEFINE_PER_CPU(struct rcu_data, rcu_bh_data) = { 0L };
 static DEFINE_PER_CPU(struct tasklet_struct, rcu_tasklet) = {NULL};
 static int maxbatch = 10;
 
+#ifndef __HAVE_ARCH_CMPXCHG
+/*
+ * We use an array of spinlocks for the rcurefs -- similar to ones in sparc
+ * 32 bit atomic_t implementations, and a hash function similar to that
+ * for our refcounting needs.
+ * Can't help multiprocessors which donot have cmpxchg :(
+ */
+
+spinlock_t __rcuref_hash[RCUREF_HASH_SIZE] = {
+	[0 ... (RCUREF_HASH_SIZE-1)] = SPIN_LOCK_UNLOCKED
+};
+#endif
+
 /**
  * call_rcu - Queue an RCU callback for invocation after a grace period.
  * @head: structure to be used for queueing the RCU updates.
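The array declared above backs the rcuref primitives in include/linux/rcuref.h: on machines without cmpxchg, a refcount update grabs a spinlock chosen by hashing the counter's address, so concurrent updates to the same counter always contend on the same lock. Roughly (a simplified sketch of the idea, not the exact header):

#define RCUREF_HASH(k) \
	(&__rcuref_hash[(((unsigned long)(k)) >> 8) & (RCUREF_HASH_SIZE - 1)])

static inline void rcuref_inc(atomic_t *rcuref)
{
	unsigned long flags;

	spin_lock_irqsave(RCUREF_HASH(rcuref), flags);	/* same address -> same lock */
	rcuref->counter += 1;				/* plain update is safe under it */
	spin_unlock_irqrestore(RCUREF_HASH(rcuref), flags);
}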
diff --git a/kernel/sched.c b/kernel/sched.c
index 18b95520a2e2..dbd4490afec1 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -875,7 +875,7 @@ static int migrate_task(task_t *p, int dest_cpu, migration_req_t *req)
 * smp_call_function() if an IPI is sent by the same process we are
 * waiting to become inactive.
 */
-void wait_task_inactive(task_t * p)
+void wait_task_inactive(task_t *p)
 {
 	unsigned long flags;
 	runqueue_t *rq;
@@ -966,8 +966,11 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
		int local_group;
		int i;
 
+		/* Skip over this group if it has no CPUs allowed */
+		if (!cpus_intersects(group->cpumask, p->cpus_allowed))
+			goto nextgroup;
+
		local_group = cpu_isset(this_cpu, group->cpumask);
-		/* XXX: put a cpus allowed check */
 
		/* Tally up the load of all CPUs in the group */
		avg_load = 0;
@@ -992,6 +995,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
			min_load = avg_load;
			idlest = group;
		}
+nextgroup:
		group = group->next;
	} while (group != sd->groups);
 
@@ -1003,13 +1007,18 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
 /*
 * find_idlest_queue - find the idlest runqueue among the cpus in group.
 */
-static int find_idlest_cpu(struct sched_group *group, int this_cpu)
+static int
+find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
 {
+	cpumask_t tmp;
 	unsigned long load, min_load = ULONG_MAX;
 	int idlest = -1;
 	int i;
 
-	for_each_cpu_mask(i, group->cpumask) {
+	/* Traverse only the allowed CPUs */
+	cpus_and(tmp, group->cpumask, p->cpus_allowed);
+
+	for_each_cpu_mask(i, tmp) {
		load = source_load(i, 0);
 
		if (load < min_load || (load == min_load && i == this_cpu)) {
@@ -1052,7 +1061,7 @@ static int sched_balance_self(int cpu, int flag)
		if (!group)
			goto nextlevel;
 
-		new_cpu = find_idlest_cpu(group, cpu);
+		new_cpu = find_idlest_cpu(group, t, cpu);
		if (new_cpu == -1 || new_cpu == cpu)
			goto nextlevel;
 
@@ -1127,7 +1136,7 @@ static inline int wake_idle(int cpu, task_t *p)
 *
 * returns failure only if the task is already active.
 */
-static int try_to_wake_up(task_t * p, unsigned int state, int sync)
+static int try_to_wake_up(task_t *p, unsigned int state, int sync)
 {
	int cpu, this_cpu, success = 0;
	unsigned long flags;
@@ -1252,6 +1261,16 @@ out_activate:
	}
 
	/*
+	 * Tasks that have marked their sleep as noninteractive get
+	 * woken up without updating their sleep average. (i.e. their
+	 * sleep is handled in a priority-neutral manner, no priority
+	 * boost and no penalty.)
+	 */
+	if (old_state & TASK_NONINTERACTIVE)
+		__activate_task(p, rq);
+	else
+		activate_task(p, rq, cpu == this_cpu);
+	/*
	 * Sync wakeups (i.e. those types of wakeups where the waker
	 * has indicated that it will leave the CPU in short order)
	 * don't trigger a preemption, if the woken up task will run on
@@ -1259,7 +1278,6 @@ out_activate:
	 * the waker guarantees that the freshly woken up task is going
	 * to be considered on this CPU.)
	 */
-	activate_task(p, rq, cpu == this_cpu);
	if (!sync || cpu != this_cpu) {
		if (TASK_PREEMPTS_CURR(p, rq))
			resched_task(rq->curr);
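The new TASK_NONINTERACTIVE bit is OR-ed into the sleep state by the sleeper itself; try_to_wake_up() then sees it in old_state and skips the sleep-average credit. A hypothetical wait loop showing the caller side (illustrative only; the flag's real users, e.g. pipe waiters, are outside this diff, and event_pending() is an assumed condition helper):

	for (;;) {
		set_current_state(TASK_INTERRUPTIBLE | TASK_NONINTERACTIVE);
		if (event_pending())
			break;
		schedule();		/* sleep is interruptible but priority-neutral */
	}
	__set_current_state(TASK_RUNNING);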
@@ -1274,7 +1292,7 @@ out:
	return success;
 }
 
-int fastcall wake_up_process(task_t * p)
+int fastcall wake_up_process(task_t *p)
 {
	return try_to_wake_up(p, TASK_STOPPED | TASK_TRACED |
			      TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE, 0);
@@ -1353,7 +1371,7 @@ void fastcall sched_fork(task_t *p, int clone_flags)
 * that must be done for every newly created context, then puts the task
 * on the runqueue and wakes it.
 */
-void fastcall wake_up_new_task(task_t * p, unsigned long clone_flags)
+void fastcall wake_up_new_task(task_t *p, unsigned long clone_flags)
 {
	unsigned long flags;
	int this_cpu, cpu;
@@ -1436,7 +1454,7 @@ void fastcall wake_up_new_task(task_t * p, unsigned long clone_flags)
 * artificially, because any timeslice recovered here
 * was given away by the parent in the first place.)
 */
-void fastcall sched_exit(task_t * p)
+void fastcall sched_exit(task_t *p)
 {
	unsigned long flags;
	runqueue_t *rq;
@@ -1511,6 +1529,10 @@ static inline void finish_task_switch(runqueue_t *rq, task_t *prev)
	 * Manfred Spraul <manfred@colorfullife.com>
	 */
	prev_task_flags = prev->flags;
+#ifdef CONFIG_DEBUG_SPINLOCK
+	/* this is a valid case when another task releases the spinlock */
+	rq->lock.owner = current;
+#endif
	finish_arch_switch(prev);
	finish_lock_switch(rq, prev);
	if (mm)
@@ -1753,7 +1775,8 @@ void pull_task(runqueue_t *src_rq, prio_array_t *src_array, task_t *p,
 */
 static inline
 int can_migrate_task(task_t *p, runqueue_t *rq, int this_cpu,
-		     struct sched_domain *sd, enum idle_type idle, int *all_pinned)
+		     struct sched_domain *sd, enum idle_type idle,
+		     int *all_pinned)
 {
	/*
	 * We do not migrate tasks that are:
@@ -1883,10 +1906,11 @@
 */
 static struct sched_group *
 find_busiest_group(struct sched_domain *sd, int this_cpu,
-		   unsigned long *imbalance, enum idle_type idle)
+		   unsigned long *imbalance, enum idle_type idle, int *sd_idle)
 {
	struct sched_group *busiest = NULL, *this = NULL, *group = sd->groups;
	unsigned long max_load, avg_load, total_load, this_load, total_pwr;
+	unsigned long max_pull;
	int load_idx;
 
	max_load = this_load = total_load = total_pwr = 0;
@@ -1908,6 +1932,9 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
		avg_load = 0;
 
		for_each_cpu_mask(i, group->cpumask) {
+			if (*sd_idle && !idle_cpu(i))
+				*sd_idle = 0;
+
			/* Bias balancing toward cpus of our domain */
			if (local_group)
				load = target_load(i, load_idx);
@@ -1933,7 +1960,7 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
		group = group->next;
	} while (group != sd->groups);
 
-	if (!busiest || this_load >= max_load)
+	if (!busiest || this_load >= max_load || max_load <= SCHED_LOAD_SCALE)
		goto out_balanced;
 
	avg_load = (SCHED_LOAD_SCALE * total_load) / total_pwr;
@@ -1953,8 +1980,12 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
	 * by pulling tasks to us. Be careful of negative numbers as they'll
	 * appear as very large values with unsigned longs.
	 */
+
+	/* Don't want to pull so many tasks that a group would go idle */
+	max_pull = min(max_load - avg_load, max_load - SCHED_LOAD_SCALE);
+
	/* How much load to actually move to equalise the imbalance */
-	*imbalance = min((max_load - avg_load) * busiest->cpu_power,
+	*imbalance = min(max_pull * busiest->cpu_power,
				(avg_load - this_load) * this->cpu_power)
			/ SCHED_LOAD_SCALE;
 
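Worked numbers for the new max_pull cap (illustrative; SCHED_LOAD_SCALE is 128, and both groups are taken to have cpu_power == SCHED_LOAD_SCALE so the cpu_power factors cancel):

	max_load = 130, avg_load = 80, this_load = 20

	old imbalance = min(130 - 80, 80 - 20)                 = 50
	max_pull      = min(130 - 80, 130 - SCHED_LOAD_SCALE)  =  2
	new imbalance = min(max_pull, 80 - 20)                 =  2

Pulling 50 would leave the busiest group with a load of 80, less than one fully busy CPU; capping at max_load - SCHED_LOAD_SCALE keeps it at or above one CPU's worth of load, and the earlier max_load <= SCHED_LOAD_SCALE bail-out guarantees the subtraction never goes negative.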
@@ -2051,11 +2082,14 @@ static int load_balance(int this_cpu, runqueue_t *this_rq,
	unsigned long imbalance;
	int nr_moved, all_pinned = 0;
	int active_balance = 0;
+	int sd_idle = 0;
+
+	if (idle != NOT_IDLE && sd->flags & SD_SHARE_CPUPOWER)
+		sd_idle = 1;
 
-	spin_lock(&this_rq->lock);
	schedstat_inc(sd, lb_cnt[idle]);
 
-	group = find_busiest_group(sd, this_cpu, &imbalance, idle);
+	group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle);
	if (!group) {
		schedstat_inc(sd, lb_nobusyg[idle]);
		goto out_balanced;
@@ -2079,19 +2113,16 @@ static int load_balance(int this_cpu, runqueue_t *this_rq,
		 * still unbalanced. nr_moved simply stays zero, so it is
		 * correctly treated as an imbalance.
		 */
-		double_lock_balance(this_rq, busiest);
+		double_rq_lock(this_rq, busiest);
		nr_moved = move_tasks(this_rq, this_cpu, busiest,
-					imbalance, sd, idle,
-					&all_pinned);
-		spin_unlock(&busiest->lock);
+					imbalance, sd, idle, &all_pinned);
+		double_rq_unlock(this_rq, busiest);
 
		/* All tasks on this runqueue were pinned by CPU affinity */
		if (unlikely(all_pinned))
			goto out_balanced;
	}
 
-	spin_unlock(&this_rq->lock);
-
	if (!nr_moved) {
		schedstat_inc(sd, lb_failed[idle]);
		sd->nr_balance_failed++;
@@ -2099,6 +2130,16 @@ static int load_balance(int this_cpu, runqueue_t *this_rq,
		if (unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2)) {
 
			spin_lock(&busiest->lock);
+
+			/* don't kick the migration_thread, if the curr
+			 * task on busiest cpu can't be moved to this_cpu
+			 */
+			if (!cpu_isset(this_cpu, busiest->curr->cpus_allowed)) {
+				spin_unlock(&busiest->lock);
+				all_pinned = 1;
+				goto out_one_pinned;
+			}
+
			if (!busiest->active_balance) {
				busiest->active_balance = 1;
				busiest->push_cpu = this_cpu;
@@ -2131,19 +2172,23 @@ static int load_balance(int this_cpu, runqueue_t *this_rq,
			sd->balance_interval *= 2;
	}
 
+	if (!nr_moved && !sd_idle && sd->flags & SD_SHARE_CPUPOWER)
+		return -1;
	return nr_moved;
 
out_balanced:
-	spin_unlock(&this_rq->lock);
-
	schedstat_inc(sd, lb_balanced[idle]);
 
	sd->nr_balance_failed = 0;
+
+out_one_pinned:
	/* tune up the balancing interval */
	if ((all_pinned && sd->balance_interval < MAX_PINNED_INTERVAL) ||
			(sd->balance_interval < sd->max_interval))
		sd->balance_interval *= 2;
 
+	if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER)
+		return -1;
	return 0;
 }
 
@@ -2161,9 +2206,13 @@ static int load_balance_newidle(int this_cpu, runqueue_t *this_rq,
	runqueue_t *busiest = NULL;
	unsigned long imbalance;
	int nr_moved = 0;
+	int sd_idle = 0;
+
+	if (sd->flags & SD_SHARE_CPUPOWER)
+		sd_idle = 1;
 
	schedstat_inc(sd, lb_cnt[NEWLY_IDLE]);
-	group = find_busiest_group(sd, this_cpu, &imbalance, NEWLY_IDLE);
+	group = find_busiest_group(sd, this_cpu, &imbalance, NEWLY_IDLE, &sd_idle);
	if (!group) {
		schedstat_inc(sd, lb_nobusyg[NEWLY_IDLE]);
		goto out_balanced;
@@ -2177,22 +2226,30 @@ static int load_balance_newidle(int this_cpu, runqueue_t *this_rq,
 
	BUG_ON(busiest == this_rq);
 
-	/* Attempt to move tasks */
-	double_lock_balance(this_rq, busiest);
-
	schedstat_add(sd, lb_imbalance[NEWLY_IDLE], imbalance);
-	nr_moved = move_tasks(this_rq, this_cpu, busiest,
+
+	nr_moved = 0;
+	if (busiest->nr_running > 1) {
+		/* Attempt to move tasks */
+		double_lock_balance(this_rq, busiest);
+		nr_moved = move_tasks(this_rq, this_cpu, busiest,
					imbalance, sd, NEWLY_IDLE, NULL);
-	if (!nr_moved)
+		spin_unlock(&busiest->lock);
+	}
+
+	if (!nr_moved) {
		schedstat_inc(sd, lb_failed[NEWLY_IDLE]);
-	else
+		if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER)
+			return -1;
+	} else
		sd->nr_balance_failed = 0;
 
-	spin_unlock(&busiest->lock);
	return nr_moved;
 
out_balanced:
	schedstat_inc(sd, lb_balanced[NEWLY_IDLE]);
+	if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER)
+		return -1;
	sd->nr_balance_failed = 0;
	return 0;
 }
@@ -2317,7 +2374,11 @@ static void rebalance_tick(int this_cpu, runqueue_t *this_rq,
 
		if (j - sd->last_balance >= interval) {
			if (load_balance(this_cpu, this_rq, sd, idle)) {
-				/* We've pulled tasks over so no longer idle */
+				/*
+				 * We've pulled tasks over so either we're no
+				 * longer idle, or one of our SMT siblings is
+				 * not idle.
+				 */
				idle = NOT_IDLE;
			}
			sd->last_balance += interval;
@@ -2576,6 +2637,13 @@ out:
 }
 
 #ifdef CONFIG_SCHED_SMT
+static inline void wakeup_busy_runqueue(runqueue_t *rq)
+{
+	/* If an SMT runqueue is sleeping due to priority reasons wake it up */
+	if (rq->curr == rq->idle && rq->nr_running)
+		resched_task(rq->idle);
+}
+
 static inline void wake_sleeping_dependent(int this_cpu, runqueue_t *this_rq)
 {
	struct sched_domain *tmp, *sd = NULL;
@@ -2609,12 +2677,7 @@ static inline void wake_sleeping_dependent(int this_cpu, runqueue_t *this_rq)
	for_each_cpu_mask(i, sibling_map) {
		runqueue_t *smt_rq = cpu_rq(i);
 
-		/*
-		 * If an SMT sibling task is sleeping due to priority
-		 * reasons wake it up now.
-		 */
-		if (smt_rq->curr == smt_rq->idle && smt_rq->nr_running)
-			resched_task(smt_rq->idle);
+		wakeup_busy_runqueue(smt_rq);
	}
 
	for_each_cpu_mask(i, sibling_map)
@@ -2625,6 +2688,16 @@ static inline void wake_sleeping_dependent(int this_cpu, runqueue_t *this_rq)
	 */
 }
 
+/*
+ * number of 'lost' timeslices this task wont be able to fully
+ * utilize, if another task runs on a sibling. This models the
+ * slowdown effect of other tasks running on siblings:
+ */
+static inline unsigned long smt_slice(task_t *p, struct sched_domain *sd)
+{
+	return p->time_slice * (100 - sd->per_cpu_gain) / 100;
+}
+
 static inline int dependent_sleeper(int this_cpu, runqueue_t *this_rq)
 {
	struct sched_domain *tmp, *sd = NULL;
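For example, with the SMT sibling domain's per_cpu_gain at its usual 25 and a task holding a 100 ms timeslice, smt_slice() evaluates to 100 * (100 - 25) / 100 = 75 ms: the share of the slice the task can effectively use while the other sibling is busy. dependent_sleeper() below compares that figure against the competing task's full timeslice to decide who should yield.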
@@ -2668,6 +2741,10 @@ static inline int dependent_sleeper(int this_cpu, runqueue_t *this_rq) | |||
2668 | runqueue_t *smt_rq = cpu_rq(i); | 2741 | runqueue_t *smt_rq = cpu_rq(i); |
2669 | task_t *smt_curr = smt_rq->curr; | 2742 | task_t *smt_curr = smt_rq->curr; |
2670 | 2743 | ||
2744 | /* Kernel threads do not participate in dependent sleeping */ | ||
2745 | if (!p->mm || !smt_curr->mm || rt_task(p)) | ||
2746 | goto check_smt_task; | ||
2747 | |||
2671 | /* | 2748 | /* |
2672 | * If a user task with lower static priority than the | 2749 | * If a user task with lower static priority than the |
2673 | * running task on the SMT sibling is trying to schedule, | 2750 | * running task on the SMT sibling is trying to schedule, |
@@ -2676,21 +2753,45 @@ static inline int dependent_sleeper(int this_cpu, runqueue_t *this_rq) | |||
2676 | * task from using an unfair proportion of the | 2753 | * task from using an unfair proportion of the |
2677 | * physical cpu's resources. -ck | 2754 | * physical cpu's resources. -ck |
2678 | */ | 2755 | */ |
2679 | if (((smt_curr->time_slice * (100 - sd->per_cpu_gain) / 100) > | 2756 | if (rt_task(smt_curr)) { |
2680 | task_timeslice(p) || rt_task(smt_curr)) && | 2757 | /* |
2681 | p->mm && smt_curr->mm && !rt_task(p)) | 2758 | * With real time tasks we run non-rt tasks only |
2682 | ret = 1; | 2759 | * per_cpu_gain% of the time. |
2760 | */ | ||
2761 | if ((jiffies % DEF_TIMESLICE) > | ||
2762 | (sd->per_cpu_gain * DEF_TIMESLICE / 100)) | ||
2763 | ret = 1; | ||
2764 | } else | ||
2765 | if (smt_curr->static_prio < p->static_prio && | ||
2766 | !TASK_PREEMPTS_CURR(p, smt_rq) && | ||
2767 | smt_slice(smt_curr, sd) > task_timeslice(p)) | ||
2768 | ret = 1; | ||
2769 | |||
2770 | check_smt_task: | ||
2771 | if ((!smt_curr->mm && smt_curr != smt_rq->idle) || | ||
2772 | rt_task(smt_curr)) | ||
2773 | continue; | ||
2774 | if (!p->mm) { | ||
2775 | wakeup_busy_runqueue(smt_rq); | ||
2776 | continue; | ||
2777 | } | ||
2683 | 2778 | ||
2684 | /* | 2779 | /* |
2685 | * Reschedule a lower priority task on the SMT sibling, | 2780 | * Reschedule a lower priority task on the SMT sibling for |
2686 | * or wake it up if it has been put to sleep for priority | 2781 | * it to be put to sleep, or wake it up if it has been put to |
2687 | * reasons. | 2782 | * sleep for priority reasons to see if it should run now. |
2688 | */ | 2783 | */ |
2689 | if ((((p->time_slice * (100 - sd->per_cpu_gain) / 100) > | 2784 | if (rt_task(p)) { |
2690 | task_timeslice(smt_curr) || rt_task(p)) && | 2785 | if ((jiffies % DEF_TIMESLICE) > |
2691 | smt_curr->mm && p->mm && !rt_task(smt_curr)) || | 2786 | (sd->per_cpu_gain * DEF_TIMESLICE / 100)) |
2692 | (smt_curr == smt_rq->idle && smt_rq->nr_running)) | 2787 | resched_task(smt_curr); |
2693 | resched_task(smt_curr); | 2788 | } else { |
2789 | if (TASK_PREEMPTS_CURR(p, smt_rq) && | ||
2790 | smt_slice(p, sd) > task_timeslice(smt_curr)) | ||
2791 | resched_task(smt_curr); | ||
2792 | else | ||
2793 | wakeup_busy_runqueue(smt_rq); | ||
2794 | } | ||
2694 | } | 2795 | } |
2695 | out_unlock: | 2796 | out_unlock: |
2696 | for_each_cpu_mask(i, sibling_map) | 2797 | for_each_cpu_mask(i, sibling_map) |
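The real-time branches above gate the non-RT sibling with a simple duty-cycle test on jiffies. A minimal user-space sketch of that test follows; DEF_TIMESLICE = 100 and per_cpu_gain = 25 are assumed here purely for illustration (the real values come from sched.c and the scheduling domain).

#include <stdio.h>

#define DEMO_DEF_TIMESLICE	100	/* assumed slice length, in jiffies */

/* returns 1 when the non-RT sibling should be kept off the CPU */
static int demo_throttle_non_rt(unsigned long jiffies_now,
				unsigned int per_cpu_gain)
{
	return (jiffies_now % DEMO_DEF_TIMESLICE) >
	       (per_cpu_gain * DEMO_DEF_TIMESLICE / 100);
}

int main(void)
{
	unsigned long j, throttled = 0;

	for (j = 0; j < 1000; j++)
		throttled += demo_throttle_non_rt(j, 25);

	/* ~740 of 1000 ticks throttled: non-RT work runs ~per_cpu_gain% */
	printf("throttled %lu of 1000 ticks\n", throttled);
	return 0;
}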
@@ -2888,6 +2989,7 @@ switch_tasks: | |||
2888 | if (next == rq->idle) | 2989 | if (next == rq->idle) |
2889 | schedstat_inc(rq, sched_goidle); | 2990 | schedstat_inc(rq, sched_goidle); |
2890 | prefetch(next); | 2991 | prefetch(next); |
2992 | prefetch_stack(next); | ||
2891 | clear_tsk_need_resched(prev); | 2993 | clear_tsk_need_resched(prev); |
2892 | rcu_qsctr_inc(task_cpu(prev)); | 2994 | rcu_qsctr_inc(task_cpu(prev)); |
2893 | 2995 | ||
@@ -3015,7 +3117,8 @@ need_resched: | |||
3015 | 3117 | ||
3016 | #endif /* CONFIG_PREEMPT */ | 3118 | #endif /* CONFIG_PREEMPT */ |
3017 | 3119 | ||
3018 | int default_wake_function(wait_queue_t *curr, unsigned mode, int sync, void *key) | 3120 | int default_wake_function(wait_queue_t *curr, unsigned mode, int sync, |
3121 | void *key) | ||
3019 | { | 3122 | { |
3020 | task_t *p = curr->private; | 3123 | task_t *p = curr->private; |
3021 | return try_to_wake_up(p, mode, sync); | 3124 | return try_to_wake_up(p, mode, sync); |
@@ -3057,7 +3160,7 @@ static void __wake_up_common(wait_queue_head_t *q, unsigned int mode, | |||
3057 | * @key: is directly passed to the wakeup function | 3160 | * @key: is directly passed to the wakeup function |
3058 | */ | 3161 | */ |
3059 | void fastcall __wake_up(wait_queue_head_t *q, unsigned int mode, | 3162 | void fastcall __wake_up(wait_queue_head_t *q, unsigned int mode, |
3060 | int nr_exclusive, void *key) | 3163 | int nr_exclusive, void *key) |
3061 | { | 3164 | { |
3062 | unsigned long flags; | 3165 | unsigned long flags; |
3063 | 3166 | ||
@@ -3089,7 +3192,8 @@ void fastcall __wake_up_locked(wait_queue_head_t *q, unsigned int mode) | |||
3089 | * | 3192 | * |
3090 | * On UP it can prevent extra preemption. | 3193 | * On UP it can prevent extra preemption. |
3091 | */ | 3194 | */ |
3092 | void fastcall __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive) | 3195 | void fastcall |
3196 | __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive) | ||
3093 | { | 3197 | { |
3094 | unsigned long flags; | 3198 | unsigned long flags; |
3095 | int sync = 1; | 3199 | int sync = 1; |
@@ -3280,7 +3384,8 @@ void fastcall __sched interruptible_sleep_on(wait_queue_head_t *q) | |||
3280 | 3384 | ||
3281 | EXPORT_SYMBOL(interruptible_sleep_on); | 3385 | EXPORT_SYMBOL(interruptible_sleep_on); |
3282 | 3386 | ||
3283 | long fastcall __sched interruptible_sleep_on_timeout(wait_queue_head_t *q, long timeout) | 3387 | long fastcall __sched |
3388 | interruptible_sleep_on_timeout(wait_queue_head_t *q, long timeout) | ||
3284 | { | 3389 | { |
3285 | SLEEP_ON_VAR | 3390 | SLEEP_ON_VAR |
3286 | 3391 | ||
@@ -3499,7 +3604,8 @@ static void __setscheduler(struct task_struct *p, int policy, int prio) | |||
3499 | * @policy: new policy. | 3604 | * @policy: new policy. |
3500 | * @param: structure containing the new RT priority. | 3605 | * @param: structure containing the new RT priority. |
3501 | */ | 3606 | */ |
3502 | int sched_setscheduler(struct task_struct *p, int policy, struct sched_param *param) | 3607 | int sched_setscheduler(struct task_struct *p, int policy, |
3608 | struct sched_param *param) | ||
3503 | { | 3609 | { |
3504 | int retval; | 3610 | int retval; |
3505 | int oldprio, oldpolicy = -1; | 3611 | int oldprio, oldpolicy = -1; |
@@ -3519,7 +3625,7 @@ recheck: | |||
3519 | * 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_NORMAL is 0. | 3625 | * 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_NORMAL is 0. |
3520 | */ | 3626 | */ |
3521 | if (param->sched_priority < 0 || | 3627 | if (param->sched_priority < 0 || |
3522 | (p->mm && param->sched_priority > MAX_USER_RT_PRIO-1) || | 3628 | (p->mm && param->sched_priority > MAX_USER_RT_PRIO-1) || |
3523 | (!p->mm && param->sched_priority > MAX_RT_PRIO-1)) | 3629 | (!p->mm && param->sched_priority > MAX_RT_PRIO-1)) |
3524 | return -EINVAL; | 3630 | return -EINVAL; |
3525 | if ((policy == SCHED_NORMAL) != (param->sched_priority == 0)) | 3631 | if ((policy == SCHED_NORMAL) != (param->sched_priority == 0)) |
@@ -3582,7 +3688,8 @@ recheck: | |||
3582 | } | 3688 | } |
3583 | EXPORT_SYMBOL_GPL(sched_setscheduler); | 3689 | EXPORT_SYMBOL_GPL(sched_setscheduler); |
3584 | 3690 | ||
3585 | static int do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param) | 3691 | static int |
3692 | do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param) | ||
3586 | { | 3693 | { |
3587 | int retval; | 3694 | int retval; |
3588 | struct sched_param lparam; | 3695 | struct sched_param lparam; |
@@ -3849,7 +3956,7 @@ asmlinkage long sys_sched_yield(void) | |||
3849 | if (rt_task(current)) | 3956 | if (rt_task(current)) |
3850 | target = rq->active; | 3957 | target = rq->active; |
3851 | 3958 | ||
3852 | if (current->array->nr_active == 1) { | 3959 | if (array->nr_active == 1) { |
3853 | schedstat_inc(rq, yld_act_empty); | 3960 | schedstat_inc(rq, yld_act_empty); |
3854 | if (!rq->expired->nr_active) | 3961 | if (!rq->expired->nr_active) |
3855 | schedstat_inc(rq, yld_both_empty); | 3962 | schedstat_inc(rq, yld_both_empty); |
@@ -3913,7 +4020,7 @@ EXPORT_SYMBOL(cond_resched); | |||
3913 | * operations here to prevent schedule() from being called twice (once via | 4020 | * operations here to prevent schedule() from being called twice (once via |
3914 | * spin_unlock(), once by hand). | 4021 | * spin_unlock(), once by hand). |
3915 | */ | 4022 | */ |
3916 | int cond_resched_lock(spinlock_t * lock) | 4023 | int cond_resched_lock(spinlock_t *lock) |
3917 | { | 4024 | { |
3918 | int ret = 0; | 4025 | int ret = 0; |
3919 | 4026 | ||
@@ -4096,7 +4203,7 @@ static inline struct task_struct *younger_sibling(struct task_struct *p) | |||
4096 | return list_entry(p->sibling.next,struct task_struct,sibling); | 4203 | return list_entry(p->sibling.next,struct task_struct,sibling); |
4097 | } | 4204 | } |
4098 | 4205 | ||
4099 | static void show_task(task_t * p) | 4206 | static void show_task(task_t *p) |
4100 | { | 4207 | { |
4101 | task_t *relative; | 4208 | task_t *relative; |
4102 | unsigned state; | 4209 | unsigned state; |
@@ -4122,7 +4229,7 @@ static void show_task(task_t * p) | |||
4122 | #endif | 4229 | #endif |
4123 | #ifdef CONFIG_DEBUG_STACK_USAGE | 4230 | #ifdef CONFIG_DEBUG_STACK_USAGE |
4124 | { | 4231 | { |
4125 | unsigned long * n = (unsigned long *) (p->thread_info+1); | 4232 | unsigned long *n = (unsigned long *) (p->thread_info+1); |
4126 | while (!*n) | 4233 | while (!*n) |
4127 | n++; | 4234 | n++; |
4128 | free = (unsigned long) n - (unsigned long)(p->thread_info+1); | 4235 | free = (unsigned long) n - (unsigned long)(p->thread_info+1); |
@@ -4331,7 +4438,7 @@ out: | |||
4331 | * thread migration by bumping thread off CPU then 'pushing' onto | 4438 | * thread migration by bumping thread off CPU then 'pushing' onto |
4332 | * another runqueue. | 4439 | * another runqueue. |
4333 | */ | 4440 | */ |
4334 | static int migration_thread(void * data) | 4441 | static int migration_thread(void *data) |
4335 | { | 4442 | { |
4336 | runqueue_t *rq; | 4443 | runqueue_t *rq; |
4337 | int cpu = (long)data; | 4444 | int cpu = (long)data; |
diff --git a/kernel/signal.c b/kernel/signal.c index 4980a073237f..b92c3c9f8b9a 100644 --- a/kernel/signal.c +++ b/kernel/signal.c | |||
@@ -2221,8 +2221,7 @@ sys_rt_sigtimedwait(const sigset_t __user *uthese, | |||
2221 | recalc_sigpending(); | 2221 | recalc_sigpending(); |
2222 | spin_unlock_irq(¤t->sighand->siglock); | 2222 | spin_unlock_irq(¤t->sighand->siglock); |
2223 | 2223 | ||
2224 | current->state = TASK_INTERRUPTIBLE; | 2224 | timeout = schedule_timeout_interruptible(timeout); |
2225 | timeout = schedule_timeout(timeout); | ||
2226 | 2225 | ||
2227 | try_to_freeze(); | 2226 | try_to_freeze(); |
2228 | spin_lock_irq(¤t->sighand->siglock); | 2227 | spin_lock_irq(¤t->sighand->siglock); |
diff --git a/kernel/spinlock.c b/kernel/spinlock.c index 0c3f9d8bbe17..0375fcd5921d 100644 --- a/kernel/spinlock.c +++ b/kernel/spinlock.c | |||
@@ -3,7 +3,10 @@ | |||
3 | * | 3 | * |
4 | * Author: Zwane Mwaikambo <zwane@fsmlabs.com> | 4 | * Author: Zwane Mwaikambo <zwane@fsmlabs.com> |
5 | * | 5 | * |
6 | * Copyright (2004) Ingo Molnar | 6 | * Copyright (2004, 2005) Ingo Molnar |
7 | * | ||
8 | * This file contains the spinlock/rwlock implementations for the | ||
9 | * SMP and the DEBUG_SPINLOCK cases. (UP-nondebug inlines them) | ||
7 | */ | 10 | */ |
8 | 11 | ||
9 | #include <linux/config.h> | 12 | #include <linux/config.h> |
@@ -17,12 +20,12 @@ | |||
17 | * Generic declaration of the raw read_trylock() function, | 20 | * Generic declaration of the raw read_trylock() function, |
18 | * architectures are supposed to optimize this: | 21 | * architectures are supposed to optimize this: |
19 | */ | 22 | */ |
20 | int __lockfunc generic_raw_read_trylock(rwlock_t *lock) | 23 | int __lockfunc generic__raw_read_trylock(raw_rwlock_t *lock) |
21 | { | 24 | { |
22 | _raw_read_lock(lock); | 25 | __raw_read_lock(lock); |
23 | return 1; | 26 | return 1; |
24 | } | 27 | } |
25 | EXPORT_SYMBOL(generic_raw_read_trylock); | 28 | EXPORT_SYMBOL(generic__raw_read_trylock); |
26 | 29 | ||
27 | int __lockfunc _spin_trylock(spinlock_t *lock) | 30 | int __lockfunc _spin_trylock(spinlock_t *lock) |
28 | { | 31 | { |
@@ -57,7 +60,7 @@ int __lockfunc _write_trylock(rwlock_t *lock) | |||
57 | } | 60 | } |
58 | EXPORT_SYMBOL(_write_trylock); | 61 | EXPORT_SYMBOL(_write_trylock); |
59 | 62 | ||
60 | #ifndef CONFIG_PREEMPT | 63 | #if !defined(CONFIG_PREEMPT) || !defined(CONFIG_SMP) |
61 | 64 | ||
62 | void __lockfunc _read_lock(rwlock_t *lock) | 65 | void __lockfunc _read_lock(rwlock_t *lock) |
63 | { | 66 | { |
@@ -72,7 +75,7 @@ unsigned long __lockfunc _spin_lock_irqsave(spinlock_t *lock) | |||
72 | 75 | ||
73 | local_irq_save(flags); | 76 | local_irq_save(flags); |
74 | preempt_disable(); | 77 | preempt_disable(); |
75 | _raw_spin_lock_flags(lock, flags); | 78 | _raw_spin_lock_flags(lock, &flags); |
76 | return flags; | 79 | return flags; |
77 | } | 80 | } |
78 | EXPORT_SYMBOL(_spin_lock_irqsave); | 81 | EXPORT_SYMBOL(_spin_lock_irqsave); |
diff --git a/kernel/timer.c b/kernel/timer.c index 13e2b513be01..f4152fcd9f8e 100644 --- a/kernel/timer.c +++ b/kernel/timer.c | |||
@@ -1154,6 +1154,20 @@ fastcall signed long __sched schedule_timeout(signed long timeout) | |||
1154 | 1154 | ||
1155 | EXPORT_SYMBOL(schedule_timeout); | 1155 | EXPORT_SYMBOL(schedule_timeout); |
1156 | 1156 | ||
1157 | signed long __sched schedule_timeout_interruptible(signed long timeout) | ||
1158 | { | ||
1159 | set_current_state(TASK_INTERRUPTIBLE); | ||
1160 | return schedule_timeout(timeout); | ||
1161 | } | ||
1162 | EXPORT_SYMBOL(schedule_timeout_interruptible); | ||
1163 | |||
1164 | signed long __sched schedule_timeout_uninterruptible(signed long timeout) | ||
1165 | { | ||
1166 | set_current_state(TASK_UNINTERRUPTIBLE); | ||
1167 | return schedule_timeout(timeout); | ||
1168 | } | ||
1169 | EXPORT_SYMBOL(schedule_timeout_uninterruptible); | ||
1170 | |||
1157 | /* Thread ID - the internal kernel "pid" */ | 1171 | /* Thread ID - the internal kernel "pid" */ |
1158 | asmlinkage long sys_gettid(void) | 1172 | asmlinkage long sys_gettid(void) |
1159 | { | 1173 | { |
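The remaining hunks in this patch (sys_rt_sigtimedwait above, and the nanosleep/msleep paths below) all follow the same conversion pattern enabled by these two helpers. A kernel-context sketch of that pattern, using a hypothetical caller, assuming the new helpers are declared in <linux/sched.h>:

#include <linux/sched.h>

/* before: the caller sets the task state by hand */
static signed long demo_wait_old(signed long timeout)
{
	set_current_state(TASK_INTERRUPTIBLE);
	return schedule_timeout(timeout);
}

/* after: the helper sets TASK_INTERRUPTIBLE itself */
static signed long demo_wait_new(signed long timeout)
{
	return schedule_timeout_interruptible(timeout);
}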
@@ -1170,8 +1184,7 @@ static long __sched nanosleep_restart(struct restart_block *restart) | |||
1170 | if (!time_after(expire, now)) | 1184 | if (!time_after(expire, now)) |
1171 | return 0; | 1185 | return 0; |
1172 | 1186 | ||
1173 | current->state = TASK_INTERRUPTIBLE; | 1187 | expire = schedule_timeout_interruptible(expire - now); |
1174 | expire = schedule_timeout(expire - now); | ||
1175 | 1188 | ||
1176 | ret = 0; | 1189 | ret = 0; |
1177 | if (expire) { | 1190 | if (expire) { |
@@ -1199,8 +1212,7 @@ asmlinkage long sys_nanosleep(struct timespec __user *rqtp, struct timespec __us | |||
1199 | return -EINVAL; | 1212 | return -EINVAL; |
1200 | 1213 | ||
1201 | expire = timespec_to_jiffies(&t) + (t.tv_sec || t.tv_nsec); | 1214 | expire = timespec_to_jiffies(&t) + (t.tv_sec || t.tv_nsec); |
1202 | current->state = TASK_INTERRUPTIBLE; | 1215 | expire = schedule_timeout_interruptible(expire); |
1203 | expire = schedule_timeout(expire); | ||
1204 | 1216 | ||
1205 | ret = 0; | 1217 | ret = 0; |
1206 | if (expire) { | 1218 | if (expire) { |
@@ -1598,10 +1610,8 @@ void msleep(unsigned int msecs) | |||
1598 | { | 1610 | { |
1599 | unsigned long timeout = msecs_to_jiffies(msecs) + 1; | 1611 | unsigned long timeout = msecs_to_jiffies(msecs) + 1; |
1600 | 1612 | ||
1601 | while (timeout) { | 1613 | while (timeout) |
1602 | set_current_state(TASK_UNINTERRUPTIBLE); | 1614 | timeout = schedule_timeout_uninterruptible(timeout); |
1603 | timeout = schedule_timeout(timeout); | ||
1604 | } | ||
1605 | } | 1615 | } |
1606 | 1616 | ||
1607 | EXPORT_SYMBOL(msleep); | 1617 | EXPORT_SYMBOL(msleep); |
@@ -1614,10 +1624,8 @@ unsigned long msleep_interruptible(unsigned int msecs) | |||
1614 | { | 1624 | { |
1615 | unsigned long timeout = msecs_to_jiffies(msecs) + 1; | 1625 | unsigned long timeout = msecs_to_jiffies(msecs) + 1; |
1616 | 1626 | ||
1617 | while (timeout && !signal_pending(current)) { | 1627 | while (timeout && !signal_pending(current)) |
1618 | set_current_state(TASK_INTERRUPTIBLE); | 1628 | timeout = schedule_timeout_interruptible(timeout); |
1619 | timeout = schedule_timeout(timeout); | ||
1620 | } | ||
1621 | return jiffies_to_msecs(timeout); | 1629 | return jiffies_to_msecs(timeout); |
1622 | } | 1630 | } |
1623 | 1631 | ||
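For completeness, a sketch of how a caller might use msleep_interruptible()'s return value; the demo_poll_hw() helper and its hardware-ready callback are hypothetical and shown only for illustration.

/*
 * msleep_interruptible() returns the number of milliseconds left if a
 * signal cut the sleep short, or 0 if the full interval elapsed.
 */
#include <linux/delay.h>
#include <linux/errno.h>

static int demo_poll_hw(int (*hw_ready)(void))
{
	int tries;

	for (tries = 0; tries < 10; tries++) {
		if (hw_ready())
			return 0;
		if (msleep_interruptible(100))
			return -EINTR;	/* interrupted by a signal */
	}
	return -ETIMEDOUT;
}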