Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/Makefile | 1 |
| -rw-r--r-- | kernel/acct.c | 43 |
| -rw-r--r-- | kernel/compat.c | 9 |
| -rw-r--r-- | kernel/cpuset.c | 104 |
| -rw-r--r-- | kernel/exit.c | 26 |
| -rw-r--r-- | kernel/fork.c | 101 |
| -rw-r--r-- | kernel/rcupdate.c | 14 |
| -rw-r--r-- | kernel/sched.c | 233 |
| -rw-r--r-- | kernel/signal.c | 3 |
| -rw-r--r-- | kernel/spinlock.c | 15 |
| -rw-r--r-- | kernel/timer.c | 32 |
11 files changed, 392 insertions, 189 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 8d57a2f1226b..ff4dc02ce170 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
| @@ -12,6 +12,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \ | |||
| 12 | obj-$(CONFIG_FUTEX) += futex.o | 12 | obj-$(CONFIG_FUTEX) += futex.o |
| 13 | obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o | 13 | obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o |
| 14 | obj-$(CONFIG_SMP) += cpu.o spinlock.o | 14 | obj-$(CONFIG_SMP) += cpu.o spinlock.o |
| 15 | obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o | ||
| 15 | obj-$(CONFIG_UID16) += uid16.o | 16 | obj-$(CONFIG_UID16) += uid16.o |
| 16 | obj-$(CONFIG_MODULES) += module.o | 17 | obj-$(CONFIG_MODULES) += module.o |
| 17 | obj-$(CONFIG_KALLSYMS) += kallsyms.o | 18 | obj-$(CONFIG_KALLSYMS) += kallsyms.o |
diff --git a/kernel/acct.c b/kernel/acct.c
index f70e6027cca9..b756f527497e 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
| @@ -165,7 +165,7 @@ out: | |||
| 165 | } | 165 | } |
| 166 | 166 | ||
| 167 | /* | 167 | /* |
| 168 | * Close the old accouting file (if currently open) and then replace | 168 | * Close the old accounting file (if currently open) and then replace |
| 169 | * it with file (if non-NULL). | 169 | * it with file (if non-NULL). |
| 170 | * | 170 | * |
| 171 | * NOTE: acct_globals.lock MUST be held on entry and exit. | 171 | * NOTE: acct_globals.lock MUST be held on entry and exit. |
| @@ -199,11 +199,16 @@ static void acct_file_reopen(struct file *file) | |||
| 199 | } | 199 | } |
| 200 | } | 200 | } |
| 201 | 201 | ||
| 202 | /* | 202 | /** |
| 203 | * sys_acct() is the only system call needed to implement process | 203 | * sys_acct - enable/disable process accounting |
| 204 | * accounting. It takes the name of the file where accounting records | 204 | * @name: file name for accounting records or NULL to shutdown accounting |
| 205 | * should be written. If the filename is NULL, accounting will be | 205 | * |
| 206 | * shutdown. | 206 | * Returns 0 for success or negative errno values for failure. |
| 207 | * | ||
| 208 | * sys_acct() is the only system call needed to implement process | ||
| 209 | * accounting. It takes the name of the file where accounting records | ||
| 210 | * should be written. If the filename is NULL, accounting will be | ||
| 211 | * shutdown. | ||
| 207 | */ | 212 | */ |
| 208 | asmlinkage long sys_acct(const char __user *name) | 213 | asmlinkage long sys_acct(const char __user *name) |
| 209 | { | 214 | { |
| @@ -250,9 +255,12 @@ asmlinkage long sys_acct(const char __user *name) | |||
| 250 | return (0); | 255 | return (0); |
| 251 | } | 256 | } |
| 252 | 257 | ||
| 253 | /* | 258 | /** |
| 254 | * If the accouting is turned on for a file in the filesystem pointed | 259 | * acct_auto_close - turn off a filesystem's accounting if it is on |
| 255 | * to by sb, turn accouting off. | 260 | * @sb: super block for the filesystem |
| 261 | * | ||
| 262 | * If the accounting is turned on for a file in the filesystem pointed | ||
| 263 | * to by sb, turn accounting off. | ||
| 256 | */ | 264 | */ |
| 257 | void acct_auto_close(struct super_block *sb) | 265 | void acct_auto_close(struct super_block *sb) |
| 258 | { | 266 | { |
| @@ -503,8 +511,11 @@ static void do_acct_process(long exitcode, struct file *file) | |||
| 503 | set_fs(fs); | 511 | set_fs(fs); |
| 504 | } | 512 | } |
| 505 | 513 | ||
| 506 | /* | 514 | /** |
| 507 | * acct_process - now just a wrapper around do_acct_process | 515 | * acct_process - now just a wrapper around do_acct_process |
| 516 | * @exitcode: task exit code | ||
| 517 | * | ||
| 518 | * handles process accounting for an exiting task | ||
| 508 | */ | 519 | */ |
| 509 | void acct_process(long exitcode) | 520 | void acct_process(long exitcode) |
| 510 | { | 521 | { |
| @@ -530,9 +541,9 @@ void acct_process(long exitcode) | |||
| 530 | } | 541 | } |
| 531 | 542 | ||
| 532 | 543 | ||
| 533 | /* | 544 | /** |
| 534 | * acct_update_integrals | 545 | * acct_update_integrals - update mm integral fields in task_struct |
| 535 | * - update mm integral fields in task_struct | 546 | * @tsk: task_struct for accounting |
| 536 | */ | 547 | */ |
| 537 | void acct_update_integrals(struct task_struct *tsk) | 548 | void acct_update_integrals(struct task_struct *tsk) |
| 538 | { | 549 | { |
| @@ -547,9 +558,9 @@ void acct_update_integrals(struct task_struct *tsk) | |||
| 547 | } | 558 | } |
| 548 | } | 559 | } |
| 549 | 560 | ||
| 550 | /* | 561 | /** |
| 551 | * acct_clear_integrals | 562 | * acct_clear_integrals - clear the mm integral fields in task_struct |
| 552 | * - clear the mm integral fields in task_struct | 563 | * @tsk: task_struct whose accounting fields are cleared |
| 553 | */ | 564 | */ |
| 554 | void acct_clear_integrals(struct task_struct *tsk) | 565 | void acct_clear_integrals(struct task_struct *tsk) |
| 555 | { | 566 | { |
diff --git a/kernel/compat.c b/kernel/compat.c
index ddfcaaa86623..102296e21ea8 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
| @@ -48,8 +48,7 @@ static long compat_nanosleep_restart(struct restart_block *restart) | |||
| 48 | if (!time_after(expire, now)) | 48 | if (!time_after(expire, now)) |
| 49 | return 0; | 49 | return 0; |
| 50 | 50 | ||
| 51 | current->state = TASK_INTERRUPTIBLE; | 51 | expire = schedule_timeout_interruptible(expire - now); |
| 52 | expire = schedule_timeout(expire - now); | ||
| 53 | if (expire == 0) | 52 | if (expire == 0) |
| 54 | return 0; | 53 | return 0; |
| 55 | 54 | ||
| @@ -82,8 +81,7 @@ asmlinkage long compat_sys_nanosleep(struct compat_timespec __user *rqtp, | |||
| 82 | return -EINVAL; | 81 | return -EINVAL; |
| 83 | 82 | ||
| 84 | expire = timespec_to_jiffies(&t) + (t.tv_sec || t.tv_nsec); | 83 | expire = timespec_to_jiffies(&t) + (t.tv_sec || t.tv_nsec); |
| 85 | current->state = TASK_INTERRUPTIBLE; | 84 | expire = schedule_timeout_interruptible(expire); |
| 86 | expire = schedule_timeout(expire); | ||
| 87 | if (expire == 0) | 85 | if (expire == 0) |
| 88 | return 0; | 86 | return 0; |
| 89 | 87 | ||
| @@ -795,8 +793,7 @@ compat_sys_rt_sigtimedwait (compat_sigset_t __user *uthese, | |||
| 795 | recalc_sigpending(); | 793 | recalc_sigpending(); |
| 796 | spin_unlock_irq(¤t->sighand->siglock); | 794 | spin_unlock_irq(¤t->sighand->siglock); |
| 797 | 795 | ||
| 798 | current->state = TASK_INTERRUPTIBLE; | 796 | timeout = schedule_timeout_interruptible(timeout); |
| 799 | timeout = schedule_timeout(timeout); | ||
| 800 | 797 | ||
| 801 | spin_lock_irq(¤t->sighand->siglock); | 798 | spin_lock_irq(¤t->sighand->siglock); |
| 802 | sig = dequeue_signal(current, &s, &info); | 799 | sig = dequeue_signal(current, &s, &info); |
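The compat.c hunks above replace the open-coded pattern of setting current->state to TASK_INTERRUPTIBLE before schedule_timeout() with the schedule_timeout_interruptible() helper added elsewhere in this series (kernel/timer.c appears in the diffstat). As a sketch of what these call sites now rely on, the helper is essentially:

signed long __sched schedule_timeout_interruptible(signed long timeout)
{
	/* Set the sleep state, then hand off to the existing primitive. */
	__set_current_state(TASK_INTERRUPTIBLE);
	return schedule_timeout(timeout);
}

Folding the state change into the helper removes the window in which a caller could forget to set the task state and busy-loop instead of sleeping.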
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 1f06e7690106..407b5f0a8c8e 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
| @@ -182,6 +182,37 @@ static struct super_block *cpuset_sb = NULL; | |||
| 182 | static DECLARE_MUTEX(cpuset_sem); | 182 | static DECLARE_MUTEX(cpuset_sem); |
| 183 | 183 | ||
| 184 | /* | 184 | /* |
| 185 | * The global cpuset semaphore cpuset_sem can be needed by the | ||
| 186 | * memory allocator to update a tasks mems_allowed (see the calls | ||
| 187 | * to cpuset_update_current_mems_allowed()) or to walk up the | ||
| 188 | * cpuset hierarchy to find a mem_exclusive cpuset see the calls | ||
| 189 | * to cpuset_excl_nodes_overlap()). | ||
| 190 | * | ||
| 191 | * But if the memory allocation is being done by cpuset.c code, it | ||
| 192 | * usually already holds cpuset_sem. Double tripping on a kernel | ||
| 193 | * semaphore deadlocks the current task, and any other task that | ||
| 194 | * subsequently tries to obtain the lock. | ||
| 195 | * | ||
| 196 | * Run all up's and down's on cpuset_sem through the following | ||
| 197 | * wrappers, which will detect this nested locking, and avoid | ||
| 198 | * deadlocking. | ||
| 199 | */ | ||
| 200 | |||
| 201 | static inline void cpuset_down(struct semaphore *psem) | ||
| 202 | { | ||
| 203 | if (current->cpuset_sem_nest_depth == 0) | ||
| 204 | down(psem); | ||
| 205 | current->cpuset_sem_nest_depth++; | ||
| 206 | } | ||
| 207 | |||
| 208 | static inline void cpuset_up(struct semaphore *psem) | ||
| 209 | { | ||
| 210 | current->cpuset_sem_nest_depth--; | ||
| 211 | if (current->cpuset_sem_nest_depth == 0) | ||
| 212 | up(psem); | ||
| 213 | } | ||
| 214 | |||
| 215 | /* | ||
| 185 | * A couple of forward declarations required, due to cyclic reference loop: | 216 | * A couple of forward declarations required, due to cyclic reference loop: |
| 186 | * cpuset_mkdir -> cpuset_create -> cpuset_populate_dir -> cpuset_add_file | 217 | * cpuset_mkdir -> cpuset_create -> cpuset_populate_dir -> cpuset_add_file |
| 187 | * -> cpuset_create_file -> cpuset_dir_inode_operations -> cpuset_mkdir. | 218 | * -> cpuset_create_file -> cpuset_dir_inode_operations -> cpuset_mkdir. |
| @@ -522,19 +553,10 @@ static void guarantee_online_mems(const struct cpuset *cs, nodemask_t *pmask) | |||
| 522 | * Refresh current tasks mems_allowed and mems_generation from | 553 | * Refresh current tasks mems_allowed and mems_generation from |
| 523 | * current tasks cpuset. Call with cpuset_sem held. | 554 | * current tasks cpuset. Call with cpuset_sem held. |
| 524 | * | 555 | * |
| 525 | * Be sure to call refresh_mems() on any cpuset operation which | 556 | * This routine is needed to update the per-task mems_allowed |
| 526 | * (1) holds cpuset_sem, and (2) might possibly alloc memory. | 557 | * data, within the tasks context, when it is trying to allocate |
| 527 | * Call after obtaining cpuset_sem lock, before any possible | 558 | * memory (in various mm/mempolicy.c routines) and notices |
| 528 | * allocation. Otherwise one risks trying to allocate memory | 559 | * that some other task has been modifying its cpuset. |
| 529 | * while the task cpuset_mems_generation is not the same as | ||
| 530 | * the mems_generation in its cpuset, which would deadlock on | ||
| 531 | * cpuset_sem in cpuset_update_current_mems_allowed(). | ||
| 532 | * | ||
| 533 | * Since we hold cpuset_sem, once refresh_mems() is called, the | ||
| 534 | * test (current->cpuset_mems_generation != cs->mems_generation) | ||
| 535 | * in cpuset_update_current_mems_allowed() will remain false, | ||
| 536 | * until we drop cpuset_sem. Anyone else who would change our | ||
| 537 | * cpusets mems_generation needs to lock cpuset_sem first. | ||
| 538 | */ | 560 | */ |
| 539 | 561 | ||
| 540 | static void refresh_mems(void) | 562 | static void refresh_mems(void) |
| @@ -840,7 +862,7 @@ static ssize_t cpuset_common_file_write(struct file *file, const char __user *us | |||
| 840 | } | 862 | } |
| 841 | buffer[nbytes] = 0; /* nul-terminate */ | 863 | buffer[nbytes] = 0; /* nul-terminate */ |
| 842 | 864 | ||
| 843 | down(&cpuset_sem); | 865 | cpuset_down(&cpuset_sem); |
| 844 | 866 | ||
| 845 | if (is_removed(cs)) { | 867 | if (is_removed(cs)) { |
| 846 | retval = -ENODEV; | 868 | retval = -ENODEV; |
| @@ -874,7 +896,7 @@ static ssize_t cpuset_common_file_write(struct file *file, const char __user *us | |||
| 874 | if (retval == 0) | 896 | if (retval == 0) |
| 875 | retval = nbytes; | 897 | retval = nbytes; |
| 876 | out2: | 898 | out2: |
| 877 | up(&cpuset_sem); | 899 | cpuset_up(&cpuset_sem); |
| 878 | cpuset_release_agent(pathbuf); | 900 | cpuset_release_agent(pathbuf); |
| 879 | out1: | 901 | out1: |
| 880 | kfree(buffer); | 902 | kfree(buffer); |
| @@ -914,9 +936,9 @@ static int cpuset_sprintf_cpulist(char *page, struct cpuset *cs) | |||
| 914 | { | 936 | { |
| 915 | cpumask_t mask; | 937 | cpumask_t mask; |
| 916 | 938 | ||
| 917 | down(&cpuset_sem); | 939 | cpuset_down(&cpuset_sem); |
| 918 | mask = cs->cpus_allowed; | 940 | mask = cs->cpus_allowed; |
| 919 | up(&cpuset_sem); | 941 | cpuset_up(&cpuset_sem); |
| 920 | 942 | ||
| 921 | return cpulist_scnprintf(page, PAGE_SIZE, mask); | 943 | return cpulist_scnprintf(page, PAGE_SIZE, mask); |
| 922 | } | 944 | } |
| @@ -925,9 +947,9 @@ static int cpuset_sprintf_memlist(char *page, struct cpuset *cs) | |||
| 925 | { | 947 | { |
| 926 | nodemask_t mask; | 948 | nodemask_t mask; |
| 927 | 949 | ||
| 928 | down(&cpuset_sem); | 950 | cpuset_down(&cpuset_sem); |
| 929 | mask = cs->mems_allowed; | 951 | mask = cs->mems_allowed; |
| 930 | up(&cpuset_sem); | 952 | cpuset_up(&cpuset_sem); |
| 931 | 953 | ||
| 932 | return nodelist_scnprintf(page, PAGE_SIZE, mask); | 954 | return nodelist_scnprintf(page, PAGE_SIZE, mask); |
| 933 | } | 955 | } |
| @@ -972,6 +994,10 @@ static ssize_t cpuset_common_file_read(struct file *file, char __user *buf, | |||
| 972 | *s++ = '\n'; | 994 | *s++ = '\n'; |
| 973 | *s = '\0'; | 995 | *s = '\0'; |
| 974 | 996 | ||
| 997 | /* Do nothing if *ppos is at the eof or beyond the eof. */ | ||
| 998 | if (s - page <= *ppos) | ||
| 999 | return 0; | ||
| 1000 | |||
| 975 | start = page + *ppos; | 1001 | start = page + *ppos; |
| 976 | n = s - start; | 1002 | n = s - start; |
| 977 | retval = n - copy_to_user(buf, start, min(n, nbytes)); | 1003 | retval = n - copy_to_user(buf, start, min(n, nbytes)); |
| @@ -1330,8 +1356,7 @@ static long cpuset_create(struct cpuset *parent, const char *name, int mode) | |||
| 1330 | if (!cs) | 1356 | if (!cs) |
| 1331 | return -ENOMEM; | 1357 | return -ENOMEM; |
| 1332 | 1358 | ||
| 1333 | down(&cpuset_sem); | 1359 | cpuset_down(&cpuset_sem); |
| 1334 | refresh_mems(); | ||
| 1335 | cs->flags = 0; | 1360 | cs->flags = 0; |
| 1336 | if (notify_on_release(parent)) | 1361 | if (notify_on_release(parent)) |
| 1337 | set_bit(CS_NOTIFY_ON_RELEASE, &cs->flags); | 1362 | set_bit(CS_NOTIFY_ON_RELEASE, &cs->flags); |
| @@ -1356,14 +1381,14 @@ static long cpuset_create(struct cpuset *parent, const char *name, int mode) | |||
| 1356 | * will down() this new directory's i_sem and if we race with | 1381 | * will down() this new directory's i_sem and if we race with |
| 1357 | * another mkdir, we might deadlock. | 1382 | * another mkdir, we might deadlock. |
| 1358 | */ | 1383 | */ |
| 1359 | up(&cpuset_sem); | 1384 | cpuset_up(&cpuset_sem); |
| 1360 | 1385 | ||
| 1361 | err = cpuset_populate_dir(cs->dentry); | 1386 | err = cpuset_populate_dir(cs->dentry); |
| 1362 | /* If err < 0, we have a half-filled directory - oh well ;) */ | 1387 | /* If err < 0, we have a half-filled directory - oh well ;) */ |
| 1363 | return 0; | 1388 | return 0; |
| 1364 | err: | 1389 | err: |
| 1365 | list_del(&cs->sibling); | 1390 | list_del(&cs->sibling); |
| 1366 | up(&cpuset_sem); | 1391 | cpuset_up(&cpuset_sem); |
| 1367 | kfree(cs); | 1392 | kfree(cs); |
| 1368 | return err; | 1393 | return err; |
| 1369 | } | 1394 | } |
| @@ -1385,14 +1410,13 @@ static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry) | |||
| 1385 | 1410 | ||
| 1386 | /* the vfs holds both inode->i_sem already */ | 1411 | /* the vfs holds both inode->i_sem already */ |
| 1387 | 1412 | ||
| 1388 | down(&cpuset_sem); | 1413 | cpuset_down(&cpuset_sem); |
| 1389 | refresh_mems(); | ||
| 1390 | if (atomic_read(&cs->count) > 0) { | 1414 | if (atomic_read(&cs->count) > 0) { |
| 1391 | up(&cpuset_sem); | 1415 | cpuset_up(&cpuset_sem); |
| 1392 | return -EBUSY; | 1416 | return -EBUSY; |
| 1393 | } | 1417 | } |
| 1394 | if (!list_empty(&cs->children)) { | 1418 | if (!list_empty(&cs->children)) { |
| 1395 | up(&cpuset_sem); | 1419 | cpuset_up(&cpuset_sem); |
| 1396 | return -EBUSY; | 1420 | return -EBUSY; |
| 1397 | } | 1421 | } |
| 1398 | parent = cs->parent; | 1422 | parent = cs->parent; |
| @@ -1408,7 +1432,7 @@ static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry) | |||
| 1408 | spin_unlock(&d->d_lock); | 1432 | spin_unlock(&d->d_lock); |
| 1409 | cpuset_d_remove_dir(d); | 1433 | cpuset_d_remove_dir(d); |
| 1410 | dput(d); | 1434 | dput(d); |
| 1411 | up(&cpuset_sem); | 1435 | cpuset_up(&cpuset_sem); |
| 1412 | cpuset_release_agent(pathbuf); | 1436 | cpuset_release_agent(pathbuf); |
| 1413 | return 0; | 1437 | return 0; |
| 1414 | } | 1438 | } |
| @@ -1511,10 +1535,10 @@ void cpuset_exit(struct task_struct *tsk) | |||
| 1511 | if (notify_on_release(cs)) { | 1535 | if (notify_on_release(cs)) { |
| 1512 | char *pathbuf = NULL; | 1536 | char *pathbuf = NULL; |
| 1513 | 1537 | ||
| 1514 | down(&cpuset_sem); | 1538 | cpuset_down(&cpuset_sem); |
| 1515 | if (atomic_dec_and_test(&cs->count)) | 1539 | if (atomic_dec_and_test(&cs->count)) |
| 1516 | check_for_release(cs, &pathbuf); | 1540 | check_for_release(cs, &pathbuf); |
| 1517 | up(&cpuset_sem); | 1541 | cpuset_up(&cpuset_sem); |
| 1518 | cpuset_release_agent(pathbuf); | 1542 | cpuset_release_agent(pathbuf); |
| 1519 | } else { | 1543 | } else { |
| 1520 | atomic_dec(&cs->count); | 1544 | atomic_dec(&cs->count); |
| @@ -1535,11 +1559,11 @@ cpumask_t cpuset_cpus_allowed(const struct task_struct *tsk) | |||
| 1535 | { | 1559 | { |
| 1536 | cpumask_t mask; | 1560 | cpumask_t mask; |
| 1537 | 1561 | ||
| 1538 | down(&cpuset_sem); | 1562 | cpuset_down(&cpuset_sem); |
| 1539 | task_lock((struct task_struct *)tsk); | 1563 | task_lock((struct task_struct *)tsk); |
| 1540 | guarantee_online_cpus(tsk->cpuset, &mask); | 1564 | guarantee_online_cpus(tsk->cpuset, &mask); |
| 1541 | task_unlock((struct task_struct *)tsk); | 1565 | task_unlock((struct task_struct *)tsk); |
| 1542 | up(&cpuset_sem); | 1566 | cpuset_up(&cpuset_sem); |
| 1543 | 1567 | ||
| 1544 | return mask; | 1568 | return mask; |
| 1545 | } | 1569 | } |
| @@ -1564,9 +1588,9 @@ void cpuset_update_current_mems_allowed(void) | |||
| 1564 | if (!cs) | 1588 | if (!cs) |
| 1565 | return; /* task is exiting */ | 1589 | return; /* task is exiting */ |
| 1566 | if (current->cpuset_mems_generation != cs->mems_generation) { | 1590 | if (current->cpuset_mems_generation != cs->mems_generation) { |
| 1567 | down(&cpuset_sem); | 1591 | cpuset_down(&cpuset_sem); |
| 1568 | refresh_mems(); | 1592 | refresh_mems(); |
| 1569 | up(&cpuset_sem); | 1593 | cpuset_up(&cpuset_sem); |
| 1570 | } | 1594 | } |
| 1571 | } | 1595 | } |
| 1572 | 1596 | ||
| @@ -1665,14 +1689,14 @@ int cpuset_zone_allowed(struct zone *z, unsigned int __nocast gfp_mask) | |||
| 1665 | return 0; | 1689 | return 0; |
| 1666 | 1690 | ||
| 1667 | /* Not hardwall and node outside mems_allowed: scan up cpusets */ | 1691 | /* Not hardwall and node outside mems_allowed: scan up cpusets */ |
| 1668 | down(&cpuset_sem); | 1692 | cpuset_down(&cpuset_sem); |
| 1669 | cs = current->cpuset; | 1693 | cs = current->cpuset; |
| 1670 | if (!cs) | 1694 | if (!cs) |
| 1671 | goto done; /* current task exiting */ | 1695 | goto done; /* current task exiting */ |
| 1672 | cs = nearest_exclusive_ancestor(cs); | 1696 | cs = nearest_exclusive_ancestor(cs); |
| 1673 | allowed = node_isset(node, cs->mems_allowed); | 1697 | allowed = node_isset(node, cs->mems_allowed); |
| 1674 | done: | 1698 | done: |
| 1675 | up(&cpuset_sem); | 1699 | cpuset_up(&cpuset_sem); |
| 1676 | return allowed; | 1700 | return allowed; |
| 1677 | } | 1701 | } |
| 1678 | 1702 | ||
| @@ -1693,7 +1717,7 @@ int cpuset_excl_nodes_overlap(const struct task_struct *p) | |||
| 1693 | const struct cpuset *cs1, *cs2; /* my and p's cpuset ancestors */ | 1717 | const struct cpuset *cs1, *cs2; /* my and p's cpuset ancestors */ |
| 1694 | int overlap = 0; /* do cpusets overlap? */ | 1718 | int overlap = 0; /* do cpusets overlap? */ |
| 1695 | 1719 | ||
| 1696 | down(&cpuset_sem); | 1720 | cpuset_down(&cpuset_sem); |
| 1697 | cs1 = current->cpuset; | 1721 | cs1 = current->cpuset; |
| 1698 | if (!cs1) | 1722 | if (!cs1) |
| 1699 | goto done; /* current task exiting */ | 1723 | goto done; /* current task exiting */ |
| @@ -1704,7 +1728,7 @@ int cpuset_excl_nodes_overlap(const struct task_struct *p) | |||
| 1704 | cs2 = nearest_exclusive_ancestor(cs2); | 1728 | cs2 = nearest_exclusive_ancestor(cs2); |
| 1705 | overlap = nodes_intersects(cs1->mems_allowed, cs2->mems_allowed); | 1729 | overlap = nodes_intersects(cs1->mems_allowed, cs2->mems_allowed); |
| 1706 | done: | 1730 | done: |
| 1707 | up(&cpuset_sem); | 1731 | cpuset_up(&cpuset_sem); |
| 1708 | 1732 | ||
| 1709 | return overlap; | 1733 | return overlap; |
| 1710 | } | 1734 | } |
| @@ -1727,7 +1751,7 @@ static int proc_cpuset_show(struct seq_file *m, void *v) | |||
| 1727 | return -ENOMEM; | 1751 | return -ENOMEM; |
| 1728 | 1752 | ||
| 1729 | tsk = m->private; | 1753 | tsk = m->private; |
| 1730 | down(&cpuset_sem); | 1754 | cpuset_down(&cpuset_sem); |
| 1731 | task_lock(tsk); | 1755 | task_lock(tsk); |
| 1732 | cs = tsk->cpuset; | 1756 | cs = tsk->cpuset; |
| 1733 | task_unlock(tsk); | 1757 | task_unlock(tsk); |
| @@ -1742,7 +1766,7 @@ static int proc_cpuset_show(struct seq_file *m, void *v) | |||
| 1742 | seq_puts(m, buf); | 1766 | seq_puts(m, buf); |
| 1743 | seq_putc(m, '\n'); | 1767 | seq_putc(m, '\n'); |
| 1744 | out: | 1768 | out: |
| 1745 | up(&cpuset_sem); | 1769 | cpuset_up(&cpuset_sem); |
| 1746 | kfree(buf); | 1770 | kfree(buf); |
| 1747 | return retval; | 1771 | return retval; |
| 1748 | } | 1772 | } |
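The cpuset_down()/cpuset_up() wrappers introduced at the top of this file let cpuset code survive re-entry through the memory allocator: if a task already holds cpuset_sem and an allocation it performs calls back into cpuset_zone_allowed() or cpuset_update_current_mems_allowed(), the per-task nest-depth counter skips the second down() instead of deadlocking. A hypothetical nested call path, with the depth transitions spelled out (illustration only, not a real call site):

static void nested_cpuset_path(void)
{
	cpuset_down(&cpuset_sem);	/* depth 0 -> 1: semaphore acquired   */
	/* ... a GFP_KERNEL allocation here may recurse into cpuset code,
	   e.g. via cpuset_zone_allowed() ... */
	cpuset_down(&cpuset_sem);	/* depth 1 -> 2: down() skipped       */
	cpuset_up(&cpuset_sem);		/* depth 2 -> 1: semaphore still held */
	cpuset_up(&cpuset_sem);		/* depth 1 -> 0: semaphore released   */
}

The scheme only works because the recursion happens in the same task; a different task contending for cpuset_sem still blocks as before.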
diff --git a/kernel/exit.c b/kernel/exit.c
index 5b0fb9f09f21..6d2089a1bce7 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
| @@ -368,17 +368,19 @@ EXPORT_SYMBOL(daemonize); | |||
| 368 | static inline void close_files(struct files_struct * files) | 368 | static inline void close_files(struct files_struct * files) |
| 369 | { | 369 | { |
| 370 | int i, j; | 370 | int i, j; |
| 371 | struct fdtable *fdt; | ||
| 371 | 372 | ||
| 372 | j = 0; | 373 | j = 0; |
| 374 | fdt = files_fdtable(files); | ||
| 373 | for (;;) { | 375 | for (;;) { |
| 374 | unsigned long set; | 376 | unsigned long set; |
| 375 | i = j * __NFDBITS; | 377 | i = j * __NFDBITS; |
| 376 | if (i >= files->max_fdset || i >= files->max_fds) | 378 | if (i >= fdt->max_fdset || i >= fdt->max_fds) |
| 377 | break; | 379 | break; |
| 378 | set = files->open_fds->fds_bits[j++]; | 380 | set = fdt->open_fds->fds_bits[j++]; |
| 379 | while (set) { | 381 | while (set) { |
| 380 | if (set & 1) { | 382 | if (set & 1) { |
| 381 | struct file * file = xchg(&files->fd[i], NULL); | 383 | struct file * file = xchg(&fdt->fd[i], NULL); |
| 382 | if (file) | 384 | if (file) |
| 383 | filp_close(file, files); | 385 | filp_close(file, files); |
| 384 | } | 386 | } |
| @@ -403,18 +405,22 @@ struct files_struct *get_files_struct(struct task_struct *task) | |||
| 403 | 405 | ||
| 404 | void fastcall put_files_struct(struct files_struct *files) | 406 | void fastcall put_files_struct(struct files_struct *files) |
| 405 | { | 407 | { |
| 408 | struct fdtable *fdt; | ||
| 409 | |||
| 406 | if (atomic_dec_and_test(&files->count)) { | 410 | if (atomic_dec_and_test(&files->count)) { |
| 407 | close_files(files); | 411 | close_files(files); |
| 408 | /* | 412 | /* |
| 409 | * Free the fd and fdset arrays if we expanded them. | 413 | * Free the fd and fdset arrays if we expanded them. |
| 414 | * If the fdtable was embedded, pass files for freeing | ||
| 415 | * at the end of the RCU grace period. Otherwise, | ||
| 416 | * you can free files immediately. | ||
| 410 | */ | 417 | */ |
| 411 | if (files->fd != &files->fd_array[0]) | 418 | fdt = files_fdtable(files); |
| 412 | free_fd_array(files->fd, files->max_fds); | 419 | if (fdt == &files->fdtab) |
| 413 | if (files->max_fdset > __FD_SETSIZE) { | 420 | fdt->free_files = files; |
| 414 | free_fdset(files->open_fds, files->max_fdset); | 421 | else |
| 415 | free_fdset(files->close_on_exec, files->max_fdset); | 422 | kmem_cache_free(files_cachep, files); |
| 416 | } | 423 | free_fdtable(fdt); |
| 417 | kmem_cache_free(files_cachep, files); | ||
| 418 | } | 424 | } |
| 419 | } | 425 | } |
| 420 | 426 | ||
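put_files_struct() no longer frees the fd arrays directly. With the fd table now published to lock-free readers, the arrays (and, for an embedded table, the files_struct itself via fdt->free_files) must outlive any reader still traversing them, so free_fdtable() defers the real freeing to an RCU grace period. A sketch of that deferral, under the assumption of the fs/file.c helpers this series adds (the names below are illustrative):

static void fdtable_free_rcu(struct rcu_head *head)
{
	struct fdtable *fdt = container_of(head, struct fdtable, rcu);

	/* Safe now: no RCU reader can still hold a reference to fdt.
	   Free the fd array and fd sets, and release fdt->free_files
	   here if the table was embedded in a files_struct. */
}

static void free_fdtable_sketch(struct fdtable *fdt)
{
	call_rcu(&fdt->rcu, fdtable_free_rcu);	/* runs after a grace period */
}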
diff --git a/kernel/fork.c b/kernel/fork.c
index 7e1ead9a6ba4..8149f3602881 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
| @@ -35,6 +35,7 @@ | |||
| 35 | #include <linux/syscalls.h> | 35 | #include <linux/syscalls.h> |
| 36 | #include <linux/jiffies.h> | 36 | #include <linux/jiffies.h> |
| 37 | #include <linux/futex.h> | 37 | #include <linux/futex.h> |
| 38 | #include <linux/rcupdate.h> | ||
| 38 | #include <linux/ptrace.h> | 39 | #include <linux/ptrace.h> |
| 39 | #include <linux/mount.h> | 40 | #include <linux/mount.h> |
| 40 | #include <linux/audit.h> | 41 | #include <linux/audit.h> |
| @@ -176,6 +177,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig) | |||
| 176 | 177 | ||
| 177 | /* One for us, one for whoever does the "release_task()" (usually parent) */ | 178 | /* One for us, one for whoever does the "release_task()" (usually parent) */ |
| 178 | atomic_set(&tsk->usage,2); | 179 | atomic_set(&tsk->usage,2); |
| 180 | atomic_set(&tsk->fs_excl, 0); | ||
| 179 | return tsk; | 181 | return tsk; |
| 180 | } | 182 | } |
| 181 | 183 | ||
| @@ -564,24 +566,53 @@ static inline int copy_fs(unsigned long clone_flags, struct task_struct * tsk) | |||
| 564 | return 0; | 566 | return 0; |
| 565 | } | 567 | } |
| 566 | 568 | ||
| 567 | static int count_open_files(struct files_struct *files, int size) | 569 | static int count_open_files(struct fdtable *fdt) |
| 568 | { | 570 | { |
| 571 | int size = fdt->max_fdset; | ||
| 569 | int i; | 572 | int i; |
| 570 | 573 | ||
| 571 | /* Find the last open fd */ | 574 | /* Find the last open fd */ |
| 572 | for (i = size/(8*sizeof(long)); i > 0; ) { | 575 | for (i = size/(8*sizeof(long)); i > 0; ) { |
| 573 | if (files->open_fds->fds_bits[--i]) | 576 | if (fdt->open_fds->fds_bits[--i]) |
| 574 | break; | 577 | break; |
| 575 | } | 578 | } |
| 576 | i = (i+1) * 8 * sizeof(long); | 579 | i = (i+1) * 8 * sizeof(long); |
| 577 | return i; | 580 | return i; |
| 578 | } | 581 | } |
| 579 | 582 | ||
| 583 | static struct files_struct *alloc_files(void) | ||
| 584 | { | ||
| 585 | struct files_struct *newf; | ||
| 586 | struct fdtable *fdt; | ||
| 587 | |||
| 588 | newf = kmem_cache_alloc(files_cachep, SLAB_KERNEL); | ||
| 589 | if (!newf) | ||
| 590 | goto out; | ||
| 591 | |||
| 592 | atomic_set(&newf->count, 1); | ||
| 593 | |||
| 594 | spin_lock_init(&newf->file_lock); | ||
| 595 | fdt = &newf->fdtab; | ||
| 596 | fdt->next_fd = 0; | ||
| 597 | fdt->max_fds = NR_OPEN_DEFAULT; | ||
| 598 | fdt->max_fdset = __FD_SETSIZE; | ||
| 599 | fdt->close_on_exec = &newf->close_on_exec_init; | ||
| 600 | fdt->open_fds = &newf->open_fds_init; | ||
| 601 | fdt->fd = &newf->fd_array[0]; | ||
| 602 | INIT_RCU_HEAD(&fdt->rcu); | ||
| 603 | fdt->free_files = NULL; | ||
| 604 | fdt->next = NULL; | ||
| 605 | rcu_assign_pointer(newf->fdt, fdt); | ||
| 606 | out: | ||
| 607 | return newf; | ||
| 608 | } | ||
| 609 | |||
| 580 | static int copy_files(unsigned long clone_flags, struct task_struct * tsk) | 610 | static int copy_files(unsigned long clone_flags, struct task_struct * tsk) |
| 581 | { | 611 | { |
| 582 | struct files_struct *oldf, *newf; | 612 | struct files_struct *oldf, *newf; |
| 583 | struct file **old_fds, **new_fds; | 613 | struct file **old_fds, **new_fds; |
| 584 | int open_files, size, i, error = 0, expand; | 614 | int open_files, size, i, error = 0, expand; |
| 615 | struct fdtable *old_fdt, *new_fdt; | ||
| 585 | 616 | ||
| 586 | /* | 617 | /* |
| 587 | * A background process may not have any files ... | 618 | * A background process may not have any files ... |
| @@ -602,35 +633,27 @@ static int copy_files(unsigned long clone_flags, struct task_struct * tsk) | |||
| 602 | */ | 633 | */ |
| 603 | tsk->files = NULL; | 634 | tsk->files = NULL; |
| 604 | error = -ENOMEM; | 635 | error = -ENOMEM; |
| 605 | newf = kmem_cache_alloc(files_cachep, SLAB_KERNEL); | 636 | newf = alloc_files(); |
| 606 | if (!newf) | 637 | if (!newf) |
| 607 | goto out; | 638 | goto out; |
| 608 | 639 | ||
| 609 | atomic_set(&newf->count, 1); | ||
| 610 | |||
| 611 | spin_lock_init(&newf->file_lock); | ||
| 612 | newf->next_fd = 0; | ||
| 613 | newf->max_fds = NR_OPEN_DEFAULT; | ||
| 614 | newf->max_fdset = __FD_SETSIZE; | ||
| 615 | newf->close_on_exec = &newf->close_on_exec_init; | ||
| 616 | newf->open_fds = &newf->open_fds_init; | ||
| 617 | newf->fd = &newf->fd_array[0]; | ||
| 618 | |||
| 619 | spin_lock(&oldf->file_lock); | 640 | spin_lock(&oldf->file_lock); |
| 620 | 641 | old_fdt = files_fdtable(oldf); | |
| 621 | open_files = count_open_files(oldf, oldf->max_fdset); | 642 | new_fdt = files_fdtable(newf); |
| 643 | size = old_fdt->max_fdset; | ||
| 644 | open_files = count_open_files(old_fdt); | ||
| 622 | expand = 0; | 645 | expand = 0; |
| 623 | 646 | ||
| 624 | /* | 647 | /* |
| 625 | * Check whether we need to allocate a larger fd array or fd set. | 648 | * Check whether we need to allocate a larger fd array or fd set. |
| 626 | * Note: we're not a clone task, so the open count won't change. | 649 | * Note: we're not a clone task, so the open count won't change. |
| 627 | */ | 650 | */ |
| 628 | if (open_files > newf->max_fdset) { | 651 | if (open_files > new_fdt->max_fdset) { |
| 629 | newf->max_fdset = 0; | 652 | new_fdt->max_fdset = 0; |
| 630 | expand = 1; | 653 | expand = 1; |
| 631 | } | 654 | } |
| 632 | if (open_files > newf->max_fds) { | 655 | if (open_files > new_fdt->max_fds) { |
| 633 | newf->max_fds = 0; | 656 | new_fdt->max_fds = 0; |
| 634 | expand = 1; | 657 | expand = 1; |
| 635 | } | 658 | } |
| 636 | 659 | ||
| @@ -642,14 +665,21 @@ static int copy_files(unsigned long clone_flags, struct task_struct * tsk) | |||
| 642 | spin_unlock(&newf->file_lock); | 665 | spin_unlock(&newf->file_lock); |
| 643 | if (error < 0) | 666 | if (error < 0) |
| 644 | goto out_release; | 667 | goto out_release; |
| 668 | new_fdt = files_fdtable(newf); | ||
| 669 | /* | ||
| 670 | * Reacquire the oldf lock and a pointer to its fd table | ||
| 671 | * who knows it may have a new bigger fd table. We need | ||
| 672 | * the latest pointer. | ||
| 673 | */ | ||
| 645 | spin_lock(&oldf->file_lock); | 674 | spin_lock(&oldf->file_lock); |
| 675 | old_fdt = files_fdtable(oldf); | ||
| 646 | } | 676 | } |
| 647 | 677 | ||
| 648 | old_fds = oldf->fd; | 678 | old_fds = old_fdt->fd; |
| 649 | new_fds = newf->fd; | 679 | new_fds = new_fdt->fd; |
| 650 | 680 | ||
| 651 | memcpy(newf->open_fds->fds_bits, oldf->open_fds->fds_bits, open_files/8); | 681 | memcpy(new_fdt->open_fds->fds_bits, old_fdt->open_fds->fds_bits, open_files/8); |
| 652 | memcpy(newf->close_on_exec->fds_bits, oldf->close_on_exec->fds_bits, open_files/8); | 682 | memcpy(new_fdt->close_on_exec->fds_bits, old_fdt->close_on_exec->fds_bits, open_files/8); |
| 653 | 683 | ||
| 654 | for (i = open_files; i != 0; i--) { | 684 | for (i = open_files; i != 0; i--) { |
| 655 | struct file *f = *old_fds++; | 685 | struct file *f = *old_fds++; |
| @@ -662,24 +692,24 @@ static int copy_files(unsigned long clone_flags, struct task_struct * tsk) | |||
| 662 | * is partway through open(). So make sure that this | 692 | * is partway through open(). So make sure that this |
| 663 | * fd is available to the new process. | 693 | * fd is available to the new process. |
| 664 | */ | 694 | */ |
| 665 | FD_CLR(open_files - i, newf->open_fds); | 695 | FD_CLR(open_files - i, new_fdt->open_fds); |
| 666 | } | 696 | } |
| 667 | *new_fds++ = f; | 697 | rcu_assign_pointer(*new_fds++, f); |
| 668 | } | 698 | } |
| 669 | spin_unlock(&oldf->file_lock); | 699 | spin_unlock(&oldf->file_lock); |
| 670 | 700 | ||
| 671 | /* compute the remainder to be cleared */ | 701 | /* compute the remainder to be cleared */ |
| 672 | size = (newf->max_fds - open_files) * sizeof(struct file *); | 702 | size = (new_fdt->max_fds - open_files) * sizeof(struct file *); |
| 673 | 703 | ||
| 674 | /* This is long word aligned thus could use a optimized version */ | 704 | /* This is long word aligned thus could use a optimized version */ |
| 675 | memset(new_fds, 0, size); | 705 | memset(new_fds, 0, size); |
| 676 | 706 | ||
| 677 | if (newf->max_fdset > open_files) { | 707 | if (new_fdt->max_fdset > open_files) { |
| 678 | int left = (newf->max_fdset-open_files)/8; | 708 | int left = (new_fdt->max_fdset-open_files)/8; |
| 679 | int start = open_files / (8 * sizeof(unsigned long)); | 709 | int start = open_files / (8 * sizeof(unsigned long)); |
| 680 | 710 | ||
| 681 | memset(&newf->open_fds->fds_bits[start], 0, left); | 711 | memset(&new_fdt->open_fds->fds_bits[start], 0, left); |
| 682 | memset(&newf->close_on_exec->fds_bits[start], 0, left); | 712 | memset(&new_fdt->close_on_exec->fds_bits[start], 0, left); |
| 683 | } | 713 | } |
| 684 | 714 | ||
| 685 | tsk->files = newf; | 715 | tsk->files = newf; |
| @@ -688,9 +718,9 @@ out: | |||
| 688 | return error; | 718 | return error; |
| 689 | 719 | ||
| 690 | out_release: | 720 | out_release: |
| 691 | free_fdset (newf->close_on_exec, newf->max_fdset); | 721 | free_fdset (new_fdt->close_on_exec, new_fdt->max_fdset); |
| 692 | free_fdset (newf->open_fds, newf->max_fdset); | 722 | free_fdset (new_fdt->open_fds, new_fdt->max_fdset); |
| 693 | free_fd_array(newf->fd, newf->max_fds); | 723 | free_fd_array(new_fdt->fd, new_fdt->max_fds); |
| 694 | kmem_cache_free(files_cachep, newf); | 724 | kmem_cache_free(files_cachep, newf); |
| 695 | goto out; | 725 | goto out; |
| 696 | } | 726 | } |
| @@ -1115,6 +1145,9 @@ static task_t *copy_process(unsigned long clone_flags, | |||
| 1115 | __get_cpu_var(process_counts)++; | 1145 | __get_cpu_var(process_counts)++; |
| 1116 | } | 1146 | } |
| 1117 | 1147 | ||
| 1148 | if (!current->signal->tty && p->signal->tty) | ||
| 1149 | p->signal->tty = NULL; | ||
| 1150 | |||
| 1118 | nr_threads++; | 1151 | nr_threads++; |
| 1119 | total_forks++; | 1152 | total_forks++; |
| 1120 | write_unlock_irq(&tasklist_lock); | 1153 | write_unlock_irq(&tasklist_lock); |
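alloc_files() and the fd-copy loop publish their updates with rcu_assign_pointer(), the writer half of the RCU contract: every field is initialized before the pointer becomes visible. The reader half goes through rcu_dereference() inside an RCU read-side critical section, roughly as below (a sketch in the spirit of the fcheck_files() helper; the function name is illustrative):

static struct file *lookup_fd_rcu(struct files_struct *files, unsigned int fd)
{
	struct file *file = NULL;
	struct fdtable *fdt;

	rcu_read_lock();
	fdt = rcu_dereference(files->fdt);	/* pairs with rcu_assign_pointer() */
	if (fd < fdt->max_fds)
		file = rcu_dereference(fdt->fd[fd]);
	rcu_read_unlock();
	return file;
}

This is also why copy_files() re-reads both fd-table pointers after expand_files(): once a table can be swapped out from under a plain pointer, any pointer cached across a lock drop is stale.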
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index f436993bd590..bef3b6901b76 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
| @@ -45,6 +45,7 @@ | |||
| 45 | #include <linux/percpu.h> | 45 | #include <linux/percpu.h> |
| 46 | #include <linux/notifier.h> | 46 | #include <linux/notifier.h> |
| 47 | #include <linux/rcupdate.h> | 47 | #include <linux/rcupdate.h> |
| 48 | #include <linux/rcuref.h> | ||
| 48 | #include <linux/cpu.h> | 49 | #include <linux/cpu.h> |
| 49 | 50 | ||
| 50 | /* Definition for rcupdate control block. */ | 51 | /* Definition for rcupdate control block. */ |
| @@ -72,6 +73,19 @@ DEFINE_PER_CPU(struct rcu_data, rcu_bh_data) = { 0L }; | |||
| 72 | static DEFINE_PER_CPU(struct tasklet_struct, rcu_tasklet) = {NULL}; | 73 | static DEFINE_PER_CPU(struct tasklet_struct, rcu_tasklet) = {NULL}; |
| 73 | static int maxbatch = 10; | 74 | static int maxbatch = 10; |
| 74 | 75 | ||
| 76 | #ifndef __HAVE_ARCH_CMPXCHG | ||
| 77 | /* | ||
| 78 | * We use an array of spinlocks for the rcurefs -- similar to ones in sparc | ||
| 79 | * 32 bit atomic_t implementations, and a hash function similar to that | ||
| 80 | * for our refcounting needs. | ||
| 81 | * Can't help multiprocessors which donot have cmpxchg :( | ||
| 82 | */ | ||
| 83 | |||
| 84 | spinlock_t __rcuref_hash[RCUREF_HASH_SIZE] = { | ||
| 85 | [0 ... (RCUREF_HASH_SIZE-1)] = SPIN_LOCK_UNLOCKED | ||
| 86 | }; | ||
| 87 | #endif | ||
| 88 | |||
| 75 | /** | 89 | /** |
| 76 | * call_rcu - Queue an RCU callback for invocation after a grace period. | 90 | * call_rcu - Queue an RCU callback for invocation after a grace period. |
| 77 | * @head: structure to be used for queueing the RCU updates. | 91 | * @head: structure to be used for queueing the RCU updates. |
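The new __rcuref_hash array backs the rcuref helpers on architectures that lack cmpxchg: an atomic update of an RCU-protected reference count hashes the counter's address to one of RCUREF_HASH_SIZE spinlocks and performs the read-modify-write under that lock. A sketch of the idea (not the exact include/linux/rcuref.h code):

static inline void rcuref_inc_sketch(atomic_t *rcuref)
{
	unsigned long flags;
	spinlock_t *lock;

	/* Hash the counter address to pick one of the shared locks. */
	lock = &__rcuref_hash[((unsigned long)rcuref >> 8) %
			      RCUREF_HASH_SIZE];
	spin_lock_irqsave(lock, flags);
	rcuref->counter += 1;
	spin_unlock_irqrestore(lock, flags);
}

Architectures that do provide cmpxchg use lock-free atomic operations instead, so this fallback only costs the machines that cannot do better.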
diff --git a/kernel/sched.c b/kernel/sched.c
index 18b95520a2e2..dbd4490afec1 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
| @@ -875,7 +875,7 @@ static int migrate_task(task_t *p, int dest_cpu, migration_req_t *req) | |||
| 875 | * smp_call_function() if an IPI is sent by the same process we are | 875 | * smp_call_function() if an IPI is sent by the same process we are |
| 876 | * waiting to become inactive. | 876 | * waiting to become inactive. |
| 877 | */ | 877 | */ |
| 878 | void wait_task_inactive(task_t * p) | 878 | void wait_task_inactive(task_t *p) |
| 879 | { | 879 | { |
| 880 | unsigned long flags; | 880 | unsigned long flags; |
| 881 | runqueue_t *rq; | 881 | runqueue_t *rq; |
| @@ -966,8 +966,11 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu) | |||
| 966 | int local_group; | 966 | int local_group; |
| 967 | int i; | 967 | int i; |
| 968 | 968 | ||
| 969 | /* Skip over this group if it has no CPUs allowed */ | ||
| 970 | if (!cpus_intersects(group->cpumask, p->cpus_allowed)) | ||
| 971 | goto nextgroup; | ||
| 972 | |||
| 969 | local_group = cpu_isset(this_cpu, group->cpumask); | 973 | local_group = cpu_isset(this_cpu, group->cpumask); |
| 970 | /* XXX: put a cpus allowed check */ | ||
| 971 | 974 | ||
| 972 | /* Tally up the load of all CPUs in the group */ | 975 | /* Tally up the load of all CPUs in the group */ |
| 973 | avg_load = 0; | 976 | avg_load = 0; |
| @@ -992,6 +995,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu) | |||
| 992 | min_load = avg_load; | 995 | min_load = avg_load; |
| 993 | idlest = group; | 996 | idlest = group; |
| 994 | } | 997 | } |
| 998 | nextgroup: | ||
| 995 | group = group->next; | 999 | group = group->next; |
| 996 | } while (group != sd->groups); | 1000 | } while (group != sd->groups); |
| 997 | 1001 | ||
| @@ -1003,13 +1007,18 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu) | |||
| 1003 | /* | 1007 | /* |
| 1004 | * find_idlest_queue - find the idlest runqueue among the cpus in group. | 1008 | * find_idlest_queue - find the idlest runqueue among the cpus in group. |
| 1005 | */ | 1009 | */ |
| 1006 | static int find_idlest_cpu(struct sched_group *group, int this_cpu) | 1010 | static int |
| 1011 | find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu) | ||
| 1007 | { | 1012 | { |
| 1013 | cpumask_t tmp; | ||
| 1008 | unsigned long load, min_load = ULONG_MAX; | 1014 | unsigned long load, min_load = ULONG_MAX; |
| 1009 | int idlest = -1; | 1015 | int idlest = -1; |
| 1010 | int i; | 1016 | int i; |
| 1011 | 1017 | ||
| 1012 | for_each_cpu_mask(i, group->cpumask) { | 1018 | /* Traverse only the allowed CPUs */ |
| 1019 | cpus_and(tmp, group->cpumask, p->cpus_allowed); | ||
| 1020 | |||
| 1021 | for_each_cpu_mask(i, tmp) { | ||
| 1013 | load = source_load(i, 0); | 1022 | load = source_load(i, 0); |
| 1014 | 1023 | ||
| 1015 | if (load < min_load || (load == min_load && i == this_cpu)) { | 1024 | if (load < min_load || (load == min_load && i == this_cpu)) { |
| @@ -1052,7 +1061,7 @@ static int sched_balance_self(int cpu, int flag) | |||
| 1052 | if (!group) | 1061 | if (!group) |
| 1053 | goto nextlevel; | 1062 | goto nextlevel; |
| 1054 | 1063 | ||
| 1055 | new_cpu = find_idlest_cpu(group, cpu); | 1064 | new_cpu = find_idlest_cpu(group, t, cpu); |
| 1056 | if (new_cpu == -1 || new_cpu == cpu) | 1065 | if (new_cpu == -1 || new_cpu == cpu) |
| 1057 | goto nextlevel; | 1066 | goto nextlevel; |
| 1058 | 1067 | ||
| @@ -1127,7 +1136,7 @@ static inline int wake_idle(int cpu, task_t *p) | |||
| 1127 | * | 1136 | * |
| 1128 | * returns failure only if the task is already active. | 1137 | * returns failure only if the task is already active. |
| 1129 | */ | 1138 | */ |
| 1130 | static int try_to_wake_up(task_t * p, unsigned int state, int sync) | 1139 | static int try_to_wake_up(task_t *p, unsigned int state, int sync) |
| 1131 | { | 1140 | { |
| 1132 | int cpu, this_cpu, success = 0; | 1141 | int cpu, this_cpu, success = 0; |
| 1133 | unsigned long flags; | 1142 | unsigned long flags; |
| @@ -1252,6 +1261,16 @@ out_activate: | |||
| 1252 | } | 1261 | } |
| 1253 | 1262 | ||
| 1254 | /* | 1263 | /* |
| 1264 | * Tasks that have marked their sleep as noninteractive get | ||
| 1265 | * woken up without updating their sleep average. (i.e. their | ||
| 1266 | * sleep is handled in a priority-neutral manner, no priority | ||
| 1267 | * boost and no penalty.) | ||
| 1268 | */ | ||
| 1269 | if (old_state & TASK_NONINTERACTIVE) | ||
| 1270 | __activate_task(p, rq); | ||
| 1271 | else | ||
| 1272 | activate_task(p, rq, cpu == this_cpu); | ||
| 1273 | /* | ||
| 1255 | * Sync wakeups (i.e. those types of wakeups where the waker | 1274 | * Sync wakeups (i.e. those types of wakeups where the waker |
| 1256 | * has indicated that it will leave the CPU in short order) | 1275 | * has indicated that it will leave the CPU in short order) |
| 1257 | * don't trigger a preemption, if the woken up task will run on | 1276 | * don't trigger a preemption, if the woken up task will run on |
| @@ -1259,7 +1278,6 @@ out_activate: | |||
| 1259 | * the waker guarantees that the freshly woken up task is going | 1278 | * the waker guarantees that the freshly woken up task is going |
| 1260 | * to be considered on this CPU.) | 1279 | * to be considered on this CPU.) |
| 1261 | */ | 1280 | */ |
| 1262 | activate_task(p, rq, cpu == this_cpu); | ||
| 1263 | if (!sync || cpu != this_cpu) { | 1281 | if (!sync || cpu != this_cpu) { |
| 1264 | if (TASK_PREEMPTS_CURR(p, rq)) | 1282 | if (TASK_PREEMPTS_CURR(p, rq)) |
| 1265 | resched_task(rq->curr); | 1283 | resched_task(rq->curr); |
| @@ -1274,7 +1292,7 @@ out: | |||
| 1274 | return success; | 1292 | return success; |
| 1275 | } | 1293 | } |
| 1276 | 1294 | ||
| 1277 | int fastcall wake_up_process(task_t * p) | 1295 | int fastcall wake_up_process(task_t *p) |
| 1278 | { | 1296 | { |
| 1279 | return try_to_wake_up(p, TASK_STOPPED | TASK_TRACED | | 1297 | return try_to_wake_up(p, TASK_STOPPED | TASK_TRACED | |
| 1280 | TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE, 0); | 1298 | TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE, 0); |
| @@ -1353,7 +1371,7 @@ void fastcall sched_fork(task_t *p, int clone_flags) | |||
| 1353 | * that must be done for every newly created context, then puts the task | 1371 | * that must be done for every newly created context, then puts the task |
| 1354 | * on the runqueue and wakes it. | 1372 | * on the runqueue and wakes it. |
| 1355 | */ | 1373 | */ |
| 1356 | void fastcall wake_up_new_task(task_t * p, unsigned long clone_flags) | 1374 | void fastcall wake_up_new_task(task_t *p, unsigned long clone_flags) |
| 1357 | { | 1375 | { |
| 1358 | unsigned long flags; | 1376 | unsigned long flags; |
| 1359 | int this_cpu, cpu; | 1377 | int this_cpu, cpu; |
| @@ -1436,7 +1454,7 @@ void fastcall wake_up_new_task(task_t * p, unsigned long clone_flags) | |||
| 1436 | * artificially, because any timeslice recovered here | 1454 | * artificially, because any timeslice recovered here |
| 1437 | * was given away by the parent in the first place.) | 1455 | * was given away by the parent in the first place.) |
| 1438 | */ | 1456 | */ |
| 1439 | void fastcall sched_exit(task_t * p) | 1457 | void fastcall sched_exit(task_t *p) |
| 1440 | { | 1458 | { |
| 1441 | unsigned long flags; | 1459 | unsigned long flags; |
| 1442 | runqueue_t *rq; | 1460 | runqueue_t *rq; |
| @@ -1511,6 +1529,10 @@ static inline void finish_task_switch(runqueue_t *rq, task_t *prev) | |||
| 1511 | * Manfred Spraul <manfred@colorfullife.com> | 1529 | * Manfred Spraul <manfred@colorfullife.com> |
| 1512 | */ | 1530 | */ |
| 1513 | prev_task_flags = prev->flags; | 1531 | prev_task_flags = prev->flags; |
| 1532 | #ifdef CONFIG_DEBUG_SPINLOCK | ||
| 1533 | /* this is a valid case when another task releases the spinlock */ | ||
| 1534 | rq->lock.owner = current; | ||
| 1535 | #endif | ||
| 1514 | finish_arch_switch(prev); | 1536 | finish_arch_switch(prev); |
| 1515 | finish_lock_switch(rq, prev); | 1537 | finish_lock_switch(rq, prev); |
| 1516 | if (mm) | 1538 | if (mm) |
| @@ -1753,7 +1775,8 @@ void pull_task(runqueue_t *src_rq, prio_array_t *src_array, task_t *p, | |||
| 1753 | */ | 1775 | */ |
| 1754 | static inline | 1776 | static inline |
| 1755 | int can_migrate_task(task_t *p, runqueue_t *rq, int this_cpu, | 1777 | int can_migrate_task(task_t *p, runqueue_t *rq, int this_cpu, |
| 1756 | struct sched_domain *sd, enum idle_type idle, int *all_pinned) | 1778 | struct sched_domain *sd, enum idle_type idle, |
| 1779 | int *all_pinned) | ||
| 1757 | { | 1780 | { |
| 1758 | /* | 1781 | /* |
| 1759 | * We do not migrate tasks that are: | 1782 | * We do not migrate tasks that are: |
| @@ -1883,10 +1906,11 @@ out: | |||
| 1883 | */ | 1906 | */ |
| 1884 | static struct sched_group * | 1907 | static struct sched_group * |
| 1885 | find_busiest_group(struct sched_domain *sd, int this_cpu, | 1908 | find_busiest_group(struct sched_domain *sd, int this_cpu, |
| 1886 | unsigned long *imbalance, enum idle_type idle) | 1909 | unsigned long *imbalance, enum idle_type idle, int *sd_idle) |
| 1887 | { | 1910 | { |
| 1888 | struct sched_group *busiest = NULL, *this = NULL, *group = sd->groups; | 1911 | struct sched_group *busiest = NULL, *this = NULL, *group = sd->groups; |
| 1889 | unsigned long max_load, avg_load, total_load, this_load, total_pwr; | 1912 | unsigned long max_load, avg_load, total_load, this_load, total_pwr; |
| 1913 | unsigned long max_pull; | ||
| 1890 | int load_idx; | 1914 | int load_idx; |
| 1891 | 1915 | ||
| 1892 | max_load = this_load = total_load = total_pwr = 0; | 1916 | max_load = this_load = total_load = total_pwr = 0; |
| @@ -1908,6 +1932,9 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, | |||
| 1908 | avg_load = 0; | 1932 | avg_load = 0; |
| 1909 | 1933 | ||
| 1910 | for_each_cpu_mask(i, group->cpumask) { | 1934 | for_each_cpu_mask(i, group->cpumask) { |
| 1935 | if (*sd_idle && !idle_cpu(i)) | ||
| 1936 | *sd_idle = 0; | ||
| 1937 | |||
| 1911 | /* Bias balancing toward cpus of our domain */ | 1938 | /* Bias balancing toward cpus of our domain */ |
| 1912 | if (local_group) | 1939 | if (local_group) |
| 1913 | load = target_load(i, load_idx); | 1940 | load = target_load(i, load_idx); |
| @@ -1933,7 +1960,7 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, | |||
| 1933 | group = group->next; | 1960 | group = group->next; |
| 1934 | } while (group != sd->groups); | 1961 | } while (group != sd->groups); |
| 1935 | 1962 | ||
| 1936 | if (!busiest || this_load >= max_load) | 1963 | if (!busiest || this_load >= max_load || max_load <= SCHED_LOAD_SCALE) |
| 1937 | goto out_balanced; | 1964 | goto out_balanced; |
| 1938 | 1965 | ||
| 1939 | avg_load = (SCHED_LOAD_SCALE * total_load) / total_pwr; | 1966 | avg_load = (SCHED_LOAD_SCALE * total_load) / total_pwr; |
| @@ -1953,8 +1980,12 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, | |||
| 1953 | * by pulling tasks to us. Be careful of negative numbers as they'll | 1980 | * by pulling tasks to us. Be careful of negative numbers as they'll |
| 1954 | * appear as very large values with unsigned longs. | 1981 | * appear as very large values with unsigned longs. |
| 1955 | */ | 1982 | */ |
| 1983 | |||
| 1984 | /* Don't want to pull so many tasks that a group would go idle */ | ||
| 1985 | max_pull = min(max_load - avg_load, max_load - SCHED_LOAD_SCALE); | ||
| 1986 | |||
| 1956 | /* How much load to actually move to equalise the imbalance */ | 1987 | /* How much load to actually move to equalise the imbalance */ |
| 1957 | *imbalance = min((max_load - avg_load) * busiest->cpu_power, | 1988 | *imbalance = min(max_pull * busiest->cpu_power, |
| 1958 | (avg_load - this_load) * this->cpu_power) | 1989 | (avg_load - this_load) * this->cpu_power) |
| 1959 | / SCHED_LOAD_SCALE; | 1990 | / SCHED_LOAD_SCALE; |
| 1960 | 1991 | ||
| @@ -2051,11 +2082,14 @@ static int load_balance(int this_cpu, runqueue_t *this_rq, | |||
| 2051 | unsigned long imbalance; | 2082 | unsigned long imbalance; |
| 2052 | int nr_moved, all_pinned = 0; | 2083 | int nr_moved, all_pinned = 0; |
| 2053 | int active_balance = 0; | 2084 | int active_balance = 0; |
| 2085 | int sd_idle = 0; | ||
| 2086 | |||
| 2087 | if (idle != NOT_IDLE && sd->flags & SD_SHARE_CPUPOWER) | ||
| 2088 | sd_idle = 1; | ||
| 2054 | 2089 | ||
| 2055 | spin_lock(&this_rq->lock); | ||
| 2056 | schedstat_inc(sd, lb_cnt[idle]); | 2090 | schedstat_inc(sd, lb_cnt[idle]); |
| 2057 | 2091 | ||
| 2058 | group = find_busiest_group(sd, this_cpu, &imbalance, idle); | 2092 | group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle); |
| 2059 | if (!group) { | 2093 | if (!group) { |
| 2060 | schedstat_inc(sd, lb_nobusyg[idle]); | 2094 | schedstat_inc(sd, lb_nobusyg[idle]); |
| 2061 | goto out_balanced; | 2095 | goto out_balanced; |
| @@ -2079,19 +2113,16 @@ static int load_balance(int this_cpu, runqueue_t *this_rq, | |||
| 2079 | * still unbalanced. nr_moved simply stays zero, so it is | 2113 | * still unbalanced. nr_moved simply stays zero, so it is |
| 2080 | * correctly treated as an imbalance. | 2114 | * correctly treated as an imbalance. |
| 2081 | */ | 2115 | */ |
| 2082 | double_lock_balance(this_rq, busiest); | 2116 | double_rq_lock(this_rq, busiest); |
| 2083 | nr_moved = move_tasks(this_rq, this_cpu, busiest, | 2117 | nr_moved = move_tasks(this_rq, this_cpu, busiest, |
| 2084 | imbalance, sd, idle, | 2118 | imbalance, sd, idle, &all_pinned); |
| 2085 | &all_pinned); | 2119 | double_rq_unlock(this_rq, busiest); |
| 2086 | spin_unlock(&busiest->lock); | ||
| 2087 | 2120 | ||
| 2088 | /* All tasks on this runqueue were pinned by CPU affinity */ | 2121 | /* All tasks on this runqueue were pinned by CPU affinity */ |
| 2089 | if (unlikely(all_pinned)) | 2122 | if (unlikely(all_pinned)) |
| 2090 | goto out_balanced; | 2123 | goto out_balanced; |
| 2091 | } | 2124 | } |
| 2092 | 2125 | ||
| 2093 | spin_unlock(&this_rq->lock); | ||
| 2094 | |||
| 2095 | if (!nr_moved) { | 2126 | if (!nr_moved) { |
| 2096 | schedstat_inc(sd, lb_failed[idle]); | 2127 | schedstat_inc(sd, lb_failed[idle]); |
| 2097 | sd->nr_balance_failed++; | 2128 | sd->nr_balance_failed++; |
| @@ -2099,6 +2130,16 @@ static int load_balance(int this_cpu, runqueue_t *this_rq, | |||
| 2099 | if (unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2)) { | 2130 | if (unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2)) { |
| 2100 | 2131 | ||
| 2101 | spin_lock(&busiest->lock); | 2132 | spin_lock(&busiest->lock); |
| 2133 | |||
| 2134 | /* don't kick the migration_thread, if the curr | ||
| 2135 | * task on busiest cpu can't be moved to this_cpu | ||
| 2136 | */ | ||
| 2137 | if (!cpu_isset(this_cpu, busiest->curr->cpus_allowed)) { | ||
| 2138 | spin_unlock(&busiest->lock); | ||
| 2139 | all_pinned = 1; | ||
| 2140 | goto out_one_pinned; | ||
| 2141 | } | ||
| 2142 | |||
| 2102 | if (!busiest->active_balance) { | 2143 | if (!busiest->active_balance) { |
| 2103 | busiest->active_balance = 1; | 2144 | busiest->active_balance = 1; |
| 2104 | busiest->push_cpu = this_cpu; | 2145 | busiest->push_cpu = this_cpu; |
| @@ -2131,19 +2172,23 @@ static int load_balance(int this_cpu, runqueue_t *this_rq, | |||
| 2131 | sd->balance_interval *= 2; | 2172 | sd->balance_interval *= 2; |
| 2132 | } | 2173 | } |
| 2133 | 2174 | ||
| 2175 | if (!nr_moved && !sd_idle && sd->flags & SD_SHARE_CPUPOWER) | ||
| 2176 | return -1; | ||
| 2134 | return nr_moved; | 2177 | return nr_moved; |
| 2135 | 2178 | ||
| 2136 | out_balanced: | 2179 | out_balanced: |
| 2137 | spin_unlock(&this_rq->lock); | ||
| 2138 | |||
| 2139 | schedstat_inc(sd, lb_balanced[idle]); | 2180 | schedstat_inc(sd, lb_balanced[idle]); |
| 2140 | 2181 | ||
| 2141 | sd->nr_balance_failed = 0; | 2182 | sd->nr_balance_failed = 0; |
| 2183 | |||
| 2184 | out_one_pinned: | ||
| 2142 | /* tune up the balancing interval */ | 2185 | /* tune up the balancing interval */ |
| 2143 | if ((all_pinned && sd->balance_interval < MAX_PINNED_INTERVAL) || | 2186 | if ((all_pinned && sd->balance_interval < MAX_PINNED_INTERVAL) || |
| 2144 | (sd->balance_interval < sd->max_interval)) | 2187 | (sd->balance_interval < sd->max_interval)) |
| 2145 | sd->balance_interval *= 2; | 2188 | sd->balance_interval *= 2; |
| 2146 | 2189 | ||
| 2190 | if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER) | ||
| 2191 | return -1; | ||
| 2147 | return 0; | 2192 | return 0; |
| 2148 | } | 2193 | } |
| 2149 | 2194 | ||
| @@ -2161,9 +2206,13 @@ static int load_balance_newidle(int this_cpu, runqueue_t *this_rq, | |||
| 2161 | runqueue_t *busiest = NULL; | 2206 | runqueue_t *busiest = NULL; |
| 2162 | unsigned long imbalance; | 2207 | unsigned long imbalance; |
| 2163 | int nr_moved = 0; | 2208 | int nr_moved = 0; |
| 2209 | int sd_idle = 0; | ||
| 2210 | |||
| 2211 | if (sd->flags & SD_SHARE_CPUPOWER) | ||
| 2212 | sd_idle = 1; | ||
| 2164 | 2213 | ||
| 2165 | schedstat_inc(sd, lb_cnt[NEWLY_IDLE]); | 2214 | schedstat_inc(sd, lb_cnt[NEWLY_IDLE]); |
| 2166 | group = find_busiest_group(sd, this_cpu, &imbalance, NEWLY_IDLE); | 2215 | group = find_busiest_group(sd, this_cpu, &imbalance, NEWLY_IDLE, &sd_idle); |
| 2167 | if (!group) { | 2216 | if (!group) { |
| 2168 | schedstat_inc(sd, lb_nobusyg[NEWLY_IDLE]); | 2217 | schedstat_inc(sd, lb_nobusyg[NEWLY_IDLE]); |
| 2169 | goto out_balanced; | 2218 | goto out_balanced; |
| @@ -2177,22 +2226,30 @@ static int load_balance_newidle(int this_cpu, runqueue_t *this_rq, | |||
| 2177 | 2226 | ||
| 2178 | BUG_ON(busiest == this_rq); | 2227 | BUG_ON(busiest == this_rq); |
| 2179 | 2228 | ||
| 2180 | /* Attempt to move tasks */ | ||
| 2181 | double_lock_balance(this_rq, busiest); | ||
| 2182 | |||
| 2183 | schedstat_add(sd, lb_imbalance[NEWLY_IDLE], imbalance); | 2229 | schedstat_add(sd, lb_imbalance[NEWLY_IDLE], imbalance); |
| 2184 | nr_moved = move_tasks(this_rq, this_cpu, busiest, | 2230 | |
| 2231 | nr_moved = 0; | ||
| 2232 | if (busiest->nr_running > 1) { | ||
| 2233 | /* Attempt to move tasks */ | ||
| 2234 | double_lock_balance(this_rq, busiest); | ||
| 2235 | nr_moved = move_tasks(this_rq, this_cpu, busiest, | ||
| 2185 | imbalance, sd, NEWLY_IDLE, NULL); | 2236 | imbalance, sd, NEWLY_IDLE, NULL); |
| 2186 | if (!nr_moved) | 2237 | spin_unlock(&busiest->lock); |
| 2238 | } | ||
| 2239 | |||
| 2240 | if (!nr_moved) { | ||
| 2187 | schedstat_inc(sd, lb_failed[NEWLY_IDLE]); | 2241 | schedstat_inc(sd, lb_failed[NEWLY_IDLE]); |
| 2188 | else | 2242 | if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER) |
| 2243 | return -1; | ||
| 2244 | } else | ||
| 2189 | sd->nr_balance_failed = 0; | 2245 | sd->nr_balance_failed = 0; |
| 2190 | 2246 | ||
| 2191 | spin_unlock(&busiest->lock); | ||
| 2192 | return nr_moved; | 2247 | return nr_moved; |
| 2193 | 2248 | ||
| 2194 | out_balanced: | 2249 | out_balanced: |
| 2195 | schedstat_inc(sd, lb_balanced[NEWLY_IDLE]); | 2250 | schedstat_inc(sd, lb_balanced[NEWLY_IDLE]); |
| 2251 | if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER) | ||
| 2252 | return -1; | ||
| 2196 | sd->nr_balance_failed = 0; | 2253 | sd->nr_balance_failed = 0; |
| 2197 | return 0; | 2254 | return 0; |
| 2198 | } | 2255 | } |
| @@ -2317,7 +2374,11 @@ static void rebalance_tick(int this_cpu, runqueue_t *this_rq, | |||
| 2317 | 2374 | ||
| 2318 | if (j - sd->last_balance >= interval) { | 2375 | if (j - sd->last_balance >= interval) { |
| 2319 | if (load_balance(this_cpu, this_rq, sd, idle)) { | 2376 | if (load_balance(this_cpu, this_rq, sd, idle)) { |
| 2320 | /* We've pulled tasks over so no longer idle */ | 2377 | /* |
| 2378 | * We've pulled tasks over so either we're no | ||
| 2379 | * longer idle, or one of our SMT siblings is | ||
| 2380 | * not idle. | ||
| 2381 | */ | ||
| 2321 | idle = NOT_IDLE; | 2382 | idle = NOT_IDLE; |
| 2322 | } | 2383 | } |
| 2323 | sd->last_balance += interval; | 2384 | sd->last_balance += interval; |
| @@ -2576,6 +2637,13 @@ out: | |||
| 2576 | } | 2637 | } |
| 2577 | 2638 | ||
| 2578 | #ifdef CONFIG_SCHED_SMT | 2639 | #ifdef CONFIG_SCHED_SMT |
| 2640 | static inline void wakeup_busy_runqueue(runqueue_t *rq) | ||
| 2641 | { | ||
| 2642 | /* If an SMT runqueue is sleeping due to priority reasons wake it up */ | ||
| 2643 | if (rq->curr == rq->idle && rq->nr_running) | ||
| 2644 | resched_task(rq->idle); | ||
| 2645 | } | ||
| 2646 | |||
| 2579 | static inline void wake_sleeping_dependent(int this_cpu, runqueue_t *this_rq) | 2647 | static inline void wake_sleeping_dependent(int this_cpu, runqueue_t *this_rq) |
| 2580 | { | 2648 | { |
| 2581 | struct sched_domain *tmp, *sd = NULL; | 2649 | struct sched_domain *tmp, *sd = NULL; |
| @@ -2609,12 +2677,7 @@ static inline void wake_sleeping_dependent(int this_cpu, runqueue_t *this_rq) | |||
| 2609 | for_each_cpu_mask(i, sibling_map) { | 2677 | for_each_cpu_mask(i, sibling_map) { |
| 2610 | runqueue_t *smt_rq = cpu_rq(i); | 2678 | runqueue_t *smt_rq = cpu_rq(i); |
| 2611 | 2679 | ||
| 2612 | /* | 2680 | wakeup_busy_runqueue(smt_rq); |
| 2613 | * If an SMT sibling task is sleeping due to priority | ||
| 2614 | * reasons wake it up now. | ||
| 2615 | */ | ||
| 2616 | if (smt_rq->curr == smt_rq->idle && smt_rq->nr_running) | ||
| 2617 | resched_task(smt_rq->idle); | ||
| 2618 | } | 2681 | } |
| 2619 | 2682 | ||
| 2620 | for_each_cpu_mask(i, sibling_map) | 2683 | for_each_cpu_mask(i, sibling_map) |
| @@ -2625,6 +2688,16 @@ static inline void wake_sleeping_dependent(int this_cpu, runqueue_t *this_rq) | |||
| 2625 | */ | 2688 | */ |
| 2626 | } | 2689 | } |
| 2627 | 2690 | ||
| 2691 | /* | ||
| 2692 | * number of 'lost' timeslices this task won't be able to fully | ||
| 2693 | * utilize, if another task runs on a sibling. This models the | ||
| 2694 | * slowdown effect of other tasks running on siblings: | ||
| 2695 | */ | ||
| 2696 | static inline unsigned long smt_slice(task_t *p, struct sched_domain *sd) | ||
| 2697 | { | ||
| 2698 | return p->time_slice * (100 - sd->per_cpu_gain) / 100; | ||
| 2699 | } | ||
| 2700 | |||
| 2628 | static inline int dependent_sleeper(int this_cpu, runqueue_t *this_rq) | 2701 | static inline int dependent_sleeper(int this_cpu, runqueue_t *this_rq) |
| 2629 | { | 2702 | { |
| 2630 | struct sched_domain *tmp, *sd = NULL; | 2703 | struct sched_domain *tmp, *sd = NULL; |
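As a worked example of the smt_slice() formula above (the numbers are assumed for illustration; per_cpu_gain comes from the sibling domain's tuning, not from this patch):

	/* Assumed values, for illustration only. */
	static unsigned long example_smt_slice(void)
	{
		unsigned long per_cpu_gain = 25;	/* a busy sibling adds ~25% throughput */
		unsigned long time_slice = 100;		/* ms */

		/* 100 * (100 - 25) / 100 = 75: only ~75ms of the slice counts while
		 * a sibling runs, and dependent_sleeper() compares that against the
		 * other task's full timeslice. */
		return time_slice * (100 - per_cpu_gain) / 100;
	}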
| @@ -2668,6 +2741,10 @@ static inline int dependent_sleeper(int this_cpu, runqueue_t *this_rq) | |||
| 2668 | runqueue_t *smt_rq = cpu_rq(i); | 2741 | runqueue_t *smt_rq = cpu_rq(i); |
| 2669 | task_t *smt_curr = smt_rq->curr; | 2742 | task_t *smt_curr = smt_rq->curr; |
| 2670 | 2743 | ||
| 2744 | /* Kernel threads do not participate in dependent sleeping */ | ||
| 2745 | if (!p->mm || !smt_curr->mm || rt_task(p)) | ||
| 2746 | goto check_smt_task; | ||
| 2747 | |||
| 2671 | /* | 2748 | /* |
| 2672 | * If a user task with lower static priority than the | 2749 | * If a user task with lower static priority than the |
| 2673 | * running task on the SMT sibling is trying to schedule, | 2750 | * running task on the SMT sibling is trying to schedule, |
| @@ -2676,21 +2753,45 @@ static inline int dependent_sleeper(int this_cpu, runqueue_t *this_rq) | |||
| 2676 | * task from using an unfair proportion of the | 2753 | * task from using an unfair proportion of the |
| 2677 | * physical cpu's resources. -ck | 2754 | * physical cpu's resources. -ck |
| 2678 | */ | 2755 | */ |
| 2679 | if (((smt_curr->time_slice * (100 - sd->per_cpu_gain) / 100) > | 2756 | if (rt_task(smt_curr)) { |
| 2680 | task_timeslice(p) || rt_task(smt_curr)) && | 2757 | /* |
| 2681 | p->mm && smt_curr->mm && !rt_task(p)) | 2758 | * With real time tasks we run non-rt tasks only |
| 2682 | ret = 1; | 2759 | * per_cpu_gain% of the time. |
| 2760 | */ | ||
| 2761 | if ((jiffies % DEF_TIMESLICE) > | ||
| 2762 | (sd->per_cpu_gain * DEF_TIMESLICE / 100)) | ||
| 2763 | ret = 1; | ||
| 2764 | } else | ||
| 2765 | if (smt_curr->static_prio < p->static_prio && | ||
| 2766 | !TASK_PREEMPTS_CURR(p, smt_rq) && | ||
| 2767 | smt_slice(smt_curr, sd) > task_timeslice(p)) | ||
| 2768 | ret = 1; | ||
| 2769 | |||
| 2770 | check_smt_task: | ||
| 2771 | if ((!smt_curr->mm && smt_curr != smt_rq->idle) || | ||
| 2772 | rt_task(smt_curr)) | ||
| 2773 | continue; | ||
| 2774 | if (!p->mm) { | ||
| 2775 | wakeup_busy_runqueue(smt_rq); | ||
| 2776 | continue; | ||
| 2777 | } | ||
| 2683 | 2778 | ||
| 2684 | /* | 2779 | /* |
| 2685 | * Reschedule a lower priority task on the SMT sibling, | 2780 | * Reschedule a lower priority task on the SMT sibling for |
| 2686 | * or wake it up if it has been put to sleep for priority | 2781 | * it to be put to sleep, or wake it up if it has been put to |
| 2687 | * reasons. | 2782 | * sleep for priority reasons to see if it should run now. |
| 2688 | */ | 2783 | */ |
| 2689 | if ((((p->time_slice * (100 - sd->per_cpu_gain) / 100) > | 2784 | if (rt_task(p)) { |
| 2690 | task_timeslice(smt_curr) || rt_task(p)) && | 2785 | if ((jiffies % DEF_TIMESLICE) > |
| 2691 | smt_curr->mm && p->mm && !rt_task(smt_curr)) || | 2786 | (sd->per_cpu_gain * DEF_TIMESLICE / 100)) |
| 2692 | (smt_curr == smt_rq->idle && smt_rq->nr_running)) | 2787 | resched_task(smt_curr); |
| 2693 | resched_task(smt_curr); | 2788 | } else { |
| 2789 | if (TASK_PREEMPTS_CURR(p, smt_rq) && | ||
| 2790 | smt_slice(p, sd) > task_timeslice(smt_curr)) | ||
| 2791 | resched_task(smt_curr); | ||
| 2792 | else | ||
| 2793 | wakeup_busy_runqueue(smt_rq); | ||
| 2794 | } | ||
| 2694 | } | 2795 | } |
| 2695 | out_unlock: | 2796 | out_unlock: |
| 2696 | for_each_cpu_mask(i, sibling_map) | 2797 | for_each_cpu_mask(i, sibling_map) |
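The real-time branches above time-partition the physical core instead of comparing timeslices: with the values assumed below, non-RT work on a sibling runs only during the first per_cpu_gain% of each DEF_TIMESLICE window. A minimal sketch of that test (the constants are stand-ins, not taken from this patch):

	/* Returns nonzero when the non-RT sibling should yield to the RT task. */
	static int rt_sibling_wants_core(unsigned long now_jiffies)
	{
		unsigned long window = 100;	/* stand-in for DEF_TIMESLICE */
		unsigned long gain = 25;	/* stand-in for sd->per_cpu_gain */

		/* Run during the first ~25% of every window, yield for the rest. */
		return (now_jiffies % window) > (gain * window / 100);
	}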
| @@ -2888,6 +2989,7 @@ switch_tasks: | |||
| 2888 | if (next == rq->idle) | 2989 | if (next == rq->idle) |
| 2889 | schedstat_inc(rq, sched_goidle); | 2990 | schedstat_inc(rq, sched_goidle); |
| 2890 | prefetch(next); | 2991 | prefetch(next); |
| 2992 | prefetch_stack(next); | ||
| 2891 | clear_tsk_need_resched(prev); | 2993 | clear_tsk_need_resched(prev); |
| 2892 | rcu_qsctr_inc(task_cpu(prev)); | 2994 | rcu_qsctr_inc(task_cpu(prev)); |
| 2893 | 2995 | ||
| @@ -3015,7 +3117,8 @@ need_resched: | |||
| 3015 | 3117 | ||
| 3016 | #endif /* CONFIG_PREEMPT */ | 3118 | #endif /* CONFIG_PREEMPT */ |
| 3017 | 3119 | ||
| 3018 | int default_wake_function(wait_queue_t *curr, unsigned mode, int sync, void *key) | 3120 | int default_wake_function(wait_queue_t *curr, unsigned mode, int sync, |
| 3121 | void *key) | ||
| 3019 | { | 3122 | { |
| 3020 | task_t *p = curr->private; | 3123 | task_t *p = curr->private; |
| 3021 | return try_to_wake_up(p, mode, sync); | 3124 | return try_to_wake_up(p, mode, sync); |
| @@ -3057,7 +3160,7 @@ static void __wake_up_common(wait_queue_head_t *q, unsigned int mode, | |||
| 3057 | * @key: is directly passed to the wakeup function | 3160 | * @key: is directly passed to the wakeup function |
| 3058 | */ | 3161 | */ |
| 3059 | void fastcall __wake_up(wait_queue_head_t *q, unsigned int mode, | 3162 | void fastcall __wake_up(wait_queue_head_t *q, unsigned int mode, |
| 3060 | int nr_exclusive, void *key) | 3163 | int nr_exclusive, void *key) |
| 3061 | { | 3164 | { |
| 3062 | unsigned long flags; | 3165 | unsigned long flags; |
| 3063 | 3166 | ||
| @@ -3089,7 +3192,8 @@ void fastcall __wake_up_locked(wait_queue_head_t *q, unsigned int mode) | |||
| 3089 | * | 3192 | * |
| 3090 | * On UP it can prevent extra preemption. | 3193 | * On UP it can prevent extra preemption. |
| 3091 | */ | 3194 | */ |
| 3092 | void fastcall __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive) | 3195 | void fastcall |
| 3196 | __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive) | ||
| 3093 | { | 3197 | { |
| 3094 | unsigned long flags; | 3198 | unsigned long flags; |
| 3095 | int sync = 1; | 3199 | int sync = 1; |
| @@ -3280,7 +3384,8 @@ void fastcall __sched interruptible_sleep_on(wait_queue_head_t *q) | |||
| 3280 | 3384 | ||
| 3281 | EXPORT_SYMBOL(interruptible_sleep_on); | 3385 | EXPORT_SYMBOL(interruptible_sleep_on); |
| 3282 | 3386 | ||
| 3283 | long fastcall __sched interruptible_sleep_on_timeout(wait_queue_head_t *q, long timeout) | 3387 | long fastcall __sched |
| 3388 | interruptible_sleep_on_timeout(wait_queue_head_t *q, long timeout) | ||
| 3284 | { | 3389 | { |
| 3285 | SLEEP_ON_VAR | 3390 | SLEEP_ON_VAR |
| 3286 | 3391 | ||
| @@ -3499,7 +3604,8 @@ static void __setscheduler(struct task_struct *p, int policy, int prio) | |||
| 3499 | * @policy: new policy. | 3604 | * @policy: new policy. |
| 3500 | * @param: structure containing the new RT priority. | 3605 | * @param: structure containing the new RT priority. |
| 3501 | */ | 3606 | */ |
| 3502 | int sched_setscheduler(struct task_struct *p, int policy, struct sched_param *param) | 3607 | int sched_setscheduler(struct task_struct *p, int policy, |
| 3608 | struct sched_param *param) | ||
| 3503 | { | 3609 | { |
| 3504 | int retval; | 3610 | int retval; |
| 3505 | int oldprio, oldpolicy = -1; | 3611 | int oldprio, oldpolicy = -1; |
| @@ -3519,7 +3625,7 @@ recheck: | |||
| 3519 | * 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_NORMAL is 0. | 3625 | * 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_NORMAL is 0. |
| 3520 | */ | 3626 | */ |
| 3521 | if (param->sched_priority < 0 || | 3627 | if (param->sched_priority < 0 || |
| 3522 | (p->mm && param->sched_priority > MAX_USER_RT_PRIO-1) || | 3628 | (p->mm && param->sched_priority > MAX_USER_RT_PRIO-1) || |
| 3523 | (!p->mm && param->sched_priority > MAX_RT_PRIO-1)) | 3629 | (!p->mm && param->sched_priority > MAX_RT_PRIO-1)) |
| 3524 | return -EINVAL; | 3630 | return -EINVAL; |
| 3525 | if ((policy == SCHED_NORMAL) != (param->sched_priority == 0)) | 3631 | if ((policy == SCHED_NORMAL) != (param->sched_priority == 0)) |
| @@ -3582,7 +3688,8 @@ recheck: | |||
| 3582 | } | 3688 | } |
| 3583 | EXPORT_SYMBOL_GPL(sched_setscheduler); | 3689 | EXPORT_SYMBOL_GPL(sched_setscheduler); |
| 3584 | 3690 | ||
| 3585 | static int do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param) | 3691 | static int |
| 3692 | do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param) | ||
| 3586 | { | 3693 | { |
| 3587 | int retval; | 3694 | int retval; |
| 3588 | struct sched_param lparam; | 3695 | struct sched_param lparam; |
| @@ -3849,7 +3956,7 @@ asmlinkage long sys_sched_yield(void) | |||
| 3849 | if (rt_task(current)) | 3956 | if (rt_task(current)) |
| 3850 | target = rq->active; | 3957 | target = rq->active; |
| 3851 | 3958 | ||
| 3852 | if (current->array->nr_active == 1) { | 3959 | if (array->nr_active == 1) { |
| 3853 | schedstat_inc(rq, yld_act_empty); | 3960 | schedstat_inc(rq, yld_act_empty); |
| 3854 | if (!rq->expired->nr_active) | 3961 | if (!rq->expired->nr_active) |
| 3855 | schedstat_inc(rq, yld_both_empty); | 3962 | schedstat_inc(rq, yld_both_empty); |
| @@ -3913,7 +4020,7 @@ EXPORT_SYMBOL(cond_resched); | |||
| 3913 | * operations here to prevent schedule() from being called twice (once via | 4020 | * operations here to prevent schedule() from being called twice (once via |
| 3914 | * spin_unlock(), once by hand). | 4021 | * spin_unlock(), once by hand). |
| 3915 | */ | 4022 | */ |
| 3916 | int cond_resched_lock(spinlock_t * lock) | 4023 | int cond_resched_lock(spinlock_t *lock) |
| 3917 | { | 4024 | { |
| 3918 | int ret = 0; | 4025 | int ret = 0; |
| 3919 | 4026 | ||
| @@ -4096,7 +4203,7 @@ static inline struct task_struct *younger_sibling(struct task_struct *p) | |||
| 4096 | return list_entry(p->sibling.next,struct task_struct,sibling); | 4203 | return list_entry(p->sibling.next,struct task_struct,sibling); |
| 4097 | } | 4204 | } |
| 4098 | 4205 | ||
| 4099 | static void show_task(task_t * p) | 4206 | static void show_task(task_t *p) |
| 4100 | { | 4207 | { |
| 4101 | task_t *relative; | 4208 | task_t *relative; |
| 4102 | unsigned state; | 4209 | unsigned state; |
| @@ -4122,7 +4229,7 @@ static void show_task(task_t * p) | |||
| 4122 | #endif | 4229 | #endif |
| 4123 | #ifdef CONFIG_DEBUG_STACK_USAGE | 4230 | #ifdef CONFIG_DEBUG_STACK_USAGE |
| 4124 | { | 4231 | { |
| 4125 | unsigned long * n = (unsigned long *) (p->thread_info+1); | 4232 | unsigned long *n = (unsigned long *) (p->thread_info+1); |
| 4126 | while (!*n) | 4233 | while (!*n) |
| 4127 | n++; | 4234 | n++; |
| 4128 | free = (unsigned long) n - (unsigned long)(p->thread_info+1); | 4235 | free = (unsigned long) n - (unsigned long)(p->thread_info+1); |
| @@ -4331,7 +4438,7 @@ out: | |||
| 4331 | * thread migration by bumping thread off CPU then 'pushing' onto | 4438 | * thread migration by bumping thread off CPU then 'pushing' onto |
| 4332 | * another runqueue. | 4439 | * another runqueue. |
| 4333 | */ | 4440 | */ |
| 4334 | static int migration_thread(void * data) | 4441 | static int migration_thread(void *data) |
| 4335 | { | 4442 | { |
| 4336 | runqueue_t *rq; | 4443 | runqueue_t *rq; |
| 4337 | int cpu = (long)data; | 4444 | int cpu = (long)data; |
diff --git a/kernel/signal.c b/kernel/signal.c index 4980a073237f..b92c3c9f8b9a 100644 --- a/kernel/signal.c +++ b/kernel/signal.c | |||
| @@ -2221,8 +2221,7 @@ sys_rt_sigtimedwait(const sigset_t __user *uthese, | |||
| 2221 | recalc_sigpending(); | 2221 | recalc_sigpending(); |
| 2222 | spin_unlock_irq(¤t->sighand->siglock); | 2222 | spin_unlock_irq(¤t->sighand->siglock); |
| 2223 | 2223 | ||
| 2224 | current->state = TASK_INTERRUPTIBLE; | 2224 | timeout = schedule_timeout_interruptible(timeout); |
| 2225 | timeout = schedule_timeout(timeout); | ||
| 2226 | 2225 | ||
| 2227 | try_to_freeze(); | 2226 | try_to_freeze(); |
| 2228 | spin_lock_irq(¤t->sighand->siglock); | 2227 | spin_lock_irq(¤t->sighand->siglock); |
diff --git a/kernel/spinlock.c b/kernel/spinlock.c index 0c3f9d8bbe17..0375fcd5921d 100644 --- a/kernel/spinlock.c +++ b/kernel/spinlock.c | |||
| @@ -3,7 +3,10 @@ | |||
| 3 | * | 3 | * |
| 4 | * Author: Zwane Mwaikambo <zwane@fsmlabs.com> | 4 | * Author: Zwane Mwaikambo <zwane@fsmlabs.com> |
| 5 | * | 5 | * |
| 6 | * Copyright (2004) Ingo Molnar | 6 | * Copyright (2004, 2005) Ingo Molnar |
| 7 | * | ||
| 8 | * This file contains the spinlock/rwlock implementations for the | ||
| 9 | * SMP and the DEBUG_SPINLOCK cases. (UP-nondebug inlines them) | ||
| 7 | */ | 10 | */ |
| 8 | 11 | ||
| 9 | #include <linux/config.h> | 12 | #include <linux/config.h> |
| @@ -17,12 +20,12 @@ | |||
| 17 | * Generic declaration of the raw read_trylock() function, | 20 | * Generic declaration of the raw read_trylock() function, |
| 18 | * architectures are supposed to optimize this: | 21 | * architectures are supposed to optimize this: |
| 19 | */ | 22 | */ |
| 20 | int __lockfunc generic_raw_read_trylock(rwlock_t *lock) | 23 | int __lockfunc generic__raw_read_trylock(raw_rwlock_t *lock) |
| 21 | { | 24 | { |
| 22 | _raw_read_lock(lock); | 25 | __raw_read_lock(lock); |
| 23 | return 1; | 26 | return 1; |
| 24 | } | 27 | } |
| 25 | EXPORT_SYMBOL(generic_raw_read_trylock); | 28 | EXPORT_SYMBOL(generic__raw_read_trylock); |
| 26 | 29 | ||
| 27 | int __lockfunc _spin_trylock(spinlock_t *lock) | 30 | int __lockfunc _spin_trylock(spinlock_t *lock) |
| 28 | { | 31 | { |
| @@ -57,7 +60,7 @@ int __lockfunc _write_trylock(rwlock_t *lock) | |||
| 57 | } | 60 | } |
| 58 | EXPORT_SYMBOL(_write_trylock); | 61 | EXPORT_SYMBOL(_write_trylock); |
| 59 | 62 | ||
| 60 | #ifndef CONFIG_PREEMPT | 63 | #if !defined(CONFIG_PREEMPT) || !defined(CONFIG_SMP) |
| 61 | 64 | ||
| 62 | void __lockfunc _read_lock(rwlock_t *lock) | 65 | void __lockfunc _read_lock(rwlock_t *lock) |
| 63 | { | 66 | { |
| @@ -72,7 +75,7 @@ unsigned long __lockfunc _spin_lock_irqsave(spinlock_t *lock) | |||
| 72 | 75 | ||
| 73 | local_irq_save(flags); | 76 | local_irq_save(flags); |
| 74 | preempt_disable(); | 77 | preempt_disable(); |
| 75 | _raw_spin_lock_flags(lock, flags); | 78 | _raw_spin_lock_flags(lock, &flags); |
| 76 | return flags; | 79 | return flags; |
| 77 | } | 80 | } |
| 78 | EXPORT_SYMBOL(_spin_lock_irqsave); | 81 | EXPORT_SYMBOL(_spin_lock_irqsave); |
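Note that the by-pointer flags argument to _raw_spin_lock_flags() is internal to the lock implementation; callers of spin_lock_irqsave() keep the same interface, as in this minimal sketch (the lock and function names are illustrative):

	#include <linux/spinlock.h>

	static DEFINE_SPINLOCK(my_lock);		/* illustrative lock */

	static void my_critical_section(void)
	{
		unsigned long flags;

		spin_lock_irqsave(&my_lock, flags);	/* IRQs off, previous state saved */
		/* ... data protected from other CPUs and from local interrupts ... */
		spin_unlock_irqrestore(&my_lock, flags);
	}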
diff --git a/kernel/timer.c b/kernel/timer.c index 13e2b513be01..f4152fcd9f8e 100644 --- a/kernel/timer.c +++ b/kernel/timer.c | |||
| @@ -1154,6 +1154,20 @@ fastcall signed long __sched schedule_timeout(signed long timeout) | |||
| 1154 | 1154 | ||
| 1155 | EXPORT_SYMBOL(schedule_timeout); | 1155 | EXPORT_SYMBOL(schedule_timeout); |
| 1156 | 1156 | ||
| 1157 | signed long __sched schedule_timeout_interruptible(signed long timeout) | ||
| 1158 | { | ||
| 1159 | set_current_state(TASK_INTERRUPTIBLE); | ||
| 1160 | return schedule_timeout(timeout); | ||
| 1161 | } | ||
| 1162 | EXPORT_SYMBOL(schedule_timeout_interruptible); | ||
| 1163 | |||
| 1164 | signed long __sched schedule_timeout_uninterruptible(signed long timeout) | ||
| 1165 | { | ||
| 1166 | set_current_state(TASK_UNINTERRUPTIBLE); | ||
| 1167 | return schedule_timeout(timeout); | ||
| 1168 | } | ||
| 1169 | EXPORT_SYMBOL(schedule_timeout_uninterruptible); | ||
| 1170 | |||
| 1157 | /* Thread ID - the internal kernel "pid" */ | 1171 | /* Thread ID - the internal kernel "pid" */ |
| 1158 | asmlinkage long sys_gettid(void) | 1172 | asmlinkage long sys_gettid(void) |
| 1159 | { | 1173 | { |
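The hunk in kernel/signal.c above and the remaining kernel/timer.c hunks below are mechanical conversions of the same open-coded pattern to the new helpers; shown side by side for reference (the wrapper names here are illustrative):

	#include <linux/sched.h>

	/* Old open-coded form, as removed throughout this patch: */
	static signed long wait_old(signed long timeout)
	{
		set_current_state(TASK_INTERRUPTIBLE);
		return schedule_timeout(timeout);
	}

	/* Equivalent form using the helper added above: */
	static signed long wait_new(signed long timeout)
	{
		return schedule_timeout_interruptible(timeout);
	}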
| @@ -1170,8 +1184,7 @@ static long __sched nanosleep_restart(struct restart_block *restart) | |||
| 1170 | if (!time_after(expire, now)) | 1184 | if (!time_after(expire, now)) |
| 1171 | return 0; | 1185 | return 0; |
| 1172 | 1186 | ||
| 1173 | current->state = TASK_INTERRUPTIBLE; | 1187 | expire = schedule_timeout_interruptible(expire - now); |
| 1174 | expire = schedule_timeout(expire - now); | ||
| 1175 | 1188 | ||
| 1176 | ret = 0; | 1189 | ret = 0; |
| 1177 | if (expire) { | 1190 | if (expire) { |
| @@ -1199,8 +1212,7 @@ asmlinkage long sys_nanosleep(struct timespec __user *rqtp, struct timespec __us | |||
| 1199 | return -EINVAL; | 1212 | return -EINVAL; |
| 1200 | 1213 | ||
| 1201 | expire = timespec_to_jiffies(&t) + (t.tv_sec || t.tv_nsec); | 1214 | expire = timespec_to_jiffies(&t) + (t.tv_sec || t.tv_nsec); |
| 1202 | current->state = TASK_INTERRUPTIBLE; | 1215 | expire = schedule_timeout_interruptible(expire); |
| 1203 | expire = schedule_timeout(expire); | ||
| 1204 | 1216 | ||
| 1205 | ret = 0; | 1217 | ret = 0; |
| 1206 | if (expire) { | 1218 | if (expire) { |
| @@ -1598,10 +1610,8 @@ void msleep(unsigned int msecs) | |||
| 1598 | { | 1610 | { |
| 1599 | unsigned long timeout = msecs_to_jiffies(msecs) + 1; | 1611 | unsigned long timeout = msecs_to_jiffies(msecs) + 1; |
| 1600 | 1612 | ||
| 1601 | while (timeout) { | 1613 | while (timeout) |
| 1602 | set_current_state(TASK_UNINTERRUPTIBLE); | 1614 | timeout = schedule_timeout_uninterruptible(timeout); |
| 1603 | timeout = schedule_timeout(timeout); | ||
| 1604 | } | ||
| 1605 | } | 1615 | } |
| 1606 | 1616 | ||
| 1607 | EXPORT_SYMBOL(msleep); | 1617 | EXPORT_SYMBOL(msleep); |
| @@ -1614,10 +1624,8 @@ unsigned long msleep_interruptible(unsigned int msecs) | |||
| 1614 | { | 1624 | { |
| 1615 | unsigned long timeout = msecs_to_jiffies(msecs) + 1; | 1625 | unsigned long timeout = msecs_to_jiffies(msecs) + 1; |
| 1616 | 1626 | ||
| 1617 | while (timeout && !signal_pending(current)) { | 1627 | while (timeout && !signal_pending(current)) |
| 1618 | set_current_state(TASK_INTERRUPTIBLE); | 1628 | timeout = schedule_timeout_interruptible(timeout); |
| 1619 | timeout = schedule_timeout(timeout); | ||
| 1620 | } | ||
| 1621 | return jiffies_to_msecs(timeout); | 1629 | return jiffies_to_msecs(timeout); |
| 1622 | } | 1630 | } |
| 1623 | 1631 | ||
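For completeness, msleep_interruptible() still returns the remaining time in milliseconds when a signal cuts the sleep short, and 0 when the full delay elapsed; a small usage sketch (driver-style, names assumed):

	#include <linux/delay.h>
	#include <linux/kernel.h>

	static void wait_for_hardware(void)	/* illustrative name */
	{
		unsigned long left = msleep_interruptible(500);

		if (left)
			pr_debug("interrupted with %lu ms remaining\n", left);
	}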
