diff options
Diffstat (limited to 'kernel')
33 files changed, 1381 insertions, 989 deletions
diff --git a/kernel/acct.c b/kernel/acct.c index e4c0e1fee9b0..385b88461c29 100644 --- a/kernel/acct.c +++ b/kernel/acct.c | |||
@@ -216,7 +216,6 @@ static int acct_on(char *name) | |||
216 | { | 216 | { |
217 | struct file *file; | 217 | struct file *file; |
218 | struct vfsmount *mnt; | 218 | struct vfsmount *mnt; |
219 | int error; | ||
220 | struct pid_namespace *ns; | 219 | struct pid_namespace *ns; |
221 | struct bsd_acct_struct *acct = NULL; | 220 | struct bsd_acct_struct *acct = NULL; |
222 | 221 | ||
@@ -244,13 +243,6 @@ static int acct_on(char *name) | |||
244 | } | 243 | } |
245 | } | 244 | } |
246 | 245 | ||
247 | error = security_acct(file); | ||
248 | if (error) { | ||
249 | kfree(acct); | ||
250 | filp_close(file, NULL); | ||
251 | return error; | ||
252 | } | ||
253 | |||
254 | spin_lock(&acct_lock); | 246 | spin_lock(&acct_lock); |
255 | if (ns->bacct == NULL) { | 247 | if (ns->bacct == NULL) { |
256 | ns->bacct = acct; | 248 | ns->bacct = acct; |
@@ -281,7 +273,7 @@ static int acct_on(char *name) | |||
281 | */ | 273 | */ |
282 | SYSCALL_DEFINE1(acct, const char __user *, name) | 274 | SYSCALL_DEFINE1(acct, const char __user *, name) |
283 | { | 275 | { |
284 | int error; | 276 | int error = 0; |
285 | 277 | ||
286 | if (!capable(CAP_SYS_PACCT)) | 278 | if (!capable(CAP_SYS_PACCT)) |
287 | return -EPERM; | 279 | return -EPERM; |
@@ -299,13 +291,11 @@ SYSCALL_DEFINE1(acct, const char __user *, name) | |||
299 | if (acct == NULL) | 291 | if (acct == NULL) |
300 | return 0; | 292 | return 0; |
301 | 293 | ||
302 | error = security_acct(NULL); | 294 | spin_lock(&acct_lock); |
303 | if (!error) { | 295 | acct_file_reopen(acct, NULL, NULL); |
304 | spin_lock(&acct_lock); | 296 | spin_unlock(&acct_lock); |
305 | acct_file_reopen(acct, NULL, NULL); | ||
306 | spin_unlock(&acct_lock); | ||
307 | } | ||
308 | } | 297 | } |
298 | |||
309 | return error; | 299 | return error; |
310 | } | 300 | } |
311 | 301 | ||
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index e9ec642932ee..291775021b2e 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c | |||
@@ -3615,7 +3615,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss) | |||
3615 | * @ss: the subsystem to load | 3615 | * @ss: the subsystem to load |
3616 | * | 3616 | * |
3617 | * This function should be called in a modular subsystem's initcall. If the | 3617 | * This function should be called in a modular subsystem's initcall. If the |
3618 | * subsytem is built as a module, it will be assigned a new subsys_id and set | 3618 | * subsystem is built as a module, it will be assigned a new subsys_id and set |
3619 | * up for use. If the subsystem is built-in anyway, work is delegated to the | 3619 | * up for use. If the subsystem is built-in anyway, work is delegated to the |
3620 | * simpler cgroup_init_subsys. | 3620 | * simpler cgroup_init_subsys. |
3621 | */ | 3621 | */ |
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c index e5c0244962b0..ce71ed53e88f 100644 --- a/kernel/cgroup_freezer.c +++ b/kernel/cgroup_freezer.c | |||
@@ -89,10 +89,10 @@ struct cgroup_subsys freezer_subsys; | |||
89 | 89 | ||
90 | /* Locks taken and their ordering | 90 | /* Locks taken and their ordering |
91 | * ------------------------------ | 91 | * ------------------------------ |
92 | * css_set_lock | ||
93 | * cgroup_mutex (AKA cgroup_lock) | 92 | * cgroup_mutex (AKA cgroup_lock) |
94 | * task->alloc_lock (AKA task_lock) | ||
95 | * freezer->lock | 93 | * freezer->lock |
94 | * css_set_lock | ||
95 | * task->alloc_lock (AKA task_lock) | ||
96 | * task->sighand->siglock | 96 | * task->sighand->siglock |
97 | * | 97 | * |
98 | * cgroup code forces css_set_lock to be taken before task->alloc_lock | 98 | * cgroup code forces css_set_lock to be taken before task->alloc_lock |
@@ -100,33 +100,38 @@ struct cgroup_subsys freezer_subsys; | |||
100 | * freezer_create(), freezer_destroy(): | 100 | * freezer_create(), freezer_destroy(): |
101 | * cgroup_mutex [ by cgroup core ] | 101 | * cgroup_mutex [ by cgroup core ] |
102 | * | 102 | * |
103 | * can_attach(): | 103 | * freezer_can_attach(): |
104 | * cgroup_mutex | 104 | * cgroup_mutex (held by caller of can_attach) |
105 | * | 105 | * |
106 | * cgroup_frozen(): | 106 | * cgroup_freezing_or_frozen(): |
107 | * task->alloc_lock (to get task's cgroup) | 107 | * task->alloc_lock (to get task's cgroup) |
108 | * | 108 | * |
109 | * freezer_fork() (preserving fork() performance means can't take cgroup_mutex): | 109 | * freezer_fork() (preserving fork() performance means can't take cgroup_mutex): |
110 | * task->alloc_lock (to get task's cgroup) | ||
111 | * freezer->lock | 110 | * freezer->lock |
112 | * sighand->siglock (if the cgroup is freezing) | 111 | * sighand->siglock (if the cgroup is freezing) |
113 | * | 112 | * |
114 | * freezer_read(): | 113 | * freezer_read(): |
115 | * cgroup_mutex | 114 | * cgroup_mutex |
116 | * freezer->lock | 115 | * freezer->lock |
116 | * write_lock css_set_lock (cgroup iterator start) | ||
117 | * task->alloc_lock | ||
117 | * read_lock css_set_lock (cgroup iterator start) | 118 | * read_lock css_set_lock (cgroup iterator start) |
118 | * | 119 | * |
119 | * freezer_write() (freeze): | 120 | * freezer_write() (freeze): |
120 | * cgroup_mutex | 121 | * cgroup_mutex |
121 | * freezer->lock | 122 | * freezer->lock |
123 | * write_lock css_set_lock (cgroup iterator start) | ||
124 | * task->alloc_lock | ||
122 | * read_lock css_set_lock (cgroup iterator start) | 125 | * read_lock css_set_lock (cgroup iterator start) |
123 | * sighand->siglock | 126 | * sighand->siglock (fake signal delivery inside freeze_task()) |
124 | * | 127 | * |
125 | * freezer_write() (unfreeze): | 128 | * freezer_write() (unfreeze): |
126 | * cgroup_mutex | 129 | * cgroup_mutex |
127 | * freezer->lock | 130 | * freezer->lock |
131 | * write_lock css_set_lock (cgroup iterator start) | ||
132 | * task->alloc_lock | ||
128 | * read_lock css_set_lock (cgroup iterator start) | 133 | * read_lock css_set_lock (cgroup iterator start) |
129 | * task->alloc_lock (to prevent races with freeze_task()) | 134 | * task->alloc_lock (inside thaw_process(), prevents race with refrigerator()) |
130 | * sighand->siglock | 135 | * sighand->siglock |
131 | */ | 136 | */ |
132 | static struct cgroup_subsys_state *freezer_create(struct cgroup_subsys *ss, | 137 | static struct cgroup_subsys_state *freezer_create(struct cgroup_subsys *ss, |
diff --git a/kernel/compat.c b/kernel/compat.c index 7f40e9275fd9..5adab05a3172 100644 --- a/kernel/compat.c +++ b/kernel/compat.c | |||
@@ -495,29 +495,26 @@ asmlinkage long compat_sys_sched_getaffinity(compat_pid_t pid, unsigned int len, | |||
495 | { | 495 | { |
496 | int ret; | 496 | int ret; |
497 | cpumask_var_t mask; | 497 | cpumask_var_t mask; |
498 | unsigned long *k; | ||
499 | unsigned int min_length = cpumask_size(); | ||
500 | |||
501 | if (nr_cpu_ids <= BITS_PER_COMPAT_LONG) | ||
502 | min_length = sizeof(compat_ulong_t); | ||
503 | 498 | ||
504 | if (len < min_length) | 499 | if ((len * BITS_PER_BYTE) < nr_cpu_ids) |
500 | return -EINVAL; | ||
501 | if (len & (sizeof(compat_ulong_t)-1)) | ||
505 | return -EINVAL; | 502 | return -EINVAL; |
506 | 503 | ||
507 | if (!alloc_cpumask_var(&mask, GFP_KERNEL)) | 504 | if (!alloc_cpumask_var(&mask, GFP_KERNEL)) |
508 | return -ENOMEM; | 505 | return -ENOMEM; |
509 | 506 | ||
510 | ret = sched_getaffinity(pid, mask); | 507 | ret = sched_getaffinity(pid, mask); |
511 | if (ret < 0) | 508 | if (ret == 0) { |
512 | goto out; | 509 | size_t retlen = min_t(size_t, len, cpumask_size()); |
513 | 510 | ||
514 | k = cpumask_bits(mask); | 511 | if (compat_put_bitmap(user_mask_ptr, cpumask_bits(mask), retlen * 8)) |
515 | ret = compat_put_bitmap(user_mask_ptr, k, min_length * 8); | 512 | ret = -EFAULT; |
516 | if (ret == 0) | 513 | else |
517 | ret = min_length; | 514 | ret = retlen; |
518 | 515 | } | |
519 | out: | ||
520 | free_cpumask_var(mask); | 516 | free_cpumask_var(mask); |
517 | |||
521 | return ret; | 518 | return ret; |
522 | } | 519 | } |
523 | 520 | ||
diff --git a/kernel/cred.c b/kernel/cred.c index 8f3672a58a1e..2c24870c55d1 100644 --- a/kernel/cred.c +++ b/kernel/cred.c | |||
@@ -522,8 +522,6 @@ int commit_creds(struct cred *new) | |||
522 | #endif | 522 | #endif |
523 | BUG_ON(atomic_read(&new->usage) < 1); | 523 | BUG_ON(atomic_read(&new->usage) < 1); |
524 | 524 | ||
525 | security_commit_creds(new, old); | ||
526 | |||
527 | get_cred(new); /* we will require a ref for the subj creds too */ | 525 | get_cred(new); /* we will require a ref for the subj creds too */ |
528 | 526 | ||
529 | /* dumpability changes */ | 527 | /* dumpability changes */ |
diff --git a/kernel/groups.c b/kernel/groups.c index 2b45b2ee3964..53b1916c9492 100644 --- a/kernel/groups.c +++ b/kernel/groups.c | |||
@@ -164,12 +164,6 @@ int groups_search(const struct group_info *group_info, gid_t grp) | |||
164 | */ | 164 | */ |
165 | int set_groups(struct cred *new, struct group_info *group_info) | 165 | int set_groups(struct cred *new, struct group_info *group_info) |
166 | { | 166 | { |
167 | int retval; | ||
168 | |||
169 | retval = security_task_setgroups(group_info); | ||
170 | if (retval) | ||
171 | return retval; | ||
172 | |||
173 | put_group_info(new->group_info); | 167 | put_group_info(new->group_info); |
174 | groups_sort(group_info); | 168 | groups_sort(group_info); |
175 | get_group_info(group_info); | 169 | get_group_info(group_info); |
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 0086628b6e97..b9b134b35088 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c | |||
@@ -1749,35 +1749,15 @@ void __init hrtimers_init(void) | |||
1749 | } | 1749 | } |
1750 | 1750 | ||
1751 | /** | 1751 | /** |
1752 | * schedule_hrtimeout_range - sleep until timeout | 1752 | * schedule_hrtimeout_range_clock - sleep until timeout |
1753 | * @expires: timeout value (ktime_t) | 1753 | * @expires: timeout value (ktime_t) |
1754 | * @delta: slack in expires timeout (ktime_t) | 1754 | * @delta: slack in expires timeout (ktime_t) |
1755 | * @mode: timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL | 1755 | * @mode: timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL |
1756 | * | 1756 | * @clock: timer clock, CLOCK_MONOTONIC or CLOCK_REALTIME |
1757 | * Make the current task sleep until the given expiry time has | ||
1758 | * elapsed. The routine will return immediately unless | ||
1759 | * the current task state has been set (see set_current_state()). | ||
1760 | * | ||
1761 | * The @delta argument gives the kernel the freedom to schedule the | ||
1762 | * actual wakeup to a time that is both power and performance friendly. | ||
1763 | * The kernel give the normal best effort behavior for "@expires+@delta", | ||
1764 | * but may decide to fire the timer earlier, but no earlier than @expires. | ||
1765 | * | ||
1766 | * You can set the task state as follows - | ||
1767 | * | ||
1768 | * %TASK_UNINTERRUPTIBLE - at least @timeout time is guaranteed to | ||
1769 | * pass before the routine returns. | ||
1770 | * | ||
1771 | * %TASK_INTERRUPTIBLE - the routine may return early if a signal is | ||
1772 | * delivered to the current task. | ||
1773 | * | ||
1774 | * The current task state is guaranteed to be TASK_RUNNING when this | ||
1775 | * routine returns. | ||
1776 | * | ||
1777 | * Returns 0 when the timer has expired otherwise -EINTR | ||
1778 | */ | 1757 | */ |
1779 | int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta, | 1758 | int __sched |
1780 | const enum hrtimer_mode mode) | 1759 | schedule_hrtimeout_range_clock(ktime_t *expires, unsigned long delta, |
1760 | const enum hrtimer_mode mode, int clock) | ||
1781 | { | 1761 | { |
1782 | struct hrtimer_sleeper t; | 1762 | struct hrtimer_sleeper t; |
1783 | 1763 | ||
@@ -1799,7 +1779,7 @@ int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta, | |||
1799 | return -EINTR; | 1779 | return -EINTR; |
1800 | } | 1780 | } |
1801 | 1781 | ||
1802 | hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC, mode); | 1782 | hrtimer_init_on_stack(&t.timer, clock, mode); |
1803 | hrtimer_set_expires_range_ns(&t.timer, *expires, delta); | 1783 | hrtimer_set_expires_range_ns(&t.timer, *expires, delta); |
1804 | 1784 | ||
1805 | hrtimer_init_sleeper(&t, current); | 1785 | hrtimer_init_sleeper(&t, current); |
@@ -1818,6 +1798,41 @@ int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta, | |||
1818 | 1798 | ||
1819 | return !t.task ? 0 : -EINTR; | 1799 | return !t.task ? 0 : -EINTR; |
1820 | } | 1800 | } |
1801 | |||
1802 | /** | ||
1803 | * schedule_hrtimeout_range - sleep until timeout | ||
1804 | * @expires: timeout value (ktime_t) | ||
1805 | * @delta: slack in expires timeout (ktime_t) | ||
1806 | * @mode: timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL | ||
1807 | * | ||
1808 | * Make the current task sleep until the given expiry time has | ||
1809 | * elapsed. The routine will return immediately unless | ||
1810 | * the current task state has been set (see set_current_state()). | ||
1811 | * | ||
1812 | * The @delta argument gives the kernel the freedom to schedule the | ||
1813 | * actual wakeup to a time that is both power and performance friendly. | ||
1814 | * The kernel give the normal best effort behavior for "@expires+@delta", | ||
1815 | * but may decide to fire the timer earlier, but no earlier than @expires. | ||
1816 | * | ||
1817 | * You can set the task state as follows - | ||
1818 | * | ||
1819 | * %TASK_UNINTERRUPTIBLE - at least @timeout time is guaranteed to | ||
1820 | * pass before the routine returns. | ||
1821 | * | ||
1822 | * %TASK_INTERRUPTIBLE - the routine may return early if a signal is | ||
1823 | * delivered to the current task. | ||
1824 | * | ||
1825 | * The current task state is guaranteed to be TASK_RUNNING when this | ||
1826 | * routine returns. | ||
1827 | * | ||
1828 | * Returns 0 when the timer has expired otherwise -EINTR | ||
1829 | */ | ||
1830 | int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta, | ||
1831 | const enum hrtimer_mode mode) | ||
1832 | { | ||
1833 | return schedule_hrtimeout_range_clock(expires, delta, mode, | ||
1834 | CLOCK_MONOTONIC); | ||
1835 | } | ||
1821 | EXPORT_SYMBOL_GPL(schedule_hrtimeout_range); | 1836 | EXPORT_SYMBOL_GPL(schedule_hrtimeout_range); |
1822 | 1837 | ||
1823 | /** | 1838 | /** |
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index 76d5a671bfe1..27e5c6911223 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c | |||
@@ -370,9 +370,6 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action) | |||
370 | irqreturn_t ret, retval = IRQ_NONE; | 370 | irqreturn_t ret, retval = IRQ_NONE; |
371 | unsigned int status = 0; | 371 | unsigned int status = 0; |
372 | 372 | ||
373 | if (!(action->flags & IRQF_DISABLED)) | ||
374 | local_irq_enable_in_hardirq(); | ||
375 | |||
376 | do { | 373 | do { |
377 | trace_irq_handler_entry(irq, action); | 374 | trace_irq_handler_entry(irq, action); |
378 | ret = action->handler(irq, action->dev_id); | 375 | ret = action->handler(irq, action->dev_id); |
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 704e488730a5..3164ba7ce151 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c | |||
@@ -138,6 +138,22 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask) | |||
138 | return 0; | 138 | return 0; |
139 | } | 139 | } |
140 | 140 | ||
141 | int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m) | ||
142 | { | ||
143 | struct irq_desc *desc = irq_to_desc(irq); | ||
144 | unsigned long flags; | ||
145 | |||
146 | if (!desc) | ||
147 | return -EINVAL; | ||
148 | |||
149 | raw_spin_lock_irqsave(&desc->lock, flags); | ||
150 | desc->affinity_hint = m; | ||
151 | raw_spin_unlock_irqrestore(&desc->lock, flags); | ||
152 | |||
153 | return 0; | ||
154 | } | ||
155 | EXPORT_SYMBOL_GPL(irq_set_affinity_hint); | ||
156 | |||
141 | #ifndef CONFIG_AUTO_IRQ_AFFINITY | 157 | #ifndef CONFIG_AUTO_IRQ_AFFINITY |
142 | /* | 158 | /* |
143 | * Generic version of the affinity autoselector. | 159 | * Generic version of the affinity autoselector. |
@@ -757,16 +773,6 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) | |||
757 | if (new->flags & IRQF_ONESHOT) | 773 | if (new->flags & IRQF_ONESHOT) |
758 | desc->status |= IRQ_ONESHOT; | 774 | desc->status |= IRQ_ONESHOT; |
759 | 775 | ||
760 | /* | ||
761 | * Force MSI interrupts to run with interrupts | ||
762 | * disabled. The multi vector cards can cause stack | ||
763 | * overflows due to nested interrupts when enough of | ||
764 | * them are directed to a core and fire at the same | ||
765 | * time. | ||
766 | */ | ||
767 | if (desc->msi_desc) | ||
768 | new->flags |= IRQF_DISABLED; | ||
769 | |||
770 | if (!(desc->status & IRQ_NOAUTOEN)) { | 776 | if (!(desc->status & IRQ_NOAUTOEN)) { |
771 | desc->depth = 0; | 777 | desc->depth = 0; |
772 | desc->status &= ~IRQ_DISABLED; | 778 | desc->status &= ~IRQ_DISABLED; |
@@ -916,6 +922,12 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id) | |||
916 | desc->chip->disable(irq); | 922 | desc->chip->disable(irq); |
917 | } | 923 | } |
918 | 924 | ||
925 | #ifdef CONFIG_SMP | ||
926 | /* make sure affinity_hint is cleaned up */ | ||
927 | if (WARN_ON_ONCE(desc->affinity_hint)) | ||
928 | desc->affinity_hint = NULL; | ||
929 | #endif | ||
930 | |||
919 | raw_spin_unlock_irqrestore(&desc->lock, flags); | 931 | raw_spin_unlock_irqrestore(&desc->lock, flags); |
920 | 932 | ||
921 | unregister_handler_proc(irq, action); | 933 | unregister_handler_proc(irq, action); |
@@ -1027,7 +1039,6 @@ EXPORT_SYMBOL(free_irq); | |||
1027 | * Flags: | 1039 | * Flags: |
1028 | * | 1040 | * |
1029 | * IRQF_SHARED Interrupt is shared | 1041 | * IRQF_SHARED Interrupt is shared |
1030 | * IRQF_DISABLED Disable local interrupts while processing | ||
1031 | * IRQF_SAMPLE_RANDOM The interrupt can be used for entropy | 1042 | * IRQF_SAMPLE_RANDOM The interrupt can be used for entropy |
1032 | * IRQF_TRIGGER_* Specify active edge(s) or level | 1043 | * IRQF_TRIGGER_* Specify active edge(s) or level |
1033 | * | 1044 | * |
@@ -1041,25 +1052,6 @@ int request_threaded_irq(unsigned int irq, irq_handler_t handler, | |||
1041 | int retval; | 1052 | int retval; |
1042 | 1053 | ||
1043 | /* | 1054 | /* |
1044 | * handle_IRQ_event() always ignores IRQF_DISABLED except for | ||
1045 | * the _first_ irqaction (sigh). That can cause oopsing, but | ||
1046 | * the behavior is classified as "will not fix" so we need to | ||
1047 | * start nudging drivers away from using that idiom. | ||
1048 | */ | ||
1049 | if ((irqflags & (IRQF_SHARED|IRQF_DISABLED)) == | ||
1050 | (IRQF_SHARED|IRQF_DISABLED)) { | ||
1051 | pr_warning( | ||
1052 | "IRQ %d/%s: IRQF_DISABLED is not guaranteed on shared IRQs\n", | ||
1053 | irq, devname); | ||
1054 | } | ||
1055 | |||
1056 | #ifdef CONFIG_LOCKDEP | ||
1057 | /* | ||
1058 | * Lockdep wants atomic interrupt handlers: | ||
1059 | */ | ||
1060 | irqflags |= IRQF_DISABLED; | ||
1061 | #endif | ||
1062 | /* | ||
1063 | * Sanity-check: shared interrupts must pass in a real dev-ID, | 1055 | * Sanity-check: shared interrupts must pass in a real dev-ID, |
1064 | * otherwise we'll have trouble later trying to figure out | 1056 | * otherwise we'll have trouble later trying to figure out |
1065 | * which interrupt is which (messes up the interrupt freeing | 1057 | * which interrupt is which (messes up the interrupt freeing |
@@ -1120,3 +1112,40 @@ int request_threaded_irq(unsigned int irq, irq_handler_t handler, | |||
1120 | return retval; | 1112 | return retval; |
1121 | } | 1113 | } |
1122 | EXPORT_SYMBOL(request_threaded_irq); | 1114 | EXPORT_SYMBOL(request_threaded_irq); |
1115 | |||
1116 | /** | ||
1117 | * request_any_context_irq - allocate an interrupt line | ||
1118 | * @irq: Interrupt line to allocate | ||
1119 | * @handler: Function to be called when the IRQ occurs. | ||
1120 | * Threaded handler for threaded interrupts. | ||
1121 | * @flags: Interrupt type flags | ||
1122 | * @name: An ascii name for the claiming device | ||
1123 | * @dev_id: A cookie passed back to the handler function | ||
1124 | * | ||
1125 | * This call allocates interrupt resources and enables the | ||
1126 | * interrupt line and IRQ handling. It selects either a | ||
1127 | * hardirq or threaded handling method depending on the | ||
1128 | * context. | ||
1129 | * | ||
1130 | * On failure, it returns a negative value. On success, | ||
1131 | * it returns either IRQC_IS_HARDIRQ or IRQC_IS_NESTED. | ||
1132 | */ | ||
1133 | int request_any_context_irq(unsigned int irq, irq_handler_t handler, | ||
1134 | unsigned long flags, const char *name, void *dev_id) | ||
1135 | { | ||
1136 | struct irq_desc *desc = irq_to_desc(irq); | ||
1137 | int ret; | ||
1138 | |||
1139 | if (!desc) | ||
1140 | return -EINVAL; | ||
1141 | |||
1142 | if (desc->status & IRQ_NESTED_THREAD) { | ||
1143 | ret = request_threaded_irq(irq, NULL, handler, | ||
1144 | flags, name, dev_id); | ||
1145 | return !ret ? IRQC_IS_NESTED : ret; | ||
1146 | } | ||
1147 | |||
1148 | ret = request_irq(irq, handler, flags, name, dev_id); | ||
1149 | return !ret ? IRQC_IS_HARDIRQ : ret; | ||
1150 | } | ||
1151 | EXPORT_SYMBOL_GPL(request_any_context_irq); | ||
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index 7a6eb04ef6b5..09a2ee540bd2 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c | |||
@@ -32,6 +32,27 @@ static int irq_affinity_proc_show(struct seq_file *m, void *v) | |||
32 | return 0; | 32 | return 0; |
33 | } | 33 | } |
34 | 34 | ||
35 | static int irq_affinity_hint_proc_show(struct seq_file *m, void *v) | ||
36 | { | ||
37 | struct irq_desc *desc = irq_to_desc((long)m->private); | ||
38 | unsigned long flags; | ||
39 | cpumask_var_t mask; | ||
40 | |||
41 | if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) | ||
42 | return -ENOMEM; | ||
43 | |||
44 | raw_spin_lock_irqsave(&desc->lock, flags); | ||
45 | if (desc->affinity_hint) | ||
46 | cpumask_copy(mask, desc->affinity_hint); | ||
47 | raw_spin_unlock_irqrestore(&desc->lock, flags); | ||
48 | |||
49 | seq_cpumask(m, mask); | ||
50 | seq_putc(m, '\n'); | ||
51 | free_cpumask_var(mask); | ||
52 | |||
53 | return 0; | ||
54 | } | ||
55 | |||
35 | #ifndef is_affinity_mask_valid | 56 | #ifndef is_affinity_mask_valid |
36 | #define is_affinity_mask_valid(val) 1 | 57 | #define is_affinity_mask_valid(val) 1 |
37 | #endif | 58 | #endif |
@@ -84,6 +105,11 @@ static int irq_affinity_proc_open(struct inode *inode, struct file *file) | |||
84 | return single_open(file, irq_affinity_proc_show, PDE(inode)->data); | 105 | return single_open(file, irq_affinity_proc_show, PDE(inode)->data); |
85 | } | 106 | } |
86 | 107 | ||
108 | static int irq_affinity_hint_proc_open(struct inode *inode, struct file *file) | ||
109 | { | ||
110 | return single_open(file, irq_affinity_hint_proc_show, PDE(inode)->data); | ||
111 | } | ||
112 | |||
87 | static const struct file_operations irq_affinity_proc_fops = { | 113 | static const struct file_operations irq_affinity_proc_fops = { |
88 | .open = irq_affinity_proc_open, | 114 | .open = irq_affinity_proc_open, |
89 | .read = seq_read, | 115 | .read = seq_read, |
@@ -92,6 +118,13 @@ static const struct file_operations irq_affinity_proc_fops = { | |||
92 | .write = irq_affinity_proc_write, | 118 | .write = irq_affinity_proc_write, |
93 | }; | 119 | }; |
94 | 120 | ||
121 | static const struct file_operations irq_affinity_hint_proc_fops = { | ||
122 | .open = irq_affinity_hint_proc_open, | ||
123 | .read = seq_read, | ||
124 | .llseek = seq_lseek, | ||
125 | .release = single_release, | ||
126 | }; | ||
127 | |||
95 | static int default_affinity_show(struct seq_file *m, void *v) | 128 | static int default_affinity_show(struct seq_file *m, void *v) |
96 | { | 129 | { |
97 | seq_cpumask(m, irq_default_affinity); | 130 | seq_cpumask(m, irq_default_affinity); |
@@ -147,6 +180,26 @@ static const struct file_operations default_affinity_proc_fops = { | |||
147 | .release = single_release, | 180 | .release = single_release, |
148 | .write = default_affinity_write, | 181 | .write = default_affinity_write, |
149 | }; | 182 | }; |
183 | |||
184 | static int irq_node_proc_show(struct seq_file *m, void *v) | ||
185 | { | ||
186 | struct irq_desc *desc = irq_to_desc((long) m->private); | ||
187 | |||
188 | seq_printf(m, "%d\n", desc->node); | ||
189 | return 0; | ||
190 | } | ||
191 | |||
192 | static int irq_node_proc_open(struct inode *inode, struct file *file) | ||
193 | { | ||
194 | return single_open(file, irq_node_proc_show, PDE(inode)->data); | ||
195 | } | ||
196 | |||
197 | static const struct file_operations irq_node_proc_fops = { | ||
198 | .open = irq_node_proc_open, | ||
199 | .read = seq_read, | ||
200 | .llseek = seq_lseek, | ||
201 | .release = single_release, | ||
202 | }; | ||
150 | #endif | 203 | #endif |
151 | 204 | ||
152 | static int irq_spurious_proc_show(struct seq_file *m, void *v) | 205 | static int irq_spurious_proc_show(struct seq_file *m, void *v) |
@@ -231,6 +284,13 @@ void register_irq_proc(unsigned int irq, struct irq_desc *desc) | |||
231 | /* create /proc/irq/<irq>/smp_affinity */ | 284 | /* create /proc/irq/<irq>/smp_affinity */ |
232 | proc_create_data("smp_affinity", 0600, desc->dir, | 285 | proc_create_data("smp_affinity", 0600, desc->dir, |
233 | &irq_affinity_proc_fops, (void *)(long)irq); | 286 | &irq_affinity_proc_fops, (void *)(long)irq); |
287 | |||
288 | /* create /proc/irq/<irq>/affinity_hint */ | ||
289 | proc_create_data("affinity_hint", 0400, desc->dir, | ||
290 | &irq_affinity_hint_proc_fops, (void *)(long)irq); | ||
291 | |||
292 | proc_create_data("node", 0444, desc->dir, | ||
293 | &irq_node_proc_fops, (void *)(long)irq); | ||
234 | #endif | 294 | #endif |
235 | 295 | ||
236 | proc_create_data("spurious", 0444, desc->dir, | 296 | proc_create_data("spurious", 0444, desc->dir, |
diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c index 21fe3c426948..0b624e791805 100644 --- a/kernel/ksysfs.c +++ b/kernel/ksysfs.c | |||
@@ -138,7 +138,8 @@ extern const void __start_notes __attribute__((weak)); | |||
138 | extern const void __stop_notes __attribute__((weak)); | 138 | extern const void __stop_notes __attribute__((weak)); |
139 | #define notes_size (&__stop_notes - &__start_notes) | 139 | #define notes_size (&__stop_notes - &__start_notes) |
140 | 140 | ||
141 | static ssize_t notes_read(struct kobject *kobj, struct bin_attribute *bin_attr, | 141 | static ssize_t notes_read(struct file *filp, struct kobject *kobj, |
142 | struct bin_attribute *bin_attr, | ||
142 | char *buf, loff_t off, size_t count) | 143 | char *buf, loff_t off, size_t count) |
143 | { | 144 | { |
144 | memcpy(buf, &__start_notes + off, count); | 145 | memcpy(buf, &__start_notes + off, count); |
diff --git a/kernel/lockdep.c b/kernel/lockdep.c index ec21304856d1..54286798c37b 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c | |||
@@ -2711,6 +2711,8 @@ void lockdep_init_map(struct lockdep_map *lock, const char *name, | |||
2711 | } | 2711 | } |
2712 | EXPORT_SYMBOL_GPL(lockdep_init_map); | 2712 | EXPORT_SYMBOL_GPL(lockdep_init_map); |
2713 | 2713 | ||
2714 | struct lock_class_key __lockdep_no_validate__; | ||
2715 | |||
2714 | /* | 2716 | /* |
2715 | * This gets called for every mutex_lock*()/spin_lock*() operation. | 2717 | * This gets called for every mutex_lock*()/spin_lock*() operation. |
2716 | * We maintain the dependency maps and validate the locking attempt: | 2718 | * We maintain the dependency maps and validate the locking attempt: |
@@ -2745,6 +2747,9 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, | |||
2745 | return 0; | 2747 | return 0; |
2746 | } | 2748 | } |
2747 | 2749 | ||
2750 | if (lock->key == &__lockdep_no_validate__) | ||
2751 | check = 1; | ||
2752 | |||
2748 | if (!subclass) | 2753 | if (!subclass) |
2749 | class = lock->class_cache; | 2754 | class = lock->class_cache; |
2750 | /* | 2755 | /* |
diff --git a/kernel/module.c b/kernel/module.c index e2564580f3f1..5e14483768bb 100644 --- a/kernel/module.c +++ b/kernel/module.c | |||
@@ -1182,7 +1182,7 @@ struct module_notes_attrs { | |||
1182 | struct bin_attribute attrs[0]; | 1182 | struct bin_attribute attrs[0]; |
1183 | }; | 1183 | }; |
1184 | 1184 | ||
1185 | static ssize_t module_notes_read(struct kobject *kobj, | 1185 | static ssize_t module_notes_read(struct file *filp, struct kobject *kobj, |
1186 | struct bin_attribute *bin_attr, | 1186 | struct bin_attribute *bin_attr, |
1187 | char *buf, loff_t pos, size_t count) | 1187 | char *buf, loff_t pos, size_t count) |
1188 | { | 1188 | { |
diff --git a/kernel/pm_qos_params.c b/kernel/pm_qos_params.c index 3db49b9ca374..f42d3f737a33 100644 --- a/kernel/pm_qos_params.c +++ b/kernel/pm_qos_params.c | |||
@@ -2,7 +2,7 @@ | |||
2 | * This module exposes the interface to kernel space for specifying | 2 | * This module exposes the interface to kernel space for specifying |
3 | * QoS dependencies. It provides infrastructure for registration of: | 3 | * QoS dependencies. It provides infrastructure for registration of: |
4 | * | 4 | * |
5 | * Dependents on a QoS value : register requirements | 5 | * Dependents on a QoS value : register requests |
6 | * Watchers of QoS value : get notified when target QoS value changes | 6 | * Watchers of QoS value : get notified when target QoS value changes |
7 | * | 7 | * |
8 | * This QoS design is best effort based. Dependents register their QoS needs. | 8 | * This QoS design is best effort based. Dependents register their QoS needs. |
@@ -14,19 +14,21 @@ | |||
14 | * timeout: usec <-- currently not used. | 14 | * timeout: usec <-- currently not used. |
15 | * throughput: kbs (kilo byte / sec) | 15 | * throughput: kbs (kilo byte / sec) |
16 | * | 16 | * |
17 | * There are lists of pm_qos_objects each one wrapping requirements, notifiers | 17 | * There are lists of pm_qos_objects each one wrapping requests, notifiers |
18 | * | 18 | * |
19 | * User mode requirements on a QOS parameter register themselves to the | 19 | * User mode requests on a QOS parameter register themselves to the |
20 | * subsystem by opening the device node /dev/... and writing there request to | 20 | * subsystem by opening the device node /dev/... and writing there request to |
21 | * the node. As long as the process holds a file handle open to the node the | 21 | * the node. As long as the process holds a file handle open to the node the |
22 | * client continues to be accounted for. Upon file release the usermode | 22 | * client continues to be accounted for. Upon file release the usermode |
23 | * requirement is removed and a new qos target is computed. This way when the | 23 | * request is removed and a new qos target is computed. This way when the |
24 | * requirement that the application has is cleaned up when closes the file | 24 | * request that the application has is cleaned up when closes the file |
25 | * pointer or exits the pm_qos_object will get an opportunity to clean up. | 25 | * pointer or exits the pm_qos_object will get an opportunity to clean up. |
26 | * | 26 | * |
27 | * Mark Gross <mgross@linux.intel.com> | 27 | * Mark Gross <mgross@linux.intel.com> |
28 | */ | 28 | */ |
29 | 29 | ||
30 | /*#define DEBUG*/ | ||
31 | |||
30 | #include <linux/pm_qos_params.h> | 32 | #include <linux/pm_qos_params.h> |
31 | #include <linux/sched.h> | 33 | #include <linux/sched.h> |
32 | #include <linux/spinlock.h> | 34 | #include <linux/spinlock.h> |
@@ -42,25 +44,25 @@ | |||
42 | #include <linux/uaccess.h> | 44 | #include <linux/uaccess.h> |
43 | 45 | ||
44 | /* | 46 | /* |
45 | * locking rule: all changes to requirements or notifiers lists | 47 | * locking rule: all changes to requests or notifiers lists |
46 | * or pm_qos_object list and pm_qos_objects need to happen with pm_qos_lock | 48 | * or pm_qos_object list and pm_qos_objects need to happen with pm_qos_lock |
47 | * held, taken with _irqsave. One lock to rule them all | 49 | * held, taken with _irqsave. One lock to rule them all |
48 | */ | 50 | */ |
49 | struct requirement_list { | 51 | struct pm_qos_request_list { |
50 | struct list_head list; | 52 | struct list_head list; |
51 | union { | 53 | union { |
52 | s32 value; | 54 | s32 value; |
53 | s32 usec; | 55 | s32 usec; |
54 | s32 kbps; | 56 | s32 kbps; |
55 | }; | 57 | }; |
56 | char *name; | 58 | int pm_qos_class; |
57 | }; | 59 | }; |
58 | 60 | ||
59 | static s32 max_compare(s32 v1, s32 v2); | 61 | static s32 max_compare(s32 v1, s32 v2); |
60 | static s32 min_compare(s32 v1, s32 v2); | 62 | static s32 min_compare(s32 v1, s32 v2); |
61 | 63 | ||
62 | struct pm_qos_object { | 64 | struct pm_qos_object { |
63 | struct requirement_list requirements; | 65 | struct pm_qos_request_list requests; |
64 | struct blocking_notifier_head *notifiers; | 66 | struct blocking_notifier_head *notifiers; |
65 | struct miscdevice pm_qos_power_miscdev; | 67 | struct miscdevice pm_qos_power_miscdev; |
66 | char *name; | 68 | char *name; |
@@ -72,7 +74,7 @@ struct pm_qos_object { | |||
72 | static struct pm_qos_object null_pm_qos; | 74 | static struct pm_qos_object null_pm_qos; |
73 | static BLOCKING_NOTIFIER_HEAD(cpu_dma_lat_notifier); | 75 | static BLOCKING_NOTIFIER_HEAD(cpu_dma_lat_notifier); |
74 | static struct pm_qos_object cpu_dma_pm_qos = { | 76 | static struct pm_qos_object cpu_dma_pm_qos = { |
75 | .requirements = {LIST_HEAD_INIT(cpu_dma_pm_qos.requirements.list)}, | 77 | .requests = {LIST_HEAD_INIT(cpu_dma_pm_qos.requests.list)}, |
76 | .notifiers = &cpu_dma_lat_notifier, | 78 | .notifiers = &cpu_dma_lat_notifier, |
77 | .name = "cpu_dma_latency", | 79 | .name = "cpu_dma_latency", |
78 | .default_value = 2000 * USEC_PER_SEC, | 80 | .default_value = 2000 * USEC_PER_SEC, |
@@ -82,7 +84,7 @@ static struct pm_qos_object cpu_dma_pm_qos = { | |||
82 | 84 | ||
83 | static BLOCKING_NOTIFIER_HEAD(network_lat_notifier); | 85 | static BLOCKING_NOTIFIER_HEAD(network_lat_notifier); |
84 | static struct pm_qos_object network_lat_pm_qos = { | 86 | static struct pm_qos_object network_lat_pm_qos = { |
85 | .requirements = {LIST_HEAD_INIT(network_lat_pm_qos.requirements.list)}, | 87 | .requests = {LIST_HEAD_INIT(network_lat_pm_qos.requests.list)}, |
86 | .notifiers = &network_lat_notifier, | 88 | .notifiers = &network_lat_notifier, |
87 | .name = "network_latency", | 89 | .name = "network_latency", |
88 | .default_value = 2000 * USEC_PER_SEC, | 90 | .default_value = 2000 * USEC_PER_SEC, |
@@ -93,8 +95,7 @@ static struct pm_qos_object network_lat_pm_qos = { | |||
93 | 95 | ||
94 | static BLOCKING_NOTIFIER_HEAD(network_throughput_notifier); | 96 | static BLOCKING_NOTIFIER_HEAD(network_throughput_notifier); |
95 | static struct pm_qos_object network_throughput_pm_qos = { | 97 | static struct pm_qos_object network_throughput_pm_qos = { |
96 | .requirements = | 98 | .requests = {LIST_HEAD_INIT(network_throughput_pm_qos.requests.list)}, |
97 | {LIST_HEAD_INIT(network_throughput_pm_qos.requirements.list)}, | ||
98 | .notifiers = &network_throughput_notifier, | 99 | .notifiers = &network_throughput_notifier, |
99 | .name = "network_throughput", | 100 | .name = "network_throughput", |
100 | .default_value = 0, | 101 | .default_value = 0, |
@@ -135,31 +136,34 @@ static s32 min_compare(s32 v1, s32 v2) | |||
135 | } | 136 | } |
136 | 137 | ||
137 | 138 | ||
138 | static void update_target(int target) | 139 | static void update_target(int pm_qos_class) |
139 | { | 140 | { |
140 | s32 extreme_value; | 141 | s32 extreme_value; |
141 | struct requirement_list *node; | 142 | struct pm_qos_request_list *node; |
142 | unsigned long flags; | 143 | unsigned long flags; |
143 | int call_notifier = 0; | 144 | int call_notifier = 0; |
144 | 145 | ||
145 | spin_lock_irqsave(&pm_qos_lock, flags); | 146 | spin_lock_irqsave(&pm_qos_lock, flags); |
146 | extreme_value = pm_qos_array[target]->default_value; | 147 | extreme_value = pm_qos_array[pm_qos_class]->default_value; |
147 | list_for_each_entry(node, | 148 | list_for_each_entry(node, |
148 | &pm_qos_array[target]->requirements.list, list) { | 149 | &pm_qos_array[pm_qos_class]->requests.list, list) { |
149 | extreme_value = pm_qos_array[target]->comparitor( | 150 | extreme_value = pm_qos_array[pm_qos_class]->comparitor( |
150 | extreme_value, node->value); | 151 | extreme_value, node->value); |
151 | } | 152 | } |
152 | if (atomic_read(&pm_qos_array[target]->target_value) != extreme_value) { | 153 | if (atomic_read(&pm_qos_array[pm_qos_class]->target_value) != |
154 | extreme_value) { | ||
153 | call_notifier = 1; | 155 | call_notifier = 1; |
154 | atomic_set(&pm_qos_array[target]->target_value, extreme_value); | 156 | atomic_set(&pm_qos_array[pm_qos_class]->target_value, |
155 | pr_debug(KERN_ERR "new target for qos %d is %d\n", target, | 157 | extreme_value); |
156 | atomic_read(&pm_qos_array[target]->target_value)); | 158 | pr_debug(KERN_ERR "new target for qos %d is %d\n", pm_qos_class, |
159 | atomic_read(&pm_qos_array[pm_qos_class]->target_value)); | ||
157 | } | 160 | } |
158 | spin_unlock_irqrestore(&pm_qos_lock, flags); | 161 | spin_unlock_irqrestore(&pm_qos_lock, flags); |
159 | 162 | ||
160 | if (call_notifier) | 163 | if (call_notifier) |
161 | blocking_notifier_call_chain(pm_qos_array[target]->notifiers, | 164 | blocking_notifier_call_chain( |
162 | (unsigned long) extreme_value, NULL); | 165 | pm_qos_array[pm_qos_class]->notifiers, |
166 | (unsigned long) extreme_value, NULL); | ||
163 | } | 167 | } |
164 | 168 | ||
165 | static int register_pm_qos_misc(struct pm_qos_object *qos) | 169 | static int register_pm_qos_misc(struct pm_qos_object *qos) |
@@ -185,125 +189,112 @@ static int find_pm_qos_object_by_minor(int minor) | |||
185 | } | 189 | } |
186 | 190 | ||
187 | /** | 191 | /** |
188 | * pm_qos_requirement - returns current system wide qos expectation | 192 | * pm_qos_request - returns current system wide qos expectation |
189 | * @pm_qos_class: identification of which qos value is requested | 193 | * @pm_qos_class: identification of which qos value is requested |
190 | * | 194 | * |
191 | * This function returns the current target value in an atomic manner. | 195 | * This function returns the current target value in an atomic manner. |
192 | */ | 196 | */ |
193 | int pm_qos_requirement(int pm_qos_class) | 197 | int pm_qos_request(int pm_qos_class) |
194 | { | 198 | { |
195 | return atomic_read(&pm_qos_array[pm_qos_class]->target_value); | 199 | return atomic_read(&pm_qos_array[pm_qos_class]->target_value); |
196 | } | 200 | } |
197 | EXPORT_SYMBOL_GPL(pm_qos_requirement); | 201 | EXPORT_SYMBOL_GPL(pm_qos_request); |
198 | 202 | ||
199 | /** | 203 | /** |
200 | * pm_qos_add_requirement - inserts new qos request into the list | 204 | * pm_qos_add_request - inserts new qos request into the list |
201 | * @pm_qos_class: identifies which list of qos request to us | 205 | * @pm_qos_class: identifies which list of qos request to us |
202 | * @name: identifies the request | ||
203 | * @value: defines the qos request | 206 | * @value: defines the qos request |
204 | * | 207 | * |
205 | * This function inserts a new entry in the pm_qos_class list of requested qos | 208 | * This function inserts a new entry in the pm_qos_class list of requested qos |
206 | * performance characteristics. It recomputes the aggregate QoS expectations | 209 | * performance characteristics. It recomputes the aggregate QoS expectations |
207 | * for the pm_qos_class of parameters. | 210 | * for the pm_qos_class of parameters, and returns the pm_qos_request list |
211 | * element as a handle for use in updating and removal. Call needs to save | ||
212 | * this handle for later use. | ||
208 | */ | 213 | */ |
209 | int pm_qos_add_requirement(int pm_qos_class, char *name, s32 value) | 214 | struct pm_qos_request_list *pm_qos_add_request(int pm_qos_class, s32 value) |
210 | { | 215 | { |
211 | struct requirement_list *dep; | 216 | struct pm_qos_request_list *dep; |
212 | unsigned long flags; | 217 | unsigned long flags; |
213 | 218 | ||
214 | dep = kzalloc(sizeof(struct requirement_list), GFP_KERNEL); | 219 | dep = kzalloc(sizeof(struct pm_qos_request_list), GFP_KERNEL); |
215 | if (dep) { | 220 | if (dep) { |
216 | if (value == PM_QOS_DEFAULT_VALUE) | 221 | if (value == PM_QOS_DEFAULT_VALUE) |
217 | dep->value = pm_qos_array[pm_qos_class]->default_value; | 222 | dep->value = pm_qos_array[pm_qos_class]->default_value; |
218 | else | 223 | else |
219 | dep->value = value; | 224 | dep->value = value; |
220 | dep->name = kstrdup(name, GFP_KERNEL); | 225 | dep->pm_qos_class = pm_qos_class; |
221 | if (!dep->name) | ||
222 | goto cleanup; | ||
223 | 226 | ||
224 | spin_lock_irqsave(&pm_qos_lock, flags); | 227 | spin_lock_irqsave(&pm_qos_lock, flags); |
225 | list_add(&dep->list, | 228 | list_add(&dep->list, |
226 | &pm_qos_array[pm_qos_class]->requirements.list); | 229 | &pm_qos_array[pm_qos_class]->requests.list); |
227 | spin_unlock_irqrestore(&pm_qos_lock, flags); | 230 | spin_unlock_irqrestore(&pm_qos_lock, flags); |
228 | update_target(pm_qos_class); | 231 | update_target(pm_qos_class); |
229 | |||
230 | return 0; | ||
231 | } | 232 | } |
232 | 233 | ||
233 | cleanup: | 234 | return dep; |
234 | kfree(dep); | ||
235 | return -ENOMEM; | ||
236 | } | 235 | } |
237 | EXPORT_SYMBOL_GPL(pm_qos_add_requirement); | 236 | EXPORT_SYMBOL_GPL(pm_qos_add_request); |
238 | 237 | ||
239 | /** | 238 | /** |
240 | * pm_qos_update_requirement - modifies an existing qos request | 239 | * pm_qos_update_request - modifies an existing qos request |
241 | * @pm_qos_class: identifies which list of qos request to us | 240 | * @pm_qos_req : handle to list element holding a pm_qos request to use |
242 | * @name: identifies the request | ||
243 | * @value: defines the qos request | 241 | * @value: defines the qos request |
244 | * | 242 | * |
245 | * Updates an existing qos requirement for the pm_qos_class of parameters along | 243 | * Updates an existing qos request for the pm_qos_class of parameters along |
246 | * with updating the target pm_qos_class value. | 244 | * with updating the target pm_qos_class value. |
247 | * | 245 | * |
248 | * If the named request isn't in the list then no change is made. | 246 | * Attempts are made to make this code callable on hot code paths. |
249 | */ | 247 | */ |
250 | int pm_qos_update_requirement(int pm_qos_class, char *name, s32 new_value) | 248 | void pm_qos_update_request(struct pm_qos_request_list *pm_qos_req, |
249 | s32 new_value) | ||
251 | { | 250 | { |
252 | unsigned long flags; | 251 | unsigned long flags; |
253 | struct requirement_list *node; | ||
254 | int pending_update = 0; | 252 | int pending_update = 0; |
253 | s32 temp; | ||
255 | 254 | ||
256 | spin_lock_irqsave(&pm_qos_lock, flags); | 255 | if (pm_qos_req) { /*guard against callers passing in null */ |
257 | list_for_each_entry(node, | 256 | spin_lock_irqsave(&pm_qos_lock, flags); |
258 | &pm_qos_array[pm_qos_class]->requirements.list, list) { | 257 | if (new_value == PM_QOS_DEFAULT_VALUE) |
259 | if (strcmp(node->name, name) == 0) { | 258 | temp = pm_qos_array[pm_qos_req->pm_qos_class]->default_value; |
260 | if (new_value == PM_QOS_DEFAULT_VALUE) | 259 | else |
261 | node->value = | 260 | temp = new_value; |
262 | pm_qos_array[pm_qos_class]->default_value; | 261 | |
263 | else | 262 | if (temp != pm_qos_req->value) { |
264 | node->value = new_value; | ||
265 | pending_update = 1; | 263 | pending_update = 1; |
266 | break; | 264 | pm_qos_req->value = temp; |
267 | } | 265 | } |
266 | spin_unlock_irqrestore(&pm_qos_lock, flags); | ||
267 | if (pending_update) | ||
268 | update_target(pm_qos_req->pm_qos_class); | ||
268 | } | 269 | } |
269 | spin_unlock_irqrestore(&pm_qos_lock, flags); | ||
270 | if (pending_update) | ||
271 | update_target(pm_qos_class); | ||
272 | |||
273 | return 0; | ||
274 | } | 270 | } |
275 | EXPORT_SYMBOL_GPL(pm_qos_update_requirement); | 271 | EXPORT_SYMBOL_GPL(pm_qos_update_request); |
276 | 272 | ||
277 | /** | 273 | /** |
278 | * pm_qos_remove_requirement - modifies an existing qos request | 274 | * pm_qos_remove_request - modifies an existing qos request |
279 | * @pm_qos_class: identifies which list of qos request to us | 275 | * @pm_qos_req: handle to request list element |
280 | * @name: identifies the request | ||
281 | * | 276 | * |
282 | * Will remove named qos request from pm_qos_class list of parameters and | 277 | * Will remove pm qos request from the list of requests and |
283 | * recompute the current target value for the pm_qos_class. | 278 | * recompute the current target value for the pm_qos_class. Call this |
279 | * on slow code paths. | ||
284 | */ | 280 | */ |
285 | void pm_qos_remove_requirement(int pm_qos_class, char *name) | 281 | void pm_qos_remove_request(struct pm_qos_request_list *pm_qos_req) |
286 | { | 282 | { |
287 | unsigned long flags; | 283 | unsigned long flags; |
288 | struct requirement_list *node; | 284 | int qos_class; |
289 | int pending_update = 0; | ||
290 | 285 | ||
286 | if (pm_qos_req == NULL) | ||
287 | return; | ||
288 | /* silent return to keep pcm code cleaner */ | ||
289 | |||
290 | qos_class = pm_qos_req->pm_qos_class; | ||
291 | spin_lock_irqsave(&pm_qos_lock, flags); | 291 | spin_lock_irqsave(&pm_qos_lock, flags); |
292 | list_for_each_entry(node, | 292 | list_del(&pm_qos_req->list); |
293 | &pm_qos_array[pm_qos_class]->requirements.list, list) { | 293 | kfree(pm_qos_req); |
294 | if (strcmp(node->name, name) == 0) { | ||
295 | kfree(node->name); | ||
296 | list_del(&node->list); | ||
297 | kfree(node); | ||
298 | pending_update = 1; | ||
299 | break; | ||
300 | } | ||
301 | } | ||
302 | spin_unlock_irqrestore(&pm_qos_lock, flags); | 294 | spin_unlock_irqrestore(&pm_qos_lock, flags); |
303 | if (pending_update) | 295 | update_target(qos_class); |
304 | update_target(pm_qos_class); | ||
305 | } | 296 | } |
306 | EXPORT_SYMBOL_GPL(pm_qos_remove_requirement); | 297 | EXPORT_SYMBOL_GPL(pm_qos_remove_request); |
307 | 298 | ||
308 | /** | 299 | /** |
309 | * pm_qos_add_notifier - sets notification entry for changes to target value | 300 | * pm_qos_add_notifier - sets notification entry for changes to target value |
@@ -313,7 +304,7 @@ EXPORT_SYMBOL_GPL(pm_qos_remove_requirement); | |||
313 | * will register the notifier into a notification chain that gets called | 304 | * will register the notifier into a notification chain that gets called |
314 | * upon changes to the pm_qos_class target value. | 305 | * upon changes to the pm_qos_class target value. |
315 | */ | 306 | */ |
316 | int pm_qos_add_notifier(int pm_qos_class, struct notifier_block *notifier) | 307 | int pm_qos_add_notifier(int pm_qos_class, struct notifier_block *notifier) |
317 | { | 308 | { |
318 | int retval; | 309 | int retval; |
319 | 310 | ||
@@ -343,21 +334,16 @@ int pm_qos_remove_notifier(int pm_qos_class, struct notifier_block *notifier) | |||
343 | } | 334 | } |
344 | EXPORT_SYMBOL_GPL(pm_qos_remove_notifier); | 335 | EXPORT_SYMBOL_GPL(pm_qos_remove_notifier); |
345 | 336 | ||
346 | #define PID_NAME_LEN 32 | ||
347 | |||
348 | static int pm_qos_power_open(struct inode *inode, struct file *filp) | 337 | static int pm_qos_power_open(struct inode *inode, struct file *filp) |
349 | { | 338 | { |
350 | int ret; | ||
351 | long pm_qos_class; | 339 | long pm_qos_class; |
352 | char name[PID_NAME_LEN]; | ||
353 | 340 | ||
354 | pm_qos_class = find_pm_qos_object_by_minor(iminor(inode)); | 341 | pm_qos_class = find_pm_qos_object_by_minor(iminor(inode)); |
355 | if (pm_qos_class >= 0) { | 342 | if (pm_qos_class >= 0) { |
356 | filp->private_data = (void *)pm_qos_class; | 343 | filp->private_data = (void *) pm_qos_add_request(pm_qos_class, |
357 | snprintf(name, PID_NAME_LEN, "process_%d", current->pid); | 344 | PM_QOS_DEFAULT_VALUE); |
358 | ret = pm_qos_add_requirement(pm_qos_class, name, | 345 | |
359 | PM_QOS_DEFAULT_VALUE); | 346 | if (filp->private_data) |
360 | if (ret >= 0) | ||
361 | return 0; | 347 | return 0; |
362 | } | 348 | } |
363 | return -EPERM; | 349 | return -EPERM; |
@@ -365,32 +351,40 @@ static int pm_qos_power_open(struct inode *inode, struct file *filp) | |||
365 | 351 | ||
366 | static int pm_qos_power_release(struct inode *inode, struct file *filp) | 352 | static int pm_qos_power_release(struct inode *inode, struct file *filp) |
367 | { | 353 | { |
368 | int pm_qos_class; | 354 | struct pm_qos_request_list *req; |
369 | char name[PID_NAME_LEN]; | ||
370 | 355 | ||
371 | pm_qos_class = (long)filp->private_data; | 356 | req = (struct pm_qos_request_list *)filp->private_data; |
372 | snprintf(name, PID_NAME_LEN, "process_%d", current->pid); | 357 | pm_qos_remove_request(req); |
373 | pm_qos_remove_requirement(pm_qos_class, name); | ||
374 | 358 | ||
375 | return 0; | 359 | return 0; |
376 | } | 360 | } |
377 | 361 | ||
362 | |||
378 | static ssize_t pm_qos_power_write(struct file *filp, const char __user *buf, | 363 | static ssize_t pm_qos_power_write(struct file *filp, const char __user *buf, |
379 | size_t count, loff_t *f_pos) | 364 | size_t count, loff_t *f_pos) |
380 | { | 365 | { |
381 | s32 value; | 366 | s32 value; |
382 | int pm_qos_class; | 367 | int x; |
383 | char name[PID_NAME_LEN]; | 368 | char ascii_value[11]; |
384 | 369 | struct pm_qos_request_list *pm_qos_req; | |
385 | pm_qos_class = (long)filp->private_data; | 370 | |
386 | if (count != sizeof(s32)) | 371 | if (count == sizeof(s32)) { |
372 | if (copy_from_user(&value, buf, sizeof(s32))) | ||
373 | return -EFAULT; | ||
374 | } else if (count == 11) { /* len('0x12345678/0') */ | ||
375 | if (copy_from_user(ascii_value, buf, 11)) | ||
376 | return -EFAULT; | ||
377 | x = sscanf(ascii_value, "%x", &value); | ||
378 | if (x != 1) | ||
379 | return -EINVAL; | ||
380 | pr_debug(KERN_ERR "%s, %d, 0x%x\n", ascii_value, x, value); | ||
381 | } else | ||
387 | return -EINVAL; | 382 | return -EINVAL; |
388 | if (copy_from_user(&value, buf, sizeof(s32))) | ||
389 | return -EFAULT; | ||
390 | snprintf(name, PID_NAME_LEN, "process_%d", current->pid); | ||
391 | pm_qos_update_requirement(pm_qos_class, name, value); | ||
392 | 383 | ||
393 | return sizeof(s32); | 384 | pm_qos_req = (struct pm_qos_request_list *)filp->private_data; |
385 | pm_qos_update_request(pm_qos_req, value); | ||
386 | |||
387 | return count; | ||
394 | } | 388 | } |
395 | 389 | ||
396 | 390 | ||
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index bc7704b3a443..00bb252f29a2 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c | |||
@@ -11,19 +11,18 @@ | |||
11 | #include <trace/events/timer.h> | 11 | #include <trace/events/timer.h> |
12 | 12 | ||
13 | /* | 13 | /* |
14 | * Called after updating RLIMIT_CPU to set timer expiration if necessary. | 14 | * Called after updating RLIMIT_CPU to run cpu timer and update |
15 | * tsk->signal->cputime_expires expiration cache if necessary. Needs | ||
16 | * siglock protection since other code may update expiration cache as | ||
17 | * well. | ||
15 | */ | 18 | */ |
16 | void update_rlimit_cpu(unsigned long rlim_new) | 19 | void update_rlimit_cpu(unsigned long rlim_new) |
17 | { | 20 | { |
18 | cputime_t cputime = secs_to_cputime(rlim_new); | 21 | cputime_t cputime = secs_to_cputime(rlim_new); |
19 | struct signal_struct *const sig = current->signal; | ||
20 | 22 | ||
21 | if (cputime_eq(sig->it[CPUCLOCK_PROF].expires, cputime_zero) || | 23 | spin_lock_irq(&current->sighand->siglock); |
22 | cputime_gt(sig->it[CPUCLOCK_PROF].expires, cputime)) { | 24 | set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL); |
23 | spin_lock_irq(&current->sighand->siglock); | 25 | spin_unlock_irq(&current->sighand->siglock); |
24 | set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL); | ||
25 | spin_unlock_irq(&current->sighand->siglock); | ||
26 | } | ||
27 | } | 26 | } |
28 | 27 | ||
29 | static int check_clock(const clockid_t which_clock) | 28 | static int check_clock(const clockid_t which_clock) |
@@ -548,111 +547,62 @@ static inline int expires_gt(cputime_t expires, cputime_t new_exp) | |||
548 | cputime_gt(expires, new_exp); | 547 | cputime_gt(expires, new_exp); |
549 | } | 548 | } |
550 | 549 | ||
551 | static inline int expires_le(cputime_t expires, cputime_t new_exp) | ||
552 | { | ||
553 | return !cputime_eq(expires, cputime_zero) && | ||
554 | cputime_le(expires, new_exp); | ||
555 | } | ||
556 | /* | 550 | /* |
557 | * Insert the timer on the appropriate list before any timers that | 551 | * Insert the timer on the appropriate list before any timers that |
558 | * expire later. This must be called with the tasklist_lock held | 552 | * expire later. This must be called with the tasklist_lock held |
559 | * for reading, and interrupts disabled. | 553 | * for reading, interrupts disabled and p->sighand->siglock taken. |
560 | */ | 554 | */ |
561 | static void arm_timer(struct k_itimer *timer, union cpu_time_count now) | 555 | static void arm_timer(struct k_itimer *timer) |
562 | { | 556 | { |
563 | struct task_struct *p = timer->it.cpu.task; | 557 | struct task_struct *p = timer->it.cpu.task; |
564 | struct list_head *head, *listpos; | 558 | struct list_head *head, *listpos; |
559 | struct task_cputime *cputime_expires; | ||
565 | struct cpu_timer_list *const nt = &timer->it.cpu; | 560 | struct cpu_timer_list *const nt = &timer->it.cpu; |
566 | struct cpu_timer_list *next; | 561 | struct cpu_timer_list *next; |
567 | unsigned long i; | ||
568 | 562 | ||
569 | head = (CPUCLOCK_PERTHREAD(timer->it_clock) ? | 563 | if (CPUCLOCK_PERTHREAD(timer->it_clock)) { |
570 | p->cpu_timers : p->signal->cpu_timers); | 564 | head = p->cpu_timers; |
565 | cputime_expires = &p->cputime_expires; | ||
566 | } else { | ||
567 | head = p->signal->cpu_timers; | ||
568 | cputime_expires = &p->signal->cputime_expires; | ||
569 | } | ||
571 | head += CPUCLOCK_WHICH(timer->it_clock); | 570 | head += CPUCLOCK_WHICH(timer->it_clock); |
572 | 571 | ||
573 | BUG_ON(!irqs_disabled()); | ||
574 | spin_lock(&p->sighand->siglock); | ||
575 | |||
576 | listpos = head; | 572 | listpos = head; |
577 | if (CPUCLOCK_WHICH(timer->it_clock) == CPUCLOCK_SCHED) { | 573 | list_for_each_entry(next, head, entry) { |
578 | list_for_each_entry(next, head, entry) { | 574 | if (cpu_time_before(timer->it_clock, nt->expires, next->expires)) |
579 | if (next->expires.sched > nt->expires.sched) | 575 | break; |
580 | break; | 576 | listpos = &next->entry; |
581 | listpos = &next->entry; | ||
582 | } | ||
583 | } else { | ||
584 | list_for_each_entry(next, head, entry) { | ||
585 | if (cputime_gt(next->expires.cpu, nt->expires.cpu)) | ||
586 | break; | ||
587 | listpos = &next->entry; | ||
588 | } | ||
589 | } | 577 | } |
590 | list_add(&nt->entry, listpos); | 578 | list_add(&nt->entry, listpos); |
591 | 579 | ||
592 | if (listpos == head) { | 580 | if (listpos == head) { |
581 | union cpu_time_count *exp = &nt->expires; | ||
582 | |||
593 | /* | 583 | /* |
594 | * We are the new earliest-expiring timer. | 584 | * We are the new earliest-expiring POSIX 1.b timer, hence |
595 | * If we are a thread timer, there can always | 585 | * need to update expiration cache. Take into account that |
596 | * be a process timer telling us to stop earlier. | 586 | * for process timers we share expiration cache with itimers |
587 | * and RLIMIT_CPU and for thread timers with RLIMIT_RTTIME. | ||
597 | */ | 588 | */ |
598 | 589 | ||
599 | if (CPUCLOCK_PERTHREAD(timer->it_clock)) { | 590 | switch (CPUCLOCK_WHICH(timer->it_clock)) { |
600 | union cpu_time_count *exp = &nt->expires; | 591 | case CPUCLOCK_PROF: |
601 | 592 | if (expires_gt(cputime_expires->prof_exp, exp->cpu)) | |
602 | switch (CPUCLOCK_WHICH(timer->it_clock)) { | 593 | cputime_expires->prof_exp = exp->cpu; |
603 | default: | 594 | break; |
604 | BUG(); | 595 | case CPUCLOCK_VIRT: |
605 | case CPUCLOCK_PROF: | 596 | if (expires_gt(cputime_expires->virt_exp, exp->cpu)) |
606 | if (expires_gt(p->cputime_expires.prof_exp, | 597 | cputime_expires->virt_exp = exp->cpu; |
607 | exp->cpu)) | 598 | break; |
608 | p->cputime_expires.prof_exp = exp->cpu; | 599 | case CPUCLOCK_SCHED: |
609 | break; | 600 | if (cputime_expires->sched_exp == 0 || |
610 | case CPUCLOCK_VIRT: | 601 | cputime_expires->sched_exp > exp->sched) |
611 | if (expires_gt(p->cputime_expires.virt_exp, | 602 | cputime_expires->sched_exp = exp->sched; |
612 | exp->cpu)) | 603 | break; |
613 | p->cputime_expires.virt_exp = exp->cpu; | ||
614 | break; | ||
615 | case CPUCLOCK_SCHED: | ||
616 | if (p->cputime_expires.sched_exp == 0 || | ||
617 | p->cputime_expires.sched_exp > exp->sched) | ||
618 | p->cputime_expires.sched_exp = | ||
619 | exp->sched; | ||
620 | break; | ||
621 | } | ||
622 | } else { | ||
623 | struct signal_struct *const sig = p->signal; | ||
624 | union cpu_time_count *exp = &timer->it.cpu.expires; | ||
625 | |||
626 | /* | ||
627 | * For a process timer, set the cached expiration time. | ||
628 | */ | ||
629 | switch (CPUCLOCK_WHICH(timer->it_clock)) { | ||
630 | default: | ||
631 | BUG(); | ||
632 | case CPUCLOCK_VIRT: | ||
633 | if (expires_le(sig->it[CPUCLOCK_VIRT].expires, | ||
634 | exp->cpu)) | ||
635 | break; | ||
636 | sig->cputime_expires.virt_exp = exp->cpu; | ||
637 | break; | ||
638 | case CPUCLOCK_PROF: | ||
639 | if (expires_le(sig->it[CPUCLOCK_PROF].expires, | ||
640 | exp->cpu)) | ||
641 | break; | ||
642 | i = sig->rlim[RLIMIT_CPU].rlim_cur; | ||
643 | if (i != RLIM_INFINITY && | ||
644 | i <= cputime_to_secs(exp->cpu)) | ||
645 | break; | ||
646 | sig->cputime_expires.prof_exp = exp->cpu; | ||
647 | break; | ||
648 | case CPUCLOCK_SCHED: | ||
649 | sig->cputime_expires.sched_exp = exp->sched; | ||
650 | break; | ||
651 | } | ||
652 | } | 604 | } |
653 | } | 605 | } |
654 | |||
655 | spin_unlock(&p->sighand->siglock); | ||
656 | } | 606 | } |
657 | 607 | ||
658 | /* | 608 | /* |
@@ -660,7 +610,12 @@ static void arm_timer(struct k_itimer *timer, union cpu_time_count now) | |||
660 | */ | 610 | */ |
661 | static void cpu_timer_fire(struct k_itimer *timer) | 611 | static void cpu_timer_fire(struct k_itimer *timer) |
662 | { | 612 | { |
663 | if (unlikely(timer->sigq == NULL)) { | 613 | if ((timer->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) { |
614 | /* | ||
615 | * User don't want any signal. | ||
616 | */ | ||
617 | timer->it.cpu.expires.sched = 0; | ||
618 | } else if (unlikely(timer->sigq == NULL)) { | ||
664 | /* | 619 | /* |
665 | * This a special case for clock_nanosleep, | 620 | * This a special case for clock_nanosleep, |
666 | * not a normal timer from sys_timer_create. | 621 | * not a normal timer from sys_timer_create. |
@@ -721,7 +676,7 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags, | |||
721 | struct itimerspec *new, struct itimerspec *old) | 676 | struct itimerspec *new, struct itimerspec *old) |
722 | { | 677 | { |
723 | struct task_struct *p = timer->it.cpu.task; | 678 | struct task_struct *p = timer->it.cpu.task; |
724 | union cpu_time_count old_expires, new_expires, val; | 679 | union cpu_time_count old_expires, new_expires, old_incr, val; |
725 | int ret; | 680 | int ret; |
726 | 681 | ||
727 | if (unlikely(p == NULL)) { | 682 | if (unlikely(p == NULL)) { |
@@ -752,6 +707,7 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags, | |||
752 | BUG_ON(!irqs_disabled()); | 707 | BUG_ON(!irqs_disabled()); |
753 | 708 | ||
754 | ret = 0; | 709 | ret = 0; |
710 | old_incr = timer->it.cpu.incr; | ||
755 | spin_lock(&p->sighand->siglock); | 711 | spin_lock(&p->sighand->siglock); |
756 | old_expires = timer->it.cpu.expires; | 712 | old_expires = timer->it.cpu.expires; |
757 | if (unlikely(timer->it.cpu.firing)) { | 713 | if (unlikely(timer->it.cpu.firing)) { |
@@ -759,7 +715,6 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags, | |||
759 | ret = TIMER_RETRY; | 715 | ret = TIMER_RETRY; |
760 | } else | 716 | } else |
761 | list_del_init(&timer->it.cpu.entry); | 717 | list_del_init(&timer->it.cpu.entry); |
762 | spin_unlock(&p->sighand->siglock); | ||
763 | 718 | ||
764 | /* | 719 | /* |
765 | * We need to sample the current value to convert the new | 720 | * We need to sample the current value to convert the new |
@@ -813,6 +768,7 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags, | |||
813 | * disable this firing since we are already reporting | 768 | * disable this firing since we are already reporting |
814 | * it as an overrun (thanks to bump_cpu_timer above). | 769 | * it as an overrun (thanks to bump_cpu_timer above). |
815 | */ | 770 | */ |
771 | spin_unlock(&p->sighand->siglock); | ||
816 | read_unlock(&tasklist_lock); | 772 | read_unlock(&tasklist_lock); |
817 | goto out; | 773 | goto out; |
818 | } | 774 | } |
@@ -828,11 +784,11 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags, | |||
828 | */ | 784 | */ |
829 | timer->it.cpu.expires = new_expires; | 785 | timer->it.cpu.expires = new_expires; |
830 | if (new_expires.sched != 0 && | 786 | if (new_expires.sched != 0 && |
831 | (timer->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE && | ||
832 | cpu_time_before(timer->it_clock, val, new_expires)) { | 787 | cpu_time_before(timer->it_clock, val, new_expires)) { |
833 | arm_timer(timer, val); | 788 | arm_timer(timer); |
834 | } | 789 | } |
835 | 790 | ||
791 | spin_unlock(&p->sighand->siglock); | ||
836 | read_unlock(&tasklist_lock); | 792 | read_unlock(&tasklist_lock); |
837 | 793 | ||
838 | /* | 794 | /* |
@@ -853,7 +809,6 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags, | |||
853 | timer->it_overrun = -1; | 809 | timer->it_overrun = -1; |
854 | 810 | ||
855 | if (new_expires.sched != 0 && | 811 | if (new_expires.sched != 0 && |
856 | (timer->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE && | ||
857 | !cpu_time_before(timer->it_clock, val, new_expires)) { | 812 | !cpu_time_before(timer->it_clock, val, new_expires)) { |
858 | /* | 813 | /* |
859 | * The designated time already passed, so we notify | 814 | * The designated time already passed, so we notify |
@@ -867,7 +822,7 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags, | |||
867 | out: | 822 | out: |
868 | if (old) { | 823 | if (old) { |
869 | sample_to_timespec(timer->it_clock, | 824 | sample_to_timespec(timer->it_clock, |
870 | timer->it.cpu.incr, &old->it_interval); | 825 | old_incr, &old->it_interval); |
871 | } | 826 | } |
872 | return ret; | 827 | return ret; |
873 | } | 828 | } |
@@ -927,25 +882,6 @@ void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp) | |||
927 | read_unlock(&tasklist_lock); | 882 | read_unlock(&tasklist_lock); |
928 | } | 883 | } |
929 | 884 | ||
930 | if ((timer->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) { | ||
931 | if (timer->it.cpu.incr.sched == 0 && | ||
932 | cpu_time_before(timer->it_clock, | ||
933 | timer->it.cpu.expires, now)) { | ||
934 | /* | ||
935 | * Do-nothing timer expired and has no reload, | ||
936 | * so it's as if it was never set. | ||
937 | */ | ||
938 | timer->it.cpu.expires.sched = 0; | ||
939 | itp->it_value.tv_sec = itp->it_value.tv_nsec = 0; | ||
940 | return; | ||
941 | } | ||
942 | /* | ||
943 | * Account for any expirations and reloads that should | ||
944 | * have happened. | ||
945 | */ | ||
946 | bump_cpu_timer(timer, now); | ||
947 | } | ||
948 | |||
949 | if (unlikely(clear_dead)) { | 885 | if (unlikely(clear_dead)) { |
950 | /* | 886 | /* |
951 | * We've noticed that the thread is dead, but | 887 | * We've noticed that the thread is dead, but |
@@ -1066,16 +1002,9 @@ static void stop_process_timers(struct signal_struct *sig) | |||
1066 | struct thread_group_cputimer *cputimer = &sig->cputimer; | 1002 | struct thread_group_cputimer *cputimer = &sig->cputimer; |
1067 | unsigned long flags; | 1003 | unsigned long flags; |
1068 | 1004 | ||
1069 | if (!cputimer->running) | ||
1070 | return; | ||
1071 | |||
1072 | spin_lock_irqsave(&cputimer->lock, flags); | 1005 | spin_lock_irqsave(&cputimer->lock, flags); |
1073 | cputimer->running = 0; | 1006 | cputimer->running = 0; |
1074 | spin_unlock_irqrestore(&cputimer->lock, flags); | 1007 | spin_unlock_irqrestore(&cputimer->lock, flags); |
1075 | |||
1076 | sig->cputime_expires.prof_exp = cputime_zero; | ||
1077 | sig->cputime_expires.virt_exp = cputime_zero; | ||
1078 | sig->cputime_expires.sched_exp = 0; | ||
1079 | } | 1008 | } |
1080 | 1009 | ||
1081 | static u32 onecputick; | 1010 | static u32 onecputick; |
@@ -1112,6 +1041,23 @@ static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it, | |||
1112 | } | 1041 | } |
1113 | } | 1042 | } |
1114 | 1043 | ||
1044 | /** | ||
1045 | * task_cputime_zero - Check a task_cputime struct for all zero fields. | ||
1046 | * | ||
1047 | * @cputime: The struct to compare. | ||
1048 | * | ||
1049 | * Checks @cputime to see if all fields are zero. Returns true if all fields | ||
1050 | * are zero, false if any field is nonzero. | ||
1051 | */ | ||
1052 | static inline int task_cputime_zero(const struct task_cputime *cputime) | ||
1053 | { | ||
1054 | if (cputime_eq(cputime->utime, cputime_zero) && | ||
1055 | cputime_eq(cputime->stime, cputime_zero) && | ||
1056 | cputime->sum_exec_runtime == 0) | ||
1057 | return 1; | ||
1058 | return 0; | ||
1059 | } | ||
1060 | |||
1115 | /* | 1061 | /* |
1116 | * Check for any per-thread CPU timers that have fired and move them | 1062 | * Check for any per-thread CPU timers that have fired and move them |
1117 | * off the tsk->*_timers list onto the firing list. Per-thread timers | 1063 | * off the tsk->*_timers list onto the firing list. Per-thread timers |
@@ -1129,19 +1075,6 @@ static void check_process_timers(struct task_struct *tsk, | |||
1129 | unsigned long soft; | 1075 | unsigned long soft; |
1130 | 1076 | ||
1131 | /* | 1077 | /* |
1132 | * Don't sample the current process CPU clocks if there are no timers. | ||
1133 | */ | ||
1134 | if (list_empty(&timers[CPUCLOCK_PROF]) && | ||
1135 | cputime_eq(sig->it[CPUCLOCK_PROF].expires, cputime_zero) && | ||
1136 | sig->rlim[RLIMIT_CPU].rlim_cur == RLIM_INFINITY && | ||
1137 | list_empty(&timers[CPUCLOCK_VIRT]) && | ||
1138 | cputime_eq(sig->it[CPUCLOCK_VIRT].expires, cputime_zero) && | ||
1139 | list_empty(&timers[CPUCLOCK_SCHED])) { | ||
1140 | stop_process_timers(sig); | ||
1141 | return; | ||
1142 | } | ||
1143 | |||
1144 | /* | ||
1145 | * Collect the current process totals. | 1078 | * Collect the current process totals. |
1146 | */ | 1079 | */ |
1147 | thread_group_cputimer(tsk, &cputime); | 1080 | thread_group_cputimer(tsk, &cputime); |
@@ -1230,18 +1163,11 @@ static void check_process_timers(struct task_struct *tsk, | |||
1230 | } | 1163 | } |
1231 | } | 1164 | } |
1232 | 1165 | ||
1233 | if (!cputime_eq(prof_expires, cputime_zero) && | 1166 | sig->cputime_expires.prof_exp = prof_expires; |
1234 | (cputime_eq(sig->cputime_expires.prof_exp, cputime_zero) || | 1167 | sig->cputime_expires.virt_exp = virt_expires; |
1235 | cputime_gt(sig->cputime_expires.prof_exp, prof_expires))) | 1168 | sig->cputime_expires.sched_exp = sched_expires; |
1236 | sig->cputime_expires.prof_exp = prof_expires; | 1169 | if (task_cputime_zero(&sig->cputime_expires)) |
1237 | if (!cputime_eq(virt_expires, cputime_zero) && | 1170 | stop_process_timers(sig); |
1238 | (cputime_eq(sig->cputime_expires.virt_exp, cputime_zero) || | ||
1239 | cputime_gt(sig->cputime_expires.virt_exp, virt_expires))) | ||
1240 | sig->cputime_expires.virt_exp = virt_expires; | ||
1241 | if (sched_expires != 0 && | ||
1242 | (sig->cputime_expires.sched_exp == 0 || | ||
1243 | sig->cputime_expires.sched_exp > sched_expires)) | ||
1244 | sig->cputime_expires.sched_exp = sched_expires; | ||
1245 | } | 1171 | } |
1246 | 1172 | ||
1247 | /* | 1173 | /* |
@@ -1270,6 +1196,7 @@ void posix_cpu_timer_schedule(struct k_itimer *timer) | |||
1270 | goto out; | 1196 | goto out; |
1271 | } | 1197 | } |
1272 | read_lock(&tasklist_lock); /* arm_timer needs it. */ | 1198 | read_lock(&tasklist_lock); /* arm_timer needs it. */ |
1199 | spin_lock(&p->sighand->siglock); | ||
1273 | } else { | 1200 | } else { |
1274 | read_lock(&tasklist_lock); | 1201 | read_lock(&tasklist_lock); |
1275 | if (unlikely(p->signal == NULL)) { | 1202 | if (unlikely(p->signal == NULL)) { |
@@ -1290,6 +1217,7 @@ void posix_cpu_timer_schedule(struct k_itimer *timer) | |||
1290 | clear_dead_task(timer, now); | 1217 | clear_dead_task(timer, now); |
1291 | goto out_unlock; | 1218 | goto out_unlock; |
1292 | } | 1219 | } |
1220 | spin_lock(&p->sighand->siglock); | ||
1293 | cpu_timer_sample_group(timer->it_clock, p, &now); | 1221 | cpu_timer_sample_group(timer->it_clock, p, &now); |
1294 | bump_cpu_timer(timer, now); | 1222 | bump_cpu_timer(timer, now); |
1295 | /* Leave the tasklist_lock locked for the call below. */ | 1223 | /* Leave the tasklist_lock locked for the call below. */ |
@@ -1298,7 +1226,9 @@ void posix_cpu_timer_schedule(struct k_itimer *timer) | |||
1298 | /* | 1226 | /* |
1299 | * Now re-arm for the new expiry time. | 1227 | * Now re-arm for the new expiry time. |
1300 | */ | 1228 | */ |
1301 | arm_timer(timer, now); | 1229 | BUG_ON(!irqs_disabled()); |
1230 | arm_timer(timer); | ||
1231 | spin_unlock(&p->sighand->siglock); | ||
1302 | 1232 | ||
1303 | out_unlock: | 1233 | out_unlock: |
1304 | read_unlock(&tasklist_lock); | 1234 | read_unlock(&tasklist_lock); |
@@ -1310,23 +1240,6 @@ out: | |||
1310 | } | 1240 | } |
1311 | 1241 | ||
1312 | /** | 1242 | /** |
1313 | * task_cputime_zero - Check a task_cputime struct for all zero fields. | ||
1314 | * | ||
1315 | * @cputime: The struct to compare. | ||
1316 | * | ||
1317 | * Checks @cputime to see if all fields are zero. Returns true if all fields | ||
1318 | * are zero, false if any field is nonzero. | ||
1319 | */ | ||
1320 | static inline int task_cputime_zero(const struct task_cputime *cputime) | ||
1321 | { | ||
1322 | if (cputime_eq(cputime->utime, cputime_zero) && | ||
1323 | cputime_eq(cputime->stime, cputime_zero) && | ||
1324 | cputime->sum_exec_runtime == 0) | ||
1325 | return 1; | ||
1326 | return 0; | ||
1327 | } | ||
1328 | |||
1329 | /** | ||
1330 | * task_cputime_expired - Compare two task_cputime entities. | 1243 | * task_cputime_expired - Compare two task_cputime entities. |
1331 | * | 1244 | * |
1332 | * @sample: The task_cputime structure to be checked for expiration. | 1245 | * @sample: The task_cputime structure to be checked for expiration. |
@@ -1382,7 +1295,7 @@ static inline int fastpath_timer_check(struct task_struct *tsk) | |||
1382 | } | 1295 | } |
1383 | 1296 | ||
1384 | sig = tsk->signal; | 1297 | sig = tsk->signal; |
1385 | if (!task_cputime_zero(&sig->cputime_expires)) { | 1298 | if (sig->cputimer.running) { |
1386 | struct task_cputime group_sample; | 1299 | struct task_cputime group_sample; |
1387 | 1300 | ||
1388 | thread_group_cputimer(tsk, &group_sample); | 1301 | thread_group_cputimer(tsk, &group_sample); |
@@ -1390,7 +1303,7 @@ static inline int fastpath_timer_check(struct task_struct *tsk) | |||
1390 | return 1; | 1303 | return 1; |
1391 | } | 1304 | } |
1392 | 1305 | ||
1393 | return sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY; | 1306 | return 0; |
1394 | } | 1307 | } |
1395 | 1308 | ||
1396 | /* | 1309 | /* |
@@ -1419,7 +1332,12 @@ void run_posix_cpu_timers(struct task_struct *tsk) | |||
1419 | * put them on the firing list. | 1332 | * put them on the firing list. |
1420 | */ | 1333 | */ |
1421 | check_thread_timers(tsk, &firing); | 1334 | check_thread_timers(tsk, &firing); |
1422 | check_process_timers(tsk, &firing); | 1335 | /* |
1336 | * If there are any active process wide timers (POSIX 1.b, itimers, | ||
1337 | * RLIMIT_CPU) cputimer must be running. | ||
1338 | */ | ||
1339 | if (tsk->signal->cputimer.running) | ||
1340 | check_process_timers(tsk, &firing); | ||
1423 | 1341 | ||
1424 | /* | 1342 | /* |
1425 | * We must release these locks before taking any timer's lock. | 1343 | * We must release these locks before taking any timer's lock. |
@@ -1456,21 +1374,23 @@ void run_posix_cpu_timers(struct task_struct *tsk) | |||
1456 | } | 1374 | } |
1457 | 1375 | ||
1458 | /* | 1376 | /* |
1459 | * Set one of the process-wide special case CPU timers. | 1377 | * Set one of the process-wide special case CPU timers or RLIMIT_CPU. |
1460 | * The tsk->sighand->siglock must be held by the caller. | 1378 | * The tsk->sighand->siglock must be held by the caller. |
1461 | * The *newval argument is relative and we update it to be absolute, *oldval | ||
1462 | * is absolute and we update it to be relative. | ||
1463 | */ | 1379 | */ |
1464 | void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx, | 1380 | void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx, |
1465 | cputime_t *newval, cputime_t *oldval) | 1381 | cputime_t *newval, cputime_t *oldval) |
1466 | { | 1382 | { |
1467 | union cpu_time_count now; | 1383 | union cpu_time_count now; |
1468 | struct list_head *head; | ||
1469 | 1384 | ||
1470 | BUG_ON(clock_idx == CPUCLOCK_SCHED); | 1385 | BUG_ON(clock_idx == CPUCLOCK_SCHED); |
1471 | cpu_timer_sample_group(clock_idx, tsk, &now); | 1386 | cpu_timer_sample_group(clock_idx, tsk, &now); |
1472 | 1387 | ||
1473 | if (oldval) { | 1388 | if (oldval) { |
1389 | /* | ||
1390 | * We are setting itimer. The *oldval is absolute and we update | ||
1391 | * it to be relative, *newval argument is relative and we update | ||
1392 | * it to be absolute. | ||
1393 | */ | ||
1474 | if (!cputime_eq(*oldval, cputime_zero)) { | 1394 | if (!cputime_eq(*oldval, cputime_zero)) { |
1475 | if (cputime_le(*oldval, now.cpu)) { | 1395 | if (cputime_le(*oldval, now.cpu)) { |
1476 | /* Just about to fire. */ | 1396 | /* Just about to fire. */ |
@@ -1483,33 +1403,21 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx, | |||
1483 | if (cputime_eq(*newval, cputime_zero)) | 1403 | if (cputime_eq(*newval, cputime_zero)) |
1484 | return; | 1404 | return; |
1485 | *newval = cputime_add(*newval, now.cpu); | 1405 | *newval = cputime_add(*newval, now.cpu); |
1486 | |||
1487 | /* | ||
1488 | * If the RLIMIT_CPU timer will expire before the | ||
1489 | * ITIMER_PROF timer, we have nothing else to do. | ||
1490 | */ | ||
1491 | if (tsk->signal->rlim[RLIMIT_CPU].rlim_cur | ||
1492 | < cputime_to_secs(*newval)) | ||
1493 | return; | ||
1494 | } | 1406 | } |
1495 | 1407 | ||
1496 | /* | 1408 | /* |
1497 | * Check whether there are any process timers already set to fire | 1409 | * Update expiration cache if we are the earliest timer, or eventually |
1498 | * before this one. If so, we don't have anything more to do. | 1410 | * RLIMIT_CPU limit is earlier than prof_exp cpu timer expire. |
1499 | */ | 1411 | */ |
1500 | head = &tsk->signal->cpu_timers[clock_idx]; | 1412 | switch (clock_idx) { |
1501 | if (list_empty(head) || | 1413 | case CPUCLOCK_PROF: |
1502 | cputime_ge(list_first_entry(head, | 1414 | if (expires_gt(tsk->signal->cputime_expires.prof_exp, *newval)) |
1503 | struct cpu_timer_list, entry)->expires.cpu, | ||
1504 | *newval)) { | ||
1505 | switch (clock_idx) { | ||
1506 | case CPUCLOCK_PROF: | ||
1507 | tsk->signal->cputime_expires.prof_exp = *newval; | 1415 | tsk->signal->cputime_expires.prof_exp = *newval; |
1508 | break; | 1416 | break; |
1509 | case CPUCLOCK_VIRT: | 1417 | case CPUCLOCK_VIRT: |
1418 | if (expires_gt(tsk->signal->cputime_expires.virt_exp, *newval)) | ||
1510 | tsk->signal->cputime_expires.virt_exp = *newval; | 1419 | tsk->signal->cputime_expires.virt_exp = *newval; |
1511 | break; | 1420 | break; |
1512 | } | ||
1513 | } | 1421 | } |
1514 | } | 1422 | } |
1515 | 1423 | ||
diff --git a/kernel/power/Makefile b/kernel/power/Makefile index 43191815f874..524e058dcf06 100644 --- a/kernel/power/Makefile +++ b/kernel/power/Makefile | |||
@@ -8,7 +8,8 @@ obj-$(CONFIG_PM_SLEEP) += console.o | |||
8 | obj-$(CONFIG_FREEZER) += process.o | 8 | obj-$(CONFIG_FREEZER) += process.o |
9 | obj-$(CONFIG_SUSPEND) += suspend.o | 9 | obj-$(CONFIG_SUSPEND) += suspend.o |
10 | obj-$(CONFIG_PM_TEST_SUSPEND) += suspend_test.o | 10 | obj-$(CONFIG_PM_TEST_SUSPEND) += suspend_test.o |
11 | obj-$(CONFIG_HIBERNATION) += hibernate.o snapshot.o swap.o user.o | 11 | obj-$(CONFIG_HIBERNATION) += hibernate.o snapshot.o swap.o user.o \ |
12 | block_io.o | ||
12 | obj-$(CONFIG_HIBERNATION_NVS) += hibernate_nvs.o | 13 | obj-$(CONFIG_HIBERNATION_NVS) += hibernate_nvs.o |
13 | 14 | ||
14 | obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o | 15 | obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o |
diff --git a/kernel/power/block_io.c b/kernel/power/block_io.c new file mode 100644 index 000000000000..97024fd40cd5 --- /dev/null +++ b/kernel/power/block_io.c | |||
@@ -0,0 +1,103 @@ | |||
1 | /* | ||
2 | * This file provides functions for block I/O operations on swap/file. | ||
3 | * | ||
4 | * Copyright (C) 1998,2001-2005 Pavel Machek <pavel@ucw.cz> | ||
5 | * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl> | ||
6 | * | ||
7 | * This file is released under the GPLv2. | ||
8 | */ | ||
9 | |||
10 | #include <linux/bio.h> | ||
11 | #include <linux/kernel.h> | ||
12 | #include <linux/pagemap.h> | ||
13 | #include <linux/swap.h> | ||
14 | |||
15 | #include "power.h" | ||
16 | |||
17 | /** | ||
18 | * submit - submit BIO request. | ||
19 | * @rw: READ or WRITE. | ||
20 | * @off physical offset of page. | ||
21 | * @page: page we're reading or writing. | ||
22 | * @bio_chain: list of pending bios (for async reading) | ||
23 | * | ||
24 | * Straight from the textbook - allocate and initialize the bio. | ||
25 | * If we're reading, make sure the page is marked as dirty. | ||
26 | * Then submit it and, if @bio_chain == NULL, wait. | ||
27 | */ | ||
28 | static int submit(int rw, struct block_device *bdev, sector_t sector, | ||
29 | struct page *page, struct bio **bio_chain) | ||
30 | { | ||
31 | const int bio_rw = rw | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG); | ||
32 | struct bio *bio; | ||
33 | |||
34 | bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1); | ||
35 | bio->bi_sector = sector; | ||
36 | bio->bi_bdev = bdev; | ||
37 | bio->bi_end_io = end_swap_bio_read; | ||
38 | |||
39 | if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { | ||
40 | printk(KERN_ERR "PM: Adding page to bio failed at %llu\n", | ||
41 | (unsigned long long)sector); | ||
42 | bio_put(bio); | ||
43 | return -EFAULT; | ||
44 | } | ||
45 | |||
46 | lock_page(page); | ||
47 | bio_get(bio); | ||
48 | |||
49 | if (bio_chain == NULL) { | ||
50 | submit_bio(bio_rw, bio); | ||
51 | wait_on_page_locked(page); | ||
52 | if (rw == READ) | ||
53 | bio_set_pages_dirty(bio); | ||
54 | bio_put(bio); | ||
55 | } else { | ||
56 | if (rw == READ) | ||
57 | get_page(page); /* These pages are freed later */ | ||
58 | bio->bi_private = *bio_chain; | ||
59 | *bio_chain = bio; | ||
60 | submit_bio(bio_rw, bio); | ||
61 | } | ||
62 | return 0; | ||
63 | } | ||
64 | |||
65 | int hib_bio_read_page(pgoff_t page_off, void *addr, struct bio **bio_chain) | ||
66 | { | ||
67 | return submit(READ, hib_resume_bdev, page_off * (PAGE_SIZE >> 9), | ||
68 | virt_to_page(addr), bio_chain); | ||
69 | } | ||
70 | |||
71 | int hib_bio_write_page(pgoff_t page_off, void *addr, struct bio **bio_chain) | ||
72 | { | ||
73 | return submit(WRITE, hib_resume_bdev, page_off * (PAGE_SIZE >> 9), | ||
74 | virt_to_page(addr), bio_chain); | ||
75 | } | ||
76 | |||
77 | int hib_wait_on_bio_chain(struct bio **bio_chain) | ||
78 | { | ||
79 | struct bio *bio; | ||
80 | struct bio *next_bio; | ||
81 | int ret = 0; | ||
82 | |||
83 | if (bio_chain == NULL) | ||
84 | return 0; | ||
85 | |||
86 | bio = *bio_chain; | ||
87 | if (bio == NULL) | ||
88 | return 0; | ||
89 | while (bio) { | ||
90 | struct page *page; | ||
91 | |||
92 | next_bio = bio->bi_private; | ||
93 | page = bio->bi_io_vec[0].bv_page; | ||
94 | wait_on_page_locked(page); | ||
95 | if (!PageUptodate(page) || PageError(page)) | ||
96 | ret = -EIO; | ||
97 | put_page(page); | ||
98 | bio_put(bio); | ||
99 | bio = next_bio; | ||
100 | } | ||
101 | *bio_chain = NULL; | ||
102 | return ret; | ||
103 | } | ||
diff --git a/kernel/power/power.h b/kernel/power/power.h index 46c5a26630a3..006270fe382d 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h | |||
@@ -97,24 +97,12 @@ extern int hibernate_preallocate_memory(void); | |||
97 | */ | 97 | */ |
98 | 98 | ||
99 | struct snapshot_handle { | 99 | struct snapshot_handle { |
100 | loff_t offset; /* number of the last byte ready for reading | ||
101 | * or writing in the sequence | ||
102 | */ | ||
103 | unsigned int cur; /* number of the block of PAGE_SIZE bytes the | 100 | unsigned int cur; /* number of the block of PAGE_SIZE bytes the |
104 | * next operation will refer to (ie. current) | 101 | * next operation will refer to (ie. current) |
105 | */ | 102 | */ |
106 | unsigned int cur_offset; /* offset with respect to the current | ||
107 | * block (for the next operation) | ||
108 | */ | ||
109 | unsigned int prev; /* number of the block of PAGE_SIZE bytes that | ||
110 | * was the current one previously | ||
111 | */ | ||
112 | void *buffer; /* address of the block to read from | 103 | void *buffer; /* address of the block to read from |
113 | * or write to | 104 | * or write to |
114 | */ | 105 | */ |
115 | unsigned int buf_offset; /* location to read from or write to, | ||
116 | * given as a displacement from 'buffer' | ||
117 | */ | ||
118 | int sync_read; /* Set to one to notify the caller of | 106 | int sync_read; /* Set to one to notify the caller of |
119 | * snapshot_write_next() that it may | 107 | * snapshot_write_next() that it may |
120 | * need to call wait_on_bio_chain() | 108 | * need to call wait_on_bio_chain() |
@@ -125,12 +113,12 @@ struct snapshot_handle { | |||
125 | * snapshot_read_next()/snapshot_write_next() is allowed to | 113 | * snapshot_read_next()/snapshot_write_next() is allowed to |
126 | * read/write data after the function returns | 114 | * read/write data after the function returns |
127 | */ | 115 | */ |
128 | #define data_of(handle) ((handle).buffer + (handle).buf_offset) | 116 | #define data_of(handle) ((handle).buffer) |
129 | 117 | ||
130 | extern unsigned int snapshot_additional_pages(struct zone *zone); | 118 | extern unsigned int snapshot_additional_pages(struct zone *zone); |
131 | extern unsigned long snapshot_get_image_size(void); | 119 | extern unsigned long snapshot_get_image_size(void); |
132 | extern int snapshot_read_next(struct snapshot_handle *handle, size_t count); | 120 | extern int snapshot_read_next(struct snapshot_handle *handle); |
133 | extern int snapshot_write_next(struct snapshot_handle *handle, size_t count); | 121 | extern int snapshot_write_next(struct snapshot_handle *handle); |
134 | extern void snapshot_write_finalize(struct snapshot_handle *handle); | 122 | extern void snapshot_write_finalize(struct snapshot_handle *handle); |
135 | extern int snapshot_image_loaded(struct snapshot_handle *handle); | 123 | extern int snapshot_image_loaded(struct snapshot_handle *handle); |
136 | 124 | ||
@@ -154,6 +142,15 @@ extern int swsusp_read(unsigned int *flags_p); | |||
154 | extern int swsusp_write(unsigned int flags); | 142 | extern int swsusp_write(unsigned int flags); |
155 | extern void swsusp_close(fmode_t); | 143 | extern void swsusp_close(fmode_t); |
156 | 144 | ||
145 | /* kernel/power/block_io.c */ | ||
146 | extern struct block_device *hib_resume_bdev; | ||
147 | |||
148 | extern int hib_bio_read_page(pgoff_t page_off, void *addr, | ||
149 | struct bio **bio_chain); | ||
150 | extern int hib_bio_write_page(pgoff_t page_off, void *addr, | ||
151 | struct bio **bio_chain); | ||
152 | extern int hib_wait_on_bio_chain(struct bio **bio_chain); | ||
153 | |||
157 | struct timeval; | 154 | struct timeval; |
158 | /* kernel/power/swsusp.c */ | 155 | /* kernel/power/swsusp.c */ |
159 | extern void swsusp_show_speed(struct timeval *, struct timeval *, | 156 | extern void swsusp_show_speed(struct timeval *, struct timeval *, |
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index be861c26dda7..25ce010e9f8b 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c | |||
@@ -1604,14 +1604,9 @@ pack_pfns(unsigned long *buf, struct memory_bitmap *bm) | |||
1604 | * snapshot_handle structure. The structure gets updated and a pointer | 1604 | * snapshot_handle structure. The structure gets updated and a pointer |
1605 | * to it should be passed to this function every next time. | 1605 | * to it should be passed to this function every next time. |
1606 | * | 1606 | * |
1607 | * The @count parameter should contain the number of bytes the caller | ||
1608 | * wants to read from the snapshot. It must not be zero. | ||
1609 | * | ||
1610 | * On success the function returns a positive number. Then, the caller | 1607 | * On success the function returns a positive number. Then, the caller |
1611 | * is allowed to read up to the returned number of bytes from the memory | 1608 | * is allowed to read up to the returned number of bytes from the memory |
1612 | * location computed by the data_of() macro. The number returned | 1609 | * location computed by the data_of() macro. |
1613 | * may be smaller than @count, but this only happens if the read would | ||
1614 | * cross a page boundary otherwise. | ||
1615 | * | 1610 | * |
1616 | * The function returns 0 to indicate the end of data stream condition, | 1611 | * The function returns 0 to indicate the end of data stream condition, |
1617 | * and a negative number is returned on error. In such cases the | 1612 | * and a negative number is returned on error. In such cases the |
@@ -1619,7 +1614,7 @@ pack_pfns(unsigned long *buf, struct memory_bitmap *bm) | |||
1619 | * any more. | 1614 | * any more. |
1620 | */ | 1615 | */ |
1621 | 1616 | ||
1622 | int snapshot_read_next(struct snapshot_handle *handle, size_t count) | 1617 | int snapshot_read_next(struct snapshot_handle *handle) |
1623 | { | 1618 | { |
1624 | if (handle->cur > nr_meta_pages + nr_copy_pages) | 1619 | if (handle->cur > nr_meta_pages + nr_copy_pages) |
1625 | return 0; | 1620 | return 0; |
@@ -1630,7 +1625,7 @@ int snapshot_read_next(struct snapshot_handle *handle, size_t count) | |||
1630 | if (!buffer) | 1625 | if (!buffer) |
1631 | return -ENOMEM; | 1626 | return -ENOMEM; |
1632 | } | 1627 | } |
1633 | if (!handle->offset) { | 1628 | if (!handle->cur) { |
1634 | int error; | 1629 | int error; |
1635 | 1630 | ||
1636 | error = init_header((struct swsusp_info *)buffer); | 1631 | error = init_header((struct swsusp_info *)buffer); |
@@ -1639,42 +1634,30 @@ int snapshot_read_next(struct snapshot_handle *handle, size_t count) | |||
1639 | handle->buffer = buffer; | 1634 | handle->buffer = buffer; |
1640 | memory_bm_position_reset(&orig_bm); | 1635 | memory_bm_position_reset(&orig_bm); |
1641 | memory_bm_position_reset(&copy_bm); | 1636 | memory_bm_position_reset(&copy_bm); |
1642 | } | 1637 | } else if (handle->cur <= nr_meta_pages) { |
1643 | if (handle->prev < handle->cur) { | 1638 | memset(buffer, 0, PAGE_SIZE); |
1644 | if (handle->cur <= nr_meta_pages) { | 1639 | pack_pfns(buffer, &orig_bm); |
1645 | memset(buffer, 0, PAGE_SIZE); | 1640 | } else { |
1646 | pack_pfns(buffer, &orig_bm); | 1641 | struct page *page; |
1647 | } else { | ||
1648 | struct page *page; | ||
1649 | 1642 | ||
1650 | page = pfn_to_page(memory_bm_next_pfn(&copy_bm)); | 1643 | page = pfn_to_page(memory_bm_next_pfn(&copy_bm)); |
1651 | if (PageHighMem(page)) { | 1644 | if (PageHighMem(page)) { |
1652 | /* Highmem pages are copied to the buffer, | 1645 | /* Highmem pages are copied to the buffer, |
1653 | * because we can't return with a kmapped | 1646 | * because we can't return with a kmapped |
1654 | * highmem page (we may not be called again). | 1647 | * highmem page (we may not be called again). |
1655 | */ | 1648 | */ |
1656 | void *kaddr; | 1649 | void *kaddr; |
1657 | 1650 | ||
1658 | kaddr = kmap_atomic(page, KM_USER0); | 1651 | kaddr = kmap_atomic(page, KM_USER0); |
1659 | memcpy(buffer, kaddr, PAGE_SIZE); | 1652 | memcpy(buffer, kaddr, PAGE_SIZE); |
1660 | kunmap_atomic(kaddr, KM_USER0); | 1653 | kunmap_atomic(kaddr, KM_USER0); |
1661 | handle->buffer = buffer; | 1654 | handle->buffer = buffer; |
1662 | } else { | 1655 | } else { |
1663 | handle->buffer = page_address(page); | 1656 | handle->buffer = page_address(page); |
1664 | } | ||
1665 | } | 1657 | } |
1666 | handle->prev = handle->cur; | ||
1667 | } | ||
1668 | handle->buf_offset = handle->cur_offset; | ||
1669 | if (handle->cur_offset + count >= PAGE_SIZE) { | ||
1670 | count = PAGE_SIZE - handle->cur_offset; | ||
1671 | handle->cur_offset = 0; | ||
1672 | handle->cur++; | ||
1673 | } else { | ||
1674 | handle->cur_offset += count; | ||
1675 | } | 1658 | } |
1676 | handle->offset += count; | 1659 | handle->cur++; |
1677 | return count; | 1660 | return PAGE_SIZE; |
1678 | } | 1661 | } |
1679 | 1662 | ||
1680 | /** | 1663 | /** |
@@ -2133,14 +2116,9 @@ static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca) | |||
2133 | * snapshot_handle structure. The structure gets updated and a pointer | 2116 | * snapshot_handle structure. The structure gets updated and a pointer |
2134 | * to it should be passed to this function every next time. | 2117 | * to it should be passed to this function every next time. |
2135 | * | 2118 | * |
2136 | * The @count parameter should contain the number of bytes the caller | ||
2137 | * wants to write to the image. It must not be zero. | ||
2138 | * | ||
2139 | * On success the function returns a positive number. Then, the caller | 2119 | * On success the function returns a positive number. Then, the caller |
2140 | * is allowed to write up to the returned number of bytes to the memory | 2120 | * is allowed to write up to the returned number of bytes to the memory |
2141 | * location computed by the data_of() macro. The number returned | 2121 | * location computed by the data_of() macro. |
2142 | * may be smaller than @count, but this only happens if the write would | ||
2143 | * cross a page boundary otherwise. | ||
2144 | * | 2122 | * |
2145 | * The function returns 0 to indicate the "end of file" condition, | 2123 | * The function returns 0 to indicate the "end of file" condition, |
2146 | * and a negative number is returned on error. In such cases the | 2124 | * and a negative number is returned on error. In such cases the |
@@ -2148,16 +2126,18 @@ static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca) | |||
2148 | * any more. | 2126 | * any more. |
2149 | */ | 2127 | */ |
2150 | 2128 | ||
2151 | int snapshot_write_next(struct snapshot_handle *handle, size_t count) | 2129 | int snapshot_write_next(struct snapshot_handle *handle) |
2152 | { | 2130 | { |
2153 | static struct chain_allocator ca; | 2131 | static struct chain_allocator ca; |
2154 | int error = 0; | 2132 | int error = 0; |
2155 | 2133 | ||
2156 | /* Check if we have already loaded the entire image */ | 2134 | /* Check if we have already loaded the entire image */ |
2157 | if (handle->prev && handle->cur > nr_meta_pages + nr_copy_pages) | 2135 | if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages) |
2158 | return 0; | 2136 | return 0; |
2159 | 2137 | ||
2160 | if (handle->offset == 0) { | 2138 | handle->sync_read = 1; |
2139 | |||
2140 | if (!handle->cur) { | ||
2161 | if (!buffer) | 2141 | if (!buffer) |
2162 | /* This makes the buffer be freed by swsusp_free() */ | 2142 | /* This makes the buffer be freed by swsusp_free() */ |
2163 | buffer = get_image_page(GFP_ATOMIC, PG_ANY); | 2143 | buffer = get_image_page(GFP_ATOMIC, PG_ANY); |
@@ -2166,56 +2146,43 @@ int snapshot_write_next(struct snapshot_handle *handle, size_t count) | |||
2166 | return -ENOMEM; | 2146 | return -ENOMEM; |
2167 | 2147 | ||
2168 | handle->buffer = buffer; | 2148 | handle->buffer = buffer; |
2169 | } | 2149 | } else if (handle->cur == 1) { |
2170 | handle->sync_read = 1; | 2150 | error = load_header(buffer); |
2171 | if (handle->prev < handle->cur) { | 2151 | if (error) |
2172 | if (handle->prev == 0) { | 2152 | return error; |
2173 | error = load_header(buffer); | ||
2174 | if (error) | ||
2175 | return error; | ||
2176 | 2153 | ||
2177 | error = memory_bm_create(&copy_bm, GFP_ATOMIC, PG_ANY); | 2154 | error = memory_bm_create(&copy_bm, GFP_ATOMIC, PG_ANY); |
2178 | if (error) | 2155 | if (error) |
2179 | return error; | 2156 | return error; |
2157 | |||
2158 | } else if (handle->cur <= nr_meta_pages + 1) { | ||
2159 | error = unpack_orig_pfns(buffer, &copy_bm); | ||
2160 | if (error) | ||
2161 | return error; | ||
2180 | 2162 | ||
2181 | } else if (handle->prev <= nr_meta_pages) { | 2163 | if (handle->cur == nr_meta_pages + 1) { |
2182 | error = unpack_orig_pfns(buffer, &copy_bm); | 2164 | error = prepare_image(&orig_bm, &copy_bm); |
2183 | if (error) | 2165 | if (error) |
2184 | return error; | 2166 | return error; |
2185 | 2167 | ||
2186 | if (handle->prev == nr_meta_pages) { | 2168 | chain_init(&ca, GFP_ATOMIC, PG_SAFE); |
2187 | error = prepare_image(&orig_bm, &copy_bm); | 2169 | memory_bm_position_reset(&orig_bm); |
2188 | if (error) | 2170 | restore_pblist = NULL; |
2189 | return error; | ||
2190 | |||
2191 | chain_init(&ca, GFP_ATOMIC, PG_SAFE); | ||
2192 | memory_bm_position_reset(&orig_bm); | ||
2193 | restore_pblist = NULL; | ||
2194 | handle->buffer = get_buffer(&orig_bm, &ca); | ||
2195 | handle->sync_read = 0; | ||
2196 | if (IS_ERR(handle->buffer)) | ||
2197 | return PTR_ERR(handle->buffer); | ||
2198 | } | ||
2199 | } else { | ||
2200 | copy_last_highmem_page(); | ||
2201 | handle->buffer = get_buffer(&orig_bm, &ca); | 2171 | handle->buffer = get_buffer(&orig_bm, &ca); |
2172 | handle->sync_read = 0; | ||
2202 | if (IS_ERR(handle->buffer)) | 2173 | if (IS_ERR(handle->buffer)) |
2203 | return PTR_ERR(handle->buffer); | 2174 | return PTR_ERR(handle->buffer); |
2204 | if (handle->buffer != buffer) | ||
2205 | handle->sync_read = 0; | ||
2206 | } | 2175 | } |
2207 | handle->prev = handle->cur; | ||
2208 | } | ||
2209 | handle->buf_offset = handle->cur_offset; | ||
2210 | if (handle->cur_offset + count >= PAGE_SIZE) { | ||
2211 | count = PAGE_SIZE - handle->cur_offset; | ||
2212 | handle->cur_offset = 0; | ||
2213 | handle->cur++; | ||
2214 | } else { | 2176 | } else { |
2215 | handle->cur_offset += count; | 2177 | copy_last_highmem_page(); |
2178 | handle->buffer = get_buffer(&orig_bm, &ca); | ||
2179 | if (IS_ERR(handle->buffer)) | ||
2180 | return PTR_ERR(handle->buffer); | ||
2181 | if (handle->buffer != buffer) | ||
2182 | handle->sync_read = 0; | ||
2216 | } | 2183 | } |
2217 | handle->offset += count; | 2184 | handle->cur++; |
2218 | return count; | 2185 | return PAGE_SIZE; |
2219 | } | 2186 | } |
2220 | 2187 | ||
2221 | /** | 2188 | /** |
@@ -2230,7 +2197,7 @@ void snapshot_write_finalize(struct snapshot_handle *handle) | |||
2230 | { | 2197 | { |
2231 | copy_last_highmem_page(); | 2198 | copy_last_highmem_page(); |
2232 | /* Free only if we have loaded the image entirely */ | 2199 | /* Free only if we have loaded the image entirely */ |
2233 | if (handle->prev && handle->cur > nr_meta_pages + nr_copy_pages) { | 2200 | if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages) { |
2234 | memory_bm_free(&orig_bm, PG_UNSAFE_CLEAR); | 2201 | memory_bm_free(&orig_bm, PG_UNSAFE_CLEAR); |
2235 | free_highmem_data(); | 2202 | free_highmem_data(); |
2236 | } | 2203 | } |
diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 66824d71983a..b0bb21778391 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c | |||
@@ -29,6 +29,40 @@ | |||
29 | 29 | ||
30 | #define SWSUSP_SIG "S1SUSPEND" | 30 | #define SWSUSP_SIG "S1SUSPEND" |
31 | 31 | ||
32 | /* | ||
33 | * The swap map is a data structure used for keeping track of each page | ||
34 | * written to a swap partition. It consists of many swap_map_page | ||
35 | * structures that each contain an array of MAP_PAGE_ENTRIES swap entries. | ||
36 | * These structures are stored on the swap and linked together with the | ||
37 | * help of the .next_swap member. | ||
38 | * | ||
39 | * The swap map is created during suspend. The swap map pages are | ||
40 | * allocated and populated one at a time, so we only need one memory | ||
41 | * page to set up the entire structure. | ||
42 | * | ||
43 | * During resume we also only need to use one swap_map_page structure | ||
44 | * at a time. | ||
45 | */ | ||
46 | |||
47 | #define MAP_PAGE_ENTRIES (PAGE_SIZE / sizeof(sector_t) - 1) | ||
48 | |||
49 | struct swap_map_page { | ||
50 | sector_t entries[MAP_PAGE_ENTRIES]; | ||
51 | sector_t next_swap; | ||
52 | }; | ||
53 | |||
54 | /** | ||
55 | * The swap_map_handle structure is used for handling swap in | ||
56 | * a file-like way | ||
57 | */ | ||
58 | |||
59 | struct swap_map_handle { | ||
60 | struct swap_map_page *cur; | ||
61 | sector_t cur_swap; | ||
62 | sector_t first_sector; | ||
63 | unsigned int k; | ||
64 | }; | ||
65 | |||
32 | struct swsusp_header { | 66 | struct swsusp_header { |
33 | char reserved[PAGE_SIZE - 20 - sizeof(sector_t) - sizeof(int)]; | 67 | char reserved[PAGE_SIZE - 20 - sizeof(sector_t) - sizeof(int)]; |
34 | sector_t image; | 68 | sector_t image; |
@@ -145,110 +179,24 @@ int swsusp_swap_in_use(void) | |||
145 | */ | 179 | */ |
146 | 180 | ||
147 | static unsigned short root_swap = 0xffff; | 181 | static unsigned short root_swap = 0xffff; |
148 | static struct block_device *resume_bdev; | 182 | struct block_device *hib_resume_bdev; |
149 | |||
150 | /** | ||
151 | * submit - submit BIO request. | ||
152 | * @rw: READ or WRITE. | ||
153 | * @off physical offset of page. | ||
154 | * @page: page we're reading or writing. | ||
155 | * @bio_chain: list of pending biod (for async reading) | ||
156 | * | ||
157 | * Straight from the textbook - allocate and initialize the bio. | ||
158 | * If we're reading, make sure the page is marked as dirty. | ||
159 | * Then submit it and, if @bio_chain == NULL, wait. | ||
160 | */ | ||
161 | static int submit(int rw, pgoff_t page_off, struct page *page, | ||
162 | struct bio **bio_chain) | ||
163 | { | ||
164 | const int bio_rw = rw | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG); | ||
165 | struct bio *bio; | ||
166 | |||
167 | bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1); | ||
168 | bio->bi_sector = page_off * (PAGE_SIZE >> 9); | ||
169 | bio->bi_bdev = resume_bdev; | ||
170 | bio->bi_end_io = end_swap_bio_read; | ||
171 | |||
172 | if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { | ||
173 | printk(KERN_ERR "PM: Adding page to bio failed at %ld\n", | ||
174 | page_off); | ||
175 | bio_put(bio); | ||
176 | return -EFAULT; | ||
177 | } | ||
178 | |||
179 | lock_page(page); | ||
180 | bio_get(bio); | ||
181 | |||
182 | if (bio_chain == NULL) { | ||
183 | submit_bio(bio_rw, bio); | ||
184 | wait_on_page_locked(page); | ||
185 | if (rw == READ) | ||
186 | bio_set_pages_dirty(bio); | ||
187 | bio_put(bio); | ||
188 | } else { | ||
189 | if (rw == READ) | ||
190 | get_page(page); /* These pages are freed later */ | ||
191 | bio->bi_private = *bio_chain; | ||
192 | *bio_chain = bio; | ||
193 | submit_bio(bio_rw, bio); | ||
194 | } | ||
195 | return 0; | ||
196 | } | ||
197 | |||
198 | static int bio_read_page(pgoff_t page_off, void *addr, struct bio **bio_chain) | ||
199 | { | ||
200 | return submit(READ, page_off, virt_to_page(addr), bio_chain); | ||
201 | } | ||
202 | |||
203 | static int bio_write_page(pgoff_t page_off, void *addr, struct bio **bio_chain) | ||
204 | { | ||
205 | return submit(WRITE, page_off, virt_to_page(addr), bio_chain); | ||
206 | } | ||
207 | |||
208 | static int wait_on_bio_chain(struct bio **bio_chain) | ||
209 | { | ||
210 | struct bio *bio; | ||
211 | struct bio *next_bio; | ||
212 | int ret = 0; | ||
213 | |||
214 | if (bio_chain == NULL) | ||
215 | return 0; | ||
216 | |||
217 | bio = *bio_chain; | ||
218 | if (bio == NULL) | ||
219 | return 0; | ||
220 | while (bio) { | ||
221 | struct page *page; | ||
222 | |||
223 | next_bio = bio->bi_private; | ||
224 | page = bio->bi_io_vec[0].bv_page; | ||
225 | wait_on_page_locked(page); | ||
226 | if (!PageUptodate(page) || PageError(page)) | ||
227 | ret = -EIO; | ||
228 | put_page(page); | ||
229 | bio_put(bio); | ||
230 | bio = next_bio; | ||
231 | } | ||
232 | *bio_chain = NULL; | ||
233 | return ret; | ||
234 | } | ||
235 | 183 | ||
236 | /* | 184 | /* |
237 | * Saving part | 185 | * Saving part |
238 | */ | 186 | */ |
239 | 187 | ||
240 | static int mark_swapfiles(sector_t start, unsigned int flags) | 188 | static int mark_swapfiles(struct swap_map_handle *handle, unsigned int flags) |
241 | { | 189 | { |
242 | int error; | 190 | int error; |
243 | 191 | ||
244 | bio_read_page(swsusp_resume_block, swsusp_header, NULL); | 192 | hib_bio_read_page(swsusp_resume_block, swsusp_header, NULL); |
245 | if (!memcmp("SWAP-SPACE",swsusp_header->sig, 10) || | 193 | if (!memcmp("SWAP-SPACE",swsusp_header->sig, 10) || |
246 | !memcmp("SWAPSPACE2",swsusp_header->sig, 10)) { | 194 | !memcmp("SWAPSPACE2",swsusp_header->sig, 10)) { |
247 | memcpy(swsusp_header->orig_sig,swsusp_header->sig, 10); | 195 | memcpy(swsusp_header->orig_sig,swsusp_header->sig, 10); |
248 | memcpy(swsusp_header->sig,SWSUSP_SIG, 10); | 196 | memcpy(swsusp_header->sig,SWSUSP_SIG, 10); |
249 | swsusp_header->image = start; | 197 | swsusp_header->image = handle->first_sector; |
250 | swsusp_header->flags = flags; | 198 | swsusp_header->flags = flags; |
251 | error = bio_write_page(swsusp_resume_block, | 199 | error = hib_bio_write_page(swsusp_resume_block, |
252 | swsusp_header, NULL); | 200 | swsusp_header, NULL); |
253 | } else { | 201 | } else { |
254 | printk(KERN_ERR "PM: Swap header not found!\n"); | 202 | printk(KERN_ERR "PM: Swap header not found!\n"); |
@@ -260,25 +208,26 @@ static int mark_swapfiles(sector_t start, unsigned int flags) | |||
260 | /** | 208 | /** |
261 | * swsusp_swap_check - check if the resume device is a swap device | 209 | * swsusp_swap_check - check if the resume device is a swap device |
262 | * and get its index (if so) | 210 | * and get its index (if so) |
211 | * | ||
212 | * This is called before saving image | ||
263 | */ | 213 | */ |
264 | 214 | static int swsusp_swap_check(void) | |
265 | static int swsusp_swap_check(void) /* This is called before saving image */ | ||
266 | { | 215 | { |
267 | int res; | 216 | int res; |
268 | 217 | ||
269 | res = swap_type_of(swsusp_resume_device, swsusp_resume_block, | 218 | res = swap_type_of(swsusp_resume_device, swsusp_resume_block, |
270 | &resume_bdev); | 219 | &hib_resume_bdev); |
271 | if (res < 0) | 220 | if (res < 0) |
272 | return res; | 221 | return res; |
273 | 222 | ||
274 | root_swap = res; | 223 | root_swap = res; |
275 | res = blkdev_get(resume_bdev, FMODE_WRITE); | 224 | res = blkdev_get(hib_resume_bdev, FMODE_WRITE); |
276 | if (res) | 225 | if (res) |
277 | return res; | 226 | return res; |
278 | 227 | ||
279 | res = set_blocksize(resume_bdev, PAGE_SIZE); | 228 | res = set_blocksize(hib_resume_bdev, PAGE_SIZE); |
280 | if (res < 0) | 229 | if (res < 0) |
281 | blkdev_put(resume_bdev, FMODE_WRITE); | 230 | blkdev_put(hib_resume_bdev, FMODE_WRITE); |
282 | 231 | ||
283 | return res; | 232 | return res; |
284 | } | 233 | } |
@@ -309,42 +258,9 @@ static int write_page(void *buf, sector_t offset, struct bio **bio_chain) | |||
309 | } else { | 258 | } else { |
310 | src = buf; | 259 | src = buf; |
311 | } | 260 | } |
312 | return bio_write_page(offset, src, bio_chain); | 261 | return hib_bio_write_page(offset, src, bio_chain); |
313 | } | 262 | } |
314 | 263 | ||
315 | /* | ||
316 | * The swap map is a data structure used for keeping track of each page | ||
317 | * written to a swap partition. It consists of many swap_map_page | ||
318 | * structures that contain each an array of MAP_PAGE_SIZE swap entries. | ||
319 | * These structures are stored on the swap and linked together with the | ||
320 | * help of the .next_swap member. | ||
321 | * | ||
322 | * The swap map is created during suspend. The swap map pages are | ||
323 | * allocated and populated one at a time, so we only need one memory | ||
324 | * page to set up the entire structure. | ||
325 | * | ||
326 | * During resume we also only need to use one swap_map_page structure | ||
327 | * at a time. | ||
328 | */ | ||
329 | |||
330 | #define MAP_PAGE_ENTRIES (PAGE_SIZE / sizeof(sector_t) - 1) | ||
331 | |||
332 | struct swap_map_page { | ||
333 | sector_t entries[MAP_PAGE_ENTRIES]; | ||
334 | sector_t next_swap; | ||
335 | }; | ||
336 | |||
337 | /** | ||
338 | * The swap_map_handle structure is used for handling swap in | ||
339 | * a file-alike way | ||
340 | */ | ||
341 | |||
342 | struct swap_map_handle { | ||
343 | struct swap_map_page *cur; | ||
344 | sector_t cur_swap; | ||
345 | unsigned int k; | ||
346 | }; | ||
347 | |||
348 | static void release_swap_writer(struct swap_map_handle *handle) | 264 | static void release_swap_writer(struct swap_map_handle *handle) |
349 | { | 265 | { |
350 | if (handle->cur) | 266 | if (handle->cur) |
@@ -354,16 +270,33 @@ static void release_swap_writer(struct swap_map_handle *handle) | |||
354 | 270 | ||
355 | static int get_swap_writer(struct swap_map_handle *handle) | 271 | static int get_swap_writer(struct swap_map_handle *handle) |
356 | { | 272 | { |
273 | int ret; | ||
274 | |||
275 | ret = swsusp_swap_check(); | ||
276 | if (ret) { | ||
277 | if (ret != -ENOSPC) | ||
278 | printk(KERN_ERR "PM: Cannot find swap device, try " | ||
279 | "swapon -a.\n"); | ||
280 | return ret; | ||
281 | } | ||
357 | handle->cur = (struct swap_map_page *)get_zeroed_page(GFP_KERNEL); | 282 | handle->cur = (struct swap_map_page *)get_zeroed_page(GFP_KERNEL); |
358 | if (!handle->cur) | 283 | if (!handle->cur) { |
359 | return -ENOMEM; | 284 | ret = -ENOMEM; |
285 | goto err_close; | ||
286 | } | ||
360 | handle->cur_swap = alloc_swapdev_block(root_swap); | 287 | handle->cur_swap = alloc_swapdev_block(root_swap); |
361 | if (!handle->cur_swap) { | 288 | if (!handle->cur_swap) { |
362 | release_swap_writer(handle); | 289 | ret = -ENOSPC; |
363 | return -ENOSPC; | 290 | goto err_rel; |
364 | } | 291 | } |
365 | handle->k = 0; | 292 | handle->k = 0; |
293 | handle->first_sector = handle->cur_swap; | ||
366 | return 0; | 294 | return 0; |
295 | err_rel: | ||
296 | release_swap_writer(handle); | ||
297 | err_close: | ||
298 | swsusp_close(FMODE_WRITE); | ||
299 | return ret; | ||
367 | } | 300 | } |
368 | 301 | ||
369 | static int swap_write_page(struct swap_map_handle *handle, void *buf, | 302 | static int swap_write_page(struct swap_map_handle *handle, void *buf, |
@@ -380,7 +313,7 @@ static int swap_write_page(struct swap_map_handle *handle, void *buf, | |||
380 | return error; | 313 | return error; |
381 | handle->cur->entries[handle->k++] = offset; | 314 | handle->cur->entries[handle->k++] = offset; |
382 | if (handle->k >= MAP_PAGE_ENTRIES) { | 315 | if (handle->k >= MAP_PAGE_ENTRIES) { |
383 | error = wait_on_bio_chain(bio_chain); | 316 | error = hib_wait_on_bio_chain(bio_chain); |
384 | if (error) | 317 | if (error) |
385 | goto out; | 318 | goto out; |
386 | offset = alloc_swapdev_block(root_swap); | 319 | offset = alloc_swapdev_block(root_swap); |
@@ -406,6 +339,24 @@ static int flush_swap_writer(struct swap_map_handle *handle) | |||
406 | return -EINVAL; | 339 | return -EINVAL; |
407 | } | 340 | } |
408 | 341 | ||
342 | static int swap_writer_finish(struct swap_map_handle *handle, | ||
343 | unsigned int flags, int error) | ||
344 | { | ||
345 | if (!error) { | ||
346 | flush_swap_writer(handle); | ||
347 | printk(KERN_INFO "PM: S"); | ||
348 | error = mark_swapfiles(handle, flags); | ||
349 | printk("|\n"); | ||
350 | } | ||
351 | |||
352 | if (error) | ||
353 | free_all_swap_pages(root_swap); | ||
354 | release_swap_writer(handle); | ||
355 | swsusp_close(FMODE_WRITE); | ||
356 | |||
357 | return error; | ||
358 | } | ||
359 | |||
409 | /** | 360 | /** |
410 | * save_image - save the suspend image data | 361 | * save_image - save the suspend image data |
411 | */ | 362 | */ |
@@ -431,7 +382,7 @@ static int save_image(struct swap_map_handle *handle, | |||
431 | bio = NULL; | 382 | bio = NULL; |
432 | do_gettimeofday(&start); | 383 | do_gettimeofday(&start); |
433 | while (1) { | 384 | while (1) { |
434 | ret = snapshot_read_next(snapshot, PAGE_SIZE); | 385 | ret = snapshot_read_next(snapshot); |
435 | if (ret <= 0) | 386 | if (ret <= 0) |
436 | break; | 387 | break; |
437 | ret = swap_write_page(handle, data_of(*snapshot), &bio); | 388 | ret = swap_write_page(handle, data_of(*snapshot), &bio); |
@@ -441,7 +392,7 @@ static int save_image(struct swap_map_handle *handle, | |||
441 | printk(KERN_CONT "\b\b\b\b%3d%%", nr_pages / m); | 392 | printk(KERN_CONT "\b\b\b\b%3d%%", nr_pages / m); |
442 | nr_pages++; | 393 | nr_pages++; |
443 | } | 394 | } |
444 | err2 = wait_on_bio_chain(&bio); | 395 | err2 = hib_wait_on_bio_chain(&bio); |
445 | do_gettimeofday(&stop); | 396 | do_gettimeofday(&stop); |
446 | if (!ret) | 397 | if (!ret) |
447 | ret = err2; | 398 | ret = err2; |
@@ -483,50 +434,34 @@ int swsusp_write(unsigned int flags) | |||
483 | struct swap_map_handle handle; | 434 | struct swap_map_handle handle; |
484 | struct snapshot_handle snapshot; | 435 | struct snapshot_handle snapshot; |
485 | struct swsusp_info *header; | 436 | struct swsusp_info *header; |
437 | unsigned long pages; | ||
486 | int error; | 438 | int error; |
487 | 439 | ||
488 | error = swsusp_swap_check(); | 440 | pages = snapshot_get_image_size(); |
441 | error = get_swap_writer(&handle); | ||
489 | if (error) { | 442 | if (error) { |
490 | printk(KERN_ERR "PM: Cannot find swap device, try " | 443 | printk(KERN_ERR "PM: Cannot get swap writer\n"); |
491 | "swapon -a.\n"); | ||
492 | return error; | 444 | return error; |
493 | } | 445 | } |
446 | if (!enough_swap(pages)) { | ||
447 | printk(KERN_ERR "PM: Not enough free swap\n"); | ||
448 | error = -ENOSPC; | ||
449 | goto out_finish; | ||
450 | } | ||
494 | memset(&snapshot, 0, sizeof(struct snapshot_handle)); | 451 | memset(&snapshot, 0, sizeof(struct snapshot_handle)); |
495 | error = snapshot_read_next(&snapshot, PAGE_SIZE); | 452 | error = snapshot_read_next(&snapshot); |
496 | if (error < PAGE_SIZE) { | 453 | if (error < PAGE_SIZE) { |
497 | if (error >= 0) | 454 | if (error >= 0) |
498 | error = -EFAULT; | 455 | error = -EFAULT; |
499 | 456 | ||
500 | goto out; | 457 | goto out_finish; |
501 | } | 458 | } |
502 | header = (struct swsusp_info *)data_of(snapshot); | 459 | header = (struct swsusp_info *)data_of(snapshot); |
503 | if (!enough_swap(header->pages)) { | 460 | error = swap_write_page(&handle, header, NULL); |
504 | printk(KERN_ERR "PM: Not enough free swap\n"); | 461 | if (!error) |
505 | error = -ENOSPC; | 462 | error = save_image(&handle, &snapshot, pages - 1); |
506 | goto out; | 463 | out_finish: |
507 | } | 464 | error = swap_writer_finish(&handle, flags, error); |
508 | error = get_swap_writer(&handle); | ||
509 | if (!error) { | ||
510 | sector_t start = handle.cur_swap; | ||
511 | |||
512 | error = swap_write_page(&handle, header, NULL); | ||
513 | if (!error) | ||
514 | error = save_image(&handle, &snapshot, | ||
515 | header->pages - 1); | ||
516 | |||
517 | if (!error) { | ||
518 | flush_swap_writer(&handle); | ||
519 | printk(KERN_INFO "PM: S"); | ||
520 | error = mark_swapfiles(start, flags); | ||
521 | printk("|\n"); | ||
522 | } | ||
523 | } | ||
524 | if (error) | ||
525 | free_all_swap_pages(root_swap); | ||
526 | |||
527 | release_swap_writer(&handle); | ||
528 | out: | ||
529 | swsusp_close(FMODE_WRITE); | ||
530 | return error; | 465 | return error; |
531 | } | 466 | } |
532 | 467 | ||
@@ -542,18 +477,21 @@ static void release_swap_reader(struct swap_map_handle *handle) | |||
542 | handle->cur = NULL; | 477 | handle->cur = NULL; |
543 | } | 478 | } |
544 | 479 | ||
545 | static int get_swap_reader(struct swap_map_handle *handle, sector_t start) | 480 | static int get_swap_reader(struct swap_map_handle *handle, |
481 | unsigned int *flags_p) | ||
546 | { | 482 | { |
547 | int error; | 483 | int error; |
548 | 484 | ||
549 | if (!start) | 485 | *flags_p = swsusp_header->flags; |
486 | |||
487 | if (!swsusp_header->image) /* how can this happen? */ | ||
550 | return -EINVAL; | 488 | return -EINVAL; |
551 | 489 | ||
552 | handle->cur = (struct swap_map_page *)get_zeroed_page(__GFP_WAIT | __GFP_HIGH); | 490 | handle->cur = (struct swap_map_page *)get_zeroed_page(__GFP_WAIT | __GFP_HIGH); |
553 | if (!handle->cur) | 491 | if (!handle->cur) |
554 | return -ENOMEM; | 492 | return -ENOMEM; |
555 | 493 | ||
556 | error = bio_read_page(start, handle->cur, NULL); | 494 | error = hib_bio_read_page(swsusp_header->image, handle->cur, NULL); |
557 | if (error) { | 495 | if (error) { |
558 | release_swap_reader(handle); | 496 | release_swap_reader(handle); |
559 | return error; | 497 | return error; |
@@ -573,21 +511,28 @@ static int swap_read_page(struct swap_map_handle *handle, void *buf, | |||
573 | offset = handle->cur->entries[handle->k]; | 511 | offset = handle->cur->entries[handle->k]; |
574 | if (!offset) | 512 | if (!offset) |
575 | return -EFAULT; | 513 | return -EFAULT; |
576 | error = bio_read_page(offset, buf, bio_chain); | 514 | error = hib_bio_read_page(offset, buf, bio_chain); |
577 | if (error) | 515 | if (error) |
578 | return error; | 516 | return error; |
579 | if (++handle->k >= MAP_PAGE_ENTRIES) { | 517 | if (++handle->k >= MAP_PAGE_ENTRIES) { |
580 | error = wait_on_bio_chain(bio_chain); | 518 | error = hib_wait_on_bio_chain(bio_chain); |
581 | handle->k = 0; | 519 | handle->k = 0; |
582 | offset = handle->cur->next_swap; | 520 | offset = handle->cur->next_swap; |
583 | if (!offset) | 521 | if (!offset) |
584 | release_swap_reader(handle); | 522 | release_swap_reader(handle); |
585 | else if (!error) | 523 | else if (!error) |
586 | error = bio_read_page(offset, handle->cur, NULL); | 524 | error = hib_bio_read_page(offset, handle->cur, NULL); |
587 | } | 525 | } |
588 | return error; | 526 | return error; |
589 | } | 527 | } |
590 | 528 | ||
529 | static int swap_reader_finish(struct swap_map_handle *handle) | ||
530 | { | ||
531 | release_swap_reader(handle); | ||
532 | |||
533 | return 0; | ||
534 | } | ||
535 | |||
591 | /** | 536 | /** |
592 | * load_image - load the image using the swap map handle | 537 | * load_image - load the image using the swap map handle |
593 | * @handle and the snapshot handle @snapshot | 538 | * @handle and the snapshot handle @snapshot |
@@ -615,21 +560,21 @@ static int load_image(struct swap_map_handle *handle, | |||
615 | bio = NULL; | 560 | bio = NULL; |
616 | do_gettimeofday(&start); | 561 | do_gettimeofday(&start); |
617 | for ( ; ; ) { | 562 | for ( ; ; ) { |
618 | error = snapshot_write_next(snapshot, PAGE_SIZE); | 563 | error = snapshot_write_next(snapshot); |
619 | if (error <= 0) | 564 | if (error <= 0) |
620 | break; | 565 | break; |
621 | error = swap_read_page(handle, data_of(*snapshot), &bio); | 566 | error = swap_read_page(handle, data_of(*snapshot), &bio); |
622 | if (error) | 567 | if (error) |
623 | break; | 568 | break; |
624 | if (snapshot->sync_read) | 569 | if (snapshot->sync_read) |
625 | error = wait_on_bio_chain(&bio); | 570 | error = hib_wait_on_bio_chain(&bio); |
626 | if (error) | 571 | if (error) |
627 | break; | 572 | break; |
628 | if (!(nr_pages % m)) | 573 | if (!(nr_pages % m)) |
629 | printk("\b\b\b\b%3d%%", nr_pages / m); | 574 | printk("\b\b\b\b%3d%%", nr_pages / m); |
630 | nr_pages++; | 575 | nr_pages++; |
631 | } | 576 | } |
632 | err2 = wait_on_bio_chain(&bio); | 577 | err2 = hib_wait_on_bio_chain(&bio); |
633 | do_gettimeofday(&stop); | 578 | do_gettimeofday(&stop); |
634 | if (!error) | 579 | if (!error) |
635 | error = err2; | 580 | error = err2; |
@@ -657,20 +602,20 @@ int swsusp_read(unsigned int *flags_p) | |||
657 | struct snapshot_handle snapshot; | 602 | struct snapshot_handle snapshot; |
658 | struct swsusp_info *header; | 603 | struct swsusp_info *header; |
659 | 604 | ||
660 | *flags_p = swsusp_header->flags; | ||
661 | |||
662 | memset(&snapshot, 0, sizeof(struct snapshot_handle)); | 605 | memset(&snapshot, 0, sizeof(struct snapshot_handle)); |
663 | error = snapshot_write_next(&snapshot, PAGE_SIZE); | 606 | error = snapshot_write_next(&snapshot); |
664 | if (error < PAGE_SIZE) | 607 | if (error < PAGE_SIZE) |
665 | return error < 0 ? error : -EFAULT; | 608 | return error < 0 ? error : -EFAULT; |
666 | header = (struct swsusp_info *)data_of(snapshot); | 609 | header = (struct swsusp_info *)data_of(snapshot); |
667 | error = get_swap_reader(&handle, swsusp_header->image); | 610 | error = get_swap_reader(&handle, flags_p); |
611 | if (error) | ||
612 | goto end; | ||
668 | if (!error) | 613 | if (!error) |
669 | error = swap_read_page(&handle, header, NULL); | 614 | error = swap_read_page(&handle, header, NULL); |
670 | if (!error) | 615 | if (!error) |
671 | error = load_image(&handle, &snapshot, header->pages - 1); | 616 | error = load_image(&handle, &snapshot, header->pages - 1); |
672 | release_swap_reader(&handle); | 617 | swap_reader_finish(&handle); |
673 | 618 | end: | |
674 | if (!error) | 619 | if (!error) |
675 | pr_debug("PM: Image successfully loaded\n"); | 620 | pr_debug("PM: Image successfully loaded\n"); |
676 | else | 621 | else |
@@ -686,11 +631,11 @@ int swsusp_check(void) | |||
686 | { | 631 | { |
687 | int error; | 632 | int error; |
688 | 633 | ||
689 | resume_bdev = open_by_devnum(swsusp_resume_device, FMODE_READ); | 634 | hib_resume_bdev = open_by_devnum(swsusp_resume_device, FMODE_READ); |
690 | if (!IS_ERR(resume_bdev)) { | 635 | if (!IS_ERR(hib_resume_bdev)) { |
691 | set_blocksize(resume_bdev, PAGE_SIZE); | 636 | set_blocksize(hib_resume_bdev, PAGE_SIZE); |
692 | memset(swsusp_header, 0, PAGE_SIZE); | 637 | memset(swsusp_header, 0, PAGE_SIZE); |
693 | error = bio_read_page(swsusp_resume_block, | 638 | error = hib_bio_read_page(swsusp_resume_block, |
694 | swsusp_header, NULL); | 639 | swsusp_header, NULL); |
695 | if (error) | 640 | if (error) |
696 | goto put; | 641 | goto put; |
@@ -698,7 +643,7 @@ int swsusp_check(void) | |||
698 | if (!memcmp(SWSUSP_SIG, swsusp_header->sig, 10)) { | 643 | if (!memcmp(SWSUSP_SIG, swsusp_header->sig, 10)) { |
699 | memcpy(swsusp_header->sig, swsusp_header->orig_sig, 10); | 644 | memcpy(swsusp_header->sig, swsusp_header->orig_sig, 10); |
700 | /* Reset swap signature now */ | 645 | /* Reset swap signature now */ |
701 | error = bio_write_page(swsusp_resume_block, | 646 | error = hib_bio_write_page(swsusp_resume_block, |
702 | swsusp_header, NULL); | 647 | swsusp_header, NULL); |
703 | } else { | 648 | } else { |
704 | error = -EINVAL; | 649 | error = -EINVAL; |
@@ -706,11 +651,11 @@ int swsusp_check(void) | |||
706 | 651 | ||
707 | put: | 652 | put: |
708 | if (error) | 653 | if (error) |
709 | blkdev_put(resume_bdev, FMODE_READ); | 654 | blkdev_put(hib_resume_bdev, FMODE_READ); |
710 | else | 655 | else |
711 | pr_debug("PM: Signature found, resuming\n"); | 656 | pr_debug("PM: Signature found, resuming\n"); |
712 | } else { | 657 | } else { |
713 | error = PTR_ERR(resume_bdev); | 658 | error = PTR_ERR(hib_resume_bdev); |
714 | } | 659 | } |
715 | 660 | ||
716 | if (error) | 661 | if (error) |
@@ -725,12 +670,12 @@ put: | |||
725 | 670 | ||
726 | void swsusp_close(fmode_t mode) | 671 | void swsusp_close(fmode_t mode) |
727 | { | 672 | { |
728 | if (IS_ERR(resume_bdev)) { | 673 | if (IS_ERR(hib_resume_bdev)) { |
729 | pr_debug("PM: Image device not initialised\n"); | 674 | pr_debug("PM: Image device not initialised\n"); |
730 | return; | 675 | return; |
731 | } | 676 | } |
732 | 677 | ||
733 | blkdev_put(resume_bdev, mode); | 678 | blkdev_put(hib_resume_bdev, mode); |
734 | } | 679 | } |
735 | 680 | ||
736 | static int swsusp_header_init(void) | 681 | static int swsusp_header_init(void) |
diff --git a/kernel/power/user.c b/kernel/power/user.c index a8c96212bc1b..e819e17877ca 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c | |||
@@ -151,6 +151,7 @@ static ssize_t snapshot_read(struct file *filp, char __user *buf, | |||
151 | { | 151 | { |
152 | struct snapshot_data *data; | 152 | struct snapshot_data *data; |
153 | ssize_t res; | 153 | ssize_t res; |
154 | loff_t pg_offp = *offp & ~PAGE_MASK; | ||
154 | 155 | ||
155 | mutex_lock(&pm_mutex); | 156 | mutex_lock(&pm_mutex); |
156 | 157 | ||
@@ -159,14 +160,19 @@ static ssize_t snapshot_read(struct file *filp, char __user *buf, | |||
159 | res = -ENODATA; | 160 | res = -ENODATA; |
160 | goto Unlock; | 161 | goto Unlock; |
161 | } | 162 | } |
162 | res = snapshot_read_next(&data->handle, count); | 163 | if (!pg_offp) { /* on page boundary? */ |
163 | if (res > 0) { | 164 | res = snapshot_read_next(&data->handle); |
164 | if (copy_to_user(buf, data_of(data->handle), res)) | 165 | if (res <= 0) |
165 | res = -EFAULT; | 166 | goto Unlock; |
166 | else | 167 | } else { |
167 | *offp = data->handle.offset; | 168 | res = PAGE_SIZE - pg_offp; |
168 | } | 169 | } |
169 | 170 | ||
171 | res = simple_read_from_buffer(buf, count, &pg_offp, | ||
172 | data_of(data->handle), res); | ||
173 | if (res > 0) | ||
174 | *offp += res; | ||
175 | |||
170 | Unlock: | 176 | Unlock: |
171 | mutex_unlock(&pm_mutex); | 177 | mutex_unlock(&pm_mutex); |
172 | 178 | ||
@@ -178,18 +184,25 @@ static ssize_t snapshot_write(struct file *filp, const char __user *buf, | |||
178 | { | 184 | { |
179 | struct snapshot_data *data; | 185 | struct snapshot_data *data; |
180 | ssize_t res; | 186 | ssize_t res; |
187 | loff_t pg_offp = *offp & ~PAGE_MASK; | ||
181 | 188 | ||
182 | mutex_lock(&pm_mutex); | 189 | mutex_lock(&pm_mutex); |
183 | 190 | ||
184 | data = filp->private_data; | 191 | data = filp->private_data; |
185 | res = snapshot_write_next(&data->handle, count); | 192 | |
186 | if (res > 0) { | 193 | if (!pg_offp) { |
187 | if (copy_from_user(data_of(data->handle), buf, res)) | 194 | res = snapshot_write_next(&data->handle); |
188 | res = -EFAULT; | 195 | if (res <= 0) |
189 | else | 196 | goto unlock; |
190 | *offp = data->handle.offset; | 197 | } else { |
198 | res = PAGE_SIZE - pg_offp; | ||
191 | } | 199 | } |
192 | 200 | ||
201 | res = simple_write_to_buffer(data_of(data->handle), res, &pg_offp, | ||
202 | buf, count); | ||
203 | if (res > 0) | ||
204 | *offp += res; | ||
205 | unlock: | ||
193 | mutex_unlock(&pm_mutex); | 206 | mutex_unlock(&pm_mutex); |
194 | 207 | ||
195 | return res; | 208 | return res; |
diff --git a/kernel/sched.c b/kernel/sched.c index 1d93cd0ae4d3..d9c0368eeb21 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -3851,6 +3851,7 @@ void __wake_up_locked(wait_queue_head_t *q, unsigned int mode) | |||
3851 | { | 3851 | { |
3852 | __wake_up_common(q, mode, 1, 0, NULL); | 3852 | __wake_up_common(q, mode, 1, 0, NULL); |
3853 | } | 3853 | } |
3854 | EXPORT_SYMBOL_GPL(__wake_up_locked); | ||
3854 | 3855 | ||
3855 | void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key) | 3856 | void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key) |
3856 | { | 3857 | { |
diff --git a/kernel/sys.c b/kernel/sys.c index 7cb426a58965..0d36d889c74d 100644 --- a/kernel/sys.c +++ b/kernel/sys.c | |||
@@ -492,10 +492,6 @@ SYSCALL_DEFINE2(setregid, gid_t, rgid, gid_t, egid) | |||
492 | return -ENOMEM; | 492 | return -ENOMEM; |
493 | old = current_cred(); | 493 | old = current_cred(); |
494 | 494 | ||
495 | retval = security_task_setgid(rgid, egid, (gid_t)-1, LSM_SETID_RE); | ||
496 | if (retval) | ||
497 | goto error; | ||
498 | |||
499 | retval = -EPERM; | 495 | retval = -EPERM; |
500 | if (rgid != (gid_t) -1) { | 496 | if (rgid != (gid_t) -1) { |
501 | if (old->gid == rgid || | 497 | if (old->gid == rgid || |
@@ -543,10 +539,6 @@ SYSCALL_DEFINE1(setgid, gid_t, gid) | |||
543 | return -ENOMEM; | 539 | return -ENOMEM; |
544 | old = current_cred(); | 540 | old = current_cred(); |
545 | 541 | ||
546 | retval = security_task_setgid(gid, (gid_t)-1, (gid_t)-1, LSM_SETID_ID); | ||
547 | if (retval) | ||
548 | goto error; | ||
549 | |||
550 | retval = -EPERM; | 542 | retval = -EPERM; |
551 | if (capable(CAP_SETGID)) | 543 | if (capable(CAP_SETGID)) |
552 | new->gid = new->egid = new->sgid = new->fsgid = gid; | 544 | new->gid = new->egid = new->sgid = new->fsgid = gid; |
@@ -610,10 +602,6 @@ SYSCALL_DEFINE2(setreuid, uid_t, ruid, uid_t, euid) | |||
610 | return -ENOMEM; | 602 | return -ENOMEM; |
611 | old = current_cred(); | 603 | old = current_cred(); |
612 | 604 | ||
613 | retval = security_task_setuid(ruid, euid, (uid_t)-1, LSM_SETID_RE); | ||
614 | if (retval) | ||
615 | goto error; | ||
616 | |||
617 | retval = -EPERM; | 605 | retval = -EPERM; |
618 | if (ruid != (uid_t) -1) { | 606 | if (ruid != (uid_t) -1) { |
619 | new->uid = ruid; | 607 | new->uid = ruid; |
@@ -675,10 +663,6 @@ SYSCALL_DEFINE1(setuid, uid_t, uid) | |||
675 | return -ENOMEM; | 663 | return -ENOMEM; |
676 | old = current_cred(); | 664 | old = current_cred(); |
677 | 665 | ||
678 | retval = security_task_setuid(uid, (uid_t)-1, (uid_t)-1, LSM_SETID_ID); | ||
679 | if (retval) | ||
680 | goto error; | ||
681 | |||
682 | retval = -EPERM; | 666 | retval = -EPERM; |
683 | if (capable(CAP_SETUID)) { | 667 | if (capable(CAP_SETUID)) { |
684 | new->suid = new->uid = uid; | 668 | new->suid = new->uid = uid; |
@@ -719,9 +703,6 @@ SYSCALL_DEFINE3(setresuid, uid_t, ruid, uid_t, euid, uid_t, suid) | |||
719 | if (!new) | 703 | if (!new) |
720 | return -ENOMEM; | 704 | return -ENOMEM; |
721 | 705 | ||
722 | retval = security_task_setuid(ruid, euid, suid, LSM_SETID_RES); | ||
723 | if (retval) | ||
724 | goto error; | ||
725 | old = current_cred(); | 706 | old = current_cred(); |
726 | 707 | ||
727 | retval = -EPERM; | 708 | retval = -EPERM; |
@@ -788,10 +769,6 @@ SYSCALL_DEFINE3(setresgid, gid_t, rgid, gid_t, egid, gid_t, sgid) | |||
788 | return -ENOMEM; | 769 | return -ENOMEM; |
789 | old = current_cred(); | 770 | old = current_cred(); |
790 | 771 | ||
791 | retval = security_task_setgid(rgid, egid, sgid, LSM_SETID_RES); | ||
792 | if (retval) | ||
793 | goto error; | ||
794 | |||
795 | retval = -EPERM; | 772 | retval = -EPERM; |
796 | if (!capable(CAP_SETGID)) { | 773 | if (!capable(CAP_SETGID)) { |
797 | if (rgid != (gid_t) -1 && rgid != old->gid && | 774 | if (rgid != (gid_t) -1 && rgid != old->gid && |
@@ -851,9 +828,6 @@ SYSCALL_DEFINE1(setfsuid, uid_t, uid) | |||
851 | old = current_cred(); | 828 | old = current_cred(); |
852 | old_fsuid = old->fsuid; | 829 | old_fsuid = old->fsuid; |
853 | 830 | ||
854 | if (security_task_setuid(uid, (uid_t)-1, (uid_t)-1, LSM_SETID_FS) < 0) | ||
855 | goto error; | ||
856 | |||
857 | if (uid == old->uid || uid == old->euid || | 831 | if (uid == old->uid || uid == old->euid || |
858 | uid == old->suid || uid == old->fsuid || | 832 | uid == old->suid || uid == old->fsuid || |
859 | capable(CAP_SETUID)) { | 833 | capable(CAP_SETUID)) { |
@@ -864,7 +838,6 @@ SYSCALL_DEFINE1(setfsuid, uid_t, uid) | |||
864 | } | 838 | } |
865 | } | 839 | } |
866 | 840 | ||
867 | error: | ||
868 | abort_creds(new); | 841 | abort_creds(new); |
869 | return old_fsuid; | 842 | return old_fsuid; |
870 | 843 | ||
@@ -888,9 +861,6 @@ SYSCALL_DEFINE1(setfsgid, gid_t, gid) | |||
888 | old = current_cred(); | 861 | old = current_cred(); |
889 | old_fsgid = old->fsgid; | 862 | old_fsgid = old->fsgid; |
890 | 863 | ||
891 | if (security_task_setgid(gid, (gid_t)-1, (gid_t)-1, LSM_SETID_FS)) | ||
892 | goto error; | ||
893 | |||
894 | if (gid == old->gid || gid == old->egid || | 864 | if (gid == old->gid || gid == old->egid || |
895 | gid == old->sgid || gid == old->fsgid || | 865 | gid == old->sgid || gid == old->fsgid || |
896 | capable(CAP_SETGID)) { | 866 | capable(CAP_SETGID)) { |
@@ -900,7 +870,6 @@ SYSCALL_DEFINE1(setfsgid, gid_t, gid) | |||
900 | } | 870 | } |
901 | } | 871 | } |
902 | 872 | ||
903 | error: | ||
904 | abort_creds(new); | 873 | abort_creds(new); |
905 | return old_fsgid; | 874 | return old_fsgid; |
906 | 875 | ||
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 8686b0f5fc12..b12583047757 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -163,6 +163,27 @@ static int proc_taint(struct ctl_table *table, int write, | |||
163 | void __user *buffer, size_t *lenp, loff_t *ppos); | 163 | void __user *buffer, size_t *lenp, loff_t *ppos); |
164 | #endif | 164 | #endif |
165 | 165 | ||
166 | #ifdef CONFIG_MAGIC_SYSRQ | ||
167 | static int __sysrq_enabled; /* Note: sysrq code uses its own private copy */ | ||
168 | |||
169 | static int sysrq_sysctl_handler(ctl_table *table, int write, | ||
170 | void __user *buffer, size_t *lenp, | ||
171 | loff_t *ppos) | ||
172 | { | ||
173 | int error; | ||
174 | |||
175 | error = proc_dointvec(table, write, buffer, lenp, ppos); | ||
176 | if (error) | ||
177 | return error; | ||
178 | |||
179 | if (write) | ||
180 | sysrq_toggle_support(__sysrq_enabled); | ||
181 | |||
182 | return 0; | ||
183 | } | ||
184 | |||
185 | #endif | ||
186 | |||
166 | static struct ctl_table root_table[]; | 187 | static struct ctl_table root_table[]; |
167 | static struct ctl_table_root sysctl_table_root; | 188 | static struct ctl_table_root sysctl_table_root; |
168 | static struct ctl_table_header root_table_header = { | 189 | static struct ctl_table_header root_table_header = { |
@@ -567,7 +588,7 @@ static struct ctl_table kern_table[] = { | |||
567 | .data = &__sysrq_enabled, | 588 | .data = &__sysrq_enabled, |
568 | .maxlen = sizeof (int), | 589 | .maxlen = sizeof (int), |
569 | .mode = 0644, | 590 | .mode = 0644, |
570 | .proc_handler = proc_dointvec, | 591 | .proc_handler = sysrq_sysctl_handler, |
571 | }, | 592 | }, |
572 | #endif | 593 | #endif |
573 | #ifdef CONFIG_PROC_SYSCTL | 594 | #ifdef CONFIG_PROC_SYSCTL |
@@ -621,7 +642,7 @@ static struct ctl_table kern_table[] = { | |||
621 | #endif | 642 | #endif |
622 | { | 643 | { |
623 | .procname = "userprocess_debug", | 644 | .procname = "userprocess_debug", |
624 | .data = &sysctl_userprocess_debug, | 645 | .data = &show_unhandled_signals, |
625 | .maxlen = sizeof(int), | 646 | .maxlen = sizeof(int), |
626 | .mode = 0644, | 647 | .mode = 0644, |
627 | .proc_handler = proc_dointvec, | 648 | .proc_handler = proc_dointvec, |
@@ -1431,7 +1452,8 @@ static struct ctl_table fs_table[] = { | |||
1431 | }; | 1452 | }; |
1432 | 1453 | ||
1433 | static struct ctl_table debug_table[] = { | 1454 | static struct ctl_table debug_table[] = { |
1434 | #if defined(CONFIG_X86) || defined(CONFIG_PPC) || defined(CONFIG_SPARC) | 1455 | #if defined(CONFIG_X86) || defined(CONFIG_PPC) || defined(CONFIG_SPARC) || \ |
1456 | defined(CONFIG_S390) | ||
1435 | { | 1457 | { |
1436 | .procname = "exception-trace", | 1458 | .procname = "exception-trace", |
1437 | .data = &show_unhandled_signals, | 1459 | .data = &show_unhandled_signals, |
@@ -2040,8 +2062,132 @@ int proc_dostring(struct ctl_table *table, int write, | |||
2040 | buffer, lenp, ppos); | 2062 | buffer, lenp, ppos); |
2041 | } | 2063 | } |
2042 | 2064 | ||
2065 | static size_t proc_skip_spaces(char **buf) | ||
2066 | { | ||
2067 | size_t ret; | ||
2068 | char *tmp = skip_spaces(*buf); | ||
2069 | ret = tmp - *buf; | ||
2070 | *buf = tmp; | ||
2071 | return ret; | ||
2072 | } | ||
2073 | |||
2074 | static void proc_skip_char(char **buf, size_t *size, const char v) | ||
2075 | { | ||
2076 | while (*size) { | ||
2077 | if (**buf != v) | ||
2078 | break; | ||
2079 | (*size)--; | ||
2080 | (*buf)++; | ||
2081 | } | ||
2082 | } | ||
2083 | |||
2084 | #define TMPBUFLEN 22 | ||
2085 | /** | ||
2086 | * proc_get_long - reads an ASCII formatted integer from a kernel buffer | ||
2087 | * | ||
2088 | * @buf - a kernel buffer | ||
2089 | * @size - size of the kernel buffer | ||
2090 | * @val - this is where the number will be stored | ||
2091 | * @neg - set to %TRUE if number is negative | ||
2092 | * @perm_tr - a vector which contains the allowed trailers | ||
2093 | * @perm_tr_len - size of the perm_tr vector | ||
2094 | * @tr - pointer to store the trailer character | ||
2095 | * | ||
2096 | * In case of success 0 is returned and buf and size are updated with | ||
2097 | * the amount of bytes read. If tr is non-NULL and a trailing | ||
2098 | * character exists (size is non-zero after returning from this | ||
2099 | * function) tr is updated with the trailing character. | ||
2100 | */ | ||
2101 | static int proc_get_long(char **buf, size_t *size, | ||
2102 | unsigned long *val, bool *neg, | ||
2103 | const char *perm_tr, unsigned perm_tr_len, char *tr) | ||
2104 | { | ||
2105 | int len; | ||
2106 | char *p, tmp[TMPBUFLEN]; | ||
2107 | |||
2108 | if (!*size) | ||
2109 | return -EINVAL; | ||
2110 | |||
2111 | len = *size; | ||
2112 | if (len > TMPBUFLEN - 1) | ||
2113 | len = TMPBUFLEN - 1; | ||
2114 | |||
2115 | memcpy(tmp, *buf, len); | ||
2116 | |||
2117 | tmp[len] = 0; | ||
2118 | p = tmp; | ||
2119 | if (*p == '-' && *size > 1) { | ||
2120 | *neg = true; | ||
2121 | p++; | ||
2122 | } else | ||
2123 | *neg = false; | ||
2124 | if (!isdigit(*p)) | ||
2125 | return -EINVAL; | ||
2126 | |||
2127 | *val = simple_strtoul(p, &p, 0); | ||
2128 | |||
2129 | len = p - tmp; | ||
2130 | |||
2131 | /* We don't know if the next char is whitespace thus we may accept | ||
2132 | * invalid integers (e.g. 1234...a) or two integers instead of one | ||
2133 | * (e.g. 123...1). So let's not allow such large numbers. */ | ||
2134 | if (len == TMPBUFLEN - 1) | ||
2135 | return -EINVAL; | ||
2136 | |||
2137 | if (len < *size && perm_tr_len && !memchr(perm_tr, *p, perm_tr_len)) | ||
2138 | return -EINVAL; | ||
2139 | |||
2140 | if (tr && (len < *size)) | ||
2141 | *tr = *p; | ||
2142 | |||
2143 | *buf += len; | ||
2144 | *size -= len; | ||
2145 | |||
2146 | return 0; | ||
2147 | } | ||
2148 | |||
2149 | /** | ||
2150 | * proc_put_long - converts an integer to a decimal ASCII formatted string | ||
2151 | * | ||
2152 | * @buf - the user buffer | ||
2153 | * @size - the size of the user buffer | ||
2154 | * @val - the integer to be converted | ||
2155 | * @neg - sign of the number, %TRUE for negative | ||
2156 | * | ||
2157 | * In case of success 0 is returned and buf and size are updated with | ||
2158 | * the amount of bytes written. | ||
2159 | */ | ||
2160 | static int proc_put_long(void __user **buf, size_t *size, unsigned long val, | ||
2161 | bool neg) | ||
2162 | { | ||
2163 | int len; | ||
2164 | char tmp[TMPBUFLEN], *p = tmp; | ||
2165 | |||
2166 | sprintf(p, "%s%lu", neg ? "-" : "", val); | ||
2167 | len = strlen(tmp); | ||
2168 | if (len > *size) | ||
2169 | len = *size; | ||
2170 | if (copy_to_user(*buf, tmp, len)) | ||
2171 | return -EFAULT; | ||
2172 | *size -= len; | ||
2173 | *buf += len; | ||
2174 | return 0; | ||
2175 | } | ||
2176 | #undef TMPBUFLEN | ||
2043 | 2177 | ||
2044 | static int do_proc_dointvec_conv(int *negp, unsigned long *lvalp, | 2178 | static int proc_put_char(void __user **buf, size_t *size, char c) |
2179 | { | ||
2180 | if (*size) { | ||
2181 | char __user **buffer = (char __user **)buf; | ||
2182 | if (put_user(c, *buffer)) | ||
2183 | return -EFAULT; | ||
2184 | (*size)--, (*buffer)++; | ||
2185 | *buf = *buffer; | ||
2186 | } | ||
2187 | return 0; | ||
2188 | } | ||
2189 | |||
2190 | static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp, | ||
2045 | int *valp, | 2191 | int *valp, |
2046 | int write, void *data) | 2192 | int write, void *data) |
2047 | { | 2193 | { |
@@ -2050,33 +2196,31 @@ static int do_proc_dointvec_conv(int *negp, unsigned long *lvalp, | |||
2050 | } else { | 2196 | } else { |
2051 | int val = *valp; | 2197 | int val = *valp; |
2052 | if (val < 0) { | 2198 | if (val < 0) { |
2053 | *negp = -1; | 2199 | *negp = true; |
2054 | *lvalp = (unsigned long)-val; | 2200 | *lvalp = (unsigned long)-val; |
2055 | } else { | 2201 | } else { |
2056 | *negp = 0; | 2202 | *negp = false; |
2057 | *lvalp = (unsigned long)val; | 2203 | *lvalp = (unsigned long)val; |
2058 | } | 2204 | } |
2059 | } | 2205 | } |
2060 | return 0; | 2206 | return 0; |
2061 | } | 2207 | } |
2062 | 2208 | ||
2209 | static const char proc_wspace_sep[] = { ' ', '\t', '\n' }; | ||
2210 | |||
2063 | static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table, | 2211 | static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table, |
2064 | int write, void __user *buffer, | 2212 | int write, void __user *buffer, |
2065 | size_t *lenp, loff_t *ppos, | 2213 | size_t *lenp, loff_t *ppos, |
2066 | int (*conv)(int *negp, unsigned long *lvalp, int *valp, | 2214 | int (*conv)(bool *negp, unsigned long *lvalp, int *valp, |
2067 | int write, void *data), | 2215 | int write, void *data), |
2068 | void *data) | 2216 | void *data) |
2069 | { | 2217 | { |
2070 | #define TMPBUFLEN 21 | 2218 | int *i, vleft, first = 1, err = 0; |
2071 | int *i, vleft, first = 1, neg; | 2219 | unsigned long page = 0; |
2072 | unsigned long lval; | 2220 | size_t left; |
2073 | size_t left, len; | 2221 | char *kbuf; |
2074 | 2222 | ||
2075 | char buf[TMPBUFLEN], *p; | 2223 | if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) { |
2076 | char __user *s = buffer; | ||
2077 | |||
2078 | if (!tbl_data || !table->maxlen || !*lenp || | ||
2079 | (*ppos && !write)) { | ||
2080 | *lenp = 0; | 2224 | *lenp = 0; |
2081 | return 0; | 2225 | return 0; |
2082 | } | 2226 | } |
@@ -2088,89 +2232,69 @@ static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table, | |||
2088 | if (!conv) | 2232 | if (!conv) |
2089 | conv = do_proc_dointvec_conv; | 2233 | conv = do_proc_dointvec_conv; |
2090 | 2234 | ||
2235 | if (write) { | ||
2236 | if (left > PAGE_SIZE - 1) | ||
2237 | left = PAGE_SIZE - 1; | ||
2238 | page = __get_free_page(GFP_TEMPORARY); | ||
2239 | kbuf = (char *) page; | ||
2240 | if (!kbuf) | ||
2241 | return -ENOMEM; | ||
2242 | if (copy_from_user(kbuf, buffer, left)) { | ||
2243 | err = -EFAULT; | ||
2244 | goto free; | ||
2245 | } | ||
2246 | kbuf[left] = 0; | ||
2247 | } | ||
2248 | |||
2091 | for (; left && vleft--; i++, first=0) { | 2249 | for (; left && vleft--; i++, first=0) { |
2092 | if (write) { | 2250 | unsigned long lval; |
2093 | while (left) { | 2251 | bool neg; |
2094 | char c; | ||
2095 | if (get_user(c, s)) | ||
2096 | return -EFAULT; | ||
2097 | if (!isspace(c)) | ||
2098 | break; | ||
2099 | left--; | ||
2100 | s++; | ||
2101 | } | ||
2102 | if (!left) | ||
2103 | break; | ||
2104 | neg = 0; | ||
2105 | len = left; | ||
2106 | if (len > sizeof(buf) - 1) | ||
2107 | len = sizeof(buf) - 1; | ||
2108 | if (copy_from_user(buf, s, len)) | ||
2109 | return -EFAULT; | ||
2110 | buf[len] = 0; | ||
2111 | p = buf; | ||
2112 | if (*p == '-' && left > 1) { | ||
2113 | neg = 1; | ||
2114 | p++; | ||
2115 | } | ||
2116 | if (*p < '0' || *p > '9') | ||
2117 | break; | ||
2118 | 2252 | ||
2119 | lval = simple_strtoul(p, &p, 0); | 2253 | if (write) { |
2254 | left -= proc_skip_spaces(&kbuf); | ||
2120 | 2255 | ||
2121 | len = p-buf; | 2256 | err = proc_get_long(&kbuf, &left, &lval, &neg, |
2122 | if ((len < left) && *p && !isspace(*p)) | 2257 | proc_wspace_sep, |
2258 | sizeof(proc_wspace_sep), NULL); | ||
2259 | if (err) | ||
2123 | break; | 2260 | break; |
2124 | s += len; | 2261 | if (conv(&neg, &lval, i, 1, data)) { |
2125 | left -= len; | 2262 | err = -EINVAL; |
2126 | |||
2127 | if (conv(&neg, &lval, i, 1, data)) | ||
2128 | break; | 2263 | break; |
2264 | } | ||
2129 | } else { | 2265 | } else { |
2130 | p = buf; | 2266 | if (conv(&neg, &lval, i, 0, data)) { |
2267 | err = -EINVAL; | ||
2268 | break; | ||
2269 | } | ||
2131 | if (!first) | 2270 | if (!first) |
2132 | *p++ = '\t'; | 2271 | err = proc_put_char(&buffer, &left, '\t'); |
2133 | 2272 | if (err) | |
2134 | if (conv(&neg, &lval, i, 0, data)) | 2273 | break; |
2274 | err = proc_put_long(&buffer, &left, lval, neg); | ||
2275 | if (err) | ||
2135 | break; | 2276 | break; |
2136 | |||
2137 | sprintf(p, "%s%lu", neg ? "-" : "", lval); | ||
2138 | len = strlen(buf); | ||
2139 | if (len > left) | ||
2140 | len = left; | ||
2141 | if(copy_to_user(s, buf, len)) | ||
2142 | return -EFAULT; | ||
2143 | left -= len; | ||
2144 | s += len; | ||
2145 | } | 2277 | } |
2146 | } | 2278 | } |
2147 | 2279 | ||
2148 | if (!write && !first && left) { | 2280 | if (!write && !first && left && !err) |
2149 | if(put_user('\n', s)) | 2281 | err = proc_put_char(&buffer, &left, '\n'); |
2150 | return -EFAULT; | 2282 | if (write && !err) |
2151 | left--, s++; | 2283 | left -= proc_skip_spaces(&kbuf); |
2152 | } | 2284 | free: |
2153 | if (write) { | 2285 | if (write) { |
2154 | while (left) { | 2286 | free_page(page); |
2155 | char c; | 2287 | if (first) |
2156 | if (get_user(c, s++)) | 2288 | return err ? : -EINVAL; |
2157 | return -EFAULT; | ||
2158 | if (!isspace(c)) | ||
2159 | break; | ||
2160 | left--; | ||
2161 | } | ||
2162 | } | 2289 | } |
2163 | if (write && first) | ||
2164 | return -EINVAL; | ||
2165 | *lenp -= left; | 2290 | *lenp -= left; |
2166 | *ppos += *lenp; | 2291 | *ppos += *lenp; |
2167 | return 0; | 2292 | return err; |
2168 | #undef TMPBUFLEN | ||
2169 | } | 2293 | } |
2170 | 2294 | ||
2171 | static int do_proc_dointvec(struct ctl_table *table, int write, | 2295 | static int do_proc_dointvec(struct ctl_table *table, int write, |
2172 | void __user *buffer, size_t *lenp, loff_t *ppos, | 2296 | void __user *buffer, size_t *lenp, loff_t *ppos, |
2173 | int (*conv)(int *negp, unsigned long *lvalp, int *valp, | 2297 | int (*conv)(bool *negp, unsigned long *lvalp, int *valp, |
2174 | int write, void *data), | 2298 | int write, void *data), |
2175 | void *data) | 2299 | void *data) |
2176 | { | 2300 | { |
@@ -2238,8 +2362,8 @@ struct do_proc_dointvec_minmax_conv_param { | |||
2238 | int *max; | 2362 | int *max; |
2239 | }; | 2363 | }; |
2240 | 2364 | ||
2241 | static int do_proc_dointvec_minmax_conv(int *negp, unsigned long *lvalp, | 2365 | static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp, |
2242 | int *valp, | 2366 | int *valp, |
2243 | int write, void *data) | 2367 | int write, void *data) |
2244 | { | 2368 | { |
2245 | struct do_proc_dointvec_minmax_conv_param *param = data; | 2369 | struct do_proc_dointvec_minmax_conv_param *param = data; |
@@ -2252,10 +2376,10 @@ static int do_proc_dointvec_minmax_conv(int *negp, unsigned long *lvalp, | |||
2252 | } else { | 2376 | } else { |
2253 | int val = *valp; | 2377 | int val = *valp; |
2254 | if (val < 0) { | 2378 | if (val < 0) { |
2255 | *negp = -1; | 2379 | *negp = true; |
2256 | *lvalp = (unsigned long)-val; | 2380 | *lvalp = (unsigned long)-val; |
2257 | } else { | 2381 | } else { |
2258 | *negp = 0; | 2382 | *negp = false; |
2259 | *lvalp = (unsigned long)val; | 2383 | *lvalp = (unsigned long)val; |
2260 | } | 2384 | } |
2261 | } | 2385 | } |
@@ -2295,102 +2419,78 @@ static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int | |||
2295 | unsigned long convmul, | 2419 | unsigned long convmul, |
2296 | unsigned long convdiv) | 2420 | unsigned long convdiv) |
2297 | { | 2421 | { |
2298 | #define TMPBUFLEN 21 | 2422 | unsigned long *i, *min, *max; |
2299 | unsigned long *i, *min, *max, val; | 2423 | int vleft, first = 1, err = 0; |
2300 | int vleft, first=1, neg; | 2424 | unsigned long page = 0; |
2301 | size_t len, left; | 2425 | size_t left; |
2302 | char buf[TMPBUFLEN], *p; | 2426 | char *kbuf; |
2303 | char __user *s = buffer; | 2427 | |
2304 | 2428 | if (!data || !table->maxlen || !*lenp || (*ppos && !write)) { | |
2305 | if (!data || !table->maxlen || !*lenp || | ||
2306 | (*ppos && !write)) { | ||
2307 | *lenp = 0; | 2429 | *lenp = 0; |
2308 | return 0; | 2430 | return 0; |
2309 | } | 2431 | } |
2310 | 2432 | ||
2311 | i = (unsigned long *) data; | 2433 | i = (unsigned long *) data; |
2312 | min = (unsigned long *) table->extra1; | 2434 | min = (unsigned long *) table->extra1; |
2313 | max = (unsigned long *) table->extra2; | 2435 | max = (unsigned long *) table->extra2; |
2314 | vleft = table->maxlen / sizeof(unsigned long); | 2436 | vleft = table->maxlen / sizeof(unsigned long); |
2315 | left = *lenp; | 2437 | left = *lenp; |
2316 | 2438 | ||
2439 | if (write) { | ||
2440 | if (left > PAGE_SIZE - 1) | ||
2441 | left = PAGE_SIZE - 1; | ||
2442 | page = __get_free_page(GFP_TEMPORARY); | ||
2443 | kbuf = (char *) page; | ||
2444 | if (!kbuf) | ||
2445 | return -ENOMEM; | ||
2446 | if (copy_from_user(kbuf, buffer, left)) { | ||
2447 | err = -EFAULT; | ||
2448 | goto free; | ||
2449 | } | ||
2450 | kbuf[left] = 0; | ||
2451 | } | ||
2452 | |||
2317 | for (; left && vleft--; i++, min++, max++, first=0) { | 2453 | for (; left && vleft--; i++, min++, max++, first=0) { |
2454 | unsigned long val; | ||
2455 | |||
2318 | if (write) { | 2456 | if (write) { |
2319 | while (left) { | 2457 | bool neg; |
2320 | char c; | 2458 | |
2321 | if (get_user(c, s)) | 2459 | left -= proc_skip_spaces(&kbuf); |
2322 | return -EFAULT; | 2460 | |
2323 | if (!isspace(c)) | 2461 | err = proc_get_long(&kbuf, &left, &val, &neg, |
2324 | break; | 2462 | proc_wspace_sep, |
2325 | left--; | 2463 | sizeof(proc_wspace_sep), NULL); |
2326 | s++; | 2464 | if (err) |
2327 | } | ||
2328 | if (!left) | ||
2329 | break; | ||
2330 | neg = 0; | ||
2331 | len = left; | ||
2332 | if (len > TMPBUFLEN-1) | ||
2333 | len = TMPBUFLEN-1; | ||
2334 | if (copy_from_user(buf, s, len)) | ||
2335 | return -EFAULT; | ||
2336 | buf[len] = 0; | ||
2337 | p = buf; | ||
2338 | if (*p == '-' && left > 1) { | ||
2339 | neg = 1; | ||
2340 | p++; | ||
2341 | } | ||
2342 | if (*p < '0' || *p > '9') | ||
2343 | break; | ||
2344 | val = simple_strtoul(p, &p, 0) * convmul / convdiv ; | ||
2345 | len = p-buf; | ||
2346 | if ((len < left) && *p && !isspace(*p)) | ||
2347 | break; | 2465 | break; |
2348 | if (neg) | 2466 | if (neg) |
2349 | val = -val; | ||
2350 | s += len; | ||
2351 | left -= len; | ||
2352 | |||
2353 | if(neg) | ||
2354 | continue; | 2467 | continue; |
2355 | if ((min && val < *min) || (max && val > *max)) | 2468 | if ((min && val < *min) || (max && val > *max)) |
2356 | continue; | 2469 | continue; |
2357 | *i = val; | 2470 | *i = val; |
2358 | } else { | 2471 | } else { |
2359 | p = buf; | 2472 | val = convdiv * (*i) / convmul; |
2360 | if (!first) | 2473 | if (!first) |
2361 | *p++ = '\t'; | 2474 | err = proc_put_char(&buffer, &left, '\t'); |
2362 | sprintf(p, "%lu", convdiv * (*i) / convmul); | 2475 | err = proc_put_long(&buffer, &left, val, false); |
2363 | len = strlen(buf); | 2476 | if (err) |
2364 | if (len > left) | 2477 | break; |
2365 | len = left; | ||
2366 | if(copy_to_user(s, buf, len)) | ||
2367 | return -EFAULT; | ||
2368 | left -= len; | ||
2369 | s += len; | ||
2370 | } | 2478 | } |
2371 | } | 2479 | } |
2372 | 2480 | ||
2373 | if (!write && !first && left) { | 2481 | if (!write && !first && left && !err) |
2374 | if(put_user('\n', s)) | 2482 | err = proc_put_char(&buffer, &left, '\n'); |
2375 | return -EFAULT; | 2483 | if (write && !err) |
2376 | left--, s++; | 2484 | left -= proc_skip_spaces(&kbuf); |
2377 | } | 2485 | free: |
2378 | if (write) { | 2486 | if (write) { |
2379 | while (left) { | 2487 | free_page(page); |
2380 | char c; | 2488 | if (first) |
2381 | if (get_user(c, s++)) | 2489 | return err ? : -EINVAL; |
2382 | return -EFAULT; | ||
2383 | if (!isspace(c)) | ||
2384 | break; | ||
2385 | left--; | ||
2386 | } | ||
2387 | } | 2490 | } |
2388 | if (write && first) | ||
2389 | return -EINVAL; | ||
2390 | *lenp -= left; | 2491 | *lenp -= left; |
2391 | *ppos += *lenp; | 2492 | *ppos += *lenp; |
2392 | return 0; | 2493 | return err; |
2393 | #undef TMPBUFLEN | ||
2394 | } | 2494 | } |
2395 | 2495 | ||
2396 | static int do_proc_doulongvec_minmax(struct ctl_table *table, int write, | 2496 | static int do_proc_doulongvec_minmax(struct ctl_table *table, int write, |
@@ -2451,7 +2551,7 @@ int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write, | |||
2451 | } | 2551 | } |
2452 | 2552 | ||
2453 | 2553 | ||
2454 | static int do_proc_dointvec_jiffies_conv(int *negp, unsigned long *lvalp, | 2554 | static int do_proc_dointvec_jiffies_conv(bool *negp, unsigned long *lvalp, |
2455 | int *valp, | 2555 | int *valp, |
2456 | int write, void *data) | 2556 | int write, void *data) |
2457 | { | 2557 | { |
@@ -2463,10 +2563,10 @@ static int do_proc_dointvec_jiffies_conv(int *negp, unsigned long *lvalp, | |||
2463 | int val = *valp; | 2563 | int val = *valp; |
2464 | unsigned long lval; | 2564 | unsigned long lval; |
2465 | if (val < 0) { | 2565 | if (val < 0) { |
2466 | *negp = -1; | 2566 | *negp = true; |
2467 | lval = (unsigned long)-val; | 2567 | lval = (unsigned long)-val; |
2468 | } else { | 2568 | } else { |
2469 | *negp = 0; | 2569 | *negp = false; |
2470 | lval = (unsigned long)val; | 2570 | lval = (unsigned long)val; |
2471 | } | 2571 | } |
2472 | *lvalp = lval / HZ; | 2572 | *lvalp = lval / HZ; |
@@ -2474,7 +2574,7 @@ static int do_proc_dointvec_jiffies_conv(int *negp, unsigned long *lvalp, | |||
2474 | return 0; | 2574 | return 0; |
2475 | } | 2575 | } |
2476 | 2576 | ||
2477 | static int do_proc_dointvec_userhz_jiffies_conv(int *negp, unsigned long *lvalp, | 2577 | static int do_proc_dointvec_userhz_jiffies_conv(bool *negp, unsigned long *lvalp, |
2478 | int *valp, | 2578 | int *valp, |
2479 | int write, void *data) | 2579 | int write, void *data) |
2480 | { | 2580 | { |
@@ -2486,10 +2586,10 @@ static int do_proc_dointvec_userhz_jiffies_conv(int *negp, unsigned long *lvalp, | |||
2486 | int val = *valp; | 2586 | int val = *valp; |
2487 | unsigned long lval; | 2587 | unsigned long lval; |
2488 | if (val < 0) { | 2588 | if (val < 0) { |
2489 | *negp = -1; | 2589 | *negp = true; |
2490 | lval = (unsigned long)-val; | 2590 | lval = (unsigned long)-val; |
2491 | } else { | 2591 | } else { |
2492 | *negp = 0; | 2592 | *negp = false; |
2493 | lval = (unsigned long)val; | 2593 | lval = (unsigned long)val; |
2494 | } | 2594 | } |
2495 | *lvalp = jiffies_to_clock_t(lval); | 2595 | *lvalp = jiffies_to_clock_t(lval); |
@@ -2497,7 +2597,7 @@ static int do_proc_dointvec_userhz_jiffies_conv(int *negp, unsigned long *lvalp, | |||
2497 | return 0; | 2597 | return 0; |
2498 | } | 2598 | } |
2499 | 2599 | ||
2500 | static int do_proc_dointvec_ms_jiffies_conv(int *negp, unsigned long *lvalp, | 2600 | static int do_proc_dointvec_ms_jiffies_conv(bool *negp, unsigned long *lvalp, |
2501 | int *valp, | 2601 | int *valp, |
2502 | int write, void *data) | 2602 | int write, void *data) |
2503 | { | 2603 | { |
@@ -2507,10 +2607,10 @@ static int do_proc_dointvec_ms_jiffies_conv(int *negp, unsigned long *lvalp, | |||
2507 | int val = *valp; | 2607 | int val = *valp; |
2508 | unsigned long lval; | 2608 | unsigned long lval; |
2509 | if (val < 0) { | 2609 | if (val < 0) { |
2510 | *negp = -1; | 2610 | *negp = true; |
2511 | lval = (unsigned long)-val; | 2611 | lval = (unsigned long)-val; |
2512 | } else { | 2612 | } else { |
2513 | *negp = 0; | 2613 | *negp = false; |
2514 | lval = (unsigned long)val; | 2614 | lval = (unsigned long)val; |
2515 | } | 2615 | } |
2516 | *lvalp = jiffies_to_msecs(lval); | 2616 | *lvalp = jiffies_to_msecs(lval); |
@@ -2607,6 +2707,157 @@ static int proc_do_cad_pid(struct ctl_table *table, int write, | |||
2607 | return 0; | 2707 | return 0; |
2608 | } | 2708 | } |
2609 | 2709 | ||
2710 | /** | ||
2711 | * proc_do_large_bitmap - read/write from/to a large bitmap | ||
2712 | * @table: the sysctl table | ||
2713 | * @write: %TRUE if this is a write to the sysctl file | ||
2714 | * @buffer: the user buffer | ||
2715 | * @lenp: the size of the user buffer | ||
2716 | * @ppos: file position | ||
2717 | * | ||
2718 | * The bitmap is stored at table->data and the bitmap length (in bits) | ||
2719 | * in table->maxlen. | ||
2720 | * | ||
2721 | * We use a range comma separated format (e.g. 1,3-4,10-10) so that | ||
2722 | * large bitmaps may be represented in a compact manner. Writing into | ||
2723 | * the file will clear the bitmap then update it with the given input. | ||
2724 | * | ||
2725 | * Returns 0 on success. | ||
2726 | */ | ||
2727 | int proc_do_large_bitmap(struct ctl_table *table, int write, | ||
2728 | void __user *buffer, size_t *lenp, loff_t *ppos) | ||
2729 | { | ||
2730 | int err = 0; | ||
2731 | bool first = 1; | ||
2732 | size_t left = *lenp; | ||
2733 | unsigned long bitmap_len = table->maxlen; | ||
2734 | unsigned long *bitmap = (unsigned long *) table->data; | ||
2735 | unsigned long *tmp_bitmap = NULL; | ||
2736 | char tr_a[] = { '-', ',', '\n' }, tr_b[] = { ',', '\n', 0 }, c; | ||
2737 | |||
2738 | if (!bitmap_len || !left || (*ppos && !write)) { | ||
2739 | *lenp = 0; | ||
2740 | return 0; | ||
2741 | } | ||
2742 | |||
2743 | if (write) { | ||
2744 | unsigned long page = 0; | ||
2745 | char *kbuf; | ||
2746 | |||
2747 | if (left > PAGE_SIZE - 1) | ||
2748 | left = PAGE_SIZE - 1; | ||
2749 | |||
2750 | page = __get_free_page(GFP_TEMPORARY); | ||
2751 | kbuf = (char *) page; | ||
2752 | if (!kbuf) | ||
2753 | return -ENOMEM; | ||
2754 | if (copy_from_user(kbuf, buffer, left)) { | ||
2755 | free_page(page); | ||
2756 | return -EFAULT; | ||
2757 | } | ||
2758 | kbuf[left] = 0; | ||
2759 | |||
2760 | tmp_bitmap = kzalloc(BITS_TO_LONGS(bitmap_len) * sizeof(unsigned long), | ||
2761 | GFP_KERNEL); | ||
2762 | if (!tmp_bitmap) { | ||
2763 | free_page(page); | ||
2764 | return -ENOMEM; | ||
2765 | } | ||
2766 | proc_skip_char(&kbuf, &left, '\n'); | ||
2767 | while (!err && left) { | ||
2768 | unsigned long val_a, val_b; | ||
2769 | bool neg; | ||
2770 | |||
2771 | err = proc_get_long(&kbuf, &left, &val_a, &neg, tr_a, | ||
2772 | sizeof(tr_a), &c); | ||
2773 | if (err) | ||
2774 | break; | ||
2775 | if (val_a >= bitmap_len || neg) { | ||
2776 | err = -EINVAL; | ||
2777 | break; | ||
2778 | } | ||
2779 | |||
2780 | val_b = val_a; | ||
2781 | if (left) { | ||
2782 | kbuf++; | ||
2783 | left--; | ||
2784 | } | ||
2785 | |||
2786 | if (c == '-') { | ||
2787 | err = proc_get_long(&kbuf, &left, &val_b, | ||
2788 | &neg, tr_b, sizeof(tr_b), | ||
2789 | &c); | ||
2790 | if (err) | ||
2791 | break; | ||
2792 | if (val_b >= bitmap_len || neg || | ||
2793 | val_a > val_b) { | ||
2794 | err = -EINVAL; | ||
2795 | break; | ||
2796 | } | ||
2797 | if (left) { | ||
2798 | kbuf++; | ||
2799 | left--; | ||
2800 | } | ||
2801 | } | ||
2802 | |||
2803 | while (val_a <= val_b) | ||
2804 | set_bit(val_a++, tmp_bitmap); | ||
2805 | |||
2806 | first = 0; | ||
2807 | proc_skip_char(&kbuf, &left, '\n'); | ||
2808 | } | ||
2809 | free_page(page); | ||
2810 | } else { | ||
2811 | unsigned long bit_a, bit_b = 0; | ||
2812 | |||
2813 | while (left) { | ||
2814 | bit_a = find_next_bit(bitmap, bitmap_len, bit_b); | ||
2815 | if (bit_a >= bitmap_len) | ||
2816 | break; | ||
2817 | bit_b = find_next_zero_bit(bitmap, bitmap_len, | ||
2818 | bit_a + 1) - 1; | ||
2819 | |||
2820 | if (!first) { | ||
2821 | err = proc_put_char(&buffer, &left, ','); | ||
2822 | if (err) | ||
2823 | break; | ||
2824 | } | ||
2825 | err = proc_put_long(&buffer, &left, bit_a, false); | ||
2826 | if (err) | ||
2827 | break; | ||
2828 | if (bit_a != bit_b) { | ||
2829 | err = proc_put_char(&buffer, &left, '-'); | ||
2830 | if (err) | ||
2831 | break; | ||
2832 | err = proc_put_long(&buffer, &left, bit_b, false); | ||
2833 | if (err) | ||
2834 | break; | ||
2835 | } | ||
2836 | |||
2837 | first = 0; bit_b++; | ||
2838 | } | ||
2839 | if (!err) | ||
2840 | err = proc_put_char(&buffer, &left, '\n'); | ||
2841 | } | ||
2842 | |||
2843 | if (!err) { | ||
2844 | if (write) { | ||
2845 | if (*ppos) | ||
2846 | bitmap_or(bitmap, bitmap, tmp_bitmap, bitmap_len); | ||
2847 | else | ||
2848 | memcpy(bitmap, tmp_bitmap, | ||
2849 | BITS_TO_LONGS(bitmap_len) * sizeof(unsigned long)); | ||
2850 | } | ||
2851 | kfree(tmp_bitmap); | ||
2852 | *lenp -= left; | ||
2853 | *ppos += *lenp; | ||
2854 | return 0; | ||
2855 | } else { | ||
2856 | kfree(tmp_bitmap); | ||
2857 | return err; | ||
2858 | } | ||
2859 | } | ||
2860 | |||
2610 | #else /* CONFIG_PROC_FS */ | 2861 | #else /* CONFIG_PROC_FS */ |
2611 | 2862 | ||
2612 | int proc_dostring(struct ctl_table *table, int write, | 2863 | int proc_dostring(struct ctl_table *table, int write, |
diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c index 59030570f5ca..937d31dc8566 100644 --- a/kernel/sysctl_binary.c +++ b/kernel/sysctl_binary.c | |||
@@ -224,7 +224,6 @@ static const struct bin_table bin_net_ipv4_route_table[] = { | |||
224 | { CTL_INT, NET_IPV4_ROUTE_MTU_EXPIRES, "mtu_expires" }, | 224 | { CTL_INT, NET_IPV4_ROUTE_MTU_EXPIRES, "mtu_expires" }, |
225 | { CTL_INT, NET_IPV4_ROUTE_MIN_PMTU, "min_pmtu" }, | 225 | { CTL_INT, NET_IPV4_ROUTE_MIN_PMTU, "min_pmtu" }, |
226 | { CTL_INT, NET_IPV4_ROUTE_MIN_ADVMSS, "min_adv_mss" }, | 226 | { CTL_INT, NET_IPV4_ROUTE_MIN_ADVMSS, "min_adv_mss" }, |
227 | { CTL_INT, NET_IPV4_ROUTE_SECRET_INTERVAL, "secret_interval" }, | ||
228 | {} | 227 | {} |
229 | }; | 228 | }; |
230 | 229 | ||
diff --git a/kernel/time.c b/kernel/time.c index 656dccfe1cbb..50612faa9baf 100644 --- a/kernel/time.c +++ b/kernel/time.c | |||
@@ -132,12 +132,11 @@ SYSCALL_DEFINE2(gettimeofday, struct timeval __user *, tv, | |||
132 | */ | 132 | */ |
133 | static inline void warp_clock(void) | 133 | static inline void warp_clock(void) |
134 | { | 134 | { |
135 | write_seqlock_irq(&xtime_lock); | 135 | struct timespec delta, adjust; |
136 | wall_to_monotonic.tv_sec -= sys_tz.tz_minuteswest * 60; | 136 | delta.tv_sec = sys_tz.tz_minuteswest * 60; |
137 | xtime.tv_sec += sys_tz.tz_minuteswest * 60; | 137 | delta.tv_nsec = 0; |
138 | update_xtime_cache(0); | 138 | adjust = timespec_add_safe(current_kernel_time(), delta); |
139 | write_sequnlock_irq(&xtime_lock); | 139 | do_settimeofday(&adjust); |
140 | clock_was_set(); | ||
141 | } | 140 | } |
142 | 141 | ||
143 | /* | 142 | /* |
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 1f5dde637457..f08e99c1d561 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c | |||
@@ -625,6 +625,54 @@ static void clocksource_enqueue(struct clocksource *cs) | |||
625 | list_add(&cs->list, entry); | 625 | list_add(&cs->list, entry); |
626 | } | 626 | } |
627 | 627 | ||
628 | |||
629 | /* | ||
630 | * Maximum time we expect to go between ticks. This includes idle | ||
631 | * tickless time. It provides the trade off between selecting a | ||
632 | * mult/shift pair that is very precise but can only handle a short | ||
633 | * period of time, vs. a mult/shift pair that can handle long periods | ||
634 | * of time but isn't as precise. | ||
635 | * | ||
636 | * This is a subsystem constant, and actual hardware limitations | ||
637 | * may override it (i.e. clocksources that wrap every 3 seconds). | ||
638 | */ | ||
639 | #define MAX_UPDATE_LENGTH 5 /* Seconds */ | ||
640 | |||
641 | /** | ||
642 | * __clocksource_register_scale - Used to install new clocksources | ||
643 | * @cs: clocksource to be registered | ||
644 | * @scale: Scale factor multiplied against freq to get clocksource hz | ||
645 | * @freq: clocksource frequency (cycles per second) divided by scale | ||
646 | * | ||
647 | * Returns -EBUSY if registration fails, zero otherwise. | ||
648 | * | ||
649 | * This *SHOULD NOT* be called directly! Please use the | ||
650 | * clocksource_register_hz() or clocksource_register_khz() helper functions. | ||
651 | */ | ||
652 | int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq) | ||
653 | { | ||
654 | |||
655 | /* | ||
656 | * Ideally we want to use some of the limits used in | ||
657 | * clocksource_max_deferment, to provide a more informed | ||
658 | * MAX_UPDATE_LENGTH. But for now this just gets the | ||
659 | * register interface working properly. | ||
660 | */ | ||
661 | clocks_calc_mult_shift(&cs->mult, &cs->shift, freq, | ||
662 | NSEC_PER_SEC/scale, | ||
663 | MAX_UPDATE_LENGTH*scale); | ||
664 | cs->max_idle_ns = clocksource_max_deferment(cs); | ||
665 | |||
666 | mutex_lock(&clocksource_mutex); | ||
667 | clocksource_enqueue(cs); | ||
668 | clocksource_select(); | ||
669 | clocksource_enqueue_watchdog(cs); | ||
670 | mutex_unlock(&clocksource_mutex); | ||
671 | return 0; | ||
672 | } | ||
673 | EXPORT_SYMBOL_GPL(__clocksource_register_scale); | ||
674 | |||
675 | |||
628 | /** | 676 | /** |
629 | * clocksource_register - Used to install new clocksources | 677 | * clocksource_register - Used to install new clocksources |
630 | * @t: clocksource to be registered | 678 | * @t: clocksource to be registered |
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 7c0f180d6e9d..c63116863a80 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c | |||
@@ -69,7 +69,7 @@ static s64 time_freq; | |||
69 | /* time at last adjustment (secs): */ | 69 | /* time at last adjustment (secs): */ |
70 | static long time_reftime; | 70 | static long time_reftime; |
71 | 71 | ||
72 | long time_adjust; | 72 | static long time_adjust; |
73 | 73 | ||
74 | /* constant (boot-param configurable) NTP tick adjustment (upscaled) */ | 74 | /* constant (boot-param configurable) NTP tick adjustment (upscaled) */ |
75 | static s64 ntp_tick_adj; | 75 | static s64 ntp_tick_adj; |
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 39f6177fafac..caf8d4d4f5c8 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c | |||
@@ -165,13 +165,6 @@ struct timespec raw_time; | |||
165 | /* flag for if timekeeping is suspended */ | 165 | /* flag for if timekeeping is suspended */ |
166 | int __read_mostly timekeeping_suspended; | 166 | int __read_mostly timekeeping_suspended; |
167 | 167 | ||
168 | static struct timespec xtime_cache __attribute__ ((aligned (16))); | ||
169 | void update_xtime_cache(u64 nsec) | ||
170 | { | ||
171 | xtime_cache = xtime; | ||
172 | timespec_add_ns(&xtime_cache, nsec); | ||
173 | } | ||
174 | |||
175 | /* must hold xtime_lock */ | 168 | /* must hold xtime_lock */ |
176 | void timekeeping_leap_insert(int leapsecond) | 169 | void timekeeping_leap_insert(int leapsecond) |
177 | { | 170 | { |
@@ -332,8 +325,6 @@ int do_settimeofday(struct timespec *tv) | |||
332 | 325 | ||
333 | xtime = *tv; | 326 | xtime = *tv; |
334 | 327 | ||
335 | update_xtime_cache(0); | ||
336 | |||
337 | timekeeper.ntp_error = 0; | 328 | timekeeper.ntp_error = 0; |
338 | ntp_clear(); | 329 | ntp_clear(); |
339 | 330 | ||
@@ -559,7 +550,6 @@ void __init timekeeping_init(void) | |||
559 | } | 550 | } |
560 | set_normalized_timespec(&wall_to_monotonic, | 551 | set_normalized_timespec(&wall_to_monotonic, |
561 | -boot.tv_sec, -boot.tv_nsec); | 552 | -boot.tv_sec, -boot.tv_nsec); |
562 | update_xtime_cache(0); | ||
563 | total_sleep_time.tv_sec = 0; | 553 | total_sleep_time.tv_sec = 0; |
564 | total_sleep_time.tv_nsec = 0; | 554 | total_sleep_time.tv_nsec = 0; |
565 | write_sequnlock_irqrestore(&xtime_lock, flags); | 555 | write_sequnlock_irqrestore(&xtime_lock, flags); |
@@ -593,7 +583,6 @@ static int timekeeping_resume(struct sys_device *dev) | |||
593 | wall_to_monotonic = timespec_sub(wall_to_monotonic, ts); | 583 | wall_to_monotonic = timespec_sub(wall_to_monotonic, ts); |
594 | total_sleep_time = timespec_add_safe(total_sleep_time, ts); | 584 | total_sleep_time = timespec_add_safe(total_sleep_time, ts); |
595 | } | 585 | } |
596 | update_xtime_cache(0); | ||
597 | /* re-base the last cycle value */ | 586 | /* re-base the last cycle value */ |
598 | timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock); | 587 | timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock); |
599 | timekeeper.ntp_error = 0; | 588 | timekeeper.ntp_error = 0; |
@@ -788,7 +777,6 @@ void update_wall_time(void) | |||
788 | { | 777 | { |
789 | struct clocksource *clock; | 778 | struct clocksource *clock; |
790 | cycle_t offset; | 779 | cycle_t offset; |
791 | u64 nsecs; | ||
792 | int shift = 0, maxshift; | 780 | int shift = 0, maxshift; |
793 | 781 | ||
794 | /* Make sure we're fully resumed: */ | 782 | /* Make sure we're fully resumed: */ |
@@ -847,7 +835,9 @@ void update_wall_time(void) | |||
847 | timekeeper.ntp_error += neg << timekeeper.ntp_error_shift; | 835 | timekeeper.ntp_error += neg << timekeeper.ntp_error_shift; |
848 | } | 836 | } |
849 | 837 | ||
850 | /* store full nanoseconds into xtime after rounding it up and | 838 | |
839 | /* | ||
840 | * Store full nanoseconds into xtime after rounding it up and | ||
851 | * add the remainder to the error difference. | 841 | * add the remainder to the error difference. |
852 | */ | 842 | */ |
853 | xtime.tv_nsec = ((s64) timekeeper.xtime_nsec >> timekeeper.shift) + 1; | 843 | xtime.tv_nsec = ((s64) timekeeper.xtime_nsec >> timekeeper.shift) + 1; |
@@ -855,8 +845,15 @@ void update_wall_time(void) | |||
855 | timekeeper.ntp_error += timekeeper.xtime_nsec << | 845 | timekeeper.ntp_error += timekeeper.xtime_nsec << |
856 | timekeeper.ntp_error_shift; | 846 | timekeeper.ntp_error_shift; |
857 | 847 | ||
858 | nsecs = clocksource_cyc2ns(offset, timekeeper.mult, timekeeper.shift); | 848 | /* |
859 | update_xtime_cache(nsecs); | 849 | * Finally, make sure that after the rounding |
850 | * xtime.tv_nsec isn't larger than NSEC_PER_SEC | ||
851 | */ | ||
852 | if (unlikely(xtime.tv_nsec >= NSEC_PER_SEC)) { | ||
853 | xtime.tv_nsec -= NSEC_PER_SEC; | ||
854 | xtime.tv_sec++; | ||
855 | second_overflow(); | ||
856 | } | ||
860 | 857 | ||
861 | /* check to see if there is a new clocksource to use */ | 858 | /* check to see if there is a new clocksource to use */ |
862 | update_vsyscall(&xtime, timekeeper.clock, timekeeper.mult); | 859 | update_vsyscall(&xtime, timekeeper.clock, timekeeper.mult); |
@@ -896,13 +893,13 @@ EXPORT_SYMBOL_GPL(monotonic_to_bootbased); | |||
896 | 893 | ||
897 | unsigned long get_seconds(void) | 894 | unsigned long get_seconds(void) |
898 | { | 895 | { |
899 | return xtime_cache.tv_sec; | 896 | return xtime.tv_sec; |
900 | } | 897 | } |
901 | EXPORT_SYMBOL(get_seconds); | 898 | EXPORT_SYMBOL(get_seconds); |
902 | 899 | ||
903 | struct timespec __current_kernel_time(void) | 900 | struct timespec __current_kernel_time(void) |
904 | { | 901 | { |
905 | return xtime_cache; | 902 | return xtime; |
906 | } | 903 | } |
907 | 904 | ||
908 | struct timespec current_kernel_time(void) | 905 | struct timespec current_kernel_time(void) |
@@ -913,7 +910,7 @@ struct timespec current_kernel_time(void) | |||
913 | do { | 910 | do { |
914 | seq = read_seqbegin(&xtime_lock); | 911 | seq = read_seqbegin(&xtime_lock); |
915 | 912 | ||
916 | now = xtime_cache; | 913 | now = xtime; |
917 | } while (read_seqretry(&xtime_lock, seq)); | 914 | } while (read_seqretry(&xtime_lock, seq)); |
918 | 915 | ||
919 | return now; | 916 | return now; |
@@ -928,7 +925,7 @@ struct timespec get_monotonic_coarse(void) | |||
928 | do { | 925 | do { |
929 | seq = read_seqbegin(&xtime_lock); | 926 | seq = read_seqbegin(&xtime_lock); |
930 | 927 | ||
931 | now = xtime_cache; | 928 | now = xtime; |
932 | mono = wall_to_monotonic; | 929 | mono = wall_to_monotonic; |
933 | } while (read_seqretry(&xtime_lock, seq)); | 930 | } while (read_seqretry(&xtime_lock, seq)); |
934 | 931 | ||
diff --git a/kernel/timer.c b/kernel/timer.c index aeb6a54f2771..9199f3c52215 100644 --- a/kernel/timer.c +++ b/kernel/timer.c | |||
@@ -319,6 +319,24 @@ unsigned long round_jiffies_up_relative(unsigned long j) | |||
319 | } | 319 | } |
320 | EXPORT_SYMBOL_GPL(round_jiffies_up_relative); | 320 | EXPORT_SYMBOL_GPL(round_jiffies_up_relative); |
321 | 321 | ||
322 | /** | ||
323 | * set_timer_slack - set the allowed slack for a timer | ||
324 | * @slack_hz: the amount of time (in jiffies) allowed for rounding | ||
325 | * | ||
326 | * Set the amount of time, in jiffies, that a certain timer has | ||
327 | * in terms of slack. By setting this value, the timer subsystem | ||
328 | * will schedule the actual timer somewhere between | ||
329 | * the time mod_timer() asks for, and that time plus the slack. | ||
330 | * | ||
331 | * By setting the slack to -1, a percentage of the delay is used | ||
332 | * instead. | ||
333 | */ | ||
334 | void set_timer_slack(struct timer_list *timer, int slack_hz) | ||
335 | { | ||
336 | timer->slack = slack_hz; | ||
337 | } | ||
338 | EXPORT_SYMBOL_GPL(set_timer_slack); | ||
339 | |||
322 | 340 | ||
323 | static inline void set_running_timer(struct tvec_base *base, | 341 | static inline void set_running_timer(struct tvec_base *base, |
324 | struct timer_list *timer) | 342 | struct timer_list *timer) |
@@ -550,6 +568,7 @@ static void __init_timer(struct timer_list *timer, | |||
550 | { | 568 | { |
551 | timer->entry.next = NULL; | 569 | timer->entry.next = NULL; |
552 | timer->base = __raw_get_cpu_var(tvec_bases); | 570 | timer->base = __raw_get_cpu_var(tvec_bases); |
571 | timer->slack = -1; | ||
553 | #ifdef CONFIG_TIMER_STATS | 572 | #ifdef CONFIG_TIMER_STATS |
554 | timer->start_site = NULL; | 573 | timer->start_site = NULL; |
555 | timer->start_pid = -1; | 574 | timer->start_pid = -1; |
@@ -715,6 +734,41 @@ int mod_timer_pending(struct timer_list *timer, unsigned long expires) | |||
715 | } | 734 | } |
716 | EXPORT_SYMBOL(mod_timer_pending); | 735 | EXPORT_SYMBOL(mod_timer_pending); |
717 | 736 | ||
737 | /* | ||
738 | * Decide where to put the timer while taking the slack into account | ||
739 | * | ||
740 | * Algorithm: | ||
741 | * 1) calculate the maximum (absolute) time | ||
742 | * 2) calculate the highest bit where the expires and new max are different | ||
743 | * 3) use this bit to make a mask | ||
744 | * 4) use the bitmask to round down the maximum time, so that all last | ||
745 | * bits are zeros | ||
746 | */ | ||
747 | static inline | ||
748 | unsigned long apply_slack(struct timer_list *timer, unsigned long expires) | ||
749 | { | ||
750 | unsigned long expires_limit, mask; | ||
751 | int bit; | ||
752 | |||
753 | expires_limit = expires + timer->slack; | ||
754 | |||
755 | if (timer->slack < 0) /* auto slack: use 0.4% */ | ||
756 | expires_limit = expires + (expires - jiffies)/256; | ||
757 | |||
758 | mask = expires ^ expires_limit; | ||
759 | |||
760 | if (mask == 0) | ||
761 | return expires; | ||
762 | |||
763 | bit = find_last_bit(&mask, BITS_PER_LONG); | ||
764 | |||
765 | mask = (1 << bit) - 1; | ||
766 | |||
767 | expires_limit = expires_limit & ~(mask); | ||
768 | |||
769 | return expires_limit; | ||
770 | } | ||
771 | |||
718 | /** | 772 | /** |
719 | * mod_timer - modify a timer's timeout | 773 | * mod_timer - modify a timer's timeout |
720 | * @timer: the timer to be modified | 774 | * @timer: the timer to be modified |
@@ -745,6 +799,8 @@ int mod_timer(struct timer_list *timer, unsigned long expires) | |||
745 | if (timer_pending(timer) && timer->expires == expires) | 799 | if (timer_pending(timer) && timer->expires == expires) |
746 | return 1; | 800 | return 1; |
747 | 801 | ||
802 | expires = apply_slack(timer, expires); | ||
803 | |||
748 | return __mod_timer(timer, expires, false, TIMER_NOT_PINNED); | 804 | return __mod_timer(timer, expires, false, TIMER_NOT_PINNED); |
749 | } | 805 | } |
750 | EXPORT_SYMBOL(mod_timer); | 806 | EXPORT_SYMBOL(mod_timer); |
@@ -955,6 +1011,47 @@ static int cascade(struct tvec_base *base, struct tvec *tv, int index) | |||
955 | return index; | 1011 | return index; |
956 | } | 1012 | } |
957 | 1013 | ||
1014 | static void call_timer_fn(struct timer_list *timer, void (*fn)(unsigned long), | ||
1015 | unsigned long data) | ||
1016 | { | ||
1017 | int preempt_count = preempt_count(); | ||
1018 | |||
1019 | #ifdef CONFIG_LOCKDEP | ||
1020 | /* | ||
1021 | * It is permissible to free the timer from inside the | ||
1022 | * function that is called from it, this we need to take into | ||
1023 | * account for lockdep too. To avoid bogus "held lock freed" | ||
1024 | * warnings as well as problems when looking into | ||
1025 | * timer->lockdep_map, make a copy and use that here. | ||
1026 | */ | ||
1027 | struct lockdep_map lockdep_map = timer->lockdep_map; | ||
1028 | #endif | ||
1029 | /* | ||
1030 | * Couple the lock chain with the lock chain at | ||
1031 | * del_timer_sync() by acquiring the lock_map around the fn() | ||
1032 | * call here and in del_timer_sync(). | ||
1033 | */ | ||
1034 | lock_map_acquire(&lockdep_map); | ||
1035 | |||
1036 | trace_timer_expire_entry(timer); | ||
1037 | fn(data); | ||
1038 | trace_timer_expire_exit(timer); | ||
1039 | |||
1040 | lock_map_release(&lockdep_map); | ||
1041 | |||
1042 | if (preempt_count != preempt_count()) { | ||
1043 | WARN_ONCE(1, "timer: %pF preempt leak: %08x -> %08x\n", | ||
1044 | fn, preempt_count, preempt_count()); | ||
1045 | /* | ||
1046 | * Restore the preempt count. That gives us a decent | ||
1047 | * chance to survive and extract information. If the | ||
1048 | * callback kept a lock held, bad luck, but not worse | ||
1049 | * than the BUG() we had. | ||
1050 | */ | ||
1051 | preempt_count() = preempt_count; | ||
1052 | } | ||
1053 | } | ||
1054 | |||
958 | #define INDEX(N) ((base->timer_jiffies >> (TVR_BITS + (N) * TVN_BITS)) & TVN_MASK) | 1055 | #define INDEX(N) ((base->timer_jiffies >> (TVR_BITS + (N) * TVN_BITS)) & TVN_MASK) |
959 | 1056 | ||
960 | /** | 1057 | /** |
@@ -998,45 +1095,7 @@ static inline void __run_timers(struct tvec_base *base) | |||
998 | detach_timer(timer, 1); | 1095 | detach_timer(timer, 1); |
999 | 1096 | ||
1000 | spin_unlock_irq(&base->lock); | 1097 | spin_unlock_irq(&base->lock); |
1001 | { | 1098 | call_timer_fn(timer, fn, data); |
1002 | int preempt_count = preempt_count(); | ||
1003 | |||
1004 | #ifdef CONFIG_LOCKDEP | ||
1005 | /* | ||
1006 | * It is permissible to free the timer from | ||
1007 | * inside the function that is called from | ||
1008 | * it, this we need to take into account for | ||
1009 | * lockdep too. To avoid bogus "held lock | ||
1010 | * freed" warnings as well as problems when | ||
1011 | * looking into timer->lockdep_map, make a | ||
1012 | * copy and use that here. | ||
1013 | */ | ||
1014 | struct lockdep_map lockdep_map = | ||
1015 | timer->lockdep_map; | ||
1016 | #endif | ||
1017 | /* | ||
1018 | * Couple the lock chain with the lock chain at | ||
1019 | * del_timer_sync() by acquiring the lock_map | ||
1020 | * around the fn() call here and in | ||
1021 | * del_timer_sync(). | ||
1022 | */ | ||
1023 | lock_map_acquire(&lockdep_map); | ||
1024 | |||
1025 | trace_timer_expire_entry(timer); | ||
1026 | fn(data); | ||
1027 | trace_timer_expire_exit(timer); | ||
1028 | |||
1029 | lock_map_release(&lockdep_map); | ||
1030 | |||
1031 | if (preempt_count != preempt_count()) { | ||
1032 | printk(KERN_ERR "huh, entered %p " | ||
1033 | "with preempt_count %08x, exited" | ||
1034 | " with %08x?\n", | ||
1035 | fn, preempt_count, | ||
1036 | preempt_count()); | ||
1037 | BUG(); | ||
1038 | } | ||
1039 | } | ||
1040 | spin_lock_irq(&base->lock); | 1099 | spin_lock_irq(&base->lock); |
1041 | } | 1100 | } |
1042 | } | 1101 | } |
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 2404c129a8c9..ab13d7008061 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c | |||
@@ -209,6 +209,7 @@ int trace_seq_putc(struct trace_seq *s, unsigned char c) | |||
209 | 209 | ||
210 | return 1; | 210 | return 1; |
211 | } | 211 | } |
212 | EXPORT_SYMBOL(trace_seq_putc); | ||
212 | 213 | ||
213 | int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len) | 214 | int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len) |
214 | { | 215 | { |
@@ -355,6 +356,21 @@ ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val, | |||
355 | } | 356 | } |
356 | EXPORT_SYMBOL(ftrace_print_symbols_seq); | 357 | EXPORT_SYMBOL(ftrace_print_symbols_seq); |
357 | 358 | ||
359 | const char * | ||
360 | ftrace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int buf_len) | ||
361 | { | ||
362 | int i; | ||
363 | const char *ret = p->buffer + p->len; | ||
364 | |||
365 | for (i = 0; i < buf_len; i++) | ||
366 | trace_seq_printf(p, "%s%2.2x", i == 0 ? "" : " ", buf[i]); | ||
367 | |||
368 | trace_seq_putc(p, 0); | ||
369 | |||
370 | return ret; | ||
371 | } | ||
372 | EXPORT_SYMBOL(ftrace_print_hex_seq); | ||
373 | |||
358 | #ifdef CONFIG_KRETPROBES | 374 | #ifdef CONFIG_KRETPROBES |
359 | static inline const char *kretprobed(const char *name) | 375 | static inline const char *kretprobed(const char *name) |
360 | { | 376 | { |
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 076c7c8215b0..b2d70d38dff4 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c | |||
@@ -54,8 +54,8 @@ int create_user_ns(struct cred *new) | |||
54 | #endif | 54 | #endif |
55 | /* tgcred will be cleared in our caller bc CLONE_THREAD won't be set */ | 55 | /* tgcred will be cleared in our caller bc CLONE_THREAD won't be set */ |
56 | 56 | ||
57 | /* alloc_uid() incremented the userns refcount. Just set it to 1 */ | 57 | /* root_user holds a reference to ns, our reference can be dropped */ |
58 | kref_set(&ns->kref, 1); | 58 | put_user_ns(ns); |
59 | 59 | ||
60 | return 0; | 60 | return 0; |
61 | } | 61 | } |
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 5bfb213984b2..77dabbf64b8f 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c | |||
@@ -229,6 +229,16 @@ static inline void set_wq_data(struct work_struct *work, | |||
229 | atomic_long_set(&work->data, new); | 229 | atomic_long_set(&work->data, new); |
230 | } | 230 | } |
231 | 231 | ||
232 | /* | ||
233 | * Clear WORK_STRUCT_PENDING and the workqueue on which it was queued. | ||
234 | */ | ||
235 | static inline void clear_wq_data(struct work_struct *work) | ||
236 | { | ||
237 | unsigned long flags = *work_data_bits(work) & | ||
238 | (1UL << WORK_STRUCT_STATIC); | ||
239 | atomic_long_set(&work->data, flags); | ||
240 | } | ||
241 | |||
232 | static inline | 242 | static inline |
233 | struct cpu_workqueue_struct *get_wq_data(struct work_struct *work) | 243 | struct cpu_workqueue_struct *get_wq_data(struct work_struct *work) |
234 | { | 244 | { |
@@ -671,7 +681,7 @@ static int __cancel_work_timer(struct work_struct *work, | |||
671 | wait_on_work(work); | 681 | wait_on_work(work); |
672 | } while (unlikely(ret < 0)); | 682 | } while (unlikely(ret < 0)); |
673 | 683 | ||
674 | work_clear_pending(work); | 684 | clear_wq_data(work); |
675 | return ret; | 685 | return ret; |
676 | } | 686 | } |
677 | 687 | ||
@@ -845,6 +855,30 @@ int schedule_on_each_cpu(work_func_t func) | |||
845 | return 0; | 855 | return 0; |
846 | } | 856 | } |
847 | 857 | ||
858 | /** | ||
859 | * flush_scheduled_work - ensure that any scheduled work has run to completion. | ||
860 | * | ||
861 | * Forces execution of the kernel-global workqueue and blocks until its | ||
862 | * completion. | ||
863 | * | ||
864 | * Think twice before calling this function! It's very easy to get into | ||
865 | * trouble if you don't take great care. Either of the following situations | ||
866 | * will lead to deadlock: | ||
867 | * | ||
868 | * One of the work items currently on the workqueue needs to acquire | ||
869 | * a lock held by your code or its caller. | ||
870 | * | ||
871 | * Your code is running in the context of a work routine. | ||
872 | * | ||
873 | * They will be detected by lockdep when they occur, but the first might not | ||
874 | * occur very often. It depends on what work items are on the workqueue and | ||
875 | * what locks they need, which you have no control over. | ||
876 | * | ||
877 | * In most situations flushing the entire workqueue is overkill; you merely | ||
878 | * need to know that a particular work item isn't queued and isn't running. | ||
879 | * In such cases you should use cancel_delayed_work_sync() or | ||
880 | * cancel_work_sync() instead. | ||
881 | */ | ||
848 | void flush_scheduled_work(void) | 882 | void flush_scheduled_work(void) |
849 | { | 883 | { |
850 | flush_workqueue(keventd_wq); | 884 | flush_workqueue(keventd_wq); |