author      Anton Altaparmakov <aia21@cantab.net>   2005-06-25 09:27:27 -0400
committer   Anton Altaparmakov <aia21@cantab.net>   2005-06-25 09:27:27 -0400
commit      38b22b6e9f46ab8f73ef5734f0e0a000766a9258 (patch)
tree        2ccc41ef55918d3af43e444bde7648562a031559 /kernel
parent      3357d4c75f1fb67e7304998c4ad4e9a9fed66fa4 (diff)
parent      b3e112bcc19abd8e9657dca34a87316786e096f3 (diff)
Automerge with /usr/src/ntfs-2.6.git.
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/Kconfig.hz      |  46
-rw-r--r--  kernel/cpuset.c        |   8
-rw-r--r--  kernel/exit.c          |  16
-rw-r--r--  kernel/irq/spurious.c  |   2
-rw-r--r--  kernel/kmod.c          |  17
-rw-r--r--  kernel/kprobes.c       | 288
-rw-r--r--  kernel/module.c        |  97
-rw-r--r--  kernel/posix-timers.c  |  34
-rw-r--r--  kernel/power/swsusp.c  |   2
-rw-r--r--  kernel/printk.c        |  12
-rw-r--r--  kernel/sched.c         |   4
-rw-r--r--  kernel/signal.c        |   1
-rw-r--r--  kernel/sys.c           | 110
-rw-r--r--  kernel/sysctl.c        |   9
-rw-r--r--  kernel/timer.c         | 351
15 files changed, 718 insertions(+), 279 deletions(-)
diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz
new file mode 100644
index 000000000000..248e1c396f8b
--- /dev/null
+++ b/kernel/Kconfig.hz
@@ -0,0 +1,46 @@ | |||
1 | # | ||
2 | # Timer Interrupt Frequency Configuration | ||
3 | # | ||
4 | |||
5 | choice | ||
6 | prompt "Timer frequency" | ||
7 | default HZ_250 | ||
8 | help | ||
9 | Allows the configuration of the timer frequency. It is customary | ||
10 | to have the timer interrupt run at 1000 HZ but 100 HZ may be more | ||
11 | beneficial for servers and NUMA systems that do not need to have | ||
12 | a fast response for user interaction and that may experience bus | ||
13 | contention and cacheline bounces as a result of timer interrupts. | ||
14 | Note that the timer interrupt occurs on each processor in an SMP | ||
15 | environment leading to NR_CPUS * HZ number of timer interrupts | ||
16 | per second. | ||
17 | |||
18 | |||
19 | config HZ_100 | ||
20 | bool "100 HZ" | ||
21 | help | ||
22 | 100 HZ is a typical choice for servers, SMP and NUMA systems | ||
23 | with lots of processors that may show reduced performance if | ||
24 | too many timer interrupts are occurring. | ||
25 | |||
26 | config HZ_250 | ||
27 | bool "250 HZ" | ||
28 | help | ||
29 | 250 HZ is a good compromise choice allowing server performance | ||
30 | while also showing good interactive responsiveness even | ||
31 | on SMP and NUMA systems. | ||
32 | |||
33 | config HZ_1000 | ||
34 | bool "1000 HZ" | ||
35 | help | ||
36 | 1000 HZ is the preferred choice for desktop systems and other | ||
37 | systems requiring fast interactive responses to events. | ||
38 | |||
39 | endchoice | ||
40 | |||
41 | config HZ | ||
42 | int | ||
43 | default 100 if HZ_100 | ||
44 | default 250 if HZ_250 | ||
45 | default 1000 if HZ_1000 | ||
46 | |||
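The help text above fixes only the tick granularity; HZ then becomes the unit for jiffies-based timeouts, and on SMP every CPU takes its own tick, so a 4-way box at 1000 HZ services about 4000 timer interrupts per second. As a minimal sketch, not part of this patch (arm_in_20ms is a hypothetical helper), converting a millisecond delay into ticks looks like this:

    #include <linux/jiffies.h>      /* jiffies, HZ */
    #include <linux/timer.h>

    /* Hypothetical helper: fire a timer roughly 20 ms from now.  With
     * HZ=1000 the request is 20 ticks; with HZ=100 it is 2 ticks, and
     * anything shorter than 10 ms cannot be expressed at all - which is
     * the responsiveness trade-off the choice above is making. */
    static void arm_in_20ms(struct timer_list *t)
    {
            unsigned long ticks = (20 * HZ + 999) / 1000;   /* round up */

            mod_timer(t, jiffies + ticks);
    }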
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 00e8f2575512..79dd929f4084 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -228,13 +228,7 @@ static struct dentry_operations cpuset_dops = { | |||
228 | 228 | ||
229 | static struct dentry *cpuset_get_dentry(struct dentry *parent, const char *name) | 229 | static struct dentry *cpuset_get_dentry(struct dentry *parent, const char *name) |
230 | { | 230 | { |
231 | struct qstr qstr; | 231 | struct dentry *d = lookup_one_len(name, parent, strlen(name)); |
232 | struct dentry *d; | ||
233 | |||
234 | qstr.name = name; | ||
235 | qstr.len = strlen(name); | ||
236 | qstr.hash = full_name_hash(name, qstr.len); | ||
237 | d = lookup_hash(&qstr, parent); | ||
238 | if (!IS_ERR(d)) | 232 | if (!IS_ERR(d)) |
239 | d->d_op = &cpuset_dops; | 233 | d->d_op = &cpuset_dops; |
240 | return d; | 234 | return d; |
diff --git a/kernel/exit.c b/kernel/exit.c
index 2ef2ad540201..3ebcd60a19c6 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -72,6 +72,11 @@ repeat: | |||
72 | BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children)); | 72 | BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children)); |
73 | __exit_signal(p); | 73 | __exit_signal(p); |
74 | __exit_sighand(p); | 74 | __exit_sighand(p); |
75 | /* | ||
76 | * Note that the fastpath in sys_times depends on __exit_signal having | ||
77 | * updated the counters before a task is removed from the tasklist of | ||
78 | * the process by __unhash_process. | ||
79 | */ | ||
75 | __unhash_process(p); | 80 | __unhash_process(p); |
76 | 81 | ||
77 | /* | 82 | /* |
@@ -793,6 +798,17 @@ fastcall NORET_TYPE void do_exit(long code) | |||
793 | ptrace_notify((PTRACE_EVENT_EXIT << 8) | SIGTRAP); | 798 | ptrace_notify((PTRACE_EVENT_EXIT << 8) | SIGTRAP); |
794 | } | 799 | } |
795 | 800 | ||
801 | /* | ||
802 | * We're taking recursive faults here in do_exit. Safest is to just | ||
803 | * leave this task alone and wait for reboot. | ||
804 | */ | ||
805 | if (unlikely(tsk->flags & PF_EXITING)) { | ||
806 | printk(KERN_ALERT | ||
807 | "Fixing recursive fault but reboot is needed!\n"); | ||
808 | set_current_state(TASK_UNINTERRUPTIBLE); | ||
809 | schedule(); | ||
810 | } | ||
811 | |||
796 | tsk->flags |= PF_EXITING; | 812 | tsk->flags |= PF_EXITING; |
797 | 813 | ||
798 | /* | 814 | /* |
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index f6297c306905..ba039e827d58 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -45,7 +45,7 @@ __report_bad_irq(unsigned int irq, irq_desc_t *desc, irqreturn_t action_ret) | |||
45 | } | 45 | } |
46 | } | 46 | } |
47 | 47 | ||
48 | void report_bad_irq(unsigned int irq, irq_desc_t *desc, irqreturn_t action_ret) | 48 | static void report_bad_irq(unsigned int irq, irq_desc_t *desc, irqreturn_t action_ret) |
49 | { | 49 | { |
50 | static int count = 100; | 50 | static int count = 100; |
51 | 51 | ||
diff --git a/kernel/kmod.c b/kernel/kmod.c
index eed53d4f5230..44166e3bb8af 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -120,6 +120,7 @@ struct subprocess_info { | |||
120 | char *path; | 120 | char *path; |
121 | char **argv; | 121 | char **argv; |
122 | char **envp; | 122 | char **envp; |
123 | struct key *ring; | ||
123 | int wait; | 124 | int wait; |
124 | int retval; | 125 | int retval; |
125 | }; | 126 | }; |
@@ -130,16 +131,21 @@ struct subprocess_info { | |||
130 | static int ____call_usermodehelper(void *data) | 131 | static int ____call_usermodehelper(void *data) |
131 | { | 132 | { |
132 | struct subprocess_info *sub_info = data; | 133 | struct subprocess_info *sub_info = data; |
134 | struct key *old_session; | ||
133 | int retval; | 135 | int retval; |
134 | 136 | ||
135 | /* Unblock all signals. */ | 137 | /* Unblock all signals and set the session keyring. */ |
138 | key_get(sub_info->ring); | ||
136 | flush_signals(current); | 139 | flush_signals(current); |
137 | spin_lock_irq(¤t->sighand->siglock); | 140 | spin_lock_irq(¤t->sighand->siglock); |
141 | old_session = __install_session_keyring(current, sub_info->ring); | ||
138 | flush_signal_handlers(current, 1); | 142 | flush_signal_handlers(current, 1); |
139 | sigemptyset(¤t->blocked); | 143 | sigemptyset(¤t->blocked); |
140 | recalc_sigpending(); | 144 | recalc_sigpending(); |
141 | spin_unlock_irq(¤t->sighand->siglock); | 145 | spin_unlock_irq(¤t->sighand->siglock); |
142 | 146 | ||
147 | key_put(old_session); | ||
148 | |||
143 | /* We can run anywhere, unlike our parent keventd(). */ | 149 | /* We can run anywhere, unlike our parent keventd(). */ |
144 | set_cpus_allowed(current, CPU_MASK_ALL); | 150 | set_cpus_allowed(current, CPU_MASK_ALL); |
145 | 151 | ||
@@ -211,10 +217,11 @@ static void __call_usermodehelper(void *data) | |||
211 | } | 217 | } |
212 | 218 | ||
213 | /** | 219 | /** |
214 | * call_usermodehelper - start a usermode application | 220 | * call_usermodehelper_keys - start a usermode application |
215 | * @path: pathname for the application | 221 | * @path: pathname for the application |
216 | * @argv: null-terminated argument list | 222 | * @argv: null-terminated argument list |
217 | * @envp: null-terminated environment list | 223 | * @envp: null-terminated environment list |
224 | * @session_keyring: session keyring for process (NULL for an empty keyring) | ||
218 | * @wait: wait for the application to finish and return status. | 225 | * @wait: wait for the application to finish and return status. |
219 | * | 226 | * |
220 | * Runs a user-space application. The application is started | 227 | * Runs a user-space application. The application is started |
@@ -224,7 +231,8 @@ static void __call_usermodehelper(void *data) | |||
224 | * Must be called from process context. Returns a negative error code | 231 | * Must be called from process context. Returns a negative error code |
225 | * if program was not execed successfully, or 0. | 232 | * if program was not execed successfully, or 0. |
226 | */ | 233 | */ |
227 | int call_usermodehelper(char *path, char **argv, char **envp, int wait) | 234 | int call_usermodehelper_keys(char *path, char **argv, char **envp, |
235 | struct key *session_keyring, int wait) | ||
228 | { | 236 | { |
229 | DECLARE_COMPLETION(done); | 237 | DECLARE_COMPLETION(done); |
230 | struct subprocess_info sub_info = { | 238 | struct subprocess_info sub_info = { |
@@ -232,6 +240,7 @@ int call_usermodehelper(char *path, char **argv, char **envp, int wait) | |||
232 | .path = path, | 240 | .path = path, |
233 | .argv = argv, | 241 | .argv = argv, |
234 | .envp = envp, | 242 | .envp = envp, |
243 | .ring = session_keyring, | ||
235 | .wait = wait, | 244 | .wait = wait, |
236 | .retval = 0, | 245 | .retval = 0, |
237 | }; | 246 | }; |
@@ -247,7 +256,7 @@ int call_usermodehelper(char *path, char **argv, char **envp, int wait) | |||
247 | wait_for_completion(&done); | 256 | wait_for_completion(&done); |
248 | return sub_info.retval; | 257 | return sub_info.retval; |
249 | } | 258 | } |
250 | EXPORT_SYMBOL(call_usermodehelper); | 259 | EXPORT_SYMBOL(call_usermodehelper_keys); |
251 | 260 | ||
252 | void __init usermodehelper_init(void) | 261 | void __init usermodehelper_init(void) |
253 | { | 262 | { |
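For reference, a caller of the new interface might look like the sketch below. It is illustrative only (the helper path, argv and envp values are made up); per the kerneldoc added above, passing a NULL keyring gives the helper an empty session keyring, and wait=1 blocks until the helper exits and returns its status.

    #include <linux/kmod.h>
    #include <linux/key.h>

    /* Illustrative only: run a userspace helper with an explicit
     * session keyring installed before it execs. */
    static int run_helper_with_keys(struct key *keyring)
    {
            char *argv[] = { "/sbin/my-helper", "--setup", NULL };
            char *envp[] = { "HOME=/",
                             "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL };

            return call_usermodehelper_keys("/sbin/my-helper", argv, envp,
                                            keyring, 1);
    }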
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 037142b72a49..334f37472c56 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -27,6 +27,9 @@ | |||
27 | * interface to access function arguments. | 27 | * interface to access function arguments. |
28 | * 2004-Sep Prasanna S Panchamukhi <prasanna@in.ibm.com> Changed Kprobes | 28 | * 2004-Sep Prasanna S Panchamukhi <prasanna@in.ibm.com> Changed Kprobes |
29 | * exceptions notifier to be first on the priority list. | 29 | * exceptions notifier to be first on the priority list. |
30 | * 2005-May Hien Nguyen <hien@us.ibm.com>, Jim Keniston | ||
31 | * <jkenisto@us.ibm.com> and Prasanna S Panchamukhi | ||
32 | * <prasanna@in.ibm.com> added function-return probes. | ||
30 | */ | 33 | */ |
31 | #include <linux/kprobes.h> | 34 | #include <linux/kprobes.h> |
32 | #include <linux/spinlock.h> | 35 | #include <linux/spinlock.h> |
@@ -41,6 +44,7 @@ | |||
41 | #define KPROBE_TABLE_SIZE (1 << KPROBE_HASH_BITS) | 44 | #define KPROBE_TABLE_SIZE (1 << KPROBE_HASH_BITS) |
42 | 45 | ||
43 | static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE]; | 46 | static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE]; |
47 | static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE]; | ||
44 | 48 | ||
45 | unsigned int kprobe_cpu = NR_CPUS; | 49 | unsigned int kprobe_cpu = NR_CPUS; |
46 | static DEFINE_SPINLOCK(kprobe_lock); | 50 | static DEFINE_SPINLOCK(kprobe_lock); |
@@ -78,22 +82,23 @@ struct kprobe *get_kprobe(void *addr) | |||
78 | * Aggregate handlers for multiple kprobes support - these handlers | 82 | * Aggregate handlers for multiple kprobes support - these handlers |
79 | * take care of invoking the individual kprobe handlers on p->list | 83 | * take care of invoking the individual kprobe handlers on p->list |
80 | */ | 84 | */ |
81 | int aggr_pre_handler(struct kprobe *p, struct pt_regs *regs) | 85 | static int aggr_pre_handler(struct kprobe *p, struct pt_regs *regs) |
82 | { | 86 | { |
83 | struct kprobe *kp; | 87 | struct kprobe *kp; |
84 | 88 | ||
85 | list_for_each_entry(kp, &p->list, list) { | 89 | list_for_each_entry(kp, &p->list, list) { |
86 | if (kp->pre_handler) { | 90 | if (kp->pre_handler) { |
87 | curr_kprobe = kp; | 91 | curr_kprobe = kp; |
88 | kp->pre_handler(kp, regs); | 92 | if (kp->pre_handler(kp, regs)) |
89 | curr_kprobe = NULL; | 93 | return 1; |
90 | } | 94 | } |
95 | curr_kprobe = NULL; | ||
91 | } | 96 | } |
92 | return 0; | 97 | return 0; |
93 | } | 98 | } |
94 | 99 | ||
95 | void aggr_post_handler(struct kprobe *p, struct pt_regs *regs, | 100 | static void aggr_post_handler(struct kprobe *p, struct pt_regs *regs, |
96 | unsigned long flags) | 101 | unsigned long flags) |
97 | { | 102 | { |
98 | struct kprobe *kp; | 103 | struct kprobe *kp; |
99 | 104 | ||
@@ -107,7 +112,8 @@ void aggr_post_handler(struct kprobe *p, struct pt_regs *regs, | |||
107 | return; | 112 | return; |
108 | } | 113 | } |
109 | 114 | ||
110 | int aggr_fault_handler(struct kprobe *p, struct pt_regs *regs, int trapnr) | 115 | static int aggr_fault_handler(struct kprobe *p, struct pt_regs *regs, |
116 | int trapnr) | ||
111 | { | 117 | { |
112 | /* | 118 | /* |
113 | * if we faulted "during" the execution of a user specified | 119 | * if we faulted "during" the execution of a user specified |
@@ -120,19 +126,191 @@ int aggr_fault_handler(struct kprobe *p, struct pt_regs *regs, int trapnr) | |||
120 | return 0; | 126 | return 0; |
121 | } | 127 | } |
122 | 128 | ||
129 | static int aggr_break_handler(struct kprobe *p, struct pt_regs *regs) | ||
130 | { | ||
131 | struct kprobe *kp = curr_kprobe; | ||
132 | if (curr_kprobe && kp->break_handler) { | ||
133 | if (kp->break_handler(kp, regs)) { | ||
134 | curr_kprobe = NULL; | ||
135 | return 1; | ||
136 | } | ||
137 | } | ||
138 | curr_kprobe = NULL; | ||
139 | return 0; | ||
140 | } | ||
141 | |||
142 | struct kprobe trampoline_p = { | ||
143 | .addr = (kprobe_opcode_t *) &kretprobe_trampoline, | ||
144 | .pre_handler = trampoline_probe_handler, | ||
145 | .post_handler = trampoline_post_handler | ||
146 | }; | ||
147 | |||
148 | struct kretprobe_instance *get_free_rp_inst(struct kretprobe *rp) | ||
149 | { | ||
150 | struct hlist_node *node; | ||
151 | struct kretprobe_instance *ri; | ||
152 | hlist_for_each_entry(ri, node, &rp->free_instances, uflist) | ||
153 | return ri; | ||
154 | return NULL; | ||
155 | } | ||
156 | |||
157 | static struct kretprobe_instance *get_used_rp_inst(struct kretprobe *rp) | ||
158 | { | ||
159 | struct hlist_node *node; | ||
160 | struct kretprobe_instance *ri; | ||
161 | hlist_for_each_entry(ri, node, &rp->used_instances, uflist) | ||
162 | return ri; | ||
163 | return NULL; | ||
164 | } | ||
165 | |||
166 | struct kretprobe_instance *get_rp_inst(void *sara) | ||
167 | { | ||
168 | struct hlist_head *head; | ||
169 | struct hlist_node *node; | ||
170 | struct task_struct *tsk; | ||
171 | struct kretprobe_instance *ri; | ||
172 | |||
173 | tsk = arch_get_kprobe_task(sara); | ||
174 | head = &kretprobe_inst_table[hash_ptr(tsk, KPROBE_HASH_BITS)]; | ||
175 | hlist_for_each_entry(ri, node, head, hlist) { | ||
176 | if (ri->stack_addr == sara) | ||
177 | return ri; | ||
178 | } | ||
179 | return NULL; | ||
180 | } | ||
181 | |||
182 | void add_rp_inst(struct kretprobe_instance *ri) | ||
183 | { | ||
184 | struct task_struct *tsk; | ||
185 | /* | ||
186 | * Remove rp inst off the free list - | ||
187 | * Add it back when probed function returns | ||
188 | */ | ||
189 | hlist_del(&ri->uflist); | ||
190 | tsk = arch_get_kprobe_task(ri->stack_addr); | ||
191 | /* Add rp inst onto table */ | ||
192 | INIT_HLIST_NODE(&ri->hlist); | ||
193 | hlist_add_head(&ri->hlist, | ||
194 | &kretprobe_inst_table[hash_ptr(tsk, KPROBE_HASH_BITS)]); | ||
195 | |||
196 | /* Also add this rp inst to the used list. */ | ||
197 | INIT_HLIST_NODE(&ri->uflist); | ||
198 | hlist_add_head(&ri->uflist, &ri->rp->used_instances); | ||
199 | } | ||
200 | |||
201 | void recycle_rp_inst(struct kretprobe_instance *ri) | ||
202 | { | ||
203 | /* remove rp inst off the rprobe_inst_table */ | ||
204 | hlist_del(&ri->hlist); | ||
205 | if (ri->rp) { | ||
206 | /* remove rp inst off the used list */ | ||
207 | hlist_del(&ri->uflist); | ||
208 | /* put rp inst back onto the free list */ | ||
209 | INIT_HLIST_NODE(&ri->uflist); | ||
210 | hlist_add_head(&ri->uflist, &ri->rp->free_instances); | ||
211 | } else | ||
212 | /* Unregistering */ | ||
213 | kfree(ri); | ||
214 | } | ||
215 | |||
216 | struct hlist_head * kretprobe_inst_table_head(struct task_struct *tsk) | ||
217 | { | ||
218 | return &kretprobe_inst_table[hash_ptr(tsk, KPROBE_HASH_BITS)]; | ||
219 | } | ||
220 | |||
221 | struct kretprobe_instance *get_rp_inst_tsk(struct task_struct *tk) | ||
222 | { | ||
223 | struct task_struct *tsk; | ||
224 | struct hlist_head *head; | ||
225 | struct hlist_node *node; | ||
226 | struct kretprobe_instance *ri; | ||
227 | |||
228 | head = &kretprobe_inst_table[hash_ptr(tk, KPROBE_HASH_BITS)]; | ||
229 | |||
230 | hlist_for_each_entry(ri, node, head, hlist) { | ||
231 | tsk = arch_get_kprobe_task(ri->stack_addr); | ||
232 | if (tsk == tk) | ||
233 | return ri; | ||
234 | } | ||
235 | return NULL; | ||
236 | } | ||
237 | |||
238 | /* | ||
239 | * This function is called from do_exit or do_execv when task tk's stack is | ||
240 | * about to be recycled. Recycle any function-return probe instances | ||
241 | * associated with this task. These represent probed functions that have | ||
242 | * been called but may never return. | ||
243 | */ | ||
244 | void kprobe_flush_task(struct task_struct *tk) | ||
245 | { | ||
246 | unsigned long flags = 0; | ||
247 | spin_lock_irqsave(&kprobe_lock, flags); | ||
248 | arch_kprobe_flush_task(tk); | ||
249 | spin_unlock_irqrestore(&kprobe_lock, flags); | ||
250 | } | ||
251 | |||
252 | /* | ||
253 | * This kprobe pre_handler is registered with every kretprobe. When probe | ||
254 | * hits it will set up the return probe. | ||
255 | */ | ||
256 | static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs) | ||
257 | { | ||
258 | struct kretprobe *rp = container_of(p, struct kretprobe, kp); | ||
259 | |||
260 | /*TODO: consider to only swap the RA after the last pre_handler fired */ | ||
261 | arch_prepare_kretprobe(rp, regs); | ||
262 | return 0; | ||
263 | } | ||
264 | |||
265 | static inline void free_rp_inst(struct kretprobe *rp) | ||
266 | { | ||
267 | struct kretprobe_instance *ri; | ||
268 | while ((ri = get_free_rp_inst(rp)) != NULL) { | ||
269 | hlist_del(&ri->uflist); | ||
270 | kfree(ri); | ||
271 | } | ||
272 | } | ||
273 | |||
274 | /* | ||
275 | * Keep all fields in the kprobe consistent | ||
276 | */ | ||
277 | static inline void copy_kprobe(struct kprobe *old_p, struct kprobe *p) | ||
278 | { | ||
279 | memcpy(&p->opcode, &old_p->opcode, sizeof(kprobe_opcode_t)); | ||
280 | memcpy(&p->ainsn, &old_p->ainsn, sizeof(struct arch_specific_insn)); | ||
281 | } | ||
282 | |||
283 | /* | ||
284 | * Add the new probe to old_p->list. Fail if this is the | ||
285 | * second jprobe at the address - two jprobes can't coexist | ||
286 | */ | ||
287 | static int add_new_kprobe(struct kprobe *old_p, struct kprobe *p) | ||
288 | { | ||
289 | struct kprobe *kp; | ||
290 | |||
291 | if (p->break_handler) { | ||
292 | list_for_each_entry(kp, &old_p->list, list) { | ||
293 | if (kp->break_handler) | ||
294 | return -EEXIST; | ||
295 | } | ||
296 | list_add_tail(&p->list, &old_p->list); | ||
297 | } else | ||
298 | list_add(&p->list, &old_p->list); | ||
299 | return 0; | ||
300 | } | ||
301 | |||
123 | /* | 302 | /* |
124 | * Fill in the required fields of the "manager kprobe". Replace the | 303 | * Fill in the required fields of the "manager kprobe". Replace the |
125 | * earlier kprobe in the hlist with the manager kprobe | 304 | * earlier kprobe in the hlist with the manager kprobe |
126 | */ | 305 | */ |
127 | static inline void add_aggr_kprobe(struct kprobe *ap, struct kprobe *p) | 306 | static inline void add_aggr_kprobe(struct kprobe *ap, struct kprobe *p) |
128 | { | 307 | { |
308 | copy_kprobe(p, ap); | ||
129 | ap->addr = p->addr; | 309 | ap->addr = p->addr; |
130 | ap->opcode = p->opcode; | ||
131 | memcpy(&ap->ainsn, &p->ainsn, sizeof(struct arch_specific_insn)); | ||
132 | |||
133 | ap->pre_handler = aggr_pre_handler; | 310 | ap->pre_handler = aggr_pre_handler; |
134 | ap->post_handler = aggr_post_handler; | 311 | ap->post_handler = aggr_post_handler; |
135 | ap->fault_handler = aggr_fault_handler; | 312 | ap->fault_handler = aggr_fault_handler; |
313 | ap->break_handler = aggr_break_handler; | ||
136 | 314 | ||
137 | INIT_LIST_HEAD(&ap->list); | 315 | INIT_LIST_HEAD(&ap->list); |
138 | list_add(&p->list, &ap->list); | 316 | list_add(&p->list, &ap->list); |
@@ -153,16 +331,16 @@ static int register_aggr_kprobe(struct kprobe *old_p, struct kprobe *p) | |||
153 | int ret = 0; | 331 | int ret = 0; |
154 | struct kprobe *ap; | 332 | struct kprobe *ap; |
155 | 333 | ||
156 | if (old_p->break_handler || p->break_handler) { | 334 | if (old_p->pre_handler == aggr_pre_handler) { |
157 | ret = -EEXIST; /* kprobe and jprobe can't (yet) coexist */ | 335 | copy_kprobe(old_p, p); |
158 | } else if (old_p->pre_handler == aggr_pre_handler) { | 336 | ret = add_new_kprobe(old_p, p); |
159 | list_add(&p->list, &old_p->list); | ||
160 | } else { | 337 | } else { |
161 | ap = kcalloc(1, sizeof(struct kprobe), GFP_ATOMIC); | 338 | ap = kcalloc(1, sizeof(struct kprobe), GFP_ATOMIC); |
162 | if (!ap) | 339 | if (!ap) |
163 | return -ENOMEM; | 340 | return -ENOMEM; |
164 | add_aggr_kprobe(ap, old_p); | 341 | add_aggr_kprobe(ap, old_p); |
165 | list_add(&p->list, &ap->list); | 342 | copy_kprobe(ap, p); |
343 | ret = add_new_kprobe(ap, p); | ||
166 | } | 344 | } |
167 | return ret; | 345 | return ret; |
168 | } | 346 | } |
@@ -170,10 +348,8 @@ static int register_aggr_kprobe(struct kprobe *old_p, struct kprobe *p) | |||
170 | /* kprobe removal house-keeping routines */ | 348 | /* kprobe removal house-keeping routines */ |
171 | static inline void cleanup_kprobe(struct kprobe *p, unsigned long flags) | 349 | static inline void cleanup_kprobe(struct kprobe *p, unsigned long flags) |
172 | { | 350 | { |
173 | *p->addr = p->opcode; | 351 | arch_disarm_kprobe(p); |
174 | hlist_del(&p->hlist); | 352 | hlist_del(&p->hlist); |
175 | flush_icache_range((unsigned long) p->addr, | ||
176 | (unsigned long) p->addr + sizeof(kprobe_opcode_t)); | ||
177 | spin_unlock_irqrestore(&kprobe_lock, flags); | 353 | spin_unlock_irqrestore(&kprobe_lock, flags); |
178 | arch_remove_kprobe(p); | 354 | arch_remove_kprobe(p); |
179 | } | 355 | } |
@@ -200,6 +376,7 @@ int register_kprobe(struct kprobe *p) | |||
200 | } | 376 | } |
201 | spin_lock_irqsave(&kprobe_lock, flags); | 377 | spin_lock_irqsave(&kprobe_lock, flags); |
202 | old_p = get_kprobe(p->addr); | 378 | old_p = get_kprobe(p->addr); |
379 | p->nmissed = 0; | ||
203 | if (old_p) { | 380 | if (old_p) { |
204 | ret = register_aggr_kprobe(old_p, p); | 381 | ret = register_aggr_kprobe(old_p, p); |
205 | goto out; | 382 | goto out; |
@@ -210,10 +387,8 @@ int register_kprobe(struct kprobe *p) | |||
210 | hlist_add_head(&p->hlist, | 387 | hlist_add_head(&p->hlist, |
211 | &kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]); | 388 | &kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]); |
212 | 389 | ||
213 | p->opcode = *p->addr; | 390 | arch_arm_kprobe(p); |
214 | *p->addr = BREAKPOINT_INSTRUCTION; | 391 | |
215 | flush_icache_range((unsigned long) p->addr, | ||
216 | (unsigned long) p->addr + sizeof(kprobe_opcode_t)); | ||
217 | out: | 392 | out: |
218 | spin_unlock_irqrestore(&kprobe_lock, flags); | 393 | spin_unlock_irqrestore(&kprobe_lock, flags); |
219 | rm_kprobe: | 394 | rm_kprobe: |
@@ -257,16 +432,82 @@ void unregister_jprobe(struct jprobe *jp) | |||
257 | unregister_kprobe(&jp->kp); | 432 | unregister_kprobe(&jp->kp); |
258 | } | 433 | } |
259 | 434 | ||
435 | #ifdef ARCH_SUPPORTS_KRETPROBES | ||
436 | |||
437 | int register_kretprobe(struct kretprobe *rp) | ||
438 | { | ||
439 | int ret = 0; | ||
440 | struct kretprobe_instance *inst; | ||
441 | int i; | ||
442 | |||
443 | rp->kp.pre_handler = pre_handler_kretprobe; | ||
444 | |||
445 | /* Pre-allocate memory for max kretprobe instances */ | ||
446 | if (rp->maxactive <= 0) { | ||
447 | #ifdef CONFIG_PREEMPT | ||
448 | rp->maxactive = max(10, 2 * NR_CPUS); | ||
449 | #else | ||
450 | rp->maxactive = NR_CPUS; | ||
451 | #endif | ||
452 | } | ||
453 | INIT_HLIST_HEAD(&rp->used_instances); | ||
454 | INIT_HLIST_HEAD(&rp->free_instances); | ||
455 | for (i = 0; i < rp->maxactive; i++) { | ||
456 | inst = kmalloc(sizeof(struct kretprobe_instance), GFP_KERNEL); | ||
457 | if (inst == NULL) { | ||
458 | free_rp_inst(rp); | ||
459 | return -ENOMEM; | ||
460 | } | ||
461 | INIT_HLIST_NODE(&inst->uflist); | ||
462 | hlist_add_head(&inst->uflist, &rp->free_instances); | ||
463 | } | ||
464 | |||
465 | rp->nmissed = 0; | ||
466 | /* Establish function entry probe point */ | ||
467 | if ((ret = register_kprobe(&rp->kp)) != 0) | ||
468 | free_rp_inst(rp); | ||
469 | return ret; | ||
470 | } | ||
471 | |||
472 | #else /* ARCH_SUPPORTS_KRETPROBES */ | ||
473 | |||
474 | int register_kretprobe(struct kretprobe *rp) | ||
475 | { | ||
476 | return -ENOSYS; | ||
477 | } | ||
478 | |||
479 | #endif /* ARCH_SUPPORTS_KRETPROBES */ | ||
480 | |||
481 | void unregister_kretprobe(struct kretprobe *rp) | ||
482 | { | ||
483 | unsigned long flags; | ||
484 | struct kretprobe_instance *ri; | ||
485 | |||
486 | unregister_kprobe(&rp->kp); | ||
487 | /* No race here */ | ||
488 | spin_lock_irqsave(&kprobe_lock, flags); | ||
489 | free_rp_inst(rp); | ||
490 | while ((ri = get_used_rp_inst(rp)) != NULL) { | ||
491 | ri->rp = NULL; | ||
492 | hlist_del(&ri->uflist); | ||
493 | } | ||
494 | spin_unlock_irqrestore(&kprobe_lock, flags); | ||
495 | } | ||
496 | |||
260 | static int __init init_kprobes(void) | 497 | static int __init init_kprobes(void) |
261 | { | 498 | { |
262 | int i, err = 0; | 499 | int i, err = 0; |
263 | 500 | ||
264 | /* FIXME allocate the probe table, currently defined statically */ | 501 | /* FIXME allocate the probe table, currently defined statically */ |
265 | /* initialize all list heads */ | 502 | /* initialize all list heads */ |
266 | for (i = 0; i < KPROBE_TABLE_SIZE; i++) | 503 | for (i = 0; i < KPROBE_TABLE_SIZE; i++) { |
267 | INIT_HLIST_HEAD(&kprobe_table[i]); | 504 | INIT_HLIST_HEAD(&kprobe_table[i]); |
505 | INIT_HLIST_HEAD(&kretprobe_inst_table[i]); | ||
506 | } | ||
268 | 507 | ||
269 | err = register_die_notifier(&kprobe_exceptions_nb); | 508 | err = register_die_notifier(&kprobe_exceptions_nb); |
509 | /* Register the trampoline probe for return probe */ | ||
510 | register_kprobe(&trampoline_p); | ||
270 | return err; | 511 | return err; |
271 | } | 512 | } |
272 | 513 | ||
@@ -277,3 +518,6 @@ EXPORT_SYMBOL_GPL(unregister_kprobe); | |||
277 | EXPORT_SYMBOL_GPL(register_jprobe); | 518 | EXPORT_SYMBOL_GPL(register_jprobe); |
278 | EXPORT_SYMBOL_GPL(unregister_jprobe); | 519 | EXPORT_SYMBOL_GPL(unregister_jprobe); |
279 | EXPORT_SYMBOL_GPL(jprobe_return); | 520 | EXPORT_SYMBOL_GPL(jprobe_return); |
521 | EXPORT_SYMBOL_GPL(register_kretprobe); | ||
522 | EXPORT_SYMBOL_GPL(unregister_kretprobe); | ||
523 | |||
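A registration sketch may help make the new return-probe flow concrete. It is not taken from this patch: the probed address is supplied here as a module parameter, and the handler and maxactive fields are assumed to be the struct kretprobe members introduced alongside this code; the handler itself is invoked through the kretprobe_trampoline probe registered in init_kprobes() above.

    #include <linux/module.h>
    #include <linux/kernel.h>
    #include <linux/kprobes.h>

    static unsigned long probe_addr;        /* address of the function to probe */
    module_param(probe_addr, ulong, 0);

    /* Runs when the probed function returns, via the trampoline probe. */
    static int demo_ret_handler(struct kretprobe_instance *ri,
                                struct pt_regs *regs)
    {
            printk(KERN_INFO "probed function returned\n");
            return 0;
    }

    static struct kretprobe demo_rp = {
            .handler   = demo_ret_handler,
            .maxactive = 20,        /* <= 0 picks the default shown above */
    };

    static int __init demo_init(void)
    {
            demo_rp.kp.addr = (kprobe_opcode_t *) probe_addr;
            return register_kretprobe(&demo_rp);    /* -ENOSYS if unsupported */
    }

    static void __exit demo_exit(void)
    {
            unregister_kretprobe(&demo_rp);
    }

    module_init(demo_init);
    module_exit(demo_exit);
    MODULE_LICENSE("GPL");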
diff --git a/kernel/module.c b/kernel/module.c
index a566745dde62..068e271ab3a5 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -35,6 +35,7 @@ | |||
35 | #include <linux/notifier.h> | 35 | #include <linux/notifier.h> |
36 | #include <linux/stop_machine.h> | 36 | #include <linux/stop_machine.h> |
37 | #include <linux/device.h> | 37 | #include <linux/device.h> |
38 | #include <linux/string.h> | ||
38 | #include <asm/uaccess.h> | 39 | #include <asm/uaccess.h> |
39 | #include <asm/semaphore.h> | 40 | #include <asm/semaphore.h> |
40 | #include <asm/cacheflush.h> | 41 | #include <asm/cacheflush.h> |
@@ -370,6 +371,43 @@ static inline void percpu_modcopy(void *pcpudst, const void *src, | |||
370 | #endif /* CONFIG_SMP */ | 371 | #endif /* CONFIG_SMP */ |
371 | 372 | ||
372 | #ifdef CONFIG_MODULE_UNLOAD | 373 | #ifdef CONFIG_MODULE_UNLOAD |
374 | #define MODINFO_ATTR(field) \ | ||
375 | static void setup_modinfo_##field(struct module *mod, const char *s) \ | ||
376 | { \ | ||
377 | mod->field = kstrdup(s, GFP_KERNEL); \ | ||
378 | } \ | ||
379 | static ssize_t show_modinfo_##field(struct module_attribute *mattr, \ | ||
380 | struct module *mod, char *buffer) \ | ||
381 | { \ | ||
382 | return sprintf(buffer, "%s\n", mod->field); \ | ||
383 | } \ | ||
384 | static int modinfo_##field##_exists(struct module *mod) \ | ||
385 | { \ | ||
386 | return mod->field != NULL; \ | ||
387 | } \ | ||
388 | static void free_modinfo_##field(struct module *mod) \ | ||
389 | { \ | ||
390 | kfree(mod->field); \ | ||
391 | mod->field = NULL; \ | ||
392 | } \ | ||
393 | static struct module_attribute modinfo_##field = { \ | ||
394 | .attr = { .name = __stringify(field), .mode = 0444, \ | ||
395 | .owner = THIS_MODULE }, \ | ||
396 | .show = show_modinfo_##field, \ | ||
397 | .setup = setup_modinfo_##field, \ | ||
398 | .test = modinfo_##field##_exists, \ | ||
399 | .free = free_modinfo_##field, \ | ||
400 | }; | ||
401 | |||
402 | MODINFO_ATTR(version); | ||
403 | MODINFO_ATTR(srcversion); | ||
404 | |||
405 | static struct module_attribute *modinfo_attrs[] = { | ||
406 | &modinfo_version, | ||
407 | &modinfo_srcversion, | ||
408 | NULL, | ||
409 | }; | ||
410 | |||
373 | /* Init the unload section of the module. */ | 411 | /* Init the unload section of the module. */ |
374 | static void module_unload_init(struct module *mod) | 412 | static void module_unload_init(struct module *mod) |
375 | { | 413 | { |
@@ -692,7 +730,7 @@ static int obsparm_copy_string(const char *val, struct kernel_param *kp) | |||
692 | return 0; | 730 | return 0; |
693 | } | 731 | } |
694 | 732 | ||
695 | int set_obsolete(const char *val, struct kernel_param *kp) | 733 | static int set_obsolete(const char *val, struct kernel_param *kp) |
696 | { | 734 | { |
697 | unsigned int min, max; | 735 | unsigned int min, max; |
698 | unsigned int size, maxsize; | 736 | unsigned int size, maxsize; |
@@ -1031,6 +1069,32 @@ static void module_remove_refcnt_attr(struct module *mod) | |||
1031 | } | 1069 | } |
1032 | #endif | 1070 | #endif |
1033 | 1071 | ||
1072 | #ifdef CONFIG_MODULE_UNLOAD | ||
1073 | static int module_add_modinfo_attrs(struct module *mod) | ||
1074 | { | ||
1075 | struct module_attribute *attr; | ||
1076 | int error = 0; | ||
1077 | int i; | ||
1078 | |||
1079 | for (i = 0; (attr = modinfo_attrs[i]) && !error; i++) { | ||
1080 | if (!attr->test || | ||
1081 | (attr->test && attr->test(mod))) | ||
1082 | error = sysfs_create_file(&mod->mkobj.kobj,&attr->attr); | ||
1083 | } | ||
1084 | return error; | ||
1085 | } | ||
1086 | |||
1087 | static void module_remove_modinfo_attrs(struct module *mod) | ||
1088 | { | ||
1089 | struct module_attribute *attr; | ||
1090 | int i; | ||
1091 | |||
1092 | for (i = 0; (attr = modinfo_attrs[i]); i++) { | ||
1093 | sysfs_remove_file(&mod->mkobj.kobj,&attr->attr); | ||
1094 | attr->free(mod); | ||
1095 | } | ||
1096 | } | ||
1097 | #endif | ||
1034 | 1098 | ||
1035 | static int mod_sysfs_setup(struct module *mod, | 1099 | static int mod_sysfs_setup(struct module *mod, |
1036 | struct kernel_param *kparam, | 1100 | struct kernel_param *kparam, |
@@ -1056,6 +1120,12 @@ static int mod_sysfs_setup(struct module *mod, | |||
1056 | if (err) | 1120 | if (err) |
1057 | goto out_unreg; | 1121 | goto out_unreg; |
1058 | 1122 | ||
1123 | #ifdef CONFIG_MODULE_UNLOAD | ||
1124 | err = module_add_modinfo_attrs(mod); | ||
1125 | if (err) | ||
1126 | goto out_unreg; | ||
1127 | #endif | ||
1128 | |||
1059 | return 0; | 1129 | return 0; |
1060 | 1130 | ||
1061 | out_unreg: | 1131 | out_unreg: |
@@ -1066,6 +1136,9 @@ out: | |||
1066 | 1136 | ||
1067 | static void mod_kobject_remove(struct module *mod) | 1137 | static void mod_kobject_remove(struct module *mod) |
1068 | { | 1138 | { |
1139 | #ifdef CONFIG_MODULE_UNLOAD | ||
1140 | module_remove_modinfo_attrs(mod); | ||
1141 | #endif | ||
1069 | module_remove_refcnt_attr(mod); | 1142 | module_remove_refcnt_attr(mod); |
1070 | module_param_sysfs_remove(mod); | 1143 | module_param_sysfs_remove(mod); |
1071 | 1144 | ||
@@ -1311,6 +1384,23 @@ static char *get_modinfo(Elf_Shdr *sechdrs, | |||
1311 | return NULL; | 1384 | return NULL; |
1312 | } | 1385 | } |
1313 | 1386 | ||
1387 | #ifdef CONFIG_MODULE_UNLOAD | ||
1388 | static void setup_modinfo(struct module *mod, Elf_Shdr *sechdrs, | ||
1389 | unsigned int infoindex) | ||
1390 | { | ||
1391 | struct module_attribute *attr; | ||
1392 | int i; | ||
1393 | |||
1394 | for (i = 0; (attr = modinfo_attrs[i]); i++) { | ||
1395 | if (attr->setup) | ||
1396 | attr->setup(mod, | ||
1397 | get_modinfo(sechdrs, | ||
1398 | infoindex, | ||
1399 | attr->attr.name)); | ||
1400 | } | ||
1401 | } | ||
1402 | #endif | ||
1403 | |||
1314 | #ifdef CONFIG_KALLSYMS | 1404 | #ifdef CONFIG_KALLSYMS |
1315 | int is_exported(const char *name, const struct module *mod) | 1405 | int is_exported(const char *name, const struct module *mod) |
1316 | { | 1406 | { |
@@ -1615,6 +1705,11 @@ static struct module *load_module(void __user *umod, | |||
1615 | /* Set up license info based on the info section */ | 1705 | /* Set up license info based on the info section */ |
1616 | set_license(mod, get_modinfo(sechdrs, infoindex, "license")); | 1706 | set_license(mod, get_modinfo(sechdrs, infoindex, "license")); |
1617 | 1707 | ||
1708 | #ifdef CONFIG_MODULE_UNLOAD | ||
1709 | /* Set up MODINFO_ATTR fields */ | ||
1710 | setup_modinfo(mod, sechdrs, infoindex); | ||
1711 | #endif | ||
1712 | |||
1618 | /* Fix up syms, so that st_value is a pointer to location. */ | 1713 | /* Fix up syms, so that st_value is a pointer to location. */ |
1619 | err = simplify_symbols(sechdrs, symindex, strtab, versindex, pcpuindex, | 1714 | err = simplify_symbols(sechdrs, symindex, strtab, versindex, pcpuindex, |
1620 | mod); | 1715 | mod); |
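With the MODINFO_ATTR() machinery above, a module whose .modinfo section carries a version or srcversion string gets a matching read-only file under /sys/module/<name>/; modules without the string get no file, thanks to the modinfo_##field##_exists test. A throwaway example, not from this patch:

    #include <linux/module.h>
    #include <linux/init.h>

    static int __init demo_init(void)
    {
            return 0;
    }

    static void __exit demo_exit(void)
    {
    }

    module_init(demo_init);
    module_exit(demo_exit);

    MODULE_LICENSE("GPL");
    /* Exposed as /sys/module/<name>/version by the new modinfo attributes;
     * srcversion appears when modpost generates one for the module. */
    MODULE_VERSION("1.0");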
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index cabb63fc9e16..5b7b4736d82b 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -89,23 +89,6 @@ static struct idr posix_timers_id; | |||
89 | static DEFINE_SPINLOCK(idr_lock); | 89 | static DEFINE_SPINLOCK(idr_lock); |
90 | 90 | ||
91 | /* | 91 | /* |
92 | * Just because the timer is not in the timer list does NOT mean it is | ||
93 | * inactive. It could be in the "fire" routine getting a new expire time. | ||
94 | */ | ||
95 | #define TIMER_INACTIVE 1 | ||
96 | |||
97 | #ifdef CONFIG_SMP | ||
98 | # define timer_active(tmr) \ | ||
99 | ((tmr)->it.real.timer.entry.prev != (void *)TIMER_INACTIVE) | ||
100 | # define set_timer_inactive(tmr) \ | ||
101 | do { \ | ||
102 | (tmr)->it.real.timer.entry.prev = (void *)TIMER_INACTIVE; \ | ||
103 | } while (0) | ||
104 | #else | ||
105 | # define timer_active(tmr) BARFY // error to use outside of SMP | ||
106 | # define set_timer_inactive(tmr) do { } while (0) | ||
107 | #endif | ||
108 | /* | ||
109 | * we assume that the new SIGEV_THREAD_ID shares no bits with the other | 92 | * we assume that the new SIGEV_THREAD_ID shares no bits with the other |
110 | * SIGEV values. Here we put out an error if this assumption fails. | 93 | * SIGEV values. Here we put out an error if this assumption fails. |
111 | */ | 94 | */ |
@@ -226,7 +209,6 @@ static inline int common_timer_create(struct k_itimer *new_timer) | |||
226 | init_timer(&new_timer->it.real.timer); | 209 | init_timer(&new_timer->it.real.timer); |
227 | new_timer->it.real.timer.data = (unsigned long) new_timer; | 210 | new_timer->it.real.timer.data = (unsigned long) new_timer; |
228 | new_timer->it.real.timer.function = posix_timer_fn; | 211 | new_timer->it.real.timer.function = posix_timer_fn; |
229 | set_timer_inactive(new_timer); | ||
230 | return 0; | 212 | return 0; |
231 | } | 213 | } |
232 | 214 | ||
@@ -480,7 +462,6 @@ static void posix_timer_fn(unsigned long __data) | |||
480 | int do_notify = 1; | 462 | int do_notify = 1; |
481 | 463 | ||
482 | spin_lock_irqsave(&timr->it_lock, flags); | 464 | spin_lock_irqsave(&timr->it_lock, flags); |
483 | set_timer_inactive(timr); | ||
484 | if (!list_empty(&timr->it.real.abs_timer_entry)) { | 465 | if (!list_empty(&timr->it.real.abs_timer_entry)) { |
485 | spin_lock(&abs_list.lock); | 466 | spin_lock(&abs_list.lock); |
486 | do { | 467 | do { |
@@ -983,8 +964,8 @@ common_timer_set(struct k_itimer *timr, int flags, | |||
983 | * careful here. If smp we could be in the "fire" routine which will | 964 | * careful here. If smp we could be in the "fire" routine which will |
984 | * be spinning as we hold the lock. But this is ONLY an SMP issue. | 965 | * be spinning as we hold the lock. But this is ONLY an SMP issue. |
985 | */ | 966 | */ |
967 | if (try_to_del_timer_sync(&timr->it.real.timer) < 0) { | ||
986 | #ifdef CONFIG_SMP | 968 | #ifdef CONFIG_SMP |
987 | if (timer_active(timr) && !del_timer(&timr->it.real.timer)) | ||
988 | /* | 969 | /* |
989 | * It can only be active if on an other cpu. Since | 970 | * It can only be active if on an other cpu. Since |
990 | * we have cleared the interval stuff above, it should | 971 | * we have cleared the interval stuff above, it should |
@@ -994,11 +975,9 @@ common_timer_set(struct k_itimer *timr, int flags, | |||
994 | * a "retry" exit status. | 975 | * a "retry" exit status. |
995 | */ | 976 | */ |
996 | return TIMER_RETRY; | 977 | return TIMER_RETRY; |
997 | |||
998 | set_timer_inactive(timr); | ||
999 | #else | ||
1000 | del_timer(&timr->it.real.timer); | ||
1001 | #endif | 978 | #endif |
979 | } | ||
980 | |||
1002 | remove_from_abslist(timr); | 981 | remove_from_abslist(timr); |
1003 | 982 | ||
1004 | timr->it_requeue_pending = (timr->it_requeue_pending + 2) & | 983 | timr->it_requeue_pending = (timr->it_requeue_pending + 2) & |
@@ -1083,8 +1062,9 @@ retry: | |||
1083 | static inline int common_timer_del(struct k_itimer *timer) | 1062 | static inline int common_timer_del(struct k_itimer *timer) |
1084 | { | 1063 | { |
1085 | timer->it.real.incr = 0; | 1064 | timer->it.real.incr = 0; |
1065 | |||
1066 | if (try_to_del_timer_sync(&timer->it.real.timer) < 0) { | ||
1086 | #ifdef CONFIG_SMP | 1067 | #ifdef CONFIG_SMP |
1087 | if (timer_active(timer) && !del_timer(&timer->it.real.timer)) | ||
1088 | /* | 1068 | /* |
1089 | * It can only be active if on an other cpu. Since | 1069 | * It can only be active if on an other cpu. Since |
1090 | * we have cleared the interval stuff above, it should | 1070 | * we have cleared the interval stuff above, it should |
@@ -1094,9 +1074,9 @@ static inline int common_timer_del(struct k_itimer *timer) | |||
1094 | * a "retry" exit status. | 1074 | * a "retry" exit status. |
1095 | */ | 1075 | */ |
1096 | return TIMER_RETRY; | 1076 | return TIMER_RETRY; |
1097 | #else | ||
1098 | del_timer(&timer->it.real.timer); | ||
1099 | #endif | 1077 | #endif |
1078 | } | ||
1079 | |||
1100 | remove_from_abslist(timer); | 1080 | remove_from_abslist(timer); |
1101 | 1081 | ||
1102 | return 0; | 1082 | return 0; |
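Both call sites rely on try_to_del_timer_sync() reporting a negative value when the timer callback is still running on another CPU; the posix-timers code turns that into TIMER_RETRY so the syscall path can drop its locks and retry. A caller that holds no lock the callback needs could instead spin in place, roughly as in this sketch (assuming try_to_del_timer_sync() is available in the configuration at hand):

    #include <linux/timer.h>
    #include <asm/processor.h>      /* cpu_relax() */

    /* Sketch only: stop a timer whose callback may be running elsewhere.
     * Safe only if we hold no lock the callback itself takes; otherwise
     * use the drop-lock-and-retry scheme shown above with TIMER_RETRY. */
    static void stop_timer(struct timer_list *t)
    {
            while (try_to_del_timer_sync(t) < 0)
                    cpu_relax();    /* callback still running on another CPU */
    }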
diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c
index 90b3b68dee3f..53f9f8720ee4 100644
--- a/kernel/power/swsusp.c
+++ b/kernel/power/swsusp.c
@@ -81,7 +81,7 @@ static int nr_copy_pages_check; | |||
81 | extern char resume_file[]; | 81 | extern char resume_file[]; |
82 | 82 | ||
83 | /* Local variables that should not be affected by save */ | 83 | /* Local variables that should not be affected by save */ |
84 | unsigned int nr_copy_pages __nosavedata = 0; | 84 | static unsigned int nr_copy_pages __nosavedata = 0; |
85 | 85 | ||
86 | /* Suspend pagedir is allocated before final copy, therefore it | 86 | /* Suspend pagedir is allocated before final copy, therefore it |
87 | must be freed after resume | 87 | must be freed after resume |
diff --git a/kernel/printk.c b/kernel/printk.c
index 01b58d7d17ff..3a442bfb8bee 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -876,8 +876,10 @@ void register_console(struct console * console) | |||
876 | break; | 876 | break; |
877 | console->flags |= CON_ENABLED; | 877 | console->flags |= CON_ENABLED; |
878 | console->index = console_cmdline[i].index; | 878 | console->index = console_cmdline[i].index; |
879 | if (i == preferred_console) | 879 | if (i == selected_console) { |
880 | console->flags |= CON_CONSDEV; | 880 | console->flags |= CON_CONSDEV; |
881 | preferred_console = selected_console; | ||
882 | } | ||
881 | break; | 883 | break; |
882 | } | 884 | } |
883 | 885 | ||
@@ -897,6 +899,8 @@ void register_console(struct console * console) | |||
897 | if ((console->flags & CON_CONSDEV) || console_drivers == NULL) { | 899 | if ((console->flags & CON_CONSDEV) || console_drivers == NULL) { |
898 | console->next = console_drivers; | 900 | console->next = console_drivers; |
899 | console_drivers = console; | 901 | console_drivers = console; |
902 | if (console->next) | ||
903 | console->next->flags &= ~CON_CONSDEV; | ||
900 | } else { | 904 | } else { |
901 | console->next = console_drivers->next; | 905 | console->next = console_drivers->next; |
902 | console_drivers->next = console; | 906 | console_drivers->next = console; |
@@ -937,10 +941,14 @@ int unregister_console(struct console * console) | |||
937 | /* If last console is removed, we re-enable picking the first | 941 | /* If last console is removed, we re-enable picking the first |
938 | * one that gets registered. Without that, pmac early boot console | 942 | * one that gets registered. Without that, pmac early boot console |
939 | * would prevent fbcon from taking over. | 943 | * would prevent fbcon from taking over. |
944 | * | ||
945 | * If this isn't the last console and it has CON_CONSDEV set, we | ||
946 | * need to set it on the next preferred console. | ||
940 | */ | 947 | */ |
941 | if (console_drivers == NULL) | 948 | if (console_drivers == NULL) |
942 | preferred_console = selected_console; | 949 | preferred_console = selected_console; |
943 | 950 | else if (console->flags & CON_CONSDEV) | |
951 | console_drivers->flags |= CON_CONSDEV; | ||
944 | 952 | ||
945 | release_console_sem(); | 953 | release_console_sem(); |
946 | return res; | 954 | return res; |
diff --git a/kernel/sched.c b/kernel/sched.c
index deca041fc364..76080d142e3d 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2576,7 +2576,7 @@ void fastcall add_preempt_count(int val) | |||
2576 | /* | 2576 | /* |
2577 | * Underflow? | 2577 | * Underflow? |
2578 | */ | 2578 | */ |
2579 | BUG_ON(((int)preempt_count() < 0)); | 2579 | BUG_ON((preempt_count() < 0)); |
2580 | preempt_count() += val; | 2580 | preempt_count() += val; |
2581 | /* | 2581 | /* |
2582 | * Spinlock count overflowing soon? | 2582 | * Spinlock count overflowing soon? |
@@ -2869,7 +2869,7 @@ need_resched: | |||
2869 | 2869 | ||
2870 | int default_wake_function(wait_queue_t *curr, unsigned mode, int sync, void *key) | 2870 | int default_wake_function(wait_queue_t *curr, unsigned mode, int sync, void *key) |
2871 | { | 2871 | { |
2872 | task_t *p = curr->task; | 2872 | task_t *p = curr->private; |
2873 | return try_to_wake_up(p, mode, sync); | 2873 | return try_to_wake_up(p, mode, sync); |
2874 | } | 2874 | } |
2875 | 2875 | ||
diff --git a/kernel/signal.c b/kernel/signal.c
index c89821b69ae3..d1258729a5f9 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -213,6 +213,7 @@ static inline int has_pending_signals(sigset_t *signal, sigset_t *blocked) | |||
213 | fastcall void recalc_sigpending_tsk(struct task_struct *t) | 213 | fastcall void recalc_sigpending_tsk(struct task_struct *t) |
214 | { | 214 | { |
215 | if (t->signal->group_stop_count > 0 || | 215 | if (t->signal->group_stop_count > 0 || |
216 | (t->flags & PF_FREEZE) || | ||
216 | PENDING(&t->pending, &t->blocked) || | 217 | PENDING(&t->pending, &t->blocked) || |
217 | PENDING(&t->signal->shared_pending, &t->blocked)) | 218 | PENDING(&t->signal->shared_pending, &t->blocked)) |
218 | set_tsk_thread_flag(t, TIF_SIGPENDING); | 219 | set_tsk_thread_flag(t, TIF_SIGPENDING); |
diff --git a/kernel/sys.c b/kernel/sys.c
index f006632c2ba7..da24bc1292db 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -525,7 +525,7 @@ asmlinkage long sys_setregid(gid_t rgid, gid_t egid) | |||
525 | } | 525 | } |
526 | if (new_egid != old_egid) | 526 | if (new_egid != old_egid) |
527 | { | 527 | { |
528 | current->mm->dumpable = 0; | 528 | current->mm->dumpable = suid_dumpable; |
529 | smp_wmb(); | 529 | smp_wmb(); |
530 | } | 530 | } |
531 | if (rgid != (gid_t) -1 || | 531 | if (rgid != (gid_t) -1 || |
@@ -556,7 +556,7 @@ asmlinkage long sys_setgid(gid_t gid) | |||
556 | { | 556 | { |
557 | if(old_egid != gid) | 557 | if(old_egid != gid) |
558 | { | 558 | { |
559 | current->mm->dumpable=0; | 559 | current->mm->dumpable = suid_dumpable; |
560 | smp_wmb(); | 560 | smp_wmb(); |
561 | } | 561 | } |
562 | current->gid = current->egid = current->sgid = current->fsgid = gid; | 562 | current->gid = current->egid = current->sgid = current->fsgid = gid; |
@@ -565,7 +565,7 @@ asmlinkage long sys_setgid(gid_t gid) | |||
565 | { | 565 | { |
566 | if(old_egid != gid) | 566 | if(old_egid != gid) |
567 | { | 567 | { |
568 | current->mm->dumpable=0; | 568 | current->mm->dumpable = suid_dumpable; |
569 | smp_wmb(); | 569 | smp_wmb(); |
570 | } | 570 | } |
571 | current->egid = current->fsgid = gid; | 571 | current->egid = current->fsgid = gid; |
@@ -596,7 +596,7 @@ static int set_user(uid_t new_ruid, int dumpclear) | |||
596 | 596 | ||
597 | if(dumpclear) | 597 | if(dumpclear) |
598 | { | 598 | { |
599 | current->mm->dumpable = 0; | 599 | current->mm->dumpable = suid_dumpable; |
600 | smp_wmb(); | 600 | smp_wmb(); |
601 | } | 601 | } |
602 | current->uid = new_ruid; | 602 | current->uid = new_ruid; |
@@ -653,7 +653,7 @@ asmlinkage long sys_setreuid(uid_t ruid, uid_t euid) | |||
653 | 653 | ||
654 | if (new_euid != old_euid) | 654 | if (new_euid != old_euid) |
655 | { | 655 | { |
656 | current->mm->dumpable=0; | 656 | current->mm->dumpable = suid_dumpable; |
657 | smp_wmb(); | 657 | smp_wmb(); |
658 | } | 658 | } |
659 | current->fsuid = current->euid = new_euid; | 659 | current->fsuid = current->euid = new_euid; |
@@ -703,7 +703,7 @@ asmlinkage long sys_setuid(uid_t uid) | |||
703 | 703 | ||
704 | if (old_euid != uid) | 704 | if (old_euid != uid) |
705 | { | 705 | { |
706 | current->mm->dumpable = 0; | 706 | current->mm->dumpable = suid_dumpable; |
707 | smp_wmb(); | 707 | smp_wmb(); |
708 | } | 708 | } |
709 | current->fsuid = current->euid = uid; | 709 | current->fsuid = current->euid = uid; |
@@ -748,7 +748,7 @@ asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid) | |||
748 | if (euid != (uid_t) -1) { | 748 | if (euid != (uid_t) -1) { |
749 | if (euid != current->euid) | 749 | if (euid != current->euid) |
750 | { | 750 | { |
751 | current->mm->dumpable = 0; | 751 | current->mm->dumpable = suid_dumpable; |
752 | smp_wmb(); | 752 | smp_wmb(); |
753 | } | 753 | } |
754 | current->euid = euid; | 754 | current->euid = euid; |
@@ -798,7 +798,7 @@ asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid) | |||
798 | if (egid != (gid_t) -1) { | 798 | if (egid != (gid_t) -1) { |
799 | if (egid != current->egid) | 799 | if (egid != current->egid) |
800 | { | 800 | { |
801 | current->mm->dumpable = 0; | 801 | current->mm->dumpable = suid_dumpable; |
802 | smp_wmb(); | 802 | smp_wmb(); |
803 | } | 803 | } |
804 | current->egid = egid; | 804 | current->egid = egid; |
@@ -845,7 +845,7 @@ asmlinkage long sys_setfsuid(uid_t uid) | |||
845 | { | 845 | { |
846 | if (uid != old_fsuid) | 846 | if (uid != old_fsuid) |
847 | { | 847 | { |
848 | current->mm->dumpable = 0; | 848 | current->mm->dumpable = suid_dumpable; |
849 | smp_wmb(); | 849 | smp_wmb(); |
850 | } | 850 | } |
851 | current->fsuid = uid; | 851 | current->fsuid = uid; |
@@ -875,7 +875,7 @@ asmlinkage long sys_setfsgid(gid_t gid) | |||
875 | { | 875 | { |
876 | if (gid != old_fsgid) | 876 | if (gid != old_fsgid) |
877 | { | 877 | { |
878 | current->mm->dumpable = 0; | 878 | current->mm->dumpable = suid_dumpable; |
879 | smp_wmb(); | 879 | smp_wmb(); |
880 | } | 880 | } |
881 | current->fsgid = gid; | 881 | current->fsgid = gid; |
@@ -894,35 +894,69 @@ asmlinkage long sys_times(struct tms __user * tbuf) | |||
894 | */ | 894 | */ |
895 | if (tbuf) { | 895 | if (tbuf) { |
896 | struct tms tmp; | 896 | struct tms tmp; |
897 | struct task_struct *tsk = current; | ||
898 | struct task_struct *t; | ||
899 | cputime_t utime, stime, cutime, cstime; | 897 | cputime_t utime, stime, cutime, cstime; |
900 | 898 | ||
901 | read_lock(&tasklist_lock); | 899 | #ifdef CONFIG_SMP |
902 | utime = tsk->signal->utime; | 900 | if (thread_group_empty(current)) { |
903 | stime = tsk->signal->stime; | 901 | /* |
904 | t = tsk; | 902 | * Single thread case without the use of any locks. |
905 | do { | 903 | * |
906 | utime = cputime_add(utime, t->utime); | 904 | * We may race with release_task if two threads are |
907 | stime = cputime_add(stime, t->stime); | 905 | * executing. However, release task first adds up the |
908 | t = next_thread(t); | 906 | * counters (__exit_signal) before removing the task |
909 | } while (t != tsk); | 907 | * from the process tasklist (__unhash_process). |
910 | 908 | * __exit_signal also acquires and releases the | |
911 | /* | 909 | * siglock which results in the proper memory ordering |
912 | * While we have tasklist_lock read-locked, no dying thread | 910 | * so that the list modifications are always visible |
913 | * can be updating current->signal->[us]time. Instead, | 911 | * after the counters have been updated. |
914 | * we got their counts included in the live thread loop. | 912 | * |
915 | * However, another thread can come in right now and | 913 | * If the counters have been updated by the second thread |
916 | * do a wait call that updates current->signal->c[us]time. | 914 | * but the thread has not yet been removed from the list |
917 | * To make sure we always see that pair updated atomically, | 915 | * then the other branch will be executing which will |
918 | * we take the siglock around fetching them. | 916 | * block on tasklist_lock until the exit handling of the |
919 | */ | 917 | * other task is finished. |
920 | spin_lock_irq(&tsk->sighand->siglock); | 918 | * |
921 | cutime = tsk->signal->cutime; | 919 | * This also implies that the sighand->siglock cannot |
922 | cstime = tsk->signal->cstime; | 920 | * be held by another processor. So we can also |
923 | spin_unlock_irq(&tsk->sighand->siglock); | 921 | * skip acquiring that lock. |
924 | read_unlock(&tasklist_lock); | 922 | */ |
923 | utime = cputime_add(current->signal->utime, current->utime); | ||
924 | stime = cputime_add(current->signal->utime, current->stime); | ||
925 | cutime = current->signal->cutime; | ||
926 | cstime = current->signal->cstime; | ||
927 | } else | ||
928 | #endif | ||
929 | { | ||
930 | |||
931 | /* Process with multiple threads */ | ||
932 | struct task_struct *tsk = current; | ||
933 | struct task_struct *t; | ||
925 | 934 | ||
935 | read_lock(&tasklist_lock); | ||
936 | utime = tsk->signal->utime; | ||
937 | stime = tsk->signal->stime; | ||
938 | t = tsk; | ||
939 | do { | ||
940 | utime = cputime_add(utime, t->utime); | ||
941 | stime = cputime_add(stime, t->stime); | ||
942 | t = next_thread(t); | ||
943 | } while (t != tsk); | ||
944 | |||
945 | /* | ||
946 | * While we have tasklist_lock read-locked, no dying thread | ||
947 | * can be updating current->signal->[us]time. Instead, | ||
948 | * we got their counts included in the live thread loop. | ||
949 | * However, another thread can come in right now and | ||
950 | * do a wait call that updates current->signal->c[us]time. | ||
951 | * To make sure we always see that pair updated atomically, | ||
952 | * we take the siglock around fetching them. | ||
953 | */ | ||
954 | spin_lock_irq(&tsk->sighand->siglock); | ||
955 | cutime = tsk->signal->cutime; | ||
956 | cstime = tsk->signal->cstime; | ||
957 | spin_unlock_irq(&tsk->sighand->siglock); | ||
958 | read_unlock(&tasklist_lock); | ||
959 | } | ||
926 | tmp.tms_utime = cputime_to_clock_t(utime); | 960 | tmp.tms_utime = cputime_to_clock_t(utime); |
927 | tmp.tms_stime = cputime_to_clock_t(stime); | 961 | tmp.tms_stime = cputime_to_clock_t(stime); |
928 | tmp.tms_cutime = cputime_to_clock_t(cutime); | 962 | tmp.tms_cutime = cputime_to_clock_t(cutime); |
@@ -1225,7 +1259,7 @@ static void groups_sort(struct group_info *group_info) | |||
1225 | } | 1259 | } |
1226 | 1260 | ||
1227 | /* a simple bsearch */ | 1261 | /* a simple bsearch */ |
1228 | static int groups_search(struct group_info *group_info, gid_t grp) | 1262 | int groups_search(struct group_info *group_info, gid_t grp) |
1229 | { | 1263 | { |
1230 | int left, right; | 1264 | int left, right; |
1231 | 1265 | ||
@@ -1652,7 +1686,7 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, | |||
1652 | error = 1; | 1686 | error = 1; |
1653 | break; | 1687 | break; |
1654 | case PR_SET_DUMPABLE: | 1688 | case PR_SET_DUMPABLE: |
1655 | if (arg2 != 0 && arg2 != 1) { | 1689 | if (arg2 < 0 || arg2 > 2) { |
1656 | error = -EINVAL; | 1690 | error = -EINVAL; |
1657 | break; | 1691 | break; |
1658 | } | 1692 | } |
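From userspace nothing changes: the single-thread fast path above only avoids tasklist_lock and the siglock for callers of times(2) with no other threads. A small, runnable probe of that interface, converting the returned ticks with USER_HZ (sysconf(_SC_CLK_TCK)) rather than the kernel tick rate:

    #include <stdio.h>
    #include <sys/times.h>
    #include <unistd.h>

    int main(void)
    {
            struct tms t;
            long hz = sysconf(_SC_CLK_TCK); /* USER_HZ, not the kernel HZ */

            if (times(&t) == (clock_t) -1)
                    return 1;
            printf("utime=%.2fs stime=%.2fs cutime=%.2fs cstime=%.2fs\n",
                   (double) t.tms_utime / hz, (double) t.tms_stime / hz,
                   (double) t.tms_cutime / hz, (double) t.tms_cstime / hz);
            return 0;
    }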
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 701d12c63068..24a4d12d5aa9 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -58,6 +58,7 @@ extern int sysctl_overcommit_ratio; | |||
58 | extern int max_threads; | 58 | extern int max_threads; |
59 | extern int sysrq_enabled; | 59 | extern int sysrq_enabled; |
60 | extern int core_uses_pid; | 60 | extern int core_uses_pid; |
61 | extern int suid_dumpable; | ||
61 | extern char core_pattern[]; | 62 | extern char core_pattern[]; |
62 | extern int cad_pid; | 63 | extern int cad_pid; |
63 | extern int pid_max; | 64 | extern int pid_max; |
@@ -950,6 +951,14 @@ static ctl_table fs_table[] = { | |||
950 | .proc_handler = &proc_dointvec, | 951 | .proc_handler = &proc_dointvec, |
951 | }, | 952 | }, |
952 | #endif | 953 | #endif |
954 | { | ||
955 | .ctl_name = KERN_SETUID_DUMPABLE, | ||
956 | .procname = "suid_dumpable", | ||
957 | .data = &suid_dumpable, | ||
958 | .maxlen = sizeof(int), | ||
959 | .mode = 0644, | ||
960 | .proc_handler = &proc_dointvec, | ||
961 | }, | ||
953 | { .ctl_name = 0 } | 962 | { .ctl_name = 0 } |
954 | }; | 963 | }; |
955 | 964 | ||
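The new entry lands in fs_table, so it is reachable as /proc/sys/fs/suid_dumpable (or via sysctl as fs.suid_dumpable). Reading it from userspace as a quick check, sketch only:

    #include <stdio.h>

    int main(void)
    {
            FILE *f = fopen("/proc/sys/fs/suid_dumpable", "r");
            int mode = -1;

            if (!f) {
                    perror("fopen");
                    return 1;
            }
            if (fscanf(f, "%d", &mode) != 1)
                    mode = -1;
            fclose(f);
            /* This is the value the sys.c hunks above assign to mm->dumpable
             * whenever a task's credentials change; 0 keeps the old behaviour. */
            printf("fs.suid_dumpable = %d\n", mode);
            return 0;
    }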
diff --git a/kernel/timer.c b/kernel/timer.c
index 207aa4f0aa10..51ff917c9590 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -57,6 +57,11 @@ static void time_interpolator_update(long delta_nsec); | |||
57 | #define TVN_MASK (TVN_SIZE - 1) | 57 | #define TVN_MASK (TVN_SIZE - 1) |
58 | #define TVR_MASK (TVR_SIZE - 1) | 58 | #define TVR_MASK (TVR_SIZE - 1) |
59 | 59 | ||
60 | struct timer_base_s { | ||
61 | spinlock_t lock; | ||
62 | struct timer_list *running_timer; | ||
63 | }; | ||
64 | |||
60 | typedef struct tvec_s { | 65 | typedef struct tvec_s { |
61 | struct list_head vec[TVN_SIZE]; | 66 | struct list_head vec[TVN_SIZE]; |
62 | } tvec_t; | 67 | } tvec_t; |
@@ -66,9 +71,8 @@ typedef struct tvec_root_s { | |||
66 | } tvec_root_t; | 71 | } tvec_root_t; |
67 | 72 | ||
68 | struct tvec_t_base_s { | 73 | struct tvec_t_base_s { |
69 | spinlock_t lock; | 74 | struct timer_base_s t_base; |
70 | unsigned long timer_jiffies; | 75 | unsigned long timer_jiffies; |
71 | struct timer_list *running_timer; | ||
72 | tvec_root_t tv1; | 76 | tvec_root_t tv1; |
73 | tvec_t tv2; | 77 | tvec_t tv2; |
74 | tvec_t tv3; | 78 | tvec_t tv3; |
@@ -77,18 +81,16 @@ struct tvec_t_base_s { | |||
77 | } ____cacheline_aligned_in_smp; | 81 | } ____cacheline_aligned_in_smp; |
78 | 82 | ||
79 | typedef struct tvec_t_base_s tvec_base_t; | 83 | typedef struct tvec_t_base_s tvec_base_t; |
84 | static DEFINE_PER_CPU(tvec_base_t, tvec_bases); | ||
80 | 85 | ||
81 | static inline void set_running_timer(tvec_base_t *base, | 86 | static inline void set_running_timer(tvec_base_t *base, |
82 | struct timer_list *timer) | 87 | struct timer_list *timer) |
83 | { | 88 | { |
84 | #ifdef CONFIG_SMP | 89 | #ifdef CONFIG_SMP |
85 | base->running_timer = timer; | 90 | base->t_base.running_timer = timer; |
86 | #endif | 91 | #endif |
87 | } | 92 | } |
88 | 93 | ||
89 | /* Fake initialization */ | ||
90 | static DEFINE_PER_CPU(tvec_base_t, tvec_bases) = { SPIN_LOCK_UNLOCKED }; | ||
91 | |||
92 | static void check_timer_failed(struct timer_list *timer) | 94 | static void check_timer_failed(struct timer_list *timer) |
93 | { | 95 | { |
94 | static int whine_count; | 96 | static int whine_count; |
@@ -103,7 +105,6 @@ static void check_timer_failed(struct timer_list *timer) | |||
103 | /* | 105 | /* |
104 | * Now fix it up | 106 | * Now fix it up |
105 | */ | 107 | */ |
106 | spin_lock_init(&timer->lock); | ||
107 | timer->magic = TIMER_MAGIC; | 108 | timer->magic = TIMER_MAGIC; |
108 | } | 109 | } |
109 | 110 | ||
@@ -156,65 +157,113 @@ static void internal_add_timer(tvec_base_t *base, struct timer_list *timer) | |||
156 | list_add_tail(&timer->entry, vec); | 157 | list_add_tail(&timer->entry, vec); |
157 | } | 158 | } |
158 | 159 | ||
160 | typedef struct timer_base_s timer_base_t; | ||
161 | /* | ||
162 | * Used by TIMER_INITIALIZER, we can't use per_cpu(tvec_bases) | ||
163 | * at compile time, and we need timer->base to lock the timer. | ||
164 | */ | ||
165 | timer_base_t __init_timer_base | ||
166 | ____cacheline_aligned_in_smp = { .lock = SPIN_LOCK_UNLOCKED }; | ||
167 | EXPORT_SYMBOL(__init_timer_base); | ||
168 | |||
169 | /*** | ||
170 | * init_timer - initialize a timer. | ||
171 | * @timer: the timer to be initialized | ||
172 | * | ||
173 | * init_timer() must be done to a timer prior calling *any* of the | ||
174 | * other timer functions. | ||
175 | */ | ||
176 | void fastcall init_timer(struct timer_list *timer) | ||
177 | { | ||
178 | timer->entry.next = NULL; | ||
179 | timer->base = &per_cpu(tvec_bases, raw_smp_processor_id()).t_base; | ||
180 | timer->magic = TIMER_MAGIC; | ||
181 | } | ||
182 | EXPORT_SYMBOL(init_timer); | ||
183 | |||
184 | static inline void detach_timer(struct timer_list *timer, | ||
185 | int clear_pending) | ||
186 | { | ||
187 | struct list_head *entry = &timer->entry; | ||
188 | |||
189 | __list_del(entry->prev, entry->next); | ||
190 | if (clear_pending) | ||
191 | entry->next = NULL; | ||
192 | entry->prev = LIST_POISON2; | ||
193 | } | ||
194 | |||
195 | /* | ||
196 | * We are using hashed locking: holding per_cpu(tvec_bases).t_base.lock | ||
197 | * means that all timers which are tied to this base via timer->base are | ||
198 | * locked, and the base itself is locked too. | ||
199 | * | ||
200 | * So __run_timers/migrate_timers can safely modify all timers which could | ||
201 | * be found on ->tvX lists. | ||
202 | * | ||
203 | * When the timer's base is locked, and the timer removed from list, it is | ||
204 | * possible to set timer->base = NULL and drop the lock: the timer remains | ||
205 | * locked. | ||
206 | */ | ||
207 | static timer_base_t *lock_timer_base(struct timer_list *timer, | ||
208 | unsigned long *flags) | ||
209 | { | ||
210 | timer_base_t *base; | ||
211 | |||
212 | for (;;) { | ||
213 | base = timer->base; | ||
214 | if (likely(base != NULL)) { | ||
215 | spin_lock_irqsave(&base->lock, *flags); | ||
216 | if (likely(base == timer->base)) | ||
217 | return base; | ||
218 | /* The timer has migrated to another CPU */ | ||
219 | spin_unlock_irqrestore(&base->lock, *flags); | ||
220 | } | ||
221 | cpu_relax(); | ||
222 | } | ||
223 | } | ||
224 | |||
159 | int __mod_timer(struct timer_list *timer, unsigned long expires) | 225 | int __mod_timer(struct timer_list *timer, unsigned long expires) |
160 | { | 226 | { |
161 | tvec_base_t *old_base, *new_base; | 227 | timer_base_t *base; |
228 | tvec_base_t *new_base; | ||
162 | unsigned long flags; | 229 | unsigned long flags; |
163 | int ret = 0; | 230 | int ret = 0; |
164 | 231 | ||
165 | BUG_ON(!timer->function); | 232 | BUG_ON(!timer->function); |
166 | |||
167 | check_timer(timer); | 233 | check_timer(timer); |
168 | 234 | ||
169 | spin_lock_irqsave(&timer->lock, flags); | 235 | base = lock_timer_base(timer, &flags); |
236 | |||
237 | if (timer_pending(timer)) { | ||
238 | detach_timer(timer, 0); | ||
239 | ret = 1; | ||
240 | } | ||
241 | |||
170 | new_base = &__get_cpu_var(tvec_bases); | 242 | new_base = &__get_cpu_var(tvec_bases); |
171 | repeat: | ||
172 | old_base = timer->base; | ||
173 | 243 | ||
174 | /* | 244 | if (base != &new_base->t_base) { |
175 | * Prevent deadlocks via ordering by old_base < new_base. | ||
176 | */ | ||
177 | if (old_base && (new_base != old_base)) { | ||
178 | if (old_base < new_base) { | ||
179 | spin_lock(&new_base->lock); | ||
180 | spin_lock(&old_base->lock); | ||
181 | } else { | ||
182 | spin_lock(&old_base->lock); | ||
183 | spin_lock(&new_base->lock); | ||
184 | } | ||
185 | /* | 245 | /* |
186 | * The timer base might have been cancelled while we were | 246 | * We are trying to schedule the timer on the local CPU. |
187 | * trying to take the lock(s): | 247 | * However we can't change timer's base while it is running, |
248 | * otherwise del_timer_sync() can't detect that the timer's | ||
249 | * handler yet has not finished. This also guarantees that | ||
250 | * the timer is serialized wrt itself. | ||
188 | */ | 251 | */ |
189 | if (timer->base != old_base) { | 252 | if (unlikely(base->running_timer == timer)) { |
190 | spin_unlock(&new_base->lock); | 253 | /* The timer remains on a former base */ |
191 | spin_unlock(&old_base->lock); | 254 | new_base = container_of(base, tvec_base_t, t_base); |
192 | goto repeat; | 255 | } else { |
193 | } | 256 | /* See the comment in lock_timer_base() */ |
194 | } else { | 257 | timer->base = NULL; |
195 | spin_lock(&new_base->lock); | 258 | spin_unlock(&base->lock); |
196 | if (timer->base != old_base) { | 259 | spin_lock(&new_base->t_base.lock); |
197 | spin_unlock(&new_base->lock); | 260 | timer->base = &new_base->t_base; |
198 | goto repeat; | ||
199 | } | 261 | } |
200 | } | 262 | } |
201 | 263 | ||
202 | /* | ||
203 | * Delete the previous timeout (if there was any), and install | ||
204 | * the new one: | ||
205 | */ | ||
206 | if (old_base) { | ||
207 | list_del(&timer->entry); | ||
208 | ret = 1; | ||
209 | } | ||
210 | timer->expires = expires; | 264 | timer->expires = expires; |
211 | internal_add_timer(new_base, timer); | 265 | internal_add_timer(new_base, timer); |
212 | timer->base = new_base; | 266 | spin_unlock_irqrestore(&new_base->t_base.lock, flags); |
213 | |||
214 | if (old_base && (new_base != old_base)) | ||
215 | spin_unlock(&old_base->lock); | ||
216 | spin_unlock(&new_base->lock); | ||
217 | spin_unlock_irqrestore(&timer->lock, flags); | ||
218 | 267 | ||
219 | return ret; | 268 | return ret; |
220 | } | 269 | } |
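lock_timer_base() plus the rewritten __mod_timer() replace the old ordered double-locking: a timer is locked through whatever base it currently points at, and it is only migrated to the local CPU when its handler is not running, so del_timer_sync() can always reach the handler via that base. Below is a minimal driver-style sketch of the classic pattern this has to keep safe, a periodic timer that is also re-armed from other contexts; my_timer, my_handler, my_kick and MY_PERIOD are illustrative names, not part of the patch.

    #include <linux/timer.h>
    #include <linux/jiffies.h>

    #define MY_PERIOD       (HZ / 10)       /* illustrative: ~100 ms */

    static struct timer_list my_timer;

    /* Periodic handler: re-arms the timer it is running from. */
    static void my_handler(unsigned long data)
    {
            /* ... periodic work ... */
            mod_timer(&my_timer, jiffies + MY_PERIOD);
    }

    /* May be called from any CPU, possibly while my_handler() is running.
     * In that race __mod_timer() above sees base->running_timer == timer
     * and leaves the timer on its current base, which is what lets
     * del_timer_sync() still find the running handler. */
    static void my_kick(void)
    {
            mod_timer(&my_timer, jiffies + MY_PERIOD);
    }

    static void my_start(void)
    {
            init_timer(&my_timer);
            my_timer.function = my_handler;
            my_timer.data = 0;
            my_kick();
    }
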
@@ -232,15 +281,15 @@ void add_timer_on(struct timer_list *timer, int cpu) | |||
232 | { | 281 | { |
233 | tvec_base_t *base = &per_cpu(tvec_bases, cpu); | 282 | tvec_base_t *base = &per_cpu(tvec_bases, cpu); |
234 | unsigned long flags; | 283 | unsigned long flags; |
235 | 284 | ||
236 | BUG_ON(timer_pending(timer) || !timer->function); | 285 | BUG_ON(timer_pending(timer) || !timer->function); |
237 | 286 | ||
238 | check_timer(timer); | 287 | check_timer(timer); |
239 | 288 | ||
240 | spin_lock_irqsave(&base->lock, flags); | 289 | spin_lock_irqsave(&base->t_base.lock, flags); |
290 | timer->base = &base->t_base; | ||
241 | internal_add_timer(base, timer); | 291 | internal_add_timer(base, timer); |
242 | timer->base = base; | 292 | spin_unlock_irqrestore(&base->t_base.lock, flags); |
243 | spin_unlock_irqrestore(&base->lock, flags); | ||
244 | } | 293 | } |
245 | 294 | ||
246 | 295 | ||
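add_timer_on() now assigns timer->base under the destination base's lock before queueing, so lock_timer_base() can never observe the timer on a list with a stale base pointer. A short sketch of pinning a one-shot timer to a chosen CPU; the names are illustrative, and the handler still runs in that CPU's timer softirq.

    /* Sketch: arming a timer on a specific CPU.  Illustrative only;
     * the function pointer must be set before the call (the BUG_ON above
     * checks it) and expires is consumed directly by internal_add_timer(). */
    static struct timer_list my_cpu_timer;

    static void my_cpu_handler(unsigned long data)
    {
            printk(KERN_DEBUG "timer fired on cpu %d\n", smp_processor_id());
    }

    static void my_arm_on(int cpu)
    {
            init_timer(&my_cpu_timer);
            my_cpu_timer.function = my_cpu_handler;
            my_cpu_timer.expires = jiffies + HZ;
            add_timer_on(&my_cpu_timer, cpu);
    }
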
@@ -295,109 +344,84 @@ EXPORT_SYMBOL(mod_timer); | |||
295 | */ | 344 | */ |
296 | int del_timer(struct timer_list *timer) | 345 | int del_timer(struct timer_list *timer) |
297 | { | 346 | { |
347 | timer_base_t *base; | ||
298 | unsigned long flags; | 348 | unsigned long flags; |
299 | tvec_base_t *base; | 349 | int ret = 0; |
300 | 350 | ||
301 | check_timer(timer); | 351 | check_timer(timer); |
302 | 352 | ||
303 | repeat: | 353 | if (timer_pending(timer)) { |
304 | base = timer->base; | 354 | base = lock_timer_base(timer, &flags); |
305 | if (!base) | 355 | if (timer_pending(timer)) { |
306 | return 0; | 356 | detach_timer(timer, 1); |
307 | spin_lock_irqsave(&base->lock, flags); | 357 | ret = 1; |
308 | if (base != timer->base) { | 358 | } |
309 | spin_unlock_irqrestore(&base->lock, flags); | 359 | spin_unlock_irqrestore(&base->lock, flags); |
310 | goto repeat; | ||
311 | } | 360 | } |
312 | list_del(&timer->entry); | ||
313 | /* Need to make sure that anybody who sees a NULL base also sees the list ops */ | ||
314 | smp_wmb(); | ||
315 | timer->base = NULL; | ||
316 | spin_unlock_irqrestore(&base->lock, flags); | ||
317 | 361 | ||
318 | return 1; | 362 | return ret; |
319 | } | 363 | } |
320 | 364 | ||
321 | EXPORT_SYMBOL(del_timer); | 365 | EXPORT_SYMBOL(del_timer); |
322 | 366 | ||
323 | #ifdef CONFIG_SMP | 367 | #ifdef CONFIG_SMP |
324 | /*** | 368 | /* |
325 | * del_timer_sync - deactivate a timer and wait for the handler to finish. | 369 | * This function tries to deactivate a timer. Upon successful (ret >= 0) |
326 | * @timer: the timer to be deactivated | 370 | * exit the timer is not queued and the handler is not running on any CPU. |
327 | * | ||
328 | * This function only differs from del_timer() on SMP: besides deactivating | ||
329 | * the timer it also makes sure the handler has finished executing on other | ||
330 | * CPUs. | ||
331 | * | ||
332 | * Synchronization rules: callers must prevent restarting of the timer, | ||
333 | * otherwise this function is meaningless. It must not be called from | ||
334 | * interrupt contexts. The caller must not hold locks which would prevent | ||
335 | * completion of the timer's handler. Upon exit the timer is not queued and | ||
336 | * the handler is not running on any CPU. | ||
337 | * | ||
338 | * The function returns whether it has deactivated a pending timer or not. | ||
339 | * | 371 | * |
340 | * del_timer_sync() is slow and complicated because it copes with timer | 372 | * It must not be called from interrupt contexts. |
341 | * handlers which re-arm the timer (periodic timers). If the timer handler | ||
342 | * is known to not do this (a single shot timer) then use | ||
343 | * del_singleshot_timer_sync() instead. | ||
344 | */ | 373 | */ |
345 | int del_timer_sync(struct timer_list *timer) | 374 | int try_to_del_timer_sync(struct timer_list *timer) |
346 | { | 375 | { |
347 | tvec_base_t *base; | 376 | timer_base_t *base; |
348 | int i, ret = 0; | 377 | unsigned long flags; |
378 | int ret = -1; | ||
349 | 379 | ||
350 | check_timer(timer); | 380 | base = lock_timer_base(timer, &flags); |
351 | 381 | ||
352 | del_again: | 382 | if (base->running_timer == timer) |
353 | ret += del_timer(timer); | 383 | goto out; |
354 | 384 | ||
355 | for_each_online_cpu(i) { | 385 | ret = 0; |
356 | base = &per_cpu(tvec_bases, i); | 386 | if (timer_pending(timer)) { |
357 | if (base->running_timer == timer) { | 387 | detach_timer(timer, 1); |
358 | while (base->running_timer == timer) { | 388 | ret = 1; |
359 | cpu_relax(); | ||
360 | preempt_check_resched(); | ||
361 | } | ||
362 | break; | ||
363 | } | ||
364 | } | 389 | } |
365 | smp_rmb(); | 390 | out: |
366 | if (timer_pending(timer)) | 391 | spin_unlock_irqrestore(&base->lock, flags); |
367 | goto del_again; | ||
368 | 392 | ||
369 | return ret; | 393 | return ret; |
370 | } | 394 | } |
371 | EXPORT_SYMBOL(del_timer_sync); | ||
372 | 395 | ||
373 | /*** | 396 | /*** |
374 | * del_singleshot_timer_sync - deactivate a non-recursive timer | 397 | * del_timer_sync - deactivate a timer and wait for the handler to finish. |
375 | * @timer: the timer to be deactivated | 398 | * @timer: the timer to be deactivated |
376 | * | 399 | * |
377 | * This function is an optimization of del_timer_sync for the case where the | 400 | * This function only differs from del_timer() on SMP: besides deactivating |
378 | * caller can guarantee the timer does not reschedule itself in its timer | 401 | * the timer it also makes sure the handler has finished executing on other |
379 | * function. | 402 | * CPUs. |
380 | * | 403 | * |
381 | * Synchronization rules: callers must prevent restarting of the timer, | 404 | * Synchronization rules: callers must prevent restarting of the timer, |
382 | * otherwise this function is meaningless. It must not be called from | 405 | * otherwise this function is meaningless. It must not be called from |
383 | * interrupt contexts. The caller must not hold locks which wold prevent | 406 | * interrupt contexts. The caller must not hold locks which would prevent |
384 | * completion of the timer's handler. Upon exit the timer is not queued and | 407 | * completion of the timer's handler. The timer's handler must not call |
385 | * the handler is not running on any CPU. | 408 | * add_timer_on(). Upon exit the timer is not queued and the handler is |
409 | * not running on any CPU. | ||
386 | * | 410 | * |
387 | * The function returns whether it has deactivated a pending timer or not. | 411 | * The function returns whether it has deactivated a pending timer or not. |
388 | */ | 412 | */ |
389 | int del_singleshot_timer_sync(struct timer_list *timer) | 413 | int del_timer_sync(struct timer_list *timer) |
390 | { | 414 | { |
391 | int ret = del_timer(timer); | 415 | check_timer(timer); |
392 | 416 | ||
393 | if (!ret) { | 417 | for (;;) { |
394 | ret = del_timer_sync(timer); | 418 | int ret = try_to_del_timer_sync(timer); |
395 | BUG_ON(ret); | 419 | if (ret >= 0) |
420 | return ret; | ||
396 | } | 421 | } |
397 | |||
398 | return ret; | ||
399 | } | 422 | } |
400 | EXPORT_SYMBOL(del_singleshot_timer_sync); | 423 | |
424 | EXPORT_SYMBOL(del_timer_sync); | ||
401 | #endif | 425 | #endif |
402 | 426 | ||
403 | static int cascade(tvec_base_t *base, tvec_t *tv, int index) | 427 | static int cascade(tvec_base_t *base, tvec_t *tv, int index) |
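try_to_del_timer_sync() is the new lock-friendly primitive on SMP builds: a return of -1 means the handler is running right now, so a caller that holds a lock the handler also takes can drop it and retry instead of spinning into a deadlock; the del_timer_sync() loop above is exactly that pattern with no caller lock to drop. A hedged sketch of the caller-side version; my_lock and my_timer are illustrative names.

    /* Sketch: stopping a timer whose handler takes my_lock, without
     * deadlocking against it.  Mirrors the del_timer_sync() loop above,
     * but drops the caller's own lock around each retry. */
    static spinlock_t my_lock = SPIN_LOCK_UNLOCKED;
    static struct timer_list my_timer;

    static void my_stop(void)
    {
            spin_lock_bh(&my_lock);
            while (try_to_del_timer_sync(&my_timer) < 0) {
                    spin_unlock_bh(&my_lock);
                    cpu_relax();    /* give the running handler time to finish */
                    spin_lock_bh(&my_lock);
            }
            /* timer is neither pending nor running here; my_lock still held */
            spin_unlock_bh(&my_lock);
    }
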
@@ -415,7 +439,7 @@ static int cascade(tvec_base_t *base, tvec_t *tv, int index) | |||
415 | struct timer_list *tmp; | 439 | struct timer_list *tmp; |
416 | 440 | ||
417 | tmp = list_entry(curr, struct timer_list, entry); | 441 | tmp = list_entry(curr, struct timer_list, entry); |
418 | BUG_ON(tmp->base != base); | 442 | BUG_ON(tmp->base != &base->t_base); |
419 | curr = curr->next; | 443 | curr = curr->next; |
420 | internal_add_timer(base, tmp); | 444 | internal_add_timer(base, tmp); |
421 | } | 445 | } |
@@ -437,7 +461,7 @@ static inline void __run_timers(tvec_base_t *base) | |||
437 | { | 461 | { |
438 | struct timer_list *timer; | 462 | struct timer_list *timer; |
439 | 463 | ||
440 | spin_lock_irq(&base->lock); | 464 | spin_lock_irq(&base->t_base.lock); |
441 | while (time_after_eq(jiffies, base->timer_jiffies)) { | 465 | while (time_after_eq(jiffies, base->timer_jiffies)) { |
442 | struct list_head work_list = LIST_HEAD_INIT(work_list); | 466 | struct list_head work_list = LIST_HEAD_INIT(work_list); |
443 | struct list_head *head = &work_list; | 467 | struct list_head *head = &work_list; |
@@ -453,8 +477,7 @@ static inline void __run_timers(tvec_base_t *base) | |||
453 | cascade(base, &base->tv5, INDEX(3)); | 477 | cascade(base, &base->tv5, INDEX(3)); |
454 | ++base->timer_jiffies; | 478 | ++base->timer_jiffies; |
455 | list_splice_init(base->tv1.vec + index, &work_list); | 479 | list_splice_init(base->tv1.vec + index, &work_list); |
456 | repeat: | 480 | while (!list_empty(head)) { |
457 | if (!list_empty(head)) { | ||
458 | void (*fn)(unsigned long); | 481 | void (*fn)(unsigned long); |
459 | unsigned long data; | 482 | unsigned long data; |
460 | 483 | ||
@@ -462,25 +485,26 @@ repeat: | |||
462 | fn = timer->function; | 485 | fn = timer->function; |
463 | data = timer->data; | 486 | data = timer->data; |
464 | 487 | ||
465 | list_del(&timer->entry); | ||
466 | set_running_timer(base, timer); | 488 | set_running_timer(base, timer); |
467 | smp_wmb(); | 489 | detach_timer(timer, 1); |
468 | timer->base = NULL; | 490 | spin_unlock_irq(&base->t_base.lock); |
469 | spin_unlock_irq(&base->lock); | ||
470 | { | 491 | { |
471 | u32 preempt_count = preempt_count(); | 492 | int preempt_count = preempt_count(); |
472 | fn(data); | 493 | fn(data); |
473 | if (preempt_count != preempt_count()) { | 494 | if (preempt_count != preempt_count()) { |
474 | printk("huh, entered %p with %08x, exited with %08x?\n", fn, preempt_count, preempt_count()); | 495 | printk(KERN_WARNING "huh, entered %p " |
496 | "with preempt_count %08x, exited" | ||
497 | " with %08x?\n", | ||
498 | fn, preempt_count, | ||
499 | preempt_count()); | ||
475 | BUG(); | 500 | BUG(); |
476 | } | 501 | } |
477 | } | 502 | } |
478 | spin_lock_irq(&base->lock); | 503 | spin_lock_irq(&base->t_base.lock); |
479 | goto repeat; | ||
480 | } | 504 | } |
481 | } | 505 | } |
482 | set_running_timer(base, NULL); | 506 | set_running_timer(base, NULL); |
483 | spin_unlock_irq(&base->lock); | 507 | spin_unlock_irq(&base->t_base.lock); |
484 | } | 508 | } |
485 | 509 | ||
486 | #ifdef CONFIG_NO_IDLE_HZ | 510 | #ifdef CONFIG_NO_IDLE_HZ |
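The run loop now detaches each expired timer under the base lock, drops the lock, and calls the handler; the preempt_count() comparison around fn(data) is a sanity check that the handler restored whatever it disabled. On a CONFIG_PREEMPT build the sketch below is the kind of bug it flags; buggy_lock and buggy_handler are illustrative.

    /* Sketch: a handler the check above would catch on CONFIG_PREEMPT.
     * Returning with buggy_lock still held leaves preempt_count() higher
     * than it was on entry, triggering the "huh, entered ..." BUG(). */
    static spinlock_t buggy_lock = SPIN_LOCK_UNLOCKED;

    static void buggy_handler(unsigned long data)
    {
            spin_lock(&buggy_lock);
            if (data)
                    return;         /* bug: early return leaks the lock */
            spin_unlock(&buggy_lock);
    }
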
@@ -499,7 +523,7 @@ unsigned long next_timer_interrupt(void) | |||
499 | int i, j; | 523 | int i, j; |
500 | 524 | ||
501 | base = &__get_cpu_var(tvec_bases); | 525 | base = &__get_cpu_var(tvec_bases); |
502 | spin_lock(&base->lock); | 526 | spin_lock(&base->t_base.lock); |
503 | expires = base->timer_jiffies + (LONG_MAX >> 1); | 527 | expires = base->timer_jiffies + (LONG_MAX >> 1); |
504 | list = 0; | 528 | list = 0; |
505 | 529 | ||
@@ -547,7 +571,7 @@ found: | |||
547 | expires = nte->expires; | 571 | expires = nte->expires; |
548 | } | 572 | } |
549 | } | 573 | } |
550 | spin_unlock(&base->lock); | 574 | spin_unlock(&base->t_base.lock); |
551 | return expires; | 575 | return expires; |
552 | } | 576 | } |
553 | #endif | 577 | #endif |
@@ -1286,9 +1310,9 @@ static void __devinit init_timers_cpu(int cpu) | |||
1286 | { | 1310 | { |
1287 | int j; | 1311 | int j; |
1288 | tvec_base_t *base; | 1312 | tvec_base_t *base; |
1289 | 1313 | ||
1290 | base = &per_cpu(tvec_bases, cpu); | 1314 | base = &per_cpu(tvec_bases, cpu); |
1291 | spin_lock_init(&base->lock); | 1315 | spin_lock_init(&base->t_base.lock); |
1292 | for (j = 0; j < TVN_SIZE; j++) { | 1316 | for (j = 0; j < TVN_SIZE; j++) { |
1293 | INIT_LIST_HEAD(base->tv5.vec + j); | 1317 | INIT_LIST_HEAD(base->tv5.vec + j); |
1294 | INIT_LIST_HEAD(base->tv4.vec + j); | 1318 | INIT_LIST_HEAD(base->tv4.vec + j); |
@@ -1302,22 +1326,16 @@ static void __devinit init_timers_cpu(int cpu) | |||
1302 | } | 1326 | } |
1303 | 1327 | ||
1304 | #ifdef CONFIG_HOTPLUG_CPU | 1328 | #ifdef CONFIG_HOTPLUG_CPU |
1305 | static int migrate_timer_list(tvec_base_t *new_base, struct list_head *head) | 1329 | static void migrate_timer_list(tvec_base_t *new_base, struct list_head *head) |
1306 | { | 1330 | { |
1307 | struct timer_list *timer; | 1331 | struct timer_list *timer; |
1308 | 1332 | ||
1309 | while (!list_empty(head)) { | 1333 | while (!list_empty(head)) { |
1310 | timer = list_entry(head->next, struct timer_list, entry); | 1334 | timer = list_entry(head->next, struct timer_list, entry); |
1311 | /* We're locking backwards from __mod_timer order here, | 1335 | detach_timer(timer, 0); |
1312 | beware deadlock. */ | 1336 | timer->base = &new_base->t_base; |
1313 | if (!spin_trylock(&timer->lock)) | ||
1314 | return 0; | ||
1315 | list_del(&timer->entry); | ||
1316 | internal_add_timer(new_base, timer); | 1337 | internal_add_timer(new_base, timer); |
1317 | timer->base = new_base; | ||
1318 | spin_unlock(&timer->lock); | ||
1319 | } | 1338 | } |
1320 | return 1; | ||
1321 | } | 1339 | } |
1322 | 1340 | ||
1323 | static void __devinit migrate_timers(int cpu) | 1341 | static void __devinit migrate_timers(int cpu) |
@@ -1331,39 +1349,24 @@ static void __devinit migrate_timers(int cpu) | |||
1331 | new_base = &get_cpu_var(tvec_bases); | 1349 | new_base = &get_cpu_var(tvec_bases); |
1332 | 1350 | ||
1333 | local_irq_disable(); | 1351 | local_irq_disable(); |
1334 | again: | 1352 | spin_lock(&new_base->t_base.lock); |
1335 | /* Prevent deadlocks via ordering by old_base < new_base. */ | 1353 | spin_lock(&old_base->t_base.lock); |
1336 | if (old_base < new_base) { | ||
1337 | spin_lock(&new_base->lock); | ||
1338 | spin_lock(&old_base->lock); | ||
1339 | } else { | ||
1340 | spin_lock(&old_base->lock); | ||
1341 | spin_lock(&new_base->lock); | ||
1342 | } | ||
1343 | 1354 | ||
1344 | if (old_base->running_timer) | 1355 | if (old_base->t_base.running_timer) |
1345 | BUG(); | 1356 | BUG(); |
1346 | for (i = 0; i < TVR_SIZE; i++) | 1357 | for (i = 0; i < TVR_SIZE; i++) |
1347 | if (!migrate_timer_list(new_base, old_base->tv1.vec + i)) | 1358 | migrate_timer_list(new_base, old_base->tv1.vec + i); |
1348 | goto unlock_again; | 1359 | for (i = 0; i < TVN_SIZE; i++) { |
1349 | for (i = 0; i < TVN_SIZE; i++) | 1360 | migrate_timer_list(new_base, old_base->tv2.vec + i); |
1350 | if (!migrate_timer_list(new_base, old_base->tv2.vec + i) | 1361 | migrate_timer_list(new_base, old_base->tv3.vec + i); |
1351 | || !migrate_timer_list(new_base, old_base->tv3.vec + i) | 1362 | migrate_timer_list(new_base, old_base->tv4.vec + i); |
1352 | || !migrate_timer_list(new_base, old_base->tv4.vec + i) | 1363 | migrate_timer_list(new_base, old_base->tv5.vec + i); |
1353 | || !migrate_timer_list(new_base, old_base->tv5.vec + i)) | 1364 | } |
1354 | goto unlock_again; | 1365 | |
1355 | spin_unlock(&old_base->lock); | 1366 | spin_unlock(&old_base->t_base.lock); |
1356 | spin_unlock(&new_base->lock); | 1367 | spin_unlock(&new_base->t_base.lock); |
1357 | local_irq_enable(); | 1368 | local_irq_enable(); |
1358 | put_cpu_var(tvec_bases); | 1369 | put_cpu_var(tvec_bases); |
1359 | return; | ||
1360 | |||
1361 | unlock_again: | ||
1362 | /* Avoid deadlock with __mod_timer, by backing off. */ | ||
1363 | spin_unlock(&old_base->lock); | ||
1364 | spin_unlock(&new_base->lock); | ||
1365 | cpu_relax(); | ||
1366 | goto again; | ||
1367 | } | 1370 | } |
1368 | #endif /* CONFIG_HOTPLUG_CPU */ | 1371 | #endif /* CONFIG_HOTPLUG_CPU */ |
1369 | 1372 | ||