aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorOleg Nesterov <oleg@redhat.com>2013-01-21 14:48:00 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2013-01-22 13:08:00 -0500
commit9899d11f654474d2d54ea52ceaa2a1f4db3abd68 (patch)
tree4ac2411ec2d79335128afd0142a91c8cbaac251f
parent910ffdb18a6408e14febbb6e4b6840fd2c928c82 (diff)
ptrace: ensure arch_ptrace/ptrace_request can never race with SIGKILL
putreg() assumes that the tracee is not running and pt_regs_access() can safely play with its stack. However a killed tracee can return from ptrace_stop() to the low-level asm code and do RESTORE_REST, this means that debugger can actually read/modify the kernel stack until the tracee does SAVE_REST again. set_task_blockstep() can race with SIGKILL too and in some sense this race is even worse, the very fact the tracee can be woken up breaks the logic. As Linus suggested we can clear TASK_WAKEKILL around the arch_ptrace() call, this ensures that nobody can ever wakeup the tracee while the debugger looks at it. Not only this fixes the mentioned problems, we can do some cleanups/simplifications in arch_ptrace() paths. Probably ptrace_unfreeze_traced() needs more callers, for example it makes sense to make the tracee killable for oom-killer before access_process_vm(). While at it, add the comment into may_ptrace_stop() to explain why ptrace_stop() still can't rely on SIGKILL and signal_pending_state(). Reported-by: Salman Qazi <sqazi@google.com> Reported-by: Suleiman Souhlal <suleiman@google.com> Suggested-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Oleg Nesterov <oleg@redhat.com> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--arch/x86/kernel/step.c9
-rw-r--r--kernel/ptrace.c64
-rw-r--r--kernel/signal.c5
3 files changed, 64 insertions, 14 deletions
diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c
index cd3b2438a980..9b4d51d0c0d0 100644
--- a/arch/x86/kernel/step.c
+++ b/arch/x86/kernel/step.c
@@ -165,10 +165,11 @@ void set_task_blockstep(struct task_struct *task, bool on)
165 * Ensure irq/preemption can't change debugctl in between. 165 * Ensure irq/preemption can't change debugctl in between.
166 * Note also that both TIF_BLOCKSTEP and debugctl should 166 * Note also that both TIF_BLOCKSTEP and debugctl should
167 * be changed atomically wrt preemption. 167 * be changed atomically wrt preemption.
168 * FIXME: this means that set/clear TIF_BLOCKSTEP is simply 168 *
169 * wrong if task != current, SIGKILL can wakeup the stopped 169 * NOTE: this means that set/clear TIF_BLOCKSTEP is only safe if
170 * tracee and set/clear can play with the running task, this 170 * task is current or it can't be running, otherwise we can race
171 * can confuse the next __switch_to_xtra(). 171 * with __switch_to_xtra(). We rely on ptrace_freeze_traced() but
172 * PTRACE_KILL is not safe.
172 */ 173 */
173 local_irq_disable(); 174 local_irq_disable();
174 debugctl = get_debugctlmsr(); 175 debugctl = get_debugctlmsr();
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 62f7c2774b16..6cbeaae4406d 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -122,6 +122,40 @@ void __ptrace_unlink(struct task_struct *child)
122 spin_unlock(&child->sighand->siglock); 122 spin_unlock(&child->sighand->siglock);
123} 123}
124 124
125/* Ensure that nothing can wake it up, even SIGKILL */
126static bool ptrace_freeze_traced(struct task_struct *task)
127{
128 bool ret = false;
129
130 /* Lockless, nobody but us can set this flag */
131 if (task->jobctl & JOBCTL_LISTENING)
132 return ret;
133
134 spin_lock_irq(&task->sighand->siglock);
135 if (task_is_traced(task) && !__fatal_signal_pending(task)) {
136 task->state = __TASK_TRACED;
137 ret = true;
138 }
139 spin_unlock_irq(&task->sighand->siglock);
140
141 return ret;
142}
143
144static void ptrace_unfreeze_traced(struct task_struct *task)
145{
146 if (task->state != __TASK_TRACED)
147 return;
148
149 WARN_ON(!task->ptrace || task->parent != current);
150
151 spin_lock_irq(&task->sighand->siglock);
152 if (__fatal_signal_pending(task))
153 wake_up_state(task, __TASK_TRACED);
154 else
155 task->state = TASK_TRACED;
156 spin_unlock_irq(&task->sighand->siglock);
157}
158
125/** 159/**
126 * ptrace_check_attach - check whether ptracee is ready for ptrace operation 160 * ptrace_check_attach - check whether ptracee is ready for ptrace operation
127 * @child: ptracee to check for 161 * @child: ptracee to check for
@@ -151,24 +185,29 @@ static int ptrace_check_attach(struct task_struct *child, bool ignore_state)
151 * be changed by us so it's not changing right after this. 185 * be changed by us so it's not changing right after this.
152 */ 186 */
153 read_lock(&tasklist_lock); 187 read_lock(&tasklist_lock);
154 if ((child->ptrace & PT_PTRACED) && child->parent == current) { 188 if (child->ptrace && child->parent == current) {
189 WARN_ON(child->state == __TASK_TRACED);
155 /* 190 /*
156 * child->sighand can't be NULL, release_task() 191 * child->sighand can't be NULL, release_task()
157 * does ptrace_unlink() before __exit_signal(). 192 * does ptrace_unlink() before __exit_signal().
158 */ 193 */
159 spin_lock_irq(&child->sighand->siglock); 194 if (ignore_state || ptrace_freeze_traced(child))
160 WARN_ON_ONCE(task_is_stopped(child));
161 if (ignore_state || (task_is_traced(child) &&
162 !(child->jobctl & JOBCTL_LISTENING)))
163 ret = 0; 195 ret = 0;
164 spin_unlock_irq(&child->sighand->siglock);
165 } 196 }
166 read_unlock(&tasklist_lock); 197 read_unlock(&tasklist_lock);
167 198
168 if (!ret && !ignore_state) 199 if (!ret && !ignore_state) {
169 ret = wait_task_inactive(child, TASK_TRACED) ? 0 : -ESRCH; 200 if (!wait_task_inactive(child, __TASK_TRACED)) {
201 /*
202 * This can only happen if may_ptrace_stop() fails and
203 * ptrace_stop() changes ->state back to TASK_RUNNING,
204 * so we should not worry about leaking __TASK_TRACED.
205 */
206 WARN_ON(child->state == __TASK_TRACED);
207 ret = -ESRCH;
208 }
209 }
170 210
171 /* All systems go.. */
172 return ret; 211 return ret;
173} 212}
174 213
@@ -900,6 +939,8 @@ SYSCALL_DEFINE4(ptrace, long, request, long, pid, unsigned long, addr,
900 goto out_put_task_struct; 939 goto out_put_task_struct;
901 940
902 ret = arch_ptrace(child, request, addr, data); 941 ret = arch_ptrace(child, request, addr, data);
942 if (ret || request != PTRACE_DETACH)
943 ptrace_unfreeze_traced(child);
903 944
904 out_put_task_struct: 945 out_put_task_struct:
905 put_task_struct(child); 946 put_task_struct(child);
@@ -1039,8 +1080,11 @@ asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid,
1039 1080
1040 ret = ptrace_check_attach(child, request == PTRACE_KILL || 1081 ret = ptrace_check_attach(child, request == PTRACE_KILL ||
1041 request == PTRACE_INTERRUPT); 1082 request == PTRACE_INTERRUPT);
1042 if (!ret) 1083 if (!ret) {
1043 ret = compat_arch_ptrace(child, request, addr, data); 1084 ret = compat_arch_ptrace(child, request, addr, data);
1085 if (ret || request != PTRACE_DETACH)
1086 ptrace_unfreeze_traced(child);
1087 }
1044 1088
1045 out_put_task_struct: 1089 out_put_task_struct:
1046 put_task_struct(child); 1090 put_task_struct(child);
diff --git a/kernel/signal.c b/kernel/signal.c
index 6e97aa6fa32c..3d09cf6cde75 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1794,6 +1794,10 @@ static inline int may_ptrace_stop(void)
1794 * If SIGKILL was already sent before the caller unlocked 1794 * If SIGKILL was already sent before the caller unlocked
1795 * ->siglock we must see ->core_state != NULL. Otherwise it 1795 * ->siglock we must see ->core_state != NULL. Otherwise it
1796 * is safe to enter schedule(). 1796 * is safe to enter schedule().
1797 *
1798 * This is almost outdated, a task with the pending SIGKILL can't
1799 * block in TASK_TRACED. But PTRACE_EVENT_EXIT can be reported
1800 * after SIGKILL was already dequeued.
1797 */ 1801 */
1798 if (unlikely(current->mm->core_state) && 1802 if (unlikely(current->mm->core_state) &&
1799 unlikely(current->mm == current->parent->mm)) 1803 unlikely(current->mm == current->parent->mm))
@@ -1919,6 +1923,7 @@ static void ptrace_stop(int exit_code, int why, int clear_code, siginfo_t *info)
1919 if (gstop_done) 1923 if (gstop_done)
1920 do_notify_parent_cldstop(current, false, why); 1924 do_notify_parent_cldstop(current, false, why);
1921 1925
1926 /* tasklist protects us from ptrace_freeze_traced() */
1922 __set_current_state(TASK_RUNNING); 1927 __set_current_state(TASK_RUNNING);
1923 if (clear_code) 1928 if (clear_code)
1924 current->exit_code = 0; 1929 current->exit_code = 0;