path: root/kernel
author	Linus Torvalds <torvalds@linux-foundation.org>	2012-07-31 18:34:13 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2012-07-31 18:34:13 -0400
commit	bca1a5c0eabe0f17081760c61e8d08e73dd6b6a6 (patch)
tree	f939c6f42bf459786eb0050578044fdde56fec90 /kernel
parent	ec7a19bfec544aa73e347369232f9bd654954aa3 (diff)
parent	194f8dcbe9629d8e9346cf96345a9c0bbf0e67ae (diff)
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf updates from Ingo Molnar:
 "The biggest changes are Intel Nehalem-EX PMU uncore support, uprobes
  updates/cleanups/fixes from Oleg and diverse tooling updates (mostly
  fixes) now that Arnaldo is back from vacation."

* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (40 commits)
  uprobes: __replace_page() needs munlock_vma_page()
  uprobes: Rename vma_address() and make it return "unsigned long"
  uprobes: Fix register_for_each_vma()->vma_address() check
  uprobes: Introduce vaddr_to_offset(vma, vaddr)
  uprobes: Teach build_probe_list() to consider the range
  uprobes: Remove insert_vm_struct()->uprobe_mmap()
  uprobes: Remove copy_vma()->uprobe_mmap()
  uprobes: Fix overflow in vma_address()/find_active_uprobe()
  uprobes: Suppress uprobe_munmap() from mmput()
  uprobes: Uprobe_mmap/munmap needs list_for_each_entry_safe()
  uprobes: Clean up and document write_opcode()->lock_page(old_page)
  uprobes: Kill write_opcode()->lock_page(new_page)
  uprobes: __replace_page() should not use page_address_in_vma()
  uprobes: Don't recheck vma/f_mapping in write_opcode()
  perf/x86: Fix missing struct before structure name
  perf/x86: Fix format definition of SNB-EP uncore QPI box
  perf/x86: Make bitfield unsigned
  perf/x86: Fix LLC-* and node-* events on Intel SandyBridge
  perf/x86: Add Intel Nehalem-EX uncore support
  perf/x86: Fix typo in format definition of uncore PCU filter
  ...
Diffstat (limited to 'kernel')
-rw-r--r--	kernel/events/uprobes.c	213
-rw-r--r--	kernel/sched/core.c	2
2 files changed, 98 insertions(+), 117 deletions(-)
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index f93532748bca..c08a22d02f72 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -32,6 +32,7 @@
 #include <linux/swap.h>		/* try_to_free_swap */
 #include <linux/ptrace.h>	/* user_enable_single_step */
 #include <linux/kdebug.h>	/* notifier mechanism */
+#include "../../mm/internal.h"	/* munlock_vma_page */
 
 #include <linux/uprobes.h>
 
@@ -112,14 +113,14 @@ static bool valid_vma(struct vm_area_struct *vma, bool is_register)
 	return false;
 }
 
-static loff_t vma_address(struct vm_area_struct *vma, loff_t offset)
+static unsigned long offset_to_vaddr(struct vm_area_struct *vma, loff_t offset)
 {
-	loff_t vaddr;
-
-	vaddr = vma->vm_start + offset;
-	vaddr -= vma->vm_pgoff << PAGE_SHIFT;
-
-	return vaddr;
+	return vma->vm_start + offset - ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
+}
+
+static loff_t vaddr_to_offset(struct vm_area_struct *vma, unsigned long vaddr)
+{
+	return ((loff_t)vma->vm_pgoff << PAGE_SHIFT) + (vaddr - vma->vm_start);
 }
 
 /**
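The (loff_t) cast in the new helpers is the point of the "uprobes: Fix overflow in vma_address()/find_active_uprobe()" commit listed above: on 32-bit kernels vm_pgoff is a 32-bit unsigned long, so vm_pgoff << PAGE_SHIFT is computed in 32 bits and wraps for file offsets of 4 GiB and up; widening before the shift avoids that. A minimal userspace sketch of the same arithmetic, emulating a 32-bit unsigned long with uint32_t (illustrative only, not kernel code):

/* Sketch: why the shift must be widened before, not after. */
#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT 12

int main(void)
{
	uint32_t vm_pgoff = 0x00100001;	/* maps file offset 4 GiB + 4 KiB */
	uint64_t wrapped = (uint64_t)(vm_pgoff << PAGE_SHIFT);	/* shift done in 32 bits: wraps to 0x1000 */
	uint64_t widened = (uint64_t)vm_pgoff << PAGE_SHIFT;	/* widened first: correct 0x100001000 */

	printf("wrapped: 0x%llx\n", (unsigned long long)wrapped);
	printf("widened: 0x%llx\n", (unsigned long long)widened);
	return 0;
}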
@@ -127,25 +128,27 @@ static loff_t vma_address(struct vm_area_struct *vma, loff_t offset)
  * based on replace_page in mm/ksm.c
  *
  * @vma:      vma that holds the pte pointing to page
+ * @addr:     address the old @page is mapped at
  * @page:     the cowed page we are replacing by kpage
  * @kpage:    the modified page we replace page by
  *
  * Returns 0 on success, -EFAULT on failure.
  */
-static int __replace_page(struct vm_area_struct *vma, struct page *page, struct page *kpage)
+static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
+				struct page *page, struct page *kpage)
 {
 	struct mm_struct *mm = vma->vm_mm;
-	unsigned long addr;
 	spinlock_t *ptl;
 	pte_t *ptep;
+	int err;
 
-	addr = page_address_in_vma(page, vma);
-	if (addr == -EFAULT)
-		return -EFAULT;
+	/* For try_to_free_swap() and munlock_vma_page() below */
+	lock_page(page);
 
+	err = -EAGAIN;
 	ptep = page_check_address(page, mm, addr, &ptl, 0);
 	if (!ptep)
-		return -EAGAIN;
+		goto unlock;
 
 	get_page(kpage);
 	page_add_new_anon_rmap(kpage, vma, addr);
@@ -162,10 +165,16 @@ static int __replace_page(struct vm_area_struct *vma, struct page *page, struct
 	page_remove_rmap(page);
 	if (!page_mapped(page))
 		try_to_free_swap(page);
-	put_page(page);
 	pte_unmap_unlock(ptep, ptl);
 
-	return 0;
+	if (vma->vm_flags & VM_LOCKED)
+		munlock_vma_page(page);
+	put_page(page);
+
+	err = 0;
+ unlock:
+	unlock_page(page);
+	return err;
 }
 
 /**
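The restructured __replace_page() is the classic single-exit error-handling idiom: take the lock first, default err to the failure code, and funnel every path through one unlock label. A self-contained userspace sketch of the same shape (resource_lock and update_resource() are stand-ins, not kernel APIs):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t resource_lock = PTHREAD_MUTEX_INITIALIZER;

static int update_resource(int *resource, int value)
{
	int err;

	pthread_mutex_lock(&resource_lock);	/* like lock_page(page) */

	err = -1;				/* like err = -EAGAIN */
	if (value < 0)
		goto unlock;			/* failure path still unlocks */

	*resource = value;
	err = 0;
unlock:
	pthread_mutex_unlock(&resource_lock);	/* like unlock_page(page) */
	return err;
}

int main(void)
{
	int res = 0;

	printf("ok: %d\n", update_resource(&res, 42));	/* prints 0 */
	printf("bad: %d\n", update_resource(&res, -1));	/* prints -1 */
	return 0;
}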
@@ -206,45 +215,23 @@ static int write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm,
 			unsigned long vaddr, uprobe_opcode_t opcode)
 {
 	struct page *old_page, *new_page;
-	struct address_space *mapping;
 	void *vaddr_old, *vaddr_new;
 	struct vm_area_struct *vma;
-	struct uprobe *uprobe;
 	int ret;
+
 retry:
 	/* Read the page with vaddr into memory */
 	ret = get_user_pages(NULL, mm, vaddr, 1, 0, 0, &old_page, &vma);
 	if (ret <= 0)
 		return ret;
 
-	ret = -EINVAL;
-
-	/*
-	 * We are interested in text pages only. Our pages of interest
-	 * should be mapped for read and execute only. We desist from
-	 * adding probes in write mapped pages since the breakpoints
-	 * might end up in the file copy.
-	 */
-	if (!valid_vma(vma, is_swbp_insn(&opcode)))
-		goto put_out;
-
-	uprobe = container_of(auprobe, struct uprobe, arch);
-	mapping = uprobe->inode->i_mapping;
-	if (mapping != vma->vm_file->f_mapping)
-		goto put_out;
-
 	ret = -ENOMEM;
 	new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vaddr);
 	if (!new_page)
-		goto put_out;
+		goto put_old;
 
 	__SetPageUptodate(new_page);
 
-	/*
-	 * lock page will serialize against do_wp_page()'s
-	 * PageAnon() handling
-	 */
-	lock_page(old_page);
 	/* copy the page now that we've got it stable */
 	vaddr_old = kmap_atomic(old_page);
 	vaddr_new = kmap_atomic(new_page);
@@ -257,17 +244,13 @@ retry:
 
 	ret = anon_vma_prepare(vma);
 	if (ret)
-		goto unlock_out;
+		goto put_new;
 
-	lock_page(new_page);
-	ret = __replace_page(vma, old_page, new_page);
-	unlock_page(new_page);
+	ret = __replace_page(vma, vaddr, old_page, new_page);
 
-unlock_out:
-	unlock_page(old_page);
+put_new:
 	page_cache_release(new_page);
-
-put_out:
+put_old:
 	put_page(old_page);
 
 	if (unlikely(ret == -EAGAIN))
@@ -791,7 +774,7 @@ build_map_info(struct address_space *mapping, loff_t offset, bool is_register)
 		curr = info;
 
 		info->mm = vma->vm_mm;
-		info->vaddr = vma_address(vma, offset);
+		info->vaddr = offset_to_vaddr(vma, offset);
 	}
 	mutex_unlock(&mapping->i_mmap_mutex);
 
@@ -839,12 +822,13 @@ static int register_for_each_vma(struct uprobe *uprobe, bool is_register)
 		goto free;
 
 	down_write(&mm->mmap_sem);
-	vma = find_vma(mm, (unsigned long)info->vaddr);
-	if (!vma || !valid_vma(vma, is_register))
+	vma = find_vma(mm, info->vaddr);
+	if (!vma || !valid_vma(vma, is_register) ||
+	    vma->vm_file->f_mapping->host != uprobe->inode)
 		goto unlock;
 
-	if (vma->vm_file->f_mapping->host != uprobe->inode ||
-	    vma_address(vma, uprobe->offset) != info->vaddr)
+	if (vma->vm_start > info->vaddr ||
+	    vaddr_to_offset(vma, info->vaddr) != uprobe->offset)
 		goto unlock;
 
 	if (is_register) {
@@ -960,59 +944,66 @@ void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consume
 	put_uprobe(uprobe);
 }
 
-/*
- * Of all the nodes that correspond to the given inode, return the node
- * with the least offset.
- */
-static struct rb_node *find_least_offset_node(struct inode *inode)
+static struct rb_node *
+find_node_in_range(struct inode *inode, loff_t min, loff_t max)
 {
-	struct uprobe u = { .inode = inode, .offset = 0};
 	struct rb_node *n = uprobes_tree.rb_node;
-	struct rb_node *close_node = NULL;
-	struct uprobe *uprobe;
-	int match;
 
 	while (n) {
-		uprobe = rb_entry(n, struct uprobe, rb_node);
-		match = match_uprobe(&u, uprobe);
-
-		if (uprobe->inode == inode)
-			close_node = n;
-
-		if (!match)
-			return close_node;
+		struct uprobe *u = rb_entry(n, struct uprobe, rb_node);
 
-		if (match < 0)
+		if (inode < u->inode) {
 			n = n->rb_left;
-		else
+		} else if (inode > u->inode) {
 			n = n->rb_right;
+		} else {
+			if (max < u->offset)
+				n = n->rb_left;
+			else if (min > u->offset)
+				n = n->rb_right;
+			else
+				break;
+		}
 	}
 
-	return close_node;
+	return n;
 }
 
 /*
- * For a given inode, build a list of probes that need to be inserted.
+ * For a given range in vma, build a list of probes that need to be inserted.
  */
-static void build_probe_list(struct inode *inode, struct list_head *head)
+static void build_probe_list(struct inode *inode,
+				struct vm_area_struct *vma,
+				unsigned long start, unsigned long end,
+				struct list_head *head)
 {
-	struct uprobe *uprobe;
+	loff_t min, max;
 	unsigned long flags;
-	struct rb_node *n;
-
-	spin_lock_irqsave(&uprobes_treelock, flags);
-
-	n = find_least_offset_node(inode);
+	struct rb_node *n, *t;
+	struct uprobe *u;
 
-	for (; n; n = rb_next(n)) {
-		uprobe = rb_entry(n, struct uprobe, rb_node);
-		if (uprobe->inode != inode)
-			break;
+	INIT_LIST_HEAD(head);
+	min = vaddr_to_offset(vma, start);
+	max = min + (end - start) - 1;
 
-		list_add(&uprobe->pending_list, head);
-		atomic_inc(&uprobe->ref);
+	spin_lock_irqsave(&uprobes_treelock, flags);
+	n = find_node_in_range(inode, min, max);
+	if (n) {
+		for (t = n; t; t = rb_prev(t)) {
+			u = rb_entry(t, struct uprobe, rb_node);
+			if (u->inode != inode || u->offset < min)
+				break;
+			list_add(&u->pending_list, head);
+			atomic_inc(&u->ref);
+		}
+		for (t = n; (t = rb_next(t)); ) {
+			u = rb_entry(t, struct uprobe, rb_node);
+			if (u->inode != inode || u->offset > max)
+				break;
+			list_add(&u->pending_list, head);
+			atomic_inc(&u->ref);
+		}
 	}
-
 	spin_unlock_irqrestore(&uprobes_treelock, flags);
 }
 
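The new lookup is the standard range-query pattern on an ordered structure: descend comparing the whole range [min, max] against each node's key until landing on *any* node inside the range, then walk backwards and forwards from the hit to collect the rest. A self-contained userspace analogue on a sorted array (an illustration, not the kernel's rb-tree code; find_in_range() plays the role of find_node_in_range(), the two scans play the rb_prev()/rb_next() loops):

#include <stdio.h>

static int find_in_range(const long *keys, int n, long min, long max)
{
	int lo = 0, hi = n - 1;

	while (lo <= hi) {
		int mid = lo + (hi - lo) / 2;

		if (max < keys[mid])		/* whole range lies to the left */
			hi = mid - 1;
		else if (min > keys[mid])	/* whole range lies to the right */
			lo = mid + 1;
		else
			return mid;		/* keys[mid] is inside [min, max] */
	}
	return -1;
}

int main(void)
{
	long offsets[] = { 0x100, 0x400, 0x1000, 0x1800, 0x4000 };
	int n = sizeof(offsets) / sizeof(offsets[0]);
	long min = 0x400, max = 0x2000;
	int hit = find_in_range(offsets, n, min, max);

	if (hit >= 0) {
		/* like the rb_prev() loop: the hit and everything below it in range */
		for (int t = hit; t >= 0 && offsets[t] >= min; t--)
			printf("collect 0x%lx\n", offsets[t]);
		/* like the rb_next() loop: everything above the hit in range */
		for (int t = hit + 1; t < n && offsets[t] <= max; t++)
			printf("collect 0x%lx\n", offsets[t]);
	}
	return 0;
}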
@@ -1031,7 +1022,7 @@ static void build_probe_list(struct inode *inode, struct list_head *head)
 int uprobe_mmap(struct vm_area_struct *vma)
 {
 	struct list_head tmp_list;
-	struct uprobe *uprobe;
+	struct uprobe *uprobe, *u;
 	struct inode *inode;
 	int ret, count;
 
@@ -1042,21 +1033,15 @@ int uprobe_mmap(struct vm_area_struct *vma)
 	if (!inode)
 		return 0;
 
-	INIT_LIST_HEAD(&tmp_list);
 	mutex_lock(uprobes_mmap_hash(inode));
-	build_probe_list(inode, &tmp_list);
+	build_probe_list(inode, vma, vma->vm_start, vma->vm_end, &tmp_list);
 
 	ret = 0;
 	count = 0;
 
-	list_for_each_entry(uprobe, &tmp_list, pending_list) {
+	list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) {
 		if (!ret) {
-			loff_t vaddr = vma_address(vma, uprobe->offset);
-
-			if (vaddr < vma->vm_start || vaddr >= vma->vm_end) {
-				put_uprobe(uprobe);
-				continue;
-			}
+			unsigned long vaddr = offset_to_vaddr(vma, uprobe->offset);
 
 			ret = install_breakpoint(uprobe, vma->vm_mm, vma, vaddr);
 			/*
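The switch to list_for_each_entry_safe() matters because the loop body may release the entry the cursor points at (put_uprobe() can drop the last reference); the _safe variant caches the next pointer before the body runs. The same idea in a self-contained userspace sketch with a simplified singly-linked list (stand-in types, not the kernel's list.h API):

#include <stdio.h>
#include <stdlib.h>

struct node { int val; struct node *next; };

int main(void)
{
	struct node *head = NULL, *n, *next;

	/* build a small list: 2 -> 1 -> 0 */
	for (int i = 0; i < 3; i++) {
		n = malloc(sizeof(*n));
		n->val = i;
		n->next = head;
		head = n;
	}

	/* "safe" traversal: fetch ->next before the body may free the node */
	for (n = head; n; n = next) {
		next = n->next;		/* what list_for_each_entry_safe caches */
		printf("visit %d\n", n->val);
		free(n);		/* body releases the node, like put_uprobe() */
	}
	return 0;
}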
@@ -1097,12 +1082,15 @@ int uprobe_mmap(struct vm_area_struct *vma)
 void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end)
 {
 	struct list_head tmp_list;
-	struct uprobe *uprobe;
+	struct uprobe *uprobe, *u;
 	struct inode *inode;
 
 	if (!atomic_read(&uprobe_events) || !valid_vma(vma, false))
 		return;
 
+	if (!atomic_read(&vma->vm_mm->mm_users)) /* called by mmput() ? */
+		return;
+
 	if (!atomic_read(&vma->vm_mm->uprobes_state.count))
 		return;
 
@@ -1110,21 +1098,17 @@ void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned lon
 	if (!inode)
 		return;
 
-	INIT_LIST_HEAD(&tmp_list);
 	mutex_lock(uprobes_mmap_hash(inode));
-	build_probe_list(inode, &tmp_list);
+	build_probe_list(inode, vma, start, end, &tmp_list);
 
-	list_for_each_entry(uprobe, &tmp_list, pending_list) {
-		loff_t vaddr = vma_address(vma, uprobe->offset);
-
-		if (vaddr >= start && vaddr < end) {
-			/*
-			 * An unregister could have removed the probe before
-			 * unmap. So check before we decrement the count.
-			 */
-			if (is_swbp_at_addr(vma->vm_mm, vaddr) == 1)
-				atomic_dec(&vma->vm_mm->uprobes_state.count);
-		}
+	list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) {
+		unsigned long vaddr = offset_to_vaddr(vma, uprobe->offset);
+		/*
+		 * An unregister could have removed the probe before
+		 * unmap. So check before we decrement the count.
+		 */
+		if (is_swbp_at_addr(vma->vm_mm, vaddr) == 1)
+			atomic_dec(&vma->vm_mm->uprobes_state.count);
 		put_uprobe(uprobe);
 	}
 	mutex_unlock(uprobes_mmap_hash(inode));
@@ -1463,12 +1447,9 @@ static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp)
 	vma = find_vma(mm, bp_vaddr);
 	if (vma && vma->vm_start <= bp_vaddr) {
 		if (valid_vma(vma, false)) {
-			struct inode *inode;
-			loff_t offset;
+			struct inode *inode = vma->vm_file->f_mapping->host;
+			loff_t offset = vaddr_to_offset(vma, bp_vaddr);
 
-			inode = vma->vm_file->f_mapping->host;
-			offset = bp_vaddr - vma->vm_start;
-			offset += (vma->vm_pgoff << PAGE_SHIFT);
 			uprobe = find_uprobe(inode, offset);
 		}
 
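find_active_uprobe() now derives the file offset with vaddr_to_offset() instead of open-coding the arithmetic; the two helpers introduced earlier are exact inverses for any address inside the vma. A small userspace check of that round-trip property (standalone sketch with stand-in types, not kernel code):

#include <assert.h>
#include <stdint.h>

#define PAGE_SHIFT 12

struct vma { uint64_t vm_start; uint64_t vm_pgoff; };

static uint64_t offset_to_vaddr(const struct vma *v, int64_t offset)
{
	return v->vm_start + offset - ((int64_t)v->vm_pgoff << PAGE_SHIFT);
}

static int64_t vaddr_to_offset(const struct vma *v, uint64_t vaddr)
{
	return ((int64_t)v->vm_pgoff << PAGE_SHIFT) + (vaddr - v->vm_start);
}

int main(void)
{
	struct vma v = { .vm_start = 0x7f0000400000, .vm_pgoff = 0x10 };
	int64_t offset = 0x10123;	/* file offset of a probed instruction */

	/* mapping offset -> vaddr -> offset must return the original value */
	assert(vaddr_to_offset(&v, offset_to_vaddr(&v, offset)) == offset);
	return 0;
}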
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 5d011ef4c0df..d325c4b2dcbb 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1910,12 +1910,12 @@ static inline void
 prepare_task_switch(struct rq *rq, struct task_struct *prev,
 		    struct task_struct *next)
 {
+	trace_sched_switch(prev, next);
 	sched_info_switch(prev, next);
 	perf_event_task_sched_out(prev, next);
 	fire_sched_out_preempt_notifiers(prev, next);
 	prepare_lock_switch(rq, next);
 	prepare_arch_switch(next);
-	trace_sched_switch(prev, next);
 }
 
 /**