diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/debug/kdb/kdb_main.c | 91 | ||||
-rw-r--r-- | kernel/debug/kdb/kdb_private.h | 1 | ||||
-rw-r--r-- | kernel/events/core.c | 49 | ||||
-rw-r--r-- | kernel/events/uprobes.c | 461 | ||||
-rw-r--r-- | kernel/power/hibernate.c | 8 | ||||
-rw-r--r-- | kernel/power/user.c | 2 | ||||
-rw-r--r-- | kernel/printk.c | 83 | ||||
-rw-r--r-- | kernel/rcupdate.c | 44 | ||||
-rw-r--r-- | kernel/rcutiny.c | 4 | ||||
-rw-r--r-- | kernel/rcutiny_plugin.h | 56 | ||||
-rw-r--r-- | kernel/rcutorture.c | 72 | ||||
-rw-r--r-- | kernel/rcutree.c | 478 | ||||
-rw-r--r-- | kernel/rcutree.h | 46 | ||||
-rw-r--r-- | kernel/rcutree_plugin.h | 223 | ||||
-rw-r--r-- | kernel/rcutree_trace.c | 148 | ||||
-rw-r--r-- | kernel/smp.c | 20 | ||||
-rw-r--r-- | kernel/smpboot.h | 2 | ||||
-rw-r--r-- | kernel/time/tick-sched.c | 2 | ||||
-rw-r--r-- | kernel/time/timekeeping.c | 1 | ||||
-rw-r--r-- | kernel/trace/ftrace.c | 8 | ||||
-rw-r--r-- | kernel/trace/ring_buffer.c | 4 | ||||
-rw-r--r-- | kernel/trace/trace.c | 33 | ||||
-rw-r--r-- | kernel/trace/trace.h | 8 | ||||
-rw-r--r-- | kernel/trace/trace_functions_graph.c | 2 | ||||
-rw-r--r-- | kernel/trace/trace_output.c | 2 |
25 files changed, 958 insertions, 890 deletions
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index 67b847dfa2bb..1f91413edb87 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c | |||
@@ -14,6 +14,7 @@ | |||
14 | #include <linux/ctype.h> | 14 | #include <linux/ctype.h> |
15 | #include <linux/string.h> | 15 | #include <linux/string.h> |
16 | #include <linux/kernel.h> | 16 | #include <linux/kernel.h> |
17 | #include <linux/kmsg_dump.h> | ||
17 | #include <linux/reboot.h> | 18 | #include <linux/reboot.h> |
18 | #include <linux/sched.h> | 19 | #include <linux/sched.h> |
19 | #include <linux/sysrq.h> | 20 | #include <linux/sysrq.h> |
@@ -2040,8 +2041,15 @@ static int kdb_env(int argc, const char **argv) | |||
2040 | */ | 2041 | */ |
2041 | static int kdb_dmesg(int argc, const char **argv) | 2042 | static int kdb_dmesg(int argc, const char **argv) |
2042 | { | 2043 | { |
2043 | char *syslog_data[4], *start, *end, c = '\0', *p; | 2044 | int diag; |
2044 | int diag, logging, logsize, lines = 0, adjust = 0, n; | 2045 | int logging; |
2046 | int lines = 0; | ||
2047 | int adjust = 0; | ||
2048 | int n = 0; | ||
2049 | int skip = 0; | ||
2050 | struct kmsg_dumper dumper = { .active = 1 }; | ||
2051 | size_t len; | ||
2052 | char buf[201]; | ||
2045 | 2053 | ||
2046 | if (argc > 2) | 2054 | if (argc > 2) |
2047 | return KDB_ARGCOUNT; | 2055 | return KDB_ARGCOUNT; |
@@ -2064,22 +2072,10 @@ static int kdb_dmesg(int argc, const char **argv) | |||
2064 | kdb_set(2, setargs); | 2072 | kdb_set(2, setargs); |
2065 | } | 2073 | } |
2066 | 2074 | ||
2067 | /* syslog_data[0,1] physical start, end+1. syslog_data[2,3] | 2075 | kmsg_dump_rewind_nolock(&dumper); |
2068 | * logical start, end+1. */ | 2076 | while (kmsg_dump_get_line_nolock(&dumper, 1, NULL, 0, NULL)) |
2069 | kdb_syslog_data(syslog_data); | 2077 | n++; |
2070 | if (syslog_data[2] == syslog_data[3]) | 2078 | |
2071 | return 0; | ||
2072 | logsize = syslog_data[1] - syslog_data[0]; | ||
2073 | start = syslog_data[2]; | ||
2074 | end = syslog_data[3]; | ||
2075 | #define KDB_WRAP(p) (((p - syslog_data[0]) % logsize) + syslog_data[0]) | ||
2076 | for (n = 0, p = start; p < end; ++p) { | ||
2077 | c = *KDB_WRAP(p); | ||
2078 | if (c == '\n') | ||
2079 | ++n; | ||
2080 | } | ||
2081 | if (c != '\n') | ||
2082 | ++n; | ||
2083 | if (lines < 0) { | 2079 | if (lines < 0) { |
2084 | if (adjust >= n) | 2080 | if (adjust >= n) |
2085 | kdb_printf("buffer only contains %d lines, nothing " | 2081 | kdb_printf("buffer only contains %d lines, nothing " |
@@ -2087,21 +2083,11 @@ static int kdb_dmesg(int argc, const char **argv) | |||
2087 | else if (adjust - lines >= n) | 2083 | else if (adjust - lines >= n) |
2088 | kdb_printf("buffer only contains %d lines, last %d " | 2084 | kdb_printf("buffer only contains %d lines, last %d " |
2089 | "lines printed\n", n, n - adjust); | 2085 | "lines printed\n", n, n - adjust); |
2090 | if (adjust) { | 2086 | skip = adjust; |
2091 | for (; start < end && adjust; ++start) { | 2087 | lines = abs(lines); |
2092 | if (*KDB_WRAP(start) == '\n') | ||
2093 | --adjust; | ||
2094 | } | ||
2095 | if (start < end) | ||
2096 | ++start; | ||
2097 | } | ||
2098 | for (p = start; p < end && lines; ++p) { | ||
2099 | if (*KDB_WRAP(p) == '\n') | ||
2100 | ++lines; | ||
2101 | } | ||
2102 | end = p; | ||
2103 | } else if (lines > 0) { | 2088 | } else if (lines > 0) { |
2104 | int skip = n - (adjust + lines); | 2089 | skip = n - lines - adjust; |
2090 | lines = abs(lines); | ||
2105 | if (adjust >= n) { | 2091 | if (adjust >= n) { |
2106 | kdb_printf("buffer only contains %d lines, " | 2092 | kdb_printf("buffer only contains %d lines, " |
2107 | "nothing printed\n", n); | 2093 | "nothing printed\n", n); |
@@ -2112,35 +2098,24 @@ static int kdb_dmesg(int argc, const char **argv) | |||
2112 | kdb_printf("buffer only contains %d lines, first " | 2098 | kdb_printf("buffer only contains %d lines, first " |
2113 | "%d lines printed\n", n, lines); | 2099 | "%d lines printed\n", n, lines); |
2114 | } | 2100 | } |
2115 | for (; start < end && skip; ++start) { | 2101 | } else { |
2116 | if (*KDB_WRAP(start) == '\n') | 2102 | lines = n; |
2117 | --skip; | ||
2118 | } | ||
2119 | for (p = start; p < end && lines; ++p) { | ||
2120 | if (*KDB_WRAP(p) == '\n') | ||
2121 | --lines; | ||
2122 | } | ||
2123 | end = p; | ||
2124 | } | 2103 | } |
2125 | /* Do a line at a time (max 200 chars) to reduce protocol overhead */ | 2104 | |
2126 | c = '\n'; | 2105 | if (skip >= n || skip < 0) |
2127 | while (start != end) { | 2106 | return 0; |
2128 | char buf[201]; | 2107 | |
2129 | p = buf; | 2108 | kmsg_dump_rewind_nolock(&dumper); |
2130 | if (KDB_FLAG(CMD_INTERRUPT)) | 2109 | while (kmsg_dump_get_line_nolock(&dumper, 1, buf, sizeof(buf), &len)) { |
2131 | return 0; | 2110 | if (skip) { |
2132 | while (start < end && (c = *KDB_WRAP(start)) && | 2111 | skip--; |
2133 | (p - buf) < sizeof(buf)-1) { | 2112 | continue; |
2134 | ++start; | ||
2135 | *p++ = c; | ||
2136 | if (c == '\n') | ||
2137 | break; | ||
2138 | } | 2113 | } |
2139 | *p = '\0'; | 2114 | if (!lines--) |
2140 | kdb_printf("%s", buf); | 2115 | break; |
2116 | |||
2117 | kdb_printf("%.*s\n", (int)len - 1, buf); | ||
2141 | } | 2118 | } |
2142 | if (c != '\n') | ||
2143 | kdb_printf("\n"); | ||
2144 | 2119 | ||
2145 | return 0; | 2120 | return 0; |
2146 | } | 2121 | } |
diff --git a/kernel/debug/kdb/kdb_private.h b/kernel/debug/kdb/kdb_private.h index 47c4e56e513b..392ec6a25844 100644 --- a/kernel/debug/kdb/kdb_private.h +++ b/kernel/debug/kdb/kdb_private.h | |||
@@ -205,7 +205,6 @@ extern char kdb_grep_string[]; | |||
205 | extern int kdb_grep_leading; | 205 | extern int kdb_grep_leading; |
206 | extern int kdb_grep_trailing; | 206 | extern int kdb_grep_trailing; |
207 | extern char *kdb_cmds[]; | 207 | extern char *kdb_cmds[]; |
208 | extern void kdb_syslog_data(char *syslog_data[]); | ||
209 | extern unsigned long kdb_task_state_string(const char *); | 208 | extern unsigned long kdb_task_state_string(const char *); |
210 | extern char kdb_task_state_char (const struct task_struct *); | 209 | extern char kdb_task_state_char (const struct task_struct *); |
211 | extern unsigned long kdb_task_state(const struct task_struct *p, | 210 | extern unsigned long kdb_task_state(const struct task_struct *p, |
diff --git a/kernel/events/core.c b/kernel/events/core.c index d7d71d6ec972..f1cf0edeb39a 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c | |||
@@ -1645,6 +1645,8 @@ perf_install_in_context(struct perf_event_context *ctx, | |||
1645 | lockdep_assert_held(&ctx->mutex); | 1645 | lockdep_assert_held(&ctx->mutex); |
1646 | 1646 | ||
1647 | event->ctx = ctx; | 1647 | event->ctx = ctx; |
1648 | if (event->cpu != -1) | ||
1649 | event->cpu = cpu; | ||
1648 | 1650 | ||
1649 | if (!task) { | 1651 | if (!task) { |
1650 | /* | 1652 | /* |
@@ -6252,6 +6254,8 @@ SYSCALL_DEFINE5(perf_event_open, | |||
6252 | } | 6254 | } |
6253 | } | 6255 | } |
6254 | 6256 | ||
6257 | get_online_cpus(); | ||
6258 | |||
6255 | event = perf_event_alloc(&attr, cpu, task, group_leader, NULL, | 6259 | event = perf_event_alloc(&attr, cpu, task, group_leader, NULL, |
6256 | NULL, NULL); | 6260 | NULL, NULL); |
6257 | if (IS_ERR(event)) { | 6261 | if (IS_ERR(event)) { |
@@ -6304,7 +6308,7 @@ SYSCALL_DEFINE5(perf_event_open, | |||
6304 | /* | 6308 | /* |
6305 | * Get the target context (task or percpu): | 6309 | * Get the target context (task or percpu): |
6306 | */ | 6310 | */ |
6307 | ctx = find_get_context(pmu, task, cpu); | 6311 | ctx = find_get_context(pmu, task, event->cpu); |
6308 | if (IS_ERR(ctx)) { | 6312 | if (IS_ERR(ctx)) { |
6309 | err = PTR_ERR(ctx); | 6313 | err = PTR_ERR(ctx); |
6310 | goto err_alloc; | 6314 | goto err_alloc; |
@@ -6377,20 +6381,23 @@ SYSCALL_DEFINE5(perf_event_open, | |||
6377 | mutex_lock(&ctx->mutex); | 6381 | mutex_lock(&ctx->mutex); |
6378 | 6382 | ||
6379 | if (move_group) { | 6383 | if (move_group) { |
6380 | perf_install_in_context(ctx, group_leader, cpu); | 6384 | synchronize_rcu(); |
6385 | perf_install_in_context(ctx, group_leader, event->cpu); | ||
6381 | get_ctx(ctx); | 6386 | get_ctx(ctx); |
6382 | list_for_each_entry(sibling, &group_leader->sibling_list, | 6387 | list_for_each_entry(sibling, &group_leader->sibling_list, |
6383 | group_entry) { | 6388 | group_entry) { |
6384 | perf_install_in_context(ctx, sibling, cpu); | 6389 | perf_install_in_context(ctx, sibling, event->cpu); |
6385 | get_ctx(ctx); | 6390 | get_ctx(ctx); |
6386 | } | 6391 | } |
6387 | } | 6392 | } |
6388 | 6393 | ||
6389 | perf_install_in_context(ctx, event, cpu); | 6394 | perf_install_in_context(ctx, event, event->cpu); |
6390 | ++ctx->generation; | 6395 | ++ctx->generation; |
6391 | perf_unpin_context(ctx); | 6396 | perf_unpin_context(ctx); |
6392 | mutex_unlock(&ctx->mutex); | 6397 | mutex_unlock(&ctx->mutex); |
6393 | 6398 | ||
6399 | put_online_cpus(); | ||
6400 | |||
6394 | event->owner = current; | 6401 | event->owner = current; |
6395 | 6402 | ||
6396 | mutex_lock(&current->perf_event_mutex); | 6403 | mutex_lock(&current->perf_event_mutex); |
@@ -6419,6 +6426,7 @@ err_context: | |||
6419 | err_alloc: | 6426 | err_alloc: |
6420 | free_event(event); | 6427 | free_event(event); |
6421 | err_task: | 6428 | err_task: |
6429 | put_online_cpus(); | ||
6422 | if (task) | 6430 | if (task) |
6423 | put_task_struct(task); | 6431 | put_task_struct(task); |
6424 | err_group_fd: | 6432 | err_group_fd: |
@@ -6479,6 +6487,39 @@ err: | |||
6479 | } | 6487 | } |
6480 | EXPORT_SYMBOL_GPL(perf_event_create_kernel_counter); | 6488 | EXPORT_SYMBOL_GPL(perf_event_create_kernel_counter); |
6481 | 6489 | ||
6490 | void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu) | ||
6491 | { | ||
6492 | struct perf_event_context *src_ctx; | ||
6493 | struct perf_event_context *dst_ctx; | ||
6494 | struct perf_event *event, *tmp; | ||
6495 | LIST_HEAD(events); | ||
6496 | |||
6497 | src_ctx = &per_cpu_ptr(pmu->pmu_cpu_context, src_cpu)->ctx; | ||
6498 | dst_ctx = &per_cpu_ptr(pmu->pmu_cpu_context, dst_cpu)->ctx; | ||
6499 | |||
6500 | mutex_lock(&src_ctx->mutex); | ||
6501 | list_for_each_entry_safe(event, tmp, &src_ctx->event_list, | ||
6502 | event_entry) { | ||
6503 | perf_remove_from_context(event); | ||
6504 | put_ctx(src_ctx); | ||
6505 | list_add(&event->event_entry, &events); | ||
6506 | } | ||
6507 | mutex_unlock(&src_ctx->mutex); | ||
6508 | |||
6509 | synchronize_rcu(); | ||
6510 | |||
6511 | mutex_lock(&dst_ctx->mutex); | ||
6512 | list_for_each_entry_safe(event, tmp, &events, event_entry) { | ||
6513 | list_del(&event->event_entry); | ||
6514 | if (event->state >= PERF_EVENT_STATE_OFF) | ||
6515 | event->state = PERF_EVENT_STATE_INACTIVE; | ||
6516 | perf_install_in_context(dst_ctx, event, dst_cpu); | ||
6517 | get_ctx(dst_ctx); | ||
6518 | } | ||
6519 | mutex_unlock(&dst_ctx->mutex); | ||
6520 | } | ||
6521 | EXPORT_SYMBOL_GPL(perf_pmu_migrate_context); | ||
6522 | |||
6482 | static void sync_child_event(struct perf_event *child_event, | 6523 | static void sync_child_event(struct perf_event *child_event, |
6483 | struct task_struct *child) | 6524 | struct task_struct *child) |
6484 | { | 6525 | { |
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 985be4d80fe8..f93532748bca 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c | |||
@@ -38,13 +38,29 @@ | |||
38 | #define UINSNS_PER_PAGE (PAGE_SIZE/UPROBE_XOL_SLOT_BYTES) | 38 | #define UINSNS_PER_PAGE (PAGE_SIZE/UPROBE_XOL_SLOT_BYTES) |
39 | #define MAX_UPROBE_XOL_SLOTS UINSNS_PER_PAGE | 39 | #define MAX_UPROBE_XOL_SLOTS UINSNS_PER_PAGE |
40 | 40 | ||
41 | static struct srcu_struct uprobes_srcu; | ||
42 | static struct rb_root uprobes_tree = RB_ROOT; | 41 | static struct rb_root uprobes_tree = RB_ROOT; |
43 | 42 | ||
44 | static DEFINE_SPINLOCK(uprobes_treelock); /* serialize rbtree access */ | 43 | static DEFINE_SPINLOCK(uprobes_treelock); /* serialize rbtree access */ |
45 | 44 | ||
46 | #define UPROBES_HASH_SZ 13 | 45 | #define UPROBES_HASH_SZ 13 |
47 | 46 | ||
47 | /* | ||
48 | * We need separate register/unregister and mmap/munmap lock hashes because | ||
49 | * of mmap_sem nesting. | ||
50 | * | ||
51 | * uprobe_register() needs to install probes on (potentially) all processes | ||
52 | * and thus needs to acquire multiple mmap_sems (consecutively, not | ||
53 | * concurrently), whereas uprobe_mmap() is called while holding mmap_sem | ||
54 | * for the particular process doing the mmap. | ||
55 | * | ||
56 | * uprobe_register()->register_for_each_vma() needs to drop/acquire mmap_sem | ||
57 | * because of lock order against i_mmap_mutex. This means there's a hole in | ||
58 | * the register vma iteration where a mmap() can happen. | ||
59 | * | ||
60 | * Thus uprobe_register() can race with uprobe_mmap() and we can try and | ||
61 | * install a probe where one is already installed. | ||
62 | */ | ||
63 | |||
48 | /* serialize (un)register */ | 64 | /* serialize (un)register */ |
49 | static struct mutex uprobes_mutex[UPROBES_HASH_SZ]; | 65 | static struct mutex uprobes_mutex[UPROBES_HASH_SZ]; |
50 | 66 | ||
@@ -61,17 +77,6 @@ static struct mutex uprobes_mmap_mutex[UPROBES_HASH_SZ]; | |||
61 | */ | 77 | */ |
62 | static atomic_t uprobe_events = ATOMIC_INIT(0); | 78 | static atomic_t uprobe_events = ATOMIC_INIT(0); |
63 | 79 | ||
64 | /* | ||
65 | * Maintain a temporary per vma info that can be used to search if a vma | ||
66 | * has already been handled. This structure is introduced since extending | ||
67 | * vm_area_struct wasnt recommended. | ||
68 | */ | ||
69 | struct vma_info { | ||
70 | struct list_head probe_list; | ||
71 | struct mm_struct *mm; | ||
72 | loff_t vaddr; | ||
73 | }; | ||
74 | |||
75 | struct uprobe { | 80 | struct uprobe { |
76 | struct rb_node rb_node; /* node in the rb tree */ | 81 | struct rb_node rb_node; /* node in the rb tree */ |
77 | atomic_t ref; | 82 | atomic_t ref; |
@@ -100,7 +105,8 @@ static bool valid_vma(struct vm_area_struct *vma, bool is_register) | |||
100 | if (!is_register) | 105 | if (!is_register) |
101 | return true; | 106 | return true; |
102 | 107 | ||
103 | if ((vma->vm_flags & (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)) == (VM_READ|VM_EXEC)) | 108 | if ((vma->vm_flags & (VM_HUGETLB|VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)) |
109 | == (VM_READ|VM_EXEC)) | ||
104 | return true; | 110 | return true; |
105 | 111 | ||
106 | return false; | 112 | return false; |
@@ -129,33 +135,17 @@ static loff_t vma_address(struct vm_area_struct *vma, loff_t offset) | |||
129 | static int __replace_page(struct vm_area_struct *vma, struct page *page, struct page *kpage) | 135 | static int __replace_page(struct vm_area_struct *vma, struct page *page, struct page *kpage) |
130 | { | 136 | { |
131 | struct mm_struct *mm = vma->vm_mm; | 137 | struct mm_struct *mm = vma->vm_mm; |
132 | pgd_t *pgd; | ||
133 | pud_t *pud; | ||
134 | pmd_t *pmd; | ||
135 | pte_t *ptep; | ||
136 | spinlock_t *ptl; | ||
137 | unsigned long addr; | 138 | unsigned long addr; |
138 | int err = -EFAULT; | 139 | spinlock_t *ptl; |
140 | pte_t *ptep; | ||
139 | 141 | ||
140 | addr = page_address_in_vma(page, vma); | 142 | addr = page_address_in_vma(page, vma); |
141 | if (addr == -EFAULT) | 143 | if (addr == -EFAULT) |
142 | goto out; | 144 | return -EFAULT; |
143 | |||
144 | pgd = pgd_offset(mm, addr); | ||
145 | if (!pgd_present(*pgd)) | ||
146 | goto out; | ||
147 | |||
148 | pud = pud_offset(pgd, addr); | ||
149 | if (!pud_present(*pud)) | ||
150 | goto out; | ||
151 | |||
152 | pmd = pmd_offset(pud, addr); | ||
153 | if (!pmd_present(*pmd)) | ||
154 | goto out; | ||
155 | 145 | ||
156 | ptep = pte_offset_map_lock(mm, pmd, addr, &ptl); | 146 | ptep = page_check_address(page, mm, addr, &ptl, 0); |
157 | if (!ptep) | 147 | if (!ptep) |
158 | goto out; | 148 | return -EAGAIN; |
159 | 149 | ||
160 | get_page(kpage); | 150 | get_page(kpage); |
161 | page_add_new_anon_rmap(kpage, vma, addr); | 151 | page_add_new_anon_rmap(kpage, vma, addr); |
@@ -174,10 +164,8 @@ static int __replace_page(struct vm_area_struct *vma, struct page *page, struct | |||
174 | try_to_free_swap(page); | 164 | try_to_free_swap(page); |
175 | put_page(page); | 165 | put_page(page); |
176 | pte_unmap_unlock(ptep, ptl); | 166 | pte_unmap_unlock(ptep, ptl); |
177 | err = 0; | ||
178 | 167 | ||
179 | out: | 168 | return 0; |
180 | return err; | ||
181 | } | 169 | } |
182 | 170 | ||
183 | /** | 171 | /** |
@@ -222,9 +210,8 @@ static int write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm, | |||
222 | void *vaddr_old, *vaddr_new; | 210 | void *vaddr_old, *vaddr_new; |
223 | struct vm_area_struct *vma; | 211 | struct vm_area_struct *vma; |
224 | struct uprobe *uprobe; | 212 | struct uprobe *uprobe; |
225 | loff_t addr; | ||
226 | int ret; | 213 | int ret; |
227 | 214 | retry: | |
228 | /* Read the page with vaddr into memory */ | 215 | /* Read the page with vaddr into memory */ |
229 | ret = get_user_pages(NULL, mm, vaddr, 1, 0, 0, &old_page, &vma); | 216 | ret = get_user_pages(NULL, mm, vaddr, 1, 0, 0, &old_page, &vma); |
230 | if (ret <= 0) | 217 | if (ret <= 0) |
@@ -246,10 +233,6 @@ static int write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm, | |||
246 | if (mapping != vma->vm_file->f_mapping) | 233 | if (mapping != vma->vm_file->f_mapping) |
247 | goto put_out; | 234 | goto put_out; |
248 | 235 | ||
249 | addr = vma_address(vma, uprobe->offset); | ||
250 | if (vaddr != (unsigned long)addr) | ||
251 | goto put_out; | ||
252 | |||
253 | ret = -ENOMEM; | 236 | ret = -ENOMEM; |
254 | new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vaddr); | 237 | new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vaddr); |
255 | if (!new_page) | 238 | if (!new_page) |
@@ -267,11 +250,7 @@ static int write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm, | |||
267 | vaddr_new = kmap_atomic(new_page); | 250 | vaddr_new = kmap_atomic(new_page); |
268 | 251 | ||
269 | memcpy(vaddr_new, vaddr_old, PAGE_SIZE); | 252 | memcpy(vaddr_new, vaddr_old, PAGE_SIZE); |
270 | 253 | memcpy(vaddr_new + (vaddr & ~PAGE_MASK), &opcode, UPROBE_SWBP_INSN_SIZE); | |
271 | /* poke the new insn in, ASSUMES we don't cross page boundary */ | ||
272 | vaddr &= ~PAGE_MASK; | ||
273 | BUG_ON(vaddr + UPROBE_SWBP_INSN_SIZE > PAGE_SIZE); | ||
274 | memcpy(vaddr_new + vaddr, &opcode, UPROBE_SWBP_INSN_SIZE); | ||
275 | 254 | ||
276 | kunmap_atomic(vaddr_new); | 255 | kunmap_atomic(vaddr_new); |
277 | kunmap_atomic(vaddr_old); | 256 | kunmap_atomic(vaddr_old); |
@@ -291,6 +270,8 @@ unlock_out: | |||
291 | put_out: | 270 | put_out: |
292 | put_page(old_page); | 271 | put_page(old_page); |
293 | 272 | ||
273 | if (unlikely(ret == -EAGAIN)) | ||
274 | goto retry; | ||
294 | return ret; | 275 | return ret; |
295 | } | 276 | } |
296 | 277 | ||
@@ -312,7 +293,7 @@ static int read_opcode(struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_ | |||
312 | void *vaddr_new; | 293 | void *vaddr_new; |
313 | int ret; | 294 | int ret; |
314 | 295 | ||
315 | ret = get_user_pages(NULL, mm, vaddr, 1, 0, 0, &page, NULL); | 296 | ret = get_user_pages(NULL, mm, vaddr, 1, 0, 1, &page, NULL); |
316 | if (ret <= 0) | 297 | if (ret <= 0) |
317 | return ret; | 298 | return ret; |
318 | 299 | ||
@@ -333,10 +314,20 @@ static int is_swbp_at_addr(struct mm_struct *mm, unsigned long vaddr) | |||
333 | uprobe_opcode_t opcode; | 314 | uprobe_opcode_t opcode; |
334 | int result; | 315 | int result; |
335 | 316 | ||
317 | if (current->mm == mm) { | ||
318 | pagefault_disable(); | ||
319 | result = __copy_from_user_inatomic(&opcode, (void __user*)vaddr, | ||
320 | sizeof(opcode)); | ||
321 | pagefault_enable(); | ||
322 | |||
323 | if (likely(result == 0)) | ||
324 | goto out; | ||
325 | } | ||
326 | |||
336 | result = read_opcode(mm, vaddr, &opcode); | 327 | result = read_opcode(mm, vaddr, &opcode); |
337 | if (result) | 328 | if (result) |
338 | return result; | 329 | return result; |
339 | 330 | out: | |
340 | if (is_swbp_insn(&opcode)) | 331 | if (is_swbp_insn(&opcode)) |
341 | return 1; | 332 | return 1; |
342 | 333 | ||
@@ -355,7 +346,9 @@ static int is_swbp_at_addr(struct mm_struct *mm, unsigned long vaddr) | |||
355 | int __weak set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr) | 346 | int __weak set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr) |
356 | { | 347 | { |
357 | int result; | 348 | int result; |
358 | 349 | /* | |
350 | * See the comment near uprobes_hash(). | ||
351 | */ | ||
359 | result = is_swbp_at_addr(mm, vaddr); | 352 | result = is_swbp_at_addr(mm, vaddr); |
360 | if (result == 1) | 353 | if (result == 1) |
361 | return -EEXIST; | 354 | return -EEXIST; |
@@ -520,7 +513,6 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset) | |||
520 | uprobe->inode = igrab(inode); | 513 | uprobe->inode = igrab(inode); |
521 | uprobe->offset = offset; | 514 | uprobe->offset = offset; |
522 | init_rwsem(&uprobe->consumer_rwsem); | 515 | init_rwsem(&uprobe->consumer_rwsem); |
523 | INIT_LIST_HEAD(&uprobe->pending_list); | ||
524 | 516 | ||
525 | /* add to uprobes_tree, sorted on inode:offset */ | 517 | /* add to uprobes_tree, sorted on inode:offset */ |
526 | cur_uprobe = insert_uprobe(uprobe); | 518 | cur_uprobe = insert_uprobe(uprobe); |
@@ -588,20 +580,22 @@ static bool consumer_del(struct uprobe *uprobe, struct uprobe_consumer *uc) | |||
588 | } | 580 | } |
589 | 581 | ||
590 | static int | 582 | static int |
591 | __copy_insn(struct address_space *mapping, struct vm_area_struct *vma, char *insn, | 583 | __copy_insn(struct address_space *mapping, struct file *filp, char *insn, |
592 | unsigned long nbytes, unsigned long offset) | 584 | unsigned long nbytes, loff_t offset) |
593 | { | 585 | { |
594 | struct file *filp = vma->vm_file; | ||
595 | struct page *page; | 586 | struct page *page; |
596 | void *vaddr; | 587 | void *vaddr; |
597 | unsigned long off1; | 588 | unsigned long off; |
598 | unsigned long idx; | 589 | pgoff_t idx; |
599 | 590 | ||
600 | if (!filp) | 591 | if (!filp) |
601 | return -EINVAL; | 592 | return -EINVAL; |
602 | 593 | ||
603 | idx = (unsigned long)(offset >> PAGE_CACHE_SHIFT); | 594 | if (!mapping->a_ops->readpage) |
604 | off1 = offset &= ~PAGE_MASK; | 595 | return -EIO; |
596 | |||
597 | idx = offset >> PAGE_CACHE_SHIFT; | ||
598 | off = offset & ~PAGE_MASK; | ||
605 | 599 | ||
606 | /* | 600 | /* |
607 | * Ensure that the page that has the original instruction is | 601 | * Ensure that the page that has the original instruction is |
@@ -612,22 +606,20 @@ __copy_insn(struct address_space *mapping, struct vm_area_struct *vma, char *ins | |||
612 | return PTR_ERR(page); | 606 | return PTR_ERR(page); |
613 | 607 | ||
614 | vaddr = kmap_atomic(page); | 608 | vaddr = kmap_atomic(page); |
615 | memcpy(insn, vaddr + off1, nbytes); | 609 | memcpy(insn, vaddr + off, nbytes); |
616 | kunmap_atomic(vaddr); | 610 | kunmap_atomic(vaddr); |
617 | page_cache_release(page); | 611 | page_cache_release(page); |
618 | 612 | ||
619 | return 0; | 613 | return 0; |
620 | } | 614 | } |
621 | 615 | ||
622 | static int | 616 | static int copy_insn(struct uprobe *uprobe, struct file *filp) |
623 | copy_insn(struct uprobe *uprobe, struct vm_area_struct *vma, unsigned long addr) | ||
624 | { | 617 | { |
625 | struct address_space *mapping; | 618 | struct address_space *mapping; |
626 | unsigned long nbytes; | 619 | unsigned long nbytes; |
627 | int bytes; | 620 | int bytes; |
628 | 621 | ||
629 | addr &= ~PAGE_MASK; | 622 | nbytes = PAGE_SIZE - (uprobe->offset & ~PAGE_MASK); |
630 | nbytes = PAGE_SIZE - addr; | ||
631 | mapping = uprobe->inode->i_mapping; | 623 | mapping = uprobe->inode->i_mapping; |
632 | 624 | ||
633 | /* Instruction at end of binary; copy only available bytes */ | 625 | /* Instruction at end of binary; copy only available bytes */ |
@@ -638,13 +630,13 @@ copy_insn(struct uprobe *uprobe, struct vm_area_struct *vma, unsigned long addr) | |||
638 | 630 | ||
639 | /* Instruction at the page-boundary; copy bytes in second page */ | 631 | /* Instruction at the page-boundary; copy bytes in second page */ |
640 | if (nbytes < bytes) { | 632 | if (nbytes < bytes) { |
641 | if (__copy_insn(mapping, vma, uprobe->arch.insn + nbytes, | 633 | int err = __copy_insn(mapping, filp, uprobe->arch.insn + nbytes, |
642 | bytes - nbytes, uprobe->offset + nbytes)) | 634 | bytes - nbytes, uprobe->offset + nbytes); |
643 | return -ENOMEM; | 635 | if (err) |
644 | 636 | return err; | |
645 | bytes = nbytes; | 637 | bytes = nbytes; |
646 | } | 638 | } |
647 | return __copy_insn(mapping, vma, uprobe->arch.insn, bytes, uprobe->offset); | 639 | return __copy_insn(mapping, filp, uprobe->arch.insn, bytes, uprobe->offset); |
648 | } | 640 | } |
649 | 641 | ||
650 | /* | 642 | /* |
@@ -672,9 +664,8 @@ copy_insn(struct uprobe *uprobe, struct vm_area_struct *vma, unsigned long addr) | |||
672 | */ | 664 | */ |
673 | static int | 665 | static int |
674 | install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, | 666 | install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, |
675 | struct vm_area_struct *vma, loff_t vaddr) | 667 | struct vm_area_struct *vma, unsigned long vaddr) |
676 | { | 668 | { |
677 | unsigned long addr; | ||
678 | int ret; | 669 | int ret; |
679 | 670 | ||
680 | /* | 671 | /* |
@@ -687,20 +678,22 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, | |||
687 | if (!uprobe->consumers) | 678 | if (!uprobe->consumers) |
688 | return -EEXIST; | 679 | return -EEXIST; |
689 | 680 | ||
690 | addr = (unsigned long)vaddr; | ||
691 | |||
692 | if (!(uprobe->flags & UPROBE_COPY_INSN)) { | 681 | if (!(uprobe->flags & UPROBE_COPY_INSN)) { |
693 | ret = copy_insn(uprobe, vma, addr); | 682 | ret = copy_insn(uprobe, vma->vm_file); |
694 | if (ret) | 683 | if (ret) |
695 | return ret; | 684 | return ret; |
696 | 685 | ||
697 | if (is_swbp_insn((uprobe_opcode_t *)uprobe->arch.insn)) | 686 | if (is_swbp_insn((uprobe_opcode_t *)uprobe->arch.insn)) |
698 | return -EEXIST; | 687 | return -ENOTSUPP; |
699 | 688 | ||
700 | ret = arch_uprobe_analyze_insn(&uprobe->arch, mm); | 689 | ret = arch_uprobe_analyze_insn(&uprobe->arch, mm, vaddr); |
701 | if (ret) | 690 | if (ret) |
702 | return ret; | 691 | return ret; |
703 | 692 | ||
693 | /* write_opcode() assumes we don't cross page boundary */ | ||
694 | BUG_ON((uprobe->offset & ~PAGE_MASK) + | ||
695 | UPROBE_SWBP_INSN_SIZE > PAGE_SIZE); | ||
696 | |||
704 | uprobe->flags |= UPROBE_COPY_INSN; | 697 | uprobe->flags |= UPROBE_COPY_INSN; |
705 | } | 698 | } |
706 | 699 | ||
@@ -713,7 +706,7 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, | |||
713 | * Hence increment before and decrement on failure. | 706 | * Hence increment before and decrement on failure. |
714 | */ | 707 | */ |
715 | atomic_inc(&mm->uprobes_state.count); | 708 | atomic_inc(&mm->uprobes_state.count); |
716 | ret = set_swbp(&uprobe->arch, mm, addr); | 709 | ret = set_swbp(&uprobe->arch, mm, vaddr); |
717 | if (ret) | 710 | if (ret) |
718 | atomic_dec(&mm->uprobes_state.count); | 711 | atomic_dec(&mm->uprobes_state.count); |
719 | 712 | ||
@@ -721,27 +714,21 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, | |||
721 | } | 714 | } |
722 | 715 | ||
723 | static void | 716 | static void |
724 | remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, loff_t vaddr) | 717 | remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, unsigned long vaddr) |
725 | { | 718 | { |
726 | if (!set_orig_insn(&uprobe->arch, mm, (unsigned long)vaddr, true)) | 719 | if (!set_orig_insn(&uprobe->arch, mm, vaddr, true)) |
727 | atomic_dec(&mm->uprobes_state.count); | 720 | atomic_dec(&mm->uprobes_state.count); |
728 | } | 721 | } |
729 | 722 | ||
730 | /* | 723 | /* |
731 | * There could be threads that have hit the breakpoint and are entering the | 724 | * There could be threads that have already hit the breakpoint. They |
732 | * notifier code and trying to acquire the uprobes_treelock. The thread | 725 | * will recheck the current insn and restart if find_uprobe() fails. |
733 | * calling delete_uprobe() that is removing the uprobe from the rb_tree can | 726 | * See find_active_uprobe(). |
734 | * race with these threads and might acquire the uprobes_treelock compared | ||
735 | * to some of the breakpoint hit threads. In such a case, the breakpoint | ||
736 | * hit threads will not find the uprobe. The current unregistering thread | ||
737 | * waits till all other threads have hit a breakpoint, to acquire the | ||
738 | * uprobes_treelock before the uprobe is removed from the rbtree. | ||
739 | */ | 727 | */ |
740 | static void delete_uprobe(struct uprobe *uprobe) | 728 | static void delete_uprobe(struct uprobe *uprobe) |
741 | { | 729 | { |
742 | unsigned long flags; | 730 | unsigned long flags; |
743 | 731 | ||
744 | synchronize_srcu(&uprobes_srcu); | ||
745 | spin_lock_irqsave(&uprobes_treelock, flags); | 732 | spin_lock_irqsave(&uprobes_treelock, flags); |
746 | rb_erase(&uprobe->rb_node, &uprobes_tree); | 733 | rb_erase(&uprobe->rb_node, &uprobes_tree); |
747 | spin_unlock_irqrestore(&uprobes_treelock, flags); | 734 | spin_unlock_irqrestore(&uprobes_treelock, flags); |
@@ -750,139 +737,135 @@ static void delete_uprobe(struct uprobe *uprobe) | |||
750 | atomic_dec(&uprobe_events); | 737 | atomic_dec(&uprobe_events); |
751 | } | 738 | } |
752 | 739 | ||
753 | static struct vma_info * | 740 | struct map_info { |
754 | __find_next_vma_info(struct address_space *mapping, struct list_head *head, | 741 | struct map_info *next; |
755 | struct vma_info *vi, loff_t offset, bool is_register) | 742 | struct mm_struct *mm; |
743 | unsigned long vaddr; | ||
744 | }; | ||
745 | |||
746 | static inline struct map_info *free_map_info(struct map_info *info) | ||
747 | { | ||
748 | struct map_info *next = info->next; | ||
749 | kfree(info); | ||
750 | return next; | ||
751 | } | ||
752 | |||
753 | static struct map_info * | ||
754 | build_map_info(struct address_space *mapping, loff_t offset, bool is_register) | ||
756 | { | 755 | { |
756 | unsigned long pgoff = offset >> PAGE_SHIFT; | ||
757 | struct prio_tree_iter iter; | 757 | struct prio_tree_iter iter; |
758 | struct vm_area_struct *vma; | 758 | struct vm_area_struct *vma; |
759 | struct vma_info *tmpvi; | 759 | struct map_info *curr = NULL; |
760 | unsigned long pgoff; | 760 | struct map_info *prev = NULL; |
761 | int existing_vma; | 761 | struct map_info *info; |
762 | loff_t vaddr; | 762 | int more = 0; |
763 | |||
764 | pgoff = offset >> PAGE_SHIFT; | ||
765 | 763 | ||
764 | again: | ||
765 | mutex_lock(&mapping->i_mmap_mutex); | ||
766 | vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) { | 766 | vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) { |
767 | if (!valid_vma(vma, is_register)) | 767 | if (!valid_vma(vma, is_register)) |
768 | continue; | 768 | continue; |
769 | 769 | ||
770 | existing_vma = 0; | 770 | if (!prev && !more) { |
771 | vaddr = vma_address(vma, offset); | 771 | /* |
772 | 772 | * Needs GFP_NOWAIT to avoid i_mmap_mutex recursion through | |
773 | list_for_each_entry(tmpvi, head, probe_list) { | 773 | * reclaim. This is optimistic, no harm done if it fails. |
774 | if (tmpvi->mm == vma->vm_mm && tmpvi->vaddr == vaddr) { | 774 | */ |
775 | existing_vma = 1; | 775 | prev = kmalloc(sizeof(struct map_info), |
776 | break; | 776 | GFP_NOWAIT | __GFP_NOMEMALLOC | __GFP_NOWARN); |
777 | } | 777 | if (prev) |
778 | prev->next = NULL; | ||
778 | } | 779 | } |
779 | 780 | if (!prev) { | |
780 | /* | 781 | more++; |
781 | * Another vma needs a probe to be installed. However skip | 782 | continue; |
782 | * installing the probe if the vma is about to be unlinked. | ||
783 | */ | ||
784 | if (!existing_vma && atomic_inc_not_zero(&vma->vm_mm->mm_users)) { | ||
785 | vi->mm = vma->vm_mm; | ||
786 | vi->vaddr = vaddr; | ||
787 | list_add(&vi->probe_list, head); | ||
788 | |||
789 | return vi; | ||
790 | } | 783 | } |
791 | } | ||
792 | 784 | ||
793 | return NULL; | 785 | if (!atomic_inc_not_zero(&vma->vm_mm->mm_users)) |
794 | } | 786 | continue; |
795 | |||
796 | /* | ||
797 | * Iterate in the rmap prio tree and find a vma where a probe has not | ||
798 | * yet been inserted. | ||
799 | */ | ||
800 | static struct vma_info * | ||
801 | find_next_vma_info(struct address_space *mapping, struct list_head *head, | ||
802 | loff_t offset, bool is_register) | ||
803 | { | ||
804 | struct vma_info *vi, *retvi; | ||
805 | 787 | ||
806 | vi = kzalloc(sizeof(struct vma_info), GFP_KERNEL); | 788 | info = prev; |
807 | if (!vi) | 789 | prev = prev->next; |
808 | return ERR_PTR(-ENOMEM); | 790 | info->next = curr; |
791 | curr = info; | ||
809 | 792 | ||
810 | mutex_lock(&mapping->i_mmap_mutex); | 793 | info->mm = vma->vm_mm; |
811 | retvi = __find_next_vma_info(mapping, head, vi, offset, is_register); | 794 | info->vaddr = vma_address(vma, offset); |
795 | } | ||
812 | mutex_unlock(&mapping->i_mmap_mutex); | 796 | mutex_unlock(&mapping->i_mmap_mutex); |
813 | 797 | ||
814 | if (!retvi) | 798 | if (!more) |
815 | kfree(vi); | 799 | goto out; |
800 | |||
801 | prev = curr; | ||
802 | while (curr) { | ||
803 | mmput(curr->mm); | ||
804 | curr = curr->next; | ||
805 | } | ||
816 | 806 | ||
817 | return retvi; | 807 | do { |
808 | info = kmalloc(sizeof(struct map_info), GFP_KERNEL); | ||
809 | if (!info) { | ||
810 | curr = ERR_PTR(-ENOMEM); | ||
811 | goto out; | ||
812 | } | ||
813 | info->next = prev; | ||
814 | prev = info; | ||
815 | } while (--more); | ||
816 | |||
817 | goto again; | ||
818 | out: | ||
819 | while (prev) | ||
820 | prev = free_map_info(prev); | ||
821 | return curr; | ||
818 | } | 822 | } |
819 | 823 | ||
820 | static int register_for_each_vma(struct uprobe *uprobe, bool is_register) | 824 | static int register_for_each_vma(struct uprobe *uprobe, bool is_register) |
821 | { | 825 | { |
822 | struct list_head try_list; | 826 | struct map_info *info; |
823 | struct vm_area_struct *vma; | 827 | int err = 0; |
824 | struct address_space *mapping; | ||
825 | struct vma_info *vi, *tmpvi; | ||
826 | struct mm_struct *mm; | ||
827 | loff_t vaddr; | ||
828 | int ret; | ||
829 | 828 | ||
830 | mapping = uprobe->inode->i_mapping; | 829 | info = build_map_info(uprobe->inode->i_mapping, |
831 | INIT_LIST_HEAD(&try_list); | 830 | uprobe->offset, is_register); |
831 | if (IS_ERR(info)) | ||
832 | return PTR_ERR(info); | ||
832 | 833 | ||
833 | ret = 0; | 834 | while (info) { |
835 | struct mm_struct *mm = info->mm; | ||
836 | struct vm_area_struct *vma; | ||
834 | 837 | ||
835 | for (;;) { | 838 | if (err) |
836 | vi = find_next_vma_info(mapping, &try_list, uprobe->offset, is_register); | 839 | goto free; |
837 | if (!vi) | ||
838 | break; | ||
839 | 840 | ||
840 | if (IS_ERR(vi)) { | 841 | down_write(&mm->mmap_sem); |
841 | ret = PTR_ERR(vi); | 842 | vma = find_vma(mm, (unsigned long)info->vaddr); |
842 | break; | 843 | if (!vma || !valid_vma(vma, is_register)) |
843 | } | 844 | goto unlock; |
844 | 845 | ||
845 | mm = vi->mm; | ||
846 | down_read(&mm->mmap_sem); | ||
847 | vma = find_vma(mm, (unsigned long)vi->vaddr); | ||
848 | if (!vma || !valid_vma(vma, is_register)) { | ||
849 | list_del(&vi->probe_list); | ||
850 | kfree(vi); | ||
851 | up_read(&mm->mmap_sem); | ||
852 | mmput(mm); | ||
853 | continue; | ||
854 | } | ||
855 | vaddr = vma_address(vma, uprobe->offset); | ||
856 | if (vma->vm_file->f_mapping->host != uprobe->inode || | 846 | if (vma->vm_file->f_mapping->host != uprobe->inode || |
857 | vaddr != vi->vaddr) { | 847 | vma_address(vma, uprobe->offset) != info->vaddr) |
858 | list_del(&vi->probe_list); | 848 | goto unlock; |
859 | kfree(vi); | ||
860 | up_read(&mm->mmap_sem); | ||
861 | mmput(mm); | ||
862 | continue; | ||
863 | } | ||
864 | |||
865 | if (is_register) | ||
866 | ret = install_breakpoint(uprobe, mm, vma, vi->vaddr); | ||
867 | else | ||
868 | remove_breakpoint(uprobe, mm, vi->vaddr); | ||
869 | 849 | ||
870 | up_read(&mm->mmap_sem); | ||
871 | mmput(mm); | ||
872 | if (is_register) { | 850 | if (is_register) { |
873 | if (ret && ret == -EEXIST) | 851 | err = install_breakpoint(uprobe, mm, vma, info->vaddr); |
874 | ret = 0; | 852 | /* |
875 | if (ret) | 853 | * We can race against uprobe_mmap(), see the |
876 | break; | 854 | * comment near uprobe_hash(). |
855 | */ | ||
856 | if (err == -EEXIST) | ||
857 | err = 0; | ||
858 | } else { | ||
859 | remove_breakpoint(uprobe, mm, info->vaddr); | ||
877 | } | 860 | } |
861 | unlock: | ||
862 | up_write(&mm->mmap_sem); | ||
863 | free: | ||
864 | mmput(mm); | ||
865 | info = free_map_info(info); | ||
878 | } | 866 | } |
879 | 867 | ||
880 | list_for_each_entry_safe(vi, tmpvi, &try_list, probe_list) { | 868 | return err; |
881 | list_del(&vi->probe_list); | ||
882 | kfree(vi); | ||
883 | } | ||
884 | |||
885 | return ret; | ||
886 | } | 869 | } |
887 | 870 | ||
888 | static int __uprobe_register(struct uprobe *uprobe) | 871 | static int __uprobe_register(struct uprobe *uprobe) |
@@ -1048,7 +1031,7 @@ static void build_probe_list(struct inode *inode, struct list_head *head) | |||
1048 | int uprobe_mmap(struct vm_area_struct *vma) | 1031 | int uprobe_mmap(struct vm_area_struct *vma) |
1049 | { | 1032 | { |
1050 | struct list_head tmp_list; | 1033 | struct list_head tmp_list; |
1051 | struct uprobe *uprobe, *u; | 1034 | struct uprobe *uprobe; |
1052 | struct inode *inode; | 1035 | struct inode *inode; |
1053 | int ret, count; | 1036 | int ret, count; |
1054 | 1037 | ||
@@ -1066,12 +1049,9 @@ int uprobe_mmap(struct vm_area_struct *vma) | |||
1066 | ret = 0; | 1049 | ret = 0; |
1067 | count = 0; | 1050 | count = 0; |
1068 | 1051 | ||
1069 | list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) { | 1052 | list_for_each_entry(uprobe, &tmp_list, pending_list) { |
1070 | loff_t vaddr; | ||
1071 | |||
1072 | list_del(&uprobe->pending_list); | ||
1073 | if (!ret) { | 1053 | if (!ret) { |
1074 | vaddr = vma_address(vma, uprobe->offset); | 1054 | loff_t vaddr = vma_address(vma, uprobe->offset); |
1075 | 1055 | ||
1076 | if (vaddr < vma->vm_start || vaddr >= vma->vm_end) { | 1056 | if (vaddr < vma->vm_start || vaddr >= vma->vm_end) { |
1077 | put_uprobe(uprobe); | 1057 | put_uprobe(uprobe); |
@@ -1079,8 +1059,10 @@ int uprobe_mmap(struct vm_area_struct *vma) | |||
1079 | } | 1059 | } |
1080 | 1060 | ||
1081 | ret = install_breakpoint(uprobe, vma->vm_mm, vma, vaddr); | 1061 | ret = install_breakpoint(uprobe, vma->vm_mm, vma, vaddr); |
1082 | 1062 | /* | |
1083 | /* Ignore double add: */ | 1063 | * We can race against uprobe_register(), see the |
1064 | * comment near uprobe_hash(). | ||
1065 | */ | ||
1084 | if (ret == -EEXIST) { | 1066 | if (ret == -EEXIST) { |
1085 | ret = 0; | 1067 | ret = 0; |
1086 | 1068 | ||
@@ -1115,7 +1097,7 @@ int uprobe_mmap(struct vm_area_struct *vma) | |||
1115 | void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end) | 1097 | void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end) |
1116 | { | 1098 | { |
1117 | struct list_head tmp_list; | 1099 | struct list_head tmp_list; |
1118 | struct uprobe *uprobe, *u; | 1100 | struct uprobe *uprobe; |
1119 | struct inode *inode; | 1101 | struct inode *inode; |
1120 | 1102 | ||
1121 | if (!atomic_read(&uprobe_events) || !valid_vma(vma, false)) | 1103 | if (!atomic_read(&uprobe_events) || !valid_vma(vma, false)) |
@@ -1132,11 +1114,8 @@ void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned lon | |||
1132 | mutex_lock(uprobes_mmap_hash(inode)); | 1114 | mutex_lock(uprobes_mmap_hash(inode)); |
1133 | build_probe_list(inode, &tmp_list); | 1115 | build_probe_list(inode, &tmp_list); |
1134 | 1116 | ||
1135 | list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) { | 1117 | list_for_each_entry(uprobe, &tmp_list, pending_list) { |
1136 | loff_t vaddr; | 1118 | loff_t vaddr = vma_address(vma, uprobe->offset); |
1137 | |||
1138 | list_del(&uprobe->pending_list); | ||
1139 | vaddr = vma_address(vma, uprobe->offset); | ||
1140 | 1119 | ||
1141 | if (vaddr >= start && vaddr < end) { | 1120 | if (vaddr >= start && vaddr < end) { |
1142 | /* | 1121 | /* |
@@ -1378,9 +1357,6 @@ void uprobe_free_utask(struct task_struct *t) | |||
1378 | { | 1357 | { |
1379 | struct uprobe_task *utask = t->utask; | 1358 | struct uprobe_task *utask = t->utask; |
1380 | 1359 | ||
1381 | if (t->uprobe_srcu_id != -1) | ||
1382 | srcu_read_unlock_raw(&uprobes_srcu, t->uprobe_srcu_id); | ||
1383 | |||
1384 | if (!utask) | 1360 | if (!utask) |
1385 | return; | 1361 | return; |
1386 | 1362 | ||
@@ -1398,7 +1374,6 @@ void uprobe_free_utask(struct task_struct *t) | |||
1398 | void uprobe_copy_process(struct task_struct *t) | 1374 | void uprobe_copy_process(struct task_struct *t) |
1399 | { | 1375 | { |
1400 | t->utask = NULL; | 1376 | t->utask = NULL; |
1401 | t->uprobe_srcu_id = -1; | ||
1402 | } | 1377 | } |
1403 | 1378 | ||
1404 | /* | 1379 | /* |
@@ -1417,7 +1392,6 @@ static struct uprobe_task *add_utask(void) | |||
1417 | if (unlikely(!utask)) | 1392 | if (unlikely(!utask)) |
1418 | return NULL; | 1393 | return NULL; |
1419 | 1394 | ||
1420 | utask->active_uprobe = NULL; | ||
1421 | current->utask = utask; | 1395 | current->utask = utask; |
1422 | return utask; | 1396 | return utask; |
1423 | } | 1397 | } |
@@ -1479,41 +1453,64 @@ static bool can_skip_sstep(struct uprobe *uprobe, struct pt_regs *regs) | |||
1479 | return false; | 1453 | return false; |
1480 | } | 1454 | } |
1481 | 1455 | ||
1456 | static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp) | ||
1457 | { | ||
1458 | struct mm_struct *mm = current->mm; | ||
1459 | struct uprobe *uprobe = NULL; | ||
1460 | struct vm_area_struct *vma; | ||
1461 | |||
1462 | down_read(&mm->mmap_sem); | ||
1463 | vma = find_vma(mm, bp_vaddr); | ||
1464 | if (vma && vma->vm_start <= bp_vaddr) { | ||
1465 | if (valid_vma(vma, false)) { | ||
1466 | struct inode *inode; | ||
1467 | loff_t offset; | ||
1468 | |||
1469 | inode = vma->vm_file->f_mapping->host; | ||
1470 | offset = bp_vaddr - vma->vm_start; | ||
1471 | offset += (vma->vm_pgoff << PAGE_SHIFT); | ||
1472 | uprobe = find_uprobe(inode, offset); | ||
1473 | } | ||
1474 | |||
1475 | if (!uprobe) | ||
1476 | *is_swbp = is_swbp_at_addr(mm, bp_vaddr); | ||
1477 | } else { | ||
1478 | *is_swbp = -EFAULT; | ||
1479 | } | ||
1480 | up_read(&mm->mmap_sem); | ||
1481 | |||
1482 | return uprobe; | ||
1483 | } | ||
1484 | |||
1482 | /* | 1485 | /* |
1483 | * Run handler and ask thread to singlestep. | 1486 | * Run handler and ask thread to singlestep. |
1484 | * Ensure all non-fatal signals cannot interrupt thread while it singlesteps. | 1487 | * Ensure all non-fatal signals cannot interrupt thread while it singlesteps. |
1485 | */ | 1488 | */ |
1486 | static void handle_swbp(struct pt_regs *regs) | 1489 | static void handle_swbp(struct pt_regs *regs) |
1487 | { | 1490 | { |
1488 | struct vm_area_struct *vma; | ||
1489 | struct uprobe_task *utask; | 1491 | struct uprobe_task *utask; |
1490 | struct uprobe *uprobe; | 1492 | struct uprobe *uprobe; |
1491 | struct mm_struct *mm; | ||
1492 | unsigned long bp_vaddr; | 1493 | unsigned long bp_vaddr; |
1494 | int uninitialized_var(is_swbp); | ||
1493 | 1495 | ||
1494 | uprobe = NULL; | ||
1495 | bp_vaddr = uprobe_get_swbp_addr(regs); | 1496 | bp_vaddr = uprobe_get_swbp_addr(regs); |
1496 | mm = current->mm; | 1497 | uprobe = find_active_uprobe(bp_vaddr, &is_swbp); |
1497 | down_read(&mm->mmap_sem); | ||
1498 | vma = find_vma(mm, bp_vaddr); | ||
1499 | |||
1500 | if (vma && vma->vm_start <= bp_vaddr && valid_vma(vma, false)) { | ||
1501 | struct inode *inode; | ||
1502 | loff_t offset; | ||
1503 | |||
1504 | inode = vma->vm_file->f_mapping->host; | ||
1505 | offset = bp_vaddr - vma->vm_start; | ||
1506 | offset += (vma->vm_pgoff << PAGE_SHIFT); | ||
1507 | uprobe = find_uprobe(inode, offset); | ||
1508 | } | ||
1509 | |||
1510 | srcu_read_unlock_raw(&uprobes_srcu, current->uprobe_srcu_id); | ||
1511 | current->uprobe_srcu_id = -1; | ||
1512 | up_read(&mm->mmap_sem); | ||
1513 | 1498 | ||
1514 | if (!uprobe) { | 1499 | if (!uprobe) { |
1515 | /* No matching uprobe; signal SIGTRAP. */ | 1500 | if (is_swbp > 0) { |
1516 | send_sig(SIGTRAP, current, 0); | 1501 | /* No matching uprobe; signal SIGTRAP. */ |
1502 | send_sig(SIGTRAP, current, 0); | ||
1503 | } else { | ||
1504 | /* | ||
1505 | * Either we raced with uprobe_unregister() or we can't | ||
1506 | * access this memory. The latter is only possible if | ||
1507 | * another thread plays with our ->mm. In both cases | ||
1508 | * we can simply restart. If this vma was unmapped we | ||
1509 | * can pretend this insn was not executed yet and get | ||
1510 | * the (correct) SIGSEGV after restart. | ||
1511 | */ | ||
1512 | instruction_pointer_set(regs, bp_vaddr); | ||
1513 | } | ||
1517 | return; | 1514 | return; |
1518 | } | 1515 | } |
1519 | 1516 | ||
@@ -1620,7 +1617,6 @@ int uprobe_pre_sstep_notifier(struct pt_regs *regs) | |||
1620 | utask->state = UTASK_BP_HIT; | 1617 | utask->state = UTASK_BP_HIT; |
1621 | 1618 | ||
1622 | set_thread_flag(TIF_UPROBE); | 1619 | set_thread_flag(TIF_UPROBE); |
1623 | current->uprobe_srcu_id = srcu_read_lock_raw(&uprobes_srcu); | ||
1624 | 1620 | ||
1625 | return 1; | 1621 | return 1; |
1626 | } | 1622 | } |
@@ -1655,7 +1651,6 @@ static int __init init_uprobes(void) | |||
1655 | mutex_init(&uprobes_mutex[i]); | 1651 | mutex_init(&uprobes_mutex[i]); |
1656 | mutex_init(&uprobes_mmap_mutex[i]); | 1652 | mutex_init(&uprobes_mmap_mutex[i]); |
1657 | } | 1653 | } |
1658 | init_srcu_struct(&uprobes_srcu); | ||
1659 | 1654 | ||
1660 | return register_die_notifier(&uprobe_exception_nb); | 1655 | return register_die_notifier(&uprobe_exception_nb); |
1661 | } | 1656 | } |
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 8b53db38a279..238025f5472e 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c | |||
@@ -27,7 +27,6 @@ | |||
27 | #include <linux/syscore_ops.h> | 27 | #include <linux/syscore_ops.h> |
28 | #include <linux/ctype.h> | 28 | #include <linux/ctype.h> |
29 | #include <linux/genhd.h> | 29 | #include <linux/genhd.h> |
30 | #include <scsi/scsi_scan.h> | ||
31 | 30 | ||
32 | #include "power.h" | 31 | #include "power.h" |
33 | 32 | ||
@@ -748,13 +747,6 @@ static int software_resume(void) | |||
748 | async_synchronize_full(); | 747 | async_synchronize_full(); |
749 | } | 748 | } |
750 | 749 | ||
751 | /* | ||
752 | * We can't depend on SCSI devices being available after loading | ||
753 | * one of their modules until scsi_complete_async_scans() is | ||
754 | * called and the resume device usually is a SCSI one. | ||
755 | */ | ||
756 | scsi_complete_async_scans(); | ||
757 | |||
758 | swsusp_resume_device = name_to_dev_t(resume_file); | 750 | swsusp_resume_device = name_to_dev_t(resume_file); |
759 | if (!swsusp_resume_device) { | 751 | if (!swsusp_resume_device) { |
760 | error = -ENODEV; | 752 | error = -ENODEV; |
diff --git a/kernel/power/user.c b/kernel/power/user.c index 91b0fd021a95..4ed81e74f86f 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c | |||
@@ -24,7 +24,6 @@ | |||
24 | #include <linux/console.h> | 24 | #include <linux/console.h> |
25 | #include <linux/cpu.h> | 25 | #include <linux/cpu.h> |
26 | #include <linux/freezer.h> | 26 | #include <linux/freezer.h> |
27 | #include <scsi/scsi_scan.h> | ||
28 | 27 | ||
29 | #include <asm/uaccess.h> | 28 | #include <asm/uaccess.h> |
30 | 29 | ||
@@ -84,7 +83,6 @@ static int snapshot_open(struct inode *inode, struct file *filp) | |||
84 | * appear. | 83 | * appear. |
85 | */ | 84 | */ |
86 | wait_for_device_probe(); | 85 | wait_for_device_probe(); |
87 | scsi_complete_async_scans(); | ||
88 | 86 | ||
89 | data->swap = -1; | 87 | data->swap = -1; |
90 | data->mode = O_WRONLY; | 88 | data->mode = O_WRONLY; |
diff --git a/kernel/printk.c b/kernel/printk.c index 177fa49357a5..ac4bc9e79465 100644 --- a/kernel/printk.c +++ b/kernel/printk.c | |||
@@ -1192,21 +1192,6 @@ SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, int, len) | |||
1192 | return do_syslog(type, buf, len, SYSLOG_FROM_CALL); | 1192 | return do_syslog(type, buf, len, SYSLOG_FROM_CALL); |
1193 | } | 1193 | } |
1194 | 1194 | ||
1195 | #ifdef CONFIG_KGDB_KDB | ||
1196 | /* kdb dmesg command needs access to the syslog buffer. do_syslog() | ||
1197 | * uses locks so it cannot be used during debugging. Just tell kdb | ||
1198 | * where the start and end of the physical and logical logs are. This | ||
1199 | * is equivalent to do_syslog(3). | ||
1200 | */ | ||
1201 | void kdb_syslog_data(char *syslog_data[4]) | ||
1202 | { | ||
1203 | syslog_data[0] = log_buf; | ||
1204 | syslog_data[1] = log_buf + log_buf_len; | ||
1205 | syslog_data[2] = log_buf + log_first_idx; | ||
1206 | syslog_data[3] = log_buf + log_next_idx; | ||
1207 | } | ||
1208 | #endif /* CONFIG_KGDB_KDB */ | ||
1209 | |||
1210 | static bool __read_mostly ignore_loglevel; | 1195 | static bool __read_mostly ignore_loglevel; |
1211 | 1196 | ||
1212 | static int __init ignore_loglevel_setup(char *str) | 1197 | static int __init ignore_loglevel_setup(char *str) |
@@ -2525,7 +2510,7 @@ void kmsg_dump(enum kmsg_dump_reason reason) | |||
2525 | } | 2510 | } |
2526 | 2511 | ||
2527 | /** | 2512 | /** |
2528 | * kmsg_dump_get_line - retrieve one kmsg log line | 2513 | * kmsg_dump_get_line_nolock - retrieve one kmsg log line (unlocked version) |
2529 | * @dumper: registered kmsg dumper | 2514 | * @dumper: registered kmsg dumper |
2530 | * @syslog: include the "<4>" prefixes | 2515 | * @syslog: include the "<4>" prefixes |
2531 | * @line: buffer to copy the line to | 2516 | * @line: buffer to copy the line to |
@@ -2540,11 +2525,12 @@ void kmsg_dump(enum kmsg_dump_reason reason) | |||
2540 | * | 2525 | * |
2541 | * A return value of FALSE indicates that there are no more records to | 2526 | * A return value of FALSE indicates that there are no more records to |
2542 | * read. | 2527 | * read. |
2528 | * | ||
2529 | * The function is similar to kmsg_dump_get_line(), but grabs no locks. | ||
2543 | */ | 2530 | */ |
2544 | bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog, | 2531 | bool kmsg_dump_get_line_nolock(struct kmsg_dumper *dumper, bool syslog, |
2545 | char *line, size_t size, size_t *len) | 2532 | char *line, size_t size, size_t *len) |
2546 | { | 2533 | { |
2547 | unsigned long flags; | ||
2548 | struct log *msg; | 2534 | struct log *msg; |
2549 | size_t l = 0; | 2535 | size_t l = 0; |
2550 | bool ret = false; | 2536 | bool ret = false; |
@@ -2552,7 +2538,6 @@ bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog, | |||
2552 | if (!dumper->active) | 2538 | if (!dumper->active) |
2553 | goto out; | 2539 | goto out; |
2554 | 2540 | ||
2555 | raw_spin_lock_irqsave(&logbuf_lock, flags); | ||
2556 | if (dumper->cur_seq < log_first_seq) { | 2541 | if (dumper->cur_seq < log_first_seq) { |
2557 | /* messages are gone, move to first available one */ | 2542 | /* messages are gone, move to first available one */ |
2558 | dumper->cur_seq = log_first_seq; | 2543 | dumper->cur_seq = log_first_seq; |
@@ -2560,10 +2545,8 @@ bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog, | |||
2560 | } | 2545 | } |
2561 | 2546 | ||
2562 | /* last entry */ | 2547 | /* last entry */ |
2563 | if (dumper->cur_seq >= log_next_seq) { | 2548 | if (dumper->cur_seq >= log_next_seq) |
2564 | raw_spin_unlock_irqrestore(&logbuf_lock, flags); | ||
2565 | goto out; | 2549 | goto out; |
2566 | } | ||
2567 | 2550 | ||
2568 | msg = log_from_idx(dumper->cur_idx); | 2551 | msg = log_from_idx(dumper->cur_idx); |
2569 | l = msg_print_text(msg, 0, syslog, line, size); | 2552 | l = msg_print_text(msg, 0, syslog, line, size); |
@@ -2571,12 +2554,41 @@ bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog, | |||
2571 | dumper->cur_idx = log_next(dumper->cur_idx); | 2554 | dumper->cur_idx = log_next(dumper->cur_idx); |
2572 | dumper->cur_seq++; | 2555 | dumper->cur_seq++; |
2573 | ret = true; | 2556 | ret = true; |
2574 | raw_spin_unlock_irqrestore(&logbuf_lock, flags); | ||
2575 | out: | 2557 | out: |
2576 | if (len) | 2558 | if (len) |
2577 | *len = l; | 2559 | *len = l; |
2578 | return ret; | 2560 | return ret; |
2579 | } | 2561 | } |
2562 | |||
2563 | /** | ||
2564 | * kmsg_dump_get_line - retrieve one kmsg log line | ||
2565 | * @dumper: registered kmsg dumper | ||
2566 | * @syslog: include the "<4>" prefixes | ||
2567 | * @line: buffer to copy the line to | ||
2568 | * @size: maximum size of the buffer | ||
2569 | * @len: length of line placed into buffer | ||
2570 | * | ||
2571 | * Start at the beginning of the kmsg buffer, with the oldest kmsg | ||
2572 | * record, and copy one record into the provided buffer. | ||
2573 | * | ||
2574 | * Consecutive calls will return the next available record moving | ||
2575 | * towards the end of the buffer with the youngest messages. | ||
2576 | * | ||
2577 | * A return value of FALSE indicates that there are no more records to | ||
2578 | * read. | ||
2579 | */ | ||
2580 | bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog, | ||
2581 | char *line, size_t size, size_t *len) | ||
2582 | { | ||
2583 | unsigned long flags; | ||
2584 | bool ret; | ||
2585 | |||
2586 | raw_spin_lock_irqsave(&logbuf_lock, flags); | ||
2587 | ret = kmsg_dump_get_line_nolock(dumper, syslog, line, size, len); | ||
2588 | raw_spin_unlock_irqrestore(&logbuf_lock, flags); | ||
2589 | |||
2590 | return ret; | ||
2591 | } | ||
2580 | EXPORT_SYMBOL_GPL(kmsg_dump_get_line); | 2592 | EXPORT_SYMBOL_GPL(kmsg_dump_get_line); |
2581 | 2593 | ||
2582 | /** | 2594 | /** |
@@ -2679,6 +2691,24 @@ out: | |||
2679 | EXPORT_SYMBOL_GPL(kmsg_dump_get_buffer); | 2691 | EXPORT_SYMBOL_GPL(kmsg_dump_get_buffer); |
2680 | 2692 | ||
2681 | /** | 2693 | /** |
2694 | * kmsg_dump_rewind_nolock - reset the iterator (unlocked version) | ||
2695 | * @dumper: registered kmsg dumper | ||
2696 | * | ||
2697 | * Reset the dumper's iterator so that kmsg_dump_get_line() and | ||
2698 | * kmsg_dump_get_buffer() can be called again and used multiple | ||
2699 | * times within the same dumper.dump() callback. | ||
2700 | * | ||
2701 | * The function is similar to kmsg_dump_rewind(), but grabs no locks. | ||
2702 | */ | ||
2703 | void kmsg_dump_rewind_nolock(struct kmsg_dumper *dumper) | ||
2704 | { | ||
2705 | dumper->cur_seq = clear_seq; | ||
2706 | dumper->cur_idx = clear_idx; | ||
2707 | dumper->next_seq = log_next_seq; | ||
2708 | dumper->next_idx = log_next_idx; | ||
2709 | } | ||
2710 | |||
2711 | /** | ||
2682 | * kmsg_dump_rewind - reset the iterator | 2712 | * kmsg_dump_rewind - reset the iterator |
2683 | * @dumper: registered kmsg dumper | 2713 | * @dumper: registered kmsg dumper |
2684 | * | 2714 | * |
@@ -2691,10 +2721,7 @@ void kmsg_dump_rewind(struct kmsg_dumper *dumper) | |||
2691 | unsigned long flags; | 2721 | unsigned long flags; |
2692 | 2722 | ||
2693 | raw_spin_lock_irqsave(&logbuf_lock, flags); | 2723 | raw_spin_lock_irqsave(&logbuf_lock, flags); |
2694 | dumper->cur_seq = clear_seq; | 2724 | kmsg_dump_rewind_nolock(dumper); |
2695 | dumper->cur_idx = clear_idx; | ||
2696 | dumper->next_seq = log_next_seq; | ||
2697 | dumper->next_idx = log_next_idx; | ||
2698 | raw_spin_unlock_irqrestore(&logbuf_lock, flags); | 2725 | raw_spin_unlock_irqrestore(&logbuf_lock, flags); |
2699 | } | 2726 | } |
2700 | EXPORT_SYMBOL_GPL(kmsg_dump_rewind); | 2727 | EXPORT_SYMBOL_GPL(kmsg_dump_rewind); |
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index 95cba41ce1e9..4e6a61b15e86 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c | |||
@@ -54,6 +54,50 @@ | |||
54 | #ifdef CONFIG_PREEMPT_RCU | 54 | #ifdef CONFIG_PREEMPT_RCU |
55 | 55 | ||
56 | /* | 56 | /* |
57 | * Preemptible RCU implementation for rcu_read_lock(). | ||
58 | * Just increment ->rcu_read_lock_nesting, shared state will be updated | ||
59 | * if we block. | ||
60 | */ | ||
61 | void __rcu_read_lock(void) | ||
62 | { | ||
63 | current->rcu_read_lock_nesting++; | ||
64 | barrier(); /* critical section after entry code. */ | ||
65 | } | ||
66 | EXPORT_SYMBOL_GPL(__rcu_read_lock); | ||
67 | |||
68 | /* | ||
69 | * Preemptible RCU implementation for rcu_read_unlock(). | ||
70 | * Decrement ->rcu_read_lock_nesting. If the result is zero (outermost | ||
71 | * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then | ||
72 | * invoke rcu_read_unlock_special() to clean up after a context switch | ||
73 | * in an RCU read-side critical section and other special cases. | ||
74 | */ | ||
75 | void __rcu_read_unlock(void) | ||
76 | { | ||
77 | struct task_struct *t = current; | ||
78 | |||
79 | if (t->rcu_read_lock_nesting != 1) { | ||
80 | --t->rcu_read_lock_nesting; | ||
81 | } else { | ||
82 | barrier(); /* critical section before exit code. */ | ||
83 | t->rcu_read_lock_nesting = INT_MIN; | ||
84 | barrier(); /* assign before ->rcu_read_unlock_special load */ | ||
85 | if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special))) | ||
86 | rcu_read_unlock_special(t); | ||
87 | barrier(); /* ->rcu_read_unlock_special load before assign */ | ||
88 | t->rcu_read_lock_nesting = 0; | ||
89 | } | ||
90 | #ifdef CONFIG_PROVE_LOCKING | ||
91 | { | ||
92 | int rrln = ACCESS_ONCE(t->rcu_read_lock_nesting); | ||
93 | |||
94 | WARN_ON_ONCE(rrln < 0 && rrln > INT_MIN / 2); | ||
95 | } | ||
96 | #endif /* #ifdef CONFIG_PROVE_LOCKING */ | ||
97 | } | ||
98 | EXPORT_SYMBOL_GPL(__rcu_read_unlock); | ||
99 | |||
100 | /* | ||
57 | * Check for a task exiting while in a preemptible-RCU read-side | 101 | * Check for a task exiting while in a preemptible-RCU read-side |
58 | * critical section, clean up if so. No need to issue warnings, | 102 | * critical section, clean up if so. No need to issue warnings, |
59 | * as debug_check_no_locks_held() already does this if lockdep | 103 | * as debug_check_no_locks_held() already does this if lockdep |
diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c index 37a5444204d2..547b1fe5b052 100644 --- a/kernel/rcutiny.c +++ b/kernel/rcutiny.c | |||
@@ -172,7 +172,7 @@ void rcu_irq_enter(void) | |||
172 | local_irq_restore(flags); | 172 | local_irq_restore(flags); |
173 | } | 173 | } |
174 | 174 | ||
175 | #ifdef CONFIG_PROVE_RCU | 175 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
176 | 176 | ||
177 | /* | 177 | /* |
178 | * Test whether RCU thinks that the current CPU is idle. | 178 | * Test whether RCU thinks that the current CPU is idle. |
@@ -183,7 +183,7 @@ int rcu_is_cpu_idle(void) | |||
183 | } | 183 | } |
184 | EXPORT_SYMBOL(rcu_is_cpu_idle); | 184 | EXPORT_SYMBOL(rcu_is_cpu_idle); |
185 | 185 | ||
186 | #endif /* #ifdef CONFIG_PROVE_RCU */ | 186 | #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ |
187 | 187 | ||
188 | /* | 188 | /* |
189 | * Test whether the current CPU was interrupted from idle. Nested | 189 | * Test whether the current CPU was interrupted from idle. Nested |
diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h index fc31a2d65100..918fd1e8509c 100644 --- a/kernel/rcutiny_plugin.h +++ b/kernel/rcutiny_plugin.h | |||
@@ -132,7 +132,6 @@ static struct rcu_preempt_ctrlblk rcu_preempt_ctrlblk = { | |||
132 | RCU_TRACE(.rcb.name = "rcu_preempt") | 132 | RCU_TRACE(.rcb.name = "rcu_preempt") |
133 | }; | 133 | }; |
134 | 134 | ||
135 | static void rcu_read_unlock_special(struct task_struct *t); | ||
136 | static int rcu_preempted_readers_exp(void); | 135 | static int rcu_preempted_readers_exp(void); |
137 | static void rcu_report_exp_done(void); | 136 | static void rcu_report_exp_done(void); |
138 | 137 | ||
@@ -351,8 +350,9 @@ static int rcu_initiate_boost(void) | |||
351 | rcu_preempt_ctrlblk.boost_tasks = | 350 | rcu_preempt_ctrlblk.boost_tasks = |
352 | rcu_preempt_ctrlblk.gp_tasks; | 351 | rcu_preempt_ctrlblk.gp_tasks; |
353 | invoke_rcu_callbacks(); | 352 | invoke_rcu_callbacks(); |
354 | } else | 353 | } else { |
355 | RCU_TRACE(rcu_initiate_boost_trace()); | 354 | RCU_TRACE(rcu_initiate_boost_trace()); |
355 | } | ||
356 | return 1; | 356 | return 1; |
357 | } | 357 | } |
358 | 358 | ||
@@ -527,23 +527,11 @@ void rcu_preempt_note_context_switch(void) | |||
527 | } | 527 | } |
528 | 528 | ||
529 | /* | 529 | /* |
530 | * Tiny-preemptible RCU implementation for rcu_read_lock(). | ||
531 | * Just increment ->rcu_read_lock_nesting, shared state will be updated | ||
532 | * if we block. | ||
533 | */ | ||
534 | void __rcu_read_lock(void) | ||
535 | { | ||
536 | current->rcu_read_lock_nesting++; | ||
537 | barrier(); /* needed if we ever invoke rcu_read_lock in rcutiny.c */ | ||
538 | } | ||
539 | EXPORT_SYMBOL_GPL(__rcu_read_lock); | ||
540 | |||
541 | /* | ||
542 | * Handle special cases during rcu_read_unlock(), such as needing to | 530 | * Handle special cases during rcu_read_unlock(), such as needing to |
543 | * notify RCU core processing or task having blocked during the RCU | 531 | * notify RCU core processing or task having blocked during the RCU |
544 | * read-side critical section. | 532 | * read-side critical section. |
545 | */ | 533 | */ |
546 | static noinline void rcu_read_unlock_special(struct task_struct *t) | 534 | void rcu_read_unlock_special(struct task_struct *t) |
547 | { | 535 | { |
548 | int empty; | 536 | int empty; |
549 | int empty_exp; | 537 | int empty_exp; |
@@ -627,38 +615,6 @@ static noinline void rcu_read_unlock_special(struct task_struct *t) | |||
627 | } | 615 | } |
628 | 616 | ||
629 | /* | 617 | /* |
630 | * Tiny-preemptible RCU implementation for rcu_read_unlock(). | ||
631 | * Decrement ->rcu_read_lock_nesting. If the result is zero (outermost | ||
632 | * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then | ||
633 | * invoke rcu_read_unlock_special() to clean up after a context switch | ||
634 | * in an RCU read-side critical section and other special cases. | ||
635 | */ | ||
636 | void __rcu_read_unlock(void) | ||
637 | { | ||
638 | struct task_struct *t = current; | ||
639 | |||
640 | barrier(); /* needed if we ever invoke rcu_read_unlock in rcutiny.c */ | ||
641 | if (t->rcu_read_lock_nesting != 1) | ||
642 | --t->rcu_read_lock_nesting; | ||
643 | else { | ||
644 | t->rcu_read_lock_nesting = INT_MIN; | ||
645 | barrier(); /* assign before ->rcu_read_unlock_special load */ | ||
646 | if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special))) | ||
647 | rcu_read_unlock_special(t); | ||
648 | barrier(); /* ->rcu_read_unlock_special load before assign */ | ||
649 | t->rcu_read_lock_nesting = 0; | ||
650 | } | ||
651 | #ifdef CONFIG_PROVE_LOCKING | ||
652 | { | ||
653 | int rrln = ACCESS_ONCE(t->rcu_read_lock_nesting); | ||
654 | |||
655 | WARN_ON_ONCE(rrln < 0 && rrln > INT_MIN / 2); | ||
656 | } | ||
657 | #endif /* #ifdef CONFIG_PROVE_LOCKING */ | ||
658 | } | ||
659 | EXPORT_SYMBOL_GPL(__rcu_read_unlock); | ||
660 | |||
661 | /* | ||
662 | * Check for a quiescent state from the current CPU. When a task blocks, | 618 | * Check for a quiescent state from the current CPU. When a task blocks, |
663 | * the task is recorded in the rcu_preempt_ctrlblk structure, which is | 619 | * the task is recorded in the rcu_preempt_ctrlblk structure, which is |
664 | * checked elsewhere. This is called from the scheduling-clock interrupt. | 620 | * checked elsewhere. This is called from the scheduling-clock interrupt. |
@@ -823,9 +779,9 @@ void synchronize_rcu_expedited(void) | |||
823 | rpcp->exp_tasks = NULL; | 779 | rpcp->exp_tasks = NULL; |
824 | 780 | ||
825 | /* Wait for tail of ->blkd_tasks list to drain. */ | 781 | /* Wait for tail of ->blkd_tasks list to drain. */ |
826 | if (!rcu_preempted_readers_exp()) | 782 | if (!rcu_preempted_readers_exp()) { |
827 | local_irq_restore(flags); | 783 | local_irq_restore(flags); |
828 | else { | 784 | } else { |
829 | rcu_initiate_boost(); | 785 | rcu_initiate_boost(); |
830 | local_irq_restore(flags); | 786 | local_irq_restore(flags); |
831 | wait_event(sync_rcu_preempt_exp_wq, | 787 | wait_event(sync_rcu_preempt_exp_wq, |
@@ -846,8 +802,6 @@ EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); | |||
846 | */ | 802 | */ |
847 | int rcu_preempt_needs_cpu(void) | 803 | int rcu_preempt_needs_cpu(void) |
848 | { | 804 | { |
849 | if (!rcu_preempt_running_reader()) | ||
850 | rcu_preempt_cpu_qs(); | ||
851 | return rcu_preempt_ctrlblk.rcb.rcucblist != NULL; | 805 | return rcu_preempt_ctrlblk.rcb.rcucblist != NULL; |
852 | } | 806 | } |
853 | 807 | ||
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index e66b34ab7555..25b15033c61f 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c | |||
@@ -49,8 +49,7 @@ | |||
49 | #include <asm/byteorder.h> | 49 | #include <asm/byteorder.h> |
50 | 50 | ||
51 | MODULE_LICENSE("GPL"); | 51 | MODULE_LICENSE("GPL"); |
52 | MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and " | 52 | MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and Josh Triplett <josh@freedesktop.org>"); |
53 | "Josh Triplett <josh@freedesktop.org>"); | ||
54 | 53 | ||
55 | static int nreaders = -1; /* # reader threads, defaults to 2*ncpus */ | 54 | static int nreaders = -1; /* # reader threads, defaults to 2*ncpus */ |
56 | static int nfakewriters = 4; /* # fake writer threads */ | 55 | static int nfakewriters = 4; /* # fake writer threads */ |
@@ -206,6 +205,7 @@ static unsigned long boost_starttime; /* jiffies of next boost test start. */ | |||
206 | DEFINE_MUTEX(boost_mutex); /* protect setting boost_starttime */ | 205 | DEFINE_MUTEX(boost_mutex); /* protect setting boost_starttime */ |
207 | /* and boost task create/destroy. */ | 206 | /* and boost task create/destroy. */ |
208 | static atomic_t barrier_cbs_count; /* Barrier callbacks registered. */ | 207 | static atomic_t barrier_cbs_count; /* Barrier callbacks registered. */ |
208 | static bool barrier_phase; /* Test phase. */ | ||
209 | static atomic_t barrier_cbs_invoked; /* Barrier callbacks invoked. */ | 209 | static atomic_t barrier_cbs_invoked; /* Barrier callbacks invoked. */ |
210 | static wait_queue_head_t *barrier_cbs_wq; /* Coordinate barrier testing. */ | 210 | static wait_queue_head_t *barrier_cbs_wq; /* Coordinate barrier testing. */ |
211 | static DECLARE_WAIT_QUEUE_HEAD(barrier_wq); | 211 | static DECLARE_WAIT_QUEUE_HEAD(barrier_wq); |
@@ -407,8 +407,9 @@ rcu_torture_cb(struct rcu_head *p) | |||
407 | if (++rp->rtort_pipe_count >= RCU_TORTURE_PIPE_LEN) { | 407 | if (++rp->rtort_pipe_count >= RCU_TORTURE_PIPE_LEN) { |
408 | rp->rtort_mbtest = 0; | 408 | rp->rtort_mbtest = 0; |
409 | rcu_torture_free(rp); | 409 | rcu_torture_free(rp); |
410 | } else | 410 | } else { |
411 | cur_ops->deferred_free(rp); | 411 | cur_ops->deferred_free(rp); |
412 | } | ||
412 | } | 413 | } |
413 | 414 | ||
414 | static int rcu_no_completed(void) | 415 | static int rcu_no_completed(void) |
@@ -635,6 +636,17 @@ static void srcu_torture_synchronize(void) | |||
635 | synchronize_srcu(&srcu_ctl); | 636 | synchronize_srcu(&srcu_ctl); |
636 | } | 637 | } |
637 | 638 | ||
639 | static void srcu_torture_call(struct rcu_head *head, | ||
640 | void (*func)(struct rcu_head *head)) | ||
641 | { | ||
642 | call_srcu(&srcu_ctl, head, func); | ||
643 | } | ||
644 | |||
645 | static void srcu_torture_barrier(void) | ||
646 | { | ||
647 | srcu_barrier(&srcu_ctl); | ||
648 | } | ||
649 | |||
638 | static int srcu_torture_stats(char *page) | 650 | static int srcu_torture_stats(char *page) |
639 | { | 651 | { |
640 | int cnt = 0; | 652 | int cnt = 0; |
@@ -661,8 +673,8 @@ static struct rcu_torture_ops srcu_ops = { | |||
661 | .completed = srcu_torture_completed, | 673 | .completed = srcu_torture_completed, |
662 | .deferred_free = srcu_torture_deferred_free, | 674 | .deferred_free = srcu_torture_deferred_free, |
663 | .sync = srcu_torture_synchronize, | 675 | .sync = srcu_torture_synchronize, |
664 | .call = NULL, | 676 | .call = srcu_torture_call, |
665 | .cb_barrier = NULL, | 677 | .cb_barrier = srcu_torture_barrier, |
666 | .stats = srcu_torture_stats, | 678 | .stats = srcu_torture_stats, |
667 | .name = "srcu" | 679 | .name = "srcu" |
668 | }; | 680 | }; |
@@ -1013,7 +1025,11 @@ rcu_torture_fakewriter(void *arg) | |||
1013 | do { | 1025 | do { |
1014 | schedule_timeout_uninterruptible(1 + rcu_random(&rand)%10); | 1026 | schedule_timeout_uninterruptible(1 + rcu_random(&rand)%10); |
1015 | udelay(rcu_random(&rand) & 0x3ff); | 1027 | udelay(rcu_random(&rand) & 0x3ff); |
1016 | cur_ops->sync(); | 1028 | if (cur_ops->cb_barrier != NULL && |
1029 | rcu_random(&rand) % (nfakewriters * 8) == 0) | ||
1030 | cur_ops->cb_barrier(); | ||
1031 | else | ||
1032 | cur_ops->sync(); | ||
1017 | rcu_stutter_wait("rcu_torture_fakewriter"); | 1033 | rcu_stutter_wait("rcu_torture_fakewriter"); |
1018 | } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); | 1034 | } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); |
1019 | 1035 | ||
@@ -1183,27 +1199,27 @@ rcu_torture_printk(char *page) | |||
1183 | } | 1199 | } |
1184 | cnt += sprintf(&page[cnt], "%s%s ", torture_type, TORTURE_FLAG); | 1200 | cnt += sprintf(&page[cnt], "%s%s ", torture_type, TORTURE_FLAG); |
1185 | cnt += sprintf(&page[cnt], | 1201 | cnt += sprintf(&page[cnt], |
1186 | "rtc: %p ver: %lu tfle: %d rta: %d rtaf: %d rtf: %d " | 1202 | "rtc: %p ver: %lu tfle: %d rta: %d rtaf: %d rtf: %d ", |
1187 | "rtmbe: %d rtbke: %ld rtbre: %ld " | ||
1188 | "rtbf: %ld rtb: %ld nt: %ld " | ||
1189 | "onoff: %ld/%ld:%ld/%ld " | ||
1190 | "barrier: %ld/%ld:%ld", | ||
1191 | rcu_torture_current, | 1203 | rcu_torture_current, |
1192 | rcu_torture_current_version, | 1204 | rcu_torture_current_version, |
1193 | list_empty(&rcu_torture_freelist), | 1205 | list_empty(&rcu_torture_freelist), |
1194 | atomic_read(&n_rcu_torture_alloc), | 1206 | atomic_read(&n_rcu_torture_alloc), |
1195 | atomic_read(&n_rcu_torture_alloc_fail), | 1207 | atomic_read(&n_rcu_torture_alloc_fail), |
1196 | atomic_read(&n_rcu_torture_free), | 1208 | atomic_read(&n_rcu_torture_free)); |
1209 | cnt += sprintf(&page[cnt], "rtmbe: %d rtbke: %ld rtbre: %ld ", | ||
1197 | atomic_read(&n_rcu_torture_mberror), | 1210 | atomic_read(&n_rcu_torture_mberror), |
1198 | n_rcu_torture_boost_ktrerror, | 1211 | n_rcu_torture_boost_ktrerror, |
1199 | n_rcu_torture_boost_rterror, | 1212 | n_rcu_torture_boost_rterror); |
1213 | cnt += sprintf(&page[cnt], "rtbf: %ld rtb: %ld nt: %ld ", | ||
1200 | n_rcu_torture_boost_failure, | 1214 | n_rcu_torture_boost_failure, |
1201 | n_rcu_torture_boosts, | 1215 | n_rcu_torture_boosts, |
1202 | n_rcu_torture_timers, | 1216 | n_rcu_torture_timers); |
1217 | cnt += sprintf(&page[cnt], "onoff: %ld/%ld:%ld/%ld ", | ||
1203 | n_online_successes, | 1218 | n_online_successes, |
1204 | n_online_attempts, | 1219 | n_online_attempts, |
1205 | n_offline_successes, | 1220 | n_offline_successes, |
1206 | n_offline_attempts, | 1221 | n_offline_attempts); |
1222 | cnt += sprintf(&page[cnt], "barrier: %ld/%ld:%ld", | ||
1207 | n_barrier_successes, | 1223 | n_barrier_successes, |
1208 | n_barrier_attempts, | 1224 | n_barrier_attempts, |
1209 | n_rcu_torture_barrier_error); | 1225 | n_rcu_torture_barrier_error); |
@@ -1445,8 +1461,7 @@ rcu_torture_shutdown(void *arg) | |||
1445 | delta = shutdown_time - jiffies_snap; | 1461 | delta = shutdown_time - jiffies_snap; |
1446 | if (verbose) | 1462 | if (verbose) |
1447 | printk(KERN_ALERT "%s" TORTURE_FLAG | 1463 | printk(KERN_ALERT "%s" TORTURE_FLAG |
1448 | "rcu_torture_shutdown task: %lu " | 1464 | "rcu_torture_shutdown task: %lu jiffies remaining\n", |
1449 | "jiffies remaining\n", | ||
1450 | torture_type, delta); | 1465 | torture_type, delta); |
1451 | schedule_timeout_interruptible(delta); | 1466 | schedule_timeout_interruptible(delta); |
1452 | jiffies_snap = ACCESS_ONCE(jiffies); | 1467 | jiffies_snap = ACCESS_ONCE(jiffies); |
@@ -1498,8 +1513,7 @@ rcu_torture_onoff(void *arg) | |||
1498 | if (cpu_down(cpu) == 0) { | 1513 | if (cpu_down(cpu) == 0) { |
1499 | if (verbose) | 1514 | if (verbose) |
1500 | printk(KERN_ALERT "%s" TORTURE_FLAG | 1515 | printk(KERN_ALERT "%s" TORTURE_FLAG |
1501 | "rcu_torture_onoff task: " | 1516 | "rcu_torture_onoff task: offlined %d\n", |
1502 | "offlined %d\n", | ||
1503 | torture_type, cpu); | 1517 | torture_type, cpu); |
1504 | n_offline_successes++; | 1518 | n_offline_successes++; |
1505 | } | 1519 | } |
@@ -1512,8 +1526,7 @@ rcu_torture_onoff(void *arg) | |||
1512 | if (cpu_up(cpu) == 0) { | 1526 | if (cpu_up(cpu) == 0) { |
1513 | if (verbose) | 1527 | if (verbose) |
1514 | printk(KERN_ALERT "%s" TORTURE_FLAG | 1528 | printk(KERN_ALERT "%s" TORTURE_FLAG |
1515 | "rcu_torture_onoff task: " | 1529 | "rcu_torture_onoff task: onlined %d\n", |
1516 | "onlined %d\n", | ||
1517 | torture_type, cpu); | 1530 | torture_type, cpu); |
1518 | n_online_successes++; | 1531 | n_online_successes++; |
1519 | } | 1532 | } |
@@ -1631,6 +1644,7 @@ void rcu_torture_barrier_cbf(struct rcu_head *rcu) | |||
1631 | static int rcu_torture_barrier_cbs(void *arg) | 1644 | static int rcu_torture_barrier_cbs(void *arg) |
1632 | { | 1645 | { |
1633 | long myid = (long)arg; | 1646 | long myid = (long)arg; |
1647 | bool lastphase = 0; | ||
1634 | struct rcu_head rcu; | 1648 | struct rcu_head rcu; |
1635 | 1649 | ||
1636 | init_rcu_head_on_stack(&rcu); | 1650 | init_rcu_head_on_stack(&rcu); |
@@ -1638,9 +1652,11 @@ static int rcu_torture_barrier_cbs(void *arg) | |||
1638 | set_user_nice(current, 19); | 1652 | set_user_nice(current, 19); |
1639 | do { | 1653 | do { |
1640 | wait_event(barrier_cbs_wq[myid], | 1654 | wait_event(barrier_cbs_wq[myid], |
1641 | atomic_read(&barrier_cbs_count) == n_barrier_cbs || | 1655 | barrier_phase != lastphase || |
1642 | kthread_should_stop() || | 1656 | kthread_should_stop() || |
1643 | fullstop != FULLSTOP_DONTSTOP); | 1657 | fullstop != FULLSTOP_DONTSTOP); |
1658 | lastphase = barrier_phase; | ||
1659 | smp_mb(); /* ensure barrier_phase load before ->call(). */ | ||
1644 | if (kthread_should_stop() || fullstop != FULLSTOP_DONTSTOP) | 1660 | if (kthread_should_stop() || fullstop != FULLSTOP_DONTSTOP) |
1645 | break; | 1661 | break; |
1646 | cur_ops->call(&rcu, rcu_torture_barrier_cbf); | 1662 | cur_ops->call(&rcu, rcu_torture_barrier_cbf); |
@@ -1665,7 +1681,8 @@ static int rcu_torture_barrier(void *arg) | |||
1665 | do { | 1681 | do { |
1666 | atomic_set(&barrier_cbs_invoked, 0); | 1682 | atomic_set(&barrier_cbs_invoked, 0); |
1667 | atomic_set(&barrier_cbs_count, n_barrier_cbs); | 1683 | atomic_set(&barrier_cbs_count, n_barrier_cbs); |
1668 | /* wake_up() path contains the required barriers. */ | 1684 | smp_mb(); /* Ensure barrier_phase after prior assignments. */ |
1685 | barrier_phase = !barrier_phase; | ||
1669 | for (i = 0; i < n_barrier_cbs; i++) | 1686 | for (i = 0; i < n_barrier_cbs; i++) |
1670 | wake_up(&barrier_cbs_wq[i]); | 1687 | wake_up(&barrier_cbs_wq[i]); |
1671 | wait_event(barrier_wq, | 1688 | wait_event(barrier_wq, |
@@ -1684,7 +1701,7 @@ static int rcu_torture_barrier(void *arg) | |||
1684 | schedule_timeout_interruptible(HZ / 10); | 1701 | schedule_timeout_interruptible(HZ / 10); |
1685 | } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); | 1702 | } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); |
1686 | VERBOSE_PRINTK_STRING("rcu_torture_barrier task stopping"); | 1703 | VERBOSE_PRINTK_STRING("rcu_torture_barrier task stopping"); |
1687 | rcutorture_shutdown_absorb("rcu_torture_barrier_cbs"); | 1704 | rcutorture_shutdown_absorb("rcu_torture_barrier"); |
1688 | while (!kthread_should_stop()) | 1705 | while (!kthread_should_stop()) |
1689 | schedule_timeout_interruptible(1); | 1706 | schedule_timeout_interruptible(1); |
1690 | return 0; | 1707 | return 0; |
@@ -1908,8 +1925,8 @@ rcu_torture_init(void) | |||
1908 | static struct rcu_torture_ops *torture_ops[] = | 1925 | static struct rcu_torture_ops *torture_ops[] = |
1909 | { &rcu_ops, &rcu_sync_ops, &rcu_expedited_ops, | 1926 | { &rcu_ops, &rcu_sync_ops, &rcu_expedited_ops, |
1910 | &rcu_bh_ops, &rcu_bh_sync_ops, &rcu_bh_expedited_ops, | 1927 | &rcu_bh_ops, &rcu_bh_sync_ops, &rcu_bh_expedited_ops, |
1911 | &srcu_ops, &srcu_sync_ops, &srcu_raw_ops, | 1928 | &srcu_ops, &srcu_sync_ops, &srcu_expedited_ops, |
1912 | &srcu_raw_sync_ops, &srcu_expedited_ops, | 1929 | &srcu_raw_ops, &srcu_raw_sync_ops, |
1913 | &sched_ops, &sched_sync_ops, &sched_expedited_ops, }; | 1930 | &sched_ops, &sched_sync_ops, &sched_expedited_ops, }; |
1914 | 1931 | ||
1915 | mutex_lock(&fullstop_mutex); | 1932 | mutex_lock(&fullstop_mutex); |
@@ -1931,8 +1948,7 @@ rcu_torture_init(void) | |||
1931 | return -EINVAL; | 1948 | return -EINVAL; |
1932 | } | 1949 | } |
1933 | if (cur_ops->fqs == NULL && fqs_duration != 0) { | 1950 | if (cur_ops->fqs == NULL && fqs_duration != 0) { |
1934 | printk(KERN_ALERT "rcu-torture: ->fqs NULL and non-zero " | 1951 | printk(KERN_ALERT "rcu-torture: ->fqs NULL and non-zero fqs_duration, fqs disabled.\n"); |
1935 | "fqs_duration, fqs disabled.\n"); | ||
1936 | fqs_duration = 0; | 1952 | fqs_duration = 0; |
1937 | } | 1953 | } |
1938 | if (cur_ops->init) | 1954 | if (cur_ops->init) |
diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 4b97bba7396e..f280e542e3e9 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c | |||
@@ -60,36 +60,44 @@ | |||
60 | 60 | ||
61 | /* Data structures. */ | 61 | /* Data structures. */ |
62 | 62 | ||
63 | static struct lock_class_key rcu_node_class[NUM_RCU_LVLS]; | 63 | static struct lock_class_key rcu_node_class[RCU_NUM_LVLS]; |
64 | 64 | ||
65 | #define RCU_STATE_INITIALIZER(structname) { \ | 65 | #define RCU_STATE_INITIALIZER(sname, cr) { \ |
66 | .level = { &structname##_state.node[0] }, \ | 66 | .level = { &sname##_state.node[0] }, \ |
67 | .levelcnt = { \ | 67 | .call = cr, \ |
68 | NUM_RCU_LVL_0, /* root of hierarchy. */ \ | ||
69 | NUM_RCU_LVL_1, \ | ||
70 | NUM_RCU_LVL_2, \ | ||
71 | NUM_RCU_LVL_3, \ | ||
72 | NUM_RCU_LVL_4, /* == MAX_RCU_LVLS */ \ | ||
73 | }, \ | ||
74 | .fqs_state = RCU_GP_IDLE, \ | 68 | .fqs_state = RCU_GP_IDLE, \ |
75 | .gpnum = -300, \ | 69 | .gpnum = -300, \ |
76 | .completed = -300, \ | 70 | .completed = -300, \ |
77 | .onofflock = __RAW_SPIN_LOCK_UNLOCKED(&structname##_state.onofflock), \ | 71 | .onofflock = __RAW_SPIN_LOCK_UNLOCKED(&sname##_state.onofflock), \ |
78 | .orphan_nxttail = &structname##_state.orphan_nxtlist, \ | 72 | .orphan_nxttail = &sname##_state.orphan_nxtlist, \ |
79 | .orphan_donetail = &structname##_state.orphan_donelist, \ | 73 | .orphan_donetail = &sname##_state.orphan_donelist, \ |
80 | .fqslock = __RAW_SPIN_LOCK_UNLOCKED(&structname##_state.fqslock), \ | 74 | .barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \ |
81 | .n_force_qs = 0, \ | 75 | .fqslock = __RAW_SPIN_LOCK_UNLOCKED(&sname##_state.fqslock), \ |
82 | .n_force_qs_ngp = 0, \ | 76 | .name = #sname, \ |
83 | .name = #structname, \ | ||
84 | } | 77 | } |
85 | 78 | ||
86 | struct rcu_state rcu_sched_state = RCU_STATE_INITIALIZER(rcu_sched); | 79 | struct rcu_state rcu_sched_state = |
80 | RCU_STATE_INITIALIZER(rcu_sched, call_rcu_sched); | ||
87 | DEFINE_PER_CPU(struct rcu_data, rcu_sched_data); | 81 | DEFINE_PER_CPU(struct rcu_data, rcu_sched_data); |
88 | 82 | ||
89 | struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh); | 83 | struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh, call_rcu_bh); |
90 | DEFINE_PER_CPU(struct rcu_data, rcu_bh_data); | 84 | DEFINE_PER_CPU(struct rcu_data, rcu_bh_data); |
91 | 85 | ||
92 | static struct rcu_state *rcu_state; | 86 | static struct rcu_state *rcu_state; |
87 | LIST_HEAD(rcu_struct_flavors); | ||
88 | |||
89 | /* Increase (but not decrease) the CONFIG_RCU_FANOUT_LEAF at boot time. */ | ||
90 | static int rcu_fanout_leaf = CONFIG_RCU_FANOUT_LEAF; | ||
91 | module_param(rcu_fanout_leaf, int, 0); | ||
92 | int rcu_num_lvls __read_mostly = RCU_NUM_LVLS; | ||
93 | static int num_rcu_lvl[] = { /* Number of rcu_nodes at specified level. */ | ||
94 | NUM_RCU_LVL_0, | ||
95 | NUM_RCU_LVL_1, | ||
96 | NUM_RCU_LVL_2, | ||
97 | NUM_RCU_LVL_3, | ||
98 | NUM_RCU_LVL_4, | ||
99 | }; | ||
100 | int rcu_num_nodes __read_mostly = NUM_RCU_NODES; /* Total # rcu_nodes in use. */ | ||
93 | 101 | ||
94 | /* | 102 | /* |
95 | * The rcu_scheduler_active variable transitions from zero to one just | 103 | * The rcu_scheduler_active variable transitions from zero to one just |
@@ -147,13 +155,6 @@ static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp); | |||
147 | unsigned long rcutorture_testseq; | 155 | unsigned long rcutorture_testseq; |
148 | unsigned long rcutorture_vernum; | 156 | unsigned long rcutorture_vernum; |
149 | 157 | ||
150 | /* State information for rcu_barrier() and friends. */ | ||
151 | |||
152 | static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL}; | ||
153 | static atomic_t rcu_barrier_cpu_count; | ||
154 | static DEFINE_MUTEX(rcu_barrier_mutex); | ||
155 | static struct completion rcu_barrier_completion; | ||
156 | |||
157 | /* | 158 | /* |
158 | * Return true if an RCU grace period is in progress. The ACCESS_ONCE()s | 159 | * Return true if an RCU grace period is in progress. The ACCESS_ONCE()s |
159 | * permit this function to be invoked without holding the root rcu_node | 160 | * permit this function to be invoked without holding the root rcu_node |
@@ -358,7 +359,7 @@ static void rcu_idle_enter_common(struct rcu_dynticks *rdtp, long long oldval) | |||
358 | struct task_struct *idle = idle_task(smp_processor_id()); | 359 | struct task_struct *idle = idle_task(smp_processor_id()); |
359 | 360 | ||
360 | trace_rcu_dyntick("Error on entry: not idle task", oldval, 0); | 361 | trace_rcu_dyntick("Error on entry: not idle task", oldval, 0); |
361 | ftrace_dump(DUMP_ALL); | 362 | ftrace_dump(DUMP_ORIG); |
362 | WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s", | 363 | WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s", |
363 | current->pid, current->comm, | 364 | current->pid, current->comm, |
364 | idle->pid, idle->comm); /* must be idle task! */ | 365 | idle->pid, idle->comm); /* must be idle task! */ |
@@ -468,7 +469,7 @@ static void rcu_idle_exit_common(struct rcu_dynticks *rdtp, long long oldval) | |||
468 | 469 | ||
469 | trace_rcu_dyntick("Error on exit: not idle task", | 470 | trace_rcu_dyntick("Error on exit: not idle task", |
470 | oldval, rdtp->dynticks_nesting); | 471 | oldval, rdtp->dynticks_nesting); |
471 | ftrace_dump(DUMP_ALL); | 472 | ftrace_dump(DUMP_ORIG); |
472 | WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s", | 473 | WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s", |
473 | current->pid, current->comm, | 474 | current->pid, current->comm, |
474 | idle->pid, idle->comm); /* must be idle task! */ | 475 | idle->pid, idle->comm); /* must be idle task! */ |
@@ -585,8 +586,6 @@ void rcu_nmi_exit(void) | |||
585 | WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1); | 586 | WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1); |
586 | } | 587 | } |
587 | 588 | ||
588 | #ifdef CONFIG_PROVE_RCU | ||
589 | |||
590 | /** | 589 | /** |
591 | * rcu_is_cpu_idle - see if RCU thinks that the current CPU is idle | 590 | * rcu_is_cpu_idle - see if RCU thinks that the current CPU is idle |
592 | * | 591 | * |
@@ -604,7 +603,7 @@ int rcu_is_cpu_idle(void) | |||
604 | } | 603 | } |
605 | EXPORT_SYMBOL(rcu_is_cpu_idle); | 604 | EXPORT_SYMBOL(rcu_is_cpu_idle); |
606 | 605 | ||
607 | #ifdef CONFIG_HOTPLUG_CPU | 606 | #if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU) |
608 | 607 | ||
609 | /* | 608 | /* |
610 | * Is the current CPU online? Disable preemption to avoid false positives | 609 | * Is the current CPU online? Disable preemption to avoid false positives |
@@ -645,9 +644,7 @@ bool rcu_lockdep_current_cpu_online(void) | |||
645 | } | 644 | } |
646 | EXPORT_SYMBOL_GPL(rcu_lockdep_current_cpu_online); | 645 | EXPORT_SYMBOL_GPL(rcu_lockdep_current_cpu_online); |
647 | 646 | ||
648 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ | 647 | #endif /* #if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU) */ |
649 | |||
650 | #endif /* #ifdef CONFIG_PROVE_RCU */ | ||
651 | 648 | ||
652 | /** | 649 | /** |
653 | * rcu_is_cpu_rrupt_from_idle - see if idle or immediately interrupted from idle | 650 | * rcu_is_cpu_rrupt_from_idle - see if idle or immediately interrupted from idle |
@@ -733,7 +730,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp) | |||
733 | int cpu; | 730 | int cpu; |
734 | long delta; | 731 | long delta; |
735 | unsigned long flags; | 732 | unsigned long flags; |
736 | int ndetected; | 733 | int ndetected = 0; |
737 | struct rcu_node *rnp = rcu_get_root(rsp); | 734 | struct rcu_node *rnp = rcu_get_root(rsp); |
738 | 735 | ||
739 | /* Only let one CPU complain about others per time interval. */ | 736 | /* Only let one CPU complain about others per time interval. */ |
@@ -774,7 +771,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp) | |||
774 | */ | 771 | */ |
775 | rnp = rcu_get_root(rsp); | 772 | rnp = rcu_get_root(rsp); |
776 | raw_spin_lock_irqsave(&rnp->lock, flags); | 773 | raw_spin_lock_irqsave(&rnp->lock, flags); |
777 | ndetected = rcu_print_task_stall(rnp); | 774 | ndetected += rcu_print_task_stall(rnp); |
778 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 775 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
779 | 776 | ||
780 | print_cpu_stall_info_end(); | 777 | print_cpu_stall_info_end(); |
@@ -860,9 +857,10 @@ static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr) | |||
860 | */ | 857 | */ |
861 | void rcu_cpu_stall_reset(void) | 858 | void rcu_cpu_stall_reset(void) |
862 | { | 859 | { |
863 | rcu_sched_state.jiffies_stall = jiffies + ULONG_MAX / 2; | 860 | struct rcu_state *rsp; |
864 | rcu_bh_state.jiffies_stall = jiffies + ULONG_MAX / 2; | 861 | |
865 | rcu_preempt_stall_reset(); | 862 | for_each_rcu_flavor(rsp) |
863 | rsp->jiffies_stall = jiffies + ULONG_MAX / 2; | ||
866 | } | 864 | } |
867 | 865 | ||
868 | static struct notifier_block rcu_panic_block = { | 866 | static struct notifier_block rcu_panic_block = { |
@@ -894,8 +892,9 @@ static void __note_new_gpnum(struct rcu_state *rsp, struct rcu_node *rnp, struct | |||
894 | if (rnp->qsmask & rdp->grpmask) { | 892 | if (rnp->qsmask & rdp->grpmask) { |
895 | rdp->qs_pending = 1; | 893 | rdp->qs_pending = 1; |
896 | rdp->passed_quiesce = 0; | 894 | rdp->passed_quiesce = 0; |
897 | } else | 895 | } else { |
898 | rdp->qs_pending = 0; | 896 | rdp->qs_pending = 0; |
897 | } | ||
899 | zero_cpu_stall_ticks(rdp); | 898 | zero_cpu_stall_ticks(rdp); |
900 | } | 899 | } |
901 | } | 900 | } |
@@ -937,6 +936,18 @@ check_for_new_grace_period(struct rcu_state *rsp, struct rcu_data *rdp) | |||
937 | } | 936 | } |
938 | 937 | ||
939 | /* | 938 | /* |
939 | * Initialize the specified rcu_data structure's callback list to empty. | ||
940 | */ | ||
941 | static void init_callback_list(struct rcu_data *rdp) | ||
942 | { | ||
943 | int i; | ||
944 | |||
945 | rdp->nxtlist = NULL; | ||
946 | for (i = 0; i < RCU_NEXT_SIZE; i++) | ||
947 | rdp->nxttail[i] = &rdp->nxtlist; | ||
948 | } | ||
949 | |||
950 | /* | ||
940 | * Advance this CPU's callbacks, but only if the current grace period | 951 | * Advance this CPU's callbacks, but only if the current grace period |
941 | * has ended. This may be called only from the CPU to whom the rdp | 952 | * has ended. This may be called only from the CPU to whom the rdp |
942 | * belongs. In addition, the corresponding leaf rcu_node structure's | 953 | * belongs. In addition, the corresponding leaf rcu_node structure's |
@@ -1328,8 +1339,6 @@ static void | |||
1328 | rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp, | 1339 | rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp, |
1329 | struct rcu_node *rnp, struct rcu_data *rdp) | 1340 | struct rcu_node *rnp, struct rcu_data *rdp) |
1330 | { | 1341 | { |
1331 | int i; | ||
1332 | |||
1333 | /* | 1342 | /* |
1334 | * Orphan the callbacks. First adjust the counts. This is safe | 1343 | * Orphan the callbacks. First adjust the counts. This is safe |
1335 | * because ->onofflock excludes _rcu_barrier()'s adoption of | 1344 | * because ->onofflock excludes _rcu_barrier()'s adoption of |
@@ -1340,7 +1349,7 @@ rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp, | |||
1340 | rsp->qlen += rdp->qlen; | 1349 | rsp->qlen += rdp->qlen; |
1341 | rdp->n_cbs_orphaned += rdp->qlen; | 1350 | rdp->n_cbs_orphaned += rdp->qlen; |
1342 | rdp->qlen_lazy = 0; | 1351 | rdp->qlen_lazy = 0; |
1343 | rdp->qlen = 0; | 1352 | ACCESS_ONCE(rdp->qlen) = 0; |
1344 | } | 1353 | } |
1345 | 1354 | ||
1346 | /* | 1355 | /* |
@@ -1369,9 +1378,7 @@ rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp, | |||
1369 | } | 1378 | } |
1370 | 1379 | ||
1371 | /* Finally, initialize the rcu_data structure's list to empty. */ | 1380 | /* Finally, initialize the rcu_data structure's list to empty. */ |
1372 | rdp->nxtlist = NULL; | 1381 | init_callback_list(rdp); |
1373 | for (i = 0; i < RCU_NEXT_SIZE; i++) | ||
1374 | rdp->nxttail[i] = &rdp->nxtlist; | ||
1375 | } | 1382 | } |
1376 | 1383 | ||
1377 | /* | 1384 | /* |
@@ -1505,6 +1512,9 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp) | |||
1505 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 1512 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
1506 | if (need_report & RCU_OFL_TASKS_EXP_GP) | 1513 | if (need_report & RCU_OFL_TASKS_EXP_GP) |
1507 | rcu_report_exp_rnp(rsp, rnp, true); | 1514 | rcu_report_exp_rnp(rsp, rnp, true); |
1515 | WARN_ONCE(rdp->qlen != 0 || rdp->nxtlist != NULL, | ||
1516 | "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, nxtlist=%p\n", | ||
1517 | cpu, rdp->qlen, rdp->nxtlist); | ||
1508 | } | 1518 | } |
1509 | 1519 | ||
1510 | #else /* #ifdef CONFIG_HOTPLUG_CPU */ | 1520 | #else /* #ifdef CONFIG_HOTPLUG_CPU */ |
@@ -1592,7 +1602,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) | |||
1592 | } | 1602 | } |
1593 | smp_mb(); /* List handling before counting for rcu_barrier(). */ | 1603 | smp_mb(); /* List handling before counting for rcu_barrier(). */ |
1594 | rdp->qlen_lazy -= count_lazy; | 1604 | rdp->qlen_lazy -= count_lazy; |
1595 | rdp->qlen -= count; | 1605 | ACCESS_ONCE(rdp->qlen) -= count; |
1596 | rdp->n_cbs_invoked += count; | 1606 | rdp->n_cbs_invoked += count; |
1597 | 1607 | ||
1598 | /* Reinstate batch limit if we have worked down the excess. */ | 1608 | /* Reinstate batch limit if we have worked down the excess. */ |
@@ -1605,6 +1615,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) | |||
1605 | rdp->n_force_qs_snap = rsp->n_force_qs; | 1615 | rdp->n_force_qs_snap = rsp->n_force_qs; |
1606 | } else if (rdp->qlen < rdp->qlen_last_fqs_check - qhimark) | 1616 | } else if (rdp->qlen < rdp->qlen_last_fqs_check - qhimark) |
1607 | rdp->qlen_last_fqs_check = rdp->qlen; | 1617 | rdp->qlen_last_fqs_check = rdp->qlen; |
1618 | WARN_ON_ONCE((rdp->nxtlist == NULL) != (rdp->qlen == 0)); | ||
1608 | 1619 | ||
1609 | local_irq_restore(flags); | 1620 | local_irq_restore(flags); |
1610 | 1621 | ||
@@ -1745,8 +1756,6 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) | |||
1745 | break; /* grace period idle or initializing, ignore. */ | 1756 | break; /* grace period idle or initializing, ignore. */ |
1746 | 1757 | ||
1747 | case RCU_SAVE_DYNTICK: | 1758 | case RCU_SAVE_DYNTICK: |
1748 | if (RCU_SIGNAL_INIT != RCU_SAVE_DYNTICK) | ||
1749 | break; /* So gcc recognizes the dead code. */ | ||
1750 | 1759 | ||
1751 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ | 1760 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ |
1752 | 1761 | ||
@@ -1788,9 +1797,10 @@ unlock_fqs_ret: | |||
1788 | * whom the rdp belongs. | 1797 | * whom the rdp belongs. |
1789 | */ | 1798 | */ |
1790 | static void | 1799 | static void |
1791 | __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) | 1800 | __rcu_process_callbacks(struct rcu_state *rsp) |
1792 | { | 1801 | { |
1793 | unsigned long flags; | 1802 | unsigned long flags; |
1803 | struct rcu_data *rdp = __this_cpu_ptr(rsp->rda); | ||
1794 | 1804 | ||
1795 | WARN_ON_ONCE(rdp->beenonline == 0); | 1805 | WARN_ON_ONCE(rdp->beenonline == 0); |
1796 | 1806 | ||
@@ -1826,11 +1836,11 @@ __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) | |||
1826 | */ | 1836 | */ |
1827 | static void rcu_process_callbacks(struct softirq_action *unused) | 1837 | static void rcu_process_callbacks(struct softirq_action *unused) |
1828 | { | 1838 | { |
1839 | struct rcu_state *rsp; | ||
1840 | |||
1829 | trace_rcu_utilization("Start RCU core"); | 1841 | trace_rcu_utilization("Start RCU core"); |
1830 | __rcu_process_callbacks(&rcu_sched_state, | 1842 | for_each_rcu_flavor(rsp) |
1831 | &__get_cpu_var(rcu_sched_data)); | 1843 | __rcu_process_callbacks(rsp); |
1832 | __rcu_process_callbacks(&rcu_bh_state, &__get_cpu_var(rcu_bh_data)); | ||
1833 | rcu_preempt_process_callbacks(); | ||
1834 | trace_rcu_utilization("End RCU core"); | 1844 | trace_rcu_utilization("End RCU core"); |
1835 | } | 1845 | } |
1836 | 1846 | ||
@@ -1857,6 +1867,56 @@ static void invoke_rcu_core(void) | |||
1857 | raise_softirq(RCU_SOFTIRQ); | 1867 | raise_softirq(RCU_SOFTIRQ); |
1858 | } | 1868 | } |
1859 | 1869 | ||
1870 | /* | ||
1871 | * Handle any core-RCU processing required by a call_rcu() invocation. | ||
1872 | */ | ||
1873 | static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp, | ||
1874 | struct rcu_head *head, unsigned long flags) | ||
1875 | { | ||
1876 | /* | ||
1877 | * If called from an extended quiescent state, invoke the RCU | ||
1878 | * core in order to force a re-evaluation of RCU's idleness. | ||
1879 | */ | ||
1880 | if (rcu_is_cpu_idle() && cpu_online(smp_processor_id())) | ||
1881 | invoke_rcu_core(); | ||
1882 | |||
1883 | /* If interrupts were disabled or CPU offline, don't invoke RCU core. */ | ||
1884 | if (irqs_disabled_flags(flags) || cpu_is_offline(smp_processor_id())) | ||
1885 | return; | ||
1886 | |||
1887 | /* | ||
1888 | * Force the grace period if too many callbacks or too long waiting. | ||
1889 | * Enforce hysteresis, and don't invoke force_quiescent_state() | ||
1890 | * if some other CPU has recently done so. Also, don't bother | ||
1891 | * invoking force_quiescent_state() if the newly enqueued callback | ||
1892 | * is the only one waiting for a grace period to complete. | ||
1893 | */ | ||
1894 | if (unlikely(rdp->qlen > rdp->qlen_last_fqs_check + qhimark)) { | ||
1895 | |||
1896 | /* Are we ignoring a completed grace period? */ | ||
1897 | rcu_process_gp_end(rsp, rdp); | ||
1898 | check_for_new_grace_period(rsp, rdp); | ||
1899 | |||
1900 | /* Start a new grace period if one not already started. */ | ||
1901 | if (!rcu_gp_in_progress(rsp)) { | ||
1902 | unsigned long nestflag; | ||
1903 | struct rcu_node *rnp_root = rcu_get_root(rsp); | ||
1904 | |||
1905 | raw_spin_lock_irqsave(&rnp_root->lock, nestflag); | ||
1906 | rcu_start_gp(rsp, nestflag); /* rlses rnp_root->lock */ | ||
1907 | } else { | ||
1908 | /* Give the grace period a kick. */ | ||
1909 | rdp->blimit = LONG_MAX; | ||
1910 | if (rsp->n_force_qs == rdp->n_force_qs_snap && | ||
1911 | *rdp->nxttail[RCU_DONE_TAIL] != head) | ||
1912 | force_quiescent_state(rsp, 0); | ||
1913 | rdp->n_force_qs_snap = rsp->n_force_qs; | ||
1914 | rdp->qlen_last_fqs_check = rdp->qlen; | ||
1915 | } | ||
1916 | } else if (ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies)) | ||
1917 | force_quiescent_state(rsp, 1); | ||
1918 | } | ||
1919 | |||
1860 | static void | 1920 | static void |
1861 | __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), | 1921 | __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), |
1862 | struct rcu_state *rsp, bool lazy) | 1922 | struct rcu_state *rsp, bool lazy) |
@@ -1881,7 +1941,7 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), | |||
1881 | rdp = this_cpu_ptr(rsp->rda); | 1941 | rdp = this_cpu_ptr(rsp->rda); |
1882 | 1942 | ||
1883 | /* Add the callback to our list. */ | 1943 | /* Add the callback to our list. */ |
1884 | rdp->qlen++; | 1944 | ACCESS_ONCE(rdp->qlen)++; |
1885 | if (lazy) | 1945 | if (lazy) |
1886 | rdp->qlen_lazy++; | 1946 | rdp->qlen_lazy++; |
1887 | else | 1947 | else |
@@ -1896,43 +1956,8 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), | |||
1896 | else | 1956 | else |
1897 | trace_rcu_callback(rsp->name, head, rdp->qlen_lazy, rdp->qlen); | 1957 | trace_rcu_callback(rsp->name, head, rdp->qlen_lazy, rdp->qlen); |
1898 | 1958 | ||
1899 | /* If interrupts were disabled, don't dive into RCU core. */ | 1959 | /* Go handle any RCU core processing required. */ |
1900 | if (irqs_disabled_flags(flags)) { | 1960 | __call_rcu_core(rsp, rdp, head, flags); |
1901 | local_irq_restore(flags); | ||
1902 | return; | ||
1903 | } | ||
1904 | |||
1905 | /* | ||
1906 | * Force the grace period if too many callbacks or too long waiting. | ||
1907 | * Enforce hysteresis, and don't invoke force_quiescent_state() | ||
1908 | * if some other CPU has recently done so. Also, don't bother | ||
1909 | * invoking force_quiescent_state() if the newly enqueued callback | ||
1910 | * is the only one waiting for a grace period to complete. | ||
1911 | */ | ||
1912 | if (unlikely(rdp->qlen > rdp->qlen_last_fqs_check + qhimark)) { | ||
1913 | |||
1914 | /* Are we ignoring a completed grace period? */ | ||
1915 | rcu_process_gp_end(rsp, rdp); | ||
1916 | check_for_new_grace_period(rsp, rdp); | ||
1917 | |||
1918 | /* Start a new grace period if one not already started. */ | ||
1919 | if (!rcu_gp_in_progress(rsp)) { | ||
1920 | unsigned long nestflag; | ||
1921 | struct rcu_node *rnp_root = rcu_get_root(rsp); | ||
1922 | |||
1923 | raw_spin_lock_irqsave(&rnp_root->lock, nestflag); | ||
1924 | rcu_start_gp(rsp, nestflag); /* rlses rnp_root->lock */ | ||
1925 | } else { | ||
1926 | /* Give the grace period a kick. */ | ||
1927 | rdp->blimit = LONG_MAX; | ||
1928 | if (rsp->n_force_qs == rdp->n_force_qs_snap && | ||
1929 | *rdp->nxttail[RCU_DONE_TAIL] != head) | ||
1930 | force_quiescent_state(rsp, 0); | ||
1931 | rdp->n_force_qs_snap = rsp->n_force_qs; | ||
1932 | rdp->qlen_last_fqs_check = rdp->qlen; | ||
1933 | } | ||
1934 | } else if (ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies)) | ||
1935 | force_quiescent_state(rsp, 1); | ||
1936 | local_irq_restore(flags); | 1961 | local_irq_restore(flags); |
1937 | } | 1962 | } |
1938 | 1963 | ||
@@ -1962,28 +1987,16 @@ EXPORT_SYMBOL_GPL(call_rcu_bh); | |||
1962 | * occasionally incorrectly indicate that there are multiple CPUs online | 1987 | * occasionally incorrectly indicate that there are multiple CPUs online |
1963 | * when there was in fact only one the whole time, as this just adds | 1988 | * when there was in fact only one the whole time, as this just adds |
1964 | * some overhead: RCU still operates correctly. | 1989 | * some overhead: RCU still operates correctly. |
1965 | * | ||
1966 | * Of course, sampling num_online_cpus() with preemption enabled can | ||
1967 | * give erroneous results if there are concurrent CPU-hotplug operations. | ||
1968 | * For example, given a demonic sequence of preemptions in num_online_cpus() | ||
1969 | * and CPU-hotplug operations, there could be two or more CPUs online at | ||
1970 | * all times, but num_online_cpus() might well return one (or even zero). | ||
1971 | * | ||
1972 | * However, all such demonic sequences require at least one CPU-offline | ||
1973 | * operation. Furthermore, rcu_blocking_is_gp() giving the wrong answer | ||
1974 | * is only a problem if there is an RCU read-side critical section executing | ||
1975 | * throughout. But RCU-sched and RCU-bh read-side critical sections | ||
1976 | * disable either preemption or bh, which prevents a CPU from going offline. | ||
1977 | * Therefore, the only way that rcu_blocking_is_gp() can incorrectly return | ||
1978 | * that there is only one CPU when in fact there was more than one throughout | ||
1979 | * is when there were no RCU readers in the system. If there are no | ||
1980 | * RCU readers, the grace period by definition can be of zero length, | ||
1981 | * regardless of the number of online CPUs. | ||
1982 | */ | 1990 | */ |
1983 | static inline int rcu_blocking_is_gp(void) | 1991 | static inline int rcu_blocking_is_gp(void) |
1984 | { | 1992 | { |
1993 | int ret; | ||
1994 | |||
1985 | might_sleep(); /* Check for RCU read-side critical section. */ | 1995 | might_sleep(); /* Check for RCU read-side critical section. */ |
1986 | return num_online_cpus() <= 1; | 1996 | preempt_disable(); |
1997 | ret = num_online_cpus() <= 1; | ||
1998 | preempt_enable(); | ||
1999 | return ret; | ||
1987 | } | 2000 | } |
1988 | 2001 | ||
1989 | /** | 2002 | /** |
@@ -2118,9 +2131,9 @@ void synchronize_sched_expedited(void) | |||
2118 | put_online_cpus(); | 2131 | put_online_cpus(); |
2119 | 2132 | ||
2120 | /* No joy, try again later. Or just synchronize_sched(). */ | 2133 | /* No joy, try again later. Or just synchronize_sched(). */ |
2121 | if (trycount++ < 10) | 2134 | if (trycount++ < 10) { |
2122 | udelay(trycount * num_online_cpus()); | 2135 | udelay(trycount * num_online_cpus()); |
2123 | else { | 2136 | } else { |
2124 | synchronize_sched(); | 2137 | synchronize_sched(); |
2125 | return; | 2138 | return; |
2126 | } | 2139 | } |
@@ -2241,9 +2254,12 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp) | |||
2241 | */ | 2254 | */ |
2242 | static int rcu_pending(int cpu) | 2255 | static int rcu_pending(int cpu) |
2243 | { | 2256 | { |
2244 | return __rcu_pending(&rcu_sched_state, &per_cpu(rcu_sched_data, cpu)) || | 2257 | struct rcu_state *rsp; |
2245 | __rcu_pending(&rcu_bh_state, &per_cpu(rcu_bh_data, cpu)) || | 2258 | |
2246 | rcu_preempt_pending(cpu); | 2259 | for_each_rcu_flavor(rsp) |
2260 | if (__rcu_pending(rsp, per_cpu_ptr(rsp->rda, cpu))) | ||
2261 | return 1; | ||
2262 | return 0; | ||
2247 | } | 2263 | } |
2248 | 2264 | ||
2249 | /* | 2265 | /* |
@@ -2253,20 +2269,41 @@ static int rcu_pending(int cpu) | |||
2253 | */ | 2269 | */ |
2254 | static int rcu_cpu_has_callbacks(int cpu) | 2270 | static int rcu_cpu_has_callbacks(int cpu) |
2255 | { | 2271 | { |
2272 | struct rcu_state *rsp; | ||
2273 | |||
2256 | /* RCU callbacks either ready or pending? */ | 2274 | /* RCU callbacks either ready or pending? */ |
2257 | return per_cpu(rcu_sched_data, cpu).nxtlist || | 2275 | for_each_rcu_flavor(rsp) |
2258 | per_cpu(rcu_bh_data, cpu).nxtlist || | 2276 | if (per_cpu_ptr(rsp->rda, cpu)->nxtlist) |
2259 | rcu_preempt_cpu_has_callbacks(cpu); | 2277 | return 1; |
2278 | return 0; | ||
2279 | } | ||
2280 | |||
2281 | /* | ||
2282 | * Helper function for _rcu_barrier() tracing. If tracing is disabled, | ||
2283 | * the compiler is expected to optimize this away. | ||
2284 | */ | ||
2285 | static void _rcu_barrier_trace(struct rcu_state *rsp, char *s, | ||
2286 | int cpu, unsigned long done) | ||
2287 | { | ||
2288 | trace_rcu_barrier(rsp->name, s, cpu, | ||
2289 | atomic_read(&rsp->barrier_cpu_count), done); | ||
2260 | } | 2290 | } |
2261 | 2291 | ||
2262 | /* | 2292 | /* |
2263 | * RCU callback function for _rcu_barrier(). If we are last, wake | 2293 | * RCU callback function for _rcu_barrier(). If we are last, wake |
2264 | * up the task executing _rcu_barrier(). | 2294 | * up the task executing _rcu_barrier(). |
2265 | */ | 2295 | */ |
2266 | static void rcu_barrier_callback(struct rcu_head *notused) | 2296 | static void rcu_barrier_callback(struct rcu_head *rhp) |
2267 | { | 2297 | { |
2268 | if (atomic_dec_and_test(&rcu_barrier_cpu_count)) | 2298 | struct rcu_data *rdp = container_of(rhp, struct rcu_data, barrier_head); |
2269 | complete(&rcu_barrier_completion); | 2299 | struct rcu_state *rsp = rdp->rsp; |
2300 | |||
2301 | if (atomic_dec_and_test(&rsp->barrier_cpu_count)) { | ||
2302 | _rcu_barrier_trace(rsp, "LastCB", -1, rsp->n_barrier_done); | ||
2303 | complete(&rsp->barrier_completion); | ||
2304 | } else { | ||
2305 | _rcu_barrier_trace(rsp, "CB", -1, rsp->n_barrier_done); | ||
2306 | } | ||
2270 | } | 2307 | } |
2271 | 2308 | ||
2272 | /* | 2309 | /* |
@@ -2274,35 +2311,63 @@ static void rcu_barrier_callback(struct rcu_head *notused) | |||
2274 | */ | 2311 | */ |
2275 | static void rcu_barrier_func(void *type) | 2312 | static void rcu_barrier_func(void *type) |
2276 | { | 2313 | { |
2277 | int cpu = smp_processor_id(); | 2314 | struct rcu_state *rsp = type; |
2278 | struct rcu_head *head = &per_cpu(rcu_barrier_head, cpu); | 2315 | struct rcu_data *rdp = __this_cpu_ptr(rsp->rda); |
2279 | void (*call_rcu_func)(struct rcu_head *head, | ||
2280 | void (*func)(struct rcu_head *head)); | ||
2281 | 2316 | ||
2282 | atomic_inc(&rcu_barrier_cpu_count); | 2317 | _rcu_barrier_trace(rsp, "IRQ", -1, rsp->n_barrier_done); |
2283 | call_rcu_func = type; | 2318 | atomic_inc(&rsp->barrier_cpu_count); |
2284 | call_rcu_func(head, rcu_barrier_callback); | 2319 | rsp->call(&rdp->barrier_head, rcu_barrier_callback); |
2285 | } | 2320 | } |
2286 | 2321 | ||
2287 | /* | 2322 | /* |
2288 | * Orchestrate the specified type of RCU barrier, waiting for all | 2323 | * Orchestrate the specified type of RCU barrier, waiting for all |
2289 | * RCU callbacks of the specified type to complete. | 2324 | * RCU callbacks of the specified type to complete. |
2290 | */ | 2325 | */ |
2291 | static void _rcu_barrier(struct rcu_state *rsp, | 2326 | static void _rcu_barrier(struct rcu_state *rsp) |
2292 | void (*call_rcu_func)(struct rcu_head *head, | ||
2293 | void (*func)(struct rcu_head *head))) | ||
2294 | { | 2327 | { |
2295 | int cpu; | 2328 | int cpu; |
2296 | unsigned long flags; | 2329 | unsigned long flags; |
2297 | struct rcu_data *rdp; | 2330 | struct rcu_data *rdp; |
2298 | struct rcu_head rh; | 2331 | struct rcu_data rd; |
2332 | unsigned long snap = ACCESS_ONCE(rsp->n_barrier_done); | ||
2333 | unsigned long snap_done; | ||
2299 | 2334 | ||
2300 | init_rcu_head_on_stack(&rh); | 2335 | init_rcu_head_on_stack(&rd.barrier_head); |
2336 | _rcu_barrier_trace(rsp, "Begin", -1, snap); | ||
2301 | 2337 | ||
2302 | /* Take mutex to serialize concurrent rcu_barrier() requests. */ | 2338 | /* Take mutex to serialize concurrent rcu_barrier() requests. */ |
2303 | mutex_lock(&rcu_barrier_mutex); | 2339 | mutex_lock(&rsp->barrier_mutex); |
2340 | |||
2341 | /* | ||
2342 | * Ensure that all prior references, including to ->n_barrier_done, | ||
2343 | * are ordered before the _rcu_barrier() machinery. | ||
2344 | */ | ||
2345 | smp_mb(); /* See above block comment. */ | ||
2346 | |||
2347 | /* | ||
2348 | * Recheck ->n_barrier_done to see if others did our work for us. | ||
2349 | * This means checking ->n_barrier_done for an even-to-odd-to-even | ||
2350 | * transition. The "if" expression below therefore rounds the old | ||
2351 | * value up to the next even number and adds two before comparing. | ||
2352 | */ | ||
2353 | snap_done = ACCESS_ONCE(rsp->n_barrier_done); | ||
2354 | _rcu_barrier_trace(rsp, "Check", -1, snap_done); | ||
2355 | if (ULONG_CMP_GE(snap_done, ((snap + 1) & ~0x1) + 2)) { | ||
2356 | _rcu_barrier_trace(rsp, "EarlyExit", -1, snap_done); | ||
2357 | smp_mb(); /* caller's subsequent code after above check. */ | ||
2358 | mutex_unlock(&rsp->barrier_mutex); | ||
2359 | return; | ||
2360 | } | ||
2304 | 2361 | ||
2305 | smp_mb(); /* Prevent any prior operations from leaking in. */ | 2362 | /* |
2363 | * Increment ->n_barrier_done to avoid duplicate work. Use | ||
2364 | * ACCESS_ONCE() to prevent the compiler from speculating | ||
2365 | * the increment to precede the early-exit check. | ||
2366 | */ | ||
2367 | ACCESS_ONCE(rsp->n_barrier_done)++; | ||
2368 | WARN_ON_ONCE((rsp->n_barrier_done & 0x1) != 1); | ||
2369 | _rcu_barrier_trace(rsp, "Inc1", -1, rsp->n_barrier_done); | ||
2370 | smp_mb(); /* Order ->n_barrier_done increment with below mechanism. */ | ||
2306 | 2371 | ||
2307 | /* | 2372 | /* |
2308 | * Initialize the count to one rather than to zero in order to | 2373 | * Initialize the count to one rather than to zero in order to |
@@ -2321,8 +2386,8 @@ static void _rcu_barrier(struct rcu_state *rsp, | |||
2321 | * 6. Both rcu_barrier_callback() callbacks are invoked, awakening | 2386 | * 6. Both rcu_barrier_callback() callbacks are invoked, awakening |
2322 | * us -- but before CPU 1's orphaned callbacks are invoked!!! | 2387 | * us -- but before CPU 1's orphaned callbacks are invoked!!! |
2323 | */ | 2388 | */ |
2324 | init_completion(&rcu_barrier_completion); | 2389 | init_completion(&rsp->barrier_completion); |
2325 | atomic_set(&rcu_barrier_cpu_count, 1); | 2390 | atomic_set(&rsp->barrier_cpu_count, 1); |
2326 | raw_spin_lock_irqsave(&rsp->onofflock, flags); | 2391 | raw_spin_lock_irqsave(&rsp->onofflock, flags); |
2327 | rsp->rcu_barrier_in_progress = current; | 2392 | rsp->rcu_barrier_in_progress = current; |
2328 | raw_spin_unlock_irqrestore(&rsp->onofflock, flags); | 2393 | raw_spin_unlock_irqrestore(&rsp->onofflock, flags); |
@@ -2338,14 +2403,19 @@ static void _rcu_barrier(struct rcu_state *rsp, | |||
2338 | preempt_disable(); | 2403 | preempt_disable(); |
2339 | rdp = per_cpu_ptr(rsp->rda, cpu); | 2404 | rdp = per_cpu_ptr(rsp->rda, cpu); |
2340 | if (cpu_is_offline(cpu)) { | 2405 | if (cpu_is_offline(cpu)) { |
2406 | _rcu_barrier_trace(rsp, "Offline", cpu, | ||
2407 | rsp->n_barrier_done); | ||
2341 | preempt_enable(); | 2408 | preempt_enable(); |
2342 | while (cpu_is_offline(cpu) && ACCESS_ONCE(rdp->qlen)) | 2409 | while (cpu_is_offline(cpu) && ACCESS_ONCE(rdp->qlen)) |
2343 | schedule_timeout_interruptible(1); | 2410 | schedule_timeout_interruptible(1); |
2344 | } else if (ACCESS_ONCE(rdp->qlen)) { | 2411 | } else if (ACCESS_ONCE(rdp->qlen)) { |
2345 | smp_call_function_single(cpu, rcu_barrier_func, | 2412 | _rcu_barrier_trace(rsp, "OnlineQ", cpu, |
2346 | (void *)call_rcu_func, 1); | 2413 | rsp->n_barrier_done); |
2414 | smp_call_function_single(cpu, rcu_barrier_func, rsp, 1); | ||
2347 | preempt_enable(); | 2415 | preempt_enable(); |
2348 | } else { | 2416 | } else { |
2417 | _rcu_barrier_trace(rsp, "OnlineNQ", cpu, | ||
2418 | rsp->n_barrier_done); | ||
2349 | preempt_enable(); | 2419 | preempt_enable(); |
2350 | } | 2420 | } |
2351 | } | 2421 | } |
@@ -2362,24 +2432,32 @@ static void _rcu_barrier(struct rcu_state *rsp, | |||
2362 | rcu_adopt_orphan_cbs(rsp); | 2432 | rcu_adopt_orphan_cbs(rsp); |
2363 | rsp->rcu_barrier_in_progress = NULL; | 2433 | rsp->rcu_barrier_in_progress = NULL; |
2364 | raw_spin_unlock_irqrestore(&rsp->onofflock, flags); | 2434 | raw_spin_unlock_irqrestore(&rsp->onofflock, flags); |
2365 | atomic_inc(&rcu_barrier_cpu_count); | 2435 | atomic_inc(&rsp->barrier_cpu_count); |
2366 | smp_mb__after_atomic_inc(); /* Ensure atomic_inc() before callback. */ | 2436 | smp_mb__after_atomic_inc(); /* Ensure atomic_inc() before callback. */ |
2367 | call_rcu_func(&rh, rcu_barrier_callback); | 2437 | rd.rsp = rsp; |
2438 | rsp->call(&rd.barrier_head, rcu_barrier_callback); | ||
2368 | 2439 | ||
2369 | /* | 2440 | /* |
2370 | * Now that we have an rcu_barrier_callback() callback on each | 2441 | * Now that we have an rcu_barrier_callback() callback on each |
2371 | * CPU, and thus each counted, remove the initial count. | 2442 | * CPU, and thus each counted, remove the initial count. |
2372 | */ | 2443 | */ |
2373 | if (atomic_dec_and_test(&rcu_barrier_cpu_count)) | 2444 | if (atomic_dec_and_test(&rsp->barrier_cpu_count)) |
2374 | complete(&rcu_barrier_completion); | 2445 | complete(&rsp->barrier_completion); |
2446 | |||
2447 | /* Increment ->n_barrier_done to prevent duplicate work. */ | ||
2448 | smp_mb(); /* Keep increment after above mechanism. */ | ||
2449 | ACCESS_ONCE(rsp->n_barrier_done)++; | ||
2450 | WARN_ON_ONCE((rsp->n_barrier_done & 0x1) != 0); | ||
2451 | _rcu_barrier_trace(rsp, "Inc2", -1, rsp->n_barrier_done); | ||
2452 | smp_mb(); /* Keep increment before caller's subsequent code. */ | ||
2375 | 2453 | ||
2376 | /* Wait for all rcu_barrier_callback() callbacks to be invoked. */ | 2454 | /* Wait for all rcu_barrier_callback() callbacks to be invoked. */ |
2377 | wait_for_completion(&rcu_barrier_completion); | 2455 | wait_for_completion(&rsp->barrier_completion); |
2378 | 2456 | ||
2379 | /* Other rcu_barrier() invocations can now safely proceed. */ | 2457 | /* Other rcu_barrier() invocations can now safely proceed. */ |
2380 | mutex_unlock(&rcu_barrier_mutex); | 2458 | mutex_unlock(&rsp->barrier_mutex); |
2381 | 2459 | ||
2382 | destroy_rcu_head_on_stack(&rh); | 2460 | destroy_rcu_head_on_stack(&rd.barrier_head); |
2383 | } | 2461 | } |
2384 | 2462 | ||
2385 | /** | 2463 | /** |
@@ -2387,7 +2465,7 @@ static void _rcu_barrier(struct rcu_state *rsp, | |||
2387 | */ | 2465 | */ |
2388 | void rcu_barrier_bh(void) | 2466 | void rcu_barrier_bh(void) |
2389 | { | 2467 | { |
2390 | _rcu_barrier(&rcu_bh_state, call_rcu_bh); | 2468 | _rcu_barrier(&rcu_bh_state); |
2391 | } | 2469 | } |
2392 | EXPORT_SYMBOL_GPL(rcu_barrier_bh); | 2470 | EXPORT_SYMBOL_GPL(rcu_barrier_bh); |
2393 | 2471 | ||
@@ -2396,7 +2474,7 @@ EXPORT_SYMBOL_GPL(rcu_barrier_bh); | |||
2396 | */ | 2474 | */ |
2397 | void rcu_barrier_sched(void) | 2475 | void rcu_barrier_sched(void) |
2398 | { | 2476 | { |
2399 | _rcu_barrier(&rcu_sched_state, call_rcu_sched); | 2477 | _rcu_barrier(&rcu_sched_state); |
2400 | } | 2478 | } |
2401 | EXPORT_SYMBOL_GPL(rcu_barrier_sched); | 2479 | EXPORT_SYMBOL_GPL(rcu_barrier_sched); |
2402 | 2480 | ||
@@ -2407,18 +2485,15 @@ static void __init | |||
2407 | rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp) | 2485 | rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp) |
2408 | { | 2486 | { |
2409 | unsigned long flags; | 2487 | unsigned long flags; |
2410 | int i; | ||
2411 | struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); | 2488 | struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); |
2412 | struct rcu_node *rnp = rcu_get_root(rsp); | 2489 | struct rcu_node *rnp = rcu_get_root(rsp); |
2413 | 2490 | ||
2414 | /* Set up local state, ensuring consistent view of global state. */ | 2491 | /* Set up local state, ensuring consistent view of global state. */ |
2415 | raw_spin_lock_irqsave(&rnp->lock, flags); | 2492 | raw_spin_lock_irqsave(&rnp->lock, flags); |
2416 | rdp->grpmask = 1UL << (cpu - rdp->mynode->grplo); | 2493 | rdp->grpmask = 1UL << (cpu - rdp->mynode->grplo); |
2417 | rdp->nxtlist = NULL; | 2494 | init_callback_list(rdp); |
2418 | for (i = 0; i < RCU_NEXT_SIZE; i++) | ||
2419 | rdp->nxttail[i] = &rdp->nxtlist; | ||
2420 | rdp->qlen_lazy = 0; | 2495 | rdp->qlen_lazy = 0; |
2421 | rdp->qlen = 0; | 2496 | ACCESS_ONCE(rdp->qlen) = 0; |
2422 | rdp->dynticks = &per_cpu(rcu_dynticks, cpu); | 2497 | rdp->dynticks = &per_cpu(rcu_dynticks, cpu); |
2423 | WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_EXIT_IDLE); | 2498 | WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_EXIT_IDLE); |
2424 | WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1); | 2499 | WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1); |
@@ -2492,9 +2567,11 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible) | |||
2492 | 2567 | ||
2493 | static void __cpuinit rcu_prepare_cpu(int cpu) | 2568 | static void __cpuinit rcu_prepare_cpu(int cpu) |
2494 | { | 2569 | { |
2495 | rcu_init_percpu_data(cpu, &rcu_sched_state, 0); | 2570 | struct rcu_state *rsp; |
2496 | rcu_init_percpu_data(cpu, &rcu_bh_state, 0); | 2571 | |
2497 | rcu_preempt_init_percpu_data(cpu); | 2572 | for_each_rcu_flavor(rsp) |
2573 | rcu_init_percpu_data(cpu, rsp, | ||
2574 | strcmp(rsp->name, "rcu_preempt") == 0); | ||
2498 | } | 2575 | } |
2499 | 2576 | ||
2500 | /* | 2577 | /* |
@@ -2506,6 +2583,7 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self, | |||
2506 | long cpu = (long)hcpu; | 2583 | long cpu = (long)hcpu; |
2507 | struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); | 2584 | struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); |
2508 | struct rcu_node *rnp = rdp->mynode; | 2585 | struct rcu_node *rnp = rdp->mynode; |
2586 | struct rcu_state *rsp; | ||
2509 | 2587 | ||
2510 | trace_rcu_utilization("Start CPU hotplug"); | 2588 | trace_rcu_utilization("Start CPU hotplug"); |
2511 | switch (action) { | 2589 | switch (action) { |
@@ -2530,18 +2608,16 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self, | |||
2530 | * touch any data without introducing corruption. We send the | 2608 | * touch any data without introducing corruption. We send the |
2531 | * dying CPU's callbacks to an arbitrarily chosen online CPU. | 2609 | * dying CPU's callbacks to an arbitrarily chosen online CPU. |
2532 | */ | 2610 | */ |
2533 | rcu_cleanup_dying_cpu(&rcu_bh_state); | 2611 | for_each_rcu_flavor(rsp) |
2534 | rcu_cleanup_dying_cpu(&rcu_sched_state); | 2612 | rcu_cleanup_dying_cpu(rsp); |
2535 | rcu_preempt_cleanup_dying_cpu(); | ||
2536 | rcu_cleanup_after_idle(cpu); | 2613 | rcu_cleanup_after_idle(cpu); |
2537 | break; | 2614 | break; |
2538 | case CPU_DEAD: | 2615 | case CPU_DEAD: |
2539 | case CPU_DEAD_FROZEN: | 2616 | case CPU_DEAD_FROZEN: |
2540 | case CPU_UP_CANCELED: | 2617 | case CPU_UP_CANCELED: |
2541 | case CPU_UP_CANCELED_FROZEN: | 2618 | case CPU_UP_CANCELED_FROZEN: |
2542 | rcu_cleanup_dead_cpu(cpu, &rcu_bh_state); | 2619 | for_each_rcu_flavor(rsp) |
2543 | rcu_cleanup_dead_cpu(cpu, &rcu_sched_state); | 2620 | rcu_cleanup_dead_cpu(cpu, rsp); |
2544 | rcu_preempt_cleanup_dead_cpu(cpu); | ||
2545 | break; | 2621 | break; |
2546 | default: | 2622 | default: |
2547 | break; | 2623 | break; |
@@ -2574,9 +2650,9 @@ static void __init rcu_init_levelspread(struct rcu_state *rsp) | |||
2574 | { | 2650 | { |
2575 | int i; | 2651 | int i; |
2576 | 2652 | ||
2577 | for (i = NUM_RCU_LVLS - 1; i > 0; i--) | 2653 | for (i = rcu_num_lvls - 1; i > 0; i--) |
2578 | rsp->levelspread[i] = CONFIG_RCU_FANOUT; | 2654 | rsp->levelspread[i] = CONFIG_RCU_FANOUT; |
2579 | rsp->levelspread[0] = CONFIG_RCU_FANOUT_LEAF; | 2655 | rsp->levelspread[0] = rcu_fanout_leaf; |
2580 | } | 2656 | } |
2581 | #else /* #ifdef CONFIG_RCU_FANOUT_EXACT */ | 2657 | #else /* #ifdef CONFIG_RCU_FANOUT_EXACT */ |
2582 | static void __init rcu_init_levelspread(struct rcu_state *rsp) | 2658 | static void __init rcu_init_levelspread(struct rcu_state *rsp) |
@@ -2586,7 +2662,7 @@ static void __init rcu_init_levelspread(struct rcu_state *rsp) | |||
2586 | int i; | 2662 | int i; |
2587 | 2663 | ||
2588 | cprv = NR_CPUS; | 2664 | cprv = NR_CPUS; |
2589 | for (i = NUM_RCU_LVLS - 1; i >= 0; i--) { | 2665 | for (i = rcu_num_lvls - 1; i >= 0; i--) { |
2590 | ccur = rsp->levelcnt[i]; | 2666 | ccur = rsp->levelcnt[i]; |
2591 | rsp->levelspread[i] = (cprv + ccur - 1) / ccur; | 2667 | rsp->levelspread[i] = (cprv + ccur - 1) / ccur; |
2592 | cprv = ccur; | 2668 | cprv = ccur; |
@@ -2613,13 +2689,15 @@ static void __init rcu_init_one(struct rcu_state *rsp, | |||
2613 | 2689 | ||
2614 | /* Initialize the level-tracking arrays. */ | 2690 | /* Initialize the level-tracking arrays. */ |
2615 | 2691 | ||
2616 | for (i = 1; i < NUM_RCU_LVLS; i++) | 2692 | for (i = 0; i < rcu_num_lvls; i++) |
2693 | rsp->levelcnt[i] = num_rcu_lvl[i]; | ||
2694 | for (i = 1; i < rcu_num_lvls; i++) | ||
2617 | rsp->level[i] = rsp->level[i - 1] + rsp->levelcnt[i - 1]; | 2695 | rsp->level[i] = rsp->level[i - 1] + rsp->levelcnt[i - 1]; |
2618 | rcu_init_levelspread(rsp); | 2696 | rcu_init_levelspread(rsp); |
2619 | 2697 | ||
2620 | /* Initialize the elements themselves, starting from the leaves. */ | 2698 | /* Initialize the elements themselves, starting from the leaves. */ |
2621 | 2699 | ||
2622 | for (i = NUM_RCU_LVLS - 1; i >= 0; i--) { | 2700 | for (i = rcu_num_lvls - 1; i >= 0; i--) { |
2623 | cpustride *= rsp->levelspread[i]; | 2701 | cpustride *= rsp->levelspread[i]; |
2624 | rnp = rsp->level[i]; | 2702 | rnp = rsp->level[i]; |
2625 | for (j = 0; j < rsp->levelcnt[i]; j++, rnp++) { | 2703 | for (j = 0; j < rsp->levelcnt[i]; j++, rnp++) { |
@@ -2649,13 +2727,74 @@ static void __init rcu_init_one(struct rcu_state *rsp, | |||
2649 | } | 2727 | } |
2650 | 2728 | ||
2651 | rsp->rda = rda; | 2729 | rsp->rda = rda; |
2652 | rnp = rsp->level[NUM_RCU_LVLS - 1]; | 2730 | rnp = rsp->level[rcu_num_lvls - 1]; |
2653 | for_each_possible_cpu(i) { | 2731 | for_each_possible_cpu(i) { |
2654 | while (i > rnp->grphi) | 2732 | while (i > rnp->grphi) |
2655 | rnp++; | 2733 | rnp++; |
2656 | per_cpu_ptr(rsp->rda, i)->mynode = rnp; | 2734 | per_cpu_ptr(rsp->rda, i)->mynode = rnp; |
2657 | rcu_boot_init_percpu_data(i, rsp); | 2735 | rcu_boot_init_percpu_data(i, rsp); |
2658 | } | 2736 | } |
2737 | list_add(&rsp->flavors, &rcu_struct_flavors); | ||
2738 | } | ||
2739 | |||
2740 | /* | ||
2741 | * Compute the rcu_node tree geometry from kernel parameters. This cannot | ||
2742 | * replace the definitions in rcutree.h because those are needed to size | ||
2743 | * the ->node array in the rcu_state structure. | ||
2744 | */ | ||
2745 | static void __init rcu_init_geometry(void) | ||
2746 | { | ||
2747 | int i; | ||
2748 | int j; | ||
2749 | int n = nr_cpu_ids; | ||
2750 | int rcu_capacity[MAX_RCU_LVLS + 1]; | ||
2751 | |||
2752 | /* If the compile-time values are accurate, just leave. */ | ||
2753 | if (rcu_fanout_leaf == CONFIG_RCU_FANOUT_LEAF) | ||
2754 | return; | ||
2755 | |||
2756 | /* | ||
2757 | * Compute number of nodes that can be handled an rcu_node tree | ||
2758 | * with the given number of levels. Setting rcu_capacity[0] makes | ||
2759 | * some of the arithmetic easier. | ||
2760 | */ | ||
2761 | rcu_capacity[0] = 1; | ||
2762 | rcu_capacity[1] = rcu_fanout_leaf; | ||
2763 | for (i = 2; i <= MAX_RCU_LVLS; i++) | ||
2764 | rcu_capacity[i] = rcu_capacity[i - 1] * CONFIG_RCU_FANOUT; | ||
2765 | |||
2766 | /* | ||
2767 | * The boot-time rcu_fanout_leaf parameter is only permitted | ||
2768 | * to increase the leaf-level fanout, not decrease it. Of course, | ||
2769 | * the leaf-level fanout cannot exceed the number of bits in | ||
2770 | * the rcu_node masks. Finally, the tree must be able to accommodate | ||
2771 | * the configured number of CPUs. Complain and fall back to the | ||
2772 | * compile-time values if these limits are exceeded. | ||
2773 | */ | ||
2774 | if (rcu_fanout_leaf < CONFIG_RCU_FANOUT_LEAF || | ||
2775 | rcu_fanout_leaf > sizeof(unsigned long) * 8 || | ||
2776 | n > rcu_capacity[MAX_RCU_LVLS]) { | ||
2777 | WARN_ON(1); | ||
2778 | return; | ||
2779 | } | ||
2780 | |||
2781 | /* Calculate the number of rcu_nodes at each level of the tree. */ | ||
2782 | for (i = 1; i <= MAX_RCU_LVLS; i++) | ||
2783 | if (n <= rcu_capacity[i]) { | ||
2784 | for (j = 0; j <= i; j++) | ||
2785 | num_rcu_lvl[j] = | ||
2786 | DIV_ROUND_UP(n, rcu_capacity[i - j]); | ||
2787 | rcu_num_lvls = i; | ||
2788 | for (j = i + 1; j <= MAX_RCU_LVLS; j++) | ||
2789 | num_rcu_lvl[j] = 0; | ||
2790 | break; | ||
2791 | } | ||
2792 | |||
2793 | /* Calculate the total number of rcu_node structures. */ | ||
2794 | rcu_num_nodes = 0; | ||
2795 | for (i = 0; i <= MAX_RCU_LVLS; i++) | ||
2796 | rcu_num_nodes += num_rcu_lvl[i]; | ||
2797 | rcu_num_nodes -= n; | ||
2659 | } | 2798 | } |
2660 | 2799 | ||
2661 | void __init rcu_init(void) | 2800 | void __init rcu_init(void) |
@@ -2663,6 +2802,7 @@ void __init rcu_init(void) | |||
2663 | int cpu; | 2802 | int cpu; |
2664 | 2803 | ||
2665 | rcu_bootup_announce(); | 2804 | rcu_bootup_announce(); |
2805 | rcu_init_geometry(); | ||
2666 | rcu_init_one(&rcu_sched_state, &rcu_sched_data); | 2806 | rcu_init_one(&rcu_sched_state, &rcu_sched_data); |
2667 | rcu_init_one(&rcu_bh_state, &rcu_bh_data); | 2807 | rcu_init_one(&rcu_bh_state, &rcu_bh_data); |
2668 | __rcu_init_preempt(); | 2808 | __rcu_init_preempt(); |
diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 19b61ac1079f..4d29169f2124 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h | |||
@@ -42,28 +42,28 @@ | |||
42 | #define RCU_FANOUT_4 (RCU_FANOUT_3 * CONFIG_RCU_FANOUT) | 42 | #define RCU_FANOUT_4 (RCU_FANOUT_3 * CONFIG_RCU_FANOUT) |
43 | 43 | ||
44 | #if NR_CPUS <= RCU_FANOUT_1 | 44 | #if NR_CPUS <= RCU_FANOUT_1 |
45 | # define NUM_RCU_LVLS 1 | 45 | # define RCU_NUM_LVLS 1 |
46 | # define NUM_RCU_LVL_0 1 | 46 | # define NUM_RCU_LVL_0 1 |
47 | # define NUM_RCU_LVL_1 (NR_CPUS) | 47 | # define NUM_RCU_LVL_1 (NR_CPUS) |
48 | # define NUM_RCU_LVL_2 0 | 48 | # define NUM_RCU_LVL_2 0 |
49 | # define NUM_RCU_LVL_3 0 | 49 | # define NUM_RCU_LVL_3 0 |
50 | # define NUM_RCU_LVL_4 0 | 50 | # define NUM_RCU_LVL_4 0 |
51 | #elif NR_CPUS <= RCU_FANOUT_2 | 51 | #elif NR_CPUS <= RCU_FANOUT_2 |
52 | # define NUM_RCU_LVLS 2 | 52 | # define RCU_NUM_LVLS 2 |
53 | # define NUM_RCU_LVL_0 1 | 53 | # define NUM_RCU_LVL_0 1 |
54 | # define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1) | 54 | # define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1) |
55 | # define NUM_RCU_LVL_2 (NR_CPUS) | 55 | # define NUM_RCU_LVL_2 (NR_CPUS) |
56 | # define NUM_RCU_LVL_3 0 | 56 | # define NUM_RCU_LVL_3 0 |
57 | # define NUM_RCU_LVL_4 0 | 57 | # define NUM_RCU_LVL_4 0 |
58 | #elif NR_CPUS <= RCU_FANOUT_3 | 58 | #elif NR_CPUS <= RCU_FANOUT_3 |
59 | # define NUM_RCU_LVLS 3 | 59 | # define RCU_NUM_LVLS 3 |
60 | # define NUM_RCU_LVL_0 1 | 60 | # define NUM_RCU_LVL_0 1 |
61 | # define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2) | 61 | # define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2) |
62 | # define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1) | 62 | # define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1) |
63 | # define NUM_RCU_LVL_3 (NR_CPUS) | 63 | # define NUM_RCU_LVL_3 (NR_CPUS) |
64 | # define NUM_RCU_LVL_4 0 | 64 | # define NUM_RCU_LVL_4 0 |
65 | #elif NR_CPUS <= RCU_FANOUT_4 | 65 | #elif NR_CPUS <= RCU_FANOUT_4 |
66 | # define NUM_RCU_LVLS 4 | 66 | # define RCU_NUM_LVLS 4 |
67 | # define NUM_RCU_LVL_0 1 | 67 | # define NUM_RCU_LVL_0 1 |
68 | # define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_3) | 68 | # define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_3) |
69 | # define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2) | 69 | # define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2) |
@@ -76,6 +76,9 @@ | |||
76 | #define RCU_SUM (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3 + NUM_RCU_LVL_4) | 76 | #define RCU_SUM (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3 + NUM_RCU_LVL_4) |
77 | #define NUM_RCU_NODES (RCU_SUM - NR_CPUS) | 77 | #define NUM_RCU_NODES (RCU_SUM - NR_CPUS) |
78 | 78 | ||
79 | extern int rcu_num_lvls; | ||
80 | extern int rcu_num_nodes; | ||
81 | |||
79 | /* | 82 | /* |
80 | * Dynticks per-CPU state. | 83 | * Dynticks per-CPU state. |
81 | */ | 84 | */ |
@@ -97,6 +100,7 @@ struct rcu_dynticks { | |||
97 | /* # times non-lazy CBs posted to CPU. */ | 100 | /* # times non-lazy CBs posted to CPU. */ |
98 | unsigned long nonlazy_posted_snap; | 101 | unsigned long nonlazy_posted_snap; |
99 | /* idle-period nonlazy_posted snapshot. */ | 102 | /* idle-period nonlazy_posted snapshot. */ |
103 | int tick_nohz_enabled_snap; /* Previously seen value from sysfs. */ | ||
100 | #endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */ | 104 | #endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */ |
101 | }; | 105 | }; |
102 | 106 | ||
@@ -206,7 +210,7 @@ struct rcu_node { | |||
206 | */ | 210 | */ |
207 | #define rcu_for_each_node_breadth_first(rsp, rnp) \ | 211 | #define rcu_for_each_node_breadth_first(rsp, rnp) \ |
208 | for ((rnp) = &(rsp)->node[0]; \ | 212 | for ((rnp) = &(rsp)->node[0]; \ |
209 | (rnp) < &(rsp)->node[NUM_RCU_NODES]; (rnp)++) | 213 | (rnp) < &(rsp)->node[rcu_num_nodes]; (rnp)++) |
210 | 214 | ||
211 | /* | 215 | /* |
212 | * Do a breadth-first scan of the non-leaf rcu_node structures for the | 216 | * Do a breadth-first scan of the non-leaf rcu_node structures for the |
@@ -215,7 +219,7 @@ struct rcu_node { | |||
215 | */ | 219 | */ |
216 | #define rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) \ | 220 | #define rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) \ |
217 | for ((rnp) = &(rsp)->node[0]; \ | 221 | for ((rnp) = &(rsp)->node[0]; \ |
218 | (rnp) < (rsp)->level[NUM_RCU_LVLS - 1]; (rnp)++) | 222 | (rnp) < (rsp)->level[rcu_num_lvls - 1]; (rnp)++) |
219 | 223 | ||
220 | /* | 224 | /* |
221 | * Scan the leaves of the rcu_node hierarchy for the specified rcu_state | 225 | * Scan the leaves of the rcu_node hierarchy for the specified rcu_state |
@@ -224,8 +228,8 @@ struct rcu_node { | |||
224 | * It is still a leaf node, even if it is also the root node. | 228 | * It is still a leaf node, even if it is also the root node. |
225 | */ | 229 | */ |
226 | #define rcu_for_each_leaf_node(rsp, rnp) \ | 230 | #define rcu_for_each_leaf_node(rsp, rnp) \ |
227 | for ((rnp) = (rsp)->level[NUM_RCU_LVLS - 1]; \ | 231 | for ((rnp) = (rsp)->level[rcu_num_lvls - 1]; \ |
228 | (rnp) < &(rsp)->node[NUM_RCU_NODES]; (rnp)++) | 232 | (rnp) < &(rsp)->node[rcu_num_nodes]; (rnp)++) |
229 | 233 | ||
230 | /* Index values for nxttail array in struct rcu_data. */ | 234 | /* Index values for nxttail array in struct rcu_data. */ |
231 | #define RCU_DONE_TAIL 0 /* Also RCU_WAIT head. */ | 235 | #define RCU_DONE_TAIL 0 /* Also RCU_WAIT head. */ |
@@ -311,6 +315,9 @@ struct rcu_data { | |||
311 | unsigned long n_rp_need_fqs; | 315 | unsigned long n_rp_need_fqs; |
312 | unsigned long n_rp_need_nothing; | 316 | unsigned long n_rp_need_nothing; |
313 | 317 | ||
318 | /* 6) _rcu_barrier() callback. */ | ||
319 | struct rcu_head barrier_head; | ||
320 | |||
314 | int cpu; | 321 | int cpu; |
315 | struct rcu_state *rsp; | 322 | struct rcu_state *rsp; |
316 | }; | 323 | }; |
@@ -357,10 +364,12 @@ do { \ | |||
357 | */ | 364 | */ |
358 | struct rcu_state { | 365 | struct rcu_state { |
359 | struct rcu_node node[NUM_RCU_NODES]; /* Hierarchy. */ | 366 | struct rcu_node node[NUM_RCU_NODES]; /* Hierarchy. */ |
360 | struct rcu_node *level[NUM_RCU_LVLS]; /* Hierarchy levels. */ | 367 | struct rcu_node *level[RCU_NUM_LVLS]; /* Hierarchy levels. */ |
361 | u32 levelcnt[MAX_RCU_LVLS + 1]; /* # nodes in each level. */ | 368 | u32 levelcnt[MAX_RCU_LVLS + 1]; /* # nodes in each level. */ |
362 | u8 levelspread[NUM_RCU_LVLS]; /* kids/node in each level. */ | 369 | u8 levelspread[RCU_NUM_LVLS]; /* kids/node in each level. */ |
363 | struct rcu_data __percpu *rda; /* pointer of percu rcu_data. */ | 370 | struct rcu_data __percpu *rda; /* pointer of percu rcu_data. */ |
371 | void (*call)(struct rcu_head *head, /* call_rcu() flavor. */ | ||
372 | void (*func)(struct rcu_head *head)); | ||
364 | 373 | ||
365 | /* The following fields are guarded by the root rcu_node's lock. */ | 374 | /* The following fields are guarded by the root rcu_node's lock. */ |
366 | 375 | ||
@@ -392,6 +401,11 @@ struct rcu_state { | |||
392 | struct task_struct *rcu_barrier_in_progress; | 401 | struct task_struct *rcu_barrier_in_progress; |
393 | /* Task doing rcu_barrier(), */ | 402 | /* Task doing rcu_barrier(), */ |
394 | /* or NULL if no barrier. */ | 403 | /* or NULL if no barrier. */ |
404 | struct mutex barrier_mutex; /* Guards barrier fields. */ | ||
405 | atomic_t barrier_cpu_count; /* # CPUs waiting on. */ | ||
406 | struct completion barrier_completion; /* Wake at barrier end. */ | ||
407 | unsigned long n_barrier_done; /* ++ at start and end of */ | ||
408 | /* _rcu_barrier(). */ | ||
395 | raw_spinlock_t fqslock; /* Only one task forcing */ | 409 | raw_spinlock_t fqslock; /* Only one task forcing */ |
396 | /* quiescent states. */ | 410 | /* quiescent states. */ |
397 | unsigned long jiffies_force_qs; /* Time at which to invoke */ | 411 | unsigned long jiffies_force_qs; /* Time at which to invoke */ |
@@ -409,8 +423,13 @@ struct rcu_state { | |||
409 | unsigned long gp_max; /* Maximum GP duration in */ | 423 | unsigned long gp_max; /* Maximum GP duration in */ |
410 | /* jiffies. */ | 424 | /* jiffies. */ |
411 | char *name; /* Name of structure. */ | 425 | char *name; /* Name of structure. */ |
426 | struct list_head flavors; /* List of RCU flavors. */ | ||
412 | }; | 427 | }; |
413 | 428 | ||
429 | extern struct list_head rcu_struct_flavors; | ||
430 | #define for_each_rcu_flavor(rsp) \ | ||
431 | list_for_each_entry((rsp), &rcu_struct_flavors, flavors) | ||
432 | |||
414 | /* Return values for rcu_preempt_offline_tasks(). */ | 433 | /* Return values for rcu_preempt_offline_tasks(). */ |
415 | 434 | ||
416 | #define RCU_OFL_TASKS_NORM_GP 0x1 /* Tasks blocking normal */ | 435 | #define RCU_OFL_TASKS_NORM_GP 0x1 /* Tasks blocking normal */ |
@@ -453,25 +472,18 @@ static void rcu_stop_cpu_kthread(int cpu); | |||
453 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ | 472 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ |
454 | static void rcu_print_detail_task_stall(struct rcu_state *rsp); | 473 | static void rcu_print_detail_task_stall(struct rcu_state *rsp); |
455 | static int rcu_print_task_stall(struct rcu_node *rnp); | 474 | static int rcu_print_task_stall(struct rcu_node *rnp); |
456 | static void rcu_preempt_stall_reset(void); | ||
457 | static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp); | 475 | static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp); |
458 | #ifdef CONFIG_HOTPLUG_CPU | 476 | #ifdef CONFIG_HOTPLUG_CPU |
459 | static int rcu_preempt_offline_tasks(struct rcu_state *rsp, | 477 | static int rcu_preempt_offline_tasks(struct rcu_state *rsp, |
460 | struct rcu_node *rnp, | 478 | struct rcu_node *rnp, |
461 | struct rcu_data *rdp); | 479 | struct rcu_data *rdp); |
462 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ | 480 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ |
463 | static void rcu_preempt_cleanup_dead_cpu(int cpu); | ||
464 | static void rcu_preempt_check_callbacks(int cpu); | 481 | static void rcu_preempt_check_callbacks(int cpu); |
465 | static void rcu_preempt_process_callbacks(void); | ||
466 | void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)); | 482 | void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)); |
467 | #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU) | 483 | #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU) |
468 | static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp, | 484 | static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp, |
469 | bool wake); | 485 | bool wake); |
470 | #endif /* #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU) */ | 486 | #endif /* #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU) */ |
471 | static int rcu_preempt_pending(int cpu); | ||
472 | static int rcu_preempt_cpu_has_callbacks(int cpu); | ||
473 | static void __cpuinit rcu_preempt_init_percpu_data(int cpu); | ||
474 | static void rcu_preempt_cleanup_dying_cpu(void); | ||
475 | static void __init __rcu_init_preempt(void); | 487 | static void __init __rcu_init_preempt(void); |
476 | static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags); | 488 | static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags); |
477 | static void rcu_preempt_boost_start_gp(struct rcu_node *rnp); | 489 | static void rcu_preempt_boost_start_gp(struct rcu_node *rnp); |
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 3e4899459f3d..7f3244c0df01 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h | |||
@@ -68,17 +68,21 @@ static void __init rcu_bootup_announce_oddness(void) | |||
68 | printk(KERN_INFO "\tAdditional per-CPU info printed with stalls.\n"); | 68 | printk(KERN_INFO "\tAdditional per-CPU info printed with stalls.\n"); |
69 | #endif | 69 | #endif |
70 | #if NUM_RCU_LVL_4 != 0 | 70 | #if NUM_RCU_LVL_4 != 0 |
71 | printk(KERN_INFO "\tExperimental four-level hierarchy is enabled.\n"); | 71 | printk(KERN_INFO "\tFour-level hierarchy is enabled.\n"); |
72 | #endif | 72 | #endif |
73 | if (rcu_fanout_leaf != CONFIG_RCU_FANOUT_LEAF) | ||
74 | printk(KERN_INFO "\tExperimental boot-time adjustment of leaf fanout to %d.\n", rcu_fanout_leaf); | ||
75 | if (nr_cpu_ids != NR_CPUS) | ||
76 | printk(KERN_INFO "\tRCU restricting CPUs from NR_CPUS=%d to nr_cpu_ids=%d.\n", NR_CPUS, nr_cpu_ids); | ||
73 | } | 77 | } |
74 | 78 | ||
75 | #ifdef CONFIG_TREE_PREEMPT_RCU | 79 | #ifdef CONFIG_TREE_PREEMPT_RCU |
76 | 80 | ||
77 | struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt); | 81 | struct rcu_state rcu_preempt_state = |
82 | RCU_STATE_INITIALIZER(rcu_preempt, call_rcu); | ||
78 | DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data); | 83 | DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data); |
79 | static struct rcu_state *rcu_state = &rcu_preempt_state; | 84 | static struct rcu_state *rcu_state = &rcu_preempt_state; |
80 | 85 | ||
81 | static void rcu_read_unlock_special(struct task_struct *t); | ||
82 | static int rcu_preempted_readers_exp(struct rcu_node *rnp); | 86 | static int rcu_preempted_readers_exp(struct rcu_node *rnp); |
83 | 87 | ||
84 | /* | 88 | /* |
@@ -233,18 +237,6 @@ static void rcu_preempt_note_context_switch(int cpu) | |||
233 | } | 237 | } |
234 | 238 | ||
235 | /* | 239 | /* |
236 | * Tree-preemptible RCU implementation for rcu_read_lock(). | ||
237 | * Just increment ->rcu_read_lock_nesting, shared state will be updated | ||
238 | * if we block. | ||
239 | */ | ||
240 | void __rcu_read_lock(void) | ||
241 | { | ||
242 | current->rcu_read_lock_nesting++; | ||
243 | barrier(); /* needed if we ever invoke rcu_read_lock in rcutree.c */ | ||
244 | } | ||
245 | EXPORT_SYMBOL_GPL(__rcu_read_lock); | ||
246 | |||
247 | /* | ||
248 | * Check for preempted RCU readers blocking the current grace period | 240 | * Check for preempted RCU readers blocking the current grace period |
249 | * for the specified rcu_node structure. If the caller needs a reliable | 241 | * for the specified rcu_node structure. If the caller needs a reliable |
250 | * answer, it must hold the rcu_node's ->lock. | 242 | * answer, it must hold the rcu_node's ->lock. |
@@ -310,7 +302,7 @@ static struct list_head *rcu_next_node_entry(struct task_struct *t, | |||
310 | * notify RCU core processing or task having blocked during the RCU | 302 | * notify RCU core processing or task having blocked during the RCU |
311 | * read-side critical section. | 303 | * read-side critical section. |
312 | */ | 304 | */ |
313 | static noinline void rcu_read_unlock_special(struct task_struct *t) | 305 | void rcu_read_unlock_special(struct task_struct *t) |
314 | { | 306 | { |
315 | int empty; | 307 | int empty; |
316 | int empty_exp; | 308 | int empty_exp; |
@@ -398,8 +390,9 @@ static noinline void rcu_read_unlock_special(struct task_struct *t) | |||
398 | rnp->grphi, | 390 | rnp->grphi, |
399 | !!rnp->gp_tasks); | 391 | !!rnp->gp_tasks); |
400 | rcu_report_unblock_qs_rnp(rnp, flags); | 392 | rcu_report_unblock_qs_rnp(rnp, flags); |
401 | } else | 393 | } else { |
402 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 394 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
395 | } | ||
403 | 396 | ||
404 | #ifdef CONFIG_RCU_BOOST | 397 | #ifdef CONFIG_RCU_BOOST |
405 | /* Unboost if we were boosted. */ | 398 | /* Unboost if we were boosted. */ |
@@ -418,38 +411,6 @@ static noinline void rcu_read_unlock_special(struct task_struct *t) | |||
418 | } | 411 | } |
419 | } | 412 | } |
420 | 413 | ||
421 | /* | ||
422 | * Tree-preemptible RCU implementation for rcu_read_unlock(). | ||
423 | * Decrement ->rcu_read_lock_nesting. If the result is zero (outermost | ||
424 | * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then | ||
425 | * invoke rcu_read_unlock_special() to clean up after a context switch | ||
426 | * in an RCU read-side critical section and other special cases. | ||
427 | */ | ||
428 | void __rcu_read_unlock(void) | ||
429 | { | ||
430 | struct task_struct *t = current; | ||
431 | |||
432 | if (t->rcu_read_lock_nesting != 1) | ||
433 | --t->rcu_read_lock_nesting; | ||
434 | else { | ||
435 | barrier(); /* critical section before exit code. */ | ||
436 | t->rcu_read_lock_nesting = INT_MIN; | ||
437 | barrier(); /* assign before ->rcu_read_unlock_special load */ | ||
438 | if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special))) | ||
439 | rcu_read_unlock_special(t); | ||
440 | barrier(); /* ->rcu_read_unlock_special load before assign */ | ||
441 | t->rcu_read_lock_nesting = 0; | ||
442 | } | ||
443 | #ifdef CONFIG_PROVE_LOCKING | ||
444 | { | ||
445 | int rrln = ACCESS_ONCE(t->rcu_read_lock_nesting); | ||
446 | |||
447 | WARN_ON_ONCE(rrln < 0 && rrln > INT_MIN / 2); | ||
448 | } | ||
449 | #endif /* #ifdef CONFIG_PROVE_LOCKING */ | ||
450 | } | ||
451 | EXPORT_SYMBOL_GPL(__rcu_read_unlock); | ||
452 | |||
453 | #ifdef CONFIG_RCU_CPU_STALL_VERBOSE | 414 | #ifdef CONFIG_RCU_CPU_STALL_VERBOSE |
454 | 415 | ||
455 | /* | 416 | /* |
@@ -540,16 +501,6 @@ static int rcu_print_task_stall(struct rcu_node *rnp) | |||
540 | } | 501 | } |
541 | 502 | ||
542 | /* | 503 | /* |
543 | * Suppress preemptible RCU's CPU stall warnings by pushing the | ||
544 | * time of the next stall-warning message comfortably far into the | ||
545 | * future. | ||
546 | */ | ||
547 | static void rcu_preempt_stall_reset(void) | ||
548 | { | ||
549 | rcu_preempt_state.jiffies_stall = jiffies + ULONG_MAX / 2; | ||
550 | } | ||
551 | |||
552 | /* | ||
553 | * Check that the list of blocked tasks for the newly completed grace | 504 | * Check that the list of blocked tasks for the newly completed grace |
554 | * period is in fact empty. It is a serious bug to complete a grace | 505 | * period is in fact empty. It is a serious bug to complete a grace |
555 | * period that still has RCU readers blocked! This function must be | 506 | * period that still has RCU readers blocked! This function must be |
@@ -650,14 +601,6 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp, | |||
650 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ | 601 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ |
651 | 602 | ||
652 | /* | 603 | /* |
653 | * Do CPU-offline processing for preemptible RCU. | ||
654 | */ | ||
655 | static void rcu_preempt_cleanup_dead_cpu(int cpu) | ||
656 | { | ||
657 | rcu_cleanup_dead_cpu(cpu, &rcu_preempt_state); | ||
658 | } | ||
659 | |||
660 | /* | ||
661 | * Check for a quiescent state from the current CPU. When a task blocks, | 604 | * Check for a quiescent state from the current CPU. When a task blocks, |
662 | * the task is recorded in the corresponding CPU's rcu_node structure, | 605 | * the task is recorded in the corresponding CPU's rcu_node structure, |
663 | * which is checked elsewhere. | 606 | * which is checked elsewhere. |
@@ -677,15 +620,6 @@ static void rcu_preempt_check_callbacks(int cpu) | |||
677 | t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS; | 620 | t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS; |
678 | } | 621 | } |
679 | 622 | ||
680 | /* | ||
681 | * Process callbacks for preemptible RCU. | ||
682 | */ | ||
683 | static void rcu_preempt_process_callbacks(void) | ||
684 | { | ||
685 | __rcu_process_callbacks(&rcu_preempt_state, | ||
686 | &__get_cpu_var(rcu_preempt_data)); | ||
687 | } | ||
688 | |||
689 | #ifdef CONFIG_RCU_BOOST | 623 | #ifdef CONFIG_RCU_BOOST |
690 | 624 | ||
691 | static void rcu_preempt_do_callbacks(void) | 625 | static void rcu_preempt_do_callbacks(void) |
@@ -824,9 +758,9 @@ sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp) | |||
824 | int must_wait = 0; | 758 | int must_wait = 0; |
825 | 759 | ||
826 | raw_spin_lock_irqsave(&rnp->lock, flags); | 760 | raw_spin_lock_irqsave(&rnp->lock, flags); |
827 | if (list_empty(&rnp->blkd_tasks)) | 761 | if (list_empty(&rnp->blkd_tasks)) { |
828 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 762 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
829 | else { | 763 | } else { |
830 | rnp->exp_tasks = rnp->blkd_tasks.next; | 764 | rnp->exp_tasks = rnp->blkd_tasks.next; |
831 | rcu_initiate_boost(rnp, flags); /* releases rnp->lock */ | 765 | rcu_initiate_boost(rnp, flags); /* releases rnp->lock */ |
832 | must_wait = 1; | 766 | must_wait = 1; |
@@ -870,9 +804,9 @@ void synchronize_rcu_expedited(void) | |||
870 | * expedited grace period for us, just leave. | 804 | * expedited grace period for us, just leave. |
871 | */ | 805 | */ |
872 | while (!mutex_trylock(&sync_rcu_preempt_exp_mutex)) { | 806 | while (!mutex_trylock(&sync_rcu_preempt_exp_mutex)) { |
873 | if (trycount++ < 10) | 807 | if (trycount++ < 10) { |
874 | udelay(trycount * num_online_cpus()); | 808 | udelay(trycount * num_online_cpus()); |
875 | else { | 809 | } else { |
876 | synchronize_rcu(); | 810 | synchronize_rcu(); |
877 | return; | 811 | return; |
878 | } | 812 | } |
@@ -917,51 +851,16 @@ mb_ret: | |||
917 | } | 851 | } |
918 | EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); | 852 | EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); |
919 | 853 | ||
920 | /* | ||
921 | * Check to see if there is any immediate preemptible-RCU-related work | ||
922 | * to be done. | ||
923 | */ | ||
924 | static int rcu_preempt_pending(int cpu) | ||
925 | { | ||
926 | return __rcu_pending(&rcu_preempt_state, | ||
927 | &per_cpu(rcu_preempt_data, cpu)); | ||
928 | } | ||
929 | |||
930 | /* | ||
931 | * Does preemptible RCU have callbacks on this CPU? | ||
932 | */ | ||
933 | static int rcu_preempt_cpu_has_callbacks(int cpu) | ||
934 | { | ||
935 | return !!per_cpu(rcu_preempt_data, cpu).nxtlist; | ||
936 | } | ||
937 | |||
938 | /** | 854 | /** |
939 | * rcu_barrier - Wait until all in-flight call_rcu() callbacks complete. | 855 | * rcu_barrier - Wait until all in-flight call_rcu() callbacks complete. |
940 | */ | 856 | */ |
941 | void rcu_barrier(void) | 857 | void rcu_barrier(void) |
942 | { | 858 | { |
943 | _rcu_barrier(&rcu_preempt_state, call_rcu); | 859 | _rcu_barrier(&rcu_preempt_state); |
944 | } | 860 | } |
945 | EXPORT_SYMBOL_GPL(rcu_barrier); | 861 | EXPORT_SYMBOL_GPL(rcu_barrier); |
946 | 862 | ||
947 | /* | 863 | /* |
948 | * Initialize preemptible RCU's per-CPU data. | ||
949 | */ | ||
950 | static void __cpuinit rcu_preempt_init_percpu_data(int cpu) | ||
951 | { | ||
952 | rcu_init_percpu_data(cpu, &rcu_preempt_state, 1); | ||
953 | } | ||
954 | |||
955 | /* | ||
956 | * Move preemptible RCU's callbacks from dying CPU to other online CPU | ||
957 | * and record a quiescent state. | ||
958 | */ | ||
959 | static void rcu_preempt_cleanup_dying_cpu(void) | ||
960 | { | ||
961 | rcu_cleanup_dying_cpu(&rcu_preempt_state); | ||
962 | } | ||
963 | |||
964 | /* | ||
965 | * Initialize preemptible RCU's state structures. | 864 | * Initialize preemptible RCU's state structures. |
966 | */ | 865 | */ |
967 | static void __init __rcu_init_preempt(void) | 866 | static void __init __rcu_init_preempt(void) |
@@ -1046,14 +945,6 @@ static int rcu_print_task_stall(struct rcu_node *rnp) | |||
1046 | } | 945 | } |
1047 | 946 | ||
1048 | /* | 947 | /* |
1049 | * Because preemptible RCU does not exist, there is no need to suppress | ||
1050 | * its CPU stall warnings. | ||
1051 | */ | ||
1052 | static void rcu_preempt_stall_reset(void) | ||
1053 | { | ||
1054 | } | ||
1055 | |||
1056 | /* | ||
1057 | * Because there is no preemptible RCU, there can be no readers blocked, | 948 | * Because there is no preemptible RCU, there can be no readers blocked, |
1058 | * so there is no need to check for blocked tasks. So check only for | 949 | * so there is no need to check for blocked tasks. So check only for |
1059 | * bogus qsmask values. | 950 | * bogus qsmask values. |
@@ -1081,14 +972,6 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp, | |||
1081 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ | 972 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ |
1082 | 973 | ||
1083 | /* | 974 | /* |
1084 | * Because preemptible RCU does not exist, it never needs CPU-offline | ||
1085 | * processing. | ||
1086 | */ | ||
1087 | static void rcu_preempt_cleanup_dead_cpu(int cpu) | ||
1088 | { | ||
1089 | } | ||
1090 | |||
1091 | /* | ||
1092 | * Because preemptible RCU does not exist, it never has any callbacks | 975 | * Because preemptible RCU does not exist, it never has any callbacks |
1093 | * to check. | 976 | * to check. |
1094 | */ | 977 | */ |
@@ -1097,14 +980,6 @@ static void rcu_preempt_check_callbacks(int cpu) | |||
1097 | } | 980 | } |
1098 | 981 | ||
1099 | /* | 982 | /* |
1100 | * Because preemptible RCU does not exist, it never has any callbacks | ||
1101 | * to process. | ||
1102 | */ | ||
1103 | static void rcu_preempt_process_callbacks(void) | ||
1104 | { | ||
1105 | } | ||
1106 | |||
1107 | /* | ||
1108 | * Queue an RCU callback for lazy invocation after a grace period. | 983 | * Queue an RCU callback for lazy invocation after a grace period. |
1109 | * This will likely be later named something like "call_rcu_lazy()", | 984 | * This will likely be later named something like "call_rcu_lazy()", |
1110 | * but this change will require some way of tagging the lazy RCU | 985 | * but this change will require some way of tagging the lazy RCU |
@@ -1145,22 +1020,6 @@ static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp, | |||
1145 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ | 1020 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ |
1146 | 1021 | ||
1147 | /* | 1022 | /* |
1148 | * Because preemptible RCU does not exist, it never has any work to do. | ||
1149 | */ | ||
1150 | static int rcu_preempt_pending(int cpu) | ||
1151 | { | ||
1152 | return 0; | ||
1153 | } | ||
1154 | |||
1155 | /* | ||
1156 | * Because preemptible RCU does not exist, it never has callbacks | ||
1157 | */ | ||
1158 | static int rcu_preempt_cpu_has_callbacks(int cpu) | ||
1159 | { | ||
1160 | return 0; | ||
1161 | } | ||
1162 | |||
1163 | /* | ||
1164 | * Because preemptible RCU does not exist, rcu_barrier() is just | 1023 | * Because preemptible RCU does not exist, rcu_barrier() is just |
1165 | * another name for rcu_barrier_sched(). | 1024 | * another name for rcu_barrier_sched(). |
1166 | */ | 1025 | */ |
@@ -1171,21 +1030,6 @@ void rcu_barrier(void) | |||
1171 | EXPORT_SYMBOL_GPL(rcu_barrier); | 1030 | EXPORT_SYMBOL_GPL(rcu_barrier); |
1172 | 1031 | ||
1173 | /* | 1032 | /* |
1174 | * Because preemptible RCU does not exist, there is no per-CPU | ||
1175 | * data to initialize. | ||
1176 | */ | ||
1177 | static void __cpuinit rcu_preempt_init_percpu_data(int cpu) | ||
1178 | { | ||
1179 | } | ||
1180 | |||
1181 | /* | ||
1182 | * Because there is no preemptible RCU, there is no cleanup to do. | ||
1183 | */ | ||
1184 | static void rcu_preempt_cleanup_dying_cpu(void) | ||
1185 | { | ||
1186 | } | ||
1187 | |||
1188 | /* | ||
1189 | * Because preemptible RCU does not exist, it need not be initialized. | 1033 | * Because preemptible RCU does not exist, it need not be initialized. |
1190 | */ | 1034 | */ |
1191 | static void __init __rcu_init_preempt(void) | 1035 | static void __init __rcu_init_preempt(void) |
@@ -1968,9 +1812,11 @@ static void rcu_idle_count_callbacks_posted(void) | |||
1968 | */ | 1812 | */ |
1969 | #define RCU_IDLE_FLUSHES 5 /* Number of dyntick-idle tries. */ | 1813 | #define RCU_IDLE_FLUSHES 5 /* Number of dyntick-idle tries. */ |
1970 | #define RCU_IDLE_OPT_FLUSHES 3 /* Optional dyntick-idle tries. */ | 1814 | #define RCU_IDLE_OPT_FLUSHES 3 /* Optional dyntick-idle tries. */ |
1971 | #define RCU_IDLE_GP_DELAY 6 /* Roughly one grace period. */ | 1815 | #define RCU_IDLE_GP_DELAY 4 /* Roughly one grace period. */ |
1972 | #define RCU_IDLE_LAZY_GP_DELAY (6 * HZ) /* Roughly six seconds. */ | 1816 | #define RCU_IDLE_LAZY_GP_DELAY (6 * HZ) /* Roughly six seconds. */ |
1973 | 1817 | ||
1818 | extern int tick_nohz_enabled; | ||
1819 | |||
1974 | /* | 1820 | /* |
1975 | * Does the specified flavor of RCU have non-lazy callbacks pending on | 1821 | * Does the specified flavor of RCU have non-lazy callbacks pending on |
1976 | * the specified CPU? Both RCU flavor and CPU are specified by the | 1822 | * the specified CPU? Both RCU flavor and CPU are specified by the |
@@ -2047,10 +1893,13 @@ int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies) | |||
2047 | return 1; | 1893 | return 1; |
2048 | } | 1894 | } |
2049 | /* Set up for the possibility that RCU will post a timer. */ | 1895 | /* Set up for the possibility that RCU will post a timer. */ |
2050 | if (rcu_cpu_has_nonlazy_callbacks(cpu)) | 1896 | if (rcu_cpu_has_nonlazy_callbacks(cpu)) { |
2051 | *delta_jiffies = RCU_IDLE_GP_DELAY; | 1897 | *delta_jiffies = round_up(RCU_IDLE_GP_DELAY + jiffies, |
2052 | else | 1898 | RCU_IDLE_GP_DELAY) - jiffies; |
2053 | *delta_jiffies = RCU_IDLE_LAZY_GP_DELAY; | 1899 | } else { |
1900 | *delta_jiffies = jiffies + RCU_IDLE_LAZY_GP_DELAY; | ||
1901 | *delta_jiffies = round_jiffies(*delta_jiffies) - jiffies; | ||
1902 | } | ||
2054 | return 0; | 1903 | return 0; |
2055 | } | 1904 | } |
2056 | 1905 | ||
@@ -2109,6 +1958,7 @@ static void rcu_cleanup_after_idle(int cpu) | |||
2109 | 1958 | ||
2110 | del_timer(&rdtp->idle_gp_timer); | 1959 | del_timer(&rdtp->idle_gp_timer); |
2111 | trace_rcu_prep_idle("Cleanup after idle"); | 1960 | trace_rcu_prep_idle("Cleanup after idle"); |
1961 | rdtp->tick_nohz_enabled_snap = ACCESS_ONCE(tick_nohz_enabled); | ||
2112 | } | 1962 | } |
2113 | 1963 | ||
2114 | /* | 1964 | /* |
@@ -2134,6 +1984,18 @@ static void rcu_prepare_for_idle(int cpu) | |||
2134 | { | 1984 | { |
2135 | struct timer_list *tp; | 1985 | struct timer_list *tp; |
2136 | struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); | 1986 | struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); |
1987 | int tne; | ||
1988 | |||
1989 | /* Handle nohz enablement switches conservatively. */ | ||
1990 | tne = ACCESS_ONCE(tick_nohz_enabled); | ||
1991 | if (tne != rdtp->tick_nohz_enabled_snap) { | ||
1992 | if (rcu_cpu_has_callbacks(cpu)) | ||
1993 | invoke_rcu_core(); /* force nohz to see update. */ | ||
1994 | rdtp->tick_nohz_enabled_snap = tne; | ||
1995 | return; | ||
1996 | } | ||
1997 | if (!tne) | ||
1998 | return; | ||
2137 | 1999 | ||
2138 | /* | 2000 | /* |
2139 | * If this is an idle re-entry, for example, due to use of | 2001 | * If this is an idle re-entry, for example, due to use of |
@@ -2187,10 +2049,11 @@ static void rcu_prepare_for_idle(int cpu) | |||
2187 | if (rcu_cpu_has_nonlazy_callbacks(cpu)) { | 2049 | if (rcu_cpu_has_nonlazy_callbacks(cpu)) { |
2188 | trace_rcu_prep_idle("Dyntick with callbacks"); | 2050 | trace_rcu_prep_idle("Dyntick with callbacks"); |
2189 | rdtp->idle_gp_timer_expires = | 2051 | rdtp->idle_gp_timer_expires = |
2190 | jiffies + RCU_IDLE_GP_DELAY; | 2052 | round_up(jiffies + RCU_IDLE_GP_DELAY, |
2053 | RCU_IDLE_GP_DELAY); | ||
2191 | } else { | 2054 | } else { |
2192 | rdtp->idle_gp_timer_expires = | 2055 | rdtp->idle_gp_timer_expires = |
2193 | jiffies + RCU_IDLE_LAZY_GP_DELAY; | 2056 | round_jiffies(jiffies + RCU_IDLE_LAZY_GP_DELAY); |
2194 | trace_rcu_prep_idle("Dyntick with lazy callbacks"); | 2057 | trace_rcu_prep_idle("Dyntick with lazy callbacks"); |
2195 | } | 2058 | } |
2196 | tp = &rdtp->idle_gp_timer; | 2059 | tp = &rdtp->idle_gp_timer; |
@@ -2231,8 +2094,9 @@ static void rcu_prepare_for_idle(int cpu) | |||
2231 | if (rcu_cpu_has_callbacks(cpu)) { | 2094 | if (rcu_cpu_has_callbacks(cpu)) { |
2232 | trace_rcu_prep_idle("More callbacks"); | 2095 | trace_rcu_prep_idle("More callbacks"); |
2233 | invoke_rcu_core(); | 2096 | invoke_rcu_core(); |
2234 | } else | 2097 | } else { |
2235 | trace_rcu_prep_idle("Callbacks drained"); | 2098 | trace_rcu_prep_idle("Callbacks drained"); |
2099 | } | ||
2236 | } | 2100 | } |
2237 | 2101 | ||
2238 | /* | 2102 | /* |
@@ -2269,6 +2133,7 @@ static void print_cpu_stall_fast_no_hz(char *cp, int cpu) | |||
2269 | 2133 | ||
2270 | static void print_cpu_stall_fast_no_hz(char *cp, int cpu) | 2134 | static void print_cpu_stall_fast_no_hz(char *cp, int cpu) |
2271 | { | 2135 | { |
2136 | *cp = '\0'; | ||
2272 | } | 2137 | } |
2273 | 2138 | ||
2274 | #endif /* #else #ifdef CONFIG_RCU_FAST_NO_HZ */ | 2139 | #endif /* #else #ifdef CONFIG_RCU_FAST_NO_HZ */ |
diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c index d4bc16ddd1d4..abffb486e94e 100644 --- a/kernel/rcutree_trace.c +++ b/kernel/rcutree_trace.c | |||
@@ -46,6 +46,31 @@ | |||
46 | #define RCU_TREE_NONCORE | 46 | #define RCU_TREE_NONCORE |
47 | #include "rcutree.h" | 47 | #include "rcutree.h" |
48 | 48 | ||
49 | static int show_rcubarrier(struct seq_file *m, void *unused) | ||
50 | { | ||
51 | struct rcu_state *rsp; | ||
52 | |||
53 | for_each_rcu_flavor(rsp) | ||
54 | seq_printf(m, "%s: %c bcc: %d nbd: %lu\n", | ||
55 | rsp->name, rsp->rcu_barrier_in_progress ? 'B' : '.', | ||
56 | atomic_read(&rsp->barrier_cpu_count), | ||
57 | rsp->n_barrier_done); | ||
58 | return 0; | ||
59 | } | ||
60 | |||
61 | static int rcubarrier_open(struct inode *inode, struct file *file) | ||
62 | { | ||
63 | return single_open(file, show_rcubarrier, NULL); | ||
64 | } | ||
65 | |||
66 | static const struct file_operations rcubarrier_fops = { | ||
67 | .owner = THIS_MODULE, | ||
68 | .open = rcubarrier_open, | ||
69 | .read = seq_read, | ||
70 | .llseek = seq_lseek, | ||
71 | .release = single_release, | ||
72 | }; | ||
73 | |||
49 | #ifdef CONFIG_RCU_BOOST | 74 | #ifdef CONFIG_RCU_BOOST |
50 | 75 | ||
51 | static char convert_kthread_status(unsigned int kthread_status) | 76 | static char convert_kthread_status(unsigned int kthread_status) |
@@ -95,24 +120,16 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp) | |||
95 | rdp->n_cbs_invoked, rdp->n_cbs_orphaned, rdp->n_cbs_adopted); | 120 | rdp->n_cbs_invoked, rdp->n_cbs_orphaned, rdp->n_cbs_adopted); |
96 | } | 121 | } |
97 | 122 | ||
98 | #define PRINT_RCU_DATA(name, func, m) \ | ||
99 | do { \ | ||
100 | int _p_r_d_i; \ | ||
101 | \ | ||
102 | for_each_possible_cpu(_p_r_d_i) \ | ||
103 | func(m, &per_cpu(name, _p_r_d_i)); \ | ||
104 | } while (0) | ||
105 | |||
106 | static int show_rcudata(struct seq_file *m, void *unused) | 123 | static int show_rcudata(struct seq_file *m, void *unused) |
107 | { | 124 | { |
108 | #ifdef CONFIG_TREE_PREEMPT_RCU | 125 | int cpu; |
109 | seq_puts(m, "rcu_preempt:\n"); | 126 | struct rcu_state *rsp; |
110 | PRINT_RCU_DATA(rcu_preempt_data, print_one_rcu_data, m); | 127 | |
111 | #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ | 128 | for_each_rcu_flavor(rsp) { |
112 | seq_puts(m, "rcu_sched:\n"); | 129 | seq_printf(m, "%s:\n", rsp->name); |
113 | PRINT_RCU_DATA(rcu_sched_data, print_one_rcu_data, m); | 130 | for_each_possible_cpu(cpu) |
114 | seq_puts(m, "rcu_bh:\n"); | 131 | print_one_rcu_data(m, per_cpu_ptr(rsp->rda, cpu)); |
115 | PRINT_RCU_DATA(rcu_bh_data, print_one_rcu_data, m); | 132 | } |
116 | return 0; | 133 | return 0; |
117 | } | 134 | } |
118 | 135 | ||
@@ -166,6 +183,9 @@ static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp) | |||
166 | 183 | ||
167 | static int show_rcudata_csv(struct seq_file *m, void *unused) | 184 | static int show_rcudata_csv(struct seq_file *m, void *unused) |
168 | { | 185 | { |
186 | int cpu; | ||
187 | struct rcu_state *rsp; | ||
188 | |||
169 | seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pgp\",\"pq\","); | 189 | seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pgp\",\"pq\","); |
170 | seq_puts(m, "\"dt\",\"dt nesting\",\"dt NMI nesting\",\"df\","); | 190 | seq_puts(m, "\"dt\",\"dt nesting\",\"dt NMI nesting\",\"df\","); |
171 | seq_puts(m, "\"of\",\"qll\",\"ql\",\"qs\""); | 191 | seq_puts(m, "\"of\",\"qll\",\"ql\",\"qs\""); |
@@ -173,14 +193,11 @@ static int show_rcudata_csv(struct seq_file *m, void *unused) | |||
173 | seq_puts(m, "\"kt\",\"ktl\""); | 193 | seq_puts(m, "\"kt\",\"ktl\""); |
174 | #endif /* #ifdef CONFIG_RCU_BOOST */ | 194 | #endif /* #ifdef CONFIG_RCU_BOOST */ |
175 | seq_puts(m, ",\"b\",\"ci\",\"co\",\"ca\"\n"); | 195 | seq_puts(m, ",\"b\",\"ci\",\"co\",\"ca\"\n"); |
176 | #ifdef CONFIG_TREE_PREEMPT_RCU | 196 | for_each_rcu_flavor(rsp) { |
177 | seq_puts(m, "\"rcu_preempt:\"\n"); | 197 | seq_printf(m, "\"%s:\"\n", rsp->name); |
178 | PRINT_RCU_DATA(rcu_preempt_data, print_one_rcu_data_csv, m); | 198 | for_each_possible_cpu(cpu) |
179 | #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ | 199 | print_one_rcu_data_csv(m, per_cpu_ptr(rsp->rda, cpu)); |
180 | seq_puts(m, "\"rcu_sched:\"\n"); | 200 | } |
181 | PRINT_RCU_DATA(rcu_sched_data, print_one_rcu_data_csv, m); | ||
182 | seq_puts(m, "\"rcu_bh:\"\n"); | ||
183 | PRINT_RCU_DATA(rcu_bh_data, print_one_rcu_data_csv, m); | ||
184 | return 0; | 201 | return 0; |
185 | } | 202 | } |
186 | 203 | ||
@@ -201,8 +218,7 @@ static const struct file_operations rcudata_csv_fops = { | |||
201 | 218 | ||
202 | static void print_one_rcu_node_boost(struct seq_file *m, struct rcu_node *rnp) | 219 | static void print_one_rcu_node_boost(struct seq_file *m, struct rcu_node *rnp) |
203 | { | 220 | { |
204 | seq_printf(m, "%d:%d tasks=%c%c%c%c kt=%c ntb=%lu neb=%lu nnb=%lu " | 221 | seq_printf(m, "%d:%d tasks=%c%c%c%c kt=%c ntb=%lu neb=%lu nnb=%lu ", |
205 | "j=%04x bt=%04x\n", | ||
206 | rnp->grplo, rnp->grphi, | 222 | rnp->grplo, rnp->grphi, |
207 | "T."[list_empty(&rnp->blkd_tasks)], | 223 | "T."[list_empty(&rnp->blkd_tasks)], |
208 | "N."[!rnp->gp_tasks], | 224 | "N."[!rnp->gp_tasks], |
@@ -210,11 +226,11 @@ static void print_one_rcu_node_boost(struct seq_file *m, struct rcu_node *rnp) | |||
210 | "B."[!rnp->boost_tasks], | 226 | "B."[!rnp->boost_tasks], |
211 | convert_kthread_status(rnp->boost_kthread_status), | 227 | convert_kthread_status(rnp->boost_kthread_status), |
212 | rnp->n_tasks_boosted, rnp->n_exp_boosts, | 228 | rnp->n_tasks_boosted, rnp->n_exp_boosts, |
213 | rnp->n_normal_boosts, | 229 | rnp->n_normal_boosts); |
230 | seq_printf(m, "j=%04x bt=%04x\n", | ||
214 | (int)(jiffies & 0xffff), | 231 | (int)(jiffies & 0xffff), |
215 | (int)(rnp->boost_time & 0xffff)); | 232 | (int)(rnp->boost_time & 0xffff)); |
216 | seq_printf(m, "%s: nt=%lu egt=%lu bt=%lu nb=%lu ny=%lu nos=%lu\n", | 233 | seq_printf(m, " balk: nt=%lu egt=%lu bt=%lu nb=%lu ny=%lu nos=%lu\n", |
217 | " balk", | ||
218 | rnp->n_balk_blkd_tasks, | 234 | rnp->n_balk_blkd_tasks, |
219 | rnp->n_balk_exp_gp_tasks, | 235 | rnp->n_balk_exp_gp_tasks, |
220 | rnp->n_balk_boost_tasks, | 236 | rnp->n_balk_boost_tasks, |
@@ -270,15 +286,15 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp) | |||
270 | struct rcu_node *rnp; | 286 | struct rcu_node *rnp; |
271 | 287 | ||
272 | gpnum = rsp->gpnum; | 288 | gpnum = rsp->gpnum; |
273 | seq_printf(m, "c=%lu g=%lu s=%d jfq=%ld j=%x " | 289 | seq_printf(m, "%s: c=%lu g=%lu s=%d jfq=%ld j=%x ", |
274 | "nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu oqlen=%ld/%ld\n", | 290 | rsp->name, rsp->completed, gpnum, rsp->fqs_state, |
275 | rsp->completed, gpnum, rsp->fqs_state, | ||
276 | (long)(rsp->jiffies_force_qs - jiffies), | 291 | (long)(rsp->jiffies_force_qs - jiffies), |
277 | (int)(jiffies & 0xffff), | 292 | (int)(jiffies & 0xffff)); |
293 | seq_printf(m, "nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu oqlen=%ld/%ld\n", | ||
278 | rsp->n_force_qs, rsp->n_force_qs_ngp, | 294 | rsp->n_force_qs, rsp->n_force_qs_ngp, |
279 | rsp->n_force_qs - rsp->n_force_qs_ngp, | 295 | rsp->n_force_qs - rsp->n_force_qs_ngp, |
280 | rsp->n_force_qs_lh, rsp->qlen_lazy, rsp->qlen); | 296 | rsp->n_force_qs_lh, rsp->qlen_lazy, rsp->qlen); |
281 | for (rnp = &rsp->node[0]; rnp - &rsp->node[0] < NUM_RCU_NODES; rnp++) { | 297 | for (rnp = &rsp->node[0]; rnp - &rsp->node[0] < rcu_num_nodes; rnp++) { |
282 | if (rnp->level != level) { | 298 | if (rnp->level != level) { |
283 | seq_puts(m, "\n"); | 299 | seq_puts(m, "\n"); |
284 | level = rnp->level; | 300 | level = rnp->level; |
@@ -295,14 +311,10 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp) | |||
295 | 311 | ||
296 | static int show_rcuhier(struct seq_file *m, void *unused) | 312 | static int show_rcuhier(struct seq_file *m, void *unused) |
297 | { | 313 | { |
298 | #ifdef CONFIG_TREE_PREEMPT_RCU | 314 | struct rcu_state *rsp; |
299 | seq_puts(m, "rcu_preempt:\n"); | 315 | |
300 | print_one_rcu_state(m, &rcu_preempt_state); | 316 | for_each_rcu_flavor(rsp) |
301 | #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ | 317 | print_one_rcu_state(m, rsp); |
302 | seq_puts(m, "rcu_sched:\n"); | ||
303 | print_one_rcu_state(m, &rcu_sched_state); | ||
304 | seq_puts(m, "rcu_bh:\n"); | ||
305 | print_one_rcu_state(m, &rcu_bh_state); | ||
306 | return 0; | 318 | return 0; |
307 | } | 319 | } |
308 | 320 | ||
@@ -343,11 +355,10 @@ static void show_one_rcugp(struct seq_file *m, struct rcu_state *rsp) | |||
343 | 355 | ||
344 | static int show_rcugp(struct seq_file *m, void *unused) | 356 | static int show_rcugp(struct seq_file *m, void *unused) |
345 | { | 357 | { |
346 | #ifdef CONFIG_TREE_PREEMPT_RCU | 358 | struct rcu_state *rsp; |
347 | show_one_rcugp(m, &rcu_preempt_state); | 359 | |
348 | #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ | 360 | for_each_rcu_flavor(rsp) |
349 | show_one_rcugp(m, &rcu_sched_state); | 361 | show_one_rcugp(m, rsp); |
350 | show_one_rcugp(m, &rcu_bh_state); | ||
351 | return 0; | 362 | return 0; |
352 | } | 363 | } |
353 | 364 | ||
@@ -366,44 +377,36 @@ static const struct file_operations rcugp_fops = { | |||
366 | 377 | ||
367 | static void print_one_rcu_pending(struct seq_file *m, struct rcu_data *rdp) | 378 | static void print_one_rcu_pending(struct seq_file *m, struct rcu_data *rdp) |
368 | { | 379 | { |
369 | seq_printf(m, "%3d%cnp=%ld " | 380 | seq_printf(m, "%3d%cnp=%ld ", |
370 | "qsp=%ld rpq=%ld cbr=%ld cng=%ld " | ||
371 | "gpc=%ld gps=%ld nf=%ld nn=%ld\n", | ||
372 | rdp->cpu, | 381 | rdp->cpu, |
373 | cpu_is_offline(rdp->cpu) ? '!' : ' ', | 382 | cpu_is_offline(rdp->cpu) ? '!' : ' ', |
374 | rdp->n_rcu_pending, | 383 | rdp->n_rcu_pending); |
384 | seq_printf(m, "qsp=%ld rpq=%ld cbr=%ld cng=%ld ", | ||
375 | rdp->n_rp_qs_pending, | 385 | rdp->n_rp_qs_pending, |
376 | rdp->n_rp_report_qs, | 386 | rdp->n_rp_report_qs, |
377 | rdp->n_rp_cb_ready, | 387 | rdp->n_rp_cb_ready, |
378 | rdp->n_rp_cpu_needs_gp, | 388 | rdp->n_rp_cpu_needs_gp); |
389 | seq_printf(m, "gpc=%ld gps=%ld nf=%ld nn=%ld\n", | ||
379 | rdp->n_rp_gp_completed, | 390 | rdp->n_rp_gp_completed, |
380 | rdp->n_rp_gp_started, | 391 | rdp->n_rp_gp_started, |
381 | rdp->n_rp_need_fqs, | 392 | rdp->n_rp_need_fqs, |
382 | rdp->n_rp_need_nothing); | 393 | rdp->n_rp_need_nothing); |
383 | } | 394 | } |
384 | 395 | ||
385 | static void print_rcu_pendings(struct seq_file *m, struct rcu_state *rsp) | 396 | static int show_rcu_pending(struct seq_file *m, void *unused) |
386 | { | 397 | { |
387 | int cpu; | 398 | int cpu; |
388 | struct rcu_data *rdp; | 399 | struct rcu_data *rdp; |
389 | 400 | struct rcu_state *rsp; | |
390 | for_each_possible_cpu(cpu) { | 401 | |
391 | rdp = per_cpu_ptr(rsp->rda, cpu); | 402 | for_each_rcu_flavor(rsp) { |
392 | if (rdp->beenonline) | 403 | seq_printf(m, "%s:\n", rsp->name); |
393 | print_one_rcu_pending(m, rdp); | 404 | for_each_possible_cpu(cpu) { |
405 | rdp = per_cpu_ptr(rsp->rda, cpu); | ||
406 | if (rdp->beenonline) | ||
407 | print_one_rcu_pending(m, rdp); | ||
408 | } | ||
394 | } | 409 | } |
395 | } | ||
396 | |||
397 | static int show_rcu_pending(struct seq_file *m, void *unused) | ||
398 | { | ||
399 | #ifdef CONFIG_TREE_PREEMPT_RCU | ||
400 | seq_puts(m, "rcu_preempt:\n"); | ||
401 | print_rcu_pendings(m, &rcu_preempt_state); | ||
402 | #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ | ||
403 | seq_puts(m, "rcu_sched:\n"); | ||
404 | print_rcu_pendings(m, &rcu_sched_state); | ||
405 | seq_puts(m, "rcu_bh:\n"); | ||
406 | print_rcu_pendings(m, &rcu_bh_state); | ||
407 | return 0; | 410 | return 0; |
408 | } | 411 | } |
409 | 412 | ||
@@ -453,6 +456,11 @@ static int __init rcutree_trace_init(void) | |||
453 | if (!rcudir) | 456 | if (!rcudir) |
454 | goto free_out; | 457 | goto free_out; |
455 | 458 | ||
459 | retval = debugfs_create_file("rcubarrier", 0444, rcudir, | ||
460 | NULL, &rcubarrier_fops); | ||
461 | if (!retval) | ||
462 | goto free_out; | ||
463 | |||
456 | retval = debugfs_create_file("rcudata", 0444, rcudir, | 464 | retval = debugfs_create_file("rcudata", 0444, rcudir, |
457 | NULL, &rcudata_fops); | 465 | NULL, &rcudata_fops); |
458 | if (!retval) | 466 | if (!retval) |
diff --git a/kernel/smp.c b/kernel/smp.c index d0ae5b24875e..29dd40a9f2f4 100644 --- a/kernel/smp.c +++ b/kernel/smp.c | |||
@@ -581,26 +581,6 @@ int smp_call_function(smp_call_func_t func, void *info, int wait) | |||
581 | return 0; | 581 | return 0; |
582 | } | 582 | } |
583 | EXPORT_SYMBOL(smp_call_function); | 583 | EXPORT_SYMBOL(smp_call_function); |
584 | |||
585 | void ipi_call_lock(void) | ||
586 | { | ||
587 | raw_spin_lock(&call_function.lock); | ||
588 | } | ||
589 | |||
590 | void ipi_call_unlock(void) | ||
591 | { | ||
592 | raw_spin_unlock(&call_function.lock); | ||
593 | } | ||
594 | |||
595 | void ipi_call_lock_irq(void) | ||
596 | { | ||
597 | raw_spin_lock_irq(&call_function.lock); | ||
598 | } | ||
599 | |||
600 | void ipi_call_unlock_irq(void) | ||
601 | { | ||
602 | raw_spin_unlock_irq(&call_function.lock); | ||
603 | } | ||
604 | #endif /* USE_GENERIC_SMP_HELPERS */ | 584 | #endif /* USE_GENERIC_SMP_HELPERS */ |
605 | 585 | ||
606 | /* Setup configured maximum number of CPUs to activate */ | 586 | /* Setup configured maximum number of CPUs to activate */ |
diff --git a/kernel/smpboot.h b/kernel/smpboot.h index 80c0acfb8472..6ef9433e1c70 100644 --- a/kernel/smpboot.h +++ b/kernel/smpboot.h | |||
@@ -3,8 +3,6 @@ | |||
3 | 3 | ||
4 | struct task_struct; | 4 | struct task_struct; |
5 | 5 | ||
6 | int smpboot_prepare(unsigned int cpu); | ||
7 | |||
8 | #ifdef CONFIG_GENERIC_SMP_IDLE_THREAD | 6 | #ifdef CONFIG_GENERIC_SMP_IDLE_THREAD |
9 | struct task_struct *idle_thread_get(unsigned int cpu); | 7 | struct task_struct *idle_thread_get(unsigned int cpu); |
10 | void idle_thread_set_boot_cpu(void); | 8 | void idle_thread_set_boot_cpu(void); |
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 41be02250e08..024540f97f74 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c | |||
@@ -105,7 +105,7 @@ static ktime_t tick_init_jiffy_update(void) | |||
105 | /* | 105 | /* |
106 | * NO HZ enabled ? | 106 | * NO HZ enabled ? |
107 | */ | 107 | */ |
108 | static int tick_nohz_enabled __read_mostly = 1; | 108 | int tick_nohz_enabled __read_mostly = 1; |
109 | 109 | ||
110 | /* | 110 | /* |
111 | * Enable / Disable tickless mode | 111 | * Enable / Disable tickless mode |
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 8f2aba1246f2..cf364db5589f 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c | |||
@@ -745,6 +745,7 @@ static void timekeeping_resume(void) | |||
745 | timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock); | 745 | timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock); |
746 | timekeeper.ntp_error = 0; | 746 | timekeeper.ntp_error = 0; |
747 | timekeeping_suspended = 0; | 747 | timekeeping_suspended = 0; |
748 | timekeeping_update(&timekeeper, false); | ||
748 | write_sequnlock_irqrestore(&timekeeper.lock, flags); | 749 | write_sequnlock_irqrestore(&timekeeper.lock, flags); |
749 | 750 | ||
750 | touch_softlockup_watchdog(); | 751 | touch_softlockup_watchdog(); |
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index a008663d86c8..b4f20fba09fc 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c | |||
@@ -312,7 +312,7 @@ static int remove_ftrace_list_ops(struct ftrace_ops **list, | |||
312 | 312 | ||
313 | static int __register_ftrace_function(struct ftrace_ops *ops) | 313 | static int __register_ftrace_function(struct ftrace_ops *ops) |
314 | { | 314 | { |
315 | if (ftrace_disabled) | 315 | if (unlikely(ftrace_disabled)) |
316 | return -ENODEV; | 316 | return -ENODEV; |
317 | 317 | ||
318 | if (FTRACE_WARN_ON(ops == &global_ops)) | 318 | if (FTRACE_WARN_ON(ops == &global_ops)) |
@@ -4299,16 +4299,12 @@ int register_ftrace_function(struct ftrace_ops *ops) | |||
4299 | 4299 | ||
4300 | mutex_lock(&ftrace_lock); | 4300 | mutex_lock(&ftrace_lock); |
4301 | 4301 | ||
4302 | if (unlikely(ftrace_disabled)) | ||
4303 | goto out_unlock; | ||
4304 | |||
4305 | ret = __register_ftrace_function(ops); | 4302 | ret = __register_ftrace_function(ops); |
4306 | if (!ret) | 4303 | if (!ret) |
4307 | ret = ftrace_startup(ops, 0); | 4304 | ret = ftrace_startup(ops, 0); |
4308 | 4305 | ||
4309 | |||
4310 | out_unlock: | ||
4311 | mutex_unlock(&ftrace_lock); | 4306 | mutex_unlock(&ftrace_lock); |
4307 | |||
4312 | return ret; | 4308 | return ret; |
4313 | } | 4309 | } |
4314 | EXPORT_SYMBOL_GPL(register_ftrace_function); | 4310 | EXPORT_SYMBOL_GPL(register_ftrace_function); |
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index f765465bffe4..49491fa7daa2 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c | |||
@@ -3239,6 +3239,10 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) | |||
3239 | if (cpu_buffer->commit_page == cpu_buffer->reader_page) | 3239 | if (cpu_buffer->commit_page == cpu_buffer->reader_page) |
3240 | goto out; | 3240 | goto out; |
3241 | 3241 | ||
3242 | /* Don't bother swapping if the ring buffer is empty */ | ||
3243 | if (rb_num_of_entries(cpu_buffer) == 0) | ||
3244 | goto out; | ||
3245 | |||
3242 | /* | 3246 | /* |
3243 | * Reset the reader page to size zero. | 3247 | * Reset the reader page to size zero. |
3244 | */ | 3248 | */ |
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index a7fa0702be1c..a120f98c4112 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c | |||
@@ -830,6 +830,8 @@ int register_tracer(struct tracer *type) | |||
830 | current_trace = saved_tracer; | 830 | current_trace = saved_tracer; |
831 | if (ret) { | 831 | if (ret) { |
832 | printk(KERN_CONT "FAILED!\n"); | 832 | printk(KERN_CONT "FAILED!\n"); |
833 | /* Add the warning after printing 'FAILED' */ | ||
834 | WARN_ON(1); | ||
833 | goto out; | 835 | goto out; |
834 | } | 836 | } |
835 | /* Only reset on passing, to avoid touching corrupted buffers */ | 837 | /* Only reset on passing, to avoid touching corrupted buffers */ |
@@ -1708,9 +1710,11 @@ EXPORT_SYMBOL_GPL(trace_vprintk); | |||
1708 | 1710 | ||
1709 | static void trace_iterator_increment(struct trace_iterator *iter) | 1711 | static void trace_iterator_increment(struct trace_iterator *iter) |
1710 | { | 1712 | { |
1713 | struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu); | ||
1714 | |||
1711 | iter->idx++; | 1715 | iter->idx++; |
1712 | if (iter->buffer_iter[iter->cpu]) | 1716 | if (buf_iter) |
1713 | ring_buffer_read(iter->buffer_iter[iter->cpu], NULL); | 1717 | ring_buffer_read(buf_iter, NULL); |
1714 | } | 1718 | } |
1715 | 1719 | ||
1716 | static struct trace_entry * | 1720 | static struct trace_entry * |
@@ -1718,7 +1722,7 @@ peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts, | |||
1718 | unsigned long *lost_events) | 1722 | unsigned long *lost_events) |
1719 | { | 1723 | { |
1720 | struct ring_buffer_event *event; | 1724 | struct ring_buffer_event *event; |
1721 | struct ring_buffer_iter *buf_iter = iter->buffer_iter[cpu]; | 1725 | struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu); |
1722 | 1726 | ||
1723 | if (buf_iter) | 1727 | if (buf_iter) |
1724 | event = ring_buffer_iter_peek(buf_iter, ts); | 1728 | event = ring_buffer_iter_peek(buf_iter, ts); |
@@ -1856,10 +1860,10 @@ void tracing_iter_reset(struct trace_iterator *iter, int cpu) | |||
1856 | 1860 | ||
1857 | tr->data[cpu]->skipped_entries = 0; | 1861 | tr->data[cpu]->skipped_entries = 0; |
1858 | 1862 | ||
1859 | if (!iter->buffer_iter[cpu]) | 1863 | buf_iter = trace_buffer_iter(iter, cpu); |
1864 | if (!buf_iter) | ||
1860 | return; | 1865 | return; |
1861 | 1866 | ||
1862 | buf_iter = iter->buffer_iter[cpu]; | ||
1863 | ring_buffer_iter_reset(buf_iter); | 1867 | ring_buffer_iter_reset(buf_iter); |
1864 | 1868 | ||
1865 | /* | 1869 | /* |
@@ -2205,13 +2209,15 @@ static enum print_line_t print_bin_fmt(struct trace_iterator *iter) | |||
2205 | 2209 | ||
2206 | int trace_empty(struct trace_iterator *iter) | 2210 | int trace_empty(struct trace_iterator *iter) |
2207 | { | 2211 | { |
2212 | struct ring_buffer_iter *buf_iter; | ||
2208 | int cpu; | 2213 | int cpu; |
2209 | 2214 | ||
2210 | /* If we are looking at one CPU buffer, only check that one */ | 2215 | /* If we are looking at one CPU buffer, only check that one */ |
2211 | if (iter->cpu_file != TRACE_PIPE_ALL_CPU) { | 2216 | if (iter->cpu_file != TRACE_PIPE_ALL_CPU) { |
2212 | cpu = iter->cpu_file; | 2217 | cpu = iter->cpu_file; |
2213 | if (iter->buffer_iter[cpu]) { | 2218 | buf_iter = trace_buffer_iter(iter, cpu); |
2214 | if (!ring_buffer_iter_empty(iter->buffer_iter[cpu])) | 2219 | if (buf_iter) { |
2220 | if (!ring_buffer_iter_empty(buf_iter)) | ||
2215 | return 0; | 2221 | return 0; |
2216 | } else { | 2222 | } else { |
2217 | if (!ring_buffer_empty_cpu(iter->tr->buffer, cpu)) | 2223 | if (!ring_buffer_empty_cpu(iter->tr->buffer, cpu)) |
@@ -2221,8 +2227,9 @@ int trace_empty(struct trace_iterator *iter) | |||
2221 | } | 2227 | } |
2222 | 2228 | ||
2223 | for_each_tracing_cpu(cpu) { | 2229 | for_each_tracing_cpu(cpu) { |
2224 | if (iter->buffer_iter[cpu]) { | 2230 | buf_iter = trace_buffer_iter(iter, cpu); |
2225 | if (!ring_buffer_iter_empty(iter->buffer_iter[cpu])) | 2231 | if (buf_iter) { |
2232 | if (!ring_buffer_iter_empty(buf_iter)) | ||
2226 | return 0; | 2233 | return 0; |
2227 | } else { | 2234 | } else { |
2228 | if (!ring_buffer_empty_cpu(iter->tr->buffer, cpu)) | 2235 | if (!ring_buffer_empty_cpu(iter->tr->buffer, cpu)) |
@@ -2381,6 +2388,11 @@ __tracing_open(struct inode *inode, struct file *file) | |||
2381 | if (!iter) | 2388 | if (!iter) |
2382 | return ERR_PTR(-ENOMEM); | 2389 | return ERR_PTR(-ENOMEM); |
2383 | 2390 | ||
2391 | iter->buffer_iter = kzalloc(sizeof(*iter->buffer_iter) * num_possible_cpus(), | ||
2392 | GFP_KERNEL); | ||
2393 | if (!iter->buffer_iter) | ||
2394 | goto release; | ||
2395 | |||
2384 | /* | 2396 | /* |
2385 | * We make a copy of the current tracer to avoid concurrent | 2397 | * We make a copy of the current tracer to avoid concurrent |
2386 | * changes on it while we are reading. | 2398 | * changes on it while we are reading. |
@@ -2441,6 +2453,8 @@ __tracing_open(struct inode *inode, struct file *file) | |||
2441 | fail: | 2453 | fail: |
2442 | mutex_unlock(&trace_types_lock); | 2454 | mutex_unlock(&trace_types_lock); |
2443 | kfree(iter->trace); | 2455 | kfree(iter->trace); |
2456 | kfree(iter->buffer_iter); | ||
2457 | release: | ||
2444 | seq_release_private(inode, file); | 2458 | seq_release_private(inode, file); |
2445 | return ERR_PTR(-ENOMEM); | 2459 | return ERR_PTR(-ENOMEM); |
2446 | } | 2460 | } |
@@ -2481,6 +2495,7 @@ static int tracing_release(struct inode *inode, struct file *file) | |||
2481 | mutex_destroy(&iter->mutex); | 2495 | mutex_destroy(&iter->mutex); |
2482 | free_cpumask_var(iter->started); | 2496 | free_cpumask_var(iter->started); |
2483 | kfree(iter->trace); | 2497 | kfree(iter->trace); |
2498 | kfree(iter->buffer_iter); | ||
2484 | seq_release_private(inode, file); | 2499 | seq_release_private(inode, file); |
2485 | return 0; | 2500 | return 0; |
2486 | } | 2501 | } |
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 5aec220d2de0..55e1f7f0db12 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h | |||
@@ -317,6 +317,14 @@ struct tracer { | |||
317 | 317 | ||
318 | #define TRACE_PIPE_ALL_CPU -1 | 318 | #define TRACE_PIPE_ALL_CPU -1 |
319 | 319 | ||
320 | static inline struct ring_buffer_iter * | ||
321 | trace_buffer_iter(struct trace_iterator *iter, int cpu) | ||
322 | { | ||
323 | if (iter->buffer_iter && iter->buffer_iter[cpu]) | ||
324 | return iter->buffer_iter[cpu]; | ||
325 | return NULL; | ||
326 | } | ||
327 | |||
320 | int tracer_init(struct tracer *t, struct trace_array *tr); | 328 | int tracer_init(struct tracer *t, struct trace_array *tr); |
321 | int tracing_is_enabled(void); | 329 | int tracing_is_enabled(void); |
322 | void trace_wake_up(void); | 330 | void trace_wake_up(void); |
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index a7d2a4c653d8..ce27c8ba8d31 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c | |||
@@ -538,7 +538,7 @@ get_return_for_leaf(struct trace_iterator *iter, | |||
538 | next = &data->ret; | 538 | next = &data->ret; |
539 | } else { | 539 | } else { |
540 | 540 | ||
541 | ring_iter = iter->buffer_iter[iter->cpu]; | 541 | ring_iter = trace_buffer_iter(iter, iter->cpu); |
542 | 542 | ||
543 | /* First peek to compare current entry and the next one */ | 543 | /* First peek to compare current entry and the next one */ |
544 | if (ring_iter) | 544 | if (ring_iter) |
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index df611a0e76c5..123b189c732c 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c | |||
@@ -1325,4 +1325,4 @@ __init static int init_events(void) | |||
1325 | 1325 | ||
1326 | return 0; | 1326 | return 0; |
1327 | } | 1327 | } |
1328 | device_initcall(init_events); | 1328 | early_initcall(init_events); |